This commit is contained in:
nuno maduro
2026-04-23 09:29:56 -07:00
parent d9c18f9c02
commit e876dba8ba
4 changed files with 206 additions and 5 deletions

View File

@ -395,7 +395,7 @@ trait Testable
$tia = Container::getInstance()->get(Tia::class); $tia = Container::getInstance()->get(Tia::class);
$assertions = $tia->getCachedAssertions($this::class.'::'.$this->name()); $assertions = $tia->getCachedAssertions($this::class.'::'.$this->name());
$this->addToAssertionCount($assertions > 0 ? $assertions : 1); $this->addToAssertionCount($assertions);
return null; return null;
} }

View File

@ -85,7 +85,7 @@ final readonly class ChangedFiles
continue; continue;
} }
$hash = @hash_file('xxh128', $absolute); $hash = ContentHash::of($absolute);
if ($hash === false || $hash !== $snapshot) { if ($hash === false || $hash !== $snapshot) {
$remaining[] = $file; $remaining[] = $file;
@ -119,7 +119,7 @@ final readonly class ChangedFiles
continue; continue;
} }
$hash = @hash_file('xxh128', $absolute); $hash = ContentHash::of($absolute);
if ($hash !== false) { if ($hash !== false) {
$out[$file] = $hash; $out[$file] = $hash;
@ -167,7 +167,85 @@ final readonly class ChangedFiles
$unique[$file] = true; $unique[$file] = true;
} }
return array_keys($unique); $candidates = array_keys($unique);
// Behavioural de-noising: for every file git calls "changed", hash
// the current content and the content at `$sha` through
// `ContentHash::of()`. A change that only touched comments /
// whitespace / blade `{{-- --}}` blocks produces the same hash on
// both sides and gets dropped before it can invalidate any test.
// Without this, a single-comment edit on a migration re-runs the
// entire DB-touching suite.
if ($sha !== null && $sha !== '') {
return $this->filterBehaviourallyUnchanged($candidates, $sha);
}
return $candidates;
}
/**
* @param array<int, string> $files
* @return array<int, string>
*/
private function filterBehaviourallyUnchanged(array $files, string $sha): array
{
$remaining = [];
foreach ($files as $file) {
$absolute = $this->projectRoot.DIRECTORY_SEPARATOR.$file;
if (! is_file($absolute)) {
// Deleted on disk — a genuine change, keep it.
$remaining[] = $file;
continue;
}
$currentHash = ContentHash::of($absolute);
if ($currentHash === false) {
$remaining[] = $file;
continue;
}
$baselineContent = $this->contentAtSha($sha, $file);
if ($baselineContent === null) {
// Couldn't read the baseline (new file, binary, `git show`
// failed). Err on the side of re-running.
$remaining[] = $file;
continue;
}
$baselineHash = ContentHash::ofContent($file, $baselineContent);
if ($currentHash !== $baselineHash) {
$remaining[] = $file;
}
}
return $remaining;
}
/**
* Reads `$path` at `$sha` via `git show`. Returns null when the file
* didn't exist at that SHA, when git errors, or when the content
* isn't valid UTF-8-safe bytes (rare — binary files that happen to
* be tracked).
*/
private function contentAtSha(string $sha, string $path): ?string
{
$process = new Process(['git', 'show', $sha.':'.$path], $this->projectRoot);
$process->setTimeout(5.0);
$process->run();
if (! $process->isSuccessful()) {
return null;
}
return $process->getOutput();
} }
private function shouldIgnore(string $path): bool private function shouldIgnore(string $path): bool

View File

@ -0,0 +1,118 @@
<?php
declare(strict_types=1);
namespace Pest\Plugins\Tia;
/**
* Per-file hashing that ignores changes which can't alter behaviour —
* comments and whitespace for PHP, `{{-- … --}}` comments and whitespace
* runs for Blade templates. Every other file type falls back to a plain
* xxh128 of the raw bytes.
*
* Why it matters: TIA's file diff signals drive which tests re-run. A
* one-line comment tweak on a migration is a behavioural no-op, but the
* raw-bytes hash still differs, so every test that talks to the DB would
* currently re-execute. Normalising to the parsed-token / compiled-shape
* keeps the drift signal honest: edits that can't change runtime
* behaviour don't invalidate the replay cache.
*
* Important: this hash is stored in the graph's last-run tree, so any
* format change here must be paired with a `Fingerprint::SCHEMA_VERSION`
* bump — otherwise stale hashes from older graphs would be compared
* against normalised hashes from the new code and everything would
* appear changed.
*
* @internal
*/
final class ContentHash
{
/**
* xxh128 hex of the file's "behavioural" shape, or `false` when the
* file can't be read. Callers should treat `false` the same way they
* treated a failed `hash_file()` previously.
*/
public static function of(string $absolute): string|false
{
$raw = @file_get_contents($absolute);
if ($raw === false) {
return false;
}
return self::ofContent($absolute, $raw);
}
/**
* Same as `of()` but accepts the file contents in memory. Used when
* we already have the bytes (e.g. from `git show <sha>:<path>`) and
* want to avoid a disk round-trip.
*/
public static function ofContent(string $path, string $raw): string
{
$lower = strtolower($path);
if (str_ends_with($lower, '.blade.php')) {
return self::hashBladeContent($raw);
}
if (str_ends_with($lower, '.php')) {
return self::hashPhpContent($raw);
}
return hash('xxh128', $raw);
}
/**
* Tokenise the content and hash the concatenated values of every
* token except whitespace / comment / docblock. `token_get_all()`
* is built-in, fast, and enough to collapse any formatting-only
* edit. If tokenisation fails (rare syntax error), fall back to
* the raw hash so the caller still gets a deterministic signal.
*/
private static function hashPhpContent(string $raw): string
{
$tokens = @token_get_all($raw);
if ($tokens === []) {
return hash('xxh128', $raw);
}
$normalised = '';
foreach ($tokens as $token) {
if (is_array($token)) {
if ($token[0] === T_WHITESPACE) {
continue;
}
if ($token[0] === T_COMMENT) {
continue;
}
if ($token[0] === T_DOC_COMMENT) {
continue;
}
$normalised .= $token[1];
} else {
$normalised .= $token;
}
}
return hash('xxh128', $normalised);
}
/**
* Blade templates aren't PHP syntactically, so `token_get_all()`
* doesn't help. Strip `{{-- … --}}` comments (the only Blade-native
* comment form) and collapse whitespace runs. Output differences
* that would survive the Blade compiler (markup reordering, new
* directives, changed interpolation) still flip the hash; pure
* reformatting does not.
*/
private static function hashBladeContent(string $raw): string
{
$stripped = preg_replace('/\{\{--.*?--\}\}/s', '', $raw) ?? $raw;
$stripped = preg_replace('/\s+/', ' ', $stripped) ?? $stripped;
return hash('xxh128', trim($stripped));
}
}

View File

@ -29,7 +29,12 @@ final readonly class Fingerprint
{ {
// Bump this whenever the set of inputs or the hash algorithm changes, // Bump this whenever the set of inputs or the hash algorithm changes,
// so older graphs are invalidated automatically. // so older graphs are invalidated automatically.
private const int SCHEMA_VERSION = 4; //
// v5: ChangedFiles now hashes via `ContentHash` (normalises PHP
// tokens + Blade whitespace/comments) instead of raw bytes.
// Old graphs' run-tree hashes are incompatible and must be
// rebuilt.
private const int SCHEMA_VERSION = 5;
/** /**
* @return array{ * @return array{