From e876dba8bad1a2af9b96027a24ca713ccbde6ad1 Mon Sep 17 00:00:00 2001 From: nuno maduro Date: Thu, 23 Apr 2026 09:29:56 -0700 Subject: [PATCH] wip --- src/Concerns/Testable.php | 2 +- src/Plugins/Tia/ChangedFiles.php | 84 +++++++++++++++++++++- src/Plugins/Tia/ContentHash.php | 118 +++++++++++++++++++++++++++++++ src/Plugins/Tia/Fingerprint.php | 7 +- 4 files changed, 206 insertions(+), 5 deletions(-) create mode 100644 src/Plugins/Tia/ContentHash.php diff --git a/src/Concerns/Testable.php b/src/Concerns/Testable.php index 1e35cc2a..3f8e2dc9 100644 --- a/src/Concerns/Testable.php +++ b/src/Concerns/Testable.php @@ -395,7 +395,7 @@ trait Testable $tia = Container::getInstance()->get(Tia::class); $assertions = $tia->getCachedAssertions($this::class.'::'.$this->name()); - $this->addToAssertionCount($assertions > 0 ? $assertions : 1); + $this->addToAssertionCount($assertions); return null; } diff --git a/src/Plugins/Tia/ChangedFiles.php b/src/Plugins/Tia/ChangedFiles.php index 171b007a..2b511a39 100644 --- a/src/Plugins/Tia/ChangedFiles.php +++ b/src/Plugins/Tia/ChangedFiles.php @@ -85,7 +85,7 @@ final readonly class ChangedFiles continue; } - $hash = @hash_file('xxh128', $absolute); + $hash = ContentHash::of($absolute); if ($hash === false || $hash !== $snapshot) { $remaining[] = $file; @@ -119,7 +119,7 @@ final readonly class ChangedFiles continue; } - $hash = @hash_file('xxh128', $absolute); + $hash = ContentHash::of($absolute); if ($hash !== false) { $out[$file] = $hash; @@ -167,7 +167,85 @@ final readonly class ChangedFiles $unique[$file] = true; } - return array_keys($unique); + $candidates = array_keys($unique); + + // Behavioural de-noising: for every file git calls "changed", hash + // the current content and the content at `$sha` through + // `ContentHash::of()`. A change that only touched comments / + // whitespace / blade `{{-- --}}` blocks produces the same hash on + // both sides and gets dropped before it can invalidate any test. + // Without this, a single-comment edit on a migration re-runs the + // entire DB-touching suite. + if ($sha !== null && $sha !== '') { + return $this->filterBehaviourallyUnchanged($candidates, $sha); + } + + return $candidates; + } + + /** + * @param array $files + * @return array + */ + private function filterBehaviourallyUnchanged(array $files, string $sha): array + { + $remaining = []; + + foreach ($files as $file) { + $absolute = $this->projectRoot.DIRECTORY_SEPARATOR.$file; + + if (! is_file($absolute)) { + // Deleted on disk — a genuine change, keep it. + $remaining[] = $file; + + continue; + } + + $currentHash = ContentHash::of($absolute); + + if ($currentHash === false) { + $remaining[] = $file; + + continue; + } + + $baselineContent = $this->contentAtSha($sha, $file); + + if ($baselineContent === null) { + // Couldn't read the baseline (new file, binary, `git show` + // failed). Err on the side of re-running. + $remaining[] = $file; + + continue; + } + + $baselineHash = ContentHash::ofContent($file, $baselineContent); + + if ($currentHash !== $baselineHash) { + $remaining[] = $file; + } + } + + return $remaining; + } + + /** + * Reads `$path` at `$sha` via `git show`. Returns null when the file + * didn't exist at that SHA, when git errors, or when the content + * isn't valid UTF-8-safe bytes (rare — binary files that happen to + * be tracked). + */ + private function contentAtSha(string $sha, string $path): ?string + { + $process = new Process(['git', 'show', $sha.':'.$path], $this->projectRoot); + $process->setTimeout(5.0); + $process->run(); + + if (! $process->isSuccessful()) { + return null; + } + + return $process->getOutput(); } private function shouldIgnore(string $path): bool diff --git a/src/Plugins/Tia/ContentHash.php b/src/Plugins/Tia/ContentHash.php new file mode 100644 index 00000000..3c56b6cd --- /dev/null +++ b/src/Plugins/Tia/ContentHash.php @@ -0,0 +1,118 @@ +:`) and + * want to avoid a disk round-trip. + */ + public static function ofContent(string $path, string $raw): string + { + $lower = strtolower($path); + + if (str_ends_with($lower, '.blade.php')) { + return self::hashBladeContent($raw); + } + + if (str_ends_with($lower, '.php')) { + return self::hashPhpContent($raw); + } + + return hash('xxh128', $raw); + } + + /** + * Tokenise the content and hash the concatenated values of every + * token except whitespace / comment / docblock. `token_get_all()` + * is built-in, fast, and enough to collapse any formatting-only + * edit. If tokenisation fails (rare syntax error), fall back to + * the raw hash so the caller still gets a deterministic signal. + */ + private static function hashPhpContent(string $raw): string + { + $tokens = @token_get_all($raw); + + if ($tokens === []) { + return hash('xxh128', $raw); + } + + $normalised = ''; + + foreach ($tokens as $token) { + if (is_array($token)) { + if ($token[0] === T_WHITESPACE) { + continue; + } + if ($token[0] === T_COMMENT) { + continue; + } + if ($token[0] === T_DOC_COMMENT) { + continue; + } + $normalised .= $token[1]; + } else { + $normalised .= $token; + } + } + + return hash('xxh128', $normalised); + } + + /** + * Blade templates aren't PHP syntactically, so `token_get_all()` + * doesn't help. Strip `{{-- … --}}` comments (the only Blade-native + * comment form) and collapse whitespace runs. Output differences + * that would survive the Blade compiler (markup reordering, new + * directives, changed interpolation) still flip the hash; pure + * reformatting does not. + */ + private static function hashBladeContent(string $raw): string + { + $stripped = preg_replace('/\{\{--.*?--\}\}/s', '', $raw) ?? $raw; + $stripped = preg_replace('/\s+/', ' ', $stripped) ?? $stripped; + + return hash('xxh128', trim($stripped)); + } +} diff --git a/src/Plugins/Tia/Fingerprint.php b/src/Plugins/Tia/Fingerprint.php index 076273a3..fd6dbec2 100644 --- a/src/Plugins/Tia/Fingerprint.php +++ b/src/Plugins/Tia/Fingerprint.php @@ -29,7 +29,12 @@ final readonly class Fingerprint { // Bump this whenever the set of inputs or the hash algorithm changes, // so older graphs are invalidated automatically. - private const int SCHEMA_VERSION = 4; + // + // v5: ChangedFiles now hashes via `ContentHash` (normalises PHP + // tokens + Blade whitespace/comments) instead of raw bytes. + // Old graphs' run-tree hashes are incompatible and must be + // rebuilt. + private const int SCHEMA_VERSION = 5; /** * @return array{