mirror of
https://github.com/pestphp/pest.git
synced 2026-04-24 07:57:29 +02:00
wip
This commit is contained in:
@ -395,7 +395,7 @@ trait Testable
|
||||
$tia = Container::getInstance()->get(Tia::class);
|
||||
$assertions = $tia->getCachedAssertions($this::class.'::'.$this->name());
|
||||
|
||||
$this->addToAssertionCount($assertions > 0 ? $assertions : 1);
|
||||
$this->addToAssertionCount($assertions);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -85,7 +85,7 @@ final readonly class ChangedFiles
|
||||
continue;
|
||||
}
|
||||
|
||||
$hash = @hash_file('xxh128', $absolute);
|
||||
$hash = ContentHash::of($absolute);
|
||||
|
||||
if ($hash === false || $hash !== $snapshot) {
|
||||
$remaining[] = $file;
|
||||
@ -119,7 +119,7 @@ final readonly class ChangedFiles
|
||||
continue;
|
||||
}
|
||||
|
||||
$hash = @hash_file('xxh128', $absolute);
|
||||
$hash = ContentHash::of($absolute);
|
||||
|
||||
if ($hash !== false) {
|
||||
$out[$file] = $hash;
|
||||
@ -167,7 +167,85 @@ final readonly class ChangedFiles
|
||||
$unique[$file] = true;
|
||||
}
|
||||
|
||||
return array_keys($unique);
|
||||
$candidates = array_keys($unique);
|
||||
|
||||
// Behavioural de-noising: for every file git calls "changed", hash
|
||||
// the current content and the content at `$sha` through
|
||||
// `ContentHash::of()`. A change that only touched comments /
|
||||
// whitespace / blade `{{-- --}}` blocks produces the same hash on
|
||||
// both sides and gets dropped before it can invalidate any test.
|
||||
// Without this, a single-comment edit on a migration re-runs the
|
||||
// entire DB-touching suite.
|
||||
if ($sha !== null && $sha !== '') {
|
||||
return $this->filterBehaviourallyUnchanged($candidates, $sha);
|
||||
}
|
||||
|
||||
return $candidates;
|
||||
}
|
||||
|
||||
/**
 * Narrows the list of files git reports as changed down to those whose
 * `ContentHash` differs between the working tree and the revision `$sha`.
 *
 * A file is kept (treated as a genuine change) when it no longer exists
 * on disk, when its current content can't be hashed, when its content at
 * `$sha` can't be read, or when the two hashes differ. Only files whose
 * hashes match on both sides are dropped.
 *
 * @param  array<int, string>  $files  paths relative to the project root
 * @return array<int, string>
 */
private function filterBehaviourallyUnchanged(array $files, string $sha): array
{
    $isGenuineChange = function ($relative) use ($sha): bool {
        $onDisk = $this->projectRoot.DIRECTORY_SEPARATOR.$relative;

        // Missing from the working tree: a deletion is always a real change.
        if (! is_file($onDisk)) {
            return true;
        }

        $workingTreeHash = ContentHash::of($onDisk);

        // Unreadable right now — we can't prove it is unchanged, so keep it.
        if ($workingTreeHash === false) {
            return true;
        }

        $committed = $this->contentAtSha($sha, $relative);

        // No baseline available (new file, binary, `git show` failed):
        // err on the side of re-running.
        if ($committed === null) {
            return true;
        }

        return $workingTreeHash !== ContentHash::ofContent($relative, $committed);
    };

    // array_filter() preserves keys; re-index so the result stays a list.
    return array_values(array_filter($files, $isGenuineChange));
}
|
||||
|
||||
/**
 * Reads `$path` as it existed at `$sha` by running
 * `git show <sha>:<path>` from the project root.
 *
 * Returns null whenever a baseline can't be obtained: git exits with a
 * non-zero status (for example the path didn't exist at that SHA), the
 * process can't be started, or it doesn't finish within the 5 second
 * timeout. The caller treats null as "changed", so a failure here only
 * costs a re-run and never hides an edit.
 *
 * The output is returned as raw bytes; no encoding validation is done.
 */
private function contentAtSha(string $sha, string $path): ?string
{
    $process = new Process(['git', 'show', $sha.':'.$path], $this->projectRoot);
    $process->setTimeout(5.0);

    try {
        $process->run();
    } catch (\RuntimeException) {
        // NOTE(review): assumes `Process` is Symfony's, whose timeout and
        // start-failure exceptions extend \RuntimeException — confirm
        // against the file's `use` statements. Without this guard a slow
        // or missing git binary aborts the whole run instead of falling
        // back to "treat the file as changed".
        return null;
    }

    if (! $process->isSuccessful()) {
        return null;
    }

    return $process->getOutput();
}
|
||||
|
||||
private function shouldIgnore(string $path): bool
|
||||
|
||||
118
src/Plugins/Tia/ContentHash.php
Normal file
118
src/Plugins/Tia/ContentHash.php
Normal file
@ -0,0 +1,118 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Pest\Plugins\Tia;
|
||||
|
||||
/**
|
||||
* Per-file hashing that ignores changes which can't alter behaviour —
|
||||
* comments and whitespace for PHP, `{{-- … --}}` comments and whitespace
|
||||
* runs for Blade templates. Every other file type falls back to a plain
|
||||
* xxh128 of the raw bytes.
|
||||
*
|
||||
* Why it matters: TIA's file diff signals drive which tests re-run. A
|
||||
* one-line comment tweak on a migration is a behavioural no-op, but the
|
||||
* raw-bytes hash still differs, so every test that talks to the DB would
|
||||
* currently re-execute. Normalising to the parsed-token / compiled-shape
|
||||
* keeps the drift signal honest: edits that can't change runtime
|
||||
* behaviour don't invalidate the replay cache.
|
||||
*
|
||||
* Important: this hash is stored in the graph's last-run tree, so any
|
||||
* format change here must be paired with a `Fingerprint::SCHEMA_VERSION`
|
||||
* bump — otherwise stale hashes from older graphs would be compared
|
||||
* against normalised hashes from the new code and everything would
|
||||
* appear changed.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
final class ContentHash
{
    /**
     * xxh128 hex of the file's "behavioural" shape, or `false` when the
     * file can't be read. Callers should treat `false` the same way they
     * treated a failed `hash_file()` previously.
     */
    public static function of(string $absolute): string|false
    {
        // Suppress the warning: an unreadable file is reported through the
        // `false` return value, not through PHP's error handler.
        $raw = @file_get_contents($absolute);

        if ($raw === false) {
            return false;
        }

        return self::ofContent($absolute, $raw);
    }

    /**
     * Same as `of()` but accepts the file contents in memory. Used when
     * we already have the bytes (e.g. from `git show <sha>:<path>`) and
     * want to avoid a disk round-trip.
     *
     * `$path` is only inspected for its (case-insensitive) suffix to pick
     * the normalisation: `.blade.php` → Blade, `.php` → PHP tokens,
     * anything else → plain xxh128 of the raw bytes.
     */
    public static function ofContent(string $path, string $raw): string
    {
        $lower = strtolower($path);

        // Must be tested before `.php`, which it also ends with.
        if (str_ends_with($lower, '.blade.php')) {
            return self::hashBladeContent($raw);
        }

        if (str_ends_with($lower, '.php')) {
            return self::hashPhpContent($raw);
        }

        return hash('xxh128', $raw);
    }

    /**
     * Tokenise the content and hash every token except whitespace,
     * comments and docblocks, so formatting-only edits collapse to the
     * same hash. Empty content yields no tokens and falls back to the
     * raw hash.
     *
     * Each kept token is fed to the hash as `<byte length>:<text>`.
     * Plain concatenation is ambiguous once whitespace is dropped:
     * `echo 1 . 2;` and `echo 1.2;` (or `return x;` and `returnx;`)
     * would both flatten to the same string although they behave
     * differently, and a real change would be discarded as a no-op.
     *
     * The opening tag's token text includes the whitespace character that
     * follows it, so it is right-trimmed; otherwise `<?php\n` versus
     * `<?php ` (or LF versus CRLF) would flip the hash.
     *
     * Changing this encoding changes the hash of every PHP file, so it
     * has to go together with a `Fingerprint::SCHEMA_VERSION` bump when
     * graphs built with a previous encoding may still exist.
     */
    private static function hashPhpContent(string $raw): string
    {
        // Without TOKEN_PARSE the tokenizer is lenient; `@` only silences
        // lexer warnings (e.g. unterminated comments) on broken files.
        $tokens = @token_get_all($raw);

        if ($tokens === []) {
            return hash('xxh128', $raw);
        }

        $context = hash_init('xxh128');

        foreach ($tokens as $token) {
            if (is_array($token)) {
                [$id, $text] = $token;

                if (in_array($id, [T_WHITESPACE, T_COMMENT, T_DOC_COMMENT], true)) {
                    continue;
                }

                if ($id === T_OPEN_TAG) {
                    $text = rtrim($text);
                }
            } else {
                // Single-character tokens (`;`, `{`, `.` …) come as strings.
                $text = $token;
            }

            hash_update($context, strlen($text).':'.$text);
        }

        return hash_final($context);
    }

    /**
     * Blade templates aren't PHP syntactically, so `token_get_all()`
     * doesn't help. Strip `{{-- … --}}` comments (the only Blade-native
     * comment form), collapse every whitespace run to a single space and
     * trim the ends. Markup reordering, new directives or changed
     * interpolation still flip the hash; pure reformatting does not.
     *
     * If a regex call fails (`preg_replace()` returns null) the input of
     * that step is used unchanged, so a hash is always produced.
     */
    private static function hashBladeContent(string $raw): string
    {
        $stripped = preg_replace('/\{\{--.*?--\}\}/s', '', $raw) ?? $raw;
        $stripped = preg_replace('/\s+/', ' ', $stripped) ?? $stripped;

        return hash('xxh128', trim($stripped));
    }
}
|
||||
@ -29,7 +29,12 @@ final readonly class Fingerprint
|
||||
{
|
||||
// Bump this whenever the set of inputs or the hash algorithm changes,
|
||||
// so older graphs are invalidated automatically.
|
||||
private const int SCHEMA_VERSION = 4;
|
||||
//
|
||||
// v5: ChangedFiles now hashes via `ContentHash` (normalises PHP
|
||||
// tokens + Blade whitespace/comments) instead of raw bytes.
|
||||
// Old graphs' run-tree hashes are incompatible and must be
|
||||
// rebuilt.
|
||||
private const int SCHEMA_VERSION = 5;
|
||||
|
||||
/**
|
||||
* @return array{
|
||||
|
||||
Reference in New Issue
Block a user