mirror of
https://github.com/pestphp/pest.git
synced 2026-04-24 07:57:29 +02:00
wip
This commit is contained in:
@ -395,7 +395,7 @@ trait Testable
|
|||||||
$tia = Container::getInstance()->get(Tia::class);
|
$tia = Container::getInstance()->get(Tia::class);
|
||||||
$assertions = $tia->getCachedAssertions($this::class.'::'.$this->name());
|
$assertions = $tia->getCachedAssertions($this::class.'::'.$this->name());
|
||||||
|
|
||||||
$this->addToAssertionCount($assertions > 0 ? $assertions : 1);
|
$this->addToAssertionCount($assertions);
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -85,7 +85,7 @@ final readonly class ChangedFiles
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
$hash = @hash_file('xxh128', $absolute);
|
$hash = ContentHash::of($absolute);
|
||||||
|
|
||||||
if ($hash === false || $hash !== $snapshot) {
|
if ($hash === false || $hash !== $snapshot) {
|
||||||
$remaining[] = $file;
|
$remaining[] = $file;
|
||||||
@ -119,7 +119,7 @@ final readonly class ChangedFiles
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
$hash = @hash_file('xxh128', $absolute);
|
$hash = ContentHash::of($absolute);
|
||||||
|
|
||||||
if ($hash !== false) {
|
if ($hash !== false) {
|
||||||
$out[$file] = $hash;
|
$out[$file] = $hash;
|
||||||
@ -167,7 +167,85 @@ final readonly class ChangedFiles
|
|||||||
$unique[$file] = true;
|
$unique[$file] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return array_keys($unique);
|
$candidates = array_keys($unique);
|
||||||
|
|
||||||
|
// Behavioural de-noising: for every file git calls "changed", hash
|
||||||
|
// the current content and the content at `$sha` through
|
||||||
|
// `ContentHash::of()`. A change that only touched comments /
|
||||||
|
// whitespace / blade `{{-- --}}` blocks produces the same hash on
|
||||||
|
// both sides and gets dropped before it can invalidate any test.
|
||||||
|
// Without this, a single-comment edit on a migration re-runs the
|
||||||
|
// entire DB-touching suite.
|
||||||
|
if ($sha !== null && $sha !== '') {
|
||||||
|
return $this->filterBehaviourallyUnchanged($candidates, $sha);
|
||||||
|
}
|
||||||
|
|
||||||
|
return $candidates;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Narrows a list of git-reported changes down to the files whose
 * normalised content hash differs between the working tree and `$sha`.
 * Whenever the two sides can't be compared, the file is kept.
 *
 * @param  array<int, string>  $files
 * @return array<int, string>
 */
private function filterBehaviourallyUnchanged(array $files, string $sha): array
{
    $isGenuineChange = function ($relative) use ($sha): bool {
        $onDisk = $this->projectRoot.DIRECTORY_SEPARATOR.$relative;

        // No longer present in the working tree: that is a real change.
        if (! is_file($onDisk)) {
            return true;
        }

        $workingHash = ContentHash::of($onDisk);

        // Present but unreadable: keep it rather than guess.
        if ($workingHash === false) {
            return true;
        }

        $before = $this->contentAtSha($sha, $relative);

        // No baseline to compare against (e.g. the file is new or
        // `git show` failed), so prefer re-running.
        if ($before === null) {
            return true;
        }

        return ContentHash::ofContent($relative, $before) !== $workingHash;
    };

    return array_values(array_filter($files, $isGenuineChange));
}
|
||||||
|
|
||||||
|
/**
 * Reads `$path` as it was at `$sha` by running `git show <sha>:<path>`
 * from the project root, with a 5 second timeout.
 *
 * Returns null whenever the baseline can't be obtained: git exits
 * non-zero (for instance the path did not exist at that SHA), the
 * process can't be started, or it exceeds the timeout. Callers treat
 * null as "can't compare, keep the file". The bytes are returned
 * as-is; no encoding or binary check is performed here.
 */
private function contentAtSha(string $sha, string $path): ?string
{
    $process = new Process(['git', 'show', $sha.':'.$path], $this->projectRoot);
    $process->setTimeout(5.0);

    try {
        $process->run();
    } catch (\RuntimeException) {
        // NOTE(review): assumes `Process` is Symfony's, whose timeout and
        // start-failure exceptions extend \RuntimeException — confirm
        // against the file's imports. A slow or missing git must degrade
        // to "no baseline" instead of aborting the whole run.
        return null;
    }

    if (! $process->isSuccessful()) {
        return null;
    }

    return $process->getOutput();
}
|
||||||
|
|
||||||
private function shouldIgnore(string $path): bool
|
private function shouldIgnore(string $path): bool
|
||||||
|
|||||||
118
src/Plugins/Tia/ContentHash.php
Normal file
118
src/Plugins/Tia/ContentHash.php
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace Pest\Plugins\Tia;
|
||||||
|
|
||||||
|
/**
 * Per-file hashing that ignores changes which can't alter behaviour —
 * comments and whitespace for PHP, `{{-- … --}}` comments and whitespace
 * runs for Blade templates. Every other file type falls back to a plain
 * xxh128 of the raw bytes.
 *
 * Why it matters: TIA's file diff signals drive which tests re-run. A
 * one-line comment tweak on a migration is a behavioural no-op, but the
 * raw-bytes hash still differs, so every test that talks to the DB would
 * re-execute. Normalising to the token stream / collapsed template keeps
 * the drift signal honest: edits that can't change runtime behaviour
 * don't invalidate the replay cache.
 *
 * Important: this hash is stored in the graph's last-run tree, so any
 * format change here must be paired with a `Fingerprint::SCHEMA_VERSION`
 * bump — otherwise stale hashes from older graphs would be compared
 * against hashes from the new code and everything would appear changed.
 *
 * @internal
 */
final class ContentHash
{
    /**
     * xxh128 hex of the file's "behavioural" shape, or `false` when the
     * file can't be read. Callers should treat `false` the same way they
     * treated a failed `hash_file()` previously.
     */
    public static function of(string $absolute): string|false
    {
        // Suppressed on purpose: an unreadable file is reported through
        // the `false` return value, not through a warning.
        $raw = @file_get_contents($absolute);

        if ($raw === false) {
            return false;
        }

        return self::ofContent($absolute, $raw);
    }

    /**
     * Same as `of()` but accepts the file contents in memory. Used when
     * the bytes are already available (e.g. from `git show <sha>:<path>`).
     *
     * `$path` is only inspected for its extension (case-insensitively);
     * it is never opened. `.blade.php` is tested first because it also
     * ends with `.php`.
     */
    public static function ofContent(string $path, string $raw): string
    {
        $lower = strtolower($path);

        return match (true) {
            str_ends_with($lower, '.blade.php') => self::hashBladeContent($raw),
            str_ends_with($lower, '.php') => self::hashPhpContent($raw),
            default => hash('xxh128', $raw),
        };
    }

    /**
     * Tokenise the content and hash every token except whitespace,
     * comments and docblocks. If tokenisation yields nothing (empty
     * input), fall back to the raw hash so the caller still gets a
     * deterministic signal.
     *
     * Each surviving token is framed as `<byte length>:<text>` before
     * hashing. Plain concatenation would be ambiguous: `1 . 2` (three
     * tokens, evaluates to the string "12") and `1.2` (one float token)
     * both flatten to `1.2`, so a real behavioural change would be
     * reported as "unchanged" and the affected tests skipped.
     */
    private static function hashPhpContent(string $raw): string
    {
        $tokens = @token_get_all($raw);

        if ($tokens === []) {
            return hash('xxh128', $raw);
        }

        $normalised = '';

        foreach ($tokens as $token) {
            if (is_array($token)) {
                if (in_array($token[0], [T_WHITESPACE, T_COMMENT, T_DOC_COMMENT], true)) {
                    continue;
                }

                $text = $token[1];
            } else {
                // Single-character tokens (`;`, `{`, `=` …) arrive as plain strings.
                $text = $token;
            }

            // Length-prefix framing keeps token boundaries unambiguous
            // regardless of what bytes the token text contains.
            $normalised .= strlen($text).':'.$text;
        }

        return hash('xxh128', $normalised);
    }

    /**
     * Blade templates aren't PHP syntactically, so `token_get_all()`
     * doesn't help. Strip `{{-- … --}}` comments and collapse every
     * whitespace run to a single space, then trim and hash. If either
     * regex fails, the input to that step is used unchanged.
     *
     * NOTE(review): the collapse is purely textual, so whitespace-only
     * edits inside string literals or whitespace-sensitive markup
     * (e.g. `<pre>`) also hash as unchanged — confirm that trade-off is
     * acceptable.
     */
    private static function hashBladeContent(string $raw): string
    {
        $stripped = preg_replace('/\{\{--.*?--\}\}/s', '', $raw) ?? $raw;
        $stripped = preg_replace('/\s+/', ' ', $stripped) ?? $stripped;

        return hash('xxh128', trim($stripped));
    }
}
|
||||||
@ -29,7 +29,12 @@ final readonly class Fingerprint
|
|||||||
{
|
{
|
||||||
// Bump this whenever the set of inputs or the hash algorithm changes,
|
// Bump this whenever the set of inputs or the hash algorithm changes,
|
||||||
// so older graphs are invalidated automatically.
|
// so older graphs are invalidated automatically.
|
||||||
private const int SCHEMA_VERSION = 4;
|
//
|
||||||
|
// v5: ChangedFiles now hashes via `ContentHash` (normalises PHP
|
||||||
|
// tokens + Blade whitespace/comments) instead of raw bytes.
|
||||||
|
// Old graphs' run-tree hashes are incompatible and must be
|
||||||
|
// rebuilt.
|
||||||
|
private const int SCHEMA_VERSION = 5;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return array{
|
* @return array{
|
||||||
|
|||||||
Reference in New Issue
Block a user