This commit is contained in:
nuno maduro
2026-04-23 10:30:44 -07:00
parent c1feefbb9e
commit 470a5833d4
7 changed files with 589 additions and 28 deletions

View File

@ -412,8 +412,10 @@ final class Tia implements AddsOutput, HandlesArguments, Terminable
return;
}
$perTestTables = $recorder->perTestTables();
if (Parallel::isWorker()) {
$this->flushWorkerPartial($perTest);
$this->flushWorkerPartial($perTest, $perTestTables);
$recorder->reset();
$this->coverageCollector->reset();
@ -436,6 +438,7 @@ final class Tia implements AddsOutput, HandlesArguments, Terminable
$changedFiles->snapshotTree($changedFiles->since($currentSha) ?? []),
);
$graph->replaceEdges($perTest);
$graph->replaceTestTables($perTestTables);
$graph->pruneMissingTests();
// Fold in the results collected during this same record run. The
@ -522,7 +525,8 @@ final class Tia implements AddsOutput, HandlesArguments, Terminable
$changedFiles->snapshotTree($changedFiles->since($currentSha) ?? []),
);
$merged = [];
$mergedFiles = [];
$mergedTables = [];
foreach ($partialKeys as $key) {
$data = $this->readPartial($key);
@ -531,13 +535,23 @@ final class Tia implements AddsOutput, HandlesArguments, Terminable
continue;
}
foreach ($data as $testFile => $sources) {
if (! isset($merged[$testFile])) {
$merged[$testFile] = [];
foreach ($data['files'] as $testFile => $sources) {
if (! isset($mergedFiles[$testFile])) {
$mergedFiles[$testFile] = [];
}
foreach ($sources as $source) {
$merged[$testFile][$source] = true;
$mergedFiles[$testFile][$source] = true;
}
}
foreach ($data['tables'] as $testFile => $tables) {
if (! isset($mergedTables[$testFile])) {
$mergedTables[$testFile] = [];
}
foreach ($tables as $table) {
$mergedTables[$testFile][$table] = true;
}
}
@ -546,10 +560,16 @@ final class Tia implements AddsOutput, HandlesArguments, Terminable
$finalised = [];
foreach ($merged as $testFile => $sourceSet) {
foreach ($mergedFiles as $testFile => $sourceSet) {
$finalised[$testFile] = array_keys($sourceSet);
}
$finalisedTables = [];
foreach ($mergedTables as $testFile => $tableSet) {
$finalisedTables[$testFile] = array_keys($tableSet);
}
// Empty-edges guard: if every worker returned no edges it almost
// always means the coverage driver wasn't loaded in the workers
// (common footgun with custom PHP ini scan dirs, Herd profiles,
@ -567,6 +587,7 @@ final class Tia implements AddsOutput, HandlesArguments, Terminable
}
$graph->replaceEdges($finalised);
$graph->replaceTestTables($finalisedTables);
$graph->pruneMissingTests();
if (! $this->saveGraph($graph)) {
@ -949,11 +970,15 @@ final class Tia implements AddsOutput, HandlesArguments, Terminable
}
/**
* @param array<string, array<int, string>> $perTest
* @param array<string, array<int, string>> $perTestFiles
* @param array<string, array<int, string>> $perTestTables
*/
private function flushWorkerPartial(array $perTest): void
private function flushWorkerPartial(array $perTestFiles, array $perTestTables): void
{
$json = json_encode($perTest, JSON_UNESCAPED_SLASHES);
$json = json_encode([
'files' => $perTestFiles,
'tables' => $perTestTables,
], JSON_UNESCAPED_SLASHES);
if ($json === false) {
return;
@ -1090,7 +1115,7 @@ final class Tia implements AddsOutput, HandlesArguments, Terminable
}
/**
* @return array<string, array<int, string>>|null
* @return array{files: array<string, array<int, string>>, tables: array<string, array<int, string>>}|null
*/
private function readPartial(string $key): ?array
{
@ -1106,20 +1131,36 @@ final class Tia implements AddsOutput, HandlesArguments, Terminable
return null;
}
$filesSource = is_array($data['files'] ?? null) ? $data['files'] : [];
$tablesSource = is_array($data['tables'] ?? null) ? $data['tables'] : [];
return [
'files' => $this->cleanPartialSection($filesSource),
'tables' => $this->cleanPartialSection($tablesSource),
];
}
/**
* @param array<mixed, mixed> $section
* @return array<string, array<int, string>>
*/
private function cleanPartialSection(array $section): array
{
$out = [];
foreach ($data as $test => $sources) {
foreach ($section as $test => $items) {
if (! is_string($test)) {
continue;
}
if (! is_array($sources)) {
if (! is_array($items)) {
continue;
}
$clean = [];
foreach ($sources as $source) {
if (is_string($source)) {
$clean[] = $source;
foreach ($items as $item) {
if (is_string($item)) {
$clean[] = $item;
}
}

View File

@ -34,7 +34,13 @@ final readonly class Fingerprint
// tokens + Blade whitespace/comments) instead of raw bytes.
// Old graphs' run-tree hashes are incompatible and must be
// rebuilt.
private const int SCHEMA_VERSION = 5;
// v6: Graph gained per-test table edges (`$testTables`) powering
// surgical migration invalidation. Worker partial shape
// changed to `{files, tables}`. Old graphs have no table
// coverage, which would leave every DB test invalidated by
// any migration change — force a rebuild so the new edges
// are populated.
private const int SCHEMA_VERSION = 6;
/**
* @return array{

View File

@ -40,6 +40,24 @@ final class Graph
*/
private array $edges = [];
/**
* Table edges: test file (relative) → list of lowercase SQL table
* names the test queried during record. Populated from the
* Recorder's `perTestTables()` snapshot; consumed at replay time
* to do surgical invalidation when a migration changes — the
* test only re-runs if its set intersects the tables the changed
* migration touches. Empty for tests that never hit the DB, which
* is exactly why those tests stay unaffected by migration edits.
*
* Unlike `$edges`, we store names rather than ids: the table
* universe is small (hundreds at most on a giant app), storing
* strings keeps the on-disk graph diff-readable, and the lookup
* cost is negligible compared to the per-file ids used above.
*
* @var array<string, array<int, string>>
*/
private array $testTables = [];
/**
* Environment fingerprint captured at record time.
*
@ -126,11 +144,68 @@ final class Graph
}
}
// 1. Coverage-edge lookup (PHP → PHP).
$affectedSet = [];
// Migration changes don't flow through the coverage-edge path —
// `RefreshDatabase` in every test's `setUp()` means every test
// has an edge to every migration, so step 1 would re-run the
// whole DB-touching suite on any migration edit. Route them
// separately: static-parse the migration source, union the
// referenced tables, and match tests whose recorded query
// footprint intersects that set. Missed files (rare: migrations
// with pure raw SQL or dynamic names) fall back to the watch
// pattern below.
$migrationPaths = [];
$nonMigrationPaths = [];
foreach ($normalised as $rel) {
if ($this->isMigrationPath($rel)) {
$migrationPaths[] = $rel;
} else {
$nonMigrationPaths[] = $rel;
}
}
$changedTables = [];
$unparseableMigrations = [];
foreach ($migrationPaths as $rel) {
$tables = $this->tablesForMigration($rel);
if ($tables === []) {
$unparseableMigrations[] = $rel;
continue;
}
foreach ($tables as $table) {
$changedTables[$table] = true;
}
}
if ($changedTables !== []) {
foreach ($this->testTables as $testFile => $tables) {
if (isset($affectedSet[$testFile])) {
continue;
}
foreach ($tables as $table) {
if (isset($changedTables[$table])) {
$affectedSet[$testFile] = true;
break;
}
}
}
}
// 1. Coverage-edge lookup (PHP → PHP). Migrations are already
// handled above; skipping them here prevents their always-on
// coverage edges from invalidating the whole DB suite.
$changedIds = [];
$unknownSourceDirs = [];
foreach ($normalised as $rel) {
foreach ($nonMigrationPaths as $rel) {
if (isset($this->fileIds[$rel])) {
$changedIds[$this->fileIds[$rel]] = true;
} elseif (str_ends_with($rel, '.php') && ! str_starts_with($rel, 'tests/')) {
@ -141,9 +216,11 @@ final class Graph
}
}
$affectedSet = [];
foreach ($this->edges as $testFile => $ids) {
if (isset($affectedSet[$testFile])) {
continue;
}
foreach ($ids as $id) {
if (isset($changedIds[$id])) {
$affectedSet[$testFile] = true;
@ -160,9 +237,12 @@ final class Graph
// defeating the point of recording the edge in the first place.
// Blade templates captured via Laravel's view composer are the
// motivating case — we want their specific tests, not every
// feature test.
$unknownToGraph = [];
foreach ($normalised as $rel) {
// feature test. Migrations whose static parse yielded nothing
// (exotic syntax, raw SQL) are funneled back in here too so
// broad invalidation still kicks in for edge cases we can't
// parse.
$unknownToGraph = $unparseableMigrations;
foreach ($nonMigrationPaths as $rel) {
if (! isset($this->fileIds[$rel])) {
$unknownToGraph[] = $rel;
}
@ -406,6 +486,79 @@ final class Graph
}
}
/**
 * Overwrites the recorded table set of every test file present in
 * `$testToTables`. Each table name is lowercased, empty names are
 * dropped, duplicates collapse, and the resulting list is stored
 * sorted. Test files that `relative()` cannot resolve (it returns
 * `null`) are ignored, and tests not mentioned in the input are left
 * untouched.
 *
 * @param array<string, array<int, string>> $testToTables
 */
public function replaceTestTables(array $testToTables): void
{
    foreach ($testToTables as $testFile => $tables) {
        $key = $this->relative($testFile);

        if ($key !== null) {
            // Keyed by name so repeated tables collapse into one entry.
            $seen = [];

            foreach ($tables as $table) {
                $name = strtolower($table);

                if ($name === '') {
                    continue;
                }

                $seen[$name] = true;
            }

            $sorted = array_keys($seen);
            sort($sorted);

            $this->testTables[$key] = $sorted;
        }
    }
}
/**
 * Tells whether a project-relative path is a migration file: a `.php`
 * file located anywhere below `database/migrations/`. The directory is
 * matched as a prefix, so nested folders such as
 * `database/migrations/tenant/` qualify as well.
 */
private function isMigrationPath(string $rel): bool
{
    if (! str_ends_with($rel, '.php')) {
        return false;
    }

    return str_starts_with($rel, 'database/migrations/');
}
/**
 * Resolves `$rel` against the project root, reads the file and hands
 * its contents to `TableExtractor::fromMigrationSource()`.
 *
 * Returns an empty list when the path is not a regular file, when it
 * cannot be read, or when the extractor finds no tables.
 *
 * @return list<string>
 */
private function tablesForMigration(string $rel): array
{
    $path = rtrim($this->projectRoot, DIRECTORY_SEPARATOR).DIRECTORY_SEPARATOR.$rel;

    // The read is only attempted for regular files; a failed read is
    // silenced and reported as `false`.
    $source = is_file($path) ? @file_get_contents($path) : false;

    return $source === false
        ? []
        : TableExtractor::fromMigrationSource($source);
}
/**
* Drops edges whose test file no longer exists on disk. Prevents the graph
* from keeping stale entries for deleted / renamed tests that would later
@ -420,6 +573,12 @@ final class Graph
unset($this->edges[$testRel]);
}
}
foreach (array_keys($this->testTables) as $testRel) {
if (! is_file($root.$testRel)) {
unset($this->testTables[$testRel]);
}
}
}
/**
@ -443,6 +602,28 @@ final class Graph
$graph->edges = is_array($data['edges'] ?? null) ? $data['edges'] : [];
$graph->baselines = is_array($data['baselines'] ?? null) ? $data['baselines'] : [];
if (isset($data['test_tables']) && is_array($data['test_tables'])) {
foreach ($data['test_tables'] as $testRel => $tables) {
if (! is_string($testRel)) {
continue;
}
if (! is_array($tables)) {
continue;
}
$names = [];
foreach ($tables as $table) {
if (is_string($table) && $table !== '') {
$names[] = $table;
}
}
if ($names !== []) {
$graph->testTables[$testRel] = $names;
}
}
}
return $graph;
}
@ -460,6 +641,7 @@ final class Graph
'files' => $this->files,
'edges' => $this->edges,
'baselines' => $this->baselines,
'test_tables' => $this->testTables,
];
$json = json_encode($payload, JSON_UNESCAPED_SLASHES);

View File

@ -29,6 +29,16 @@ final class Recorder
*/
private array $perTestFiles = [];
/**
* Aggregated map: absolute test file → set<lowercase table name>.
* Populated by `TableTracker` from `DB::listen` callbacks; consumed
* at record finalize to populate the graph's `$testTables` edges
* that drive migration-change impact analysis.
*
* @var array<string, array<string, true>>
*/
private array $perTestTables = [];
/**
* Cached class → test file resolution.
*
@ -170,6 +180,31 @@ final class Recorder
$this->perTestFiles[$this->currentTestFile][$sourceFile] = true;
}
/**
 * Attributes `$table` (stored lowercased) to the test file that is
 * currently running.
 *
 * Does nothing when the recorder is inactive, when no test file is
 * current, or when the table name is empty.
 */
public function linkTable(string $table): void
{
    $testFile = $this->currentTestFile;

    if (! $this->active || $testFile === null || $table === '') {
        return;
    }

    $this->perTestTables[$testFile][strtolower($table)] = true;
}
/**
* @return array<string, array<int, string>> absolute test file → list of absolute source files.
*/
@ -184,6 +219,22 @@ final class Recorder
return $out;
}
/**
 * Snapshot of the recorded table sets, with each set flattened into a
 * sorted list of names. Keys are the test files the sets were recorded
 * under.
 *
 * @return array<string, array<int, string>> test file → sorted list of table names.
 */
public function perTestTables(): array
{
    // `array_map` over a single array keeps the test-file keys intact.
    return array_map(
        static function (array $tableSet): array {
            $names = array_keys($tableSet);
            sort($names);

            return $names;
        },
        $this->perTestTables,
    );
}
private function resolveTestFile(string $className, string $fallbackFile): ?string
{
if (array_key_exists($className, $this->classFileCache)) {
@ -249,6 +300,7 @@ final class Recorder
{
$this->currentTestFile = null;
$this->perTestFiles = [];
$this->perTestTables = [];
$this->classFileCache = [];
$this->active = false;
}

View File

@ -0,0 +1,154 @@
<?php
declare(strict_types=1);
namespace Pest\Plugins\Tia;
/**
* Extracts table names from SQL statements and migration PHP sources.
*
* Two callers, two methods:
*
* - `fromSql()` runs against query strings Laravel's `DB::listen`
* hands us at record time. We only look at DML (`SELECT`, `INSERT`,
* `UPDATE`, `DELETE`) because DDL emitted by `RefreshDatabase` in
* `setUp()` is noise — we don't want every test to end up linked
* to every migration's `CREATE TABLE`.
* - `fromMigrationSource()` reads a migration file on disk at
* replay time and pulls table names out of `Schema::` calls.
* Used in two places:
* 1. For every migration file reported as changed — what
* tables does the current version of this file touch?
* 2. For brand-new migration files that weren't in the graph
* yet, so we never had a chance to observe their DDL.
*
* Regex isn't a parser. CTEs, subqueries, and raw `DB::statement()`
* that reference tables only inside exotic syntax can slip through.
* The direction of that error is under-attribution (a table the test
* genuinely touches but we missed), so the safety net is to keep the
* broad `database/migrations/**` watch pattern as a last resort for
* files that produce an empty extraction.
*
* @internal
*/
final class TableExtractor
{
    /**
     * DML prefixes we accept. DDL (`CREATE`, `ALTER`, `DROP`,
     * `TRUNCATE`, `RENAME`) is deliberately excluded — those come
     * from migrations fired by `RefreshDatabase`, and capturing them
     * here would attribute every migration table to every test.
     */
    private const array DML_PREFIXES = ['select', 'insert', 'update', 'delete'];

    /**
     * One SQL identifier in any of the common quoting styles:
     * "double", `back`, [bracket], or bare.
     */
    private const string IDENTIFIER = '(?:"[^"]+"|`[^`]+`|\[[^\]]+\]|\w+)';

    /**
     * Extracts the tables a DML statement reads or writes.
     *
     * Names are reported lowercase and unqualified: for
     * `"public"."users"` or `main.users` the table is `users`, not the
     * schema. `fromMigrationSource()` normalises the same way so both
     * sides of the intersection agree.
     *
     * @param  string  $sql  Raw statement text as executed.
     * @return list<string> Sorted, deduped table names referenced by the
     *                      SQL statement. Empty when the statement is
     *                      DDL, empty, or unparseable.
     */
    public static function fromSql(string $sql): array
    {
        $statement = ltrim($sql);

        // Every accepted verb is exactly six characters long, so the
        // prefix test is a plain membership check. An empty statement
        // yields an empty prefix and is rejected here too.
        if (! in_array(strtolower(substr($statement, 0, 6)), self::DML_PREFIXES, true)) {
            return [];
        }

        // Match `from`, `into`, `update`, `join` and capture the
        // following identifier, including any `schema.`/`database.`
        // qualifiers in front of the table name.
        $pattern = '/\b(from|into|update|join)\s+('
            .self::IDENTIFIER.'(?:\s*\.\s*'.self::IDENTIFIER.')*)/i';

        $count = preg_match_all($pattern, $statement, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);

        if ($count === false || $count === 0) {
            return [];
        }

        $tables = [];

        foreach ($matches as $match) {
            [$keyword, $offset] = $match[1];

            // `update` only introduces a table when it is the leading
            // verb. Elsewhere it belongs to a clause (`for update skip
            // locked`, `on conflict ... do update set`, `on duplicate
            // key update col = ...`) and the word after it is not a
            // table.
            if ($offset !== 0 && strtolower($keyword) === 'update') {
                continue;
            }

            $segments = self::segments($match[2][0]);

            // Any meta segment disqualifies the reference, so
            // `information_schema.tables` and `main.sqlite_master` are
            // dropped just like their unqualified forms.
            if ($segments === [] || self::containsSchemaMeta($segments)) {
                continue;
            }

            $tables[$segments[count($segments) - 1]] = true;
        }

        return self::sortedNames($tables);
    }

    /**
     * Extracts table names from `Schema::create/table/drop/
     * dropIfExists/dropColumns/rename` calls whose table argument is a
     * string literal.
     *
     * @param  string  $php  Contents of a migration file.
     * @return list<string> Sorted, deduped, lowercase, unqualified table
     *                      names. Empty when nothing matches — callers
     *                      treat that as "fall back to the broad watch
     *                      pattern".
     */
    public static function fromMigrationSource(string $php): array
    {
        $pattern = '/Schema::\s*(create|table|dropIfExists|dropColumns|drop|rename)\s*\(\s*[\'"]([^\'"]+)[\'"](?:\s*,\s*[\'"]([^\'"]+)[\'"])?/';

        $count = preg_match_all($pattern, $php, $matches, PREG_SET_ORDER);

        if ($count === false || $count === 0) {
            return [];
        }

        $tables = [];

        foreach ($matches as $match) {
            $names = [$match[2]];

            // Only `Schema::rename('old', 'new')` takes a second table
            // name. For every other call a second string literal is
            // something else (e.g. the column in
            // `Schema::dropColumns('users', 'votes')`) and must not be
            // recorded as a table. In set order a trailing unmatched
            // group is absent, hence the `??`.
            if ($match[1] === 'rename' && ($match[3] ?? '') !== '') {
                $names[] = $match[3];
            }

            foreach ($names as $name) {
                $segments = self::segments($name);

                if ($segments !== []) {
                    $tables[$segments[count($segments) - 1]] = true;
                }
            }
        }

        return self::sortedNames($tables);
    }

    /**
     * Splits a possibly quoted, possibly qualified identifier into its
     * lowercase dot-separated parts with the quoting removed:
     * `"Public"."Users"` → `['public', 'users']`.
     *
     * @return list<string>
     */
    private static function segments(string $identifier): array
    {
        $bare = str_replace(['"', '`', '[', ']'], '', strtolower($identifier));

        $segments = [];

        foreach (explode('.', $bare) as $segment) {
            $segment = trim($segment);

            if ($segment !== '') {
                $segments[] = $segment;
            }
        }

        return $segments;
    }

    /**
     * @param  list<string>  $segments
     */
    private static function containsSchemaMeta(array $segments): bool
    {
        foreach ($segments as $segment) {
            if (self::isSchemaMeta($segment)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Turns a name-keyed set into a sorted list of strings. PHP stores
     * numeric-looking keys (a table called `2024`) as integers, so the
     * keys are cast back to keep the `list<string>` contract — callers
     * running under `strict_types` pass these to `string` parameters.
     *
     * @param  array<array-key, true>  $set
     * @return list<string>
     */
    private static function sortedNames(array $set): array
    {
        $names = array_map(strval(...), array_keys($set));
        sort($names);

        return $names;
    }

    /**
     * Filters out driver-internal tables that show up as DB::listen
     * targets without representing user schema: SQLite's master
     * catalogue, Laravel's own `migrations` metadata, and anything
     * prefixed `pg_` or `information_schema`.
     */
    private static function isSchemaMeta(string $name): bool
    {
        $lower = strtolower($name);

        return in_array($lower, ['sqlite_master', 'sqlite_sequence', 'migrations'], true)
            || str_starts_with($lower, 'pg_')
            || str_starts_with($lower, 'information_schema');
    }
}

View File

@ -0,0 +1,123 @@
<?php
declare(strict_types=1);
namespace Pest\Plugins\Tia;
/**
* Laravel-only collaborator: during record mode, attributes every SQL
* table the test body queries to the currently-running test.
*
* Why this exists: the coverage graph can tell us which PHP files a
* test touched but cannot distinguish "this test depends on the
* `users` table" from "this test depends on `questions`". That
* distinction is the whole point of surgical migration invalidation —
* a column rename in `create_questions_table.php` should only re-run
* tests whose body actually queried `questions`.
*
* Mechanism: install a listener on Laravel's event dispatcher that
* subscribes to `Illuminate\Database\Events\QueryExecuted`. Each
* query string is piped through `TableExtractor::fromSql()`; DDL is
* filtered at extraction time so migrations running in `setUp` don't
* attribute every table to every test.
*
* Same dep-free handshake as `BladeEdges`: string class lookup +
* method-capability probes so Pest's `require` stays Laravel-free.
*
* @internal
*/
final class TableTracker
{
    private const string CONTAINER_CLASS = '\\Illuminate\\Container\\Container';

    /**
     * Event name used by the dispatcher fallback. No leading backslash:
     * the dispatcher files object events under the name `get_class()`
     * reports, which never carries one, so a listener registered under
     * `\Illuminate\...` would simply never be invoked.
     */
    private const string QUERY_EXECUTED_EVENT = 'Illuminate\\Database\\Events\\QueryExecuted';

    /**
     * App-scoped marker that makes `arm()` idempotent. `arm()` is
     * called from every test's `setUp()`; whenever the same app
     * instance is seen again, this guard stops us from stacking one
     * more listener per call and firing the closure many times per
     * query.
     */
    private const string MARKER = 'pest.tia.table-tracker-armed';

    /**
     * Installs (at most once per app instance) a query listener that
     * forwards every table `TableExtractor::fromSql()` finds in an
     * executed statement to `$recorder->linkTable()`.
     *
     * Silently does nothing when the recorder is inactive, when the
     * container class is not loaded, when the container lacks
     * `bound`/`make`/`instance`, when the listener is already armed on
     * this app, or when no `db` binding exists.
     */
    public static function arm(Recorder $recorder): void
    {
        if (! $recorder->isActive()) {
            return;
        }

        $containerClass = self::CONTAINER_CLASS;

        if (! class_exists($containerClass)) {
            return;
        }

        /** @var object $app */
        $app = $containerClass::getInstance();

        if (! method_exists($app, 'bound') || ! method_exists($app, 'make') || ! method_exists($app, 'instance')) {
            return;
        }

        if ($app->bound(self::MARKER)) {
            return;
        }

        if (! $app->bound('db')) {
            return;
        }

        $app->instance(self::MARKER, true);

        $listener = static function (object $query) use ($recorder): void {
            if (! property_exists($query, 'sql')) {
                return;
            }

            /** @var mixed $sql */
            $sql = $query->sql;

            if (! is_string($sql) || $sql === '') {
                return;
            }

            foreach (TableExtractor::fromSql($sql) as $table) {
                $recorder->linkTable($table);
            }
        };

        // Preferred path: hand the closure to the `db` service's
        // `listen`. `method_exists` is not a reliable probe here (the
        // method may be reachable without being declared directly on
        // the resolved object), so we gate via `is_callable` instead.
        /** @var object $db */
        $db = $app->make('db');

        if (is_callable([$db, 'listen'])) {
            /** @var callable $listen */
            $listen = [$db, 'listen'];
            $listen($listener);

            return;
        }

        // Fallback: register directly on the event dispatcher. Works
        // as long as every connection shares the same dispatcher
        // instance this app resolved to — true in vanilla setups,
        // but not guaranteed with connections instantiated pre-arm
        // that captured an older dispatcher.
        if (! $app->bound('events')) {
            return;
        }

        /** @var object $events */
        $events = $app->make('events');

        if (! method_exists($events, 'listen')) {
            return;
        }

        $events->listen(self::QUERY_EXECUTED_EVENT, $listener);
    }
}