Skip to content

Commit 7a4b965

Browse files
anandgupta42 and claude committed
feat: lightweight ref()-based DAG for static mode impact analysis
Static mode now builds a dependency graph by parsing {{ ref() }} calls directly from SQL files — no manifest, CLI, or LLM needed.

Changes:
- New dbt-lightweight.ts: extractRefs, extractSources, buildLightweightDAG, analyzeLightweightImpact — scans all .sql files and builds parent→child edges
- index.ts: falls back to the lightweight DAG when the manifest is unavailable
- dbt.ts: replaced LLM-based dbt compile with a direct subprocess call
- comment.ts: buildMermaidDAG uses explicit edges for precise rendering
- types.ts: added edges field to ImpactResult
- 22 new tests for the lightweight DAG module

This means static mode PRs on dbt projects now show:
- Mermaid DAG blast radius diagram
- Downstream model count
- Impact score

370 tests, 0 failures.

Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
1 parent ff7dde8 commit 7a4b965

File tree

8 files changed

+871
-116
lines changed

8 files changed

+871
-116
lines changed

dist/index.js

Lines changed: 234 additions & 82 deletions
Large diffs are not rendered by default.

dist/index.js.map

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/analysis/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ export interface ImpactResult {
4444
affectedTests: string[];
4545
/** Aggregate impact score 0-100 (higher = more risk). */
4646
impactScore: number;
47+
/** Explicit DAG edges for Mermaid rendering. */
48+
edges?: Array<{ from: string; to: string }>;
4749
}
4850

4951
/** Cost estimate for a single file or model. */

src/context/dbt-lightweight.ts

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
import { readFileSync, readdirSync, existsSync, statSync } from "node:fs";
2+
import { join, basename, relative } from "node:path";
3+
import * as core from "@actions/core";
4+
import type { ChangedFile, ImpactResult } from "../analysis/types.js";
5+
6+
/**
 * Extract the model names referenced by `{{ ref(...) }}` calls in SQL.
 *
 * Recognised forms (single or double quotes, optional whitespace):
 * - `{{ ref('model') }}`
 * - `{{ ref('package', 'model') }}` — the model name (last positional
 *   argument) is returned, the package name is dropped
 * - `{{ ref('model', v=2) }}` / `{{ ref('model', version=2) }}`
 * - any of the above written with Jinja whitespace control (`{{-` / `-}}`)
 *
 * @param sql Raw (uncompiled) SQL text of a dbt model.
 * @returns Unique model names in order of first appearance.
 */
export function extractRefs(sql: string): string[] {
  // Built per call: a /g regex carries `lastIndex` state, so it is not shared.
  const pattern =
    /\{\{-?\s*ref\s*\(\s*(?:['"]\w+['"]\s*,\s*)?['"](\w+)['"]\s*(?:,\s*(?:v|version)\s*=\s*[^,()]+?\s*)?\)\s*-?\}\}/gi;

  // A Set keeps insertion order, which gives "first appearance" ordering.
  const names = new Set<string>();
  for (const match of sql.matchAll(pattern)) {
    names.add(match[1]);
  }
  return [...names];
}
19+
20+
/**
 * Extract source references from `{{ source('source_name', 'table_name') }}`
 * calls in SQL.
 *
 * Accepts single or double quotes, optional whitespace, and Jinja whitespace
 * control delimiters (`{{-` / `-}}`).
 *
 * @param sql Raw (uncompiled) SQL text of a dbt model.
 * @returns Unique `"source_name.table_name"` strings in order of first
 *   appearance.
 */
export function extractSources(sql: string): string[] {
  // Built per call: a /g regex carries `lastIndex` state, so it is not shared.
  const pattern =
    /\{\{-?\s*source\s*\(\s*['"](\w+)['"]\s*,\s*['"](\w+)['"]\s*\)\s*-?\}\}/gi;

  // A Set keeps insertion order, which gives "first appearance" ordering.
  const found = new Set<string>();
  for (const match of sql.matchAll(pattern)) {
    found.add(`${match[1]}.${match[2]}`);
  }
  return [...found];
}
34+
35+
/**
 * Recursively find all `.sql` files under `dir`.
 *
 * Entries that cannot be inspected (for example a dangling symlink, or a file
 * removed while the scan is running) are skipped instead of aborting the
 * whole scan.
 *
 * @param dir Directory to scan.
 * @returns Paths of every `.sql` file found, each formed by joining `dir`
 *   with the entry names below it. Empty when `dir` does not exist or is not
 *   a readable directory.
 */
export function findSQLFiles(dir: string): string[] {
  const results: string[] = [];
  if (!existsSync(dir)) return results;

  let entries: string[];
  try {
    entries = readdirSync(dir);
  } catch {
    // `dir` exists but cannot be listed (e.g. it is a regular file).
    return results;
  }

  for (const entry of entries) {
    const fullPath = join(dir, entry);

    let isDirectory: boolean;
    try {
      isDirectory = statSync(fullPath).isDirectory();
    } catch {
      // statSync follows symlinks, so a dangling link throws here — skip it.
      continue;
    }

    if (isDirectory) {
      // Push one by one: spreading a very large array into push() can exceed
      // the engine's argument limit.
      for (const nested of findSQLFiles(fullPath)) {
        results.push(nested);
      }
    } else if (entry.endsWith(".sql")) {
      results.push(fullPath);
    }
  }
  return results;
}
54+
55+
/** A single dbt model discovered by scanning the project's `.sql` files. */
export interface LightweightNode {
  /** Model name: the file's basename without the `.sql` extension. */
  name: string;
  /** Path of the `.sql` file this model was read from. */
  filePath: string;
  /** Model names this model references via `ref()`. */
  refs: string[];
  /** Sources this model references, as `source_name.table_name` strings. */
  sources: string[];
}
61+
62+
/** Dependency graph derived purely from `ref()` calls in model SQL. */
export interface LightweightDAG {
  /** Map of model name to node metadata. */
  nodes: Map<string, LightweightNode>;
  /**
   * Map of model name to its direct children (models that ref it).
   * A key may name a model that has no entry in `nodes` when a scanned model
   * refs something whose `.sql` file was not found.
   */
  childMap: Map<string, string[]>;
}
68+
69+
/**
 * Build a lightweight DAG by scanning the `.sql` files in the dbt project's
 * model directories and extracting their `ref()` / `source()` calls.
 *
 * This requires NO manifest, NO CLI, NO LLM.
 *
 * Model directories `models`, `dbt_models` and `model` are scanned when they
 * exist. If none of them yields a `.sql` file, the whole project is scanned
 * instead, skipping anything under a `target` or `dbt_packages` directory.
 * Files that cannot be read are skipped with a warning.
 *
 * @param dbtProjectDir Root directory of the dbt project.
 * @returns The discovered nodes and the parent → children edge map. When two
 *   files share a basename, the one scanned last wins.
 */
export function buildLightweightDAG(dbtProjectDir: string): LightweightDAG {
  const nodes = new Map<string, LightweightNode>();
  const childMap = new Map<string, string[]>();

  // Find model directories — check common locations
  const modelDirs = ["models", "dbt_models", "model"];
  let sqlFiles: string[] = [];

  for (const dir of modelDirs) {
    const fullDir = join(dbtProjectDir, dir);
    if (existsSync(fullDir)) {
      for (const file of findSQLFiles(fullDir)) {
        sqlFiles.push(file);
      }
    }
  }

  // Fallback: scan entire project if no model dir found
  if (sqlFiles.length === 0) {
    const excludedDirs = new Set(["target", "dbt_packages"]);
    sqlFiles = findSQLFiles(dbtProjectDir).filter((file) => {
      // Test path segments *relative to the project* so that a project that
      // itself lives under ".../target/..." is not excluded wholesale, and
      // so that directories merely ending in "target" are kept. Splitting on
      // both separators keeps this platform-independent.
      const segments = relative(dbtProjectDir, file).split(/[\\/]/);
      return !segments.some((segment) => excludedDirs.has(segment));
    });
  }

  // Build nodes
  for (const filePath of sqlFiles) {
    let sql: string;
    try {
      sql = readFileSync(filePath, "utf-8");
    } catch (err: unknown) {
      // One unreadable file should not abort impact analysis for the rest.
      const reason = err instanceof Error ? err.message : String(err);
      core.warning(`Lightweight DAG: skipping unreadable file ${filePath}: ${reason}`);
      continue;
    }

    const modelName = basename(filePath, ".sql");
    nodes.set(modelName, {
      name: modelName,
      filePath,
      refs: extractRefs(sql),
      sources: extractSources(sql),
    });
  }

  // Build childMap from ref() edges: if model B refs model A, then A -> B
  let edgeCount = 0;
  for (const [modelName, node] of nodes) {
    for (const parentName of node.refs) {
      let children = childMap.get(parentName);
      if (children === undefined) {
        children = [];
        childMap.set(parentName, children);
      }
      children.push(modelName);
      edgeCount += 1;
    }
  }

  core.info(`Lightweight DAG: ${nodes.size} models, ${edgeCount} edges`);

  return { nodes, childMap };
}
123+
124+
/**
 * Analyze impact using the lightweight DAG (no manifest needed).
 *
 * 1. Scans the project's .sql files to build ref() edges
 * 2. Maps changed `.sql` files to model names by file basename
 * 3. BFS from changed models to find all downstream models
 * 4. Collects the explicit edges traversed, for Mermaid rendering
 * 5. Returns ImpactResult
 *
 * The impact score is the sum of two capped parts: the percentage of all
 * scanned models that are modified or downstream (capped at 50), plus 5
 * points per modified model (capped at 20).
 *
 * @param changedFiles Files changed in the pull request; only entries whose
 *   `filename` ends in `.sql` are considered.
 * @param dbtProjectDir Root directory of the dbt project.
 * @returns `null` when the project contains no models; an empty result (score
 *   0, no `edges`) when no changed file maps to a model; otherwise the full
 *   impact result including `edges`.
 */
export function analyzeLightweightImpact(
  changedFiles: ChangedFile[],
  dbtProjectDir: string,
): ImpactResult | null {
  const dag = buildLightweightDAG(dbtProjectDir);

  if (dag.nodes.size === 0) {
    core.info("Lightweight DAG: no models found — skipping impact analysis");
    return null;
  }

  // Map changed files to model names.
  // Nodes are keyed by the basename of their file, so a basename lookup is
  // the complete match: any changed path equal to a node's path necessarily
  // has that node's basename. A separate path-comparison pass could never
  // find anything this lookup misses.
  const modified = new Set<string>();
  for (const file of changedFiles) {
    if (!file.filename.endsWith(".sql")) continue;
    const modelName = basename(file.filename, ".sql");
    if (dag.nodes.has(modelName)) {
      modified.add(modelName);
    }
  }
  const uniqueModified = [...modified];

  if (uniqueModified.length === 0) {
    core.info(
      "Lightweight DAG: no changed files matched any models — skipping",
    );
    return {
      modifiedModels: [],
      downstreamModels: [],
      affectedExposures: [],
      affectedTests: [],
      impactScore: 0,
    };
  }

  // BFS to find downstream models
  const visited = new Set<string>(uniqueModified);
  const queue = [...uniqueModified];
  const downstreamModels: string[] = [];
  const edges: Array<{ from: string; to: string }> = [];

  // Array iteration is live, so entries pushed during the loop are visited
  // too. This walks the queue in FIFO order without the O(n) cost of shift().
  for (const current of queue) {
    const children = dag.childMap.get(current) ?? [];

    for (const child of children) {
      // Record every traversed edge, including ones into already-seen models,
      // so the rendered graph shows all parent → child links.
      edges.push({ from: current, to: child });
      if (!visited.has(child)) {
        visited.add(child);
        downstreamModels.push(child);
        queue.push(child);
      }
    }
  }

  // Compute a simple impact score
  const totalModels = dag.nodes.size; // non-zero: guarded by the early return
  const affectedRatio =
    (uniqueModified.length + downstreamModels.length) / totalModels;
  const downstreamScore = Math.min(50, Math.round(affectedRatio * 100));
  const modificationScore = Math.min(20, uniqueModified.length * 5);
  const impactScore = Math.min(100, downstreamScore + modificationScore);

  core.info(
    `Lightweight impact: ${uniqueModified.length} modified, ${downstreamModels.length} downstream, score=${impactScore}`,
  );

  return {
    modifiedModels: uniqueModified,
    downstreamModels,
    affectedExposures: [],
    affectedTests: [],
    impactScore,
    edges,
  };
}

src/context/dbt.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import * as core from "@actions/core";
22
import { readFile } from "node:fs/promises";
33
import { join, relative } from "node:path";
44
import { existsSync } from "node:fs";
5-
import { runCLIOrThrow } from "../util/cli.js";
5+
import { execFileSync } from "node:child_process";
66
import type {
77
ChangedFile,
88
DBTManifest,
@@ -81,13 +81,13 @@ export async function getManifest(
8181
}
8282
}
8383

84-
// Attempt to compile dbt to generate manifest
85-
core.info("No manifest found — attempting dbt compile via CLI...");
84+
// Attempt to compile dbt directly to generate manifest
85+
core.info("No manifest found — attempting dbt compile...");
8686
try {
87-
await runCLIOrThrow(
88-
["run", "--format", "json", "--prompt", "run dbt compile"],
89-
{ cwd: dbtProjectDir, timeout: 120_000 },
90-
);
87+
execFileSync("dbt", ["compile", "--project-dir", dbtProjectDir], {
88+
timeout: 120_000,
89+
stdio: "pipe",
90+
});
9191

9292
const defaultPath = join(dbtProjectDir, "target", "manifest.json");
9393
if (existsSync(defaultPath)) {

src/index.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import {
1616
import { parseCommand } from "./interactive/commands.js";
1717
import { handleCommand } from "./interactive/handler.js";
1818
import { detectDBTProject, getManifest } from "./context/dbt.js";
19+
import { analyzeLightweightImpact } from "./context/dbt-lightweight.js";
1920
import { analyzeSQLFiles } from "./analysis/sql-review.js";
2021
import { analyzeImpact } from "./analysis/impact.js";
2122
import { estimateCost, getTotalCostDelta } from "./analysis/cost.js";
@@ -194,9 +195,13 @@ async function runAnalyses(
194195
}
195196

196197
const manifest = await getManifest(dbtProjectDir, config.manifestPath);
197-
if (!manifest) return null;
198+
if (manifest) {
199+
return analyzeImpact(dbtFiles, manifest, dbtProjectDir);
200+
}
198201

199-
return analyzeImpact(dbtFiles, manifest, dbtProjectDir);
202+
// Lightweight fallback: parse ref() calls directly from SQL files
203+
core.info("No manifest — using lightweight ref() parsing for impact analysis");
204+
return analyzeLightweightImpact(dbtFiles, dbtProjectDir);
200205
})(),
201206

202207
// Cost estimation

src/reporting/comment.ts

Lines changed: 41 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -270,30 +270,50 @@ export function buildMermaidDAG(impact: ImpactResult): string {
270270
const sanitize = (name: string): string =>
271271
name.replace(/[^a-zA-Z0-9_]/g, "_");
272272

273-
// Build edges: modified → downstream
274-
for (const mod of impact.modifiedModels) {
275-
const modId = sanitize(mod);
276-
for (const ds of impact.downstreamModels) {
277-
const dsId = sanitize(ds);
278-
lines.push(` ${modId}:::modified --> ${dsId}:::downstream`);
273+
const modifiedSet = new Set(impact.modifiedModels);
274+
const downstreamSet = new Set(impact.downstreamModels);
275+
const exposureSet = new Set(impact.affectedExposures);
276+
277+
// Use explicit edges if available (from lightweight DAG), otherwise
278+
// fall back to the cartesian product approach (modified → downstream).
279+
if (impact.edges && impact.edges.length > 0) {
280+
for (const edge of impact.edges) {
281+
const fromId = sanitize(edge.from);
282+
const toId = sanitize(edge.to);
283+
const fromClass = modifiedSet.has(edge.from) ? "modified" : "downstream";
284+
const toClass = exposureSet.has(edge.to)
285+
? "exposure"
286+
: downstreamSet.has(edge.to)
287+
? "downstream"
288+
: modifiedSet.has(edge.to)
289+
? "modified"
290+
: "downstream";
291+
lines.push(` ${fromId}:::${fromClass} --> ${toId}:::${toClass}`);
279292
}
280-
// modified → exposures
281-
for (const exp of impact.affectedExposures) {
282-
const expId = sanitize(exp);
283-
lines.push(` ${modId}:::modified --> ${expId}:::exposure`);
284-
}
285-
}
286-
287-
// If downstream also connects to exposures
288-
if (
289-
impact.downstreamModels.length > 0 &&
290-
impact.affectedExposures.length > 0
291-
) {
292-
for (const ds of impact.downstreamModels) {
293-
const dsId = sanitize(ds);
293+
} else {
294+
// Legacy: cartesian product of modified → downstream
295+
for (const mod of impact.modifiedModels) {
296+
const modId = sanitize(mod);
297+
for (const ds of impact.downstreamModels) {
298+
const dsId = sanitize(ds);
299+
lines.push(` ${modId}:::modified --> ${dsId}:::downstream`);
300+
}
294301
for (const exp of impact.affectedExposures) {
295302
const expId = sanitize(exp);
296-
lines.push(` ${dsId}:::downstream --> ${expId}:::exposure`);
303+
lines.push(` ${modId}:::modified --> ${expId}:::exposure`);
304+
}
305+
}
306+
307+
if (
308+
impact.downstreamModels.length > 0 &&
309+
impact.affectedExposures.length > 0
310+
) {
311+
for (const ds of impact.downstreamModels) {
312+
const dsId = sanitize(ds);
313+
for (const exp of impact.affectedExposures) {
314+
const expId = sanitize(exp);
315+
lines.push(` ${dsId}:::downstream --> ${expId}:::exposure`);
316+
}
297317
}
298318
}
299319
}

0 commit comments

Comments
 (0)