[Query API] Flag For Exact Name Matching (#1058)

feat(query): exact name flag
flowr-analysis · Oct 11, 2024 · 318533b · 318533b · github-actions · Oct 11, 2024
1 parent 9336043
commit 318533b
Show file tree

Hide file tree

Showing 5 changed files with 20 additions and 4 deletions.
diff --git a/src/documentation/print-query-wiki.ts b/src/documentation/print-query-wiki.ts
@@ -31,7 +31,7 @@ registerQueryDocumentation('call-context', {
 Call context queries may be used to identify calls to specific functions that match criteria of your interest.
 For now, we support two criteria:
 
-1. **Function Name** (\`callName\`): The function name is specified by a regular expression. This allows you to find all calls to functions that match a specific pattern.
+1. **Function Name** (\`callName\`): The function name is specified by a regular expression. This allows you to find all calls to functions that match a specific pattern. Please note that if you do not use regex anchors, the query will match any function name that contains the given pattern (you can set the \`callNameExact\` property to \`true\` to automatically add the \`^...$\` anchors).
 2. **Call Targets**  (\`callTargets\`): This specifies to what the function call targets. For example, you may want to find all calls to a function that is not defined locally.
 
 Besides this, we provide the following ways to automatically categorize and link identified invocations:

diff --git a/src/queries/catalog/call-context-query/call-context-query-executor.ts b/src/queries/catalog/call-context-query/call-context-query-executor.ts
@@ -106,15 +106,20 @@ function isSubCallQuery(query: CallContextQuery): query is SubCallContextQueryFo
 	return 'linkTo' in query;
 }
 
/**
 * Builds a regular expression that only matches when the *whole* function name
 * matches the given pattern, i.e., the pattern is wrapped as `^(?:...)$`.
 *
 * @param name - The call name pattern, either a regex source string or a `RegExp`.
 * @returns A new `RegExp` anchored at both ends; the flags of a given `RegExp` are kept.
 */
function exactCallNameRegex(name: RegExp | string): RegExp {
	/* interpolating a RegExp directly would stringify it as `/source/flags` and embed the slashes as literal characters */
	const source = typeof name === 'string' ? name : name.source;
	const flags = typeof name === 'string' ? undefined : name.flags;
	/* the non-capturing group keeps top-level alternations (`a|b`) inside the anchors */
	return new RegExp(`^(?:${source})$`, flags);
}
+
 function promoteQueryCallNames(queries: readonly CallContextQuery[]): { promotedQueries: CallContextQuery<RegExp>[], requiresCfg: boolean } {
 	let requiresCfg = false;
 	const promotedQueries = queries.map(q => {
 		if(isSubCallQuery(q)) {
 			requiresCfg = true;
 			return {
 				...q,
-				callName: new RegExp(q.callName),
-				linkTo:   {
+				callName: q.callNameExact ? exactCallNameRegex(q.callName)
+					: new RegExp(q.callName),
+				linkTo: {
 					...q.linkTo,
 					/* we have to add another promotion layer whenever we add something without this call name */
 					callName: new RegExp(q.linkTo.callName)
@@ -123,7 +128,8 @@ function promoteQueryCallNames(queries: readonly CallContextQuery[]): { promoted
 		} else {
 			return {
 				...q,
-				callName: new RegExp(q.callName)
+				callName: q.callNameExact ? exactCallNameRegex(q.callName)
+					: new RegExp(q.callName)
 			};
 		}
 	});

diff --git a/src/queries/catalog/call-context-query/call-context-query-format.ts b/src/queries/catalog/call-context-query/call-context-query-format.ts
@@ -18,6 +18,10 @@ export interface DefaultCallContextQueryFormat<CallName extends RegExp | string>
 	readonly type:            'call-context';
 	/** Regex regarding the function name, please note that strings will be interpreted as regular expressions too! */
 	readonly callName:        CallName;
+	/**
+	 * Should we automatically add the `^` and `$` anchors to the regex to make it an exact match?
+	 */
+	readonly callNameExact?:  boolean;
 	/** kind may be a step or anything that you attach to the call, this can be used to group calls together (e.g., linking `ggplot` to `visualize`). Defaults to `.` */
 	readonly kind?:           string;
 	/** subkinds are used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`). Defaults to `.` */

diff --git a/src/queries/query-schema.ts b/src/queries/query-schema.ts
@@ -4,6 +4,7 @@ import { CallTargets } from './catalog/call-context-query/call-context-query-for
 export const CallContextQuerySchema = Joi.object({
 	type:           Joi.string().valid('call-context').required().description('The type of the query.'),
 	callName:       Joi.string().required().description('Regex regarding the function name!'),
+	callNameExact:  Joi.boolean().optional().description('Should we automatically add the `^` and `$` anchors to the regex to make it an exact match?'),
 	kind:           Joi.string().optional().description('The kind of the call, this can be used to group calls together (e.g., linking `plot` to `visualize`). Defaults to `.`'),
 	subkind:        Joi.string().optional().description('The subkind of the call, this can be used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`). Defaults to `.`'),
 	callTargets:    Joi.string().valid(...Object.values(CallTargets)).optional().description('Call targets the function may have. This defaults to `any`. Request this specifically to gain all call targets we can resolve.'),

diff --git a/test/functionality/dataflow/query/call-context-query-tests.ts b/test/functionality/dataflow/query/call-context-query-tests.ts
@@ -121,4 +121,9 @@ describe('Call Context Query', withShell(shell => {
 			}
 		}));
 	});
+	describe('Exact Names', () => {
+		testQuery('Contained Match (expl undefined)', 'foo()', [q(/o/, { })], r([{ id: 1 }]));
+		testQuery('Contained Match (expl. false)', 'foo()', [q(/o/, { callNameExact: false })], r([{ id: 1 }]));
+		testQuery('No Contained Match', 'foo()', [q(/o/, { callNameExact: true })], baseResult({}));
+	});
 }));
Benchmark suite	Current: `318533b`	Previous: `9d8b361`	Ratio
`Retrieve AST from R code`	`236.17019563636362` ms (`99.8282844999578`)	`238.12351204545453` ms (`103.59720843756357`)	`0.99`
`Normalize R AST`	`18.383343318181815` ms (`31.467736054084728`)	`19.968034227272728` ms (`34.84298543847825`)	`0.92`
`Produce dataflow information`	`39.393869045454544` ms (`86.04388358994296`)	`38.310942090909094` ms (`82.04448044777155`)	`1.03`
`Total per-file`	`815.3046362272728` ms (`1470.9304491130974`)	`811.1703915909092` ms (`1431.4404310276739`)	`1.01`
`Static slicing`	`2.143588970553026` ms (`1.2698762789852083`)	`2.258090287874194` ms (`1.2792808105316449`)	`0.95`
`Reconstruct code`	`0.23780691419512304` ms (`0.19293024919839574`)	`0.22489327849282828` ms (`0.17585774592637268`)	`1.06`
`Total per-slice`	`2.397146201559683` ms (`1.3522574067867275`)	`2.4996261233332735` ms (`1.3278746913052974`)	`0.96`
`failed to reconstruct/re-parse`	`0` #	`0` #	`1`
`times hit threshold`	`0` #	`0` #	`1`
`reduction (characters)`	`0.7869360165281424` #	`0.7869360165281424` #	`1`
`reduction (normalized tokens)`	`0.7639690077689504` #	`0.7639690077689504` #	`1`
`memory (df-graph)`	`95.46617542613636` KiB (`244.77619956879823`)	`147.42458274147728` KiB (`358.6827375397903`)	`0.65`
Benchmark suite	Current: `318533b`	Previous: `9d8b361`	Ratio
`Retrieve AST from R code`	`237.2066748` ms (`43.996941882428736`)	`238.40722376` ms (`42.95412443307438`)	`0.99`
`Normalize R AST`	`20.30701584` ms (`14.946062045199502`)	`22.0872248` ms (`17.016890594916376`)	`0.92`
`Produce dataflow information`	`75.64578623999999` ms (`87.83401625755535`)	`74.60461736` ms (`88.95210983454488`)	`1.01`
`Total per-file`	`7738.1176486` ms (`29073.79581006938`)	`11091.201449639999` ms (`52310.41942604725`)	`0.70`
`Static slicing`	`16.0548633524423` ms (`44.40380137448573`)	`22.047137876062838` ms (`78.30877993604865`)	`0.73`
`Reconstruct code`	`0.23842831264773334` ms (`0.14772639801117235`)	`0.2327517832436913` ms (`0.14954480815603388`)	`1.02`
`Total per-slice`	`16.30146244078766` ms (`44.43054514677405`)	`22.287796325154986` ms (`78.33211951742135`)	`0.73`
`failed to reconstruct/re-parse`	`0` #	`0` #	`1`
`times hit threshold`	`0` #	`0` #	`1`
`reduction (characters)`	`0.8712997340230448` #	`0.8719618340615195` #	`1.00`
`reduction (normalized tokens)`	`0.8102441553774778` #	`0.810633662275233` #	`1.00`
`memory (df-graph)`	`99.8990234375` KiB (`113.72812769327498`)	`145.6434765625` KiB (`153.49028997815503`)	`0.69`