Skip to content

Commit

Permalink
[Query API] Dataflow Query (#1035)
Browse files Browse the repository at this point in the history
  • Loading branch information
EagleoutIce authored Oct 5, 2024
2 parents 29dfda5 + bfd5bf8 commit 32d51e8
Show file tree
Hide file tree
Showing 18 changed files with 1,094 additions and 315 deletions.
8 changes: 7 additions & 1 deletion src/cli/repl/commands/repl-query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { splitAtEscapeSensitive } from '../../../util/args';
import type { OutputFormatter } from '../../../util/ansi';
import { bold, italic } from '../../../util/ansi';

import type { CallContextQuerySubKindResult } from '../../../queries/call-context-query/call-context-query-format';
import type { CallContextQuerySubKindResult } from '../../../queries/catalog/call-context-query/call-context-query-format';
import { describeSchema } from '../../../util/schema';
import type { Query, QueryResults, SupportedQueryTypes } from '../../../queries/query';
import { executeQueries } from '../../../queries/query';
Expand All @@ -17,6 +17,7 @@ import { jsonReplacer } from '../../../util/json';
import { AnyQuerySchema, QueriesSchema } from '../../../queries/query-schema';
import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id';
import { BuiltIn } from '../../../dataflow/environments/built-in';
import { graphToMermaidUrl } from '../../../util/mermaid/dfg';

async function getDataflow(shell: RShell, remainingLine: string) {
return await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, {
Expand Down Expand Up @@ -126,6 +127,11 @@ export function asciiSummaryOfQueryResult(formatter: OutputFormatter, totalInMs:
result.push(`Query: ${bold(query, formatter)} (${out['.meta'].timing.toFixed(0)}ms)`);
result.push(asciiCallContext(formatter, out, processed));
continue;
} else if(query === 'dataflow') {
const out = queryResults as QueryResults<'dataflow'>['dataflow'];
result.push(`Query: ${bold(query, formatter)} (${out['.meta'].timing.toFixed(0)}ms)`);
result.push(` ╰ [Dataflow Graph](${graphToMermaidUrl(out.graph)})`);
continue;
}

result.push(`Query: ${bold(query, formatter)}`);
Expand Down
2 changes: 1 addition & 1 deletion src/documentation/data/server/doc-data-server-messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import {
responseQueryMessage
} from '../../../cli/repl/server/messages/message-query';
import { exampleQueryCode } from '../query/example-query-code';
import { CallTargets } from '../../../queries/call-context-query/call-context-query-format';
import { CallTargets } from '../../../queries/catalog/call-context-query/call-context-query-format';
import { requestLineageMessage, responseLineageMessage } from '../../../cli/repl/server/messages/message-lineage';

export function documentAllMessages() {
Expand Down
10 changes: 10 additions & 0 deletions src/documentation/doc-util/doc-code.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
import { jsonReplacer } from '../../util/json';

/**
 * Wraps the given code in a fenced markdown code block.
 *
 * @param language - Info string placed directly after the opening fence (e.g., `json`).
 * @param code     - Content of the block; surrounding whitespace is trimmed and `undefined` yields an empty body.
 * @returns The fenced block, preceded and followed by a single newline.
 */
export function codeBlock(language: string, code: string | undefined): string {
	const fence = '```';
	const body = code?.trim() ?? '';
	/* the empty first and last entries produce the leading and the trailing newline */
	return ['', fence + language, body, fence, ''].join('\n');
}

/**
 * Renders the given object as a fenced JSON code block.
 *
 * The object is pretty-printed with an indentation of two. If the pretty-printed text is longer than
 * `maxLength` characters, `tooLongText` is emitted as a notice, and the object is printed compactly
 * inside a `text` block instead of a `json` block.
 *
 * @param object      - The value to serialize (using `jsonReplacer`).
 * @param maxLength   - Maximum length of the pretty-printed JSON before falling back to the compact form.
 * @param tooLongText - Notice placed in front of the code block whenever the fallback is used.
 * @returns The notice line (empty if not required) followed by the code block.
 */
export function jsonWithLimit(object: object, maxLength: number = 5_000, tooLongText: string = '_As the code is pretty long, we inhibit pretty printing and syntax highlighting (JSON):_'): string {
	const prettyPrinted = JSON.stringify(object, jsonReplacer, 2);
	/* decide once, so that notice, fence language, and payload format always agree (also for a custom `maxLength`) */
	const tooLong = prettyPrinted.length > maxLength;
	return `
${tooLong ? tooLongText : ''}
${codeBlock(tooLong ? 'text' : 'json', tooLong ? JSON.stringify(object, jsonReplacer) : prettyPrinted)}
`;
}
9 changes: 3 additions & 6 deletions src/documentation/doc-util/doc-query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import { FlowrWikiBaseRef, getFilePathMd } from './doc-files';
import type { SupportedVirtualQueryTypes } from '../../queries/virtual-query/virtual-queries';
import type { VirtualCompoundConstraint } from '../../queries/virtual-query/compound-query';
import { printDfGraphForCode } from './doc-dfg';
import { jsonWithLimit } from './doc-code';

export interface ShowQueryOptions {
readonly showCode?: boolean;
Expand All @@ -34,8 +35,6 @@ export async function showQuery<
The analysis required _${printAsMs(duration)}_ (including parsing and normalization and the query) within the generation environment.
`.trim();

const resultAsString = JSON.stringify(results, jsonReplacer, 2);

return `
\`\`\`json
Expand All @@ -47,7 +46,7 @@ ${collapseResult ? ' <details> <summary style="color:gray">Show Results</summary
_Results (prettified and summarized):_
${
asciiSummaryOfQueryResult(markdownFormatter, duration, results as QueryResults<'call-context'>, analysis)
asciiSummaryOfQueryResult(markdownFormatter, duration, results as QueryResults<SupportedQueryTypes>, analysis)
}
<details> <summary style="color:gray">Show Detailed Results as Json</summary>
Expand All @@ -57,9 +56,7 @@ ${metaInfo}
In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR.
Please consult the [Interface](${FlowrWikiBaseRef}/Interface) wiki page for more information on how to get those.
\`\`\`json
${resultAsString}
\`\`\`
${jsonWithLimit(results)}
</details>
Expand Down
6 changes: 2 additions & 4 deletions src/documentation/doc-util/doc-server-message.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { markdownFormatter } from '../../util/ansi';
import type { FlowrMessage, IdMessageBase, MessageDefinition } from '../../cli/repl/server/messages/all-messages';
import type { FakeServer, FakeSocket } from '../../../test/functionality/_helper/net';
import { withSocket } from '../../../test/functionality/_helper/net';
import { codeBlock } from './doc-code';
import { jsonWithLimit } from './doc-code';
import { printAsMs } from './doc-ms';
import { guard } from '../../util/assert';

Expand Down Expand Up @@ -64,7 +64,6 @@ export interface MessagePingPongDocumentationArguments {

function explainMsg(msg: IdMessageBase, type: 'request' | 'response', desc = '', open = false): string {
const bold: (s: string) => string = open ? s => `<b>${s}</b>` : s => s;
const msgPrettyPrint = JSON.stringify(msg, null, 2);
return `
<li> ${bold( '<code>' + msg.type + `</code> (${type})`)}
<details${open ? ' open' : ''}>
Expand All @@ -73,8 +72,7 @@ function explainMsg(msg: IdMessageBase, type: 'request' | 'response', desc = '',
${desc}
${msgPrettyPrint.length > 5_000 ? '_As the message is pretty long, we inhibit pretty printing and syntax highlighting:_' : ''}
${codeBlock(msgPrettyPrint.length > 5_000 ? 'text' : 'json', msgPrettyPrint.length > 5_000 ? JSON.stringify(msg) : msgPrettyPrint)}
${jsonWithLimit(msg)}
</details>
</li>
Expand Down
4 changes: 3 additions & 1 deletion src/documentation/print-dataflow-graph-wiki.ts
Original file line number Diff line number Diff line change
Expand Up @@ -789,7 +789,9 @@ Additionally, you may be interested in the set of [Unknown Side Effects](#unknow
> [!TIP]
> If you want to investigate the dataflow graph,
> you can either use the [Visual Studio Code extension](${FlowrGithubBaseRef}/vscode-flowr) or the ${getReplCommand('dataflow*')}
> command in the REPL (see the [Interface wiki page](${FlowrWikiBaseRef}/Interface) for more information).
> command in the REPL (see the [Interface wiki page](${FlowrWikiBaseRef}/Interface) for more information). When using _flowR_ as a library, you may use the functions in ${getFilePathMd('../util/mermaid/dfg.ts')}.
>
> If you receive a dataflow graph in its serialized form (e.g., by talking to a [_flowR_ server](${FlowrWikiBaseRef}/Interface)), you can use \`${DataflowGraph.name}::${DataflowGraph.fromJson.name}\` to retrieve the graph from the JSON representation.
${await printDfGraphForCode(shell,'x <- 3\ny <- x + 1\ny')}
Expand Down
33 changes: 28 additions & 5 deletions src/documentation/print-query-wiki.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,25 @@ import { LogLevel } from '../util/log';
import { executeQueries } from '../queries/query';
import { FlowrWikiBaseRef, getFilePathMd } from './doc-util/doc-files';
import { explainQueries, registerQueryDocumentation, showQuery, tocForQueryType } from './doc-util/doc-query';
import { CallTargets } from '../queries/call-context-query/call-context-query-format';
import { CallTargets } from '../queries/catalog/call-context-query/call-context-query-format';
import { describeSchema } from '../util/schema';
import { QueriesSchema } from '../queries/query-schema';
import { markdownFormatter } from '../util/ansi';
import { executeCallContextQueries } from '../queries/call-context-query/call-context-query-executor';
import { executeCallContextQueries } from '../queries/catalog/call-context-query/call-context-query-executor';
import { executeCompoundQueries } from '../queries/virtual-query/compound-query';
import { autoGenHeader } from './doc-util/doc-auto-gen';
import { exampleQueryCode } from './data/query/example-query-code';
import { details } from './doc-util/doc-structure';
import { codeBlock } from './doc-util/doc-code';
import { executeDataflowQuery } from '../queries/catalog/dataflow-query/dataflow-query-executor';


registerQueryDocumentation('call-context', {
name: 'Call-Context Query',
type: 'active',
shortDescription: 'Finds all calls in a set of files that matches specified criteria.',
functionName: executeCallContextQueries.name,
functionFile: '../queries/call-context-query/call-context-query-executor.ts',
functionFile: '../queries/catalog/call-context-query/call-context-query-executor.ts',
buildExplanation: async(shell: RShell) => {
return `
Call context queries may be used to identify calls to specific functions that match criteria of your interest.
Expand Down Expand Up @@ -83,6 +84,27 @@ my_test_function()
`;
}
});
/*
 * Registers the wiki documentation of the (active) 'dataflow' query.
 * The explanation text is built on demand and embeds a live example: it runs a single
 * `{ type: 'dataflow' }` query on `exampleQueryCode` via `showQuery` (with the code shown).
 */
registerQueryDocumentation('dataflow', {
name: 'Dataflow Query',
type: 'active',
shortDescription: 'Returns the dataflow graph of the given code.',
functionName: executeDataflowQuery.name,
functionFile: '../queries/catalog/dataflow-query/dataflow-query-executor.ts',
buildExplanation: async(shell: RShell) => {
return `
Maybe you want to handle only the result of the query execution, or you just need the [dataflow graph](${FlowrWikiBaseRef}/Dataflow%20Graph) again.
This query type does exactly that!
Using the example code from above, the following query returns the dataflow graph of the code:
${
await showQuery(shell, exampleQueryCode, [{
type: 'dataflow'
}], { showCode: true })
}
`;
}
});

registerQueryDocumentation('compound', {
name: 'Compound Query',
type: 'virtual',
Expand Down Expand Up @@ -122,7 +144,7 @@ ${
}
However, compound queries become more useful whenever common arguments can not be expressed as a union in one of their properties.
Additionally, you can still overwrite default arguments.
Additionally, you can still overwrite default arguments.
In the following, we (by default) want all calls to not resolve to a local definition, except for those to \`print\` for which we explicitly
want to resolve to a local definition:
Expand All @@ -138,12 +160,13 @@ ${
}], { showCode: false })
}
Now, the results no longer contain calls to \`plot\` that are not defined locally.
Now, the results no longer contain calls to \`plot\` that are not defined locally.
`;
}
});


async function getText(shell: RShell) {
const rversion = (await shell.usedRVersion())?.format() ?? 'unknown';
return `${autoGenHeader({ filename: module.filename, purpose: 'query API', rVersion: rversion })}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { DataflowGraph } from '../../dataflow/graph/graph';
import type { DataflowGraph } from '../../../dataflow/graph/graph';
import type {
CallContextQuery,
CallContextQueryKindResult,
Expand All @@ -7,20 +7,20 @@ import type {
SubCallContextQueryFormat
} from './call-context-query-format';
import { CallTargets } from './call-context-query-format';
import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
import { recoverContent } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
import { VertexType } from '../../dataflow/graph/vertex';
import { assertUnreachable } from '../../util/assert';
import { edgeIncludesType, EdgeType } from '../../dataflow/graph/edge';
import { resolveByName } from '../../dataflow/environments/resolve-by-name';
import { BuiltIn } from '../../dataflow/environments/built-in';
import type { ControlFlowGraph } from '../../util/cfg/cfg';
import { extractCFG } from '../../util/cfg/cfg';
import { TwoLayerCollector } from '../two-layer-collector';
import type { BasicQueryData } from '../query';
import { compactRecord } from '../../util/objects';
import { visitInReverseOrder } from '../../util/cfg/visitor';
import { ReferenceType } from '../../dataflow/environments/identifier';
import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id';
import { recoverContent } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id';
import { VertexType } from '../../../dataflow/graph/vertex';
import { assertUnreachable } from '../../../util/assert';
import { edgeIncludesType, EdgeType } from '../../../dataflow/graph/edge';
import { resolveByName } from '../../../dataflow/environments/resolve-by-name';
import { BuiltIn } from '../../../dataflow/environments/built-in';
import type { ControlFlowGraph } from '../../../util/cfg/cfg';
import { extractCFG } from '../../../util/cfg/cfg';
import { TwoLayerCollector } from '../../two-layer-collector';
import type { BasicQueryData } from '../../query';
import { compactRecord } from '../../../util/objects';
import { visitInReverseOrder } from '../../../util/cfg/visitor';
import { ReferenceType } from '../../../dataflow/environments/identifier';

function satisfiesCallTargets(id: NodeId, graph: DataflowGraph, callTarget: CallTargets): NodeId[] | 'no' {
const callVertex = graph.get(id);
Expand Down Expand Up @@ -211,9 +211,10 @@ function retrieveAllCallAliases(nodeId: NodeId, graph: DataflowGraph): Map<strin
* Multi-stage call context query resolve.
*
* 1. Resolve all calls in the DF graph that match the respective {@link DefaultCallContextQueryFormat#callName} regex.
* 2. Identify their respective call targets, if {@link DefaultCallContextQueryFormat#callTargets} is set to be non-any.
* 2. If there is an alias attached, consider all call traces.
* 3. Identify their respective call targets, if {@link DefaultCallContextQueryFormat#callTargets} is set to be non-any.
* This happens during the main resolution!
* 3. Attach `linkTo` calls to the respective calls.
* 4. Attach `linkTo` calls to the respective calls.
*/
export function executeCallContextQueries({ graph, ast }: BasicQueryData, queries: readonly CallContextQuery[]): CallContextQueryResult {
/* omit performance page load */
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import type { BaseQueryFormat, BaseQueryResult } from '../base-query-format';
import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
import type { BaseQueryFormat, BaseQueryResult } from '../../base-query-format';
import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id';

export enum CallTargets {
/** call targets a function that is not defined locally (e.g., the call targets a library function) */
Expand Down
17 changes: 17 additions & 0 deletions src/queries/catalog/dataflow-query/dataflow-query-executor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import type { BasicQueryData } from '../../query';
import type { DataflowQuery, DataflowQueryResult } from './dataflow-query-format';
import { log } from '../../../util/log';


/**
 * Executor of the `dataflow` query: hands out the dataflow graph contained in the given query data.
 *
 * @param data    - The query data, of which only the `graph` is used.
 * @param queries - The dataflow queries to answer; a warning is logged if there is not exactly one.
 * @returns The graph, together with meta information that reports a timing of `0`.
 */
export function executeDataflowQuery({ graph }: BasicQueryData, queries: readonly DataflowQuery[]): DataflowQueryResult {
	const queryCount = queries.length;
	if(queryCount !== 1) {
		log.warn('Dataflow query expects only up to one query, but got', queryCount);
	}
	/* the graph is just handed out, so there is nothing worth measuring */
	const meta = { timing: 0 };
	return { '.meta': meta, graph };
}
14 changes: 14 additions & 0 deletions src/queries/catalog/dataflow-query/dataflow-query-format.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import type { BaseQueryFormat, BaseQueryResult } from '../../base-query-format';
import type { DataflowGraph } from '../../../dataflow/graph/graph';

/**
 * Simply re-returns the dataflow graph of the analysis.
 * The query carries no arguments besides its `type`.
 */
export interface DataflowQuery extends BaseQueryFormat {
/** Discriminator that identifies this query as a dataflow query. */
readonly type: 'dataflow';
}

/**
 * Result of a {@link DataflowQuery}: the dataflow graph, in addition to the fields of the base query result.
 */
export interface DataflowQueryResult extends BaseQueryResult {
/**
 * The dataflow graph of the analysis.
 * Please be aware that if the result was serialized, this holds the graph in its JSON representation;
 * use {@link DataflowGraph#fromJson} to retrieve the graph from it.
 */
readonly graph: DataflowGraph;
}
9 changes: 7 additions & 2 deletions src/queries/query-schema.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import Joi from 'joi';
import { CallTargets } from './call-context-query/call-context-query-format';
import { CallTargets } from './catalog/call-context-query/call-context-query-format';

export const CallContextQuerySchema = Joi.object({
type: Joi.string().valid('call-context').required().description('The type of the query.'),
Expand All @@ -14,8 +14,13 @@ export const CallContextQuerySchema = Joi.object({
}).optional().description('Links the current call to the last call of the given kind. This way, you can link a call like `points` to the latest graphics plot etc.')
}).description('Call context query used to find calls in the dataflow graph');

/**
 * Schema of a dataflow query: an object with a required `type` that must be the string `'dataflow'`.
 */
export const DataflowQuerySchema = Joi.object({
type: Joi.string().valid('dataflow').required().description('The type of the query.'),
}).description('The dataflow query simply returns the dataflow graph, there is no need to pass it multiple times!');

export const SupportedQueriesSchema = Joi.alternatives(
CallContextQuerySchema
CallContextQuerySchema,
DataflowQuerySchema
).description('Supported queries');

export const CompoundQuerySchema = Joi.object({
Expand Down
17 changes: 10 additions & 7 deletions src/queries/query.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
import type { CallContextQuery } from './call-context-query/call-context-query-format';
import type { CallContextQuery } from './catalog/call-context-query/call-context-query-format';
import type { DataflowGraph } from '../dataflow/graph/graph';
import type { BaseQueryFormat, BaseQueryResult } from './base-query-format';
import { executeCallContextQueries } from './call-context-query/call-context-query-executor';
import { executeCallContextQueries } from './catalog/call-context-query/call-context-query-executor';
import { guard } from '../util/assert';
import type { VirtualQueryArgumentsWithType } from './virtual-query/virtual-queries';
import { SupportedVirtualQueries } from './virtual-query/virtual-queries';
import type { Writable } from 'ts-essentials';
import type { VirtualCompoundConstraint } from './virtual-query/compound-query';
import type { NormalizedAst } from '../r-bridge/lang-4.x/ast/model/processing/decorate';
import { executeDataflowQuery } from './catalog/dataflow-query/dataflow-query-executor';
import type { DataflowQuery } from './catalog/dataflow-query/dataflow-query-format';

export type Query = CallContextQuery;
export type Query = CallContextQuery | DataflowQuery;

export type QueryArgumentsWithType<QueryType extends BaseQueryFormat['type']> = Query & { type: QueryType };

Expand All @@ -26,7 +28,8 @@ type SupportedQueries = {
}

export const SupportedQueries = {
'call-context': executeCallContextQueries
'call-context': executeCallContextQueries,
'dataflow': executeDataflowQuery
} as const satisfies SupportedQueries;

export type SupportedQueryTypes = keyof typeof SupportedQueries;
Expand All @@ -38,7 +41,7 @@ export function executeQueriesOfSameType<SpecificQuery extends Query>(data: Basi
guard(queries.every(q => q.type === queries[0].type), 'All queries must have the same type');
const executor = SupportedQueries[queries[0].type];
guard(executor !== undefined, `Unsupported query type: ${queries[0].type}`);
return executor(data, queries) as QueryResult<SpecificQuery['type']>;
return executor(data, queries as never) as QueryResult<SpecificQuery['type']>;
}

function isVirtualQuery<
Expand Down Expand Up @@ -67,7 +70,7 @@ function groupQueriesByType<
addQuery(subQuery);
}
} else {
addQuery(query as Query);
addQuery(query);
}
}
return grouped;
Expand All @@ -80,7 +83,7 @@ export type QueryResults<Base extends SupportedQueryTypes> = {


type OmitFromValues<T, K extends string | number | symbol> = {
[P in keyof T]: Omit<T[P], K>
[P in keyof T]?: Omit<T[P], K>
}

export type QueryResultsWithoutMeta<Queries extends Query> = OmitFromValues<Omit<QueryResults<Queries['type']>, '.meta'>, '.meta'>;
Expand Down
Loading

2 comments on commit 32d51e8

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"artificial" Benchmark Suite

Benchmark suite Current: 32d51e8 Previous: 9d8b361 Ratio
Retrieve AST from R code 236.48823454545453 ms (100.95124305461889) 238.12351204545453 ms (103.59720843756357) 0.99
Normalize R AST 18.6814945 ms (32.43041376202251) 19.968034227272728 ms (34.84298543847825) 0.94
Produce dataflow information 39.218006409090904 ms (83.23010785098833) 38.310942090909094 ms (82.04448044777155) 1.02
Total per-file 814.1409290454545 ms (1464.0122828304495) 811.1703915909092 ms (1431.4404310276739) 1.00
Static slicing 2.297206455224587 ms (1.5987465399086547) 2.258090287874194 ms (1.2792808105316449) 1.02
Reconstruct code 0.23958454615379787 ms (0.19299648466878672) 0.22489327849282828 ms (0.17585774592637268) 1.07
Total per-slice 2.552788396221629 ms (1.670613024578578) 2.4996261233332735 ms (1.3278746913052974) 1.02
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.7869360165281424 # 0.7869360165281424 # 1
reduction (normalized tokens) 0.7639690077689504 # 0.7639690077689504 # 1
memory (df-graph) 95.46617542613636 KiB (244.77619956879823) 147.42458274147728 KiB (358.6827375397903) 0.65

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"social-science" Benchmark Suite

Benchmark suite Current: 32d51e8 Previous: 9d8b361 Ratio
Retrieve AST from R code 237.28889544 ms (43.975090915089936) 238.40722376 ms (42.95412443307438) 1.00
Normalize R AST 20.72894466 ms (15.712860099581313) 22.0872248 ms (17.016890594916376) 0.94
Produce dataflow information 76.10204879999999 ms (87.85498320596544) 74.60461736 ms (88.95210983454488) 1.02
Total per-file 7716.47521212 ms (28900.2856987741) 11091.201449639999 ms (52310.41942604725) 0.70
Static slicing 16.07740446755544 ms (44.1702473342068) 22.047137876062838 ms (78.30877993604865) 0.73
Reconstruct code 0.23467998152275588 ms (0.15163252847745484) 0.2327517832436913 ms (0.14954480815603388) 1.01
Total per-slice 16.32001826234056 ms (44.19572290555723) 22.287796325154986 ms (78.33211951742135) 0.73
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.8712997340230448 # 0.8719618340615195 # 1.00
reduction (normalized tokens) 0.8102441553774778 # 0.810633662275233 # 1.00
memory (df-graph) 99.8990234375 KiB (113.72812769327498) 145.6434765625 KiB (153.49028997815503) 0.69

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.