Skip to content

Commit

Permalink
[Query API] Slice Query (#1073)
Browse files Browse the repository at this point in the history
* feat(slice-query): base slice query implementation

* doc(slice-query): document the slice query
  • Loading branch information
EagleoutIce authored Oct 12, 2024
1 parent 9f778ba commit fb720ff
Show file tree
Hide file tree
Showing 13 changed files with 464 additions and 43 deletions.
21 changes: 21 additions & 0 deletions src/cli/repl/commands/repl-query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { normalizedAstToMermaidUrl } from '../../../util/mermaid/ast';

import { printAsMs } from '../../../util/time';
import { textWithTooltip } from '../../../documentation/doc-util/doc-hover-over';
import type { StaticSliceQuery } from '../../../queries/catalog/static-slice-query/static-slice-query-format';

async function getDataflow(shell: RShell, remainingLine: string) {
return await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, {
Expand Down Expand Up @@ -162,6 +163,26 @@ export function asciiSummaryOfQueryResult(formatter: OutputFormatter, totalInMs:
result.push(`Query: ${bold(query, formatter)} (${printAsMs(out['.meta'].timing, 0)})`);
result.push(` ╰ [Normalized AST](${normalizedAstToMermaidUrl(out.normalized.ast)})`);
continue;
} else if(query === 'static-slice') {
const out = queryResults as QueryResults<'static-slice'>['static-slice'];
result.push(`Query: ${bold(query, formatter)} (${printAsMs(out['.meta'].timing, 0)})`);
for(const [fingerprint, obj] of Object.entries(out.results)) {
const { criteria, noMagicComments, noReconstruction } = JSON.parse(fingerprint) as StaticSliceQuery;
const addons = [];
if(noReconstruction) {
addons.push('no reconstruction');
}
if(noMagicComments) {
addons.push('no magic comments');
}
result.push(` ╰ Slice for {${criteria.join(', ')}} ${addons.join(', ')}`);
if('reconstruct' in obj) {
result.push(' ╰ Code (newline as <code>&#92;n</code>): <code>' + obj.reconstruct.code.split('\n').join('\\n') + '</code>');
} else {
result.push(` ╰ Id List: {${summarizeIdsIfTooLong([...obj.slice.result])}}`);
}
}
continue;
} else if(query === 'dataflow-cluster') {
const out = queryResults as QueryResults<'dataflow-cluster'>['dataflow-cluster'];
result.push(`Query: ${bold(query, formatter)} (${out['.meta'].timing.toFixed(0)}ms)`);
Expand Down
1 change: 1 addition & 0 deletions src/core/steps/pipeline/default-pipelines.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { NAIVE_RECONSTRUCT } from '../all/static-slicing/10-reconstruct';

/** Full slicing pipeline: parse with the R shell, normalize, build the static dataflow, slice statically, and reconstruct code naively. */
export const DEFAULT_SLICING_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW, STATIC_SLICE, NAIVE_RECONSTRUCT);
/** Alias of {@link DEFAULT_SLICING_PIPELINE} (the very same pipeline object), named to make the reconstruction step explicit. */
export const DEFAULT_SLICE_AND_RECONSTRUCT_PIPELINE = DEFAULT_SLICING_PIPELINE;
/** Same steps as {@link DEFAULT_SLICING_PIPELINE}, but without the final `NAIVE_RECONSTRUCT` step. */
export const DEFAULT_SLICE_WITHOUT_RECONSTRUCT_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW, STATIC_SLICE);

/** Pipeline that stops after the dataflow step: parse with the R shell, normalize, build the static dataflow. */
export const DEFAULT_DATAFLOW_PIPELINE = createPipeline(PARSE_WITH_R_SHELL_STEP, NORMALIZE, STATIC_DATAFLOW);

Expand Down
10 changes: 8 additions & 2 deletions src/documentation/data/server/doc-data-server-messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ import {
import { exampleQueryCode } from '../query/example-query-code';
import { CallTargets } from '../../../queries/catalog/call-context-query/call-context-query-format';
import { requestLineageMessage, responseLineageMessage } from '../../../cli/repl/server/messages/message-lineage';
import { block } from '../../doc-util/doc-structure';

export function documentAllMessages() {
export function documentAllServerMessages() {

documentServerMessage({
title: 'Hello',
Expand Down Expand Up @@ -244,9 +245,14 @@ While the context is derived from the \`filename\`, we currently offer no way to
end
deactivate Server
`,
shortDescription: 'The server slices a file based on the given criteria.',
shortDescription: `([DEPRECATED](${FlowrWikiBaseRef}/Query%20API)) The server slices a file based on the given criteria.`,
text: async(shell: RShell) => {
return `
${block({
type: 'WARNING',
content: `We deprecated the slice request in favor of the \`static-slice\` [Query](${FlowrWikiBaseRef}/Query%20API).`
})}
To slice, you have to send a file analysis request first. The \`filetoken\` you assign is of use here as you can re-use it to repeatedly slice the same file.
Besides that, you only need to add an array of slicing criteria, using one of the formats described on the [terminology wiki page](${FlowrWikiBaseRef}/Terminology#slicing-criterion)
(however, instead of using \`;\`, you can simply pass separate array elements).
Expand Down
4 changes: 2 additions & 2 deletions src/documentation/print-interface-wiki.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { FlowrGithubBaseRef, FlowrNpmRef, FlowrWikiBaseRef, getFileContentFromRo
import { autoGenHeader } from './doc-util/doc-auto-gen';
import { getCliLongOptionOf, getReplCommand, multipleCliOptions } from './doc-util/doc-cli-option';
import { printServerMessages } from './doc-util/doc-server-message';
import { documentAllMessages } from './data/server/doc-data-server-messages';
import { documentAllServerMessages } from './data/server/doc-data-server-messages';
import { codeBlock } from './doc-util/doc-code';
import type { FileAnalysisRequestMessage } from '../cli/repl/server/messages/message-analysis';
import { fileProtocol, removeRQuotes, requestFromInput } from '../r-bridge/retriever';
Expand All @@ -21,7 +21,7 @@ import { NewIssueUrl } from './doc-util/doc-issue';
import { PipelineExecutor } from '../core/pipeline-executor';

async function explainServer(shell: RShell): Promise<string> {
documentAllMessages();
documentAllServerMessages();

return `
As explained in the [Overview](${FlowrWikiBaseRef}/Overview), you can simply run the [TCP](https://de.wikipedia.org/wiki/Transmission_Control_Protocol)&nbsp;server by adding the ${getCliLongOptionOf('flowr', 'server', true)} flag (and, due to the interactive mode, exit with the conventional <kbd>CTRL</kbd>+<kbd>C</kbd>).
Expand Down
46 changes: 46 additions & 0 deletions src/documentation/print-query-wiki.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { executeDataflowQuery } from '../queries/catalog/dataflow-query/dataflow
import { executeIdMapQuery } from '../queries/catalog/id-map-query/id-map-query-executor';
import { executeNormalizedAstQuery } from '../queries/catalog/normalized-ast-query/normalized-ast-query-executor';
import { executeDataflowClusterQuery } from '../queries/catalog/cluster-query/cluster-query-executor';
import { executeStaticSliceClusterQuery } from '../queries/catalog/static-slice-query/static-slice-query-executor';


registerQueryDocumentation('call-context', {
Expand Down Expand Up @@ -246,6 +247,51 @@ Now, the results no longer contain calls to \`plot\` that are not defined locall
});


/*
 * Wiki documentation for the `static-slice` query.
 * `buildExplanation` renders the explanatory text and embeds two live query examples
 * (with and without reconstruction) that are run against `exampleCode` using the given shell.
 */
registerQueryDocumentation('static-slice', {
	name: 'Static Slice Query',
	type: 'active',
	shortDescription: 'Slice the dataflow graph reducing the code to just the parts relevant for the given criteria.',
	functionName: executeStaticSliceClusterQuery.name,
	functionFile: '../queries/catalog/static-slice-query/static-slice-query-executor.ts',
	buildExplanation: async(shell: RShell) => {
		/* three lines of R code, the examples below slice for the use of `x` in line 3 */
		const exampleCode = 'x <- 1\ny <- 2\nx';
		return `
To slice, _flowR_ needs one thing from you: a variable or a list of variables (function calls are supported too, referring to the anonymous
return of the call) that you want to slice the dataflow graph for.
Given this, the slice is essentially the subpart of the program that may influence the value of the variables you are interested in.
To specify a variable of interest, you have to present flowR with a [slicing criterion](${FlowrWikiBaseRef}/Terminology#slicing-criterion) (or, respectively, an array of them).
To exemplify the capabilities, consider the following code:
${codeBlock('r', exampleCode)}
If you are interested in the parts required for the use of \`x\` in the last line, you can use the following query:
${
	await showQuery(shell, exampleCode, [{
		type:     'static-slice',
		criteria: ['3@x']
	}], { showCode: false })
}
In general you may be uninterested in seeing the reconstructed version and want to save some computation time, for this,
you can use the \`noReconstruction\` flag.
${
	details('No Reconstruction Example',
		await showQuery(shell, exampleCode, [{
			type:             'static-slice',
			criteria:         ['3@x'],
			noReconstruction: true
		}], { showCode: false })
	)
}
You can disable [magic comments](${FlowrWikiBaseRef}/Interface#slice-magic-comments) using the \`noMagicComments\` flag.
This query replaces the old [\`request-slice\`](${FlowrWikiBaseRef}/Interface#message-request-slice) message.
`;
	}
});


async function getText(shell: RShell) {
const rversion = (await shell.usedRVersion())?.format() ?? 'unknown';
return `${autoGenHeader({ filename: module.filename, purpose: 'query API', rVersion: rversion })}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ function satisfiesCallTargets(id: NodeId, graph: DataflowGraph, callTarget: Call
* including any potential built-in mapping.
*/
const reResolved = resolveByName(info.name, info.environment, ReferenceType.Unknown);
if(reResolved && reResolved.some(t => t.definedAt === BuiltIn)) {
if(reResolved?.some(t => t.definedAt === BuiltIn)) {
builtIn = true;
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import type { BasicQueryData } from '../../query';
import type { StaticSliceQuery, StaticSliceQueryResult } from './static-slice-query-format';
import { staticSlicing } from '../../../slicing/static/static-slicer';
import { reconstructToCode } from '../../../reconstruct/reconstruct';
import { doNotAutoSelect } from '../../../reconstruct/auto-select/auto-select-defaults';
import { makeMagicCommentHandler } from '../../../reconstruct/auto-select/magic-comments';
import { log } from '../../../util/log';

/**
 * Produces the key under which the result of the given slice query is stored.
 *
 * The fingerprint is the plain `JSON.stringify` serialization of the query, so it can be
 * turned back into the query object with `JSON.parse`.
 *
 * @param query - the slice query to create the fingerprint for
 * @returns the JSON serialization of `query`
 */
export function fingerPrintOfQuery(query: StaticSliceQuery): string {
	const serialized = JSON.stringify(query);
	return serialized;
}

/**
 * Executes the given static-slice queries on the provided dataflow graph and normalized AST.
 *
 * Every query is identified by its fingerprint (see `fingerPrintOfQuery`), which is also the key
 * of its entry in the returned `results`. If the same fingerprint shows up more than once,
 * only the first occurrence is executed; later ones are logged and skipped.
 *
 * @param data    - provides the dataflow `graph` and the normalized `ast` to slice on
 * @param queries - the slice queries to execute
 * @returns for each distinct query the slice and, unless `noReconstruction` is set, the reconstructed code;
 *          `.meta.timing` holds the elapsed wall-clock time in milliseconds (difference of two `Date.now()` readings)
 */
export function executeStaticSliceClusterQuery({ graph, ast }: BasicQueryData, queries: readonly StaticSliceQuery[]): StaticSliceQueryResult {
	const start = Date.now();
	const results: StaticSliceQueryResult['results'] = {};
	for(const query of queries) {
		const key = fingerPrintOfQuery(query);
		if(results[key]) {
			log.warn(`Duplicate Key for slicing-query: ${key}, skipping...`);
			/* an identical query was already answered, do not slice (and reconstruct) again */
			continue;
		}
		const { criteria, noReconstruction, noMagicComments } = query;
		const slice = staticSlicing(graph, ast, criteria);
		if(noReconstruction) {
			results[key] = { slice };
		} else {
			/* with `noMagicComments` we use the plain `doNotAutoSelect`, otherwise it is wrapped by the magic comment handler */
			const autoSelectIf = noMagicComments ? doNotAutoSelect : makeMagicCommentHandler(doNotAutoSelect);
			results[key] = {
				slice,
				reconstruct: reconstructToCode(ast, slice.result, autoSelectIf)
			};
		}
	}
	return {
		'.meta': {
			timing: Date.now() - start
		},
		results
	};
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import type { BaseQueryFormat, BaseQueryResult } from '../../base-query-format';
import type { PipelineOutput } from '../../../core/steps/pipeline/pipeline';
import type {
DEFAULT_DATAFLOW_PIPELINE, DEFAULT_SLICE_WITHOUT_RECONSTRUCT_PIPELINE,
DEFAULT_SLICING_PIPELINE
} from '../../../core/steps/pipeline/default-pipelines';
import type { SlicingCriteria } from '../../../slicing/criterion/parse';

/**
 * Requests a static slice for the given slicing criteria.
 * By default, the slice is also reconstructed into code; see the optional flags to change this.
 */
export interface StaticSliceQuery extends BaseQueryFormat {
/** Discriminator identifying this query as a static-slice query */
readonly type: 'static-slice';
/** The slicing criteria to use */
readonly criteria: SlicingCriteria,
/** If set, do not reconstruct the slice into readable code */
readonly noReconstruction?: boolean;
/** If set, the magic comments (which force-include lines within the slice) are ignored */
readonly noMagicComments?: boolean
}

/** Result of executing one or more {@link StaticSliceQuery|static-slice queries}. */
export interface StaticSliceQueryResult extends BaseQueryResult {
/**
 * Only contains the outputs of the slicing-specific steps (everything the dataflow pipeline already
 * provides is omitted to not repeat ourselves). The reconstruction is not part of an entry
 * if you set the {@link StaticSliceQuery#noReconstruction|noReconstruction} flag.
 *
 * The keys are serialized versions of the used queries (i.e., the result of `JSON.stringify`).
 * This implies that multiple slice queries with the same query configuration share a single entry.
 */
results: Record<string,
Omit<PipelineOutput<typeof DEFAULT_SLICING_PIPELINE>, keyof PipelineOutput<typeof DEFAULT_DATAFLOW_PIPELINE>> |
Omit<PipelineOutput<typeof DEFAULT_SLICE_WITHOUT_RECONSTRUCT_PIPELINE>, keyof PipelineOutput<typeof DEFAULT_DATAFLOW_PIPELINE>>
>
}
10 changes: 9 additions & 1 deletion src/queries/query-schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,21 @@ export const DataflowClusterQuerySchema = Joi.object({
type: Joi.string().valid('dataflow-cluster').required().description('The type of the query.'),
}).description('The cluster query calculates and returns all clusters in the dataflow graph.');

/**
 * Joi schema for a single `static-slice` query.
 * `type` and `criteria` are required, both flags are optional booleans.
 * NOTE(review): `min(0)` imposes no lower bound, so an empty `criteria` array passes validation — confirm this is intended.
 */
export const StaticSliceQuerySchema = Joi.object({
type: Joi.string().valid('static-slice').required().description('The type of the query.'),
criteria: Joi.array().items(Joi.string()).min(0).required().description('The slicing criteria to use.'),
noReconstruction: Joi.boolean().optional().description('Do not reconstruct the slice into readable code.'),
noMagicComments: Joi.boolean().optional().description('Should the magic comments (force-including lines within the slice) be ignored?')
}).description('Slice query used to slice the dataflow graph');


export const SupportedQueriesSchema = Joi.alternatives(
CallContextQuerySchema,
DataflowQuerySchema,
IdMapQuerySchema,
NormalizedAstQuerySchema,
DataflowClusterQuerySchema
DataflowClusterQuerySchema,
StaticSliceQuerySchema
).description('Supported queries');

export const CompoundQuerySchema = Joi.object({
Expand Down
5 changes: 4 additions & 1 deletion src/queries/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ import { executeNormalizedAstQuery } from './catalog/normalized-ast-query/normal
import type { NormalizedAstQuery } from './catalog/normalized-ast-query/normalized-ast-query-format';
import type { DataflowClusterQuery } from './catalog/cluster-query/cluster-query-format';
import { executeDataflowClusterQuery } from './catalog/cluster-query/cluster-query-executor';
import type { StaticSliceQuery } from './catalog/static-slice-query/static-slice-query-format';
import { executeStaticSliceClusterQuery } from './catalog/static-slice-query/static-slice-query-executor';

export type Query = CallContextQuery | DataflowQuery | NormalizedAstQuery | IdMapQuery | DataflowClusterQuery;
export type Query = CallContextQuery | DataflowQuery | NormalizedAstQuery | IdMapQuery | DataflowClusterQuery | StaticSliceQuery;

/** Restricts the `Query` union to those members whose `type` is `QueryType` (by intersecting it with `{ type: QueryType }`). */
export type QueryArgumentsWithType<QueryType extends BaseQueryFormat['type']> = Query & { type: QueryType };

Expand All @@ -39,6 +41,7 @@ export const SupportedQueries = {
'id-map': executeIdMapQuery,
'normalized-ast': executeNormalizedAstQuery,
'dataflow-cluster': executeDataflowClusterQuery,
'static-slice': executeStaticSliceClusterQuery
} as const satisfies SupportedQueries;

export type SupportedQueryTypes = keyof typeof SupportedQueries;
Expand Down
10 changes: 7 additions & 3 deletions test/functionality/_helper/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,12 @@ function normalizeResults<Queries extends Query>(result: QueryResults<Queries['t
export function assertQuery<
Queries extends Query,
VirtualArguments extends VirtualCompoundConstraint<Queries['type']> = VirtualCompoundConstraint<Queries['type']>
>(name: string | TestLabel, shell: RShell, code: string, queries: readonly (Queries | VirtualQueryArgumentsWithType<Queries['type'], VirtualArguments>)[], expected:
QueryResultsWithoutMeta<Queries> | ((info: PipelineOutput<typeof DEFAULT_DATAFLOW_PIPELINE>) => QueryResultsWithoutMeta<Queries>)
>(
name: string | TestLabel,
shell: RShell,
code: string,
queries: readonly (Queries | VirtualQueryArgumentsWithType<Queries['type'], VirtualArguments>)[],
expected: QueryResultsWithoutMeta<Queries> | ((info: PipelineOutput<typeof DEFAULT_DATAFLOW_PIPELINE>) => (QueryResultsWithoutMeta<Queries> | Promise<QueryResultsWithoutMeta<Queries>>))
) {
const effectiveName = decorateLabelContext(name, ['query']);

Expand All @@ -68,7 +72,7 @@ export function assertQuery<
/* expect them to be deeply equal */
try {
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
const expectedNormalized = typeof expected === 'function' ? expected(info) : expected;
const expectedNormalized = typeof expected === 'function' ? await expected(info) : expected;
assert.deepStrictEqual(normalized, expectedNormalized, 'The result of the call context query does not match the expected result');
} catch(e: unknown) {
console.error('Dataflow-Graph', dataflowGraphToMermaidUrl(info.dataflow));
Expand Down
51 changes: 51 additions & 0 deletions test/functionality/dataflow/query/slice-query-tests.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import { assertQuery } from '../../_helper/query';
import { label } from '../../_helper/label';
import { withShell } from '../../_helper/shell';
import type {
StaticSliceQuery,
StaticSliceQueryResult
} from '../../../../src/queries/catalog/static-slice-query/static-slice-query-format';
import { fingerPrintOfQuery } from '../../../../src/queries/catalog/static-slice-query/static-slice-query-executor';
import { PipelineExecutor } from '../../../../src/core/pipeline-executor';
import {
DEFAULT_SLICE_WITHOUT_RECONSTRUCT_PIPELINE,
DEFAULT_SLICING_PIPELINE
} from '../../../../src/core/steps/pipeline/default-pipelines';
import { requestFromInput } from '../../../../src/r-bridge/retriever';
import { doNotAutoSelect } from '../../../../src/reconstruct/auto-select/auto-select-defaults';
import { makeMagicCommentHandler } from '../../../../src/reconstruct/auto-select/magic-comments';

describe('Static Slice Query', withShell(shell => {
	/**
	 * Registers a test which runs the given slice queries on `code` and compares the outcome with
	 * what the slicing pipelines produce when they are executed directly for every query.
	 */
	function testQuery(name: string, code: string, queries: readonly StaticSliceQuery[]) {
		assertQuery(label(name), shell, code, queries, async() => {
			const expected: StaticSliceQueryResult['results'] = {};
			for(const query of queries) {
				/* mirror the query flags: skip the reconstruct step and/or the magic comment handling */
				const pipeline = query.noReconstruction ? DEFAULT_SLICE_WITHOUT_RECONSTRUCT_PIPELINE : DEFAULT_SLICING_PIPELINE;
				const autoSelectIf = query.noMagicComments ? doNotAutoSelect : makeMagicCommentHandler(doNotAutoSelect);
				const out = await new PipelineExecutor(pipeline, {
					shell:     shell,
					request:   requestFromInput(code),
					criterion: query.criteria,
					autoSelectIf
				}).allRemainingSteps();
				const key = fingerPrintOfQuery(query);
				if(query.noReconstruction) {
					expected[key] = { slice: out.slice };
				} else {
					expected[key] = { slice: out.slice, reconstruct: out.reconstruct };
				}
			}

			return {
				'static-slice': {
					results: expected
				}
			};
		});
	}

	const baseQuery: StaticSliceQuery = { type: 'static-slice', criteria: ['1@x'] };
	const noReconstructQuery: StaticSliceQuery = { type: 'static-slice', criteria: ['1@x'], noReconstruction: true };

	describe('With Reconstruction', () => {
		testQuery('Single Expression', 'x + 1', [baseQuery]);
		/* identical queries share one fingerprint and hence one result entry */
		testQuery('Multiple Queries', 'x + 1', [baseQuery, baseQuery, baseQuery]);
	});
	describe('Without Reconstruction', () => {
		testQuery('Single Expression (No Reconstruct)', 'x + 1', [noReconstructQuery]);
		testQuery('Multiple Queries (No Reconstruct)', 'x + 1', [noReconstructQuery, noReconstructQuery, noReconstructQuery]);
	});
}));
Loading

2 comments on commit fb720ff

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"artificial" Benchmark Suite

Benchmark suite Current: fb720ff Previous: 9d8b361 Ratio
Retrieve AST from R code 239.1878134090909 ms (100.07016011716344) 238.12351204545453 ms (103.59720843756357) 1.00
Normalize R AST 18.289318045454547 ms (31.42206962413676) 19.968034227272728 ms (34.84298543847825) 0.92
Produce dataflow information 39.44545631818182 ms (83.21461285961888) 38.310942090909094 ms (82.04448044777155) 1.03
Total per-file 818.9351105 ms (1463.0636513006111) 811.1703915909092 ms (1431.4404310276739) 1.01
Static slicing 2.136388743223465 ms (1.3517004805025001) 2.258090287874194 ms (1.2792808105316449) 0.95
Reconstruct code 0.23223599750638615 ms (0.17812559248692914) 0.22489327849282828 ms (0.17585774592637268) 1.03
Total per-slice 2.383691379856576 ms (1.423209995731651) 2.4996261233332735 ms (1.3278746913052974) 0.95
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.7869360165281424 # 0.7869360165281424 # 1
reduction (normalized tokens) 0.7639690077689504 # 0.7639690077689504 # 1
memory (df-graph) 95.46617542613636 KiB (244.77619956879823) 147.42458274147728 KiB (358.6827375397903) 0.65

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"social-science" Benchmark Suite

Benchmark suite Current: fb720ff Previous: 9d8b361 Ratio
Retrieve AST from R code 237.497045 ms (43.583564030948246) 238.40722376 ms (42.95412443307438) 1.00
Normalize R AST 20.6991038 ms (15.360338777622799) 22.0872248 ms (17.016890594916376) 0.94
Produce dataflow information 75.26545594 ms (87.93598631340696) 74.60461736 ms (88.95210983454488) 1.01
Total per-file 7671.73540802 ms (28654.839983725476) 11091.201449639999 ms (52310.41942604725) 0.69
Static slicing 15.936251020016991 ms (43.79666198137641) 22.047137876062838 ms (78.30877993604865) 0.72
Reconstruct code 0.24095534033824254 ms (0.14875106021074844) 0.2327517832436913 ms (0.14954480815603388) 1.04
Total per-slice 16.184893299955597 ms (43.822625551525974) 22.287796325154986 ms (78.33211951742135) 0.73
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.8712997340230448 # 0.8719618340615195 # 1.00
reduction (normalized tokens) 0.8102441553774778 # 0.810633662275233 # 1.00
memory (df-graph) 99.8990234375 KiB (113.72812769327498) 145.6434765625 KiB (153.49028997815503) 0.69

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.