diff --git a/src/cli/repl/commands/repl-query.ts b/src/cli/repl/commands/repl-query.ts index bd4ed54c076..6875d19b72b 100644 --- a/src/cli/repl/commands/repl-query.ts +++ b/src/cli/repl/commands/repl-query.ts @@ -76,7 +76,7 @@ async function processQueryArgs(line: string, parser: KnownParser, output: ReplO const processed = await getDataflow(config, parser, args.join(' ')); return { parsedQuery, - query: await Promise.resolve(executeQueries({ dataflow: processed.dataflow, ast: processed.normalize, config }, parsedQuery)), + query: await Promise.resolve(executeQueries({ parse: processed.parse, dataflow: processed.dataflow, ast: processed.normalize, config }, parsedQuery)), processed }; } diff --git a/src/cli/repl/server/connection.ts b/src/cli/repl/server/connection.ts index 112a26d2cbe..09513e6999d 100644 --- a/src/cli/repl/server/connection.ts +++ b/src/cli/repl/server/connection.ts @@ -375,10 +375,11 @@ export class FlowRServerConnection { return; } - const { dataflow: dfg, normalize: ast } = fileInformation.pipeline.getResults(true); + const { parse, dataflow: dfg, normalize: ast } = fileInformation.pipeline.getResults(true); + guard(parse !== undefined, `Parse results must be present (request: ${request.filetoken})`); guard(dfg !== undefined, `Dataflow graph must be present (request: ${request.filetoken})`); guard(ast !== undefined, `AST must be present (request: ${request.filetoken})`); - void Promise.resolve(executeQueries({ dataflow: dfg, ast, config: this.config }, request.query)).then(results => { + void Promise.resolve(executeQueries({ parse, dataflow: dfg, ast, config: this.config }, request.query)).then(results => { sendMessage(this.socket, { type: 'response-query', id: request.id, diff --git a/src/core/steps/all/core/01-parse-tree-sitter.ts b/src/core/steps/all/core/01-parse-tree-sitter.ts index 32111527487..33611d486fb 100644 --- a/src/core/steps/all/core/01-parse-tree-sitter.ts +++ b/src/core/steps/all/core/01-parse-tree-sitter.ts @@ -5,18 +5,6 @@ import type { Tree } from 'web-tree-sitter'; import type { ParseRequiredInput } from '../../../../r-bridge/parser'; import { parseRequests } from '../../../../r-bridge/parser'; -export interface ParseStepOutputTS { - readonly parsed: Tree -} - -export interface TreeSitterParseJson { - readonly '.meta': { - readonly tokenCount: number, - readonly tokenCountNoComments: number - }, - readonly str: string -} - export const PARSE_WITH_TREE_SITTER_STEP = { name: 'parse', humanReadableName: 'parse with tree-sitter', diff --git a/src/core/steps/all/core/11-normalize-tree-sitter.ts b/src/core/steps/all/core/11-normalize-tree-sitter.ts index 70a53299f2d..199b7794bc5 100644 --- a/src/core/steps/all/core/11-normalize-tree-sitter.ts +++ b/src/core/steps/all/core/11-normalize-tree-sitter.ts @@ -11,11 +11,12 @@ import type { DeepReadonly } from 'ts-essentials'; import { normalizeTreeSitter } from '../../../../r-bridge/lang-4.x/ast/parser/json/parser'; import type { NormalizeRequiredInput } from './10-normalize'; import { getCurrentRequestFile } from './10-normalize'; -import type { ParseStepOutputTS } from './01-parse-tree-sitter'; import type { FlowrConfigOptions } from '../../../../config'; +import type { ParseStepOutput } from '../../../../r-bridge/parser'; +import type { Tree } from 'web-tree-sitter'; -function processor(results: { 'parse'?: ParseStepOutputTS }, input: Partial, config: FlowrConfigOptions) { - return normalizeTreeSitter(results['parse'] as ParseStepOutputTS, input.getId, config, input.overwriteFilePath ?? getCurrentRequestFile(input.request)); +function processor(results: { 'parse'?: ParseStepOutput }, input: Partial, config: FlowrConfigOptions) { + return normalizeTreeSitter(results['parse'] as ParseStepOutput, input.getId, config, input.overwriteFilePath ?? getCurrentRequestFile(input.request)); } export const NORMALIZE_TREE_SITTER = { diff --git a/src/documentation/doc-util/doc-query.ts b/src/documentation/doc-util/doc-query.ts index 9631c7c483f..83faa921504 100644 --- a/src/documentation/doc-util/doc-query.ts +++ b/src/documentation/doc-util/doc-query.ts @@ -34,6 +34,7 @@ export async function showQuery< request: requestFromInput(code) }, defaultConfigOptions).allRemainingSteps(); const results = await Promise.resolve(executeQueries({ + parse: analysis.parse, dataflow: analysis.dataflow, ast: analysis.normalize, config: cloneConfig(defaultConfigOptions) diff --git a/src/linter/linter-executor.ts b/src/linter/linter-executor.ts index ed932ce6ab7..a9f6b7c0235 100644 --- a/src/linter/linter-executor.ts +++ b/src/linter/linter-executor.ts @@ -7,8 +7,9 @@ import { runSearch } from '../search/flowr-search-executor'; import type { DeepPartial } from 'ts-essentials'; import { deepMergeObject } from '../util/objects'; import type { FlowrConfigOptions } from '../config'; +import type { KnownParserType, ParseStepOutput } from '../r-bridge/parser'; -export function executeLintingRule(ruleName: Name, input: { normalize: NormalizedAst, dataflow: DataflowInformation, config: FlowrConfigOptions }, lintingRuleConfig?: DeepPartial>): LintingResults { +export function executeLintingRule(ruleName: Name, input: { parse: ParseStepOutput, normalize: NormalizedAst, dataflow: DataflowInformation, config: FlowrConfigOptions }, lintingRuleConfig?: DeepPartial>): LintingResults { try { const rule = LintingRules[ruleName] as unknown as LintingRule, LintingRuleMetadata, LintingRuleConfig>; const fullConfig = deepMergeObject>(rule.info.defaultConfig, lintingRuleConfig); diff --git a/src/queries/base-query-format.ts b/src/queries/base-query-format.ts index 5522d35bbeb..e9a024ec37c 100644 --- a/src/queries/base-query-format.ts +++ b/src/queries/base-query-format.ts @@ -2,6 +2,7 @@ import type { NormalizedAst } from '../r-bridge/lang-4.x/ast/model/processing/de import type { DataflowInformation } from '../dataflow/info'; import type { FlowrConfigOptions } from '../config'; import type { SemVer } from 'semver'; +import type { KnownParserType } from '../r-bridge/parser'; export interface BaseQueryFormat { /** used to select the query type :) */ @@ -17,8 +18,9 @@ export interface BaseQueryResult { } export interface BasicQueryData { - readonly lib?: Record; - readonly ast: NormalizedAst; - readonly dataflow: DataflowInformation; - readonly config: FlowrConfigOptions; + readonly lib?: Record; + readonly parse: { parsed: KnownParserType }; + readonly ast: NormalizedAst; + readonly dataflow: DataflowInformation; + readonly config: FlowrConfigOptions; } diff --git a/src/queries/catalog/linter-query/linter-query-executor.ts b/src/queries/catalog/linter-query/linter-query-executor.ts index 985c0056420..5968f7a51d2 100644 --- a/src/queries/catalog/linter-query/linter-query-executor.ts +++ b/src/queries/catalog/linter-query/linter-query-executor.ts @@ -6,7 +6,7 @@ import { log } from '../../../util/log'; import type { ConfiguredLintingRule } from '../../../linter/linter-format'; import { executeLintingRule } from '../../../linter/linter-executor'; -export function executeLinterQuery({ ast, dataflow, config }: BasicQueryData, queries: readonly LinterQuery[]): LinterQueryResult { +export function executeLinterQuery({ parse, ast, dataflow, config }: BasicQueryData, queries: readonly LinterQuery[]): LinterQueryResult { const flattened = queries.flatMap(q => q.rules ?? (Object.keys(LintingRules) as LintingRuleNames[])); const distinct = new Set(flattened); if(distinct.size !== flattened.length) { @@ -18,7 +18,7 @@ export function executeLinterQuery({ ast, dataflow, config }: BasicQueryData, qu const start = Date.now(); - const input = { normalize: ast, dataflow, config }; + const input = { parse, normalize: ast, dataflow, config }; for(const entry of distinct) { const ruleName = typeof entry === 'string' ? entry : entry.name; results.results[ruleName] = executeLintingRule(ruleName, input, (entry as ConfiguredLintingRule)?.config); diff --git a/src/queries/catalog/search-query/search-query-executor.ts b/src/queries/catalog/search-query/search-query-executor.ts index 95655e549a5..4e1d0b9bf74 100644 --- a/src/queries/catalog/search-query/search-query-executor.ts +++ b/src/queries/catalog/search-query/search-query-executor.ts @@ -4,13 +4,13 @@ import { runSearch } from '../../../search/flowr-search-executor'; import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id'; import type { FlowrSearch } from '../../../search/flowr-search-builder'; -export function executeSearch({ ast, dataflow, config }: BasicQueryData, queries: readonly SearchQuery[]): SearchQueryResult { +export function executeSearch({ parse, ast, dataflow, config }: BasicQueryData, queries: readonly SearchQuery[]): SearchQueryResult { const start = Date.now(); const results: { ids: NodeId[], search: FlowrSearch }[] = []; for(const query of queries) { const { search } = query; results.push({ - ids: runSearch(search, { normalize: ast, dataflow, config } ) + ids: runSearch(search, { parse, normalize: ast, dataflow, config } ) .getElements().map(({ node }) => node.info.id), search }); diff --git a/src/r-bridge/lang-4.x/ast/parser/json/parser.ts b/src/r-bridge/lang-4.x/ast/parser/json/parser.ts index a3204f1a920..d1d7ef1e5b7 100644 --- a/src/r-bridge/lang-4.x/ast/parser/json/parser.ts +++ b/src/r-bridge/lang-4.x/ast/parser/json/parser.ts @@ -5,11 +5,11 @@ import { decorateAst, deterministicCountingIdGenerator } from '../../model/proce import type { NoInfo, RNode } from '../../model/model'; import { normalizeRootObjToAst } from '../main/internal/structure/normalize-root'; import type { NormalizerData } from '../main/normalizer-data'; -import type { ParseStepOutputTS } from '../../../../../core/steps/all/core/01-parse-tree-sitter'; import { normalizeTreeSitterTreeToAst } from '../../../tree-sitter/tree-sitter-normalize'; import type { ParseStepOutput } from '../../../../parser'; import type { FlowrConfigOptions } from '../../../../../config'; import { getEngineConfig } from '../../../../../config'; +import type { Tree } from 'web-tree-sitter'; export const parseLog = log.getSubLogger({ name: 'ast-parser' }); @@ -44,7 +44,7 @@ export function normalizeButNotDecorated( * Tree-Sitter pendant to {@link normalize}. */ export function normalizeTreeSitter( - { parsed }: ParseStepOutputTS, + { parsed }: ParseStepOutput, getId: IdGenerator = deterministicCountingIdGenerator(0), config: FlowrConfigOptions, file?: string diff --git a/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts b/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts index 81cb76101bd..c030826197f 100644 --- a/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts +++ b/src/r-bridge/lang-4.x/tree-sitter/tree-sitter-executor.ts @@ -1,3 +1,4 @@ +import type { QueryCapture } from 'web-tree-sitter'; import Parser from 'web-tree-sitter'; import type { RParseRequest } from '../../retriever'; @@ -68,6 +69,12 @@ export class TreeSitterExecutor implements SyncParser { return this.parser.parse(sourceCode); } + public query(source: string, tree: Parser.Tree): QueryCapture[] { + const query = this.parser.getLanguage().query(source); + const matches = query.matches(tree.rootNode); + return matches.flatMap(m => m.captures); + } + public close(): void { this.parser.delete(); } diff --git a/src/search/flowr-search.ts b/src/search/flowr-search.ts index b693204caf4..ad9d13861ba 100644 --- a/src/search/flowr-search.ts +++ b/src/search/flowr-search.ts @@ -6,6 +6,7 @@ import type { DataflowInformation } from '../dataflow/info'; import type { FlowrConfigOptions } from '../config'; import type { Enrichment, EnrichmentSearchArguments, EnrichmentData, EnrichmentElementContent, EnrichmentSearchContent, EnrichmentElementArguments } from './search-executor/search-enrichers'; import { Enrichments } from './search-executor/search-enrichers'; +import type { KnownParserType, ParseStepOutput } from '../r-bridge/parser'; /** * Yes, for now we do technically not need a wrapper around the RNode, but this allows us to attach caches etc. @@ -52,9 +53,11 @@ export interface FlowrSearchGetFilter extends Record { } type MinimumInputForFlowrSearch

= - PipelineStepOutputWithName extends NormalizedAst ? ( - PipelineStepOutputWithName extends DataflowInformation ? PipelineOutput

& { normalize: NormalizedAst, dataflow: DataflowInformation, config: FlowrConfigOptions } - : never + PipelineStepOutputWithName extends ParseStepOutput ? ( + PipelineStepOutputWithName extends NormalizedAst ? ( + PipelineStepOutputWithName extends DataflowInformation ? PipelineOutput

& { parse: { parsed: KnownParserType }, normalize: NormalizedAst, dataflow: DataflowInformation, config: FlowrConfigOptions } + : never + ): never ): never /** we allow any pipeline, which provides us with a 'normalize' and 'dataflow' step */ diff --git a/src/search/search-executor/search-generators.ts b/src/search/search-executor/search-generators.ts index c4042e92ac3..138fa3a0d16 100644 --- a/src/search/search-executor/search-generators.ts +++ b/src/search/search-executor/search-generators.ts @@ -36,11 +36,12 @@ export type GetGenerator = FlowrSearchGeneratorNode * All supported generators! */ export const generators = { - all: generateAll, - get: generateGet, - criterion: generateCriterion, - from: generateFrom, - 'from-query': generateFromQuery + all: generateAll, + get: generateGet, + criterion: generateCriterion, + from: generateFrom, + 'from-query': generateFromQuery, + 'from-tree-sitter-query': generateFromTreeSitterQuery } as const; function generateAll(data: FlowrSearchInput): FlowrSearchElements { @@ -91,7 +92,7 @@ function generateFrom(data: FlowrSearchInput, args: { from: FlowrSearc } function generateFromQuery(data: FlowrSearchInput, args: { from: readonly SynchronousQuery[] } ): FlowrSearchElements[]> { - const result = executeQueries({ ast: data.normalize, dataflow: data.dataflow, config: data.config }, args.from); + const result = executeQueries({ parse: data.parse, ast: data.normalize, dataflow: data.dataflow, config: data.config }, args.from); // collect involved nodes const nodesByQuery = new Map>>(); @@ -115,6 +116,15 @@ function generateFromQuery(data: FlowrSearchInput, args: { from: reado })) as unknown as FlowrSearchElements[]>; } +function generateFromTreeSitterQuery(data: FlowrSearchInput, args: { source: string } ): FlowrSearchElements[]> { + if(typeof data.parse.parsed === 'string') { + // TODO maybe a warning or something here? + return new FlowrSearchElements([]); + } + // TODO run query using TreeSitterExecutor and convert nodes to our ids using a map that we will generate in tree sitter normalization + return new FlowrSearchElements([]); +} + function generateCriterion(data: FlowrSearchInput, args: { criterion: SlicingCriteria }): FlowrSearchElements { return new FlowrSearchElements( args.criterion.map(c => ({ node: data.normalize.idMap.get(slicingCriterionToId(c, data.normalize.idMap)) as RNodeWithParent })) diff --git a/test/functionality/_helper/query.ts b/test/functionality/_helper/query.ts index 1d1455dd1db..557cc2260f0 100644 --- a/test/functionality/_helper/query.ts +++ b/test/functionality/_helper/query.ts @@ -72,7 +72,7 @@ export function assertQuery< getId: deterministicCountingIdGenerator(0) }, defaultConfigOptions).allRemainingSteps(); - const result = await Promise.resolve(executeQueries({ dataflow: info.dataflow, ast: info.normalize, config: defaultConfigOptions }, queries)); + const result = await Promise.resolve(executeQueries({ parse: info.parse, dataflow: info.dataflow, ast: info.normalize, config: defaultConfigOptions }, queries)); log.info(`total query time: ${result['.meta'].timing.toFixed(0)}ms (~1ms accuracy)`); diff --git a/test/functionality/_helper/shell.ts b/test/functionality/_helper/shell.ts index c09716716ad..14b1ce1ee9f 100644 --- a/test/functionality/_helper/shell.ts +++ b/test/functionality/_helper/shell.ts @@ -58,7 +58,7 @@ import type { CfgProperty } from '../../../src/control-flow/cfg-properties'; import { assertCfgSatisfiesProperties } from '../../../src/control-flow/cfg-properties'; import type { FlowrConfigOptions } from '../../../src/config'; import { cloneConfig, defaultConfigOptions } from '../../../src/config'; -import type { KnownParser } from '../../../src/r-bridge/parser'; +import type { KnownParser, KnownParserType, ParseStepOutput } from '../../../src/r-bridge/parser'; import { SliceDirection } from '../../../src/core/steps/all/static-slicing/00-slice'; export const testWithShell = (msg: string, fn: (shell: RShell, test: unknown) => void | Promise) => { @@ -364,7 +364,7 @@ export function assertDataflow

( name: string | TestLabel, shell: RShell, input: string | RParseRequests, - expected: DataflowGraph | ((data: PipelineOutput

& { normalize: NormalizedAst, dataflow: DataflowInformation }) => DataflowGraph), + expected: DataflowGraph | ((data: PipelineOutput

& { parse: ParseStepOutput, normalize: NormalizedAst, dataflow: DataflowInformation }) => DataflowGraph), userConfig?: Partial, startIndexForDeterministicIds = 0, config = cloneConfig(defaultConfigOptions)