Skip to content

Commit

Permalink
Register Mermaid Printers for All Steps (#504)
Browse files Browse the repository at this point in the history
* refactor: remove unnecessary async of json text pass for parse

* refactor: further remove redundant `async`/`Promise` wrappers

* feat: mermaid and mermaid url printers for normalize and df steps

* lint-fix: deal with lintr errors (redundant default and imports)
  • Loading branch information
EagleoutIce authored Nov 21, 2023
1 parent 9e11058 commit aefbf04
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 30 deletions.
15 changes: 11 additions & 4 deletions src/core/print/dataflow-printer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import { jsonReplacer } from '../../util/json'
import { DataflowInformation } from '../../dataflow/internal/info'
import { QuadSerializationConfiguration } from '../../util/quads'
import { df2quads } from '../../dataflow/graph/quads'
import { graphToMermaid, graphToMermaidUrl } from '../../util/mermaid'
import { DataflowMap } from '../../dataflow'


function mayObjectJson(d: unknown): string {
Expand Down Expand Up @@ -43,13 +45,18 @@ function objectJson(df: object): string {
}

/** Should work with larger things as well */
// eslint-disable-next-line @typescript-eslint/require-await
export async function dataflowGraphToJson(df: DataflowInformation): Promise<string> {
export function dataflowGraphToJson(df: DataflowInformation): string {
	// hand the complete dataflow information to the object serializer and return its result as-is
	const serialized = objectJson(df)
	return serialized
}

/**
 * Prints the graph of the given dataflow information as mermaid code.
 *
 * @param df    - The dataflow information; only its `graph` is passed on
 * @param idMap - The id map, handed to {@link graphToMermaid} unchanged
 * @returns The string produced by {@link graphToMermaid} (called with its default options)
 */
export function dataflowGraphToMermaid(df: DataflowInformation, idMap: DataflowMap): string {
	const { graph } = df
	return graphToMermaid(graph, idMap)
}

/**
 * Prints the graph of the given dataflow information as a mermaid url.
 *
 * @param df    - The dataflow information; only its `graph` is passed on
 * @param idMap - The id map, handed to {@link graphToMermaidUrl} unchanged
 * @returns The string produced by {@link graphToMermaidUrl} (called with its default options)
 */
export function dataflowGraphToMermaidUrl(df: DataflowInformation, idMap: DataflowMap): string {
	const { graph } = df
	return graphToMermaidUrl(graph, idMap)
}

// eslint-disable-next-line @typescript-eslint/require-await
export async function dataflowGraphToQuads(df: DataflowInformation, config: QuadSerializationConfiguration): Promise<string> {
export function dataflowGraphToQuads(df: DataflowInformation, config: QuadSerializationConfiguration): string {
	// only the graph is serialized; the remaining dataflow information is not passed on
	const { graph } = df
	return df2quads(graph, config)
}
15 changes: 11 additions & 4 deletions src/core/print/normalize-printer.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import { NormalizedAst } from '../../r-bridge'
import { jsonReplacer } from '../../util/json'
import { QuadSerializationConfiguration, serialize2quads } from '../../util/quads'
import { normalizedAstToMermaid, normalizedAstToMermaidUrl } from '../../util/mermaid'

/** Should work with larger things as well */
// eslint-disable-next-line @typescript-eslint/require-await
export async function normalizedAstToJson(ast: NormalizedAst): Promise<string> {
export function normalizedAstToJson(ast: NormalizedAst): string {
// we never serialize the idmap, as it just duplicates the ast; additionally, we now drop the full-lexeme to further save memory
return JSON.stringify({ ...ast.ast, idMap: undefined }, (k, v) => {
if(k === 'fullLexeme') {
Expand All @@ -15,7 +15,14 @@ export async function normalizedAstToJson(ast: NormalizedAst): Promise<string> {
})
}

// eslint-disable-next-line @typescript-eslint/require-await
export async function normalizedAstToQuads(ast: NormalizedAst, config: QuadSerializationConfiguration): Promise<string> {
export function normalizedAstToQuads(ast: NormalizedAst, config: QuadSerializationConfiguration): string {
	// serialize the `ast` property of the normalized ast with the given configuration
	const root = ast.ast
	return serialize2quads(root, config)
}

/**
 * Prints the normalized ast as mermaid code.
 *
 * @param ast - The normalized ast; only its `ast` property is passed on
 * @returns The string produced by {@link normalizedAstToMermaid}
 */
export function printNormalizedAstToMermaid(ast: NormalizedAst): string {
	const root = ast.ast
	return normalizedAstToMermaid(root)
}

/**
 * Prints the normalized ast as a mermaid url.
 *
 * @param ast - The normalized ast; only its `ast` property is passed on
 * @returns The string produced by {@link normalizedAstToMermaidUrl}
 */
export function printNormalizedAstToMermaidUrl(ast: NormalizedAst): string {
	const root = ast.ast
	return normalizedAstToMermaidUrl(root)
}
2 changes: 1 addition & 1 deletion src/core/print/print.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,4 @@ export function internalPrinter<Input>(input: Input): Input {
*/
export type IStepPrinter<StepInput extends StepFunction, Format extends StepOutputFormat, AdditionalInput extends unknown[]> =
Format extends StepOutputFormat.Internal ? (input: Awaited<ReturnType<StepInput>>) => Awaited<ReturnType<StepInput>> :
(input: Awaited<ReturnType<StepInput>>, ...additional: AdditionalInput) => Promise<string>
(input: Awaited<ReturnType<StepInput>>, ...additional: AdditionalInput) => Promise<string> | string
33 changes: 23 additions & 10 deletions src/core/steps.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,19 @@ import {
import { produceDataFlowGraph } from '../dataflow'
import { reconstructToCode, staticSlicing } from '../slicing'
import { internalPrinter, IStepPrinter, StepOutputFormat } from './print/print'
import { normalizedAstToJson, normalizedAstToQuads } from './print/normalize-printer'
import {
normalizedAstToJson,
normalizedAstToQuads,
printNormalizedAstToMermaid,
printNormalizedAstToMermaidUrl
} from './print/normalize-printer'
import { guard } from '../util/assert'
import { dataflowGraphToJson, dataflowGraphToQuads } from './print/dataflow-printer'
import {
dataflowGraphToJson,
dataflowGraphToMermaid,
dataflowGraphToMermaidUrl,
dataflowGraphToQuads
} from './print/dataflow-printer'
import { parseToQuads } from './print/parse-printer'

/**
Expand Down Expand Up @@ -65,8 +75,7 @@ export const STEPS_PER_FILE = {
required: 'once-per-file',
printer: {
[StepOutputFormat.Internal]: internalPrinter,
// eslint-disable-next-line @typescript-eslint/require-await -- async printer wrapper, string is already json
[StepOutputFormat.Json]: async text => text,
[StepOutputFormat.Json]: text => text,
[StepOutputFormat.RdfQuads]: parseToQuads
}
} satisfies IStep<typeof retrieveXmlFromRCode>,
Expand All @@ -75,19 +84,23 @@ export const STEPS_PER_FILE = {
processor: normalize,
required: 'once-per-file',
printer: {
[StepOutputFormat.Internal]: internalPrinter,
[StepOutputFormat.Json]: normalizedAstToJson,
[StepOutputFormat.RdfQuads]: normalizedAstToQuads
[StepOutputFormat.Internal]: internalPrinter,
[StepOutputFormat.Json]: normalizedAstToJson,
[StepOutputFormat.RdfQuads]: normalizedAstToQuads,
[StepOutputFormat.Mermaid]: printNormalizedAstToMermaid,
[StepOutputFormat.MermaidUrl]: printNormalizedAstToMermaidUrl
}
} satisfies IStep<typeof normalize>,
'dataflow': {
description: 'Construct the dataflow graph',
processor: produceDataFlowGraph,
required: 'once-per-file',
printer: {
[StepOutputFormat.Internal]: internalPrinter,
[StepOutputFormat.Json]: dataflowGraphToJson,
[StepOutputFormat.RdfQuads]: dataflowGraphToQuads
[StepOutputFormat.Internal]: internalPrinter,
[StepOutputFormat.Json]: dataflowGraphToJson,
[StepOutputFormat.RdfQuads]: dataflowGraphToQuads,
[StepOutputFormat.Mermaid]: dataflowGraphToMermaid,
[StepOutputFormat.MermaidUrl]: dataflowGraphToMermaidUrl
}
} satisfies IStep<typeof produceDataFlowGraph>
} as const
Expand Down
4 changes: 2 additions & 2 deletions src/dataflow/graph/graph.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { guard } from '../../util/assert'
import { NodeId, RNodeWithParent } from '../../r-bridge'
import { NodeId, NoInfo, RNodeWithParent } from '../../r-bridge'
import {
cloneEnvironments,
IdentifierDefinition,
Expand All @@ -22,7 +22,7 @@ import { setEquals } from '../../util/set'
import { dataflowLogger } from '../index'

/** Used to get an entry point for every id, after that it allows reference-chasing of the graph */
export type DataflowMap<OtherInfo> = BiMap<NodeId, RNodeWithParent<OtherInfo>>
export type DataflowMap<OtherInfo=NoInfo> = BiMap<NodeId, RNodeWithParent<OtherInfo>>



Expand Down
18 changes: 9 additions & 9 deletions src/util/mermaid/dfg.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { NodeId, NoInfo } from '../../r-bridge'
import { NodeId } from '../../r-bridge'
import { SourceRange } from '../range'
import {
BuiltIn,
Expand Down Expand Up @@ -42,7 +42,7 @@ function scopeToMermaid(scope: DataflowScopeName, when: DataflowGraphEdgeAttribu
return `, *${scope.replace('<', '#lt;')}${whenText}*`
}

function createArtificialExitPoints(exitPoints: NodeId[], mermaid: MermaidGraph, dataflowIdMap: DataflowMap<NoInfo>, idPrefix: string) {
function createArtificialExitPoints(exitPoints: NodeId[], mermaid: MermaidGraph, dataflowIdMap: DataflowMap, idPrefix: string) {
for(const exitPoint of exitPoints) {
if(!mermaid.rootGraph.hasNode(exitPoint, true)) {
const node = dataflowIdMap.get(exitPoint)
Expand All @@ -53,7 +53,7 @@ function createArtificialExitPoints(exitPoints: NodeId[], mermaid: MermaidGraph,
}
}

function subflowToMermaid(nodeId: NodeId, exitPoints: NodeId[], subflow: DataflowFunctionFlowInformation | undefined, dataflowIdMap: DataflowMap<NoInfo> | undefined, mermaid: MermaidGraph, idPrefix = ''): void {
function subflowToMermaid(nodeId: NodeId, exitPoints: NodeId[], subflow: DataflowFunctionFlowInformation | undefined, dataflowIdMap: DataflowMap | undefined, mermaid: MermaidGraph, idPrefix = ''): void {
if(subflow === undefined) {
return
}
Expand Down Expand Up @@ -123,7 +123,7 @@ function mermaidNodeBrackets(def: boolean, fCall: boolean) {
return { open, close }
}

function nodeToMermaid(graph: DataflowGraph, info: DataflowGraphVertexInfo, mermaid: MermaidGraph, id: NodeId, idPrefix: string, dataflowIdMap: DataflowMap<NoInfo> | undefined, mark: Set<NodeId> | undefined): void {
function nodeToMermaid(graph: DataflowGraph, info: DataflowGraphVertexInfo, mermaid: MermaidGraph, id: NodeId, idPrefix: string, dataflowIdMap: DataflowMap | undefined, mark: Set<NodeId> | undefined): void {
const def = info.tag === 'variable-definition' || info.tag === 'function-definition'
const fCall = info.tag === 'function-call'
const defText = def ? scopeToMermaid(info.scope, info.when) : ''
Expand Down Expand Up @@ -159,7 +159,7 @@ function nodeToMermaid(graph: DataflowGraph, info: DataflowGraphVertexInfo, merm


// make the passing of root ids more performant again
function graphToMermaidGraph(rootIds: ReadonlySet<NodeId>, graph: DataflowGraph, dataflowIdMap: DataflowMap<NoInfo> | undefined, prefix: string | null = 'flowchart TD', idPrefix = '', includeEnvironments = true, mark?: Set<NodeId>, rootGraph?: DataflowGraph): MermaidGraph {
function graphToMermaidGraph(rootIds: ReadonlySet<NodeId>, graph: DataflowGraph, dataflowIdMap: DataflowMap | undefined, prefix: string | null = 'flowchart TD', idPrefix = '', includeEnvironments = true, mark?: Set<NodeId>, rootGraph?: DataflowGraph): MermaidGraph {
const mermaid: MermaidGraph = { nodeLines: prefix === null ? [] : [prefix], edgeLines: [], presentEdges: new Set<string>(), hasBuiltIn: false, mark, rootGraph: rootGraph ?? graph, includeEnvironments }

for(const [id, info] of graph.vertices(true)) {
Expand All @@ -173,7 +173,7 @@ function graphToMermaidGraph(rootIds: ReadonlySet<NodeId>, graph: DataflowGraph,
return mermaid
}

export function graphToMermaid(graph: DataflowGraph, dataflowIdMap: DataflowMap<NoInfo> | undefined, prefix: string | null = 'flowchart TD', idPrefix = '', includeEnvironments?: boolean, mark?: Set<NodeId>, rootGraph?: DataflowGraph): string {
export function graphToMermaid(graph: DataflowGraph, dataflowIdMap: DataflowMap | undefined, prefix: string | null = 'flowchart TD', idPrefix = '', includeEnvironments?: boolean, mark?: Set<NodeId>, rootGraph?: DataflowGraph): string {
	const { nodeLines, edgeLines } = graphToMermaidGraph(graph.rootIds(), graph, dataflowIdMap, prefix, idPrefix, includeEnvironments, mark, rootGraph)
	// all node lines come first, followed by all edge lines, one entry per line
	const nodes = nodeLines.join('\n')
	const edges = edgeLines.join('\n')
	return `${nodes}\n${edges}`
}
Expand All @@ -186,7 +186,7 @@ export function graphToMermaid(graph: DataflowGraph, dataflowIdMap: DataflowMap<
* @param includeEnvironments - Whether to include the environments in the mermaid graph code
* @param mark - Special nodes to mark (e.g. those included in the slice)
*/
export function graphToMermaidUrl(graph: DataflowGraph, dataflowIdMap: DataflowMap<NoInfo>, includeEnvironments?: boolean, mark?: Set<NodeId>): string {
export function graphToMermaidUrl(graph: DataflowGraph, dataflowIdMap: DataflowMap, includeEnvironments?: boolean, mark?: Set<NodeId>): string {
	// passing `undefined` for prefix and idPrefix selects the defaults of graphToMermaid
	const mermaidCode = graphToMermaid(graph, dataflowIdMap, undefined, undefined, includeEnvironments, mark)
	return mermaidCodeToUrl(mermaidCode)
}

Expand All @@ -196,14 +196,14 @@ export interface LabeledDiffGraph {
}

/** uses the same id map for both sides, but gives each side its own id prefix so that mermaid can tell their nodes apart */
export function diffGraphsToMermaid(left: LabeledDiffGraph, right: LabeledDiffGraph, dataflowIdMap: DataflowMap<NoInfo> | undefined, prefix: string): string {
export function diffGraphsToMermaid(left: LabeledDiffGraph, right: LabeledDiffGraph, dataflowIdMap: DataflowMap | undefined, prefix: string): string {
	// each side is rendered with an empty prefix (the header is added below) and a side-specific id prefix
	const renderSide = (side: LabeledDiffGraph, marker: string): string =>
		graphToMermaid(side.graph, dataflowIdMap, '', `${marker}-${side.label}`)
	const leftGraph = renderSide(left, 'l')
	const rightGraph = renderSide(right, 'r')

	// one flowchart with one labeled subgraph per side
	return [
		`${prefix}flowchart TD`,
		`subgraph "${left.label}"`,
		leftGraph,
		'end',
		`subgraph "${right.label}"`,
		rightGraph,
		'end'
	].join('\n')
}

export function diffGraphsToMermaidUrl(left: LabeledDiffGraph, right: LabeledDiffGraph, dataflowIdMap: DataflowMap<NoInfo> | undefined, prefix: string): string {
export function diffGraphsToMermaidUrl(left: LabeledDiffGraph, right: LabeledDiffGraph, dataflowIdMap: DataflowMap | undefined, prefix: string): string {
	// first build the mermaid code of the diff, then convert that code to a url
	const mermaidCode = diffGraphsToMermaid(left, right, dataflowIdMap, prefix)
	return mermaidCodeToUrl(mermaidCode)
}

2 comments on commit aefbf04

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"artificial" Benchmark Suite

Benchmark suite Current: aefbf04 Previous: 243959c Ratio
Total per-file 3305.2998632272725 ms (3865.72077184573) 3190.570812409091 ms (3747.790963743062) 1.04
Retrieve AST from R code 70.73382040909091 ms (141.5844189456703) 68.74045054545455 ms (132.55959690808635) 1.03
Normalize R AST 94.73230945454546 ms (153.50314031379116) 96.19992009090909 ms (156.15063334620123) 0.98
Produce dataflow information 66.10015845454545 ms (170.5480288102214) 66.09041786363636 ms (169.92667998599904) 1.00
Total per-slice 1.8689008839784487 ms (1.278701775804726) 1.8411702253695927 ms (1.267065280512554) 1.02
Static slicing 1.378943090604754 ms (1.1697245386098545) 1.3684034386686694 ms (1.1799623517395228) 1.01
Reconstruct code 0.4722163287149884 ms (0.2809087585286514) 0.4553098527401606 ms (0.2299566392692304) 1.04
failed to reconstruct/re-parse 0 # 0 # NaN
times hit threshold 0 # 0 # NaN
reduction (characters) 0.7329390759026896 # 0.7329390759026896 # 1
reduction (normalized tokens) 0.720988345209971 # 0.720988345209971 # 1

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"social-science" Benchmark Suite

Benchmark suite Current: aefbf04 Previous: 243959c Ratio
Total per-file 5263.0471051 ms (5825.172116087725) 5419.399718899999 ms (6111.491408384184) 0.97
Retrieve AST from R code 76.77899434 ms (68.53852268684321) 85.22318365999999 ms (79.41151212889127) 0.90
Normalize R AST 111.15333478 ms (68.12276474636369) 113.45254984 ms (69.72126242848063) 0.98
Produce dataflow information 162.67770334 ms (277.1038777598488) 164.38322226 ms (279.00889908327076) 0.99
Total per-slice 8.435838410588591 ms (13.978837728748294) 8.82827828085479 ms (14.575511768060265) 0.96
Static slicing 7.949540791119793 ms (13.857018372899377) 8.228727295296194 ms (14.45423896853327) 0.97
Reconstruct code 0.47802791197901223 ms (0.23861320655029147) 0.5898725311439794 ms (0.30780610835907124) 0.81
failed to reconstruct/re-parse 9 # 9 # 1
times hit threshold 967 # 967 # 1
reduction (characters) 0.898713819973478 # 0.8987761232201357 # 1.00
reduction (normalized tokens) 0.8579790415512589 # 0.8582032343145828 # 1.00

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.