Basic Structure for the Abstract Interpretation (#397)

flowr-analysis · Jan 26, 2024 · c8f1933 · c8f1933 · github-actions · Jan 26, 2024
2 parents f059b24 + dcf9eae
commit c8f1933
Show file tree

Hide file tree

Showing 21 changed files with 706 additions and 91 deletions.
diff --git a/src/abstract-interpretation/domain.ts b/src/abstract-interpretation/domain.ts
@@ -0,0 +1,169 @@
+import {assertUnreachable, guard} from '../util/assert'
+
+interface IntervalBound {
+	readonly value:     number,
+	readonly inclusive: boolean
+}
+
+export class Interval {
+	constructor(readonly min: IntervalBound, readonly max: IntervalBound) {
+		guard(min.value <= max.value, () => `The interval ${this.toString()} has a minimum that is greater than its maximum`)
+		guard(min.value !== max.value || (min.inclusive === max.inclusive), `The bound ${min.value} cannot be in- and exclusive at the same time`)
+	}
+
+	toString(): string {
+		return `${this.min.inclusive ? '[' : '('}${this.min.value}, ${this.max.value}${this.max.inclusive ? ']' : ')'}`
+	}
+}
+
+export class Domain {
+	private readonly _intervals: Set<Interval>
+
+	private constructor(intervals: Interval[] = []) {
+		this._intervals = new Set(unifyOverlappingIntervals(intervals))
+	}
+
+	static bottom(): Domain {
+		return new Domain()
+	}
+
+	static fromIntervals(intervals: Interval[] | Set<Interval>): Domain {
+		return new Domain(Array.from(intervals))
+	}
+
+	static fromScalar(n: number): Domain {
+		return new Domain([new Interval(
+			{value: n, inclusive: true},
+			{value: n, inclusive: true}
+		)])
+	}
+
+	get intervals(): Set<Interval> {
+		return this._intervals
+	}
+
+	private set intervals(intervals: Interval[]) {
+		this._intervals.clear()
+		for(const interval of intervals) {
+			this._intervals.add(interval)
+		}
+	}
+
+	addInterval(interval: Interval): void {
+		this.intervals = unifyOverlappingIntervals([...this.intervals, interval])
+	}
+
+	toString(): string {
+		return `{${Array.from(this.intervals).join(', ')}}`
+	}
+}
+
+const enum CompareType {
+	/** If qual, the bound that's inclusive is the smaller one */
+	Min,
+	/** If equal, the bound that's inclusive is the greater one */
+	Max,
+	/** Equality is only based on the "raw" values */
+	IgnoreInclusivity
+}
+
+function compareIntervals(compareType: CompareType, interval1: IntervalBound, interval2: IntervalBound): number {
+	const diff = interval1.value - interval2.value
+	if(diff !== 0 || compareType === CompareType.IgnoreInclusivity) {
+		return diff
+	}
+	switch(compareType) {
+		case CompareType.Min:
+			return Number(!interval1.inclusive) - Number(!interval2.inclusive)
+		case CompareType.Max:
+			return Number(interval1.inclusive) - Number(interval2.inclusive)
+		default:
+			assertUnreachable(compareType)
+	}
+}
+
+function compareIntervalsByTheirMinimum(interval1: Interval, interval2: Interval): number {
+	return compareIntervals(CompareType.Min, interval1.min, interval2.min)
+}
+
+function compareIntervalsByTheirMaximum(interval1: Interval, interval2: Interval): number {
+	return compareIntervals(CompareType.Max, interval1.max, interval2.max)
+}
+
+export function doIntervalsOverlap(interval1: Interval, interval2: Interval): boolean {
+	const diff1 = compareIntervals(CompareType.IgnoreInclusivity, interval1.max, interval2.min)
+	const diff2 = compareIntervals(CompareType.IgnoreInclusivity, interval2.max, interval1.min)
+
+	// If one interval ends before the other starts, they don't overlap
+	if(diff1 < 0 || diff2 < 0) {
+		return false
+	}
+	// If their end and start are equal, they only overlap if both are inclusive
+	if(diff1 === 0) {
+		return interval1.max.inclusive && interval2.min.inclusive
+	}
+	if(diff2 === 0) {
+		return interval2.max.inclusive && interval1.min.inclusive
+	}
+
+	return true
+}
+
+export function unifyDomains(domains: Domain[]): Domain {
+	const unifiedIntervals = unifyOverlappingIntervals(domains.flatMap(domain => Array.from(domain.intervals)))
+	return Domain.fromIntervals(unifiedIntervals)
+}
+
+export function unifyOverlappingIntervals(intervals: Interval[]): Interval[] {
+	if(intervals.length === 0) {
+		return []
+	}
+	const sortedIntervals = intervals.sort(compareIntervalsByTheirMinimum)
+
+	const unifiedIntervals: Interval[] = []
+	let currentInterval = sortedIntervals[0]
+	for(const nextInterval of sortedIntervals) {
+		if(doIntervalsOverlap(currentInterval, nextInterval)) {
+			const intervalWithEarlierStart = compareIntervalsByTheirMinimum(currentInterval, nextInterval) < 0 ? currentInterval : nextInterval
+			const intervalWithLaterEnd = compareIntervalsByTheirMaximum(currentInterval, nextInterval) > 0 ? currentInterval : nextInterval
+			currentInterval = new Interval(intervalWithEarlierStart.min, intervalWithLaterEnd.max)
+		} else {
+			unifiedIntervals.push(currentInterval)
+			currentInterval = nextInterval
+		}
+	}
+	unifiedIntervals.push(currentInterval)
+	return unifiedIntervals
+}
+
+export function addDomains(domain1: Domain, domain2: Domain): Domain {
+	const intervals = new Set<Interval>()
+	for(const interval1 of domain1.intervals) {
+		for(const interval2 of domain2.intervals) {
+			intervals.add(new Interval({
+				value:     interval1.min.value + interval2.min.value,
+				inclusive: interval1.min.inclusive && interval2.min.inclusive
+			}, {
+				value:     interval1.max.value + interval2.max.value,
+				inclusive: interval1.max.inclusive && interval2.max.inclusive
+			}))
+		}
+	}
+	return Domain.fromIntervals(intervals)
+}
+
+export function subtractDomains(domain1: Domain, domain2: Domain): Domain {
+	const intervals = new Set<Interval>()
+	for(const interval1 of domain1.intervals) {
+		for(const interval2 of domain2.intervals) {
+			intervals.add(new Interval({
+				value:     interval1.min.value - interval2.max.value,
+				inclusive: interval1.min.inclusive && interval2.max.inclusive
+			}, {
+				value:     interval1.max.value - interval2.min.value,
+				inclusive: interval1.max.inclusive && interval2.min.inclusive
+			}))
+		}
+	}
+	return Domain.fromIntervals(intervals)
+}
diff --git a/src/abstract-interpretation/handler/binop/binop.ts b/src/abstract-interpretation/handler/binop/binop.ts
@@ -0,0 +1,42 @@
+import {Handler} from '../handler'
+import {aiLogger, AINode} from '../../processor'
+import {BinaryOperatorFlavor, ParentInformation, RBinaryOp} from '../../../r-bridge'
+import {guard} from '../../../util/assert'
+import {operators} from './operators'
+
+export type BinOpOperators = {
+	[key in BinaryOperatorFlavor]: (lhs: AINode, rhs: AINode, node: RBinaryOp<ParentInformation>) => AINode
+}
+
+export class BinOp implements Handler<AINode> {
+	lhs: AINode | undefined
+	rhs: AINode | undefined
+
+	constructor(readonly node: RBinaryOp<ParentInformation>) {}
+
+	getName(): string {
+		return `Bin Op (${this.node.flavor})`
+	}
+
+	enter(): void {
+		aiLogger.trace(`Entered ${this.getName()}`)
+	}
+
+	exit(): AINode {
+		aiLogger.trace(`Exited ${this.getName()}`)
+		guard(this.lhs !== undefined, `No LHS found for assignment ${this.node.info.id}`)
+		guard(this.rhs !== undefined, `No RHS found for assignment ${this.node.info.id}`)
+		return operators[this.node.flavor](this.lhs, this.rhs, this.node)
+	}
+
+	next(node: AINode): void {
+		aiLogger.trace(`${this.getName()} received`)
+		if(this.lhs === undefined) {
+			this.lhs = node
+		} else if(this.rhs === undefined) {
+			this.rhs = node
+		} else {
+			guard(false, `BinOp ${this.node.info.id} already has both LHS and RHS`)
+		}
+	}
+}
diff --git a/src/abstract-interpretation/handler/binop/operators.ts b/src/abstract-interpretation/handler/binop/operators.ts
@@ -0,0 +1,40 @@
+import {guard} from '../../../util/assert'
+import {BinOpOperators} from './binop'
+import {addDomains, subtractDomains} from '../../domain'
+
+export const operators: BinOpOperators = {
+	'assignment': (lhs, rhs, node) => {
+		return {
+			id:      lhs.id,
+			domain:  rhs.domain,
+			astNode: node.lhs,
+		}
+	},
+	'arithmetic': (lhs, rhs, node) => {
+		switch(node.operator) {
+			case '+':
+				return {
+					id:      lhs.id,
+					domain:  addDomains(lhs.domain, rhs.domain),
+					astNode: node,
+				}
+			case '-':
+				return {
+					id:      lhs.id,
+					domain:  subtractDomains(lhs.domain, rhs.domain),
+					astNode: node,
+				}
+			default:
+				guard(false, `Unknown binary operator ${node.operator}`)
+		}
+	},
+	'logical': () => {
+		guard(false, 'Not implemented yet')
+	},
+	'model formula': () => {
+		guard(false, 'Not implemented yet')
+	},
+	'comparison': () => {
+		guard(false, 'Not implemented yet')
+	},
+}
diff --git a/src/abstract-interpretation/handler/handler.ts b/src/abstract-interpretation/handler/handler.ts
@@ -0,0 +1,6 @@
+export interface Handler<ValueType> {
+	getName: () => string,
+	enter:   () => void
+	exit:    () => ValueType
+	next:    (value: ValueType) => void
+}
diff --git a/src/abstract-interpretation/processor.ts b/src/abstract-interpretation/processor.ts
@@ -0,0 +1,83 @@
+import {DataflowInformation} from '../dataflow/internal/info'
+import {NodeId, NormalizedAst, ParentInformation, RNodeWithParent, RType} from '../r-bridge'
+import {CfgVertexType, extractCFG} from '../util/cfg/cfg'
+import {visitCfg} from '../util/cfg/visitor'
+import {guard} from '../util/assert'
+import {DataflowGraphVertexInfo, EdgeType, OutgoingEdges} from '../dataflow'
+import {Handler} from './handler/handler'
+import {BinOp} from './handler/binop/binop'
+import {Domain, unifyDomains} from './domain'
+import {log} from '../util/log'
+
+export const aiLogger = log.getSubLogger({name: 'abstract-interpretation'})
+
+export interface AINode {
+	readonly id:      NodeId
+	readonly domain:  Domain
+	readonly astNode: RNodeWithParent<ParentInformation>
+}
+
+class Stack<ElementType> {
+	private backingStore: ElementType[] = []
+
+	size(): number { return this.backingStore.length }
+	peek(): ElementType | undefined { return this.backingStore[this.size() - 1] }
+	pop(): ElementType | undefined { return this.backingStore.pop() }
+	push(item: ElementType): ElementType {
+		this.backingStore.push(item)
+		return item
+	}
+}
+
+function getDomainOfDfgChild(node: NodeId, dfg: DataflowInformation, nodeMap: Map<NodeId, AINode>): Domain {
+	const dfgNode: [DataflowGraphVertexInfo, OutgoingEdges] | undefined = dfg.graph.get(node)
+	guard(dfgNode !== undefined, `No DFG-Node found with ID ${node}`)
+	const [_, children] = dfgNode
+	const ids = Array.from(children.entries())
+		.filter(([_, edge]) => edge.types.has(EdgeType.Reads))
+		.map(([id, _]) => id)
+	const domains: Domain[] = []
+	for(const id of ids) {
+		const domain = nodeMap.get(id)?.domain
+		guard(domain !== undefined, `No domain found for ID ${id}`)
+		domains.push(domain)
+	}
+	return unifyDomains(domains)
+}
+
+export function runAbstractInterpretation(ast: NormalizedAst, dfg: DataflowInformation): DataflowInformation {
+	const cfg = extractCFG(ast)
+	const operationStack = new Stack<Handler<AINode>>()
+	const nodeMap = new Map<NodeId, AINode>()
+	visitCfg(cfg, (node, _) => {
+		const astNode = ast.idMap.get(node.id)
+		if(astNode?.type === RType.BinaryOp) {
+			operationStack.push(new BinOp(astNode)).enter()
+		} else if(astNode?.type === RType.Symbol) {
+			operationStack.peek()?.next({
+				id:      astNode.info.id,
+				domain:  getDomainOfDfgChild(node.id, dfg, nodeMap),
+				astNode: astNode,
+			})
+		} else if(astNode?.type === RType.Number){
+			const num = astNode.content.num
+			operationStack.peek()?.next({
+				id:      astNode.info.id,
+				domain:  Domain.fromScalar(num),
+				astNode: astNode,
+			})
+		} else if(node.type === CfgVertexType.EndMarker) {
+			const operation = operationStack.pop()
+			if(operation === undefined) {
+				return
+			}
+			const operationResult = operation.exit()
+			guard(!nodeMap.has(operationResult.id), `Domain for ID ${operationResult.id} already exists`)
+			nodeMap.set(operationResult.id, operationResult)
+			operationStack.peek()?.next(operationResult)
+		} else {
+			aiLogger.warn(`Unknown node type ${node.type}`)
+		}
+	})
+	return dfg
+}
diff --git a/src/benchmark/slicer.ts b/src/benchmark/slicer.ts
@@ -82,6 +82,7 @@ export class BenchmarkSlicer {
 	private loadedXml:      string | undefined
 	private tokenMap:       Record<string, string> | undefined
 	private dataflow:       DataflowInformation | undefined
+	private ai:             DataflowInformation | undefined
 	private normalizedAst:  NormalizedAst | undefined
 	private totalStopwatch: IStoppableStopwatch
 	private finished = false
@@ -133,6 +134,7 @@ export class BenchmarkSlicer {
 		this.loadedXml = await this.measureCommonStep('parse', 'retrieve AST from R code')
 		this.normalizedAst = await this.measureCommonStep('normalize', 'normalize R AST')
 		this.dataflow = await this.measureCommonStep('dataflow', 'produce dataflow information')
+		this.ai = await this.measureCommonStep('ai', 'run abstract interpretation')
 
 		this.stepper.switchToSliceStage()
 

diff --git a/src/benchmark/stats/stats.ts b/src/benchmark/stats/stats.ts
@@ -2,7 +2,7 @@ import { SingleSlicingCriterion, SlicingCriteria } from '../../slicing'
 import { NodeId, RParseRequestFromFile, RParseRequestFromText } from '../../r-bridge'
 import { ReconstructionResult } from '../../slicing'
 
-export const CommonSlicerMeasurements = ['initialize R session', 'inject home path', 'ensure installation of xmlparsedata', 'retrieve token map', 'retrieve AST from R code', 'normalize R AST', 'produce dataflow information', 'close R session', 'total'] as const
+export const CommonSlicerMeasurements = ['initialize R session', 'inject home path', 'ensure installation of xmlparsedata', 'retrieve token map', 'retrieve AST from R code', 'normalize R AST', 'produce dataflow information', 'run abstract interpretation', 'close R session', 'total'] as const
 export type CommonSlicerMeasurements = typeof CommonSlicerMeasurements[number]
 
 export const PerSliceMeasurements = ['static slicing', 'reconstruct code', 'total'] as const

diff --git a/src/cli/repl/commands/cfg.ts b/src/cli/repl/commands/cfg.ts
@@ -1,7 +1,7 @@
 import { ReplCommand } from './main'
 import { SteppingSlicer } from '../../../core'
 import { requestFromInput, RShell } from '../../../r-bridge'
-import { extractCFG } from '../../../util/cfg'
+import { extractCFG } from '../../../util/cfg/cfg'
 import { cfgToMermaid, cfgToMermaidUrl } from '../../../util/mermaid'
 
 async function controlflow(shell: RShell, remainingLine: string) {
Benchmark suite	Current: `c8f1933`	Previous: `816c844`	Ratio
`Total per-file`	`1520.632932590909` ms (`3718.0788752077087`)	`2967.1418025454545` ms (`3618.949366683997`)	`0.51`
`Retrieve AST from R code`	`65.65571895454545` ms (`127.99873381107686`)	`69.06575931818182` ms (`145.0290465282857`)	`0.95`
`Normalize R AST`	`96.81986868181819` ms (`156.3036505430814`)	`95.2519069090909` ms (`153.1479590068876`)	`1.02`
`Produce dataflow information`	`66.28680963636363` ms (`169.83045168427645`)	`64.6035405` ms (`167.23909055295272`)	`1.03`
`Run abstract interpretation`	`0.035418181818181814` ms (`0.009757366381058718`)
`Total per-slice`	`1.8768233145591395` ms (`1.2464725091547884`)	`1.8720710902829922` ms (`1.3532400635998951`)	`1.00`
`Static slicing`	`1.4073809977077163` ms (`1.1578506956320742`)	`1.4122959211773074` ms (`1.281855192577784`)	`1.00`
`Reconstruct code`	`0.45631723974807037` ms (`0.22792186179502585`)	`0.442741380921788` ms (`0.19367073211054742`)	`1.03`
`failed to reconstruct/re-parse`	`0` #	`0` #	`NaN`
`times hit threshold`	`0` #	`0` #	`NaN`
`reduction (characters)`	`0.7329390759026896` #	`0.7329390759026896` #	`1`
`reduction (normalized tokens)`	`0.720988345209971` #	`0.720988345209971` #	`1`
Benchmark suite	Current: `c8f1933`	Previous: `816c844`	Ratio
`Total per-file`	`3732.80898666` ms (`6186.511816833479`)	`5046.89271988` ms (`6042.128928598865`)	`0.74`
`Retrieve AST from R code`	`75.62208516` ms (`64.67354092309284`)	`73.85860644` ms (`69.28452321715349`)	`1.02`
`Normalize R AST`	`114.76827184` ms (`71.85682254089758`)	`114.01183648` ms (`69.33746165288669`)	`1.01`
`Produce dataflow information`	`169.32506618` ms (`290.9706107144802`)	`163.82888334` ms (`279.6344141597903`)	`1.03`
`Run abstract interpretation`	`0.07139494` ms (`0.032116960591195426`)
`Total per-slice`	`8.94063926542521` ms (`14.743572336810447`)	`8.618077094909866` ms (`14.411839324447698`)	`1.04`
`Static slicing`	`8.296802683364657` ms (`14.626376535409552`)	`8.092352677712956` ms (`14.275084427627904`)	`1.03`
`Reconstruct code`	`0.6338942891717891` ms (`0.3109928307683894`)	`0.5170374517553479` ms (`0.28554733887508554`)	`1.23`
`failed to reconstruct/re-parse`	`9` #	`9` #	`1`
`times hit threshold`	`967` #	`967` #	`1`
`reduction (characters)`	`0.898713819973478` #	`0.898713819973478` #	`1`
`reduction (normalized tokens)`	`0.8579790415512589` #	`0.8579790415512589` #	`1`