Skip to content

Commit

Permalink
Basic Structure for the Abstract Interpretation (#397)
Browse files Browse the repository at this point in the history
  • Loading branch information
EagleoutIce authored Jan 26, 2024
2 parents f059b24 + dcf9eae commit c8f1933
Show file tree
Hide file tree
Showing 21 changed files with 706 additions and 91 deletions.
169 changes: 169 additions & 0 deletions src/abstract-interpretation/domain.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import {assertUnreachable, guard} from '../util/assert'

interface IntervalBound {
readonly value: number,
readonly inclusive: boolean
}

export class Interval {
constructor(readonly min: IntervalBound, readonly max: IntervalBound) {
guard(min.value <= max.value, () => `The interval ${this.toString()} has a minimum that is greater than its maximum`)
guard(min.value !== max.value || (min.inclusive === max.inclusive), `The bound ${min.value} cannot be in- and exclusive at the same time`)
}

toString(): string {
return `${this.min.inclusive ? '[' : '('}${this.min.value}, ${this.max.value}${this.max.inclusive ? ']' : ')'}`
}
}

export class Domain {
private readonly _intervals: Set<Interval>

private constructor(intervals: Interval[] = []) {
this._intervals = new Set(unifyOverlappingIntervals(intervals))
}

static bottom(): Domain {
return new Domain()
}

static fromIntervals(intervals: Interval[] | Set<Interval>): Domain {
return new Domain(Array.from(intervals))
}

static fromScalar(n: number): Domain {
return new Domain([new Interval(
{value: n, inclusive: true},
{value: n, inclusive: true}
)])
}

get intervals(): Set<Interval> {
return this._intervals
}

private set intervals(intervals: Interval[]) {
this._intervals.clear()
for(const interval of intervals) {
this._intervals.add(interval)
}
}

addInterval(interval: Interval): void {
this.intervals = unifyOverlappingIntervals([...this.intervals, interval])
}

toString(): string {
return `{${Array.from(this.intervals).join(', ')}}`
}
}

const enum CompareType {
/** If qual, the bound that's inclusive is the smaller one */
Min,
/** If equal, the bound that's inclusive is the greater one */
Max,
/** Equality is only based on the "raw" values */
IgnoreInclusivity
}

function compareIntervals(compareType: CompareType, interval1: IntervalBound, interval2: IntervalBound): number {
const diff = interval1.value - interval2.value
if(diff !== 0 || compareType === CompareType.IgnoreInclusivity) {
return diff
}
switch(compareType) {
case CompareType.Min:
return Number(!interval1.inclusive) - Number(!interval2.inclusive)
case CompareType.Max:
return Number(interval1.inclusive) - Number(interval2.inclusive)
default:
assertUnreachable(compareType)
}
}

function compareIntervalsByTheirMinimum(interval1: Interval, interval2: Interval): number {
return compareIntervals(CompareType.Min, interval1.min, interval2.min)
}

function compareIntervalsByTheirMaximum(interval1: Interval, interval2: Interval): number {
return compareIntervals(CompareType.Max, interval1.max, interval2.max)
}

export function doIntervalsOverlap(interval1: Interval, interval2: Interval): boolean {
const diff1 = compareIntervals(CompareType.IgnoreInclusivity, interval1.max, interval2.min)
const diff2 = compareIntervals(CompareType.IgnoreInclusivity, interval2.max, interval1.min)

// If one interval ends before the other starts, they don't overlap
if(diff1 < 0 || diff2 < 0) {
return false
}
// If their end and start are equal, they only overlap if both are inclusive
if(diff1 === 0) {
return interval1.max.inclusive && interval2.min.inclusive
}
if(diff2 === 0) {
return interval2.max.inclusive && interval1.min.inclusive
}

return true
}

export function unifyDomains(domains: Domain[]): Domain {
const unifiedIntervals = unifyOverlappingIntervals(domains.flatMap(domain => Array.from(domain.intervals)))
return Domain.fromIntervals(unifiedIntervals)
}

export function unifyOverlappingIntervals(intervals: Interval[]): Interval[] {
if(intervals.length === 0) {
return []
}
const sortedIntervals = intervals.sort(compareIntervalsByTheirMinimum)

const unifiedIntervals: Interval[] = []
let currentInterval = sortedIntervals[0]
for(const nextInterval of sortedIntervals) {
if(doIntervalsOverlap(currentInterval, nextInterval)) {
const intervalWithEarlierStart = compareIntervalsByTheirMinimum(currentInterval, nextInterval) < 0 ? currentInterval : nextInterval
const intervalWithLaterEnd = compareIntervalsByTheirMaximum(currentInterval, nextInterval) > 0 ? currentInterval : nextInterval
currentInterval = new Interval(intervalWithEarlierStart.min, intervalWithLaterEnd.max)
} else {
unifiedIntervals.push(currentInterval)
currentInterval = nextInterval
}
}
unifiedIntervals.push(currentInterval)
return unifiedIntervals
}

export function addDomains(domain1: Domain, domain2: Domain): Domain {
const intervals = new Set<Interval>()
for(const interval1 of domain1.intervals) {
for(const interval2 of domain2.intervals) {
intervals.add(new Interval({
value: interval1.min.value + interval2.min.value,
inclusive: interval1.min.inclusive && interval2.min.inclusive
}, {
value: interval1.max.value + interval2.max.value,
inclusive: interval1.max.inclusive && interval2.max.inclusive
}))
}
}
return Domain.fromIntervals(intervals)
}

export function subtractDomains(domain1: Domain, domain2: Domain): Domain {
const intervals = new Set<Interval>()
for(const interval1 of domain1.intervals) {
for(const interval2 of domain2.intervals) {
intervals.add(new Interval({
value: interval1.min.value - interval2.max.value,
inclusive: interval1.min.inclusive && interval2.max.inclusive
}, {
value: interval1.max.value - interval2.min.value,
inclusive: interval1.max.inclusive && interval2.min.inclusive
}))
}
}
return Domain.fromIntervals(intervals)
}
42 changes: 42 additions & 0 deletions src/abstract-interpretation/handler/binop/binop.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import {Handler} from '../handler'
import {aiLogger, AINode} from '../../processor'
import {BinaryOperatorFlavor, ParentInformation, RBinaryOp} from '../../../r-bridge'
import {guard} from '../../../util/assert'
import {operators} from './operators'

export type BinOpOperators = {
[key in BinaryOperatorFlavor]: (lhs: AINode, rhs: AINode, node: RBinaryOp<ParentInformation>) => AINode
}

export class BinOp implements Handler<AINode> {
lhs: AINode | undefined
rhs: AINode | undefined

constructor(readonly node: RBinaryOp<ParentInformation>) {}

getName(): string {
return `Bin Op (${this.node.flavor})`
}

enter(): void {
aiLogger.trace(`Entered ${this.getName()}`)
}

exit(): AINode {
aiLogger.trace(`Exited ${this.getName()}`)
guard(this.lhs !== undefined, `No LHS found for assignment ${this.node.info.id}`)
guard(this.rhs !== undefined, `No RHS found for assignment ${this.node.info.id}`)
return operators[this.node.flavor](this.lhs, this.rhs, this.node)
}

next(node: AINode): void {
aiLogger.trace(`${this.getName()} received`)
if(this.lhs === undefined) {
this.lhs = node
} else if(this.rhs === undefined) {
this.rhs = node
} else {
guard(false, `BinOp ${this.node.info.id} already has both LHS and RHS`)
}
}
}
40 changes: 40 additions & 0 deletions src/abstract-interpretation/handler/binop/operators.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import {guard} from '../../../util/assert'
import {BinOpOperators} from './binop'
import {addDomains, subtractDomains} from '../../domain'

export const operators: BinOpOperators = {
'assignment': (lhs, rhs, node) => {
return {
id: lhs.id,
domain: rhs.domain,
astNode: node.lhs,
}
},
'arithmetic': (lhs, rhs, node) => {
switch(node.operator) {
case '+':
return {
id: lhs.id,
domain: addDomains(lhs.domain, rhs.domain),
astNode: node,
}
case '-':
return {
id: lhs.id,
domain: subtractDomains(lhs.domain, rhs.domain),
astNode: node,
}
default:
guard(false, `Unknown binary operator ${node.operator}`)
}
},
'logical': () => {
guard(false, 'Not implemented yet')
},
'model formula': () => {
guard(false, 'Not implemented yet')
},
'comparison': () => {
guard(false, 'Not implemented yet')
},
}
6 changes: 6 additions & 0 deletions src/abstract-interpretation/handler/handler.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
export interface Handler<ValueType> {
getName: () => string,
enter: () => void
exit: () => ValueType
next: (value: ValueType) => void
}
83 changes: 83 additions & 0 deletions src/abstract-interpretation/processor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import {DataflowInformation} from '../dataflow/internal/info'
import {NodeId, NormalizedAst, ParentInformation, RNodeWithParent, RType} from '../r-bridge'
import {CfgVertexType, extractCFG} from '../util/cfg/cfg'
import {visitCfg} from '../util/cfg/visitor'
import {guard} from '../util/assert'
import {DataflowGraphVertexInfo, EdgeType, OutgoingEdges} from '../dataflow'
import {Handler} from './handler/handler'
import {BinOp} from './handler/binop/binop'
import {Domain, unifyDomains} from './domain'
import {log} from '../util/log'

export const aiLogger = log.getSubLogger({name: 'abstract-interpretation'})

export interface AINode {
readonly id: NodeId
readonly domain: Domain
readonly astNode: RNodeWithParent<ParentInformation>
}

class Stack<ElementType> {
private backingStore: ElementType[] = []

size(): number { return this.backingStore.length }
peek(): ElementType | undefined { return this.backingStore[this.size() - 1] }
pop(): ElementType | undefined { return this.backingStore.pop() }
push(item: ElementType): ElementType {
this.backingStore.push(item)
return item
}
}

function getDomainOfDfgChild(node: NodeId, dfg: DataflowInformation, nodeMap: Map<NodeId, AINode>): Domain {
const dfgNode: [DataflowGraphVertexInfo, OutgoingEdges] | undefined = dfg.graph.get(node)
guard(dfgNode !== undefined, `No DFG-Node found with ID ${node}`)
const [_, children] = dfgNode
const ids = Array.from(children.entries())
.filter(([_, edge]) => edge.types.has(EdgeType.Reads))
.map(([id, _]) => id)
const domains: Domain[] = []
for(const id of ids) {
const domain = nodeMap.get(id)?.domain
guard(domain !== undefined, `No domain found for ID ${id}`)
domains.push(domain)
}
return unifyDomains(domains)
}

export function runAbstractInterpretation(ast: NormalizedAst, dfg: DataflowInformation): DataflowInformation {
const cfg = extractCFG(ast)
const operationStack = new Stack<Handler<AINode>>()
const nodeMap = new Map<NodeId, AINode>()
visitCfg(cfg, (node, _) => {
const astNode = ast.idMap.get(node.id)
if(astNode?.type === RType.BinaryOp) {
operationStack.push(new BinOp(astNode)).enter()
} else if(astNode?.type === RType.Symbol) {
operationStack.peek()?.next({
id: astNode.info.id,
domain: getDomainOfDfgChild(node.id, dfg, nodeMap),
astNode: astNode,
})
} else if(astNode?.type === RType.Number){
const num = astNode.content.num
operationStack.peek()?.next({
id: astNode.info.id,
domain: Domain.fromScalar(num),
astNode: astNode,
})
} else if(node.type === CfgVertexType.EndMarker) {
const operation = operationStack.pop()
if(operation === undefined) {
return
}
const operationResult = operation.exit()
guard(!nodeMap.has(operationResult.id), `Domain for ID ${operationResult.id} already exists`)
nodeMap.set(operationResult.id, operationResult)
operationStack.peek()?.next(operationResult)
} else {
aiLogger.warn(`Unknown node type ${node.type}`)
}
})
return dfg
}
2 changes: 2 additions & 0 deletions src/benchmark/slicer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ export class BenchmarkSlicer {
private loadedXml: string | undefined
private tokenMap: Record<string, string> | undefined
private dataflow: DataflowInformation | undefined
private ai: DataflowInformation | undefined
private normalizedAst: NormalizedAst | undefined
private totalStopwatch: IStoppableStopwatch
private finished = false
Expand Down Expand Up @@ -133,6 +134,7 @@ export class BenchmarkSlicer {
this.loadedXml = await this.measureCommonStep('parse', 'retrieve AST from R code')
this.normalizedAst = await this.measureCommonStep('normalize', 'normalize R AST')
this.dataflow = await this.measureCommonStep('dataflow', 'produce dataflow information')
this.ai = await this.measureCommonStep('ai', 'run abstract interpretation')

this.stepper.switchToSliceStage()

Expand Down
2 changes: 1 addition & 1 deletion src/benchmark/stats/stats.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { SingleSlicingCriterion, SlicingCriteria } from '../../slicing'
import { NodeId, RParseRequestFromFile, RParseRequestFromText } from '../../r-bridge'
import { ReconstructionResult } from '../../slicing'

export const CommonSlicerMeasurements = ['initialize R session', 'inject home path', 'ensure installation of xmlparsedata', 'retrieve token map', 'retrieve AST from R code', 'normalize R AST', 'produce dataflow information', 'close R session', 'total'] as const
export const CommonSlicerMeasurements = ['initialize R session', 'inject home path', 'ensure installation of xmlparsedata', 'retrieve token map', 'retrieve AST from R code', 'normalize R AST', 'produce dataflow information', 'run abstract interpretation', 'close R session', 'total'] as const
export type CommonSlicerMeasurements = typeof CommonSlicerMeasurements[number]

export const PerSliceMeasurements = ['static slicing', 'reconstruct code', 'total'] as const
Expand Down
2 changes: 1 addition & 1 deletion src/cli/repl/commands/cfg.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { ReplCommand } from './main'
import { SteppingSlicer } from '../../../core'
import { requestFromInput, RShell } from '../../../r-bridge'
import { extractCFG } from '../../../util/cfg'
import { extractCFG } from '../../../util/cfg/cfg'
import { cfgToMermaid, cfgToMermaidUrl } from '../../../util/mermaid'

async function controlflow(shell: RShell, remainingLine: string) {
Expand Down
Loading

2 comments on commit c8f1933

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"artificial" Benchmark Suite

Benchmark suite Current: c8f1933 Previous: 816c844 Ratio
Total per-file 1520.632932590909 ms (3718.0788752077087) 2967.1418025454545 ms (3618.949366683997) 0.51
Retrieve AST from R code 65.65571895454545 ms (127.99873381107686) 69.06575931818182 ms (145.0290465282857) 0.95
Normalize R AST 96.81986868181819 ms (156.3036505430814) 95.2519069090909 ms (153.1479590068876) 1.02
Produce dataflow information 66.28680963636363 ms (169.83045168427645) 64.6035405 ms (167.23909055295272) 1.03
Run abstract interpretation 0.035418181818181814 ms (0.009757366381058718)
Total per-slice 1.8768233145591395 ms (1.2464725091547884) 1.8720710902829922 ms (1.3532400635998951) 1.00
Static slicing 1.4073809977077163 ms (1.1578506956320742) 1.4122959211773074 ms (1.281855192577784) 1.00
Reconstruct code 0.45631723974807037 ms (0.22792186179502585) 0.442741380921788 ms (0.19367073211054742) 1.03
failed to reconstruct/re-parse 0 # 0 # NaN
times hit threshold 0 # 0 # NaN
reduction (characters) 0.7329390759026896 # 0.7329390759026896 # 1
reduction (normalized tokens) 0.720988345209971 # 0.720988345209971 # 1

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"social-science" Benchmark Suite

Benchmark suite Current: c8f1933 Previous: 816c844 Ratio
Total per-file 3732.80898666 ms (6186.511816833479) 5046.89271988 ms (6042.128928598865) 0.74
Retrieve AST from R code 75.62208516 ms (64.67354092309284) 73.85860644 ms (69.28452321715349) 1.02
Normalize R AST 114.76827184 ms (71.85682254089758) 114.01183648 ms (69.33746165288669) 1.01
Produce dataflow information 169.32506618 ms (290.9706107144802) 163.82888334 ms (279.6344141597903) 1.03
Run abstract interpretation 0.07139494 ms (0.032116960591195426)
Total per-slice 8.94063926542521 ms (14.743572336810447) 8.618077094909866 ms (14.411839324447698) 1.04
Static slicing 8.296802683364657 ms (14.626376535409552) 8.092352677712956 ms (14.275084427627904) 1.03
Reconstruct code 0.6338942891717891 ms (0.3109928307683894) 0.5170374517553479 ms (0.28554733887508554) 1.23
failed to reconstruct/re-parse 9 # 9 # 1
times hit threshold 967 # 967 # 1
reduction (characters) 0.898713819973478 # 0.898713819973478 # 1
reduction (normalized tokens) 0.8579790415512589 # 0.8579790415512589 # 1

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.