-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
18 changed files
with
405 additions
and
256 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import type { StdioProcessor } from './repl/execute'; | ||
import { waitOnScript } from './repl/execute'; | ||
import { scripts } from './common/scripts-info'; | ||
import path from 'path'; | ||
|
||
/**
 * Starts another flowR script through {@link waitOnScript}.
 *
 * The script is looked up by its key in {@link scripts}; its `target` is passed
 * through `path.resolve` with the directory of this module as the base.
 *
 * @param name        - key of the script in {@link scripts}
 * @param args        - arguments handed over to {@link waitOnScript} unchanged
 * @param io          - optional stdio processor, handed over unchanged
 * @param exitOnError - handed over unchanged, defaults to `false`
 *
 * @see waitOnScript
 */
export async function runScript(name: keyof typeof scripts, args: readonly string[], io?: StdioProcessor, exitOnError = false): Promise<void> {
	const { target } = scripts[name];
	const scriptPath = path.resolve(__dirname, target);
	return waitOnScript(scriptPath, args, io, exitOnError);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
import path from 'path'; | ||
import type { Arguments } from '../../util/parallel'; | ||
import { LimitedThreadPool } from '../../util/parallel'; | ||
import { allRFilesFrom } from '../../util/files'; | ||
import { retrieveArchiveName, validateFeatures } from '../common/features'; | ||
import fs from 'fs'; | ||
import { initFileProvider } from '../../statistics/output/statistics-file'; | ||
import { jsonReplacer } from '../../util/json'; | ||
import { log } from '../../util/log'; | ||
import type { StatsCliOptions } from '../statistics-app'; | ||
import { getStatsForSingleFile } from './statistics-helper-core'; | ||
import commandLineArgs from 'command-line-args'; | ||
import { scripts } from '../common/scripts-info'; | ||
import type { StatsHelperCliOptions } from '../statistics-helper-app'; | ||
import { setFormatter, voidFormatter } from '../../util/ansi'; | ||
|
||
/** Matches (case-insensitively) a `/` that is directly followed by `test`. */
const testRegex = /[^/]*\/test/i;
/** Matches (case-insensitively) a `/` that is directly followed by `example`. */
const exampleRegex = /[^/]*\/example/i;

/**
 * Classifies a file path by the folder names it contains.
 *
 * @param file - path of the file to classify
 * @returns `'test-'` if the path matches {@link testRegex}, otherwise `'example-'`
 *          if it matches {@link exampleRegex}, otherwise the empty string
 */
function getPrefixForFile(file: string) {
	// the test check takes precedence if both patterns match
	if(testRegex.test(file)) {
		return 'test-';
	}
	return exampleRegex.test(file) ? 'example-' : '';
}
|
||
/**
 * Builds the folder-name suffix for a file: `--` followed by the path of `file`
 * relative to `base`, flattened into a single path segment.
 *
 * Every `/` of the relative path is replaced by the fullwidth solidus (U+FF0F).
 * It looks like a slash but is not a path separator, so the result no longer
 * introduces nested folders when it is joined onto an output directory.
 *
 * @param base - directory the file path is made relative to
 * @param file - path of the file
 * @returns the suffix, starting with `--`
 */
function getSuffixForFile(base: string, file: string) {
	const subpath = path.relative(base, file);
	// written as an escape so the character survives tools that normalize look-alike Unicode
	return '--' + subpath.replace(/\//g, '\uFF0F');
}
|
||
/**
 * Iterates over everything {@link allRFilesFrom} yields for `options.input` and
 * assembles one command-line argument list per file.
 *
 * Files whose archive (as named by {@link retrieveArchiveName}) already exists
 * on disk are reported and left out. Progress is printed every `reportEvery`
 * collected files; that interval is multiplied by five once ten intervals have
 * been collected.
 *
 * @param options    - CLI options; `input` and `output-dir` are read
 * @param verboseAdd - extra arguments appended to every argument list
 * @param dumpJson   - extra arguments appended to every argument list (last)
 * @param features   - extra arguments appended between `verboseAdd` and `dumpJson`
 * @returns the argument lists of all files that still have to be processed
 */
async function collectFileArguments(options: StatsCliOptions, verboseAdd: readonly string[], dumpJson: readonly string[], features: readonly string[]) {
	const hasSingleInput = options.input.length === 1;
	// with several inputs there is no common base: the suffix uses '' and the root dir is passed as '""'
	const suffixBase = hasSingleInput ? options.input[0] : '';
	const rootDirArg = hasSingleInput ? options.input[0] : '""';

	const collected: Arguments[] = [];
	let reportEvery = 5000;
	let alreadyArchived = 0;

	for await (const entry of allRFilesFrom(options.input)) {
		const file = entry.content;
		const folderName = getPrefixForFile(file) + getSuffixForFile(suffixBase, file);
		const outputDir = path.join(options['output-dir'], folderName);
		const archive = retrieveArchiveName(outputDir);

		if(fs.existsSync(archive)) {
			console.log(`Archive ${archive} exists. Skip.`);
			alreadyArchived += 1;
			continue;
		}

		collected.push(['--input', file, '--output-dir', outputDir,'--compress', '--root-dir', rootDirArg, ...verboseAdd, ...features, ...dumpJson]);

		// exactly one entry is pushed per counted file, so the length is the running count
		const collectedSoFar = collected.length;
		if(collectedSoFar % reportEvery === 0) {
			console.log(`Collected ${collectedSoFar} files`);
			if(collectedSoFar >= 10 * reportEvery) {
				reportEvery *= 5;
			}
		}
	}

	console.log(`Total: ${collected.length} files (${alreadyArchived} skipped with archive existing)`);
	return collected;
}
|
||
/**
 * Shuffles `items` in place using the Fisher–Yates algorithm.
 * Unlike sorting with a random comparator, every permutation is equally likely
 * (as far as `Math.random` is uniform) and no inconsistent comparator is handed to `sort`.
 */
function shuffleInPlace<T>(items: T[]): void {
	for(let i = items.length - 1; i > 0; i--) {
		const j = Math.floor(Math.random() * (i + 1));
		const swap = items[i] as T;
		items[i] = items[j] as T;
		items[j] = swap;
	}
}

/**
 * Entry point of the statistics script: collects the argument lists for all
 * input files and processes them, either with a {@link LimitedThreadPool} or
 * sequentially in this process.
 *
 * - Without any input the process exits with code 0 after printing a hint.
 * - With `options.limit` set, the collected files are shuffled first so that the
 *   limit selects a random subset; the limit applies to both execution modes.
 * - `options.parallel > 0` selects the pool, anything else runs sequentially.
 *
 * @param options - parsed command-line options of the statistics script
 */
export async function flowrScriptGetStats(options: StatsCliOptions) {
	if(options.input.length === 0) {
		console.error('No input files given. Nothing to do. See \'--help\' if this is an error.');
		process.exit(0);
	}

	if(options['no-ansi']) {
		log.info('disabling ansi colors');
		setFormatter(voidFormatter);
	}

	const processedFeatures = validateFeatures(options.features);
	initFileProvider(options['output-dir']);
	console.log(`Processing features: ${JSON.stringify(processedFeatures, jsonReplacer)}`);
	console.log(`Using ${options.parallel} parallel executors`);

	const verboseAdd = options.verbose ? ['--verbose'] : [];
	const features = [...processedFeatures].flatMap(s => ['--features', s]);
	const dumpJson = options['dump-json'] ? ['--dump-json'] : [];

	// we do not use the limit argument to be able to pick the limit randomly
	const args = await collectFileArguments(options, verboseAdd, dumpJson, features);

	if(options.limit) {
		console.log('Shuffle...');
		log.info(`limiting to ${options.limit} files`);
		// shuffle so that the limit below picks a uniformly random subset
		shuffleInPlace(args);
	}
	console.log('Prepare Pool...');

	const limit = options.limit ?? args.length;

	if(options.parallel > 0) {
		const pool = new LimitedThreadPool(
			`${__dirname}/statistics-helper-app`,
			args,
			limit,
			options.parallel
		);
		console.log('Run Pool...');
		await pool.run();
		const stats = pool.getStats();
		console.log(`Processed ${stats.counter} files, skipped ${stats.skipped.length} files due to errors`);
	} else {
		console.log('Run Sequentially as parallel <= 0...');
		// honour the limit here as well; without it the sequential mode would process every file
		for(const arg of args.slice(0, limit)) {
			await getStatsForSingleFile(commandLineArgs(scripts['stats-helper'].options, { argv: arg }) as StatsHelperCliOptions);
		}
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
import { retrieveArchiveName } from '../common/features'; | ||
import fs from 'fs'; | ||
import type { FeatureKey } from '../../statistics/features/feature'; | ||
import { RShell } from '../../r-bridge/shell'; | ||
import { initFileProvider, statisticsFileProvider } from '../../statistics/output/statistics-file'; | ||
import { extractUsageStatistics, staticRequests } from '../../statistics/statistics'; | ||
import { extractCFG } from '../../util/cfg/cfg'; | ||
import { printStepResult, StepOutputFormat } from '../../core/print/print'; | ||
import { PARSE_WITH_R_SHELL_STEP } from '../../core/steps/all/core/00-parse'; | ||
import { NORMALIZE } from '../../core/steps/all/core/10-normalize'; | ||
import { STATIC_DATAFLOW } from '../../core/steps/all/core/20-dataflow'; | ||
import { jsonReplacer } from '../../util/json'; | ||
import { log } from '../../util/log'; | ||
import { guard } from '../../util/assert'; | ||
import { date2string } from '../../util/time'; | ||
import type { StatsHelperCliOptions } from '../statistics-helper-app'; | ||
import { create } from 'tar'; | ||
import { setFormatter, voidFormatter } from '../../util/ansi'; | ||
|
||
|
||
/**
 * Packs `folder` into the gzip-compressed tar archive `target`.
 *
 * When the archive was created, `folder` is removed recursively. When creating
 * the archive fails, the failure is only logged: the folder is kept and the
 * returned promise still resolves.
 *
 * @param folder - folder to archive (and to delete afterwards)
 * @param target - path of the archive file to write
 * @returns a promise that settles once archiving (and cleanup) is done
 */
function compressFolder(folder: string, target: string) {
	const removeFolder = () => {
		// the archive exists now, the uncompressed folder is no longer needed
		fs.rmSync(folder, { recursive: true, force: true });
	};
	const reportFailure = () => {
		console.log(`failed to compress ${folder}`);
	};

	// eslint-disable-next-line @typescript-eslint/no-unsafe-call,@typescript-eslint/no-unsafe-member-access
	return create({
		gzip:          true,
		file:          target,
		portable:      true,
		preservePaths: false
	}, [folder]).then(removeFolder, reportFailure);
}
|
||
|
||
/**
 * Extracts the usage statistics of a single file and writes them through the
 * statistics file provider into `options['output-dir']`.
 *
 * - With `options.compress`, nothing is done if the archive for the output
 *   directory already exists; otherwise the output directory is compressed at the end.
 * - With `options['dump-json']`, the parse, normalize, dataflow and CFG results
 *   are appended as JSON as well.
 * - If the extraction does not yield exactly one output, only an error is logged.
 *
 * The R shell started for the extraction is closed in every case, including
 * when extraction, output or compression throws.
 *
 * @param options - parsed command-line options of the statistics helper
 */
export async function getStatsForSingleFile(options: StatsHelperCliOptions) {
	if(options['no-ansi']) {
		log.info('disabling ansi colors');
		setFormatter(voidFormatter);
	}

	let target: string | undefined = undefined;
	if(options.compress) {
		target = retrieveArchiveName(options['output-dir']);
		if(fs.existsSync(target)) {
			console.log(`Archive ${target} exists. Skip.`);
			// NOTE(review): this ends the whole process, also when this function is called in a loop by another script
			process.exit(0);
		}
	}

	// assume correct
	const processedFeatures = new Set<FeatureKey>(options.features as FeatureKey[]);

	const shell = new RShell();
	try {
		initFileProvider(options['output-dir']);

		await shell.obtainTmpDir();
		const stats = await extractUsageStatistics(shell,
			() => { /* do nothing */ },
			processedFeatures,
			staticRequests({ request: 'file', content: options.input }),
			options['root-dir']
		);
		// console.warn(`skipped ${stats.meta.failedRequests.length} requests due to errors (run with logs to get more info)`)

		if(stats.outputs.size === 1) {
			if(options['dump-json']) {
				const [, output] = [...stats.outputs.entries()][0];
				const cfg = extractCFG(output.normalize);
				statisticsFileProvider.append('output-json', 'parse', await printStepResult(PARSE_WITH_R_SHELL_STEP, output.parse, StepOutputFormat.Json));
				statisticsFileProvider.append('output-json', 'normalize', await printStepResult(NORMALIZE, output.normalize, StepOutputFormat.Json));
				statisticsFileProvider.append('output-json', 'dataflow', await printStepResult(STATIC_DATAFLOW, output.dataflow, StepOutputFormat.Json));
				statisticsFileProvider.append('output-json', 'cfg', JSON.stringify(cfg, jsonReplacer));
			}

			statisticsFileProvider.append('meta', 'stats', JSON.stringify({ ...stats.meta, file: options.input }, jsonReplacer));
			statisticsFileProvider.append('meta', 'features', JSON.stringify(stats.features, jsonReplacer));
		} else {
			log.error(`expected exactly one output vs. ${stats.outputs.size}, got: ${JSON.stringify([...stats.outputs.keys()], jsonReplacer, 2)}`);
		}
		if(options.compress) {
			guard(target !== undefined, 'target must be defined given the compress option');
			console.log(`[${date2string(new Date())}] Compressing ${options['output-dir']} to ${target}`);
			await compressFolder(options['output-dir'], target);
		}
	} finally {
		// release the R shell even if one of the steps above failed
		shell.close();
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import type { SummarizerCliOptions } from '../summarizer-app'; | ||
import { StatisticsSummarizer } from '../../statistics/summarizer/summarizer'; | ||
import { BenchmarkSummarizer } from '../../benchmark/summarizer/summarizer'; | ||
import { detectSummarizationType } from '../../statistics/summarizer/auto-detect'; | ||
import { SummarizerType } from '../../util/summarizer'; | ||
import { allFeatureNames } from '../../statistics/features/feature'; | ||
|
||
|
||
/**
 * Creates the {@link BenchmarkSummarizer} for the given options.
 *
 * All output locations are derived from `outputBase`: it is used directly as
 * the intermediate output path, `-ultimate.json` is appended for the final
 * output and, only if `options.graph` is set, `-graph.json` for the graph output.
 *
 * @param options    - summarizer CLI options; `graph` and `input` are read
 * @param outputBase - common prefix of all output paths
 */
function getBenchmarkSummarizer(options: SummarizerCliOptions, outputBase: string) {
	const graphOutputPath = options.graph ? `${outputBase}-graph.json` : undefined;
	const outputPath = `${outputBase}-ultimate.json`;

	return new BenchmarkSummarizer({
		graphOutputPath,
		inputPath:              options.input,
		intermediateOutputPath: outputBase,
		outputPath,
		logger:                 console.log
	});
}
|
||
/**
 * Creates the {@link StatisticsSummarizer} for the given options.
 *
 * The final output goes to `outputBase` + `-final`, intermediate results to the
 * folder `outputBase` + `-intermediate/`. All feature names are used.
 *
 * @param options    - summarizer CLI options; `input` and `project-skip` are read
 * @param outputBase - common prefix of all output paths
 */
function getStatisticsSummarizer(options: SummarizerCliOptions, outputBase: string) {
	const outputPath = `${outputBase}-final`;
	const intermediateOutputPath = `${outputBase}-intermediate/`;

	return new StatisticsSummarizer({
		inputPath:     options.input,
		outputPath,
		intermediateOutputPath,
		projectSkip:   options['project-skip'],
		featuresToUse: allFeatureNames,
		logger:        console.log
	});
}
|
||
|
||
/**
 * Selects and creates the summarizer that matches `options.type`.
 *
 * With the type `'auto'`, the kind of summarization is detected from
 * `options.input` via {@link detectSummarizationType}. Any type other than
 * benchmark or statistics is reported on stderr and ends the process with exit code 1.
 *
 * @param options    - summarizer CLI options
 * @param outputBase - common prefix of all output paths, passed on to the summarizer factory
 * @returns the summarizer to use
 */
async function retrieveSummarizer(options: SummarizerCliOptions, outputBase: string): Promise<StatisticsSummarizer | BenchmarkSummarizer> {
	const type = options.type === 'auto' ? await detectSummarizationType(options.input) : options.type;

	switch(type) {
		case SummarizerType.Benchmark:
			console.log('Summarizing benchmark');
			return getBenchmarkSummarizer(options, outputBase);
		case SummarizerType.Statistics:
			console.log('Summarizing statistics');
			return getStatisticsSummarizer(options, outputBase);
		default:
			console.error('Unknown type', type, 'either give "benchmark" or "statistics"');
			process.exit(1);
	}
}
|
||
/**
 * Entry point of the summarizer script.
 *
 * The output base is `options.output` (or `options.input` if no output is given)
 * with a trailing `.json` or a trailing `/` replaced by `-summary`. A path that
 * ends in neither is used as the output base unchanged.
 *
 * The preparation phase is skipped when `options['ultimate-only']` is set; the
 * summarize phase always runs.
 *
 * @param options - parsed command-line options of the summarizer script
 */
export async function flowrScriptSummarizer(options: SummarizerCliOptions) {
	const basePath = options.output ?? options.input;
	const outputBase = basePath.replace(/\.json$|\/$/, '-summary');
	console.log(`Writing outputs to base ${outputBase}`);

	const summarizer = await retrieveSummarizer(options, outputBase);

	const runPreparation = !options['ultimate-only'];
	if(runPreparation) {
		await summarizer.preparationPhase(options.categorize);
	}

	await summarizer.summarizePhase();
}
Oops, something went wrong.
3a4a3c9
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"artificial" Benchmark Suite
| Benchmark | Current | Previous | Ratio |
|---|---|---|---|
| Retrieve AST from R code | 233.51630863636362 ms (98.42728308061673) | 238.12351204545453 ms (103.59720843756357) | 0.98 |
| Normalize R AST | 19.09238813636364 ms (32.88451942624459) | 19.968034227272728 ms (34.84298543847825) | 0.96 |
| Produce dataflow information | 37.29713595454545 ms (79.6841893426286) | 38.310942090909094 ms (82.04448044777155) | 0.97 |
| Total per-file | 807.1687020454546 ms (1432.9006933100156) | 811.1703915909092 ms (1431.4404310276739) | 1.00 |
| Static slicing | 2.34738781930876 ms (1.6261119892174756) | 2.258090287874194 ms (1.2792808105316449) | 1.04 |
| Reconstruct code | 0.22065341321404836 ms (0.16889238590670527) | 0.22489327849282828 ms (0.17585774592637268) | 0.98 |
| Total per-slice | 2.5841764538418106 ms (1.6702238972102916) | 2.4996261233332735 ms (1.3278746913052974) | 1.03 |
| failed to reconstruct/re-parse | 0 # | 0 # | 1 |
| times hit threshold | 0 # | 0 # | 1 |
| reduction (characters) | 0.7869360165281424 # | 0.7869360165281424 # | 1 |
| reduction (normalized tokens) | 0.7639690077689504 # | 0.7639690077689504 # | 1 |
| memory (df-graph) | 147.42458274147728 KiB (358.6827375397903) | 147.42458274147728 KiB (358.6827375397903) | 1 |
This comment was automatically generated by workflow using github-action-benchmark.
3a4a3c9
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"social-science" Benchmark Suite
| Benchmark | Current | Previous | Ratio |
|---|---|---|---|
| Retrieve AST from R code | 233.44970384 ms (41.73554223159988) | 238.40722376 ms (42.95412443307438) | 0.98 |
| Normalize R AST | 21.90124094 ms (16.569524093502846) | 22.0872248 ms (17.016890594916376) | 0.99 |
| Produce dataflow information | 72.99715495999999 ms (85.8570988657697) | 74.60461736 ms (88.95210983454488) | 0.98 |
| Total per-file | 11064.453014379998 ms (52688.75402251567) | 11091.201449639999 ms (52310.41942604725) | 1.00 |
| Static slicing | 21.98562903373952 ms (78.82751436203577) | 22.047137876062838 ms (78.30877993604865) | 1.00 |
| Reconstruct code | 0.213731997402135 ms (0.14176452099017856) | 0.2327517832436913 ms (0.14954480815603388) | 0.92 |
| Total per-slice | 22.20681718216753 ms (78.84859290915547) | 22.287796325154986 ms (78.33211951742135) | 1.00 |
| failed to reconstruct/re-parse | 0 # | 0 # | 1 |
| times hit threshold | 0 # | 0 # | 1 |
| reduction (characters) | 0.8719618340615195 # | 0.8719618340615195 # | 1 |
| reduction (normalized tokens) | 0.810633662275233 # | 0.810633662275233 # | 1 |
| memory (df-graph) | 145.6434765625 KiB (153.49028997815503) | 145.6434765625 KiB (153.49028997815503) | 1 |
This comment was automatically generated by workflow using github-action-benchmark.