[WIP] Feedback-driven prompting (plasma-umass#772)
* Extended stacks with Python and C time %.

* Prompt assembly.

* Updated flamegraph functionality, added credits.

* Updated flamegraph functionality.

* Removed old prompt stuff.

* Minor mod for clarity.

* Removed numba.
emeryberger authored Feb 18, 2024
1 parent 98210c7 commit 9b456c9
Showing 5 changed files with 188 additions and 50 deletions.
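The "Prompt assembly" and "Extended stacks" bullets above describe the core feedback loop: per-line measurements from the profiler are rendered into the LLM optimization prompt so the model can target the actual bottleneck. A minimal Python sketch of that idea (field names follow Scalene's JSON profile; the function and its assembly are illustrative, not the shipped code, which is the JavaScript in scalene-gui.js below):

    # Hedged sketch: render per-line profiler metrics into the prompt text.
    def build_feedback_prompt(source_code: str, line: dict) -> str:
        total = (line["n_cpu_percent_python"]
                 + line["n_cpu_percent_c"]
                 + line["n_sys_percent"])
        insights = [
            f"- CPU time in the Python interpreter: {100 * line['n_cpu_percent_python'] / total:.2f}%",
            f"- CPU time in native code: {100 * line['n_cpu_percent_c'] / total:.2f}%",
            f"- Peak memory usage: {line['n_peak_mb']:.0f}MB",
        ]
        return ("Optimize the following Python code WITHOUT CHANGING ITS RESULTS.\n\n"
                + source_code + "\n\nProfiler insights:\n"
                + "\n".join(insights) + "\nOptimized code:")

    print(build_feedback_prompt(
        "x = [i * i for i in range(10**6)]",
        {"n_cpu_percent_python": 12.0, "n_cpu_percent_c": 3.0,
         "n_sys_percent": 1.0, "n_peak_mb": 42.0}))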
179 changes: 144 additions & 35 deletions scalene/scalene-gui/scalene-gui.js
@@ -12,6 +12,84 @@ function vsNavigate(filename, lineno) {
} catch {}
}

function generateScaleneOptimizedCodeRequest(
context,
sourceCode,
line,
recommendedLibraries = [],
includeGpuOptimizations = false,
) {
// Default high-performance libraries known for their efficiency
const defaultLibraries = [
"NumPy",
"Scikit-learn",
"Pandas",
"TensorFlow",
"PyTorch",
];
const highPerformanceLibraries = [
...new Set([...defaultLibraries, ...recommendedLibraries]),
];

let promptParts = [
"Optimize the following Python code to make it more efficient WITHOUT CHANGING ITS RESULTS.\n\n",
context.trim(),
"\n# Start of code\n",
sourceCode.trim(),
"\n# End of code\n\n",
"Rewrite the above Python code from 'Start of code' to 'End of code', aiming for clear and simple optimizations. ",
"Your output should consist only of valid Python code, with brief explanatory comments prefaced with #. ",
"Include a detailed explanatory comment before the code, starting with '# Proposed optimization:'. ",
"Leverage high-performance native libraries, especially those utilizing GPU, for significant performance improvements. ",
"Consider using the following other libraries, if appropriate:\n",
highPerformanceLibraries.map((e) => " import " + e).join("\n") + "\n",
"Eliminate as many for loops, while loops, and list or dict comprehensions as possible, replacing them with vectorized equivalents. ",
// "Consider GPU utilization, memory consumption, and copy volume when using GPU-accelerated libraries. ",
// "Low GPU utilization and high copy volume indicate inefficient use of such libraries. ",
"Quantify the expected speedup in terms of orders of magnitude if possible. ",
"Fix any errors in the optimized code. ",
// "Consider the peak amount of memory used per line and CPU utilization for targeted optimization. ",
// "Note on CPU utilization: Low utilization in libraries known for multi-threading/multi-processing indicates inefficiency.\n\n",
];

// Conditional inclusion of GPU optimizations
if (includeGpuOptimizations) {
promptParts.push(
"Use GPU-accelerated libraries whenever it would substantially increase performance. ",
);
}

// Performance Insights
promptParts.push(
"Consider the following insights gathered from the Scalene profiler for optimization:\n",
);
const total_cpu_percent = line.n_cpu_percent_python + line.n_cpu_percent_c + line.n_sys_percent;

promptParts.push(`- CPU time: percent spent in the Python interpreter: ${(100*line.n_cpu_percent_python/total_cpu_percent).toFixed(2)}%\n`);
promptParts.push(`- CPU time: percent spent executing native code: ${(100*line.n_cpu_percent_c/total_cpu_percent).toFixed(2)}%\n`);
promptParts.push(`- CPU time: percent of system time: ${(100*line.n_sys_percent/total_cpu_percent).toFixed(2)}%\n`);
// `- CPU utilization: ${performanceMetrics.cpu_utilization}. Low utilization with high-core count might indicate inefficient use of multi-threaded/multi-process libraries.\n`,
promptParts.push(`- Core utilization: ${(100*line.n_core_utilization/total_cpu_percent).toFixed(2)}%\n`);
// `- Peak memory per line: Focus on lines with high memory usage, specifically ${performanceMetrics.peak_memory_per_line}.\n`,
promptParts.push(`- Peak memory usage: ${line.n_peak_mb.toFixed(0)}MB (${(100 * line.n_python_fraction).toFixed(2)}% Python memory)\n`);
// `- Copy volume: ${performanceMetrics.copy_volume} MB. High volume indicates inefficient data handling with GPU libraries.\n`,
if (line.n_copy_mb_s > 1) {
promptParts.push(`- Megabytes copied per second by memcpy/strcpy: ${line.n_copy_mb_s.toFixed(2)}\n`);
}
if (includeGpuOptimizations) {
// ` - GPU utilization: ${performanceMetrics.gpu_utilization}%. Low utilization indicates potential inefficiencies in GPU-accelerated library use.\n`
promptParts.push(`- GPU percent utilization: ${(100 * line.n_gpu_percent).toFixed(2)}%\n`);
// ` - GPU memory usage: ${performanceMetrics.gpu_memory} MB. Optimize to reduce unnecessary GPU memory consumption.\n`
// TODO GPU memory
}
promptParts.push(`Optimized code:`);
return promptParts.join("");
}
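Note that the insights above report each CPU component as a share of the line's own total, so the three shares always sum to 100%. A quick check of the arithmetic with illustrative numbers:

    n_cpu_percent_python, n_cpu_percent_c, n_sys_percent = 12.0, 3.0, 1.0
    total_cpu_percent = n_cpu_percent_python + n_cpu_percent_c + n_sys_percent  # 16.0
    print(f"{100 * n_cpu_percent_python / total_cpu_percent:.2f}%")  # 75.00%
    print(f"{100 * n_cpu_percent_c / total_cpu_percent:.2f}%")       # 18.75%
    print(f"{100 * n_sys_percent / total_cpu_percent:.2f}%")         # 6.25%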

const recommendedLibraries = ["Cython", "Dask"]; // Add any domain-specific libraries here

// const prompt = generateScaleneOptimizedCodeRequest(context, sourceCode, line, recommendedLibraries, true);

function extractPythonCodeBlock(markdown) {
// Pattern to match code blocks optionally tagged with "python"
// - ``` optionally followed by "python"
@@ -354,9 +432,22 @@ function countSpaces(str) {
return 0;
}

-async function optimizeCode(imports, code, context) {
+async function optimizeCode(imports, code, line, context) {
  // Tailor prompt to request GPU optimizations or not.
  const useGPUs = document.getElementById("use-gpu-checkbox").checked; // globalThis.profile.gpu;

let recommendedLibraries = ["sklearn"];
if (useGPUs) {
// Suggest cupy if we are using the GPU.
recommendedLibraries.push("cupy");
} else {
// Suggest numpy otherwise.
recommendedLibraries.push("numpy");
}
// TODO: remove anything already imported in imports

const bigPrompt = generateScaleneOptimizedCodeRequest(context, code, line, recommendedLibraries, useGPUs);

const useGPUstring = useGPUs ? " or the GPU " : " ";
// Check for a valid API key.
// TODO: Add checks for Amazon / local
@@ -411,10 +502,9 @@ async function optimizeCode(imports, code, context) {
prompt = memoryEfficiencyPrompt;
}

-  // const prompt = `Below is some Python code to optimize, from "Start of code" to "End of code":\n\n# Start of code\n\n${code}\n\n# End of code\n\nRewrite the above Python code to make it more efficient without changing the results. Assume the code has already executed these imports. Do NOT include them in the optimized code:\n\n${imports}\n\nUse fast native libraries if that would make it faster than pure Python. Your output should only consist of valid Python code. Output the resulting Python with brief explanations only included as comments prefaced with #. Include a detailed explanatory comment before the code, starting with the text "# Proposed optimization:". Make the code as clear and simple as possible, while also making it as fast and memory-efficient as possible. Use vectorized operations${useGPUstring}whenever it would substantially increase performance, and quantify the speedup in terms of orders of magnitude. If the performance is not likely to increase, leave the code unchanged. Check carefully by generating inputs to see that the output is identical for both the original and optimized versions. Correctly-optimized code:`;

-  // const prev_prompt = `Below is some Python code to optimize:\n\n${code}\n\nRewrite the above Python code to make it more efficient while keeping the same semantics. Use fast native libraries if that would make it faster than pure Python. Your output should only consist of valid Python code. Output only the resulting Python with brief explanations only included as comments prefaced with #. Include a detailed explanatory comment before the code, starting with the text "# Proposed optimization:". Make the code as clear and simple as possible, while also making it as fast and memory-efficient as possible. Use vectorized operations or the GPU whenever it would substantially increase performance, and try to quantify the speedup in terms of orders of magnitude. If the performance is not likely to increase, leave the code unchanged. Your output should only consist of legal Python code. Format all comments to be less than 40 columns wide:\n\n`;

+  // Just use big prompt maybe FIXME
+  prompt = bigPrompt;

// Use number of words in the original code as a proxy for the number of tokens.
const numWords = code.match(/\b\w+\b/g).length;
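The word count serves as a rough token proxy: the reply budget is about four tokens per source word, with a 500-token floor. The same heuristic in Python form, for illustration (the shipped code is the JavaScript above):

    import re

    def token_budget(code: str) -> int:
        # ~4 output tokens per source word, with a 500-token floor
        num_words = len(re.findall(r"\b\w+\b", code))
        return max(num_words * 4, 500)

    assert token_budget("x = x + 1") == 500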

@@ -429,10 +519,11 @@ async function optimizeCode(imports, code, context) {
return extractCode(result);
}
case "local": {
      console.log("Running " + document.getElementById("service-select").value);
-     console.log(optimizePerformancePrompt_ollama);
+     console.log(prompt);
+     // console.log(optimizePerformancePrompt_ollama);
      const result = await sendPromptToOllama(
-       optimizePerformancePrompt_ollama,
+       prompt, // optimizePerformancePrompt_ollama,
Math.max(numWords * 4, 500),
document.getElementById("language-model-local").value,
document.getElementById("local-ip").value,
@@ -446,9 +537,9 @@ async function optimizeCode(imports, code, context) {
}
case "amazon": {
console.log("Running " + document.getElementById("service-select").value);
-     console.log(optimizePerformancePrompt_ollama);
+     console.log(prompt); // optimizePerformancePrompt_ollama);
      const result = await sendPromptToAmazon(
-       optimizePerformancePrompt_ollama,
+       prompt, // optimizePerformancePrompt_ollama,
Math.max(numWords * 4, 500),
);
console.log(
@@ -459,24 +550,24 @@ async function optimizeCode(imports, code, context) {
}
}

-function proposeOptimizationRegion(filename, file_number, lineno) {
-  proposeOptimization(filename, file_number, lineno, { regions: true });
+function proposeOptimizationRegion(filename, file_number, line) {
+  proposeOptimization(filename, file_number, JSON.parse(decodeURIComponent(line)), { regions: true });
}

-function proposeOptimizationLine(filename, file_number, lineno) {
-  proposeOptimization(filename, file_number, lineno, { regions: false });
+function proposeOptimizationLine(filename, file_number, line) {
+  proposeOptimization(filename, file_number, JSON.parse(decodeURIComponent(line)), { regions: false });
}
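Instead of a bare line number, these handlers now receive the whole per-line metrics record, URI-encoded as JSON so it survives embedding in an HTML onclick attribute. The same round-trip in Python form, for illustration:

    import json
    from urllib.parse import quote, unquote

    line = {"lineno": 17, "n_cpu_percent_python": 12.0}
    encoded = quote(json.dumps(line))  # safe to embed in an HTML attribute
    assert json.loads(unquote(encoded)) == line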

-function proposeOptimization(filename, file_number, lineno, params) {
+function proposeOptimization(filename, file_number, line, params) {
filename = unescape(filename);
const useRegion = params["regions"];
const prof = globalThis.profile;
const this_file = prof.files[filename].lines;
const imports = prof.files[filename].imports.join("\n");
-  const start_region_line = this_file[lineno - 1]["start_region_line"];
-  const end_region_line = this_file[lineno - 1]["end_region_line"];
+  const start_region_line = this_file[line.lineno - 1]["start_region_line"];
+  const end_region_line = this_file[line.lineno - 1]["end_region_line"];
  let context;
-  const code_line = this_file[lineno - 1]["line"];
+  const code_line = this_file[line.lineno - 1]["line"];
let code_region;
if (useRegion) {
code_region = this_file
@@ -493,15 +584,18 @@ function proposeOptimization(filename, file_number, lineno, params) {
} else {
code_region = code_line;
context = this_file
-      .slice(Math.max(0, lineno - 10), Math.min(lineno - 1, this_file.length))
+      .slice(
+        Math.max(0, line.lineno - 10),
+        Math.min(line.lineno - 1, this_file.length),
+      )
.map((e) => e["line"])
.join("");
}
// Count the number of leading spaces to match indentation level on output
let leadingSpaceCount = countSpaces(code_line) + 3; // including the lightning bolt and explosion
let indent =
WhiteLightning + WhiteExplosion + " ".repeat(leadingSpaceCount - 1);
-  const elt = document.getElementById(`code-${file_number}-${lineno}`);
+  const elt = document.getElementById(`code-${file_number}-${line.lineno}`);
(async () => {
// TODO: check Amazon credentials
const service = document.getElementById("service-select").value;
@@ -530,7 +624,7 @@ function proposeOptimization(filename, file_number, lineno, params) {
}
}
elt.innerHTML = `<em>${indent}working...</em>`;
-    let message = await optimizeCode(imports, code_region, context);
+    let message = await optimizeCode(imports, code_region, line, context);
if (!message) {
elt.innerHTML = "";
return;
@@ -546,8 +640,8 @@ function proposeOptimization(filename, file_number, lineno, params) {
)
.join("<br />");
// Display the proposed optimization, with click-to-copy functionality.
-    elt.innerHTML = `<hr><span title="click to copy" style="cursor: copy" id="opt-${file_number}-${lineno}">${formattedCode}</span>`;
-    thisElt = document.getElementById(`opt-${file_number}-${lineno}`);
+    elt.innerHTML = `<hr><span title="click to copy" style="cursor: copy" id="opt-${file_number}-${line.lineno}">${formattedCode}</span>`;
+    thisElt = document.getElementById(`opt-${file_number}-${line.lineno}`);
thisElt.addEventListener("click", async (e) => {
await copyOnClick(e, message);
      // After copying, briefly change the cursor back to the default to provide some visual feedback.
@@ -1322,25 +1416,40 @@ function makeProfileLine(

const codeLine = Prism.highlight(line.line, Prism.languages.python, "python");
s += `<td style="height:10" align="left" bgcolor="whitesmoke" style="vertical-align: middle" data-sort="${line.lineno}">`;
-  if (propose_optimizations && showExplosion) {
-    s += `<span style="vertical-align: middle; cursor: pointer" title="Propose an optimization for the entire region starting here." onclick="proposeOptimizationRegion('${escape(
-      filename,
-    )}', ${file_number}, ${parseInt(
-      line.lineno,
-    )}); event.preventDefault()">${regionOptimizationString}</span>`;
+  let newLine = structuredClone(line);
+  // TODO: verify that this isn't double counting anything
+  if (propose_optimizations && showExplosion) {
+    // Construct a new line corresponding to this region.
+    let mb_copied = 0;
+    for (let lineno = start_region_line; lineno < end_region_line; lineno++) {
+      currline = prof["files"][filename]["lines"][lineno];
+      mb_copied += currline.n_copy_mb * prof.elapsed_time_sec;
+      newLine.n_cpu_percent_python += currline.n_cpu_percent_python;
+      newLine.n_cpu_percent_c += currline.n_cpu_percent_c;
+      newLine.n_sys_percent += currline.n_sys_percent;
+      newLine.n_gpu_percent += currline.n_gpu_percent;
+      if (currline.n_peak_mb > newLine.n_peak_mb) {
+        newLine.n_peak_mb = currline.n_peak_mb;
+        newLine.n_python_fraction = currline.n_python_fraction;
+      }
+      // TODO: GPU memory
+      newLine.n_core_utilization += (currline.n_cpu_percent_python + currline.n_cpu_percent_c) * currline.n_core_utilization; // weigh by percentage
+    }
+    newLine.n_copy_mb_s = mb_copied / prof.elapsed_time_sec;
+    s += `<span style="vertical-align: middle; cursor: pointer" title="Propose an optimization for the entire region starting here." onclick="proposeOptimizationRegion('${escape(
+      filename,
+    )}', ${file_number}, '${encodeURIComponent(JSON.stringify(newLine))}'); event.preventDefault()">${regionOptimizationString}</span>`;
} else {
s += regionOptimizationString;
}

const lineOptimizationString = propose_optimizations
? `${Lightning}`
: `${WhiteLightning}`;
  if (propose_optimizations) {
-    s += `<span style="vertical-align: middle; cursor: pointer" title="Propose an optimization for this line." onclick="proposeOptimizationLine('${escape(
-      filename,
-    )}', ${file_number}, ${parseInt(
-      line.lineno,
-    )}); event.preventDefault()">${lineOptimizationString}</span>`;
+    s += `<span style="vertical-align: middle; cursor: pointer" title="Propose an optimization for this line." onclick="proposeOptimizationLine('${escape(filename)}', ${file_number}, '${encodeURIComponent(JSON.stringify(line))}'); event.preventDefault()">${lineOptimizationString}</span>`;
+    // s += `<span style="vertical-align: middle; cursor: pointer" title="Propose an optimization for this line." onclick="proposeOptimizationLine('${escape(filename,)}', ${file_number}, ${JSON.stringify(line)}); event.preventDefault()">${lineOptimizationString}</span>`;
} else {
s += lineOptimizationString;
}
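For a region, the handler synthesizes a single record: percentages add across lines, peak memory takes the maximum, and core utilization accumulates weighted by each line's CPU share; the later division by total CPU percent in generateScaleneOptimizedCodeRequest then yields approximately a CPU-weighted mean. A worked Python example with made-up numbers:

    lines = [
        {"cpu_py": 10.0, "cpu_c": 5.0, "sys": 1.0, "core_util": 0.9, "peak_mb": 100.0},
        {"cpu_py": 2.0, "cpu_c": 1.0, "sys": 0.0, "core_util": 0.3, "peak_mb": 400.0},
    ]
    region = {"cpu_py": 0.0, "cpu_c": 0.0, "sys": 0.0, "core_util": 0.0, "peak_mb": 0.0}
    for ln in lines:
        region["cpu_py"] += ln["cpu_py"]
        region["cpu_c"] += ln["cpu_c"]
        region["sys"] += ln["sys"]
        region["peak_mb"] = max(region["peak_mb"], ln["peak_mb"])
        # weigh utilization by the line's share of CPU time
        region["core_util"] += (ln["cpu_py"] + ln["cpu_c"]) * ln["core_util"]

    total_cpu = region["cpu_py"] + region["cpu_c"] + region["sys"]  # 19.0
    print(region["core_util"] / total_cpu)  # ~0.758, the weighted utilization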
8 changes: 8 additions & 0 deletions scalene/scalene_json.py
@@ -286,6 +286,14 @@ def output_profiles(
if result:
program = Filename("[" + result.group(1) + "]")

# Process the stacks to normalize by total number of CPU samples.
for stk in stats.stacks.keys():
(count, python_time, c_time, cpu_samples) = stats.stacks[stk]
stats.stacks[stk] = (count,
python_time / stats.total_cpu_samples,
c_time / stats.total_cpu_samples,
cpu_samples / stats.total_cpu_samples)

# Convert stacks into a representation suitable for JSON dumping.
stks = []
for stk in stats.stacks.keys():
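The effect of this normalization pass, on illustrative numbers: raw per-stack tuples of (hit count, Python time, C time, CPU samples) become fractions of the program's total CPU samples.

    total_cpu_samples = 200.0
    stacks = {("main", "f", "g"): (10, 120.0, 30.0, 150.0)}
    for stk, (count, python_time, c_time, cpu_samples) in stacks.items():
        stacks[stk] = (count,
                       python_time / total_cpu_samples,   # 0.60
                       c_time / total_cpu_samples,        # 0.15
                       cpu_samples / total_cpu_samples)   # 0.75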
30 changes: 20 additions & 10 deletions scalene/scalene_profiler.py
@@ -7,8 +7,7 @@
See the paper "docs/osdi23-berger.pdf" in this repository for technical
details on Scalene's design.
-by Emery Berger
-https://emeryberger.com
+by Emery Berger, Sam Stern, and Juan Altmayer Pizzorno
usage: scalene test/testme.py
usage help: scalene --help
@@ -804,7 +803,7 @@ def cpu_signal_handler(
def output_profile(program_args: Optional[List[str]] = None) -> bool:
    """Output the profile. Returns true iff there was any info reported in the profile."""
# sourcery skip: inline-immediately-returned-variable
-    # print(Scalene.flamegraph_format(Scalene.__stats.stacks))
+    # print(flamegraph_format(Scalene.__stats.stacks))
if Scalene.__args.json:
json_output = Scalene.__json.output_profiles(
Scalene.__program_being_profiled,
@@ -1008,16 +1007,21 @@ def process_cpu_sample(

main_thread_frame = new_frames[0][0]

-            if Scalene.__args.stacks:
-                add_stack(
-                    main_thread_frame, Scalene.should_trace, Scalene.__stats.stacks
-                )

            average_python_time = python_time / total_frames
            average_c_time = c_time / total_frames
            average_gpu_time = gpu_time / total_frames
            average_cpu_time = (python_time + c_time) / total_frames

+            if Scalene.__args.stacks:
+                add_stack(
+                    main_thread_frame,
+                    Scalene.should_trace,
+                    Scalene.__stats.stacks,
+                    average_python_time,
+                    average_c_time,
+                    average_cpu_time
+                )

# First, handle the main thread.
Scalene.enter_function_meta(main_thread_frame, Scalene.__stats)
fname = Filename(main_thread_frame.f_code.co_filename)
@@ -1044,7 +1048,12 @@ def process_cpu_sample(
for (frame, tident, orig_frame) in new_frames:
if frame == main_thread_frame:
continue
-                add_stack(frame, Scalene.should_trace, Scalene.__stats.stacks)
+                add_stack(frame,
+                          Scalene.should_trace,
+                          Scalene.__stats.stacks,
+                          average_python_time,
+                          average_c_time,
+                          average_cpu_time)

# In a thread.
fname = Filename(frame.f_code.co_filename)
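The add_stack helper itself is not part of this diff. Based on how it is called here and on the (count, python_time, c_time, cpu_samples) tuples unpacked in scalene_json.py, a plausible sketch of its extended form might look like the following; treat the body as an assumption, not the actual implementation:

    from typing import Any, Callable, Dict, Tuple

    def add_stack(frame: Any,
                  should_trace: Callable[[str, str], bool],
                  stacks: Dict[Any, Tuple[int, float, float, float]],
                  python_time: float,
                  c_time: float,
                  cpu_samples: float) -> None:
        """Accumulate hit counts and sampled times for the stack rooted at frame."""
        stack = []
        while frame:
            if should_trace(frame.f_code.co_filename, frame.f_code.co_name):
                # build the stack root-first
                stack.insert(0, (frame.f_code.co_filename,
                                 frame.f_code.co_name,
                                 frame.f_lineno))
            frame = frame.f_back
        key = tuple(stack)
        count, py, c, cpu = stacks.get(key, (0, 0.0, 0.0, 0.0))
        stacks[key] = (count + 1, py + python_time, c + c_time, cpu + cpu_samples)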
@@ -1494,8 +1503,9 @@ def should_trace(filename: Filename, func: str) -> bool:
if not Scalene.__args.profile_all:
for n in sysconfig.get_scheme_names():
for p in sysconfig.get_path_names():
+                the_path = sysconfig.get_path(p, n)
                 libdir = str(
-                    pathlib.Path(sysconfig.get_path(p, n)).resolve()
+                    pathlib.Path(the_path).resolve()
)
if libdir in resolved_filename:
return False
2 changes: 1 addition & 1 deletion scalene/scalene_statistics.py
@@ -40,7 +40,7 @@ def __init__(self) -> None:
self.alloc_samples: int = 0

# full stacks taken during CPU samples, together with number of hits
-        self.stacks: Dict[Tuple[Any], int] = defaultdict(int)
+        self.stacks: Dict[Tuple[Any], Any] = defaultdict(None)
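One subtlety worth noting: defaultdict(None) has no default factory, so it behaves like a plain dict and raises KeyError on missing keys. With tuple values, callers must therefore initialize entries explicitly, e.g.:

    from collections import defaultdict

    stacks = defaultdict(None)  # no factory: missing keys raise KeyError
    count, py, c, cpu = stacks.get(("main",), (0, 0.0, 0.0, 0.0))
    stacks[("main",)] = (count + 1, py + 0.01, c + 0.002, cpu + 1.0)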

# CPU samples for each location in the program
# spent in the interpreter