diff --git a/scalene/find_browser.py b/scalene/find_browser.py
index dca9bff67..e8a5cdccc 100644
--- a/scalene/find_browser.py
+++ b/scalene/find_browser.py
@@ -1,18 +1,25 @@
 import webbrowser
 from typing import Optional
 
+
 def find_browser(browserClass: Optional[str] = None) -> Optional[str]:
     """Find the default system browser, excluding text browsers.
-    
+
     If you want a specific browser, pass its class as an argument."""
     text_browsers = [
-        "browsh", "elinks", "links", "lynx", "w3m",
+        "browsh",
+        "elinks",
+        "links",
+        "lynx",
+        "w3m",
     ]
     try:
         # Get the default browser object
         browser = webbrowser.get(browserClass)
-        browser_name = browser.name if browser.name else browser.__class__.__name__
+        browser_name = (
+            browser.name if browser.name else browser.__class__.__name__
+        )
         return browser_name if browser_name not in text_browsers else None
     except AttributeError:
         # https://github.com/plasma-umass/scalene/issues/790
@@ -21,7 +28,7 @@ def find_browser(browserClass: Optional[str] = None) -> Optional[str]:
         # we need to refer to it as such to prevent this error:
         # 'MacOSXOSAScript' object has no attribute 'name'
         browser = webbrowser.get(browserClass)
-        return browser._name if browser._name not in text_browsers else None # type: ignore[attr-defined]
+        return browser._name if browser._name not in text_browsers else None  # type: ignore[attr-defined]
     except webbrowser.Error:
         # Return None if there is an error in getting the browser
         return None
diff --git a/scalene/get_module_details.py b/scalene/get_module_details.py
index 76abfa079..7fbdbd25b 100644
--- a/scalene/get_module_details.py
+++ b/scalene/get_module_details.py
@@ -3,18 +3,10 @@
 
 from importlib.abc import SourceLoader
 from importlib.machinery import ModuleSpec
-from types import CodeType, FrameType
+from types import CodeType
 from typing import (
-    Any,
-    Callable,
-    Dict,
-    List,
-    Optional,
-    Set,
     Tuple,
     Type,
-    Union,
-    cast,
 )
diff --git a/scalene/launchbrowser.py b/scalene/launchbrowser.py
index 5327f7e51..421f24bb0 100644
--- a/scalene/launchbrowser.py
+++ b/scalene/launchbrowser.py
@@ -4,7 +4,6 @@
 import shutil
 import socket
 import socketserver
-import subprocess
 import sys
 import tempfile
 import threading
@@ -15,18 +14,23 @@
 from jinja2 import Environment, FileSystemLoader
 from typing import Any, NewType
 
+
 def read_file_content(directory: str, subdirectory: str, filename: str) -> str:
     file_path = os.path.join(directory, subdirectory, filename)
     return pathlib.Path(file_path).read_text()
 
 
 def launch_browser_insecure(url: str) -> None:
-    if platform.system() == 'Windows':
-        chrome_path = 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe'
-    elif platform.system() == 'Linux':
-        chrome_path = '/usr/bin/google-chrome'
-    elif platform.system() == 'Darwin':
-        chrome_path = '/Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome'
+    if platform.system() == "Windows":
+        chrome_path = (
+            "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"
+        )
+    elif platform.system() == "Linux":
+        chrome_path = "/usr/bin/google-chrome"
+    elif platform.system() == "Darwin":
+        chrome_path = (
+            "/Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome"
+        )
 
     # Create a temporary directory
     with tempfile.TemporaryDirectory() as temp_dir:
@@ -34,22 +38,27 @@ def launch_browser_insecure(url: str) -> None:
         chrome_cmd = f'{chrome_path} %s --disable-web-security --user-data-dir="{temp_dir}"'
 
         # Register the new browser type
-        webbrowser.register('chrome_with_flags', None,
-                            webbrowser.Chrome(chrome_cmd), preferred=True)
+        webbrowser.register(
+            "chrome_with_flags",
+            None,
+            webbrowser.Chrome(chrome_cmd),
+            preferred=True,
+        )
 
         # Open a URL using the new browser type
         webbrowser.get(chrome_cmd).open(url)
 
-HOST = 'localhost'
+
+HOST = "localhost"
 shutdown_requested = False
 last_heartbeat = time.time()
 server_running = True
 
+
 class CustomHandler(http.server.SimpleHTTPRequestHandler):
     def do_GET(self) -> Any:
         global last_heartbeat
-        if self.path == '/heartbeat':
+        if self.path == "/heartbeat":
             last_heartbeat = time.time()
             self.send_response(200)
             self.end_headers()
@@ -57,6 +66,7 @@ def do_GET(self) -> Any:
         else:
             return http.server.SimpleHTTPRequestHandler.do_GET(self)
 
+
 def monitor_heartbeat() -> None:
     global server_running
     while server_running:
@@ -66,15 +76,18 @@ def monitor_heartbeat() -> None:
             os._exit(0)
         time.sleep(1)
 
+
 def serve_forever(httpd: Any) -> None:
     while server_running:
         httpd.handle_request()
 
+
 def run_server(host: str, port: int) -> None:
     with socketserver.TCPServer((host, port), CustomHandler) as httpd:
         print(f"Serving at http://{host}:{port}")
         serve_forever(httpd)
 
+
 def is_port_available(port: int) -> bool:
     """
     Check if a given TCP port is available to start a server on the local machine.
@@ -84,14 +97,16 @@ def is_port_available(port: int) -> bool:
     """
     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
         try:
-            s.bind(('localhost', port))
+            s.bind(("localhost", port))
             return True
        except socket.error:
             return False
 
+
 Filename = NewType("Filename", str)
 LineNumber = NewType("LineNumber", int)
- 
+
+
 def generate_html(profile_fname: Filename, output_fname: Filename) -> None:
     """Apply a template to generate a single HTML payload containing the current profile."""
@@ -108,13 +123,23 @@ def generate_html(profile_fname: Filename, output_fname: Filename) -> None:
     scalene_dir = os.path.dirname(__file__)
 
     file_contents = {
-        'scalene_gui_js_text': read_file_content(scalene_dir, "scalene-gui", "scalene-gui.js"),
-        'prism_css_text': read_file_content(scalene_dir, "scalene-gui", "prism.css"),
-        'prism_js_text': read_file_content(scalene_dir, "scalene-gui", "prism.js"),
-        'tablesort_js_text': read_file_content(scalene_dir, "scalene-gui", "tablesort.js"),
-        'tablesort_number_js_text': read_file_content(scalene_dir, "scalene-gui", "tablesort.number.js")
+        "scalene_gui_js_text": read_file_content(
+            scalene_dir, "scalene-gui", "scalene-gui.js"
+        ),
+        "prism_css_text": read_file_content(
+            scalene_dir, "scalene-gui", "prism.css"
+        ),
+        "prism_js_text": read_file_content(
+            scalene_dir, "scalene-gui", "prism.js"
+        ),
+        "tablesort_js_text": read_file_content(
+            scalene_dir, "scalene-gui", "tablesort.js"
+        ),
+        "tablesort_number_js_text": read_file_content(
+            scalene_dir, "scalene-gui", "tablesort.number.js"
+        ),
     }
-    
+
     # Put the profile and everything else into the template.
     environment = Environment(
         loader=FileSystemLoader(os.path.join(scalene_dir, "scalene-gui"))
@@ -126,11 +151,11 @@ def generate_html(profile_fname: Filename, output_fname: Filename) -> None:
     import scalene.scalene_config as scalene_config
 
     rendered_content = template.render(
         profile=profile,
-        gui_js=file_contents['scalene_gui_js_text'],
-        prism_css=file_contents['prism_css_text'],
-        prism_js=file_contents['prism_js_text'],
-        tablesort_js=file_contents['tablesort_js_text'],
-        tablesort_number_js=file_contents['tablesort_number_js_text'],
+        gui_js=file_contents["scalene_gui_js_text"],
+        prism_css=file_contents["prism_css_text"],
+        prism_js=file_contents["prism_js_text"],
+        tablesort_js=file_contents["tablesort_js_text"],
+        tablesort_number_js=file_contents["tablesort_number_js_text"],
         scalene_version=scalene_config.scalene_version,
         scalene_date=scalene_config.scalene_date,
     )
@@ -146,32 +171,33 @@ def generate_html(profile_fname: Filename, output_fname: Filename) -> None:
 def start(filename: str, port: int) -> None:
     while not is_port_available(port):
         port += 1
-    
+
     cwd = os.getcwd()
     if filename == "demo":
         generate_html(Filename("demo"), Filename("demo.html"))
         filename = "demo.html"
-    shutil.copy(filename, os.path.join(tempfile.gettempdir(), 'index.html'))
+    shutil.copy(filename, os.path.join(tempfile.gettempdir(), "index.html"))
     os.chdir(tempfile.gettempdir())
     server_thread = threading.Thread(target=run_server, args=[HOST, port])
     server_thread.start()
     threading.Thread(target=monitor_heartbeat).start()
-    webbrowser.open_new(f'http://{HOST}:{port}/')
+    webbrowser.open_new(f"http://{HOST}:{port}/")
     server_thread.join()
     os.chdir(cwd)
-    
+
     # Optional: a delay to ensure all resources are released
     time.sleep(1)
     os._exit(0)  # Forcefully stops the program
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     import sys
+
     if len(sys.argv) > 2:
         filename = sys.argv[1]
         port = int(sys.argv[2])
         start(filename, port)
     else:
         print("Need to supply filename and port arguments.")
-
diff --git a/scalene/redirect_python.py b/scalene/redirect_python.py
index f1efeaf44..b51d0245b 100644
--- a/scalene/redirect_python.py
+++ b/scalene/redirect_python.py
@@ -1,11 +1,12 @@
 import os
 import pathlib
-import re
 import stat
 import sys
 
-def redirect_python(preface: str, cmdline: str, python_alias_dir: pathlib.Path) -> str:
+
+def redirect_python(
+    preface: str, cmdline: str, python_alias_dir: pathlib.Path
+) -> str:
     """
     Redirects Python calls to a different command with a preface and cmdline.
@@ -51,5 +52,5 @@ def redirect_python(preface: str, cmdline: str, python_alias_dir: pathlib.Path)
         sys_executable_path = sys_executable_path.with_suffix(".bat")
 
     sys.executable = str(sys_executable_path)
- 
+
     return orig_sys_executable
diff --git a/scalene/replacement_mp_lock.py b/scalene/replacement_mp_lock.py
index 2058b0fa8..b81836c04 100644
--- a/scalene/replacement_mp_lock.py
+++ b/scalene/replacement_mp_lock.py
@@ -1,7 +1,4 @@
 import multiprocessing.synchronize
-import sys
-import threading
-from typing import Any
 
 from scalene.scalene_profiler import Scalene
diff --git a/scalene/replacement_sem_lock.py b/scalene/replacement_sem_lock.py
index f3f2d877a..52102151e 100644
--- a/scalene/replacement_sem_lock.py
+++ b/scalene/replacement_sem_lock.py
@@ -1,13 +1,16 @@
-import multiprocessing
+import multiprocessing.context
+import multiprocessing.synchronize
 import random
 import sys
 import threading
-from multiprocessing.synchronize import Lock
 from scalene.scalene_profiler import Scalene
 from typing import Any, Callable, Optional, Tuple
 
+
 class ReplacementSemLock(multiprocessing.synchronize.Lock):
-    def __init__(self, ctx: Optional[multiprocessing.context.DefaultContext] = None) -> None:
+    def __init__(
+        self, ctx: Optional[multiprocessing.context.DefaultContext] = None
+    ) -> None:
         # Ensure to use the appropriate context while initializing
         if ctx is None:
             ctx = multiprocessing.get_context()
diff --git a/scalene/replacement_signal_fns.py b/scalene/replacement_signal_fns.py
index 7bc05ccf9..c7d0f2571 100644
--- a/scalene/replacement_signal_fns.py
+++ b/scalene/replacement_signal_fns.py
@@ -5,16 +5,17 @@
 from scalene.scalene_profiler import Scalene
 from typing import Any, Tuple
 
+
 @Scalene.shim
 def replacement_signal_fns(scalene: Scalene) -> None:
     scalene_signals = scalene.get_signals()
     expected_handlers_map = {
-            scalene_signals.malloc_signal: scalene.malloc_signal_handler,
-            scalene_signals.free_signal: scalene.free_signal_handler,
-            scalene_signals.memcpy_signal: scalene.memcpy_signal_handler,
-            signal.SIGTERM: scalene.term_signal_handler,
-            scalene_signals.cpu_signal: scalene.cpu_signal_handler,
-    }
+        scalene_signals.malloc_signal: scalene.malloc_signal_handler,
+        scalene_signals.free_signal: scalene.free_signal_handler,
+        scalene_signals.memcpy_signal: scalene.memcpy_signal_handler,
+        signal.SIGTERM: scalene.term_signal_handler,
+        scalene_signals.cpu_signal: scalene.cpu_signal_handler,
+    }
     old_signal = signal.signal
 
     if sys.version_info < (3, 8):
@@ -61,11 +62,14 @@ def replacement_signal(signum: int, handler: Any) -> Any:
         # a NOP-like, then we can ignore it. It can't have been set already, and the expected return value is the
         # previous handler, so this behavior is reasonable
         if signum in all_signals and (
-                handler is signal.SIG_IGN or handler is signal.SIG_DFL
+            handler is signal.SIG_IGN or handler is signal.SIG_DFL
         ):
             return handler
         # If trying to "reset" to a handler that we already set it to, ignore
-        if signal.Signals(signum) in expected_handlers_map and expected_handlers_map[signal.Signals(signum)] is handler:
+        if (
+            signal.Signals(signum) in expected_handlers_map
+            and expected_handlers_map[signal.Signals(signum)] is handler
+        ):
             return signal.SIG_IGN
         if signum in all_signals:
             print(
@@ -108,7 +112,9 @@ def replacement_siginterrupt(signum: int, flag: bool) -> None:
             )
         return old_siginterrupt(signum, flag)
 
-    def replacement_setitimer(which: int, seconds: float, interval: float = 0.0) -> Tuple[float, float]:
+    def replacement_setitimer(
+        which: int, seconds: float, interval: float = 0.0
+    ) -> Tuple[float, float]:
         timer_signal, cpu_signal = scalene.get_timer_signals()
         if which == timer_signal:
             old = scalene.client_timer.get_itimer()
@@ -122,7 +128,7 @@ def replacement_setitimer(which: int, seconds: float, interval: float = 0.0) ->
 
     signal.setitimer = replacement_setitimer
     signal.siginterrupt = replacement_siginterrupt
-    signal.signal = replacement_signal # type: ignore
+    signal.signal = replacement_signal  # type: ignore
     if sys.version_info >= (3, 8):
         signal.raise_signal = replacement_raise_signal
     os.kill = replacement_kill
diff --git a/scalene/scalene-gui/example-profile.js b/scalene/scalene-gui/example-profile.js
index 79b661549..731cd43d7 100644
--- a/scalene/scalene-gui/example-profile.js
+++ b/scalene/scalene-gui/example-profile.js
@@ -1,9 +1,9 @@
 const example_profile = {
   elapsed_time_sec: 12.696948051452637,
-  memory: true,
+  memory: true,
   files: {
-    "./test/testme.py": {
-      imports: [],
+    "./test/testme.py": {
+      imports: [],
       functions: [
         {
           line: "doit1",
diff --git a/scalene/scalene-gui/prism.js b/scalene/scalene-gui/prism.js
index 59158fc66..2eeec47fb 100644
--- a/scalene/scalene-gui/prism.js
+++ b/scalene/scalene-gui/prism.js
@@ -6,9 +6,9 @@ var _self =
   typeof window !== "undefined"
     ? window // if in browser
     : typeof WorkerGlobalScope !== "undefined" &&
-      self instanceof WorkerGlobalScope
-    ? self // if in worker
-    : {}; // if in node js
+        self instanceof WorkerGlobalScope
+      ? self // if in worker
+      : {}; // if in node js
 
 /**
  * Prism: Lightweight, robust, elegant syntax highlighting
@@ -169,12 +169,11 @@ var Prism = (function (_self) {
           clone = [];
           visited[id] = clone;
 
-          /** @type {Array} */ (/** @type {any} */ (o)).forEach(function (
-            v,
-            i
-          ) {
-            clone[i] = deepClone(v, visited);
-          });
+          /** @type {Array} */ (/** @type {any} */ (o)).forEach(
+            function (v, i) {
+              clone[i] = deepClone(v, visited);
+            },
+          );
 
           return /** @type {any} */ (clone);
@@ -535,7 +534,7 @@ var Prism = (function (_self) {
       _.hooks.run("before-highlightall", env);
 
       env.elements = Array.prototype.slice.apply(
-        env.container.querySelectorAll(env.selector)
+        env.container.querySelectorAll(env.selector),
       );
 
       _.hooks.run("before-all-elements-highlight", env);
@@ -645,7 +644,7 @@ var Prism = (function (_self) {
             language: env.language,
             code: env.code,
             immediateClose: true,
-          })
+          }),
         );
       } else {
         insertHighlightedCode(_.highlight(env.code, env.grammar, env.language));
@@ -952,7 +951,7 @@ var Prism = (function (_self) {
     grammar,
     startNode,
     startPos,
-    rematch
+    rematch,
   ) {
     for (var token in grammar) {
       if (!grammar.hasOwnProperty(token) || !grammar[token]) {
@@ -1076,7 +1075,7 @@ var Prism = (function (_self) {
           token,
           inside ? _.tokenize(matchStr, inside) : matchStr,
           alias,
-          matchStr
+          matchStr,
         );
 
         currentNode = addAfter(tokenList, removeFrom, wrapped);
@@ -1099,7 +1098,7 @@ var Prism = (function (_self) {
             grammar,
             currentNode.prev,
             pos,
-            nestedRematch
+            nestedRematch,
           );
 
           // the reach might have been extended because of the rematching
@@ -1212,7 +1211,7 @@ var Prism = (function (_self) {
           _self.close();
         }
       },
-      false
+      false,
     );
   }
 
@@ -1250,7 +1249,7 @@ var Prism = (function (_self) {
   ) {
     document.addEventListener(
       "DOMContentLoaded",
-      highlightAutomaticallyCallback
+      highlightAutomaticallyCallback,
     );
   } else {
     if (window.requestAnimationFrame) {
@@ -1449,9 +1448,9 @@ Object.defineProperty(Prism.languages.markup.tag, "addInlined", {
             /__/g,
             function () {
               return tagName;
-            }
+            },
           ),
-          "i"
+          "i",
         ),
         lookbehind: true,
         greedy: true,
@@ -1481,7 +1480,7 @@ Object.defineProperty(Prism.languages.markup.tag, "addAttribute", {
           attrName +
           ")" +
           /\s*=\s*(?:"[^"]*"|'[^']*'|[^\s'">=]+(?=[\s>]))/.source,
-        "i"
+        "i",
       ),
       lookbehind: true,
       inside: {
@@ -1549,7 +1548,7 @@ Prism.languages.rss = Prism.languages.xml;
           "|" +
           /(?:[^\\\r\n()"']|\\[\s\S])*/.source +
           ")\\)",
-        "i"
+        "i",
       ),
       greedy: true,
       inside: {
@@ -1565,7 +1564,7 @@ Prism.languages.rss = Prism.languages.xml;
     pattern: RegExp(
       "(^|[{}\\s])[^{}\\s](?:[^{};\"'\\s]|\\s+(?![\\s{])|" +
         string.source +
-        ")*(?=\\s*\\{)"
+        ")*(?=\\s*\\{)",
     ),
     lookbehind: true,
   },
@@ -1675,7 +1674,7 @@ Prism.languages.javascript = Prism.languages.extend("clike", {
         /(?:\d+(?:_\d+)*(?:\.(?:\d+(?:_\d+)*)?)?|\.\d+(?:_\d+)*)(?:[Ee][+-]?\d+(?:_\d+)*)?/
           .source) +
         ")" +
-      /(?![\w$])/.source
+      /(?![\w$])/.source,
     ),
     lookbehind: true,
   },
@@ -1795,7 +1794,7 @@ if (Prism.languages.markup) {
   Prism.languages.markup.tag.addAttribute(
     /on(?:abort|blur|change|click|composition(?:end|start|update)|dblclick|error|focus(?:in|out)?|key(?:down|up)|load|mouse(?:down|enter|leave|move|out|over|up)|reset|resize|scroll|select|slotchange|submit|unload|wheel)/
       .source,
-    "javascript"
+    "javascript",
   );
 }
 
@@ -1976,7 +1975,7 @@ Prism.languages.py = Prism.languages.python;
   indent: function (input, tabs) {
     return input.replace(
       /^[^\S\n\r]*(?=\S)/gm,
-      new Array(++tabs).join("\t") + "$&"
+      new Array(++tabs).join("\t") + "$&",
     );
   },
   breakLines: function (input, characters) {
diff --git a/scalene/scalene-gui/scalene-gui.js b/scalene/scalene-gui/scalene-gui.js
index a65cc7819..b288bd906 100644
--- a/scalene/scalene-gui/scalene-gui.js
+++ b/scalene/scalene-gui/scalene-gui.js
@@ -13,43 +13,43 @@ function vsNavigate(filename, lineno) {
 }
 
 function generateScaleneOptimizedCodeRequest(
-    context,
-    sourceCode,
-    line,
+  context,
+  sourceCode,
+  line,
   recommendedLibraries = [],
   includeGpuOptimizations = false,
 ) {
   // Default high-performance libraries known for their efficiency
   const defaultLibraries = [
-        "NumPy",
-        "Scikit-learn",
-        "Pandas",
-        "TensorFlow",
-        "PyTorch",
+    "NumPy",
+    "Scikit-learn",
+    "Pandas",
+    "TensorFlow",
+    "PyTorch",
   ];
   const highPerformanceLibraries = [
     ...new Set([...defaultLibraries, ...recommendedLibraries]),
   ];
-    
+
   let promptParts = [
-        "Optimize the following Python code to make it more efficient WITHOUT CHANGING ITS RESULTS.\n\n",
-        context.trim(),
+    "Optimize the following Python code to make it more efficient WITHOUT CHANGING ITS RESULTS.\n\n",
+    context.trim(),
     "\n# Start of code\n",
     sourceCode.trim(),
     "\n# End of code\n\n",
     "Rewrite the above Python code from 'Start of code' to 'End of code', aiming for clear and simple optimizations. ",
     "Your output should consist only of valid Python code, with brief explanatory comments prefaced with #. ",
     "Include a detailed explanatory comment before the code, starting with '# Proposed optimization:'. ",
-    "Leverage high-performance native libraries, especially those utilizing GPU, for significant performance improvements. ",
-    "Consider using the following other libraries, if appropriate:\n",
-    highPerformanceLibraries.map((e) => " import " + e).join("\n") + "\n",
+    "Leverage high-performance native libraries, especially those utilizing GPU, for significant performance improvements. ",
+    "Consider using the following other libraries, if appropriate:\n",
+    highPerformanceLibraries.map((e) => " import " + e).join("\n") + "\n",
     "Eliminate as many for loops, while loops, and list or dict comprehensions as possible, replacing them with vectorized equivalents. ",
-// "Consider GPU utilization, memory consumption, and copy volume when using GPU-accelerated libraries. ",
-// "Low GPU utilization and high copy volume indicate inefficient use of such libraries. ",
+  // "Consider GPU utilization, memory consumption, and copy volume when using GPU-accelerated libraries. ",
+  // "Low GPU utilization and high copy volume indicate inefficient use of such libraries. ",
     "Quantify the expected speedup in terms of orders of magnitude if possible. ",
     "Fix any errors in the optimized code. ",
-// "Consider the peak amount of memory used per line and CPU utilization for targeted optimization. ",
-    // "Note on CPU utilization: Low utilization in libraries known for multi-threading/multi-processing indicates inefficiency.\n\n",
+  // "Consider the peak amount of memory used per line and CPU utilization for targeted optimization. ",
+  // "Note on CPU utilization: Low utilization in libraries known for multi-threading/multi-processing indicates inefficiency.\n\n",
   ];
 
   // Conditional inclusion of GPU optimizations
@@ -63,26 +63,41 @@ function generateScaleneOptimizedCodeRequest(
   promptParts.push(
     "Consider the following insights gathered from the Scalene profiler for optimization:\n",
   );
-  const total_cpu_percent = line.n_cpu_percent_python + line.n_cpu_percent_c + line.n_sys_percent;
-
-  promptParts.push(`- CPU time: percent spent in the Python interpreter: ${(100*line.n_cpu_percent_python/total_cpu_percent).toFixed(2)}%\n`);
-  promptParts.push(`- CPU time: percent spent executing native code: ${(100*line.n_cpu_percent_c/total_cpu_percent).toFixed(2)}%\n`);
-  promptParts.push(`- CPU time: percent of system time: ${(100*line.n_sys_percent/total_cpu_percent).toFixed(2)}%\n`);
-  // `- CPU utilization: ${performanceMetrics.cpu_utilization}. Low utilization with high-core count might indicate inefficient use of multi-threaded/multi-process libraries.\n`,
-  promptParts.push(`- Core utilization: ${(100*line.n_core_utilization/total_cpu_percent).toFixed(2)}%\n`);
-  // `- Peak memory per line: Focus on lines with high memory usage, specifically ${performanceMetrics.peak_memory_per_line}.\n`,
-  promptParts.push(`- Peak memory usage: ${line.n_peak_mb.toFixed(0)}MB (${(100 * line.n_python_fraction).toFixed(2)}% Python memory)\n`);
-  // `- Copy volume: ${performanceMetrics.copy_volume} MB. High volume indicates inefficient data handling with GPU libraries.\n`,
-  if (line.n_copy_mb_s > 1) {
-    promptParts.push(`- Megabytes copied per second by memcpy/strcpy: ${line.n_copy_mb_s.toFixed(2)}\n`);
-  }
-  if (includeGpuOptimizations) {
-    // ` - GPU utilization: ${performanceMetrics.gpu_utilization}%. Low utilization indicates potential inefficiencies in GPU-accelerated library use.\n`
-    promptParts.push(`- GPU percent utilization: ${(100 * line.n_gpu_percent).toFixed(2)}%\n`);
-    // ` - GPU memory usage: ${performanceMetrics.gpu_memory} MB. Optimize to reduce unnecessary GPU memory consumption.\n`
-    // TODO GPU memory
-  }
-  promptParts.push(`Optimized code:`);
+  const total_cpu_percent =
+    line.n_cpu_percent_python + line.n_cpu_percent_c + line.n_sys_percent;
+
+  promptParts.push(
+    `- CPU time: percent spent in the Python interpreter: ${((100 * line.n_cpu_percent_python) / total_cpu_percent).toFixed(2)}%\n`,
+  );
+  promptParts.push(
+    `- CPU time: percent spent executing native code: ${((100 * line.n_cpu_percent_c) / total_cpu_percent).toFixed(2)}%\n`,
+  );
+  promptParts.push(
+    `- CPU time: percent of system time: ${((100 * line.n_sys_percent) / total_cpu_percent).toFixed(2)}%\n`,
+  );
+  // `- CPU utilization: ${performanceMetrics.cpu_utilization}. Low utilization with high-core count might indicate inefficient use of multi-threaded/multi-process libraries.\n`,
+  promptParts.push(
+    `- Core utilization: ${((100 * line.n_core_utilization) / total_cpu_percent).toFixed(2)}%\n`,
+  );
+  // `- Peak memory per line: Focus on lines with high memory usage, specifically ${performanceMetrics.peak_memory_per_line}.\n`,
+  promptParts.push(
+    `- Peak memory usage: ${line.n_peak_mb.toFixed(0)}MB (${(100 * line.n_python_fraction).toFixed(2)}% Python memory)\n`,
+  );
+  // `- Copy volume: ${performanceMetrics.copy_volume} MB. High volume indicates inefficient data handling with GPU libraries.\n`,
+  if (line.n_copy_mb_s > 1) {
+    promptParts.push(
+      `- Megabytes copied per second by memcpy/strcpy: ${line.n_copy_mb_s.toFixed(2)}\n`,
+    );
+  }
+  if (includeGpuOptimizations) {
+    // ` - GPU utilization: ${performanceMetrics.gpu_utilization}%. Low utilization indicates potential inefficiencies in GPU-accelerated library use.\n`
+    promptParts.push(
+      `- GPU percent utilization: ${(100 * line.n_gpu_percent).toFixed(2)}%\n`,
+    );
+    // ` - GPU memory usage: ${performanceMetrics.gpu_memory} MB. Optimize to reduce unnecessary GPU memory consumption.\n`
+    // TODO GPU memory
+  }
+  promptParts.push(`Optimized code:`);
 
   return promptParts.join("");
 }
@@ -289,23 +304,23 @@ async function sendPromptToOpenAI(prompt, len, apiKey) {
   }
 }
 
-async function sendPromptToAzureOpenAI(prompt, len, apiKey, apiUrl, aiModel){
+async function sendPromptToAzureOpenAI(prompt, len, apiKey, apiUrl, aiModel) {
   const apiVersion = document.getElementById("azure-api-model-version").value;
   const endpoint = `${apiUrl}/openai/deployments/${aiModel}/chat/completions?api-version=${apiVersion}`;
 
   const body = JSON.stringify({
-    "messages": [
+    messages: [
       {
-        "role": "system",
-        "content":
-          "You are a Python programming assistant who ONLY responds with blocks of commented, optimized code. You never respond with text. Just code, starting with ``` and ending with ```."
+        role: "system",
+        content:
+          "You are a Python programming assistant who ONLY responds with blocks of commented, optimized code. You never respond with text. Just code, starting with ``` and ending with ```.",
       },
       {
-        "role": "user",
-        "content": prompt,
-      }
+        role: "user",
+        content: prompt,
+      },
     ],
-    "user": "scalene-user"
+    user: "scalene-user",
   });
   console.log(body);
@@ -373,7 +388,8 @@ async function sendPromptToAmazon(prompt, len) {
   var bedrockruntime = new AWS.BedrockRuntime();
 
   bedrockruntime.invokeModel(body, function (err, data) {
-    if (err) console.log(err, err.stack); // an error occurred
+    if (err)
+      console.log(err, err.stack); // an error occurred
     else console.log(data); // successful response
   });
   const response = await fetch(endpoint, {
@@ -497,19 +513,25 @@ function countSpaces(str) {
 
 async function optimizeCode(imports, code, line, context) {
   // Tailor prompt to request GPU optimizations or not.
-  const useGPUs = document.getElementById("use-gpu-checkbox").checked; // globalThis.profile.gpu;
+  const useGPUs = document.getElementById("use-gpu-checkbox").checked; // globalThis.profile.gpu;
 
-    let recommendedLibraries = ["sklearn"];
-    if (useGPUs) {
-      // Suggest cupy if we are using the GPU.
-      recommendedLibraries.push("cupy");
-    } else {
-      // Suggest numpy otherwise.
-      recommendedLibraries.push("numpy");
-    }
-    // TODO: remove anything already imported in imports
+  let recommendedLibraries = ["sklearn"];
+  if (useGPUs) {
+    // Suggest cupy if we are using the GPU.
+    recommendedLibraries.push("cupy");
+  } else {
+    // Suggest numpy otherwise.
+    recommendedLibraries.push("numpy");
+  }
+  // TODO: remove anything already imported in imports
 
-    const bigPrompt = generateScaleneOptimizedCodeRequest(context, code, line, recommendedLibraries, useGPUs);
+  const bigPrompt = generateScaleneOptimizedCodeRequest(
+    context,
+    code,
+    line,
+    recommendedLibraries,
+    useGPUs,
+  );
 
   const useGPUstring = useGPUs ? " or the GPU " : " ";
   // Check for a valid API key.
@@ -518,16 +540,16 @@ async function optimizeCode(imports, code, line, context) {
   let aiService = document.getElementById("service-select").value;
   if (aiService === "openai") {
     apiKey = document.getElementById("api-key").value;
-  } else if (aiService === "azure-openai"){
+  } else if (aiService === "azure-openai") {
     apiKey = document.getElementById("azure-api-key").value;
   }
 
-  if ((aiService === "openai" || aiService === "azure-openai") && (!apiKey)) {
-      alert(
-        "To activate proposed optimizations, enter an OpenAI API key in AI optimization options.",
-      );
-      document.getElementById("ai-optimization-options").open = true;
-      return "";
+  if ((aiService === "openai" || aiService === "azure-openai") && !apiKey) {
+    alert(
+      "To activate proposed optimizations, enter an OpenAI API key in AI optimization options.",
+    );
+    document.getElementById("ai-optimization-options").open = true;
+    return "";
   }
   // If the code to be optimized is just one line of code, say so.
   let lineOf = " ";
@@ -569,9 +591,9 @@ async function optimizeCode(imports, code, line, context) {
     prompt = memoryEfficiencyPrompt;
   }
 
-    // Just use big prompt maybe FIXME
-    prompt = bigPrompt;
-    
+  // Just use big prompt maybe FIXME
+  prompt = bigPrompt;
+
   // Use number of words in the original code as a proxy for the number of tokens.
   const numWords = code.match(/\b\w+\b/g).length;
@@ -586,11 +608,11 @@ async function optimizeCode(imports, code, line, context) {
       return extractCode(result);
     }
     case "local": {
-        console.log("Running " + document.getElementById("service-select").value);
-        console.log(prompt);
-//      console.log(optimizePerformancePrompt_ollama);
+      console.log("Running " + document.getElementById("service-select").value);
+      console.log(prompt);
+      // console.log(optimizePerformancePrompt_ollama);
       const result = await sendPromptToOllama(
-        prompt, // optimizePerformancePrompt_ollama,
+        prompt, // optimizePerformancePrompt_ollama,
         Math.max(numWords * 4, 500),
         document.getElementById("language-model-local").value,
         document.getElementById("local-ip").value,
@@ -604,9 +626,9 @@ async function optimizeCode(imports, code, line, context) {
     }
     case "amazon": {
       console.log("Running " + document.getElementById("service-select").value);
-      console.log(prompt); // optimizePerformancePrompt_ollama);
+      console.log(prompt); // optimizePerformancePrompt_ollama);
       const result = await sendPromptToAmazon(
-        prompt, // optimizePerformancePrompt_ollama,
+        prompt, // optimizePerformancePrompt_ollama,
         Math.max(numWords * 4, 500),
       );
       console.log(
@@ -615,22 +637,38 @@ async function optimizeCode(imports, code, line, context) {
       return "";
     }
     case "azure-openai": {
-      console.log("Running "+ document.getElementById("service-select").value);
+      console.log("Running " + document.getElementById("service-select").value);
       console.log(prompt);
       let azureOpenAiEndpoint = document.getElementById("azure-api-url").value;
       let azureOpenAiModel = document.getElementById("azure-api-model").value;
-      const result = await sendPromptToAzureOpenAI(prompt, Math.max(numWords * 4, 500), apiKey, azureOpenAiEndpoint, azureOpenAiModel);
+      const result = await sendPromptToAzureOpenAI(
+        prompt,
+        Math.max(numWords * 4, 500),
+        apiKey,
+        azureOpenAiEndpoint,
+        azureOpenAiModel,
+      );
       return extractCode(result);
     }
   }
 }
 
 function proposeOptimizationRegion(filename, file_number, line) {
-  proposeOptimization(filename, file_number, JSON.parse(decodeURIComponent(line)), { regions: true });
+  proposeOptimization(
+    filename,
+    file_number,
+    JSON.parse(decodeURIComponent(line)),
+    { regions: true },
+  );
 }
 
 function proposeOptimizationLine(filename, file_number, line) {
-  proposeOptimization(filename, file_number, JSON.parse(decodeURIComponent(line)), { regions: false });
+  proposeOptimization(
+    filename,
+    file_number,
+    JSON.parse(decodeURIComponent(line)),
+    { regions: false },
+  );
 }
 
 function proposeOptimization(filename, file_number, line, params) {
@@ -699,7 +737,7 @@ function proposeOptimization(filename, file_number, line, params) {
       }
     }
     elt.innerHTML = `${indent}working...`;
-    let message = await optimizeCode(imports, code_region, line, context);
+    let message = await optimizeCode(imports, code_region, line, context);
     if (!message) {
       elt.innerHTML = "";
       return;
@@ -793,8 +831,8 @@ function makeBar(python, native, system, params) {
             python >= widthThreshold1
               ? python.toFixed(0) + "%"
               : python >= widthThreshold2
-              ? python.toFixed(0)
-              : "",
+                ? python.toFixed(0)
+                : "",
           q: python / 2,
         },
         {
@@ -805,8 +843,8 @@ function makeBar(python, native, system, params) {
             native >= widthThreshold1
               ? native.toFixed(0) + "%"
               : native >= widthThreshold2
-              ? native.toFixed(0)
-              : "",
+                ? native.toFixed(0)
+                : "",
           q: python + native / 2,
         },
         {
@@ -817,8 +855,8 @@ function makeBar(python, native, system, params) {
             system >= widthThreshold1
               ? system.toFixed(0) + "%"
               : system >= widthThreshold2
-              ? system.toFixed(0)
-              : "",
+                ? system.toFixed(0)
+                : "",
           q: python + native + system / 2,
         },
       ],
@@ -1491,28 +1529,30 @@ function makeProfileLine(
   const codeLine = Prism.highlight(line.line, Prism.languages.python, "python");
   s += ``;
-  let newLine = structuredClone(line);
-  // TODO: verify that this isn't double counting anything
-  if (propose_optimizations && showExplosion) {
-    // Construct a new line corresponding to this region.
-    let mb_copied = 0;
-    for (let lineno = start_region_line; lineno < end_region_line; lineno++) {
-      currline = prof["files"][filename]["lines"][lineno];
-      mb_copied += currline.n_copy_mb * prof.elapsed_time_sec;
-      newLine.n_cpu_percent_python += currline.n_cpu_percent_python;
-      newLine.n_cpu_percent_c += currline.n_cpu_percent_c;
-      newLine.n_sys_percent += currline.n_sys_percent;
-      newLine.n_gpu_percent += currline.n_gpu_percent;
-      if (currline.n_peak_mb > newLine.n_peak_mb) {
-        newLine.n_peak_mb = currline.n_peak_mb;
-        newLine.n_python_fraction = currline.n_python_fraction;
-      }
-      // TODO:
-      // GPU memory
-      newLine.n_core_utilization += (currline.n_cpu_percent_python + currline.n_cpu_percent_c) * currline.n_core_utilization; // weigh by percentage
-    }
-    newLine.n_copy_mb_s = mb_copied / prof.elapsed_time_sec;
-    s += `${regionOptimizationString}`;
+  let newLine = structuredClone(line);
+  // TODO: verify that this isn't double counting anything
+  if (propose_optimizations && showExplosion) {
+    // Construct a new line corresponding to this region.
+    let mb_copied = 0;
+    for (let lineno = start_region_line; lineno < end_region_line; lineno++) {
+      currline = prof["files"][filename]["lines"][lineno];
+      mb_copied += currline.n_copy_mb * prof.elapsed_time_sec;
+      newLine.n_cpu_percent_python += currline.n_cpu_percent_python;
+      newLine.n_cpu_percent_c += currline.n_cpu_percent_c;
+      newLine.n_sys_percent += currline.n_sys_percent;
+      newLine.n_gpu_percent += currline.n_gpu_percent;
+      if (currline.n_peak_mb > newLine.n_peak_mb) {
+        newLine.n_peak_mb = currline.n_peak_mb;
+        newLine.n_python_fraction = currline.n_python_fraction;
+      }
+      // TODO:
+      // GPU memory
+      newLine.n_core_utilization +=
+        (currline.n_cpu_percent_python + currline.n_cpu_percent_c) *
+        currline.n_core_utilization; // weigh by percentage
+    }
+    newLine.n_copy_mb_s = mb_copied / prof.elapsed_time_sec;
+    s += `${regionOptimizationString}`;
   } else {
@@ -1522,9 +1562,9 @@ function makeProfileLine(
   const lineOptimizationString = propose_optimizations
     ? `${Lightning}`
     : `${WhiteLightning}`;
-    if (propose_optimizations) {
-      s += `${lineOptimizationString}`;
-      // s += `${lineOptimizationString}`;
+  if (propose_optimizations) {
+    s += `${lineOptimizationString}`;
+    // s += `${lineOptimizationString}`;
   } else {
     s += lineOptimizationString;
   }
@@ -2008,7 +2048,7 @@ function toggleServiceFields() {
     service === "amazon" ? "block" : "none";
   document.getElementById("local-fields").style.display =
     service === "local" ? "block" : "none";
-  document.getElementById("azure-openai-fields").style.display = 
+  document.getElementById("azure-openai-fields").style.display =
     service === "azure-openai" ? "block" : "none";
 }
diff --git a/scalene/scalene-gui/tablesort.number.js b/scalene/scalene-gui/tablesort.number.js
index 4d3a2e880..37d29a761 100644
--- a/scalene/scalene-gui/tablesort.number.js
+++ b/scalene/scalene-gui/tablesort.number.js
@@ -26,6 +26,6 @@
       b = cleanNumber(b);
 
       return compareNumber(b, a);
-    }
+    },
   );
 })();
diff --git a/scalene/scalene_analysis.py b/scalene/scalene_analysis.py
index 37ea4738b..a8a3db5be 100644
--- a/scalene/scalene_analysis.py
+++ b/scalene/scalene_analysis.py
@@ -25,7 +25,9 @@ def is_native(package_name: str) -> bool:
             package_dir = os.path.dirname(package.__file__)
             for root, dirs, files in os.walk(package_dir):
                 for filename in files:
-                    if filename.endswith(".so") or filename.endswith(".pyd"):
+                    if filename.endswith(".so") or filename.endswith(
+                        ".pyd"
+                    ):
                         return True
             result = False
         except ImportError:
@@ -141,7 +143,9 @@ def find_outermost_loop(src: str) -> Dict[int, Tuple[int, int]]:
     tree = ast.parse(src)
     regions = {}
 
-    def walk(node : ast.AST, current_outermost_region : Any, outer_class : Any) -> None:
+    def walk(
+        node: ast.AST, current_outermost_region: Any, outer_class: Any
+    ) -> None:
         nonlocal regions
         if isinstance(
             node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)
diff --git a/scalene/scalene_funcutils.py b/scalene/scalene_funcutils.py
index 090b637d2..9832fdd11 100644
--- a/scalene/scalene_funcutils.py
+++ b/scalene/scalene_funcutils.py
@@ -1,5 +1,4 @@
 import dis
-import sys
 from functools import lru_cache
 from types import CodeType
 from typing import FrozenSet
diff --git a/scalene/scalene_gpu.py b/scalene/scalene_gpu.py
index 403c25147..bef7a00ba 100644
--- a/scalene/scalene_gpu.py
+++ b/scalene/scalene_gpu.py
@@ -38,11 +38,11 @@ def __del__(self) -> None:
         if self.has_gpu() and not self.__has_per_pid_accounting:
             print(
                 "NOTE: The GPU is currently running in a mode that can reduce Scalene's accuracy when reporting GPU utilization.",
-                file=sys.stderr
+                file=sys.stderr,
             )
             print(
                 "Run once as Administrator or root (i.e., prefixed with `sudo`) to enable per-process GPU accounting.",
-                file=sys.stderr
+                file=sys.stderr,
             )
 
     def _set_accounting_mode(self) -> bool:
@@ -73,7 +73,8 @@ def _set_accounting_mode(self) -> bool:
 
     def gpu_utilization(self, pid: int) -> float:
         """Return overall GPU utilization by pid if possible.
-        Otherwise, returns aggregate utilization across all running processes."""
+        Otherwise, returns aggregate utilization across all running processes.
+        """
         if not self.has_gpu():
             return 0
         ngpus = self.__ngpus
diff --git a/scalene/scalene_json.py b/scalene/scalene_json.py
index 07d5264ff..738cbeeb6 100644
--- a/scalene/scalene_json.py
+++ b/scalene/scalene_json.py
@@ -1,5 +1,4 @@
 import copy
-import os
 import random
 import re
 import sys
@@ -200,11 +199,11 @@ def output_profile_line(
         else:
             n_copy_mb_s = 0
 
-        stats.per_line_footprint_samples[fname][
-            line_no
-        ] = self.compress_samples(
-            stats.per_line_footprint_samples[fname][line_no],
-            stats.max_footprint,
+        stats.per_line_footprint_samples[fname][line_no] = (
+            self.compress_samples(
+                stats.per_line_footprint_samples[fname][line_no],
+                stats.max_footprint,
+            )
         )
 
         return {
@@ -289,11 +288,13 @@ def output_profiles(
         # Process the stacks to normalize by total number of CPU samples.
         for stk in stats.stacks.keys():
             (count, python_time, c_time, cpu_samples) = stats.stacks[stk]
-            stats.stacks[stk] = (count,
-                                 python_time / stats.total_cpu_samples,
-                                 c_time / stats.total_cpu_samples,
-                                 cpu_samples / stats.total_cpu_samples)
-        
+            stats.stacks[stk] = (
+                count,
+                python_time / stats.total_cpu_samples,
+                c_time / stats.total_cpu_samples,
+                cpu_samples / stats.total_cpu_samples,
+            )
+
         # Convert stacks into a representation suitable for JSON dumping.
         stks = []
         for stk in stats.stacks.keys():
@@ -310,13 +311,13 @@ def output_profiles(
             "elapsed_time_sec": stats.elapsed_time,
             "growth_rate": growth_rate,
             "max_footprint_mb": stats.max_footprint,
-            "max_footprint_python_fraction" : stats.max_footprint_python_fraction,
-            "max_footprint_fname": stats.max_footprint_loc[0]
-            if stats.max_footprint_loc
-            else None,
-            "max_footprint_lineno": stats.max_footprint_loc[1]
-            if stats.max_footprint_loc
-            else None,
+            "max_footprint_python_fraction": stats.max_footprint_python_fraction,
+            "max_footprint_fname": (
+                stats.max_footprint_loc[0] if stats.max_footprint_loc else None
+            ),
+            "max_footprint_lineno": (
+                stats.max_footprint_loc[1] if stats.max_footprint_loc else None
+            ),
             "files": {},
             "gpu": self.gpu,
             "memory": profile_memory,
@@ -399,7 +400,7 @@ def output_profiles(
 
             reported_leaks = {}
 
-            for (leak_lineno, leak_likelihood, leak_velocity) in leaks:
+            for leak_lineno, leak_likelihood, leak_velocity in leaks:
                 reported_leaks[str(leak_lineno)] = {
                     "likelihood": leak_likelihood,
                     "velocity_mb_s": leak_velocity / stats.elapsed_time,
diff --git a/scalene/scalene_jupyter.py b/scalene/scalene_jupyter.py
index a9fa7c67a..364a061e2 100644
--- a/scalene/scalene_jupyter.py
+++ b/scalene/scalene_jupyter.py
@@ -55,7 +55,7 @@ def do_GET(self) -> None:
                     except FileNotFoundError:
                         print("Scalene error: profile file not found.")
                 elif self.path == "/shutdown":
-                    self.server.should_shutdown = True # type: ignore
+                    self.server.should_shutdown = True  # type: ignore
                     self.send_response(204)
                     # self._send_response("Server is shutting down...")
                 else:
diff --git a/scalene/scalene_parseargs.py b/scalene/scalene_parseargs.py
index 9d79a2cfa..1f6b7b907 100644
--- a/scalene/scalene_parseargs.py
+++ b/scalene/scalene_parseargs.py
@@ -37,8 +37,6 @@ def clean_exit(code: object = 0) -> NoReturn:
         """Replacement for sys.exit that exits cleanly from within Jupyter notebooks."""
         raise StopJupyterExecution
 
-
-
     @staticmethod
     def parse_args() -> Tuple[argparse.Namespace, List[str]]:
         # In IPython, intercept exit cleanly (because sys.exit triggers a backtrace).
@@ -167,7 +165,7 @@ def parse_args() -> Tuple[argparse.Namespace, List[str]]:
             action="store_const",
             const=True,
             default=False,
-            help=f"opens the Scalene web UI.",
+            help="opens the Scalene web UI.",
         )
         parser.add_argument(
             "--reduced-profile",
@@ -368,21 +366,28 @@ def parse_args() -> Tuple[argparse.Namespace, List[str]]:
         # Launch the UI if `--viewer` was selected.
         if args.viewer:
-            if browser := find_browser():
+            if find_browser():
                 assert not args.no_browser
                 dir = os.path.dirname(__file__)
                 import scalene.scalene_config
                 import subprocess
-                subprocess.Popen([sys.executable,
-                                  f"{dir}{os.sep}launchbrowser.py",
-                                  "demo",
-                                  str(scalene.scalene_config.SCALENE_PORT)],
-                                 stdout=subprocess.DEVNULL,
-                                 stderr=subprocess.DEVNULL)
+
+                subprocess.Popen(
+                    [
+                        sys.executable,
+                        f"{dir}{os.sep}launchbrowser.py",
+                        "demo",
+                        str(scalene.scalene_config.SCALENE_PORT),
+                    ],
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                )
                 sys.exit(0)
                 pass
             else:
-                print(f"Scalene: could not open a browser.") # {scalene_gui_url}.")
+                print(
+                    "Scalene: could not open a browser."
+                )  # {scalene_gui_url}.")
                 sys.exit(0)
 
         # If any of the individual profiling metrics were specified,
diff --git a/scalene/scalene_profiler.py b/scalene/scalene_profiler.py
index 31cc7836d..767d35658 100644
--- a/scalene/scalene_profiler.py
+++ b/scalene/scalene_profiler.py
@@ -1,4 +1,6 @@
-from __future__ import annotations # work around Python 3.8 issue, see https://stackoverflow.com/a/68072481/335756
+from __future__ import (
+    annotations,
+)  # work around Python 3.8 issue, see https://stackoverflow.com/a/68072481/335756
 
 """Scalene: a CPU+memory+GPU (and more) profiler for Python.
 
@@ -27,7 +29,6 @@
 import contextlib
 import functools
 import gc
-import importlib
 import inspect
 import json
 import math
@@ -38,7 +39,6 @@
 import queue
 import re
 import signal
-import stat
 import subprocess
 import sys
 import sysconfig
@@ -46,7 +46,6 @@
 import threading
 import time
 import traceback
-import webbrowser
 
 # For debugging purposes
 from rich.console import Console
@@ -57,10 +56,7 @@
 from scalene.redirect_python import redirect_python
 
 from collections import defaultdict
-from dataclasses import dataclass
-from importlib.abc import SourceLoader
-from importlib.machinery import ModuleSpec
-from types import CodeType, FrameType
+from types import FrameType
 from typing import (
     Any,
     Callable,
@@ -70,7 +66,6 @@
     Optional,
     Set,
     Tuple,
-    Type,
     Union,
     cast,
 )
@@ -98,13 +93,12 @@
     on_stack,
 )
 
-if sys.platform != "win32":
-    import resource
-
 from scalene.scalene_parseargs import ScaleneParseArgs, StopJupyterExecution
 from scalene.scalene_sigqueue import ScaleneSigQueue
 
 console = Console(style="white on blue")
+
+
 # Assigning to `nada` disables any console.log commands.
 def nada(*args: Any) -> None:
     pass
@@ -115,6 +109,7 @@ def nada(*args: Any) -> None:
 MINIMUM_PYTHON_VERSION_MAJOR = 3
 MINIMUM_PYTHON_VERSION_MINOR = 8
 
+
 def require_python(version: Tuple[int, int]) -> None:
     assert (
         sys.version_info >= version
@@ -127,6 +122,7 @@ def require_python(version: Tuple[int, int]) -> None:
 # particular, Linux, Mac OS X, and WSL 2 (Windows Subsystem for Linux 2 = Ubuntu).
 # It also has partial support for Windows.
 
+
 # Install our profile decorator.
 def scalene_redirect_profile(func: Any) -> Any:
     """Handle @profile decorators.
@@ -140,6 +136,7 @@ def scalene_redirect_profile(func: Any) -> Any:
 
 builtins.profile = scalene_redirect_profile  # type: ignore
 
+
 def start() -> None:
     """Start profiling."""
     Scalene.start()
@@ -170,7 +167,8 @@ class Scalene:
     __parent_pid = -1
     __initialized: bool = False
     __last_profiled = [Filename("NADA"), LineNumber(0), ByteCodeIndex(0)]
-    __orig_python = sys.executable # will be rewritten later
+    __orig_python = sys.executable  # will be rewritten later
+
     @staticmethod
     def last_profiled_tuple() -> Tuple[Filename, LineNumber, ByteCodeIndex]:
         """Helper function to type last profiled information."""
@@ -183,9 +181,9 @@ def last_profiled_tuple() -> Tuple[Filename, LineNumber, ByteCodeIndex]:
     __profile_filename = Filename("profile.json")
     __profiler_html = Filename("profile.html")
     __error_message = "Error in program being profiled"
-    __windows_queue: queue.Queue[
-        Any
-    ] = queue.Queue()  # only used for Windows timer logic
+    __windows_queue: queue.Queue[Any] = (
+        queue.Queue()
+    )  # only used for Windows timer logic
     BYTES_PER_MB = 1024 * 1024
 
     MALLOC_ACTION = "M"
@@ -209,7 +207,7 @@ def last_profiled_tuple() -> Tuple[Filename, LineNumber, ByteCodeIndex]:
     __stats = ScaleneStatistics()
     __output = ScaleneOutput()
     __json = ScaleneJSON()
-    __gpu = None # initialized after parsing arguments in `main`
+    __gpu = None  # initialized after parsing arguments in `main`
     __invalidate_queue: List[Tuple[Filename, LineNumber]] = []
     __invalidate_mutex: threading.Lock
     __profiler_base: str
@@ -218,9 +216,11 @@ def last_profiled_tuple() -> Tuple[Filename, LineNumber, ByteCodeIndex]:
     def get_original_lock() -> threading.Lock:
         """Return the true lock, which we shim in replacement_lock.py."""
         return Scalene.__original_lock()
+
     @staticmethod
     def get_signals() -> ScaleneSignals:
         return Scalene.__signals
+
     # when did we last receive a signal?
     __last_signal_time = TimeInfo()
 
@@ -250,9 +250,9 @@ def get_signals() -> ScaleneSignals:
         bool
     )  # False by default
 
-    child_pids: Set[
-        int
-    ] = set()  # Needs to be unmangled to be accessed by shims
+    child_pids: Set[int] = (
+        set()
+    )  # Needs to be unmangled to be accessed by shims
 
     # Signal queues for allocations and memcpy
     __alloc_sigq: ScaleneSigQueue[Any]
@@ -278,6 +278,7 @@ def get_all_signals_set() -> Set[int]:
         Used by replacement_signal_fns.py to shim signals used by the client program.
         """
         return set(Scalene.__signals.get_all_signals())
+
     @staticmethod
     def get_lifecycle_signals() -> Tuple[signal.Signals, signal.Signals]:
         return Scalene.__signals.get_lifecycle_signals()
@@ -285,6 +286,7 @@ def get_lifecycle_signals() -> Tuple[signal.Signals, signal.Signals]:
     @staticmethod
     def disable_lifecycle() -> None:
         Scalene.__lifecycle_disabled = True
+
     @staticmethod
     def get_lifecycle_disabled() -> bool:
         return Scalene.__lifecycle_disabled
@@ -362,7 +364,7 @@ def invalidate_lines_python(
             Scalene.__last_profiled = [
                 Filename("NADA"),
                 LineNumber(0),
-                ByteCodeIndex(0)
+                ByteCodeIndex(0),
                 # Filename(ff),
                 # LineNumber(fl),
                 # ByteCodeIndex(frame.f_lasti),
@@ -649,7 +651,8 @@ def __init__(
         if sys.platform == "win32":
             if arguments.memory:
                 print(
-                    f"Scalene warning: Memory profiling is not currently supported for Windows.", file=sys.stderr
+                    "Scalene warning: Memory profiling is not currently supported for Windows.",
+                    file=sys.stderr,
                 )
                 arguments.memory = False
 
@@ -714,7 +717,9 @@ def __init__(
                 "=".join((k, str(v))) for (k, v) in environ.items()
             )
 
-        Scalene.__orig_python = redirect_python(preface, cmdline, Scalene.__python_alias_dir)
+        Scalene.__orig_python = redirect_python(
+            preface, cmdline, Scalene.__python_alias_dir
+        )
 
         # Register the exit handler to run when the program terminates or we quit.
         atexit.register(Scalene.exit_handler)
@@ -817,7 +822,7 @@ def cpu_signal_handler(
     def output_profile(program_args: Optional[List[str]] = None) -> bool:
         """Output the profile. Returns true iff there was any info reported the profile."""
         # sourcery skip: inline-immediately-returned-variable
-        # print(flamegraph_format(Scalene.__stats.stacks))
+        # print(flamegraph_format(Scalene.__stats.stacks))
         if Scalene.__args.json:
             json_output = Scalene.__json.output_profiles(
                 Scalene.__program_being_profiled,
@@ -1033,15 +1038,15 @@ def process_cpu_sample(
             Scalene.__stats.stacks,
             average_python_time,
             average_c_time,
-            average_cpu_time
+            average_cpu_time,
         )
 
         # First, handle the main thread.
         Scalene.enter_function_meta(main_thread_frame, Scalene.__stats)
         fname = Filename(main_thread_frame.f_code.co_filename)
         lineno = LineNumber(main_thread_frame.f_lineno)
-        #print(main_thread_frame)
-        #print(fname, lineno)
+        # print(main_thread_frame)
+        # print(fname, lineno)
         main_tid = cast(int, threading.main_thread().ident)
         if not is_thread_sleeping[main_tid]:
             Scalene.__stats.cpu_samples_python[fname][
@@ -1059,15 +1064,17 @@ def process_cpu_sample(
             Scalene.__stats.gpu_mem_samples[fname][lineno].push(gpu_mem_used)
 
         # Now handle the rest of the threads.
-        for (frame, tident, orig_frame) in new_frames:
+        for frame, tident, orig_frame in new_frames:
             if frame == main_thread_frame:
                 continue
-            add_stack(frame,
-                      Scalene.should_trace,
-                      Scalene.__stats.stacks,
-                      average_python_time,
-                      average_c_time,
-                      average_cpu_time)
+            add_stack(
+                frame,
+                Scalene.should_trace,
+                Scalene.__stats.stacks,
+                average_python_time,
+                average_c_time,
+                average_cpu_time,
+            )
 
             # In a thread.
             fname = Filename(frame.f_code.co_filename)
@@ -1135,7 +1142,7 @@ def compute_frames_to_record() -> List[Tuple[FrameType, int, FrameType]]:
         )
         # Process all the frames to remove ones we aren't going to track.
         new_frames: List[Tuple[FrameType, int, FrameType]] = []
-        for (frame, tident) in frames:
+        for frame, tident in frames:
             orig_frame = frame
             if not frame:
                 continue
@@ -1328,7 +1335,10 @@ def alloc_sigqueue_processor(x: Optional[List[int]]) -> None:
             ) = item
             is_malloc = action == Scalene.MALLOC_ACTION
 
-            if is_malloc and count == scalene.scalene_config.NEWLINE_TRIGGER_LENGTH + 1:
+            if (
+                is_malloc
+                and count == scalene.scalene_config.NEWLINE_TRIGGER_LENGTH + 1
+            ):
                 with Scalene.__invalidate_mutex:
                     last_file, last_line = Scalene.__invalidate_queue.pop(0)
@@ -1359,9 +1369,9 @@ def alloc_sigqueue_processor(x: Optional[List[int]]) -> None:
                     stats.memory_current_footprint[fname][lineno]
                     > stats.memory_current_highwater_mark[fname][lineno]
                 ):
-                    stats.memory_current_highwater_mark[fname][
-                        lineno
-                    ] = stats.memory_current_footprint[fname][lineno]
+                    stats.memory_current_highwater_mark[fname][lineno] = (
+                        stats.memory_current_footprint[fname][lineno]
+                    )
                 stats.memory_current_highwater_mark[fname][lineno] = max(
                     stats.memory_current_highwater_mark[fname][lineno],
                     stats.memory_current_footprint[fname][lineno],
@@ -1518,9 +1528,7 @@ def should_trace(filename: Filename, func: str) -> bool:
         for n in sysconfig.get_scheme_names():
             for p in sysconfig.get_path_names():
                 the_path = sysconfig.get_path(p, n)
-                libdir = str(
-                    pathlib.Path(the_path).resolve()
-                )
+                libdir = str(pathlib.Path(the_path).resolve())
                 if libdir in resolved_filename:
                     return False
@@ -1577,7 +1585,7 @@ def start() -> None:
                 "ERROR: Do not try to invoke `start` if you have not called Scalene using one of the methods\n"
                 "in https://github.com/plasma-umass/scalene#using-scalene\n"
                 "(The most likely issue is that you need to run your code with `scalene`, not `python`).",
-                file=sys.stderr
+                file=sys.stderr,
             )
             sys.exit(1)
         Scalene.__stats.start_clock()
@@ -1587,8 +1595,8 @@ def start() -> None:
         if Scalene.__args.memory:
             from scalene import pywhere  # type: ignore
-            pywhere.set_scalene_done_false()
 
+            pywhere.set_scalene_done_false()
 
     @staticmethod
     def stop() -> None:
@@ -1596,9 +1604,8 @@ def stop() -> None:
         Scalene.__done = True
         if Scalene.__args.memory:
             from scalene import pywhere  # type: ignore
-            pywhere.set_scalene_done_true()
 
-
+            pywhere.set_scalene_done_true()
         Scalene.disable_signals()
         Scalene.__stats.stop_clock()
@@ -1731,7 +1738,7 @@ def profile_code(
             exec(code, the_globals, the_locals)
         except SystemExit as se:
             # Intercept sys.exit and propagate the error code.
-            exit_status = se.code if type(se.code) == int else 1
+            exit_status = se.code if type(se.code) is int else 1
         except KeyboardInterrupt:
             # Cleanly handle keyboard interrupts (quits execution and dumps the profile).
             print("Scalene execution interrupted.", file=sys.stderr)
@@ -1757,7 +1764,7 @@ def profile_code(
         if not did_output:
             print(
                 "Scalene: The specified code did not run for long enough to profile.",
-                file=sys.stderr
+                file=sys.stderr,
             )
             # Print out hints to explain why the above message may have been printed.
             if not Scalene.__args.profile_all:
@@ -1767,30 +1774,35 @@ def profile_code(
                 # default).
                 print(
                     "By default, Scalene only profiles code in the file executed and its subdirectories.",
-                    file=sys.stderr
-                )
+                    file=sys.stderr,
+                )
                 print(
                     "To track the time spent in all files, use the `--profile-all` option.",
-                    file=sys.stderr
+                    file=sys.stderr,
                 )
-            elif Scalene.__args.profile_only or Scalene.__args.profile_exclude:
+            elif (
+                Scalene.__args.profile_only
+                or Scalene.__args.profile_exclude
+            ):
                 # if --profile-only or --profile-exclude were
                 # specified, suggest that the patterns might be
                 # excluding too many files. Collecting the
                 # previously filtered out files could allow
                 # suggested fixes (as in, remove foo because it
                 # matches too many files).
-                print("The patterns used in `--profile-only` or `--profile-exclude` may be filtering out too many files.",
-                      file=sys.stderr
-                )
+                print(
+                    "The patterns used in `--profile-only` or `--profile-exclude` may be filtering out too many files.",
+                    file=sys.stderr,
+                )
             else:
                 # if none of the above cases hold, indicate that
                 # Scalene can only profile code that runs for at
                 # least one second or allocates some threshold
                 # amount of memory.
-                print("Scalene can only profile code that runs for at least one second or allocates at least 10MB.",
-                      file=sys.stderr
-                )
+                print(
+                    "Scalene can only profile code that runs for at least one second or allocates at least 10MB.",
+                    file=sys.stderr,
+                )
 
         if not (
             did_output
             and Scalene.__args.web
@@ -1801,17 +1813,21 @@ def profile_code(
 
             generate_html(
                 profile_fname=Scalene.__profile_filename,
-                output_fname=Scalene.__args.outfile
-                if Scalene.__args.outfile
-                else Scalene.__profiler_html,
+                output_fname=(
+                    Scalene.__args.outfile
+                    if Scalene.__args.outfile
+                    else Scalene.__profiler_html
+                ),
             )
             if Scalene.in_jupyter():
                 from scalene.scalene_jupyter import ScaleneJupyter
 
                 port = ScaleneJupyter.find_available_port(8181, 9000)
                 if not port:
-                    print("Scalene error: could not find an available port.",
-                          file=sys.stderr)
+                    print(
+                        "Scalene error: could not find an available port.",
+                        file=sys.stderr,
+                    )
                 else:
                     ScaleneJupyter.display_profile(
                         port, Scalene.__profiler_html
@@ -1830,16 +1846,17 @@ def profile_code(
                     )
                     if Scalene.__pid == 0:
                         # Only open a browser tab for the parent.
-                        # url = f"file:///{output_fname}"
-                        # webbrowser.open(url)
-                        # show_browser(output_fname, SCALENE_PORT, Scalene.__orig_python)
                         dir = os.path.dirname(__file__)
-                        subprocess.Popen([Scalene.__orig_python,
-                                          f"{dir}{os.sep}launchbrowser.py",
-                                          output_fname,
-                                          str(scalene.scalene_config.SCALENE_PORT)],
-                                         stdout=subprocess.DEVNULL,
-                                         stderr=subprocess.DEVNULL)
+                        subprocess.Popen(
+                            [
+                                Scalene.__orig_python,
+                                f"{dir}{os.sep}launchbrowser.py",
+                                output_fname,
+                                str(scalene.scalene_config.SCALENE_PORT),
+                            ],
+                            stdout=subprocess.DEVNULL,
+                            stderr=subprocess.DEVNULL,
+                        )
             # Restore them.
             os.environ.update(
                 {
@@ -1885,7 +1902,9 @@ def main() -> None:
         # Try to profile a GPU if one is found and `--gpu` is selected / it's the default (see ScaleneArguments).
         if args.gpu:
             if platform.system() == "Darwin":
-                from scalene.scalene_apple_gpu import ScaleneAppleGPU as ScaleneGPU
+                from scalene.scalene_apple_gpu import (
+                    ScaleneAppleGPU as ScaleneGPU,
+                )
             else:
                 from scalene.scalene_gpu import ScaleneGPU  # type: ignore
 
             Scalene.__gpu = ScaleneGPU()
@@ -1910,8 +1929,7 @@ def register_files_to_profile() -> None:
             Scalene.__program_path,
             Scalene.__args.profile_all,
         )
-        
-        
+
     @staticmethod
     def run_profiler(
         args: argparse.Namespace, left: List[str], is_jupyter: bool = False
@@ -1924,7 +1942,7 @@ def run_profiler(
             print(
                 "ERROR: Do not try to manually invoke `run_profiler`.\n"
                 "To invoke Scalene programmatically, see the usage noted in https://github.com/plasma-umass/scalene#using-scalene",
-                file=sys.stderr
+                file=sys.stderr,
             )
             sys.exit(1)
         if sys.platform != "win32":
@@ -1949,14 +1967,17 @@ def run_profiler(
             # If running in the background, print the PID.
             if os.getpgrp() != os.tcgetpgrp(sys.stdout.fileno()):
                 # In the background.
-                print(f"Scalene now profiling process {os.getpid()}", file=sys.stderr)
+                print(
+                    f"Scalene now profiling process {os.getpid()}",
+                    file=sys.stderr,
+                )
                 print(
                     f"  to disable profiling: python3 -m scalene.profile --off --pid {os.getpid()}",
-                    file=sys.stderr
+                    file=sys.stderr,
                 )
                 print(
                     f"  to resume profiling:  python3 -m scalene.profile --on  --pid {os.getpid()}",
-                    file=sys.stderr
+                    file=sys.stderr,
                 )
         Scalene.__stats.clear_all()
         sys.argv = left
@@ -2082,17 +2103,23 @@ def run_profiler(
                             print(traceback.format_exc(), file=sys.stderr)
                 except (FileNotFoundError, IOError):
                     if progs:
-                        print(f"Scalene: could not find input file {prog_name}", file=sys.stderr)
+                        print(
+                            f"Scalene: could not find input file {prog_name}",
+                            file=sys.stderr,
+                        )
                     else:
                         print("Scalene: no input file specified.", file=sys.stderr)
                     sys.exit(1)
         except SystemExit as e:
-            exit_status = e.code if type(e.code) == int else 1
+            exit_status = e.code if type(e.code) is int else 1
         except StopJupyterExecution:
             pass
         except Exception:
-            print("Scalene failed to initialize.\n" + traceback.format_exc(), file=sys.stderr)
+            print(
+                "Scalene failed to initialize.\n" + traceback.format_exc(),
+                file=sys.stderr,
+            )
             sys.exit(1)
         finally:
             with contextlib.suppress(Exception):
diff --git a/scalene/scalene_signals.py b/scalene/scalene_signals.py
index 68d858181..af31d667f 100644
--- a/scalene/scalene_signals.py
+++ b/scalene/scalene_signals.py
@@ -57,8 +57,10 @@ def get_timer_signals(self) -> Tuple[int, signal.Signals]:
         Returns 2-tuple of the integers representing the CPU timer signal and the CPU signal.
         """
         return self.cpu_timer_signal, self.cpu_signal
+
     def get_lifecycle_signals(self) -> Tuple[signal.Signals, signal.Signals]:
         return (self.start_profiling_signal, self.stop_profiling_signal)
+
     def get_all_signals(self) -> List[int]:
         """
         Return all the signals used for controlling profiling, except the CPU timer.
diff --git a/scalene/scalene_statistics.py b/scalene/scalene_statistics.py
index cd3bd5984..350401084 100644
--- a/scalene/scalene_statistics.py
+++ b/scalene/scalene_statistics.py
@@ -46,20 +46,20 @@ def __init__(self) -> None:
 
         # CPU samples for each location in the program
         # spent in the interpreter
-        self.cpu_samples_python: Dict[
-            Filename, Dict[LineNumber, float]
-        ] = defaultdict(lambda: defaultdict(float))
+        self.cpu_samples_python: Dict[Filename, Dict[LineNumber, float]] = (
+            defaultdict(lambda: defaultdict(float))
+        )
 
         # CPU samples for each location in the program
         # spent in C / libraries / system calls
-        self.cpu_samples_c: Dict[
-            Filename, Dict[LineNumber, float]
-        ] = defaultdict(lambda: defaultdict(float))
+        self.cpu_samples_c: Dict[Filename, Dict[LineNumber, float]] = (
+            defaultdict(lambda: defaultdict(float))
+        )
 
         # GPU samples for each location in the program
-        self.gpu_samples: Dict[
-            Filename, Dict[LineNumber, float]
-        ] = defaultdict(lambda: defaultdict(float))
+        self.gpu_samples: Dict[Filename, Dict[LineNumber, float]] = (
+            defaultdict(lambda: defaultdict(float))
+        )
 
         # GPU memory samples for each location in the program
         self.gpu_mem_samples: DefaultDict[
@@ -83,14 +83,14 @@ def __init__(self) -> None:
         self.malloc_samples: Dict[Filename, float] = defaultdict(float)
 
         # malloc samples for each location in the program
-        self.memory_malloc_samples: Dict[
-            Filename, Dict[LineNumber, float]
-        ] = defaultdict(lambda: defaultdict(float))
+        self.memory_malloc_samples: Dict[Filename, Dict[LineNumber, float]] = (
+            defaultdict(lambda: defaultdict(float))
+        )
 
         # number of times samples were added for the above
-        self.memory_malloc_count: Dict[
-            Filename, Dict[LineNumber, int]
-        ] = defaultdict(lambda: defaultdict(int))
+        self.memory_malloc_count: Dict[Filename, Dict[LineNumber, int]] = (
+            defaultdict(lambda: defaultdict(int))
+        )
 
         # the current footprint for this line
         self.memory_current_footprint: Dict[
             Filename, Dict[LineNumber, float]
@@ -98,9 +98,9 @@ def __init__(self) -> None:
         ] = defaultdict(lambda: defaultdict(float))
 
         # the max footprint for this line
-        self.memory_max_footprint: Dict[
-            Filename, Dict[LineNumber, float]
-        ] = defaultdict(lambda: defaultdict(float))
+        self.memory_max_footprint: Dict[Filename, Dict[LineNumber, float]] = (
+            defaultdict(lambda: defaultdict(float))
+        )
 
         # the current high watermark for this line
         self.memory_current_highwater_mark: Dict[
@@ -120,29 +120,29 @@ def __init__(self) -> None:
         )
 
         # mallocs attributable to Python, for each location in the program
-        self.memory_python_samples: Dict[
-            Filename, Dict[LineNumber, float]
-        ] = defaultdict(lambda: defaultdict(float))
+        self.memory_python_samples: Dict[Filename, Dict[LineNumber, float]] = (
+            defaultdict(lambda: defaultdict(float))
+        )
 
         # free samples for each location in the program
-        self.memory_free_samples: Dict[
-            Filename, Dict[LineNumber, float]
-        ] = defaultdict(lambda: defaultdict(float))
+        self.memory_free_samples: Dict[Filename, Dict[LineNumber, float]] = (
+            defaultdict(lambda: defaultdict(float))
+        )
 
         # number of times samples were added for the above
-        self.memory_free_count: Dict[
-            Filename, Dict[LineNumber, int]
-        ] = defaultdict(lambda: defaultdict(int))
+        self.memory_free_count: Dict[Filename, Dict[LineNumber, int]] = (
+            defaultdict(lambda: defaultdict(int))
+        )
 
         # memcpy samples for each location in the program
-        self.memcpy_samples: Dict[
-            Filename, Dict[LineNumber, int]
-        ] = defaultdict(lambda: defaultdict(int))
+        self.memcpy_samples: Dict[Filename, Dict[LineNumber, int]] = (
+            defaultdict(lambda: defaultdict(int))
+        )
 
         # leak score tracking
-        self.leak_score: Dict[
-            Filename, Dict[LineNumber, Tuple[int, int]]
-        ] = defaultdict(lambda: defaultdict(lambda: ((0, 0))))
+        self.leak_score: Dict[Filename, Dict[LineNumber, Tuple[int, int]]] = (
+            defaultdict(lambda: defaultdict(lambda: ((0, 0))))
+        )
 
         self.allocation_velocity: Tuple[float, float] = (0.0, 0.0)
 
@@ -163,7 +163,7 @@ def __init__(self) -> None:
 
         # the peak memory footprint
         self.max_footprint: float = 0.0
-        self.max_footprint_python_fraction : float = 0
+        self.max_footprint_python_fraction: float = 0
         self.max_footprint_loc: Optional[Tuple[Filename, LineNumber]] = None
         # memory footprint samples (time, footprint)
         self.memory_footprint_samples: List[List[float]] = []
@@ -181,9 +181,9 @@ def __init__(self) -> None:
 
         # maps filenames and line numbers to functions (collected at runtime)
         # [filename][lineno] -> function name
-        self.function_map: Dict[
-            Filename, Dict[LineNumber, Filename]
-        ] = defaultdict(lambda: defaultdict(lambda: Filename("")))
+        self.function_map: Dict[Filename, Dict[LineNumber, Filename]] = (
+            defaultdict(lambda: defaultdict(lambda: Filename("")))
+        )
         self.firstline_map: Dict[Filename, LineNumber] = defaultdict(
             lambda: LineNumber(1)
         )
diff --git a/scalene/scalene_utility.py b/scalene/scalene_utility.py
index 88f67da81..3dbc4f66b 100644
--- a/scalene/scalene_utility.py
+++ b/scalene/scalene_utility.py
@@ -1,5 +1,4 @@
 import http.server
-import inspect
 import os
 import pathlib
 import sys
@@ -11,19 +10,20 @@
 
 from jinja2 import Environment, FileSystemLoader
 
-from types import CodeType, FrameType
+from types import FrameType
 from typing import Any, Callable, Dict, List, Optional, Tuple, cast
 
 from scalene.scalene_statistics import Filename, LineNumber
 from scalene.scalene_config import scalene_version, scalene_date
 
+
 def add_stack(
     frame: FrameType,
     should_trace: Callable[[Filename, str], bool],
     stacks: Dict[Any, Any],
     python_time: float,
     c_time: float,
-    cpu_samples: float
+    cpu_samples: float,
 ) -> None:
     """Add one to the stack starting from this frame."""
     stk: List[Tuple[str, str, int]] = list()
@@ -35,11 +35,15 @@ def add_stack(
     if tuple(stk) not in stacks:
         stacks[tuple(stk)] = (1, python_time, c_time, cpu_samples)
     else:
-        (prev_count, prev_python_time, prev_c_time, prev_cpu_samples) = stacks[tuple(stk)]
-        stacks[tuple(stk)] = (prev_count + 1,
-                              prev_python_time + python_time,
-                              prev_c_time + c_time,
-                              prev_cpu_samples + cpu_samples)
+        (prev_count, prev_python_time, prev_c_time, prev_cpu_samples) = stacks[
+            tuple(stk)
+        ]
+        stacks[tuple(stk)] = (
+            prev_count + 1,
+            prev_python_time + python_time,
+            prev_c_time + c_time,
+            prev_cpu_samples + cpu_samples,
+        )
         # stacks[tuple(stk)] += 1
 
@@ -102,7 +106,9 @@ def flamegraph_format(stacks: Dict[Tuple[Any], Any]) -> str:
 def generate_html(profile_fname: Filename, output_fname: Filename) -> None:
     """Apply a template to generate a single HTML payload containing the current profile."""
 
-    def read_file_content(directory: str, subdirectory: str, filename: str) -> str:
+    def read_file_content(
+        directory: str, subdirectory: str, filename: str
+    ) -> str:
         file_path = os.path.join(directory, subdirectory, filename)
         return pathlib.Path(file_path).read_text()
 
@@ -119,13 +125,23 @@ def read_file_content(directory: str, subdirectory: str, filename: str) -> str:
     scalene_dir = os.path.dirname(__file__)
 
     file_contents = {
-        'scalene_gui_js_text': read_file_content(scalene_dir, "scalene-gui", "scalene-gui.js"),
-        'prism_css_text': read_file_content(scalene_dir, "scalene-gui", 
"prism.css"), - 'prism_js_text': read_file_content(scalene_dir, "scalene-gui", "prism.js"), - 'tablesort_js_text': read_file_content(scalene_dir, "scalene-gui", "tablesort.js"), - 'tablesort_number_js_text': read_file_content(scalene_dir, "scalene-gui", "tablesort.number.js") + "scalene_gui_js_text": read_file_content( + scalene_dir, "scalene-gui", "scalene-gui.js" + ), + "prism_css_text": read_file_content( + scalene_dir, "scalene-gui", "prism.css" + ), + "prism_js_text": read_file_content( + scalene_dir, "scalene-gui", "prism.js" + ), + "tablesort_js_text": read_file_content( + scalene_dir, "scalene-gui", "tablesort.js" + ), + "tablesort_number_js_text": read_file_content( + scalene_dir, "scalene-gui", "tablesort.number.js" + ), } - + # Put the profile and everything else into the template. environment = Environment( loader=FileSystemLoader(os.path.join(scalene_dir, "scalene-gui")) @@ -133,11 +149,11 @@ def read_file_content(directory: str, subdirectory: str, filename: str) -> str: template = environment.get_template("index.html.template") rendered_content = template.render( profile=profile, - gui_js=file_contents['scalene_gui_js_text'], - prism_css=file_contents['prism_css_text'], - prism_js=file_contents['prism_js_text'], - tablesort_js=file_contents['tablesort_js_text'], - tablesort_number_js=file_contents['tablesort_number_js_text'], + gui_js=file_contents["scalene_gui_js_text"], + prism_css=file_contents["prism_css_text"], + prism_js=file_contents["prism_js_text"], + tablesort_js=file_contents["tablesort_js_text"], + tablesort_number_js=file_contents["tablesort_number_js_text"], scalene_version=scalene_version, scalene_date=scalene_date, ) @@ -155,36 +171,35 @@ def start_server(port: int, directory: str) -> None: handler = http.server.SimpleHTTPRequestHandler with socketserver.TCPServer(("", port), handler) as httpd: os.chdir(directory) - # print(f"Serving at port {port}") httpd.serve_forever() - except OSError as e: + except OSError: # print(f"Port {port} is already in use. 
Please try a different port.") pass -def show_browser(file_path: str, port: int, orig_python : str ='python3') -> None: + +def show_browser( + file_path: str, port: int, orig_python: str = "python3" +) -> None: temp_dir = tempfile.gettempdir() # Copy file to the temporary directory - shutil.copy(file_path, os.path.join(temp_dir, 'index.html')) + shutil.copy(file_path, os.path.join(temp_dir, "index.html")) # Open web browser in a new subprocess - url = f'http://localhost:{port}/' curr_dir = os.getcwd() try: os.chdir(temp_dir) - # subprocess.Popen([orig_python, '-m', 'http.server', f"{port}"], - subprocess.Popen([orig_python, os.path.join(os.path.dirname(__file__), 'launchbrowser.py'), file_path, f"{port}"]) - #stdout=subprocess.DEVNULL, - #stderr=subprocess.DEVNULL) - # Start server in a new thread - #server_thread = Thread(target=start_server, args=(port, temp_dir)) - #server_thread.daemon = True - #server_thread.start() - + subprocess.Popen( + [ + orig_python, + os.path.join(os.path.dirname(__file__), "launchbrowser.py"), + file_path, + f"{port}", + ] + ) # Open web browser to local server - webbrowser.open(f'http://localhost:{port}/') + webbrowser.open(f"http://localhost:{port}/") except: pass finally: os.chdir(curr_dir) - diff --git a/src/include/sampleheap.hpp b/src/include/sampleheap.hpp index b64d4aeea..662526c26 100644 --- a/src/include/sampleheap.hpp +++ b/src/include/sampleheap.hpp @@ -20,7 +20,7 @@ // We're unable to use the limited API because, for example, // there doesn't seem to be a function returning co_filename -//#define Py_LIMITED_API 0x03070000 +// #define Py_LIMITED_API 0x03070000 #include "common.hpp" #include "mallocrecursionguard.hpp" diff --git a/src/include/traceconfig.hpp b/src/include/traceconfig.hpp index 8a4503389..5310bb8a6 100644 --- a/src/include/traceconfig.hpp +++ b/src/include/traceconfig.hpp @@ -4,6 +4,7 @@ #define __TRACECONFIG_H #include + #include #include #include @@ -34,14 +35,14 @@ class TraceConfig { // Defensive programming. return false; } - + auto res = _memoize.find(filename); - if ( res != _memoize.end()) { + if (res != _memoize.end()) { return res->second; } - // Return false if filename contains paths corresponding to the native Python libraries. - // This is to avoid profiling the Python interpreter itself. - // Also exclude site-packages and any IPython files. + // Return false if filename contains paths corresponding to the native + // Python libraries. This is to avoid profiling the Python interpreter + // itself. Also exclude site-packages and any IPython files. #if defined(_WIN32) // If on Windows, use \\ as the path separator. 
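start_server and show_browser above implement a serve-then-open flow: copy the profile into a temporary directory, serve that directory over HTTP on localhost, and point the default browser at it. A compressed standard-library sketch of the same idea, kept in-process instead of delegating to launchbrowser.py (the port number is illustrative):

    import http.server
    import os
    import shutil
    import socketserver
    import tempfile
    import threading
    import webbrowser

    def serve_and_open(html_file: str, port: int = 8088) -> None:
        temp_dir = tempfile.mkdtemp()
        shutil.copy(html_file, os.path.join(temp_dir, "index.html"))

        class Handler(http.server.SimpleHTTPRequestHandler):
            def __init__(self, *args, **kwargs):
                # directory= serves temp_dir without os.chdir'ing the
                # whole process, unlike the chdir in the code above.
                super().__init__(*args, directory=temp_dir, **kwargs)

        httpd = socketserver.TCPServer(("localhost", port), Handler)
        threading.Thread(target=httpd.serve_forever, daemon=True).start()
        webbrowser.open(f"http://localhost:{port}/")

Scalene instead spawns launchbrowser.py as a separate process, which lets the server outlive the profiled program.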
@@ -52,25 +53,30 @@ class TraceConfig { #endif if (!profile_all) { + auto python_lib = + std::string("lib") + std::string(PATH_SEP) + std::string("python"); + auto scalene_lib = std::string("scalene") + std::string(PATH_SEP) + + std::string("scalene"); + auto anaconda_lib = + std::string("anaconda3") + std::string(PATH_SEP) + std::string("lib"); - auto python_lib = std::string("lib") + std::string(PATH_SEP) + std::string("python"); - auto scalene_lib = std::string("scalene") + std::string(PATH_SEP) + std::string("scalene"); - auto anaconda_lib = std::string("anaconda3") + std::string(PATH_SEP) + std::string("lib"); - if (strstr(filename, python_lib.c_str()) || - strstr(filename, scalene_lib.c_str()) || - strstr(filename, anaconda_lib.c_str()) || - // strstr(filename, "site-packages") != nullptr || - (strstr(filename, "<") && (strstr(filename, "<ipython") == nullptr))) { - _memoize.insert(std::pair<std::string, bool>(std::string(filename), false)); - return false; + if (strstr(filename, python_lib.c_str()) || + strstr(filename, scalene_lib.c_str()) || + strstr(filename, anaconda_lib.c_str()) || + // strstr(filename, "site-packages") != nullptr || + (strstr(filename, "<") && + (strstr(filename, "<ipython") == nullptr))) { + _memoize.insert( + std::pair<std::string, bool>(std::string(filename), false)); + return false; } } - + if (owner != nullptr) { for (char* traceable : items) { if (strstr(filename, traceable)) { - _memoize.insert(std::pair<std::string, bool>(std::string(filename), true)); + _memoize.insert( + std::pair<std::string, bool>(std::string(filename), true)); return true; } } @@ -97,7 +103,8 @@ class TraceConfig { // Now change back to the original current working directory. chdir(oldcwd); - _memoize.insert(std::pair<std::string, bool>(std::string(filename), result)); + _memoize.insert( + std::pair<std::string, bool>(std::string(filename), result)); return result; } diff --git a/src/source/libscalene.cpp b/src/source/libscalene.cpp index 0930b5f14..8ca10f2b7 100644 --- a/src/source/libscalene.cpp +++ b/src/source/libscalene.cpp @@ -29,7 +29,7 @@ using BaseHeap = HL::OneHeap; extern "C" void _putchar(char ch) { ::write(1, (void *)&ch, 1); } constexpr uint64_t DefaultAllocationSamplingRate = - 1 * 10485767ULL; // was 1 * 1549351ULL; + 1 * 10485767ULL; // was 1 * 1549351ULL; constexpr uint64_t MemcpySamplingRate = DefaultAllocationSamplingRate * 7; /** @@ -130,7 +130,7 @@ class MakeLocalAllocator { /// @brief the actual allocator we use to satisfy object allocations PyMemAllocatorEx localAlloc; - static inline PyMemAllocatorEx* get_original_allocator() { + static inline PyMemAllocatorEx *get_original_allocator() { // poor man's "static inline" member static PyMemAllocatorEx original_allocator; return &original_allocator; @@ -153,11 +153,13 @@ class MakeLocalAllocator { #if USE_HEADERS void *buf = nullptr; const auto allocSize = len + sizeof(ScaleneHeader); - buf = get_original_allocator()->malloc(get_original_allocator()->ctx, allocSize); + buf = get_original_allocator()->malloc(get_original_allocator()->ctx, + allocSize); auto *header = new (buf) ScaleneHeader(len); class Nada {}; #else - auto *header = (ScaleneHeader *)get_original_allocator()->malloc(get_original_allocator()->ctx, len); + auto *header = (ScaleneHeader *)get_original_allocator()->malloc( + get_original_allocator()->ctx, len); #endif assert(header); // We expect this to always succeed.
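The TraceConfig::should_trace hunks above consult _memoize before running their strstr pattern checks, so each distinct filename pays for the matching only once, no matter how many samples later land in it. The same caching shape in Python terms (a sketch: the predicate below is a stand-in for the real filter, not Scalene's logic verbatim):

    from typing import Callable, Dict

    class TraceFilter:
        # Memoize an expensive per-filename predicate, as _memoize does.
        def __init__(self, predicate: Callable[[str], bool]) -> None:
            self._predicate = predicate
            self._memo: Dict[str, bool] = {}

        def should_trace(self, filename: str) -> bool:
            if filename not in self._memo:
                # Run the (possibly expensive) checks once per filename.
                self._memo[filename] = self._predicate(filename)
            return self._memo[filename]

    # Stand-in predicate echoing the hunk above: skip synthetic "<...>"
    # filenames unless they are IPython cells.
    should_trace = TraceFilter(
        lambda f: not (f.startswith("<") and not f.startswith("<ipython"))
    ).should_trace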
if (!m.wasInMalloc()) { @@ -190,7 +192,8 @@ class MakeLocalAllocator { if (!m.wasInMalloc()) { TheHeapWrapper::register_free(sz, ptr); } - get_original_allocator()->free(get_original_allocator()->ctx, ScaleneHeader::getHeader(ptr)); + get_original_allocator()->free(get_original_allocator()->ctx, + ScaleneHeader::getHeader(ptr)); } } @@ -205,8 +208,9 @@ class MakeLocalAllocator { const auto sz = ScaleneHeader::getSize(ptr); void *p = nullptr; const auto allocSize = new_size + sizeof(ScaleneHeader); - void *buf = get_original_allocator()->realloc( - get_original_allocator()->ctx, ScaleneHeader::getHeader(ptr), allocSize); + void *buf = get_original_allocator()->realloc(get_original_allocator()->ctx, + ScaleneHeader::getHeader(ptr), + allocSize); ScaleneHeader *result = new (buf) ScaleneHeader(new_size); if (result && !m.wasInMalloc()) { if (sz < new_size) { @@ -235,8 +239,8 @@ }; // from pywhere.hpp -decltype(p_whereInPython) __attribute((visibility("default"))) -p_whereInPython{nullptr}; +decltype(p_whereInPython) + __attribute((visibility("default"))) p_whereInPython{nullptr}; std::atomic_bool __attribute((visibility("default"))) p_scalene_done{true}; diff --git a/src/source/pywhere.cpp b/src/source/pywhere.cpp index 9234a4a58..d501d6ae8 100644 --- a/src/source/pywhere.cpp +++ b/src/source/pywhere.cpp @@ -1,16 +1,15 @@ #include "pywhere.hpp" -#include "traceconfig.hpp" #include #include #include +#include #include -#include #include +#include -#include - +#include "traceconfig.hpp" // NOTE: uncomment for debugging, but this causes issues // for production builds on Alpine @@ -59,19 +58,19 @@ inline int PyFrame_GetLasti(PyFrameObject* frame) { return frame->f_lasti; } #if PY_VERSION_HEX >= 0x030B0000 typedef struct _frame { - PyObject_HEAD - PyFrameObject *f_back; /* previous frame, or NULL */ - void *f_frame; /* points to the frame data */ - PyObject *f_trace; /* Trace function */ - int f_lineno; /* Current line number. Only valid if non-zero */ - char f_trace_lines; /* Emit per-line trace events? */ - char f_trace_opcodes; /* Emit per-opcode trace events? */ - char f_fast_as_locals; /* Have the fast locals of this frame been converted to a dict? */ - /* The frame data, if this frame object owns the frame */ - PyObject *_f_frame_data[1]; + PyObject_HEAD PyFrameObject* f_back; /* previous frame, or NULL */ + void* f_frame; /* points to the frame data */ + PyObject* f_trace; /* Trace function */ + int f_lineno; /* Current line number. Only valid if non-zero */ + char f_trace_lines; /* Emit per-line trace events? */ + char f_trace_opcodes; /* Emit per-opcode trace events? */ + char f_fast_as_locals; /* Have the fast locals of this frame been converted to + a dict? */ + /* The frame data, if this frame object owns the frame */ + PyObject* _f_frame_data[1]; } PyFrameType; #else -typedef PyFrameObject PyFrameType; +typedef PyFrameObject PyFrameType; #endif static PyPtr<PyFrameObject> findMainPythonThread_frame() { @@ -94,7 +93,8 @@ static PyPtr<PyFrameObject> findMainPythonThread_frame() { return PyPtr<PyFrameObject>(main ? 
PyThreadState_GetFrame(main) : nullptr); } -// I'm not sure whether last_profiled_invalidated is quite needed, so I'm leaving this infrastructure here +// I'm not sure whether last_profiled_invalidated is quite needed, so I'm +// leaving this infrastructure here // PyObject* get_last_profiled_invalidated(PyObject* self, PyObject* args) { if (last_profiled_invalidated) { @@ -113,24 +113,21 @@ PyObject* set_last_profiled_invalidated_false(PyObject* self, PyObject* args) { Py_RETURN_NONE; } - PyObject* set_scalene_done_true(PyObject* self, PyObject* args) { - auto scalene_done = - (std::atomic_bool*)dlsym(RTLD_DEFAULT, "p_scalene_done"); - if (scalene_done == nullptr) { - PyErr_SetString(PyExc_Exception, "Unable to find p_scalene_done"); - return NULL; - } + auto scalene_done = (std::atomic_bool*)dlsym(RTLD_DEFAULT, "p_scalene_done"); + if (scalene_done == nullptr) { + PyErr_SetString(PyExc_Exception, "Unable to find p_scalene_done"); + return NULL; + } *scalene_done = true; Py_RETURN_NONE; } PyObject* set_scalene_done_false(PyObject* self, PyObject* args) { - auto scalene_done = - (std::atomic_bool*)dlsym(RTLD_DEFAULT, "p_scalene_done"); - if (scalene_done == nullptr) { - PyErr_SetString(PyExc_Exception, "Unable to find p_whereInPython"); - return NULL; - } + auto scalene_done = (std::atomic_bool*)dlsym(RTLD_DEFAULT, "p_scalene_done"); + if (scalene_done == nullptr) { + PyErr_SetString(PyExc_Exception, "Unable to find p_whereInPython"); + return NULL; + } *scalene_done = false; Py_RETURN_NONE; } @@ -170,7 +167,7 @@ int whereInPython(std::string& filename, int& lineno, int& bytei) { PyFrame_GetCode(static_cast<PyFrameObject*>(frame)); PyPtr<> co_filename = PyUnicode_AsASCIIString(static_cast<PyCodeObject*>(code)->co_filename); - + if (!(static_cast<PyObject*>(co_filename))) { return 0; } @@ -181,7 +178,6 @@ int whereInPython(std::string& filename, int& lineno, int& bytei) { } if (traceConfig->should_trace(filenameStr)) { - #if defined(PyPy_FatalError) // If this macro is defined, we are compiling PyPy, which // AFAICT does not have any way to access bytecode index, so @@ -191,7 +187,7 @@ int whereInPython(std::string& filename, int& lineno, int& bytei) { bytei = PyFrame_GetLasti(static_cast<PyFrameObject*>(frame)); #endif lineno = PyFrame_GetLineNumber(static_cast<PyFrameObject*>(frame)); - + filename = filenameStr; return 1; } @@ -222,7 +218,6 @@ static PyObject* register_files_to_profile(PyObject* self, PyObject* args) { } *p_where = whereInPython; - Py_RETURN_NONE; } @@ -246,16 +241,15 @@ typedef struct { static unchanging_modules module_pointers; - static bool on_stack(char* outer_filename, int lineno, PyFrameObject* frame) { - while(frame != NULL) { + while (frame != NULL) { int iter_lineno = PyFrame_GetLineNumber(frame); - PyPtr<PyCodeObject> code = - PyFrame_GetCode(static_cast<PyFrameObject*>(frame)); + PyFrame_GetCode(static_cast<PyFrameObject*>(frame)); - PyPtr<> co_filename(PyUnicode_AsASCIIString(static_cast<PyCodeObject*>(code)->co_filename)); + PyPtr<> co_filename( + PyUnicode_AsASCIIString(static_cast<PyCodeObject*>(code)->co_filename)); auto fname = PyBytes_AsString(static_cast<PyObject*>(co_filename)); if (iter_lineno == lineno && strstr(fname, outer_filename)) { Py_XDECREF(frame); @@ -270,10 +264,10 @@ static bool on_stack(char* outer_filename, int lineno, PyFrameObject* frame) { static void allocate_newline() { PyPtr<> abc(PyLong_FromLong(NEWLINE_TRIGGER_LENGTH)); PyPtr<> tmp(PyByteArray_FromObject(static_cast<PyObject*>(abc))); - } -static int trace_func(PyObject* obj, PyFrameObject* frame, int what, PyObject* arg) { +static int trace_func(PyObject* obj, PyFrameObject* frame, int what, + PyObject* arg) { if (what != PyTrace_LINE) { return 0;
} @@ -282,17 +276,23 @@ static int trace_func(PyObject* obj, PyFrameObject* frame, int what, PyObject* a PyPtr<PyCodeObject> code(PyFrame_GetCode(static_cast<PyFrameObject*>(frame))); // Take ownership of these right now - PyObject* last_fname(PyList_GetItem(static_cast<PyObject*>(module_pointers.scalene_last_profiled), 0)); + PyObject* last_fname(PyList_GetItem( + static_cast<PyObject*>(module_pointers.scalene_last_profiled), 0)); Py_IncRef(last_fname); - PyObject* last_lineno(PyList_GetItem(static_cast<PyObject*>(module_pointers.scalene_last_profiled), 1)); + PyObject* last_lineno(PyList_GetItem( + static_cast<PyObject*>(module_pointers.scalene_last_profiled), 1)); Py_IncRef(last_lineno); auto lineno_l = PyLong_AsLong(static_cast<PyObject*>(last_lineno)); - if (lineno == lineno_l && PyUnicode_Compare(static_cast<PyObject*>(last_fname), static_cast<PyCodeObject*>(code)->co_filename) == 0) { + if (lineno == lineno_l && + PyUnicode_Compare(static_cast<PyObject*>(last_fname), + static_cast<PyCodeObject*>(code)->co_filename) == 0) { return 0; } - PyPtr<> last_fname_unicode( PyUnicode_AsASCIIString(last_fname)); - auto last_fname_s = PyBytes_AsString(static_cast<PyObject*>(last_fname_unicode)); - PyPtr<> co_filename(PyUnicode_AsASCIIString(static_cast<PyCodeObject*>(code)->co_filename)); + PyPtr<> last_fname_unicode(PyUnicode_AsASCIIString(last_fname)); + auto last_fname_s = + PyBytes_AsString(static_cast<PyObject*>(last_fname_unicode)); + PyPtr<> co_filename( + PyUnicode_AsASCIIString(static_cast<PyCodeObject*>(code)->co_filename)); // Needed because decref will be called in on_stack Py_INCREF(frame); @@ -303,48 +303,54 @@ static int trace_func(PyObject* obj, PyFrameObject* frame, int what, PyObject* a PyEval_SetTrace(NULL, NULL); Py_IncRef(module_pointers.nada); - auto res = PyList_SetItem(module_pointers.scalene_last_profiled, 0, module_pointers.nada); + auto res = PyList_SetItem(module_pointers.scalene_last_profiled, 0, + module_pointers.nada); Py_IncRef(module_pointers.zero); - res = PyList_SetItem(module_pointers.scalene_last_profiled, 1, module_pointers.zero); + res = PyList_SetItem(module_pointers.scalene_last_profiled, 1, + module_pointers.zero); - PyObject* last_profiled_ret(PyTuple_Pack(2, last_fname,last_lineno )); + PyObject* last_profiled_ret(PyTuple_Pack(2, last_fname, last_lineno)); Py_IncRef(module_pointers.zero); - res = PyList_SetItem(module_pointers.scalene_last_profiled, 2, module_pointers.zero); + res = PyList_SetItem(module_pointers.scalene_last_profiled, 2, + module_pointers.zero); allocate_newline(); last_profiled_invalidated = true; Py_IncRef(last_profiled_ret); - + res = PyList_Append(module_pointers.invalidate_queue, last_profiled_ret); - return 0; } static PyObject* populate_struct(PyObject* self, PyObject* args) { - PyObject* scalene_module(PyImport_GetModule(PyUnicode_FromString("scalene"))); // New reference - PyObject* scalene_dict(PyModule_GetDict(static_cast<PyObject*>(scalene_module))); + PyObject* scalene_module( + PyImport_GetModule(PyUnicode_FromString("scalene"))); // New reference + PyObject* scalene_dict( + PyModule_GetDict(static_cast<PyObject*>(scalene_module))); Py_IncRef(scalene_dict); - PyObject* scalene_profiler_module(PyDict_GetItemString(scalene_dict, "scalene_profiler")); + PyObject* scalene_profiler_module( + PyDict_GetItemString(scalene_dict, "scalene_profiler")); Py_IncRef(scalene_profiler_module); - PyObject* scalene_class(PyDict_GetItemString(PyModule_GetDict(scalene_profiler_module), "Scalene")); + PyObject* scalene_class(PyDict_GetItemString( + PyModule_GetDict(scalene_profiler_module), "Scalene")); Py_IncRef(scalene_class); PyObject* scalene_class_dict(PyObject_GenericGetDict(scalene_class, NULL)); - PyObject* 
last_profiled(PyObject_GetAttrString(scalene_class, "_Scalene__last_profiled")); - PyObject* invalidate_queue(PyObject_GetAttrString(scalene_class, "_Scalene__invalidate_queue")); + PyObject* last_profiled( + PyObject_GetAttrString(scalene_class, "_Scalene__last_profiled")); + PyObject* invalidate_queue( + PyObject_GetAttrString(scalene_class, "_Scalene__invalidate_queue")); PyObject* zero(PyLong_FromSize_t(0)); PyObject* nada(PyUnicode_FromString("NADA")); - module_pointers = { - scalene_module, - scalene_dict, - scalene_profiler_module, - scalene_class, - scalene_class_dict, - last_profiled, - invalidate_queue, - nada, - zero - }; + module_pointers = {scalene_module, + scalene_dict, + scalene_profiler_module, + scalene_class, + scalene_class_dict, + last_profiled, + invalidate_queue, + nada, + zero}; Py_RETURN_NONE; } @@ -387,13 +393,15 @@ static PyMethodDef EmbMethods[] = { {"disable_settrace", disable_settrace, METH_NOARGS, ""}, {"populate_struct", populate_struct, METH_NOARGS, ""}, {"depopulate_struct", depopulate_struct, METH_NOARGS, ""}, - {"get_last_profiled_invalidated", get_last_profiled_invalidated, METH_NOARGS, ""}, - {"set_last_profiled_invalidated_true", set_last_profiled_invalidated_true, METH_NOARGS, ""}, - {"set_last_profiled_invalidated_false", set_last_profiled_invalidated_false, METH_NOARGS, ""}, + {"get_last_profiled_invalidated", get_last_profiled_invalidated, + METH_NOARGS, ""}, + {"set_last_profiled_invalidated_true", set_last_profiled_invalidated_true, + METH_NOARGS, ""}, + {"set_last_profiled_invalidated_false", set_last_profiled_invalidated_false, + METH_NOARGS, ""}, {"set_scalene_done_true", set_scalene_done_true, METH_NOARGS, ""}, {"set_scalene_done_false", set_scalene_done_false, METH_NOARGS, ""}, - {NULL, NULL, 0, NULL}}; static PyModuleDef EmbedModule = {PyModuleDef_HEAD_INIT,