def _get_julia_project_dir():
    """Return the directory that holds the active Julia project file.

    Runs a ``julia`` subprocess that prints ``Pkg.project().path`` and returns
    the parent directory of the printed path. The subprocess inherits
    ``os.environ``, so the project that is queried is the one selected by the
    ``JULIA_PROJECT`` environment variable.

    Returns:
        pathlib.Path: Parent directory of the path reported by Julia.

    Raises:
        AssertionError: If ``JULIA_PROJECT`` is unset or empty.
        FileNotFoundError: If the ``julia`` executable cannot be found.
        RuntimeError: If Julia exits with a non-zero status or prints nothing.
    """
    # Raised explicitly (rather than via `assert`) so the check is not
    # stripped under `python -O`. The exception type is kept as
    # AssertionError so the failure mode callers see is unchanged.
    if not os.environ.get("JULIA_PROJECT"):
        raise AssertionError(
            "JULIA_PROJECT must be set to a non-empty value before the "
            "Julia project directory can be queried."
        )

    cmds = [
        "julia",
        "--compile=min",
        "--startup-file=no",
        "-O0",
        "-g0",
        # Pass the flag and its code as separate argv entries.
        "-e",
        "import Pkg: project; print(project().path)",
    ]
    try:
        result = subprocess.run(
            cmds,
            capture_output=True,
            env=os.environ,
        )
    except FileNotFoundError as err:
        # `.get` so that a missing PATH does not mask the real problem
        # with a KeyError.
        env_path = os.environ.get("PATH", "")
        raise FileNotFoundError(
            f"Julia is not installed in your PATH. Please install Julia and add it to your PATH.\n\nCurrent PATH: {env_path}",
        ) from err

    julia_project_file = result.stdout.decode().strip()
    # Without this check a failed Julia call yields an empty string, and
    # `Path("").parent` silently resolves to the current working directory.
    if result.returncode != 0 or not julia_project_file:
        stderr_text = result.stderr.decode(errors="replace").strip()
        raise RuntimeError(
            "Could not determine the Julia project directory "
            f"(julia exit code {result.returncode}).\n{stderr_text}"
        )
    return Path(julia_project_file).parent
- """ - import julia - - _julia_version_assertion() - # Set JULIA_PROJECT so that we install in the pysr environment - processed_julia_project, is_shared = _process_julia_project(julia_project) - _set_julia_project_env(processed_julia_project, is_shared) - - if precompile == False: - os.environ["JULIA_PKG_PRECOMPILE_AUTO"] = "0" - - julia.install(quiet=quiet) - Main, init_log = init_julia(julia_project, quiet=quiet, return_aux=True) - io_arg = _get_io_arg(quiet) - - if precompile is None: - precompile = init_log["compiled_modules"] - - if not precompile: - Main.eval('ENV["JULIA_PKG_PRECOMPILE_AUTO"] = 0') - - if is_shared: - # Install SymbolicRegression.jl: - _add_sr_to_julia_project(Main, io_arg) - - Main.eval("using Pkg") - Main.eval(f"Pkg.instantiate({io_arg})") - - if precompile: - Main.eval(f"Pkg.precompile({io_arg})") - - if not quiet: - warnings.warn( - "It is recommended to restart Python after installing PySR's dependencies," - " so that the Julia environment is properly initialized." - ) - - def _import_error(): return """ Required dependencies are not installed or built. Run the following code in the Python REPL: @@ -157,7 +147,14 @@ def _check_for_conflicting_libraries(): # pragma: no cover ) -def init_julia(julia_project=None, quiet=False, julia_kwargs=None, return_aux=False): +def init_julia( + julia_project=None, + quiet=False, + use_sysimage=True, + sysimage_name=None, + julia_kwargs=None, + return_aux=False, +): """Initialize julia binary, turning off compiled modules if needed.""" global julia_initialized global julia_kwargs_at_initialization @@ -166,16 +163,35 @@ def init_julia(julia_project=None, quiet=False, julia_kwargs=None, return_aux=Fa if not julia_initialized: _check_for_conflicting_libraries() + _julia_version_assertion() + processed_julia_project, is_shared = _process_julia_project(julia_project) + _set_julia_project_env(processed_julia_project, is_shared) + + # TODO: Make checking optional. 
+ # Check if sysimage exists: + if use_sysimage and sysimage_name is None and not julia_initialized: + # TODO: Is there a faster way to get this dir? + expected_sysimage = _get_julia_project_dir() / "pysr.so" + # Check if this file exists: + if expected_sysimage.exists(): + sysimage_name = str(expected_sysimage) + if julia_kwargs is None: julia_kwargs = {"optimize": 3} - from julia.core import JuliaInfo, UnsupportedPythonError + if ( + sysimage_name is not None + and "sysimage" not in julia_kwargs + and not julia_initialized + ): + sysimage = str(sysimage_name) + print(f"Found existing sysimage at {sysimage}. Loading.") + julia_kwargs["sysimage"] = sysimage_name - _julia_version_assertion() - processed_julia_project, is_shared = _process_julia_project(julia_project) - _set_julia_project_env(processed_julia_project, is_shared) + from julia.core import JuliaInfo, UnsupportedPythonError try: + # TODO: Can we just get env info from this? info = JuliaInfo.load(julia="julia") except FileNotFoundError: env_path = os.environ["PATH"] @@ -243,21 +259,6 @@ def init_julia(julia_project=None, quiet=False, julia_kwargs=None, return_aux=Fa return Main -def _add_sr_to_julia_project(Main, io_arg): - Main.eval("using Pkg") - Main.sr_spec = Main.PackageSpec( - name="SymbolicRegression", - url="https://github.com/MilesCranmer/SymbolicRegression.jl", - rev="v" + __symbolic_regression_jl_version__, - ) - Main.clustermanagers_spec = Main.PackageSpec( - name="ClusterManagers", - url="https://github.com/JuliaParallel/ClusterManagers.jl", - rev="14e7302f068794099344d5d93f71979aaf4fbeb3", - ) - Main.eval(f"Pkg.add([sr_spec, clustermanagers_spec], {io_arg})") - - def _escape_filename(filename): """Turn a path into a string with correctly escaped backslashes.""" str_repr = str(filename) @@ -299,16 +300,6 @@ def _load_cluster_manager(Main, cluster_manager): return Main.eval(f"addprocs_{cluster_manager}") -def _update_julia_project(Main, is_shared, io_arg): - try: - if is_shared: - 
"""Functions to create a sysimage for PySR."""

import os
from pathlib import Path
import warnings
from multiprocessing import cpu_count

import numpy as np
from julia.api import JuliaError

from .version import __symbolic_regression_jl_version__
from .julia_helpers import (
    init_julia,
    _julia_version_assertion,
    _set_julia_project_env,
    _get_io_arg,
    _process_julia_project,
    _import_error,
)


def _add_sr_to_julia_project(Main, io_arg):
    """Add SymbolicRegression.jl and its companion packages to the Julia project.

    One ``PackageSpec`` per package is stored as a variable on Julia's
    ``Main``; all of them are then added with a single ``Pkg.add`` call and
    PyCall is built afterwards.

    Args:
        Main: Julia ``Main`` module object, as returned by ``init_julia``.
        io_arg: Text spliced verbatim as the trailing argument of the Julia
            ``Pkg`` calls (the value produced by ``_get_io_arg``).
    """
    Main.eval("using Pkg")
    Main.sr_spec = Main.PackageSpec(
        name="SymbolicRegression",
        url="https://github.com/MilesCranmer/SymbolicRegression.jl",
        rev="v" + __symbolic_regression_jl_version__,
    )
    Main.clustermanagers_spec = Main.PackageSpec(
        name="ClusterManagers",
        rev="v0.4.2",
    )
    Main.packagecompiler_spec = Main.PackageSpec(
        name="PackageCompiler",
        rev="v2.1.0",
    )
    Main.pycall_spec = Main.PackageSpec(
        name="PyCall",
        rev="v1.94.1",
    )
    # Names of the Julia-side variables assigned above, in install order.
    spec_names = [
        "sr_spec",
        "clustermanagers_spec",
        "packagecompiler_spec",
        "pycall_spec",
    ]
    Main.eval(f"Pkg.add([{', '.join(spec_names)}], {io_arg})")
    Main.eval(f'Pkg.build("PyCall", {io_arg})')


def _update_julia_project(Main, is_shared, io_arg):
    """Resolve the active Julia project, adding PySR's packages first if shared.

    Args:
        Main: Julia ``Main`` module object.
        is_shared: When true, ``_add_sr_to_julia_project`` is run before
            ``Pkg.resolve``.
        io_arg: Text spliced verbatim as the argument of the ``Pkg`` calls.

    Raises:
        ImportError: If Julia raises ``JuliaError`` or a ``RuntimeError``
            occurs; the message comes from ``_import_error()`` and the
            original exception is chained as the cause.
    """
    try:
        if is_shared:
            _add_sr_to_julia_project(Main, io_arg)
        Main.eval("using Pkg")
        Main.eval(f"Pkg.resolve({io_arg})")
    except (JuliaError, RuntimeError) as e:
        raise ImportError(_import_error()) from e


def install(julia_project=None, quiet=False, precompile=None):  # pragma: no cover
    """
    Install PyCall.jl and all required dependencies for SymbolicRegression.jl.

    Also updates the local Julia registry.

    Args:
        julia_project: Julia project to install into; forwarded to
            ``_process_julia_project`` and ``init_julia``.
        quiet: Forwarded to ``julia.install``, ``init_julia`` and
            ``_get_io_arg``; also suppresses the final restart warning.
        precompile: Whether to run ``Pkg.precompile`` after instantiating.
            ``None`` (default) uses the ``"compiled_modules"`` value reported
            by ``init_julia``. A falsy value disables automatic
            precompilation through ``JULIA_PKG_PRECOMPILE_AUTO``.
    """
    import julia

    _julia_version_assertion()
    # Set JULIA_PROJECT so that we install in the pysr environment
    processed_julia_project, is_shared = _process_julia_project(julia_project)
    _set_julia_project_env(processed_julia_project, is_shared)

    # Explicitly "given and falsy" instead of `== False`; `None` means
    # "decide after Julia has started" and must not disable precompilation.
    if precompile is not None and not precompile:
        os.environ["JULIA_PKG_PRECOMPILE_AUTO"] = "0"

    julia.install(quiet=quiet)
    # The sysimage is deliberately not loaded while installing.
    Main, init_log = init_julia(
        julia_project, quiet=quiet, use_sysimage=False, return_aux=True
    )
    io_arg = _get_io_arg(quiet)

    if precompile is None:
        precompile = init_log["compiled_modules"]

    if not precompile:
        Main.eval('ENV["JULIA_PKG_PRECOMPILE_AUTO"] = 0')

    if is_shared:
        # Install SymbolicRegression.jl:
        _add_sr_to_julia_project(Main, io_arg)

    Main.eval("using Pkg")
    Main.eval(f"Pkg.instantiate({io_arg})")
    if precompile:
        Main.eval(f"Pkg.precompile({io_arg})")

    if not quiet:
        warnings.warn(
            "It is recommended to restart Python after installing PySR's dependencies,"
            " so that the Julia environment is properly initialized."
        )


def compile(
    julia_project=None,
    quiet=False,
    sysimage_name="pysr.so",
):
    """Create a PackageCompiler.jl sysimage for SymbolicRegression.jl.

    Julia is started without an existing sysimage and with compiled modules
    turned off, then ``PackageCompiler.create_sysimage`` writes the image
    into the directory of the active Julia project.

    Args:
        julia_project: Julia project to compile for; forwarded to
            ``init_julia``.
        quiet: Forwarded to ``init_julia``.
        sysimage_name: File name of the sysimage, joined onto the active
            project's directory. ``init_julia`` looks for ``pysr.so`` in the
            project directory, so only the default name is picked up
            automatically.
    """
    Main = init_julia(
        julia_project=julia_project,
        quiet=quiet,
        use_sysimage=False,
        julia_kwargs={
            "compiled_modules": False,
            "optimize": 3,
            "threads": cpu_count(),
            "compile": "all",
        },
    )
    cur_project_dir = Main.eval("dirname(Base.active_project())")
    sysimage_path = str(Path(cur_project_dir) / sysimage_name)
    from julia import PackageCompiler

    # Load the package into the session before the image is created.
    Main.eval("using SymbolicRegression")

    PackageCompiler.create_sysimage(sysimage_path=sysimage_path)
import package_compiler class TestJuliaProject(unittest.TestCase): @@ -29,7 +30,7 @@ def test_custom_shared_env(self): f'pushfirst!(DEPOT_PATH, "{julia_helpers._escape_filename(tmpdir)}")' ) test_env_name = "@pysr_test_env" - julia_helpers.install(julia_project=test_env_name) + package_compiler.install(julia_project=test_env_name) Main = julia_helpers.init_julia(julia_project=test_env_name) # Try to use env: