diff --git a/.github/actions/setup-python/action.yaml b/.github/actions/setup-python/action.yaml new file mode 100644 index 0000000..cbef6de --- /dev/null +++ b/.github/actions/setup-python/action.yaml @@ -0,0 +1,46 @@ +name: Setup Python Environment +description: Setup Python, reinstall pip, and install dependencies +inputs: + python-version: + description: Python version to install, e.g. 3.11 or 3.x + required: true + +runs: + using: "composite" + steps: + - name: Cache pip dependencies + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ inputs.python-version }}-${{ hashFiles('requirements/release.txt') }}-${{ hashFiles('requirements/dev.txt') }} # one cache entry per OS and Python version, so matrix jobs do not share a key + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + + - name: Reinstall pip + shell: bash + run: | + PY_VER=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')") + PY_MAJOR=$(echo "$PY_VER" | cut -d. -f1) + PY_MINOR=$(echo "$PY_VER" | cut -d. -f2) + + if [ "$PY_MAJOR" -eq 3 ] && [ "$PY_MINOR" -le 8 ]; then + URL="https://bootstrap.pypa.io/pip/${PY_VER}/get-pip.py" + else + URL="https://bootstrap.pypa.io/get-pip.py" + fi + + curl -fsSL "$URL" -o /tmp/get-pip.py + python /tmp/get-pip.py --force-reinstall + + pip --version + pip3 --version + + - name: Install dependencies + shell: bash + run: | + python3 -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install --upgrade -r requirements.txt; fi + pip3 install --upgrade -r requirements/dev.txt diff --git a/.github/workflows/push-pr_workflow.yml b/.github/workflows/push-pr_workflow.yml index 8ced825..8962c17 100644 --- a/.github/workflows/push-pr_workflow.yml +++ b/.github/workflows/push-pr_workflow.yml @@ -9,40 +9,43 @@ jobs: if: github.event_name == 'pull_request' steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Checkout the whole history, in case the target is way far behind + + - name: Check if target branch has been merged + run: | + if git merge-base --is-ancestor ${{ 
github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}; then # compare the PR head, not github.sha: the merge commit always contains the base + echo "Target branch has been merged into the source branch." + else + echo "Target branch has not been merged into the source branch. Please merge in target first." + exit 1 + fi - name: Check that CHANGELOG has been updated run: | # If this step fails, this means you haven't updated the CHANGELOG.md file with notes on your contribution. - git diff --name-only $(git merge-base origin/main HEAD) | grep '^CHANGELOG.md$' && echo "Thanks for helping keep our CHANGELOG up-to-date!" + if git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} | grep -q '^CHANGELOG.md$'; then + echo "Thanks for helping keep our CHANGELOG up-to-date!" + else + echo "Please update the CHANGELOG.md file with notes on your contribution." + exit 1 + fi Lint: runs-on: ubuntu-latest env: - MAX_LINE_LENGTH: 88 - MAX_COMPLEXITY: 18 + MAX_LINE_LENGTH: 127 + MAX_COMPLEXITY: 15 steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.x' + - uses: actions/checkout@v4 - - name: Check cache - uses: actions/cache@v2 + - uses: ./.github/actions/setup-python with: - path: ~/.cache/pip - key: ${{ hashFiles('requirements/release.txt') }}-${{ hashFiles('requirements/dev.txt') }} - - - name: Install dependencies - run: | - python3 -m pip install --upgrade pip - if [ -f requirements.txt ]; then pip install --upgrade -r requirements.txt; fi - pip3 install --upgrade -r requirements/dev.txt + python-version: '3.x' - name: Lint with flake8 - if: always() run: | # stop the build if there are Python syntax errors or undefined names flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics @@ -50,25 +53,16 @@ jobs: flake8 . 
--count --max-complexity=$MAX_COMPLEXITY --statistics --max-line-length=$MAX_LINE_LENGTH - name: Lint with isort - if: always() run: | - python3 -m isort --check --line-length $MAX_LINE_LENGTH spellbook - # Skipping test_conduit due to temporary fix for missing conduit python package. - python3 -m isort --check --line-length $MAX_LINE_LENGTH --skip tests/data_formatting/conduit/test_conduit.py tests - python3 -m isort --check --line-length $MAX_LINE_LENGTH *.py + isort --check --line-length $MAX_LINE_LENGTH spellbook tests *.py - name: Lint with Black - if: always() run: | - python3 -m black --check --line-length $MAX_LINE_LENGTH --target-version py36 spellbook - python3 -m black --check --line-length $MAX_LINE_LENGTH --target-version py36 tests - python3 -m black --check --line-length $MAX_LINE_LENGTH --target-version py36 *.py + black --check --line-length $MAX_LINE_LENGTH --target-version py311 spellbook tests *.py - name: Lint with PyLint - if: always() run: | - python3 -m pylint spellbook --rcfile=setup.cfg --exit-zero - python3 -m pylint tests --rcfile=setup.cfg --exit-zero + pylint spellbook tests --rcfile=setup.cfg --exit-zero Local-test-suite: runs-on: ubuntu-latest @@ -78,23 +72,11 @@ jobs: python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} + - uses: actions/checkout@v4 - - name: Check cache - uses: actions/cache@v2 + - uses: ./.github/actions/setup-python with: - path: ${{ env.pythonLocation }} - key: ${{ env.pythonLocation }}-${{ hashFiles('requirements/release.txt') }}-${{ hashFiles('requirements/dev.txt') }} - - - name: Install dependencies - run: | - python3 -m pip install --upgrade pip - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - pip3 install -r requirements/dev.txt + python-version: ${{ matrix.python-version }} - name: Install 
merlin-spellbook to run unit tests run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 5542922..abe425e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to Merlin Spellbook will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Changed +- Updated GitHub actions + - Now uses a version of actions/cache that's not deprecated + - Utilizes a shared action for jobs to reduce duplicate code + ## [0.9.0] ### Added diff --git a/setup.cfg b/setup.cfg index 52466ea..c28ef1c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,7 +3,7 @@ multi_line_output=3 include_trailing_comma=True force_grid_wrap=0 use_parentheses=True -line_length=88 +line_length=127 known_first_party=merlin known_third_party=psutil,conduit,matplotlib,pandas,numpy lines_after_imports=2 @@ -11,8 +11,8 @@ lines_after_imports=2 [flake8] ignore = E203, E266, E501, W503 -max-line-length = 88 -max-complexity = 18 +max-line-length = 127 +max-complexity = 15 select = B,C,E,F,W,T4 [mypy] diff --git a/setup.py b/setup.py index 153e041..84acf30 100644 --- a/setup.py +++ b/setup.py @@ -58,10 +58,7 @@ def _pip_requirement(req): def _reqs(*f): return [ _pip_requirement(r) - for r in ( - _strip_comments(line) - for line in open(os.path.join(os.getcwd(), "requirements", *f)).readlines() - ) + for r in (_strip_comments(line) for line in open(os.path.join(os.getcwd(), "requirements", *f)).readlines()) if r ] diff --git a/spellbook/commands/collect.py b/spellbook/commands/collect.py index 033f975..9cfc3c0 100644 --- a/spellbook/commands/collect.py +++ b/spellbook/commands/collect.py @@ -11,9 +11,7 @@ type=str, help="whitespace separated list of files to collect", ) -@click.option( - "-outfile", required=False, default="results.hdf5", type=str, help="output file" -) +@click.option("-outfile", required=False, default="results.hdf5", 
type=str, help="output file") def cli(instring, outfile): """ Collect many json files into a single json file diff --git a/spellbook/commands/learn.py b/spellbook/commands/learn.py index 2978fa4..a723742 100644 --- a/spellbook/commands/learn.py +++ b/spellbook/commands/learn.py @@ -45,7 +45,5 @@ def cli(infile, x, y, outfile, regressor): """ from spellbook.ml import learn_alt as learn - args = SimpleNamespace( - **{"infile": infile, "X": x, "y": y, "outfile": outfile, "regressor": regressor} - ) + args = SimpleNamespace(**{"infile": infile, "X": x, "y": y, "outfile": outfile, "regressor": regressor}) learn.random_forest(args) diff --git a/spellbook/commands/serialize.py b/spellbook/commands/serialize.py index 6eeac7d..d31c91b 100644 --- a/spellbook/commands/serialize.py +++ b/spellbook/commands/serialize.py @@ -6,9 +6,7 @@ @click.command() -@click.option( - "--output", required=False, default="output.json", type=str, help="output file" -) +@click.option("--output", required=False, default="output.json", type=str, help="output file") @click.option( "--vars", required=False, diff --git a/spellbook/data_formatting/conduit/python/conduit_bundler.py b/spellbook/data_formatting/conduit/python/conduit_bundler.py index 9693e9f..20ce57f 100644 --- a/spellbook/data_formatting/conduit/python/conduit_bundler.py +++ b/spellbook/data_formatting/conduit/python/conduit_bundler.py @@ -58,9 +58,7 @@ def determine_protocol(fname): if ext.startswith("."): protocol = ext.lower().strip(".") else: - raise ValueError( - "{0} needs an ext (eg .hdf5) to determine protocol!".format(fname) - ) + raise ValueError("{0} needs an ext (eg .hdf5) to determine protocol!".format(fname)) # Map .h5 to .hdf5 if protocol == "h5": protocol = "hdf5" @@ -157,10 +155,7 @@ def dump_node( try: conduit.relay.io.save(conduit_node, fname, options=save_options) except TypeError: # Conduit version needs to be updated. - LOG.error( - "Unable to customize save: please upgrade conduit to " - "expose save options!" 
- ) + LOG.error("Unable to customize save: please upgrade conduit to " "expose save options!") conduit.relay.io.save(conduit_node, fname) else: conduit.relay.io.save(conduit_node, fname) diff --git a/spellbook/data_formatting/conduit/python/translator.py b/spellbook/data_formatting/conduit/python/translator.py index 93cbd92..3dc5d59 100644 --- a/spellbook/data_formatting/conduit/python/translator.py +++ b/spellbook/data_formatting/conduit/python/translator.py @@ -86,9 +86,7 @@ def run(_input, output, schema): if data_loader.has_path(sample_path): data_loader.read(filtered_node[path], sample_path) else: - filtered_node[sample_path] = ( - np.nan - ) # if a value is missing, that could be a problem + filtered_node[sample_path] = np.nan # if a value is missing, that could be a problem make_data_array_dict(all_dict, filtered_node) for dat in all_dict.keys(): @@ -129,9 +127,7 @@ def generate_scalar_path_pairs(node, path=""): children = node.child_names() for child in children: if isinstance(node[child], conduit.Node): - for pair in generate_scalar_path_pairs( - node[child], path=path + child + "/" - ): + for pair in generate_scalar_path_pairs(node[child], path=path + child + "/"): yield pair else: yield path + child, node[child] diff --git a/spellbook/data_formatting/stack_npz.py b/spellbook/data_formatting/stack_npz.py index f1aa463..a8e4450 100644 --- a/spellbook/data_formatting/stack_npz.py +++ b/spellbook/data_formatting/stack_npz.py @@ -24,9 +24,7 @@ def pad_many(arrays, dims, dont_pad_first=False, value=np.nan): if dont_pad_first: pad_dist[0] = 0 padder = np.column_stack((zeros, pad_dist)) - fixed.append( - np.pad(np.atleast_2d(a), padder, mode="constant", constant_values=value) - ) + fixed.append(np.pad(np.atleast_2d(a), padder, mode="constant", constant_values=value)) return fixed @@ -48,11 +46,7 @@ def run(self, target, source, force=False): if not force: if os.path.isfile(target): - print( - "stack_npz error opening target file (does {0} exist?).".format( - 
target - ) - ) + print("stack_npz error opening target file (does {0} exist?).".format(target)) print('Pass "-f" argument to force re-creation of output file.') return diff --git a/spellbook/data_formatting/translator.py b/spellbook/data_formatting/translator.py index 88cc515..775de60 100644 --- a/spellbook/data_formatting/translator.py +++ b/spellbook/data_formatting/translator.py @@ -13,12 +13,8 @@ def setup_argparse(): help=".json file with X and y data in each sample", default="results.json", ) - parser.add_argument( - "-output", help=".npz file with the arrays", default="results.npz" - ) - parser.add_argument( - "-schema", help="schema for a single sample data", default="features.json" - ) + parser.add_argument("-output", help=".npz file with the arrays", default="results.npz") + parser.add_argument("-schema", help="schema for a single sample data", default="features.json") return parser @@ -51,9 +47,7 @@ def generate_scalar_path_pairs(node, schema, path=""): if child in schema.keys(): if isinstance(node[child], dict): if isinstance(schema[child], dict): - for pair in generate_scalar_path_pairs( - node[child], schema[child], path=path + child + "/" - ): + for pair in generate_scalar_path_pairs(node[child], schema[child], path=path + child + "/"): yield pair else: if not isinstance(schema[child], dict): diff --git a/spellbook/ml/surrogates.py b/spellbook/ml/surrogates.py index 494e1c2..9cbbc13 100644 --- a/spellbook/ml/surrogates.py +++ b/spellbook/ml/surrogates.py @@ -65,11 +65,7 @@ def factory(cls, name, *args, **kwargs): if name in cls.all_regs: return cls.all_regs[name](*args, **kwargs) else: - raise ValueError( - "Unknown regressor name " - + name - + "! For valid choices see sklearnRegressors.names()" - ) + raise ValueError("Unknown regressor name " + name + "! 
For valid choices see sklearnRegressors.names()") @classmethod def names(cls): @@ -85,12 +81,8 @@ def test_factory(): def test_random_forest(): - rf1 = sklearnRegressors.factory( - "RandomForestRegressor", n_estimators=10, max_depth=5 - ) - rf2 = sklearnRegressors.factory( - "RandomForestRegressor", n_estimators=2, max_depth=3 - ) + rf1 = sklearnRegressors.factory("RandomForestRegressor", n_estimators=10, max_depth=5) + rf2 = sklearnRegressors.factory("RandomForestRegressor", n_estimators=2, max_depth=3) assert rf1.n_estimators == 10 assert rf1.max_depth == 5 diff --git a/spellbook/optimization/qoi.py b/spellbook/optimization/qoi.py index b16a65d..2abf8a4 100644 --- a/spellbook/optimization/qoi.py +++ b/spellbook/optimization/qoi.py @@ -41,10 +41,7 @@ def barrier(x, threshold, threshold_type="greater"): penalty[group2] = ( sign_x[group2] * xx[group2] - * ( - (1.0 / g0[group2]) - * ((gi[group2] / g0[group2]) ** 2 - 3.0 * (gi[group2] / g0[group2]) + 3.0) - ) + * ((1.0 / g0[group2]) * ((gi[group2] / g0[group2]) ** 2 - 3.0 * (gi[group2] / g0[group2]) + 3.0)) ) penalty[group3] = 0.0 @@ -134,9 +131,7 @@ def parse_constraints(constraint_args, data): threshold_type = "greater" splitter = ">" else: - raise ValueError( - 'Bad constraint format: must be "namevalue"' - ) + raise ValueError('Bad constraint format: must be "namevalue"') name, value_name = constraint.split(splitter) value = float(value_name) constraint_data.append((data[name], value, threshold_type)) diff --git a/spellbook/sampling/make_samples.py b/spellbook/sampling/make_samples.py index e899a67..dbf798f 100644 --- a/spellbook/sampling/make_samples.py +++ b/spellbook/sampling/make_samples.py @@ -152,9 +152,7 @@ def apply_repeat(self, x, repeat): try: repeat = [int(r) for r in repeat] except ValueError: - raise ValueError( - f"one of the values in {repeat} is not in integer format." 
- ) + raise ValueError(f"one of the values in {repeat} is not in integer format.") num_repeat = repeat[0] x = np.repeat(x, num_repeat, axis=0) if len(repeat) == 2: diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/command_line_tests.py b/tests/command_line_tests.py index 227b39e..b137332 100644 --- a/tests/command_line_tests.py +++ b/tests/command_line_tests.py @@ -161,9 +161,7 @@ def run_tests(args, tests): if failures == 0: print(f"Done. {n_to_run} tests passed in {round(total_time, 2)} s.") return 0 - print( - f"Done. {failures} tests out of {n_to_run} failed after {round(total_time, 2)} s.\n" - ) + print(f"Done. {failures} tests out of {n_to_run} failed after {round(total_time, 2)} s.\n") return 1 @@ -283,9 +281,7 @@ def setup_argparse(): action="store_true", help="Flag for stopping all testing upon first failure", ) - parser.add_argument( - "--verbose", action="store_true", help="Flag for more detailed output messages" - ) + parser.add_argument("--verbose", action="store_true", help="Flag for more detailed output messages") parser.add_argument( "--ids", action="store", @@ -293,8 +289,7 @@ def setup_argparse(): type=int, nargs="+", default=None, - help="Provide space-delimited ids of tests you want to run." - "Example: '--ids 1 5 8 13'", + help="Provide space-delimited ids of tests you want to run." 
"Example: '--ids 1 5 8 13'", ) return parser diff --git a/tests/data_formatting/conduit/test_conduit.py b/tests/data_formatting/conduit/test_conduit.py index f352b29..5d39e68 100644 --- a/tests/data_formatting/conduit/test_conduit.py +++ b/tests/data_formatting/conduit/test_conduit.py @@ -35,9 +35,7 @@ def test_save_node(): delete_data() -def save_node_many( - node, base="_dummy", exts=(".h5", ".hdf5", ".json", ".yaml", ".cbin") -): +def save_node_many(node, base="_dummy", exts=(".h5", ".hdf5", ".json", ".yaml", ".cbin")): for ext in exts: cb.dump_node(node, base + ext) @@ -97,9 +95,7 @@ def nodes_equal(node1, node2): return node1.to_json() == node2.to_json() -def load_node_many( - base="_dummy", exts=(".h5", ".hdf5", ".json", ".yaml", ".cbin"), path="/" -): +def load_node_many(base="_dummy", exts=(".h5", ".hdf5", ".json", ".yaml", ".cbin"), path="/"): nodes = [] for ext in exts: node = cb.load_node(base + ext, path) diff --git a/tests/sampling/test_make_samples.py b/tests/sampling/test_make_samples.py index 5de5113..cae780a 100644 --- a/tests/sampling/test_make_samples.py +++ b/tests/sampling/test_make_samples.py @@ -38,9 +38,7 @@ def test_scale_samples_nolog_2(): print("expected------------------------------") print(expected) - numpy.testing.assert_allclose( - real_values, expected, rtol=0.02, atol=0, verbose=True - ) + numpy.testing.assert_allclose(real_values, expected, rtol=0.02, atol=0, verbose=True) # Turn 0:1 samples into 1:10 with log scaling @@ -52,6 +50,4 @@ def test_scale_samples_log_1(): expected = [[1.0, 1.77, 3.16, 5.62, 10.0]] print("expected------------------------------") print(expected) - numpy.testing.assert_allclose( - real_values, expected, rtol=0.02, atol=0, verbose=True - ) + numpy.testing.assert_allclose(real_values, expected, rtol=0.02, atol=0, verbose=True)