diff --git a/.github/workflows/ci-integration-test-reusable.yml b/.github/workflows/ci-integration-test-reusable.yml
new file mode 100644
index 00000000..8c54a874
--- /dev/null
+++ b/.github/workflows/ci-integration-test-reusable.yml
@@ -0,0 +1,127 @@
+name: Reusable Docker Test for running heavy test loads
+
+on:
+  # Can only be called by another workflow, not directly by the user
+  workflow_call:
+    inputs:
+      build_mode:
+        description: 'Build mode: "lock" for reproducible builds, "yaml" for flexible dev builds'
+        required: true
+        type: string
+      cuda_base_image_tag:
+        description: 'CUDA base image tag (e.g., 12.2.2-cudnn8-devel-ubuntu22.04)'
+        required: true
+        type: string
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}/openfold3-docker
+
+jobs:
+  start-aws-runner:
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+      contents: read
+    outputs:
+      mapping: ${{ steps.aws-start.outputs.mapping }}
+      instances: ${{ steps.aws-start.outputs.instances }}
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v6
+        with:
+          role-to-assume: arn:aws:iam::203627415330:role/of-gha-runner
+          aws-region: us-east-1
+      - name: Create cloud runner
+        id: aws-start
+        uses: omsf/start-aws-gha-runner@v1.1.1
+        with:
+          aws_image_id: ami-00839c71d8f6096b4 # Deep Learning Base AMI with Single CUDA (Ubuntu 22.04)
+          aws_instance_type: "g5.4xlarge" # A10G 64 GB
+          aws_home_dir: /home/ubuntu
+          aws_root_device_size: 200
+        env:
+          GH_PAT: ${{ secrets.GH_PAT }}
+
+  test-openfold-docker:
+    runs-on: ${{ fromJSON(needs.start-aws-runner.outputs.instances) }}
+    needs:
+      - start-aws-runner
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Log in to GHCR
+        uses: docker/login-action@v4
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v4
+
+      - name: Build and push test image
+        uses: docker/build-push-action@v7
+        with:
+          context: .
+          file: docker/Dockerfile
+          target: test
+          push: true
+          build-args: |
+            CUDA_BASE_IMAGE_TAG=${{ inputs.cuda_base_image_tag }}
+            BUILD_MODE=${{ inputs.build_mode }}
+          tags: |
+            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:test-${{ inputs.cuda_base_image_tag }}-${{ github.sha }}
+          cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:cache-${{ inputs.cuda_base_image_tag }}
+          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:cache-${{ inputs.cuda_base_image_tag }},mode=max
+
+      - name: Create parameter cache directory
+        run: mkdir -p ~/.openfold3
+
+      - name: Cache download of parameters
+        id: cache-openfold_parameters
+        uses: actions/cache@v4
+        with:
+          path: ~/.openfold3/
+          key: shared-params_of3
+          restore-keys: |
+            shared-params_of3
+
+      - name: Download OpenFold parameters
+        if: steps.cache-openfold_parameters.outputs.cache-hit != 'true'
+        run: |
+          echo "Cache miss: Downloading OpenFold parameters..."
+          aws s3 cp s3://openfold3-data/openfold3-parameters/of3-p2-155k.pt ~/.openfold3/ --no-sign-request
+
+      - name: Run integration test
+        run: |
+          docker run --gpus all \
+            -v ${{ github.workspace }}:/opt/openfold3 \
+            -v ~/.openfold3:/root/.openfold3 \
+            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:test-${{ inputs.cuda_base_image_tag }}-${{ github.sha }} \
+            pytest -x openfold3/tests/ -m slow -vvv
+
+  stop-aws-runner:
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+      contents: read
+    needs:
+      - start-aws-runner
+      - test-openfold-docker
+    if: ${{ always() }}
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v6
+        with:
+          role-to-assume: arn:aws:iam::203627415330:role/of-gha-runner
+          aws-region: us-east-1
+      - name: Stop instances
+        uses: omsf/stop-aws-gha-runner@v1.0.0
+        with:
+          instance_mapping: ${{ needs.start-aws-runner.outputs.mapping }}
+        env:
+          GH_PAT: ${{ secrets.GH_PAT }}
diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
new file mode 100644
index 00000000..44e91c05
--- /dev/null
+++ b/.github/workflows/integration-test.yml
@@ -0,0 +1,26 @@
+name: Run integration tests with docker image
+
+on:
+  schedule:
+    - cron: '0 0 * * *' # daily at 00:00 UTC
+  workflow_dispatch:
+
+jobs:
+  test:
+    permissions:
+      id-token: write
+      contents: read
+      packages: write
+    strategy:
+      matrix:
+        include:
+          - cuda_base_image_tag: "12.1.1-cudnn8-devel-ubuntu22.04"
+            build_mode: "yaml"
+    concurrency:
+      group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}-${{ matrix.cuda_base_image_tag }}
+      cancel-in-progress: true
+    uses: ./.github/workflows/ci-integration-test-reusable.yml
+    with:
+      cuda_base_image_tag: ${{ matrix.cuda_base_image_tag }}
+      build_mode: ${{ matrix.build_mode }}
+    secrets: inherit
diff --git a/openfold3/tests/test_inference_full.py b/openfold3/tests/test_inference_full.py
index f328cc47..55839e09 100644
--- a/openfold3/tests/test_inference_full.py
+++ b/openfold3/tests/test_inference_full.py
@@ -19,10 +19,12 @@
 
 import logging
 import os
+import textwrap
 from unittest.mock import patch
 
 import pytest
 
+from openfold3.core.config import config_utils
 from openfold3.entry_points.experiment_runner import InferenceExperimentRunner
 from openfold3.entry_points.validator import (
     InferenceExperimentConfig,
@@ -32,7 +34,7 @@
 )
 from openfold3.tests.compare_utils import skip_unless_cuda_available
 
-pytestmark = pytest.mark.inference_verification
+pytestmark = [pytest.mark.inference_verification, pytest.mark.slow]
 
 logging.basicConfig(level=logging.WARNING)
 logger = logging.getLogger(__name__)
@@ -75,16 +77,37 @@
     }
 )
 
+inference_test_yaml_str = textwrap.dedent("""\
+    model_update:
+      presets:
+        - predict
+        - low_mem
+      custom:
+        settings:
+          memory:
+            eval:
+              use_deepspeed_evo_attention: false
+    """)
+
 
 @skip_unless_cuda_available()
 @pytest.mark.parametrize("query_set", [protein_only_query, protein_and_ligand_query])
 def test_inference_run(tmp_path, query_set):
+    # Set up runner args
+    runner_yaml = tmp_path / "runner_config.yaml"
+    runner_yaml.write_text(inference_test_yaml_str)
+
     # Trigger validation logic to replace the cache path
     with patch("builtins.input", return_value="no"):
-        # your test code that calls _maybe_download_parameters
-        experiment_config = InferenceExperimentConfig.model_validate({})
+        experiment_config = InferenceExperimentConfig(
+            **config_utils.load_yaml(runner_yaml)
+        )
         expt_runner = InferenceExperimentRunner(
-            experiment_config, num_diffusion_samples=1, output_dir=tmp_path
+            experiment_config,
+            num_diffusion_samples=1,
+            output_dir=tmp_path,
+            use_msa_server=True,
+            use_templates=True,
         )
     try:
         expt_runner.setup()
diff --git a/openfold3/tests/test_kernels.py b/openfold3/tests/test_kernels.py
index 9e3d1105..adb0bd71 100644
--- a/openfold3/tests/test_kernels.py
+++ b/openfold3/tests/test_kernels.py
@@ -20,6 +20,7 @@
 
 import unittest
 
+import pytest
 import torch
 from torch.nn import functional as F
 
@@ -41,6 +42,7 @@
 
 # Needed to do backward for cuEq kernels with FP32
 torch.backends.cuda.matmul.allow_tf32 = True
+pytestmark = [pytest.mark.slow]
 
 
 @compare_utils.skip_unless_cuda_available()
diff --git a/openfold3/tests/test_primitives.py b/openfold3/tests/test_primitives.py
index 74e95558..5afdf116 100644
--- a/openfold3/tests/test_primitives.py
+++ b/openfold3/tests/test_primitives.py
@@ -14,6 +14,7 @@
 
 import unittest
 
+import pytest
 import torch
 
 import openfold3.tests.compare_utils as compare_utils
@@ -24,6 +25,7 @@
 
 
 class TestLMA(unittest.TestCase):
+    @pytest.mark.slow
     @compare_utils.skip_unless_cuda_available()
     def test_lma_vs_attention(self):
         c_hidden = 32