Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions .github/workflows/ci-integration-test-reusable.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
name: Reusable Docker Test for running heavy test loads

on:
  # Can only be called by another workflow, not directly by the user
  workflow_call:
    inputs:
      build_mode:
        description: 'Build mode: "lock" for reproducible builds, "yaml" for flexible dev builds'
        required: true
        type: string
      cuda_base_image_tag:
        description: 'CUDA base image tag (e.g., 12.2.2-cudnn8-devel-ubuntu22.04)'
        required: true
        type: string

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}/openfold3-docker

jobs:
  # Provision a self-hosted GPU runner on AWS; its identity is passed to the
  # test job via the `instances` / `mapping` outputs.
  start-aws-runner:
    runs-on: ubuntu-latest
    permissions:
      id-token: write  # required for OIDC role assumption
      contents: read
    outputs:
      mapping: ${{ steps.aws-start.outputs.mapping }}
      instances: ${{ steps.aws-start.outputs.instances }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v6
        with:
          role-to-assume: arn:aws:iam::203627415330:role/of-gha-runner
          aws-region: us-east-1
      - name: Create cloud runner
        id: aws-start
        uses: omsf/[email protected]
        with:
          aws_image_id: ami-00839c71d8f6096b4  # Deep Learning Base AMI with Single CUDA (Ubuntu 22.04)
          aws_instance_type: "g5.4xlarge"  # A10G 64 GB
          aws_home_dir: /home/ubuntu
          aws_root_device_size: 200
        env:
          GH_PAT: ${{ secrets.GH_PAT }}

  # Build the test image, restore the parameter cache, and run the slow
  # integration tests inside Docker on the GPU runner started above.
  test-openfold-docker:
    runs-on: ${{ fromJSON(needs.start-aws-runner.outputs.instances) }}
    needs:
      - start-aws-runner
    permissions:
      contents: read
      packages: write  # push image + build cache to GHCR
    steps:
      - uses: actions/checkout@v6

      - name: Log in to GHCR
        uses: docker/login-action@v4
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v4

      - name: Build and push test image
        uses: docker/build-push-action@v7
        with:
          context: .
          file: docker/Dockerfile
          target: test
          push: true
          build-args: |
            CUDA_BASE_IMAGE_TAG=${{ inputs.cuda_base_image_tag }}
            BUILD_MODE=${{ inputs.build_mode }}
          tags: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:test-${{ inputs.cuda_base_image_tag }}-${{ github.sha }}
          # Registry-backed build cache, keyed per CUDA base image
          cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:cache-${{ inputs.cuda_base_image_tag }}
          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:cache-${{ inputs.cuda_base_image_tag }},mode=max

      - name: Create parameter cache directory
        run: mkdir -p ~/.openfold3

      - name: Cache download of parameters
        id: cache-openfold_parameters
        uses: actions/cache@v4
        with:
          path: ~/.openfold3/
          key: shared-params_of3
          restore-keys: |
            shared-params_of3

      - name: Install AWS CLI and Download OpenFold parameters
        if: steps.cache-openfold_parameters.outputs.cache-hit != 'true'
        run: |
          echo "Cache miss: Downloading OpenFold parameters..."
          aws s3 cp s3://openfold3-data/openfold3-parameters/of3-p2-155k.pt ~/.openfold3/ --no-sign-request

      - name: Run integration test
        # Mount the checkout and the cached parameters into the container.
        # Note the space before each continuation backslash: without it the
        # argument and the next token can be glued together after the shell
        # joins the continued lines.
        run: |
          docker run --gpus all \
            -v ${{ github.workspace }}:/opt/openfold3 \
            -v ~/.openfold3:/root/.openfold3 \
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:test-${{ inputs.cuda_base_image_tag }}-${{ github.sha }} \
            pytest -x openfold3/tests/ -m slow -vvv

  # Always tear down the AWS runner, even when the tests fail.
  stop-aws-runner:
    runs-on: ubuntu-latest
    permissions:
      id-token: write
      contents: read
    needs:
      - start-aws-runner
      - test-openfold-docker
    if: ${{ always() }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v6
        with:
          role-to-assume: arn:aws:iam::203627415330:role/of-gha-runner
          aws-region: us-east-1
      - name: Stop instances
        uses: omsf/[email protected]
        with:
          instance_mapping: ${{ needs.start-aws-runner.outputs.mapping }}
        env:
          GH_PAT: ${{ secrets.GH_PAT }}
26 changes: 26 additions & 0 deletions .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
name: Run integration tests with docker image

on:
  schedule:
    - cron: '0 0 * * *'  # daily at 00:00 UTC
  workflow_dispatch:  # allow manual runs

jobs:
  # Delegates to the reusable workflow, one call per matrix entry.
  test:
    permissions:
      id-token: write
      contents: read
      packages: write
    strategy:
      matrix:
        include:
          - cuda_base_image_tag: "12.1.1-cudnn8-devel-ubuntu22.04"
            build_mode: "yaml"
    # One in-flight run per ref and CUDA tag; newer runs cancel older ones.
    concurrency:
      group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}-${{ matrix.cuda_base_image_tag }}
      cancel-in-progress: true
    uses: ./.github/workflows/ci-integration-test-reusable.yml
    with:
      cuda_base_image_tag: ${{ matrix.cuda_base_image_tag }}
      build_mode: ${{ matrix.build_mode }}
    secrets: inherit
31 changes: 27 additions & 4 deletions openfold3/tests/test_inference_full.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@

import logging
import os
import textwrap
from unittest.mock import patch

import pytest

from openfold3.core.config import config_utils
from openfold3.entry_points.experiment_runner import InferenceExperimentRunner
from openfold3.entry_points.validator import (
InferenceExperimentConfig,
Expand All @@ -32,7 +34,7 @@
)
from openfold3.tests.compare_utils import skip_unless_cuda_available

pytestmark = pytest.mark.inference_verification
pytestmark = [pytest.mark.inference_verification, pytest.mark.slow]

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -75,16 +77,37 @@
}
)

inference_test_yaml_str = textwrap.dedent("""\
model_update:
presets:
- predict
- low_mem
custom:
settings:
memory:
eval:
use_deepspeed_evo_attention: false
""")


@skip_unless_cuda_available()
@pytest.mark.parametrize("query_set", [protein_only_query, protein_and_ligand_query])
def test_inference_run(tmp_path, query_set):
# Set up runner args
runner_yaml = tmp_path / "runner_config.yaml"
runner_yaml.write_text(inference_test_yaml_str)

# Trigger validation logic to replace the cache path
with patch("builtins.input", return_value="no"):
# your test code that calls _maybe_download_parameters
experiment_config = InferenceExperimentConfig.model_validate({})
experiment_config = InferenceExperimentConfig(
**config_utils.load_yaml(runner_yaml)
)
expt_runner = InferenceExperimentRunner(
experiment_config, num_diffusion_samples=1, output_dir=tmp_path
experiment_config,
num_diffusion_samples=1,
output_dir=tmp_path,
use_msa_server=True,
use_templates=True,
)
try:
expt_runner.setup()
Expand Down
2 changes: 2 additions & 0 deletions openfold3/tests/test_kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import unittest

import pytest
import torch
from torch.nn import functional as F

Expand All @@ -41,6 +42,7 @@

# Needed to do backward for cuEq kernels with FP32
torch.backends.cuda.matmul.allow_tf32 = True
pytestmark = [pytest.mark.slow]


@compare_utils.skip_unless_cuda_available()
Expand Down
2 changes: 2 additions & 0 deletions openfold3/tests/test_primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import unittest

import pytest
import torch

import openfold3.tests.compare_utils as compare_utils
Expand All @@ -24,6 +25,7 @@


class TestLMA(unittest.TestCase):
@pytest.mark.slow
@compare_utils.skip_unless_cuda_available()
def test_lma_vs_attention(self):
c_hidden = 32
Expand Down
Loading