diff --git a/.github/workflows/ci-build.yaml b/.github/workflows/ci-build.yaml
index e5483bf..db951ef 100644
--- a/.github/workflows/ci-build.yaml
+++ b/.github/workflows/ci-build.yaml
@@ -2,10 +2,12 @@ name: CI Build
 on:
   push:
     branches:
       - main
+      - ci_test
   pull_request:
     branches:
       - main
+      - ci_test
 jobs:
   pre-commit-run:
     runs-on: ubuntu-latest
@@ -53,3 +55,150 @@ jobs:
       - name: Unit tests
         run: |
           pytest --cov=. tests/
+
+  # Scans the repository with IBM detect-secrets and uploads the scan output
+  # as a build artifact for triage.
+  detect_secrets:
+    name: IBM detect-secrets
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      # Same interpreter setup as the bandit and pip_audit jobs.
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install prerequisites
+        run: sudo apt-get update && sudo apt-get install -y jq diffutils
+      - name: Install IBM detect-secrets
+        run: |
+          python3 -m pip install --upgrade pip
+          python3 -m pip install --upgrade "git+https://github.com/ibm/detect-secrets.git@master#egg=detect-secrets"
+          python3 -m pip install boxsdk
+      - name: Scan repository & write snapshot
+        run: |
+          mkdir -p security-outputs
+          # Run the scan; send stderr to a log; stdout to JSON
+          detect-secrets scan \
+            --exclude-files '.*\.ipynb$' \
+            --exclude-lines '"(outputs|image/\w+|id|hash)":.*' \
+            > security-outputs/ds-results.json 2> security-outputs/.secrets.new
+      - name: Upload scan artifacts (for triage)
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: detect-secrets-artifacts
+          # Upload the directory the scan step writes to; a bare
+          # `.secrets.new` would resolve to the repository root, where the
+          # scan step writes nothing.
+          path: security-outputs/
+          # .secrets.new is a dotfile; upload-artifact v4 skips hidden
+          # files unless this is enabled.
+          include-hidden-files: true
+
+  # Runs CodeQL analysis for Python.
+  codeql:
+    name: CodeQL (Python)
+    runs-on: ubuntu-latest
+    # The analyze step uploads results to code scanning, which needs
+    # security-events: write on the job token.
+    permissions:
+      actions: read
+      contents: read
+      security-events: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: github/codeql-action/init@v3
+        with:
+          languages: python
+      - uses: github/codeql-action/autobuild@v3
+      - uses: github/codeql-action/analyze@v3
+        with:
+          category: "/language:python"
+
+  # Semgrep job, currently disabled.
+  # semgrep:
+  #   name: Semgrep (p/default + local overrides)
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #     - uses: actions/checkout@v4
+  #     - name: Install Semgrep
+  #       run: pip install semgrep
+  #     - name: Semgrep scan (PR diff-aware)
+  #       if: ${{ github.event_name == 'pull_request' }}
+  #       run: |
+  #         semgrep \
+  #           --config=p/default \
+  #           --config=.semgrep.yaml \
+  #           --error \
+  #           --baseline-commit "${{ github.event.pull_request.base.sha }}" \
+  #           .
+  #     - name: Semgrep scan (full)
+  #       if: ${{ github.event_name != 'pull_request' }}
+  #       run: |
+  #         semgrep \
+  #           --config=p/default \
+  #           --config=.semgrep.yaml \
+  #           --error \
+  #           .
+
+  # Runs Bandit recursively over gridfm-graphkit and uploads the JSON report.
+  # NOTE(review): confirm gridfm-graphkit is the actual source directory name.
+  bandit:
+    name: Bandit (Python)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install Bandit
+        run: pip install bandit
+      - name: Run Bandit
+        run: bandit -ll -ii -r gridfm-graphkit -f json -o bandit-report.json
+      - name: Upload artifact
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: bandit-report
+          path: bandit-report.json
+
+  # Installs the package, then runs the pip-audit action.
+  pip_audit:
+    name: pip-audit (deps)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install package (editable) and dev/test extras if present
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .[dev,test] || pip install -e .
+      - name: Run pip-audit
+        uses: pypa/gh-action-pip-audit@v1.1.0
+
+  # Runs a Trivy filesystem scan and uploads the SARIF report to code scanning.
+  trivy_repo:
+    name: Trivy (repo scan)
+    runs-on: ubuntu-latest
+    # upload-sarif needs security-events: write on the job token.
+    permissions:
+      actions: read
+      contents: read
+      security-events: write
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run Trivy filesystem scan
+        uses: aquasecurity/trivy-action@0.33.1
+        with:
+          scan-type: 'fs'
+          scan-ref: '.'
+          format: 'sarif'
+          output: 'trivy-results.sarif'
+          severity: 'HIGH,CRITICAL'
+          ignore-unfixed: true
+      - name: Upload SARIF to Code Scanning
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: trivy-results.sarif
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 88492f3..28073e0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,7 +22,7 @@ repos:
     hooks:
       - id: add-trailing-comma
   - repo: https://github.com/ibm/detect-secrets
-    rev: 0.13.1+ibm.62.dss
+    rev: 0.13.1+ibm.64.dss
     hooks:
       - id: detect-secrets # pragma: whitelist secret
         # Add options for detect-secrets-hook binary. You can run `detect-secrets-hook --help` to list out all possible options.
diff --git a/.secrets.baseline b/.secrets.baseline
index 4d3101c..40f1a6e 100644
--- a/.secrets.baseline
+++ b/.secrets.baseline
@@ -3,7 +3,7 @@
     "files": "^.secrets.baseline$",
     "lines": null
   },
-  "generated_at": "2025-04-09T07:19:08Z",
+  "generated_at": "2026-01-19T09:47:05Z",
   "plugins_used": [
     {
       "name": "AWSKeyDetector"
@@ -76,19 +76,8 @@
       "name": "TwilioKeyDetector"
     }
   ],
-  "results": {
-    "notebooks/visualization/Reconstruction_task_visualization.ipynb": [
-      {
-        "hashed_secret": "b5111672e1b2d341aad7c2f27f4bc19bf10b9447",
-        "is_secret": false,
-        "is_verified": false,
-        "line_number": 6,
-        "type": "Base64 High Entropy String",
-        "verified_result": null
-      }
-    ]
-  },
-  "version": "0.13.1+ibm.62.dss",
+  "results": {},
+  "version": "0.13.1+ibm.64.dss",
   "word_list": {
     "file": null,
     "hash": null
diff --git a/.semgrep.yaml b/.semgrep.yaml
new file mode 100644
index 0000000..fc48b6d
--- /dev/null
+++ b/.semgrep.yaml
@@ -0,0 +1,16 @@
+# Goal: do not fail on torch.load(..), for now
+
+rules:
+
+  - id: python.deserialization.pickle
+
+    # 👇 Exclude torch.load from this rule so it won't be reported as ERROR:
+    pattern-not: |
+      torch.load(...)
+
+  # --- record torch.load usage as INFO, non-blocking ---
+  - id: python.deserialization.pytorch.torch-load
+    message: "torch.load detected (accepted for trusted checkpoints). Prefer weights_only=True."
+    languages: [python]
+    severity: INFO
+    pattern: |