# dbg #2716
# Workflow file for this run.
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Benchmark workflow.
# The work is split into separate jobs so that the downloaded data set is
# cached even when a later benchmark job fails.
name: Benchmark

# Triggered by pushes to `main` and by pull requests that target `main`.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# Workflow-level token permissions: read-only access to repository contents.
permissions:
  contents: read
jobs:

  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

  # Checks out Samsung/CredData and makes sure the `data` directory exists in
  # the cache under a key derived from the markup files. The expensive
  # download only runs when the cache lookup misses.
  download_data:
    runs-on: ubuntu-latest
    steps:

      - name: Harden Runner
        uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7  # v2.10.1
        with:
          egress-policy: audit

      - name: Checkout CredData
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          repository: Samsung/CredData
          ref: main

      # Collects md5 sums of snapshot.yaml, every file under meta/ and the
      # top-level *.py scripts into checksums.md5, which feeds the cache key.
      # NOTE: the `experiment` job runs the very same commands; both scripts
      # must stay identical, otherwise the two jobs compute different keys.
      - name: Markup hashing
        run: |
          md5sum snapshot.yaml >checksums.md5
          for f in $(find meta -type f|sort); do md5sum $f; done >>checksums.md5
          for f in $(find . -maxdepth 1 -type f -name "*.py"|sort); do md5sum $f; done >>checksums.md5
          cat checksums.md5
          sha256sum checksums.md5

      - name: Cache data
        id: cache-data
        uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a  # v4.1.2
        with:
          path: data
          key: cred-data-${{ hashFiles('checksums.md5') }}

      # Everything below is only needed to (re)build the data set, so each
      # step is skipped when the cache already holds it.
      - name: Set up Python 3.10
        if: steps.cache-data.outputs.cache-hit != 'true'
        uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236  # v4.7.1
        with:
          python-version: "3.10"

      # Gated like the Python setup above: on a cache hit no interpreter is
      # provisioned by this job, so pip must not be touched either.
      - name: Update PIP
        if: steps.cache-data.outputs.cache-hit != 'true'
        run: python -m pip install --upgrade pip

      - name: DEBUG PIP
        if: steps.cache-data.outputs.cache-hit != 'true'
        run: python -m pip --version

      - name: Install requirements of CredData
        if: steps.cache-data.outputs.cache-hit != 'true'
        run: python -m pip install --requirement requirements.txt

      - name: Generate Data Asset
        if: steps.cache-data.outputs.cache-hit != 'true'
        run: python download_data.py --data_dir data --jobs $(nproc)

  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

  # Runs a shortened ML training experiment of CredSweeper on a reduced part
  # of the data set and then performs a quick scan with the resulting model.
  experiment:
    # the ml train test is placed here to use cached data set
    needs: [download_data]
    runs-on: ubuntu-latest
    steps:

      - name: Harden Runner
        uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7  # v2.10.1
        with:
          egress-policy: audit

      - name: Checkout CredData
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          repository: Samsung/CredData
          ref: main

      # Must stay identical to the "Markup hashing" step of `download_data`
      # so that this job derives the same cache key.
      - name: Markup hashing
        run: |
          md5sum snapshot.yaml >checksums.md5
          for f in $(find meta -type f|sort); do md5sum $f; done >>checksums.md5
          for f in $(find . -maxdepth 1 -type f -name "*.py"|sort); do md5sum $f; done >>checksums.md5
          cat checksums.md5
          sha256sum checksums.md5

      - name: Cache data
        id: cache-data
        uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a  # v4.1.2
        with:
          path: data
          key: cred-data-${{ hashFiles('checksums.md5') }}

      # The data set is expected to be in the cache after `download_data`;
      # a miss here aborts the job instead of training on missing data.
      - name: Failure in case when cache missed
        if: steps.cache-data.outputs.cache-hit != 'true'
        run: exit 1

      # Keeps only the data/meta entries whose names do not start with 4-9 or
      # a-f, then moves both directories into <workspace>/CredData.
      - name: Exclude some sets for speed-up
        run: |
          rm -rf data/4* data/5* data/6* data/7* data/8* data/9* data/a* data/b* data/c* data/d* data/e* data/f*
          rm -rf meta/4* meta/5* meta/6* meta/7* meta/8* meta/9* meta/a* meta/b* meta/c* meta/d* meta/e* meta/f*
          mkdir -vp "${{ github.workspace }}/CredData"
          mv data "${{ github.workspace }}/CredData/"
          mv meta "${{ github.workspace }}/CredData/"

      # Runs unconditionally. The former `cache-hit != 'true'` condition could
      # never hold at this point (a cache miss already failed the job above),
      # so Python 3.10 was never installed and the steps below used whatever
      # interpreter the runner image provides.
      - name: Set up Python 3.10
        uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236  # v4.7.1
        with:
          python-version: "3.10"

      - name: Update PIP
        run: python -m pip install --upgrade pip

      - name: DEBUG PIP
        run: python -m pip --version

      # `github.event.pull_request.head.sha` is only populated for
      # pull_request events.
      # NOTE(review): on push events the expression is empty and checkout is
      # expected to fall back to its default ref - confirm.
      - name: Checkout current CredSweeper
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          ref: ${{ github.event.pull_request.head.sha }}
          path: CredSweeper.head

      - name: Install development packages
        run: python -m pip install --requirement CredSweeper.head/requirements.txt

      - name: Install experimental packages
        # some versions will be changed for compatibility
        run: python -m pip install --requirement CredSweeper.head/experiment/requirements.txt

      # Debug aid: prints the workspace path and its contents.
      - name: dbg
        run: |
          echo "${{ github.workspace }}"
          ls -al "${{ github.workspace }}"
          tree "${{ github.workspace }}"

      - name: Run the experiment
        run: |
          cd CredSweeper.head
          ls -al #dbg
          pwd #dbg
          # make the checked-out sources importable as the `credsweeper` module
          export PYTHONPATH=$(pwd):${PYTHONPATH}
          cd experiment
          # check whether credsweeper is available as module
          python -m credsweeper --banner
          # use only 2 epochs for the test
          sed -i 's/max_epochs = .*/max_epochs = 2/' main.py
          python main.py --data "${{ github.workspace }}/CredData" -j $(( 2 * $(nproc) ))
          # dbg
          git diff
          # crc32 should be changed
          python -m credsweeper --banner
          # run quick scan
          python -m credsweeper \
            --ml_providers AzureExecutionProvider,CPUExecutionProvider \
            --log debug \
            --path ../tests/samples \
            --save-json
          # output.json is read as the scan report; fewer than 10 entries
          # fails the job
          NEW_MODEL_FOUND_SAMPLES=$(jq '.|length' output.json)
          if [ 10 -gt "${NEW_MODEL_FOUND_SAMPLES}" ]; then
            echo "Failure: found ${NEW_MODEL_FOUND_SAMPLES} credentials"
            exit 1
          fi

  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #