Skip to content

dbg

dbg #2716

Workflow file for this run

# This workflow runs benchmark
# Separation of jobs helps to cache data even benchmark is fail
name: Benchmark
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
permissions:
contents: read
jobs:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
download_data:
runs-on: ubuntu-latest
steps:
- name: Harden Runner
uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1
with:
egress-policy: audit
- name: Checkout CredData
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
repository: Samsung/CredData
ref: main
- name: Markup hashing
run: |
md5sum snapshot.yaml >checksums.md5
for f in $(find meta -type f|sort); do md5sum $f; done >>checksums.md5
for f in $(find . -maxdepth 1 -type f -name "*.py"|sort); do md5sum $f; done >>checksums.md5
cat checksums.md5
sha256sum checksums.md5
- name: Cache data
id: cache-data
uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2
with:
path: data
key: cred-data-${{ hashFiles('checksums.md5') }}
- name: Set up Python 3.10
if: steps.cache-data.outputs.cache-hit != 'true'
uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1
with:
python-version: "3.10"
- name: Update PIP
run: python -m pip install --upgrade pip
- name: DEBUG PIP
run: python -m pip --version
- name: Install requirements of CredData
if: steps.cache-data.outputs.cache-hit != 'true'
run: python -m pip install --requirement requirements.txt
- name: Generate Data Asset
if: steps.cache-data.outputs.cache-hit != 'true'
run: python download_data.py --data_dir data --jobs $(nproc)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
experiment:
# the ml train test is placed here to use cached data set
needs: [ download_data ]
runs-on: ubuntu-latest
steps:
- name: Harden Runner
uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1
with:
egress-policy: audit
- name: Checkout CredData
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
repository: Samsung/CredData
ref: main
- name: Markup hashing
run: |
md5sum snapshot.yaml >checksums.md5
for f in $(find meta -type f|sort); do md5sum $f; done >>checksums.md5
for f in $(find . -maxdepth 1 -type f -name "*.py"|sort); do md5sum $f; done >>checksums.md5
cat checksums.md5
sha256sum checksums.md5
- name: Cache data
id: cache-data
uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2
with:
path: data
key: cred-data-${{ hashFiles('checksums.md5') }}
- name: Failure in case when cache missed
if: steps.cache-data.outputs.cache-hit != 'true'
run: exit 1
- name: Exclude some sets for speed-up
run: |
rm -rf data/4* data/5* data/6* data/7* data/8* data/9* data/a* data/b* data/c* data/d* data/e* data/f*
rm -rf meta/4* meta/5* meta/6* meta/7* meta/8* meta/9* meta/a* meta/b* meta/c* meta/d* meta/e* meta/f*
mkdir -vp ${{ github.workspace }}/CredData
mv data ${{ github.workspace }}/CredData/
mv meta ${{ github.workspace }}/CredData/
- name: Set up Python 3.10
if: steps.cache-data.outputs.cache-hit != 'true'
uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236
with:
python-version: "3.10"
- name: Update PIP
run: python -m pip install --upgrade pip
- name: DEBUG PIP
run: python -m pip --version
- name: Checkout current CredSweeper
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ github.event.pull_request.head.sha }}
path: CredSweeper.head
- name: Install development packages
run: python -m pip install --requirement CredSweeper.head/requirements.txt
- name: Install experimental packages
# some versions will be changed for compatibility
run: python -m pip install --requirement CredSweeper.head/experiment/requirements.txt
- name: dbg
run: echo ${{ github.workspace }} && ls -al ${{ github.workspace }} && tree ${{ github.workspace }}
- name: Run the experiment
run: |
cd CredSweeper.head
ls -al #dbg
pwd #dbg
export PYTHONPATH=$(pwd):${PYTHONPATH}
cd experiment
# check whether credsweeper is available as module
python -m credsweeper --banner
# use only 2 epochs for the test
sed -i 's/max_epochs = .*/max_epochs = 2/' main.py
python main.py --data ${{ github.workspace }}/CredData -j $(( 2 * $(nproc) ))
# dbg
git diff
# crc32 should be changed
python -m credsweeper --banner
# run quick scan
python -m credsweeper --ml_providers AzureExecutionProvider,CPUExecutionProvider --log debug --path ../tests/samples --save-json
NEW_MODEL_FOUND_SAMPLES=$(jq '.|length' output.json)
if [ 10 -gt ${NEW_MODEL_FOUND_SAMPLES} ]; then
echo "Failure: found ${NEW_MODEL_FOUND_SAMPLES} credentials"
exit 1
fi
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #