Skip to content

[NOT FOR MERGE] New file name in benchmark test #1564

[NOT FOR MERGE] New file name in benchmark test

[NOT FOR MERGE] New file name in benchmark test #1564

Workflow file for this run

# This workflow runs benchmark
# Separation of jobs helps to cache data even benchmark is fail
name: Benchmark
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
download_data:
runs-on: ubuntu-latest
steps:
- name: Checkout CredData
uses: actions/checkout@v3
with:
repository: babenek/CredData
ref: fileid
- name: Cache data
id: cache-data
uses: actions/cache@v3
with:
path: data
key: cred-data-${{ hashFiles('snapshot.yaml') }}
- name: Set up Python 3.8
if: steps.cache-data.outputs.cache-hit != 'true'
uses: actions/setup-python@v3
with:
python-version: "3.8"
- name: Update PIP
run: python -m pip install --upgrade pip
- name: Install requirements of CredData
if: steps.cache-data.outputs.cache-hit != 'true'
run: python -m pip install --requirement requirements.txt
- name: Generate Data Asset
if: steps.cache-data.outputs.cache-hit != 'true'
run: python download_data.py --data_dir data --lite_obfuscation
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
run_benchmark:
needs: [download_data]
runs-on: ubuntu-latest
steps:
- name: Checkout CredData
uses: actions/checkout@v3
with:
repository: babenek/CredData
ref: fileid
- name: Cache data
id: cache-data
uses: actions/cache@v3
with:
path: data
key: cred-data-${{ hashFiles('snapshot.yaml') }}
- name: Failure in case when cache missed
if: steps.cache-data.outputs.cache-hit != 'true'
run: exit 1
- name: Check Data Asset - DEBUG
if: steps.cache-data.outputs.cache-hit == 'true'
run: ls -al . && ls -al data
- name: Set up Python 3.8
uses: actions/setup-python@v3
with:
python-version: "3.8"
- name: Update PIP
run: python -m pip install --upgrade pip
- name: Install requirements of CredData
run: python -m pip install --requirement requirements.txt
- name: Fix onnxruntime lib for released version 1.5.5 - todo remove it after new release
run: python -m pip install onnxruntime==1.15.1
- name: Checkout CredSweeper
if: ${{ 'pull_request' == github.event_name }}
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.sha }}
path: temp/CredSweeper
- name: Patch benchmark for PR work
if: ${{ 'pull_request' == github.event_name }}
run: |
sed -i 's|CREDSWEEPER = "https://github.com/Samsung/CredSweeper.git"|CREDSWEEPER = "dummy://github.com/Samsung/CredSweeper.git"|' benchmark/common/constants.py
grep --with-filename --line-number 'dummy://github.com/Samsung/CredSweeper.git' benchmark/common/constants.py
- name: Run Benchmark
run: |
python -m benchmark --scanner credsweeper | tee credsweeper.log
- name: Get only results
run: |
head -n 12 credsweeper.log | tee benchmark.txt
tail -n 14 credsweeper.log | grep -v 'Time Elapsed:' | tee -a benchmark.txt
cp -vf ./temp/CredSweeper/output.json report.json
- name: Upload artifact
if: always()
uses: actions/upload-artifact@v3
with:
name: report
path: report.json
- name: Upload artifact
if: always()
uses: actions/upload-artifact@v3
with:
name: benchmark
path: benchmark.txt
- name: Verify benchmark scores of the PR
if: ${{ 'pull_request' == github.event_name }}
# update cicd/benchmark.txt with uploaded artifact if a difference is found
run: |
diff temp/CredSweeper/cicd/benchmark.txt benchmark.txt
- name: Checkout CredSweeper on push event
if: ${{ 'pull_request' != github.event_name }}
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.sha }}
path: CredSweeper
- name: Verify benchmark scores on push event
if: ${{ 'pull_request' != github.event_name }}
# update cicd/benchmark.txt with uploaded artifact if a difference is found
run: |
diff CredSweeper/cicd/benchmark.txt benchmark.txt
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
performance_benchmark:
# put the benchmark in single job to keep constant environment during test
needs: [download_data]
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
steps:
- name: Checkout CredData
uses: actions/checkout@v3
with:
repository: Samsung/CredData
- name: Cache data
id: cache-data
uses: actions/cache@v3
with:
path: data
key: cred-data-${{ hashFiles('snapshot.yaml') }}
- name: Failure in case when cache missed
if: steps.cache-data.outputs.cache-hit != 'true'
run: exit 1
- name: Exclude very huge data
if: steps.cache-data.outputs.cache-hit == 'true'
run: rm -rf data/8* data/7* data/a* data/2* data/0* data/f* data/b* data/d*
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Add synthetic huge data
if: steps.cache-data.outputs.cache-hit == 'true'
run: python -c "for n in range(7654321):print(f'{n:08x}')" >data/test.text
- name: Update PIP
run: python -m pip install --upgrade pip
- name: Fix onnxruntime lib for released version 1.5.5 - todo remove it after new release
run: python -m pip install onnxruntime==1.15.1
- name: Install released CredSweeper
run: |
python -m pip install credsweeper
# check the banner
credsweeper --banner
- name: Run performance benchmark RELEASE
run: |
START_TIME=$(date +%s)
/usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null
FINISH_TIME=$(date +%s)
RELEASE_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
if [ 0 -lt ${RELEASE_TIME} ]; then
echo Elapsed $(date -ud "@${RELEASE_TIME}" +"%H:%M:%S")
else
echo "Wrong result '${RELEASE_TIME}'"
exit 1
fi
echo "RELEASE_TIME=${RELEASE_TIME}" >> $GITHUB_ENV
- name: Uninstall released CredSweeper
run: |
python -m pip uninstall -y credsweeper
- name: Checkout base CredSweeper
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.base.sha }}
path: temp/CredSweeper.base
- name: Install base CredSweeper
run: |
python -m pip install temp/CredSweeper.base
# check the banner
credsweeper --banner
- name: Run performance benchmark BASE
run: |
START_TIME=$(date +%s)
/usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null
FINISH_TIME=$(date +%s)
BASE_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
if [ 0 -lt ${BASE_TIME} ]; then
echo Elapsed $(date -ud "@${BASE_TIME}" +"%H:%M:%S")
else
echo "Wrong result '${BASE_TIME}'"
exit 1
fi
echo "BASE_TIME=${BASE_TIME}" >> $GITHUB_ENV
- name: Checkout current CredSweeper
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.sha }}
path: temp/CredSweeper.head
- name: Install current CredSweeper
run: |
python -m pip install temp/CredSweeper.head
# check the banner
credsweeper --banner
- name: Run performance benchmark CURRENT
run: |
START_TIME=$(date +%s)
/usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null
FINISH_TIME=$(date +%s)
HEAD_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
if [ 0 -lt ${HEAD_TIME} ]; then
echo Elapsed $(date -ud "@${HEAD_TIME}" +"%H:%M:%S")
else
echo "Wrong result '${HEAD_TIME}'"
exit 1
fi
echo "HEAD_TIME=${HEAD_TIME}" >> $GITHUB_ENV
- name: Compare results
run: |
exit_code=0
LOW_DELTA=10
THRESHOLD=250
# RELEASE
if [ ${RELEASE_TIME} -le ${HEAD_TIME} ]; then
d=$(( 1000 * ( ${HEAD_TIME} - ${RELEASE_TIME} ) / ${RELEASE_TIME} ))
echo "RELEASE_TIME (sec) = ${RELEASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly Slowdown."
exit_code=1
else
echo "Slowdown."
fi
else
d=$(( 1000 * ( ${RELEASE_TIME} - ${HEAD_TIME} ) / ${RELEASE_TIME} ))
echo "RELEASE_TIME (sec) = ${RELEASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly speed-up."
else
echo "Speed-up."
fi
fi
# BASE
if [ ${BASE_TIME} -le ${HEAD_TIME} ]; then
d=$(( 1000 * ( ${HEAD_TIME} - ${BASE_TIME} ) / ${BASE_TIME} ))
echo "BASE_TIME (sec) = ${BASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly Slowdown."
exit_code=1
else
echo "Slowdown."
fi
else
d=$(( 1000 * ( ${BASE_TIME} - ${HEAD_TIME} ) / ${BASE_TIME} ))
echo "BASE_TIME (sec) = ${BASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly speed-up."
else
echo "Speed-up."
fi
fi
exit ${exit_code}
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #