Skip to content

Commit

Permalink
test fix and custom benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
babenek committed Dec 1, 2023
1 parent 3d68951 commit e39e9a4
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 212 deletions.
190 changes: 4 additions & 186 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ jobs:
- name: Checkout CredData
uses: actions/checkout@v3
with:
repository: Samsung/CredData
repository: babenek/CredData
ref: jwt

- name: Cache data
id: cache-data
Expand Down Expand Up @@ -62,7 +63,8 @@ jobs:
- name: Checkout CredData
uses: actions/checkout@v3
with:
repository: Samsung/CredData
repository: babenek/CredData
ref: jwt

- name: Cache data
id: cache-data
Expand Down Expand Up @@ -148,187 +150,3 @@ jobs:
# update cicd/benchmark.txt with uploaded artifact if a difference is found
run: |
diff CredSweeper/cicd/benchmark.txt benchmark.txt
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

performance_benchmark:
  # Times three CredSweeper builds against the same data set and compares them:
  #   RELEASE - the package installed from the package index,
  #   BASE    - the pull request base commit,
  #   CURRENT - the pull request head commit.
  # All three runs are kept in a single job so that they share one environment.
  # The job fails only when CURRENT is slower than RELEASE or BASE by more than
  # THRESHOLD (see the "Compare results" step).
  needs: [download_data]

  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      python-version: ["3.8", "3.9", "3.10", "3.11"]

  steps:

    - name: Checkout CredData
      uses: actions/checkout@v3
      with:
        repository: Samsung/CredData

    - name: Cache data
      id: cache-data
      uses: actions/cache@v3
      with:
        path: data
        key: cred-data-${{ hashFiles('snapshot.yaml') }}

    # The benchmark cannot run without the cached data directory
    - name: Failure in case when cache missed
      if: steps.cache-data.outputs.cache-hit != 'true'
      run: exit 1

    - name: Exclude very huge data
      if: steps.cache-data.outputs.cache-hit == 'true'
      run: rm -rf data/8* data/7* data/a* data/2* data/0* data/f* data/b* data/d*

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v3
      with:
        python-version: ${{ matrix.python-version }}

    # Writes 7654321 lines, each an 8-digit zero-padded hexadecimal counter
    - name: Add synthetic huge data
      if: steps.cache-data.outputs.cache-hit == 'true'
      run: python -c "for n in range(7654321):print(f'{n:08x}')" >data/test.text

    - name: Update PIP
      run: python -m pip install --upgrade pip

    - name: Fix onnxruntime lib for released version 1.5.5 - todo remove it after new release
      run: python -m pip install onnxruntime==1.15.1

    - name: Install released CredSweeper
      run: |
        python -m pip install credsweeper
        # check the banner
        credsweeper --banner

    - name: Run performance benchmark RELEASE
      run: |
        START_TIME=$(date +%s)
        /usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null
        FINISH_TIME=$(date +%s)
        RELEASE_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
        # a non-positive duration would later be used as a divisor - reject it
        if [ 0 -lt ${RELEASE_TIME} ]; then
          echo Elapsed $(date -ud "@${RELEASE_TIME}" +"%H:%M:%S")
        else
          echo "Wrong result '${RELEASE_TIME}'"
          exit 1
        fi
        # export the measurement to the following steps
        echo "RELEASE_TIME=${RELEASE_TIME}" >> "$GITHUB_ENV"

    - name: Uninstall released CredSweeper
      run: |
        python -m pip uninstall -y credsweeper

    - name: Checkout base CredSweeper
      uses: actions/checkout@v3
      with:
        ref: ${{ github.event.pull_request.base.sha }}
        path: temp/CredSweeper.base

    - name: Install base CredSweeper
      run: |
        python -m pip install temp/CredSweeper.base
        # check the banner
        credsweeper --banner

    - name: Run performance benchmark BASE
      run: |
        START_TIME=$(date +%s)
        /usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null
        FINISH_TIME=$(date +%s)
        BASE_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
        # a non-positive duration would later be used as a divisor - reject it
        if [ 0 -lt ${BASE_TIME} ]; then
          echo Elapsed $(date -ud "@${BASE_TIME}" +"%H:%M:%S")
        else
          echo "Wrong result '${BASE_TIME}'"
          exit 1
        fi
        # export the measurement to the following steps
        echo "BASE_TIME=${BASE_TIME}" >> "$GITHUB_ENV"

    - name: Checkout current CredSweeper
      uses: actions/checkout@v3
      with:
        ref: ${{ github.event.pull_request.head.sha }}
        path: temp/CredSweeper.head

    - name: Install current CredSweeper
      run: |
        python -m pip install temp/CredSweeper.head
        # check the banner
        credsweeper --banner

    - name: Run performance benchmark CURRENT
      run: |
        START_TIME=$(date +%s)
        /usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null
        FINISH_TIME=$(date +%s)
        HEAD_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
        # a non-positive duration means the measurement is unusable - reject it
        if [ 0 -lt ${HEAD_TIME} ]; then
          echo Elapsed $(date -ud "@${HEAD_TIME}" +"%H:%M:%S")
        else
          echo "Wrong result '${HEAD_TIME}'"
          exit 1
        fi
        # export the measurement to the following steps
        echo "HEAD_TIME=${HEAD_TIME}" >> "$GITHUB_ENV"

    - name: Compare results
      run: |
        exit_code=0
        # Both limits are in tenths of a percent: 10 = 1.0 %, 250 = 25.0 %
        LOW_DELTA=10
        THRESHOLD=250

        # compare LABEL REFERENCE_SECONDS
        # Prints how HEAD_TIME differs from the reference measurement.
        # Sets the global exit_code to 1 only for a slowdown above THRESHOLD;
        # a speed-up of any size never fails the job.
        compare() {
          local label="$1"
          local reference="$2"
          local d
          if [ "${reference}" -le "${HEAD_TIME}" ]; then
            # current run is not faster than the reference
            d=$(( 1000 * ( HEAD_TIME - reference ) / reference ))
            echo "${label} (sec) = ${reference}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
            if [ "${LOW_DELTA}" -ge "${d}" ]; then
              echo "Almost the same."
            elif [ "${THRESHOLD}" -lt "${d}" ]; then
              echo "Significantly Slowdown."
              exit_code=1
            else
              echo "Slowdown."
            fi
          else
            # current run is faster than the reference
            d=$(( 1000 * ( reference - HEAD_TIME ) / reference ))
            echo "${label} (sec) = ${reference}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
            if [ "${LOW_DELTA}" -ge "${d}" ]; then
              echo "Almost the same."
            elif [ "${THRESHOLD}" -lt "${d}" ]; then
              echo "Significantly speed-up."
            else
              echo "Speed-up."
            fi
          fi
        }

        # RELEASE
        compare RELEASE_TIME "${RELEASE_TIME}"
        # BASE
        compare BASE_TIME "${BASE_TIME}"
        exit ${exit_code}
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

2 changes: 1 addition & 1 deletion tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pathlib import Path

# total number of files in test samples
SAMPLES_FILES_COUNT: int = 120
SAMPLES_FILES_COUNT: int = 121

# credentials count after scan
SAMPLES_CRED_COUNT: int = 373
Expand Down
12 changes: 6 additions & 6 deletions tests/data/depth_3.json
Original file line number Diff line number Diff line change
Expand Up @@ -5807,23 +5807,23 @@
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "VALIDATED_KEY",
"ml_probability": 0.99907,
"ml_probability": 0.82258,
"rule": "JSON Web Token",
"severity": "medium",
"line_data_list": [
{
"line": "$payload = 'eyJhbGciOiJ0eXAifQ.eyJcaaF9xCe7shE0ENPiBlEJOpS'",
"line": "$payload = \"eyJhbGciOiJ0-_-_-_-_-_-.eyJcaaF9xCe7shE0ENPiBlEJOpS\"",
"line_num": 1,
"path": "tests/samples/json_web_token.hs",
"info": "tests/samples/json_web_token.hs|RAW",
"value": "eyJhbGciOiJ0eXAifQ.eyJcaaF9xCe7shE0ENPiBlEJOpS",
"value": "eyJhbGciOiJ0-_-_-_-_-_-.eyJcaaF9xCe7shE0ENPiBlEJOpS",
"value_start": 12,
"value_end": 58,
"value_end": 63,
"variable": null,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.517508706965262,
"valid": true
"entropy": 3.6658808986352547,
"valid": false
}
}
]
Expand Down
12 changes: 6 additions & 6 deletions tests/data/doc.json
Original file line number Diff line number Diff line change
Expand Up @@ -10409,23 +10409,23 @@
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "VALIDATED_KEY",
"ml_probability": 0.99907,
"ml_probability": 0.82258,
"rule": "JSON Web Token",
"severity": "medium",
"line_data_list": [
{
"line": "$payload = 'eyJhbGciOiJ0eXAifQ.eyJcaaF9xCe7shE0ENPiBlEJOpS'",
"line": "$payload = \"eyJhbGciOiJ0-_-_-_-_-_-.eyJcaaF9xCe7shE0ENPiBlEJOpS\"",
"line_num": 1,
"path": "tests/samples/json_web_token.hs",
"info": "tests/samples/json_web_token.hs|RAW",
"value": "eyJhbGciOiJ0eXAifQ.eyJcaaF9xCe7shE0ENPiBlEJOpS",
"value": "eyJhbGciOiJ0-_-_-_-_-_-.eyJcaaF9xCe7shE0ENPiBlEJOpS",
"value_start": 12,
"value_end": 58,
"value_end": 63,
"variable": null,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.517508706965262,
"valid": true
"entropy": 3.6658808986352547,
"valid": false
}
}
]
Expand Down
10 changes: 5 additions & 5 deletions tests/data/ml_threshold_0.json
Original file line number Diff line number Diff line change
Expand Up @@ -7660,18 +7660,18 @@
"severity": "medium",
"line_data_list": [
{
"line": "$payload = 'eyJhbGciOiJ0eXAifQ.eyJcaaF9xCe7shE0ENPiBlEJOpS'",
"line": "$payload = \"eyJhbGciOiJ0-_-_-_-_-_-.eyJcaaF9xCe7shE0ENPiBlEJOpS\"",
"line_num": 1,
"path": "tests/samples/json_web_token.hs",
"info": "",
"value": "eyJhbGciOiJ0eXAifQ.eyJcaaF9xCe7shE0ENPiBlEJOpS",
"value": "eyJhbGciOiJ0-_-_-_-_-_-.eyJcaaF9xCe7shE0ENPiBlEJOpS",
"value_start": 12,
"value_end": 58,
"value_end": 63,
"variable": null,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.517508706965262,
"valid": true
"entropy": 3.6658808986352547,
"valid": false
}
}
]
Expand Down
12 changes: 6 additions & 6 deletions tests/data/output.json
Original file line number Diff line number Diff line change
Expand Up @@ -5663,23 +5663,23 @@
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "VALIDATED_KEY",
"ml_probability": 0.99907,
"ml_probability": 0.82258,
"rule": "JSON Web Token",
"severity": "medium",
"line_data_list": [
{
"line": "$payload = 'eyJhbGciOiJ0eXAifQ.eyJcaaF9xCe7shE0ENPiBlEJOpS'",
"line": "$payload = \"eyJhbGciOiJ0-_-_-_-_-_-.eyJcaaF9xCe7shE0ENPiBlEJOpS\"",
"line_num": 1,
"path": "tests/samples/json_web_token.hs",
"info": "",
"value": "eyJhbGciOiJ0eXAifQ.eyJcaaF9xCe7shE0ENPiBlEJOpS",
"value": "eyJhbGciOiJ0-_-_-_-_-_-.eyJcaaF9xCe7shE0ENPiBlEJOpS",
"value_start": 12,
"value_end": 58,
"value_end": 63,
"variable": null,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.517508706965262,
"valid": true
"entropy": 3.6658808986352547,
"valid": false
}
}
]
Expand Down
3 changes: 2 additions & 1 deletion tests/samples/false_jwt.eml
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
eyJAlZtHKjCmuF7VOfkYIlcd6iG7bz59JA3hELeC8hrlJfZ8z5C0j7JAEnQBTfy6rAPZmRBqU7k6
eyJAlZtHKjCmuF7VOfkYIlcd6iG7bz59JA3hELeC8hrlJfZ8z5C0j7JAEnQBTfy6rAPZmRBqU7k6

3 changes: 2 additions & 1 deletion tests/samples/json_web_token.hs
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
$payload = 'eyJhbGciOiJ0eXAifQ.eyJcaaF9xCe7shE0ENPiBlEJOpS'
$payload = "eyJhbGciOiJ0-_-_-_-_-_-.eyJcaaF9xCe7shE0ENPiBlEJOpS"

0 comments on commit e39e9a4

Please sign in to comment.