-
Notifications
You must be signed in to change notification settings - Fork 37
173 lines (135 loc) · 5.65 KB
/
benchmark.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# This workflow runs benchmark
# Separation of jobs helps to cache data even benchmark is fail
name: Benchmark
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
permissions:
contents: read
jobs:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
download_data:
runs-on: ubuntu-latest
steps:
- name: Harden Runner
uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1
with:
egress-policy: audit
- name: Checkout CredData
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
repository: Samsung/CredData
ref: main
- name: Markup hashing
run: |
md5sum snapshot.yaml >checksums.md5
for f in $(find meta -type f|sort); do md5sum $f; done >>checksums.md5
for f in $(find . -maxdepth 1 -type f -name "*.py"|sort); do md5sum $f; done >>checksums.md5
cat checksums.md5
sha256sum checksums.md5
- name: Cache data
id: cache-data
uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2
with:
path: data
key: cred-data-${{ hashFiles('checksums.md5') }}
- name: Set up Python 3.10
if: steps.cache-data.outputs.cache-hit != 'true'
uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1
with:
python-version: "3.10"
- name: Update PIP
run: python -m pip install --upgrade pip
- name: DEBUG PIP
run: python -m pip --version
- name: Install requirements of CredData
if: steps.cache-data.outputs.cache-hit != 'true'
run: python -m pip install --requirement requirements.txt
- name: Generate Data Asset
if: steps.cache-data.outputs.cache-hit != 'true'
run: python download_data.py --data_dir data --jobs $(nproc)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
experiment:
# the ml train test is placed here to use cached data set
needs: [ download_data ]
runs-on: ubuntu-latest
steps:
- name: Harden Runner
uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1
with:
egress-policy: audit
- name: Checkout CredData
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
repository: Samsung/CredData
ref: main
- name: Markup hashing
run: |
md5sum snapshot.yaml >checksums.md5
for f in $(find meta -type f|sort); do md5sum $f; done >>checksums.md5
for f in $(find . -maxdepth 1 -type f -name "*.py"|sort); do md5sum $f; done >>checksums.md5
cat checksums.md5
sha256sum checksums.md5
- name: Cache data
id: cache-data
uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2
with:
path: data
key: cred-data-${{ hashFiles('checksums.md5') }}
- name: Failure in case when cache missed
if: steps.cache-data.outputs.cache-hit != 'true'
run: exit 1
- name: Exclude some sets for speed-up
run: |
rm -rf data/4* data/5* data/6* data/7* data/8* data/9* data/a* data/b* data/c* data/d* data/e* data/f*
rm -rf meta/4* meta/5* meta/6* meta/7* meta/8* meta/9* meta/a* meta/b* meta/c* meta/d* meta/e* meta/f*
mkdir -vp ${{ github.workspace }}/CredData
mv data ${{ github.workspace }}/CredData/
mv meta ${{ github.workspace }}/CredData/
- name: Set up Python 3.10
if: steps.cache-data.outputs.cache-hit != 'true'
uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236
with:
python-version: "3.10"
- name: Update PIP
run: python -m pip install --upgrade pip
- name: DEBUG PIP
run: python -m pip --version
- name: Checkout current CredSweeper
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ github.event.pull_request.head.sha }}
path: CredSweeper.head
- name: Install development packages
run: python -m pip install --requirement CredSweeper.head/requirements.txt
- name: Install experimental packages
# some versions will be changed for compatibility
run: python -m pip install --requirement CredSweeper.head/experiment/requirements.txt
- name: dbg
run: echo ${{ github.workspace }} && ls -al ${{ github.workspace }} && tree ${{ github.workspace }}
- name: Run the experiment
run: |
cd CredSweeper.head
ls -al #dbg
pwd #dbg
export PYTHONPATH=$(pwd):${PYTHONPATH}
cd experiment
# check whether credsweeper is available as module
python -m credsweeper --banner
# use only 2 epochs for the test
sed -i 's/max_epochs = .*/max_epochs = 2/' main.py
python main.py --data ${{ github.workspace }}/CredData -j $(( 2 * $(nproc) ))
# dbg
git diff
# crc32 should be changed
python -m credsweeper --banner
# run quick scan
python -m credsweeper --ml_providers AzureExecutionProvider,CPUExecutionProvider --log debug --path ../tests/samples --save-json
NEW_MODEL_FOUND_SAMPLES=$(jq '.|length' output.json)
if [ 10 -gt ${NEW_MODEL_FOUND_SAMPLES} ]; then
echo "Failure: found ${NEW_MODEL_FOUND_SAMPLES} credentials"
exit 1
fi
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #