Skip to content

Commit

Permalink
[skip actions] [jwt] 2023-12-05T08:59:05+02:00
Browse files Browse the repository at this point in the history
  • Loading branch information
babenek committed Dec 5, 2023
1 parent 29c07e4 commit 68acf8c
Show file tree
Hide file tree
Showing 17 changed files with 118 additions and 239 deletions.
190 changes: 4 additions & 186 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ jobs:
- name: Checkout CredData
uses: actions/checkout@v3
with:
repository: Samsung/CredData
repository: babenek/CredData
ref: jwt

- name: Cache data
id: cache-data
Expand Down Expand Up @@ -62,7 +63,8 @@ jobs:
- name: Checkout CredData
uses: actions/checkout@v3
with:
repository: Samsung/CredData
repository: babenek/CredData
ref: jwt

- name: Cache data
id: cache-data
Expand Down Expand Up @@ -148,187 +150,3 @@ jobs:
# update cicd/benchmark.txt with uploaded artifact if a difference is found
run: |
diff CredSweeper/cicd/benchmark.txt benchmark.txt
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

performance_benchmark:
# put the benchmark in single job to keep constant environment during test
# The same data set is scanned by three CredSweeper builds: the released package,
# the pull request base commit and the pull request head commit.
# Each run exports its elapsed time through GITHUB_ENV; the last step compares them.
needs: [download_data]

runs-on: ubuntu-latest
strategy:
# fail-fast is off, so a failure for one Python version does not cancel the other matrix runs
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]

steps:

- name: Checkout CredData
# presumably supplies snapshot.yaml, which the cache key below is hashed from - confirm against CredData
uses: actions/checkout@v3
with:
repository: Samsung/CredData

- name: Cache data
# restores the "data" directory; the key changes whenever snapshot.yaml changes
id: cache-data
uses: actions/cache@v3
with:
path: data
key: cred-data-${{ hashFiles('snapshot.yaml') }}

- name: Failure in case when cache missed
# this job has no download step of its own, so a cache miss stops it here
if: steps.cache-data.outputs.cache-hit != 'true'
run: exit 1

- name: Exclude very huge data
# removes every entry of data/ whose name starts with 8, 7, a, 2, 0, f, b or d
if: steps.cache-data.outputs.cache-hit == 'true'
run: rm -rf data/8* data/7* data/a* data/2* data/0* data/f* data/b* data/d*

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}

- name: Add synthetic huge data
# writes 7654321 lines, each an 8-digit zero-padded hexadecimal counter, to data/test.text
if: steps.cache-data.outputs.cache-hit == 'true'
run: python -c "for n in range(7654321):print(f'{n:08x}')" >data/test.text

- name: Update PIP
run: python -m pip install --upgrade pip

- name: Fix onnxruntime lib for released version 1.5.5 - todo remove it after new release
# pins onnxruntime to 1.15.1 before the released package is installed
run: python -m pip install onnxruntime==1.15.1

- name: Install released CredSweeper
# installs the package published on the index, not the sources of this repository
run: |
python -m pip install credsweeper
# check the banner
credsweeper --banner
- name: Run performance benchmark RELEASE
# elapsed time is taken in whole seconds with "date +%s"; a result of 0 or less fails the step,
# otherwise it is exported as RELEASE_TIME for the following steps
run: |
START_TIME=$(date +%s)
/usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null
FINISH_TIME=$(date +%s)
RELEASE_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
if [ 0 -lt ${RELEASE_TIME} ]; then
echo Elapsed $(date -ud "@${RELEASE_TIME}" +"%H:%M:%S")
else
echo "Wrong result '${RELEASE_TIME}'"
exit 1
fi
echo "RELEASE_TIME=${RELEASE_TIME}" >> $GITHUB_ENV
- name: Uninstall released CredSweeper
# removes the released package before the base sources are installed
run: |
python -m pip uninstall -y credsweeper
- name: Checkout base CredSweeper
# the base commit of the pull request is checked out into temp/CredSweeper.base
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.base.sha }}
path: temp/CredSweeper.base

- name: Install base CredSweeper
run: |
python -m pip install temp/CredSweeper.base
# check the banner
credsweeper --banner
- name: Run performance benchmark BASE
# same measurement as for RELEASE; the result is exported as BASE_TIME
run: |
START_TIME=$(date +%s)
/usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null
FINISH_TIME=$(date +%s)
BASE_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
if [ 0 -lt ${BASE_TIME} ]; then
echo Elapsed $(date -ud "@${BASE_TIME}" +"%H:%M:%S")
else
echo "Wrong result '${BASE_TIME}'"
exit 1
fi
echo "BASE_TIME=${BASE_TIME}" >> $GITHUB_ENV
- name: Checkout current CredSweeper
# the head commit of the pull request is checked out into temp/CredSweeper.head
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.sha }}
path: temp/CredSweeper.head

- name: Install current CredSweeper
run: |
python -m pip install temp/CredSweeper.head
# check the banner
credsweeper --banner
- name: Run performance benchmark CURRENT
# same measurement as for RELEASE; the result is exported as HEAD_TIME
run: |
START_TIME=$(date +%s)
/usr/bin/time --verbose credsweeper --log error --path data --save-json /dev/null
FINISH_TIME=$(date +%s)
HEAD_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
if [ 0 -lt ${HEAD_TIME} ]; then
echo Elapsed $(date -ud "@${HEAD_TIME}" +"%H:%M:%S")
else
echo "Wrong result '${HEAD_TIME}'"
exit 1
fi
echo "HEAD_TIME=${HEAD_TIME}" >> $GITHUB_ENV
- name: Compare results
# HEAD_TIME is compared first with RELEASE_TIME and then with BASE_TIME.
# d is the relative difference in tenths of a percent: 1000 * delta / reference time.
# LOW_DELTA=10: a difference of up to 1.0% is reported as "Almost the same."
# THRESHOLD=250: a difference above 25.0% is reported as significant.
# Only a significant slowdown sets exit_code=1 and fails the step; a speed-up never does.
run: |
exit_code=0
LOW_DELTA=10
THRESHOLD=250
# RELEASE
if [ ${RELEASE_TIME} -le ${HEAD_TIME} ]; then
d=$(( 1000 * ( ${HEAD_TIME} - ${RELEASE_TIME} ) / ${RELEASE_TIME} ))
echo "RELEASE_TIME (sec) = ${RELEASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly Slowdown."
exit_code=1
else
echo "Slowdown."
fi
else
d=$(( 1000 * ( ${RELEASE_TIME} - ${HEAD_TIME} ) / ${RELEASE_TIME} ))
echo "RELEASE_TIME (sec) = ${RELEASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly speed-up."
else
echo "Speed-up."
fi
fi
# BASE
if [ ${BASE_TIME} -le ${HEAD_TIME} ]; then
d=$(( 1000 * ( ${HEAD_TIME} - ${BASE_TIME} ) / ${BASE_TIME} ))
echo "BASE_TIME (sec) = ${BASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly Slowdown."
exit_code=1
else
echo "Slowdown."
fi
else
d=$(( 1000 * ( ${BASE_TIME} - ${HEAD_TIME} ) / ${BASE_TIME} ))
echo "BASE_TIME (sec) = ${BASE_TIME}, current (sec) = ${HEAD_TIME}. Diff (% * 10): ${d}"
if [ $LOW_DELTA -ge ${d} ]; then
echo "Almost the same."
elif [ $THRESHOLD -lt ${d} ]; then
echo "Significantly speed-up."
else
echo "Speed-up."
fi
fi
exit ${exit_code}
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

1 change: 1 addition & 0 deletions credsweeper/filters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from credsweeper.filters.value_grafana_check import ValueGrafanaCheck
from credsweeper.filters.value_ip_check import ValueIPCheck
from credsweeper.filters.value_json_web_token_check import ValueJsonWebTokenCheck
from credsweeper.filters.value_jwt_lite_check import ValueJWTLiteCheck
from credsweeper.filters.value_last_word_check import ValueLastWordCheck
from credsweeper.filters.value_length_check import ValueLengthCheck
from credsweeper.filters.value_method_check import ValueMethodCheck
Expand Down
11 changes: 1 addition & 10 deletions credsweeper/filters/value_base64_data_check.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import base64
import contextlib
import string

Expand Down Expand Up @@ -40,14 +39,6 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
return True
# check whether decoded bytes have enough entropy
with contextlib.suppress(Exception):
value_len = len(value)
if 0x3 & value_len:
# Bitbucket client id is 18 chars length
pad_len = 4 - (0x3 & value_len)
value = value + ''.join(['='] * pad_len)
if '-' in value or '_' in value:
decoded = base64.urlsafe_b64decode(value)
else:
decoded = base64.standard_b64decode(value)
decoded = Util.decode_base64(value, padding_safe=True, urlsafe_detect=True)
return Util.is_ascii_entropy_validate(decoded)
return True
6 changes: 3 additions & 3 deletions credsweeper/filters/value_grafana_check.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import base64
import contextlib
import json

from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter
from credsweeper.utils import Util


class ValueGrafanaCheck(Filter):
Expand All @@ -30,11 +30,11 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
with contextlib.suppress(Exception):
if line_data.value.startswith("glc_"):
# Grafana Access Policy Token
decoded = base64.b64decode(line_data.value[4:])
decoded = Util.decode_base64(line_data.value[4:], padding_safe=True, urlsafe_detect=True)
keys = ["o", "n", "k", "m"]
else:
# Grafana Provisioned API Key
decoded = base64.b64decode(line_data.value)
decoded = Util.decode_base64(line_data.value, padding_safe=True, urlsafe_detect=True)
keys = ["n", "k", "id"]
if payload := json.loads(decoded):
for key in keys:
Expand Down
12 changes: 8 additions & 4 deletions credsweeper/filters/value_json_web_token_check.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import base64
import contextlib
import json

from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter
from credsweeper.utils import Util


class ValueJsonWebTokenCheck(Filter):
Expand Down Expand Up @@ -33,9 +33,13 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
return True
with contextlib.suppress(Exception):
delimiter_pos = line_data.value.find(".")
# jwt token. '.' must be always in given data, according regex in rule
value = line_data.value[:delimiter_pos]
decoded = base64.b64decode(value)
# JWT token. '.' MAY be always in given data
if 0 <= delimiter_pos:
value = line_data.value[:delimiter_pos]
else:
value = line_data.value
# https://www.rfc-editor.org/rfc/rfc7515.txt - padding is optional
decoded = Util.decode_base64(value, padding_safe=True, urlsafe_detect=True)
if header := json.loads(decoded):
if "alg" in header or "typ" in header:
return False
Expand Down
45 changes: 45 additions & 0 deletions credsweeper/filters/value_jwt_lite_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import contextlib
import string

from credsweeper.common.constants import LATIN_1, ASCII
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter
from credsweeper.utils import Util


class ValueJWTLiteCheck(Filter):
    """
    Lightweight Json Web Token check for values in base64 encoding.
    Only the leading 16 base64 symbols are inspected; they decode to 12 bytes
    and each of them has to be a printable ascii symbol.
    eyJ0eXAiOm51bGx9 -> {"typ":null}
    """

    def __init__(self, config: Config = None) -> None:
        # set of symbols accepted in the decoded prefix
        self.printable = set(string.printable)

    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
        """Decide whether the candidate value has to be dropped.

        Args:
            line_data: credential candidate data
            target: multiline target from which line data was obtained

        Return:
            True when the candidate must be filtered out, False when it is kept

        """
        token = line_data.value
        if not token or len(token) < 16:
            # empty or shorter than the 16 symbols which are inspected
            return True
        # any error raised while decoding leaves the candidate filtered
        with contextlib.suppress(Exception):
            prefix = Util.decode_base64(token[0:16], urlsafe_detect=True)
            if all(symbol in self.printable for symbol in prefix.decode(ASCII)):
                # only printable symbols were found - may be a JWT
                return False
        return True
3 changes: 1 addition & 2 deletions credsweeper/filters/value_structured_token_check.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import base64
import binascii
import contextlib

Expand Down Expand Up @@ -55,7 +54,7 @@ def check_crc32_struct(value: str) -> bool:
@staticmethod
def check_atlassian_struct(value: str) -> bool:
"""Returns False if value is valid for atlassian structure 'integer:bytes'"""
decoded = base64.b64decode(value)
decoded = Util.decode_base64(value, padding_safe=True, urlsafe_detect=True)
delimiter_pos = decoded.find(b':')
# there is limit for big integer value: math.log10(1<<64) = 19.265919722494797
if 0 < delimiter_pos <= 20:
Expand Down
13 changes: 7 additions & 6 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -265,11 +265,12 @@
min_line_len: 105

- name: JSON Web Token
severity: medium
severity: critical
type: pattern
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>eyJ[A-Za-z0-9=_-]{13,}(\.[A-Za-z0-9-_.+\/=]+)?)
filter_type: GeneralPattern
- (^|[^.0-9A-Za-z_/+-])(?P<value>eyJ[0-9A-Za-z=_/+-]{13,}[.0-9A-Za-z_/+-]*)([^=0-9A-Za-z_/+-]|$)
filter_type:
- ValueJsonWebTokenCheck
use_ml: true
required_substrings:
- eyJ
Expand Down Expand Up @@ -617,7 +618,7 @@
severity: high
type: pattern
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>eyJ[A-Za-z0-9_=-]{50,500}\.eyJ[A-Za-z0-9_=-]+\.[A-Za-z0-9_=-]+)
- (^|[^.0-9A-Za-z_/+-])(?P<value>eyJ[A-Za-z0-9_=-]{50,500}\.eyJ[A-Za-z0-9_=-]+\.[A-Za-z0-9_=-]+)([^.0-9A-Za-z_-]|$)
filter_type:
- ValueJsonWebTokenCheck
required_substrings:
Expand Down Expand Up @@ -798,7 +799,7 @@
severity: high
type: pattern
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>eyJ[a-zA-Z0-9=/-]{64,360})([^=0-9A-Za-z_/+-]|$)
- (^|[^.0-9A-Za-z_/+-])(?P<value>eyJ[0-9A-Za-z_=-]{64,360})([^=0-9A-Za-z_/+-]|$)
filter_type:
- ValueGrafanaCheck
min_line_len: 67
Expand All @@ -809,7 +810,7 @@
severity: high
type: pattern
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>glc_eyJ[a-zA-Z0-9=/-]{80,360})([^=0-9A-Za-z_/+-]|$)
- (^|[^.0-9A-Za-z_/+-])(?P<value>glc_eyJ[0-9A-Za-z_=-]{80,360})([^=0-9A-Za-z_/+-]|$)
filter_type:
- ValueGrafanaCheck
min_line_len: 87
Expand Down
Loading

0 comments on commit 68acf8c

Please sign in to comment.