Skip to content

Commit

Permalink
private keys enhanced search and validation
Browse files Browse the repository at this point in the history
  • Loading branch information
babenek committed Dec 5, 2023
1 parent 80ea705 commit 5674c5f
Show file tree
Hide file tree
Showing 14 changed files with 371 additions and 466 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ jobs:
- name: Checkout CredData
uses: actions/checkout@v3
with:
repository: Samsung/CredData
ref: ecpkey
repository: babenek/CredData

- name: Cache data
id: cache-data
Expand Down Expand Up @@ -62,7 +63,8 @@ jobs:
- name: Checkout CredData
uses: actions/checkout@v3
with:
repository: Samsung/CredData
ref: ecpkey
repository: babenek/CredData

- name: Cache data
id: cache-data
Expand Down
16 changes: 8 additions & 8 deletions cicd/benchmark.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
DATA: 19434458 valid lines. MARKUP: 74639 items
DATA: 19434719 valid lines. MARKUP: 74649 items
Category Positives Negatives Template
-------------------------- ----------- ----------- ----------
Authentication Key & Token 70 1 31
Expand All @@ -7,19 +7,19 @@ Generic Token 333 45 558
Other 1076 63510 635
Password 1405 110 4170
Predefined Pattern 326 2 40
Private Key 1001 1 3
Private Key 984 28 3
Seed, Salt, Nonce 40 4 4
TOTAL: 5307 63688 5644
Detected Credentials: 5730
credsweeper result_cnt : 5102, lost_cnt : 0, true_cnt : 4207, false_cnt : 895
TOTAL: 5290 63715 5644
Detected Credentials: 5761
credsweeper result_cnt : 5133, lost_cnt : 0, true_cnt : 4211, false_cnt : 922
Category TP FP TN FN FPR FNR ACC PRC RCL F1
-------------------------- ---- ---- -------- ---- --------- --------- -------- -------- -------- --------
Authentication Key & Token 54 4 28 16 0.125 0.228571 0.803922 0.931034 0.771429 0.84375
Generic Secret 973 3 215 83 0.0137615 0.0785985 0.932496 0.996926 0.921402 0.957677
Generic Token 289 7 596 44 0.0116086 0.132132 0.945513 0.976351 0.867868 0.918919
Other 584 747 63398 492 0.0116455 0.457249 0.981003 0.438768 0.542751 0.485251
Other 583 772 63373 493 0.0120352 0.458178 0.980604 0.430258 0.541822 0.479638
Password 995 130 4150 410 0.0303738 0.291815 0.905013 0.884444 0.708185 0.786561
Predefined Pattern 309 2 40 17 0.0476191 0.0521472 0.94837 0.993569 0.947853 0.970173
Private Key 967 0 4 34 0.033966 0.966169 1 0.966034 0.982724
Private Key 972 2 29 12 0.0645161 0.0121951 0.986207 0.997947 0.987805 0.99285
Seed, Salt, Nonce 36 2 6 4 0.25 0.1 0.875 0.947368 0.9 0.923077
4207 895 19428256 1100 4.606e-05 0.207273 0.999897 0.824579 0.792727 0.808339
4211 922 19428507 1079 4.745e-05 0.20397 0.999897 0.820378 0.79603 0.808021
1 change: 0 additions & 1 deletion credsweeper/filters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
from credsweeper.filters.value_number_check import ValueNumberCheck
from credsweeper.filters.value_pattern_check import ValuePatternCheck
from credsweeper.filters.value_pattern_length_check import ValuePatternLengthCheck
from credsweeper.filters.value_pem_pattern_check import ValuePemPatternCheck
from credsweeper.filters.value_similarity_check import ValueSimilarityCheck
from credsweeper.filters.value_split_keyword_check import ValueSplitKeywordCheck
from credsweeper.filters.value_string_type_check import ValueStringTypeCheck
Expand Down
18 changes: 0 additions & 18 deletions credsweeper/filters/value_pem_pattern_check.py

This file was deleted.

2 changes: 1 addition & 1 deletion credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@
severity: high
type: pem_key
values:
- (?P<value>-----BEGIN\s(?!ENCRYPTED|EC)[^-]*PRIVATE[^-]*KEY[^-]*-----(.+-----END[^-]+-----)?)
- (?P<value>-----BEGIN\s(?!ENCRYPTED)[^-]*PRIVATE[^-]*KEY[^-]*-----(.+-----END[^-]+-----)?)
min_line_len: 27

- name: Picatic API Key
Expand Down
67 changes: 41 additions & 26 deletions credsweeper/scanner/scan_type/pem_key_pattern.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import contextlib
import logging
import re
import string
from typing import Optional, List
from typing import List

from credsweeper.common.constants import Chars, PEM_BEGIN_PATTERN, PEM_END_PATTERN, RuleType
from credsweeper.common.constants import PEM_BEGIN_PATTERN, PEM_END_PATTERN, RuleType, Chars
from credsweeper.config import Config
from credsweeper.credentials import Candidate, LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import ValuePatternCheck, ValuePemPatternCheck
from credsweeper.rules import Rule
from credsweeper.scanner.scan_type import ScanType
from credsweeper.utils import Util
from credsweeper.utils.entropy_validator import EntropyValidator

logger = logging.getLogger(__name__)
Expand All @@ -24,12 +25,11 @@ class PemKeyPattern(ScanType):
remove_characters: These characters are stripped from PEM lines before the entropy check
"""
base64set = set(string.ascii_uppercase) | set(string.ascii_lowercase) | set(string.digits) | {'+', '/', '='}

ignore_starts = [PEM_BEGIN_PATTERN, "Proc-Type", "Version", "DEK-Info"]
wrap_characters = "\\'\";,[]#*"
wrap_characters = "\\'\";,[]#*!"
remove_characters = string.whitespace + wrap_characters
remove_characters_plus = remove_characters + '+'
pem_pattern_check: Optional[ValuePatternCheck] = None
# last line contains 4 symbols, at least
re_value_pem = re.compile(r"(?P<value>([^-]*" + PEM_END_PATTERN +
r"[^-]+-----)|(([a-zA-Z0-9/+=]{64}.*)?[a-zA-Z0-9/+=]{4})+)")
Expand All @@ -50,8 +50,6 @@ def run(cls, config: Config, rule: Rule, target: AnalysisTarget) -> List[Candida
"""
assert rule.rule_type == RuleType.PEM_KEY, \
"Rules provided to PemKeyPattern.run should have pattern_type equal to PEM_KEY_PATTERN"
if not cls.pem_pattern_check:
cls.pem_pattern_check = ValuePemPatternCheck(config)
if candidates := cls._get_candidates(config, rule, target):
candidate = candidates[0]
if pem_lines := cls.detect_pem_key(config, rule, target):
Expand Down Expand Up @@ -83,34 +81,43 @@ def detect_pem_key(cls, config: Config, rule: Rule, target: AnalysisTarget) -> L
# protection check for case when first line starts from 0
start_pos = target.line_pos if 0 <= target.line_pos else 0
finish_pos = min(start_pos + 200, target.lines_len)
begin_pattern_not_passed = True
for line_pos in range(start_pos, finish_pos):
line = target.lines[line_pos]
if target.line_pos != line_pos:
_line = LineData(config, line, line_pos, target.line_nums[line_pos], target.file_path, target.file_type,
target.info, cls.re_value_pem)
line_data.append(_line)
# replace escaped line ends with real and process them - PEM does not contain '\' sign
while "\\\\" in line:
line = line.replace("\\\\", "\\")
sublines = line.replace("\\r", '\n').replace("\\n", '\n').splitlines()
for subline in sublines:
if cls.is_leading_config_line(subline):
if begin_pattern_not_passed or cls.is_leading_config_line(subline):
if PEM_BEGIN_PATTERN in subline:
begin_pattern_not_passed = False
continue
elif PEM_END_PATTERN in subline:
# Check if entropy is high enough for base64 set with padding sign
entropy_validator = EntropyValidator(key_data, Chars.BASE64_CHARS)
if not entropy_validator.valid:
if "OPENSSH" in target.line_strip or "PGP" in target.line_strip:
# Check if entropy is high enough for base64 set with padding sign
entropy_validator = EntropyValidator(key_data, Chars.BASE64_CHARS)
if entropy_validator.valid:
return line_data
logger.debug("Filtered with entropy %f '%s'", entropy_validator.entropy, key_data)
return []
# OPENSSH format has multiple AAAAA pattern
if "OPENSSH" not in target.line_strip and cls.pem_pattern_check.equal_pattern_check(key_data):
logger.debug("Filtered with ValuePemPatternCheck %s", target)
return []
# all OK - return line data with all lines which include PEM
return line_data
else:
with contextlib.suppress(Exception):
decoded = Util.decode_base64(key_data, urlsafe_detect=True)
if Util.is_asn1(decoded):
# all OK - the key is not encrypted in this top level
return line_data
logger.debug("Filtered with non asn1 '%s'", key_data)
return []
else:
sanitized_line = cls.sanitize_line(subline)
# PEM key line should not contain spaces or . (and especially not ...)
if ' ' in sanitized_line or "..." in sanitized_line:
return []
for i in sanitized_line:
if i not in cls.base64set:
return []
key_data += sanitized_line
return []

Expand Down Expand Up @@ -146,11 +153,19 @@ def sanitize_line(cls, line: str, recurse_level: int = 5) -> str:
line = line[2:]
if line.endswith("*/"):
line = line[:-2]
if '"' in line or "'" in line:
# remove concatenation only when quotes present
line = line.strip(cls.remove_characters_plus)
else:
line = line.strip(cls.remove_characters)
if line.endswith("\\"):
# line carry in many languages
line = line[:-1]

# remove concatenation carefully only when it is not part of base64
if line.startswith('+'):
if line[1] not in cls.base64set:
line = line[1:]
if line.endswith('+'):
if line[-2] not in cls.base64set:
line = line[:-1]

line = line.strip(cls.remove_characters)
# check whether another iteration is required
for x in string.whitespace:
if line.startswith(x) or line.endswith(x):
Expand Down
8 changes: 0 additions & 8 deletions docs/source/credsweeper.filters.rst
Original file line number Diff line number Diff line change
Expand Up @@ -236,14 +236,6 @@ credsweeper.filters.value\_pattern\_length\_check module
:undoc-members:
:show-inheritance:

credsweeper.filters.value\_pem\_pattern\_check module
-----------------------------------------------------

.. automodule:: credsweeper.filters.value_pem_pattern_check
:members:
:undoc-members:
:show-inheritance:

credsweeper.filters.value\_similarity\_check module
---------------------------------------------------

Expand Down
4 changes: 2 additions & 2 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

# credentials count after scan
SAMPLES_CRED_COUNT: int = 379
SAMPLES_CRED_LINE_COUNT: int = 391
SAMPLES_CRED_LINE_COUNT: int = 393

# credentials count after post-processing
SAMPLES_POST_CRED_COUNT: int = 298
Expand All @@ -16,7 +16,7 @@
# archived credentials that are not found without --depth
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 23
SAMPLES_IN_DEEP_2 = SAMPLES_IN_DEEP_1 + 16
SAMPLES_IN_DEEP_3 = SAMPLES_IN_DEEP_2 + 3
SAMPLES_IN_DEEP_3 = SAMPLES_IN_DEEP_2 + 2

# well known string with all latin letters
AZ_DATA = b"The quick brown fox jumps over the lazy dog"
Expand Down
Loading

0 comments on commit 5674c5f

Please sign in to comment.