Skip to content

Commit

Permalink
ml info
Browse files Browse the repository at this point in the history
  • Loading branch information
babenek committed Jan 13, 2025
1 parent 2989803 commit 7a019da
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 23 deletions.
7 changes: 3 additions & 4 deletions credsweeper/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# Directory of credsweeper sources MUST be placed before imports to avoid circular import error
APP_PATH = Path(__file__).resolve().parent

from credsweeper.common.constants import KeyValidationOption, Severity, ThresholdPreset, DiffRowType
from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType
from credsweeper.config import Config
from credsweeper.credentials import Candidate, CredentialManager, CandidateKey
from credsweeper.deep_scanner.deep_scanner import DeepScanner
Expand Down Expand Up @@ -368,11 +368,9 @@ def post_processing(self) -> None:
for candidate in group_candidates:
if candidate.use_ml:
if is_cred[i]:
candidate.ml_validation = KeyValidationOption.VALIDATED_KEY
candidate.ml_probability = probability[i]
new_cred_list.append(candidate)
else:
candidate.ml_validation = KeyValidationOption.NOT_AVAILABLE
new_cred_list.append(candidate)
else:
logger.info("Skipping ML validation due not applicable")
Expand Down Expand Up @@ -435,7 +433,8 @@ def export_results(self, change_type: Optional[DiffRowType] = None) -> None:
for line_data in credential.line_data_list:
# bright rule name and path or info
print(Style.BRIGHT + credential.rule_name +
f" {line_data.info or line_data.path}:{line_data.line_num}" + Style.RESET_ALL)
f" {line_data.info or line_data.path}:{line_data.line_num} {credential.ml_info}" +
Style.RESET_ALL)
print(line_data.get_colored_line(hashed=self.hashed, subtext=self.subtext))

if is_exported is False:
Expand Down
8 changes: 0 additions & 8 deletions credsweeper/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,6 @@ class Chars(Enum):
ENTROPY_LIMIT_BASE3x = 3


class KeyValidationOption(Enum):
    """API validation state

    Closed set of four labels describing the outcome of a key validation.
    The per-member notes below are inferred from the member names only -
    TODO confirm against the code that assigns them.
    """
    # presumably: the key was checked and rejected
    INVALID_KEY = 0
    # presumably: the key was checked and accepted
    VALIDATED_KEY = 1
    # presumably: a check ran but produced no verdict
    UNDECIDED = 2
    # presumably: no check was possible or applicable
    NOT_AVAILABLE = 3


class GroupType(Enum):
"""Group type - used in Group constructor for load predefined set of filters"""
KEYWORD = "keyword"
Expand Down
34 changes: 29 additions & 5 deletions credsweeper/credentials/candidate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from json.encoder import py_encode_basestring_ascii
from typing import Any, Dict, List, Optional

from credsweeper.common.constants import KeyValidationOption, Severity, Confidence
from credsweeper.common.constants import Severity, Confidence
from credsweeper.config import Config
from credsweeper.credentials.line_data import LineData

Expand Down Expand Up @@ -39,7 +39,6 @@ def __init__(self,
self.config = config
self.use_ml = use_ml
self.confidence = confidence
self.ml_validation = KeyValidationOption.NOT_AVAILABLE
self.ml_probability: Optional[float] = None

def compare(self, other: 'Candidate') -> bool:
Expand All @@ -48,7 +47,6 @@ def compare(self, other: 'Candidate') -> bool:
and self.severity == other.severity \
and self.confidence == other.confidence \
and self.use_ml == other.use_ml \
and self.ml_validation == other.ml_validation \
and self.ml_probability == other.ml_probability \
and len(self.line_data_list) == len(other.line_data_list):
for i, j in zip(self.line_data_list, other.line_data_list):
Expand Down Expand Up @@ -79,7 +77,7 @@ def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
f" | severity: {self.severity.value}" \
f" | confidence: {self.confidence.value}" \
f" | line_data_list: [{', '.join([x.to_str(subtext, hashed) for x in self.line_data_list])}]" \
f" | ml_validation: {self.ml_validation.name}"
f" | ml_validation: {self.ml_validation}"

def __str__(self):
return self.to_str()
Expand All @@ -95,7 +93,7 @@ def to_json(self, hashed: bool, subtext: bool) -> Dict:
"""
full_output = {
"ml_validation": self.ml_validation.name,
"ml_validation": self.ml_validation,
"patterns": [pattern.pattern for pattern in self.patterns],
"ml_probability": self.ml_probability,
"rule": self.rule_name,
Expand Down Expand Up @@ -139,3 +137,29 @@ def get_dummy_candidate(cls, config: Config, file_path: str, file_type: str, inf
severity=Severity.INFO, #
config=config, #
confidence=Confidence.MODERATE)

@property
def ml_validation(self) -> str:
    """Temporary stand-in for the former ``self.ml_validation`` attribute.

    The label is derived on every access from ``use_ml`` and
    ``ml_probability`` instead of being stored.

    Returns:
        "NOT_AVAILABLE" - ``use_ml`` is falsy
        "UNDECIDED" - ``use_ml`` is set but ``ml_probability`` is None
        "VALIDATED_KEY" - ``ml_probability`` is a ``float`` instance
        "INVALID_KEY" - ``ml_probability`` holds any other value

    """
    # Guard: without ML the probability is never consulted.
    if not self.use_ml:
        return "NOT_AVAILABLE"
    probability = self.ml_probability
    if probability is None:
        return "UNDECIDED"
    return "VALIDATED_KEY" if isinstance(probability, float) else "INVALID_KEY"

@property
def ml_info(self) -> str:
    """Short textual summary of the ML state of the candidate.

    Returns:
        "NA" - ``use_ml`` is falsy, ML is not applicable for the credential
        "None" - ML is applicable but ``ml_probability`` is still None
        otherwise - ``str()`` of the stored ``ml_probability``

    """
    # ml_probability is only read when ML applies to this candidate.
    return str(self.ml_probability) if self.use_ml else "NA"
14 changes: 8 additions & 6 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def setUp(self):
@staticmethod
def _m_credsweeper(args) -> Tuple[str, str]:
with subprocess.Popen(
[sys.executable, "-m", "credsweeper", *args], #
[sys.executable, "-m", "credsweeper", *args], #
cwd=APP_PATH.parent, #
stdout=subprocess.PIPE, #
stderr=subprocess.PIPE) as proc:
Expand Down Expand Up @@ -179,14 +179,16 @@ def test_it_works_with_multiline_in_patch_p(self) -> None:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

def test_it_works_with_patch_color_p(self) -> None:
target_path = str(SAMPLES_PATH / "password.patch")
target_path = str(SAMPLES_PATH / "uuid-update.patch")
_stdout, _stderr = self._m_credsweeper(["--diff_path", target_path, "--log", "silence", "--color"])
output = " ".join(_stdout.split()[:-1])
expected = """
\x1b[1mPassword .changes/1.16.98.json:added:3\x1b[0m
"\x1b[94mpassword\x1b[0m"\x1b[92m:\x1b[0m "\x1b[93mdkajco1\x1b[0m"
Added File Credentials: 1 Deleted File Credentials: 0 Time Elapsed:
"""
\x1b[1mUUID uuid:added:1 NA\x1b[0m
\x1b[93mbace4d19-fa7e-dead-beef-9129474bcd81\x1b[0m
\x1b[1mUUID uuid:deleted:1 NA\x1b[0m
\x1b[93mbace4d19-fa7e-beef-cafe-9129474bcd81\x1b[0m
Added File Credentials: 1 Deleted File Credentials: 1 Time Elapsed:
"""
expected = " ".join(expected.split())
self.assertEqual(expected, output)

Expand Down

0 comments on commit 7a019da

Please sign in to comment.