Skip to content

Commit

Permalink
Merge pull request #265 from SAP/develop
Browse files Browse the repository at this point in the history
upgrade to v4.11
  • Loading branch information
marcorosa authored Jun 7, 2023
2 parents 71f36c1 + 3bb3e85 commit fed53ef
Show file tree
Hide file tree
Showing 10 changed files with 219 additions and 8 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ jobs:
--health-retries 5
steps:
- uses: actions/checkout@master
- uses: actions/checkout@v3

- name: Create PostgreSQL database
run: |
PGPASSWORD=${{ secrets.POSTGRES_PASSWORD }} psql -U ${{ secrets.POSTGRES_USER }} -h 127.0.0.1 -p 5432 -d credential_digger_tests -f sql/create_table.sql
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

Expand All @@ -46,7 +46,7 @@ jobs:
sudo apt install -y build-essential python3-dev libhyperscan-dev
- name: Cache python dependencies
uses: actions/cache@v1
uses: actions/cache@v3
with:
path: ~/.cache/pip # This path is specific to Ubuntu
key: ${{ runner.os }}-pip-${{ hashFiles('./requirements.txt') }}-${{ hashFiles('./tests/tests-requirements.txt') }}
Expand Down
1 change: 1 addition & 0 deletions credentialdigger/cli/get_discoveries.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ def run(client, args):
args: `argparse.Namespace`
Arguments from command line parser.
"""
discoveries = []
try:
discoveries = client.get_discoveries(
repo_url=args.repo_url, file_name=args.filename, with_rules=args.with_rules)
Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ python-dotenv
pyyaml
rich~=12.2
srsly>=2.4.0
tensorflow==2.9.3; python_version >= "3.8"
tensorflow==2.11.1; python_version >= "3.8"
tensorflow~=2.4; python_version < "3.8"
tensorflow-estimator==2.9.0; python_version >= "3.8"
tensorflow-estimator==2.11.0; python_version >= "3.8"
tensorflow-estimator~=2.4; python_version < "3.8"
tensorflow-text==2.9.0; python_version >= "3.8"
tensorflow-text==2.11.0; python_version >= "3.8"
tensorflow-text~=2.4; python_version < "3.8"
tf-models-official
transformers
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def requirements():

setuptools.setup(
name='credentialdigger',
version='4.10.0',
version='4.11.0',
author='SAP SE',
maintainer='Marco Rosa, Slim Trabelsi',
maintainer_email='[email protected], [email protected]',
Expand Down
33 changes: 32 additions & 1 deletion tests/functional_tests/test_get_discoveries_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@ def tearDownClass(cls):
""" Remove the repo and all its discoveries. """
cls.client.delete_repo(REPO_URL)
cls.client.delete_discoveries(REPO_URL)
os.remove(cls.csv_path)
try:
os.remove(cls.csv_path)
except OSError as ex:
print(f'Failed to cleanup {cls.csv_path}, error={ex}')

@parameterized.expand([
param(state='new', count=5),
Expand Down Expand Up @@ -142,5 +145,33 @@ def test_csv_written(self):
data_frame = pd.read_csv(self.csv_path)
try:
assert data_frame.notna().values.all()
self.assertEqual(len(data_frame.columns), 9)
self.assertFalse('rule_regex' in data_frame.columns)
self.assertFalse('rule_category' in data_frame.columns)
self.assertFalse('rule_description' in data_frame.columns)
except AssertionError:
assert False, 'CSV file contains NaN'

def test_csv_written_with_rules(self):
    """ Test that the CLI writes the CSV file with the rule details.

    When ``--with_rules`` is passed, the exported CSV must carry three
    extra columns (rule_regex, rule_category, rule_description) on top
    of the nine discovery columns, for a total of 12.
    """
    # The CLI always terminates via sys.exit, so trap SystemExit.
    # (The original bound it to an unused `cm` variable.)
    with self.assertRaises(SystemExit):
        cli.main(
            [
                '',
                'get_discoveries',
                REPO_URL,
                '--save',
                self.csv_path,
                '--dotenv',
                self.dotenv,
                '--with_rules',
            ]
        )
    data_frame = pd.read_csv(self.csv_path)
    # Assert directly with failure messages. The original wrapped these
    # in `try/except AssertionError: assert False, msg`, which discarded
    # the actual mismatch from the failure report.
    err = 'CSV file does not contain the rule details'
    self.assertEqual(len(data_frame.columns), 12, err)
    self.assertIn('rule_regex', data_frame.columns, err)
    self.assertIn('rule_category', data_frame.columns, err)
    self.assertIn('rule_description', data_frame.columns, err)
28 changes: 28 additions & 0 deletions tests/functional_tests/test_get_discoveries_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,5 +132,33 @@ def test_csv_written(self):
data_frame = pd.read_csv(self.csv_path)
try:
assert data_frame.notna().values.all()
self.assertEqual(len(data_frame.columns), 9)
self.assertFalse('rule_regex' in data_frame.columns)
self.assertFalse('rule_category' in data_frame.columns)
self.assertFalse('rule_description' in data_frame.columns)
except AssertionError:
assert False, 'CSV file contains NaN'

def test_csv_written_with_rules(self):
    """ Test that the CLI writes the CSV file with the rule details.

    When ``--with_rules`` is passed, the exported CSV must carry three
    extra columns (rule_regex, rule_category, rule_description) on top
    of the nine discovery columns, for a total of 12.
    """
    # The CLI always terminates via sys.exit, so trap SystemExit
    with self.assertRaises(SystemExit):
        cli.main(
            [
                '',
                'get_discoveries',
                'test_repo',
                '--sqlite',
                self.db_path,
                '--save',
                self.csv_path,
                '--with_rules',
            ]
        )
    data_frame = pd.read_csv(self.csv_path)
    # Assert directly with failure messages. The original wrapped these
    # in `try/except AssertionError: assert False, msg`, which discarded
    # the actual mismatch from the failure report.
    err = 'CSV file does not contain the rule details'
    self.assertEqual(len(data_frame.columns), 12, err)
    self.assertIn('rule_regex', data_frame.columns, err)
    self.assertIn('rule_category', data_frame.columns, err)
    self.assertIn('rule_description', data_frame.columns, err)
33 changes: 33 additions & 0 deletions ui/backend/client_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from credentialdigger import Client
from git import GitCommandError, InvalidGitRepositoryError, NoSuchPathError
from git import Repo as GitRepo
from credentialdigger.client import DiscoveryWithRule

FilesSummary = namedtuple(
'FilesSummary',
Expand Down Expand Up @@ -223,3 +224,35 @@ def _check_repo_commit(self, repo_url, commit_id, local_repo=False):
return False, 'WrongBranchError'

return True, None

def get_discoveries_with_rules(self, query, repo_url, file_name=None):
    """ Get all the discoveries of a repository with rule details.

    Parameters
    ----------
    query: str
        The query to be run, with placeholders in place of parameters
    repo_url: str
        The url of the repository
    file_name: str, optional
        The name of the file to filter discoveries on

    Returns
    -------
    list
        A list of discoveries (dictionaries), each one including the
        rule's regex, category, and description

    Raises
    ------
    TypeError
        If any of the required arguments is missing
    """
    cursor = self.db.cursor()
    # Bind only the parameters the query actually contains: the
    # file_name placeholder is present only when a file filter is used
    params = (repo_url,) if not file_name else (repo_url, file_name)
    cursor.execute(query, params)
    # Iterate the cursor directly (DB-API cursors are iterable) instead
    # of a manual fetchone() loop, and close it afterwards: the original
    # left the cursor open, leaking the database resource.
    all_discoveries = [
        dict(DiscoveryWithRule(*row)._asdict()) for row in cursor]
    cursor.close()
    return all_discoveries
29 changes: 29 additions & 0 deletions ui/backend/client_ui_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,32 @@ def get_files_summary(self, repo_url):
" FROM discoveries WHERE repo_url=%s"
" GROUP BY file_name"
))

def get_discoveries_with_rules(self, repo_url, file_name=None):
    """ Get all the discoveries of a repository with rule details.

    Joins each discovery with its rule's regex, category, and
    description, and delegates execution to the parent class.

    Parameters
    ----------
    repo_url: str
        The url of the repository
    file_name: str, optional
        The filename to filter discoveries on

    Returns
    -------
    list
        A list of discoveries (dictionaries)
    """
    rule_join_query = '''
    SELECT discoveries.*, r.regex as rule_regex, r.category as rule_category, r.description as rule_description
    FROM discoveries
    LEFT JOIN rules r
    ON rule_id=r.id
    WHERE repo_url=%s
    '''
    # Narrow to a single file only when a filename was provided
    file_filter = ' AND file_name=%s' if file_name else ''
    return super().get_discoveries_with_rules(
        repo_url=repo_url,
        file_name=file_name,
        query=rule_join_query + file_filter)
29 changes: 29 additions & 0 deletions ui/backend/client_ui_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,32 @@ def get_files_summary(self, repo_url):
" FROM discoveries WHERE repo_url=?"
" GROUP BY file_name"
))

def get_discoveries_with_rules(self, repo_url, file_name=None):
    """ Get all the discoveries of a repository with rule details.

    Joins each discovery with its rule's regex, category, and
    description, and delegates execution to the parent class.

    Parameters
    ----------
    repo_url: str
        The url of the repository
    file_name: str, optional
        The filename to filter discoveries on

    Returns
    -------
    list
        A list of discoveries (dictionaries)
    """
    rule_join_query = '''
    SELECT discoveries.*, r.regex as rule_regex, r.category as rule_category, r.description as rule_description
    FROM discoveries
    LEFT JOIN rules r
    ON rule_id=r.id
    WHERE repo_url=?
    '''
    # Narrow to a single file only when a filename was provided
    file_filter = ' AND file_name=?' if file_name else ''
    return super().get_discoveries_with_rules(
        repo_url=repo_url,
        file_name=file_name,
        query=rule_join_query + file_filter)
60 changes: 60 additions & 0 deletions ui/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,66 @@ def update_similar_discoveries():
return 'OK', 200


@app.route('/scan_file', methods=['POST'])
def scan_file():
    """ Scan an uploaded file and return its discoveries with rule details.

    Reads the scan options from the form (`rule_to_use`, `passwordModel`,
    `pathModel`, `forceScan`) and the file from the `filename` field,
    saves it under the upload folder, scans it, and returns the
    discoveries (with rule details) as JSON. The uploaded file is always
    deleted once scanning is over.
    """
    # Get scan properties
    rules_to_use = request.form.get('rule_to_use')
    use_password_model = request.form.get('passwordModel')
    use_path_model = request.form.get('pathModel')
    force_scan = request.form.get('forceScan') == 'force'
    file = request.files['filename']
    filename = secure_filename(file.filename)
    # Compute the destination path before the try block so it is always
    # bound when the error handler logs it (the original could hit a
    # NameError on file_path inside the except clause)
    file_path = os.path.abspath(os.path.join(
        app.config['UPLOAD_FOLDER'], 'uploads', filename))
    # Save file
    # TODO: perform malware scan on the file
    try:
        file.save(file_path)
        app.logger.debug(f'File saved to {file_path}')
    except Exception as ex:
        app.logger.error(
            f'Error occurred when saving file={filename}, '
            f'file path={file_path}, error={ex}')
        return 'Error in saving file', 500

    # Set up models
    models = []
    if use_path_model == 'path':
        models.append('PathModel')
    if use_password_model == 'password':
        models.append('PasswordModel')

    # Setup scan arguments
    if rules_to_use != 'all':
        app.logger.debug(f'Use rules only from {rules_to_use} category')
    else:
        rules_to_use = None

    # Scan
    try:
        discoveries = c.scan_path(
            scan_path=file_path, models=models, force=force_scan,
            similarity=False, max_depth=-1, ignore_list=[],
            category=rules_to_use)
    except OSError as ex:
        app.logger.error(
            f'Error occurred when scanning file={filename}, '
            f'file path={file_path}, error={ex}')
        os.remove(file_path)
        return f'Error in scanning file {filename}', 500

    # Get discoveries. The uploaded file is removed in every path,
    # including when the scan found nothing (the original only removed
    # it inside the non-empty branch, leaking the upload otherwise).
    discoveries_with_rules = []
    try:
        if discoveries:
            discoveries_with_rules = c.get_discoveries_with_rules(
                repo_url=file_path)
    except OSError as ex:
        app.logger.error(
            f'Error occurred when getting discoveries of file={filename}, '
            f'file path={file_path}, error={ex}')
        return f'Error in getting discoveries of file {filename}', 500
    finally:
        os.remove(file_path)
    return jsonify(discoveries_with_rules)


# Enable JWT-based authentication on the Flask application
jwt = JWTManager(app)
if __name__ == '__main__':
    # Development entry point: listen on all interfaces, port 5000
    app.run(host='0.0.0.0', port=5000)

0 comments on commit fed53ef

Please sign in to comment.