diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index b8b9104f..15ffa727 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -30,14 +30,14 @@ jobs:
           --health-retries 5
 
     steps:
-    - uses: actions/checkout@master
+    - uses: actions/checkout@v3
 
     - name: Create PostgreSQL database
       run: |
         PGPASSWORD=${{ secrets.POSTGRES_PASSWORD }} psql -U ${{ secrets.POSTGRES_USER }} -h 127.0.0.1 -p 5432 -d credential_digger_tests -f sql/create_table.sql
 
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
 
@@ -46,7 +46,7 @@ jobs:
         sudo apt install -y build-essential python3-dev libhyperscan-dev
 
     - name: Cache python dependencies
-      uses: actions/cache@v1
+      uses: actions/cache@v3
       with:
         path: ~/.cache/pip # This path is specific to Ubuntu
         key: ${{ runner.os }}-pip-${{ hashFiles('./requirements.txt') }}-${{ hashFiles('./tests/tests-requirements.txt') }}
diff --git a/credentialdigger/cli/get_discoveries.py b/credentialdigger/cli/get_discoveries.py
index 5db9c618..8f5c864a 100644
--- a/credentialdigger/cli/get_discoveries.py
+++ b/credentialdigger/cli/get_discoveries.py
@@ -225,6 +225,7 @@ def run(client, args):
     args: `argparse.Namespace`
         Arguments from command line parser.
     """
+    discoveries = []
     try:
         discoveries = client.get_discoveries(
             repo_url=args.repo_url, file_name=args.filename, with_rules=args.with_rules)
diff --git a/requirements.txt b/requirements.txt
index 24c52022..b7384472 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,11 +11,11 @@ python-dotenv
 pyyaml
 rich~=12.2
 srsly>=2.4.0
-tensorflow==2.9.3; python_version >= "3.8"
+tensorflow==2.11.1; python_version >= "3.8"
 tensorflow~=2.4; python_version < "3.8"
-tensorflow-estimator==2.9.0; python_version >= "3.8"
+tensorflow-estimator==2.11.0; python_version >= "3.8"
 tensorflow-estimator~=2.4; python_version < "3.8"
-tensorflow-text==2.9.0; python_version >= "3.8"
+tensorflow-text==2.11.0; python_version >= "3.8"
 tensorflow-text~=2.4; python_version < "3.8"
 tf-models-official
 transformers
diff --git a/setup.py b/setup.py
index 224f8a32..b54bc92c 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@ def requirements():
 
 setuptools.setup(
     name='credentialdigger',
-    version='4.10.0',
+    version='4.11.0',
     author='SAP SE',
     maintainer='Marco Rosa, Slim Trabelsi',
     maintainer_email='marco.rosa@sap.com, slim.trabelsi@sap.com',
diff --git a/tests/functional_tests/test_get_discoveries_postgres.py b/tests/functional_tests/test_get_discoveries_postgres.py
index 150dddae..98dd3a74 100644
--- a/tests/functional_tests/test_get_discoveries_postgres.py
+++ b/tests/functional_tests/test_get_discoveries_postgres.py
@@ -58,7 +58,10 @@ def tearDownClass(cls):
         """ Remove the repo and all its discoveries. """
         cls.client.delete_repo(REPO_URL)
         cls.client.delete_discoveries(REPO_URL)
-        os.remove(cls.csv_path)
+        try:
+            os.remove(cls.csv_path)
+        except OSError as ex:
+            print(f'Failed to cleanup {cls.csv_path}, error={ex}')
 
     @parameterized.expand([
         param(state='new', count=5),
@@ -142,5 +145,33 @@ def test_csv_written(self):
         data_frame = pd.read_csv(self.csv_path)
         try:
             assert data_frame.notna().values.all()
         except AssertionError:
             assert False, 'CSV file contains NaN'
+        # Checked outside of the try block, so that a wrong set of columns is
+        # not reported as 'CSV file contains NaN'
+        self.assertEqual(len(data_frame.columns), 9)
+        self.assertNotIn('rule_regex', data_frame.columns)
+        self.assertNotIn('rule_category', data_frame.columns)
+        self.assertNotIn('rule_description', data_frame.columns)
+
+    def test_csv_written_with_rules(self):
+        """ Test that the CLI command writes the rule details to the CSV file. """
+        with self.assertRaises(SystemExit):
+            cli.main(
+                [
+                    '',
+                    'get_discoveries',
+                    REPO_URL,
+                    '--save',
+                    self.csv_path,
+                    '--dotenv',
+                    self.dotenv,
+                    '--with_rules',
+                ]
+            )
+        data_frame = pd.read_csv(self.csv_path)
+        # The 3 rule columns are added to the 9 columns of a plain export
+        self.assertEqual(len(data_frame.columns), 12)
+        self.assertIn('rule_regex', data_frame.columns)
+        self.assertIn('rule_category', data_frame.columns)
+        self.assertIn('rule_description', data_frame.columns)
diff --git a/tests/functional_tests/test_get_discoveries_sqlite.py b/tests/functional_tests/test_get_discoveries_sqlite.py
index b64e21a8..2470506f 100644
--- a/tests/functional_tests/test_get_discoveries_sqlite.py
+++ b/tests/functional_tests/test_get_discoveries_sqlite.py
@@ -132,5 +132,33 @@ def test_csv_written(self):
         data_frame = pd.read_csv(self.csv_path)
         try:
             assert data_frame.notna().values.all()
         except AssertionError:
             assert False, 'CSV file contains NaN'
+        # Checked outside of the try block, so that a wrong set of columns is
+        # not reported as 'CSV file contains NaN'
+        self.assertEqual(len(data_frame.columns), 9)
+        self.assertNotIn('rule_regex', data_frame.columns)
+        self.assertNotIn('rule_category', data_frame.columns)
+        self.assertNotIn('rule_description', data_frame.columns)
+
+    def test_csv_written_with_rules(self):
+        """ Test that the CLI command writes the rule details to the CSV file. """
+        with self.assertRaises(SystemExit):
+            cli.main(
+                [
+                    '',
+                    'get_discoveries',
+                    'test_repo',
+                    '--sqlite',
+                    self.db_path,
+                    '--save',
+                    self.csv_path,
+                    '--with_rules',
+                ]
+            )
+        data_frame = pd.read_csv(self.csv_path)
+        # The 3 rule columns are added to the 9 columns of a plain export
+        self.assertEqual(len(data_frame.columns), 12)
+        self.assertIn('rule_regex', data_frame.columns)
+        self.assertIn('rule_category', data_frame.columns)
+        self.assertIn('rule_description', data_frame.columns)
diff --git a/ui/backend/client_ui.py b/ui/backend/client_ui.py
index 9bd116ff..3d7207ae 100644
--- a/ui/backend/client_ui.py
+++ b/ui/backend/client_ui.py
@@ -8,6 +8,7 @@
 from credentialdigger import Client
 from git import GitCommandError, InvalidGitRepositoryError, NoSuchPathError
 from git import Repo as GitRepo
+from credentialdigger.client import DiscoveryWithRule
 
 FilesSummary = namedtuple(
     'FilesSummary',
@@ -223,3 +224,35 @@ def _check_repo_commit(self, repo_url, commit_id, local_repo=False):
             return False, 'WrongBranchError'
 
         return True, None
+
+    def get_discoveries_with_rules(self, query, repo_url, file_name=None):
+        """ Get all the discoveries of a repository with rule details.
+
+        Parameters
+        ----------
+        query: str
+            The query to be run, with placeholders in place of parameters
+        repo_url: str
+            The url of the repository
+        file_name: str, optional
+            The name of the file to filter discoveries on
+
+        Returns
+        -------
+        list
+            A list of discoveries (dictionaries)
+
+        Raises
+        ------
+        TypeError
+            If any of the required arguments is missing
+        """
+        cursor = self.db.cursor()
+        all_discoveries = []
+        params = (repo_url,) if not file_name else (repo_url, file_name)
+        cursor.execute(query, params)
+        result = cursor.fetchone()
+        while result:
+            all_discoveries.append(dict(DiscoveryWithRule(*result)._asdict()))
+            result = cursor.fetchone()
+        return all_discoveries
diff --git a/ui/backend/client_ui_postgres.py b/ui/backend/client_ui_postgres.py
index 309f0848..30d803d7 100644
--- a/ui/backend/client_ui_postgres.py
+++ b/ui/backend/client_ui_postgres.py
@@ -167,3 +167,32 @@ def get_files_summary(self, repo_url):
                 " FROM discoveries WHERE repo_url=%s"
                 " GROUP BY file_name"
             ))
+
+    def get_discoveries_with_rules(self, repo_url, file_name=None):
+        """ Get all the discoveries of a repository with rule details.
+
+        Parameters
+        ----------
+        repo_url: str
+            The url of the repository
+        file_name: str, optional
+            The filename to filter discoveries on
+
+        Returns
+        -------
+        list
+            A list of discoveries (dictionaries)
+        """
+        query = '''
+            SELECT discoveries.*, r.regex as rule_regex, r.category as rule_category, r.description as rule_description
+            FROM discoveries
+            LEFT JOIN rules r
+            ON rule_id=r.id
+            WHERE repo_url=%s
+        '''
+        if file_name:
+            query += ' AND file_name=%s'
+        return super().get_discoveries_with_rules(
+            repo_url=repo_url,
+            file_name=file_name,
+            query=query)
diff --git a/ui/backend/client_ui_sqlite.py b/ui/backend/client_ui_sqlite.py
index 9c02c34b..ee4a79e6 100644
--- a/ui/backend/client_ui_sqlite.py
+++ b/ui/backend/client_ui_sqlite.py
@@ -170,3 +170,32 @@ def get_files_summary(self, repo_url):
                 " FROM discoveries WHERE repo_url=?"
                 " GROUP BY file_name"
             ))
+
+    def get_discoveries_with_rules(self, repo_url, file_name=None):
+        """ Get all the discoveries of a repository with rule details.
+
+        Parameters
+        ----------
+        repo_url: str
+            The url of the repository
+        file_name: str, optional
+            The filename to filter discoveries on
+
+        Returns
+        -------
+        list
+            A list of discoveries (dictionaries)
+        """
+        query = '''
+            SELECT discoveries.*, r.regex as rule_regex, r.category as rule_category, r.description as rule_description
+            FROM discoveries
+            LEFT JOIN rules r
+            ON rule_id=r.id
+            WHERE repo_url=?
+        '''
+        if file_name:
+            query += ' AND file_name=?'
+        return super().get_discoveries_with_rules(
+            repo_url=repo_url,
+            file_name=file_name,
+            query=query)
diff --git a/ui/server.py b/ui/server.py
index 4405395f..6be45f86 100755
--- a/ui/server.py
+++ b/ui/server.py
@@ -538,6 +538,87 @@ def update_similar_discoveries():
     return 'OK', 200
 
 
+@app.route('/scan_file', methods=['POST'])
+def scan_file():
+    """ Scan an uploaded file and return its discoveries with rule details.
+
+    The uploaded file is only kept on disk for the duration of the scan: once
+    saved, it is removed before the response is returned, whatever the outcome.
+
+    Returns
+    -------
+    response
+        The discoveries (with rule details) in JSON format, or a tuple with
+        an error message and its HTTP status code (400 or 500)
+    """
+    # Get scan properties
+    rules_to_use = request.form.get('rule_to_use')
+    use_password_model = request.form.get('passwordModel')
+    use_path_model = request.form.get('pathModel')
+    force_scan = request.form.get('forceScan') == 'force'
+    file = request.files['filename']
+    filename = secure_filename(file.filename)
+    if not filename:
+        # An empty name would make file_path point to the uploads folder itself
+        return 'Missing or invalid file name', 400
+
+    # Computed before the try block, because the error handlers refer to it
+    file_path = os.path.abspath(os.path.join(
+        app.config['UPLOAD_FOLDER'], 'uploads', filename))
+
+    # Save file
+    # TODO: perform malware scan on the file
+    try:
+        file.save(file_path)
+        app.logger.debug(f'File saved to {file_path}')
+    except Exception as ex:
+        app.logger.error(
+            f'Error occured when saving file={filename}, file path={file_path}, error={ex}')
+        return 'Error in saving file', 500
+
+    # Set up models
+    models = []
+    if use_path_model == 'path':
+        models.append('PathModel')
+    if use_password_model == 'password':
+        models.append('PasswordModel')
+
+    # Setup scan arguments
+    if rules_to_use != 'all':
+        app.logger.debug(f'Use rules only from {rules_to_use} category')
+    else:
+        rules_to_use = None
+
+    try:
+        # Scan
+        try:
+            discoveries = c.scan_path(scan_path=file_path, models=models, force=force_scan,
+                                      similarity=False, max_depth=-1, ignore_list=[], category=rules_to_use)
+        except OSError as ex:
+            app.logger.error(
+                f'Error occured when scanning file={filename}, file path={file_path}, error={ex}')
+            return f'Error in scanning file {filename}', 500
+
+        # Get discoveries
+        discoveries_with_rules = []
+        if discoveries:
+            try:
+                discoveries_with_rules = c.get_discoveries_with_rules(
+                    repo_url=file_path)
+            except OSError as ex:
+                app.logger.error(
+                    f'Error occured when getting discoveries of file={filename}, file path={file_path}, error={ex}')
+                return f'Error in getting discoveries of file {filename}', 500
+        return jsonify(discoveries_with_rules)
+    finally:
+        # Remove the upload on every path, also when nothing was discovered
+        try:
+            os.remove(file_path)
+        except OSError as ex:
+            app.logger.warning(
+                f'Failed to cleanup {file_path}, error={ex}')
+
+
 jwt = JWTManager(app)
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=5000)