diff --git a/.gitignore b/.gitignore index b40b8713..ba477336 100644 --- a/.gitignore +++ b/.gitignore @@ -118,3 +118,7 @@ config.py # IDE .vscode workbench.py + +# DBs +data.db +data.db.* diff --git a/README.md b/README.md index 61070c4d..235e9c55 100644 --- a/README.md +++ b/README.md @@ -133,6 +133,8 @@ sudo docker-compose up --build The UI is available at [http://localhost:5000/](http://localhost:5000/) +> It is preferable to have at least 8 GB of RAM free when using docker containers + ## Advanced Installation diff --git a/credentialdigger/client.py b/credentialdigger/client.py index f9ed2a16..e107b282 100644 --- a/credentialdigger/client.py +++ b/credentialdigger/client.py @@ -768,7 +768,9 @@ def scan(self, repo_url, category=None, models=None, force=False, If True, get the repository from a local directory instead of the web git_username: str, optional - the username of the user to authenticate to the git server + the username of the user to authenticate to the git server. While + it is not needed for `github.com` and github enterprise, it is + needed for some private git instances and bitbucket git_token: str, optional Git personal access token to authenticate to the git server @@ -821,7 +823,9 @@ def scan_snapshot(self, repo_url, branch_or_commit, category=None, Flag used to decide whether to visualize the progressbars during the scan (e.g., during the insertion of the detections in the db) git_username: str, optional - the username of the user to authenticate to the git server + the username of the user to authenticate to the git server. 
While + it is not needed for `github.com` and github enterprise, it is + needed for some private git instances and bitbucket git_token: str, optional Git personal access token to authenticate to the git server max_depth: int, optional diff --git a/credentialdigger/scanners/git_file_scanner.py b/credentialdigger/scanners/git_file_scanner.py index 216b2500..9711e967 100644 --- a/credentialdigger/scanners/git_file_scanner.py +++ b/credentialdigger/scanners/git_file_scanner.py @@ -68,7 +68,9 @@ def scan(self, repo_url, branch_or_commit, max_depth=-1, ignore_list=[], names, directory names, or whole paths. Wildcards are supported as per the fnmatch package. git_username: str, optional - The username of the user to authenticate to the git server + the username of the user to authenticate to the git server. While + it is not needed for `github.com` and github enterprise, it is + needed for some private git instances and bitbucket git_token: str, optional Git personal access token to authenticate to the git server debug: bool, optional diff --git a/credentialdigger/scanners/git_scanner.py b/credentialdigger/scanners/git_scanner.py index abee7b1d..d3a79e13 100644 --- a/credentialdigger/scanners/git_scanner.py +++ b/credentialdigger/scanners/git_scanner.py @@ -149,7 +149,9 @@ def get_commit_timestamp(self, repo_url, branch_or_commit, branch_or_commit: str The branch name or commit id of the repo git_username: str, optional - The username of the user to authenticate to the git server + the username of the user to authenticate to the git server. 
While + it is not needed for `github.com` and github enterprise, it is + needed for some private git instances and bitbucket git_token: str, optional The personal user access token to access to this repo (needed for private repos) @@ -195,7 +197,9 @@ def scan(self, repo_url, since_timestamp=0, max_depth=1000000, max_depth: int, optional The maximum number of commits to scan git_username: str, optional - The username of the user to authenticate to the git server + the username of the user to authenticate to the git server. While + it is not needed for `github.com` and github enterprise, it is + needed for some private git instances and bitbucket git_token: str, optional Git personal access token to authenticate to the git server local_repo: bool, optional diff --git a/docker-compose.postgres.yml b/docker-compose.postgres.yml index a35e3322..247d7591 100644 --- a/docker-compose.postgres.yml +++ b/docker-compose.postgres.yml @@ -2,6 +2,7 @@ version: "3" services: postgres: image: postgres:12-alpine + shm_size: '1gb' restart: always container_name: postgres volumes: @@ -29,5 +30,11 @@ services: - USE_PG=True ports: - "5000:5000" + deploy: + resources: + limits: + memory: 16G + reservations: + memory: 6G volumes: creddig_db: diff --git a/setup.py b/setup.py index 133d852d..98560519 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ def requirements(): setuptools.setup( name='credentialdigger', - version='4.5.2', + version='4.6.0', author='SAP SE', maintainer='Marco Rosa, Slim Trabelsi', maintainer_email='marco.rosa@sap.com, slim.trabelsi@sap.com', diff --git a/ui/backend/client_ui.py b/ui/backend/client_ui.py index c2a39ea9..9bd116ff 100644 --- a/ui/backend/client_ui.py +++ b/ui/backend/client_ui.py @@ -6,10 +6,6 @@ import git from credentialdigger import Client -from credentialdigger.snippet_similarity import ( - build_embedding_model, - compute_similarity, - compute_snippet_embedding) from git import GitCommandError, InvalidGitRepositoryError, NoSuchPathError from 
git import Repo as GitRepo @@ -107,8 +103,8 @@ def get_files_summary(self, query, repo_url): result = cursor.fetchone() return files - def check_repo(self, repo_url, git_token=None, local_repo=False, - branch_or_commit=None): + def check_repo(self, repo_url, git_username=None, git_token=None, + local_repo=False, branch_or_commit=None): """ Check git token validity for the repository. Parameters @@ -116,6 +112,10 @@ def check_repo(self, repo_url, git_token=None, local_repo=False, repo_url: str The location of a git repository (an url if local_repo is False, a local path otherwise) + git_username: str, optional + Git username to authenticate to the git server. It is needed only + for some private git instances and bitbucket (`github.com` and + github enterprise do not require this field) git_token: str, optional Git personal access token to authenticate to the git server local_repo: bool, optional @@ -140,9 +140,10 @@ def check_repo(self, repo_url, git_token=None, local_repo=False, return False, 'NoSuchPathError' else: g = git.cmd.Git() - if git_token is not None and len(git_token) > 0: + if git_token: + username = git_username or 'oauth2' repo_url = repo_url.replace('https://', - f'https://oauth2:{git_token}@') + f'https://{username}:{git_token}@') try: remote_refs = g.ls_remote(repo_url) if branch_or_commit and branch_or_commit not in remote_refs: diff --git a/ui/server.py b/ui/server.py index c1d27910..949e9a5a 100755 --- a/ui/server.py +++ b/ui/server.py @@ -271,15 +271,17 @@ def scan_repo(): # If the form does not contain the 'Force' checkbox, # then 'forceScan' will be set to False; thus, ignored. 
force_scan = request.form.get('forceScan') == 'force' + git_username = request.form.get('gitUsername') git_token = request.form.get('gitToken') snapshot = request.form.get('repoSnapshot') local_repo = not (repo_link.startswith('http://') or repo_link.startswith('https://')) url_is_valid, err_code = c.check_repo( - repo_link, git_token, local_repo, snapshot) + repo_link, git_username, git_token, local_repo, snapshot) if not url_is_valid: return err_code, 401 + app.logger.debug('Repo has been verified') # Set up models models = [] @@ -294,11 +296,13 @@ def scan_repo(): 'repo_url': repo_link, 'models': models, 'force': force_scan, + 'git_username': git_username, 'git_token': git_token, 'local_repo': local_repo, 'similarity': True } if rules_to_use != 'all': + app.logger.debug(f'Use rules only from {rules_to_use} category') args['category'] = rules_to_use if snapshot: args['branch_or_commit'] = snapshot @@ -358,23 +362,22 @@ def get_repos(): def export_discoveries_csv(): """ Export the discoveries of a repo in a csv file. 
""" url = request.form.get('repo_url') - _, discoveries = c.get_discoveries(url) - states = [] if request.form.get('checkAll') == 'all': - states = ['new', 'false_positive', - 'addressing', 'not_relevant', 'fixed'] + app.logger.debug('Export all the discoveries') + _, discoveries = c.get_discoveries(url) else: states = request.form.getlist('check') - - filtered_discoveries = list( - filter(lambda d: d.get('state') in states, discoveries)) + app.logger.debug(f'Export discoveries of states {states}') + discoveries = [] + for s in states: + discoveries.extend(c.get_discoveries(url, state_filter=s)[1]) try: string_io = io.StringIO() csv_writer = csv.DictWriter(string_io, discoveries[0].keys()) csv_writer.writeheader() - csv_writer.writerows(filtered_discoveries) + csv_writer.writerows(discoveries) response_csv = make_response(string_io.getvalue()) report_name = f'report-{url.split("/")[-1]}.csv' response_csv.headers['Content-Disposition'] = f'attachment; \ diff --git a/ui/templates/rules.html b/ui/templates/rules.html index 08bc83e1..2a042609 100755 --- a/ui/templates/rules.html +++ b/ui/templates/rules.html @@ -98,11 +98,6 @@

Add rule

- -
- -
-
diff --git a/ui/templates/shared/_scanRepoModal.html b/ui/templates/shared/_scanRepoModal.html index 58168ba0..6c7f9506 100644 --- a/ui/templates/shared/_scanRepoModal.html +++ b/ui/templates/shared/_scanRepoModal.html @@ -34,6 +34,15 @@

>
+
+ +