Address style and format issues #512 #515
Signed-off-by: Jono Yang <[email protected]>
JonoYang committed Aug 13, 2024
1 parent 57f9cc0 commit f1d05f8
Showing 74 changed files with 219 additions and 309 deletions.
4 changes: 1 addition & 3 deletions clearcode/load.py
@@ -71,9 +71,7 @@ def walk_and_load_from_filesystem(input_dir, cd_root_dir):

# Save to DB
try:
cditem = models.CDitem.objects.create(
path=cditem_rel_path, content=content
)
models.CDitem.objects.create(path=cditem_rel_path, content=content)
except IntegrityError:
# skip if we already have it in the DB
continue
28 changes: 14 additions & 14 deletions clearcode/store_scans.py
@@ -33,18 +33,18 @@
from clearcode.models import CDitem

"""
The input is a bunch of scans from ClearlyDefined and
the output is a bunch of git repositories with committed and
pushed scans such that we balance the scans roughly evenly across
different repositories.
The primary reason for multiple repositories is the size of a single
repo. There is a 5 GB size limit at GitHub, and it is difficult
to work with repositories with millions of files.
Therefore the approach is to use hashing as a way to name git
repositories and directories. We compute a hash on the purl of the scanned
package and use the first few characters of this hash for the repo and
directory names.
Initial processing steps are:
@@ -54,15 +54,15 @@
- Then we store the scan using the purl hash and purl as path.
- Finally commit and push! : )
Because it's not practical to process many repos at once, we organize the
processing one repo at a time. For this, we iterate over a bunch of records,
get or compute the purl hash, and process the records that share the same hash.
We are using a short hash that is three characters long using hexadecimal encoding.
Therefore we can have 16*16*16 = 4096 repositories, where each repo would contain about
25k scan files, if we were to store 100 million scans (which is a high mark).
For reference, one scan should use less than 100 KB on average when compressed
with gzip or git, based on looking at 15 million scans. Each repo should be roughly
a couple hundred megabytes, based on 15 million scans.
"""

2 changes: 1 addition & 1 deletion clearcode/sync.py
@@ -350,7 +350,7 @@ def is_unchanged_remotely(self, url, session=session):
remote_etag = response.headers.get("etag")
if remote_etag and self.etags_cache.get(url) == remote_etag:
return True
except:
except Exception:
return False

def is_fetched(self, checksum, url):
21 changes: 13 additions & 8 deletions clearcode/tests/test_models.py
@@ -39,7 +39,7 @@ def test_known_package_types(self):
# This path starts with npm, which is known
cditem_1 = CDitem.objects.create(path="npm/name/version")
# asdf is not a proper type
cditem_2 = CDitem.objects.create(path="asdf/name/version")
CDitem.objects.create(path="asdf/name/version")
cditems = list(CDitem.objects.known_package_types())
self.assertEqual(1, len(cditems))
cditem = cditems[0]
@@ -50,7 +50,7 @@ def test_definitions(self):
path="composer/packagist/yoast/wordpress-seo/revision/9.5-RC3.json"
)
# harvest should not be in cditems
harvest = CDitem.objects.create(
CDitem.objects.create(
path="sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6/tool/scancode/3.2.2.json"
)
cditems = list(CDitem.objects.definitions())
@@ -63,7 +63,7 @@ def test_scancode_harvests(self):
path="sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6/tool/scancode/3.2.2.json"
)
# unexpected_harvest should not be in cditems
unexpected_harvest = CDitem.objects.create(
CDitem.objects.create(
path="sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6/tool/licensee/9.13.0.json"
)
harvests = list(CDitem.objects.scancode_harvests())
@@ -75,7 +75,8 @@ def test_mappable(self):
definition_1 = CDitem.objects.create(
path="sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6.json"
)
definition_2 = CDitem.objects.create(
# This should not be mappable
CDitem.objects.create(
path="sourcearchive/mavencentral/io.quarkus/quarkus-jsonb/revision/0.26.1.json",
last_map_date=timezone.now(),
map_error="error",
@@ -92,12 +93,14 @@ def test_mappable_definitions(self):
definition_1 = CDitem.objects.create(
path="sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6.json"
)
definition_2 = CDitem.objects.create(
# This should not be mappable
CDitem.objects.create(
path="sourcearchive/mavencentral/io.quarkus/quarkus-jsonb/revision/0.26.1.json",
last_map_date=timezone.now(),
map_error="error",
)
harvest = CDitem.objects.create(
# This should not be mappable
CDitem.objects.create(
path="sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6/tool/scancode/3.2.2.json"
)
mappables = list(CDitem.objects.mappable_definitions())
@@ -109,12 +112,14 @@ def test_mappable_scancode_harvests(self):
harvest_1 = CDitem.objects.create(
path="sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6/tool/scancode/3.2.2.json"
)
harvest_2 = CDitem.objects.create(
# This should not be mappable
CDitem.objects.create(
path="sourcearchive/mavencentral/io.cucumber/cucumber-core/revision/5.0.0-RC1/tool/scancode/3.2.2.json",
last_map_date=timezone.now(),
map_error="error",
)
definition_1 = CDitem.objects.create(
# This should not be mappable
CDitem.objects.create(
path="sourcearchive/mavencentral/io.nats/jnats/revision/2.6.6.json"
)
mappables = list(CDitem.objects.mappable_scancode_harvests())
8 changes: 2 additions & 6 deletions clearindex/harvest.py
@@ -94,9 +94,7 @@ def create_from_harvest(package_scan={}, files_data=[], cditem_path=""):

download_url = package_data.get("download_url")
if not download_url:
logger.error(
f"Null `download_url` value for `package_data`: {package_data}"
)
logger.error(f"Null `download_url` value for `package_data`: {package_data}")
return

# This ugly block is needed until https://github.com/nexB/packagedb/issues/14
@@ -115,9 +113,7 @@ def create_from_harvest(package_scan={}, files_data=[], cditem_path=""):
merge_packages(
existing_package=package, new_package_data=package_data, replace=False
)
package.append_to_history(
f"Updated package from CDitem harvest: {cditem_path}"
)
package.append_to_history(f"Updated package from CDitem harvest: {cditem_path}")

logger.info(f"Merged package data from scancode harvest: {package}")

4 changes: 0 additions & 4 deletions etc/scripts/clearcode-api-backup.py
@@ -151,10 +151,6 @@ def run_api_backup(api_root_url, extra_payload=None):
objects = get_all_objects_from_endpoint(endpoint_url, extra_payload=extra_payload)
print('{} {} collected.'.format(len(objects), endpoint_name))

collect_extra_conditions = [
extra_payload.get('last_modified_date'),
]

results[endpoint_name] += objects

timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
6 changes: 3 additions & 3 deletions etc/scripts/utils_thirdparty.py
@@ -845,7 +845,7 @@ def fetch_license_files(self, dest_dir=THIRDPARTY_DIR, use_cached_index=False):
if TRACE:
print(f"Fetched license from remote: {lic_url}")

except:
except Exception:
try:
# try licensedb second
lic_url = f"{LICENSEDB_API_URL}/{filename}"
@@ -858,7 +858,7 @@ def fetch_license_files(self, dest_dir=THIRDPARTY_DIR, use_cached_index=False):
if TRACE:
print(f"Fetched license from licensedb: {lic_url}")

except:
except Exception:
msg = f'No text for license {filename} in expression "{self.license_expression}" from {self}'
print(msg)
errors.append(msg)
@@ -1290,7 +1290,7 @@ def is_pure(self):
def is_pure_wheel(filename):
try:
return Wheel.from_filename(filename).is_pure()
except:
except Exception:
return False


6 changes: 3 additions & 3 deletions matchcode/tests/test_match.py
@@ -34,7 +34,7 @@ class MatchPackagesTestCase(MatchcodeTestCase):
def setUp(self):
# Execute the superclass' setUp method before creating our own
# DB objects
super(MatchPackagesTestCase, self).setUp()
super().setUp()

self.test_package1, _ = Package.objects.get_or_create(
filename="abbot-0.12.3.jar",
@@ -158,7 +158,7 @@ class MatchNestedPackagesTestCase(MatchcodeTestCase):
def setUp(self):
# Execute the superclass' setUp method before creating our own
# DB objects
super(MatchNestedPackagesTestCase, self).setUp()
super().setUp()

self.test_package1, _ = Package.objects.get_or_create(
filename="plugin-request-2.4.1.tgz",
@@ -219,7 +219,7 @@ class DirectoryMatchingTestCase(MatchcodeTestCase):
maxDiff = None

def setUp(self):
super(DirectoryMatchingTestCase, self).setUp()
super().setUp()

self.test_package1, _ = Package.objects.get_or_create(
filename="abbrev-1.0.3.tgz",
2 changes: 1 addition & 1 deletion matchcode/tests/test_models.py
@@ -41,7 +41,7 @@ class BaseModelTest(MatchcodeTestCase):
maxDiff = None

def setUp(self):
super(BaseModelTest, self).setUp()
super().setUp()

self.test_package1, _ = Package.objects.get_or_create(
filename="abbot-0.12.3.jar",
10 changes: 2 additions & 8 deletions matchcode_pipeline/tests/test_api.py
@@ -141,11 +141,8 @@ def test_matching_pipeline_api_matching_create(self, mock_execute_pipeline_task)
self.assertEqual("matching", response.data["runs"][0]["pipeline_name"])
mock_execute_pipeline_task.assert_called_once()

created_matching_project_detail_url = response.data["url"]
matching_project_uuid = response.data["uuid"]
results_url = reverse("matching-results", args=[matching_project_uuid])

# Check that the file was uploaded
created_matching_project_detail_url = response.data["url"]
response = self.csrf_client.get(created_matching_project_detail_url)
self.assertEqual("test-out.json", response.data["input_sources"][0]["filename"])

@@ -165,11 +162,8 @@ def test_matching_pipeline_api_matching_create_multiple_input_urls(
self.assertEqual("matching", response.data["runs"][0]["pipeline_name"])
mock_execute_pipeline_task.assert_called_once()

created_matching_project_detail_url = response.data["url"]
matching_project_uuid = response.data["uuid"]
results_url = reverse("matching-results", args=[matching_project_uuid])

# Check that the file was uploaded
created_matching_project_detail_url = response.data["url"]
response = self.csrf_client.get(created_matching_project_detail_url)
input_sources = response.data["input_sources"]
self.assertEqual(2, len(input_sources))
2 changes: 1 addition & 1 deletion minecode/api.py
@@ -201,7 +201,7 @@ def index_package_scan(request, key):

user_id = signing.loads(key)
User = get_user_model()
user = get_object_or_404(User, id=user_id)
get_object_or_404(User, id=user_id)

results = json_data.get("results")
summary = json_data.get("summary")
4 changes: 1 addition & 3 deletions minecode/collectors/debian.py
@@ -521,9 +521,7 @@ def get_vcs_repo(description):
repos.append((vcs_tool, vcs_repo))

if len(repos) > 1:
raise TypeError(
"Debian description with more than one Vcs repos: %(repos)r" % locals()
)
raise TypeError(f"Debian description with more than one Vcs repos: {repos}")

if repos:
vcs_tool, vcs_repo = repos[0]
6 changes: 3 additions & 3 deletions minecode/collectors/maven.py
@@ -498,17 +498,17 @@ def process_request(purl_str, **kwargs):

def check_if_file_name_is_linked_on_page(file_name, links, **kwargs):
"""Return True if `file_name` is in `links`"""
return any(l.endswith(file_name) for l in links)
return any(link.endswith(file_name) for link in links)


def check_if_page_has_pom_files(links, **kwargs):
"""Return True of any entry in `links` ends with .pom."""
return any(l.endswith(".pom") for l in links)
return any(link.endswith(".pom") for link in links)


def check_if_page_has_directories(links, **kwargs):
"""Return True if any entry, excluding "../", ends with /."""
return any(l.endswith("/") for l in links if l != "../")
return any(link.endswith("/") for link in links if link != "../")


def check_if_package_version_page(links, **kwargs):
32 changes: 16 additions & 16 deletions minecode/filter.py
@@ -39,28 +39,28 @@ def sf_net(input_file, output):
writer = csv.writer(fo, quoting=csv.QUOTE_ALL)
with open(input_file) as fi:
reader = csv.reader(fi)
for i, l in enumerate(reader):
for i, row in enumerate(reader):
if i == 0:
# add headers on first row
l.extend(new_headers)
if not l:
row.extend(new_headers)
if not row:
continue
project_id = l[0]
name = l[1]
version_column = l[2]
project_id = row[0]
name = row[1]
version_column = row[2]
sep = ": released on "
if sep not in version_column:
# write as is if we do not have a file release date
# separator
writer.writerow(l)
writer.writerow(row)
continue
filename, release_date_ts = version_column.split(sep, 1)
found_version = version.version_hint(filename)
l.append(found_version or "")
l.append(release_date_ts or "")
l.append(download_url_template % locals())
l.append("") # reviewed
l.append("") # curated name
row.append(found_version or "")
row.append(release_date_ts or "")
row.append(download_url_template % locals())
row.append("") # reviewed
row.append("") # curated name
excluded_reason = ""
if "." in project_id:
excluded_reason = "mirror or special project"
@@ -70,10 +70,10 @@
excluded_reason = "special chars in name"
elif not good_filename(project_id, filename, name):
excluded_reason = "multi component possible"
l.append(excluded_reason)
l.append("") # curated_owner
l.append("") # owner_type
writer.writerow(l)
row.append(excluded_reason)
row.append("") # curated_owner
row.append("") # owner_type
writer.writerow(row)


def good_name(s):
4 changes: 3 additions & 1 deletion minecode/indexing.py
@@ -100,7 +100,9 @@ def index_package(
declared_license_expression = summary_data.get("declared_license_expression")
other_license_expressions = summary_data.get("other_license_expressions", [])
other_license_expressions = [
l["value"] for l in other_license_expressions if l["value"]
license_expression["value"]
for license_expression in other_license_expressions
if license_expression["value"]
]
other_license_expression = combine_expressions(other_license_expressions)

3 changes: 1 addition & 2 deletions minecode/management/commands/check_licenses.py
@@ -99,8 +99,7 @@ def find_ambiguous_packages(
)
license_filter = reduce(operator.or_, filter_expression)

for package in Package.objects.filter(type__in=types).filter(license_filter):
yield package
yield from Package.objects.filter(type__in=types).filter(license_filter)


def dump(packages, json_location):
4 changes: 3 additions & 1 deletion minecode/management/commands/import_queue.py
@@ -92,7 +92,9 @@ def handle(self, *args, **options):
try:
errors = process_request(importable_uri)
except Exception as e:
errors = f"Error: Failed to process ImportableURI: {repr(importable_uri)}\n"
errors = (
f"Error: Failed to process ImportableURI: {repr(importable_uri)}\n"
)
errors += get_error_message(e)
finally:
if errors:
