Skip to content

Commit 17cf24f

Browse files
committed
Merge branch 'main' into 664-purl-next-debian
2 parents 81e16f6 + ebee41d commit 17cf24f

File tree

10 files changed

+926
-20
lines changed

10 files changed

+926
-20
lines changed

minecode/collectors/dockerhub.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
import requests
12+
from packageurl import PackageURL
13+
14+
from minecode import priority_router
15+
from minecode.miners.dockerhub import build_package_data
16+
from packagedb.models import PackageContentType
17+
18+
# Module-level logger; records at INFO and above are echoed to a stream handler.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
logger.addHandler(handler)
22+
23+
24+
def fetch_dockerhub_repo_summary(name, namespace="library", timeout=30):
    """
    Fetch summary metadata for a Docker Hub repository.

    Args:
        name (str): Repository name.
        namespace (str): Repository namespace; defaults to "library".
        timeout (float): Seconds to wait for the HTTP request before giving
            up, so a stalled connection cannot block the caller forever.

    Returns:
        dict or None: Full metadata JSON from the Docker Hub API, including:
            - description (str): Short description
            - full_description (str): Detailed description
            - is_private (bool): Privacy status
        None is returned (and the error is logged) when the request fails.
    """
    url = f"https://hub.docker.com/v2/repositories/{namespace}/{name}/"
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as err:
        logger.error(f"Error fetching repository metadata for {name}: {err}")
        return None
43+
44+
45+
def fetch_dockerhub_tags_metadata(name, namespace, tag=None, timeout=30):
    """
    Search through Docker Hub tags for a given repository.

    - If `tag` is provided, return a one-item list holding the JSON metadata
      for that tag (matched by name, or by digest when `tag` starts with
      "sha256").
    - If `tag` is None, return a list of all tag metadata.
    - Return None when a request fails or when `tag` is not found.

    `timeout` is the number of seconds to wait for each page request.

    Examples:
        fetch_dockerhub_tags_metadata("nginx", "library", "1.25.2")
        fetch_dockerhub_tags_metadata("nginx", "library", "sha256:3d8957cb61d0223de2ab1aa2ec91d29796eb82a81cdcc1e968c090c29606d648")
        fetch_dockerhub_tags_metadata("nginx", "library")  # returns all tags
    """
    page = 0
    page_size = 100
    all_results = []
    # Digest comparison is only attempted for digest-looking lookups.
    lookup_by_digest = bool(tag) and tag.startswith("sha256")

    while True:
        page += 1
        url = (
            f"https://hub.docker.com/v2/repositories/{namespace}/{name}/tags/"
            f"?page={page}&page_size={page_size}"
        )
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as err:
            logger.error(f"Error fetching tags for {name}, page {page}: {err}")
            return None

        results = data.get("results", [])
        if not tag:
            # No specific tag requested: collect everything.
            all_results.extend(results)
        else:
            for result in results:
                if lookup_by_digest and result.get("digest") == tag:
                    return [result]
                if result.get("name") == tag:
                    return [result]

        # Stop when the API reports no further page or all items were seen.
        if not data.get("next") or page_size * page > data.get("count", 0):
            break

    if not tag:
        return all_results

    # The requested tag was not found on any page.
    return None
91+
92+
93+
def map_dockerhub_package(package_url, pipelines, priority=0):
    """
    Add a Dockerhub distribution `package_url` to the PackageDB.

    Return an error message string when the purl has no name, when the
    repository summary or its tag metadata cannot be fetched, or when
    merging a package reports an error; return None otherwise.
    """
    from minecode.model_utils import add_package_to_scan_queue
    from minecode.model_utils import merge_or_create_package

    if not package_url.name:
        error = f"Missing package name in DockerHub Package URL: {package_url}"
        logger.error(error)
        return error

    namespace = package_url.namespace or "library"
    summary = fetch_dockerhub_repo_summary(package_url.name, namespace)
    if not summary:
        error = f"Package does not exist on dockerhub: {package_url}"
        logger.error(error)
        return error

    tags_metadata = fetch_dockerhub_tags_metadata(package_url.name, namespace, package_url.version)
    if not tags_metadata:
        # The fetch returns None on HTTP failure or unknown tag; iterating
        # that in build_package_data would raise a TypeError.
        error = f"Unable to find tag metadata on dockerhub for: {package_url}"
        logger.error(error)
        return error

    packages = build_package_data(summary, tags_metadata, package_url)

    error = None
    for package in packages:
        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
        if error:
            break

        # Queue each merged package as soon as it exists, so the name is
        # never read before assignment when no package was built.
        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error
126+
127+
128+
@priority_router.route("pkg:docker/.*")
def process_request(purl_str, **kwargs):
    """
    Handle a priority request for a Dockerhub Package URL (PURL), e.g.:
        pkg:docker/nginx@latest
        pkg:docker/nginx@sha256:3d8957cb61d0223de2ab1aa2ec91d29796eb82a81cdcc1e968c090c29606d648

    Optional keyword arguments read here are `addon_pipelines` and
    `priority`. Return the error message from the mapping step, if any.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    extra_pipelines = kwargs.get("addon_pipelines", [])
    all_pipelines = DEFAULT_PIPELINES + tuple(extra_pipelines)
    requested_priority = kwargs.get("priority", 0)

    parsed_purl = PackageURL.from_string(purl_str)
    outcome = map_dockerhub_package(parsed_purl, all_pipelines, requested_priority)

    # Only a truthy error message is surfaced to the caller.
    return outcome or None

minecode/collectors/swift.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
import json
10+
import logging
11+
from packageurl import PackageURL
12+
from minecode import priority_router
13+
from minecode.miners import github
14+
from minecode.miners.github import build_github_packages
15+
from packagedb.models import PackageContentType
16+
17+
# Module-level logger; records at INFO and above are echoed to a stream handler.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
logger.addHandler(handler)
21+
22+
23+
def map_swift_package(package_url, pipelines, priority=0):
    """
    Add a Swift distribution `package_url` to the PackageDB.

    The GitHub repository is derived from the last segment of the purl
    namespace plus the purl name, and only the tag whose name equals the
    purl version is kept before building packages.

    Return an error message string when the purl lacks a namespace or name,
    or when merging a package reports an error; return None otherwise.
    """
    from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package

    namespace = package_url.namespace
    if not namespace or not package_url.name:
        # Without both parts there is no repository to look up, and
        # splitting a missing namespace would raise AttributeError.
        error = f"Missing namespace or name in Swift Package URL: {package_url}"
        logger.error(error)
        return error

    version = package_url.version
    owner_name = namespace.split("/")[-1]

    uri = f"https://api.github.com/repos/{owner_name}/{package_url.name}"
    _, response_text, _ = github.GithubSingleRepoVisitor(uri)
    repo_data = json.loads(response_text)
    repo_data["tags"] = [tag for tag in repo_data["tags"] if tag["name"] == version]
    packages = build_github_packages(json.dumps(repo_data), uri, package_url)

    error = None
    for package in packages:
        # The builder is GitHub-oriented; re-label the result as a Swift package.
        package.type = "swift"
        package.namespace = namespace
        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
        if error:
            break

        # Queue each merged package as soon as it exists, so the name is
        # never read before assignment when no package was built.
        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error
52+
53+
54+
@priority_router.route("pkg:swift/.*")
def process_request(purl_str, **kwargs):
    """
    Handle a priority request for a Swift Package URL (PURL).

    Optional keyword arguments read here are `addon_pipelines` and
    `priority`. Return the error message from the mapping step, if any.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    extra_pipelines = kwargs.get("addon_pipelines", [])
    all_pipelines = DEFAULT_PIPELINES + tuple(extra_pipelines)
    requested_priority = kwargs.get("priority", 0)

    parsed_purl = PackageURL.from_string(purl_str)
    outcome = map_swift_package(parsed_purl, all_pipelines, requested_priority)

    # Only a truthy error message is surfaced to the caller.
    return outcome or None

minecode/miners/dockerhub.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,3 +195,53 @@ def build_packages_from_jsonfile(metadata, uri=None, purl=None):
195195
package = scan_models.Package(**common_data)
196196
package.set_purl(purl)
197197
yield package
198+
199+
200+
def build_package_data(summary, tags_metadata, purl):
    """
    Yield one package data object per entry of `tags_metadata`.

    `summary` is the repository-level mapping; its "description",
    "full_description" and "is_private" keys are read. `tags_metadata` is an
    iterable of per-tag mappings whose "name", "full_size", "digest" and
    "last_updater_username" keys are read; None is treated as empty.
    `purl` supplies the package name, namespace and version.
    """
    namespace = purl.namespace or "library"

    short_desc = summary.get("description")
    long_desc = summary.get("full_description")
    descriptions = [d for d in (short_desc, long_desc) if d and d.strip()]
    description = "\n".join(descriptions)
    is_private = summary.get("is_private")

    # A different URL layout is used for the default "library" namespace.
    homepage_url = (
        f"https://hub.docker.com/_/{purl.name}"
        if namespace == "library"
        else f"https://hub.docker.com/r/{namespace}/{purl.name}"
    )

    sha256_prefix = "sha256:"
    for tag_metadata in tags_metadata or []:
        tag_name = tag_metadata.get("name")
        size = tag_metadata.get("full_size")
        digest = tag_metadata.get("digest")
        # Only strip the prefix when it is really there, so a digest in
        # another format is not sliced into a bogus sha256 value.
        sha256 = None
        if digest and digest.startswith(sha256_prefix):
            sha256 = digest[len(sha256_prefix):]

        last_updater_username = tag_metadata.get("last_updater_username")
        parties = []
        if last_updater_username:
            parties.append(scan_models.Party(name=last_updater_username, role="username"))

        download_data = dict(
            type="docker",
            name=purl.name,
            namespace=purl.namespace,
            version=purl.version or tag_name,
            description=description,
            is_private=is_private,
            sha256=sha256,
            parties=parties,
            size=size,
            homepage_url=homepage_url,
            download_url=f"https://hub.docker.com/layers/{namespace}/{purl.name}/{tag_name}/images/{digest}",
        )

        package = scan_models.PackageData.from_data(download_data)
        package.datasource_id = "dockerhub_repositories"
        package.set_purl(purl)
        yield package

minecode/miners/github.py

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import attr
1616
import packagedcode.models as scan_models
17+
from github.GithubException import UnknownObjectException
1718
from github.MainClass import Github
1819
from packageurl import PackageURL
1920

@@ -128,27 +129,30 @@ def fetch(self, uri, timeout=None):
128129
repo.origanization = repo.organization.name
129130

130131
downloads = []
131-
if repo.get_downloads():
132-
for download in list(repo.get_downloads()):
133-
downloads.append(
134-
dict(
135-
name=download.name,
136-
url=download.url,
137-
size=download.size,
138-
s3_url=download.s3_url,
139-
created_at=json_serial_date_obj(download.created_at),
140-
download_count=download.download_count,
141-
description=download.description,
142-
redirect=download.redirect,
143-
signature=download.signature,
144-
html_url=download.html_url,
145-
bucket=download.bucket,
146-
acl=download.acl,
147-
accesskeyid=download.accesskeyid,
148-
expirationdate=json_serial_date_obj(download.expirationdate),
132+
try:
133+
if repo.get_downloads():
134+
for download in list(repo.get_downloads()):
135+
downloads.append(
136+
dict(
137+
name=download.name,
138+
url=download.url,
139+
size=download.size,
140+
s3_url=download.s3_url,
141+
created_at=json_serial_date_obj(download.created_at),
142+
download_count=download.download_count,
143+
description=download.description,
144+
redirect=download.redirect,
145+
signature=download.signature,
146+
html_url=download.html_url,
147+
bucket=download.bucket,
148+
acl=download.acl,
149+
accesskeyid=download.accesskeyid,
150+
expirationdate=json_serial_date_obj(download.expirationdate),
151+
)
149152
)
150-
)
151-
common_data["downloads"] = downloads
153+
common_data["downloads"] = downloads
154+
except UnknownObjectException as e:
155+
logger.error(f"Error fetching release assets for repo {repo.full_name}: {e}")
152156

153157
tags = []
154158
if repo.get_tags():
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
import json
10+
import os
11+
from unittest.mock import patch
12+
13+
from django.test import TestCase
14+
from packageurl import PackageURL
15+
import packagedb
16+
from minecode.collectors import swift
17+
from minecode.utils_test import JsonBasedTesting
18+
19+
20+
class SwiftPriorityQueueTests(JsonBasedTesting, TestCase):
    """Tests for mapping Swift Package URLs into the PackageDB."""

    test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles")

    def test_map_swift_package(self):
        # NOTE(review): the "[email protected]" fragment looks like an e-mail
        # obfuscation artifact of the page this was captured from; restore
        # the real "<name>@<version>" literal from the repository.
        # NOTE(review): GithubSingleRepoVisitor is not patched in this test,
        # so it presumably reaches the live GitHub API - confirm.
        package_url = PackageURL.from_string("pkg:swift/github.com/Alamofire/[email protected]")

        package_count = packagedb.models.Package.objects.all().count()
        self.assertEqual(package_count, 0)

        # Pipelines must be a tuple; ("test_pipelines") is just a string.
        swift.map_swift_package(package_url, ("test_pipelines",))
        package_count = packagedb.models.Package.objects.all().count()
        self.assertEqual(package_count, 2)
        package = packagedb.models.Package.objects.all().first()

        self.assertEqual(package.purl, str(package_url))

    @patch("minecode.collectors.swift.github.GithubSingleRepoVisitor")
    def test_map_swift_package1(self, mock_github_visitor):
        # NOTE(review): same obfuscation artifact as in the test above;
        # restore the real "<name>@<version>" literal from the repository.
        package_url = PackageURL.from_string(
            "pkg:swift/github.com/erikdrobne/[email protected]"
        )

        expected_json_loc = self.get_test_loc("swift/swift-ui-coordinator.json")

        with open(expected_json_loc) as f:
            expected_json_contents = json.load(f)
        raw_repo_text = json.dumps(expected_json_contents)

        # The visitor is replaced so the repository data comes from the fixture.
        mock_github_visitor.return_value = (None, raw_repo_text, None)
        swift.map_swift_package(package_url, ("test_pipelines",))
        package_count = packagedb.models.Package.objects.all().count()
        self.assertEqual(package_count, 2)
        package = packagedb.models.Package.objects.all().first()
        self.assertEqual(package.purl, str(package_url))

0 commit comments

Comments
 (0)