6
6
# See https://github.com/aboutcode-org/purldb for support or download.
7
7
# See https://aboutcode.org for more information about nexB OSS projects.
8
8
#
9
- from minecode_pipelines .pipes import get_last_commit
9
+ from datetime import datetime
10
+
11
+ from minecode_pipelines .pipes import fetch_checkpoint_from_github
12
+ from minecode_pipelines .pipes import update_checkpoints_in_github
13
+ from minecode_pipelines .pipes import MINECODE_PIPELINES_CONFIG_REPO
10
14
from minecode_pipelines .pipes import get_changed_files
11
- from minecode_pipelines .pipes import update_last_commit
12
15
from minecode_pipelines .pipes .cargo import store_cargo_packages
16
+ from scanpipe .pipes .federatedcode import commit_changes
17
+ from scanpipe .pipes .federatedcode import push_changes
18
+ from minecode_pipelines import VERSION
19
+
13
20
import json
14
21
from pathlib import Path
15
22
16
23
from minecode_pipelines .utils import get_next_x_commit
17
24
25
+ PACKAGE_BATCH_SIZE = 500
26
+ CARGO_CHECKPOINT_PATH = "cargo/checkpoints.json"
18
27
19
- def process_cargo_packages (cargo_repo , fed_repo , fed_conf_repo , logger ):
28
+
29
+ def process_cargo_packages (cargo_index_repo , cloned_data_repo , config_repo , logger ):
20
30
"""
21
31
Process Cargo index files commit by commit.
22
32
Push changes to cloned_data_repo after:
23
- - every `commit_batch` commits, OR
24
- - when reaching HEAD.
33
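+ - every `PACKAGE_BATCH_SIZE` processed files, OR at the last changed file (NOTE(review): the `idx == len(changed_files)` check looks off by one - confirm).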
25
34
"""
26
35
27
- base_path = Path (cargo_repo .working_tree_dir )
36
+ base_path = Path (cargo_index_repo .working_tree_dir )
28
37
29
38
while True :
30
- setting_last_commit = get_last_commit (fed_conf_repo , "cargo" )
31
- next_commit = get_next_x_commit (cargo_repo , setting_last_commit , x = 10 , branch = "master" )
39
+ cargo_checkpoints = (
40
+ fetch_checkpoint_from_github (MINECODE_PIPELINES_CONFIG_REPO , CARGO_CHECKPOINT_PATH )
41
+ or {}
42
+ )
43
+ checkpoints_last_commit = cargo_checkpoints .get ("last_commit" )
32
44
33
- if next_commit == setting_last_commit :
45
+ next_commit = get_next_x_commit (
46
+ cargo_index_repo , checkpoints_last_commit , x = 10 , branch = "master"
47
+ )
48
+
49
+ if next_commit == checkpoints_last_commit :
34
50
logger ("No new commits to mine" )
35
51
break
36
52
37
53
changed_files = get_changed_files (
38
- cargo_repo , commit_x = setting_last_commit , commit_y = next_commit
54
+ cargo_index_repo , commit_x = checkpoints_last_commit , commit_y = next_commit
39
55
)
40
56
logger (f"Found { len (changed_files )} changed files in Cargo index." )
41
57
42
58
file_counter = 0
59
+ purl_files = []
60
+ purls = []
43
61
for idx , rel_path in enumerate (changed_files ):
44
62
file_path = base_path / rel_path
45
63
logger (f"Found { file_path } ." )
@@ -57,8 +75,45 @@ def process_cargo_packages(cargo_repo, fed_repo, fed_conf_repo, logger):
57
75
packages .append (json .loads (line ))
58
76
59
77
file_counter += 1
60
- push_commit = (file_counter % 1000 == 0 ) or (idx == len (changed_files ))
61
- store_cargo_packages (packages , fed_repo , push_commit )
78
+ commit_and_push = (file_counter % PACKAGE_BATCH_SIZE == 0 ) or (
79
+ idx == len (changed_files )
80
+ )
81
+ purl_file , base_purl = store_cargo_packages (packages , cloned_data_repo )
82
+ logger (f"writing packageURLs for package: { base_purl } at: { purl_file } " )
83
+
84
+ purl_files .append (purl_file )
85
+ purls .append (str (base_purl ))
86
+ if not commit_and_push :
87
+ continue
88
+
89
+ commit_changes (
90
+ repo = cloned_data_repo ,
91
+ files_to_commit = purl_files ,
92
+ purls = purls ,
93
+ mine_type = "packageURL" ,
94
+ tool_name = "pkg:cargo/minecode-pipelines" ,
95
+ tool_version = VERSION ,
96
+ )
97
+
98
+ # Push changes to remote repository
99
+ push_changes (repo = cloned_data_repo )
100
+ purl_files = []
101
+ purls = []
102
+
103
+ if logger :
104
+ logger (
105
+ f"Updating checkpoint at: { CARGO_CHECKPOINT_PATH } with last commit: { checkpoints_last_commit } "
106
+ )
107
+
108
+ settings_data = {
109
+ "date" : str (datetime .now ()),
110
+ "last_commit" : next_commit ,
111
+ }
112
+
113
+ update_checkpoints_in_github (
114
+ checkpoint = settings_data ,
115
+ cloned_repo = config_repo ,
116
+ path = CARGO_CHECKPOINT_PATH ,
117
+ )
62
118
63
- update_last_commit (next_commit , fed_conf_repo , "cargo" )
64
- logger (f"Pushed batch for commit range { setting_last_commit } :{ next_commit } ." )
119
+ logger (f"Pushed batch for commit range { checkpoints_last_commit } :{ next_commit } ." )
0 commit comments