Commit 4a5b865

Merge pull request #137 from DSACMS/seperate-fetch-from-generation

Separate Fetch Action From Generation of Reports and Graphs

2 parents: 9d51f80 + 304df33

9 files changed: +223 -55 lines

.github/workflows/cicd.yml (+40 -1)

@@ -19,9 +19,47 @@ concurrency:
   cancel-in-progress: false

 jobs:
+  update-reports-and-graphs:
+    permissions: write-all
+    name: update reports and graphs
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      # update stats
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
+      - name: cache pip
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-
+      - run: pip install -r requirements.txt
+      - run: ./gen_reports.sh
+        env:
+          GITHUB_TOKEN: ${{ secrets.METRICS_GITHUB_TOKEN }}
+          AUGUR_HOST: ${{ vars.AUGUR_HOST }}
+      - run: ./gen_graphs.sh
+        env:
+          GITHUB_TOKEN: ${{ secrets.METRICS_GITHUB_TOKEN }}
+          AUGUR_HOST: ${{ vars.AUGUR_HOST }}
+      - run: |
+          git config user.name 'GitHub Actions'
+          git config user.email '[email protected]'
+          git add -A
+          timestamp=$(date -u)
+          git commit -m "update reports and graphs: ${timestamp}" || exit 0
+      - name: Push to ${{ github.ref_name }}
+        uses: CasperWA/push-protected@v2
+        with:
+          token: ${{ secrets.METRICS_GITHUB_TOKEN }}
+          branch: ${{ github.ref_name }}
   deploy:
     runs-on: ubuntu-latest
     if: github.ref == 'refs/heads/main'
+    needs: update-reports-and-graphs
     defaults:
       run:
         working-directory: ./app

@@ -55,4 +93,5 @@ jobs:
           path: app/dist

       - name: Deploy to GitHub pages
-        uses: actions/deploy-pages@v2
+        uses: actions/deploy-pages@v2
+
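The `git commit … || exit 0` step in the new job is what keeps the workflow green when a run produces no changes: `git commit` exits non-zero on a clean tree, and the guard converts that into success. A minimal Python sketch of the same guard, assuming it runs inside a git work tree (the subprocess calls stand in for the shell step, and the timestamp format is illustrative):

    # Sketch (not from the PR): commit only if something changed.
    import subprocess
    from datetime import datetime, timezone

    subprocess.run(["git", "add", "-A"], check=True)
    timestamp = datetime.now(timezone.utc).strftime("%a %b %d %H:%M:%S %Z %Y")
    result = subprocess.run(
        ["git", "commit", "-m", f"update reports and graphs: {timestamp}"],
        check=False,  # a non-zero exit just means "nothing to commit"
    )
    print("committed" if result.returncode == 0 else "nothing to commit")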

gen_graphs.sh (+4)

@@ -0,0 +1,4 @@
+#!/bin/bash
+pwd
+cd scripts
+python3 refresh_graphs.py

gen_reports.sh (+4)

@@ -0,0 +1,4 @@
+#!/bin/bash
+pwd
+cd scripts
+python3 refresh_reports.py

scripts/fetch_public_metrics.py (+127 -5)

@@ -2,10 +2,58 @@
 Module to define methods that fetch data to store in the oss metric
 entity objects.
 """
+import os
 import json
 from metricsLib.metrics_definitions import SIMPLE_METRICS, ORG_METRICS, ADVANCED_METRICS
 from metricsLib.metrics_definitions import PERIODIC_METRICS, RESOURCE_METRICS
+from metricsLib.oss_metric_entities import GithubOrg, Repository
+from metricsLib.constants import PATH_TO_METADATA


+def parse_tracked_repos_file():
+    """
+    Parse projects_tracked.json.
+
+    Returns:
+        Tuple of the org list and the repo-url mapping from the file
+    """
+    # TODO: Create a read repos-to-include.txt
+    metadata_path = os.path.join(PATH_TO_METADATA, "projects_tracked.json")
+    with open(metadata_path, "r", encoding="utf-8") as file:
+        tracking_file = json.load(file)
+
+    # Track specific repositories e.g. ['dsacms.github.io']
+    repo_urls = tracking_file["Open Source Projects"]
+
+    return tracking_file["orgs"], repo_urls
+
+
+def parse_repos_and_orgs_into_objects(org_name_list, repo_name_list):
+    """
+    Parse lists of strings into oss metric entities and return lists
+    of the corresponding oss metric entity objects.
+
+    Arguments:
+        org_name_list: list of logins for github orgs
+        repo_name_list: list of urls for git repositories with groups labeled
+
+    Returns:
+        Tuple of lists of oss metric entity objects
+    """
+    orgs = [GithubOrg(org) for org in org_name_list]
+    repos = []
+
+    for owner, urls in repo_name_list.items():
+        print(owner)
+        # Search for the org whose login matches this owner.
+        org_id = next(
+            (x.repo_group_id for x in orgs if x.login.lower() == owner.lower()), None)
+
+        for repo_url in urls:
+            repos.append(Repository(repo_url, org_id))
+    return orgs, repos
 
 def get_all_data(all_orgs, all_repos):
     """
@@ -95,8 +143,56 @@ def fetch_all_new_metric_data(all_orgs, all_repos):
     print(f"Fetching metrics for org {org.name} id #{org.repo_group_id}")
     for metric in ORG_METRICS:
         org.apply_metric_and_store_data(metric)
+        print(metric.name)
     add_info_to_org_from_list_of_repos(all_repos, org)
 
+def read_current_metric_data(repos, orgs):
+    """
+    Read current metrics and load previous metrics that
+    were saved in .old files.
+
+    Arguments:
+        repos: repos to read data for.
+        orgs: orgs to read data for.
+    """
+    for org in orgs:
+        path = org.get_path_to_json_data()
+
+        # Load the previous snapshot saved as {path}.old.
+        with open(f"{path}.old", "r", encoding="utf-8") as file:
+            previous_metric_org_json = json.load(file)
+
+        org.previous_metric_data.update(previous_metric_org_json)
+
+        # Load the current metric data.
+        with open(path, "r", encoding="utf-8") as file:
+            print(path)
+            current_metric_org_json = json.load(file)
+
+        org.metric_data.update(current_metric_org_json)
+
+    for repo in repos:
+        path = repo.get_path_to_json_data()
+
+        with open(f"{path}.old", "r", encoding="utf-8") as file:
+            previous_metric_repo_json = json.load(file)
+
+        repo.previous_metric_data.update(previous_metric_repo_json)
+
+        with open(path, "r", encoding="utf-8") as file:
+            metric_repo_json = json.load(file)
+
+        repo.metric_data.update(metric_repo_json)
+
 
 def read_previous_metric_data(repos, orgs):
     """
@@ -116,7 +212,8 @@ def read_previous_metric_data(repos, orgs):
             org.previous_metric_data.update(prev_data)
         except FileNotFoundError:
             print("Could not find previous data for records for org" +
-                  f"{org.login}")
+                f"{org.login}")
+
 
     for repo in repos:
         try:
@@ -131,20 +228,45 @@
 def write_metric_data_json_to_file(orgs, repos):
     """
     Write all metric data to json files.
+
+    Keep old metrics as a .old file.
 
     Arguments:
         orgs: orgs to write to file
         repos: repos to write to file
     """
 
     for org in orgs:
-        org_metric_data = json.dumps(org.metric_data, indent=4)
-
-        with open(org.get_path_to_json_data(), "w+", encoding="utf-8") as file:
+        path = org.get_path_to_json_data()
+
+        # Save the previous run's data as {path}.old.
+        previous_metric_org_json = json.dumps(org.previous_metric_data, indent=4)
+
+        with open(f"{path}.old", "w+", encoding="utf-8") as file:
+            file.write(previous_metric_org_json)
+
+        # Merge the previous data with the current metric data.
+        org_dict = org.previous_metric_data
+        org_dict.update(org.metric_data)
+        org_metric_data = json.dumps(org_dict, indent=4)
+
+        with open(path, "w+", encoding="utf-8") as file:
             file.write(org_metric_data)
 
     for repo in repos:
-        repo_metric_data = json.dumps(repo.metric_data, indent=4)
+        path = repo.get_path_to_json_data()
+
+        previous_metric_repo_json = json.dumps(repo.previous_metric_data, indent=4)
+
+        with open(f"{path}.old", "w+", encoding="utf-8") as file:
+            file.write(previous_metric_repo_json)
+
+        repo_dict = repo.previous_metric_data
+        repo_dict.update(repo.metric_data)
+        repo_metric_data = json.dumps(repo_dict, indent=4)
 
-        with open(repo.get_path_to_json_data(), "w+", encoding="utf-8") as file:
+        with open(path, "w+", encoding="utf-8") as file:
             file.write(repo_metric_data)
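Together with read_current_metric_data above, this gives each org and repo a two-file snapshot: {path}.old holds the previous run's data on its own, while {path} holds the previous data merged with the current values. A condensed, self-contained sketch of that round trip; write_snapshot, read_snapshot, and the file name are illustrative stand-ins, not functions from this PR:

    # Sketch of the snapshot scheme used by the PR's read/write functions.
    import json

    def write_snapshot(path, previous, current):
        # {path}.old keeps the previous run's data by itself...
        with open(f"{path}.old", "w+", encoding="utf-8") as f:
            f.write(json.dumps(previous, indent=4))
        # ...while {path} gets previous merged with current values.
        merged = dict(previous)
        merged.update(current)
        with open(path, "w+", encoding="utf-8") as f:
            f.write(json.dumps(merged, indent=4))

    def read_snapshot(path):
        with open(f"{path}.old", "r", encoding="utf-8") as f:
            previous = json.load(f)
        with open(path, "r", encoding="utf-8") as f:
            current = json.load(f)
        return previous, current

    write_snapshot("snapshot.json", {"stars": 10}, {"stars": 12})
    print(read_snapshot("snapshot.json"))  # ({'stars': 10}, {'stars': 10, 'stars': 12} merged)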

scripts/gen_reports.py (+1 -1)

@@ -98,7 +98,7 @@ def get_heading_report_values(headings, oss_entity):
    # +--------------+--------------------------------------+---+-------------------+
 
 
-    and_conditional = ((raw_diff>0) and (behavior == DesiredReportBehavior.VALUE_INCREASE.value))
+    and_conditional=((raw_diff>0) and (behavior == DesiredReportBehavior.VALUE_INCREASE.value))
     #Use a XOR by using the != operator
     if (raw_diff > 0) != (behavior == DesiredReportBehavior.VALUE_INCREASE.value):
         if raw_diff < 0:
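The `!=` in the context lines is the usual trick for exclusive-or on booleans, as the code's own comment notes: the branch fires exactly when the metric moved opposite to the desired direction. A quick sketch of the equivalence, with illustrative names:

    # For booleans, `a != b` is exclusive-or: true when exactly one holds.
    for value_went_up in (False, True):
        for increase_is_desired in (False, True):
            assert (value_went_up != increase_is_desired) == \
                   (value_went_up ^ increase_is_desired)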

scripts/metricsLib/oss_metric_entities.py (+2 -1)

@@ -9,6 +9,7 @@
 import datetime
 import pathlib
 import requests
+from requests.exceptions import ReadTimeout
 from metricsLib.constants import PATH_TO_METRICS_DATA, PATH_TO_REPORTS_DATA, AUGUR_HOST
 from metricsLib.constants import TIMEOUT_IN_SECONDS, PATH_TO_GRAPHS_DATA
 

@@ -152,7 +153,7 @@ def apply_metric_and_store_data(self, metric, *args, **kwargs):
 
         try:
             self.store_metrics(metric.get_values(*args, **kwargs))
-        except TimeoutError as e:
+        except (TimeoutError, ReadTimeout) as e:
             print(f"Timeout for repo {self.name} with metric {metric.name}")
             print(f"Error: {e}")
         except ConnectionError as e:
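The widened except clause matters because requests raises its own timeout type: ReadTimeout descends from requests' RequestException (an IOError subclass), not from the builtin TimeoutError, so the old handler never caught a slow HTTP read. A two-line check of the hierarchy:

    # ReadTimeout is not a TimeoutError subclass, so it needs its own clause.
    from requests.exceptions import ReadTimeout

    print(issubclass(ReadTimeout, TimeoutError))  # False
    print(issubclass(ReadTimeout, OSError))       # True, via requests' IOError base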

scripts/refresh_graphs.py (+20)

@@ -0,0 +1,20 @@
+"""
+Script to run all graph generation from existing data.
+"""
+import os
+from fetch_public_metrics import parse_repos_and_orgs_into_objects
+from fetch_public_metrics import parse_tracked_repos_file, read_current_metric_data
+from gen_graphs import generate_all_graphs_for_repos, generate_all_graphs_for_orgs
+
+
+if __name__ == "__main__":
+    os.umask(0)
+
+    orgs_urls, repo_urls = parse_tracked_repos_file()
+
+    all_orgs, all_repos = parse_repos_and_orgs_into_objects(orgs_urls, repo_urls)
+
+    read_current_metric_data(all_repos, all_orgs)
+    generate_all_graphs_for_orgs(all_orgs)
+    generate_all_graphs_for_repos(all_repos)

scripts/refresh_metrics.py (+5 -47)

@@ -2,60 +2,18 @@
 Script to run all metrics collection and update operations
 """
 import os
-import json
-from metricsLib.oss_metric_entities import GithubOrg, Repository
-from metricsLib.constants import PATH_TO_METADATA
-from fetch_public_metrics import get_all_data
-from gen_reports import generate_repo_report_files, generate_org_report_files
-from gen_graphs import generate_all_graphs_for_repos, generate_all_graphs_for_orgs
+from fetch_public_metrics import get_all_data, parse_repos_and_orgs_into_objects
+from fetch_public_metrics import parse_tracked_repos_file, read_previous_metric_data
 
 
-def parse_repos_and_orgs_into_objects(org_name_list, repo_name_list):
-    """
-    This function parses lists of strings into oss metric entities and
-    returns lists of corresponding oss metric entity objects.
-
-    Arguments:
-        org_name_list: list of logins for github orgs
-        repo_name_list: list of urls for git repositories with groups labeled
-
-    Returns:
-        Tuple of lists of oss metric entity objects
-    """
-    orgs = [GithubOrg(org) for org in org_name_list]
-
-    repos = []
-
-    for owner, urls in repo_name_list.items():
-        print(owner)
-        # search for matching org
-        org_id = next(
-            (x.repo_group_id for x in orgs if x.login.lower() == owner.lower()), None)
-
-        for repo_url in urls:
-            repos.append(Repository(repo_url, org_id))
-    return orgs, repos
 
 
 if __name__ == "__main__":
     os.umask(0)
-    # TODO: Create a read repos-to-include.txt
-    metadata_path = os.path.join(PATH_TO_METADATA, "projects_tracked.json")
-    with open(metadata_path, "r", encoding="utf-8") as file:
-        tracking_file = json.load(file)
 
-    # Track specific repositories e.g. ['dsacms.github.io']
-    repo_urls = tracking_file["Open Source Projects"]
+    orgs_urls, repo_urls = parse_tracked_repos_file()
 
-    # Get two lists of objects that will hold all the new metrics
-    all_orgs, all_repos = parse_repos_and_orgs_into_objects(
-        tracking_file["orgs"], repo_urls)
+    all_orgs, all_repos = parse_repos_and_orgs_into_objects(orgs_urls, repo_urls)
 
     # Generate json data, report data, and graph data.
-    #read_previous_metric_data(all_repos,all_orgs)
+    read_previous_metric_data(all_repos, all_orgs)
     get_all_data(all_orgs, all_repos)
-    generate_repo_report_files(all_repos)
-    generate_org_report_files(all_orgs)
-    generate_all_graphs_for_repos(all_repos)
-    generate_all_graphs_for_orgs(all_orgs)

scripts/refresh_reports.py (+20)

@@ -0,0 +1,20 @@
+"""
+Script to run all report generation from existing data.
+"""
+import os
+from fetch_public_metrics import parse_repos_and_orgs_into_objects
+from fetch_public_metrics import parse_tracked_repos_file, read_current_metric_data
+from gen_reports import generate_repo_report_files, generate_org_report_files
+
+
+if __name__ == "__main__":
+    os.umask(0)
+
+    orgs_urls, repo_urls = parse_tracked_repos_file()
+
+    all_orgs, all_repos = parse_repos_and_orgs_into_objects(orgs_urls, repo_urls)
+
+    read_current_metric_data(all_repos, all_orgs)
+    generate_repo_report_files(all_repos)
+    generate_org_report_files(all_orgs)
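All three entry points (refresh_metrics.py, refresh_reports.py, refresh_graphs.py) now open with the same prologue: parse projects_tracked.json, build the entity objects, then load whichever metric data the task needs. A hypothetical helper, not part of this PR, showing that shared shape:

    # Hypothetical helper (not in the PR) illustrating the prologue the
    # three entry-point scripts share; run from the scripts/ directory.
    from fetch_public_metrics import (
        parse_tracked_repos_file,
        parse_repos_and_orgs_into_objects,
    )

    def load_tracked_entities():
        """Build GithubOrg and Repository objects from projects_tracked.json."""
        orgs_urls, repo_urls = parse_tracked_repos_file()
        return parse_repos_and_orgs_into_objects(orgs_urls, repo_urls)

    if __name__ == "__main__":
        all_orgs, all_repos = load_tracked_entities()
        print(len(all_orgs), len(all_repos))

Note the argument order the scripts must respect: read_current_metric_data and read_previous_metric_data take repos before orgs, while get_all_data takes orgs first.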
