From 9af7d48eb99910b75cc49d27e25dfa020f1926d6 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 7 Jan 2025 14:13:48 -0600 Subject: [PATCH] use upstream lib instead Signed-off-by: Isaac Milarsky --- requirements.txt | 1 + scripts/{metricsLib => }/constants.py | 0 scripts/fetch_public_metrics.py | 278 -------- scripts/gen_graphs.py | 617 ------------------ scripts/gen_reports.py | 231 ------- scripts/metricsLib/__init__.py | 0 scripts/metricsLib/metrics_data_structures.py | 576 ---------------- scripts/metricsLib/metrics_definitions.py | 262 -------- scripts/metricsLib/oss_metric_entities.py | 454 ------------- scripts/refresh_graphs.py | 16 +- scripts/refresh_metrics.py | 11 +- scripts/refresh_reports.py | 18 +- 12 files changed, 26 insertions(+), 2438 deletions(-) rename scripts/{metricsLib => }/constants.py (100%) delete mode 100755 scripts/fetch_public_metrics.py delete mode 100644 scripts/gen_graphs.py delete mode 100644 scripts/gen_reports.py delete mode 100644 scripts/metricsLib/__init__.py delete mode 100644 scripts/metricsLib/metrics_data_structures.py delete mode 100644 scripts/metricsLib/metrics_definitions.py delete mode 100644 scripts/metricsLib/oss_metric_entities.py diff --git a/requirements.txt b/requirements.txt index 66f9499662..9b048b0779 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ pygal pandas pytest pylint +metrics-dash-backend-tools diff --git a/scripts/metricsLib/constants.py b/scripts/constants.py similarity index 100% rename from scripts/metricsLib/constants.py rename to scripts/constants.py diff --git a/scripts/fetch_public_metrics.py b/scripts/fetch_public_metrics.py deleted file mode 100755 index 7d300deb12..0000000000 --- a/scripts/fetch_public_metrics.py +++ /dev/null @@ -1,278 +0,0 @@ -""" -Module to define methods that fetch data to store in the oss metric -entity objects. -""" -import os -import json -from metricsLib.metrics_definitions import SIMPLE_METRICS, ORG_METRICS, ADVANCED_METRICS -from metricsLib.metrics_definitions import PERIODIC_METRICS, RESOURCE_METRICS -from metricsLib.oss_metric_entities import GithubOrg, Repository -from metricsLib.constants import PATH_TO_METADATA - -def parse_tracked_repos_file(org=None): - """ - Function to parse projects_tracked.json - - Returns: - Tuple of lists of strings that represent repos and orgs - """ - - # TODO: Create a read repos-to-include.txt - metadata_path = os.path.join(PATH_TO_METADATA, "projects_tracked.json") - with open(metadata_path, "r", encoding="utf-8") as file: - tracking_file = json.load(file) - - # Only parse the desired org if an org was passed as an argument - if org: - repo_urls = { - org : tracking_file["Open Source Projects"][org] - } - return [org], repo_urls - - repo_urls = tracking_file["Open Source Projects"] - - # Get two lists of objects that will hold all the new metrics - return tracking_file["orgs"], repo_urls - -def parse_repos_and_orgs_into_objects(org_name_list, repo_name_list): - """ - This function parses lists of strings into oss metric entities and - returns lists of corresponding oss metric entitiy objects. - - Arguments: - org_name_list: list of logins for github orgs - repo_name_list: list of urls for git repositories with groups labeled - - Returns: - Tuple of lists of oss metric entity objects - """ - orgs = [GithubOrg(org) for org in org_name_list] - - repos = [] # [Repository(repo_url) for repo_url in repo_name_list] - - for owner, urls in repo_name_list.items(): - print(owner) - # search for matching org - org_id = next( - (x.repo_group_id for x in orgs if x.login.lower() == owner.lower()), None) - - # print(f"!!{org_id}") - for repo_url in urls: - repos.append(Repository(repo_url, org_id)) - return orgs, repos - -def get_all_data(all_orgs, all_repos): - """ - Call relevant methods on orgs and repos - - Arguments: - all_orgs: List of all orgs to gather metrics for - all_repos: List of all repos to gather metrics for - """ - fetch_all_new_metric_data(all_orgs, all_repos) - read_previous_metric_data(all_repos, all_orgs) - write_metric_data_json_to_file(all_orgs, all_repos) - - -def add_info_to_org_from_list_of_repos(repo_list, org): - """ - This method serves to iterate through previously collected metric - data that is associated with a repo and derive the cumulative metric data - for the whole organization instead of the repository. - - This is mainly to avoid using more api calls than we have to. - - Arguments: - repo_list: List of all repos with metrics - org: The github org to add metrics to - """ - # Define counts to update based on tracked repositories. - org_counts = {"commits_count": 0, - "issues_count": 0, - "open_issues_count": 0, - "closed_issues_count": 0, - "pull_requests_count": 0, - "open_pull_requests_count": 0, - "merged_pull_requests_count": 0, - "closed_pull_requests_count": 0, - "forks_count": 0, - "stargazers_count": 0, - "watchers_count": 0 - } - - # Add repo data to org that repo is a part of - for repo in repo_list: - # Check for membership - #print(repo.needed_parameters["repo_group_id"]) - #print(org.needed_parameters["repo_group_id"]) - if repo.needed_parameters["repo_group_id"] == org.needed_parameters["repo_group_id"]: - # Add metric data. - for key, _ in org_counts.items(): - raw_count = repo.metric_data.get(key) - if raw_count: - org_counts[key] += raw_count - - org.store_metrics(org_counts) - - -def fetch_all_new_metric_data(all_orgs, all_repos): - """ - This method applies all desired methods to all desired repos - and orgs. It applies and stores all the metrics - - This is mainly to avoid using more api calls than we have to. - - Arguments: - all_orgs: List of all orgs to gather metrics for - all_repos: List of all repos to gather metrics for - """ - - # Capture the metric data from all repos - # Returns a nested dictionary - for repo in all_repos: - print(f"Fetching metrics for repo {repo.name}, id #{repo.repo_id}.") - # Get info from all metrics for each repo - for metric in SIMPLE_METRICS: - repo.apply_metric_and_store_data(metric) - - for metric in PERIODIC_METRICS: - repo.apply_metric_and_store_data(metric) - - for metric in RESOURCE_METRICS: - repo.apply_metric_and_store_data(metric, oss_entity=repo) - - for metric in ADVANCED_METRICS: - repo.apply_metric_and_store_data(metric) - - # Capture all metric data for all Github orgs - for org in all_orgs: - print(f"Fetching metrics for org {org.name} id #{org.repo_group_id}") - for metric in ORG_METRICS: - org.apply_metric_and_store_data(metric) - print(metric.name) - add_info_to_org_from_list_of_repos(all_repos, org) - -def read_current_metric_data(repos,orgs): - """ - Read current metrics and load previous metrics that - were saved in .old files. - - Arguments: - orgs: orgs to read data for. - repos: repos to read data for. - """ - - for org in orgs: - - path = org.get_path_to_json_data() - #generate dict of previous and save it as {path}.old - #previous_metric_org_json = json.dumps(org.previous_metric_data, indent=4) - - with open(f"{path}.old","r",encoding="utf-8") as file: - previous_metric_org_json = json.load(file) - - #generate dict of current metric data. - org.previous_metric_data.update(previous_metric_org_json) - - - with open(path, "r", encoding="utf-8") as file: - #file.write(org_metric_data) - print(path) - current_metric_org_json = json.load(file) - - org.metric_data.update(current_metric_org_json) - - for repo in repos: - #previous_metric_repo_json = json.dumps(repo.previous_metric_data, indent=4) - path = repo.get_path_to_json_data() - - with open(f"{path}.old","r",encoding="utf-8") as file: - #file.write(previous_metric_repo_json) - previous_metric_repo_json = json.load(file) - - repo.previous_metric_data.update(previous_metric_repo_json) - - - with open(path, "r", encoding="utf-8") as file: - #file.write(repo_metric_data) - metric_repo_json = json.load(file) - - repo.metric_data.update(metric_repo_json) - - -def read_previous_metric_data(repos, orgs): - """ - This method reads the previously gathered metric data and - stores it in the OSSEntity objects passed in. - - This is for the reports that compare changes since last collection. - - Arguments: - repos: List of all orgs to read metrics for - orgs: List of all repos to read metrics for - """ - for org in orgs: - try: - with open(org.get_path_to_json_data(), "r", encoding="utf-8") as file: - prev_data = json.load(file) - org.previous_metric_data.update(prev_data) - except FileNotFoundError: - print("Could not find previous data for records for org" + - f"{org.login}") - - - for repo in repos: - try: - with open(repo.get_path_to_json_data(), "r", encoding="utf-8") as file: - prev_data = json.load(file) - repo.previous_metric_data.update(prev_data) - except FileNotFoundError: - print("Could not find previous data for records for repo" + - repo.name) - - -def write_metric_data_json_to_file(orgs, repos): - """ - Write all metric data to json files. - - Keep old metrics as a .old file. - - Arguments: - orgs: orgs to write to file - repos: repos to write to file - """ - - for org in orgs: - - path = org.get_path_to_json_data() - #generate dict of previous and save it as {path}.old - previous_metric_org_json = json.dumps(org.previous_metric_data, indent=4) - - with open(f"{path}.old","w+",encoding="utf-8") as file: - file.write(previous_metric_org_json) - - #generate dict of current metric data. - org_dict = org.previous_metric_data - org_dict.update(org.metric_data) - org_metric_data = json.dumps(org_dict, indent=4) - - #print(org_metric_data) - - with open(path, "w+", encoding="utf-8") as file: - file.write(org_metric_data) - - for repo in repos: - path = repo.get_path_to_json_data() - - previous_metric_repo_json = json.dumps(repo.previous_metric_data, indent=4) - - with open(f"{path}.old","w+",encoding="utf-8") as file: - file.write(previous_metric_repo_json) - - repo_dict = repo.previous_metric_data - repo_dict.update(repo.metric_data) - repo_metric_data = json.dumps(repo_dict, indent=4) - - - with open(path, "w+", encoding="utf-8") as file: - file.write(repo_metric_data) diff --git a/scripts/gen_graphs.py b/scripts/gen_graphs.py deleted file mode 100644 index dcd03f7b2a..0000000000 --- a/scripts/gen_graphs.py +++ /dev/null @@ -1,617 +0,0 @@ -""" -Module to define methods to create pygals graphs -""" -import datetime -from datetime import timedelta -import re -import pygal - -def percent_formatter(x): - """ - Function to format percentage values. - - Arguments: - x: Value to format into a percent - Returns: - A string containing the formatted version of x - """ - - return '{:0.2f}%'.format(x) - -def timedelta_formatter(x): - """ - Function to format percentage values. - - Arguments: - x: Value to format into days - Returns: - A string containing the formatted version of x - """ - - return '{} days'.format(x.days) - -def ignore_formatter(x): - """ - Function to ignore values in formatting - - Arguments: - x: Value to ignore - Returns: - A string containing the formatted version of x - """ - - return '' - -def generate_all_graphs_for_repos(all_repos): - """ - Function to generate and save all graphs for the input - repos. - - Arguments: - all_repos: Repos to generate graphs for. - """ - for repo in all_repos: - print(f"Generating graphs for repo {repo.name}") - generate_solid_gauge_issue_graph(repo) - generate_repo_sparklines(repo) - generate_predominant_languages_graph(repo) - generate_language_summary_pie_chart(repo) - generate_cost_estimates_bar_chart(repo) - generate_time_estimates_bar_chart(repo) - generate_average_issue_resolution_graph(repo) - try: - generate_donut_graph_line_complexity_graph(repo) - generate_time_xy_issue_graph( - repo, "new_commit_contributors_by_day_over_last_month", "New Contributors" - ) - generate_time_xy_issue_graph( - repo, "new_commit_contributors_by_day_over_last_six_months", "New Contributors" - ) - except KeyError as e: - print(f"Could not find metrics to build graphs for repo {repo.name}") - print(e) - - try: - generate_libyears_graph(repo) - except KeyError: - print(f"Repository {repo.name} has no deps data associated with it!") - - try: - generate_dryness_percentage_graph(repo) - except ValueError as e: - print("Could not parse DRYness due to percentage values being invalid!") - print(e) - except KeyError as e: - print(f"Could not find metrics to build dryness graphs for repo {repo.name}") - print(e) - -def generate_all_graphs_for_orgs(all_orgs): - """ - Function to iterate through all orgs and generate graphs for each of them - - Arguments: - all_orgs: Orgs to generate graphs for. - """ - for org in all_orgs: - print(f"Generating graphs for org {org.name}") - generate_solid_gauge_issue_graph(org) - generate_time_xy_issue_graph(org, "new_issues_by_day_over_last_six_months", "New Issues") - generate_time_xy_issue_graph(org, "new_issues_by_day_over_last_month", "New Issues") - generate_top_committer_bar_graph(org) - - try: - generate_libyears_graph(org) - except KeyError: - print(f"Org {org.name} has no deps data associated with it!") - -def write_repo_chart_to_file(repo, chart, chart_name, custom_func=None, custom_func_params={}): - """ - This function's purpose is to save a pygals chart to a path derived from the - repository object passed in. - - Arguments: - repo: the Repository object that the chart is about - chart: the pygals chart object - chart_name: the name to save the chart as - custom_func: an optional custom function to render the pygals chart with - """ - - with open(repo.get_path_to_graph_data(chart_name), "wb+") as file: - try: - if not custom_func: - file.write(chart.render()) - else: - file.write(custom_func(**custom_func_params)) - except ZeroDivisionError: - print( - f"Repo {repo.name} has a division by zero error when trying to make graph") - # issues_gauge.render_to_file(repo.get_path_to_graph_data("issue_gauge")) - -def generate_repo_sparklines(repo): - """ - This function generates pygals sparklines graphs for a set of Repository objects. - - Arguments: - repos: the set of Repository objects - """ - chart = pygal.Line(interpolate='cubic') - chart.add('', list(repo.metric_data["commits_by_month"].values())) - chart.x_labels = list(repo.metric_data["commits_by_month"].keys()) - - # print("SPARKLINES") - # print(chart.render_sparkline()) - # I have to do this because sparklinees don't have their own subclass and instead - # are rendered through a special method of the Line object. - # TODO: file a pygals issue to make sparklines their own object - _kwargs_ = { - "show_x_labels": False, - "show_y_labels": True, - "margin": 10 - } - write_repo_chart_to_file( - repo, chart, "commit_sparklines", - custom_func=chart.render_sparkline, custom_func_params=_kwargs_) - - -def generate_time_xy_issue_graph(oss_entity,data_key,legend_key): - """ - This function generates pygals xy time graph for new issue creation over a time period. - - Arguments: - oss_entity: the OSSEntity to create a graph for - data_key: key of the dictionary to use to generate the time graph - """ - - graph_data_dict = oss_entity.metric_data[data_key] - - - date_series = [] - for record in graph_data_dict: - #datetime.datetime.fromisoformat(stamp.replace('Z', '+00:00')) - date_obj = datetime.datetime.fromisoformat(record[0].replace('Z', '+00:00')) - date_series.append((date_obj.strftime('%Y/%m/%d'),record[1])) - - xy_time_issue_chart = pygal.Line(x_label_rotation=20,legend_at_bottom=True,stroke=False) - xy_time_issue_chart.x_labels = [iter[0] for iter in date_series] - xy_time_issue_chart.add(legend_key, [iter[1] for iter in date_series]) - - write_repo_chart_to_file(oss_entity, xy_time_issue_chart, data_key) - -def generate_donut_graph_line_complexity_graph(oss_entity): - """ - This function generates pygals line complexity donut graph - for a set of Repository objects. - - Arguments: - oss_entity: The OSSEntity to create a graph for. an - OSSEntity is a data structure that is typically - a repository or an organization. - """ - - donut_lines_graph = pygal.Pie(inner_radius=0.65,legend_at_bottom=True) - donut_lines_graph.title = "Composition of Lines of Code" - - - num_blank_lines = oss_entity.metric_data['total_project_blank_lines'] - donut_lines_graph.add('Total Blank Lines', num_blank_lines) - - num_comment_lines = oss_entity.metric_data['total_project_comment_lines'] - donut_lines_graph.add('Total Comment Lines', num_comment_lines) - - num_total_lines = oss_entity.metric_data['total_project_lines'] - num_remaining_lines = (num_total_lines - num_comment_lines) - num_blank_lines - donut_lines_graph.add('Total Other Lines', num_remaining_lines) - - write_repo_chart_to_file(oss_entity, donut_lines_graph, "total_line_makeup") - -def generate_solid_gauge_issue_graph(oss_entity): - """ - This function generates pygals solid gauge issue/pr graphs for a set of Repository objects. - - Arguments: - oss_entity: the OSSEntity to create a graph for. - """ - - issues_gauge = pygal.SolidGauge(inner_radius=0.70, legend_at_bottom=True) - - issues_gauge.value_formatter = percent_formatter - - # Generate graph to measure percentage of issues that are open - try: - # calculate portion of issues that are open. - open_issue_percent = oss_entity.metric_data['open_issues_count'] / \ - oss_entity.metric_data['issues_count'] - except ZeroDivisionError: - open_issue_percent = 0 - except TypeError: - print("Repo has no issues") - return - - issues_gauge.add( - 'Open Issues', [{'value': open_issue_percent * 100, 'max_value': 100}]) - - try: - # calculate portion of pull requests that are open, merged, and closed - open_pr_percent = oss_entity.metric_data['open_pull_requests_count'] / \ - oss_entity.metric_data['pull_requests_count'] - merged_pr_percent = oss_entity.metric_data['merged_pull_requests_count'] / \ - oss_entity.metric_data['pull_requests_count'] - closed_pr_percent = oss_entity.metric_data['closed_pull_requests_count'] / \ - oss_entity.metric_data['pull_requests_count'] - except ZeroDivisionError: - open_pr_percent = 0 - merged_pr_percent = 0 - closed_pr_percent = 0 - - # Generate graph to measure portion of pull requests that are open - issues_gauge.add('Open Pull Requests', [ - {'value': open_pr_percent * 100, 'max_value': 100}]) - - # Generate graph to measure portion of pull requests that are merged or closed. - issues_gauge.add( - 'Closed and Merged Pull Requests', [ - {'label': "Merged Pull Requests", - 'value': merged_pr_percent * 100, 'max_value': 100}, - {'label': "Closed Pull Requests", 'value': closed_pr_percent * 100, 'max_value': 100}]) - - write_repo_chart_to_file(oss_entity, issues_gauge, "issue_gauge") - -def generate_top_committer_bar_graph(oss_entity): - """ - This function generates pygals -top committer by org- bar graph. - - Arguments: - oss_entity: the OSSEntity to create a graph for. - """ - - # Create a bar chart object - bar_chart = pygal.Bar() - bar_chart.title = f"Top Committers in {oss_entity.metric_data['name']}" - - top_committers = oss_entity.metric_data['top_committers'] - contributor_count = 0 - - for committer, commits in top_committers: - if "dependabot" in committer or committer == "actions@github.com": - continue - if contributor_count == 5: - break - bar_chart.add(committer, commits) - contributor_count += 1 - - write_repo_chart_to_file(oss_entity, bar_chart, "top_committers") - -def generate_predominant_languages_graph(oss_entity): - """ - This function generates a pygal predominant programming languages guage graph. - - Arguments: - oss_entity: the OSSEntity to create a graph for. - """ - - bar_chart = pygal.Bar() - bar_chart.title = f"Predominant Languages in {oss_entity.metric_data['name']}" - - predominant_lang = oss_entity.metric_data['predominant_langs'] - - for lang, lines in predominant_lang.items(): - bar_chart.add(lang, lines) - - write_repo_chart_to_file(oss_entity, bar_chart, "predominant_langs") - -def parse_libyear_list(dependency_list): - """ - Parses the dependency list returned from the libyear metric into a list of python dictionaries - that have correctly parsed dates. - - Arguments: - dependency_list: the list of lists that has the deps data - - Returns: - A list of dictionaries describing deps - """ - - to_return = [] - for dep in dependency_list: - - #print(dep) - if dep[-2] >= 0: - date = datetime.datetime.strptime(dep[-1], '%Y-%m-%dT%H:%M:%S.%f') - - dep_dict = { - "dep_name": dep[-3], - "libyear_value": dep[-2], - "libyear_date_last_updated": date - } - - if len(dep) > 3: - dep_dict['repo_name'] = dep[0] - else: - dep_dict['repo_name'] = '' - - to_return.append( - dep_dict - ) - - #return list sorted by date - return sorted(to_return, key=lambda d : d["libyear_value"],reverse=True) - - -def generate_libyears_graph(oss_entity): - """ - Generates a pygal graph to describe libyear metrics for the requested oss_entity - - Arguments: - oss_entity: the OSSEntity to create a libyears graph for. - """ - - try: - raw_dep_list = oss_entity.metric_data['repo_dependency_libyear_list'] - except KeyError: - raw_dep_list = oss_entity.metric_data['dependency_libyear_list'] - - if not raw_dep_list: - return - - #This is going to be kind of hacky since pygals doesn't have a - #timeline object - #TODO: Contribute upstream to add a timeline object to pygal - dateline = pygal.TimeDeltaLine(x_label_rotation=25,legend_at_bottom=True) - dateline.x_value_formatter = timedelta_formatter - dateline.value_formatter = ignore_formatter - - - dep_list = parse_libyear_list(raw_dep_list) - total_libyears_ood = sum(n['libyear_value'] for n in dep_list) - - dateline.title = f"""Dependency Libyears: Age of Dependency Version - Total Libyears: {round(total_libyears_ood,1)}""" - - #We are going to treat the y-axis as having one dep per level in the graph - elevation = 0 - for dep in dep_list: - - label = f"{dep['dep_name']}/{dep['repo_name']}" - - dateline.add(label, [ - (timedelta(), elevation), - (timedelta(days=dep["libyear_value"] * 365), elevation), - ]) - - #move one line up so that we have no overlap in the timedeltas - elevation += 1 - - if elevation >= 40: - break - - dateline.show_y_labels = False - write_repo_chart_to_file(oss_entity, dateline, "libyear_timeline") - -def parse_cocomo_dryness_metrics(dryness_string): - """ - This function parses the output of the scc dryness metrics. - - For some reason, ULOC, SLOC, and DRYness don't show up in the json and - only show up in the stdout text. - - Arguments: - dryness_string: the string containing the dryness table to parse - - Returns: - A dictionary with the unique lines of code and DRYness percentage - """ - - dryness_metrics = {} - - #Parse output line by line - for line in dryness_string.split('\n'): - #Parse the parts that we want into fields - if 'Unique Lines of Code' in line: - #Use regex to remove all non-numerals from the string - dryness_metrics['total_uloc'] = re.sub('[^0-9.]','',line) - if 'DRYness' in line: - #Use regex to remove all non-numerals from the string - dryness_metrics['DRYness_percentage'] = re.sub('[^0-9.]','',line) - - return dryness_metrics - -def generate_dryness_percentage_graph(oss_entity): - """ - This function generates a pygal DRYness pie graph. - - DRYness = ULOC / SLOC - - WETness = 1 - DRYness - - DRY = Don't repeat yourself - WET = Waste Everybody's time or Write Everything Twice - """ - - dryness_values = parse_cocomo_dryness_metrics( - oss_entity.metric_data["cocomo"]['dryness_table'] - ) - - sloc = (float(dryness_values['total_uloc']) / float(dryness_values['DRYness_percentage'])) - sloc_diff = sloc - float(dryness_values['total_uloc']) - sloc_percent = (sloc_diff / sloc) * 100 - - uloc_percent = (float(dryness_values['total_uloc']) / sloc) * 100 - - pie_chart = pygal.Pie(half_pie=True, legend_at_bottom=True) - pie_chart.value_formatter = percent_formatter - pie_chart.title = 'DRYness Percentage Graph' - - #print(dryness_values) - - pie_chart.add( - 'Unique Lines of Code (ULOC) %', uloc_percent - ) - - #Will cause a value error if the dryness value is NaN which can happen. - pie_chart.add( - 'Source Lines of Code (SLOC) %', - #sloc = uloc / DRYness - sloc_percent - ) - - write_repo_chart_to_file(oss_entity, pie_chart, "DRYness") - - -def generate_language_summary_pie_chart(oss_entity): - """ - This function generates a pygal pie chart for programming languages - and total lines written in each language. - - The total LoC is displayed in the chart's title. - - Arguments: - oss_entity: the OSSEntity to create a graph for. - """ - - pie_chart = pygal.Pie() - - language_summary = oss_entity.metric_data.get('cocomo', {}).get('languageSummary') - if not language_summary: - print("No valid 'languageSummary' found in the data.") - return - - total_loc = sum(entry.get('Code', 0) for entry in language_summary) - - pie_chart.title = f'Language Summary \n Total Source Lines of Code (SLOC): {total_loc:,}' - - pie_chart.value_formatter = lambda x: f'{x} SLOC' - - for entry in language_summary: - code_lines = entry.get('Code', 0) - pie_chart.add(entry['Name'], code_lines) - - write_repo_chart_to_file(oss_entity, pie_chart, "language_summary") - - -def generate_cost_estimates_bar_chart(oss_entity): - """ - This function generates a pygal bar chart for estimated costs - with rounded values and a dollar sign. - - Arguments: - oss_entity: the OSSEntity to create a graph for. - """ - - bar_chart = pygal.Bar(legend_at_bottom=True) - - if oss_entity.metric_data is not None: - metric_data = oss_entity.metric_data.get('cocomo', {}) - estimated_cost_low = float(metric_data.get('estimatedCost_low', 0) or 0.0) - estimated_cost_high = float(metric_data.get('estimatedCost_high', 0) or 0.0) - else: - estimated_cost_low = 0.0 - estimated_cost_high = 0.0 - - bar_chart.value_formatter = lambda x: f'${x:,.2f}' - - average_cost = (estimated_cost_low + - estimated_cost_high) / 2 - - bar_chart.title = f'Estimated Project Costs in $ From Constructive Cost Model (COCOMO) \n Average Cost: ${average_cost:,.2f}' - - bar_chart.add(f'Estimated Cost Low (${estimated_cost_low:,.2f})', - estimated_cost_low) - bar_chart.add(f'Estimated Cost High (${estimated_cost_high:,.2f})', - estimated_cost_high) - - write_repo_chart_to_file(oss_entity, bar_chart, "estimated_project_costs") - - -def generate_time_estimates_bar_chart(oss_entity): - """ - This function generates a pygal bar chart for estimated time - of project in months rounded to the nearest tenth. - - estimatedScheduleMonths_low is used for time. - - Arguments: - oss_entity: the OSSEntity to create a graph for. - """ - - bar_chart = pygal.Bar(legend_at_bottom=True) - - if oss_entity.metric_data is not None: - metric_data = oss_entity.metric_data.get('cocomo', {}) - estimated_schedule_months_low = metric_data.get('estimatedScheduleMonths_low', 0) - else: - estimated_schedule_months_low = 0 - - formatted_estimated_months = float(estimated_schedule_months_low or 0.0) - - bar_chart.value_formatter = lambda x: f'{x:,.1f} mos' - - bar_chart.title = 'Estimated Project Time in Months From Constructive Cost Model (COCOMO)' - - bar_chart.add(None, [0]) - bar_chart.add(f'Estimated Time ({formatted_estimated_months:,.1f} mos)', - estimated_schedule_months_low) - bar_chart.add(None, [0]) - - write_repo_chart_to_file(oss_entity, bar_chart, "estimated_project_time") - - -def generate_people_estimate_bar_chart(oss_entity): - """ - This function generates a pygal bar chart for estimated people - working on the project rounded to the nearest integer. - - estimatedPeople_low is used for contributors. - - Arguments: - oss_entity: the OSSEntity to create a graph for. - """ - - bar_chart = pygal.Bar(legend_at_bottom=True) - - if oss_entity.metric_data is not None: - metric_data = oss_entity.metric_data.get('cocomo', {}) - estimated_people_low = metric_data.get('estimatedPeople_low', 0) - else: - estimated_people_low = 0 - - bar_chart.value_formatter = lambda x: f'{x:,.0f} ppl' - - bar_chart.title = 'Estimated Individual Project Contributors From Constructive Cost Model (COCOMO)' - - bar_chart.add(None, [0]) - bar_chart.add(f'Estimated Contributors ({estimated_people_low:,.0f} ppl)', estimated_people_low) - bar_chart.add(None, [0]) - - write_repo_chart_to_file(oss_entity, bar_chart, "estimated_people_contributing") - -def generate_average_issue_resolution_graph(oss_entity): - """ - This function generates a pygal gauge chart for average issue resolution time. - - Arguments: - oss_entity: An object containing the metric data. - """ - gauge_graph = pygal.Gauge(legend_at_bottom=True) - - metric_data = oss_entity.metric_data.get('average_issue_resolution_time') - if not metric_data or not metric_data[0]: - print("No data available for average issue resolution time") - return - - data = metric_data[0] - repo_name = data[0] - average_time_str = data[1] - - if "days" in average_time_str: - days_str = average_time_str.split(' days ') - days = int(days_str[0]) - else: - print("Average issue resolution time is less than a day") - return - - gauge_graph.range = [0, round((days + 20))] - - gauge_graph.title = f"Average Issue Resolution Time for {repo_name} \n Average Time: {round(days)} days" - gauge_graph.add("Days", round(days)) - - write_repo_chart_to_file(oss_entity, gauge_graph, "average_issue_resolution_time") diff --git a/scripts/gen_reports.py b/scripts/gen_reports.py deleted file mode 100644 index ba5549acf3..0000000000 --- a/scripts/gen_reports.py +++ /dev/null @@ -1,231 +0,0 @@ -""" -Module to define methods to create reports -""" -from datetime import date -from metricsLib.constants import REPO_REPORT_TEMPLATE, ORG_REPORT_TEMPLATE, DesiredReportBehavior - - -def calc_percent_difference(latest, prev): - """ - This function calculates the percent difference between - two numbers - - Arguments: - latest: float - new number - prev: float - old number to compare to new number - - Returns: - Float between 0 and 100 corresponding to the percent - difference. - """ - - abs_diff = abs(latest - prev) - - try: - dec = abs_diff/((latest + prev)/2) - except ZeroDivisionError: - dec = 0 - - return dec * 100 - -def round_to_significant_figures(n, significant_figures): - """ - Returns the input rounded to the desired number of significant figures - - Arguments: - n: number - - significant_figures: int - The number of significant figures to round to. - - Returns: - String of the number formatted to the desired number of significant figures. - """ - - formatted = '{:g}'.format(float('{:.{p}g}'.format(n, p=significant_figures))) - - return formatted - - -def get_heading_report_values(headings, oss_entity): - """ - Generates a dictionary of statistics for each 'heading' where - a heading is a type of data point. i.e. commits_diff versus pull_request_count_diff. - - Arguments: - headings: dictionary - dictionary of data point types i.e. 'commits' with desired behavior - oss_entity: OssEntity - Data structure representing the entity that the data corresponds to - - Returns: - A dictionary of statistics with many keys for each heading. - """ - - report_values = {} - for heading, behavior in headings.items(): - prev_record = oss_entity.metric_data[heading] - - if heading in oss_entity.previous_metric_data.keys(): - prev_record = oss_entity.previous_metric_data[heading] - if prev_record is None: - #Cast None to 0 for diff calc - prev_record = 0 - - next_record = oss_entity.metric_data[heading] - if oss_entity.metric_data[heading] is None: - next_record = 0 - - percent_difference = calc_percent_difference( - next_record, prev_record) - - raw_diff = next_record - prev_record - - diff_color = '' - - - # Truth Table: - # - # +--------------+--------------------------------------+---+-------------------+ - # | raw_diff > 0 | DesiredReportBehavior.VALUE_INCREASE | _ | Result | - # +--------------+--------------------------------------+---+-------------------+ - # | 0 | 1 | _ | Red if negative | - # | 1 | 0 | _ | Red if negative | - # | 1 | 1 | | Green | - # | 0 | 0 | - | Green if negative | - # +--------------+--------------------------------------+---+-------------------+ - - - and_conditional=((raw_diff>0) and (behavior == DesiredReportBehavior.VALUE_INCREASE.value)) - #Use a XOR by using the != operator - if (raw_diff > 0) != (behavior == DesiredReportBehavior.VALUE_INCREASE.value): - if raw_diff < 0: - # Red color - diff_color = 'color: #d31c08' - elif and_conditional or ((raw_diff < 0) and not and_conditional): - # Green color - diff_color = 'color: #45c527' - - report_values.update({ - f"latest_{heading}": oss_entity.metric_data[heading], - f"previous_{heading}": prev_record, - f"{heading}_diff": raw_diff, - f"{heading}_diff_percent": round_to_significant_figures(percent_difference, 2), - f"{heading}_diff_color": diff_color, - f"{heading}_diff_percent_color": diff_color - }) - - return report_values - - -def write_report_to_file(report_template, report_values, oss_entity): - """ - Writes a report markdown file to disc after formatting the values provided through - a python dictionary. - - Arguments: - report_template: str - String that contains unformatted text for the markdown report - report_values: dict - Dictionary that contains values to format the text with - oss_entity: OssEntity - Oss entity that the report corresponds to the report - """ - raw_report = report_template.format(**report_values) - with open(oss_entity.get_path_to_report_data(), "w+", encoding="utf-8") as file: - file.write(raw_report) - - -def generate_org_report_files(orgs): - """ - Generate reports for orgs - - Arguments: - orgs: collection - List of orgs to generate reports for - """ - - for org in orgs: - print(f"Generating report for org {org.name}") - - report_values = { - "date_stamp": date.today(), - "repo_owner": org.login - } - - #Define headings as key value pairs where - # key -> value = heading -> desired_behavior - # - # DesiredReportBehavior.VALUE_INCREASE means you want the value to go up - org_metric_table_headings = { - 'commits_count': DesiredReportBehavior.VALUE_INCREASE.value, - 'issues_count': DesiredReportBehavior.VALUE_INCREASE.value, - 'open_issues_count': DesiredReportBehavior.VALUE_DECREASE.value, - 'closed_issues_count': DesiredReportBehavior.VALUE_INCREASE.value, - 'pull_requests_count': DesiredReportBehavior.VALUE_INCREASE.value, - 'open_pull_requests_count': DesiredReportBehavior.VALUE_DECREASE.value, - 'merged_pull_requests_count': DesiredReportBehavior.VALUE_INCREASE.value, - 'closed_pull_requests_count': DesiredReportBehavior.VALUE_DECREASE.value, - 'forks_count': DesiredReportBehavior.VALUE_INCREASE.value, - 'stargazers_count': DesiredReportBehavior.VALUE_INCREASE.value, - 'watchers_count': DesiredReportBehavior.VALUE_INCREASE.value, - 'followers_count': DesiredReportBehavior.VALUE_INCREASE.value - } - - report_values.update(get_heading_report_values( - org_metric_table_headings, org)) - write_report_to_file(ORG_REPORT_TEMPLATE, report_values, org) - - -def generate_repo_report_files(repos): - """ - This function generates reports for each repo and writes them to file. - - Arguments: - repos: - list of repositories to generate reports for. - """ - for repo in repos: - print(f"Generating repo report for repo {repo.name}") - # Create a dictionary of values to calculate for the report - - - badge_name = repo.metric_data.get('nadia_badge_name') - badge_color = repo.metric_data.get('nadia_color') - - if not badge_name or not badge_color: - badge_color = "#ff0000" - badge_name = "Unknown" - - report_values = { - "date_stamp": date.today(), - "repo_owner": repo.repo_owner, - "repo_name": repo.name, - "repo_nadia_badge_name": badge_name, - "repo_nadia_badge_color": badge_color - } - - #Define headings as key value pairs where - # key -> value = heading -> desired_behavior - # - # DesiredReportBehavior.VALUE_INCREASE means you want the value to go up - metric_table_headings = { - 'commits_count': DesiredReportBehavior.VALUE_INCREASE.value, - 'issues_count': DesiredReportBehavior.VALUE_INCREASE.value, - 'open_issues_count': DesiredReportBehavior.VALUE_DECREASE.value, - 'closed_issues_count': DesiredReportBehavior.VALUE_INCREASE.value, - 'pull_requests_count': DesiredReportBehavior.VALUE_INCREASE.value, - 'open_pull_requests_count': DesiredReportBehavior.VALUE_DECREASE.value, - 'merged_pull_requests_count': DesiredReportBehavior.VALUE_INCREASE.value, - 'closed_pull_requests_count': DesiredReportBehavior.VALUE_DECREASE.value, - 'forks_count': DesiredReportBehavior.VALUE_INCREASE.value, - 'stargazers_count': DesiredReportBehavior.VALUE_INCREASE.value, - 'watchers_count': DesiredReportBehavior.VALUE_INCREASE.value - } - - report_values.update(get_heading_report_values( - metric_table_headings, repo)) - - write_report_to_file(REPO_REPORT_TEMPLATE, report_values, repo) diff --git a/scripts/metricsLib/__init__.py b/scripts/metricsLib/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/scripts/metricsLib/metrics_data_structures.py b/scripts/metricsLib/metrics_data_structures.py deleted file mode 100644 index c2d67fcbb9..0000000000 --- a/scripts/metricsLib/metrics_data_structures.py +++ /dev/null @@ -1,576 +0,0 @@ -""" -Module to define classes of metrics that gather data given parameters -""" -import json -from json.decoder import JSONDecodeError -import datetime -from time import sleep, mktime, gmtime, time, localtime -from functools import reduce -import operator -import requests -from metricsLib.constants import TIMEOUT_IN_SECONDS, GH_GQL_ENDPOINT, REQUEST_RETRIES - -# Simple metric that can be represented by a count or value. - - -class BaseMetric: - """ - This serves as the base class to define a metric. - A metric accepts parameters and returns data with the - get_values method. - - Url format should be in the vein of - 'https://api.github.com/repos/{owner}/{repo}/issues?state=all' - then url.format(**data) - - ... - - Attributes - ---------- - name : str - name of the metric - needed_parameters : dict - Dictionary holding parameters that are needed to hit a metric - endpoint_url : str - Api endpoint to use to get information - return_values : dict - Mapping dict to define the json info to store and the format to store it in - token: str - Api token to use for auth - method: str - Request method, GET or POST - - Methods - ------- - hit_metric(params={}): - Format url with parameters and fetch the data from it - get_values(params={}): - Fetch data from url using parameters and format the data - before returning it. - """ - - def __init__(self, name, needed_params, endpoint_url, return_values, token=None, method='GET'): - self.name = name - self.return_values = return_values - self.url = endpoint_url - - self.needed_parameters = needed_params - self.method = method - if token: - self.headers = {"Authorization": f"bearer {token}"} - else: - self.headers = None - - def hit_metric(self, params=None): - """ - Format the url with parameters and fetch the data from it. - - Args: - params: dict - Dictionary of parameters to apply to endpoint. - """ - request_params = params - endpoint_to_hit = self.url - - if params and len(params) > 0 and self.method == 'GET': - endpoint_to_hit = self.url.format(**params) - request_params = None - - attempts = 0 - - while attempts < REQUEST_RETRIES: - if self.headers: - _args_ = (self.method, endpoint_to_hit) - _kwargs_ = { - "params": request_params, - "headers": self.headers, - "timeout": TIMEOUT_IN_SECONDS - } - response = requests.request(*_args_, **_kwargs_) - else: - response = requests.request( - self.method, endpoint_to_hit, params=request_params, timeout=TIMEOUT_IN_SECONDS) - - try: - if response.status_code == 200: - response_json = json.loads(response.text) - break - elif response.status_code in (403,429): - #rate limit was triggered. - wait_until = int(response.headers.get("x-ratelimit-reset")) - wait_in_seconds = int( - mktime(gmtime(wait_until)) - - mktime(gmtime(time())) - ) - wait_until_time = localtime(wait_until) - - print(f"Ran into rate limit sleeping for {self.name}!") - print( - f"sleeping until {wait_until_time.tm_hour}:{wait_until_time.tm_min} ({wait_in_seconds} seconds)" - ) - sleep(wait_in_seconds) - - response_json = {} - attempts += 1 - - if attempts >= REQUEST_RETRIES: - raise ConnectionError( - f"Rate limit was reached and couldn't be rectified after {attempts} tries" - ) - else: - raise ConnectionError(f"Non valid status code {response.status_code}!") - except JSONDecodeError: - response_json = {} - attempts += 1 - - return response_json - - def get_values(self, params=None): - """ - Fetch data from url using parameters and format the data - before returning it. - - Args: - params: dict - Dictionary of parameters to apply to endpoint. - """ - metric_json = self.hit_metric(params=params) - to_return = {} - - for return_label, api_label in self.return_values.items(): - try: - to_return[return_label] = metric_json[api_label] - except KeyError as e: - print(f"KeyError found when trying to map return values: {e}") - print( - f"Map throws KeyError when trying to parse return values for metric {self.name}" - ) - - return to_return - - -class ResourceMetric(BaseMetric): - """ - Class to define a metric that gets data from an endpoint that returns data - that isn't supposed to be parsed through like a png image or a svg graph. - - Attributes - ---------- - name : str - Filename to save the resource as. - - format: str - File format to use - - Methods - ------- - hit_metric(params={}): - Fetch data from url using parameters - - get_values(repo,params={}): - Fetch data and save it in desired path and format - """ - - def __init__(self, name, needed_params, url, fmt='png', token=None): - super().__init__(name, needed_params, url, {}, token=token) - self.format = fmt - - def hit_metric(self, params=None): - """ - Format the url with parameters and fetch the data from it. - - Args: - params: dict - Dictionary of parameters to apply to endpoint. - """ - - endpoint_to_hit = self.url - request_params = params - if params and len(params) > 0 and self.method == 'GET': - endpoint_to_hit = self.url.format(**params) - request_params = None - - if self.headers: - _args_ = (self.method, endpoint_to_hit) - _kwargs_ = { - "params": request_params, - "headers": self.headers, - "timeout": TIMEOUT_IN_SECONDS, - "stream": True - } - response = requests.request(*_args_, **_kwargs_) - else: - response = requests.request( - self.method, endpoint_to_hit, params=request_params, timeout=TIMEOUT_IN_SECONDS) - # return response - return response - - def get_values(self,params=None, oss_entity=None): - - r = self.hit_metric(params=params) - - path = oss_entity.get_path_to_resource_data(self.name, fmt=self.format) - - if r.status_code == 200: - errtext = "There is no data for this repo, in the database you are accessing" - if r.text == errtext: - print(errtext) - return {} - - with open(path, "wb+") as f: - f.write(r.content) - - print(f"Path: {path}") - else: - print(f"Status code: {r.status_code}") - return {} - - -class GraphQLMetric(BaseMetric): - """ - Class to define a metric that gets data from a graphql endpoint. - - The format of the return_values var also uses lists to extract the - desired value. - EX: - { - commits_count: ["defaultBranchRef","commits","history","totalCount"] - } - - ... - - Attributes - ---------- - query : str - String that corresponds to the relevant graphql query - - Methods - ------- - get_values(params={}): - Fetch data from url using parameters and format the data - before returning it. - """ - # Return value is a dict of lists of strings that match to the keys of the dict. - - def __init__(self, name, needed_params, query, return_vals, token=None, url=GH_GQL_ENDPOINT): - super().__init__(name, needed_params, url, return_vals, token=token) - self.query = query - - def get_values(self, params=None): - """ - Fetch data from url using parameters and format the data - before returning it. - - Args: - params: dict - Dictionary of parameters to apply to endpoint. - """ - json_dict = { - 'query': self.query - } - - #print(params) - - # If there are bind variables bind them to the query here. - if params: - - json_dict['variables'] = params - json_dict['variables'] = json_dict['variables'] - #print(json_dict['variables']) - - if self.headers: - response = requests.post( - self.url, headers=self.headers, json=json_dict, timeout=TIMEOUT_IN_SECONDS) - else: - response = requests.post( - self.url, json=json_dict, timeout=TIMEOUT_IN_SECONDS) - - response_json = json.loads(response.text) - - to_return = {} - - if "data" not in response_json.keys(): - if "message" not in response_json.keys(): - raise requests.exceptions.InvalidJSONError( - response_json['errors'][0]['message']) - - raise requests.exceptions.InvalidJSONError( - response_json['message']) - - # print(f"Response_JSON: {response_json}") - # print(f"Return values: {self.return_values}") - for val, key_sequence in self.return_values.items(): - # Extract the nested data and store it in a flat dict to return to the user - - try: - to_return[val] = reduce( - operator.getitem, key_sequence, response_json) - except TypeError as e: - print(f"Ran into error for {val} " + - f"when parsing data for repo {self.name}!: \n\n {e}\n\n") - to_return[val] = None - - return to_return - - -class LengthMetric(BaseMetric): - """ - Class to define a metric that returns the length of a returned list - from an endpoint - ... - - Methods - ------- - get_values(params={}): - Fetch data from url using parameters, format and sum the data - before returning it. - """ - - def __init__(self, name, needed_params, endpoint_url, return_val, token=None, method='GET'): - super().__init__(name, needed_params, endpoint_url, - return_val, token=token, method=method) - - def get_values(self, params=None): - return {self.return_values: len(self.hit_metric(params=params))} - - -class ListMetric(BaseMetric): - """ - Class to define a metric that returns a returned list - from an endpoint - ... - - Methods - ------- - get_values(params={}): - Fetch data from url using parameters, format and sum the data - before returning it. - """ - - def __init__(self, name, needed_params, endpoint_url, return_values, token=None, method='GET'): - super().__init__(name, needed_params, endpoint_url, - return_values, token=token, method=method) - - self.tuple_flag = True - - def get_values(self, params=None): - metric_json = self.hit_metric(params=params) - - to_return = {} - - #print(f"URL: {self.url}") - for return_label, api_label in self.return_values.items(): - # Allow for multiple keys of each returned element to be stored. - # EX: storing the date and count of each time the amount of followers - # increased. - try: - #Only continue if the api_label is a list - if type(api_label) != list: - raise TypeError - list(api_label) - - # initialize each label as an empty list - to_return[return_label] = [] - - for item in metric_json: - - # extract each key in returned json and add to sublist - elem = [] - for sub_label in api_label: - elem.append(item[sub_label]) - #print(elem) - # Add up sublists and assign to return label key - if not self.tuple_flag: - to_return[return_label].extend(elem) - else: - to_return[return_label].append(elem) - except TypeError: - # return_label key is assigned to list of extracted api_label value - to_return[return_label] = [item[api_label] - for item in metric_json] - - return to_return - - -class RangeMetric(ListMetric): - """ - Class to define a metric that returns the sum of a returned list - from an endpoint - ... - - Methods - ------- - get_values(params={}): - Fetch data from url using parameters, format and sum the data - before returning it. - """ - - def __init__(self, name, needed_params, endpoint_url, return_values, token=None, method='GET'): - super().__init__(name, needed_params, endpoint_url, - return_values, token=token, method=method) - - self.tuple_flag = False - - def get_values(self, params=None): - """ - Fetch data from url using parameters and format the data - before returning it. - - Sums up the result lists of ListMetric's get_values method - and returns - - Args: - params: dict - Dictionary of parameters to apply to endpoint. - - Returns: - Dictionary containing the desired values in the requested mapping - """ - - return_dict = super().get_values(params=params) # self.hit_metric(params=params) - - to_return = {} - - print(return_dict) - for return_label, _ in return_dict.items(): - to_return[return_label] = sum(return_dict[return_label]) - - return to_return - - -class CustomMetric(BaseMetric): - """ - Class to define a metric that is parsed in a custom way defined - by a function that takes the metric_json returned by the endpoint as - an argument. - - ... - - Methods - ------- - get_values(params={}): - Fetch data from url using parameters, format and sum the data - before returning it. Using the custom parsing function passed in. - """ - - def __init__(self, name, needed_parameters, endpoint_url, func, token=None, method='GET'): - super().__init__(name, needed_parameters, - endpoint_url, None, token=token, method=method) - self.parse_function = func - - def get_values(self, params=None): - metric_json = self.hit_metric(params=params) - - return self.parse_function(metric_json=metric_json, return_values=self.return_values) - - -# Custom parse functions -def parse_nadia_label_into_badge(**kwargs): - """ - Parse the json returned by the augur nadia badging - endpoint and return a url to the appropriate badge - - Args: - kwargs: dict - Keyword arguments used by the parsing function. - - Returns: - Dictionary containing the url of the badge - """ - - metric_json = kwargs['metric_json'] - - try: - badge_name = metric_json[0]['nadia_badge_level'] - except KeyError: - return {} - - - color_map = { - "club" : "ff69b4", - "toy" : "0000ff", - "stadium": "ffa500", - "federation": "66ff00" - } - - color = color_map.get(badge_name) - - if not color: - color = "ff0000" - badge_name = "midsize" - - url = f"https://img.shields.io/static/v1?label=project+type&message={badge_name}&color={color}" - - #return the url for the website to link to rather than waste time and space downloading - # the svg tag and saving it - return { - "nadia_shields_badge_url": url, - "nadia_color": color, - "nadia_badge_name": badge_name - } - -def parse_commits_by_month(**kwargs): - """ - Parse the raw json returned by the commits endpoint into - a dictionary that groups commit counts for a repository by - month. - - Args: - kwargs: dict - Keyword arguments used by the parsing function, - - Returns: - Dictionary containing the desired values in the requested mapping - """ - - metric_json = kwargs['metric_json'] - - commits_by_month = {} - # print(metric_json) - - # print(metric_json) - for commit in metric_json: - # Get the month and year of the commit - try: - datetime_str = commit['commit']['author']['date'] - except TypeError: - print(commit) - continue - date_obj = datetime.datetime.strptime( - datetime_str, '%Y-%m-%dT%H:%M:%SZ') - month = f"{date_obj.year}/{date_obj.month}" - # print(month) - - # Add up the commits for each month and return - if commits_by_month.get(month): - commits_by_month[month] += 1 - else: - commits_by_month[month] = 1 - - return {"commits_by_month": commits_by_month} - -class LanguageMetric(BaseMetric): - """ - Class to fetch and process language data for a GitHub repository. - ... - - This class overrides the get_values method to handle the GitHub API's - language endpoint, which returns a dictionary of languages and their - byte counts. It calculates the percentage of each language based on - the total bytes of code in the repository. - - Attributes - ---------- - Inherits all attributes from BaseMetric. - - Methods - ---------- - get_values(params=None): Fetches language data and calculates - percentages. - """ - def __init__(self, name, params, url, token=None): - super().__init__(name, params, url, {}, token) - - def get_values(self, params=None): - predom_langs_data = self.hit_metric(params=params) - return {"predominant_langs": predom_langs_data} diff --git a/scripts/metricsLib/metrics_definitions.py b/scripts/metricsLib/metrics_definitions.py deleted file mode 100644 index 67331f702d..0000000000 --- a/scripts/metricsLib/metrics_definitions.py +++ /dev/null @@ -1,262 +0,0 @@ -""" -Definitions of specific metrics for metricsLib -""" -from metricsLib.metrics_data_structures import CustomMetric, parse_commits_by_month, RangeMetric -from metricsLib.metrics_data_structures import GraphQLMetric, LengthMetric, ResourceMetric, BaseMetric -from metricsLib.metrics_data_structures import ListMetric, parse_nadia_label_into_badge -from metricsLib.metrics_data_structures import BaseMetric, LanguageMetric -from metricsLib.constants import TOKEN, AUGUR_HOST - -# The general procedure is to execute all metrics against all repos and orgs - -SIMPLE_METRICS = [] - -# Weekly, monthly metrics. -PERIODIC_METRICS = [] - -# Classification metrics -ADVANCED_METRICS = [] - -# Metrics gathered by org instead of by repo -ORG_METRICS = [] - -# Metrics that save a resource to a file -RESOURCE_METRICS = [] - -# Predominant Languages Endpoint (ex. https://api.github.com/repos/chaoss/augur/languages) -LANGUAGE_ENDPOINT = "https://api.github.com/repos/{owner}/{repo}/languages" - -REPO_GITHUB_GRAPHQL_QUERY = """ -query ($repo: String!, $owner: String!) { - repository(name: $repo, owner: $owner) { - description, - forkCount, - forkingAllowed, - stargazerCount, - createdAt, - - pullRequests(first: 1) - { - totalCount - }, - mergedPullRequests: pullRequests(first: 1, states: MERGED) - { - totalCount - }, - closedPullRequests: pullRequests(first: 1, states: CLOSED) - { - totalCount - }, - openPullRequests: pullRequests(first: 1, states: OPEN) - { - totalCount - }, - issues(first: 1) - { - totalCount - }, - openIssues: issues(first: 1, states: OPEN) - { - totalCount - }, - closedIssues: issues(first: 1, states: CLOSED) - { - totalCount - }, - watchers(first: 1) - { - totalCount - } - defaultBranchRef - { - name, - target - { - ... on Commit - { - history(first: 1) - { - totalCount - } - - } - } - } - } -} -""" - - -github_graphql_simple_counts_metric_map = { - "description": ["data", "repository", "description"], - "commits_count": ["data", "repository", "defaultBranchRef", "target", "history", "totalCount"], - "issues_count": ["data", "repository", "issues", "totalCount"], - "open_issues_count": ["data", "repository", "openIssues", "totalCount"], - "closed_issues_count": ["data", "repository", "closedIssues", "totalCount"], - "pull_requests_count": ["data", "repository", "pullRequests", "totalCount"], - "open_pull_requests_count": ["data", "repository", "openPullRequests", "totalCount"], - "merged_pull_requests_count": ["data", "repository", "mergedPullRequests", "totalCount"], - "closed_pull_requests_count": ["data", "repository", "closedPullRequests", "totalCount"], - "forks_count": ["data", "repository", "forkCount"], - "stargazers_count": ["data", "repository", "stargazerCount"], - "watchers_count": ["data", "repository", "watchers", "totalCount"], - "created_at": ["data", "repository", "createdAt"] -} -SIMPLE_METRICS.append(GraphQLMetric("githubGraphqlSimpleCounts", ["repo", "owner"], - REPO_GITHUB_GRAPHQL_QUERY, - github_graphql_simple_counts_metric_map, token=TOKEN)) - -SIMPLE_METRICS.append(RangeMetric("totalRepoLines",["repo_id"], AUGUR_HOST + - "/complexity/project_lines?repo_id={repo_id}", - {"total_project_lines": ["total_lines"], - "average_project_lines": ["average_lines"]})) - -SIMPLE_METRICS.append(RangeMetric("totalRepoCommentLines",["repo_id"], AUGUR_HOST + - "/complexity/project_comment_lines?repo_id={repo_id}", - {"total_project_comment_lines": ["comment_lines"], - "average_project_comment_lines": ["avg_comment_lines"]})) - -SIMPLE_METRICS.append(RangeMetric("totalRepoBlankLines",["repo_id"], AUGUR_HOST + - "/complexity/project_blank_lines?repo_id={repo_id}", - {"total_project_blank_lines": ["blank_lines"], - "average_blank_lines": ["avg_blank_lines"]})) - -SIMPLE_METRICS.append(LanguageMetric("repositoryLanguages", - ["owner", "repo"], - LANGUAGE_ENDPOINT, - token=TOKEN)) - -SIMPLE_METRICS.append(GraphQLMetric("githubGraphqlSimpleCounts", ["repo", "owner"], - REPO_GITHUB_GRAPHQL_QUERY, - github_graphql_simple_counts_metric_map, token=TOKEN)) - -REPOMETRICS_ENDPOINT = "https://raw.githubusercontent.com/{owner}/{repo}/main/code.json" -repometrics_values = {"projectType": "projectType", "userInput": "userInput", "fismaLevel": "fismaLevel", - "group": "group", "subsetInHealthcare": "subsetInHealthcare", "userType": "userType", - "repositoryHost": "repositoryHost", "maturityModelTier": "maturityModelTier"} -SIMPLE_METRICS.append(BaseMetric("repometrics", ['owner', 'repo'], REPOMETRICS_ENDPOINT, repometrics_values, token=TOKEN)) - -ORG_METRICS.append(ListMetric("topCommitters", ["repo_group_id"], - AUGUR_HOST + - "/repo-groups/{repo_group_id}/top-committers", - {"top_committers": ["email", "commits"]})) - -ORG_METRICS.append(ListMetric("orgLibyears", ["repo_group_id"], - AUGUR_HOST + - "/repo-groups/{repo_group_id}/libyear", - {"dependency_libyear_list": [ - "repo_name", "name","libyear","most_recent_collection" - ] - })) - - -CONTRIBS_LABEL_LAST_MONTH = "new_commit_contributors_by_day_over_last_month" -PERIODIC_METRICS.append(ListMetric("newContributorsofCommitsWeekly", - ["repo_id", "period", "begin_week", "end_date"], - AUGUR_HOST + "/repos/{repo_id}" + - "/pull-requests-merge-contributor-new" + - "?period={period}&begin_date={begin_week}&end_date={end_date}", - { - CONTRIBS_LABEL_LAST_MONTH: ["commit_date", "count"] - })) - -sixMonthsParams = ["repo_id", "period", "begin_month", "end_date"] -LABEL = "new_commit_contributors_by_day_over_last_six_months" -PERIODIC_METRICS.append(ListMetric("newContributorsofCommitsMonthly", sixMonthsParams, - AUGUR_HOST + - "/repos/{repo_id}/pull-requests-merge-contributor-new" + - "?period={period}&begin_date={begin_month}&end_date={end_date}", - {LABEL: ["commit_date", "count"]})) - -PERIODIC_METRICS.append(ListMetric("issuesNewWeekly", ["repo_id","period","begin_week","end_date"], - AUGUR_HOST + - "/repos/{repo_id}/issues-new" + - "?period={period}&begin_date={begin_week}&end_date={end_date}", - {"new_issues_by_day_over_last_month": ["date", "issues"]})) - -PERIODIC_METRICS.append(ListMetric("issuesNewMonthly", sixMonthsParams, - AUGUR_HOST + - "/repos/{repo_id}/issues-new?" + - "period={period}&begin_date={begin_month}&end_date={end_date}", - {"new_issues_by_day_over_last_six_months": ["date", "issues"]})) - -RESOURCE_METRICS.append(ResourceMetric("firstResponseForClosedPR", sixMonthsParams, - AUGUR_HOST + "/pull_request_reports/PR_time_to_first_response/" + - "?repo_id={repo_id}&start_date={begin_month}&end_date={end_date}")) - -ORG_GITHUB_GRAPHQL_QUERY = """ -query ($org_login: String!) { - organization(login: $org_login) { - createdAt, - avatarUrl, - description, - email, - isVerified, - location, - twitterUsername - repositories(first: 1) - { - totalCount - } - } -} -""" -ORG_METRICS.append(GraphQLMetric("githubGraphqlOrgSimple", ["org_login"], ORG_GITHUB_GRAPHQL_QUERY, - {"timestampCreatedAt": ["data", "organization", "createdAt"], - "avatar_url": ["data", "organization", "avatarUrl"], - "description": ["data", "organization", "description"], - "email": ["data", "organization", "email"], - "is_verified": ["data", "organization", "isVerified"], - "location": ["data", "organization", "location"], - "twitter_username": ["data", "organization", "twitterUsername"], - "repo_count": ["data","organization","repositories","totalCount"] - }, token=TOKEN)) - -FOLLOWERS_ENDPOINT = "https://api.github.com/users/{org_login}/followers" -ORG_METRICS.append( - LengthMetric("orgFollowers", ["org_login"], - FOLLOWERS_ENDPOINT, "followers_count", token=TOKEN) -) - -ORG_METRICS.append(ListMetric("issueNewWeekly", ["repo_group_id","period","begin_week","end_date"], - AUGUR_HOST + - "/repo-groups/{repo_group_id}/issues-new" + - "?period={period}&begin_date={begin_week}&end_date={end_date}", - {"new_issues_by_day_over_last_month": ["date", "issues"]})) - -ORG_METRICS.append(ListMetric("issueNewMonthly",["repo_group_id","period","begin_month","end_date"], - AUGUR_HOST + - "/repo-groups/{repo_group_id}/issues-new" + - "?period={period}&begin_date={begin_month}&end_date={end_date}", - {"new_issues_by_day_over_last_six_months": ["date", "issues"]})) - -COMMITS_ENDPOINT = "https://api.github.com/repos/{owner}/{repo}/commits" -SIMPLE_METRICS.append(CustomMetric("getCommitsByMonth", [ - 'owner', 'repo'], COMMITS_ENDPOINT, parse_commits_by_month, token=TOKEN)) - - -NADIA_ENDPOINT = AUGUR_HOST + "/repos/{repo_id}/nadia-project-labeling-badge/" -ADVANCED_METRICS.append(CustomMetric("getNadiaBadgeURL",[ - "repo_id"],NADIA_ENDPOINT, parse_nadia_label_into_badge)) - -REPO_LIBYEAR_ENDPOINT = AUGUR_HOST + "/repo-groups/{repo_group_id}/repos/{repo_id}/libyear" -ADVANCED_METRICS.append(ListMetric( - "repoLibyears", - ["repo_group_id","repo_id"], - REPO_LIBYEAR_ENDPOINT, - { - "repo_dependency_libyear_list" : [ - "name","libyear","most_recent_collection" - ] - } - ) -) - -SIMPLE_METRICS.append(ListMetric("averageIssueResolutionTime", sixMonthsParams, AUGUR_HOST + "/repos/" + "{repo_id}" + "/average-issue-resolution-time", {"average_issue_resolution_time": ["repo_name", "avg_issue_resolution_time"]})) - -# Metric for Average Commit Counts per PR -# TODO: - Currently not working because of something wrong on Augur's end. Develop a solution here (hacky) or fix upstream. - -# RESOURCE_METRICS.append(ResourceMetric("averageCommitsPerPR", sixMonthsParams, -# AUGUR_HOST + "/pull_request_reports/average_commits_per_PR/" + -# "?repo_id={repo_id}&start_date={begin_month}&end_date={end_date}")) diff --git a/scripts/metricsLib/oss_metric_entities.py b/scripts/metricsLib/oss_metric_entities.py deleted file mode 100644 index 0925056f45..0000000000 --- a/scripts/metricsLib/oss_metric_entities.py +++ /dev/null @@ -1,454 +0,0 @@ -""" -Module that defines objects to model oss metric entities. i.e. objects that -store data and have methods concerning the concept of entities that we would -like to gather metric data for. -""" -import re -import json -import os -import datetime -import pathlib -import requests -from requests.exceptions import ReadTimeout -from metricsLib.constants import PATH_TO_METRICS_DATA, PATH_TO_REPORTS_DATA, AUGUR_HOST -from metricsLib.constants import TIMEOUT_IN_SECONDS, PATH_TO_GRAPHS_DATA - - -def get_repo_owner_and_name(repo_http_url): - """ Gets the owner and repo from a url. - - Args: - url: Github url - - Returns: - Tuple of owner and repo. Or a tuple of None and None if the url is invalid. - """ - - # Regular expression to parse a GitHub URL into two groups - # The first group contains the owner of the github repo extracted from the url - # The second group contains the name of the github repo extracted from the url - # 'But what is a regular expression?' ----> https://docs.python.org/3/howto/regex.html - regex = r"https?:\/\/github\.com\/([A-Za-z0-9 \- _]+)\/([A-Za-z0-9 \- _ \.]+)(.git)?\/?$" - result = re.search(regex, repo_http_url) - - if not result: - return None, None - - capturing_groups = result.groups() - - owner = capturing_groups[0] - repo = capturing_groups[1] - - return owner, repo - - -def get_timebox_timestamps(): - """ - Gets timeboxed timestamps for the time the - function was ran. - - Returns: - Dictionary of key timestamps and the desired period - for metrics - """ - # Get timeboxed metrics - today = datetime.date.today() - week_ago = today - datetime.timedelta(weeks=4) - month_ago = today - datetime.timedelta(weeks=24) - - # Perpare params for weekly timebox - periodic_params = { - "period": "day", - "end_date": today.strftime('%Y/%m/%d'), - "begin_week": week_ago.strftime('%Y/%m/%d'), - "begin_month": month_ago.strftime('%Y/%m/%d') - } - - return periodic_params - - -class OSSEntity: - """ - This serves as the base class to define an OSSEntity. An OSSEntity is an - object that represents some open source thing that we want to get - information about. For example a Github Repository - - ... - - Attributes - ---------- - name : str - name of the entity - augur_endpoint : str - endpoint to use to connect to the corresponding object in the augur db - needed_parameters : dict - Dictionary holding parameters that are needed to hit a metric - metric_data: dict - The dictionary that actually stores data returned by metrics - previous_metric_data: dict - The dictionary that stores the previous data from the previous metric JSON - - Methods - ------- - store_metrics(info={}): - Alias to update the metric_data dict with metric data - get_parameters_for_metric(metric): - Get a sub directory of the needed_parameters dict that only holds the parameters - needed by a metric - apply_metric_and_store_data(metric): - Pass needed parameters into a metric, hit the metric, and then store the result in - the metric_data dict. - """ - - def __init__(self, name, augur_endpoint): - self.name = name - self.augur_util_endpoint = augur_endpoint - - self.needed_parameters = {} - self.metric_data = {} - self.previous_metric_data = {} - - def store_metrics(self, info): - """ - Alias to update the metric data dict with metric data. - - Args: - info: dict - Dictionary containing the metric to update the - metric data with. - """ - self.metric_data.update(info) - - # TODO: should this logic be moved to the hit_metric method? - def get_parameters_for_metric(self, metric): - """ - Get a sub directory of the needed_parameters dict that only holds the parameters - needed by a metric - - Args: - metric: BaseMetric - - Returns: - Dictionary containing the parameters needed for the given metric - """ - params = {} - - # get the parameter for this metric - for param in metric.needed_parameters: - params[param] = self.needed_parameters[param] - - return params - - def apply_metric_and_store_data(self, metric, *args, **kwargs): - """ - Pass needed parameters into a metric, hit the metric, and then store the result in - the metric_data dict. - - Args: - metric: BaseMetric - """ - params = self.get_parameters_for_metric(metric) - - kwargs['params'] = params - - try: - self.store_metrics(metric.get_values(*args, **kwargs)) - except (TimeoutError, ReadTimeout) as e: - print(f"Timeout for repo {self.name} with metric {metric.name}") - print(f"Error: {e}") - except ConnectionError as e: - print(f"Connection error for repo {self.name} with metric {metric.name}") - print(f"Error: {e}") - - -class Repository(OSSEntity): - """ - This class serves to manage the parameter and metric data of a Repository. - It stores parameter and metric data in two seperate dictionaries for easy JSON - conversion. - - Repository's main purpose as a real python class is to encapsulate the mapping - of the db ids in augur to the repos we are trying to gather metrics for. - - ... - - Attributes - ---------- - url : str - url where the repository is hosted. - repo_owner : str - Org that owns the repo, also could be a User - repo_id : int - database id of the repo in Augur - repo_group_id: int - id for the org that owns the repo in Augur - - Methods - ------- - get_path_to_data(parent_path="",extension=""): - Returns the path to store data given extension - and parent path - get_path_to_json_data(): - Derive the path for json data using json parent - path and extension - get_path_to_report_data(): - Derive the path for markdown data using markdown - parent path and extension - get_path_to_graph_data(): - Derive the path for svg data using svg parent path - and extension - """ - - def __init__(self, repo_git_url, owner_id): - - self.url = repo_git_url - - owner, repo_name = get_repo_owner_and_name(self.url) - - self.repo_owner = owner - - #print(f"owner id: {owner_id}") - #print(repo_git_url) - - #if owner_id is None: - # endpoint = f"{AUGUR_HOST}/repos" - #else: - # endpoint = f"{AUGUR_HOST}/repo-groups/{owner_id}/repos" - endpoint = f"{AUGUR_HOST}/owner/{owner.lower()}/repo/{repo_name.lower()}" - super().__init__(repo_name, endpoint) - - response = requests.get( - self.augur_util_endpoint, timeout=TIMEOUT_IN_SECONDS) - response_json = json.loads(response.text) - - try: - print(endpoint) - print(response_json) - repo_val = response_json[0] - except (IndexError,KeyError): - repo_val = {} - - # print(f"!!!{repo_val}") - # for x in response_json: - # print(f"|{x['repo_name'].lower()}=={repo_name.lower()}|") - # print(repo_val) - self.repo_id = repo_val.get('repo_id') - - #print(f"repo id: {self.repo_id}") - if owner_id is not None: - self.repo_group_id = owner_id - else: - self.repo_group_id = repo_val.get('repo_group_id') - - - # print(f"BEGIN: {today.strftime('%Y/%m/%d')}") - # Prepare params - self.needed_parameters = { - "repo": self.name, - "owner": self.repo_owner, - "repo_id": self.repo_id, - "repo_group_id": self.repo_group_id - } - - self.needed_parameters.update(get_timebox_timestamps()) - - # Prepare dict of metric data. - self.metric_data = { - "url": self.url, - "owner": self.repo_owner, - "name": self.name - } - - self.previous_metric_data = {} - - def get_path_to_data(self, parent_path, extension): - """ - Returns the path to store data given extension - and parent path - - Args: - parent_path: parent path to store data - extension: File extension to use for data format - - Returns: - String path to data. - """ - data_path = os.path.join( - parent_path, f"{self.repo_owner}/{self.name}") - pathlib.Path(data_path).mkdir(parents=True, exist_ok=True) - - filename = f"{self.repo_owner}/{self.name}/{self.name}_data.{extension}" - return os.path.join(parent_path, filename) - - def get_path_to_json_data(self): - """ - Derive the path for json data using json parent - path and extension - - Returns: - String path to data. - """ - return self.get_path_to_data(PATH_TO_METRICS_DATA, "json") - - def get_path_to_report_data(self): - """ - Derive the path for markdown data using markdown - parent path and extension - - Returns: - String path to data. - """ - return self.get_path_to_data(PATH_TO_REPORTS_DATA, "md") - - def get_path_to_resource_data(self, resource_name, fmt="png"): - """ - Derive the path for resource data using svg - parent path and extension - - Returns: - String path to data. - """ - - id_str = f"{self.repo_owner}/{self.name}" - data_path = os.path.join(PATH_TO_GRAPHS_DATA, id_str) - pathlib.Path(data_path).mkdir(parents=True, exist_ok=True) - fname = f"{self.repo_owner}/{self.name}/{resource_name}_{self.name}_data.{fmt}" - return os.path.join(PATH_TO_GRAPHS_DATA, fname) - - def get_path_to_graph_data(self, graph_name): - """ - Derive the path for graph data using svg - parent path and extension - - Returns: - String path to data. - """ - - return self.get_path_to_resource_data(graph_name, fmt="svg") - - -class GithubOrg(OSSEntity): - """ - This class serves to manage the parameter and metric data of a GithubOrg. - It stores parameter and metric data in two seperate dictionaries for easy JSON - conversion. - - GithubOrg's main purpose as a real python class is to encapsulate the mapping - of db ids in CHAOSS/augur to the orgs we are trying to gather metrics for. - - ... - - Attributes - ---------- - login : str - login of the org - repo_group_id: int - id for the org that owns the repo in Augur - - Methods - ------- - get_path_to_json_data(): - Derive the path for json data using json parent - path and extension - """ - - def __init__(self, organization_login): - self.login = organization_login - - print(f"AUGUR_HOST: {AUGUR_HOST}") - super().__init__(self.login, f"{AUGUR_HOST}/repo-groups") - - try: - response = requests.get( - self.augur_util_endpoint, timeout=TIMEOUT_IN_SECONDS) - response_dict = json.loads(response.text) - except Exception: - print("It looks like Augur is down! Not able to get Augur data!") - response_dict = {} - - try: - print(self.login) - # Get the item in the list that matches the login of the github org - gen = (item for item in response_dict if item["rg_name"].lower() == self.login.lower()) - group_id = next(gen, None) - - self.repo_group_id = group_id['repo_group_id'] - except Exception: - self.repo_group_id = None - - self.needed_parameters = { - "org_login": self.login, - "repo_group_id": self.repo_group_id - } - print(self.needed_parameters) - - self.needed_parameters.update(get_timebox_timestamps()) - - self.metric_data = { - "login": self.login, - "name": self.name, - "rg_id": self.repo_group_id - } - - self.previous_metric_data = {} - - def get_path_to_data(self, super_parent_path, extension): - """ - Derive the path for data using parent - path and extension - - Returns: - String path to data. - """ - parent_path = os.path.join(super_parent_path, f"{self.login}") - pathlib.Path(parent_path).mkdir(parents=True, exist_ok=True) - org_path = os.path.join(parent_path, f"{self.login}_data.{extension}") - - return org_path - - def get_path_to_json_data(self): - """ - Derive the path for json data using json parent - path and extension - - Returns: - String path to data. - """ - return self.get_path_to_data(PATH_TO_METRICS_DATA, "json") - - def get_path_to_report_data(self): - """ - Derive the path for report data using parent - path and extension - - Returns: - String path to data. - """ - return self.get_path_to_data(PATH_TO_REPORTS_DATA, "md") - - def get_path_to_resource_data(self, resource_name, fmt="png"): - """ - Derive the path for graph data using parent - path and extension - - Returns: - String path to data. - """ - - parent_path = os.path.join(PATH_TO_GRAPHS_DATA, f"{self.login}") - pathlib.Path(parent_path).mkdir(parents=True, exist_ok=True) - fname = f"{self.login}_{resource_name}.{fmt}" - org_path = os.path.join(parent_path, fname) - - return org_path - - def get_path_to_graph_data(self, chart_name): - """ - Derive the path for graph data using parent - path and extension - - Returns: - String path to data. - """ - - return self.get_path_to_resource_data(chart_name, fmt="svg") diff --git a/scripts/refresh_graphs.py b/scripts/refresh_graphs.py index 8dd0e9bc6b..3de87c8bd5 100644 --- a/scripts/refresh_graphs.py +++ b/scripts/refresh_graphs.py @@ -2,19 +2,19 @@ Script to run all graph generation from existing data. """ import os -from fetch_public_metrics import parse_repos_and_orgs_into_objects -from fetch_public_metrics import parse_tracked_repos_file, read_current_metric_data -from gen_graphs import generate_all_graphs_for_repos, generate_all_graphs_for_orgs - +from metrics_dash_backend_tools import parse_repos_and_orgs_into_objects +from metrics_dash_backend_tools import parse_tracked_repos_file, read_current_metric_data +from metrics_dash_backend_tools import generate_all_graphs_for_repos, generate_all_graphs_for_orgs +from constants import PATH_TO_METADATA, PATH_TO_METRICS_DATA, PATH_TO_GRAPHS_DATA if __name__ == "__main__": os.umask(0) - orgs_urls, repo_urls = parse_tracked_repos_file() + orgs_urls, repo_urls = parse_tracked_repos_file(PATH_TO_METADATA) all_orgs, all_repos = parse_repos_and_orgs_into_objects(orgs_urls, repo_urls) - read_current_metric_data(all_repos,all_orgs) - generate_all_graphs_for_orgs(all_orgs) - generate_all_graphs_for_repos(all_repos) + read_current_metric_data(PATH_TO_METRICS_DATA, all_repos,all_orgs) + generate_all_graphs_for_orgs(PATH_TO_GRAPHS_DATA, all_orgs) + generate_all_graphs_for_repos(PATH_TO_GRAPHS_DATA, all_repos) diff --git a/scripts/refresh_metrics.py b/scripts/refresh_metrics.py index a205b50bce..8d0588c1ad 100644 --- a/scripts/refresh_metrics.py +++ b/scripts/refresh_metrics.py @@ -3,8 +3,9 @@ """ import os import argparse -from fetch_public_metrics import get_all_data, parse_repos_and_orgs_into_objects -from fetch_public_metrics import parse_tracked_repos_file, read_previous_metric_data +from metrics_dash_backend_tools import get_all_data, parse_repos_and_orgs_into_objects +from metrics_dash_backend_tools import parse_tracked_repos_file, read_previous_metric_data +from constants import PATH_TO_METADATA, PATH_TO_METRICS_DATA, PATH_TO_GRAPHS_DATA @@ -16,10 +17,10 @@ help='The GitHub Org to update data for.') args = parser.parse_args() - orgs_urls, repo_urls = parse_tracked_repos_file(args.org) + orgs_urls, repo_urls = parse_tracked_repos_file(PATH_TO_METADATA, org=args.org) all_orgs, all_repos = parse_repos_and_orgs_into_objects(orgs_urls, repo_urls) # Generate json data, report data, and graph data. - read_previous_metric_data(all_repos,all_orgs) - get_all_data(all_orgs, all_repos) + read_previous_metric_data(PATH_TO_METRICS_DATA,all_repos,all_orgs) + get_all_data(PATH_TO_METRICS_DATA, PATH_TO_GRAPHS_DATA,all_orgs, all_repos) diff --git a/scripts/refresh_reports.py b/scripts/refresh_reports.py index bf2d2aaced..c6aa0ab0f0 100644 --- a/scripts/refresh_reports.py +++ b/scripts/refresh_reports.py @@ -2,19 +2,23 @@ Script to run all report generation from existing data. """ import os -from fetch_public_metrics import parse_repos_and_orgs_into_objects -from fetch_public_metrics import parse_tracked_repos_file, read_current_metric_data -from gen_reports import generate_repo_report_files, generate_org_report_files +from metrics_dash_backend_tools import parse_repos_and_orgs_into_objects +from metrics_dash_backend_tools import parse_tracked_repos_file, read_current_metric_data +from metrics_dash_backend_tools import generate_repo_report_files, generate_org_report_files +from constants import PATH_TO_METADATA, PATH_TO_METRICS_DATA, PATH_TO_REPORTS_DATA, PATH_TO_TEMPLATES if __name__ == "__main__": os.umask(0) - orgs_urls, repo_urls = parse_tracked_repos_file() + repo_template_path = os.path.join(PATH_TO_TEMPLATES, "repo_report_template.md") + org_template_path = os.path.join(PATH_TO_TEMPLATES, "org_report_template.md") + + orgs_urls, repo_urls = parse_tracked_repos_file(PATH_TO_METADATA) all_orgs, all_repos = parse_repos_and_orgs_into_objects(orgs_urls, repo_urls) - read_current_metric_data(all_repos,all_orgs) - generate_repo_report_files(all_repos) - generate_org_report_files(all_orgs) + read_current_metric_data(PATH_TO_METRICS_DATA,all_repos,all_orgs) + generate_repo_report_files(PATH_TO_REPORTS_DATA, repo_template_path, all_repos) + generate_org_report_files(PATH_TO_REPORTS_DATA, org_template_path, all_orgs)