Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 7 additions & 12 deletions .github/workflows/regenerate_dashboard.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
name: Regenerate dashboard webpages

on:
# schedule:
# - cron: '*/10 * * * *' # Runs every 10 minutes, for now. Github allows at most "every 5 minutes"
schedule:
- cron: '*/5 * * * *' # Runs every 5 minutes, the fastest Github allows
workflow_dispatch: # Allows manual execution

jobs:
Expand All @@ -15,24 +15,19 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
sparse-checkout: 'processed_data/open_pr_data.json'
sparse-checkout-cone-mode: false

- name: Set up environment
run: |
sudo apt-get update
sudo apt-get install -y jq curl gh python3 python3-pip
sudo apt-get install -y python3 python3-pip
pip3 install python-dateutil

- name: "Download .json files for all open PRs"
id: "download-json"
env:
GH_TOKEN: ${{ github.token }}
run: |
bash ./dashboard.sh

- name: "Regenerate the dashboard webpages"
run: |
python3 ./dashboard.py "all-open-PRs-1.json" "all-open-PRs-2.json" > ./index-old.html
rm *.json
python3 ./dashboard.py

- name: Commit and push changes
run: |
Expand Down
40 changes: 0 additions & 40 deletions .github/workflows/update_metadata.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,43 +74,3 @@ jobs:
# FIXME: make this more robust about incoming changes
# The other workflow does not push to this branch, so this should be fine.
git push

- name: "Regenerate the dashboard webpages"
if: ${{ !cancelled() }} && (steps.download-json.outcome == 'success')
run: |
python3 ./dashboard.py "all-open-PRs-1.json" "all-open-PRs-2.json" > ./index-old.html
rm *.json

- name: Commit and push changes
if: ${{ !cancelled() }} && (steps.download-json.outcome == 'success')
run: |
git config --local user.email "action@github.com"
git config --local user.name "GitHub Action"
# Temporarily rename the generated HTML files
# to avoid a name collision with existing files on the gh-pages branch.
mv index.html tmp.html
mv index-old.html tmp1.html
mv on_the_queue.html tmp2.html
mv review_dashboard.html tmp3.html
mv maintainers_quick.html tmp4.html
mv help_out.html tmp5.html
mv triage.html tmp6.html
git fetch --depth 1 origin gh-pages
git checkout gh-pages
mv tmp.html index.html
mv tmp1.html index-old.html
mv tmp2.html on_the_queue.html
mv tmp3.html review_dashboard.html
mv tmp4.html maintainers_quick.html
mv tmp5.html help_out.html
mv tmp6.html triage.html
git add index.html
git add index-old.html
git add on_the_queue.html
git add review_dashboard.html
git add maintainers_quick.html
git add help_out.html
git add triage.html
git checkout master -- style.css
git commit -m "Update dashboard" -a || echo "No changes to commit"
git push
2 changes: 1 addition & 1 deletion ARCHITECTURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ There are several levels at which this project can be tested. Currently, there a

TODO: there are more generated HTML files now; recommend copying the folder instead...
- changes to just `dashboard.py` can be tested using the JSON files in the `test` directory: run the following from the `test` directory.
`python3 ../dashboard.py all-open-PRs-1.json all-open-PRs-2.json`.
`python3 ../dashboard.py`.
This creates two webpages named `on_the_queue.html` and `index.html`, overwriting any previous files named thus.
You can run this command before and after your changes and compare the resulting files (using `diff` or a similar tool). Because of the overwriting, take care to copy e.g. the old version of the output to a different file before running the tool again.
(The output file needs to be in the top-level directory in order for the styling to work.)
Expand Down
26 changes: 6 additions & 20 deletions compute_dashboard_prs.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,9 @@ class AggregatePRInfo(NamedTuple):
first_on_queue: Tuple[DataStatus, datetime | None] | None
total_queue_time: TotalQueueTime | None

# Missing aggregate information will be replaced by this default item.
PLACEHOLDER_AGGREGATE_INFO = AggregatePRInfo(
False, CIStatus.Missing, "master", "leanprover-community", "open", datetime.now(timezone.utc),
"unknown", "unknown title", [], -1, -1, -1, [], [], None, None, None, None,
)
# NOTE: this was decorated @staticmethod while taking |self| and reading
# instance attributes — a contradiction. It is a plain instance method;
# the existing class-level call style `AggregatePRInfo.toBasicPRInformation(data, number)`
# still works (explicit instance as first argument), and the natural
# `info.toBasicPRInformation(number)` form now works as well.
def toBasicPRInformation(self, number: int) -> BasicPRInformation:
    """Project this aggregate record onto a BasicPRInformation for PR |number|.

    Only the fields BasicPRInformation carries (author, title, URL, labels,
    last-updated time) are copied; all other aggregate data is dropped.
    """
    return BasicPRInformation(number, self.author, self.title, infer_pr_url(number), self.labels, self.last_updated)


# Parse the contents |data| of an aggregate json file into a dictionary pr number -> AggregatePRInfo.
Expand Down Expand Up @@ -195,7 +193,6 @@ def toLabel(name: str) -> Label:
# (`BasicPRInformation` is not hashable, hence cannot be used as a dictionary key.)
# 'aggregate_info' contains aggregate information about each PR's CI state,
# draft status and base branch (and more, which we do not use).
# If no detailed information was available for a given PR number, 'None' is returned.
def compute_pr_statusses(aggregate_info: dict[int, AggregatePRInfo], prs: List[BasicPRInformation]) -> dict[int, PRStatus]:
def determine_status(aggregate_info: AggregatePRInfo) -> PRStatus:
# Ignore all "other" labels, which are not relevant for this anyway.
Expand All @@ -204,7 +201,7 @@ def determine_status(aggregate_info: AggregatePRInfo) -> PRStatus:
state = PRState(labels, aggregate_info.CI_status, aggregate_info.is_draft, from_fork)
return determine_PR_status(datetime.now(timezone.utc), state)

return {info.number: determine_status(aggregate_info[info.number] or PLACEHOLDER_AGGREGATE_INFO) for info in prs}
return {info.number: determine_status(aggregate_info[info.number]) for info in prs}


# Does a PR have a given label?
Expand Down Expand Up @@ -391,16 +388,13 @@ def has_topic_label(pr: BasicPRInformation) -> bool:
return (with_bad_title, prs_without_topic_label, prs_with_contradictory_labels)


# use_aggregate_queue: if True, determine the review queue (and everything depending on it)
# from the aggregate data and not queue.json
def determine_pr_dashboards(
all_open_prs: List[BasicPRInformation],
nondraft_PRs: List[BasicPRInformation],
base_branch: dict[int, str],
prs_from_fork: List[BasicPRInformation],
CI_status: dict[int, CIStatus],
aggregate_info: dict[int, AggregatePRInfo],
use_aggregate_queue: bool,
aggregate_info: dict[int, AggregatePRInfo]
) -> dict[Dashboard, List[BasicPRInformation]]:
approved = [pr for pr in nondraft_PRs if aggregate_info[pr.number].approvals]
prs_to_list: dict[Dashboard, List[BasicPRInformation]] = dict()
Expand Down Expand Up @@ -445,15 +439,7 @@ def determine_pr_dashboards(
foo = [pr for pr in interesting_CI if base_branch[pr.number] == "master" and pr not in prs_from_fork]
prs_to_list[Dashboard.InessentialCIFails] = prs_without_any_label(foo, other_labels + ["merge-conflict"])

queue_prs2 = None
with open("queue.json", "r") as queuefile:
queue_prs2 = _extract_prs(json.load(queuefile))
queue_pr_numbers2 = [pr.number for pr in queue_prs2]
msg = "comparing this page's review dashboard (left) with the Github #queue (right)"
if my_assert_eq(msg, [pr.number for pr in queue_prs], queue_pr_numbers2):
print("Review dashboard and #queue match, hooray!", file=sys.stderr)

prs_to_list[Dashboard.Queue] = queue_prs if use_aggregate_queue else queue_prs2
prs_to_list[Dashboard.Queue] = queue_prs
queue = prs_to_list[Dashboard.Queue]
prs_to_list[Dashboard.QueueNewContributor] = prs_with_label(queue, "new-contributor")
prs_to_list[Dashboard.QueueEasy] = prs_with_label(queue, "easy")
Expand Down
44 changes: 11 additions & 33 deletions dashboard.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#!/usr/bin/env python3

# This script accepts json files as command line arguments and displays the data in an HTML dashboard.
# It assumes that for each PR N which should appear in some dashboard,
# there is a file N.json in the `data` directory, which contains all necessary detailed information about that PR.
# This script uses the aggregate information in processed_data/open_pr_data.json
# to generate the several HTML webpages making up the dashboard.

import json
import sys
Expand All @@ -16,7 +15,7 @@
from ci_status import CIStatus
from classify_pr_state import PRState, PRStatus
from compute_dashboard_prs import (AggregatePRInfo, BasicPRInformation, Label, DataStatus, LastStatusChange, TotalQueueTime,
PLACEHOLDER_AGGREGATE_INFO, compute_pr_statusses, determine_pr_dashboards, infer_pr_url, link_to, parse_aggregate_file, gather_pr_statistics, _extract_prs)
compute_pr_statusses, determine_pr_dashboards, infer_pr_url, link_to, parse_aggregate_file, gather_pr_statistics, _extract_prs)
from mathlib_dashboards import Dashboard, short_description, long_description, getIdTitle
from util import my_assert_eq, format_delta, timedelta_tryParse, relativedelta_tryParse

Expand All @@ -27,26 +26,14 @@
class JSONInputData(NamedTuple):
# All aggregate information stored for every open PR.
aggregate_info: dict[int, AggregatePRInfo]
# Information about all open PRs
all_open_prs: List[BasicPRInformation]


# Validate the command-line arguments and try to read all data passed in via JSON files.
# Any number of JSON files passed in is fine; we interpret them all as containing open PRs.
def read_json_files() -> JSONInputData:
if len(sys.argv) == 1:
print("error: need to pass in some JSON files with open PRs")
sys.exit(1)
all_open_prs = []
for i in range(1, len(sys.argv)):
with open(sys.argv[i]) as prfile:
open_prs = _extract_prs(json.load(prfile))
if len(open_prs) >= 900:
print(f"warning: file {sys.argv[i]} contains at least 900 PRs: the REST API will never return more than 1000 PRs. Please split the list into more files as necessary.", file=sys.stderr)
all_open_prs.extend(open_prs)
with open(path.join("processed_data", "open_pr_data.json"), "r") as f:
aggregate_info = parse_aggregate_file(json.load(f))
return JSONInputData(aggregate_info, all_open_prs)
return JSONInputData(aggregate_info)


### Helper methods: writing HTML code for various parts of the generated webpage ###
Expand Down Expand Up @@ -866,18 +853,11 @@ def main() -> None:
# Populate basic information from the input data: splitting into draft and non-draft PRs
# (mostly, we only use the latter); extract separate dictionaries for CI status and base branch.

# NB. We handle missing metadata by adding "default" values for its aggregate data
# (ready for review, open, against master, failing CI and just updated now).
aggregate_info = input_data.aggregate_info.copy()
for pr in input_data.all_open_prs:
if pr.number not in input_data.aggregate_info:
print(f"warning: found no aggregate information for PR {pr.number}; filling in defaults", file=sys.stderr)
aggregate_info[pr.number] = PLACEHOLDER_AGGREGATE_INFO
draft_PRs = [pr for pr in input_data.all_open_prs if aggregate_info[pr.number].is_draft]
nondraft_PRs = [pr for pr in input_data.all_open_prs if not aggregate_info[pr.number].is_draft]

# The only exception is for the "on the queue" page,
# which points out missing information explicitly, hence is passed the non-filled in data.
aggregate_info = input_data.aggregate_info
all_open_prs = [AggregatePRInfo.toBasicPRInformation(data, number) for (number, data) in aggregate_info.items()]
draft_PRs = [pr for pr in all_open_prs if aggregate_info[pr.number].is_draft]
nondraft_PRs = [pr for pr in all_open_prs if not aggregate_info[pr.number].is_draft]

CI_status: dict[int, CIStatus] = dict()
for pr in nondraft_PRs:
if pr.number in input_data.aggregate_info:
Expand All @@ -888,12 +868,10 @@ def main() -> None:
for pr in nondraft_PRs:
base_branch[pr.number] = aggregate_info[pr.number].base_branch
prs_from_fork = [pr for pr in nondraft_PRs if aggregate_info[pr.number].head_repo != "leanprover-community"]
all_pr_status = compute_pr_statusses(aggregate_info, input_data.all_open_prs)
all_pr_status = compute_pr_statusses(aggregate_info, all_open_prs)
write_on_the_queue_page(all_pr_status, aggregate_info, nondraft_PRs, prs_from_fork, CI_status, base_branch)

# TODO: try to enable |use_aggregate_queue| 'queue_prs' again, once all the root causes
# for PRs getting 'dropped' by 'gather_stats.sh' are found and fixed.
prs_to_list = determine_pr_dashboards(input_data.all_open_prs, nondraft_PRs, base_branch, prs_from_fork, CI_status, aggregate_info, False)
prs_to_list = determine_pr_dashboards(all_open_prs, nondraft_PRs, base_branch, prs_from_fork, CI_status, aggregate_info)

# FUTURE: can this time be displayed in the local time zone of the user viewing this page?
updated = datetime.now(timezone.utc).strftime("%B %d, %Y at %H:%M UTC")
Expand Down
2 changes: 1 addition & 1 deletion docs/Overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ This is not difficult, but requires changes in multiple stages.
1. Edit `process.py`, change the `get_aggregate_data` function to extract the data you want.
Pay attention to the fact that there is "basic" and "full" PR info; not all information might be available in both files.
2. Run `process.py` locally to update the generated file; commit the changes (except for the changed time-stamp). This step is optional; you can also just push the previous step and let CI perform this update.
3. Once step 2 is complete, edit the definition of `AggregatePRInfo` in `dashboard.py` to include your new data field. Update `PLACEHOLDER_AGGREGATE_INFO` as well. Update `read_json_files` to parse this field as well.
3. Once step 2 is complete, edit the definition of `AggregatePRInfo` in `compute_dashboard_prs.py` to include your new data field. Update `parse_aggregate_file` (called from `read_json_files` in `dashboard.py`) to parse this field as well.
4. Congratulations, now you have made some new metadata available to the dashboard processing. (For making use of this, see the previous bullet point for changing the dashboard.)


Expand Down
4 changes: 1 addition & 3 deletions generate_assigment_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,10 @@

# Assumes the aggregate data is correct: no cross-filling in of placeholder data.
def compute_pr_list_from_aggregate_data_only(aggregate_data: dict[int, AggregatePRInfo]) -> dict[Dashboard, List[BasicPRInformation]]:
all_open_prs: List[BasicPRInformation] = []
nondraft_PRs: List[BasicPRInformation] = []
for number, data in aggregate_data.items():
if data.state == "open":
info = BasicPRInformation(number, data.author, data.title, infer_pr_url(number), data.labels, data.last_updated)
all_open_prs.append(info)
if not data.is_draft:
nondraft_PRs.append(info)
CI_status: dict[int, CIStatus] = dict()
Expand All @@ -54,7 +52,7 @@ def compute_pr_list_from_aggregate_data_only(aggregate_data: dict[int, Aggregate
for pr in nondraft_PRs:
base_branch[pr.number] = aggregate_data[pr.number].base_branch
prs_from_fork = [pr for pr in nondraft_PRs if aggregate_data[pr.number].head_repo != "leanprover-community"]
return determine_pr_dashboards(all_open_prs, nondraft_PRs, base_branch, prs_from_fork, CI_status, aggregate_data, True)
return determine_pr_dashboards(all_open_prs, nondraft_PRs, base_branch, prs_from_fork, CI_status, aggregate_data)


class ReviewerInfo(NamedTuple):
Expand Down