Skip to content

Commit 17536fd

Browse files
authored
[BE][drci] Async fetch PR info during reorganizeWorkflows for speed (#6380)
Not sure if GitHub is good enough at showing the diff here, since highlighting which changes are only indentation is helpful for seeing what actually changed. My VS Code changed-files view is more helpful. Instead of awaiting each API call one at a time in a sequential for loop, issue the per-PR requests concurrently to reduce the time spent waiting on API calls. This reduces the time this function takes from ~45s to 8s (obviously depends on how many PRs there are, but I assume this is pretty representative of calling Dr. CI on pytorch/pytorch during the workday). Waiting 45s is kind of annoying when trying to test changes to Dr. CI. I tested that the result didn't change by comparing the function's output before and after the change.
1 parent e01c392 commit 17536fd

File tree

1 file changed

+56
-59
lines changed

1 file changed

+56
-59
lines changed

torchci/pages/api/drci/drci.ts

+56-59
Original file line numberDiff line numberDiff line change
@@ -1110,69 +1110,63 @@ export async function reorganizeWorkflows(
11101110
workflow.name,
11111111
])
11121112
);
1113-
const workflowsByPR: Map<number, PRandJobs> = new Map();
1114-
const headShaTimestamps: Map<string, string> = new Map();
1115-
1116-
for (const workflow of dedupedRecentWorkflows) {
1117-
const prNumber = workflow.pr_number;
1118-
if (!workflowsByPR.has(prNumber)) {
1119-
let headShaTimestamp = workflow.head_sha_timestamp;
1120-
// NB: The head SHA timestamp is currently used as the end date when
1121-
// searching for similar failures. However, it's not available on CH for
1122-
// commits from forked PRs before a ciflow ref is pushed. In such case,
1123-
// the head SHA timestamp will be undefined and we will make an additional
1124-
// query to GitHub to get the value
1125-
if (octokit && isTime0(headShaTimestamp)) {
1126-
headShaTimestamp = await fetchCommitTimestamp(
1127-
octokit,
1128-
owner,
1129-
repo,
1130-
workflow.head_sha
1131-
);
1132-
headShaTimestamps.set(workflow.head_sha, headShaTimestamp);
1133-
}
1134-
1135-
let prTitle = "";
1136-
let prBody = "";
1137-
let prShas: { sha: string; title: string }[] = [];
1138-
// Gate this to PyTorch as disabled tests feature is only available there
1139-
if (octokit && repo === "pytorch") {
1140-
const prData = await fetchPR(owner, repo, `${prNumber}`, octokit);
1141-
prTitle = prData.title;
1142-
prBody = prData.body;
1143-
prShas = prData.shas;
1144-
}
1145-
1146-
workflowsByPR.set(prNumber, {
1147-
pr_number: prNumber,
1148-
head_sha: workflow.head_sha,
1149-
head_sha_timestamp: headShaTimestamp,
1150-
jobs: [],
1151-
merge_base: "",
1152-
merge_base_date: "",
1153-
owner: owner,
1154-
repo: repo,
1155-
title: prTitle,
1156-
body: prBody,
1157-
shas: prShas,
1158-
});
1159-
}
1113+
const workflowsByPR: PRandJobs[] = await Promise.all(
1114+
_(dedupedRecentWorkflows)
1115+
.groupBy("pr_number")
1116+
.map(async (workflows, prNumber) => {
1117+
// NB: The head SHA timestamp is currently used as the end date when
1118+
// searching for similar failures. However, it's not available on CH for
1119+
// commits from forked PRs before a ciflow ref is pushed. In such case,
1120+
// the head SHA timestamp will be undefined and we will make an additional
1121+
// query to GitHub to get the value
1122+
let headShaTimestamp = workflows.find(
1123+
(workflow) => !isTime0(workflow.head_sha_timestamp)
1124+
)?.head_sha_timestamp;
1125+
if (octokit && headShaTimestamp === undefined) {
1126+
headShaTimestamp = await fetchCommitTimestamp(
1127+
octokit,
1128+
owner,
1129+
repo,
1130+
workflows[0].head_sha
1131+
);
1132+
}
1133+
workflows.forEach((workflow) => {
1134+
if (isTime0(workflow.head_sha_timestamp) && headShaTimestamp) {
1135+
workflow.head_sha_timestamp = headShaTimestamp;
1136+
}
1137+
});
11601138

1161-
const headShaTimestamp = headShaTimestamps.get(workflow.head_sha);
1162-
if (
1163-
isTime0(workflow.head_sha_timestamp) &&
1164-
headShaTimestamp &&
1165-
!isTime0(headShaTimestamp)
1166-
) {
1167-
workflow.head_sha_timestamp = headShaTimestamp;
1168-
}
1139+
let prTitle = "";
1140+
let prBody = "";
1141+
let prShas: { sha: string; title: string }[] = [];
1142+
// Gate this to PyTorch as disabled tests feature is only available there
1143+
if (octokit && repo === "pytorch") {
1144+
const prData = await fetchPR(owner, repo, `${prNumber}`, octokit);
1145+
prTitle = prData.title;
1146+
prBody = prData.body;
1147+
prShas = prData.shas;
1148+
}
11691149

1170-
workflowsByPR.get(prNumber)!.jobs.push(workflow);
1171-
}
1150+
return {
1151+
pr_number: parseInt(prNumber),
1152+
head_sha: workflows[0].head_sha,
1153+
head_sha_timestamp: headShaTimestamp ?? "",
1154+
jobs: workflows,
1155+
merge_base: "",
1156+
merge_base_date: "",
1157+
owner: owner,
1158+
repo: repo,
1159+
title: prTitle,
1160+
body: prBody,
1161+
shas: prShas,
1162+
};
1163+
})
1164+
.value()
1165+
);
11721166

11731167
// clean up the workflows - remove retries, remove workflows that have jobs,
11741168
// remove cancelled jobs with weird names
1175-
for (const [, prInfo] of workflowsByPR) {
1169+
for (const prInfo of workflowsByPR) {
11761170
const [workflows, jobs] = _.partition(
11771171
prInfo.jobs,
11781172
(job) => job.workflowId === 0
@@ -1220,5 +1214,8 @@ export async function reorganizeWorkflows(
12201214
// Remove cancelled jobs with weird names
12211215
prInfo.jobs = removeCancelledJobAfterRetry<RecentWorkflowsData>(allJobs);
12221216
}
1223-
return workflowsByPR;
1217+
return workflowsByPR.reduce((acc, prInfo) => {
1218+
acc.set(prInfo.pr_number, prInfo);
1219+
return acc;
1220+
}, new Map<number, PRandJobs>());
12241221
}

0 commit comments

Comments
 (0)