Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HUD] Adds dashboard in Metrics page to track ephemeral experimentation % over time #6420

Merged
merged 4 commits into from
Mar 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"params": {
"days_ago": "Int64",
"experiment_name": "String"
},
"tests": []
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
-- Per hour bucket, reports what percentage of comparable jobs ran on runners
-- whose label contains the experiment_name parameter (matched as
-- '%.<experiment_name>.%'), versus the same jobs on non-experiment labels.
-- Parameters: days_ago (Int64), experiment_name (String).
WITH
-- One row per (completed job, runner label), restricted to the lookback
-- window and with the excluded labels filtered out.
normalized_jobs AS (
    SELECT
        l AS label,
        -- Keep only the text before the first comma, which removes the shard
        -- number and label from job names.
        extract(j.name, '[^,]*') AS job_name,
        j.workflow_name,
        toStartOfInterval(j.started_at, INTERVAL 1 HOUR) AS bucket
    FROM
        -- Deliberately not adding FINAL to this workflow_job.
        -- Risks of not using it:
        -- - You may get duplicate records for rows that were updated, corresponding to their
        --   before/after states, but as long as there's some mechanism in the query to account
        --   for that it's okay (we check for j.status = 'completed').
        -- - In the worst case scenario, you may only see the 'old' version of the records for some rows.
        -- Costs of using it:
        -- - Query processing time increases from ~5 -> 16 seconds
        -- - Memory usage grows from 7.5 GB -> 32 GB
        -- So the tradeoff is worth it for this query.
        workflow_job AS j
    ARRAY JOIN j.labels AS l
    WHERE
        j.created_at > now() - INTERVAL {days_ago: Int64} DAY
        AND j.status = 'completed'
        AND l != 'self-hosted'
        AND l NOT LIKE 'lf.c.%'
        AND l NOT LIKE '%.canary'
        AND l NOT LIKE 'c.%'
),
-- Names of jobs that ran at least once on an experiment label.
experiment_jobs AS (
    SELECT DISTINCT
        j.job_name
    FROM
        normalized_jobs AS j
    WHERE
        j.label LIKE concat('%.', {experiment_name: String}, '.%')
),
-- All runs (experiment or not) of the jobs that took part in the experiment.
comparable_jobs AS (
    SELECT
        j.bucket,
        j.label,
        j.job_name,
        j.workflow_name
    FROM
        normalized_jobs AS j
    INNER JOIN
        experiment_jobs AS lfj
        ON j.job_name = lfj.job_name
),
-- Job counts per bucket / job / workflow / label, flagged by whether the
-- label belongs to the experiment.
success_stats AS (
    SELECT
        bucket,
        count(*) AS group_size,
        job_name,
        workflow_name,
        label,
        if(like(label, concat('%.', {experiment_name: String}, '.%')), True, False) AS is_ephemeral_exp
    FROM
        comparable_jobs
    GROUP BY
        bucket, job_name, workflow_name, label
),
-- Pairs every experiment group with the non-experiment group(s) of the same
-- job and workflow in the same bucket, then sums the counts per bucket.
-- NOTE(review): a group on one side is paired with every matching group on
-- the other side, so if a job has several experiment or several
-- non-experiment labels in a bucket its counts enter the sums more than
-- once -- confirm this is acceptable.
comparison_stats AS (
    SELECT
        experiment.bucket,
        SUM(experiment.group_size + m.group_size) AS total_jobs,
        SUM(m.group_size) AS compliment_jobs,
        SUM(experiment.group_size) AS counted_jobs,
        -- Given the WHERE filter below, c_fleet is always the non-experiment
        -- flag and m_fleet is always the experiment flag.
        m.is_ephemeral_exp AS c_fleet,
        experiment.is_ephemeral_exp AS m_fleet,
        -- Experiment jobs as a percentage of experiment + non-experiment jobs.
        CAST(SUM(experiment.group_size) AS Float32) / SUM(experiment.group_size + m.group_size) * 100 AS percentage,
        IF(experiment.is_ephemeral_exp, 'On experiment', 'Not on experiment') AS fleet
    FROM
        success_stats AS experiment
    INNER JOIN
        success_stats AS m
        -- All join keys live in ON so rows are matched on the full key
        -- instead of on bucket alone and filtered afterwards.
        ON experiment.bucket = m.bucket
        AND experiment.job_name = m.job_name
        AND experiment.workflow_name = m.workflow_name
    WHERE
        experiment.is_ephemeral_exp = 1
        AND m.is_ephemeral_exp = 0
        -- Only groups with more than 3 jobs in the bucket are compared.
        AND experiment.group_size > 3
        AND m.group_size > 3
    GROUP BY
        experiment.bucket, experiment.is_ephemeral_exp, m.is_ephemeral_exp
)
SELECT
    bucket,
    total_jobs,
    compliment_jobs,
    counted_jobs,
    c_fleet,
    m_fleet,
    percentage,
    fleet
FROM comparison_stats
ORDER BY bucket DESC, fleet
65 changes: 65 additions & 0 deletions torchci/pages/metrics.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,37 @@ export function TtsPercentilePicker({
);
}

/**
 * Dropdown that lets the user pick which experiment the metrics are shown for.
 *
 * @param experimentName - The currently selected experiment; rendered as the
 *   Select's value.
 * @param setExperimentName - Called with the newly chosen experiment name
 *   each time the selection changes.
 */
export function ExperimentPicker({
  experimentName,
  setExperimentName,
}: {
  experimentName: string;
  setExperimentName: (experimentName: string) => void;
}) {
  function handleChange(e: SelectChangeEvent<string>) {
    setExperimentName(e.target.value);
  }

  // The Select is controlled via `value` so it always reflects the parent's
  // state. Its `label` repeats the InputLabel text so the outlined notch is
  // sized for the label that is actually displayed.
  return (
    <FormControl>
      <InputLabel id="experiment-picker-select-label">Experiment</InputLabel>
      <Select
        value={experimentName}
        label="Experiment"
        labelId="experiment-picker-select-label"
        onChange={handleChange}
      >
        <MenuItem value={"ephemeral"}>ephemeral</MenuItem>
      </Select>
    </FormControl>
  );
}

function WorkflowDuration({
percentile,
timeParams,
Expand Down Expand Up @@ -425,6 +456,7 @@ export default function Page() {
};

const [ttsPercentile, setTtsPercentile] = useState<number>(0.5);
const [experimentName, setExperimentName] = useState<string>("ephemeral");

// Split the aggregated red % into broken trunk and flaky red %
const queryName = "master_commit_red_avg";
Expand Down Expand Up @@ -918,6 +950,39 @@ export default function Page() {
yAxisRenderer={(value) => value.toFixed(2).toString() + "%"}
/>
</Grid2>

<Grid2 size={{ xs: 12 }}>
<Stack direction="row" spacing={2} sx={{ mb: 2 }}>
<Typography variant="h3" gutterBottom>
Percentage of jobs running on experiment
</Typography>
<ExperimentPicker
experimentName={experimentName}
setExperimentName={setExperimentName}
/>
</Stack>
<p>
This pannel shows the % of jobs that are running the selected
experiment in the dropbox.
</p>
</Grid2>

<Grid2 size={{ xs: 12 }} height={ROW_HEIGHT}>
<TimeSeriesPanel
title={"Percentage of jobs running on experiment"}
queryName={"experiment_rollover_percentage"}
queryParams={{
...timeParams,
days_ago: timeRange,
experiment_name: experimentName,
}}
granularity={"hour"}
timeFieldName={"bucket"}
yAxisFieldName={"percentage"}
groupByFieldName={"fleet"}
yAxisRenderer={(value) => value.toFixed(2).toString() + "%"}
/>
</Grid2>
</Grid2>
</div>
);
Expand Down