-- Reviewer summary of this patch. Text ahead of the first `diff --git` header is
-- free-form preamble in a unified diff; the patch lines below are unchanged.
--
-- File 1 (new): clickhouse_db_schema/oss_ci_job_queue_time_historical/schema.sql
--   Creates misc.oss_ci_job_queue_time_historical, described by its own header
--   comment as tracking snapshots of in-queue jobs. Columns: queue_s, repo
--   (DEFAULT 'pytorch/pytorch'), workflow_name, job_name, html_url, machine_type,
--   time, runner_labels, and _meta (bucket/key tuple that the inline comment says
--   is populated by the s3 replicator).
--   PARTITION BY toYYYYMM(time); ORDER BY (repo, time, machine_type, job_name,
--   workflow_name); rows expire via TTL toDate(time) + toIntervalYear(5).
--
-- File 2 (modified): clickhouse_db_schema/oss_ci_queue_time_histogram/schema.sql
--   Removes the created_at and time_stamp columns; adds avg_queue_time, time and
--   runner_labels; adds a table header comment and a comment listing the two
--   `type` values ('in-queue-histogram', 'completed-queue-histogram').
--   PARTITION BY and TTL switch from created_at / time_stamp to the new `time`
--   column. ORDER BY changes from (job_name, workflow_name, machine_type,
--   job_name, time_stamp, repo, type) -- note job_name appeared twice -- to
--   (type, repo, time, machine_type, job_name, workflow_name).
--   Only the two hunks shown are visible; columns between them are not.
--
-- NOTE(review): the removed columns were DateTime64(0, 'UTC') but both new `time`
--   columns are DateTime64(9) with no explicit time zone. Presumably toYYYYMM(time)
--   and toDate(time) then follow the server default time zone -- confirm intended.
-- NOTE(review): file 2 rewrites the sorting and partition keys of what looks like
--   an existing table; presumably the live table has to be recreated to match
--   rather than altered in place -- confirm the migration plan.
-- NOTE(review): the hunks below appear with their line breaks collapsed; restore
--   one diff line per text line before feeding this to `git apply` / `patch`.
diff --git a/clickhouse_db_schema/oss_ci_job_queue_time_historical/schema.sql b/clickhouse_db_schema/oss_ci_job_queue_time_historical/schema.sql new file mode 100644 index 0000000000..c74d0aef75 --- /dev/null +++ b/clickhouse_db_schema/oss_ci_job_queue_time_historical/schema.sql @@ -0,0 +1,24 @@ + -- This table is used to keep track of snapshots of in-queue jobs +CREATE TABLE misc.oss_ci_job_queue_time_historical( + `queue_s` UInt64, + `repo` String DEFAULT 'pytorch/pytorch', + `workflow_name` String, + `job_name` String, + `html_url` String, + `machine_type` String, + `time` DateTime64(9), + `runner_labels` Array(String), + -- The raw records on S3, this is populated by the s3 replicator + `_meta` Tuple(bucket String, key String) +) +ENGINE = SharedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}') +PARTITION BY toYYYYMM(time) +ORDER BY ( + repo, + time, + machine_type, + job_name, + workflow_name, +) +TTL toDate(time) + toIntervalYear(5) +SETTINGS index_granularity = 8192 diff --git a/clickhouse_db_schema/oss_ci_queue_time_histogram/schema.sql b/clickhouse_db_schema/oss_ci_queue_time_histogram/schema.sql index 5532197b0e..a310278d35 100644 --- a/clickhouse_db_schema/oss_ci_queue_time_histogram/schema.sql +++ b/clickhouse_db_schema/oss_ci_queue_time_histogram/schema.sql @@ -1,6 +1,7 @@ + -- This table is used to store queue time histogram CREATE TABLE misc.oss_ci_queue_time_histogram( - `created_at` DateTime64(0, 'UTC'), - `time_stamp` DateTime64(0, 'UTC'), + -- the type of histogram, currently we store two types of histogram: + -- 'in-queue-histogram','completed-queue-histogram' `type` String, `repo` String DEFAULT 'pytorch/pytorch', `workflow_name` String, @@ -9,19 +10,21 @@ CREATE TABLE misc.oss_ci_queue_time_histogram( `histogram_version` String, `histogram` Array(UInt64), `max_queue_time` UInt64, + `avg_queue_time` UInt64, `total_count` UInt64, + `time` DateTime64(9), + `runner_labels` Array(String), `extra_info` Map(String,String) ) ENGINE = 
SharedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}') -PARTITION BY toYYYYMM(created_at) +PARTITION BY toYYYYMM(time) ORDER BY ( - job_name, - workflow_name, + type, + repo, + time, machine_type, job_name, - time_stamp, - repo, - type, + workflow_name, ) -TTL toDate(time_stamp) + toIntervalYear(5) +TTL toDate(time) + toIntervalYear(5) SETTINGS index_granularity = 8192