Skip to content

Commit 6471c93

Browse files
Update project_stats metric to use HLL
Signed-off-by: Łukasz Gryglicki <[email protected]>
1 parent baf0306 commit 6471c93

File tree

2 files changed

+544
-27
lines changed

2 files changed

+544
-27
lines changed

metrics/shared/project_stats.sql

+27-27
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,15 @@ with commits_data as (
3838
select
3939
'pstat,All' as repo_group,
4040
'Projects/Repository groups' as name,
41-
count(distinct repo_group) as value
41+
round(hll_cardinality(hll_add_agg(hll_hash_text(repo_group)))) as value
4242
from
4343
gha_repos
4444
where
4545
repo_group is not null
4646
union select
4747
'pstat,All' as repo_group,
4848
'Countries' as name,
49-
count(distinct sub.country_id) as value
49+
round(hll_cardinality(hll_add_agg(hll_hash_text(sub.country_id)))) as value
5050
from (
5151
select
5252
a.country_id
@@ -83,7 +83,7 @@ from (
8383
union select
8484
'pstat,' || sub.repo_group as repo_group,
8585
'Countries' as name,
86-
count(distinct sub.country_id) as value
86+
round(hll_cardinality(hll_add_agg(hll_hash_text(sub.country_id)))) as value
8787
from (
8888
select
8989
a.country_id,
@@ -136,7 +136,7 @@ group by
136136
union select
137137
sub.repo_group,
138138
'Contributors' as name,
139-
count(distinct sub.actor) as value
139+
round(hll_cardinality(hll_add_agg(hll_hash_text(sub.actor)))) as value
140140
from (
141141
select 'pstat,' || r.repo_group as repo_group,
142142
e.dup_actor_login as actor
@@ -159,7 +159,7 @@ group by
159159
sub.repo_group
160160
union select 'pstat,All' as repo_group,
161161
'Contributors' as name,
162-
count(distinct dup_actor_login) as value
162+
round(hll_cardinality(hll_add_agg(hll_hash_text(dup_actor_login)))) as value
163163
from
164164
gha_events
165165
where
@@ -172,7 +172,7 @@ where
172172
union select
173173
sub.repo_group,
174174
'Contributions' as name,
175-
count(distinct sub.id) as value
175+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(sub.id)))) as value
176176
from (
177177
select 'pstat,' || r.repo_group as repo_group,
178178
e.id
@@ -195,7 +195,7 @@ group by
195195
sub.repo_group
196196
union select 'pstat,All' as repo_group,
197197
'Contributions' as name,
198-
count(distinct id) as value
198+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(id)))) as value
199199
from
200200
gha_events
201201
where
@@ -208,7 +208,7 @@ where
208208
union select
209209
sub.repo_group,
210210
'Pushes' as name,
211-
count(distinct sub.id) as value
211+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(sub.id)))) as value
212212
from (
213213
select 'pstat,' || r.repo_group as repo_group,
214214
e.id
@@ -228,7 +228,7 @@ group by
228228
sub.repo_group
229229
union select 'pstat,All' as repo_group,
230230
'Pushes' as name,
231-
count(distinct id) as value
231+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(id)))) as value
232232
from
233233
gha_events
234234
where
@@ -237,7 +237,7 @@ where
237237
and type = 'PushEvent'
238238
union select 'pstat,' || c.repo_group as repo_group,
239239
'Commits' as name,
240-
count(distinct c.sha) as value
240+
round(hll_cardinality(hll_add_agg(hll_hash_text(c.sha)))) as value
241241
from
242242
commits_data c
243243
where
@@ -246,12 +246,12 @@ group by
246246
c.repo_group
247247
union select 'pstat,All' as repo_group,
248248
'Commits' as name,
249-
count(distinct c.sha) as value
249+
round(hll_cardinality(hll_add_agg(hll_hash_text(c.sha)))) as value
250250
from
251251
commits_data c
252252
union select 'pstat,' || c.repo_group as repo_group,
253253
'Code committers' as name,
254-
count(distinct c.actor_id) as value
254+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(c.actor_id)))) as value
255255
from
256256
commits_data c
257257
where
@@ -260,7 +260,7 @@ group by
260260
c.repo_group
261261
union select 'pstat,All' as repo_group,
262262
'Code committers' as name,
263-
count(distinct c.actor_id) as value
263+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(c.actor_id)))) as value
264264
from
265265
commits_data c
266266
union select sub.repo_group,
@@ -275,7 +275,7 @@ union select sub.repo_group,
275275
when 'WatchEvent' then 'Stargazers/Watchers'
276276
when 'ForkEvent' then 'Forkers'
277277
end as name,
278-
count(distinct sub.actor_id) as value
278+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(sub.actor_id)))) as value
279279
from (
280280
select 'pstat,' || r.repo_group as repo_group,
281281
e.type,
@@ -311,7 +311,7 @@ union select 'pstat,All' as repo_group,
311311
when 'WatchEvent' then 'Stargazers/Watchers'
312312
when 'ForkEvent' then 'Forkers'
313313
end as name,
314-
count(distinct actor_id) as value
314+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(actor_id)))) as value
315315
from
316316
gha_events
317317
where
@@ -326,7 +326,7 @@ group by
326326
type
327327
union select 'pstat,' || r.repo_group as repo_group,
328328
'Repositories' as name,
329-
count(distinct e.repo_id) as value
329+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(e.repo_id)))) as value
330330
from
331331
gha_events e,
332332
gha_repos r
@@ -339,14 +339,14 @@ group by
339339
r.repo_group
340340
union select 'pstat,All' as repo_group,
341341
'Repositories' as name,
342-
count(distinct repo_id) as value
342+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(repo_id)))) as value
343343
from
344344
gha_events
345345
where
346346
{{period:created_at}}
347347
union select sub.repo_group,
348348
'Comments' as name,
349-
count(distinct sub.id) as value
349+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(sub.id)))) as value
350350
from (
351351
select 'pstat,' || r.repo_group as repo_group,
352352
c.id
@@ -365,15 +365,15 @@ group by
365365
sub.repo_group
366366
union select 'pstat,All' as repo_group,
367367
'Comments' as name,
368-
count(distinct id) as value
368+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(id)))) as value
369369
from
370370
gha_comments
371371
where
372372
{{period:created_at}}
373373
and (lower(dup_user_login) {{exclude_bots}})
374374
union select sub.repo_group,
375375
'Commenters' as name,
376-
count(distinct sub.user_id) as value
376+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(sub.user_id)))) as value
377377
from (
378378
select 'pstat,' || r.repo_group as repo_group,
379379
c.user_id
@@ -392,15 +392,15 @@ group by
392392
sub.repo_group
393393
union select 'pstat,All' as repo_group,
394394
'Commenters' as name,
395-
count(distinct user_id) as value
395+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(user_id)))) as value
396396
from
397397
gha_comments
398398
where
399399
{{period:created_at}}
400400
and (lower(dup_user_login) {{exclude_bots}})
401401
union select sub.repo_group,
402402
'PR reviews' as name,
403-
count(distinct sub.id) as value
403+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(sub.id)))) as value
404404
from (
405405
select 'pstat,' || r.repo_group as repo_group,
406406
c.id
@@ -419,15 +419,15 @@ group by
419419
sub.repo_group
420420
union select 'pstat,All' as repo_group,
421421
'PR reviews' as name,
422-
count(distinct id) as value
422+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(id)))) as value
423423
from
424424
gha_reviews
425425
where
426426
{{period:submitted_at}}
427427
and (lower(dup_user_login) {{exclude_bots}})
428428
union select sub.repo_group,
429429
'Issues' as name,
430-
count(distinct sub.id) as value
430+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(sub.id)))) as value
431431
from (
432432
select 'pstat,' || r.repo_group as repo_group,
433433
i.id
@@ -447,7 +447,7 @@ group by
447447
sub.repo_group
448448
union select 'pstat,All' as repo_group,
449449
'Issues' as name,
450-
count(distinct id) as value
450+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(id)))) as value
451451
from
452452
gha_issues
453453
where
@@ -456,7 +456,7 @@ where
456456
and (lower(dup_user_login) {{exclude_bots}})
457457
union select sub.repo_group,
458458
'PRs' as name,
459-
count(distinct sub.id) as value
459+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(sub.id)))) as value
460460
from (
461461
select 'pstat,' || r.repo_group as repo_group,
462462
i.id
@@ -476,7 +476,7 @@ group by
476476
sub.repo_group
477477
union select 'pstat,All' as repo_group,
478478
'PRs' as name,
479-
count(distinct id) as value
479+
round(hll_cardinality(hll_add_agg(hll_hash_bigint(id)))) as value
480480
from
481481
gha_issues
482482
where

0 commit comments

Comments
 (0)