Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions 8Knot/cache_manager/db_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,6 @@ def _create_application_tables() -> None:
repo_id int,
login text,
action text,
rank int,
cntrb_company text,
email_list text
)
Expand All @@ -220,8 +219,7 @@ def _create_application_tables() -> None:
cntrb_id text,
created_at text,
login text,
action text,
rank int
action text
)
"""
)
Expand Down
2 changes: 1 addition & 1 deletion 8Knot/pages/codebase/visualizations/cntrb_file_heatmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ def cntrb_to_last_activity(df_actions: pd.DataFrame, df_dynamic_directory: pd.Da
# drop columns that are no longer needed after the preprocessing steps
df_actions = df_actions.reset_index()
df_actions.drop(
["index", "repo_id", "repo_name", "login", "Action", "rank"],
["index", "repo_id", "repo_name", "login", "Action"],
axis=1,
inplace=True,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ def cntrb_to_last_activity(df_actions: pd.DataFrame, df_dynamic_directory: pd.Da
# drop columns that are no longer needed after the preprocessing steps
df_actions = df_actions.reset_index()
df_actions.drop(
["index", "repo_id", "repo_name", "login", "Action", "rank"],
["index", "repo_id", "repo_name", "login", "Action"],
axis=1,
inplace=True,
)
Expand Down
3 changes: 1 addition & 2 deletions 8Knot/queries/affiliation_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def affiliation_query(self, repos):
c.repo_id,
c.login,
c.action,
c.rank,
con.cntrb_company,
string_agg(ca.alias_email, ' , ' order by ca.alias_email) as email_list
FROM
Expand All @@ -53,7 +52,7 @@ def affiliation_query(self, repos):
c.repo_id in %s
and timezone('utc', c.created_at) < now() -- created_at is a timestamptz value
-- don't need to check non-null for created_at because it's non-null by definition.
GROUP BY c.cntrb_id, c.created_at, c.repo_id, c.login, c.action, c.rank, con.cntrb_company
GROUP BY c.cntrb_id, c.created_at, c.repo_id, c.login, c.action, con.cntrb_company
ORDER BY
c.created_at
"""
Expand Down
3 changes: 1 addition & 2 deletions 8Knot/queries/contributors_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ def contributors_query(self, repos):
left(ca.cntrb_id::text, 15) as cntrb_id, -- first 15 characters of the uuid
timezone('utc', ca.created_at) AS created_at,
ca.login,
ca.action,
ca.rank
ca.action
FROM
explorer_contributor_actions ca
WHERE
Expand Down
232 changes: 138 additions & 94 deletions docs/materialized_views/explorer_contributor_actions.sql
Original file line number Diff line number Diff line change
@@ -1,99 +1,143 @@
/* This is the SQL query that populates the explorer_contributor_actions materialized view*/

SELECT a.id AS cntrb_id,
WITH commits_deduped AS (
    -- Collapse augur_data.commits to one row per commit *per repository*.
    -- The hash alone is not a sufficient key: the same commit hash can be
    -- stored under more than one repo_id (e.g. a fork and its upstream), and
    -- de-duplicating on the hash only would keep the commit for just one of
    -- those repositories, hiding it from per-repo queries on the others.
    -- Output columns (in order): cntrb_id, created_at, repo_id, action, repo_name.
    SELECT DISTINCT ON (c.repo_id, c.cmt_commit_hash)
        c.cmt_ght_author_id AS cntrb_id,
        c.cmt_author_timestamp AS created_at,
        c.repo_id,
        'commit'::text AS action,
        r.repo_name
    FROM augur_data.commits c
    JOIN augur_data.repo r ON r.repo_id = c.repo_id
    -- DISTINCT ON requires its expressions to lead the ORDER BY; the trailing
    -- timestamp key picks the most recent author timestamp within each group.
    ORDER BY c.repo_id, c.cmt_commit_hash, c.cmt_author_timestamp DESC
)
SELECT
a.cntrb_id,
a.created_at,
a.repo_id,
a.action,
repo.repo_name,
a.login,
row_number() OVER (PARTITION BY a.id, a.repo_id ORDER BY a.created_at DESC) AS rank
FROM ( SELECT commits.cmt_ght_author_id AS id,
commits.cmt_author_timestamp AS created_at,
commits.repo_id,
'commit'::text AS action,
contributors.cntrb_login AS login
FROM (augur_data.commits
LEFT JOIN augur_data.contributors ON (((contributors.cntrb_id)::text = (commits.cmt_ght_author_id)::text)))
GROUP BY commits.cmt_commit_hash, commits.cmt_ght_author_id, commits.repo_id, commits.cmt_author_timestamp, 'commit'::text, contributors.cntrb_login
UNION ALL
SELECT issues.reporter_id AS id,
issues.created_at,
issues.repo_id,
'issue_opened'::text AS action,
contributors.cntrb_login AS login
FROM (augur_data.issues
LEFT JOIN augur_data.contributors ON ((contributors.cntrb_id = issues.reporter_id)))
WHERE (issues.pull_request IS NULL)
UNION ALL
SELECT pull_request_events.cntrb_id AS id,
pull_request_events.created_at,
pull_requests.repo_id,
'pull_request_closed'::text AS action,
contributors.cntrb_login AS login
FROM augur_data.pull_requests,
(augur_data.pull_request_events
LEFT JOIN augur_data.contributors ON ((contributors.cntrb_id = pull_request_events.cntrb_id)))
WHERE ((pull_requests.pull_request_id = pull_request_events.pull_request_id) AND (pull_requests.pr_merged_at IS NULL) AND ((pull_request_events.action)::text = 'closed'::text))
UNION ALL
SELECT pull_request_events.cntrb_id AS id,
pull_request_events.created_at,
pull_requests.repo_id,
'pull_request_merged'::text AS action,
contributors.cntrb_login AS login
FROM augur_data.pull_requests,
(augur_data.pull_request_events
LEFT JOIN augur_data.contributors ON ((contributors.cntrb_id = pull_request_events.cntrb_id)))
WHERE ((pull_requests.pull_request_id = pull_request_events.pull_request_id) AND ((pull_request_events.action)::text = 'merged'::text))
UNION ALL
SELECT issue_events.cntrb_id AS id,
issue_events.created_at,
issues.repo_id,
'issue_closed'::text AS action,
contributors.cntrb_login AS login
FROM augur_data.issues,
(augur_data.issue_events
LEFT JOIN augur_data.contributors ON ((contributors.cntrb_id = issue_events.cntrb_id)))
WHERE ((issues.issue_id = issue_events.issue_id) AND (issues.pull_request IS NULL) AND ((issue_events.action)::text = 'closed'::text))
UNION ALL
SELECT pull_request_reviews.cntrb_id AS id,
pull_request_reviews.pr_review_submitted_at AS created_at,
pull_requests.repo_id,
('pull_request_review_'::text || (pull_request_reviews.pr_review_state)::text) AS action,
contributors.cntrb_login AS login
FROM augur_data.pull_requests,
(augur_data.pull_request_reviews
LEFT JOIN augur_data.contributors ON ((contributors.cntrb_id = pull_request_reviews.cntrb_id)))
WHERE (pull_requests.pull_request_id = pull_request_reviews.pull_request_id)
UNION ALL
SELECT pull_requests.pr_augur_contributor_id AS id,
pull_requests.pr_created_at AS created_at,
pull_requests.repo_id,
'pull_request_open'::text AS action,
contributors.cntrb_login AS login
FROM (augur_data.pull_requests
LEFT JOIN augur_data.contributors ON ((pull_requests.pr_augur_contributor_id = contributors.cntrb_id)))
UNION ALL
SELECT message.cntrb_id AS id,
message.msg_timestamp AS created_at,
pull_requests.repo_id,
'pull_request_comment'::text AS action,
contributors.cntrb_login AS login
FROM augur_data.pull_requests,
augur_data.pull_request_message_ref,
(augur_data.message
LEFT JOIN augur_data.contributors ON ((contributors.cntrb_id = message.cntrb_id)))
WHERE ((pull_request_message_ref.pull_request_id = pull_requests.pull_request_id) AND (pull_request_message_ref.msg_id = message.msg_id))
UNION ALL
SELECT message.cntrb_id AS id,
message.msg_timestamp AS created_at,
issues.repo_id,
'issue_comment'::text AS action,
contributors.cntrb_login AS login
FROM augur_data.issues,
augur_data.issue_message_ref,
(augur_data.message
LEFT JOIN augur_data.contributors ON ((contributors.cntrb_id = message.cntrb_id)))
WHERE ((issue_message_ref.msg_id = message.msg_id) AND (issues.issue_id = issue_message_ref.issue_id) AND (issues.closed_at <> message.msg_timestamp))) a,
augur_data.repo
WHERE (a.repo_id = repo.repo_id)
ORDER BY a.created_at DESC
a.repo_name,
co.cntrb_login AS login
FROM (
SELECT * FROM commits_deduped

UNION ALL

-- issues opened
SELECT
i.reporter_id AS cntrb_id,
i.created_at,
i.repo_id,
'issue_opened'::text AS action,
r.repo_name
FROM augur_data.issues i
JOIN augur_data.repo r ON r.repo_id = i.repo_id
WHERE i.pull_request IS NULL

UNION ALL

-- pull requests closed (not merged)
SELECT
pre.cntrb_id,
pre.created_at,
pr.repo_id,
'pull_request_closed'::text AS action,
r.repo_name
FROM augur_data.pull_request_events pre
JOIN augur_data.pull_requests pr
ON pr.pull_request_id = pre.pull_request_id
AND pr.pr_merged_at IS NULL
JOIN augur_data.repo r ON r.repo_id = pr.repo_id
WHERE pre.action = 'closed'

UNION ALL

-- pull requests merged
SELECT
pre.cntrb_id,
pre.created_at,
pr.repo_id,
'pull_request_merged'::text AS action,
r.repo_name
FROM augur_data.pull_request_events pre
JOIN augur_data.pull_requests pr
ON pr.pull_request_id = pre.pull_request_id
JOIN augur_data.repo r ON r.repo_id = pr.repo_id
WHERE pre.action = 'merged'

UNION ALL

-- issues closed
SELECT
ie.cntrb_id,
ie.created_at,
i.repo_id,
'issue_closed'::text AS action,
r.repo_name
FROM augur_data.issue_events ie
JOIN augur_data.issues i
ON i.issue_id = ie.issue_id
AND i.pull_request IS NULL
JOIN augur_data.repo r ON r.repo_id = i.repo_id
WHERE ie.action = 'closed'

UNION ALL

-- pull request reviews
SELECT
prr.cntrb_id,
prr.pr_review_submitted_at AS created_at,
pr.repo_id,
('pull_request_review_' || prr.pr_review_state::text) AS action,
r.repo_name
FROM augur_data.pull_request_reviews prr
JOIN augur_data.pull_requests pr
ON pr.pull_request_id = prr.pull_request_id
JOIN augur_data.repo r ON r.repo_id = pr.repo_id

UNION ALL

-- pull requests opened
SELECT
pr.pr_augur_contributor_id AS cntrb_id,
pr.pr_created_at AS created_at,
pr.repo_id,
'pull_request_open'::text AS action,
r.repo_name
FROM augur_data.pull_requests pr
JOIN augur_data.repo r ON r.repo_id = pr.repo_id

UNION ALL

-- pull request comments
SELECT
m.cntrb_id,
m.msg_timestamp AS created_at,
pr.repo_id,
'pull_request_comment'::text AS action,
r.repo_name
FROM augur_data.pull_request_message_ref prmr
JOIN augur_data.pull_requests pr
ON pr.pull_request_id = prmr.pull_request_id
JOIN augur_data.repo r ON r.repo_id = pr.repo_id
JOIN augur_data.message m ON m.msg_id = prmr.msg_id

UNION ALL

-- issue comments
-- One row per message attached to an issue that is not backed by a pull
-- request, excluding messages stamped with exactly the issue's close time.
SELECT
    m.cntrb_id,
    m.msg_timestamp AS created_at,
    i.repo_id,
    'issue_comment'::text AS action,
    r.repo_name
FROM augur_data.issue_message_ref imr
JOIN augur_data.message m ON m.msg_id = imr.msg_id
JOIN augur_data.issues i
    ON i.issue_id = imr.issue_id
    AND i.pull_request IS NULL
    -- IS DISTINCT FROM rather than <>: when closed_at is NULL (presumably the
    -- case for issues that were never closed -- confirm against the schema),
    -- "closed_at <> msg_timestamp" evaluates to NULL and the join would drop
    -- every comment on such an issue. IS DISTINCT FROM treats NULL as a
    -- comparable value, so those comments are kept.
    AND i.closed_at IS DISTINCT FROM m.msg_timestamp
JOIN augur_data.repo r ON r.repo_id = i.repo_id
) a
LEFT JOIN augur_data.contributors co ON co.cntrb_id = a.cntrb_id
ORDER BY a.created_at DESC
Loading