@@ -196,13 +196,8 @@ def insert_facade_contributors(self, repo_git):
196196 repo_id = repo .repo_id
197197 facade_helper = FacadeHelper (logger )
198198
199- # Find all commits not yet linked to a contributor. The correct signal for
200- # "needs resolution" is a NULL cmt_ght_author_id — not a date window or an
201- # email-table cross-check. The old email-join approach silently skipped
202- # commits whose emails were later linked to a GitHub account, and the
203- # last_collection_date cutoff (PR #3253) made that permanent. Commits
204- # already marked unresolvable are excluded via the unresolved_commit_emails
205- # table so we don't hammer the GitHub API on known dead-ends.
199+ # Find commits not yet linked to a contributor (cmt_ght_author_id IS NULL),
200+ # skipping emails already marked unresolvable.
206201
207202 logger .info (
208203 "Beginning process to insert contributors from facade commits for repo w entry info: {}\n " .format (repo_id ))
@@ -255,12 +250,7 @@ def insert_facade_contributors(self, repo_git):
255250
256251 logger .debug ("DEBUG: Got through the new_contribs" )
257252
258- # Build the full email→contributor mapping by unioning all three email
259- # columns (cntrb_email, cntrb_canonical, alias_email) then deduplicating.
260- # Only target commits that are still unlinked (cmt_ght_author_id IS NULL)
261- # so we never re-process already-resolved records. Removing the
262- # last_collection_date guard means historical commits that slipped through
263- # on first pass are finally eligible for resolution.
253+ # Match unlinked commits to contributors via email, canonical email, or alias.
264254 resolve_email_to_cntrb_id_sql = s .sql .text ("""
265255 WITH email_to_contributor AS (
266256 SELECT cntrb_email AS email, cntrb_id
@@ -284,7 +274,7 @@ def insert_facade_contributors(self, repo_git):
284274 FROM email_to_contributor
285275 ORDER BY email
286276 )
287- SELECT DISTINCT
277+ SELECT
288278 d.cntrb_id,
289279 d.email
290280 FROM
0 commit comments