-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathschema.sql
More file actions
145 lines (134 loc) · 5.53 KB
/
schema.sql
File metadata and controls
145 lines (134 loc) · 5.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
-- PostgreSQL Schema for GitHub Data Pipeline
-- Migrated from Redis caching to PostgreSQL
-- Required for trigram (ILIKE) search indexes on the leaderboard
CREATE EXTENSION IF NOT EXISTS pg_trgm;
-- Users table - caches GitHub user profile data
CREATE TABLE IF NOT EXISTS users (
username TEXT PRIMARY KEY,
avatar_url TEXT,
bio TEXT,
followers INTEGER NOT NULL DEFAULT 0,
following INTEGER NOT NULL DEFAULT 0,
public_repos INTEGER NOT NULL DEFAULT 0,
score REAL,
name TEXT,
company TEXT,
blog TEXT,
location TEXT,
email TEXT,
twitter_username TEXT,
linkedin TEXT,
hireable BOOLEAN,
website_url TEXT,
created_at TIMESTAMP,
updated_at TIMESTAMP,
last_fetched TIMESTAMP NOT NULL DEFAULT NOW(),
raw_json JSONB
);
CREATE INDEX IF NOT EXISTS idx_users_last_fetched ON users(last_fetched);
CREATE INDEX IF NOT EXISTS idx_users_score ON users(score);
CREATE INDEX IF NOT EXISTS idx_users_followers ON users(followers);
-- Repositories table - caches GitHub repository data for users
CREATE TABLE IF NOT EXISTS repos (
id TEXT PRIMARY KEY,
username TEXT NOT NULL,
repo_name TEXT NOT NULL,
full_name TEXT NOT NULL,
stars INTEGER NOT NULL DEFAULT 0,
forks INTEGER NOT NULL DEFAULT 0,
language TEXT,
description TEXT,
url TEXT,
pushed_at TIMESTAMP,
is_fork BOOLEAN NOT NULL DEFAULT FALSE,
topics JSONB,
languages JSONB,
merged_pr_count INTEGER NOT NULL DEFAULT 0,
merged_prs_by_user_count INTEGER NOT NULL DEFAULT 0
);
CREATE INDEX IF NOT EXISTS idx_repos_username ON repos(username);
CREATE INDEX IF NOT EXISTS idx_repos_stars ON repos(stars);
CREATE INDEX IF NOT EXISTS idx_repos_full_name ON repos(full_name);
-- API cache table - caches GitHub API responses with parsed key components.
-- cache_key format: "github:graphql:<username>" -> type=github, subtype=graphql, ref=username
CREATE TABLE IF NOT EXISTS api_cache (
id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
cache_type TEXT NOT NULL, -- e.g. "github"
cache_subtype TEXT NOT NULL, -- e.g. "graphql", "rest"
cache_ref TEXT NOT NULL, -- e.g. username or repo name
cache_key TEXT NOT NULL UNIQUE, -- original full key
response JSONB NOT NULL,
cached_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
expires_at TIMESTAMPTZ NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_api_cache_cache_ref ON api_cache(cache_ref);
CREATE INDEX IF NOT EXISTS idx_api_cache_expires_at ON api_cache(expires_at);
CREATE INDEX IF NOT EXISTS idx_api_cache_type_ref ON api_cache(cache_type, cache_ref);
-- Token rate limit table - tracks GitHub token rate limits
CREATE TABLE IF NOT EXISTS token_rate_limit (
token_index INTEGER PRIMARY KEY,
remaining INTEGER NOT NULL DEFAULT 5000,
reset_time INTEGER NOT NULL DEFAULT 0,
last_updated TIMESTAMP NOT NULL DEFAULT NOW()
);
-- Analyses table - existing table
CREATE TABLE IF NOT EXISTS analyses (
id TEXT PRIMARY KEY,
username TEXT NOT NULL,
total_score REAL NOT NULL DEFAULT 0,
ai_score REAL NOT NULL DEFAULT 0,
backend_score REAL NOT NULL DEFAULT 0,
frontend_score REAL NOT NULL DEFAULT 0,
devops_score REAL NOT NULL DEFAULT 0,
data_score REAL NOT NULL DEFAULT 0,
unique_skills_json JSONB,
linkedin TEXT,
top_repos_json JSONB,
languages_json JSONB,
contribution_count INTEGER NOT NULL DEFAULT 0,
cached_at TIMESTAMP NOT NULL DEFAULT NOW()
);
-- Leaderboard table - consolidated, single-table profile + scores for fast queries.
-- Combines github_users (profile) + analyses (scores) + user_scores (efficiency) + OTW fields.
CREATE TABLE IF NOT EXISTS leaderboard (
username TEXT PRIMARY KEY,
name TEXT,
avatar_url TEXT,
bio TEXT,
location TEXT,
company TEXT,
blog TEXT,
url TEXT,
email TEXT,
twitter_username TEXT,
linkedin TEXT,
hireable BOOLEAN NOT NULL DEFAULT FALSE,
followers INTEGER,
following INTEGER,
public_repos INTEGER,
total_score REAL NOT NULL DEFAULT 0,
ai_score REAL NOT NULL DEFAULT 0,
backend_score REAL NOT NULL DEFAULT 0,
frontend_score REAL NOT NULL DEFAULT 0,
devops_score REAL NOT NULL DEFAULT 0,
data_score REAL NOT NULL DEFAULT 0,
contributor_efficiency REAL,
is_open_to_work BOOLEAN,
otw_error_code TEXT,
otw_permanent_failure BOOLEAN DEFAULT FALSE,
otw_scraped_at TIMESTAMP,
unique_skills TEXT[],
created_at TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT NOW()
);
-- Partial btree indexes for score-sorted listings (only rows with a positive score)
CREATE INDEX IF NOT EXISTS idx_leaderboard_total_score ON leaderboard(total_score DESC) WHERE total_score > 0;
CREATE INDEX IF NOT EXISTS idx_leaderboard_ai_score ON leaderboard(ai_score DESC) WHERE ai_score > 0;
CREATE INDEX IF NOT EXISTS idx_leaderboard_backend_score ON leaderboard(backend_score DESC) WHERE backend_score > 0;
CREATE INDEX IF NOT EXISTS idx_leaderboard_frontend_score ON leaderboard(frontend_score DESC) WHERE frontend_score > 0;
CREATE INDEX IF NOT EXISTS idx_leaderboard_is_open_to_work ON leaderboard(is_open_to_work) WHERE is_open_to_work = TRUE;
-- Trigram indexes for fuzzy name/username search
CREATE INDEX IF NOT EXISTS idx_leaderboard_name_trgm ON leaderboard USING GIN (name gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_leaderboard_username_trgm ON leaderboard USING GIN (username gin_trgm_ops);
-- GIN index for skill-tag filtering
CREATE INDEX IF NOT EXISTS idx_leaderboard_unique_skills_gin ON leaderboard USING GIN (unique_skills) WHERE unique_skills IS NOT NULL;