Skip to content

Commit 131037e

Browse files
committed
Add migration to backfill user.pubid column
1 parent 766f4d5 commit 131037e

1 file changed

Lines changed: 76 additions & 0 deletions

File tree

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
"""Backfill user.pubid with unique values."""
2+
3+
import logging
4+
import random
5+
6+
import sqlalchemy as sa
7+
from alembic import op
8+
from sqlalchemy.orm import declarative_base, sessionmaker
9+
10+
revision = "f32200e2e496"
11+
down_revision = "3bae642f2b36"
12+
13+
logger = logging.getLogger(__name__)
14+
15+
USER_PUBID_LENGTH = 12
16+
USER_BATCH_LIMIT = 1000
17+
USER_PUBID_RETRIES = 5
18+
19+
20+
Base = declarative_base()
21+
22+
23+
class User(Base):
    """Declarative mapping of the "user" table, declaring only id and pubid."""

    __tablename__ = "user"

    # Integer primary key.
    id = sa.Column(sa.Integer, primary_key=True)

    # String column that upgrade() populates and downgrade() sets back to NULL.
    pubid = sa.Column(sa.String())
27+
28+
29+
def generate(length):
    """
    Generate a random string of the specified length.

    This is the generate() function from h/pubid.py.

    :param length: number of characters to produce; zero or a negative
        value yields the empty string
    :return: a string of ``length`` characters, each picked from a fixed
        alphabet of digits and letters
    """
    alphabet = "123456789ABDEGJKLMNPQRVWXYZabdegijkmnopqrvwxyz"
    # Build the SystemRandom instance once per call rather than once per
    # character; every character is still drawn through SystemRandom.choice().
    rng = random.SystemRandom()
    return "".join(rng.choice(alphabet) for _ in range(length))
37+
38+
39+
def get_unique_pubids(count, length):
    """
    Return a set built from `count` generated strings of `length` characters.

    Duplicate strings collapse into one entry, so the returned set can hold
    fewer than `count` items; callers that need exactly `count` must check
    the size themselves.
    """
    pubids = set()
    for _ in range(count):
        pubids.add(generate(length))
    return pubids
41+
42+
43+
def set_user_pubids(users, pubids):
    """
    Assign one pubid to each user, pairing the two iterables in order.

    Pairing stops as soon as either iterable runs out, so any surplus users
    keep their current pubid and any surplus pubids are ignored.
    """
    for target, value in zip(users, pubids):
        setattr(target, "pubid", value)
46+
47+
48+
def upgrade():
    """
    Give every user row whose pubid is NULL a generated pubid.

    Rows are processed in batches of at most USER_BATCH_LIMIT and each batch
    is committed before the next one is fetched.

    Raises RuntimeError when a full set of distinct pubids for a batch cannot
    be generated within USER_PUBID_RETRIES attempts.
    """
    bind = op.get_bind()
    session = sessionmaker()(bind=bind)

    # The same query is re-executed for every batch: rows that have just been
    # given a pubid no longer match the IS NULL filter.
    users_query = (
        session.query(User).filter(User.pubid.is_(None)).limit(USER_BATCH_LIMIT)
    )

    count = 0
    while True:
        users = users_query.all()
        if not users:
            break

        batch_count = len(users)

        # Colliding strings shrink the generated set, so regenerate until it
        # holds one pubid per user or the attempts are used up.
        pubids = None
        for attempt in range(1, USER_PUBID_RETRIES + 1):
            candidates = get_unique_pubids(batch_count, USER_PUBID_LENGTH)
            if len(candidates) == batch_count:
                pubids = candidates
                break
            logger.warning("Failed to generate %d unique pubids, retrying %d/%d", batch_count, attempt, USER_PUBID_RETRIES)

        if pubids is None:
            raise RuntimeError(f"Failed to generate {batch_count} unique pubids")

        set_user_pubids(users, pubids)
        session.commit()

        count += batch_count
        logger.info("Back-filled %d user.pubid's", count)
70+
71+
72+
def downgrade():
    """Set pubid back to NULL on every user row and commit the change."""
    bind = op.get_bind()
    session = sessionmaker()(bind=bind)

    # No filter is applied, so the update covers the whole table.
    all_users = session.query(User)
    all_users.update({User.pubid: None})

    session.commit()

0 commit comments

Comments
 (0)