|
| 1 | +"""Backfill user.pubid with unique values.""" |
| 2 | + |
| 3 | +import logging |
| 4 | +import random |
| 5 | + |
| 6 | +import sqlalchemy as sa |
| 7 | +from alembic import op |
| 8 | +from sqlalchemy.orm import declarative_base, sessionmaker |
| 9 | + |
# Alembic revision identifiers: this migration and the one it follows.
revision = "f32200e2e496"
down_revision = "3bae642f2b36"

logger = logging.getLogger(__name__)

# Number of characters in each generated pubid (passed to get_unique_pubids()).
USER_PUBID_LENGTH = 12
# Maximum number of users fetched and updated per batch in upgrade().
USER_BATCH_LIMIT = 1000
# Number of attempts upgrade() makes per batch to obtain a full set of
# distinct pubids before giving up.
USER_PUBID_RETRIES = 5
| 18 | + |
| 19 | + |
# Declarative base local to this migration file.
Base = declarative_base()


class User(Base):
    """Minimal mapping of the ``user`` table with only the columns used here."""

    __tablename__ = "user"

    # Integer primary key of the row.
    id = sa.Column(sa.Integer, primary_key=True)
    # Public identifier; upgrade() fills it where NULL, downgrade() clears it.
    pubid = sa.Column(sa.String())
| 27 | + |
| 28 | + |
def generate(length):
    """
    Generate a random string of the specified length.

    Adapted from the generate() function in h/pubid.py.

    :param length: number of characters to produce; zero or a negative
        value yields an empty string
    :returns: a string of ``length`` characters, each drawn from a fixed
        46-character alphabet
    """
    alphabet = "123456789ABDEGJKLMNPQRVWXYZabdegijkmnopqrvwxyz"
    # Create the OS-backed generator once instead of once per character.
    rng = random.SystemRandom()
    return "".join(rng.choice(alphabet) for _ in range(length))
| 37 | + |
| 38 | + |
def get_unique_pubids(count, length):
    """
    Return a set of random pubids built from ``count`` generate() calls.

    Duplicate values collapse in the set, so the result may hold fewer
    than ``count`` entries; callers must check its size.

    :param count: how many pubids to generate
    :param length: number of characters in each pubid
    :returns: a set of the distinct generated strings
    """
    pubids = set()
    for _ in range(count):
        pubids.add(generate(length))
    return pubids
| 41 | + |
| 42 | + |
def set_user_pubids(users, pubids):
    """
    Assign pubids to users pairwise, in iteration order.

    Pairing stops as soon as either iterable is exhausted, so surplus
    users or surplus pubids are left untouched.

    :param users: iterable of objects with a writable ``pubid`` attribute
    :param pubids: iterable of values to assign
    """
    pairs = zip(users, pubids)
    for account, new_pubid in pairs:
        account.pubid = new_pubid
| 46 | + |
| 47 | + |
def _generate_batch_pubids(batch_count):
    """Return ``batch_count`` distinct pubids, retrying on in-batch duplicates.

    Makes up to USER_PUBID_RETRIES attempts and logs a warning after each
    failed one.

    :raises RuntimeError: if no attempt yields ``batch_count`` distinct values
    """
    for attempt in range(1, USER_PUBID_RETRIES + 1):
        candidates = get_unique_pubids(batch_count, USER_PUBID_LENGTH)
        if len(candidates) == batch_count:
            return candidates
        logger.warning(
            "Failed to generate %d unique pubids, retrying %d/%d",
            batch_count,
            attempt,
            USER_PUBID_RETRIES,
        )
    raise RuntimeError(f"Failed to generate {batch_count} unique pubids")


def upgrade():
    """
    Give every user whose pubid is NULL a freshly generated pubid.

    Users are processed in batches of at most USER_BATCH_LIMIT. Each batch
    is committed before the next one is fetched, and the running total is
    logged after every batch.

    NOTE(review): distinctness is only checked within a batch, not against
    pubids already stored in the table -- confirm this is acceptable.

    :raises RuntimeError: if a batch cannot obtain enough distinct pubids
        within USER_PUBID_RETRIES attempts
    """
    session = sessionmaker()(bind=op.get_bind())

    missing_pubid = session.query(User).filter(User.pubid.is_(None))
    batch_query = missing_pubid.limit(USER_BATCH_LIMIT)

    count = 0
    while True:
        # Re-running the same query picks up the next batch, because rows
        # committed below no longer match the NULL filter.
        users = batch_query.all()
        if not users:
            break

        batch_count = len(users)
        pubids = _generate_batch_pubids(batch_count)

        set_user_pubids(users, pubids)
        session.commit()
        count += batch_count
        logger.info("Back-filled %d user.pubid's", count)
| 70 | + |
| 71 | + |
def downgrade():
    """Reset ``user.pubid`` to NULL for all users and commit."""
    bind = op.get_bind()
    session = sessionmaker()(bind=bind)

    # The query has no filter, so the update targets every row in the table.
    session.query(User).update({User.pubid: None})
    session.commit()
0 commit comments