From 46d5c36fedb7ef6f901a80531c9ffdc4dc3e83b2 Mon Sep 17 00:00:00 2001 From: Misha Tomilov Date: Tue, 14 Jan 2025 16:58:02 +0100 Subject: [PATCH] Add migration to backfill user.pubid column --- .../f32200e2e496_backfill_user_pubid.py | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 h/migrations/versions/f32200e2e496_backfill_user_pubid.py diff --git a/h/migrations/versions/f32200e2e496_backfill_user_pubid.py b/h/migrations/versions/f32200e2e496_backfill_user_pubid.py new file mode 100644 index 00000000000..76aeb7922d2 --- /dev/null +++ b/h/migrations/versions/f32200e2e496_backfill_user_pubid.py @@ -0,0 +1,90 @@ +"""Backfill user.pubid with unique values.""" + +import logging +import random + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.orm import declarative_base, sessionmaker + +revision = "f32200e2e496" +down_revision = "3bae642f2b36" + +logger = logging.getLogger(__name__) + +USER_PUBID_LENGTH = 12 +USER_BATCH_LIMIT = 1000 +USER_MAX_COUNT = 100_000 +USER_PUBID_RETRIES = 5 + + +Base = declarative_base() + + +class User(Base): + __tablename__ = "user" + id = sa.Column(sa.Integer, primary_key=True) + pubid = sa.Column(sa.String()) + + +def generate(length): + """ + Generate a random string of the specified length. + + This is the generate() function from h/pubid.py. + """ + alphabet = "123456789ABDEGJKLMNPQRVWXYZabdegijkmnopqrvwxyz" + return "".join(random.SystemRandom().choice(alphabet) for _ in range(length)) + + +def backfill_users( + session, + user_batch_limit, + user_max_count, + user_pubid_retries, + user_pubid_length, +): + user_query = ( + session.query(User).filter(User.pubid.is_(None)).limit(user_batch_limit) + ) + + count = 0 + while users := user_query.all(): + if count >= USER_MAX_COUNT: + logger.info("Reached maximum user count of %d", user_max_count) + break + batch_count = len(users) + for retries in range(user_pubid_retries): + try: + for user in users: + user.pubid = generate(user_pubid_length) + session.commit() + count += batch_count + break + except sa.exc.IntegrityError: + logger.warning( + "Failed to generate unique pubids, retrying %d/%d", + retries + 1, + USER_PUBID_RETRIES, + ) + session.rollback() + else: + raise RuntimeError(f"Failed to generate {batch_count} unique pubids") + + logger.info("Back-filled %d user.pubid's", count) + + +def upgrade(): + session = sessionmaker()(bind=op.get_bind()) + + backfill_users( + session, + user_batch_limit=USER_BATCH_LIMIT, + user_max_count=USER_MAX_COUNT, + user_pubid_retries=USER_PUBID_RETRIES, + user_pubid_length=USER_PUBID_LENGTH, + ) + + +def downgrade(): + pass