"""Backfill user.pubid with unique value."""

import logging
import random

import sqlalchemy as sa
from alembic import op
from sqlalchemy.orm import declarative_base, sessionmaker

revision = "f32200e2e496"
down_revision = "3bae642f2b36"

logger = logging.getLogger(__name__)

USER_PUBID_LENGTH = 12
USER_BATCH_LIMIT = 1000
USER_PUBID_RETRIES = 5


Base = declarative_base()


class User(Base):
    """Minimal mapping of the "user" table: only the columns used here."""

    __tablename__ = "user"
    id = sa.Column(sa.Integer, primary_key=True)
    pubid = sa.Column(sa.String())


def generate(length):
    """
    Generate a random string of the specified length.

    This is the generate() function from h/pubid.py.
    """
    alphabet = "123456789ABDEGJKLMNPQRVWXYZabdegijkmnopqrvwxyz"
    # Build the OS-backed generator once per call instead of once per
    # character; the output distribution is unchanged.
    rng = random.SystemRandom()
    return "".join(rng.choice(alphabet) for _ in range(length))


def get_unique_pubids(count, length):
    """
    Return a set of random pubids, each `length` characters long.

    `count` strings are generated and collected into a set, so the result
    holds fewer than `count` items whenever two generated strings collide.
    Callers must check the size of the returned set themselves.
    """
    return {generate(length) for _ in range(count)}


def set_user_pubids(users, pubids):
    """
    Assign one pubid to each user, pairing the two iterables in order.

    Pairing stops at the shorter iterable, so any surplus users are left
    untouched.
    """
    for user, pubid in zip(users, pubids):
        user.pubid = pubid


def upgrade():
    """
    Give every user whose pubid is NULL a freshly generated pubid.

    Users are processed in batches of at most USER_BATCH_LIMIT rows, with a
    commit after each batch. Uniqueness is only checked among the pubids
    generated for a single batch, not against other batches or against
    pubids already stored in the table.

    Raises:
        RuntimeError: if USER_PUBID_RETRIES consecutive attempts all fail to
            produce a collision-free set of pubids for a batch.
    """
    session = sessionmaker()(bind=op.get_bind())

    users_query = (
        session.query(User).filter(User.pubid.is_(None)).limit(USER_BATCH_LIMIT)
    )

    count = 0
    # The same query is re-run on every iteration: rows updated by the
    # previous batch no longer match the "pubid IS NULL" filter, so the loop
    # ends once no such rows remain.
    while users := users_query.all():
        batch_count = len(users)
        for _ in range(USER_PUBID_RETRIES):
            pubids = get_unique_pubids(batch_count, USER_PUBID_LENGTH)
            if len(pubids) == batch_count:
                break
            logger.warning("Failed to generate %d unique pubids, retrying", batch_count)
        else:
            # Every attempt produced at least one duplicate within the batch.
            raise RuntimeError(f"Failed to generate {batch_count} unique pubids")

        set_user_pubids(users, pubids)
        session.commit()
        count += batch_count
        # Running total, logged after each committed batch.
        logger.info("Back-filled %d user.pubid's", count)


def downgrade():
    """Reset pubid to NULL for every row of the user table."""
    session = sessionmaker()(bind=op.get_bind())

    session.query(User).update({User.pubid: None})
    session.commit()