Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Old databases migrations #1038

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
8ae2ac9
Revised kaspar migrating script.
vlejd Mar 22, 2017
1ed292a
kms migration skript + improved mihration structure
vlejd Mar 23, 2017
e399f1f
Good enough migration scripts.
vlejd Mar 26, 2017
b16f898
Whole pipeline is working on wet run.
vlejd Mar 31, 2017
c45a524
pep-8 fixes
vlejd Mar 31, 2017
fd387f8
Fixed lint errors and added some options
vlejd Apr 1, 2017
3937866
Minor parameter changes.
vlejd Apr 15, 2017
1f62905
Revised kaspar migrating script.
vlejd Mar 22, 2017
dcd9d35
kms migration skript + improved mihration structure
vlejd Mar 23, 2017
d149519
Good enough migration scripts.
vlejd Mar 26, 2017
e7e472e
Whole pipeline is working on wet run.
vlejd Mar 31, 2017
f4f4039
pep-8 fixes
vlejd Mar 31, 2017
dc73736
Fixed lint errors and added some options
vlejd Apr 1, 2017
adffa56
Minor parameter changes.
vlejd Apr 15, 2017
fd07ac2
Merge branch 'kaspar-migration' of github.com:trojsten/web into kaspa…
vlejd Oct 22, 2017
f362320
Merge branch 'master' into kaspar-migration
vlejd Oct 30, 2017
b176aa0
Review fixes part 1.
vlejd Oct 30, 2017
f3d7fb6
Fix typo.
mhozza Jul 9, 2019
8e08386
Merge branch 'master' of github.com:trojsten/web into kaspar-migration
mhozza Jul 9, 2019
4094bda
black and isort.
mhozza Jul 9, 2019
48944c6
Fix some review comments.
mhozza Jul 9, 2019
e476954
Use raw string literals for regexps.
mhozza Jul 9, 2019
0320953
Use raw string literals for strings containing regexps and ignore var…
mhozza Jul 9, 2019
df2bdbd
Merge branch 'master' into kaspar-migration
mhozza Jul 10, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions trojsten/people/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,23 @@
OTHER_SCHOOL_ID = 1

DEENVELOPING_NOT_REVIEWED_SYMBOL = '*'

# User properties
# Each constant below is the key name of one user property. The migration
# base command (people/management/commands/migrate_base_class.py) passes every
# one of them to its process_property() together with a validation pattern.
# NOTE(review): the string values are runtime keys and look like Slovak
# labels; the meanings given below follow the constant names -- confirm.
# User id in the old fks database
FKS_ID_PROPERTY_KEY = "FKS ID"
# User id in the old kms database
KMS_ID_PROPERTY_KEY = "KMS ID"
# User id in the old ksp database
KASPAR_ID_PROPERTY_KEY = "KSP ID"
# User id in the csv file
CSV_ID_PROPERTY_KEY = "csv ID"
# Phone number (validated as an optional "+", digits and an optional "/")
MOBIL_PROPERTY_KEY = "Mobil"
# Nickname (validated as 1-30 characters)
NICKNAME_PROPERTY_KEY = "Prezyvka"
# Birth name (validated as 1-30 characters)
BIRTH_NAME_PROPERTY_KEY = "Rodne Meno"
# Last contact, validated as exactly four digits -- presumably a year
LAST_CONTACT_PROPERTY_KEY = "Posledny kontakt"
# KMS camps, validated as digits only -- presumably a count; TODO confirm
KMS_CAMPS_PROPERTY_KEY = "KMS sustredenia"
# Free-text note carried over from the old ksp (kaspar) data
KASPAR_NOTE_PROPERTY_KEY = "KSP note"
# KSP camps, validated as digits only -- presumably a count; TODO confirm
KSP_CAMPS_PROPERTY_KEY = "KSP sustredenia"
# Free-text "memory" field (any value is accepted)
MEMORY_PROPERTY_KEY = "Spomienky"
# Free-text company field (any value is accepted)
COMPANY_PROPERTY_KEY = "Posobisko"
# Free-text affiliation field (any value is accepted)
AFFILIATION_PROPERTY_KEY = "Pozicia"
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@

import csv

from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand
from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand


class Command(MigrateBaceCommand):
class Command(MigrateBaseCommand):
help = 'Imports people and their related info from fks_csv.'

def add_arguments(self, parser):
super(Command, self).add_arguments(parser)
parser.add_argument('file', type=str)

def handle_noargs(self, **options):
super(Command, self).handle_noargs(**options)
def handle(self, **options):
super(Command, self).handle(**options)
participants_file = options['file']

participants = csv.DictReader(open(participants_file))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@

import csv

from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand
from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand


class Command(MigrateBaceCommand):
class Command(MigrateBaseCommand):
help = 'Imports people and their related info from fks_csv.'

def add_arguments(self, parser):
super(Command, self).add_arguments(parser)
parser.add_argument('file', type=str)

def handle_noargs(self, **options):
super(Command, self).handle_noargs(**options)
def handle(self, **options):
super(Command, self).handle(**options)
participants_file = options['file']

participants = csv.DictReader(open(participants_file))
Expand Down
147 changes: 74 additions & 73 deletions trojsten/people/management/commands/migrate_base_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,24 @@
from collections import defaultdict
import sys

from django.core.management.base import NoArgsCommand
from django.core.management import BaseCommand as NoArgsCommand
from django.db import transaction
from django.db.models import Q
from django.utils.six.moves import input

from trojsten.people.helpers import get_similar_users
from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey, UserProperty, Address
from trojsten.people import constants
from trojsten.schools.models import School
from trojsten.people.models import DuplicateUser, User, UserPropertyKey, UserProperty, Address

reload(sys)
sys.setdefaultencoding("utf-8")


class MigrateBaceCommand(NoArgsCommand):
class MigrateBaseCommand(NoArgsCommand):
help = 'Base class for importing people.'
SCHOOLS_INF_FAST_RUN = 100
USER_IN_FAST_RUN = 100

def add_arguments(self, parser):
parser.add_argument('--wet_run',
Expand All @@ -30,61 +34,58 @@ def add_arguments(self, parser):
action='store_true',
dest='fast',
default=False,
help='Create only a few users')
help='Create only the first {} users and {} schools'.format(
self.USER_IN_FAST_RUN, self.SCHOOLS_INF_FAST_RUN))

def handle_noargs(self, **options):
def handle(self, **options):
self.dry = options['dry']
self.fast = options['fast']
self.done_users = 0
self.done_schools = 0
if self.dry:
self.stdout.write("Running dry run!")
self.stderr.write("Running dry run!")

self.verbosity = options['verbosity']
self.similar_users = []
self.school_id_map = {}
self.last_contact = defaultdict(list)

CSV_ID_KEY = "csv ID"
self.CSV_ID_PROPERTY = self.process_property(CSV_ID_KEY, "(.{1,20}_)?\d+")
MOBIL_KEY = "Mobil"
self.MOBIL_PROPERTY = self.process_property(MOBIL_KEY, "\+?\d+\/?\d+")
NICKNAME_KEY = "Prezyvka"
self.NICKNAME_PROPERTY = self.process_property(NICKNAME_KEY, ".{1,30}")
BIRTH_NAME_KEY = "Rodne Meno"
self.BIRTH_NAME_PROPERTY = self.process_property(BIRTH_NAME_KEY, ".{1,30}")
LAST_CONTACT_KEY = "Posledny kontakt"
self.CSV_ID_PROPERTY = self.process_property(
constants.CSV_ID_PROPERTY_KEY, "(.{1,20}_)?\d+")
self.MOBIL_PROPERTY = self.process_property(
constants.MOBIL_PROPERTY_KEY, "\+?\d+\/?\d+")
self.NICKNAME_PROPERTY = self.process_property(
constants.NICKNAME_PROPERTY_KEY, ".{1,30}")
self.BIRTH_NAME_PROPERTY = self.process_property(
constants.BIRTH_NAME_PROPERTY_KEY, ".{1,30}")
# TODO fix False and stupid values
self.LAST_CONTACT_PROPERTY = self.process_property(LAST_CONTACT_KEY, "\d\d\d\d")
FKS_ID_KEY = "FKS ID"
self.FKS_ID_PROPERTY = self.process_property(FKS_ID_KEY, "\d+")
KMS_ID_KEY = "KMS ID"
self.KMS_ID_PROPERTY = self.process_property(KMS_ID_KEY, "\d+")
KMS_CAMPS_KEY = "KMS sustredenia"
self.KMS_CAMPS_PROPERTY = self.process_property(KMS_CAMPS_KEY, "\d+")
KASPAR_ID_KEY = "KSP ID"
self.KASPAR_ID_PROPERTY = self.process_property(KASPAR_ID_KEY, "\d+")
KASPAR_NOTE_KEY = "KSP note"
self.KASPAR_NOTE_PROPERTY = self.process_property(KASPAR_NOTE_KEY, ".*")
KSP_CAMPS_KEY = "KSP sustredenia"
self.KSP_CAMPS_PROPERTY = self.process_property(KSP_CAMPS_KEY, "\d+")
MEMORY_KEY = "Spomienky"
self.MEMORY_PROPERTY = self.process_property(MEMORY_KEY, ".*")
COMPANY_KEY = "Posobisko"
self.COMPANY_PROPERTY = self.process_property(COMPANY_KEY, ".*")
AFFILIATION_KEY = "Pozicia"
self.AFFILIATION_PROPERTY = self.process_property(AFFILIATION_KEY, ".*")

@transaction.atomic
def process_address(self, street, town, postal_code, country):
return Address.objects.create(street=street, town=town, postal_code=postal_code, country=country)
self.LAST_CONTACT_PROPERTY = self.process_property(
constants.LAST_CONTACT_PROPERTY_KEY, "\d\d\d\d")
self.FKS_ID_PROPERTY = self.process_property(
constants.FKS_ID_PROPERTY_KEY, "\d+")
self.KMS_ID_PROPERTY = self.process_property(
constants.KMS_ID_PROPERTY_KEY, "\d+")
self.KMS_CAMPS_PROPERTY = self.process_property(
constants.KMS_CAMPS_PROPERTY_KEY, "\d+")
self.KASPAR_ID_PROPERTY = self.process_property(
constants.KASPAR_ID_PROPERTY_KEY, "\d+")
self.KASPAR_NOTE_PROPERTY = self.process_property(
constants.KASPAR_NOTE_PROPERTY_KEY, ".*")
self.KSP_CAMPS_PROPERTY = self.process_property(
constants.KSP_CAMPS_PROPERTY_KEY, "\d+")
self.MEMORY_PROPERTY = self.process_property(
constants.MEMORY_PROPERTY_KEY, ".*")
self.COMPANY_PROPERTY = self.process_property(
constants.COMPANY_PROPERTY_KEY, ".*")
self.AFFILIATION_PROPERTY = self.process_property(
constants.AFFILIATION_PROPERTY_KEY, ".*")

@transaction.atomic
def process_school(self, old_id, abbr, name, addr_name, street,
city, zip_code):

self.done_schools += 1
if self.fast and self.done_schools > 100:
if self.fast and self.done_schools > self.SCHOOLS_INF_FAST_RUN:
return None
# TODO improve this, do not work with abbreviations
if not abbr:
Expand All @@ -98,19 +99,19 @@ def process_school(self, old_id, abbr, name, addr_name, street,
row = (abbr, name, addr_name, street, city, self.fix_string(zip_code))
if len(candidates) == 1:
if self.verbosity >= 2:
self.stdout.write("Matched %r to %s" % (row,
self.stderr.write("Matched %r to %s" % (row,
candidates[0]))
self.school_id_map[old_id] = candidates[0]
elif len(candidates) > 1:
self.stdout.write("Multiple candidates for %r:\n%s" % (
self.stderr.write("Multiple candidates for %r:\n%s" % (
row,
"\n".join("%02d: %s" % (i, candidate)
for i, candidate in enumerate(candidates))
))
try:
choice = int(input("Choice (empty or invalid to create new): "))
self.school_id_map[old_id] = candidates[choice]
except (ValueError, KeyError):
except (KeyError):
self.school_id_map[old_id] = self.create_school(*row)
else:
self.school_id_map[old_id] = self.create_school(*row)
Expand All @@ -120,7 +121,8 @@ def create_school(self, abbr, name, addr_name, street,
abbr += '?' # Question mark denotes schools needing review.
school = None
if len(zip_code) > 10:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tato logika si zasluzi viac komentaru. No idea o co sa toto snazi.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

# Swiss zip codes
# Swiss zip codes are longer than 10 chars, but our db model does not allow
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nechceme zmenit model aby ich podporoval?
Ak sa nam nechce teraz, mozes sem pridat TODO

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ake dlhe su svajciarske PSC? Podla wikipedia a google map 4 miesta.

# them so we skip them.
zip_code = 0

if self.dry:
Expand All @@ -138,25 +140,30 @@ def create_school(self, abbr, name, addr_name, street,
city=city,
zip_code=zip_code)
if self.verbosity >= 2:
self.stdout.write("Created new school %s" % school)
self.stderr.write("Created new school %s" % school)
return school

@transaction.atomic
def process_person(self, user_args, user_properties, old_user_id_field, old_user_id, address=None):
def process_person(self,
user_args,
user_properties,
old_user_id_field,
old_user_id,
address=None):
"""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Skus prvy riadok docstringu mat ako strucny popis metody (hned za """).

Args: moze byt kludne odsadene rovnako ako """.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

Args:
user_args (dict): will be used for user constructor as is. Except for school_id.
user_properties (list(tuple(UserPropertyKey, string))):
will create additional user properties
old_user_id_field (UserPropertyKey): old field that contained user id
(kaspar_id/ kms id ...), used for faster deduplication.
old_user_id (int/string): old id
user_args can have
first_name, last_name, graduation, email, birth_date, school_id
Args:
user_args (dict): will be used for user constructor as is. Except for school_id.
user_properties (list(tuple(UserPropertyKey, string))):
will create additional user properties
old_user_id_field (UserPropertyKey): old field that contained user id
(kaspar_id/ kms id ...), used for faster deduplication.
old_user_id (int/string): old id
user_args can have
first_name, last_name, graduation, email, birth_date, school_id
"""
# If the user already exists in our database, skip.
# If we run in the fast mode and we already processed enough users, we skip this one.
self.done_users += 1
if self.fast and self.done_users > 100:
if self.fast and self.done_users > self.USER_IN_FAST_RUN:
return None

old_id_property = None
Expand All @@ -169,11 +176,10 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user
last_name = user_args['last_name']
if old_id_property.exists():
if self.verbosity >= 2:
self.stdout.write("Skipping user %s %s" % (first_name,
self.stderr.write("Skipping user %s %s" % (first_name,
last_name))
return None

# The username needs to be unique, thus the ID.
user_args['is_active'] = False

if 'school_id' in user_args:
Expand All @@ -182,19 +188,16 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user
user_args['school'] = self.school_id_map.get(school_id)

if self.verbosity >= 2:
self.stdout.write("Creating user %s %s" % (first_name, last_name))
self.stderr.write("Creating user %s %s" % (first_name, last_name))

new_user = None
if self.dry:
new_user = User(**user_args)
else:
addr = None
if address:
addr = self.process_address(address['street'],
address['town'],
address['postal_code'],
address['country'])
user_args['home_address'] = addr
user_args['home_address'] = Address.objects.create(
street=address['street'], town=address['town'],
postal_code=address['postal_code'], country=address['country'])

new_user = User.objects.create(**user_args)

Expand All @@ -207,28 +210,26 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user
if valid_contacts:
user_properties.append([self.LAST_CONTACT_PROPERTY, max(valid_contacts)])

user_properties = list(filter(lambda x: x, user_properties))
user_properties = [prop for prop in user_properties if prop is not None]
for key, value in user_properties:
new_user.properties.create(key=key, value=value)

similar_users = get_similar_users(new_user)
if len(similar_users):
names_of_similar = [(x.first_name, x.last_name) for x in similar_users]
names_of_similar = [(user.first_name, user.last_name) for user in similar_users]
self.similar_users.append(((first_name, last_name), names_of_similar))
if self.verbosity >= 2:
self.stdout.write('Similar users: %s' % str(names_of_similar))
if self.dry:
pass
else:
self.stderr.write('Similar users: %s' % str(names_of_similar))
if not self.dry:
DuplicateUser.objects.create(user=new_user)

return new_user

def print_stats(self):
for conflict in self.similar_users:
self.stdout.write("Conflicts: %s" % str(conflict))
self.stderr.write("Conflicts: %s" % str(conflict))

self.stdout.write("Conflict users: %d" % len(self.similar_users))
self.stderr.write("Conflict users: %d" % len(self.similar_users))

def parse_dot_date(self, date_string):
# Remove any whitespace inside the string.
Expand Down
8 changes: 4 additions & 4 deletions trojsten/people/management/commands/migrate_fks_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import csv
import os

from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand
from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand


"""
Expand All @@ -25,16 +25,16 @@
# TODO vvysledkovky


class Command(MigrateBaceCommand):
class Command(MigrateBaseCommand):
help = 'Imports people and their related info from fks_csv.'

def add_arguments(self, parser):
super(Command, self).add_arguments(parser)
parser.add_argument('csv_directory', type=str,
help="Directory containing all csv files.")

def handle_noargs(self, **options):
super(Command, self).handle_noargs(**options)
def handle(self, **options):
super(Command, self).handle(**options)
base = options['csv_directory']

addresses_file = os.path.join(base, "adresa.csv")
Expand Down
8 changes: 4 additions & 4 deletions trojsten/people/management/commands/migrate_kms_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collections import defaultdict
import os

from trojsten.people.management.commands.migrate_base_class import MigrateBaceCommand
from trojsten.people.management.commands.migrate_base_class import MigrateBaseCommand

"""
Restore the mysql database dump and run (replace <passwd> and <user>)
Expand All @@ -18,16 +18,16 @@
"""


class Command(MigrateBaceCommand):
class Command(MigrateBaseCommand):
help = 'Imports people and their related info from kms_csv.'

def add_arguments(self, parser):
super(Command, self).add_arguments(parser)
parser.add_argument('csv_directory', type=str,
help="Directory containing all csv files.")

def handle_noargs(self, **options):
super(Command, self).handle_noargs(**options)
def handle(self, **options):
super(Command, self).handle(**options)
base = options['csv_directory']
participants_file = os.path.join(base, "riesitelia.csv")
participants = csv.DictReader(open(participants_file))
Expand Down
Loading