-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Old databases migrations #1038
base: master
Are you sure you want to change the base?
Old databases migrations #1038
Changes from 10 commits
8ae2ac9
1ed292a
e399f1f
b16f898
c45a524
fd387f8
3937866
1f62905
dcd9d35
d149519
e7e472e
f4f4039
dc73736
adffa56
fd07ac2
f362320
b176aa0
f3d7fb6
8e08386
4094bda
48944c6
e476954
0320953
df2bdbd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,20 +5,24 @@ | |
from collections import defaultdict | ||
import sys | ||
|
||
from django.core.management.base import NoArgsCommand | ||
from django.core.management import BaseCommand as NoArgsCommand | ||
from django.db import transaction | ||
from django.db.models import Q | ||
from django.utils.six.moves import input | ||
|
||
from trojsten.people.helpers import get_similar_users | ||
from trojsten.people.models import DuplicateUser, School, User, UserPropertyKey, UserProperty, Address | ||
from trojsten.people import constants | ||
from trojsten.schools.models import School | ||
from trojsten.people.models import DuplicateUser, User, UserPropertyKey, UserProperty, Address | ||
|
||
reload(sys) | ||
sys.setdefaultencoding("utf-8") | ||
|
||
|
||
class MigrateBaceCommand(NoArgsCommand): | ||
class MigrateBaseCommand(NoArgsCommand): | ||
help = 'Base class for importing people.' | ||
SCHOOLS_INF_FAST_RUN = 100 | ||
USER_IN_FAST_RUN = 100 | ||
|
||
def add_arguments(self, parser): | ||
parser.add_argument('--wet_run', | ||
|
@@ -30,61 +34,58 @@ def add_arguments(self, parser): | |
action='store_true', | ||
dest='fast', | ||
default=False, | ||
help='Create only a few users') | ||
help='Create only the first {} users and {} schools'.format( | ||
self.USER_IN_FAST_RUN, self.SCHOOLS_INF_FAST_RUN)) | ||
|
||
def handle_noargs(self, **options): | ||
def handle(self, **options): | ||
self.dry = options['dry'] | ||
self.fast = options['fast'] | ||
self.done_users = 0 | ||
self.done_schools = 0 | ||
if self.dry: | ||
self.stdout.write("Running dry run!") | ||
self.stderr.write("Running dry run!") | ||
|
||
self.verbosity = options['verbosity'] | ||
self.similar_users = [] | ||
self.school_id_map = {} | ||
self.last_contact = defaultdict(list) | ||
|
||
CSV_ID_KEY = "csv ID" | ||
self.CSV_ID_PROPERTY = self.process_property(CSV_ID_KEY, "(.{1,20}_)?\d+") | ||
MOBIL_KEY = "Mobil" | ||
self.MOBIL_PROPERTY = self.process_property(MOBIL_KEY, "\+?\d+\/?\d+") | ||
NICKNAME_KEY = "Prezyvka" | ||
self.NICKNAME_PROPERTY = self.process_property(NICKNAME_KEY, ".{1,30}") | ||
BIRTH_NAME_KEY = "Rodne Meno" | ||
self.BIRTH_NAME_PROPERTY = self.process_property(BIRTH_NAME_KEY, ".{1,30}") | ||
LAST_CONTACT_KEY = "Posledny kontakt" | ||
self.CSV_ID_PROPERTY = self.process_property( | ||
constants.CSV_ID_PROPERTY_KEY, "(.{1,20}_)?\d+") | ||
self.MOBIL_PROPERTY = self.process_property( | ||
constants.MOBIL_PROPERTY_KEY, "\+?\d+\/?\d+") | ||
self.NICKNAME_PROPERTY = self.process_property( | ||
constants.NICKNAME_PROPERTY_KEY, ".{1,30}") | ||
self.BIRTH_NAME_PROPERTY = self.process_property( | ||
constants.BIRTH_NAME_PROPERTY_KEY, ".{1,30}") | ||
# TODO fix False and stupid values | ||
self.LAST_CONTACT_PROPERTY = self.process_property(LAST_CONTACT_KEY, "\d\d\d\d") | ||
FKS_ID_KEY = "FKS ID" | ||
self.FKS_ID_PROPERTY = self.process_property(FKS_ID_KEY, "\d+") | ||
KMS_ID_KEY = "KMS ID" | ||
self.KMS_ID_PROPERTY = self.process_property(KMS_ID_KEY, "\d+") | ||
KMS_CAMPS_KEY = "KMS sustredenia" | ||
self.KMS_CAMPS_PROPERTY = self.process_property(KMS_CAMPS_KEY, "\d+") | ||
KASPAR_ID_KEY = "KSP ID" | ||
self.KASPAR_ID_PROPERTY = self.process_property(KASPAR_ID_KEY, "\d+") | ||
KASPAR_NOTE_KEY = "KSP note" | ||
self.KASPAR_NOTE_PROPERTY = self.process_property(KASPAR_NOTE_KEY, ".*") | ||
KSP_CAMPS_KEY = "KSP sustredenia" | ||
self.KSP_CAMPS_PROPERTY = self.process_property(KSP_CAMPS_KEY, "\d+") | ||
MEMORY_KEY = "Spomienky" | ||
self.MEMORY_PROPERTY = self.process_property(MEMORY_KEY, ".*") | ||
COMPANY_KEY = "Posobisko" | ||
self.COMPANY_PROPERTY = self.process_property(COMPANY_KEY, ".*") | ||
AFFILIATION_KEY = "Pozicia" | ||
self.AFFILIATION_PROPERTY = self.process_property(AFFILIATION_KEY, ".*") | ||
|
||
@transaction.atomic | ||
def process_address(self, street, town, postal_code, country): | ||
return Address.objects.create(street=street, town=town, postal_code=postal_code, country=country) | ||
self.LAST_CONTACT_PROPERTY = self.process_property( | ||
constants.LAST_CONTACT_PROPERTY_KEY, "\d\d\d\d") | ||
self.FKS_ID_PROPERTY = self.process_property( | ||
constants.FKS_ID_PROPERTY_KEY, "\d+") | ||
self.KMS_ID_PROPERTY = self.process_property( | ||
constants.KMS_ID_PROPERTY_KEY, "\d+") | ||
self.KMS_CAMPS_PROPERTY = self.process_property( | ||
constants.KMS_CAMPS_PROPERTY_KEY, "\d+") | ||
self.KASPAR_ID_PROPERTY = self.process_property( | ||
constants.KASPAR_ID_PROPERTY_KEY, "\d+") | ||
self.KASPAR_NOTE_PROPERTY = self.process_property( | ||
constants.KASPAR_NOTE_PROPERTY_KEY, ".*") | ||
self.KSP_CAMPS_PROPERTY = self.process_property( | ||
constants.KSP_CAMPS_PROPERTY_KEY, "\d+") | ||
self.MEMORY_PROPERTY = self.process_property( | ||
constants.MEMORY_PROPERTY_KEY, ".*") | ||
self.COMPANY_PROPERTY = self.process_property( | ||
constants.COMPANY_PROPERTY_KEY, ".*") | ||
self.AFFILIATION_PROPERTY = self.process_property( | ||
constants.AFFILIATION_PROPERTY_KEY, ".*") | ||
|
||
@transaction.atomic | ||
def process_school(self, old_id, abbr, name, addr_name, street, | ||
city, zip_code): | ||
|
||
self.done_schools += 1 | ||
if self.fast and self.done_schools > 100: | ||
if self.fast and self.done_schools > self.SCHOOLS_INF_FAST_RUN: | ||
return None | ||
# TODO improve this, do not work with abbreviations | ||
if not abbr: | ||
|
@@ -98,19 +99,19 @@ def process_school(self, old_id, abbr, name, addr_name, street, | |
row = (abbr, name, addr_name, street, city, self.fix_string(zip_code)) | ||
if len(candidates) == 1: | ||
if self.verbosity >= 2: | ||
self.stdout.write("Matched %r to %s" % (row, | ||
self.stderr.write("Matched %r to %s" % (row, | ||
candidates[0])) | ||
self.school_id_map[old_id] = candidates[0] | ||
elif len(candidates) > 1: | ||
self.stdout.write("Multiple candidates for %r:\n%s" % ( | ||
self.stderr.write("Multiple candidates for %r:\n%s" % ( | ||
row, | ||
"\n".join("%02d: %s" % (i, candidate) | ||
for i, candidate in enumerate(candidates)) | ||
)) | ||
try: | ||
choice = int(input("Choice (empty or invalid to create new): ")) | ||
self.school_id_map[old_id] = candidates[choice] | ||
except (ValueError, KeyError): | ||
except (KeyError): | ||
self.school_id_map[old_id] = self.create_school(*row) | ||
else: | ||
self.school_id_map[old_id] = self.create_school(*row) | ||
|
@@ -120,7 +121,8 @@ def create_school(self, abbr, name, addr_name, street, | |
abbr += '?' # Question mark denotes schools needing review. | ||
school = None | ||
if len(zip_code) > 10: | ||
# Swiss zip codes | ||
# Swiss zip codes are longer than 10 chars, but our db model does not allow | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nechceme zmenit model aby ich podporoval? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ake dlhe su svajciarske PSC? Podla wikipedia a google map 4 miesta. |
||
# them so we skip them. | ||
zip_code = 0 | ||
|
||
if self.dry: | ||
|
@@ -138,25 +140,30 @@ def create_school(self, abbr, name, addr_name, street, | |
city=city, | ||
zip_code=zip_code) | ||
if self.verbosity >= 2: | ||
self.stdout.write("Created new school %s" % school) | ||
self.stderr.write("Created new school %s" % school) | ||
return school | ||
|
||
@transaction.atomic | ||
def process_person(self, user_args, user_properties, old_user_id_field, old_user_id, address=None): | ||
def process_person(self, | ||
user_args, | ||
user_properties, | ||
old_user_id_field, | ||
old_user_id, | ||
address=None): | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Skus prvy riadok docstringu mat ako strucny popis metody (hned za """). Args: moze byt kludne odsadene rovnako ako """. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ok |
||
Args: | ||
user_args (dict): will be used for user constructor as is. Except for school_id. | ||
user_properties (list(tuple(UserPropertyKey, string))): | ||
will create additional user properties | ||
old_user_id_field (UserPropertyKey): old field that contained oser id | ||
(kaspar_id/ kms id ...), used for faster deduplication. | ||
old_user_id (int/string): old id | ||
user_args can have | ||
first_name, last_name, graduation, email, birth_date, school_id | ||
Args: | ||
user_args (dict): will be used for user constructor as is. Except for school_id. | ||
user_properties (list(tuple(UserPropertyKey, string))): | ||
will create additional user properties | ||
old_user_id_field (UserPropertyKey): old field that contained user id | ||
(kaspar_id/ kms id ...), used for faster deduplication. | ||
old_user_id (int/string): old id | ||
user_args can have | ||
first_name, last_name, graduation, email, birth_date, school_id | ||
""" | ||
# If the user already exists in our database, skip. | ||
# If we run in the fast mode and we already processed enough users, we skip this one. | ||
self.done_users += 1 | ||
if self.fast and self.done_users > 100: | ||
if self.fast and self.done_users > self.USER_IN_FAST_RUN: | ||
mhozza marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return None | ||
|
||
old_id_property = None | ||
|
@@ -169,11 +176,10 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user | |
last_name = user_args['last_name'] | ||
if old_id_property.exists(): | ||
if self.verbosity >= 2: | ||
self.stdout.write("Skipping user %s %s" % (first_name, | ||
self.stderr.write("Skipping user %s %s" % (first_name, | ||
last_name)) | ||
return None | ||
|
||
# The username needs to be unique, thus the ID. | ||
user_args['is_active'] = False | ||
|
||
if 'school_id' in user_args: | ||
|
@@ -182,19 +188,16 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user | |
user_args['school'] = self.school_id_map.get(school_id) | ||
|
||
if self.verbosity >= 2: | ||
self.stdout.write("Creating user %s %s" % (first_name, last_name)) | ||
self.stderr.write("Creating user %s %s" % (first_name, last_name)) | ||
|
||
new_user = None | ||
if self.dry: | ||
new_user = User(**user_args) | ||
else: | ||
addr = None | ||
if address: | ||
addr = self.process_address(address['street'], | ||
address['town'], | ||
address['postal_code'], | ||
address['country']) | ||
user_args['home_address'] = addr | ||
user_args['home_address'] = Address.objects.create( | ||
street=address['street'], town=address['town'], | ||
postal_code=address['postal_code'], country=address['country']) | ||
|
||
new_user = User.objects.create(**user_args) | ||
|
||
|
@@ -207,28 +210,26 @@ def process_person(self, user_args, user_properties, old_user_id_field, old_user | |
if valid_contacts: | ||
user_properties.append([self.LAST_CONTACT_PROPERTY, max(valid_contacts)]) | ||
|
||
user_properties = list(filter(lambda x: x, user_properties)) | ||
user_properties = [prop for prop in user_properties if prop is not None] | ||
for key, value in user_properties: | ||
new_user.properties.create(key=key, value=value) | ||
|
||
similar_users = get_similar_users(new_user) | ||
if len(similar_users): | ||
names_of_similar = [(x.first_name, x.last_name) for x in similar_users] | ||
names_of_similar = [(user.first_name, user.last_name) for user in similar_users] | ||
self.similar_users.append(((first_name, last_name), names_of_similar)) | ||
if self.verbosity >= 2: | ||
self.stdout.write('Similar users: %s' % str(names_of_similar)) | ||
if self.dry: | ||
pass | ||
else: | ||
self.stderr.write('Similar users: %s' % str(names_of_similar)) | ||
if not self.dry: | ||
DuplicateUser.objects.create(user=new_user) | ||
|
||
return new_user | ||
|
||
def print_stats(self): | ||
for conflict in self.similar_users: | ||
self.stdout.write("Conflicts: %s" % str(conflict)) | ||
self.stderr.write("Conflicts: %s" % str(conflict)) | ||
|
||
self.stdout.write("Conflict users: %d" % len(self.similar_users)) | ||
self.stderr.write("Conflict users: %d" % len(self.similar_users)) | ||
|
||
def parse_dot_date(self, date_string): | ||
# Remove any whitespace inside the string. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tato logika si zasluzi viac komentaru. No idea o co sa toto snazi.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok