From 7d4729d05c1011bb25748d350aae1cd6844ad521 Mon Sep 17 00:00:00 2001 From: John Vandenberg <jayvdb@gmail.com> Date: Thu, 14 Dec 2017 23:27:56 +0700 Subject: [PATCH] Cleanse GCI data Before the Google Code-in data can be stored in the repository, the data of students who are just starting needs to be removed, the status types need to be simplified to reduce side channels, the unpublished tasks need to be removed, and the task mentor list needs to be removed. Related to https://github.com/coala/community/issues/3 --- .ci/build.sh | 1 + gci/config.py | 2 +- .../commands/cleanse_gci_task_data.py | 43 ++++++++++++++ gci/students.py | 56 +++++++++++++++---- 4 files changed, 91 insertions(+), 11 deletions(-) create mode 100644 gci/management/commands/cleanse_gci_task_data.py diff --git a/.ci/build.sh b/.ci/build.sh index 4602b76f..dd5081b9 100755 --- a/.ci/build.sh +++ b/.ci/build.sh @@ -5,6 +5,7 @@ set -e -x mkdir private _site public python manage.py fetch_gci_task_data private +python manage.py cleanse_gci_task_data private public python manage.py collectstatic --noinput python manage.py distill-local public --force diff --git a/gci/config.py b/gci/config.py index e3c8d0ec..9d8427c8 100644 --- a/gci/config.py +++ b/gci/config.py @@ -5,7 +5,7 @@ GCI_DATA_DIR = os.path.join( os.path.dirname(__file__), '..', - 'private' + 'public' ) diff --git a/gci/management/commands/cleanse_gci_task_data.py b/gci/management/commands/cleanse_gci_task_data.py new file mode 100644 index 00000000..1b2ab240 --- /dev/null +++ b/gci/management/commands/cleanse_gci_task_data.py @@ -0,0 +1,43 @@ +from collections import OrderedDict +import os.path + +from ruamel.yaml import YAML + +from django.core.management.base import BaseCommand + +from gci.students import ( + _get_instances, + _get_tasks, + cleanse_instances, + cleanse_tasks, +) + + +class Command(BaseCommand): + args = '' + help = 'Cleanse GCI data' + + def add_arguments(self, parser): + parser.add_argument('input_dir', nargs='?', type=str) 
+ parser.add_argument('output_dir', nargs='?', type=str) + + def handle(self, *args, **options): + input_dir = options.get('input_dir') + output_dir = options.get('output_dir') + + yaml = YAML() + + with open(os.path.join(input_dir, 'tasks.yaml'), 'r') as f: + tasks = yaml.load(f) + + with open(os.path.join(input_dir, 'instances.yaml'), 'r') as f: + instances = yaml.load(f) + + tasks = cleanse_tasks(tasks) + instances = cleanse_instances(instances) + + with open(os.path.join(output_dir, 'tasks.yaml'), 'w') as f: + yaml.dump(tasks, f) + + with open(os.path.join(output_dir, 'instances.yaml'), 'w') as f: + yaml.dump(instances, f) diff --git a/gci/students.py b/gci/students.py index f2807c32..518fb1fc 100644 --- a/gci/students.py +++ b/gci/students.py @@ -7,6 +7,18 @@ from .gitorg import get_issue +PRIVATE_INSTANCE_STATUSES = ( + 'ABANDONED', + 'OUT_OF_TIME', + 'PENDING_PARENTAL_CONSENT', + 'UNASSIGNED_BY_MENTOR', +) + +PRIVATE_INSTANCE_ATTRIBUTES = ( + 'modified', + 'deadline', +) + _client = None _org = {} _tasks = {} @@ -78,6 +90,37 @@ def get_instances(): return _instances +def cleanse_tasks(tasks): + cleansed_tasks = dict( + (task_id, task) + for task_id, task + in tasks.items() + if task['status'] == 2 + ) + + for task in cleansed_tasks.values(): + del task['mentors'] + + return cleansed_tasks + + +def cleanse_instances(instances): + cleansed_instances = dict( + (instance_id, instance) + for instance_id, instance + in instances.items() + if instance['status'] not in PRIVATE_INSTANCE_STATUSES + ) + + for instance in cleansed_instances.values(): + if instance['status'] != 'COMPLETED': + instance['status'] = 'CLAIMED' + for key in PRIVATE_INSTANCE_ATTRIBUTES: + del instance[key] + + return cleansed_instances + + def get_students(): students = {} for _, instance in get_instances().items(): @@ -99,17 +142,8 @@ def get_students(): student['instances'].append(instance) -def get_effective_students(students): - for student in list(students): - instances = 
student['instances'] - instances = [instance for instance in instances - if instance['status'] != 'ABANDONED'] - if instances: - yield student - - def get_issue_related_students(students): - for student in list(get_effective_students(students)): + for student in list(get_students(students)): instances = student['instances'] for instance in instances: task = get_task(instance['task_definition_id']) @@ -142,5 +176,7 @@ def get_linked_students(students): (task_id, url, ', '.join(issue.assignees))) else: student['username'] = issue.assignees[0] + print('student %s is %s because of %s' % + (student['id'], issue.assignees[0], url)) yield student break