Skip to content

Commit 41a20fa

Browse files
committed
Cleanse GCI data
Before the Google Code-in data can be stored in the repository, the data of students who are just starting needs to be removed, the status types need to be simplified to reduce side channels, the unpublished tasks need to be removed, and the task mentor list needs to be removed. Related to coala#3.
1 parent 070c2a3 commit 41a20fa

File tree

4 files changed

+94
-14
lines changed

4 files changed

+94
-14
lines changed

.ci/build.sh

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ set -e -x
55
mkdir private _site public
66

77
python manage.py fetch_gci_task_data private
8+
python manage.py cleanse_gci_task_data private _site
89

910
python manage.py collectstatic --noinput
1011
python manage.py distill-local public --force

gci/config.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
GCI_DATA_DIR = os.path.join(
77
os.path.dirname(__file__), '..',
8-
'private',
8+
'_site',
99
)
1010

1111

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from collections import OrderedDict
2+
import os.path
3+
4+
from ruamel.yaml import YAML
5+
6+
from django.core.management.base import BaseCommand
7+
8+
from gci.students import (
9+
_get_instances,
10+
_get_tasks,
11+
cleanse_instances,
12+
cleanse_tasks,
13+
)
14+
15+
16+
class Command(BaseCommand):
    """
    Management command that writes a cleansed copy of the GCI data.

    Reads ``tasks.yaml`` and ``instances.yaml`` from the input directory,
    passes them through ``cleanse_tasks`` and ``cleanse_instances``, and
    writes files of the same names into the output directory.
    """

    args = ''
    help = 'Cleanse GCI data'

    def add_arguments(self, parser):
        """
        Register the ``input_dir`` and ``output_dir`` positional arguments.

        Both stay optional at the parser level (``nargs='?'``) so the
        command line interface is unchanged; ``handle`` rejects missing
        values with a readable error.
        """
        parser.add_argument(
            'input_dir', nargs='?', type=str,
            help='Directory containing tasks.yaml and instances.yaml',
        )
        parser.add_argument(
            'output_dir', nargs='?', type=str,
            help='Directory the cleansed YAML files are written to',
        )

    def handle(self, *args, **options):
        """
        Load, cleanse and re-dump the task and instance data.

        :raises CommandError: If ``input_dir`` or ``output_dir`` was not
                              supplied.
        """
        # Local import so this block does not depend on an edit to the
        # module-level import list.
        from django.core.management.base import CommandError

        input_dir = options.get('input_dir')
        output_dir = options.get('output_dir')

        # Without this guard a missing argument surfaces as an opaque
        # TypeError from os.path.join(None, ...).
        if not input_dir or not output_dir:
            raise CommandError(
                'Both input_dir and output_dir must be provided')

        yaml = YAML()

        tasks_in = os.path.join(input_dir, 'tasks.yaml')
        with open(tasks_in, 'r', encoding='utf-8') as f:
            tasks = yaml.load(f)

        instances_in = os.path.join(input_dir, 'instances.yaml')
        with open(instances_in, 'r', encoding='utf-8') as f:
            instances = yaml.load(f)

        tasks = cleanse_tasks(tasks)
        instances = cleanse_instances(instances)

        tasks_out = os.path.join(output_dir, 'tasks.yaml')
        with open(tasks_out, 'w', encoding='utf-8') as f:
            yaml.dump(tasks, f)

        instances_out = os.path.join(output_dir, 'instances.yaml')
        with open(instances_out, 'w', encoding='utf-8') as f:
            yaml.dump(instances, f)

gci/students.py

+49-13
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,18 @@
77
from .gitorg import get_issue
88

99

10+
PRIVATE_INSTANCE_STATUSES = (
11+
'ABANDONED',
12+
'OUT_OF_TIME',
13+
'PENDING_PARENTAL_CONSENT',
14+
'UNASSIGNED_BY_MENTOR',
15+
)
16+
17+
PRIVATE_INSTANCE_ATTRIBUTES = (
18+
'modified',
19+
'deadline',
20+
)
21+
1022
_client = None
1123
_org = {}
1224
_tasks = {}
@@ -78,6 +90,37 @@ def get_instances():
7890
return _instances
7991

8092

93+
def cleanse_tasks(tasks):
    """
    Reduce the task data to the subset that is kept after cleansing.

    Only tasks whose ``status`` equals ``2`` are kept (presumably the
    "published" state -- confirm against the GCI API), and the ``mentors``
    entry is removed from every kept task.

    The kept task mappings are the same objects as in ``tasks`` and are
    modified in place; tasks that are filtered out are left untouched.

    :param tasks: Mapping of task id to task mapping. Every task must
                  have a ``status`` key.
    :return:      A new ``dict`` of task id to cleansed task mapping.
    """
    cleansed_tasks = {
        task_id: task
        for task_id, task in tasks.items()
        if task['status'] == 2
    }

    for task in cleansed_tasks.values():
        # pop() instead of del: a task without a mentor list is already
        # clean and must not abort the whole run with a KeyError.
        task.pop('mentors', None)

    return cleansed_tasks
105+
106+
107+
def cleanse_instances(instances):
    """
    Reduce the task instance data to the subset kept after cleansing.

    Instances whose ``status`` is listed in ``PRIVATE_INSTANCE_STATUSES``
    are dropped. For the remaining instances every status other than
    ``COMPLETED`` is collapsed to ``CLAIMED``, and the keys listed in
    ``PRIVATE_INSTANCE_ATTRIBUTES`` are removed.

    The kept instance mappings are the same objects as in ``instances``
    and are modified in place; dropped instances are left untouched.

    :param instances: Mapping of instance id to instance mapping. Every
                      instance must have a ``status`` key.
    :return:          A new ``dict`` of instance id to cleansed instance
                      mapping.
    """
    cleansed_instances = {
        instance_id: instance
        for instance_id, instance in instances.items()
        if instance['status'] not in PRIVATE_INSTANCE_STATUSES
    }

    for instance in cleansed_instances.values():
        # Only two statuses survive, so the finer-grained states are not
        # exposed in the output.
        if instance['status'] != 'COMPLETED':
            instance['status'] = 'CLAIMED'
        for key in PRIVATE_INSTANCE_ATTRIBUTES:
            # pop() instead of del: an instance lacking one of these
            # attributes must not abort the whole run with a KeyError.
            instance.pop(key, None)

    return cleansed_instances
122+
123+
81124
def get_students():
82125
students = {}
83126
for _, instance in get_instances().items():
@@ -99,17 +142,8 @@ def get_students():
99142
student['instances'].append(instance)
100143

101144

102-
def get_effective_students(students):
103-
for student in list(students):
104-
instances = student['instances']
105-
instances = [instance for instance in instances
106-
if instance['status'] != 'ABANDONED']
107-
if instances:
108-
yield student
109-
110-
111-
def get_issue_related_students(students):
112-
for student in list(get_effective_students(students)):
145+
def get_issue_related_students():
146+
for student in list(get_students()):
113147
instances = student['instances']
114148
for instance in instances:
115149
task = get_task(instance['task_definition_id'])
@@ -119,8 +153,8 @@ def get_issue_related_students(students):
119153
break
120154

121155

122-
def get_linked_students(students):
123-
for student in list(get_issue_related_students(students)):
156+
def get_linked_students():
157+
for student in list(get_issue_related_students()):
124158
instances = student['instances']
125159
for instance in instances:
126160
task = get_task(instance['task_definition_id'])
@@ -142,5 +176,7 @@ def get_linked_students(students):
142176
(task_id, url, ', '.join(issue.assignees)))
143177
else:
144178
student['username'] = issue.assignees[0]
179+
print('student %s is %s because of %s' %
180+
(student['id'], issue.assignees[0], url))
145181
yield student
146182
break

0 commit comments

Comments
 (0)