Skip to content

Commit

Permalink
Cleanse GCI data
Browse files Browse the repository at this point in the history
Before the Google Code-in data can be stored in the repository,
the data of students who are just starting needs to be removed,
the status types need to be simplified to reduce side channels,
the unpublished tasks need to be removed, and the task mentor lists
need to be removed.

Related to coala#3
  • Loading branch information
jayvdb committed Dec 17, 2017
1 parent 9f1d548 commit e25561f
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 17 deletions.
1 change: 1 addition & 0 deletions .ci/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ set -e -x
mkdir private _site public

python manage.py fetch_gci_task_data private
python manage.py cleanse_gci_task_data private _site

python manage.py collectstatic --noinput
python manage.py distill-local public --force
2 changes: 1 addition & 1 deletion gci/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

GCI_DATA_DIR = os.path.join(
os.path.dirname(__file__), '..',
'private',
'_site',
)


Expand Down
45 changes: 45 additions & 0 deletions gci/management/commands/cleanse_gci_task_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from collections import OrderedDict
import os.path

from ruamel.yaml import YAML

from django.core.management.base import BaseCommand

from gci.students import (
_get_instances,
_get_tasks,
cleanse_instances,
)
from gci.tasks import (
cleanse_tasks,
)


class Command(BaseCommand):
    """Management command that writes a cleansed copy of the GCI data.

    Reads ``tasks.yaml`` and ``instances.yaml`` from ``input_dir``, passes
    them through ``cleanse_tasks`` and ``cleanse_instances``, and writes the
    results under the same file names into ``output_dir``.
    """

    # NOTE(review): this file imports ``cleanse_tasks`` from ``gci.tasks``,
    # while the function is defined in ``gci/task.py`` -- confirm the module
    # name in the import at the top of the file.

    args = ''
    help = 'Cleanse GCI data'

    def add_arguments(self, parser):
        """Register the two required positional directory arguments."""
        # Both directories are mandatory.  Declaring them optional let a
        # missing value reach ``handle`` as ``None``, where ``os.path.join``
        # failed with an unhelpful ``TypeError``; argparse now reports a
        # usage error instead.
        parser.add_argument(
            'input_dir', type=str,
            help='directory containing tasks.yaml and instances.yaml',
        )
        parser.add_argument(
            'output_dir', type=str,
            help='directory the cleansed YAML files are written to',
        )

    def handle(self, *args, **options):
        """Load, cleanse and re-write the task and instance YAML files."""
        input_dir = options['input_dir']
        output_dir = options['output_dir']

        yaml = YAML()

        with open(os.path.join(input_dir, 'tasks.yaml'), 'r') as f:
            tasks = yaml.load(f)

        with open(os.path.join(input_dir, 'instances.yaml'), 'r') as f:
            instances = yaml.load(f)

        # Tasks are cleansed first: the instance filter is then applied
        # against the cleansed task mapping, not the raw one.
        tasks = cleanse_tasks(tasks)
        instances = cleanse_instances(instances, tasks)

        with open(os.path.join(output_dir, 'tasks.yaml'), 'w') as f:
            yaml.dump(tasks, f)

        with open(os.path.join(output_dir, 'instances.yaml'), 'w') as f:
            yaml.dump(instances, f)
52 changes: 38 additions & 14 deletions gci/students.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,19 @@

from .config import get_api_key, load_cache
from .gitorg import get_issue
from .task import get_task
from .task import beginner_tasks, get_task

# Instance statuses that cleanse_instances() filters on: an instance whose
# 'status' is one of these values is left out of the cleansed data.
PRIVATE_INSTANCE_STATUSES = (
'ABANDONED',
'OUT_OF_TIME',
'PENDING_PARENTAL_CONSENT',
'UNASSIGNED_BY_MENTOR',
)

# Keys that cleanse_instances() deletes from every instance it keeps.
PRIVATE_INSTANCE_ATTRIBUTES = (
'modified',
'deadline',
)

_client = None
_instances = {}
Expand Down Expand Up @@ -64,6 +76,25 @@ def get_instances():
return _instances


def cleanse_instances(instances, tasks):
    """Return the subset of ``instances`` that may be published.

    An instance is kept only when its ``status`` is not listed in
    ``PRIVATE_INSTANCE_STATUSES``, its ``task_definition_id`` is a key of
    ``tasks``, and that task is not one of ``beginner_tasks(tasks)``.

    For every kept instance, any status other than ``'COMPLETED'`` is
    replaced by ``'CLAIMED'`` and the keys named in
    ``PRIVATE_INSTANCE_ATTRIBUTES`` are removed.  The kept instance
    mappings are modified in place, so they are shared with the
    ``instances`` argument.

    :param instances: mapping of instance id to instance mapping.
    :param tasks: mapping of task id to task mapping.
    :return: a new ``dict`` mapping instance id to cleansed instance.
    """
    # The beginner task set does not depend on the instance being tested,
    # so build it once instead of once per candidate instance.
    beginner_task_ids = beginner_tasks(tasks)

    cleansed_instances = {
        instance_id: instance
        for instance_id, instance in instances.items()
        if instance['status'] not in PRIVATE_INSTANCE_STATUSES
        and instance['task_definition_id'] in tasks
        and instance['task_definition_id'] not in beginner_task_ids
    }

    for instance in cleansed_instances.values():
        # Collapse every non-final status into a single public value.
        if instance['status'] != 'COMPLETED':
            instance['status'] = 'CLAIMED'
        for key in PRIVATE_INSTANCE_ATTRIBUTES:
            # An instance that never had the attribute is already clean;
            # a missing key must not abort the whole cleanse run.
            instance.pop(key, None)

    return cleansed_instances


def get_students():
students = {}
for _, instance in get_instances().items():
Expand All @@ -85,17 +116,8 @@ def get_students():
student['instances'].append(instance)


def get_effective_students(students):
    """Yield each student that has at least one non-abandoned instance.

    ``students`` is copied into a list before iteration starts; students
    are yielded in their original order.
    """
    snapshot = list(students)
    for candidate in snapshot:
        remaining = list(filter(
            lambda entry: entry['status'] != 'ABANDONED',
            candidate['instances'],
        ))
        if not remaining:
            continue
        yield candidate


def get_issue_related_students(students):
for student in list(get_effective_students(students)):
def get_issue_related_students():
for student in list(get_students()):
instances = student['instances']
for instance in instances:
task = get_task(instance['task_definition_id'])
Expand All @@ -105,8 +127,8 @@ def get_issue_related_students(students):
break


def get_linked_students(students):
for student in list(get_issue_related_students(students)):
def get_linked_students():
for student in list(get_issue_related_students()):
instances = student['instances']
for instance in instances:
task = get_task(instance['task_definition_id'])
Expand All @@ -128,5 +150,7 @@ def get_linked_students(students):
(task_id, url, ', '.join(issue.assignees)))
else:
student['username'] = issue.assignees[0]
print('student %s is %s because of %s' %
(student['id'], issue.assignees[0], url))
yield student
break
31 changes: 31 additions & 0 deletions gci/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,34 @@ def get_tasks():

def get_task(task_id):
return get_tasks()[task_id]


def published_tasks(tasks):
    """Return a new dict holding only the tasks whose ``status`` equals 2.

    The task mappings themselves are not copied; the result shares them
    with ``tasks``.
    """
    selected = {}
    for key, entry in tasks.items():
        if entry['status'] == 2:
            selected[key] = entry
    return selected


def beginner_tasks(tasks):
    """Return a new dict holding only the tasks flagged as beginner tasks.

    A task is included when its ``is_beginner`` value is truthy.  The task
    mappings themselves are not copied; the result shares them with
    ``tasks``.

    :param tasks: mapping of task id to task mapping.
    :return: ``dict`` mapping task id to task for every beginner task.
    """
    return {
        task_id: task
        for task_id, task in tasks.items()
        if task['is_beginner']
    }


def strip_mentors(tasks):
    """Remove the ``mentors`` entry from every task in ``tasks``, in place.

    Tasks that have no ``mentors`` key are left untouched, so one task
    without a mentor list does not abort the whole run.

    :param tasks: mapping of task id to task mapping.
    :return: ``None``; the task mappings are modified in place.
    """
    for task in tasks.values():
        task.pop('mentors', None)


def cleanse_tasks(tasks):
    """Return the published tasks from ``tasks`` with mentors removed.

    Only the tasks selected by ``published_tasks`` are returned, and only
    those have their ``mentors`` entry stripped.  The returned dict shares
    its task mappings with ``tasks``, so the published tasks in the input
    are modified in place; unpublished tasks are left untouched.

    :param tasks: mapping of task id to task mapping.
    :return: new ``dict`` of published tasks without mentor lists.
    """
    cleansed_tasks = published_tasks(tasks)

    # Strip the subset that is actually returned, not the raw input:
    # unpublished tasks are discarded anyway and must not be mutated (or
    # trip the stripping step) on the way.
    strip_mentors(cleansed_tasks)

    return cleansed_tasks
5 changes: 3 additions & 2 deletions gci/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from calendar import timegm
import requests

from .students import get_students, get_linked_students
from .students import get_linked_students
from .gitorg import get_logo
from .task import get_tasks

Expand All @@ -26,7 +26,8 @@ def index(request):


def gci_overview():
linked_students = list(get_linked_students(get_students()))
linked_students = list(get_linked_students())

org_id = linked_students[0]['organization_id']
org_name = linked_students[0]['organization_name']
s = []
Expand Down

0 comments on commit e25561f

Please sign in to comment.