Skip to content

Commit

Permalink
Add Django command to fetch GCI data
Browse files Browse the repository at this point in the history
Separates the fetch operation from the load operation,
allowing the build on forks to use a cached dataset.

Related to coala#3
  • Loading branch information
jayvdb committed Dec 15, 2017
1 parent 3080184 commit b0d2d0e
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 17 deletions.
4 changes: 3 additions & 1 deletion .ci/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ set -e -x

bash orgname.sh

mkdir _site public
mkdir private _site public

python manage.py fetch_gci_task_data private

python activity/scraper.py || true

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ ENV/
# mypy
.mypy_cache/

/private/
_site/
/public/
org_name.txt
11 changes: 11 additions & 0 deletions gci/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import yaml
import os

# File name template for a locally stored API key.
# NOTE(review): presumably ``%s`` is filled with the service name passed
# to ``get_api_key`` -- confirm against the full function body.
API_KEY_FILE = '.%s_API_KEY'

# Directory that holds the cached GCI data files: the ``private``
# directory one level above the directory containing this module.
GCI_DATA_DIR = os.path.join(os.path.dirname(__file__), '..', 'private')


def get_api_key(name):
env_val = os.environ.get('%s_TOKEN' % name)
Expand All @@ -18,3 +24,8 @@ def get_api_key(name):
except IOError:
print('Please put your %s API key at %s.' % (name, filename))
exit(1)


def load_cache(filename):
    """
    Load a cached YAML document from ``GCI_DATA_DIR``.

    :param filename: Name of the file inside ``GCI_DATA_DIR``,
                     e.g. ``'tasks.yaml'``.
    :return:         The deserialized content of the YAML file.
    :raises IOError: If the file cannot be opened.
    """
    # ``safe_load`` only constructs standard YAML types, so a corrupted
    # or tampered cache file cannot instantiate arbitrary Python
    # objects.  It also avoids calling ``yaml.load`` without an explicit
    # ``Loader``, which newer PyYAML releases no longer accept.
    with open(os.path.join(GCI_DATA_DIR, filename), 'r') as f:
        return yaml.safe_load(f)
40 changes: 40 additions & 0 deletions gci/management/commands/fetch_gci_task_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from collections import OrderedDict
import os.path

from ruamel.yaml import YAML

from django.core.management.base import BaseCommand

from gci.students import (
_get_instances,
_get_tasks,
)


class Command(BaseCommand):
    """
    Fetch GCI tasks and task instances and store them as YAML files.

    Writes ``tasks.yaml`` and ``instances.yaml`` into the output
    directory, each keyed by integer id in ascending order.
    """

    help = 'Fetch GCI data'

    def add_arguments(self, parser):
        """
        Register the command line arguments.

        :param parser: The argument parser supplied by Django.
        """
        # ``output_dir`` is optional; ``handle`` falls back to the cache
        # directory when it is omitted.
        parser.add_argument('output_dir', nargs='?', type=str)

    @staticmethod
    def _index_by_id(records):
        """
        Build a mapping of integer id to record, ordered by id.

        :param records: Iterable of mappings that each have an ``'id'``
                        entry convertible with ``int``.
        :return:        An ``OrderedDict`` sorted by ascending id.
        """
        indexed = {int(record['id']): record for record in records}
        return OrderedDict((key, indexed[key]) for key in sorted(indexed))

    def handle(self, *args, **options):
        """
        Download the data and dump it into the output directory.

        :param options: Parsed command line options; ``output_dir`` is
                        the target directory and may be absent.
        """
        output_dir = options.get('output_dir')
        if not output_dir:
            # The argument is declared optional, so it may be ``None``.
            # Default to the directory the cache is loaded from instead
            # of failing inside ``os.path.join``.
            from gci.config import GCI_DATA_DIR
            output_dir = GCI_DATA_DIR

        # Normalise first: ``os.makedirs`` is documented to misbehave
        # when the path still contains ``..`` components.
        output_dir = os.path.abspath(output_dir)
        os.makedirs(output_dir, exist_ok=True)

        tasks = self._index_by_id(_get_tasks())
        instances = self._index_by_id(_get_instances())

        yaml = YAML()

        with open(os.path.join(output_dir, 'tasks.yaml'), 'w') as f:
            yaml.dump(tasks, f)

        with open(os.path.join(output_dir, 'instances.yaml'), 'w') as f:
            yaml.dump(instances, f)
42 changes: 26 additions & 16 deletions gci/students.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import os
import re

from .client import GCIAPIClient

from .config import get_api_key
from .config import get_api_key, load_cache
from .gitorg import get_issue


# Module-level caches shared by the accessor functions in this module.

# NOTE(review): presumably the lazily created API client returned by
# ``get_client`` -- its body is not visible here, confirm.
_client = None
# NOTE(review): presumably cached organisation data -- confirm usage.
_org = {}
# Filled by ``get_tasks`` on first use.
_tasks = {}
# Filled by ``get_instances`` on first use.
_instances = {}


def get_client():
Expand Down Expand Up @@ -39,20 +42,7 @@ def _get_tasks():
page = int(result.group(1))


def get_tasks():
    """
    Return the module-level task mapping, filling it on first use.

    While ``_tasks`` is empty, every task yielded by ``_get_tasks`` is
    stored under its ``'id'`` value.
    """
    global _tasks
    if _tasks:
        return _tasks

    _tasks.update((entry['id'], entry) for entry in _get_tasks())
    return _tasks


def get_task(task_id):
    """Return the entry stored under ``task_id`` in ``get_tasks()``."""
    return get_tasks()[task_id]


def get_instances():
def _get_instances():
client = get_client()
page = 1

Expand All @@ -68,9 +58,29 @@ def get_instances():
page = int(result.group(1))


def get_tasks():
    """
    Return the cached task data.

    While the module-level ``_tasks`` is empty, it is replaced with the
    content of ``tasks.yaml`` as returned by ``load_cache``.
    """
    global _tasks
    if _tasks:
        return _tasks

    _tasks = load_cache('tasks.yaml')
    return _tasks


def get_task(task_id):
    """Return the entry stored under ``task_id`` in ``get_tasks()``."""
    all_tasks = get_tasks()
    return all_tasks[task_id]


def get_instances():
    """
    Return the cached task instance data.

    While the module-level ``_instances`` is empty, it is replaced with
    the content of ``instances.yaml`` as returned by ``load_cache``.
    """
    global _instances
    if _instances:
        return _instances

    _instances = load_cache('instances.yaml')
    return _instances


def get_students():
students = {}
for instance in get_instances():
for _, instance in get_instances().items():
student_id = instance['student_id']
if student_id not in students:
student = {
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ IGitt
requests
python-dateutil
pillow
ruamel.yaml

0 comments on commit b0d2d0e

Please sign in to comment.