Skip to content

Commit 8b1914e

Browse files
orronai and OdeYec authored
feat: Allow html python notebook (#223)
* Added support for an HTML header in notebook cells
- Fixed the type getter function
- Fixed the unit test for the notebooks extractor
- Changed the constant in utils.files to be a set of the LANGUAGE_EXTENSIONS_TO_NAMES constant
Co-authored-by: Odelia Yechiel <[email protected]>
1 parent b06dd52 commit 8b1914e

File tree

6 files changed

+76
-9
lines changed

6 files changed

+76
-9
lines changed

lms/extractors/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,18 +54,18 @@ def _split_header(cls, code: CodeFile) -> Tuple[str, str]:
5454
first_line = clean_text[:first_line_end].strip().replace('_', ' ')
5555
code_lines = clean_text[first_line_end:].strip()
5656

57-
log.debug(f'Upload title: {first_line}')
5857
return first_line, code_lines
5958

6059
@classmethod
61-
def _clean(cls, code: Union[Sequence, str]) -> Tuple[int, str]:
60+
def _clean(cls, code: CodeFile) -> Tuple[int, str]:
6261
first_line, code_text = cls._split_header(code)
62+
log.debug(f'Upload title: {first_line}.')
6363
upload_title = cls.UPLOAD_TITLE.fullmatch(first_line)
6464
if upload_title:
6565
exercise_id = int(upload_title.group(1))
6666
return exercise_id, code_text
6767

68-
log.debug(f'Unmatched title: {first_line}')
68+
log.debug(f'Unmatched title: {first_line}.')
6969
return 0, ''
7070

7171
def get_exercise(self, to_extract: Any) -> Tuple[int, List[File]]:

lms/extractors/notebook.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
import itertools
22
import json
3+
import re
34
from typing import Any, Dict, Iterator, List, Tuple
45

56
from lms.extractors.base import Extractor, File
7+
from lms.utils.files import ALLOWED_EXTENSIONS
8+
from lms.utils.log import log
69

710

811
NotebookJson = Dict[str, Any]
@@ -13,6 +16,8 @@ class Notebook(Extractor):
1316
POSSIBLE_JSON_EXCEPTIONS = (
1417
json.JSONDecodeError, KeyError, StopIteration, UnicodeDecodeError,
1518
)
19+
TYPE_LINE_PREFIX = re.compile(r'type:\s+(\w+)', re.IGNORECASE)
20+
DEFAULT_FILE_TYPE = 'py'
1621

1722
def __init__(self, **kwargs):
1823
super().__init__(**kwargs)
@@ -41,10 +46,25 @@ def _get_code_cells(self) -> Iterator[Cell]:
4146
cells = notebook['cells']
4247
yield from filter(self._is_code_cell, cells)
4348

49+
def _get_file_type(self, code: str) -> Tuple[str, str]:
50+
type_line, code_lines = self._split_header(code)
51+
file_type_match = self.TYPE_LINE_PREFIX.fullmatch(type_line)
52+
53+
if file_type_match:
54+
file_type = file_type_match.group(1)
55+
if file_type not in ALLOWED_EXTENSIONS:
56+
file_type = self.DEFAULT_FILE_TYPE
57+
log.debug(f'File type: {file_type}.')
58+
return code_lines, file_type
59+
60+
log.debug('No file type defined.')
61+
return code, self.DEFAULT_FILE_TYPE
62+
4463
def get_exercise(self, to_extract: Cell) -> Tuple[int, List[File]]:
4564
code: List[str] = to_extract.get('source', [])
4665
exercise_id, clean_code = self._clean(code)
47-
return (exercise_id, [File('/main.py', clean_code)])
66+
clean_code, ext = self._get_file_type(clean_code)
67+
return (exercise_id, [File(f'/main.{ext}', clean_code)])
4868

4969
def get_exercises(self) -> Iterator[Tuple[int, List[File]]]:
5070
"""Yield exercise ID and code from notebook."""

lms/extractors/textfile.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from lms.extractors.base import Extractor, File
44
from lms.models.errors import BadUploadFile
5+
from lms.utils.files import ALLOWED_EXTENSIONS
56

67

78
TEXTCHARS = set(bytes(
@@ -11,14 +12,12 @@
1112

1213

1314
class Textfile(Extractor):
14-
ALLOWED_EXTENSIONS = {'css', 'html', 'js', 'py', 'sql'}
15-
1615
def __init__(self, **kwargs):
1716
super().__init__(**kwargs)
1817
self.filename_no_ext, _, self.ext = self.filename.rpartition('.')
1918

2019
def can_extract(self) -> bool:
21-
if self.ext not in self.ALLOWED_EXTENSIONS:
20+
if self.ext not in ALLOWED_EXTENSIONS:
2221
return False
2322
if isinstance(self.file_content, str):
2423
return True
@@ -30,7 +29,7 @@ def get_exercise(self, to_extract: str) -> Tuple[int, List[File]]:
3029
exercise_id, _ = self._clean(self.filename_no_ext)
3130
content = to_extract
3231
if not exercise_id:
33-
raise BadUploadFile("Can't resolve exercise id", self.filename)
32+
raise BadUploadFile("Can't resolve exercise id.", self.filename)
3433

3534
return (exercise_id, [File(f'/main.{self.ext}', content)])
3635

lms/tests/samples/upload-1-2.ipynb

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,47 @@
5757
"מקווה שלא יכשל על זה"
5858
]
5959
},
60+
{
61+
"cell_type": "code",
62+
"execution_count": null,
63+
"metadata": {},
64+
"outputs": [],
65+
"source": [
66+
"# upload 567\n",
67+
"# type: 123\n",
68+
"\n",
69+
"וזה יבוא\n",
70+
"אתה תראה"
71+
]
72+
},
73+
{
74+
"cell_type": "code",
75+
"execution_count": null,
76+
"metadata": {},
77+
"outputs": [],
78+
"source": [
79+
"# upload 122\n",
80+
"# type: html\n",
81+
"\n",
82+
"<html>\n",
83+
"<body>\n",
84+
"<p>I wish I could fly</p>\n",
85+
"</body>\n",
86+
"</html>"
87+
]
88+
},
89+
{
90+
"cell_type": "code",
91+
"execution_count": null,
92+
"metadata": {},
93+
"outputs": [],
94+
"source": [
95+
"# upload 23\n",
96+
"# type: exe\n",
97+
"\n",
98+
"Fire in the hole"
99+
]
100+
},
60101
{
61102
"cell_type": "markdown",
62103
"metadata": {},

lms/tests/test_extractor.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,12 @@ def get_zip_filenames(self):
7373

7474
def test_notebook(self):
7575
results = list(extractor.Extractor(self.ipynb_storage))
76-
assert len(results) == 2
76+
assert len(results) == 5
7777
assert results[0][0] == 3141
7878
assert results[1][0] == 2
79+
assert results[2][1][0].path.endswith('.py')
80+
assert results[3][1][0].path.endswith('.html')
81+
assert results[4][1][0].path.endswith('.py')
7982
solution = extractor.Extractor(self.pyfiles_storage[1]).file_content
8083
solution = solution.replace('# Upload 3141', '')
8184
assert results[0][1][0].code == solution.strip()

lms/utils/files.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
LANGUAGE_EXTENSIONS_TO_NAMES = {
22
'bat': 'batch',
3+
'css': 'css',
34
'h': 'c',
45
'htm': 'html',
6+
'html': 'html',
57
'js': 'javascript',
68
'md': 'markup',
79
'ps1': 'powershell',
@@ -14,6 +16,8 @@
1416
'yml': 'yaml',
1517
}
1618

19+
ALLOWED_EXTENSIONS = set(LANGUAGE_EXTENSIONS_TO_NAMES)
20+
1721

1822
def get_language_name_by_extension(ext: str) -> str:
1923
return LANGUAGE_EXTENSIONS_TO_NAMES.get(ext, ext)

0 commit comments

Comments
 (0)