Skip to content

Commit 795ea92

Browse files
authored
Merge pull request #173 from kbase/develop
Release v1.3.4
2 parents 7c1194d + acc6048 commit 795ea92

File tree

6 files changed

+37
-41
lines changed

6 files changed

+37
-41
lines changed

RELEASE_NOTES.md

+7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
### Version 1.3.4
2+
- Alter the behavior of the bulk specification file writers to return an error if the
3+
input `types` parameter is empty.
4+
- Fixed a bug in the csv/tsv bulk specification parser that would cause a failure if the
5+
first header of a file had trailing separators. This occurs if a csv/tsv file is opened and
6+
saved by Excel.
7+
18
### Version 1.3.3
29
- Fixed a bug in the csv/tsv bulk specification parser that would include an empty entry for
310
each empty line in the file.

staging_service/app.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
3636
routes = web.RouteTableDef()
37-
VERSION = "1.3.3"
37+
VERSION = "1.3.4"
3838

3939
_DATATYPE_MAPPINGS = None
4040

@@ -59,7 +59,7 @@ async def importer_filetypes(request: web.Request) -> web.json_response:
5959
Returns the file types for the configured datatypes. The returned JSON contains two keys:
6060
* datatype_to_filetype, which maps import datatypes (like gff_genome) to their accepted
6161
filetypes (like [FASTA, GFF])
62-
* filetype_to_extensions, which maps file types (e.g. FASTA) to their extensions (e.g.
62+
* filetype_to_extensions, which maps file types (e.g. FASTA) to their extensions (e.g.
6363
*.fa, *.fasta, *.fa.gz, etc.)
6464
6565
This information is currently static over the life of the server.
@@ -197,7 +197,7 @@ async def write_bulk_specification(request: web.Request) -> web.json_response:
197197
def _createJSONErrorResponse(error_text: str, error_class=web.HTTPBadRequest):
198198
err = json.dumps({"error": error_text})
199199
return error_class(text=err, content_type=_APP_JSON)
200-
200+
201201

202202
@routes.get("/add-acl-concierge")
203203
async def add_acl_concierge(request: web.Request):

staging_service/import_specifications/file_writers.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
entry in the list corresponds to a row in the resulting import specification,
2424
and the order of the list defines the order of the rows.
2525
Leave the `data` list empty to write an empty template.
26-
:returns: A mapping of the data types to the files to which they were written.
26+
:returns: A mapping of the data types to the files to which they were written.
2727
"""
2828
# note that we can't use an f string here to interpolate the variables below, e.g.
2929
# order_and_display, etc.
@@ -82,7 +82,7 @@ def _check_import_specification(types: dict[str, dict[str, list[Any]]]):
8282
Leave the {_DATA} list empty to write an empty template.
8383
"""
8484
if not types:
85-
return
85+
raise ImportSpecWriteException("At least one data type must be specified")
8686
for datatype in types:
8787
# replace this with jsonschema? don't worry about it for now
8888
_check_string(datatype, "A data type")
@@ -235,4 +235,4 @@ class ImportSpecWriteException(Exception):
235235
"""
236236
An exception thrown when writing an import specification fails.
237237
"""
238-
pass
238+
pass

staging_service/import_specifications/individual_parsers.py

+14-29
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
# TODO update to C impl when fixed: https://github.com/Marco-Sulla/python-frozendict/issues/26
1111
from frozendict.core import frozendict
1212
from pathlib import Path
13-
from typing import TextIO, Optional as O, Union, Any
13+
from typing import Optional as O, Union, Any
1414

1515
from staging_service.import_specifications.file_parser import (
1616
PRIMITIVE_TYPE,
@@ -35,7 +35,7 @@
3535
_HEADER_REGEX = re.compile(f"{_DATA_TYPE} (\\w+){_HEADER_SEP} "
3636
+ f"{_COLUMN_STR} (\\d+){_HEADER_SEP} {_VERSION_STR} (\\d+)")
3737

38-
_MAGIC_TEXT_FILES = {"text/plain", "inode/x-empty"}
38+
_MAGIC_TEXT_FILES = {"text/plain", "inode/x-empty", "application/csv", "text/csv"}
3939

4040

4141
class _ParseException(Exception):
@@ -63,26 +63,18 @@ def _parse_header(header: str, spec_source: SpecificationSource, maximum_version
6363
return match[1], int(match[2])
6464

6565

66-
def _required_next(
67-
input_: Union[TextIO, Any], # Any really means a csv reader object
68-
spec_source: SpecificationSource,
69-
error: str
70-
) -> Union[str, list[str]]:
71-
# returns a string for a TextIO input or a list for a Reader input
72-
try:
73-
return next(input_)
74-
except StopIteration:
75-
raise _ParseException(Error(ErrorType.PARSE_FAIL, error, spec_source))
76-
7766
def _csv_next(
78-
input_: Union[TextIO, Any], # Any really means a csv reader object
67+
input_: Any, # Any really means a csv reader object
7968
line_number: int,
80-
expected_line_count: int,
69+
expected_line_count: Union[None, int], # None = skip columns check
8170
spec_source: SpecificationSource,
8271
error: str
8372
) -> list[str]:
84-
line = _required_next(input_, spec_source, error)
85-
if len(line) != expected_line_count:
73+
try:
74+
line = next(input_)
75+
except StopIteration:
76+
raise _ParseException(Error(ErrorType.PARSE_FAIL, error, spec_source))
77+
if expected_line_count and len(line) != expected_line_count:
8678
raise _ParseException(Error(
8779
ErrorType.INCORRECT_COLUMN_COUNT,
8880
f"Incorrect number of items in line {line_number}, "
@@ -91,15 +83,6 @@ def _csv_next(
9183
return line
9284

9385

94-
def _get_datatype(input_: TextIO, spec_source: SpecificationSource, maximum_version: int
95-
) -> tuple[str, int]:
96-
# return is (data type, column count)
97-
return _parse_header(
98-
_required_next(input_, spec_source, "Missing data type / version header").strip(),
99-
spec_source,
100-
maximum_version)
101-
102-
10386
def _error(error: Error) -> ParseResults:
10487
return ParseResults(errors = tuple([error]))
10588

@@ -155,11 +138,13 @@ def _normalize_headers(
155138
def _parse_xsv(path: Path, sep: str) -> ParseResults:
156139
spcsrc = SpecificationSource(path)
157140
try:
158-
if magic.from_file(str(path), mime=True) not in _MAGIC_TEXT_FILES:
159-
return _error(Error(ErrorType.PARSE_FAIL, "Not a text file", spcsrc))
141+
filetype = magic.from_file(str(path), mime=True)
142+
if filetype not in _MAGIC_TEXT_FILES:
143+
return _error(Error(ErrorType.PARSE_FAIL, "Not a text file: " + filetype, spcsrc))
160144
with open(path, newline='') as input_:
161-
datatype, columns = _get_datatype(input_, spcsrc, _VERSION)
162145
rdr = csv.reader(input_, delimiter=sep) # let parser handle quoting
146+
dthd = _csv_next(rdr, 1, None, spcsrc, "Missing data type / version header")
147+
datatype, columns = _parse_header(dthd[0], spcsrc, _VERSION)
163148
hd1 = _csv_next(rdr, 2, columns, spcsrc, "Missing 2nd header line")
164149
param_ids = _normalize_headers(hd1, 2, spcsrc)
165150
_csv_next(rdr, 3, columns, spcsrc, "Missing 3rd header line")

tests/import_specifications/test_file_writers.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,6 @@ def temp_dir() -> Generator[Path, None, None]:
6464
}
6565
}
6666

67-
def test_noop():
68-
assert write_csv(Path("."), {}) == {}
69-
assert write_tsv(Path("."), {}) == {}
7067

7168
def test_write_csv(temp_dir: Path):
7269
res = write_csv(temp_dir, _TEST_DATA)
@@ -194,6 +191,7 @@ def test_file_writers_fail():
194191
E = ImportSpecWriteException
195192
file_writers_fail(None, {}, ValueError("The folder cannot be null"))
196193
file_writers_fail(p, None, E("The types value must be a mapping"))
194+
file_writers_fail(p, {}, E("At least one data type must be specified"))
197195
file_writers_fail(
198196
p, {None: 1}, E("A data type cannot be a non-string or a whitespace only string"))
199197
file_writers_fail(
@@ -281,4 +279,4 @@ def file_writers_fail(path: Path, types: dict, expected: Exception):
281279
assert_exception_correct(got.value, expected)
282280
with raises(Exception) as got:
283281
write_excel(path, types)
284-
assert_exception_correct(got.value, expected)
282+
assert_exception_correct(got.value, expected)

tests/import_specifications/test_individual_parsers.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ def temp_dir() -> Generator[Path, None, None]:
3939

4040

4141
def test_xsv_parse_success(temp_dir: Path):
42+
# Tests a special case where if an xSV file is opened by Excel and then resaved,
43+
# Excel will add separators to the end of the 1st header line. This previously caused
44+
# the parse to fail.
4245
_xsv_parse_success(temp_dir, ',', parse_csv)
4346
_xsv_parse_success(temp_dir, '\t', parse_tsv)
4447

@@ -48,7 +51,7 @@ def _xsv_parse_success(temp_dir: Path, sep: str, parser: Callable[[Path], ParseR
4851
input_ = temp_dir / str(uuid.uuid4())
4952
with open(input_, "w") as test_file:
5053
test_file.writelines([
51-
"Data type: some_type; Columns: 4; Version: 1\n",
54+
f"Data type: some_type; Columns: 4; Version: 1{s}{s}{s}\n",
5255
f"spec1{s} spec2{s} spec3 {s} spec4\n", # test trimming
5356
f"Spec 1{s} Spec 2{s} Spec 3{s} Spec 4\n",
5457
f"val1 {s} val2 {s} 7 {s} 3.2\n", # test trimming
@@ -166,7 +169,10 @@ def test_xsv_parse_fail_binary_file(temp_dir: Path):
166169
res = parse_csv(test_file)
167170

168171
assert res == ParseResults(errors=tuple([
169-
Error(ErrorType.PARSE_FAIL, "Not a text file", source_1=SpecificationSource(test_file))
172+
Error(
173+
ErrorType.PARSE_FAIL,
174+
"Not a text file: application/vnd.ms-excel",
175+
source_1=SpecificationSource(test_file))
170176
]))
171177

172178

0 commit comments

Comments
 (0)