Skip to content

Commit 080ea1f

Browse files
committed
fix(misc): fixed binary/str types between py2 and py3;
- Added binary type support and mapping. - Added new config option for strict type guessing. - Minor fixes for python2 types and classes. - Minor fix for failed type guessing.
1 parent 16b8417 commit 080ea1f

File tree

3 files changed

+25
-3
lines changed

3 files changed

+25
-3
lines changed

ckanext/xloader/config_declaration.yaml

+8
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,14 @@ groups:
4848
type: bool
4949
required: false
5050
legacy_key: ckanext.xloader.just_load_with_messytables
51+
- key: ckanext.xloader.strict_type_guessing
52+
default: True
53+
example: False
54+
description: |
55+
Use with ckanext.xloader.use_type_guessing to turn strict mode on or off
56+
for type guessing. If set to False, the types will always fall back to string type.
57+
type: bool
58+
required: false
5159
- key: ckanext.xloader.parse_dates_dayfirst
5260
default: False
5361
example: False

ckanext/xloader/loader.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import datetime
55
import itertools
6+
from six import text_type as str, binary_type
67
import os
78
import os.path
89
import tempfile
@@ -266,7 +267,9 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
266267
skip_rows = list(range(1, header_offset + 2))
267268

268269
TYPES, TYPE_MAPPING = get_types()
269-
types = type_guess(stream.sample[1:], types=TYPES, strict=True)
270+
strict_guessing = p.toolkit.asbool(
271+
config.get('ckanext.xloader.strict_type_guessing', True))
272+
types = type_guess(stream.sample[1:], types=TYPES, strict=strict_guessing)
270273

271274
# override with types user requested
272275
if existing_info:
@@ -333,12 +336,17 @@ def row_iterator():
333336

334337

335338
_TYPE_MAPPING = {
339+
"<type 'str'>": 'text',
336340
"<type 'unicode'>": 'text',
341+
"<type 'bytes'>": 'text',
337342
"<type 'bool'>": 'text',
338343
"<type 'int'>": 'numeric',
339344
"<type 'float'>": 'numeric',
340345
"<class 'decimal.Decimal'>": 'numeric',
346+
"<type 'datetime.datetime'>": 'timestamp',
341347
"<class 'str'>": 'text',
348+
"<class 'unicode'>": 'text',
349+
"<class 'bytes'>": 'text',
342350
"<class 'bool'>": 'text',
343351
"<class 'int'>": 'numeric',
344352
"<class 'float'>": 'numeric',
@@ -347,7 +355,7 @@ def row_iterator():
347355

348356

349357
def get_types():
350-
_TYPES = [int, bool, str, datetime.datetime, float, Decimal]
358+
_TYPES = [int, bool, str, binary_type, datetime.datetime, float, Decimal]
351359
TYPE_MAPPING = config.get('TYPE_MAPPING', _TYPE_MAPPING)
352360
return _TYPES, TYPE_MAPPING
353361

ckanext/xloader/utils.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,17 @@
33
import json
44
import datetime
55

6+
from six import text_type as str, binary_type
7+
68
from ckan import model
79
from ckan.lib import search
810
from collections import defaultdict
911
from decimal import Decimal
1012

1113
import ckan.plugins as p
1214

15+
from .job_exceptions import JobError
16+
1317

1418
def resource_data(id, resource_id):
1519

@@ -149,7 +153,7 @@ def headers_guess(rows, tolerance=1):
149153
return 0, []
150154

151155

152-
TYPES = [int, bool, str, datetime.datetime, float, Decimal]
156+
TYPES = [int, bool, str, binary_type, datetime.datetime, float, Decimal]
153157

154158

155159
def type_guess(rows, types=TYPES, strict=False):
@@ -210,5 +214,7 @@ def type_guess(rows, types=TYPES, strict=False):
210214
# element in case of a tie
211215
# See: http://stackoverflow.com/a/6783101/214950
212216
guesses_tuples = [(t, guess[t]) for t in types if t in guess]
217+
if not guesses_tuples:
218+
raise JobError('Failed to guess types')
213219
_columns.append(max(guesses_tuples, key=lambda t_n: t_n[1])[0])
214220
return _columns

0 commit comments

Comments
 (0)