Skip to content

Commit 0c9b23d

Browse files
authored
Merge pull request #75 from static-frame/74/dta-dafault-int
Default type-parsed int is always 64 bit (even on Windows)
2 parents b555dde + ac6b278 commit 0c9b23d

File tree

4 files changed

+38
-50
lines changed

4 files changed

+38
-50
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ jobs:
3939
if: matrix.os == 'macOS' || matrix.os == 'Ubuntu'
4040
- run: echo '::add-matcher::.github/problem-matchers/msvc.json'
4141
if: matrix.os == 'Windows'
42-
- uses: pypa/cibuildwheel@v2.9.0
42+
- uses: pypa/cibuildwheel@v2.11.1
4343
with:
4444
output-dir: dist
4545
env:
@@ -69,7 +69,7 @@ jobs:
6969
if: matrix.os == 'macOS' || matrix.os == 'Ubuntu'
7070
- run: echo '::add-matcher::.github/problem-matchers/msvc.json'
7171
if: matrix.os == 'Windows'
72-
- uses: pypa/cibuildwheel@v2.9.0
72+
- uses: pypa/cibuildwheel@v2.11.1
7373
with:
7474
output-dir: dist
7575
env:

src/_arraykit.c

Lines changed: 19 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -397,36 +397,32 @@ AK_TPS_Resolve(AK_TypeParserState previous, AK_TypeParserState new) {
397397
PyArray_Descr*
398398
AK_TPS_ToDtype(AK_TypeParserState state) {
399399
PyArray_Descr *dtype = NULL;
400-
PyArray_Descr *dtype_final;
401400

402401
switch (state) {
403402
case TPS_UNKNOWN:
404-
dtype = PyArray_DescrFromType(NPY_UNICODE);
403+
dtype = PyArray_DescrNewFromType(NPY_UNICODE);
405404
break;
406405
case TPS_EMPTY: // all empty defaults to string
407-
dtype = PyArray_DescrFromType(NPY_UNICODE);
406+
dtype = PyArray_DescrNewFromType(NPY_UNICODE);
408407
break;
409408
case TPS_STRING:
410-
dtype = PyArray_DescrFromType(NPY_UNICODE);
409+
dtype = PyArray_DescrNewFromType(NPY_UNICODE);
411410
break;
412411
case TPS_BOOL:
413-
dtype = PyArray_DescrFromType(NPY_BOOL);
412+
dtype = PyArray_DescrNewFromType(NPY_BOOL);
414413
break;
415414
case TPS_INT:
416-
dtype = PyArray_DescrFromType(NPY_INT64);
415+
dtype = PyArray_DescrNewFromType(NPY_INT64);
417416
break;
418417
case TPS_FLOAT:
419-
dtype = PyArray_DescrFromType(NPY_FLOAT64);
418+
dtype = PyArray_DescrNewFromType(NPY_FLOAT64);
420419
break;
421420
case TPS_COMPLEX:
422-
dtype = PyArray_DescrFromType(NPY_COMPLEX128);
421+
dtype = PyArray_DescrNewFromType(NPY_COMPLEX128);
423422
break;
424423
}
425424
if (dtype == NULL) return NULL; // assume error is set by PyArray_DescrFromType
426-
// get a fresh instance as we might mutate
427-
dtype_final = PyArray_DescrNew(dtype);
428-
Py_DECREF(dtype);
429-
return dtype_final;
425+
return dtype;
430426
}
431427

432428
//------------------------------------------------------------------------------
@@ -1297,7 +1293,7 @@ AK_CPL_Free(AK_CodePointLine* cpl)
12971293
static inline int
12981294
AK_CPL_resize_buffer(AK_CodePointLine* cpl, Py_ssize_t count)
12991295
{
1300-
if ((cpl->buffer_count + count) >= cpl->buffer_capacity) {
1296+
if (AK_UNLIKELY((cpl->buffer_count + count) >= cpl->buffer_capacity)) {
13011297
// realloc
13021298
cpl->buffer_capacity *= 2; // needs to be max of this or element_length
13031299
cpl->buffer = PyMem_Realloc(cpl->buffer,
@@ -1313,7 +1309,7 @@ static inline int
13131309
AK_CPL_resize_offsets(AK_CodePointLine* cpl)
13141310
{
13151311
// increment by at most one, so only need to check if equal
1316-
if (cpl->offsets_count == cpl->offsets_capacity) {
1312+
if (AK_UNLIKELY(cpl->offsets_count == cpl->offsets_capacity)) {
13171313
// realloc
13181314
cpl->offsets_capacity *= 2;
13191315
cpl->offsets = PyMem_Realloc(cpl->offsets,
@@ -1923,29 +1919,21 @@ AK_CPL_to_array_bytes(AK_CodePointLine* cpl, PyArray_Descr* dtype)
19231919
return array;
19241920
}
19251921

1926-
// If we cannot direclty convert bytes to values, create a bytes array and then use PyArray_CastToType to use numpy to interpet it as a new a array. This forces
1922+
// If we cannot directly convert bytes to values, create a bytes array and then use PyArray_CastToType to have NumPy interpret it as a new array.
19271923
static inline PyObject*
19281924
AK_CPL_to_array_via_cast(AK_CodePointLine* cpl, PyArray_Descr* dtype)
19291925
{
1930-
// we cannot use this dtype in array construction as it will mutate a global
1931-
PyArray_Descr *dtype_bytes_proto = PyArray_DescrFromType(NPY_STRING);
1932-
if (dtype_bytes_proto == NULL) {
1933-
Py_DECREF(dtype);
1934-
return NULL;
1935-
}
1936-
PyArray_Descr *dtype_bytes = PyArray_DescrNew(dtype_bytes_proto);
1937-
Py_DECREF(dtype_bytes_proto);
1926+
PyArray_Descr *dtype_bytes = PyArray_DescrNewFromType(NPY_STRING);
19381927
if (dtype_bytes == NULL) {
19391928
Py_DECREF(dtype);
19401929
return NULL;
19411930
}
19421931
PyObject* array_bytes = AK_CPL_to_array_bytes(cpl, dtype_bytes);
19431932
if (array_bytes == NULL) {
19441933
Py_DECREF(dtype);
1945-
Py_DECREF(dtype_bytes); // was not stolen if array creation failed
1934+
// dtype_bytes stolen even if array creation failed
19461935
return NULL;
19471936
}
1948-
19491937
PyObject *array = PyArray_CastToType((PyArrayObject*)array_bytes, dtype, 0);
19501938
Py_DECREF(array_bytes);
19511939
if (array == NULL) {
@@ -2096,15 +2084,15 @@ AK_CPG_resize(AK_CodePointGrid* cpg, Py_ssize_t line)
20962084
Py_ssize_t lines_count = cpg->lines_count;
20972085
if (line < lines_count) return 0; // most common scenario
20982086

2099-
if (line >= cpg->lines_capacity) {
2087+
if (AK_UNLIKELY(line >= cpg->lines_capacity)) {
21002088
cpg->lines_capacity *= 2;
21012089
// NOTE: we assume this only copies the pointers, not the data in the CPLs
21022090
cpg->lines = PyMem_Realloc(cpg->lines,
21032091
sizeof(AK_CodePointLine*) * cpg->lines_capacity);
21042092
if (cpg->lines == NULL) return -1;
21052093
}
21062094
// Create the new CPL; first check if we need to set type_parse by calling into the dtypes function. For now we assume sequential growth, so should only check if equal
2107-
if (line >= lines_count) {
2095+
if (AK_UNLIKELY(line >= lines_count)) {
21082096
// determine if we need to parse types
21092097
bool type_parse = false;
21102098
if (cpg->dtypes == NULL) {
@@ -3199,29 +3187,20 @@ dtype_from_element(PyObject *Py_UNUSED(m), PyObject *arg)
31993187
{
32003188
// -------------------------------------------------------------------------
32013189
// 1. Handle fast, exact type checks first.
3202-
3203-
// None
32043190
if (arg == Py_None) {
32053191
return (PyObject*)PyArray_DescrFromType(NPY_OBJECT);
32063192
}
3207-
3208-
// Float
32093193
if (PyFloat_CheckExact(arg)) {
3210-
return (PyObject*)PyArray_DescrFromType(NPY_DOUBLE);
3194+
return (PyObject*)PyArray_DescrFromType(NPY_FLOAT64);
32113195
}
3212-
3213-
// Integers
32143196
if (PyLong_CheckExact(arg)) {
3215-
return (PyObject*)PyArray_DescrFromType(NPY_LONG);
3197+
return (PyObject*)PyArray_DescrFromType(NPY_INT64);
32163198
}
3217-
3218-
// Bool
32193199
if (PyBool_Check(arg)) {
32203200
return (PyObject*)PyArray_DescrFromType(NPY_BOOL);
32213201
}
32223202

32233203
PyObject* dtype = NULL;
3224-
32253204
// String
32263205
if (PyUnicode_CheckExact(arg)) {
32273206
PyArray_Descr* descr = PyArray_DescrFromType(NPY_UNICODE);
@@ -3230,27 +3209,23 @@ dtype_from_element(PyObject *Py_UNUSED(m), PyObject *arg)
32303209
Py_DECREF(descr);
32313210
return dtype;
32323211
}
3233-
32343212
// Bytes
32353213
if (PyBytes_CheckExact(arg)) {
32363214
PyArray_Descr* descr = PyArray_DescrFromType(NPY_STRING);
32373215
if (descr == NULL) return NULL;
3238-
32393216
dtype = (PyObject*)PyArray_DescrFromObject(arg, descr);
32403217
Py_DECREF(descr);
32413218
return dtype;
32423219
}
32433220

32443221
// -------------------------------------------------------------------------
32453222
// 2. Construct dtype (slightly more complicated)
3246-
32473223
// Already known
32483224
dtype = PyObject_GetAttrString(arg, "dtype");
32493225
if (dtype) {
32503226
return dtype;
32513227
}
32523228
PyErr_Clear();
3253-
32543229
// -------------------------------------------------------------------------
32553230
// 3. Handles everything else.
32563231
return (PyObject*)PyArray_DescrFromType(NPY_OBJECT);
@@ -3956,4 +3931,5 @@ PyInit__arraykit(void)
39563931
return NULL;
39573932
}
39583933
return m;
3959-
}
3934+
}
3935+

test/test_delimited_to_arrays.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -738,8 +738,12 @@ def test_delimited_to_arrays_parse_m(self) -> None:
738738
post1 = delimited_to_arrays(msg, axis=1, skipinitialspace=False)
739739
self.assertEqual([a.tolist() for a in post1], [[' 1', ' 2-'], [' 2', ' 2-0'], [3, -3]])
740740

741-
742-
# import ipdb; ipdb.set_trace()
741+
def test_delimited_to_arrays_parse_n(self) -> None:
742+
msg = ['1,2,1', '3,4,5']
743+
post1 = delimited_to_arrays(msg, axis=1)
744+
# NOTE: automatic type parsing should always give an int64
745+
self.assertEqual([str(a.dtype) for a in post1],
746+
['int64', 'int64', 'int64'])
743747

744748
#---------------------------------------------------------------------------
745749
def test_delimited_to_arrays_float_a(self) -> None:

test/test_util.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,8 @@ def test_isna_element_false(self) -> None:
326326
self.assertFalse(isna_element(datetime.date(2020, 12, 31)))
327327
self.assertFalse(isna_element(False))
328328

329+
#---------------------------------------------------------------------------
330+
329331
def test_dtype_from_element_core_dtypes(self) -> None:
330332
dtypes = [
331333
np.longlong,
@@ -367,11 +369,11 @@ def test_dtype_from_element_obj_dtypes(self) -> None:
367369
NT = collections.namedtuple('NT', tuple('abc'))
368370

369371
dtype_obj_pairs = [
370-
(np.int_, 12),
371-
(np.float_, 12.0),
372+
(np.int64, 12),
373+
(np.float64, 12.0),
372374
(np.bool_, True),
373375
(np.dtype('O'), None),
374-
(np.float_, float('NaN')),
376+
(np.float64, float('NaN')),
375377
(np.dtype('O'), object()),
376378
(np.dtype('O'), (1, 2, 3)),
377379
(np.dtype('O'), NT(1, 2, 3)),
@@ -393,6 +395,12 @@ def test_dtype_from_element_str_and_bytes_dtypes(self) -> None:
393395
self.assertEqual(np.dtype(f'|S{size}'), dtype_from_element(bytes(size)))
394396
self.assertEqual(np.dtype(f'<U{size}'), dtype_from_element('x' * size))
395397

398+
def test_dtype_from_element_int(self) -> None:
399+
# make sure all platforms give 64 bit int
400+
self.assertEqual(str(dtype_from_element(3)), 'int64')
401+
402+
#---------------------------------------------------------------------------
403+
396404
def test_get_new_indexers_and_screen_a(self) -> None:
397405
indexersA = np.array([9, 9, 9, 9, 0, 0, 1, 4, 5, 0, 0, 0, 1], dtype=np.int64)
398406
postA = get_new_indexers_and_screen_full(indexersA, np.arange(10, dtype=np.int64))

0 commit comments

Comments
 (0)