@@ -25,9 +25,13 @@ import org.apache.arrow.vector.TimeMicroVector
25
25
import org.apache.arrow.vector.TimeMilliVector
26
26
import org.apache.arrow.vector.TimeNanoVector
27
27
import org.apache.arrow.vector.TimeSecVector
28
+ import org.apache.arrow.vector.TimeStampMicroTZVector
28
29
import org.apache.arrow.vector.TimeStampMicroVector
30
+ import org.apache.arrow.vector.TimeStampMilliTZVector
29
31
import org.apache.arrow.vector.TimeStampMilliVector
32
+ import org.apache.arrow.vector.TimeStampNanoTZVector
30
33
import org.apache.arrow.vector.TimeStampNanoVector
34
+ import org.apache.arrow.vector.TimeStampSecTZVector
31
35
import org.apache.arrow.vector.TimeStampSecVector
32
36
import org.apache.arrow.vector.TinyIntVector
33
37
import org.apache.arrow.vector.UInt1Vector
@@ -39,12 +43,16 @@ import org.apache.arrow.vector.VarCharVector
39
43
import org.apache.arrow.vector.VectorSchemaRoot
40
44
import org.apache.arrow.vector.ViewVarBinaryVector
41
45
import org.apache.arrow.vector.ViewVarCharVector
46
+ import org.apache.arrow.vector.complex.ListVector
42
47
import org.apache.arrow.vector.complex.StructVector
43
48
import org.apache.arrow.vector.ipc.ArrowFileReader
44
49
import org.apache.arrow.vector.ipc.ArrowReader
45
50
import org.apache.arrow.vector.ipc.ArrowStreamReader
46
51
import org.apache.arrow.vector.types.pojo.Field
47
52
import org.apache.arrow.vector.util.DateUtility
53
+ import org.apache.arrow.vector.util.DateUtility.getLocalDateTimeFromEpochMicro
54
+ import org.apache.arrow.vector.util.DateUtility.getLocalDateTimeFromEpochMilli
55
+ import org.apache.arrow.vector.util.DateUtility.getLocalDateTimeFromEpochNano
48
56
import org.jetbrains.kotlinx.dataframe.AnyBaseCol
49
57
import org.jetbrains.kotlinx.dataframe.AnyFrame
50
58
import org.jetbrains.kotlinx.dataframe.DataColumn
@@ -63,6 +71,7 @@ import java.math.BigDecimal
63
71
import java.math.BigInteger
64
72
import java.nio.channels.ReadableByteChannel
65
73
import java.nio.channels.SeekableByteChannel
74
+ import java.util.concurrent.TimeUnit
66
75
import kotlin.reflect.KType
67
76
import kotlin.reflect.full.withNullability
68
77
import kotlin.reflect.typeOf
@@ -197,11 +206,58 @@ private fun TimeStampSecVector.values(range: IntRange): List<LocalDateTime?> =
197
206
}
198
207
}
199
208
209
/**
 * Reads the rows in [range] from this timezone-aware nanosecond timestamp vector.
 *
 * Each non-null cell is passed, together with the vector's `timeZone`, to
 * `DateUtility.getLocalDateTimeFromEpochNano` and the result is converted to a
 * kotlinx [LocalDateTime]. Null cells are returned as `null`.
 *
 * @param range row indices to read from the vector.
 * @return one entry per row index in [range], in iteration order.
 */
private fun TimeStampNanoTZVector.values(range: IntRange): List<LocalDateTime?> =
    range.map { row ->
        // The null check and the value lookup must use the same row index: the position
        // inside [range] only coincides with the row index when the range starts at 0.
        if (isNull(row)) {
            null
        } else {
            getLocalDateTimeFromEpochNano(getObject(row), timeZone).toKotlinLocalDateTime()
        }
    }
217
+
218
/**
 * Reads the rows in [range] from this timezone-aware microsecond timestamp vector.
 *
 * Each non-null cell is passed, together with the vector's `timeZone`, to
 * `DateUtility.getLocalDateTimeFromEpochMicro` and the result is converted to a
 * kotlinx [LocalDateTime]. Null cells are returned as `null`.
 *
 * @param range row indices to read from the vector.
 * @return one entry per row index in [range], in iteration order.
 */
private fun TimeStampMicroTZVector.values(range: IntRange): List<LocalDateTime?> =
    range.map { row ->
        // The null check and the value lookup must use the same row index: the position
        // inside [range] only coincides with the row index when the range starts at 0.
        if (isNull(row)) {
            null
        } else {
            getLocalDateTimeFromEpochMicro(getObject(row), timeZone).toKotlinLocalDateTime()
        }
    }
226
+
227
/**
 * Reads the rows in [range] from this timezone-aware millisecond timestamp vector.
 *
 * Each non-null cell is passed, together with the vector's `timeZone`, to
 * `DateUtility.getLocalDateTimeFromEpochMilli` and the result is converted to a
 * kotlinx [LocalDateTime]. Null cells are returned as `null`.
 *
 * @param range row indices to read from the vector.
 * @return one entry per row index in [range], in iteration order.
 */
private fun TimeStampMilliTZVector.values(range: IntRange): List<LocalDateTime?> =
    range.map { row ->
        // The null check and the value lookup must use the same row index: the position
        // inside [range] only coincides with the row index when the range starts at 0.
        if (isNull(row)) {
            null
        } else {
            getLocalDateTimeFromEpochMilli(getObject(row), timeZone).toKotlinLocalDateTime()
        }
    }
235
+
236
/**
 * Reads the rows in [range] from this timezone-aware second-resolution timestamp vector.
 *
 * Each non-null cell is widened from seconds to milliseconds with [TimeUnit.SECONDS],
 * then passed, together with the vector's `timeZone`, to
 * `DateUtility.getLocalDateTimeFromEpochMilli`; the result is converted to a
 * kotlinx [LocalDateTime]. Null cells are returned as `null`.
 *
 * @param range row indices to read from the vector.
 * @return one entry per row index in [range], in iteration order.
 */
private fun TimeStampSecTZVector.values(range: IntRange): List<LocalDateTime?> =
    range.map { row ->
        // The null check and the value lookup must use the same row index: the position
        // inside [range] only coincides with the row index when the range starts at 0.
        if (isNull(row)) {
            null
        } else {
            // This block only calls the millisecond-based DateUtility helper, so convert first.
            val epochMillis = TimeUnit.SECONDS.toMillis(getObject(row))
            getLocalDateTimeFromEpochMilli(epochMillis, timeZone).toKotlinLocalDateTime()
        }
    }
246
+
200
247
/**
 * Collects the struct value of every row in [range] by calling `getObject` on each row index.
 *
 * @param range row indices to read from the vector.
 * @return one (possibly `null`) field-name-to-value map per row index, in iteration order.
 */
private fun StructVector.values(range: IntRange): List<Map<String, Any?>?> =
    range.map { row -> getObject(row) }
204
251
252
/**
 * Collects the list value of every row in [range].
 *
 * Rows reported as null by `isNull` yield `null`; all other rows yield the result of
 * `getObject` for that row.
 *
 * @param range row indices to read from the vector.
 * @return one (possibly `null`) list per row index, in iteration order.
 */
private fun ListVector.values(range: IntRange): List<List<Any?>?> =
    range.map { row -> if (isNull(row)) null else getObject(row) }
260
+
205
261
private fun NullVector.values (range : IntRange ): List <Nothing ?> =
206
262
range.map {
207
263
getObject(it) as Nothing?
@@ -287,7 +343,14 @@ private fun List<Nothing?>.withTypeNullable(
287
343
288
344
private fun readField (root : VectorSchemaRoot , field : Field , nullability : NullabilityOptions ): AnyBaseCol {
289
345
try {
290
- val range = 0 until root.rowCount
346
+ val range = 0 .. < root.rowCount
347
+
348
+ // TODO
349
+ // most types can be read directly from Arrow
350
+ // some nested types need a recursive type map which we don't support yet
351
+ // so we just rely on DataFrame runtime inference instead
352
+ var infer = Infer .None
353
+
291
354
val (list, type) = when (val vector = root.getVector(field)) {
292
355
is VarCharVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
293
356
@@ -349,16 +412,29 @@ private fun readField(root: VectorSchemaRoot, field: Field, nullability: Nullabi
349
412
350
413
is TimeStampSecVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
351
414
352
- is StructVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
415
+ is TimeStampNanoTZVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
416
+
417
+ is TimeStampMicroTZVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
418
+
419
+ is TimeStampMilliTZVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
420
+
421
+ is TimeStampSecTZVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
353
422
354
423
is NullVector -> vector.values(range).withTypeNullable(field.isNullable, nullability)
355
424
356
- else -> {
357
- throw NotImplementedError (" reading from ${vector.javaClass.canonicalName} is not implemented" )
358
- }
425
+ is StructVector -> vector.values(range)
426
+ .withTypeNullable(field.isNullable, nullability)
427
+ .also { infer = Infer .Type }
428
+
429
+ is ListVector -> vector.values(range)
430
+ .withTypeNullable(field.isNullable, nullability)
431
+ .also { infer = Infer .Type }
432
+
433
+ else -> throw NotImplementedError (" reading from ${vector.javaClass.canonicalName} is not implemented" )
359
434
}
360
- return DataColumn .createValueColumn(field.name, list, type, Infer .None )
361
- } catch (unexpectedNull: NullabilityException ) {
435
+
436
+ return DataColumn .createValueColumn(name = field.name, values = list, type = type, infer = infer)
437
+ } catch (_: NullabilityException ) {
362
438
throw IllegalArgumentException (" Column `${field.name} ` should be not nullable but has nulls" )
363
439
}
364
440
}
0 commit comments