Skip to content

Commit 70c8ee8

Browse files
committed
updating multik example, 2d
1 parent 79eac6c commit 70c8ee8

File tree

3 files changed

+105
-43
lines changed

3 files changed

+105
-43
lines changed

examples/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ They show how to convert to and from Kotlin Dataframe and their respective table
1616
for an example of using Kotlin DataFrame with [Spark](https://spark.apache.org/).
1717
* **Spark (with Kotlin Spark API)**: See the [kotlinSpark folder](./idea-examples/unsupported-data-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/kotlinSpark)
1818
for an example of using Kotlin DataFrame with the [Kotlin Spark API](https://github.com/JetBrains/kotlin-spark-api).
19+
* **Multik**: See the [multik folder](./idea-examples/unsupported-data-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/multik)
20+
for an example of using Kotlin DataFrame with [Multik](https://github.com/Kotlin/multik).
1921

2022

2123
### Notebook examples

examples/idea-examples/unsupported-data-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/multik/compatibilityLayer.kt

Lines changed: 84 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@ import kotlin.reflect.typeOf
3535
/** Converts a one-dimensional array ([D1Array]) to a [DataColumn] with optional [name]. */
3636
inline fun <reified N> D1Array<N>.convertToColumn(name: String = ""): DataColumn<N> = column(toList()) named name
3737

38+
/**
39+
* Converts a one-dimensional array ([D1Array]) of type [N] into a DataFrame.
40+
* The resulting DataFrame contains a single column named "value", where each element of the array becomes a row in the DataFrame.
41+
*
42+
* @return a DataFrame where each element of the source array is represented as a row in a column named "value" under the schema [ValueProperty].
43+
*/
44+
inline fun <reified N> D1Array<N>.convertToDataFrame(): DataFrame<ValueProperty<N>> =
45+
dataFrameOf(ValueProperty<*>::value.name to column(toList()))
46+
.cast()
47+
3848
/** Converts a [DataColumn] to a one-dimensional array ([D1Array]). */
3949
@JvmName("convertNumberColumnToMultik")
4050
inline fun <reified N> DataColumn<N>.convertToMultik(): D1Array<N> where N : Number, N : Comparable<N> =
@@ -44,28 +54,20 @@ inline fun <reified N> DataColumn<N>.convertToMultik(): D1Array<N> where N : Num
4454
@JvmName("convertComplexColumnToMultik")
4555
inline fun <reified N : Complex> DataColumn<N>.convertToMultik(): D1Array<N> = mk.ndarray(toList())
4656

57+
/** Converts a [DataColumn] selected by [column] to a one-dimensional array ([D1Array]). */
4758
@JvmName("convertNumberColumnFromDfToMultik")
4859
@OverloadResolutionByLambdaReturnType
4960
inline fun <T, reified N> DataFrame<T>.convertToMultik(
5061
crossinline column: ColumnSelector<T, N>,
5162
): D1Array<N>
5263
where N : Number, N : Comparable<N> = getColumn { column(it) }.convertToMultik()
5364

65+
/** Converts a [DataColumn] selected by [column] to a one-dimensional array ([D1Array]). */
5466
@JvmName("convertComplexColumnFromDfToMultik")
5567
@OverloadResolutionByLambdaReturnType
5668
inline fun <T, reified N : Complex> DataFrame<T>.convertToMultik(crossinline column: ColumnSelector<T, N>): D1Array<N> =
5769
getColumn { column(it) }.convertToMultik()
5870

59-
/**
60-
* Converts a one-dimensional array ([D1Array]) of type [N] into a DataFrame.
61-
* The resulting DataFrame contains a single column named "value", where each element of the array becomes a row in the DataFrame.
62-
*
63-
* @return a DataFrame where each element of the source array is represented as a row in a column named "value" under the schema [ValueProperty].
64-
*/
65-
inline fun <reified N> D1Array<N>.convertToDataFrame(): DataFrame<ValueProperty<N>> =
66-
dataFrameOf(ValueProperty<*>::value.name to column(toList()))
67-
.cast()
68-
6971
// endregion
7072

7173
// region 2D
@@ -85,54 +87,95 @@ inline fun <reified N> D2Array<N>.convertToDataFrame(columnNameGenerator: (Int)
8587
.toColumn(columnNameGenerator(col))
8688
}.toDataFrame()
8789

88-
@JvmName("convertToMultikOfComplex")
89-
inline fun <reified N : Complex> AnyFrame.convertToMultikOf(_klass: KClass<Complex> = Complex::class): D2Array<N> =
90-
convertToMultik { colsOf<N>() }
91-
92-
@JvmName("convertToMultikOfNumber")
93-
inline fun <reified N> AnyFrame.convertToMultikOf(
94-
_klass: KClass<Number> = Number::class,
95-
): D2Array<N> where N : Number, N : Comparable<N> = convertToMultik { colsOf<N>() }
96-
97-
@JvmName("convertToMultikGuess")
98-
fun AnyFrame.convertToMultik(): D2Array<*> {
99-
val columnTypes = columnTypes().distinct()
100-
return when {
101-
columnTypes.size != 1 -> error("found column types: $columnTypes")
102-
columnTypes.single() == typeOf<Complex>() -> convertToMultik { colsOf<Complex>() }
103-
columnTypes.single().isSubtypeOf(typeOf<Byte>()) -> convertToMultik { colsOf<Byte>() }
104-
columnTypes.single().isSubtypeOf(typeOf<Short>()) -> convertToMultik { colsOf<Short>() }
105-
columnTypes.single().isSubtypeOf(typeOf<Int>()) -> convertToMultik { colsOf<Int>() }
106-
columnTypes.single().isSubtypeOf(typeOf<Long>()) -> convertToMultik { colsOf<Long>() }
107-
columnTypes.single().isSubtypeOf(typeOf<Float>()) -> convertToMultik { colsOf<Float>() }
108-
columnTypes.single().isSubtypeOf(typeOf<Double>()) -> convertToMultik { colsOf<Double>() }
109-
else -> error("found column types: $columnTypes")
110-
}
111-
}
112-
90+
/**
91+
* Converts a [DataFrame] to a two-dimensional array ([D2Array]).
92+
* You'll need to specify which columns to convert using the [columns] selector.
93+
*
94+
* All columns need to be of the same type. If no columns are supplied, the function
95+
* will only succeed if all columns are of the same type.
96+
*
97+
* @see convertToMultikOf
98+
*/
11399
@JvmName("convertNumberColumnsFromDfToMultik")
114100
@OverloadResolutionByLambdaReturnType
115101
inline fun <T, reified N> DataFrame<T>.convertToMultik(
116102
crossinline columns: ColumnsSelector<T, N>,
117103
): D2Array<N>
118104
where N : Number, N : Comparable<N> = getColumns { columns(it) }.convertToMultik()
119105

106+
/**
107+
* Converts a [DataFrame] to a two-dimensional array ([D2Array]).
108+
* You'll need to specify which columns to convert using the [columns] selector.
109+
*
110+
* All columns need to be of the same type. If no columns are supplied, the function
111+
* will only succeed if all columns are of the same type.
112+
*
113+
* @see convertToMultikOf
114+
*/
120115
@JvmName("convertComplexColumnsFromDfToMultik")
121116
@OverloadResolutionByLambdaReturnType
122117
inline fun <T, reified N : Complex> DataFrame<T>.convertToMultik(
123118
crossinline columns: ColumnsSelector<T, N>,
124119
): D2Array<N> = getColumns { columns(it) }.convertToMultik()
125120

121+
/**
122+
* Converts a [DataFrame] to a two-dimensional array ([D2Array]).
123+
* This overload takes no `columns` selector; it converts all columns of the DataFrame.
124+
*
125+
* All columns need to be of the same type. If no columns are supplied, the function
126+
* will only succeed if all columns are of the same type.
127+
*
128+
* @see convertToMultikOf
129+
*/
130+
@JvmName("convertToMultikGuess")
131+
fun AnyFrame.convertToMultik(): D2Array<*> {
132+
val columnTypes = columnTypes().distinct()
133+
val type = columnTypes.singleOrNull() ?: error("found column types: $columnTypes")
134+
return when {
135+
type == typeOf<Complex>() -> convertToMultik { colsOf<Complex>() }
136+
type.isSubtypeOf(typeOf<Byte>()) -> convertToMultik { colsOf<Byte>() }
137+
type.isSubtypeOf(typeOf<Short>()) -> convertToMultik { colsOf<Short>() }
138+
type.isSubtypeOf(typeOf<Int>()) -> convertToMultik { colsOf<Int>() }
139+
type.isSubtypeOf(typeOf<Long>()) -> convertToMultik { colsOf<Long>() }
140+
type.isSubtypeOf(typeOf<Float>()) -> convertToMultik { colsOf<Float>() }
141+
type.isSubtypeOf(typeOf<Double>()) -> convertToMultik { colsOf<Double>() }
142+
else -> error("found column types: $columnTypes")
143+
}
144+
}
145+
146+
/**
147+
* Converts a [DataFrame] to a two-dimensional array ([D2Array]) by taking all
148+
* columns of type [N].
149+
*
150+
* @see convertToMultik
151+
*/
152+
@JvmName("convertToMultikOfComplex")
153+
@Suppress("LocalVariableName")
154+
inline fun <reified N : Complex> AnyFrame.convertToMultikOf(
155+
// unused param to avoid overload resolution ambiguity
156+
_klass: KClass<Complex> = Complex::class,
157+
): D2Array<N> =
158+
convertToMultik { colsOf<N>() }
159+
160+
/**
161+
* Converts a [DataFrame] to a two-dimensional array ([D2Array]) by taking all
162+
* columns of type [N].
163+
*
164+
* @see convertToMultik
165+
*/
166+
@JvmName("convertToMultikOfNumber")
167+
@Suppress("LocalVariableName")
168+
inline fun <reified N> AnyFrame.convertToMultikOf(
169+
// unused param to avoid overload resolution ambiguity
170+
_klass: KClass<Number> = Number::class,
171+
): D2Array<N> where N : Number, N : Comparable<N> = convertToMultik { colsOf<N>() }
172+
126173
@JvmName("convertNumberColumnsToMultik")
127174
inline fun <reified N> List<DataColumn<N>>.convertToMultik(): D2Array<N> where N : Number, N : Comparable<N> =
128-
mk.ndarray(
129-
toDataFrame().map { it.values() as List<N> },
130-
)
175+
mk.ndarray(toDataFrame().map { it.values() as List<N> })
131176

132177
@JvmName("convertComplexColumnsToMultik")
133178
inline fun <reified N : Complex> List<DataColumn<N>>.convertToMultik(): D2Array<N> =
134-
mk.ndarray(
135-
toDataFrame().map { it.values() as List<N> },
136-
)
179+
mk.ndarray(toDataFrame().map { it.values() as List<N> })
137180

138181
// endregion

examples/idea-examples/unsupported-data-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/multik/main.kt

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
package org.jetbrains.kotlinx.dataframe.examples.multik
22

3+
import org.jetbrains.kotlinx.dataframe.api.cast
4+
import org.jetbrains.kotlinx.dataframe.api.colsOf
35
import org.jetbrains.kotlinx.dataframe.api.describe
46
import org.jetbrains.kotlinx.dataframe.api.mean
7+
import org.jetbrains.kotlinx.dataframe.api.meanFor
58
import org.jetbrains.kotlinx.dataframe.api.print
69
import org.jetbrains.kotlinx.dataframe.api.value
710
import org.jetbrains.kotlinx.multik.api.mk
@@ -42,12 +45,26 @@ fun oneDimension() {
4245

4346
fun twoDimensions() {
4447
// we can also convert a 2D ndarray to a DataFrame
48+
// This conversion will create columns like "col0", "col1", etc.
49+
// but will allow for access similar to that in Multik
50+
// i.e. `multikArray[x][y] == dataframe[x][y]`
4551
val mk1 = mk.rand<Int>(5, 10)
4652
println(mk1)
47-
4853
val df = mk1.convertToDataFrame()
4954
df.print()
5055

51-
val mk2 = df.convertToMultikOf<Int>()
56+
// this allows us to perform any DF operation:
57+
val means = df.meanFor { ("col0".."col9").cast<Int>() }
58+
means.print()
59+
60+
// we can convert back to Multik in multiple ways.
61+
// Multik can only store one type of data, so we need to specify the type or select
62+
// only the columns we want:
63+
val mk2 = df.convertToMultik { colsOf<Int>() }
64+
// or
65+
df.convertToMultikOf<Int>()
66+
// or if all columns are of the same type:
67+
df.convertToMultik()
68+
5269
println(mk2)
5370
}

0 commit comments

Comments
 (0)