Skip to content

Commit 211412a

Browse files
committed
WIP spark
1 parent 0e16817 commit 211412a

File tree

4 files changed

+67
-1
lines changed

4 files changed

+67
-1
lines changed

examples/idea-examples/unsupported-data-sources/build.gradle.kts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ dependencies {
2727
implementation(libs.exposed.jdbc)
2828
implementation(libs.exposed.json)
2929
implementation(libs.exposed.money)
30+
31+
// (kotlin) spark support
32+
implementation(libs.kotlin.spark)
33+
compileOnly(libs.spark)
3034
}
3135

3236
tasks.withType<KotlinCompile> {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
package org.jetbrains.kotlinx.dataframe.examples.kotlinSpark
2+
3+
import org.apache.spark.sql.Dataset
4+
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
5+
import org.jetbrains.kotlinx.dataframe.api.print
6+
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
7+
import org.jetbrains.kotlinx.spark.api.withSpark
8+
9+
@DataSchema
10+
data class Name(val firstName: String, val lastName: String)
11+
12+
@DataSchema
13+
data class Person(
14+
val name: Name,
15+
val age: Int,
16+
val city: String?,
17+
val weight: Int?,
18+
val isHappy: Boolean,
19+
)
20+
21+
object TypedDataset {
22+
23+
/**
24+
* With the Kotlin Spark API, normal Kotlin data classes are supported,
25+
* meaning we can reuse the same class for Spark and DataFrame!
26+
*
27+
*/
28+
@JvmStatic
29+
fun main(args: Array<String>): Unit =
30+
withSpark {
31+
// Spark Dataset
32+
val rawDataset: Dataset<Person> = listOf(
33+
Person(Name("Alice", "Cooper"), 15, "London", 54, true),
34+
Person(Name("Bob", "Dylan"), 45, "Dubai", 87, true),
35+
Person(Name("Charlie", "Daniels"), 20, "Moscow", null, false),
36+
Person(Name("Charlie", "Chaplin"), 40, "Milan", null, true),
37+
Person(Name("Bob", "Marley"), 30, "Tokyo", 68, true),
38+
Person(Name("Alice", "Wolf"), 20, null, 55, false),
39+
Person(Name("Charlie", "Byrd"), 30, "Moscow", 90, true),
40+
).toDS()
41+
42+
// we can perform large operations in Spark
43+
val dataset = rawDataset.filter { it.age > 17 }
44+
45+
// and convert to DataFrame
46+
val dataframe = dataset.collectAsList().toDataFrame()
47+
48+
dataframe.print(columnTypes = true, borders = true)
49+
}
50+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
package org.jetbrains.kotlinx.dataframe.examples.kotlinSpark
2+
3+
fun main() {
4+
5+
}

gradle/libs.versions.toml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ jts = "1.20.0"
6464
kandy = "0.8.1-dev-66"
6565
exposed = "1.0.0-beta-2"
6666

67+
# check the versions down in the [libraries] section too!
68+
kotlin-spark = "1.2.4"
69+
spark = "3.3.2"
70+
6771
[libraries]
6872
ksp-gradle = { group = "com.google.devtools.ksp", name = "symbol-processing-gradle-plugin", version.ref = "ksp" }
6973
ksp-api = { group = "com.google.devtools.ksp", name = "symbol-processing-api", version.ref = "ksp" }
@@ -157,14 +161,17 @@ kotlin-jupyter-test-kit = { group = "org.jetbrains.kotlinx", name = "kotlin-jupy
157161
kotlinx-benchmark-runtime = { group = "org.jetbrains.kotlinx", name = "kotlinx-benchmark-runtime", version.ref = "benchmark" }
158162
dataframe-symbol-processor = { group = "org.jetbrains.kotlinx.dataframe", name = "symbol-processor-all" }
159163

160-
duckdb-jdbc = { group = "org.duckdb", name = "duckdb_jdbc", version.ref= "duckdb"}
164+
duckdb-jdbc = { group = "org.duckdb", name = "duckdb_jdbc", version.ref = "duckdb" }
161165

162166
exposed-core = { group = "org.jetbrains.exposed", name = "exposed-core", version.ref = "exposed" }
163167
exposed-jdbc = { group = "org.jetbrains.exposed", name = "exposed-jdbc", version.ref = "exposed" }
164168
exposed-kotlin-datetime = { group = "org.jetbrains.exposed", name = "exposed-kotlin-datetime", version.ref = "exposed" }
165169
exposed-json = { group = "org.jetbrains.exposed", name = "exposed-json", version.ref = "exposed" }
166170
exposed-money = { group = "org.jetbrains.exposed", name = "exposed-money", version.ref = "exposed" }
167171

172+
kotlin-spark = { group = "org.jetbrains.kotlinx.spark", name = "kotlin-spark-api_3.3.2_2.13", version.ref = "kotlin-spark" }
173+
spark = { group = "org.apache.spark", name = "spark-sql_2.13", version.ref = "spark" }
174+
168175
[plugins]
169176
jupyter-api = { id = "org.jetbrains.kotlin.jupyter.api", version.ref = "kotlinJupyter" }
170177
ksp = { id = "com.google.devtools.ksp", version.ref = "ksp" }

0 commit comments

Comments
 (0)