Skip to content

Commit 157276e

Browse files
feat: port of substrait-spark module from Gluten
This module was part of the Gluten project and was subsequently removed from it. It is useful for converting Spark query plans to and from Substrait. Signed-off-by: andrew-coleman <[email protected]>
1 parent 44c078d commit 157276e

36 files changed

+3597
-1
lines changed

gradle.properties

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ jackson.version=2.16.1
2222
junit.version=5.8.1
2323
protobuf.version=3.25.3
2424
slf4j.version=2.0.13
25+
sparkbundle.version=3.4
26+
spark.version=3.4.2
2527

2628
#version that is going to be updated automatically by releases
2729
version = 0.33.0

settings.gradle.kts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
rootProject.name = "substrait"
22

3-
include("bom", "core", "isthmus", "isthmus-cli")
3+
include("bom", "core", "isthmus", "isthmus-cli", "spark")
44

55
pluginManagement {
66
plugins {

spark/build.gradle.kts

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
plugins {
2+
`maven-publish`
3+
id("java")
4+
id("scala")
5+
id("idea")
6+
id("com.diffplug.spotless") version "6.11.0"
7+
signing
8+
}
9+
10+
publishing {
11+
publications {
12+
create<MavenPublication>("maven-publish") {
13+
from(components["java"])
14+
15+
pom {
16+
name.set("Substrait Java")
17+
description.set(
18+
"Create a well-defined, cross-language specification for data compute operations"
19+
)
20+
url.set("https://github.com/substrait-io/substrait-java")
21+
licenses {
22+
license {
23+
name.set("The Apache License, Version 2.0")
24+
url.set("http://www.apache.org/licenses/LICENSE-2.0.txt")
25+
}
26+
}
27+
developers {
28+
developer {
29+
// TBD Get the list of
30+
}
31+
}
32+
scm {
33+
connection.set("scm:git:git://github.com:substrait-io/substrait-java.git")
34+
developerConnection.set("scm:git:ssh://github.com:substrait-io/substrait-java")
35+
url.set("https://github.com/substrait-io/substrait-java/")
36+
}
37+
}
38+
}
39+
}
40+
repositories {
41+
maven {
42+
name = "local"
43+
val releasesRepoUrl = layout.buildDirectory.dir("repos/releases")
44+
val snapshotsRepoUrl = layout.buildDirectory.dir("repos/snapshots")
45+
url = uri(if (version.toString().endsWith("SNAPSHOT")) snapshotsRepoUrl else releasesRepoUrl)
46+
}
47+
}
48+
}
49+
50+
signing {
51+
setRequired({ gradle.taskGraph.hasTask("publishToSonatype") })
52+
val signingKeyId =
53+
System.getenv("SIGNING_KEY_ID").takeUnless { it.isNullOrEmpty() }
54+
?: extra["SIGNING_KEY_ID"].toString()
55+
val signingPassword =
56+
System.getenv("SIGNING_PASSWORD").takeUnless { it.isNullOrEmpty() }
57+
?: extra["SIGNING_PASSWORD"].toString()
58+
val signingKey =
59+
System.getenv("SIGNING_KEY").takeUnless { it.isNullOrEmpty() }
60+
?: extra["SIGNING_KEY"].toString()
61+
useInMemoryPgpKeys(signingKeyId, signingKey, signingPassword)
62+
sign(publishing.publications["maven-publish"])
63+
}
64+
65+
configurations.all {
66+
if (name.startsWith("incrementalScalaAnalysis")) {
67+
setExtendsFrom(emptyList())
68+
}
69+
}
70+
71+
java {
72+
toolchain { languageVersion.set(JavaLanguageVersion.of(17)) }
73+
withJavadocJar()
74+
withSourcesJar()
75+
}
76+
77+
tasks.withType<ScalaCompile>() {
78+
targetCompatibility = ""
79+
scalaCompileOptions.additionalParameters = listOf("-release:17")
80+
}
81+
82+
var SLF4J_VERSION = properties.get("slf4j.version")
83+
var SPARKBUNDLE_VERSION = properties.get("sparkbundle.version")
84+
var SPARK_VERSION = properties.get("spark.version")
85+
86+
sourceSets {
87+
main { scala { setSrcDirs(listOf("src/main/spark-${SPARKBUNDLE_VERSION}")) } }
88+
test { scala { setSrcDirs(listOf("src/test/scala", "src/test/spark-3.2", "src/main/scala")) } }
89+
}
90+
91+
dependencies {
92+
implementation(project(":core"))
93+
implementation("org.scala-lang:scala-library:2.12.16")
94+
implementation("org.apache.spark:spark-core_2.12:${SPARK_VERSION}")
95+
implementation("org.apache.spark:spark-sql_2.12:${SPARK_VERSION}")
96+
implementation("org.apache.spark:spark-catalyst_2.12:${SPARK_VERSION}")
97+
implementation("org.slf4j:slf4j-api:${SLF4J_VERSION}")
98+
99+
testImplementation("org.scalatest:scalatest_2.12:3.2.18")
100+
testRuntimeOnly("org.junit.platform:junit-platform-engine:1.10.0")
101+
testRuntimeOnly("org.junit.platform:junit-platform-launcher:1.10.0")
102+
testRuntimeOnly("org.scalatestplus:junit-5-10_2.12:3.2.18.0")
103+
testImplementation("org.apache.spark:spark-core_2.12:${SPARK_VERSION}:tests")
104+
testImplementation("org.apache.spark:spark-sql_2.12:${SPARK_VERSION}:tests")
105+
testImplementation("org.apache.spark:spark-catalyst_2.12:${SPARK_VERSION}:tests")
106+
}
107+
108+
tasks {
109+
test {
110+
dependsOn(":core:shadowJar")
111+
useJUnitPlatform { includeEngines("scalatest") }
112+
}
113+
}

spark/src/main/resources/spark.yml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one or more
2+
# contributor license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright ownership.
4+
# The ASF licenses this file to You under the Apache License, Version 2.0
5+
# (the "License"); you may not use this file except in compliance with
6+
# the License. You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
%YAML 1.2
16+
---
17+
scalar_functions:
18+
-
19+
name: year
20+
description: Returns the year component of the date/timestamp
21+
impls:
22+
- args:
23+
- value: date
24+
return: i32
25+
-
26+
name: unscaled
27+
description: >-
28+
Return the unscaled Long value of a Decimal, assuming it fits in a Long.
29+
Note: this expression is internal and created only by the optimizer,
30+
we don't need to do type check for it.
31+
impls:
32+
- args:
33+
- value: DECIMAL<P,S>
34+
return: i64
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package io.substrait.debug
18+
19+
import io.substrait.spark.DefaultExpressionVisitor
20+
21+
import org.apache.spark.sql.catalyst.util.DateTimeUtils
22+
23+
import io.substrait.expression.{Expression, FieldReference}
24+
import io.substrait.expression.Expression.{DateLiteral, DecimalLiteral, I32Literal, StrLiteral}
25+
import io.substrait.function.ToTypeString
26+
import io.substrait.util.DecimalUtil
27+
28+
import scala.collection.JavaConverters.asScalaBufferConverter
29+
30+
/**
 * Debug visitor that renders Substrait [[Expression]] nodes as short
 * human-readable strings. Literal nodes are printed as their values, field
 * references as `$index`, and scalar function invocations as
 * `key[outputType](arg0,arg1,...)`; remaining node kinds fall back to the
 * behavior inherited from [[DefaultExpressionVisitor]].
 */
class ExpressionToString extends DefaultExpressionVisitor[String] {

  override def visit(expr: DecimalLiteral): String = {
    // Decode the 16-byte two's-complement representation at the literal's scale.
    val rawBytes = expr.value.toByteArray
    DecimalUtil.getBigDecimalFromBytes(rawBytes, expr.scale, 16).toString
  }

  override def visit(expr: StrLiteral): String = expr.value()

  override def visit(expr: I32Literal): String = expr.value().toString

  override def visit(expr: DateLiteral): String =
    // The literal stores days-since-epoch; convert to java.sql.Date for display.
    DateTimeUtils.toJavaDate(expr.value()).toString

  override def visit(expr: FieldReference): String =
    // Render a simple struct-field reference as "$<ordinal>".
    withFieldReference(expr)(ordinal => "$" + ordinal.toString)

  override def visit(expr: Expression.SingleOrList): String = expr.toString

  override def visit(expr: Expression.ScalarFunctionInvocation): String = {
    // Recursively render each argument; the argument index is forwarded so the
    // argument visitor can resolve it against the function declaration.
    val renderedArgs = expr
      .arguments()
      .asScala
      .zipWithIndex
      .map { case (argument, index) => argument.accept(expr.declaration(), index, this) }
      .mkString(",")
    val outputTypeString = expr.outputType().accept(ToTypeString.INSTANCE)
    s"${expr.declaration().key()}[$outputTypeString]($renderedArgs)"
  }

  override def visit(expr: Expression.UserDefinedLiteral): String = expr.toString
}

0 commit comments

Comments
 (0)