forked from databricks/spark-sql-perf
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild.sbt
176 lines (133 loc) · 4.95 KB
/
build.sbt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
// Your sbt build file. Guides on how to write one can be found at
// http://www.scala-sbt.org/0.13/docs/index.html
name := "spark-sql-perf"
organization := "com.databricks"
scalaVersion := "2.12.10"
crossScalaVersions := Seq("2.12.10")
sparkPackageName := "databricks/spark-sql-perf"
// All Spark Packages need a license
licenses := Seq("Apache-2.0" -> url("http://opensource.org/licenses/Apache-2.0"))
sparkVersion := "3.2.2"
sparkComponents ++= Seq("sql", "hive", "mllib")
initialCommands in console :=
"""
|import org.apache.spark.sql._
|import org.apache.spark.sql.functions._
|import org.apache.spark.sql.types._
|import org.apache.spark.sql.hive.test.TestHive
|import TestHive.implicits
|import TestHive.sql
|
|val sqlContext = TestHive
|import sqlContext.implicits._
""".stripMargin
libraryDependencies += "com.github.scopt" %% "scopt" % "3.7.1"
libraryDependencies += "com.twitter" %% "util-jvm" % "6.45.0"
libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.5" % "test"
libraryDependencies += "org.yaml" % "snakeyaml" % "1.23"
libraryDependencies += "org.apache.spark" %% "spark-kubernetes" % sparkVersion.value %"provided"
libraryDependencies += "org.bouncycastle" % "bcprov-jdk15on" % "1.68"
libraryDependencies += "org.bouncycastle" % "bcpkix-jdk15on" % "1.68"
libraryDependencies += "io.delta" %% "delta-core" % "2.0.0" %"provided"
libraryDependencies += "org.apache.hadoop" % "hadoop-aws" % "2.7.4" %"provided"
assemblyMergeStrategy in assembly := {
case PathList("META-INF", xs @ _*) => MergeStrategy.discard
case x => MergeStrategy.first
}
fork := true
// Your username to login to Databricks Cloud
dbcUsername := sys.env.getOrElse("DBC_USERNAME", "")
// Your password (Can be set as an environment variable)
dbcPassword := sys.env.getOrElse("DBC_PASSWORD", "")
// The URL to the Databricks Cloud DB Api. Don't forget to set the port number to 34563!
dbcApiUrl := sys.env.getOrElse ("DBC_URL", sys.error("Please set DBC_URL"))
// Add any clusters that you would like to deploy your work to. e.g. "My Cluster"
// or run dbcExecuteCommand
dbcClusters += sys.env.getOrElse("DBC_USERNAME", "")
dbcLibraryPath := s"/Users/${sys.env.getOrElse("DBC_USERNAME", "")}/lib"
val runBenchmark = inputKey[Unit]("runs a benchmark")
runBenchmark := {
import complete.DefaultParsers._
val args = spaceDelimited("[args]").parsed
val scalaRun = (runner in run).value
val classpath = (fullClasspath in Compile).value
scalaRun.run("com.databricks.spark.sql.perf.RunBenchmark", classpath.map(_.data), args,
streams.value.log)
}
val runMLBenchmark = inputKey[Unit]("runs an ML benchmark")
runMLBenchmark := {
import complete.DefaultParsers._
val args = spaceDelimited("[args]").parsed
val scalaRun = (runner in run).value
val classpath = (fullClasspath in Compile).value
scalaRun.run("com.databricks.spark.sql.perf.mllib.MLLib", classpath.map(_.data), args,
streams.value.log)
}
import ReleaseTransformations._
/** Push to the team directory instead of the user's homedir for releases. */
lazy val setupDbcRelease = ReleaseStep(
action = { st: State =>
val extracted = Project.extract(st)
val newSettings = extracted.structure.allProjectRefs.map { ref =>
dbcLibraryPath in ref := "/databricks/spark/sql/lib"
}
reapply(newSettings, st)
}
)
/********************
* Release settings *
********************/
publishMavenStyle := true
releaseCrossBuild := true
licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0"))
releasePublishArtifactsAction := PgpKeys.publishSigned.value
pomExtra := (
<url>https://github.com/databricks/spark-sql-perf</url>
<scm>
<url>[email protected]:databricks/spark-sql-perf.git</url>
<connection>scm:git:[email protected]:databricks/spark-sql-perf.git</connection>
</scm>
<developers>
<developer>
<id>marmbrus</id>
<name>Michael Armbrust</name>
<url>https://github.com/marmbrus</url>
</developer>
<developer>
<id>yhuai</id>
<name>Yin Huai</name>
<url>https://github.com/yhuai</url>
</developer>
<developer>
<id>nongli</id>
<name>Nong Li</name>
<url>https://github.com/nongli</url>
</developer>
<developer>
<id>andrewor14</id>
<name>Andrew Or</name>
<url>https://github.com/andrewor14</url>
</developer>
<developer>
<id>davies</id>
<name>Davies Liu</name>
<url>https://github.com/davies</url>
</developer>
</developers>
)
bintrayReleaseOnPublish in ThisBuild := false
// Add publishing to spark packages as another step.
releaseProcess := Seq[ReleaseStep](
checkSnapshotDependencies,
inquireVersions,
runTest,
setReleaseVersion,
commitReleaseVersion,
tagRelease,
setupDbcRelease,
releaseStepTask(dbcUpload),
publishArtifacts,
setNextVersion,
commitNextVersion,
pushChanges
)