From 57b6d6887081a2c8b632119e18547e09d053e0bc Mon Sep 17 00:00:00 2001
From: Carlos Giraldo
Date: Wed, 31 Jul 2019 15:10:40 +0200
Subject: [PATCH] Support for Apache Spark 2.4.x

---
 build.sbt                                                   | 2 +-
 project/Dependencies.scala                                  | 6 +++---
 .../scala/com/ibm/sparktc/sparkbench/utils/SparkFuncs.scala | 6 +++---
 version.sbt                                                 | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/build.sbt b/build.sbt
index a9df0f69..d4143f94 100644
--- a/build.sbt
+++ b/build.sbt
@@ -20,7 +20,7 @@ import java.io.File
 import java.nio.file.Files
 import java.nio.file.StandardCopyOption.REPLACE_EXISTING
 
-scalaVersion in ThisBuild := "2.11.8"
+scalaVersion in ThisBuild := "2.12.8"
 
 /*
 **********************************************************************************
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index 2462796b..abe38f2f 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -19,7 +19,7 @@ import sbt._
 
 object Dependencies {
   // Versions
-  lazy val sparkVersion = "2.3.0"
+  lazy val sparkVersion = "2.4.2"
   lazy val scalacheckVersion = "1.13.5"
   lazy val junitVersion = "4.12"
   lazy val scalatestVersion = "3.0.5"
@@ -29,7 +29,7 @@
     "org.apache.spark" %% "spark-core" % sparkVersion % "provided",
     "org.apache.spark" %% "spark-mllib" % sparkVersion % "provided",
     "org.apache.spark" %% "spark-sql" % sparkVersion % "provided",
-    "com.databricks" %% "spark-avro" % "4.0.0"
+    "org.apache.spark" %% "spark-avro" % sparkVersion % "provided"
   )
 
   val breezeDeps = Seq(
@@ -55,7 +55,7 @@
     "org.scalactic" %% "scalactic" % scalatestVersion % "test",
     "org.scalatest" %% "scalatest" % scalatestVersion % "test",
     "org.apache.spark" %% "spark-hive" % sparkVersion % "test",
-    "com.holdenkarau" %% "spark-testing-base" % "2.2.0_0.8.0" % "test" excludeAll(
+    "com.holdenkarau" %% "spark-testing-base" % "2.4.2_0.12.0" % "test" excludeAll(
       ExclusionRule(organization = "org.scalacheck"),
       ExclusionRule(organization = "org.scalactic"),
       ExclusionRule(organization = "org.scalatest"),
diff --git a/utils/src/main/scala/com/ibm/sparktc/sparkbench/utils/SparkFuncs.scala b/utils/src/main/scala/com/ibm/sparktc/sparkbench/utils/SparkFuncs.scala
index 1b20947a..45917384 100644
--- a/utils/src/main/scala/com/ibm/sparktc/sparkbench/utils/SparkFuncs.scala
+++ b/utils/src/main/scala/com/ibm/sparktc/sparkbench/utils/SparkFuncs.scala
@@ -20,7 +20,7 @@ package com.ibm.sparktc.sparkbench.utils
 import org.apache.hadoop.fs.Path
 import org.apache.spark.sql.functions.lit
 import org.apache.spark.sql.{DataFrame, SparkSession}
-import com.databricks.spark.avro._
+import org.apache.spark.sql.avro._
 
 object SparkFuncs {
 
@@ -103,7 +103,7 @@ object SparkFuncs {
      case Formats.parquet => data.write.mode(saveMode).parquet(outputDir)
      case Formats.csv => data.write.mode(saveMode).option("header", "true").csv(outputDir)
      case Formats.orc => data.write.mode(saveMode).orc(outputDir)
-     case Formats.avro => data.write.mode(saveMode).avro(outputDir)
+     case Formats.avro => data.write.mode(saveMode).format("avro").save(outputDir)
      case Formats.json => data.write.mode(saveMode).json(outputDir)
      case Formats.console => data.show()
      case _ => throw new Exception(s"Unrecognized or unspecified save format: $format. " +
@@ -123,7 +123,7 @@ object SparkFuncs {
    inputFormat match {
      case Formats.parquet => spark.read.parquet(inputDir)
      case Formats.orc => spark.read.orc(inputDir)
-     case Formats.avro => spark.read.avro(inputDir)
+     case Formats.avro => spark.read.format("avro").load(inputDir)
      case Formats.json => spark.read.json(inputDir)
      case Formats.csv | _ => spark.read.option("inferSchema", "true").option("header", "true").csv(inputDir) //if unspecified, assume csv
    }
diff --git a/version.sbt b/version.sbt
index 90867904..cbed8fb7 100644
--- a/version.sbt
+++ b/version.sbt
@@ -16,4 +16,4 @@
  */
 // assign version to all projects
 // Spark version 2.1.1, spark-bench version 0.2.0
-version in ThisBuild := "2.3.0_0.4.0-RELEASE"
\ No newline at end of file
+version in ThisBuild := "2.4.2_0.4.0-RELEASE"
\ No newline at end of file