diff --git a/README.md b/README.md index a9c2f43..c9c8c83 100644 --- a/README.md +++ b/README.md @@ -16,14 +16,14 @@ You can link against this library in your program at the following ways: com.springml spark-sftp_2.11 - 1.1.3 + 1.2.0 ``` ### SBT Dependency ``` -libraryDependencies += "com.springml" % "spark-sftp_2.11" % "1.1.3" +libraryDependencies += "com.springml" %% "spark-sftp" % "1.2.0" ``` @@ -31,7 +31,7 @@ libraryDependencies += "com.springml" % "spark-sftp_2.11" % "1.1.3" This package can be added to Spark using the `--packages` command line option. For example, to include it when starting the spark shell: ``` -$ bin/spark-shell --packages com.springml:spark-sftp_2.11:1.1.3 +$ bin/spark-shell --packages com.springml:spark-sftp_2.11:1.2.0 ``` ## Features @@ -171,4 +171,4 @@ write.df(df, ## Building From Source -This library is built with [SBT](http://www.scala-sbt.org/0.13/docs/Command-Line-Reference.html), which is automatically downloaded by the included shell script. To build a JAR file simply run `build/sbt package` from the project root. +This library is built with [SBT](http://www.scala-sbt.org/0.13/docs/Command-Line-Reference.html), which is automatically downloaded by the included shell script. To build a JAR file simply run `build/sbt +package` from the project root. diff --git a/build.sbt b/build.sbt index ba0e3f0..ed95bbe 100644 --- a/build.sbt +++ b/build.sbt @@ -2,32 +2,31 @@ name := "spark-sftp" organization := "com.springml" -scalaVersion := "2.11.8" +scalaVersion := "2.12.10" -sparkVersion := "2.3.0" +crossScalaVersions := Seq("2.11.12", "2.12.10") + +sparkVersion := "2.4.3" spName := "springml/spark-sftp" -version := "1.1.4" +version := "1.2.0" // Dependent libraries libraryDependencies ++= Seq( "com.springml" % "sftp.client" % "1.0.3", "org.mockito" % "mockito-core" % "2.0.31-beta", - "com.databricks" % "spark-xml_2.11" % "0.4.1" + "com.databricks" %% "spark-xml" % "0.5.0" ) // used spark components -sparkComponents += "sql" +sparkComponents ++= Seq("sql", "avro") // Repositories resolvers += "Spark Package Main Repo" at "https://dl.bintray.com/spark-packages/maven" -// Spark packages -spDependencies += "com.databricks/spark-avro_2.11:3.2.0" - // Test dependencies -libraryDependencies += "org.scalatest" %% "scalatest" % "2.2.1" % "test" +libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.3" % "test" libraryDependencies += "org.apache.avro" % "avro-mapred" % "1.7.7" % "test" exclude("org.mortbay.jetty", "servlet-api") libraryDependencies += "org.apache.spark" %% "spark-hive" % sparkVersion.value % "test" diff --git a/src/main/scala/com/springml/spark/sftp/DatasetRelation.scala b/src/main/scala/com/springml/spark/sftp/DatasetRelation.scala index 60b341c..19cdf58 100644 --- a/src/main/scala/com/springml/spark/sftp/DatasetRelation.scala +++ b/src/main/scala/com/springml/spark/sftp/DatasetRelation.scala @@ -1,6 +1,5 @@ package com.springml.spark.sftp -import com.databricks.spark.avro._ import org.apache.log4j.Logger import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Row, SQLContext} @@ -36,7 +35,7 @@ case class DatasetRelation( var df: DataFrame = null df = fileType match { - case "avro" => dataframeReader.avro(fileLocation) + case "avro" => dataframeReader.format("avro").load(fileLocation) case "txt" => dataframeReader.format("text").load(fileLocation) case "xml" => dataframeReader.format(constants.xmlClass) .option(constants.xmlRowTag, rowTag) diff --git a/src/main/scala/com/springml/spark/sftp/DefaultSource.scala b/src/main/scala/com/springml/spark/sftp/DefaultSource.scala index a62e57a..0e75007 100644 --- a/src/main/scala/com/springml/spark/sftp/DefaultSource.scala +++ b/src/main/scala/com/springml/spark/sftp/DefaultSource.scala @@ -256,7 +256,7 @@ class DefaultSource extends RelationProvider with SchemaRelationProvider with Cr optionNoNull("codec", Option(codec)). csv(hdfsTempLocation) case "txt" => df.coalesce(1).write.text(hdfsTempLocation) - case "avro" => df.coalesce(1).write.format("com.databricks.spark.avro").save(hdfsTempLocation) + case "avro" => df.coalesce(1).write.format("avro").save(hdfsTempLocation) case _ => df.coalesce(1).write.format(fileType).save(hdfsTempLocation) }