Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,22 @@ You can link against this library in your program in the following ways:
<dependency>
<groupId>com.springml</groupId>
<artifactId>spark-sftp_2.11</artifactId>
<version>1.1.3</version>
<version>1.2.0</version>
</dependency>

```

### SBT Dependency
```
libraryDependencies += "com.springml" % "spark-sftp_2.11" % "1.1.3"
libraryDependencies += "com.springml" %% "spark-sftp" % "1.2.0"
```


## Using with Spark shell
This package can be added to Spark using the `--packages` command line option. For example, to include it when starting the spark shell:

```
$ bin/spark-shell --packages com.springml:spark-sftp_2.11:1.1.3
$ bin/spark-shell --packages com.springml:spark-sftp_2.11:1.2.0
```

## Features
Expand Down Expand Up @@ -171,4 +171,4 @@ write.df(df,


## Building From Source
This library is built with [SBT](http://www.scala-sbt.org/0.13/docs/Command-Line-Reference.html), which is automatically downloaded by the included shell script. To build a JAR file simply run `build/sbt package` from the project root.
This library is built with [SBT](http://www.scala-sbt.org/0.13/docs/Command-Line-Reference.html), which is automatically downloaded by the included shell script. To build a JAR file simply run `build/sbt +package` from the project root.
17 changes: 8 additions & 9 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,31 @@ name := "spark-sftp"

organization := "com.springml"

scalaVersion := "2.11.8"
scalaVersion := "2.12.10"

sparkVersion := "2.3.0"
crossScalaVersions := Seq("2.11.12", "2.12.10")

sparkVersion := "2.4.3"

spName := "springml/spark-sftp"

version := "1.1.4"
version := "1.2.0"

// Dependent libraries
libraryDependencies ++= Seq(
"com.springml" % "sftp.client" % "1.0.3",
"org.mockito" % "mockito-core" % "2.0.31-beta",
"com.databricks" % "spark-xml_2.11" % "0.4.1"
"com.databricks" %% "spark-xml" % "0.5.0"
)

// used spark components
sparkComponents += "sql"
sparkComponents ++= Seq("sql", "avro")

// Repositories
resolvers += "Spark Package Main Repo" at "https://dl.bintray.com/spark-packages/maven"

// Spark packages
spDependencies += "com.databricks/spark-avro_2.11:3.2.0"

// Test dependencies
libraryDependencies += "org.scalatest" %% "scalatest" % "2.2.1" % "test"
libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.3" % "test"
libraryDependencies += "org.apache.avro" % "avro-mapred" % "1.7.7" % "test" exclude("org.mortbay.jetty", "servlet-api")
libraryDependencies += "org.apache.spark" %% "spark-hive" % sparkVersion.value % "test"

Expand Down
3 changes: 1 addition & 2 deletions src/main/scala/com/springml/spark/sftp/DatasetRelation.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package com.springml.spark.sftp

import com.databricks.spark.avro._
import org.apache.log4j.Logger
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row, SQLContext}
Expand Down Expand Up @@ -36,7 +35,7 @@ case class DatasetRelation(
var df: DataFrame = null

df = fileType match {
case "avro" => dataframeReader.avro(fileLocation)
case "avro" => dataframeReader.format("avro").load(fileLocation)
case "txt" => dataframeReader.format("text").load(fileLocation)
case "xml" => dataframeReader.format(constants.xmlClass)
.option(constants.xmlRowTag, rowTag)
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/com/springml/spark/sftp/DefaultSource.scala
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ class DefaultSource extends RelationProvider with SchemaRelationProvider with Cr
optionNoNull("codec", Option(codec)).
csv(hdfsTempLocation)
case "txt" => df.coalesce(1).write.text(hdfsTempLocation)
case "avro" => df.coalesce(1).write.format("com.databricks.spark.avro").save(hdfsTempLocation)
case "avro" => df.coalesce(1).write.format("avro").save(hdfsTempLocation)
case _ => df.coalesce(1).write.format(fileType).save(hdfsTempLocation)
}

Expand Down