potix2 · mrb24 · Oct 2, 2019 · Nov 26, 2019
diff --git a/build.sbt b/build.sbt
@@ -2,11 +2,11 @@ name := "spark-google-spreadsheets"
 
 organization := "com.github.potix2"
 
-scalaVersion := "2.11.12"
+scalaVersion := "2.12.10"
 
-crossScalaVersions := Seq("2.11.12")
+crossScalaVersions := Seq("2.12.10")
 
-version := "0.6.4-SNAPSHOT"
+version := "0.6.4"
 
 spName := "potix2/spark-google-spreadsheets"
 
@@ -16,7 +16,7 @@ spIncludeMaven := true
 
 spIgnoreProvided := true
 
-sparkVersion := "2.3.3"
+sparkVersion := "2.4.4"
 
 val testSparkVersion = settingKey[String]("The version of Spark to test against.")
 
@@ -26,7 +26,7 @@ sparkComponents := Seq("sql")
 
 libraryDependencies ++= Seq(
   "org.slf4j" % "slf4j-api" % "1.7.5" % "provided",
-  "org.scalatest" %% "scalatest" % "2.2.1" % "test",
+  "org.scalatest" %% "scalatest" % "3.0.8" % "test",
   ("com.google.api-client" % "google-api-client" % "1.22.0").
     exclude("com.google.guava", "guava-jdk5"),
   "com.google.oauth-client" % "google-oauth-client-jetty" % "1.22.0",
@@ -55,6 +55,9 @@ publishArtifact in Test := false
 
 pomIncludeRepository := { _ => false }
 
+//publishMavenStyle := true
+//publishTo := Some(Resolver.file("file",  new File(Path.userHome.absolutePath+"/.m2/repository")))
+
 publishTo := {
   val nexus = "https://oss.sonatype.org/"
   if (version.value.endsWith("SNAPSHOT"))

diff --git a/src/main/scala/com/github/potix2/spark/google/spreadsheets/SpreadsheetRelation.scala b/src/main/scala/com/github/potix2/spark/google/spreadsheets/SpreadsheetRelation.scala
@@ -29,6 +29,7 @@ case class SpreadsheetRelation protected[spark] (
 
   import com.github.potix2.spark.google.spreadsheets.SparkSpreadsheetService._
 
+  private val fieldMap = scala.collection.mutable.Map[String, String]() // schema column name -> original worksheet header; filled in by inferSchema()
   override def schema: StructType = userSchema.getOrElse(inferSchema())
 
   private lazy val aWorksheet: SparkWorksheet =
@@ -47,6 +48,7 @@ case class SpreadsheetRelation protected[spark] (
 
   override def buildScan(): RDD[Row] = {
     val aSchema = schema
+    val schemaMap = fieldMap.toMap
     sqlContext.sparkContext.makeRDD(rows).mapPartitions { iter =>
       iter.map { m =>
         var index = 0
@@ -55,6 +57,8 @@ case class SpreadsheetRelation protected[spark] (
           val field = aSchema.fields(index)
           rowArray(index) = if (m.contains(field.name)) {
             TypeCast.castTo(m(field.name), field.dataType, field.nullable)
+          } else if (schemaMap.contains(field.name) && m.contains(schemaMap(field.name))) {
+            TypeCast.castTo(m(schemaMap(field.name)), field.dataType, field.nullable)
           } else {
             null
           }
@@ -78,9 +82,46 @@ case class SpreadsheetRelation protected[spark] (
     }
   }
 
+  /**
+   * Turns a worksheet header into a column name built only from ASCII letters,
+   * digits and underscores, and beginning with a letter.
+   *
+   * The result is empty when the header contains no ASCII letter (e.g. "2019"),
+   * and different headers can yield the same name (e.g. "a b" and "a-b").
+   *
+   * @param name the header text to convert
+   * @return the sanitized name, possibly empty
+   */
+  def sanitizeColumnName(name: String): String =
+    name
+      .replaceAll("[^a-zA-Z0-9]+", "_")    // Replace sequences of non-alphanumeric characters with underscores
+      .replaceAll("_+$", "")               // Strip trailing underscores
+      .replaceAll("^[0-9_]+", "")          // Strip leading underscores and digits
+
+  /**
+   * Builds an all-string, nullable schema from the worksheet headers and records
+   * in fieldMap which header each column name came from.
+   *
+   * Column names are the sanitized headers; an empty result falls back to
+   * "_c<position>" and a repeated name gets a "_2", "_3", ... suffix.
+   */
   private def inferSchema(): StructType =
-    StructType(aWorksheet.headers.toList.map { fieldName =>
-      StructField(fieldName, StringType, nullable = true)
-    })
+  {
+    // Names handed out during this call. Kept local rather than read from fieldMap
+    // so that every call yields the same names for the same headers.
+    val taken = scala.collection.mutable.Set[String]()
+    StructType(aWorksheet.headers.toList.zipWithIndex.map { case (header, position) =>
+      val sanitized = sanitizeColumnName(header)
+      // A header without any ASCII letter sanitizes to "", so name it by position instead.
+      val base = if (sanitized.isEmpty) s"_c$position" else sanitized
+      // Two headers can sanitize to the same name; give later ones the first free numeric suffix.
+      val columnName =
+        if (!taken(base)) base
+        else Iterator.from(2).map(n => s"${base}_$n").dropWhile(taken.contains).next()
+      taken += columnName
+      fieldMap.put(columnName, header)
+      StructField(columnName, StringType, nullable = true)
+    })
+  }
 
 }