Skip to content

Commit

Permalink
Bugfix: support reading decimals encoded as long values
Browse files Browse the repository at this point in the history
  • Loading branch information
mjakubowski84 committed Feb 5, 2024
1 parent 2727a58 commit 76f91fd
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 11 deletions.
4 changes: 2 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ val akkaLib = ActorLibCross("-akka", "-akka")
val pekkoLib = ActorLibCross("-pekko", "-pekko")

ThisBuild / organization := "com.github.mjakubowski84"
ThisBuild / version := "2.16.0-SNAPSHOT"
ThisBuild / isSnapshot := true
ThisBuild / version := "2.15.1"
ThisBuild / isSnapshot := false
ThisBuild / scalaVersion := twoThirteen

ThisBuild / javacOptions ++= Seq("-source", "1.8", "-target", "1.8")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,6 @@ class ParquetReaderItSpec extends AnyFreeSpec with Matchers with TestUtils with
finally partitioned.close()
}

"Attempt to read partitions on non-partitioned data should succeed" in {
ParquetWriter.of[I].writeAndClose(Path(tempPath, "file.parquet"), Seq(I(1), I(2)))

val data = ParquetReader.as[I].read(tempPath)
try data.toSeq should be(Seq(I(1), I(2)))
finally data.close()
}

"Attempt to read inconsistent partitions should fail with an exception" in {
ParquetWriter.of[I].writeAndClose(Path(tempPath, "a=a1/b=b1/file.parquet"), Seq(I(1)))
ParquetWriter.of[I].writeAndClose(Path(tempPath, "a=a2/file.parquet"), Seq(I(2)))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ abstract private class ParquetRecordConverter[R <: ParquetRecord[?, R]](schema:
else value
record = record.add(name, BinaryValue(rescaled))
}

override def addLong(value: Long): Unit =
record = record.add(name, BinaryValue(Decimals.binaryFromDecimal(BigDecimal(value, scale, mathContext))))
}

private class DateTimeConverter(name: String, timeUnit: LogicalTypeAnnotation.TimeUnit)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package com.github.mjakubowski84.parquet4s

import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers
import org.apache.parquet.schema.PrimitiveType
import org.apache.parquet.schema.LogicalTypeAnnotation

import scala.util.Using

class DecimalValueSpec extends AnyFlatSpec with Matchers {

case class Data(decimal: BigDecimal)
val data = Seq(Data(BigDecimal.decimal(1.2f)))

"Decimal value" should "read from binary" in {

val outputFile = InMemoryOutputFile(initBufferSize = 1024)

// write
ParquetWriter
.generic(Message(Some("binary-dec"), TypedSchemaDef.decimalSchema("decimal")))
.writeAndClose(
outputFile,
Seq(RowParquetRecord("decimal" -> BinaryValue(Decimals.binaryFromDecimal(BigDecimal.decimal(1.2f)))))
)

val inputFile = outputFile.toInputFile

// read
Using(ParquetReader.as[Data].read(inputFile)) { readData =>
readData.toSeq shouldBe data
}
}

it should "read from long" in {

val outputFile = InMemoryOutputFile(initBufferSize = 1024)

// write
ParquetWriter
.generic(
Message(
Some("long-dec"),
SchemaDef.primitive(
primitiveType = PrimitiveType.PrimitiveTypeName.INT64,
logicalTypeAnnotation = Some(LogicalTypeAnnotation.decimalType(4, 8))
)("decimal")
)
)
.writeAndClose(outputFile, Seq(RowParquetRecord("decimal" -> LongValue(12000L))))

val inputFile = outputFile.toInputFile

// read
Using(ParquetReader.as[Data].read(inputFile)) { readData =>
readData.toSeq shouldBe data
}
}

}
2 changes: 1 addition & 1 deletion project/metals.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@

// This file enables sbt-bloop to create bloop config files.

addSbtPlugin("ch.epfl.scala" % "sbt-bloop" % "1.5.11")
addSbtPlugin("ch.epfl.scala" % "sbt-bloop" % "1.5.13")

0 comments on commit 76f91fd

Please sign in to comment.