From 76f91fdea15070846fdfcd3a5dc6d6171ee7f671 Mon Sep 17 00:00:00 2001 From: Marcin Jakubowski Date: Mon, 5 Feb 2024 19:07:02 +0100 Subject: [PATCH] Bugfix: support reading decimals encoded as long values --- build.sbt | 4 +- .../parquet4s/ParquetReaderItSpec.scala | 8 --- .../parquet4s/ParquetReadSupport.scala | 3 + .../parquet4s/DecimalValueSpec.scala | 60 +++++++++++++++++++ project/metals.sbt | 2 +- 5 files changed, 66 insertions(+), 11 deletions(-) create mode 100644 core/src/test/scala/com/github/mjakubowski84/parquet4s/DecimalValueSpec.scala diff --git a/build.sbt b/build.sbt index d92c05e3..7cce79a9 100644 --- a/build.sbt +++ b/build.sbt @@ -14,8 +14,8 @@ val akkaLib = ActorLibCross("-akka", "-akka") val pekkoLib = ActorLibCross("-pekko", "-pekko") ThisBuild / organization := "com.github.mjakubowski84" -ThisBuild / version := "2.16.0-SNAPSHOT" -ThisBuild / isSnapshot := true +ThisBuild / version := "2.15.1" +ThisBuild / isSnapshot := false ThisBuild / scalaVersion := twoThirteen ThisBuild / javacOptions ++= Seq("-source", "1.8", "-target", "1.8") diff --git a/core/src/it/scala/com/github/mjakubowski84/parquet4s/ParquetReaderItSpec.scala b/core/src/it/scala/com/github/mjakubowski84/parquet4s/ParquetReaderItSpec.scala index 0987a4b0..f5ca3b54 100644 --- a/core/src/it/scala/com/github/mjakubowski84/parquet4s/ParquetReaderItSpec.scala +++ b/core/src/it/scala/com/github/mjakubowski84/parquet4s/ParquetReaderItSpec.scala @@ -71,14 +71,6 @@ class ParquetReaderItSpec extends AnyFreeSpec with Matchers with TestUtils with finally partitioned.close() } - "Attempt to read partitions on non-partitioned data should succeed" in { - ParquetWriter.of[I].writeAndClose(Path(tempPath, "file.parquet"), Seq(I(1), I(2))) - - val data = ParquetReader.as[I].read(tempPath) - try data.toSeq should be(Seq(I(1), I(2))) - finally data.close() - } - "Attempt to read inconsistent partitions should fail with an exception" in { ParquetWriter.of[I].writeAndClose(Path(tempPath, "a=a1/b=b1/file.parquet"), Seq(I(1))) ParquetWriter.of[I].writeAndClose(Path(tempPath, "a=a2/file.parquet"), Seq(I(2))) diff --git a/core/src/main/scala/com/github/mjakubowski84/parquet4s/ParquetReadSupport.scala b/core/src/main/scala/com/github/mjakubowski84/parquet4s/ParquetReadSupport.scala index c4dbda5a..cf3dafec 100644 --- a/core/src/main/scala/com/github/mjakubowski84/parquet4s/ParquetReadSupport.scala +++ b/core/src/main/scala/com/github/mjakubowski84/parquet4s/ParquetReadSupport.scala @@ -114,6 +114,9 @@ abstract private class ParquetRecordConverter[R <: ParquetRecord[?, R]](schema: else value record = record.add(name, BinaryValue(rescaled)) } + + override def addLong(value: Long): Unit = + record = record.add(name, BinaryValue(Decimals.binaryFromDecimal(BigDecimal(value, scale, mathContext)))) } private class DateTimeConverter(name: String, timeUnit: LogicalTypeAnnotation.TimeUnit) diff --git a/core/src/test/scala/com/github/mjakubowski84/parquet4s/DecimalValueSpec.scala b/core/src/test/scala/com/github/mjakubowski84/parquet4s/DecimalValueSpec.scala new file mode 100644 index 00000000..744908d9 --- /dev/null +++ b/core/src/test/scala/com/github/mjakubowski84/parquet4s/DecimalValueSpec.scala @@ -0,0 +1,60 @@ +package com.github.mjakubowski84.parquet4s + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers +import org.apache.parquet.schema.PrimitiveType +import org.apache.parquet.schema.LogicalTypeAnnotation + +import scala.util.Using + +class DecimalValueSpec extends AnyFlatSpec with Matchers { + + case class Data(decimal: BigDecimal) + val data = Seq(Data(BigDecimal.decimal(1.2f))) + + "Decimal value" should "read from binary" in { + + val outputFile = InMemoryOutputFile(initBufferSize = 1024) + + // write + ParquetWriter + .generic(Message(Some("binary-dec"), TypedSchemaDef.decimalSchema("decimal"))) + .writeAndClose( + outputFile, + Seq(RowParquetRecord("decimal" -> BinaryValue(Decimals.binaryFromDecimal(BigDecimal.decimal(1.2f))))) + ) + + val inputFile = outputFile.toInputFile + + // read + Using(ParquetReader.as[Data].read(inputFile)) { readData => + readData.toSeq shouldBe data + } + } + + it should "read from long" in { + + val outputFile = InMemoryOutputFile(initBufferSize = 1024) + + // write + ParquetWriter + .generic( + Message( + Some("long-dec"), + SchemaDef.primitive( + primitiveType = PrimitiveType.PrimitiveTypeName.INT64, + logicalTypeAnnotation = Some(LogicalTypeAnnotation.decimalType(4, 8)) + )("decimal") + ) + ) + .writeAndClose(outputFile, Seq(RowParquetRecord("decimal" -> LongValue(12000L)))) + + val inputFile = outputFile.toInputFile + + // read + Using(ParquetReader.as[Data].read(inputFile)) { readData => + readData.toSeq shouldBe data + } + } + +} diff --git a/project/metals.sbt b/project/metals.sbt index cbb25c6a..4c9df445 100644 --- a/project/metals.sbt +++ b/project/metals.sbt @@ -2,5 +2,5 @@ // This file enables sbt-bloop to create bloop config files. -addSbtPlugin("ch.epfl.scala" % "sbt-bloop" % "1.5.11") +addSbtPlugin("ch.epfl.scala" % "sbt-bloop" % "1.5.13")