-
-
Notifications
You must be signed in to change notification settings - Fork 67
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add API to write single file using custom ParquetWriter
- Loading branch information
1 parent
f532753
commit 7bb5f07
Showing
10 changed files
with
234 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
syntax = "proto3"; | ||
|
||
option java_package = "com.github.mjakubowski84.parquet4s.protobuf"; | ||
|
||
// Sample message carrying an integer id and a text value. | ||
message Data { | ||
int32 id = 1; | ||
string text = 2; | ||
} |
31 changes: 31 additions & 0 deletions
31
...s/src/main/scala/com/github/mjakubowski84/parquet4s/akka/CustomParquetWriterAkkaApp.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package com.github.mjakubowski84.parquet4s.akka | ||
|
||
import akka.actor.ActorSystem | ||
import akka.stream.scaladsl.Source | ||
import com.github.mjakubowski84.parquet4s.{ParquetStreams, Path} | ||
import com.github.mjakubowski84.parquet4s.protobuf.DataOuterClass.Data | ||
import org.apache.parquet.proto.ProtoParquetWriter | ||
|
||
import java.nio.file.Files | ||
import scala.util.Random | ||
|
||
/** Example application that writes protobuf `Data` messages to a single Parquet file through
  * an Akka Streams sink built around a custom `ProtoParquetWriter` builder.
  *
  * The actor system is terminated when the stream's future completes, regardless of whether
  * the write succeeded or failed; a failure is logged before shutdown.
  */
object CustomParquetWriterAkkaApp extends App {
  val count = 100
  // Sample messages with ids 1..count and random four-character texts.
  val data = (1 to count).map(i => Data.newBuilder.setId(i).setText(Random.nextString(4)).build)
  // Newly created temporary directory that will contain the output file.
  val path = Path(Files.createTempDirectory("example"))

  implicit val system: ActorSystem = ActorSystem()

  // Execution context for the Future callbacks registered below.
  import system.dispatcher

  val builder = ProtoParquetWriter.builder[Data](path.append("data.parquet").hadoopPath).withMessage(classOf[Data])

  val sink = ParquetStreams.toParquetSingleFile
    .custom[Data, ProtoParquetWriter.Builder[Data]](builder)
    .write

  // Chaining `system.terminate()` with flatMap would skip it when the write fails, leaving the
  // actor system running and the error unreported. Log the failure, then always terminate.
  Source(data)
    .runWith(sink)
    .andThen { case scala.util.Failure(error) =>
      system.log.error(error, "Failed to write Parquet file")
    }
    .onComplete(_ => system.terminate())
}
36 changes: 36 additions & 0 deletions
36
...les/src/main/scala/com/github/mjakubowski84/parquet4s/fs2/CustomParquetWriterFS2App.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
package com.github.mjakubowski84.parquet4s.fs2 | ||
|
||
import cats.effect.{IO, IOApp} | ||
import com.github.mjakubowski84.parquet4s.Path | ||
import com.github.mjakubowski84.parquet4s.parquet.* | ||
import com.github.mjakubowski84.parquet4s.protobuf.DataOuterClass.Data | ||
import fs2.io.file.Files | ||
import fs2.{Pipe, Stream} | ||
import org.apache.parquet.proto.ProtoParquetWriter | ||
|
||
import scala.util.Random | ||
|
||
/** Example application that streams generated protobuf `Data` messages into a single Parquet
  * file, using `writeSingleFile` with a custom `ProtoParquetWriter` builder.
  */
object CustomParquetWriterFS2App extends IOApp.Simple {
  private val Count = 100

  override def run: IO[Unit] = {
    // Pipe that passes every incoming message to a ProtoParquetWriter targeting `target`.
    def protoWriter(target: Path): Pipe[IO, Data, Nothing] =
      writeSingleFile[IO]
        .custom[Data, ProtoParquetWriter.Builder[Data]](
          ProtoParquetWriter.builder[Data](target.hadoopPath).withMessage(classOf[Data])
        )
        .write

    // Messages with ids 0 until Count and random four-character texts.
    val records: Stream[IO, Data] =
      Stream
        .range[IO, Int](start = 0, stopExclusive = Count)
        .map(id => Data.newBuilder.setId(id).setText(Random.nextString(4)).build)

    // Output location: "data.parquet" inside a temporary directory acquired as a resource.
    val outputFile: Stream[IO, Path] =
      Stream
        .resource(Files[IO].tempDirectory(None, "", None))
        .map(dir => Path(dir.toNioPath).append("data.parquet"))

    outputFile
      .flatMap(file => records.through(protoWriter(file)))
      .compile
      .drain
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.