From f8867ec6a83c8493c28b60d9ce98f74a558ff29d Mon Sep 17 00:00:00 2001
From: go-noah
Date: Sat, 12 Jul 2025 14:09:41 +0900
Subject: [PATCH 1/2] maximum vector size

---
 build.sbt                                     |  2 +-
 .../scala/serving/config/ConfigManager.scala  |  4 +-
 .../serving/model/CosineSimilarity.scala      | 44 ++++++++++++++++---
 .../serving/tensor/TensorFlowProvider.scala   |  2 +-
 4 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/build.sbt b/build.sbt
index 627a868..29904c4 100644
--- a/build.sbt
+++ b/build.sbt
@@ -21,7 +21,7 @@ libraryDependencies ++= Seq(
   "com.typesafe.akka" %% "akka-http-caching" % AkkaHttpVersion,
   "com.typesafe.akka" %% "akka-http-spray-json" % AkkaHttpVersion,
   "ch.qos.logback" % "logback-classic" % LogbackVersion,
-  "org.tensorflow" % "tensorflow-core-platform-gpu" % "0.4.1"
+  "org.tensorflow" % "tensorflow-core-platform" % "1.0.0"
 )
 
 assembly / mainClass := Some("serving.http.HttpServer")

diff --git a/src/main/scala/serving/config/ConfigManager.scala b/src/main/scala/serving/config/ConfigManager.scala
index e05ebb8..3179c9e 100644
--- a/src/main/scala/serving/config/ConfigManager.scala
+++ b/src/main/scala/serving/config/ConfigManager.scala
@@ -7,8 +7,8 @@ object ConfigManager {
   val takeSpinCountDelay : Int = System.getProperty("takeSpinCountDelay","5").toInt
 
   val topK: Int = System.getProperty("topK","10").toInt
-  val dim: Int = System.getProperty("dim","100").toInt
-  val sample: Int = System.getProperty("sample","10000").toInt
+  val dim: Int = System.getProperty("dim","1000").toInt
+  val sample: Int = System.getProperty("sample","10000000").toInt
   val batch: Int = System.getProperty("batch","16").toInt
 
   val npyFile: String = System.getProperty("npyFile", "./model/10000-100.npy")

diff --git a/src/main/scala/serving/model/CosineSimilarity.scala b/src/main/scala/serving/model/CosineSimilarity.scala
index 5e9a172..b4af28b 100644
--- a/src/main/scala/serving/model/CosineSimilarity.scala
+++ b/src/main/scala/serving/model/CosineSimilarity.scala
@@ -7,12 +7,13 @@ import org.tensorflow.ndarray.Shape
 import org.tensorflow.ndarray.buffer.DataBuffers
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.{Constant, Placeholder}
+import org.tensorflow.op.nn.TopK
 import org.tensorflow.types.TFloat32
 import serving.config.ConfigManager
 import serving.tensor.TensorFlowProvider
 import serving.tensor.InputTensor
 
-
+import java.nio.{ByteBuffer, ByteOrder}
 import java.nio.file.Paths
 
 final case class Vec(vec: Seq[(String, Array[Float])])
@@ -37,21 +38,46 @@ object CosineSimilarity {
 
   def model(k: Int): Graph = {
 
-    val wArray: Array[Float] = dataVectorNumpyArray
+    //val wArray: Array[Float] = dataVectorNumpyArray
+    val wArray: Array[Float] = Array.fill(dim * wLength)(0.0f)
 
     assert(wArray.length == (dim * wLength), f"${wArray.length} != ${dim} * ${wLength} npy file does not match size and dimension and sample length.")
 
     val graph = new Graph()
     val tf = Ops.create(graph)
-    val wTensor: Constant[TFloat32] = {
-      val fp32Buf = DataBuffers.ofFloats(wLength * dim).write(wArray, 0, wLength * dim)
-      tf.constant(Shape.of(1, dim, wLength), fp32Buf)
+
+    val wTensor = TFloat32.tensorOf(Shape.of(1, dim, wLength))
+
+    val chunkSize = 100000
+    var offset = 0
+
+    while (offset < wArray.length) {
+      val length = Math.min(chunkSize, wArray.length - offset)
+      println(length)
+      // copy the data in chunks of 100,000 floats into a byte buffer
+      val byteBuffer = ByteBuffer.allocateDirect(length * 4).order(ByteOrder.nativeOrder())
+
+      for (i <- 0 until length) {
+        byteBuffer.putFloat(wArray(offset + i))
+      }
+
+      byteBuffer.flip()
+
+      val byteArray = new Array[Byte](length * 4)
+      byteBuffer.get(byteArray)
+
+      // write the chunk into wTensor at its byte offset (4 bytes per float)
+      wTensor.asRawTensor().data().offset(offset.toLong * 4).write(byteArray)
+
+      offset += length
     }
+
+
     val vTensor = tf.withName("input").placeholder(classOf[TFloat32], Placeholder.shape(Shape.of(-1, dim, 1)))
 
-    val mul = tf.math.mul(vTensor, wTensor)
+    val mul = tf.math.mul(vTensor, tf.constant(wTensor))
     val cosineSimilarity = tf.reduceSum(mul, tf.array(1))
-    val nnTopK = tf.withName("output").nn.topK(cosineSimilarity, tf.constant(k))
+    val nnTopK = tf.withName("output").nn.topK(cosineSimilarity, tf.constant(k), Array.empty[TopK.Options]: _*)
 
     graph
   }
@@ -103,4 +129,8 @@ object CosineSimilarity {
     run(v, k = topK)
   }
 
+  def main(args: Array[String]): Unit = {
+    val a = run(Array(Array.fill(100)(0.0f)), k = 5)
+    a.foreach(x => x.foreach(println))
+  }
 }
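Note: the loop above replaces the single DataBuffers.ofFloats write because one contiguous buffer stops scaling as the weight matrix grows; writing fixed-size chunks at increasing byte offsets avoids the giant intermediate buffer. A minimal standalone sketch of the same technique, assuming tensorflow-core-platform 1.x on the classpath (ChunkedCopySketch and copyInChunks are illustrative names, not part of this patch):

    import org.tensorflow.ndarray.Shape
    import org.tensorflow.types.TFloat32
    import java.nio.{ByteBuffer, ByteOrder}

    object ChunkedCopySketch {
      // Copy src into dst in chunks of chunkSize floats, writing each chunk
      // of raw bytes at its byte offset (4 bytes per float) instead of
      // materializing one buffer for the whole array.
      def copyInChunks(src: Array[Float], dst: TFloat32, chunkSize: Int): Unit = {
        val data = dst.asRawTensor().data()
        var offset = 0
        while (offset < src.length) {
          val length = Math.min(chunkSize, src.length - offset)
          val bb = ByteBuffer.allocate(length * 4).order(ByteOrder.nativeOrder())
          for (i <- 0 until length) bb.putFloat(src(offset + i))
          // offset(n) gives a view of the tensor's bytes starting at byte n
          data.offset(offset.toLong * 4).write(bb.array())
          offset += length
        }
      }

      def main(args: Array[String]): Unit = {
        val t = TFloat32.tensorOf(Shape.of(1, 2, 2))
        copyInChunks(Array(1f, 2f, 3f, 4f), t, chunkSize = 3)
        println(t.getFloat(0, 1, 1)) // expect 4.0
      }
    }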
") val graph = new Graph() val tf = Ops.create(graph) - val wTensor: Constant[TFloat32] = { - val fp32Buf = DataBuffers.ofFloats(wLength * dim).write(wArray, 0, wLength * dim) - tf.constant(Shape.of(1, dim, wLength), fp32Buf) + + val wTensor = TFloat32.tensorOf(Shape.of(1, dim, wLength)) + + val chunkSize = 100000 + var offset = 0 + + while (offset < wArray.length) { + val length = Math.min(chunkSize, wArray.length - offset) + println(length) + // 100만 개씩 데이터를 복사하여 바이트버퍼로 변환 + val byteBuffer = ByteBuffer.allocateDirect(length * 4).order(ByteOrder.nativeOrder()) + + for (i <- 0 until length) { + byteBuffer.putFloat(wArray(offset + i)) + } + + byteBuffer.flip() + + val byteArray = new Array[Byte](length * 4) + byteBuffer.get(byteArray) + + // wTensor에 기록 + wTensor.asRawTensor().data().write(byteArray) + + offset += length } + + val vTensor = tf.withName("input").placeholder(classOf[TFloat32], Placeholder.shape(Shape.of(-1, dim, 1))) - val mul = tf.math.mul(vTensor, wTensor) + val mul = tf.math.mul(vTensor, tf.constant(wTensor)) val cosineSimilarity = tf.reduceSum(mul, tf.array(1)) - val nnTopK = tf.withName("output").nn.topK(cosineSimilarity, tf.constant(k)) + val nnTopK = tf.withName("output").nn.topK(cosineSimilarity, tf.constant(k), Array.empty[TopK.Options]) graph } @@ -103,4 +129,8 @@ object CosineSimilarity { run(v, k = topK) } + def main(args: Array[String]): Unit = { + val a = run(Array(Array.fill(100)(0.0f)),k = 5) + a.foreach(x=>x.foreach(println)) +} } diff --git a/src/main/scala/serving/tensor/TensorFlowProvider.scala b/src/main/scala/serving/tensor/TensorFlowProvider.scala index 09c44a5..2ba68b2 100644 --- a/src/main/scala/serving/tensor/TensorFlowProvider.scala +++ b/src/main/scala/serving/tensor/TensorFlowProvider.scala @@ -27,7 +27,7 @@ class TensorFlowProvider(graph: Graph) extends AutoCloseable { // run. 
From 391952dcdf89a09b1daa9eebec8e2238a155bb86 Mon Sep 17 00:00:00 2001
From: go-noah
Date: Sat, 12 Jul 2025 14:41:00 +0900
Subject: [PATCH 2/2] maximum vector size

---
 build.sbt                                    |  2 +-
 .../serving/model/CosineSimilarity.scala     | 31 ++++++++++---------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/build.sbt b/build.sbt
index 29904c4..9032fa1 100644
--- a/build.sbt
+++ b/build.sbt
@@ -21,7 +21,7 @@ libraryDependencies ++= Seq(
   "com.typesafe.akka" %% "akka-http-caching" % AkkaHttpVersion,
   "com.typesafe.akka" %% "akka-http-spray-json" % AkkaHttpVersion,
   "ch.qos.logback" % "logback-classic" % LogbackVersion,
-  "org.tensorflow" % "tensorflow-core-platform" % "1.0.0"
+  "org.tensorflow" % "tensorflow-core-platform" % "1.1.0"
 )
 
 assembly / mainClass := Some("serving.http.HttpServer")

diff --git a/src/main/scala/serving/model/CosineSimilarity.scala b/src/main/scala/serving/model/CosineSimilarity.scala
index b4af28b..df1ece7 100644
--- a/src/main/scala/serving/model/CosineSimilarity.scala
+++ b/src/main/scala/serving/model/CosineSimilarity.scala
@@ -39,40 +39,41 @@ object CosineSimilarity {
   def model(k: Int): Graph = {
 
     //val wArray: Array[Float] = dataVectorNumpyArray
-    val wArray: Array[Float] = Array.fill(dim * wLength)(0.0f)
+    //val wArray: Array[Float] = Array.fill(dim * wLength)(0.0f)
 
-    assert(wArray.length == (dim * wLength), f"${wArray.length} != ${dim} * ${wLength} npy file does not match size and dimension and sample length.")
+    // the element count can exceed Int.MaxValue, so track it as a Long
+    val wArrayLength: Long = dim.toLong * wLength.toLong
+
+    //assert(wArray.length == (dim * wLength), f"${wArray.length} != ${dim} * ${wLength} npy file does not match size and dimension and sample length.")
 
     val graph = new Graph()
     val tf = Ops.create(graph)
 
     val wTensor = TFloat32.tensorOf(Shape.of(1, dim, wLength))
 
-    val chunkSize = 100000
-    var offset = 0
+    val chunkSize: Long = 100000
+    var offset: Long = 0
 
-    while (offset < wArray.length) {
-      val length = Math.min(chunkSize, wArray.length - offset)
+    while (offset < wArrayLength) {
+      val length = Math.min(chunkSize, wArrayLength - offset)
       println(length)
-      // copy the data in chunks of 100,000 floats into a byte buffer
-      val byteBuffer = ByteBuffer.allocateDirect(length * 4).order(ByteOrder.nativeOrder())
-
-      for (i <- 0 until length) {
-        byteBuffer.putFloat(wArray(offset + i))
-      }
-
-      byteBuffer.flip()
-
-      val byteArray = new Array[Byte](length * 4)
-      byteBuffer.get(byteArray)
+      // the numpy source is commented out above, so write zero-filled chunks;
+      // length is at most chunkSize, which makes the toInt narrowing safe
+      val byteArray = new Array[Byte]((length * 4).toInt)
 
       // write the chunk into wTensor at its byte offset (4 bytes per float)
       wTensor.asRawTensor().data().offset(offset.toLong * 4).write(byteArray)
 
       offset += length
+      println(s"Total copied elements: $offset / $wArrayLength")
     }
 
     val vTensor = tf.withName("input").placeholder(classOf[TFloat32], Placeholder.shape(Shape.of(-1, dim, 1)))
 
     val mul = tf.math.mul(vTensor, tf.constant(wTensor))
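Note: the switch to Long bookkeeping above is what the new ConfigManager defaults require: with dim = 1000 and sample = 10,000,000 the weight tensor holds 10^10 floats, which overflows Int arithmetic and also exceeds the maximum length of a single Java array (hence the commented-out wArray). A small self-contained illustration of the overflow, using the values from ConfigManager:

    object OverflowSketch {
      def main(args: Array[String]): Unit = {
        val dim = 1000
        val sample = 10000000
        // Int multiplication wraps modulo 2^32 and silently yields 1410065408.
        val intCount: Int = dim * sample
        // Long multiplication gives the true element count: 10000000000.
        val longCount: Long = dim.toLong * sample.toLong
        println(s"Int math: $intCount, Long math: $longCount")
        // At 4 bytes per float the tensor needs about 40 GB of native memory.
        println(s"Bytes needed: ${longCount * 4}")
      }
    }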