diff --git a/.gitignore b/.gitignore index c58d83b..d876fc4 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,11 @@ lib_managed/ src_managed/ project/boot/ project/plugins/project/ +.bsp/ +project/.boot/ +project/.ivy/ +project/.sbtboot/ +.idea/ # Scala-IDE specific .scala_dependencies diff --git a/build.sbt b/build.sbt index 24507d3..5b1af64 100644 --- a/build.sbt +++ b/build.sbt @@ -20,9 +20,9 @@ limitations under the License. // publish isarn-sketches-java for exactly one scala version: // sbt isarn_sketches_java/publish -scalaVersion := "2.12.8" +scalaVersion := "2.12.14" -crossScalaVersions := Seq("2.11.12", "2.12.8") +crossScalaVersions := Seq("2.11.12", "2.12.14") // these do not "inherit" when defined at top level, so // define them here for inclusion in each subproject. @@ -95,7 +95,7 @@ previewFixedPort := Some(4444) lazy val isarn_sketches_java = (project in file("isarn-sketches-java")) .settings(name := "isarn-sketches-java") - .enablePlugins(GenJavadocPlugin, PublishJavadocPlugin) + //.enablePlugins(GenJavadocPlugin, PublishJavadocPlugin) .settings(siteSubProjectSettings :_*) .settings( crossPaths := false, // drop off Scala suffix from artifact names @@ -114,7 +114,7 @@ lazy val isarn_sketches = (project in file(".")) "org.isarnproject" %% "isarn-algebra-api" % "0.0.3", "org.isarnproject" %% "isarn-collections" % "0.0.4", "org.isarnproject" %% "isarn-scalatest" % "0.0.3" % Test, - "org.scalatest" %% "scalatest" % "3.0.5" % Test, + "org.scalatest" %% "scalatest" % "3.2.5" % Test, "org.apache.commons" % "commons-math3" % "3.6.1" % Test) ) .settings(publishSettings :_*) diff --git a/project/build.properties b/project/build.properties index 654fe70..9edb75b 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.3.12 +sbt.version=1.5.4 diff --git a/project/plugins.sbt b/project/plugins.sbt index 4f35abb..84fd37f 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,11 +1,10 @@ -resolvers += Resolver.url( - "bintray-sbt-plugin-releases", - url("http://dl.bintray.com/content/sbt/sbt-plugin-releases"))( - Resolver.ivyStylePatterns) - -resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/" - -resolvers += "jgit-repo" at "http://download.eclipse.org/jgit/maven" +resolvers ++= Seq( + "jgit-repo".at("https://download.eclipse.org/jgit/maven"), + //"sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/", + //Resolver.url("bintray-sbt-plugin-releases", url("https://dl.bintray.com/content/sbt/sbt-plugin-releases"))( + // Resolver.ivyStylePatterns + //) +) addSbtPlugin("com.typesafe.sbt" % "sbt-ghpages" % "0.6.3") diff --git a/src/test/scala/org/isarnproject/sketches/TDigestTest.scala b/src/test/scala/org/isarnproject/sketches/TDigestTest.scala index 0aba8aa..4ee6e91 100644 --- a/src/test/scala/org/isarnproject/sketches/TDigestTest.scala +++ b/src/test/scala/org/isarnproject/sketches/TDigestTest.scala @@ -16,14 +16,17 @@ limitations under the License. package org.isarnproject.sketches -import org.scalatest._ - import org.isarnproject.scalatest.matchers.seq._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AsyncWordSpec + + +class TDigestTest extends AsyncWordSpec with Matchers { -class TDigestTest extends FlatSpec with Matchers { import org.apache.commons.math3.distribution.RealDistribution import org.apache.commons.math3.distribution.IntegerDistribution + val seed = 235711L scala.util.Random.setSeed(seed) @@ -41,7 +44,7 @@ class TDigestTest extends FlatSpec with Matchers { .map(x => math.abs(td.cdf(x) - dist.cumulativeProbability(x))).max val dInv = (0.01 to 0.99 by 0.01).iterator - .map(x => math.abs(td.cdfInverse(x) - dist.inverseCumulativeProbability(x))).max / stdv + .map(x => math.abs(td.cdfInverse(x) - dist.inverseCumulativeProbability(x))).max / stdv val pass = d <= maxD && dInv <= maxDI if (!pass) Console.err.println(s"testTDvsDist failure: d= $d dInv= $dInv") @@ -59,7 +62,7 @@ class TDigestTest extends FlatSpec with Matchers { } def testSamplingPMF(td: TDigest, dist: IntegerDistribution): Boolean = { - td.nclusters should be <=(td.maxDiscrete) + td.nclusters should be <= (td.maxDiscrete) val tdSamples = Array.fill(10000) { td.samplePMF } val distSamples = Array.fill(10000) { dist.sample.toDouble } val kst = new org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest() @@ -103,91 +106,94 @@ class TDigestTest extends FlatSpec with Matchers { testMonotoneCDF(dist) && testMonotoneCDFI(dist) } - it should "sketch a uniform distribution" in { - import org.apache.commons.math3.distribution.UniformRealDistribution - val dist = new UniformRealDistribution() - testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } + it should { - it should "sketch a normal distribution" in { - import org.apache.commons.math3.distribution.NormalDistribution - val dist = new NormalDistribution() - testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } + "sketch a uniform distribution" in { + import org.apache.commons.math3.distribution.UniformRealDistribution + val dist = new UniformRealDistribution() + testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } - it should "sketch an exponential distribution" in { - import org.apache.commons.math3.distribution.ExponentialDistribution - val dist = new ExponentialDistribution(1.0) - testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } + "sketch a normal distribution" in { + import org.apache.commons.math3.distribution.NormalDistribution + val dist = new NormalDistribution() + testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } - it should "aggregate with another t-digest using ++" in { - import org.apache.commons.math3.distribution.NormalDistribution - val dist = new NormalDistribution() - dist.reseedRandomGenerator(seed) + "sketch an exponential distribution" in { + import org.apache.commons.math3.distribution.ExponentialDistribution + val dist = new ExponentialDistribution(1.0) + testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } + + "aggregate with another t-digest using ++" in { + import org.apache.commons.math3.distribution.NormalDistribution + val dist = new NormalDistribution() + dist.reseedRandomGenerator(seed) val td1 = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta) val td2 = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta) - testTDvsDist(td1 ++ td2, dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } - - it should "respect monotonic cdf and inverse" in { - import org.apache.commons.math3.distribution.ExponentialDistribution - import org.apache.commons.math3.distribution.NormalDistribution - import org.apache.commons.math3.distribution.UniformRealDistribution - - testMonotone(new UniformRealDistribution()) should be (true) - testMonotone(new ExponentialDistribution(1.0)) should be (true) - testMonotone(new NormalDistribution(0.0, 0.1)) should be (true) - } - - it should "respect maxDiscrete parameter" in { - import org.apache.commons.math3.distribution.GeometricDistribution - val gd = new GeometricDistribution(0.33) - val data = gd.sample(1000000) - val dataUniq = data.distinct.sorted - val kt = dataUniq.map(_.toDouble).toSet - val td = TDigest.sketch(data, maxDiscrete = 50) - val clust = td.clusters - clust.keys.toSet should be (kt) - val D = clust.keys.map { x => td.cdfDiscrete(x) } - .zip(dataUniq.map { k => gd.cumulativeProbability(k) }) - .map { case (p1, p2) => math.abs(p1 - p2) } - .max - (D <= 0.01) should be (true) - testSamplingPMF(td, gd) should be (true) - } - - it should "respect maxDiscrete parameter over ++" in { - import org.apache.commons.math3.distribution.GeometricDistribution - val gd = new GeometricDistribution(0.33) + testTDvsDist(td1 ++ td2, dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } + + "respect monotonic cdf and inverse" in { + import org.apache.commons.math3.distribution.ExponentialDistribution + import org.apache.commons.math3.distribution.NormalDistribution + import org.apache.commons.math3.distribution.UniformRealDistribution + + testMonotone(new UniformRealDistribution()) should be(true) + testMonotone(new ExponentialDistribution(1.0)) should be(true) + testMonotone(new NormalDistribution(0.0, 0.1)) should be(true) + } + + "respect maxDiscrete parameter" in { + import org.apache.commons.math3.distribution.GeometricDistribution + val gd = new GeometricDistribution(0.33) + val data = gd.sample(1000000) + val dataUniq = data.distinct.sorted + val kt = dataUniq.map(_.toDouble).toSet + val td = TDigest.sketch(data, maxDiscrete = 50) + val clust = td.clusters + clust.keys.toSet should be(kt) + val D = clust.keys.map { x => td.cdfDiscrete(x) } + .zip(dataUniq.map { k => gd.cumulativeProbability(k) }) + .map { case (p1, p2) => math.abs(p1 - p2) } + .max + (D <= 0.01) should be(true) + testSamplingPMF(td, gd) should be(true) + } + + "respect maxDiscrete parameter over ++" in { + import org.apache.commons.math3.distribution.GeometricDistribution + val gd = new GeometricDistribution(0.33) val tdvec = Vector.fill(10) { TDigest.sketch(gd.sample(100000), maxDiscrete = 50) } - val td = tdvec.reduce(_ ++ _) - val clust = td.clusters - clust.keys.map(_.toInt).map(_.toDouble) should beEqSeq(clust.keys) - val D = clust.keys.map { x => td.cdfDiscrete(x) } - .zip(clust.keys.map(_.toInt).map { k => gd.cumulativeProbability(k) }) - .map { case (p1, p2) => math.abs(p1 - p2) } - .max - (D <= 0.01) should be (true) - testSamplingPMF(td, gd) should be (true) - } + val td = tdvec.reduce(_ ++ _) + val clust = td.clusters + clust.keys.map(_.toInt).map(_.toDouble) should beEqSeq(clust.keys) + val D = clust.keys.map { x => td.cdfDiscrete(x) } + .zip(clust.keys.map(_.toInt).map { k => gd.cumulativeProbability(k) }) + .map { case (p1, p2) => math.abs(p1 - p2) } + .max + (D <= 0.01) should be(true) + testSamplingPMF(td, gd) should be(true) + } - it should "serialize and deserialize" in { - import org.apache.commons.math3.distribution.NormalDistribution + "serialize and deserialize" in { + import org.apache.commons.math3.distribution.NormalDistribution - import org.isarnproject.scalatest.serde.roundTripSerDe + import org.isarnproject.scalatest.serde.roundTripSerDe - val dist = new NormalDistribution() - dist.reseedRandomGenerator(seed) + val dist = new NormalDistribution() + dist.reseedRandomGenerator(seed) val tdo = TDigest.sketch(Iterator.fill(ss) { dist.sample }, delta = delta) - val tdi = roundTripSerDe(tdo) + val tdi = roundTripSerDe(tdo) - (tdi == tdo) should be (true) + (tdi == tdo) should be(true) - testTDvsDist(tdi, dist, math.sqrt(dist.getNumericalVariance())) should be (true) + testTDvsDist(tdi, dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } } } diff --git a/src/test/scala/org/isarnproject/sketches/java/JavaTDigestTest.scala b/src/test/scala/org/isarnproject/sketches/java/JavaTDigestTest.scala index ea80b6e..9d5f320 100644 --- a/src/test/scala/org/isarnproject/sketches/java/JavaTDigestTest.scala +++ b/src/test/scala/org/isarnproject/sketches/java/JavaTDigestTest.scala @@ -16,14 +16,17 @@ limitations under the License. package org.isarnproject.sketches.java -import org.scalatest._ - import org.isarnproject.scalatest.matchers.seq._ +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AsyncWordSpec + + +class JavaTDigestTest extends AsyncWordSpec with Matchers { -class JavaTDigestTest extends FlatSpec with Matchers { import org.apache.commons.math3.distribution.RealDistribution import org.apache.commons.math3.distribution.IntegerDistribution + val seed = 235711L scala.util.Random.setSeed(seed) @@ -41,7 +44,7 @@ class JavaTDigestTest extends FlatSpec with Matchers { .map(x => math.abs(td.cdf(x) - dist.cumulativeProbability(x))).max val dInv = (0.01 to 0.99 by 0.01).iterator - .map(x => math.abs(td.cdfInverse(x) - dist.inverseCumulativeProbability(x))).max / stdv + .map(x => math.abs(td.cdfInverse(x) - dist.inverseCumulativeProbability(x))).max / stdv val pass = d <= maxD && dInv <= maxDI if (!pass) Console.err.println(s"testTDvsDist failure: d= $d dInv= $dInv") @@ -59,7 +62,7 @@ class JavaTDigestTest extends FlatSpec with Matchers { } def testSamplingPMF(td: TDigest, dist: IntegerDistribution): Boolean = { - td.nclusters should be <=(td.maxDiscrete) + td.nclusters should be <= (td.maxDiscrete) val tdSamples = Array.fill(10000) { td.samplePMF } val distSamples = Array.fill(10000) { dist.sample.toDouble } val kst = new org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest() @@ -103,168 +106,170 @@ class JavaTDigestTest extends FlatSpec with Matchers { testMonotoneCDF(dist) && testMonotoneCDFI(dist) } - it should "sketch a uniform distribution" in { - import org.apache.commons.math3.distribution.UniformRealDistribution - val dist = new UniformRealDistribution() - testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } + it should { + "sketch a uniform distribution" in { + import org.apache.commons.math3.distribution.UniformRealDistribution + val dist = new UniformRealDistribution() + testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } - it should "sketch a normal distribution" in { - import org.apache.commons.math3.distribution.NormalDistribution - val dist = new NormalDistribution() - testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } + "sketch a normal distribution" in { + import org.apache.commons.math3.distribution.NormalDistribution + val dist = new NormalDistribution() + testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } - it should "sketch an exponential distribution" in { - import org.apache.commons.math3.distribution.ExponentialDistribution - val dist = new ExponentialDistribution(1.0) - testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } + "sketch an exponential distribution" in { + import org.apache.commons.math3.distribution.ExponentialDistribution + val dist = new ExponentialDistribution(1.0) + testDistribution(dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } - it should "aggregate with another t-digest using merge method" in { - import org.apache.commons.math3.distribution.NormalDistribution - val dist = new NormalDistribution() - dist.reseedRandomGenerator(seed) + "aggregate with another t-digest using merge method" in { + import org.apache.commons.math3.distribution.NormalDistribution + val dist = new NormalDistribution() + dist.reseedRandomGenerator(seed) val td1 = TDigest.sketch(Array.fill(ss) { dist.sample }, delta) val td2 = TDigest.sketch(Array.fill(ss) { dist.sample }, delta) - testTDvsDist(TDigest.merge(td1, td2), dist, math.sqrt(dist.getNumericalVariance())) should be (true) - } + testTDvsDist(TDigest.merge(td1, td2), dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } - it should "respect monotonic cdf and inverse" in { - import org.apache.commons.math3.distribution.ExponentialDistribution - import org.apache.commons.math3.distribution.NormalDistribution - import org.apache.commons.math3.distribution.UniformRealDistribution + "respect monotonic cdf and inverse" in { + import org.apache.commons.math3.distribution.ExponentialDistribution + import org.apache.commons.math3.distribution.NormalDistribution + import org.apache.commons.math3.distribution.UniformRealDistribution - testMonotone(new UniformRealDistribution()) should be (true) - testMonotone(new ExponentialDistribution(1.0)) should be (true) - testMonotone(new NormalDistribution(0.0, 0.1)) should be (true) - } + testMonotone(new UniformRealDistribution()) should be(true) + testMonotone(new ExponentialDistribution(1.0)) should be(true) + testMonotone(new NormalDistribution(0.0, 0.1)) should be(true) + } - it should "respect maxDiscrete parameter" in { - import org.apache.commons.math3.distribution.GeometricDistribution - val gd = new GeometricDistribution(0.33) - val data = gd.sample(1000000).map(_.toDouble) - val dataUniq = data.distinct.sorted - val kt = dataUniq.map(_.toDouble).toSet - val td = TDigest.sketch(data, delta, 50) - val clust = td.cent - clust.toSet should be (kt) - val D = clust.map { x => td.cdfDiscrete(x) } - .zip(dataUniq.map { k => gd.cumulativeProbability(k.toInt) }) - .map { case (p1, p2) => math.abs(p1 - p2) } - .max - (D <= 0.01) should be (true) - testSamplingPMF(td, gd) should be (true) - } + "respect maxDiscrete parameter" in { + import org.apache.commons.math3.distribution.GeometricDistribution + val gd = new GeometricDistribution(0.33) + val data = gd.sample(1000000).map(_.toDouble) + val dataUniq = data.distinct.sorted + val kt = dataUniq.map(_.toDouble).toSet + val td = TDigest.sketch(data, delta, 50) + val clust = td.cent + clust.toSet should be(kt) + val D = clust.map { x => td.cdfDiscrete(x) } + .zip(dataUniq.map { k => gd.cumulativeProbability(k.toInt) }) + .map { case (p1, p2) => math.abs(p1 - p2) } + .max + (D <= 0.01) should be(true) + testSamplingPMF(td, gd) should be(true) + } - it should "respect maxDiscrete parameter over merge" in { - import org.apache.commons.math3.distribution.GeometricDistribution - val gd = new GeometricDistribution(0.33) + "respect maxDiscrete parameter over merge" in { + import org.apache.commons.math3.distribution.GeometricDistribution + val gd = new GeometricDistribution(0.33) val tdvec = Vector.fill(10) { TDigest.sketch(gd.sample(100000).map(_.toDouble), delta, 50) } - val td = tdvec.reduce((a, b) => TDigest.merge(a, b)) - val clust = td.cent - clust.map(_.toInt).map(_.toDouble).toVector should beEqSeq(clust.toVector) - val D = clust.map { x => td.cdfDiscrete(x) } - .zip(clust.map(_.toInt).map { k => gd.cumulativeProbability(k) }) - .map { case (p1, p2) => math.abs(p1 - p2) } - .max - (D <= 0.01) should be (true) - testSamplingPMF(td, gd) should be (true) - } + val td = tdvec.reduce((a, b) => TDigest.merge(a, b)) + val clust = td.cent + clust.map(_.toInt).map(_.toDouble).toVector should beEqSeq(clust.toVector) + val D = clust.map { x => td.cdfDiscrete(x) } + .zip(clust.map(_.toInt).map { k => gd.cumulativeProbability(k) }) + .map { case (p1, p2) => math.abs(p1 - p2) } + .max + (D <= 0.01) should be(true) + testSamplingPMF(td, gd) should be(true) + } - it should "support copy constructor" in { - import org.apache.commons.math3.distribution.NormalDistribution + "support copy constructor" in { + import org.apache.commons.math3.distribution.NormalDistribution - val dist = new NormalDistribution() - dist.reseedRandomGenerator(seed) + val dist = new NormalDistribution() + dist.reseedRandomGenerator(seed) val data = Array.fill(ss) { dist.sample } - val td1 = TDigest.sketch(data, delta) - val td2 = new TDigest(td1) - (td2.equals(td1)) should be (true) - (td1.equals(td2)) should be (true) - - // add more data and re-check equality to ensure - // that all state for future updates was correctly copied - for { x <- data } { - td1.update(x) - td2.update(x) + val td1 = TDigest.sketch(data, delta) + val td2 = new TDigest(td1) + (td2.equals(td1)) should be(true) + (td1.equals(td2)) should be(true) + + // add more data and re-check equality to ensure + // that all state for future updates was correctly copied + for {x <- data} { + td1.update(x) + td2.update(x) + } + (td2.equals(td1)) should be(true) + (td1.equals(td2)) should be(true) } - (td2.equals(td1)) should be (true) - (td1.equals(td2)) should be (true) - } - def testTDClose(td1: TDigest, td2: TDigest, eps: Double = 1e-6): Unit = { - td1.getCompression() should be (td2.getCompression()) - td1.getMaxDiscrete() should be (td2.getMaxDiscrete()) - td1.size() should be (td2.size()) - td1.mass() should be (td2.mass() +- eps) - for { j <- 0 until td1.size() } { - td1.getCentUnsafe()(j) should be (td2.getCentUnsafe()(j) +- eps) - td1.getMassUnsafe()(j) should be (td2.getMassUnsafe()(j) +- eps) - td1.getFTUnsafe()(1 + j) should be (td2.getFTUnsafe()(1 + j) +- eps) + def testTDClose(td1: TDigest, td2: TDigest, eps: Double = 1e-6): Unit = { + td1.getCompression() should be(td2.getCompression()) + td1.getMaxDiscrete() should be(td2.getMaxDiscrete()) + td1.size() should be(td2.size()) + td1.mass() should be(td2.mass() +- eps) + for {j <- 0 until td1.size()} { + td1.getCentUnsafe()(j) should be(td2.getCentUnsafe()(j) +- eps) + td1.getMassUnsafe()(j) should be(td2.getMassUnsafe()(j) +- eps) + td1.getFTUnsafe()(1 + j) should be(td2.getFTUnsafe()(1 + j) +- eps) + } } - } - it should "support dser constructor" in { - import java.util.Arrays; - import org.apache.commons.math3.distribution.NormalDistribution + "support dser constructor" in { + import java.util.Arrays; + import org.apache.commons.math3.distribution.NormalDistribution - val eps = 1e-9 + val eps = 1e-9 - val dist = new NormalDistribution() - dist.reseedRandomGenerator(seed) + val dist = new NormalDistribution() + dist.reseedRandomGenerator(seed) val data = Array.fill(ss) { dist.sample } - // test constructing empty t-digests - val td1 = new TDigest(0.5, 0, Array.empty[Double], Array.empty[Double]) - val td2 = new TDigest( - td1.getCompression(), - td1.getMaxDiscrete(), - Arrays.copyOf(td1.getCentUnsafe(), td1.size()), - Arrays.copyOf(td1.getMassUnsafe(), td1.size()) - ) - testTDClose(td1, td2, eps) - - // test sketching from empty state - for { x <- data } { - td1.update(x) - td2.update(x) - } - testTDClose(td1, td2, eps) - - // copy from non-empty state - val td3 = new TDigest( - td1.getCompression(), - td1.getMaxDiscrete(), - Arrays.copyOf(td1.getCentUnsafe(), td1.size()), - Arrays.copyOf(td1.getMassUnsafe(), td1.size()) - ) - testTDClose(td1, td3, eps) - - // test from non-empty state - for { x <- data } { - td1.update(x) - td3.update(x) + // test constructing empty t-digests + val td1 = new TDigest(0.5, 0, Array.empty[Double], Array.empty[Double]) + val td2 = new TDigest( + td1.getCompression(), + td1.getMaxDiscrete(), + Arrays.copyOf(td1.getCentUnsafe(), td1.size()), + Arrays.copyOf(td1.getMassUnsafe(), td1.size()) + ) + testTDClose(td1, td2, eps) + + // test sketching from empty state + for {x <- data} { + td1.update(x) + td2.update(x) + } + testTDClose(td1, td2, eps) + + // copy from non-empty state + val td3 = new TDigest( + td1.getCompression(), + td1.getMaxDiscrete(), + Arrays.copyOf(td1.getCentUnsafe(), td1.size()), + Arrays.copyOf(td1.getMassUnsafe(), td1.size()) + ) + testTDClose(td1, td3, eps) + + // test from non-empty state + for {x <- data} { + td1.update(x) + td3.update(x) + } + testTDClose(td1, td3, eps) } - testTDClose(td1, td3, eps) - } - it should "serialize and deserialize" in { - import org.apache.commons.math3.distribution.NormalDistribution + "serialize and deserialize" in { + import org.apache.commons.math3.distribution.NormalDistribution - import org.isarnproject.scalatest.serde.roundTripSerDe + import org.isarnproject.scalatest.serde.roundTripSerDe - val dist = new NormalDistribution() - dist.reseedRandomGenerator(seed) + val dist = new NormalDistribution() + dist.reseedRandomGenerator(seed) val tdo = TDigest.sketch(Array.fill(ss) { dist.sample }, delta) - val tdi = roundTripSerDe(tdo) + val tdi = roundTripSerDe(tdo) - (tdi.equals(tdo)) should be (true) + (tdi.equals(tdo)) should be(true) - testTDvsDist(tdi, dist, math.sqrt(dist.getNumericalVariance())) should be (true) + testTDvsDist(tdi, dist, math.sqrt(dist.getNumericalVariance())) should be(true) + } } }