📝📈 Big improvements to benchmarks.

🔥 was able to delete some of my earlier custom benchmark code
nevans · Feb 1, 2021 · 46ca35e · 46ca35e
1 parent 4ac57dc
commit 46ca35e
Show file tree

Hide file tree

Showing 16 changed files with 1,104 additions and 916 deletions.
diff --git a/benchmarks/push_n.yml b/benchmarks/push_n.yml
@@ -3,33 +3,35 @@ prelude: |
   system("#{RbConfig.ruby} bin/rake compile", err: :out, exception: true)
   require "d_heap/benchmarks"
   include DHeap::Benchmarks
-  fill_random_vals
 
-  N = ENV.fetch("BENCH_N", 1000).to_i
+  # this is >4x faster than calling Kernel#rand (see random_vals.yml)
+  random_idx  = -1
+  random_len  = 1_000_000
+  random_vals = Array.new(random_len) { rand(0..10_000) }
+
+  N = ENV.fetch("BENCH_N", 100_000).to_i
 
 benchmark:
   - script: &script |
-       if __bmdv_i % N == 0
-         q.clear
-       end
-
-       q << random_val
+      q.clear if __bmdv_i % N == 0
+      random_idx = ((random_idx + 1) % random_len)
+      q << random_vals.fetch((random_idx + 1) % random_len)
     name:    "push N (findmin)"
-    prelude: "q = initq FindMin"
-    loop_count: 24000000
+    prelude: "q = FindMin.new"
+    loop_count: 30_000_000
   - script: *script
     name:    "push N (bsearch)"
-    prelude: "q = initq BSearch"
-    loop_count:  2300000
+    prelude: "q = BSearch.new"
+    loop_count:    600_000
   - script: *script
     name:    "push N (rb_heap)"
-    prelude: "q = initq RbHeap"
-    loop_count:  9800000
+    prelude: "q = RbHeap.new"
+    loop_count: 12_000_000
   - script: *script
     name:    "push N (c++ stl)"
-    prelude: "q = initq CppSTL"
-    loop_count: 18700000
+    prelude: "q = CppSTL.new"
+    loop_count: 18_000_000
   - script: *script
     name:    "push N (c_dheap)"
-    prelude: "q = initq DHeap"
-    loop_count: 25100000
+    prelude: "q = DHeap.new"
+    loop_count: 30_000_000
diff --git a/benchmarks/push_n_pop_n.yml b/benchmarks/push_n_pop_n.yml
@@ -3,50 +3,62 @@ prelude: |
   system("#{RbConfig.ruby} bin/rake compile", err: :out, exception: true)
   require "d_heap/benchmarks"
   include DHeap::Benchmarks
-  fill_random_vals
 
-  N = ENV.fetch("BENCH_N", 1000).to_i
+  # this is >4x faster than calling Kernel#rand (see random_vals.yml)
+  random_idx  = -1
+  random_len  = 1_000_000
+  random_vals = Array.new(random_len) { rand(0..10_000) }
+
+  N = ENV.fetch("BENCH_N", 100_000).to_i
   N2 = N * 2
 
   i = j = 0
 
+teardown: |
+  puts "teardown: size: %p, N: %p, i: %p, j: %p, __bmdv_i: %p" % [
+    q.size, N, i, j, __bmdv_i
+  ]
+
 benchmark:
   - script: &script |
       if i < N
         q.clear if __bmdv_i == 0
-        q << random_val
+        random_idx = ((random_idx + 1) % random_len)
+        q << random_vals.fetch((random_idx + 1) % random_len)
         i += 1
 
       elsif j < N
         q.pop
         j += 1
 
       elsif q.empty?
-        i = 1
         j = 0
-        q.clear
-        q << random_val
+        random_idx = ((random_idx + 1) % random_len)
+        q << random_vals.fetch((random_idx + 1) % random_len)
+        i = 1
 
       else
-        raise "q not empty!"
+        raise "q not empty! size: %p, N: %p, i: %p, j: %p, __bmdv_i: %p" % [
+          q.size, N, i, j, __bmdv_i
+        ]
       end
 
     name:    "push N + pop N (findmin)"
-    prelude: "q = initq FindMin"
-    loop_count:   200000
+    prelude: "q = FindMin.new"
+    loop_count:    300_000  # MAX:     3_000 * 2
   - script: *script
     name:    "push N + pop N (bsearch)"
-    prelude: "q = initq BSearch"
-    loop_count:  4000000
+    prelude: "q = BSearch.new"
+    loop_count:  1_200_000  # MAX:   300_000 * 2
   - script: *script
     name:    "push N + pop N (rb_heap)"
-    prelude: "q = initq RbHeap"
-    loop_count:  4000000
+    prelude: "q = RbHeap.new"
+    loop_count:  6_000_000  # MAX: 3_000_000 * 2
   - script: *script
     name:    "push N + pop N (c++ stl)"
-    prelude: "q = initq CppSTL"
-    loop_count: 16000000
+    prelude: "q = CppSTL.new"
+    loop_count: 18_000_000  # MAX: 3_000_000 * 2
   - script: *script
     name:    "push N + pop N (c_dheap)"
-    prelude: "q = initq DHeap"
-    loop_count: 16000000
+    prelude: "q = DHeap.new"
+    loop_count: 18_000_000  # MAX: 3_000_000 * 2
diff --git a/benchmarks/push_pop.yml b/benchmarks/push_pop.yml
@@ -3,30 +3,56 @@ prelude: |
   system("#{RbConfig.ruby} bin/rake compile", err: :out, exception: true)
   require "d_heap/benchmarks"
   include DHeap::Benchmarks
-  fill_random_vals
 
-  n = ENV.fetch("BENCH_N", 1000).to_i
+  # this is >4x faster than calling Kernel#rand (see random_vals.yml)
+  random_idx  = -1
+  random_len  = 1_000_000
+  random_vals = Array.new(random_len) { rand(0..10_000) }
+
+  N = ENV.fetch("BENCH_N", 10_000).to_i
 
 benchmark:
   - script: &script |
-       q << random_val
-       q.pop
+      random_idx = ((random_idx + 1) % random_len)
+      q << random_vals.fetch((random_idx + 1) % random_len)
+      q.pop
     name:    "push + pop (findmin)"
-    prelude: "q = FindMin.new(n) { random_val }"
-    loop_count:   250000
+    prelude: |
+      q = FindMin.new(N) {
+        random_idx = ((random_idx + 1) % random_len)
+        random_vals.fetch((random_idx + 1) % random_len)
+      }
+
   - script: *script
     name:    "push + pop (bsearch)"
-    prelude: "q = BSearch.new(n) { random_val }"
-    loop_count:  5000000
+    prelude: |
+      q = BSearch.new(N) {
+        random_idx = ((random_idx + 1) % random_len)
+        random_vals.fetch((random_idx + 1) % random_len)
+      }
+
   - script: *script
     name:    "push + pop (rb_heap)"
-    prelude: "q = RbHeap.new(n) { random_val }"
-    loop_count:  2000000
+    prelude: |
+      q = RbHeap.new(N) {
+        random_idx = ((random_idx + 1) % random_len)
+        random_vals.fetch((random_idx + 1) % random_len)
+      }
+
   - script: *script
-    name:    "push + pop (c++ stl)"
-    prelude: "q = initq CppSTL, n"
-    loop_count: 13000000
+    name: "push + pop (c++ stl)"
+    prelude: |
+      q = CppSTL.new
+      N.times do
+        random_idx = ((random_idx + 1) % random_len)
+        q << random_vals.fetch((random_idx + 1) % random_len)
+      end
+
   - script: *script
-    name:    "push + pop (c_dheap)"
-    prelude: "q = initq DHeap, n"
-    loop_count: 20000000
+    name: "push + pop (c_dheap)"
+    prelude: |
+      q = DHeap.new
+      N.times do
+        random_idx = ((random_idx + 1) % random_len)
+        q << random_vals.fetch((random_idx + 1) % random_len)
+      end
diff --git a/benchmarks/random_vals.yml b/benchmarks/random_vals.yml
@@ -0,0 +1,53 @@
+---
+prelude: "val = nil"
+teardown: "puts val" # ensure it is used and not somehow optimized away
+
+benchmark:
+
+  - name: "random_val"
+    prelude: |
+      require "d_heap/benchmarks"
+      include DHeap::Benchmarks
+      fill_random_vals
+    script: "random_val"
+
+  - name:   "inline random_vals.fetch"
+    prelude: |
+      random_idx  = -1
+      random_len  = 1_000_000
+      random_vals = Array.new(random_len) { rand(0..10_000) }
+    script: |
+      random_idx = ((random_idx + 1) % random_len)
+      val = random_vals.fetch((random_idx + 1) % random_len)
+
+  - name:   "inline random_vals[]"  # same index math as the fetch case; only the accessor differs
+    prelude: |
+      random_idx  = -1
+      random_len  = 1_000_000
+      random_vals = Array.new(random_len) { rand(0..10_000) }
+    script: |
+      random_idx = ((random_idx + 1) % random_len)
+      val = random_vals[(random_idx + 1) % random_len]
+
+  - name: "rand(range)"
+    prelude: "range = 0..10_000"
+    script: "val = rand(range)"
+
+  - name: "rand(0..10_000)"
+    script: "val = rand(0..10_000)"
+
+  # - name:   "just a method call"
+  #   prelude: |
+  #     def random_val; 1 end
+  #   script: |
+  #     val = random_val
+  # - name:   "just increment"
+  #   prelude: |
+  #     i = 0
+  #   script: |
+  #     i += 1
+  #     val = i
+  # - name:   "just assign a constant"
+  #   script: "val = 1"
+  # - name:   "noop"
+  #   script: "val"
diff --git a/bin/bench_charts b/bin/bench_charts
@@ -4,10 +4,19 @@ SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null; pwd -P)
 PROJECT_DIR=$(cd "$SCRIPT_DIR" > /dev/null; cd .. > /dev/null; pwd -P)
 cd "$PROJECT_DIR"
 
-opts=( --bundle -e 'N 100000;N 10000;N 1000;N 100;N 10' -o gruff )
+opts=( --bundle --repeat-count 4 -o gruff )
 
-for bm in push_n push_n_pop_n push_pop; do
-  bin/benchmark-driver "${opts[@]}" "benchmarks/${bm}.yml"
-  mv graph.png "images/${bm}.png"
-done
+bm=push_n
+n_values='N 300000;N 100000;N 30000;N 10000;N 3000;N 1000;N 300;N 100;N 30;N 10'
+bin/benchmark-driver "${opts[@]}" -e "$n_values" "benchmarks/${bm}.yml"
+mv graph.png "images/${bm}.png"
 
+bm=push_n_pop_n
+n_values='N 100000;N 30000;N 10000;N 3000;N 1000;N 300;N 100;N 30;N 10'
+bin/benchmark-driver "${opts[@]}" -e "$n_values" "benchmarks/${bm}.yml"
+mv graph.png "images/${bm}.png"
+
+bm=push_pop  # heap sizes below step by half-decades (x ~3.16) from 10 up to 10_000_000
+n_values='N 10_000_000;N 3_162_278;N 1_000_000;N 316_228;N 100_000;N 31_623;N 10_000;N 3162;N 1000;N 316;N 100;N 32;N 10'
+bin/benchmark-driver "${opts[@]}" -e "$n_values" "benchmarks/${bm}.yml"
+mv graph.png "images/${bm}.png"
diff --git a/bin/bench_n b/bin/bench_n
@@ -1,7 +1,47 @@
-#!/bin/sh
+#!/bin/bash
 set -eu
 
-export BENCH_N="$1"
-shift
+bin/rake clean compile > /dev/null 2>&1
 
-exec ruby "$@"
+ruby -r bundler/setup -r d_heap/benchmarks/benchmarker \
+  -e 'DHeap::Benchmarks.puts_version_info("Benchmarking")'
+
+function join_by { local IFS="$1"; shift; echo "$*"; }
+
+PUSH_POP_HEAP_SIZES=(
+  "N 1000000"
+  "N 10000000"
+  "N 3162278"
+  "N 316228"
+  "N 100000"
+  "N 31623"
+  "N 10000"
+  "N 3162"
+  "N 1000"
+  "N 316"
+  "N 100"
+  "N 32"
+  "N 10"
+)
+
+# run-duration is used for the *first* executable and used during warmup to
+# determine a loop count that will be identical for all runs.
+#
+# If we run the largest N first, we expect all later runs to be faster.  For
+# algorithms with high time complexity, the later runs will be so much faster
+# that they may not run enough iterations to get a good measurement.
+#
+# If we run the smallest N first, we expect all later runs to be slower.  For
+# algorithms with high time complexity, the later runs will be so much slower
+# that they may take hours to complete.
+#
+# I split the difference and calculate the loop count from 1/10 of max N.
+#
+# TODO: create a new benchmark_driver/runner class that targets a different loop
+# count for each executable, since they could have wildly divergent ips.
+
+bin/benchmark-driver --bundle \
+  -e "$(join_by ";" "${PUSH_POP_HEAP_SIZES[@]}")" \
+  --repeat-count 4 \
+  benchmarks/push_pop.yml \
+  "$@"