Add args to customize warmup_steps and steps for resnet50, resnet101, and inceptionv3 int8 (#192)

dmsuehir · Karthik Vadla · commit c095fc2c534c · 2019-02-25T08:48:24.000-08:00
diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh
@@ -163,6 +163,22 @@ function install_protoc() {
 
 }
 
+function add_steps_args() {
+  # Echoes "--steps=<n> --warmup-steps=<n>" built from ${steps} and ${warmup_steps}; each flag is left out when its variable is unset or empty.
+  local steps_arg=""
+  local warmup_steps_arg=""
+
+  if [ -n "${steps}" ]; then
+    steps_arg="--steps=${steps}"
+  fi
+
+  if [ -n "${warmup_steps}" ]; then
+    warmup_steps_arg="--warmup-steps=${warmup_steps}"
+  fi
+
+  echo "${steps_arg} ${warmup_steps_arg}"
+}
+
 # DCGAN model
 function dcgan() {
   if [ ${PRECISION} == "fp32" ]; then
@@ -258,7 +274,7 @@ function inceptionv3() {
       input_width_arg="--input-width=${input_width}"
     fi
 
-    CMD="${CMD} ${input_height_arg} ${input_width_arg}"
+    CMD="${CMD} ${input_height_arg} ${input_width_arg} $(add_steps_args)"
     PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model
 
   elif [ ${PRECISION} == "fp32" ]; then
@@ -365,6 +381,7 @@ function resnet101() {
     fi
 
     if [ ${PRECISION} == "int8" ]; then
+        CMD="${CMD} $(add_steps_args)"
         PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model
     elif [ ${PRECISION} == "fp32" ]; then
       CMD="${CMD} --in-graph=${IN_GRAPH} --data-location=${DATASET_LOCATION}"
@@ -385,6 +402,8 @@ function resnet50() {
           echo "No Data directory specified, accuracy will not be calculated."
           exit 1
         fi
+
+        CMD="${CMD} $(add_steps_args)"
         PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model
 
     elif [ ${PRECISION} == "fp32" ]; then
diff --git a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md
@@ -107,6 +107,11 @@ python launch_benchmark.py \
     -- input_height=299 input_width=299
 ```
 
+When running performance benchmarking, you can optionally specify the
+number of `warmup_steps` and `steps` as extra args, as shown in the
+commands below. If these values are not specified, the script
+defaults to `warmup_steps=10` and `steps=50`.
+
 For latency (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`):
 
 ```
@@ -121,7 +126,7 @@ python launch_benchmark.py \
     --docker-image tf_int8_docker_image \
     --in-graph /home/myuser/inceptionv3_int8_pretrained_model.pb \
     --data-location /home/myuser/datasets/ImageNet_TFRecords \
-    -- input_height=299 input_width=299
+    -- input_height=299 input_width=299 warmup_steps=50 steps=500
 ```
 
 For throughput (using `--benchmark-only`, `--socket-id 0` and `--batch-size 128`):
@@ -138,7 +143,7 @@ python launch_benchmark.py \
     --docker-image tf_int8_docker_image \
     --in-graph /home/myuser/inceptionv3_int8_pretrained_model.pb \
     --data-location /home/myuser/datasets/ImageNet_TFRecords \
-    -- input_height=299 input_width=299
+    -- input_height=299 input_width=299 warmup_steps=50 steps=500
 ```
 
 Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands
@@ -162,35 +167,28 @@ Log location outside container: {--output-dir value}/benchmark_inceptionv3_infer
 
 Example log tail when benchmarking for latency:
 ```
-[Running warmup steps...]
-steps = 10, 56.8087550114 images/sec
-[Running benchmark steps...]
-steps = 10, 57.2046753318 images/sec
-steps = 20, 56.7181068289 images/sec
-steps = 30, 57.015714208 images/sec
-steps = 40, 57.4216088933 images/sec
-steps = 50, 57.491659242 images/sec
+...
+steps = 470, 53.7256017113 images/sec
+steps = 480, 52.5430812016 images/sec
+steps = 490, 52.9076139058 images/sec
+steps = 500, 53.5021876395 images/sec
 lscpu_path_cmd = command -v lscpu
 lscpu located here: /usr/bin/lscpu
 Ran inference with batch size 1
-Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_int8_20190104_185906.log
+Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_int8_20190223_194002.log
 ```
 
 Example log tail when benchmarking for throughput:
 ```
-[Running warmup steps...]
-steps = 10, 341.225945255 images/sec
-[Running benchmark steps...]
-steps = 10, 340.304326771 images/sec
-steps = 20, 339.108777134 images/sec
-steps = 30, 337.139199124 images/sec
-steps = 40, 341.177805273 images/sec
-steps = 50, 338.634144926 images/sec
+...
+steps = 470, 370.435654276 images/sec
+steps = 480, 369.710160177 images/sec
+steps = 490, 369.083388904 images/sec
+steps = 500, 370.287978128 images/sec
 lscpu_path_cmd = command -v lscpu
 lscpu located here: /usr/bin/lscpu
-Executing command: numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/benchmark.py --input_height=299 --input_width=299 --warmup_steps=10 --num_intra_threads=56 --num_inter_threads=2 --batch_size=128 --input_graph=/in_graph/inceptionv3_int8_pretrained_model.pb --steps=50
 Ran inference with batch size 128
-Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_int8_20190104_014141.log
+Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_int8_20190223_194314.log
 ```
 
 ## FP32 Inference Instructions
diff --git a/benchmarks/image_recognition/tensorflow/resnet101/README.md b/benchmarks/image_recognition/tensorflow/resnet101/README.md
@@ -91,6 +91,11 @@ $ python launch_benchmark.py \
     --in-graph=/home/myuser/resnet101_int8_pretrained_model.pb
 ```
 
+When running performance benchmarking, you can optionally specify the
+number of `warmup_steps` and `steps` as extra args, as shown in the
+commands below. If these values are not specified, the script
+defaults to `warmup_steps=40` and `steps=100`.
+
 For latency (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`):
 
 ```
@@ -103,7 +108,8 @@ python launch_benchmark.py \
     --batch-size 1 \
     --socket-id 0 \
     --docker-image tf_int8_docker_image \
-    --in-graph=/home/myuser/resnet101_int8_pretrained_model.pb
+    --in-graph=/home/myuser/resnet101_int8_pretrained_model.pb \
+    -- warmup_steps=50 steps=500
 ```
 
 For throughput (using `--benchmark-only`, `--socket-id 0` and `--batch-size 128`):
@@ -118,7 +124,8 @@ python launch_benchmark.py \
     --batch-size 128 \
     --socket-id 0 \
     --docker-image tf_int8_docker_image \
-    --in-graph=/home/myuser/resnet101_int8_pretrained_model.pb
+    --in-graph=/home/myuser/resnet101_int8_pretrained_model.pb \
+    -- warmup_steps=50 steps=500
 ```
 
 Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands
@@ -142,37 +149,28 @@ Log location outside container: {--output-dir value}/benchmark_resnet101_inferen
 
 Example log tail when benchmarking for latency:
 ```
-[Running warmup steps...]
-steps = 10, 53.3022912987 images/sec
-steps = 20, 54.8999856019 images/sec
-steps = 30, 54.5288420286 images/sec
-steps = 40, 54.3775556506 images/sec
-[Running benchmark steps...]
-steps = 10, 537.185143822 images/sec
-steps = 20, 268.75073286 images/sec
-steps = 30, 179.033434653 images/sec
-steps = 40, 134.356211634 images/sec
-steps = 50, 107.403547389 images/sec
-steps = 60, 89.3812766404 images/sec
-steps = 70, 76.565932747 images/sec
-steps = 80, 67.0330362294 images/sec
-steps = 90, 59.6184242546 images/sec
-steps = 100, 53.6588898046 images/sec
+...
+steps = 470, 48.3195530058 images/sec
+steps = 480, 47.2792312364 images/sec
+steps = 490, 46.3175214744 images/sec
+steps = 500, 45.4044245083 images/sec
 lscpu_path_cmd = command -v lscpu
 lscpu located here: /usr/bin/lscpu
 Ran inference with batch size 1
-Log location outside container: {--output-dir value}/benchmark_resnet101_inference.log
+Log location outside container: {--output-dir value}/benchmark_resnet101_inference_int8_20190223_191406.log
 ```
 
 Example log tail when benchmarking for throughput:
 ```
-steps = 80, 328.404413955 images/sec
-steps = 90, 291.945088967 images/sec
-steps = 100, 262.656894016 images/sec
+...
+steps = 470, 328.906266308 images/sec
+steps = 480, 322.0451309 images/sec
+steps = 490, 315.455582114 images/sec
+steps = 500, 309.142758646 images/sec
 lscpu_path_cmd = command -v lscpu
 lscpu located here: /usr/bin/lscpu
 Ran inference with batch size 128
-Log location outside container: {--output-dir value}/benchmark_resnet101_inference_int8_20190104_211412.log
+Log location outside container: {--output-dir value}/benchmark_resnet101_inference_int8_20190223_192438.log
 ```
 
 ## FP32 Inference Instructions
diff --git a/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py
@@ -55,10 +55,10 @@ def parse_args(self):
                                 help="input width")
             parser.add_argument('--warmup-steps', dest='warmup_steps',
                                 help='number of warmup steps', type=int,
-                                default=10)
+                                default=40)
             parser.add_argument('--steps', dest='steps',
                                 help='number of steps', type=int,
-                                default=200)
+                                default=100)
             parser.add_argument('--input-layer', dest='input_layer',
                                 help='name of input layer', type=str,
                                 default=None)
diff --git a/benchmarks/image_recognition/tensorflow/resnet50/README.md b/benchmarks/image_recognition/tensorflow/resnet50/README.md
@@ -79,7 +79,10 @@ Log location outside container: {--output-dir value}/benchmark_resnet50_inferenc
 * Evaluate the model performance: The ImageNet dataset is not needed in this case:
 Calculate the model throughput `images/sec`, the required parameters to run the inference script would include:
 the pre-trained `resnet50_int8_pretrained_model.pb` input graph file (from step
-2, the docker image (from step 3) and the `--benchmark-only` flag.
+2), the docker image (from step 3) and the `--benchmark-only` flag. You
+can optionally specify the number of `warmup_steps` and `steps` as extra
+args, as shown in the command below. If these values are not specified,
+the script defaults to `warmup_steps=10` and `steps=50`.
 
 ```
 $ cd /home/myuser/models/benchmarks
@@ -92,23 +95,21 @@ $ python launch_benchmark.py \
     --mode inference \
     --batch-size=128 \
     --benchmark-only \
-    --docker-image docker_image
+    --docker-image docker_image \
+    -- warmup_steps=50 steps=500
 ```
 The tail of the log output when the benchmarking completes should look
 something like this:
 ```
-[Running warmup steps...]
-steps = 10, 460.862674539 images/sec
-[Running benchmark steps...]
-steps = 10, 461.002369109 images/sec
-steps = 20, 460.082656541 images/sec
-steps = 30, 464.707827579 images/sec
-steps = 40, 463.187506632 images/sec
-steps = 50, 462.725212176 images/sec
+...
+steps = 470, 460.113806562 images/sec
+steps = 480, 460.073982602 images/sec
+steps = 490, 463.289831148 images/sec
+steps = 500, 463.521427264 images/sec
 lscpu_path_cmd = command -v lscpu
 lscpu located here: /usr/bin/lscpu
 Ran inference with batch size 128
-Log location outside container: {--output-dir value}/benchmark_resnet50_inference_int8_20190104_213139.log
+Log location outside container: {--output-dir value}/benchmark_resnet50_inference_int8_20190223_180546.log
 ```
 
 Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands
diff --git a/tests/unit/common/tensorflow/tf_model_args.txt b/tests/unit/common/tensorflow/tf_model_args.txt