
Commit 75d21fa

[quantization] Add calculation of gzip compression ratio (#289)

* Upgrade to tfjs-node(-gpu) 1.2.3; Improve plotting script
* [quantization] Add calculation of gzip compression ratio
* Improve script printout and README.md
1 parent d50d0b6 commit 75d21fa

File tree: 6 files changed, +113 -44 lines

- quantization/README.md
- quantization/draw_quantization.py
- quantization/eval_mobilenetv2.js
- quantization/package.json
- quantization/quantize_evaluate.sh
- quantization/yarn.lock

quantization/README.md (+34 -2)

@@ -41,18 +41,46 @@ quantizing the weights to 8 bits leads to a significant deterioration in
 accuracy, as measured by the top-1 and top-5 accuracies. See example results
 in the table below:
 
-| Dataset and Mdoel | Original (no-quantization) | 16-bit quantization | 8-bit quantization |
+| Dataset and Model | Original (no-quantization) | 16-bit quantization | 8-bit quantization |
 | ---------------------- | -------------------------- | ------------------- | ------------------ |
 | housing: multi-layer regressor | MAE=0.311984 | MAE=0.311983 | MAE=0.312780 |
 | MNIST: convnet | accuracy=0.9952 | accuracy=0.9952 | accuracy=0.9952 |
 | Fashion MNIST: convnet | accuracy=0.922 | accuracy=0.922 | accuracy=0.9211 |
 | MobileNetV2 | top-1 accuracy=0.618; top-5 accuracy=0.788 | top-1 accuracy=0.624; top-5 accuracy=0.789 | top-1 accuracy=0.280; top-5 accuracy=0.490 |
 
-MAE Stands for mean absolute error.
+MAE stands for mean absolute error (lower is better).
 
 They demonstrate different effects of the same quantization technique
 on different problems.
 
+### Effect of quantization on gzip compression ratio
+
+An additional factor affecting the over-the-wire size of models
+under quantization is the gzip compression ratio. This factor should be
+taken into account because gzip is widely used to transmit large files
+over the web.
+
+Most non-quantized models (i.e.,
+models with 32-bit float weights) are not very compressible, due to
+the noise-like variation in their weight parameters, which contain
+few repeating patterns. The same is true for models with weights
+quantized at 16-bit precision. However, when models are quantized
+at 8-bit precision, there is usually a significant increase in the
+gzip compression ratio. The `yarn quantize-and-evaluate*` commands in
+this example (see sections below) not only evaluate accuracy, but also
+calculate the gzip compression ratio of model files under different
+levels of quantization. The table below summarizes the compression ratios
+from the four models covered by this example (higher is better):
+
+gzip compression ratio:
+`(total size of the model.json and weight files) / (size of gzipped tar ball)`
+
+| Model | Original (no-quantization) | 16-bit quantization | 8-bit quantization |
+| ---------- | -------------------------- | ------------------- | ------------------ |
+| housing: multi-layer regressor | 1.121 | 1.161 | 1.388 |
+| MNIST: convnet | 1.082 | 1.037 | 1.184 |
+| Fashion MNIST: convnet | 1.078 | 1.048 | 1.229 |
+| MobileNetV2 | 1.085 | 1.063 | 1.271 |
+
 ## Running the housing quantization demo
 
 In preparation, do:
@@ -105,6 +133,10 @@ and evaluate the effects on the model's test accuracy, do:
 yarn quantize-and-evaluate-mnist
 ```
 
+The command also calculates the ratio of gzip compression for the
+model's saved artifacts under the three different levels of quantization
+(no-quantization, 16-bit, and 8-bit).
+
 ## Running the Fashion-MNIST quantization demo
 
 In preparation, do:
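
The gzip compression ratio defined in the README addition above can also be computed directly. Below is a minimal Python sketch of the same quantity, assuming a hypothetical `model_dir` that holds a saved TensorFlow.js model (`model.json` plus weight files); the example itself computes this ratio in `quantize_evaluate.sh` (see the diff further down).

```python
import os
import tarfile
import tempfile


def gzip_compression_ratio(model_dir):
  """(total size of model.json and weight files) / (size of gzipped tarball)."""
  # Sum the sizes of all files under the model directory.
  total_bytes = sum(
      os.path.getsize(os.path.join(root, name))
      for root, _, names in os.walk(model_dir)
      for name in names)

  # Pack the directory into a gzipped tarball and measure its size.
  with tempfile.NamedTemporaryFile(suffix='.tar.gz', delete=False) as tmp:
    tarball_path = tmp.name
  with tarfile.open(tarball_path, 'w:gz') as tar:
    tar.add(model_dir)
  tarball_bytes = os.path.getsize(tarball_path)
  os.remove(tarball_path)

  return total_bytes / tarball_bytes


if __name__ == '__main__':
  # Hypothetical path; substitute a directory produced by quantize_evaluate.sh.
  print(gzip_compression_ratio('models/MobileNetV2/original'))
```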

quantization/draw_quantization.py (+1 -5)

@@ -81,7 +81,7 @@ def main():
   # Number of points along the x-axis used to draw the sine wave.
   n_points = 1e6
   xs = np.linspace(-np.pi, np.pi, n_points).astype(np.float64)
-  w = np.sin(xs)
+  w = xs
 
   w_16bit = dequantize(*quantize(w, 16))
   w_8bit = dequantize(*quantize(w, 8))
@@ -90,10 +90,6 @@
   plot_range = range(int(n_points * (0.5 - plot_delta)),
                      int(n_points * (0.5 + plot_delta)))
 
-  print(w[plot_range])
-  print(w_16bit[plot_range])
-  print(w_8bit[plot_range])
-
   plt.figure(figsize=(20, 6))
   plt.subplot(1, 3, 1)
   plt.plot(xs[plot_range], w[plot_range], '-')
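
The `quantize` and `dequantize` helpers called above are defined elsewhere in `draw_quantization.py` and are not part of this diff. As a rough illustration only, a min/max affine quantizer with the same calling convention might look like the sketch below; this is an assumption made for illustration, not the script's actual implementation.

```python
import numpy as np


def quantize(w, num_bits):
  # Map float values onto integers in [0, 2**num_bits - 1] (affine min/max scheme).
  w_min, w_max = float(np.min(w)), float(np.max(w))
  # Guard against a zero range (constant input) to avoid division by zero.
  scale = (w_max - w_min) / (2 ** num_bits - 1) or 1.0
  q = np.round((w - w_min) / scale).astype(np.int64)
  return q, w_min, scale


def dequantize(q, w_min, scale):
  # Recover approximate float values from the quantized integers.
  return q.astype(np.float64) * scale + w_min


# Usage mirroring the script: each element of w_8bit differs from w by at most
# about half of the 8-bit quantization step.
w = np.linspace(-np.pi, np.pi, 1000)
w_8bit = dequantize(*quantize(w, 8))
```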

quantization/eval_mobilenetv2.js (-1)

@@ -83,7 +83,6 @@ async function main() {
 
   const imageH = model.inputs[0].shape[2];
   const imageW = model.inputs[0].shape[2];
-  console.log(`imageH = ${imageH}; imageW = ${imageW}`);
 
   // Load the images into a tensor.
   const dirContent = fs.readdirSync(args.imageDir);

quantization/package.json (+2 -2)

@@ -23,8 +23,8 @@
     "quantize-and-evaluate-MobileNetV2": "./quantize_evaluate.sh MobileNetV2"
   },
   "devDependencies": {
-    "@tensorflow/tfjs-node": "^1.1.2",
-    "@tensorflow/tfjs-node-gpu": "^1.1.2",
+    "@tensorflow/tfjs-node": "^1.2.3",
+    "@tensorflow/tfjs-node-gpu": "^1.2.3",
     "argparse": "^1.0.10",
     "babel-cli": "^6.26.0",
     "babel-core": "^6.26.3",

quantization/quantize_evaluate.sh (+36)

@@ -101,23 +101,59 @@ if [[ "${MODEL_NAME}" == "MobileNetV2" ]]; then
   fi
 
   # Evaluate accuracy under no quantization (i.e., full 32-bit weight precision).
+  echo "=== Accuracy evaluation: No quantization ==="
   yarn "eval-${MODEL_NAME}" "${MODEL_JSON_PATH}" \
       "${IMAGENET_1000_SAMPLES_DIR}"
 
+
   # Evaluate accuracy under 16-bit quantization.
+  echo "=== Accuracy evaluation: 16-bit quantization ==="
   yarn "eval-${MODEL_NAME}" "${MODEL_PATH_16BIT}/model.json" \
       "${IMAGENET_1000_SAMPLES_DIR}"
 
   # Evaluate accuracy under 8-bit quantization.
+  echo "=== Accuracy evaluation: 8-bit quantization ==="
   yarn "eval-${MODEL_NAME}" "${MODEL_PATH_8BIT}/model.json" \
       "${IMAGENET_1000_SAMPLES_DIR}"
 else
   # Evaluate accuracy under no quantization (i.e., full 32-bit weight precision).
+  echo "=== Accuracy evaluation: No quantization ==="
   yarn "eval-${MODEL_NAME}" "${MODEL_JSON_PATH}"
 
   # Evaluate accuracy under 16-bit quantization.
+  echo "=== Accuracy evaluation: 16-bit quantization ==="
   yarn "eval-${MODEL_NAME}" "${MODEL_PATH_16BIT}/model.json"
 
   # Evaluate accuracy under 8-bit quantization.
+  echo "=== Accuracy evaluation: 8-bit quantization ==="
   yarn "eval-${MODEL_NAME}" "${MODEL_PATH_8BIT}/model.json"
 fi
+
+function calc_gzip_ratio() {
+  ORIGINAL_FILES_SIZE_BYTES="$(ls -lAR ${1} | grep -v '^d' | awk '{total += $5} END {print total}')"
+  TEMP_TARBALL="$(mktemp)"
+  tar czf "${TEMP_TARBALL}" "${1}"
+  TARBALL_SIZE="$(wc -c < ${TEMP_TARBALL})"
+  ZIP_RATIO="$(awk "BEGIN { print(${ORIGINAL_FILES_SIZE_BYTES} / ${TARBALL_SIZE}) }")"
+  rm "${TEMP_TARBALL}"
+
+  echo "  Total file size: ${ORIGINAL_FILES_SIZE_BYTES} bytes"
+  echo "  gzipped tarball size: ${TARBALL_SIZE} bytes"
+  echo "  gzip ratio: ${ZIP_RATIO}"
+  echo
+}
+
+echo
+echo "=== gzip ratios ==="
+
+# Calculate the gzip ratio of the original (unquantized) model.
+echo "Original model (No quantization):"
+calc_gzip_ratio "${MODEL_PATH}"
+
+# Calculate the gzip ratio of the 16-bit-quantized model.
+echo "16-bit-quantized model:"
+calc_gzip_ratio "${MODEL_PATH_16BIT}"
+
+# Calculate the gzip ratio of the 8-bit-quantized model.
+echo "8-bit-quantized model:"
+calc_gzip_ratio "${MODEL_PATH_8BIT}"

quantization/yarn.lock (+40 -34)

@@ -262,16 +262,17 @@
   dependencies:
     core-js "^2.5.7"
 
-"@tensorflow/tfjs-converter@1.1.2":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-converter/-/tfjs-converter-1.1.2.tgz#2400ac77b30f973f1fcb26c912b28271f7f4d605"
-  integrity sha512-KuLIIJYzmRmtJXcjBH3inQVhTHbABj2TNAVS3ss12hzDiEE/RiRb/LZKo8XV2WczuZXTq+gxep84PWXSH/HQXA==
+"@tensorflow/tfjs-converter@1.2.2":
+  version "1.2.2"
+  resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-converter/-/tfjs-converter-1.2.2.tgz#c95e2f79b1de830b8079c7704dc8463ced2d2b79"
+  integrity sha512-NM2NcPRHpCNeJdBxHcYpmW9ZHTQ2lJFJgmgGpQ8CxSC9CtQB05bFONs3SKcwMNDE/69QBRVom5DYqLCVUg+A+g==
 
-"@tensorflow/tfjs-core@1.1.2":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-core/-/tfjs-core-1.1.2.tgz#efb8b3688fbff353e51d41a3d832dde8c1bc321d"
-  integrity sha512-xCAUIAh14OFnHt+IQUUZIH/P/jH/EWvewL0Ty6q6USUx4YZ+HvKwNw1G7h/gKki4A31BJ0avD04ylBKc75laGg==
+"@tensorflow/tfjs-core@1.2.2":
+  version "1.2.2"
+  resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-core/-/tfjs-core-1.2.2.tgz#2efa89e323612a26aeccee9b3ae9f5ac5a635bbe"
+  integrity sha512-2hCHMKjh3UNpLEjbAEaurrTGJyj/KpLtMSAraWgHA1vGY0kmk50BBSbgCDmXWUVm7lyh/SkCq4/GrGDZktEs3g==
   dependencies:
+    "@types/offscreencanvas" "~2019.3.0"
     "@types/seedrandom" "2.4.27"
     "@types/webgl-ext" "0.0.30"
     "@types/webgl2" "0.0.4"
@@ -280,54 +281,54 @@
   optionalDependencies:
     rollup-plugin-visualizer "~1.1.1"
 
-"@tensorflow/tfjs-data@1.1.2":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-data/-/tfjs-data-1.1.2.tgz#f37809aa89946a834f3566bd090db852f2c4244e"
-  integrity sha512-K30QdocXd5zn3rpGbRTC4sO42q8tK1SGqDHE2IEkvYzcg0PAU3cEMODGTLjKt0z1Lfy1JKgs0FPcvazqmxpjGA==
+"@tensorflow/tfjs-data@1.2.2":
+  version "1.2.2"
+  resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-data/-/tfjs-data-1.2.2.tgz#bd802b4096df04277d302d66598aef47fbffef85"
+  integrity sha512-oHGBoGdnCl2RyouLKplQqo+iil0iJgPbi/aoHizhpO77UBuJXlKMblH8w5GbxVAw3hKxWlqzYpxPo6rVRgehNA==
   dependencies:
     "@types/node-fetch" "^2.1.2"
     node-fetch "~2.1.2"
 
-"@tensorflow/tfjs-layers@1.1.2":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-layers/-/tfjs-layers-1.1.2.tgz#29393221446a877962b71084305597295504801e"
-  integrity sha512-iP9mJz/79nK+sXBWdxQkeNIqn9p+O/x3g15ntIXpEaLXOGjQEE12iKtLCWgG3qH+FltOVt5hTbAXkj/yDym1Xg==
+"@tensorflow/tfjs-layers@1.2.2":
+  version "1.2.2"
+  resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-layers/-/tfjs-layers-1.2.2.tgz#3365dbbca7cfa4fcc6cacc9fffc90d664606bd4e"
+  integrity sha512-yzWZaZrCVpEyTkSrzMe4OOP4aGUfaaROE/zR9fPsPGGF8wLlbLNZUJjeYUmjy3G3pXGaM0mQUbLR5Vd707CVtQ==
 
-"@tensorflow/tfjs-node-gpu@^1.1.2":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-node-gpu/-/tfjs-node-gpu-1.1.2.tgz#c532ab9ca84745ffe27af8b7cef40d329eace221"
-  integrity sha512-OvLCngIfqdPuYqGrUDnwTLnolOVrS7Zr5ufHWBA+gxesNFHXgfOm0nBFPhXHtfIaaYdMcM8Gsos4frXzqRzsiQ==
+"@tensorflow/tfjs-node-gpu@^1.2.3":
+  version "1.2.3"
+  resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-node-gpu/-/tfjs-node-gpu-1.2.3.tgz#3786d814bc5ca4c10e88a4a490feea65a39bd8cf"
+  integrity sha512-y8A1dF4WZZ+IvCCv/hrEUVV9O1ua0f5rZVzaMnJx+xv8o51DwTGk7h6tsnE/F2N6pf9mKLsY8roUBviIasVEmQ==
   dependencies:
-    "@tensorflow/tfjs" "~1.1.2"
+    "@tensorflow/tfjs" "~1.2.2"
     adm-zip "^0.4.11"
     bindings "~1.3.0"
     https-proxy-agent "^2.2.1"
     progress "^2.0.0"
     rimraf "^2.6.2"
     tar "^4.4.6"
 
-"@tensorflow/tfjs-node@^1.1.2":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-node/-/tfjs-node-1.1.2.tgz#b5979c94fc80351ef2f5fe2a58c58f385cabaaf0"
-  integrity sha512-QBEnptTDccUZXU1z2P++ZXJflZi+qh915uOu8BAHvBYujBmVp/rL3+HyZhJpgHn8GXz88z0dsXMNem5p4zJnvw==
+"@tensorflow/tfjs-node@^1.2.3":
+  version "1.2.3"
+  resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-node/-/tfjs-node-1.2.3.tgz#b2a6c3051da080a853be34b4bdc6649479139852"
+  integrity sha512-6/V3JfoxnvUJhZle8+7V0ln7KjUIJOlDCk43EBQg+XoGudvp3L1x0RXcfCQ1nXFIlZVYixNJYd3XTIOHZBECSA==
   dependencies:
-    "@tensorflow/tfjs" "~1.1.2"
+    "@tensorflow/tfjs" "~1.2.2"
     adm-zip "^0.4.11"
     bindings "~1.3.0"
     https-proxy-agent "^2.2.1"
     progress "^2.0.0"
     rimraf "^2.6.2"
     tar "^4.4.6"
 
-"@tensorflow/tfjs@~1.1.2":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@tensorflow/tfjs/-/tfjs-1.1.2.tgz#9a1c2bbc4d82f9d18f250ab4a4d7c8ad43e2d432"
-  integrity sha512-b+ekLNEfMzaBszti6uGcS3pJoPNQuv1hxKEoY9Q3ix52fFJzI86nSvM1lwOcvUZy6DjPFDoyB8MO+dJHqecG5w==
+"@tensorflow/tfjs@~1.2.2":
+  version "1.2.2"
+  resolved "https://registry.yarnpkg.com/@tensorflow/tfjs/-/tfjs-1.2.2.tgz#e0cc7f1c4139e7c38f3ea478999f0972d354c948"
+  integrity sha512-HfhSzL2eTWhlT0r/A5wmo+u3bHe+an16p5wsnFH3ujn21fQ8QtGpSfDHQZjWx1kVFaQnV6KBG+17MOrRHoHlLA==
   dependencies:
-    "@tensorflow/tfjs-converter" "1.1.2"
-    "@tensorflow/tfjs-core" "1.1.2"
-    "@tensorflow/tfjs-data" "1.1.2"
-    "@tensorflow/tfjs-layers" "1.1.2"
+    "@tensorflow/tfjs-converter" "1.2.2"
+    "@tensorflow/tfjs-core" "1.2.2"
+    "@tensorflow/tfjs-data" "1.2.2"
+    "@tensorflow/tfjs-layers" "1.2.2"
 
 "@types/node-fetch@^2.1.2":
   version "2.3.4"
@@ -341,6 +342,11 @@
   resolved "https://registry.yarnpkg.com/@types/node/-/node-12.0.2.tgz#3452a24edf9fea138b48fad4a0a028a683da1e40"
   integrity sha512-5tabW/i+9mhrfEOUcLDu2xBPsHJ+X5Orqy9FKpale3SjDA17j5AEpYq5vfy3oAeAHGcvANRCO3NV3d2D6q3NiA==
 
+"@types/offscreencanvas@~2019.3.0":
+  version "2019.3.0"
+  resolved "https://registry.yarnpkg.com/@types/offscreencanvas/-/offscreencanvas-2019.3.0.tgz#3336428ec7e9180cf4566dfea5da04eb586a6553"
+  integrity sha512-esIJx9bQg+QYF0ra8GnvfianIY8qWB0GBx54PK5Eps6m+xTj86KLavHv6qDhzKcu5UUOgNfJ2pWaIIV7TRUd9Q==
+
 "@types/seedrandom@2.4.27":
   version "2.4.27"
   resolved "https://registry.yarnpkg.com/@types/seedrandom/-/seedrandom-2.4.27.tgz#9db563937dd86915f69092bc43259d2f48578e41"
