[jena-weather] Update example to tfjs 1.0 (#255)

caisq · web-flow · commit 96cdec00b4c3 · 2019-03-22T20:46:34.000-04:00
- Update the usage of `tf.data.generator()` as per the breaking API change of tfjs 1.0. - Update the usage of `tfvis.render.linechart()` and `tfvis.render.scatterplot()` as per the breaking API changes of tfjs-vis 1.0. - Add TensorBoard support for Node.js-based RNN training, along with the documentation of that in README.md. - Simplify the callback logic for `Model.fitDataset`. Fixes tensorflow/tfjs#1234
diff --git a/jena-weather/README.md b/jena-weather/README.md
@@ -28,7 +28,7 @@ TensorFlow.js
   training-set and validation-set losses at the end of batches and epochs of
   model training.
 
-## Training RNNs
+## Training RNNs in Node.js
 
 This example shows how to predict temperature using a few different types of
 models, including linear regressors, multilayer perceptrons, and recurrent
@@ -63,4 +63,36 @@ yarn
 yarn train-rnn --modelType baseline
 ```
 
-The training code is in the file [train-rnn.js](./train-rnn.js).
+### Monitoring Node.js Training in TensorBoard
+
+The Node.js-based training script allows you to log the loss values from the
+model to TensorBoard. Relative to printing loss values to the console, which
+the training script performs by default, logging to TensorBoard has the
+following advantages:
+
+1. Persistence of the loss values, so you can have a copy of the training
+   history available even if the system crashes in the middle of the training
+   for some reason, while console logs are more ephemeral.
+2. Visualizing the loss values as curves makes the trends easier to see.
+3. You will be able to monitor the training from a remote machine by accessing
+   the TensorBoard HTTP server.
+
+To do this in this example, add the flag `--logDir` to the `yarn train-rnn` command,
+followed by the directory to which you want the logs to be written, e.g.,
+
+```sh
+yarn train-rnn --gpu --logDir /tmp/jena-weather-logs-1
+```
+
+Then install tensorboard and start it by pointing it to the log directory:
+
+```sh
+# Skip this step if you have already installed tensorboard.
+pip install tensorboard
+
+tensorboard --logdir /tmp/jena-weather-logs-1
+```
+
+TensorBoard will print an HTTP URL in the terminal. Open your browser and
+navigate to the URL to view the loss curves in the Scalar dashboard of
+TensorBoard.
diff --git a/jena-weather/data.js b/jena-weather/data.js
@@ -275,69 +275,69 @@ export class JenaWeatherData {
     let startIndex = minIndex + lookBack;
     const lookBackSlices = Math.floor(lookBack / step);
 
-    function nextBatchFn() {
-      const rowIndices = [];
-      let done = false;  // Indicates whether the dataset has ended.
-      if (shuffle) {
-        // If `shuffle` is `true`, start from randomly chosen rows.
-        const range = maxIndex - (minIndex + lookBack);
-        for (let i = 0; i < batchSize; ++i) {
-          const row = minIndex + lookBack + Math.floor(Math.random() * range);
-          rowIndices.push(row);
-        }
-      } else {
-        // If `shuffle` is `false`, the starting row indices will be sequential.
-        let r = startIndex;
-        for (; r < startIndex + batchSize && r < maxIndex; ++r) {
-          rowIndices.push(r);
-        }
-        if (r >= maxIndex) {
-          done = true;
+    return {
+      next: () => {
+        const rowIndices = [];
+        let done = false;  // Indicates whether the dataset has ended.
+        if (shuffle) {
+          // If `shuffle` is `true`, start from randomly chosen rows.
+          const range = maxIndex - (minIndex + lookBack);
+          for (let i = 0; i < batchSize; ++i) {
+            const row = minIndex + lookBack + Math.floor(Math.random() * range);
+            rowIndices.push(row);
+          }
+        } else {
+          // If `shuffle` is `false`, the starting row indices will be sequential.
+          let r = startIndex;
+          for (; r < startIndex + batchSize && r < maxIndex; ++r) {
+            rowIndices.push(r);
+          }
+          if (r >= maxIndex) {
+            done = true;
+          }
         }
-      }
 
-      const numExamples = rowIndices.length;
-      startIndex += numExamples;
+        const numExamples = rowIndices.length;
+        startIndex += numExamples;
 
-      const featureLength =
-          includeDateTime ? this.numColumns + 2 : this.numColumns;
-      const samples = tf.buffer([numExamples, lookBackSlices, featureLength]);
-      const targets = tf.buffer([numExamples, 1]);
-      // Iterate over examples. Each example contains a number of rows.
-      for (let j = 0; j < numExamples; ++j) {
-        const rowIndex = rowIndices[j];
-        let exampleRow = 0;
-        // Iterate over rows in the example.
-        for (let r = rowIndex - lookBack; r < rowIndex; r += step) {
-          let exampleCol = 0;
-          // Iterate over features in the row.
-          for (let n = 0; n < featureLength; ++n) {
-            let value;
-            if (n < this.numColumns) {
-              value = normalize ? this.normalizedData[r][n] : this.data[r][n];
-            } else if (n === this.numColumns) {
-              // Normalized day-of-the-year feature.
-              value = this.normalizedDayOfYear[r];
-            } else {
-              // Normalized time-of-the-day feature.
-              value = this.normalizedTimeOfDay[r];
+        const featureLength =
+            includeDateTime ? this.numColumns + 2 : this.numColumns;
+        const samples = tf.buffer([numExamples, lookBackSlices, featureLength]);
+        const targets = tf.buffer([numExamples, 1]);
+        // Iterate over examples. Each example contains a number of rows.
+        for (let j = 0; j < numExamples; ++j) {
+          const rowIndex = rowIndices[j];
+          let exampleRow = 0;
+          // Iterate over rows in the example.
+          for (let r = rowIndex - lookBack; r < rowIndex; r += step) {
+            let exampleCol = 0;
+            // Iterate over features in the row.
+            for (let n = 0; n < featureLength; ++n) {
+              let value;
+              if (n < this.numColumns) {
+                value = normalize ? this.normalizedData[r][n] : this.data[r][n];
+              } else if (n === this.numColumns) {
+                // Normalized day-of-the-year feature.
+                value = this.normalizedDayOfYear[r];
+              } else {
+                // Normalized time-of-the-day feature.
+                value = this.normalizedTimeOfDay[r];
+              }
+              samples.set(value, j, exampleRow, exampleCol++);
             }
-            samples.set(value, j, exampleRow, exampleCol++);
-          }
 
-          const value = normalize ?
-              this.normalizedData[r + delay][this.tempCol] :
-              this.data[r + delay][this.tempCol];
-          targets.set(value, j, 0);
-          exampleRow++;
+            const value = normalize ?
+                this.normalizedData[r + delay][this.tempCol] :
+                this.data[r + delay][this.tempCol];
+            targets.set(value, j, 0);
+            exampleRow++;
+          }
         }
+        return {
+          value: {xs: samples.toTensor(), ys: targets.toTensor()},
+          done
+        };
       }
-      return {
-        value: [samples.toTensor(), targets.toTensor()],
-        done
-      };
-    }
-
-    return nextBatchFn.bind(this);
+    };
   }
 }
diff --git a/jena-weather/index.js b/jena-weather/index.js
@@ -17,7 +17,7 @@
 
 /**
  * Weather Prediction Example.
- * 
+ *
  * - Visualizes data using tfjs-vis.
  * - Trains simple models (linear regressor and MLPs) and visualizes the
  *   training processes.
@@ -102,7 +102,7 @@ function makeTimeSeriesChart(
   }
   // NOTE(cais): On a Linux workstation running latest Chrome, the length
   // limit seems to be around 120k.
-  tfvis.render.linechart({values, series: series}, chartConatiner, {
+  tfvis.render.linechart(chartConatiner, {values, series: series}, {
     width: chartConatiner.offsetWidth * 0.95,
     height: chartConatiner.offsetWidth * 0.3,
     xLabel: 'Time',
@@ -141,7 +141,7 @@ function makeTimeSeriesScatterPlot(series1, series2, timeSpan, normalize) {
   }
   const series = [`${seriesLabel1} - ${seriesLabel2}`];
 
-  tfvis.render.scatterplot({values, series}, dataChartContainer, {
+  tfvis.render.scatterplot(dataChartContainer, {values, series}, {
     width: dataChartContainer.offsetWidth * 0.7,
     height: dataChartContainer.offsetWidth * 0.5,
     xLabel: seriesLabel1,
@@ -160,7 +160,6 @@ trainModelButton.addEventListener('click', async () => {
   const batchSize = 128;
   const normalize = true;
   const includeDateTime = includeDateTimeSelect.checked;
-  
   const modelType = modelTypeSelect.value;
 
   console.log('Creating model...');
@@ -177,10 +176,9 @@ trainModelButton.addEventListener('click', async () => {
 
   console.log('Starting model training...');
   const epochs = +epochsInput.value;
-  const displayEvery = 100;
   await trainModel(
       model, jenaWeatherData, normalize, includeDateTime,
-      lookBack, step, delay, batchSize, epochs, displayEvery,
+      lookBack, step, delay, batchSize, epochs,
       tfvis.show.fitCallbacks(trainingSurface, ['loss', 'val_loss'], {
         callbacks: ['onBatchEnd', 'onEpochEnd']
       }));
diff --git a/jena-weather/models.js b/jena-weather/models.js
@@ -210,68 +210,28 @@ export function buildModel(modelType, numTimeSteps, numFeatures) {
  *   for.
  * @param {number} batchSize batchSize for training.
  * @param {number} epochs Number of training epochs.
- * @param {number} displayEvery Log info to console every _ batches.
- * @param {number} customCallbacks Optional callback args to invoke at the
- *   end of every epoch. Can optionally have `onBatchEnd` and `onEpochEnd`
- *   fields.
+ * @param {tf.Callback | tf.CustomCallbackArgs} customCallback Optional callback
+ *   to invoke during training. Can optionally have `onBatchEnd` and
+ *   `onEpochEnd` fields.
  */
 export async function trainModel(
     model, jenaWeatherData, normalize, includeDateTime, lookBack, step, delay,
-    batchSize, epochs, displayEvery = 100, customCallbacks) {
-  const shuffle = true;
+    batchSize, epochs, customCallback) {
+  const trainShuffle = true;
+  const trainDataset = tf.data.generator(
+      () => jenaWeatherData.getNextBatchFunction(
+          trainShuffle, lookBack, delay, batchSize, step, TRAIN_MIN_ROW,
+          TRAIN_MAX_ROW, normalize, includeDateTime)).prefetch(8);
+  const evalShuffle = false;
+  const valDataset = tf.data.generator(
+      () => jenaWeatherData.getNextBatchFunction(
+          evalShuffle, lookBack, delay, batchSize, step, VAL_MIN_ROW,
+          VAL_MAX_ROW, normalize, includeDateTime));
 
-  const trainNextBatchFn = jenaWeatherData.getNextBatchFunction(
-      shuffle, lookBack, delay, batchSize, step, TRAIN_MIN_ROW, TRAIN_MAX_ROW,
-      normalize, includeDateTime);
-  const trainDataset = tf.data.generator(trainNextBatchFn).prefetch(8);
-
-  const batchesPerEpoch = 500;
-  let t0;
-  let currentEpoch;
   await model.fitDataset(trainDataset, {
-    batchesPerEpoch,
+    batchesPerEpoch: 500,
     epochs,
-    callbacks: {
-      onEpochBegin: async (epoch) => {
-        currentEpoch = epoch;
-        t0 = tf.util.now();
-      },
-      onBatchEnd: async (batch, logs) => {
-        if ((batch + 1) % displayEvery === 0) {
-          const t = tf.util.now();
-          const millisPerBatch = (t - t0) / (batch + 1);
-          console.log(
-              `epoch ${currentEpoch + 1}/${epochs} ` +
-              `batch ${batch + 1}/${batchesPerEpoch}: ` +
-              `loss=${logs.loss.toFixed(6)} ` +
-              `(${millisPerBatch.toFixed(1)} ms/batch)`);
-          if (customCallbacks && customCallbacks.onBatchEnd) {
-            customCallbacks.onBatchEnd(batch, logs);
-          }
-        }
-      },
-      onEpochEnd: async (epoch, logs) => {
-        const valNextBatchFn = jenaWeatherData.getNextBatchFunction(
-            false, lookBack, delay, batchSize, step, VAL_MIN_ROW, VAL_MAX_ROW,
-            normalize, includeDateTime);
-        const valDataset = tf.data.generator(valNextBatchFn);
-        console.log(`epoch ${epoch + 1}/${epochs}: Performing validation...`);
-        // TODO(cais): Remove the second arg (empty object), when the bug is
-        // fixed:
-        //   https://github.com/tensorflow/tfjs/issues/1096
-        const evalOut = await model.evaluateDataset(valDataset, {});
-        logs.val_loss = (await evalOut.data())[0];
-        tf.dispose(evalOut);
-        console.log(
-            `epoch ${epoch + 1}/${epochs}: ` +
-            `trainLoss=${logs.loss.toFixed(6)}; ` +
-            `valLoss=${logs.val_loss.toFixed(6)}`);
-        if (customCallbacks && customCallbacks.onEpochEnd) {
-          customCallbacks.onEpochEnd(epoch, logs);
-        }
-      }
-    }
+    callbacks: customCallback,
+    validationData: valDataset
   });
-
-  return model;
 }
diff --git a/jena-weather/package.json b/jena-weather/package.json
@@ -9,18 +9,18 @@
     "node": ">=8.9.0"
   },
   "dependencies": {
-    "@tensorflow/tfjs": "^0.15.3",
-    "@tensorflow/tfjs-vis": "0.4.2"
+    "@tensorflow/tfjs": "^1.0.2",
+    "@tensorflow/tfjs-vis": "^1.0.3"
   },
   "scripts": {
     "watch": "cross-env NODE_ENV=development parcel index.html --no-hmr --open",
     "build": "cross-env NODE_ENV=production parcel build index.html --no-minify --public-url ./",
     "link-local": "yalc link",
-    "train-rnn": "babel-node train-rnn.js"
+    "train-rnn": "babel-node --max_old_space_size=4096 train-rnn.js"
   },
   "devDependencies": {
-    "@tensorflow/tfjs-node": "0.3.1",
-    "@tensorflow/tfjs-node-gpu": "0.3.1",
+    "@tensorflow/tfjs-node": "^1.0.2",
+    "@tensorflow/tfjs-node-gpu": "^1.0.2",
     "argparse": "^1.0.10",
     "babel-cli": "^6.26.0",
     "babel-core": "^6.26.3",
diff --git a/jena-weather/train-rnn.js b/jena-weather/train-rnn.js
diff --git a/jena-weather/yarn.lock b/jena-weather/yarn.lock