
Commit a3aa10a

Add RL example: cart pole (#106)
* Add example: Cart pole reinforcement learning
1 parent cc4a746 commit a3aa10a

10 files changed: +5630 -0 lines changed

cart-pole/.babelrc

+18
@@ -0,0 +1,18 @@
{
  "presets": [
    [
      "env",
      {
        "esmodules": false,
        "targets": {
          "browsers": [
            "> 3%"
          ]
        }
      }
    ]
  ],
  "plugins": [
    "transform-runtime"
  ]
}

cart-pole/README.md

+56
@@ -0,0 +1,56 @@
# TensorFlow.js Example: Reinforcement Learning with Cart-Pole Simulation

## Overview

This example illustrates how to use TensorFlow.js to perform simple
reinforcement learning (RL). Specifically, it showcases an implementation
of the policy-gradient method in TensorFlow.js, using a combination of the
Layers API and the gradients API. This implementation is used to solve the
classic cart-pole control problem, which was originally proposed in:

- Barto, Sutton, and Anderson, "Neuronlike Adaptive Elements That Can Solve
  Difficult Learning Control Problems," IEEE Trans. Syst., Man, Cybern.,
  Vol. SMC-13, pp. 834--846, Sept.--Oct. 1983.
- Sutton, "Temporal Aspects of Credit Assignment in Reinforcement Learning",
  Ph.D. Dissertation, Department of Computer and Information Science,
  University of Massachusetts, Amherst, 1984.

It later became one of OpenAI's gym environments:
https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py

The gist of the RL algorithm in this example (see [index.js](../index.js)) is:

1. Define a policy network that decides between leftward and rightward force
   given the observed state of the system. The decision is not completely
   deterministic: the network outputs a probability, which is converted to an
   actual action by drawing random samples from a binomial probability
   distribution.
2. For each "game", calculate reward values in such a way that longer-lasting
   games are assigned positive reward values, while shorter-lasting ones
   are assigned negative reward values.
3. Calculate the gradients of the policy network's weights with respect to the
   actual actions and scale the gradients with the reward values from step 2.
   The scaled gradients are added to the policy network's weights, the effect
   of which is to make the policy network more likely to select actions that
   lead to longer-lasting games given the same system states.

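The numerical pieces of steps 1 and 2 above can be sketched in plain JavaScript. `sampleAction` and `discountRewards` below are hypothetical helpers written for illustration; the actual code in index.js operates on TensorFlow.js tensors.

```javascript
// Step 1 (sketch): convert the policy network's output probability of
// choosing the leftward action into a concrete action by drawing a
// Bernoulli (single-trial binomial) sample.
function sampleAction(leftProb) {
  return Math.random() < leftProb ? 0 : 1;  // 0 = leftward, 1 = rightward
}

// Step 2 (sketch): convert the per-step rewards of one game into
// discounted returns, walking backwards so that each step is credited
// with its own reward plus the discounted rewards of all later steps.
function discountRewards(rewards, discountRate) {
  const discounted = new Array(rewards.length).fill(0);
  let running = 0;
  for (let i = rewards.length - 1; i >= 0; --i) {
    running = rewards[i] + discountRate * running;
    discounted[i] = running;
  }
  return discounted;
}

// A 4-step game with a reward of 1 per surviving step: earlier steps
// receive larger returns, so longer games yield larger positive credit.
console.log(discountRewards([1, 1, 1, 1], 0.95));
```

In index.js the discounted returns are additionally normalized before they scale the gradients, but the backwards-accumulation idea is the same.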
For a more detailed overview of policy-gradient methods, see:
http://www.scholarpedia.org/article/Policy_gradient_methods

For a more graphical illustration of the cart-pole problem, see:
http://gym.openai.com/envs/CartPole-v1/

### Features:

- Allows the user to specify the architecture of the policy network, in
  particular, the number of the neural network's layers and their sizes
  (number of units).
- Allows training of the policy network in the browser, optionally with
  simultaneous visualization of the cart-pole system.
- Allows testing in the browser, with visualization.
- Allows saving the policy network to the browser's IndexedDB. The saved policy
  network can later be loaded back for testing and/or further training.

## Usage

```sh
yarn && yarn watch
```

cart-pole/cart_pole.js

+126
@@ -0,0 +1,126 @@
/**
 * @license
 * Copyright 2018 Google LLC. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =============================================================================
 */

/**
 * Implementation based on: http://incompleteideas.net/book/code/pole.c
 */

import * as tf from '@tensorflow/tfjs';

/**
 * Cart-pole system simulator.
 *
 * In the control-theory sense, there are four state variables in this system:
 *
 *   - x: The 1D location of the cart.
 *   - xDot: The velocity of the cart.
 *   - theta: The angle of the pole (in radians). A value of 0 corresponds to
 *     a vertical position.
 *   - thetaDot: The angular velocity of the pole.
 *
 * The system is controlled through a single action:
 *
 *   - leftward or rightward force.
 */
export class CartPole {
  /**
   * Constructor of CartPole.
   */
  constructor() {
    // Constants that characterize the system.
    this.gravity = 9.8;
    this.massCart = 1.0;
    this.massPole = 0.1;
    this.totalMass = this.massCart + this.massPole;
    this.cartWidth = 0.2;
    this.cartHeight = 0.1;
    this.length = 0.5;
    this.poleMoment = this.massPole * this.length;
    this.forceMag = 10.0;
    this.tau = 0.02;  // Seconds between state updates.

    // Threshold values, beyond which a simulation will be marked as failed.
    this.xThreshold = 2.4;
    this.thetaThreshold = 12 / 360 * 2 * Math.PI;

    this.setRandomState();
  }

  /**
   * Set the state of the cart-pole system randomly.
   */
  setRandomState() {
    // The control-theory state variables of the cart-pole system.
    // Cart position, meters.
    this.x = Math.random() - 0.5;
    // Cart velocity.
    this.xDot = (Math.random() - 0.5) * 1;
    // Pole angle, radians.
    this.theta = (Math.random() - 0.5) * 2 * (6 / 360 * 2 * Math.PI);
    // Pole angular velocity.
    this.thetaDot = (Math.random() - 0.5) * 0.5;
  }

  /**
   * Get current state as a tf.Tensor of shape [1, 4].
   */
  getStateTensor() {
    return tf.tensor2d([[this.x, this.xDot, this.theta, this.thetaDot]]);
  }

  /**
   * Update the cart-pole system using an action.
   * @param {number} action Only the sign of `action` matters.
   *   A value > 0 leads to a rightward force of a fixed magnitude.
   *   A value <= 0 leads to a leftward force of the same fixed magnitude.
   */
  update(action) {
    const force = action > 0 ? this.forceMag : -this.forceMag;

    const cosTheta = Math.cos(this.theta);
    const sinTheta = Math.sin(this.theta);

    const temp =
        (force + this.poleMoment * this.thetaDot * this.thetaDot * sinTheta) /
        this.totalMass;
    const thetaAcc = (this.gravity * sinTheta - cosTheta * temp) /
        (this.length *
         (4 / 3 - this.massPole * cosTheta * cosTheta / this.totalMass));
    const xAcc = temp - this.poleMoment * thetaAcc * cosTheta / this.totalMass;

    // Update the four state variables, using Euler's method.
    this.x += this.tau * this.xDot;
    this.xDot += this.tau * xAcc;
    this.theta += this.tau * this.thetaDot;
    this.thetaDot += this.tau * thetaAcc;

    return this.isDone();
  }

  /**
   * Determine whether this simulation is done.
   *
   * A simulation is done when `x` (position of the cart) goes out of bound
   * or when `theta` (angle of the pole) goes out of bound.
   *
   * @returns {bool} Whether the simulation is done.
   */
  isDone() {
    return this.x < -this.xThreshold || this.x > this.xThreshold ||
        this.theta < -this.thetaThreshold || this.theta > this.thetaThreshold;
  }
}
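The physics in `update()` can be sketched as a standalone function without the tfjs dependency. `cartPoleStep` below is a hypothetical helper written for illustration, using the same constants and Euler integration as the class:

```javascript
// Standalone sketch of the cart-pole physics step: given the four state
// variables and an applied force, return the state one tick (tau) later.
function cartPoleStep({x, xDot, theta, thetaDot}, force) {
  const gravity = 9.8, massCart = 1.0, massPole = 0.1;
  const totalMass = massCart + massPole;
  const length = 0.5, poleMoment = massPole * length, tau = 0.02;

  const cosTheta = Math.cos(theta);
  const sinTheta = Math.sin(theta);
  const temp =
      (force + poleMoment * thetaDot * thetaDot * sinTheta) / totalMass;
  const thetaAcc = (gravity * sinTheta - cosTheta * temp) /
      (length * (4 / 3 - massPole * cosTheta * cosTheta / totalMass));
  const xAcc = temp - poleMoment * thetaAcc * cosTheta / totalMass;

  // Euler's method: advance each variable by tau times its derivative.
  return {
    x: x + tau * xDot,
    xDot: xDot + tau * xAcc,
    theta: theta + tau * thetaDot,
    thetaDot: thetaDot + tau * thetaAcc,
  };
}

// Starting at rest with the pole exactly vertical, a rightward force
// accelerates the cart to the right and tips the pole backwards
// (theta goes negative):
let s = {x: 0, xDot: 0, theta: 0, thetaDot: 0};
for (let i = 0; i < 5; ++i) s = cartPoleStep(s, 10);
console.log(s);
```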

cart-pole/index.html

+147
@@ -0,0 +1,147 @@
<!--
Copyright 2018 Google LLC. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================
-->

<html>
<head>
  <link rel="stylesheet" href="https://code.getmdl.io/1.3.0/material.cyan-teal.min.css" />
</head>

<body>

<style>
  #app-status {
    color: blue;
    font-size: 150%;
    padding-bottom: 1em;
  }
  button {
    font-size: 105%;
    min-width: 120px;
  }
  input {
    font-family: monospace;
    width: 200px;
  }
  .input-div {
    padding: 5px;
    font-family: monospace;
  }
  .input-label {
    display: inline-block;
    width: 15em;
  }
  .canvases {
    display: inline-block;
  }
  .horizontal-sections {
    display: inline-block;
    padding-left: 0px;
    padding-right: 10px;
    vertical-align: top;
    border: 1px #AAA solid;
  }
  .status-span {
    display: inline-block;
    width: 150px;
  }
  .buttons-section {
    float: right;
  }
  input:disabled {
    background-color: #AAA;
  }
</style>

<h1>TensorFlow.js Example:<br/>Reinforcement Learning: Cart Pole</h1>

<div>
  <div>
    <span id="app-status">Standing by.</span>
  </div>

  <div>
    <div class="horizontal-sections">
      <div class="input-div">
        <span class="input-label">Locally-stored network</span>
        <input id="stored-model-status" value="N/A" disabled="true" readonly="true">
        <button id="delete-stored-model" disabled="true">Delete</button>
      </div>

      <div class="horizontal-sections">
        <div class="input-div">
          <span class="input-label">Hidden layer size(s) (e.g.: "5", "8,6"):</span>
          <input id="hidden-layer-sizes" value="4">
          <button id="create-model" disabled="true">Create model</button>
        </div>
      </div>

      <div class="input-div">
        <span class="input-label">Number of iterations:</span>
        <input id="num-iterations" value="20">
      </div>
      <div class="input-div">
        <span class="input-label">Games per iteration:</span>
        <input id="games-per-iteration" value="20">
      </div>
      <div class="input-div">
        <span class="input-label">Max. steps per game:</span>
        <input id="max-steps-per-game" value="500">
      </div>
      <div class="input-div">
        <span class="input-label">Reward discount rate:</span>
        <input id="discount-rate" value="0.95">
      </div>
      <div class="input-div">
        <span class="input-label">Learning rate:</span>
        <input id="learning-rate" value="0.05">
      </div>
      <div class="input-div">
        <span class="input-label">Render during training:</span>
        <input type="checkbox" id="render-during-training" />
      </div>
      <div class="buttons-section">
        <button id="train" disabled="true">Train</button>
        <button id="test" disabled="true">Test</button>
      </div>
    </div>

    <div class="horizontal-sections">
      <div>
        <span id="iteration-status" class="status-span"></span>
        <progress value="0" max="100" id="iteration-progress"></progress>
      </div>
      <div>
        <span id="train-status" class="status-span"></span>
        <progress value="0" max="100" id="train-progress"></progress>
      </div>
      <div>
        <span class="status-span">Training speed:</span>
        <span id="train-speed" class="status-span"></span>
      </div>
      <div class="canvases" id="steps-canvas"></div>
    </div>
  </div>

  <div>
    <canvas id="cart-pole-canvas" height="150" width="500"></canvas>
  </div>
</div>

<script src="index.js"></script>
</body>
</html>
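The `hidden-layer-sizes` field above accepts strings like "5" or "8,6", one integer per hidden layer. A hypothetical parser for that input (the actual parsing in index.js may differ):

```javascript
// Parse a comma-separated hidden-layer-sizes string, e.g. "8,6" -> [8, 6].
// Rejects anything that is not a list of positive integers.
function parseLayerSizes(text) {
  const sizes = text.trim().split(',').map(s => Number.parseInt(s.trim(), 10));
  if (sizes.some(n => !Number.isInteger(n) || n <= 0)) {
    throw new Error(`Invalid hidden layer sizes: "${text}"`);
  }
  return sizes;
}

console.log(parseLayerSizes('8, 6'));  // two hidden layers, of 8 and 6 units
```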
