diff --git a/examples/debug_with_dashboard.ipynb b/examples/debug_with_dashboard.ipynb new file mode 100644 index 0000000..148e793 --- /dev/null +++ b/examples/debug_with_dashboard.ipynb @@ -0,0 +1,972 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dashboard Overview\n", + "\n", + "This notebook walks through some examples to show the functionality of the Ray dashboard. \n", + "\n", + "There are two views:\n", + "1. Machine View, which groups actors by nodes. Information about occupied and total memory, resources, node ip address, logs, errors, etc. will be displayed. \n", + "2. Logical View, which groups actors by the hierarchical structure. Actor `A` is the parent of actor `B` if `A` creates `B`. In this case, actor `B` will be placed as a nested actor of `A`. " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-01-24 15:21:10,546\tINFO resource_spec.py:212 -- Starting Ray with 3.71 GiB memory available for workers and up to 1.88 GiB for objects. You can adjust these settings with ray.init(memory=, object_store_memory=).\n", + "2020-01-24 15:21:10,797\tINFO services.py:1093 -- View the Ray dashboard at \u001b[1m\u001b[32mlocalhost:8273\u001b[39m\u001b[22m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Click here to open the dashboard: http://localhost:8273\n" + ] + } + ], + "source": [ + "import ray\n", + "import os\n", + "import time\n", + "import numpy as np\n", + "import requests\n", + "\n", + "addresses = ray.init()\n", + "print(\"Click here to open the dashboard: http://{}\".format(addresses[\"webui_url\"]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Debug blocked actor creation tasks\n", + "\n", + "If creating an actor requires resources (e.g. 
CPUs, GPUs, other custom resources) that are not currently available, the actor creation task becomes infeasible. This might cause the program to hang. \n", + "\n", + "To make developers aware of this issue, infeasible tasks are shown in red in the dashboard. \n", + "\n", + "![title](img/infeasible-task.png)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-01-24 15:24:29,294\tWARNING worker.py:1063 -- The actor or task with ID ffffffffffffffff1cc4b74c0100 is infeasible and cannot currently be scheduled. It requires {Custom: 1.000000} for execution and {Custom: 1.000000} for placement, however there are no nodes in the cluster that can provide the requested resources. To resolve this issue, consider reducing the resource requests of this task or add nodes that can fit the task.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Session hangs because actor A cannot be created. \n" + ] + } + ], + "source": [ + "@ray.remote(resources={\"Custom\": 1}, num_cpus=0)\n", + "class A(object):\n", + " def __init__(self):\n", + " pass\n", + " \n", + " def f(self):\n", + " return 0\n", + " \n", + "@ray.remote\n", + "class B(object):\n", + " def __init__(self, x, y):\n", + " self.a = A.remote()\n", + " \n", + " def f(self):\n", + " return ray.get(self.a.f.remote())\n", + "\n", + "b = B.remote(3, y=5)\n", + "\n", + "try:\n", + " ray.get(b.f.remote(), timeout=2)\n", + "except ray.exceptions.RayTimeoutError:\n", + " print(\"Session hangs because actor A cannot be created. 
\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Inspect local memory usage\n", + "\n", + "The dashboard shows the following information about local memory usage:\n", + "- Number of object ids in scope\n", + "- Number of local objects\n", + "- Used Object Memory\n", + " \n", + "In the example below, all objects (strings) are stored in local object memory. Used local object memory increases as the remote function `g` is repeatedly called. \n", + "\n", + "![title](img/local-memory-usage.png)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "@ray.remote\n", + "def g():\n", + " return \"hello world!\"\n", + "\n", + "@ray.remote\n", + "class A(object):\n", + " def f(self):\n", + " object_ids = []\n", + " for idx in range(50):\n", + " ray.show_in_webui(\"Loop index = {}...\".format(idx))\n", + " object_ids.append(g.remote())\n", + " time.sleep(0.5)\n", + "\n", + "a = A.remote()\n", + "_ = a.f.remote()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Inspect node memory usage\n", + "\n", + "Unlike the example above, used local object memory will always be zero here because all objects (strings) are stored on the node. 
\n", + "\n", + "![title](img/node-memory-usage.png)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "@ray.remote\n", + "class C(object):\n", + " def __init__(self):\n", + " self.object_ids = []\n", + " \n", + " def push(self):\n", + " object_id = ray.put(\"test\")\n", + " self.object_ids.append(object_id)\n", + " time.sleep(1)\n", + " return object_id\n", + " \n", + " def clean_memory(self):\n", + " del self.object_ids\n", + " \n", + "@ray.remote\n", + "class D(object):\n", + " def __init__(self):\n", + " self.object_ids = []\n", + " \n", + " def fetch(self):\n", + " c = C.remote()\n", + " \n", + " for idx in range(20):\n", + " ray.show_in_webui(\"Loop index = {}...\".format(idx))\n", + " time.sleep(0.5)\n", + " object_id = ray.get(c.push.remote())\n", + " self.object_ids.append(object_id) \n", + "\n", + " def clean_memory(self):\n", + " del self.object_ids\n", + " \n", + "d = D.remote()\n", + "_ = d.fetch.remote()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following command clears out the number of object ids in scope for actor `d`, as all object ids go out of scope after `self.object_ids` is deleted. The field `NumObjectIdInScope` will be set to 0 on the dashboard. " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "_ = d.clean_memory.remote()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Profile python program with py-spy\n", + "\n", + "Clicking the `profiling` button on the dashboard launches `py-spy`, which times your Python program. The timing information will be visualized as a flamegraph in a new browser tab. \n", + "\n", + "Check out the Learning to play Pong example in the Ray documentation: https://ray.readthedocs.io/en/latest/auto_examples/plot_pong_example.html\n", + "\n", + "Click `profiling`, and click `Profiling result` when it is ready. 
Note that there could be multiple threads in the process; some are Ray internal threads, whose timing information may not be very interesting. Click the left and right arrows at the top middle to see profiling results for different threads.\n", + "\n", + "Now you can intuitively see where the computation bottleneck could be. More information on how to interpret the flamegraph is available at https://github.com/jlfwong/speedscope#usage.\n", + "\n", + "![title](img/profiling.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example\n", + "\n", + "The logical view of the dashboard allows users to track the progress (training accuracy, constructor configuration, memory usage, etc.) of all parallel actors." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 1: Monitor distributed actors\n", + "\n", + "Reference: github issue #3609, @EricSteinberger\n", + "\n", + "In this example, the dashboard displays parallel actors and reports the number of tasks each actor has completed, the name of the task currently being executed, and the number of tasks pending in the queue. Click `collapse` if an actor spawns too many child actors. 
\n", + "\n", + "![title](img/example-1.png)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test: num_actors = 4, time = 14.119410037994385\n" + ] + } + ], + "source": [ + "import torch\n", + "\n", + "\n", + "class NeuralNet(torch.nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.l = torch.nn.Linear(1000, 2048)\n", + " self.l2 = torch.nn.Linear(2048, 2)\n", + "\n", + " def forward(self, x):\n", + " return self.l2(self.l(x))\n", + "\n", + "\n", + "@ray.remote(num_cpus=1)\n", + "class TestActor:\n", + " def __init__(self):\n", + " self.net = NeuralNet()\n", + "\n", + " def test(self, batch_size):\n", + " p = self.net(torch.rand((batch_size, 1000),))\n", + " \n", + "def test(num_actors):\n", + " t = time.time()\n", + " actors = [TestActor.remote() for _ in range(num_actors)]\n", + "\n", + " t = time.time()\n", + " for _ in range(5000//num_actors):\n", + " ray.get([actor.test.remote(128) for actor in actors])\n", + " \n", + " print(f\"Test: num_actors = {num_actors}, time = {time.time() - t}\")\n", + "\n", + "test(num_actors=4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 2: Distributed network training" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reference: github issue #6633, @JaeLiiin\n", + "\n", + "![title](img/example-2.png)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "%%capture\n", + "\n", + "from tensorflow.keras import layers\n", + "import json\n", + "\n", + "\n", + "def create_keras_model():\n", + " import tensorflow as tf\n", + " model = tf.keras.Sequential()\n", + " # Adds a densely-connected layer with 64 units to the model:\n", + " model.add(layers.Dense(64, activation=\"relu\", input_shape=(32, )))\n", + " # Add another:\n", + " 
model.add(layers.Dense(64, activation=\"relu\"))\n", + " # Add a softmax layer with 10 output units:\n", + " model.add(layers.Dense(10, activation=\"softmax\"))\n", + "\n", + " model.compile(\n", + " optimizer=tf.keras.optimizers.RMSprop(0.01),\n", + " loss=tf.keras.losses.categorical_crossentropy,\n", + " metrics=[tf.keras.metrics.categorical_accuracy])\n", + " return model\n", + "\n", + "\n", + "def random_one_hot_labels(shape):\n", + " n, n_class = shape\n", + " classes = np.random.randint(0, n_class, n)\n", + " labels = np.zeros((n, n_class))\n", + " labels[np.arange(n), classes] = 1\n", + " return labels\n", + "\n", + "\n", + "@ray.remote\n", + "class Network(object):\n", + " def __init__(self):\n", + " self.model = create_keras_model()\n", + " self.dataset = np.random.random((1000, 32))\n", + " self.labels = random_one_hot_labels((1000, 10))\n", + "\n", + " def train(self):\n", + " history = self.model.fit(self.dataset, self.labels, verbose=False)\n", + " time.sleep(0.5)\n", + " ray.show_in_webui(repr(history.history))\n", + " return history.history\n", + "\n", + " def get_weights(self):\n", + " return self.model.get_weights()\n", + "\n", + " def set_weights(self, weights):\n", + " # Note that for simplicity this does not handle the optimizer state.\n", + " self.model.set_weights(weights)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "\n", + "result_object_ids = []\n", + "result2_object_ids = []\n", + "\n", + "NetworkActor = Network.remote()\n", + "NetworkActor2 = Network.remote()\n", + "\n", + "for itr in range(20):\n", + " weights = ray.get(\n", + " [NetworkActor.get_weights.remote(),\n", + " NetworkActor2.get_weights.remote()])\n", + "\n", + " averaged_weights = [(layer1 + layer2) / 2\n", + " for layer1, layer2 in zip(weights[0], weights[1])]\n", + "\n", + " weight_id = ray.put(averaged_weights)\n", + " [\n", + " actor.set_weights.remote(weight_id)\n", + " for actor in 
[NetworkActor, NetworkActor2]\n", + " ]\n", + " result_object_ids.append(NetworkActor.train.remote())\n", + " result2_object_ids.append(NetworkActor2.train.remote())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example 3: monitor MNIST training with tune\n", + "- Actor construction which exposes parameter configuration\n", + "- Task execution\n", + " - Number of tasks executed\n", + " - Function descriptor of currently executed task\n", + " - Number of pending tasks listed on the task queue\n", + "- Training accuracy shown as actor message\n", + "\n", + "![title](img/example-3.png)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-01-24 15:43:27,856\tINFO function_runner.py:250 -- tune.track signature detected.\n", + "2020-01-24 15:43:27,869\tERROR logger.py:328 -- pip install 'ray[tune]' to see TensorBoard files.\n", + "2020-01-24 15:43:27,869\tWARNING logger.py:417 -- Could not instantiate TBXLogger: No module named 'tensorboardX'.\n" + ] + }, + { + "data": { + "text/html": [ + "== Status ==
Memory usage on this node: 10.1/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 1/16 CPUs, 0/0 GPUs, 0.0/3.71 GiB heap, 0.0/1.27 GiB objects
Result logdir: /Users/yunzhi/ray_results/train_mnist
Number of trials: 3 (1 RUNNING, 2 PENDING)
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc lr
train_mnist_51d5e2c2RUNNING
train_mnist_51d60950PENDING
train_mnist_51d627e6PENDING


" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-01-24 15:43:27,892\tERROR logger.py:328 -- pip install 'ray[tune]' to see TensorBoard files.\n", + "2020-01-24 15:43:27,893\tWARNING logger.py:417 -- Could not instantiate TBXLogger: No module named 'tensorboardX'.\n", + "2020-01-24 15:43:27,913\tERROR logger.py:328 -- pip install 'ray[tune]' to see TensorBoard files.\n", + "2020-01-24 15:43:27,914\tWARNING logger.py:417 -- Could not instantiate TBXLogger: No module named 'tensorboardX'.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result for train_mnist_51d60950:\n", + " date: 2020-01-24_15-43-31\n", + " done: false\n", + " experiment_id: 430041a749c3485e89b5a16dbcae3c66\n", + " experiment_tag: 1_lr=0.01\n", + " hostname: Yunzhis-MacBook-Pro.local\n", + " iterations_since_restore: 1\n", + " mean_accuracy: 0.23125\n", + " node_ip: 192.168.1.27\n", + " pid: 12308\n", + " time_since_restore: 0.363037109375\n", + " time_this_iter_s: 0.363037109375\n", + " time_total_s: 0.363037109375\n", + " timestamp: 1579909411\n", + " timesteps_since_restore: 0\n", + " training_iteration: 0\n", + " trial_id: 51d60950\n", + " \n", + "Result for train_mnist_51d5e2c2:\n", + " date: 2020-01-24_15-43-31\n", + " done: false\n", + " experiment_id: 5e6369a0220c49359d02f5b5a19f21b8\n", + " experiment_tag: 0_lr=0.001\n", + " hostname: Yunzhis-MacBook-Pro.local\n", + " iterations_since_restore: 1\n", + " mean_accuracy: 0.065625\n", + " node_ip: 192.168.1.27\n", + " pid: 12309\n", + " time_since_restore: 0.46709609031677246\n", + " time_this_iter_s: 0.46709609031677246\n", + " time_total_s: 0.46709609031677246\n", + " timestamp: 1579909411\n", + " timesteps_since_restore: 0\n", + " training_iteration: 0\n", + " trial_id: 51d5e2c2\n", + " \n", + "Result for train_mnist_51d627e6:\n", + " date: 2020-01-24_15-43-31\n", + " done: false\n", + " 
experiment_id: 77705b95fef6468ea13c986602e7a71c\n", + " experiment_tag: 2_lr=0.1\n", + " hostname: Yunzhis-MacBook-Pro.local\n", + " iterations_since_restore: 1\n", + " mean_accuracy: 0.503125\n", + " node_ip: 192.168.1.27\n", + " pid: 12307\n", + " time_since_restore: 0.512376070022583\n", + " time_this_iter_s: 0.512376070022583\n", + " time_total_s: 0.512376070022583\n", + " timestamp: 1579909411\n", + " timesteps_since_restore: 0\n", + " training_iteration: 0\n", + " trial_id: 51d627e6\n", + " \n" + ] + }, + { + "data": { + "text/html": [ + "== Status ==
Memory usage on this node: 10.4/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 3/16 CPUs, 0/0 GPUs, 0.0/3.71 GiB heap, 0.0/1.27 GiB objects
Result logdir: /Users/yunzhi/ray_results/train_mnist
Number of trials: 3 (3 RUNNING)
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc lr iter total time (s) acc
train_mnist_51d5e2c2RUNNING 192.168.1.27:123090.001 6 1.969410.05
train_mnist_51d60950RUNNING 192.168.1.27:123080.01 7 2.123120.775
train_mnist_51d627e6RUNNING 192.168.1.27:123070.1 6 2.013970.8375


" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result for train_mnist_51d60950:\n", + " date: 2020-01-24_15-43-36\n", + " done: false\n", + " experiment_id: 430041a749c3485e89b5a16dbcae3c66\n", + " experiment_tag: 1_lr=0.01\n", + " hostname: Yunzhis-MacBook-Pro.local\n", + " iterations_since_restore: 23\n", + " mean_accuracy: 0.85625\n", + " node_ip: 192.168.1.27\n", + " pid: 12308\n", + " time_since_restore: 5.452777147293091\n", + " time_this_iter_s: 0.21289515495300293\n", + " time_total_s: 5.452777147293091\n", + " timestamp: 1579909416\n", + " timesteps_since_restore: 0\n", + " training_iteration: 22\n", + " trial_id: 51d60950\n", + " \n", + "Result for train_mnist_51d5e2c2:\n", + " date: 2020-01-24_15-43-36\n", + " done: false\n", + " experiment_id: 5e6369a0220c49359d02f5b5a19f21b8\n", + " experiment_tag: 0_lr=0.001\n", + " hostname: Yunzhis-MacBook-Pro.local\n", + " iterations_since_restore: 23\n", + " mean_accuracy: 0.128125\n", + " node_ip: 192.168.1.27\n", + " pid: 12309\n", + " time_since_restore: 5.5144970417022705\n", + " time_this_iter_s: 0.21251416206359863\n", + " time_total_s: 5.5144970417022705\n", + " timestamp: 1579909416\n", + " timesteps_since_restore: 0\n", + " training_iteration: 22\n", + " trial_id: 51d5e2c2\n", + " \n", + "Result for train_mnist_51d627e6:\n", + " date: 2020-01-24_15-43-36\n", + " done: false\n", + " experiment_id: 77705b95fef6468ea13c986602e7a71c\n", + " experiment_tag: 2_lr=0.1\n", + " hostname: Yunzhis-MacBook-Pro.local\n", + " iterations_since_restore: 23\n", + " mean_accuracy: 0.90625\n", + " node_ip: 192.168.1.27\n", + " pid: 12307\n", + " time_since_restore: 5.559006929397583\n", + " time_this_iter_s: 0.2171010971069336\n", + " time_total_s: 5.559006929397583\n", + " timestamp: 1579909416\n", + " timesteps_since_restore: 0\n", + " training_iteration: 22\n", + " trial_id: 51d627e6\n", + " \n" + ] + }, + { + 
"data": { + "text/html": [ + "== Status ==
Memory usage on this node: 10.3/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 3/16 CPUs, 0/0 GPUs, 0.0/3.71 GiB heap, 0.0/1.27 GiB objects
Result logdir: /Users/yunzhi/ray_results/train_mnist
Number of trials: 3 (3 RUNNING)
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc lr iter total time (s) acc
train_mnist_51d5e2c2RUNNING 192.168.1.27:123090.001 29 7.058380.125
train_mnist_51d60950RUNNING 192.168.1.27:123080.01 30 7.220340.890625
train_mnist_51d627e6RUNNING 192.168.1.27:123070.1 29 7.100890.884375


" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result for train_mnist_51d60950:\n", + " date: 2020-01-24_15-43-41\n", + " done: false\n", + " experiment_id: 430041a749c3485e89b5a16dbcae3c66\n", + " experiment_tag: 1_lr=0.01\n", + " hostname: Yunzhis-MacBook-Pro.local\n", + " iterations_since_restore: 45\n", + " mean_accuracy: 0.86875\n", + " node_ip: 192.168.1.27\n", + " pid: 12308\n", + " time_since_restore: 10.705771207809448\n", + " time_this_iter_s: 0.3253171443939209\n", + " time_total_s: 10.705771207809448\n", + " timestamp: 1579909421\n", + " timesteps_since_restore: 0\n", + " training_iteration: 44\n", + " trial_id: 51d60950\n", + " \n", + "Result for train_mnist_51d5e2c2:\n", + " date: 2020-01-24_15-43-41\n", + " done: false\n", + " experiment_id: 5e6369a0220c49359d02f5b5a19f21b8\n", + " experiment_tag: 0_lr=0.001\n", + " hostname: Yunzhis-MacBook-Pro.local\n", + " iterations_since_restore: 45\n", + " mean_accuracy: 0.24375\n", + " node_ip: 192.168.1.27\n", + " pid: 12309\n", + " time_since_restore: 10.82926321029663\n", + " time_this_iter_s: 0.33666110038757324\n", + " time_total_s: 10.82926321029663\n", + " timestamp: 1579909421\n", + " timesteps_since_restore: 0\n", + " training_iteration: 44\n", + " trial_id: 51d5e2c2\n", + " \n", + "Result for train_mnist_51d627e6:\n", + " date: 2020-01-24_15-43-41\n", + " done: false\n", + " experiment_id: 77705b95fef6468ea13c986602e7a71c\n", + " experiment_tag: 2_lr=0.1\n", + " hostname: Yunzhis-MacBook-Pro.local\n", + " iterations_since_restore: 45\n", + " mean_accuracy: 0.928125\n", + " node_ip: 192.168.1.27\n", + " pid: 12307\n", + " time_since_restore: 10.86169981956482\n", + " time_this_iter_s: 0.3371458053588867\n", + " time_total_s: 10.86169981956482\n", + " timestamp: 1579909421\n", + " timesteps_since_restore: 0\n", + " training_iteration: 44\n", + " trial_id: 51d627e6\n", + " \n" + ] + }, + { + 
"data": { + "text/html": [ + "== Status ==
Memory usage on this node: 10.4/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 3/16 CPUs, 0/0 GPUs, 0.0/3.71 GiB heap, 0.0/1.27 GiB objects
Result logdir: /Users/yunzhi/ray_results/train_mnist
Number of trials: 3 (3 RUNNING)
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc lr iter total time (s) acc
train_mnist_51d5e2c2RUNNING 192.168.1.27:123090.001 49 12.17710.25
train_mnist_51d60950RUNNING 192.168.1.27:123080.01 50 12.32490.909375
train_mnist_51d627e6RUNNING 192.168.1.27:123070.1 49 12.19760.9125


" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result for train_mnist_51d60950:\n", + " date: 2020-01-24_15-43-46\n", + " done: false\n", + " experiment_id: 430041a749c3485e89b5a16dbcae3c66\n", + " experiment_tag: 1_lr=0.01\n", + " hostname: Yunzhis-MacBook-Pro.local\n", + " iterations_since_restore: 64\n", + " mean_accuracy: 0.89375\n", + " node_ip: 192.168.1.27\n", + " pid: 12308\n", + " time_since_restore: 15.811901092529297\n", + " time_this_iter_s: 0.2762620449066162\n", + " time_total_s: 15.811901092529297\n", + " timestamp: 1579909426\n", + " timesteps_since_restore: 0\n", + " training_iteration: 63\n", + " trial_id: 51d60950\n", + " \n", + "Result for train_mnist_51d5e2c2:\n", + " date: 2020-01-24_15-43-46\n", + " done: false\n", + " experiment_id: 5e6369a0220c49359d02f5b5a19f21b8\n", + " experiment_tag: 0_lr=0.001\n", + " hostname: Yunzhis-MacBook-Pro.local\n", + " iterations_since_restore: 64\n", + " mean_accuracy: 0.340625\n", + " node_ip: 192.168.1.27\n", + " pid: 12309\n", + " time_since_restore: 15.890575170516968\n", + " time_this_iter_s: 0.2264111042022705\n", + " time_total_s: 15.890575170516968\n", + " timestamp: 1579909426\n", + " timesteps_since_restore: 0\n", + " training_iteration: 63\n", + " trial_id: 51d5e2c2\n", + " \n", + "Result for train_mnist_51d627e6:\n", + " date: 2020-01-24_15-43-46\n", + " done: false\n", + " experiment_id: 77705b95fef6468ea13c986602e7a71c\n", + " experiment_tag: 2_lr=0.1\n", + " hostname: Yunzhis-MacBook-Pro.local\n", + " iterations_since_restore: 64\n", + " mean_accuracy: 0.928125\n", + " node_ip: 192.168.1.27\n", + " pid: 12307\n", + " time_since_restore: 15.901074886322021\n", + " time_this_iter_s: 0.22142696380615234\n", + " time_total_s: 15.901074886322021\n", + " timestamp: 1579909426\n", + " timesteps_since_restore: 0\n", + " training_iteration: 63\n", + " trial_id: 51d627e6\n", + " \n" + ] + }, + 
{ + "data": { + "text/html": [ + "== Status ==
Memory usage on this node: 10.3/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 3/16 CPUs, 0/0 GPUs, 0.0/3.71 GiB heap, 0.0/1.27 GiB objects
Result logdir: /Users/yunzhi/ray_results/train_mnist
Number of trials: 3 (3 RUNNING)
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc lr iter total time (s) acc
train_mnist_51d5e2c2RUNNING 192.168.1.27:123090.001 69 17.29650.365625
train_mnist_51d60950RUNNING 192.168.1.27:123080.01 70 17.45630.8875
train_mnist_51d627e6RUNNING 192.168.1.27:123070.1 69 17.30680.921875


" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result for train_mnist_51d60950:\n", + " date: 2020-01-24_15-43-51\n", + " done: false\n", + " experiment_id: 430041a749c3485e89b5a16dbcae3c66\n", + " experiment_tag: 1_lr=0.01\n", + " hostname: Yunzhis-MacBook-Pro.local\n", + " iterations_since_restore: 86\n", + " mean_accuracy: 0.89375\n", + " node_ip: 192.168.1.27\n", + " pid: 12308\n", + " time_since_restore: 20.95795702934265\n", + " time_this_iter_s: 0.21814513206481934\n", + " time_total_s: 20.95795702934265\n", + " timestamp: 1579909431\n", + " timesteps_since_restore: 0\n", + " training_iteration: 85\n", + " trial_id: 51d60950\n", + " \n", + "Result for train_mnist_51d5e2c2:\n", + " date: 2020-01-24_15-43-51\n", + " done: false\n", + " experiment_id: 5e6369a0220c49359d02f5b5a19f21b8\n", + " experiment_tag: 0_lr=0.001\n", + " hostname: Yunzhis-MacBook-Pro.local\n", + " iterations_since_restore: 86\n", + " mean_accuracy: 0.496875\n", + " node_ip: 192.168.1.27\n", + " pid: 12309\n", + " time_since_restore: 21.006662845611572\n", + " time_this_iter_s: 0.22823190689086914\n", + " time_total_s: 21.006662845611572\n", + " timestamp: 1579909431\n", + " timesteps_since_restore: 0\n", + " training_iteration: 85\n", + " trial_id: 51d5e2c2\n", + " \n", + "Result for train_mnist_51d627e6:\n", + " date: 2020-01-24_15-43-51\n", + " done: false\n", + " experiment_id: 77705b95fef6468ea13c986602e7a71c\n", + " experiment_tag: 2_lr=0.1\n", + " hostname: Yunzhis-MacBook-Pro.local\n", + " iterations_since_restore: 86\n", + " mean_accuracy: 0.940625\n", + " node_ip: 192.168.1.27\n", + " pid: 12307\n", + " time_since_restore: 21.0240797996521\n", + " time_this_iter_s: 0.23275399208068848\n", + " time_total_s: 21.0240797996521\n", + " timestamp: 1579909431\n", + " timesteps_since_restore: 0\n", + " training_iteration: 85\n", + " trial_id: 51d627e6\n", + " \n" + ] + }, + { + 
"data": { + "text/html": [ + "== Status ==
Memory usage on this node: 10.2/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 3/16 CPUs, 0/0 GPUs, 0.0/3.71 GiB heap, 0.0/1.27 GiB objects
Result logdir: /Users/yunzhi/ray_results/train_mnist
Number of trials: 3 (3 RUNNING)
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc lr iter total time (s) acc
train_mnist_51d5e2c2RUNNING 192.168.1.27:123090.001 91 22.32220.5125
train_mnist_51d60950RUNNING 192.168.1.27:123080.01 92 22.48960.884375
train_mnist_51d627e6RUNNING 192.168.1.27:123070.1 91 22.33470.946875


" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "== Status ==
Memory usage on this node: 10.2/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/3.71 GiB heap, 0.0/1.27 GiB objects
Result logdir: /Users/yunzhi/ray_results/train_mnist
Number of trials: 3 (3 TERMINATED)
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc lr iter total time (s) acc
train_mnist_51d5e2c2TERMINATED 0.001 99 24.13950.5625
train_mnist_51d60950TERMINATED 0.01 99 24.054 0.89375
train_mnist_51d627e6TERMINATED 0.1 99 24.11020.95625


" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-01-24 15:43:54,943\tINFO tune.py:330 -- Returning an analysis object by default. You can call `analysis.trials` to retrieve a list of trials. This message will be removed in future versions of Tune.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best config: {'lr': 0.1}\n" + ] + } + ], + "source": [ + "import torch.optim as optim\n", + "from ray import tune\n", + "from ray.tune.examples.mnist_pytorch import get_data_loaders, ConvNet, train, test\n", + "\n", + "\n", + "def train_mnist(config):\n", + " train_loader, test_loader = get_data_loaders()\n", + " model = ConvNet()\n", + " optimizer = optim.SGD(model.parameters(), lr=config[\"lr\"])\n", + " for i in range(100):\n", + " train(model, optimizer, train_loader)\n", + " acc = test(model, test_loader)\n", + " ray.show_in_webui(str(acc))\n", + " tune.track.log(mean_accuracy=acc)\n", + "\n", + "\n", + "analysis = tune.run(\n", + " train_mnist, config={\"lr\": tune.grid_search([0.001, 0.01, 0.1])})\n", + "\n", + "print(\"Best config: \", analysis.get_best_config(metric=\"mean_accuracy\"))\n", + "\n", + "# Get a dataframe for analyzing trial results.\n", + "df = analysis.dataframe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "More examples are available on https://ray.readthedocs.io/en/latest/auto_examples/overview.html.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/img/example-1.png b/examples/img/example-1.png new file mode 100644 index 0000000..a5fb195 Binary files /dev/null and b/examples/img/example-1.png differ diff --git a/examples/img/example-2.png b/examples/img/example-2.png new file mode 100644 index 0000000..ee7c55a Binary files /dev/null and b/examples/img/example-2.png differ diff --git a/examples/img/example-3.png b/examples/img/example-3.png new file mode 100644 index 0000000..f2d237f Binary files /dev/null and b/examples/img/example-3.png differ diff --git a/examples/img/infeasible-task.png b/examples/img/infeasible-task.png new file mode 100644 index 0000000..b481918 Binary files /dev/null and b/examples/img/infeasible-task.png differ diff --git a/examples/img/local-memory-usage.png b/examples/img/local-memory-usage.png new file mode 100644 index 0000000..5137927 Binary files /dev/null and b/examples/img/local-memory-usage.png differ diff --git a/examples/img/node-memory-usage.png b/examples/img/node-memory-usage.png new file mode 100644 index 0000000..8501565 Binary files /dev/null and b/examples/img/node-memory-usage.png differ diff --git a/examples/img/profiling.png b/examples/img/profiling.png new file mode 100644 index 0000000..db842d4 Binary files /dev/null and b/examples/img/profiling.png differ diff --git a/examples/news_recommendation_model.ipynb b/examples/news_recommendation_model.ipynb index f9266f0..e44ff6e 100644 --- a/examples/news_recommendation_model.ipynb +++ b/examples/news_recommendation_model.ipynb @@ -20,9 +20,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2020-01-02 12:16:41-- https://s3-us-west-2.amazonaws.com/ray-tutorials/hackernews.zip\n", + "Resolving s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)... 
52.218.128.120\n", + "Connecting to s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)|52.218.128.120|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 56402193 (54M) [application/zip]\n", + "Saving to: ‘hackernews.zip’\n", + "\n", + "hackernews.zip 100%[===================>] 53.79M 25.7MB/s in 2.1s \n", + "\n", + "2020-01-02 12:16:44 (25.7 MB/s) - ‘hackernews.zip’ saved [56402193/56402193]\n", + "\n", + "Archive: hackernews.zip\n", + " inflating: submission-1.json \n", + " inflating: submission-2.json \n", + " inflating: submission-3.json \n", + " inflating: submission-4.json \n", + "{\"body\": {\"descendants\": 0, \"url\": \"http://markpincus.blogspot.com/2005/03/peopleweb-i-believe-we-are-close-to.html\", \"text\": \"\", \"title\": \"The PeopleWeb | Mark Pincus Blog (March 2005)\", \"by\": \"sayemm\", \"score\": 3, \"time\": 1286515576, \"type\": \"story\", \"id\": 1770734}, \"source\": \"firebase\", \"id\": 1770734, \"retrieved_at_ts\": 1436469924}\n", + "{\"body\": {\"descendants\": 0, \"url\": \"http://omergertel.com/2010/11/16/honing-my-craft/\", \"text\": \"\", \"title\": \"Computer science and programming are two separate things\", \"by\": \"omergertel\", \"score\": 1, \"time\": 1289946709, \"type\": \"story\", \"id\": 1911996}, \"source\": \"firebase\", \"id\": 1911996, \"retrieved_at_ts\": 1436484897}\n" + ] + } + ], "source": [ "!wget -nc https://s3-us-west-2.amazonaws.com/ray-tutorials/hackernews.zip\n", "!unzip -o hackernews.zip\n", @@ -31,9 +56,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "env: RAY_DASHBOARD_DEBUG=True\n" + ] + } + ], "source": [ "from __future__ import absolute_import\n", "from __future__ import division\n", @@ -43,16 +76,51 @@ "import numpy as np\n", "import pandas as pd\n", "import ray\n", - "import time" + "import time\n", + "\n", + "%env 
RAY_DASHBOARD_DEBUG = True" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "ray.init(num_cpus=4, include_webui=False, ignore_reinit_error=True)" + "ray.shutdown()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-01-02 12:22:17,658\tINFO resource_spec.py:216 -- Starting Ray with 3.32 GiB memory available for workers and up to 1.66 GiB for objects. You can adjust these settings with ray.init(memory=, object_store_memory=).\n", + "2020-01-02 12:22:17,900\tINFO services.py:1101 -- View the Ray dashboard at \u001b[1m\u001b[32mlocalhost:8267\u001b[39m\u001b[22m.\n" + ] + }, + { + "data": { + "text/plain": [ + "{'node_ip_address': '10.1.10.91',\n", + " 'redis_address': '10.1.10.91:61548',\n", + " 'object_store_address': '/tmp/ray/session_2020-01-02_12-22-17_649165_4496/sockets/plasma_store',\n", + " 'raylet_socket_name': '/tmp/ray/session_2020-01-02_12-22-17_649165_4496/sockets/raylet',\n", + " 'webui_url': 'localhost:8267',\n", + " 'session_dir': '/tmp/ray/session_2020-01-02_12-22-17_649165_4496'}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ray.init(num_cpus=4, include_webui=True, ignore_reinit_error=True)" ] }, { @@ -64,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -86,9 +154,85 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Took 2.8535571098327637 seconds to parse the hackernews submissions\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datascore
0The PeopleWeb | Mark Pincus Blog (March 2005)3
1Computer science and programming are two separ...1
2Don't Go It Alone: Create an Advisory Board1
3Wikileaks Secret Dreams1
4MakeMyTrip.com: Is eCommerce in India Finall...1
\n", + "
" + ], + "text/plain": [ + " data score\n", + "0 The PeopleWeb | Mark Pincus Blog (March 2005) 3\n", + "1 Computer science and programming are two separ... 1\n", + "2 Don't Go It Alone: Create an Advisory Board 1\n", + "3 Wikileaks Secret Dreams 1\n", + "4 MakeMyTrip.com: Is eCommerce in India Finall... 1" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "start_time = time.time()\n", "\n", @@ -121,16 +265,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "1.0" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[\"score\"].median()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -157,7 +312,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -174,9 +329,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/stochastic_gradient.py:561: ConvergenceWarning: Maximum number of iteration reached before convergence. 
Consider increasing max_iter to improve the fit.\n", + " ConvergenceWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy on the training set is 0.586221875\n" + ] + } + ], "source": [ "from sklearn.pipeline import Pipeline\n", "from sklearn.feature_extraction.text import CountVectorizer\n", @@ -196,9 +367,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy on the test set is 0.578675\n" + ] + } + ], "source": [ "predicted = pipeline.predict(test.data)\n", "print(\"Accuracy on the test set is {}\".format(np.mean(predicted == test.target)))" @@ -213,9 +392,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([ True, False])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pipeline.predict([\"Iconic consoles of the IBM System/360 mainframes, 55 years old today\",\n", " \"Are Banned Drugs in Your Meat?\"])" @@ -238,7 +428,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -260,7 +450,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -268,7 +458,12 @@ "test_id = ray.put(test)\n", "\n", "def train_func(config, reporter):\n", - " pipeline = # TODO: Put in the training pipeline here\n", + " pipeline = Pipeline([\n", + " (\"vect\", CountVectorizer()),\n", + " (\"clf\", SGDClassifier(loss=\"hinge\", penalty=\"l2\",\n", + " alpha=config[\"alpha\"],\n", + " max_iter=5, tol=1e-3,\n", + " warm_start=True))]) # TODO: Put in the training pipeline here\n", " train = ray.get(train_id)\n", " test = ray.get(test_id)\n", " for i in range(5):\n", @@ -289,9 
+484,385 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 26, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-01-02 12:23:13,171\tWARNING logger.py:413 -- Could not instantiate tf2_compat_logger: No module named 'tensorflow'.\n" + ] + }, + { + "data": { + "text/html": [ + "== Status ==
Memory usage on this node: 11.4/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 1/4 CPUs, 0/0 GPUs, 0.0/3.32 GiB heap, 0.0/1.12 GiB objects
Result logdir: /Users/yunzhi/ray_results/news_recommendation
Number of trials: 4 (1 RUNNING, 3 PENDING)
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc alpha
train_func_b36a39c4RUNNING
train_func_b36dfc58PENDING
train_func_b36e24d0PENDING
train_func_b36e4bfePENDING


" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-01-02 12:23:13,187\tWARNING logger.py:413 -- Could not instantiate tf2_compat_logger: No module named 'tensorflow'.\n", + "2020-01-02 12:23:13,198\tWARNING logger.py:413 -- Could not instantiate tf2_compat_logger: No module named 'tensorflow'.\n", + "2020-01-02 12:23:13,208\tWARNING logger.py:413 -- Could not instantiate tf2_compat_logger: No module named 'tensorflow'.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result for train_func_b36e24d0:\n", + " date: 2020-01-02_12-23-18\n", + " done: false\n", + " experiment_id: 0c18f4cfd3a54af0902bb2ade835f338\n", + " experiment_tag: 2_alpha=1e-05\n", + " hostname: Yunzhis-MBP.hsd1.ca.comcast.net\n", + " iterations_since_restore: 1\n", + " mean_accuracy: 0.5702625\n", + " node_ip: 10.1.10.91\n", + " pid: 4527\n", + " time_since_restore: 4.267539978027344\n", + " time_this_iter_s: 4.267539978027344\n", + " time_total_s: 4.267539978027344\n", + " timestamp: 1577996598\n", + " timesteps_since_restore: 0\n", + " training_iteration: 1\n", + " trial_id: b36e24d0\n", + " \n" + ] + }, + { + "data": { + "text/html": [ + "== Status ==
Memory usage on this node: 11.4/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 0/0 GPUs, 0.0/3.32 GiB heap, 0.0/1.12 GiB objects
Result logdir: /Users/yunzhi/ray_results/news_recommendation
Number of trials: 4 (4 RUNNING)
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc alpha iter total time (s) acc
train_func_b36a39c4RUNNING
train_func_b36dfc58RUNNING
train_func_b36e24d0RUNNING 10.1.10.91:4527 1e-05 1 4.267540.570263
train_func_b36e4bfeRUNNING


" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result for train_func_b36e4bfe:\n", + " date: 2020-01-02_12-23-18\n", + " done: false\n", + " experiment_id: 8852a9f9a054441ea0fb3d6963922800\n", + " experiment_tag: 3_alpha=1e-06\n", + " hostname: Yunzhis-MBP.hsd1.ca.comcast.net\n", + " iterations_since_restore: 1\n", + " mean_accuracy: 0.5621125\n", + " node_ip: 10.1.10.91\n", + " pid: 4525\n", + " time_since_restore: 4.303200960159302\n", + " time_this_iter_s: 4.303200960159302\n", + " time_total_s: 4.303200960159302\n", + " timestamp: 1577996598\n", + " timesteps_since_restore: 0\n", + " training_iteration: 1\n", + " trial_id: b36e4bfe\n", + " \n", + "Result for train_func_b36dfc58:\n", + " date: 2020-01-02_12-23-18\n", + " done: false\n", + " experiment_id: 12c88bd52e6047679f7a52af9aa71d02\n", + " experiment_tag: 1_alpha=0.0001\n", + " hostname: Yunzhis-MBP.hsd1.ca.comcast.net\n", + " iterations_since_restore: 1\n", + " mean_accuracy: 0.59805\n", + " node_ip: 10.1.10.91\n", + " pid: 4528\n", + " time_since_restore: 4.316836833953857\n", + " time_this_iter_s: 4.316836833953857\n", + " time_total_s: 4.316836833953857\n", + " timestamp: 1577996598\n", + " timesteps_since_restore: 0\n", + " training_iteration: 1\n", + " trial_id: b36dfc58\n", + " \n", + "Result for train_func_b36a39c4:\n", + " date: 2020-01-02_12-23-18\n", + " done: false\n", + " experiment_id: f6a7dfdfaba84177bc8dbe4e28c227c4\n", + " experiment_tag: 0_alpha=0.001\n", + " hostname: Yunzhis-MBP.hsd1.ca.comcast.net\n", + " iterations_since_restore: 1\n", + " mean_accuracy: 0.5814125\n", + " node_ip: 10.1.10.91\n", + " pid: 4526\n", + " time_since_restore: 4.329303026199341\n", + " time_this_iter_s: 4.329303026199341\n", + " time_total_s: 4.329303026199341\n", + " timestamp: 1577996598\n", + " timesteps_since_restore: 0\n", + " training_iteration: 1\n", + " trial_id: b36a39c4\n", + " \n", + 
"Result for train_func_b36e24d0:\n", + " date: 2020-01-02_12-23-26\n", + " done: false\n", + " experiment_id: 0c18f4cfd3a54af0902bb2ade835f338\n", + " experiment_tag: 2_alpha=1e-05\n", + " hostname: Yunzhis-MBP.hsd1.ca.comcast.net\n", + " iterations_since_restore: 3\n", + " mean_accuracy: 0.5919625\n", + " node_ip: 10.1.10.91\n", + " pid: 4527\n", + " time_since_restore: 12.236334085464478\n", + " time_this_iter_s: 3.945594072341919\n", + " time_total_s: 12.236334085464478\n", + " timestamp: 1577996606\n", + " timesteps_since_restore: 0\n", + " training_iteration: 3\n", + " trial_id: b36e24d0\n", + " \n" + ] + }, + { + "data": { + "text/html": [ + "== Status ==
Memory usage on this node: 11.2/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 0/0 GPUs, 0.0/3.32 GiB heap, 0.0/1.12 GiB objects
Result logdir: /Users/yunzhi/ray_results/news_recommendation
Number of trials: 4 (4 RUNNING)
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc alpha iter total time (s) acc
train_func_b36a39c4RUNNING 10.1.10.91:4526 0.001 2 8.396690.58005
train_func_b36dfc58RUNNING 10.1.10.91:4528 0.0001 2 8.383580.601413
train_func_b36e24d0RUNNING 10.1.10.91:4527 1e-05 3 12.2363 0.591962
train_func_b36e4bfeRUNNING 10.1.10.91:4525 1e-06 2 8.358940.568412


" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result for train_func_b36e4bfe:\n", + " date: 2020-01-02_12-23-26\n", + " done: false\n", + " experiment_id: 8852a9f9a054441ea0fb3d6963922800\n", + " experiment_tag: 3_alpha=1e-06\n", + " hostname: Yunzhis-MBP.hsd1.ca.comcast.net\n", + " iterations_since_restore: 3\n", + " mean_accuracy: 0.5668\n", + " node_ip: 10.1.10.91\n", + " pid: 4525\n", + " time_since_restore: 12.327115058898926\n", + " time_this_iter_s: 3.9681711196899414\n", + " time_total_s: 12.327115058898926\n", + " timestamp: 1577996606\n", + " timesteps_since_restore: 0\n", + " training_iteration: 3\n", + " trial_id: b36e4bfe\n", + " \n", + "Result for train_func_b36dfc58:\n", + " date: 2020-01-02_12-23-26\n", + " done: false\n", + " experiment_id: 12c88bd52e6047679f7a52af9aa71d02\n", + " experiment_tag: 1_alpha=0.0001\n", + " hostname: Yunzhis-MBP.hsd1.ca.comcast.net\n", + " iterations_since_restore: 3\n", + " mean_accuracy: 0.60135\n", + " node_ip: 10.1.10.91\n", + " pid: 4528\n", + " time_since_restore: 12.36200499534607\n", + " time_this_iter_s: 3.9784250259399414\n", + " time_total_s: 12.36200499534607\n", + " timestamp: 1577996606\n", + " timesteps_since_restore: 0\n", + " training_iteration: 3\n", + " trial_id: b36dfc58\n", + " \n", + "Result for train_func_b36a39c4:\n", + " date: 2020-01-02_12-23-26\n", + " done: false\n", + " experiment_id: f6a7dfdfaba84177bc8dbe4e28c227c4\n", + " experiment_tag: 0_alpha=0.001\n", + " hostname: Yunzhis-MBP.hsd1.ca.comcast.net\n", + " iterations_since_restore: 3\n", + " mean_accuracy: 0.578625\n", + " node_ip: 10.1.10.91\n", + " pid: 4526\n", + " time_since_restore: 12.378322124481201\n", + " time_this_iter_s: 3.981630325317383\n", + " time_total_s: 12.378322124481201\n", + " timestamp: 1577996606\n", + " timesteps_since_restore: 0\n", + " training_iteration: 3\n", + " trial_id: b36a39c4\n", + " \n", + 
"Result for train_func_b36e24d0:\n", + " date: 2020-01-02_12-23-34\n", + " done: false\n", + " experiment_id: 0c18f4cfd3a54af0902bb2ade835f338\n", + " experiment_tag: 2_alpha=1e-05\n", + " hostname: Yunzhis-MBP.hsd1.ca.comcast.net\n", + " iterations_since_restore: 5\n", + " mean_accuracy: 0.596325\n", + " node_ip: 10.1.10.91\n", + " pid: 4527\n", + " time_since_restore: 20.07131314277649\n", + " time_this_iter_s: 3.911412239074707\n", + " time_total_s: 20.07131314277649\n", + " timestamp: 1577996614\n", + " timesteps_since_restore: 0\n", + " training_iteration: 5\n", + " trial_id: b36e24d0\n", + " \n" + ] + }, + { + "data": { + "text/html": [ + "== Status ==
Memory usage on this node: 11.2/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 4/4 CPUs, 0/0 GPUs, 0.0/3.32 GiB heap, 0.0/1.12 GiB objects
Result logdir: /Users/yunzhi/ray_results/news_recommendation
Number of trials: 4 (4 RUNNING)
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc alpha iter total time (s) acc
train_func_b36a39c4RUNNING 10.1.10.91:4526 0.001 4 16.34480.578825
train_func_b36dfc58RUNNING 10.1.10.91:4528 0.0001 4 16.31860.609237
train_func_b36e24d0RUNNING 10.1.10.91:4527 1e-05 5 20.07130.596325
train_func_b36e4bfeRUNNING 10.1.10.91:4525 1e-06 4 16.267 0.5628


" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result for train_func_b36e4bfe:\n", + " date: 2020-01-02_12-23-34\n", + " done: false\n", + " experiment_id: 8852a9f9a054441ea0fb3d6963922800\n", + " experiment_tag: 3_alpha=1e-06\n", + " hostname: Yunzhis-MBP.hsd1.ca.comcast.net\n", + " iterations_since_restore: 5\n", + " mean_accuracy: 0.568875\n", + " node_ip: 10.1.10.91\n", + " pid: 4525\n", + " time_since_restore: 20.2430899143219\n", + " time_this_iter_s: 3.976069927215576\n", + " time_total_s: 20.2430899143219\n", + " timestamp: 1577996614\n", + " timesteps_since_restore: 0\n", + " training_iteration: 5\n", + " trial_id: b36e4bfe\n", + " \n", + "Result for train_func_b36dfc58:\n", + " date: 2020-01-02_12-23-34\n", + " done: false\n", + " experiment_id: 12c88bd52e6047679f7a52af9aa71d02\n", + " experiment_tag: 1_alpha=0.0001\n", + " hostname: Yunzhis-MBP.hsd1.ca.comcast.net\n", + " iterations_since_restore: 5\n", + " mean_accuracy: 0.6095125\n", + " node_ip: 10.1.10.91\n", + " pid: 4528\n", + " time_since_restore: 20.28740692138672\n", + " time_this_iter_s: 3.968809127807617\n", + " time_total_s: 20.28740692138672\n", + " timestamp: 1577996614\n", + " timesteps_since_restore: 0\n", + " training_iteration: 5\n", + " trial_id: b36dfc58\n", + " \n", + "Result for train_func_b36a39c4:\n", + " date: 2020-01-02_12-23-34\n", + " done: false\n", + " experiment_id: f6a7dfdfaba84177bc8dbe4e28c227c4\n", + " experiment_tag: 0_alpha=0.001\n", + " hostname: Yunzhis-MBP.hsd1.ca.comcast.net\n", + " iterations_since_restore: 5\n", + " mean_accuracy: 0.5783875\n", + " node_ip: 10.1.10.91\n", + " pid: 4526\n", + " time_since_restore: 20.350215196609497\n", + " time_this_iter_s: 4.005417346954346\n", + " time_total_s: 20.350215196609497\n", + " timestamp: 1577996614\n", + " timesteps_since_restore: 0\n", + " training_iteration: 5\n", + " trial_id: b36a39c4\n", + " \n" + ] 
+ }, + { + "data": { + "text/html": [ + "== Status ==
Memory usage on this node: 11.1/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/0 GPUs, 0.0/3.32 GiB heap, 0.0/1.12 GiB objects
Result logdir: /Users/yunzhi/ray_results/news_recommendation
Number of trials: 4 (4 TERMINATED)
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc alpha iter total time (s) acc
train_func_b36a39c4TERMINATED 0.001 5 20.35020.578388
train_func_b36dfc58TERMINATED 0.0001 5 20.28740.609513
train_func_b36e24d0TERMINATED 1e-05 5 20.07130.596325
train_func_b36e4bfeTERMINATED 1e-06 5 20.24310.568875


" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-01-02 12:23:34,849\tINFO tune.py:334 -- Returning an analysis object by default. You can call `analysis.trials` to retrieve a list of trials. This message will be removed in future versions of Tune.\n" + ] + } + ], "source": [ "all_trials = tune.run(\n", " train_func,\n", @@ -338,7 +909,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.4" } }, "nbformat": 4, diff --git a/examples/sharded_parameter_server.ipynb b/examples/sharded_parameter_server.ipynb index 3785506..815e8e7 100644 --- a/examples/sharded_parameter_server.ipynb +++ b/examples/sharded_parameter_server.ipynb @@ -24,9 +24,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "env: RAY_DASHBOARD_DEBUG=True\n" + ] + } + ], "source": [ "from __future__ import absolute_import\n", "from __future__ import division\n", @@ -34,16 +42,43 @@ "\n", "import numpy as np\n", "import ray\n", - "import time" + "import time\n", + "\n", + "%env RAY_DASHBOARD_DEBUG = True" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-01-24 14:32:55,735\tINFO resource_spec.py:212 -- Starting Ray with 2.69 GiB memory available for workers and up to 1.36 GiB for objects. 
You can adjust these settings with ray.init(memory=, object_store_memory=).\n", + "2020-01-24 14:32:56,852\tINFO services.py:501 -- Failed to connect to the redis server, retrying.\n", + "2020-01-24 14:32:57,123\tINFO services.py:1093 -- View the Ray dashboard at \u001b[1m\u001b[32mlocalhost:8268\u001b[39m\u001b[22m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'node_ip_address': '192.168.1.27',\n", + " 'redis_address': '192.168.1.27:16149',\n", + " 'object_store_address': '/tmp/ray/session_2020-01-24_14-32-55_727080_11529/sockets/plasma_store',\n", + " 'raylet_socket_name': '/tmp/ray/session_2020-01-24_14-32-55_727080_11529/sockets/raylet',\n", + " 'webui_url': 'localhost:8268',\n", + " 'session_dir': '/tmp/ray/session_2020-01-24_14-32-55_727080_11529'}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "ray.init(num_cpus=30, include_webui=False, ignore_reinit_error=True)" + "ray.init(num_cpus=30, include_webui=True, ignore_reinit_error=True)" ] }, { @@ -57,12 +92,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "dim = 10\n", "\n", + "@ray.remote\n", "class ParameterServer(object):\n", " def __init__(self, dim):\n", " self.parameters = np.zeros(dim)\n", @@ -74,7 +110,7 @@ " self.parameters += update\n", "\n", "\n", - "ps = ParameterServer(dim)\n", + "ps = ParameterServer.remote(dim)\n", "\n", "assert hasattr(ParameterServer, 'remote'), ('You need to turn ParameterServer into an '\n", " 'actor (by using the ray.remote keyword).')" @@ -91,18 +127,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ + "@ray.remote\n", "def worker(ps, dim, num_iters):\n", " for _ in range(num_iters):\n", " # Get the latest parameters.\n", - " parameters = ps.get_parameters()\n", + " parameters = ray.get(ps.get_parameters.remote())\n", " # Compute an update.\n", " update = 1e-3 
* parameters + np.ones(dim)\n", " # Update the parameters.\n", - " ps.update_parameters(update)\n", + " ray.get(ps.update_parameters.remote(update))\n", " # Sleep a little to simulate a real workload.\n", " time.sleep(0.5)\n", "\n", @@ -112,12 +149,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Start two workers.\n", - "worker_results = [worker(ps, dim, 100) for _ in range(2)]" + "worker_results = [worker.remote(ps, dim, 100) for _ in range(2)]" ] }, { @@ -131,9 +168,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[41.81828509 41.81828509 41.81828509 41.81828509 41.81828509 41.81828509\n", + " 41.81828509 41.81828509 41.81828509 41.81828509]\n" + ] + } + ], "source": [ "print(ray.get(ps.get_parameters.remote()))" ] @@ -155,10 +201,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ + "@ray.remote\n", "class ParameterServerShard(object):\n", " def __init__(self, sharded_dim):\n", " self.parameters = np.zeros(sharded_dim)\n", @@ -178,7 +225,7 @@ " 'perfectly divide the total dimension.')\n", "\n", "# Start some parameter servers.\n", - "ps_shards = [ParameterServerShard(total_dim // num_shards) for _ in range(num_shards)]\n", + "ps_shards = [ParameterServerShard.remote(total_dim // num_shards) for _ in range(num_shards)]\n", "\n", "assert hasattr(ParameterServerShard, 'remote'), ('You need to turn ParameterServerShard into an '\n", " 'actor (by using the ray.remote keyword).')" @@ -200,17 +247,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ + "@ray.remote\n", "def worker_task(total_dim, num_iters, *ps_shards):\n", " # Note that ps_shards are passed in using Python's variable number\n", " # 
of arguments feature. We do this because currently actor handles\n", " # cannot be passed to tasks inside of lists or other objects.\n", " for _ in range(num_iters):\n", " # Get the current parameters from each parameter server.\n", - " parameter_shards = [ps.get_parameters() for ps in ps_shards]\n", + " parameter_shards = ray.get([ps.get_parameters.remote() for ps in ps_shards])\n", " assert all([isinstance(shard, np.ndarray) for shard in parameter_shards]), (\n", " 'The parameter shards must be numpy arrays. Did you forget to call ray.get?')\n", " # Concatenate them to form the full parameter vector.\n", @@ -224,7 +272,7 @@ " \n", " # Apply the updates to the relevant parameter server shards.\n", " for ps, update_shard in zip(ps_shards, update_shards):\n", - " ps.update_parameters(update_shard)\n", + " ray.get(ps.update_parameters.remote(update_shard))\n", "\n", "\n", "# Test that worker_task is implemented correctly. You do not need to change this line.\n", @@ -242,18 +290,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "This took 1.3368570804595947 seconds.\n", + "This took 1.6423673629760742 seconds.\n", + "This took 3.1816182136535645 seconds.\n" + ] + }, + { + "ename": "RayTaskError", + "evalue": "\u001b[36mray::__main__.worker_task()\u001b[39m (pid=11559, ip=192.168.1.27)\n File \"python/ray/_raylet.pyx\", line 647, in ray._raylet.execute_task\n File \"\", line 22, in worker_task\nray.exceptions.RayTaskError: \u001b[36mray::ParameterServerShard\u001b[39m (pid=11551, ip=192.168.1.27)\n File \"python/ray/_raylet.pyx\", line 633, in ray._raylet.execute_task\n File \"python/ray/_raylet.pyx\", line 634, in ray._raylet.execute_task\n File \"python/ray/_raylet.pyx\", line 519, in ray._raylet.deserialize_args\nray.exceptions.UnreconstructableError: Object 2ca53902e2591031ffffffff0100008004000000 is lost (either LRU 
evicted or deleted by user) and cannot be reconstructed. Try increasing the object store memory available with ray.init(object_store_memory=) or setting object store limits with ray.remote(object_store_memory=). See also: https://ray.readthedocs.io/en/latest/memory-management.html", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRayTaskError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# duration changes.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mstart\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mworker_task\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mremote\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtotal_dim\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mps_shards\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnum_workers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'This took {} seconds.'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m 
\u001b[0mstart\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/ray/python/ray/worker.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(object_ids, timeout)\u001b[0m\n\u001b[1;32m 1490\u001b[0m \u001b[0mworker\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore_worker\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdump_object_store_memory_usage\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1491\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRayTaskError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1492\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_instanceof_cause\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1493\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1494\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mRayTaskError\u001b[0m: \u001b[36mray::__main__.worker_task()\u001b[39m (pid=11559, ip=192.168.1.27)\n File \"python/ray/_raylet.pyx\", line 647, in ray._raylet.execute_task\n File \"\", line 22, in worker_task\nray.exceptions.RayTaskError: \u001b[36mray::ParameterServerShard\u001b[39m (pid=11551, ip=192.168.1.27)\n File \"python/ray/_raylet.pyx\", line 633, in ray._raylet.execute_task\n File \"python/ray/_raylet.pyx\", line 634, in ray._raylet.execute_task\n File \"python/ray/_raylet.pyx\", line 519, in ray._raylet.deserialize_args\nray.exceptions.UnreconstructableError: Object 2ca53902e2591031ffffffff0100008004000000 is lost (either LRU evicted or deleted by user) and cannot be 
reconstructed. Try increasing the object store memory available with ray.init(object_store_memory=) or setting object store limits with ray.remote(object_store_memory=). See also: https://ray.readthedocs.io/en/latest/memory-management.html" + ] + } + ], "source": [ - "num_workers = 4\n", + "for num_workers in [1, 2, 4, 8]:\n", "\n", - "# Start some workers. Try changing various quantities and see how the\n", - "# duration changes.\n", - "start = time.time()\n", - "ray.get([worker_task(total_dim, 5, *ps_shards) for _ in range(num_workers)])\n", - "print('This took {} seconds.'.format(time.time() - start))" + " # Start some workers. Try changing various quantities and see how the\n", + " # duration changes.\n", + " start = time.time()\n", + " ray.get([worker_task.remote(total_dim, 5, *ps_shards) for _ in range(num_workers)])\n", + " print('This took {} seconds.'.format(time.time() - start))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -272,7 +349,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.4" + "version": "3.7.4" } }, "nbformat": 4, diff --git a/rllib_exercises/rllib_exercise03_custom_env.ipynb b/rllib_exercises/rllib_exercise03_custom_env.ipynb index 039de0e..1c7f285 100644 --- a/rllib_exercises/rllib_exercise03_custom_env.ipynb +++ b/rllib_exercises/rllib_exercise03_custom_env.ipynb @@ -24,9 +24,58 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", + 
"/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", + 
"/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n", + "lz4 not available, disabling sample compression. This will significantly impact RLlib performance. To install lz4, run `pip install lz4`.\n", + "2020-01-24 14:42:09,225\tINFO resource_spec.py:212 -- Starting Ray with 4.0 GiB memory available for workers and up to 2.01 GiB for objects. 
You can adjust these settings with ray.init(memory=, object_store_memory=).\n", + "2020-01-24 14:42:09,552\tINFO services.py:1093 -- View the Ray dashboard at \u001b[1m\u001b[32mlocalhost:8270\u001b[39m\u001b[22m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'node_ip_address': '192.168.1.27',\n", + " 'redis_address': '192.168.1.27:22733',\n", + " 'object_store_address': '/tmp/ray/session_2020-01-24_14-42-09_215023_11778/sockets/plasma_store',\n", + " 'raylet_socket_name': '/tmp/ray/session_2020-01-24_14-42-09_215023_11778/sockets/raylet',\n", + " 'webui_url': 'localhost:8270',\n", + " 'session_dir': '/tmp/ray/session_2020-01-24_14-42-09_215023_11778'}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from __future__ import absolute_import\n", "from __future__ import division\n", @@ -73,9 +122,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Success!\n" + ] + } + ], "source": [ "action_space_map = {\n", " \"discrete_10\": spaces.Discrete(10),\n", @@ -86,9 +143,9 @@ "\n", "action_space_jumble = {\n", " \"discrete_10\": 1,\n", - " \"CHANGE_ME\": np.array([0, 0, 0, 2]),\n", - " \"CHANGE_ME\": np.array([[-1.2657754], [-1.6528835], [ 0.5982418]]),\n", - " \"CHANGE_ME\": np.array([0.89089584]),\n", + " \"multi_discrete\": np.array([0, 0, 0, 2]),\n", + " \"box_3x1\": np.array([[-1.2657754], [-1.6528835], [ 0.5982418]]),\n", + " \"box_1\": np.array([0.89089584]),\n", "}\n", "\n", "\n", @@ -143,9 +200,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing if spaces have been setup correctly...\n", + "Success! You've setup the spaces correctly.\n", + "Testing if reward has been setup correctly...\n", + "Success! 
You've setup the rewards correctly.\n" + ] + } + ], "source": [ "class ChainEnv(gym.Env):\n", " \n", @@ -162,8 +230,8 @@ " def _setup_spaces(self):\n", " ##############\n", " # TODO: Implement this so that it passes tests\n", - " self.action_space = None\n", - " self.observation_space = None\n", + " self.action_space = spaces.Discrete(2)\n", + " self.observation_space = spaces.Discrete(self.n)\n", " ##############\n", "\n", " def step(self, action):\n", @@ -171,18 +239,18 @@ " if action == 1: # 'backwards': go back to the beginning, get small reward\n", " ##############\n", " # TODO 2: Implement this so that it passes tests\n", - " reward = -1\n", + " reward = self.small_reward\n", " ##############\n", " self.state = 0\n", " elif self.state < self.n - 1: # 'forwards': go up along the chain\n", " ##############\n", " # TODO 2: Implement this so that it passes tests\n", - " reward = -1\n", + " reward = 0\n", " self.state += 1\n", " else: # 'forwards': stay at the end of the chain, collect large reward\n", " ##############\n", " # TODO 2: Implement this so that it passes tests\n", - " reward = -1\n", + " reward = self.large_reward\n", " ##############\n", " self._counter += 1\n", " done = self._counter >= self._horizon\n", @@ -209,7 +277,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -222,9 +290,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-01-24 14:45:27,438\tINFO trainer.py:377 -- Tip: set 'eager': true or the --eager flag to enable TensorFlow eager execution\n", + "2020-01-24 14:45:27,444\tERROR logger.py:328 -- pip install 'ray[tune]' to see TensorBoard files.\n", + "2020-01-24 14:45:27,444\tWARNING logger.py:417 -- Could not instantiate TBXLogger: No module named 'tensorboardX'.\n", + "2020-01-24 14:45:27,446\tINFO trainer.py:524 
-- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.\n", + "2020-01-24 14:45:30,616\tWARNING util.py:41 -- Install gputil for GPU system monitoring.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training iteration 0...\n", + "Training iteration 1...\n", + "Training iteration 2...\n", + "Training iteration 3...\n", + "Training iteration 4...\n", + "Training iteration 5...\n", + "Training iteration 6...\n", + "Training iteration 7...\n", + "Training iteration 8...\n", + "Training iteration 9...\n", + "Training iteration 10...\n", + "Training iteration 11...\n", + "Training iteration 12...\n", + "Training iteration 13...\n", + "Training iteration 14...\n", + "Training iteration 15...\n", + "Training iteration 16...\n", + "Training iteration 17...\n", + "Training iteration 18...\n", + "Training iteration 19...\n" + ] + } + ], "source": [ "trainer = PPOTrainer(trainer_config, ChainEnv);\n", "for i in range(20):\n", @@ -234,9 +340,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cumulative reward you've received is: 40. Congratulations!\n", + "Max state you've visited is: 0. This is out of 20 states.\n" + ] + } + ], "source": [ "env = ChainEnv({})\n", "state = env.reset()\n", @@ -270,9 +385,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing if behavior has been changed...\n", + "Success! 
Behavior of environment is correct.\n" + ] + } + ], "source": [ "class ShapedChainEnv(ChainEnv):\n", " def step(self, action):\n", @@ -303,9 +427,58 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-01-24 14:46:04,206\tERROR logger.py:328 -- pip install 'ray[tune]' to see TensorBoard files.\n", + "2020-01-24 14:46:04,207\tWARNING logger.py:417 -- Could not instantiate TBXLogger: No module named 'tensorboardX'.\n", + "2020-01-24 14:46:07,247\tWARNING util.py:41 -- Install gputil for GPU system monitoring.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training iteration 0...\n", + "Training iteration 1...\n", + "Training iteration 2...\n", + "Training iteration 3...\n", + "Training iteration 4...\n", + "Training iteration 5...\n", + "Training iteration 6...\n", + "Training iteration 7...\n", + "Training iteration 8...\n", + "Training iteration 9...\n", + "Training iteration 10...\n", + "Training iteration 11...\n", + "Training iteration 12...\n", + "Training iteration 13...\n", + "Training iteration 14...\n", + "Training iteration 15...\n", + "Training iteration 16...\n", + "Training iteration 17...\n", + "Training iteration 18...\n", + "Training iteration 19...\n", + "Cumulative reward you've received is: -20!\n", + "Max state you've visited is: 3.2. 
This is out of 20 states.\n" + ] + }, + { + "ename": "AssertionError", + "evalue": "This policy did not traverse many states.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Cumulative reward you've received is: {}!\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcumulative_reward\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Max state you've visited is: {}. This is out of {} states.\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmax_states\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 24\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmax_states\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m0.2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"This policy did not traverse many states.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m: This policy did not traverse many states." 
+ ] + } + ], "source": [ "trainer = PPOTrainer(trainer_config, ShapedChainEnv);\n", "for i in range(20):\n", @@ -357,7 +530,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.7.4" } }, "nbformat": 4, diff --git a/rllib_exercises/rllib_exercise04_serving.ipynb b/rllib_exercises/rllib_exercise04_serving.ipynb index ac4132c..0eec2c2 100644 --- a/rllib_exercises/rllib_exercise04_serving.ipynb +++ b/rllib_exercises/rllib_exercise04_serving.ipynb @@ -265,7 +265,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.4" } }, "nbformat": 4, diff --git a/solutions/colab01-03_solution.ipynb b/solutions/colab01-03_solution.ipynb index 1d8eb94..836fbe1 100644 --- a/solutions/colab01-03_solution.ipynb +++ b/solutions/colab01-03_solution.ipynb @@ -132,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -176,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -191,7 +191,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2019-10-13 10:06:37,698\tERROR worker.py:1432 -- Calling ray.init() again after it has already been called.\n" + "2020-01-24 14:35:16,520\tINFO resource_spec.py:212 -- Starting Ray with 3.08 GiB memory available for workers and up to 1.54 GiB for objects. 
You can adjust these settings with ray.init(memory=, object_store_memory=).\n", + "2020-01-24 14:35:16,888\tINFO services.py:1093 -- View the Ray dashboard at \u001b[1m\u001b[32mlocalhost:8266\u001b[39m\u001b[22m\n" ] } ], @@ -217,7 +218,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -232,7 +233,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Executing the for loop took 1.013 seconds.\n", + "Executing the for loop took 1.014 seconds.\n", "The results are: [0, 1, 2, 3]\n", "Run the next cell to check if the exercise was performed correctly.\n" ] @@ -271,7 +272,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -286,7 +287,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Success! The example took 1.0131359100341797 seconds.\n" + "Success! The example took 1.014420986175537 seconds.\n" ] } ], @@ -445,7 +446,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": 5, "metadata": { "colab": {}, "colab_type": "code", diff --git a/tune_exercises/exercise_1_basics.ipynb b/tune_exercises/exercise_1_basics.ipynb index 5ce7585..5dbf01b 100644 --- a/tune_exercises/exercise_1_basics.ipynb +++ b/tune_exercises/exercise_1_basics.ipynb @@ -76,9 +76,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", + 
"/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", + 
"/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", + "/Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n" + ] + } + ], "source": [ "import numpy as np\n", "np.random.seed(0)\n", @@ -130,9 +161,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ "from sklearn.datasets import load_iris\n", "\n", @@ -182,7 +226,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -206,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -234,9 +278,273 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From /Users/yunzhi/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Call initializer instance with the dtype argument instead of passing it to the constructor\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + 
"WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + 
"WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + 
"WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + 
"WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best 
model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with 
accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, 
skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + 
"WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + 
"WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best 
model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "WARNING:tensorflow:Can save best model only with accuracy available, skipping.\n", + "Loss is 0.6368\n", + "Accuracy is 0.5526\n" + ] + } + 
], "source": [ "original_model = train_on_iris() # This trains the model and returns it.\n", "train_x, train_y, test_x, test_y = get_iris_data()\n", @@ -258,7 +566,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -305,11 +613,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], - "source": [ - "def tune_iris(): # TODO: Change me.\n", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "pip install 'ray[tune]' to see TensorBoard files.\n", + "Could not instantiate TBXLogger: No module named 'tensorboardX'.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test-running to make sure this function will run correctly.\n" + ] + }, + { + "ename": "AssertionError", + "evalue": "Did you set the right configuration?", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Test-running to make sure this function will run correctly.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mtune\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrack\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# For testing purposes only.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 23\u001b[0;31m \u001b[0mtune_iris\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"lr\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;36m0.1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"dense_1\"\u001b[0m\u001b[0;34m:\u001b[0m 
\u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"dense_2\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 24\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Success!\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mtune_iris\u001b[0;34m(config)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mtune_iris\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# TODO: Change me.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mtrain_x\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_y\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_x\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_y\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_iris_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlearning_rate\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdense_1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdense_2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# TODO: Change me.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m checkpoint_callback = ModelCheckpoint(\n\u001b[1;32m 5\u001b[0m \"model.h5\", monitor='loss', save_best_only=True, save_freq=2)\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mcreate_model\u001b[0;34m(learning_rate, dense_1, dense_2)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0mcreate_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlearning_rate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdense_1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdense_2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mlearning_rate\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mdense_1\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mdense_2\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Did you set the right configuration?\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSequential\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mDense\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdense_1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_shape\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mactivation\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'relu'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'fc1'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mDense\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdense_2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mactivation\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'relu'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'fc2'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAssertionError\u001b[0m: Did you set the right configuration?" + ] + } + ], + "source": [ + "def tune_iris(config): # TODO: Change me.\n", " train_x, train_y, test_x, test_y = get_iris_data()\n", " model = create_model(learning_rate=0, dense_1=0, dense_2=0) # TODO: Change me.\n", " checkpoint_callback = ModelCheckpoint(\n", @@ -383,15 +720,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "scrolled": false }, - "outputs": [], + "outputs": [ + { + "ename": "AssertionError", + "evalue": "The hyperparameter space is not fully designated. It must include all of ['lr', 'dense_1', 'dense_2']", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mHP_KEYS\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"lr\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"dense_1\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"dense_2\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m assert all(key in hyperparameter_space for key in HP_KEYS), (\n\u001b[0;32m---> 10\u001b[0;31m \"The hyperparameter space is not fully designated. 
It must include all of {}\".format(HP_KEYS))\n\u001b[0m\u001b[1;32m 11\u001b[0m \u001b[0;31m######################################################################################################\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAssertionError\u001b[0m: The hyperparameter space is not fully designated. It must include all of ['lr', 'dense_1', 'dense_2']" + ] + } + ], "source": [ "# This seeds the hyperparameter sampling.\n", "import numpy as np; np.random.seed(5) \n", - "hyperparameter_space = {} # TODO: Fill me out.\n", + "hyperparameter_space = {\"lr\"} # TODO: Fill me out.\n", "num_samples = 1 # TODO: Fill me out.\n", "\n", "####################################################################################################\n", @@ -547,7 +896,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.4" } }, "nbformat": 4,