diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a132263 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.pyc +.ipynb_checkpoints/ +*.h5 +checkpoints/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..c2dc6a3 --- /dev/null +++ b/README.md @@ -0,0 +1,81 @@ +# Introduction + +This repository contains code to reproduce the experiments in [Dynamic Filter Networks](https://arxiv.org/pdf/1605.09673v2.pdf), a NIPS 2016 paper by Bert De Brabandere\* , Xu Jia\*, Tinne Tuytelaars and Luc Van Gool (\* Bert and Xu contributed equally). + +In a traditional convolutional layer, the learned filters stay fixed after training. In contrast, we introduce a new framework, the Dynamic Filter Network, where filters are generated dynamically conditioned on an input. + +Example: + +![mnist prediction](https://i.imgur.com/XbyD2ix.png) + + + +If you use our code in your research, please cite the following paper: +``` +@inproceedings{debrabandere16_dfn, + author = {Bert De Brabandere and Xu Jia and Tinne Tuytelaars and Luc Van Gool}, + title = {Dynamic Filter Networks}, + booktitle = {NIPS}, + year = {2016} +} +``` + +# Running the code + +* Install [Lasagne](https://lasagne.readthedocs.io/en/latest/user/installation.html) and its prerequisites. +* Download the datasets and update the paths in the datasets/dataset_*.py files to point to them: +``` +wget http://www.cs.toronto.edu/~emansim/datasets/mnist.h5 +wget https://homes.esat.kuleuven.be/~bdebraba/dfn/movingObjects.h5 +wget https://homes.esat.kuleuven.be/~bdebraba/dfn/highwayDriving_train.h5 +wget https://homes.esat.kuleuven.be/~bdebraba/dfn/highwayDriving_test.h5 +``` + +* Run the experiments: +``` +python experiment_steerableFilter.py +python experiment_bouncingMnistOriginal.py +python experiment_highwayDriving.py +python experiment_stereoPrediction.py +``` +This will write checkpoint files to the checkpoints directory. + +* You can also run the baseline models. 
They have the same architecture as the DFN models, but without the DFN layer at the end: +``` +python experiment_bouncingMnistOriginal_baseline.py +python experiment_highwayDriving_baseline.py +python experiment_stereoPrediction_baseline.py +``` +Finally, you can evaluate the DFN and baseline models on the test set and generate new predictions with the notebook files: +``` +analyse_trained.ipynb +analyse_trained-baseline.ipynb +``` + +## Tensorflow implementation +While we don't provide a full implementation of the experiments in tensorflow, an example dynamic filter layer can be found in `layers/dynamic_filter_layer_tensorflow.py`. + +# Results + +When evaluating the trained models on the test sets with the ipython notebooks, you should approximately get following results: + +| Loss (per pixel) | Baseline | DFN | +| --------------------- |:--------:| ---------:| +| Moving MNIST (bce) | 0.106144 | 0.068914 | +| Highway Driving (mse) | 0.003683 | 0.003270 | +| Stereo Cars (mse) | 0.000416 | 0.000330 | + +| Loss (image, 64x64) | Baseline | DFN | +| --------------------- |:--------:| --------:| +| Moving MNIST (bce) | 434.8 | 282.3 | +| Highway Driving (mse) | 15.08 | 13.39 | +| Stereo Cars (mse) | 1.70 | 1.35 | + +| # Params | Baseline | DFN | +| --------------- |:---------:|:-------:| +| Moving MNIST | 637,443 | 637,361 | +| Highway Driving | 368,245 | 368,122 | +| Stereo Cars | 464,509 | 464,494 | diff --git a/analyse_trained-baseline.ipynb b/analyse_trained-baseline.ipynb new file mode 100644 index 0000000..3382999 --- /dev/null +++ b/analyse_trained-baseline.ipynb @@ -0,0 +1,349 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# export THEANO_FLAGS=\"device=gpu0, floatX=float32\" optimizer=None\n", + "\n", + "import theano\n", + "import theano.tensor as T\n", + "import numpy as np\n", + "import os\n", + "import socket\n", + "import argparse\n", + "import time\n", + 
"import datetime\n", + "import importlib\n", + "import pprint\n", + "\n", + "%matplotlib inline\n", + "# %matplotlib nbagg\n", + "import matplotlib.pyplot as plt\n", + "from IPython import display\n", + "\n", + "import lasagne\n", + "from lasagne.utils import floatX\n", + "from lasagne.updates import rmsprop, adam, momentum\n", + "from lasagne.layers import get_all_params, get_all_layers, get_all_param_values, get_output\n", + "from lasagne.objectives import squared_error, binary_crossentropy, aggregate\n", + "\n", + "from utils.helperFunctions import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# -------- setup options and data ------------------\n", + "pretrained_model_path = '/path/to/model_checkpoint.p'\n", + "\n", + "checkpoint = pickle.load(open(pretrained_model_path, 'rb'))\n", + "model_values = checkpoint['model_values'] # overwrite the values of model parameters\n", + "options = checkpoint['options']\n", + "pprint.PrettyPrinter(indent=4).pprint(options)\n", + "\n", + "# Load options\n", + "np.random.seed(options['seed'])\n", + "host = socket.gethostname() # get computer hostname\n", + "start_time = datetime.datetime.now().strftime(\"%y-%m-%d-%H-%M\")\n", + "\n", + "model = importlib.import_module(options['model_file'])\n", + "\n", + "# Optional: change some options\n", + "# options['modelOptions']['target_seqlen'] = 10\n", + "# options['datasetOptions']['num_frames'] = 15" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# ---------- build model and compile ---------------\n", + "input_batch = T.tensor4() # input image sequences\n", + "target = T.tensor4() # target image\n", + "\n", + "print('Build model...')\n", + "model = model.Model(**options['modelOptions'])\n", + "\n", + "print('Compile ...')\n", + "net, outputs = model.build_model(input_batch)\n", + "\n", + "# 
compute loss\n", + "outputs = get_output(outputs)\n", + "output_frames = outputs\n", + "\n", + "# # compute loss\n", + "# outputs = get_output(outputs + [filters])\n", + "# output_frames = outputs[:-1]\n", + "# output_filter = outputs[-1]\n", + "\n", + "train_losses = []\n", + "for i in range(options['modelOptions']['target_seqlen']):\n", + " output_frame = output_frames[i]\n", + "\n", + " if options['loss'] == 'squared_error':\n", + " frame_loss = squared_error(output_frame, target[:, [i], :, :])\n", + " elif options['loss'] == 'binary_crossentropy':\n", + " # Clipping to avoid NaN's in binary crossentropy: https://github.com/Lasagne/Lasagne/issues/436\n", + " output_frame = T.clip(output_frame, np.finfo(np.float32).eps, 1-np.finfo(np.float32).eps)\n", + " frame_loss = binary_crossentropy(output_frame, target[:,[i],:,:])\n", + " else:\n", + " assert False\n", + "\n", + " train_losses.append(aggregate(frame_loss))\n", + "\n", + "train_loss = sum(train_losses) / options['modelOptions']['target_seqlen']\n", + "\n", + "# update\n", + "sh_lr = theano.shared(lasagne.utils.floatX(options['learning_rate'])) # to allow dynamic learning rate\n", + "\n", + "layers = get_all_layers(net)\n", + "all_params = get_all_params(layers, trainable = True)\n", + "updates = adam(train_loss, all_params, learning_rate=sh_lr)\n", + "_test = theano.function([input_batch, target], [train_loss] + output_frames, allow_input_downcast=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# ------------ data setup ----------------\n", + "print('Prepare data...')\n", + "if options['dataset_file'] == 'datasets.bouncingMnist_original':\n", + " options['dataset_file'] = 'datasets.bouncingMnist_originalTest'\n", + "\n", + "datasetOptions = options['datasetOptions']\n", + "dataset = importlib.import_module(options['dataset_file'])\n", + "datasetOptions['mode'] = 'test'\n", + "dh = 
dataset.DataHandler(**datasetOptions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "# ------------ training setup ----------------\n", + "lasagne.layers.set_all_param_values(layers, model_values)\n", + "history_train = checkpoint['history_train']\n", + "batch_size = options['batch_size']\n", + "\n", + "# ------------ actual training ----------------\n", + "input_seqlen = options['modelOptions']['input_seqlen']\n", + "\n", + "# setup a test batch\n", + "its = dh.GetDatasetSize() // options['batch_size']\n", + "# its = 1\n", + "history_batch = []\n", + "for i in range(0, its):\n", + " ind = np.arange(i*batch_size, (i+1)*batch_size)\n", + "# import pdb; pdb.set_trace()\n", + " batch = dh.GetBatch(ind) # generate data on the fly\n", + " if options['dataset_file'] == 'datasets.stereoCarsColor':\n", + " batch_input = batch[..., :input_seqlen].squeeze(axis=4) # first frames\n", + " batch_target = batch[..., input_seqlen:].squeeze(axis=4) # last frame\n", + " else:\n", + " batch_input = batch[..., :input_seqlen].transpose(0,4,2,3,1).squeeze(axis=4) # first frames\n", + " batch_target = batch[..., input_seqlen:].transpose(0,4,2,3,1).squeeze(axis=4) # last frame\n", + " testOutputs = _test(batch_input, batch_target)\n", + " loss_test = testOutputs[0]\n", + " history_batch.append(loss_test)\n", + "# import pdb; pdb.set_trace()\n", + " predictions = np.asarray(testOutputs[1:]).transpose(1,2,3,4,0)\n", + " print(\"Batch {} of {}\".format(i+1, its))\n", + " print(\"Test loss:\\t{:.6f}\".format(np.mean(loss_test)))\n", + " \n", + " savefigs = False\n", + " if savefigs == True:\n", + " for case_id in range(batch_size):\n", + "# import pdb; pdb.set_trace()\n", + " # clear the screen\n", + " display.clear_output(wait=True)\n", + " \n", + " # visualize the prediction\n", + " visualize_prediction(batch, fut=predictions, fig=1, case_id=case_id, saveId=(i+1)*100+case_id, 
savefig=True)\n", + "\n", + " # visualize the flow map\n", + " visualize_flowmap(pred_filter, batch, predictions, input_seqlen, options['image_dim'], options['modelOptions']['dynamic_filter_size'][0], case_id=case_id, saveId=(i+1)*100+case_id, savefig=True)\n", + "# visualize_flowmapStereo(pred_filter, batch, predictions, input_seqlen, options['image_dim'], options['modelOptions']['dynamic_filter_size'][0], case_id=case_id, saveId=(i+1)*100+case_id, savefig=True)\n", + "\n", + " # animated gif\n", + " import matplotlib.animation as animation\n", + "\n", + " fig = plt.figure() # make figure\n", + "\n", + " # make axesimage object\n", + " # the vmin and vmax here are very important to get the color map correct\n", + " redgreen = np.zeros((3,options['image_dim'],options['image_dim'],options['datasetOptions']['num_frames']))\n", + " # import pdb; pdb.set_trace()\n", + " redgreen[0,:,:,:input_seqlen] = 1\n", + " redgreen[1,:,:,input_seqlen:] = 1\n", + "\n", + " data = batch[case_id]\n", + "\n", + " data2 = np.concatenate((np.zeros((options['batch_size'], 1, options['image_dim'], \n", + " options['image_dim'], input_seqlen)), \n", + " predictions), axis=4)\n", + " data2 = np.concatenate((batch[..., :input_seqlen], \n", + " predictions), axis=4)\n", + " data2 = data2[case_id]\n", + "\n", + " plt.subplot(1,3,1)\n", + " im0 = plt.imshow(redgreen[..., 0].transpose(1,2,0).squeeze(), cmap=plt.cm.gray, vmin=0, vmax=1, interpolation=\"nearest\")\n", + " plt.axis('off')\n", + " plt.subplot(1,3,2)\n", + " im = plt.imshow(data[..., 0].transpose(1,2,0).squeeze(), cmap=plt.cm.gray, vmin=0, vmax=1, interpolation=\"nearest\")\n", + " plt.axis('off')\n", + " plt.subplot(1,3,3)\n", + " im2 = plt.imshow(data2[..., 0].transpose(1,2,0).squeeze(), cmap=plt.cm.gray, vmin=0, vmax=1, interpolation=\"nearest\")\n", + " plt.axis('off')\n", + "\n", + " # function to update figure\n", + " def updatefig(j):\n", + " # set the data in the axesimage object \n", + " im0.set_array(redgreen[..., 
j].transpose(1,2,0).squeeze())\n", + " im.set_array(data[..., j].transpose(1,2,0).squeeze())\n", + " im2.set_array(data2[..., j].transpose(1,2,0).squeeze())\n", + " return im, im2\n", + "\n", + " # kick off the animation\n", + " ani = animation.FuncAnimation(fig, updatefig, frames=range(options['datasetOptions']['num_frames']), interval=200, repeat_delay=1000, blit=True)\n", + " ani.save('images/%d' % ((i+1)*100+case_id) + '.gif', writer='imagemagick')\n", + " plt.show()\n", + "# print statistics\n", + "print(\" Test loss:\\t{:.6f}\".format(np.mean(history_batch)))\n", + "print(\" Parameter count: {}\".format(lasagne.layers.count_params(net)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [], + "source": [ + "savefig = False\n", + "case_id = 0 # element of the batch to use for visualization\n", + "\n", + "# convergence plot\n", + "plt.figure()\n", + "plt.plot(range(1,len(history_train)+1), history_train, label=\"loss\")\n", + "plt.legend()\n", + "plt.show()\n", + "\n", + "# visualize the prediction\n", + "visualize_prediction(batch, fut=predictions, fig=1, case_id=case_id, savefig=savefig)\n", + " \n", + "visualize_flowmap(pred_filter, batch, predictions, input_seqlen, options['image_dim'], options['modelOptions']['dynamic_filter_size'][0], case_id, savefig=savefig)\n", + "# visualize_flowmapStereo(pred_filter, batch, predictions, input_seqlen, options['image_dim'], options['modelOptions']['dynamic_filter_size'][0], case_id, savefig=savefig)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [], + "source": [ + "import matplotlib.animation as animation\n", + "\n", + "fig = plt.figure() # make figure\n", + "case_id = 1\n", + "\n", + "# make axesimage object\n", + "# the vmin and vmax here are very important to get the color map correct\n", + "redgreen = 
np.zeros((3,options['image_dim'],options['image_dim'],options['datasetOptions']['num_frames']))\n", + "# import pdb; pdb.set_trace()\n", + "redgreen[0,:,:,:input_seqlen] = 1\n", + "redgreen[1,:,:,input_seqlen:] = 1\n", + "\n", + "data = batch[case_id]\n", + "\n", + "data2 = np.concatenate((np.zeros((options['batch_size'], 1, options['image_dim'], \n", + " options['image_dim'], input_seqlen)), \n", + " predictions), axis=4)\n", + "data2 = np.concatenate((batch[..., :input_seqlen], \n", + " predictions), axis=4)\n", + "data2 = data2[case_id]\n", + "\n", + "plt.subplot(1,3,1)\n", + "im0 = plt.imshow(redgreen[..., 0].transpose(1,2,0).squeeze(), cmap=plt.cm.gray, vmin=0, vmax=1, interpolation=\"nearest\")\n", + "plt.axis('off')\n", + "plt.subplot(1,3,2)\n", + "im = plt.imshow(data[..., 0].transpose(1,2,0).squeeze(), cmap=plt.cm.gray, vmin=0, vmax=1, interpolation=\"nearest\")\n", + "plt.axis('off')\n", + "plt.subplot(1,3,3)\n", + "im2 = plt.imshow(data2[..., 0].transpose(1,2,0).squeeze(), cmap=plt.cm.gray, vmin=0, vmax=1, interpolation=\"nearest\")\n", + "plt.axis('off')\n", + "\n", + "# function to update figure\n", + "def updatefig(j):\n", + " # set the data in the axesimage object \n", + " im0.set_array(redgreen[..., j].transpose(1,2,0).squeeze())\n", + " im.set_array(data[..., j].transpose(1,2,0).squeeze())\n", + " im2.set_array(data2[..., j].transpose(1,2,0).squeeze())\n", + " return im, im2\n", + "\n", + "# kick off the animation\n", + "ani = animation.FuncAnimation(fig, updatefig, frames=range(options['datasetOptions']['num_frames']), interval=100, repeat_delay=1000, blit=True)\n", + "ani.save(options['name'] + '.gif', writer='imagemagick')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + 
"name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/analyse_trained.ipynb b/analyse_trained.ipynb new file mode 100644 index 0000000..cc9d96e --- /dev/null +++ b/analyse_trained.ipynb @@ -0,0 +1,348 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# export THEANO_FLAGS=\"device=gpu0, floatX=float32\" optimizer=None\n", + "\n", + "import theano\n", + "import theano.tensor as T\n", + "import numpy as np\n", + "import os\n", + "import socket\n", + "import argparse\n", + "import time\n", + "import datetime\n", + "import importlib\n", + "import pprint\n", + "\n", + "%matplotlib inline\n", + "# %matplotlib nbagg\n", + "import matplotlib.pyplot as plt\n", + "from IPython import display\n", + "\n", + "import lasagne\n", + "from lasagne.utils import floatX\n", + "from lasagne.updates import rmsprop, adam, momentum\n", + "from lasagne.layers import get_all_params, get_all_layers, get_all_param_values, get_output\n", + "from lasagne.objectives import squared_error, binary_crossentropy, aggregate\n", + "\n", + "from utils.helperFunctions import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# -------- setup options and data ------------------\n", + "pretrained_model_path = '/path/to/model_checkpoint.p'\n", + "\n", + "checkpoint = pickle.load(open(pretrained_model_path, 'rb'))\n", + "model_values = checkpoint['model_values'] # overwrite the values of model parameters\n", + "options = checkpoint['options']\n", + "pprint.PrettyPrinter(indent=4).pprint(options)\n", + "\n", + "# import pdb; pdb.set_trace()\n", + "\n", + "# Load options\n", + "np.random.seed(options['seed'])\n", 
+ "host = socket.gethostname() # get computer hostname\n", + "start_time = datetime.datetime.now().strftime(\"%y-%m-%d-%H-%M\")\n", + "\n", + "model = importlib.import_module(options['model_file'])\n", + "\n", + "\n", + "# Optional: change some options\n", + "# options['modelOptions']['target_seqlen'] = 10\n", + "# options['datasetOptions']['num_frames'] = 15" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# ---------- build model and compile ---------------\n", + "input_batch = T.tensor4() # input image sequences\n", + "target = T.tensor4() # target image\n", + "\n", + "print('Build model...')\n", + "model = model.Model(**options['modelOptions'])\n", + "\n", + "print('Compile ...')\n", + "net, outputs, filters = model.build_model(input_batch)\n", + "\n", + "# compute loss\n", + "outputs = get_output(outputs + [filters])\n", + "output_frames = outputs[:-1]\n", + "output_filter = outputs[-1]\n", + "\n", + "train_losses = []\n", + "for i in range(options['modelOptions']['target_seqlen']):\n", + " output_frame = output_frames[i]\n", + "\n", + " if options['loss'] == 'squared_error':\n", + " frame_loss = squared_error(output_frame, target[:, [i], :, :])\n", + " elif options['loss'] == 'binary_crossentropy':\n", + " # Clipping to avoid NaN's in binary crossentropy: https://github.com/Lasagne/Lasagne/issues/436\n", + " output_frame = T.clip(output_frame, np.finfo(np.float32).eps, 1-np.finfo(np.float32).eps)\n", + " frame_loss = binary_crossentropy(output_frame, target[:,[i],:,:])\n", + " else:\n", + " assert False\n", + "\n", + " train_losses.append(aggregate(frame_loss))\n", + "\n", + "train_loss = sum(train_losses) / options['modelOptions']['target_seqlen']\n", + "\n", + "# update\n", + "sh_lr = theano.shared(lasagne.utils.floatX(options['learning_rate'])) # to allow dynamic learning rate\n", + "\n", + "layers = get_all_layers(net)\n", + "all_params = get_all_params(layers, 
trainable = True)\n", + "updates = adam(train_loss, all_params, learning_rate=sh_lr)\n", + "_test = theano.function([input_batch, target], [train_loss, output_filter] + output_frames, allow_input_downcast=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# ------------ data setup ----------------\n", + "print('Prepare data...')\n", + "if options['dataset_file'] == 'datasets.bouncingMnist_original':\n", + " options['dataset_file'] = 'datasets.bouncingMnist_originalTest'\n", + "\n", + "datasetOptions = options['datasetOptions']\n", + "dataset = importlib.import_module(options['dataset_file'])\n", + "datasetOptions['mode'] = 'test'\n", + "dh = dataset.DataHandler(**datasetOptions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [ + "# ------------ training setup ----------------\n", + "lasagne.layers.set_all_param_values(layers, model_values)\n", + "history_train = checkpoint['history_train']\n", + "batch_size = options['batch_size']\n", + "\n", + "# ------------ actual training ----------------\n", + "input_seqlen = options['modelOptions']['input_seqlen']\n", + "\n", + "# setup a test batch\n", + "its = dh.GetDatasetSize() // options['batch_size']\n", + "# its = 1\n", + "history_batch = []\n", + "for i in range(0, its):\n", + " ind = np.arange(i*batch_size, (i+1)*batch_size)\n", + "# import pdb; pdb.set_trace()\n", + " batch = dh.GetBatch(ind) # generate data on the fly\n", + " if options['dataset_file'] == 'datasets.stereoCarsColor':\n", + " batch_input = batch[..., :input_seqlen].squeeze(axis=4) # first frames\n", + " batch_target = batch[..., input_seqlen:].squeeze(axis=4) # last frame\n", + " else:\n", + " batch_input = batch[..., :input_seqlen].transpose(0,4,2,3,1).squeeze(axis=4) # first frames\n", + " batch_target = batch[..., 
input_seqlen:].transpose(0,4,2,3,1).squeeze(axis=4) # last frame\n", + " testOutputs = _test(batch_input, batch_target)\n", + " loss_test = testOutputs[0]\n", + " history_batch.append(loss_test)\n", + " pred_filter = testOutputs[1]\n", + " predictions = np.asarray(testOutputs[2:]).transpose(1,2,3,4,0)\n", + " print(\"Batch {} of {}\".format(i+1, its))\n", + " print(\"Test loss:\\t{:.6f}\".format(np.mean(loss_test)))\n", + " \n", + " savefigs = False\n", + " if savefigs == True:\n", + " for case_id in range(batch_size):\n", + "# import pdb; pdb.set_trace()\n", + " # clear the screen\n", + " display.clear_output(wait=True)\n", + " \n", + " # visualize the prediction\n", + " visualize_prediction(batch, fut=predictions, fig=1, case_id=case_id, saveId=(i+1)*100+case_id, savefig=True)\n", + "\n", + " # visualize the flow map\n", + " visualize_flowmap(pred_filter, batch, predictions, input_seqlen, options['image_dim'], options['modelOptions']['dynamic_filter_size'][0], case_id=case_id, saveId=(i+1)*100+case_id, savefig=True)\n", + "# visualize_flowmapStereo(pred_filter, batch, predictions, input_seqlen, options['image_dim'], options['modelOptions']['dynamic_filter_size'][0], case_id=case_id, saveId=(i+1)*100+case_id, savefig=True)\n", + "\n", + " # animated gif\n", + " import matplotlib.animation as animation\n", + "\n", + " fig = plt.figure() # make figure\n", + "\n", + " # make axesimage object\n", + " # the vmin and vmax here are very important to get the color map correct\n", + " redgreen = np.zeros((3,options['image_dim'],options['image_dim'],options['datasetOptions']['num_frames']))\n", + " # import pdb; pdb.set_trace()\n", + " redgreen[0,:,:,:input_seqlen] = 1\n", + " redgreen[1,:,:,input_seqlen:] = 1\n", + "\n", + " data = batch[case_id]\n", + "\n", + " data2 = np.concatenate((np.zeros((options['batch_size'], 1, options['image_dim'], \n", + " options['image_dim'], input_seqlen)), \n", + " predictions), axis=4)\n", + " data2 = np.concatenate((batch[..., 
:input_seqlen], \n", + " predictions), axis=4)\n", + " data2 = data2[case_id]\n", + "\n", + " plt.subplot(1,3,1)\n", + " im0 = plt.imshow(redgreen[..., 0].transpose(1,2,0).squeeze(), cmap=plt.cm.gray, vmin=0, vmax=1, interpolation=\"nearest\")\n", + " plt.axis('off')\n", + " plt.subplot(1,3,2)\n", + " im = plt.imshow(data[..., 0].transpose(1,2,0).squeeze(), cmap=plt.cm.gray, vmin=0, vmax=1, interpolation=\"nearest\")\n", + " plt.axis('off')\n", + " plt.subplot(1,3,3)\n", + " im2 = plt.imshow(data2[..., 0].transpose(1,2,0).squeeze(), cmap=plt.cm.gray, vmin=0, vmax=1, interpolation=\"nearest\")\n", + " plt.axis('off')\n", + "\n", + " # function to update figure\n", + " def updatefig(j):\n", + " # set the data in the axesimage object \n", + " im0.set_array(redgreen[..., j].transpose(1,2,0).squeeze())\n", + " im.set_array(data[..., j].transpose(1,2,0).squeeze())\n", + " im2.set_array(data2[..., j].transpose(1,2,0).squeeze())\n", + " return im, im2\n", + "\n", + " # kick off the animation\n", + " ani = animation.FuncAnimation(fig, updatefig, frames=range(options['datasetOptions']['num_frames']), interval=200, repeat_delay=1000, blit=True)\n", + " ani.save('images/%d' % ((i+1)*100+case_id) + '.gif', writer='imagemagick')\n", + " plt.show()\n", + "# print statistics\n", + "print(\" Test loss:\\t{:.6f}\".format(np.mean(history_batch)))\n", + "print(\" Parameter count: {}\".format(lasagne.layers.count_params(net)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [], + "source": [ + "savefig = False\n", + "case_id = 0 # element of the batch to use for visualization\n", + "\n", + "# convergence plot\n", + "plt.figure()\n", + "plt.plot(range(1,len(history_train)+1), history_train, label=\"loss\")\n", + "plt.legend()\n", + "plt.show()\n", + "\n", + "# visualize the prediction\n", + "visualize_prediction(batch, fut=predictions, fig=1, case_id=case_id, savefig=savefig)\n", + " \n", + 
"visualize_flowmap(pred_filter, batch, predictions, input_seqlen, options['image_dim'], options['modelOptions']['dynamic_filter_size'][0], case_id, savefig=savefig)\n", + "# visualize_flowmapStereo(pred_filter, batch, predictions, input_seqlen, options['image_dim'], options['modelOptions']['dynamic_filter_size'][0], case_id, savefig=savefig)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [], + "source": [ + "import matplotlib.animation as animation\n", + "\n", + "fig = plt.figure() # make figure\n", + "case_id = 1\n", + "\n", + "# make axesimage object\n", + "# the vmin and vmax here are very important to get the color map correct\n", + "redgreen = np.zeros((3,options['image_dim'],options['image_dim'],options['datasetOptions']['num_frames']))\n", + "# import pdb; pdb.set_trace()\n", + "redgreen[0,:,:,:input_seqlen] = 1\n", + "redgreen[1,:,:,input_seqlen:] = 1\n", + "\n", + "data = batch[case_id]\n", + "\n", + "data2 = np.concatenate((np.zeros((options['batch_size'], 1, options['image_dim'], \n", + " options['image_dim'], input_seqlen)), \n", + " predictions), axis=4)\n", + "data2 = np.concatenate((batch[..., :input_seqlen], \n", + " predictions), axis=4)\n", + "data2 = data2[case_id]\n", + "\n", + "plt.subplot(1,3,1)\n", + "im0 = plt.imshow(redgreen[..., 0].transpose(1,2,0).squeeze(), cmap=plt.cm.gray, vmin=0, vmax=1, interpolation=\"nearest\")\n", + "plt.axis('off')\n", + "plt.subplot(1,3,2)\n", + "im = plt.imshow(data[..., 0].transpose(1,2,0).squeeze(), cmap=plt.cm.gray, vmin=0, vmax=1, interpolation=\"nearest\")\n", + "plt.axis('off')\n", + "plt.subplot(1,3,3)\n", + "im2 = plt.imshow(data2[..., 0].transpose(1,2,0).squeeze(), cmap=plt.cm.gray, vmin=0, vmax=1, interpolation=\"nearest\")\n", + "plt.axis('off')\n", + "\n", + "# function to update figure\n", + "def updatefig(j):\n", + " # set the data in the axesimage object \n", + " im0.set_array(redgreen[..., 
j].transpose(1,2,0).squeeze())\n", + " im.set_array(data[..., j].transpose(1,2,0).squeeze())\n", + " im2.set_array(data2[..., j].transpose(1,2,0).squeeze())\n", + " return im, im2\n", + "\n", + "# kick off the animation\n", + "ani = animation.FuncAnimation(fig, updatefig, frames=range(options['datasetOptions']['num_frames']), interval=100, repeat_delay=1000, blit=True)\n", + "ani.save(options['name'] + '.gif', writer='imagemagick')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/datasets/__init__.py b/datasets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/datasets/datasetTests.ipynb b/datasets/datasetTests.ipynb new file mode 100644 index 0000000..4316d8b --- /dev/null +++ b/datasets/datasetTests.ipynb @@ -0,0 +1,110 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Ensure python 3 forward compatibility\n", + "from __future__ import print_function\n", + "\n", + "import os\n", + "from os import sys, path\n", + "sys.path.append(os.path.abspath(os.path.join(os.path.dirname('__file__'), '..')))\n", + "\n", + "from utils.helperFunctions import visualize_prediction\n", + "\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "from IPython import display" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import 
dataset_steerableFilter as dataset\n", + "\n", + "dh = dataset.DataHandler()\n", + "# data = dh.GetBatch()\n", + "import numpy as np\n", + "data = dh.GetBatch(thetas=np.ones(32)*np.pi/4, sigma=1, filter_size=9)\n", + "# import pdb; pdb.set_trace()\n", + "visualize_prediction(data, fut=None, fig=1, case_id=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import dataset_bouncingMnistOriginal as dataset\n", + "\n", + "dh = dataset.DataHandler(batch_size = 20, num_frames = 10, num_digits = 2)\n", + "data = dh.GetBatch()\n", + "visualize_prediction(data, fut=None, fig=1, case_id=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import dataset_highwayDriving as dataset\n", + "\n", + "dh = dataset.DataHandler(batch_size = 20, num_frames = 10)\n", + "data = dh.GetBatch()\n", + "visualize_prediction(data, fut=None, fig=1, case_id=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import dataset_stereoPrediction as dataset\n", + "\n", + "dh = dataset.DataHandler(batch_size = 20)\n", + "data = dh.GetBatch()\n", + "visualize_prediction(data, fut=None, fig=1, case_id=0)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/datasets/dataset_bouncingMnistOriginal.py b/datasets/dataset_bouncingMnistOriginal.py new file mode 100644 index 0000000..e6c2f07 --- /dev/null +++ b/datasets/dataset_bouncingMnistOriginal.py @@ -0,0 
+1,189 @@ +# adapted from https://github.com/emansim/unsupervised-videos + +from __future__ import division + +import h5py +import numpy as np +import matplotlib +import matplotlib.pyplot as plt + +class DataHandler(object): + """Data Handler that creates Bouncing MNIST dataset on the fly.""" + def __init__(self, mnistDataset='datasets/mnist.h5', mode='standard', background='zeros', num_frames=20, batch_size=2, image_size=64, num_digits=2, step_length=0.1): + self.mode_ = mode + self.background_ = background + self.seq_length_ = num_frames + self.batch_size_ = batch_size + self.image_size_ = image_size + self.num_digits_ = num_digits + self.step_length_ = step_length + self.dataset_size_ = 10000 # The dataset is really infinite. This is just for validation. + self.digit_size_ = 28 + self.frame_size_ = self.image_size_ ** 2 + self.num_channels_ = 1 + + try: + f = h5py.File(mnistDataset) + except: + print 'Please set the correct path to MNIST dataset' + sys.exit() + + self.data_ = f['train'].value.reshape(-1, 28, 28) + f.close() + # if self.binarize_: + # self.data_ = np.round(self.data_) + self.indices_ = np.arange(self.data_.shape[0]) + self.row_ = 0 + np.random.shuffle(self.indices_) + + def GetBatchSize(self): + return self.batch_size_ + + def GetDims(self): + return self.frame_size_ + + def GetDatasetSize(self): + return self.dataset_size_ + + def GetSeqLength(self): + return self.seq_length_ + + def Reset(self): + pass + + def GetRandomTrajectory(self, batch_size): + length = self.seq_length_ + canvas_size = self.image_size_ - self.digit_size_ + + # Initial position uniform random inside the box. + y = np.random.rand(batch_size) + x = np.random.rand(batch_size) + + # Choose a random velocity. + theta = np.random.rand(batch_size) * 2 * np.pi + v_y = np.sin(theta) + v_x = np.cos(theta) + + start_y = np.zeros((length, batch_size)) + start_x = np.zeros((length, batch_size)) + for i in xrange(length): + # Take a step along velocity. 
+ y += v_y * self.step_length_ + x += v_x * self.step_length_ + + # Bounce off edges. + for j in xrange(batch_size): + if x[j] <= 0: + x[j] = 0 + v_x[j] = -v_x[j] + if x[j] >= 1.0: + x[j] = 1.0 + v_x[j] = -v_x[j] + if y[j] <= 0: + y[j] = 0 + v_y[j] = -v_y[j] + if y[j] >= 1.0: + y[j] = 1.0 + v_y[j] = -v_y[j] + start_y[i, :] = y + start_x[i, :] = x + + # Scale to the size of the canvas. + start_y = (canvas_size * start_y).astype(np.int32) + start_x = (canvas_size * start_x).astype(np.int32) + return start_y, start_x + + def Overlap(self, a, b): + """ Put b on top of a.""" + return np.maximum(a, b) + #return b + + def GetBatch(self, verbose=False): + start_y, start_x = self.GetRandomTrajectory(self.batch_size_ * self.num_digits_) + + # minibatch data + if self.background_ == 'zeros': + data = np.zeros((self.batch_size_, self.num_channels_, self.image_size_, self.image_size_, self.seq_length_), dtype=np.float32) + elif self.background_ == 'rand': + data = np.random.rand(self.batch_size_, self.num_channels_, self.image_size_, self.image_size_, self.seq_length_) + + for j in xrange(self.batch_size_): + for n in xrange(self.num_digits_): + + # get random digit from dataset + ind = self.indices_[self.row_] + self.row_ += 1 + if self.row_ == self.data_.shape[0]: + self.row_ = 0 + np.random.shuffle(self.indices_) + digit_image = self.data_[ind, :, :] + digit_size = self.digit_size_ + + if self.mode_ == 'squares': + digit_size = np.random.randint(5,20) + digit_image = np.ones((digit_size, digit_size), dtype=np.float32) + + # import pdb; pdb.set_trace() + + # generate video + for i in xrange(self.seq_length_): + top = start_y[i, j * self.num_digits_ + n] + left = start_x[i, j * self.num_digits_ + n] + bottom = top + digit_size + right = left + digit_size + data[j, :, top:bottom, left:right, i] = self.Overlap(data[j, :, top:bottom, left:right, i], digit_image) + + return data + + def DisplayData(self, data, rec=None, fut=None, fig=1, case_id=0, output_file=None): + output_file1 
= None + output_file2 = None + + if output_file is not None: + name, ext = os.path.splitext(output_file) + output_file1 = '%s_original%s' % (name, ext) + output_file2 = '%s_recon%s' % (name, ext) + + # get data + data = data[case_id, :].reshape(-1, self.image_size_, self.image_size_) + # get reconstruction and future sequences if exist + if rec is not None: + rec = rec[case_id, :].reshape(-1, self.image_size_, self.image_size_) + enc_seq_length = rec.shape[0] + if fut is not None: + fut = fut[case_id, :].reshape(-1, self.image_size_, self.image_size_) + if rec is None: + enc_seq_length = self.seq_length_ - fut.shape[0] + else: + assert enc_seq_length == self.seq_length_ - fut.shape[0] + + num_rows = 1 + # create figure for original sequence + plt.figure(2*fig, figsize=(20, 1)) + plt.clf() + for i in xrange(self.seq_length_): + plt.subplot(num_rows, self.seq_length_, i+1) + plt.imshow(data[i, :, :], cmap=plt.cm.gray, interpolation="nearest") + plt.axis('off') + plt.show() + if output_file1 is not None: + print output_file1 + plt.savefig(output_file1, bbox_inches='tight') + + # create figure for reconstuction and future sequences + plt.figure(2*fig+1, figsize=(20, 1)) + plt.clf() + for i in xrange(self.seq_length_): + if rec is not None and i < enc_seq_length: + plt.subplot(num_rows, self.seq_length_, i + 1) + plt.imshow(rec[rec.shape[0] - i - 1, :, :], cmap=plt.cm.gray, interpolation="nearest") + if fut is not None and i >= enc_seq_length: + plt.subplot(num_rows, self.seq_length_, i + 1) + plt.imshow(fut[i - enc_seq_length, :, :], cmap=plt.cm.gray, interpolation="nearest") + plt.axis('off') + plt.show() + if output_file2 is not None: + print output_file2 + plt.savefig(output_file2, bbox_inches='tight') + else: + plt.pause(0.1) diff --git a/datasets/dataset_highwayDriving.py b/datasets/dataset_highwayDriving.py new file mode 100644 index 0000000..b8021c5 --- /dev/null +++ b/datasets/dataset_highwayDriving.py @@ -0,0 +1,123 @@ +# adapted from 
https://github.com/emansim/unsupervised-videos +# DataHandler for different types of datasets + +from __future__ import division + +import h5py +import numpy as np +import matplotlib +import matplotlib.pyplot as plt + +class DataHandler(object): + """Data Handler that creates Bouncing MNIST dataset on the fly.""" + def __init__(self, image_size=64, binarize=False, num_frames=20, batch_size=2, mode='train'): + if mode == 'train': + dataset = 'datasets/highwayDriving_train.h5' + elif mode == 'test': + dataset = 'datasets/highwayDriving_test.h5' + + self.image_size_ = image_size + self.binarize_ = binarize + self.seq_length_ = num_frames + self.batch_size_ = batch_size + + try: + f = h5py.File(dataset) + except: + print 'Please set the correct path to MNIST dataset' + sys.exit() + + # self.data_ = f['train'].value.reshape(-1, 28, 28) + # import pdb; pdb.set_trace() + self.data_ = f['highway_L'].value.transpose(0,1,3,2) + self.dataset_size_ = self.data_.shape[0] + self.num_channels_ = self.data_.shape[1] + self.image_size_ = self.data_.shape[2] + self.frame_size_ = self.image_size_ ** 2 + + def GetBatchSize(self): + return self.batch_size_ + + def GetDims(self): + return self.frame_size_ + + def GetDatasetSize(self): + return self.dataset_size_ - self.seq_length_ + + def GetSeqLength(self): + return self.seq_length_ + + def Reset(self): + pass + + def GetBatch(self, ind=None): + if ind is None: + batch_size = self.batch_size_ + ind = np.random.choice(self.dataset_size_ - self.seq_length_, batch_size) + else: + batch_size = len(ind) + + # import pdb; pdb.set_trace() + + # minibatch data + data = np.ones((batch_size, self.num_channels_, self.image_size_, self.image_size_, self.seq_length_), + dtype=np.float32) + + for j in xrange(batch_size): + # import pdb; pdb.set_trace() + data[j, :, :, :, :] = self.data_[ind[j]:ind[j]+self.seq_length_, :, :, :].transpose(1,2,3,0) + + return data + + def DisplayData(self, data, rec=None, fut=None, fig=1, case_id=0, output_file=None): 
+ output_file1 = None + output_file2 = None + + if output_file is not None: + name, ext = os.path.splitext(output_file) + output_file1 = '%s_original%s' % (name, ext) + output_file2 = '%s_recon%s' % (name, ext) + + # get data + data = data[case_id, :].reshape(-1, self.image_size_, self.image_size_, self.num_channels_) + # get reconstruction and future sequences if exist + if rec is not None: + rec = rec[case_id, :].reshape(-1, self.image_size_, self.image_size_, self.num_channels_) + enc_seq_length = rec.shape[0] + if fut is not None: + fut = fut[case_id, :].reshape(-1, self.image_size_, self.image_size_, self.num_channels_) + if rec is None: + enc_seq_length = self.seq_length_ - fut.shape[0] + else: + assert enc_seq_length == self.seq_length_ - fut.shape[0] + + num_rows = 1 + # create figure for original sequence + plt.figure(2*fig, figsize=(10, 2)) + plt.clf() + for i in xrange(self.seq_length_): + plt.subplot(num_rows, self.seq_length_, i+1) + plt.imshow(data[i, :, :, :].squeeze(), cmap=plt.cm.gray, interpolation="nearest") + plt.axis('off') + plt.show() + if output_file1 is not None: + print output_file1 + plt.savefig(output_file1, bbox_inches='tight') + + # create figure for reconstuction and future sequences + plt.figure(2*fig+1, figsize=(10, 2)) + plt.clf() + for i in xrange(self.seq_length_): + if rec is not None and i < enc_seq_length: + plt.subplot(num_rows, self.seq_length_, i + 1) + plt.imshow(rec[rec.shape[0] - i - 1, :, :, :].squeeze(), cmap=plt.cm.gray, interpolation="nearest") + if fut is not None and i >= enc_seq_length: + plt.subplot(num_rows, self.seq_length_, i + 1) + plt.imshow(fut[i - enc_seq_length, :, :, :].squeeze(), cmap=plt.cm.gray, interpolation="nearest") + plt.axis('off') + plt.show() + if output_file2 is not None: + print output_file2 + plt.savefig(output_file2, bbox_inches='tight') + else: + plt.pause(0.1) diff --git a/datasets/dataset_steerableFilter.py b/datasets/dataset_steerableFilter.py new file mode 100644 index 
0000000..8ed25c2 --- /dev/null +++ b/datasets/dataset_steerableFilter.py @@ -0,0 +1,92 @@ +# adapted from https://github.com/emansim/unsupervised-videos +# DataHandler for different types of datasets + +from __future__ import division + +import h5py +import numpy as np +import matplotlib +import matplotlib.pyplot as plt + +from scipy.signal import convolve2d + +class DataHandler(object): + """Data Handler that creates Bouncing MNIST dataset on the fly.""" + def __init__(self, dataset='datasets/movingObjects.h5', mode='standard', image_size=64, num_frames=3, batch_size=32): + self.image_size_ = image_size + self.seq_length_ = num_frames + self.num_channels_ = 1 + self.mode_ = mode + self.batch_size_ = batch_size + + try: + f = h5py.File(dataset) + except: + print 'Please set the correct path to MNIST dataset' + sys.exit() + + self.backgroundData_ = f['backgrounds'].value.transpose(0,1,3,2) + self.num_backgrounds_ = self.backgroundData_.shape[0] + f.close() + + def GetBatchSize(self): + return self.batch_size_ + + def GetDims(self): + return self.frame_size_ + + def GetDatasetSize(self): + return self.dataset_size_ + + def GetSeqLength(self): + return self.seq_length_ + + def Reset(self): + pass + + def GetBatch(self, thetas=None, filter_size=9, sigma=1): + + + # import pdb; pdb.set_trace() + # images = np.random.rand(self.batch_size_, self.num_channels_, self.image_size_, self.image_size_, 1) + if self.mode_ == 'standard': + ind = np.random.choice(self.num_backgrounds_, self.batch_size_) + images = self.backgroundData_[ind, ..., np.newaxis] + elif self.mode_ == 'random': + images = np.random.rand(self.batch_size_, self.num_channels_, self.image_size_, self.image_size_, 1) + else: + assert false + + if thetas is None: + thetas = np.random.rand(self.batch_size_) * 2 * np.pi + thetasChannel = np.ones((self.batch_size_, 1, self.image_size_, self.image_size_, 1)) * thetas[..., np.newaxis, np.newaxis, np.newaxis, np.newaxis] + # import pdb; pdb.set_trace() + + 
filteredImages = np.zeros((self.batch_size_, self.num_channels_, self.image_size_, self.image_size_, 1)) + for i in range(self.batch_size_): + image = images[i].squeeze() + theta = thetas[i] + filteredImage = self.FilterWithTheta(image, theta, sigma, filter_size) + # import pdb; pdb.set_trace() + filteredImages[i, :, :, :, :] = filteredImage[None, None, :, :, None] + + + output = np.concatenate((images, thetasChannel, filteredImages), axis=4) + return output + + def FilterWithTheta(self, image, theta, sigma, filter_size): + # https://www.mathworks.com/matlabcentral/fileexchange/9645-steerable-gaussian-filters/content/steerGauss.m + # Evaluate 1D Gaussian filter( and its derivative). + x = np.arange(-filter_size//2+1, filter_size//2+1) + # import pdb; pdb.set_trace() + g = np.array([np.exp(-(x**2) / (2*sigma**2))]) + gp = np.array([-(x / sigma) * np.exp(-(x**2) / (2*sigma**2))]) + + Ix = convolve2d(image, -gp, mode='same', boundary='fill', fillvalue=0) + Ix = convolve2d(Ix, g.T, mode='same', boundary='fill', fillvalue=0) + + Iy = convolve2d(image, g, mode='same', boundary='fill', fillvalue=0) + Iy = convolve2d(Iy, -gp.T, mode='same', boundary='fill', fillvalue=0) + + output = np.cos(theta) * Ix + np.sin(theta) * Iy + return output \ No newline at end of file diff --git a/datasets/dataset_stereoPrediction.py b/datasets/dataset_stereoPrediction.py new file mode 100644 index 0000000..54b5dc2 --- /dev/null +++ b/datasets/dataset_stereoPrediction.py @@ -0,0 +1,125 @@ +# adapted from https://github.com/emansim/unsupervised-videos +# DataHandler for different types of datasets + +from __future__ import division + +import h5py +import numpy as np +import matplotlib +import matplotlib.pyplot as plt + +class DataHandler(object): + """Data Handler that creates Bouncing MNIST dataset on the fly.""" + def __init__(self, image_size=64, binarize=False, num_frames=2, batch_size=2, mode='train'): + if mode == 'train': + dataset = 'datasets/highwayDriving_train.h5' + elif mode == 
'test': + dataset = 'datasets/highwayDriving_test.h5' + + self.image_size_ = image_size + self.binarize_ = binarize + self.seq_length_ = num_frames + self.batch_size_ = batch_size + + assert num_frames == 2 + + try: + f = h5py.File(dataset) + except: + print('Please set the correct path to MNIST dataset') + sys.exit() + + # self.data_ = f['train'].value.reshape(-1, 28, 28) + # import pdb; pdb.set_trace() + self.data_L_ = f['highway_L'].value.transpose(0,1,3,2) + self.data_R_ = f['highway_R'].value.transpose(0,1,3,2) + self.dataset_size_ = self.data_L_.shape[0] + self.num_channels_ = self.data_L_.shape[1] + self.image_size_ = self.data_L_.shape[2] + self.frame_size_ = self.image_size_ ** 2 + + + def GetBatchSize(self): + return self.batch_size_ + + def GetDims(self): + return self.frame_size_ + + def GetDatasetSize(self): + return self.dataset_size_ + + def GetSeqLength(self): + return self.seq_length_ + + def Reset(self): + pass + + def GetBatch(self, ind=None): + if ind is None: + batch_size = self.batch_size_ + ind = np.random.choice(self.dataset_size_, batch_size) + else: + batch_size = len(ind) + + # minibatch data + data = np.ones((batch_size, self.num_channels_, self.image_size_, self.image_size_, self.seq_length_), + dtype=np.float32) + + for j in xrange(batch_size): + data[j, :, :, :, 0] = self.data_L_[ind[j], :, :, :] + data[j, :, :, :, 1] = self.data_R_[ind[j], :, :, :] + + return data + + def DisplayData(self, data, rec=None, fut=None, fig=1, case_id=0, output_file=None): + output_file1 = None + output_file2 = None + + if output_file is not None: + name, ext = os.path.splitext(output_file) + output_file1 = '%s_original%s' % (name, ext) + output_file2 = '%s_recon%s' % (name, ext) + + # get data + data = data[case_id, :].reshape(-1, self.image_size_, self.image_size_, self.num_channels_) + # get reconstruction and future sequences if exist + if rec is not None: + rec = rec[case_id, :].reshape(-1, self.image_size_, self.image_size_, self.num_channels_) + 
enc_seq_length = rec.shape[0] + if fut is not None: + fut = fut[case_id, :].reshape(-1, self.image_size_, self.image_size_, self.num_channels_) + if rec is None: + enc_seq_length = self.seq_length_ - fut.shape[0] + else: + assert enc_seq_length == self.seq_length_ - fut.shape[0] + + num_rows = 1 + # create figure for original sequence + plt.figure(2*fig, figsize=(10, 2)) + plt.clf() + for i in xrange(self.seq_length_): + plt.subplot(num_rows, self.seq_length_, i+1) + plt.imshow(data[i, :, :, :].squeeze(), cmap=plt.cm.gray, interpolation="nearest") + plt.axis('off') + plt.show() + if output_file1 is not None: + print output_file1 + plt.savefig(output_file1, bbox_inches='tight') + + # create figure for reconstuction and future sequences + plt.figure(2*fig+1, figsize=(10, 2)) + plt.clf() + for i in xrange(self.seq_length_): + if rec is not None and i < enc_seq_length: + plt.subplot(num_rows, self.seq_length_, i + 1) + plt.imshow(rec[rec.shape[0] - i - 1, :, :, :].squeeze(), cmap=plt.cm.gray, interpolation="nearest") + if fut is not None and i >= enc_seq_length: + plt.subplot(num_rows, self.seq_length_, i + 1) + plt.imshow(fut[i - enc_seq_length, :, :, :].squeeze(), cmap=plt.cm.gray, interpolation="nearest") + plt.axis('off') + plt.show() + if output_file2 is not None: + print output_file2 + plt.savefig(output_file2, bbox_inches='tight') + else: + plt.pause(0.1) diff --git a/experiment_bouncingMnistOriginal.py b/experiment_bouncingMnistOriginal.py new file mode 100644 index 0000000..953b393 --- /dev/null +++ b/experiment_bouncingMnistOriginal.py @@ -0,0 +1,47 @@ +import matplotlib +matplotlib.use('Agg') # Must be before importing matplotlib.pyplot or pylab! 
+ +import train + +options = { + # global setup settings, and checkpoints + 'name': 'bouncingMnistOriginal', + 'seed': 123, + 'checkpoint_output_directory': 'checkpoints', + + # model and dataset + 'dataset_file': 'datasets.dataset_bouncingMnistOriginal', + 'model_file': 'models.model_bouncingMnistOriginal', + 'pretrained_model_path': None, + + # training parameters + 'image_dim': 64, + 'batch_size': 16, + 'loss': 'binary_crossentropy', + 'learning_rate': 1e-3, + 'decay_after': 20, + 'num_epochs': 100, + 'batches_per_epoch': 2 * 100, + 'save_after': 10 +} + +modelOptions = { + 'batch_size': options['batch_size'], + 'npx': options['image_dim'], + 'input_seqlen': 10, + 'target_seqlen': 10, + 'buffer_len': 1, + 'dynamic_filter_size': (9, 9) +} +options['modelOptions'] = modelOptions + +datasetOptions = { + 'batch_size': options['batch_size'], + 'image_size': options['image_dim'], + 'num_frames': modelOptions['input_seqlen'] + modelOptions['target_seqlen'], + 'num_digits': 2, + 'background': 'zeros' +} +options['datasetOptions'] = datasetOptions + +train.train(options) \ No newline at end of file diff --git a/experiment_bouncingMnistOriginal_baseline.py b/experiment_bouncingMnistOriginal_baseline.py new file mode 100644 index 0000000..3ae2b64 --- /dev/null +++ b/experiment_bouncingMnistOriginal_baseline.py @@ -0,0 +1,47 @@ +import matplotlib +matplotlib.use('Agg') # Must be before importing matplotlib.pyplot or pylab! 
+ +import train_baseline as train + +options = { + # global setup settings, and checkpoints + 'name': 'bouncingMnistOriginal_baseline', + 'seed': 123, + 'checkpoint_output_directory': 'checkpoints', + + # model and dataset + 'dataset_file': 'datasets.dataset_bouncingMnistOriginal', + 'model_file': 'models.model_bouncingMnistOriginal_baseline', + 'pretrained_model_path': None, + + # training parameters + 'image_dim': 64, + 'batch_size': 16, + 'loss': 'binary_crossentropy', + 'learning_rate': 1e-4, + 'decay_after': 20, + 'num_epochs': 100, + 'batches_per_epoch': 2 * 100, + 'save_after': 10 +} + +modelOptions = { + 'batch_size': options['batch_size'], + 'npx': options['image_dim'], + 'input_seqlen': 10, + 'target_seqlen': 10, + 'buffer_len': 1, + 'dynamic_filter_size': (9, 9) +} +options['modelOptions'] = modelOptions + +datasetOptions = { + 'batch_size': options['batch_size'], + 'image_size': options['image_dim'], + 'num_frames': modelOptions['input_seqlen'] + modelOptions['target_seqlen'], + 'num_digits': 2, + 'background': 'zeros' +} +options['datasetOptions'] = datasetOptions + +train.train(options) \ No newline at end of file diff --git a/experiment_highwayDriving.py b/experiment_highwayDriving.py new file mode 100644 index 0000000..614884a --- /dev/null +++ b/experiment_highwayDriving.py @@ -0,0 +1,47 @@ +import matplotlib +matplotlib.use('Agg') # Must be before importing matplotlib.pyplot or pylab! 
+ +import train + +options = { + # global setup settings, and checkpoints + 'name': 'highwayDriving', + 'seed': 123, + 'checkpoint_output_directory': 'checkpoints', + + # model and dataset + 'dataset_file': 'datasets.dataset_highwayDriving', + 'model_file': 'models.model_highwayDriving', + 'pretrained_model_path': None, + + # training parameters + 'image_dim': 64, + 'batch_size': 16, + 'loss': 'squared_error', + 'learning_rate': 1e-3, + 'decay_after': 20, + 'num_epochs': 100, + 'batches_per_epoch': 2 * 100, + 'save_after': 10 +} + +modelOptions = { + 'batch_size': options['batch_size'], + 'npx': options['image_dim'], + 'input_seqlen': 3, + 'target_seqlen': 3, + 'buffer_len': 2, + 'dynamic_filter_size': (11, 11), + 'refinement_network': False, + 'dynamic_bias': True +} +options['modelOptions'] = modelOptions + +datasetOptions = { + 'batch_size': options['batch_size'], + 'image_size': options['image_dim'], + 'num_frames': modelOptions['input_seqlen'] + modelOptions['target_seqlen'] +} +options['datasetOptions'] = datasetOptions + +train.train(options) diff --git a/experiment_highwayDriving_baseline.py b/experiment_highwayDriving_baseline.py new file mode 100644 index 0000000..2907e84 --- /dev/null +++ b/experiment_highwayDriving_baseline.py @@ -0,0 +1,47 @@ +import matplotlib +matplotlib.use('Agg') # Must be before importing matplotlib.pyplot or pylab! 
+ +import train_baseline as train + +options = { + # global setup settings, and checkpoints + 'name': 'highwayDriving_baseline', + 'seed': 123, + 'checkpoint_output_directory': 'checkpoints', + + # model and dataset + 'dataset_file': 'datasets.dataset_highwayDriving', + 'model_file': 'models.model_highwayDriving_baseline', + 'pretrained_model_path': None, + + # training parameters + 'image_dim': 64, + 'batch_size': 16, + 'loss': 'squared_error', + 'learning_rate': 1e-3, + 'decay_after': 20, + 'num_epochs': 100, + 'batches_per_epoch': 2 * 100, + 'save_after': 10 +} + +modelOptions = { + 'batch_size': options['batch_size'], + 'npx': options['image_dim'], + 'input_seqlen': 3, + 'target_seqlen': 3, + 'buffer_len': 2, + 'dynamic_filter_size': (11, 11), + 'refinement_network': False, + 'dynamic_bias': True +} +options['modelOptions'] = modelOptions + +datasetOptions = { + 'batch_size': options['batch_size'], + 'image_size': options['image_dim'], + 'num_frames': modelOptions['input_seqlen'] + modelOptions['target_seqlen'] +} +options['datasetOptions'] = datasetOptions + +train.train(options) diff --git a/experiment_steerableFilter.py b/experiment_steerableFilter.py new file mode 100644 index 0000000..222bb3f --- /dev/null +++ b/experiment_steerableFilter.py @@ -0,0 +1,45 @@ +import matplotlib +matplotlib.use('Agg') # Must be before importing matplotlib.pyplot or pylab! 
+ +import train + +options = { + # global setup settings, and checkpoints + 'name': 'steerableFilter', + 'seed': 123, + 'checkpoint_output_directory': 'checkpoints', + + # model and dataset + 'dataset_file': 'datasets.dataset_steerableFilter', + 'model_file': 'models.model_steerableFilter', + 'pretrained_model_path': None, + + # training parameters + 'image_dim': 64, + 'batch_size': 64, + 'loss': 'squared_error', + 'learning_rate': 1e-3, + 'decay_after': 10, + 'num_epochs': 50, + 'batches_per_epoch': 100, + 'save_after': 10 +} + +modelOptions = { + 'batch_size': options['batch_size'], + 'npx': options['image_dim'], + 'input_seqlen': 2, + 'target_seqlen': 1, + 'dynamic_filter_size': (9, 9) +} +options['modelOptions'] = modelOptions + +datasetOptions = { + 'batch_size': options['batch_size'], + 'image_size': options['image_dim'], + 'num_frames': modelOptions['input_seqlen'] + modelOptions['target_seqlen'], + 'mode': 'standard' +} +options['datasetOptions'] = datasetOptions + +train.train(options) \ No newline at end of file diff --git a/experiment_stereoPrediction.py b/experiment_stereoPrediction.py new file mode 100644 index 0000000..c49ac25 --- /dev/null +++ b/experiment_stereoPrediction.py @@ -0,0 +1,45 @@ +import matplotlib +matplotlib.use('Agg') # Must be before importing matplotlib.pyplot or pylab! 
+ +import train + +options = { + # global setup settings, and checkpoints + 'name': 'stereoPrediction', + 'seed': 123, + 'checkpoint_output_directory': 'checkpoints', + + # model and dataset + 'dataset_file': 'datasets.dataset_stereoPrediction', + 'model_file': 'models.model_stereoPrediction', + 'pretrained_model_path': None, + + # training parameters + 'image_dim': 64, + 'batch_size': 16, + 'loss': 'squared_error', + 'learning_rate': 1e-3, + 'decay_after': 20, + 'num_epochs': 100, + 'batches_per_epoch': 2 * 100, + 'save_after': 10 +} + +modelOptions = { + 'batch_size': options['batch_size'], + 'npx': options['image_dim'], + 'input_seqlen': 1, + 'target_seqlen': 1, + 'buffer_len': 1, + 'dynamic_filter_size': (13, 1) +} +options['modelOptions'] = modelOptions + +datasetOptions = { + 'batch_size': options['batch_size'], + 'image_size': options['image_dim'], + 'num_frames': modelOptions['input_seqlen'] + modelOptions['target_seqlen'] +} +options['datasetOptions'] = datasetOptions + +train.train(options) \ No newline at end of file diff --git a/experiment_stereoPrediction_baseline.py b/experiment_stereoPrediction_baseline.py new file mode 100644 index 0000000..b56af4b --- /dev/null +++ b/experiment_stereoPrediction_baseline.py @@ -0,0 +1,47 @@ +import matplotlib +matplotlib.use('Agg') # Must be before importing matplotlib.pyplot or pylab! 
+ +# import train + +import train_baseline as train + +options = { + # global setup settings, and checkpoints + 'name': 'stereoPrediction_baseline', + 'seed': 123, + 'checkpoint_output_directory': 'checkpoints', + + # model and dataset + 'dataset_file': 'datasets.dataset_stereoPrediction', + 'model_file': 'models.model_stereoPrediction_baseline', + 'pretrained_model_path': None, + + # training parameters + 'image_dim': 64, + 'batch_size': 16, + 'loss': 'squared_error', + 'learning_rate': 1e-3, + 'decay_after': 20, + 'num_epochs': 100, + 'batches_per_epoch': 2 * 100, + 'save_after': 10 +} + +modelOptions = { + 'batch_size': options['batch_size'], + 'npx': options['image_dim'], + 'input_seqlen': 1, + 'target_seqlen': 1, + 'buffer_len': 1, + 'dynamic_filter_size': (13, 1) +} +options['modelOptions'] = modelOptions + +datasetOptions = { + 'batch_size': options['batch_size'], + 'image_size': options['image_dim'], + 'num_frames': modelOptions['input_seqlen'] + modelOptions['target_seqlen'] +} +options['datasetOptions'] = datasetOptions + +train.train(options) \ No newline at end of file diff --git a/layers/__init__.py b/layers/__init__.py new file mode 100755 index 0000000..8d1c8b6 --- /dev/null +++ b/layers/__init__.py @@ -0,0 +1 @@ + diff --git a/layers/dynamic_filter_layer.py b/layers/dynamic_filter_layer.py new file mode 100755 index 0000000..65a245f --- /dev/null +++ b/layers/dynamic_filter_layer.py @@ -0,0 +1,211 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +from collections import OrderedDict +import sys +import os + +import theano +import theano.tensor as T + +import lasagne +from lasagne import layers +from lasagne import init +from lasagne import nonlinearities +from lasagne.layers.base import Layer, MergeLayer + +from lasagne.layers.conv import conv_output_length +from lasagne.layers.pool import pool_output_length +from lasagne.utils import as_tuple + +from theano.sandbox.cuda import dnn # xu + +__all__ = [ + "DynamicFilterLayer" +] + +# class 
Deconv2DLayer(lasagne.layers.Layer): +# """ deconv layer from Jan Schlueter """ +# def __init__(self, incoming, num_filters, filter_size, stride=1, pad=0,W=init.GlorotUniform(), +# b=init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, **kwargs): +# super(Deconv2DLayer, self).__init__(incoming, **kwargs) +# self.num_filters = num_filters +# self.filter_size = lasagne.utils.as_tuple(filter_size, 2, int) +# self.stride = lasagne.utils.as_tuple(stride, 2, int) +# self.pad = lasagne.utils.as_tuple(pad, 2, int) +# self.W = self.add_param(W,(self.input_shape[1], num_filters) + self.filter_size, name='W') +# if b is None: +# self.b = None +# else: +# if self.untie_biases: +# biases_shape = (num_filters, self.output_shape[2], +# self.output_shape[3]) +# else: +# biases_shape = (num_filters,) +# self.b = self.add_param(b, biases_shape, name="b", +# regularizable=False) +# if nonlinearity is None: +# nonlinearity = lasagne.nonlinearities.identity +# self.nonlinearity = nonlinearity +# +# def get_output_shape_for(self, input_shape): +# shape = tuple(i*s - 2*p + f - 1 +# for i, s, p, f in zip(input_shape[2:], +# self.stride, +# self.pad, +# self.filter_size)) +# return (input_shape[0], self.num_filters) + shape +# +# def get_output_for(self, input, **kwargs): +# op = T.nnet.abstract_conv.AbstractConv2d_gradInputs( +# imshp=self.output_shape, +# kshp=(self.input_shape[1], self.num_filters) + self.filter_size, +# subsample=self.stride, border_mode=self.pad) +# conved = op(self.W, input, self.output_shape[2:]) +# if self.b is not None: +# conved += self.b.dimshuffle('x', 0, 'x', 'x') +# return self.nonlinearity(conved) +# # +# class DynConvLayer(MergeLayer): +# ''' +# input : X (batch_num, 1, w,h), W (batch,nf,wk,hk) +# output : +# ''' +# def __init__(self, incomings, stride=1, pad=0, +# nonlinearity=nonlinearities.rectify, flip_filters=False, **kwargs): +# super(DynConvLayer, self).__init__(incomings, **kwargs) +# if nonlinearity is None: +# self.nonlinearity = 
nonlinearities.identity +# else: +# self.nonlinearity = nonlinearity +# n = len(self.input_shapes[0]) - 2 # or n=2 +# self.n = n +# self.nc = self.input_shapes[0][1] # nc=1 +# self.nf = self.input_shapes[1][1] +# self.filter_size = as_tuple(self.input_shapes[1][2], n, int) +# self.flip_filters = flip_filters +# self.stride = as_tuple(stride, n, int) +# +# if pad == 'same': +# if any(s % 2 == 0 for s in self.filter_size): +# raise NotImplementedError( +# '`same` padding requires odd filter size.') +# if pad == 'valid': +# self.pad = as_tuple(0, n) +# elif pad in ('full', 'same'): +# self.pad = pad +# else: +# self.pad = as_tuple(pad, n, int) +# +# def get_output_shape_for(self, input_shapes): +# # refer to lasagne.dnn layer +# pad = self.pad if isinstance(self.pad, tuple) else (self.pad,) * self.n +# batchsize = input_shapes[0][0] +# +# return ((batchsize, 1, self.nf) + +# tuple(conv_output_length(input, filter, stride, p) +# for input, filter, stride, p +# in zip(input_shapes[0][2:], self.filter_size, +# self.stride, pad))) +# +# def get_output_for(self, inputs, **kwargs): +# # define a function and apply the same operation to each sample +# # using scan +# conv_mode = 'conv' if self.flip_filters else 'cross' +# border_mode = self.pad +# if border_mode == 'same': +# border_mode = tuple(s // 2 for s in self.filter_size) +# +# def onesample_conv(x,w): +# x_ = T.reshape(x, (1, self.nc, x.shape[1], x.shape[2])) +# w_ = T.reshape(w, (self.nf, self.nc, w.shape[1], w.shape[2])) +# conved = dnn.dnn_conv(img=x_, +# kerns=w_, +# subsample=self.stride, +# border_mode=border_mode, +# conv_mode=conv_mode +# ) +# return conved +# output,_ = theano.scan(onesample_conv, sequences=[inputs[0],inputs[1]]) +# return self.nonlinearity(output) +# # +# class LocalExpandLayer(lasagne.layers.Layer): +# ''' +# used later in order to implement local connected layer +# input : X (batch_num, 1, w,h) +# output : Y (batch_num, num, w,h) +# ''' +# def __init__(self, incoming, filter_size, 
stride=1, pad=0, flip_filters=False, **kwargs): +# super(LocalExpandLayer, self).__init__(incoming, **kwargs) +# +# self.filter_size = lasagne.utils.as_tuple(filter_size, 2, int) +# self.stride = lasagne.utils.as_tuple(stride, 2, int) +# self.pad = lasagne.utils.as_tuple(pad, 2, int) +# self.flip_filters = flip_filters +# +# def get_output_shape_for(self, input_shape): +# shape = (input_shape[0], np.prod(self.filter_size), input_shape[2], input_shape[3]) +# return shape +# +# def get_output_for(self, input, **kwargs): +# conv_mode = 'conv' if self.flip_filters else 'cross' +# border_mode = self.pad +# if border_mode == 'same': +# border_mode = tuple(s // 2 for s in self.filter_size) +# filter_size = self.filter_size +# filter_localexpand_np = np.reshape(np.eye(np.prod(filter_size),np.prod(filter_size)), (np.prod(filter_size), 1, filter_size[0],filter_size[1])) +# filter_localexpand = T.cast(theano.shared(filter_localexpand_np), 'floatX') +# input_localexpanded = conved = dnn.dnn_conv(img=input, kerns=filter_localexpand, subsample=self.stride, border_mode=border_mode, conv_mode=conv_mode) +# +# return input_localexpanded + +class DynamicFilterLayer(MergeLayer): + def __init__(self, incomings, filter_size, stride=1, pad=0, flip_filters=False, grouping=False, **kwargs): + super(DynamicFilterLayer, self).__init__(incomings, **kwargs) + + self.filter_size = lasagne.utils.as_tuple(filter_size, 3, int) + self.stride = lasagne.utils.as_tuple(stride, 2, int) + self.pad = lasagne.utils.as_tuple(pad, 2, int) + self.flip_filters = flip_filters + self.grouping = grouping + + if self.grouping: + assert(filter_size[2] == 1) + + def get_output_shape_for(self, input_shapes): + if self.grouping: + shape = (input_shapes[0][0], input_shapes[0][1], input_shapes[0][2], input_shapes[0][3]) + else: + shape = (input_shapes[0][0], 1, input_shapes[0][2], input_shapes[0][3]) + return shape + + def get_output_for(self, input, **kwargs): + image = input[0] + filters = input[1] + + conv_mode = 
'conv' if self.flip_filters else 'cross' + border_mode = self.pad + if border_mode == 'same': + border_mode = tuple(s // 2 for s in self.filter_size) + filter_size = self.filter_size + + if self.grouping: + filter_localexpand_np = np.reshape(np.eye(np.prod(filter_size), np.prod(filter_size)), (np.prod(filter_size), 1, filter_size[0], filter_size[1])) + filter_localexpand = T.cast(theano.shared(filter_localexpand_np), 'floatX') + + outputs = [] + for i in range(3): + input_localexpanded = dnn.dnn_conv(img=image[:,[i],:,:], kerns=filter_localexpand, subsample=self.stride, border_mode=border_mode, conv_mode=conv_mode) + output = T.sum(input_localexpanded * filters, axis=1, keepdims=True) + outputs.append(output) + + output = T.concatenate(outputs, axis=1) + else: + filter_localexpand_np = np.reshape(np.eye(np.prod(filter_size), np.prod(filter_size)), (np.prod(filter_size), filter_size[2], filter_size[0], filter_size[1])) + filter_localexpand = T.cast(theano.shared(filter_localexpand_np), 'floatX') + input_localexpanded = dnn.dnn_conv(img=image, kerns=filter_localexpand, subsample=self.stride, border_mode=border_mode, conv_mode=conv_mode) + output = input_localexpanded * filters + output = T.sum(output, axis=1, keepdims=True) + + return output \ No newline at end of file diff --git a/layers/dynamic_filter_layer_tensorflow.py b/layers/dynamic_filter_layer_tensorflow.py new file mode 100755 index 0000000..5920542 --- /dev/null +++ b/layers/dynamic_filter_layer_tensorflow.py @@ -0,0 +1,32 @@ +def dfn(inputs, + filters, + kernel_size, + stride=1, + padding='SAME', + scope=None, + reuse=None): + + with tf.variable_op_scope([inputs, filters], scope, 'DFN', reuse=reuse): + kernel_h, kernel_w = _two_element_tuple(kernel_size) + stride_h, stride_w = _two_element_tuple(stride) + num_filters_in = inputs.get_shape()[-1].value + assert num_filters_in <= 3 + + filter_size_prod = kernel_h * kernel_w * 1 + reshape_filters = np.reshape( + np.eye(filter_size_prod, filter_size_prod), + 
class Model(object):
    """Recurrent video-prediction model with a Dynamic Filter Network head
    (moving-MNIST variant).

    Each step, a convolutional encoder/decoder with a simple convolutional
    recurrent state emits one softmax-normalised filter per pixel; a
    DynamicFilterLayer applies those filters to the most recent frame to
    synthesise the next frame.

    Constructor arguments:
        npx: height/width of the square input frames.
        batch_size: sequences per minibatch.
        input_seqlen: number of conditioning input frames.
        target_seqlen: number of future frames to predict.
        buffer_len: frames visible to each prediction step (self.nInputs).
        dynamic_filter_size: (h, w) of the generated per-pixel filters.
        refinement_network: append a residual refinement CNN if True.
        dynamic_bias: also generate a per-pixel additive bias if True.
    """
    def __init__(self, npx=64, batch_size=16, input_seqlen=3, target_seqlen=3, buffer_len=1, dynamic_filter_size=(9,9), refinement_network=False, dynamic_bias=False):
        self.npx = npx
        self.batch_size = batch_size
        self.input_seqlen = input_seqlen
        self.target_seqlen = target_seqlen
        self.nInputs = buffer_len
        self.dynamic_filter_size = dynamic_filter_size
        self.refinement_network = refinement_network
        self.dynamic_bias = dynamic_bias

    def build_model(self, input_batch):
        """Unroll predict() over time and return the Lasagne graph.

        Args:
            input_batch: tensor of shape (batch, input_seqlen, npx, npx).

        Returns:
            (output, outputs, filtersToVisualize): the last predicted frame
            layer, all predicted frame layers, and the dynamic filters of
            the first predicted step (for visualisation).
        """

        ## initialize shared parameters
        # One (W, b) initializer per parameterised layer in predict(); the
        # lists are overwritten with the created shared variables inside
        # predict(), so every unrolled step reuses the same weights.
        Ws = []
        bs = []
        nLayersWithParams = 13
        if self.refinement_network:
            nLayersWithParams = nLayersWithParams + 4
        for i in range(nLayersWithParams):
            W = HeUniform()
            Ws.append(W)
            b = Constant(0.0)
            bs.append(b)
        # Recurrent state starts as zeros at half resolution (the encoder
        # downsamples once with stride 2).
        # NOTE(review): self.npx/2 relies on Python 2 integer division
        # (consistent with the file's cPickle import).
        hidden_state = InputLayer(input_var=np.zeros((self.batch_size, 128, self.npx/2, self.npx/2), dtype=np.float32), shape=(self.batch_size, 128, self.npx/2, self.npx/2))

        ## get inputs
        inputs = InputLayer(input_var=input_batch, shape=(None, self.input_seqlen, self.npx, self.npx))
        # inputs = InputLayer(input_var=input_batch, shape=(None, 1, self.npx, self.npx, self.input_seqlen))
        # inputs = DimshuffleLayer(inputs, (0, 4, 2, 3, 1))
        outputs = []
        for i in range(self.input_seqlen - self.nInputs + self.target_seqlen):
            input = SliceLayer(inputs, indices=slice(0,self.nInputs), axis=1)
            output, hidden_state, filters = self.predict(input, hidden_state, Ws, bs)
            ## FIFO operation.
            # Drop the oldest frame; once past the conditioning frames the
            # model's own prediction is fed back in (closed-loop prediction).
            inputs = SliceLayer(inputs, indices=slice(1, None), axis=1)
            if i == self.input_seqlen - self.nInputs:
                filtersToVisualize = filters
            if i >= self.input_seqlen - self.nInputs:
                inputs = ConcatLayer([inputs, output], axis=1)
                outputs.append(output)


        return output, outputs, filtersToVisualize

    def predict(self, input, hidden_state, Ws, bs):
        """One prediction step: generate per-pixel filters, apply them.

        Args:
            input: layer with the last self.nInputs frames.
            hidden_state: layer with the recurrent feature map.
            Ws, bs: shared parameter lists, mutated in place on first use.

        Returns:
            (output, hidden_state, filters): predicted frame, updated
            recurrent state, and the per-pixel dynamic filters.
        """

        npx = self.npx # image size
        nc = self.input_seqlen
        filter_size = self.dynamic_filter_size[0]
        f = 0  # running index into the shared parameter lists

        ###############################
        #  filter-generating network  #
        ###############################
        ## encoder
        output = ConvLayer(input, num_filters=32, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=32, filter_size=(3,3), stride=(2,2), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        ## mid
        output = ConvLayer(output, num_filters=128, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        # Simple convolutional recurrence: new state = conv(features) +
        # conv(conv(old state)).
        hidden = ConvLayer(hidden_state, num_filters=128, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = hidden.W; bs[f] = hidden.b; f = f+1
        hidden = ConvLayer(hidden, num_filters=128, filter_size=(3, 3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = hidden.W; bs[f] = hidden.b; f = f+1
        output = ElemwiseSumLayer([output, hidden])
        hidden_state = output

        ## decoder
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = Upscale2DLayer(output, scale_factor = 2)
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        output = ConvLayer(output, num_filters=128, filter_size=(1,1), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        ## filter-generating layers
        # One k*k filter per pixel (+1 channel if a dynamic bias is used).
        l_filter = ConvLayer(output, num_filters=filter_size**2 + self.dynamic_bias, filter_size=(1,1), stride=(1,1), pad=(0,0), W=Ws[f], b=bs[f], nonlinearity=identity); Ws[f] = l_filter.W; bs[f] = l_filter.b; f = f+1

        #########################
        #  transformer network  #
        #########################
        ## get inputs
        output = SliceLayer(input, indices=slice(self.nInputs-1, self.nInputs), axis=1) # select the last (most recent) frame from the inputs

        ## add a bias
        if self.dynamic_bias:
            filters_biases = SliceLayer(l_filter, indices=slice(filter_size ** 2, filter_size ** 2 + 1), axis=1)
            output = ConcatLayer([output, filters_biases])
            # Summing the 2-channel pool adds the generated bias to the frame.
            output = FeaturePoolLayer(output, pool_size=2, pool_function=theano.tensor.sum)

        ## dynamic convolution
        filters = SliceLayer(l_filter, indices=slice(0, filter_size ** 2), axis=1)

        # Softmax across the filter taps so each per-pixel filter sums to 1.
        # filters = FeaturePoolLayer(filters, pool_size=9*9, pool_function=theano.tensor.nnet.softmax)
        filters = DimshuffleLayer(filters, (0, 2, 3, 1))
        filters = ReshapeLayer(filters, shape=(-1, filter_size ** 2))
        filters = NonlinearityLayer(filters, nonlinearity=softmax)
        filters = ReshapeLayer(filters, shape=(-1, npx, npx, filter_size ** 2))
        filters = DimshuffleLayer(filters, (0, 3, 1, 2))

        output_dynconv = DynamicFilterLayer([output, filters], filter_size=(filter_size,filter_size,1), pad=(filter_size//2, filter_size//2))

        ########################
        #  refinement network  #
        ########################
        if self.refinement_network:
            output = ConcatLayer([output_dynconv, input])
            output = ConvLayer(output, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ConvLayer(output, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ConvLayer(output, num_filters=1, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ElemwiseSumLayer([output_dynconv, output]) # this is a residual connection
        else:
            output = output_dynconv

        return output, hidden_state, filters
class Model(object):
    """Baseline for the moving-MNIST predictor: same encoder/decoder and
    recurrence as the DFN model, but the dynamic filtering head is replaced
    by an ordinary learned 1x1 convolution (no input-conditioned filters).

    Constructor arguments mirror the DFN model:
        npx, batch_size, input_seqlen, target_seqlen, buffer_len,
        dynamic_filter_size (only used to size the head's channel count),
        refinement_network, dynamic_bias.
    """
    def __init__(self, npx=64, batch_size=16, input_seqlen=3, target_seqlen=3, buffer_len=1, dynamic_filter_size=(9,9), refinement_network=False, dynamic_bias=False):
        self.npx = npx
        self.batch_size = batch_size
        self.input_seqlen = input_seqlen
        self.target_seqlen = target_seqlen
        self.nInputs = buffer_len
        self.dynamic_filter_size = dynamic_filter_size
        self.refinement_network = refinement_network
        self.dynamic_bias = dynamic_bias

    def build_model(self, input_batch):
        """Unroll predict() over time.

        Args:
            input_batch: tensor of shape (batch, input_seqlen, npx, npx).

        Returns:
            (output, outputs): last predicted frame layer and the list of
            all predicted frame layers (no filters — this is the baseline).
        """

        ## initialize shared parameters
        # One (W, b) initializer per parameterised layer in predict();
        # replaced by the created shared variables on first use so all
        # unrolled steps share weights. 14 = 13 conv layers + the 1x1 head.
        Ws = []
        bs = []
        nLayersWithParams = 14
        if self.refinement_network:
            nLayersWithParams = nLayersWithParams + 4
        for i in range(nLayersWithParams):
            W = HeUniform()
            Ws.append(W)
            b = Constant(0.0)
            bs.append(b)
        # Zero-initialised recurrent state at half resolution.
        # NOTE(review): self.npx/2 relies on Python 2 integer division.
        hidden_state = InputLayer(input_var=np.zeros((self.batch_size, 128, self.npx/2, self.npx/2), dtype=np.float32), shape=(self.batch_size, 128, self.npx/2, self.npx/2))

        ## get inputs
        inputs = InputLayer(input_var=input_batch, shape=(None, self.input_seqlen, self.npx, self.npx))
        # inputs = InputLayer(input_var=input_batch, shape=(None, 1, self.npx, self.npx, self.input_seqlen))
        # inputs = DimshuffleLayer(inputs, (0, 4, 2, 3, 1))
        outputs = []
        for i in range(self.input_seqlen - self.nInputs + self.target_seqlen):
            input = SliceLayer(inputs, indices=slice(0,self.nInputs), axis=1)
            output, hidden_state = self.predict(input, hidden_state, Ws, bs)
            ## FIFO operation.
            # Drop the oldest frame; feed predictions back once past the
            # conditioning frames.
            inputs = SliceLayer(inputs, indices=slice(1, None), axis=1)
            if i >= self.input_seqlen - self.nInputs:
                inputs = ConcatLayer([inputs, output], axis=1)
                outputs.append(output)


        return output, outputs

    def predict(self, input, hidden_state, Ws, bs):
        """One baseline prediction step.

        Args:
            input: layer with the last self.nInputs frames.
            hidden_state: layer with the recurrent feature map.
            Ws, bs: shared parameter lists, mutated in place on first use.

        Returns:
            (output, hidden_state): predicted frame and updated state.
        """

        npx = self.npx # image size
        nc = self.input_seqlen
        filter_size = self.dynamic_filter_size[0]
        f = 0  # running index into the shared parameter lists

        ###############################
        #  filter-generating network  #
        ###############################
        ## encoder
        output = ConvLayer(input, num_filters=32, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=32, filter_size=(3,3), stride=(2,2), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        ## mid
        output = ConvLayer(output, num_filters=128, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        # Convolutional recurrence, identical to the DFN model.
        hidden = ConvLayer(hidden_state, num_filters=128, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = hidden.W; bs[f] = hidden.b; f = f+1
        hidden = ConvLayer(hidden, num_filters=128, filter_size=(3, 3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = hidden.W; bs[f] = hidden.b; f = f+1
        output = ElemwiseSumLayer([output, hidden])
        hidden_state = output

        ## decoder
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = Upscale2DLayer(output, scale_factor = 2)
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        output = ConvLayer(output, num_filters=128, filter_size=(1,1), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        ## filter-generating layers
        l_filter = ConvLayer(output, num_filters=filter_size**2 + self.dynamic_bias, filter_size=(1,1), stride=(1,1), pad=(0,0), W=Ws[f], b=bs[f], nonlinearity=identity); Ws[f] = l_filter.W; bs[f] = l_filter.b; f = f+1

        # Baseline head: a plain learned 1x1 convolution instead of the
        # dynamic filtering used by the DFN model.
        # output_dynconv = DynamicFilterLayer([output, filters], filter_size=(filter_size,filter_size,1), pad=(filter_size//2, filter_size//2))
        output_dynconv = ConvLayer(l_filter, num_filters=1, filter_size=(1,1), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=identity); Ws[f] = output_dynconv.W; bs[f] = output_dynconv.b; f = f+1

        ########################
        #  refinement network  #
        ########################
        if self.refinement_network:
            output = ConcatLayer([output_dynconv, input])
            output = ConvLayer(output, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ConvLayer(output, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ConvLayer(output, num_filters=1, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ElemwiseSumLayer([output_dynconv, output]) # this is a residual connection
        else:
            output = output_dynconv

        return output, hidden_state
        self.dynamic_filter_size = dynamic_filter_size
        self.refinement_network = refinement_network
        self.dynamic_bias = dynamic_bias

    def build_model(self, input_batch):
        """Unroll predict() over time (highway-driving DFN model).

        Args:
            input_batch: tensor of shape (batch, input_seqlen, npx, npx).

        Returns:
            (output, outputs, filtersToVisualize): last predicted frame
            layer, all predicted frame layers, and the dynamic filters of
            the first predicted step (for visualisation).
        """

        ## initialize shared parameters
        # One (W, b) initializer per parameterised layer in predict();
        # replaced in place with the created shared variables so the
        # unrolled steps share weights.
        # NOTE(review): predict() consumes 11 (W, b) pairs here; the 13
        # allocated leaves two unused — harmless, but confirm intent.
        Ws = []
        bs = []
        nLayersWithParams = 13
        if self.refinement_network:
            nLayersWithParams = nLayersWithParams + 4
        for i in range(nLayersWithParams):
            W = HeUniform()
            Ws.append(W)
            b = Constant(0.0)
            bs.append(b)
        # Zero-initialised recurrent state (64 channels, half resolution).
        # NOTE(review): self.npx/2 relies on Python 2 integer division.
        hidden_state = InputLayer(input_var=np.zeros((self.batch_size, 64, self.npx/2, self.npx/2), dtype=np.float32), shape=(self.batch_size, 64, self.npx/2, self.npx/2))

        ## get inputs
        inputs = InputLayer(input_var=input_batch, shape=(None, self.input_seqlen, self.npx, self.npx))
        # inputs = InputLayer(input_var=input_batch, shape=(None, 1, self.npx, self.npx, self.input_seqlen))
        # inputs = DimshuffleLayer(inputs, (0, 4, 2, 3, 1))
        outputs = []
        for i in range(self.input_seqlen - self.nInputs + self.target_seqlen):
            input = SliceLayer(inputs, indices=slice(0,self.nInputs), axis=1)
            output, hidden_state, filters = self.predict(input, hidden_state, Ws, bs)
            ## FIFO operation.
            # Drop the oldest frame; feed predictions back once past the
            # conditioning frames (closed-loop prediction).
            inputs = SliceLayer(inputs, indices=slice(1, None), axis=1)

            if i == self.input_seqlen - self.nInputs:
                filtersToVisualize = filters
            if i >= self.input_seqlen - self.nInputs:
                inputs = ConcatLayer([inputs, output], axis=1)
                outputs.append(output)


        return output, outputs, filtersToVisualize

    def predict(self, input, hidden_state, Ws, bs):
        """One prediction step: generate per-pixel filters, apply them.

        Args:
            input: layer with the last self.nInputs frames.
            hidden_state: layer with the recurrent feature map.
            Ws, bs: shared parameter lists, mutated in place on first use.

        Returns:
            (output, hidden_state, filters): predicted frame, updated
            recurrent state, and the per-pixel dynamic filters.
        """

        npx = self.npx # image size
        nc = self.input_seqlen
        filter_size = self.dynamic_filter_size[0]
        f = 0  # running index into the shared parameter lists

        ###############################
        #  filter-generating network  #
        ###############################
        ## encoder
        output = ConvLayer(input, num_filters=32, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify, untie_biases=True); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=32, filter_size=(3,3), stride=(2,2), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        # output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        ## mid
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        # Convolutional recurrence (64-channel state for this dataset).
        hidden = ConvLayer(hidden_state, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = hidden.W; bs[f] = hidden.b; f = f+1
        hidden = ConvLayer(hidden, num_filters=64, filter_size=(3, 3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = hidden.W; bs[f] = hidden.b; f = f+1
        output = ElemwiseSumLayer([output, hidden])
        hidden_state = output

        ## decoder
        # output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = Upscale2DLayer(output, scale_factor = 2)
        output = ConvLayer(output, num_filters=32, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        output = ConvLayer(output, num_filters=128, filter_size=(1,1), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        ## filter-generating layers
        # One k*k filter per pixel (+1 channel if a dynamic bias is used).
        l_filter = ConvLayer(output, num_filters=filter_size**2 + self.dynamic_bias, filter_size=(1,1), stride=(1,1), pad=(0,0), W=Ws[f], b=bs[f], nonlinearity=identity); Ws[f] = l_filter.W; bs[f] = l_filter.b; f = f+1

        #########################
        #  transformer network  #
        #########################
        ## get inputs
        output = SliceLayer(input, indices=slice(self.nInputs-1, self.nInputs), axis=1) # select the last (most recent) frame from the inputs

        ## add a bias
        if self.dynamic_bias:
            filters_biases = SliceLayer(l_filter, indices=slice(filter_size ** 2, filter_size ** 2 + 1), axis=1)
            output = ConcatLayer([output, filters_biases])
            # Summing the 2-channel pool adds the generated bias to the frame.
            output = FeaturePoolLayer(output, pool_size=2, pool_function=theano.tensor.sum)

        ## dynamic convolution
        filters = SliceLayer(l_filter, indices=slice(0, filter_size ** 2), axis=1)

        # Softmax across the filter taps so each per-pixel filter sums to 1.
        # filters = FeaturePoolLayer(filters, pool_size=9*9, pool_function=theano.tensor.nnet.softmax)
        filters = DimshuffleLayer(filters, (0, 2, 3, 1))
        filters = ReshapeLayer(filters, shape=(-1, filter_size ** 2))
        filters = NonlinearityLayer(filters, nonlinearity=softmax)
        filters = ReshapeLayer(filters, shape=(-1, npx, npx, filter_size ** 2))
        filters = DimshuffleLayer(filters, (0, 3, 1, 2))

        output_dynconv = DynamicFilterLayer([output, filters], filter_size=(filter_size,filter_size,1), pad=(filter_size//2, filter_size//2))

        ########################
        #  refinement network  #
        ########################
        if self.refinement_network:
            output = ConcatLayer([output_dynconv, input])
            output = ConvLayer(output, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ConvLayer(output, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ConvLayer(output, num_filters=1, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ElemwiseSumLayer([output_dynconv, output]) # this is a residual connection
        else:
            output = output_dynconv

        return output, hidden_state, filters
class Model(object):
    """Wider ("b") variant of the highway-driving DFN model: same layout
    as model_highwayDriving but with roughly doubled channel counts and a
    128-channel recurrent state; the pre-head 1x1 conv is disabled.
    """
    def __init__(self, npx=64, batch_size=16, input_seqlen=3, target_seqlen=3, buffer_len=1, dynamic_filter_size=(9,9), refinement_network=False, dynamic_bias=False):
        self.npx = npx
        self.batch_size = batch_size
        self.input_seqlen = input_seqlen
        self.target_seqlen = target_seqlen
        self.nInputs = buffer_len
        self.dynamic_filter_size = dynamic_filter_size
        self.refinement_network = refinement_network
        self.dynamic_bias = dynamic_bias

    def build_model(self, input_batch):
        """Unroll predict() over time.

        Args:
            input_batch: tensor of shape (batch, input_seqlen, npx, npx).

        Returns:
            (output, outputs, filtersToVisualize): last predicted frame
            layer, all predicted frame layers, and the dynamic filters of
            the first predicted step (for visualisation).
        """

        ## initialize shared parameters
        # One (W, b) initializer per parameterised layer in predict();
        # replaced in place with the created shared variables so the
        # unrolled steps share weights.
        # NOTE(review): predict() consumes 10 (W, b) pairs in this variant
        # (two layers are commented out); 13 allocated — confirm intent.
        Ws = []
        bs = []
        nLayersWithParams = 13
        if self.refinement_network:
            nLayersWithParams = nLayersWithParams + 4
        for i in range(nLayersWithParams):
            W = HeUniform()
            Ws.append(W)
            b = Constant(0.0)
            bs.append(b)
        # Zero-initialised recurrent state (128 channels, half resolution).
        # NOTE(review): self.npx/2 relies on Python 2 integer division.
        hidden_state = InputLayer(input_var=np.zeros((self.batch_size, 128, self.npx/2, self.npx/2), dtype=np.float32), shape=(self.batch_size, 128, self.npx/2, self.npx/2))

        ## get inputs
        inputs = InputLayer(input_var=input_batch, shape=(None, self.input_seqlen, self.npx, self.npx))
        # inputs = InputLayer(input_var=input_batch, shape=(None, 1, self.npx, self.npx, self.input_seqlen))
        # inputs = DimshuffleLayer(inputs, (0, 4, 2, 3, 1))
        outputs = []
        for i in range(self.input_seqlen - self.nInputs + self.target_seqlen):
            input = SliceLayer(inputs, indices=slice(0,self.nInputs), axis=1)
            output, hidden_state, filters = self.predict(input, hidden_state, Ws, bs)
            ## FIFO operation.
            # Drop the oldest frame; feed predictions back once past the
            # conditioning frames (closed-loop prediction).
            inputs = SliceLayer(inputs, indices=slice(1, None), axis=1)

            if i == self.input_seqlen - self.nInputs:
                filtersToVisualize = filters
            if i >= self.input_seqlen - self.nInputs:
                inputs = ConcatLayer([inputs, output], axis=1)
                outputs.append(output)


        return output, outputs, filtersToVisualize

    def predict(self, input, hidden_state, Ws, bs):
        """One prediction step: generate per-pixel filters, apply them.

        Args:
            input: layer with the last self.nInputs frames.
            hidden_state: layer with the recurrent feature map.
            Ws, bs: shared parameter lists, mutated in place on first use.

        Returns:
            (output, hidden_state, filters): predicted frame, updated
            recurrent state, and the per-pixel dynamic filters.
        """

        npx = self.npx # image size
        nc = self.input_seqlen
        filter_size = self.dynamic_filter_size[0]
        f = 0  # running index into the shared parameter lists

        ###############################
        #  filter-generating network  #
        ###############################
        ## encoder
        output = ConvLayer(input, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify, untie_biases=True); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(2,2), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=128, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        # output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        ## mid
        output = ConvLayer(output, num_filters=128, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        # Convolutional recurrence (128-channel state in this variant).
        hidden = ConvLayer(hidden_state, num_filters=128, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = hidden.W; bs[f] = hidden.b; f = f+1
        hidden = ConvLayer(hidden, num_filters=128, filter_size=(3, 3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = hidden.W; bs[f] = hidden.b; f = f+1
        output = ElemwiseSumLayer([output, hidden])
        hidden_state = output

        ## decoder
        # output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=128, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = Upscale2DLayer(output, scale_factor = 2)
        output = ConvLayer(output, num_filters=64, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
        output = ConvLayer(output, num_filters=128, filter_size=(3,3), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        # output = ConvLayer(output, num_filters=128, filter_size=(1,1), stride=(1,1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1

        ## filter-generating layers
        # One k*k filter per pixel (+1 channel if a dynamic bias is used).
        l_filter = ConvLayer(output, num_filters=filter_size**2 + self.dynamic_bias, filter_size=(1,1), stride=(1,1), pad=(0,0), W=Ws[f], b=bs[f], nonlinearity=identity); Ws[f] = l_filter.W; bs[f] = l_filter.b; f = f+1

        #########################
        #  transformer network  #
        #########################
        ## get inputs
        output = SliceLayer(input, indices=slice(self.nInputs-1, self.nInputs), axis=1) # select the last (most recent) frame from the inputs

        ## add a bias
        if self.dynamic_bias:
            filters_biases = SliceLayer(l_filter, indices=slice(filter_size ** 2, filter_size ** 2 + 1), axis=1)
            output = ConcatLayer([output, filters_biases])
            # Summing the 2-channel pool adds the generated bias to the frame.
            output = FeaturePoolLayer(output, pool_size=2, pool_function=theano.tensor.sum)

        ## dynamic convolution
        filters = SliceLayer(l_filter, indices=slice(0, filter_size ** 2), axis=1)

        # Softmax across the filter taps so each per-pixel filter sums to 1.
        # filters = FeaturePoolLayer(filters, pool_size=9*9, pool_function=theano.tensor.nnet.softmax)
        filters = DimshuffleLayer(filters, (0, 2, 3, 1))
        filters = ReshapeLayer(filters, shape=(-1, filter_size ** 2))
        filters = NonlinearityLayer(filters, nonlinearity=softmax)
        filters = ReshapeLayer(filters, shape=(-1, npx, npx, filter_size ** 2))
        filters = DimshuffleLayer(filters, (0, 3, 1, 2))

        output_dynconv = DynamicFilterLayer([output, filters], filter_size=(filter_size,filter_size,1), pad=(filter_size//2, filter_size//2))

        ########################
        #  refinement network  #
        ########################
        if self.refinement_network:
            output = ConcatLayer([output_dynconv, input])
            output = ConvLayer(output, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ConvLayer(output, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ConvLayer(output, num_filters=1, filter_size=(3, 3), stride=(1, 1), pad='same', W=Ws[f], b=bs[f], nonlinearity=leaky_rectify); Ws[f] = output.W; bs[f] = output.b; f = f+1
            output = ElemwiseSumLayer([output_dynconv, output]) # this is a residual connection
        else:
            output = output_dynconv

        return output, hidden_state, filters
class Model(object):
    """Baseline model for highway-driving frame prediction.

    Same encoder/decoder architecture as the DFN model, but the dynamic
    filter layer at the end is replaced by plain 1x1 convolutions, so the
    output is produced by static (learned, then fixed) filters only.
    """

    def __init__(self, npx=64, batch_size=16, input_seqlen=3, target_seqlen=3, buffer_len=1,
                 dynamic_filter_size=(9, 9), refinement_network=False, dynamic_bias=False):
        self.npx = npx                        # side length of the square input frames
        self.batch_size = batch_size
        self.input_seqlen = input_seqlen      # number of conditioning frames
        self.target_seqlen = target_seqlen    # number of frames to predict
        self.nInputs = buffer_len             # frames fed to the network per prediction step
        self.dynamic_filter_size = dynamic_filter_size
        self.refinement_network = refinement_network
        self.dynamic_bias = dynamic_bias

    def build_model(self, input_batch):
        """Build the recurrent prediction graph.

        :param input_batch: symbolic 4D tensor, shape (batch, input_seqlen, npx, npx)
        :returns: (last predicted frame layer, list of all predicted frame layers)
        """
        ## initialize shared parameters: one (W, b) pair per parameterized
        ## layer, shared across all time steps of the recurrence
        Ws = []
        bs = []
        nLayersWithParams = 14
        if self.refinement_network:
            nLayersWithParams = nLayersWithParams + 4
        for i in range(nLayersWithParams):
            Ws.append(HeUniform())
            bs.append(Constant(0.0))

        # FIX: use '//' so the spatial size stays an int (identical to '/'
        # for ints on Python 2, and avoids a float shape under Python 3)
        hidden_state = InputLayer(
            input_var=np.zeros((self.batch_size, 64, self.npx // 2, self.npx // 2), dtype=np.float32),
            shape=(self.batch_size, 64, self.npx // 2, self.npx // 2))

        ## get inputs
        inputs = InputLayer(input_var=input_batch, shape=(None, self.input_seqlen, self.npx, self.npx))

        outputs = []
        for i in range(self.input_seqlen - self.nInputs + self.target_seqlen):
            input = SliceLayer(inputs, indices=slice(0, self.nInputs), axis=1)
            output, hidden_state = self.predict(input, hidden_state, Ws, bs)

            ## FIFO operation: drop the oldest frame from the buffer
            inputs = SliceLayer(inputs, indices=slice(1, None), axis=1)

            if i >= self.input_seqlen - self.nInputs:
                # past the warm-up phase: feed the prediction back in and record it
                inputs = ConcatLayer([inputs, output], axis=1)
                outputs.append(output)

        return output, outputs

    def predict(self, input, hidden_state, Ws, bs):
        """One recurrent prediction step.

        :param input: layer holding the current nInputs-frame buffer
        :param hidden_state: layer holding the recurrent state (64 maps, npx/2 x npx/2)
        :param Ws, bs: shared parameter lists; entries are replaced by the
            created layers' parameters on the first call so that later time
            steps reuse the same weights
        :returns: (predicted frame layer, new hidden state layer)
        """
        filter_size = self.dynamic_filter_size[0]
        f = 0  # index into the shared parameter lists

        ## encoder
        output = ConvLayer(input, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same',
                           W=Ws[f], b=bs[f], nonlinearity=leaky_rectify, untie_biases=True)
        Ws[f] = output.W; bs[f] = output.b; f += 1
        output = ConvLayer(output, num_filters=32, filter_size=(3, 3), stride=(2, 2), pad='same',
                           W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = output.W; bs[f] = output.b; f += 1
        output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                           W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = output.W; bs[f] = output.b; f += 1

        ## mid: merge the encoded frame with the recurrent hidden state
        output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                           W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = output.W; bs[f] = output.b; f += 1

        hidden = ConvLayer(hidden_state, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                           W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = hidden.W; bs[f] = hidden.b; f += 1
        hidden = ConvLayer(hidden, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                           W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = hidden.W; bs[f] = hidden.b; f += 1
        output = ElemwiseSumLayer([output, hidden])
        hidden_state = output

        ## decoder
        output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                           W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = output.W; bs[f] = output.b; f += 1
        output = Upscale2DLayer(output, scale_factor=2)
        output = ConvLayer(output, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same',
                           W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = output.W; bs[f] = output.b; f += 1
        output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                           W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = output.W; bs[f] = output.b; f += 1

        output = ConvLayer(output, num_filters=128, filter_size=(1, 1), stride=(1, 1), pad='same',
                           W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = output.W; bs[f] = output.b; f += 1

        ## "filter-generating" layers — kept for architectural parity with the
        ## DFN model, but here the maps are simply mixed by a 1x1 convolution
        l_filter = ConvLayer(output, num_filters=filter_size ** 2 + self.dynamic_bias,
                             filter_size=(1, 1), stride=(1, 1), pad=(0, 0),
                             W=Ws[f], b=bs[f], nonlinearity=identity)
        Ws[f] = l_filter.W; bs[f] = l_filter.b; f += 1

        output_dynconv = ConvLayer(l_filter, num_filters=1, filter_size=(1, 1), stride=(1, 1), pad='same',
                                   W=Ws[f], b=bs[f], nonlinearity=identity)
        Ws[f] = output_dynconv.W; bs[f] = output_dynconv.b; f += 1

        ########################
        #  refinement network  #
        ########################
        if self.refinement_network:
            output = ConcatLayer([output_dynconv, input])
            output = ConvLayer(output, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same',
                               W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
            Ws[f] = output.W; bs[f] = output.b; f += 1
            output = ConvLayer(output, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                               W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
            Ws[f] = output.W; bs[f] = output.b; f += 1
            output = ConvLayer(output, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same',
                               W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
            Ws[f] = output.W; bs[f] = output.b; f += 1
            output = ConvLayer(output, num_filters=1, filter_size=(3, 3), stride=(1, 1), pad='same',
                               W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
            Ws[f] = output.W; bs[f] = output.b; f += 1
            output = ElemwiseSumLayer([output_dynconv, output])  # residual connection
        else:
            output = output_dynconv

        return output, hidden_state
class Model(object):
    """Steerable-filter toy model.

    A tiny filter-generating network, conditioned on a per-pixel angle map,
    produces a dynamic filter at every position; the filter is then applied
    to the input image by the DynamicFilterLayer.
    """

    def __init__(self, npx=64, batch_size=32, input_seqlen=1, target_seqlen=1, dynamic_filter_size=(9, 9)):
        self.npx = npx
        self.batch_size = batch_size
        self.input_seqlen = input_seqlen
        self.target_seqlen = target_seqlen
        self.dynamic_filter_size = dynamic_filter_size

    def build_model(self, input_batch):
        """Build the graph; channel 0 of input_batch is the image, channel 1 the angle map."""
        fs = self.dynamic_filter_size[0]

        ## split the two input channels into separate layers
        image_in = InputLayer(input_var=input_batch[:, [0], :, :], shape=(None, 1, self.npx, self.npx))
        angle_in = InputLayer(input_var=input_batch[:, [1], :, :], shape=(None, 1, self.npx, self.npx))
        # angle_in could alternatively be reshaped to a single scalar per sample

        ## filter-generating network: two 1x1 conv layers on the angle map,
        ## then one fs*fs-channel map holding the filter taps per pixel
        net = ConvLayer(angle_in, num_filters=64, filter_size=(1, 1), stride=(1, 1),
                        pad='same', nonlinearity=leaky_rectify)
        net = ConvLayer(net, num_filters=128, filter_size=(1, 1), stride=(1, 1),
                        pad='same', nonlinearity=leaky_rectify)
        filters = ConvLayer(net, num_filters=fs ** 2, filter_size=(1, 1), stride=(1, 1),
                            pad='same', nonlinearity=identity)

        ## transformer: apply the generated fs x fs filter at every image pixel
        image = SliceLayer(image_in, indices=slice(0, 1), axis=1)
        prediction = DynamicFilterLayer([image, filters],
                                        filter_size=(fs, fs, 1),
                                        pad=(fs // 2, fs // 2))

        return prediction, [prediction], filters
class Model(object):
    """DFN model for stereo view prediction.

    A filter-generating network produces, per pixel, a horizontal 1 x fs
    dynamic filter (softmax-normalized over the fs taps) plus a bias map;
    the transformer network applies them to the input view.
    """

    def __init__(self, npx=64, batch_size=16, input_seqlen=1, target_seqlen=1, buffer_len=1, dynamic_filter_size=(9, 9)):
        assert input_seqlen == 1 and target_seqlen == 1  # we are only handling stereo here (color).

        self.npx = npx
        self.batch_size = batch_size
        self.input_seqlen = input_seqlen
        self.target_seqlen = target_seqlen
        self.nInputs = buffer_len
        self.dynamic_filter_size = dynamic_filter_size

    """ build and compile model """

    def build_model(self, input_batch):
        """Build the single-step prediction graph for one stereo pair."""
        ## one shared (W, b) initializer pair per parameterized layer
        Ws = [HeUniform() for _ in range(14)]
        bs = [Constant(0.0) for _ in range(14)]

        # recurrent state placeholder (unused by the current predict(), kept
        # so the interface matches the video-prediction models)
        hidden_state = InputLayer(
            input_var=np.zeros((self.batch_size, 128, self.npx / 2, self.npx / 2), dtype=np.float32),
            shape=(self.batch_size, 128, self.npx / 2, self.npx / 2))

        ## get inputs
        stereo_in = InputLayer(input_var=input_batch, shape=(None, 1, self.npx, self.npx))
        prediction, hidden_state, filters = self.predict(stereo_in, hidden_state, Ws, bs)

        return prediction, [prediction], filters

    def predict(self, input, hidden_state, Ws, bs):
        """One prediction step: returns (output layer, hidden state, filter layer)."""
        npx = self.npx  # image size
        fs = self.dynamic_filter_size[0]
        f = 0  # index into the shared parameter lists

        ###############################
        #  filter-generating network  #
        ###############################
        ## encoder
        x = ConvLayer(input, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1
        x = ConvLayer(x, num_filters=32, filter_size=(3, 3), stride=(2, 2), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1
        x = ConvLayer(x, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1
        x = ConvLayer(x, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1

        ## mid
        x = ConvLayer(x, num_filters=128, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify, untie_biases=True)
        Ws[f] = x.W; bs[f] = x.b; f += 1

        ## decoder
        x = ConvLayer(x, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1
        x = ConvLayer(x, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1
        x = Upscale2DLayer(x, scale_factor=2)
        x = ConvLayer(x, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1
        x = ConvLayer(x, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1

        x = ConvLayer(x, num_filters=128, filter_size=(1, 1), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1

        ## filter-generating layers: fs horizontal filter taps + 1 bias channel
        x = ConvLayer(x, num_filters=fs + 1, filter_size=(1, 1), stride=(1, 1), pad=(0, 0),
                      W=Ws[f], b=bs[f], nonlinearity=identity)
        Ws[f] = x.W; bs[f] = x.b; f += 1

        filters = SliceLayer(x, indices=slice(0, fs), axis=1)
        filter_biases = SliceLayer(x, indices=slice(fs, fs + 1), axis=1)

        ## softmax over the fs taps, independently at each pixel
        filters = DimshuffleLayer(filters, (0, 2, 3, 1))
        filters = ReshapeLayer(filters, shape=(-1, fs))
        filters = NonlinearityLayer(filters, nonlinearity=softmax)
        filters = ReshapeLayer(filters, shape=(-1, npx, npx, fs))
        filters = DimshuffleLayer(filters, (0, 3, 1, 2))

        #########################
        #   transformer network #
        #########################
        ## add the generated bias map to the input view
        biased = ConcatLayer([input, filter_biases])
        biased = FeaturePoolLayer(biased, pool_size=2, pool_function=theano.tensor.sum)

        ## dynamic convolution with the per-pixel 1 x fs filters
        output = DynamicFilterLayer([biased, filters],
                                    filter_size=(1, fs, 1),
                                    pad=(1 // 2, fs // 2))

        return output, hidden_state, filters
class Model(object):
    """Baseline model for stereo view prediction.

    Same encoder/decoder architecture as the DFN stereo model, but the
    dynamic filter layer is replaced by plain 1x1 convolutions.
    """

    def __init__(self, npx=64, batch_size=16, input_seqlen=1, target_seqlen=1, buffer_len=1, dynamic_filter_size=(9, 9)):
        assert input_seqlen == 1 and target_seqlen == 1  # we are only handling stereo here (color).

        self.npx = npx
        self.batch_size = batch_size
        self.input_seqlen = input_seqlen
        self.target_seqlen = target_seqlen
        self.nInputs = buffer_len
        self.dynamic_filter_size = dynamic_filter_size

    """ build and compile model """

    def build_model(self, input_batch):
        """Build the single-step prediction graph for one stereo pair."""
        ## one shared (W, b) initializer pair per parameterized layer
        Ws = [HeUniform() for _ in range(14)]
        bs = [Constant(0.0) for _ in range(14)]

        # recurrent state placeholder (unused by the current predict(), kept
        # so the interface matches the video-prediction models)
        hidden_state = InputLayer(
            input_var=np.zeros((self.batch_size, 128, self.npx / 2, self.npx / 2), dtype=np.float32),
            shape=(self.batch_size, 128, self.npx / 2, self.npx / 2))

        ## get inputs
        stereo_in = InputLayer(input_var=input_batch, shape=(None, 1, self.npx, self.npx))
        prediction, hidden_state = self.predict(stereo_in, hidden_state, Ws, bs)

        return prediction, [prediction]

    def predict(self, input, hidden_state, Ws, bs):
        """One prediction step: returns (output layer, hidden state)."""
        npx = self.npx  # image size
        fs = self.dynamic_filter_size[0]
        f = 0  # index into the shared parameter lists

        ###############################
        #  filter-generating network  #
        ###############################
        ## encoder
        x = ConvLayer(input, num_filters=32, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1
        x = ConvLayer(x, num_filters=32, filter_size=(3, 3), stride=(2, 2), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1
        x = ConvLayer(x, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1
        x = ConvLayer(x, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1

        ## mid
        x = ConvLayer(x, num_filters=128, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify, untie_biases=True)
        Ws[f] = x.W; bs[f] = x.b; f += 1

        ## decoder
        x = ConvLayer(x, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1
        x = ConvLayer(x, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1
        x = Upscale2DLayer(x, scale_factor=2)
        x = ConvLayer(x, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1
        x = ConvLayer(x, num_filters=64, filter_size=(3, 3), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1

        x = ConvLayer(x, num_filters=128, filter_size=(1, 1), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=leaky_rectify)
        Ws[f] = x.W; bs[f] = x.b; f += 1

        ## "filter-generating" layers — architectural parity with the DFN
        ## model, but here the maps are simply mixed by 1x1 convolutions
        x = ConvLayer(x, num_filters=fs + 1, filter_size=(1, 1), stride=(1, 1), pad=(0, 0),
                      W=Ws[f], b=bs[f], nonlinearity=identity)
        Ws[f] = x.W; bs[f] = x.b; f += 1
        x = ConvLayer(x, num_filters=1, filter_size=(1, 1), stride=(1, 1), pad='same',
                      W=Ws[f], b=bs[f], nonlinearity=identity)
        Ws[f] = x.W; bs[f] = x.b; f += 1

        return x, hidden_state
def train(options):
    """Train a DFN model according to the given options dict.

    Expected keys: seed, model_file, modelOptions, loss, learning_rate,
    dataset_file, datasetOptions, pretrained_model_path, num_epochs,
    batches_per_epoch, decay_after, save_after, batch_size.
    """
    # -------- setup options and data ------------------
    np.random.seed(options['seed'])

    # Load options
    host = socket.gethostname()  # computer hostname, recorded in checkpoints
    start_time = datetime.datetime.now().strftime("%y-%m-%d-%H-%M")

    model_module = importlib.import_module(options['model_file'])

    # ---------- build model and compile ---------------
    input_batch = T.tensor4()  # input image sequences
    target = T.tensor4()       # target images

    print('Build model...')
    model = model_module.Model(**options['modelOptions'])

    print('Compile ...')
    net, outputs, filters = model.build_model(input_batch)

    # compute loss; the last symbolic output is the generated filter map
    outputs = get_output(outputs + [filters])
    output_frames = outputs[:-1]
    output_filter = outputs[-1]

    train_losses = []
    for i in range(options['modelOptions']['target_seqlen']):
        output_frame = output_frames[i]

        if options['loss'] == 'squared_error':
            frame_loss = squared_error(output_frame, target[:, [i], :, :])
        elif options['loss'] == 'binary_crossentropy':
            # Clipping to avoid NaN's in binary crossentropy:
            # https://github.com/Lasagne/Lasagne/issues/436
            eps = np.finfo(np.float32).eps
            output_frame = T.clip(output_frame, eps, 1 - eps)
            frame_loss = binary_crossentropy(output_frame, target[:, [i], :, :])
        else:
            # FIX: was `assert False` — asserts are stripped under `python -O`,
            # so raise an explicit error for unknown loss names instead
            raise ValueError("unknown loss: %r" % options['loss'])

        train_losses.append(aggregate(frame_loss))

    train_loss = sum(train_losses) / options['modelOptions']['target_seqlen']

    # shared learning rate so it can be decayed during training
    sh_lr = theano.shared(lasagne.utils.floatX(options['learning_rate']))

    layers = get_all_layers(net)
    all_params = get_all_params(layers, trainable=True)
    updates = adam(train_loss, all_params, learning_rate=sh_lr)
    _train = theano.function([input_batch, target], train_loss, updates=updates, allow_input_downcast=True)
    _test = theano.function([input_batch, target], [train_loss, output_filter] + output_frames, allow_input_downcast=True)

    # ------------ data setup ----------------
    print('Prepare data...')
    dataset = importlib.import_module(options['dataset_file'])
    dh = dataset.DataHandler(**options['datasetOptions'])

    # ------------ training setup ----------------
    if options['pretrained_model_path'] is not None:
        # FIX: `pickle` was used here without being imported in this file
        # (it only worked if the star import happened to leak it); also use a
        # `with` block so the checkpoint file handle is not leaked.
        import cPickle as pickle
        with open(options['pretrained_model_path'], 'rb') as checkpoint_file:
            checkpoint = pickle.load(checkpoint_file)
        model_values = checkpoint['model_values']  # overwrite the values of model parameters
        lasagne.layers.set_all_param_values(layers, model_values)

        history_train = checkpoint['history_train']
        start_epoch = checkpoint['epoch'] + 1
        options['batch_size'] = checkpoint['options']['batch_size']
        sh_lr.set_value(floatX(checkpoint['options']['learning_rate']))
    else:
        start_epoch = 0
        history_train = []

    # ------------ actual training ----------------
    print('Start training ...')

    input_seqlen = options['modelOptions']['input_seqlen']
    for epoch in range(start_epoch, start_epoch + options['num_epochs']):
        epoch_start_time = time.time()

        history_batch = []
        for batch_index in range(0, options['batches_per_epoch']):

            batch = dh.GetBatch()  # generate data on the fly
            if options['dataset_file'] == 'datasets.stereoCarsColor':
                batch_input = batch[..., :input_seqlen].squeeze(axis=4)   # first frames
                batch_target = batch[..., input_seqlen:].squeeze(axis=4)  # last frame
            else:
                batch_input = batch[..., :input_seqlen].transpose(0, 4, 2, 3, 1).squeeze(axis=4)   # first frames
                batch_target = batch[..., input_seqlen:].transpose(0, 4, 2, 3, 1).squeeze(axis=4)  # last frame

            # train on one batch
            loss_train = _train(batch_input, batch_target)
            history_batch.append(loss_train)

            print("Epoch {} of {}, batch {} of {}, took {:.3f}s".format(
                epoch + 1, options['num_epochs'], batch_index + 1,
                options['batches_per_epoch'], time.time() - epoch_start_time))
            print("  training loss:\t{:.6f}".format(loss_train.item()))

            # clear the screen (useful when running inside a notebook)
            display.clear_output(wait=True)

        # print per-epoch statistics
        history_train.append(np.mean(history_batch))
        print("Epoch {} of {}, took {:.3f}s".format(epoch + 1, options['num_epochs'], time.time() - epoch_start_time))
        print("  training loss:\t{:.6f}".format(history_train[epoch].item()))

        # set new learning rate (maybe this is unnecessary with adam updates)
        if (epoch + 1) % options['decay_after'] == 0:
            options['learning_rate'] = sh_lr.get_value() * 0.5
            print("New LR: %s" % options['learning_rate'])
            sh_lr.set_value(floatX(options['learning_rate']))

        # save the model
        if (epoch + 1) % options['save_after'] == 0:
            save_model(layers, epoch, history_train, start_time, host, options)
            print("Model saved")
# export THEANO_FLAGS="device=gpu0, floatX=float32" optimizer=None

import theano
import theano.tensor as T
import numpy as np
import os
import socket
import argparse
import time
import datetime
import importlib

import matplotlib.pyplot as plt
from IPython import display

import lasagne
from lasagne.utils import floatX
from lasagne.updates import rmsprop, adam, momentum
from lasagne.layers import get_all_params, get_all_layers, get_all_param_values, get_output
from lasagne.objectives import squared_error, binary_crossentropy, aggregate

# NOTE: the star import also provides `pickle` (imported in helperFunctions).
from utils.helperFunctions import *


def train(options):
    """Train the baseline (no dynamic-filter layer) video-prediction model.

    Builds the Theano/Lasagne graph for ``options['model_file']``, streams
    batches from ``options['dataset_file']`` and runs Adam updates, halving
    the learning rate every ``options['decay_after']`` epochs and writing a
    checkpoint every ``options['save_after']`` epochs.

    Parameters
    ----------
    options : dict
        Option dictionary as produced by ``getDefaultOptions``, including
        the ``modelOptions`` and ``datasetOptions`` sub-dicts.
    """
    # -------- setup options and data ------------------
    np.random.seed(options['seed'])

    host = socket.gethostname()  # hostname is embedded in checkpoint filenames
    start_time = datetime.datetime.now().strftime("%y-%m-%d-%H-%M")

    # Keep the imported module and the model instance in separate names
    # (the original reused `model` for both, shadowing the module).
    model_module = importlib.import_module(options['model_file'])

    # ---------- build model and compile ---------------
    input_batch = T.tensor4()  # input image sequences
    target = T.tensor4()       # target image sequence

    print('Build model...')
    model = model_module.Model(**options['modelOptions'])

    print('Compile ...')
    net, outputs = model.build_model(input_batch)

    # loss: one term per predicted frame, averaged over the target sequence
    output_frames = get_output(outputs)

    train_losses = []
    for i in range(options['modelOptions']['target_seqlen']):
        output_frame = output_frames[i]

        if options['loss'] == 'squared_error':
            frame_loss = squared_error(output_frame, target[:, [i], :, :])
        elif options['loss'] == 'binary_crossentropy':
            # Clipping to avoid NaN's in binary crossentropy:
            # https://github.com/Lasagne/Lasagne/issues/436
            output_frame = T.clip(output_frame, np.finfo(np.float32).eps,
                                  1 - np.finfo(np.float32).eps)
            frame_loss = binary_crossentropy(output_frame, target[:, [i], :, :])
        else:
            # was `assert False`, which silently disappears under `python -O`
            raise ValueError("unsupported loss: %r" % (options['loss'],))

        train_losses.append(aggregate(frame_loss))

    train_loss = sum(train_losses) / options['modelOptions']['target_seqlen']

    # update rule; shared variable so the learning rate can be decayed later
    sh_lr = theano.shared(lasagne.utils.floatX(options['learning_rate']))

    layers = get_all_layers(net)
    all_params = get_all_params(layers, trainable=True)
    updates = adam(train_loss, all_params, learning_rate=sh_lr)
    _train = theano.function([input_batch, target], train_loss, updates=updates,
                             allow_input_downcast=True)
    # compiled but not called below; kept for interactive evaluation sessions
    _test = theano.function([input_batch, target], [train_loss] + output_frames,
                            allow_input_downcast=True)

    # ------------ data setup ----------------
    print('Prepare data...')
    dataset = importlib.import_module(options['dataset_file'])
    dh = dataset.DataHandler(**options['datasetOptions'])

    # ------------ training setup ----------------
    if options['pretrained_model_path'] is not None:
        # resume: restore weights, loss history and optimizer hyper-parameters
        with open(options['pretrained_model_path'], 'rb') as f:
            checkpoint = pickle.load(f)
        # overwrite the values of the model parameters
        lasagne.layers.set_all_param_values(layers, checkpoint['model_values'])

        history_train = checkpoint['history_train']
        start_epoch = checkpoint['epoch'] + 1
        options['batch_size'] = checkpoint['options']['batch_size']
        sh_lr.set_value(floatX(checkpoint['options']['learning_rate']))
    else:
        start_epoch = 0
        history_train = []

    # ------------ actual training ----------------
    print('Start training ...')

    input_seqlen = options['modelOptions']['input_seqlen']
    for epoch in range(start_epoch, start_epoch + options['num_epochs']):
        epoch_start_time = time.time()

        history_batch = []
        for batch_index in range(options['batches_per_epoch']):

            batch = dh.GetBatch()  # generate data on the fly
            # assumes batch layout (batch, ch, h, w, time); the stereo-color
            # dataset is already channel-first, the others need the time axis
            # moved before squeezing -- TODO confirm against the DataHandlers
            if options['dataset_file'] == 'datasets.stereoCarsColor':
                batch_input = batch[..., :input_seqlen].squeeze(axis=4)    # first frames
                batch_target = batch[..., input_seqlen:].squeeze(axis=4)   # last frame
            else:
                batch_input = batch[..., :input_seqlen].transpose(0, 4, 2, 3, 1).squeeze(axis=4)
                batch_target = batch[..., input_seqlen:].transpose(0, 4, 2, 3, 1).squeeze(axis=4)

            # train
            loss_train = _train(batch_input, batch_target)
            history_batch.append(loss_train)

            print("Epoch {} of {}, batch {} of {}, took {:.3f}s".format(
                epoch + 1, options['num_epochs'], batch_index + 1,
                options['batches_per_epoch'], time.time() - epoch_start_time))
            print(" training loss:\t{:.6f}".format(loss_train.item()))

        # clear the screen (keeps notebook output compact)
        display.clear_output(wait=True)

        # print statistics
        history_train.append(np.mean(history_batch))
        print("Epoch {} of {}, took {:.3f}s".format(
            epoch + 1, options['num_epochs'], time.time() - epoch_start_time))
        print(" training loss:\t{:.6f}".format(history_train[epoch].item()))

        # set new learning rate (maybe this is unnecessary with adam updates)
        if (epoch + 1) % options['decay_after'] == 0:
            options['learning_rate'] = sh_lr.get_value() * 0.5
            print("New LR: {}".format(options['learning_rate']))
            sh_lr.set_value(floatX(options['learning_rate']))

        # save the model
        if (epoch + 1) % options['save_after'] == 0:
            save_model(layers, epoch, history_train, start_time, host, options)
            print("Model saved")


def getDefaultOptions():
    """Return the default options for the moving-MNIST baseline experiment.

    The returned dict nests ``modelOptions`` and ``datasetOptions`` so it can
    be handed directly to :func:`train`.
    """
    options = {
        # global setup settings, and checkpoints
        'name': 'bouncingMnistOriginal_D',
        'seed': 123,
        # NOTE(review): cluster-specific absolute path; adjust per machine
        'checkpoint_output_directory': '/esat/malachite/bdebraba/video_prediction/checkpoints',

        # model and dataset
        'dataset_file': 'datasets.bouncingMnist_original',
        'model_file': 'models.model_recurrent_3x3Deeper',
        'pretrained_model_path': None,

        # training parameters
        'image_dim': 64,
        'batch_size': 16,
        'loss': 'binary_crossentropy',
        'learning_rate': 1e-3,
        'decay_after': 20,
        'num_epochs': 100,
        'batches_per_epoch': 2 * 100,
        'save_after': 10,
    }

    modelOptions = {
        'batch_size': options['batch_size'],
        'npx': options['image_dim'],
        'input_seqlen': 10,
        'target_seqlen': 10,
        'buffer_len': 1,
        'dynamic_filter_size': (9, 9),
    }
    options['modelOptions'] = modelOptions

    datasetOptions = {
        'batch_size': options['batch_size'],
        'image_size': options['image_dim'],
        'num_frames': modelOptions['input_seqlen'] + modelOptions['target_seqlen'],
        'num_digits': 2,
        'background': 'zeros',
    }
    options['datasetOptions'] = datasetOptions

    return options


if __name__ == "__main__":
    options = getDefaultOptions()
    train(options)


# ---------------------------------------------------------------------------
# utils/helperFunctions.py
# ---------------------------------------------------------------------------

import numpy as np
import matplotlib.pyplot as plt
try:
    import cPickle as pickle  # Python 2
except ImportError:
    import pickle             # Python 3 fallback
import theano
import lasagne
import os


def load_model(layers, filepath):
    """Restore parameter values from a checkpoint pickle into `layers`."""
    with open(filepath, 'rb') as f:
        checkpoint = pickle.load(f)
    # overwrite the values of the model parameters
    lasagne.layers.set_all_param_values(layers, checkpoint['model_values'])
    return layers


def save_model(layers, epoch, history_train, start_time, host, options):
    """Pickle a training checkpoint (weights, history, options) to disk.

    The filename encodes the experiment name, start time, host, epoch and
    the training loss at that epoch. Failures are reported but not raised,
    so a full disk does not kill a long-running training job.
    """
    if not os.path.exists(options['checkpoint_output_directory']):
        os.makedirs(options['checkpoint_output_directory'])

    filename = '%s_%s_%s_epoch%d_train_%.3f.p' % (
        options['name'], start_time, host, epoch + 1, history_train[epoch])
    filepath = os.path.join(options['checkpoint_output_directory'], filename)

    checkpoint = {
        'epoch': epoch,
        'model_values': lasagne.layers.get_all_param_values(layers),
        'layers': layers,
        'history_train': history_train,
        'options': options,
    }

    try:
        with open(filepath, "wb") as f:
            pickle.dump(checkpoint, f)
        print('saved checkpoint in %s' % (filepath,))
    except Exception as e:  # todo be more clever here
        print('tried to write checkpoint into %s but got error: ' % (filepath,))
        print(e)


def visualize_prediction(data, fut=None, fig=1, case_id=0, saveId=None, savefig=False):
    """Plot the ground-truth sequence and (optionally) the predicted frames.

    Parameters
    ----------
    data : ndarray
        Batch of sequences; indexing implies shape
        (batch, channels, npx, npx, time).
    fut : ndarray, optional
        Predicted frames, 4-D (single frame) or 5-D (sequence); left-padded
        with zeros so they align with the tail of the input sequence.
    fig : int
        Base figure number (two figures are used: 2*fig and 2*fig+1).
    case_id : int
        Batch element to visualize.
    saveId : int, optional
        Identifier used in saved filenames; defaults to ``case_id``.
    savefig : bool
        When True, write the figures to ``images/``.
    """
    if saveId is None:
        saveId = case_id

    batch_size = data.shape[0]
    num_channels = data.shape[1]
    npx = data.shape[2]
    seq_length = data.shape[4]

    # get data
    data = data[case_id]

    # pad the future frames with zeros so they align with the input sequence
    if fut is not None:
        if fut.ndim == 4:
            fut_length = 1
            fut = fut[..., np.newaxis]
        else:
            fut_length = fut.shape[4]

        fut = np.concatenate(
            (np.zeros((batch_size, num_channels, npx, npx, seq_length - fut_length)), fut),
            axis=4)
        fut = fut[case_id]

    num_rows = 1
    # create figure for original sequence
    plt.figure(2 * fig, figsize=(20, 4))
    plt.clf()
    for i in range(seq_length):
        plt.subplot(num_rows, seq_length, i + 1)
        plt.imshow(data[..., i].transpose(1, 2, 0).squeeze(),
                   cmap=plt.cm.gray, interpolation="nearest")
        plt.axis('off')
    if savefig:
        plt.savefig('images/%d_gt.png' % (saveId), bbox_inches='tight',
                    pad_inches=0, transparent=True)
    plt.show()

    # create figure for reconstuction and future sequences
    plt.figure(2 * fig + 1, figsize=(20, 4))
    plt.clf()
    for i in range(seq_length):
        if fut is not None:
            plt.subplot(num_rows, seq_length, i + 1)
            plt.imshow(fut[..., i].transpose(1, 2, 0).squeeze(),
                       cmap=plt.cm.gray, interpolation="nearest")
            plt.axis('off')
    if savefig:
        plt.savefig('images/%d_pred.png' % (saveId), bbox_inches='tight',
                    pad_inches=0, transparent=True)
    plt.show()


def _flow_to_rgb(flowX, flowY):
    """Encode per-pixel x/y displacement fields as an RGB image.

    Hue encodes direction, saturation encodes (max-normalized) magnitude.
    Shared by both flow-map visualizers below.
    """
    from matplotlib.colors import hsv_to_rgb
    flowMagnitude = np.sqrt(flowX * flowX + flowY * flowY)
    print(" Minimal and maximal flow magnitude: {} / {}".format(
        np.amin(flowMagnitude), np.amax(flowMagnitude)))
    flowMagnitude = flowMagnitude / np.amax(flowMagnitude)
    flowOrientation = (np.arctan2(flowY, flowX) + np.pi) / (2 * np.pi)
    print(" Minimal and maximal orientation: {} / {}".format(
        np.amin(flowOrientation), np.amax(flowOrientation)))

    flowMap = np.concatenate((flowOrientation[..., np.newaxis],
                              flowMagnitude[..., np.newaxis],
                              np.ones(flowMagnitude.shape)[..., np.newaxis]), 2)
    return hsv_to_rgb(flowMap)


def _show_flow_panels(flowMap, batch, predictions, input_seqlen, case_id, saveId, savefig):
    """Stack last input frame, flow map, prediction and ground truth in one figure."""
    plt.figure(figsize=(20, 20))
    plt.clf()

    nImages = 4
    # input image (last frame fed to the network)
    plt.subplot(nImages, 1, 1)
    plt.imshow(batch[case_id, :, :, :, input_seqlen - 1].squeeze(),
               interpolation='none', cmap='gray')
    plt.axis('off')

    # optical flow
    plt.subplot(nImages, 1, 2)
    plt.imshow(flowMap, interpolation='none', cmap='gray')
    plt.axis('off')

    # predicted image
    plt.subplot(nImages, 1, 3)
    plt.imshow(predictions[case_id, :, :, :, 0].squeeze(),
               interpolation='none', cmap='gray')
    plt.axis('off')

    # ground truth (first frame after the input sequence)
    plt.subplot(nImages, 1, 4)
    plt.imshow(batch[case_id, :, :, :, input_seqlen].squeeze(),
               interpolation='none', cmap='gray')
    plt.axis('off')

    if savefig:
        plt.savefig('images/%d_flow.png' % (saveId), bbox_inches='tight',
                    pad_inches=0, transparent=True)

    plt.show()


def visualize_flowmapStereo(pred_filter, batch, predictions, input_seqlen, npx, filter_size, case_id, saveId=None, savefig=False):
    """Visualize a horizontal-only (stereo) dynamic filter as a flow map.

    The expected horizontal displacement per pixel is the weighted sum of the
    filter taps; the vertical component is zero by construction.
    `npx` is unused here but kept for signature parity with
    :func:`visualize_flowmap`.
    """
    if saveId is None:
        saveId = case_id
    max_translation = filter_size // 2
    xFilter = np.arange(-max_translation, max_translation + 1)[..., np.newaxis, np.newaxis]
    flowX = (pred_filter[case_id] * xFilter).sum(axis=0)
    flowY = np.zeros(flowX.shape)

    flowMap = _flow_to_rgb(flowX, flowY)
    _show_flow_panels(flowMap, batch, predictions, input_seqlen, case_id, saveId, savefig)


def visualize_flowmap(pred_filter, batch, predictions, input_seqlen, npx, filter_size, case_id, saveId=None, savefig=False):
    """Visualize a 2-D dynamic filter as an optical-flow map.

    Builds per-pixel x/y displacement grids matching the flattened
    (filter_size x filter_size) filter layout, takes the filter-weighted
    expectation, and renders the resulting flow next to input, prediction
    and ground truth.
    """
    if saveId is None:
        saveId = case_id
    max_translation = filter_size // 2
    xFilter = np.tile(np.tile(np.arange(-max_translation, max_translation + 1), filter_size),
                      (npx, npx, 1)).transpose(2, 0, 1)
    yFilter = np.tile(np.tile(np.arange(-max_translation, max_translation + 1),
                              (filter_size, 1)).transpose().flatten(),
                      (npx, npx, 1)).transpose(2, 0, 1)
    flowX = (pred_filter[case_id] * xFilter).sum(axis=0)
    flowY = (pred_filter[case_id] * yFilter).sum(axis=0)

    flowMap = _flow_to_rgb(flowX, flowY)
    _show_flow_panels(flowMap, batch, predictions, input_seqlen, case_id, saveId, savefig)