diff --git a/.gitignore b/.gitignore old mode 100755 new mode 100644 index 2f54e6b..2b2127b --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,9 @@ __pycache__/ # Video and data folders kitti_data/ model_data_keras*/ +nfs_data/ +PredNet-20200617T064538Z-001/ +PredNet-DO-results/ # Distribution / packaging .Python diff --git a/GitHub b/GitHub new file mode 100644 index 0000000..750b866 --- /dev/null +++ b/GitHub @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"GitHub","provenance":[],"mount_file_id":"1OxEIirVv0ZF6hQOA3Y7-EmIeju8uDxoH","authorship_tag":"ABX9TyOjl3RxuGE6iNEfNzsivZox"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"7EvCibyDJR3o","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1592364774354,"user_tz":-600,"elapsed":2571,"user":{"displayName":"Edward Ho","photoUrl":"","userId":"18197519276397766373"}},"outputId":"8ef250aa-09d3-43f1-b847-1a9550d5890f"},"source":["!ls"],"execution_count":1,"outputs":[{"output_type":"stream","text":["drive sample_data\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"CYcAolnAJUxA","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1592364774355,"user_tz":-600,"elapsed":2555,"user":{"displayName":"Edward Ho","photoUrl":"","userId":"18197519276397766373"}},"outputId":"90dbbdf3-40ec-4073-c243-3f4b25eb2dee"},"source":["%cd drive/My Drive/PredNet/"],"execution_count":2,"outputs":[{"output_type":"stream","text":["/content/drive/My Drive/PredNet\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"mqydiaXBJ-Jz","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1592364776352,"user_tz":-600,"elapsed":4516,"user":{"displayName":"Edward Ho","photoUrl":"","userId":"18197519276397766373"}},"outputId":"1f1ca4b9-d28a-4798-bc3f-9af9c458ffd5"},"source":["!git clone https://github.com/edwardmfho/prednet.git"],"execution_count":3,"outputs":[{"output_type":"stream","text":["fatal: destination path 'prednet' already exists and is not an empty directory.\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"hTFj6t0lKPOK","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1592364778804,"user_tz":-600,"elapsed":6943,"user":{"displayName":"Edward Ho","photoUrl":"","userId":"18197519276397766373"}},"outputId":"a39cc145-fc86-47d6-f668-278508228a30"},"source":["!ls"],"execution_count":4,"outputs":[{"output_type":"stream","text":["GitHub\tprednet\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"2QwNJDovKUx8","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1592364780918,"user_tz":-600,"elapsed":9029,"user":{"displayName":"Edward Ho","photoUrl":"","userId":"18197519276397766373"}},"outputId":"16a8b471-46fa-4b5b-a871-34df78fd2610"},"source":["!python process_kitti.py"],"execution_count":5,"outputs":[{"output_type":"stream","text":["python3: can't open file 'process_kitti.py': [Errno 2] No such file or directory\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"aLJ57TQMKjKq","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1592364782668,"user_tz":-600,"elapsed":10761,"user":{"displayName":"Edward 
Ho","photoUrl":"","userId":"18197519276397766373"}},"outputId":"236d3ab0-802b-499c-8d1b-c217195798f6"},"source":["!ls"],"execution_count":6,"outputs":[{"output_type":"stream","text":["GitHub\tprednet\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"vzoOi47XKlb0","colab_type":"code","colab":{},"executionInfo":{"status":"ok","timestamp":1592364784114,"user_tz":-600,"elapsed":12175,"user":{"displayName":"Edward Ho","photoUrl":"","userId":"18197519276397766373"}}},"source":["!cd prednet"],"execution_count":7,"outputs":[]},{"cell_type":"code","metadata":{"id":"5gpCQwH5Knqj","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1592364786035,"user_tz":-600,"elapsed":14066,"user":{"displayName":"Edward Ho","photoUrl":"","userId":"18197519276397766373"}},"outputId":"a5183976-dbb4-4271-e252-dbafea011c15"},"source":["!pwd"],"execution_count":8,"outputs":[{"output_type":"stream","text":["/content/drive/My Drive/PredNet\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"Xq9xpG0QKqpn","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1592364788283,"user_tz":-600,"elapsed":16291,"user":{"displayName":"Edward Ho","photoUrl":"","userId":"18197519276397766373"}},"outputId":"259febba-23df-4a01-8c22-5a0a4b75ae3e"},"source":["!cd /prednet"],"execution_count":9,"outputs":[{"output_type":"stream","text":["/bin/bash: line 0: cd: /prednet: No such file or directory\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"AbUCgYDLKsb3","colab_type":"code","colab":{},"executionInfo":{"status":"ok","timestamp":1592364790216,"user_tz":-600,"elapsed":18185,"user":{"displayName":"Edward Ho","photoUrl":"","userId":"18197519276397766373"}}},"source":["!cd prednet"],"execution_count":10,"outputs":[]},{"cell_type":"code","metadata":{"id":"JUz7aFENKztc","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1592364790217,"user_tz":-600,"elapsed":18168,"user":{"displayName":"Edward Ho","photoUrl":"","userId":"18197519276397766373"}},"outputId":"c5c7d00e-7bb9-46fb-aee9-42e01f1e806c"},"source":["pwd"],"execution_count":11,"outputs":[{"output_type":"execute_result","data":{"text/plain":["'/content/drive/My Drive/PredNet'"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"hTfEjVUJK1Tr","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1592364792318,"user_tz":-600,"elapsed":20250,"user":{"displayName":"Edward Ho","photoUrl":"","userId":"18197519276397766373"}},"outputId":"abb7232a-c628-4366-ccc5-0879ca80fb75"},"source":["!python /prednet/process_kitti.py"],"execution_count":12,"outputs":[{"output_type":"stream","text":["python3: can't open file '/prednet/process_kitti.py': [Errno 2] No such file or directory\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"pdNHFEBhK9_h","colab_type":"code","colab":{},"executionInfo":{"status":"ok","timestamp":1592364792319,"user_tz":-600,"elapsed":20243,"user":{"displayName":"Edward Ho","photoUrl":"","userId":"18197519276397766373"}}},"source":[""],"execution_count":12,"outputs":[]}]} \ No newline at end of file diff --git a/License.txt b/License.txt old mode 100755 new mode 100644 diff --git a/README.md b/README.md old mode 100755 new mode 100644 diff --git a/data_utils.py b/data_utils.py old mode 100755 new mode 100644 diff --git 
a/download_20bn_data.sh b/download_20bn_data.sh new file mode 100644 index 0000000..59f19d3 --- /dev/null +++ b/download_20bn_data.sh @@ -0,0 +1,22 @@ +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-00?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141133Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=42e35eacc0aa19106ac451f5e1507eb27af7009697eb7cb631e2f9c202390f04 --output 20bn-something-something-v2-00 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-01?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141133Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=f3587eff473816ec00c85aa79dc5605d8dbf5ff14b6f0c379d20ce7bbd656546 --output 20bn-something-something-v2-01 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-02?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141133Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=4efcfb2814977280ed6ec125af810b3a63ec844cc783f0d30dd977c2d092793d --output 20bn-something-something-v2-02 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-03?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141133Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=6c4f7ed6264e7d16be22746f2d05329d08fe7c2b00fcf481ea0b8ac7e43f2dc9 --output 20bn-something-something-v2-03 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-04?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141133Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=4d524b90f85c3cc265fd7df02a6c30b5d52511051f23b185e6cde67d4e9746af --output 20bn-something-something-v2-04 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-05?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141133Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=deb49d0e6b7c20211f6a10783549e487ca9b0c0116e41543b9ac84fd5f735e10 --output 20bn-something-something-v2-05 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-06?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141133Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=0cc3227629bd2506221d1938f12784714efcf4718c3e406715cb52c3d14c0cd8 --output 20bn-something-something-v2-06 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-07?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141133Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=9f5f9011317179238d101d5596632524c5dd771074e3ec745aabdf9f1e4e2db1 --output 20bn-something-something-v2-07 +curl -c 
https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-08?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141133Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=400d0ecbd013688f620e6162ad9f231b92b136c3e9d23756b190f890978d5f7d --output 20bn-something-something-v2-08 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-09?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141411Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=b50f40ec2bcfc06b8530c35b85b0871f113c2b1ef659027b5dc1057553e1bed1 --output 20bn-something-something-v2-09 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-10?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141411Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=c95ca1e2e40a9238c1d0e918f2be6f23d35aa3873985d7b9a2a42c829d9363e6 --output 20bn-something-something-v2-10 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-11?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141411Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=fdd3b45f0f8cde0e9ba41944e027f139a6fd8477fb4a6acede36ce1466359955 --output 20bn-something-something-v2-11 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-12?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141411Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=4949019d51e5c16823592fbccf1a6712c27b3e92f06ace0114a6f123d0c5ecec --output 20bn-something-something-v2-12 +curl -c ttps://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-13?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141411Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=2db571736b025fb8f3a49d5947a4b336a09b89323eaf16906a99d03f66bd932c --output 20bn-something-something-v2-13 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-14?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141411Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=334d7b8f07d9c162cef6c4dc03ea9471d23d30101c89e4811f44d0152eb25a7d --output 20bn-something-something-v2-14 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-15?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141411Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=12455968e41d8a736d37d1ef16c84a2ec9215e6dfa2ab55614f34875b27a25bf --output 20bn-something-something-v2-15 +curl -c 
https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-16?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141411Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=bf4d2852e0159d61f6ee637251fa82c173bc32af7c4ca634bcdc0d6aa08f2a75 --output 20bn-something-something-v2-16 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-17?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141411Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=8f3b59c1ef1bda14cbe5c772ac8522c8e6122667c567a0fcaba5f3b29f046715 --output 20bn-something-something-v2-17 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-18?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141411Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=e057ac8f17d2d7486b80108e422bab5290e02360bb1dd3ff9ace26d35785e42d --output 20bn-something-something-v2-18 +curl -c https://20bn-data-packages.s3.eu-west-1.amazonaws.com/something-something/v2/20bn-something-something-v2-19?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJ2PXKOHYMBEGX4UA%2F20200624%2Feu-west-1%2Fs3%2Faws4_request&X-Amz-Date=20200624T141411Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=bca3c648abfa7214925 --output 20bn-something-something-v2-19 + +cat 20bn-something-something-v2-?? | tar zx diff --git a/download_data.sh b/download_data.sh old mode 100755 new mode 100644 diff --git a/download_models.sh b/download_models.sh old mode 100755 new mode 100644 diff --git a/environment.yml b/environment.yml old mode 100755 new mode 100644 diff --git a/keras_utils.py b/keras_utils.py old mode 100755 new mode 100644 diff --git a/kitti_data - Shortcut.lnk b/kitti_data - Shortcut.lnk new file mode 100644 index 0000000..73f963d Binary files /dev/null and b/kitti_data - Shortcut.lnk differ diff --git a/kitti_evaluate.py b/kitti_evaluate.py old mode 100755 new mode 100644 diff --git a/kitti_extrap_finetune.py b/kitti_extrap_finetune.py old mode 100755 new mode 100644 diff --git a/kitti_settings.py b/kitti_settings.py old mode 100755 new mode 100644 diff --git a/kitti_train.py b/kitti_train.py old mode 100755 new mode 100644 index 481ccd5..464fadf --- a/kitti_train.py +++ b/kitti_train.py @@ -75,7 +75,14 @@ history = model.fit_generator(train_generator, samples_per_epoch / batch_size, nb_epoch, callbacks=callbacks, validation_data=val_generator, validation_steps=N_seq_val / batch_size) + + if save_model: json_string = model.to_json() with open(json_file, "w") as f: f.write(json_string) + + loss_history = history.history["loss"] + numpy_loss_history = np.array(loss_history) + np.savetxt("loss_history_prednet.txt", numpy_loss_history, delimiter=",") + diff --git a/kitti_train_prednet_plus.py b/kitti_train_prednet_plus.py new file mode 100644 index 0000000..39c7eac --- /dev/null +++ b/kitti_train_prednet_plus.py @@ -0,0 +1,88 @@ +''' +Train PredNet on KITTI sequences. (Geiger et al. 
2013, http://www.cvlibs.net/datasets/kitti/) +''' + +import os +import numpy as np +np.random.seed(123) +from six.moves import cPickle + +from keras import backend as K +from keras.models import Model +from keras.layers import Input, Dense, Flatten +from keras.layers import LSTM +from keras.layers import TimeDistributed +from keras.callbacks import LearningRateScheduler, ModelCheckpoint +from keras.optimizers import Adam + +PredNet = __import__('prednet-plus').PredNet # the module file prednet-plus.py has a hyphen in its name, so a plain import statement cannot be used +from data_utils import SequenceGenerator +from kitti_settings import * + + +save_model = True # if weights will be saved +weights_file = os.path.join(WEIGHTS_DIR, 'prednet_kitti_weights.hdf5') # where weights will be saved +json_file = os.path.join(WEIGHTS_DIR, 'prednet_kitti_model.json') + +# Data files +train_file = os.path.join(DATA_DIR, 'X_train.hkl') +train_sources = os.path.join(DATA_DIR, 'sources_train.hkl') +val_file = os.path.join(DATA_DIR, 'X_val.hkl') +val_sources = os.path.join(DATA_DIR, 'sources_val.hkl') + +# Training parameters +nb_epoch = 150 +batch_size = 4 +samples_per_epoch = 500 +N_seq_val = 100 # number of sequences to use for validation + +# Model parameters +n_channels, im_height, im_width = (3, 128, 160) +input_shape = (n_channels, im_height, im_width) if K.image_data_format() == 'channels_first' else (im_height, im_width, n_channels) +stack_sizes = (n_channels, 48, 96, 192) +R_stack_sizes = stack_sizes +A_filt_sizes = (3, 3, 3) +Ahat_filt_sizes = (3, 3, 3, 3) +R_filt_sizes = (3, 3, 3, 3) +layer_loss_weights = np.array([1., 0., 0., 0.]) # weighting for each layer in final loss; "L_0" model: [1, 0, 0, 0], "L_all": [1, 0.1, 0.1, 0.1] +layer_loss_weights = np.expand_dims(layer_loss_weights, 1) +nt = 10 # number of timesteps used for sequences in training +time_loss_weights = 1./ (nt - 1) * np.ones((nt,1)) # equally weight all timesteps except the first +time_loss_weights[0] = 0 + + +prednet = PredNet(stack_sizes, R_stack_sizes, + A_filt_sizes, Ahat_filt_sizes, R_filt_sizes, + output_mode='error', return_sequences=True) + +inputs = Input(shape=(nt,) + input_shape) +errors = prednet(inputs) # errors will be (batch_size, nt, nb_layers) +errors_by_time = TimeDistributed(Dense(1, trainable=False), weights=[layer_loss_weights, np.zeros(1)], trainable=False)(errors) # calculate weighted error by layer +errors_by_time = Flatten()(errors_by_time) # will be (batch_size, nt) +final_errors = Dense(1, weights=[time_loss_weights, np.zeros(1)], trainable=False)(errors_by_time) # weight errors by time +model = Model(inputs=inputs, outputs=final_errors) +model.compile(loss='mean_absolute_error', optimizer='adam') + +train_generator = SequenceGenerator(train_file, train_sources, nt, batch_size=batch_size, shuffle=True) +val_generator = SequenceGenerator(val_file, val_sources, nt, batch_size=batch_size, N_seq=N_seq_val) + +lr_schedule = lambda epoch: 0.001 if epoch < 75 else 0.0001 # start with lr of 0.001 and then drop to 0.0001 after 75 epochs +callbacks = [LearningRateScheduler(lr_schedule)] +if save_model: + if not os.path.exists(WEIGHTS_DIR): os.mkdir(WEIGHTS_DIR) + callbacks.append(ModelCheckpoint(filepath=weights_file, monitor='val_loss', save_best_only=True)) + +history = model.fit_generator(train_generator, samples_per_epoch / batch_size, nb_epoch, callbacks=callbacks, + validation_data=val_generator, validation_steps=N_seq_val / batch_size) + + + +if save_model: + json_string = model.to_json() + with open(json_file, "w") as f: + f.write(json_string) + + loss_history = history.history["loss"] + numpy_loss_history = 
np.array(loss_history) + np.savetxt("loss_history_prednet.txt", numpy_loss_history, delimiter=",") + diff --git a/nfs_settings.py b/nfs_settings.py new file mode 100644 index 0000000..d5ead87 --- /dev/null +++ b/nfs_settings.py @@ -0,0 +1,10 @@ +# Where KITTI data will be saved if you run process_kitti.py +# If you directly download the processed data, change to the path of the data. +DATA_DIR = './nfs_data/' + +# Where model weights and config will be saved if you run kitti_train.py +# If you directly download the trained weights, change to appropriate path. +WEIGHTS_DIR = './model_data_keras2/' + +# Where results (prediction plots and evaluation file) will be saved. +RESULTS_SAVE_DIR = './nfs_results/' diff --git a/prednet-plus.py b/prednet-plus.py new file mode 100644 index 0000000..35b0a8e --- /dev/null +++ b/prednet-plus.py @@ -0,0 +1,320 @@ +import numpy as np + +from keras import backend as K +from keras import activations +from keras.layers import Recurrent, Dropout +from keras.layers import Conv2D, UpSampling2D, MaxPooling2D +from keras.engine import InputSpec +from keras_utils import legacy_prednet_support + +class PredNet(Recurrent): + '''PredNet architecture - Lotter 2016. + Stacked convolutional LSTM inspired by predictive coding principles. + + # Arguments + stack_sizes: number of channels in targets (A) and predictions (Ahat) in each layer of the architecture. + Length is the number of layers in the architecture. + First element is the number of channels in the input. + Ex. (3, 16, 32) would correspond to a 3 layer architecture that takes in RGB images and has 16 and 32 + channels in the second and third layers, respectively. + R_stack_sizes: number of channels in the representation (R) modules. + Length must equal length of stack_sizes, but the number of channels per layer can be different. + A_filt_sizes: filter sizes for the target (A) modules. + Has length of 1 - len(stack_sizes). + Ex. (3, 3) would mean that targets for layers 2 and 3 are computed by a 3x3 convolution of the errors (E) + from the layer below (followed by max-pooling) + Ahat_filt_sizes: filter sizes for the prediction (Ahat) modules. + Has length equal to length of stack_sizes. + Ex. (3, 3, 3) would mean that the predictions for each layer are computed by a 3x3 convolution of the + representation (R) modules at each layer. + R_filt_sizes: filter sizes for the representation (R) modules. + Has length equal to length of stack_sizes. + Corresponds to the filter sizes for all convolutions in the LSTM. + pixel_max: the maximum pixel value. + Used to clip the pixel-layer prediction. + error_activation: activation function for the error (E) units. + A_activation: activation function for the target (A) and prediction (A_hat) units. + LSTM_activation: activation function for the cell and hidden states of the LSTM. + LSTM_inner_activation: activation function for the gates in the LSTM. + output_mode: either 'error', 'prediction', 'all' or layer specification (ex. R2, see below). + Controls what is outputted by the PredNet. + If 'error', the mean response of the error (E) units of each layer will be outputted. + That is, the output shape will be (batch_size, nb_layers). + If 'prediction', the frame prediction will be outputted. + If 'all', the output will be the frame prediction concatenated with the mean layer errors. + The frame prediction is flattened before concatenation. 
+ Nomenclature of 'all' is kept for backwards compatibility, but should not be confused with returning all of the layers of the model + For returning the features of a particular layer, output_mode should be of the form unit_type + layer_number. + For instance, to return the features of the LSTM "representational" units in the lowest layer, output_mode should be specificied as 'R0'. + The possible unit types are 'R', 'Ahat', 'A', and 'E' corresponding to the 'representation', 'prediction', 'target', and 'error' units respectively. + extrap_start_time: time step for which model will start extrapolating. + Starting at this time step, the prediction from the previous time step will be treated as the "actual" + data_format: 'channels_first' or 'channels_last'. + It defaults to the `image_data_format` value found in your + Keras config file at `~/.keras/keras.json`. + + # To-do: + + - implement an encoder and decoder for label classfication. + + # References + - [Deep predictive coding networks for video prediction and unsupervised learning](https://arxiv.org/abs/1605.08104) + - [Long short-term memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf) + - [Convolutional LSTM network: a machine learning approach for precipitation nowcasting](http://arxiv.org/abs/1506.04214) + - [Predictive coding in the visual cortex: a functional interpretation of some extra-classical receptive-field effects](http://www.nature.com/neuro/journal/v2/n1/pdf/nn0199_79.pdf) + ''' + @legacy_prednet_support + def __init__(self, stack_sizes, R_stack_sizes, + A_filt_sizes, Ahat_filt_sizes, R_filt_sizes, + pixel_max=1., error_activation='relu', A_activation='relu', + LSTM_activation='tanh', LSTM_inner_activation='hard_sigmoid', + output_mode='error', extrap_start_time=None, + data_format=K.image_data_format(), **kwargs): + self.stack_sizes = stack_sizes + self.nb_layers = len(stack_sizes) + assert len(R_stack_sizes) == self.nb_layers, 'len(R_stack_sizes) must equal len(stack_sizes)' + self.R_stack_sizes = R_stack_sizes + assert len(A_filt_sizes) == (self.nb_layers - 1), 'len(A_filt_sizes) must equal len(stack_sizes) - 1' + self.A_filt_sizes = A_filt_sizes + assert len(Ahat_filt_sizes) == self.nb_layers, 'len(Ahat_filt_sizes) must equal len(stack_sizes)' + self.Ahat_filt_sizes = Ahat_filt_sizes + assert len(R_filt_sizes) == (self.nb_layers), 'len(R_filt_sizes) must equal len(stack_sizes)' + self.R_filt_sizes = R_filt_sizes + + self.pixel_max = pixel_max + self.error_activation = activations.get(error_activation) + self.A_activation = activations.get(A_activation) + self.LSTM_activation = activations.get(LSTM_activation) + self.LSTM_inner_activation = activations.get(LSTM_inner_activation) + + default_output_modes = ['prediction', 'error', 'all'] + layer_output_modes = [layer + str(n) for n in range(self.nb_layers) for layer in ['R', 'E', 'A', 'Ahat']] + assert output_mode in default_output_modes + layer_output_modes, 'Invalid output_mode: ' + str(output_mode) + self.output_mode = output_mode + if self.output_mode in layer_output_modes: + self.output_layer_type = self.output_mode[:-1] + self.output_layer_num = int(self.output_mode[-1]) + else: + self.output_layer_type = None + self.output_layer_num = None + self.extrap_start_time = extrap_start_time + + assert data_format in {'channels_last', 'channels_first'}, 'data_format must be in {channels_last, channels_first}' + self.data_format = data_format + self.channel_axis = -3 if data_format == 'channels_first' else -1 + self.row_axis = -2 if data_format == 
'channels_first' else -3 + self.column_axis = -1 if data_format == 'channels_first' else -2 + super(PredNet, self).__init__(**kwargs) + self.input_spec = [InputSpec(ndim=5)] + + def compute_output_shape(self, input_shape): + if self.output_mode == 'prediction': + out_shape = input_shape[2:] + elif self.output_mode == 'error': + out_shape = (self.nb_layers,) + elif self.output_mode == 'all': + out_shape = (np.prod(input_shape[2:]) + self.nb_layers,) + else: + stack_str = 'R_stack_sizes' if self.output_layer_type == 'R' else 'stack_sizes' + stack_mult = 2 if self.output_layer_type == 'E' else 1 + out_stack_size = stack_mult * getattr(self, stack_str)[self.output_layer_num] + out_nb_row = input_shape[self.row_axis] / 2**self.output_layer_num + out_nb_col = input_shape[self.column_axis] / 2**self.output_layer_num + if self.data_format == 'channels_first': + out_shape = (out_stack_size, out_nb_row, out_nb_col) + else: + out_shape = (out_nb_row, out_nb_col, out_stack_size) + + if self.return_sequences: + return (input_shape[0], input_shape[1]) + out_shape + else: + return (input_shape[0],) + out_shape + + def get_initial_state(self, x): + input_shape = self.input_spec[0].shape + init_nb_row = input_shape[self.row_axis] + init_nb_col = input_shape[self.column_axis] + + base_initial_state = K.zeros_like(x) # (samples, timesteps) + image_shape + non_channel_axis = -1 if self.data_format == 'channels_first' else -2 + for _ in range(2): + base_initial_state = K.sum(base_initial_state, axis=non_channel_axis) + base_initial_state = K.sum(base_initial_state, axis=1) # (samples, nb_channels) + + initial_states = [] + states_to_pass = ['r', 'c', 'e'] + nlayers_to_pass = {u: self.nb_layers for u in states_to_pass} + if self.extrap_start_time is not None: + states_to_pass.append('ahat') # pass prediction in states so can use as actual for t+1 when extrapolating + nlayers_to_pass['ahat'] = 1 + for u in states_to_pass: + for l in range(nlayers_to_pass[u]): + ds_factor = 2 ** l + nb_row = init_nb_row // ds_factor + nb_col = init_nb_col // ds_factor + if u in ['r', 'c']: + stack_size = self.R_stack_sizes[l] + elif u == 'e': + stack_size = 2 * self.stack_sizes[l] + elif u == 'ahat': + stack_size = self.stack_sizes[l] + output_size = stack_size * nb_row * nb_col # flattened size + + reducer = K.zeros((input_shape[self.channel_axis], output_size)) # (nb_channels, output_size) + initial_state = K.dot(base_initial_state, reducer) # (samples, output_size) + if self.data_format == 'channels_first': + output_shp = (-1, stack_size, nb_row, nb_col) + else: + output_shp = (-1, nb_row, nb_col, stack_size) + initial_state = K.reshape(initial_state, output_shp) + initial_states += [initial_state] + + if K._BACKEND == 'theano': + from theano import tensor as T + # There is a known issue in the Theano scan op when dealing with inputs whose shape is 1 along a dimension. + # In our case, this is a problem when training on grayscale images, and the below line fixes it. 
+ initial_states = [T.unbroadcast(init_state, 0, 1) for init_state in initial_states] + + if self.extrap_start_time is not None: + initial_states += [K.variable(0, int if K.backend() != 'tensorflow' else 'int32')] # the last state will correspond to the current timestep + return initial_states + + def build(self, input_shape): + self.input_spec = [InputSpec(shape=input_shape)] + self.dropout_layer = Dropout(0.2, input_shape=input_shape) + self.conv_layers = {c: [] for c in ['i', 'f', 'c', 'o', 'a', 'ahat']} + + + for l in range(self.nb_layers): + for c in ['i', 'f', 'c', 'o']: + act = self.LSTM_activation if c == 'c' else self.LSTM_inner_activation # inner activation = hard sigmoid activation function + self.conv_layers[c].append(Conv2D(self.R_stack_sizes[l], self.R_filt_sizes[l], padding='same', activation=act, data_format=self.data_format)) + + + act = 'relu' if l == 0 else self.A_activation + self.conv_layers['ahat'].append(Conv2D(self.stack_sizes[l], self.Ahat_filt_sizes[l], padding='same', activation=act, data_format=self.data_format)) + + if l < self.nb_layers - 1: + self.conv_layers['a'].append(Conv2D(self.stack_sizes[l+1], self.A_filt_sizes[l], padding='same', activation=self.A_activation, data_format=self.data_format)) + + + + self.upsample = UpSampling2D(data_format=self.data_format) + self.pool = MaxPooling2D(data_format=self.data_format) + + self.trainable_weights = [] + nb_row, nb_col = (input_shape[-2], input_shape[-1]) if self.data_format == 'channels_first' else (input_shape[-3], input_shape[-2]) + for c in sorted(self.conv_layers.keys()): + for l in range(len(self.conv_layers[c])): + ds_factor = 2 ** l + if c == 'ahat': + nb_channels = self.R_stack_sizes[l] + elif c == 'a': + nb_channels = 2 * self.stack_sizes[l] + else: + nb_channels = self.stack_sizes[l] * 2 + self.R_stack_sizes[l] + if l < self.nb_layers - 1: + nb_channels += self.R_stack_sizes[l+1] + in_shape = (input_shape[0], nb_channels, nb_row // ds_factor, nb_col // ds_factor) + if self.data_format == 'channels_last': in_shape = (in_shape[0], in_shape[2], in_shape[3], in_shape[1]) + with K.name_scope('layer_' + c + '_' + str(l)): + self.conv_layers[c][l].build(in_shape) + self.trainable_weights += self.conv_layers[c][l].trainable_weights + + self.states = [None] * self.nb_layers*3 + + if self.extrap_start_time is not None: + self.t_extrap = K.variable(self.extrap_start_time, int if K.backend() != 'tensorflow' else 'int32') + self.states += [None] * 2 # [previous frame prediction, timestep] + + def step(self, a, states): + r_tm1 = states[:self.nb_layers] + c_tm1 = states[self.nb_layers:2*self.nb_layers] + e_tm1 = states[2*self.nb_layers:3*self.nb_layers] + + if self.extrap_start_time is not None: + t = states[-1] + a = K.switch(t >= self.t_extrap, states[-2], a) # if past self.extrap_start_time, the previous prediction will be treated as the actual + + c = [] + r = [] + e = [] + + # Update R units starting from the top + for l in reversed(range(self.nb_layers)): + inputs = [r_tm1[l], e_tm1[l]] + if l < self.nb_layers - 1: + inputs.append(r_up) + + inputs = K.concatenate(inputs, axis=self.channel_axis) + i = self.conv_layers['i'][l].call(inputs) + f = self.conv_layers['f'][l].call(inputs) + o = self.conv_layers['o'][l].call(inputs) + _c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(inputs) + _r = o * self.LSTM_activation(_c) + c.insert(0, _c) + r.insert(0, _r) + + if l > 0: + r_up = self.upsample.call(_r) + + # Update feedforward path starting from the bottom + for l in range(self.nb_layers): + ahat = 
self.conv_layers['ahat'][l].call(r[l]) + if l == 0: + ahat = K.minimum(ahat, self.pixel_max) + frame_prediction = ahat + + # compute errors + e_up = self.error_activation(ahat - a) + e_down = self.error_activation(a - ahat) + + e.append(K.concatenate((e_up, e_down), axis=self.channel_axis)) + + if self.output_layer_num == l: + if self.output_layer_type == 'A': + output = a + elif self.output_layer_type == 'Ahat': + output = ahat + elif self.output_layer_type == 'R': + output = r[l] + elif self.output_layer_type == 'E': + output = e[l] + + if l < self.nb_layers - 1: + a = self.conv_layers['a'][l].call(e[l]) + a = self.pool.call(a) # target for next layer + + if self.output_layer_type is None: + if self.output_mode == 'prediction': + output = frame_prediction + else: + for l in range(self.nb_layers): + layer_error = K.mean(K.batch_flatten(e[l]), axis=-1, keepdims=True) + all_error = layer_error if l == 0 else K.concatenate((all_error, layer_error), axis=-1) + if self.output_mode == 'error': + output = all_error + else: + output = K.concatenate((K.batch_flatten(frame_prediction), all_error), axis=-1) + + states = r + c + e + if self.extrap_start_time is not None: + states += [frame_prediction, t + 1] + return output, states + + def get_config(self): + config = {'stack_sizes': self.stack_sizes, + 'R_stack_sizes': self.R_stack_sizes, + 'A_filt_sizes': self.A_filt_sizes, + 'Ahat_filt_sizes': self.Ahat_filt_sizes, + 'R_filt_sizes': self.R_filt_sizes, + 'pixel_max': self.pixel_max, + 'error_activation': self.error_activation.__name__, + 'A_activation': self.A_activation.__name__, + 'LSTM_activation': self.LSTM_activation.__name__, + 'LSTM_inner_activation': self.LSTM_inner_activation.__name__, + 'data_format': self.data_format, + 'extrap_start_time': self.extrap_start_time, + 'output_mode': self.output_mode} + base_config = super(PredNet, self).get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/prednet.py b/prednet.py old mode 100755 new mode 100644 index b5a0208..76a5bf2 --- a/prednet.py +++ b/prednet.py @@ -2,7 +2,7 @@ from keras import backend as K from keras import activations -from keras.layers import Recurrent +from keras.layers import Recurrent, Dropout from keras.layers import Conv2D, UpSampling2D, MaxPooling2D from keras.engine import InputSpec from keras_utils import legacy_prednet_support diff --git a/prednet.sublime-workspace b/prednet.sublime-workspace new file mode 100644 index 0000000..08aa5d0 --- /dev/null +++ b/prednet.sublime-workspace @@ -0,0 +1,273 @@ +{ + "auto_complete": + { + "selected_items": + [ + [ + "we", + "weigthPath" + ] + ] + }, + "buffers": + [ + ], + "build_system": "", + "build_system_choices": + [ + ], + "build_varint": "", + "command_palette": + { + "height": 0.0, + "last_filter": "", + "selected_items": + [ + ], + "width": 0.0 + }, + "console": + { + "height": 0.0, + "history": + [ + ] + }, + "distraction_free": + { + "menu_visible": true, + "show_minimap": false, + "show_open_files": false, + "show_tabs": false, + "side_bar_visible": false, + "status_bar_visible": false + }, + "file_history": + [ + "/C/Users/Edward/Anaconda3/envs/prednet_fixed/Lib/site-packages/keras/layers/recurrent.py", + "/D/PredNet_Fixed/prednet.py", + "/D/PredNet_Fixed/kitti_settings.py", + "/D/PredNet_Fixed/keras_utils.py", + "/D/PredNet_Fixed/data_utils.py", + "/D/PredNet_Fixed/kitti_extrap_finetune.py", + "/D/PredNet_Fixed/kitti_evaluate.py", + "/D/PredNet_Fixed/kitti_train.py", + "/D/PredNet_Fixed/process_kitti.py", + 
"/C/Users/Edward/Anaconda3/envs/prednet_fixed/Lib/site-packages/keras/backend/tensorflow_backend.py", + "/C/Users/Edward/Desktop/column.csv", + "/C/Users/Edward/Downloads/CalcMeasures.txt", + "/C/Users/Edward/Desktop/Readme.txt", + "/D/csci926/CSCI926/PythonAPI/PythonAPI/examples/output.txt", + "/D/csci926/CSCI926/PythonAPI/PythonAPI/examples/AutomaticTestGenerationToolV3.py", + "/D/CARLA_0.9.9/WindowsNoEditor/PythonAPI/examples/automatic_control.py", + "/C/Program Files/PuTTY/README.txt", + "/D/research_prednet/kitti_data/X_test.hkl", + "/D/research_prednet/prednet.py", + "/D/INFO911/YearPredictionMSD.txt/YearPredictionMSD.csv", + "/D/research_prednet/convert_hickle.py", + "/D/research_prednet/kitti_data/convert_hickle.py", + "/D/research_prednet/kitti_train.py", + "/D/research_prednet/prednet/kitti_evaluate.py", + "/D/research_prednet/prednet/prednet.py", + "/D/research_prednet/prednet_37/prednet.py", + "/D/research_prednet/prednet_37/data_utils.py", + "/D/research_prednet/prednet_37/kitti_train.py", + "/D/research_prednet/prednet_37/process_kitti.py", + "/D/research_prednet/prednet/process_kitti.py", + "/D/research_prednet/prednet/data_utils.py", + "/D/research_prednet/prednet_37/environment.yml", + "/D/research_prednet/prednet/environment.yml", + "/D/research_prednet/prednet/.gitignore", + "/D/research_prednet/new/kitti_train.py", + "/D/research_prednet/new/kitti_extrap_finetune.py", + "/D/research_prednet/new/kitti_evaluate.py", + "/C/Users/Edward/Desktop/Task2_LeNet-5.py", + "/D/research_prednet/new/keras_utils.py", + "/D/research_prednet/new/data_utils.py", + "/D/research_prednet/new/prednet.py", + "/D/research_prednet/tree_report.txt", + "/D/research_prednet/keras_utils.py", + "/C/Users/Edward/Anaconda3/envs/pred_net/Lib/site-packages/keras/callbacks.py", + "/D/research_prednet/download_data.sh", + "/C/Users/Edward/Anaconda3/envs/pred_net/Lib/sched.py", + "/C/Users/Edward/Anaconda3/envs/research36/Lib/site-packages/keras/layers/recurrent.py", + "/D/research_prednet/data_utils.py", + "/D/animalcrossing/showoff/requirements.txt", + "/D/CARLA_0.9.9/WindowsNoEditor/PythonAPI/examples/drive_forward_tutorial_2.py", + "/C/Users/Edward/Documents/CSCI991/requirements", + "/D/ai/a1/definitions.txt", + "/C/Users/Edward/Documents/ai/a1/activity.py", + "/D/ai/a1/Student_Taxi_Mapping.txt", + "/D/ai/a1/test_.R", + "/C/Users/Edward/Documents/DeepLearning/Prism-ColouringGreyscalePhotos/Prism.py", + "/C/Users/Edward/Documents/DeepLearning/Prism-ColouringGreyscalePhotos/main.py", + "/C/Users/Edward/Desktop/lab.R", + "/D/CARLA_0.9.9/WindowsNoEditor/PythonAPI/examples/drive_forward_tutorial_2", + "/C/Users/Edward/Downloads/README_format_release2.txt", + "/C/Users/Edward/Downloads/VIRAT_S_000002.viratdata.objects.txt", + "/C/Users/Edward/AppData/Local/Temp/Temp1_FM20 Transfers & Data Update Pack 3.3 (by pr0 & FMTU).zip/Instructions-Donation info/How to install correctly.txt", + "/C/Users/Edward/Documents/CSCI991/utils.py", + "/C/Users/Edward/Documents/CSCI991/ImageNet-Datasets-Downloader-master/requirements.txt", + "/C/Users/Edward/Documents/CSCI991/main_cifar.py", + "/D/DeepLearning/Standnews/politician/recognize_faces_image.py", + "/D/DeepLearning/Standnews/politician/encode_faces.py", + "/D/DeepLearning/Standnews/politician/search_bing_api.py", + "/D/DeepLearning/Standnews/politician/build_face_dataset.py", + "/D/CSCI946/datasets/train_label.txt", + "/C/Users/Edward/Documents/gekko-develop/web/vue/public/UIconfig.js", + "/C/Users/Edward/Downloads/LeNet-5.py", + 
"/C/Users/Edward/Downloads/Logistic_Regression.py", + "/D/CSCI946/datasets/test_labels.txt", + "/D/CSCI946/datasets/test_label.txt", + "/D/CSCI946/train.csv", + "/C/Users/Edward/Documents/CSCI946/Week_8/section_9_7.py", + "/D/CSCI946/Neural Network Implementation/YOLO/yolo.py", + "/D/CSCI946/datasets/label.txt", + "/D/CSCI946/Assignment 2/Task 1/test.map", + "/D/CSCI946/Assignment 2/Task 1/train.label", + "/C/Users/Edward/Downloads/Untitled1.py", + "/D/CSCI946/Neural Network Implementation/PCN/main_cifar.py", + "/D/DeepLearning/mnist.py", + "/C/Users/Edward/Anaconda3/envs/bigdata/etc/keras/load_config.py", + "/C/Users/Edward/.jupyter/jupyter_notebook_config.py", + "/C/Users/Edward/Downloads/20news-19997.tar/20news-19997/20_newsgroups/alt.atheism/51121", + "/D/CSCI946/Assignment 2/Task 1/train.data", + "/D/CSCI946/Assignment 2/final_tweet2.csv", + "/C/Users/Edward/Downloads/Untitled1-Copy2.py", + "/C/Users/Edward/Downloads/siftImages.cpp", + "/C/Users/Edward/Documents/GitHub/PCN-with-Local-Recurrent-Processing/main_imagenet.py", + "/C/Users/Edward/Anaconda3/envs/cv/Lib/site-packages/cv2/__init__.py", + "/C/Users/Edward/Downloads/openCVex-4.1.1-v15/openCVex-4.1.1-v15/build/python/cv2/__init__.py", + "/C/Users/Edward/Downloads/openCVex-4.1.1-v15/openCVex-4.1.1-v15/build/python/cv2/load_config_py3.py", + "/C/Users/Edward/Downloads/openCVex-4.1.1-v15/openCVex-4.1.1-v15/build/python/cv2/config-3.7.py", + "/C/Users/Edward/Downloads/20news-19997.tar/20news-19997/20_newsgroups/rec.sport.baseball/102584", + "/C/Users/Edward/Downloads/openCVex-4.1.1-v15/openCVex-4.1.1-v15/build/setup_vars_opencv4.cmd", + "/C/Users/Edward/Downloads/openCVex-4.1.1-v15/openCVex-4.1.1-v15/build/LICENSE", + "/C/Users/Edward/Downloads/openCVex-4.1.1-v15/openCVex-4.1.1-v15/build/python/cv2/python-3.7/cv2.cp37-win_amd64.pyd", + "/D/Downloads/RC_2015-01/RC_2015-01", + "/D/GAN/environment.yml", + "/D/GAN/datasets/download_cyclegan_dataset.sh", + "/D/CSCI946/Week7_tutorial/Chapter 7/DTdata.csv", + "/C/Users/Edward/Downloads/bzip2-1.0.8.0-win-x64/bzip2.exe" + ], + "find": + { + "height": 22.0 + }, + "find_in_files": + { + "height": 0.0, + "where_history": + [ + ] + }, + "find_state": + { + "case_sensitive": false, + "find_history": + [ + ], + "highlight": true, + "in_selection": false, + "preserve_case": false, + "regex": false, + "replace_history": + [ + ], + "reverse": false, + "show_context": true, + "use_buffer2": true, + "whole_word": false, + "wrap": true + }, + "groups": + [ + { + "sheets": + [ + ] + } + ], + "incremental_find": + { + "height": 22.0 + }, + "input": + { + "height": 34.0 + }, + "layout": + { + "cells": + [ + [ + 0, + 0, + 1, + 1 + ] + ], + "cols": + [ + 0.0, + 1.0 + ], + "rows": + [ + 0.0, + 1.0 + ] + }, + "menu_visible": true, + "output.find_results": + { + "height": 0.0 + }, + "pinned_build_system": "", + "project": "prednet.sublime-project", + "replace": + { + "height": 40.0 + }, + "save_all_on_build": true, + "select_file": + { + "height": 0.0, + "last_filter": "", + "selected_items": + [ + ], + "width": 0.0 + }, + "select_project": + { + "height": 0.0, + "last_filter": "", + "selected_items": + [ + ], + "width": 0.0 + }, + "select_symbol": + { + "height": 0.0, + "last_filter": "", + "selected_items": + [ + ], + "width": 0.0 + }, + "selected_group": 0, + "settings": + { + }, + "show_minimap": true, + "show_open_files": false, + "show_tabs": true, + "side_bar_visible": true, + "side_bar_width": 150.0, + "status_bar_visible": true, + "template_settings": + { + } +} diff --git a/prednet_do.py 
b/prednet_do.py new file mode 100644 index 0000000..fd5e54c --- /dev/null +++ b/prednet_do.py @@ -0,0 +1,312 @@ +import numpy as np + +from keras import backend as K +from keras import activations +from keras.layers import Recurrent, Dropout +from keras.layers import Conv2D, UpSampling2D, MaxPooling2D +from keras.engine import InputSpec +from keras_utils import legacy_prednet_support + +class PredNet(Recurrent): + '''PredNet architecture - Lotter 2016. + Stacked convolutional LSTM inspired by predictive coding principles. + + # Arguments + stack_sizes: number of channels in targets (A) and predictions (Ahat) in each layer of the architecture. + Length is the number of layers in the architecture. + First element is the number of channels in the input. + Ex. (3, 16, 32) would correspond to a 3 layer architecture that takes in RGB images and has 16 and 32 + channels in the second and third layers, respectively. + R_stack_sizes: number of channels in the representation (R) modules. + Length must equal length of stack_sizes, but the number of channels per layer can be different. + A_filt_sizes: filter sizes for the target (A) modules. + Has length of 1 - len(stack_sizes). + Ex. (3, 3) would mean that targets for layers 2 and 3 are computed by a 3x3 convolution of the errors (E) + from the layer below (followed by max-pooling) + Ahat_filt_sizes: filter sizes for the prediction (Ahat) modules. + Has length equal to length of stack_sizes. + Ex. (3, 3, 3) would mean that the predictions for each layer are computed by a 3x3 convolution of the + representation (R) modules at each layer. + R_filt_sizes: filter sizes for the representation (R) modules. + Has length equal to length of stack_sizes. + Corresponds to the filter sizes for all convolutions in the LSTM. + pixel_max: the maximum pixel value. + Used to clip the pixel-layer prediction. + error_activation: activation function for the error (E) units. + A_activation: activation function for the target (A) and prediction (A_hat) units. + LSTM_activation: activation function for the cell and hidden states of the LSTM. + LSTM_inner_activation: activation function for the gates in the LSTM. + output_mode: either 'error', 'prediction', 'all' or layer specification (ex. R2, see below). + Controls what is outputted by the PredNet. + If 'error', the mean response of the error (E) units of each layer will be outputted. + That is, the output shape will be (batch_size, nb_layers). + If 'prediction', the frame prediction will be outputted. + If 'all', the output will be the frame prediction concatenated with the mean layer errors. + The frame prediction is flattened before concatenation. + Nomenclature of 'all' is kept for backwards compatibility, but should not be confused with returning all of the layers of the model + For returning the features of a particular layer, output_mode should be of the form unit_type + layer_number. + For instance, to return the features of the LSTM "representational" units in the lowest layer, output_mode should be specificied as 'R0'. + The possible unit types are 'R', 'Ahat', 'A', and 'E' corresponding to the 'representation', 'prediction', 'target', and 'error' units respectively. + extrap_start_time: time step for which model will start extrapolating. + Starting at this time step, the prediction from the previous time step will be treated as the "actual" + data_format: 'channels_first' or 'channels_last'. + It defaults to the `image_data_format` value found in your + Keras config file at `~/.keras/keras.json`. 
+ + # References + - [Deep predictive coding networks for video prediction and unsupervised learning](https://arxiv.org/abs/1605.08104) + - [Long short-term memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf) + - [Convolutional LSTM network: a machine learning approach for precipitation nowcasting](http://arxiv.org/abs/1506.04214) + - [Predictive coding in the visual cortex: a functional interpretation of some extra-classical receptive-field effects](http://www.nature.com/neuro/journal/v2/n1/pdf/nn0199_79.pdf) + ''' + @legacy_prednet_support + def __init__(self, stack_sizes, R_stack_sizes, + A_filt_sizes, Ahat_filt_sizes, R_filt_sizes, + pixel_max=1., error_activation='relu', A_activation='relu', + LSTM_activation='tanh', LSTM_inner_activation='hard_sigmoid', + output_mode='error', extrap_start_time=None, + data_format=K.image_data_format(), **kwargs): + self.stack_sizes = stack_sizes + self.nb_layers = len(stack_sizes) + assert len(R_stack_sizes) == self.nb_layers, 'len(R_stack_sizes) must equal len(stack_sizes)' + self.R_stack_sizes = R_stack_sizes + assert len(A_filt_sizes) == (self.nb_layers - 1), 'len(A_filt_sizes) must equal len(stack_sizes) - 1' + self.A_filt_sizes = A_filt_sizes + assert len(Ahat_filt_sizes) == self.nb_layers, 'len(Ahat_filt_sizes) must equal len(stack_sizes)' + self.Ahat_filt_sizes = Ahat_filt_sizes + assert len(R_filt_sizes) == (self.nb_layers), 'len(R_filt_sizes) must equal len(stack_sizes)' + self.R_filt_sizes = R_filt_sizes + + self.pixel_max = pixel_max + self.error_activation = activations.get(error_activation) + self.A_activation = activations.get(A_activation) + self.LSTM_activation = activations.get(LSTM_activation) + self.LSTM_inner_activation = activations.get(LSTM_inner_activation) + + default_output_modes = ['prediction', 'error', 'all'] + layer_output_modes = [layer + str(n) for n in range(self.nb_layers) for layer in ['R', 'E', 'A', 'Ahat']] + assert output_mode in default_output_modes + layer_output_modes, 'Invalid output_mode: ' + str(output_mode) + self.output_mode = output_mode + if self.output_mode in layer_output_modes: + self.output_layer_type = self.output_mode[:-1] + self.output_layer_num = int(self.output_mode[-1]) + else: + self.output_layer_type = None + self.output_layer_num = None + self.extrap_start_time = extrap_start_time + + assert data_format in {'channels_last', 'channels_first'}, 'data_format must be in {channels_last, channels_first}' + self.data_format = data_format + self.channel_axis = -3 if data_format == 'channels_first' else -1 + self.row_axis = -2 if data_format == 'channels_first' else -3 + self.column_axis = -1 if data_format == 'channels_first' else -2 + super(PredNet, self).__init__(**kwargs) + self.input_spec = [InputSpec(ndim=5)] + + def compute_output_shape(self, input_shape): + if self.output_mode == 'prediction': + out_shape = input_shape[2:] + elif self.output_mode == 'error': + out_shape = (self.nb_layers,) + elif self.output_mode == 'all': + out_shape = (np.prod(input_shape[2:]) + self.nb_layers,) + else: + stack_str = 'R_stack_sizes' if self.output_layer_type == 'R' else 'stack_sizes' + stack_mult = 2 if self.output_layer_type == 'E' else 1 + out_stack_size = stack_mult * getattr(self, stack_str)[self.output_layer_num] + out_nb_row = input_shape[self.row_axis] / 2**self.output_layer_num + out_nb_col = input_shape[self.column_axis] / 2**self.output_layer_num + if self.data_format == 'channels_first': + out_shape = (out_stack_size, out_nb_row, out_nb_col) + else: + out_shape = (out_nb_row, 
out_nb_col, out_stack_size) + + if self.return_sequences: + return (input_shape[0], input_shape[1]) + out_shape + else: + return (input_shape[0],) + out_shape + + def get_initial_state(self, x): + input_shape = self.input_spec[0].shape + init_nb_row = input_shape[self.row_axis] + init_nb_col = input_shape[self.column_axis] + + base_initial_state = K.zeros_like(x) # (samples, timesteps) + image_shape + non_channel_axis = -1 if self.data_format == 'channels_first' else -2 + for _ in range(2): + base_initial_state = K.sum(base_initial_state, axis=non_channel_axis) + base_initial_state = K.sum(base_initial_state, axis=1) # (samples, nb_channels) + + initial_states = [] + states_to_pass = ['r', 'c', 'e'] + nlayers_to_pass = {u: self.nb_layers for u in states_to_pass} + if self.extrap_start_time is not None: + states_to_pass.append('ahat') # pass prediction in states so can use as actual for t+1 when extrapolating + nlayers_to_pass['ahat'] = 1 + for u in states_to_pass: + for l in range(nlayers_to_pass[u]): + ds_factor = 2 ** l + nb_row = init_nb_row // ds_factor + nb_col = init_nb_col // ds_factor + if u in ['r', 'c']: + stack_size = self.R_stack_sizes[l] + elif u == 'e': + stack_size = 2 * self.stack_sizes[l] + elif u == 'ahat': + stack_size = self.stack_sizes[l] + output_size = stack_size * nb_row * nb_col # flattened size + + reducer = K.zeros((input_shape[self.channel_axis], output_size)) # (nb_channels, output_size) + initial_state = K.dot(base_initial_state, reducer) # (samples, output_size) + if self.data_format == 'channels_first': + output_shp = (-1, stack_size, nb_row, nb_col) + else: + output_shp = (-1, nb_row, nb_col, stack_size) + initial_state = K.reshape(initial_state, output_shp) + initial_states += [initial_state] + + if K._BACKEND == 'theano': + from theano import tensor as T + # There is a known issue in the Theano scan op when dealing with inputs whose shape is 1 along a dimension. + # In our case, this is a problem when training on grayscale images, and the below line fixes it. 
+ initial_states = [T.unbroadcast(init_state, 0, 1) for init_state in initial_states] + + if self.extrap_start_time is not None: + initial_states += [K.variable(0, int if K.backend() != 'tensorflow' else 'int32')] # the last state will correspond to the current timestep + return initial_states + + def build(self, input_shape): + self.input_spec = [InputSpec(shape=input_shape)] + self.conv_layers = {c: [] for c in ['i', 'f', 'c', 'o', 'a', 'ahat']} + + for l in range(self.nb_layers): + for c in ['i', 'f', 'c', 'o']: + act = self.LSTM_activation if c == 'c' else self.LSTM_inner_activation + self.conv_layers[c].append(Conv2D(self.R_stack_sizes[l], self.R_filt_sizes[l], padding='same', activation=act, data_format=self.data_format)) + + + act = 'relu' if l == 0 else self.A_activation + self.conv_layers['ahat'].append(Conv2D(self.stack_sizes[l], self.Ahat_filt_sizes[l], padding='same', activation=act, data_format=self.data_format)) + + if l < self.nb_layers - 1: + self.conv_layers['a'].append(Conv2D(self.stack_sizes[l+1], self.A_filt_sizes[l], padding='same', activation=self.A_activation, data_format=self.data_format)) + + self.upsample = UpSampling2D(data_format=self.data_format) + self.pool = MaxPooling2D(data_format=self.data_format) + + self.trainable_weights = [] + nb_row, nb_col = (input_shape[-2], input_shape[-1]) if self.data_format == 'channels_first' else (input_shape[-3], input_shape[-2]) + for c in sorted(self.conv_layers.keys()): + for l in range(len(self.conv_layers[c])): + ds_factor = 2 ** l + if c == 'ahat': + nb_channels = self.R_stack_sizes[l] + elif c == 'a': + nb_channels = 2 * self.stack_sizes[l] + else: + nb_channels = self.stack_sizes[l] * 2 + self.R_stack_sizes[l] + if l < self.nb_layers - 1: + nb_channels += self.R_stack_sizes[l+1] + in_shape = (input_shape[0], nb_channels, nb_row // ds_factor, nb_col // ds_factor) + if self.data_format == 'channels_last': in_shape = (in_shape[0], in_shape[2], in_shape[3], in_shape[1]) + with K.name_scope('layer_' + c + '_' + str(l)): + self.conv_layers[c][l].build(in_shape) + self.trainable_weights += self.conv_layers[c][l].trainable_weights + + self.states = [None] * self.nb_layers*3 + + if self.extrap_start_time is not None: + self.t_extrap = K.variable(self.extrap_start_time, int if K.backend() != 'tensorflow' else 'int32') + self.states += [None] * 2 # [previous frame prediction, timestep] + + def step(self, a, states): + r_tm1 = states[:self.nb_layers] + c_tm1 = states[self.nb_layers:2*self.nb_layers] + e_tm1 = states[2*self.nb_layers:3*self.nb_layers] + + if self.extrap_start_time is not None: + t = states[-1] + a = K.switch(t >= self.t_extrap, states[-2], a) # if past self.extrap_start_time, the previous prediction will be treated as the actual + + c = [] + r = [] + e = [] + + # Update R units starting from the top + for l in reversed(range(self.nb_layers)): + inputs = [r_tm1[l], e_tm1[l]] + if l < self.nb_layers - 1: + inputs.append(r_up) + + inputs = K.concatenate(inputs, axis=self.channel_axis) + i = self.conv_layers['i'][l].call(inputs) + f = self.conv_layers['f'][l].call(inputs) + o = self.conv_layers['o'][l].call(inputs) + _c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(inputs) + _r = o * self.LSTM_activation(_c) + c.insert(0, _c) + r.insert(0, _r) + + if l > 0: + r_up = self.upsample.call(_r) + + # Update feedforward path starting from the bottom + for l in range(self.nb_layers): + ahat = self.conv_layers['ahat'][l].call(r[l]) + if l == 0: + ahat = K.minimum(ahat, self.pixel_max) + frame_prediction = ahat + + # 
compute errors + e_up = self.error_activation(ahat - a) + e_down = self.error_activation(a - ahat) + + e.append(K.concatenate((e_up, e_down), axis=self.channel_axis)) + + if self.output_layer_num == l: + if self.output_layer_type == 'A': + output = a + elif self.output_layer_type == 'Ahat': + output = ahat + elif self.output_layer_type == 'R': + output = r[l] + elif self.output_layer_type == 'E': + output = e[l] + + if l < self.nb_layers - 1: + a = self.conv_layers['a'][l].call(e[l]) + a = self.pool.call(a) # target for next layer + + if self.output_layer_type is None: + if self.output_mode == 'prediction': + output = frame_prediction + else: + for l in range(self.nb_layers): + layer_error = K.mean(K.batch_flatten(e[l]), axis=-1, keepdims=True) + all_error = layer_error if l == 0 else K.concatenate((all_error, layer_error), axis=-1) + if self.output_mode == 'error': + output = all_error + else: + output = K.concatenate((K.batch_flatten(frame_prediction), all_error), axis=-1) + + states = r + c + e + if self.extrap_start_time is not None: + states += [frame_prediction, t + 1] + return output, states + + def get_config(self): + config = {'stack_sizes': self.stack_sizes, + 'R_stack_sizes': self.R_stack_sizes, + 'A_filt_sizes': self.A_filt_sizes, + 'Ahat_filt_sizes': self.Ahat_filt_sizes, + 'R_filt_sizes': self.R_filt_sizes, + 'pixel_max': self.pixel_max, + 'error_activation': self.error_activation.__name__, + 'A_activation': self.A_activation.__name__, + 'LSTM_activation': self.LSTM_activation.__name__, + 'LSTM_inner_activation': self.LSTM_inner_activation.__name__, + 'data_format': self.data_format, + 'extrap_start_time': self.extrap_start_time, + 'output_mode': self.output_mode} + base_config = super(PredNet, self).get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/prednet_evaluate.py b/prednet_evaluate.py new file mode 100644 index 0000000..e3c51c1 --- /dev/null +++ b/prednet_evaluate.py @@ -0,0 +1,91 @@ +''' +Evaluate trained PredNet on KITTI sequences. +Calculates mean-squared error and plots predictions. 
+''' + +import os +import numpy as np +from six.moves import cPickle +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec + +from keras import backend as K +from keras.models import Model, model_from_json +from keras.layers import Input, Dense, Flatten + +from prednet import PredNet +from data_utils import SequenceGenerator +from kitti_settings import * + + +n_plot = 40 +batch_size = 10 +nt = 10 + +weights_file = os.path.join(WEIGHTS_DIR, 'tensorflow_weights/prednet_kitti_weights.hdf5') +json_file = os.path.join(WEIGHTS_DIR, 'prednet_kitti_model.json') +test_file = os.path.join(DATA_DIR, 'X_test.hkl') +test_sources = os.path.join(DATA_DIR, 'sources_test.hkl') + +# Load trained model +f = open(json_file, 'r') +json_string = f.read() +f.close() +train_model = model_from_json(json_string, custom_objects = {'PredNet': PredNet}) +train_model.load_weights(weights_file) + +# Create testing model (to output predictions) +layer_config = train_model.layers[1].get_config() +layer_config['output_mode'] = 'prediction' +data_format = layer_config['data_format'] if 'data_format' in layer_config else layer_config['dim_ordering'] +test_prednet = PredNet(weights=train_model.layers[1].get_weights(), **layer_config) +input_shape = list(train_model.layers[0].batch_input_shape[1:]) +input_shape[0] = nt +inputs = Input(shape=tuple(input_shape)) +predictions = test_prednet(inputs) +test_model = Model(inputs=inputs, outputs=predictions) + +test_generator = SequenceGenerator(test_file, test_sources, nt, sequence_start_mode='unique', data_format=data_format) +X_test = test_generator.create_all() +X_hat = test_model.predict(X_test, batch_size) +if data_format == 'channels_first': + X_test = np.transpose(X_test, (0, 1, 3, 4, 2)) + X_hat = np.transpose(X_hat, (0, 1, 3, 4, 2)) + +# Compare MSE of PredNet predictions vs. using last frame. Write results to prediction_scores.txt +mse_model = np.mean( (X_test[:, 1:] - X_hat[:, 1:])**2 ) # look at all timesteps except the first +mse_prev = np.mean( (X_test[:, :-1] - X_test[:, 1:])**2 ) +if not os.path.exists(RESULTS_SAVE_DIR): os.mkdir(RESULTS_SAVE_DIR) +f = open(RESULTS_SAVE_DIR + 'prediction_scores.txt', 'w') +f.write("Model MSE: %f\n" % mse_model) +f.write("Previous Frame MSE: %f" % mse_prev) +f.close() + +# Plot some predictions +# To-do: plot bounding box +aspect_ratio = float(X_hat.shape[2]) / X_hat.shape[3] +plt.figure(figsize = (nt, 2*aspect_ratio)) +gs = gridspec.GridSpec(2, nt) +gs.update(wspace=0., hspace=0.) 
+plot_save_dir = os.path.join(RESULTS_SAVE_DIR, 'prediction_plots/') +if not os.path.exists(plot_save_dir): os.mkdir(plot_save_dir) +plot_idx = np.random.permutation(X_test.shape[0])[:n_plot] +for i in plot_idx: + for t in range(nt): + plt.subplot(gs[t]) + plt.imshow(X_test[i,t], interpolation='none') + plt.tick_params(axis='both', which='both', bottom='off', top='off', left='off', right='off', labelbottom='off', labelleft='off') + if t==0: plt.ylabel('Actual', fontsize=10) + + plt.subplot(gs[t + nt]) + plt.imshow(X_hat[i,t], interpolation='none') + plt.tick_params(axis='both', which='both', bottom='off', top='off', left='off', right='off', labelbottom='off', labelleft='off') + if t==0: plt.ylabel('Predicted', fontsize=10) + + plt.savefig(plot_save_dir + 'plot_' + str(i) + '.png') + plt.clf() + + +# https://stackoverflow.com/questions/38445982/how-to-log-keras-loss-output-to-a-file \ No newline at end of file diff --git a/process_kitti.py b/process_kitti.py old mode 100755 new mode 100644 index 1d2f401..9472273 --- a/process_kitti.py +++ b/process_kitti.py @@ -8,7 +8,7 @@ import urllib.request import numpy as np from imageio import imread -from scipy.misc import imresize +from PIL import Image import hickle as hkl from kitti_settings import * @@ -32,7 +32,7 @@ def download_data(): r = requests.get(url) soup = BeautifulSoup(r.content) drive_list = soup.find_all("h3") - drive_list = [d.text[:d.text.find(' ')] for d in drive_list] + drive_list = [d.text[:d.text.find(' ')] for d in drive_list] print( "Downloading set: " + c) c_dir = base_dir + c + '/' if not os.path.exists(c_dir): os.mkdir(c_dir) @@ -77,6 +77,8 @@ def process_data(): print( 'Creating ' + split + ' data: ' + str(len(im_list)) + ' images') X = np.zeros((len(im_list),) + desired_im_sz + (3,), np.uint8) + # print('the shape of X: ' + str(X.shape)) + print(X[2].shape) for i, im_file in enumerate(im_list): im = imread(im_file) X[i] = process_im(im, desired_im_sz) @@ -88,13 +90,15 @@ def process_data(): # resize and crop image def process_im(im, desired_sz): target_ds = float(desired_sz[0])/im.shape[0] - im = imresize(im, (desired_sz[0], int(np.round(target_ds * im.shape[1])))) + im = np.array(Image.fromarray(im).resize((int(np.round(target_ds * im.shape[1])), desired_sz[0] ))) + + d = int((im.shape[1] - desired_sz[1]) / 2) im = im[:, d:d+desired_sz[1]] return im if __name__ == '__main__': - download_data() - extract_data() + # download_data() + # extract_data() process_data() diff --git a/process_nfs.py b/process_nfs.py new file mode 100644 index 0000000..46ce6a3 --- /dev/null +++ b/process_nfs.py @@ -0,0 +1,63 @@ +''' +Code for processing NFS video frames (e.g. the zebra_fish sequence) into a PredNet test set. +''' + +import os +import requests +from bs4 import BeautifulSoup +import urllib.request +import numpy as np +from imageio import imread +from PIL import Image +import hickle as hkl +from nfs_settings import * + + +desired_im_sz = (128, 160) +video_folder = 'zebra_fish/' +# categories = ['city', 'residential', 'road'] + +# Recordings used for validation and testing. +# Were initially chosen randomly such that one of the city recordings was used for validation and one of each category was used for testing. +# val_recordings = [('city', '2011_09_26_drive_0005_sync')] +# test_recordings = [('city', '2011_09_26_drive_0104_sync'), ('residential', '2011_09_26_drive_0079_sync'), ('road', '2011_09_26_drive_0070_sync')] + +# if not os.path.exists(DATA_DIR): os.mkdir(DATA_DIR) + +# Create image datasets.
+def process_data(): + + im_list = [] + source_list = [] # corresponds to recording that image came from + + im_dir = os.path.join(DATA_DIR, video_folder) + files = list(os.walk(im_dir, topdown=False))[-1][-1] + im_list += [im_dir + f for f in sorted(files)] + source_list += [video_folder] * len(files) + print( 'Creating test data: ' + str(len(im_list)) + ' images') + X = np.zeros((len(im_list),) + desired_im_sz + (3,), np.uint8) + + for i, im_file in enumerate(im_list): + im = imread(im_file) + X[i] = process_im(im, desired_im_sz) + + hkl.dump(X, os.path.join(DATA_DIR, 'zebra_fish_test.hkl')) + hkl.dump(source_list, os.path.join(DATA_DIR, 'sources_zebra_fish.hkl')) + + +# resize and crop image +def process_im(im, desired_sz): + print('ran 1') + target_ds = float(desired_sz[0])/im.shape[0] + print('ran 2') + im = np.array(Image.fromarray(im).resize((int(np.round(target_ds * im.shape[1])), desired_sz[0] ))) + print('ran 3') + d = int((im.shape[1] - desired_sz[1]) / 2) + print('ran 4') + im = im[:, d:d+desired_sz[1]] + print('ran 5') + return im + + +if __name__ == '__main__': + process_data() \ No newline at end of file diff --git a/somethingsomethingv2.py b/somethingsomethingv2.py new file mode 100644 index 0000000..abcf887 --- /dev/null +++ b/somethingsomethingv2.py @@ -0,0 +1,120 @@ +"""This script is for preprocessing something-something-v2 dataset. +The code is largely borrowed from https://github.com/MIT-HAN-LAB/temporal-shift-module +and https://github.com/metalbubble/TRN-pytorch/blob/master/process_dataset.py +""" + +import os +import sys +import threading +import argparse +import json + +def parse_args(): + parser = argparse.ArgumentParser(description='prepare something-something-v2 dataset') + parser.add_argument('--video_root', type=str, default='~/.mxnet/datasets/somethingsomethingv2/20bn-something-something-v2') + parser.add_argument('--frame_root', type=str, default='~/.mxnet/datasets/somethingsomethingv2/20bn-something-something-v2-frames') + parser.add_argument('--anno_root', type=str, default='~/.mxnet/datasets/somethingsomethingv2/annotations') + parser.add_argument('--num_threads', type=int, default=100) + parser.add_argument('--decode_video', action='store_true', default=True) + parser.add_argument('--build_file_list', action='store_true', default=True) + args = parser.parse_args() + + args.video_root = os.path.expanduser(args.video_root) + args.frame_root = os.path.expanduser(args.frame_root) + args.anno_root = os.path.expanduser(args.anno_root) + return args + +def split_func(l, n): + """Yield successive n-sized chunks from l.""" + for i in range(0, len(l), n): + yield l[i:i + n] + +def extract(video, tmpl='%06d.jpg'): + cmd = 'ffmpeg -i \"{}/{}\" -threads 1 -vf scale=-1:256 -q:v 0 \"{}/{}/%06d.jpg\"'.format(args.video_root, video, args.frame_root, video[:-5]) + os.system(cmd) + +def target(video_list): + for video in video_list: + os.makedirs(os.path.join(args.frame_root, video[:-5])) + extract(video) + +def decode_video(args): + print(args.video_root) + if not os.path.exists(args.video_root): + raise ValueError('Please download videos and set video_root variable.') + if not os.path.exists(args.frame_root): + os.makedirs(args.frame_root) + + video_list = os.listdir(args.video_root) + splits = list(split_func(video_list, args.num_threads)) + + threads = [] + for i, split in enumerate(splits): + thread = threading.Thread(target=target, args=(split,)) + thread.start() + threads.append(thread) + + for thread in threads: + thread.join() + +def build_file_list(args): + if 
not os.path.exists(args.anno_root): + raise ValueError('Please download annotations and set anno_root variable.') + + dataset_name = 'something-something-v2' + with open(os.path.join(args.anno_root, '%s-labels.json' % dataset_name)) as f: + data = json.load(f) + categories = [] + for i, (cat, idx) in enumerate(data.items()): + assert i == int(idx) # make sure the rank is right + categories.append(cat) + + with open('category.txt', 'w') as f: + f.write('\n'.join(categories)) + + dict_categories = {} + for i, category in enumerate(categories): + dict_categories[category] = i + + files_input = [os.path.join(args.anno_root, '%s-validation.json' % dataset_name), + os.path.join(args.anno_root, '%s-train.json' % dataset_name), + os.path.join(args.anno_root, '%s-test.json' % dataset_name)] + files_output = [os.path.join(args.anno_root, 'val_videofolder.txt'), + os.path.join(args.anno_root, 'train_videofolder.txt'), + os.path.join(args.anno_root, 'test_videofolder.txt')] + for (filename_input, filename_output) in zip(files_input, files_output): + with open(filename_input) as f: + data = json.load(f) + folders = [] + idx_categories = [] + for item in data: + folders.append(item['id']) + if 'test' not in filename_input: + idx_categories.append(dict_categories[item['template'].replace('[', '').replace(']', '')]) + else: + idx_categories.append(0) + output = [] + for i in range(len(folders)): + curFolder = folders[i] + curIDX = idx_categories[i] + # count the number of frames in each video folder + dir_files = os.listdir(os.path.join(args.frame_root, curFolder)) + if len(dir_files) == 0: + print('video decoding fails at %s' % (curFolder)) + sys.exit() + output.append('%s %d %d' % (curFolder, len(dir_files), curIDX)) + print('%d/%d' % (i, len(folders))) + with open(filename_output, 'w') as f: + f.write('\n'.join(output)) + +if __name__ == '__main__': + global args + args = parse_args() + + if args.decode_video: + print('Decoding videos to frames.') + decode_video(args) + + if args.build_file_list: + print('Generating training files.') + build_file_list(args)
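
A note on the get_initial_state code in the prednet.py hunk above: instead of asking for the batch size explicitly, the layer collapses the (zeroed) input down to a (samples, nb_channels) tensor and multiplies it by an all-zeros "reducer" of shape (nb_channels, output_size), which yields a correctly shaped block of zeros for every sample in the batch. A minimal NumPy sketch of the same idea; the shapes below are illustrative assumptions, not values taken from this repository:

import numpy as np

# Stand-in for a batch of input frames: (samples, timesteps, rows, cols, channels).
x = np.zeros((4, 10, 128, 160, 3))

# Collapse the two spatial axes, then the time axis, keeping (samples, nb_channels),
# mirroring the layer's repeated K.sum over the non-channel axes.
base = x
for _ in range(2):
    base = base.sum(axis=-2)   # drop cols, then rows
base = base.sum(axis=1)        # drop timesteps -> shape (4, 3)

# All-zeros "reducer" maps nb_channels -> flattened state size for one layer.
stack_size, nb_row, nb_col = 48, 64, 80            # hypothetical layer-1 sizes
reducer = np.zeros((x.shape[-1], stack_size * nb_row * nb_col))
state = base.dot(reducer).reshape((-1, nb_row, nb_col, stack_size))
print(state.shape)             # (4, 64, 80, 48): zeros, batch size inferred from x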
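
prednet_evaluate.py scores the model against a copy-last-frame baseline: the "previous frame" MSE treats frame t as the prediction for frame t+1, and both scores skip the first timestep, which has no prior context. A self-contained sketch of the two numbers on dummy arrays (the shapes are assumptions for illustration only):

import numpy as np

# Dummy (sequences, timesteps, rows, cols, channels) arrays standing in for X_test / X_hat.
rng = np.random.default_rng(0)
X_test = rng.random((8, 10, 16, 20, 3))
X_hat = rng.random((8, 10, 16, 20, 3))

# Model error: predicted vs. actual frame, ignoring t = 0.
mse_model = np.mean((X_test[:, 1:] - X_hat[:, 1:]) ** 2)

# Baseline: pretend frame t is the prediction for frame t + 1.
mse_prev = np.mean((X_test[:, :-1] - X_test[:, 1:]) ** 2)

print("Model MSE: %f" % mse_model)
print("Previous Frame MSE: %f" % mse_prev)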
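
process_kitti.py and process_nfs.py replace the removed scipy.misc.imresize with PIL: each frame is scaled so its height matches desired_im_sz[0] (note that PIL's resize takes (width, height)), then center-cropped to desired_im_sz[1] columns. A minimal sketch of that helper on a synthetic frame; the input image size is made up for the example:

import numpy as np
from PIL import Image

desired_sz = (128, 160)                        # (rows, cols), as in the scripts

def process_im(im, desired_sz):
    # Scale so the height equals desired_sz[0]; PIL expects (width, height).
    target_ds = float(desired_sz[0]) / im.shape[0]
    new_w = int(np.round(target_ds * im.shape[1]))
    im = np.array(Image.fromarray(im).resize((new_w, desired_sz[0])))
    # Center-crop the width down to desired_sz[1] columns.
    d = int((im.shape[1] - desired_sz[1]) / 2)
    return im[:, d:d + desired_sz[1]]

im = np.zeros((375, 1242, 3), dtype=np.uint8)  # KITTI-like frame size, for illustration
print(process_im(im, desired_sz).shape)        # (128, 160, 3)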