From c4d6df24173572da60d0991bc6172472f4a2523f Mon Sep 17 00:00:00 2001
From: pkmital
Date: Thu, 5 Apr 2018 18:33:41 +0000
Subject: [PATCH] trying seq2seq w/ shortened seq

---
 lstm_mdn.py   |  6 +++---
 seq2seq.py    |  4 ++--
 tests.py      | 47 +++++++++++++++++++++++++----------------------
 train.py      | 25 +++++++++++++------------
 train_lstm.py |  4 ++--
 5 files changed, 45 insertions(+), 41 deletions(-)

diff --git a/lstm_mdn.py b/lstm_mdn.py
index d71a700..c607649 100644
--- a/lstm_mdn.py
+++ b/lstm_mdn.py
@@ -144,9 +144,9 @@ def create_model(batch_size=50,
                 source_output)
             mse = tf.losses.mean_squared_error(sample, source_output)
         else:
-            weighted_mse_loss = tf.constant(0)
-            mse = tf.constant(0)
-            loss = mdn_loss
+            weighted_mse_loss = tf.constant(0.0)
+            mse = tf.constant(0.0)
+        loss = mdn_loss + weighted_mse_loss

     return {
         'source': source,
diff --git a/seq2seq.py b/seq2seq.py
index 18f5280..3209496 100644
--- a/seq2seq.py
+++ b/seq2seq.py
@@ -379,8 +379,7 @@ def create_model(batch_size=50,
             cat=tfd.Categorical(probs=weights),
             components=components)
     with tf.variable_scope('loss'):
-        p = gauss.log_prob(decoder_output)
-        negloglike = -tf.reduce_logsumexp(p, axis=1)
+        negloglike = -gauss.log_prob(decoder_output)
         weighted_reconstruction = tf.reduce_mean(
             tf.expand_dims(weights, 2) * means, 3)
         mdn_loss = tf.reduce_mean(negloglike)
@@ -399,6 +398,7 @@ def create_model(batch_size=50,
         'keep_prob': keep_prob,
         'encoding': encoder_state,
         'decoding': infer_outputs,
+        'weighted': weighted_reconstruction,
         'loss': loss,
         'mdn_loss': mdn_loss,
         'mse_loss': mse_loss
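Note on the seq2seq.py loss change: tfd.Mixture.log_prob already performs the log-sum-exp over the mixture components, so the old extra tf.reduce_logsumexp(p, axis=1) collapsed the time axis into -log sum_t p_t, which is not a per-timestep negative log-likelihood. Below is a minimal standalone sketch of the corrected loss, assuming TensorFlow 1.x with tfd = tf.contrib.distributions; the shapes and placeholders are illustrative stand-ins, not the repository's exact graph.

    import tensorflow as tf
    tfd = tf.contrib.distributions

    # Illustrative sizes; the real values come from create_model's arguments.
    batch_size, seq_len, n_features, n_gaussians = 50, 120, 24, 10

    # Target sequence and mixture parameters (normally produced by the
    # decoder; placeholders keep the sketch self-contained).
    decoder_output = tf.placeholder(
        tf.float32, [batch_size, seq_len, n_features])
    means = tf.placeholder(
        tf.float32, [batch_size, seq_len, n_features, n_gaussians])
    sigmas = tf.placeholder(
        tf.float32, [batch_size, seq_len, n_features, n_gaussians])
    weights = tf.placeholder(
        tf.float32, [batch_size, seq_len, n_gaussians])

    # One diagonal Gaussian per mixture component.
    components = [
        tfd.MultivariateNormalDiag(loc=means[..., k], scale_diag=sigmas[..., k])
        for k in range(n_gaussians)
    ]
    gauss = tfd.Mixture(
        cat=tfd.Categorical(probs=weights), components=components)

    # log_prob log-sum-exps over components internally, returning one
    # log-likelihood per (batch, timestep); negate and average directly.
    negloglike = -gauss.log_prob(decoder_output)
    mdn_loss = tf.reduce_mean(negloglike)
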
diff --git a/tests.py b/tests.py
index ca60ce6..6eac0f0 100644
--- a/tests.py
+++ b/tests.py
@@ -4,9 +4,9 @@

 def euler():
     data = np.load('euler.npy')
-    mean_data = np.mean(data)
-    std_data = np.std(data)
-    data = (data.reshape([data.shape[0], -1]) - mean_data) / std_data
+    data_mean = np.mean(data)
+    data_std = np.std(data)
+    data = (data.reshape([data.shape[0], -1]) - data_mean) / data_std
     n_features = data.shape[-1]
     batch_size = 50
     sequence_length = 240
@@ -21,8 +21,8 @@ def euler():

     train.infer(
         data=data,
-        mean_data=mean_data,
-        std_data=std_data,
+        data_mean=data_mean,
+        data_std=data_std,
         batch_size=batch_size,
         sequence_length=sequence_length,
         n_features=n_features,
@@ -37,9 +37,9 @@ def euler():

 def euler_v2():
     data = np.load('euler.npy')
-    mean_data = np.mean(data)
-    std_data = np.std(data)
-    data = (data.reshape([data.shape[0], -1]) - mean_data) / std_data
+    data_mean = np.mean(data)
+    data_std = np.std(data)
+    data = (data.reshape([data.shape[0], -1]) - data_mean) / data_std
     n_features = data.shape[-1]
     batch_size = 50
     sequence_length = 240
@@ -53,8 +53,8 @@ def euler_v2():

     train.infer(
         data=data,
-        mean_data=mean_data,
-        std_data=std_data,
+        data_mean=data_mean,
+        data_std=data_std,
         batch_size=batch_size,
         sequence_length=sequence_length,
         n_features=n_features,
@@ -69,9 +69,9 @@ def euler_v2():

 def euler_v3():
     data = np.load('euler.npy')
-    mean_data = np.mean(data)
-    std_data = np.std(data)
-    data = (data.reshape([data.shape[0], -1]) - mean_data) / std_data
+    data_mean = np.mean(data)
+    data_std = np.std(data)
+    data = (data.reshape([data.shape[0], -1]) - data_mean) / data_std
     n_features = data.shape[-1]
     sequence_length = 120
     input_embed_size = None
@@ -94,8 +94,8 @@ def euler_v3():
     res = train.infer(
         source=source,
         target=target,
-        mean_data=mean_data,
-        std_data=std_data,
+        data_mean=data_mean,
+        data_std=data_std,
         batch_size=batch_size,
         sequence_length=sequence_length,
         n_features=n_features,
@@ -111,12 +111,15 @@ def euler_v3():
 def euler_v4():
     data = np.load('euler.npy')
     data = data.reshape(data.shape[0], -1)
-    mean_data = np.mean(data, axis=0)
-    std_data = np.std(data, axis=0)
-    data = (data - mean_data) / std_data
+    data_mean = np.mean(data, axis=0)
+    data_std = np.std(data, axis=0)
+    idxs = np.where(data_std > 0)[0]
+    data_mean = data_mean[idxs]
+    data_std = data_std[idxs]
+    data = (data[:, idxs] - data_mean) / data_std
     n_features = data.shape[-1]
-    batch_size = 20
-    sequence_length = 500
+    batch_size = 64
+    sequence_length = 120
     input_embed_size = None
     n_neurons = 512
     n_layers = 3
@@ -127,8 +130,8 @@ def euler_v4():

     res = train.train(
         data=data,
-        mean_data=mean_data,
-        std_data=std_data,
+        data_mean=data_mean,
+        data_std=data_std,
         batch_size=batch_size,
         sequence_length=sequence_length,
         n_features=n_features,
diff --git a/train.py b/train.py
index cceedf0..fbb0acb 100644
--- a/train.py
+++ b/train.py
@@ -53,8 +53,8 @@ def batch_generator(data, sequence_length, batch_size=50):


 def train(data,
-          mean_data,
-          std_data,
+          data_mean,
+          data_std,
           n_epochs=1000,
           batch_size=100,
           sequence_length=240,
@@ -116,8 +116,8 @@ def train(data,

 def infer(source,
           target,
-          mean_data,
-          std_data,
+          data_mean,
+          data_std,
           batch_size,
           sequence_length,
           ckpt_path='./',
@@ -134,24 +134,25 @@ def infer(source,
         sess.run(init_op)
         saver = tf.train.Saver()
         saver.restore(sess, os.path.join(ckpt_path, model_name))
-        recon, enc = sess.run(
-            [net['decoding'], net['encoding']],
+        weighted, recon, enc = sess.run(
+            [net['weighted'], net['decoding'], net['encoding']],
             feed_dict={
                 net['source']: source,
                 net['keep_prob']: 1.0
             })
-        src = (source * std_data) + mean_data
-        tgt = (target * std_data) + mean_data
-        res = (recon[0] * std_data) + mean_data
+        src = (source * data_std) + data_mean
+        tgt = (target * data_std) + data_mean
+        res = (recon[0] * data_std) + data_mean
+        wgt = (weighted[0] * data_std) + data_mean
         fig, axs = plt.subplots(2, 2)
         axs[0][0].plot(src.reshape(-1, src.shape[-1]))
         axs[0][0].set_title('Source')
         axs[0][1].plot(tgt.reshape(-1, tgt.shape[-1]))
         axs[0][1].set_title('Target (Original)')
-        axs[1][0].plot(src.reshape(-1, src.shape[-1]))
-        axs[1][0].set_title('Source')
+        axs[1][0].plot(wgt.reshape(-1, src.shape[-1]))
+        axs[1][0].set_title('Target (Synthesis Weighted)')
         axs[1][1].plot(res.reshape(-1, res.shape[-1]))
-        axs[1][1].set_title('Target (Synthesis)')
+        axs[1][1].set_title('Target (Synthesis Sampling)')
         np.save('source.npy', src)
         np.save('target.npy', tgt)
         np.save('encoding.npy', enc)
diff --git a/train_lstm.py b/train_lstm.py
index cbe5aa8..1bf6903 100644
--- a/train_lstm.py
+++ b/train_lstm.py
@@ -80,7 +80,7 @@ def train(data,
                     '{}: total_loss: {} total_weighted_mse: {}'.
                     format(it_i, loss, weighted_mse),
                     end='\r')
-            current_learning_rate = max(0.0001, current_learning_rate * 0.99)
+            current_learning_rate = max(0.0001, current_learning_rate * 0.995)
             print('iteration: {}, learning rate: {}'.format(
                 it_i, current_learning_rate))
             print(
@@ -198,7 +198,7 @@ def test_euler():
     n_gaussians = 10
     use_mdn = True
     model_name = 'lstm_mdn-euler'
-    restore_name = None
+    restore_name = 'lstm_mdn-euler-365'
     overfit = False

     if overfit:
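
Note on the euler_v4 normalization: standardizing per feature divides by each feature's standard deviation, so constant (zero-variance) channels have to be dropped first or the division produces NaNs. A standalone sketch of the same preprocessing, assuming only (as in the patch) that euler.npy holds a [frames, ...] motion array:

    import numpy as np

    data = np.load('euler.npy')
    data = data.reshape(data.shape[0], -1)  # [n_frames, n_features]

    data_mean = np.mean(data, axis=0)
    data_std = np.std(data, axis=0)

    # Keep only the features that actually vary; the saved statistics
    # must be sliced the same way so the scales stay aligned.
    idxs = np.where(data_std > 0)[0]
    data_mean = data_mean[idxs]
    data_std = data_std[idxs]
    data = (data[:, idxs] - data_mean) / data_std

    # infer() undoes the normalization as: original = x * data_std + data_mean
    # (idxs would also be needed to restore the dropped channels).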