From c4d6df24173572da60d0991bc6172472f4a2523f Mon Sep 17 00:00:00 2001
From: pkmital
Date: Thu, 5 Apr 2018 18:33:41 +0000
Subject: [PATCH] trying seq2seq w/ shortened seq

---
 lstm_mdn.py   |  6 +++---
 seq2seq.py    |  4 ++--
 tests.py      | 47 +++++++++++++++++++++++++----------------------
 train.py      | 25 +++++++++++++------------
 train_lstm.py |  4 ++--
 5 files changed, 45 insertions(+), 41 deletions(-)

diff --git a/lstm_mdn.py b/lstm_mdn.py
index d71a700..c607649 100644
--- a/lstm_mdn.py
+++ b/lstm_mdn.py
@@ -144,9 +144,9 @@ def create_model(batch_size=50,
                 source_output)
             mse = tf.losses.mean_squared_error(sample, source_output)
         else:
-            weighted_mse_loss = tf.constant(0)
-            mse = tf.constant(0)
-            loss = mdn_loss
+            weighted_mse_loss = tf.constant(0.0)
+            mse = tf.constant(0.0)
+        loss = mdn_loss + weighted_mse_loss

     return {
         'source': source,
diff --git a/seq2seq.py b/seq2seq.py
index 18f5280..3209496 100644
--- a/seq2seq.py
+++ b/seq2seq.py
@@ -379,8 +379,7 @@ def create_model(batch_size=50,
             cat=tfd.Categorical(probs=weights),
             components=components)
     with tf.variable_scope('loss'):
-        p = gauss.log_prob(decoder_output)
-        negloglike = -tf.reduce_logsumexp(p, axis=1)
+        negloglike = -gauss.log_prob(decoder_output)
         weighted_reconstruction = tf.reduce_mean(
             tf.expand_dims(weights, 2) * means, 3)
         mdn_loss = tf.reduce_mean(negloglike)
@@ -399,6 +398,7 @@ def create_model(batch_size=50,
         'keep_prob': keep_prob,
         'encoding': encoder_state,
         'decoding': infer_outputs,
+        'weighted': weighted_reconstruction,
         'loss': loss,
         'mdn_loss': mdn_loss,
         'mse_loss': mse_loss
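Note on the seq2seq.py loss change: tfd.Mixture.log_prob already performs the log-sum-exp over the mixture components, so the old extra tf.reduce_logsumexp(p, axis=1) collapsed the time axis into -log sum_t p_t, which is not a per-timestep negative log-likelihood. Below is a minimal standalone sketch of the corrected loss, assuming TensorFlow 1.x with tfd = tf.contrib.distributions; the shapes and placeholders are illustrative stand-ins, not the repository's exact graph.

    import tensorflow as tf
    tfd = tf.contrib.distributions

    # Illustrative sizes; the real values come from create_model's arguments.
    batch_size, seq_len, n_features, n_gaussians = 50, 120, 24, 10

    # Target sequence and mixture parameters (normally produced by the
    # decoder; placeholders keep the sketch self-contained).
    decoder_output = tf.placeholder(
        tf.float32, [batch_size, seq_len, n_features])
    means = tf.placeholder(
        tf.float32, [batch_size, seq_len, n_features, n_gaussians])
    sigmas = tf.placeholder(
        tf.float32, [batch_size, seq_len, n_features, n_gaussians])
    weights = tf.placeholder(
        tf.float32, [batch_size, seq_len, n_gaussians])

    # One diagonal Gaussian per mixture component.
    components = [
        tfd.MultivariateNormalDiag(loc=means[..., k], scale_diag=sigmas[..., k])
        for k in range(n_gaussians)
    ]
    gauss = tfd.Mixture(
        cat=tfd.Categorical(probs=weights), components=components)

    # log_prob log-sum-exps over components internally, returning one
    # log-likelihood per (batch, timestep); negate and average directly.
    negloglike = -gauss.log_prob(decoder_output)
    mdn_loss = tf.reduce_mean(negloglike)
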
diff --git a/tests.py b/tests.py
index ca60ce6..6eac0f0 100644
--- a/tests.py
+++ b/tests.py
@@ -4,9 +4,9 @@

 def euler():
     data = np.load('euler.npy')
-    mean_data = np.mean(data)
-    std_data = np.std(data)
-    data = (data.reshape([data.shape[0], -1]) - mean_data) / std_data
+    data_mean = np.mean(data)
+    data_std = np.std(data)
+    data = (data.reshape([data.shape[0], -1]) - data_mean) / data_std
     n_features = data.shape[-1]
     batch_size = 50
     sequence_length = 240
@@ -21,8 +21,8 @@ def euler():

     train.infer(
         data=data,
-        mean_data=mean_data,
-        std_data=std_data,
+        data_mean=data_mean,
+        data_std=data_std,
         batch_size=batch_size,
         sequence_length=sequence_length,
         n_features=n_features,
@@ -37,9 +37,9 @@ def euler():

 def euler_v2():
     data = np.load('euler.npy')
-    mean_data = np.mean(data)
-    std_data = np.std(data)
-    data = (data.reshape([data.shape[0], -1]) - mean_data) / std_data
+    data_mean = np.mean(data)
+    data_std = np.std(data)
+    data = (data.reshape([data.shape[0], -1]) - data_mean) / data_std
     n_features = data.shape[-1]
     batch_size = 50
     sequence_length = 240
@@ -53,8 +53,8 @@ def euler_v2():

     train.infer(
         data=data,
-        mean_data=mean_data,
-        std_data=std_data,
+        data_mean=data_mean,
+        data_std=data_std,
         batch_size=batch_size,
         sequence_length=sequence_length,
         n_features=n_features,
@@ -69,9 +69,9 @@ def euler_v2():

 def euler_v3():
     data = np.load('euler.npy')
-    mean_data = np.mean(data)
-    std_data = np.std(data)
-    data = (data.reshape([data.shape[0], -1]) - mean_data) / std_data
+    data_mean = np.mean(data)
+    data_std = np.std(data)
+    data = (data.reshape([data.shape[0], -1]) - data_mean) / data_std
     n_features = data.shape[-1]
     sequence_length = 120
     input_embed_size = None
@@ -94,8 +94,8 @@ def euler_v3():
     res = train.infer(
         source=source,
         target=target,
-        mean_data=mean_data,
-        std_data=std_data,
+        data_mean=data_mean,
+        data_std=data_std,
         batch_size=batch_size,
         sequence_length=sequence_length,
         n_features=n_features,
@@ -111,12 +111,15 @@ def euler_v3():
 def euler_v4():
     data = np.load('euler.npy')
     data = data.reshape(data.shape[0], -1)
-    mean_data = np.mean(data, axis=0)
-    std_data = np.std(data, axis=0)
-    data = (data - mean_data) / std_data
+    data_mean = np.mean(data, axis=0)
+    data_std = np.std(data, axis=0)
+    idxs = np.where(data_std > 0)[0]
+    data_mean = data_mean[idxs]
+    data_std = data_std[idxs]
+    data = (data[:, idxs] - data_mean) / data_std
     n_features = data.shape[-1]
-    batch_size = 20
-    sequence_length = 500
+    batch_size = 64
+    sequence_length = 120
     input_embed_size = None
     n_neurons = 512
     n_layers = 3
@@ -127,8 +130,8 @@ def euler_v4():

     res = train.train(
         data=data,
-        mean_data=mean_data,
-        std_data=std_data,
+        data_mean=data_mean,
+        data_std=data_std,
         batch_size=batch_size,
         sequence_length=sequence_length,
         n_features=n_features,
diff --git a/train.py b/train.py
index cceedf0..fbb0acb 100644
--- a/train.py
+++ b/train.py
@@ -53,8 +53,8 @@ def batch_generator(data, sequence_length, batch_size=50):


 def train(data,
-          mean_data,
-          std_data,
+          data_mean,
+          data_std,
           n_epochs=1000,
           batch_size=100,
           sequence_length=240,
@@ -116,8 +116,8 @@ def train(data,

 def infer(source,
           target,
-          mean_data,
-          std_data,
+          data_mean,
+          data_std,
           batch_size,
           sequence_length,
           ckpt_path='./',
@@ -134,24 +134,25 @@ def infer(source,
         sess.run(init_op)
         saver = tf.train.Saver()
         saver.restore(sess, os.path.join(ckpt_path, model_name))
-        recon, enc = sess.run(
-            [net['decoding'], net['encoding']],
+        weighted, recon, enc = sess.run(
+            [net['weighted'], net['decoding'], net['encoding']],
             feed_dict={
                 net['source']: source,
                 net['keep_prob']: 1.0
             })
-        src = (source * std_data) + mean_data
-        tgt = (target * std_data) + mean_data
-        res = (recon[0] * std_data) + mean_data
+        src = (source * data_std) + data_mean
+        tgt = (target * data_std) + data_mean
+        res = (recon[0] * data_std) + data_mean
+        wgt = (weighted[0] * data_std) + data_mean
         fig, axs = plt.subplots(2, 2)
         axs[0][0].plot(src.reshape(-1, src.shape[-1]))
         axs[0][0].set_title('Source')
         axs[0][1].plot(tgt.reshape(-1, tgt.shape[-1]))
         axs[0][1].set_title('Target (Original)')
-        axs[1][0].plot(src.reshape(-1, src.shape[-1]))
-        axs[1][0].set_title('Source')
+        axs[1][0].plot(wgt.reshape(-1, src.shape[-1]))
+        axs[1][0].set_title('Target (Synthesis Weighted)')
         axs[1][1].plot(res.reshape(-1, res.shape[-1]))
-        axs[1][1].set_title('Target (Synthesis)')
+        axs[1][1].set_title('Target (Synthesis Sampling)')
         np.save('source.npy', src)
         np.save('target.npy', tgt)
         np.save('encoding.npy', enc)
diff --git a/train_lstm.py b/train_lstm.py
index cbe5aa8..1bf6903 100644
--- a/train_lstm.py
+++ b/train_lstm.py
@@ -80,7 +80,7 @@ def train(data,
                     '{}: total_loss: {} total_weighted_mse: {}'.
                     format(it_i, loss, weighted_mse),
                     end='\r')
-            current_learning_rate = max(0.0001, current_learning_rate * 0.99)
+            current_learning_rate = max(0.0001, current_learning_rate * 0.995)
             print('iteration: {}, learning rate: {}'.format(
                 it_i, current_learning_rate))
             print(
@@ -198,7 +198,7 @@ def test_euler():
     n_gaussians = 10
     use_mdn = True
     model_name = 'lstm_mdn-euler'
-    restore_name = None
+    restore_name = 'lstm_mdn-euler-365'
     overfit = False

     if overfit:
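
Note on the euler_v4 normalization: standardizing per feature divides by each feature's standard deviation, so constant (zero-variance) channels have to be dropped first or the division produces NaNs. A standalone sketch of the same preprocessing, assuming only (as in the patch) that euler.npy holds a [frames, ...] motion array:

    import numpy as np

    data = np.load('euler.npy')
    data = data.reshape(data.shape[0], -1)  # [n_frames, n_features]

    data_mean = np.mean(data, axis=0)
    data_std = np.std(data, axis=0)

    # Keep only the features that actually vary; the saved statistics
    # must be sliced the same way so the scales stay aligned.
    idxs = np.where(data_std > 0)[0]
    data_mean = data_mean[idxs]
    data_std = data_std[idxs]
    data = (data[:, idxs] - data_mean) / data_std

    # infer() undoes the normalization as: original = x * data_std + data_mean
    # (idxs would also be needed to restore the dropped channels).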