Merge pull request #29 from otiliastr:vat

tensorflow-copybara · tensorflow-copybara · commit 4650b686a78a · 2019-10-14T09:49:34.000-07:00
PiperOrigin-RevId: 274596242
diff --git a/neural_structured_learning/research/gam/data/dataset.py b/neural_structured_learning/research/gam/data/dataset.py
@@ -16,6 +16,7 @@
 import logging
 import os
 import pickle
+
 from gam.data.preprocessing import split_train_val
 import numpy as np
 import scipy
@@ -267,7 +268,6 @@ def _agreement_cond(edge):
       return self.get_labels(edge.src) == self.get_labels(edge.tgt)
 
     agreement_cond = _agreement_cond if label_must_match else lambda e: True
-
     return [
         e for e in self.edges if _labeled_cond(e.src, src_labeled) and
         _labeled_cond(e.tgt, tgt_labeled) and agreement_cond(e)
@@ -286,7 +286,6 @@ def __init__(self,
                test_mask,
                labels,
                row_normalize=False):
-
     # Extract train, val, test, unlabeled indices.
     train_indices = np.where(train_mask)[0]
     test_indices = np.where(test_mask)[0]
diff --git a/neural_structured_learning/research/gam/data/loaders.py b/neural_structured_learning/research/gam/data/loaders.py
@@ -21,6 +21,7 @@
 import logging
 import os
 import pickle
+
 import sys
 
 from gam.data.dataset import Dataset
diff --git a/neural_structured_learning/research/gam/experiments/run_train_gam.py b/neural_structured_learning/research/gam/experiments/run_train_gam.py
@@ -41,17 +41,21 @@
 
 FLAGS = flags.FLAGS
 flags.DEFINE_string(
-    'dataset_name', '',
+    'dataset_name', 'cifar10',
     'Dataset name. Supported options are: mnist, cifar10, cifar100, '
     'svhn_cropped, fashion_mnist.')
 flags.DEFINE_string(
     'data_source', 'tensorflow_datasets', 'Data source. Valid options are: '
     '`tensorflow_datasets`, `realistic_ssl`, `planetoid`.')
-flags.DEFINE_integer('target_num_train_per_class', 400,
-                     'Number of samples per class to use for training.')
-flags.DEFINE_integer('target_num_val', 1000,
-                     'Number of samples to be used for validation.')
-flags.DEFINE_integer('seed', 123, 'Seed used by the random number generators.')
+flags.DEFINE_integer(
+    'target_num_train_per_class', 400,
+    'Number of samples per class to use for training.')
+flags.DEFINE_integer(
+    'target_num_val', 1000,
+    'Number of samples to be used for validation.')
+flags.DEFINE_integer(
+    'seed', 123,
+    'Seed used by the random number generators.')
 flags.DEFINE_bool(
     'load_preprocessed', False,
     'Specifies whether to load data already preprocessed. If False, it reads'
@@ -222,6 +226,12 @@
     'num_pairs_reg', 128,
     'Number of pairs of nodes to use in the agreement loss term of the '
     'classification model.')
+flags.DEFINE_float(
+    'reg_weight_vat', 0.0,
+    'Regularization weight for the virtual adversarial training (VAT) loss.')
+flags.DEFINE_bool(
+    'use_ent_min', False,
+    'A boolean specifying whether to add entropy minimization to VAT.')
 flags.DEFINE_string(
     'aggregation_agr_inputs', 'dist',
     'Operation to apply on the pair of nodes in the agreement model. '
@@ -421,6 +431,8 @@ def main(argv):
       reg_weight_ll=FLAGS.reg_weight_ll,
       reg_weight_lu=FLAGS.reg_weight_lu,
       reg_weight_uu=FLAGS.reg_weight_uu,
+      reg_weight_vat=FLAGS.reg_weight_vat,
+      use_ent_min=FLAGS.use_ent_min,
       num_pairs_reg=FLAGS.num_pairs_reg,
       penalize_neg_agr=FLAGS.penalize_neg_agr,
       use_l2_cls=FLAGS.use_l2_cls,
diff --git a/neural_structured_learning/research/gam/experiments/run_train_gam_graph.py b/neural_structured_learning/research/gam/experiments/run_train_gam_graph.py
@@ -36,6 +36,7 @@
 import numpy as np
 import tensorflow as tf
 
+
 FLAGS = flags.FLAGS
 flags.DEFINE_string(
     'dataset_name', 'cora',
@@ -196,6 +197,12 @@
     'num_pairs_reg', 128,
     'Number of pairs of nodes to use in the agreement loss term of the '
     'classification model.')
+flags.DEFINE_float(
+    'reg_weight_vat', 0.0,
+    'Regularization weight for the virtual adversarial training (VAT) loss.')
+flags.DEFINE_bool(
+    'use_ent_min', False,
+    'A boolean specifying whether to add entropy minimization to VAT.')
 flags.DEFINE_string(
     'aggregation_agr_inputs', 'dist',
     'Operation to apply on the pair of nodes in the agreement model. '
@@ -291,6 +298,8 @@ def main(argv):
   model_name += '-perfCls' if FLAGS.use_perfect_classifier else ''
   model_name += '-keepProp' if FLAGS.keep_label_proportions else ''
   model_name += '-PenNegAgr' if FLAGS.penalize_neg_agr else ''
+  model_name += '-VAT' if FLAGS.reg_weight_vat > 0 else ''
+  model_name += 'ENT' if FLAGS.reg_weight_vat > 0 and FLAGS.use_ent_min else ''
   model_name += '-transd' if not FLAGS.inductive else ''
   model_name += '-L2' if FLAGS.use_l2_cls else '-CE'
   model_name += '-graph' if FLAGS.use_graph else '-noGraph'
@@ -380,6 +389,8 @@ def main(argv):
       reg_weight_lu=FLAGS.reg_weight_lu,
       reg_weight_uu=FLAGS.reg_weight_uu,
       num_pairs_reg=FLAGS.num_pairs_reg,
+      reg_weight_vat=FLAGS.reg_weight_vat,
+      use_ent_min=FLAGS.use_ent_min,
       penalize_neg_agr=FLAGS.penalize_neg_agr,
       use_l2_cls=FLAGS.use_l2_cls,
       first_iter_original=FLAGS.first_iter_original,
@@ -401,6 +412,5 @@ def main(argv):
   ############################################################################
   trainer.train(data)
 
-
 if __name__ == '__main__':
   app.run(main)
diff --git a/neural_structured_learning/research/gam/trainer/adversarial.py b/neural_structured_learning/research/gam/trainer/adversarial.py
@@ -0,0 +1,126 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Utilities for virtual adversarial training."""
+import tensorflow as tf
+
+epsilon = 5
+num_power_iterations = 1
+xi = 1e-6
+scale_r = False
+
+
+def kl_divergence_with_logit(q_logit, p_logit):
+  """Computes KL-divergence between two sets of logits."""
+  q = tf.nn.softmax(q_logit)
+  qlogq = -tf.nn.softmax_cross_entropy_with_logits_v2(labels=q, logits=q_logit)
+  qlogp = -tf.nn.softmax_cross_entropy_with_logits_v2(labels=q, logits=p_logit)
+  return qlogq - qlogp
+
+
+def get_normalized_vector(d):
+  """Normalizes the provided input vector."""
+  d /= (1e-12 + tf.reduce_max(tf.abs(d), keep_dims=True))
+  d /= tf.sqrt(1e-6 + tf.reduce_sum(tf.pow(d, 2.0), keep_dims=True))
+  return d
+
+
+def get_normalizing_constant(d):
+  """Returns the normalizing constant to scale the VAT perturbation vector."""
+  c = 1e-12 + tf.reduce_max(tf.abs(d), keep_dims=True)
+  c *= tf.sqrt(1e-6 + tf.reduce_sum(tf.pow(d, 2.0), keep_dims=True))
+  return c
+
+
+def get_loss_vat(inputs, predictions, is_train, model, predictions_var_scope):
+  """Computes the virtual adversarial loss for the provided inputs.
+
+  Args:
+    inputs: A batch of input features, where the batch is the first dimension.
+    predictions: The logits predicted by a model on the provided inputs.
+    is_train: A boolean placeholder specifying if this is a training or testing
+      setting.
+    model: The model that generated the logits.
+    predictions_var_scope: Variable scope for obtaining the predictions.
+
+  Returns:
+    A scalar float Tensor representing the virtual adversarial loss.
+  """
+  r_vadv = generate_virtual_adversarial_perturbation(
+      inputs, predictions, model, predictions_var_scope, is_train=is_train)
+  predictions = tf.stop_gradient(predictions)
+  logit_p = predictions
+  new_inputs = tf.add(inputs, r_vadv)
+  with tf.variable_scope(
+      predictions_var_scope, auxiliary_name_scope=False, reuse=True):
+    encoding_m, _, _ = model.get_encoding_and_params(
+        inputs=new_inputs, is_train=is_train, update_batch_stats=False)
+    logit_m, _, _ = model.get_predictions_and_params(
+        encoding=encoding_m, is_train=is_train)
+  loss = kl_divergence_with_logit(logit_p, logit_m)
+  return tf.reduce_mean(loss)
+
+
+def generate_virtual_adversarial_perturbation(inputs,
+                                              logits,
+                                              model,
+                                              predictions_var_scope,
+                                              is_train=True):
+  """Generates an adversarial perturbation for virtual adversarial training.
+
+  Args:
+    inputs: A batch of input features, where the batch is the first dimension.
+    logits: The logits predicted by a model on the provided inputs.
+    model: The model that generated the logits.
+    predictions_var_scope: Variable scope for obtaining the predictions.
+    is_train: A boolean placeholder specifying if this is a training or testing
+      setting.
+
+  Returns:
+    A Tensor of the same shape as the inputs containing the adversarial
+    perturbation for these inputs.
+  """
+  d = tf.random_normal(shape=tf.shape(inputs))
+
+  for _ in range(num_power_iterations):
+    d = xi * get_normalized_vector(d)
+    logit_p = logits
+    with tf.variable_scope(
+        predictions_var_scope, auxiliary_name_scope=False, reuse=True):
+      encoding_m, _, _ = model.get_encoding_and_params(
+          inputs=d + inputs, is_train=is_train, update_batch_stats=False)
+      logit_m, _, _ = model.get_predictions_and_params(
+          encoding=encoding_m, is_train=is_train)
+    dist = kl_divergence_with_logit(logit_p, logit_m)
+    grad = tf.gradients(dist, [d], aggregation_method=2)[0]
+    d = tf.stop_gradient(grad)
+
+  r_vadv = get_normalized_vector(d)
+  if scale_r:
+    r_vadv *= get_normalizing_constant(inputs)
+  r_vadv *= epsilon
+  return r_vadv
+
+
+def entropy_y_x(logits):
+  """Entropy term to add to VAT with entropy minimization.
+
+  Args:
+    logits: A Tensor containing the predicted logits for a batch of samples.
+
+  Returns:
+    The entropy minimization loss.
+  """
+  p = tf.nn.softmax(logits)
+  return tf.reduce_mean(
+      tf.nn.softmax_cross_entropy_with_logits_v2(labels=p, logits=logits))
diff --git a/neural_structured_learning/research/gam/trainer/trainer_agreement.py b/neural_structured_learning/research/gam/trainer/trainer_agreement.py
@@ -442,7 +442,9 @@ def _eval_train(self, session, feed_dict):
       feed_dict: A train feed dictionary.
 
     Returns:
-      The computed train accuracy.
+      train_acc: The computed train accuracy.
+      acc_0: Accuracy for class 0.
+      acc_1: Accuracy for class 1.
     """
     train_acc, pred, targ = session.run(
         (self.accuracy, self.normalized_predictions, self.labels),
@@ -462,9 +464,7 @@ def _eval_train(self, session, feed_dict):
       acc_0 = sum(acc_0) / np.float32(len(acc_0))
     else:
       acc_0 = -1
-    logging.info('Train acc: %.2f. Acc class 1: %.2f. Acc class 0: %.2f',
-                 train_acc, acc_1, acc_0)
-    return train_acc
+    return train_acc, acc_0, acc_1
 
   def _eval_validation(self, data_iterator_val, num_samples_val, session):
     """Evaluate the current model on validation data.
@@ -685,7 +685,8 @@ def train(self, data, session=None, **kwargs):
         # Evaluate the accuracy on the latest train batch. We track this to make
         # sure the agreement model is able to fit the training data, but can be
         # eliminated if efficiency is an issue.
-        acc_train = self._eval_train(session, feed_dict)
+        acc_train, acc_0_train, acc_1_train = self._eval_train(
+            session, feed_dict)
 
         if self.enable_summaries:
           summary = tf.Summary()
@@ -700,9 +701,10 @@ def train(self, data, session=None, **kwargs):
           summary_writer.flush()
         if step % self.logging_step == 0 or val_acc > best_val_acc:
           logging.info(
-              'Agreement step %6d | Loss: %10.4f | val_acc: %10.4f |'
-              'random_acc: %10.4f | acc_train: %10.4f', step, loss_val, val_acc,
-              acc_random, acc_train)
+              'Agreement step %6d | Loss: %10.4f | val_acc: %.4f |'
+              'random_acc: %.4f | acc_train: %.4f | acc_train_cls_0: %.4f | '
+              'acc_train_cls_1: %.4f', step, loss_val, val_acc, acc_random,
+              acc_train, acc_0_train, acc_1_train)
         if val_acc > best_val_acc:
           best_val_acc = val_acc
           if self.checkpoint_path:
diff --git a/neural_structured_learning/research/gam/trainer/trainer_classification.py b/neural_structured_learning/research/gam/trainer/trainer_classification.py
diff --git a/neural_structured_learning/research/gam/trainer/trainer_cotrain.py b/neural_structured_learning/research/gam/trainer/trainer_cotrain.py