
Commit a7dab07

Merge remote-tracking branch 'origin/master'
# Conflicts:
#	requirements.txt
2 parents b1c453e + 304e361

6 files changed: +18 -4 lines changed

BERT.py (+3 -1)
@@ -13,7 +13,8 @@ def __init__(self, model_dim, max_len, n_layer, n_head, n_vocab, lr, max_seg=3,
         super().__init__(model_dim, max_len, n_layer, n_head, n_vocab, lr, max_seg, drop_rate, padding_idx)
         # I think task emb is not necessary for pretraining,
         # because the aim of all tasks is to train a universal sentence embedding
-        # the body encoder is the same across all task, and the output layer defines each task.
+        # the body encoder is the same across all tasks,
+        # and different output layer defines different task just like transfer learning.
         # finetuning replaces output layer and leaves the body encoder unchanged.
 
         # self.task_emb = keras.layers.Embedding(
@@ -126,6 +127,7 @@ def export_attention(model, data, name="bert"):
 
 
 if __name__ == "__main__":
+    utils.set_soft_gpu(True)
     MODEL_DIM = 256
     N_LAYER = 4
     LEARNING_RATE = 1e-4
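The reworded comment makes the transfer-learning analogy explicit: every pretraining task shares one body encoder, only the task-specific output layer differs, and finetuning simply swaps that layer. A minimal Keras sketch of the idea; the body_encoder and task-head names, layer types, and sizes here are illustrative, not taken from this repository:

# Illustrative sketch only: a shared "body" encoder with swappable task heads.
from tensorflow import keras

body_encoder = keras.Sequential([
    keras.layers.Embedding(input_dim=1000, output_dim=64),
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(64, activation="relu"),
], name="body_encoder")

tokens = keras.Input(shape=(32,), dtype="int32")
sentence_emb = body_encoder(tokens)

# Pretraining: each task adds its own output layer on top of the same body.
task_a_head = keras.layers.Dense(2, name="task_a")
task_b_head = keras.layers.Dense(3, name="task_b")
pretrain_model = keras.Model(tokens, [task_a_head(sentence_emb), task_b_head(sentence_emb)])

# Finetuning: keep the pretrained body, replace only the output layer.
body_encoder.trainable = False            # optionally freeze the body
finetune_head = keras.layers.Dense(5, name="downstream_task")
finetune_model = keras.Model(tokens, finetune_head(sentence_emb))

Because both models reuse the same body_encoder object, weights learned during pretraining carry over to the finetuning model automatically.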

BERT_window_mask.py (+1)
@@ -29,6 +29,7 @@ def mask(self, seqs):
 
 
 if __name__ == "__main__":
+    # utils.set_soft_gpu(True)
     MODEL_DIM = 256
     N_LAYER = 4
     LEARNING_RATE = 1e-4

GPT.py (+1)
@@ -115,6 +115,7 @@ def export_attention(model, data, name="gpt"):
 
 
 if __name__ == "__main__":
+    # utils.set_soft_gpu(True)
     MODEL_DIM = 256
     N_LAYER = 4
     LEARNING_RATE = 1e-4

requirements.txt (+1 -1)
@@ -2,5 +2,5 @@ matplotlib==3.2.1
 numpy==1.18.5
 pandas==1.0.4
 requests==2.23.0
-tensorflow==2.2.0
+tensorflow==2.3.1
 tensorflow-addons==0.10.0

utils.py (+10)
@@ -246,3 +246,13 @@ def process_w2v_data(corpus, skip_window=2, method="skip_gram"):
     return Dataset(x, y, v2i, i2v)
 
 
+def set_soft_gpu(soft_gpu):
+    import tensorflow as tf
+    if soft_gpu:
+        gpus = tf.config.experimental.list_physical_devices('GPU')
+        if gpus:
+            # Currently, memory growth needs to be the same across GPUs
+            for gpu in gpus:
+                tf.config.experimental.set_memory_growth(gpu, True)
+            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
+            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
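The new helper switches TensorFlow to on-demand ("soft") GPU memory allocation, so the process grows its memory footprint as needed instead of reserving the whole GPU up front. It has to run before any op initializes the GPU, which is why BERT.py calls it first thing in its __main__ block above. A minimal usage sketch, assuming utils.py is on the import path:

# Call before building any model; once the GPU context is initialized,
# set_memory_growth() raises a RuntimeError.
import utils

utils.set_soft_gpu(True)   # prints e.g. "1 Physical GPUs, 1 Logical GPUs"

import tensorflow as tf
print(tf.config.experimental.list_logical_devices('GPU'))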

visual.py (+2 -2)
@@ -291,7 +291,7 @@ def self_attention_line(bert_or_gpt="bert", case=0):
     # transformer_attention_matrix(case=0)
     # transformer_attention_line(case=0)
 
-    model = ["gpt", "bert", "bert_window_mask"][1]
-    case = 7
+    model = ["gpt", "bert", "bert_window_mask"][2]
+    case = 4
     self_attention_matrix(model, case=case)
     self_attention_line(model, case=case)
