
Commit a7dab07

Merge remote-tracking branch 'origin/master'
# Conflicts:
#	requirements.txt
2 parents b1c453e + 304e361

6 files changed: +18 -4 lines changed

BERT.py (+3 -1)
@@ -13,7 +13,8 @@ def __init__(self, model_dim, max_len, n_layer, n_head, n_vocab, lr, max_seg=3,
         super().__init__(model_dim, max_len, n_layer, n_head, n_vocab, lr, max_seg, drop_rate, padding_idx)
         # I think task emb is not necessary for pretraining,
         # because the aim of all tasks is to train a universal sentence embedding
-        # the body encoder is the same across all task, and the output layer defines each task.
+        # the body encoder is the same across all tasks,
+        # and different output layer defines different task just like transfer learning.
         # finetuning replaces output layer and leaves the body encoder unchanged.
 
         # self.task_emb = keras.layers.Embedding(
@@ -126,6 +127,7 @@ def export_attention(model, data, name="bert"):
 
 
 if __name__ == "__main__":
+    utils.set_soft_gpu(True)
     MODEL_DIM = 256
     N_LAYER = 4
     LEARNING_RATE = 1e-4
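The reworded comment makes the transfer-learning analogy explicit: every pretraining task shares one body encoder, only the task-specific output layer differs, and finetuning simply swaps that layer. A minimal Keras sketch of the idea; the body_encoder and task-head names, layer types, and sizes here are illustrative, not taken from this repository:

# Illustrative sketch only: a shared "body" encoder with swappable task heads.
from tensorflow import keras

body_encoder = keras.Sequential([
    keras.layers.Embedding(input_dim=1000, output_dim=64),
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(64, activation="relu"),
], name="body_encoder")

tokens = keras.Input(shape=(32,), dtype="int32")
sentence_emb = body_encoder(tokens)

# Pretraining: each task adds its own output layer on top of the same body.
task_a_head = keras.layers.Dense(2, name="task_a")
task_b_head = keras.layers.Dense(3, name="task_b")
pretrain_model = keras.Model(tokens, [task_a_head(sentence_emb), task_b_head(sentence_emb)])

# Finetuning: keep the pretrained body, replace only the output layer.
body_encoder.trainable = False            # optionally freeze the body
finetune_head = keras.layers.Dense(5, name="downstream_task")
finetune_model = keras.Model(tokens, finetune_head(sentence_emb))

Because both models reuse the same body_encoder object, weights learned during pretraining carry over to the finetuning model automatically.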

BERT_window_mask.py (+1)
@@ -29,6 +29,7 @@ def mask(self, seqs):
 
 
 if __name__ == "__main__":
+    # utils.set_soft_gpu(True)
     MODEL_DIM = 256
     N_LAYER = 4
     LEARNING_RATE = 1e-4

GPT.py (+1)
@@ -115,6 +115,7 @@ def export_attention(model, data, name="gpt"):
 
 
 if __name__ == "__main__":
+    # utils.set_soft_gpu(True)
     MODEL_DIM = 256
     N_LAYER = 4
     LEARNING_RATE = 1e-4

requirements.txt (+1 -1)
@@ -2,5 +2,5 @@ matplotlib==3.2.1
 numpy==1.18.5
 pandas==1.0.4
 requests==2.23.0
-tensorflow==2.2.0
+tensorflow==2.3.1
 tensorflow-addons==0.10.0

utils.py (+10)
@@ -246,3 +246,13 @@ def process_w2v_data(corpus, skip_window=2, method="skip_gram"):
     return Dataset(x, y, v2i, i2v)
 
 
+def set_soft_gpu(soft_gpu):
+    import tensorflow as tf
+    if soft_gpu:
+        gpus = tf.config.experimental.list_physical_devices('GPU')
+        if gpus:
+            # Currently, memory growth needs to be the same across GPUs
+            for gpu in gpus:
+                tf.config.experimental.set_memory_growth(gpu, True)
+            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
+            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
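The new helper switches TensorFlow to on-demand ("soft") GPU memory allocation, so the process grows its memory footprint as needed instead of reserving the whole GPU up front. It has to run before any op initializes the GPU, which is why BERT.py calls it first thing in its __main__ block above. A minimal usage sketch, assuming utils.py is on the import path:

# Call before building any model; once the GPU context is initialized,
# set_memory_growth() raises a RuntimeError.
import utils

utils.set_soft_gpu(True)   # prints e.g. "1 Physical GPUs, 1 Logical GPUs"

import tensorflow as tf
print(tf.config.experimental.list_logical_devices('GPU'))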

visual.py (+2 -2)
@@ -291,7 +291,7 @@ def self_attention_line(bert_or_gpt="bert", case=0):
     # transformer_attention_matrix(case=0)
     # transformer_attention_line(case=0)
 
-    model = ["gpt", "bert", "bert_window_mask"][1]
-    case = 7
+    model = ["gpt", "bert", "bert_window_mask"][2]
+    case = 4
     self_attention_matrix(model, case=case)
     self_attention_line(model, case=case)
