From 404d9c09c106c43e6a20e98c33111f7d77589f6f Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Mon, 6 Feb 2023 09:56:12 +0000
Subject: [PATCH] tmp ada

Temporary training tweaks:

* config.py: lower the default _C.TRAIN.EPOCHS from 300 to 90.
* models/swin_transformer_v2.py: create the logit-scale clamp bound on
  the same device as self.logit_scale so torch.clamp() does not receive
  tensors on different devices.
* train.sh: launch 8 Horovod processes with --batch-size 32 and read the
  dataset from ../tiny-imagenet-200.

---
NOTE(review): the clamp bound now uses device=self.logit_scale.device
instead of a hard-coded "cuda" device, so the model also runs on CPU and
on non-default GPUs. The post-image blob id on the
models/swin_transformer_v2.py index line predates this edit; regenerate
the patch with `git format-patch` after applying if exact ids matter.
Context-line indentation in that hunk was reconstructed and should be
confirmed against the target file.

 config.py                     | 2 +-
 models/swin_transformer_v2.py | 2 +-
 train.sh                      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/config.py b/config.py
index 767459f87..f336815d0 100644
--- a/config.py
+++ b/config.py
@@ -146,7 +146,7 @@
 # -----------------------------------------------------------------------------
 _C.TRAIN = CN()
 _C.TRAIN.START_EPOCH = 0
-_C.TRAIN.EPOCHS = 300
+_C.TRAIN.EPOCHS = 90
 _C.TRAIN.WARMUP_EPOCHS = 20
 _C.TRAIN.WEIGHT_DECAY = 0.05
 _C.TRAIN.BASE_LR = 5e-4
diff --git a/models/swin_transformer_v2.py b/models/swin_transformer_v2.py
index a429d0a2c..bf1a20d24 100644
--- a/models/swin_transformer_v2.py
+++ b/models/swin_transformer_v2.py
@@ -153,7 +153,7 @@ def forward(self, x, mask=None):
 
         # cosine attention
         attn = (F.normalize(q, dim=-1) @ F.normalize(k, dim=-1).transpose(-2, -1))
-        logit_scale = torch.clamp(self.logit_scale, max=torch.log(torch.tensor(1. / 0.01))).exp()
+        logit_scale = torch.clamp(self.logit_scale, max=torch.log(torch.tensor(1. / 0.01, device=self.logit_scale.device))).exp()
         attn = attn * logit_scale
 
         relative_position_bias_table = self.cpb_mlp(self.relative_coords_table).view(-1, self.num_heads)
diff --git a/train.sh b/train.sh
index 88805ed4e..6535ece63 100644
--- a/train.sh
+++ b/train.sh
@@ -1 +1 @@
-horovodrun -np 4 python main_hvd.py --cfg configs/swinv2/swinv2_base_patch4_window8_128_hvd.yaml --data-path ../../datasets/tiny-imagenet-200 --batch-size 64 --disable_amp --local_rank 0
+horovodrun -np 8 python main_hvd.py --cfg configs/swinv2/swinv2_base_patch4_window8_128_hvd.yaml --data-path ../tiny-imagenet-200 --batch-size 32 --disable_amp --local_rank 0