forked from PaddlePaddle/PaddleVideo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvideoswin_k400_videos.yaml
175 lines (168 loc) · 6.29 KB
/
videoswin_k400_videos.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# MODEL: network definition (framework, backbone, classification head) and
# the runtime options applied at test time.
MODEL:
  # Mandatory; network type, maps to 'paddlevideo/modeling/framework/'.
  framework: "RecognizerTransformer"
  # Mandatory; backbone type, maps to 'paddlevideo/modeling/backbones/'.
  backbone:
    name: "SwinTransformer3D"  # Mandatory; the name of the backbone.
    # Optional; pretrained model path.
    pretrained: "data/SwinTransformer_imagenet.pdparams"
    patch_size: [2, 4, 4]
    embed_dim: 128
    depths: [2, 2, 18, 2]
    num_heads: [4, 8, 16, 32]
    window_size: [8, 7, 7]
    mlp_ratio: 4.0
    qkv_bias: true
    # YAML null. The bare word `None` is a plain string to a YAML loader,
    # not a null value.
    qk_scale: null
    drop_rate: 0.0
    attn_drop_rate: 0.0
    drop_path_rate: 0.2
    patch_norm: true
  head:
    # Mandatory; head type, maps to 'paddlevideo/modeling/heads'.
    name: "I3DHead"
    num_classes: 400  # Optional; the number of classes to be classified.
    in_channels: 1024  # Input channels of the extracted feature.
    spatial_type: 'avg'
    drop_ratio: 0.5  # Dropout ratio.
    std: 0.01  # Std value used in parameter initialization.
  # Configuration used when the model is in train or test mode.
  runtime_cfg:
    test:  # Test config.
      num_seg: 32
      avg_type: 'prob'  # 'score' or 'prob'
# DATASET: loader settings plus the data root and index file of each split.
DATASET:
  batch_size: 1  # Mandatory; batch size.
  num_workers: 4  # Mandatory; the number of subprocesses on each GPU.
  test_batch_size: 1
  train:
    # Mandatory; dataset type, maps to 'paddlevideo/loader/dataset'.
    format: "VideoDataset"
    data_prefix: "data/k400/videos"  # Mandatory; train data root path.
    file_path: "data/k400/train.list"  # Mandatory; train data index file path.
  valid:
    # Mandatory; dataset type, maps to 'paddlevideo/loader/dataset'.
    format: "VideoDataset"
    data_prefix: "data/k400/videos"  # Mandatory; valid data root path.
    file_path: "data/k400/val.list"  # Mandatory; valid data index file path.
  test:
    # Mandatory; dataset type, maps to 'paddlevideo/loader/dataset'.
    format: "VideoDataset"
    data_prefix: "data/k400/videos"  # Mandatory; test data root path.
    # Mandatory; test data index file path (same list file as `valid`).
    file_path: "data/k400/val.list"
# PIPELINE: per-split decode -> sample -> transform chains. TODO: document
# the individual fields.
PIPELINE:
  # Mandatory; pipeline applied to the training data, maps to
  # 'paddlevideo/loader/pipelines/'.
  train:
    decode:
      name: "VideoDecoder"
      backend: 'decord'
      mode: 'train'
    sample:
      name: "Sampler"
      num_seg: 1
      frame_interval: 2
      seg_len: 32
      valid_mode: false
      use_pil: false
    transform:  # Mandatory; image transform operators, applied in order.
      - Scale:
          short_size: 256
          fixed_ratio: false
          keep_ratio: true
          backend: 'cv2'
          do_round: true
      - RandomResizedCrop:
          backend: 'cv2'
      - Scale:
          short_size: 224
          fixed_ratio: false
          keep_ratio: false
          backend: 'cv2'
          do_round: true
      # No arguments are given for this operator (value is YAML null).
      - RandomFlip:
      # NOTE(review): mean/std look like per-channel values on a 0-255
      # pixel scale; confirm against the Normalization operator.
      - Normalization:
          mean: [123.675, 116.28, 103.53]
          std: [58.395, 57.12, 57.375]
          tensor_shape: [3, 1, 1, 1]
          inplace: true
      - Image2Array:
          data_format: 'cthw'
  # Mandatory; pipeline applied to the validation data, maps to
  # 'paddlevideo/loader/pipelines/'.
  valid:
    decode:
      name: "VideoDecoder"
      backend: 'decord'
      mode: 'valid'
    sample:
      name: "Sampler"
      num_seg: 1
      frame_interval: 2
      seg_len: 32
      valid_mode: true
      use_pil: false
    transform:  # Mandatory; image transform operators, applied in order.
      - Scale:
          short_size: 256
          fixed_ratio: false
          keep_ratio: true
          backend: 'cv2'
          do_round: true
      - CenterCrop:
          target_size: 224
          do_round: false
          backend: 'cv2'
      - Normalization:
          mean: [123.675, 116.28, 103.53]
          std: [58.395, 57.12, 57.375]
          tensor_shape: [3, 1, 1, 1]
          inplace: true
      - Image2Array:
          data_format: 'cthw'
  # Pipeline applied to the test data. Note that the decoder mode here is
  # 'valid', the same value the `valid` split uses.
  test:
    decode:
      name: "VideoDecoder"
      backend: 'decord'
      mode: 'valid'
    sample:
      name: "Sampler"
      num_seg: 4
      frame_interval: 2
      seg_len: 32
      valid_mode: true
      use_pil: false
    transform:  # Mandatory; image transform operators, applied in order.
      - Scale:
          short_size: 224
          fixed_ratio: false
          keep_ratio: true
          backend: 'cv2'
          do_round: true
      - UniformCrop:
          target_size: 224
          backend: 'cv2'
      - Normalization:
          mean: [123.675, 116.28, 103.53]
          std: [58.395, 57.12, 57.375]
          tensor_shape: [3, 1, 1, 1]
          inplace: true
      - Image2Array:
          data_format: 'cthw'
# OPTIMIZER: optimizer type, its hyper-parameters and the learning-rate
# schedule.
OPTIMIZER:
  # Mandatory; the type of optimizer, maps to 'paddlevideo/solver/'.
  name: 'AdamW'
  beta1: 0.9
  beta2: 0.999
  no_weight_decay_name: 'norm relative_position_bias_table'
  # Mandatory; the learning-rate scheduler, maps to 'paddlevideo/solver/'.
  learning_rate:
    name: 'CustomWarmupCosineStepDecay'
    iter_step: true
    # NOTE(review): fractional value; presumably scaled by the scheduler
    # rather than used as a raw iteration count. Confirm.
    warmup_iters: 2.5
    warmup_ratio: 0.1
    min_lr: 0
    # Written with a decimal point so that YAML 1.1 loaders resolve it as a
    # float; the bare form `3e-5` is read as a string by such loaders.
    base_lr: 3.0e-5
    max_epoch: 30
  weight_decay: 0.05
# METRIC: evaluation metric selected by name.
METRIC:
  name: 'CenterCropMetric'
# GRADIENT_ACCUMULATION: gradient accumulation settings.
GRADIENT_ACCUMULATION:
  # The sum of the batches to be calculated by all GPUs.
  global_batch_size: 64
# INFERENCE: inference helper selected by name, with its sampling and
# resize/crop sizes.
INFERENCE:
  name: 'VideoSwin_Inference_helper'
  num_seg: 1
  seg_len: 32
  short_size: 256
  target_size: 224
# Top-level run settings: model name, logging, checkpointing and training length.
model_name: "VideoSwin"
log_interval: 20  # Optional; the interval of the logger, default: 10.
# NOTE(review): presumably the checkpoint-saving interval; unit not shown here. Confirm.
save_interval: 5
epochs: 30  # Mandatory; total number of epochs.
log_level: "INFO"  # Optional; the logger level, default: "INFO".