Skip to content

Commit

Permalink
⚗️ Style Transfer: add additional ds experiments
Browse files Browse the repository at this point in the history
  • Loading branch information
simonmeoni committed Sep 4, 2024
1 parent 0dd65b8 commit 5e19db8
Show file tree
Hide file tree
Showing 43 changed files with 640 additions and 322 deletions.
Original file line number Diff line number Diff line change
@@ -1,35 +1,36 @@
# @package _global_
defaults:
- default
- _self_
- default

dataset:
_target_: datasets.Dataset
name: 0.04-0
topk: 20
percentile: 10
precision: true
name: 0.06-2-ofzh3aqu
random_sampling: true
percentile: 0
precision: true
topk: 100

training_args:
_target_: transformers.TrainingArguments
per_device_train_batch_size: 4
gradient_accumulation_steps: 4
logging_steps: 20
warmup_steps: 50
eval_steps: 100
evaluation_strategy: "steps"
remove_unused_columns: true
save_strategy: "no"
output_dir: "models/icd"
lr_scheduler_type: "constant"
num_train_epochs: 8
num_train_epochs: 10
learning_rate: 2e-5

wandb_project: style-transfer-baseline-icd
wandb_project: style-transfer-icd-seed
model: microsoft/deberta-v3-base
seed: 0
hydra:
sweeper:
params:
dataset.topk: 20,50,100,400
dataset.precision: true
model: microsoft/deberta-v3-base
dataset.name: 0.06-2-ofzh3aqu
dataset.random_sampling: true, false
dataset.percentile: 20
seed: 0, 1, 2, 3, 4
35 changes: 35 additions & 0 deletions lib/style-transfer/configs/ds_stream/ablation_icd/40.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# @package _global_
# ICD ablation config whose multirun sweep pins dataset.percentile to 40.
defaults:
  - _self_

# Base dataset settings; name, random_sampling and percentile are also listed
# under hydra.sweeper.params below.
dataset:
  name: 0.06-2-ofzh3aqu
  random_sampling: true
  percentile: 0
  precision: true
  topk: 100

# Keyword arguments for transformers.TrainingArguments (built via _target_).
training_args:
  _target_: transformers.TrainingArguments
  per_device_train_batch_size: 4
  gradient_accumulation_steps: 4
  logging_steps: 20
  warmup_steps: 50
  eval_steps: 100
  evaluation_strategy: 'steps'
  remove_unused_columns: true
  # Must stay quoted: a bare `no` is read as a boolean by YAML 1.1 parsers.
  save_strategy: 'no'
  output_dir: 'models/icd'
  num_train_epochs: 10
  learning_rate: 2e-5

wandb_project: style-transfer-icd-seed
model: microsoft/deberta-v3-base
seed: 0

# Sweep definition: comma-separated values are the choices for each key.
hydra:
  sweeper:
    params:
      dataset.name: 0.06-2-ofzh3aqu
      dataset.random_sampling: true, false
      dataset.percentile: 40
      seed: 0, 1, 2, 3, 4
35 changes: 35 additions & 0 deletions lib/style-transfer/configs/ds_stream/ablation_icd/60.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# @package _global_
# ICD ablation config whose multirun sweep pins dataset.percentile to 60.
defaults:
  - _self_

# Base dataset settings; name, random_sampling and percentile are also listed
# under hydra.sweeper.params below.
dataset:
  name: 0.06-2-ofzh3aqu
  random_sampling: true
  percentile: 0
  precision: true
  topk: 100

# Keyword arguments for transformers.TrainingArguments (built via _target_).
training_args:
  _target_: transformers.TrainingArguments
  per_device_train_batch_size: 4
  gradient_accumulation_steps: 4
  logging_steps: 20
  warmup_steps: 50
  eval_steps: 100
  evaluation_strategy: 'steps'
  remove_unused_columns: true
  # Must stay quoted: a bare `no` is read as a boolean by YAML 1.1 parsers.
  save_strategy: 'no'
  output_dir: 'models/icd'
  num_train_epochs: 10
  learning_rate: 2e-5

wandb_project: style-transfer-icd-seed
model: microsoft/deberta-v3-base
seed: 0

# Sweep definition: comma-separated values are the choices for each key.
hydra:
  sweeper:
    params:
      dataset.name: 0.06-2-ofzh3aqu
      dataset.random_sampling: true, false
      dataset.percentile: 60
      seed: 0, 1, 2, 3, 4
35 changes: 35 additions & 0 deletions lib/style-transfer/configs/ds_stream/ablation_icd/80.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# @package _global_
# ICD ablation config whose multirun sweep pins dataset.percentile to 80.
defaults:
  - _self_

# Base dataset settings; name, random_sampling and percentile are also listed
# under hydra.sweeper.params below.
dataset:
  name: 0.06-2-ofzh3aqu
  random_sampling: true
  percentile: 0
  precision: true
  topk: 100

# Keyword arguments for transformers.TrainingArguments (built via _target_).
training_args:
  _target_: transformers.TrainingArguments
  per_device_train_batch_size: 4
  gradient_accumulation_steps: 4
  logging_steps: 20
  warmup_steps: 50
  eval_steps: 100
  evaluation_strategy: 'steps'
  remove_unused_columns: true
  # Must stay quoted: a bare `no` is read as a boolean by YAML 1.1 parsers.
  save_strategy: 'no'
  output_dir: 'models/icd'
  num_train_epochs: 10
  learning_rate: 2e-5

wandb_project: style-transfer-icd-seed
model: microsoft/deberta-v3-base
seed: 0

# Sweep definition: comma-separated values are the choices for each key.
hydra:
  sweeper:
    params:
      dataset.name: 0.06-2-ofzh3aqu
      dataset.random_sampling: true, false
      dataset.percentile: 80
      seed: 0, 1, 2, 3, 4
32 changes: 32 additions & 0 deletions lib/style-transfer/configs/ds_stream/ablation_icd/default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# @package _global_
# Shared ICD ablation settings; the sweep percentile is left unset here.
dataset:
  name: 0.06-2-ofzh3aqu
  random_sampling: true
  percentile: 0
  precision: true
  topk: 100

# Keyword arguments for transformers.TrainingArguments (built via _target_).
training_args:
  _target_: transformers.TrainingArguments
  per_device_train_batch_size: 4
  gradient_accumulation_steps: 4
  logging_steps: 20
  warmup_steps: 50
  eval_steps: 100
  evaluation_strategy: 'steps'
  remove_unused_columns: true
  # Must stay quoted: a bare `no` is read as a boolean by YAML 1.1 parsers.
  save_strategy: 'no'
  output_dir: 'models/icd'
  num_train_epochs: 10
  learning_rate: 2e-5

wandb_project: style-transfer-icd-seed
model: microsoft/deberta-v3-base
seed: 0

# Sweep definition: comma-separated values are the choices for each key.
hydra:
  sweeper:
    params:
      dataset.name: 0.06-2-ofzh3aqu
      dataset.random_sampling: true, false
      # NOTE(review): `???` is OmegaConf's missing-value marker; presumably a
      # concrete percentile is meant to be supplied by an including config or
      # on the command line — confirm.
      dataset.percentile: ???
      seed: 0, 1, 2, 3, 4
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
# @package _global_
defaults:
- default
- _self_

dataset:
_target_: datasets.Dataset
name: all
name: 0.04-0
topk: 20
percentile: 0
precision: true
Expand All @@ -16,18 +15,22 @@ training_args:
per_device_train_batch_size: 4
gradient_accumulation_steps: 4
logging_steps: 20
warmup_steps: 50
eval_steps: 100
evaluation_strategy: "steps"
remove_unused_columns: true
save_strategy: "no"
output_dir: "models/icd"
lr_scheduler_type: "constant"
num_train_epochs: 8
output_dir: "models/ner"
num_train_epochs: 10
learning_rate: 2e-5

wandb_project: style-transfer-ablation-all-percentile-1
wandb_project: style-transfer-ner-seed
model: microsoft/deberta-v3-base
seed: 0
hydra:
sweeper:
params:
dataset.topk: 20,50,100,400
dataset.name: 0.06-2-ofzh3aqu
dataset.random_sampling: true, false
dataset.percentile: 20
seed: 0, 1, 2, 3, 4
36 changes: 36 additions & 0 deletions lib/style-transfer/configs/ds_stream/ablation_ner/40.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# @package _global_
# NER ablation config whose multirun sweep pins dataset.percentile to 40.
defaults:
  - _self_

# Base dataset settings; name, random_sampling and percentile are also listed
# under hydra.sweeper.params below.
dataset:
  _target_: datasets.Dataset
  name: 0.04-0
  topk: 20
  percentile: 0
  precision: true
  random_sampling: false

# Keyword arguments for transformers.TrainingArguments (built via _target_).
training_args:
  _target_: transformers.TrainingArguments
  per_device_train_batch_size: 4
  gradient_accumulation_steps: 4
  logging_steps: 20
  warmup_steps: 50
  eval_steps: 100
  evaluation_strategy: 'steps'
  remove_unused_columns: true
  # Must stay quoted: a bare `no` is read as a boolean by YAML 1.1 parsers.
  save_strategy: 'no'
  output_dir: 'models/ner'
  num_train_epochs: 10
  learning_rate: 2e-5

wandb_project: style-transfer-ner-seed
model: microsoft/deberta-v3-base
seed: 0

# Sweep definition: comma-separated values are the choices for each key.
hydra:
  sweeper:
    params:
      dataset.name: 0.06-2-ofzh3aqu
      dataset.random_sampling: true, false
      # Was 20 (copy-paste from the 20-percentile config); this file is
      # 40.yaml, so the sweep must use the 40th percentile.
      dataset.percentile: 40
      seed: 0, 1, 2, 3, 4
36 changes: 36 additions & 0 deletions lib/style-transfer/configs/ds_stream/ablation_ner/60.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# @package _global_
# NER ablation config whose multirun sweep pins dataset.percentile to 60.
defaults:
  - _self_

# Base dataset settings; name, random_sampling and percentile are also listed
# under hydra.sweeper.params below.
dataset:
  _target_: datasets.Dataset
  name: 0.04-0
  topk: 20
  percentile: 0
  precision: true
  random_sampling: false

# Keyword arguments for transformers.TrainingArguments (built via _target_).
training_args:
  _target_: transformers.TrainingArguments
  per_device_train_batch_size: 4
  gradient_accumulation_steps: 4
  logging_steps: 20
  warmup_steps: 50
  eval_steps: 100
  evaluation_strategy: 'steps'
  remove_unused_columns: true
  # Must stay quoted: a bare `no` is read as a boolean by YAML 1.1 parsers.
  save_strategy: 'no'
  output_dir: 'models/ner'
  num_train_epochs: 10
  learning_rate: 2e-5

wandb_project: style-transfer-ner-seed
model: microsoft/deberta-v3-base
seed: 0

# Sweep definition: comma-separated values are the choices for each key.
hydra:
  sweeper:
    params:
      dataset.name: 0.06-2-ofzh3aqu
      dataset.random_sampling: true, false
      # Was 20 (copy-paste from the 20-percentile config); this file is
      # 60.yaml, so the sweep must use the 60th percentile.
      dataset.percentile: 60
      seed: 0, 1, 2, 3, 4
36 changes: 36 additions & 0 deletions lib/style-transfer/configs/ds_stream/ablation_ner/80.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# @package _global_
# NER ablation config whose multirun sweep pins dataset.percentile to 80.
defaults:
  - _self_

# Base dataset settings; name, random_sampling and percentile are also listed
# under hydra.sweeper.params below.
dataset:
  _target_: datasets.Dataset
  name: 0.04-0
  topk: 20
  percentile: 0
  precision: true
  random_sampling: false

# Keyword arguments for transformers.TrainingArguments (built via _target_).
training_args:
  _target_: transformers.TrainingArguments
  per_device_train_batch_size: 4
  gradient_accumulation_steps: 4
  logging_steps: 20
  warmup_steps: 50
  eval_steps: 100
  evaluation_strategy: 'steps'
  remove_unused_columns: true
  # Must stay quoted: a bare `no` is read as a boolean by YAML 1.1 parsers.
  save_strategy: 'no'
  output_dir: 'models/ner'
  num_train_epochs: 10
  learning_rate: 2e-5

wandb_project: style-transfer-ner-seed
model: microsoft/deberta-v3-base
seed: 0

# Sweep definition: comma-separated values are the choices for each key.
hydra:
  sweeper:
    params:
      dataset.name: 0.06-2-ofzh3aqu
      dataset.random_sampling: true, false
      # Was 20 (copy-paste from the 20-percentile config); this file is
      # 80.yaml, so the sweep must use the 80th percentile.
      dataset.percentile: 80
      seed: 0, 1, 2, 3, 4
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
# @package _global_
defaults:
- default
- _self_

dataset:
_target_: datasets.Dataset
name: 0.04-0
Expand All @@ -27,11 +23,13 @@ training_args:
num_train_epochs: 8
learning_rate: 2e-5

wandb_project: style-transfer-icd-5
wandb_project: style-transfer-icd-seed
model: microsoft/deberta-v3-base
seed: 0
hydra:
sweeper:
params:
dataset.precision: true
dataset.name: gold, 0.06-0, 0.06-1-ofzh3aqu, 0.06-2-ofzh3aqu, 0.04-1-mru97w7c, 0.04-2-mru97w7c
dataset.name: combined
model: microsoft/deberta-v3-base
seed: 0, 1, 2, 3, 4
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
# @package _global_
defaults:
- default
- _self_

dataset:
_target_: datasets.Dataset
name: 0.04-0
Expand All @@ -27,11 +23,13 @@ training_args:
num_train_epochs: 8
learning_rate: 2e-5

wandb_project: style-transfer-icd-5
wandb_project: style-transfer-icd-seed
model: microsoft/deberta-v3-base
seed: 0
hydra:
sweeper:
params:
dataset.precision: true
dataset.name: gold, 0.06-0, 0.06-1-ofzh3aqu, 0.06-2-ofzh3aqu, 0.04-1-mru97w7c, 0.04-2-mru97w7c
dataset.name: combined
model: microsoft/deberta-v3-base
seed: 0, 1, 2, 3, 4
Loading

0 comments on commit 5e19db8

Please sign in to comment.