Commit d8418ff

SunMarc authored and zucchini-nlp committed
Revert "Fix DeepSpeed mixed precision precedence over Accelerate defaults" (huggingface#41124)
* Revert "Fix DeepSpeed mixed precision precedence over Accelerate defaults (#3…" This reverts commit df67cd3. * fix
1 parent 7f1eb07 commit d8418ff

File tree

3 files changed: +7 / -105 lines changed

src/transformers/integrations/deepspeed.py

Lines changed: 0 additions & 47 deletions

@@ -130,58 +130,11 @@ def fill_match(self, ds_key_long, hf_val, hf_key=None, must_match=True):
 
     fill_only = partialmethod(fill_match, must_match=False)
 
-    def override_training_args_from_deepspeed(self, args):
-        """
-        Override TrainingArguments based on DeepSpeed config values to ensure compatibility.
-
-        This method ensures that the DeepSpeed config takes precedence over TrainingArguments
-        defaults when there are conflicts, particularly for mixed precision settings.
-
-        Args:
-            args: TrainingArguments object to potentially modify
-        """
-        # Check precision settings in DeepSpeed config and override TrainingArguments accordingly
-        # Only override defaults, not explicit user settings
-
-        # Check if user explicitly set precision options (we assume defaults are False)
-        user_set_fp16 = args.fp16 is True
-        user_set_bf16 = args.bf16 is True
-
-        if self.is_true("fp16.enabled"):
-            # DeepSpeed config explicitly enables fp16
-            if not user_set_fp16 and not user_set_bf16:
-                # User didn't explicitly set either, so apply DeepSpeed config
-                args.fp16 = True
-                args.bf16 = False
-            elif user_set_bf16 and not user_set_fp16:
-                # User explicitly chose bf16, but DeepSpeed config wants fp16
-                # This is a potential conflict - let user choice win but log a warning
-                pass  # Keep user's bf16=True, fp16=False
-        elif self.is_true("bf16.enabled"):
-            # DeepSpeed config explicitly enables bf16
-            if not user_set_fp16 and not user_set_bf16:
-                # User didn't explicitly set either, so apply DeepSpeed config
-                args.bf16 = True
-                args.fp16 = False
-            elif user_set_fp16 and not user_set_bf16:
-                # User explicitly chose fp16, but DeepSpeed config wants bf16
-                # This is a potential conflict - let user choice win but log a warning
-                pass  # Keep user's fp16=True, bf16=False
-        elif self.is_false("fp16.enabled") and self.is_false("bf16.enabled"):
-            # Both are explicitly disabled in DeepSpeed config
-            if not user_set_fp16 and not user_set_bf16:
-                # User didn't explicitly set either, so apply DeepSpeed config (fp32)
-                args.fp16 = False
-                args.bf16 = False
-
     def trainer_config_process(self, args, auto_find_batch_size=False):
         """
         Adjust the config with `TrainingArguments` values. This stage is run during `TrainingArguments` object
         creation.
         """
-        # First, override TrainingArguments based on DeepSpeed config to ensure compatibility
-        self.override_training_args_from_deepspeed(args)
-
         # DeepSpeed does:
         # train_batch_size = world_size * train_micro_batch_size_per_gpu * gradient_accumulation_steps
         train_batch_size = args.world_size * args.per_device_train_batch_size * args.gradient_accumulation_steps
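For context, a minimal sketch (not part of this diff, assumes a CUDA machine and illustrative values) of the setup that remains supported after the revert: precision left as "auto" in the DeepSpeed config is still filled in from TrainingArguments when the config is processed, whereas hard-coded fp16/bf16 values no longer flip the TrainingArguments defaults.

from transformers import TrainingArguments

# "auto" values are resolved from TrainingArguments during config processing,
# so the single source of truth for mixed precision stays in the training args.
ds_config = {
    "fp16": {"enabled": "auto"},
    "bf16": {"enabled": "auto"},
    "train_micro_batch_size_per_gpu": "auto",
    "gradient_accumulation_steps": "auto",
    "zero_optimization": {"stage": 2},
}

# With override_training_args_from_deepspeed() removed, precision must be requested
# here; a DeepSpeed config that merely enables fp16 no longer sets this flag.
args = TrainingArguments(output_dir="out", fp16=True, deepspeed=ds_config)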

src/transformers/training_args.py

Lines changed: 7 additions & 11 deletions

@@ -1688,8 +1688,13 @@ def __post_init__(self):
                 torch.backends.cudnn.allow_tf32 = False
             # no need to assert on else
 
-        # NOTE: Mixed precision environment variable setting moved to after DeepSpeed processing
-        # to ensure DeepSpeed config can override TrainingArguments defaults
+        # if training args is specified, it will override the one specified in the accelerate config
+        mixed_precision_dtype = os.environ.get("ACCELERATE_MIXED_PRECISION", "no")
+        if self.fp16:
+            mixed_precision_dtype = "fp16"
+        elif self.bf16:
+            mixed_precision_dtype = "bf16"
+        os.environ["ACCELERATE_MIXED_PRECISION"] = mixed_precision_dtype
 
         if self.report_to is None:
             logger.info(
@@ -1876,15 +1881,6 @@ def __post_init__(self):
             self.deepspeed_plugin.set_mixed_precision(mixed_precision)
             self.deepspeed_plugin.set_deepspeed_weakref()
 
-        # Set mixed precision environment variable after DeepSpeed processing
-        # This ensures DeepSpeed config overrides have been applied to fp16/bf16 settings
-        mixed_precision_dtype = os.environ.get("ACCELERATE_MIXED_PRECISION", "no")
-        if self.fp16:
-            mixed_precision_dtype = "fp16"
-        elif self.bf16:
-            mixed_precision_dtype = "bf16"
-        os.environ["ACCELERATE_MIXED_PRECISION"] = mixed_precision_dtype
-
         if self.use_cpu:
             self.dataloader_pin_memory = False
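The restored block implements a simple precedence rule: an explicit fp16/bf16 flag on TrainingArguments wins over whatever ACCELERATE_MIXED_PRECISION was already set to (for example by an accelerate config file). A standalone sketch of that logic, with a hypothetical helper name not taken from the codebase:

import os


def resolve_mixed_precision(fp16: bool, bf16: bool) -> str:
    # Start from the Accelerate default / environment value, then let the
    # training-argument flags override it.
    mixed_precision = os.environ.get("ACCELERATE_MIXED_PRECISION", "no")
    if fp16:
        mixed_precision = "fp16"
    elif bf16:
        mixed_precision = "bf16"
    os.environ["ACCELERATE_MIXED_PRECISION"] = mixed_precision
    return mixed_precision


# e.g. an accelerate config selected bf16, but the user passed fp16=True: fp16 wins.
os.environ["ACCELERATE_MIXED_PRECISION"] = "bf16"
assert resolve_mixed_precision(fp16=True, bf16=False) == "fp16"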

tests/deepspeed/test_deepspeed.py

Lines changed: 0 additions & 47 deletions

@@ -1431,50 +1431,3 @@ def test_clm_from_config_zero3_fp16(self):
         with CaptureStderr() as cs:
             execute_subprocess_async(cmd, env=self.get_env())
         self.assertIn("Detected DeepSpeed ZeRO-3", cs.err)
-
-
-@require_deepspeed
-class TestDeepSpeedMixedPrecisionPrecedence(TestCasePlus):
-    """Test DeepSpeed mixed precision precedence over Accelerate defaults."""
-
-    def setUp(self):
-        super().setUp()
-        unset_hf_deepspeed_config()
-
-    def tearDown(self):
-        super().tearDown()
-        unset_hf_deepspeed_config()
-
-    def test_deepspeed_fp16_overrides_defaults(self):
-        """Test that DeepSpeed fp16 config overrides TrainingArguments defaults"""
-        from transformers.integrations.deepspeed import HfTrainerDeepSpeedConfig
-
-        args = TrainingArguments(output_dir="./test_output", fp16=False, bf16=False)
-        ds_config = {"fp16": {"enabled": True}, "bf16": {"enabled": False}, "zero_optimization": {"stage": 2}}
-        hf_ds_config = HfTrainerDeepSpeedConfig(ds_config)
-        hf_ds_config.trainer_config_process(args)
-        self.assertTrue(args.fp16)
-        self.assertFalse(args.bf16)
-
-    def test_deepspeed_bf16_overrides_defaults(self):
-        """Test that DeepSpeed bf16 config overrides TrainingArguments defaults"""
-        from transformers.integrations.deepspeed import HfTrainerDeepSpeedConfig
-
-        args = TrainingArguments(output_dir="./test_output", fp16=False, bf16=False)
-        ds_config = {"fp16": {"enabled": False}, "bf16": {"enabled": True}, "zero_optimization": {"stage": 2}}
-        hf_ds_config = HfTrainerDeepSpeedConfig(ds_config)
-        hf_ds_config.trainer_config_process(args)
-        self.assertTrue(args.bf16)
-        self.assertFalse(args.fp16)
-
-    def test_user_explicit_settings_preserved(self):
-        """Test that explicit user settings are preserved over DeepSpeed config"""
-        from transformers.integrations.deepspeed import HfTrainerDeepSpeedConfig
-
-        args = TrainingArguments(output_dir="./test_output", fp16=True, bf16=False)  # User explicit
-        ds_config = {"fp16": {"enabled": False}, "bf16": {"enabled": True}, "zero_optimization": {"stage": 2}}
-        hf_ds_config = HfTrainerDeepSpeedConfig(ds_config)
-        hf_ds_config.trainer_config_process(args)
-        # User's explicit choice should be preserved
-        self.assertTrue(args.fp16)
-        self.assertFalse(args.bf16)
