Commit 08819c7

Merge branch 'main' into xccl/new_api

2 parents: 1150063 + c967dbe

4 files changed: +144 -6

test/xpu/run_distributed.py (+26 -6)
```diff
@@ -2,22 +2,42 @@
 import subprocess
 import sys
 
+from skip_list_dist import skip_dict
+from xpu_test_utils import launch_test
 
+res = 0
+res2 = 0
+fail_test = []
+
+
+# run python test
 def run(test_command):
     result = subprocess.run(test_command, capture_output=True, text=True)
     print(result.stdout)
     print(result.stderr)
     if "FAILED" in result.stdout or "FAILED" in result.stderr:
-        return 0
-    else:
-        return 1
+        fail_test.append(" ".join(test_command))
+    return result.returncode
 
 
-res = 0
 test_command = ["python", "distributed/test_c10d_ops_xccl.py"]
 res += run(test_command)
 test_command = ["python", "distributed/test_c10d_xccl.py"]
 res += run(test_command)
 
-exit_code = os.WEXITSTATUS(res)
-sys.exit(exit_code)
+# run pytest with skiplist
+for key in skip_dict:
+    skip_list = skip_dict[key]
+    fail = launch_test(key, skip_list)
+    res2 += fail
+    if fail:
+        fail_test.append(key)
+
+if fail_test:
+    print(",".join(fail_test) + " have failures")
+
+exit_code = os.WEXITSTATUS(res2)
+if exit_code == 0:
+    sys.exit(res)
+else:
+    sys.exit(exit_code)
```
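The pytest leg of the runner delegates to `launch_test`, imported from `xpu_test_utils.py`; its body is not part of this diff. As a rough illustration of the contract the loop above relies on (run one test file, exclude the listed cases, return a per-file exit code), a minimal sketch could look like the following. The `-k` deselection mechanism is an assumption for illustration, not the actual implementation:

```python
import subprocess

def launch_test(test_file, skip_list=None):
    # Hypothetical sketch of the helper run_distributed.py imports.
    # Runs one test file under pytest and deselects every case named
    # in skip_list via a "-k" expression.
    cmd = ["pytest", "-v", test_file]
    if skip_list:
        # "not a and not b and ..." keeps all cases except the skipped ones.
        cmd += ["-k", " and ".join(f"not {case}" for case in skip_list)]
    result = subprocess.run(cmd)
    # Non-zero when pytest reports failures or errors for this file.
    return result.returncode
```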

test/xpu/skip_list_common.py (+18)
```diff
@@ -773,6 +773,13 @@
         "test_scaled_dot_product_attention_3D_input_dim_2D_attn_mask_dropout_p_0_5_xpu",
         "test_scaled_dot_product_attention_3D_input_dim_2D_attn_mask_dropout_p_0_2_xpu",
         "test_scaled_dot_product_attention_3D_input_dim_2D_attn_mask_dropout_p_0_0_xpu",
+        # https://github.com/intel/torch-xpu-ops/issues/1432
+        "test_multiheadattention_fastpath_attn_mask_attn_mask_dim_2_key_padding_mask_dim_2_bool_xpu",
+        "test_multiheadattention_fastpath_attn_mask_attn_mask_dim_3_key_padding_mask_dim_2_bool_xpu",
+        "test_transformerencoder_fastpath_use_torchscript_False_enable_nested_tensor_False_use_autocast_False_d_model_12_xpu",
+        "test_transformerencoder_fastpath_use_torchscript_False_enable_nested_tensor_False_use_autocast_True_d_model_12_xpu",
+        "test_transformerencoder_fastpath_use_torchscript_False_enable_nested_tensor_True_use_autocast_False_d_model_12_xpu",
+        "test_transformerencoder_fastpath_use_torchscript_False_enable_nested_tensor_True_use_autocast_True_d_model_12_xpu",
     ),
     "test_complex_xpu.py": None,
     "test_modules_xpu.py": (
@@ -1027,6 +1034,10 @@
         "test_ctc_loss_cudnn_tensor",  # want "xpu" in function name
         # RuntimeError: reflection_pad2d_backward_xpu does not have a deterministic implementation, but you set 'torch.use_deterministic_algorithms(True)'.
         "test_ReflectionPad2d_large_deterministic_xpu",
+        # Case updated in pytorch commit 97272e4
+        "test_hardswish_grad_corner_xpu_bfloat16",
+        "test_hardswish_grad_corner_xpu_float16",
+        "test_hardswish_grad_corner_xpu_float32",
     ),
     "test_indexing_xpu.py": (
         # XPU implementation doesn't claimn FP8 now
@@ -1466,6 +1477,13 @@
         "test_compile_int4_mm_m_64_k_32_n_64_xpu",
         "test_compile_int4_mm_m_64_k_64_n_48_xpu",
         "test_compile_int4_mm_m_64_k_64_n_64_xpu",
+        # float8 is not supported
+        "test_matmul_scaled_gemm_offline_tunableop_xpu_float8_e4m3fnuz",
+        "test_matmul_scaled_gemm_offline_tunableop_xpu_float8_e5m2fnuz",
+        "test_scaled_gemm_offline_tunableop_xpu_float8_e4m3fnuz",
+        "test_scaled_gemm_offline_tunableop_xpu_float8_e5m2fnuz",
+        # case need to port for xpu
+        "test_gemm_bias_offline_tunableop_xpu_bfloat16",
     ),
     "test_ops_fwd_gradients_xpu.py": (
         # All of the followings are oneDNN issues
```

test/xpu/skip_list_dist.py (+95, new file)
```diff
@@ -0,0 +1,95 @@
+skip_dict = {
+    "../../../../test/distributed/fsdp/test_fsdp_checkpoint.py": (
+        "test_checkpoint_fsdp_wrapping_cpu_offload0_offload_activations_False_use_orig_params_False",
+        "test_checkpoint_fsdp_wrapping_cpu_offload1_offload_activations_False_use_orig_params_False",
+        "test_checkpoint_fsdp_wrapping_cpu_offload1_offload_activations_True_use_orig_params_False",
+        "test_checkpoint_submodule_use_reentrant_False_xpu",
+    ),
+    "../../../../test/distributed/fsdp/test_fsdp_apply.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_clip_grad_norm.py": (
+        "test_ddp_parity_xpu",
+    ),
+    "../../../../test/distributed/fsdp/test_fsdp_comm.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_core.py": (
+        "test_delayed_optim_step_offload_false_no_shard_xpu",
+        "test_delayed_optim_step_offload_false_none_xpu",
+        "test_delayed_optim_step_offload_false_shard_grad_op_xpu",
+        "test_delayed_optim_step_offload_true_none_xpu",
+        "test_delayed_optim_step_offload_true_shard_grad_op_xpu",
+        "test_delayed_reduce_scatter_offload_false_no_shard_xpu",
+        "test_delayed_reduce_scatter_offload_false_none_xpu",
+        "test_delayed_reduce_scatter_offload_false_shard_grad_op_xpu",
+        "test_delayed_reduce_scatter_offload_true_none_xpu",
+        "test_delayed_reduce_scatter_offload_true_shard_grad_op_xpu",
+        "test_mixture_of_experts_offload_false_no_shard_xpu",
+        "test_mixture_of_experts_offload_false_none_xpu",
+        "test_mixture_of_experts_offload_false_shard_grad_op_xpu",
+        "test_mixture_of_experts_offload_true_none_xpu",
+        "test_mixture_of_experts_offload_true_shard_grad_op_xpu",
+        "test_mixture_of_experts_with_delay_before_free_offload_false_no_shard_xpu",
+        "test_mixture_of_experts_with_delay_before_free_offload_false_none_xpu",
+        "test_mixture_of_experts_with_delay_before_free_offload_false_shard_grad_op_xpu",
+        "test_mixture_of_experts_with_delay_before_free_offload_true_none_xpu",
+        "test_mixture_of_experts_with_delay_before_free_offload_true_shard_grad_op_xpu",
+        "test_nested_always_wrap_model_offload_false_no_shard_xpu",
+        "test_nested_always_wrap_model_offload_false_none_xpu",
+        "test_nested_always_wrap_model_offload_false_shard_grad_op_xpu",
+        "test_nested_always_wrap_model_offload_true_none_xpu",
+        "test_nested_always_wrap_model_offload_true_shard_grad_op_xpu",
+        "test_nested_wrapped_model_offload_false_no_shard_xpu",
+        "test_nested_wrapped_model_offload_false_none_xpu",
+        "test_nested_wrapped_model_offload_false_shard_grad_op_xpu",
+        "test_nested_wrapped_model_offload_true_none_xpu",
+        "test_nested_wrapped_model_offload_true_shard_grad_op_xpu",
+        "test_transformer_offload_false_no_shard_xpu",
+        "test_transformer_offload_false_none_xpu",
+        "test_transformer_offload_false_shard_grad_op_xpu",
+        "test_transformer_offload_true_none_xpu",
+        "test_transformer_offload_true_shard_grad_op_xpu",
+        # https://github.com/intel/torch-xpu-ops/issues/1475
+        "test_transformer_no_grad_mixed_precision_True_xpu",
+        "test_transformer_no_grad_mixed_precision_False_xpu",
+    ),
+    # Will add them back after debugging
+    # "../../../../test/distributed/fsdp/test_fsdp_dtensor_state_dict.py": (
+    #     "test_dtensor_sharded_model_load_state_dict_offload_to_cpu_False_is_even_sharded_model_False_xpu",
+    #     "test_dtensor_sharded_model_load_state_dict_offload_to_cpu_False_is_even_sharded_model_True_xpu",
+    #     "test_dtensor_sharded_model_load_state_dict_offload_to_cpu_True_is_even_sharded_model_False_xpu",
+    #     "test_dtensor_sharded_model_load_state_dict_offload_to_cpu_True_is_even_sharded_model_True_xpu",
+    #     "test_dtensor_sharded_optim_load_state_dict_offload_to_cpu_False_is_even_sharded_model_False_xpu",
+    #     "test_dtensor_sharded_optim_load_state_dict_offload_to_cpu_False_is_even_sharded_model_True_xpu",
+    #     "test_dtensor_sharded_optim_load_state_dict_offload_to_cpu_True_is_even_sharded_model_False_xpu",
+    #     "test_dtensor_sharded_optim_load_state_dict_offload_to_cpu_True_is_even_sharded_model_True_xpu",
+    #     "test_dtensor_sharded_tensor_state_dict_identical_offload_to_cpu_False_is_even_sharded_model_False_xpu",
+    #     "test_dtensor_sharded_tensor_state_dict_identical_offload_to_cpu_False_is_even_sharded_model_True_xpu",
+    #     "test_dtensor_sharded_tensor_state_dict_identical_offload_to_cpu_True_is_even_sharded_model_False_xpu",
+    #     "test_dtensor_sharded_tensor_state_dict_identical_offload_to_cpu_True_is_even_sharded_model_True_xpu",
+    #     "test_fsdp_init_with_device_mesh_is_even_sharded_model_False_xpu",
+    #     "test_fsdp_init_with_device_mesh_is_even_sharded_model_True_xpu",
+    #     "test_raises_warning_or_errors_xpu",
+    # ),
+    "../../../../test/distributed/fsdp/test_fsdp_exec_order.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_fine_tune.py": (
+        "test_parity_with_non_frozen_fsdp_xpu",
+        "test_parity_with_ddp_xpu",
+    ),
+    "../../../../test/distributed/fsdp/test_fsdp_fx.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_input.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_multiple_forward.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_multiple_wrapping.py": (
+        "test_transformer_no_grad_mixed_precision_True_xpu",
+    ),
+    "../../../../test/distributed/fsdp/test_fsdp_uneven.py": None,
+    # Will add them back after debugging
+    # "../../../../test/distributed/fsdp/test_hsdp_dtensor_state_dict.py": (
+    #     "test_dtensor_sharded_model_load_state_dict_offload_to_cpu_False_xpu",
+    #     "test_dtensor_sharded_model_load_state_dict_offload_to_cpu_True_xpu",
+    #     "test_dtensor_sharded_optim_load_state_dict_offload_to_cpu_False_xpu",
+    #     "test_dtensor_sharded_optim_load_state_dict_offload_to_cpu_True_xpu",
+    #     "test_dtensor_sharded_tensor_state_dict_identical_offload_to_cpu_False_xpu",
+    #     "test_dtensor_sharded_tensor_state_dict_identical_offload_to_cpu_True_xpu",
+    #     "test_hsdp_init_with_device_mesh_xpu",
+    #     "test_root_module_is_not_FSDP_xpu",
+    # ),
+    "../../../../test/distributed/fsdp/test_utils.py": None,
+}
```
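Each key in this table is a test file path relative to where the runner executes, and each value is either a tuple of case names to exclude or `None`, which apparently means the whole file runs unfiltered. A quick, hypothetical sanity check of the table's shape:

```python
from skip_list_dist import skip_dict

# Count excluded cases per file; None entries run with no exclusions.
for test_file, skips in skip_dict.items():
    n = 0 if skips is None else len(skips)
    print(f"{test_file}: {n} skipped case(s)")
```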

test/xpu/xpu_test_utils.py (+5)
```diff
@@ -429,6 +429,11 @@
             torch.float32: tol(atol=2e-5, rtol=5e-5),
         }
     },
+    "test_modules_xpu.py": {
+        ("TestModuleXPU", "test_non_contiguous_tensors_nn_LazyConv3d_xpu_float32"): {
+            torch.float32: tol(atol=2e-5, rtol=7e-5),
+        }
+    },
 }
```
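The new entry loosens the float32 tolerance for one `TestModuleXPU` case. Assuming `tol` here is PyTorch's `tol` namedtuple from `torch.testing._internal.common_device_type` (a guess; the import sits outside this hunk), the table can be read back like this:

```python
import torch
from torch.testing._internal.common_device_type import tol

# Shape of the table: file -> (test class, test name) -> dtype -> tolerance.
overrides = {
    "test_modules_xpu.py": {
        ("TestModuleXPU", "test_non_contiguous_tensors_nn_LazyConv3d_xpu_float32"): {
            torch.float32: tol(atol=2e-5, rtol=7e-5),
        }
    },
}

entry = overrides["test_modules_xpu.py"][
    ("TestModuleXPU", "test_non_contiguous_tensors_nn_LazyConv3d_xpu_float32")
]
print(entry[torch.float32].atol, entry[torch.float32].rtol)  # 2e-05 7e-05
```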