Skip to content

[executorch][cuda] fuse gate/up MLP projections #20482

Merged
Gasoonjia merged 3 commits into
mainfrom
gemma4_31b-cuda-decode-speedup
Jun 25, 2026
Merged

[executorch][cuda] fuse gate/up MLP projections #20482
Gasoonjia merged 3 commits into
mainfrom
gemma4_31b-cuda-decode-speedup

[executorch][cuda] gemma4_31b: fuse gate/up MLP projections (default-on)

4025660
Select commit
Loading
Failed to load commit list.
PyTorch Bot / Dr.CI completed Jun 25, 2026 in 0s

Dr.CI classification results

{"FAILED":[{"workflowId":28188311787,"workflowUniqueId":63454257,"id":83495979545,"runnerName":"i-00150b6ba31737f67","authorEmail":"gasoonjia@meta.com","name":"pull / unittest-editable / linux / linux-job","jobName":"unittest-editable / linux / linux-job","conclusion":"failure","completed_at":"2026-06-25T18:36:03.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/28188311787/job/83495979545","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/83495979545","head_branch":"gemma4_31b-cuda-decode-speedup","pr_number":20482,"head_sha":"4025660ac810cf796f6a19c06692b1777e0ac145","head_sha_timestamp":"2026-06-25T15:56:51.000000000Z","failure_captures":["RuntimeError: Command docker exec -t 2b25cd9f539890778880ab9e9ff8565d1ff57db62a1365829ed1613992a6aafc /exec failed with exit code 1"],"failure_lines":["RuntimeError: Command docker exec -t 2b25cd9f539890778880ab9e9ff8565d1ff57db62a1365829ed1613992a6aafc /exec failed with exit code 1"],"failure_context":[],"time":"2026-06-25T17:25:26.000000000Z"},{"workflowId":28188311787,"workflowUniqueId":63454257,"id":83495979440,"runnerName":"i-0f769932d2fa4f394","authorEmail":"gasoonjia@meta.com","name":"pull / unittest / linux / linux-job","jobName":"unittest / linux / linux-job","conclusion":"failure","completed_at":"2026-06-25T18:36:12.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/28188311787/job/83495979440","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/83495979440","head_branch":"gemma4_31b-cuda-decode-speedup","pr_number":20482,"head_sha":"4025660ac810cf796f6a19c06692b1777e0ac145","head_sha_timestamp":"2026-06-25T15:56:51.000000000Z","failure_captures":["RuntimeError: Command docker exec -t 9b7c35dea08f7d7e172d2ef7d9de93f92814edd3c9b27b2a4512c0cf36f65643 /exec failed with exit code 1"],"failure_lines":["RuntimeError: Command docker exec -t 9b7c35dea08f7d7e172d2ef7d9de93f92814edd3c9b27b2a4512c0cf36f65643 /exec failed with exit code 1"],"failure_context":[],"time":"2026-06-25T17:25:26.000000000Z"},{"workflowId":28188311787,"workflowUniqueId":63454257,"id":83495978890,"runnerName":"i-0b4a60bfacd97f9c5","authorEmail":"gasoonjia@meta.com","name":"pull / test-arm-backend-no-driver (test_pytest_ops_tosa) / linux-job","jobName":"test-arm-backend-no-driver (test_pytest_ops_tosa) / linux-job","conclusion":"failure","completed_at":"2026-06-25T18:30:35.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/28188311787/job/83495978890","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/83495978890","head_branch":"gemma4_31b-cuda-decode-speedup","pr_number":20482,"head_sha":"4025660ac810cf796f6a19c06692b1777e0ac145","head_sha_timestamp":"2026-06-25T15:56:51.000000000Z","failure_captures":["RuntimeError: Command docker exec -t 42ab82696ecf3cee778a305f76e6dc75fd6e4cfc4d45659ca886127e49f5bcae /exec failed with exit code 1"],"failure_lines":["RuntimeError: Command docker exec -t 42ab82696ecf3cee778a305f76e6dc75fd6e4cfc4d45659ca886127e49f5bcae /exec failed with exit code 1"],"failure_context":[],"time":"2026-06-25T17:25:26.000000000Z"}],"FLAKY":[{"workflowId":28188311787,"workflowUniqueId":63454257,"id":83495980766,"runnerName":"i-0536a652bf262908a","authorEmail":"gasoonjia@meta.com","name":"pull / test-qnn-testsuite-linux / test-backend-linux (qnn, models) / linux-job","jobName":"test-qnn-testsuite-linux / test-backend-linux (qnn, models) / linux-job","conclusion":"failure","completed_at":"2026-06-25T18:29:03.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/28188311787/job/83495980766","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/83495980766","head_branch":"gemma4_31b-cuda-decode-speedup","pr_number":20482,"head_sha":"4025660ac810cf796f6a19c06692b1777e0ac145","head_sha_timestamp":"2026-06-25T15:56:51.000000000Z","failure_captures":["The runner has received a shutdown signal. This can happen when the runner service is stopped, or a manually started runner is canceled."],"failure_lines":["##[error]The runner has received a shutdown signal. This can happen when the runner service is stopped, or a manually started runner is canceled."],"failure_context":[],"time":"2026-06-25T17:25:26.000000000Z"}],"BROKEN_TRUNK":[{"workflowId":28188311787,"workflowUniqueId":63454257,"id":83495979039,"runnerName":"i-0a3e2e5e37304c84d","authorEmail":"gasoonjia@meta.com","name":"pull / test-llama-runner-qnn-linux (fp32, qnn_16a16w, qnn) / linux-job","jobName":"test-llama-runner-qnn-linux (fp32, qnn_16a16w, qnn) / linux-job","conclusion":"failure","completed_at":"2026-06-25T18:17:04.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/28188311787/job/83495979039","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/83495979039","head_branch":"gemma4_31b-cuda-decode-speedup","pr_number":20482,"head_sha":"4025660ac810cf796f6a19c06692b1777e0ac145","head_sha_timestamp":"2026-06-25T15:56:51.000000000Z","failure_captures":[],"failure_lines":[],"failure_context":[],"time":"2026-06-25T17:25:26.000000000Z"},{"workflowId":28188311787,"workflowUniqueId":63454257,"id":83495979483,"runnerName":"i-01ff70d1614aaf1da","authorEmail":"gasoonjia@meta.com","name":"pull / unittest / macos / macos-job","jobName":"unittest / macos / macos-job","conclusion":"failure","completed_at":"2026-06-25T18:19:32.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/28188311787/job/83495979483","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/83495979483","head_branch":"gemma4_31b-cuda-decode-speedup","pr_number":20482,"head_sha":"4025660ac810cf796f6a19c06692b1777e0ac145","head_sha_timestamp":"2026-06-25T15:56:51.000000000Z","failure_captures":["RuntimeError: Command bash /Users/ec2-user/runner/_work/_temp/exec_script failed with exit code 1"],"failure_lines":["RuntimeError: Command bash /Users/ec2-user/runner/_work/_temp/exec_script failed with exit code 1"],"failure_context":[],"time":"2026-06-25T17:25:26.000000000Z"},{"workflowId":28188311787,"workflowUniqueId":63454257,"id":83495979468,"runnerName":"i-08b706c8623936d0a","authorEmail":"gasoonjia@meta.com","name":"pull / unittest-editable / macos / macos-job","jobName":"unittest-editable / macos / macos-job","conclusion":"failure","completed_at":"2026-06-25T18:25:15.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/28188311787/job/83495979468","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/83495979468","head_branch":"gemma4_31b-cuda-decode-speedup","pr_number":20482,"head_sha":"4025660ac810cf796f6a19c06692b1777e0ac145","head_sha_timestamp":"2026-06-25T15:56:51.000000000Z","failure_captures":["RuntimeError: Command bash /Users/ec2-user/runner/_work/_temp/exec_script failed with exit code 1"],"failure_lines":["RuntimeError: Command bash /Users/ec2-user/runner/_work/_temp/exec_script failed with exit code 1"],"failure_context":[],"time":"2026-06-25T17:25:26.000000000Z"}],"UNSTABLE":[],"UNKNOWN":[{"workflowId":28188311615,"workflowUniqueId":236455973,"id":83496275748,"runnerName":"GitHub Actions 1020278952","authorEmail":"gasoonjia@meta.com","name":"MLX / test-mlx-qwen35-moe / test-mlx-qwen35-moe","jobName":"test-mlx-qwen35-moe / test-mlx-qwen35-moe","conclusion":"cancelled","completed_at":"2026-06-25T19:02:54.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/28188311615/job/83496275748","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/83496275748","head_branch":"gemma4_31b-cuda-decode-speedup","pr_number":20482,"head_sha":"4025660ac810cf796f6a19c06692b1777e0ac145","head_sha_timestamp":"2026-06-25T15:56:51.000000000Z","failure_captures":[],"failure_lines":[],"failure_context":[],"time":"2026-06-25T17:26:56.000000000Z"}],"AWAITING_APPROVAL":[]}