Skip to content

Add support for transposed kv cache in all variants of custom kv cache

bab3440
Select commit
Loading
Failed to load commit list.
Open

Add support for transposed kv cache in all variants of custom kv cache #18709

Add support for transposed kv cache in all variants of custom kv cache
bab3440
Select commit
Loading
Failed to load commit list.
PyTorch Bot / Dr.CI completed Apr 6, 2026 in 0s

Dr.CI classification results

{"FAILED":[{"workflowId":24038694812,"workflowUniqueId":63454257,"id":70104646506,"runnerName":"16-core-ubuntu-1013504714","authorEmail":"[email protected]","name":"pull / android / build-android","jobName":"android / build-android","conclusion":"failure","completed_at":"2026-04-06T15:59:11.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24038694812/job/70104646506","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/70104646506","head_branch":"gh/kimishpatel/229/head","pr_number":18709,"head_sha":"bab3440678f4311fed11d757ee07923b4cbe295b","head_sha_timestamp":"2026-04-06T15:47:29.000000000Z","failure_captures":["Process completed with exit code 1."],"failure_lines":["##[error]Process completed with exit code 1."],"failure_context":[],"time":"2026-04-06T15:47:37.000000000Z"},{"workflowId":24038694812,"workflowUniqueId":63454257,"id":70104647392,"runnerName":"i-0027ae1d78ee5d948","authorEmail":"[email protected]","name":"pull / test-lora-linux / linux-job","jobName":"test-lora-linux / linux-job","conclusion":"failure","completed_at":"2026-04-06T16:11:51.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24038694812/job/70104647392","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/70104647392","head_branch":"gh/kimishpatel/229/head","pr_number":18709,"head_sha":"bab3440678f4311fed11d757ee07923b4cbe295b","head_sha_timestamp":"2026-04-06T15:47:29.000000000Z","failure_captures":["RuntimeError: Command docker exec -t 5a7117fab31dc7fd5b40af8a2f751b025914caf9149689a230873dac241415bc /exec failed with exit code 134"],"failure_lines":["RuntimeError: Command docker exec -t 5a7117fab31dc7fd5b40af8a2f751b025914caf9149689a230873dac241415bc /exec failed with exit code 134"],"failure_context":[],"time":"2026-04-06T15:47:37.000000000Z"},{"workflowId":24038694812,"workflowUniqueId":63454257,"id":70104647296,"runnerName":"i-04fed97c1f90c900a","authorEmail":"[email protected]","name":"pull / unittest / linux / linux-job","jobName":"unittest / linux / linux-job","conclusion":"failure","completed_at":"2026-04-06T16:51:19.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24038694812/job/70104647296","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/70104647296","head_branch":"gh/kimishpatel/229/head","pr_number":18709,"head_sha":"bab3440678f4311fed11d757ee07923b4cbe295b","head_sha_timestamp":"2026-04-06T15:47:29.000000000Z","failure_captures":["examples/models/llama/tests/test_replace_kv_cache.py::TestReplaceKVCache::test_replace_quantized_kv_cache_with_quantized_ring_kv_cache"],"failure_lines":["FAILED examples/models/llama/tests/test_replace_kv_cache.py::TestReplaceKVCache::test_replace_quantized_kv_cache_with_quantized_ring_kv_cache - TypeError: QuantizedRingKVCache.init() got multiple values for argument 'is_seq_at_dim_2'"],"failure_context":[],"time":"2026-04-06T15:47:37.000000000Z"},{"workflowId":24038694812,"workflowUniqueId":63454257,"id":70104647298,"runnerName":"i-0a2dc785548da96f8","authorEmail":"[email protected]","name":"pull / unittest / windows / windows-job","jobName":"unittest / windows / windows-job","conclusion":"cancelled","completed_at":"2026-04-06T17:49:58.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24038694812/job/70104647298","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/70104647298","head_branch":"gh/kimishpatel/229/head","pr_number":18709,"head_sha":"bab3440678f4311fed11d757ee07923b4cbe295b","head_sha_timestamp":"2026-04-06T15:47:29.000000000Z","failure_captures":[],"failure_lines":[],"failure_context":[],"time":"2026-04-06T15:47:37.000000000Z"},{"workflowId":24038694812,"workflowUniqueId":63454257,"id":70104647311,"runnerName":"i-00fa99ac13769e204","authorEmail":"[email protected]","name":"pull / unittest-editable / linux / linux-job","jobName":"unittest-editable / linux / linux-job","conclusion":"failure","completed_at":"2026-04-06T16:52:55.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24038694812/job/70104647311","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/70104647311","head_branch":"gh/kimishpatel/229/head","pr_number":18709,"head_sha":"bab3440678f4311fed11d757ee07923b4cbe295b","head_sha_timestamp":"2026-04-06T15:47:29.000000000Z","failure_captures":["examples/models/llama/tests/test_replace_kv_cache.py::TestReplaceKVCache::test_replace_quantized_kv_cache_with_quantized_ring_kv_cache"],"failure_lines":["FAILED examples/models/llama/tests/test_replace_kv_cache.py::TestReplaceKVCache::test_replace_quantized_kv_cache_with_quantized_ring_kv_cache - TypeError: QuantizedRingKVCache.init() got multiple values for argument 'is_seq_at_dim_2'"],"failure_context":[],"time":"2026-04-06T15:47:37.000000000Z"},{"workflowId":24038694812,"workflowUniqueId":63454257,"id":70104647313,"runnerName":"i-0a3a30679ef1b4e28","authorEmail":"[email protected]","name":"pull / unittest-buck / macos / macos-job","jobName":"unittest-buck / macos / macos-job","conclusion":"failure","completed_at":"2026-04-06T15:53:22.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24038694812/job/70104647313","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/70104647313","head_branch":"gh/kimishpatel/229/head","pr_number":18709,"head_sha":"bab3440678f4311fed11d757ee07923b4cbe295b","head_sha_timestamp":"2026-04-06T15:47:29.000000000Z","failure_captures":["RuntimeError: Command bash /Users/ec2-user/runner/_work/_temp/exec_script failed with exit code 1"],"failure_lines":["RuntimeError: Command bash /Users/ec2-user/runner/_work/_temp/exec_script failed with exit code 1"],"failure_context":[],"time":"2026-04-06T15:47:37.000000000Z"},{"workflowId":24038694812,"workflowUniqueId":63454257,"id":70104647272,"runnerName":"i-073082c2e19d1aa4d","authorEmail":"[email protected]","name":"pull / test-multimodal-linux (gemma3-4b) / linux-job","jobName":"test-multimodal-linux (gemma3-4b) / linux-job","conclusion":"failure","completed_at":"2026-04-06T16:47:10.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24038694812/job/70104647272","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/70104647272","head_branch":"gh/kimishpatel/229/head","pr_number":18709,"head_sha":"bab3440678f4311fed11d757ee07923b4cbe295b","head_sha_timestamp":"2026-04-06T15:47:29.000000000Z","failure_captures":["RuntimeError: Command docker exec -t 16c0d96ec88c7a94fe4ed671a1455c9a6d31db4c6fc2c74180dd37c3316bf196 /exec failed with exit code 134"],"failure_lines":["RuntimeError: Command docker exec -t 16c0d96ec88c7a94fe4ed671a1455c9a6d31db4c6fc2c74180dd37c3316bf196 /exec failed with exit code 134"],"failure_context":[],"time":"2026-04-06T15:47:37.000000000Z"},{"workflowId":24038694812,"workflowUniqueId":63454257,"id":70104647288,"runnerName":"i-08b6d7b1479312fea","authorEmail":"[email protected]","name":"pull / unittest / macos / macos-job","jobName":"unittest / macos / macos-job","conclusion":"failure","completed_at":"2026-04-06T16:26:29.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24038694812/job/70104647288","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/70104647288","head_branch":"gh/kimishpatel/229/head","pr_number":18709,"head_sha":"bab3440678f4311fed11d757ee07923b4cbe295b","head_sha_timestamp":"2026-04-06T15:47:29.000000000Z","failure_captures":["examples/models/llama/tests/test_replace_kv_cache.py::TestReplaceKVCache::test_replace_quantized_kv_cache_with_quantized_ring_kv_cache"],"failure_lines":["FAILED examples/models/llama/tests/test_replace_kv_cache.py::TestReplaceKVCache::test_replace_quantized_kv_cache_with_quantized_ring_kv_cache - TypeError: QuantizedRingKVCache.init() got multiple values for argument 'is_seq_at_dim_2'"],"failure_context":[],"time":"2026-04-06T15:47:37.000000000Z"},{"workflowId":24038694812,"workflowUniqueId":63454257,"id":70104647209,"runnerName":"i-01a46c9fd3c9a6a0c","authorEmail":"[email protected]","name":"pull / unittest-editable / macos / macos-job","jobName":"unittest-editable / macos / macos-job","conclusion":"failure","completed_at":"2026-04-06T16:23:40.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24038694812/job/70104647209","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/70104647209","head_branch":"gh/kimishpatel/229/head","pr_number":18709,"head_sha":"bab3440678f4311fed11d757ee07923b4cbe295b","head_sha_timestamp":"2026-04-06T15:47:29.000000000Z","failure_captures":["examples/models/llama/tests/test_replace_kv_cache.py::TestReplaceKVCache::test_replace_quantized_kv_cache_with_quantized_ring_kv_cache"],"failure_lines":["FAILED examples/models/llama/tests/test_replace_kv_cache.py::TestReplaceKVCache::test_replace_quantized_kv_cache_with_quantized_ring_kv_cache - TypeError: QuantizedRingKVCache.init() got multiple values for argument 'is_seq_at_dim_2'"],"failure_context":[],"time":"2026-04-06T15:47:37.000000000Z"},{"workflowId":24038694812,"workflowUniqueId":63454257,"id":70104647220,"runnerName":"i-01ebc2de770351f48","authorEmail":"[email protected]","name":"pull / unittest-editable / windows / windows-job","jobName":"unittest-editable / windows / windows-job","conclusion":"cancelled","completed_at":"2026-04-06T17:49:53.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24038694812/job/70104647220","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/70104647220","head_branch":"gh/kimishpatel/229/head","pr_number":18709,"head_sha":"bab3440678f4311fed11d757ee07923b4cbe295b","head_sha_timestamp":"2026-04-06T15:47:29.000000000Z","failure_captures":[],"failure_lines":[],"failure_context":[],"time":"2026-04-06T15:47:37.000000000Z"},{"workflowId":24038694812,"workflowUniqueId":63454257,"id":70104647174,"runnerName":"i-01b8b8577a5202d25","authorEmail":"[email protected]","name":"pull / test-lora-multimethod-linux / linux-job","jobName":"test-lora-multimethod-linux / linux-job","conclusion":"failure","completed_at":"2026-04-06T16:28:34.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24038694812/job/70104647174","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/70104647174","head_branch":"gh/kimishpatel/229/head","pr_number":18709,"head_sha":"bab3440678f4311fed11d757ee07923b4cbe295b","head_sha_timestamp":"2026-04-06T15:47:29.000000000Z","failure_captures":["RuntimeError: Command docker exec -t 728b6ae556bd71093db0e0f9178a5ce766238bde7d80263896f294370690a0e0 /exec failed with exit code 134"],"failure_lines":["RuntimeError: Command docker exec -t 728b6ae556bd71093db0e0f9178a5ce766238bde7d80263896f294370690a0e0 /exec failed with exit code 134"],"failure_context":[],"time":"2026-04-06T15:47:37.000000000Z"},{"workflowId":24038694812,"workflowUniqueId":63454257,"id":70104646863,"runnerName":"i-0b922578fb84f4148","authorEmail":"[email protected]","name":"pull / test-llama-runner-linux (fp32, xnnpack+custom+qe, linux.arm64.2xlarge, executorch-ubuntu-22.04-gc... / linux-job","jobName":"test-llama-runner-linux (fp32, xnnpack+custom+qe, linux.arm64.2xlarge, executorch-ubuntu-22.04-gc... / linux-job","conclusion":"failure","completed_at":"2026-04-06T16:12:41.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24038694812/job/70104646863","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/70104646863","head_branch":"gh/kimishpatel/229/head","pr_number":18709,"head_sha":"bab3440678f4311fed11d757ee07923b4cbe295b","head_sha_timestamp":"2026-04-06T15:47:29.000000000Z","failure_captures":["RuntimeError: Command docker exec -t 00b077dd30be8cee73e177c6be62f9eb654b06af1a0f757a617736e8b9f70e36 /exec failed with exit code 134"],"failure_lines":["RuntimeError: Command docker exec -t 00b077dd30be8cee73e177c6be62f9eb654b06af1a0f757a617736e8b9f70e36 /exec failed with exit code 134"],"failure_context":[],"time":"2026-04-06T15:47:37.000000000Z"},{"workflowId":24038694812,"workflowUniqueId":63454257,"id":70104646802,"runnerName":"i-06bfce3c8735864a6","authorEmail":"[email protected]","name":"pull / test-llama-runner-linux (fp32, xnnpack+custom+qe, linux.2xlarge, executorch-ubuntu-22.04-clang12) / linux-job","jobName":"test-llama-runner-linux (fp32, xnnpack+custom+qe, linux.2xlarge, executorch-ubuntu-22.04-clang12) / linux-job","conclusion":"failure","completed_at":"2026-04-06T16:05:46.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24038694812/job/70104646802","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/70104646802","head_branch":"gh/kimishpatel/229/head","pr_number":18709,"head_sha":"bab3440678f4311fed11d757ee07923b4cbe295b","head_sha_timestamp":"2026-04-06T15:47:29.000000000Z","failure_captures":["RuntimeError: Command docker exec -t d5abb155cb9b815e61578647771658b2dbc23c0bcef807a13a21b3869e222d66 /exec failed with exit code 134"],"failure_lines":["RuntimeError: Command docker exec -t d5abb155cb9b815e61578647771658b2dbc23c0bcef807a13a21b3869e222d66 /exec failed with exit code 134"],"failure_context":[],"time":"2026-04-06T15:47:37.000000000Z"},{"workflowId":24038694812,"workflowUniqueId":63454257,"id":70104646823,"runnerName":"i-096910a70fb74793b","authorEmail":"[email protected]","name":"pull / test-llama-runner-linux (bf16, custom, linux.2xlarge, executorch-ubuntu-22.04-clang12) / linux-job","jobName":"test-llama-runner-linux (bf16, custom, linux.2xlarge, executorch-ubuntu-22.04-clang12) / linux-job","conclusion":"failure","completed_at":"2026-04-06T16:02:22.000000000Z","html_url":"https://github.com/pytorch/executorch/actions/runs/24038694812/job/70104646823","logUrl":"https://ossci-raw-job-status.s3.amazonaws.com/log/pytorch/executorch/70104646823","head_branch":"gh/kimishpatel/229/head","pr_number":18709,"head_sha":"bab3440678f4311fed11d757ee07923b4cbe295b","head_sha_timestamp":"2026-04-06T15:47:29.000000000Z","failure_captures":["RuntimeError: Command docker exec -t d614aedd934356e865fa991380f661a0e0fb03b32ea65ebda6a7ea05d60fb791 /exec failed with exit code 134"],"failure_lines":["RuntimeError: Command docker exec -t d614aedd934356e865fa991380f661a0e0fb03b32ea65ebda6a7ea05d60fb791 /exec failed with exit code 134"],"failure_context":[],"time":"2026-04-06T15:47:37.000000000Z"}],"FLAKY":[],"BROKEN_TRUNK":[],"UNSTABLE":[],"AWAITING_APPROVAL":[]}