Commit 9b829cd

Authored Feb 15, 2025
Move attention block assertion for shape (#960)
The original assertion assumed the `qkv` matrices are square, which is not a requirement for the attention block. Moving and updating the assertion enables architectures that do not impose this restriction.
1 parent: 1aa89af

File tree: 1 file changed, +5 -2 lines

sharktank/sharktank/layers/paged_llama_attention_block.py (+5 -2)
```diff
@@ -101,13 +101,16 @@ def forward(
     ):
         assert bool(start_index is not None) ^ bool(embedding_batch_mask is not None)
         x = self.attn_norm(h)
-        bs, batch_seq_len, feature_dim = x.shape
-        assert feature_dim == self.head_count * self.head_dim
+        bs, batch_seq_len, _ = x.shape

         xq = self.attn_q(x)
         xk = self.attn_k(x)
         xv = self.attn_v(x)

+        assert xq.shape[-1] == self.head_count * self.head_dim
+        assert xk.shape[-1] == self.head_count_kv * self.head_dim
+        assert xv.shape[-1] == self.head_count_kv * self.head_dim
+
         xq = xq.view(bs, batch_seq_len, self.head_count, self.head_dim)
         xk = xk.view(bs, batch_seq_len, self.head_count_kv, self.head_dim)
         xv = xv.view(bs, batch_seq_len, self.head_count_kv, self.head_dim)
```
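To see why the relocated assertions are the right check, consider a grouped-query attention configuration in which the query and key/value projections have different output widths and the model width does not equal `head_count * head_dim`. The old assertion rejects such a model even though the projections and views are perfectly well formed. The sketch below is illustrative only: the shapes and the free-standing `Linear` modules are assumptions for demonstration, not code from sharktank.

```python
import torch

# Hypothetical grouped-query-attention shapes (illustrative, not from any
# real sharktank config): 8 query heads share 2 KV heads, and the model
# width (1024) differs from head_count * head_dim (512), so none of the
# projections are square.
feature_dim = 1024     # width of the incoming hidden state
head_count = 8         # query heads
head_count_kv = 2      # key/value heads
head_dim = 64

attn_q = torch.nn.Linear(feature_dim, head_count * head_dim, bias=False)     # 1024 -> 512
attn_k = torch.nn.Linear(feature_dim, head_count_kv * head_dim, bias=False)  # 1024 -> 128
attn_v = torch.nn.Linear(feature_dim, head_count_kv * head_dim, bias=False)  # 1024 -> 128

x = torch.randn(2, 16, feature_dim)  # (bs, batch_seq_len, feature_dim)
bs, batch_seq_len, _ = x.shape

xq, xk, xv = attn_q(x), attn_k(x), attn_v(x)

# The old pre-projection assertion would fail here, since
# feature_dim (1024) != head_count * head_dim (512):
# assert feature_dim == head_count * head_dim  # AssertionError

# The relocated assertions check the projection *outputs*, which hold for
# square and non-square projections alike:
assert xq.shape[-1] == head_count * head_dim
assert xk.shape[-1] == head_count_kv * head_dim
assert xv.shape[-1] == head_count_kv * head_dim

xq = xq.view(bs, batch_seq_len, head_count, head_dim)
xk = xk.view(bs, batch_seq_len, head_count_kv, head_dim)
xv = xv.view(bs, batch_seq_len, head_count_kv, head_dim)
print(xq.shape, xk.shape, xv.shape)
# torch.Size([2, 16, 8, 64]) torch.Size([2, 16, 2, 64]) torch.Size([2, 16, 2, 64])
```

Checking each projection's output width, rather than the input width, keeps the guard meaningful while permitting multi-query and grouped-query layouts where `head_count_kv < head_count`.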
