Commit 052069b

JackWeiw authored and yao-fengchen committed
ascend: align attention mask to 32bytes (#7)
1 parent: 2d654fd

File tree: 1 file changed (+1, -1)

lmdeploy/pytorch/engine/devices/ascend.py

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ def update_step_context(cls, step_context):
             single_attention_mask = torch.logical_not(
                 torch.tril(
                     torch.ones(step_context.q_seq_length[i],
-                               step_context.kv_seq_length[i],
+                               (step_context.kv_seq_length[i] + 31) & (~31),
                                dtype=torch.bool).cuda(),
                     diagonal=step_context.kv_seq_length[i] -
                     step_context.q_seq_length[i],
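
The one-line change rounds the mask's key-length dimension up to the next multiple of 32 with the standard align-up bit trick (n + 31) & ~31; since the mask is dtype=torch.bool (one byte per element in PyTorch), each row then occupies a multiple of 32 bytes, matching the commit title. A minimal, self-contained sketch of the trick (align_up_32 is an illustrative helper, not part of the patch):

# Align-up: adding 31 then clearing the low five bits rounds n up
# to the nearest multiple of 32 (a no-op when n is already aligned).
def align_up_32(n: int) -> int:
    return (n + 31) & ~31

assert align_up_32(1) == 32    # 1  -> 32
assert align_up_32(32) == 32   # already aligned, unchanged
assert align_up_32(33) == 64   # 33 -> 64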
