Commit badb76d

fix sync on ascend (#19)
1 parent ccc62cb commit badb76d

4 files changed, +11 -7 lines changed


lmdeploy/pytorch/engine/devices/ascend.py

Lines changed: 6 additions & 1 deletion
@@ -17,7 +17,8 @@ def update_step_context(cls, step_context):
             single_attention_mask = torch.logical_not(
                 torch.tril(
                     torch.ones(step_context.q_seq_length[i],
-                               step_context.block_offsets.shape[1] * block_size,
+                               step_context.block_offsets.shape[1] *
+                               block_size,
                                dtype=torch.bool).cuda(),
                     diagonal=step_context.kv_seq_length[i] -
                     step_context.q_seq_length[i],
@@ -38,6 +39,10 @@ def update_step_context(cls, step_context):
             kv_start_indices, device=step_context.block_offsets.device)
         setattr(step_context, 'kv_start_indices', kv_start_indices)
         setattr(step_context, 'attention_mask', attention_mask)
+        setattr(step_context, 'q_start_loc', step_context.q_start_loc.cpu())
+        setattr(step_context, 'q_seq_length', step_context.q_seq_length.cpu())
+        setattr(step_context, 'kv_seq_length',
+                step_context.kv_seq_length.cpu())
         is_unpaged_prefill = (not step_context.is_decoding) and all(
             (step_context.q_seq_length == step_context.kv_seq_length).tolist())
         setattr(step_context, 'is_unpaged_prefill', is_unpaged_prefill)
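Note: the substantive part of this hunk is the three new setattr calls. q_start_loc, q_seq_length and kv_seq_length are copied to the CPU once while the step context is prepared, so later host-side reads of this metadata reuse the cached copies instead of each forcing a device-to-host synchronization (presumably the "sync" in the commit title). A minimal sketch of the pattern, using a toy stand-in for the step context rather than lmdeploy's real class:

# Toy sketch of the caching pattern above; the class and its field values
# are hypothetical stand-ins, not lmdeploy's StepContext.
import torch


class StepContext:

    def __init__(self, device='cpu'):
        self.q_start_loc = torch.tensor([0, 4, 9], device=device)
        self.q_seq_length = torch.tensor([4, 5, 3], device=device)
        self.kv_seq_length = torch.tensor([4, 5, 3], device=device)
        self.is_decoding = False


def update_step_context(ctx):
    # Copy the metadata to host memory once per step; later reads such as
    # .tolist() or .item() then operate on CPU tensors instead of each
    # triggering a blocking device-to-host transfer.
    ctx.q_start_loc = ctx.q_start_loc.cpu()
    ctx.q_seq_length = ctx.q_seq_length.cpu()
    ctx.kv_seq_length = ctx.kv_seq_length.cpu()
    return ctx


ctx = update_step_context(StepContext())
is_unpaged_prefill = (not ctx.is_decoding) and all(
    (ctx.q_seq_length == ctx.kv_seq_length).tolist())
print(is_unpaged_prefill)  # True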

lmdeploy/pytorch/kernels/ascend/apply_rotary_pos_emb.py

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ def apply_rotary_pos_emb(
         cos = cos[position_ids_1d].view(1, bs, 1, -1)
         sin = sin[position_ids_1d].view(1, bs, 1, -1)
     else:
-        raise RuntimeError("Cannot handle cos/sin shape dims!")
+        raise RuntimeError('Cannot handle cos/sin shape dims!')
 
     if context:
         setattr(context, 'cos', cos)
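Note: this hunk only swaps double quotes for single quotes; behaviour is unchanged. For context, the surrounding lines gather one cos/sin row per token from the precomputed tables using the flattened position ids and reshape them to broadcast over heads. A rough illustration with made-up shapes (not the lmdeploy kernel itself):

# Rough illustration of the context lines above; all shapes are made up.
import torch

bs, dim, max_pos = 6, 8, 32            # flattened tokens, rotary dim, table size
cos_table = torch.randn(max_pos, dim)  # stands in for the cached cos table
position_ids_1d = torch.tensor([0, 1, 2, 0, 1, 2])

# Gather one cos row per token, then reshape so it broadcasts over a
# (1, tokens, heads, dim) query/key layout.
cos = cos_table[position_ids_1d].view(1, bs, 1, -1)
print(cos.shape)  # torch.Size([1, 6, 1, 8])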

lmdeploy/pytorch/kernels/ascend/rms_norm.py

Lines changed: 1 addition & 1 deletion
@@ -12,4 +12,4 @@ def rms_norm(hidden_states: Tensor,
         out = rms_norm_out
     else:
         out.copy_(rms_norm_out)
-    return rms_norm_out
+    return out
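Note: the rms_norm change fixes the return value, not the math. When the caller supplies an out tensor, the kernel result is copied into it, and that same tensor should be returned, so code holding a reference to out and code using the return value see the same storage. A minimal sketch of the corrected contract, with plain PyTorch standing in for the Ascend kernel and the signature beyond hidden_states/out assumed:

# Sketch of the corrected return contract; the reference math and the exact
# parameters other than `hidden_states` and `out` are assumptions.
from typing import Optional

import torch
from torch import Tensor


def rms_norm(hidden_states: Tensor,
             weight: Tensor,
             eps: float = 1e-6,
             out: Optional[Tensor] = None) -> Tensor:
    variance = hidden_states.pow(2).mean(-1, keepdim=True)
    rms_norm_out = hidden_states * torch.rsqrt(variance + eps) * weight
    if out is None:
        out = rms_norm_out
    else:
        out.copy_(rms_norm_out)
    return out  # was `return rms_norm_out`, a different tensor than the caller's buffer


x = torch.randn(2, 4)
buf = torch.empty_like(x)
y = rms_norm(x, torch.ones(4), out=buf)
assert y is buf  # the caller-provided buffer is what comes back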

lmdeploy/pytorch/models/qwen2_moe.py

Lines changed: 3 additions & 4 deletions
@@ -106,10 +106,9 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         routing_weights /= routing_weights.sum(dim=-1, keepdim=True)
         routing_weights = routing_weights.to(hidden_states.dtype)
 
-        out_states = torch.zeros(
-            (batch_size * sequence_length, hidden_dim),
-            dtype=hidden_states.dtype,
-            device=hidden_states.device)
+        out_states = torch.zeros((batch_size * sequence_length, hidden_dim),
+                                 dtype=hidden_states.dtype,
+                                 device=hidden_states.device)
 
         expert_mask = torch.nn.functional.one_hot(
             selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
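Note: this hunk is a formatting-only change; out_states is still allocated as a zeroed (tokens, hidden_dim) accumulator. For context, a rough sketch of how such an accumulator and the one-hot expert_mask from the last context line are typically used to route tokens to experts and accumulate the weighted outputs (toy shapes and a toy expert, not lmdeploy's Qwen2-MoE module):

# Context-only sketch with hypothetical shapes and a toy "expert".
import torch

tokens, hidden_dim, num_experts, top_k = 6, 8, 4, 2
hidden_states = torch.randn(tokens, hidden_dim)
router_logits = torch.randn(tokens, num_experts)

# Pick the top-k experts per token and renormalize their routing weights.
routing_weights, selected_experts = torch.topk(
    torch.softmax(router_logits, dim=-1), top_k, dim=-1)
routing_weights /= routing_weights.sum(dim=-1, keepdim=True)

out_states = torch.zeros((tokens, hidden_dim), dtype=hidden_states.dtype)
expert_mask = torch.nn.functional.one_hot(
    selected_experts, num_classes=num_experts).permute(2, 1, 0)

for expert_id in range(num_experts):
    # idx: which top-k slot picked this expert; top_x: which tokens did.
    idx, top_x = torch.where(expert_mask[expert_id])
    if top_x.numel() == 0:
        continue
    expert_out = hidden_states[top_x] * 2.0  # toy expert in place of an MLP
    # Accumulate each expert's weighted output back into the per-token rows.
    out_states.index_add_(
        0, top_x, expert_out * routing_weights[top_x, idx, None])

print(out_states.shape)  # torch.Size([6, 8])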
