 from ..rotary_embedding import (Llama3Parameters, LongRoPEScalingParameters,
                                 RopeType, RotaryEmbeddingBuilder,
                                 RotaryEmbeddingImpl, YarnParameters)
+from ..default.rotary_embedding import LlamaDynamicNTKScalingRotaryEmbedding, Llama3RotaryEmbeddingImpl


 class DlinferRotaryEmbeddingImpl(RotaryEmbeddingImpl, nn.Module):
@@ -23,11 +24,6 @@ def __init__(self,
             self.dim)).float().cuda()
         self.register_buffer('inv_freq', inv_freq, persistent=False)

-    def dump_tensor(self, name, t):
-        import pickle
-        with open(f'/tzy/dev_ops/{name}.pkl', 'wb') as f:
-            pickle.dump(t.cpu(), f)
-
     def forward(self, x, position_ids):
         """forward."""
         # x: [bs, num_attention_heads, seq_len, head_size]
@@ -47,7 +43,6 @@ def forward(self, x, position_ids):
         device_type = x.device.type
         device_type = device_type if isinstance(
             device_type, str) and device_type != 'mps' else 'cpu'
-        # with torch.autocast(device_type=device_type, enabled=False):
         inv_freq_expanded = inv_freq_expanded
         position_ids_expanded = position_ids_expanded
         tmp = torch.bmm(inv_freq_expanded, position_ids_expanded)
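For context on the lines this hunk touches: forward builds the rotary angles as a batched outer product of the inverse frequencies and the position ids, then takes their cos/sin. A minimal standalone sketch of that step follows; the example sizes and any tensor names not visible above are assumptions, not code from this file.

import torch

# Sketch of the angle computation around the bmm above (dim=64, seq_len=8 assumed).
dim = 64
inv_freq = 1.0 / (10000**(torch.arange(0, dim, 2).float() / dim))  # [dim // 2]
position_ids = torch.arange(8).float().unsqueeze(0)                # [bs, seq_len]

inv_freq_expanded = inv_freq[None, :, None].expand(position_ids.shape[0], -1, 1)
position_ids_expanded = position_ids[:, None, :]
freqs = torch.bmm(inv_freq_expanded, position_ids_expanded).transpose(1, 2)
emb = torch.cat((freqs, freqs), dim=-1)  # [bs, seq_len, dim]
cos, sin = emb.cos(), emb.sin()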
@@ -78,37 +73,11 @@ def build(
         elif emb_type == RopeType.DynamicNTKScaling:
             return LlamaDynamicNTKScalingRotaryEmbedding(
                 dim, base, scaling_factor, max_position_embeddings)
+        elif emb_type == RopeType.Llama3:
+            return Llama3RotaryEmbeddingImpl(dim, base, scaling_factor,
+                                             llama3_params.low_freq_factor,
+                                             llama3_params.high_freq_factor,
+                                             max_position_embeddings)
         else:
             raise NotImplementedError(
                 f'Unsupported embedding type: {emb_type}')
-
-
-class LlamaDynamicNTKScalingRotaryEmbedding(RotaryEmbeddingImpl):
-    """LlamaRotaryEmbedding extended with Dynamic NTK scaling.
-
-    Credits to the Reddit users /u/bloc97 and /u/emozilla
-    """
-
-    def __init__(self,
-                 dim: int,
-                 base: int = 10000,
-                 scaling_factor: float = 1.0,
-                 max_position_embeddings: int = 2048):
-        super().__init__(dim, base, scaling_factor)
-        self.max_position_embeddings = max_position_embeddings
-
-    def forward(self, x, position_ids):
-        """forward."""
-        seq_len = torch.max(position_ids) + 1
-        if seq_len > self.max_position_embeddings:
-            base = self.base * ((self.scaling_factor * seq_len /
-                                 self.max_position_embeddings) -
-                                (self.scaling_factor - 1))**(self.dim /
-                                                             (self.dim - 2))
-            inv_freq = 1.0 / (base**(torch.arange(
-                0, self.dim, 2, dtype=torch.int64).float().to(x.device) /
-                self.dim))
-            self.register_buffer('inv_freq', inv_freq, persistent=False)
-
-        cos, sin = super().forward(x, position_ids)
-        return cos, sin
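The Llama3RotaryEmbeddingImpl now reused from the default backend is not shown in this diff. As a rough reference, here is a minimal sketch of the Llama 3 style frequency rescaling such an implementation is expected to apply to inv_freq; the function name, argument names, and the exact interpolation are assumptions based on the published Llama 3 RoPE-scaling recipe, not this repository's code.

import math

import torch


def llama3_scale_inv_freq(inv_freq: torch.Tensor, scaling_factor: float,
                          low_freq_factor: float, high_freq_factor: float,
                          original_max_position_embeddings: int) -> torch.Tensor:
    """Rescale rotary inverse frequencies in the Llama 3 style (sketch)."""
    low_freq_wavelen = original_max_position_embeddings / low_freq_factor
    high_freq_wavelen = original_max_position_embeddings / high_freq_factor
    wavelen = 2 * math.pi / inv_freq
    # Long-wavelength (low-frequency) components are divided by the factor,
    # short-wavelength ones are left untouched.
    scaled = torch.where(wavelen > low_freq_wavelen, inv_freq / scaling_factor,
                         inv_freq)
    # The band in between is smoothly interpolated between the two regimes.
    smooth = (original_max_position_embeddings / wavelen - low_freq_factor) / (
        high_freq_factor - low_freq_factor)
    smoothed = (1 - smooth) * inv_freq / scaling_factor + smooth * inv_freq
    is_medium = (wavelen <= low_freq_wavelen) & (wavelen >= high_freq_wavelen)
    return torch.where(is_medium, smoothed, scaled)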