10 changes: 3 additions & 7 deletions vllm/model_executor/model_loader/bitsandbytes_loader.py
```diff
@@ -427,14 +427,10 @@ def _get_bnb_target_modules(self, model: nn.Module) -> None:
         elif isinstance(module, FusedMoE) and hasattr(
                 module.quant_method, "quant_config"):
             # TODO: support FusedMoE with prequant and 8bit.
-            if self.pre_quant:
+            if self.pre_quant and self.load_8bit:
                 raise ValueError(
-                    "Prequant BitsAndBytes models with FusedMoE is not "
-                    "supported yet.")
-            if self.load_8bit:
-                raise ValueError(
-                    "BitsAndBytes 8bit quantization with FusedMoE is not "
-                    "supported yet.")
+                    "Prequant BitsAndBytes 8bit models with FusedMoE "
+                    "is not supported yet.")
             # Get the corresponding weight name using module name and
             # expert_params_mapping.
```
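For readers skimming the hunk: the two separate checks collapse into one, so only the prequant + 8bit combination is still rejected for FusedMoE layers. Below is a minimal standalone sketch of the consolidated check; the helper name `check_fused_moe_bnb_support` is hypothetical, while `pre_quant` and `load_8bit` are the loader attributes referenced in the diff above.

```python
# Minimal sketch (hypothetical helper) of the consolidated check above.
# Only the prequant + 8bit combination is rejected after this change.
def check_fused_moe_bnb_support(pre_quant: bool, load_8bit: bool) -> None:
    if pre_quant and load_8bit:
        raise ValueError(
            "Prequant BitsAndBytes 8bit models with FusedMoE "
            "is not supported yet.")

# Accepted combinations, before vs. after this diff:
#   pre_quant=False, load_8bit=False -> allowed  / allowed
#   pre_quant=True,  load_8bit=False -> rejected / allowed (prequant 4bit)
#   pre_quant=False, load_8bit=True  -> rejected / allowed (in-flight 8bit)
#   pre_quant=True,  load_8bit=True  -> rejected / rejected
```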
2 changes: 1 addition & 1 deletion vllm/model_executor/models/qwen3_moe.py
```diff
@@ -684,4 +684,4 @@ def load_weights(self, weights: Iterable[tuple[str,
         return loader.load_weights(weights)
 
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
-        return self.model.get_expert_mapping()
\ No newline at end of file
+        return self.model.get_expert_mapping()
```
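The qwen3_moe.py hunk appears to change only the trailing newline at end of file; the delegated `get_expert_mapping` API itself is unchanged. For context, here is a hedged sketch of the 4-tuples that mapping contains, modeled on `FusedMoE.make_expert_params_mapping` in vLLM; the checkpoint names below are illustrative assumptions for a Qwen3-MoE-style model, not taken from this PR.

```python
# Hypothetical illustration of the (param_name, weight_name, expert_id,
# shard_id) tuples returned by get_expert_mapping(); names are assumptions.
def example_expert_mapping(
        num_experts: int = 2) -> list[tuple[str, str, int, str]]:
    mapping = []
    for expert_id in range(num_experts):
        for ckpt_name, shard_id in (("gate_proj", "w1"),
                                    ("down_proj", "w2"),
                                    ("up_proj", "w3")):
            # w1/w3 load into the fused gate/up parameter, w2 into down.
            param_name = ("experts.w13_" if shard_id in ("w1", "w3")
                          else "experts.w2_")
            mapping.append(
                (param_name, f"experts.{expert_id}.{ckpt_name}.",
                 expert_id, shard_id))
    return mapping

# e.g. ("experts.w13_", "experts.0.gate_proj.", 0, "w1")
```

The BitsAndBytes loader consumes this mapping to translate checkpoint weight names into the fused expert parameters, which is why the FusedMoE support check in the first file gates this path.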