Restore n_tensor check.

ggml-org · QingtaoLi1 · Oct 10, 2024 · Oct 11, 2024 · Oct 11, 2024 · Oct 11, 2024
commit f64c7680550ebf4dd0453013524cc1054b311d17
@@ -367,6 +367,12 @@ def prepare_tensors(self):
                     break
 
             for new_name, data_torch in (self._modify_tensors(data_torch, name, bid)):
+                # Some GPTQ models have empty (all-zero) bias tensors that are not part of the model architecture.
+                # These tensors cause the tensor count check to fail, so we have to skip them.
+                if new_name.endswith(".bias") and np.all(LazyTorchTensor.to_eager(data_torch).numpy() == 0):
-                if new_name.endswith(".bias") and np.all(LazyTorchTensor.to_eager(data_torch).numpy() == 0):
+                if new_name.endswith(".bias") and torch.all(data_torch == 0):
-                if new_name.endswith(".bias") and np.all(LazyTorchTensor.to_eager(data_torch).numpy() == 0):
+                if new_name.endswith(".bias") and torch.all(data_torch == 0):
+                    logger.info(f"Skipping empty bias tensor: {new_name}")
+                    continue
+
                 data = data_torch.squeeze().numpy()
 
                 # if data ends up empty, it means data_torch was a scalar tensor -> restore

@@ -4783,9 +4783,7 @@ struct llama_model_loader {
 
     void done_getting_tensors() const {
         if (n_created != n_tensors) {
-            // Zero bias in some HuggingFace models will cause n_tensors mismatch
-            // Consider removing zero bias in convert_hf_to_gguf.py?
-            // throw std::runtime_error(format("%s: wrong number of tensors; expected %d, got %d", __func__, n_tensors, n_created));
+            throw std::runtime_error(format("%s: wrong number of tensors; expected %d, got %d", __func__, n_tensors, n_created));
         }
     }