Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
gguf-py: introduce support for reading from generation_config.json
Signed-off-by: Aaron Teo <[email protected]>
  • Loading branch information
taronaeo committed Nov 9, 2025
commit 44addcebd90673c33c10e66210f87a1ca7e3f2a4
48 changes: 48 additions & 0 deletions gguf-py/gguf/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,42 @@ def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Pat

model_card = Metadata.load_model_card(model_path)
hf_params = Metadata.load_hf_parameters(model_path)
gen_config = Metadata.load_generation_config(model_path)
# TODO: load adapter_config.json when possible, it usually contains the base model of the LoRA adapter

# heuristics
metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params)

if gen_config:
# Standard generation_config.json parameters
if metadata.sampler_top_k is None and "top_k" in gen_config:
metadata.sampler_top_k = int(gen_config["top_k"])

if metadata.sampler_top_p is None and "top_p" in gen_config:
metadata.sampler_top_p = float(gen_config["top_p"])

if metadata.sampler_min_p is None and "min_p" in gen_config:
metadata.sampler_min_p = float(gen_config["min_p"])

if metadata.sampler_temp is None and "temperature" in gen_config:
metadata.sampler_temp = float(gen_config["temperature"])

# Non-standard generation_config.json parameters
if metadata.sampler_penalty_last_n is None and "penalty_last_n" in gen_config:
metadata.sampler_penalty_last_n = int(gen_config["penalty_last_n"])

if metadata.sampler_penalty_repeat is None and "penalty_repeat" in gen_config:
metadata.sampler_penalty_repeat = float(gen_config["penalty_repeat"])

if metadata.sampler_mirostat is None and "mirostat" in gen_config:
metadata.sampler_mirostat = int(gen_config["mirostat"])

if metadata.sampler_mirostat_tau is None and "mirostat_tau" in gen_config:
metadata.sampler_mirostat_tau = float(gen_config["mirostat_tau"])

if metadata.sampler_mirostat_eta is None and "mirostat_eta" in gen_config:
metadata.sampler_mirostat_eta = float(gen_config["mirostat_eta"])

# Metadata Override File Provided
# This is based on LLM_KV_NAMES mapping in llama.cpp
metadata_override = Metadata.load_metadata_override(metadata_override_path)
Expand Down Expand Up @@ -193,6 +224,23 @@ def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]:
with open(config_path, "r", encoding="utf-8") as f:
return json.load(f)

@staticmethod
def load_generation_config(model_path: Optional[Path] = None) -> dict[str, Any]:
if model_path is None or not model_path.is_dir():
return {}

generation_config_path = model_path / "generation_config.json"

if not generation_config_path.is_file():
return {}

try:
with open(generation_config_path, "r", encoding="utf-8") as f:
return json.load(f)
except (json.JSONDecodeError, IOError) as e:
# not all models have valid generation_config.json
return {}

@staticmethod
def id_to_title(string):
# Convert capitalization into title form unless acronym or version number
Expand Down
103 changes: 103 additions & 0 deletions gguf-py/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,109 @@ def test_apply_metadata_heuristic_from_model_dir(self):
expect = gguf.Metadata(name='Hermes 2 Pro Llama 3 8b DPO', finetune='DPO', basename='hermes-2-pro-llama-3', size_label='8B')
self.assertEqual(got, expect)

def test_load_generation_config(self):
    """Check ``Metadata.load_generation_config`` on valid, malformed and missing input."""
    import tempfile
    import json

    # Test with a valid generation_config.json
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir_path = Path(tmpdir)
        gen_config_path = tmpdir_path / "generation_config.json"

        # Create a sample generation_config.json
        gen_config_data = {
            "temperature": 0.7,
            "top_k": 50,
            "top_p": 0.95,
            "repetition_penalty": 1.1,
            "do_sample": True,
            "max_length": 2048,
        }

        # The loader opens the file as UTF-8, so write it the same way rather
        # than relying on the platform's default encoding.
        with open(gen_config_path, "w", encoding="utf-8") as f:
            json.dump(gen_config_data, f)

        # Test loading the file: the parsed content is returned unchanged
        result = gguf.Metadata.load_generation_config(tmpdir_path)
        self.assertEqual(result, gen_config_data)

        # Test with a path that exists but is not a directory
        result = gguf.Metadata.load_generation_config(gen_config_path)
        self.assertEqual(result, {})

        # Test with malformed JSON: the loader falls back to an empty dict
        gen_config_path.write_text("{ not valid json", encoding="utf-8")
        result = gguf.Metadata.load_generation_config(tmpdir_path)
        self.assertEqual(result, {})

    # Test with missing file
    with tempfile.TemporaryDirectory() as tmpdir:
        result = gguf.Metadata.load_generation_config(Path(tmpdir))
        self.assertEqual(result, {})

    # Test with None path
    result = gguf.Metadata.load_generation_config(None)
    self.assertEqual(result, {})

def test_metadata_load_with_generation_config(self):
    """Check that ``Metadata.load`` copies sampler settings from generation_config.json."""
    import tempfile
    import json

    # Test that generation_config values are properly loaded into metadata
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir_path = Path(tmpdir)
        gen_config_path = tmpdir_path / "generation_config.json"

        # Create a sample generation_config.json with sampling parameters.
        # The repeat penalty is supplied as "penalty_repeat" because that is
        # the key the generation_config handling in Metadata.load looks up;
        # a "repetition_penalty" entry would leave sampler_penalty_repeat unset
        # and make the last assertion below fail.
        gen_config_data = {
            "temperature": 0.8,
            "top_k": 40,
            "top_p": 0.9,
            "min_p": 0.05,
            "penalty_repeat": 1.15,
        }

        # Written as UTF-8 to match how the loader opens the file.
        with open(gen_config_path, "w", encoding="utf-8") as f:
            json.dump(gen_config_data, f)

        # Load metadata with generation config
        metadata = gguf.Metadata.load(model_path=tmpdir_path)

        # Verify sampling parameters were loaded
        self.assertEqual(metadata.sampler_temp, 0.8)
        self.assertEqual(metadata.sampler_top_k, 40)
        self.assertEqual(metadata.sampler_top_p, 0.9)
        self.assertEqual(metadata.sampler_min_p, 0.05)
        self.assertEqual(metadata.sampler_penalty_repeat, 1.15)

def test_metadata_override_precedence(self):
    """Check that a metadata override file wins over generation_config.json.

    Both files set a temperature and only generation_config.json sets top_k,
    so the loaded metadata should carry the override's temperature and the
    generation config's top_k.
    """
    import tempfile
    import json

    with tempfile.TemporaryDirectory() as workdir:
        model_dir = Path(workdir)
        override_file = model_dir / "metadata.json"

        # Fixture files, written in the same order as they are listed:
        # generation_config.json first, then the override for temperature.
        fixtures = {
            model_dir / "generation_config.json": {
                "temperature": 0.7,
                "top_k": 50,
            },
            override_file: {
                "general.sampler.temp": 0.5,
            },
        }
        for target, payload in fixtures.items():
            with open(target, "w") as handle:
                json.dump(payload, handle)

        # Load with both sources present
        loaded = gguf.Metadata.load(
            metadata_override_path=override_file,
            model_path=model_dir,
        )

        # Temperature comes from the override file ...
        self.assertEqual(loaded.sampler_temp, 0.5)
        # ... while top_k, absent from the override, comes from generation_config
        self.assertEqual(loaded.sampler_top_k, 50)


# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
unittest.main()