
Conversation

@levkropp

I noticed that converting google/umt5-xxl to GGUF worked out of the box, but converting google/umt5-xl did not, because its architecture is UMT5Model, which was not one of the registered T5 architectures in convert_hf_to_gguf.py. Adding the registration allowed me to create GGUFs of umt5-xl successfully with no additional code changes.
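
For reference, the fix amounts to a one-line registration. A minimal sketch of what the change looks like, assuming the decorator-based registration style used by convert_hf_to_gguf.py (the decorator name and the existing architecture list may differ between revisions):

@ModelBase.register(
    "T5Model",
    "T5ForConditionalGeneration",
    "MT5ForConditionalGeneration",
    "UMT5ForConditionalGeneration",
    "UMT5Model",  # newly registered; lets google/umt5-xl convert
)
class T5Model(TextModel):
    model_arch = gguf.MODEL_ARCH.T5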

I tested the resulting GGUF using the script below; the F32 conversion produced embeddings and encoder weights identical to those of the PyTorch model.

test_umt5_encoding.py
#!/usr/bin/env python3
"""
Test script to verify that the GGUF conversion produces the same encodings
as the original PyTorch model.
"""

import sys
from pathlib import Path

# Make the local gguf-py package importable (assumes this script lives in the llama.cpp repo root)
sys.path.insert(0, str(Path(__file__).parent / 'gguf-py'))

import torch
import numpy as np
from transformers import AutoModel, AutoTokenizer
import gguf

def load_pytorch_model(model_path):
    """Load the original PyTorch model."""
    print(f"Loading PyTorch model from {model_path}...")
    model = AutoModel.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model.eval()
    return model, tokenizer

def load_gguf_model(gguf_path):
    """Load the GGUF model and extract encoder weights."""
    print(f"Loading GGUF model from {gguf_path}...")
    reader = gguf.GGUFReader(gguf_path)
    
    # Extract tensors
    tensors = {}
    for tensor in reader.tensors:
        tensors[tensor.name] = tensor.data
    
    return reader, tensors

def encode_text_pytorch(model, tokenizer, text):
    """Encode text using PyTorch model."""
    print(f"\nEncoding with PyTorch: '{text}'")
    inputs = tokenizer(text, return_tensors="pt", padding=True)
    
    with torch.no_grad():
        # Get encoder outputs
        encoder_outputs = model.encoder(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask']
        )
        last_hidden_state = encoder_outputs.last_hidden_state
    
    print(f"Output shape: {last_hidden_state.shape}")
    print(f"Output mean: {last_hidden_state.mean().item():.6f}")
    print(f"Output std: {last_hidden_state.std().item():.6f}")
    print(f"First 10 values: {last_hidden_state[0, 0, :10].numpy()}")
    
    return last_hidden_state.numpy(), inputs['input_ids']

def compare_embeddings(pytorch_model, gguf_tensors, token_ids):
    """Compare embedding lookup between PyTorch and GGUF."""
    print("\n=== Comparing Token Embeddings ===")
    
    # Get PyTorch embeddings
    pt_embed_weight = pytorch_model.shared.weight.detach().numpy()
    print(f"PyTorch embedding shape: {pt_embed_weight.shape}")
    
    # Get GGUF embeddings
    gguf_embed = gguf_tensors.get('token_embd.weight')
    if gguf_embed is None:
        print("ERROR: Could not find token_embd.weight in GGUF")
        return False
    
    print(f"GGUF embedding shape: {gguf_embed.shape}")
    
    # Compare the embedding of the first input token
    token_id = token_ids[0, 0].item()
    print(f"\nComparing embedding for token {token_id}:")
    
    pt_vec = pt_embed_weight[token_id]
    gguf_vec = gguf_embed[:, token_id] if gguf_embed.shape[0] < gguf_embed.shape[1] else gguf_embed[token_id]
    
    print(f"PyTorch first 10: {pt_vec[:10]}")
    print(f"GGUF first 10: {gguf_vec[:10]}")
    
    # Calculate difference
    diff = np.abs(pt_vec - gguf_vec)
    max_diff = np.max(diff)
    mean_diff = np.mean(diff)
    
    print(f"\nMax difference: {max_diff:.6e}")
    print(f"Mean difference: {mean_diff:.6e}")
    
    # Check if they're close (allowing for floating point precision)
    if max_diff < 1e-5:
        print("✓ Embeddings match!")
        return True
    else:
        print("✗ Embeddings differ!")
        return False

def compare_encoder_weights(pytorch_model, gguf_tensors):
    """Compare encoder layer weights."""
    print("\n=== Comparing Encoder Weights ===")
    
    # Compare first layer attention query weights
    layer_idx = 0
    pt_q_weight = pytorch_model.encoder.block[layer_idx].layer[0].SelfAttention.q.weight.detach().numpy()
    gguf_q_weight = gguf_tensors.get(f'enc.blk.{layer_idx}.attn_q.weight')
    
    if gguf_q_weight is None:
        print(f"ERROR: Could not find enc.blk.{layer_idx}.attn_q.weight in GGUF")
        return False
    
    print(f"PyTorch Q weight shape: {pt_q_weight.shape}")
    print(f"GGUF Q weight shape: {gguf_q_weight.shape}")
    
    # GGUF may transpose the weights
    if pt_q_weight.shape != gguf_q_weight.shape:
        if pt_q_weight.shape == gguf_q_weight.T.shape:
            print("Transposing GGUF weight to match PyTorch...")
            gguf_q_weight = gguf_q_weight.T
        else:
            print("ERROR: Shape mismatch!")
            return False
    
    print(f"PyTorch first 5x5:\n{pt_q_weight[:5, :5]}")
    print(f"GGUF first 5x5:\n{gguf_q_weight[:5, :5]}")
    
    diff = np.abs(pt_q_weight - gguf_q_weight)
    max_diff = np.max(diff)
    mean_diff = np.mean(diff)
    
    print(f"\nMax difference: {max_diff:.6e}")
    print(f"Mean difference: {mean_diff:.6e}")
    
    if max_diff < 1e-5:
        print("✓ Weights match!")
        return True
    else:
        print("✗ Weights differ!")
        return False

def main():
    model_path = "./models/umt5-xl"
    gguf_path = "./google-umt5-xl-f32.gguf"
    test_text = "Hello, world! This is a test."
    
    print("="*70)
    print("UMT5-XL GGUF Conversion Verification Test")
    print("="*70)
    
    # Load models
    pytorch_model, tokenizer = load_pytorch_model(model_path)
    reader, gguf_tensors = load_gguf_model(gguf_path)
    
    # Encode text with PyTorch
    pt_output, token_ids = encode_text_pytorch(pytorch_model, tokenizer, test_text)
    
    print(f"\nToken IDs: {token_ids}")
    
    # Compare embeddings
    embeddings_match = compare_embeddings(pytorch_model, gguf_tensors, token_ids)
    
    # Compare encoder weights
    weights_match = compare_encoder_weights(pytorch_model, gguf_tensors)
    
    # Summary
    print("\n" + "="*70)
    print("SUMMARY")
    print("="*70)
    print(f"Embeddings match: {'✓ YES' if embeddings_match else '✗ NO'}")
    print(f"Encoder weights match: {'✓ YES' if weights_match else '✗ NO'}")
    
    if embeddings_match and weights_match:
        print("\n✓ SUCCESS: GGUF conversion verified!")
        return 0
    else:
        print("\n✗ FAILURE: GGUF conversion has issues!")
        return 1

if __name__ == "__main__":
    sys.exit(main())
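
One caveat: the comparison above assumes an F32 GGUF, where GGUFReader exposes each tensor's data directly as float32. To run the same check against an F16 or quantized file, the raw tensor data would need to be dequantized first. A minimal sketch, assuming the dequantize helper in gguf-py's quants module is available:

import gguf
from gguf.quants import dequantize  # assumed API; present in recent gguf-py

def tensor_to_f32(tensor):
    """Return a GGUF tensor's data as float32, dequantizing if needed."""
    if tensor.tensor_type == gguf.GGMLQuantizationType.F32:
        return tensor.data
    # F16 and quantized types are expanded to float32 before comparison.
    return dequantize(tensor.data, tensor.tensor_type)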

Register UMT5Model as a supported architecture variant for T5 model
conversion. This allows converting models like google/umt5-xl that use
the UMT5Model architecture class instead of UMT5ForConditionalGeneration.
@levkropp levkropp requested a review from CISC as a code owner November 11, 2025 01:50
@levkropp levkropp closed this by deleting the head repository Nov 11, 2025
@github-actions github-actions bot added the python python script changes label Nov 11, 2025