#!/usr/bin/env python
"""
Test script to verify that the new quantization methods work with the Qwen2.5-VL architecture.
"""

from app import get_quantization_recipe

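# NOTE: get_quantization_recipe(method, architecture) is assumed to return a
# list of recipe modifiers whose first element may expose `scheme`,
# `sequential_targets`, and `ignore` attributes (inspected below). Purely as an
# illustration -- assuming app.py builds its recipes from llm-compressor-style
# modifiers, which this test does not require -- a W4A16 recipe for Qwen2.5-VL
# might look roughly like:
#
#     [GPTQModifier(scheme="W4A16",
#                   targets="Linear",
#                   ignore=["lm_head", "re:visual.*"],
#                   sequential_targets=["Qwen2_5_VLDecoderLayer"])]
#
# The actual modifier classes, targets, and ignore patterns are defined in app.py.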
def test_new_quantization_methods():
    """
    Test the new quantization methods with Qwen2.5-VL architecture.
    """
    architectures = ["Qwen2_5_VLForConditionalGeneration"]
    
    # Test all the new quantization methods
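    # Shorthand for the scheme names: W4A16 / W8A16 quantize weights to 4 or 8
    # bits with 16-bit activations, W8A8_INT8 / W8A8_FP8 quantize both weights
    # and activations to 8 bits (integer or float8), FP8 is a float8 scheme,
    # SmoothQuant smooths activation outliers before quantization, and
    # SparseGPT applies one-shot weight pruning. The exact recipe each name
    # maps to is defined in app.py.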
    new_methods = ["W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8", "SmoothQuant", "SparseGPT"]
    
    print(f"Testing new quantization methods with architecture: {architectures[0]}")
    
    for method in new_methods:
        print(f"\nTesting {method} quantization recipe...")
        try:
            recipe = get_quantization_recipe(method, architectures[0])
            if method in ["SmoothQuant", "SparseGPT"]:
                # These methods are not supported for Qwen2_5_VLForConditionalGeneration,
                # so reaching this point means the expected ValueError was not raised.
                print(f"✗ {method} should not be supported for Qwen2.5-VL but no error was raised")
            else:
                print(f"✓ {method} recipe created successfully: {recipe}")
                if hasattr(recipe[0], 'scheme'):
                    print(f"  Scheme: {recipe[0].scheme}")
                if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
                    print(f"  Sequential targets: {recipe[0].sequential_targets}")
                if hasattr(recipe[0], 'ignore'):
                    print(f"  Ignore layers: {recipe[0].ignore}")
        except ValueError as e:
            if method in ["SmoothQuant", "SparseGPT"]:
                # These methods are expected to reject Qwen2.5-VL
                print(f"✓ {method} correctly raises an error for Qwen2.5-VL: {e}")
            else:
                print(f"✗ Error with {method}: {e}")
        except Exception as e:
            print(f"✗ Unexpected error with {method}: {e}")
    
    # Test that Llama models still work with all methods
    print(f"\n\nTesting LlamaForCausalLM compatibility...")
    llama_arch = "LlamaForCausalLM"
    for method in new_methods:
        print(f"Testing {method} with {llama_arch}...")
        try:
            recipe = get_quantization_recipe(method, llama_arch)
            print(f"βœ“ {method} works with {llama_arch}")
        except Exception as e:
            print(f"βœ— {method} failed with {llama_arch}: {e}")
    
    return True

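# For context only (nothing below is executed by this test): a recipe returned
# by get_quantization_recipe would typically be passed to a one-shot
# compression run. Assuming the llm-compressor library backs app.py, that step
# would look roughly like:
#
#     from llmcompressor import oneshot
#     oneshot(model=model, dataset=calibration_dataset, recipe=recipe)
#
# where `model` and `calibration_dataset` are placeholders for a loaded model
# and calibration data; this script only checks recipe construction.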
if __name__ == "__main__":
    print("Testing new quantization methods...\n")
    test_new_quantization_methods()
    print("\nβœ“ Testing of new quantization methods completed!")