#!/usr/bin/env python
"""
Test script to verify that the new quantization methods work with Qwen2.5-VL architecture
"""

from app import get_quantization_recipe
# NOTE(review): torch is not referenced anywhere in this script; the import is
# kept in case it is relied on for its side effects -- confirm before removing.
import torch

# Architecture name this script treats as the Qwen2.5-VL target.
_QWEN_VL_ARCH = "Qwen2_5_VLForConditionalGeneration"

# Methods this script expects get_quantization_recipe to reject (with a
# ValueError) when asked for a Qwen2.5-VL recipe.
_UNSUPPORTED_ON_QWEN_VL = frozenset({"SmoothQuant", "SparseGPT"})


def _print_first_modifier_details(recipe) -> None:
    """Print the optional attributes of the first entry of *recipe*.

    Only attributes that are actually present are printed; the sequential
    targets are additionally skipped when they are empty/falsy.
    """
    first = recipe[0]
    if hasattr(first, "scheme"):
        print(f"  Scheme: {first.scheme}")
    if getattr(first, "sequential_targets", None):
        print(f"  Sequential targets: {first.sequential_targets}")
    if hasattr(first, "ignore"):
        print(f"  Ignore layers: {first.ignore}")


def _check_qwen_method(method: str, architecture: str) -> bool:
    """Run one method against the Qwen2.5-VL architecture and report it.

    Returns True when the outcome matches the expectation: a ValueError for
    methods listed in ``_UNSUPPORTED_ON_QWEN_VL``, a printable recipe for all
    other methods. Any other outcome is printed as a failure and yields False.
    """
    expect_rejection = method in _UNSUPPORTED_ON_QWEN_VL
    # The recipe inspection stays inside the try on purpose: an unexpected
    # recipe shape (e.g. an empty list) is reported as a failure for this
    # method instead of aborting the whole run.
    try:
        recipe = get_quantization_recipe(method, architecture)
        if expect_rejection:
            print(f"✗ {method} should not be supported for Qwen2.5-VL but it didn't raise an error")
            return False
        print(f"✓ {method} recipe created successfully: {recipe}")
        _print_first_modifier_details(recipe)
        return True
    except ValueError as e:
        if expect_rejection:
            print(f"✓ {method} correctly raises error for Qwen2.5-VL: {e}")
            return True
        print(f"✗ Error with {method}: {e}")
        return False
    except Exception as e:
        print(f"✗ Unexpected error with {method}: {e}")
        return False


def test_new_quantization_methods() -> bool:
    """
    Test the new quantization methods with Qwen2.5-VL architecture.

    Every method is first checked against Qwen2.5-VL, then against
    LlamaForCausalLM, where all methods are expected to produce a recipe
    without raising. Each outcome is printed with a ✓ / ✗ marker.

    Returns:
        True if every check matched its expectation, False otherwise.
        (Previously this always returned True, which hid failures.)
    """
    architectures = [_QWEN_VL_ARCH]

    # Test all the new quantization methods
    new_methods = ["W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8", "SmoothQuant", "SparseGPT"]

    all_passed = True

    print(f"Testing new quantization methods with architecture: {architectures[0]}")
    for method in new_methods:
        print(f"\nTesting {method} quantization recipe...")
        if not _check_qwen_method(method, architectures[0]):
            all_passed = False

    # Test that Llama models still work with all methods
    print("\n\nTesting LlamaForCausalLM compatibility...")
    llama_arch = "LlamaForCausalLM"
    for method in new_methods:
        print(f"Testing {method} with {llama_arch}...")
        try:
            get_quantization_recipe(method, llama_arch)
        except Exception as e:
            print(f"✗ {method} failed with {llama_arch}: {e}")
            all_passed = False
        else:
            print(f"✓ {method} works with {llama_arch}")

    return all_passed


if __name__ == "__main__":
    print("Testing new quantization methods...\n")
    if test_new_quantization_methods():
        print("\n✓ Testing of new quantization methods completed!")
    else:
        print("\n✗ Testing of new quantization methods completed with failures!")
        # Non-zero exit status so shells and CI can detect the failure.
        raise SystemExit(1)