#!/usr/bin/env python
"""
Test script to verify that the new quantization methods work with the Qwen2.5-VL architecture.
"""
import torch

from app import get_quantization_recipe


def test_new_quantization_methods():
    """
    Test the new quantization methods with the Qwen2.5-VL architecture.
    """
    architectures = ["Qwen2_5_VLForConditionalGeneration"]

    # Test all of the new quantization methods.
    new_methods = ["W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8", "SmoothQuant", "SparseGPT"]
    print(f"Testing new quantization methods with architecture: {architectures[0]}")

    for method in new_methods:
        print(f"\nTesting {method} quantization recipe...")
        try:
            if method in ["SmoothQuant", "SparseGPT"] and architectures[0] == "Qwen2_5_VLForConditionalGeneration":
                # These methods do not support Qwen2_5_VLForConditionalGeneration, so they should raise an error.
                try:
                    recipe = get_quantization_recipe(method, architectures[0])
                    print(f"✗ {method} should not be supported for Qwen2.5-VL, but no error was raised")
                except ValueError as e:
                    print(f"✓ {method} correctly raises an error for Qwen2.5-VL: {e}")
            else:
                recipe = get_quantization_recipe(method, architectures[0])
                print(f"✓ {method} recipe created successfully: {recipe}")
                if hasattr(recipe[0], 'scheme'):
                    print(f"  Scheme: {recipe[0].scheme}")
                if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
                    print(f"  Sequential targets: {recipe[0].sequential_targets}")
                if hasattr(recipe[0], 'ignore'):
                    print(f"  Ignore layers: {recipe[0].ignore}")
        except ValueError as e:
            if method in ["SmoothQuant", "SparseGPT"]:
                # These are expected not to work with Qwen2.5-VL.
                print(f"✓ {method} correctly not supported for Qwen2.5-VL: {e}")
            else:
                print(f"✗ Error with {method}: {e}")
        except Exception as e:
            print(f"✗ Unexpected error with {method}: {e}")

    # Verify that Llama models still work with all methods.
    print("\n\nTesting LlamaForCausalLM compatibility...")
    llama_arch = "LlamaForCausalLM"
    for method in new_methods:
        print(f"Testing {method} with {llama_arch}...")
        try:
            recipe = get_quantization_recipe(method, llama_arch)
            print(f"✓ {method} works with {llama_arch}")
        except Exception as e:
            print(f"✗ {method} failed with {llama_arch}: {e}")

    return True
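
# ---------------------------------------------------------------------------
# Illustrative sketch only -- NOT the real app.get_quantization_recipe. It is
# included here to document the contract the tests above rely on, assuming
# app.py builds llmcompressor modifier recipes (suggested by the scheme /
# sequential_targets / ignore attributes inspected earlier). The scheme-name
# mapping, ignore patterns, and modifier choices below are assumptions.
# ---------------------------------------------------------------------------
def _sketch_get_quantization_recipe(method, architecture):
    from llmcompressor.modifiers.quantization import GPTQModifier, QuantizationModifier

    is_qwen_vl = architecture == "Qwen2_5_VLForConditionalGeneration"
    if method in ("SmoothQuant", "SparseGPT") and is_qwen_vl:
        # Mirrors the ValueError the tests above expect for unsupported combinations.
        raise ValueError(f"{method} is not supported for {architecture}")

    # The lm_head (and, for Qwen2.5-VL, the vision tower) is typically left unquantized.
    ignore = ["lm_head"] + (["re:visual.*"] if is_qwen_vl else [])

    if method == "SmoothQuant":
        # SmoothQuant smooths activations first, then an INT8 weight/activation pass follows.
        from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
        return [SmoothQuantModifier(smoothing_strength=0.8),
                GPTQModifier(targets="Linear", scheme="W8A8", ignore=ignore)]

    if method == "SparseGPT":
        # Unstructured 50% sparsity as a placeholder configuration.
        from llmcompressor.modifiers.obcq import SparseGPTModifier
        return [SparseGPTModifier(sparsity=0.5, targets="Linear")]

    if method in ("FP8", "W8A8_FP8"):
        # Dynamic FP8 quantization does not need a GPTQ-style calibration pass.
        return [QuantizationModifier(targets="Linear", scheme="FP8_DYNAMIC", ignore=ignore)]

    scheme = {"W8A8_INT8": "W8A8"}.get(method, method)  # assumed mapping of method name to scheme
    return [GPTQModifier(targets="Linear", scheme=scheme, ignore=ignore)]
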
if __name__ == "__main__":
    print("Testing new quantization methods...\n")
    test_new_quantization_methods()
    print("\n✓ Testing of new quantization methods completed!")