llm-compressor-my-repo / test_new_quantization_methods.py
n00b001's picture
save
d95ff5b unverified
#!/usr/bin/env python
"""
Test script to verify that the new quantization methods work with Qwen2.5-VL architecture
"""
from app import get_quantization_recipe
import torch
def test_new_quantization_methods():
    """
    Exercise ``get_quantization_recipe`` for every new quantization method.

    Two passes are made over the same list of methods:

    1. Against ``Qwen2_5_VLForConditionalGeneration``. ``SmoothQuant`` and
       ``SparseGPT`` are expected to be rejected with a ``ValueError``; every
       other method is expected to yield a recipe, whose first entry is
       inspected for ``scheme``, ``sequential_targets`` and ``ignore``.
    2. Against ``LlamaForCausalLM``, where every method is expected to
       succeed.

    Each check prints one status line. Errors never propagate out of this
    function; they are reported on stdout and reflected in the return value.

    Returns:
        bool: True when every check behaved as expected, False as soon as at
        least one check failed.
    """
    # Escapes keep the status glyphs intact whatever encoding the file is
    # saved or served in.
    ok_mark = "\u2713"
    fail_mark = "\u2717"

    qwen_arch = "Qwen2_5_VLForConditionalGeneration"
    llama_arch = "LlamaForCausalLM"

    # Test all the new quantization methods
    new_methods = [
        "W4A16",
        "W8A16",
        "W8A8_INT8",
        "W8A8_FP8",
        "FP8",
        "SmoothQuant",
        "SparseGPT",
    ]
    # Methods that must be refused (ValueError) for Qwen2.5-VL.
    rejected_for_qwen = {"SmoothQuant", "SparseGPT"}

    all_passed = True

    print(f"Testing new quantization methods with architecture: {qwen_arch}")
    for method in new_methods:
        print(f"\nTesting {method} quantization recipe...")
        expect_rejection = method in rejected_for_qwen
        try:
            recipe = get_quantization_recipe(method, qwen_arch)
            if expect_rejection:
                # Getting a recipe back is the failure case here.
                all_passed = False
                print(
                    f"{fail_mark} {method} should not be supported for "
                    f"Qwen2.5-VL but it didn't raise an error"
                )
                continue

            print(f"{ok_mark} {method} recipe created successfully: {recipe}")
            # Only the first recipe entry is inspected; presumably it is the
            # quantization modifier (verify against get_quantization_recipe).
            first_entry = recipe[0]
            if hasattr(first_entry, "scheme"):
                print(f" Scheme: {first_entry.scheme}")
            if getattr(first_entry, "sequential_targets", None):
                print(f" Sequential targets: {first_entry.sequential_targets}")
            if hasattr(first_entry, "ignore"):
                print(f" Ignore layers: {first_entry.ignore}")
        except ValueError as e:
            if expect_rejection:
                print(
                    f"{ok_mark} {method} correctly raises error for "
                    f"Qwen2.5-VL: {e}"
                )
            else:
                all_passed = False
                print(f"{fail_mark} Error with {method}: {e}")
        except Exception as e:
            # Deliberately broad: one broken method must not stop the
            # remaining checks from running.
            all_passed = False
            print(f"{fail_mark} Unexpected error with {method}: {e}")

    # Test that Llama models still work with all methods
    print(f"\n\nTesting {llama_arch} compatibility...")
    for method in new_methods:
        print(f"Testing {method} with {llama_arch}...")
        try:
            get_quantization_recipe(method, llama_arch)
        except Exception as e:
            all_passed = False
            print(f"{fail_mark} {method} failed with {llama_arch}: {e}")
        else:
            print(f"{ok_mark} {method} works with {llama_arch}")

    return all_passed
if __name__ == "__main__":
    # Script entry point: run the checks and report through the exit status.
    import sys

    print("Testing new quantization methods...\n")
    all_passed = test_new_quantization_methods()
    # "\u2713" is the check-mark glyph, written as an escape so it cannot be
    # mangled by a mis-decoded source file.
    print("\n\u2713 Testing of new quantization methods completed!")
    # Exit non-zero only on an explicit False so that a None/True result
    # keeps the previous "always exit 0" behaviour.
    if all_passed is False:
        sys.exit(1)