#!/usr/bin/env python
"""
Test script to verify that the new quantization methods work with the Qwen2.5-VL architecture.
"""

from app import get_quantization_recipe

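# NOTE: get_quantization_recipe(method, architecture) is assumed to return a
# list of recipe modifiers whose first element may expose `scheme`,
# `sequential_targets`, and `ignore` attributes (inspected below). Purely as an
# illustration -- assuming app.py builds its recipes from llm-compressor-style
# modifiers, which this test does not require -- a W4A16 recipe for Qwen2.5-VL
# might look roughly like:
#
#     [GPTQModifier(scheme="W4A16",
#                   targets="Linear",
#                   ignore=["lm_head", "re:visual.*"],
#                   sequential_targets=["Qwen2_5_VLDecoderLayer"])]
#
# The actual modifier classes, targets, and ignore patterns are defined in app.py.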
def test_new_quantization_methods():
    """
    Test the new quantization methods with Qwen2.5-VL architecture.
    """
    architectures = ["Qwen2_5_VLForConditionalGeneration"]
    
    # Test all the new quantization methods
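    # Shorthand for the scheme names: W4A16 / W8A16 quantize weights to 4 or 8
    # bits with 16-bit activations, W8A8_INT8 / W8A8_FP8 quantize both weights
    # and activations to 8 bits (integer or float8), FP8 is a float8 scheme,
    # SmoothQuant smooths activation outliers before quantization, and
    # SparseGPT applies one-shot weight pruning. The exact recipe each name
    # maps to is defined in app.py.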
    new_methods = ["W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8", "SmoothQuant", "SparseGPT"]
    
    print(f"Testing new quantization methods with architecture: {architectures[0]}")
    
    for method in new_methods:
        print(f"\nTesting {method} quantization recipe...")
        try:
            recipe = get_quantization_recipe(method, architectures[0])
            if method in ["SmoothQuant", "SparseGPT"]:
                # These methods are not supported for Qwen2_5_VLForConditionalGeneration,
                # so reaching this point means the expected ValueError was not raised.
                print(f"✗ {method} should not be supported for Qwen2.5-VL but no error was raised")
            else:
                print(f"✓ {method} recipe created successfully: {recipe}")
                if hasattr(recipe[0], 'scheme'):
                    print(f"  Scheme: {recipe[0].scheme}")
                if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
                    print(f"  Sequential targets: {recipe[0].sequential_targets}")
                if hasattr(recipe[0], 'ignore'):
                    print(f"  Ignore layers: {recipe[0].ignore}")
        except ValueError as e:
            if method in ["SmoothQuant", "SparseGPT"]:
                # These methods are expected to reject Qwen2.5-VL
                print(f"✓ {method} correctly raises an error for Qwen2.5-VL: {e}")
            else:
                print(f"✗ Error with {method}: {e}")
        except Exception as e:
            print(f"✗ Unexpected error with {method}: {e}")
    
    # Test that Llama models still work with all methods
    print(f"\n\nTesting LlamaForCausalLM compatibility...")
    llama_arch = "LlamaForCausalLM"
    for method in new_methods:
        print(f"Testing {method} with {llama_arch}...")
        try:
            recipe = get_quantization_recipe(method, llama_arch)
            print(f"βœ“ {method} works with {llama_arch}")
        except Exception as e:
            print(f"βœ— {method} failed with {llama_arch}: {e}")
    
    return True

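# For context only (nothing below is executed by this test): a recipe returned
# by get_quantization_recipe would typically be passed to a one-shot
# compression run. Assuming the llm-compressor library backs app.py, that step
# would look roughly like:
#
#     from llmcompressor import oneshot
#     oneshot(model=model, dataset=calibration_dataset, recipe=recipe)
#
# where `model` and `calibration_dataset` are placeholders for a loaded model
# and calibration data; this script only checks recipe construction.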
if __name__ == "__main__":
    print("Testing new quantization methods...\n")
    test_new_quantization_methods()
    print("\nβœ“ Testing of new quantization methods completed!")