#!/usr/bin/env python
"""
Smoke-test script for Qwen2.5-VL support in the app.

Checks that ``get_quantization_recipe`` can build a recipe for the
``Qwen2_5_VLForConditionalGeneration`` architecture with each quantization
method, and that ``determine_model_class`` accepts the manual Qwen2.5-VL
model-type label.

Run it directly (``python <this file>``). The process exit status is 0 when
every check passes and 1 otherwise. The ``test_*`` functions report their
outcome through a boolean return value rather than by raising.
"""

import sys
import traceback

# NOTE(review): ``torch`` and ``AutoConfig`` are not referenced below; they are
# kept because removing file-level imports is out of scope here -- TODO confirm
# whether they can be dropped.
import torch
from transformers import AutoConfig

from app import determine_model_class, get_quantization_recipe


def test_qwen2_5_vl_detection() -> bool:
    """
    Check that a quantization recipe can be built for Qwen2.5-VL.

    No model or config is downloaded: the architecture string is hard-coded to
    the value expected from ``model.config.architectures[0]`` and passed to
    ``get_quantization_recipe`` once per quantization method.

    Returns:
        True if a recipe was created and inspected for every method, False if
        any step raised (the traceback is printed in that case).
    """
    # Only used for display; the recipe lookup is driven by the architecture name.
    model_id = "Qwen/Qwen2.5-VL-7B-Instruct"

    # Simulates the architecture list that would come from the model config.
    architectures = ["Qwen2_5_VLForConditionalGeneration"]

    print(f"Testing architecture detection for: {model_id}")
    print(f"Architectures found: {architectures}")

    try:
        for method in ["GPTQ", "AWQ", "FP8"]:
            print(f"\nTesting {method} quantization recipe...")
            recipe = get_quantization_recipe(method, architectures[0])
            print(f"{method} recipe created successfully: {recipe}")

            # The recipe is iterated as a sequence of modifiers; not every
            # modifier necessarily exposes these attributes, hence the
            # default / filter.
            sequential_targets = [
                getattr(mod, "sequential_targets", "N/A") for mod in recipe
            ]
            ignored_layers = [mod.ignore for mod in recipe if hasattr(mod, "ignore")]
            print(f"Sequential targets: {sequential_targets}")
            print(f"Ignore layers: {ignored_layers}")

        print("\n✓ All quantization methods work with Qwen2_5_VLForConditionalGeneration architecture")
    except Exception as e:
        # Top-level boundary of a smoke test: report any failure and signal it
        # through the return value instead of propagating.
        print(f"\n✗ Error creating quantization recipe: {e}")
        traceback.print_exc()
        return False

    return True


def test_manual_model_class_detection() -> bool:
    """
    Check that manual model-class selection accepts the Qwen2.5-VL label.

    Calls ``determine_model_class`` with placeholder values ("test",
    "dummy_token") and the manual model-type label, and prints whatever it
    returns.

    NOTE(review): only the absence of an exception is checked; the returned
    value is printed but not validated.

    Returns:
        True if the call completed without raising, False otherwise (the
        traceback is printed in that case).
    """
    print("\nTesting manual model class detection...")

    manual_model_type = "Qwen2_5_VLForConditionalGeneration (Qwen2.5-VL)"

    try:
        model_class = determine_model_class("test", "dummy_token", manual_model_type)
    except Exception as e:
        print(f"✗ Error in manual detection: {e}")
        traceback.print_exc()
        return False

    print(f"Manual detection returned: {model_class}")
    print("✓ Manual model class detection works")
    return True


def main() -> int:
    """
    Run every check and return a process exit status.

    Both checks are always executed so that a failure in the first does not
    hide the result of the second.

    Returns:
        0 if all checks passed, 1 otherwise.
    """
    print("Testing Qwen2.5-VL architecture detection and quantization support...\n")

    results = [
        test_qwen2_5_vl_detection(),
        test_manual_model_class_detection(),
    ]

    if all(results):
        print("\n✓ All tests passed! Qwen2.5-VL models should now be properly supported.")
        return 0

    print("\n✗ Some tests failed. Please check the implementation.")
    return 1


if __name__ == "__main__":
    # Propagate failure to the shell / CI instead of always exiting with 0.
    sys.exit(main())