#!/usr/bin/env python
"""
Final test to confirm the original issue is resolved:
GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture
"""

from app import get_quantization_recipe


def test_original_issue_fixed():
    """
    Test to confirm the original error is fixed.

    The original error was:
    GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture
    """
    print("Testing the original issue that was reported...")
    print("Original error: GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture")
    print()

    # Test the original problematic case
    try:
        recipe = get_quantization_recipe("GPTQ", "Qwen2_5_VLForConditionalGeneration")
        print("✓ GPTQ quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
        print(f"  Recipe: {recipe}")
        if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
            print(f"  Uses sequential onloading: {recipe[0].sequential_targets}")
            print(f"  Ignores visual components: {recipe[0].ignore}")
        success_gptq = True
    except Exception as e:
        print(f"✗ GPTQ still fails: {e}")
        success_gptq = False

    print()

    # Test other methods that were also problematic
    other_methods = ["AWQ", "FP8"]
    success_others = True
    for method in other_methods:
        try:
            recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            print(f"✓ {method} quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
            if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
                print(f"  Uses sequential onloading: {recipe[0].sequential_targets}")
        except Exception as e:
            print(f"✗ {method} still fails: {e}")
            success_others = False

    print()

    # Test new methods for Qwen2.5-VL
    new_methods = ["W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8"]
    success_new = True
    for method in new_methods:
        try:
            recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            print(f"✓ {method} quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
        except Exception as e:
            print(f"✗ {method} fails: {e}")
            success_new = False

    print()

    if success_gptq and success_others and success_new:
        print("🎉 SUCCESS: The original issue has been completely resolved!")
        print("   - GPTQ now works for Qwen2_5_VLForConditionalGeneration")
        print("   - AWQ now works for Qwen2_5_VLForConditionalGeneration")
        print("   - FP8 now works for Qwen2_5_VLForConditionalGeneration")
        print("   - New methods (W4A16, W8A16, W8A8_INT8, W8A8_FP8) also work!")
        print("   - Sequential onloading is used for memory efficiency")
        print("   - Visual components are properly ignored during quantization")
        return True
    else:
        print("❌ FAILURE: Some issues remain")
        return False


def test_specific_model():
    """
    Test with the specific model mentioned: huihui-ai/Huihui-Fara-7B-abliterated
    """
    print("\n" + "=" * 60)
    print("Testing with the specific model: huihui-ai/Huihui-Fara-7B-abliterated")
    print("(This model has architecture: Qwen2_5_VLForConditionalGeneration)")
    print("=" * 60)

    # All the methods that should now work for this model
    methods = ["GPTQ", "AWQ", "FP8", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8"]
    success = True
    for method in methods:
        try:
            recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            print(f"✓ {method}: OK")
        except Exception as e:
            print(f"✗ {method}: FAILED - {e}")
            success = False

    if success:
        print(f"\n🎉 All {len(methods)} quantization methods now work for the target model!")
        print("Users can now quantize huihui-ai/Huihui-Fara-7B-abliterated with any of these methods.")
    else:
        print("\n❌ Some methods still don't work for the target model.")

    return success


if __name__ == "__main__":
    print("Testing resolution of the original quantization issue...\n")

    issue_fixed = test_original_issue_fixed()
    model_specific = test_specific_model()

    print("\n" + "=" * 60)
    if issue_fixed and model_specific:
        print("✅ ALL TESTS PASSED - The issue is completely resolved!")
        print("\nThe Hugging Face Space now supports:")
        print("  • All original methods: GPTQ, AWQ, FP8")
        print("  • New methods: W4A16, W8A16, W8A8_INT8, W8A8_FP8")
        print("  • Sequential onloading for memory efficiency")
        print("  • Proper handling of Qwen2.5-VL visual components")
        print("  • All methods work with Qwen2_5_VLForConditionalGeneration models")
    else:
        print("❌ SOME TESTS FAILED - Issue may not be completely resolved")
    print("=" * 60)