Spaces:

n00b001
/

llm-compressor-my-repo

Sleeping

App Files Files Community

llm-compressor-my-repo / test_new_quantization_methods.py

n00b001

save

d95ff5b unverified 12 days ago

raw

history blame contribute delete

2.92 kB

	#!/usr/bin/env python
	"""
	Test script to verify that the new quantization methods work with the Qwen2.5-VL
	architecture and that LlamaForCausalLM is still accepted by every method.
	"""

	from app import get_quantization_recipe
	import torch

	def test_new_quantization_methods():
	    """
	    Exercise ``get_quantization_recipe`` for every new quantization method.

	    Two passes are made, printing one ✓/✗ line per check:

	    1. Against ``Qwen2_5_VLForConditionalGeneration``: "SmoothQuant" and
	       "SparseGPT" are expected to be rejected with ``ValueError``; every
	       other method is expected to return a recipe, whose first element's
	       ``scheme``, ``sequential_targets`` and ``ignore`` attributes are
	       printed when present.
	    2. Against ``LlamaForCausalLM``: every method is expected to return
	       without raising.

	    Exceptions raised while building or inspecting a recipe are caught and
	    reported, never propagated.

	    Returns:
	        bool: True when every check passed, False when at least one check
	        printed a ✗ line.
	    """
	    qwen_arch = "Qwen2_5_VLForConditionalGeneration"
	    llama_arch = "LlamaForCausalLM"

	    # Test all the new quantization methods
	    new_methods = ["W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8", "SmoothQuant", "SparseGPT"]
	    # Methods this script expects to be rejected for Qwen2.5-VL.
	    rejected_for_qwen = {"SmoothQuant", "SparseGPT"}

	    # (method, architecture) pairs whose check failed; drives the return value.
	    failures = []

	    print(f"Testing new quantization methods with architecture: {qwen_arch}")

	    for method in new_methods:
	        print(f"\nTesting {method} quantization recipe...")
	        expect_rejection = method in rejected_for_qwen
	        try:
	            recipe = get_quantization_recipe(method, qwen_arch)
	            if expect_rejection:
	                print(f"✗ {method} should not be supported for Qwen2.5-VL but it didn't raise an error")
	                failures.append((method, qwen_arch))
	                continue

	            print(f"✓ {method} recipe created successfully: {recipe}")
	            # Index once; an empty or non-indexable recipe falls through to
	            # the generic handler below and is reported as a failure.
	            first = recipe[0]
	            if hasattr(first, 'scheme'):
	                print(f" Scheme: {first.scheme}")
	            if hasattr(first, 'sequential_targets') and first.sequential_targets:
	                print(f" Sequential targets: {first.sequential_targets}")
	            if hasattr(first, 'ignore'):
	                print(f" Ignore layers: {first.ignore}")
	        except ValueError as e:
	            if expect_rejection:
	                # Rejection is the expected outcome for these methods.
	                print(f"✓ {method} correctly raises error for Qwen2.5-VL: {e}")
	            else:
	                print(f"✗ Error with {method}: {e}")
	                failures.append((method, qwen_arch))
	        except Exception as e:
	            # Deliberately broad: one bad method must not abort the remaining checks.
	            print(f"✗ Unexpected error with {method}: {e}")
	            failures.append((method, qwen_arch))

	    # Test that Llama models still work with all methods
	    print("\n\nTesting LlamaForCausalLM compatibility...")
	    for method in new_methods:
	        print(f"Testing {method} with {llama_arch}...")
	        try:
	            get_quantization_recipe(method, llama_arch)
	        except Exception as e:
	            print(f"✗ {method} failed with {llama_arch}: {e}")
	            failures.append((method, llama_arch))
	        else:
	            print(f"✓ {method} works with {llama_arch}")

	    if failures:
	        summary = ", ".join(f"{name} ({arch})" for name, arch in failures)
	        print(f"\n✗ {len(failures)} check(s) failed: {summary}")

	    return not failures

	if __name__ == "__main__":
	    # Script entry point: run the checks and report the outcome through
	    # both the final message and the process exit status.
	    print("Testing new quantization methods...\n")
	    if test_new_quantization_methods():
	        print("\n✓ Testing of new quantization methods completed!")
	    else:
	        print("\n✗ Testing of new quantization methods completed with failures.")
	        # Non-zero exit status lets shells and CI detect a falsy result.
	        raise SystemExit(1)