rahul7star committed on
Commit
b219c38
·
verified ·
1 Parent(s): 470d32a

Update app_quant.py

Browse files
Files changed (1) hide show
  1. app_quant.py +46 -49
app_quant.py CHANGED
@@ -3,10 +3,9 @@ import spaces
3
  import gradio as gr
4
  import sys
5
  import platform
6
- import os
7
  import diffusers
8
  import transformers
9
-
10
 
11
  from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
12
  from diffusers import ZImagePipeline, AutoModel
@@ -16,7 +15,6 @@ from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
16
  # LOGGING BUFFER
17
  # ============================================================
18
  LOGS = ""
19
-
20
  def log(msg):
21
  global LOGS
22
  print(msg)
@@ -27,7 +25,7 @@ def log(msg):
27
  # ENVIRONMENT INFO
28
  # ============================================================
29
  log("===================================================")
30
- log("πŸ” Z-IMAGE-TURBO DEBUGGING + DETAILED TRANSFORMER INSIGHTS")
31
  log("===================================================\n")
32
 
33
  log(f"πŸ“Œ PYTHON VERSION : {sys.version.replace(chr(10), ' ')}")
@@ -35,7 +33,6 @@ log(f"πŸ“Œ PLATFORM : {platform.platform()}")
35
  log(f"πŸ“Œ TORCH VERSION : {torch.__version__}")
36
  log(f"πŸ“Œ TRANSFORMERS VERSION : {transformers.__version__}")
37
  log(f"πŸ“Œ DIFFUSERS VERSION : {diffusers.__version__}")
38
-
39
  log(f"πŸ“Œ CUDA AVAILABLE : {torch.cuda.is_available()}")
40
 
41
  if torch.cuda.is_available():
@@ -65,6 +62,45 @@ log(f"Model Cache Directory : {model_cache}")
65
  log(f"torch_dtype : {torch_dtype}")
66
  log(f"USE_CPU_OFFLOAD : {USE_CPU_OFFLOAD}")
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  # ============================================================
69
  # LOAD TRANSFORMER BLOCK
70
  # ============================================================
@@ -75,11 +111,10 @@ log("===================================================")
75
  quantization_config = DiffusersBitsAndBytesConfig(
76
  load_in_4bit=True,
77
  bnb_4bit_quant_type="nf4",
78
- bnb_4bit_compute_dtype=torch.bfloat16,
79
  bnb_4bit_use_double_quant=True,
80
  llm_int8_skip_modules=["transformer_blocks.0.img_mod"],
81
  )
82
-
83
  log("4-bit Quantization Config (Transformer):")
84
  log(str(quantization_config))
85
 
@@ -92,25 +127,7 @@ transformer = AutoModel.from_pretrained(
92
  device_map=device,
93
  )
94
  log("βœ… Transformer block loaded successfully.")
95
-
96
- # ------------------------------------------------------------
97
- # TRANSFORMER INSIGHTS
98
- # ------------------------------------------------------------
99
- log("πŸ” Transformer Architecture Details:")
100
- log(f"Number of Transformer Modules : {len(transformer.transformer_blocks)}")
101
- for i, block in enumerate(transformer.transformer_blocks):
102
- log(f" Block {i}: {block.__class__.__name__}")
103
- # Log attention type if possible
104
- attn_type = getattr(block, "attn", None)
105
- if attn_type:
106
- log(f" Attention: {attn_type.__class__.__name__}")
107
- # Check for FlashAttention usage if attribute exists
108
- flash_enabled = getattr(attn_type, "flash", None)
109
- log(f" FlashAttention Enabled? : {flash_enabled}")
110
- log(f"Hidden size: {transformer.config.hidden_size}")
111
- log(f"Number of attention heads: {transformer.config.num_attention_heads}")
112
- log(f"Number of layers: {transformer.config.num_hidden_layers}")
113
- log(f"Intermediate size: {transformer.config.intermediate_size}")
114
 
115
  if USE_CPU_OFFLOAD:
116
  transformer = transformer.to("cpu")
@@ -125,10 +142,9 @@ log("===================================================")
125
  quantization_config = TransformersBitsAndBytesConfig(
126
  load_in_4bit=True,
127
  bnb_4bit_quant_type="nf4",
128
- bnb_4bit_compute_dtype=torch.bfloat16,
129
  bnb_4bit_use_double_quant=True,
130
  )
131
-
132
  log("4-bit Quantization Config (Text Encoder):")
133
  log(str(quantization_config))
134
 
@@ -141,23 +157,7 @@ text_encoder = AutoModel.from_pretrained(
141
  device_map=device,
142
  )
143
  log("βœ… Text encoder loaded successfully.")
144
-
145
- # ------------------------------------------------------------
146
- # TEXT ENCODER INSIGHTS
147
- # ------------------------------------------------------------
148
- log("πŸ” Text Encoder Architecture Details:")
149
- log(f"Number of Transformer Modules : {len(text_encoder.transformer_blocks)}")
150
- for i, block in enumerate(text_encoder.transformer_blocks):
151
- log(f" Block {i}: {block.__class__.__name__}")
152
- attn_type = getattr(block, "attn", None)
153
- if attn_type:
154
- log(f" Attention: {attn_type.__class__.__name__}")
155
- flash_enabled = getattr(attn_type, "flash", None)
156
- log(f" FlashAttention Enabled? : {flash_enabled}")
157
- log(f"Hidden size: {text_encoder.config.hidden_size}")
158
- log(f"Number of attention heads: {text_encoder.config.num_attention_heads}")
159
- log(f"Number of layers: {text_encoder.config.num_hidden_layers}")
160
- log(f"Intermediate size: {text_encoder.config.intermediate_size}")
161
 
162
  if USE_CPU_OFFLOAD:
163
  text_encoder = text_encoder.to("cpu")
@@ -191,12 +191,10 @@ log("βœ… Pipeline ready.")
191
  @spaces.GPU
192
  def generate_image(prompt, height, width, steps, seed):
193
  global LOGS
194
- LOGS = "" # Reset logs for this run
195
-
196
  log("===================================================")
197
  log("🎨 RUNNING INFERENCE")
198
  log("===================================================")
199
-
200
  log(f"Prompt : {prompt}")
201
  log(f"Resolution : {width} x {height}")
202
  log(f"Steps : {steps}")
@@ -212,7 +210,6 @@ def generate_image(prompt, height, width, steps, seed):
212
  guidance_scale=0.0,
213
  generator=generator,
214
  )
215
-
216
  log("βœ… Inference Finished")
217
  return out.images[0], LOGS
218
 
 
3
  import gradio as gr
4
  import sys
5
  import platform
 
6
  import diffusers
7
  import transformers
8
+ import os
9
 
10
  from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
11
  from diffusers import ZImagePipeline, AutoModel
 
15
  # LOGGING BUFFER
16
  # ============================================================
17
  LOGS = ""
 
18
  def log(msg):
19
  global LOGS
20
  print(msg)
 
25
  # ENVIRONMENT INFO
26
  # ============================================================
27
  log("===================================================")
28
+ log("πŸ” Z-IMAGE-TURBO DEBUGGING + ROBUST TRANSFORMER INSPECTION")
29
  log("===================================================\n")
30
 
31
  log(f"πŸ“Œ PYTHON VERSION : {sys.version.replace(chr(10), ' ')}")
 
33
  log(f"πŸ“Œ TORCH VERSION : {torch.__version__}")
34
  log(f"πŸ“Œ TRANSFORMERS VERSION : {transformers.__version__}")
35
  log(f"πŸ“Œ DIFFUSERS VERSION : {diffusers.__version__}")
 
36
  log(f"πŸ“Œ CUDA AVAILABLE : {torch.cuda.is_available()}")
37
 
38
  if torch.cuda.is_available():
 
62
  log(f"torch_dtype : {torch_dtype}")
63
  log(f"USE_CPU_OFFLOAD : {USE_CPU_OFFLOAD}")
64
 
65
+ # ============================================================
66
+ # ROBUST TRANSFORMER INSPECTION FUNCTION
67
+ # ============================================================
68
def inspect_transformer(model, model_name="Transformer"):
    """Log a best-effort architecture summary for ``model``.

    All output goes through ``log``. Every failure is caught and logged as a
    warning instead of being raised, so a model with an unexpected layout
    cannot abort the script that calls this helper.

    Args:
        model: Object to inspect. Its block container is looked up under the
            attribute names ``transformer_blocks``, ``blocks``, ``layers`` and
            ``encoder_blocks``, first on ``model`` itself and then on
            ``model.model`` when that attribute exists.
        model_name: Label used in the log messages.

    Returns:
        None.
    """
    log(f"\n🔍 {model_name} Architecture Details:")
    try:
        block_attrs = ("transformer_blocks", "blocks", "layers", "encoder_blocks")

        # Search the model itself first, then one level down: a wrapper's
        # ``.model`` attribute is the wrapped network, not a list of blocks,
        # so it must be searched rather than used as the container directly.
        blocks = None
        for owner in (model, getattr(model, "model", None)):
            if owner is None:
                continue
            for attr in block_attrs:
                candidate = getattr(owner, attr, None)
                # Only accept sized containers; anything else would make
                # len() below fail instead of yielding block details.
                if candidate is not None and hasattr(candidate, "__len__"):
                    blocks = candidate
                    break
            if blocks is not None:
                break

        if blocks is None:
            log(f"⚠️ Could not find transformer blocks in {model_name}, skipping detailed block info")
        else:
            # Block details are optional; a failure here must not prevent the
            # config summary below from being logged.
            try:
                log(f"Number of Transformer Modules : {len(blocks)}")
                for i, block in enumerate(blocks):
                    log(f" Block {i}: {block.__class__.__name__}")
                    attn = getattr(block, "attn", None)
                    # Identity check: a present object may still be falsy
                    # (e.g. it defines __len__ and is empty).
                    if attn is not None:
                        log(f" Attention: {attn.__class__.__name__}")
                        flash_enabled = getattr(attn, "flash", None)
                        log(f" FlashAttention Enabled? : {flash_enabled}")
            except Exception as e:
                log(f"⚠️ Error inspecting blocks: {e}")

        config = getattr(model, "config", None)
        # Identity check so that an empty, dict-like config is still reported
        # (with 'N/A' fields) rather than being treated as absent.
        if config is not None:
            log(f"Hidden size: {getattr(config, 'hidden_size', 'N/A')}")
            log(f"Number of attention heads: {getattr(config, 'num_attention_heads', 'N/A')}")
            log(f"Number of layers: {getattr(config, 'num_hidden_layers', 'N/A')}")
            log(f"Intermediate size: {getattr(config, 'intermediate_size', 'N/A')}")
        else:
            log(f"⚠️ No config attribute found in {model_name}")
    except Exception as e:
        # Top-level boundary: inspection is purely diagnostic, so log and continue.
        log(f"⚠️ Failed to inspect {model_name}: {e}")
103
+
104
  # ============================================================
105
  # LOAD TRANSFORMER BLOCK
106
  # ============================================================
 
111
  quantization_config = DiffusersBitsAndBytesConfig(
112
  load_in_4bit=True,
113
  bnb_4bit_quant_type="nf4",
114
+ bnb_4bit_compute_dtype=torch_dtype,
115
  bnb_4bit_use_double_quant=True,
116
  llm_int8_skip_modules=["transformer_blocks.0.img_mod"],
117
  )
 
118
  log("4-bit Quantization Config (Transformer):")
119
  log(str(quantization_config))
120
 
 
127
  device_map=device,
128
  )
129
  log("βœ… Transformer block loaded successfully.")
130
+ inspect_transformer(transformer, "Transformer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  if USE_CPU_OFFLOAD:
133
  transformer = transformer.to("cpu")
 
142
  quantization_config = TransformersBitsAndBytesConfig(
143
  load_in_4bit=True,
144
  bnb_4bit_quant_type="nf4",
145
+ bnb_4bit_compute_dtype=torch_dtype,
146
  bnb_4bit_use_double_quant=True,
147
  )
 
148
  log("4-bit Quantization Config (Text Encoder):")
149
  log(str(quantization_config))
150
 
 
157
  device_map=device,
158
  )
159
  log("βœ… Text encoder loaded successfully.")
160
+ inspect_transformer(text_encoder, "Text Encoder")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
  if USE_CPU_OFFLOAD:
163
  text_encoder = text_encoder.to("cpu")
 
191
  @spaces.GPU
192
  def generate_image(prompt, height, width, steps, seed):
193
  global LOGS
194
+ LOGS = "" # reset logs
 
195
  log("===================================================")
196
  log("🎨 RUNNING INFERENCE")
197
  log("===================================================")
 
198
  log(f"Prompt : {prompt}")
199
  log(f"Resolution : {width} x {height}")
200
  log(f"Steps : {steps}")
 
210
  guidance_scale=0.0,
211
  generator=generator,
212
  )
 
213
  log("βœ… Inference Finished")
214
  return out.images[0], LOGS
215