rahul7star committed on
Commit
85d3b9b
·
verified ·
1 Parent(s): 8349cf7

Update app_quant.py

Browse files
Files changed (1) hide show
  1. app_quant.py +138 -32
app_quant.py CHANGED
@@ -1,33 +1,77 @@
1
  import torch
2
  import spaces
3
  import gradio as gr
 
 
 
 
 
 
4
 
5
  from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
6
  from diffusers import ZImagePipeline, AutoModel
7
  from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
8
 
9
  # ============================================================
10
- # Model Settings
11
  # ============================================================
12
- model_cache = "./weights/"
13
- model_id = "Tongyi-MAI/Z-Image-Turbo"
14
- torch_dtype = torch.bfloat16
15
- USE_CPU_OFFLOAD = False
 
 
 
16
 
17
  # ============================================================
18
- # GPU Check
19
  # ============================================================
 
 
 
 
 
 
 
 
 
 
 
 
20
  if torch.cuda.is_available():
21
- print(f"INFO: CUDA available: {torch.cuda.get_device_name(0)} (count={torch.cuda.device_count()})")
22
- device = "cuda:0"
23
- gpu_id = 0
 
24
  else:
25
- raise RuntimeError("ERROR: CUDA not available. This program requires a CUDA-enabled GPU.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  # ============================================================
28
- # Load Transformer
29
  # ============================================================
30
- print("INFO: Loading transformer block ...")
 
 
 
31
  quantization_config = DiffusersBitsAndBytesConfig(
32
  load_in_4bit=True,
33
  bnb_4bit_quant_type="nf4",
@@ -35,6 +79,10 @@ quantization_config = DiffusersBitsAndBytesConfig(
35
  bnb_4bit_use_double_quant=True,
36
  llm_int8_skip_modules=["transformer_blocks.0.img_mod"],
37
  )
 
 
 
 
38
  transformer = AutoModel.from_pretrained(
39
  model_id,
40
  cache_dir=model_cache,
@@ -43,21 +91,47 @@ transformer = AutoModel.from_pretrained(
43
  torch_dtype=torch_dtype,
44
  device_map=device,
45
  )
46
- print("INFO: Transformer block loaded.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  if USE_CPU_OFFLOAD:
49
  transformer = transformer.to("cpu")
50
 
51
  # ============================================================
52
- # Load Text Encoder
53
  # ============================================================
54
- print("INFO: Loading text encoder ...")
 
 
 
55
  quantization_config = TransformersBitsAndBytesConfig(
56
  load_in_4bit=True,
57
  bnb_4bit_quant_type="nf4",
58
  bnb_4bit_compute_dtype=torch.bfloat16,
59
  bnb_4bit_use_double_quant=True,
60
  )
 
 
 
 
61
  text_encoder = AutoModel.from_pretrained(
62
  model_id,
63
  cache_dir=model_cache,
@@ -66,15 +140,35 @@ text_encoder = AutoModel.from_pretrained(
66
  torch_dtype=torch_dtype,
67
  device_map=device,
68
  )
69
- print("INFO: Text encoder loaded.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  if USE_CPU_OFFLOAD:
72
  text_encoder = text_encoder.to("cpu")
73
 
74
  # ============================================================
75
- # Build Pipeline
76
  # ============================================================
77
- print("INFO: Building pipeline ...")
 
 
 
78
  pipe = ZImagePipeline.from_pretrained(
79
  model_id,
80
  transformer=transformer,
@@ -84,19 +178,33 @@ pipe = ZImagePipeline.from_pretrained(
84
 
85
  if USE_CPU_OFFLOAD:
86
  pipe.enable_model_cpu_offload(gpu_id=gpu_id)
87
- print("INFO: CPU offload active")
88
  else:
89
  pipe.to(device)
90
- print("INFO: Pipeline to GPU")
 
 
91
 
92
  # ============================================================
93
- # Inference Function for Gradio
94
  # ============================================================
95
  @spaces.GPU
96
  def generate_image(prompt, height, width, steps, seed):
 
 
 
 
 
 
 
 
 
 
 
 
97
  generator = torch.Generator(device).manual_seed(seed)
98
-
99
- output = pipe(
100
  prompt=prompt,
101
  height=height,
102
  width=width,
@@ -105,14 +213,14 @@ def generate_image(prompt, height, width, steps, seed):
105
  generator=generator,
106
  )
107
 
108
- return output.images[0]
109
-
110
 
111
  # ============================================================
112
- # Gradio UI
113
  # ============================================================
114
  with gr.Blocks(title="Z-Image-Turbo Generator") as demo:
115
- gr.Markdown("# **Z-Image-Turbo — 4bit Quantized Image Generator**")
116
 
117
  with gr.Row():
118
  with gr.Column(scale=1):
@@ -123,17 +231,15 @@ with gr.Blocks(title="Z-Image-Turbo Generator") as demo:
123
  seed = gr.Slider(0, 999999, value=42, step=1, label="Seed")
124
 
125
  btn = gr.Button("Generate", variant="primary")
126
-
127
  with gr.Column(scale=1):
128
  output_image = gr.Image(label="Output Image")
 
129
 
130
  btn.click(
131
  generate_image,
132
  inputs=[prompt, height, width, steps, seed],
133
- outputs=[output_image],
134
  )
135
 
136
- # ============================================================
137
- # Launch
138
- # ============================================================
139
  demo.launch()
 
1
  import torch
2
  import spaces
3
  import gradio as gr
4
+ import sys
5
+ import platform
6
+ import os
7
+ import diffusers
8
+ import transformers
9
+ import peft
10
 
11
  from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
12
  from diffusers import ZImagePipeline, AutoModel
13
  from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
14
 
15
  # ============================================================
16
+ # LOGGING BUFFER
17
  # ============================================================
18
+ LOGS = ""
19
+
20
+ def log(msg):
21
+ global LOGS
22
+ print(msg)
23
+ LOGS += msg + "\n"
24
+ return msg
25
 
26
  # ============================================================
27
+ # ENVIRONMENT INFO
28
  # ============================================================
29
+ log("===================================================")
30
+ log("🔍 Z-IMAGE-TURBO DEBUGGING + DETAILED TRANSFORMER INSIGHTS")
31
+ log("===================================================\n")
32
+
33
+ log(f"📌 PYTHON VERSION : {sys.version.replace(chr(10), ' ')}")
34
+ log(f"📌 PLATFORM : {platform.platform()}")
35
+ log(f"📌 TORCH VERSION : {torch.__version__}")
36
+ log(f"📌 TRANSFORMERS VERSION : {transformers.__version__}")
37
+ log(f"📌 DIFFUSERS VERSION : {diffusers.__version__}")
38
+ log(f"📌 PEFT VERSION : {peft.__version__}")
39
+ log(f"📌 CUDA AVAILABLE : {torch.cuda.is_available()}")
40
+
41
  if torch.cuda.is_available():
42
+ log(f"📌 GPU NAME : {torch.cuda.get_device_name(0)}")
43
+ log(f"📌 GPU CAPABILITY : {torch.cuda.get_device_capability(0)}")
44
+ log(f"📌 GPU MEMORY (TOTAL) : {torch.cuda.get_device_properties(0).total_memory/1e9:.2f} GB")
45
+ log(f"📌 FLASH ATTENTION : {torch.backends.cuda.flash_sdp_enabled()}")
46
  else:
47
+ raise RuntimeError("❌ CUDA is REQUIRED but not available.")
48
+
49
+ device = "cuda"
50
+ gpu_id = 0
51
+
52
+ # ============================================================
53
+ # MODEL SETTINGS
54
+ # ============================================================
55
+ model_cache = "./weights/"
56
+ model_id = "Tongyi-MAI/Z-Image-Turbo"
57
+ torch_dtype = torch.bfloat16
58
+ USE_CPU_OFFLOAD = False
59
+
60
+ log("\n===================================================")
61
+ log("🧠 MODEL CONFIGURATION")
62
+ log("===================================================")
63
+ log(f"Model ID : {model_id}")
64
+ log(f"Model Cache Directory : {model_cache}")
65
+ log(f"torch_dtype : {torch_dtype}")
66
+ log(f"USE_CPU_OFFLOAD : {USE_CPU_OFFLOAD}")
67
 
68
  # ============================================================
69
+ # LOAD TRANSFORMER BLOCK
70
  # ============================================================
71
+ log("\n===================================================")
72
+ log("🔧 LOADING TRANSFORMER BLOCK")
73
+ log("===================================================")
74
+
75
  quantization_config = DiffusersBitsAndBytesConfig(
76
  load_in_4bit=True,
77
  bnb_4bit_quant_type="nf4",
 
79
  bnb_4bit_use_double_quant=True,
80
  llm_int8_skip_modules=["transformer_blocks.0.img_mod"],
81
  )
82
+
83
+ log("4-bit Quantization Config (Transformer):")
84
+ log(str(quantization_config))
85
+
86
  transformer = AutoModel.from_pretrained(
87
  model_id,
88
  cache_dir=model_cache,
 
91
  torch_dtype=torch_dtype,
92
  device_map=device,
93
  )
94
+ log("✅ Transformer block loaded successfully.")
95
+
96
+ # ------------------------------------------------------------
97
+ # TRANSFORMER INSIGHTS
98
+ # ------------------------------------------------------------
99
+ log("🔍 Transformer Architecture Details:")
100
+ log(f"Number of Transformer Modules : {len(transformer.transformer_blocks)}")
101
+ for i, block in enumerate(transformer.transformer_blocks):
102
+ log(f" Block {i}: {block.__class__.__name__}")
103
+ # Log attention type if possible
104
+ attn_type = getattr(block, "attn", None)
105
+ if attn_type:
106
+ log(f" Attention: {attn_type.__class__.__name__}")
107
+ # Check for FlashAttention usage if attribute exists
108
+ flash_enabled = getattr(attn_type, "flash", None)
109
+ log(f" FlashAttention Enabled? : {flash_enabled}")
110
+ log(f"Hidden size: {transformer.config.hidden_size}")
111
+ log(f"Number of attention heads: {transformer.config.num_attention_heads}")
112
+ log(f"Number of layers: {transformer.config.num_hidden_layers}")
113
+ log(f"Intermediate size: {transformer.config.intermediate_size}")
114
 
115
  if USE_CPU_OFFLOAD:
116
  transformer = transformer.to("cpu")
117
 
118
  # ============================================================
119
+ # LOAD TEXT ENCODER
120
  # ============================================================
121
+ log("\n===================================================")
122
+ log("🔧 LOADING TEXT ENCODER")
123
+ log("===================================================")
124
+
125
  quantization_config = TransformersBitsAndBytesConfig(
126
  load_in_4bit=True,
127
  bnb_4bit_quant_type="nf4",
128
  bnb_4bit_compute_dtype=torch.bfloat16,
129
  bnb_4bit_use_double_quant=True,
130
  )
131
+
132
+ log("4-bit Quantization Config (Text Encoder):")
133
+ log(str(quantization_config))
134
+
135
  text_encoder = AutoModel.from_pretrained(
136
  model_id,
137
  cache_dir=model_cache,
 
140
  torch_dtype=torch_dtype,
141
  device_map=device,
142
  )
143
+ log("✅ Text encoder loaded successfully.")
144
+
145
+ # ------------------------------------------------------------
146
+ # TEXT ENCODER INSIGHTS
147
+ # ------------------------------------------------------------
148
+ log("🔍 Text Encoder Architecture Details:")
149
+ log(f"Number of Transformer Modules : {len(text_encoder.transformer_blocks)}")
150
+ for i, block in enumerate(text_encoder.transformer_blocks):
151
+ log(f" Block {i}: {block.__class__.__name__}")
152
+ attn_type = getattr(block, "attn", None)
153
+ if attn_type:
154
+ log(f" Attention: {attn_type.__class__.__name__}")
155
+ flash_enabled = getattr(attn_type, "flash", None)
156
+ log(f" FlashAttention Enabled? : {flash_enabled}")
157
+ log(f"Hidden size: {text_encoder.config.hidden_size}")
158
+ log(f"Number of attention heads: {text_encoder.config.num_attention_heads}")
159
+ log(f"Number of layers: {text_encoder.config.num_hidden_layers}")
160
+ log(f"Intermediate size: {text_encoder.config.intermediate_size}")
161
 
162
  if USE_CPU_OFFLOAD:
163
  text_encoder = text_encoder.to("cpu")
164
 
165
  # ============================================================
166
+ # BUILD PIPELINE
167
  # ============================================================
168
+ log("\n===================================================")
169
+ log("🔧 BUILDING Z-IMAGE-TURBO PIPELINE")
170
+ log("===================================================")
171
+
172
  pipe = ZImagePipeline.from_pretrained(
173
  model_id,
174
  transformer=transformer,
 
178
 
179
  if USE_CPU_OFFLOAD:
180
  pipe.enable_model_cpu_offload(gpu_id=gpu_id)
181
+ log("⚙ CPU OFFLOAD ENABLED")
182
  else:
183
  pipe.to(device)
184
+ log("⚙ Pipeline moved to GPU")
185
+
186
+ log("✅ Pipeline ready.")
187
 
188
  # ============================================================
189
+ # INFERENCE FUNCTION
190
  # ============================================================
191
  @spaces.GPU
192
  def generate_image(prompt, height, width, steps, seed):
193
+ global LOGS
194
+ LOGS = "" # Reset logs for this run
195
+
196
+ log("===================================================")
197
+ log("🎨 RUNNING INFERENCE")
198
+ log("===================================================")
199
+
200
+ log(f"Prompt : {prompt}")
201
+ log(f"Resolution : {width} x {height}")
202
+ log(f"Steps : {steps}")
203
+ log(f"Seed : {seed}")
204
+
205
  generator = torch.Generator(device).manual_seed(seed)
206
+
207
+ out = pipe(
208
  prompt=prompt,
209
  height=height,
210
  width=width,
 
213
  generator=generator,
214
  )
215
 
216
+ log("✅ Inference Finished")
217
+ return out.images[0], LOGS
218
 
219
  # ============================================================
220
+ # GRADIO UI
221
  # ============================================================
222
  with gr.Blocks(title="Z-Image-Turbo Generator") as demo:
223
+ gr.Markdown("# **🚀 Z-Image-Turbo — Transformer Deep Logs**")
224
 
225
  with gr.Row():
226
  with gr.Column(scale=1):
 
231
  seed = gr.Slider(0, 999999, value=42, step=1, label="Seed")
232
 
233
  btn = gr.Button("Generate", variant="primary")
234
+
235
  with gr.Column(scale=1):
236
  output_image = gr.Image(label="Output Image")
237
+ logs_panel = gr.Textbox(label="📜 Transformer Logs", lines=25, interactive=False)
238
 
239
  btn.click(
240
  generate_image,
241
  inputs=[prompt, height, width, steps, seed],
242
+ outputs=[output_image, logs_panel],
243
  )
244
 
 
 
 
245
  demo.launch()