linoyts (HF Staff) committed
Commit 9d91ddf · verified · 1 Parent(s): 32eac87

add MCP support

Files changed (1): app.py (+389 -83)
app.py CHANGED
@@ -20,22 +20,29 @@ import os
 import gradio as gr
 from gradio_client import Client, handle_file
 import tempfile
+from typing import Optional, Tuple, Any


 # --- Model Loading ---
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"

-pipe = QwenImageEditPlusPipeline.from_pretrained("Qwen/Qwen-Image-Edit-2509",
-                                                 transformer= QwenImageTransformer2DModel.from_pretrained("linoyts/Qwen-Image-Edit-Rapid-AIO",
-                                                 subfolder='transformer',
-                                                 torch_dtype=dtype,
-                                                 device_map='cuda'),torch_dtype=dtype).to(device)
+pipe = QwenImageEditPlusPipeline.from_pretrained(
+    "Qwen/Qwen-Image-Edit-2509",
+    transformer=QwenImageTransformer2DModel.from_pretrained(
+        "linoyts/Qwen-Image-Edit-Rapid-AIO",
+        subfolder='transformer',
+        torch_dtype=dtype,
+        device_map='cuda'
+    ),
+    torch_dtype=dtype
+).to(device)

 pipe.load_lora_weights(
-    "dx8152/Qwen-Edit-2509-Multiple-angles",
-    weight_name="镜头转换.safetensors", adapter_name="angles"
-)
+    "dx8152/Qwen-Edit-2509-Multiple-angles",
+    weight_name="镜头转换.safetensors",
+    adapter_name="angles"
+)

 # pipe.load_lora_weights(
 #     "lovis93/next-scene-qwen-image-lora-2509",
@@ -46,38 +53,105 @@ pipe.fuse_lora(adapter_names=["angles"], lora_scale=1.25)
 # pipe.fuse_lora(adapter_names=["next-scene"], lora_scale=1.)
 pipe.unload_lora_weights()

-
-
 pipe.transformer.__class__ = QwenImageTransformer2DModel
 pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

-optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")
-
+optimize_pipeline_(
+    pipe,
+    image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))],
+    prompt="prompt"
+)

 MAX_SEED = np.iinfo(np.int32).max

-def _generate_video_segment(input_image_path: str, output_image_path: str, prompt: str, request: gr.Request) -> str:
-    """Generates a single video segment using the external service."""
+
+def _generate_video_segment(
+    input_image_path: str,
+    output_image_path: str,
+    prompt: str,
+    request: gr.Request
+) -> str:
+    """
+    Generate a single video segment between two frames by calling an external
+    Wan 2.2 image-to-video service hosted on Hugging Face Spaces.
+
+    This helper function is used internally when the user asks to create
+    a video between the input and output images.
+
+    Args:
+        input_image_path (str):
+            Path to the starting frame image on disk.
+        output_image_path (str):
+            Path to the ending frame image on disk.
+        prompt (str):
+            Text prompt describing the camera movement / transition.
+        request (gr.Request):
+            Gradio request object, used here to forward the `x-ip-token`
+            header to the downstream Space for authentication/rate limiting.
+
+    Returns:
+        str:
+            A string returned by the external service, usually a URL or path
+            to the generated video.
+    """
     x_ip_token = request.headers['x-ip-token']
-    video_client = Client("multimodalart/wan-2-2-first-last-frame", headers={"x-ip-token": x_ip_token})
+    video_client = Client(
+        "multimodalart/wan-2-2-first-last-frame",
+        headers={"x-ip-token": x_ip_token}
+    )
     result = video_client.predict(
         start_image_pil=handle_file(input_image_path),
         end_image_pil=handle_file(output_image_path),
-        prompt=prompt, api_name="/generate_video",
+        prompt=prompt,
+        api_name="/generate_video",
     )
     return result[0]["video"]

-def build_camera_prompt(rotate_deg, move_forward, vertical_tilt, wideangle):
+
+def build_camera_prompt(
+    rotate_deg: float = 0.0,
+    move_forward: float = 0.0,
+    vertical_tilt: float = 0.0,
+    wideangle: bool = False
+) -> str:
+    """
+    Build a camera movement prompt based on the chosen controls.
+
+    This converts the provided control values into a prompt instruction with
+    the corresponding trigger words for the multiple-angles LoRA.
+
+    Args:
+        rotate_deg (float, optional):
+            Horizontal rotation in degrees. Positive values rotate left,
+            negative values rotate right. Defaults to 0.0.
+        move_forward (float, optional):
+            Forward movement / zoom factor. Larger values imply moving the
+            camera closer or into a close-up. Defaults to 0.0.
+        vertical_tilt (float, optional):
+            Vertical angle of the camera:
+            - Negative ≈ bird's-eye view
+            - Positive ≈ worm's-eye view
+            Defaults to 0.0.
+        wideangle (bool, optional):
+            Whether to switch to a wide-angle lens style. Defaults to False.
+
+    Returns:
+        str:
+            A text prompt describing the camera motion. If no controls are
+            active, returns `"no camera movement"`.
+    """
     prompt_parts = []

     # Rotation
     if rotate_deg != 0:
         direction = "left" if rotate_deg > 0 else "right"
         if direction == "left":
-            prompt_parts.append(f"将镜头向左旋转{abs(rotate_deg)}度 Rotate the camera {abs(rotate_deg)} degrees to the left.")
+            prompt_parts.append(
+                f"将镜头向左旋转{abs(rotate_deg)}度 Rotate the camera {abs(rotate_deg)} degrees to the left."
+            )
         else:
-            prompt_parts.append(f"将镜头向右旋转{abs(rotate_deg)}度 Rotate the camera {abs(rotate_deg)} degrees to the right.")
-
+            prompt_parts.append(
+                f"将镜头向右旋转{abs(rotate_deg)}度 Rotate the camera {abs(rotate_deg)} degrees to the right."
+            )

     # Move forward / close-up
     if move_forward > 5:
@@ -101,20 +175,72 @@ def build_camera_prompt(rotate_deg, move_forward, vertical_tilt, wideangle):

 @spaces.GPU
 def infer_camera_edit(
-    image,
-    rotate_deg,
-    move_forward,
-    vertical_tilt,
-    wideangle,
-    seed,
-    randomize_seed,
-    true_guidance_scale,
-    num_inference_steps,
-    height,
-    width,
-    prev_output = None,
-    progress=gr.Progress(track_tqdm=True)
-):
+    image: Optional[Image.Image] = None,
+    rotate_deg: float = 0.0,
+    move_forward: float = 0.0,
+    vertical_tilt: float = 0.0,
+    wideangle: bool = False,
+    seed: int = 0,
+    randomize_seed: bool = True,
+    true_guidance_scale: float = 1.0,
+    num_inference_steps: int = 4,
+    height: int = 1024,
+    width: int = 1024,
+    prev_output: Optional[Image.Image] = None,
+    progress: gr.Progress = gr.Progress(track_tqdm=True)
+) -> Tuple[Image.Image, int, str]:
+    """
+    Edit the camera angle/view of an image with Qwen Image Edit 2509 and
+    dx8152's Qwen-Edit-2509-Multiple-angles LoRA.
+
+    Applies a camera-style transformation (rotation, zoom, tilt, lens)
+    to an input image.
+
+    Args:
+        image (PIL.Image.Image | None, optional):
+            Input image to edit. If `None`, the function will instead try to
+            use `prev_output`. At least one of `image` or `prev_output` must
+            be available. Defaults to None.
+        rotate_deg (float, optional):
+            Horizontal rotation in degrees (-90, -45, 0, 45, 90). Positive
+            values rotate to the left, negative to the right. Defaults to 0.0.
+        move_forward (float, optional):
+            Forward movement / zoom factor (0, 5, 10). Higher values move the
+            camera closer; values >5 switch to a close-up style. Defaults to 0.0.
+        vertical_tilt (float, optional):
+            Vertical tilt (-1 to 1). -1 ≈ bird's-eye view, +1 ≈ worm's-eye view.
+            Defaults to 0.0.
+        wideangle (bool, optional):
+            Whether to use a wide-angle lens style. Defaults to False.
+        seed (int, optional):
+            Random seed for the generation. Ignored if `randomize_seed=True`.
+            Defaults to 0.
+        randomize_seed (bool, optional):
+            If True, a random seed (0..MAX_SEED) is chosen per call.
+            Defaults to True.
+        true_guidance_scale (float, optional):
+            CFG / guidance scale controlling prompt adherence. Defaults to 1.0,
+            since the demo uses a distilled transformer for faster inference.
+        num_inference_steps (int, optional):
+            Number of inference steps. Defaults to 4.
+        height (int, optional):
+            Output image height. Must typically be a multiple of 8.
+            If set to 0, the model will infer a size. Defaults to 1024.
+        width (int, optional):
+            Output image width. Must typically be a multiple of 8.
+            If set to 0, the model will infer a size. Defaults to 1024.
+        prev_output (PIL.Image.Image | None, optional):
+            Previous output image to use as input when no new image is uploaded.
+            Defaults to None.
+        progress (gr.Progress, optional):
+            Gradio progress tracker, automatically provided by Gradio in the UI.
+            Defaults to a progress tracker with tqdm support.
+
+    Returns:
+        Tuple[PIL.Image.Image, int, str]:
+            - The edited output image.
+            - The actual seed used for generation.
+            - The constructed camera prompt string.
+    """
     prompt = build_camera_prompt(rotate_deg, move_forward, vertical_tilt, wideangle)
     print(f"Generated Prompt: {prompt}")

@@ -137,6 +263,7 @@ def infer_camera_edit(

     if prompt == "no camera movement":
         return image, seed, prompt
+
     result = pipe(
         image=pil_images,
         prompt=prompt,
@@ -150,25 +277,52 @@ def infer_camera_edit(

     return result, seed, prompt

-def create_video_between_images(input_image, output_image, prompt: str, request: gr.Request) -> str:
-    """Create a video between the input and output images."""
+
+def create_video_between_images(
+    input_image: Optional[Image.Image],
+    output_image: Optional[np.ndarray],
+    prompt: str,
+    request: gr.Request
+) -> str:
+    """
+    Create a short transition video between the input and output images via
+    the Wan 2.2 first-last-frame Space.
+
+    Args:
+        input_image (PIL.Image.Image | None):
+            Starting frame image (the original / previous view).
+        output_image (numpy.ndarray | None):
+            Ending frame image - the output image with the edited camera angles.
+        prompt (str):
+            The camera movement prompt used to describe the transition.
+        request (gr.Request):
+            Gradio request object, used to forward the `x-ip-token` header
+            to the video generation app.
+
+    Returns:
+        str:
+            A path pointing to the generated video.
+
+    Raises:
+        gr.Error:
+            If either image is missing or if the video generation fails.
+    """
     if input_image is None or output_image is None:
         raise gr.Error("Both input and output images are required to create a video.")
-
+
     try:
-
         with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
             input_image.save(tmp.name)
             input_image_path = tmp.name
-
+
         output_pil = Image.fromarray(output_image.astype('uint8'))
         with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
             output_pil.save(tmp.name)
             output_image_path = tmp.name
-
+
         video_path = _generate_video_segment(
-            input_image_path,
-            output_image_path,
+            input_image_path,
+            output_image_path,
             prompt if prompt else "Camera movement transformation",
             request
         )
@@ -182,18 +336,60 @@ css = '''#col-container { max-width: 800px; margin: 0 auto; }
 .dark .progress-text{color: white !important}
 #examples{max-width: 800px; margin: 0 auto; }'''

-def reset_all():
+
+def reset_all() -> list:
+    """
+    Reset all camera control knobs and flags to their default values.
+
+    This is used by the "Reset" button to set:
+    - rotate_deg = 0
+    - move_forward = 0
+    - vertical_tilt = 0
+    - wideangle = False
+    - is_reset = True
+
+    Returns:
+        list:
+            A list of values matching the order of the reset outputs:
+            [rotate_deg, move_forward, vertical_tilt, wideangle, is_reset, True]
+    """
     return [0, 0, 0, 0, False, True]

-def end_reset():
+
+def end_reset() -> bool:
+    """
+    Mark the end of a reset cycle.
+
+    This helper is chained after `reset_all` to set the internal
+    `is_reset` flag back to False, so that live inference can resume.
+
+    Returns:
+        bool:
+            Always returns False.
+    """
     return False

-def update_dimensions_on_upload(image):
+
+def update_dimensions_on_upload(
+    image: Optional[Image.Image]
+) -> Tuple[int, int]:
+    """
+    Compute the recommended (width, height) for the output resolution when an
+    image is uploaded, preserving the aspect ratio.
+
+    Args:
+        image (PIL.Image.Image | None):
+            The uploaded image. If `None`, defaults to (1024, 1024).
+
+    Returns:
+        Tuple[int, int]:
+            The new (width, height).
+    """
     if image is None:
         return 1024, 1024
-
+
     original_width, original_height = image.size
-
+
     if original_width > original_height:
         new_width = 1024
         aspect_ratio = original_height / original_width
@@ -202,11 +398,11 @@ def update_dimensions_on_upload(image):
         new_height = 1024
         aspect_ratio = original_width / original_height
         new_width = int(new_height * aspect_ratio)
-
+
     # Ensure dimensions are multiples of 8
     new_width = (new_width // 8) * 8
     new_height = (new_height // 8) * 8
-
+
     return new_width, new_height


@@ -226,31 +422,90 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
         is_reset = gr.Checkbox(value=False, visible=False)

         with gr.Tab("Camera Controls"):
-            rotate_deg = gr.Slider(label="Rotate Right-Left (degrees °)", minimum=-90, maximum=90, step=45, value=0)
-            move_forward = gr.Slider(label="Move Forward Close-Up", minimum=0, maximum=10, step=5, value=0)
-            vertical_tilt = gr.Slider(label="Vertical Angle (Bird ↔ Worm)", minimum=-1, maximum=1, step=1, value=0)
+            rotate_deg = gr.Slider(
+                label="Rotate Right-Left (degrees °)",
+                minimum=-90,
+                maximum=90,
+                step=45,
+                value=0
+            )
+            move_forward = gr.Slider(
+                label="Move Forward → Close-Up",
+                minimum=0,
+                maximum=10,
+                step=5,
+                value=0
+            )
+            vertical_tilt = gr.Slider(
+                label="Vertical Angle (Bird ↔ Worm)",
+                minimum=-1,
+                maximum=1,
+                step=1,
+                value=0
+            )
             wideangle = gr.Checkbox(label="Wide-Angle Lens", value=False)
             with gr.Row():
-                reset_btn = gr.Button("Reset")
-                run_btn = gr.Button("Generate", variant="primary")
+                reset_btn = gr.Button("Reset")
+                run_btn = gr.Button("Generate", variant="primary")

         with gr.Accordion("Advanced Settings", open=False):
-            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
-            randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
-            true_guidance_scale = gr.Slider(label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
-            num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=4)
-            height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
-            width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
+            seed = gr.Slider(
+                label="Seed",
+                minimum=0,
+                maximum=MAX_SEED,
+                step=1,
+                value=0
+            )
+            randomize_seed = gr.Checkbox(
+                label="Randomize Seed",
+                value=True
+            )
+            true_guidance_scale = gr.Slider(
+                label="True Guidance Scale",
+                minimum=1.0,
+                maximum=10.0,
+                step=0.1,
+                value=1.0
+            )
+            num_inference_steps = gr.Slider(
+                label="Inference Steps",
+                minimum=1,
+                maximum=40,
+                step=1,
+                value=4
+            )
+            height = gr.Slider(
+                label="Height",
+                minimum=256,
+                maximum=2048,
+                step=8,
+                value=1024
+            )
+            width = gr.Slider(
+                label="Width",
+                minimum=256,
+                maximum=2048,
+                step=8,
+                value=1024
+            )

         with gr.Column():
             result = gr.Image(label="Output Image", interactive=False)
             prompt_preview = gr.Textbox(label="Processed Prompt", interactive=False)
-            create_video_button = gr.Button("🎥 Create Video Between Images", variant="secondary", visible=False)
+            create_video_button = gr.Button(
+                "🎥 Create Video Between Images",
+                variant="secondary",
+                visible=False
+            )
             with gr.Group(visible=False) as video_group:
-                video_output = gr.Video(label="Generated Video", show_download_button=True, autoplay=True)
-
+                video_output = gr.Video(
+                    label="Generated Video",
+                    show_download_button=True,
+                    autoplay=True
+                )
+
     inputs = [
-        image,rotate_deg, move_forward,
+        image, rotate_deg, move_forward,
         vertical_tilt, wideangle,
         seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width, prev_output
     ]
@@ -265,21 +520,36 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
     ).then(fn=end_reset, inputs=None, outputs=[is_reset], queue=False)

     # Manual generation with video button visibility control
-    def infer_and_show_video_button(*args):
+    def infer_and_show_video_button(*args: Any):
+        """
+        Wrapper around `infer_camera_edit` that also controls the visibility
+        of the 'Create Video Between Images' button.
+
+        The first argument in `args` is expected to be the input image; if both
+        input and output images are present, the video button is shown.
+
+        Args:
+            *args:
+                Positional arguments forwarded directly to `infer_camera_edit`.
+
+        Returns:
+            tuple:
+                (output_image, seed, prompt, video_button_visibility_update)
+        """
         result_img, result_seed, result_prompt = infer_camera_edit(*args)
         # Show video button if we have both input and output images
        show_button = args[0] is not None and result_img is not None
        return result_img, result_seed, result_prompt, gr.update(visible=show_button)
-
+
     run_event = run_btn.click(
-        fn=infer_and_show_video_button,
-        inputs=inputs,
+        fn=infer_and_show_video_button,
+        inputs=inputs,
         outputs=outputs + [create_video_button]
     )

     # Video creation
     create_video_button.click(
-        fn=lambda: gr.update(visible=True),
+        fn=lambda: gr.update(visible=True),
         outputs=[video_group],
         api_name=False
     ).then(
@@ -298,15 +568,17 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
             ["disaster_girl.jpg", -45, 0, 1, False, 0, True, 1.0, 4, 768, 1024],
             ["grumpy.png", 90, 0, 1, False, 0, True, 1.0, 4, 576, 1024]
         ],
-        inputs=[image,rotate_deg, move_forward,
-                vertical_tilt, wideangle,
-                seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width],
+        inputs=[
+            image, rotate_deg, move_forward,
+            vertical_tilt, wideangle,
+            seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width
+        ],
         outputs=outputs,
         fn=infer_camera_edit,
         cache_examples="lazy",
         elem_id="examples"
     )
-
+
     # Image upload triggers dimension update and control reset
     image.upload(
         fn=update_dimensions_on_upload,
@@ -318,15 +590,41 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
         outputs=[rotate_deg, move_forward, vertical_tilt, wideangle, is_reset],
         queue=False
     ).then(
-        fn=end_reset,
-        inputs=None,
-        outputs=[is_reset],
+        fn=end_reset,
+        inputs=None,
+        outputs=[is_reset],
         queue=False
     )

-
     # Live updates
-    def maybe_infer(is_reset, progress=gr.Progress(track_tqdm=True), *args):
+    def maybe_infer(
+        is_reset: bool,
+        progress: gr.Progress = gr.Progress(track_tqdm=True),
+        *args: Any
+    ):
+        """
+        Conditionally run live inference when sliders change.
+
+        If `is_reset` is True, this function returns empty updates so that no
+        inference is triggered while the controls are being reset.
+
+        Otherwise, it forwards the remaining arguments to `infer_camera_edit`.
+
+        Args:
+            is_reset (bool):
+                Indicates whether the UI is in a reset state.
+            progress (gr.Progress, optional):
+                Gradio progress tracker injected by Gradio.
+            *args:
+                Remaining positional args forwarded to `infer_camera_edit`.
+
+        Returns:
+            tuple:
+                When resetting:
+                    (gr.update(), gr.update(), gr.update(), gr.update())
+                When running inference:
+                    (output_image, seed, prompt, video_button_visibility_update)
+        """
         if is_reset:
             return gr.update(), gr.update(), gr.update(), gr.update()
         else:
@@ -343,10 +641,18 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
     control_inputs_with_flag = [is_reset] + control_inputs

     for control in [rotate_deg, move_forward, vertical_tilt]:
-        control.release(fn=maybe_infer, inputs=control_inputs_with_flag, outputs=outputs + [create_video_button])
-
-    wideangle.input(fn=maybe_infer, inputs=control_inputs_with_flag, outputs=outputs + [create_video_button])
-
+        control.release(
+            fn=maybe_infer,
+            inputs=control_inputs_with_flag,
+            outputs=outputs + [create_video_button]
+        )
+
+    wideangle.input(
+        fn=maybe_infer,
+        inputs=control_inputs_with_flag,
+        outputs=outputs + [create_video_button]
+    )
+
     run_event.then(lambda img, *_: img, inputs=[result], outputs=[prev_output])

-demo.launch()
+demo.launch(mcp_server=True)
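Note on the MCP change: with `mcp_server=True`, recent Gradio versions serve the app as a Model Context Protocol server alongside the web UI, deriving tool names and schemas from the functions' type hints and docstrings, which is why this commit adds extensive docstrings. A minimal client-side sketch, assuming the standard Gradio MCP endpoint path (`/gradio_api/mcp/sse`), the official `mcp` Python SDK, and a placeholder Space URL:

import asyncio

from mcp import ClientSession
from mcp.client.sse import sse_client

# Placeholder URL: substitute the actual Space host.
SERVER_URL = "https://<owner>-<space>.hf.space/gradio_api/mcp/sse"

async def main() -> None:
    # Open an SSE transport to the Gradio-hosted MCP server.
    async with sse_client(SERVER_URL) as (read_stream, write_stream):
        async with ClientSession(read_stream, write_stream) as session:
            await session.initialize()
            # Tool descriptions come from the docstrings above,
            # e.g. infer_camera_edit and create_video_between_images.
            tools = await session.list_tools()
            print([tool.name for tool in tools.tools])

asyncio.run(main())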