|
|
import gradio as gr |
|
|
import numpy as np |
|
|
import random |
|
|
import torch |
|
|
import spaces |
|
|
from PIL import Image |
|
|
from diffusers import FlowMatchEulerDiscreteScheduler |
|
|
from optimization import optimize_pipeline_ |
|
|
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline |
|
|
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel |
|
|
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3 |
|
|
import math |
|
|
|
|
|
|
|
|
# Precision used when loading the pipeline weights.
dtype = torch.bfloat16

# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
|
|
|
|
|
# Configuration handed to FlowMatchEulerDiscreteScheduler.from_config below.
# base_shift and max_shift are both log(3), and dynamic shifting is enabled
# with the "exponential" time-shift type.
scheduler_config = dict(
    base_image_seq_len=256,
    base_shift=math.log(3),
    invert_sigmas=False,
    max_image_seq_len=8192,
    max_shift=math.log(3),
    num_train_timesteps=1000,
    shift=1.0,
    shift_terminal=None,
    stochastic_sampling=False,
    time_shift_type="exponential",
    use_beta_sigmas=False,
    use_dynamic_shifting=True,
    use_exponential_sigmas=False,
    use_karras_sigmas=False,
)

# Scheduler instance that is injected into the pipeline when it is loaded.
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
|
|
|
|
|
# Load the Qwen image-edit pipeline with the scheduler built above, then move
# it to the selected device.
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    scheduler=scheduler,
    torch_dtype=dtype,
)
pipe = pipe.to(device)

# Load and fuse each LoRA in turn at full strength. The order is significant:
# the 4-step Lightning LoRA first, then the multiple-angles LoRA.
for _lora_repo, _lora_weight_name in (
    (
        "2vXpSwA7/iroiro-lora",
        "qwen_lora/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16_dim1.safetensors",
    ),
    (
        "dx8152/Qwen-Edit-2509-Multiple-angles",
        "多角度.safetensors",
    ),
):
    pipe.load_lora_weights(_lora_repo, weight_name=_lora_weight_name)
    pipe.fuse_lora(lora_scale=1.0)

# Rebind the transformer to the project's QwenImageTransformer2DModel class and
# install the project's QwenDoubleStreamAttnProcessorFA3 attention processor.
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

# Run the project's optimisation helper with two blank 1024x1024 RGB images and
# a placeholder prompt.
# NOTE(review): presumably these serve as example inputs for compilation or
# warm-up -- confirm against the `optimization` module.
_blank_inputs = [Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))]
optimize_pipeline_(pipe, image=_blank_inputs, prompt="prompt")
|
|
|
|
|
|
|
|
# Upper bound for seeds: the largest value representable by a signed 32-bit int.
MAX_SEED = np.iinfo(np.int32).max

# Defaults shared by the generation entry point and the advanced-settings
# controls in the UI.
DEFAULT_SEED, DEFAULT_RANDOMIZE = 0, True
DEFAULT_TRUE_GUIDANCE_SCALE, DEFAULT_NUM_INFERENCE_STEPS = 1.0, 4
|
|
|
|
|
|
|
|
# Preset camera moves. "cn" is the Chinese prompt text that is sent to the
# model; "ja" and "en" are the labels shown for that preset in the dropdown.
CAMERA_OPTIONS = [
    {"cn": cn_prompt, "ja": ja_label, "en": en_label}
    for cn_prompt, ja_label, en_label in (
        ("镜头方向左回转45度", "左に45度回転", "Rotate camera 45° left"),
        ("镜头向右回转45度", "右に45度回転", "Rotate camera 45° right"),
        ("镜头方向左回转90度", "左に90度回転", "Rotate camera 90° left"),
        ("镜头向右回转90度", "右に90度回転", "Rotate camera 90° right"),
        ("将镜头转为俯视", "上から見下ろす", "Switch to top-down view"),
        ("将镜头转为仰视", "下から見上げる", "Switch to low-angle view"),
        ("将镜头转为特写镜头", "クローズアップ", "Switch to close-up lens"),
        ("将镜头转为中近景镜头", "ややクローズアップ", "Switch to medium close-up lens"),
        ("将镜头转为拉远镜头", "ズームアウト", "Switch to zoom out lens"),
    )
]
|
|
|
|
|
|
|
|
# Sentinel dropdown value meaning "use the free-text prompt instead of a preset".
CUSTOM_OPTION_VALUE = "__custom__"

# Dropdown label for the custom entry, per UI language.
CUSTOM_LABELS = dict(
    en="Custom (enter Chinese or English prompt)",
    ja="自由入力(中国語、英語で入力)",
    zh="自定义(中文或英文输入)",
)
|
|
|
|
|
|
|
|
# UI text table: I18N[key][lang] with lang in {"en", "ja", "zh"}.
I18N = {
    # Page header and upload notice
    "title": dict(en="Camera Work", ja="カメラワーク", zh="镜头控制"),
    "notice": dict(
        en="Note: Please avoid uploading images created by others. There may be rights infringements.",
        ja="注意:他者が作成した画像のアップロードはご遠慮ください。権利侵害の可能性があります。",
        zh="注意:请勿上传他人创作的图片,可能涉及权利侵害。",
    ),
    # Input controls
    "input_image": dict(en="Input image", ja="入力画像", zh="输入图像"),
    "dropdown_label": dict(en="Camera work", ja="カメラワーク", zh="镜头操作"),
    "custom_cn_label": dict(
        en="Custom prompt(English and Chinese recommended)",
        ja="自由入力のプロンプト(英語、中国語がおすすめ)",
        zh="自定义提示词(推荐英文和中文)",
    ),
    "custom_cn_ph": dict(
        en="e.g., 将镜头转为斜俯视 并 拉远镜头",
        ja="例: 将镜头转为斜俯视 并 拉远镜头",
        zh="例如:将镜头转为斜俯视 并 拉远镜头",
    ),
    "extra_label": dict(
        en="Extra prompt (optional, appended at end)(English and Chinese recommended)",
        ja="追加プロンプト(任意・末尾に付加)(英語、中国語がおすすめ)",
        zh="附加提示词(可选,追加在末尾)(推荐英文和中文)",
    ),
    "extra_ph": dict(
        en="e.g., Subject is a girl",
        ja="例: 被摄体是一名女孩子",
        zh="例如:被摄体是一名女孩子",
    ),
    # Advanced settings
    "accordion": dict(en="Advanced settings", ja="詳細設定", zh="高级设置"),
    "seed": dict(en="Seed", ja="Seed", zh="Seed"),
    "rand": dict(en="Randomize seed", ja="ランダムシード", zh="随机种子"),
    "tgs": dict(en="True guidance scale", ja="True guidance scale", zh="True guidance scale"),
    "steps": dict(en="Steps", ja="生成ステップ数", zh="生成步数"),
    # Run button, output and status
    "run": dict(en="Generate", ja="生成", zh="生成"),
    "output": dict(en="Output image", ja="出力画像", zh="输出图像"),
    "status": dict(en="Status", ja="ステータス", zh="状态"),
    "status_ok": dict(
        en="Generated 1 image (PNG).",
        ja="1枚生成しました(PNG)。",
        zh="已生成 1 张图片(PNG)。",
    ),
    # Validation errors
    "err_no_img": dict(
        en="Error: Please upload an input image.",
        ja="エラー: 入力画像をアップロードしてください",
        zh="错误:请先上传输入图像。",
    ),
    "err_no_custom": dict(
        en="Error: Please enter a custom prompt.",
        ja="エラー: 自由入力のプロンプトを入力してください",
        zh="错误:请输入自定义提示词。",
    ),
    # Language selector
    "lang_label": dict(en="UI Language", ja="UI言語", zh="界面语言"),
}
|
|
|
|
|
def t(key, lang):
    """Return the UI string for *key* in language *lang*.

    Unknown language codes fall back to the English text, matching the
    fallback used by ``build_dropdown_choices``. An unknown *key* still
    raises ``KeyError``.
    """
    translations = I18N[key]
    return translations.get(lang, translations["en"])
|
|
|
|
|
def build_dropdown_choices(lang):
    """Build the ``(label, value)`` pairs for the camera-work dropdown.

    Every value is the preset's Chinese prompt (``item["cn"]``); the label is
    the preset's text in *lang*. The custom entry is appended last with
    ``CUSTOM_OPTION_VALUE`` as its value. Unknown language codes are treated
    as English.
    """
    if lang not in ("en", "ja", "zh"):
        lang = "en"

    # Presets are keyed "cn"/"ja"/"en" while the UI language code for Chinese
    # is "zh"; fall back to the "cn" prompt text when there is no entry for
    # the language, so selecting Chinese no longer raises KeyError.
    choices = [(item.get(lang, item["cn"]), item["cn"]) for item in CAMERA_OPTIONS]
    choices.append((CUSTOM_LABELS[lang], CUSTOM_OPTION_VALUE))
    return choices
|
|
|
|
|
def _append_prompt(base: str, extra: str) -> str: |
|
|
extra = (extra or "").strip() |
|
|
return (base if not extra else f"{base} {extra}").strip() |
|
|
|
|
|
def generate_single_view(input_images, prompt, seed, num_inference_steps, true_guidance_scale):
    """Run the pipeline once and return the first generated image.

    Args:
        input_images: Images passed to the pipeline as ``image``; an empty or
            falsy value is forwarded as ``None``.
        prompt: Prompt text for the edit.
        seed: Seed for the torch generator; coerced to ``int``.
        num_inference_steps: Number of sampling steps; coerced to ``int``.
        true_guidance_scale: Forwarded as ``true_cfg_scale``.

    Returns:
        The first element of the pipeline result's ``images``.
    """
    # UI sliders may deliver whole numbers as floats (e.g. 4.0), and
    # torch.Generator.manual_seed rejects non-int seeds, so normalise both
    # integer-valued inputs here.
    generator = torch.Generator(device=device).manual_seed(int(seed))
    images = pipe(
        image=input_images if input_images else None,
        prompt=prompt,
        negative_prompt=" ",
        num_inference_steps=int(num_inference_steps),
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    ).images
    return images[0]
|
|
|
|
|
@spaces.GPU()
def generate_from_dropdown(
    image,
    dropdown_value_cn,
    custom_cn,
    extra_prompt="",
    seed=DEFAULT_SEED,
    randomize_seed=DEFAULT_RANDOMIZE,
    true_guidance_scale=DEFAULT_TRUE_GUIDANCE_SCALE,
    num_inference_steps=DEFAULT_NUM_INFERENCE_STEPS,
    lang="en",
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one edited view of *image* for the selected camera move.

    Args:
        image: A PIL image, or anything ``PIL.Image.open`` accepts; ``None``
            yields an error status instead of an image.
        dropdown_value_cn: Chinese prompt of the selected preset, or
            ``CUSTOM_OPTION_VALUE`` to use *custom_cn*. A falsy value selects
            the first preset.
        custom_cn: Free-text prompt, required when the custom entry is chosen.
        extra_prompt: Optional text appended to the base prompt.
        seed: Seed used unless *randomize_seed* is true.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        true_guidance_scale: Passed through to ``generate_single_view``.
        num_inference_steps: Passed through to ``generate_single_view``.
        lang: UI language code ("en", "ja" or "zh"); anything else is treated
            as English.
        progress: Gradio progress tracker.

    Returns:
        ``(image_or_None, status_message)``.
    """
    # Direct API callers can send arbitrary language codes; fall back to
    # English instead of failing in the status-text lookup.
    if lang not in ("en", "ja", "zh"):
        lang = "en"

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    if image is None:
        return None, t("err_no_img", lang)

    if isinstance(image, Image.Image):
        input_image = image.convert("RGB")
    else:
        # convert() loads the pixel data and returns a new image, so the
        # underlying file can be closed as soon as the conversion is done.
        with Image.open(image) as opened:
            input_image = opened.convert("RGB")

    pil_images = [input_image]

    if dropdown_value_cn == CUSTOM_OPTION_VALUE:
        base_cn = (custom_cn or "").strip()
        if not base_cn:
            return None, t("err_no_custom", lang)
    else:
        base_cn = dropdown_value_cn or CAMERA_OPTIONS[0]["cn"]

    final_prompt = _append_prompt(base_cn, extra_prompt)

    # Progress captions per language (same strings as before, table-driven).
    generating_desc = {"en": "Generating...", "ja": "生成中...", "zh": "生成中..."}
    done_desc = {"en": "Done", "ja": "完了", "zh": "完成"}

    progress(0.6, desc=generating_desc[lang])
    out = generate_single_view(pil_images, final_prompt, seed, num_inference_steps, true_guidance_scale)
    progress(1.0, desc=done_desc[lang])

    return out, t("status_ok", lang)
|
|
|
|
|
|
|
|
# Stylesheet passed to gr.Blocks: page wrapper width, the upload notice box,
# the "card" panels, plus small-text and monospace preview helper classes.
css = """
#app-wrap {margin: 0 auto; max-width: 1200px;}
.notice {
  background: #fff8e1;
  border: 1px solid #facc15;
  color: #713f12;
  padding: 12px 14px;
  border-radius: 12px;
  font-weight: 600;
  line-height: 1.5;
  margin-bottom: 10px;
}
.card {
  background: white;
  border: 1px solid #e5e7eb;
  border-radius: 14px;
  padding: 14px;
  box-shadow: 0 1px 2px rgba(0,0,0,0.04);
}
.small { font-size: 12px; color: #6b7280; }
.preview {
  background: #f9fafb;
  border: 1px dashed #cbd5e1;
  border-radius: 10px;
  padding: 8px 10px;
  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
  white-space: pre-wrap;
}
"""
|
|
|
|
|
# Gradio UI: language selector, input image + prompt controls, output panel,
# and the event wiring between them. All labels start in English and are
# re-localised by `_switch_lang` when the language radio changes.
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:

    lang_selector = gr.Radio(
        label=I18N["lang_label"]["en"],
        choices=[("English", "en"), ("日本語", "ja"), ("中文", "zh")],
        value="en",
        interactive=True,
    )

    title_md = gr.Markdown(I18N["title"]["en"])

    with gr.Column(elem_id="app-wrap"):
        notice_html = gr.HTML(f"<div class='notice'>{I18N['notice']['en']}</div>")

        with gr.Row():
            with gr.Column(scale=1):
                input_image = gr.Image(label=I18N["input_image"]["en"], type="pil", height=420)

            with gr.Column(scale=1, elem_classes=["card"]):
                dropdown = gr.Dropdown(
                    label=I18N["dropdown_label"]["en"],
                    choices=build_dropdown_choices("en"),
                    value=CAMERA_OPTIONS[0]["cn"],
                    allow_custom_value=False,
                    interactive=True,
                )

                # Hidden until the custom dropdown entry is selected.
                custom_cn = gr.Textbox(
                    label=I18N["custom_cn_label"]["en"],
                    placeholder=I18N["custom_cn_ph"]["en"],
                    visible=False,
                    lines=2,
                )

                extra_prompt = gr.Textbox(
                    label=I18N["extra_label"]["en"],
                    placeholder=I18N["extra_ph"]["en"],
                    lines=2,
                )

                with gr.Accordion(I18N["accordion"]["en"], open=False) as adv_acc:
                    seed = gr.Slider(label=I18N["seed"]["en"], minimum=0, maximum=MAX_SEED, step=1, value=DEFAULT_SEED)
                    randomize_seed = gr.Checkbox(label=I18N["rand"]["en"], value=DEFAULT_RANDOMIZE)
                    true_guidance_scale = gr.Slider(label=I18N["tgs"]["en"], minimum=1.0, maximum=10.0, step=0.1, value=DEFAULT_TRUE_GUIDANCE_SCALE)
                    num_inference_steps = gr.Slider(label=I18N["steps"]["en"], minimum=1, maximum=40, step=1, value=DEFAULT_NUM_INFERENCE_STEPS)

                run_button = gr.Button(I18N["run"]["en"], variant="primary")

        with gr.Row():
            with gr.Column(scale=1, elem_classes=["card"]):
                result_image = gr.Image(label=I18N["output"]["en"], type="pil", format="png", height=520, show_download_button=True)
                status_text = gr.Textbox(label=I18N["status"]["en"], interactive=False)

    def _toggle_custom(v_cn):
        """Show the custom prompt box only while the custom entry is selected."""
        return gr.update(visible=(v_cn == CUSTOM_OPTION_VALUE))

    # Visibility depends solely on the dropdown value, so only the dropdown
    # triggers it. Typing in either textbox used to fire the same handler and
    # cost a server round trip per keystroke without changing anything.
    dropdown.change(
        fn=_toggle_custom,
        inputs=[dropdown],
        outputs=[custom_cn],
    )

    def _switch_lang(lang, current_dropdown_value):
        """Return label/value updates for every localised component.

        The order of the returned tuple must match the `outputs` list of
        `lang_selector.change` below.
        """
        return (
            gr.update(label=I18N["lang_label"][lang]),
            I18N["title"][lang],
            gr.update(value=f"<div class='notice'>{I18N['notice'][lang]}</div>"),
            gr.update(label=I18N["input_image"][lang]),
            # Rebuild the choices in the new language but keep the selection;
            # values are the Chinese prompts, which are language-independent.
            gr.update(
                label=I18N["dropdown_label"][lang],
                choices=build_dropdown_choices(lang),
                value=current_dropdown_value if current_dropdown_value else CAMERA_OPTIONS[0]["cn"],
            ),
            gr.update(label=I18N["custom_cn_label"][lang], placeholder=I18N["custom_cn_ph"][lang]),
            gr.update(label=I18N["extra_label"][lang], placeholder=I18N["extra_ph"][lang]),
            # The accordion title has translations in I18N but was never
            # updated on language switch; localise it with everything else.
            gr.update(label=I18N["accordion"][lang]),
            gr.update(label=I18N["seed"][lang]),
            gr.update(label=I18N["rand"][lang]),
            gr.update(label=I18N["tgs"][lang]),
            gr.update(label=I18N["steps"][lang]),
            gr.update(value=I18N["run"][lang]),
            gr.update(label=I18N["output"][lang]),
            gr.update(label=I18N["status"][lang]),
        )

    lang_selector.change(
        fn=_switch_lang,
        inputs=[lang_selector, dropdown],
        outputs=[
            lang_selector,
            title_md,
            notice_html,
            input_image,
            dropdown,
            custom_cn,
            extra_prompt,
            adv_acc,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            run_button,
            result_image,
            status_text,
        ],
    )

    # The input order must match the positional parameters of
    # generate_from_dropdown (the language radio supplies `lang`).
    run_button.click(
        fn=generate_from_dropdown,
        inputs=[input_image, dropdown, custom_cn, extra_prompt, seed, randomize_seed, true_guidance_scale, num_inference_steps, lang_selector],
        outputs=[result_image, status_text],
    )


if __name__ == "__main__":
    demo.launch()
|
|
|