Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,7 +8,7 @@ from omegaconf import OmegaConf
|
|
| 8 |
from huggingface_hub import snapshot_download
|
| 9 |
|
| 10 |
import torch
|
| 11 |
-
from transformers import T5EncoderModel, AutoTokenizer
|
| 12 |
|
| 13 |
from pixelflow.scheduling_pixelflow import PixelFlowScheduler
|
| 14 |
from pixelflow.pipeline_pixelflow import PixelFlowPipeline
|
|
@@ -22,12 +22,12 @@ parser.add_argument('--class_cond', action='store_true', help='use class conditi
|
|
| 22 |
args = parser.parse_args()
|
| 23 |
|
| 24 |
# deploy
|
| 25 |
-
args.checkpoint = "
|
| 26 |
-
args.class_cond =
|
| 27 |
|
| 28 |
-
output_dir = args.checkpoint
|
| 29 |
|
| 30 |
if args.class_cond:
|
|
|
|
| 31 |
if not os.path.exists(output_dir):
|
| 32 |
snapshot_download(repo_id="ShoufaChen/PixelFlow-Class2Image", local_dir=output_dir)
|
| 33 |
config = OmegaConf.load(f"{output_dir}/config.yaml")
|
|
@@ -39,13 +39,12 @@ if args.class_cond:
|
|
| 39 |
resolution = 256
|
| 40 |
NUM_EXAMPLES = 4
|
| 41 |
else:
|
| 42 |
-
|
| 43 |
-
snapshot_download(repo_id="ShoufaChen/PixelFlow-Text2Image", local_dir=output_dir)
|
| 44 |
config = OmegaConf.load(f"{output_dir}/config.yaml")
|
| 45 |
-
model = config_utils.instantiate_from_config(config.model)
|
| 46 |
print(f"Num of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
|
| 47 |
ckpt = torch.load(f"{output_dir}/model.pt", map_location="cpu", weights_only=True)
|
| 48 |
-
text_encoder = T5EncoderModel.from_pretrained("google/flan-t5-xl")
|
| 49 |
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")
|
| 50 |
resolution = 1024
|
| 51 |
NUM_EXAMPLES = 1
|
|
@@ -55,7 +54,6 @@ model.eval()
|
|
| 55 |
print(f"outside space.GPU. {torch.cuda.is_available()=}")
|
| 56 |
if torch.cuda.is_available():
|
| 57 |
model = model.cuda()
|
| 58 |
-
text_encoder = text_encoder.cuda() if text_encoder else None
|
| 59 |
device = torch.device("cuda")
|
| 60 |
else:
|
| 61 |
raise ValueError("No GPU")
|
|
@@ -70,8 +68,8 @@ pipeline = PixelFlowPipeline(
|
|
| 70 |
max_token_length=512,
|
| 71 |
)
|
| 72 |
|
| 73 |
-
@spaces.GPU
|
| 74 |
-
def infer(noise_shift, cfg_scale, class_label, seed, *num_steps_per_stage):
|
| 75 |
print(f"inside space.GPU. {torch.cuda.is_available()=}")
|
| 76 |
seed_everything(seed)
|
| 77 |
with torch.autocast("cuda", dtype=torch.bfloat16), torch.no_grad():
|
|
@@ -83,7 +81,7 @@ def infer(noise_shift, cfg_scale, class_label, seed, *num_steps_per_stage):
|
|
| 83 |
guidance_scale=cfg_scale, # classifier-free guidance scale selected with the UI slider
|
| 84 |
device=device,
|
| 85 |
shift=noise_shift,
|
| 86 |
-
use_ode_dopri5=
|
| 87 |
)
|
| 88 |
samples = (samples * 255).round().astype("uint8")
|
| 89 |
samples = [Image.fromarray(sample) for sample in samples]
|
|
@@ -108,8 +106,8 @@ with gr.Blocks(css=css) as demo:
|
|
| 108 |
gr.Markdown("# PixelFlow: Pixel-Space Generative Models with Flow")
|
| 109 |
gr.HTML("""
|
| 110 |
<div class="follow-link">
|
| 111 |
-
For online
|
| 112 |
-
<a href="https://huggingface.co/spaces/ShoufaChen/PixelFlow-
|
| 113 |
For more details, refer to our
|
| 114 |
<a href="https://arxiv.org/abs/2504.07963">arXiv paper</a> and <a href="https://github.com/ShoufaChen/PixelFlow">GitHub repo</a>.
|
| 115 |
</div>
|
|
@@ -129,6 +127,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 129 |
else:
|
| 130 |
# text input
|
| 131 |
user_input = gr.Textbox(label='Enter your prompt', show_label=False, max_lines=1, placeholder="Enter your prompt",)
|
|
|
|
| 132 |
noise_shift = gr.Slider(minimum=1.0, maximum=100.0, step=1, value=1.0, label='Noise Shift')
|
| 133 |
cfg_scale = gr.Slider(minimum=1, maximum=25, step=0.1, value=4.0, label='Classifier-free Guidance Scale')
|
| 134 |
num_steps_per_stage = []
|
|
@@ -139,6 +138,6 @@ with gr.Blocks(css=css) as demo:
|
|
| 139 |
button = gr.Button("Generate", variant="primary")
|
| 140 |
with gr.Column():
|
| 141 |
output = gr.Gallery(label='Generated Images', height=700)
|
| 142 |
-
button.click(infer, inputs=[noise_shift, cfg_scale, user_input, seed, *num_steps_per_stage], outputs=[output])
|
| 143 |
demo.queue()
|
| 144 |
-
demo.launch(share=True, debug=True)
|
|
|
|
| 8 |
from huggingface_hub import snapshot_download
|
| 9 |
|
| 10 |
import torch
|
| 11 |
+
# from transformers import T5EncoderModel, AutoTokenizer
|
| 12 |
|
| 13 |
from pixelflow.scheduling_pixelflow import PixelFlowScheduler
|
| 14 |
from pixelflow.pipeline_pixelflow import PixelFlowPipeline
|
|
|
|
| 22 |
args = parser.parse_args()
|
| 23 |
|
| 24 |
# deploy
|
| 25 |
+
args.checkpoint = "pixelflow_c2i"
|
| 26 |
+
args.class_cond = True
|
| 27 |
|
|
|
|
| 28 |
|
| 29 |
if args.class_cond:
|
| 30 |
+
output_dir = args.checkpoint
|
| 31 |
if not os.path.exists(output_dir):
|
| 32 |
snapshot_download(repo_id="ShoufaChen/PixelFlow-Class2Image", local_dir=output_dir)
|
| 33 |
config = OmegaConf.load(f"{output_dir}/config.yaml")
|
|
|
|
| 39 |
resolution = 256
|
| 40 |
NUM_EXAMPLES = 4
|
| 41 |
else:
|
| 42 |
+
raise NotImplementedError("Please run locally.")
|
|
|
|
| 43 |
config = OmegaConf.load(f"{output_dir}/config.yaml")
|
| 44 |
+
model = config_utils.instantiate_from_config(config.model).to(device)
|
| 45 |
print(f"Num of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
|
| 46 |
ckpt = torch.load(f"{output_dir}/model.pt", map_location="cpu", weights_only=True)
|
| 47 |
+
text_encoder = T5EncoderModel.from_pretrained("google/flan-t5-xl").to(device)
|
| 48 |
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")
|
| 49 |
resolution = 1024
|
| 50 |
NUM_EXAMPLES = 1
|
|
|
|
| 54 |
print(f"outside space.GPU. {torch.cuda.is_available()=}")
|
| 55 |
if torch.cuda.is_available():
|
| 56 |
model = model.cuda()
|
|
|
|
| 57 |
device = torch.device("cuda")
|
| 58 |
else:
|
| 59 |
raise ValueError("No GPU")
|
|
|
|
| 68 |
max_token_length=512,
|
| 69 |
)
|
| 70 |
|
| 71 |
+
@spaces.GPU
|
| 72 |
+
def infer(use_ode_dopri5, noise_shift, cfg_scale, class_label, seed, *num_steps_per_stage):
|
| 73 |
print(f"inside space.GPU. {torch.cuda.is_available()=}")
|
| 74 |
seed_everything(seed)
|
| 75 |
with torch.autocast("cuda", dtype=torch.bfloat16), torch.no_grad():
|
|
|
|
| 81 |
guidance_scale=cfg_scale, # classifier-free guidance scale selected with the UI slider
|
| 82 |
device=device,
|
| 83 |
shift=noise_shift,
|
| 84 |
+
use_ode_dopri5=use_ode_dopri5,
|
| 85 |
)
|
| 86 |
samples = (samples * 255).round().astype("uint8")
|
| 87 |
samples = [Image.fromarray(sample) for sample in samples]
|
|
|
|
| 106 |
gr.Markdown("# PixelFlow: Pixel-Space Generative Models with Flow")
|
| 107 |
gr.HTML("""
|
| 108 |
<div class="follow-link">
|
| 109 |
+
For online text-to-image generation, please try
|
| 110 |
+
<a href="https://huggingface.co/spaces/ShoufaChen/PixelFlow-Text2Image">text-to-image</a>.
|
| 111 |
For more details, refer to our
|
| 112 |
<a href="https://arxiv.org/abs/2504.07963">arXiv paper</a> and <a href="https://github.com/ShoufaChen/PixelFlow">GitHub repo</a>.
|
| 113 |
</div>
|
|
|
|
| 127 |
else:
|
| 128 |
# text input
|
| 129 |
user_input = gr.Textbox(label='Enter your prompt', show_label=False, max_lines=1, placeholder="Enter your prompt",)
|
| 130 |
+
ode_dopri5 = gr.Checkbox(label="Dopri5 ODE", info="Use Dopri5 ODE solver")
|
| 131 |
noise_shift = gr.Slider(minimum=1.0, maximum=100.0, step=1, value=1.0, label='Noise Shift')
|
| 132 |
cfg_scale = gr.Slider(minimum=1, maximum=25, step=0.1, value=4.0, label='Classifier-free Guidance Scale')
|
| 133 |
num_steps_per_stage = []
|
|
|
|
| 138 |
button = gr.Button("Generate", variant="primary")
|
| 139 |
with gr.Column():
|
| 140 |
output = gr.Gallery(label='Generated Images', height=700)
|
| 141 |
+
button.click(infer, inputs=[ode_dopri5, noise_shift, cfg_scale, user_input, seed, *num_steps_per_stage], outputs=[output])
|
| 142 |
demo.queue()
|
| 143 |
+
demo.launch(share=True, debug=True)
|