Spaces:

hongyu12321
/

RedFish

Sleeping

App Files Community

hongyu12321 committed on Sep 13

Commit

aec1787

verified ·

1 Parent(s): 7093eab

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -80

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py — Age-first + FAST cartoon (Turbo) with prompt hint pickers (largest face only)
 import os
 os.environ["TRANSFORMERS_NO_TF"] = "1"
@@ -42,12 +42,12 @@ class PretrainedAgeEstimator:
                        for i, p in enumerate(probs))
         return expected, top
-# ------------------ Face detection with WIDER crop (largest face) ------------------
 from facenet_pytorch import MTCNN
 class FaceCropper:
-    """Detect faces; return (cropped_wide, annotated). Adds margin so face isn't full screen."""
-    def __init__(self, device: Optional[str] = None, margin_scale: float = 1.8):
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self.mtcnn = MTCNN(keep_all=True, device=self.device)
         self.margin_scale = margin_scale
@@ -64,9 +64,8 @@ class FaceCropper:
         annotated = pil.copy()
         draw = ImageDraw.Draw(annotated)
         if boxes is None or len(boxes) == 0:
-            return None, annotated  # no faces
         # draw all boxes
         for b, p in zip(boxes, probs):
@@ -74,10 +73,10 @@ class FaceCropper:
             draw.rectangle([bx1, by1, bx2, by2], outline=(255, 0, 0), width=3)
             draw.text((bx1, max(0, by1-12)), f"{p:.2f}", fill=(255, 0, 0))
-        # choose largest face
         idx = int(np.argmax([(b[2]-b[0])*(b[3]-b[1]) for b in boxes]))
         x1, y1, x2, y2 = boxes[idx]
-        # expand with margin (4:5 portrait feel)
         cx, cy = (x1 + x2) / 2.0, (y1 + y2) / 2.0
         w, h = (x2 - x1), (y2 - y1)
         side = max(w, h) * self.margin_scale
@@ -92,21 +91,19 @@ class FaceCropper:
         crop = pil.crop((nx1, ny1, nx2, ny2))
         return crop, annotated
-# ------------------ FAST Cartoonizer (SD-Turbo) with safety ------------------
 from diffusers import AutoPipelineForImage2Image
 from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from transformers import AutoFeatureExtractor
-# Turbo is very fast (1–4 steps). Great for stylization on CPU/GPU.
 TURBO_ID = "stabilityai/sd-turbo"
 def load_turbo_pipe(device):
-    dtype = torch.float16 if (device == "cuda") else torch.float32
     pipe = AutoPipelineForImage2Image.from_pretrained(
         TURBO_ID,
-        dtype=dtype,           # ✅ use dtype (no deprecation warning)
-    )
-    pipe = pipe.to(device)
     # Safety checker ON for public Spaces
     pipe.safety_checker = StableDiffusionSafetyChecker.from_pretrained(
         "CompVis/stable-diffusion-safety-checker"
@@ -122,10 +119,10 @@ def load_turbo_pipe(device):
 # ------------------ Init models once ------------------
 age_est = PretrainedAgeEstimator()
-cropper = FaceCropper(device=age_est.device, margin_scale=1.85)  # 1.6–2.0 feels good
 sd_pipe = load_turbo_pipe(age_est.device)
-# ------------------ Prompt hint dictionaries ------------------
 ROLE_CHOICES = [
     "Queen/Princess", "King/Prince", "Fairy", "Elf", "Knight", "Sorcerer/Sorceress",
     "Steampunk Royalty", "Cyberpunk Royalty", "Superhero", "Anime Protagonist"
@@ -157,8 +154,7 @@ EFFECTS_CHOICES = [
 ]
 NEGATIVE_PROMPT = (
-    "deformed, disfigured, ugly, extra limbs, extra fingers, bad anatomy, low quality, "
-    "blurry, watermark, text, logo"
 )
 # ------------------ Helpers ------------------
@@ -166,7 +162,6 @@ def _ensure_pil(img):
     return img if isinstance(img, Image.Image) else Image.fromarray(img)
 def _resize_512(im: Image.Image):
-    # keep aspect, fit longest side to 512 (faster, fewer artifacts)
     w, h = im.size
     scale = 512 / max(w, h)
     if scale < 1.0:
@@ -174,8 +169,16 @@ def _resize_512(im: Image.Image):
     return im
 def build_prompt(role, background, lighting, artstyle, colors, outfit, effects, extra):
-    bits = []
-    # role to base descriptors
     role_map = {
         "Queen/Princess": "regal queen/princess portrait",
         "King/Prince": "regal king/prince portrait",
@@ -186,61 +189,51 @@ def build_prompt(role, background, lighting, artstyle, colors, outfit, effects,
         "Steampunk Royalty": "steampunk royal portrait with brass filigree",
         "Cyberpunk Royalty": "cyberpunk royal portrait with neon accents",
         "Superhero": "heroic comic-style portrait",
-        "Anime Protagonist": "anime protagonist portrait"
     }
-    if role:
-        bits.append(role_map.get(role, role))
-    # the hint pickers
     for group in (background, lighting, artstyle, colors, outfit, effects):
         if group and isinstance(group, list):
-            bits.append(", ".join(group))
-    # strong general quality/style anchors
-    bits.append("clean lineart, storybook illustration, high quality")
-    # extra user text
     extra = (extra or "").strip()
     if extra:
-        bits.append(extra)
-    # join
-    return ", ".join([b for b in bits if b])
-# ------------------ 1) Predict Age (fast, largest face) ------------------
 @torch.inference_mode()
 def predict_age_only(img, auto_crop=True):
     if img is None:
         return {}, "Please upload an image.", None
-    img = _ensure_pil(img).convert("RGB")
-    face_wide = None
-    annotated = None
     if auto_crop:
-        face_wide, annotated = cropper.detect_and_crop_wide(img)
-    target = face_wide if face_wide is not None else img
     age, top = age_est.predict(target, topk=5)
     probs = {lbl: float(p) for lbl, p in top}
     summary = f"**Estimated age:** {age:.1f} years"
-    return probs, summary, (annotated if annotated is not None else img)
-# ------------------ 2) Generate Cartoon (fast, largest face) ------------------
 @torch.inference_mode()
 def generate_cartoon(img, role, background, lighting, artstyle, colors, outfit, effects,
                      extra_desc, auto_crop=True, strength=0.5, steps=2, seed=-1):
     if img is None:
         return None
-    img = _ensure_pil(img).convert("RGB")
     if auto_crop:
-        face_wide, _ = cropper.detect_and_crop_wide(img)
         if face_wide is not None:
-            img = face_wide
-    img = _resize_512(img)
-    # prompt assembly from pickers
     prompt = build_prompt(role, background, lighting, artstyle, colors, outfit, effects, extra_desc)
     generator = None
@@ -250,54 +243,51 @@ def generate_cartoon(img, role, background, lighting, artstyle, colors, outfit,
     out = sd_pipe(
         prompt=prompt,
         negative_prompt=NEGATIVE_PROMPT,
-        image=img,
-        strength=float(strength),          # 0.4–0.6 keeps identity & adds dress/background
-        guidance_scale=0.0,                # Turbo commonly uses 0
-        num_inference_steps=int(steps),    # 1–4 steps → very fast
         generator=generator,
     )
     return out.images[0]
-# ------------------ UI ------------------
-with gr.Blocks(title="Age First + Fast Cartoon (with Hint Pickers)") as demo:
-    gr.Markdown("# Upload or capture once — get age prediction first, then a beautiful cartoon ✨")
-    gr.Markdown("Largest face is used if multiple people are present.")
     with gr.Row():
         with gr.Column(scale=1):
             img_in = gr.Image(sources=["upload", "webcam"], type="pil", label="Upload / Webcam")
-            auto = gr.Checkbox(True, label="Auto face crop (wide, recommended)")
-            # --- Age first
-            btn_age = gr.Button("Predict Age (fast)", variant="primary")
-            gr.Markdown("### Cartoon Description Hints")
-            role = gr.Dropdown(choices=ROLE_CHOICES, value="Queen/Princess", label="Role")
-            background = gr.CheckboxGroup(choices=BACKGROUND_CHOICES, label="Background")
-            lighting = gr.CheckboxGroup(choices=LIGHTING_CHOICES, label="Lighting")
-            artstyle = gr.CheckboxGroup(choices=ARTSTYLE_CHOICES, label="Art Style")
-            colors = gr.CheckboxGroup(choices=COLOR_CHOICES, label="Color Mood")
-            outfit = gr.CheckboxGroup(choices=OUTFIT_CHOICES, label="Outfit / Accessories")
-            effects = gr.CheckboxGroup(choices=EFFECTS_CHOICES, label="Magical Effects")
-            extra = gr.Textbox(
-                label="Extra description (optional)",
-                placeholder="e.g., silver tiara, flowing gown, castle balcony at sunset"
-            )
             with gr.Row():
-                strength = gr.Slider(0.3, 0.8, value=0.5, step=0.05, label="Cartoon strength")
-                steps = gr.Slider(1, 4, value=2, step=1, label="Turbo steps (1–4)")
-                seed = gr.Number(value=-1, precision=0, label="Seed (-1 = random)")
-            btn_cartoon = gr.Button("Make Cartoon (fast)", variant="secondary")
         with gr.Column(scale=1):
-            probs_out = gr.Label(num_top_classes=5, label="Age Prediction (probabilities)")
             age_md = gr.Markdown(label="Age Summary")
             preview = gr.Image(label="Detection Preview")
             cartoon_out = gr.Image(label="Cartoon Result")
-    # Wire the buttons
     btn_age.click(fn=predict_age_only, inputs=[img_in, auto], outputs=[probs_out, age_md, preview])
     btn_cartoon.click(
         fn=generate_cartoon,
@@ -306,7 +296,7 @@ with gr.Blocks(title="Age First + Fast Cartoon (with Hint Pickers)") as demo:
         outputs=cartoon_out
     )
-# Expose app for HF Spaces
 app = demo
 if __name__ == "__main__":

+# app.py — Compact UI: Age-first + FAST cartoon (Turbo) with collapsible advanced options
 import os
 os.environ["TRANSFORMERS_NO_TF"] = "1"
                        for i, p in enumerate(probs))
         return expected, top
+# ------------------ Largest-face detector with nice margin ------------------
 from facenet_pytorch import MTCNN
 class FaceCropper:
+    """Detect faces; return (wide_crop, annotated). Largest face only; adds margin so face isn't full screen."""
+    def __init__(self, device: Optional[str] = None, margin_scale: float = 1.85):
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self.mtcnn = MTCNN(keep_all=True, device=self.device)
         self.margin_scale = margin_scale
         annotated = pil.copy()
         draw = ImageDraw.Draw(annotated)
         if boxes is None or len(boxes) == 0:
+            return None, annotated
         # draw all boxes
         for b, p in zip(boxes, probs):
             draw.rectangle([bx1, by1, bx2, by2], outline=(255, 0, 0), width=3)
             draw.text((bx1, max(0, by1-12)), f"{p:.2f}", fill=(255, 0, 0))
+        # choose largest
         idx = int(np.argmax([(b[2]-b[0])*(b[3]-b[1]) for b in boxes]))
         x1, y1, x2, y2 = boxes[idx]
+        # expand with margin (approx 4:5 portrait)
         cx, cy = (x1 + x2) / 2.0, (y1 + y2) / 2.0
         w, h = (x2 - x1), (y2 - y1)
         side = max(w, h) * self.margin_scale
         crop = pil.crop((nx1, ny1, nx2, ny2))
         return crop, annotated
+# ------------------ Fast Cartoonizer (SD-Turbo) with safety ------------------
 from diffusers import AutoPipelineForImage2Image
 from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from transformers import AutoFeatureExtractor
 TURBO_ID = "stabilityai/sd-turbo"
 def load_turbo_pipe(device):
+    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
     pipe = AutoPipelineForImage2Image.from_pretrained(
         TURBO_ID,
+        dtype=dtype,  # ✅ no deprecation warning
+    ).to(device)
     # Safety checker ON for public Spaces
     pipe.safety_checker = StableDiffusionSafetyChecker.from_pretrained(
         "CompVis/stable-diffusion-safety-checker"
 # ------------------ Init models once ------------------
 age_est = PretrainedAgeEstimator()
+cropper = FaceCropper(device=age_est.device, margin_scale=1.85)
 sd_pipe = load_turbo_pipe(age_est.device)
+# ------------------ Hint choices (with defaults) ------------------
 ROLE_CHOICES = [
     "Queen/Princess", "King/Prince", "Fairy", "Elf", "Knight", "Sorcerer/Sorceress",
     "Steampunk Royalty", "Cyberpunk Royalty", "Superhero", "Anime Protagonist"
 ]
 NEGATIVE_PROMPT = (
+    "deformed, disfigured, ugly, extra limbs, extra fingers, bad anatomy, low quality, blurry, watermark, text, logo"
 )
 # ------------------ Helpers ------------------
     return img if isinstance(img, Image.Image) else Image.fromarray(img)
 def _resize_512(im: Image.Image):
     w, h = im.size
     scale = 512 / max(w, h)
     if scale < 1.0:
     return im
 def build_prompt(role, background, lighting, artstyle, colors, outfit, effects, extra):
+    """Defaults always exist; user selections override them."""
+    # Defaults (applied if user doesn't choose)
+    role = role or "Queen/Princess"
+    background = background or ["castle balcony at sunset"]
+    lighting = lighting or ["soft magical lighting"]
+    artstyle = artstyle or ["storybook illustration"]
+    colors = colors or ["vibrant colors"]
+    outfit = outfit or ["elegant gown", "jeweled tiara/crown"]
+    effects = effects or ["sparkles", "glowing particles"]
     role_map = {
         "Queen/Princess": "regal queen/princess portrait",
         "King/Prince": "regal king/prince portrait",
         "Steampunk Royalty": "steampunk royal portrait with brass filigree",
         "Cyberpunk Royalty": "cyberpunk royal portrait with neon accents",
         "Superhero": "heroic comic-style portrait",
+        "Anime Protagonist": "anime protagonist portrait",
     }
+    parts = [role_map.get(role, role)]
     for group in (background, lighting, artstyle, colors, outfit, effects):
         if group and isinstance(group, list):
+            parts.append(", ".join(group))
+    parts.append("clean lineart, high quality")
     extra = (extra or "").strip()
     if extra:
+        parts.append(extra)
+    return ", ".join([p for p in parts if p])
+# ------------------ Actions ------------------
 @torch.inference_mode()
 def predict_age_only(img, auto_crop=True):
     if img is None:
         return {}, "Please upload an image.", None
+    pil = _ensure_pil(img).convert("RGB")
+    face_wide, annotated = (None, None)
     if auto_crop:
+        face_wide, annotated = cropper.detect_and_crop_wide(pil)
+    target = face_wide if face_wide is not None else pil
     age, top = age_est.predict(target, topk=5)
     probs = {lbl: float(p) for lbl, p in top}
     summary = f"**Estimated age:** {age:.1f} years"
+    return probs, summary, (annotated if annotated is not None else pil)
 @torch.inference_mode()
 def generate_cartoon(img, role, background, lighting, artstyle, colors, outfit, effects,
                      extra_desc, auto_crop=True, strength=0.5, steps=2, seed=-1):
     if img is None:
         return None
+    pil = _ensure_pil(img).convert("RGB")
     if auto_crop:
+        face_wide, _ = cropper.detect_and_crop_wide(pil)
         if face_wide is not None:
+            pil = face_wide
+    pil = _resize_512(pil)
     prompt = build_prompt(role, background, lighting, artstyle, colors, outfit, effects, extra_desc)
     generator = None
     out = sd_pipe(
         prompt=prompt,
         negative_prompt=NEGATIVE_PROMPT,
+        image=pil,
+        strength=float(strength),      # 0.4–0.6 keeps identity & adds dress/background
+        guidance_scale=0.0,            # Turbo likes 0
+        num_inference_steps=int(steps),# 1–4 → fast
         generator=generator,
     )
     return out.images[0]
+# ------------------ Compact UI ------------------
+with gr.Blocks(title="Age + Cartoon (Compact)") as demo:
+    gr.Markdown("## Upload → Predict Age → Make Cartoon ✨")
+    gr.Markdown("Largest face is used if multiple people are present. Defaults are applied automatically.")
     with gr.Row():
         with gr.Column(scale=1):
             img_in = gr.Image(sources=["upload", "webcam"], type="pil", label="Upload / Webcam")
+            auto = gr.Checkbox(True, label="Auto face crop (recommended)")
+            # Buttons visible immediately (no scrolling)
             with gr.Row():
+                btn_age = gr.Button("Predict Age", variant="primary")
+                btn_cartoon = gr.Button("Make Cartoon", variant="secondary")
+            # Collapsible advanced options
+            with gr.Accordion("🎨 Advanced Cartoon Options", open=False):
+                role = gr.Dropdown(choices=ROLE_CHOICES, value="Queen/Princess", label="Role")
+                background = gr.CheckboxGroup(choices=BACKGROUND_CHOICES, value=["castle balcony at sunset"], label="Background")
+                lighting = gr.CheckboxGroup(choices=LIGHTING_CHOICES, value=["soft magical lighting"], label="Lighting")
+                artstyle = gr.CheckboxGroup(choices=ARTSTYLE_CHOICES, value=["storybook illustration"], label="Art Style")
+                colors = gr.CheckboxGroup(choices=COLOR_CHOICES, value=["vibrant colors"], label="Color Mood")
+                outfit = gr.CheckboxGroup(choices=OUTFIT_CHOICES, value=["elegant gown", "jeweled tiara/crown"], label="Outfit / Accessories")
+                effects = gr.CheckboxGroup(choices=EFFECTS_CHOICES, value=["sparkles", "glowing particles"], label="Magical Effects")
+                extra = gr.Textbox(label="Extra description (optional)", placeholder="e.g., silver tiara, flowing gown, balcony at sunset")
+                with gr.Row():
+                    strength = gr.Slider(0.3, 0.8, value=0.5, step=0.05, label="Cartoon strength")
+                    steps = gr.Slider(1, 4, value=2, step=1, label="Turbo steps (1–4)")
+                    seed = gr.Number(value=-1, precision=0, label="Seed (-1 = random)")
         with gr.Column(scale=1):
+            probs_out = gr.Label(num_top_classes=5, label="Age Prediction")
             age_md = gr.Markdown(label="Age Summary")
             preview = gr.Image(label="Detection Preview")
             cartoon_out = gr.Image(label="Cartoon Result")
+    # Wire events
     btn_age.click(fn=predict_age_only, inputs=[img_in, auto], outputs=[probs_out, age_md, preview])
     btn_cartoon.click(
         fn=generate_cartoon,
         outputs=cartoon_out
     )
+# Expose for HF Spaces
 app = demo
 if __name__ == "__main__":