Spaces:

hongyu12321
/

RedFish

Sleeping

App Files Files Community

hongyu12321 committed on Sep 13

Commit

165f68d

verified ·

1 Parent(s): 18d7038

Update app.py

Browse files

Files changed (1) hide show

app.py +123 -84

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
-# app.py — One-page Age + Cartoon app (no extra modules needed)
-# Quiet TF/Flax logs (PyTorch-only)
 import os
 os.environ["TRANSFORMERS_NO_TF"] = "1"
 os.environ["TRANSFORMERS_NO_FLAX"] = "1"
@@ -11,9 +10,7 @@ from PIL import Image, ImageDraw
 import numpy as np
 import torch
-# ---------------------------
-# 1) Pretrained Age Estimator
-# ---------------------------
 from transformers import AutoImageProcessor, AutoModelForImageClassification
 HF_MODEL_ID = "nateraw/vit-age-classifier"
@@ -44,23 +41,24 @@ class PretrainedAgeEstimator:
                        for i, p in enumerate(probs))
         return expected, top
-# ---------------------------
-# 2) Face detector / cropper (MTCNN)
-# ---------------------------
 from facenet_pytorch import MTCNN
 class FaceCropper:
-    """Detect faces and return (cropped_face, annotated_image)."""
-    def __init__(self, device: str | None = None):
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self.mtcnn = MTCNN(keep_all=True, device=self.device)
     def _ensure_pil(self, img):
         if isinstance(img, Image.Image):
             return img.convert("RGB")
         return Image.fromarray(img).convert("RGB")
-    def detect_and_crop(self, img, select="largest"):
         pil = self._ensure_pil(img)
         boxes, probs = self.mtcnn.detect(pil)
         annotated = pil.copy()
@@ -69,124 +67,165 @@ class FaceCropper:
         if boxes is None or len(boxes) == 0:
             return None, annotated
-        # draw boxes
-        for b, p in zip(boxes, probs):
-            x1, y1, x2, y2 = map(float, b)
-            draw.rectangle([x1, y1, x2, y2], outline=(255, 0, 0), width=3)
-            draw.text((x1, max(0, y1-12)), f"{p:.2f}", fill=(255, 0, 0))
-        # choose largest by area
         idx = int(np.argmax([(b[2]-b[0])*(b[3]-b[1]) for b in boxes]))
         if isinstance(select, int) and 0 <= select < len(boxes):
             idx = select
-        x1, y1, x2, y2 = boxes[idx].astype(int)
-        face = pil.crop((x1, y1, x2, y2))
-        return face, annotated
-# ---------------------------
-# 3) Cartoonizer (Stable Diffusion img2img)
-# ---------------------------
-from diffusers import StableDiffusionImg2ImgPipeline
-SD15_ID = "runwayml/stable-diffusion-v1-5"
-def load_sd_pipe(device):
     dtype = torch.float16 if (device == "cuda") else torch.float32
-    pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
-        SD15_ID,
         torch_dtype=dtype,
-        safety_checker=None,   # rely on prompts; HF has global content filters
     )
-    return pipe.to(device)
-# ---------------------------
-# 4) Initialize models once
-# ---------------------------
 age_est = PretrainedAgeEstimator()
-cropper = FaceCropper(device=age_est.device)
-sd_pipe = load_sd_pipe(age_est.device)
-# ---------------------------
-# 5) App logic (one click does both)
-# ---------------------------
-DEFAULT_PROMPT = (
-    "cartoon, cel-shaded, clean lineart, smooth shading, vibrant colors, "
-    "studio ghibli style, pixar style, 2D illustration, high quality"
 )
 def _ensure_pil(img):
     return img if isinstance(img, Image.Image) else Image.fromarray(img)
 @torch.inference_mode()
-def run_all(img, prompt, auto_crop=True, strength=0.6, guidance=7.5, steps=25, seed=-1):
     if img is None:
         return {}, "Please upload an image.", None
     img = _ensure_pil(img).convert("RGB")
-    # ---- choose region for both age + cartoon ----
-    face = None
     annotated = None
     if auto_crop:
-        face, annotated = cropper.detect_and_crop(img, select="largest")
-    target_for_age = face if face is not None else img
-    # Age prediction
-    age, top = age_est.predict(target_for_age, topk=5)
     probs = {lbl: float(p) for lbl, p in top}
     summary = f"**Estimated age:** {age:.1f} years"
-    # Cartoon generation
-    txt = (prompt or "").strip()
-    if not txt:
-        txt = DEFAULT_PROMPT
-    else:
-        txt = f"{DEFAULT_PROMPT}, {txt}"
     generator = None
     if isinstance(seed, (int, float)) and int(seed) >= 0:
         generator = torch.Generator(device=age_est.device).manual_seed(int(seed))
-    base_img = face if face is not None else img
     out = sd_pipe(
-        prompt=txt,
-        image=base_img,
-        strength=float(strength),         # 0.3 subtle → 0.8 strong
-        guidance_scale=float(guidance),   # 5–12 typical
-        num_inference_steps=int(steps),
         generator=generator,
     )
-    cartoon = out.images[0]
-    return probs, summary, cartoon
-# ---------------------------
-# 6) Gradio UI (single page)
-# ---------------------------
-with gr.Blocks(title="Age + Cartoon (One Page)") as demo:
-    gr.Markdown("# Age Estimator + Cartoonizer")
-    gr.Markdown("Upload or capture once — get **age prediction** and a **cartoon** of the same image.")
     with gr.Row():
         with gr.Column(scale=1):
-            img_in = gr.Image(sources=["upload", "webcam"], type="pil",
-                              label="Upload / Webcam")
-            prompt = gr.Textbox(label="(Optional) Extra cartoon style",
-                                placeholder="e.g., comic-book halftone, bold lines, neon palette")
-            auto = gr.Checkbox(True, label="Auto face crop (recommended)")
             with gr.Row():
-                strength = gr.Slider(0.2, 0.95, value=0.6, step=0.05, label="Cartoon strength")
-                guidance = gr.Slider(3, 15, value=7.5, step=0.5, label="Guidance")
-                steps = gr.Slider(10, 50, value=25, step=1, label="Steps")
                 seed = gr.Number(value=-1, precision=0, label="Seed (-1 = random)")
-            go = gr.Button("Predict Age + Generate Cartoon", variant="primary", size="lg")
         with gr.Column(scale=1):
             probs_out = gr.Label(num_top_classes=5, label="Age Prediction (probabilities)")
             age_md = gr.Markdown(label="Age Summary")
             cartoon_out = gr.Image(label="Cartoon Result")
-    go.click(fn=run_all,
-             inputs=[img_in, prompt, auto, strength, guidance, steps, seed],
-             outputs=[probs_out, age_md, cartoon_out])
 if __name__ == "__main__":
     demo.launch()

+# app.py — Age-first + FAST cartoon (Turbo), nicer framing & magical background
 import os
 os.environ["TRANSFORMERS_NO_TF"] = "1"
 os.environ["TRANSFORMERS_NO_FLAX"] = "1"
 import numpy as np
 import torch
+# ------------------ Age estimator (Hugging Face) ------------------
 from transformers import AutoImageProcessor, AutoModelForImageClassification
 HF_MODEL_ID = "nateraw/vit-age-classifier"
                        for i, p in enumerate(probs))
         return expected, top
+# ------------------ Face detection with WIDER crop ------------------
 from facenet_pytorch import MTCNN
 class FaceCropper:
+    """Detect faces; return (cropped_wide, annotated). Adds margin so face isn't full screen."""
+    def __init__(self, device: str | None = None, margin_scale: float = 1.8):
         self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self.mtcnn = MTCNN(keep_all=True, device=self.device)
+        self.margin_scale = margin_scale
     def _ensure_pil(self, img):
         if isinstance(img, Image.Image):
             return img.convert("RGB")
         return Image.fromarray(img).convert("RGB")
+    def detect_and_crop_wide(self, img, select="largest"):
         pil = self._ensure_pil(img)
+        W, H = pil.size
         boxes, probs = self.mtcnn.detect(pil)
         annotated = pil.copy()
         if boxes is None or len(boxes) == 0:
             return None, annotated
+        # choose largest face
         idx = int(np.argmax([(b[2]-b[0])*(b[3]-b[1]) for b in boxes]))
         if isinstance(select, int) and 0 <= select < len(boxes):
             idx = select
+        x1, y1, x2, y2 = boxes[idx]
+        # draw all boxes
+        for b, p in zip(boxes, probs):
+            bx1, by1, bx2, by2 = map(float, b)
+            draw.rectangle([bx1, by1, bx2, by2], outline=(255, 0, 0), width=3)
+            draw.text((bx1, max(0, by1-12)), f"{p:.2f}", fill=(255, 0, 0))
+        # expand with margin
+        cx, cy = (x1 + x2) / 2.0, (y1 + y2) / 2.0
+        w, h = (x2 - x1), (y2 - y1)
+        side = max(w, h) * self.margin_scale  # wider frame to include background/shoulders
+        # keep a pleasant portrait aspect (4:5)
+        target_w = side
+        target_h = side * 1.25
+        nx1 = int(max(0, cx - target_w/2))
+        nx2 = int(min(W, cx + target_w/2))
+        ny1 = int(max(0, cy - target_h/2))
+        ny2 = int(min(H, cy + target_h/2))
+        crop = pil.crop((nx1, ny1, nx2, ny2))
+        return crop, annotated
+# ------------------ FAST Cartoonizer (SD-Turbo) ------------------
+from diffusers import AutoPipelineForImage2Image
+# Turbo is very fast (1–4 steps). Great for stylization on CPU/GPU.
+TURBO_ID = "stabilityai/sd-turbo"
def load_turbo_pipe(device):
    """Load the SD-Turbo image-to-image pipeline and move it to ``device``.

    Args:
        device: Torch device string. ``"cuda"`` selects float16 weights;
            any other value selects float32.

    Returns:
        The pipeline built by ``AutoPipelineForImage2Image.from_pretrained``
        for ``TURBO_ID``, already moved to ``device``.
    """
    import logging

    dtype = torch.float16 if (device == "cuda") else torch.float32
    pipe = AutoPipelineForImage2Image.from_pretrained(
        TURBO_ID,
        torch_dtype=dtype,
        safety_checker=None,
    )
    pipe = pipe.to(device)
    # Attention slicing is optional: if the call fails the pipeline is still
    # returned, but the failure is logged instead of being silently dropped.
    try:
        pipe.enable_attention_slicing()
    except Exception:
        logging.getLogger(__name__).warning(
            "enable_attention_slicing() failed; continuing without it",
            exc_info=True,
        )
    return pipe
+# ------------------ Init models once ------------------
 age_est = PretrainedAgeEstimator()
+cropper = FaceCropper(device=age_est.device, margin_scale=1.8)  # 1.6–2.0 feels good
+sd_pipe = load_turbo_pipe(age_est.device)
+# ------------------ Prompts ------------------
+DEFAULT_POSITIVE = (
+    "beautiful princess portrait, elegant gown, tiara, soft magical lighting, "
+    "sparkles, dreamy castle background, painterly, clean lineart, vibrant but natural colors, "
+    "storybook illustration, high quality"
+)
+DEFAULT_NEGATIVE = (
+    "deformed, disfigured, ugly, extra limbs, extra fingers, bad anatomy, low quality, "
+    "blurry, watermark, text, logo"
 )
+# ------------------ Helpers ------------------
 def _ensure_pil(img):
     return img if isinstance(img, Image.Image) else Image.fromarray(img)
def _resize_512(im: "Image.Image", max_side: int = 512):
    """Downscale ``im`` so that its longest side is at most ``max_side`` pixels.

    The aspect ratio is kept and the image is never upscaled: if the longest
    side already fits, the same object is returned unchanged.

    Args:
        im: Image-like object exposing ``size`` as ``(width, height)`` and a
            ``resize(size, resample)`` method.
        max_side: Upper bound for the longest side, in pixels (default 512).

    Returns:
        ``im`` itself when no downscale is needed, otherwise the resized image.
    """
    w, h = im.size
    longest = max(w, h)
    # Early exit also covers a 0x0 image, which would otherwise divide by zero.
    if longest <= max_side:
        return im
    scale = max_side / longest
    # int() truncation can hit 0 on the short side of an extremely thin image;
    # clamp to 1 px so the resize target is always a valid size.
    new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
    return im.resize(new_size, Image.LANCZOS)
+# ------------------ 1) Predict Age (fast) ------------------
 @torch.inference_mode()
+def predict_age_only(img, auto_crop=True):
     if img is None:
         return {}, "Please upload an image.", None
     img = _ensure_pil(img).convert("RGB")
+    face_wide = None
     annotated = None
     if auto_crop:
+        face_wide, annotated = cropper.detect_and_crop_wide(img)
+    target = face_wide if face_wide is not None else img
+    age, top = age_est.predict(target, topk=5)
     probs = {lbl: float(p) for lbl, p in top}
     summary = f"**Estimated age:** {age:.1f} years"
+    return probs, summary, (annotated if annotated is not None else img)
+# ------------------ 2) Generate Cartoon (fast) ------------------
@torch.inference_mode()
def generate_cartoon(img, prompt="", auto_crop=True, strength=0.5, steps=2, seed=-1):
    """Stylize ``img`` with the SD-Turbo image-to-image pipeline.

    Args:
        img: Uploaded image (PIL image or array), or ``None``.
        prompt: Optional extra style text appended to ``DEFAULT_POSITIVE``.
        auto_crop: When true, replace the image with the wide face crop from
            ``cropper.detect_and_crop_wide`` if a face is found.
        strength: img2img strength passed to the pipeline.
        steps: Requested number of inference steps. It is raised when needed
            so that ``int(steps * strength)`` is at least 1.
        seed: Non-negative number for a seeded generator; any other value
            leaves the generator unset.

    Returns:
        The first image produced by the pipeline, or ``None`` when ``img`` is
        ``None``.
    """
    if img is None:
        return None

    import math

    img = _ensure_pil(img).convert("RGB")

    # use wide face crop to include background/shoulders
    if auto_crop:
        face_wide, _ = cropper.detect_and_crop_wide(img)
        if face_wide is not None:
            img = face_wide

    img = _resize_512(img)

    # prompt assembly
    user = (prompt or "").strip()
    pos = DEFAULT_POSITIVE if not user else f"{DEFAULT_POSITIVE}, {user}"
    neg = DEFAULT_NEGATIVE

    generator = None
    if isinstance(seed, (int, float)) and int(seed) >= 0:
        generator = torch.Generator(device=age_est.device).manual_seed(int(seed))

    strength = float(strength)
    steps = int(steps)
    # img2img only runs int(steps * strength) denoising steps, so e.g.
    # steps=1 with strength=0.5 would run none and the pipeline would fail.
    # Raise the step count to the smallest value that gives one real step.
    if strength > 0:
        steps = max(steps, math.ceil(1.0 / strength))
        if int(steps * strength) < 1:  # guard against float rounding
            steps += 1

    # Turbo likes low steps and guidance ~0
    out = sd_pipe(
        prompt=pos,
        negative_prompt=neg,
        image=img,
        strength=strength,                 # 0.4–0.6 keeps identity & adds dress/background
        guidance_scale=0.0,                # Turbo typically uses 0
        num_inference_steps=steps,         # 1–4 steps → very fast
        generator=generator,
    )
    return out.images[0]
+# ------------------ UI ------------------
+with gr.Blocks(title="Age First + Fast Cartoon") as demo:
+    gr.Markdown("# Upload or capture once — get age prediction first, then a faster cartoon ✨")
     with gr.Row():
         with gr.Column(scale=1):
+            img_in = gr.Image(sources=["upload", "webcam"], type="pil", label="Upload / Webcam")
+            auto = gr.Checkbox(True, label="Auto face crop (wide, recommended)")
+            prompt = gr.Textbox(
+                label="(Optional) Extra cartoon style",
+                placeholder="e.g., studio ghibli watercolor, soft bokeh, pastel palette"
+            )
             with gr.Row():
+                strength = gr.Slider(0.3, 0.8, value=0.5, step=0.05, label="Cartoon strength")
+                steps = gr.Slider(1, 4, value=2, step=1, label="Turbo steps (1–4)")
                 seed = gr.Number(value=-1, precision=0, label="Seed (-1 = random)")
+            btn_age = gr.Button("Predict Age (fast)", variant="primary")
+            btn_cartoon = gr.Button("Make Cartoon (fast)", variant="secondary")
         with gr.Column(scale=1):
             probs_out = gr.Label(num_top_classes=5, label="Age Prediction (probabilities)")
             age_md = gr.Markdown(label="Age Summary")
+            preview = gr.Image(label="Detection Preview")
             cartoon_out = gr.Image(label="Cartoon Result")
+    # Wire the buttons
+    btn_age.click(fn=predict_age_only, inputs=[img_in, auto], outputs=[probs_out, age_md, preview])
+    btn_cartoon.click(fn=generate_cartoon, inputs=[img_in, prompt, auto, strength, steps, seed], outputs=cartoon_out)
 if __name__ == "__main__":
     demo.launch()