Spaces:

hongyu12321
/

RedFish

Sleeping

App Files Files Community

hongyu12321 committed on Sep 13

Commit

97ae321

verified ·

1 Parent(s): 26f7527

Update app.py

Browse files

Files changed (1) hide show

app.py +157 -135

app.py CHANGED Viewed

@@ -1,170 +1,192 @@
-# app.py
 import os
 os.environ["TRANSFORMERS_NO_TF"] = "1"
 os.environ["TRANSFORMERS_NO_FLAX"] = "1"
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 import gradio as gr
-from PIL import Image
 import numpy as np
 import torch
-from hf_model import PretrainedAgeEstimator
-from face_utils import FaceCropper
-# NEW: diffusers for cartoonizer
 from diffusers import StableDiffusionImg2ImgPipeline
-# ---------- Load models once ----------
-est = PretrainedAgeEstimator()
-cropper = FaceCropper(device=est.device)
-# A solid, public SD 1.5 img2img pipeline; fast and reliable
 SD15_ID = "runwayml/stable-diffusion-v1-5"
-sd_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
-    SD15_ID,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    safety_checker=None,   # rely on prompts; HF Spaces also has a global filter
-).to(est.device)
-# ---------- Helpers ----------
 def _ensure_pil(img):
-    if isinstance(img, Image.Image):
-        return img
-    return Image.fromarray(img)
-# ----- Age: single image -----
-def predict_single(img, auto_crop=True, topk=5, show_annot=True):
     if img is None:
-        return {}, "No image provided.", None
     img = _ensure_pil(img).convert("RGB")
-    preview = img
     face = None
     if auto_crop:
-        face, annotated, _ = cropper.detect_and_crop(img, select="largest")
-        preview = annotated if show_annot else img
-    target = face if face is not None else img
-    age, top = est.predict(target, topk=topk)
-    probs = {lbl: float(prob) for lbl, prob in top}
     summary = f"**Estimated age:** {age:.1f} years"
-    return probs, summary, preview
-# ----- Age: batch -----
-def predict_batch(files, auto_crop=True, topk=5):
-    if not files:
-        return "No files uploaded."
-    rows = ["| File | Estimated Age | Top-1 | p |", "|---|---:|---|---:|"]
-    for f in files:
-        try:
-            img = Image.open(f.name).convert("RGB")
-            face = None
-            if auto_crop:
-                face, _, _ = cropper.detect_and_crop(img, select="largest")
-            target = face if face is not None else img
-            age, top = est.predict(target, topk=topk)
-            top1_lbl, top1_p = top[0]
-            rows.append(f"| {os.path.basename(f.name)} | {age:.1f} | {top1_lbl} | {top1_p:.3f} |")
-        except Exception:
-            rows.append(f"| {os.path.basename(f.name)} | (error) | - | - |")
-    return "\n".join(rows)
-# ----- NEW: Cartoonizer (img2img) -----
-def cartoonize(img, prompt, strength=0.6, guidance=7.5, steps=25, seed=0, use_face_crop=True):
-    """
-    img: PIL or numpy
-    prompt: text description, e.g. "cute cel-shaded cartoon, soft outlines, vibrant colors"
-    strength: how much to deviate from the input (0.3 subtle → 0.8 strong)
-    guidance: prompt strength (5–12 typical)
-    steps: diffusion steps (20–40 typical)
-    seed: reproducibility (-1 for random)
-    """
-    if img is None:
-        return None
-    img = _ensure_pil(img).convert("RGB")
-    # optional crop to the largest face for better identity preservation
-    if use_face_crop:
-        face, _, _ = cropper.detect_and_crop(img, select="largest")
-        if face is not None:
-            img = face
-    # cartoon-y defaults (you can tweak in UI)
-    base_prompt = (
-        "cartoon, cel-shaded, clean lineart, smooth shading, high contrast, vibrant, studio ghibli style, "
-        "pixar style, highly detailed, 2D illustration"
-    )
-    full_prompt = f"{base_prompt}, {prompt}".strip().strip(",")
     generator = None
-    if seed and seed >= 0:
-        generator = torch.Generator(device=est.device).manual_seed(int(seed))
     out = sd_pipe(
-        prompt=full_prompt,
-        image=img,
-        strength=float(strength),
-        guidance_scale=float(guidance),
         num_inference_steps=int(steps),
         generator=generator,
     )
-    result = out.images[0]
-    return result
-# ---------- UI ----------
-with gr.Blocks(title="Pretrained Age Estimator + Cartoonizer") as demo:
-    gr.Markdown("# Pretrained Age Estimator + Cartoonizer")
-    gr.Markdown("Detects age from a face and can also generate a cartoonized image guided by your text description.")
-    with gr.Tabs():
-        with gr.Tab("Age (Single)"):
-            with gr.Row():
-                with gr.Column():
-                    inp = gr.Image(type="pil", label="Upload a face image")
-                    cam = gr.Image(sources=["webcam"], type="pil", label="Webcam (optional)")
-                    auto = gr.Checkbox(True, label="Auto face crop (MTCNN)")
-                    topk = gr.Slider(3, 9, value=5, step=1, label="Top-K age ranges")
-                    annot = gr.Checkbox(True, label="Show detection preview")
-                    btn = gr.Button("Predict Age", variant="primary")
-                with gr.Column():
-                    out_label = gr.Label(num_top_classes=5, label="Age Prediction (probabilities)")
-                    out_md = gr.Markdown(label="Summary")
-                    out_prev = gr.Image(label="Preview", visible=True)
-            def run_single(img, cam_img, auto_crop, topk_val, show_annot):
-                chosen = cam_img if cam_img is not None else img
-                return predict_single(chosen, auto_crop, int(topk_val), show_annot)
-            btn.click(fn=run_single, inputs=[inp, cam, auto, topk, annot],
-                      outputs=[out_label, out_md, out_prev])
-        with gr.Tab("Age (Batch)"):
-            files = gr.Files(label="Upload multiple images")
-            auto_b = gr.Checkbox(True, label="Auto face crop (MTCNN)")
-            topk_b = gr.Slider(3, 9, value=5, step=1, label="Top-K age ranges")
-            btn_b = gr.Button("Run batch")
-            out_table = gr.Markdown()
-            btn_b.click(fn=predict_batch, inputs=[files, auto_b, topk_b], outputs=out_table)
-        with gr.Tab("Cartoonizer"):
-            src = gr.Image(type="pil", label="Source image (face or any photo)")
-            prompt = gr.Textbox(label="Your style prompt",
-                                value="cute cel-shaded cartoon, clean lines, soft colors")
             with gr.Row():
-                strength = gr.Slider(0.2, 0.95, value=0.6, step=0.05, label="Transformation strength")
-                guidance = gr.Slider(3, 15, value=7.5, step=0.5, label="Guidance scale")
                 steps = gr.Slider(10, 50, value=25, step=1, label="Steps")
-                seed = gr.Number(value=0, precision=0, label="Seed (0 or -1 = random)")
-            use_crop = gr.Checkbox(True, label="Crop to largest face before stylizing")
-            btn_c = gr.Button("Generate Cartoon", variant="primary")
-            out_img = gr.Image(label="Cartoon result")
-            btn_c.click(fn=cartoonize,
-                        inputs=[src, prompt, strength, guidance, steps, seed, use_crop],
-                        outputs=out_img)
 if __name__ == "__main__":
     demo.launch()

+# app.py — One-page Age + Cartoon app (no extra modules needed)
+# Quiet TF/Flax logs (PyTorch-only)
 import os
 os.environ["TRANSFORMERS_NO_TF"] = "1"
 os.environ["TRANSFORMERS_NO_FLAX"] = "1"
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 import gradio as gr
+from PIL import Image, ImageDraw
 import numpy as np
 import torch
+# ---------------------------
+# 1) Pretrained Age Estimator
+# ---------------------------
+from transformers import AutoImageProcessor, AutoModelForImageClassification
+HF_MODEL_ID = "nateraw/vit-age-classifier"
+# Midpoint (in years) assigned to each age-range label; used to turn the class
+# probabilities into a single probability-weighted expected age.
+# NOTE(review): the nateraw/vit-age-classifier config appears to name its
+# oldest class "more than 70" rather than "70+" -- confirm against
+# model.config.id2label. Without a matching key that class falls back to the
+# default midpoint (35) at lookup time and drags the estimate down.
+# "70+" is kept so checkpoints that use that spelling still resolve.
+AGE_RANGE_TO_MID = {
+    "0-2": 1, "3-9": 6, "10-19": 15, "20-29": 25, "30-39": 35,
+    "40-49": 45, "50-59": 55, "60-69": 65,
+    "70+": 75, "more than 70": 75,
+}
+class PretrainedAgeEstimator:
+    """Wrap a Hugging Face image-classification checkpoint for age estimation.
+
+    Loads the processor and model once, moves the model to the chosen device
+    in eval mode, and exposes `predict` for a single PIL image.
+    """
+    def __init__(self, model_id: str = HF_MODEL_ID, device: str | None = None):
+        # No explicit device: prefer CUDA when torch reports it, else CPU.
+        if not device:
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = device
+        self.processor = AutoImageProcessor.from_pretrained(model_id, use_fast=True)
+        classifier = AutoModelForImageClassification.from_pretrained(model_id)
+        classifier.to(self.device).eval()
+        self.model = classifier
+        self.id2label = classifier.config.id2label
+    @torch.inference_mode()
+    def predict(self, img: Image.Image, topk: int = 5):
+        """Return `(expected_age, top)` for one image.
+
+        `expected_age` is the sum over all classes of probability times the
+        class midpoint from AGE_RANGE_TO_MID (labels missing from that table
+        count as 35). `top` is a list of `(label, probability)` pairs for the
+        `min(topk, number_of_classes)` most probable classes.
+        """
+        rgb = img if img.mode == "RGB" else img.convert("RGB")
+        batch = self.processor(images=rgb, return_tensors="pt").to(self.device)
+        probs = self.model(**batch).logits.softmax(dim=-1).squeeze(0)
+        best = torch.topk(probs, k=min(topk, probs.numel()))
+        top = [
+            (self.id2label[idx.item()], float(score.item()))
+            for idx, score in zip(best.indices, best.values)
+        ]
+        # Accumulate in class order, starting from 0, exactly like sum().
+        expected = 0
+        for class_id, prob in enumerate(probs):
+            midpoint = AGE_RANGE_TO_MID.get(self.id2label[class_id], 35)
+            expected += midpoint * float(prob)
+        return expected, top
+# ---------------------------
+# 2) Face detector / cropper (MTCNN)
+# ---------------------------
+from facenet_pytorch import MTCNN
+class FaceCropper:
+    """Detect faces with MTCNN and return (cropped_face, annotated_image)."""
+    def __init__(self, device: str | None = None):
+        # No explicit device: prefer CUDA when torch reports it, else CPU.
+        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        self.mtcnn = MTCNN(keep_all=True, device=self.device)
+    def _ensure_pil(self, img):
+        """Return *img* as an RGB PIL image (accepts a PIL image or an array)."""
+        if isinstance(img, Image.Image):
+            return img.convert("RGB")
+        return Image.fromarray(img).convert("RGB")
+    def detect_and_crop(self, img, select="largest"):
+        """Detect faces, outline them on a copy of the image, and crop one.
+
+        Args:
+            img: PIL image or array accepted by `Image.fromarray`.
+            select: an integer index into the detected boxes picks that face;
+                any other value (including the default "largest") picks the
+                box with the largest area.
+
+        Returns:
+            `(face, annotated)`. `annotated` is an RGB copy of the input with
+            every detected box and its score drawn in red. `face` is the
+            selected crop, or None when nothing was detected or the selected
+            box has no area inside the image.
+        """
+        pil = self._ensure_pil(img)
+        boxes, probs = self.mtcnn.detect(pil)
+        annotated = pil.copy()
+        if boxes is None or len(boxes) == 0:
+            return None, annotated
+        draw = ImageDraw.Draw(annotated)
+        for b, p in zip(boxes, probs):
+            x1, y1, x2, y2 = map(float, b)
+            draw.rectangle([x1, y1, x2, y2], outline=(255, 0, 0), width=3)
+            draw.text((x1, max(0, y1 - 12)), f"{p:.2f}", fill=(255, 0, 0))
+        # Default to the largest box by area; a valid integer index overrides it.
+        idx = int(np.argmax([(b[2] - b[0]) * (b[3] - b[1]) for b in boxes]))
+        if isinstance(select, int) and 0 <= select < len(boxes):
+            idx = select
+        x1, y1, x2, y2 = (int(v) for v in boxes[idx])
+        # Detector boxes are not guaranteed to lie inside the image. Cropping
+        # past the edge makes PIL pad the result, so clamp to the image first.
+        width, height = pil.size
+        x1, y1 = max(0, x1), max(0, y1)
+        x2, y2 = min(width, x2), min(height, y2)
+        # A box that collapses after truncation/clamping would yield an empty
+        # image; report "no face" so callers fall back to the full image.
+        if x2 <= x1 or y2 <= y1:
+            return None, annotated
+        face = pil.crop((x1, y1, x2, y2))
+        return face, annotated
+# ---------------------------
+# 3) Cartoonizer (Stable Diffusion img2img)
+# ---------------------------
 from diffusers import StableDiffusionImg2ImgPipeline
 SD15_ID = "runwayml/stable-diffusion-v1-5"
+def load_sd_pipe(device):
+    """Load the SD15_ID img2img pipeline and move it to *device*.
+
+    Args:
+        device: device string (e.g. "cpu", "cuda", "cuda:0") or torch.device.
+
+    Returns:
+        The pipeline on *device*, in float16 on CUDA and float32 otherwise,
+        with the safety checker disabled.
+    """
+    # Compare on the string form so "cuda:0" and torch.device("cuda") also get
+    # half precision; an exact == "cuda" test silently picked float32 for them.
+    on_cuda = str(device).startswith("cuda")
+    dtype = torch.float16 if on_cuda else torch.float32
+    pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
+        SD15_ID,
+        torch_dtype=dtype,
+        safety_checker=None,   # rely on prompts; HF has global content filters
+    )
+    return pipe.to(device)
+# ---------------------------
+# 4) Initialize models once
+# ---------------------------
+# Module-level singletons: built once at import and reused by every call.
+# The face cropper and the diffusion pipeline are placed on whatever device
+# the age estimator selected.
+age_est = PretrainedAgeEstimator()
+cropper = FaceCropper(device=age_est.device)
+sd_pipe = load_sd_pipe(age_est.device)
+# ---------------------------
+# 5) App logic (one click does both)
+# ---------------------------
+# Base style prompt for the cartoonizer: used alone when the user supplies no
+# text, otherwise the user's text is appended after it (see run_all).
+DEFAULT_PROMPT = (
+    "cartoon, cel-shaded, clean lineart, smooth shading, vibrant colors, "
+    "studio ghibli style, pixar style, 2D illustration, high quality"
+)
 def _ensure_pil(img):
+    """Return *img* unchanged if it is a PIL image, else build one from the array."""
+    if isinstance(img, Image.Image):
+        return img
+    return Image.fromarray(img)
+@torch.inference_mode()
+def run_all(img, prompt, auto_crop=True, strength=0.6, guidance=7.5, steps=25, seed=-1):
+    """Estimate age and generate a cartoon from one image in a single call.
+
+    Args:
+        img: PIL image or array accepted by Image.fromarray; None returns early.
+        prompt: optional extra style text, appended after DEFAULT_PROMPT.
+        auto_crop: when true, run face detection and use the selected face
+            crop (if one is returned) for both the age model and the cartoon.
+        strength, guidance, steps: forwarded to the img2img pipeline as
+            strength, guidance_scale and num_inference_steps.
+        seed: a non-negative number seeds a torch.Generator; any other value
+            (e.g. -1 or None) leaves the generator unset.
+
+    Returns:
+        (probs, summary, cartoon): a label -> probability dict for the top 5
+        classes, a Markdown summary string, and the first generated image.
+        When img is None the result is ({}, "Please upload an image.", None).
+    """
     if img is None:
+        return {}, "Please upload an image.", None
     img = _ensure_pil(img).convert("RGB")
+    # ---- choose region for both age + cartoon ----
     face = None
+    # NOTE(review): annotated is assigned but never read in this function.
+    annotated = None
     if auto_crop:
+        face, annotated = cropper.detect_and_crop(img, select="largest")
+    # Fall back to the whole image when cropping is off or no face was returned.
+    target_for_age = face if face is not None else img
+    # Age prediction
+    age, top = age_est.predict(target_for_age, topk=5)
+    probs = {lbl: float(p) for lbl, p in top}
     summary = f"**Estimated age:** {age:.1f} years"
+    # Cartoon generation
+    txt = (prompt or "").strip()
+    if not txt:
+        txt = DEFAULT_PROMPT
+    else:
+        txt = f"{DEFAULT_PROMPT}, {txt}"
     generator = None
+    # Only a non-negative seed creates a seeded generator; otherwise the
+    # pipeline is called with generator=None.
+    if isinstance(seed, (int, float)) and int(seed) >= 0:
+        generator = torch.Generator(device=age_est.device).manual_seed(int(seed))
+    # Same region as the age model: the face crop if available, else the image.
+    base_img = face if face is not None else img
     out = sd_pipe(
+        prompt=txt,
+        image=base_img,
+        strength=float(strength),         # 0.3 subtle → 0.8 strong
+        guidance_scale=float(guidance),   # 5–12 typical
         num_inference_steps=int(steps),
         generator=generator,
     )
+    cartoon = out.images[0]
+    return probs, summary, cartoon
+# ---------------------------
+# 6) Gradio UI (single page)
+# ---------------------------
+# Single-page layout: one row with an input column (image, prompt, options,
+# button) followed by an output column (probabilities, summary, cartoon).
+with gr.Blocks(title="Age + Cartoon (One Page)") as demo:
+    gr.Markdown("# Age Estimator + Cartoonizer")
+    gr.Markdown("Upload or capture once — get **age prediction** and a **cartoon** of the same image.")
+    with gr.Row():
+        with gr.Column(scale=1):
+            img_in = gr.Image(sources=["upload", "webcam"], type="pil",
+                              label="Upload / Webcam")
+            prompt = gr.Textbox(label="(Optional) Extra cartoon style",
+                                placeholder="e.g., comic-book halftone, bold lines, neon palette")
+            auto = gr.Checkbox(True, label="Auto face crop (recommended)")
             with gr.Row():
+                strength = gr.Slider(0.2, 0.95, value=0.6, step=0.05, label="Cartoon strength")
+                guidance = gr.Slider(3, 15, value=7.5, step=0.5, label="Guidance")
                 steps = gr.Slider(10, 50, value=25, step=1, label="Steps")
+                # Default -1 is negative, so run_all leaves the generator unset.
+                seed = gr.Number(value=-1, precision=0, label="Seed (-1 = random)")
+            go = gr.Button("Predict Age + Generate Cartoon", variant="primary", size="lg")
+        with gr.Column(scale=1):
+            probs_out = gr.Label(num_top_classes=5, label="Age Prediction (probabilities)")
+            age_md = gr.Markdown(label="Age Summary")
+            cartoon_out = gr.Image(label="Cartoon Result")
+    # Inputs are listed in the order of run_all's parameters; the three outputs
+    # receive its (probs, summary, cartoon) return value in that order.
+    go.click(fn=run_all,
+             inputs=[img_in, prompt, auto, strength, guidance, steps, seed],
+             outputs=[probs_out, age_md, cartoon_out])
 if __name__ == "__main__":
     demo.launch()