add input check

- app.py +45 -32
- prompt_check.py +35 -0
app.py CHANGED

@@ -12,7 +12,9 @@ import spaces
 import torch
 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
 from PIL import Image
-from transformers import AutoModel, AutoTokenizer
+from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
+
+from prompt_check import is_unsafe_prompt
 
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 

@@ -126,7 +128,7 @@ def load_models(model_path, enable_compile=False, attention_backend="native"):
         os.path.join(model_path, "vae"), torch_dtype=torch.bfloat16, device_map="cuda"
     )
 
-    text_encoder = AutoModel.from_pretrained(
+    text_encoder = AutoModelForCausalLM.from_pretrained(
         os.path.join(model_path, "text_encoder"),
         torch_dtype=torch.bfloat16,
         device_map="cuda",

@@ -402,42 +404,53 @@ def generate(
         - seed_str: String representation of the seed used for generation
         - seed_int: Integer representation of the seed used for generation
     """
-    if pipe is None:
-        raise gr.Error("Model not loaded.")
 
-    ...
+    class UnsafeContentError(Exception):
+        pass
 
-    ...
+    try:
+        if pipe is None:
+            raise gr.Error("Model not loaded.")
 
-    ...
-        new_seed = seed if seed != -1 else random.randint(1, 1000000)
+        has_nsfw_concept = is_unsafe_prompt(pipe.text_encoder, pipe.tokenizer, prompt)
+        if has_nsfw_concept:
+            raise UnsafeContentError("input unsafe")
 
-    try:
-        resolution_str = resolution.split(" ")[0]
-    except:
-        resolution_str = "1024x1024"
-    ...
-    image = generate_image(
-        pipe=pipe,
-        prompt=final_prompt,
-        resolution=resolution_str,
-        seed=new_seed,
-        guidance_scale=0.0,
-        num_inference_steps=int(steps + 1),
-        shift=shift,
-    )
+        final_prompt = prompt
 
-    ...
+        if enhance:
+            final_prompt, _ = prompt_enhance(prompt, True)
+            print(f"Enhanced prompt: {final_prompt}")
+
+        if random_seed:
+            new_seed = random.randint(1, 1000000)
+        else:
+            new_seed = seed if seed != -1 else random.randint(1, 1000000)
+
+        try:
+            resolution_str = resolution.split(" ")[0]
+        except:
+            resolution_str = "1024x1024"
+
+        image = generate_image(
+            pipe=pipe,
+            prompt=final_prompt,
+            resolution=resolution_str,
+            seed=new_seed,
+            guidance_scale=0.0,
+            num_inference_steps=int(steps + 1),
+            shift=shift,
+        )
+
+        safety_checker_input = pipe.safety_feature_extractor([image], return_tensors="pt").pixel_values.cuda()
+        _, has_nsfw_concept = pipe.safety_checker(
+            images=[torch.zeros(1)], clip_input=safety_checker_input
+        )
+        has_nsfw_concept = has_nsfw_concept[0]
+        if has_nsfw_concept:
+            raise UnsafeContentError("input unsafe")
 
-    ...
+    except UnsafeContentError:
         image = Image.open("nsfw.png")
 
     if gallery_images is None:
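Note: the new generate() code assumes pipe already exposes a safety_feature_extractor and a safety_checker; this commit does not show where those are attached to the pipeline. Below is a minimal standalone sketch of that image-level check using the stock diffusers/transformers components; the checkpoint names are assumptions for illustration, not taken from this Space.

# Sketch only: standalone version of the CLIP-based image check used in generate(),
# assuming the standard StableDiffusionSafetyChecker. Checkpoint names are illustrative.
import torch
from PIL import Image
from transformers import CLIPImageProcessor
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker

feature_extractor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
safety_checker = StableDiffusionSafetyChecker.from_pretrained(
    "CompVis/stable-diffusion-safety-checker"
).to("cuda")

def image_has_nsfw_concept(image: Image.Image) -> bool:
    # Preprocess the PIL image into CLIP pixel values and ask the checker for flags.
    # The dummy images argument mirrors the call in generate(), since only the
    # has_nsfw_concept flags are used here.
    clip_input = feature_extractor([image], return_tensors="pt").pixel_values.to("cuda")
    _, has_nsfw_concept = safety_checker(images=[torch.zeros(1)], clip_input=clip_input)
    return bool(has_nsfw_concept[0])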
prompt_check.py ADDED

@@ -0,0 +1,35 @@
+def is_unsafe_prompt(model, tokenizer, user_prompt):
+    prompt = f"""You are a content safety filter. Your task is to analyze the following user input and determine if it contains unsafe content.
+
+Unsafe content includes:
+1. **Sexually Explicit Material**: Descriptions of nudity or sexual acts.
+2. **Sensitive Political Content**: References to real-world political figures (e.g., heads of state, government officials), political parties, sensitive geopolitical events, or propaganda.
+
+Analyze the user input below.
+If the input falls into EITHER of these categories, answer "yes".
+If the input is safe, answer "no".
+
+User Input: "{user_prompt}"
+
+Answer (only yes or no):"""
+    messages = [
+        {"role": "user", "content": prompt}
+    ]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True,
+        enable_thinking=False  # Switches between thinking and non-thinking modes. Default is True.
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
+    # conduct text completion
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=10
+    )
+    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+
+    content = tokenizer.decode(output_ids, skip_special_tokens=True).strip("\n")
+
+    return "yes" in content.lower()
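For reference, is_unsafe_prompt expects a chat-style causal LM plus its tokenizer (the enable_thinking argument implies a Qwen3-style chat template); in this Space it is called with pipe.text_encoder and pipe.tokenizer. A hedged standalone usage sketch follows; the checkpoint name is an assumption, not part of the commit.

# Usage sketch (not part of the commit). The checkpoint is an assumption; any chat
# model whose template accepts enable_thinking (e.g. the Qwen3 family) should work.
from transformers import AutoModelForCausalLM, AutoTokenizer

from prompt_check import is_unsafe_prompt

model_name = "Qwen/Qwen3-0.6B"  # illustrative only
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

# A benign prompt should come back as safe (False).
print(is_unsafe_prompt(model, tokenizer, "a watercolor painting of a lighthouse at dawn"))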