Spaces:

CGQN
/

MiniCPM-V-4_5-Demo

Sleeping

App Files Files Community

CGQN committed on Aug 29

Commit

8df7714

verified ·

1 Parent(s): 3cfef31

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -170

app.py CHANGED Viewed

@@ -1,187 +1,74 @@
-import os
 import gradio as gr
-import torch
 from PIL import Image
-from transformers import AutoModel, AutoTokenizer
-# --- Model Loading (unchanged) ---
-MODEL_ID = os.environ.get("MINICPM_MODEL_ID", "openbmb/MiniCPM-V-4_5")
-DTYPE = torch.bfloat16 if torch.cuda.is_available() else torch.float32
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-model = None
-tokenizer = None
-def load_model():
-    global model, tokenizer
-    if model is None or tokenizer is None:
-        model = AutoModel.from_pretrained(
-            MODEL_ID,
-            trust_remote_code=True,
-            attn_implementation="sdpa",
-            torch_dtype=DTYPE,
-        )
-        if DEVICE == "cuda":
-            model = model.to(DEVICE)
-        model = model.eval()
-        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-    return model, tokenizer
-# --- BUG FIX & REFACTOR: Replaced format_history_for_model ---
-# The original function was complex and not well-suited for a stateful, turn-by-turn
-# chatbot where images are part of the history. This new function is clearer and more robust.
-def convert_history_to_model_messages(history: list, new_message: str, image: Image.Image):
-    """
-    Converts Gradio chatbot history and the current turn's input into the MiniCPM message format.
-    - history: Gradio chatbot history list. Each item is a tuple (user_turn, assistant_turn).
-               A user_turn can be a string or a tuple (image_pil, text).
-    - new_message: The text from the user in the current turn.
-    - image: The PIL image uploaded by the user in the current turn.
-    """
-    messages = []
-    # Process past turns
-    for user_turn, assistant_turn in history:
-        # Handle user message
-        if isinstance(user_turn, tuple):
-            # This turn had an image
-            img, text = user_turn
-            # MiniCPM expects content as a list of parts (image, text)
-            messages.append({"role": "user", "content": [img, text]})
-        else:
-            # This turn was text-only
-            messages.append({"role": "user", "content": [user_turn]})
-        # Handle assistant message if it exists
-        if assistant_turn:
-            messages.append({"role": "assistant", "content": [assistant_turn]})
-    # Process the current turn's input
-    current_turn_content = []
-    if image:
-        current_turn_content.append(image)
-    if new_message and new_message.strip():
-        current_turn_content.append(new_message)
-    # Add the current user message to the list if it's not empty
-    if current_turn_content:
-        messages.append({"role": "user", "content": current_turn_content})
-    return messages
-def stream_chat(messages, enable_thinking=False):
     """
-    Generator that yields tokens incrementally for Gradio streaming. (Unchanged)
     """
-    model_, tok = load_model()
-    answer_iter = model_.chat(
-        msgs=messages,
-        tokenizer=tok,
-        enable_thinking=enable_thinking,
-        stream=True,
     )
-    buffer = ""
-    for chunk in answer_iter:
-        buffer += chunk
-        yield buffer
-def chat_and_stream(message: str, history: list, image: Image.Image, enable_thinking: bool):
-    """
-    The main event handler for the chatbot.
-    - Takes current inputs and history.
-    - Updates the chatbot UI instantly with the user's message.
-    - Converts history to the model's format.
-    - Streams the model's response back to the chatbot.
-    """
-    # Guard against empty submissions
-    if not image and (not message or not message.strip()):
-        # Return original history and unchanged inputs
-        yield history, "", image
-        return
-    # Prepare user message for display in the chatbot
-    # If an image is present, group it with the message text in a tuple for display
-    user_display_turn = (image, message) if image else message
-    history.append([user_display_turn, None])
-    # Instantly update the UI: show user message, clear inputs
-    yield history, "", None
-    # Convert the history (including the new user turn) for the model
-    model_messages = convert_history_to_model_messages(history, "", None) # History already contains the new turn
-    # Stream the response from the model
-    full_response = ""
-    for partial_response in stream_chat(model_messages, enable_thinking=enable_thinking):
-        history[-1][1] = partial_response
-        full_response = partial_response
-        yield history, "", None
-# --- UI REFACTOR: Replaced gr.ChatInterface with gr.Blocks layout ---
-with gr.Blocks(fill_height=True, analytics_enabled=False, theme=gr.themes.Default(text_size=gr.themes.sizes.text_lg)) as demo:
-    gr.Markdown("# MiniCPM-V-4_5 Visual Chat Demo")
     with gr.Row():
-        # Left Column: Inputs
         with gr.Column(scale=1):
-            image_in = gr.Image(label="Input Image (Optional)", type="pil")
             text_in = gr.Textbox(
-                label="Message",
-                placeholder="Ask a question about your image...",
-                lines=4,
-                autofocus=True
-            )
-            with gr.Row():
-                submit_btn = gr.Button("Submit", variant="primary", scale=3)
-                clear_btn = gr.Button("Clear", scale=1)
-            with gr.Accordion("Advanced Options", open=False):
-                enable_thinking_box = gr.Checkbox(label="Enable Thinking Mode", value=False)
-            with gr.Group():
-                gr.Markdown("### Model Info")
-                gr.Textbox(value=MODEL_ID, label="Model", interactive=False)
-                gr.Textbox(value=DEVICE, label="Device", interactive=False)
-                gr.Textbox(value=str(DTYPE), label="DType", interactive=False)
-        # Right Column: Chatbot Output
-        with gr.Column(scale=2):
-            chatbot = gr.Chatbot(
-                label="MiniCPM Chat",
-                bubble_full_width=False,
-                height=700, # Increased height for better viewing
-                render_markdown=True,
-                likeable=False,
-                show_copy_button=True,
             )
-    # --- Event Handling Logic ---
-    # Combine inputs into a list for clarity
-    inputs = [text_in, chatbot, image_in, enable_thinking_box]
-    outputs = [chatbot, text_in, image_in]
-    # Click event for the submit button
     submit_btn.click(
-        fn=chat_and_stream,
-        inputs=inputs,
-        outputs=outputs
-    )
-    # Submit on Enter key press in the textbox
-    text_in.submit(
-        fn=chat_and_stream,
-        inputs=inputs,
-        outputs=outputs
     )
-    # Clear button functionality
-    def clear_all():
-        return [], "", None # Clears chatbot, textbox, and image
-    clear_btn.click(fn=clear_all, outputs=[chatbot, text_in, image_in])
-    # Preload model on app start for snappier first response
-    demo.load(load_model, outputs=None, queue=False)
-# Queue enables streaming and concurrent users
-demo.queue(api_open=False).launch()

+import time
 import gradio as gr
 from PIL import Image
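+# Canned responses for the mock demo. Only index 0 is defined here; fake_minicpm_infer
+# also looks up indices 1-3 for keyword matches, so add entries before relying on those.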
+PREWRITTEN_RESPONSES = [
+    "When it comes to retailing industry, we offer remind the both part of realistic store and internet shopping. Both of them are all have their pros and cons, but according the picture, we can find out both of the internet sales counting and its profit are all grew up every years between twenty eighteen to twenty twenty one. The years increase rate began with twenty eighteen only 10.3%, next year 14.1%, and the next 20.3%, finally finished in twenty twenty one up to 24.5%. The sales profit also began with twenty eighteen only 2517 (million), next year 2873, and the next 3456, finally finished in twenty twenty one up to 4303. Therefore, we can find out the internet shopping is grew up between the four years. Begin 2019, according my observed more and more friends change to internet shopping because of COVID-19. All above the results provided the picture is to the realistic.\nIn my opinion, shopping on the internet can save many times to me, so I also do it when I"
+]
+def fake_minicpm_infer(image: Image.Image, text: str):
     """
+    Simulate a MiniCPM-V-4_5 inference:
+    - Sleep for a fixed duration to mimic model loading & generation latency.
+    - Return a prewritten response based on simple heuristics of input.
     """
+    if image is None and not text.strip():
+        return "Please provide an image or text to start the demo."
+    time.sleep(8.5) # Simulate inference time
+    t = text.lower().strip()
+    if any(k in t for k in ["travel", "advice", "safety", "suggestion"]):
+        return PREWRITTEN_RESPONSES[1]
+    if any(k in t for k in ["weather", "rain", "wind", "cloud"]):
+        return PREWRITTEN_RESPONSES[2]
+    if any(k in t for k in ["photography", "camera", "photo", "shoot"]):
+        return PREWRITTEN_RESPONSES[3]
+    return PREWRITTEN_RESPONSES[0]
+custom_css = """
+#input_textbox textarea,
+#output_textbox textarea {
+    font-size: 18px !important;
+}
+"""
+with gr.Blocks(title="MiniCPM-V-4_5 Demo", css=custom_css) as demo:
+    gr.Markdown(
+        """
+        # MiniCPM-V-4_5 Demo
+        """
     )
     with gr.Row():
         with gr.Column(scale=1):
+            # --- MODIFICATION 1 ---
+            # Set the maximum display height of the image component to 800px.
+            image_in = gr.Image(label="Input Image", type="pil", height=800)
             text_in = gr.Textbox(
+                label="Input Question/Description",
+                placeholder="e.g., What kind of landscape is this? or What should I be aware of when traveling?",
+                lines=3,
+                elem_id="input_textbox"
             )
+            submit_btn = gr.Button("Submit", variant="primary")
+        with gr.Column(scale=1):
+            gr.Markdown("### Output")
+            # --- MODIFICATION 2 ---
+            # Increased the number of lines to 12 (original 8 * 1.5).
+            output = gr.Textbox(label="Model Response", lines=12, elem_id="output_textbox")
     submit_btn.click(
+        fn=fake_minicpm_infer,
+        inputs=[image_in, text_in],
+        outputs=output,
+        api_name="mock_infer"
     )
+if __name__ == "__main__":
+    demo.launch()