from uuid import uuid4

from openai import OpenAI
import gradio as gr

from theme import apriel
from utils import COMMUNITY_POSTFIX_URL, get_model_config, check_format, models_config, \
    logged_event_handler, DEBUG_MODEL, log_debug, log_info, log_error
from log_chat import log_chat

MODEL_TEMPERATURE = 0.8
BUTTON_WIDTH = 160
DEFAULT_OPT_OUT_VALUE = False

DEFAULT_MODEL_NAME = "Apriel-5b" if DEBUG_MODEL else "Apriel-Nemotron-15b-Thinker"

BUTTON_ENABLED = gr.update(interactive=True)
BUTTON_DISABLED = gr.update(interactive=False)
INPUT_ENABLED = gr.update(interactive=True)
INPUT_DISABLED = gr.update(interactive=False)
DROPDOWN_ENABLED = gr.update(interactive=True)
DROPDOWN_DISABLED = gr.update(interactive=False)

# Send and Stop are swapped via visibility rather than interactivity, so the
# "disabled" variants stay interactive but hidden.
SEND_BUTTON_ENABLED = gr.update(interactive=True, visible=True)
SEND_BUTTON_DISABLED = gr.update(interactive=True, visible=False)
STOP_BUTTON_ENABLED = gr.update(interactive=True, visible=True)
STOP_BUTTON_DISABLED = gr.update(interactive=True, visible=False)

chat_start_count = 0
model_config = {}
openai_client = None

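# Runs once per browser session (via demo.load): points the UI at the default
# model and stores a session id (Gradio's session_hash when available).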
def app_loaded(state, request: gr.Request):
    message_html = setup_model(DEFAULT_MODEL_NAME, initial=False)
    state['session'] = request.session_hash if request else uuid4().hex
    log_debug(f"app_loaded() --> Session: {state['session']}")
    return state, message_html

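# Dropdown handler: strips the "Model: " prefix, reconfigures the client for
# the selected model, and clears the chat history.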
def update_model_and_clear_chat(model_name):
    actual_model_name = model_name.replace("Model: ", "")
    desc = setup_model(actual_model_name)
    return desc, []

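# Creates the OpenAI-compatible client for the given model and returns the
# feedback-link HTML shown above the chat (None when initial=True).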
def setup_model(model_name, initial=False):
    global model_config, openai_client
    model_config = get_model_config(model_name)
    log_debug(f"setup_model() --> Model config: {model_config}")
    openai_client = OpenAI(
        api_key=model_config.get('AUTH_TOKEN'),
        base_url=model_config.get('VLLM_API_URL')
    )

    _model_hf_name = model_config.get("MODEL_HF_URL").split('https://huggingface.co/')[1]
    _link = f"<a href='{model_config.get('MODEL_HF_URL')}{COMMUNITY_POSTFIX_URL}' target='_blank'>{_model_hf_name}</a>"
    _description = f"We'd love to hear your thoughts on the model. Click here to provide feedback - {_link}"

    log_debug(f"Switched to model {_model_hf_name}")

    if initial:
        return None
    return _description

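# Lock the controls while a response is streaming (Send is swapped for Stop).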
def chat_started():
    return (DROPDOWN_DISABLED, gr.update(value="", interactive=False),
            SEND_BUTTON_DISABLED, STOP_BUTTON_ENABLED, BUTTON_DISABLED)

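# Restore the controls once streaming has finished.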
def chat_finished():
    return DROPDOWN_ENABLED, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED

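# Signals the streaming loop in run_chat_inference to stop after the current chunk.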
def stop_chat(state):
    state["stop_flag"] = True
    gr.Info("Chat stopped")
    return state

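# Persists the user's logging opt-out choice in the session state.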
def toggle_opt_out(state, checkbox):
    state["opt_out"] = checkbox
    return state

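# Main chat handler (a generator): appends the user message, streams the
# completion, and for reasoning models splits the output around the
# [BEGIN FINAL RESPONSE] marker into a "Thought" message plus the final answer.
# Yields (history, *control updates, state) as chunks arrive.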
def run_chat_inference(history, message, state):
    global chat_start_count
    state["is_streaming"] = True
    state["stop_flag"] = False
    error = None
    model_name = model_config.get('MODEL_NAME')

    if len(history) == 0:
        state["chat_id"] = uuid4().hex

    if openai_client is None:
        log_info("Client UI is stale, letting user know to refresh the page")
        gr.Warning("Client UI is stale, please refresh the page")
        # This function is a generator, so the warning state must be yielded;
        # a plain return would end the event without updating the UI.
        yield history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
        return

log_debug(f"{'-' * 80}") |
|
|
log_debug(f"chat_fn() --> Message: {message}") |
|
|
log_debug(f"chat_fn() --> History: {history}") |
|
|
|
|
|
try: |
|
|
|
|
|
if not message.strip(): |
|
|
gr.Info("Please enter a message before sending") |
|
|
yield history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state |
|
|
return history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state |
|
|
|
|
|
chat_start_count = chat_start_count + 1 |
|
|
log_info(f"chat_start_count: {chat_start_count}, turns: {int(len(history if history else []) / 3)}, " |
|
|
f"model: {model_name}") |
|
|
|
|
|
is_reasoning = model_config.get("REASONING") |
|
|
|
|
|
|
|
|
log_debug(f"Initial History: {history}") |
|
|
check_format(history, "messages") |
|
|
history.append({"role": "user", "content": message}) |
|
|
log_debug(f"History with user message: {history}") |
|
|
check_format(history, "messages") |
|
|
|
|
|
|
|
|
        try:
            # Drop assistant "thought" messages (tagged via a metadata title)
            # before sending the conversation to the model.
            history_no_thoughts = [item for item in history if
                                   not (isinstance(item, dict) and
                                        item.get("role") == "assistant" and
                                        isinstance(item.get("metadata"), dict) and
                                        item.get("metadata", {}).get("title") is not None)]
            log_debug(f"history_no_thoughts with user message: {history_no_thoughts}")
            check_format(history_no_thoughts, "messages")

            stream = openai_client.chat.completions.create(
                model=model_name,
                messages=history_no_thoughts,
                temperature=MODEL_TEMPERATURE,
                stream=True
            )
        except Exception as e:
            log_error(f"Error: {e}")
            error = str(e)
            yield ([{"role": "assistant",
                     "content": "😔 The model is unavailable at the moment. Please try again later."}],
                   INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state)
            if state["opt_out"] is not True:
                log_chat(chat_id=state["chat_id"],
                         session_id=state["session"],
                         model_name=model_name,
                         prompt=message,
                         history=history,
                         info={"is_reasoning": model_config.get("REASONING"), "temperature": MODEL_TEMPERATURE,
                               "stopped": True, "error": str(e)},
                         )
            else:
                log_info(f"User opted out of chat history. Not logging chat. model: {model_name}")
            return

        if is_reasoning:
            history.append(gr.ChatMessage(
                role="assistant",
                content="Thinking...",
                metadata={"title": "🧠 Thought"}
            ))
            log_debug(f"History added thinking: {history}")
        else:
            history.append(gr.ChatMessage(
                role="assistant",
                content="",
            ))
            log_debug(f"History added empty assistant: {history}")
        check_format(history, "messages")

output = "" |
|
|
completion_started = False |
|
|
for chunk in stream: |
|
|
if state["stop_flag"]: |
|
|
log_debug(f"chat_fn() --> Stopping streaming...") |
|
|
break |
|
|
|
|
|
content = getattr(chunk.choices[0].delta, "content", "") |
|
|
output += content |
|
|
|
|
|
if is_reasoning: |
|
|
parts = output.split("[BEGIN FINAL RESPONSE]") |
|
|
|
|
|
if len(parts) > 1: |
|
|
if parts[1].endswith("[END FINAL RESPONSE]"): |
|
|
parts[1] = parts[1].replace("[END FINAL RESPONSE]", "") |
|
|
if parts[1].endswith("[END FINAL RESPONSE]\n<|end|>"): |
|
|
parts[1] = parts[1].replace("[END FINAL RESPONSE]\n<|end|>", "") |
|
|
if parts[1].endswith("<|end|>"): |
|
|
parts[1] = parts[1].replace("<|end|>", "") |
|
|
|
|
|
history[-1 if not completion_started else -2] = gr.ChatMessage( |
|
|
role="assistant", |
|
|
content=parts[0], |
|
|
metadata={"title": "🧠 Thought"} |
|
|
) |
|
|
if completion_started: |
|
|
history[-1] = gr.ChatMessage( |
|
|
role="assistant", |
|
|
content=parts[1] |
|
|
) |
|
|
elif len(parts) > 1 and not completion_started: |
|
|
completion_started = True |
|
|
history.append(gr.ChatMessage( |
|
|
role="assistant", |
|
|
content=parts[1] |
|
|
)) |
|
|
else: |
|
|
if output.endswith("<|end|>"): |
|
|
output = output.replace("<|end|>", "") |
|
|
history[-1] = gr.ChatMessage( |
|
|
role="assistant", |
|
|
content=output |
|
|
) |
|
|
|
|
|
|
|
|
yield history, INPUT_DISABLED, SEND_BUTTON_DISABLED, STOP_BUTTON_ENABLED, BUTTON_DISABLED, state |
|
|
|
|
|
log_debug(f"Final History: {history}") |
|
|
check_format(history, "messages") |
|
|
yield history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state |
|
|
    finally:
        if error is None:
            log_debug(f"chat_fn() --> Finished streaming. {chat_start_count} chats started.")
            if state["opt_out"] is not True:
                log_chat(chat_id=state["chat_id"],
                         session_id=state["session"],
                         model_name=model_name,
                         prompt=message,
                         history=history,
                         info={"is_reasoning": model_config.get("REASONING"), "temperature": MODEL_TEMPERATURE,
                               "stopped": state["stop_flag"]},
                         )
            else:
                log_info(f"User opted out of chat history. Not logging chat. model: {model_name}")
        state["is_streaming"] = False
        state["stop_flag"] = False

log_info(f"Gradio version: {gr.__version__}") |
|
|
|
|
|
title = None |
|
|
description = None |
|
|
theme = apriel |
|
|
|
|
|
with open('styles.css', 'r') as f: |
|
|
custom_css = f.read() |
|
|
|
|
|
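# UI layout: model selector, chat area, input row, and opt-out controls.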
with gr.Blocks(theme=theme, css=custom_css) as demo:
    session_state = gr.State(value={
        "is_streaming": False,
        "stop_flag": False,
        "chat_id": None,
        "session": None,
        "opt_out": DEFAULT_OPT_OUT_VALUE,
    })

    gr.HTML(f"""
        <style>
            @media (min-width: 1024px) {{
                .send-button-container, .clear-button-container {{
                    max-width: {BUTTON_WIDTH}px;
                }}
            }}
        </style>
        """, elem_classes="css-styles")

    with gr.Row(variant="panel", elem_classes="responsive-row"):
        with gr.Column(scale=1, min_width=400, elem_classes="model-dropdown-container"):
            model_dropdown = gr.Dropdown(
                choices=[f"Model: {model}" for model in models_config.keys()],
                value=f"Model: {DEFAULT_MODEL_NAME}",
                label=None,
                interactive=True,
                container=False,
                scale=0,
                min_width=400
            )
        with gr.Column(scale=4, min_width=0):
            feedback_message_html = gr.HTML(description, elem_classes="model-message")

    chatbot = gr.Chatbot(
        type="messages",
        height="calc(100dvh - 310px)",
        elem_classes="chatbot",
    )

    with gr.Row():
        with gr.Column(scale=10, min_width=400):
            with gr.Row():
                user_input = gr.Textbox(
                    show_label=False,
                    placeholder="Type your message here and press Enter",
                    container=False
                )
        with gr.Column(scale=1, min_width=BUTTON_WIDTH * 2 + 20):
            with gr.Row():
                with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="send-button-container"):
                    send_btn = gr.Button("Send", variant="primary")
                    stop_btn = gr.Button("Stop", variant="stop", visible=False)
                with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="clear-button-container"):
                    clear_btn = gr.ClearButton(chatbot, value="New Chat", variant="secondary")

    with gr.Row():
        with gr.Column(min_width=400, elem_classes="opt-out-container"):
            with gr.Row():
                gr.HTML(
                    "We may use your chats to improve our AI. You may opt out if you don’t want your conversations saved.",
                    elem_classes="opt-out-message")
            with gr.Row():
                opt_out_checkbox = gr.Checkbox(
                    label="Don’t save my chat history for improvements or training",
                    value=DEFAULT_OPT_OUT_VALUE,
                    elem_classes="opt-out-checkbox",
                    interactive=True,
                    container=False
                )

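    # Submitting a message triggers two handlers: chat_started (queue=False)
    # immediately locks the controls, while run_chat_inference streams the
    # response and chat_finished then restores the UI.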
    gr.on(
        triggers=[send_btn.click, user_input.submit],
        fn=run_chat_inference,
        inputs=[chatbot, user_input, session_state],
        outputs=[chatbot, user_input, send_btn, stop_btn, clear_btn, session_state],
        concurrency_limit=4,
        api_name=False
    ).then(
        fn=chat_finished, inputs=None, outputs=[model_dropdown, user_input, send_btn, stop_btn, clear_btn], queue=False)

    gr.on(
        triggers=[send_btn.click, user_input.submit],
        fn=chat_started,
        inputs=None,
        outputs=[model_dropdown, user_input, send_btn, stop_btn, clear_btn],
        queue=False,
        show_progress='hidden',
        api_name=False
    )

    stop_btn.click(
        fn=stop_chat,
        inputs=[session_state],
        outputs=[session_state],
        api_name=False
    )

    opt_out_checkbox.change(fn=toggle_opt_out, inputs=[session_state, opt_out_checkbox], outputs=[session_state])

    demo.load(
        fn=logged_event_handler(
            log_msg="Browser session started",
            event_handler=app_loaded
        ),
        inputs=[session_state],
        outputs=[session_state, feedback_message_html],
        queue=True,
        api_name=False
    )

    model_dropdown.change(
        fn=update_model_and_clear_chat,
        inputs=[model_dropdown],
        outputs=[feedback_message_html, chatbot],
        api_name=False
    )

demo.queue(default_concurrency_limit=2).launch(ssr_mode=False, show_api=False)
log_info("Gradio app launched")