from uuid import uuid4

from openai import OpenAI
import gradio as gr

from theme import apriel
from utils import COMMUNITY_POSTFIX_URL, get_model_config, check_format, models_config, \
    logged_event_handler, DEBUG_MODEL, log_debug, log_info, log_error
from log_chat import log_chat

MODEL_TEMPERATURE = 0.8
BUTTON_WIDTH = 160
DEFAULT_OPT_OUT_VALUE = False

DEFAULT_MODEL_NAME = "Apriel-5b" if DEBUG_MODEL else "Apriel-Nemotron-15b-Thinker"

BUTTON_ENABLED = gr.update(interactive=True)
BUTTON_DISABLED = gr.update(interactive=False)
INPUT_ENABLED = gr.update(interactive=True)
INPUT_DISABLED = gr.update(interactive=False)
DROPDOWN_ENABLED = gr.update(interactive=True)
DROPDOWN_DISABLED = gr.update(interactive=False)

# Send and Stop are swapped via visibility rather than interactivity, so the
# "disabled" variants stay interactive but hidden.
SEND_BUTTON_ENABLED = gr.update(interactive=True, visible=True)
SEND_BUTTON_DISABLED = gr.update(interactive=True, visible=False)
STOP_BUTTON_ENABLED = gr.update(interactive=True, visible=True)
STOP_BUTTON_DISABLED = gr.update(interactive=True, visible=False)

chat_start_count = 0
model_config = {}
openai_client = None

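# Runs once per browser session (via demo.load): points the UI at the default
# model and stores a session id (Gradio's session_hash when available).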
def app_loaded(state, request: gr.Request):
    message_html = setup_model(DEFAULT_MODEL_NAME, initial=False)
    state['session'] = request.session_hash if request else uuid4().hex
    log_debug(f"app_loaded() --> Session: {state['session']}")
    return state, message_html

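# Dropdown handler: strips the "Model: " prefix, reconfigures the client for
# the selected model, and clears the chat history.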
def update_model_and_clear_chat(model_name):
    actual_model_name = model_name.replace("Model: ", "")
    desc = setup_model(actual_model_name)
    return desc, []

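# Creates the OpenAI-compatible client for the given model and returns the
# feedback-link HTML shown above the chat (None when initial=True).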
def setup_model(model_name, initial=False):
    global model_config, openai_client
    model_config = get_model_config(model_name)
    log_debug(f"setup_model() --> Model config: {model_config}")
    openai_client = OpenAI(
        api_key=model_config.get('AUTH_TOKEN'),
        base_url=model_config.get('VLLM_API_URL')
    )

    _model_hf_name = model_config.get("MODEL_HF_URL").split('https://huggingface.co/')[1]
    _link = f"<a href='{model_config.get('MODEL_HF_URL')}{COMMUNITY_POSTFIX_URL}' target='_blank'>{_model_hf_name}</a>"
    _description = f"We'd love to hear your thoughts on the model. Click here to provide feedback - {_link}"

    log_debug(f"Switched to model {_model_hf_name}")

    if initial:
        return None
    return _description

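# Lock the controls while a response is streaming (Send is swapped for Stop).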
def chat_started():
    return (DROPDOWN_DISABLED, gr.update(value="", interactive=False),
            SEND_BUTTON_DISABLED, STOP_BUTTON_ENABLED, BUTTON_DISABLED)

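# Restore the controls once streaming has finished.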
def chat_finished():
    return DROPDOWN_ENABLED, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED

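# Signals the streaming loop in run_chat_inference to stop after the current chunk.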
def stop_chat(state):
    state["stop_flag"] = True
    gr.Info("Chat stopped")
    return state

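# Persists the user's logging opt-out choice in the session state.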
def toggle_opt_out(state, checkbox):
    state["opt_out"] = checkbox
    return state

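# Main chat handler (a generator): appends the user message, streams the
# completion, and for reasoning models splits the output around the
# [BEGIN FINAL RESPONSE] marker into a "Thought" message plus the final answer.
# Yields (history, *control updates, state) as chunks arrive.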
def run_chat_inference(history, message, state):
    global chat_start_count
    state["is_streaming"] = True
    state["stop_flag"] = False
    error = None
    model_name = model_config.get('MODEL_NAME')

    if len(history) == 0:
        state["chat_id"] = uuid4().hex

    if openai_client is None:
        log_info("Client UI is stale, letting user know to refresh the page")
        gr.Warning("Client UI is stale, please refresh the page")
        # This function is a generator, so the warning state must be yielded;
        # a plain return would end the event without updating the UI.
        yield history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
        return

log_debug(f"{'-' * 80}") |
|
|
log_debug(f"chat_fn() --> Message: {message}") |
|
|
log_debug(f"chat_fn() --> History: {history}") |
|
|
|
|
|
try: |
|
|
|
|
|
if not message.strip(): |
|
|
gr.Info("Please enter a message before sending") |
|
|
yield history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state |
|
|
return history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state |
|
|
|
|
|
chat_start_count = chat_start_count + 1 |
|
|
log_info(f"chat_start_count: {chat_start_count}, turns: {int(len(history if history else []) / 3)}, " |
|
|
f"model: {model_name}") |
|
|
|
|
|
is_reasoning = model_config.get("REASONING") |
|
|
|
|
|
|
|
|
log_debug(f"Initial History: {history}") |
|
|
check_format(history, "messages") |
|
|
history.append({"role": "user", "content": message}) |
|
|
log_debug(f"History with user message: {history}") |
|
|
check_format(history, "messages") |
|
|
|
|
|
|
|
|
        try:
            # Drop assistant "thought" messages (tagged via a metadata title)
            # before sending the conversation to the model.
            history_no_thoughts = [item for item in history if
                                   not (isinstance(item, dict) and
                                        item.get("role") == "assistant" and
                                        isinstance(item.get("metadata"), dict) and
                                        item.get("metadata", {}).get("title") is not None)]
            log_debug(f"history_no_thoughts with user message: {history_no_thoughts}")
            check_format(history_no_thoughts, "messages")

            stream = openai_client.chat.completions.create(
                model=model_name,
                messages=history_no_thoughts,
                temperature=MODEL_TEMPERATURE,
                stream=True
            )
        except Exception as e:
            log_error(f"Error: {e}")
            error = str(e)
            yield ([{"role": "assistant",
                     "content": "😔 The model is unavailable at the moment. Please try again later."}],
                   INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state)
            if state["opt_out"] is not True:
                log_chat(chat_id=state["chat_id"],
                         session_id=state["session"],
                         model_name=model_name,
                         prompt=message,
                         history=history,
                         info={"is_reasoning": model_config.get("REASONING"), "temperature": MODEL_TEMPERATURE,
                               "stopped": True, "error": str(e)},
                         )
            else:
                log_info(f"User opted out of chat history. Not logging chat. model: {model_name}")
            return

        if is_reasoning:
            history.append(gr.ChatMessage(
                role="assistant",
                content="Thinking...",
                metadata={"title": "🧠 Thought"}
            ))
            log_debug(f"History added thinking: {history}")
        else:
            history.append(gr.ChatMessage(
                role="assistant",
                content="",
            ))
            log_debug(f"History added empty assistant: {history}")
        check_format(history, "messages")

output = "" |
|
|
completion_started = False |
|
|
for chunk in stream: |
|
|
if state["stop_flag"]: |
|
|
log_debug(f"chat_fn() --> Stopping streaming...") |
|
|
break |
|
|
|
|
|
content = getattr(chunk.choices[0].delta, "content", "") |
|
|
output += content |
|
|
|
|
|
if is_reasoning: |
|
|
parts = output.split("[BEGIN FINAL RESPONSE]") |
|
|
|
|
|
if len(parts) > 1: |
|
|
if parts[1].endswith("[END FINAL RESPONSE]"): |
|
|
parts[1] = parts[1].replace("[END FINAL RESPONSE]", "") |
|
|
if parts[1].endswith("[END FINAL RESPONSE]\n<|end|>"): |
|
|
parts[1] = parts[1].replace("[END FINAL RESPONSE]\n<|end|>", "") |
|
|
if parts[1].endswith("<|end|>"): |
|
|
parts[1] = parts[1].replace("<|end|>", "") |
|
|
|
|
|
history[-1 if not completion_started else -2] = gr.ChatMessage( |
|
|
role="assistant", |
|
|
content=parts[0], |
|
|
metadata={"title": "🧠 Thought"} |
|
|
) |
|
|
if completion_started: |
|
|
history[-1] = gr.ChatMessage( |
|
|
role="assistant", |
|
|
content=parts[1] |
|
|
) |
|
|
elif len(parts) > 1 and not completion_started: |
|
|
completion_started = True |
|
|
history.append(gr.ChatMessage( |
|
|
role="assistant", |
|
|
content=parts[1] |
|
|
)) |
|
|
else: |
|
|
if output.endswith("<|end|>"): |
|
|
output = output.replace("<|end|>", "") |
|
|
history[-1] = gr.ChatMessage( |
|
|
role="assistant", |
|
|
content=output |
|
|
) |
|
|
|
|
|
|
|
|
yield history, INPUT_DISABLED, SEND_BUTTON_DISABLED, STOP_BUTTON_ENABLED, BUTTON_DISABLED, state |
|
|
|
|
|
log_debug(f"Final History: {history}") |
|
|
check_format(history, "messages") |
|
|
yield history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state |
|
|
    finally:
        if error is None:
            log_debug(f"chat_fn() --> Finished streaming. {chat_start_count} chats started.")
            if state["opt_out"] is not True:
                log_chat(chat_id=state["chat_id"],
                         session_id=state["session"],
                         model_name=model_name,
                         prompt=message,
                         history=history,
                         info={"is_reasoning": model_config.get("REASONING"), "temperature": MODEL_TEMPERATURE,
                               "stopped": state["stop_flag"]},
                         )
            else:
                log_info(f"User opted out of chat history. Not logging chat. model: {model_name}")
        state["is_streaming"] = False
        state["stop_flag"] = False

log_info(f"Gradio version: {gr.__version__}") |
|
|
|
|
|
title = None |
|
|
description = None |
|
|
theme = apriel |
|
|
|
|
|
with open('styles.css', 'r') as f: |
|
|
custom_css = f.read() |
|
|
|
|
|
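# UI layout: model selector, chat area, input row, and opt-out controls.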
with gr.Blocks(theme=theme, css=custom_css) as demo:
    session_state = gr.State(value={
        "is_streaming": False,
        "stop_flag": False,
        "chat_id": None,
        "session": None,
        "opt_out": DEFAULT_OPT_OUT_VALUE,
    })

    gr.HTML(f"""
        <style>
            @media (min-width: 1024px) {{
                .send-button-container, .clear-button-container {{
                    max-width: {BUTTON_WIDTH}px;
                }}
            }}
        </style>
        """, elem_classes="css-styles")

    with gr.Row(variant="panel", elem_classes="responsive-row"):
        with gr.Column(scale=1, min_width=400, elem_classes="model-dropdown-container"):
            model_dropdown = gr.Dropdown(
                choices=[f"Model: {model}" for model in models_config.keys()],
                value=f"Model: {DEFAULT_MODEL_NAME}",
                label=None,
                interactive=True,
                container=False,
                scale=0,
                min_width=400
            )
        with gr.Column(scale=4, min_width=0):
            feedback_message_html = gr.HTML(description, elem_classes="model-message")

    chatbot = gr.Chatbot(
        type="messages",
        height="calc(100dvh - 310px)",
        elem_classes="chatbot",
    )

    with gr.Row():
        with gr.Column(scale=10, min_width=400):
            with gr.Row():
                user_input = gr.Textbox(
                    show_label=False,
                    placeholder="Type your message here and press Enter",
                    container=False
                )
        with gr.Column(scale=1, min_width=BUTTON_WIDTH * 2 + 20):
            with gr.Row():
                with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="send-button-container"):
                    send_btn = gr.Button("Send", variant="primary")
                    stop_btn = gr.Button("Stop", variant="stop", visible=False)
                with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="clear-button-container"):
                    clear_btn = gr.ClearButton(chatbot, value="New Chat", variant="secondary")

    with gr.Row():
        with gr.Column(min_width=400, elem_classes="opt-out-container"):
            with gr.Row():
                gr.HTML(
                    "We may use your chats to improve our AI. You may opt out if you don’t want your conversations saved.",
                    elem_classes="opt-out-message")
            with gr.Row():
                opt_out_checkbox = gr.Checkbox(
                    label="Don’t save my chat history for improvements or training",
                    value=DEFAULT_OPT_OUT_VALUE,
                    elem_classes="opt-out-checkbox",
                    interactive=True,
                    container=False
                )

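    # Submitting a message triggers two handlers: chat_started (queue=False)
    # immediately locks the controls, while run_chat_inference streams the
    # response and chat_finished then restores the UI.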
    gr.on(
        triggers=[send_btn.click, user_input.submit],
        fn=run_chat_inference,
        inputs=[chatbot, user_input, session_state],
        outputs=[chatbot, user_input, send_btn, stop_btn, clear_btn, session_state],
        concurrency_limit=4,
        api_name=False
    ).then(
        fn=chat_finished, inputs=None, outputs=[model_dropdown, user_input, send_btn, stop_btn, clear_btn], queue=False)

    gr.on(
        triggers=[send_btn.click, user_input.submit],
        fn=chat_started,
        inputs=None,
        outputs=[model_dropdown, user_input, send_btn, stop_btn, clear_btn],
        queue=False,
        show_progress='hidden',
        api_name=False
    )

    stop_btn.click(
        fn=stop_chat,
        inputs=[session_state],
        outputs=[session_state],
        api_name=False
    )

    opt_out_checkbox.change(fn=toggle_opt_out, inputs=[session_state, opt_out_checkbox], outputs=[session_state])

    demo.load(
        fn=logged_event_handler(
            log_msg="Browser session started",
            event_handler=app_loaded
        ),
        inputs=[session_state],
        outputs=[session_state, feedback_message_html],
        queue=True,
        api_name=False
    )

    model_dropdown.change(
        fn=update_model_and_clear_chat,
        inputs=[model_dropdown],
        outputs=[feedback_message_html, chatbot],
        api_name=False
    )

demo.queue(default_concurrency_limit=2).launch(ssr_mode=False, show_api=False)
log_info("Gradio app launched")