Spaces:
Running
Running
| import os | |
| import gradio as gr | |
| from openai import OpenAI | |
def predict(
    message,
    history,
    system_prompt,
    model,
    api_url,
    api_key,
    max_tk,
    temp,
    top_p,
):
    """Send one chat turn to an OpenAI-compatible endpoint and return the reply.

    Args:
        message: The user's newest message.
        history: Earlier turns, either ``(user, assistant)`` pairs or
            ``{"role": ..., "content": ...}`` dicts. ``None`` is treated as
            an empty history.
        system_prompt: Text sent as the leading system message.
        model: Model identifier forwarded to the API.
        api_url: Base URL handed to the ``OpenAI`` client.
        api_key: API key; when empty no request is made.
        max_tk: Forwarded as ``max_tokens``.
        temp: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Returns:
        The reply text, a hint string when ``api_key`` is empty, or the text
        of whatever exception the API call raised.
    """
    if not api_key:
        return "Please set valid api keys in settings first."

    msgs = [{"role": "system", "content": system_prompt}]
    for turn in history or []:
        if isinstance(turn, dict):
            # Message-style history already carries its own role.
            msgs.append({"role": turn["role"], "content": turn["content"]})
            continue
        user, assistant = turn
        # Either half of a pair may be missing; never forward a None content.
        if user is not None:
            msgs.append({"role": "user", "content": user})
        if assistant is not None:
            # Earlier replies must be replayed as "assistant", not "system",
            # otherwise the model reads its own answers as instructions.
            msgs.append({"role": "assistant", "content": assistant})
    msgs.append({"role": "user", "content": message})

    try:
        client = OpenAI(api_key=api_key, base_url=api_url)
        completion = client.chat.completions.create(
            model=model,
            messages=msgs,
            max_tokens=max_tk,
            temperature=temp,
            top_p=top_p,
            stream=False,
        )
        # Callers iterate over the result, so never hand back None.
        response = completion.choices[0].message.content or ""
    except Exception as e:
        # Deliberately broad: any failure is returned as the reply text
        # instead of propagating to the caller.
        response = f"{e}"
    return response
def deepseek(
    message,
    history,
    model,
    api_key,
    system_prompt,
    max_tk,
    temp,
    top_p,
):
    """Chat handler for the DeepSeek tab.

    Fetches the complete reply through ``predict`` against the DeepSeek
    endpoint, then yields it as a sequence of growing prefixes, one
    character longer each time.
    """
    reply = predict(
        message=message,
        history=history,
        system_prompt=system_prompt,
        model=model,
        api_url="https://api.deepseek.com",
        api_key=api_key,
        max_tk=max_tk,
        temp=temp,
        top_p=top_p,
    )
    # The reply is already complete; emitting ever-longer prefixes replays it
    # one character at a time.
    for end in range(1, len(reply) + 1):
        yield reply[:end]
def kimi(
    message,
    history,
    model,
    api_key,
    system_prompt,
    max_tk,
    temp,
    top_p,
):
    """Chat handler for the Kimi tab.

    Fetches the complete reply through ``predict`` against the Moonshot
    endpoint, then yields it as a sequence of growing prefixes, one
    character longer each time.
    """
    reply = predict(
        message=message,
        history=history,
        system_prompt=system_prompt,
        model=model,
        api_url="https://api.moonshot.cn/v1",
        api_key=api_key,
        max_tk=max_tk,
        temp=temp,
        top_p=top_p,
    )
    # The reply is already complete; emitting ever-longer prefixes replays it
    # one character at a time.
    shown = 0
    while shown < len(reply):
        shown += 1
        yield reply[:shown]
# Initial system prompt shown in every provider tab (runtime text, kept verbatim).
_DEFAULT_SYSTEM_PROMPT = "You are a useful assistant. first recognize user request and then reply carfuly and thinking"


def _provider_tab(title, chat_fn, model_choices, default_model, key_env_var):
    """Add one provider tab (collapsed settings plus a chat box) to the open Blocks.

    Args:
        title: Tab label.
        chat_fn: Chat handler given to ``gr.ChatInterface``; it receives the
            settings below, in order, after ``message`` and ``history``.
        model_choices: Model names offered in the dropdown.
        default_model: Initially selected model name.
        key_env_var: Environment variable whose value pre-fills the API key box.
    """
    with gr.Tab(title):
        with gr.Accordion(label="⚙️ Settings", open=False):
            model = gr.Dropdown(
                choices=model_choices,
                value=default_model,
                label="Select a model",
            )
            # NOTE(review): the environment value becomes the textbox's
            # initial value, so it is presumably sent to every visitor's
            # browser - confirm this exposure is intended.
            api_key = gr.Textbox(
                os.getenv(key_env_var),
                type="password",
                label="API key",
            )
            system_prompt = gr.Textbox(
                _DEFAULT_SYSTEM_PROMPT,
                label="System prompt",
            )
            max_tokens = gr.Slider(0, 32000, 10000, label="Max new tokens")
            temperature = gr.Slider(0, 1, 0.3, label="Temperature")
            top_p = gr.Slider(0, 1, 0.95, label="Top P sampling")

        # The chat box sits beside the accordion, not inside it, so it stays
        # visible while the settings are collapsed.
        gr.ChatInterface(
            chat_fn,
            additional_inputs=[
                model,
                api_key,
                system_prompt,
                max_tokens,
                temperature,
                top_p,
            ],
        )


def LLM_APIs():
    """Build the Gradio app with a DeepSeek tab and a Kimi tab.

    Returns:
        The result of calling ``queue()`` on the assembled ``gr.Blocks``.
    """
    with gr.Blocks() as llms:  # Create Gradio interface
        gr.Markdown("# LLM API Aggregation Deployment")
        _provider_tab(
            "DeepSeek",
            deepseek,
            ["deepseek-chat", "deepseek-reasoner"],
            "deepseek-chat",
            "ds_api_key",
        )
        _provider_tab(
            "Kimi",
            kimi,
            ["moonshot-v1-8k", "moonshot-v1-32k", "moonshot-v1-128k"],
            "moonshot-v1-32k",
            "kimi_api_key",
        )

    return llms.queue()