ai: Switch to production task cancelation.
jarvis.py (CHANGED)
```diff
@@ -62,6 +62,8 @@ class SessionWithID(requests.Session):
     def __init__(sess):
         super().__init__()
         sess.session_id = str(uuid.uuid4())
+        sess.stop_event = asyncio.Event()
+        sess.cancel_token = {"cancelled": False}
 
 def create_session():
     return SessionWithID()
@@ -69,6 +71,8 @@ def create_session():
 def ensure_stop_event(sess):
     if not hasattr(sess, "stop_event"):
         sess.stop_event = asyncio.Event()
+    if not hasattr(sess, "cancel_token"):
+        sess.cancel_token = {"cancelled": False}
 
 def marked_item(item, marked, attempts):
     marked.add(item)
```
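These first two hunks give every session a second cancellation signal: alongside the existing `asyncio.Event`, a plain mutable dict (`cancel_token`) that downstream coroutines capture by reference, with `ensure_stop_event` backfilling both attributes on sessions created before this commit. A minimal, self-contained sketch of the two-signal pattern (the attribute names mirror the diff; everything else is illustrative):

```python
import asyncio

class Session:
    def __init__(self):
        self.stop_event = asyncio.Event()          # wakeable signal
        self.cancel_token = {"cancelled": False}   # flag shared by reference

async def worker(stop_event, cancel_token):
    for i in range(20):
        # Cooperative checkpoint: poll both signals, as the stream loop does.
        if stop_event.is_set() or cancel_token["cancelled"]:
            print("cancelled at step", i)
            return
        await asyncio.sleep(0.05)
    print("ran to completion")

async def main():
    sess = Session()
    task = asyncio.create_task(worker(sess.stop_event, sess.cancel_token))
    await asyncio.sleep(0.12)
    sess.cancel_token["cancelled"] = True          # what stop_response() sets
    await task

asyncio.run(main())
```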
```diff
@@ -197,7 +201,7 @@ def extract_file_content(fp):
     except Exception as e:
         return f"{fp}: {e}"
 
-async def fetch_response_stream_async(host, key, model, msgs, cfg, sid, stop_event):
+async def fetch_response_stream_async(host, key, model, msgs, cfg, sid, stop_event, cancel_token):
    for t in [0.5, 1]:
        try:
            async with httpx.AsyncClient(timeout=t) as client:
```
```diff
@@ -206,7 +210,7 @@ async def fetch_response_stream_async(host, key, model, msgs, cfg, sid, stop_event):
                     marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
                     return
                 async for line in response.aiter_lines():
-                    if stop_event.is_set():
+                    if stop_event.is_set() or cancel_token["cancelled"]:
                         return
                     if not line:
                         continue
```
```diff
@@ -219,10 +223,10 @@ async def fetch_response_stream_async(host, key, model, msgs, cfg, sid, stop_event):
                         if isinstance(j, dict) and j.get("choices"):
                             for ch in j["choices"]:
                                 delta = ch.get("delta", {})
-                                if "reasoning" in delta and delta["reasoning"]
-
-                                    yield ("reasoning",
-                                if "content" in delta and delta["content"]
+                                if "reasoning" in delta and delta["reasoning"]:
+                                    decoded = delta["reasoning"].encode('utf-8').decode('unicode_escape')
+                                    yield ("reasoning", decoded)
+                                if "content" in delta and delta["content"]:
                                     yield ("content", delta["content"])
                     except:
                         continue
```
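The replacement lines decode literal backslash escapes in the streamed reasoning deltas before yielding them. One caveat worth knowing about this idiom: `unicode_escape` reads the UTF-8 bytes as Latin-1, so it unescapes a literal `\n` correctly but garbles non-ASCII text. A quick runnable illustration:

```python
# The added decode turns literal "\n" escape sequences into real newlines...
s = "line one\\nline two"                            # contains backslash-n
print(s.encode("utf-8").decode("unicode_escape"))    # prints two lines

# ...but it mangles anything outside ASCII along the way.
s2 = "héllo"
print(s2.encode("utf-8").decode("unicode_escape"))   # prints 'hÃ©llo' (mojibake)
```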
```diff
@@ -234,57 +238,36 @@ async def fetch_response_stream_async(host, key, model, msgs, cfg, sid, stop_event):
 async def chat_with_model_async(history, user_input, model_display, sess, custom_prompt):
     ensure_stop_event(sess)
     sess.stop_event.clear()
+    sess.cancel_token["cancelled"] = False
     if not LINUX_SERVER_PROVIDER_KEYS or not LINUX_SERVER_HOSTS:
         yield ("content", RESPONSES["RESPONSE_3"])
         return
     if not hasattr(sess, "session_id") or not sess.session_id:
         sess.session_id = str(uuid.uuid4())
-        sess.stop_event = asyncio.Event()
-    if not hasattr(sess, "active_candidate"):
-        sess.active_candidate = None
     model_key = get_model_key(model_display)
     cfg = MODEL_CONFIG.get(model_key, DEFAULT_CONFIG)
     msgs = [{"role": "user", "content": u} for u, _ in history] + [{"role": "assistant", "content": a} for _, a in history if a]
     prompt = INTERNAL_TRAINING_DATA if model_key == DEFAULT_MODEL_KEY and INTERNAL_TRAINING_DATA else (custom_prompt or SYSTEM_PROMPT_MAPPING.get(model_key, SYSTEM_PROMPT_DEFAULT))
     msgs.insert(0, {"role": "system", "content": prompt})
     msgs.append({"role": "user", "content": user_input})
-
-
-
+    candidates = [(h, k) for h in LINUX_SERVER_HOSTS for k in LINUX_SERVER_PROVIDER_KEYS]
+    random.shuffle(candidates)
+    for h, k in candidates:
+        stream_gen = fetch_response_stream_async(h, k, model_key, msgs, cfg, sess.session_id, sess.stop_event, sess.cancel_token)
+        got_responses = False
+        async for chunk in stream_gen:
+            if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
                 return
+            got_responses = True
             yield chunk
-
-
-
-    keys = list(LINUX_SERVER_PROVIDER_KEYS)
-    hosts = list(LINUX_SERVER_HOSTS)
-    random.shuffle(keys)
-    random.shuffle(hosts)
-    for k in keys:
-        for h in hosts:
-            jarvis = True
-            stream_gen = fetch_response_stream_async(h, k, model_key, msgs, cfg, sess.session_id, sess.stop_event)
-            responses = ""
-            got_responses = False
-            async for chunk in stream_gen:
-                if sess.stop_event.is_set():
-                    return
-                if not got_responses:
-                    got_responses = True
-                    sess.active_candidate = (h, k)
-                responses += chunk[1]
-                yield chunk
-            if got_responses and responses.strip():
-                responses_success = True
-                return
-    if not jarvis:
-        yield ("content", RESPONSES["RESPONSE_3"])
-    elif not responses_success:
-        yield ("content", RESPONSES["RESPONSE_2"])
+        if got_responses:
+            return
+    yield ("content", RESPONSES["RESPONSE_2"])
 
 async def respond_async(multi, history, model_display, sess, custom_prompt):
     ensure_stop_event(sess)
     sess.stop_event.clear()
+    sess.cancel_token["cancelled"] = False
     msg_input = {"text": multi.get("text", "").strip(), "files": multi.get("files", [])}
     if not msg_input["text"] and not msg_input["files"]:
         yield history, gr.update(value="", interactive=True, submit_btn=True, stop_btn=False), sess
```
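This hunk is the heart of the commit: the old nested key/host loops, the `active_candidate` fast path, and the `jarvis`/`responses_success` bookkeeping collapse into a single shuffled list of (host, key) candidates with one exit per outcome, and the cancel token is consulted at every checkpoint. A runnable sketch of that failover shape, with `fetch` standing in for `fetch_response_stream_async` (hosts, keys, and chunk contents are made up):

```python
import asyncio, random

async def fetch(host, key):
    # Stand-in provider stream: a dead host yields nothing, like a
    # provider that errors out before producing output.
    if host == "dead-host":
        return
    yield f"{host}:{key} chunk"

async def chat(hosts, keys):
    candidates = [(h, k) for h in hosts for k in keys]
    random.shuffle(candidates)
    for h, k in candidates:
        got_responses = False
        async for chunk in fetch(h, k):
            got_responses = True
            yield chunk
        if got_responses:
            return              # first candidate that streamed anything wins
    yield "RESPONSE_2"          # every candidate stayed silent

async def main():
    async for c in chat(["dead-host", "live-host"], ["k1"]):
        print(c)

asyncio.run(main())
```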
```diff
@@ -304,7 +287,7 @@ async def respond_async(multi, history, model_display, sess, custom_prompt):
     content_started = False
     ignore_reasoning = False
     async for typ, chunk in chat_with_model_async(history, inp, model_display, sess, custom_prompt):
-        if sess.stop_event.is_set():
+        if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
             break
         if typ == "reasoning":
             if ignore_reasoning:
```
```diff
@@ -329,10 +312,10 @@ async def respond_async(multi, history, model_display, sess, custom_prompt):
     while True:
         done, _ = await asyncio.wait({stop_task, asyncio.create_task(queue.get())}, return_when=asyncio.FIRST_COMPLETED)
         if stop_task in done:
+            sess.cancel_token["cancelled"] = True
             bg_task.cancel()
             history[-1][1] = RESPONSES["RESPONSE_1"]
             yield history, gr.update(value="", interactive=True, submit_btn=True, stop_btn=False), sess
-            sess.stop_event.clear()
             return
         for d in done:
             result = d.result()
```
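Two details in this hunk carry the fix: the token is flipped before `bg_task.cancel()`, so the producer's next checkpoint sees the flag even if the cancellation is swallowed somewhere, and the old `sess.stop_event.clear()` is dropped, so the event stays set until the next request clears it on entry (both async entry points do) and an in-flight stream cannot resume in the gap. A self-contained sketch of the stop race; the queue and task names mirror the diff, the timings are illustrative:

```python
import asyncio

async def main():
    stop_event, queue = asyncio.Event(), asyncio.Queue()
    cancel_token = {"cancelled": False}

    async def producer():
        for i in range(100):
            if cancel_token["cancelled"]:      # cooperative checkpoint
                return
            await queue.put(i)
            await asyncio.sleep(0.03)

    bg_task = asyncio.create_task(producer())
    stop_task = asyncio.create_task(stop_event.wait())
    asyncio.get_running_loop().call_later(0.1, stop_event.set)  # simulated Stop click

    while True:
        get_task = asyncio.create_task(queue.get())
        done, _ = await asyncio.wait({stop_task, get_task}, return_when=asyncio.FIRST_COMPLETED)
        if stop_task in done:
            cancel_token["cancelled"] = True   # flag first, so checkpoints see it
            bg_task.cancel()                   # then cancel, as in the diff
            get_task.cancel()
            return
        print("chunk", get_task.result())

asyncio.run(main())
```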
```diff
@@ -356,10 +339,10 @@ def change_model(new):
 def stop_response(history, sess):
     ensure_stop_event(sess)
     sess.stop_event.set()
+    sess.cancel_token["cancelled"] = True
     if history:
         history[-1][1] = RESPONSES["RESPONSE_1"]
-
-    return history, None, new_sess
+    return history, None, create_session()
 
 with gr.Blocks(fill_height=True, fill_width=True, title=AI_TYPES["AI_TYPE_4"], head=META_TAGS) as jarvis:
     user_history = gr.State([])
```
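Finally, `stop_response` now hands Gradio a brand-new `SessionWithID` instead of reusing the stopped one. Because the flag lives in a dict, any coroutine that captured the old session's token keeps seeing `cancelled=True` after the swap, while the next request starts from a clean session. A tiny illustration of that reference semantics:

```python
# A consumer (here, a closure) that captured the old token still sees the
# mutation, even after the variable is rebound to a fresh token.
token = {"cancelled": False}

def old_stream(captured=token):          # captures the original dict
    return "stopped" if captured["cancelled"] else "running"

token["cancelled"] = True                # what stop_response() does
token = {"cancelled": False}             # the fresh session's clean token
print(old_stream())                      # -> stopped
```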