Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -182,7 +182,8 @@ def model_inference(input_dict, history):
|
|
| 182 |
return_tensors="pt",
|
| 183 |
padding=True,
|
| 184 |
).to("cuda")
|
| 185 |
-
|
|
|
|
| 186 |
streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=False)
|
| 187 |
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, temperature=0.1, top_p=0.95, top_k=50)
|
| 188 |
# generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False, num_beams=1)
|
|
@@ -205,7 +206,7 @@ def model_inference(input_dict, history):
|
|
| 205 |
|
| 206 |
# Process the full segment: keep only the text before the first <|im_end|> marker, if one is present
|
| 207 |
processed_segment = current_model_output_segment.split("<|im_end|>", 1)[0] if "<|im_end|>" in current_model_output_segment else current_model_output_segment
|
| 208 |
-
|
| 209 |
# Append this processed segment to the cumulative list of display segments for Gradio
|
| 210 |
complete_assistant_response_for_gradio += [processed_segment + "\n\n"]
|
| 211 |
yield complete_assistant_response_for_gradio # Ensure the fully processed segment is yielded to Gradio
|
|
|
|
| 182 |
return_tensors="pt",
|
| 183 |
padding=True,
|
| 184 |
).to("cuda")
|
| 185 |
+
print(f"===> messages for generation")
|
| 186 |
+
print(messages)
|
| 187 |
streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=False)
|
| 188 |
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, temperature=0.1, top_p=0.95, top_k=50)
|
| 189 |
# generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False, num_beams=1)
|
|
|
|
| 206 |
|
| 207 |
# Process the full segment: keep only the text before the first <|im_end|> marker, if one is present
|
| 208 |
processed_segment = current_model_output_segment.split("<|im_end|>", 1)[0] if "<|im_end|>" in current_model_output_segment else current_model_output_segment
|
| 209 |
+
messages.append(dict(role='assistant', content=processed_segment))
|
| 210 |
# Append this processed segment to the cumulative list of display segments for Gradio
|
| 211 |
complete_assistant_response_for_gradio += [processed_segment + "\n\n"]
|
| 212 |
yield complete_assistant_response_for_gradio # Ensure the fully processed segment is yielded to Gradio
|