# UltraVox Audio Assistant — Hugging Face Space app
# (this Space was failing at startup with "Runtime error"; see fixes below)
# Standard library
import tempfile
from functools import lru_cache
from pathlib import Path

# Third-party
import gradio as gr
import librosa
import numpy as np
from transformers import pipeline
@lru_cache(maxsize=1)
def load_model():
    """Load the UltraVox audio-language pipeline once and reuse it.

    The original code imported ``lru_cache`` and commented "Cache the model"
    but never applied the decorator, so the multi-GB model was re-created on
    every interaction. ``maxsize=1`` makes this a singleton loader: the first
    call initialises the pipeline, later calls return the cached instance.

    Returns:
        A ``transformers`` pipeline for 'fixie-ai/ultravox-v0_5-llama-3_2-1b'.
    """
    return pipeline(
        model='fixie-ai/ultravox-v0_5-llama-3_2-1b',
        trust_remote_code=True,
        device_map="auto"  # Automatically uses GPU if available
    )
def process_audio(audio_file, user_message):
    """Run UltraVox on an audio clip plus an optional text question.

    Args:
        audio_file: Either a filesystem path / file-like object with a
            ``.name`` attribute (what ``gr.Audio(type="filepath")`` delivers),
            or a ``(sample_rate, np.ndarray)`` tuple (``type="numpy"`` /
            raw microphone capture).
        user_message: Optional question about the audio; a default prompt is
            substituted when empty.

    Returns:
        The model's text reply, or a human-readable error string on failure.
    """
    try:
        if audio_file is None:
            # Clicking "Process" with no recording previously crashed into
            # the generic handler with a cryptic librosa traceback.
            return "Please provide an audio file or a microphone recording."

        if isinstance(audio_file, tuple):
            # Raw (sample_rate, samples) pair from a numpy-typed component.
            sr, audio = audio_file
            audio = np.asarray(audio)
            if audio.dtype.kind in "iu":
                # Microphone capture is integer PCM; scale to float [-1, 1]
                # as the model expects (librosa.load does this for files).
                audio = audio.astype(np.float32) / np.iinfo(audio.dtype).max
            if audio.ndim > 1:
                # Down-mix stereo to mono, matching librosa.load's default.
                audio = audio.mean(axis=1)
        else:
            # Path string or temp-file wrapper exposing `.name`; avoids the
            # private tempfile._TemporaryFileWrapper API the original touched.
            audio_path = getattr(audio_file, "name", audio_file)
            audio, sr = librosa.load(audio_path, sr=16000)

        # Conversation template: system persona + the user's question.
        turns = [
            {
                "role": "system",
                "content": "You are a friendly and helpful AI assistant. Respond conversationally to the user's audio input."
            },
            {
                "role": "user",
                "content": user_message if user_message else "Describe what you heard in the audio."
            }
        ]

        pipe = load_model()
        result = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=100)

        # The ultravox pipeline may return either the generated string
        # directly or the conversation as a list of {"role","content"} dicts;
        # indexing a plain string with [-1]["content"] would crash, so
        # handle both shapes.
        if isinstance(result, str):
            return result
        return result[-1]["content"]
    except Exception as e:  # UI boundary: surface the error, never crash the app
        return f"Error processing audio: {str(e)}"
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks(title="UltraVox Audio Assistant") as demo:
    gr.Markdown("## 🎤 UltraVox Audio Assistant")
    gr.Markdown("Upload an audio file or speak via microphone, then ask questions about it.")

    with gr.Row():
        audio_input = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",
            label="Input Audio"
        )
        text_input = gr.Textbox(
            label="Your Question (Optional)",
            placeholder="Ask me about the audio..."
        )

    submit_btn = gr.Button("Process")
    output = gr.Textbox(label="AI Response", interactive=False)

    submit_btn.click(
        fn=process_audio,
        inputs=[audio_input, text_input],
        outputs=output
    )

    # Only register example rows whose audio files actually exist on disk:
    # gr.Examples raises at build time for missing paths, taking the whole
    # Space down with "Runtime error" before the UI ever loads.
    _example_rows = [
        ["examples/weather_report.wav", "What's the weather forecast?"],
        ["examples/meeting_notes.mp3", "Summarize the key points"],
    ]
    _example_rows = [row for row in _example_rows if Path(row[0]).is_file()]
    if _example_rows:
        gr.Examples(
            examples=_example_rows,
            inputs=[audio_input, text_input]
        )

if __name__ == "__main__":
    demo.launch()