# NOTE: "Spaces: Sleeping" is Hugging Face Spaces page chrome captured by the
# scrape — it is not part of the application source.
# -*- coding: utf-8 -*-
"""app_voice.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1op-dtpDLHXAJm53Q-2S04nNsQGjcz18G
"""
import os
import numpy as np
import librosa
import gradio as gr
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder
import warnings

# Silence library warnings (librosa/TF are chatty at import and load time).
warnings.filterwarnings("ignore")

# Load trained model from the working directory at import time.
model = load_model("voice_verifier_model.h5")

# Rebuild the label encoder by hand instead of unpickling it: index 0 -> 'Fake',
# index 1 -> 'Real'. This MUST match the encoding used during training.
encoder = LabelEncoder()
encoder.classes_ = np.array(['Fake', 'Real'])  # Adjust if your label order is different
# Feature extraction
def extract_features(file_path, duration=3, offset=0.5, n_mfcc=40):
    """Extract a fixed-length MFCC feature vector from an audio file.

    Parameters
    ----------
    file_path : str
        Path to an audio file in any format librosa can decode.
    duration : float, optional
        Seconds of audio to load (default 3, matching the training setup).
    offset : float, optional
        Seconds to skip at the start of the file (default 0.5).
    n_mfcc : int, optional
        Number of MFCC coefficients per frame (default 40; must match the
        model's expected input size).

    Returns
    -------
    numpy.ndarray of shape (n_mfcc,), or None when the file cannot be
    decoded or processed.
    """
    try:
        # Load a short fixed window so every clip yields a comparable vector.
        audio, sample_rate = librosa.load(file_path, duration=duration, offset=offset)
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
        # Average across time frames -> a single (n_mfcc,) summary vector.
        return np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Deliberate best-effort: report and return None; the caller turns
        # this into a user-facing error message.
        print("Audio processing error:", e)
        return None
# Prediction function
def predict_audio(file):
    """Classify an uploaded voice clip as real-human or AI-generated.

    Parameters
    ----------
    file : str or None
        Filesystem path supplied by the Gradio Audio component
        (None when the user submits without uploading anything).

    Returns
    -------
    str
        A Markdown-formatted verdict including a confidence percentage.
    """
    # Gradio passes None when no file was uploaded; short-circuit instead of
    # letting librosa raise inside extract_features.
    if file is None:
        return "⚠️ **Oops! Couldn't understand the audio. Try again with a clear `.wav` file.**"
    features = extract_features(file)
    if features is None:
        return "⚠️ **Oops! Couldn't understand the audio. Try again with a clear `.wav` file.**"
    # The model expects a batch dimension: (1, n_features).
    features = features.reshape(1, -1)
    probs = model.predict(features)[0]
    index = int(np.argmax(probs))
    label = encoder.inverse_transform([index])[0]
    # Cast the numpy scalar to a plain float so round() yields a clean number
    # for string formatting.
    confidence = round(float(probs[index]) * 100, 2)
    if label.lower() == "real":
        emoji = "🧠🗣️"
        msg = f"{emoji} **Real Human Voice Detected!**\n🟢 Confidence: **{confidence}%**"
        advice = "✅ No robots here. It's a real person!"
    else:
        emoji = "🤖🎙️"
        msg = f"{emoji} **AI-Generated Voice Detected!**\n🔴 Confidence: **{confidence}%**"
        advice = "⚠️ Synthetic voice detected. Be cautious!"
    return f"{msg}\n\n{advice}"
# App description (Markdown shown above the Gradio interface).
# NOTE(review): the emoji below were reconstructed from mojibake in the
# scraped source; the surrounding text is unchanged.
description = """
🎙️ Welcome to **Voice Verifier 3000**
🔍 Detect whether a voice is **REAL** or **AI-generated** using a deep learning model trained on human vs synthetic audio.
---
### 🤔 Why Use This?
- 🛡️ Catch deepfake voices in seconds
- 🎙️ Validate voiceovers, interviews, or online calls
- 🔍 Useful for researchers, content moderators, or just curious minds
---
### 🚀 How to Use:
1. Upload a `.wav` file (3–5 seconds)
2. Click **Submit**
3. Instantly see the voice verdict with confidence level!
---
🔥 Built with ❤️ using TensorFlow + Librosa + Gradio
"""
# Gradio UI: wire the prediction function to a file-upload audio input and a
# Markdown verdict output, then launch the app (blocking call).
# NOTE(review): emoji in label/title reconstructed from mojibake in the
# scraped source.
demo = gr.Interface(
    fn=predict_audio,
    inputs=gr.Audio(type="filepath", label="🔊 Upload your voice (.wav only)"),
    outputs="markdown",
    title="🧠 Voice Verifier 3000: Human vs AI Voice Detector",
    description=description,
    theme="default",
)
demo.launch()