"""
MoodSync Therapy AI
-------------------
Production-grade Voice Emotion Detection for Therapy
Single dominant emotion with strong confidence
Prepared for Music-as-Therapy integration
Author: You
Use case: Mental health, therapy, wellness, funding programs
"""
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import librosa
import gradio as gr
from transformers import pipeline
# =====================================================
# MODEL CONFIGURATION
# =====================================================
MODEL_NAME = "superb/wav2vec2-base-superb-er"
TARGET_SAMPLE_RATE = 16000
# Confidence & decision thresholds (therapy-grade)
CHUNK_CONFIDENCE_THRESHOLD = 0.60
AGREEMENT_THRESHOLD = 0.65
FINAL_CONFIDENCE_THRESHOLD = 0.70
# Core emotions only (higher accuracy, clinically safer)
ALLOWED_EMOTIONS = {"HAPPY", "SAD", "ANGRY", "NEUTRAL"}
# The superb ER checkpoint emits abbreviated labels ("neu", "hap",
# "ang", "sad"); map them to the canonical names used everywhere else.
LABEL_MAP = {
    "NEU": "NEUTRAL",
    "HAP": "HAPPY",
    "ANG": "ANGRY",
    "SAD": "SAD",
}
EMOJI_MAP = {
    "HAPPY": "😊",
    "SAD": "😢",
    "ANGRY": "😠",
    "NEUTRAL": "😐",
}
# =====================================================
# LOAD MODEL
# =====================================================
print("Loading Therapy-Grade Emotion Model...")
emotion_pipe = pipeline(
    task="audio-classification",
    model=MODEL_NAME,
    top_k=None
)
print("Model loaded successfully.")
# =====================================================
# AUDIO PROCESSING
# =====================================================
def preprocess_audio(audio):
    """
    Convert to mono, resample, normalize (RMS)
    """
    sr, data = audio
    if data is None or len(data) == 0:
        return None
    if len(data.shape) > 1:
        data = np.mean(data, axis=1)
    # Convert to float32 before resampling: librosa rejects integer PCM,
    # and Gradio delivers microphone audio as int16.
    data = data.astype(np.float32)
    if sr != TARGET_SAMPLE_RATE:
        data = librosa.resample(data, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
    rms = np.sqrt(np.mean(data ** 2))
    if rms > 0:
        data = data / rms
    return data
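# Illustrative usage (synthetic input, not part of the app flow): a 3-second
# 44.1 kHz stereo clip comes back as a mono, RMS-normalized float32 array of
# exactly 3 * 16000 samples:
#
#     sr, stereo = 44100, np.random.randn(3 * 44100, 2)
#     out = preprocess_audio((sr, stereo))
#     assert out.shape == (3 * TARGET_SAMPLE_RATE,)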
def chunk_audio(audio, chunk_sec=1.0):
    """
    Split audio into 1-second chunks
    """
    chunk_size = int(TARGET_SAMPLE_RATE * chunk_sec)
    # The +1 keeps the final full chunk when the audio length is an exact
    # multiple of chunk_size; any shorter trailing remainder is dropped.
    return [
        audio[i:i + chunk_size]
        for i in range(0, len(audio) - chunk_size + 1, chunk_size)
    ]
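# Illustrative: 3 seconds of 16 kHz audio yields exactly three 1-second
# chunks; a trailing partial second would be discarded:
#
#     chunks = chunk_audio(np.zeros(3 * TARGET_SAMPLE_RATE))
#     assert len(chunks) == 3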
# =====================================================
# EMOTION INFERENCE
# =====================================================
def predict_chunks(chunks):
    """
    Predict emotion per chunk and keep confident predictions
    """
    predictions = []
    for chunk in chunks:
        preds = emotion_pipe({
            "raw": chunk,
            "sampling_rate": TARGET_SAMPLE_RATE
        })
        top = max(preds, key=lambda x: x["score"])
        # Normalize the model's abbreviated label (e.g. "hap") to a
        # canonical emotion name before filtering and voting.
        label = LABEL_MAP.get(top["label"].upper(), top["label"].upper())
        if label in ALLOWED_EMOTIONS and top["score"] >= CHUNK_CONFIDENCE_THRESHOLD:
            predictions.append({"label": label, "score": top["score"]})
    return predictions
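# Note: the dict form {"raw": array, "sampling_rate": int} is how the
# transformers audio-classification pipeline accepts an in-memory waveform
# rather than a file path, so no temporary files are written per chunk.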
def decide_final_emotion(predictions):
    """
    Voting + confidence aggregation
    """
    if len(predictions) < 2:
        return None, 0.0, "Not enough confident speech segments."
    votes = {}
    confidence_sum = {}
    for p in predictions:
        label = p["label"].upper()
        votes[label] = votes.get(label, 0) + 1
        confidence_sum[label] = confidence_sum.get(label, 0) + p["score"]
    total_votes = sum(votes.values())
    best_emotion = max(votes, key=votes.get)
    agreement = votes[best_emotion] / total_votes
    avg_confidence = confidence_sum[best_emotion] / votes[best_emotion]
    if agreement >= AGREEMENT_THRESHOLD and avg_confidence >= FINAL_CONFIDENCE_THRESHOLD:
        return best_emotion, avg_confidence, "High confidence emotional state detected."
    return None, 0.0, "Emotion detected but confidence not strong enough for therapy use."
# =====================================================
# PUBLIC ANALYSIS FUNCTION
# =====================================================
def analyze_voice(audio):
    """
    End-to-end therapy-grade emotion analysis
    """
    if audio is None:
        return "🎤 No audio", "0%", "Please record or upload speech."
    processed = preprocess_audio(audio)
    if processed is None:
        return "⚠️ Invalid audio", "0%", "Unreadable audio input."
    chunks = chunk_audio(processed)
    predictions = predict_chunks(chunks)
    emotion, confidence, message = decide_final_emotion(predictions)
    if emotion is None:
        return "🤔 UNCERTAIN", "Low confidence", message
    display = f"{EMOJI_MAP.get(emotion)} {emotion}"
    return display, f"{confidence * 100:.1f}%", message
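# Illustrative outputs: a confident clip might return
# ("😊 HAPPY", "82.5%", "High confidence emotional state detected."),
# while silence or an ambiguous clip falls through to the UNCERTAIN branch.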
# =====================================================
# MUSIC-AS-THERAPY (PLACEHOLDER)
# =====================================================
def therapy_music_placeholder(emotion):
    """
    Placeholder for future music therapy engine
    """
    # The UI passes the display string (e.g. "😊 HAPPY"), so recover the
    # bare label before matching.
    label = emotion.split()[-1] if emotion else ""
    if label in ["SAD", "ANGRY"]:
        return "🎧 Music therapy will recommend calming, uplifting tracks."
    if label == "HAPPY":
        return "🎧 Music therapy will sustain and reinforce positive mood."
    if label == "NEUTRAL":
        return "🎧 Music therapy will gently elevate mood."
    return "🎧 Music therapy unavailable."
# =====================================================
# GRADIO UI
# =====================================================
with gr.Blocks(title="MoodSync Therapy AI") as demo:
    gr.Markdown("# 🎤 MoodSync Therapy AI")
    gr.Markdown(
        "Clinical-grade voice emotion detection for therapy.\n\n"
        "• Detects **one dominant emotion**\n"
        "• High confidence only\n"
        "• Designed for Music-as-Therapy systems"
    )
    with gr.Row():
        with gr.Column(scale=6):
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="numpy",
                label="Speak naturally for 3–10 seconds"
            )
            analyze_btn = gr.Button("Analyze Emotion", variant="primary")
        with gr.Column(scale=4):
            emotion_output = gr.Textbox(label="Detected Emotion", interactive=False)
            confidence_output = gr.Textbox(label="Confidence", interactive=False)
            explanation_output = gr.Textbox(label="Clinical Explanation", lines=3, interactive=False)
    therapy_btn = gr.Button("Music-as-Therapy (Preview)")
    therapy_output = gr.Textbox(label="Therapy Guidance", interactive=False)
    analyze_btn.click(
        fn=analyze_voice,
        inputs=audio_input,
        outputs=[emotion_output, confidence_output, explanation_output]
    )
    therapy_btn.click(
        fn=therapy_music_placeholder,
        inputs=emotion_output,
        outputs=therapy_output
    )
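# Optional sketch: for multi-user Spaces, requests can be serialized with
# Gradio's built-in queue before launch, e.g. `demo.queue()` on the Blocks
# object; the default launch below serves requests directly.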
# =====================================================
# APP ENTRYPOINT
# =====================================================
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        debug=False
    )