# moodsync/app.py
"""
MoodSync Therapy AI
-------------------
Production-grade Voice Emotion Detection for Therapy
Single dominant emotion with strong confidence
Prepared for Music-as-Therapy integration
Author: You
Use case: Mental health, therapy, wellness, funding programs
"""
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import librosa
import gradio as gr
from transformers import pipeline
# =====================================================
# MODEL CONFIGURATION
# =====================================================
MODEL_NAME = "superb/wav2vec2-base-superb-er"
TARGET_SAMPLE_RATE = 16000
# Confidence & decision thresholds (therapy-grade)
CHUNK_CONFIDENCE_THRESHOLD = 0.60
AGREEMENT_THRESHOLD = 0.65
FINAL_CONFIDENCE_THRESHOLD = 0.70
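# How the thresholds interact: each 1-second chunk must clear
# CHUNK_CONFIDENCE_THRESHOLD to be counted at all; the winning emotion then
# needs at least AGREEMENT_THRESHOLD of the chunk votes and an average score
# of at least FINAL_CONFIDENCE_THRESHOLD before a result is reported.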
# Core emotions only (higher accuracy, clinically safer)
ALLOWED_EMOTIONS = {"HAPPY", "SAD", "ANGRY", "NEUTRAL"}
# The SUPERB ER checkpoint reports abbreviated class names ("neu", "hap",
# "ang", "sad"); map them onto the full emotion names used throughout the app.
LABEL_MAP = {
    "NEU": "NEUTRAL",
    "HAP": "HAPPY",
    "ANG": "ANGRY",
    "SAD": "SAD"
}
EMOJI_MAP = {
    "HAPPY": "😊",
    "SAD": "😢",
    "ANGRY": "😠",
    "NEUTRAL": "😐"
}
# =====================================================
# LOAD MODEL
# =====================================================
print("Loading Therapy-Grade Emotion Model...")
emotion_pipe = pipeline(
task="audio-classification",
model=MODEL_NAME,
top_k=None
)
print("Model loaded successfully.")
# =====================================================
# AUDIO PROCESSING
# =====================================================
def preprocess_audio(audio):
    """
    Convert to mono, resample to 16 kHz, normalize (RMS)
    """
    sr, data = audio
    if data is None or len(data) == 0:
        return None
    # Gradio delivers integer PCM; resampling and inference need float samples.
    data = np.asarray(data, dtype=np.float32)
    if data.ndim > 1:
        data = np.mean(data, axis=1)
    if sr != TARGET_SAMPLE_RATE:
        data = librosa.resample(data, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
    rms = np.sqrt(np.mean(data ** 2))
    if rms > 0:
        data = data / rms
    return data
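# With type="numpy", Gradio passes audio as a (sample_rate, samples) tuple,
# typically an int16 array such as (48000, np.ndarray of shape (240000,)) for
# a five-second mono recording (illustrative shape). preprocess_audio reduces
# that to a float32 mono signal at 16 kHz with unit RMS.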
def chunk_audio(audio, chunk_sec=1.0):
    """
    Split audio into non-overlapping 1-second chunks (trailing partial chunk is dropped)
    """
    chunk_size = int(TARGET_SAMPLE_RATE * chunk_sec)
    return [
        audio[i:i + chunk_size]
        for i in range(0, len(audio) - chunk_size + 1, chunk_size)
    ]
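# Example: a 5.5-second clip at 16 kHz (88,000 samples) yields five 1-second
# chunks of 16,000 samples each; the trailing 0.5 s is discarded.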
# =====================================================
# EMOTION INFERENCE
# =====================================================
def predict_chunks(chunks):
    """
    Predict emotion per chunk and keep only confident predictions
    """
    predictions = []
    for chunk in chunks:
        preds = emotion_pipe({
            "raw": chunk,
            "sampling_rate": TARGET_SAMPLE_RATE
        })
        top = max(preds, key=lambda x: x["score"])
        # Normalize model labels (e.g. "hap") to the app's emotion names.
        label = LABEL_MAP.get(top["label"].upper(), top["label"].upper())
        if label in ALLOWED_EMOTIONS and top["score"] >= CHUNK_CONFIDENCE_THRESHOLD:
            predictions.append({"label": label, "score": top["score"]})
    return predictions
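# After filtering, `predictions` looks like
# [{"label": "HAPPY", "score": 0.83}, {"label": "HAPPY", "score": 0.77}, ...]
# (illustrative values); low-confidence or out-of-scope chunks are dropped.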
def decide_final_emotion(predictions):
"""
Voting + confidence aggregation
"""
if len(predictions) < 2:
return None, 0.0, "Not enough confident speech segments."
votes = {}
confidence_sum = {}
for p in predictions:
label = p["label"].upper()
votes[label] = votes.get(label, 0) + 1
confidence_sum[label] = confidence_sum.get(label, 0) + p["score"]
total_votes = sum(votes.values())
best_emotion = max(votes, key=votes.get)
agreement = votes[best_emotion] / total_votes
avg_confidence = confidence_sum[best_emotion] / votes[best_emotion]
if agreement >= AGREEMENT_THRESHOLD and avg_confidence >= FINAL_CONFIDENCE_THRESHOLD:
return best_emotion, avg_confidence, "High confidence emotional state detected."
return None, 0.0, "Emotion detected but confidence not strong enough for therapy use."
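# Worked example: 8 confident chunks, 6 of them voting HAPPY with scores
# averaging 0.82 -> agreement = 6 / 8 = 0.75 >= 0.65 and avg_confidence =
# 0.82 >= 0.70, so ("HAPPY", 0.82, ...) is returned (illustrative numbers).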
# =====================================================
# PUBLIC ANALYSIS FUNCTION
# =====================================================
def analyze_voice(audio):
"""
End-to-end therapy-grade emotion analysis
"""
if audio is None:
return "🎀 No audio", "0%", "Please record or upload speech."
processed = preprocess_audio(audio)
if processed is None:
return "❌ Invalid audio", "0%", "Unreadable audio input."
chunks = chunk_audio(processed)
predictions = predict_chunks(chunks)
emotion, confidence, message = decide_final_emotion(predictions)
if emotion is None:
return "😐 UNCERTAIN", "Low confidence", message
display = f"{EMOJI_MAP.get(emotion)} {emotion}"
return display, f"{confidence * 100:.1f}%", message
# =====================================================
# MUSIC-AS-THERAPY (PLACEHOLDER)
# =====================================================
def therapy_music_placeholder(emotion):
    """
    Placeholder for the future music therapy engine
    """
    # The Detected Emotion textbox holds strings such as "😊 HAPPY";
    # keep only the trailing label before matching.
    label = emotion.split()[-1].upper() if emotion else ""
    if label in ("SAD", "ANGRY"):
        return "🎧 Music therapy will recommend calming, uplifting tracks."
    if label == "HAPPY":
        return "🎧 Music therapy will sustain and reinforce positive mood."
    if label == "NEUTRAL":
        return "🎧 Music therapy will gently elevate mood."
    return "🎧 Music therapy unavailable."
# =====================================================
# GRADIO UI
# =====================================================
with gr.Blocks(title="MoodSync Therapy AI") as demo:
gr.Markdown("# 🎀 MoodSync Therapy AI")
    gr.Markdown(
        "Clinical-grade voice emotion detection for therapy.\n\n"
        "• Detects **one dominant emotion**\n"
        "• High confidence only\n"
        "• Designed for Music-as-Therapy systems"
    )
with gr.Row():
with gr.Column(scale=6):
audio_input = gr.Audio(
sources=["microphone", "upload"],
type="numpy",
label="Speak naturally for 3–10 seconds"
)
analyze_btn = gr.Button("Analyze Emotion", variant="primary")
with gr.Column(scale=4):
emotion_output = gr.Textbox(label="Detected Emotion", interactive=False)
confidence_output = gr.Textbox(label="Confidence", interactive=False)
explanation_output = gr.Textbox(label="Clinical Explanation", lines=3, interactive=False)
therapy_btn = gr.Button("Music-as-Therapy (Preview)")
therapy_output = gr.Textbox(label="Therapy Guidance", interactive=False)
analyze_btn.click(
fn=analyze_voice,
inputs=audio_input,
outputs=[emotion_output, confidence_output, explanation_output]
)
therapy_btn.click(
fn=therapy_music_placeholder,
inputs=emotion_output,
outputs=therapy_output
)
# =====================================================
# APP ENTRYPOINT
# =====================================================
if __name__ == "__main__":
demo.launch(
server_name="0.0.0.0",
server_port=7860,
debug=False
)