""" MoodSync Therapy AI ------------------- Production-grade Voice Emotion Detection for Therapy Single dominant emotion with strong confidence Prepared for Music-as-Therapy integration Author: You Use case: Mental health, therapy, wellness, funding programs """ import warnings warnings.filterwarnings("ignore") import numpy as np import librosa import gradio as gr from transformers import pipeline # ===================================================== # MODEL CONFIGURATION # ===================================================== MODEL_NAME = "superb/wav2vec2-base-superb-er" TARGET_SAMPLE_RATE = 16000 # Confidence & decision thresholds (therapy-grade) CHUNK_CONFIDENCE_THRESHOLD = 0.60 AGREEMENT_THRESHOLD = 0.65 FINAL_CONFIDENCE_THRESHOLD = 0.70 # Core emotions only (higher accuracy, clinically safer) ALLOWED_EMOTIONS = {"HAPPY", "SAD", "ANGRY", "NEUTRAL"} EMOJI_MAP = { "HAPPY": "😊", "SAD": "😢", "ANGRY": "😠", "NEUTRAL": "😐" } # ===================================================== # LOAD MODEL # ===================================================== print("Loading Therapy-Grade Emotion Model...") emotion_pipe = pipeline( task="audio-classification", model=MODEL_NAME, top_k=None ) print("Model loaded successfully.") # ===================================================== # AUDIO PROCESSING # ===================================================== def preprocess_audio(audio): """ Convert to mono, resample, normalize (RMS) """ sr, data = audio if data is None or len(data) == 0: return None if len(data.shape) > 1: data = np.mean(data, axis=1) if sr != TARGET_SAMPLE_RATE: data = librosa.resample(data, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE) data = data.astype(np.float32) rms = np.sqrt(np.mean(data ** 2)) if rms > 0: data = data / rms return data def chunk_audio(audio, chunk_sec=1.0): """ Split audio into 1-second chunks """ chunk_size = int(TARGET_SAMPLE_RATE * chunk_sec) return [ audio[i:i + chunk_size] for i in range(0, len(audio) - chunk_size, chunk_size) ] # ===================================================== # EMOTION INFERENCE # ===================================================== def predict_chunks(chunks): """ Predict emotion per chunk and keep confident predictions """ predictions = [] for chunk in chunks: preds = emotion_pipe({ "raw": chunk, "sampling_rate": TARGET_SAMPLE_RATE }) top = max(preds, key=lambda x: x["score"]) label = top["label"].upper() if label in ALLOWED_EMOTIONS and top["score"] >= CHUNK_CONFIDENCE_THRESHOLD: predictions.append(top) return predictions def decide_final_emotion(predictions): """ Voting + confidence aggregation """ if len(predictions) < 2: return None, 0.0, "Not enough confident speech segments." votes = {} confidence_sum = {} for p in predictions: label = p["label"].upper() votes[label] = votes.get(label, 0) + 1 confidence_sum[label] = confidence_sum.get(label, 0) + p["score"] total_votes = sum(votes.values()) best_emotion = max(votes, key=votes.get) agreement = votes[best_emotion] / total_votes avg_confidence = confidence_sum[best_emotion] / votes[best_emotion] if agreement >= AGREEMENT_THRESHOLD and avg_confidence >= FINAL_CONFIDENCE_THRESHOLD: return best_emotion, avg_confidence, "High confidence emotional state detected." return None, 0.0, "Emotion detected but confidence not strong enough for therapy use." 
# =====================================================
# PUBLIC ANALYSIS FUNCTION
# =====================================================

def analyze_voice(audio):
    """
    End-to-end therapy-grade emotion analysis
    """
    if audio is None:
        return "🎤 No audio", "0%", "Please record or upload speech."

    processed = preprocess_audio(audio)
    if processed is None:
        return "❌ Invalid audio", "0%", "Unreadable audio input."

    chunks = chunk_audio(processed)
    predictions = predict_chunks(chunks)

    emotion, confidence, message = decide_final_emotion(predictions)

    if emotion is None:
        return "😐 UNCERTAIN", "Low confidence", message

    display = f"{EMOJI_MAP.get(emotion)} {emotion}"
    return display, f"{confidence * 100:.1f}%", message

# =====================================================
# MUSIC-AS-THERAPY (PLACEHOLDER)
# =====================================================

def therapy_music_placeholder(emotion):
    """
    Placeholder for future music therapy engine
    """
    # The Detected Emotion textbox holds the display string (e.g. "😊 HAPPY"),
    # so strip the emoji prefix before matching.
    parts = (emotion or "").split()
    emotion = parts[-1].upper() if parts else ""

    if emotion in ["SAD", "ANGRY"]:
        return "🎧 Music therapy will recommend calming, uplifting tracks."
    if emotion == "HAPPY":
        return "🎧 Music therapy will sustain and reinforce positive mood."
    if emotion == "NEUTRAL":
        return "🎧 Music therapy will gently elevate mood."
    return "🎧 Music therapy unavailable."

# =====================================================
# GRADIO UI
# =====================================================

with gr.Blocks(title="MoodSync Therapy AI") as demo:
    gr.Markdown("# 🎤 MoodSync Therapy AI")
    gr.Markdown(
        "Clinical-grade voice emotion detection for therapy.\n\n"
        "• Detects **one dominant emotion**\n"
        "• High confidence only\n"
        "• Designed for Music-as-Therapy systems"
    )

    with gr.Row():
        with gr.Column(scale=6):
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="numpy",
                label="Speak naturally for 3–10 seconds"
            )
            analyze_btn = gr.Button("Analyze Emotion", variant="primary")

        with gr.Column(scale=4):
            emotion_output = gr.Textbox(label="Detected Emotion", interactive=False)
            confidence_output = gr.Textbox(label="Confidence", interactive=False)
            explanation_output = gr.Textbox(label="Clinical Explanation", lines=3, interactive=False)

    therapy_btn = gr.Button("Music-as-Therapy (Preview)")
    therapy_output = gr.Textbox(label="Therapy Guidance", interactive=False)

    analyze_btn.click(
        fn=analyze_voice,
        inputs=audio_input,
        outputs=[emotion_output, confidence_output, explanation_output]
    )

    therapy_btn.click(
        fn=therapy_music_placeholder,
        inputs=emotion_output,
        outputs=therapy_output
    )

# =====================================================
# APP ENTRYPOINT
# =====================================================

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        debug=False
    )
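
# -----------------------------------------------------
# OPTIONAL LOCAL SMOKE TEST (sketch)
# -----------------------------------------------------
# A minimal offline check of analyze_voice, assuming Gradio's "numpy"
# audio format of (sample_rate, int16 ndarray). The 220 Hz tone and its
# amplitude are arbitrary and carry no real emotion, so the prediction
# itself is meaningless; this only verifies that preprocessing, chunking
# and inference run end to end. Call _smoke_test() manually from a REPL.
def _smoke_test():
    duration_sec = 4
    t = np.linspace(0, duration_sec, TARGET_SAMPLE_RATE * duration_sec, endpoint=False)
    tone = (0.1 * np.sin(2 * np.pi * 220.0 * t) * 32767).astype(np.int16)
    print(analyze_voice((TARGET_SAMPLE_RATE, tone)))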