PsalmsJava committed on
Commit c367cdd · verified · 1 Parent(s): 6bbc448

Update app.py


Added Recommendation With Spotify

Files changed (1)
  1. app.py +239 -89
app.py CHANGED
@@ -1,132 +1,282 @@
  """
- Voice Mood Detector - Simple version for Hugging Face
  """
- import gradio as gr
  import numpy as np
  from transformers import pipeline
- import warnings
  warnings.filterwarnings("ignore")

- # Initialize the emotion detection model
  print("Loading emotion detection model...")
  try:
-     # Try the main model first
      pipe = pipeline(
-         "audio-classification",
          model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
      )
- except:
-     # Fallback model if the first fails
      pipe = pipeline(
          "audio-classification",
          model="superb/wav2vec2-base-superb-er"
      )

- print("Model loaded successfully!")

  def analyze_audio(audio):
      """
-     Analyze audio and return mood with confidence
-     audio: tuple of (sample_rate, audio_data) from Gradio
      """
      if audio is None:
-         return "🎤 No audio", "0%", "Please record or upload audio first"
-
      try:
-         # Get sample rate and audio data
          sample_rate, audio_data = audio
-
-         # Convert to mono if stereo
          if len(audio_data.shape) > 1:
              audio_data = np.mean(audio_data, axis=1)
-
-         # Run prediction
-         predictions = pipe({
              "raw": audio_data,
-             "sampling_rate": sample_rate
          })
-
-         # Get top result
-         top = predictions[0]
-         mood = top['label'].upper()
-         confidence = f"{top['score']*100:.1f}%"
-
-         # Mood emoji mapping
          emoji_map = {
              "ANGER": "😠 Anger",
-             "DISGUST": "🤢 Disgust",
              "FEAR": "😨 Fear",
              "HAPPY": "😊 Happy",
              "NEUTRAL": "😐 Neutral",
              "SADNESS": "😒 Sad",
              "SURPRISE": "😲 Surprise"
          }
-
-         mood_display = emoji_map.get(mood, f"🎤 {mood}")
-
-         # Create details
-         details = "All Predictions:\n"
-         for i, pred in enumerate(predictions[:5], 1):
-             details += f"{i}. {pred['label'].upper()}: {pred['score']*100:.1f}%\n"
-
-         return mood_display, confidence, details
-
      except Exception as e:
-         return "❌ Error", "0%", f"Analysis failed: {str(e)}"

- # Create Gradio interface
- with gr.Blocks(title="MoodSync AI Detector", theme=gr.themes.Soft()) as demo:
-     gr.Markdown("# 🎤 MoodSync AI Detector")
-     gr.Markdown("Record your voice or upload audio to detect emotional state")
-
-     with gr.Row():
-         with gr.Column():
-             # audio_input = gr.Audio(
-             #     sources=["microphone", "upload"],
-             #     type="numpy",
-             #     label="Speak or Upload Audio",
-             #     waveform_options={"show_controls": True}
-             # )
-             audio_input = gr.Audio(
-                 sources=["microphone", "upload"],
-                 type="numpy",
-                 label="Speak or Upload Audio"
-                 # The 'waveform_options' parameter has been removed entirely.
-             )
-             btn = gr.Button("Analyze Mood 🎯", variant="primary")
-
-         with gr.Column():
-             mood_output = gr.Textbox(label="Detected Mood", interactive=False)
-             confidence_output = gr.Textbox(label="Confidence", interactive=False)
-             details_output = gr.Textbox(
-                 label="Detailed Results",
-                 lines=6,
-                 interactive=False
              )
-
-     # Instructions
-     with gr.Accordion("📖 Instructions", open=False):
-         gr.Markdown("""
-         **How to use:**
-         1. Click the microphone icon and speak for 3-5 seconds
-         2. OR upload an audio file (WAV/MP3)
-         3. Click "Analyze Mood"
-         4. View your emotional state
-
-         **Tips for best results:**
-         - Speak clearly in English
-         - Keep background noise minimal
-         - Optimal length: 3-5 seconds
-         - Use mono audio if possible
-         """)
-
-     # Set up button action
-     btn.click(
          fn=analyze_audio,
          inputs=audio_input,
-         outputs=[mood_output, confidence_output, details_output]
      )

- # Launch the app
  if __name__ == "__main__":
-     demo.launch(server_name="0.0.0.0", server_port=7860)

  """
+ MoodSync AI + Spotify Recommender (emotion labels → Spotify params)
+ Single-file Gradio app for Hugging Face / local hosting.
  """
+
+ import os
+ import base64
+ import requests
+ import warnings
  import numpy as np
  from transformers import pipeline
+ import gradio as gr
+
  warnings.filterwarnings("ignore")

+ # ------------------------
+ # CONFIG - set via env or paste directly
+ # ------------------------
+ SPOTIFY_CLIENT_ID = os.environ.get("SPOTIFY_CLIENT_ID", "YOUR_CLIENT_ID")
+ SPOTIFY_CLIENT_SECRET = os.environ.get("SPOTIFY_CLIENT_SECRET", "YOUR_CLIENT_SECRET")
+
+ # ------------------------
+ # Load emotion model
+ # ------------------------
  print("Loading emotion detection model...")
  try:
      pipe = pipeline(
+         "audio-classification",
          model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
      )
+ except Exception as e:
+     print("Primary model failed, falling back:", e)
      pipe = pipeline(
          "audio-classification",
          model="superb/wav2vec2-base-superb-er"
      )
+ print("Model ready!")
+
+ # ------------------------
+ # Emotion -> Spotify params mapping (fine-grained)
+ # ------------------------
+ def emotion_to_spotify_params(emotion_label: str):
+     # emotion_label is expected uppercase, e.g. 'HAPPY' or 'SADNESS'.
+     e = (emotion_label or "").upper()
+     mapping = {
+         "HAPPY": {"valence": 0.9, "energy": 0.8, "tempo": 130.0},
+         "SURPRISE": {"valence": 0.75, "energy": 0.85, "tempo": 125.0},
+         "NEUTRAL": {"valence": 0.5, "energy": 0.45},
+         "SADNESS": {"valence": 0.2, "energy": 0.25, "acousticness": 0.8},
+         "FEAR": {"valence": 0.15, "energy": 0.25},
+         "ANGER": {"valence": 0.2, "energy": 0.9, "tempo": 135.0},
+         "DISGUST": {"valence": 0.15, "energy": 0.4}
+     }
+     return mapping.get(e, {"valence": 0.5, "energy": 0.5})
+
+ # ------------------------
+ # Spotify token retrieval (Client Credentials)
+ # ------------------------
+ def get_spotify_token():
+     client_id = SPOTIFY_CLIENT_ID
+     client_secret = SPOTIFY_CLIENT_SECRET
+     if not client_id or client_id.startswith("YOUR_") or not client_secret or client_secret.startswith("YOUR_"):
+         return None, "Spotify client id/secret not set. Please set SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET."

+     auth = f"{client_id}:{client_secret}"
+     b64 = base64.b64encode(auth.encode()).decode()
+     url = "https://accounts.spotify.com/api/token"
+     headers = {"Authorization": f"Basic {b64}"}
+     data = {"grant_type": "client_credentials"}
+
+     try:
+         r = requests.post(url, headers=headers, data=data, timeout=10)
+         r.raise_for_status()
+         token = r.json().get("access_token")
+         if not token:
+             return None, "No access token received from Spotify."
+         return token, None
+     except Exception as e:
+         return None, f"Spotify token error: {str(e)}"

+ # ------------------------
+ # Emotion analysis function
+ # ------------------------
  def analyze_audio(audio):
      """
+     audio: (sample_rate, np.ndarray) from gr.Audio with type='numpy'
+     Returns: mood_display (with emoji), confidence (string), details (string), raw_label (string)
      """
      if audio is None:
+         return "🎤 No audio", "0%", "Please record or upload audio first.", ""
+
      try:
          sample_rate, audio_data = audio
+
+         # convert stereo -> mono
+         if audio_data is None:
+             return "🎤 Invalid audio", "0%", "Uploaded audio unreadable.", ""
          if len(audio_data.shape) > 1:
              audio_data = np.mean(audio_data, axis=1)
+
+         preds = pipe({
              "raw": audio_data,
+             "sampling_rate": int(sample_rate)
          })
+
+         if not preds:
+             return "🎤 No prediction", "0%", "Model returned no predictions.", ""
+
+         top = preds[0]
+         label = top.get("label", "UNKNOWN").upper()
+         confidence = f"{top.get('score', 0.0)*100:.1f}%"
+
          emoji_map = {
              "ANGER": "😠 Anger",
+             "DISGUST": "🤢 Disgust",
              "FEAR": "😨 Fear",
              "HAPPY": "😊 Happy",
              "NEUTRAL": "😐 Neutral",
              "SADNESS": "😒 Sad",
              "SURPRISE": "😲 Surprise"
          }
+         mood_display = emoji_map.get(label, f"🎤 {label}")
+
+         # build details
+         details = ""
+         for i, p in enumerate(preds[:6], start=1):
+             lab = p.get("label", "unknown").upper()
+             sc = p.get("score", 0.0)
+             details += f"{i}. {lab}: {sc*100:.1f}%\n"
+
+         return mood_display, confidence, details, label
+
      except Exception as e:
+         return "❌ Error", "0%", f"Analysis failed: {str(e)}", ""

+ # ------------------------
+ # Spotify recommendation function (returns HTML)
+ # ------------------------
+ def recommend_music(emotion_label, genre, limit=5):
+     """
+     emotion_label: raw detected label (e.g., 'SADNESS')
+     genre: user-selected genre (string)
+     limit: number of tracks to fetch (max 20)
+     """
+     # Basic validation
+     if not emotion_label:
+         return "❌ Please analyze audio first (click Analyze Mood)."
+     if not genre:
+         return "❌ Please select a genre."
+
+     token, err = get_spotify_token()
+     if err:
+         return f"❌ {err}"
+
+     params = emotion_to_spotify_params(emotion_label)
+     # Build query for Spotify recommendations endpoint
+     url = "https://api.spotify.com/v1/recommendations"
+
+     query = {
+         "limit": int(limit),
+         "seed_genres": genre
+     }
+
+     # Add target params where available
+     if "valence" in params:
+         query["target_valence"] = params["valence"]
+     if "energy" in params:
+         query["target_energy"] = params["energy"]
+     if "acousticness" in params:
+         query["target_acousticness"] = params["acousticness"]
+     if "tempo" in params:
+         query["target_tempo"] = params["tempo"]
+
+     headers = {"Authorization": f"Bearer {token}"}
+
+     try:
+         r = requests.get(url, headers=headers, params=query, timeout=10)
+         # handle non-200
+         if r.status_code != 200:
+             try:
+                 msg = r.json()
+             except ValueError:
+                 msg = r.text
+             return f"❌ Spotify API error (status {r.status_code}): {msg}"
+
+         data = r.json()
+         tracks = data.get("tracks", [])
+         if not tracks:
+             return "❌ No tracks found for that genre+emotion combination. Try another genre."
+
+         # Build HTML for top results
+         html_lines = []
+         html_lines.append(f"<h3>🎧 Recommendations for <em>{emotion_label.title()}</em> + <strong>{genre}</strong></h3>")
+         html_lines.append("<div style='display:flex;flex-wrap:wrap;gap:12px;'>")
+
+         for t in tracks[:limit]:
+             name = t.get("name", "Unknown")
+             artists = ", ".join([a.get("name", "Unknown") for a in t.get("artists", [])])
+             spotify_url = t.get("external_urls", {}).get("spotify", "#")
+             album_images = t.get("album", {}).get("images", [])
+             img_url = album_images[0]["url"] if album_images else ""
+             # card HTML
+             card = (
+                 "<div style='width:220px;border-radius:8px;padding:8px;"
+                 "box-shadow:0 2px 8px rgba(0,0,0,0.08);background:#fff;'>"
              )
+             if img_url:
+                 card += f"<a href='{spotify_url}' target='_blank' rel='noopener noreferrer'><img src='{img_url}' alt='cover' style='width:100%;height:120px;object-fit:cover;border-radius:6px;'/></a>"
+             card += f"<div style='padding-top:8px;font-size:14px;'><strong>{name}</strong><br/><em style='font-size:13px;color:#555'>{artists}</em></div>"
+             card += f"<div style='padding-top:8px'><a href='{spotify_url}' target='_blank' rel='noopener noreferrer'>Open in Spotify ▶</a></div>"
+             card += "</div>"
+             html_lines.append(card)
+
+         html_lines.append("</div>")
+
+         return "\n".join(html_lines)
+
+     except Exception as e:
+         return f"❌ Recommendation error: {str(e)}"
+
+ # ------------------------
+ # Build Gradio UI
+ # ------------------------
+ with gr.Blocks(title="MoodSync AI + Spotify Recommender") as demo:
+     gr.Markdown("# 🎤 MoodSync AI + Spotify Recommender")
+     gr.Markdown("Record or upload a short audio clip (2–10s). The app will detect emotion and then suggest music based on that emotion + your chosen genre.")
+
+     with gr.Row():
+         with gr.Column(scale=6):
+             audio_input = gr.Audio(sources=["microphone", "upload"], type="numpy", label="Speak or Upload Audio (2-10s)")
+             analyze_btn = gr.Button("Analyze Mood 🎯", variant="primary")
+             gr.Markdown("**Tips:** Speak clearly, 2–10 seconds, minimal background noise.")
+         with gr.Column(scale=4):
+             mood_display = gr.Textbox(label="Detected Mood (with emoji)", interactive=False)
+             confidence_display = gr.Textbox(label="Confidence", interactive=False)
+             details_display = gr.Textbox(label="Detailed Predictions", lines=6, interactive=False)
+             # hidden/aux textbox to carry the raw emotion label for recommendation
+             raw_label = gr.Textbox(label="EmotionLabel", visible=False)
+
+     gr.Markdown("---")
+     gr.Markdown("## 🎶 Music Recommendation (Spotify)")
+     genre_dropdown = gr.Dropdown(
+         choices=[
+             "pop", "rock", "jazz", "hip-hop", "afrobeat", "classical", "r-n-b", "gospel", "electronic", "reggae"
+         ],
+         label="Choose a Genre (seed_genres)",
+         value="pop"
+     )
+
+     with gr.Row():
+         recommend_btn = gr.Button("Recommend Music 🎵", variant="secondary")
+     # Use HTML to display album covers nicely
+     recommendation_html = gr.HTML()
+
+     # Wire actions
+     analyze_btn.click(
          fn=analyze_audio,
          inputs=audio_input,
+         outputs=[mood_display, confidence_display, details_display, raw_label]
      )

+     # Recommend uses raw emotion label + genre
+     recommend_btn.click(
+         fn=recommend_music,
+         inputs=[raw_label, genre_dropdown],
+         outputs=[recommendation_html]
+     )
+
+     # Accordion with small notes
+     with gr.Accordion("How this works (technical)", open=False):
+         gr.Markdown(
+             """
+             - We use a wav2vec2-based model to detect emotion from the audio (tone, not words).
+             - The raw emotion label (e.g., HAPPY, SADNESS) maps to Spotify 'target' parameters like `valence` and `energy`.
+             - We call Spotify's Recommendations endpoint using Client Credentials to fetch tracks seeded by the chosen genre.
+             - No user login is required (server-side client credentials).
+             """
+         )
+
+ # Launch (suitable for a Hugging Face Space or local)
  if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=7860)
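
Reviewer note: to make the emotion-to-music flow concrete, here is a worked example of the request that recommend_music builds for a detected SADNESS label with the jazz seed. The values come directly from the emotion_to_spotify_params mapping above; the snippet only reproduces the query construction, it does not call the API.

    from urllib.parse import urlencode

    params = {"valence": 0.2, "energy": 0.25, "acousticness": 0.8}  # = emotion_to_spotify_params("SADNESS")
    query = {"limit": 5, "seed_genres": "jazz"}
    query.update({f"target_{k}": v for k, v in params.items()})
    print("https://api.spotify.com/v1/recommendations?" + urlencode(query))
    # -> https://api.spotify.com/v1/recommendations?limit=5&seed_genres=jazz
    #    &target_valence=0.2&target_energy=0.25&target_acousticness=0.8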
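The Client Credentials flow used by get_spotify_token() can be verified outside the app with a short standalone script. This is a sketch mirroring the code above, assuming SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET are set as environment variables:

    # Standalone check of the Client Credentials flow (same endpoint as get_spotify_token).
    import os, base64, requests

    auth = f"{os.environ['SPOTIFY_CLIENT_ID']}:{os.environ['SPOTIFY_CLIENT_SECRET']}"
    resp = requests.post(
        "https://accounts.spotify.com/api/token",
        headers={"Authorization": "Basic " + base64.b64encode(auth.encode()).decode()},
        data={"grant_type": "client_credentials"},
        timeout=10,
    )
    resp.raise_for_status()
    print(resp.json()["access_token"][:12] + "...")  # truncated bearer token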
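One caveat this commit does not address (stated here as an assumption worth verifying): gr.Audio with type='numpy' hands back int16 PCM at whatever rate the recorder used, while wav2vec2 checkpoints expect float32 mono at 16 kHz. A minimal, hypothetical preprocessing helper along these lines could be dropped into analyze_audio before calling the pipeline:

    import numpy as np

    def to_model_input(sample_rate, audio_data, target_sr=16000):
        """Hypothetical helper: mono float32 waveform resampled to target_sr."""
        if audio_data.ndim > 1:                       # (samples, channels) -> mono
            audio_data = audio_data.mean(axis=1)
        audio = audio_data.astype(np.float32)
        if audio.size and np.abs(audio).max() > 1.0:  # int16 PCM -> [-1.0, 1.0]
            audio /= 32768.0
        if sample_rate != target_sr:                  # crude linear-interp resample
            n = int(round(len(audio) * target_sr / sample_rate))
            audio = np.interp(
                np.linspace(0.0, len(audio) - 1, n),
                np.arange(len(audio)),
                audio,
            ).astype(np.float32)
        return target_sr, audio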