Archime commited on
Commit
010aaff
·
1 Parent(s): 11c4a5a

ajust task for spaces.GPU

Browse files
Files changed (2) hide show
  1. app.py +78 -2
  2. app/utils.py +0 -70
app.py CHANGED
@@ -17,7 +17,7 @@ from app.utils import (
17
  generate_coturn_config,
18
  read_and_stream_audio,
19
  stop_streaming,
20
- task
21
  )
22
  from app.session_utils import (
23
  on_load,
@@ -40,6 +40,11 @@ from app.ui_utils import (
40
  on_file_load
41
  )
42
  import nemo.collections.asr as nemo_asr
 
 
 
 
 
43
  # --------------------------------------------------------
44
  # Initialization
45
  # --------------------------------------------------------
@@ -63,6 +68,77 @@ streaming_audio_processor_config = StreamingAudioProcessorConfig(
63
  silence_threshold_chunks=1
64
  )
65
  streamer = StreamingAudioProcessor(speech_engine=canary_speech_engine,vad_engine=silero_vad_engine,cfg=streaming_audio_processor_config)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
 
68
  with gr.Blocks(theme=theme, css=css_style) as demo:
@@ -291,7 +367,7 @@ with gr.Blocks(theme=theme, css=css_style) as demo:
291
  yield f"Starting {task_type.lower()}...\n\n",gr.update(visible=False),gr.update(visible=True)
292
 
293
  # Boucle sur le générateur de `task()`
294
- for msg in task(session_hash_code,streamer=streamer):
295
  accumulated += msg
296
  yield accumulated,gr.update(visible=False),gr.update(visible=True)
297
 
 
17
  generate_coturn_config,
18
  read_and_stream_audio,
19
  stop_streaming,
20
+ # task
21
  )
22
  from app.session_utils import (
23
  on_load,
 
40
  on_file_load
41
  )
42
  import nemo.collections.asr as nemo_asr
43
+ from app.session_utils import (
44
+ get_active_task_flag_file,
45
+ get_folder_chunks
46
+ )
47
+ import spaces
48
  # --------------------------------------------------------
49
  # Initialization
50
  # --------------------------------------------------------
 
68
  silence_threshold_chunks=1
69
  )
70
  streamer = StreamingAudioProcessor(speech_engine=canary_speech_engine,vad_engine=silero_vad_engine,cfg=streaming_audio_processor_config)
71
+ @spaces.GPU
72
+ def task(session_id: str):
73
+ """Continuously read and delete .npz chunks while task is active."""
74
+ active_flag = get_active_task_flag_file(session_id)
75
+ with open(active_flag, "w") as f:
76
+ f.write("1")
77
+ chunk_dir = get_folder_chunks(session_id)
78
+ logging.info(f"[{session_id}] task started. {chunk_dir}")
79
+
80
+
81
+ try:
82
+ logging.info(f"[{session_id}] task loop started.")
83
+ yield f"Task started for session {session_id}\n\n"
84
+ while os.path.exists(active_flag):
85
+ if not os.path.exists(chunk_dir):
86
+ logging.warning(f"[{session_id}] No chunk directory found for task.")
87
+ yield "No audio chunks yet... waiting for stream.\n"
88
+ time.sleep(0.1)
89
+ continue
90
+ files = sorted(f for f in os.listdir(chunk_dir) if f.endswith(".npz"))
91
+ if not files:
92
+ time.sleep(0.1)
93
+ continue
94
+
95
+ for fname in files:
96
+ fpath = os.path.join(chunk_dir, fname)
97
+ try:
98
+ npz = np.load(fpath)
99
+ samples = npz["data"]
100
+ rate = int(npz["rate"])
101
+
102
+ text = f"Transcribed {fname}: {len(samples)} samples @ {rate}Hz"
103
+ new_texts = streamer.process_chunk(samples)
104
+ for text in new_texts:
105
+ print(text, end='', flush=True)
106
+ yield f"{text}"
107
+ logging.debug(f"[{session_id}] {new_texts}")
108
+ # yield f"{text}\n"
109
+ os.remove(fpath)
110
+ logging.debug(f"[{session_id}] Deleted processed chunk: {fname}")
111
+ except Exception as e:
112
+ logging.error(f"[{session_id}] Error processing {fname}: {e}")
113
+ yield f"Error processing {fname}: {e}\n"
114
+ continue
115
+
116
+ time.sleep(0.1)
117
+ # raise_function()
118
+ final_text = streamer.finalize_stream()
119
+ if final_text:
120
+ print(final_text, end='', flush=True)
121
+ yield f"\n{final_text}"
122
+ # yield f"\n"
123
+ logging.info(f"[{session_id}] task loop ended (flag removed).")
124
+
125
+ except Exception as e:
126
+ logging.error(f"[{session_id}] task error: {e}", exc_info=True)
127
+ yield f"Unexpected error: {e}\n"
128
+ finally:
129
+ # active_flag = os.path.join(TMP_DIR, f"transcribe_active_{session_id}.txt")
130
+ if os.path.exists(active_flag):
131
+ os.remove(active_flag)
132
+ logging.info(f"[{session_id}] task stopped.")
133
+ try:
134
+ if os.path.exists(chunk_dir) and not os.listdir(chunk_dir):
135
+ os.rmdir(chunk_dir)
136
+ logging.debug(f"[{session_id}] Cleaned up empty chunk dir.")
137
+ except Exception as e:
138
+ logging.error(f"[{session_id}] Cleanup error: {e}")
139
+ yield "\nCleanup error: {e}"
140
+ logging.info(f"[{session_id}] Exiting task loop.")
141
+ yield "\nTask finished and cleaned up.\n"
142
 
143
 
144
  with gr.Blocks(theme=theme, css=css_style) as demo:
 
367
  yield f"Starting {task_type.lower()}...\n\n",gr.update(visible=False),gr.update(visible=True)
368
 
369
  # Boucle sur le générateur de `task()`
370
+ for msg in task(session_hash_code):
371
  accumulated += msg
372
  yield accumulated,gr.update(visible=False),gr.update(visible=True)
373
 
app/utils.py CHANGED
@@ -156,78 +156,8 @@ else:
156
 
157
 
158
  # --- Audio Stream Function ---
159
- @spaces.GPU
160
- def task(session_id: str, streamer: StreamingAudioProcessor):
161
- """Continuously read and delete .npz chunks while task is active."""
162
- active_flag = get_active_task_flag_file(session_id)
163
- with open(active_flag, "w") as f:
164
- f.write("1")
165
- chunk_dir = get_folder_chunks(session_id)
166
- logging.info(f"[{session_id}] task started. {chunk_dir}")
167
 
168
 
169
- try:
170
- logging.info(f"[{session_id}] task loop started.")
171
- yield f"Task started for session {session_id}\n\n"
172
- while os.path.exists(active_flag):
173
- if not os.path.exists(chunk_dir):
174
- logging.warning(f"[{session_id}] No chunk directory found for task.")
175
- yield "No audio chunks yet... waiting for stream.\n"
176
- time.sleep(0.1)
177
- continue
178
- files = sorted(f for f in os.listdir(chunk_dir) if f.endswith(".npz"))
179
- if not files:
180
- time.sleep(0.1)
181
- continue
182
-
183
- for fname in files:
184
- fpath = os.path.join(chunk_dir, fname)
185
- try:
186
- npz = np.load(fpath)
187
- samples = npz["data"]
188
- rate = int(npz["rate"])
189
-
190
- text = f"Transcribed {fname}: {len(samples)} samples @ {rate}Hz"
191
- new_texts = streamer.process_chunk(samples)
192
- for text in new_texts:
193
- print(text, end='', flush=True)
194
- yield f"{text}"
195
- logging.debug(f"[{session_id}] {new_texts}")
196
- # yield f"{text}\n"
197
- os.remove(fpath)
198
- logging.debug(f"[{session_id}] Deleted processed chunk: {fname}")
199
- except Exception as e:
200
- logging.error(f"[{session_id}] Error processing {fname}: {e}")
201
- yield f"Error processing {fname}: {e}\n"
202
- continue
203
-
204
- time.sleep(0.1)
205
- # raise_function()
206
- final_text = streamer.finalize_stream()
207
- if final_text:
208
- print(final_text, end='', flush=True)
209
- yield f"\n{final_text}"
210
- # yield f"\n"
211
- logging.info(f"[{session_id}] task loop ended (flag removed).")
212
-
213
- except Exception as e:
214
- logging.error(f"[{session_id}] task error: {e}", exc_info=True)
215
- yield f"Unexpected error: {e}\n"
216
- finally:
217
- # active_flag = os.path.join(TMP_DIR, f"transcribe_active_{session_id}.txt")
218
- if os.path.exists(active_flag):
219
- os.remove(active_flag)
220
- logging.info(f"[{session_id}] task stopped.")
221
- try:
222
- if os.path.exists(chunk_dir) and not os.listdir(chunk_dir):
223
- os.rmdir(chunk_dir)
224
- logging.debug(f"[{session_id}] Cleaned up empty chunk dir.")
225
- except Exception as e:
226
- logging.error(f"[{session_id}] Cleanup error: {e}")
227
- yield "\nCleanup error: {e}"
228
- logging.info(f"[{session_id}] Exiting task loop.")
229
- yield "\nTask finished and cleaned up.\n"
230
-
231
 
232
 
233
 
 
156
 
157
 
158
  # --- Audio Stream Function ---
 
 
 
 
 
 
 
 
159
 
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
 
163