github-actions[bot] committed on
Commit
68a99fc
Β·
1 Parent(s): 4cf286c

Auto-deploy from GitHub: c1cbfa3a37f6853e24d067af55ebc1ab447d9fc0

Browse files
.gitattributes CHANGED
@@ -1,35 +1,5 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.wav filter=lfs diff=lfs merge=lfs -text
2
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
3
+ *.flac filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  *.pth filter=lfs diff=lfs merge=lfs -text
5
+ *.bin filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ # Set working directory
4
+ WORKDIR /app
5
+
6
+ # Install system dependencies
7
+ # ffmpeg and espeak-ng are required by the TTS engine; git/curl for pip VCS installs
8
+ RUN apt-get update && apt-get install -y \
9
+ ffmpeg \
10
+ git \
11
+ curl \
12
+ espeak-ng \
13
+ && rm -rf /var/lib/apt/lists/*
14
+
15
+ # Copy application files
16
+ COPY . .
17
+
18
+ # Install Python dependencies
19
+ RUN pip install --no-cache-dir -r requirements.txt
20
+
21
+ # Create necessary directories
22
+ RUN mkdir -p uploads temp_dir
23
+
24
+ # Expose port
25
+ EXPOSE 7860
26
+
27
+ # Run only the Flask app (worker starts automatically on first upload)
28
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,10 +1,31 @@
1
  ---
2
- title: TTS
3
- emoji: πŸ“š
4
  colorFrom: blue
5
- colorTo: blue
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: TTS Text-to-Speech Generator
3
+ emoji: 🎡
4
  colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
8
+ license: mit
9
  ---
10
 
11
+ # TTS Text-to-Speech Generator
12
+
13
+ A Python-based text-to-speech service with a neobrutalist web interface.
14
+
15
+ ## Features
16
+ - πŸ“ Text-to-Speech generation
17
+ - πŸ€– Multiple voices and speeds
18
+ - πŸ’Ύ SQLite database for queue management
19
+ - 🎨 Neobrutalist UI with smooth animations
20
+ - πŸ”„ Real-time status updates
21
+
22
+ ## Usage
23
+ Access the web interface at the Space URL above.
24
+
25
+ ## API Endpoints
26
+ - POST `/api/generate` - Generate audio from text
27
+ - GET `/api/files` - Get all files
28
+ - GET `/api/download/<id>` - Download generated audio
29
+
30
+ ---
31
+ *Auto-deployed from GitHub*
app.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, send_from_directory, send_file
2
+ from flask_cors import CORS
3
+ import sqlite3
4
+ import os
5
+ import uuid
6
+ from datetime import datetime
7
+ from werkzeug.utils import secure_filename
8
+ import threading
9
+ import subprocess
10
+ import time
11
+ import shutil
12
+
13
+ app = Flask(__name__)
14
+ CORS(app)
15
+
16
+ UPLOAD_FOLDER = 'uploads'
17
+ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
18
+ os.makedirs('temp_dir', exist_ok=True)
19
+
20
+ # Worker state
21
+ worker_thread = None
22
+ worker_running = False
23
+
24
def init_db():
    """Create the ``tasks`` table in tts_tasks.db if it does not exist.

    One row per TTS request; ``status`` moves through
    not_started -> processing -> completed/failed.
    """
    conn = sqlite3.connect('tts_tasks.db')
    try:
        c = conn.cursor()
        c.execute('''CREATE TABLE IF NOT EXISTS tasks
                     (id TEXT PRIMARY KEY,
                      text TEXT NOT NULL,
                      voice TEXT,
                      speed REAL,
                      status TEXT NOT NULL,
                      output_file TEXT,
                      created_at TEXT NOT NULL,
                      processed_at TEXT,
                      error TEXT)''')
        conn.commit()
    finally:
        # Release the handle even if the DDL fails (original leaked it).
        conn.close()
39
+
40
def start_worker():
    """Spawn the background TTS worker thread (idempotent).

    Safe to call on every request: does nothing once the worker is up.
    """
    global worker_thread, worker_running

    if worker_running:
        return

    worker_running = True
    worker_thread = threading.Thread(target=worker_loop, daemon=True)
    worker_thread.start()
    print("βœ… Worker thread started")
49
+
50
def worker_loop():
    """Main worker loop that processes TTS tasks"""
    print("πŸ€– TTS Worker started. Monitoring for new tasks...")

    # Subprocess working directory, interpreter, and DB poll cadence.
    CWD = "./"
    PYTHON_PATH = "python3" # Or just python
    POLL_INTERVAL = 2 # seconds

    # Runs until the module-level flag is cleared (set by start_worker).
    while worker_running:
        try:
            # Get next unprocessed task (oldest first, one at a time).
            conn = sqlite3.connect('tts_tasks.db')
            conn.row_factory = sqlite3.Row
            c = conn.cursor()
            c.execute('''SELECT * FROM tasks
                         WHERE status = 'not_started'
                         ORDER BY created_at ASC
                         LIMIT 1''')
            row = c.fetchone()
            conn.close()

            if row:
                task_id = row['id']
                text = row['text']
                voice = row['voice'] or '8'  # Default voice
                speed = row['speed'] or 1.0

                print(f"\n{'='*60}")
                print(f"🎡 Processing Task: {task_id}")
                print(f"πŸ“ Text: {text[:50]}...")
                print(f"{'='*60}")

                # Update status to processing
                update_status(task_id, 'processing')

                try:
                    # Write text to content.txt.
                    # NOTE(review): content.txt and output_audio.wav are shared
                    # fixed paths in CWD β€” only safe with a single worker.
                    with open('content.txt', 'w', encoding='utf-8') as f:
                        f.write(text)

                    # Run TTS command
                    # python3 -m tts_runner.runner --model kokoro --voice <voice> --speed <speed>
                    print(f"πŸ”„ Running TTS...")
                    command = [
                        PYTHON_PATH, "-m", "tts_runner.runner",
                        "--model", "kokoro",
                        "--voice", str(voice),
                        "--speed", str(speed)
                    ]

                    # check=True raises CalledProcessError on non-zero exit,
                    # which is caught below and recorded as a failed task.
                    subprocess.run(
                        command,
                        check=True,
                        cwd=CWD,
                        env={
                            **os.environ,
                            'PYTHONUNBUFFERED': '1',
                            'CUDA_LAUNCH_BLOCKING': '1'
                        }
                    )

                    # Check for output file (presumably written by the runner
                    # to this fixed name β€” see tts_runner).
                    output_filename = "output_audio.wav"
                    if os.path.exists(output_filename):
                        # Move to uploads folder under the task id.
                        target_filename = f"{task_id}.wav"
                        target_path = os.path.join(UPLOAD_FOLDER, target_filename)
                        shutil.move(output_filename, target_path)

                        print(f"βœ… Successfully processed: {target_filename}")

                        # Update database with success
                        update_status(task_id, 'completed', output_file=target_filename)
                    else:
                        raise Exception("Output audio file not found")

                except Exception as e:
                    # Any failure (subprocess error, I/O, missing output)
                    # marks the task failed with the error message.
                    print(f"❌ Failed to process: {task_id}")
                    print(f"Error: {str(e)}")
                    update_status(task_id, 'failed', error=str(e))

            else:
                # No tasks to process, sleep for a bit
                time.sleep(POLL_INTERVAL)

        except Exception as e:
            # DB errors etc. must not kill the worker thread; back off and retry.
            print(f"⚠️ Worker error: {str(e)}")
            time.sleep(POLL_INTERVAL)
138
+
139
def update_status(task_id, status, output_file=None, error=None):
    """Update the status of a task in the database.

    Args:
        task_id: Primary key of the task row.
        status: New state ('processing', 'completed', 'failed', ...).
        output_file: Generated audio filename (used only when completed).
        error: Failure reason (used only when failed).
    """
    now = datetime.now().isoformat()
    conn = sqlite3.connect('tts_tasks.db')
    try:
        c = conn.cursor()
        if status == 'completed':
            c.execute('''UPDATE tasks
                         SET status = ?, output_file = ?, processed_at = ?
                         WHERE id = ?''',
                      (status, output_file, now, task_id))
        elif status == 'failed':
            c.execute('''UPDATE tasks
                         SET status = ?, error = ?, processed_at = ?
                         WHERE id = ?''',
                      (status, str(error), now, task_id))
        else:
            # Intermediate states (e.g. 'processing') leave processed_at unset.
            c.execute('UPDATE tasks SET status = ? WHERE id = ?',
                      (status, task_id))
        conn.commit()
    finally:
        # Close even when the UPDATE raises (original leaked the handle).
        conn.close()
159
+
160
@app.route('/')
def index():
    """Serve the single-page UI (index.html) from the app directory."""
    return send_from_directory('.', 'index.html')
163
+
164
@app.route('/api/generate', methods=['POST'])
def generate_audio():
    """Queue a new TTS task.

    Expects JSON ``{"text": str, "voice": str?, "speed": float?}``.
    Returns 201 with the new task id, or 400 for missing/empty text.
    """
    data = request.json
    if not data or 'text' not in data:
        return jsonify({'error': 'No text provided'}), 400

    text = data['text']
    voice = data.get('voice', '8')  # worker also falls back to '8'
    speed = data.get('speed', 1.0)

    if not text.strip():
        return jsonify({'error': 'Text cannot be empty'}), 400

    task_id = str(uuid.uuid4())

    conn = sqlite3.connect('tts_tasks.db')
    try:
        conn.execute('''INSERT INTO tasks
                        (id, text, voice, speed, status, created_at)
                        VALUES (?, ?, ?, ?, ?, ?)''',
                     (task_id, text, voice, speed, 'not_started',
                      datetime.now().isoformat()))
        conn.commit()
    finally:
        # Close even if the insert fails so handles don't leak
        # (original left the connection open on error).
        conn.close()

    # Start worker on first request
    start_worker()

    return jsonify({
        'id': task_id,
        'status': 'not_started',
        'message': 'Task queued successfully'
    }), 201
196
+
197
@app.route('/api/files', methods=['GET'])
def get_files():
    """Return every task as a JSON array, newest first."""
    conn = sqlite3.connect('tts_tasks.db')
    conn.row_factory = sqlite3.Row
    cur = conn.cursor()
    cur.execute('SELECT * FROM tasks ORDER BY created_at DESC')
    rows = cur.fetchall()
    conn.close()

    wanted = ('id', 'text', 'status', 'output_file',
              'created_at', 'processed_at', 'error')
    files = [{key: row[key] for key in wanted} for row in rows]

    return jsonify(files)
219
+
220
@app.route('/api/download/<task_id>', methods=['GET'])
def download_file(task_id):
    """Stream the finished WAV for a task as an attachment, or 404."""
    conn = sqlite3.connect('tts_tasks.db')
    conn.row_factory = sqlite3.Row
    cur = conn.cursor()
    cur.execute('SELECT * FROM tasks WHERE id = ?', (task_id,))
    row = cur.fetchone()
    conn.close()

    # Unknown task, or a task that never produced audio.
    if row is None or not row['output_file']:
        return jsonify({'error': 'File not found'}), 404

    file_path = os.path.join(UPLOAD_FOLDER, row['output_file'])
    if not os.path.exists(file_path):
        return jsonify({'error': 'File missing on server'}), 404

    return send_file(file_path, as_attachment=True,
                     download_name=f"tts_{task_id}.wav")
237
+
238
@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: report service identity and worker state."""
    payload = {
        'status': 'healthy',
        'service': 'tts-generator',
        'worker_running': worker_running,
    }
    return jsonify(payload)
245
+
246
if __name__ == '__main__':
    init_db()

    banner = "=" * 60
    print("\n" + banner)
    print("πŸš€ TTS Generator API Server")
    print(banner)
    print("πŸ“Œ Worker will start automatically on first request")
    print(banner + "\n")

    # Use PORT environment variable for Hugging Face compatibility
    port = int(os.environ.get('PORT', 7860))
    app.run(debug=False, host='0.0.0.0', port=port)
index.html ADDED
@@ -0,0 +1,687 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>TTS Generator</title>
8
+ <style>
9
+ * {
10
+ margin: 0;
11
+ padding: 0;
12
+ box-sizing: border-box;
13
+ }
14
+
15
+ :root {
16
+ --bg: #0a0e27;
17
+ --surface: #141b3d;
18
+ --primary: #00ff88;
19
+ --secondary: #ff00ff;
20
+ --accent: #00d4ff;
21
+ --error: #ff1744;
22
+ --text: #ffffff;
23
+ --border: 4px;
24
+ }
25
+
26
+ body {
27
+ font-family: 'Space Grotesk', 'Courier New', monospace;
28
+ background: var(--bg);
29
+ color: var(--text);
30
+ min-height: 100vh;
31
+ overflow-x: hidden;
32
+ position: relative;
33
+ }
34
+
35
+ body::before {
36
+ content: '';
37
+ position: fixed;
38
+ top: 0;
39
+ left: 0;
40
+ width: 100%;
41
+ height: 100%;
42
+ background:
43
+ radial-gradient(circle at 20% 50%, rgba(0, 255, 136, 0.1) 0%, transparent 50%),
44
+ radial-gradient(circle at 80% 80%, rgba(255, 0, 255, 0.1) 0%, transparent 50%),
45
+ radial-gradient(circle at 40% 20%, rgba(0, 212, 255, 0.1) 0%, transparent 50%);
46
+ pointer-events: none;
47
+ z-index: 0;
48
+ }
49
+
50
+ .container {
51
+ max-width: 1400px;
52
+ margin: 0 auto;
53
+ padding: 2rem;
54
+ position: relative;
55
+ z-index: 1;
56
+ }
57
+
58
+ header {
59
+ text-align: center;
60
+ margin-bottom: 3rem;
61
+ animation: slideDown 0.6s cubic-bezier(0.68, -0.55, 0.265, 1.55);
62
+ }
63
+
64
+ @keyframes slideDown {
65
+ from {
66
+ opacity: 0;
67
+ transform: translateY(-50px);
68
+ }
69
+
70
+ to {
71
+ opacity: 1;
72
+ transform: translateY(0);
73
+ }
74
+ }
75
+
76
+ h1 {
77
+ font-size: clamp(2rem, 5vw, 4rem);
78
+ font-weight: 900;
79
+ background: linear-gradient(135deg, var(--primary) 0%, var(--accent) 50%, var(--secondary) 100%);
80
+ -webkit-background-clip: text;
81
+ -webkit-text-fill-color: transparent;
82
+ background-clip: text;
83
+ text-transform: uppercase;
84
+ letter-spacing: -2px;
85
+ margin-bottom: 1rem;
86
+ position: relative;
87
+ display: inline-block;
88
+ }
89
+
90
+ h1::after {
91
+ content: '';
92
+ position: absolute;
93
+ bottom: -10px;
94
+ left: 50%;
95
+ transform: translateX(-50%);
96
+ width: 60%;
97
+ height: 6px;
98
+ background: linear-gradient(90deg, transparent, var(--primary), transparent);
99
+ animation: glow 2s ease-in-out infinite;
100
+ }
101
+
102
+ @keyframes glow {
103
+
104
+ 0%,
105
+ 100% {
106
+ opacity: 0.5;
107
+ }
108
+
109
+ 50% {
110
+ opacity: 1;
111
+ }
112
+ }
113
+
114
+ .subtitle {
115
+ font-size: 1.2rem;
116
+ color: var(--accent);
117
+ letter-spacing: 2px;
118
+ }
119
+
120
+ .input-section {
121
+ background: var(--surface);
122
+ border: var(--border) solid var(--primary);
123
+ box-shadow: 8px 8px 0 var(--primary);
124
+ padding: 2rem;
125
+ margin-bottom: 3rem;
126
+ position: relative;
127
+ transition: all 0.3s ease;
128
+ animation: slideUp 0.6s cubic-bezier(0.68, -0.55, 0.265, 1.55) 0.2s both;
129
+ }
130
+
131
+ @keyframes slideUp {
132
+ from {
133
+ opacity: 0;
134
+ transform: translateY(50px);
135
+ }
136
+
137
+ to {
138
+ opacity: 1;
139
+ transform: translateY(0);
140
+ }
141
+ }
142
+
143
+ .input-section:hover {
144
+ transform: translate(-2px, -2px);
145
+ box-shadow: 12px 12px 0 var(--primary);
146
+ }
147
+
148
+ textarea {
149
+ width: 100%;
150
+ height: 150px;
151
+ background: rgba(0, 212, 255, 0.05);
152
+ border: 3px solid var(--accent);
153
+ color: var(--text);
154
+ padding: 1rem;
155
+ font-family: inherit;
156
+ font-size: 1.1rem;
157
+ resize: vertical;
158
+ margin-bottom: 1.5rem;
159
+ transition: all 0.3s ease;
160
+ }
161
+
162
+ textarea:focus {
163
+ outline: none;
164
+ border-color: var(--primary);
165
+ background: rgba(0, 255, 136, 0.05);
166
+ }
167
+
168
+ .controls {
169
+ display: flex;
170
+ gap: 1rem;
171
+ margin-bottom: 1.5rem;
172
+ flex-wrap: wrap;
173
+ }
174
+
175
+ .control-group {
176
+ flex: 1;
177
+ min-width: 200px;
178
+ }
179
+
180
+ label {
181
+ display: block;
182
+ margin-bottom: 0.5rem;
183
+ color: var(--accent);
184
+ font-weight: bold;
185
+ }
186
+
187
+ select, input[type="number"] {
188
+ width: 100%;
189
+ padding: 0.8rem;
190
+ background: var(--bg);
191
+ border: 2px solid var(--accent);
192
+ color: var(--text);
193
+ font-family: inherit;
194
+ font-size: 1rem;
195
+ }
196
+
197
+ .btn {
198
+ background: var(--primary);
199
+ color: var(--bg);
200
+ border: var(--border) solid var(--bg);
201
+ padding: 1rem 2rem;
202
+ font-size: 1.1rem;
203
+ font-weight: 900;
204
+ text-transform: uppercase;
205
+ cursor: pointer;
206
+ transition: all 0.2s ease;
207
+ box-shadow: 4px 4px 0 var(--bg);
208
+ letter-spacing: 1px;
209
+ position: relative;
210
+ width: 100%;
211
+ }
212
+
213
+ .btn:hover:not(:disabled) {
214
+ transform: translate(-2px, -2px);
215
+ box-shadow: 6px 6px 0 var(--bg);
216
+ }
217
+
218
+ .btn:active:not(:disabled) {
219
+ transform: translate(2px, 2px);
220
+ box-shadow: 2px 2px 0 var(--bg);
221
+ }
222
+
223
+ .btn:disabled {
224
+ opacity: 0.6;
225
+ cursor: not-allowed;
226
+ }
227
+
228
+ .btn-secondary {
229
+ background: var(--accent);
230
+ }
231
+
232
+ .btn-small {
233
+ padding: 0.5rem 1rem;
234
+ font-size: 0.85rem;
235
+ box-shadow: 3px 3px 0 var(--bg);
236
+ text-decoration: none;
237
+ display: inline-block;
238
+ color: var(--bg);
239
+ }
240
+
241
+ .btn-small:hover:not(:disabled) {
242
+ box-shadow: 4px 4px 0 var(--bg);
243
+ transform: translate(-2px, -2px);
244
+ }
245
+
246
+ .table-section {
247
+ animation: slideUp 0.6s cubic-bezier(0.68, -0.55, 0.265, 1.55) 0.4s both;
248
+ }
249
+
250
+ .table-wrapper {
251
+ overflow-x: auto;
252
+ background: var(--surface);
253
+ border: var(--border) solid var(--secondary);
254
+ box-shadow: 8px 8px 0 var(--secondary);
255
+ }
256
+
257
+ table {
258
+ width: 100%;
259
+ border-collapse: collapse;
260
+ }
261
+
262
+ thead {
263
+ background: linear-gradient(135deg, var(--primary), var(--accent));
264
+ }
265
+
266
+ th {
267
+ padding: 1.5rem 1rem;
268
+ text-align: left;
269
+ font-weight: 900;
270
+ text-transform: uppercase;
271
+ letter-spacing: 1px;
272
+ color: var(--bg);
273
+ border-right: 3px solid var(--bg);
274
+ }
275
+
276
+ th:last-child {
277
+ border-right: none;
278
+ }
279
+
280
+ tbody tr {
281
+ border-bottom: 2px solid rgba(0, 212, 255, 0.2);
282
+ transition: all 0.3s ease;
283
+ animation: fadeIn 0.5s ease;
284
+ }
285
+
286
+ @keyframes fadeIn {
287
+ from {
288
+ opacity: 0;
289
+ }
290
+
291
+ to {
292
+ opacity: 1;
293
+ }
294
+ }
295
+
296
+ tbody tr:hover {
297
+ background: rgba(0, 255, 136, 0.1);
298
+ }
299
+
300
+ td {
301
+ padding: 1.5rem 1rem;
302
+ color: var(--text);
303
+ }
304
+
305
+ .status {
306
+ display: inline-block;
307
+ padding: 0.5rem 1rem;
308
+ border: 3px solid;
309
+ font-weight: 900;
310
+ text-transform: uppercase;
311
+ font-size: 0.85rem;
312
+ letter-spacing: 1px;
313
+ }
314
+
315
+ .status-not_started {
316
+ background: var(--bg);
317
+ border-color: var(--accent);
318
+ color: var(--accent);
319
+ }
320
+
321
+ .status-processing {
322
+ background: var(--bg);
323
+ border-color: var(--primary);
324
+ color: var(--primary);
325
+ animation: pulse 1.5s ease-in-out infinite;
326
+ }
327
+
328
+ @keyframes pulse {
329
+
330
+ 0%,
331
+ 100% {
332
+ opacity: 1;
333
+ }
334
+
335
+ 50% {
336
+ opacity: 0.6;
337
+ }
338
+ }
339
+
340
+ .status-completed {
341
+ background: var(--primary);
342
+ border-color: var(--primary);
343
+ color: var(--bg);
344
+ }
345
+
346
+ .status-failed {
347
+ background: var(--error);
348
+ border-color: var(--error);
349
+ color: var(--text);
350
+ }
351
+
352
+ .text-cell {
353
+ max-width: 300px;
354
+ overflow: hidden;
355
+ text-overflow: ellipsis;
356
+ white-space: nowrap;
357
+ }
358
+
359
+ .empty-state {
360
+ text-align: center;
361
+ padding: 4rem 2rem;
362
+ color: var(--accent);
363
+ font-size: 1.2rem;
364
+ }
365
+
366
+ .refresh-btn {
367
+ position: fixed;
368
+ bottom: 2rem;
369
+ right: 2rem;
370
+ width: 60px;
371
+ height: 60px;
372
+ border-radius: 50%;
373
+ background: var(--secondary);
374
+ border: var(--border) solid var(--bg);
375
+ box-shadow: 4px 4px 0 var(--bg);
376
+ cursor: pointer;
377
+ transition: all 0.3s ease;
378
+ display: flex;
379
+ align-items: center;
380
+ justify-content: center;
381
+ font-size: 1.5rem;
382
+ z-index: 1000;
383
+ }
384
+
385
+ .refresh-btn:hover {
386
+ transform: rotate(180deg) scale(1.1);
387
+ box-shadow: 6px 6px 0 var(--bg);
388
+ }
389
+
390
+ /* Loader styles */
391
+ .loader-overlay {
392
+ position: fixed;
393
+ top: 0;
394
+ left: 0;
395
+ width: 100%;
396
+ height: 100%;
397
+ background: rgba(10, 14, 39, 0.95);
398
+ display: flex;
399
+ align-items: center;
400
+ justify-content: center;
401
+ z-index: 9999;
402
+ animation: fadeIn 0.3s ease;
403
+ }
404
+
405
+ .loader {
406
+ width: 80px;
407
+ height: 80px;
408
+ border: 6px solid var(--surface);
409
+ border-top: 6px solid var(--primary);
410
+ border-right: 6px solid var(--accent);
411
+ border-bottom: 6px solid var(--secondary);
412
+ border-radius: 50%;
413
+ animation: spin 1s linear infinite;
414
+ }
415
+
416
+ @keyframes spin {
417
+ 0% {
418
+ transform: rotate(0deg);
419
+ }
420
+
421
+ 100% {
422
+ transform: rotate(360deg);
423
+ }
424
+ }
425
+
426
+ .loader-text {
427
+ position: absolute;
428
+ margin-top: 120px;
429
+ font-size: 1.2rem;
430
+ font-weight: 900;
431
+ color: var(--primary);
432
+ text-transform: uppercase;
433
+ letter-spacing: 2px;
434
+ }
435
+
436
+ @media (max-width: 768px) {
437
+ .container {
438
+ padding: 1rem;
439
+ }
440
+
441
+ .input-section,
442
+ .table-wrapper {
443
+ box-shadow: 4px 4px 0 var(--primary);
444
+ }
445
+
446
+ th,
447
+ td {
448
+ padding: 1rem 0.5rem;
449
+ font-size: 0.9rem;
450
+ }
451
+
452
+ .text-cell {
453
+ max-width: 150px;
454
+ }
455
+ }
456
+
457
+ .notification {
458
+ position: fixed;
459
+ top: 2rem;
460
+ right: 2rem;
461
+ padding: 1.5rem 2rem;
462
+ background: var(--primary);
463
+ color: var(--bg);
464
+ border: var(--border) solid var(--bg);
465
+ box-shadow: 6px 6px 0 var(--bg);
466
+ font-weight: 900;
467
+ z-index: 2000;
468
+ animation: slideInRight 0.5s ease, slideOutRight 0.5s ease 3.5s;
469
+ }
470
+
471
+ @keyframes slideInRight {
472
+ from {
473
+ transform: translateX(400px);
474
+ opacity: 0;
475
+ }
476
+
477
+ to {
478
+ transform: translateX(0);
479
+ opacity: 1;
480
+ }
481
+ }
482
+
483
+ @keyframes slideOutRight {
484
+ to {
485
+ transform: translateX(400px);
486
+ opacity: 0;
487
+ }
488
+ }
489
+ </style>
490
+ </head>
491
+
492
+ <body>
493
+ <div class="container">
494
+ <header>
495
+ <h1>TTS Generator</h1>
496
+ <p class="subtitle">Text β€’ Process β€’ Audio</p>
497
+ </header>
498
+
499
+ <div class="input-section">
500
+ <h2 style="margin-bottom: 1.5rem; color: var(--primary);">Generate Audio</h2>
501
+
502
+ <textarea id="textInput" placeholder="Enter text to convert to speech..."></textarea>
503
+
504
+ <div class="controls">
505
+ <div class="control-group">
506
+ <label>Voice</label>
507
+ <select id="voiceSelect">
508
+ <option value="8">Default Voice</option>
509
+ <option value="0">Voice 0</option>
510
+ <option value="1">Voice 1</option>
511
+ <option value="2">Voice 2</option>
512
+ <option value="3">Voice 3</option>
513
+ <option value="4">Voice 4</option>
514
+ <option value="5">Voice 5</option>
515
+ <option value="6">Voice 6</option>
516
+ <option value="7">Voice 7</option>
517
+ <option value="9">Voice 9</option>
518
+ </select>
519
+ </div>
520
+ <div class="control-group">
521
+ <label>Speed</label>
522
+ <input type="number" id="speedInput" value="1.0" step="0.1" min="0.5" max="2.0">
523
+ </div>
524
+ </div>
525
+
526
+ <button class="btn" id="generateBtn">
527
+ πŸš€ Generate Audio
528
+ </button>
529
+ </div>
530
+
531
+ <div class="table-section">
532
+ <h2 style="margin-bottom: 1.5rem; color: var(--secondary);">Processing Queue</h2>
533
+ <div class="table-wrapper">
534
+ <table>
535
+ <thead>
536
+ <tr>
537
+ <th>Text</th>
538
+ <th>Status</th>
539
+ <th>Audio</th>
540
+ <th>Created</th>
541
+ <th>Processed</th>
542
+ </tr>
543
+ </thead>
544
+ <tbody id="filesTable">
545
+ <tr>
546
+ <td colspan="5" class="empty-state">No tasks yet. Start by generating audio!
547
+ </td>
548
+ </tr>
549
+ </tbody>
550
+ </table>
551
+ </div>
552
+ </div>
553
+ </div>
554
+
555
+ <button class="refresh-btn" id="refreshBtn" title="Refresh">πŸ”„</button>
556
+
557
+ <!-- Loader -->
558
+ <div class="loader-overlay" id="loader" style="display: none;">
559
+ <div>
560
+ <div class="loader"></div>
561
+ <div class="loader-text">Queuing...</div>
562
+ </div>
563
+ </div>
564
+
565
+ <script>
566
+ const API_URL = '/api';
567
+
568
+ // Elements
569
+ const textInput = document.getElementById('textInput');
570
+ const voiceSelect = document.getElementById('voiceSelect');
571
+ const speedInput = document.getElementById('speedInput');
572
+ const generateBtn = document.getElementById('generateBtn');
573
+ const loader = document.getElementById('loader');
574
+ const refreshBtn = document.getElementById('refreshBtn');
575
+
576
+ // Generate button
577
+ generateBtn.addEventListener('click', async () => {
578
+ const text = textInput.value.trim();
579
+ if (!text) {
580
+ showNotification('Please enter some text!', 'error');
581
+ return;
582
+ }
583
+
584
+ const voice = voiceSelect.value;
585
+ const speed = parseFloat(speedInput.value);
586
+
587
+ // Show loader
588
+ loader.style.display = 'flex';
589
+ generateBtn.disabled = true;
590
+
591
+ try {
592
+ const response = await fetch(`${API_URL}/generate`, {
593
+ method: 'POST',
594
+ headers: {
595
+ 'Content-Type': 'application/json'
596
+ },
597
+ body: JSON.stringify({
598
+ text,
599
+ voice,
600
+ speed
601
+ })
602
+ });
603
+
604
+ const data = await response.json();
605
+
606
+ if (response.ok) {
607
+ showNotification('Task queued successfully! πŸŽ‰');
608
+ textInput.value = '';
609
+ loadFiles();
610
+ } else {
611
+ showNotification(data.error || 'Generation failed', 'error');
612
+ }
613
+ } catch (error) {
614
+ showNotification('Network error: ' + error.message, 'error');
615
+ } finally {
616
+ // Hide loader
617
+ loader.style.display = 'none';
618
+ generateBtn.disabled = false;
619
+ }
620
+ });
621
+
622
+ // Load files
623
+ async function loadFiles() {
624
+ try {
625
+ const response = await fetch(`${API_URL}/files`);
626
+ const files = await response.json();
627
+
628
+ const tbody = document.getElementById('filesTable');
629
+
630
+ if (files.length === 0) {
631
+ tbody.innerHTML = '<tr><td colspan="5" class="empty-state">No tasks yet. Start by generating audio!</td></tr>';
632
+ return;
633
+ }
634
+
635
+ tbody.innerHTML = files.map(file => {
636
+ return `
637
+ <tr>
638
+ <td class="text-cell" title="${file.text}">${file.text}</td>
639
+ <td><span class="status status-${file.status}">${file.status.replace('_', ' ')}</span></td>
640
+ <td>
641
+ ${file.status === 'completed' && file.output_file ?
642
+ `<a href="${API_URL}/download/${file.id}" class="btn btn-small btn-secondary" target="_blank">⬇️ Download</a>`
643
+ : 'β€”'}
644
+ </td>
645
+ <td>${new Date(file.created_at).toLocaleString()}</td>
646
+ <td>${file.processed_at ? new Date(file.processed_at).toLocaleString() : 'β€”'}</td>
647
+ </tr>
648
+ `;
649
+ }).join('');
650
+ } catch (error) {
651
+ console.error('Error loading files:', error);
652
+ }
653
+ }
654
+
655
+ // Refresh button
656
+ refreshBtn.addEventListener('click', () => {
657
+ loadFiles();
658
+ const icon = refreshBtn.textContent;
659
+ refreshBtn.textContent = '⏳';
660
+ setTimeout(() => refreshBtn.textContent = icon, 500);
661
+ });
662
+
663
+ // Auto refresh every 5 seconds
664
+ setInterval(loadFiles, 5000);
665
+
666
+ // Initial load
667
+ loadFiles();
668
+
669
+ // Notification system
670
// Notification system: transient toast in the top-right corner.
function showNotification(message, type = 'success') {
    const toast = document.createElement('div');
    toast.className = 'notification';
    if (type === 'error') {
        toast.style.background = 'var(--error)';
        toast.style.borderColor = 'var(--error)';
    }
    toast.textContent = message;
    document.body.appendChild(toast);

    // 4s lifetime matches the CSS slide-out that starts at 3.5s.
    setTimeout(() => toast.remove(), 4000);
}
684
+ </script>
685
+ </body>
686
+
687
+ </html>
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Flask==3.0.0
2
+ flask-cors==4.0.0
3
+ werkzeug==3.0.1
4
+
5
+ git+https://github.com/jebin2/TTS.git#egg=tts-runner[kokoro]
setup.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# setup.py
import os
from setuptools import setup, find_packages

# Read README.md for the long description shown on PyPI.
this_directory = os.path.abspath(os.path.dirname(__file__))
with open(os.path.join(this_directory, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()

# Base dependencies
BASE_DEPS = [
    'numpy',
    'torch',
    'pydub',
    'sounddevice',
    'python-dotenv',
    # 'textual',  # From requirement_tui.txt
    # 'pyperclip',  # From requirement_tui.txt
    'scipy'  # Implicit dependency for wavfile reading in base
]

# Optional extras (engines)
extras_require = {
    "chatterbox": [
        "chatterbox-tts",
        "spacy",
        "peft"
    ],
    "kitten": [
        "kittentts",
        "spacy"
    ],
    "kokoro": [
        "kokoro>=0.9.4",
        "soundfile"
    ],
}

# "all" extra: union of every engine's deps.
# sorted() makes the list deterministic (set iteration order is not),
# so generated package metadata does not churn between builds.
all_deps = []
for deps in extras_require.values():
    all_deps.extend(deps)
extras_require["all"] = sorted(set(all_deps))

setup(
    name="tts-runner",
    version="1.0.0",
    author="Jebin Einstein",
    author_email="jebin@gmail.com",
    description="A flexible, multi-engine Text-to-Speech runner with TUI",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/jebin2/TTS",

    packages=find_packages(),
    include_package_data=True,

    install_requires=BASE_DEPS,
    extras_require=extras_require,

    entry_points={
        "console_scripts": [
            "tts-runner=tts_runner.runner:main",
            "tts-tui=tts_runner.tui:main",
        ],
    },

    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Topic :: Multimedia :: Sound/Audio :: Speech",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],

    python_requires=">=3.10",
)
tts_runner/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # tts_runner/__init__.py
2
+ __version__ = "1.0.0"
tts_runner/base.py ADDED
@@ -0,0 +1,660 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Standard library
from pathlib import Path
import os
import queue
import shutil
import sys
import threading
import time
import traceback
from functools import reduce

# Third-party
from pydub import AudioSegment
from dotenv import load_dotenv

# Local
from . import common

# Load environment overrides from a local .env file when one exists.
# (The original imported `os` twice; the duplicate import is removed.)
if os.path.exists(".env"):
    print("Loaded load_dotenv")
    load_dotenv(".env")
20
+ class BaseTTS:
21
+ def __init__(self, type, stream_audio=False, setup_signals=True):
22
+ """Initialize BaseTTS with environment settings and configuration."""
23
+ if os.getenv("USE_CPU_IF_POSSIBLE", None):
24
+ self.device = "cpu"
25
+ else:
26
+ self.device = "cuda" if common.is_gpu_available() else "cpu"
27
+ print(f'Using device:: {self.device}')
28
+ # Environment setup
29
+ os.environ["TORCH_USE_CUDA_DSA"] = "1"
30
+ os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
31
+ os.environ["HF_HUB_TIMEOUT"] = "120"
32
+
33
+ base_dir = os.path.dirname(os.path.abspath(__file__))
34
+
35
+ # File paths and directories
36
+ self.content_file = Path("content.txt")
37
+ self.final_output_audio = "output_audio.wav"
38
+ self.final_output_timestamps = "output_timestamps.json"
39
+ self.temp_output_dir = Path("temp_audio_chunks")
40
+
41
+ if not self.temp_output_dir.exists():
42
+ self.temp_output_dir.mkdir(parents=True, exist_ok=True)
43
+
44
+ # Voice and speed configuration
45
+ self.default_voice_index = 8
46
+ self.default_speed = 0.8
47
+
48
+ self.voices = [
49
+ None,
50
+ os.path.join(base_dir, 'voices/Main-4.wav'),
51
+ os.path.join(base_dir, 'voices/Ellen-TTS-10.wav'),
52
+ os.path.join(base_dir, 'voices/kratos(ambient)_en.wav'),
53
+ os.path.join(base_dir, 'voices/20250329-audio-american-male.wav'),
54
+ os.path.join(base_dir, 'voices/Ellen13y TTS-14.wav'),
55
+ os.path.join(base_dir, 'voices/Simple guy.wav'),
56
+ None,
57
+ os.path.join(base_dir, 'voices/bbc_news.wav'),
58
+ os.path.join(base_dir, 'voices/en_woman.wav'),
59
+ os.path.join(base_dir, 'voices/voice_preview_david castlemore - newsreader and educator.mp3'),
60
+ os.path.join(base_dir, 'voices/voice_preview_kelly - storytelling & motivational content.mp3'),
61
+ os.path.join(base_dir, 'voices/voice_preview_motivational coach - leader.mp3'),
62
+ os.path.join(base_dir, 'voices/voice_preview_sevan bomar - black motivational speaker.mp3',)
63
+ ]
64
+
65
+ # General settings
66
+ self.type = type
67
+ self.save_audio_file = True
68
+ self.stream_audio = stream_audio
69
+
70
+ # Audio streaming configuration
71
+ self.audio_queue = queue.Queue()
72
+ self.is_streaming = False
73
+ self.stream_thread = None
74
+ self.sample_rate = 24000 # Default, can be overridden by subclasses
75
+ self.last_playing_audio_duration_seconds = 0.1
76
+
77
+ # Text streaming configuration
78
+ self.text_queue = queue.Queue()
79
+ self.text_processing_thread = None
80
+ self.is_text_streaming = False
81
+ self.text_chunk_size = 10 # Number of words per chunk
82
+ self.current_voice = None
83
+ self.current_speed = None
84
+
85
+ # Text buffering for streaming input
86
+ self.temp_feed_words = []
87
+
88
+ # Emergency stop control
89
+ self.emergency_stop = False
90
+ if setup_signals:
91
+ self.setup_signal_handler()
92
+
93
+ # ===== UTILITY METHODS =====
94
+
95
+ def cleanup_temp_files(self):
96
+ """Clean up temporary audio files."""
97
+ if self.temp_output_dir.exists():
98
+ shutil.rmtree(self.temp_output_dir)
99
+ if os.path.exists(self.final_output_audio):
100
+ os.remove(self.final_output_audio)
101
+ if os.path.exists(self.final_output_timestamps):
102
+ os.remove(self.final_output_timestamps)
103
+ print("Temporary files cleaned up")
104
+
105
+ def setup_output_directory(self):
106
+ """Create clean output directory for audio chunks."""
107
+ if self.temp_output_dir.exists():
108
+ shutil.rmtree(self.temp_output_dir)
109
+ self.temp_output_dir.mkdir(exist_ok=True)
110
+
111
+ def read_content_file(self):
112
+ """Read content from the content file."""
113
+ with open(self.content_file, 'r', encoding='utf-8') as file:
114
+ return file.read().strip()
115
+
116
+ def validate_voice_index(self, args) -> str:
117
+ """Validate and return voice file path."""
118
+ voice_index = self.default_voice_index
119
+ try:
120
+ voice_index = int(getattr(args, 'voice'))
121
+ if not 0 <= voice_index < len(self.voices):
122
+ print(f"Invalid voice index {voice_index}, using default voice")
123
+ voice_index = self.default_voice_index
124
+ except:
125
+ voice_index = self.default_voice_index
126
+
127
+ print(f"Voice Value: {self.voices[voice_index]}")
128
+ return self.voices[voice_index]
129
+
130
+ def validate_speed(self, args) -> float:
131
+ """Validate and return speed value."""
132
+ speed_value = self.default_speed
133
+ try:
134
+ speed_value = float(getattr(args, 'speed'))
135
+ if speed_value <= 0:
136
+ print(f"Invalid speed {speed_value}, using default speed")
137
+ speed_value = self.default_speed
138
+ except:
139
+ speed_value = self.default_speed
140
+
141
+ print(f"Speed Value: {speed_value}")
142
+ return speed_value
143
+
144
+ def combine_audio_files(self, audio_files):
145
+ """Combine multiple audio files into one.
146
+
147
+ Args:
148
+ audio_files: List of audio file paths to combine
149
+
150
+ Returns:
151
+ True if successful, False otherwise
152
+ """
153
+ if not audio_files:
154
+ raise ValueError("No audio files to combine")
155
+
156
+ print(f"Combining {len(audio_files)} audio files...")
157
+ combined = reduce(
158
+ lambda acc, file_name: acc + AudioSegment.from_wav(file_name),
159
+ audio_files,
160
+ AudioSegment.empty()
161
+ )
162
+
163
+ # Export combined audio
164
+ combined.export(self.final_output_audio, format="wav")
165
+ print(f"Combined audio saved as {self.final_output_audio}")
166
+ return True
167
+
168
+ def split_sentences(self, text, max_chars=300):
169
+ """Common split method for all frameworks. Override in subclasses if needed.
170
+
171
+ Args:
172
+ text (str): Text to split
173
+ max_chars (int): Maximum characters per chunk
174
+
175
+ Returns:
176
+ list: List of text chunks
177
+ """
178
+ words = text.split()
179
+ chunks = []
180
+ current = ""
181
+
182
+ for word in words:
183
+ test_chunk = current + " " + word if current else word
184
+ if len(test_chunk) <= max_chars:
185
+ current = test_chunk
186
+ else:
187
+ if current:
188
+ chunks.append(current)
189
+ current = word
190
+
191
+ if current:
192
+ chunks.append(current)
193
+
194
+ return chunks
195
+
196
+ def generate_chunk_audio_file(self, audio, chunk_index) -> Path:
197
+ import soundfile as sf
198
+ chunk_file = self.temp_output_dir / f"chunk_{chunk_index:04d}.wav"
199
+ sf.write(chunk_file, audio, self.sample_rate)
200
+ return chunk_file
201
+
202
+ # ===== EMERGENCY STOP METHODS =====
203
+
204
+ def setup_signal_handler(self):
205
+ """Setup signal handler for Ctrl+C to stop everything immediately."""
206
+ import signal
207
+ signal.signal(signal.SIGINT, self.emergency_stop_handler)
208
+ signal.signal(signal.SIGTERM, self.emergency_stop_handler)
209
+
210
+ def emergency_stop_handler(self, signum, frame):
211
+ """Handle Ctrl+C - stop everything immediately."""
212
+ print("\nπŸ›‘ Emergency stop triggered! Stopping all operations...")
213
+ self.emergency_stop = True
214
+
215
+ # Stop audio playback immediately
216
+ try:
217
+ import sounddevice as sd
218
+ sd.stop()
219
+ except:
220
+ pass
221
+
222
+ # Stop streaming
223
+ self.force_stop_streaming()
224
+ self.force_stop_text_streaming()
225
+
226
+ print("βœ… Emergency stop completed. Exiting...")
227
+ sys.exit(0)
228
+
229
+ def force_stop_streaming(self):
230
+ """Force stop audio streaming immediately without waiting."""
231
+ if self.is_streaming:
232
+ self.is_streaming = False
233
+
234
+ # Clear the queue
235
+ try:
236
+ while not self.audio_queue.empty():
237
+ self.audio_queue.get_nowait()
238
+ except:
239
+ pass
240
+
241
+ # Send poison pill
242
+ try:
243
+ self.audio_queue.put(None)
244
+ except:
245
+ pass
246
+
247
+ print("πŸ”‡ Audio streaming force stopped")
248
+
249
+ def force_stop_text_streaming(self):
250
+ """Force stop text streaming immediately without waiting."""
251
+ if self.is_text_streaming:
252
+ self.is_text_streaming = False
253
+
254
+ # Clear the text queue
255
+ try:
256
+ while not self.text_queue.empty():
257
+ self.text_queue.get_nowait()
258
+ except:
259
+ pass
260
+
261
+ # Send poison pill
262
+ try:
263
+ self.text_queue.put(None)
264
+ except:
265
+ pass
266
+
267
+ print("πŸ“ Text streaming force stopped")
268
+
269
+ def check_emergency_stop(self):
270
+ """Check if emergency stop was triggered. Call this in loops."""
271
+ if self.emergency_stop:
272
+ raise KeyboardInterrupt("Emergency stop triggered")
273
+
274
+ # ===== AUDIO STREAMING METHODS =====
275
+
276
+ def _audio_stream_worker(self):
277
+ """Worker thread that plays audio chunks as they arrive."""
278
+ while self.is_streaming and not self.emergency_stop:
279
+ try:
280
+ audio_data = self.audio_queue.get(timeout=0.1)
281
+ if audio_data is None or self.emergency_stop: # Poison pill or emergency stop
282
+ break
283
+
284
+ self.last_playing_audio_duration_seconds = len(audio_data) / self.sample_rate
285
+ # Play audio chunk
286
+ import sounddevice as sd
287
+ sd.play(audio_data, samplerate=self.sample_rate)
288
+
289
+ # Check for emergency stop while playing
290
+ while sd.get_stream().active and not self.emergency_stop:
291
+ time.sleep(0.01)
292
+
293
+ if self.emergency_stop:
294
+ sd.stop()
295
+ break
296
+
297
+ except queue.Empty:
298
+ continue
299
+ except Exception as e:
300
+ if not self.emergency_stop:
301
+ print(f"Audio playback error: {e}")
302
+ break
303
+
304
+ def start_audio_streaming(self):
305
+ """Start the audio streaming thread."""
306
+ try:
307
+ import sounddevice as sd
308
+ if not self.is_streaming and not self.emergency_stop:
309
+ self.is_streaming = True
310
+ self.stream_thread = threading.Thread(target=self._audio_stream_worker)
311
+ self.stream_thread.daemon = True
312
+ self.stream_thread.start()
313
+ print("πŸ”Š Audio streaming started")
314
+ except:
315
+ self.stream_audio = False
316
+ print("πŸ”‡ No sounddevice available.")
317
+ pass
318
+
319
+ def stop_audio_streaming(self):
320
+ """Stop the audio streaming thread."""
321
+ if self.is_streaming:
322
+ self.is_streaming = False
323
+ self.audio_queue.put(None) # Poison pill
324
+ if self.stream_thread:
325
+ self.stream_thread.join(timeout=2) # Don't wait forever
326
+ print("πŸ”‡ Audio streaming stopped")
327
+
328
+ def queue_audio_for_streaming(self, audio_data, sample_rate=None):
329
+ """Queue audio data for streaming playback.
330
+
331
+ Args:
332
+ audio_data: Audio data (numpy array, tensor, or file path)
333
+ sample_rate (int, optional): Sample rate of the audio data
334
+ """
335
+ if self.is_streaming and not self.emergency_stop:
336
+ # Convert audio data to numpy array if needed
337
+ processed_audio = self._prepare_audio_for_streaming(audio_data, sample_rate)
338
+ if processed_audio is not None:
339
+ self.audio_queue.put(processed_audio)
340
+ return len(processed_audio) / self.sample_rate
341
+ return 0
342
+
343
+ def _prepare_audio_for_streaming(self, audio_data, sample_rate=None):
344
+ """Prepare audio data for streaming by converting to numpy array.
345
+
346
+ Args:
347
+ audio_data: Raw audio data (numpy array, tensor, or file path)
348
+ sample_rate (int, optional): Sample rate of the audio data
349
+
350
+ Returns:
351
+ numpy.ndarray: Audio data ready for streaming
352
+ """
353
+ import numpy as np
354
+
355
+ try:
356
+ # If it's already a numpy array, just ensure it's float32
357
+ if isinstance(audio_data, np.ndarray):
358
+ # Ensure audio is in the right format for sounddevice
359
+ audio = audio_data.astype(np.float32)
360
+ # Ensure values are in [-1, 1] range
361
+ if audio.max() > 1.0 or audio.min() < -1.0:
362
+ audio = audio / np.max(np.abs(audio))
363
+ return audio
364
+
365
+ # If it's a torch tensor, convert to numpy
366
+ elif hasattr(audio_data, 'cpu'): # torch tensor
367
+ audio = audio_data.cpu().numpy().astype(np.float32)
368
+ # Ensure values are in [-1, 1] range
369
+ if audio.max() > 1.0 or audio.min() < -1.0:
370
+ audio = audio / np.max(np.abs(audio))
371
+ return audio
372
+
373
+ # If it's a file path, load it
374
+ elif isinstance(audio_data, (str, Path)):
375
+ from scipy.io import wavfile
376
+ sr, audio = wavfile.read(str(audio_data))
377
+ # Convert to float32 and normalize
378
+ if audio.dtype == np.int16:
379
+ audio = audio.astype(np.float32) / 32768.0
380
+ elif audio.dtype == np.int32:
381
+ audio = audio.astype(np.float32) / 2147483648.0
382
+ else:
383
+ audio = audio.astype(np.float32)
384
+
385
+ # Update sample rate if provided
386
+ if sample_rate is None:
387
+ self.sample_rate = sr
388
+
389
+ return audio
390
+
391
+ else:
392
+ print(f"⚠️ Unsupported audio data type: {type(audio_data)}")
393
+ return None
394
+
395
+ except Exception as e:
396
+ print(f"❌ Error preparing audio for streaming: {e}")
397
+ return None
398
+
399
+ def wait_for_audio_streaming_complete(self):
400
+ """Wait for all queued audio to finish playing."""
401
+ time.sleep(0.5) # Small delay to ensure last chunk starts
402
+ while not self.audio_queue.empty() and not self.emergency_stop:
403
+ time.sleep(0.1)
404
+
405
+ time.sleep(self.last_playing_audio_duration_seconds)
406
+
407
+ # ===== TEXT STREAMING METHODS =====
408
+
409
+ def _text_processing_worker(self):
410
+ """Worker thread that processes text chunks from the queue."""
411
+ chunk_counter = 0
412
+
413
+ while self.is_text_streaming and not self.emergency_stop:
414
+ try:
415
+ text_chunk = self.text_queue.get(timeout=0.1)
416
+ if text_chunk is None or self.emergency_stop: # Poison pill or emergency stop
417
+ break
418
+
419
+ if not text_chunk.strip(): # Skip empty chunks
420
+ continue
421
+
422
+ print(f"πŸ“ Processing text chunk {chunk_counter + 1}: '{text_chunk[:50]}...'")
423
+
424
+ # Generate audio for this text chunk
425
+ try:
426
+ audio_files = self.generate_audio_files(
427
+ text_chunk,
428
+ self.current_voice,
429
+ self.current_speed,
430
+ chunk_id=chunk_counter
431
+ )
432
+
433
+ if audio_files:
434
+ print(f"βœ… Generated audio for chunk {chunk_counter + 1}")
435
+ else:
436
+ print(f"⚠️ No audio generated for chunk {chunk_counter + 1}")
437
+
438
+ except Exception as e:
439
+ print(f"❌ Error processing chunk {chunk_counter + 1}: {e}")
440
+
441
+ chunk_counter += 1
442
+
443
+ except queue.Empty:
444
+ continue
445
+ except Exception as e:
446
+ if not self.emergency_stop:
447
+ print(f"Text processing error: {e}")
448
+ break
449
+
450
+ print(f"πŸ“ Text processing completed. Processed {chunk_counter} chunks.")
451
+
452
+ def start_text_streaming(self, voice, speed):
453
+ """Start the text processing streaming thread."""
454
+ if not self.is_text_streaming and not self.emergency_stop:
455
+ self.current_voice = voice
456
+ self.current_speed = speed
457
+ self.is_text_streaming = True
458
+ self.text_processing_thread = threading.Thread(target=self._text_processing_worker)
459
+ self.text_processing_thread.daemon = True
460
+ self.text_processing_thread.start()
461
+ print("πŸ“ Text streaming started")
462
+
463
+ def stop_text_streaming(self):
464
+ """Stop the text processing streaming thread."""
465
+ if self.is_text_streaming:
466
+ self.is_text_streaming = False
467
+ self.text_queue.put(None) # Poison pill
468
+ if self.text_processing_thread:
469
+ self.text_processing_thread.join(timeout=5) # Wait a bit longer for text processing
470
+ print("πŸ“ Text streaming stopped")
471
+
472
+ def add_text_chunk(self, text_chunk):
473
+ """Add a text chunk to the processing queue.
474
+
475
+ Args:
476
+ text_chunk (str): Text chunk to process
477
+ """
478
+ if self.is_text_streaming and not self.emergency_stop and text_chunk.strip():
479
+ # Ensure chunk ends with punctuation for better TTS pronunciation
480
+ cleaned_chunk = text_chunk.strip()
481
+ if not any(cleaned_chunk.endswith(p) for p in ['.', '!', '?', ':', ';', ',']):
482
+ cleaned_chunk += '.' # Add period if no punctuation
483
+
484
+ self.text_queue.put(cleaned_chunk)
485
+ print(f"πŸ“ Queued text chunk: '{cleaned_chunk[:30]}...'")
486
+ else:
487
+ if not self.is_text_streaming:
488
+ print("⚠️ Text streaming not started. Call start_text_streaming() first.")
489
+
490
+ def add_text_by_words(self, text, words_per_chunk=None):
491
+ """Split text into word chunks and add to queue.
492
+
493
+ Args:
494
+ text (str): Full text to split and queue
495
+ words_per_chunk (int, optional): Number of words per chunk. Uses self.text_chunk_size if None.
496
+ """
497
+ if words_per_chunk is None:
498
+ words_per_chunk = self.text_chunk_size
499
+
500
+ words = text.split()
501
+
502
+ for i in range(0, len(words), words_per_chunk):
503
+ chunk = ' '.join(words[i:i + words_per_chunk])
504
+ self.add_text_chunk(chunk)
505
+
506
+ print(f"πŸ“ Split text into {(len(words) + words_per_chunk - 1) // words_per_chunk} chunks of {words_per_chunk} words each")
507
+
508
+ def wait_for_text_processing_complete(self):
509
+ """Wait for all queued text chunks to be processed."""
510
+ print("πŸ“ Waiting for text processing to complete...")
511
+ while not self.text_queue.empty() and not self.emergency_stop:
512
+ time.sleep(0.1)
513
+ time.sleep(1) # Extra time for last chunk to process
514
+ print("πŸ“ Text processing queue empty")
515
+
516
+ # ===== STREAMING TEXT INPUT METHODS =====
517
+
518
+ def feed_text_chunk(self, text_chunk):
519
+ """Feed a single text chunk for processing with smart buffering.
520
+
521
+ Args:
522
+ text_chunk (str): Text chunk to process
523
+ """
524
+ # Add new words to the buffer
525
+ self.temp_feed_words.extend(text_chunk.split())
526
+
527
+ # Combine all buffered words and split into sentences/chunks
528
+ all_words = " ".join(self.temp_feed_words)
529
+ sentences = self.split_sentences(all_words)
530
+ total_sentences = len(sentences)
531
+
532
+ # Process all complete sentences except the last one (which might be incomplete)
533
+ for i, sentence in enumerate(sentences):
534
+ if i + 1 != total_sentences: # Not the last sentence
535
+ print(f"πŸ“ Feeding chunk: {sentence}")
536
+ self.add_text_chunk(sentence)
537
+
538
+ # Keep the last sentence in buffer (might be incomplete)
539
+ self.temp_feed_words = sentences[-1].split() if sentences else []
540
+
541
+ def flush_remaining_words(self):
542
+ """Flush any remaining words in the buffer. Call this when done feeding text."""
543
+ if self.temp_feed_words:
544
+ chunk_text = " ".join(self.temp_feed_words)
545
+ print(f"πŸ“ Flushing final chunk: {chunk_text}")
546
+ self.add_text_chunk(chunk_text)
547
+ self.temp_feed_words = []
548
+
549
+ # ===== HIGH-LEVEL STREAMING METHODS =====
550
+
551
+ def stream_real_time_text(self, args):
552
+ """Initialize streaming for real-time text input.
553
+
554
+ Args:
555
+ args: Arguments containing voice, speed, etc.
556
+ """
557
+ speed = self.validate_speed(args)
558
+ voice = self.validate_voice_index(args)
559
+
560
+ # Setup directories
561
+ self.cleanup_temp_files()
562
+ self.setup_output_directory()
563
+
564
+ # Start both streaming systems
565
+ if self.stream_audio:
566
+ self.start_audio_streaming()
567
+
568
+ self.start_text_streaming(voice, speed)
569
+
570
+ print("πŸš€ Real-time text streaming initialized!")
571
+ print("πŸ“ Use feed_text_chunk() to add text incrementally")
572
+ print("πŸ“ Use add_text_chunk() to add individual chunks")
573
+ print("πŸ“ Use add_text_by_words() to split and add text automatically")
574
+ print("πŸ›‘ Use stop_all_streaming() when done")
575
+
576
+ def stop_all_streaming(self):
577
+ """Stop all streaming operations and cleanup."""
578
+ print("πŸ›‘ Stopping all streaming operations...")
579
+
580
+ # Flush any remaining words first
581
+ self.flush_remaining_words()
582
+
583
+ # Wait for queues to empty
584
+ self.wait_for_text_processing_complete()
585
+ self.wait_for_audio_streaming_complete()
586
+
587
+ # Stop streaming threads
588
+ self.stop_text_streaming()
589
+ self.stop_audio_streaming()
590
+
591
+ print("βœ… All streaming operations stopped")
592
+
593
+ # ===== BACKWARD COMPATIBILITY METHODS =====
594
+
595
+ def start_streaming(self):
596
+ """Start audio streaming (backward compatibility)."""
597
+ self.start_audio_streaming()
598
+
599
+ def stop_streaming(self):
600
+ """Stop audio streaming (backward compatibility)."""
601
+ self.stop_audio_streaming()
602
+
603
+ def wait_for_streaming_complete(self):
604
+ """Wait for audio streaming to complete (backward compatibility)."""
605
+ self.wait_for_audio_streaming_complete()
606
+
607
+ # ===== ABSTRACT METHODS =====
608
+
609
+ def generate_audio_files(self, text: str, voice: str, speed: float, chunk_id: int = None):
610
+ """Generate audio files. To be implemented by subclasses.
611
+
612
+ Args:
613
+ text (str): Text to convert to audio
614
+ voice (str): Voice file path
615
+ speed (float): Speed multiplier
616
+ chunk_id (int, optional): Unique identifier for this chunk (for streaming)
617
+ """
618
+ raise NotImplementedError("Subclasses must implement generate_audio_files")
619
+
620
+ # ===== MAIN METHODS =====
621
+
622
+ def save_audio(self, args) -> bool:
623
+ """Generate and save complete audio file (batch mode).
624
+
625
+ Args:
626
+ args: Arguments containing voice, speed, etc.
627
+
628
+ Returns:
629
+ True if successful, False otherwise
630
+ """
631
+ # Read content
632
+ text = self.read_content_file()
633
+ if not text:
634
+ raise ValueError("Warning: Content file is empty")
635
+
636
+ speed = self.validate_speed(args)
637
+ voice = self.validate_voice_index(args)
638
+
639
+ # Clean up temporary files
640
+ self.cleanup_temp_files()
641
+
642
+ # Setup output directory
643
+ self.setup_output_directory()
644
+
645
+ # Generate audio files (with optional streaming)
646
+ if self.stream_audio:
647
+ self.start_audio_streaming()
648
+
649
+ audio_files = self.generate_audio_files(text, voice, speed)
650
+
651
+ if not audio_files:
652
+ raise ValueError("Error: No audio files generated")
653
+
654
+ # Combine audio files
655
+ success = self.combine_audio_files(audio_files)
656
+
657
+ self.wait_for_audio_streaming_complete()
658
+ self.stop_audio_streaming()
659
+
660
+ return success
tts_runner/common.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import os
3
+ import shutil
4
+ import string
5
+ import secrets
6
+ import hashlib
7
+ import random
8
+ import time
9
+ import re
10
+
11
def get_files_count(directory_path):
    """Return the number of entries directly inside *directory_path*."""
    return sum(1 for _ in os.scandir(directory_path))
13
+
14
def generate_random_string(length=10):
    """Return a cryptographically random string of ASCII letters."""
    alphabet = string.ascii_letters
    return ''.join(secrets.choice(alphabet) for _ in range(length))
18
+
19
def generate_random_string_from_input(input_string, length=16):
    """Deterministically derive a pseudo-random alphanumeric string.

    The same *input_string* always produces the same output (the RNG is
    seeded with the SHA-256 digest of the input).

    Bug fix: the original called ``random.seed`` on the module-global RNG,
    silently clobbering random state for every other caller in the process.
    A private ``random.Random`` instance yields the identical sequence
    without that side effect.
    """
    # Hash the input string to get a consistent seed value.
    hashed_string = hashlib.sha256(input_string.encode()).hexdigest()

    # Private RNG: same seeding semantics as random.seed(), no global impact.
    rng = random.Random(hashed_string)

    characters = string.ascii_letters + string.digits
    return ''.join(rng.choice(characters) for _ in range(length))
32
+
33
def is_mostly_black(frame, black_threshold=20, percentage_threshold=0.9, sample_rate=10):
    """
    Fast black frame detection using pixel sampling.

    Args:
        frame: OpenCV BGR frame (NumPy array)
        black_threshold: grayscale value below which a pixel is considered black
        percentage_threshold: fraction of black pixels to consider frame mostly black
        sample_rate: sample every N-th pixel in both dimensions (higher = faster)
    Returns:
        True if mostly black, False otherwise
    """
    # Robustness fix: answer the trivial cases BEFORE importing cv2, so a
    # None/empty frame can be classified even when OpenCV is not installed.
    if frame is None or frame.size == 0:
        return True
    import cv2
    import numpy as np
    # Convert to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Sample every N-th pixel in both dimensions
    sampled = gray[::sample_rate, ::sample_rate]
    black_count = np.sum(sampled < black_threshold)
    total_count = sampled.size
    return (black_count / total_count) >= percentage_threshold
56
+
57
def only_alpha(text: str) -> str:
    """Drop every character outside a-z/A-Z and lowercase the remainder."""
    # Equivalent to re.sub(r'[^a-zA-Z]', '', text).lower(): ASCII letters only.
    return ''.join(ch for ch in text if ch.isascii() and ch.isalpha()).lower()
60
+
61
def manage_gpu(size_gb: float = 0, gpu_index: int = 0, action: str = "check"):
    """
    Manage GPU memory:
    - check -> just prints memory + process table
    - clear_cache -> clears PyTorch cache
    - kill -> kills all GPU processes

    Returns:
        True when the GPU has more than *size_gb* GB free; False on any
        failure (pynvml missing, no GPU, NVML error) — best-effort contract.
    """
    try:
        import pynvml, signal, gc
        pynvml.nvmlInit()
        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_index)
        info = pynvml.nvmlDeviceGetMemoryInfo(handle)

        free_gb = info.free / 1024**3
        total_gb = info.total / 1024**3

        print(f"\nGPU {gpu_index}: Free {free_gb:.2f} GB / Total {total_gb:.2f} GB")

        # Show processes
        processes = pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
        print("\nActive GPU Processes:")
        print(f"{'PID':<8} {'Process Name':<40} {'Used (GB)':<10}")
        print("-" * 60)
        for p in processes:
            used_gb = p.usedGpuMemory / 1024**3
            proc_name = pynvml.nvmlSystemGetProcessName(p.pid).decode(errors="ignore")
            print(f"{p.pid:<8} {proc_name:<40} {used_gb:.2f}")

        if action == "clear_cache":
            try:
                import torch
                gc.collect()
                gc.collect()
                torch.cuda.empty_cache()
                torch.cuda.reset_peak_memory_stats()
                torch.cuda.synchronize()
                time.sleep(1)
                print("\n🧹 Cleared PyTorch CUDA cache")
            except ImportError:
                print("\n⚠️ PyTorch not installed, cannot clear cache.")

        elif action == "kill":
            for p in processes:
                proc_name = pynvml.nvmlSystemGetProcessName(p.pid).decode(errors="ignore")
                try:
                    os.kill(p.pid, signal.SIGKILL)
                    print(f"❌ Killed {p.pid} ({proc_name})")
                except Exception as e:
                    print(f"⚠️ Could not kill {p.pid}: {e}")
            # Recurse once to clear caches now that the processes are gone.
            manage_gpu(action="clear_cache")
            gc.collect()
            gc.collect()
        return free_gb > size_gb
    except Exception:
        # Bug fix: the original bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt; keep the best-effort False-on-failure contract
        # for ordinary errors only.
        return False
115
+
116
def is_gpu_available(verbose=True):
    """Return True when CUDA is present AND currently usable.

    A tiny test allocation distinguishes "CUDA compiled in but the device is
    busy or in exclusive mode" from a genuinely usable GPU.

    Raises:
        RuntimeError: re-raised for CUDA errors other than busy/unavailable.
    """
    import torch
    if not torch.cuda.is_available():
        if verbose:
            print("CUDA not available.")
        return False

    try:
        # Try a tiny allocation to check if GPU is free & usable
        torch.empty(1, device="cuda")
        if verbose:
            print(f"CUDA available. Using device: {torch.cuda.get_device_name(0)}")
        return True
    except RuntimeError as e:
        if "CUDA-capable device(s) is/are busy or unavailable" in str(e) or \
           "CUDA error" in str(e):
            if verbose:
                # Bug fix: message previously read the garbled "Please CPU."
                print("CUDA detected but busy/unavailable. Please use CPU.")
            return False
        raise  # re-raise if it's some other unexpected error
tts_runner/engines/__init__.py ADDED
File without changes
tts_runner/engines/chatterbox.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from typing import List
3
+ import spacy
4
+ import torchaudio as ta
5
+ import torch
6
+ from ..base import BaseTTS
7
+
8
+ class ChatterboxTTSProcessor(BaseTTS):
9
+ """Text-to-Speech processor using ChatterboxTTS."""
10
+
11
+ def __init__(self, stream_audio=False):
12
+ super().__init__("Chatterbox", stream_audio=stream_audio)
13
+ print("Initializing Chatterbox...")
14
+ from chatterbox.tts import ChatterboxTTS
15
+ print("Loading Modal...")
16
+ self.model = ChatterboxTTS.from_pretrained(device=self.device)
17
+
18
+ self.nlp=None
19
+ try:
20
+ self.nlp = spacy.load("en_core_web_sm")
21
+ except OSError:
22
+ from spacy.cli import download
23
+ download("en_core_web_sm")
24
+ self.nlp = spacy.load("en_core_web_sm")
25
+ print("Model loaded successfully")
26
+
27
+ def tokenize_sentences(self, text):
28
+ """Split text into sentences using spaCy.
29
+
30
+ Args:
31
+ text: Input text to tokenize
32
+
33
+ Returns:
34
+ List of sentence strings
35
+ """
36
+ doc = self.nlp(text)
37
+ return [sent.text.strip() for sent in doc.sents if sent.text.strip()]
38
+
39
+ def norm_and_token_count(self, text):
40
+ """Get normalized text and token count.
41
+
42
+ Args:
43
+ text: Input text to normalize and count tokens
44
+
45
+ Returns:
46
+ Tuple of (normalized_text, token_count)
47
+ """
48
+ from chatterbox.tts import punc_norm
49
+ with torch.inference_mode():
50
+ normalized = punc_norm(text)
51
+ tokens = self.model.tokenizer.text_to_tokens(normalized)
52
+ token_count = tokens.shape[1]
53
+
54
+ # Clear tokens from GPU memory immediately
55
+ if hasattr(tokens, 'cpu'):
56
+ tokens = tokens.cpu()
57
+ del tokens
58
+ return normalized, token_count
59
+
60
+ def split_sentences(self, text, max_tokens=200):
61
+ """Split text into chunks based on token count.
62
+
63
+ Args:
64
+ text: Input text to split
65
+ max_tokens: Maximum tokens per chunk
66
+
67
+ Returns:
68
+ List of text chunks
69
+ """
70
+ sentences = self.tokenize_sentences(text)
71
+ chunks = []
72
+ current = ""
73
+
74
+ for sentence in sentences:
75
+ # Check if sentence alone exceeds max tokens
76
+ _, sentence_tokens = self.norm_and_token_count(sentence)
77
+ if sentence_tokens > max_tokens:
78
+ # If current chunk has content, save it first
79
+ if current:
80
+ chunks.append(current.strip())
81
+ current = ""
82
+
83
+ # Split long sentence by words if it's too long
84
+ words = sentence.split()
85
+ temp_chunk = ""
86
+
87
+ for word in words:
88
+ test_chunk = (temp_chunk + " " + word).strip() if temp_chunk else word
89
+ _, test_tokens = self.norm_and_token_count(test_chunk)
90
+
91
+ if test_tokens <= max_tokens:
92
+ temp_chunk = test_chunk
93
+ else:
94
+ if temp_chunk:
95
+ chunks.append(temp_chunk.strip())
96
+ temp_chunk = word
97
+
98
+ if temp_chunk:
99
+ current = temp_chunk.strip()
100
+ continue
101
+
102
+ # Try adding sentence to current chunk
103
+ candidate = (current + " " + sentence).strip() if current else sentence.strip()
104
+ _, token_count = self.norm_and_token_count(candidate)
105
+
106
+ if token_count <= max_tokens:
107
+ current = candidate
108
+ else:
109
+ # Current chunk is full, save it and start new one
110
+ if current:
111
+ chunks.append(current.strip())
112
+ current = sentence.strip()
113
+
114
+ # Don't forget the last chunk
115
+ if current:
116
+ chunks.append(current.strip())
117
+
118
+ return chunks
119
+
120
+ def generate_chunk_audio_file(self, sentence: str, chunk_index: int, voice: str, speed: float) -> Path:
121
+ wav = self.model.generate(
122
+ sentence,
123
+ audio_prompt_path=voice,
124
+ temperature=speed
125
+ )
126
+
127
+ # Save sentence to numbered file
128
+ chunk_file = self.temp_output_dir / f"chunk_{chunk_index:04d}.wav"
129
+ ta.save(str(chunk_file), wav, self.model.sr)
130
+ del wav
131
+
132
+ if self.stream_audio:
133
+ self.queue_audio_for_streaming(str(chunk_file))
134
+ return chunk_file
135
+
136
+ def generate_audio_files(self, text: str, voice: str, speed: float, chunk_id: int = None):
137
+ sentences = self.split_sentences(text)
138
+ audio_files = []
139
+ total_sentences = len(sentences)
140
+
141
+ print(f"Processing {total_sentences} text sentences...")
142
+ with torch.inference_mode():
143
+ for i, sentence in enumerate(sentences):
144
+ if self.save_audio_file:
145
+ chunk_file = self.generate_chunk_audio_file(sentence, chunk_id if chunk_id else i, voice, speed)
146
+ audio_files.append(chunk_file)
147
+ print(f"Sentence {i + 1}/{total_sentences} processed -> {chunk_file.name} -> {sentence}")
148
+
149
+ return audio_files
tts_runner/engines/kitten.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from pathlib import Path
3
+ from ..base import BaseTTS
4
+
5
class KittenTTSProcessor(BaseTTS):
    """Text-to-Speech processor using KittenTTS with streaming support."""

    def __init__(self, stream_audio=False):
        super().__init__("Kitten", stream_audio=stream_audio)
        self.default_voice_index = 7
        # Available expressive voices (male/female pairs).
        self.voices = [
            'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f',
            'expr-voice-4-m', 'expr-voice-4-f', 'expr-voice-5-m', 'expr-voice-5-f',
        ]
        print("Initialising Kitten...")
        from kittentts import KittenTTS  # deferred: heavy import only when this engine is used
        print("Loading Model...")  # BUGFIX: log typo, was "Loading Modal..."
        self.pipeline = KittenTTS("KittenML/kitten-tts-nano-0.2")
        print("Model loaded successfully")

    def generate_audio_files(self, text: str, voice: str, speed: float, chunk_id: int = None):
        """Synthesize each sentence of *text*; return the list of saved chunk Paths.

        ``speed`` is accepted for interface parity with the other engines but is
        not used by KittenTTS here.
        """
        sentences = self.split_sentences(text)
        audio_files = []
        total_sentences = len(sentences)

        print(f"Processing {total_sentences} text sentences...")
        for i, sentence in enumerate(sentences):
            audio = self.pipeline.generate(sentence, voice=voice)
            if self.stream_audio:
                self.queue_audio_for_streaming(audio)
            if self.save_audio_file:
                # BUGFIX: treat chunk_id=0 as a valid index (was falsy-checked).
                index = chunk_id if chunk_id is not None else i
                chunk_file = self.generate_chunk_audio_file(audio, index)
                audio_files.append(chunk_file)
                print(f"Sentence {i + 1} processed -> {chunk_file.name} -> {sentence}")

        return audio_files
tts_runner/engines/kokoro.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import List
3
+ from pathlib import Path
4
+ from ..base import BaseTTS
5
+
6
class KokoroTTSProcessor(BaseTTS):
    """Text-to-Speech processor using KokoroTTS."""

    def __init__(self, stream_audio=False, setup_signals=True):
        super().__init__("Kokoro", stream_audio=stream_audio, setup_signals=setup_signals)
        self.default_voice_index = 8
        self.default_speed = 1
        self.voices = [
            'af',  # Default voice is a 50-50 mix of Bella & Sarah
            'af_bella', 'af_sarah', 'am_adam', 'am_michael',
            'bf_emma', 'bf_isabella', 'bm_george', 'bm_lewis',
            'af_nicole', 'af_sky', 'af_heart', 'am_echo'
        ]
        print("Initialising Kokoro...")
        from kokoro import KPipeline  # deferred: heavy import only when this engine is used
        print("Loading Model...")  # BUGFIX: log typo, was "Loading Modal..."
        self.pipeline = KPipeline(lang_code='a', device=self.device)
        print("Model loaded successfully")

    def generate_audio_files(self, text: str, voice: str, speed: float, chunk_id: int = None):
        """Run the Kokoro pipeline over *text*, saving audio chunks and word timestamps.

        Side effects: writes per-chunk WAVs (via ``generate_chunk_audio_file``),
        streams audio when enabled, invokes ``word_callback`` for UI word
        highlighting, and dumps all word timings to ``final_output_timestamps``.

        Returns:
            List of saved chunk Paths (empty when ``save_audio_file`` is off).
        """
        generator = self.pipeline(
            text,
            voice=voice,
            speed=speed,
            split_pattern=r'\n+'
        )
        audio_files = []
        word_timestamps = []

        print("Processing text sentences...")

        for i, result in enumerate(generator):
            tokens = result.tokens
            audio = result.audio

            # Collect per-word timing both globally (JSON dump) and per-chunk (callback).
            callback_words = []
            sentence = ""
            for word in tokens:
                sentence += word.text
                word_data = {
                    "word": word.text,
                    "phonemes": word.phonemes,
                    "start_time": word.start_ts,
                    "end_time": word.end_ts,
                }
                word_timestamps.append(word_data)
                callback_words.append(word_data)

            if self.stream_audio:
                audio_duration = self.queue_audio_for_streaming(audio)

                # Call the callback if set (for UI highlighting)
                if getattr(self, 'word_callback', None):
                    self.word_callback(callback_words, audio_duration)
            if self.save_audio_file:
                # BUGFIX: treat chunk_id=0 as a valid index (was falsy-checked).
                index = chunk_id if chunk_id is not None else i
                chunk_file = self.generate_chunk_audio_file(audio, index)
                audio_files.append(chunk_file)
                print(f"Sentence {i + 1} processed -> {chunk_file.name} -> {sentence}")

        # Save timestamps to a JSON file for external consumers (e.g. subtitles).
        with open(self.final_output_timestamps, 'w') as f:
            json.dump(word_timestamps, f, indent=4)

        print(f'Timestamps saved as {self.final_output_timestamps}')

        return audio_files
tts_runner/runner.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Silence noisy third-party deprecation chatter before the heavy imports run.
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
import logging
logging.getLogger().setLevel(logging.ERROR)

import argparse
import os
import sys
import time

# Module-level singleton: a long-running server process reuses one loaded engine.
TTS_ENGINE = None
# Synchronous CUDA kernel launches: clearer error locations at some speed cost.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
14
+
15
def server_mode(args):
    """Read "<speed>voice<index>" commands from stdin forever, synthesizing each.

    Each stdin line is split on the literal token "voice"; the left part is the
    speech speed and the right part the voice index.  Malformed parts fall back
    to speed=1 / voice=8.  The resulting output path is echoed to stdout.
    """
    while True:
        # BUGFIX: do not shadow the builtin `input`.
        line = sys.stdin.readline().strip()
        parts = line.split("voice")

        # BUGFIX: narrowed from bare `except:` so Ctrl-C / SystemExit propagate.
        try:
            args.speed = float(parts[0])
        except (ValueError, IndexError):
            args.speed = 1

        try:
            args.voice = int(parts[1])
        except (ValueError, IndexError):
            args.voice = 8

        output_path = initiate(args)

        print(output_path)
        sys.stdout.flush()
30
+
31
def current_env():
    """Detect current virtual environment."""
    venv_path = os.environ.get("VIRTUAL_ENV")
    if not venv_path:
        raise ValueError("Please set env first")
    return os.path.basename(venv_path)
37
+
38
def initiate(args):
    """Select a TTS engine, clear CUDA memory, then stream or save audio.

    Engine choice: explicit ``model`` ("kokoro" / "kitten" / anything else ->
    chatterbox), otherwise inferred from the active virtualenv name.

    Returns:
        True on completion, so ``main()`` can map the result to exit status 0.
    """
    # NOTE(review): dict-style args are only honored for `model`; the code below
    # still reads `args.stream_text` as an attribute β€” confirm dict callers.
    model = args.get('model') if isinstance(args, dict) else getattr(args, 'model', None)
    if not model:
        # No explicit model: infer the engine from the virtualenv name.
        if current_env() == "kokoro_env":
            from .engines.kokoro import KokoroTTSProcessor as TTSEngine
        elif current_env() == "kitten_env":
            from .engines.kitten import KittenTTSProcessor as TTSEngine
        else:
            from .engines.chatterbox import ChatterboxTTSProcessor as TTSEngine
    else:
        if model == "kokoro":
            from .engines.kokoro import KokoroTTSProcessor as TTSEngine
        elif model == "kitten":
            from .engines.kitten import KittenTTSProcessor as TTSEngine
        else:
            from .engines.chatterbox import ChatterboxTTSProcessor as TTSEngine

    global TTS_ENGINE
    if not TTS_ENGINE:
        TTS_ENGINE = TTSEngine(stream_audio=args.stream_text)

    # Best-effort GPU housekeeping between requests; never fatal.
    try:
        import torch
        import gc
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
        gc.collect()
        gc.collect()
        time.sleep(1)
        print("\n🧹 Cleared PyTorch CUDA cache")
    except Exception:  # BUGFIX: bare `except:` also swallowed KeyboardInterrupt
        pass

    if args.stream_text:
        TTS_ENGINE.stream_real_time_text(args)
        text = TTS_ENGINE.read_content_file()
        for text_chunk in text.split():
            TTS_ENGINE.feed_text_chunk(text_chunk)
            time.sleep(0.1)  # Optional delay

        TTS_ENGINE.stop_all_streaming()
    else:
        TTS_ENGINE.save_audio(args)

    # BUGFIX: previously returned None, so `main()` always reported failure.
    return True
80
+
81
+
82
def main():
    """Main entry point: parse CLI flags and run in server or one-shot mode."""
    parser = argparse.ArgumentParser(
        description="Text-to-Speech processor"
    )
    parser.add_argument(
        "--server-mode",
        action="store_true",
        help="Run in server mode (read commands from stdin)"
    )
    parser.add_argument(
        "--speed",
        type=float,
        help="Speech speed"  # IDIOM: dropped pointless f-string prefix
    )
    parser.add_argument(
        "--voice",
        type=int,
        help="Voice index"  # IDIOM: dropped pointless f-string prefix
    )
    parser.add_argument(
        "--stream-text",
        action="store_true",
        help="Enable streaming text output"
    )
    parser.add_argument(
        "--model",
        help="model name"
    )

    args = parser.parse_args()

    if args.server_mode:
        server_mode(args)  # loops forever reading stdin
    else:
        success = initiate(args)
        return 0 if success else 1
120
+
121
+ if __name__ == "__main__":
122
+ main()
tts_runner/tui.py ADDED
@@ -0,0 +1,536 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Minimalistic TTS TUI Reader with Word Highlighting
3
+ Requires: textual, pyperclip, kokoro-tts
4
+ Install: pip install textual pyperclip kokoro-tts
5
+ """
6
+ from textual.app import App, ComposeResult
7
+ from textual.widgets import TextArea, Button, Footer, RichLog, Static
8
+ from textual.containers import Horizontal, Vertical, Container
9
+ from textual.binding import Binding
10
+ from textual.reactive import reactive
11
+ from textual.widgets.text_area import Selection
12
+ from textual import work
13
+ import pyperclip
14
+ import threading
15
+ import time
16
+ import queue
17
+ import re
18
+ import bisect
19
+
20
+ from .kokoro_tts import KokoroTTSProcessor
21
+
22
+
23
class StatusBar(Static):
    """Custom status bar with TTS state"""
    # Docked single-row bar; colors match the app's dark theme below.
    DEFAULT_CSS = """
    StatusBar {
        dock: top;
        height: 1;
        background: #1a1a2e;
        color: #00ff9f;
        padding: 0 2;
        text-style: bold;
    }
    """

    # Reactive attribute: assigning to it triggers a re-render automatically.
    status_text = reactive("Ready")

    def render(self) -> str:
        # Textual calls this whenever status_text changes.
        return self.status_text
40
+
41
+
42
+ class TTSReader(App):
43
+ CSS = """
44
+ Screen {
45
+ background: #0f0f23;
46
+ }
47
+
48
+ StatusBar {
49
+ border-bottom: heavy #00ff9f;
50
+ }
51
+
52
+ #main_container {
53
+ height: 1fr;
54
+ margin: 2 3;
55
+ padding: 0;
56
+ }
57
+
58
+ #text_panel {
59
+ height: 1fr;
60
+ background: #1a1a2e;
61
+ border: heavy #00d4ff;
62
+ padding: 2;
63
+ }
64
+
65
+ TextArea {
66
+ height: 1fr;
67
+ background: #1a1a2e;
68
+ color: #e0e0e0;
69
+ border: none;
70
+ padding: 1;
71
+ scrollbar-gutter: stable;
72
+ scrollbar-color: #00ff9f #1a1a2e;
73
+ }
74
+
75
+ TextArea:focus {
76
+ border: none;
77
+ }
78
+
79
+ TextArea > .text-area--cursor {
80
+ background: #ff00ff;
81
+ color: #1a1a2e;
82
+ }
83
+
84
+ TextArea > .text-area--selection {
85
+ background: #ff00ff 40%;
86
+ }
87
+
88
+ #log_container {
89
+ height: 12;
90
+ margin: 0 3 2 3;
91
+ padding: 0;
92
+ }
93
+
94
+ #log_panel {
95
+ height: 1fr;
96
+ background: #1a1a2e;
97
+ border: heavy #ff00ff;
98
+ padding: 1 2;
99
+ }
100
+
101
+ RichLog {
102
+ height: 1fr;
103
+ background: transparent;
104
+ color: #00ff9f;
105
+ border: none;
106
+ padding: 0;
107
+ }
108
+
109
+ #controls {
110
+ height: auto;
111
+ dock: bottom;
112
+ background: #0f0f23;
113
+ padding: 2 3 3 3;
114
+ align: center middle;
115
+ }
116
+
117
+ #button_row {
118
+ width: auto;
119
+ height: auto;
120
+ align: center middle;
121
+ }
122
+
123
+ Button {
124
+ min-width: 14;
125
+ height: 3;
126
+ margin: 0 1;
127
+ border: heavy #00d4ff;
128
+ background: #1a1a2e;
129
+ color: #00d4ff;
130
+ text-style: bold;
131
+ }
132
+
133
+ Button:hover {
134
+ background: #00d4ff 20%;
135
+ color: #ffffff;
136
+ border: heavy #00ff9f;
137
+ }
138
+
139
+ Button:disabled {
140
+ opacity: 0.6;
141
+ border: heavy #00d4ff;
142
+ color: #00d4ff;
143
+ }
144
+
145
+ Footer {
146
+ background: #1a1a2e;
147
+ color: #00ff9f;
148
+ border-top: heavy #00d4ff;
149
+ }
150
+
151
+ Footer > .footer--highlight {
152
+ background: #ff00ff;
153
+ color: #ffffff;
154
+ }
155
+
156
+ Footer > .footer--key {
157
+ background: #00d4ff;
158
+ color: #0f0f23;
159
+ }
160
+
161
+ /* Smooth transitions */
162
+ Button {
163
+ transition: background 100ms, border 100ms, color 100ms;
164
+ }
165
+ """
166
+
167
+ BINDINGS = [
168
+ Binding("ctrl+v", "paste", "Paste", show=True),
169
+ Binding("ctrl+p", "toggle_play", "Play", show=True),
170
+ Binding("ctrl+s", "stop_audio", "Stop", show=True),
171
+ Binding("q", "quit", "Quit", show=True),
172
+ ]
173
+
174
+ is_playing = reactive(False)
175
+ tts_ready = reactive(False)
176
+
177
    def __init__(self, debug_mode=False):
        super().__init__()
        self.debug_mode = debug_mode            # when True, the RichLog panel is shown
        self.tts = None                         # KokoroTTSProcessor, set by _init_tts worker
        self.original_text = ""
        self._playback_worker = None            # thread running _tts_playback_thread
        self._highlight_worker = None           # thread running _highlight_loop
        self._word_queue = queue.Queue()        # word-timing items: playback -> highlighter
        self._stop_highlighting = threading.Event()
        self._pending_play_after_ready = False  # user hit Play before TTS finished loading
        self._word_spans = []                   # per-token (row, col) spans of the textarea text
        self._word_span_pos = 0                 # cursor into _word_spans during playback
189
+
190
    def compose(self) -> ComposeResult:
        """Build the widget tree: status bar, editor, optional debug log, buttons, footer."""
        yield StatusBar(id="status")

        with Vertical(id="main_container"):
            with Container(id="text_panel"):
                yield TextArea(
                    "",
                    id="text_input",
                    soft_wrap=True,
                    language="text",
                    theme="css"
                )

        # Debug log panel only exists when launched with --debug.
        if self.debug_mode:
            with Vertical(id="log_container"):
                with Container(id="log_panel"):
                    yield RichLog(id="log", wrap=True, markup=True, auto_scroll=True)

        with Horizontal(id="controls"):
            with Horizontal(id="button_row"):
                yield Button("Paste", id="paste")
                yield Button("Play", id="play")
                yield Button("Stop", id="stop")
                yield Button("Quit", id="quit")

        yield Footer()
216
+
217
    def on_mount(self):
        # Runs once the UI is ready: show loading state and start the TTS worker.
        self.update_status("β–Ά INITIALIZING...")
        self.update_controls()
        self.log_message("[dim]>>> Initializing TTS engine...[/dim]")
        self._init_tts()  # @work(thread=True): does not block the UI
222
+
223
    @work(thread=True)
    def _init_tts(self):
        """Load the TTS engine off the UI thread; auto-play if the user already hit Play."""
        try:
            self.tts = KokoroTTSProcessor(stream_audio=True, setup_signals=False)
            self.tts_ready = True
            # UI mutations from a worker thread must go through call_from_thread.
            self.call_from_thread(self.update_status, "Ready")
            self.call_from_thread(self.log_message, "[green]>>> TTS engine initialized[/green]")

            if self._pending_play_after_ready:
                self._pending_play_after_ready = False
                self.call_from_thread(self.action_toggle_play)

            self.call_from_thread(self.update_controls)
        except Exception as e:
            self.call_from_thread(self.update_status, "Error")
            self.call_from_thread(self.log_message, f"[red]>>> TTS initialization failed: {e}[/red]")
239
+
240
+ def update_status(self, text: str):
241
+ try:
242
+ status = self.query_one(StatusBar)
243
+ status.status_text = text
244
+ except Exception:
245
+ pass
246
+
247
+ # --- Actions ---
248
    def action_paste(self):
        """Replace the editor contents with the system clipboard text."""
        try:
            text = pyperclip.paste()
            if text:
                self.query_one("#text_input", TextArea).text = text
                self.log_message("[green]>>> Text pasted from clipboard[/green]")
                self.update_status("Text loaded")
        except Exception as e:
            # pyperclip raises when no clipboard backend exists (e.g. headless Linux).
            self.log_message(f"[red]>>> Paste failed: {e}[/red]")
257
+
258
    def action_toggle_play(self):
        """Toggle playback: stop if playing, otherwise play (or defer until TTS loads)."""
        textarea = self.query_one("#text_input", TextArea)
        text = textarea.text
        if text.strip():
            play_btn = self.query_one("#play", Button)
            stop_btn = self.query_one("#stop", Button)
            if self.is_playing:
                self.stop_audio()
            else:
                if not self.tts_ready:
                    # Engine still loading: remember the request; _init_tts will
                    # call this action again once ready.
                    self.log_message("[cyan]>>> TTS loading... will auto-play[/cyan]")
                    self.update_status("Loading...")
                    self._pending_play_after_ready = True
                    play_btn.disabled = True
                    stop_btn.disabled = True
                else:
                    self.play_audio()
275
+
276
    def action_stop_audio(self):
        # Keyboard-binding entry point (ctrl+s) for stopping playback.
        self.stop_audio()
278
+
279
    def action_quit(self):
        """Stop any playback and exit the application cleanly."""
        try:
            self.update_status("Exiting...")
        except Exception:
            pass
        self._ensure_tts_stopped()
        self.exit()  # cleanly exits the Textual app
286
+
287
+ def on_button_pressed(self, event: Button.Pressed):
288
+ mapping = {
289
+ "paste": self.action_paste,
290
+ "play": self.action_toggle_play,
291
+ "stop": self.action_stop_audio,
292
+ "quit": self.action_quit,
293
+ }
294
+ action = mapping.get(event.button.id)
295
+ if action:
296
+ action()
297
+
298
+ # --- Word span mapping ---
299
+ @staticmethod
300
+ def _normalize_token(s: str) -> str:
301
+ return re.sub(r"[^A-Za-z0-9']+", "", s).lower()
302
+
303
+ @staticmethod
304
+ def _line_starts(text: str):
305
+ starts = [0]
306
+ for i, ch in enumerate(text):
307
+ if ch == "\n":
308
+ starts.append(i + 1)
309
+ return starts
310
+
311
    def _build_word_spans(self, text: str):
        """Map every whitespace-delimited token of *text* to its (row, column) span.

        Used to translate TTS word callbacks into TextArea selections.
        """
        spans = []
        line_starts = self._line_starts(text)
        for m in re.finditer(r"\S+", text):
            abs_start, abs_end = m.start(), m.end()
            # Find the row containing abs_start, then convert offsets to columns.
            row = bisect.bisect_right(line_starts, abs_start) - 1
            start_col = abs_start - line_starts[row]
            end_col = abs_end - line_starts[row]
            spans.append({
                "token": m.group(),
                "row": row,
                "start_col": start_col,
                "end_col": end_col,
            })
        return spans
326
+
327
+ # --- Playback + Highlight ---
328
    def play_audio(self):
        """Start playback: spawn the highlighter and TTS threads for the current text."""
        if not self.tts_ready:
            self.log_message("[cyan]>>> TTS is still loading[/cyan]")
            return

        textarea = self.query_one("#text_input", TextArea)
        text = textarea.text
        if not text.strip():
            self.log_message("[cyan]>>> No text to read[/cyan]")
            return

        # Reset all playback state before spawning new workers.
        self._ensure_tts_stopped()
        self._word_spans = self._build_word_spans(text)
        self._word_span_pos = 0
        self.is_playing = True
        self._stop_highlighting.clear()

        # Drain any stale word-timing items from a previous run.
        while not self._word_queue.empty():
            try:
                self._word_queue.get_nowait()
            except queue.Empty:
                break

        textarea.focus()
        self.update_status("Playing...")

        # Consumer: paces word highlights against audio timing.
        self._highlight_worker = threading.Thread(target=self._highlight_loop, daemon=True)
        self._highlight_worker.start()

        # Producer: synthesizes audio and pushes word timings onto the queue.
        self._playback_worker = threading.Thread(
            target=self._tts_playback_thread, args=(text,), daemon=True
        )
        self._playback_worker.start()
361
+
362
    def _highlight_loop(self):
        """Worker thread: consume word-timing items and highlight each word in sync.

        Sleeps for the gap between consecutive word end times so the selection
        tracks the audio.  A None sentinel on the queue ends the loop.
        """
        prev_end_time = 0.0
        while not self._stop_highlighting.is_set():
            try:
                item = self._word_queue.get(timeout=0.1)  # timeout lets us re-check the stop flag
                if item is None:
                    break

                row, start_col, end_col, start_time, end_time = (
                    item["row"],
                    item["start_col"],
                    item["end_col"],
                    item["start_time"],
                    item["end_time"],
                )

                # UI mutation must happen on the main thread.
                self.call_from_thread(self._set_selection, row, start_col, end_col)

                # Timestamps reset at each audio chunk boundary; detect the wrap.
                if prev_end_time > end_time:
                    prev_end_time = -0.2  # add buffer when next audio plays
                duration = max(0.0, end_time - prev_end_time)
                prev_end_time = end_time

                time.sleep(duration)

            except queue.Empty:
                continue
            except Exception as e:
                self.call_from_thread(lambda: self.log_message(f"[red]>>> Highlight error: {e}[/red]"))
                break
392
+
393
    # NOTE(review): MATCH_WINDOW appears unused in this class β€” either remove it
    # or use it to widen the token search in _tts_playback_thread.
    MATCH_WINDOW = 12

    def _set_selection(self, row: int, start_col: int, end_col: int):
        """Highlight one word by selecting it and scrolling it into view (UI thread only)."""
        try:
            textarea = self.query_one("#text_input", TextArea)
            textarea.selection = Selection(start=(row, start_col), end=(row, end_col))
            textarea.focus()
            textarea.scroll_to(y=row, immediate=True)
        except Exception as e:
            self.log_message(f"[red]>>> Selection error: {e}[/red]")
403
+
404
+ def _tts_playback_thread(self, text: str):
405
+ try:
406
+ def word_cb(word_datas, audio_duration):
407
+ self.log_message(word_datas)
408
+ for wd_index, wd in enumerate(word_datas):
409
+ tts_word = wd.get("word", "")
410
+ if not tts_word or not any(ch.isalnum() for ch in tts_word):
411
+ continue
412
+
413
+ start_index = self._word_span_pos
414
+ end_index = min(start_index + 1, len(self._word_spans))
415
+
416
+ match_idx = None
417
+ for i in range(start_index, end_index):
418
+ if self._word_spans[i]["token"] == tts_word:
419
+ match_idx = i
420
+ break
421
+
422
+ if match_idx is None:
423
+ if self._word_span_pos < len(self._word_spans):
424
+ match_idx = self._word_span_pos
425
+ else:
426
+ continue
427
+
428
+ span = self._word_spans[match_idx]
429
+ self._word_span_pos = match_idx + 1
430
+
431
+ start_time = wd.get("start_time", 0.0)
432
+ end_time = wd.get("end_time", 0.0)
433
+ if start_time == None and end_time == None:
434
+ if wd_index + 1 == len(word_datas):
435
+ start_time = word_datas[wd_index - 1]["end_time"]
436
+ end_time = audio_duration
437
+ else:
438
+ start_time = word_datas[wd_index - 1]["end_time"]
439
+ end_time = word_datas[wd_index + 1]["start_time"]
440
+
441
+ self._word_queue.put(
442
+ {
443
+ "word": span["token"],
444
+ "row": span["row"],
445
+ "start_col": span["start_col"],
446
+ "end_col": span["end_col"],
447
+ "start_time": float(start_time) if start_time is not None else 0.0,
448
+ "end_time": float(end_time) if end_time is not None else 0.0,
449
+ }
450
+ )
451
+
452
+ self.tts.word_callback = word_cb
453
+ self.tts.start_audio_streaming()
454
+ self.tts.generate_audio_files(text, self.tts.voices[2], self.tts.default_speed)
455
+ self._word_queue.put(None)
456
+ self.tts.wait_for_audio_streaming_complete()
457
+ self.tts.stop_audio_streaming()
458
+ self.call_from_thread(self.update_status, "Completed")
459
+ self.call_from_thread(lambda: self.log_message("[green]>>> Playback complete[/green]"))
460
+ except Exception as e:
461
+ self.call_from_thread(lambda: self.log_message(f"[red]>>> Playback error: {e}[/red]"))
462
+ finally:
463
+ self.tts.word_callback = None
464
+ self._stop_highlighting.set()
465
+ self.is_playing = False
466
+ self.call_from_thread(self._cleanup_playback)
467
+
468
    def _ensure_tts_stopped(self):
        """Best-effort teardown of any in-flight TTS streaming and worker threads."""
        if self.tts:
            try:
                if hasattr(self.tts, "is_streaming") and self.tts.is_streaming:
                    if hasattr(self.tts, "force_stop_streaming"):
                        self.tts.force_stop_streaming()
                    # Drain any queued audio so a restart begins cleanly.
                    if hasattr(self.tts, "audio_queue"):
                        while not self.tts.audio_queue.empty():
                            try:
                                self.tts.audio_queue.get_nowait()
                            except Exception:
                                break
                    self.tts.is_streaming = False
            except Exception as e:
                self.log_message(f"[cyan]>>> Cleanup warning: {e}[/cyan]")

        self._stop_highlighting.set()
        # Short joins only: workers are daemons; don't block the UI thread long.
        if self._highlight_worker and self._highlight_worker.is_alive():
            self._highlight_worker.join(timeout=0.2)
        if self._playback_worker and self._playback_worker.is_alive():
            self._playback_worker.join(timeout=0.2)
        self.is_playing = False
490
+
491
    def stop_audio(self):
        """Stop playback and highlighting; no-op when nothing is playing."""
        if not self.is_playing:
            return
        self.is_playing = False
        self._stop_highlighting.set()
        self._ensure_tts_stopped()
        self._cleanup_playback()
        self.update_status("Stopped")
        self.log_message("[red]>>> Playback stopped[/red]")
500
+
501
    def _cleanup_playback(self):
        # Clear the word highlight and restore button enabled/disabled states.
        textarea = self.query_one("#text_input", TextArea)
        textarea.selection = Selection()
        self.update_controls()
505
+
506
+ # --- UI ---
507
+ def log_message(self, message):
508
+ if not self.debug_mode:
509
+ return
510
+ try:
511
+ self.query_one("#log", RichLog).write(message)
512
+ except Exception:
513
+ pass
514
+
515
    def watch_is_playing(self, is_playing):
        # Reactive watcher: runs whenever is_playing changes.
        self.update_controls()
        # NOTE(review): unlike update_controls, this query is unguarded β€” it may
        # raise if the watcher fires before the buttons are mounted. Confirm.
        play_btn = self.query_one("#play", Button)
        play_btn.label = "Play"  # no "Pause" state exists; label is always reset
519
+
520
+ def update_controls(self):
521
+ try:
522
+ play_btn = self.query_one("#play", Button)
523
+ stop_btn = self.query_one("#stop", Button)
524
+ play_btn.disabled = self.is_playing
525
+ stop_btn.disabled = not self.is_playing
526
+ except Exception:
527
+ pass
528
+
529
+
530
def main():
    """CLI entry point: run the reader, enabling the log panel with --debug."""
    import sys
    app = TTSReader(debug_mode="--debug" in sys.argv)
    app.run()
534
+
535
+ if __name__ == "__main__":
536
+ main()
tts_runner/voices/00007.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:697a4ec1903de653c6febe002e0f6fb6f2d0087cc3b1843efb745f8280466201
3
+ size 108844
tts_runner/voices/20250329-audio-american-female.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3cfd78a952c62b19af188a473948f20cc75ea429c172f7688b2cf1fecd13e2b
3
+ size 403244
tts_runner/voices/20250329-audio-american-male.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65daf63c956847c7e3b47c683090c237e6927b3443edc04e5545fdc0d9565502
3
+ size 712014
tts_runner/voices/Ellen-TTS-10.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc930644956f6e214eae4256d1f5328998dd22df8522d9710b77309d06702091
3
+ size 868072
tts_runner/voices/Ellen13y TTS-14.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abb2e9140f3fdf1af552ab42fccf5441f67c88a857589f1f504d557111f601b6
3
+ size 1219164
tts_runner/voices/Main-4.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dab7226e72f1de2f7751c829466deb2f68f60ccbaf4f5265135073d6015a94ee
3
+ size 619208
tts_runner/voices/Simple guy.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77454f53011ec6565b72f62a7e6afdf8f564463990572c489af4d02c3622b14e
3
+ size 616364
tts_runner/voices/VEGETA_4_504_US.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d520d15eb7db5a1e100c529ba160ef6945260799ebf16ca3b79e64b29330ed7c
3
+ size 157302
tts_runner/voices/VEGETA_4_532_US.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:906c1ffae0f1661841c7d4d2970811dde8b64e92cf894db545bf5c3453e12e22
3
+ size 152976
tts_runner/voices/bbc_news.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5152e43ef1b2f72c95d64f216179b52d0b68d754785bb85b69ed9111036aa43
3
+ size 317214
tts_runner/voices/en_woman.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1d49dc69f3b0731ed7b10ddf51dfc8f73465d4323f45841d93583d8b1e4d3e6
3
+ size 313272
tts_runner/voices/kratos(ambient)_en.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e725a35b3aec489a95877c3940e2c6f6cfd24a7ca4692c8680f18a368674cfa8
3
+ size 2759900
tts_runner/voices/voice_preview_cocky male villain voice.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52c844d16cc2d459e9a9881f9fec6fd7580ac3fb6633de11d4bf6913107c9bd2
3
+ size 182693
tts_runner/voices/voice_preview_cocky male villain voice.mp3:Zone.Identifier ADDED
File without changes
tts_runner/voices/voice_preview_david castlemore - newsreader and educator.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0961e2fb1a2437c22b1ca8490965309fb5eec52d3f4f67b62e45fcf280d3b755
3
+ size 152973
tts_runner/voices/voice_preview_kelly - storytelling & motivational content.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d71ad08000aa9178bfad7c30a55d6b6607d51aa93c8d87a85f30015237ad64a
3
+ size 86771
tts_runner/voices/voice_preview_motivational coach - leader.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc3ae8151229a195be4472e042f4bd03a0e8b7ce33995dbbd1d6fccc28979f15
3
+ size 201038
tts_runner/voices/voice_preview_sevan bomar - black motivational speaker.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b36725add8170141c740632a4a870b5f2bd8523e9361a84ca49d7131c9e8b2b
3
+ size 114938
worker.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3
2
+ import time
3
+ import os
4
+ import subprocess
5
+ import json
6
+ import shlex
7
+ from datetime import datetime
8
+
9
# Working directory the STT command runs in; its output JSON is located
# relative to this directory.
CWD = "./"
# Command used to run speech-to-text; presumably an executable on PATH —
# TODO confirm (the name suggests a CLI wrapper, not a Python interpreter).
PYTHON_PATH = "stt-transcribe"
# Model identifier passed to the STT tool via --model.
STT_MODEL_NAME = "fasterwhispher"
POLL_INTERVAL = 3  # seconds between database polls when the queue is empty
13
+
14
def process_audio(file_id, filepath):
    """Run the external STT tool on an audio file and return its transcription.

    Args:
        file_id: Database id of the audio row (used only in error logging).
        filepath: Path to the audio file to transcribe.

    Returns:
        Tuple ``(caption, None)`` on success, or ``(None, error_message)``
        on any failure (subprocess error, missing/invalid output file, ...).
    """
    try:
        abs_path = os.path.abspath(filepath)
        print(f"🔄 Running STT on: {abs_path}")

        # Invoke the STT tool with an argument list instead of a shell
        # string: no shell injection surface, and no need for shlex quoting.
        # The former `cd {CWD} && ...` prefix is redundant because cwd=CWD
        # is passed directly to subprocess.run.
        subprocess.run(
            [PYTHON_PATH, "--input", abs_path, "--model", STT_MODEL_NAME],
            check=True,
            cwd=CWD,
            env={
                **os.environ,
                'PYTHONUNBUFFERED': '1',
                'CUDA_LAUNCH_BLOCKING': '1',
                'USE_CPU_IF_POSSIBLE': 'true'
            }
        )

        # The tool writes its result to a fixed JSON file under CWD.
        output_path = os.path.join(CWD, 'temp_dir', 'output_transcription.json')
        with open(output_path, 'r') as file:
            result = json.loads(file.read().strip())

        # Extract caption text, falling back through known key variants;
        # as a last resort stringify the whole payload.
        caption = result.get('text', '') or result.get('transcription', '') or str(result)

        return caption, None

    except Exception as e:
        # Boundary handler: the worker loop expects an (None, error) pair
        # rather than an exception, so it can mark the row 'failed'.
        print(f"❌ Error processing file {file_id}: {str(e)}")
        return None, str(e)
49
+
50
def update_status(file_id, status, caption=None, error=None):
    """Update the processing status of an audio_files row.

    For ``'completed'`` the caption and a processed_at timestamp are stored;
    for ``'failed'`` the error text is stored in the caption column prefixed
    with ``"Error: "``; any other status updates the status column only.

    Args:
        file_id: Primary key of the row in audio_files.
        status: New status value ('processing', 'completed', 'failed', ...).
        caption: Transcription text, used when status == 'completed'.
        error: Error message, used when status == 'failed'.
    """
    conn = sqlite3.connect('audio_captions.db')
    try:
        c = conn.cursor()
        if status in ('completed', 'failed'):
            # Both terminal states run the same UPDATE; only the text stored
            # in the caption column differs.
            text = caption if status == 'completed' else f"Error: {error}"
            c.execute('''UPDATE audio_files
                         SET status = ?, caption = ?, processed_at = ?
                         WHERE id = ?''',
                      (status, text, datetime.now().isoformat(), file_id))
        else:
            c.execute('UPDATE audio_files SET status = ? WHERE id = ?',
                      (status, file_id))
        conn.commit()
    finally:
        # Release the connection even if the UPDATE raises.
        conn.close()
70
+
71
def worker_loop():
    """Poll the database forever and transcribe queued audio files.

    Picks the oldest 'not_started' row, marks it 'processing', runs STT via
    process_audio, then records 'completed' (deleting the source audio) or
    'failed' (keeping the file for debugging). Sleeps POLL_INTERVAL seconds
    when the queue is empty or after an unexpected error.
    """
    print("🤖 STT Worker started. Monitoring for new audio files...")
    print("🗑️ Audio files will be deleted after successful processing\n")

    while True:
        try:
            # Fetch the oldest unprocessed file, if any.
            conn = sqlite3.connect('audio_captions.db')
            conn.row_factory = sqlite3.Row
            c = conn.cursor()
            c.execute('''SELECT * FROM audio_files
                         WHERE status = 'not_started'
                         ORDER BY created_at ASC
                         LIMIT 1''')
            row = c.fetchone()
            conn.close()

            if row:
                file_id = row['id']
                filepath = row['filepath']
                filename = row['filename']

                print(f"\n{'='*60}")
                # NOTE(review): original prints contained a literal
                # "(unknown)" placeholder while `filename` was read but
                # never used — log the actual filename instead.
                print(f"🎵 Processing: {filename}")
                print(f"📁 ID: {file_id}")
                print(f"{'='*60}")

                # Mark as in-progress so other observers see it is claimed.
                update_status(file_id, 'processing')

                caption, error = process_audio(file_id, filepath)

                if caption:
                    print(f"✅ Successfully processed: {filename}")
                    print(f"📄 Caption preview: {caption[:100]}...")
                    update_status(file_id, 'completed', caption=caption)

                    # Free disk space once the transcription is stored.
                    if os.path.exists(filepath):
                        os.remove(filepath)
                        print(f"🗑️ Deleted audio file: {filepath}")
                else:
                    print(f"❌ Failed to process: {filename}")
                    print(f"Error: {error}")
                    update_status(file_id, 'failed', error=error)
                    # Keep the audio file on failure (for debugging).
            else:
                # Queue is empty — wait before polling again.
                time.sleep(POLL_INTERVAL)

        except Exception as e:
            # Never let the worker die; log the error and retry after a pause.
            print(f"⚠️ Worker error: {str(e)}")
            time.sleep(POLL_INTERVAL)
126
+
127
if __name__ == '__main__':
    # Standalone entry point for testing/debugging; app.py must have
    # created the database first.
    if os.path.exists('audio_captions.db'):
        divider = "=" * 60
        for banner_line in (
            "\n" + divider,
            "🚀 Starting STT Worker (Standalone Mode)",
            divider,
            "⚠️ Note: Worker is now embedded in app.py",
            "⚠️ This standalone mode is for testing/debugging only",
            divider + "\n",
        ):
            print(banner_line)
        worker_loop()
    else:
        print("❌ Database not found. Please run app.py first to initialize.")