From cfe2b2346ad4c4f40489477b0a09df686f2ada78 Mon Sep 17 00:00:00 2001
From: Alireza <seyedalirezamanavi1402@gmail.com>
Date: Sat, 2 Aug 2025 17:52:11 +0330
Subject: [PATCH] Add Gunicorn support for production in Vosk service and
 update requirements

---
 vosk/test_files/quick_fix.sh               | 36 ++++++++++
 vosk/test_files/requirements_optimized.txt |  1 +
 vosk/test_files/run_optimized_192cores.sh  | 18 +++--
 vosk/test_files/test_service.py            | 80 ++++++++++++++++++++++
 vosk/vosk_service/app_optimized.py         | 10 ++-
 vosk/vosk_service/gunicorn_config.py       | 43 ++++++++++++
 vosk/vosk_service/start_service.py         | 38 ++++++++++
 7 files changed, 220 insertions(+), 6 deletions(-)
 create mode 100755 vosk/test_files/quick_fix.sh
 create mode 100644 vosk/test_files/test_service.py
 create mode 100644 vosk/vosk_service/gunicorn_config.py
 create mode 100644 vosk/vosk_service/start_service.py

diff --git a/vosk/test_files/quick_fix.sh b/vosk/test_files/quick_fix.sh
new file mode 100755
index 0000000..4de668f
--- /dev/null
+++ b/vosk/test_files/quick_fix.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+#
+# Quick fix for Flask multiprocessing issue: restart the Vosk service through
+# start_service.py and wait until it answers on port 5000.
+echo "🔧 Applying quick fix for Flask multiprocessing issue..."
+
+# Stop earlier instances, including ones launched through start_service.py
+echo "🛑 Stopping any existing Vosk service..."
+pkill -f "python.*(app_optimized|start_service)" || true
+pkill -f "gunicorn.*app_optimized" || true
+sleep 2  # give the old processes a moment to exit
+
+echo "🚀 Starting Vosk service with proper configuration..."
+
+# Resolve the service directory from this script's location, not the caller's CWD
+cd "$(dirname "$0")/../vosk_service" || { echo "❌ vosk_service directory not found" >&2; exit 1; }
+
+# Use the startup script that handles configuration properly
+python start_service.py &
+VOSK_PID=$!
+echo "✅ Vosk service started with PID: $VOSK_PID"
+
+# Poll the HTTP endpoint once per second, 30 attempts at most
+echo "⏳ Waiting for service to be ready..."
+for _ in {1..30}; do
+    if curl -s http://localhost:5000/ > /dev/null; then
+        echo "✅ Service is ready!"
+        echo "🎯 Service should now be running without the multiprocessing error!"
+        echo "💡 You can now run your optimized processing script."
+        exit 0
+    fi
+    sleep 1
+done
+
+echo "❌ Service did not become ready within 30 seconds (PID: $VOSK_PID)" >&2
+exit 1
\ No newline at end of file
diff --git a/vosk/test_files/requirements_optimized.txt b/vosk/test_files/requirements_optimized.txt
index 76c956f..18953ad 100644
--- a/vosk/test_files/requirements_optimized.txt
+++ b/vosk/test_files/requirements_optimized.txt
@@ -21,6 +21,7 @@ vosk>=0.3.45
 
 # Flask for API (if using Flask version)
 flask>=2.3.0
+gunicorn>=21.0.0
 
 # Additional optimizations
 uvloop>=0.17.0  # Faster event loop for asyncio
diff --git a/vosk/test_files/run_optimized_192cores.sh b/vosk/test_files/run_optimized_192cores.sh
index f4f2498..3fc8cfe 100755
--- a/vosk/test_files/run_optimized_192cores.sh
+++ b/vosk/test_files/run_optimized_192cores.sh
@@ -44,10 +44,20 @@ if ! curl -s http://localhost:5000/ > /dev/null; then
     
     # Start optimized Vosk service
     cd ../vosk_service
-    export USE_ASYNC=true
-    python app_optimized.py &
-    VOSK_PID=$!
-    echo "✅ Vosk service started with PID: $VOSK_PID"
+    
+    # Choose between async and production mode
+    if [ "$USE_ASYNC" = "true" ]; then
+        export USE_ASYNC=true
+        python start_service.py &
+        VOSK_PID=$!
+        echo "✅ Vosk async service started with PID: $VOSK_PID"
+    else
+        # Use Gunicorn for production multiprocessing
+        export USE_GUNICORN=true
+        gunicorn -c gunicorn_config.py app_optimized:app &
+        VOSK_PID=$!
+        echo "✅ Vosk Gunicorn service started with PID: $VOSK_PID"
+    fi
     
     # Wait for service to be ready
     echo "⏳ Waiting for service to be ready..."
diff --git a/vosk/test_files/test_service.py b/vosk/test_files/test_service.py
new file mode 100644
index 0000000..ad7f83c
--- /dev/null
+++ b/vosk/test_files/test_service.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+"""
+Test script to verify Vosk service is working properly.
+"""
+
+import requests
+import time
+import json
+
+def test_service_health():
+    """Return True when GET / on the local service answers with HTTP 200."""
+    try:
+        reply = requests.get("http://localhost:5000/", timeout=5)
+        if reply.status_code != 200:
+            print(f"❌ Service returned status {reply.status_code}")
+            return False
+        print("✅ Service is healthy!")
+        print(f"Response: {reply.json()}")
+        return True
+    except Exception as err:
+        # Any failure in the request or in decoding the JSON body lands here
+        print(f"❌ Service not responding: {err}")
+        return False
+
+def test_batch_processing():
+    """POST a dummy two-file batch to /batch_confirm; True on HTTP 200."""
+    # Reference transcripts travel as one JSON-encoded form field
+    references = ['test sentence 1', 'test sentence 2']
+    form_fields = {'references': json.dumps(references)}
+
+    # Placeholder payloads only, not real recordings
+    # (you'll need real audio files for actual testing)
+    dummy_wav = b'dummy_audio_data'
+    uploads = {
+        'audio0': ('test1.wav', dummy_wav, 'audio/wav'),
+        'audio1': ('test2.wav', dummy_wav, 'audio/wav'),
+    }
+
+    try:
+        reply = requests.post(
+            "http://localhost:5000/batch_confirm",
+            data=form_fields,
+            files=uploads,
+            timeout=30,
+        )
+        if reply.status_code != 200:
+            print(f"❌ Batch processing failed: {reply.status_code}")
+            print(f"Response: {reply.text}")
+            return False
+
+        print("✅ Batch processing endpoint is working!")
+        print(f"Response: {reply.json()}")
+        return True
+    except Exception as err:
+        # Any failure in the request or in decoding the JSON body lands here
+        print(f"❌ Batch processing error: {err}")
+        return False
+
+def main():
+    """Wait for the service, run the batch test; return a process exit code."""
+    print("🔍 Testing Vosk service...")
+    # Poll the health endpoint up to 30 times, pausing a second between tries
+    print("⏳ Waiting for service to be ready...")
+    for _ in range(30):
+        if test_service_health():
+            break
+        time.sleep(1)
+    else:
+        print("❌ Service did not become ready within 30 seconds")
+        return 1
+
+    print("\n🧪 Testing batch processing...")
+    if not test_batch_processing():
+        return 1  # failure details were already printed by the test itself
+
+    print("\n✅ Service testing complete!")
+    return 0
+
+if __name__ == "__main__":
+    raise SystemExit(main())
\ No newline at end of file
diff --git a/vosk/vosk_service/app_optimized.py b/vosk/vosk_service/app_optimized.py
index f2ff3ac..43f3c5d 100644
--- a/vosk/vosk_service/app_optimized.py
+++ b/vosk/vosk_service/app_optimized.py
@@ -267,5 +267,11 @@ if __name__ == '__main__':
         app = create_async_app()
         web.run_app(app, host='0.0.0.0', port=5000)
     else:
-        # Run Flask version
-        app.run(host='0.0.0.0', port=5000, threaded=True, processes=4) 
\ No newline at end of file
+        # Run Flask version with proper multiprocessing setup
+        # Use Gunicorn for production multiprocessing
+        if os.getenv('USE_GUNICORN', 'false').lower() == 'true':
+            # Direct run only: Gunicorn imports `app` and never executes this block
+            app.run(host='0.0.0.0', port=5000)
+        else:
+            # Use threading for development
+            app.run(host='0.0.0.0', port=5000, threaded=True) 
\ No newline at end of file
diff --git a/vosk/vosk_service/gunicorn_config.py b/vosk/vosk_service/gunicorn_config.py
new file mode 100644
index 0000000..c0abf86
--- /dev/null
+++ b/vosk/vosk_service/gunicorn_config.py
@@ -0,0 +1,43 @@
+# Gunicorn configuration for Vosk service with 192-core optimization
+
+# Server socket
+bind = "0.0.0.0:5000"
+backlog = 2048
+
+# Worker processes
+workers = 48  # 192/4 for optimal worker count
+worker_class = "sync"
+# NOTE: per the Gunicorn docs, worker_connections is not used by sync workers;
+# it only matters if worker_class is changed to gthread/eventlet/gevent.
+worker_connections = 1000
+
+# Memory management: recycle each worker after about 1000 requests, with
+# jitter so that the workers do not all restart at the same moment
+max_requests = 1000
+max_requests_jitter = 50
+# Import the application in the master process before forking the workers
+preload_app = True
+
+# Timeout settings
+timeout = 300
+keepalive = 2
+graceful_timeout = 30
+
+# Logging
+accesslog = "-"
+errorlog = "-"
+loglevel = "info"
+access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s" %(D)s'
+
+# Process naming
+proc_name = "vosk-service"
+
+# Security
+limit_request_line = 4094
+limit_request_fields = 100
+limit_request_field_size = 8190
+
+# Performance
+worker_tmp_dir = "/dev/shm"
+# worker_exit_on_app_exit / worker_abort_on_app_exit were dropped: they are
+# not Gunicorn settings, so they never had any effect.
\ No newline at end of file
diff --git a/vosk/vosk_service/start_service.py b/vosk/vosk_service/start_service.py
new file mode 100644
index 0000000..98c371a
--- /dev/null
+++ b/vosk/vosk_service/start_service.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+"""
+Simple startup script for Vosk service that handles Flask configuration properly.
+"""
+
+import os
+import sys
+from app_optimized import app, load_model
+
+def main():
+    """Load the Vosk model, then serve with aiohttp or Flask's built-in server.
+
+    USE_ASYNC=true selects aiohttp. Gunicorn is launched separately
+    (`gunicorn -c gunicorn_config.py app_optimized:app`), not by this script,
+    so USE_GUNICORN=true only produces a warning here.
+    """
+    print("Loading Vosk model...")
+    load_model()
+
+    use_async = os.getenv('USE_ASYNC', 'false').lower() == 'true'
+    use_gunicorn = os.getenv('USE_GUNICORN', 'false').lower() == 'true'
+
+    if use_async:
+        print("Starting async aiohttp service...")
+        from aiohttp import web
+        from app_optimized import create_async_app
+        web.run_app(create_async_app(), host='0.0.0.0', port=5000)
+        return
+
+    if use_gunicorn:
+        print("WARNING: USE_GUNICORN is set, but this script runs Flask's "
+              "built-in server; start Gunicorn with: gunicorn -c "
+              "gunicorn_config.py app_optimized:app", file=sys.stderr)
+    print("Starting Flask service with threading...")
+    app.run(host='0.0.0.0', port=5000, threaded=True)
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file