Add Gunicorn support for production in Vosk service and update requirements

2025-08-02 17:52:11 +03:30
parent 0d151529f0
commit cfe2b2346a
7 changed files with 220 additions and 6 deletions
--- a/vosk/test_files/quick_fix.sh
+++ b/vosk/test_files/quick_fix.sh
@@ -0,0 +1,36 @@
 #!/bin/bash
 # Quick fix for Flask multiprocessing issue.
 #
 # Stops any running Vosk service, restarts it through start_service.py and
 # waits (up to 30 one-second polls) for http://localhost:5000/ to answer.
 # Exits non-zero when the service directory is missing, the service process
 # dies during startup, or the service never becomes reachable.
 echo "🔧 Applying quick fix for Flask multiprocessing issue..."
 # Kill any existing Vosk service.
 # "|| true" because pkill exits non-zero when no process matched.
 echo "🛑 Stopping any existing Vosk service..."
 pkill -f "python.*app_optimized" || true
 pkill -f "gunicorn.*app_optimized" || true
 # The service launched below runs as "python start_service.py", which the
 # patterns above do not match; without this a re-run would leave the old
 # instance holding port 5000.
 pkill -f "python.*start_service" || true
 # Wait a moment so the old processes can release the port
 sleep 2
 # Start the service with proper configuration
 echo "🚀 Starting Vosk service with proper configuration..."
 # Resolve the service directory relative to this script rather than the
 # caller's working directory, and stop if it does not exist.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd "$SCRIPT_DIR/../vosk_service" || {
    echo "❌ Could not change to $SCRIPT_DIR/../vosk_service" >&2
    exit 1
 }
 # Use the startup script that handles configuration properly
 python start_service.py &
 VOSK_PID=$!
 echo "✅ Vosk service started with PID: $VOSK_PID"
 # Wait for service to be ready
 echo "⏳ Waiting for service to be ready..."
 SERVICE_READY=false
 for i in {1..30}; do
    if curl -s http://localhost:5000/ > /dev/null; then
        echo "✅ Service is ready!"
        SERVICE_READY=true
        break
    fi
    # No point polling further if the background process already exited
    if ! kill -0 "$VOSK_PID" 2>/dev/null; then
        echo "❌ Vosk service process (PID $VOSK_PID) exited during startup" >&2
        exit 1
    fi
    sleep 1
 done
 if [ "$SERVICE_READY" != "true" ]; then
    echo "❌ Service did not become ready within 30 seconds" >&2
    exit 1
 fi
 echo "🎯 Service should now be running without the multiprocessing error!"
 echo "💡 You can now run your optimized processing script."
--- a/vosk/test_files/requirements_optimized.txt
+++ b/vosk/test_files/requirements_optimized.txt
@@ -21,6 +21,7 @@ vosk>=0.3.45
 # Flask for API (if using Flask version)
 flask>=2.3.0
 gunicorn>=21.0.0
 # Additional optimizations
 uvloop>=0.17.0  # Faster event loop for asyncio
--- a/vosk/test_files/run_optimized_192cores.sh
+++ b/vosk/test_files/run_optimized_192cores.sh
@@ -44,10 +44,20 @@ if ! curl -s http://localhost:5000/ > /dev/null; then
    # Start optimized Vosk service
    cd ../vosk_service
    # Choose between async and production mode
    if [ "$USE_ASYNC" = "true" ]; then
        export USE_ASYNC=true
-    python app_optimized.py &
+        python start_service.py &
        VOSK_PID=$!
-    echo "✅ Vosk service started with PID: $VOSK_PID"
+        echo "✅ Vosk async service started with PID: $VOSK_PID"
    else
        # Use Gunicorn for production multiprocessing
        export USE_GUNICORN=true
        gunicorn -c gunicorn_config.py app_optimized:app &
        VOSK_PID=$!
        echo "✅ Vosk Gunicorn service started with PID: $VOSK_PID"
    fi
    # Wait for service to be ready
    echo "⏳ Waiting for service to be ready..."
--- a/vosk/test_files/test_service.py
+++ b/vosk/test_files/test_service.py
@@ -0,0 +1,80 @@
 #!/usr/bin/env python3
 """
 Test script to verify Vosk service is working properly.
 """
 import requests
 import time
 import json
 def test_service_health():
    """Probe the service root URL.

    Returns True when the GET answers with HTTP 200 and its body parses as
    JSON; returns False for any other status code or any raised exception.
    """
    try:
        reply = requests.get("http://localhost:5000/", timeout=5)
        # Guard clause: anything other than 200 is reported and rejected.
        if reply.status_code != 200:
            print(f"❌ Service returned status {reply.status_code}")
            return False
        print("✅ Service is healthy!")
        print(f"Response: {reply.json()}")
        return True
    except Exception as e:
        # Best-effort probe: connection errors, timeouts and bad JSON all
        # count as "not responding".
        print(f"❌ Service not responding: {e}")
        return False
 def test_batch_processing():
    """POST a two-item dummy batch to /batch_confirm.

    Returns True when the endpoint answers with HTTP 200 and a JSON body,
    False for any other status code or any raised exception.
    """
    try:
        # Form field carrying the reference sentences as a JSON-encoded list.
        form_fields = {
            'references': json.dumps(['test sentence 1', 'test sentence 2'])
        }
        # Placeholder payloads, not real audio (real WAV files are needed
        # for a meaningful recognition test).
        uploads = {
            'audio0': ('test1.wav', b'dummy_audio_data', 'audio/wav'),
            'audio1': ('test2.wav', b'dummy_audio_data', 'audio/wav')
        }
        reply = requests.post(
            "http://localhost:5000/batch_confirm",
            data=form_fields,
            files=uploads,
            timeout=30
        )
        # Guard clause: report the failure details and bail out early.
        if reply.status_code != 200:
            print(f"❌ Batch processing failed: {reply.status_code}")
            print(f"Response: {reply.text}")
            return False
        print("✅ Batch processing endpoint is working!")
        print(f"Response: {reply.json()}")
        return True
    except Exception as e:
        print(f"❌ Batch processing error: {e}")
        return False
 def main():
    """Run the health check and the batch check against the local service.

    Polls test_service_health() up to 30 times, one second apart, then runs
    test_batch_processing().

    Returns:
        0 when the service became ready and the batch check passed,
        1 otherwise, so shell callers can detect a failed run.
    """
    print("🔍 Testing Vosk service...")
    # Wait for service to be ready
    print("⏳ Waiting for service to be ready...")
    for _ in range(30):
        if test_service_health():
            break
        time.sleep(1)
    else:
        # Loop finished without a successful health check.
        print("❌ Service did not become ready within 30 seconds")
        return 1
    # Test batch processing
    print("\n🧪 Testing batch processing...")
    if not test_batch_processing():
        # Do not report overall success when the batch check failed.
        print("\n❌ Service testing finished with failures")
        return 1
    print("\n✅ Service testing complete!")
    return 0
 if __name__ == "__main__":
    import sys
    # Propagate the result as the process exit status.
    sys.exit(main())
--- a/vosk/vosk_service/app_optimized.py
+++ b/vosk/vosk_service/app_optimized.py
@@ -267,5 +267,11 @@ if __name__ == '__main__':
        app = create_async_app()
        web.run_app(app, host='0.0.0.0', port=5000)
    else:
-        # Run Flask version
+        # Run Flask version with proper multiprocessing setup
-        app.run(host='0.0.0.0', port=5000, threaded=True, processes=4) 
+        # Use Gunicorn for production multiprocessing
        if os.getenv('USE_GUNICORN', 'false').lower() == 'true':
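            # NOTE: Gunicorn imports app_optimized:app and never executes this
            # __main__ block; when run directly, the call below starts Flask's
            # built-in server, not Gunicorn.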
            app.run(host='0.0.0.0', port=5000)
        else:
            # Use threading for development
            app.run(host='0.0.0.0', port=5000, threaded=True) 
--- a/vosk/vosk_service/gunicorn_config.py
+++ b/vosk/vosk_service/gunicorn_config.py
@@ -0,0 +1,43 @@
 # Gunicorn configuration for Vosk service with 192-core optimization.
 # Loaded via `gunicorn -c gunicorn_config.py app_optimized:app`.
 # Each setting is assigned exactly once; names Gunicorn does not recognise
 # are silently ignored by its config loader, so only real settings belong here.
 # Server socket
 bind = "0.0.0.0:5000"
 backlog = 2048
 # Worker processes
 workers = 48  # 192/4 for optimal worker count
 worker_class = "sync"
 # NOTE: worker_connections has no effect with the "sync" worker class; it is
 # kept so the value is already in place if the worker class is changed.
 worker_connections = 1000
 # Memory management: recycle each worker after ~1000 requests, with jitter
 # so that the workers do not all restart at the same moment.
 max_requests = 1000
 max_requests_jitter = 50
 # Import the application in the master process before forking workers.
 preload_app = True
 # Timeout settings
 timeout = 300
 keepalive = 2
 graceful_timeout = 30
 # Logging ("-" sends the log to the standard streams)
 accesslog = "-"
 errorlog = "-"
 loglevel = "info"
 access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s" %(D)s'
 # Process naming
 proc_name = "vosk-service"
 # Security
 limit_request_line = 4094
 limit_request_fields = 100
 limit_request_field_size = 8190
 # Performance: keep the worker heartbeat files on a memory-backed filesystem
 worker_tmp_dir = "/dev/shm"
--- a/vosk/vosk_service/start_service.py
+++ b/vosk/vosk_service/start_service.py
@@ -0,0 +1,38 @@
 #!/usr/bin/env python3
 """
 Simple startup script for Vosk service that handles Flask configuration properly.
 """
 import os
 import sys
 from app_optimized import app, load_model
 def main():
    """Start the Vosk service in the mode selected by environment variables.

    Modes, in priority order:
      * USE_ASYNC=true    -> aiohttp application from create_async_app()
      * USE_GUNICORN=true -> replace this process with Gunicorn, using the
                             gunicorn_config.py that sits next to this script
      * otherwise         -> Flask's built-in server with threading

    All modes listen on 0.0.0.0:5000 (Gunicorn takes the address from its
    config file).
    """
    # Determine the mode to run in
    use_async = os.getenv('USE_ASYNC', 'false').lower() == 'true'
    use_gunicorn = os.getenv('USE_GUNICORN', 'false').lower() == 'true'
    if use_gunicorn and not use_async:
        print("Starting with Gunicorn (production mode)...")
        # app.run() only ever starts Flask's built-in server, so Gunicorn has
        # to be launched explicitly. exec replaces this process, so a PID
        # captured by the caller keeps pointing at the Gunicorn master.
        # NOTE(review): the model is not loaded here because exec discards
        # this process's memory; confirm app_optimized loads the model when
        # Gunicorn imports it.
        service_dir = os.path.dirname(os.path.abspath(__file__))
        gunicorn_args = [
            sys.executable, '-m', 'gunicorn',
            '--chdir', service_dir,
            '-c', os.path.join(service_dir, 'gunicorn_config.py'),
            'app_optimized:app',
        ]
        # Flush buffered output first; exec would otherwise drop it.
        sys.stdout.flush()
        os.execv(sys.executable, gunicorn_args)
    # Load the model first
    print("Loading Vosk model...")
    load_model()
    if use_async:
        print("Starting async aiohttp service...")
        from aiohttp import web
        from app_optimized import create_async_app
        app_async = create_async_app()
        web.run_app(app_async, host='0.0.0.0', port=5000)
    else:
        print("Starting Flask service with threading...")
        # Use threading only (no multiprocessing in Flask)
        app.run(host='0.0.0.0', port=5000, threaded=True)
 if __name__ == '__main__':
    main()