From cfe2b2346ad4c4f40489477b0a09df686f2ada78 Mon Sep 17 00:00:00 2001
From: Alireza
Date: Sat, 2 Aug 2025 17:52:11 +0330
Subject: [PATCH] Add Gunicorn support for production in Vosk service and update requirements

---
Review notes (this section is ignored when the patch is applied):
Whitespace on the context lines of the three modified files was reconstructed;
if a hunk is rejected, retry with `git apply --ignore-whitespace`.
The post-image blob ids on the `index` lines were not regenerated.

 vosk/test_files/quick_fix.sh               | 46 ++++++++++++
 vosk/test_files/requirements_optimized.txt |  1 +
 vosk/test_files/run_optimized_192cores.sh  | 18 ++++-
 vosk/test_files/test_service.py            | 86 ++++++++++++++++++++++
 vosk/vosk_service/app_optimized.py         |  7 +-
 vosk/vosk_service/gunicorn_config.py       | 37 ++++++++++
 vosk/vosk_service/start_service.py         | 71 ++++++++++++++++++
 7 files changed, 260 insertions(+), 6 deletions(-)
 create mode 100755 vosk/test_files/quick_fix.sh
 create mode 100644 vosk/test_files/test_service.py
 create mode 100644 vosk/vosk_service/gunicorn_config.py
 create mode 100644 vosk/vosk_service/start_service.py

diff --git a/vosk/test_files/quick_fix.sh b/vosk/test_files/quick_fix.sh
new file mode 100755
index 0000000..4de668f
--- /dev/null
+++ b/vosk/test_files/quick_fix.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Quick fix for Flask multiprocessing issue
+echo "🔧 Applying quick fix for Flask multiprocessing issue..."
+
+# Kill any existing Vosk service, including one launched via start_service.py
+echo "🛑 Stopping any existing Vosk service..."
+pkill -f "python.*app_optimized" || true
+pkill -f "python.*start_service" || true
+pkill -f "gunicorn.*app_optimized" || true
+
+# Wait a moment
+sleep 2
+
+# Start the service with proper configuration
+echo "🚀 Starting Vosk service with proper configuration..."
+
+# Resolve the service directory from this script's location, not the caller's cwd
+cd "$(dirname "$0")/../vosk_service" || exit 1
+
+# Use the startup script that handles configuration properly
+python start_service.py &
+VOSK_PID=$!
+
+echo "✅ Vosk service started with PID: $VOSK_PID"
+
+# Wait for service to be ready
+echo "⏳ Waiting for service to be ready..."
+SERVICE_READY=false
+for i in {1..30}; do
+    if curl -s http://localhost:5000/ > /dev/null; then
+        echo "✅ Service is ready!"
+        SERVICE_READY=true
+        break
+    fi
+    sleep 1
+done
+
+# Do not report success when the service never answered
+if [ "$SERVICE_READY" != "true" ]; then
+    echo "❌ Service did not become ready within 30 seconds" >&2
+    exit 1
+fi
+
+echo "🎯 Service should now be running without the multiprocessing error!"
+echo "💡 You can now run your optimized processing script."
diff --git a/vosk/test_files/requirements_optimized.txt b/vosk/test_files/requirements_optimized.txt
index 76c956f..18953ad 100644
--- a/vosk/test_files/requirements_optimized.txt
+++ b/vosk/test_files/requirements_optimized.txt
@@ -21,6 +21,7 @@ vosk>=0.3.45
 
 # Flask for API (if using Flask version)
 flask>=2.3.0
+gunicorn>=21.0.0
 
 # Additional optimizations
 uvloop>=0.17.0  # Faster event loop for asyncio
diff --git a/vosk/test_files/run_optimized_192cores.sh b/vosk/test_files/run_optimized_192cores.sh
index f4f2498..3fc8cfe 100755
--- a/vosk/test_files/run_optimized_192cores.sh
+++ b/vosk/test_files/run_optimized_192cores.sh
@@ -44,10 +44,20 @@ if ! curl -s http://localhost:5000/ > /dev/null; then
 
     # Start optimized Vosk service
     cd ../vosk_service
-    export USE_ASYNC=true
-    python app_optimized.py &
-    VOSK_PID=$!
-    echo "✅ Vosk service started with PID: $VOSK_PID"
+
+    # Choose between async and production mode
+    if [ "$USE_ASYNC" = "true" ]; then
+        export USE_ASYNC=true
+        python start_service.py &
+        VOSK_PID=$!
+        echo "✅ Vosk async service started with PID: $VOSK_PID"
+    else
+        # Use Gunicorn for production multiprocessing
+        export USE_GUNICORN=true
+        gunicorn -c gunicorn_config.py app_optimized:app &
+        VOSK_PID=$!
+        echo "✅ Vosk Gunicorn service started with PID: $VOSK_PID"
+    fi
 
     # Wait for service to be ready
     echo "⏳ Waiting for service to be ready..."
diff --git a/vosk/test_files/test_service.py b/vosk/test_files/test_service.py
new file mode 100644
index 0000000..ad7f83c
--- /dev/null
+++ b/vosk/test_files/test_service.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+"""
+Test script to verify Vosk service is working properly.
+"""
+
+import json
+import sys
+import time
+
+import requests
+
+BASE_URL = "http://localhost:5000"
+
+
+def test_service_health():
+    """Return True if the service root endpoint answers with HTTP 200."""
+    try:
+        response = requests.get(f"{BASE_URL}/", timeout=5)
+        if response.status_code == 200:
+            print("✅ Service is healthy!")
+            print(f"Response: {response.json()}")
+            return True
+        print(f"❌ Service returned status {response.status_code}")
+        return False
+    except (requests.RequestException, ValueError) as e:
+        # ValueError covers a 200 response whose body is not valid JSON
+        print(f"❌ Service not responding: {e}")
+        return False
+
+
+def test_batch_processing():
+    """Post a dummy two-file batch to /batch_confirm; return True on HTTP 200."""
+    # Create a simple test batch
+    test_data = {
+        'references': json.dumps(['test sentence 1', 'test sentence 2'])
+    }
+
+    # Create dummy audio files (you'll need real audio files for actual testing)
+    files = {
+        'audio0': ('test1.wav', b'dummy_audio_data', 'audio/wav'),
+        'audio1': ('test2.wav', b'dummy_audio_data', 'audio/wav')
+    }
+
+    try:
+        response = requests.post(
+            f"{BASE_URL}/batch_confirm",
+            data=test_data,
+            files=files,
+            timeout=30
+        )
+        if response.status_code == 200:
+            print("✅ Batch processing endpoint is working!")
+            print(f"Response: {response.json()}")
+            return True
+        print(f"❌ Batch processing failed: {response.status_code}")
+        print(f"Response: {response.text}")
+        return False
+    except (requests.RequestException, ValueError) as e:
+        print(f"❌ Batch processing error: {e}")
+        return False
+
+
+def main():
+    """Wait for the service, run the checks, and return a process exit code."""
+    print("🔍 Testing Vosk service...")
+
+    # Wait for service to be ready
+    print("⏳ Waiting for service to be ready...")
+    for _ in range(30):
+        if test_service_health():
+            break
+        time.sleep(1)
+    else:
+        print("❌ Service did not become ready within 30 seconds")
+        return 1
+
+    # Test batch processing
+    print("\n🧪 Testing batch processing...")
+    batch_ok = test_batch_processing()
+
+    print("\n✅ Service testing complete!")
+    return 0 if batch_ok else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/vosk/vosk_service/app_optimized.py b/vosk/vosk_service/app_optimized.py
index f2ff3ac..43f3c5d 100644
--- a/vosk/vosk_service/app_optimized.py
+++ b/vosk/vosk_service/app_optimized.py
@@ -267,5 +267,8 @@ if __name__ == '__main__':
         app = create_async_app()
         web.run_app(app, host='0.0.0.0', port=5000)
     else:
-        # Run Flask version
-        app.run(host='0.0.0.0', port=5000, threaded=True, processes=4)
\ No newline at end of file
+        # Flask's built-in server is for development and cannot combine
+        # threaded=True with processes > 1. For multi-process production
+        # serving, launch the app through Gunicorn instead:
+        #     gunicorn -c gunicorn_config.py app_optimized:app
+        app.run(host='0.0.0.0', port=5000, threaded=True)
diff --git a/vosk/vosk_service/gunicorn_config.py b/vosk/vosk_service/gunicorn_config.py
new file mode 100644
index 0000000..c0abf86
--- /dev/null
+++ b/vosk/vosk_service/gunicorn_config.py
@@ -0,0 +1,37 @@
+# Gunicorn configuration for Vosk service with 192-core optimization
+
+# Server socket
+bind = "0.0.0.0:5000"
+backlog = 2048
+
+# Worker processes
+workers = 48  # 192/4 for optimal worker count
+worker_class = "sync"
+worker_connections = 1000  # only used by gthread/eventlet/gevent workers
+
+# Memory management
+max_requests = 1000  # recycle each worker after this many requests
+max_requests_jitter = 50  # stagger recycling so workers do not restart together
+preload_app = True  # import the app in the master before forking workers
+
+# Timeout settings
+timeout = 300
+keepalive = 2  # ignored by the sync worker class
+graceful_timeout = 30
+
+# Logging
+accesslog = "-"
+errorlog = "-"
+loglevel = "info"
+access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s" %(D)s'
+
+# Process naming
+proc_name = "vosk-service"
+
+# Security
+limit_request_line = 4094
+limit_request_fields = 100
+limit_request_field_size = 8190
+
+# Performance
+worker_tmp_dir = "/dev/shm"
diff --git a/vosk/vosk_service/start_service.py b/vosk/vosk_service/start_service.py
new file mode 100644
index 0000000..98c371a
--- /dev/null
+++ b/vosk/vosk_service/start_service.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+"""
+Simple startup script for Vosk service that handles Flask configuration properly.
+"""
+
+import os
+import runpy
+
+from app_optimized import app, load_model
+
+HOST = '0.0.0.0'
+PORT = 5000
+
+
+def env_flag(name):
+    """Return True when environment variable *name* is 'true' (any case)."""
+    return os.getenv(name, 'false').lower() == 'true'
+
+
+def run_gunicorn(wsgi_app):
+    """Serve *wsgi_app* with Gunicorn using the settings in gunicorn_config.py.
+
+    The caller has already loaded the model in this process, so the workers
+    Gunicorn forks from it start with the model in memory.
+    """
+    # Imported lazily so the other modes do not require Gunicorn
+    from gunicorn.app.base import BaseApplication
+
+    config_path = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), 'gunicorn_config.py'
+    )
+    options = runpy.run_path(config_path)
+
+    class VoskApplication(BaseApplication):
+        """Gunicorn custom application wrapping the already-imported app."""
+
+        def load_config(self):
+            # run_path also returns dunder globals; keep only real settings
+            for key, value in options.items():
+                if key in self.cfg.settings and value is not None:
+                    self.cfg.set(key, value)
+
+        def load(self):
+            return wsgi_app
+
+    VoskApplication().run()
+
+
+def main():
+    """Load the model, then start the server chosen by USE_ASYNC / USE_GUNICORN."""
+    # Load the model first
+    print("Loading Vosk model...")
+    load_model()
+
+    if env_flag('USE_ASYNC'):
+        print("Starting async aiohttp service...")
+        from aiohttp import web
+        from app_optimized import create_async_app
+
+        web.run_app(create_async_app(), host=HOST, port=PORT)
+    elif env_flag('USE_GUNICORN'):
+        print("Starting with Gunicorn (production mode)...")
+        run_gunicorn(app)
+    else:
+        print("Starting Flask service with threading...")
+        # Use threading only (no multiprocessing in Flask)
+        app.run(host=HOST, port=PORT, threaded=True)
+
+
+if __name__ == '__main__':
+    main()