Add Gunicorn support for production in Vosk service and update requirements

2025-08-02 17:52:11 +03:30
parent 0d151529f0
commit cfe2b2346a
7 changed files with 220 additions and 6 deletions
--- a/vosk/vosk_service/app_optimized.py
+++ b/vosk/vosk_service/app_optimized.py
@@ -267,5 +267,11 @@ if __name__ == '__main__':
        app = create_async_app()
        web.run_app(app, host='0.0.0.0', port=5000)
    else:
-        # Run Flask version
-        app.run(host='0.0.0.0', port=5000, threaded=True, processes=4) 
+        # Run Flask version with proper multiprocessing setup
+        # Use Gunicorn for production multiprocessing
+        if os.getenv('USE_GUNICORN', 'false').lower() == 'true':
+            # NOTE: app.run() starts Flask's built-in server, not Gunicorn; Gunicorn imports this module and never runs this __main__ block
+            app.run(host='0.0.0.0', port=5000)
+        else:
+            # Use threading for development
+            app.run(host='0.0.0.0', port=5000, threaded=True) 
--- a/vosk/vosk_service/gunicorn_config.py
+++ b/vosk/vosk_service/gunicorn_config.py
@@ -0,0 +1,37 @@
+# Gunicorn configuration for Vosk service with 192-core optimization
+
+# Server socket
+bind = "0.0.0.0:5000"
+backlog = 2048  # maximum number of pending connections
+
+# Worker processes
+workers = 48  # 192/4 for optimal worker count
+worker_class = "sync"
+# worker_connections has no effect on the sync worker; kept in case worker_class is changed
+worker_connections = 1000
+max_requests = 1000  # restart a worker after it has served this many requests
+max_requests_jitter = 50  # random extra per worker so restarts are staggered
+preload_app = True  # load the application before the worker processes are forked
+
+# Timeout settings (seconds)
+timeout = 300
+# keepalive is ignored by the sync worker, which does not support persistent connections
+keepalive = 2
+graceful_timeout = 30
+
+# Logging ("-" writes the access log to stdout and the error log to stderr)
+accesslog = "-"
+errorlog = "-"
+loglevel = "info"
+access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s" %(D)s'
+
+# Process naming
+proc_name = "vosk-service"
+
+# Security: HTTP request size limits
+limit_request_line = 4094
+limit_request_fields = 100
+limit_request_field_size = 8190
+
+# Performance: directory for the worker heartbeat temporary file
+worker_tmp_dir = "/dev/shm"
--- a/vosk/vosk_service/start_service.py
+++ b/vosk/vosk_service/start_service.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+"""
+Startup script for the Vosk service.
+
+The server is chosen from the environment:
+  USE_ASYNC=true     -> aiohttp app built by create_async_app()
+  USE_GUNICORN=true  -> Gunicorn, configured by gunicorn_config.py
+  otherwise          -> Flask's built-in server with threading
+USE_ASYNC takes precedence when both are set.
+"""
+
+import os
+import sys
+from app_optimized import app, load_model
+
+
+def _env_flag(name):
+    """Return True when environment variable `name` equals 'true' (case-insensitive)."""
+    return os.getenv(name, 'false').lower() == 'true'
+
+
+def _exec_gunicorn():
+    """Replace this process with Gunicorn serving ``app_optimized:app``.
+
+    Does not return: on success the process image is replaced, and if the
+    ``gunicorn`` executable is missing the script exits with an error.
+    """
+    service_dir = os.path.dirname(os.path.abspath(__file__))
+    command = [
+        'gunicorn',
+        '--chdir', service_dir,
+        '--config', os.path.join(service_dir, 'gunicorn_config.py'),
+        'app_optimized:app',
+    ]
+    try:
+        os.execvp(command[0], command)
+    except FileNotFoundError:
+        sys.exit("USE_GUNICORN=true but the 'gunicorn' executable was not found on PATH")
+
+
+def main():
+    """Start the Vosk service with the server selected by the environment."""
+    use_async = _env_flag('USE_ASYNC')
+    use_gunicorn = _env_flag('USE_GUNICORN')
+
+    if use_gunicorn and not use_async:
+        # app.run() only starts Flask's built-in server, so Gunicorn has to be
+        # launched as its own program.  exec does not flush buffered output.
+        # NOTE(review): the model is not loaded here because exec discards this
+        # process; confirm app_optimized makes the model available when it is
+        # imported by Gunicorn.
+        print("Starting with Gunicorn (production mode)...", flush=True)
+        _exec_gunicorn()
+
+    # Load the model before accepting requests
+    print("Loading Vosk model...")
+    load_model()
+
+    if use_async:
+        print("Starting async aiohttp service...")
+        from aiohttp import web
+        from app_optimized import create_async_app
+
+        web.run_app(create_async_app(), host='0.0.0.0', port=5000)
+    else:
+        print("Starting Flask service with threading...")
+        app.run(host='0.0.0.0', port=5000, threaded=True)
+
+
+if __name__ == '__main__':
+    main()