Add Gunicorn support for production in Vosk service and update requirements

This commit is contained in:
Alireza
2025-08-02 17:52:11 +03:30
parent 0d151529f0
commit cfe2b2346a
7 changed files with 220 additions and 6 deletions

View File

@@ -267,5 +267,11 @@ if __name__ == '__main__':
app = create_async_app()
web.run_app(app, host='0.0.0.0', port=5000)
else:
# Run Flask version
app.run(host='0.0.0.0', port=5000, threaded=True, processes=4)
# Run Flask version with proper multiprocessing setup
# Use Gunicorn for production multiprocessing
if os.getenv('USE_GUNICORN', 'false').lower() == 'true':
# NOTE: only reached when this file is executed directly; a Gunicorn worker imports `app` and never runs this block
app.run(host='0.0.0.0', port=5000)
else:
# Use threading for development
app.run(host='0.0.0.0', port=5000, threaded=True)

View File

@@ -0,0 +1,43 @@
# Gunicorn configuration for the Vosk service, sized for a 192-core host.
#
# Gunicorn reads the module-level names in this file as settings. Names it
# does not recognise are skipped without any warning, so only documented
# settings are kept here and each one is assigned exactly once.

# Server socket
bind = "0.0.0.0:5000"
backlog = 2048

# Worker processes
workers = 48  # 192 cores / 4
worker_class = "sync"
# Only consulted by the threaded/async worker types; inert while
# worker_class is "sync". Kept so a later switch of worker class has a value.
worker_connections = 1000
# Recycle each worker after roughly max_requests requests; the jitter
# staggers restarts so the workers do not all recycle at the same moment.
max_requests = 1000
max_requests_jitter = 50
# Import the application in the master process before forking the workers.
preload_app = True

# Timeout settings
timeout = 300  # seconds a worker may stay silent before it is killed
keepalive = 2  # sync workers do not keep connections alive; see worker_class
graceful_timeout = 30

# Logging ("-" sends the access log to stdout and the error log to stderr)
accesslog = "-"
errorlog = "-"
loglevel = "info"
access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s" %(D)s'

# Process naming
proc_name = "vosk-service"

# Security
limit_request_line = 4094
limit_request_fields = 100
limit_request_field_size = 8190

# Performance: keep the worker heartbeat files on an in-memory filesystem.
worker_tmp_dir = "/dev/shm"

View File

@@ -0,0 +1,38 @@
#!/usr/bin/env python3
"""
Simple startup script for Vosk service that handles Flask configuration properly.
"""
import os
import sys
from app_optimized import app, load_model
def main():
    """Load the Vosk model, then serve on 0.0.0.0:5000.

    The serving mode is picked from two environment flags, each compared
    case-insensitively against ``"true"``:

    * ``USE_ASYNC``    -- build the aiohttp app and run it with ``web.run_app``.
    * ``USE_GUNICORN`` -- call ``app.run`` without extra options.
    * neither          -- call ``app.run`` with ``threaded=True``.

    ``USE_ASYNC`` wins when both flags are set.
    """

    def _flag(name):
        # Unset variables count as "false".
        return os.getenv(name, 'false').lower() == 'true'

    listen = {'host': '0.0.0.0', 'port': 5000}

    # The model is loaded up front, whichever mode is selected below.
    print("Loading Vosk model...")
    load_model()

    async_mode = _flag('USE_ASYNC')
    gunicorn_mode = _flag('USE_GUNICORN')

    if async_mode:
        print("Starting async aiohttp service...")
        # Imported lazily so aiohttp is only needed in async mode.
        from aiohttp import web
        from app_optimized import create_async_app
        web.run_app(create_async_app(), **listen)
        return

    if gunicorn_mode:
        print("Starting with Gunicorn (production mode)...")
        # NOTE(review): nothing in this function launches Gunicorn; this
        # branch serves via app.run() in the current process. Presumably
        # Gunicorn is started separately against the app object -- confirm.
        app.run(**listen)
        return

    print("Starting Flask service with threading...")
    # Threads only; no multiprocessing on this path.
    app.run(threaded=True, **listen)


if __name__ == '__main__':
    main()