Add Gunicorn support for production in Vosk service and update requirements
This commit is contained in:
@@ -267,5 +267,11 @@ if __name__ == '__main__':
|
||||
app = create_async_app()
|
||||
web.run_app(app, host='0.0.0.0', port=5000)
|
||||
else:
|
||||
# Run Flask version
|
||||
app.run(host='0.0.0.0', port=5000, threaded=True, processes=4)
|
||||
# Run Flask version with proper multiprocessing setup
|
||||
# Use Gunicorn for production multiprocessing
|
||||
if os.getenv('USE_GUNICORN', 'false').lower() == 'true':
|
||||
# This will be handled by Gunicorn
|
||||
app.run(host='0.0.0.0', port=5000)
|
||||
else:
|
||||
# Use threading for development
|
||||
app.run(host='0.0.0.0', port=5000, threaded=True)
|
||||
43
vosk/vosk_service/gunicorn_config.py
Normal file
43
vosk/vosk_service/gunicorn_config.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# Gunicorn configuration for the Vosk service.
#
# Gunicorn reads this file as plain Python and picks up the module-level
# names that match its known settings; names it does not recognise are
# silently ignored, so only real Gunicorn settings belong here.

import os

# Server socket
bind = "0.0.0.0:5000"
backlog = 2048

# Worker processes
# The default of 48 was sized for a 192-core host (192 / 4). Set
# WEB_CONCURRENCY to run the same config on differently sized machines.
workers = int(os.environ.get("WEB_CONCURRENCY", "48"))
worker_class = "sync"
# Only used by the gthread/eventlet/gevent worker types; ignored by "sync".
# Kept so the value is already in place if worker_class is changed.
worker_connections = 1000

# Memory management: recycle each worker after ~1000 requests to bound
# memory growth; the jitter staggers restarts so workers do not all recycle
# at the same moment.
max_requests = 1000
max_requests_jitter = 50
# Import the application in the master process before forking workers.
preload_app = True

# Timeout settings
timeout = 300
# The "sync" worker does not support persistent connections and ignores this.
keepalive = 2
graceful_timeout = 30

# Logging ("-" sends the access log to stdout and the error log to stderr)
accesslog = "-"
errorlog = "-"
loglevel = "info"
access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s" %(D)s'

# Process naming
proc_name = "vosk-service"

# Security
limit_request_line = 4094
limit_request_fields = 100
limit_request_field_size = 8190

# Performance
# Keep worker heartbeat files on tmpfs so heartbeats never block on disk I/O.
worker_tmp_dir = "/dev/shm"
|
||||
38
vosk/vosk_service/start_service.py
Normal file
38
vosk/vosk_service/start_service.py
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple startup script for Vosk service that handles Flask configuration properly.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from app_optimized import app, load_model
|
||||
|
||||
def main():
    """Start the Vosk service in the mode selected by environment variables.

    The model is loaded first via ``load_model()``. The server is then chosen
    from two boolean-like environment variables (the string ``"true"``,
    case-insensitive, enables a mode):

    * ``USE_ASYNC``    -- serve the aiohttp app built by ``create_async_app()``.
    * ``USE_GUNICORN`` -- accepted for compatibility, but this script cannot
      start Gunicorn; it warns on stderr and serves with Flask's built-in
      server.
    * neither          -- serve the Flask app with ``threaded=True``.

    ``USE_ASYNC`` takes precedence when both are set. Every mode listens on
    0.0.0.0:5000 and blocks until the server stops.
    """
    # Load the model before any server starts accepting requests.
    print("Loading Vosk model...")
    load_model()

    # Determine the mode to run in.
    use_async = os.getenv('USE_ASYNC', 'false').lower() == 'true'
    use_gunicorn = os.getenv('USE_GUNICORN', 'false').lower() == 'true'

    if use_async:
        print("Starting async aiohttp service...")
        # Imported lazily so aiohttp is only required in async mode.
        from aiohttp import web
        from app_optimized import create_async_app

        app_async = create_async_app()
        web.run_app(app_async, host='0.0.0.0', port=5000)
        return

    if use_gunicorn:
        # app.run() always starts Flask's built-in server, so announcing
        # "Gunicorn (production mode)" here was false. Gunicorn has to be
        # the launching process; tell the operator how to do that.
        # NOTE(review): confirm app_optimized loads the model when imported
        # by Gunicorn, since load_model() above only runs in this process.
        print(
            "WARNING: USE_GUNICORN=true, but this script does not launch "
            "Gunicorn; serving with Flask's built-in server instead. "
            "For production run: "
            "gunicorn -c gunicorn_config.py app_optimized:app",
            file=sys.stderr,
        )
        app.run(host='0.0.0.0', port=5000)
        return

    print("Starting Flask service with threading...")
    # Use threading only (no multiprocessing in Flask's built-in server).
    app.run(host='0.0.0.0', port=5000, threaded=True)
|
||||
|
||||
# Start the service only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user