Add Gunicorn support for production in Vosk service and update requirements

This commit is contained in:
Alireza
2025-08-02 17:52:11 +03:30
parent 0d151529f0
commit cfe2b2346a
7 changed files with 220 additions and 6 deletions

36
vosk/test_files/quick_fix.sh Executable file
View File

@@ -0,0 +1,36 @@
#!/bin/bash
# Quick fix for Flask multiprocessing issue
#
# Stops any running Vosk service, restarts it through start_service.py and
# polls http://localhost:5000/ for up to 30 attempts. Exits non-zero when the
# service directory is missing, the service process dies, or the service never
# answers, so callers can tell a failed restart from a successful one.

echo "🔧 Applying quick fix for Flask multiprocessing issue..."

# Kill any existing Vosk service
echo "🛑 Stopping any existing Vosk service..."
pkill -f "python.*app_optimized" || true
pkill -f "gunicorn.*app_optimized" || true
# This script launches the service as "python start_service.py", which the two
# patterns above do not match; without this line a previous run keeps the port.
pkill -f "python.*start_service" || true

# Wait a moment
sleep 2

# Start the service with proper configuration
echo "🚀 Starting Vosk service with proper configuration..."
# Resolve the service directory relative to this script, not the caller's cwd,
# and stop if it is missing instead of starting python in the wrong directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR/../vosk_service" || {
    echo "❌ Could not change to $SCRIPT_DIR/../vosk_service"
    exit 1
}

# Use the startup script that handles configuration properly
python start_service.py &
VOSK_PID=$!
echo "✅ Vosk service started with PID: $VOSK_PID"

# Wait for service to be ready
echo "⏳ Waiting for service to be ready..."
READY=false
for i in {1..30}; do
    # Bail out early if the background process has already exited
    if ! kill -0 "$VOSK_PID" 2>/dev/null; then
        echo "❌ Vosk service process (PID $VOSK_PID) exited before becoming ready"
        exit 1
    fi
    if curl -s http://localhost:5000/ > /dev/null; then
        echo "✅ Service is ready!"
        READY=true
        break
    fi
    sleep 1
done

# Only claim success when a readiness probe actually succeeded
if [ "$READY" != "true" ]; then
    echo "❌ Service did not become ready after 30 attempts"
    exit 1
fi

echo "🎯 Service should now be running without the multiprocessing error!"
echo "💡 You can now run your optimized processing script."

View File

@@ -21,6 +21,7 @@ vosk>=0.3.45
# Flask for API (if using Flask version) # Flask for API (if using Flask version)
flask>=2.3.0 flask>=2.3.0
gunicorn>=21.0.0
# Additional optimizations # Additional optimizations
uvloop>=0.17.0 # Faster event loop for asyncio uvloop>=0.17.0 # Faster event loop for asyncio

View File

@@ -44,10 +44,20 @@ if ! curl -s http://localhost:5000/ > /dev/null; then
# Start optimized Vosk service # Start optimized Vosk service
cd ../vosk_service cd ../vosk_service
export USE_ASYNC=true
python app_optimized.py & # Choose between async and production mode
VOSK_PID=$! if [ "$USE_ASYNC" = "true" ]; then
echo "✅ Vosk service started with PID: $VOSK_PID" export USE_ASYNC=true
python start_service.py &
VOSK_PID=$!
echo "✅ Vosk async service started with PID: $VOSK_PID"
else
# Use Gunicorn for production multiprocessing
export USE_GUNICORN=true
gunicorn -c gunicorn_config.py app_optimized:app &
VOSK_PID=$!
echo "✅ Vosk Gunicorn service started with PID: $VOSK_PID"
fi
# Wait for service to be ready # Wait for service to be ready
echo "⏳ Waiting for service to be ready..." echo "⏳ Waiting for service to be ready..."

View File

@@ -0,0 +1,80 @@
#!/usr/bin/env python3
"""
Test script to verify Vosk service is working properly.
"""
import requests
import time
import json
def test_service_health():
    """Probe the service root URL and report whether it answered with HTTP 200.

    Returns:
        True when GET http://localhost:5000/ responds with status 200;
        False for any other status code or when any exception is raised
        (connection failure, timeout, non-JSON body, ...).
    """
    try:
        reply = requests.get("http://localhost:5000/", timeout=5)
        # Guard clause: anything but 200 counts as unhealthy
        if reply.status_code != 200:
            print(f"❌ Service returned status {reply.status_code}")
            return False
        print("✅ Service is healthy!")
        print(f"Response: {reply.json()}")
        return True
    except Exception as exc:
        # Best-effort probe: every failure is reported as "not responding"
        print(f"❌ Service not responding: {exc}")
        return False
def test_batch_processing():
    """POST a two-item dummy batch to /batch_confirm and report the outcome.

    The request carries a JSON-encoded ``references`` form field and two
    placeholder uploads (``audio0``, ``audio1``). The payloads are not real
    audio; real files are needed for a meaningful end-to-end check.

    Returns:
        True when the endpoint answers with HTTP 200; False for any other
        status code or when any exception is raised.
    """
    try:
        # Reference sentences are sent as a single JSON-encoded form field
        form_fields = {
            'references': json.dumps(['test sentence 1', 'test sentence 2'])
        }
        # Dummy uploads: audio0 -> test1.wav, audio1 -> test2.wav
        uploads = {
            f'audio{idx}': (f'test{idx + 1}.wav', b'dummy_audio_data', 'audio/wav')
            for idx in range(2)
        }
        reply = requests.post(
            "http://localhost:5000/batch_confirm",
            data=form_fields,
            files=uploads,
            timeout=30
        )
        # Guard clause: report the failure details and stop
        if reply.status_code != 200:
            print(f"❌ Batch processing failed: {reply.status_code}")
            print(f"Response: {reply.text}")
            return False
        print("✅ Batch processing endpoint is working!")
        print(f"Response: {reply.json()}")
        return True
    except Exception as exc:
        print(f"❌ Batch processing error: {exc}")
        return False
def main():
    """Wait for the Vosk service, then exercise the batch endpoint.

    Polls test_service_health() up to 30 times, one second apart, and then
    runs test_batch_processing().

    Returns:
        0 when the service became ready and the batch test passed,
        1 when the service never became ready or the batch test failed.
    """
    print("🔍 Testing Vosk service...")

    # Wait for service to be ready
    print("⏳ Waiting for service to be ready...")
    for _ in range(30):
        if test_service_health():
            break
        time.sleep(1)
    else:
        # for/else: reached only when no probe succeeded
        print("❌ Service did not become ready within 30 seconds")
        return 1

    # Test batch processing
    print("\n🧪 Testing batch processing...")
    if not test_batch_processing():
        # Do not print the success banner when the batch check failed
        print("\n❌ Service testing finished with failures")
        return 1

    print("\n✅ Service testing complete!")
    return 0


if __name__ == "__main__":
    # Propagate the result as the process exit status so shells/CI can react
    raise SystemExit(main())

View File

@@ -267,5 +267,11 @@ if __name__ == '__main__':
app = create_async_app() app = create_async_app()
web.run_app(app, host='0.0.0.0', port=5000) web.run_app(app, host='0.0.0.0', port=5000)
else: else:
# Run Flask version # Run Flask version with proper multiprocessing setup
app.run(host='0.0.0.0', port=5000, threaded=True, processes=4) # Use Gunicorn for production multiprocessing
if os.getenv('USE_GUNICORN', 'false').lower() == 'true':
# This will be handled by Gunicorn
app.run(host='0.0.0.0', port=5000)
else:
# Use threading for development
app.run(host='0.0.0.0', port=5000, threaded=True)

View File

@@ -0,0 +1,43 @@
# Gunicorn configuration for Vosk service with 192-core optimization
#
# Every module-level name below is a gunicorn setting. Names gunicorn does not
# recognise are silently ignored when the file is loaded, so only real
# settings are kept here and each one is assigned exactly once.

# Server socket
bind = "0.0.0.0:5000"
backlog = 2048

# Worker processes
workers = 48  # 192/4 for optimal worker count
worker_class = "sync"
# NOTE: worker_connections only affects the gthread/eventlet/gevent worker
# types; it has no effect while worker_class is "sync". Kept so that switching
# worker_class later does not silently fall back to the default.
worker_connections = 1000

# Memory management: recycle each worker after roughly max_requests requests;
# the jitter staggers restarts so all workers do not recycle at once.
max_requests = 1000
max_requests_jitter = 50
# Import the application in the master process before forking workers
preload_app = True

# Timeout settings
timeout = 300
# NOTE: the sync worker does not keep connections alive and ignores this
# value; it only matters for other worker classes.
keepalive = 2
graceful_timeout = 30

# Logging ("-" sends the log to stdout/stderr)
accesslog = "-"
errorlog = "-"
loglevel = "info"
access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s" %(D)s'

# Process naming
proc_name = "vosk-service"

# Security
limit_request_line = 4094
limit_request_fields = 100
limit_request_field_size = 8190

# Performance: keep the worker heartbeat file on a memory-backed filesystem
# (Linux-specific path).
worker_tmp_dir = "/dev/shm"

# Removed: worker_exit_on_app_exit and worker_abort_on_app_exit. Neither is a
# gunicorn setting, so both were ignored and had no effect.

View File

@@ -0,0 +1,38 @@
#!/usr/bin/env python3
"""
Simple startup script for Vosk service that handles Flask configuration properly.
"""
import os
import sys
from app_optimized import app, load_model
def main():
    """Start the Vosk service in the mode selected by environment variables.

    Modes (compared case-insensitively against the string "true"):
        USE_ASYNC=true     run the aiohttp app from create_async_app()
        USE_GUNICORN=true  replace this process with gunicorn serving
                           app_optimized:app using gunicorn_config.py
        neither            run Flask's built-in server with threaded=True

    USE_ASYNC takes precedence when both variables are set. All modes listen
    on 0.0.0.0:5000 (the gunicorn bind address comes from gunicorn_config.py).

    Raises:
        OSError: in gunicorn mode, when the gunicorn executable cannot be
            found or executed.
    """
    # Determine the mode to run in
    use_async = os.getenv('USE_ASYNC', 'false').lower() == 'true'
    use_gunicorn = os.getenv('USE_GUNICORN', 'false').lower() == 'true'

    if use_gunicorn and not use_async:
        print("Starting with Gunicorn (production mode)...")
        # exec replaces this process image, so flush buffered output first
        sys.stdout.flush()
        # Calling app.run() here would start Flask's development server, not
        # gunicorn. Hand over to gunicorn with the command the launcher script
        # uses; paths are relative to the current working directory.
        # NOTE(review): load_model() is skipped because the exec would discard
        # it; confirm app_optimized loads the model when imported by gunicorn.
        os.execvp(
            'gunicorn',
            ['gunicorn', '-c', 'gunicorn_config.py', 'app_optimized:app'],
        )

    # Load the model first
    print("Loading Vosk model...")
    load_model()

    if use_async:
        print("Starting async aiohttp service...")
        # Imported lazily so aiohttp is only required in async mode
        from aiohttp import web
        from app_optimized import create_async_app
        app_async = create_async_app()
        web.run_app(app_async, host='0.0.0.0', port=5000)
    else:
        print("Starting Flask service with threading...")
        # Use threading only (no multiprocessing in Flask)
        app.run(host='0.0.0.0', port=5000, threaded=True)
if __name__ == '__main__':
main()