Add Gunicorn support for production in Vosk service and update requirements
This commit is contained in:
36
vosk/test_files/quick_fix.sh
Executable file
36
vosk/test_files/quick_fix.sh
Executable file
@@ -0,0 +1,36 @@
|
|||||||
|
#!/bin/bash
#
# Quick fix for Flask multiprocessing issue.
#
# Stops any running Vosk service, restarts it through start_service.py and
# waits up to 30 seconds for it to answer on http://localhost:5000/.
# Exits non-zero when the service directory cannot be entered or the service
# never becomes reachable.

echo "🔧 Applying quick fix for Flask multiprocessing issue..."

# Kill any existing Vosk service. The start_service pattern is required
# because that is how this script itself launches the service; without it a
# second run would leave the previous instance bound to port 5000.
echo "🛑 Stopping any existing Vosk service..."
pkill -f "python.*app_optimized" || true
pkill -f "python.*start_service" || true
pkill -f "gunicorn.*app_optimized" || true

# Give the old processes a moment to release the port
sleep 2

# Start the service with proper configuration
echo "🚀 Starting Vosk service with proper configuration..."

# Resolve the service directory relative to this script rather than the
# caller's working directory, and stop if it is missing so that
# start_service.py is never looked up in the wrong place.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR/../vosk_service" || {
    echo "❌ Could not enter $SCRIPT_DIR/../vosk_service" >&2
    exit 1
}

# Use the startup script that handles configuration properly
python start_service.py &
VOSK_PID=$!

echo "✅ Vosk service started with PID: $VOSK_PID"

# Wait for service to be ready
echo "⏳ Waiting for service to be ready..."
ready=false
for _ in {1..30}; do
    if curl -s http://localhost:5000/ > /dev/null; then
        ready=true
        break
    fi
    # No point in waiting out the full timeout if the process already died
    if ! kill -0 "$VOSK_PID" 2>/dev/null; then
        break
    fi
    sleep 1
done

if [ "$ready" != "true" ]; then
    echo "❌ Service did not become ready (PID $VOSK_PID); check its output above." >&2
    exit 1
fi

echo "✅ Service is ready!"
echo "🎯 Service should now be running without the multiprocessing error!"
echo "💡 You can now run your optimized processing script."
|
||||||
@@ -21,6 +21,7 @@ vosk>=0.3.45
|
|||||||
|
|
||||||
# Flask for API (if using Flask version)
|
# Flask for API (if using Flask version)
|
||||||
flask>=2.3.0
|
flask>=2.3.0
|
||||||
|
gunicorn>=21.0.0
|
||||||
|
|
||||||
# Additional optimizations
|
# Additional optimizations
|
||||||
uvloop>=0.17.0 # Faster event loop for asyncio
|
uvloop>=0.17.0 # Faster event loop for asyncio
|
||||||
|
|||||||
@@ -44,10 +44,20 @@ if ! curl -s http://localhost:5000/ > /dev/null; then
|
|||||||
|
|
||||||
# Start optimized Vosk service
|
# Start optimized Vosk service
|
||||||
cd ../vosk_service
|
cd ../vosk_service
|
||||||
|
|
||||||
|
# Choose between async and production mode
|
||||||
|
if [ "$USE_ASYNC" = "true" ]; then
|
||||||
export USE_ASYNC=true
|
export USE_ASYNC=true
|
||||||
python app_optimized.py &
|
python start_service.py &
|
||||||
VOSK_PID=$!
|
VOSK_PID=$!
|
||||||
echo "✅ Vosk service started with PID: $VOSK_PID"
|
echo "✅ Vosk async service started with PID: $VOSK_PID"
|
||||||
|
else
|
||||||
|
# Use Gunicorn for production multiprocessing
|
||||||
|
export USE_GUNICORN=true
|
||||||
|
gunicorn -c gunicorn_config.py app_optimized:app &
|
||||||
|
VOSK_PID=$!
|
||||||
|
echo "✅ Vosk Gunicorn service started with PID: $VOSK_PID"
|
||||||
|
fi
|
||||||
|
|
||||||
# Wait for service to be ready
|
# Wait for service to be ready
|
||||||
echo "⏳ Waiting for service to be ready..."
|
echo "⏳ Waiting for service to be ready..."
|
||||||
|
|||||||
80
vosk/test_files/test_service.py
Normal file
80
vosk/test_files/test_service.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test script to verify Vosk service is working properly.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
|
||||||
|
def test_service_health():
    """Probe the service root URL and report whether it answered with 200.

    Returns:
        True when GET http://localhost:5000/ returns status 200 and its JSON
        body could be printed; False on any other status or on any exception
        raised while making the request or decoding the body.
    """
    try:
        reply = requests.get("http://localhost:5000/", timeout=5)
        if reply.status_code != 200:
            print(f"❌ Service returned status {reply.status_code}")
            return False
        print("✅ Service is healthy!")
        print(f"Response: {reply.json()}")
        return True
    except Exception as exc:
        # Deliberately broad: this is a probe, any failure just means "not ready".
        print(f"❌ Service not responding: {exc}")
        return False
|
||||||
|
|
||||||
|
def test_batch_processing():
    """Send a two-item dummy batch to /batch_confirm and report the outcome.

    The uploaded "audio" is placeholder bytes, not real WAV data, so this
    only checks that the endpoint is reachable and answers; real audio files
    are needed for a meaningful recognition test.

    Returns:
        True when the endpoint answers with status 200, False on any other
        status or on any exception.
    """
    endpoint = "http://localhost:5000/batch_confirm"
    try:
        # Form field carrying the reference sentences as a JSON array
        form_fields = {
            'references': json.dumps(['test sentence 1', 'test sentence 2']),
        }

        # Placeholder uploads, one per reference sentence
        uploads = {
            'audio0': ('test1.wav', b'dummy_audio_data', 'audio/wav'),
            'audio1': ('test2.wav', b'dummy_audio_data', 'audio/wav'),
        }

        reply = requests.post(
            endpoint,
            data=form_fields,
            files=uploads,
            timeout=30,
        )

        if reply.status_code != 200:
            print(f"❌ Batch processing failed: {reply.status_code}")
            print(f"Response: {reply.text}")
            return False

        print("✅ Batch processing endpoint is working!")
        print(f"Response: {reply.json()}")
        return True

    except Exception as exc:
        print(f"❌ Batch processing error: {exc}")
        return False
|
||||||
|
|
||||||
|
def main():
    """Wait for the service to come up, then exercise the batch endpoint.

    Polls test_service_health() once per second, at most 30 times. If the
    service never reports healthy, prints an error and returns without
    running the batch test.
    """
    print("🔍 Testing Vosk service...")

    # Wait for service to be ready
    print("⏳ Waiting for service to be ready...")
    attempts_left = 30
    while attempts_left > 0:
        if test_service_health():
            break
        time.sleep(1)
        attempts_left -= 1

    # The counter only reaches zero when every attempt failed
    if attempts_left == 0:
        print("❌ Service did not become ready within 30 seconds")
        return

    # Test batch processing
    print("\n🧪 Testing batch processing...")
    test_batch_processing()

    print("\n✅ Service testing complete!")


if __name__ == "__main__":
    main()
|
||||||
@@ -267,5 +267,11 @@ if __name__ == '__main__':
|
|||||||
app = create_async_app()
|
app = create_async_app()
|
||||||
web.run_app(app, host='0.0.0.0', port=5000)
|
web.run_app(app, host='0.0.0.0', port=5000)
|
||||||
else:
|
else:
|
||||||
# Run Flask version
|
# Run Flask version with proper multiprocessing setup
|
||||||
app.run(host='0.0.0.0', port=5000, threaded=True, processes=4)
|
# Use Gunicorn for production multiprocessing
|
||||||
|
if os.getenv('USE_GUNICORN', 'false').lower() == 'true':
|
||||||
|
# This will be handled by Gunicorn
|
||||||
|
app.run(host='0.0.0.0', port=5000)
|
||||||
|
else:
|
||||||
|
# Use threading for development
|
||||||
|
app.run(host='0.0.0.0', port=5000, threaded=True)
|
||||||
43
vosk/vosk_service/gunicorn_config.py
Normal file
43
vosk/vosk_service/gunicorn_config.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
# Gunicorn configuration for Vosk service with 192-core optimization
import os

# Server socket
bind = "0.0.0.0:5000"
backlog = 2048

# Worker processes
# Default of 48 is 192 cores / 4. Set GUNICORN_WORKERS to override it on
# hosts with a different core count without editing this file.
workers = int(os.environ.get("GUNICORN_WORKERS", "48"))
worker_class = "sync"
# NOTE: per the Gunicorn docs this only affects gthread/eventlet/gevent
# workers; it has no effect with worker_class = "sync".
worker_connections = 1000

# Memory management: recycle each worker after roughly max_requests requests
# (the jitter staggers restarts), and import the app once in the master
# before forking workers.
max_requests = 1000
max_requests_jitter = 50
preload_app = True

# Timeout settings
timeout = 300
keepalive = 2
graceful_timeout = 30

# Logging ("-" sends the log to stdout/stderr)
accesslog = "-"
errorlog = "-"
loglevel = "info"
access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s" %(D)s'

# Process naming
proc_name = "vosk-service"

# Security
limit_request_line = 4094
limit_request_fields = 100
limit_request_field_size = 8190

# Performance
# Keep worker heartbeat files on tmpfs when available; fall back to
# Gunicorn's default temp directory on hosts without /dev/shm.
worker_tmp_dir = "/dev/shm" if os.path.isdir("/dev/shm") else None
|
||||||
38
vosk/vosk_service/start_service.py
Normal file
38
vosk/vosk_service/start_service.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Simple startup script for Vosk service that handles Flask configuration properly.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from app_optimized import app, load_model
|
||||||
|
|
||||||
|
def main():
    """Load the Vosk model and start the HTTP service on 0.0.0.0:5000.

    The server is chosen from environment variables:

    * ``USE_ASYNC=true`` - run the aiohttp application returned by
      ``create_async_app()``.
    * anything else - run the Flask app on its built-in threaded server.

    ``USE_GUNICORN=true`` cannot be honoured from inside this script: Gunicorn
    is a separate server that must import the app itself. The flag therefore
    only triggers a warning explaining how to launch Gunicorn, and the Flask
    built-in server is started as before.
    """
    # Load the model first
    print("Loading Vosk model...")
    load_model()

    # Determine the mode to run in
    use_async = os.getenv('USE_ASYNC', 'false').lower() == 'true'
    use_gunicorn = os.getenv('USE_GUNICORN', 'false').lower() == 'true'

    if use_async:
        print("Starting async aiohttp service...")
        # Imported lazily so aiohttp is only required in async mode
        from aiohttp import web
        from app_optimized import create_async_app

        app_async = create_async_app()
        web.run_app(app_async, host='0.0.0.0', port=5000)
        return

    if use_gunicorn:
        # Previously this branch announced "Gunicorn (production mode)" while
        # actually calling app.run(), i.e. Flask's built-in server.
        print(
            "WARNING: USE_GUNICORN is set, but this script only starts Flask's "
            "built-in server. For production run: "
            "gunicorn -c gunicorn_config.py app_optimized:app"
        )

    print("Starting Flask service with threading...")
    # Use threading only (no multiprocessing in Flask). threaded=True is
    # already Flask's default since 1.0, so this matches what the former
    # USE_GUNICORN branch effectively ran.
    app.run(host='0.0.0.0', port=5000, threaded=True)


if __name__ == '__main__':
    main()
|
||||||
Reference in New Issue
Block a user