Add Gunicorn support for production in Vosk service and update requirements
This commit is contained in:
36
vosk/test_files/quick_fix.sh
Executable file
36
vosk/test_files/quick_fix.sh
Executable file
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
#
# Quick fix for Flask multiprocessing issue.
#
# Stops any running Vosk service, restarts it through start_service.py and
# waits (up to ~30 seconds) until it answers on http://localhost:5000/.
# Exits non-zero if the service directory is missing, if the service
# process dies during startup, or if it never becomes ready.
#
# Usage: run from a directory that has ../vosk_service as a sibling, because
# the script changes into that relative path.

echo "🔧 Applying quick fix for Flask multiprocessing issue..."

# Kill any existing Vosk service
echo "🛑 Stopping any existing Vosk service..."
# pkill returns non-zero when nothing matched; that is expected here.
pkill -f "python.*app_optimized" || true
pkill -f "gunicorn.*app_optimized" || true
# This script launches the service via start_service.py, so a previous run
# must be matched by that name as well or it keeps holding port 5000.
pkill -f "python.*start_service" || true

# Wait a moment
sleep 2

# Start the service with proper configuration
echo "🚀 Starting Vosk service with proper configuration..."

cd ../vosk_service || {
  echo "❌ Cannot change to ../vosk_service" >&2
  exit 1
}

# Use the startup script that handles configuration properly
python start_service.py &
VOSK_PID=$!

echo "✅ Vosk service started with PID: $VOSK_PID"

# Wait for service to be ready
echo "⏳ Waiting for service to be ready..."
service_ready=false
for _ in {1..30}; do
  if curl -s http://localhost:5000/ > /dev/null; then
    echo "✅ Service is ready!"
    service_ready=true
    break
  fi
  # Give up early when the background process is already gone.
  if ! kill -0 "$VOSK_PID" 2> /dev/null; then
    echo "❌ Vosk service (PID $VOSK_PID) exited before becoming ready" >&2
    exit 1
  fi
  sleep 1
done

if [ "$service_ready" != "true" ]; then
  echo "❌ Service did not become ready within 30 seconds" >&2
  exit 1
fi

echo "🎯 Service should now be running without the multiprocessing error!"
echo "💡 You can now run your optimized processing script."
|
||||
@@ -21,6 +21,7 @@ vosk>=0.3.45
|
||||
|
||||
# Flask for API (if using Flask version)
|
||||
flask>=2.3.0
|
||||
gunicorn>=21.0.0
|
||||
|
||||
# Additional optimizations
|
||||
uvloop>=0.17.0 # Faster event loop for asyncio
|
||||
|
||||
@@ -44,10 +44,20 @@ if ! curl -s http://localhost:5000/ > /dev/null; then
|
||||
|
||||
# Start optimized Vosk service
|
||||
cd ../vosk_service
|
||||
export USE_ASYNC=true
|
||||
python app_optimized.py &
|
||||
VOSK_PID=$!
|
||||
echo "✅ Vosk service started with PID: $VOSK_PID"
|
||||
|
||||
# Choose between async and production mode
|
||||
if [ "$USE_ASYNC" = "true" ]; then
|
||||
export USE_ASYNC=true
|
||||
python start_service.py &
|
||||
VOSK_PID=$!
|
||||
echo "✅ Vosk async service started with PID: $VOSK_PID"
|
||||
else
|
||||
# Use Gunicorn for production multiprocessing
|
||||
export USE_GUNICORN=true
|
||||
gunicorn -c gunicorn_config.py app_optimized:app &
|
||||
VOSK_PID=$!
|
||||
echo "✅ Vosk Gunicorn service started with PID: $VOSK_PID"
|
||||
fi
|
||||
|
||||
# Wait for service to be ready
|
||||
echo "⏳ Waiting for service to be ready..."
|
||||
|
||||
80
vosk/test_files/test_service.py
Normal file
80
vosk/test_files/test_service.py
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script to verify Vosk service is working properly.
|
||||
"""
|
||||
|
||||
import requests
|
||||
import time
|
||||
import json
|
||||
|
||||
def test_service_health():
    """Probe the service root URL and report whether it is healthy.

    Returns:
        True when GET http://localhost:5000/ answers with status 200 (the
        decoded JSON body is printed); False for any other status code or
        when any exception is raised while requesting or decoding.
    """
    try:
        reply = requests.get("http://localhost:5000/", timeout=5)
        # Guard clause: anything but 200 counts as unhealthy.
        if reply.status_code != 200:
            print(f"❌ Service returned status {reply.status_code}")
            return False
        print("✅ Service is healthy!")
        print(f"Response: {reply.json()}")
        return True
    except Exception as err:
        print(f"❌ Service not responding: {err}")
        return False
|
||||
|
||||
def test_batch_processing():
    """POST a two-item dummy batch to /batch_confirm and report the outcome.

    The uploaded "audio" is placeholder bytes, not real WAV data, so this
    only exercises the endpoint plumbing.

    Returns:
        True when the endpoint answers with status 200 (the decoded JSON body
        is printed); False for any other status or on any exception.
    """
    try:
        # Reference sentences are sent as one JSON-encoded form field.
        form_fields = {
            'references': json.dumps(['test sentence 1', 'test sentence 2'])
        }

        # Dummy uploads: audio0 -> test1.wav, audio1 -> test2.wav.
        uploads = {
            f'audio{idx}': (f'test{idx + 1}.wav', b'dummy_audio_data', 'audio/wav')
            for idx in range(2)
        }

        reply = requests.post(
            "http://localhost:5000/batch_confirm",
            data=form_fields,
            files=uploads,
            timeout=30,
        )

        if reply.status_code != 200:
            print(f"❌ Batch processing failed: {reply.status_code}")
            print(f"Response: {reply.text}")
            return False

        print("✅ Batch processing endpoint is working!")
        print(f"Response: {reply.json()}")
        return True

    except Exception as err:
        print(f"❌ Batch processing error: {err}")
        return False
|
||||
|
||||
def main():
    """Wait for the service to come up, then run the batch-processing test.

    Polls test_service_health() once per second for up to 30 attempts, then
    calls test_batch_processing().

    Returns:
        True when the service became healthy and the batch test passed,
        False otherwise.
    """
    print("🔍 Testing Vosk service...")

    # Wait for service to be ready
    print("⏳ Waiting for service to be ready...")
    for _ in range(30):
        if test_service_health():
            break
        time.sleep(1)
    else:
        # for/else: reached only when no attempt succeeded.
        print("❌ Service did not become ready within 30 seconds")
        return False

    # Test batch processing
    print("\n🧪 Testing batch processing...")
    batch_ok = test_batch_processing()

    # Report the real outcome instead of always claiming success.
    if batch_ok:
        print("\n✅ Service testing complete!")
    else:
        print("\n❌ Service testing finished with failures")
    return batch_ok


if __name__ == "__main__":
    import sys

    # Propagate the result so shell callers and CI can detect failure.
    sys.exit(0 if main() else 1)
|
||||
@@ -267,5 +267,11 @@ if __name__ == '__main__':
|
||||
app = create_async_app()
|
||||
web.run_app(app, host='0.0.0.0', port=5000)
|
||||
else:
|
||||
# Run Flask version
|
||||
app.run(host='0.0.0.0', port=5000, threaded=True, processes=4)
|
||||
# Run Flask version with proper multiprocessing setup
|
||||
# Use Gunicorn for production multiprocessing
|
||||
if os.getenv('USE_GUNICORN', 'false').lower() == 'true':
|
||||
# This will be handled by Gunicorn
|
||||
app.run(host='0.0.0.0', port=5000)
|
||||
else:
|
||||
# Use threading for development
|
||||
app.run(host='0.0.0.0', port=5000, threaded=True)
|
||||
43
vosk/vosk_service/gunicorn_config.py
Normal file
43
vosk/vosk_service/gunicorn_config.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# Gunicorn configuration for Vosk service with 192-core optimization
#
# Loaded with: gunicorn -c gunicorn_config.py app_optimized:app
# Each setting is assigned exactly once; names that Gunicorn does not define
# are not listed here because Gunicorn ignores unknown names in config files.

# Server socket
bind = "0.0.0.0:5000"
backlog = 2048

# Worker processes
workers = 48  # 192/4 for optimal worker count
worker_class = "sync"
# Has no effect with the "sync" worker class; kept so that switching
# worker_class to an async worker type needs no further edits.
worker_connections = 1000

# Memory management: recycle each worker after ~max_requests requests, with
# jitter so that workers do not all restart at the same moment.
max_requests = 1000
max_requests_jitter = 50
# Import the application in the master process before forking workers.
preload_app = True

# Timeout settings
timeout = 300
keepalive = 2
graceful_timeout = 30

# Logging ("-" sends the log to stdout for accesslog and stderr for errorlog)
accesslog = "-"
errorlog = "-"
loglevel = "info"
access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s" %(D)s'

# Process naming
proc_name = "vosk-service"

# Security
limit_request_line = 4094
limit_request_fields = 100
limit_request_field_size = 8190

# Performance: keep the worker heartbeat file on a memory-backed filesystem.
worker_tmp_dir = "/dev/shm"
|
||||
38
vosk/vosk_service/start_service.py
Normal file
38
vosk/vosk_service/start_service.py
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple startup script for Vosk service that handles Flask configuration properly.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from app_optimized import app, load_model
|
||||
|
||||
def main():
    """Start the Vosk service in the mode selected by environment variables.

    Modes, in order of precedence:
        USE_ASYNC=true     run the aiohttp app built by create_async_app().
        USE_GUNICORN=true  replace this process with Gunicorn, using the
                           gunicorn_config.py that sits next to this file.
        otherwise          run Flask's built-in server with threading.

    Raises:
        OSError: in Gunicorn mode, when the gunicorn executable cannot be
            started (for example, it is not on PATH).
    """
    # Determine the mode to run in
    use_async = os.getenv('USE_ASYNC', 'false').lower() == 'true'
    use_gunicorn = os.getenv('USE_GUNICORN', 'false').lower() == 'true'

    if use_gunicorn and not use_async:
        # app.run() would start Flask's development server, not Gunicorn, so
        # hand the process over to the real Gunicorn executable instead.
        # flush=True because exec discards anything still buffered in stdout.
        print("Starting with Gunicorn (production mode)...", flush=True)
        service_dir = os.path.dirname(os.path.abspath(__file__))
        # NOTE(review): the model is not loaded here because exec replaces
        # this process; confirm app_optimized loads it when Gunicorn imports
        # the module.
        os.execvp("gunicorn", [
            "gunicorn",
            "--chdir", service_dir,
            "-c", os.path.join(service_dir, "gunicorn_config.py"),
            "app_optimized:app",
        ])

    # Load the model first
    print("Loading Vosk model...")
    load_model()

    if use_async:
        print("Starting async aiohttp service...")
        from aiohttp import web
        from app_optimized import create_async_app

        app_async = create_async_app()
        web.run_app(app_async, host='0.0.0.0', port=5000)
    else:
        print("Starting Flask service with threading...")
        # Use threading only (no multiprocessing in Flask)
        app.run(host='0.0.0.0', port=5000, threaded=True)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Reference in New Issue
Block a user