optimization

Alireza
2025-08-02 17:46:06 +03:30
parent 474245fe83
commit 0d151529f0
7 changed files with 1209 additions and 0 deletions


@@ -0,0 +1,271 @@
from flask import Flask, request, jsonify
from vosk import Model, KaldiRecognizer
import soundfile as sf
import io
import os
import json
import numpy as np
import difflib
import asyncio
from aiohttp import web
import logging
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
import time

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Configuration for high-performance processing
NUM_WORKERS = 192  # Worker count; tune to the number of cores on the host
BATCH_SIZE = 32
MAX_CONCURRENT_PROCESSES = 48
MODEL_PATH = "/app/model"

# Global model instance used by the single-request endpoints
# (worker processes load their own copy; it is not shared across processes)
model = None

def load_model():
    """Load the Vosk model."""
    global model
    logger.info(f"Checking for model at: {MODEL_PATH}")
    if os.path.exists(MODEL_PATH):
        logger.info(f"Model directory exists at {MODEL_PATH}")
        logger.info(f"Contents: {os.listdir(MODEL_PATH)}")
        try:
            model = Model(MODEL_PATH)
            logger.info("Model loaded successfully!")
            return model
        except Exception as e:
            logger.error(f"Error loading model: {e}")
            raise RuntimeError(f"Failed to load Vosk model: {e}")
    else:
        logger.error(f"Model directory not found at {MODEL_PATH}")
        raise RuntimeError(f"Vosk model not found at {MODEL_PATH}. Please download and mount a model.")

def similarity(a, b):
    """Calculate the similarity ratio between two strings."""
    return difflib.SequenceMatcher(None, a, b).ratio()

def confirm_voice_process(args):
    """Process a single audio file in a separate worker process."""
    audio_bytes, reference_text, samplerate = args
    try:
        data, _ = sf.read(io.BytesIO(audio_bytes))
        # Down-mix to mono and convert to 16-bit PCM, as Vosk expects
        if len(data.shape) > 1:
            data = data[:, 0]
        if data.dtype != np.int16:
            data = (data * 32767).astype(np.int16)
        # Each worker loads its own copy of the model; moving this into a
        # pool initializer would avoid reloading it for every file
        local_model = Model(MODEL_PATH)
        recognizer = KaldiRecognizer(local_model, samplerate)
        recognizer.AcceptWaveform(data.tobytes())
        result = recognizer.FinalResult()  # flush and get the full-utterance result
        text = json.loads(result).get('text', '')
        sim = similarity(text, reference_text)
        return {
            'transcription': text,
            'similarity': sim,
            'confirmed': sim > 0.2
        }
    except Exception as e:
        logger.error(f"Error processing audio: {e}")
        return {
            'transcription': '',
            'similarity': 0.0,
            'confirmed': False
        }

def process_batch_parallel(audio_files, references):
    """Process a batch of audio files using parallel processing."""
    # Read each file once up front to get its sample rate
    samplerates = []
    for audio_bytes in audio_files:
        _, samplerate = sf.read(io.BytesIO(audio_bytes))
        samplerates.append(samplerate)
    # Prepare arguments for the worker processes
    process_args = [
        (audio_bytes, reference_text, samplerate)
        for audio_bytes, reference_text, samplerate in zip(audio_files, references, samplerates)
    ]
    # Fan the work out across a pool of processes
    with ProcessPoolExecutor(max_workers=MAX_CONCURRENT_PROCESSES) as executor:
        results = list(executor.map(confirm_voice_process, process_args))
    return results

# Flask app for backward compatibility
app = Flask(__name__)


@app.route('/', methods=['GET'])
def health_check():
    return jsonify({'status': 'ok', 'service': 'vosk-transcription-api', 'model': 'persian'})

@app.route('/batch_confirm', methods=['POST'])
def batch_confirm():
    """Handle batch confirmation requests."""
    start_time = time.time()
    # Parse the reference transcriptions (a JSON-encoded list)
    references = request.form.get('references')
    if not references:
        return jsonify({'error': 'Missing references'}), 400
    try:
        references = json.loads(references)
    except Exception:
        return jsonify({'error': 'Invalid references JSON'}), 400
    # Collect the uploaded audio files (audio0, audio1, ...)
    audio_files = []
    for i in range(len(references)):
        audio_file = request.files.get(f'audio{i}')
        if not audio_file:
            return jsonify({'error': f'Missing audio file audio{i}'}), 400
        audio_files.append(audio_file.read())
    # Process the batch in parallel
    results = process_batch_parallel(audio_files, references)
    processing_time = time.time() - start_time
    logger.info(f"Processed batch of {len(results)} files in {processing_time:.2f}s")
    return jsonify({'results': results})

@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Handle a single transcription request."""
    if 'audio' not in request.files:
        return jsonify({'error': 'No audio file provided'}), 400
    audio_file = request.files['audio']
    audio_bytes = audio_file.read()
    try:
        data, samplerate = sf.read(io.BytesIO(audio_bytes))
        if len(data.shape) > 1:
            data = data[:, 0]
        if data.dtype != np.int16:
            data = (data * 32767).astype(np.int16)
        recognizer = KaldiRecognizer(model, samplerate)
        recognizer.AcceptWaveform(data.tobytes())
        result = recognizer.FinalResult()  # flush and get the full-utterance result
        text = json.loads(result).get('text', '')
        return jsonify({'transcription': text})
    except Exception as e:
        logger.error(f"Error in transcription: {e}")
        return jsonify({'error': str(e)}), 500

# Async version using aiohttp for better performance
async def async_batch_confirm(request):
    """Async version of batch confirmation."""
    start_time = time.time()
    # Parse the multipart form data
    data = await request.post()
    # Get the reference transcriptions
    references_text = data.get('references')
    if not references_text:
        return web.json_response({'error': 'Missing references'}, status=400)
    try:
        references = json.loads(references_text)
    except Exception:
        return web.json_response({'error': 'Invalid references JSON'}, status=400)
    # Collect the uploaded audio files; aiohttp exposes uploads as FileField
    # objects, so read their underlying file objects synchronously
    audio_files = []
    for i in range(len(references)):
        audio_file = data.get(f'audio{i}')
        if not audio_file:
            return web.json_response({'error': f'Missing audio file audio{i}'}, status=400)
        audio_files.append(audio_file.file.read())
    # Run the CPU-bound batch in an executor so the event loop is not blocked
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor(max_workers=MAX_CONCURRENT_PROCESSES) as executor:
        results = await loop.run_in_executor(
            executor,
            process_batch_parallel,
            audio_files,
            references
        )
    processing_time = time.time() - start_time
    logger.info(f"Async processed batch of {len(results)} files in {processing_time:.2f}s")
    return web.json_response({'results': results})

async def async_transcribe(request):
    """Async version of single transcription."""
    form = await request.post()
    if 'audio' not in form:
        return web.json_response({'error': 'No audio file provided'}, status=400)
    # aiohttp exposes the upload as a FileField; read its underlying file object
    audio_bytes = form['audio'].file.read()
    try:
        data, samplerate = sf.read(io.BytesIO(audio_bytes))
        if len(data.shape) > 1:
            data = data[:, 0]
        if data.dtype != np.int16:
            data = (data * 32767).astype(np.int16)
        recognizer = KaldiRecognizer(model, samplerate)
        recognizer.AcceptWaveform(data.tobytes())
        result = recognizer.FinalResult()  # flush and get the full-utterance result
        text = json.loads(result).get('text', '')
        return web.json_response({'transcription': text})
    except Exception as e:
        logger.error(f"Error in async transcription: {e}")
        return web.json_response({'error': str(e)}, status=500)

async def health_check_async(request):
    """Async health check."""
    return web.json_response({
        'status': 'ok',
        'service': 'vosk-transcription-api-async',
        'model': 'persian',
        'workers': MAX_CONCURRENT_PROCESSES
    })

def create_async_app():
    """Create the async aiohttp application."""
    app = web.Application()
    # Register routes
    app.router.add_get('/', health_check_async)
    app.router.add_post('/batch_confirm', async_batch_confirm)
    app.router.add_post('/transcribe', async_transcribe)
    return app

if __name__ == '__main__':
    # Load the model once at startup
    load_model()
    # Choose between Flask and aiohttp based on environment
    use_async = os.getenv('USE_ASYNC', 'false').lower() == 'true'
    if use_async:
        # Run the aiohttp version
        app = create_async_app()
        web.run_app(app, host='0.0.0.0', port=5000)
    else:
        # Run the Flask development server; Werkzeug does not allow combining
        # threaded=True with processes>1, so only threading is enabled here
        app.run(host='0.0.0.0', port=5000, threaded=True)
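
For reference, a minimal client sketch against the /batch_confirm endpoint added above. It assumes the server is reachable at http://localhost:5000, that the `requests` package is installed, and the WAV file names are placeholders; the form layout (a JSON `references` field plus `audio0`, `audio1`, ... file parts) follows the handler in this file.

# client_sketch.py -- illustrative only; host, port and file paths are assumptions
import json
import requests

references = ["first expected phrase", "second expected phrase"]
files = {
    'audio0': open('sample0.wav', 'rb'),
    'audio1': open('sample1.wav', 'rb'),
}
resp = requests.post(
    'http://localhost:5000/batch_confirm',
    data={'references': json.dumps(references)},
    files=files,
)
# Expected shape: {'results': [{'transcription': ..., 'similarity': ..., 'confirmed': ...}, ...]}
print(resp.json())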