Enhance batch_confirm_hf_optimized.py to ensure torchcodec is installed before loading the dataset, and update requirements_optimized.txt to include torchcodec. Modify run_optimized_192cores_no_root.sh to install additional audio dependencies and test audio imports. Add new helper files under vosk/test_files/: batch_confirm_hf_simple.py, fix_torchcodec.sh, install_and_run.sh, run_now.sh, and test_audio_deps.py.
batch_confirm_hf_optimized.py
@@ -27,6 +27,16 @@ BATCH_SIZE = 32  # Increased batch size for better throughput
MAX_CONCURRENT_REQUESTS = 48  # 192/4 for optimal concurrency
CHUNK_SIZE = 1000  # Process data in chunks to manage memory

# Ensure torchcodec is installed before loading dataset
try:
    import torchcodec
except ImportError:
    print("Installing torchcodec...")
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "torchcodec>=0.1.0"])
    import torchcodec

# Load the dataset with audio decoding
print("Loading dataset...")
ds = load_dataset(
vosk/test_files/batch_confirm_hf_simple.py (new file, 318 lines)
@@ -0,0 +1,318 @@
import asyncio
import aiohttp
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
import soundfile as sf
import requests
import os
from tqdm import tqdm
import pandas as pd
import json
import pyarrow as pa
import pyarrow.parquet as pq
import numpy as np
from huggingface_hub import HfApi, create_repo
from datasets import load_dataset, Audio, Dataset
import time
from functools import partial
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Configuration for 192 cores
NUM_CORES = 192
BATCH_SIZE = 32  # Increased batch size for better throughput
MAX_CONCURRENT_REQUESTS = 48  # 192/4 for optimal concurrency
CHUNK_SIZE = 1000  # Process data in chunks to manage memory

# Load the dataset without audio decoding first
print("Loading dataset...")
ds = load_dataset(
    "Ashegh-Sad-Warrior/Persian_Common_Voice_17_0",
    split="validated",
    streaming=False
)

# Now cast to audio after loading
print("Casting to audio...")
ds = ds.cast_column("audio", Audio(sampling_rate=16000))

output_dir = "confirmed_dataset"
os.makedirs(output_dir, exist_ok=True)

API_URL = "http://localhost:5000/batch_confirm"

# Hugging Face configuration
HF_DATASET_NAME = "dpr2000/persian-cv17-confirmed"
HF_PRIVATE = True

def save_flac(audio_array, path):
    """Save audio array as FLAC file"""
    sf.write(path, audio_array, 16000, format="FLAC")

def process_audio_chunk(audio_data):
    """Process a single audio item - designed for multiprocessing"""
    audio, sentence = audio_data
    # `audio` is a dict with "array" and "sampling_rate"; hash the raw samples
    flac_path = f"temp_{hash(audio['array'].tobytes())}.flac"
    save_flac(audio["array"], flac_path)
    return {
        'flac_path': flac_path,
        'sentence': sentence,
        'audio_array': audio["array"]
    }

async def send_batch_request(session, batch_data, batch_id):
    """Send a single batch request asynchronously"""
    open_files = []
    references = []
    temp_flacs = []
    audio_arrays = []

    # aiohttp has no requests-style `files=` keyword; build a multipart form instead
    form = aiohttp.FormData()
    for j, item in enumerate(batch_data):
        f = open(item['flac_path'], "rb")
        open_files.append(f)
        form.add_field(f"audio{j}", f,
                       filename=os.path.basename(item['flac_path']),
                       content_type="audio/flac")
        references.append(item['sentence'])
        temp_flacs.append(item['flac_path'])
        audio_arrays.append(item['audio_array'])

    form.add_field("references", json.dumps(references))

    try:
        async with session.post(API_URL, data=form, timeout=aiohttp.ClientTimeout(total=120)) as response:
            if response.status == 200:
                resp_json = await response.json()
                if "results" in resp_json:
                    results = resp_json["results"]
                else:
                    logger.warning(f"Batch {batch_id} failed: 'results' key missing")
                    results = [None] * len(references)
            else:
                logger.error(f"Batch {batch_id} failed: HTTP {response.status}")
                results = [None] * len(references)
    except Exception as e:
        logger.error(f"Batch {batch_id} failed: {e}")
        results = [None] * len(references)
    finally:
        # Clean up open file handles and temporary FLAC files
        for f in open_files:
            f.close()
        for flac_path in temp_flacs:
            try:
                os.remove(flac_path)
            except OSError:
                pass

    # Process results
    confirmed_items = []
    for j, result in enumerate(results):
        if result and result.get("confirmed"):
            confirmed_items.append({
                "audio": audio_arrays[j],
                "transcription": references[j]
            })

    return confirmed_items

async def process_dataset_async():
    """Main async processing function"""
    confirmed = []

    # Prepare all audio data first using multiprocessing
    print("Preparing audio data with multiprocessing...")
    audio_data = [(ds[i]["audio"], ds[i]["sentence"]) for i in range(len(ds))]

    # Use ProcessPoolExecutor for CPU-intensive audio processing
    with ProcessPoolExecutor(max_workers=NUM_CORES) as executor:
        processed_audio = list(tqdm(
            executor.map(process_audio_chunk, audio_data),
            total=len(audio_data),
            desc="Processing audio files"
        ))

    # Create batches
    batches = []
    for i in range(0, len(processed_audio), BATCH_SIZE):
        batch = processed_audio[i:i+BATCH_SIZE]
        batches.append((batch, i // BATCH_SIZE))

    print(f"Processing {len(batches)} batches with {MAX_CONCURRENT_REQUESTS} concurrent requests...")

    # Process batches asynchronously
    async with aiohttp.ClientSession(
        connector=aiohttp.TCPConnector(limit=MAX_CONCURRENT_REQUESTS),
        timeout=aiohttp.ClientTimeout(total=300)
    ) as session:
        tasks = []
        for batch_data, batch_id in batches:
            task = send_batch_request(session, batch_data, batch_id)
            tasks.append(task)

        # Process in chunks to avoid overwhelming the system
        chunk_size = MAX_CONCURRENT_REQUESTS
        for i in range(0, len(tasks), chunk_size):
            chunk_tasks = tasks[i:i+chunk_size]
            results = await asyncio.gather(*chunk_tasks, return_exceptions=True)

            for result in results:
                if isinstance(result, Exception):
                    logger.error(f"Task failed: {result}")
                else:
                    confirmed.extend(result)

            print(f"Processed {min(i+chunk_size, len(tasks))}/{len(tasks)} batches, confirmed: {len(confirmed)}")

    return confirmed

def save_confirmed_data_parallel(confirmed):
    """Save confirmed data using parallel processing"""
    if not confirmed:
        print("❌ No confirmed samples to save")
        return

    print(f"\n🔄 Saving {len(confirmed)} confirmed samples...")

    def extract_minimal(example):
        """Convert audio to int16 format"""
        audio_float32 = np.array(example["audio"], dtype=np.float32)
        audio_float32 = np.clip(audio_float32, -1.0, 1.0)
        audio_int16 = (audio_float32 * 32767).astype(np.int16)
        return {
            "audio": audio_int16.tobytes(),
            "text": example["transcription"]
        }

    # Create dataset from confirmed samples
    confirmed_dataset = Dataset.from_list(confirmed)
    confirmed_dataset = confirmed_dataset.map(
        extract_minimal,
        remove_columns=confirmed_dataset.column_names,
        num_proc=NUM_CORES  # Use all cores for dataset processing
    )

    # Optimize sharding for parallel writing
    num_shards = min(50, len(confirmed))  # More shards for better parallelization
    shard_size = len(confirmed_dataset) // num_shards + 1

    def write_shard(shard_info):
        """Write a single Parquet shard"""
        i, start, end = shard_info
        if start >= len(confirmed_dataset):
            return None

        shard = confirmed_dataset.select(range(start, end))
        table = pa.Table.from_pandas(shard.to_pandas())

        shard_path = os.path.join(output_dir, f"confirmed_shard_{i:03}.parquet")

        pq.write_table(
            table,
            shard_path,
            compression="zstd",
            compression_level=22,
            use_dictionary=True,
            version="2.6"
        )

        return f"Shard {i+1}: {len(shard)} samples saved to {shard_path}"

    # Prepare shard information
    shard_info = []
    for i in range(num_shards):
        start = i * shard_size
        end = min(len(confirmed_dataset), (i + 1) * shard_size)
        shard_info.append((i, start, end))

    # Write shards in parallel. A thread pool is used because write_shard is a nested
    # function and cannot be pickled for a process pool; Parquet writing is largely
    # I/O- and Arrow-bound, so threads still parallelize well.
    print(f"Writing {num_shards} shards in parallel...")
    with ThreadPoolExecutor(max_workers=NUM_CORES) as executor:
        results = list(tqdm(
            executor.map(write_shard, shard_info),
            total=len(shard_info),
            desc="Writing shards"
        ))

    # Print results
    for result in results:
        if result:
            print(f"🔹 {result}")

    print(f"\n✅ All confirmed data saved in {num_shards} shards in `{output_dir}/`")

    return num_shards

async def upload_to_hf(num_shards, total_samples=None):
    """Upload to Hugging Face Hub"""
    print(f"\n🚀 Pushing dataset to Hugging Face Hub as '{HF_DATASET_NAME}'...")
    try:
        api = HfApi(token=os.getenv("HF_TOKEN"))

        # Create repository
        try:
            create_repo(
                repo_id=HF_DATASET_NAME,
                repo_type="dataset",
                private=HF_PRIVATE,
                exist_ok=True
            )
            print(f"✅ Repository '{HF_DATASET_NAME}' created/verified")
        except Exception as e:
            print(f"⚠️ Repository creation failed: {e}")
            return

        # Create dataset info
        dataset_info = {
            "dataset_name": HF_DATASET_NAME,
            "description": "Persian Common Voice confirmed samples for Whisper fine-tuning",
            "total_samples": total_samples,  # count of confirmed samples, passed in by the caller
            "num_shards": num_shards,
            "audio_format": "int16 PCM, 16kHz",
            "columns": ["audio", "text"],
            "source_dataset": "Ashegh-Sad-Warrior/Persian_Common_Voice_17_0",
            "processing": "Vosk API batch confirmation (optimized for 192 cores)"
        }

        info_path = os.path.join(output_dir, "dataset_info.json")
        with open(info_path, 'w', encoding='utf-8') as f:
            json.dump(dataset_info, f, indent=2, ensure_ascii=False)

        # Upload folder
        api.upload_folder(
            folder_path=output_dir,
            repo_id=HF_DATASET_NAME,
            repo_type="dataset",
        )

        print(f"🎉 Dataset successfully pushed to: https://huggingface.co/datasets/{HF_DATASET_NAME}")

    except Exception as e:
        print(f"❌ Failed to push to Hugging Face: {e}")

async def main():
    """Main function"""
    start_time = time.time()

    print(f"🚀 Starting optimized processing with {NUM_CORES} cores")
    print(f"📊 Dataset size: {len(ds)} samples")
    print(f"⚙️ Batch size: {BATCH_SIZE}")
    print(f"🔄 Max concurrent requests: {MAX_CONCURRENT_REQUESTS}")

    # Process dataset
    confirmed = await process_dataset_async()

    # Save data
    num_shards = save_confirmed_data_parallel(confirmed)

    # Upload to HF
    await upload_to_hf(num_shards, len(confirmed))

    end_time = time.time()
    print(f"\n⏱️ Total processing time: {end_time - start_time:.2f} seconds")
    print(f"📈 Processing rate: {len(ds) / (end_time - start_time):.2f} samples/second")


if __name__ == "__main__":
    # Set multiprocessing start method for better performance
    mp.set_start_method('spawn', force=True)

    # Run the async main function
    asyncio.run(main())
vosk/test_files/fix_torchcodec.sh (new file, 31 lines)
@@ -0,0 +1,31 @@
#!/bin/bash

# Fix torchcodec dependency issue

echo "🔧 Fixing torchcodec dependency..."

# Activate virtual environment if it exists
if [ -d ".venv" ]; then
    echo "🔧 Activating virtual environment..."
    source .venv/bin/activate
fi

# Install torchcodec (quote the spec so '>' is not treated as a redirect)
echo "📦 Installing torchcodec..."
uv pip install "torchcodec>=0.1.0"

# Also install other audio-related dependencies
echo "📦 Installing additional audio dependencies..."
uv pip install "librosa>=0.10.0"
uv pip install "ffmpeg-python>=0.2.0"

# Test the import
echo "🧪 Testing audio imports..."
python test_audio_deps.py

if [ $? -eq 0 ]; then
    echo "🎯 torchcodec dependency fixed!"
    echo "💡 You can now run the optimized processing script."
else
    echo "❌ Failed to install torchcodec. Please check your system."
fi
vosk/test_files/install_and_run.sh (new file, 33 lines)
@@ -0,0 +1,33 @@
#!/bin/bash

# Install torchcodec and run optimized processing

echo "🔧 Installing torchcodec and running optimized processing..."

# Activate virtual environment if it exists
if [ -d ".venv" ]; then
    echo "🔧 Activating virtual environment..."
    source .venv/bin/activate
fi

# Install torchcodec first (quote the spec so '>' is not treated as a redirect)
echo "📦 Installing torchcodec..."
uv pip install "torchcodec>=0.1.0"

# Also install other audio dependencies
echo "📦 Installing additional audio dependencies..."
uv pip install "librosa>=0.10.0"
uv pip install "ffmpeg-python>=0.2.0"

# Test the installation
echo "🧪 Testing torchcodec installation..."
python -c "import torchcodec; print('torchcodec installed successfully')"

if [ $? -ne 0 ]; then
    echo "❌ Failed to install torchcodec. Trying alternative installation..."
    uv pip install --force-reinstall "torchcodec>=0.1.0"
fi

# Run the optimized processing
echo "🚀 Running optimized processing..."
python batch_confirm_hf_optimized.py
requirements_optimized.txt
@@ -7,6 +7,7 @@ pandas>=2.0.0
pyarrow>=12.0.0
numpy>=1.24.0
huggingface_hub>=0.16.0
torchcodec>=0.1.0

# Async and concurrent processing
aiohttp>=3.8.0
vosk/test_files/run_now.sh (new file, 34 lines)
@@ -0,0 +1,34 @@
#!/bin/bash

# One-command solution for 192-core optimized processing

echo "🚀 Setting up 192-core optimized processing..."

# Make scripts executable
chmod +x run_optimized_192cores_no_root.sh
chmod +x fix_torchcodec.sh

# Check if uv is available
if ! command -v uv &> /dev/null; then
    echo "❌ uv not found. Installing uv..."
    curl -LsSf https://astral.sh/uv/install.sh | sh
    source ~/.cargo/env
fi

# Create virtual environment if it doesn't exist
if [ ! -d ".venv" ]; then
    echo "🔧 Creating virtual environment..."
    uv venv
fi

# Activate virtual environment
echo "🔧 Activating virtual environment..."
source .venv/bin/activate

# Fix torchcodec dependency
echo "🔧 Fixing audio dependencies..."
./fix_torchcodec.sh

# Run the optimized processing
echo "🚀 Running optimized processing..."
./run_optimized_192cores_no_root.sh
run_optimized_192cores_no_root.sh
@@ -70,6 +70,16 @@ source .venv/bin/activate
echo "📦 Installing dependencies..."
uv pip install -r requirements_optimized.txt

# Install additional audio dependencies (quote the specs so '>' is not treated as a redirect)
echo "📦 Installing audio dependencies..."
uv pip install "torchcodec>=0.1.0"
uv pip install "librosa>=0.10.0"
uv pip install "ffmpeg-python>=0.2.0"

# Test audio imports
echo "🧪 Testing audio imports..."
python test_audio_deps.py

# Check if Vosk service is running
echo "🔍 Checking Vosk service status..."
if ! curl -s http://localhost:5000/ > /dev/null; then
vosk/test_files/test_audio_deps.py (new file, 42 lines)
@@ -0,0 +1,42 @@
#!/usr/bin/env python3
"""
Test script for audio dependencies
"""

def test_audio_dependencies():
    """Test if all audio dependencies are installed"""
    try:
        import torchcodec
        print("torchcodec: OK")
    except ImportError as e:
        print(f"torchcodec: FAILED - {e}")
        return False

    try:
        import datasets
        print("datasets: OK")
    except ImportError as e:
        print(f"datasets: FAILED - {e}")
        return False

    try:
        import soundfile
        print("soundfile: OK")
    except ImportError as e:
        print(f"soundfile: FAILED - {e}")
        return False

    try:
        import librosa
        print("librosa: OK")
    except ImportError as e:
        print(f"librosa: FAILED - {e}")
        return False

    print("All audio dependencies installed successfully")
    return True


if __name__ == "__main__":
    success = test_audio_dependencies()
    if not success:
        exit(1)