Enhance batch_confirm_hf_optimized.py to ensure torchcodec is installed before loading the dataset, and update requirements_optimized.txt to include torchcodec. Modify run_optimized_192cores_no_root.sh to install additional audio dependencies and test audio imports.
This commit is contained in:
@@ -27,6 +27,16 @@ BATCH_SIZE = 32 # Increased batch size for better throughput
|
||||
MAX_CONCURRENT_REQUESTS = 48 # 192/4 for optimal concurrency
|
||||
CHUNK_SIZE = 1000 # Process data in chunks to manage memory
|
||||
|
||||
# Ensure torchcodec is importable before loading the dataset.
# If the import fails, install the package with pip and retry the import.
try:
    import torchcodec
except ImportError:
    print("Installing torchcodec...")
    import importlib
    import subprocess
    import sys

    # Run pip through sys.executable so the package is installed for the
    # interpreter that is running this script. check_call raises
    # subprocess.CalledProcessError if pip exits with a non-zero status.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "torchcodec>=0.1.0"])
    # The package was installed after the interpreter started; invalidate the
    # import finders' caches so the retry below notices the new module.
    importlib.invalidate_caches()
    import torchcodec
|
||||
|
||||
# Load the dataset with audio decoding
|
||||
print("Loading dataset...")
|
||||
ds = load_dataset(
|
||||
|
||||
Reference in New Issue
Block a user