Files
vosk-datacleaner/whisper-medium-finetuned/config/training_config.yaml
2025-07-31 17:35:08 +03:30

31 lines
646 B
YAML

# Base model selection and decoding target.
model:
  # presumably a Hugging Face Hub checkpoint id — confirm against the loader
  name: "openai/whisper-medium"
  language: "persian"
  task: "transcribe"
# Optimisation hyper-parameters and save/eval/logging intervals.
training:
  batch_size: 16
  # Keep the explicit mantissa dot: YAML 1.1 loaders such as PyYAML resolve
  # a bare `1e-5` as the string "1e-5" rather than a float.
  learning_rate: 1.0e-5
  num_epochs: 10
  warmup_steps: 500
  # NOTE(review): if batch_size is per device, the effective batch is
  # 16 x 2 = 32 — confirm against the training script.
  gradient_accumulation_steps: 2
  # Intervals below are presumably counted in optimizer steps — TODO confirm.
  save_steps: 1000
  eval_steps: 500
  logging_steps: 100
# Dataset location, audio-length bounds and train/eval partition.
data:
  dataset_path: "confirmed_dataset/confirmed.parquet"
  # Length bounds; units are presumably seconds — TODO confirm.
  max_audio_length: 30.0
  min_audio_length: 1.0
  # The two fractions below sum to 1.0.
  train_split: 0.9
  eval_split: 0.1
# Persian text-preprocessing switches.
persian:
  # presumably toggles the Hazm NLP library — verify in the consumer
  use_hazm: true
  normalize_text: true
  remove_diacritics: true
# Output directories (all relative paths).
output:
  model_dir: "whisper-medium-finetuned/models"
  logs_dir: "whisper-medium-finetuned/logs"
  # presumably the target of a faster-whisper export — TODO confirm
  faster_whisper_dir: "whisper-medium-finetuned/faster_whisper"