Files
vosk-datacleaner/vosk/test_files/human_confirm_parquet.py
2025-07-31 17:35:08 +03:30

89 lines
3.0 KiB
Python

import sys
import os
import pandas as pd
import numpy as np
import sounddevice as sd
from PyQt5.QtWidgets import (
QApplication, QWidget, QLabel, QPushButton, QVBoxLayout, QHBoxLayout, QMessageBox
)
# Shard of samples to review, resolved relative to the current working
# directory.
parquet_path = os.path.join(
    'confirmed_dataset',
    'confirmed_shard_00.parquet',
)

# The whole shard is read eagerly when this module is loaded.
df = pd.read_parquet(parquet_path)

# Review outcomes collected during the session, one dict per judged sample.
results = []
class AudioReviewer(QWidget):
    """Window that lets a human confirm or reject sample transcriptions.

    One row of ``df`` is shown at a time: its ``'text'`` value is displayed
    and its ``'audio'`` bytes can be played back.  Every verdict is appended
    to the module-level ``results`` list, and that list is written to
    ``human_confirmed_results.csv`` whenever the window is closed (Quit
    button, end of data, or the window manager's close button).
    """

    # Playback rate in Hz handed to sounddevice.
    # NOTE(review): presumably the rate the shard was recorded at -- confirm.
    SAMPLE_RATE = 16000
    # Destination of the collected verdicts, relative to the working directory.
    RESULTS_CSV = 'human_confirmed_results.csv'

    def __init__(self, df):
        """Build the widgets and show the first sample.

        Args:
            df: DataFrame with an ``'audio'`` column (raw bytes) and a
                ``'text'`` column (the transcription to verify).
        """
        super().__init__()
        self.df = df
        self.idx = 0
        self.total = len(df)
        self.audio = None
        self.transcription = None

        self.setWindowTitle("Human Audio Confirmation GUI (PyQt5)")
        self.setGeometry(100, 100, 600, 200)

        self.label = QLabel(f"Sample 1/{self.total}", self)
        self.trans_label = QLabel("", self)
        self.play_button = QPushButton("Play Audio", self)
        self.yes_button = QPushButton("Yes (Correct)", self)
        self.no_button = QPushButton("No (Incorrect)", self)
        self.skip_button = QPushButton("Skip", self)
        self.quit_button = QPushButton("Quit", self)

        self.play_button.clicked.connect(self.play_audio)
        self.yes_button.clicked.connect(lambda: self.save_and_next('y'))
        self.no_button.clicked.connect(lambda: self.save_and_next('n'))
        self.skip_button.clicked.connect(lambda: self.save_and_next('skip'))
        self.quit_button.clicked.connect(self.quit)

        vbox = QVBoxLayout()
        vbox.addWidget(self.label)
        vbox.addWidget(self.trans_label)
        vbox.addWidget(self.play_button)

        hbox = QHBoxLayout()
        hbox.addWidget(self.yes_button)
        hbox.addWidget(self.no_button)
        hbox.addWidget(self.skip_button)
        hbox.addWidget(self.quit_button)
        vbox.addLayout(hbox)

        self.setLayout(vbox)
        self.load_sample()

    @staticmethod
    def _decode_audio(raw):
        """Turn raw int16 sample bytes into a float32 array in [-1.0, 1.0).

        An unusable buffer (not bytes-like, or a length that is not a
        multiple of two) yields an empty array instead of raising, so one
        bad row cannot take down the whole review session.
        """
        try:
            pcm = np.frombuffer(raw, dtype=np.int16)
        except (TypeError, ValueError):
            return np.zeros(0, dtype=np.float32)
        # Divide by 32768 (not 32767) so the most negative int16 value maps
        # to exactly -1.0 and nothing falls outside the float range.
        return pcm.astype(np.float32) / 32768.0

    def load_sample(self):
        """Display the sample at ``self.idx``, or finish when none remain."""
        if self.idx >= self.total:
            QMessageBox.information(self, "Done", "All samples reviewed!")
            self.quit()
            return

        row = self.df.iloc[self.idx]
        self.audio = self._decode_audio(row['audio'])
        self.transcription = row['text']

        # Nothing to play for an empty/undecodable buffer.
        self.play_button.setEnabled(self.audio.size > 0)
        self.label.setText(f"Sample {self.idx+1}/{self.total}")
        self.trans_label.setText(f"Transcription: {self.transcription}")

    def play_audio(self):
        """Start playback of the current sample without blocking the GUI."""
        if self.audio is None or self.audio.size == 0:
            return
        # sd.play() returns immediately and plays in the background; waiting
        # for it here would freeze the window for the length of the clip.
        sd.play(self.audio, self.SAMPLE_RATE)

    def save_and_next(self, result):
        """Record ``result`` for the current sample and advance to the next.

        Args:
            result: Verdict label stored as-is (the buttons pass ``'y'``,
                ``'n'`` or ``'skip'``).
        """
        if self.idx >= self.total:
            # Already past the last sample; there is nothing left to judge.
            return
        # Cut off any clip still playing so it does not overlap the next one.
        sd.stop()
        results.append({
            'index': self.idx,
            'transcription': self.transcription,
            'result': result
        })
        self.idx += 1
        self.load_sample()

    def quit(self):
        """Close the window; the results are saved by ``closeEvent``."""
        self.close()

    def closeEvent(self, event):
        """Stop playback and persist the results on every close path.

        Saving here rather than in ``quit`` means verdicts are also kept
        when the window is closed through the window manager.
        """
        sd.stop()
        pd.DataFrame(results).to_csv(self.RESULTS_CSV, index=False)
        super().closeEvent(event)
if __name__ == "__main__":
    # The application object must exist before any widget is constructed.
    qt_app = QApplication(sys.argv)

    window = AudioReviewer(df)
    window.show()

    # Run the event loop and hand its return code back to the shell.
    exit_code = qt_app.exec_()
    sys.exit(exit_code)