# Listing header carried over with the paste (it appeared twice): 89 lines, 3.0 KiB, Python
import os
import sys

import numpy as np
import pandas as pd
import sounddevice as sd
from PyQt5.QtWidgets import (
    QApplication, QWidget, QLabel, QPushButton, QVBoxLayout, QHBoxLayout, QMessageBox
)

# Path of the shard that gets reviewed, relative to the current working directory.
parquet_path = os.path.join('confirmed_dataset', 'confirmed_shard_00.parquet')

# One dict per reviewed sample; AudioReviewer.save_and_next appends to this
# list and the reviewer writes it out as CSV.
results = []

# The whole shard is read eagerly when the module is loaded.
df = pd.read_parquet(parquet_path)

class AudioReviewer(QWidget):
    """Window for confirming, sample by sample, that a transcription matches its audio.

    For each row of ``df`` the ``'audio'`` column is decoded as a buffer of
    int16 samples and the ``'text'`` column is displayed as the transcription.
    Every Yes / No / Skip click appends a record to the module-level
    ``results`` list; that list is written to ``human_confirmed_results.csv``
    whenever the window is closed (Quit button, end of data, or the window
    manager's close button).

    Args:
        df: Table with at least the ``'audio'`` and ``'text'`` columns.
        sample_rate: Playback rate in Hz handed to ``sd.play``.  Defaults to
            16000, the value that used to be hard-coded.
    """

    def __init__(self, df, sample_rate=16000):
        super().__init__()
        self.df = df
        self.idx = 0               # positional index of the sample on screen
        self.total = len(df)
        self.audio = None          # float32 samples of the current row
        self.transcription = None  # text of the current row
        self.sample_rate = sample_rate

        self.setWindowTitle("Human Audio Confirmation GUI (PyQt5)")
        self.setGeometry(100, 100, 600, 200)

        self.label = QLabel(f"Sample 1/{self.total}", self)
        self.trans_label = QLabel("", self)
        self.play_button = QPushButton("Play Audio", self)
        self.yes_button = QPushButton("Yes (Correct)", self)
        self.no_button = QPushButton("No (Incorrect)", self)
        self.skip_button = QPushButton("Skip", self)
        self.quit_button = QPushButton("Quit", self)

        self.play_button.clicked.connect(self.play_audio)
        self.yes_button.clicked.connect(lambda: self.save_and_next('y'))
        self.no_button.clicked.connect(lambda: self.save_and_next('n'))
        self.skip_button.clicked.connect(lambda: self.save_and_next('skip'))
        self.quit_button.clicked.connect(self.quit)

        # Progress label, transcription and Play button are stacked
        # vertically; the four decision buttons share one row underneath.
        vbox = QVBoxLayout()
        vbox.addWidget(self.label)
        vbox.addWidget(self.trans_label)
        vbox.addWidget(self.play_button)

        hbox = QHBoxLayout()
        hbox.addWidget(self.yes_button)
        hbox.addWidget(self.no_button)
        hbox.addWidget(self.skip_button)
        hbox.addWidget(self.quit_button)
        vbox.addLayout(hbox)

        self.setLayout(vbox)
        self.load_sample()

    def load_sample(self):
        """Show the sample at ``self.idx``, or finish when none are left."""
        if self.idx >= self.total:
            QMessageBox.information(self, "Done", "All samples reviewed!")
            self.quit()
            return

        row = self.df.iloc[self.idx]
        # The audio cell holds raw int16 samples; scale them to float32 for playback.
        pcm = np.frombuffer(row['audio'], dtype=np.int16)
        self.audio = pcm.astype(np.float32) / 32767.0
        self.transcription = row['text']  # the column is named 'text', not 'transcription'
        self.label.setText(f"Sample {self.idx+1}/{self.total}")
        self.trans_label.setText(f"Transcription: {self.transcription}")

    def play_audio(self):
        """Start playing the current sample without blocking the GUI."""
        if self.audio is None:
            # Nothing was loaded (e.g. the dataset is empty).
            return
        # sd.play() returns immediately.  Waiting for the clip to finish here
        # would stall the Qt event loop and freeze the window, so we don't.
        sd.play(self.audio, self.sample_rate)

    def save_and_next(self, result):
        """Record ``result`` for the current sample and advance to the next one.

        Args:
            result: Verdict label stored in the ``'result'`` field
                (the buttons pass ``'y'``, ``'n'`` or ``'skip'``).
        """
        if self.idx >= self.total:
            # No sample is on screen (empty or exhausted dataset), so there
            # is nothing to record.
            return
        # Don't let the previous clip keep playing under the next transcription.
        sd.stop()
        results.append({
            'index': self.idx,
            'transcription': self.transcription,
            'result': result
        })
        self.idx += 1
        self.load_sample()

    def quit(self):
        """Close the window; the results are saved by ``closeEvent``."""
        self.close()

    def closeEvent(self, event):
        """Stop playback and write the collected results before closing.

        Saving here rather than in ``quit`` means the CSV is also written
        when the window is closed through the window manager.
        """
        sd.stop()
        pd.DataFrame(results).to_csv('human_confirmed_results.csv', index=False)
        event.accept()


if __name__ == "__main__":
    # Script entry point: build the Qt application, show the reviewer for
    # the loaded shard and hand control to the Qt event loop.
    qt_app = QApplication(sys.argv)
    window = AudioReviewer(df)
    window.show()
    # Use the event loop's return value as the process exit status.
    exit_code = qt_app.exec_()
    sys.exit(exit_code)