Files
whisper-local/whisper_local/recorder.py
T
2026-04-06 20:25:31 +02:00

64 lines
1.9 KiB
Python

"""Audio-Aufnahme via sounddevice."""
import logging
import numpy as np
import sounddevice as sd
logger = logging.getLogger(__name__)
class Recorder:
def __init__(self, sample_rate: int = 16000, channels: int = 1, min_duration: float = 0.5):
self.sample_rate = sample_rate
self.channels = channels
self.min_duration = min_duration
self.is_recording = False
self._chunks: list[np.ndarray] = []
self._stream: sd.InputStream | None = None
def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status) -> None:
if status:
logger.warning("Audio-Status: %s", status)
self._chunks.append(indata.copy())
def start(self) -> None:
"""Startet die Audioaufnahme."""
self._chunks = []
self.is_recording = True
self._stream = sd.InputStream(
samplerate=self.sample_rate,
channels=self.channels,
dtype=np.float32,
callback=self._audio_callback,
)
self._stream.start()
logger.info("Aufnahme gestartet")
def stop(self) -> np.ndarray | None:
"""Stoppt die Aufnahme. Gibt Audio als 1D-Array zurück oder None wenn zu kurz."""
if not self.is_recording:
return None
self.is_recording = False
if self._stream is not None:
self._stream.stop()
self._stream.close()
self._stream = None
if not self._chunks:
return None
audio = np.concatenate(self._chunks, axis=0)
# Mono: von (N, 1) auf (N,) flatten
if audio.ndim > 1:
audio = audio[:, 0]
duration = len(audio) / self.sample_rate
if duration < self.min_duration:
logger.info("Aufnahme zu kurz (%.2fs < %.2fs), verworfen", duration, self.min_duration)
return None
logger.info("Aufnahme beendet: %.2fs", duration)
return audio