From 949eb679e177593ea5e617c8e651bfba3c1ba980 Mon Sep 17 00:00:00 2001
From: Vitali Graf
Date: Mon, 6 Apr 2026 20:25:31 +0200
Subject: [PATCH] feat: add recorder module with sounddevice audio capture

Co-Authored-By: Claude Sonnet 4.6
---
 tests/test_recorder.py    |  83 +++++++++++++++++++++++++++++++++
 whisper_local/recorder.py | 100 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 183 insertions(+)
 create mode 100644 tests/test_recorder.py
 create mode 100644 whisper_local/recorder.py

diff --git a/tests/test_recorder.py b/tests/test_recorder.py
new file mode 100644
index 0000000..0f73aaf
--- /dev/null
+++ b/tests/test_recorder.py
@@ -0,0 +1,83 @@
+import numpy as np
+import pytest
+from unittest.mock import MagicMock, patch
+
+from whisper_local.recorder import Recorder
+
+
+@pytest.fixture
+def recorder():
+    return Recorder(sample_rate=16000, channels=1, min_duration=0.5)
+
+
+class TestRecorder:
+    def test_init(self, recorder):
+        assert recorder.sample_rate == 16000
+        assert recorder.channels == 1
+        assert not recorder.is_recording
+
+    def test_start_recording(self, recorder):
+        with patch("sounddevice.InputStream") as mock_stream_cls:
+            mock_stream = MagicMock()
+            mock_stream_cls.return_value = mock_stream
+            recorder.start()
+            assert recorder.is_recording
+            assert len(recorder._chunks) == 0
+            recorder.stop()  # Cleanup
+
+    def test_stop_without_start_returns_none(self, recorder):
+        result = recorder.stop()
+        assert result is None
+
+    def test_add_chunk_and_stop(self, recorder):
+        with patch("sounddevice.InputStream") as mock_stream_cls:
+            mock_stream_cls.return_value = MagicMock()
+            recorder.start()
+            # Simulate 1 second of audio (16000 samples)
+            chunk = np.zeros((16000, 1), dtype=np.float32)
+            recorder._chunks.append(chunk)
+            result = recorder.stop()
+            assert result is not None
+            assert result.shape == (16000,)
+            assert result.dtype == np.float32
+
+    def test_short_audio_returns_none(self, recorder):
+        with patch("sounddevice.InputStream") as mock_stream_cls:
+            mock_stream_cls.return_value = MagicMock()
+            recorder.start()
+            # Only 0.1 s of audio — below min_duration
+            chunk = np.zeros((1600, 1), dtype=np.float32)
+            recorder._chunks.append(chunk)
+            result = recorder.stop()
+            assert result is None
+
+    def test_multiple_chunks_concatenated(self, recorder):
+        with patch("sounddevice.InputStream") as mock_stream_cls:
+            mock_stream_cls.return_value = MagicMock()
+            recorder.start()
+            chunk1 = np.ones((8000, 1), dtype=np.float32)
+            chunk2 = np.ones((8000, 1), dtype=np.float32) * 0.5
+            recorder._chunks.extend([chunk1, chunk2])
+            result = recorder.stop()
+            assert result is not None
+            assert result.shape == (16000,)
+            assert result[0] == 1.0
+            assert result[8000] == 0.5
+
+    def test_start_twice_opens_single_stream(self, recorder):
+        with patch("sounddevice.InputStream") as mock_stream_cls:
+            mock_stream_cls.return_value = MagicMock()
+            recorder.start()
+            recorder.start()
+            assert mock_stream_cls.call_count == 1
+            recorder.stop()  # Cleanup
+
+    def test_failed_start_leaves_recorder_idle(self, recorder):
+        with patch("sounddevice.InputStream") as mock_stream_cls:
+            mock_stream = MagicMock()
+            mock_stream.start.side_effect = RuntimeError("no device")
+            mock_stream_cls.return_value = mock_stream
+            with pytest.raises(RuntimeError):
+                recorder.start()
+            assert not recorder.is_recording
+            mock_stream.close.assert_called_once()
diff --git a/whisper_local/recorder.py b/whisper_local/recorder.py
new file mode 100644
index 0000000..f7cc824
--- /dev/null
+++ b/whisper_local/recorder.py
@@ -0,0 +1,100 @@
+"""Audio recording via sounddevice."""
+
+import logging
+
+import numpy as np
+import sounddevice as sd
+
+logger = logging.getLogger(__name__)
+
+
+class Recorder:
+    """Record audio from an input device into memory.
+
+    Call start() to open the input stream and stop() to close it and get
+    the captured samples back as a 1-D array.
+    """
+
+    def __init__(self, sample_rate: int = 16000, channels: int = 1, min_duration: float = 0.5):
+        self.sample_rate = sample_rate
+        self.channels = channels
+        # Recordings shorter than this many seconds are discarded by stop().
+        self.min_duration = min_duration
+        self.is_recording = False
+        self._chunks: list[np.ndarray] = []
+        self._stream: sd.InputStream | None = None
+
+    def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status) -> None:
+        """Stream callback: log status flags and buffer the delivered block."""
+        if status:
+            logger.warning("Audio-Status: %s", status)
+        # Store a copy so the buffered block does not alias the caller's array.
+        self._chunks.append(indata.copy())
+
+    def start(self) -> None:
+        """Start capturing audio.
+
+        A call while a recording is already running is ignored, so the open
+        stream is never replaced without being closed. The recorder only
+        switches to the recording state after the stream has started; if
+        starting fails, the stream is closed and the error is re-raised.
+        """
+        if self.is_recording:
+            logger.warning("Aufnahme läuft bereits, start() ignoriert")
+            return
+
+        self._chunks = []
+        stream = sd.InputStream(
+            samplerate=self.sample_rate,
+            channels=self.channels,
+            dtype=np.float32,
+            callback=self._audio_callback,
+        )
+        try:
+            stream.start()
+        except Exception:
+            # Do not leave a half-opened device behind.
+            stream.close()
+            raise
+        self._stream = stream
+        self.is_recording = True
+        logger.info("Aufnahme gestartet")
+
+    def stop(self) -> np.ndarray | None:
+        """Stop capturing and return the recorded audio.
+
+        Returns:
+            The first channel of the recording as a 1-D array, or None if
+            no recording was running, no audio was captured, or the
+            recording is shorter than min_duration seconds.
+        """
+        if not self.is_recording:
+            return None
+
+        self.is_recording = False
+        stream, self._stream = self._stream, None
+        if stream is not None:
+            try:
+                stream.stop()
+            finally:
+                # Release the device even if stopping raised.
+                stream.close()
+
+        # Take ownership of the buffered blocks so the recorder does not keep
+        # the audio alive until the next start().
+        chunks, self._chunks = self._chunks, []
+        if not chunks:
+            return None
+
+        audio = np.concatenate(chunks, axis=0)
+        # Mono: flatten (N, 1) to (N,) by keeping the first channel.
+        if audio.ndim > 1:
+            audio = audio[:, 0]
+
+        duration = len(audio) / self.sample_rate
+        if duration < self.min_duration:
+            logger.info("Aufnahme zu kurz (%.2fs < %.2fs), verworfen", duration, self.min_duration)
+            return None
+
+        logger.info("Aufnahme beendet: %.2fs", duration)
+        return audio