feat: add recorder module with sounddevice audio capture

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-06 20:25:31 +02:00
parent ef6ff5b35b
commit 949eb679e1
2 changed files with 128 additions and 0 deletions
@@ -0,0 +1,65 @@
+import numpy as np
+import pytest
+from unittest.mock import MagicMock, patch
+
+from whisper_local.recorder import Recorder
+
+
+@pytest.fixture
+def recorder():
+    """Provide a Recorder configured with 16 kHz, one channel and a 0.5 s minimum duration."""
+    settings = {"sample_rate": 16000, "channels": 1, "min_duration": 0.5}
+    return Recorder(**settings)
+
+
+class TestRecorder:
+    """Unit tests for ``Recorder`` with ``sounddevice.InputStream`` patched out."""
+
+    def test_init(self, recorder):
+        """A fresh recorder exposes its settings and is not recording."""
+        assert recorder.sample_rate == 16000
+        assert recorder.channels == 1
+        assert not recorder.is_recording
+
+    def test_start_recording(self, recorder):
+        """start() opens and starts one stream; stop() stops and closes it."""
+        with patch("sounddevice.InputStream") as mock_stream_cls:
+            mock_stream = MagicMock()
+            mock_stream_cls.return_value = mock_stream
+            recorder.start()
+            assert recorder.is_recording
+            assert len(recorder._chunks) == 0
+            # The stream must actually be created and started, not just flagged.
+            mock_stream_cls.assert_called_once()
+            mock_stream.start.assert_called_once()
+            recorder.stop()  # Cleanup
+            # Stopping must release the device, otherwise the stream leaks.
+            mock_stream.stop.assert_called_once()
+            mock_stream.close.assert_called_once()
+            assert not recorder.is_recording
+
+    def test_stop_without_start_returns_none(self, recorder):
+        """stop() on an idle recorder returns None."""
+        result = recorder.stop()
+        assert result is None
+
+    def test_audio_callback_stores_copy(self, recorder):
+        """The callback buffers a copy, so later writes to the input do not leak in."""
+        block = np.zeros((4, 1), dtype=np.float32)
+        recorder._audio_callback(block, 4, None, None)
+        block[0, 0] = 1.0
+        assert len(recorder._chunks) == 1
+        assert recorder._chunks[0][0, 0] == 0.0
+
+    def test_add_chunk_and_stop(self, recorder):
+        """One buffered chunk comes back as a flat float32 array."""
+        with patch("sounddevice.InputStream") as mock_stream_cls:
+            mock_stream_cls.return_value = MagicMock()
+            recorder.start()
+            # Simulate 1 second of audio (16000 samples)
+            chunk = np.zeros((16000, 1), dtype=np.float32)
+            recorder._chunks.append(chunk)
+            result = recorder.stop()
+        assert result is not None
+        assert result.shape == (16000,)
+        assert result.dtype == np.float32
+
+    def test_short_audio_returns_none(self, recorder):
+        """Audio shorter than min_duration is discarded."""
+        with patch("sounddevice.InputStream") as mock_stream_cls:
+            mock_stream_cls.return_value = MagicMock()
+            recorder.start()
+            # Only 0.1 s of audio, which is below min_duration
+            chunk = np.zeros((1600, 1), dtype=np.float32)
+            recorder._chunks.append(chunk)
+            result = recorder.stop()
+        assert result is None
+
+    def test_multiple_chunks_concatenated(self, recorder):
+        """Several chunks are joined in the order they were buffered."""
+        with patch("sounddevice.InputStream") as mock_stream_cls:
+            mock_stream_cls.return_value = MagicMock()
+            recorder.start()
+            chunk1 = np.ones((8000, 1), dtype=np.float32)
+            chunk2 = np.ones((8000, 1), dtype=np.float32) * 0.5
+            recorder._chunks.extend([chunk1, chunk2])
+            result = recorder.stop()
+        assert result is not None
+        assert result.shape == (16000,)
+        assert result[0] == 1.0
+        assert result[8000] == 0.5
@@ -0,0 +1,63 @@
+"""Audio-Aufnahme via sounddevice."""
+
+import logging
+
+import numpy as np
+import sounddevice as sd
+
+logger = logging.getLogger(__name__)
+
+
+class Recorder:
+    """Capture audio through a ``sounddevice`` input stream.
+
+    Blocks delivered to the stream callback are buffered between
+    :meth:`start` and :meth:`stop`; ``stop`` joins them into one array.
+    """
+
+    def __init__(self, sample_rate: int = 16000, channels: int = 1, min_duration: float = 0.5):
+        """Store the capture settings; no stream is opened here.
+
+        Args:
+            sample_rate: Sample rate passed to the input stream and used to
+                compute the recording duration.
+            channels: Channel count passed to the input stream.
+            min_duration: Recordings shorter than this many seconds are
+                discarded by :meth:`stop`.
+        """
+        self.sample_rate = sample_rate
+        self.channels = channels
+        self.min_duration = min_duration
+        self.is_recording = False
+        self._chunks: list[np.ndarray] = []
+        self._stream: sd.InputStream | None = None
+
+    def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status) -> None:
+        """Stream callback: log any status flag and buffer the delivered block."""
+        if status:
+            logger.warning("Audio-Status: %s", status)
+        # Store a copy so the buffered chunk does not alias the caller's array.
+        self._chunks.append(indata.copy())
+
+    def start(self) -> None:
+        """Start capturing audio, discarding previously buffered chunks.
+
+        Calling this while a recording is already running is ignored (with a
+        warning) so the running stream is not overwritten and leaked.
+        Exceptions raised while opening or starting the stream propagate;
+        the recorder then stays in the not-recording state.
+        """
+        if self.is_recording:
+            logger.warning("Aufnahme läuft bereits, start() ignoriert")
+            return
+
+        self._chunks = []
+        stream = sd.InputStream(
+            samplerate=self.sample_rate,
+            channels=self.channels,
+            dtype=np.float32,
+            callback=self._audio_callback,
+        )
+        try:
+            stream.start()
+        except Exception:
+            # Release the opened stream instead of leaking it on a failed start.
+            stream.close()
+            raise
+
+        # Only flag the recorder as active once the stream really runs.
+        self._stream = stream
+        self.is_recording = True
+        logger.info("Aufnahme gestartet")
+
+    def stop(self) -> np.ndarray | None:
+        """Stop capturing and return the recorded audio.
+
+        Returns:
+            A 1-D array of the buffered samples (for multi-dimensional input
+            only column 0 is kept), or ``None`` when no recording was running,
+            nothing was buffered, or the recording is shorter than
+            ``min_duration``.
+        """
+        if not self.is_recording:
+            return None
+
+        self.is_recording = False
+        stream, self._stream = self._stream, None
+        if stream is not None:
+            try:
+                stream.stop()
+            finally:
+                # Always close, even if stopping the stream raised.
+                stream.close()
+
+        # Detach the buffer so the chunks can be freed once joined.
+        chunks, self._chunks = self._chunks, []
+        if not chunks:
+            return None
+
+        audio = np.concatenate(chunks, axis=0)
+        # Mono: flatten (N, 1) to (N,) by taking the first column.
+        if audio.ndim > 1:
+            audio = audio[:, 0]
+
+        duration = len(audio) / self.sample_rate
+        if duration < self.min_duration:
+            logger.info("Aufnahme zu kurz (%.2fs < %.2fs), verworfen", duration, self.min_duration)
+            return None
+
+        logger.info("Aufnahme beendet: %.2fs", duration)
+        return audio