feat: add recorder module with sounddevice audio capture

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-06 20:25:31 +02:00
parent ef6ff5b35b
commit 949eb679e1
2 changed files with 128 additions and 0 deletions
@@ -0,0 +1,65 @@
 import numpy as np
 import pytest
 from unittest.mock import MagicMock, patch
 from whisper_local.recorder import Recorder
@pytest.fixture
def recorder():
    """Provide a fresh single-channel ``Recorder`` (16000 samples/s, 0.5 s minimum length)."""
    settings = {"sample_rate": 16000, "channels": 1, "min_duration": 0.5}
    return Recorder(**settings)
 class TestRecorder:
    """Tests for ``Recorder`` with ``sounddevice.InputStream`` patched out."""

    def test_init(self, recorder):
        """A fresh recorder exposes its configuration and is idle."""
        assert (recorder.sample_rate, recorder.channels) == (16000, 1)
        assert not recorder.is_recording

    def test_start_recording(self, recorder):
        """start() sets the recording flag and begins with an empty buffer."""
        with patch("sounddevice.InputStream", return_value=MagicMock()):
            recorder.start()
            assert recorder.is_recording
            assert len(recorder._chunks) == 0
            # Cleanup: stop again so the patched stream is released.
            recorder.stop()

    def test_stop_without_start_returns_none(self, recorder):
        """stop() on an idle recorder yields None."""
        assert recorder.stop() is None

    def test_add_chunk_and_stop(self, recorder):
        """One buffered chunk comes back as a flat float32 array."""
        with patch("sounddevice.InputStream", return_value=MagicMock()):
            recorder.start()
            # Simulate one second of audio (16000 samples).
            one_second = np.zeros((16000, 1), dtype=np.float32)
            recorder._chunks.append(one_second)
            audio = recorder.stop()
        assert audio is not None
        assert audio.shape == (16000,)
        assert audio.dtype == np.float32

    def test_short_audio_returns_none(self, recorder):
        """Audio shorter than min_duration is discarded."""
        with patch("sounddevice.InputStream", return_value=MagicMock()):
            recorder.start()
            # Only 0.1 s of audio, which is below min_duration.
            too_short = np.zeros((1600, 1), dtype=np.float32)
            recorder._chunks.append(too_short)
            audio = recorder.stop()
        assert audio is None

    def test_multiple_chunks_concatenated(self, recorder):
        """Several chunks are joined in the order they were buffered."""
        with patch("sounddevice.InputStream", return_value=MagicMock()):
            recorder.start()
            first_half = np.ones((8000, 1), dtype=np.float32)
            second_half = np.full((8000, 1), 0.5, dtype=np.float32)
            recorder._chunks.append(first_half)
            recorder._chunks.append(second_half)
            audio = recorder.stop()
        assert audio is not None
        assert audio.shape == (16000,)
        assert audio[0] == 1.0
        assert audio[8000] == 0.5
@@ -0,0 +1,63 @@
 """Audio-Aufnahme via sounddevice."""
 import logging
 import numpy as np
 import sounddevice as sd
 logger = logging.getLogger(__name__)
 class Recorder:
    def __init__(self, sample_rate: int = 16000, channels: int = 1, min_duration: float = 0.5):
        self.sample_rate = sample_rate
        self.channels = channels
        self.min_duration = min_duration
        self.is_recording = False
        self._chunks: list[np.ndarray] = []
        self._stream: sd.InputStream | None = None
    def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status) -> None:
        if status:
            logger.warning("Audio-Status: %s", status)
        self._chunks.append(indata.copy())
    def start(self) -> None:
        """Startet die Audioaufnahme."""
        self._chunks = []
        self.is_recording = True
        self._stream = sd.InputStream(
            samplerate=self.sample_rate,
            channels=self.channels,
            dtype=np.float32,
            callback=self._audio_callback,
        )
        self._stream.start()
        logger.info("Aufnahme gestartet")
    def stop(self) -> np.ndarray | None:
        """Stoppt die Aufnahme. Gibt Audio als 1D-Array zurück oder None wenn zu kurz."""
        if not self.is_recording:
            return None
        self.is_recording = False
        if self._stream is not None:
            self._stream.stop()
            self._stream.close()
            self._stream = None
        if not self._chunks:
            return None
        audio = np.concatenate(self._chunks, axis=0)
        # Mono: von (N, 1) auf (N,) flatten
        if audio.ndim > 1:
            audio = audio[:, 0]
        duration = len(audio) / self.sample_rate
        if duration < self.min_duration:
            logger.info("Aufnahme zu kurz (%.2fs < %.2fs), verworfen", duration, self.min_duration)
            return None
        logger.info("Aufnahme beendet: %.2fs", duration)
        return audio