feat: add recorder module with sounddevice audio capture
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,65 @@
|
|||||||
|
import numpy as np
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
from whisper_local.recorder import Recorder
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def recorder():
    """Provide a fresh ``Recorder``: 16 kHz, one channel, 0.5 s minimum length."""
    settings = {"sample_rate": 16000, "channels": 1, "min_duration": 0.5}
    return Recorder(**settings)
|
||||||
|
|
||||||
|
|
||||||
|
class TestRecorder:
    """Tests for ``Recorder`` with ``sounddevice.InputStream`` patched out."""

    @staticmethod
    def _fake_stream():
        """Return a patcher that swaps ``sounddevice.InputStream`` for a mock.

        The patched class returns a ``MagicMock`` stream, so the tests never
        touch a real audio device.
        """
        return patch("sounddevice.InputStream", return_value=MagicMock())

    def test_init(self, recorder):
        """A new recorder keeps its settings and is not recording."""
        assert recorder.sample_rate == 16000
        assert recorder.channels == 1
        assert not recorder.is_recording

    def test_start_recording(self, recorder):
        """``start()`` switches to recording with an empty chunk buffer."""
        with self._fake_stream():
            recorder.start()
            assert recorder.is_recording
            assert len(recorder._chunks) == 0
            recorder.stop()  # Cleanup

    def test_stop_without_start_returns_none(self, recorder):
        """``stop()`` on an idle recorder yields ``None``."""
        assert recorder.stop() is None

    def test_add_chunk_and_stop(self, recorder):
        """One second of buffered audio comes back as a flat float32 array."""
        with self._fake_stream():
            recorder.start()
            # Simulate 1 second of audio (16000 samples)
            one_second = np.zeros((16000, 1), dtype=np.float32)
            recorder._chunks.append(one_second)
            audio = recorder.stop()
            assert audio is not None
            assert audio.shape == (16000,)
            assert audio.dtype == np.float32

    def test_short_audio_returns_none(self, recorder):
        """Audio shorter than ``min_duration`` is discarded."""
        with self._fake_stream():
            recorder.start()
            # Only 0.1 s of audio, which is below min_duration
            too_short = np.zeros((1600, 1), dtype=np.float32)
            recorder._chunks.append(too_short)
            audio = recorder.stop()
            assert audio is None

    def test_multiple_chunks_concatenated(self, recorder):
        """Several buffered chunks are joined in order."""
        with self._fake_stream():
            recorder.start()
            first_half = np.ones((8000, 1), dtype=np.float32)
            second_half = np.ones((8000, 1), dtype=np.float32) * 0.5
            recorder._chunks.extend([first_half, second_half])
            audio = recorder.stop()
            assert audio is not None
            assert audio.shape == (16000,)
            assert audio[0] == 1.0
            assert audio[8000] == 0.5
|
||||||
@@ -0,0 +1,63 @@
|
|||||||
|
"""Audio-Aufnahme via sounddevice."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import sounddevice as sd
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Recorder:
|
||||||
|
def __init__(self, sample_rate: int = 16000, channels: int = 1, min_duration: float = 0.5):
|
||||||
|
self.sample_rate = sample_rate
|
||||||
|
self.channels = channels
|
||||||
|
self.min_duration = min_duration
|
||||||
|
self.is_recording = False
|
||||||
|
self._chunks: list[np.ndarray] = []
|
||||||
|
self._stream: sd.InputStream | None = None
|
||||||
|
|
||||||
|
def _audio_callback(self, indata: np.ndarray, frames: int, time_info, status) -> None:
|
||||||
|
if status:
|
||||||
|
logger.warning("Audio-Status: %s", status)
|
||||||
|
self._chunks.append(indata.copy())
|
||||||
|
|
||||||
|
def start(self) -> None:
|
||||||
|
"""Startet die Audioaufnahme."""
|
||||||
|
self._chunks = []
|
||||||
|
self.is_recording = True
|
||||||
|
self._stream = sd.InputStream(
|
||||||
|
samplerate=self.sample_rate,
|
||||||
|
channels=self.channels,
|
||||||
|
dtype=np.float32,
|
||||||
|
callback=self._audio_callback,
|
||||||
|
)
|
||||||
|
self._stream.start()
|
||||||
|
logger.info("Aufnahme gestartet")
|
||||||
|
|
||||||
|
def stop(self) -> np.ndarray | None:
|
||||||
|
"""Stoppt die Aufnahme. Gibt Audio als 1D-Array zurück oder None wenn zu kurz."""
|
||||||
|
if not self.is_recording:
|
||||||
|
return None
|
||||||
|
|
||||||
|
self.is_recording = False
|
||||||
|
if self._stream is not None:
|
||||||
|
self._stream.stop()
|
||||||
|
self._stream.close()
|
||||||
|
self._stream = None
|
||||||
|
|
||||||
|
if not self._chunks:
|
||||||
|
return None
|
||||||
|
|
||||||
|
audio = np.concatenate(self._chunks, axis=0)
|
||||||
|
# Mono: von (N, 1) auf (N,) flatten
|
||||||
|
if audio.ndim > 1:
|
||||||
|
audio = audio[:, 0]
|
||||||
|
|
||||||
|
duration = len(audio) / self.sample_rate
|
||||||
|
if duration < self.min_duration:
|
||||||
|
logger.info("Aufnahme zu kurz (%.2fs < %.2fs), verworfen", duration, self.min_duration)
|
||||||
|
return None
|
||||||
|
|
||||||
|
logger.info("Aufnahme beendet: %.2fs", duration)
|
||||||
|
return audio
|
||||||
Reference in New Issue
Block a user