Files

53 lines
1.6 KiB
Python
Raw Permalink Normal View History

"""Whisper-Transkription via faster-whisper."""
import logging
import sys
from pathlib import Path
import numpy as np
from faster_whisper import WhisperModel
logger = logging.getLogger(__name__)
def _model_cache_dir() -> str | None:
"""Im gebündelten Modus: Modell neben der EXE cachen (portable).
Im Entwicklungsmodus: None → HuggingFace-Standard-Cache."""
if getattr(sys, "frozen", False):
cache = Path(sys.executable).parent / "models"
try:
cache.mkdir(exist_ok=True)
return str(cache)
except OSError:
return None # Fallback auf HuggingFace-Standard-Cache
return None
class Transcriber:
    """Speech-to-text helper built on a faster-whisper ``WhisperModel``."""

    def __init__(
        self,
        model_name: str = "small",
        compute_type: str = "int8",
        language: str = "de",
        model: WhisperModel | None = None,
    ):
        """Store the language and obtain the model to transcribe with.

        Args:
            model_name: Model identifier handed to ``WhisperModel`` when no
                ready-made model is supplied.
            compute_type: Compute type handed to ``WhisperModel``.
            language: Language code passed to every ``transcribe`` call.
            model: Optional already-constructed model. When given it is used
                as-is and ``model_name`` / ``compute_type`` are ignored.
        """
        self.language = language

        # A caller-supplied model short-circuits loading entirely.
        if model is not None:
            self.model = model
            return

        logger.info("Lade Whisper-Modell '%s' (compute_type=%s)...", model_name, compute_type)
        download_root = _model_cache_dir()
        self.model = WhisperModel(
            model_name,
            compute_type=compute_type,
            download_root=download_root,
        )
        logger.info("Modell geladen")

    def transcribe(self, audio: np.ndarray) -> str:
        """Transcribe an audio array to text.

        Args:
            audio: Audio samples forwarded unchanged to the model.
                NOTE(review): presumably mono float32 at the sample rate the
                model expects; confirm against the caller.

        Returns:
            The stripped, non-empty segment texts joined by single spaces, or
            an empty string when no segment produced any text.
        """
        segments, _ = self.model.transcribe(audio, language=self.language)

        # NOTE(review): faster-whisper documents ``segments`` as a lazy
        # generator, so the decoding work presumably happens in this loop.
        pieces = []
        for segment in segments:
            stripped = segment.text.strip()
            if stripped:
                pieces.append(stripped)
        text = " ".join(pieces)

        if not text:
            logger.info("Keine Sprache erkannt")
        else:
            logger.info("Transkribiert: %s", text)
        return text