Refactor: Gemeinsame Basisklassen für Worker-Pools und Parameter-Dialoge

- BaseWorkerPool (worker_pool_base.py): Eliminiert ~450 Zeilen Duplikation
  aus saxon_pool.py, saxon_pool_s9api.py und fop_pool.py; behebt stderr-Handle-Leak
- XsltParamsEditDialog (XsltParamsEditDialog.py): Gemeinsame Basisklasse für
  TreeNodeEditDialog und XslFileEditDialog; reduziert je 162 auf 8 Zeilen

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-09 19:49:57 +01:00
parent cb90f9e483
commit 37ebdff349
7 changed files with 552 additions and 1182 deletions
+39 -282
View File
@@ -5,16 +5,12 @@ Eliminiert JVM-Startup-Overhead durch Vorinitialisierung von N Worker-Prozessen.
Jeder Worker läuft als Daemon und verarbeitet mehrere FO→PDF Transformationen nacheinander.
"""
import glob
import logging
import subprocess
import threading
import time
import psutil
from pathlib import Path
from typing import Optional
import tempfile
from worker_metrics import WorkerPoolMetrics
from worker_pool_base import BaseWorkerPool, _CLASSPATH_SEP
logger = logging.getLogger(__name__)
@@ -164,7 +160,7 @@ public class FopWorker {
"""
class FopWorkerPool:
class FopWorkerPool(BaseWorkerPool):
"""
Pool von lang-laufenden JVM-Prozessen für Apache FOP PDF-Generierung.
@@ -179,53 +175,19 @@ class FopWorkerPool:
fop_config_file: Optional[Path] = None,
log_dir: Optional[Path] = None,
):
"""
Initialisiert den FOP-Worker-Pool.
Args:
num_workers: Anzahl der Worker-Prozesse
java_vm_path: Pfad zur Java VM Binary
apache_fop_dir: Pfad zum Apache FOP-Verzeichnis
fop_config_file: Optionaler Pfad zur fop.xconf Konfigurationsdatei
log_dir: Optionales Verzeichnis für Worker-Logs (Standard: temp_dir/tmp)
"""
self.num_workers = num_workers
self.java_vm_path = java_vm_path
super().__init__(num_workers, java_vm_path, log_dir)
self.apache_fop_dir = apache_fop_dir
self.fop_config_file = fop_config_file
self.log_dir = log_dir
# Worker-Prozesse
self.workers: list[subprocess.Popen] = []
self.worker_locks: list[threading.Lock] = []
# Temporäres Verzeichnis für kompilierte Java-Klasse
self.temp_dir: Optional[Path] = None
self.worker_class_path: Optional[Path] = None
self.worker_log_dir: Optional[Path] = None
# Classpath für FOP
self.fop_classpath: Optional[str] = None
# Performance-Metriken
self.metrics = WorkerPoolMetrics()
# Initialisierung
self._build_fop_classpath()
self._compile_worker_class()
self._start_workers()
logger.info(f"FopWorkerPool initialisiert mit {num_workers} Workern")
def _build_fop_classpath(self):
"""Erstellt den Classpath für Apache FOP."""
import glob
import sys
# Sammle alle JAR-Dateien im FOP-Verzeichnis
all_jars = glob.glob(str(self.apache_fop_dir / "build" / "*.jar"))
# FOP lib-Verzeichnis
lib_dir = self.apache_fop_dir / "lib"
if lib_dir.exists() and lib_dir.is_dir():
all_jars.extend(glob.glob(str(lib_dir / "*.jar")))
@@ -233,120 +195,46 @@ class FopWorkerPool:
if not all_jars:
raise RuntimeError(f"Keine FOP JAR-Dateien gefunden in {self.apache_fop_dir}")
classpath_separator = ";" if sys.platform == "win32" else ":"
self.fop_classpath = classpath_separator.join(all_jars)
self.fop_classpath = _CLASSPATH_SEP.join(all_jars)
logger.debug(f"FOP Classpath: {len(all_jars)} JARs")
def _compile_worker_class(self):
"""Kompiliert die FopWorker-Java-Klasse."""
start_time = time.time()
try:
# Erstelle temporäres Verzeichnis
self.temp_dir = Path(tempfile.mkdtemp(prefix="fop_worker_"))
# --- Abstrakte Properties ---
# Schreibe Java-Quellcode
java_file = self.temp_dir / "FopWorker.java"
java_file.write_text(FOP_WORKER_JAVA, encoding="utf-8")
@property
def _pool_name(self) -> str:
return "FOP"
# Kompiliere Java-Klasse
javac_cmd = [
str(self.java_vm_path).replace("java", "javac"),
"-cp",
self.fop_classpath,
str(java_file),
]
@property
def _java_source_code(self) -> str:
    """Return the Java source text of the worker (module constant FOP_WORKER_JAVA)."""
    source = FOP_WORKER_JAVA
    return source
logger.debug(f"Kompiliere FopWorker: {' '.join(javac_cmd[:3])}...")
@property
def _java_class_name(self) -> str:
return "FopWorker"
result = subprocess.run(javac_cmd, capture_output=True, text=True, timeout=30)
@property
def _temp_dir_prefix(self) -> str:
return "fop_worker_"
if result.returncode != 0:
raise RuntimeError(f"Java-Kompilierung fehlgeschlagen: {result.stderr}")
@property
def _worker_init_sleep(self) -> float:
return 0.2 # FOP braucht etwas länger zum Initialisieren
self.worker_class_path = self.temp_dir
# --- Abstrakte Methoden ---
# Speichere Kompilierungszeit
self.metrics.compilation_time_seconds = time.time() - start_time
def _get_classpath(self) -> str:
return self.fop_classpath
logger.info(
f"FopWorker erfolgreich kompiliert: {self.temp_dir} " f"({self.metrics.compilation_time_seconds:.3f}s)"
)
def _build_worker_cmd(self, full_classpath: str) -> list[str]:
cmd = [str(self.java_vm_path), "-cp", full_classpath, "FopWorker"]
if self.fop_config_file and self.fop_config_file.exists():
cmd.append(str(self.fop_config_file))
return cmd
except Exception as e:
logger.error(f"Fehler beim Kompilieren von FopWorker: {e}")
raise
def _stderr_log_name(self, i: int) -> str:
return f"fop_worker_{i}_stderr.log"
def _start_workers(self):
    """Start ``self.num_workers`` FopWorker JVM processes.

    Each worker is launched with piped stdin/stdout (text mode, line
    buffered) and its stderr redirected to
    ``<worker_log_dir>/fop_worker_<i>_stderr.log``. The process and a fresh
    lock are appended to ``self.workers`` / ``self.worker_locks``, and the
    start-up duration is recorded in ``self.metrics.worker_start_times``.

    Raises:
        RuntimeError: If a worker has already exited after the short
            start-up wait; the message carries the exit code and the
            content of the worker's stderr log.
        Exception: Any other error during start-up is logged and re-raised.
    """
    import os

    # os.pathsep is ";" on Windows and ":" elsewhere, which is also the
    # separator the JVM expects between classpath entries.
    full_classpath = str(self.worker_class_path) + os.pathsep + self.fop_classpath

    # Logs go to the configured directory, or next to the compiled class.
    self.worker_log_dir = self.log_dir if self.log_dir else self.temp_dir
    if self.log_dir:
        self.worker_log_dir.mkdir(parents=True, exist_ok=True)

    for i in range(self.num_workers):
        worker_start_time = time.time()

        try:
            # fop.xconf is handed to the worker as an optional argument.
            cmd = [str(self.java_vm_path), "-cp", full_classpath, "FopWorker"]
            if self.fop_config_file and self.fop_config_file.exists():
                cmd.append(str(self.fop_config_file))

            stderr_log = self.worker_log_dir / f"fop_worker_{i}_stderr.log"

            # The child receives its own copy of the descriptor, so the
            # parent's handle is closed as soon as Popen returns (or fails).
            # Keeping it open leaked one file handle per worker.
            with open(stderr_log, "w", encoding="utf-8") as stderr_file:
                process = subprocess.Popen(
                    cmd,
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=stderr_file,
                    text=True,
                    bufsize=1,  # line buffered
                )

            self.workers.append(process)
            self.worker_locks.append(threading.Lock())

            logger.debug(f"FOP Worker {i} gestartet (PID: {process.pid}, stderr: {stderr_log})")

            # Give the JVM a moment to initialise before the liveness check.
            time.sleep(0.2)

            if process.poll() is not None:
                # The worker died during start-up. Read the log tolerantly:
                # the JVM may not have written valid UTF-8.
                with open(stderr_log, "r", encoding="utf-8", errors="replace") as f:
                    stderr_content = f.read()
                raise RuntimeError(
                    f"FOP Worker {i} ist sofort beendet (Exit Code: {process.returncode})\nstderr:\n{stderr_content}"
                )

            self.metrics.worker_start_times.append(time.time() - worker_start_time)

        except Exception as e:
            logger.error(f"Fehler beim Starten von FOP Worker {i}: {e}")
            raise

    # Derive the aggregate start-up figures from the per-worker times.
    self.metrics.calculate_aggregates()

    logger.info(
        f"{len(self.workers)} FOP-Worker erfolgreich gestartet "
        f"(Summe: {self.metrics.total_worker_start_time_seconds:.3f}s, "
        f"Durchschnitt: {self.metrics.average_worker_start_time_seconds:.3f}s)"
    )
# --- FOP-spezifische Job-Methode ---
def build_pdf(self, input_fo: Path, output_pdf: Path) -> tuple[bool, str]:
"""
@@ -359,167 +247,36 @@ class FopWorkerPool:
Returns:
tuple[bool, str]: (Erfolg, Fehlermeldung/Info)
"""
# Finde freien Worker
worker_idx = None
for i, lock in enumerate(self.worker_locks):
if lock.acquire(blocking=False):
worker_idx = i
break
if worker_idx is None:
# Kein freier Worker, warte auf ersten verfügbaren
for i, lock in enumerate(self.worker_locks):
lock.acquire()
worker_idx = i
break
worker_idx = self._acquire_worker()
try:
worker = self.workers[worker_idx]
# Prüfe ob Worker noch läuft
if worker.poll() is not None:
# Worker ist tot!
stderr_log = self.worker_log_dir / f"fop_worker_{worker_idx}_stderr.log"
try:
with open(stderr_log, "r") as f:
stderr_content = f.read()
error_msg = (
f"FOP Worker {worker_idx} ist beendet (Exit: {worker.returncode})\nstderr:\n{stderr_content}"
)
except Exception:
error_msg = f"FOP Worker {worker_idx} ist beendet (Exit: {worker.returncode})"
stderr_content = self._read_stderr_log(worker_idx)
error_msg = f"FOP Worker {worker_idx} ist beendet (Exit: {worker.returncode})\nstderr:\n{stderr_content}"
logger.error(error_msg)
return False, error_msg
# Erstelle Job-String (Tab-separated)
job = f"{input_fo}\t{output_pdf}\n"
logger.debug(f"Sende FOP-Job an Worker {worker_idx}: {input_fo.name}{output_pdf.name}")
# Sende Job an Worker
worker.stdin.write(job)
worker.stdin.flush()
# Warte auf Antwort
response = worker.stdout.readline().strip()
logger.debug(f"FOP Worker {worker_idx} Antwort: '{response}'")
if response == "OK":
return True, "Erfolgreich"
elif response.startswith("ERROR:"):
error_msg = response[6:].strip()
return False, f"FOP-Fehler: {error_msg}"
return False, f"FOP-Fehler: {response[6:].strip()}"
elif not response:
stderr_content = self._read_stderr_log(worker_idx, tail=500)
return False, f"FOP Worker {worker_idx} crashed (keine Antwort)\nstderr:\n{stderr_content}"
else:
# Leere Antwort bedeutet Worker ist crashed
if not response:
stderr_log = self.worker_log_dir / f"fop_worker_{worker_idx}_stderr.log"
try:
with open(stderr_log, "r") as f:
stderr_content = f.read()[-500:] # Letzte 500 Zeichen
return False, f"FOP Worker {worker_idx} crashed (keine Antwort)\nstderr:\n{stderr_content}"
except Exception:
return False, f"FOP Worker {worker_idx} crashed (keine Antwort)"
return False, f"Unerwartete Antwort: {response}"
except Exception as e:
logger.error(f"Fehler bei FOP Worker {worker_idx}: {e}")
return False, f"Worker-Fehler: {str(e)}"
finally:
# Gebe Worker-Lock frei
self.worker_locks[worker_idx].release()
def measure_ram_usage(self) -> tuple[float, float, list[float]]:
    """Measure the current resident memory of every running worker process.

    Workers that have already exited, or whose process cannot be inspected,
    are skipped with a warning and do not count towards the average.

    Returns:
        tuple: ``(total_mb, average_mb, per_worker_mb_list)``; the average
        is ``0.0`` when no worker could be measured.
    """
    bytes_per_mb = 1024 * 1024
    usage_mb = []

    for idx, proc in enumerate(self.workers):
        try:
            if proc.poll() is not None:
                logger.warning(f"Worker {idx} ist nicht mehr aktiv (kann RAM nicht messen)")
                continue
            # RSS (resident set size) is reported in bytes.
            rss_bytes = psutil.Process(proc.pid).memory_info().rss
            usage_mb.append(rss_bytes / bytes_per_mb)
        except (psutil.NoSuchProcess, psutil.AccessDenied) as err:
            logger.warning(f"Konnte RAM für Worker {idx} nicht messen: {err}")

    total_mb = sum(usage_mb)
    if usage_mb:
        mean_mb = total_mb / len(usage_mb)
    else:
        mean_mb = 0.0

    return total_mb, mean_mb, usage_mb
def capture_ram_before_transform(self):
    """Store the workers' RAM usage as the "before transformation" snapshot.

    The per-worker list, total and average returned by
    ``measure_ram_usage`` are written to ``self.metrics``; total and
    average are then logged.
    """
    total_mb, mean_mb, worker_mb = self.measure_ram_usage()

    stats = self.metrics
    stats.ram_before_transform_mb_per_worker = worker_mb
    stats.total_ram_before_mb = total_mb
    stats.average_ram_before_mb = mean_mb

    summary = (
        f"RAM vor Transformation: {self.metrics.total_ram_before_mb:.1f} MB "
        f"(Durchschnitt: {self.metrics.average_ram_before_mb:.1f} MB/Worker)"
    )
    logger.info(summary)
def capture_ram_after_transform(self):
    """Store the workers' RAM usage as the "after transformation" snapshot.

    The per-worker list, total and average returned by
    ``measure_ram_usage`` are written to ``self.metrics``; total and
    average are then logged.
    """
    total_mb, mean_mb, worker_mb = self.measure_ram_usage()

    stats = self.metrics
    stats.ram_after_transform_mb_per_worker = worker_mb
    stats.total_ram_after_mb = total_mb
    stats.average_ram_after_mb = mean_mb

    summary = (
        f"RAM nach Transformation: {self.metrics.total_ram_after_mb:.1f} MB "
        f"(Durchschnitt: {self.metrics.average_ram_after_mb:.1f} MB/Worker)"
    )
    logger.info(summary)
def shutdown(self):
    """Stop all worker processes and remove the temporary directory.

    Each worker is sent ``EXIT`` on stdin and given two seconds to
    terminate; a worker that does not exit in time is killed and reaped.
    The stdin/stdout pipes of every worker are closed afterwards. Errors
    for individual workers are logged and never raised, so one failing
    worker does not prevent the remaining clean-up.
    """
    import shutil

    logger.info("Beende FOP-Worker-Pool...")

    for i, worker in enumerate(self.workers):
        try:
            # Ask the worker to leave its job loop.
            if worker.stdin and not worker.stdin.closed:
                worker.stdin.write("EXIT\n")
                worker.stdin.flush()

            # Wait for a clean exit (max. 2 seconds).
            worker.wait(timeout=2)
            logger.debug(f"FOP Worker {i} beendet")

        except subprocess.TimeoutExpired:
            worker.kill()
            # Reap the killed process so it does not linger as a zombie.
            worker.wait()
            logger.warning(f"FOP Worker {i} musste gekillt werden")
        except Exception as e:
            logger.error(f"Fehler beim Beenden von FOP Worker {i}: {e}")
        finally:
            # Popen.wait() leaves the pipe objects open; release them here.
            for pipe in (worker.stdin, worker.stdout):
                if pipe and not pipe.closed:
                    try:
                        pipe.close()
                    except OSError:
                        # Flushing into an already dead worker can fail
                        # (broken pipe); nothing is left to clean up then.
                        pass

    # Remove the directory holding the compiled worker class.
    if self.temp_dir and self.temp_dir.exists():
        try:
            shutil.rmtree(self.temp_dir)
            logger.debug(f"Temporäres Verzeichnis gelöscht: {self.temp_dir}")
        except Exception as e:
            logger.warning(f"Konnte temporäres Verzeichnis nicht löschen: {e}")

    logger.info("FOP-Worker-Pool beendet")
def __enter__(self):
"""Context manager entry."""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit."""
self.shutdown()