Refactor: Gemeinsame Basisklassen für Worker-Pools und Parameter-Dialoge

- BaseWorkerPool (worker_pool_base.py): Eliminiert ~450 Zeilen Duplikation
  aus saxon_pool.py, saxon_pool_s9api.py und fop_pool.py; behebt stderr-Handle-Leak
- XsltParamsEditDialog (XsltParamsEditDialog.py): Gemeinsame Basisklasse für
  TreeNodeEditDialog und XslFileEditDialog; reduziert je 162 auf 8 Zeilen

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-09 19:49:57 +01:00
parent cb90f9e483
commit 37ebdff349
7 changed files with 552 additions and 1182 deletions
+39 -303
View File
@@ -7,15 +7,10 @@ Jeder Worker läuft als Daemon und verarbeitet mehrere Transformationen nacheina
"""
import logging
import subprocess
import threading
import time
import psutil
from pathlib import Path
from typing import Optional
import tempfile
from worker_metrics import WorkerPoolMetrics
from worker_pool_base import BaseWorkerPool, build_jar_classpath
logger = logging.getLogger(__name__)
@@ -155,7 +150,7 @@ public class SaxonS9ApiWorker {
"""
class SaxonWorkerPoolS9Api:
class SaxonWorkerPoolS9Api(BaseWorkerPool):
"""
Pool von lang-laufenden JVM-Prozessen für Saxon-Transformationen mit s9api.
@@ -171,181 +166,52 @@ class SaxonWorkerPoolS9Api:
classpath_cache: dict[Path, str],
log_dir: Optional[Path] = None,
):
"""
Initialisiert den Saxon-Worker-Pool mit s9api.
Args:
num_workers: Anzahl der Worker-Prozesse
java_vm_path: Pfad zur Java VM Binary
saxon_jar_path: Pfad zur Saxon JAR-Datei
classpath_cache: Cache für Saxon-Classpaths
log_dir: Optionales Verzeichnis für Worker-Logs (Standard: temp_dir/tmp)
"""
self.num_workers = num_workers
self.java_vm_path = java_vm_path
super().__init__(num_workers, java_vm_path, log_dir)
self.saxon_jar_path = saxon_jar_path
self.classpath_cache = classpath_cache
self.log_dir = log_dir
# Worker-Prozesse
self.workers: list[subprocess.Popen] = []
self.worker_locks: list[threading.Lock] = []
# Temporäres Verzeichnis für kompilierte Java-Klasse
self.temp_dir: Optional[Path] = None
self.worker_class_path: Optional[Path] = None
self.worker_log_dir: Optional[Path] = None
# Performance-Metriken
self.metrics = WorkerPoolMetrics()
# Initialisierung
self._compile_worker_class()
self._start_workers()
logger.info(f"SaxonWorkerPoolS9Api initialisiert mit {num_workers} Workern (XSLT 2.0/3.0)")
def _compile_worker_class(self):
"""Kompiliert die SaxonS9ApiWorker-Java-Klasse."""
start_time = time.time()
try:
# Erstelle temporäres Verzeichnis
self.temp_dir = Path(tempfile.mkdtemp(prefix="saxon_s9api_worker_"))
# --- Abstrakte Properties ---
# Schreibe Java-Quellcode
java_file = self.temp_dir / "SaxonS9ApiWorker.java"
java_file.write_text(SAXON_S9API_WORKER_JAVA, encoding="utf-8")
@property
def _pool_name(self) -> str:
return "Saxon-S9Api"
# Hole Classpath
saxon_dir = self.saxon_jar_path.parent
if saxon_dir in self.classpath_cache:
classpath = self.classpath_cache[saxon_dir]
else:
# Fallback: Baue Classpath neu
import glob
import sys
@property
def _java_source_code(self) -> str:
return SAXON_S9API_WORKER_JAVA
all_jars = glob.glob(str(saxon_dir / "*.jar"))
lib_dir = saxon_dir / "lib"
if lib_dir.exists():
all_jars.extend(glob.glob(str(lib_dir / "*.jar")))
@property
def _java_class_name(self) -> str:
return "SaxonS9ApiWorker"
classpath_separator = ";" if sys.platform == "win32" else ":"
classpath = classpath_separator.join(all_jars)
@property
def _temp_dir_prefix(self) -> str:
return "saxon_s9api_worker_"
# Kompiliere Java-Klasse
javac_cmd = [str(self.java_vm_path).replace("java", "javac"), "-cp", classpath, str(java_file)]
@property
def _worker_init_sleep(self) -> float:
return 0.1
logger.debug(f"Kompiliere SaxonS9ApiWorker: {' '.join(javac_cmd)}")
# --- Abstrakte Methoden ---
result = subprocess.run(javac_cmd, capture_output=True, text=True, timeout=30)
if result.returncode != 0:
raise RuntimeError(f"Java-Kompilierung fehlgeschlagen: {result.stderr}")
self.worker_class_path = self.temp_dir
# Speichere Kompilierungszeit
self.metrics.compilation_time_seconds = time.time() - start_time
logger.info(
f"SaxonS9ApiWorker erfolgreich kompiliert: {self.temp_dir} "
f"({self.metrics.compilation_time_seconds:.3f}s)"
)
except Exception as e:
logger.error(f"Fehler beim Kompilieren von SaxonS9ApiWorker: {e}")
raise
def _start_workers(self):
"""Startet N Worker-Prozesse."""
# Hole Classpath
def _get_classpath(self) -> str:
saxon_dir = self.saxon_jar_path.parent
if saxon_dir in self.classpath_cache:
classpath = self.classpath_cache[saxon_dir]
else:
# Fallback: Baue Classpath neu (sollte nicht nötig sein, aber zur Sicherheit)
import glob
import sys
all_jars = glob.glob(str(saxon_dir / "*.jar"))
lib_dir = saxon_dir / "lib"
if lib_dir.exists():
all_jars.extend(glob.glob(str(lib_dir / "*.jar")))
classpath_separator = ";" if sys.platform == "win32" else ":"
classpath = classpath_separator.join(all_jars)
# Cache für zukünftige Verwendung
self.classpath_cache[saxon_dir] = classpath
if saxon_dir not in self.classpath_cache:
self.classpath_cache[saxon_dir] = build_jar_classpath(saxon_dir)
logger.debug(f"Classpath für {saxon_dir} neu erstellt und gecacht")
return self.classpath_cache[saxon_dir]
# Füge Worker-Classpath hinzu
import sys
def _build_worker_cmd(self, full_classpath: str) -> list[str]:
return [str(self.java_vm_path), "-cp", full_classpath, "SaxonS9ApiWorker"]
classpath_separator = ";" if sys.platform == "win32" else ":"
full_classpath = str(self.worker_class_path) + classpath_separator + classpath
def _stderr_log_name(self, i: int) -> str:
return f"s9api_worker_{i}_stderr.log"
logger.debug(f"S9Api Worker Classpath: {full_classpath[:200]}...")
# Bestimme Log-Verzeichnis
self.worker_log_dir = self.log_dir if self.log_dir else self.temp_dir
if self.log_dir:
self.worker_log_dir.mkdir(parents=True, exist_ok=True)
for i in range(self.num_workers):
worker_start_time = time.time()
try:
# Starte JVM-Prozess mit SaxonS9ApiWorker
cmd = [str(self.java_vm_path), "-cp", full_classpath, "SaxonS9ApiWorker"]
# Öffne stderr-Log-Datei für diesen Worker
stderr_log = self.worker_log_dir / f"s9api_worker_{i}_stderr.log"
stderr_file = open(stderr_log, "w", encoding="utf-8")
process = subprocess.Popen(
cmd,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=stderr_file, # Redirect stderr to file
text=True,
bufsize=1, # Line buffered
)
self.workers.append(process)
self.worker_locks.append(threading.Lock())
logger.debug(f"S9Api Worker {i} gestartet (PID: {process.pid}, stderr: {stderr_log})")
# Warte kurz damit Worker initialisieren kann
time.sleep(0.1)
# Prüfe ob Worker noch läuft
if process.poll() is not None:
# Worker ist bereits beendet - Fehler!
stderr_file.close()
with open(stderr_log, "r") as f:
stderr_content = f.read()
raise RuntimeError(
f"S9Api Worker {i} ist sofort beendet (Exit Code: {process.returncode})\nstderr:\n{stderr_content}"
)
# Speichere Worker-Startzeit
worker_elapsed = time.time() - worker_start_time
self.metrics.worker_start_times.append(worker_elapsed)
except Exception as e:
logger.error(f"Fehler beim Starten von S9Api Worker {i}: {e}")
raise
# Berechne Aggregat-Werte für Worker-Startzeiten
self.metrics.calculate_aggregates()
logger.info(
f"{len(self.workers)} Saxon-S9Api-Worker erfolgreich gestartet "
f"(Summe: {self.metrics.total_worker_start_time_seconds:.3f}s, "
f"Durchschnitt: {self.metrics.average_worker_start_time_seconds:.3f}s)"
)
# --- Saxon-s9api-spezifische Job-Methode ---
def transform(
self, source_xml: Path, xsl_stylesheet: Path, output_fo: Path, xslt_params: dict[str, str]
@@ -362,170 +228,40 @@ class SaxonWorkerPoolS9Api:
Returns:
tuple[bool, str]: (Erfolg, Fehlermeldung/Info)
"""
# Finde freien Worker
worker_idx = None
for i, lock in enumerate(self.worker_locks):
if lock.acquire(blocking=False):
worker_idx = i
break
if worker_idx is None:
# Kein freier Worker, warte auf ersten verfügbaren
for i, lock in enumerate(self.worker_locks):
lock.acquire()
worker_idx = i
break
worker_idx = self._acquire_worker()
try:
worker = self.workers[worker_idx]
# Prüfe ob Worker noch läuft
if worker.poll() is not None:
# Worker ist tot!
stderr_log = self.worker_log_dir / f"s9api_worker_{worker_idx}_stderr.log"
try:
with open(stderr_log, "r") as f:
stderr_content = f.read()
error_msg = (
f"S9Api Worker {worker_idx} ist beendet (Exit: {worker.returncode})\nstderr:\n{stderr_content}"
)
except Exception:
error_msg = f"S9Api Worker {worker_idx} ist beendet (Exit: {worker.returncode})"
stderr_content = self._read_stderr_log(worker_idx)
error_msg = (
f"S9Api Worker {worker_idx} ist beendet (Exit: {worker.returncode})\nstderr:\n{stderr_content}"
)
logger.error(error_msg)
return False, error_msg
# Formatiere Parameter
params_str = "|||".join([f"{key}={value}" for key, value in xslt_params.items()])
# Erstelle Job-String (Tab-separated)
params_str = "|||".join([f"{k}={v}" for k, v in xslt_params.items()])
job = f"{source_xml}\t{xsl_stylesheet}\t{output_fo}\t{params_str}\n"
logger.debug(f"Sende Job an S9Api Worker {worker_idx}: {source_xml.name}")
# Sende Job an Worker
worker.stdin.write(job)
worker.stdin.flush()
# Warte auf Antwort
response = worker.stdout.readline().strip()
logger.debug(f"S9Api Worker {worker_idx} Antwort: '{response}'")
if response == "OK":
return True, "Erfolgreich"
elif response.startswith("ERROR:"):
error_msg = response[6:].strip()
return False, f"Saxon-Fehler (s9api): {error_msg}"
return False, f"Saxon-Fehler (s9api): {response[6:].strip()}"
elif not response:
stderr_content = self._read_stderr_log(worker_idx, tail=500)
return False, f"S9Api Worker {worker_idx} crashed (keine Antwort)\nstderr:\n{stderr_content}"
else:
# Leere Antwort bedeutet Worker ist crashed
if not response:
stderr_log = self.worker_log_dir / f"s9api_worker_{worker_idx}_stderr.log"
try:
with open(stderr_log, "r") as f:
stderr_content = f.read()[-500:] # Letzte 500 Zeichen
return False, f"S9Api Worker {worker_idx} crashed (keine Antwort)\nstderr:\n{stderr_content}"
except Exception:
return False, f"S9Api Worker {worker_idx} crashed (keine Antwort)"
return False, f"Unerwartete Antwort: {response}"
except Exception as e:
logger.error(f"Fehler bei S9Api Worker {worker_idx}: {e}")
return False, f"Worker-Fehler: {str(e)}"
finally:
# Gebe Worker-Lock frei
self.worker_locks[worker_idx].release()
def measure_ram_usage(self) -> tuple[float, float, list[float]]:
"""
Misst den aktuellen RAM-Verbrauch aller Worker-Prozesse.
Returns:
tuple: (total_mb, average_mb, per_worker_mb_list)
"""
ram_per_worker = []
for i, worker in enumerate(self.workers):
try:
if worker.poll() is None: # Worker läuft noch
process = psutil.Process(worker.pid)
# Hole Speicherinfo (RSS = Resident Set Size in Bytes)
mem_info = process.memory_info()
ram_mb = mem_info.rss / (1024 * 1024) # Konvertiere zu MB
ram_per_worker.append(ram_mb)
else:
logger.warning(f"Worker {i} ist nicht mehr aktiv (kann RAM nicht messen)")
except (psutil.NoSuchProcess, psutil.AccessDenied) as e:
logger.warning(f"Konnte RAM für Worker {i} nicht messen: {e}")
total_ram = sum(ram_per_worker)
average_ram = total_ram / len(ram_per_worker) if ram_per_worker else 0.0
return total_ram, average_ram, ram_per_worker
def capture_ram_before_transform(self):
"""Erfasst RAM-Verbrauch vor der ersten Transformation."""
total, average, per_worker = self.measure_ram_usage()
self.metrics.ram_before_transform_mb_per_worker = per_worker
self.metrics.total_ram_before_mb = total
self.metrics.average_ram_before_mb = average
logger.info(
f"RAM vor Transformation: {self.metrics.total_ram_before_mb:.1f} MB "
f"(Durchschnitt: {self.metrics.average_ram_before_mb:.1f} MB/Worker)"
)
def capture_ram_after_transform(self):
"""Erfasst RAM-Verbrauch nach allen Transformationen."""
total, average, per_worker = self.measure_ram_usage()
self.metrics.ram_after_transform_mb_per_worker = per_worker
self.metrics.total_ram_after_mb = total
self.metrics.average_ram_after_mb = average
logger.info(
f"RAM nach Transformation: {self.metrics.total_ram_after_mb:.1f} MB "
f"(Durchschnitt: {self.metrics.average_ram_after_mb:.1f} MB/Worker)"
)
def shutdown(self):
"""Beendet alle Worker-Prozesse sauber."""
logger.info("Beende Saxon-S9Api-Worker-Pool...")
for i, worker in enumerate(self.workers):
try:
# Sende EXIT-Befehl
if worker.stdin and not worker.stdin.closed:
worker.stdin.write("EXIT\n")
worker.stdin.flush()
# Warte auf Beendigung (max 2 Sekunden)
worker.wait(timeout=2)
logger.debug(f"S9Api Worker {i} beendet")
except subprocess.TimeoutExpired:
# Force kill falls nötig
worker.kill()
logger.warning(f"S9Api Worker {i} musste gekillt werden")
except Exception as e:
logger.error(f"Fehler beim Beenden von S9Api Worker {i}: {e}")
# Lösche temporäres Verzeichnis
if self.temp_dir and self.temp_dir.exists():
try:
import shutil
shutil.rmtree(self.temp_dir)
logger.debug(f"Temporäres Verzeichnis gelöscht: {self.temp_dir}")
except Exception as e:
logger.warning(f"Konnte temporäres Verzeichnis nicht löschen: {e}")
logger.info("Saxon-S9Api-Worker-Pool beendet")
def __enter__(self):
"""Context manager entry."""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit."""
self.shutdown()