Feature: s9api-basierte SaxonWorkerPool-Variante für XSLT 2.0/3.0

Die JAXP-basierte SaxonWorkerPool-Implementierung ist nur für XSLT 1.0
vollständig spezifiziert und kann bei XSLT 2.0/3.0 zu fehlerhaften
Ausgaben führen.

Änderungen:
- Neue SaxonWorkerPoolS9Api-Klasse mit Saxon s9api für XSLT 2.0/3.0
- XsltVersion-Enum in conf.py (XSLT_1_0, XSLT_2_0_3_0)
- ComboBox in Performance-Einstellungen zur XSLT-Version-Auswahl
- MainWindow wählt automatisch richtige Worker-Pool-Variante
- Verbesserte Classpath-Behandlung und Fehlerbehandlung

Standard-Einstellung: XSLT 2.0/3.0 (s9api) - empfohlen für moderne Stylesheets
Fallback: XSLT 1.0 (JAXP) - verfügbar für Legacy-Stylesheets

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-05 20:20:00 +01:00
parent 6976d21768
commit cbcae3222f
7 changed files with 605 additions and 22 deletions
+443
View File
@@ -0,0 +1,443 @@
"""
Saxon Worker Pool (s9api) - Persistente JVM-Prozesse für XSLT 2.0/3.0 Transformationen.
Diese Variante verwendet die Saxon s9api API anstatt JAXP und ist für XSLT 2.0 und 3.0 geeignet.
Eliminiert JVM-Startup-Overhead durch Vorinitialisierung von N Worker-Prozessen.
Jeder Worker läuft als Daemon und verarbeitet mehrere Transformationen nacheinander.
"""
import logging
import subprocess
import threading
from pathlib import Path
from queue import Queue
from typing import Optional
import tempfile
logger = logging.getLogger(__name__)

# Java source of the s9api worker. It is written to a temporary directory and
# compiled with javac at runtime.
#
# Protocol implemented by the Java code below:
#   * stdin:  one job per line, tab-separated:
#             <source xml> TAB <stylesheet> TAB <output file> [TAB k=v|||k=v...]
#             The literal line "EXIT" terminates the worker.
#   * stdout: exactly one reply line per job: "OK" or "ERROR: <message>".
#   * stderr: free-form diagnostics (DEBUG lines and stack traces).
#
# Error messages are collapsed to a single line before they are written to
# stdout. A message containing a line break would otherwise produce several
# reply lines for one job and desynchronise a reader that consumes one line
# per job.
SAXON_S9API_WORKER_JAVA = """
import net.sf.saxon.s9api.*;
import javax.xml.transform.stream.StreamSource;
import java.io.*;
public class SaxonS9ApiWorker {
    // Collapse line breaks so that every reply occupies exactly one stdout line
    private static String singleLine(String msg) {
        return msg.replace('\\r', ' ').replace('\\n', ' ');
    }
    public static void main(String[] args) {
        BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
        String line;
        // Create Processor once and reuse (equivalent to TransformerFactory)
        Processor processor = new Processor(false);
        System.err.println("SaxonS9ApiWorker started and ready (using s9api for XSLT 2.0/3.0)");
        System.err.flush();
        try {
            while ((line = reader.readLine()) != null) {
                System.err.println("DEBUG: Received line: " + line.substring(0, Math.min(100, line.length())));
                System.err.flush();
                if ("EXIT".equals(line.trim())) {
                    System.err.println("SaxonS9ApiWorker exiting");
                    break;
                }
                try {
                    // Parse job
                    System.err.println("DEBUG: Parsing job...");
                    System.err.flush();
                    String[] parts = line.split("\\\\t");
                    System.err.println("DEBUG: Parts count: " + parts.length);
                    System.err.flush();
                    if (parts.length < 3) {
                        System.out.println("ERROR: Invalid job format");
                        System.out.flush();
                        continue;
                    }
                    String sourceXml = parts[0];
                    String xslStylesheet = parts[1];
                    String outputFo = parts[2];
                    System.err.println("DEBUG: Compiling stylesheet...");
                    System.err.flush();
                    // Compile stylesheet
                    XsltCompiler compiler = processor.newXsltCompiler();
                    XsltExecutable executable = compiler.compile(new StreamSource(new File(xslStylesheet)));
                    System.err.println("DEBUG: Creating transformer...");
                    System.err.flush();
                    // Create transformer
                    XsltTransformer transformer = executable.load();
                    // Set source
                    transformer.setSource(new StreamSource(new File(sourceXml)));
                    // Set destination
                    Serializer serializer = processor.newSerializer(new File(outputFo));
                    transformer.setDestination(serializer);
                    // Set parameters if present
                    if (parts.length > 3 && !parts[3].isEmpty()) {
                        String[] params = parts[3].split("\\\\|\\\\|\\\\|");
                        for (String param : params) {
                            if (!param.isEmpty() && param.contains("=")) {
                                String[] kv = param.split("=", 2);
                                transformer.setParameter(new QName(kv[0]), new XdmAtomicValue(kv[1]));
                                System.err.println("DEBUG: Set parameter: " + kv[0] + " = " + kv[1]);
                            }
                        }
                        System.err.flush();
                    }
                    System.err.println("DEBUG: Running transformation...");
                    System.err.flush();
                    // Transform
                    transformer.transform();
                    System.err.println("DEBUG: Transformation completed");
                    System.err.flush();
                    System.out.println("OK");
                    System.out.flush();
                } catch (SaxonApiException e) {
                    System.err.println("DEBUG: SaxonApiException: " + e.getClass().getName());
                    System.err.flush();
                    e.printStackTrace(System.err);
                    String errorMsg = e.getMessage();
                    if (errorMsg == null || errorMsg.isEmpty()) {
                        errorMsg = e.getClass().getSimpleName();
                    }
                    System.out.println("ERROR: " + singleLine(errorMsg));
                    System.out.flush();
                } catch (Exception e) {
                    System.err.println("DEBUG: Job processing exception: " + e.getClass().getName());
                    System.err.flush();
                    e.printStackTrace(System.err);
                    System.out.println("ERROR: " + singleLine(e.getMessage() != null ? e.getMessage() : e.getClass().getName()));
                    System.out.flush();
                }
            }
        } catch (IOException e) {
            System.err.println("SaxonS9ApiWorker I/O error: " + e.getMessage());
            e.printStackTrace(System.err);
        }
    }
}
"""
class SaxonWorkerPoolS9Api:
    """Pool of long-running JVM processes that run Saxon transformations via s9api.

    On construction the ``SaxonS9ApiWorker`` Java class is compiled from
    ``SAXON_S9API_WORKER_JAVA`` into a temporary directory and ``num_workers``
    JVM processes are started. A job is sent to a worker as one tab-separated
    line on its stdin, and one reply line is read back from its stdout. Each
    worker's stderr is redirected to its own log file.

    The pool can be used as a context manager; leaving the ``with`` block
    calls :meth:`shutdown`.
    """

    # Separator between "key=value" pairs inside the parameter field of a job.
    _PARAM_SEPARATOR = "|||"
    # Characters that would corrupt the line-oriented, tab-separated job format.
    _FORBIDDEN_JOB_CHARS = "\t\n\r"

    def __init__(
        self,
        num_workers: int,
        java_vm_path: Path,
        saxon_jar_path: Path,
        classpath_cache: dict[Path, str],
        log_dir: Optional[Path] = None,
    ):
        """Compile the worker class and start the worker processes.

        Args:
            num_workers: Number of worker processes to start.
            java_vm_path: Path to the Java VM binary.
            saxon_jar_path: Path to the Saxon JAR file.
            classpath_cache: Cache mapping a Saxon directory to its classpath;
                a missing entry is built and stored here.
            log_dir: Optional directory for the worker stderr logs. When not
                given, the logs are written to the pool's temporary directory.

        Raises:
            RuntimeError: If javac fails or a worker exits right after start.
            Exception: Any other error raised while compiling or starting.
        """
        self.num_workers = num_workers
        self.java_vm_path = java_vm_path
        self.saxon_jar_path = saxon_jar_path
        self.classpath_cache = classpath_cache
        self.log_dir = log_dir

        # Worker processes and their per-worker locks (same index in both lists).
        self.workers: list[subprocess.Popen] = []
        self.worker_locks: list[threading.Lock] = []
        # Indices of workers that are currently idle; transform() blocks on it.
        self._idle_workers: Queue = Queue()
        # Created for interface compatibility; no method of this class uses them.
        self.job_queue: Queue = Queue()
        self.result_queue: Queue = Queue()

        # Temporary directory holding the compiled Java class.
        self.temp_dir: Optional[Path] = None
        self.worker_class_path: Optional[Path] = None
        self.worker_log_dir: Optional[Path] = None

        try:
            self._compile_worker_class()
            self._start_workers()
        except Exception:
            # Do not leak already started JVMs or the temporary directory.
            self.shutdown()
            raise
        logger.info(f"SaxonWorkerPoolS9Api initialisiert mit {num_workers} Workern (XSLT 2.0/3.0)")

    @staticmethod
    def _javac_path(java_vm_path: Path) -> Path:
        """Return the javac binary that sits next to the given java binary.

        Only the file name is rewritten. Directory components that happen to
        contain "java" (for example ``/usr/lib/jvm/java-17/bin/java``) are
        left untouched.
        """
        name = java_vm_path.name
        lowered = name.lower()
        if lowered.startswith("javac"):
            return java_vm_path
        if lowered.startswith("javaw"):
            javac_name = "javac" + name[len("javaw"):]
        elif lowered.startswith("java"):
            javac_name = "javac" + name[len("java"):]
        else:
            # Unknown launcher name: fall back to a sibling "javac" binary.
            javac_name = "javac" + java_vm_path.suffix
        return java_vm_path.with_name(javac_name)

    def _resolve_classpath(self) -> str:
        """Return the Saxon classpath, building and caching it when missing.

        The fallback collects all ``*.jar`` files of the Saxon directory and
        of its ``lib`` sub-directory (if present), joined with the platform's
        path separator.
        """
        saxon_dir = self.saxon_jar_path.parent
        if saxon_dir in self.classpath_cache:
            return self.classpath_cache[saxon_dir]

        import os

        jars = sorted(saxon_dir.glob("*.jar"))
        lib_dir = saxon_dir / "lib"
        if lib_dir.exists():
            jars.extend(sorted(lib_dir.glob("*.jar")))
        classpath = os.pathsep.join(str(jar) for jar in jars)
        self.classpath_cache[saxon_dir] = classpath
        logger.debug(f"Classpath für {saxon_dir} neu erstellt und gecacht")
        return classpath

    def _stderr_log_path(self, worker_idx: int) -> Path:
        """Return the stderr log file of the worker with the given index."""
        return self.worker_log_dir / f"s9api_worker_{worker_idx}_stderr.log"

    def _read_worker_stderr(self, worker_idx: int, tail: Optional[int] = None) -> str:
        """Return a worker's stderr log (optionally only the last ``tail`` chars).

        Best effort: an unreadable log yields an empty string, and undecodable
        bytes are replaced, because the text is only used in error messages.
        """
        try:
            content = self._stderr_log_path(worker_idx).read_text(encoding="utf-8", errors="replace")
        except OSError:
            return ""
        return content if tail is None else content[-tail:]

    @classmethod
    def _format_job(cls, source_xml, xsl_stylesheet, output_fo, xslt_params) -> str:
        """Build the single job line that is written to a worker's stdin.

        Raises:
            ValueError: If a field contains a tab or line break, or a
                parameter contains the pair separator or an ``=`` in its
                name. Such values would be misparsed by the worker.
        """
        paths = [str(source_xml), str(xsl_stylesheet), str(output_fo)]
        pairs = []
        for key, value in (xslt_params or {}).items():
            if "=" in str(key):
                raise ValueError(f"Ungültiger XSLT-Parametername (enthält '='): {key!r}")
            pairs.append(f"{key}={value}")
        for pair in pairs:
            if cls._PARAM_SEPARATOR in pair:
                raise ValueError(f"XSLT-Parameter enthält das Trennzeichen '{cls._PARAM_SEPARATOR}': {pair!r}")
        for field in paths + pairs:
            if any(char in field for char in cls._FORBIDDEN_JOB_CHARS):
                raise ValueError(f"Tabulator/Zeilenumbruch im Job-Feld nicht erlaubt: {field!r}")
        return "\t".join(paths + [cls._PARAM_SEPARATOR.join(pairs)]) + "\n"

    def _compile_worker_class(self):
        """Write the Java worker source to a temp directory and compile it.

        Raises:
            RuntimeError: If javac exits with a non-zero return code.
        """
        try:
            self.temp_dir = Path(tempfile.mkdtemp(prefix="saxon_s9api_worker_"))

            java_file = self.temp_dir / "SaxonS9ApiWorker.java"
            java_file.write_text(SAXON_S9API_WORKER_JAVA, encoding="utf-8")

            javac_cmd = [
                str(self._javac_path(self.java_vm_path)),
                "-cp",
                self._resolve_classpath(),
                str(java_file),
            ]
            logger.debug(f"Kompiliere SaxonS9ApiWorker: {' '.join(javac_cmd)}")
            result = subprocess.run(javac_cmd, capture_output=True, text=True, timeout=30)
            if result.returncode != 0:
                raise RuntimeError(f"Java-Kompilierung fehlgeschlagen: {result.stderr}")

            self.worker_class_path = self.temp_dir
            logger.info(f"SaxonS9ApiWorker erfolgreich kompiliert: {self.temp_dir}")
        except Exception as e:
            logger.error(f"Fehler beim Kompilieren von SaxonS9ApiWorker: {e}")
            raise

    def _start_workers(self):
        """Start ``num_workers`` JVM processes running ``SaxonS9ApiWorker``.

        Raises:
            RuntimeError: If a worker has already exited shortly after start.
        """
        import os
        import time

        # The compiled worker class comes first, followed by the Saxon jars.
        full_classpath = os.pathsep.join([str(self.worker_class_path), self._resolve_classpath()])
        logger.debug(f"S9Api Worker Classpath: {full_classpath[:200]}...")

        self.worker_log_dir = self.log_dir if self.log_dir else self.temp_dir
        if self.log_dir:
            self.worker_log_dir.mkdir(parents=True, exist_ok=True)

        cmd = [str(self.java_vm_path), "-cp", full_classpath, "SaxonS9ApiWorker"]
        for i in range(self.num_workers):
            stderr_log = self._stderr_log_path(i)
            try:
                # The child receives its own copy of the handle, so the
                # parent's copy can be closed as soon as the process exists.
                with open(stderr_log, "w", encoding="utf-8") as stderr_file:
                    process = subprocess.Popen(
                        cmd,
                        stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE,
                        stderr=stderr_file,
                        text=True,
                        bufsize=1,  # line buffered
                    )
            except Exception as e:
                logger.error(f"Fehler beim Starten von S9Api Worker {i}: {e}")
                raise
            self.workers.append(process)
            self.worker_locks.append(threading.Lock())
            logger.debug(f"S9Api Worker {i} gestartet (PID: {process.pid}, stderr: {stderr_log})")

        # One short grace period for all JVMs, then detect immediate failures.
        if self.workers:
            time.sleep(0.1)
        for i, process in enumerate(self.workers):
            if process.poll() is not None:
                error = RuntimeError(
                    f"S9Api Worker {i} ist sofort beendet (Exit Code: {process.returncode})\n"
                    f"stderr:\n{self._read_worker_stderr(i)}"
                )
                logger.error(f"Fehler beim Starten von S9Api Worker {i}: {error}")
                raise error

        for i in range(len(self.workers)):
            self._idle_workers.put(i)
        logger.info(f"{len(self.workers)} Saxon-S9Api-Worker erfolgreich gestartet")

    def transform(
        self, source_xml: Path, xsl_stylesheet: Path, output_fo: Path, xslt_params: dict[str, str]
    ) -> tuple[bool, str]:
        """Run one XSLT transformation on a worker of the pool.

        Blocks until any worker is idle, sends the job and waits for the
        worker's one-line reply.

        Args:
            source_xml: Path of the XML input file.
            xsl_stylesheet: Path of the XSL stylesheet.
            output_fo: Path of the output file.
            xslt_params: XSLT parameters as name/value pairs.

        Returns:
            ``(True, "Erfolgreich")`` on success, otherwise ``(False, message)``.
        """
        try:
            job = self._format_job(source_xml, xsl_stylesheet, output_fo, xslt_params)
        except ValueError as e:
            logger.error(f"Ungültiger S9Api-Job: {e}")
            return False, f"Worker-Fehler: {e}"

        if not self.workers:
            # Empty or already shut down pool: waiting would block forever.
            return False, "Worker-Fehler: Keine S9Api Worker verfügbar"

        # Blocks until *any* worker is idle, not one particular worker.
        worker_idx = self._idle_workers.get()
        try:
            # Uncontended here; held so the per-worker lock still reflects
            # "busy" for any code that looks at worker_locks.
            with self.worker_locks[worker_idx]:
                return self._run_job(worker_idx, job, Path(source_xml).name)
        except Exception as e:
            logger.error(f"Fehler bei S9Api Worker {worker_idx}: {e}")
            return False, f"Worker-Fehler: {str(e)}"
        finally:
            self._idle_workers.put(worker_idx)

    def _run_job(self, worker_idx: int, job: str, source_name: str) -> tuple[bool, str]:
        """Send a prepared job line to one worker and interpret its reply."""
        worker = self.workers[worker_idx]

        if worker.poll() is not None:
            # The process is gone; report its exit code and what it logged.
            error_msg = f"S9Api Worker {worker_idx} ist beendet (Exit: {worker.returncode})"
            stderr_content = self._read_worker_stderr(worker_idx)
            if stderr_content:
                error_msg += f"\nstderr:\n{stderr_content}"
            logger.error(error_msg)
            return False, error_msg

        logger.debug(f"Sende Job an S9Api Worker {worker_idx}: {source_name}")
        worker.stdin.write(job)
        worker.stdin.flush()

        response = worker.stdout.readline().strip()
        logger.debug(f"S9Api Worker {worker_idx} Antwort: '{response}'")

        if response == "OK":
            return True, "Erfolgreich"
        if response.startswith("ERROR:"):
            return False, f"Saxon-Fehler (s9api): {response[6:].strip()}"
        if not response:
            # An empty reply is treated as a crashed worker.
            crash_msg = f"S9Api Worker {worker_idx} crashed (keine Antwort)"
            stderr_tail = self._read_worker_stderr(worker_idx, tail=500)
            if stderr_tail:
                crash_msg += f"\nstderr:\n{stderr_tail}"
            return False, crash_msg
        return False, f"Unerwartete Antwort: {response}"

    @staticmethod
    def _close_pipes(worker: subprocess.Popen) -> None:
        """Close the stdin/stdout pipes of a worker, ignoring pipe errors."""
        for stream in (worker.stdin, worker.stdout):
            if stream is not None and not stream.closed:
                try:
                    stream.close()
                except OSError:
                    # Flushing into an already dead process fails; nothing to do.
                    pass

    def shutdown(self):
        """Stop all worker processes and remove the temporary directory.

        Each running worker is sent ``EXIT`` and given two seconds to leave;
        after that it is killed. Calling this method again is harmless.
        """
        logger.info("Beende Saxon-S9Api-Worker-Pool...")

        for i, worker in enumerate(self.workers):
            try:
                if worker.poll() is None and worker.stdin and not worker.stdin.closed:
                    worker.stdin.write("EXIT\n")
                    worker.stdin.flush()
                worker.wait(timeout=2)
                logger.debug(f"S9Api Worker {i} beendet")
            except subprocess.TimeoutExpired:
                worker.kill()
                worker.wait()  # reap the killed process
                logger.warning(f"S9Api Worker {i} musste gekillt werden")
            except Exception as e:
                logger.error(f"Fehler beim Beenden von S9Api Worker {i}: {e}")
            finally:
                self._close_pipes(worker)

        # Forget the processes so that a second shutdown() has nothing to do
        # and transform() reports an empty pool instead of blocking.
        self.workers.clear()
        self.worker_locks.clear()

        if self.temp_dir and self.temp_dir.exists():
            try:
                import shutil

                shutil.rmtree(self.temp_dir)
                logger.debug(f"Temporäres Verzeichnis gelöscht: {self.temp_dir}")
            except Exception as e:
                logger.warning(f"Konnte temporäres Verzeichnis nicht löschen: {e}")

        logger.info("Saxon-S9Api-Worker-Pool beendet")

    def __enter__(self):
        """Return the pool itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Shut the pool down when the ``with`` block is left."""
        self.shutdown()