Feature: s9api-basierte SaxonWorkerPool-Variante für XSLT 2.0/3.0
Die JAXP-basierte SaxonWorkerPool-Implementierung ist nur für XSLT 1.0 vollständig spezifiziert und kann bei XSLT 2.0/3.0 zu fehlerhaften Ausgaben führen. Änderungen: - Neue SaxonWorkerPoolS9Api-Klasse mit Saxon s9api für XSLT 2.0/3.0 - XsltVersion-Enum in conf.py (XSLT_1_0, XSLT_2_0_3_0) - ComboBox in Performance-Einstellungen zur XSLT-Version-Auswahl - MainWindow wählt automatisch richtige Worker-Pool-Variante - Verbesserte Classpath-Behandlung und Fehlerbehandlung Standard-Einstellung: XSLT 2.0/3.0 (s9api) - empfohlen für moderne Stylesheets Fallback: XSLT 1.0 (JAXP) - verfügbar für Legacy-Stylesheets 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,443 @@
|
||||
"""
|
||||
Saxon Worker Pool (s9api) - Persistente JVM-Prozesse für XSLT 2.0/3.0 Transformationen.
|
||||
|
||||
Diese Variante verwendet die Saxon s9api API anstatt JAXP und ist für XSLT 2.0 und 3.0 geeignet.
|
||||
Eliminiert JVM-Startup-Overhead durch Vorinitialisierung von N Worker-Prozessen.
|
||||
Jeder Worker läuft als Daemon und verarbeitet mehrere Transformationen nacheinander.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import subprocess
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from queue import Queue
|
||||
from typing import Optional
|
||||
import tempfile
|
||||
|
||||
logger = logging.getLogger(__name__)


# Java source of the s9api worker; it is written to a temp directory and
# compiled with javac at runtime.
#
# Line protocol (one job per stdin line, exactly one reply line on stdout):
#   request:  <source.xml>\t<stylesheet.xsl>\t<output.fo>[\t<k=v>|||<k=v>...]
#   reply:    "OK"  or  "ERROR: <message>"
#   "EXIT" on stdin terminates the worker. Diagnostics go to stderr only.
#
# Because the Python side reads exactly one stdout line per job, every error
# message is collapsed onto a single line (singleLine) before it is printed;
# a multi-line Saxon message would otherwise be read as the reply to the
# following job.
#
# NOTE: this is a regular (non-raw) Python string, so "\\\\t" below becomes
# "\\t" in the Java source, i.e. the regex \t.
SAXON_S9API_WORKER_JAVA = """
import net.sf.saxon.s9api.*;
import javax.xml.transform.stream.StreamSource;
import java.io.*;

public class SaxonS9ApiWorker {
    // Collapse line breaks so that a reply always occupies exactly one stdout line
    private static String singleLine(String message) {
        return message.replaceAll("[\\\\r\\\\n]+", " ");
    }

    public static void main(String[] args) {
        BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
        String line;

        // Create Processor once and reuse (equivalent to TransformerFactory)
        Processor processor = new Processor(false);

        System.err.println("SaxonS9ApiWorker started and ready (using s9api for XSLT 2.0/3.0)");
        System.err.flush();

        try {
            while ((line = reader.readLine()) != null) {
                System.err.println("DEBUG: Received line: " + line.substring(0, Math.min(100, line.length())));
                System.err.flush();

                if ("EXIT".equals(line.trim())) {
                    System.err.println("SaxonS9ApiWorker exiting");
                    break;
                }

                try {
                    // Parse job
                    System.err.println("DEBUG: Parsing job...");
                    System.err.flush();

                    String[] parts = line.split("\\\\t");
                    System.err.println("DEBUG: Parts count: " + parts.length);
                    System.err.flush();

                    if (parts.length < 3) {
                        System.out.println("ERROR: Invalid job format");
                        System.out.flush();
                        continue;
                    }

                    String sourceXml = parts[0];
                    String xslStylesheet = parts[1];
                    String outputFo = parts[2];

                    System.err.println("DEBUG: Compiling stylesheet...");
                    System.err.flush();

                    // Compile stylesheet
                    XsltCompiler compiler = processor.newXsltCompiler();
                    XsltExecutable executable = compiler.compile(new StreamSource(new File(xslStylesheet)));

                    System.err.println("DEBUG: Creating transformer...");
                    System.err.flush();

                    // Create transformer
                    XsltTransformer transformer = executable.load();

                    // Set source
                    transformer.setSource(new StreamSource(new File(sourceXml)));

                    // Set destination
                    Serializer serializer = processor.newSerializer(new File(outputFo));
                    transformer.setDestination(serializer);

                    // Set parameters if present
                    if (parts.length > 3 && !parts[3].isEmpty()) {
                        String[] params = parts[3].split("\\\\|\\\\|\\\\|");
                        for (String param : params) {
                            if (!param.isEmpty() && param.contains("=")) {
                                String[] kv = param.split("=", 2);
                                transformer.setParameter(new QName(kv[0]), new XdmAtomicValue(kv[1]));
                                System.err.println("DEBUG: Set parameter: " + kv[0] + " = " + kv[1]);
                            }
                        }
                        System.err.flush();
                    }

                    System.err.println("DEBUG: Running transformation...");
                    System.err.flush();

                    // Transform
                    transformer.transform();

                    System.err.println("DEBUG: Transformation completed");
                    System.err.flush();

                    System.out.println("OK");
                    System.out.flush();

                } catch (SaxonApiException e) {
                    System.err.println("DEBUG: SaxonApiException: " + e.getClass().getName());
                    System.err.flush();
                    e.printStackTrace(System.err);

                    String errorMsg = e.getMessage();
                    if (errorMsg == null || errorMsg.isEmpty()) {
                        errorMsg = e.getClass().getSimpleName();
                    }
                    System.out.println("ERROR: " + singleLine(errorMsg));
                    System.out.flush();

                } catch (Exception e) {
                    System.err.println("DEBUG: Job processing exception: " + e.getClass().getName());
                    System.err.flush();
                    e.printStackTrace(System.err);
                    System.out.println("ERROR: " + singleLine(e.getMessage() != null ? e.getMessage() : e.getClass().getName()));
                    System.out.flush();
                }
            }
        } catch (IOException e) {
            System.err.println("SaxonS9ApiWorker I/O error: " + e.getMessage());
            e.printStackTrace(System.err);
        }
    }
}
"""
|
||||
|
||||
|
||||
class SaxonWorkerPoolS9Api:
    """
    Pool of long-running JVM processes that run Saxon transformations via s9api.

    Each worker is a ``SaxonS9ApiWorker`` JVM that receives one tab-separated
    job per stdin line and answers with exactly one stdout line (``OK`` or
    ``ERROR: ...``). The worker's stderr is redirected to a per-worker log file.
    This variant uses the Saxon s9api API instead of JAXP and is intended for
    XSLT 2.0 and 3.0 stylesheets.

    The pool can be used as a context manager; leaving the ``with`` block calls
    :meth:`shutdown`.
    """

    def __init__(
        self,
        num_workers: int,
        java_vm_path: Path,
        saxon_jar_path: Path,
        classpath_cache: dict[Path, str],
        log_dir: Optional[Path] = None,
    ):
        """
        Compile the Java worker class and start the worker processes.

        Args:
            num_workers: Number of worker processes to start.
            java_vm_path: Path to the Java VM binary; ``javac`` is expected
                next to it.
            saxon_jar_path: Path to the Saxon JAR; its directory (and an
                optional ``lib`` sub-directory) is scanned for JARs.
            classpath_cache: Cache mapping a Saxon directory to its classpath
                string. A missing entry is built and stored here.
            log_dir: Optional directory for the worker stderr logs. Defaults
                to the pool's temporary directory, which is deleted on
                shutdown.

        Raises:
            RuntimeError: If javac fails or a worker exits right after start.
            Exception: Any other error raised while compiling or starting.
                Already started workers and the temporary directory are
                cleaned up before the error propagates.
        """
        self.num_workers = num_workers
        self.java_vm_path = java_vm_path
        self.saxon_jar_path = saxon_jar_path
        self.classpath_cache = classpath_cache
        self.log_dir = log_dir

        # Worker processes and queues
        self.workers: list[subprocess.Popen] = []
        self.job_queue: Queue = Queue()
        self.result_queue: Queue = Queue()
        self.worker_locks: list[threading.Lock] = []
        # Indices of workers that are currently free; lets transform() wait
        # for *whichever* worker becomes available first.
        self._idle_workers: Queue = Queue()

        # Temporary directory holding the compiled Java class
        self.temp_dir: Optional[Path] = None
        self.worker_class_path: Optional[Path] = None
        self.worker_log_dir: Optional[Path] = None

        # Initialisation; on failure do not leave JVMs or temp files behind
        try:
            self._compile_worker_class()
            self._start_workers()
        except Exception:
            self.shutdown()
            raise

        logger.info(f"SaxonWorkerPoolS9Api initialisiert mit {num_workers} Workern (XSLT 2.0/3.0)")

    @staticmethod
    def _javac_path(java_vm_path: Path) -> Path:
        """Return the ``javac`` binary that sits next to the given ``java`` binary.

        Only the file name is rewritten, so directory components that contain
        "java" (e.g. ``/usr/lib/jvm/java-17/bin/java``) stay untouched.
        """
        java_vm_path = Path(java_vm_path)
        if java_vm_path.stem.lower() in ("java", "javaw"):
            # Keeps an executable suffix such as ".exe"
            return java_vm_path.with_name("javac" + java_vm_path.suffix)
        # Unusual launcher name: rewrite only the first "java" in the file name
        return java_vm_path.with_name(java_vm_path.name.replace("java", "javac", 1))

    def _resolve_classpath(self) -> str:
        """Return the Saxon classpath, building and caching it if it is missing."""
        saxon_dir = self.saxon_jar_path.parent
        if saxon_dir in self.classpath_cache:
            return self.classpath_cache[saxon_dir]

        # Fallback: rebuild the classpath from the JARs on disk
        import glob
        import os

        all_jars = glob.glob(str(saxon_dir / "*.jar"))
        lib_dir = saxon_dir / "lib"
        if lib_dir.exists():
            all_jars.extend(glob.glob(str(lib_dir / "*.jar")))

        classpath = os.pathsep.join(all_jars)
        self.classpath_cache[saxon_dir] = classpath
        logger.debug(f"Classpath für {saxon_dir} neu erstellt und gecacht")
        return classpath

    def _stderr_log_path(self, worker_idx: int) -> Path:
        """Return the stderr log file path of the given worker."""
        return self.worker_log_dir / f"s9api_worker_{worker_idx}_stderr.log"

    def _read_stderr_log(self, worker_idx: int, tail: Optional[int] = None) -> Optional[str]:
        """Read a worker's stderr log; return ``None`` if it cannot be read.

        Args:
            worker_idx: Index of the worker whose log is read.
            tail: If given, only the last ``tail`` characters are returned.
        """
        if self.worker_log_dir is None:
            return None
        try:
            # The JVM writes this file, so tolerate bytes that do not decode
            with open(self._stderr_log_path(worker_idx), "r", errors="replace") as f:
                content = f.read()
        except OSError:
            return None
        return content if tail is None else content[-tail:]

    def _compile_worker_class(self):
        """Write the SaxonS9ApiWorker Java source to a temp dir and compile it.

        Raises:
            RuntimeError: If javac exits with a non-zero return code.
        """
        try:
            # Create the temporary directory
            self.temp_dir = Path(tempfile.mkdtemp(prefix="saxon_s9api_worker_"))

            # Write the Java source
            java_file = self.temp_dir / "SaxonS9ApiWorker.java"
            java_file.write_text(SAXON_S9API_WORKER_JAVA, encoding="utf-8")

            classpath = self._resolve_classpath()

            # Compile the Java class
            javac_cmd = [str(self._javac_path(self.java_vm_path)), "-cp", classpath, str(java_file)]
            logger.debug(f"Kompiliere SaxonS9ApiWorker: {' '.join(javac_cmd)}")

            result = subprocess.run(javac_cmd, capture_output=True, text=True, timeout=30)
            if result.returncode != 0:
                raise RuntimeError(f"Java-Kompilierung fehlgeschlagen: {result.stderr}")

            self.worker_class_path = self.temp_dir
            logger.info(f"SaxonS9ApiWorker erfolgreich kompiliert: {self.temp_dir}")

        except Exception as e:
            logger.error(f"Fehler beim Kompilieren von SaxonS9ApiWorker: {e}")
            raise

    def _start_workers(self):
        """Start ``num_workers`` worker JVMs and register them as idle.

        Raises:
            RuntimeError: If a worker has already exited shortly after start.
        """
        import os
        import time

        # Worker class directory first, then the Saxon JARs
        full_classpath = str(self.worker_class_path) + os.pathsep + self._resolve_classpath()
        logger.debug(f"S9Api Worker Classpath: {full_classpath[:200]}...")

        # Determine the log directory
        self.worker_log_dir = self.log_dir if self.log_dir else self.temp_dir
        if self.log_dir:
            self.worker_log_dir.mkdir(parents=True, exist_ok=True)

        cmd = [str(self.java_vm_path), "-cp", full_classpath, "SaxonS9ApiWorker"]

        for i in range(self.num_workers):
            try:
                stderr_log = self._stderr_log_path(i)

                # The child gets its own copy of the log handle, so the
                # parent's handle is closed as soon as the process is spawned.
                with open(stderr_log, "w", encoding="utf-8") as stderr_file:
                    process = subprocess.Popen(
                        cmd,
                        stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE,
                        stderr=stderr_file,  # Redirect stderr to file
                        text=True,
                        bufsize=1,  # Line buffered
                    )

                self.workers.append(process)
                self.worker_locks.append(threading.Lock())
                logger.debug(f"S9Api Worker {i} gestartet (PID: {process.pid}, stderr: {stderr_log})")

                # Give the worker a moment to initialise
                time.sleep(0.1)

                # A worker that is already gone failed to start
                if process.poll() is not None:
                    stderr_content = self._read_stderr_log(i) or ""
                    raise RuntimeError(
                        f"S9Api Worker {i} ist sofort beendet (Exit Code: {process.returncode})\nstderr:\n{stderr_content}"
                    )

                self._idle_workers.put(i)

            except Exception as e:
                logger.error(f"Fehler beim Starten von S9Api Worker {i}: {e}")
                raise

        logger.info(f"{len(self.workers)} Saxon-S9Api-Worker erfolgreich gestartet")

    def transform(
        self, source_xml: Path, xsl_stylesheet: Path, output_fo: Path, xslt_params: dict[str, str]
    ) -> tuple[bool, str]:
        """
        Run one XSLT transformation on a worker from the pool.

        Blocks until any worker is free, sends the job and waits for the
        worker's one-line reply.

        Args:
            source_xml: Path to the XML input file.
            xsl_stylesheet: Path to the XSL stylesheet.
            output_fo: Path of the FO output file.
            xslt_params: Stylesheet parameters (name -> string value).

        Returns:
            tuple[bool, str]: ``(True, "Erfolgreich")`` on success, otherwise
            ``(False, <error message>)``. Errors are reported through the
            return value rather than raised.
        """
        # Build and validate the job line before a worker is reserved. The
        # protocol is line- and tab-delimited, so these characters inside a
        # field would silently corrupt the job.
        pairs = [f"{key}={value}" for key, value in (xslt_params or {}).items()]
        params_str = "|||".join(pairs)
        job_fields = (str(source_xml), str(xsl_stylesheet), str(output_fo), params_str)

        if any(ch in field for field in job_fields for ch in "\t\r\n"):
            return False, "Ungültiger Job: Pfade und Parameter dürfen keine Tabulatoren oder Zeilenumbrüche enthalten"
        if pairs and params_str.split("|||") != pairs:
            # The worker would split the parameter list at the wrong places
            return False, "Ungültiger Job: XSLT-Parameter dürfen das Trennzeichen '|||' nicht enthalten"
        if any("=" in str(key) for key in (xslt_params or {})):
            # The worker splits each pair at the first "="
            return False, "Ungültiger Job: XSLT-Parameternamen dürfen kein '=' enthalten"

        if not self.workers:
            return False, "Kein S9Api Worker verfügbar"

        # Wait for whichever worker becomes free first
        worker_idx = self._idle_workers.get()
        # Uncontended here; still held so worker_locks keeps reflecting which
        # workers are busy.
        worker_lock = self.worker_locks[worker_idx]
        worker_lock.acquire()

        try:
            worker = self.workers[worker_idx]

            # Check that the worker is still running
            if worker.poll() is not None:
                error_msg = f"S9Api Worker {worker_idx} ist beendet (Exit: {worker.returncode})"
                stderr_content = self._read_stderr_log(worker_idx)
                if stderr_content is not None:
                    error_msg = f"{error_msg}\nstderr:\n{stderr_content}"
                logger.error(error_msg)
                return False, error_msg

            # Job line (tab-separated)
            job = "\t".join(job_fields) + "\n"
            logger.debug(f"Sende Job an S9Api Worker {worker_idx}: {source_xml.name}")

            # Send the job to the worker
            worker.stdin.write(job)
            worker.stdin.flush()

            # Wait for the one-line reply
            response = worker.stdout.readline().strip()
            logger.debug(f"S9Api Worker {worker_idx} Antwort: '{response}'")

            if response == "OK":
                return True, "Erfolgreich"
            if response.startswith("ERROR:"):
                error_msg = response[6:].strip()
                return False, f"Saxon-Fehler (s9api): {error_msg}"
            if not response:
                # An empty reply (EOF on stdout) means the worker crashed
                stderr_tail = self._read_stderr_log(worker_idx, tail=500)
                if stderr_tail is None:
                    return False, f"S9Api Worker {worker_idx} crashed (keine Antwort)"
                return False, f"S9Api Worker {worker_idx} crashed (keine Antwort)\nstderr:\n{stderr_tail}"
            return False, f"Unerwartete Antwort: {response}"

        except Exception as e:
            logger.error(f"Fehler bei S9Api Worker {worker_idx}: {e}")
            return False, f"Worker-Fehler: {str(e)}"

        finally:
            # Release the worker for the next job
            worker_lock.release()
            self._idle_workers.put(worker_idx)

    def shutdown(self):
        """Stop all worker processes and delete the temporary directory.

        Safe to call more than once: workers that have already exited are
        skipped. Errors while stopping a worker are logged, not raised.
        """
        logger.info("Beende Saxon-S9Api-Worker-Pool...")

        for i, worker in enumerate(self.workers):
            try:
                if worker.poll() is None:
                    # Ask the worker to exit
                    if worker.stdin and not worker.stdin.closed:
                        worker.stdin.write("EXIT\n")
                        worker.stdin.flush()

                    # Wait for termination (max 2 seconds)
                    worker.wait(timeout=2)
                logger.debug(f"S9Api Worker {i} beendet")

            except subprocess.TimeoutExpired:
                # Force kill if necessary, then reap the process
                worker.kill()
                worker.wait()
                logger.warning(f"S9Api Worker {i} musste gekillt werden")

            except Exception as e:
                logger.error(f"Fehler beim Beenden von S9Api Worker {i}: {e}")

            finally:
                # Release the pipe handles; best effort, the process is gone
                for stream in (worker.stdin, worker.stdout):
                    if stream and not stream.closed:
                        try:
                            stream.close()
                        except OSError as e:
                            logger.debug(f"S9Api Worker {i}: Pipe konnte nicht geschlossen werden: {e}")

        # Delete the temporary directory
        if self.temp_dir and self.temp_dir.exists():
            try:
                import shutil

                shutil.rmtree(self.temp_dir)
                logger.debug(f"Temporäres Verzeichnis gelöscht: {self.temp_dir}")
            except Exception as e:
                logger.warning(f"Konnte temporäres Verzeichnis nicht löschen: {e}")

        logger.info("Saxon-S9Api-Worker-Pool beendet")

    def __enter__(self):
        """Context manager entry; returns the pool itself."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit; shuts the pool down."""
        self.shutdown()
|
||||
Reference in New Issue
Block a user