XSL-Transformations-Engine mit Saxon, Apache FOP und diff-pdf implementiert
Fügt die komplette Transformations-Pipeline hinzu: - Saxon XSLT-Transformation (XML → FO) mit vollständigem Classpath-Support - Apache FOP PDF-Generierung (FO → PDF) mit plattformübergreifender Unterstützung - Automatische diff-pdf Vergleichs- und Diff-Generierung - Valide-PDF-Verwaltung (Referenz-PDFs beim ersten erfolgreichen Build) - Up-to-Date-Prüfung basierend auf Datei-Zeitstempeln - Asynchrone Ausführung via TransformationThread (QThread) - Kontextmenü-Integration für XML- und XSL-Dateien - Detailliertes Fehler-Reporting und Fortschritts-Feedback Neue Dateien: - src/transform.py: TransformationJob-Klasse mit vollständiger Pipeline Erweiterte Dateien: - src/ui/MainWindow.py: TransformationThread und Transformations-Methoden Technische Details: - Löst Saxon ClassNotFoundException durch Verwendung aller JARs im Saxon-Verzeichnis - Verwendet -cp statt -jar für vollständigen Classpath-Zugriff - Automatisches Cleanup temporärer FO-Dateien - Thread-sicheres Shutdown-Handling 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,420 @@
|
||||
"""
|
||||
Transformations-Engine für XSL-FO PDF-Generierung.
|
||||
|
||||
Dieses Modul implementiert die Transformations-Pipeline:
|
||||
1. XML → FO (Saxon XSLT Transformation)
|
||||
2. FO → PDF (Apache FOP)
|
||||
3. PDF-Vergleich (diff-pdf)
|
||||
"""
|
||||
|
||||
import logging
import os
import shutil
import subprocess
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Optional
|
||||
|
||||
# Module-level logger named after this module; the pipeline steps below log through it.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TransformationJob:
    """
    Represents a single transformation job (XML -> FO -> PDF -> PDF comparison).

    Similar to the TestFall class in validate-xls.py, but adapted for DocuMentor.

    All step methods return a ``(bool, str)`` tuple instead of raising, so a
    caller can report the message of a failed step directly.
    """

    # Timeouts (in seconds) for the external tools. They are class attributes
    # so a subclass or a single instance can override them without touching
    # the constructor signature.
    SAXON_TIMEOUT = 120
    FOP_TIMEOUT = 180
    COMPARE_TIMEOUT = 60
    DIFF_TIMEOUT = 90

    def __init__(
        self,
        project_dir: Path,
        xml_file: Path,
        xsl_file: Path,
        xslt_params: dict[str, str],
        java_vm_path: Path,
        saxon_jar_path: Path,
        apache_fop_dir: Path,
        diff_pdf_path: Path,
        diff_pdf_params: list[str]
    ):
        """
        Initialise a transformation job and create its output directories.

        Args:
            project_dir: Path to the project directory (must already exist).
            xml_file: Path of the XML input file, relative to ``project_dir``.
            xsl_file: Absolute path of the XSL stylesheet.
            xslt_params: XSLT parameters, passed to Saxon as ``key=value``.
                ``None`` is treated as "no parameters".
            java_vm_path: Path to the Java VM binary.
            saxon_jar_path: Path to the Saxon JAR file; every ``*.jar`` in the
                same directory is put on the classpath as well.
            apache_fop_dir: Path to the Apache FOP directory.
            diff_pdf_path: Path to the diff-pdf binary.
            diff_pdf_params: Default parameters for diff-pdf. ``None`` is
                treated as "no parameters".

        Raises:
            OSError: If one of the output directories cannot be created
                (e.g. ``project_dir`` does not exist).
        """
        self.project_dir = project_dir
        self.xml_file = xml_file  # relative to project_dir
        self.xsl_file = xsl_file  # absolute
        self.xslt_params = xslt_params if xslt_params is not None else {}

        # Tool paths
        self.java_vm_path = java_vm_path
        self.saxon_jar_path = saxon_jar_path
        self.apache_fop_dir = apache_fop_dir
        self.diff_pdf_path = diff_pdf_path
        self.diff_pdf_params = diff_pdf_params if diff_pdf_params is not None else []

        # Output directories inside the project folder
        self.output_dir = project_dir / "output"
        self.valide_dir = project_dir / "valide"
        self.diff_dir = project_dir / "diff"
        for directory in (self.output_dir, self.valide_dir, self.diff_dir):
            directory.mkdir(exist_ok=True)

        # All artefacts share the stem of the XML file
        base_name = self.xml_file.stem
        self.temp_fo = self.output_dir / f"{base_name}.fo"
        self.output_pdf = self.output_dir / f"{base_name}.pdf"
        self.valide_pdf = self.valide_dir / f"{base_name}.pdf"
        self.diff_pdf = self.diff_dir / f"{base_name}.pdf"

        # Apache FOP launcher script (platform dependent)
        fop_script = "fop.cmd" if sys.platform == "win32" else "fop"
        self.fop_cmd = self.apache_fop_dir / fop_script
        self.fop_conf = self.apache_fop_dir / "conf" / "fop.xconf"

    @staticmethod
    def _run_tool(cmd_line: list[str], timeout: float) -> subprocess.CompletedProcess:
        """Run an external tool, capturing stdout/stderr as text."""
        # errors="replace": tool output that is not valid in the locale
        # encoding must not turn a finished run into a UnicodeDecodeError.
        return subprocess.run(
            cmd_line,
            capture_output=True,
            text=True,
            errors="replace",
            timeout=timeout,
        )

    def _build_classpath(self) -> str:
        """Return the Saxon classpath ('' when no JAR file can be found)."""
        # Path.glob keeps the directory name literal, so special characters
        # such as '[' in the directory are not interpreted as a pattern.
        jars = sorted(self.saxon_jar_path.parent.glob("*.jar"))
        if self.saxon_jar_path.is_file():
            # The configured JAR goes first so it wins over duplicates.
            jars = [self.saxon_jar_path] + [
                jar for jar in jars if jar != self.saxon_jar_path
            ]
        return os.pathsep.join(str(jar) for jar in jars)

    def _finalize_result(self, result: dict[str, Any], start: float) -> dict[str, Any]:
        """Fill in duration and artefact paths once the pipeline has stopped."""
        result["duration"] = time.monotonic() - start
        result["output_pdf"] = str(self.output_pdf) if self.output_pdf.exists() else None
        result["diff_pdf"] = str(self.diff_pdf) if self.diff_pdf.exists() else None
        return result

    def is_up_to_date(self) -> bool:
        """
        Check whether the transformation result is current.

        Returns:
            bool: True only if the output PDF exists, both inputs (XML and
            XSL) exist, and neither input is newer than the output PDF.
        """
        try:
            output_mtime = self.output_pdf.stat().st_mtime
        except OSError:
            logger.debug("Output-PDF existiert nicht: %s", self.output_pdf)
            return False

        inputs = (
            ("XML-Datei", self.project_dir / self.xml_file),
            ("XSL-Datei", self.xsl_file),
        )
        for label, path in inputs:
            try:
                input_mtime = path.stat().st_mtime
            except OSError:
                # A missing input can never be "up to date"; returning False
                # lets the transformation step report the missing file.
                logger.debug("%s nicht gefunden: %s", label, path)
                return False
            if input_mtime > output_mtime:
                logger.debug("%s ist neuer: %s", label, path)
                return False

        logger.debug("Transformation ist aktuell: %s", self.output_pdf)
        return True

    def transform_saxon(self, force: bool = False) -> tuple[bool, str]:
        """
        Run the XSLT transformation with Saxon: XML -> FO.

        Args:
            force: If True, transform even when the output is up to date.

        Returns:
            tuple[bool, str]: (success, error message / info)
        """
        if not force and self.is_up_to_date():
            logger.info("Transformation übersprungen (aktuell): %s", self.xml_file.name)
            return True, "Übersprungen (aktuell)"

        xml_abs = self.project_dir / self.xml_file

        # Make sure the inputs exist before launching Java
        if not xml_abs.exists():
            error_msg = f"XML-Datei nicht gefunden: {xml_abs}"
            logger.error(error_msg)
            return False, error_msg

        if not self.xsl_file.exists():
            error_msg = f"XSL-Datei nicht gefunden: {self.xsl_file}"
            logger.error(error_msg)
            return False, error_msg

        # An empty classpath would only surface as an obscure
        # ClassNotFoundException from Java, so fail early and clearly.
        classpath = self._build_classpath()
        if not classpath:
            error_msg = f"Saxon-JAR nicht gefunden: {self.saxon_jar_path}"
            logger.error(error_msg)
            return False, error_msg

        params = [f"{key}={value}" for key, value in self.xslt_params.items()]

        # -cp with all JARs (instead of -jar) and the Transform main class
        cmd_line = [
            str(self.java_vm_path),
            "-cp",
            classpath,
            "net.sf.saxon.Transform",
            f"-s:{xml_abs}",
            f"-xsl:{self.xsl_file}",
            f"-o:{self.temp_fo}",
            *params,
        ]

        logger.info("Starte Saxon-Transformation: %s", self.xml_file.name)
        logger.debug("Kommandozeile: %s", " ".join(cmd_line))

        try:
            result = self._run_tool(cmd_line, self.SAXON_TIMEOUT)
        except subprocess.TimeoutExpired:
            error_msg = f"Saxon-Transformation Timeout (>{self.SAXON_TIMEOUT}s)"
            logger.error(error_msg)
            return False, error_msg
        except Exception as e:
            # Best-effort boundary: e.g. the Java binary cannot be started.
            error_msg = f"Unerwarteter Fehler bei Saxon-Transformation: {str(e)}"
            logger.exception(error_msg)
            return False, error_msg

        if result.returncode == 0:
            logger.info("Saxon-Transformation erfolgreich: %s", self.xml_file.name)
            return True, "Erfolgreich"

        error_msg = (
            f"Saxon-Fehler (Exit {result.returncode}):\n"
            f"StdOut: {result.stdout}\nStdErr: {result.stderr}"
        )
        logger.error(error_msg)
        return False, error_msg

    def build_pdf(self, force: bool = False) -> tuple[bool, str]:
        """
        Generate the PDF from the FO file with Apache FOP: FO -> PDF.

        The temporary FO file is removed afterwards, whether FOP succeeded,
        failed, timed out or could not be started. On the first successful
        build the output PDF is copied as the reference ("valide") PDF.

        Args:
            force: If True, build even when the output is up to date.

        Returns:
            tuple[bool, str]: (success, error message / info)
        """
        if not force and self.is_up_to_date():
            logger.info("PDF-Build übersprungen (aktuell): %s", self.xml_file.name)
            return True, "Übersprungen (aktuell)"

        if not self.temp_fo.exists():
            error_msg = f"FO-Datei nicht gefunden: {self.temp_fo}"
            logger.error(error_msg)
            return False, error_msg

        # Only pass "-c <config>" when the configuration file actually exists
        cmd_line = [str(self.fop_cmd)]
        if self.fop_conf.exists():
            cmd_line += ["-c", str(self.fop_conf)]
        cmd_line += ["-r", "-fo", str(self.temp_fo), "-pdf", str(self.output_pdf)]

        logger.info("Starte Apache FOP PDF-Generierung: %s", self.xml_file.name)
        logger.debug("Kommandozeile: %s", " ".join(cmd_line))

        try:
            result = self._run_tool(cmd_line, self.FOP_TIMEOUT)
        except subprocess.TimeoutExpired:
            error_msg = f"FOP PDF-Generierung Timeout (>{self.FOP_TIMEOUT}s)"
            logger.error(error_msg)
            return False, error_msg
        except Exception as e:
            # Best-effort boundary: e.g. the FOP launcher cannot be started.
            error_msg = f"Unerwarteter Fehler bei PDF-Generierung: {str(e)}"
            logger.exception(error_msg)
            return False, error_msg
        finally:
            # Clean up the temporary FO file on every exit path
            if self.temp_fo.exists():
                try:
                    self.temp_fo.unlink()
                    logger.debug("Temporäre FO-Datei gelöscht: %s", self.temp_fo)
                except OSError as e:
                    logger.warning("Konnte FO-Datei nicht löschen: %s", e)

        if result.returncode != 0:
            error_msg = (
                f"FOP-Fehler (Exit {result.returncode}):\n"
                f"StdOut: {result.stdout}\nStdErr: {result.stderr}"
            )
            logger.error(error_msg)
            return False, error_msg

        # The first successful build becomes the reference PDF
        if not self.valide_pdf.exists():
            try:
                shutil.copy2(self.output_pdf, self.valide_pdf)
                logger.info("Valide-PDF erstellt: %s", self.valide_pdf)
            except OSError as e:
                logger.warning("Konnte Valide-PDF nicht erstellen: %s", e)

        logger.info("PDF-Generierung erfolgreich: %s", self.output_pdf)
        return True, "Erfolgreich"

    def compare_pdf(self) -> tuple[bool, str]:
        """
        Compare the output PDF with the reference PDF; create a diff PDF if needed.

        Returns:
            tuple[bool, str]: (PDFs are identical, error message / info).
            A missing reference PDF counts as "identical" because there is
            nothing to compare against yet.
        """
        if not self.valide_pdf.exists():
            info_msg = "Kein Valide-PDF vorhanden (wird beim nächsten Build erstellt)"
            logger.info(info_msg)
            return True, info_msg

        if not self.output_pdf.exists():
            error_msg = f"Output-PDF nicht gefunden: {self.output_pdf}"
            logger.error(error_msg)
            return False, error_msg

        logger.info("Vergleiche PDFs: %s", self.xml_file.name)

        # First pass: compare only, no diff output
        cmd_compare = [
            str(self.diff_pdf_path),
            *self.diff_pdf_params,
            str(self.valide_pdf),
            str(self.output_pdf),
        ]
        logger.debug("Kommandozeile Vergleich: %s", " ".join(cmd_compare))

        try:
            result = self._run_tool(cmd_compare, self.COMPARE_TIMEOUT)

            if result.returncode == 0:
                logger.info("PDFs sind identisch: %s", self.xml_file.name)

                # A diff PDF from an earlier run is obsolete now
                if self.diff_pdf.exists():
                    try:
                        self.diff_pdf.unlink()
                        logger.debug("Diff-PDF gelöscht (nicht mehr nötig): %s", self.diff_pdf)
                    except OSError as e:
                        logger.warning("Konnte Diff-PDF nicht löschen: %s", e)

                return True, "PDFs sind identisch"

            logger.info("PDFs unterscheiden sich, erstelle Diff-PDF: %s", self.xml_file.name)

            # Remove a diff PDF left over from an earlier run first; otherwise
            # its mere existence would be mistaken for a successful new diff.
            stale_diff_remains = False
            try:
                self.diff_pdf.unlink(missing_ok=True)
            except OSError as e:
                stale_diff_remains = True
                logger.warning("Konnte altes Diff-PDF nicht löschen: %s", e)

            cmd_diff = [
                str(self.diff_pdf_path),
                f"--output-diff={self.diff_pdf}",
                *self.diff_pdf_params,
                "--mark-differences",
                str(self.valide_pdf),
                str(self.output_pdf),
            ]
            logger.debug("Kommandozeile Diff: %s", " ".join(cmd_diff))

            result_diff = self._run_tool(cmd_diff, self.DIFF_TIMEOUT)

            # The exit code is non-zero when the PDFs differ, so a freshly
            # written diff file is accepted as proof of success as well.
            diff_written = self.diff_pdf.exists() and not stale_diff_remains
            if result_diff.returncode == 0 or diff_written:
                logger.info("Diff-PDF erstellt: %s", self.diff_pdf)
                return False, f"Unterschiede gefunden - Diff-PDF: {self.diff_pdf.name}"

            error_msg = f"Diff-PDF-Erstellung fehlgeschlagen: {result_diff.stderr}"
            logger.error(error_msg)
            return False, error_msg

        except subprocess.TimeoutExpired:
            error_msg = "PDF-Vergleich Timeout"
            logger.error(error_msg)
            return False, error_msg
        except Exception as e:
            # Best-effort boundary: e.g. the diff-pdf binary cannot be started.
            error_msg = f"Unerwarteter Fehler beim PDF-Vergleich: {str(e)}"
            logger.exception(error_msg)
            return False, error_msg

    def run_full_pipeline(self, force: bool = False) -> dict[str, Any]:
        """
        Run the complete transformation pipeline:
        1. Saxon transformation (XML -> FO)
        2. PDF generation (FO -> PDF)
        3. PDF comparison

        Args:
            force: If True, all steps are executed (ignores the up-to-date check).

        Returns:
            dict: Result dictionary with the keys ``success``, ``xml_file``,
            ``steps`` (per-step status and message), ``duration`` (seconds),
            ``output_pdf`` / ``diff_pdf`` (path as string, or None if the file
            does not exist when the pipeline stops) and, once the comparison
            step ran, ``pdfs_identical``.
        """
        start = time.monotonic()  # monotonic: immune to wall-clock adjustments
        result: dict[str, Any] = {
            "success": False,
            "xml_file": str(self.xml_file),
            "steps": {},
            "duration": None,
            # Filled in by _finalize_result, i.e. after the steps have run
            "output_pdf": None,
            "diff_pdf": None,
        }

        logger.info("Starte Transformations-Pipeline: %s", self.xml_file.name)

        # Step 1: Saxon transformation
        success_saxon, msg_saxon = self.transform_saxon(force=force)
        result["steps"]["saxon"] = {"success": success_saxon, "message": msg_saxon}
        if not success_saxon:
            return self._finalize_result(result, start)

        # Step 2: PDF generation
        success_build, msg_build = self.build_pdf(force=force)
        result["steps"]["build"] = {"success": success_build, "message": msg_build}
        if not success_build:
            return self._finalize_result(result, start)

        # Step 3: PDF comparison (differences do not make the pipeline fail)
        pdfs_identical, msg_compare = self.compare_pdf()
        result["steps"]["compare"] = {"identical": pdfs_identical, "message": msg_compare}
        result["pdfs_identical"] = pdfs_identical

        result["success"] = True
        self._finalize_result(result, start)

        logger.info(
            "Pipeline abgeschlossen: %s (%.2fs)", self.xml_file.name, result["duration"]
        )
        return result
|
||||
Reference in New Issue
Block a user