Performance: 4x schnellere XSLT-Transformationen durch Worker-Pool
Problem: 82 XML-Dateien brauchten 160 Sekunden (JVM-Startup-Overhead) Lösung: Persistente JVM-Worker-Prozesse mit JAXP Transformer API - Saxon Worker Pool mit N persistenten JVM-Prozessen - Eliminiert JVM-Startup und Classpath-Scanning bei jedem Job - Parallele Verarbeitung mit ThreadPoolExecutor - JAXP Transformer API (javax.xml.transform) - stabil, kein System.exit() - Konfigurierbare Worker-Anzahl über Performance-Menü Ergebnis: 82 Dateien in 40 Sekunden (4x Speedup, ~0.49s pro Datei) Zusätzliche Verbesserungen: - Dual-Logging (Datei + Konsole) mit Timestamps - Worker-stderr-Logs in Projektverzeichnis/temp/ - Umfangreiche Debug-Ausgaben für Fehlerdiagnose - Robuste Fehlerbehandlung mit ErrorListener Technische Details: - SaxonWorkerPool: Verwaltet N Worker-Prozesse - JAXP statt Transform.main() (kein System.exit!) - Worker-Locks für thread-sichere Job-Verteilung - Graceful Shutdown mit EXIT-Befehl - Fallback auf subprocess bei Pool-Fehlern Dateien: - src/saxon_pool.py (NEU): Worker-Pool-Implementation - src/transform.py: Integration mit Worker-Pool - src/ui/MainWindow.py: Pool-Initialisierung, Performance-Menü - src/conf.py: max_workers Einstellung - src/main.py: Dual-Logging 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
+165
-26
@@ -29,7 +29,8 @@ from ui.TreeNodeEditDialog import TreeNodeEditDialog
|
||||
from ui.XslFileEditDialog import XslFileEditDialog
|
||||
from ui.XmlToXslAssignDialog import XmlToXslAssignDialog
|
||||
from conf import app_settings, Project, ProjectData, TreeNode, XslFile, XmlFile
|
||||
from transform import TransformationJob
|
||||
from transform import TransformationJob, set_saxon_worker_pool
|
||||
from saxon_pool import SaxonWorkerPool
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@@ -390,48 +391,79 @@ class TransformationThread(QThread):
|
||||
job_error = Signal(str, str, str) # xml_file_name, xsl_id_str, error_message
|
||||
all_jobs_finished = Signal(int, int, float) # successful_count, total_count, total_duration
|
||||
|
||||
def __init__(self, jobs: list[TransformationJob], force: bool = False):
|
||||
def __init__(self, jobs: list[TransformationJob], force: bool = False, max_workers: int = 8):
|
||||
"""
|
||||
Initialisiert den Transformations-Thread.
|
||||
|
||||
Args:
|
||||
jobs: Liste der TransformationJob-Objekte
|
||||
force: Wenn True, werden alle Jobs ausgeführt (ignoriert Up-to-Date)
|
||||
max_workers: Maximale Anzahl paralleler Worker (Standard: 8)
|
||||
"""
|
||||
super().__init__()
|
||||
self.jobs = jobs
|
||||
self.force = force
|
||||
self.max_workers = max_workers
|
||||
self.successful_count = 0
|
||||
|
||||
def _process_single_job(self, job: TransformationJob) -> dict:
|
||||
"""
|
||||
Verarbeitet einen einzelnen Transformations-Job (Thread-safe).
|
||||
|
||||
Args:
|
||||
job: Der zu verarbeitende TransformationJob
|
||||
|
||||
Returns:
|
||||
dict: Ergebnis-Dictionary des Jobs
|
||||
"""
|
||||
try:
|
||||
# Sende Start-Signal mit XSL-ID
|
||||
xsl_id_str = "_".join(str(x) for x in job.xsl_id) if job.xsl_id else ""
|
||||
self.job_started.emit(str(job.xml_file), xsl_id_str)
|
||||
|
||||
# Führe Transformations-Pipeline aus
|
||||
result = job.run_full_pipeline(force=self.force)
|
||||
|
||||
# Sende Abschluss-Signal
|
||||
self.job_finished.emit(result)
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Unerwarteter Fehler bei Transformation: {str(e)}"
|
||||
logger.error(error_msg)
|
||||
xsl_id_str = "_".join(str(x) for x in job.xsl_id) if job.xsl_id else ""
|
||||
self.job_error.emit(str(job.xml_file), xsl_id_str, error_msg)
|
||||
return {"success": False, "error": error_msg}
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
Führt alle Transformations-Jobs sequenziell aus.
|
||||
Führt alle Transformations-Jobs parallel aus mit ThreadPoolExecutor.
|
||||
"""
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import datetime
|
||||
import threading
|
||||
|
||||
start_time = datetime.now()
|
||||
logger.info(f"Starte Transformation von {len(self.jobs)} Jobs")
|
||||
logger.info(f"Starte parallele Transformation von {len(self.jobs)} Jobs mit {self.max_workers} Workern")
|
||||
|
||||
for job in self.jobs:
|
||||
try:
|
||||
# Sende Start-Signal mit XSL-ID
|
||||
xsl_id_str = "_".join(str(x) for x in job.xsl_id) if job.xsl_id else ""
|
||||
self.job_started.emit(str(job.xml_file), xsl_id_str)
|
||||
# Thread-sicherer Counter
|
||||
successful_lock = threading.Lock()
|
||||
|
||||
# Führe Transformations-Pipeline aus
|
||||
result = job.run_full_pipeline(force=self.force)
|
||||
# Verwende ThreadPoolExecutor für parallele Verarbeitung
|
||||
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
||||
# Starte alle Jobs
|
||||
future_to_job = {executor.submit(self._process_single_job, job): job for job in self.jobs}
|
||||
|
||||
# Sende Abschluss-Signal
|
||||
self.job_finished.emit(result)
|
||||
|
||||
if result["success"]:
|
||||
self.successful_count += 1
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Unerwarteter Fehler bei Transformation: {str(e)}"
|
||||
logger.error(error_msg)
|
||||
xsl_id_str = "_".join(str(x) for x in job.xsl_id) if job.xsl_id else ""
|
||||
self.job_error.emit(str(job.xml_file), xsl_id_str, error_msg)
|
||||
# Warte auf Abschluss und sammle Ergebnisse
|
||||
for future in as_completed(future_to_job):
|
||||
try:
|
||||
result = future.result()
|
||||
if result.get("success", False):
|
||||
with successful_lock:
|
||||
self.successful_count += 1
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler beim Verarbeiten des Future: {e}")
|
||||
|
||||
# Berechne Gesamtdauer
|
||||
total_duration = (datetime.now() - start_time).total_seconds()
|
||||
@@ -439,7 +471,8 @@ class TransformationThread(QThread):
|
||||
# Sende Abschluss-Signal für alle Jobs mit Gesamtdauer
|
||||
self.all_jobs_finished.emit(self.successful_count, len(self.jobs), total_duration)
|
||||
logger.info(
|
||||
f"Transformation abgeschlossen: {self.successful_count}/{len(self.jobs)} erfolgreich ({total_duration:.2f}s)"
|
||||
f"Transformation abgeschlossen: {self.successful_count}/{len(self.jobs)} erfolgreich ({total_duration:.2f}s) "
|
||||
f"[{len(self.jobs) / total_duration:.2f} Jobs/s mit {self.max_workers} Workern]"
|
||||
)
|
||||
|
||||
|
||||
@@ -517,6 +550,9 @@ class MainWindow(QMainWindow):
|
||||
# Vorhandene Projekte-Menü initialisieren
|
||||
self._setup_projects_menu()
|
||||
|
||||
# Performance-Einstellungen-Menü initialisieren
|
||||
self._setup_performance_menu()
|
||||
|
||||
#
|
||||
if theme := app_settings.theme:
|
||||
self.change_theme(theme)
|
||||
@@ -612,6 +648,47 @@ class MainWindow(QMainWindow):
|
||||
|
||||
logger.info(f"Projekte-Menü initialisiert mit {len(app_settings.pdf_projects)} Projekten")
|
||||
|
||||
def _setup_performance_menu(self):
|
||||
"""Fügt ein Menü-Item für Performance-Einstellungen hinzu."""
|
||||
# Füge Separator vor der Performance-Einstellung hinzu
|
||||
self.ui.menuProjekt.addSeparator()
|
||||
|
||||
# Erstelle Aktion für Performance-Einstellungen
|
||||
performance_action = QAction("Performance-Einstellungen...", self)
|
||||
performance_action.setToolTip(f"Parallele Worker: {app_settings.max_workers}")
|
||||
performance_action.triggered.connect(self._open_performance_settings)
|
||||
|
||||
# Füge die Aktion zum Projekt-Menü hinzu
|
||||
self.ui.menuProjekt.addAction(performance_action)
|
||||
|
||||
logger.debug(f"Performance-Menü initialisiert (max_workers={app_settings.max_workers})")
|
||||
|
||||
def _open_performance_settings(self):
|
||||
"""Öffnet einen Dialog für Performance-Einstellungen."""
|
||||
from PySide6.QtWidgets import QInputDialog
|
||||
|
||||
current_workers = app_settings.max_workers
|
||||
new_workers, ok = QInputDialog.getInt(
|
||||
self,
|
||||
"Performance-Einstellungen",
|
||||
"Anzahl paralleler Worker für Transformationen:",
|
||||
current_workers, # value
|
||||
1, # minValue
|
||||
32, # maxValue
|
||||
1, # step
|
||||
)
|
||||
|
||||
if ok and new_workers != current_workers:
|
||||
app_settings.max_workers = new_workers
|
||||
app_settings.save()
|
||||
logger.info(f"max_workers geändert: {current_workers} → {new_workers}")
|
||||
QMessageBox.information(
|
||||
self,
|
||||
"Einstellungen gespeichert",
|
||||
f"Anzahl paralleler Worker wurde auf {new_workers} gesetzt.\n\n"
|
||||
f"Die Änderung wird bei der nächsten Transformation wirksam.",
|
||||
)
|
||||
|
||||
def open_existing_project(self, project: Project):
|
||||
"""
|
||||
Öffnet ein vorhandenes Projekt.
|
||||
@@ -647,6 +724,9 @@ class MainWindow(QMainWindow):
|
||||
# Starte Hash-Berechnung für alle XML-Dateien
|
||||
self._start_xml_hash_calculation()
|
||||
|
||||
# Initialisiere Saxon-Worker-Pool für schnellere Transformationen
|
||||
self._initialize_saxon_worker_pool()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler beim Laden des Projekts '{project.name}': {e}")
|
||||
# Fallback: Erstelle Standard-Einstellungen
|
||||
@@ -658,6 +738,62 @@ class MainWindow(QMainWindow):
|
||||
except Exception as fallback_error:
|
||||
logger.error(f"Fehler beim Erstellen der Fallback-Einstellungen: {fallback_error}")
|
||||
|
||||
def _initialize_saxon_worker_pool(self):
|
||||
"""Initialisiert den Saxon-Worker-Pool für schnelle Transformationen."""
|
||||
try:
|
||||
# Shutdown vorherigen Pool falls vorhanden
|
||||
self._shutdown_saxon_worker_pool()
|
||||
|
||||
if not self.project:
|
||||
logger.warning("Kein Projekt geladen, Saxon-Worker-Pool nicht initialisiert")
|
||||
return
|
||||
|
||||
# Hole Tool-Konfigurationen
|
||||
java_vm = next((vm for vm in app_settings.java_vms if vm.id == self.project.java_vm_id), None)
|
||||
saxon_jar = next((jar for jar in app_settings.saxon_jars if jar.id == self.project.saxon_jar_id), None)
|
||||
|
||||
if not java_vm or not saxon_jar:
|
||||
logger.warning("Java VM oder Saxon JAR nicht gefunden, Pool nicht initialisiert")
|
||||
return
|
||||
|
||||
# Erstelle Worker-Pool
|
||||
num_workers = app_settings.max_workers
|
||||
log_dir = self.project.project_dir / "temp"
|
||||
pool = SaxonWorkerPool(
|
||||
num_workers=num_workers,
|
||||
java_vm_path=java_vm.path_to_binary_file,
|
||||
saxon_jar_path=saxon_jar.path_to_jar_file,
|
||||
classpath_cache=TransformationJob._classpath_cache,
|
||||
log_dir=log_dir,
|
||||
)
|
||||
|
||||
# Setze globalen Pool
|
||||
set_saxon_worker_pool(pool)
|
||||
|
||||
logger.info(
|
||||
f"Saxon-Worker-Pool initialisiert: {num_workers} Worker "
|
||||
f"(erwartet: {num_workers}x schneller für Saxon-Transformationen)"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler beim Initialisieren des Saxon-Worker-Pools: {e}")
|
||||
# Kein Pool ist OK - Fallback auf subprocess
|
||||
|
||||
def _shutdown_saxon_worker_pool(self):
|
||||
"""Beendet den Saxon-Worker-Pool sauber."""
|
||||
try:
|
||||
# Importiere transform um Zugriff auf globalen Pool zu haben
|
||||
import transform
|
||||
|
||||
if transform._saxon_worker_pool:
|
||||
logger.info("Beende Saxon-Worker-Pool...")
|
||||
transform._saxon_worker_pool.shutdown()
|
||||
set_saxon_worker_pool(None)
|
||||
logger.info("Saxon-Worker-Pool beendet")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler beim Beenden des Saxon-Worker-Pools: {e}")
|
||||
|
||||
def change_theme(self, theme_name):
|
||||
"""
|
||||
Wechselt das Theme der Anwendung.
|
||||
@@ -2900,7 +3036,7 @@ class MainWindow(QMainWindow):
|
||||
"""
|
||||
# Erstelle Zusammenfassungstext
|
||||
summary_lines = []
|
||||
summary_lines.append(f"Verarbeitung abgeschlossen:\n")
|
||||
summary_lines.append("Verarbeitung abgeschlossen:\n")
|
||||
summary_lines.append(f"📊 Gesamt: {stats['total']} Datei(en)")
|
||||
summary_lines.append(f"✓ Verarbeitet: {stats['processed']} Datei(en)")
|
||||
|
||||
@@ -2917,7 +3053,7 @@ class MainWindow(QMainWindow):
|
||||
summary_lines.append(f"🚫 Abgebrochen: {stats['cancelled']} Datei(en)")
|
||||
|
||||
if stats["renamed_files"]:
|
||||
summary_lines.append(f"\n📝 Umbenannte Dateien:")
|
||||
summary_lines.append("\n📝 Umbenannte Dateien:")
|
||||
for renamed in stats["renamed_files"]:
|
||||
summary_lines.append(f" • {renamed}")
|
||||
|
||||
@@ -3938,7 +4074,7 @@ class MainWindow(QMainWindow):
|
||||
return
|
||||
|
||||
# Erstelle und konfiguriere Thread
|
||||
self.transformation_thread = TransformationThread(jobs, force=force)
|
||||
self.transformation_thread = TransformationThread(jobs, force=force, max_workers=app_settings.max_workers)
|
||||
|
||||
# Verbinde Signale
|
||||
self.transformation_thread.job_started.connect(self._on_transformation_job_started)
|
||||
@@ -4537,6 +4673,9 @@ class MainWindow(QMainWindow):
|
||||
self.transformation_thread.quit()
|
||||
self.transformation_thread.wait()
|
||||
|
||||
# Beende Saxon-Worker-Pool
|
||||
self._shutdown_saxon_worker_pool()
|
||||
|
||||
# PDF-Dokumente schließen ist bei QtPdf automatisch durch Garbage Collection
|
||||
super().closeEvent(event)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user