#!/usr/bin/env python3 """ Vergleicht die Dependencies aus pyproject.toml mit den Einträgen in THIRD_PARTY_LICENSES.txt. Gibt eine JSON-Ausgabe mit: - missing: Dependencies die in pyproject.toml stehen aber nicht in THIRD_PARTY_LICENSES.txt - removed: Einträge in THIRD_PARTY_LICENSES.txt die nicht mehr in pyproject.toml stehen - version_changed: Dependencies deren Mindestversion sich geändert hat - info: Metadaten zu fehlenden Paketen (Lizenz, Homepage, etc.) """ import json import re import sys import tomllib from importlib.metadata import PackageNotFoundError, metadata from pathlib import Path def parse_pyproject(pyproject_path: Path) -> dict[str, str]: """Parst pyproject.toml und extrahiert Dependencies mit Mindestversionen.""" with open(pyproject_path, "rb") as f: data = tomllib.load(f) deps: dict[str, str] = {} # dependencies-Sektion for dep_str in data.get("project", {}).get("dependencies", []): m = re.match(r"([a-zA-Z0-9_-]+)(?:\[.*?\])?(?:>=([0-9.]+))?", dep_str) if m: deps[m.group(1).lower()] = m.group(2) or "" # dependency-groups dev for dep_str in data.get("dependency-groups", {}).get("dev", []): if isinstance(dep_str, str): m = re.match(r"([a-zA-Z0-9_-]+)(?:\[.*?\])?(?:>=([0-9.]+))?", dep_str) if m: deps[m.group(1).lower()] = m.group(2) or "" return deps def parse_licenses_file(licenses_path: Path) -> tuple[dict[str, str], dict[str, str]]: """Parst THIRD_PARTY_LICENSES.txt und extrahiert Paketnamen nach Sektion. Returns: tuple[dict, dict]: (python_deps, embedded_libs) — jeweils lowercase key -> original name """ content = licenses_path.read_text(encoding="utf-8") python_deps: dict[str, str] = {} embedded_libs: dict[str, str] = {} current_section = None current_target = None for line in content.splitlines(): if "Python-Abhängigkeiten" in line: current_section = "python" current_target = python_deps continue if "Eingebettete Bibliotheken" in line: current_section = "embedded" current_target = embedded_libs continue if "Externe Tools" in line or "Lizenztexte" in line: current_section = None current_target = None continue if current_target is None: continue # Nummerierter Eintrag: "1. PaketName" oder "1. PaketName (via X)" entry_match = re.match(r"\s*\d+\.\s+(.+?)(?:\s+\(.*\))?\s*$", line) if entry_match: name = entry_match.group(1).strip() current_target[name.lower()] = name continue # Version-Zeile: " Version: >=X.Y.Z" version_match = re.match(r"\s+Version:\s*>=?([\d.]+)", line) if version_match and current_target: last_key = list(current_target.keys())[-1] current_target[last_key] = current_target[last_key] + "|" + version_match.group(1) return python_deps, embedded_libs # Mapping: pyproject-Name -> zugehörige Einträge in THIRD_PARTY_LICENSES.txt # Deckt transitive Dependencies und Aliase mit Suffixen ab. KNOWN_ALIASES = { "pyside6": ["pyside6"], "pydantic-settings": ["pydantic-settings", "pydantic"], # pydantic ist transitive Dep "pydantic-yaml": ["pydantic-yaml"], "polars": ["polars", "connectorx (via polars)", "pyarrow (via polars)"], "connectorx": ["connectorx (via polars)"], "psutil": ["psutil"], "lxml": ["lxml"], # BSD-3-Clause, XML/XSLT-Parsing "ruff": ["ruff (development)"], "pyinstaller": ["pyinstaller (development)"], "pillow": ["pillow (development)"], } def get_package_info(pkg_name: str) -> dict: """Holt Paket-Metadaten via importlib.metadata.""" info = {"name": pkg_name, "installed": False} try: m = metadata(pkg_name) info["installed"] = True info["version"] = m.get("Version", "") info["summary"] = m.get("Summary", "") # Lizenz ermitteln license_expr = m.get("License-Expression") or "" if not license_expr: classifiers = [c for c in (m.get_all("Classifier") or []) if "License" in c] if classifiers: license_expr = classifiers[0].split(" :: ")[-1] else: lic_text = m.get("License") or "" if "MIT" in lic_text: license_expr = "MIT License" elif "BSD" in lic_text: license_expr = "BSD License" elif "Apache" in lic_text: license_expr = "Apache License 2.0" elif "LGPL" in lic_text or "GPL" in lic_text: license_expr = lic_text[:80] else: license_expr = lic_text[:80] if lic_text else "Unbekannt" info["license"] = license_expr # Homepage/GitHub urls = m.get_all("Project-URL") or [] for url_entry in urls: if "," in url_entry: label, url = url_entry.split(",", 1) label = label.strip().lower() url = url.strip() if "homepage" in label or "home-page" in label: info["homepage"] = url elif "repository" in label or "github" in label or "source" in label: info["github"] = url if "homepage" not in info: homepage = m.get("Home-page") if homepage: info["homepage"] = homepage # Author/Copyright author = m.get("Author") or m.get("Author-email") or "" info["author"] = author except PackageNotFoundError: pass return info def normalize_name(name: str) -> str: """Normalisiert Paketnamen für Vergleich.""" return re.sub(r"[-_.]+", "-", name).lower().strip() def main(): project_root = Path(__file__).resolve().parents[4] # .claude/skills/license-check/scripts -> root pyproject_path = project_root / "pyproject.toml" licenses_path = project_root / "THIRD_PARTY_LICENSES.txt" if not pyproject_path.exists(): print(json.dumps({"error": f"pyproject.toml nicht gefunden: {pyproject_path}"})) sys.exit(1) if not licenses_path.exists(): print(json.dumps({"error": f"THIRD_PARTY_LICENSES.txt nicht gefunden: {licenses_path}"})) sys.exit(1) pyproject_deps = parse_pyproject(pyproject_path) python_entries, embedded_entries = parse_licenses_file(licenses_path) # Normalisiere Python-License-Entry-Keys normalized_license_names = {} for key in python_entries: clean = re.sub(r"\s*\(.*?\)", "", key).strip() normalized_license_names[normalize_name(clean)] = key result = { "pyproject_deps": {k: v for k, v in sorted(pyproject_deps.items())}, "python_license_entries": list(python_entries.keys()), "embedded_license_entries": list(embedded_entries.keys()), "missing": [], "removed": [], "info": {}, } # Finde fehlende Dependencies covered_in_licenses = set() for dep_name in pyproject_deps: norm = normalize_name(dep_name) if norm in normalized_license_names: covered_in_licenses.add(norm) elif dep_name in KNOWN_ALIASES: found = False for alias in KNOWN_ALIASES[dep_name]: alias_norm = normalize_name(re.sub(r"\s*\(.*?\)", "", alias)) if alias_norm in normalized_license_names: found = True covered_in_licenses.add(alias_norm) if not found: result["missing"].append(dep_name) result["info"][dep_name] = get_package_info(dep_name) else: result["missing"].append(dep_name) result["info"][dep_name] = get_package_info(dep_name) # Finde entfernte Einträge (nur Python-Abhängigkeiten, NICHT eingebettete) for norm_name, orig_key in normalized_license_names.items(): if norm_name not in covered_in_licenses: # Prüfe ob es ein "via"-Eintrag ist if "(via" in orig_key: parent = re.search(r"\(via\s+(\w+)\)", orig_key) if parent and normalize_name(parent.group(1)) in {normalize_name(d) for d in pyproject_deps}: continue # Prüfe ob es über KNOWN_ALIASES abgedeckt ist is_alias = False for dep, aliases in KNOWN_ALIASES.items(): if dep in pyproject_deps: for alias in aliases: if normalize_name(re.sub(r"\s*\(.*?\)", "", alias)) == norm_name: is_alias = True break if is_alias: break if not is_alias: result["removed"].append(orig_key) print(json.dumps(result, indent=2, ensure_ascii=False)) if __name__ == "__main__": main()