Files

240 lines
8.9 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
"""
Vergleicht die Dependencies aus pyproject.toml mit den Einträgen in THIRD_PARTY_LICENSES.txt.
Gibt eine JSON-Ausgabe mit:
- missing: Dependencies die in pyproject.toml stehen aber nicht in THIRD_PARTY_LICENSES.txt
- removed: Einträge in THIRD_PARTY_LICENSES.txt die nicht mehr in pyproject.toml stehen
- version_changed: Dependencies deren Mindestversion sich geändert hat
- info: Metadaten zu fehlenden Paketen (Lizenz, Homepage, etc.)
"""
import json
import re
import sys
import tomllib
from importlib.metadata import PackageNotFoundError, metadata
from pathlib import Path
def parse_pyproject(pyproject_path: Path) -> dict[str, str]:
"""Parst pyproject.toml und extrahiert Dependencies mit Mindestversionen."""
with open(pyproject_path, "rb") as f:
data = tomllib.load(f)
deps: dict[str, str] = {}
# dependencies-Sektion
for dep_str in data.get("project", {}).get("dependencies", []):
m = re.match(r"([a-zA-Z0-9_-]+)(?:\[.*?\])?(?:>=([0-9.]+))?", dep_str)
if m:
deps[m.group(1).lower()] = m.group(2) or ""
# dependency-groups dev
for dep_str in data.get("dependency-groups", {}).get("dev", []):
if isinstance(dep_str, str):
m = re.match(r"([a-zA-Z0-9_-]+)(?:\[.*?\])?(?:>=([0-9.]+))?", dep_str)
if m:
deps[m.group(1).lower()] = m.group(2) or ""
return deps
# Numbered entry: "1. PackageName" or "1. PackageName (via X)".
_LICENSE_ENTRY = re.compile(r"\s*\d+\.\s+(.+?)(?:\s+\(.*\))?\s*$")
# Indented version line: "   Version: >=X.Y.Z".
_LICENSE_VERSION = re.compile(r"\s+Version:\s*>=?([\d.]+)")


def parse_licenses_file(licenses_path: Path) -> tuple[dict[str, str], dict[str, str]]:
    """Parse THIRD_PARTY_LICENSES.txt and collect the package names per section.

    A line containing "Python-Abhängigkeiten" or "Eingebettete Bibliotheken"
    opens the corresponding section; a line containing "Externe Tools" or
    "Lizenztexte" closes it. Inside a section, numbered lines are entries (a
    trailing " (...)" annotation is not part of the name) and an indented
    "Version:" line belongs to the entry that precedes it.

    Args:
        licenses_path: Path to the UTF-8 encoded licenses file.

    Returns:
        ``(python_deps, embedded_libs)``. Each maps the lowercased entry name
        to the name as written, followed by ``"|<version>"`` for every
        "Version:" line found for that entry (e.g. ``"Polars|1.2.0"``).
    """
    python_deps: dict[str, str] = {}
    embedded_libs: dict[str, str] = {}
    target: dict[str, str] | None = None
    # Key of the most recent entry in the current section. Tracked explicitly
    # because re-assigning an existing dict key keeps its old position, so
    # "the last key of the dict" is not necessarily the latest entry.
    last_key: str | None = None

    for line in licenses_path.read_text(encoding="utf-8").splitlines():
        if "Python-Abhängigkeiten" in line:
            target, last_key = python_deps, None
            continue
        if "Eingebettete Bibliotheken" in line:
            target, last_key = embedded_libs, None
            continue
        if "Externe Tools" in line or "Lizenztexte" in line:
            target, last_key = None, None
            continue
        if target is None:
            continue

        entry = _LICENSE_ENTRY.match(line)
        if entry:
            name = entry.group(1).strip()
            last_key = name.lower()
            target[last_key] = name
            continue

        version = _LICENSE_VERSION.match(line)
        if version and last_key is not None:
            target[last_key] += "|" + version.group(1)

    return python_deps, embedded_libs
# Maps a pyproject dependency name to the THIRD_PARTY_LICENSES.txt entries that
# belong to it. Covers transitive dependencies and entries that carry a
# parenthesised suffix such as "(via polars)" or "(development)".
KNOWN_ALIASES: dict[str, list[str]] = {
    "pyside6": ["pyside6"],
    # pydantic is a transitive dependency of pydantic-settings.
    "pydantic-settings": ["pydantic-settings", "pydantic"],
    "pydantic-yaml": ["pydantic-yaml"],
    "polars": [
        "polars",
        "connectorx (via polars)",
        "pyarrow (via polars)",
    ],
    "connectorx": ["connectorx (via polars)"],
    "psutil": ["psutil"],
    # BSD-3-Clause, XML/XSLT parsing.
    "lxml": ["lxml"],
    "ruff": ["ruff (development)"],
    "pyinstaller": ["pyinstaller (development)"],
    "pillow": ["pillow (development)"],
}
# "MIT" as a whole word. A plain substring test also hits "LIMITED", which
# appears in the warranty disclaimer of BSD and other license texts.
_MIT_WORD = re.compile(r"\bMIT\b")


def _license_label(meta) -> str:
    """Pick a license label from package metadata.

    Order of preference: the ``License-Expression`` field, then the last
    segment of the first classifier containing "License", then a guess from
    the free-text ``License`` field (its first 80 characters when no known
    license name is recognised, "Unbekannt" when the field is empty).
    """
    expression = meta.get("License-Expression") or ""
    if expression:
        return expression

    license_classifiers = [
        c for c in (meta.get_all("Classifier") or []) if "License" in c
    ]
    if license_classifiers:
        return license_classifiers[0].split(" :: ")[-1]

    text = meta.get("License") or ""
    if _MIT_WORD.search(text):
        return "MIT License"
    if "BSD" in text:
        return "BSD License"
    if "Apache" in text:
        return "Apache License 2.0"
    # Anything else (including GPL/LGPL texts) is reported verbatim, truncated.
    return text[:80] if text else "Unbekannt"


def get_package_info(pkg_name: str) -> dict:
    """Fetch metadata of an installed package via importlib.metadata.

    Args:
        pkg_name: Distribution name to look up.

    Returns:
        A dict that always holds ``"name"`` and ``"installed"``. When the
        package is installed it also holds ``"version"``, ``"summary"``,
        ``"license"`` and ``"author"``, plus ``"homepage"`` and/or
        ``"github"`` when a matching URL is present in the metadata.
    """
    info = {"name": pkg_name, "installed": False}
    try:
        m = metadata(pkg_name)
    except PackageNotFoundError:
        return info

    info["installed"] = True
    info["version"] = m.get("Version", "")
    info["summary"] = m.get("Summary", "")
    info["license"] = _license_label(m)

    # Project-URL entries have the form "<label>, <url>"; later entries with
    # the same kind of label overwrite earlier ones.
    for url_entry in m.get_all("Project-URL") or []:
        label, separator, url = url_entry.partition(",")
        if not separator:
            continue
        label = label.strip().lower()
        url = url.strip()
        if "homepage" in label or "home-page" in label:
            info["homepage"] = url
        elif "repository" in label or "github" in label or "source" in label:
            info["github"] = url

    if "homepage" not in info:
        # Fall back to the dedicated Home-page metadata field.
        homepage = m.get("Home-page")
        if homepage:
            info["homepage"] = homepage

    info["author"] = m.get("Author") or m.get("Author-email") or ""
    return info
def normalize_name(name: str) -> str:
    """Return *name* in a canonical form for comparing package names.

    Every run of "-", "_" and "." becomes a single "-"; the result is
    lowercased and surrounding whitespace is removed.
    """
    collapsed = re.sub(r"[-_.]+", "-", name)
    lowered = collapsed.lower()
    return lowered.strip()
# A parenthesised annotation such as " (via polars)" or " (development)".
_PAREN_SUFFIX = re.compile(r"\s*\(.*?\)")
# The parent package named in a "(via <parent>)" annotation; hyphens and dots
# are allowed so that names like "pydantic-settings" are captured in full.
_VIA_PARENT = re.compile(r"\(via\s+([\w.-]+)\)")


def _base_name(entry: str) -> str:
    """Return *entry* without "(...)" annotations, normalized for comparison."""
    return normalize_name(_PAREN_SUFFIX.sub("", entry))


def _version_key(version: str) -> tuple[int, ...]:
    """Turn "1.2.0" into (1, 2) so that "1.2" and "1.2.0" compare equal."""
    parts = [int(part) for part in version.split(".") if part]
    while parts and parts[-1] == 0:
        parts.pop()
    return tuple(parts)


def main() -> None:
    """Compare pyproject.toml with THIRD_PARTY_LICENSES.txt and print JSON.

    Both files are expected in the project root, four directory levels above
    this script. If either is missing, a JSON object with an "error" key is
    printed and the process exits with status 1.

    Output keys:
        pyproject_deps: parsed dependencies (name -> minimum version), sorted.
        python_license_entries / embedded_license_entries: entry keys found in
            the two sections of the licenses file.
        missing: dependencies without a licenses entry (directly or through
            KNOWN_ALIASES).
        removed: Python licenses entries no longer backed by a dependency.
            Embedded libraries are never reported here.
        version_changed: one object per dependency whose minimum version
            differs from the one recorded in the licenses file, with the keys
            "name", "licenses" and "pyproject".
        info: package metadata for every name listed in "missing".
    """
    project_root = Path(__file__).resolve().parents[4]  # .claude/skills/license-check/scripts -> root
    pyproject_path = project_root / "pyproject.toml"
    licenses_path = project_root / "THIRD_PARTY_LICENSES.txt"

    if not pyproject_path.exists():
        print(json.dumps({"error": f"pyproject.toml nicht gefunden: {pyproject_path}"}))
        sys.exit(1)
    if not licenses_path.exists():
        print(json.dumps({"error": f"THIRD_PARTY_LICENSES.txt nicht gefunden: {licenses_path}"}))
        sys.exit(1)

    pyproject_deps = parse_pyproject(pyproject_path)
    python_entries, embedded_entries = parse_licenses_file(licenses_path)

    # Normalized licenses name -> key in python_entries.
    license_keys = {_base_name(key): key for key in python_entries}
    # Dependency name as written -> normalized name. Normalizing both sides
    # lets "pydantic_settings" match the "pydantic-settings" alias key.
    dep_norms = {dep: normalize_name(dep) for dep in pyproject_deps}
    present = set(dep_norms.values())
    aliases_by_dep = {
        normalize_name(dep): {_base_name(alias) for alias in aliases}
        for dep, aliases in KNOWN_ALIASES.items()
    }

    result = {
        "pyproject_deps": dict(sorted(pyproject_deps.items())),
        "python_license_entries": list(python_entries.keys()),
        "embedded_license_entries": list(embedded_entries.keys()),
        "missing": [],
        "removed": [],
        "version_changed": [],
        "info": {},
    }

    # Dependencies without a licenses entry.
    covered_in_licenses = set()
    for dep_name, norm in dep_norms.items():
        if norm in license_keys:
            covered_in_licenses.add(norm)
            continue
        alias_hits = {a for a in aliases_by_dep.get(norm, ()) if a in license_keys}
        if alias_hits:
            covered_in_licenses |= alias_hits
            continue
        result["missing"].append(dep_name)
        result["info"][dep_name] = get_package_info(dep_name)

    # Licenses entries (Python section only) that no dependency accounts for.
    # Aliases of every dependency still present count as accounted for, even
    # when that dependency itself matched an entry directly.
    active_aliases = set()
    for norm in present:
        active_aliases |= aliases_by_dep.get(norm, set())
    for norm_name, orig_key in license_keys.items():
        if norm_name in covered_in_licenses or norm_name in active_aliases:
            continue
        via = _VIA_PARENT.search(orig_key)
        if via and normalize_name(via.group(1)) in present:
            continue  # transitive entry whose parent is still a dependency
        result["removed"].append(orig_key)

    # Minimum versions that differ between the two files. Only entries that
    # match a dependency by name are compared; the recorded version is the
    # part after the last "|" of the licenses value ("Name|version").
    for dep_name, norm in dep_norms.items():
        wanted = pyproject_deps[dep_name]
        key = license_keys.get(norm)
        if not wanted or key is None:
            continue
        _, separator, recorded = python_entries[key].rpartition("|")
        if separator and recorded and _version_key(recorded) != _version_key(wanted):
            result["version_changed"].append(
                {"name": dep_name, "licenses": recorded, "pyproject": wanted}
            )

    print(json.dumps(result, indent=2, ensure_ascii=False))
# Run the comparison only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()