240 lines
8.9 KiB
Python
240 lines
8.9 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
Compares the dependencies in pyproject.toml with the entries in THIRD_PARTY_LICENSES.txt.

Prints a JSON document containing:
- pyproject_deps: dependencies parsed from pyproject.toml with their minimum versions
- python_license_entries / embedded_license_entries: entry names found in THIRD_PARTY_LICENSES.txt
- missing: dependencies listed in pyproject.toml but not in THIRD_PARTY_LICENSES.txt
- removed: entries in THIRD_PARTY_LICENSES.txt that are no longer in pyproject.toml
- version_changed: dependencies whose minimum version has changed
  (NOTE: documented here, but main() does not currently emit this key)
- info: metadata for the missing packages (license, homepage, etc.)
|
||
|
|
"""
|
||
|
|
|
||
|
|
import json
|
||
|
|
import re
|
||
|
|
import sys
|
||
|
|
import tomllib
|
||
|
|
from importlib.metadata import PackageNotFoundError, metadata
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
|
||
|
|
def parse_pyproject(pyproject_path: Path) -> dict[str, str]:
|
||
|
|
"""Parst pyproject.toml und extrahiert Dependencies mit Mindestversionen."""
|
||
|
|
with open(pyproject_path, "rb") as f:
|
||
|
|
data = tomllib.load(f)
|
||
|
|
|
||
|
|
deps: dict[str, str] = {}
|
||
|
|
|
||
|
|
# dependencies-Sektion
|
||
|
|
for dep_str in data.get("project", {}).get("dependencies", []):
|
||
|
|
m = re.match(r"([a-zA-Z0-9_-]+)(?:\[.*?\])?(?:>=([0-9.]+))?", dep_str)
|
||
|
|
if m:
|
||
|
|
deps[m.group(1).lower()] = m.group(2) or ""
|
||
|
|
|
||
|
|
# dependency-groups dev
|
||
|
|
for dep_str in data.get("dependency-groups", {}).get("dev", []):
|
||
|
|
if isinstance(dep_str, str):
|
||
|
|
m = re.match(r"([a-zA-Z0-9_-]+)(?:\[.*?\])?(?:>=([0-9.]+))?", dep_str)
|
||
|
|
if m:
|
||
|
|
deps[m.group(1).lower()] = m.group(2) or ""
|
||
|
|
|
||
|
|
return deps
|
||
|
|
|
||
|
|
|
||
|
|
# Numbered entry such as "1. PackageName" or "1. PackageName (via X)"; a
# trailing parenthesized note is not part of the captured name.
_ENTRY_RE = re.compile(r"\s*\d+\.\s+(.+?)(?:\s+\(.*\))?\s*$")

# Indented version line such as "   Version: >=X.Y.Z".
_VERSION_RE = re.compile(r"\s+Version:\s*>=?([\d.]+)")


def parse_licenses_file(licenses_path: Path) -> tuple[dict[str, str], dict[str, str]]:
    """Parse THIRD_PARTY_LICENSES.txt and collect the package names per section.

    Lines are scanned in order. A line containing "Python-Abhängigkeiten" or
    "Eingebettete Bibliotheken" switches the active section; a line containing
    "Externe Tools" or "Lizenztexte" ends collection until the next section
    header. Inside a section, numbered lines register an entry and an
    indented "Version:" line is attached to the most recent entry.

    Args:
        licenses_path: Path to the THIRD_PARTY_LICENSES.txt file (UTF-8).

    Returns:
        ``(python_deps, embedded_libs)``. Each maps the lowercased entry name
        to the original name; when a version line follows the entry, the
        value is ``"<original name>|<version>"``.
    """
    content = licenses_path.read_text(encoding="utf-8")
    python_deps: dict[str, str] = {}
    embedded_libs: dict[str, str] = {}
    # Dict that currently receives entries; None while outside a tracked section.
    current_target: dict[str, str] | None = None

    for line in content.splitlines():
        if "Python-Abhängigkeiten" in line:
            current_target = python_deps
            continue
        if "Eingebettete Bibliotheken" in line:
            current_target = embedded_libs
            continue
        if "Externe Tools" in line or "Lizenztexte" in line:
            current_target = None
            continue
        if current_target is None:
            continue

        entry_match = _ENTRY_RE.match(line)
        if entry_match:
            name = entry_match.group(1).strip()
            current_target[name.lower()] = name
            continue

        version_match = _VERSION_RE.match(line)
        # The truthiness check skips version lines that appear before the
        # first entry of a section.
        if version_match and current_target:
            # Dicts keep insertion order, so the last key is the latest entry.
            last_key = next(reversed(current_target))
            current_target[last_key] += "|" + version_match.group(1)

    return python_deps, embedded_libs
|
||
|
|
|
||
|
|
|
||
|
|
# Mapping: pyproject name -> corresponding entries in THIRD_PARTY_LICENSES.txt.
# Covers transitive dependencies and aliases that carry a suffix.
# Keys are written lowercase with hyphens; values are the license-file entry
# labels, optionally with a parenthesized note such as "(via polars)".
KNOWN_ALIASES = {
    "pyside6": ["pyside6"],
    "pydantic-settings": ["pydantic-settings", "pydantic"],  # pydantic is a transitive dependency
    "pydantic-yaml": ["pydantic-yaml"],
    "polars": ["polars", "connectorx (via polars)", "pyarrow (via polars)"],
    "connectorx": ["connectorx (via polars)"],
    "psutil": ["psutil"],
    "lxml": ["lxml"],  # BSD-3-Clause, XML/XSLT parsing
    "ruff": ["ruff (development)"],
    "pyinstaller": ["pyinstaller (development)"],
    "pillow": ["pillow (development)"],
}
|
||
|
|
|
||
|
|
|
||
|
|
def _detect_license(m) -> str:
    """Derive a short license label from a package's metadata.

    Preference order: the ``License-Expression`` field, then the first
    classifier containing "License", then keyword detection in the free-text
    ``License`` field.
    """
    license_expr = m.get("License-Expression") or ""
    if license_expr:
        return license_expr

    classifiers = [c for c in (m.get_all("Classifier") or []) if "License" in c]
    if classifiers:
        return classifiers[0].split(" :: ")[-1]

    lic_text = m.get("License") or ""
    # Match "MIT" only as a standalone token: a plain substring test also
    # hits words such as "LIMITED", which occur in BSD and Apache texts.
    if re.search(r"(?<![A-Za-z])MIT(?![A-Za-z])", lic_text):
        return "MIT License"
    if "BSD" in lic_text:
        return "BSD License"
    if "Apache" in lic_text:
        return "Apache License 2.0"
    # Anything else (including GPL/LGPL texts) is reported as a truncated excerpt.
    return lic_text[:80] if lic_text else "Unbekannt"


def get_package_info(pkg_name: str) -> dict:
    """Collect metadata for an installed package via importlib.metadata.

    Args:
        pkg_name: Distribution name to look up.

    Returns:
        A dict that always has ``name`` and ``installed``. When the package
        is installed it also has ``version``, ``summary``, ``license`` and
        ``author``, plus ``homepage`` and/or ``github`` when the metadata
        provides matching URLs. For a package that is not installed only
        ``{"name": ..., "installed": False}`` is returned.
    """
    info: dict = {"name": pkg_name, "installed": False}
    try:
        m = metadata(pkg_name)
    except PackageNotFoundError:
        # Best effort: a missing package is reported as "not installed".
        return info

    info["installed"] = True
    info["version"] = m.get("Version", "")
    info["summary"] = m.get("Summary", "")
    info["license"] = _detect_license(m)

    # Project-URL entries have the form "Label, https://...".
    for url_entry in m.get_all("Project-URL") or []:
        if "," not in url_entry:
            continue
        label, url = url_entry.split(",", 1)
        label = label.strip().lower()
        url = url.strip()
        if "homepage" in label or "home-page" in label:
            info["homepage"] = url
        elif "repository" in label or "github" in label or "source" in label:
            info["github"] = url

    # Fall back to the Home-page field when no Project-URL supplied a homepage.
    if "homepage" not in info:
        homepage = m.get("Home-page")
        if homepage:
            info["homepage"] = homepage

    info["author"] = m.get("Author") or m.get("Author-email") or ""
    return info
|
||
|
|
|
||
|
|
|
||
|
|
def normalize_name(name: str) -> str:
    """Return *name* in a canonical form for comparing package names.

    Runs of "-", "_" and "." collapse into a single "-"; the result is
    lowercased and surrounding whitespace is removed.
    """
    separators = re.compile(r"[-_.]+")
    collapsed = separators.sub("-", name)
    lowered = collapsed.lower()
    return lowered.strip()
|
||
|
|
|
||
|
|
|
||
|
|
def main():
    """Compare pyproject.toml with THIRD_PARTY_LICENSES.txt and print a JSON report.

    The report is written to stdout and contains the parsed dependencies
    (``pyproject_deps``), the entry names found in the license file
    (``python_license_entries``, ``embedded_license_entries``), the
    dependencies without a license entry (``missing``, with package metadata
    in ``info``) and the Python license entries that no longer correspond to
    a dependency (``removed``). Embedded-library entries are never reported
    as removed.

    If either input file is missing, a JSON object with an ``error`` key is
    printed and the process exits with status 1.
    """
    # This script lives in .claude/skills/license-check/scripts, so the
    # project root is four levels above the scripts directory.
    project_root = Path(__file__).resolve().parents[4]
    pyproject_path = project_root / "pyproject.toml"
    licenses_path = project_root / "THIRD_PARTY_LICENSES.txt"

    if not pyproject_path.exists():
        print(json.dumps({"error": f"pyproject.toml nicht gefunden: {pyproject_path}"}))
        sys.exit(1)

    if not licenses_path.exists():
        print(json.dumps({"error": f"THIRD_PARTY_LICENSES.txt nicht gefunden: {licenses_path}"}))
        sys.exit(1)

    pyproject_deps = parse_pyproject(pyproject_path)
    python_entries, embedded_entries = parse_licenses_file(licenses_path)

    def canonical(label: str) -> str:
        """Drop parenthesized notes from *label* and normalize the remainder."""
        return normalize_name(re.sub(r"\s*\(.*?\)", "", label).strip())

    # Normalized license-entry name -> key as stored in python_entries.
    normalized_license_names = {canonical(key): key for key in python_entries}

    # Computed once; the comparisons below all work on normalized names so
    # that e.g. "pydantic_settings" and "pydantic-settings" are the same.
    normalized_deps = {normalize_name(dep) for dep in pyproject_deps}
    aliases_by_dep = {
        normalize_name(dep): [canonical(alias) for alias in aliases]
        for dep, aliases in KNOWN_ALIASES.items()
    }

    result = {
        "pyproject_deps": dict(sorted(pyproject_deps.items())),
        "python_license_entries": list(python_entries.keys()),
        "embedded_license_entries": list(embedded_entries.keys()),
        "missing": [],
        "removed": [],
        "info": {},
    }

    # Find dependencies that have no license entry.
    covered_in_licenses = set()
    for dep_name in pyproject_deps:
        norm = normalize_name(dep_name)
        if norm in normalized_license_names:
            covered_in_licenses.add(norm)
            continue

        # No direct entry: accept any known alias that is present instead.
        matched_aliases = {
            alias for alias in aliases_by_dep.get(norm, ()) if alias in normalized_license_names
        }
        if matched_aliases:
            covered_in_licenses |= matched_aliases
            continue

        result["missing"].append(dep_name)
        result["info"][dep_name] = get_package_info(dep_name)

    # Alias entries belonging to a dependency that is still declared.
    active_alias_names = {
        alias
        for dep, aliases in aliases_by_dep.items()
        if dep in normalized_deps
        for alias in aliases
    }

    # Find removed entries (Python dependencies only, NOT embedded libraries).
    for norm_name, orig_key in normalized_license_names.items():
        if norm_name in covered_in_licenses:
            continue

        # A "(via parent)" entry stays valid while its parent is a dependency.
        # NOTE(review): parse_licenses_file appears to strip a trailing
        # parenthesized note from entry names, so this branch may rarely
        # trigger - confirm before relying on it.
        if "(via" in orig_key:
            parent = re.search(r"\(via\s+([\w.-]+)\)", orig_key)
            if parent and normalize_name(parent.group(1)) in normalized_deps:
                continue

        # Covered through KNOWN_ALIASES of a dependency that is still present.
        if norm_name in active_alias_names:
            continue

        result["removed"].append(orig_key)

    print(json.dumps(result, indent=2, ensure_ascii=False))
|
||
|
|
|
||
|
|
|
||
|
|
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|