Review note: this patch was reconstructed from a copy whose newlines had been
collapsed to spaces. Indentation and blank-line placement of context lines are
inferred and should be checked against the target tree. The added lines were
revised during review, so the post-image blob ids on the index lines are stale.

diff --git a/app/fetch/pypi.py b/app/fetch/pypi.py
index 5c9bdf5..1826251 100644
--- a/app/fetch/pypi.py
+++ b/app/fetch/pypi.py
@@ -5,6 +5,7 @@ import datetime
 import gzip
 import json
 import re
+from urllib.parse import unquote
 
 from ..appconfig import PYPI_URLS, REQUEST_TIMEOUT
 from ..appstate import ExtId, ExtInfo, state
@@ -18,6 +19,51 @@ def normalize(name: str) -> str:
     return re.sub(r"[-_.]+", "-", name).lower()
 
 
+def extract_pypi_project_from_purl(purl: str) -> str | None:
+    """Extract the project name from a PyPI PURL.
+
+    The version ("@..."), qualifiers ("?...") and subpath ("#...") are
+    dropped and the name is percent-decoded. The "pypi" type is matched
+    case-insensitively and extra slashes after "pkg:" are ignored, as
+    the PURL spec requires of parsers.
+    If not a proper PyPI PURL, return None.
+    """
+
+    scheme, _, remainder = purl.partition(":")
+    if scheme != "pkg":
+        return None
+    purl_type, sep, path_and_rest = remainder.lstrip("/").partition("/")
+    if not sep or purl_type.lower() != "pypi":
+        return None
+    path_part = path_and_rest.split("@", 1)[0].split("?", 1)[0].split("#", 1)[0]
+    # rpartition always yields a 3-tuple, so only the name itself needs checking
+    name = path_part.rpartition("/")[2]
+    if not name:
+        return None
+    return unquote(name)
+
+
+def extract_pypi_project_from_references(references: dict[str, list[str | None]]) -> str | None:
+    """Return the PyPI project name given in the references, or None.
+
+    An explicit "pypi" reference takes precedence; otherwise the first
+    "purl" reference that is a PyPI PURL is used.
+    """
+
+    for entry in references.get("pypi", []):
+        if entry is not None:
+            return entry
+
+    for purl in references.get("purl", []):
+        if purl is None:
+            continue
+        project = extract_pypi_project_from_purl(purl)
+        if project is not None:
+            return project
+
+    return None
+
+
 async def update_pypi_versions(pkgextra: PkgExtra) -> None:
     urls = PYPI_URLS
     if not await check_needs_update(urls):
@@ -32,9 +78,9 @@ async def update_pypi_versions(pkgextra: PkgExtra) -> None:
 
     pypi_versions = {}
     for entry in pkgextra.packages.values():
-        if "pypi" not in entry.references:
+        pypi_name = extract_pypi_project_from_references(entry.references)
+        if pypi_name is None:
             continue
-        pypi_name = entry.references["pypi"][0]
         assert isinstance(pypi_name, str)
         normalized_name = normalize(pypi_name)
         if normalized_name in projects:
diff --git a/tests/test_main.py b/tests/test_main.py
index ba31b02..b186c99 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -8,6 +8,7 @@ import pytest
 from app import app
 from app.appstate import SrcInfoPackage, parse_packager
 from app.fetch.cygwin import parse_cygwin_versions
+from app.fetch.pypi import extract_pypi_project_from_purl
 from app.utils import split_optdepends, strip_vcs, vercmp
 from app.pkgextra import extra_to_pkgextra_entry
 from fastapi.testclient import TestClient
@@ -218,3 +219,13 @@ def test_extra_to_pkgextra_entry():
     assert extra_to_pkgextra_entry(
         {"changelog_url": "foo"}
     ).changelog_url == "foo"
+
+
+def test_extract_pypi_project_from_purl():
+    assert extract_pypi_project_from_purl("pkg:pypi/foo") == "foo"
+    assert extract_pypi_project_from_purl("pkg:pypi/django@1.11.1") == "django"
+    assert extract_pypi_project_from_purl("pkg:pypi/django?filename=Django-1.11.1.tar.gz") == "django"
+    assert extract_pypi_project_from_purl("pkg:cargo/rand@0.7.2") is None
+    assert extract_pypi_project_from_purl("pkg:PyPI/Django@1.11.1") == "Django"
+    assert extract_pypi_project_from_purl("pkg:pypi/foo%2Ebar#sub/path") == "foo.bar"
+    assert extract_pypi_project_from_purl("pkg:pypi/") is None