pypi: support extracting project names from PURL references

For example "pkg:pypi/django@1.11.1" -> django

This means we can get rid of the non-purl "pypi" mapping in PKGBUILDs.
This commit is contained in:
Christoph Reiter 2025-02-16 20:33:00 +01:00
parent b29a9d37f4
commit 0909a67fa4
2 changed files with 42 additions and 2 deletions

View File

@ -5,6 +5,7 @@ import datetime
import gzip
import json
import re
from urllib.parse import unquote
from ..appconfig import PYPI_URLS, REQUEST_TIMEOUT
from ..appstate import ExtId, ExtInfo, state
@ -18,6 +19,37 @@ def normalize(name: str) -> str:
return re.sub(r"[-_.]+", "-", name).lower()
def extract_pypi_project_from_purl(purl: str) -> str | None:
"""Extract the project name from a PyPI PURL.
If not a proper PyPI PURL, return None.
"""
if not purl.startswith("pkg:pypi/"):
return None
path_and_rest = purl[len("pkg:pypi/"):]
path_part = path_and_rest.split("@", 1)[0].split("?", 1)[0].split("#", 1)[0]
parts = path_part.rsplit("/", 1)
if not parts or not parts[-1]:
return None
return unquote(parts[-1])
def extract_pypi_project_from_references(references: dict[str, list[str | None]]) -> str | None:
if "pypi" in references:
for entry in references["pypi"]:
if entry is not None:
return entry
for purl in references.get("purl", []):
if purl is None:
continue
project = extract_pypi_project_from_purl(purl)
if project is not None:
return project
return None
async def update_pypi_versions(pkgextra: PkgExtra) -> None:
urls = PYPI_URLS
if not await check_needs_update(urls):
@ -32,9 +64,9 @@ async def update_pypi_versions(pkgextra: PkgExtra) -> None:
pypi_versions = {}
for entry in pkgextra.packages.values():
if "pypi" not in entry.references:
pypi_name = extract_pypi_project_from_references(entry.references)
if pypi_name is None:
continue
pypi_name = entry.references["pypi"][0]
assert isinstance(pypi_name, str)
normalized_name = normalize(pypi_name)
if normalized_name in projects:

View File

@ -8,6 +8,7 @@ import pytest
from app import app
from app.appstate import SrcInfoPackage, parse_packager
from app.fetch.cygwin import parse_cygwin_versions
from app.fetch.pypi import extract_pypi_project_from_purl
from app.utils import split_optdepends, strip_vcs, vercmp
from app.pkgextra import extra_to_pkgextra_entry
from fastapi.testclient import TestClient
@ -218,3 +219,10 @@ def test_extra_to_pkgextra_entry():
assert extra_to_pkgextra_entry(
{"changelog_url": "foo"}
).changelog_url == "foo"
def test_extract_pypi_project_from_purl():
    """Check name extraction for PyPI PURLs and rejection of other types."""
    # Maps each input PURL to the project name expected back (None = rejected).
    expected_by_purl = {
        "pkg:pypi/foo": "foo",
        "pkg:pypi/django@1.11.1": "django",
        "pkg:pypi/django?filename=Django-1.11.1.tar.gz": "django",
        "pkg:cargo/rand@0.7.2": None,
    }
    for purl, expected in expected_by_purl.items():
        result = extract_pypi_project_from_purl(purl)
        if expected is None:
            assert result is None
        else:
            assert result == expected