Verify checksums when downloading assets

In the last few weeks (I think) GitHub added checksums to the API responses
for release assets. Use them to verify the downloaded files.

Also bump the download chunk size from 4 KiB to 256 KiB while at it; it was quite small.
This commit is contained in:
Christoph Reiter 2025-08-01 08:13:06 +02:00
parent 69ce064955
commit 5bbfb7bb18

View File

@ -4,10 +4,10 @@ import shutil
import sys import sys
import tempfile import tempfile
import time import time
import hashlib
from contextlib import contextmanager from contextlib import contextmanager
from datetime import datetime, timezone from datetime import datetime, timezone
from functools import lru_cache from functools import lru_cache
from hashlib import sha256
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Generator, List, Optional, Callable from typing import Any, Dict, Generator, List, Optional, Callable
@ -153,9 +153,11 @@ def download_asset(asset: GitReleaseAsset, target_path: str,
fd, temppath = tempfile.mkstemp() fd, temppath = tempfile.mkstemp()
try: try:
os.chmod(temppath, 0o644) os.chmod(temppath, 0o644)
with os.fdopen(fd, "wb") as h: with verify_asset_digest(asset) as hash:
for chunk in r.iter_content(4096): with os.fdopen(fd, "wb") as h:
h.write(chunk) for chunk in r.iter_content(256 * 1024):
hash.update(chunk)
h.write(chunk)
mtime_ns = get_asset_mtime_ns(asset) mtime_ns = get_asset_mtime_ns(asset)
os.utime(temppath, ns=(mtime_ns, mtime_ns)) os.utime(temppath, ns=(mtime_ns, mtime_ns))
if onverify is not None: if onverify is not None:
@ -171,7 +173,7 @@ def download_asset(asset: GitReleaseAsset, target_path: str,
def get_gh_asset_name(basename: PathLike, text: bool = False) -> str:
    """Return the GitHub-safe asset name derived from *basename*.

    GitHub will throw out characters like '~' or '=' in asset names. It also
    doesn't like names without a file extension and will try to add one.
    To avoid both, the name is the SHA-256 hex digest of the UTF-8 encoded
    *basename* followed by ".txt" when *text* is true, ".bin" otherwise.
    """
    suffix = ".txt" if text else ".bin"
    name_hash = hashlib.sha256(str(basename).encode("utf-8"))
    return name_hash.hexdigest() + suffix
def get_asset_filename(asset: GitReleaseAsset) -> str: def get_asset_filename(asset: GitReleaseAsset) -> str:
@ -183,6 +185,28 @@ def get_asset_filename(asset: GitReleaseAsset) -> str:
return asset.label return asset.label
@contextmanager
def verify_asset_digest(asset: GitReleaseAsset) -> Generator[Any, None, None]:
    """Context manager that checks streamed data against an asset's digest.

    Looks up the expected digest of *asset* via ``get_asset_digest()`` and
    yields a ``hashlib`` hash object for the named algorithm. The caller
    feeds the downloaded bytes to it with ``update()``. Once the ``with``
    body has finished without an error, the computed hex digest is compared
    (case-insensitively) with the expected value.

    Raises:
        Exception: if the computed digest differs from the expected one.
    """
    type_, value = get_asset_digest(asset)
    value = value.lower()
    h = hashlib.new(type_)
    # Deliberately no try/finally around the yield: if the body raises (for
    # example because the download was interrupted) the hash only covers
    # partial data, and raising "Digest mismatch" from a finally block would
    # replace the real error with a misleading one. Let it propagate instead.
    yield h
    hexdigest = h.hexdigest().lower()
    if hexdigest != value:
        raise Exception(f"Digest mismatch for asset {get_asset_filename(asset)}: "
                        f"got {hexdigest}, expected {value}")
def get_asset_digest(asset: GitReleaseAsset) -> tuple[str, str]:
    """Return the ``(algorithm, value)`` pair of the digest stored for *asset*.

    The digest is read from the asset's raw API data and split at the first
    ":" into the algorithm name and the digest value (presumably something
    like ``sha256:<hex>`` -- confirm against the GitHub API docs).

    Raises:
        KeyError: if the raw data has no ``digest`` entry.
        AssertionError: if the ``digest`` entry is empty or null.
        ValueError: if the digest contains no ":" separator.
    """
    # Read from the raw data, see:
    # https://github.com/PyGithub/PyGithub/issues/3324
    digest = asset._rawData['digest']
    if not digest:
        # Raised explicitly instead of using ``assert`` so the check is not
        # stripped under ``python -O``; the exception type stays the same.
        raise AssertionError("Asset has no digest in its API data")
    type_, value = digest.split(":", 1)
    return type_, value
def is_asset_from_gha(asset: GitReleaseAsset) -> bool: def is_asset_from_gha(asset: GitReleaseAsset) -> bool:
"""If the asset was uploaded from CI via GHA""" """If the asset was uploaded from CI via GHA"""