Verify checksums when downloading assets

In the last few weeks (I think) GH added checksums to the API responses
for release assets. Use them to verify the downloaded files.

Also bump the chunk size a bit while at it; it was quite small.
This commit is contained in:
Christoph Reiter 2025-08-01 08:13:06 +02:00
parent 69ce064955
commit 5bbfb7bb18

View File

@ -4,10 +4,10 @@ import shutil
import sys
import tempfile
import time
import hashlib
from contextlib import contextmanager
from datetime import datetime, timezone
from functools import lru_cache
from hashlib import sha256
from pathlib import Path
from typing import Any, Dict, Generator, List, Optional, Callable
@ -153,8 +153,10 @@ def download_asset(asset: GitReleaseAsset, target_path: str,
fd, temppath = tempfile.mkstemp()
try:
os.chmod(temppath, 0o644)
with verify_asset_digest(asset) as hash:
with os.fdopen(fd, "wb") as h:
for chunk in r.iter_content(4096):
for chunk in r.iter_content(256 * 1024):
hash.update(chunk)
h.write(chunk)
mtime_ns = get_asset_mtime_ns(asset)
os.utime(temppath, ns=(mtime_ns, mtime_ns))
@ -171,7 +173,7 @@ def download_asset(asset: GitReleaseAsset, target_path: str,
def get_gh_asset_name(basename: PathLike, text: bool = False) -> str:
    """Return the asset name to use on GitHub for ``basename``.

    The name is the SHA-256 hex digest of ``str(basename)`` (UTF-8 encoded)
    followed by ".txt" when ``text`` is true and ".bin" otherwise.
    """
    # GitHub will throw out characters like '~' or '='. It also doesn't like
    # when there is no file extension and will try to add one
    suffix = ".txt" if text else ".bin"
    return hashlib.sha256(str(basename).encode("utf-8")).hexdigest() + suffix
def get_asset_filename(asset: GitReleaseAsset) -> str:
@ -183,6 +185,28 @@ def get_asset_filename(asset: GitReleaseAsset) -> str:
return asset.label
@contextmanager
def verify_asset_digest(asset: GitReleaseAsset) -> Generator[Any, None, None]:
    """Context manager verifying data against the digest recorded for ``asset``.

    Yields a ``hashlib`` hash object created for the algorithm returned by
    ``get_asset_digest``. The caller feeds the downloaded bytes to it with
    ``update()``. When the ``with`` body completes without raising, the
    resulting hex digest is compared (case-insensitively) to the expected one.

    Raises:
        Exception: if the computed digest differs from the expected value.
    """
    type_, value = get_asset_digest(asset)
    expected = value.lower()
    h = hashlib.new(type_)
    # Deliberately no try/finally here: if the body raises (e.g. the download
    # is interrupted), that error must propagate unchanged instead of being
    # replaced by a misleading digest mismatch for the partial data.
    yield h
    hexdigest = h.hexdigest().lower()
    if hexdigest != expected:
        raise Exception(f"Digest mismatch for asset {get_asset_filename(asset)}: "
                        f"got {hexdigest}, expected {expected}")
def get_asset_digest(asset: GitReleaseAsset) -> tuple[str, str]:
    """Return the ``(algorithm, value)`` pair from the asset's raw ``digest``.

    The raw field is expected to look like ``"<algorithm>:<value>"`` and is
    split at the first colon.

    Raises:
        ValueError: if the digest is missing, empty, or has no ":" separator.
    """
    # Read from the raw API data, see
    # https://github.com/PyGithub/PyGithub/issues/3324
    digest = asset._rawData.get('digest')
    if not digest:
        # Explicit check instead of assert, which is stripped under -O
        raise ValueError("Asset has no digest")
    type_, sep, value = digest.partition(":")
    if not sep:
        raise ValueError(f"Malformed asset digest: {digest!r}")
    return type_, value
def is_asset_from_gha(asset: GitReleaseAsset) -> bool:
"""If the asset was uploaded from CI via GHA"""