fetch: send cache request headers when checking for changes

Up until now it only did a HEAD request and compared the returned
cache headers to see if something changed. By also sending the
request cache headers, the other side can potentially respond from their
caching layer and not hit any actual service.

With this we can reduce the checking interval without harming any
external service.
This commit is contained in:
Christoph Reiter 2023-08-15 22:00:55 +02:00
parent 30110d1c78
commit 789c72ae75

View File

@ -300,30 +300,46 @@ async def update_arch_versions() -> None:
state.set_ext_infos(ExtId("aur", "AUR", True), aur_versions)
async def check_needs_update(urls: List[str], _cache: Dict[str, str] = {}) -> bool:
CacheHeaders = Dict[str, Optional[str]]
async def check_needs_update(urls: List[str], _cache: Dict[str, CacheHeaders] = {}) -> bool:
"""Raises RequestException"""
if appconfig.CACHE_DIR:
return True
async def get_headers(client: httpx.AsyncClient, url: str, *args: Any, **kwargs: Any) -> Tuple[str, httpx.Headers]:
r = await client.head(url, *args, **kwargs)
async def get_cache_headers(client: httpx.AsyncClient, url: str, timeout: float) -> Tuple[str, CacheHeaders]:
"""This tries to return the cache response headers for a given URL as cheap as possible"""
old_headers = _cache.get(url, {})
last_modified = old_headers.get("last-modified")
etag = old_headers.get("etag")
fetch_headers = {}
if last_modified is not None:
fetch_headers["if-modified-since"] = last_modified
if etag is not None:
fetch_headers["if-none-match"] = etag
r = await client.head(url, timeout=timeout, headers=fetch_headers)
if r.status_code == 304:
return (url, dict(old_headers))
r.raise_for_status()
return (url, r.headers)
new_headers = {}
new_headers["last-modified"] = r.headers.get("last-modified")
new_headers["etag"] = r.headers.get("etag")
return (url, new_headers)
needs_update = False
async with httpx.AsyncClient(follow_redirects=True) as client:
awaitables = []
for url in urls:
awaitables.append(get_headers(client, url, timeout=REQUEST_TIMEOUT))
awaitables.append(get_cache_headers(client, url, timeout=REQUEST_TIMEOUT))
for url, headers in (await asyncio.gather(*awaitables)):
old = _cache.get(url)
new = headers.get("last-modified", "")
new += headers.get("etag", "")
if old != new:
for url, new_cache_headers in (await asyncio.gather(*awaitables)):
old_cache_headers = _cache.get(url, {})
if old_cache_headers != new_cache_headers:
needs_update = True
_cache[url] = new
_cache[url] = new_cache_headers
return needs_update