fetch-assets: add --delete option to clear targetdir of unwanted files

This removes all files we no longer need from the target directory, while
keeping files whose mtime and size still match so they won't be re-downloaded.

This is useful for keeping a directory in sync via a cron job, for example.
This commit is contained in:
Christoph Reiter 2021-08-11 21:05:26 +02:00
parent edf78a3862
commit d028d3acbd

View File

@ -1164,7 +1164,7 @@ def upload_assets(args: Any) -> None:
def fetch_assets(args: Any) -> None: def fetch_assets(args: Any) -> None:
repo = get_repo() repo = get_repo()
target_dir = args.targetdir target_dir = os.path.abspath(args.targetdir)
fetch_all = args.fetch_all fetch_all = args.fetch_all
all_patterns: Dict[str, List[str]] = {} all_patterns: Dict[str, List[str]] = {}
@ -1185,7 +1185,7 @@ def fetch_assets(args: Any) -> None:
(pkg["name"], build_type, pkg.get_status_details(build_type))) (pkg["name"], build_type, pkg.get_status_details(build_type)))
all_assets = {} all_assets = {}
to_download: Dict[str, List[GitReleaseAsset]] = {} assets_to_download: Dict[str, List[GitReleaseAsset]] = {}
for repo_type, patterns in all_patterns.items(): for repo_type, patterns in all_patterns.items():
if repo_type not in all_assets: if repo_type not in all_assets:
release = get_release(repo, 'staging-' + repo_type) release = get_release(repo, 'staging-' + repo_type)
@ -1200,46 +1200,82 @@ def fetch_assets(args: Any) -> None:
matches = fnmatch.filter(assets_mapping.keys(), pattern) matches = fnmatch.filter(assets_mapping.keys(), pattern)
if matches: if matches:
found = assets_mapping[matches[0]] found = assets_mapping[matches[0]]
to_download.setdefault(repo_type, []).extend(found) assets_to_download.setdefault(repo_type, []).extend(found)
todo = [] to_fetch = {}
done = [] for repo_type, assets in assets_to_download.items():
for repo_type, assets in to_download.items():
for asset in assets: for asset in assets:
asset_dir = Path(target_dir) / get_repo_subdir(repo_type, asset) asset_dir = Path(target_dir) / get_repo_subdir(repo_type, asset)
asset_dir.mkdir(parents=True, exist_ok=True)
asset_path = asset_dir / get_asset_filename(asset) asset_path = asset_dir / get_asset_filename(asset)
if asset_path.exists(): to_fetch[str(asset_path)] = asset
if asset_path.stat().st_size != asset.size:
print(f"Warning: {asset_path} already exists " def file_is_uptodate(path, asset):
f"but has a different size") asset_path = Path(path)
if get_asset_mtime_ns(asset) != asset_path.stat().st_mtime_ns: if not asset_path.exists():
print(f"Warning: {asset_path} already exists " return False
f"but has a different mtime") if asset_path.stat().st_size != asset.size:
done.append(asset) return False
continue if get_asset_mtime_ns(asset) != asset_path.stat().st_mtime_ns:
todo.append((asset, asset_path)) return False
return True
# find files that are either wrong or not what we want
to_delete = []
not_uptodate = []
for root, dirs, files in os.walk(target_dir):
for name in files:
existing = os.path.join(root, name)
if existing in to_fetch:
asset = to_fetch[existing]
if not file_is_uptodate(existing, asset):
to_delete.append(existing)
not_uptodate.append(existing)
else:
to_delete.append(existing)
if args.delete and not args.pretend:
# delete unwanted files
for path in to_delete:
os.remove(path)
# delete empty directories
for root, dirs, files in os.walk(target_dir, topdown=False):
for name in dirs:
path = os.path.join(root, name)
if not os.listdir(path):
os.rmdir(path)
# Finally figure out what to download
todo = {}
done = []
for path, asset in to_fetch.items():
if not os.path.exists(path) or path in not_uptodate:
todo[path] = asset
Path(path).parent.mkdir(parents=True, exist_ok=True)
else:
done.append(path)
if args.verbose and all_blocked: if args.verbose and all_blocked:
import pprint import pprint
print("Packages that are blocked and why:") print("Packages that are blocked and why:")
pprint.pprint(all_blocked) pprint.pprint(all_blocked)
print(f"downloading: {len(todo)}, done: {len(done)}, " print(f"downloading: {len(todo)}, done: {len(done)} "
f"blocked: {len(all_blocked)} (related builds missing)") f"blocked: {len(all_blocked)} (related builds missing)")
print("Pass --verbose to see the list of blocked packages.") print("Pass --verbose to see the list of blocked packages.")
print("Pass --fetch-all to also fetch blocked packages.") print("Pass --fetch-all to also fetch blocked packages.")
print("Pass --delete to clear the target directory")
def fetch_item(item): def fetch_item(item):
asset, asset_path = item asset_path, asset = item
if not args.pretend: if not args.pretend:
download_asset(asset, asset_path) download_asset(asset, asset_path)
return item return item
with ThreadPoolExecutor(8) as executor: with ThreadPoolExecutor(8) as executor:
for i, item in enumerate(executor.map(fetch_item, todo)): for i, item in enumerate(executor.map(fetch_item, todo.items())):
print(f"[{i + 1}/{len(todo)}] {get_asset_filename(item[0])}") print(f"[{i + 1}/{len(todo)}] {get_asset_filename(item[1])}")
print("done") print("done")
@ -1404,6 +1440,8 @@ def main(argv: List[str]) -> None:
sub = subparser.add_parser( sub = subparser.add_parser(
"fetch-assets", help="Download all staging packages", allow_abbrev=False) "fetch-assets", help="Download all staging packages", allow_abbrev=False)
sub.add_argument("targetdir") sub.add_argument("targetdir")
sub.add_argument(
"--delete", action="store_true", help="Clear targetdir of unneeded files")
sub.add_argument( sub.add_argument(
"--verbose", action="store_true", help="Show why things are blocked") "--verbose", action="store_true", help="Show why things are blocked")
sub.add_argument( sub.add_argument(