diff --git a/README.md b/README.md index 80fc563..08acaac 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,12 @@ type: static # static | hugo | mkdocs # - www.my-site.vino.network # tidy: true # set false to skip HTML tidy # enabled: true # set false to decommission +# excludes: # paths/patterns to skip during sync (relative to bucket root). +# - welcome/welcome.pdf +# # These are passed verbatim to `aws s3 sync --exclude`, +# # so they're both un-uploaded AND un-deleted. Use this +# # for large assets managed out-of-band via aws-cli +# # (e.g. media files updated more often than the site code). ``` `.gitea/workflows/publish.yaml`: diff --git a/scripts/deploy.py b/scripts/deploy.py index e2aad28..5b1aade 100644 --- a/scripts/deploy.py +++ b/scripts/deploy.py @@ -2,6 +2,7 @@ import json import os +import shlex import shutil import tempfile from pathlib import Path @@ -30,7 +31,7 @@ GARAGE_ADMIN_ENDPOINT = os.environ.get( CACHE_CONTROL = "public, max-age=0, must-revalidate" -def s3_sync(site_name, site_dir): +def s3_sync(site_name, site_dir, excludes=None): endpoint = os.environ.get("GARAGE_S3_ENDPOINT", DEFAULT_S3_ENDPOINT) html_dir = site_dir / "build" / "html" if not html_dir.exists(): @@ -38,6 +39,12 @@ def s3_sync(site_name, site_dir): env("AWS_ACCESS_KEY_ID") env("AWS_SECRET_ACCESS_KEY") os.environ.setdefault("AWS_DEFAULT_REGION", "sjc001") + # `excludes` are patterns (site.yaml `excludes:` list) that should never + # be uploaded *and* should never be deleted from the bucket — escape hatch + # for assets managed out-of-band (e.g. large PDFs uploaded via aws-cli). + exclude_flags = " ".join(f"--exclude {shlex.quote(p)}" for p in (excludes or [])) + if excludes: + print(f"Excluding patterns: {excludes}") print(f"Syncing {html_dir} → s3://{site_name} via {endpoint}") # `sync --delete` handles new/changed/orphaned files. `cp --recursive` # then re-uploads everything to refresh metadata (cache-control, @@ -49,13 +56,15 @@ def s3_sync(site_name, site_dir): run( f"aws --endpoint-url {endpoint} s3 sync {html_dir}/ s3://{site_name}/ " f"--delete --only-show-errors " - f"--cache-control '{CACHE_CONTROL}'" + f"--cache-control '{CACHE_CONTROL}' " + f"{exclude_flags}".rstrip() ) print("Re-stamping metadata on all objects...") run( f"aws --endpoint-url {endpoint} s3 cp {html_dir}/ s3://{site_name}/ " f"--recursive --only-show-errors " - f"--cache-control '{CACHE_CONTROL}'" + f"--cache-control '{CACHE_CONTROL}' " + f"{exclude_flags}".rstrip() ) @@ -122,7 +131,7 @@ def render_site_manifests(site_name, action_dir, app_dir, manifests_dir, cfg): def deploy_static(site_name, site_dir, action_dir, token, cfg): - s3_sync(site_name, site_dir) + s3_sync(site_name, site_dir, excludes=cfg.get("excludes")) ensure_bucket_aliases(site_name, cfg["aliases"], os.environ.get("GARAGE_ADMIN_TOKEN")) apps_dir = clone_apps(token) diff --git a/scripts/utils.py b/scripts/utils.py index b06bfa7..561e5d8 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -83,6 +83,10 @@ def parse_site_yaml(site_dir): if site_type not in VALID_TYPES: die(f"Unknown site type: {site_type} (valid: {', '.join(sorted(VALID_TYPES))})") + excludes = cfg.get("excludes") or [] + if not isinstance(excludes, list) or any(not isinstance(p, str) for p in excludes): + die("excludes must be a list of string patterns") + site = { "domain": cfg["domain"], "type": site_type, @@ -90,6 +94,7 @@ def parse_site_yaml(site_dir): "aliases": cfg.get("aliases") or [], "content_dir": cfg.get("content_dir", ""), "tidy": cfg.get("tidy", True), + "excludes": excludes, } print("Site config:")