site-publish: honor site.yaml excludes during S3 sync

site.yaml can now declare excludes: [paths/patterns] that are passed to
`aws s3 sync` and `aws s3 cp` as --exclude flags, so the listed objects
are neither uploaded from the build dir nor deleted from the bucket.
Escape hatch for assets managed out-of-band (e.g. large PDFs uploaded
via aws-cli) that would otherwise be wiped by --delete.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Donavan Fritz
2026-05-28 10:12:10 -05:00
parent 69512391ff
commit d431fbddb4
3 changed files with 24 additions and 4 deletions
+6
View File
@@ -42,6 +42,12 @@ type: static # static | hugo | mkdocs
# - www.my-site.vino.network # - www.my-site.vino.network
# tidy: true # set false to skip HTML tidy # tidy: true # set false to skip HTML tidy
# enabled: true # set false to decommission # enabled: true # set false to decommission
# excludes: # paths/patterns to skip during sync (relative to bucket root).
# - welcome/welcome.pdf
# # Each entry is passed verbatim as an `--exclude` pattern to
# # both `aws s3 sync` and `aws s3 cp`, so matching objects are
# # neither uploaded nor deleted from the bucket. Use this
# # for large assets managed out-of-band via aws-cli
# # (e.g. media files updated more often than the site code).
``` ```
`.gitea/workflows/publish.yaml`: `.gitea/workflows/publish.yaml`:
+11 -2
View File
@@ -2,6 +2,7 @@
import json import json
import os import os
import shlex
import shutil import shutil
import tempfile import tempfile
from pathlib import Path from pathlib import Path
@@ -30,7 +31,7 @@ GARAGE_ADMIN_ENDPOINT = os.environ.get(
CACHE_CONTROL = "public, max-age=0, must-revalidate" CACHE_CONTROL = "public, max-age=0, must-revalidate"
def s3_sync(site_name, site_dir): def s3_sync(site_name, site_dir, excludes=None):
endpoint = os.environ.get("GARAGE_S3_ENDPOINT", DEFAULT_S3_ENDPOINT) endpoint = os.environ.get("GARAGE_S3_ENDPOINT", DEFAULT_S3_ENDPOINT)
html_dir = site_dir / "build" / "html" html_dir = site_dir / "build" / "html"
if not html_dir.exists(): if not html_dir.exists():
@@ -38,6 +39,12 @@ def s3_sync(site_name, site_dir):
env("AWS_ACCESS_KEY_ID") env("AWS_ACCESS_KEY_ID")
env("AWS_SECRET_ACCESS_KEY") env("AWS_SECRET_ACCESS_KEY")
os.environ.setdefault("AWS_DEFAULT_REGION", "sjc001") os.environ.setdefault("AWS_DEFAULT_REGION", "sjc001")
# `excludes` are patterns (site.yaml `excludes:` list) that should never
# be uploaded *and* should never be deleted from the bucket — escape hatch
# for assets managed out-of-band (e.g. large PDFs uploaded via aws-cli).
exclude_flags = " ".join(f"--exclude {shlex.quote(p)}" for p in (excludes or []))
if excludes:
print(f"Excluding patterns: {excludes}")
print(f"Syncing {html_dir} → s3://{site_name} via {endpoint}") print(f"Syncing {html_dir} → s3://{site_name} via {endpoint}")
# `sync --delete` handles new/changed/orphaned files. `cp --recursive` # `sync --delete` handles new/changed/orphaned files. `cp --recursive`
# then re-uploads everything to refresh metadata (cache-control, # then re-uploads everything to refresh metadata (cache-control,
@@ -50,12 +57,14 @@ def s3_sync(site_name, site_dir):
f"aws --endpoint-url {endpoint} s3 sync {html_dir}/ s3://{site_name}/ " f"aws --endpoint-url {endpoint} s3 sync {html_dir}/ s3://{site_name}/ "
f"--delete --only-show-errors " f"--delete --only-show-errors "
f"--cache-control '{CACHE_CONTROL}' " f"--cache-control '{CACHE_CONTROL}' "
f"{exclude_flags}".rstrip()
) )
print("Re-stamping metadata on all objects...") print("Re-stamping metadata on all objects...")
run( run(
f"aws --endpoint-url {endpoint} s3 cp {html_dir}/ s3://{site_name}/ " f"aws --endpoint-url {endpoint} s3 cp {html_dir}/ s3://{site_name}/ "
f"--recursive --only-show-errors " f"--recursive --only-show-errors "
f"--cache-control '{CACHE_CONTROL}' " f"--cache-control '{CACHE_CONTROL}' "
f"{exclude_flags}".rstrip()
) )
@@ -122,7 +131,7 @@ def render_site_manifests(site_name, action_dir, app_dir, manifests_dir, cfg):
def deploy_static(site_name, site_dir, action_dir, token, cfg): def deploy_static(site_name, site_dir, action_dir, token, cfg):
s3_sync(site_name, site_dir) s3_sync(site_name, site_dir, excludes=cfg.get("excludes"))
ensure_bucket_aliases(site_name, cfg["aliases"], os.environ.get("GARAGE_ADMIN_TOKEN")) ensure_bucket_aliases(site_name, cfg["aliases"], os.environ.get("GARAGE_ADMIN_TOKEN"))
apps_dir = clone_apps(token) apps_dir = clone_apps(token)
+5
View File
@@ -83,6 +83,10 @@ def parse_site_yaml(site_dir):
if site_type not in VALID_TYPES: if site_type not in VALID_TYPES:
die(f"Unknown site type: {site_type} (valid: {', '.join(sorted(VALID_TYPES))})") die(f"Unknown site type: {site_type} (valid: {', '.join(sorted(VALID_TYPES))})")
excludes = cfg.get("excludes") or []
if not isinstance(excludes, list) or any(not isinstance(p, str) for p in excludes):
die("excludes must be a list of string patterns")
site = { site = {
"domain": cfg["domain"], "domain": cfg["domain"],
"type": site_type, "type": site_type,
@@ -90,6 +94,7 @@ def parse_site_yaml(site_dir):
"aliases": cfg.get("aliases") or [], "aliases": cfg.get("aliases") or [],
"content_dir": cfg.get("content_dir", ""), "content_dir": cfg.get("content_dir", ""),
"tidy": cfg.get("tidy", True), "tidy": cfg.get("tidy", True),
"excludes": excludes,
} }
print("Site config:") print("Site config:")