site-publish: honor site.yaml excludes during S3 sync
site.yaml can now declare `excludes: [paths/patterns]`. Each entry is passed to both `aws s3 sync` and `aws s3 cp` as an `--exclude` flag, so the listed objects are neither uploaded from the build dir nor deleted from the bucket. This is an escape hatch for assets managed out-of-band (e.g. large PDFs uploaded via aws-cli) that would otherwise be wiped by `--delete`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -42,6 +42,12 @@ type: static # static | hugo | mkdocs
|
|||||||
# - www.my-site.vino.network
|
# - www.my-site.vino.network
|
||||||
# tidy: true # set false to skip HTML tidy
|
# tidy: true # set false to skip HTML tidy
|
||||||
# enabled: true # set false to decommission
|
# enabled: true # set false to decommission
|
||||||
|
# excludes: # paths/patterns to skip during sync (relative to bucket root).
|
||||||
|
# - welcome/welcome.pdf
|
||||||
|
# # Each is passed as `--exclude` to both `aws s3 sync` and `aws s3 cp`,
|
||||||
|
# # so they're neither uploaded NOR deleted. Use this
|
||||||
|
# # for large assets managed out-of-band via aws-cli
|
||||||
|
# # (e.g. media files updated more often than the site code).
|
||||||
```
|
```
|
||||||
|
|
||||||
`.gitea/workflows/publish.yaml`:
|
`.gitea/workflows/publish.yaml`:
|
||||||
|
|||||||
+11
-2
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import shlex
|
||||||
import shutil
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -30,7 +31,7 @@ GARAGE_ADMIN_ENDPOINT = os.environ.get(
|
|||||||
CACHE_CONTROL = "public, max-age=0, must-revalidate"
|
CACHE_CONTROL = "public, max-age=0, must-revalidate"
|
||||||
|
|
||||||
|
|
||||||
def s3_sync(site_name, site_dir):
|
def s3_sync(site_name, site_dir, excludes=None):
|
||||||
endpoint = os.environ.get("GARAGE_S3_ENDPOINT", DEFAULT_S3_ENDPOINT)
|
endpoint = os.environ.get("GARAGE_S3_ENDPOINT", DEFAULT_S3_ENDPOINT)
|
||||||
html_dir = site_dir / "build" / "html"
|
html_dir = site_dir / "build" / "html"
|
||||||
if not html_dir.exists():
|
if not html_dir.exists():
|
||||||
@@ -38,6 +39,12 @@ def s3_sync(site_name, site_dir):
|
|||||||
env("AWS_ACCESS_KEY_ID")
|
env("AWS_ACCESS_KEY_ID")
|
||||||
env("AWS_SECRET_ACCESS_KEY")
|
env("AWS_SECRET_ACCESS_KEY")
|
||||||
os.environ.setdefault("AWS_DEFAULT_REGION", "sjc001")
|
os.environ.setdefault("AWS_DEFAULT_REGION", "sjc001")
|
||||||
|
# `excludes` are patterns (site.yaml `excludes:` list) that should never
|
||||||
|
# be uploaded *and* should never be deleted from the bucket — escape hatch
|
||||||
|
# for assets managed out-of-band (e.g. large PDFs uploaded via aws-cli).
|
||||||
|
exclude_flags = " ".join(f"--exclude {shlex.quote(p)}" for p in (excludes or []))
|
||||||
|
if excludes:
|
||||||
|
print(f"Excluding patterns: {excludes}")
|
||||||
print(f"Syncing {html_dir} → s3://{site_name} via {endpoint}")
|
print(f"Syncing {html_dir} → s3://{site_name} via {endpoint}")
|
||||||
# `sync --delete` handles new/changed/orphaned files. `cp --recursive`
|
# `sync --delete` handles new/changed/orphaned files. `cp --recursive`
|
||||||
# then re-uploads everything to refresh metadata (cache-control,
|
# then re-uploads everything to refresh metadata (cache-control,
|
||||||
@@ -50,12 +57,14 @@ def s3_sync(site_name, site_dir):
|
|||||||
f"aws --endpoint-url {endpoint} s3 sync {html_dir}/ s3://{site_name}/ "
|
f"aws --endpoint-url {endpoint} s3 sync {html_dir}/ s3://{site_name}/ "
|
||||||
f"--delete --only-show-errors "
|
f"--delete --only-show-errors "
|
||||||
f"--cache-control '{CACHE_CONTROL}' "
|
f"--cache-control '{CACHE_CONTROL}' "
|
||||||
|
f"{exclude_flags}".rstrip()
|
||||||
)
|
)
|
||||||
print("Re-stamping metadata on all objects...")
|
print("Re-stamping metadata on all objects...")
|
||||||
run(
|
run(
|
||||||
f"aws --endpoint-url {endpoint} s3 cp {html_dir}/ s3://{site_name}/ "
|
f"aws --endpoint-url {endpoint} s3 cp {html_dir}/ s3://{site_name}/ "
|
||||||
f"--recursive --only-show-errors "
|
f"--recursive --only-show-errors "
|
||||||
f"--cache-control '{CACHE_CONTROL}' "
|
f"--cache-control '{CACHE_CONTROL}' "
|
||||||
|
f"{exclude_flags}".rstrip()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -122,7 +131,7 @@ def render_site_manifests(site_name, action_dir, app_dir, manifests_dir, cfg):
|
|||||||
|
|
||||||
|
|
||||||
def deploy_static(site_name, site_dir, action_dir, token, cfg):
|
def deploy_static(site_name, site_dir, action_dir, token, cfg):
|
||||||
s3_sync(site_name, site_dir)
|
s3_sync(site_name, site_dir, excludes=cfg.get("excludes"))
|
||||||
ensure_bucket_aliases(site_name, cfg["aliases"], os.environ.get("GARAGE_ADMIN_TOKEN"))
|
ensure_bucket_aliases(site_name, cfg["aliases"], os.environ.get("GARAGE_ADMIN_TOKEN"))
|
||||||
|
|
||||||
apps_dir = clone_apps(token)
|
apps_dir = clone_apps(token)
|
||||||
|
|||||||
@@ -83,6 +83,10 @@ def parse_site_yaml(site_dir):
|
|||||||
if site_type not in VALID_TYPES:
|
if site_type not in VALID_TYPES:
|
||||||
die(f"Unknown site type: {site_type} (valid: {', '.join(sorted(VALID_TYPES))})")
|
die(f"Unknown site type: {site_type} (valid: {', '.join(sorted(VALID_TYPES))})")
|
||||||
|
|
||||||
|
excludes = cfg.get("excludes") or []
|
||||||
|
if not isinstance(excludes, list) or any(not isinstance(p, str) for p in excludes):
|
||||||
|
die("excludes must be a list of string patterns")
|
||||||
|
|
||||||
site = {
|
site = {
|
||||||
"domain": cfg["domain"],
|
"domain": cfg["domain"],
|
||||||
"type": site_type,
|
"type": site_type,
|
||||||
@@ -90,6 +94,7 @@ def parse_site_yaml(site_dir):
|
|||||||
"aliases": cfg.get("aliases") or [],
|
"aliases": cfg.get("aliases") or [],
|
||||||
"content_dir": cfg.get("content_dir", ""),
|
"content_dir": cfg.get("content_dir", ""),
|
||||||
"tidy": cfg.get("tidy", True),
|
"tidy": cfg.get("tidy", True),
|
||||||
|
"excludes": excludes,
|
||||||
}
|
}
|
||||||
|
|
||||||
print("Site config:")
|
print("Site config:")
|
||||||
|
|||||||
Reference in New Issue
Block a user