diff --git a/bigfiles b/bigfiles
new file mode 100755
index 0000000..3fbe7e0
--- /dev/null
+++ b/bigfiles
@@ -0,0 +1,14 @@
+#!/bin/sh
+#
+# bigfiles - a system for getting big files to the server efficiently
+# part of ejv2.cc -- by Ethan Marshall in 2024
+#
+# Runs bigfiles.awk over bigfiles.tsv from the current directory. Every
+# setting below can be overridden from the environment; an exported NOCACHE
+# reaches the awk script unchanged.
+
+REMOTEHOST=${REMOTEHOST:-"ejv2.cc"} \
+    LOCALDIR=${LOCALDIR:-"big"} \
+    REMOTECACHE=${REMOTECACHE:-"/usr/share/bigfiles"} \
+    REMOTEDIR=${REMOTEDIR:-"/var/www/html"} \
+    ./bigfiles.awk bigfiles.tsv
diff --git a/bigfiles.awk b/bigfiles.awk
new file mode 100755
index 0000000..3dc56d8
--- /dev/null
+++ b/bigfiles.awk
@@ -0,0 +1,155 @@
+#!/usr/bin/awk -f
+#
+# bigfiles - a system for getting big files to the server efficiently
+# part of ejv2.cc -- by Ethan Marshall in 2024
+#
+# You usually don't want to call this directly; use the wrapper script for
+# convenience!
+#
+# Bigfiles parses a TSV file containing a list in the format:
+#
+#   localpath TAB remotepath
+#
+# If remotepath ends in a slash, it is assumed to be a directory and the
+# localpath filename is appended to it.
+#
+# Fields are split with awk's default separator, so neither path may contain
+# whitespace.
+#
+# Configuration is read from the environment:
+#   REMOTEHOST   host to reach over ssh and rsync (required)
+#   LOCALDIR     local directory that localpath is relative to
+#   REMOTECACHE  remote directory holding the cached uploads
+#   REMOTEDIR    remote directory that remotepath is relative to
+#   NOCACHE      if non-empty, ignore cached copies and always re-upload
+#
+# Exits non-zero if the remote is unreachable or any file failed to copy.
+#
+# Relies upon rsync for file uploading.
+#
+# TODO:
+#  - Perhaps we should buffer lots of file copies into the same rsync call?
+
+# Quotes s so that a POSIX shell reads it back as one literal word.
+function shquote(s)
+{
+    gsub(/'/, "'\\''", s);
+    return "'" s "'";
+}
+
+# Builds the local command line that runs cmd on REMOTEHOST over ssh.
+# cmd is quoted as a whole so that the remote shell, not the local one,
+# interprets it.
+function sshcmd(cmd)
+{
+    return "ssh " shquote(REMOTEHOST) " " shquote(cmd);
+}
+
+# Returns truthy if a remotely cached copy exists on the remote.
+# Cached copies are stored in REMOTECACHE under their intended remote path.
+# These copies are then copied on the remote when needed.
+function is_cached(path)
+{
+    return system(sshcmd("stat " shquote(REMOTECACHE "/" path)) " >/dev/null 2>&1") == 0;
+}
+
+# Uses rsync with some flags to copy src to dest on REMOTEHOST.
+# Returns truthy on success.
+function rsyncit(src, dest)
+{
+    return system("rsync --mkpath -zP " shquote(src) " " shquote(REMOTEHOST ":" dest)) == 0;
+}
+
+# Copies files around on REMOTEHOST using ssh.
+# Returns truthy on success.
+function cpit(src, dest)
+{
+    return system(sshcmd("cp " shquote(src) " " shquote(dest))) == 0;
+}
+
+BEGIN {
+    VERSION = "v1.0";
+    LOCALDIR = ENVIRON["LOCALDIR"];
+    REMOTEDIR = ENVIRON["REMOTEDIR"];
+    REMOTECACHE = ENVIRON["REMOTECACHE"];
+    REMOTEHOST = ENVIRON["REMOTEHOST"];
+    NOCACHE = ENVIRON["NOCACHE"] != "";
+
+    done = 0
+    cached = 0
+    total = 0
+    aborted = 0
+
+    if (REMOTEHOST == "") {
+        print "bigfiles -- must specify remote host to copy to" > "/dev/stderr";
+        aborted = 1;
+        exit 1;
+    }
+
+    printf("bigfiles %s: host=%s, local=%s, remote=%s\n", VERSION, REMOTEHOST, LOCALDIR, REMOTEDIR);
+    if (system(sshcmd("uname -a")) != 0) {
+        print "bigfiles -- remote host", REMOTEHOST, "did not respond over ssh" > "/dev/stderr";
+        aborted = 1;
+        exit 1;
+    }
+    printf("\n");
+}
+
+# Comments are lines which begin with a hash literal
+# Ignore them, along with blank lines
+/^#/ || NF == 0 {
+    next;
+}
+
+# Every other line is a file record to copy
+{
+    was_cached = 0;
+    total++;
+
+    # A record without both fields would resolve to the cache/destination
+    # root itself, so count it as failed instead of running any command
+    if (NF < 2) {
+        printf("bigfiles -- %s:%d: expected localpath and remotepath\n", FILENAME, FNR) > "/dev/stderr";
+        next;
+    }
+
+    localfile = LOCALDIR "/" $1;
+    fullpath = $2
+    if (fullpath ~ /\/$/) {
+        fullpath = fullpath $1;
+    }
+    cachefile = REMOTECACHE "/" fullpath;
+    destfile = REMOTEDIR "/" fullpath;
+
+    printf("file=%s, dest=%s... ", localfile, destfile);
+    # NOCACHE forces a fresh upload even when a cached copy exists
+    if (!NOCACHE && is_cached(fullpath)) {
+        was_cached = 1;
+        printf("[CACHED]\n");
+    } else {
+        printf("[NEW]\n");
+        if (!rsyncit(localfile, cachefile)) {
+            next;
+        }
+    }
+
+    if (cpit(cachefile, destfile)) {
+        done++;
+        if (was_cached) {
+            cached++;
+        }
+    }
+}
+
+END {
+    # exit inside BEGIN still runs END; keep the failure status and skip
+    # the summary, which would otherwise report zero files
+    if (aborted) {
+        exit 1;
+    }
+
+    printf("done: %d success (%d cached, %d new), %d failed, %d total\n", done, cached, done - cached, total - done, total);
+    exit (done != total);
+}
+
+# vim: ft=awk
diff --git a/bigfiles.tsv b/bigfiles.tsv
new file mode 100644
index 0000000..e757bc7
--- /dev/null
+++ b/bigfiles.tsv
@@ -0,0 +1,11 @@
+# bigfiles - a system for getting big files to the server efficiently
+# part of ejv2.cc -- by Ethan Marshall in 2024
+#
+# This is the data file.
+# Format is: localpath TAB remotepath
+# If remotepath is a directory IT MUST END WITH A SLASH!
+#
+# See environment variables for where these are relative to.
+
+join-castaway.mp4	blog/making-of-join-castaway/
+join-castaway.webm	blog/making-of-join-castaway/
diff --git a/siteupdate b/siteupdate
index ecb302b..8808338 100755
--- a/siteupdate
+++ b/siteupdate
@@ -4,4 +4,6 @@
 rm -rf public/
 hugo
 ssh root@ejv2.cc rm -rf /var/www/html/
-rsync -r public/ ejv2.cc:/var/www/html/
+rsync -zr public/ ejv2.cc:/var/www/html/
+
+./bigfiles