1
0
Fork 0
ejv2.cc/bigfiles.awk
Ethan Marshall d177a087ea
Add bigfiles system
Adds a system which should allow us to avoid copying the same massive
files every time we want to update the site.

Improves siteupdate performance by an order of magnitude.
2024-09-20 11:44:18 +01:00

108 lines
2.6 KiB
Awk
Executable file

#!/usr/bin/awk -f
#
# bigfiles - a system for getting big files to the server efficiently
# part of ejv2.cc -- by Ethan Marshall in 2024
#
# You usually don't want to call this directly; use the wrapper script for
# convenience!
#
# Bigfiles parses a TSV file containing a list in the format:
# <localpath> <remotepath>
# If remotepath ends in a slash, it is treated as a directory and the
# localpath (exactly as written in the first column) is appended to it.
#
# Relies upon rsync for file uploading.
#
# TODO:
# - Perhaps we should buffer lots of file copies into the same rsync call?
# is_cached(path) - test whether a cached copy of a file exists on the remote.
#
#   path: the file's intended remote path, relative to REMOTECACHE.
#
# Cached copies live in REMOTECACHE under their intended remote path and are
# copied into place on the remote when needed.
#
# Runs `stat REMOTECACHE/path` on REMOTEHOST over ssh with all output
# discarded. Returns 1 when that command exits with status 0, otherwise 0;
# an ssh failure is therefore indistinguishable from a missing file.
#
# ssh is run with -n so it never reads from awk's standard input; without it,
# ssh could swallow pending records when the file list is piped in.
#
# NOTE(review): REMOTECACHE and path are interpolated into the command line
# unquoted, so paths with whitespace or shell metacharacters will misbehave.
function is_cached(path)
{
	return system("ssh -n " REMOTEHOST " stat " REMOTECACHE "/" path " >/dev/null 2>&1") == 0;
}
# rsyncit(src, dest) - upload local file SRC to DEST on REMOTEHOST via rsync.
#
#   src:  path of the file on the local machine.
#   dest: destination path on REMOTEHOST.
#
# rsync is run with --mkpath, -z and -P.
# Returns 1 when rsync exits with status 0, otherwise 0.
function rsyncit(src, dest)
{
	if (system("rsync --mkpath -zP " src " " REMOTEHOST ":" dest) != 0) {
		return 0;
	}
	return 1;
}
# cpit(src, dest) - copy SRC to DEST, both located on REMOTEHOST.
#
#   src:  existing path on the remote machine.
#   dest: path on the remote machine to copy to.
#
# The copy is performed by running `cp src dest` on REMOTEHOST over ssh.
# Returns 1 when that command exits with status 0, otherwise 0.
#
# ssh is run with -n so it never reads from awk's standard input; without it,
# ssh could swallow pending records when the file list is piped in.
#
# NOTE(review): src and dest are interpolated into the command line unquoted,
# so paths with whitespace or shell metacharacters will misbehave.
function cpit(src, dest)
{
	return system("ssh -n " REMOTEHOST " cp " src " " dest) == 0;
}
# Start-up: read configuration from the environment, reset the counters and
# verify that the remote host is reachable before any record is processed.
BEGIN {
	VERSION = "v1.0";

	# All configuration is taken from environment variables of the same name.
	LOCALDIR = ENVIRON["LOCALDIR"];
	REMOTEDIR = ENVIRON["REMOTEDIR"];
	REMOTECACHE = ENVIRON["REMOTECACHE"];
	REMOTEHOST = ENVIRON["REMOTEHOST"];
	# Boolean flag: 1 when NOCACHE is set to any non-empty value, else 0.
	NOCACHE = ENVIRON["NOCACHE"] != "";

	# Counters reported by the END rule.
	done = 0;
	cached = 0;
	total = 0;

	if (REMOTEHOST == "") {
		print "bigfiles -- must specify remote host to copy to";
		exit 1;
	}

	printf("bigfiles %s: host=%s, local=%s, remote=%s\n", VERSION, REMOTEHOST, LOCALDIR, REMOTEDIR);

	# Connectivity probe; its output (the remote `uname -a`) is shown to the
	# user. -n keeps ssh from reading awk's standard input, which would
	# otherwise let it consume records when the file list is piped in.
	if (system("ssh -n " REMOTEHOST " uname -a") != 0) {
		print "bigfiles -- remote host", REMOTEHOST, "did not respond over ssh";
		exit 1;
	}

	printf("\n");
}
# Skip records that carry no data: empty lines and comment lines, i.e. lines
# whose very first character is a hash.
$0 == "" || substr($0, 1, 1) == "#" {
	next;
}
# Every other line is a file record to copy.
#
#   $1 - local path, relative to LOCALDIR.
#   $2 - remote path, relative to both REMOTECACHE and REMOTEDIR. When it ends
#        in a slash, $1 is appended to it.
#
# The file is uploaded into REMOTECACHE with rsync unless a cached copy is
# already there, then copied from the cache to its place under REMOTEDIR.
# Setting NOCACHE skips the cache lookup so the file is always re-uploaded.
#
# Counters: total counts every record seen, done counts records whose final
# remote copy succeeded, cached counts the done records served from the cache.
{
	was_cached = 0;
	total++;

	# A record needs both columns. Skipping here leaves it counted in total
	# but not in done, so it is reported as failed, and no remote command is
	# issued with an empty path.
	if (NF < 2) {
		print "bigfiles -- malformed record on line", NR, "(expected <localpath> <remotepath>)";
		next;
	}

	localfile = LOCALDIR "/" $1;

	fullpath = $2;
	if (fullpath ~ /\/$/) {
		fullpath = fullpath $1;
	}

	cachefile = REMOTECACHE "/" fullpath;
	destfile = REMOTEDIR "/" fullpath;

	# Show the resolved destination of this file rather than the remote root.
	printf("file=%s, dest=%s... ", localfile, destfile);

	# NOCACHE is tested first so that no ssh round trip is spent on the
	# lookup when caching is disabled.
	if (!NOCACHE && is_cached(fullpath)) {
		was_cached = 1;
		printf("[CACHED]\n");
	} else {
		printf("[NEW]\n");
		# A failed upload leaves nothing to copy; the record stays failed.
		if (!rsyncit(localfile, cachefile)) {
			next;
		}
	}

	if (cpit(cachefile, destfile)) {
		done++;
		if (was_cached) {
			cached++;
		}
	}
}
# Final summary: successes (split into cached and newly uploaded), failures
# and the overall number of records, all derived from the three counters.
END {
	new_count = done - cached;
	fail_count = total - done;
	printf("done: %d success (%d cached, %d new), %d failed, %d total\n", done, cached, new_count, fail_count, total);
}
# vim: ft=awk