1
0
Fork 0

Add bigfiles system

Adds a system which should allow us to avoid copying the same massive
files every time we want to update the site.

Improves siteupdate performance by an order of magnitude.
This commit is contained in:
Ethan Marshall 2024-09-20 11:42:37 +01:00
parent f8943bb2ba
commit d177a087ea
Signed by: ejv2
GPG key ID: EC2FAEF4DB8968D8
4 changed files with 132 additions and 1 deletions

10
bigfiles Executable file
View file

@ -0,0 +1,10 @@
#!/bin/sh
#
# bigfiles - a system for getting big files to the server efficiently
# part of ejv2.cc -- by Ethan Marshall in 2024
#
# Wrapper: hands the settings below to bigfiles.awk through the environment
# and runs it over bigfiles.tsv. Both files are looked up in the current
# working directory.
#
# REMOTEHOST is fixed here. LOCALDIR, REMOTECACHE and REMOTEDIR keep the
# caller's value when it is set and non-empty, and fall back to the defaults
# shown otherwise.
export REMOTEHOST="ejv2.cc"
export LOCALDIR="${LOCALDIR:-big}"
export REMOTECACHE="${REMOTECACHE:-/usr/share/bigfiles}"
export REMOTEDIR="${REMOTEDIR:-/var/www/html}"

# Last command in the script, so its exit status becomes the wrapper's.
./bigfiles.awk bigfiles.tsv

108
bigfiles.awk Executable file
View file

@ -0,0 +1,108 @@
#!/usr/bin/awk -f
#
# bigfiles - a system for getting big files to the server efficiently
# part of ejv2.cc -- by Ethan Marshall in 2024
#
# You usually don't want to call this directly; use the wrapper script for
# convenience!
#
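# Bigfiles parses a TSV file containing one record per line in the format:
# <localpath> <remotepath>
# Fields are split with awk's default field separator (any run of spaces or
# tabs), so neither path may itself contain whitespace.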
# If remotepath ends in a slash, it is assumed to be a directory and the
# localpath filename is appended to it.
#
# Relies upon rsync for file uploading.
#
# TODO:
# - Perhaps we should buffer lots of file copies into the same rsync call?
# Returns truthy if a cached copy of path already exists on REMOTEHOST.
# Cached copies live in REMOTECACHE under the same relative path as their
# intended destination; they are copied into place on the remote when needed.
#
#   path            destination path, relative to REMOTECACHE
#   target, remote  awk-style locals (extra parameters), not arguments
#
# The command line is parsed twice: first by the local shell that system()
# starts, then by the shell ssh hands the command to on the remote. The path
# is therefore single-quoted for the remote shell, and the finished remote
# command is single-quoted once more for the local shell. As a consequence
# the path is taken literally on the remote (no globbing, no ~ expansion).
# All output of the probe is discarded; only its exit status matters.
function is_cached(path,    target, remote)
{
    target = REMOTECACHE "/" path;
    gsub(/'/, "'\\''", target);     # ' -> '\'' so it survives single quotes
    remote = "stat '" target "'";
    gsub(/'/, "'\\''", remote);     # second layer, for the local shell
    return system("ssh " REMOTEHOST " '" remote "' >/dev/null 2>&1") == 0;
}
# Uploads the local file src to dest on REMOTEHOST with rsync.
# Flags used: --mkpath (create missing leading directories of dest),
# -z (compress in transit), -P (keep partial files and show progress).
# Returns truthy on success, i.e. when rsync exits with status 0.
#
#   src           local path of the file to upload
#   dest          path on REMOTEHOST to upload to
#   lsrc, ldest   awk-style locals (extra parameters), not arguments
#
# Both operands are single-quoted so the local shell started by system()
# hands each of them to rsync as exactly one argument, whatever characters
# they contain.
# NOTE(review): rsync releases that still pass remote paths through the
# remote shell would additionally need -s/--protect-args for a dest
# containing whitespace -- confirm against the deployed rsync version.
function rsyncit(src, dest,    lsrc, ldest)
{
    lsrc = src;
    gsub(/'/, "'\\''", lsrc);       # ' -> '\'' so it survives single quotes
    ldest = REMOTEHOST ":" dest;
    gsub(/'/, "'\\''", ldest);
    return system("rsync --mkpath -zP '" lsrc "' '" ldest "'") == 0;
}
# Copies src to dest on REMOTEHOST by running cp there over ssh.
# Returns truthy on success, i.e. when the ssh command exits with status 0.
#
#   src, dest             paths on REMOTEHOST
#   rsrc, rdest, remote   awk-style locals (extra parameters), not arguments
#
# The command line is parsed by the local shell (via system()) and again by
# the shell ssh runs on the remote, so the paths are single-quoted for the
# remote shell and the finished remote command is single-quoted once more
# for the local one. Paths are therefore taken literally on the remote.
function cpit(src, dest,    rsrc, rdest, remote)
{
    rsrc = src;
    gsub(/'/, "'\\''", rsrc);       # ' -> '\'' so it survives single quotes
    rdest = dest;
    gsub(/'/, "'\\''", rdest);
    remote = "cp '" rsrc "' '" rdest "'";
    gsub(/'/, "'\\''", remote);     # second layer, for the local shell
    return system("ssh " REMOTEHOST " '" remote "'") == 0;
}
# Start-up: read the configuration from the environment, reset the counters
# and verify that the remote host can be reached before any record is read.
# Fatal problems are reported on stderr and end the run with exit status 1.
# NOTE: awk still runs the END rule after an exit issued here, so the summary
# line is printed (with all counters at zero) even on these early exits.
BEGIN {
    VERSION = "v1.0";

    LOCALDIR = ENVIRON["LOCALDIR"];         # prefix for local source files
    REMOTEDIR = ENVIRON["REMOTEDIR"];       # prefix for final destinations
    REMOTECACHE = ENVIRON["REMOTECACHE"];   # prefix for cached uploads
    REMOTEHOST = ENVIRON["REMOTEHOST"];     # host used for ssh and rsync
    NOCACHE = ENVIRON["NOCACHE"] != "";     # 1 when NOCACHE is set non-empty

    done = 0;       # records uploaded/copied successfully
    cached = 0;     # successful records served from the remote cache
    total = 0;      # records seen

    if (REMOTEHOST == "") {
        print "bigfiles -- must specify remote host to copy to" > "/dev/stderr";
        exit 1;
    }

    printf("bigfiles %s: host=%s, local=%s, remote=%s\n", VERSION, REMOTEHOST, LOCALDIR, REMOTEDIR);

    # Cheap connectivity probe; the remote's uname output is shown as-is.
    if (system("ssh " REMOTEHOST " uname -a") != 0) {
        print "bigfiles -- remote host", REMOTEHOST, "did not respond over ssh" > "/dev/stderr";
        exit 1;
    }
    printf("\n");
}
# Comments are lines which begin with a hash literal; ignore them.
# Lines without any field are ignored as well. NF == 0 covers truly empty
# lines and lines made up only of spaces or tabs, which would otherwise fall
# through to the record rule and be counted as failed uploads.
/^#/ || NF == 0 {
    next;
}
# Every other line is a file record to copy: $1 is the source path below
# LOCALDIR and $2 the destination path below REMOTEDIR.
#
# The file is uploaded into REMOTECACHE first, unless a cached copy already
# exists there, and is then copied from the cache to its destination on the
# remote. Setting NOCACHE in the environment skips the cache lookup, so the
# file is always handed to rsync again.
# A record that fails at any step only bumps total; END derives the failure
# count as total - done.
{
    total++;

    # Both fields are required. With an empty $2 the cache lookup would stat
    # REMOTECACHE itself, report a hit, and then try to cp a directory.
    if (NF < 2) {
        printf("bigfiles -- %s:%d: expected <localpath> <remotepath>, skipping\n", FILENAME, FNR) > "/dev/stderr";
        next;
    }

    was_cached = 0;
    localfile = LOCALDIR "/" $1;

    # A remote path ending in a slash names a directory: append the local
    # path to it to obtain the destination file.
    fullpath = $2;
    if (fullpath ~ /\/$/) {
        fullpath = fullpath $1;
    }
    cachefile = REMOTECACHE "/" fullpath;
    destfile = REMOTEDIR "/" fullpath;

    # Report the per-file destination (the banner already shows REMOTEDIR).
    printf("file=%s, dest=%s... ", localfile, destfile);

    if (!NOCACHE && is_cached(fullpath)) {
        was_cached = 1;
        printf("[CACHED]\n");
    } else {
        printf("[NEW]\n");
        if (!rsyncit(localfile, cachefile)) {
            next;   # upload failed; record stays counted as failed
        }
    }

    if (cpit(cachefile, destfile)) {
        done++;
        if (was_cached) {
            cached++;
        }
    }
}
# Final summary: successes are split into cache hits and fresh uploads, and
# every record that did not succeed is reported as failed.
END {
    fresh = done - cached;
    failed = total - done;
    printf("done: %d success (%d cached, %d new), %d failed, %d total\n", done, cached, fresh, failed, total);
}
# vim: ft=awk

11
bigfiles.tsv Normal file
View file

@ -0,0 +1,11 @@
# bigfiles - a system for getting big files to the server efficiently
# part of ejv2.cc -- by Ethan Marshall in 2024
#
# This is the data file.
# Format is <local filename> <remote path>
# If <remote path> is a directory IT MUST END WITH A SLASH!
#
# See environment variables for where these are relative to.
join-castaway.mp4 blog/making-of-join-castaway/
join-castaway.webm blog/making-of-join-castaway/
Can't render this file because it has a wrong number of fields in line 10.

View file

@ -4,4 +4,6 @@ rm -rf public/
hugo
ssh root@ejv2.cc rm -rf /var/www/html/
rsync -r public/ ejv2.cc:/var/www/html/
rsync -zr public/ ejv2.cc:/var/www/html/
./bigfiles