1
0
Fork 0

Compare commits

...

2 commits

Author SHA1 Message Date
d177a087ea
Add bigfiles system
Adds a system which should allow us to avoid copying the same massive
files every time we want to update the site.

Improves siteupdate performance by an order of magnitude.
2024-09-20 11:44:18 +01:00
f8943bb2ba
Move castaway backups to big files area 2024-09-20 11:44:16 +01:00
6 changed files with 132 additions and 1 deletions

10
bigfiles Executable file
View file

@ -0,0 +1,10 @@
#!/bin/sh
#
# bigfiles - a system for getting big files to the server efficiently
# part of ejv2.cc -- by Ethan Marshall in 2024
#
# Wrapper around bigfiles.awk: supplies a default for each setting the awk
# script reads from the environment, then runs it against bigfiles.tsv.
# Any of these can be overridden by setting it in the caller's environment:
#   REMOTEHOST   host that ssh/rsync connect to
#   LOCALDIR     local directory the listed files are read from
#   REMOTECACHE  directory on the remote host holding cached copies
#   REMOTEDIR    directory on the remote host files are copied into
#
# Must be run from the directory containing bigfiles.awk and bigfiles.tsv,
# because both are referenced relative to the current directory.
REMOTEHOST=${REMOTEHOST:-"ejv2.cc"} \
LOCALDIR=${LOCALDIR:-"big"} \
REMOTECACHE=${REMOTECACHE:-"/usr/share/bigfiles"} \
REMOTEDIR=${REMOTEDIR:-"/var/www/html"} \
./bigfiles.awk bigfiles.tsv

108
bigfiles.awk Executable file
View file

@ -0,0 +1,108 @@
#!/usr/bin/awk -f
#
# bigfiles - a system for getting big files to the server efficiently
# part of ejv2.cc -- by Ethan Marshall in 2024
#
# You usually don't want to call this directly; use the wrapper script for
# convenience!
#
# Bigfiles parses a TSV file containing a list in the format:
# <localpath> <remotepath>
# If remotepath ends in a slash, it is assumed to be a directory and the
# localpath filename is appended to it.
#
# Relies upon rsync for file uploading.
#
# TODO:
# - Perhaps we should buffer lots of file copies into the same rsync call?
# Quotes s for a POSIX shell: the result is one single-quoted word that the
# shell reads back as exactly s. Embedded single quotes are written as '\''
# (close the quotes, emit an escaped quote, reopen the quotes).
function shquote(s)
{
	gsub(/'/, "'\\''", s);
	return "'" s "'";
}

# Returns truthy if a cached copy of path exists on REMOTEHOST.
# Cached copies live in REMOTECACHE under their intended remote path; they are
# then copied into place on the remote when needed.
function is_cached(path)
{
	# The remote command is quoted twice: the inner quoting protects the path
	# from the remote shell that ssh hands the command to, the outer quoting
	# protects the whole command from the local shell run by system().
	return system("ssh " REMOTEHOST " " shquote("stat " shquote(REMOTECACHE "/" path)) " >/dev/null 2>&1") == 0;
}

# Uses rsync with some flags to copy src to dest on REMOTEHOST.
# Both paths are quoted for the local shell so that metacharacters in a file
# name cannot split or alter the rsync command line.
# Returns truthy on success.
function rsyncit(src, dest)
{
	return system("rsync --mkpath -zP " shquote(src) " " REMOTEHOST ":" shquote(dest)) == 0;
}

# Copies src to dest on REMOTEHOST by running cp over ssh.
# Quoted twice for the same reason as in is_cached: once for the remote shell,
# once for the local one.
# Returns truthy on success.
function cpit(src, dest)
{
	return system("ssh " REMOTEHOST " " shquote("cp " shquote(src) " " shquote(dest))) == 0;
}
# Start-up: read the configuration from the environment, reset the counters
# and check that the remote host is reachable before any record is processed.
BEGIN {
	VERSION = "v1.0";

	LOCALDIR = ENVIRON["LOCALDIR"];
	REMOTEDIR = ENVIRON["REMOTEDIR"];
	REMOTECACHE = ENVIRON["REMOTECACHE"];
	REMOTEHOST = ENVIRON["REMOTEHOST"];
	# Truthy when NOCACHE is set to any non-empty value.
	NOCACHE = (ENVIRON["NOCACHE"] != "");

	done = 0;
	cached = 0;
	total = 0;

	# Diagnostics go to stderr so they are not mixed into the progress output.
	if (REMOTEHOST == "") {
		print "bigfiles -- must specify remote host to copy to" > "/dev/stderr";
		exit 1;
	}

	# Remote paths are built as REMOTEDIR "/" path and REMOTECACHE "/" path;
	# with either one empty they would point at the remote filesystem root.
	if (REMOTEDIR == "" || REMOTECACHE == "") {
		print "bigfiles -- must specify REMOTEDIR and REMOTECACHE" > "/dev/stderr";
		exit 1;
	}

	printf("bigfiles %s: host=%s, local=%s, remote=%s\n", VERSION, REMOTEHOST, LOCALDIR, REMOTEDIR);

	# Connectivity check: run a trivial command on the remote over ssh.
	if (system("ssh " REMOTEHOST " uname -a") != 0) {
		print "bigfiles -- remote host", REMOTEHOST, "did not respond over ssh" > "/dev/stderr";
		exit 1;
	}

	printf("\n");
}
# Skip records that carry no file entry: empty lines, and comment lines whose
# first character is a hash.
$0 == "" || substr($0, 1, 1) == "#" {
	next;
}
# Every other line is a file record to copy: $1 is the path below LOCALDIR,
# $2 the path below REMOTEDIR (with $1 appended when $2 ends in a slash).
#
# The file is uploaded into REMOTECACHE with rsync unless a cached copy is
# already there, then copied from the cache to its destination on the remote.
# Updates the total, done and cached counters reported by the END rule.
{
	total++;

	# A record needs both fields. Without $2 the cache path would be the cache
	# directory itself, so report the line and count it as failed instead.
	if (NF < 2) {
		printf("bigfiles -- %s:%d: expected <localpath> <remotepath>, skipping\n", FILENAME, FNR) > "/dev/stderr";
		next;
	}

	was_cached = 0;
	localfile = LOCALDIR "/" $1;

	fullpath = $2;
	if (fullpath ~ /\/$/) {
		fullpath = fullpath $1;
	}

	cachefile = REMOTECACHE "/" fullpath;
	destfile = REMOTEDIR "/" fullpath;

	printf("file=%s, dest=%s... ", localfile, destfile);

	# NOCACHE (set in BEGIN from the environment) skips the cache lookup, so
	# the file is always handed to rsync.
	if (!NOCACHE && is_cached(fullpath)) {
		was_cached = 1;
		printf("[CACHED]\n");
	} else {
		printf("[NEW]\n");
		# A failed upload leaves nothing to copy; the record stays counted in
		# total only, which makes it show up as failed in the summary.
		if (!rsyncit(localfile, cachefile)) {
			next;
		}
	}

	if (cpit(cachefile, destfile)) {
		done++;
		if (was_cached) {
			cached++;
		}
	}
}
# Summary line: how many records succeeded (split into served-from-cache and
# newly uploaded), how many failed, and how many were seen in total.
END {
	fresh = done - cached;
	failed = total - done;
	printf("done: %d success (%d cached, %d new), %d failed, %d total\n", done, cached, fresh, failed, total);
}
# vim: ft=awk

11
bigfiles.tsv Normal file
View file

@ -0,0 +1,11 @@
# bigfiles - a system for getting big files to the server efficiently
# part of ejv2.cc -- by Ethan Marshall in 2024
#
# This is the data file.
# Format is <local filename> <remote path>
# If <remote path> is a directory IT MUST END WITH A SLASH!
#
# See environment variables for where these are relative to.
join-castaway.mp4 blog/making-of-join-castaway/
join-castaway.webm blog/making-of-join-castaway/
Can't render this file because it has a wrong number of fields in line 10.

View file

@ -4,4 +4,6 @@ rm -rf public/
hugo hugo
ssh root@ejv2.cc rm -rf /var/www/html/ ssh root@ejv2.cc rm -rf /var/www/html/
rsync -r public/ ejv2.cc:/var/www/html/ rsync -zr public/ ejv2.cc:/var/www/html/
./bigfiles