Add bigfiles
system
Adds a system which should allow us to avoid copying the same massive files every time we want to update the site. Improves siteupdate performance by an order of magnitude.
This commit is contained in:
parent
f8943bb2ba
commit
d177a087ea
4 changed files with 132 additions and 1 deletions
10
bigfiles
Executable file
10
bigfiles
Executable file
|
@ -0,0 +1,10 @@
|
||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# bigfiles - a system for getting big files to the server efficiently
|
||||||
|
# part of ejv2.cc -- by Ethan Marshall in 2024
|
||||||
|
|
||||||
|
# Run the awk driver with its configuration passed through the environment.
# Each setting below is only a default: a value already present in the
# caller's environment takes precedence.
REMOTEHOST=${REMOTEHOST:-"ejv2.cc"} \
LOCALDIR=${LOCALDIR:-"big"} \
REMOTECACHE=${REMOTECACHE:-"/usr/share/bigfiles"} \
REMOTEDIR=${REMOTEDIR:-"/var/www/html"} \
./bigfiles.awk bigfiles.tsv
|
108
bigfiles.awk
Executable file
108
bigfiles.awk
Executable file
|
@ -0,0 +1,108 @@
|
||||||
|
#!/usr/bin/awk -f
|
||||||
|
#
|
||||||
|
# bigfiles - a system for getting big files to the server efficiently
|
||||||
|
# part of ejv2.cc -- by Ethan Marshall in 2024
|
||||||
|
#
|
||||||
|
# You usually don't want to call this directly; use the wrapper script for
|
||||||
|
# convenience!
|
||||||
|
#
|
||||||
|
# Bigfiles parses a TSV file containing a list in the format:
|
||||||
|
# <localpath> <remotepath>
|
||||||
|
# If remotepath ends in a slash, it is assumed to be a directory and the
|
||||||
|
# localpath filename is appended to it.
|
||||||
|
#
|
||||||
|
# Relies upon rsync for file uploading.
|
||||||
|
#
|
||||||
|
# TODO:
|
||||||
|
# - Perhaps we should buffer lots of file copies into the same rsync call?
|
||||||
|
|
||||||
|
# Quotes s so that a POSIX shell reads it back as one literal word: the value
# is wrapped in single quotes and every embedded single quote becomes '\''.
function shquote(s)
{
    gsub(/'/, "'\\''", s);
    return "'" s "'";
}

# Runs the command line cmd on REMOTEHOST over ssh and returns its exit status.
# cmd is handed to ssh as a single quoted word so that the local shell leaves
# it untouched and only the remote shell parses it. redirect is optional text
# appended to the local command (e.g. output redirections).
# REMOTEHOST itself is deliberately left unquoted, as in the rest of the file.
function remote_run(cmd, redirect)
{
    return system("ssh " REMOTEHOST " " shquote(cmd) redirect);
}

# Returns truthy if a cached copy of path exists on the remote.
# Cached copies live in REMOTECACHE under their intended remote path and are
# copied into place on the remote when needed.
# The path is passed literally: no tilde or glob expansion happens remotely.
function is_cached(path)
{
    return remote_run("stat " shquote(REMOTECACHE "/" path), " >/dev/null 2>&1") == 0;
}

# Uses rsync with some flags to copy src to dest on REMOTEHOST.
# Both arguments are quoted for the local shell only, so local file names
# containing shell metacharacters are passed to rsync intact.
# Returns truthy on success.
function rsyncit(src, dest)
{
    return system("rsync --mkpath -zP " shquote(src) " " shquote(REMOTEHOST ":" dest)) == 0;
}

# Copies src to dest on REMOTEHOST using cp over ssh.
# Both paths are quoted for the remote shell and passed literally.
# Returns truthy on success.
function cpit(src, dest)
{
    return remote_run("cp " shquote(src) " " shquote(dest)) == 0;
}
|
||||||
|
|
||||||
|
# Startup: load configuration from the environment, reset the counters and
# check that REMOTEHOST answers over ssh before any record is processed.
# Fatal diagnostics go to stderr so they are not mixed into the progress
# output on stdout.
# NOTE(review): awk still runs END rules after an exit in BEGIN, so the
# summary line is printed even when startup fails.
BEGIN {
    VERSION = "v1.0";
    LOCALDIR = ENVIRON["LOCALDIR"];
    REMOTEDIR = ENVIRON["REMOTEDIR"];
    REMOTECACHE = ENVIRON["REMOTECACHE"];
    REMOTEHOST = ENVIRON["REMOTEHOST"];
    # Any non-empty NOCACHE value switches the flag on.
    NOCACHE = ENVIRON["NOCACHE"] != "";

    # Counters reported by the END summary.
    done = 0;
    cached = 0;
    total = 0;

    if (REMOTEHOST == "") {
        print "bigfiles -- must specify remote host to copy to" > "/dev/stderr";
        exit 1;
    }

    printf("bigfiles %s: host=%s, local=%s, remote=%s\n", VERSION, REMOTEHOST, LOCALDIR, REMOTEDIR);
    # Connectivity probe; the remote's uname output is shown to the user.
    if (system("ssh " REMOTEHOST " uname -a") != 0) {
        print "bigfiles -- remote host", REMOTEHOST, "did not respond over ssh" > "/dev/stderr";
        exit 1;
    }
    printf("\n");
}
|
||||||
|
|
||||||
|
# Skip lines that carry no record: comments (a hash in the first column) and
# lines that are empty or hold only whitespace. NF == 0 covers both kinds of
# blank line, so a stray space or tab is never treated as a file record.
/^#/ || NF == 0 {
    next;
}
|
||||||
|
|
||||||
|
# Every other line is a file record to copy: <localpath> <remotepath>.
# The file is uploaded into the remote cache unless a cached copy already
# exists, then copied from the cache to its destination on the remote.
# Setting NOCACHE skips the cache lookup, so the file is always uploaded.
# Updates the counters total, done and cached used by the final summary.
{
    was_cached = 0;
    total++;

    # A record needs both fields; a malformed line counts as a failure
    # (total was already incremented, done is not).
    if (NF < 2) {
        printf("bigfiles -- line %d: expected <localpath> <remotepath>, skipping\n", FNR) > "/dev/stderr";
        next;
    }

    localfile = LOCALDIR "/" $1;
    fullpath = $2;
    # A trailing slash marks the remote path as a directory: append the
    # local path to obtain the file's full remote path.
    if (fullpath ~ /\/$/) {
        fullpath = fullpath $1;
    }
    cachefile = REMOTECACHE "/" fullpath;
    destfile = REMOTEDIR "/" fullpath;

    # Report the actual destination of this file, not just the remote root.
    printf("file=%s, dest=%s... ", localfile, destfile);
    if (!NOCACHE && is_cached(fullpath)) {
        was_cached = 1;
        printf("[CACHED]\n");
    } else {
        printf("[NEW]\n");
        # A failed upload leaves nothing to copy; move on to the next record.
        if (!rsyncit(localfile, cachefile)) {
            next;
        }
    }

    if (cpit(cachefile, destfile)) {
        done++;
        if (was_cached) {
            cached++;
        }
    }
}
|
||||||
|
|
||||||
|
# Final summary. Prints the counters gathered while processing records and
# makes the run fail (exit status 1) when at least one file was not deployed,
# so calling scripts can detect a partial update. When nothing failed no exit
# is issued here, which leaves any earlier exit status untouched.
END {
    failed = total - done;
    printf("done: %d success (%d cached, %d new), %d failed, %d total\n", done, cached, done - cached, failed, total);
    if (failed > 0) {
        exit 1;
    }
}
|
||||||
|
|
||||||
|
# vim: ft=awk
|
11
bigfiles.tsv
Normal file
11
bigfiles.tsv
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
# bigfiles - a system for getting big files to the server efficiently
|
||||||
|
# part of ejv2.cc -- by Ethan Marshall in 2024
|
||||||
|
#
|
||||||
|
# This is the data file.
|
||||||
|
# Format is <local filename> <remote path>
|
||||||
|
# If <remote path> is a directory IT MUST END WITH A SLASH!
|
||||||
|
#
|
||||||
|
# See environment variables for where these are relative to.
|
||||||
|
|
||||||
|
join-castaway.mp4 blog/making-of-join-castaway/
|
||||||
|
join-castaway.webm blog/making-of-join-castaway/
|
Can't render this file because it has a wrong number of fields in line 10.
|
|
@ -4,4 +4,6 @@ rm -rf public/
|
||||||
hugo
|
hugo
|
||||||
|
|
||||||
ssh root@ejv2.cc rm -rf /var/www/html/
|
ssh root@ejv2.cc rm -rf /var/www/html/
|
||||||
rsync -r public/ ejv2.cc:/var/www/html/
|
rsync -zr public/ ejv2.cc:/var/www/html/
|
||||||
|
|
||||||
|
./bigfiles
|
||||||
|
|
Loading…
Reference in a new issue