#!/usr/bin/awk -f
#
# bigfiles - a system for getting big files to the server efficiently
# part of ejv2.cc -- by Ethan Marshall in 2024
#
# You usually don't want to call this directly; use the wrapper script for
# convenience!
#
# Bigfiles parses a TSV file containing a list in the format:
#
#   localpath TAB remotepath
#
# NOTE(review): the format line above is reconstructed from the record rule
# below, which reads $1 as the local path and $2 as the remote path. This
# script does not set FS itself; presumably the wrapper passes -F for tabs.
# Confirm against the wrapper.
#
# If remotepath ends in a slash, it is assumed to be a directory and the
# localpath (exactly as written in the first column) is appended to it.
# Blank lines and lines starting with a hash are ignored.
#
# Configuration is read from the environment:
#   REMOTEHOST   host to copy to (required)
#   LOCALDIR     prefix joined onto every localpath
#   REMOTEDIR    prefix joined onto every final destination on the remote
#   REMOTECACHE  prefix of the remote cache the files are uploaded into
#   NOCACHE      if non-empty, skip the cache lookup and always re-upload
#
# Relies upon rsync for file uploading.
#
# TODO:
#   - Perhaps we should buffer lots of file copies into the same rsync call?

# Quotes s so that a POSIX shell reads it back as exactly one literal word.
# Embedded single quotes are emitted as '\''.
# i, n, parts and out are locals (awk idiom: extra parameters).
function shquote(s,    i, n, parts, out) {
    n = split(s, parts, "'");
    out = parts[1];
    for (i = 2; i <= n; i++) {
        out = out "'\\''" parts[i];
    }
    return "'" out "'";
}

# Quotes a path for the shell like shquote(), but leaves a leading tilde
# prefix ("~/" or "~user/") unquoted so that the shell still expands it.
function pathquote(path) {
    if (match(path, /^~[A-Za-z0-9._-]*\//)) {
        return substr(path, 1, RLENGTH) shquote(substr(path, RLENGTH + 1));
    }
    return shquote(path);
}

# Builds the local command line that runs cmd on REMOTEHOST through ssh.
# cmd is passed as a single quoted word, so it is parsed by the remote shell
# only. -n keeps ssh from reading stdin: without it ssh could consume the
# record list when awk itself is reading the records from stdin.
function ssh_cmd(cmd) {
    return "ssh -n " shquote(REMOTEHOST) " " shquote(cmd);
}

# Returns truthy if a remotely cached copy exists on the remote.
# Cached copies are stored by storing in REMOTECACHE under their intended remote path.
# These copies are then copied on the remote when needed.
# Any failure of the remote stat (including ssh itself failing) counts as
# "not cached".
function is_cached(path) {
    return system(ssh_cmd("stat " pathquote(REMOTECACHE "/" path)) " >/dev/null 2>&1") == 0;
}

# Uses rsync with some flags to copy src to dest on REMOTEHOST.
# Returns truthy on success.
# NOTE(review): only the local shell is protected here. Depending on the
# rsync version the remote side may still word-split dest; -s/--protect-args
# may be needed for such paths. Confirm against the deployed rsync.
function rsyncit(src, dest) {
    return system("rsync --mkpath -zP " pathquote(src) " " shquote(REMOTEHOST ":" dest)) == 0;
}

# Copies files around on REMOTEHOST using ssh.
# Returns truthy on success.
function cpit(src, dest) {
    return system(ssh_cmd("cp " pathquote(src) " " pathquote(dest))) == 0;
}

# Reads the configuration, resets the counters and verifies that the remote
# host answers over ssh before any record is processed.
BEGIN {
    VERSION = "v1.0";

    LOCALDIR = ENVIRON["LOCALDIR"];
    REMOTEDIR = ENVIRON["REMOTEDIR"];
    REMOTECACHE = ENVIRON["REMOTECACHE"];
    REMOTEHOST = ENVIRON["REMOTEHOST"];
    NOCACHE = ENVIRON["NOCACHE"] != "";

    # Set before every fatal exit: awk still runs the END rule after an exit
    # in BEGIN, and the summary must not be printed in that case.
    fatal = 0;

    done = 0;
    cached = 0;
    total = 0;

    if (REMOTEHOST == "") {
        print "bigfiles -- must specify remote host to copy to";
        fatal = 1;
        exit 1;
    }

    printf("bigfiles %s: host=%s, local=%s, remote=%s\n", VERSION, REMOTEHOST, LOCALDIR, REMOTEDIR);

    if (system(ssh_cmd("uname -a")) != 0) {
        print "bigfiles -- remote host", REMOTEHOST, "did not respond over ssh";
        fatal = 1;
        exit 1;
    }

    printf("\n");
}

# Comments are lines which begin with a hash literal
# Ignore them
/^#/ || /^$/ {
    next;
}

# Every other line is a file record to copy
{
    was_cached = 0;
    total++;

    # A record without both columns would otherwise address the bare cache
    # and destination directories; report it and let it count as failed.
    if ($1 == "" || $2 == "") {
        printf("bigfiles -- line %d: expected localpath and remotepath, skipping\n", NR);
        next;
    }

    localfile = LOCALDIR "/" $1;
    fullpath = $2;
    if (fullpath ~ /\/$/) {
        fullpath = fullpath $1;
    }

    cachefile = REMOTECACHE "/" fullpath;
    destfile = REMOTEDIR "/" fullpath;

    # NOTE(review): dest= shows REMOTEDIR, not destfile; kept as-is in case
    # the shorter output is intentional.
    printf("file=%s, dest=%s... ", localfile, REMOTEDIR);

    # NOCACHE forces a fresh upload even if a cached copy exists.
    if (!NOCACHE && is_cached(fullpath)) {
        was_cached = 1;
        printf("[CACHED]\n");
    } else {
        printf("[NEW]\n");
        if (!rsyncit(localfile, cachefile)) {
            # Upload failed: the record stays counted in total only, which
            # the summary reports as failed.
            next;
        }
    }

    if (cpit(cachefile, destfile)) {
        done++;
        if (was_cached) {
            cached++;
        }
    }
}

# Prints the summary, unless BEGIN aborted with a fatal error.
END {
    if (fatal) {
        exit 1;
    }

    printf("done: %d success (%d cached, %d new), %d failed, %d total\n", done, cached, done - cached, total - done, total);
}

# vim: ft=awk