#!/bin/sh
# ====================[ hg-cohere ]==================== # [ Time-stamp: "2009-05-18 23:47:25 leycec" ] # # A shell script cohering one or more local paths on your local machine # onto one or more local (locally mounted) target paths on your local machine. # These backups are Mercurial changesets, thus preserving changes to your # local machine across time (depending on how frequently you backup) and space # (depending on how many target paths you backup onto). # # This script integrates well with "cron", allowing you to backup on some fixed # schedule (say, every day); and with NFS, CurlFtpFS, and SSHFS, allowing you to # schedule those backups onto one or more remote machines (say, freeshell.org). # # This script assumes Gentoo and Gentoo's Portage package manager (emerge) in # the installation, configuration, and usage instructions, below. (Neither are # strictly required, of course. Any sufficiently competent Linux distribution-- # Debian, Ubuntu, or otherwise--should suffice to install, configure, and use # this script.) # # --------------------( DEPENDENCIES )-------------------- # # Install Mercurial; e.g., on Gentoo: # sudo paludis -i mercurial # # # Optionally, install SSHFS and SSH keychain; e.g., on Gentoo: # sudo paludis -i sshfs-fuse keychain # # # Optionally, install cronbase; e.g., on Gentoo: # sudo paludis -i cronbase # # Installing SSHFS and SSH keychain allows this script to backup onto locally # mounted, remote SSH servers. # # Installing cronbase allows scheduling of this script by moving it, merely, to # "/etc/cron.{daily|hourly|weekly|monthly}". # # --------------------( INSTALLATION )-------------------- # # Move this script to some temporary path, and make it executable. # cp hg-cohere /tmp/; chmod ug+rx /tmp/hg-cohere # # # Edit the "CONFIGURATION" section of this script, below. # vi /tmp/hg-cohere # # # Make one or several ".hgignore" files. (See "CONFIGURATION", below).
# # # Run this script--to try it before scheduling it with a system cronjob. # sh /tmp/hg-cohere # # # Schedule this script to be run "every so often" by the system crontab. # # # # If the system crontab checks "/etc/cron.{daily|hourly|weekly|monthly}/" # # (typically, by installation of cronbase on Gentoo-installed machines), # # schedule this script to be run each day, hour, week, or month by simply # # moving this script into that path. As example: # sudo mv /tmp/hg-cohere /etc/cron.daily/ # # # Otherwise, edit your system crontab by hand and restart the cron daemon. # #vi /etc/crontab; /etc/init.d/vixie-cron restart # # # Check the system log, after that cronjob runs this script, for scheduled # # output from this script. It logs to the syslog cron facility, by default. # tail /var/log/crond/current # # --------------------( CONFIGURATION )-------------------- # You probably do not want to backup every file under "/etc/" and "/home/". # # Luckily, Mercurial lets you ignore files matching certain regular expressions; # unluckily, Mercurial does not ship with a default list of regular expressions # for ignoring commonly undesirable files (e.g., binary, cache, and temporary # files). # # To ignore these files, you must explicitly create one file named ".hgignore" # in the root path for every such repository. This script provides ".hgignore" # sample files, below, which you may use as a starting point for your own. To # install these files, remove the "# "-prefix from each line and copy the # uncommented result into the filename specified. # # # Alternatively, download copies of these files from the following public # # URLs--provided, for your fresh and current use, by the principle author # # of this script. Assuming you have "wget", these commands download: # # (1) "/etc/mercurial/hgignore", for use as a system-wide ".hgignore" file. # # (2) "/etc/.hgignore", for use as an "/etc/"-specific ".hgignore" file. 
# # (3) "~/.hgignore", for use as a home directory-specific ".hgignore" file. # sudo wget -P /etc/mercurial/ http://bcurry.gomen.org/hg/mysidia/etc/mercurial/hgignore # sudo wget -P /etc/ http://bcurry.gomen.org/hg/mysidia/etc/.hgignore # wget -P ~/ http://bcurry.gomen.org/hg/mysidia/home/leycec/.hgignore # # # Now, assuming you'd rather glom these files together by hand than # # download them, you might find the following contents to be a decent # # starting place. # # # # First, make a system-wide "/etc/mercurial/hgignore" file. Mercurial # # ignores all files matching at least one regular expression in this file, # # for all Mercurial repositories on the local machine. As example, # sudo vi /etc/mercurial/hgignore # # ===============[ /etc/mercurial/hgignore ]=============== # syntax: regexp # # # Ignore private files. # ^\.ssh/(id_dsa|id_rsa) # ^\.(lftp/bookmarks|mutt_certificates)$ # # # Ignore private paths. # ^\.gnupg$ # # # Ignore temporary files. # (^|/)\.keep # (^|/)\#.*\#$ # \.(lock|bad|cur|new|old|swp|DS_Store)$ # # # Ignore temporary paths. # (temp|tmp|~)$ # \.(lock|bad|cur|new|old|swp)$ # ^mnt$ # ^\.(eclipse|fontconfig|thumbnails) # # # Ignore connection-specific files. # ^\.(pulse-cookie|fvwm/.fs-restart|fvwm-crystal/.FvwmConsole|serverauth) # # # Ignore connection-specific paths. # ^\.(dbus|keychain|Xauthority)$ # # # Ignore log paths. # (^|/)(log|logs)$ # # # Ignore log files. # \.log$ # ^\.PerlySense$ # # # Ignore history files. # _history$ # \.procmailusage$ # ^\.recently-used # ^\.(lesshst|viminfo|links/links.his)$ # # # Ignore archive files. # \.(bz2|gz|jar|rar|tar|zip)$ # # # Ignore image files. # \.(bmp|gif|jpg|png|xpm)$ # # # Ignore externally-managed files. # ^\.(fehbg|htoprc|mutt/hcache|ssh/known_hosts|zcompdump)$ # (^|/)(tags|TAGS|\.tags)$ # # # Ignore externally-managed paths. # ^\.gconf|\.gnome # ^\.(AbiSuite|bogofilter|fceultra|kismet|mozilla|mpd|offlineimap|zsh)$ # # # Ignore externally-managed, Emacs-specific files and paths. 
# ^\.(ido\.last|projects\.ede)$ # ^\.emacs\.d/site-lisp/emacs-keychain\.el$ # ^\.emacs\.d/(auto-save-list|var|games)$ # # # Next, make an "/etc/"-specific "/etc/.hgignore" file. This file has # # identical syntax as that system-wide "/etc/mercurial/hgignore" file, # # above--but is used to ignore files under "/etc/", specifically. As example, # sudo vi /etc/.hgignore # # ===============[ /etc/.hgignore ]=============== # syntax: regexp # # # Ignore private files. # ^(group|passwd|shadow) # # # Ignore private paths. # ^(apache2/ssl|ssl/.*)$ # # # Ignore temporary files. # ^blkid # ^mtab$ # # # Ignore automatically-generated files. # ^(csh\.env|ld\.so\.conf|ld\.so\.cache|modprobe\.conf|modules\.conf|profile\.csh|profile\.env|resolv\.conf)$ # # # Ignore externally-managed files. # ^(adjtime|client_manuf|fdprm|gentoo-release|localtime|make\.global|mime\.types|protocols|services|ssh/moduli)$ # # # Ignore externally-managed paths. # ^(dispatch-conf\.archive|gconf|eselect|java-config-2/current-system-vm|sound|terminfo|xml|rmt)$ # # # Last, make one user-specific "/home/${USERNAME}/.hgignore" file for each # # user whose "/home/" directory you intend to hg-cohere. As example, # vi /home/leycec/.hgignore # # ===============[ /home/leycec/.hgignore ]=============== # syntax: regexp # # # Ignore temporary paths. # ^\.fvwm-crystal/wallpaper # # # Ignore binary-file paths. # ^pub/(audio|image|new|old|scene|tmp|text|video)$ # ^var$ # # Installing these files is a tad troublesome; but, hopefully, not too tiring. # # Regardless of which backup system you use, you'll need to selectively prevent # that system from backing-up binary and cache files, and other temporaries. The # Mercurial "hgignore" approach seems just as good as any other--and probably no # worse than most. 
# # --------------------( USAGE )-------------------- # To hg-cohere onto an SSH-secured remote machine, run SSHFS on your local # machine to mount some remote path on that machine onto the local machine: # # # Make a local path to which the remote path will be mounted. # mkdir -p ~/mnt/hg # # # Mount the remote path onto that local path. (See "SSHFS" below for # # several SSHFS command-line options, for improving SSHFS performance.) # # As example, # # sshfs leycec@faeroes.freeshell.org:html/hg ~/mnt/hg/ # sshfs ${USERNAME}@${HOSTNAME}:${REMOTE_PATH} ~/mnt/hg/ # # # Edit this script. Specifically, append that local path to the # # "$TARGET_ROOTS" shell variable, below. # vi hg-cohere # # # Run this script to test your edits. # sh hg-cohere # # # Unmount the remote path from that local path. # fusermount -uz ~/mnt/hg/ # # To revert the accidental addition or removal of files (added or removed, # accidentally, via the "hg addremove" command), if you haven't committed those # accidental changes: # # # Jump to the path having the root ".hg/" path to be reverted; e.g.: # cd ~ # # # Examine the list of files to be (accidentally) added and removed. # hg status # # # Revert (unschedule) the addition or removal of those files. # hg revert --all # # # Try fake-scheduling new additions and removals of files, now. # hg addremove --dry-run # # --------------------( DECENTRALIZATION )-------------------- # This script automates decentralization by automatically cloning from existing # "target" (e.g., remote) repositories into non-existant "source" (e.g., local) # repositories, and by automatically pulling from existing such target # repositories into such source repositories. Let's examine an example, to see # what this implies. # # Suppose two source machines (named "alpha" and "beta") and one target machine # (named "omega"), one common user on both source machines (named "quark"), and # that quark installs, configures, and runs this script on machine "alpha", # first. 
Specifically, quark runs an SSH server on "omega", runs an SSHFS-FuSE # client on "alpha" to mount some remote path from "omega" onto "alpha", and # runs this script on "alpha". (See "USAGE" and "SSHFS", below, for SSHFS-FuSE # syntax, specifics, and specific justification for using SSHFS-FuSE.) # # Supposing that, this backups the contents of "/etc/" and "/home/quark/" from # machine "alpha" onto machine "omega" by creating two Mercurial repositories on # "alpha" (at "/etc/.hg/" and "/home/quark/.hg/") and cloning those repositories # onto "omega" (at "${TARGET_ROOT}/etc/.hg/" and # "${TARGET_ROOT}/home/quark/.hg/"). "omega" now has a cloned copy, complete # with full revision history, of the contents of "/etc/" and "/home/quark/" on # "alpha". # # Suppose that quark modifies the contents of "/etc/" or "/home/quark/" on # "alpha" and runs this script on "alpha", again. These modifications will, as # expected, be pushed onto the cloned copy of "/etc/" and "/home/quark/" on # "omega", automatically. # # Suppose, further, that quark modifies the contents of "/etc/" or # "/home/quark/" on "omega" and runs this script on "alpha", again. What occurs? # Expectedly, these modifications will be pulled into the original copy of # "/etc/" and "/home/quark/" on "alpha", automatically. (Thus is sanity, # synchronicity, and syncretic joy fully maintained.) # # Suppose, further and further anon, that quark now wants to distribute these # contents onto a second "source" (i.e., local) machine, named "beta". While # quark could explicitly clone the cloned copy of "/etc/" and "/home/quark/" on # "omega" onto "beta" by running # "hg clone ${LOCAL_SSHFS-FUSE_MOUNTED_PATH_TO_OMEGA}/etc/ /etc/" and # "hg clone ${LOCAL_SSHFS-FUSE_MOUNTED_PATH_TO_OMEGA}/home/quark/ /home/quark/" # on "beta", quark could also implicitly clone these copies by simply running # this script on "beta". 
This script sees, implicitly, that "beta" has no # Mercurial repositories in "/etc/" or "/home/quark/", and thus looks for a # remote repository in "${LOCAL_SSHFS-FUSE_MOUNTED_PATH_TO_OMEGA}/etc/" and a # remote repository in "${LOCAL_SSHFS-FUSE_MOUNTED_PATH_TO_OMEGA}/home/quark/". # Finding both, it automatically clones from those remote repositories onto new # local repositories on "beta". As expected, this requires no unseemly work from # quark and all works as it seems and should. # # --------------------( PURPOSES )-------------------- # To backup "/etc" and "/home" in some versioned, decentralized manner. # Mercurial fulfills this mission nicely; see "WHY MERCURIAL?", below. # # To minimize dependencies, code complexities, and insufferable stupidities. # Mercurial (hg) and this script (hg-cohere) should be the only external # applications needed to perform a simple backup of local machine settings. # Shell scripting fulfills this mission (mostly) nicely. # # To maximize legibility, code maintainability, and literate understandability. # This script should not exceed 100 lines of actual, scriptable code; and that # code should, as much as can be, be self-documenting. (While shell scripting # cannot be called legible, all effort's been made to make it so.) # # You are welcome, hungrily, to rewrite this script in a wilder, wittier # language, if one (or more) of these missions does not maintain for you. (Yo!) # # --------------------( SSHFS )-------------------- # This script does not support the "ssh://", "http://", "https://", or # "static-http://" Mercurial protocols, since supporting those protocols implies # an increase in script complexity; and since that implies rewriting this script # in a language capable of handling complexity; and since that implies adding # that language as another script dependency; and since that, finally, this # author is loathe to do. 
# # This script does, however, support backing up onto remote paths on remote # machines by transparently mounting said paths onto a local path on the local # machine--by leveraging a network filesystem such as CurlFtpFS, NFS, or SSHFS, # typically. (This is more elegant than adding support for Mercurial protocols, # by several orders of giddy magnitude. See "WHY SSHFS?", below, for further # issues around use of Mercurial protocols in a backup system.) # # This script ignores inaccessible target paths with a bright (but ignorable) # warning. As such, you needn't be concerned about whether each locally mounted, # remote path added to "TARGET_ROOTS" is accessible during each system cronjob # run of this script; inaccessible target paths are simply ignored. # # This script may perform badly, lastly, when run over an SSHFS connection # using "default" SSHFS options. When needed, run SSHFS under these options to # correct the bad performance (or other Mercurial warnings and errors): # # # Mount the remote path onto that local path--with optimal SSHFS options. # sshfs -o allow_other,default_permissions,kernel_cache,reconnect,\ # transform_symlinks,compression=yes,cache_timeout=256,\ # cache_stat_timeout=16,cache_dir_timeout=16,cache_link_timeout=16,\ # idmap=user,workaround=rename \ # ${USERNAME}@${HOSTNAME}:${REMOTE_PATH} ~/mnt/hg/ # # --------------------( WHY MERCURIAL? )-------------------- # Why Mercurial? Why not darcs, bzr, git, or another similar, surely suitable # Distributed Version Control System (DVCS)? Why--because they are unsuitable! # # 0. Mercurial is much more time- and space-efficient than darcs. # 1. Mercurial is much more time-efficient than bzr, purportedly. # 2. Mercurial handles symlinks elegantly; darcs, for example, does not. # 3. Mercurial operates seamlessly over SSHFS-mounted filesystems. (Anecdotal # evidence indicates that bzr and git do not.) # 4. Mercurial syntax is much more "conventional" than that of darcs and git.
# (This should not be underestimated. In the long run, all else being equal, # a DVCS with familiar syntax is likely to be used in industry moreso than # a DVCS with unfamiliar syntax.) # 5. I like the periodic table. # # --------------------( WHY SSHFS? )-------------------- # Why SSHFS-mounted filesystems? Why not Mercurial's built-in support for the # "ssh://" protocol? The answer, my friend, is blowing in the bit-wind: # # First, since Mercurial need not be installed on the remote machine to write to # or read from Mercurial repositories provided by the remote machine--when that # machine is accessed over a mountpoint on the local machine. This is vitally # nifty! With clarity, it implies that you can create Mercurial repositories on # systems over which you have no administrative control (e.g., a GMailFS-mounted # Google Mail "filesystem," a CurlFtpFS-mounted FTP account, an SSHFS-mounted # "freeshell.org" account.) # # By contrast, Mercurial must be installed on the remote machine to write to # or read from Mercurial repositories provided from that machine--when that # machine is accessed over the "ssh://", "http://", "https://", and # "static-http://" Mercurial protocols. This can be a crippling requirement. # It requires you have Mercurial and support dependencies (e.g., Python 2.4) # installed on that remote machine; or administrative control to do their # installation yourself. # # However, there are other difficulties with non-"file://" protocols. # # Typically, hg-cohere runs as a cron job. Cron jobs are non-interactive; # therefore, they must access remote servers in passwordless, non-interactive # fashion. For SSH, this implies pre-caching SSH authentication tokens (e.g., # DSA- or RSA-style private keys) to the local filesystem--typically through # the SSH keychain. This, in turn, implies our usage of those tokens through # the SSH keychain, via "source /home/your-username/.keychain/`hostname`-sh".
# In other words, this requires you (the user) to hard-code Yet Another Path, # on initial configuration of this file; which is, really, just another # blatant headache and spur to user adoption. # # Furthermore, support for non-"file://" Mercurial protocols requires we # provide some support for parsing apart Mercurial protocols (probably, via # clever usage of "sed"), below; and thereby break our 100-line intent. # # --------------------( CHANGELOG )-------------------- # 2009-07-26 Cecil Curry # * Renamed to "hg-cohere", which better coheres this script's use. # # 2007-11-20 Cecil Curry # * Permission handling by running Mercurial commands as local users, rather # than "root", whenever possible. This isn't always possible: especially # when running Mercurial against a root-only-writeable "/etc" repository. # * Third-party extensibility by permitting non-Mercurial synchronization of # local repositories to remote repositories via "rsync", "rdiff-backup", or # other external applications. Such synchronization is entirely optional -- # but, occasionally, useful -- and, when enabled, will be performed after # Mercurial synchronization. # * Improved internal code structure. # # 2007-03-01 Cecil Curry # * Created. # # --------------------( TODO )-------------------- # * Improve error detection and recovery, as described by voluminous "#FIXME" # comments, below. # * Add a command-line argument for specifying a user-specific configuration # file, with which to override this file's defaults. Clearly, this file would # be sourced after this file defines those defaults. # * Correct all Raiazome-specific links to this script. # * Consider advertising this script on, mayhap, a Mercurial Wiki, Bourne shell # repository, or similar open-source storehouse, online. # * Rewrite in Python, and generalize to bzr and git. # ....................{ CONFIGURATION =posix }.................... # Run processes run by this script under this "niceness." 
# Niceness is an integer describing the CPU scheduling priority to be allocated
# a process; it ranges from -20 (the most favorable priority) to 19 (the least
# favorable priority).
#
# By default, cron job processes are run with least favorable priority so as to
# minimize the CPU load on all other running processes.
NICENESS=19

# Run processes run by this script under this "ioniceness" and ioniceness class.
# Ioniceness is a pair of integers describing the I/O scheduling priority to be
# allocated a process; the ioniceness class is either 1 (the most favorable
# priority, called "real time"), 2 (the normal priority, called "best effort"),
# or 3 (the lowest priority, called "idle"), while the ioniceness itself is an
# integer ranging from 0 (the most favorable priority) to 7 (the least favorable
# priority).
#
# By default, cron job processes are run with least favorable priority so as to
# minimize the I/O load on all other running processes - but not the "idle"
# priority, as that could prevent a cron job process from running at or near its
# scheduled time.
IONICENESS_CLASS=2
IONICENESS=7

# The local machine's hostname.
#
# The default is probably fine. See documentation for "TARGET_ROOTS", below.
HOSTNAME=$(hostname)

# The path to which temporary files are written.
#
# The default is probably fine.
TEMP_PATH=/tmp

# ....................{ CONFIGURATION =mercurial }....................
# A boolean that, if true, enables Mercurial synchronization. To disable
# Mercurial synchronization, set it to the empty string, ala:
# IS_HG_SYNCING=""
IS_HG_SYNCING=1

# The name of the Mercurial command.
HG_COMMAND="hg"

# Command-line options to be passed to Mercurial.
HG_OPTIONS=""

# Command-line options to be passed to Mercurial, when this script is run as a
# cronjob. (Typically, this reduces the verbosity of Mercurial output.)
#
# The defaults are probably fine.
HG_OPTIONS_IF_CRON="--quiet"

# Command-line options to be passed to Mercurial, when this script is not run as
# a cronjob -- but called, instead, from the command-line.
#
# The defaults are probably fine.
HG_OPTIONS_IF_NOT_CRON="--verbose"

# A whitespace-delimited set of source paths. This script backs-up all paths and
# files under these paths except those explicitly ignored by Mercurial (see
# "CONFIGURATION," above). By default, this backs-up all system-wide settings
# ("/etc/") and user-specific settings ("/home/").
#
# The default values, here, should be fine. That said, you may replace them
# with one or more glob expressions evaluating to one or more local paths on the
# local machine. (These expressions are glob-evaluated on each run of the script
# and may include the standard glob characters: "*", "?", et al.)
#SOURCE_ROOTS="/media/fa_pri/ /home/*/ /etc/"
SOURCE_ROOTS="/home/*/ /etc/"

# A whitespace-delimited set of target paths. This script backs-up every source
# path above into each of these target paths, such that each target path
# receives a cloned copy of every source path as a Mercurial repository;
# specifically, this script makes one Mercurial repository for every source path
# (living under that source path at ".hg/"), then clones every source path
# repository into each target path. As example, supposing the local machine is
# named "mysidia" and has one local user named "leycec", and that you leave these
# source and target paths at their default values, this script backs up as
# follows:
#
#   /etc/ ---------> /home/leycec/mnt/sdf/html/hg/mysidia/etc/
#   /home/leycec/ -> /home/leycec/mnt/sdf/html/hg/home/leycec/
#   /etc/ ---------> /media/sda1/old/hg/mysidia/etc/
#   /home/leycec/ -> /media/sda1/old/hg/home/leycec/
#
# Notice, above, that system-wide settings ("/etc/") are not backed-up to
# "/home/leycec/mnt/sdf/html/hg/etc/"--but to
# "/home/leycec/mnt/sdf/html/hg/mysidia/etc/". System-wide settings tend to be
# machine-specific while user-specific settings, being generalized, tend to be
# machine-independent.
# (That is, you tend to use the same user-specific dotfiles across many
# different machines but tend to use system-wide dotfiles on one and only one
# machine, as system-wide dotfiles are specific to that system.)
#
# Thus, by default, this script backs-up source paths matching "/etc/" to each
# target path appended by the local machine's hostname. If you find this
# behaviour inconvenient, set HOSTNAME="" (above).
#
# Target paths are, customarily, locally mounted paths to external harddrives,
# externally collocated servers, and other (presumably distant and decidably
# "safe") data havens--specific to your feisty needs.
#
# The default values, as such, are probably not fine. Please replace them with
# one or more locally mounted paths.
TARGET_ROOTS="/home/leycec/pub/old/hg"
# TARGET_ROOTS="/home/leycec/pub/old/hg /home/leycec/mnt/sdf/html/hg"
# TARGET_ROOTS="/home/leycec/mnt/sdf/html/hg /home/leycec/pub/old/hg"
# TARGET_ROOTS="/www/af/b/bcurry/hg"

# The name of the script-specific path under "/etc" to which this script copies
# system-wide config files not already under "/etc". Linux kernel config files
# do not, as example, typically reside under "/etc"; nonetheless, it seems
# reasonable that Linux kernel config files should be maintained in the "/etc"
# Mercurial repository along with all other system-wide config files. This
# script ensures that by copying all such files into the path corresponding to
# this variable prior to recording changes in the "/etc" Mercurial repository.
#
# If this path does not already exist and this script has access to make it,
# this script makes it. This is a good thing, mostly.
#
# The default value is probably fine. (By default, this ensures system-wide
# config files not already under "/etc" are copied to "/etc/hg-cohere".)
#
# The script's basename is computed here, directly, rather than read from
# "$SCRIPT_NAME": that constant is only assigned further down this file, so
# expanding it at this point would yield the empty string (and, hence, "/etc/").
EXTRANEOUS_CONF_FILES_PATH="/etc/$(basename "$0")"

# ....................{ CONFIGURATION =post }....................
# A boolean that, if true, enables post-synchronization via "$POST_COMMAND".
#
# The default may or may not be fine. To disable post-synchronization, simply
# comment out this option or set it to the empty string, ala:
# IS_POST_SYNCING=""
IS_POST_SYNCING=1

# The name of the command with which to perform post-synchronization.
#
# The default may or may not be fine. "rsync" and "rdiff-backup" are, at the
# time of this writing, two common commands for post-synchronization. As these
# are external commands, they must be installed externally -- by you, manually.
POST_COMMAND="rsync"

# Command-line options to be passed to "$POST_COMMAND".
#
# The defaults may or may not be fine. Consult "man rsync" or "man
# rdiff-backup", as appropriate. Several notes, for rsync users:
#
# * See the OPTIONS section of "man rsync".
# * You probably want to exclude all Mercurial-specific metadata -- that is, the
#   ".hg/" directory at the root of the Mercurial repository -- from the
#   synchronized target repository. See discussion in the "POST_TARGET_ROOTS"
#   variable below for reasons why. (Please note that this may or may not be an
#   entirely safe thing to do. As target repositories synchronized to in this
#   way will have no Mercurial-specific metadata, the source Mercurial
#   repository from which they were synchronized cannot be restored from these
#   Mercurial-less target repositories... If this is a strong concern, consider
#   removing the "--exclude=.hg/" option.)
# * You probably want changes in the target repository to be auto-overwritten by
#   local changes. Thus, this is the default. (Please note that this may or may
#   not be an entirely safe thing to do. Files existing in the target repository
#   but not existing in the source repository will be automatically deleted. If
#   this is the case and is a strong concern, consider removing the "--delete"
#   option.)
# * You probably want to synchronize symbolic links. Thus, this is the default.
#   (Please note that this may or may not be an entirely safe thing to do.
#   Symbolic links in the target repository will probably not refer to valid
#   paths on the target machine. If this is the case and is a strong concern,
#   consider replacing the "--links" option with
#   "--links --keep-dirlinks --safe-links", instead.)
# * You probably want to specify the "--no-whole-file" option, here. This is the
#   rsync default; but, explicitly specifying this forces this when performing
#   rsync synchronization on SSHfs-mounted volumes.
# * You probably want to specify the "--checksum" option, here, when performing
#   rsync synchronization on SSHfs-mounted volumes. SSHfs tends to report
#   improper modification times and/or filesizes; consequently, these statistics
#   cannot be used to compare differences between the local and remote volume.
# * You probably do not want to specify the "--sparse" option, here, when doing
#   rsync synchronization on SSHfs-mounted volumes. SSHfs tends to report
#   improper modification, filesize, and/or file contents for such files.
#
# Each line below ends in a backslash-newline, which the shell removes inside
# double quotes; the value is thus one whitespace-delimited list of options.
POST_OPTIONS="\
  --exclude=.hg/ \
  --exclude=/home/leycec.pri/ \
  --exclude=/media/ \
  --exclude=/odden/ \
  --exclude=/ubuntu/ \
  \
  --checksum \
  --compress \
  --cvs-exclude \
  --delete --delete-after \
  --links --safe-links \
  --no-whole-file \
  --human-readable \
  --recursive \
  --super \
  --timeout=128 \
  --no-times --no-perms --no-owner --no-group \
"

# Command-line options to be passed to "$POST_COMMAND", when this script is run
# as a cronjob. (Typically, these options reduce the verbosity of "rsync" or
# "rdiff-backup" output.)
#
# The defaults may or may not be fine. Please consult "man rsync" or "man
# rdiff-backup", as appropriate.
POST_OPTIONS_IF_CRON="--quiet"

# Command-line options to be passed to the "$POST_COMMAND" binary, when not
# running as a cronjob.
#
# The defaults may or may not be fine. Please consult "man rsync" or "man
# rdiff-backup", as appropriate.
POST_OPTIONS_IF_NOT_CRON="--progress --verbose --verbose"

# A whitespace-delimited set of post-synchronization target paths.
# This script synchronizes all source Mercurial repositories into these paths,
# sans their Mercurial-specific metadata. Each target path receives a copy of
# each source Mercurial repository -- but without that source repository's
# Mercurial-specific metadata.
#
# Mercurial metadata tends to be expensive -- both in filesize and number of
# files. Preliminary tests show a marked reduction in the repository size of
# Mercurial metadata-less target repositories versus metadata-full source
# repositories: sometimes of up to half the original size of those source
# repositories. This is (mostly) unsurprising, of course. So, such target
# repositories might be a good fit for:
#
# * Space- and bandwidth-limited online repositories.
# * Space-limited, USB 2.0-hosted local repositories.
#
# And so on.
POST_TARGET_ROOTS="/www/af/b/bcurry/hg"

# ....................{ CONSTANTS }....................
SCRIPT_NAME=$(basename "$0")
SCRIPT_VERSION="0.0.3"

# Succeed (return 0) if the PID passed as "$1" is non-empty and is exactly equal
# to one of the whitespace- or newline-delimited PIDs passed as "$2"; otherwise,
# fail (return 1). PIDs are compared whole, so "12" does not match "3123".
#
# This is POSIX shell, which has no "local"; the loop variable is a global.
is_pid_listed() {
  [ -n "$1" ] || return 1
  for _listed_pid in $2; do
    [ "$_listed_pid" = "$1" ] && return 0
  done
  return 1
}

# Adapted from this itworld.com article, "Am I being run by cron?":
#   http://www.itworld.com/Comp/3380/nls_unixcron041209/index.html
#
# The cron daemon is named either "cron" or "crond", depending on the cron
# implementation installed. Errors are discarded so that a missing "pgrep" or
# "ps" merely means "not run by cron".
CRON_PIDS=$(pgrep -x 'crond?' 2>/dev/null)

# The PID of the parent of this script's parent: the row of "ps" output whose
# PID column is exactly "$PPID" gives that PID in its PPID column.
GRANDPARENT_PID=$(ps -eo ppid,pid 2>/dev/null |
  awk -v pid="$PPID" '$2 == pid { print $1; exit }')

# Determine whether this system provides "nice".
NICE=$(command -v nice 2>/dev/null)
if [ -n "$NICE" ] && [ -x "$NICE" ]; then
  NICE="$NICE -n${NICENESS}"
else
  NICE=""
fi

# Determine whether this system provides "ionice".
IONICE=$(command -v ionice 2>/dev/null)
if [ -n "$IONICE" ] && [ -x "$IONICE" ]; then
  IONICE="$IONICE -c${IONICENESS_CLASS} -n${IONICENESS}"
else
  IONICE=""
fi

# This script is run by cron if and only if its grandparent process is a cron
# daemon. Select the cron or non-cron command-line options, accordingly.
if is_pid_listed "$GRANDPARENT_PID" "$CRON_PIDS"; then
  IS_CRON="1"
  HG_OPTIONS="$HG_OPTIONS $HG_OPTIONS_IF_CRON"
  POST_OPTIONS="$POST_OPTIONS $POST_OPTIONS_IF_CRON"
else
  IS_CRON=""
  HG_OPTIONS="$HG_OPTIONS $HG_OPTIONS_IF_NOT_CRON"
  POST_OPTIONS="$POST_OPTIONS $POST_OPTIONS_IF_NOT_CRON"
fi

# The complete Mercurial and post-synchronization command lines.
HG="$HG_COMMAND $HG_OPTIONS"
POST="$POST_COMMAND $POST_OPTIONS"

# ....................{ MAIN }....................
# Run every enabled synchronization, in order: Mercurial synchronization (when
# "$IS_HG_SYNCING" is non-empty), then post-synchronization (when
# "$IS_POST_SYNCING" is non-empty). A synchronization whose command is not
# installed is skipped with a warning rather than treated as a fatal error.
main() {
  utter "v${SCRIPT_VERSION}"

  # Apply a custom umask to all files created during the implementation, below.
  # Specifically, make all such files "u+rwx,g+rx-w,o-rxw". This common umask
  # should suffice for most remote, web-accessible systems.
  umask 027

  create_path_if_not_found "$TEMP_PATH"

  # "command -v" is the POSIX way of resolving a command name to its path;
  # "which" is an external command that is not guaranteed to be installed.
  if [ -n "$IS_HG_SYNCING" ]; then
    if [ -x "$(command -v "$HG_COMMAND" 2>/dev/null)" ]; then
      hg_sync
    else
      curse "'$HG_COMMAND' not found; skipping synchronization via this protocol!"
    fi
  fi

  if [ -n "$IS_POST_SYNCING" ]; then
    if [ -x "$(command -v "$POST_COMMAND" 2>/dev/null)" ]; then
      post_sync
    else
      curse "'$POST_COMMAND' not found; skipping synchronization via this protocol!"
    fi
  fi
}

# ....................{ MERCURIAL }.................... # Backup each locally-mounted source repository onto each locally-mounted target # repository. hg_sync() { for SOURCE_ROOT in $SOURCE_ROOTS; do echo "" # Ensure this source path ends in a backslash. (This simplifies things, below.) SOURCE_ROOT=$(get_slash_suffixed_path "$SOURCE_ROOT") # Determine the user under which to run Mercurial commands on this # repository. If this is the "root" or "guest" user, we do not run as that # user. (Why? We're already running as "root," here; so, switching to the # "root" user again doesn't effect much. As for the "guest" user - that's # better left inexplicable, save to say it simplifies permission handling.) RUN_AS=$(stat --format=%U "$SOURCE_ROOT") if [ "$RUN_AS" = "root" -o "$RUN_AS" = "guest" ]; then utter "[$SOURCE_ROOT] synchronizing via mercurial as 'root'..." RUN_AS="" else utter "[$SOURCE_ROOT] synchronizing via mercurial as '$RUN_AS'..." fi # Move to the source path. (Mercurial commands must be run from the root of # the repository to which we're applying those commands.) try cd "$SOURCE_ROOT" if [ "$SOURCE_ROOT" = "/etc/" ]; then # Mercurial circa-0.9.3 cannot follow symlinks; rather, it versions symlinks # as binary files. Thus, Mercurial cannot version files residing outside the # repository root.
Thus, Mercurial cannot version files residing outside the # "/etc/" repository unless we explicitly copy those files into that repository # before versioning that repository. We ensure this now! utter "[$SOURCE_ROOT] copying extraneous conf files to '$EXTRANEOUS_CONF_FILES_PATH'..." create_path_if_not_found "$EXTRANEOUS_CONF_FILES_PATH" sync_extraneous_conf_file /usr/src/linux/.config # kernel sync_extraneous_conf_file /boot/grub/menu.lst # grub1 sync_extraneous_conf_file /boot/grub/grub.conf # grub2 sync_extraneous_conf_file /boot/yaboot.conf # yaboot fi # If there's no Mercurial repository describing the current "source" (e.g., # local) path, try cloning that repository from a pertinent "target" (e.g., # remote) path; if there's no such path, then (presumably) there's no such # remote Mercurial repository, either... Then, this is the first run of this # script and a local repository must be initialized. if [ ! -d "$SOURCE_ROOT/.hg/" ]; then IS_SOURCE_ROOT_CLONED="" utter "[$SOURCE_ROOT] existing source repository not found!" utter "[$SOURCE_ROOT] searching for an existing target repository from which to clone..." for TARGET_ROOT in $TARGET_ROOTS; do TARGET_PATH=$(get_target_path_from_source_and_target_root \ "$SOURCE_ROOT" "$TARGET_ROOT") if [ -d "$TARGET_PATH/.hg/" ]; then utter "[$SOURCE_ROOT] existing target repository found!" # Unfortunately, due to a slight deficiency in the "hg clone" command # (namely, that that command cannot clone into an already existing path), # we must clone into a temporary new path, overlay the contents of that # path into the desired source repository, and remove the temporary # path. (Unfortunate; but unavoidable, we're afraid.) if [ -n "$RANDOM" ] then SOURCE_ROOT_TEMP="$TEMP_PATH/$RANDOM" else SOURCE_ROOT_TEMP="$TEMP_PATH/$SOURCE_ROOT" fi if [ -d "$SOURCE_ROOT_TEMP" ]; then curse "[$SOURCE_ROOT] cannot clone source repository from '$TARGET_PATH'!" curse "[$SOURCE_ROOT] please manually move '$SOURCE_ROOT_TEMP' elsewhere." 
die "fatal error!" else utter "[$SOURCE_ROOT}] cloning source repository from '$TARGET_PATH' to '$SOURCE_ROOT_TEMP'..." # Avoid hardlinking when cloning, as that tends to (oddly) produce # rather unreadable repositories, at the moment. (Note, also, that # we could--but do not--simply recursively copy the ".hg/" path # from the remote target repository to the local source # repository. We don't, since such a copy cannot be guaranteed to be # atomic and may, explosively, result in corrupt local state: # especially when another Mercurial process, elsewhere, is pushing # changes to that remote target repository while performing this # recursive copy.) try $HG clone --pull "$TARGET_PATH" "$SOURCE_ROOT_TEMP" utter "[$SOURCE_ROOT}] overlaying '$SOURCE_ROOT_TEMP' onto '$SOURCE_ROOT'..." try mv "$SOURCE_ROOT_TEMP/.hg/" "$SOURCE_ROOT" try mv "$SOURCE_ROOT/.hg/hgrc" "$SOURCE_ROOT/.hg/hgrc.old" utter "[$SOURCE_ROOT] updating..." try $HG update utter "[$SOURCE_ROOT] was successfully cloned!" utter "[$SOURCE_ROOT] you may remove '$SOURCE_ROOT_TEMP', now." IS_SOURCE_ROOT_CLONED=1 break fi fi done if [ -z "$IS_SOURCE_ROOT_CLONED" ]; then utter "[$SOURCE_ROOT] existing target repository not found!" utter "[$SOURCE_ROOT] creating source repository from scratch..." try $HG init fi else for TARGET_ROOT in $TARGET_ROOTS; do TARGET_PATH=$(get_target_path_from_source_and_target_root "$SOURCE_ROOT" "$TARGET_ROOT") if [ -d "$TARGET_PATH/.hg/" ]; then utter "[$SOURCE_ROOT] pulling from '$TARGET_PATH'..." try $HG pull --update "$TARGET_PATH" fi done fi utter "[$SOURCE_ROOT] adding and removing new files..." try $HG addremove # Get Mercurial's list of all newly added or removed files for this # repository, truncating that list to 512 characters, maximum. (This avoids # argument overflow errors when used as a Mercurial commit message, later.) # # Note, also, that we explicitly escape all single and double quote # characters. 
(This permits us to embed this into other variables or # function calls.) HG_STATUS=$(run $HG status | tr "\n" "\v" | tr "\"'" '~~' | \ awk '{ gsub(/\v/, "\n"); print substr($0, 0, 509)"..." }') # If and only if at least one file or path under this source path has changed, # commit those changes. if [ "$HG_STATUS" ]; then utter "[$SOURCE_ROOT] committing..." HG_COMMIT_OUTPUT=$(hg_commit 2>&1) HG_COMMIT_RETURN_CODE=$? echo $HG_COMMIT_OUTPUT # Mercurial (often) prints the following error after failed commits: # abort: journal already exists - run hg recover! # # Detect this; correct this by running "hg recover"; and run "hg commit", # again. if [ "$HG_COMMIT_RETURN_CODE" -ne 0 ]; then if [ $(echo "$HG_COMMIT_OUTPUT" | grep 'hg recover') ]; then utter "[$SOURCE_ROOT] recovering from failed commit..." try $HG recover hg_commit else curse "[$SOURCE_ROOT] committing failed with '$HG_COMMIT_RETURN_CODE'!" exit $HG_COMMIT_RETURN_CODE fi fi fi for TARGET_ROOT in $TARGET_ROOTS; do TARGET_PATH=$(get_target_path_from_source_and_target_root "$SOURCE_ROOT" "$TARGET_ROOT") IS_TARGET_PATH_CHANGED="" if [ ! -d "$TARGET_PATH/.hg/" ]; then IS_TARGET_PATH_CHANGED=1 create_path_if_not_found $(dirname "$TARGET_PATH") # Avoid hardlinking when cloning, as that tends to (oddly) produce # rather unreadable repositories, at the moment. utter "[$SOURCE_ROOT] cloning to '$TARGET_PATH'..." try $HG clone --pull "$SOURCE_ROOT" "$TARGET_PATH" elif [ -n "$HG_STATUS" ]; then IS_TARGET_PATH_CHANGED=1 # Run "hg recover" from the target path before pushing to that target # path, so as to implicitly recover from some previously interrupted # push. If there is no previously interrupted push, this is a slightly # inefficient noop -- but no hard danger. (Note that, in that case, # that command returns a failure error code. We ignore that "error," # purposefully, by running the command with "run" rather than "try.") try cd "$TARGET_PATH" run $HG recover 1>/dev/null 2>&1 if [ $? 
-eq 0 ]; then utter "[$SOURCE_ROOT] recovered from previously interrupted push to '$TARGET_PATH'!" fi # Push changes from the source to target path's Mercurial repository. # (This updates the target path's ".hg/" path and none else. As such, # we perform an "hg update" immediately afterward, below.) try cd "$SOURCE_ROOT" utter "[$SOURCE_ROOT] pushing to '$TARGET_PATH'..." try $HG push "$TARGET_PATH" # Irregardless of whether or not there were any changes, update the # target path as if there were (since that target path might now be # desynchronized from the source path and require updating, anyway). try cd "$TARGET_PATH" utter "[$SOURCE_ROOT] cleanly updating '$TARGET_PATH'..." try $HG update --clean fi #FIXME: If "hg verify" actually corrected the errors it reported, this would #be a decent solution. But, it doesn't. So it's pretty much useless, eh? # if [ $? -eq 0 ]; then # utter "[${SOURCE_ROOT}] update failed!..." # utter "[${SOURCE_ROOT}] verifying remote changes to '${TARGET_PATH}'..." # try $HG verify # utter "[${SOURCE_ROOT}] updating remote changes to '${TARGET_PATH}'... (again)" # try $HG update # fi # TEMP_FILE="${TEMP_PATH}/${SCRIPT_NAME}.${SOURCE_ROOT}.${TARGET_ROOT}.push" # $HG push "$TARGET_PATH" 1>"$TEMP_FILE" 2>&1 # cat "$TEMP_FILE" # if [ $? -ne 0 ]; then # if [ $(grep "hg recover" "$TEMP_FILE") ]; then # rm "$TEMP_FILE" # try $HG recover # try $HG push "$TARGET_PATH" # else # rm "$TEMP_FILE" # exit 1 # fi # else # rm "$TEMP_FILE" # fi # Although commands run on files and paths owned by non-superusers should # be owned by those non-superusers, it may happen those those files or paths # are still owned by the superuser. To correct this, we manually restore # ownership on all such files or paths to their rightful owner. if [ -n "$RUN_AS" -a -n "$IS_TARGET_PATH_CHANGED" ]; then utter "[$SOURCE_ROOT] restoring ownership of '$TARGET_PATH' to '$RUN_AS'..." 
try chown -R ${RUN_AS}:${RUN_AS} "$TARGET_PATH" fi done # Unset global variables so as to ensure local sanity, elsewhere and after. RUN_AS="" utter "[$SOURCE_ROOT] done!" done } # Copy the passed file to "/etc/hg-cohere/", so as to ensure we maintain a # copy of this file in the "/etc" Mercurial repository. sync_extraneous_conf_file() { [ -f "$1" ] && cp --update "$1" "$EXTRANEOUS_CONF_FILES_PATH/" } # Perform a Mercurial commit. (This is extracted into an independent function, # so as to permit our calling it twice: once and once after, if the first # instance failed with an "abort: journal already exists - run hg recover!" and # we successfully responded by running "hg recover".) hg_commit() { utter "[$SOURCE_ROOT] committing..." # If ${HG_STATUS} has newlines, it cannot be passed to the "try" function # without significant newline-replacement; as such, we just try ourselves. if [ -n "$RUN_AS" ]; then su --preserve-environment --command "$NICE $IONICE $HG commit --message '$HG_STATUS'" "$RUN_AS" HG_COMMIT_RETURN_CODE=$? else $NICE $IONICE $HG commit --message '$HG_STATUS' HG_COMMIT_RETURN_CODE=$? fi return $HG_COMMIT_RETURN_CODE } # ....................{ MERCURIAL =post }.................... # Backup the locally-mounted source repository onto each locally-mounted, post- # synchronization target repository. post_sync() { for TARGET_ROOT in $TARGET_ROOTS; do echo "" # If post-synchronizing via "rsync", ensure this local source path ends in a # backslash. If it doesn't, "rsync" behaves oddly. See the section of # "man rsync". TARGET_ROOT=$(get_slash_suffixed_path "$TARGET_ROOT") # Synchronization tools tend to fail with I/O failures, when run over SSHfs. # As a(n admittedly hacky) mechanism for handling this, we cache stderr and, # after running the synchronization and recieving an error, grep the cached # stderr for an error string indicating an I/O failure. If found, we restart # synchronization from the beginning. 
This may cause an infinite spin-lock, # of course... But, at the moment, it does appear to run reasonable stably. [ -n "$RANDOM" ] || RANDOM="1" POST_LOG="$TEMP_PATH/$POST_COMMAND.$RANDOM.log" for POST_TARGET_ROOT in $POST_TARGET_ROOTS; do continue_if_path_not_found_or_is_relative "$POST_TARGET_ROOT" while [ 1 ]; do utter "[$TARGET_ROOT] synchronizing via $POST_COMMAND to '$POST_TARGET_ROOT' as user 'root'..." $POST "$TARGET_ROOT" "$POST_TARGET_ROOT" 2>"$POST_LOG" POST_RETURN_CODE=$? try cat "$POST_LOG" if [ $POST_RETURN_CODE -eq 0 ] then break else if [[ $(grep 'io timeout after' "$POST_LOG") ]]; then curse "[$TARGET_ROOT] caught an $POST_COMMAND connection timeout!" else curse "[$TARGET_ROOT] caught an $POST_COMMAND error!" try rm "$POST_LOG" exit 1 fi fi try rm "$POST_LOG" done done # Post-processing synchronization only requires one source repository from # which to synchronize... Consequently, we're already done. break done } # ....................{ PATH HANDLING }.................... create_path_if_not_found() { if [ ! -d "$1" ]; then utter "[$1] creating path..." try mkdir --parents "$1" fi } get_slash_suffixed_path() { continue_if_path_not_found_or_is_relative "$1" # Ensure this source path ends in a backslash. If it doesn't, "rsync" behaves # oddly. See the section of "man rsync". (The following expression # effectively "returns" the resultant path, in a subtly hackish way.) echo "$1" | sed 's/\/\?$/\//' } continue_if_path_not_found_or_is_relative() { if [ ! -d "$1" ]; then curse "[$1] will be skipped, since it does not exist!" curse "[$1] please make or mount this path, manually." continue fi # Ensure this source path begins in a backslash (i.e., that it is absolute). # If it doesn't, log an error and proceed to the next. if [ ! $(echo "$1" | grep '^/') ]; then curse "[$1] will be skipped, since it is a relative path (...to what?)!" 
continue fi } get_target_path_from_source_and_target_root() { _SOURCE_ROOT=$1 _TARGET_ROOT=$2 continue_if_path_not_found_or_is_relative $_TARGET_ROOT 1>/dev/null if [ "$_SOURCE_ROOT" = "/etc/" ] then echo "$_TARGET_ROOT/$HOSTNAME$_SOURCE_ROOT" else echo "$_TARGET_ROOT$_SOURCE_ROOT" fi } # ....................{ I/O HANDLING }.................... utter() { if [ -n "$IS_CRON" ] then logger -p cron.notice "${SCRIPT_NAME}: $*" else echo "${SCRIPT_NAME}: $*" fi } curse() { if [ -n "$IS_CRON" ] then logger -p cron.err "${SCRIPT_NAME}! $*" else echo "${SCRIPT_NAME}! $*" 1>&2 fi } die() { curse $* exit 1 } try() { run $* [ $? -eq 0 ] || exit $? } run() { # If the passed command to run actually exists, run it under it "nice" and # "ionice"; otherwise, the command is probably a built-in shell command and # cannot be run under "nice" or "ionice". if [ -x "$(which $1 2>/dev/null)" ]; then if [ -n "$RUN_AS" ] then su --preserve-environment --command "$NICE $IONICE $*" "$RUN_AS"; _RETURN_CODE=$? else $NICE $IONICE $* ; _RETURN_CODE=$? fi else $* ; _RETURN_CODE=$? fi return $_RETURN_CODE } # ....................{ IMPLEMENTATION }.................... main # --------------------( COPYRIGHT AND LICENSE )-------------------- # The information below applies to everything in this distribution, # except where noted. # # Copyleft 2008, 2009, and 2010 by Cecil Curry. # # http://www.raiazome.com # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see .