#!/bin/sh
# ====================[ rdiff-snapshot                     ]====================
#                     [ Time-stamp: "2008-12-04 19:18:20 leycec" ]
#
# A shell script "snapshot"-ing one or more local paths on your local machine
# onto one or more local (locally mounted) target paths on your local machine.
# These snapshots are rdiff-backup changesets, thus preserving changes to your
# local machine across time (depending on how frequently you back-up) and space
# (depending on how many target paths you back-up onto).
#
# This script integrates well with "cron", allowing you to backup on some fixed
# schedule (say, every day); and with NFS, CurlFtpFS, and SSHFS, allowing you to
# schedule those backups onto one or more remote machines (say, freeshell.org).
#
# This script assumes Gentoo and a Gentoo package manager in the installation,
# configuration, and usage instructions, below. (The examples use Paludis;
# substitute Portage's "emerge" if you prefer. Neither is strictly required, of
# course. Any sufficiently competent Linux distribution--Debian, Ubuntu, or
# otherwise--should suffice to install, configure, and use this script.)
#
# --------------------( DEPENDENCIES                       )--------------------
#   # Install rdiff-backup; e.g., on Gentoo:
#   sudo paludis -i rdiff-backup
# 
#   # Optionally, install SSHFS and SSH keychain; e.g., on Gentoo:
#   sudo paludis -i sshfs-fuse keychain
#
#   # Optionally, install cronbase; e.g., on Gentoo:
#   sudo paludis -i cronbase
# 
# Installing SSHFS and SSH keychain allows this script to backup onto locally
# mounted, remote SSH servers.
#
# Installing cronbase allows scheduling of this script by moving it, merely, to
# "/etc/cron.{daily|hourly|weekly|monthly}".
#
# --------------------( INSTALLATION                       )--------------------
#   # Move this script to some temporary path, and make it executable.
#   cp rdiff-snapshot /tmp/; chmod ug+rx /tmp/rdiff-snapshot
#
#   # Edit the "CONFIGURATION" section of this script, below.
#   vi /tmp/rdiff-snapshot
#
#   # Make one or several ".hgignore" files. (See "CONFIGURATION", below).
#
#   # Run this script--to try it before scheduling it with a system cronjob.
#   sh /tmp/rdiff-snapshot
#
#   # Schedule this script to be run "every so often" by the system crontab.
#   #
#   # If the system crontab checks "/etc/cron.{daily|hourly|weekly|monthly}/"
#   # (typically, by installation of cronbase on Gentoo-installed machines),
#   # schedule this script to be run each day, hour, week, or month by simply
#   # moving this script into that path. As example:
#   sudo mv /tmp/rdiff-snapshot /etc/cron.daily/
#
#   # Otherwise, edit your system crontab by hand and restart the cron daemon.
#   #vi /etc/crontab; /etc/init.d/vixie-cron restart
#
#   # Check the system log, after that cronjob runs this script, for scheduled
#   # output from this script. It logs to the syslog cron facility, by default.
#   tail /var/log/crond/current
#
# --------------------( USAGE                              )--------------------
# To rdiff-snapshot onto an SSH-secured remote machine, run SSHFS on your local
# machine to mount some remote path on that machine onto the local machine:
# 
#   # Make a local path to which the remote path will be mounted.
#   mkdir -p ~/mnt/hg
#
#   # Mount the remote path onto that local path. (See "SSHFS" below for
#   # several SSHFS command-line options, for improving SSHFS performance.)
#   # As example,
#   # sshfs leycec@faeroes.freeshell.org:html/hg ~/mnt/hg/
#   sshfs ${USERNAME}@${HOSTNAME}:${REMOTE_PATH} ~/mnt/hg/
#
#   # Edit this script. Specifically, append that local path to the
#   # "$TARGET_ROOTS" shell variable, below.
#   vi rdiff-snapshot
#
#   # Run this script to test your edits.
#   sh rdiff-snapshot
#
#   # Unmount the remote path from that local path.
#   fusermount -uz ~/mnt/hg/
#
# --------------------( CHANGELOG                          )--------------------
# [2007-12-04] 0.0.1: Created.
#
# --------------------( TODO                               )--------------------
# * Improve configurability and documentation.


# ....................{ CONFIGURATION                      }....................
# A whitespace-delimited set of source paths to be backed up.
#
# NOTE(review): earlier documentation here stated that files "explicitly
# ignored by Mercurial" are excluded and that the defaults are "/etc/" and
# "/home/". Neither statement matches the single default path assigned below,
# and no ignore-file handling is visible in the active code -- confirm the
# intended sources and exclusions before relying on this value.
#
# You may replace the default with one or more glob expressions evaluating to
# one or more local paths on the local machine. (Presumably these expressions
# are glob-evaluated wherever $SOURCE_ROOTS is expanded unquoted, on each run of
# the script, and may include the standard glob characters: "*", "?", et al.)
SOURCE_ROOTS="/home/leycec/pub/old/hg/"

# A whitespace-delimited set of target paths. Every source path above is meant
# to be backed up into each of these target paths.
#
# NOTE(review): the original description of this variable spoke of making one
# Mercurial repository per source path (at ".hg/") and cloning it into each
# target path. That logic is only present in the commented-out IMPLEMENTATION
# section, so treat the layout below as the intended one and confirm it against
# whatever implementation is active.
#
# As example, supposing the local machine is named "mysidia" and has one local
# user named "leycec", that the source paths are "/etc/" and "/home/leycec/",
# and that the target paths are "/home/leycec/mnt/sdf/html/hg" and
# "/media/sda1/old/hg", this script backs up as follows:
#
#   /etc/ ---------> /home/leycec/mnt/sdf/html/hg/mysidia/etc/
#   /home/leycec/ -> /home/leycec/mnt/sdf/html/hg/home/leycec/
#   /etc/ ---------> /media/sda1/old/hg/mysidia/etc/
#   /home/leycec/ -> /media/sda1/old/hg/home/leycec/
#
# Notice, above, that system-wide settings ("/etc/") are not backed-up to
# "/home/leycec/mnt/sdf/html/hg/etc/"--but to
# "/home/leycec/mnt/sdf/html/hg/mysidia/etc/". System-wide settings tend to be
# machine-specific while user-specific settings, being generalized, tend to be
# machine-independent. (That is, you tend to use the same user-specific dotfiles
# across many different machines but tend to use system-wide dotfiles on one and
# only one machine, as system-wide dotfiles are specific to that system.)
#
# Thus, the source path "/etc/" (and only that exact path) is mapped to each
# target path appended by the local machine's hostname. If you find this
# behaviour inconvenient, set HOSTNAME="" (below).
#
# Target paths are, customarily, locally mounted paths to external harddrives,
# externally collocated servers, and other (presumably distant and decidedly
# "safe") data havens--specific to your feisty needs.
#
# The default value, as such, is probably not fine. Please replace it with one
# or more locally mounted paths.
TARGET_ROOTS="/www/af/b/bcurry/rdiff-backup"

# The local machine's hostname, as printed by the "hostname" command. It is
# inserted between the target root and the source path when the source path is
# exactly "/etc/". The default is probably fine. (However, see documentation
# for "TARGET_ROOTS", above.)
HOSTNAME=$(hostname)

# The path intended for temporary files. It is created during initialization if
# it does not already exist. The default is probably fine.
TEMP_PATH=/tmp

# ....................{ CONSTANTS                          }....................
SCRIPT_NAME=$(basename "$0")
SCRIPT_VERSION="0.0.1"

# Succeed if and only if the PID given as $1 is non-empty and equals one whole
# line of the newline-delimited PID list given as $2.
#
# Matching is fixed-string and whole-line ("grep -Fx"), so that PID "12" does
# not match a listed "123". An empty PID never matches, since an empty grep
# pattern would otherwise match every line of the list.
pid_is_listed() {
  [ -n "$1" ] || return 1
  printf '%s\n' "$2" | grep -Fqx -- "$1"
}

# Adapted from this itworld.com article, "Am I being run by cron?":
#        http://www.itworld.com/Comp/3380/nls_unixcron041209/index.html
#
# CRON_PIDS lists every process named exactly "cron". GRANDPARENT_PID is the
# parent of this shell's parent process: the "ps" row whose PID column equals
# $PPID carries it in its PPID column.
CRON_PIDS=$(pgrep -x cron)
GRANDPARENT_PID=$(ps -eo ppid,pid |
  awk -v parent="$PPID" '$2 == parent { print $1 }')

# IS_CRON is "1" when the grandparent process is a cron daemon and empty
# otherwise; the helper functions below log to syslog instead of the terminal
# when it is set.
if pid_is_listed "$GRANDPARENT_PID" "$CRON_PIDS"
  then IS_CRON="1"; RDIFF_BACKUP_OPTIONS="--quiet"
  else IS_CRON="";  RDIFF_BACKUP_OPTIONS="--verbose"
fi

# NOTE(review): this command line invokes "hg", although the initialization
# section of this script checks for "rdiff-backup" -- confirm which tool is
# intended before re-enabling the implementation.
RDIFF_BACKUP_COMMAND="nice hg $RDIFF_BACKUP_OPTIONS"

# ....................{ I/O HANDLING                       }....................
# Report an informational message.
#
# All arguments are joined into one message prefixed by "${SCRIPT_NAME}: ".
# When IS_CRON is empty the message is echoed to standard output; otherwise it
# is handed to "logger" with priority "cron.notice".
utter() {
  if [ -z "$IS_CRON" ]; then
    echo "${SCRIPT_NAME}: $*"
    return
  fi

  logger -p cron.notice "${SCRIPT_NAME}: $*"
}

# Report an error message.
#
# All arguments are joined into one message prefixed by "${SCRIPT_NAME}! ".
# When IS_CRON is empty the message is echoed to standard error; otherwise it
# is handed to "logger" with priority "cron.err".
curse() {
  if [ -z "$IS_CRON" ]; then
    echo "${SCRIPT_NAME}! $*" >&2
    return
  fi

  logger -p cron.err "${SCRIPT_NAME}! $*"
}

# Run the command given as arguments, optionally as another user.
#
# Globals:   RUN_AS (read) - if non-empty, the user to run the command as.
# Arguments: the command and its arguments.
# Returns:   the exit status of the command (or of "su").
#
# Without RUN_AS, the arguments are executed exactly as passed ("$@"), so
# arguments containing whitespace or glob characters reach the command intact.
#
# With RUN_AS, "su --command" accepts only a single command string, so the
# arguments are joined with spaces and re-parsed by the target user's shell;
# argument boundaries are not preserved on that path.
run() {
  if [ -n "$RUN_AS" ]; then
    su --login --command "$*" "$RUN_AS"
  else
    "$@"
  fi
}

# Run the command given as arguments via run(); exit the whole script with
# status 1 if that command fails.
#
# Arguments are forwarded as "$@" so that each one reaches run() unchanged,
# even when it contains whitespace or glob characters.
try() {
  run "$@" || exit 1
}

# Report a fatal error via curse(), then exit the script with status 1.
#
# Arguments are forwarded as "$@" so that the message is reported verbatim; an
# unquoted expansion would collapse runs of whitespace and expand glob
# patterns such as "[etc]" against files in the current directory.
die() {
  curse "$@"
  exit 1
}

# ....................{ HELPER FUNCTIONS                   }....................
# Validate that $1 is an existing directory given as an absolute path.
#
# Globals:   TEST_PATH (written) - set to $1.
# Arguments: $1 - the path to validate.
# Returns:   0 if the path is usable; 1, after reporting why via curse(), if
#            it does not exist or is relative.
#
# Despite its name, this function cannot skip an iteration of its caller's
# loop by itself: the effect of "continue" is unspecified when no loop
# lexically encloses it, and some shells simply ignore it inside a function.
# Callers running inside a loop should therefore write:
#
#   continue_if_path_not_found_or_is_relative "$path" || continue
continue_if_path_not_found_or_is_relative() {
  TEST_PATH=$1

  if [ ! -d "$TEST_PATH" ]; then
    curse "[${TEST_PATH}] will be skipped, since it does not exist!"
    curse "[${TEST_PATH}] please make or mount this path, manually."
    return 1
  fi

  # Ensure this path begins with a forward slash (i.e., that it is absolute).
  # A pattern match is used rather than "grep" inside "[ ]" so that paths
  # containing whitespace are tested correctly.
  case "$TEST_PATH" in
    /*) ;;
    *)
      curse "[${TEST_PATH}] will be skipped, since it is a relative path (...to what?)!"
      return 1
      ;;
  esac
}
  
# Compute the target path into which a source path is backed up.
#
# Globals:   HOSTNAME (read); SOURCE_ROOT, TARGET_ROOT, TARGET_PATH (written).
# Arguments: $1 - the source root (expected to end in "/").
#            $2 - the target root.
# Returns:   0 with TARGET_PATH set on success; 1 with TARGET_PATH empty if
#            the target root fails validation.
#
# The source root "/etc/" (exactly) is placed beneath a directory named after
# this machine's hostname; every other source root is appended to the target
# root directly.
get_target_path_from_source_and_target_root() {
  SOURCE_ROOT=$1
  TARGET_ROOT=$2

  # Clear any value left over from a previous call, so that a failed
  # validation can never leave a stale path behind for the caller to use.
  TARGET_PATH=""

  continue_if_path_not_found_or_is_relative "$TARGET_ROOT" || return 1

  if [ "$SOURCE_ROOT" = "/etc/" ]
  then TARGET_PATH="${TARGET_ROOT}/${HOSTNAME}${SOURCE_ROOT}"
  else TARGET_PATH="${TARGET_ROOT}${SOURCE_ROOT}"
  fi
}

# Create the directory $1, including any missing parent directories, unless it
# already exists.
#
# Globals:   MAKE_PATH (written) - set to $1.
# Arguments: $1 - the directory to create.
#
# Creation is announced via utter() and performed via try(), so a failing
# "mkdir" ends the script.
create_path_if_not_found() {
  MAKE_PATH=$1

  # Nothing to do when the directory is already there.
  [ -d "$MAKE_PATH" ] && return 0

  utter "[${MAKE_PATH}] creating path..."
  try mkdir --parents "$MAKE_PATH"
}

# ....................{ INITIALIZATION                     }....................
utter "v${SCRIPT_VERSION}"

# Refuse to run without rdiff-backup. The lookup uses the "command -v" shell
# built-in rather than "which", which is an external utility that is not
# guaranteed to be installed.
command -v rdiff-backup >/dev/null 2>&1 ||
  die "rdiff-backup not installed! ('rdiff-backup' binary not found in your PATH.)"

# Apply a custom umask to all files created during the implementation, below.
# Specifically, make all such files "u+rwx,g+rx-w,o-rxw". This common umask
# should suffice for most remote, web-accessible systems.
umask 027

create_path_if_not_found "$TEMP_PATH"

# ....................{ IMPLEMENTATION                     }....................
# for SOURCE_ROOT in $SOURCE_ROOTS; do
#   echo ""

#   # Ensure this source path ends in a forward slash. (This simplifies things, below.)
#   SOURCE_ROOT=$(echo "$SOURCE_ROOT" | sed 's/\/\?$/\//' -)
#   continue_if_path_not_found_or_is_relative $SOURCE_ROOT

#   # Move to the source path. (Mercurial commands must be run from the root of
#   # the repository to which we're applying those commands.)
#   cd ${SOURCE_ROOT}

#   if [ "$SOURCE_ROOT" = "/etc/" ]; then
#     RUN_AS=""

#     # Mercurial circa-0.9.3 cannot follow symlinks; rather, it versions symlinks
#     # as binary files. Thus, Mercurial cannot version files residing outside the
#     # repository root. Thus, Mercurial cannot version files residing outside the
#     # "/etc/" repository unless we explicitly copy those files into that repository
#     # before versioning that repository. We ensure this now!
#     utter "[${SOURCE_ROOT}] copying system-wide files..."

#     SOURCE_SYSTEM_ROOT="${SOURCE_ROOT}/system"
#     create_path_if_not_found "$SOURCE_SYSTEM_ROOT"

#     # Copy "/usr/src/linux/.config" to "/etc/system/kernel.config", so as to ensure we
#     # version-control a copy of your current kernel configuration.
#     [ -f /usr/src/linux/.config ] &&
#       cp /usr/src/linux/.config "${SOURCE_SYSTEM_ROOT}/kernel.config"

#     # Copy "/boot/grub/grub.conf" to "/etc/system/grub.conf", so as to ensure we
#     # version-control a copy of your current bootloader configuration.
#     [ -f /boot/grub.conf ] &&
#       cp /boot/grub.conf "${SOURCE_SYSTEM_ROOT}/grub.conf"
#   else
#     RUN_AS=$(basename "$SOURCE_ROOT")

#     # Do not run Mercurial commands as the guest user. While, perhaps, this is a
#     # slight hack, it remarkably simplifies permission handling.
#     [ "$RUN_AS" = "guest" ] && RUN_AS=""
#   fi

#   # If there's no Mercurial repository describing the current "source" (e.g.,
#   # local) path, try cloning that repository from a pertinent "target" (e.g.,
#   # remote) path; if there's no such path, then (presumably) there's no such
#   # remote Mercurial repository, either... Then, this is the first run of this
#   # script and a local repository must be initialized.
#   if [ ! -d "${SOURCE_ROOT}/.hg/" ]; then
#     IS_CLONED=""

#     for TARGET_ROOT in $TARGET_ROOTS; do
#       get_target_path_from_source_and_target_root "$SOURCE_ROOT" "$TARGET_ROOT"
      
#       if [ -d "${TARGET_PATH}/.hg/" ]; then
#         utter "[${SOURCE_ROOT}] matching target repository found!"

#         # Unfortunately, due to a slight deficiency in the "hg clone" command
#         # (namely, that that command cannot clone into an already existing path),
#         # we must clone into a temporary new path, overlay the contents of that
#         # path into the desired source repository, and remove the temporary
#         # path. (Unfortunate; but unavoidable, we're afraid.)
#         if [ -n "$RANDOM" ]; then
#           SOURCE_ROOT_TEMP="${TEMP_PATH}/${RANDOM}/"
#         else
#           SOURCE_ROOT_TEMP="${TEMP_PATH}/${SOURCE_ROOT}"
#         fi
        
#         if [ -d "$SOURCE_ROOT_TEMP" ]; then
#           curse "[${SOURCE_ROOT}] cannot clone source repository from '${TARGET_PATH}'!"
#           curse "[${SOURCE_ROOT}] please manually move '${SOURCE_ROOT_TEMP}' elsewhere."
#           die "fatal error!"
#         else
#           utter "[${SOURCE_ROOT}] cloning source repository from '${TARGET_PATH}' to '${SOURCE_ROOT_TEMP}'..."

#           # Avoid hardlinking when cloning, as that tends to (oddly) produce
#           # rather unreadable repositories, at the moment. (Note, also, that
#           # we could--but do not--simply recursively copy the ".hg/" path
#           # from the remote target repository to the local source
#           # repository. We don't, since such a copy cannot be guaranteed to be
#           # atomic and may, explosively, result in corrupt local state:
#           # especially when another Mercurial process, elsewhere, is pushing
#           # changes to that remote target repository while performing this
#           # recursive copy.)
#           try $RDIFF_BACKUP_COMMAND clone --pull "$TARGET_PATH" "$SOURCE_ROOT_TEMP"

#           utter "[${SOURCE_ROOT}] overlaying '${SOURCE_ROOT_TEMP}' onto '${SOURCE_ROOT}'..."
#           try mv "${SOURCE_ROOT_TEMP}/.hg/" "${SOURCE_ROOT}"
#           try mv "${SOURCE_ROOT}/.hg/hgrc" "${SOURCE_ROOT}/.hg/hgrc.old"

#           utter "[${SOURCE_ROOT}] updating..."
#           try $RDIFF_BACKUP_COMMAND update

#           utter "[${SOURCE_ROOT}] was successfully cloned!"
#           utter "[${SOURCE_ROOT}] you may remove '${SOURCE_ROOT_TEMP}', now."
#           IS_CLONED=1
#           break
#         fi
#       fi
#     done
    
#     if [ -z "$IS_CLONED" ]; then
#       utter "[${SOURCE_ROOT}] matching target repository not found."
#       utter "[${SOURCE_ROOT}] initing (from scratch)..."
#       try $RDIFF_BACKUP_COMMAND init
#     fi
#   else
#     for TARGET_ROOT in $TARGET_ROOTS; do
#       get_target_path_from_source_and_target_root "$SOURCE_ROOT" "$TARGET_ROOT"
      
#       if [ -d "${TARGET_PATH}/.hg/" ]; then
#         utter "[${SOURCE_ROOT}] updating from '${TARGET_PATH}'..."
#         try $RDIFF_BACKUP_COMMAND pull --update "$TARGET_PATH"
#       fi
#     done
#   fi

#   utter "[${SOURCE_ROOT}] add-removing..."
#   try $RDIFF_BACKUP_COMMAND addremove

#   # Get Mercurial's list of all newly added or removed files for this repository,
#   # truncating that list to 512 characters, maximum. (This avoids argument
#   # overflow errors when we use this status message as a commit message, later.)
#   RDIFF_BACKUP_STATUS=$(run $RDIFF_BACKUP_COMMAND status | \
#     tr "\n" "\v" | awk '{ gsub(/\v/, "\n"); print substr($0, 0, 509)"..." }')

#   # If and only if at least one file or path under this source path has changed,
#   # commit those changes.
#   if [ "$RDIFF_BACKUP_STATUS" ]; then
#     utter "[${SOURCE_ROOT}] committing..."

#     # If ${RDIFF_BACKUP_STATUS} has newlines, it cannot be passed to the "try" function
#     # without significant newline-replacement; as such, we just try ourselves.
#     if [ -n "$RUN_AS" ]
#     then su --preserve-environment --command \
#          "$RDIFF_BACKUP_COMMAND commit --message '$RDIFF_BACKUP_STATUS'" "$RUN_AS"
#     else $*
#     fi
    
#     [ $? -eq 0 ] || exit 1
#   fi

#   # Regardless of whether or not there were any changes, update the target
#   # path as if there were, as that path may be desynchronized from the source
#   # path and require updating, anyway.
#   for TARGET_ROOT in $TARGET_ROOTS; do
#     get_target_path_from_source_and_target_root "$SOURCE_ROOT" "$TARGET_ROOT"

#     if [ ! -d "${TARGET_PATH}/.hg/" ]; then
#       create_path_if_not_found $(dirname "$TARGET_PATH")

#       # Avoid hardlinking when cloning, as that tends to (oddly) produce
#       # rather unreadable repositories, at the moment.
#       utter "[${SOURCE_ROOT}] cloning to '${TARGET_PATH}'..."
#       try $RDIFF_BACKUP_COMMAND clone --pull "$SOURCE_ROOT" "$TARGET_PATH"
#     else
#       if [ "$RDIFF_BACKUP_STATUS" ]; then
#         # Run "hg recover" from the target path before pushing to that target
#         # path, so as to implicitly recover from some previously interrupted
#         # push. If there is no previously interrupted push, this is a slightly
#         # inefficient noop -- but no hard danger.
#         cd ${TARGET_PATH}
#         nice hg recover 1>/dev/null 2>&1
#         if [ $? -eq 0 ]; then
#           utter "[${SOURCE_ROOT}] recovered from previously interrupted push to '${TARGET_PATH}'!"
#         fi

#         cd ${SOURCE_ROOT}
#         utter "[${SOURCE_ROOT}] pushing to '${TARGET_PATH}'..."
#         try $RDIFF_BACKUP_COMMAND push "$TARGET_PATH"

#         cd ${TARGET_PATH}
#         utter "[${SOURCE_ROOT}] updating '${TARGET_PATH}'..."
#         try $RDIFF_BACKUP_COMMAND update --clean
#       fi
#     fi
#   done
# done

# --------------------( COPYRIGHT AND LICENSE              )--------------------
# The information below applies to everything in this distribution,
# except where noted.
#              
# Copyleft 2008 by B.w.Curry.
#   
#   http://www.raiazome.com
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
