#!/bin/sh
# ====================[ oddmuse-cohere                     ]====================
#                     [ Time-stamp: "2009-01-01 23:08:16 leycec" ]             
#
# An "rsync"-dependent shell script for mirroring, storing, and restoring
# Oddmuse Wiki installations; or, specifically, for cloning one local or remote
# Oddmuse Wiki installation onto one or more (local or remote) back-up replicas
# of that Wiki installation.
#
# --------------------( DEPENDENCIES                       )--------------------
#   # Install "rsync"; e.g., on Gentoo:
#   sudo emerge rsync
# 
#   # Optionally, install SSH keychain; e.g., on Gentoo:
#   sudo emerge keychain
# 
# --------------------( INSTALLATION                       )--------------------
#   # Make two paths: one for this script and one for its log-file.
#   mkdir --parents ~/bin/ ~/var/log/
#
#   # Move this script to some user-accessible path and make it executable.
#   cp oddmuse-cohere ~/bin/; chmod ug+rx ~/bin/oddmuse-cohere
#
#   # Edit the "CONFIGURATION" section of this script, below.
#   vi ~/bin/oddmuse-cohere
#
#   # Run this script, to try it before scheduling it with a user cronjob.
#   sh ~/bin/oddmuse-cohere
#
#   # Schedule this script to be run "every so often" by the user crontab.
#   #
#   # While you can schedule this script to be run by the system crontab, that
#   # may prevent you from rsync-synchronizing against a remote SSH server via
#   # the passwordless, pre-cached SSH keychain (since that keychain is
#   # specific, and only specifically accessible, to a user).
#   #
#   # To schedule this script as a user cronjob, run the following command as
#   # that user and add the single line following it to that file, uncommenting
#   # that line and replacing "$HOME" with the absolute path of the user's home:
#   crontab -e
#   #0 0 * * *  . $HOME/.keychain/`hostname`-sh && $HOME/bin/oddmuse-cohere 1>>$HOME/var/log/oddmuse-cohere 2>&1
#
#   # Great! That's scheduled cron to run this script every day, at midnight.
#   # Occasionally check that logfile--by default, above, at
#   # "~/var/log/oddmuse-cohere"--for the unlucky, inevitable errors and
#   # warnings. (What can we say? We run a clean ship but the oceans are rough.)
#   tail ~/var/log/oddmuse-cohere
#
#   # Cron output can, also, appear in the system log. This script logs to the
#   # syslog 'cron' facility; thus, on Gentoo systems, this is:
#   tail /var/log/crond/current
#
# --------------------( DESCRIPTION                        )--------------------
# This script leverages "rsync" - an external, externally installable, open-
# source application (and protocol specification) for mirroring sites, boxes, or
# other external sources - for the steady grunt-work of its mirroring. Nicely,
# "rsync" is this script's only hard dependency.
#
# This script also (optionally) leverages "cron" - an external, externally
# installable (but, typically, pre-bundled), open-source application suite for
# scheduling execution of applications - for the scheduled execution of its
# mirroring. (You are encouraged, once comfortable with this script, to schedule
# execution of this script via an automated cronjob. See "INSTALLATION", above.)
#
# --------------------( FILE OVERWRITING                   )--------------------
# This script overwrites all changes to the current, local version of your
# Oddmuse Wiki when "rsync"-synchronizing from your remote Oddmuse Wiki.
#
# This cannot be overstated. This can, inherently, be fatally trip-wiry: changes
# made to the current, local version and not the current, remote version will be
# deleted on the next execution of this script. This necessarily extends to the
# existence of files and paths, themselves: files and paths added to the
# current, local version and not the current, remote version will be deleted on
# the next execution of this script. (This is necessary, so as to maintain
# synchronicity in the file-based database underlying the Oddmuse Wiki.)
#
# This, however, is disable-able. See the "RSYNC_OPTIONS" variable, below.
# Note, however, that if you disable this, you're probably using the wrong
# script. If you intend to make desynchronous changes to local or remote copies
# of another repository, you should probably be using a properly distributed,
# properly disconnected, versioned control system. Like Mercurial.
#
# This script always - whether overwriting is disabled or not - performs a
# preemptive "dry run" before performing an actual, potentially destructive
# "rsync"-synchronization. The "dry run" is a test run printing the set of all
# local files and paths to be changed by that "rsync"-synchronization, without
# actually performing those changes. (For evident reasons, this script does not
# perform that dry run when run from a scheduled cronjob. Thus, before
# scheduling this, you should be sure this script is "rsync"-synchronizing
# precisely what you want and no more.)
#
# --------------------( VERSIONED BACKUPS                  )--------------------
# This script does not maintain versioned, dated backups. Rather, it always
# overwrites the current, local version of your Oddmuse Wiki with the full
# content of your current, remote Oddmuse Wiki.
#
# This script could be improved - simply and safely - to maintain such backups
# by archiving file and path content for the current, local version with "tar",
# "gzip", "bzip2", or even a full-blown, proper version control system before
# "rsync"-synchronizing over those files and paths; and you're warmly, welcomely
# encouraged to do that at your idle leisure. (Note, however, that any such
# backups are likely to be excessively large and probably shouldn't be retained
# past several months. Such is largesse, and administrative paranoia.)
#
# This script's author, for example, version controls the content of his Oddmuse
# Wiki through Mercurial by "rsync"-synchronizing that Wiki into a local path
# 'managed' by Mercurial. Mercurial, a distributed concurrent versions system
# (or, DCVS), makes a Wiki-resembling, decentralized medium for versioning,
# recovering, and publishing the core, technologic innards of a Wiki. To
# simplify this integration with Mercurial, the developers of this script have,
# also, developed a separate script performing this integration; see the
# "hg-snapshot" script at:
#
#   http://www.raiazome.com/Mercurial--hg-snapshot
# 
# --------------------( CRON SCHEDULING                    )--------------------
# This script is meant to be scheduled as a user cronjob. While it can be run as
# a system cronjob process, that tends to muck with SSH keychain's passwordless
# pre-caching. SSH keychain is an external application, allowing you to "cache"
# passphrases for one or several RSA- or DSA-style, user-specific SSH
# authentication keys when logging in as that user. By leveraging SSH keychain
# in conjunction with cron, you can schedule this script to mirror from SSH-
# authenticated servers (hosting your remote Oddmuse Wiki), without requiring
# you interactively enter username and password data for those servers.
#
# See "INSTALLATION", above, for gritty instructions.
#
# --------------------( NON-ODDMUSE CONTENT                )--------------------
# This script is intended, mostly, to "rsync"-synchronize Oddmuse Wiki
# installations cross-server. Despite this, and given its generality, this
# script should be able to "rsync"-synchronize any dirge of files, paths or
# installed, file-based Wikis cross-server. (At the least, it should serve as
# some well-documented help, when devoutly routing your own!)
#
# --------------------( CHANGELOG                          )--------------------
# [2007-04-16] 0.0.1: Creation.
#
# --------------------( TODO                               )--------------------
# * Provide support for running "rdiff-backup" as the backend synchronizer,
#   rather than "rsync", and to associate each repository with one such
#   synchronizer. For example, I'd like to use "rsync" when syncing against
#   nfsn.net, but default back to "rdiff-backup" when syncing against
#   sdf.com (since sdf.com does not provide free access to the rsync binary).
# * Provide support for running as a separate, non-privileged user.
# * Provide support for the "--log-file" option.

# ....................{ CONFIGURATION                      }....................
# The absolute path to the root of your Oddmuse Wiki. This path may be either
# local (i.e., residing on the same machine from which this script is run) or
# remote (i.e., residing on some SSH-accessible machine, elsewhere). Wherever it
# resides, this path should have the main "current.pl" or "wiki.pl" Oddmuse
# script for your Wiki.
#
# This script considers sources matching the pattern
# "${SSH_USERNAME}@${SSH_HOSTNAME}:${SSH_HOSTNAME_PATH}" to be remote SSH
# servers; and will, properly, attempt to rsync-synchronize them over SSH.
#
# This script considers all other sources to be local paths on the local
# machine; and will, properly, attempt to rsync-synchronize them over local
# filesystems.
#
# The default, here, is probably not fine. You'll probably want to change it.
# Single-quoted: the value is a literal and nothing in it is meant to expand.
SOURCE_ROOT='leycec_raiazome@ssh.phx.nearlyfreespeech.net:/home/htdocs/'

# A whitespace-delimited set of absolute paths to where you want the above
# Oddmuse Wiki locally synchronized to. These paths must be local (i.e.,
# reside on the same machine from which this script is run).
# 
# The "rsync" command-line utility cannot synchronize from one remote source
# path to one remote target path; it only synchronizes from one remote source
# path to one local target path. Thus, this script mimics synchronization from
# one remote source path to one remote target path by first synchronizing that
# remote source path to one local target path, then synchronizing that local
# target path to that desired remote target path. (Confused, assuredly? Sweet.
# So were we.)
#
# Each target path receives a cloned copy of that (local or remote) source path;
# as example, supposing you leave the source and target paths at their defaults,
# this script synchronizes as follows:
#
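#  from <-- leycec_raiazome@ssh.phx.nearlyfreespeech.net:/home/htdocs/  [remote]
#       |
#  into --> /home/leycec/pub/code/raiazome.com                          [local]
#         |
#  into   --> bcurry@faeroes.freeshell.org:~/html                       [remote]
#
# (The last hop only happens once "TARGET_ROOTS_REMOTE" is set, e.g., to the
# commented-out example below; it is empty by default.)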
#
# Also, note that rsync changes its pathname behavior slightly (and slightly
# non-intuitively), when the target pathname ends in an explicit "/". As such,
# it's best to simply, safely not end these paths in an explicit "/"--emphasis
# on the 'not.' See the USAGE section of "man rsync", for gritty details.
#
# The default, here, is probably not fine. You'll probably want to change it.
# List target paths local to the current machine, here, and target paths remote
# to the current machine via the "TARGET_ROOTS_REMOTE" variable, below.
# TARGET_ROOTS_LOCAL="/home/leycec/pub/code/raiazome.com /www/af/b/bcurry/"
# TARGET_ROOTS_LOCAL="/www/af/b/bcurry/ /home/leycec/pub/code/raiazome.com"
# Single-quoted: the value is a literal, whitespace-delimited list of paths.
TARGET_ROOTS_LOCAL='/home/leycec/pub/code/raiazome.com'

# A whitespace-delimited set of absolute paths to where you want the above
# Oddmuse Wiki remotely synchronized to. These paths must be remote (i.e.,
# residing on some SSH-accessible machines, elsewhere). See comment under the
# "TARGET_ROOTS_LOCAL" variable, above, as to why we divide the set of local
# target paths from the set of remote target paths.
#
# The default, here, is probably not fine. You'll probably want to change it.
# TARGET_ROOTS_REMOTE="bcurry@faeroes.freeshell.org:~/html"
# Empty by default: no remote target is synchronized until this is set.
TARGET_ROOTS_REMOTE=''

# Command-line filters to be passed to the "rsync" binary. See the
# <FILTER RULES> section of "man rsync".
#
# All files or paths in the remote repository matching at least one of these
# filters will be ignored (i.e., not "rsync"-synchronized into the local
# repository).
#
# The default, here, is probably fine.
# A whitespace-delimited set of "rsync" filters. Any file or path on the
# remote, source repository-side matching at least one of these filters is
# ignored (i.e., not "rsync"-synchronized into the local, target repository).
#
# The default, here, is probably not fine. You'll probably want to change it.
# See the <FILTER RULES> section of "man rsync" for tiring, enmiring details.
# (Uncommenting this line and commenting the lines below it is a safe bet, if
# you initially can't be bothered to stumble through the "rsync" man page.)
#
# The default, here, assumes the following top-level files and paths:
#
#   /raiazome.css             ...the CSS file for your Oddmuse Wiki.
#   /wiki.pl                  ...the Perl script for your Oddmuse Wiki.
#   /wiki/                    ...the $DataDirectory for your Oddmuse Wiki.
#   /wiki/config_private.pl   ...a host-specific configuration Perl script.
#
# The Oddmuse Wiki for which this script was developed, http://raiazome.com,
# uses another, non-standard Perl script named "/wiki/config_private.pl" to
# persist host-specific, and private, Wiki settings; by excluding this file,
# below, "rsync"-synchronization prevents this file from overwriting a file
# of the same name on the target mirror for that synchronization. At the
# moment, it contains these host-specific configuration settings:
# $ScriptName, $FullUrl, $EditAllowed, $AdminPass, $EditPass,
# $ReCaptchaPublicKey, $ReCaptchaPrivateKey, and $ReCaptchaSecretKey.
# RSYNC_FILTERS="--exclude=* --exclude=/wiki/config_private.pl"
# NOTE: this string is spliced into "$RSYNC_COMMAND", which is later expanded
# unquoted; the shell's word-splitting is what turns it into separate rsync
# arguments, so the line breaks and indentation here are purely cosmetic.
# The "-u"/"--unfiltered" command-line option empties this variable.
RSYNC_FILTERS="\
  --include=/raiazome.css \
  --include=/wiki.pl \
  --include=/wiki/ \
  --exclude=/wiki/config_private.pl \
  --exclude=/wiki/visitors.log \
  --exclude=/*"

# Command-line options to be passed to the "rsync" binary. See the <OPTIONS>
# section of "man rsync".
#
# The defaults, here, are probably fine. If you don't want local changes to be
# auto-overwritten by remote changes (...you probably do, but who are we to
# carp and complain?), you'll want to remove the '--delete' option.
# NOTE: at run time this script appends either "--quiet" (when run from cron)
# or "--progress --verbose --verbose" (otherwise) to this list before building
# the rsync command line. The string is expanded unquoted, so its line breaks
# and indentation merely separate the individual options.
RSYNC_OPTIONS="\
  --compress \
  --cvs-exclude \
  --delete --delete-after \
  --devices --specials \
  --human-readable \
  --links --keep-dirlinks --safe-links \
  --recursive --sparse \
  --super \
  --timeout=32 \
  --times --no-perms --no-owner --no-group \
"

# Command-line options to be passed to the 'rsync' binary, when performing
# rsync-synchronizations from a cronjob. These, typically, include additional
# precautions against catastrophic, large-scale changes or deletions.
#
# This script appends the "$RSYNC_OPTIONS" above onto this list, automatically.
# (No need to repeat yourself, there!)
#
# The defaults, here, are probably fine.
# NOTE: these are passed (unquoted, hence word-split) after "$RSYNC_COMMAND",
# and only on the code path taken when this script detects it runs from cron.
RSYNC_OPTIONS_IF_CRON="\
  --max-delete=128 \
"

# ....................{ CONSTANTS                          }....................
# The name this script was invoked as, with any leading directories stripped.
# It prefixes every message this script prints or logs. Parameter expansion is
# used rather than "basename" so that no external process is spawned and a "$0"
# beginning with "-" cannot be mistaken for an option.
SCRIPT_NAME=${0##*/}

# The version string announced at start-up.
SCRIPT_VERSION="0.0.1"

# ....................{ I/O HANDLING                       }....................
# Print a message to standard output, prefixed with this script's name and
# NOT followed by a newline (e.g., for a prompt answered on the same line).
#
# Arguments: the words of the message; they are joined with the first
#            character of $IFS (a space, by default).
# Globals:   SCRIPT_NAME (read).
mutter() {
  # POSIX leaves "echo -n" implementation-defined, so some /bin/sh
  # implementations print a literal "-n"; "printf" behaves the same everywhere.
  printf '%s' "${SCRIPT_NAME}: $*"
}

# Emit an informational message, prefixed with this script's name.
#
# When $IS_CRON is empty the message goes to standard output; otherwise it is
# handed to "logger" with the 'cron.notice' priority.
#
# Arguments: the words of the message.
# Globals:   IS_CRON, SCRIPT_NAME (read).
utter() {
  case "$IS_CRON" in
    "") echo "${SCRIPT_NAME}: $*" ;;
    *)  logger -p cron.notice "${SCRIPT_NAME}: $*" ;;
  esac
}

# Emit an error message, prefixed with this script's name and a "!".
#
# When $IS_CRON is empty the message goes to standard error; otherwise it is
# handed to "logger" with the 'cron.err' priority.
#
# Arguments: the words of the message.
# Globals:   IS_CRON, SCRIPT_NAME (read).
curse() {
  case "$IS_CRON" in
    "") echo "${SCRIPT_NAME}! $*" 1>&2 ;;
    *)  logger -p cron.err "${SCRIPT_NAME}! $*" ;;
  esac
}

# Print the usage text and terminate the script with failure status 1.
#
# Because this is the error path, the usage text is sent to standard error so
# that it is not mistaken for (or piped along with) regular output.
utter_usage_error() {
  utter_usage 1>&2
  exit 1
}

# Print the usage text, then terminate the script successfully (status 0),
# whatever status printing the usage text itself returned.
utter_usage_noerror() { utter_usage; exit 0; }

# Print this script's usage text to standard output.
#
# Globals: SCRIPT_NAME (read).
utter_usage() {
  # The "\$" keeps the variable *name* "$RSYNC_FILTERS" in the help text; left
  # unescaped inside double quotes, the shell would substitute the variable's
  # current value (the whole filter list) instead.
  #
  # NOTE(review): the synopsis advertises "file..." operands, but nothing in
  # this script appears to consume them -- confirm before relying on it.
  printf '%s\n' "Usage: ${SCRIPT_NAME} [options] file...
             options
  -h, --help         Show this help.
  -u, --unfiltered   Do not filter files, even if \$RSYNC_FILTERS is set."
}

# ....................{ ERROR HANDLING                     }....................
# Run the given command; if it fails, terminate the script with status 1.
#
# Arguments: the command name, followed by that command's arguments.
try() {
  # "$@" hands every argument to the command exactly as it was received. An
  # unquoted $* would split arguments containing whitespace into several words
  # and expand any glob characters in them a second time.
  "$@" || exit 1
}

# Report an error message through "curse", then terminate the script with
# status 1.
#
# Arguments: the words of the error message.
die() {
  # "$@" forwards the message unaltered; an unquoted $* would collapse runs of
  # whitespace and expand glob characters (e.g., "*") found in the message.
  curse "$@"
  exit 1
}

# ....................{ OPTION PARSING                     }....................
# Let "getopt" normalize the command line (presumably the enhanced, util-linux
# implementation, given the long flags used here); show the usage text and
# fail if it rejects the arguments.
OPTIONS=$(getopt --unquoted --longoption 'unfiltered,help' --options '+u,h' --shell sh -- "${@}") ||
  utter_usage_error

# Replace the positional parameters with the normalized, word-split result.
set -- $OPTIONS

# A list of descriptive "modifiers" to be output, when outputting verbosely.
# These are descriptive, purely - and serve no functional purpose.
MODIFIERS=" "

# Consume options until "--" or the first word that is not an option.
while [ $# -gt 0 ]; do
  case "$1" in
    -h|--help)
      utter_usage_noerror
      ;;
    -u|--unfiltered)
      # Drop every rsync filter and remember that fact for the log output.
      RSYNC_FILTERS=""
      MODIFIERS="$MODIFIERS +unfiltered"
      shift
      ;;
    --)
      shift
      break
      ;;
    -*)
      utter_usage_error
      ;;
    *)
      break
      ;;
  esac
done

# Adapted from this itworld.com article, "Am I being run by cron?":
#        http://www.itworld.com/Comp/3380/nls_unixcron041209/index.html

# Succeed if, and only if, the PID given as $1 is non-empty and is exactly
# equal to one of the whitespace-delimited PIDs listed in $2.
#
# A substring search (e.g., "grep") is not good enough here: PID "12" would
# match a listed "3124", and an empty PID would match every listed PID.
pid_in_list() {
  [ -n "$1" ] || return 1
  for LISTED_PID in $2; do
    [ "$LISTED_PID" = "$1" ] && return 0
  done
  return 1
}

# The PIDs of all processes named exactly "cron".
#
# NOTE(review): a daemon named "crond" would not be found by this -- confirm
# the process name used on your platform.
CRON_PIDS=$(pgrep -x cron)

# The PID of the parent of this script's parent process: the first column of
# the "ps" row whose second column is this script's parent PID.
GRANDPARENT_PID=$(ps -eo ppid,pid | awk -v pid="$PPID" '$2 == pid { print $1 }')

# Under cron, stay quiet; interactively, show progress and be verbose.
if pid_in_list "$GRANDPARENT_PID" "$CRON_PIDS"
  then IS_CRON="1"; RSYNC_OPTIONS="${RSYNC_OPTIONS} --quiet"
  else IS_CRON="";  RSYNC_OPTIONS="${RSYNC_OPTIONS} --progress --verbose --verbose"
fi

# The rsync command line shared by every synchronization below. It is meant to
# be expanded unquoted, so that the shell splits it into separate arguments.
RSYNC_COMMAND="nice rsync $RSYNC_OPTIONS $RSYNC_FILTERS"

# ....................{ INITIALIZATION                     }....................
# Announce the running version, followed by a blank separator line.
utter "v${SCRIPT_VERSION}"
echo ""

# Refuse to continue without "rsync". "command -v" is the POSIX-specified way
# to ask whether a command can be found; "which" is a separate program that
# need not be installed and whose output and exit status vary between systems.
command -v rsync >/dev/null 2>&1 ||
  die 'rsync not installed! ("rsync" binary not found in your $PATH.)'

# ....................{ IMPLEMENTATION                     }....................
# Synchronize one source path into each of a list of target paths with rsync.
#
# When $IS_CRON is non-empty, every target is synchronized straight away, with
# $RSYNC_OPTIONS_IF_CRON added to the rsync command line. Otherwise each target
# first gets a "--dry-run", and the real synchronization follows only if the
# user answers "y", "ye" or "yes" at the prompt; any other answer (or end of
# input) skips that target and every remaining target.
#
# Arguments:
#   $1 - the source path; a trailing "/" is appended if it lacks one.
#   $2 - a whitespace-delimited list of target paths.
# Globals read:    IS_CRON, MODIFIERS, RSYNC_COMMAND, RSYNC_OPTIONS_IF_CRON.
# Globals written: SOURCE_ROOT, TARGET_ROOTS, TARGET_ROOT, IS_SURE. These are
#                  ordinary globals, so calling this function overwrites the
#                  configured $SOURCE_ROOT with the (normalized) first argument.
# Exits: with status 1, through "try", as soon as an rsync invocation fails.
rsync_from_source_path_to_target_paths() {
  SOURCE_ROOT=$1
  TARGET_ROOTS=$2

  # Ensure the source path ends in a slash. If it doesn't, "rsync" behaves
  # oddly. See the <USAGE> section of "man rsync". (Done with a "case" pattern
  # rather than "sed", whose "\?" operator is a GNU extension.)
  case "$SOURCE_ROOT" in
    */) ;;
    *)  SOURCE_ROOT="${SOURCE_ROOT}/" ;;
  esac

  for TARGET_ROOT in $TARGET_ROOTS; do
    if [ -n "$IS_CRON" ]; then
      # Unattended: nobody can confirm anything, so synchronize directly.
      utter "${SOURCE_ROOT} -> ${TARGET_ROOT}$MODIFIERS"
      try $RSYNC_COMMAND $RSYNC_OPTIONS_IF_CRON "${SOURCE_ROOT}" "${TARGET_ROOT}"
    else
      # Interactive: show what would change, then ask before changing it.
      utter "${SOURCE_ROOT} -> ${TARGET_ROOT}$MODIFIERS  (dry run)"
      try $RSYNC_COMMAND --dry-run "${SOURCE_ROOT}" "${TARGET_ROOT}"

      echo
      mutter "are you sure you want to perform this rsync? [yes/no] "
      # "-r" keeps backslashes in the reply from being treated as escapes.
      read -r IS_SURE

      case "$IS_SURE" in
        y|ye|yes)
          utter "${SOURCE_ROOT} -> ${TARGET_ROOT}$MODIFIERS"
          try $RSYNC_COMMAND "${SOURCE_ROOT}" "${TARGET_ROOT}"
          ;;
        *)
          # Declined: leave the "for" loop, abandoning the remaining targets.
          break
          ;;
      esac
    fi
  done
}

# Step one: clone the (local or remote) source path into every local target
# path -- provided both a source and at least one local target are configured.
if [ -n "$SOURCE_ROOT" ] && [ -n "$TARGET_ROOTS_LOCAL" ]; then
  rsync_from_source_path_to_target_paths "$SOURCE_ROOT" "$TARGET_ROOTS_LOCAL"
fi

# Step two: relay one local target path on to every remote target path --
# provided both local and remote targets are configured.
if [ -n "$TARGET_ROOTS_LOCAL" ] && [ -n "$TARGET_ROOTS_REMOTE" ]; then
  # The loop only serves to pick off the first whitespace-delimited local
  # target; it is left right after that one has been relayed.
  for TARGET_ROOT_LOCAL in $TARGET_ROOTS_LOCAL; do
    rsync_from_source_path_to_target_paths \
      "$TARGET_ROOT_LOCAL" "$TARGET_ROOTS_REMOTE"
    break
  done
fi

# --------------------( COPYRIGHT AND LICENSE              )--------------------
# The information below applies to everything in this distribution,
# except where noted.
#              
# Copyleft 2008 by B.w.Curry.
#   
#   http://www.raiazome.com
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
