#!/bin/sh
# ====================[ drupal-cohere                        ]====================
#                     [ Time-stamp: "2008-12-22 13:22:33 leycec" ]
#
# --------------------( CHANGELOG                          )--------------------
# [2007-12-21] 0.0.1: Created.
#
# --------------------( TODO                               )--------------------
# * Integrate with the "hg-cohere" script, if applicable.

# ....................{ CONFIGURATION =posix               }....................
# The directory holding one subdirectory per Drupal site (i.e., Drupal's
# "sites/" path). Each immediate subdirectory is inspected for a
# "files/backup_migrate/" path, and the whole tree is copied, archived, and
# compressed.
#
# Change this to suit your installation.
SOURCE_DRUPAL_SITES_ROOT='/home/leycec/pub/code/organicmechanics.com/sites'

# The existing directory into which each dated "drupal_sites_*" archive file is
# written, and from which older archive files are later removed.
#
# Change this to suit your installation.
TARGET_DRUPAL_SITES_ARCHIVE_PATH='/home/leycec/pub/old/site/organicmechanics.com'

# The command prefix used to "nice" (i.e., lower the scheduling priority of)
# the commands this script launches through its run() helper. As shipped, it
# requests the lowest CPU priority ("nice -n19") and the lowest "best effort"
# disk priority ("ionice -c2 -n7").
#
# The default is probably fine.
NICE='ionice -c2 -n7 nice -n19'

# The directory beneath which temporary files are written.
#
# The default is probably fine.
TEMP_PATH='/tmp'

# ....................{ CONFIGURATION =archival            }....................
# The archival command. It bundles files and paths together with their
# filesystem attributes (owner, group, timestamps, and so on) but performs no
# compression itself; compression is a separate step. On Unix-like systems
# this is usually "tar".
#
# The default is probably fine.
ARCHIVAL_COMMAND='tar'

# Command-line options handed to the archival command. "--file -" makes it
# write the archive to standard output, which this script pipes into the
# compression command.
#
# The default is probably fine.
ARCHIVAL_OPTIONS='--create --file - '
ARCHIVAL_OPTIONS="${ARCHIVAL_OPTIONS}  --atime-preserve --ignore-failed-read"
ARCHIVAL_OPTIONS="${ARCHIVAL_OPTIONS} --preserve --recursion --sparse"

# How many archive files to keep. Archive files older than the newest this-many
# are deleted automatically to conserve disk space.
#
# The default may or may not be fine. Please consider this number carefully!
MAX_NUMBER_OF_ARCHIVAL_FILES='4'

# ....................{ CONFIGURATION =compression         }....................
# The compression command, which receives the archive on standard input.
# Candidates include "bzip2", "gzip", and "7z".
#
# The default may or may not be fine. Change it to whichever compression
# command you have installed and prefer. The original author recommends "7z",
# on the grounds that 7-Zip tends to compress text better than the
# alternatives; your mileage may vary.
COMPRESS_COMMAND='7z'

# Command-line options handed to the compression command. As described by the
# original author, the defaults tell 7-Zip to use the PPMd algorithm instead of
# its default LZMA algorithm, PPMd tending to do best on plaintext files.
#
# The default may or may not be fine. If you change the compression command,
# you will almost certainly need to change these options as well.
COMPRESS_OPTIONS='a -t7z -m0=PPMd -mmem=64m -si'

# ....................{ CONFIGURATION =http-request        }....................
# The command used to issue HTTP requests. It performs the GET request against
# a Drupal site's "cron.php", which asks that site to produce a fresh database
# backup.
#
# The default is probably fine.
HTTP_REQUEST_COMMAND='curl'

# Command-line options handed to the HTTP request command.
#
# The default is probably fine.
HTTP_REQUEST_OPTIONS='--silent --compressed'

# ....................{ CONSTANTS                          }....................
# This script's name and version.
SCRIPT_NAME=$(basename "$0")
SCRIPT_VERSION="0.0.1"

# The date as "YYYY-MM-DD" at which this script is currently being run.
TODAY=$(date '+%F')

# Succeed only if the PID passed as $1 is non-empty and equals, as a whole
# line, one of the newline-separated PIDs passed as $2.
#
# Whole-line, fixed-string matching matters here: a substring match would treat
# PID "12" as present in "123", and an empty PID would match every line.
is_pid_listed() {
  [ -n "$1" ] && printf '%s\n' "$2" | grep -qxF -- "$1"
}

# Detect whether this script is being run by cron, by checking whether the
# parent of this shell's parent process is a "cron" process.
#
# Adapted from this itworld.com article, "Am I being run by cron?":
#        http://www.itworld.com/Comp/3380/nls_unixcron041209/index.html
CRON_PIDS=$(pgrep -x cron)
GRANDPARENT_PID=$(ps -eo ppid,pid | awk -v pid="$PPID" '$2 == pid { print $1 }')

# Use the POSIX "if command" form rather than "[[ ... ]]", which is not
# available in every /bin/sh.
if is_pid_listed "$GRANDPARENT_PID" "$CRON_PIDS"; then
  IS_CRON="1"
else
  IS_CRON=""
fi

# ....................{ MAIN                               }....................
main() {
  # Every file created from here on is restricted to its owner
  # ("u+rwx,go-rwx"). Such a tight umask keeps the copies and archives made
  # below away from group and other users, e.g. on a publicly hosted machine.
  umask 077

  # Announce the running version.
  utter "v${SCRIPT_VERSION}"

  # The three stages, in order: prepare database backups, archive and compress
  # the sites, then prune old archive files.
  pre_archival_compress
  main_archival_compress
  post_archival_compress
}

# ....................{ ARCHIVAL-COMPRESS =pre             }....................
# Prepare the database backups of every Drupal site under
# "$SOURCE_DRUPAL_SITES_ROOT" for archival. For each site directory:
#
# * If it has no "files/backup_migrate/" path, request a database backup from
#   that site (except for the "all/" and "default/" paths, which legitimately
#   may lack one) and move on to the next site.
# * Move every backup in "manual/" to "scheduled/", so that all backups live in
#   one place.
# * If there is no backup for today, request one. If there are several backups
#   for today, keep only the most recent file in "scheduled/".
#
# Globals read:    SOURCE_DRUPAL_SITES_ROOT, TODAY
# Globals written: SOURCE_DRUPAL_SITE_* (scratch variables)
# Calls:           request_drupal_site_backup, utter, try
pre_archival_compress() {
  for SOURCE_DRUPAL_SITE_ROOT in "$SOURCE_DRUPAL_SITES_ROOT"/*; do
    # Only directories are sites. This also skips plain files living beside
    # the site directories and the literal pattern left by an unmatched glob.
    [ -d "$SOURCE_DRUPAL_SITE_ROOT" ] || continue

    SOURCE_DRUPAL_SITE_BACKUP_ROOT="$SOURCE_DRUPAL_SITE_ROOT/files/backup_migrate"
    SOURCE_DRUPAL_SITE_DOMAIN_NAME=$(basename "$SOURCE_DRUPAL_SITE_ROOT")

    # Typically, the "all/" site path never has a "files/backup_migrate/" path,
    # while the "default/" site path only has one under a single-site
    # installation. Any other site lacking that path is asked for a backup.
    # Either way there is nothing on disk to tidy up yet, so continue; this
    # also keeps the site from being asked for a backup twice in one run.
    if [ ! -d "$SOURCE_DRUPAL_SITE_BACKUP_ROOT" ]; then
      case "$SOURCE_DRUPAL_SITE_DOMAIN_NAME" in
        all | default) ;;
        *) request_drupal_site_backup "$SOURCE_DRUPAL_SITE_DOMAIN_NAME" ;;
      esac
      continue
    fi

    # Move all database backups in the "manual/" path to the "scheduled/" path.
    # This avoids archival+compression of extraneous database backups.
    #
    # The listing is quoted and tested with "-n": left unquoted, two or more
    # matching files expand to several words and make "[" fail.
    if [ -n "$(ls "$SOURCE_DRUPAL_SITE_BACKUP_ROOT/manual/"*.sql* 2>/dev/null)" ]; then
      utter "moving database backups from '$SOURCE_DRUPAL_SITE_BACKUP_ROOT/manual'..."
      try mv "$SOURCE_DRUPAL_SITE_BACKUP_ROOT/manual/"*.sql* \
             "$SOURCE_DRUPAL_SITE_BACKUP_ROOT/scheduled/"
    fi

    # Today's database backups in the "scheduled/" path, newest first.
    SOURCE_DRUPAL_SITE_BACKUP_FILES=$(ls -1tA "$SOURCE_DRUPAL_SITE_BACKUP_ROOT/scheduled/"*"$TODAY"*.sql* 2>/dev/null)

    # With no backup for today, request that this Drupal site create one.
    if [ -z "$SOURCE_DRUPAL_SITE_BACKUP_FILES" ]; then
      request_drupal_site_backup "$SOURCE_DRUPAL_SITE_DOMAIN_NAME"
      continue
    fi

    # With several backups for today, keep only the most recent one and remove
    # every other backup from the "scheduled/" path.
    #
    # NOTE(review): as before, nothing is removed when exactly one backup for
    # today coexists with backups from earlier days -- confirm whether those
    # older backups are meant to be archived too.
    if [ "$(printf '%s\n' "$SOURCE_DRUPAL_SITE_BACKUP_FILES" | wc -l)" -gt 1 ]; then
      SOURCE_DRUPAL_SITE_BACKUP_FILE=$(printf '%s\n' "$SOURCE_DRUPAL_SITE_BACKUP_FILES" | head -n 1)
      utter "removing database backups older than '$SOURCE_DRUPAL_SITE_BACKUP_FILE'..."

      # Delete the others in place, rather than parking the newest backup in
      # "manual/" and moving it back; the newest backup never leaves its path.
      for SOURCE_DRUPAL_SITE_STALE_FILE in \
        "$SOURCE_DRUPAL_SITE_BACKUP_ROOT/scheduled/"*.sql*; do
        [ "$SOURCE_DRUPAL_SITE_STALE_FILE" = "$SOURCE_DRUPAL_SITE_BACKUP_FILE" ] && continue
        try rm "$SOURCE_DRUPAL_SITE_STALE_FILE"
      done
    fi
  done
}

# Ask a Drupal site to perform a database backup right now, by fetching that
# site's "cron.php" URL; running the site's cronjobs is what triggers the
# backup.
#
# Arguments: $1 - the domain name of the Drupal site.
#
# NOTE(review): $1 is currently ignored. The assignment from it is commented
# out below and a fixed host is used instead, so every call requests the same
# URL. This looks like a leftover debugging override -- confirm before relying
# on per-site requests.
request_drupal_site_backup() {
  die_if_command_not_found "${HTTP_REQUEST_COMMAND}"

  # DRUPAL_SITE_DOMAIN_NAME="$1"
  DRUPAL_SITE_DOMAIN_NAME='gwydden/~leycec/organicmechanics'
  DRUPAL_SITE_CRON_URL="http://${DRUPAL_SITE_DOMAIN_NAME}/cron.php"

  utter "requesting '${DRUPAL_SITE_CRON_URL}' run database backups..."

  # The command and its options are deliberately unquoted, so that the options
  # string splits into separate arguments.
  try ${HTTP_REQUEST_COMMAND} ${HTTP_REQUEST_OPTIONS} "${DRUPAL_SITE_CRON_URL}"
}

# ....................{ ARCHIVAL-COMPRESS =main            }....................
# Remove the temporary working path created by main_archival_compress(), if
# any. Also installed as an EXIT trap, so the private copy of the sites does
# not linger under "$TEMP_PATH" when the script aborts half-way.
#
# Globals read: TARGET_DRUPAL_SITES_WORK_PATH
# Returns:      0 if there was nothing to remove, else the status of "rm".
remove_drupal_sites_work_path() {
  [ -n "$TARGET_DRUPAL_SITES_WORK_PATH" ] || return 0
  [ -d "$TARGET_DRUPAL_SITES_WORK_PATH" ] || return 0

  # The copy is made with "cp --preserve", so it inherits the source's modes.
  # A read-only directory would stop "rm" from deleting its contents, so make
  # the copy owner-writable first (best effort).
  run chmod -R u+w "$TARGET_DRUPAL_SITES_WORK_PATH" 2>/dev/null
  run rm -rf "$TARGET_DRUPAL_SITES_WORK_PATH"
}

# Decompress one database backup in place, choosing the tool from the
# extension following ".sql.". Backups with any other name are reported via
# curse() and left untouched.
#
# Arguments: $1 - path to the database backup file.
decompress_drupal_site_backup() {
  utter "decompressing '$1'..."

  case "$1" in
    *.sql.bz | *.sql.bz2 | *.sql.bzip2) try bunzip2 "$1" ;;
    *.sql.gz | *.sql.gzip)              try gunzip "$1" ;;
    *.sql.zip)
      # Unlike bunzip2 and gunzip, unzip extracts into the current directory
      # and keeps the archive. Extract beside the archive and then remove it,
      # so only the decompressed backup gets archived.
      try unzip -d "$(dirname "$1")" "$1"
      try rm "$1"
      ;;
    *) curse "$1 not compressed!" ;;
  esac
}

# Archive and compress the Drupal "sites/" directory into a dated archive file
# under "$TARGET_DRUPAL_SITES_ARCHIVE_PATH", then delete the database backups
# that were just archived.
#
# The sites are first copied to a private temporary path (archival and
# compression commands tend to perform badly when run over an SSHfs mount), the
# copied database backups are decompressed so that they are recompressed only
# once by the compression command, and the copy is piped through the archival
# command into the compression command.
#
# The source database backups are deleted only after both the archival and the
# compression command have succeeded. On failure any partial archive file is
# removed and the script exits non-zero.
#
# Globals read:    ARCHIVAL_COMMAND, ARCHIVAL_OPTIONS, COMPRESS_COMMAND,
#                  COMPRESS_OPTIONS, SOURCE_DRUPAL_SITES_ROOT,
#                  TARGET_DRUPAL_SITES_ARCHIVE_PATH, TEMP_PATH, TODAY
# Globals written: TARGET_DRUPAL_SITES_ARCHIVE_FILE, TARGET_DRUPAL_SITES_ROOT,
#                  TARGET_DRUPAL_SITES_WORK_PATH, and scratch variables
main_archival_compress() {
  die_if_command_not_found "$ARCHIVAL_COMMAND"
  die_if_command_not_found "$COMPRESS_COMMAND"

  die_if_path_not_found "$TEMP_PATH"
  die_if_path_not_found "$SOURCE_DRUPAL_SITES_ROOT"
  die_if_path_not_found "$TARGET_DRUPAL_SITES_ARCHIVE_PATH"

  TARGET_DRUPAL_SITES_ARCHIVE_FILE="$TARGET_DRUPAL_SITES_ARCHIVE_PATH/drupal_sites_$TODAY.$ARCHIVAL_COMMAND.$COMPRESS_COMMAND"
  [ -f "$TARGET_DRUPAL_SITES_ARCHIVE_FILE" ] && \
    die "'$TARGET_DRUPAL_SITES_ARCHIVE_FILE' already exists!"

  # Create a fresh, unpredictable working path. It holds the copy of the sites
  # ("sites/") plus a small status file, which must stay out of the archive.
  TARGET_DRUPAL_SITES_WORK_PATH=$(run mktemp -d "$TEMP_PATH/drupal_sites_XXXXXX") \
    || die "cannot create a temporary path under '$TEMP_PATH'!"
  [ -d "$TARGET_DRUPAL_SITES_WORK_PATH" ] \
    || die "cannot create a temporary path under '$TEMP_PATH'!"
  trap remove_drupal_sites_work_path EXIT
  TARGET_DRUPAL_SITES_ROOT="$TARGET_DRUPAL_SITES_WORK_PATH/sites"

  # Recursively copy the entire contents of the Drupal "sites/" path to the
  # local, temporary path.
  utter "copying '$SOURCE_DRUPAL_SITES_ROOT' to '$TARGET_DRUPAL_SITES_ROOT'..."
  try cp --no-target-directory --preserve --recursive \
    "$SOURCE_DRUPAL_SITES_ROOT" "$TARGET_DRUPAL_SITES_ROOT"

  # Decompress each copied database backup. These backups are recompressed via
  # the custom archive and compress commands, later.
  for TARGET_DRUPAL_SITE_ROOT in "$TARGET_DRUPAL_SITES_ROOT"/*; do
    [ -d "$TARGET_DRUPAL_SITE_ROOT" ] || continue

    TARGET_DRUPAL_SITE_BACKUP_ROOT="$TARGET_DRUPAL_SITE_ROOT/files/backup_migrate"
    TARGET_DRUPAL_SITE_DOMAIN_NAME=$(basename "$TARGET_DRUPAL_SITE_ROOT")

    # The "all/" and "default/" paths may legitimately have no backup path; any
    # other site lacking one is reported. Either way there is nothing to
    # decompress for this site.
    if [ ! -d "$TARGET_DRUPAL_SITE_BACKUP_ROOT" ]; then
      case "$TARGET_DRUPAL_SITE_DOMAIN_NAME" in
        all | default) ;;
        *) curse "'$TARGET_DRUPAL_SITE_ROOT' has no database backup paths!" ;;
      esac
      continue
    fi

    # An unmatched glob stays a literal pattern; report it instead of handing
    # a non-existent file to a decompressor.
    set -- "$TARGET_DRUPAL_SITE_BACKUP_ROOT/scheduled/"*.sql*
    if [ ! -e "$1" ]; then
      curse "'$TARGET_DRUPAL_SITE_BACKUP_ROOT/scheduled' has no database backups!"
      continue
    fi

    for TARGET_DRUPAL_SITE_BACKUP_FILE in "$@"; do
      decompress_drupal_site_backup "$TARGET_DRUPAL_SITE_BACKUP_FILE"
    done
  done

  # Recursively recompress the entire Drupal site structure, including all
  # recently decompressed database backups.
  #
  # A pipeline only reports the status of its last command, so the archival
  # command's status is recorded in a file and checked as well.
  utter "compressing '$TARGET_DRUPAL_SITES_ROOT' to '$TARGET_DRUPAL_SITES_ARCHIVE_FILE' via $ARCHIVAL_COMMAND to $COMPRESS_COMMAND..."
  TARGET_DRUPAL_SITES_ARCHIVAL_STATUS_FILE="$TARGET_DRUPAL_SITES_WORK_PATH/archival_status"
  {
    run $ARCHIVAL_COMMAND $ARCHIVAL_OPTIONS "$TARGET_DRUPAL_SITES_ROOT"
    echo "$?" > "$TARGET_DRUPAL_SITES_ARCHIVAL_STATUS_FILE"
  } | run $COMPRESS_COMMAND $COMPRESS_OPTIONS "$TARGET_DRUPAL_SITES_ARCHIVE_FILE"
  TARGET_DRUPAL_SITES_COMPRESS_STATUS=$?
  TARGET_DRUPAL_SITES_ARCHIVAL_STATUS=$(cat "$TARGET_DRUPAL_SITES_ARCHIVAL_STATUS_FILE" 2>/dev/null)

  if [ "$TARGET_DRUPAL_SITES_ARCHIVAL_STATUS" != "0" ] ||
     [ "$TARGET_DRUPAL_SITES_COMPRESS_STATUS" -ne 0 ]; then
    # The archive file did not exist before this run (checked above), so
    # whatever is there now is a partial result of this run.
    run rm -f "$TARGET_DRUPAL_SITES_ARCHIVE_FILE"
    die "archiving '$TARGET_DRUPAL_SITES_ROOT' to '$TARGET_DRUPAL_SITES_ARCHIVE_FILE' failed!"
  fi

  # Recursively delete the local, temporary path.
  utter "removing '$TARGET_DRUPAL_SITES_WORK_PATH'..."
  remove_drupal_sites_work_path || exit 1
  trap - EXIT

  # Delete each source database backup, lastly. This is best effort: a path
  # without backups is not an error, hence the discarded error output.
  for SOURCE_DRUPAL_SITE_ROOT in "$SOURCE_DRUPAL_SITES_ROOT"/*; do
    SOURCE_DRUPAL_SITE_BACKUP_ROOT="$SOURCE_DRUPAL_SITE_ROOT/files/backup_migrate"
    [ -d "$SOURCE_DRUPAL_SITE_BACKUP_ROOT" ] || continue

    utter "removing database backups from '$SOURCE_DRUPAL_SITE_BACKUP_ROOT'..."
    rm "$SOURCE_DRUPAL_SITE_BACKUP_ROOT/manual/"*.sql*    2>/dev/null
    rm "$SOURCE_DRUPAL_SITE_BACKUP_ROOT/scheduled/"*.sql* 2>/dev/null
  done
}

# ....................{ ARCHIVAL-COMPRESS =post            }....................
# Remove older archive files from "$TARGET_DRUPAL_SITES_ARCHIVE_PATH", now that
# a more recent one has been produced. Only the
# "$MAX_NUMBER_OF_ARCHIVAL_FILES" most recently modified
# "drupal_sites_*.$ARCHIVAL_COMMAND.$COMPRESS_COMMAND" files are kept.
#
# The surplus files are deleted directly; the files being kept are never moved,
# and nothing outside the archive path is touched.
#
# Globals read:    ARCHIVAL_COMMAND, COMPRESS_COMMAND,
#                  MAX_NUMBER_OF_ARCHIVAL_FILES,
#                  TARGET_DRUPAL_SITES_ARCHIVE_PATH
# Globals written: TARGET_DRUPAL_SITES_ARCHIVE_FILES, NUMBER_OF_ARCHIVAL_FILES,
#                  and scratch variables
post_archival_compress() {
  # All archive files, newest first, one per line.
  TARGET_DRUPAL_SITES_ARCHIVE_FILES=$(ls -1tA "$TARGET_DRUPAL_SITES_ARCHIVE_PATH/drupal_sites_"*".$ARCHIVAL_COMMAND.$COMPRESS_COMMAND" 2>/dev/null)
  [ -n "$TARGET_DRUPAL_SITES_ARCHIVE_FILES" ] || return 0

  NUMBER_OF_ARCHIVAL_FILES=$(printf '%s\n' "$TARGET_DRUPAL_SITES_ARCHIVE_FILES" | wc -l)

  if [ "$NUMBER_OF_ARCHIVAL_FILES" -gt "$MAX_NUMBER_OF_ARCHIVAL_FILES" ]; then
    utter "removing older archive files from '$TARGET_DRUPAL_SITES_ARCHIVE_PATH'..."

    # Everything after the first "$MAX_NUMBER_OF_ARCHIVAL_FILES" lines is
    # surplus.
    TARGET_DRUPAL_SITES_STALE_ARCHIVE_FILES=$(printf '%s\n' "$TARGET_DRUPAL_SITES_ARCHIVE_FILES" \
      | tail -n "+$((MAX_NUMBER_OF_ARCHIVAL_FILES + 1))")

    # Feed the loop from a here-document rather than a pipe, so that it runs in
    # this shell and a failing "try" aborts the whole script.
    while IFS= read -r TARGET_DRUPAL_SITES_STALE_ARCHIVE_FILE; do
      [ -n "$TARGET_DRUPAL_SITES_STALE_ARCHIVE_FILE" ] || continue
      try rm "$TARGET_DRUPAL_SITES_STALE_ARCHIVE_FILE"
    done <<EOF
$TARGET_DRUPAL_SITES_STALE_ARCHIVE_FILES
EOF
  fi
}

# ....................{ PATH HANDLING                      }....................
# Abort the script via die() unless the command named by $1 can be found.
#
# Uses the shell's own "command -v" lookup instead of the external "which",
# which is not a standard utility and is not installed everywhere.
#
# Arguments: $1 - the name of the command to look for.
die_if_command_not_found() {
  command -v "$1" >/dev/null 2>&1 || die "'$1' not found in \$PATH!"
}

# Abort the script via die() unless $1 is an existing directory.
#
# Arguments: $1 - the path that must be a directory.
die_if_path_not_found() {
  if [ ! -d "$1" ]; then
    die "'$1' not found!"
  fi
}

# ....................{ I/O HANDLING                       }....................
# Report an informational message, prefixed with this script's name: printed
# to standard output normally, or sent to the system log at "cron.notice"
# priority when "$IS_CRON" is non-empty.
#
# Arguments: $* - the words of the message.
utter() {
  if [ -z "$IS_CRON" ]; then
    echo "${SCRIPT_NAME}: $*"
    return
  fi

  logger -p cron.notice "${SCRIPT_NAME}: $*"
}

# Report an error message, prefixed with this script's name: printed to
# standard error normally, or sent to the system log at "cron.err" priority
# when "$IS_CRON" is non-empty.
#
# Arguments: $* - the words of the message.
curse() {
  case "$IS_CRON" in
    '') echo "${SCRIPT_NAME}! $*" >&2 ;;
    *)  logger -p cron.err "${SCRIPT_NAME}! $*" ;;
  esac
}

# Report an error message via curse() and exit the script with status 1.
#
# The arguments are forwarded as "$@" so that the message reaches curse()
# verbatim; an unquoted $* would collapse runs of whitespace and expand any
# glob characters (such as "*") against the current directory.
#
# Arguments: $@ - the words of the message.
die() {
  curse "$@"
  exit 1
}

# Run a command via run() and exit the script with status 1 if it fails.
#
# The arguments are forwarded as "$@", so that each one reaches run() as the
# caller wrote it instead of being split on whitespace and glob-expanded again.
#
# Arguments: $@ - the command and its arguments.
try() {
  run "$@" || exit 1
}

# Run a command under the "$NICE" prefix, as the user "$RUN_AS" when that
# variable is non-empty and as the current user otherwise.
#
# Arguments are passed on intact: an argument containing whitespace or glob
# characters stays a single, literal argument. "$NICE" itself is deliberately
# left unquoted, since it holds a command plus its options.
#
# Arguments:       $@ - the command and its arguments.
# Globals read:    NICE, RUN_AS
# Globals written: RETURN_CODE (the command's exit status), scratch variables
# Returns:         the exit status of the command (or of "su").
run() {
  if [ -z "$RUN_AS" ]; then
    $NICE "$@"
    RETURN_CODE=$?
    return $RETURN_CODE
  fi

  # "su --command" takes one string that a shell parses again, so wrap every
  # argument in single quotes, writing each embedded single quote as '\''.
  RUN_COMMAND_LINE=$NICE
  for RUN_ARGUMENT in "$@"; do
    RUN_ARGUMENT=$(printf '%s\n' "$RUN_ARGUMENT" | sed "s/'/'\\\\''/g")
    RUN_COMMAND_LINE="$RUN_COMMAND_LINE '$RUN_ARGUMENT'"
  done

  su --command "$RUN_COMMAND_LINE" "$RUN_AS"
  RETURN_CODE=$?
  return $RETURN_CODE
}

# ....................{ IMPLEMENTATION                     }....................
# Run the script. This call comes after every function definition above, so
# all of them are defined by the time main() executes.
main

# --------------------( COPYRIGHT AND LICENSE              )--------------------
# The information below applies to everything in this distribution,
# except where noted.
#              
# Copyleft 2008 by B.w.Curry.
#   
#   http://www.raiazome.com
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
