#!/bin/sh 
#
# SCRIPT:   /secure/mirror_boot.sh   
#
# FUNCTION: backup main boot disk to secondary (cold mirroring).
#           Each night (for example) the offline disk is mounted and 
#           synchronised with the primary disk. 
#           ***** See the latest doc and version of this script at: *****
#           http://www.boran.com/security/sp/coldmirroring20010306.html 
#           http://mail.boran.com/security/sp/solaris/mirror_boot.sh
#
# This script is typically called from the root cron nightly. It mounts the 
# spare disk under /newroot, copies all filesystems, installs a boot block and 
# copies over a new vfstab. This creates a fully updated bootable spare disk. 
# The results of the script are sent to the administrator via email.
#
# USAGE:
#   Set the admin, mounts and targets variables below, then run via cron
#   when disk activity is as low as possible:
#   0 23 * * *   /secure/mirror_boot.sh
#   Set DEBUG to 1, to "dry run" the script without actually copying any data.
#  
# ASSUMPTIONS:
#   - a second identical disk with identical partitions and filesystems exists.
#   - The /newroot mount point exists, e.g.:
#       mkdir /newroot; chmod 777 /newroot ; mount /newroot
#   - entries in vfstab exist for relevant filesystems (e.g. /newroot/usr 
#     /newroot/opt /newroot/var)
#   - /etc/vfstab.newroot exists with correct device entries so that
#     we can boot from the second disk. 
#   - The variables have been set below
#   - The /newroot partitions are unmounted when running this script.
#
# PROBLEMS: The /newroot target is never wiped clean, this has the advantage
# that files deleted several days ago can be recovered, but the disadvantage
# that the new targets are likely to fill up over time (and hence need wiping
# every few months or so). ufsdump could be used rather than cpio to copy the
# device, but then files more than one day old cannot be recovered.
#
# Solaris Intel is a bit tricky. See the notes at the bottom of this file.
#
# HISTORY: 
#     19.Jul.05 sb comments
# <13>29.Jan.03 sb remove old comments. Stop if $config file not found.
#                  Special handling of /var/run
# <12>30.Jul.02 sb Ignore /var/spool/mqueue
# <11>08.Feb.02 sb Use /secure/secure.conf to determine email address
# <10>15.Nov.01 sb "CD /" explicitly, as root's home might be /root
# <9> 24.Oct.01 sb Ignore Doors and UNIX sockets.
# <8> 03.Aug.01 sb Unmounting of nested filesystems in reverse order
#                  [Thanks to Michael Haun]
# <7> 26.Feb.01 sb $BOOTABLE, $ignore_these, print eeprom boot-device
# <6> 19.12.00 sb Correct quoting, and minor fix for first time run. Ignore sockets.
# <5> 12.10.00 sb Shutdown mysql (or other app) before backup, restart after
#                 Don't abort if cpio has errors, continue as much as possible.
#                 Fixed 2>1 =>  2>&1 bugs
# <4> 6.10.00 sb ACL backups not working correctly, disable.
# <3> 25.9.00 sb Log result to syslog, add boot support for Intel Architecture.
#                tested on Solaris8 x86. cpio: backup ACLs & comment.
# <2> 20.7.00 sb /proc & mount point fixes. Auto get boot raw device.
#                Add VERBOSE variable to suppress "ok" emails if wanted.
#                Tested on Solaris 2.8 too.
# <1> 16.6.00 sb Additional comments, add DEBUG+newroot_device variables
# <0> 28.8.99 sb Original script by Sean Boran on Solaris 2.7
#
# LICENSE:
#      This script was developed by Sean Boran, http://www.boran.com.
#      It can be distributed for free as long as these headers are included.
#      Please send any bug fixes or improvements to sean AT boran.com.
##################################

## Print a message and abort the script.
##   $*  - message text; the arguments are joined by the first character
##         of IFS (a space by default)
## The message is written to stdout and the script exits with status 1.
stop () {
  # "$*" must be quoted: unquoted, the shell word-splits and glob-expands
  # the message, collapsing whitespace and turning a '*' into file names.
  echo "$*"
  exit 1
}

###### Set the following variables according to your needs ############

# Dry run: show lots of detail on what would be done, but don't copy
DEBUG='0';

## Where should we email results? <11>
## The address is read from the config file: it is the 4th ':'-separated
## field of the line whose 1st field is $group and whose 2nd field is $tool.
config="/secure/secure.conf"
[ ! -f "$config" ] && stop "$config missing - stopped.";
group=default
tool="mirror_boot"
# The awk variables are passed as operand assignments (not -v) so that this
# also works with older awk implementations.
admin=`awk -F: '{if ($1==g && $2==t) print $4}' g="$group" t="$tool" "$config"`
# Without a matching entry (or with an empty 4th field) mailx would be
# called without a recipient and the report would be lost: fall back to root.
if [ -z "$admin" ] ; then
  admin='root';
fi


## / will always be backed up, only list other filesystems
targets='/var'
#targets='/disk2'

## Unmounting of nested filesystems (e.g. /var/mail in /var) needs to be
## done in reverse order, so list the $targets accordingly here. <8>
revtargets="/var";
#revtargets="/var/mail /var/spool/MHS /var";

## List empty mount points needed, we need /proc, local filesystems mounts,
## or remote NFS mounts.
## /var/run is needed on Solaris8 <3>, but you don't need to list it here.
## /oldroot is needed for mounting the primary disk, when the secondary mirror
## is used to boot the machine. It's not actually used by this script.
## Filesystems in $targets should be listed here too.
mounts='/proc  /var  /oldroot';
#mounts='/proc /disk2  /oldroot';

## Send an email even if backup was OK?
#VERBOSE='0';
VERBOSE='1';

## If you want specific applications stopped before the backup
## and restarted afterwards, add in the commands here. <5>
## If there are no app daemons, leave both empty:
#stop_daemons='/opt/postfix/postfix stop';
#start_daemons='/opt/postfix/postfix start';
stop_daemons='';
start_daemons='';

## Is the primary disk bootable? Set to 0 if you don't want a bootblock
## installed
BOOTABLE='1';

## Don't backup files that match this (egrep) pattern <12>
ignore_these='\.tmp|^core|/var/spool/mqueue|/opt/netscape/cache';
# Other candidates:
#ignore_these='.gz|.Z';

#####################################################

## -- variables --
# Log file of this run; the name carries the PID of this shell ($$).
f="/var/tmp/f$$"
# Pessimistic default: the run counts as failed until the very end of the
# script resets this to 0.
PROBLEM=1

# File copy command (cpio in pass mode):
# - copies every file named on stdin, creates directories, maintains dates
#   and overwrites newer files in the target
# - when debugging '-v' (verbose) can be added, but the output will be huge
# - '-P' (preserve Solaris ACLs) is disabled because it kept failing with
#   errors such as: <4>
#"Error with acl() of "usr/dt/dthelp/nls/en_US.UTF-8", errno 2, No such file or directory"
#cpio='cpio -pdmuP';
# - to exclude certain file patterns add '-f PATTERN'
# - to skip corrupted files use '-k' ??
cpio="cpio -pdmu"

# File list producer: stays on one filesystem and leaves out sockets,
# pipes and Doors, which can cause grief <9>
find="find . ! -type s ! -type D ! -type p -xdev -print"

##---------- functions -------
check_err () {
  ## Abort handler.  $* is the exit status of the operation that was just
  ## performed.  A status of "0" means success and nothing happens; any
  ## other value is logged, the mirror filesystems are unmounted, the
  ## results are sent and the script exits with status 1.
  [ "$*" = "0" ] && return 0

  echo "SCRIPT $0 ABORTED: error." >>$f 2>&1
  umount_filesystems
  umount /newroot
  send_results
  exit 1
}

echo_f () {
  ## Append the arguments, joined into a single line, to the run log $f.
  { echo "$*"; } >>$f
}

send_results () {
  ## Finish a run: add a footer to the log, restart the applications named
  ## in $start_daemons (if any) and deliver the log $f.
  ##  - DEBUG=1            : print the log to stdout, keep the file
  ##  - PROBLEM=1          : mail it as an error, log an alert, keep the file
  ##  - otherwise          : mail + log an info message only when VERBOSE=1,
  ##                         then delete the log file
  echo_f "  "
  echo_f "This email was generated by `uname -n`:$0 "

  ## Restart daemons, if any, <5>
  if [ "x$start_daemons" != "x" ] ; then
    echo_f "Restart key applications after backup.."
    echo_f "$start_daemons"
    $start_daemons  >>$f 2>&1
  fi

  if [ "$DEBUG" = "1" ] ; then
    # Debug mode: show the log on stdout instead of mailing it
    cat $f
  elif [ "$PROBLEM" = "1" ] ; then
    # <3> Failure: mail and syslog it; the log file is kept for inspection
    mailx -s "`uname -n` Error: Boot disk backup" $admin < $f
    logger -p daemon.alert "$0 Error: Boot disk backup see $f"
  else
    # Success: reporting is optional <2>, the log is always removed <10>
    if [ "$VERBOSE" = "1" ] ; then
      mailx -s "`uname -n` OK: Boot disk backup" $admin < $f 2>&1
      logger -p daemon.info "$0 Info: Boot disk backup OK"
    fi
    rm $f
  fi
}

## Create the empty mount-point directories listed in $mounts, plus
## /var/run, below /newroot.
## Globals read: mounts, DEBUG, f (log file).
## With DEBUG=1 this is a dry run: the mkdir commands are only printed to
## stdout and nothing is mounted, created or changed.
make_mount_points () {
  echo "make mount points $mounts .."      >>$f
  for mnt in $mounts ; do
    if [ "$DEBUG" = "1" ] ; then
      echo "mkdir /newroot/$mnt"
    else
      # Errors are deliberately discarded here
      mkdir /newroot/$mnt >/dev/null 2>&1;
      chmod 777 /newroot/$mnt >/dev/null 2>&1;
    fi
  done

  # /var/run is also needed on Solaris8 <3>.
  # /var/run is special, since we need to make sure /var is mounted first.
  if [ "$DEBUG" = "1" ] ; then
    # Dry run: like the loop above, only show what would be done instead
    # of touching the mirror disk.
    echo "mkdir /newroot/var/run"
  else
    mount /newroot/var >/dev/null 2>&1;
    mkdir /newroot/var/run >/dev/null 2>&1;
    chmod 777 /newroot/var/run >/dev/null 2>&1;
    umount /newroot/var >/dev/null 2>&1;
  fi
}

umount_filesystems () {
  ## Unmount every filesystem listed in $revtargets from below /newroot,
  ## in the order given (nested filesystems first) <8>.  A note about the
  ## action and all umount output are appended to the log $f.
  {
    echo "umount /newroot/{$revtargets} "
    for filesys in $revtargets ; do
      umount /newroot$filesys 2>&1
    done
  } >>$f
}

##---------- main -------
echo "----- Backup $targets to /newroot----"    >>$f
echo "Logfile: $f on `date`" >>$f
echo "  " >>$f
if [ "$DEBUG" = "1" ] ; then echo "DEBUG mode on"; fi
# <10> cd explicitly: root's home might not be /, and the file list below
# is produced relative to the current directory.
cd /;

## Stop daemons, if required <5>
if [ "$stop_daemons" !=  "" ] ; then
   echo "Stopping key applications before backup.." >>$f
   $stop_daemons  >>$f  2>&1
fi

## First do Root
# The spare disk is expected to be unmounted; a failure of this first
# umount is therefore ignored.
umount /newroot >> /dev/null 2>&1
mount /newroot >> $f 2>&1
check_err "$?";

echo "Backing up root filesystem at `date` to /newroot" >>$f
if [ "$DEBUG" = "1" ] ; then
  echo "$find | egrep -v $ignore_these| $cpio /newroot at `date`" >>$f
  echo "DEBUG MODE: no data actually copied" >> $f 2>&1
else
  # $? after the pipeline is the exit status of its last stage (cpio)
  $find | egrep -v "$ignore_these"| $cpio /newroot  >> $f 2>&1
  check_err "$?";
fi
make_mount_points;

## Then other filesystems
for filesys in $targets ; do
  echo "mounting /newroot$filesys" >> $f 2>&1
  mount /newroot$filesys >> $f 2>&1
  check_err "$?";
  # The redirection must belong to the cd command itself.  With a ';' in
  # between, $? would be the status of the empty redirection (always 0)
  # and a failed cd would go unnoticed, copying the wrong directory.
  cd $filesys >> $f 2>&1
  check_err "$?";
  echo "Backing up $filesys at `date`" >>$f
  if [ "$DEBUG" = "1" ] ; then
    echo "$find | egrep -v $ignore_these| $cpio /newroot$filesys at `date`" >>$f
    echo "DEBUG MODE: no data actually copied" >> $f 2>&1
  else
    $find | egrep -v "$ignore_these"| $cpio /newroot$filesys  >> $f 2>&1
    ## cpio can give weird errors like the following:
    ## Error with lstat() of "grou ", errno 2, No such file or directory <5>
    ## NOTE(review): history entry <5> speaks of continuing after cpio
    ## errors, but a non-zero cpio status does abort the run here.
    check_err "$?";
  fi
done

echo " "     >>$f
df -k -F ufs >>$f 2>&1
echo " "     >>$f
echo "Prom boot order is: `eeprom boot-device` "     >>$f
echo " "     >>$f

echo "Copy over /etc/vfstab.newroot.... " >>$f
if [ ! -d /newroot/etc ] ; then 
  # The first time we run this script, or newroot is empty <6>
  # so create it with Default Solaris7/8 settings
  mkdir /newroot/etc; chmod 755 /newroot/etc;
  chown root:sys /newroot/etc;
fi
# Keep the previous vfstab of the mirror as vfstab.<pid>.
# Errors are ignored on this line, it may fail on the first run.
mv /newroot/etc/vfstab         /newroot/etc/vfstab.$$ >/dev/null 2>&1
cp /etc/vfstab.newroot         /newroot/etc/vfstab >> $f 2>&1
check_err "$?";

umount_filesystems;

# <2> Before we unmount /newroot, do a df and get the device name.
# Only the line whose mount point (last column) is exactly /newroot is
# used, then the block device is turned into the raw device (dsk -> rdsk).
newroot_device=`df -k -F ufs | awk '$NF == "/newroot" {print $1}' |sed 's/dsk/rdsk/'`
umount /newroot                         >> $f 2>&1
check_err "$?";

if [ "$BOOTABLE" = "1" ] ; then
  # Boot block handling is different for x86 and sparc <3>
  if [ "`uname -m`" = "i86pc" ] ; then
    # Solaris Intel uses slice 2 for boot and has different arguments
    x86_device=`echo $newroot_device|sed 's/s0/s2/'`;
    echo "Install boot block on (PC device) $x86_device " >>$f
    # installboot output goes to the log so that it shows up in the report
    /usr/sbin/installboot /usr/platform/`uname -i`/lib/fs/ufs/pboot /usr/platform/`uname -i`/lib/fs/ufs/bootblk  $x86_device >> $f 2>&1
    check_err "$?";

  else
    # assume sparc
    echo "Install boot block on $newroot_device " >>$f
    /usr/sbin/installboot /usr/platform/`uname -i`/lib/fs/ufs/bootblk $newroot_device >> $f 2>&1
    check_err "$?";
  fi
fi

echo " " >>$f
echo "Finished at `date` " >>$f
# No errors so far, so tell this to user
PROBLEM='0';

send_results;
############ end of script ##############


#####################  Solaris Intel Notes ###################
#
# Solaris Intel: Booting from 2nd (backup) Disk <3>
#
# Example: "server1" has two scsi disks, the main one has target 0, the second
#  (target 1) is mirrored each night from the first. If the main disk fails, 
#  proceed as follows to boot from the second disk:
#
# a) Stop the PC, switch it on and allow the BIOS to do its checks.
# b) When the scsi controller is checking the disks, Press Ctrl-A (for example)
#    to enter the scsi menu.
# c) Change the boot device to scsi ID 1 (which is the ID of the second disk). 
#    Save changes, and exit, and it will probably insist on a reboot.
# d) After scsi disk check, the "SunOS Secondary Boot" will start, press ESC to
#    enter its menu and F2 "continue" about three times until the list of 
#    boot disks is shown. Select the backup disk (target 1) and press continue,
#    then allow the boot process to continue.
# e) login as usual, check with "df" that you really are using the backup disk.
# f) Disable the mirroring script in the root cron, until the primary disk is 
#    working again!

#############################################################
