#!/bin/csh -f
#
# /secure/chk_disk
#
# Function:
#   If a local (ufs) filesystem is more than 97% full, or the 15-minute
#   load average exceeds $load_limit/100, mail $admin ONCE and create a
#   flag file. While the flag file exists no further alerts are sent.
#   On the first run that finds no problem, mail $admin that the problem
#   is fixed and remove the flag file.
#
# Example cron usage:
#   0 8-18 * * 1-5 /secure/chk_disk
#
# Notes:
#   - "-f" on the interpreter line stops csh sourcing ~/.cshrc, so the run
#     does not depend on the invoking user's login environment.
#   - All external commands are called by absolute path for the same reason.
#   - NOTE(review): $tempfile and $flagfile live in world-writable /tmp under
#     predictable names; consider moving them to a root-only directory.
#
# Tested on Solaris 2.   Sean Boran / 18.Mar.2001

set admin      = root
set subject    = "Disk/performance problems"
set subject2   = "Disk/performance FIXED - OK"
set load_limit = 190;       # max. allowed "uptime" load average * 100
# The disk usage limit (97% full) is hard-coded in the awk program below.
set tempfile   = /tmp/chk_disk.$$
set flagfile   = /tmp/chk_disk.flag
set mailit     = /usr/ucb/mail
set node       = `/usr/bin/uname -n`

## List local (ufs) filesystems, ignore cdroms, strip the percent sign,
## split each line into fields and check the capacity column (field 5).
## A line without exactly 6 fields (e.g. df wrapped a long device name)
## is reported as a "Filesystem error" rather than silently skipped.
/usr/sbin/df -k -F ufs \
    | /usr/bin/egrep -v "Filesystem|cdrom" \
    | /usr/bin/sed 's/\%//' \
    | /usr/bin/awk '{if (NF != 6) \
        printf("Filesystem error %s\n",$0); else if (($5 > 97) ) \
        printf("Filesystem %s is %s full\n",$1,$5)}' > $tempfile

## Check the system load over the last 15 minutes.
## The 15-minute average is always the LAST comma-separated field of the
## uptime output; its absolute position varies with the "up ..." part
## (e.g. "up 3 day(s), 2:15," vs "up 50 mins,"), so use $NF, not $6.
## int(... + 0.5) guarantees an integer, which is all csh can compare.
set load = `/usr/bin/uptime | /usr/bin/awk -F, '{print int(100 * $NF + 0.5)}'`
# An empty value would make the comparison below a csh syntax error.
if ("$load" == "") set load = 0
if ($load > $load_limit) then
    echo "System under heavy load (greater than .$load_limit)..." >> $tempfile
endif
#echo "Current load over 15 mins=$load"

## Anything written to the temp file means a problem was detected.
## (-z is true for an existing zero-length file.)
if (! -z $tempfile) then
    # We have a problem: report it only if not already reported.
    if (! -e $flagfile) then
        # Add some more info to help the admin.
        echo " " >> $tempfile
        /usr/sbin/df -k >> $tempfile
        echo " " >> $tempfile
        /usr/bin/uptime >> $tempfile
        $mailit -s "$node $subject" $admin < $tempfile
        # Set the flag so later runs stay quiet until the problem clears.
        /usr/bin/touch $flagfile
    endif
else
    # No problem now; if one was flagged on an earlier run, announce the fix.
    if (-e $flagfile) then
        echo "previous problem fixed" | \
            $mailit -s "$node $subject2" $admin
        # Clear the flag.
        /bin/rm -f $flagfile
    endif
endif

# cleanup
/bin/rm -f $tempfile