#!/bin/ksh
#
###########################################################################
#
# program to produce IO loading for benchmarking
#
# this script expects some setup - you should be running iostat and/or sar
# to gather the OS stats as this runs
#
# it is meant to run on solaris, but with a few changes, could work on
# any flavor of unix
#
# Areas of concern:
#   iostat: M/s, %b, asvc_t
#   sar: %wait
#
# Flow (see the main section at the bottom of the file):
#   collection  - start background sar and iostat recorders
#   ddload      - run DDNUM raw-volume dd readers for DDLOAD seconds
#   cpupercent  - pick SLEEPCHECK/UTILRANGE from the number of CPUs
#   benchload   - start/kill $BNCHMRK processes until the CPU idle
#                 percentage is within UTILRANGE of IDLESETLEVEL, then
#                 hold that level for RUNTIME seconds
#   cleanup     - kill every process this script started
#
# Only processes started by this script are ever killed; their PIDs are
# recorded in DDPIDS, BENCHPIDS, SARPID and IOSTATPID.
#
###########################################################################
#
# $Log: ioload,v $
# Revision 1.33 2004/04/09 16:28:22 ms35068
# added another backslash (same cut and paste error)
#
# Revision 1.32 2004/04/08 20:59:05 ms35068
# added trailing backslash
#
# Revision 1.31 2003/10/31 19:36:52 MS35068
# OSVER added, pkill and pgrep work arounds for OSVER, iostat OSVER handled as well.
#
# Revision 1.30 2003/10/10 13:46:20 MS35068
# changed LOGDIR to better dir
#
# Revision 1.29 2003/10/07 18:37:12 MS35068
# added /dev/md for disk groups
#
# Revision 1.28 2003/10/03 14:33:29 MS35068
# added BINDIR to input options
#
# Revision 1.27 2003/10/03 03:21:50 MS35068
# added 'sort -n' to pgrep line
#
# Revision 1.26 2003/10/02 21:17:01 MS35068
# changed print to a -R
#
# Revision 1.25 2003/10/02 21:06:00 MS35068
# changed a test variable to the correct test variable
#
# Revision 1.24 2003/10/02 20:15:22 MS35068
# tests
#
# Revision 1.23 2003/10/02 20:08:51 MS35068
# added debug to all functions
#
# Revision 1.22 2003/10/02 20:00:58 MS35068
# syntax fixes
#
# Revision 1.21 2003/10/02 19:20:39 MS35068
# first test of new level monitoring to kill off benchmarks if too much load
# or to start more if not enough load BUT only if it is outside the
# "tolerance" range.
#
# Revision 1.20 2003/09/27 00:27:23 MS35068
# initialize VERSION
#
# Revision 1.19 2003/09/25 23:08:48 MS35068
# added version
#
# Revision 1.18 2003/09/25 22:59:05 MS35068
# changed IDLETIME setup (initialization and ordering)
# changed some print statements.
#
# Revision 1.17 2003/09/23 19:30:53 MS35068
# fixed syntax and deleted bogus char
#
# Revision 1.16 2003/09/23 16:43:42 MS35068
# added ddload time
# added user check
#
# Revision 1.15 2003/09/23 13:55:06 MS35068
# added test to see if rootdg only and return if such
# added final exit to program
#
# Revision 1.14 2003/09/22 21:31:28 MS35068
# added new runtime secion, relaid out functions for benchmarking area
#
# Revision 1.13 2003/08/29 15:00:40 MS35068
# changed mkdir section
#
# Revision 1.12 2003/08/29 14:56:10 MS35068
# added error check
#
# Revision 1.11 2003/08/29 14:53:32 MS35068
# added mkdir LOGDIR and reorded some variable declarations
#
# Revision 1.10 2003/08/22 20:43:19 MS35068
# added message
#
# Revision 1.9 2003/08/22 20:38:40 MS35068
# added a normalize time after dd's finish
#
# Revision 1.8 2003/08/22 20:36:42 MS35068
# moved sleep and added kill for dd
#
# Revision 1.7 2003/08/22 20:30:45 MS35068
# changed collection with backgrounding and times
#
# Revision 1.6 2003/08/22 20:22:49 MS35068
# backgroup stats
#
# Revision 1.5 2003/08/22 20:20:48 MS35068
# added logging for stats and function as well
#
# Revision 1.4 2003/08/22 19:52:56 MS35068
# creation of SLEEPTIME and change sleep value
#
# Revision 1.3 2003/08/22 13:27:32 MS35068
# added more descrip to verbose messages
#
# Revision 1.2 2003/08/21 21:30:17 MS35068
# changed idletime to 50%
#
# Revision 1.1 2003/08/21 21:29:08 MS35068
# Initial revision
#
#
###########################################################################

set -o allexport

BINDIR=/mnt/san-upgrade/load_tests
# BLKSZ is 8k default
BLKSZ=8
BNCHMRK=iozone
# modified later
DATE=$(date +%Y%m%d%H%M)
DDNUM=10
DEBUG=0
DDLOAD=20
HOSTNAME=$(hostname)
IDLESETLEVEL=50
IOSTAT="iostat -zxnM"
MAXSTATRUN=10000
MYTMPFILE=/tmp/mylist.$$
OSTYPE=$(uname)
#
# OSVER is redone in the SunOS section to only get the minor version
# this makes for easier (numeric) comparisons
#
OSVER=$(uname -r)
# runtime given as minutes as arg, but used as seconds
# hence default of 3 min is 180 sec
RUNTIME=180
SLEEPBENCH=15
# SLEEPCHECK and UTILRANGE are modified later (see cpupercent)
SLEEPCHECK=2
UTILRANGE=2
VERBOSE=0
VERSION=0
RUNUSER=root

# vars dependent on other var's definition
LOGDIR=/var/tmp/ioload/$HOSTNAME
LOGFILE=$LOGDIR/ioload_log
BENCHFILE=$LOGDIR/ioload_benchmark_$DATE
SARFILE=$LOGDIR/sarfile.$$
IOSTATFILE=$LOGDIR/iostat.$$

# PIDs of the background processes started by this script; cleanup and
# killbenchload signal only these, never same-named processes that belong
# to somebody else
DDPIDS=
BENCHPIDS=
SARPID=
IOSTATPID=
#
set +o allexport

#############
#check to see if superuser
#############
# "id" prints e.g. uid=0(root) gid=...; turning the parentheses into blanks
# makes the user name the second field
if [[ $(id | tr '(' " " | tr ')' " " | awk '{print $2}') != $RUNUSER ]]
then
    print "ERROR: must be $RUNUSER"
    exit 2
fi

#############
# make sure the log directory exists before anything is tee'd into LOGFILE
#############
LOGDIRMADE=0
if [[ ! -d $LOGDIR ]]
then
    mkdir -p "$LOGDIR" || {
        print "ERROR: cannot create $LOGDIR ... exiting"
        exit 2
    }
    # reported in the main section once VERBOSE is known
    LOGDIRMADE=1
fi

#############
# small helpers used by the option parser
#############

# isnum: succeeds only when $1 is a non-empty string of decimal digits
isnum ()
{
    [[ $1 == +([0-9]) ]]
}

# badarg: print "ERROR: $1" to stdout and LOGFILE, then exit 1
badarg ()
{
    print "ERROR: $1" \
        | tee -a "$LOGFILE"
    exit 1
}

#############
# OS dependent settings
# BINDIR is prepended to PATH after the options are read so that -bindir
# really changes where the benchmark is found
#############
case $OSTYPE in
SunOS)
    PATH=/usr/sbin:/usr/bin:$PATH
    OSVER=$(uname -r | awk -F. '{print $2}')
    # iostat -z is only used on Solaris 8 and later
    if (( OSVER >= 8 ))
    then
        IOSTAT="iostat -zxnM"
    else
        IOSTAT="iostat -xnM"
    fi
    ;;
Linux)
    PATH=/sbin:/usr/sbin:/usr/bin:/bin:$PATH
    IOSTAT="iostat"
    ;;
CYGWIN_NT-5.0)
    # nothing OS specific to set
    :
    ;;
*)
    print "ERROR: $OSTYPE is not a supported OS" \
        | tee -a "$LOGFILE"
    exit 1
    ;;
esac

###########
# read in options & arguments
###########
while (( $# ))
do
    case $1 in
    -b)
        shift
        if (( $# )) && isnum "$1"
        then
            BLKSZ=$1
            shift
        else
            badarg "blocksize (-b) needs a numeric argument, assumes KB: i.e. 8,32,64..."
        fi
        ;;
    -bindir)
        shift
        if (( $# ))
        then
            BINDIR=$1
            shift
        else
            badarg "bindir needs a path argument, i.e. /some/dir"
        fi
        ;;
    -debug)
        shift
        DEBUG=1
        ;;
    -d)
        shift
        if (( $# )) && isnum "$1"
        then
            DDNUM=$1
            shift
        else
            badarg "number of dd's to run (-d) needs a numeric argument, i.e. 10,20,..."
        fi
        ;;
    -i)
        shift
        if (( $# )) && isnum "$1"
        then
            IDLESETLEVEL=$1
            shift
        else
            badarg "percent idle level (-i) needs a numeric argument"
        fi
        ;;
    -l)
        shift
        if (( $# )) && isnum "$1"
        then
            DDLOAD=$1
            shift
        else
            badarg "dd load runtime (-l) needs a numeric argument"
        fi
        ;;
    -r)
        shift
        if (( $# )) && isnum "$1"
        then
            #convert minutes to seconds
            (( RUNTIME = $1 * 60 ))
            shift
        else
            badarg "runtime (-r) needs a numeric argument"
        fi
        ;;
    -s)
        shift
        if (( $# )) && isnum "$1"
        then
            SLEEPBENCH=$1
            shift
        else
            badarg "sleeptime (-s) needs a numeric argument"
        fi
        ;;
    -v)
        shift
        VERBOSE=1
        ;;
    -V)
        shift
        VERSION=1
        ;;
    -h|*)
        if [[ $1 != -h ]]
        then
            print "\n OPTION: $1 is invalid.\n"
        else
            print ""
        fi
        cat << EOF
Usage: $0:
Usage: $0: [-debug] [-v]
Usage: $0: [-debug] [-v] [-V] [-b #] [-d #] [-bindir /path] [-i #] [-l #] [-r #] [-s #]
Usage: $0: -h

   no args: runs with defaults
   -debug: debug mode, does a 'set -x'
   -bindir: path to benchmark executable
   -b: block size value, assumes KB (default 8k)
   -d: number of concurrent dd's to run (default 10)
   -i: % idle time - amount of benchmark loading on the system (default 50%)
   -h: help - synopsis
   -l: number of Seconds to run dd load (default 20)
   -r: number of Minutes to run the benchmark load (default 3)
   -s: number of seconds to sleep between benchmark load checks (default 15)
   -v: verbose mode
   -V: prints out version of program

EOF
        exit 1
        ;;
    esac
done

if [[ ! -d $BINDIR ]]
then
    print "ERROR: $BINDIR not found - needed to run $BNCHMRK" \
        | tee -a "$LOGFILE"
    exit 1
fi

# the benchmark executable is looked up through PATH
PATH=$BINDIR:$PATH

if (( DEBUG || VERBOSE ))
then
    (print "Command line option settings"
    print "...BINDIR: $BINDIR"
    print "...BLKSZ: $BLKSZ"
    print "...BNCHMRK: $BNCHMRK"
    print "...DATE: $DATE"
    print "...DDLOAD: $DDLOAD"
    print "...DDNUM: $DDNUM"
    print "...HOSTNAME: $HOSTNAME"
    print "...IDLESETLEVEL: $IDLESETLEVEL"
    print "...IOSTATFILE: $IOSTATFILE"
    print "...LOGFILE: $LOGFILE"
    print "...OSTYPE: $OSTYPE"
    print "...RUNTIME (converted to seconds): $RUNTIME"
    print "...SLEEPBENCH: $SLEEPBENCH"
    print "...SARFILE: $SARFILE"
    print "...VERBOSE: $VERBOSE") | tee -a "$LOGFILE"

    if (( DEBUG ))
    then
        set -x
    fi
fi

#
# collection: start the background statistics recorders.
#   sar writes its binary data to SARFILE, iostat writes text to IOSTATFILE;
#   both sample once per second, at most MAXSTATRUN times.
#   Sets SARPID and IOSTATPID so cleanup can stop exactly these two.
#
collection ()
{
    if (( DEBUG ))
    then
        set -x
    fi

    sar -u -o "$SARFILE" 1 $MAXSTATRUN > /dev/null &
    SARPID=$!

    # IOSTAT holds the command plus its flags, so it must stay unquoted
    $IOSTAT 1 $MAXSTATRUN > "$IOSTATFILE" &
    IOSTATPID=$!
}

#
# ddload: read the raw VxVM volumes with DDNUM concurrent dd processes.
#   The volumes of every enabled disk group other than rootdg are listed
#   in MYTMPFILE (one "diskgroup:volume" per line) and handed out round
#   robin.  The dd's run for DDLOAD seconds, are killed, and the system
#   gets 20 seconds to settle.
#   Returns early (after logging an ERROR) when there is nothing to read.
#
ddload ()
{
    if (( DEBUG ))
    then
        set -x
    fi

    # get all non-OS disk groups
    DGLIST=$(vxdg list | grep -v rootdg \
        | grep -v /dev/md \
        | grep -w enabled \
        | awk '/enabled/ {print $1}')

    if [[ -z $DGLIST ]]
    then
        print "ERROR: only root diskgroup - no other diskgroups to work on" \
            | tee -a "$LOGFILE"
        return
    fi

    # get volumes per disk group
    # this will create a file that will be read from the top to
    # give volumes that are active for the dd's to run against
    > "$MYTMPFILE"
    for DG in $DGLIST
    do
        vxprint -g $DG \
            | awk '/^v / {print $2}' \
            | sed "s/.*/$DG:&/" >> "$MYTMPFILE"
    done

    FILELINES=$(wc -l < "$MYTMPFILE" | awk '{print $1}')

    # without this guard every dd would be pointed at /dev/vx/rdsk/ itself
    if (( DDNUM && FILELINES == 0 ))
    then
        print "ERROR: no volumes found in the diskgroups - no dd load started" \
            | tee -a "$LOGFILE"
        return
    fi

    DDPIDS=
    LOOPCNT=1
    LINECNT=1
    while (( LOOPCNT <= DDNUM ))
    do
        # we are using raw - this could be setup to
        # test filesystem (dsk), use fstyp to test
        # would also need to see if mounted
        # we assume that volumes are started
        DEV=$(sed -n ${LINECNT}p "$MYTMPFILE" | sed 's@:@/@')

        if (( VERBOSE ))
        then
            print "INFO: $(date +%Y%m%d%H%M) dd $LOOPCNT of $DDNUM on /dev/vx/rdsk/$DEV" \
                | tee -a "$LOGFILE"
        fi

        dd bs=1024 if=/dev/vx/rdsk/$DEV of=/dev/null &
        DDPIDS="$DDPIDS $!"

        # don't go past number of lines/volumes available
        # if so, then reset counter for line prints back to 1
        # (strictly less than: line FILELINES is the last one that exists)
        if (( LINECNT < FILELINES ))
        then
            (( LINECNT = LINECNT + 1 ))
        else
            LINECNT=1
        fi
        (( LOOPCNT = LOOPCNT + 1 ))
    done

    if (( VERBOSE ))
    then
        print "INFO: sleeping while dd's run"
    fi

    # if you give 0 for DDNUM, then you don't want dd's, so why waste
    # this sleep time
    if (( DDNUM ))
    then
        # let dd's run for a time, then kill them
        sleep $DDLOAD
        # DDPIDS is deliberately unquoted: one argument per PID.
        # errors for dd's that already ended are of no interest
        kill -9 $DDPIDS 2>/dev/null
        DDPIDS=

        # let system normalize
        sleep 20
    fi
}

#
# benchload: drive the benchmark phase.
#   Initialises the counters that runbenchload keeps between calls, takes
#   a first idle reading, ramps the load up and then holds it for RUNTIME
#   seconds (timebenchload).
#
benchload ()
{
    if (( DEBUG ))
    then
        set -x
    fi

    #set CNT here for use by timebenchload - keeping it from
    # reseting to 1 on each run, thereby causing the first filesystem
    # from getting hit each time timebenchload could call runbenchload
    # trying to create a 'static'
    RBL_CNT=1
    #same for LINECNT
    RBL_LINECNT=1

    IDLERUNLEVEL=100

    runsetvars
    runbenchload
    timebenchload
}

#
# runbenchload: start benchmark processes until the system is busy enough.
#   The mount points (df -kl) whose df line matches one of the enabled
#   non-root disk groups are listed in MYTMPFILE and handed out round
#   robin.  One $BNCHMRK is started per pass, followed by a SLEEPCHECK
#   pause and a new idle reading, for as long as the idle percentage is
#   above IDLESETLEVEL by more than UTILRANGE.
#   Every started PID is appended to BENCHPIDS.
#   Returns early (after logging an ERROR) when there is nowhere to run.
#
runbenchload ()
{
    if (( DEBUG ))
    then
        set -x
    fi

    # get all non-OS disk groups
    DGLIST=$(vxdg list | grep -v rootdg \
        | grep -v /dev/md \
        | grep -w enabled \
        | awk 'BEGIN {ORS = OFS = "|"} {print $1}')

    # awk ends the list with a "|"; as an egrep pattern that is an empty
    # alternative, which can match every line of the df output
    DGLIST=${DGLIST%\|}

    if [[ -z $DGLIST ]]
    then
        print "ERROR: only rootdg - no other diskgroups to work on" \
            | tee -a "$LOGFILE"
        return
    fi

    # use DGLIST to grep all mountpoints that are part of the diskgroups
    df -kl | egrep -e "$DGLIST" \
        | awk '{print $6}' > "$MYTMPFILE"

    # setup for the benchmark runs
    FILELINES=$(wc -l < "$MYTMPFILE" | awk '{print $1}')

    # without this guard the benchmark file would be created in /
    if (( FILELINES == 0 ))
    then
        print "ERROR: no mounted filesystems found for the diskgroups - no $BNCHMRK load started" \
            | tee -a "$LOGFILE"
        return
    fi

    # idlerunlevel - where the system idle time is at now
    # idlesetlevel - where we want to the idle time to be at
    # sleepcheck - time to sleep between creation of more load - give
    #              system ample time to ramp up
    # utilrange - the +/- range from the idletime of the system
    #   if you are outside of the range negative - start more load
    #   if you are outside of the range positive - kill off some load

    # start more running - if needed
    while (( IDLERUNLEVEL > IDLESETLEVEL && IDLEDIFFABS > UTILRANGE ))
    do
        # RBL_LINECNT survives between calls; the list may be shorter now
        if (( RBL_LINECNT > FILELINES ))
        then
            RBL_LINECNT=1
        fi

        if (( VERBOSE ))
        then
            print "INFO: starting $RBL_CNT $BNCHMRK load with idle of $IDLERUNLEVEL" \
                | tee -a "$LOGFILE"
        fi

        THISMTPT=$(sed -n ${RBL_LINECNT}p "$MYTMPFILE")

        # random read/writes (non-destructive)
        # 8K block size (default)
        # used to measure total throughput and total throughput per
        # controller. loop on each FS until load go down to %idle
        # time value we defined then sleep 20s when load is reached
        #
        # iozone
        # -i 0 : must always have 0 (create temp file)
        # -i 2 : random read/write test
        # -f dir/file : file to test
        # -r # : record size to test
        # -s # : file size to test
        # -d : diag mode, verifies every byte in i/o subsystem
        #
        # use RBL_CNT instead of RBL_LINECNT as you have to watch if number
        # of entries is divisible evenly by FILELINES, you would use
        # the same count number in the same mountpt - not unique
        #
        $BNCHMRK -i 0 -i 1 -i 2 -r ${BLKSZ}k -s 500m -f $THISMTPT/IozoneTmpFile.$RBL_CNT \
            >> "$BENCHFILE" 2>&1 &
        BENCHPIDS="$BENCHPIDS $!"

        # don't go past number of lines/volumes available
        # if so, then reset counter for line prints back to 1
        # (strictly less than: line FILELINES is the last one that exists)
        if (( RBL_LINECNT < FILELINES ))
        then
            (( RBL_LINECNT = RBL_LINECNT + 1 ))
        else
            RBL_LINECNT=1
        fi

        (( RBL_CNT = RBL_CNT + 1 ))

        # pause so we can let new process get ramped up
        sleep $SLEEPCHECK
        runsetvars
    done
}

#
# killbenchload: kill benchmark processes until the system is idle enough.
#   While the idle percentage is below IDLESETLEVEL by more than UTILRANGE,
#   the oldest still-running PID in BENCHPIDS is killed, followed by a
#   SLEEPCHECK pause and a new idle reading.
#   Stops as soon as none of our benchmark processes is left, because the
#   remaining load is then not ours to remove.
#
killbenchload ()
{
    if (( DEBUG ))
    then
        set -x
    fi

    # kill off some load - if needed
    while (( IDLERUNLEVEL < IDLESETLEVEL && IDLEDIFFABS > UTILRANGE ))
    do
        # take the first PID that is still alive; PIDs in front of it
        # have already exited and are dropped from the list
        KBL_PID=
        KBL_REST=
        for KBL_TRY in $BENCHPIDS
        do
            if [[ -n $KBL_PID ]]
            then
                KBL_REST="$KBL_REST $KBL_TRY"
            elif kill -0 $KBL_TRY 2>/dev/null
            then
                KBL_PID=$KBL_TRY
            fi
        done
        BENCHPIDS=$KBL_REST

        if [[ -z $KBL_PID ]]
        then
            print "INFO: no $BNCHMRK left to kill - load is not from this program" \
                | tee -a "$LOGFILE"
            break
        fi

        if (( VERBOSE ))
        then
            print "INFO: KILLING $BNCHMRK - too much load" \
                | tee -a "$LOGFILE"
        fi

        kill -9 $KBL_PID 2>/dev/null

        # pause so we can let OS level out from dead process
        sleep $SLEEPCHECK
        runsetvars
    done
}

#
# timebenchload: hold the load level for RUNTIME seconds.
#   Every SLEEPBENCH seconds the idle percentage is read again and more
#   load is started or some is killed when it left the UTILRANGE band.
#   Elapsed time is estimated, not measured: SLEEPBENCH + 1 + 5 seconds
#   are charged per pass.
#
timebenchload ()
{
    if (( DEBUG ))
    then
        set -x
    fi

    if (( VERBOSE ))
    then
        print "INFO: at desireable load. Running at load level for $RUNTIME seconds" \
            | tee -a "$LOGFILE"
    fi

    THISTIME=0
    while (( THISTIME <= RUNTIME ))
    do
        # runtime for constant load
        if (( VERBOSE ))
        then
            print "INFO: at desireable load...sleeping for $SLEEPBENCH" \
                | tee -a "$LOGFILE"
        fi

        #add: sleep time to total time of running thus far
        #add: assume this function takes 1 second to run
        (( THISTIME = THISTIME + SLEEPBENCH + 1 ))

        #sleep for some time
        sleep $SLEEPBENCH

        #make sure load is still correct
        #if we fall low, then make more load
        runsetvars
        if (( IDLERUNLEVEL > IDLESETLEVEL && IDLEDIFFABS > UTILRANGE ))
        then
            runbenchload
        elif (( IDLERUNLEVEL < IDLESETLEVEL && IDLEDIFFABS > UTILRANGE ))
        then
            killbenchload
        fi

        # add some amount of time to overall time, add 5 sec for this
        (( THISTIME = THISTIME + 5 ))
    done
}

#
# runsetvars: take a fresh idle reading.
#   Sets IDLERUNLEVEL  - last field of the second all-numeric line printed
#                        by "iostat 1 2"
#        IDLEDIFF      - IDLERUNLEVEL - IDLESETLEVEL
#        IDLEDIFFABS   - absolute value of IDLEDIFF
#   When no number can be read an ERROR is logged and IDLESETLEVEL is used
#   instead, so the callers neither start nor kill load on a bad reading.
#
runsetvars ()
{
    if (( DEBUG ))
    then
        set -x
    fi

    IDLERUNLEVEL=$(iostat 1 2 \
        | grep -v '[a-z]' \
        | sed -n 2p \
        | awk '{print $NF}')

    # (( )) is integer only - drop a fractional part if there is one
    IDLERUNLEVEL=${IDLERUNLEVEL%%.*}

    if [[ $IDLERUNLEVEL != +([0-9]) ]]
    then
        print "ERROR: cannot read idle level from iostat - assuming $IDLESETLEVEL" \
            | tee -a "$LOGFILE"
        IDLERUNLEVEL=$IDLESETLEVEL
    fi

    (( IDLEDIFF = IDLERUNLEVEL - IDLESETLEVEL ))
    if (( IDLEDIFF < 0 ))
    then
        (( IDLEDIFFABS = 0 - IDLEDIFF ))
    else
        IDLEDIFFABS=$IDLEDIFF
    fi
}

#
# cpupercent: choose SLEEPCHECK and UTILRANGE from the number of CPUs
#   reported by psrinfo - the more CPUs, the shorter the pause between
#   load changes and the narrower the accepted idle band.
#
cpupercent ()
{
    if (( DEBUG ))
    then
        set -x
    fi

    if (( VERBOSE ))
    then
        print "INFO: Setting up CPU values for load factor" \
            | tee -a "$LOGFILE"
    fi

    # find number of cpus
    NUMCPU=$(/usr/sbin/psrinfo | wc -l | sed -e 's/ //g')

    #set values based on number of cpus
    # sleepcheck - time to sleep between creation of more load - give
    #              system ample time to ramp up
    # utilrange - the +/- range from the idletime of the system
    #   if you are outside of the range negative - start more load
    #   if you are outside of the range positive - kill off some load
    if (( NUMCPU <= 2 ))
    then
        SLEEPCHECK=15
        UTILRANGE=15
    elif (( NUMCPU <= 9 ))
    then
        SLEEPCHECK=10
        UTILRANGE=10
    elif (( NUMCPU <= 30 ))
    then
        SLEEPCHECK=5
        UTILRANGE=8
    elif (( NUMCPU <= 50 ))
    then
        SLEEPCHECK=2
        UTILRANGE=5
    else
        SLEEPCHECK=1
        UTILRANGE=2
    fi
}

#
# cleanup: remove MYTMPFILE and kill everything this script started.
#   Only the recorded PIDs are signalled because we DONT want to kill the
#   normal perf runs (or anybody else's dd/sar/iostat/benchmark) going on
#   outside of this program.  Also run from the signal trap.
#
cleanup ()
{
    if (( DEBUG ))
    then
        set -x
    fi

    if (( VERBOSE ))
    then
        print "INFO: doing cleanup" \
            | tee -a "$LOGFILE"
    fi

    rm -f "$MYTMPFILE"

    # the lists may be empty and may hold PIDs that already exited
    for CLN_PID in $DDPIDS $BENCHPIDS $SARPID $IOSTATPID
    do
        kill -9 $CLN_PID 2>/dev/null
    done
    DDPIDS=
    BENCHPIDS=
    SARPID=
    IOSTATPID=

    if (( VERBOSE ))
    then
        print "Copy off if desired: iostat and sar files: $SARFILE $IOSTATFILE" \
            | tee -a "$LOGFILE"
    fi
}

#############################################################################
# main function: run other functions
#############################################################################

# run trap so if user exits, it cleans up processes
trap 'cleanup;exit 2' 1 2 3 15

if (( VERBOSE && LOGDIRMADE ))
then
    print "INFO: made $LOGDIR" \
        | tee -a "$LOGFILE"
fi

print "INFO: $(date +%Y%m%d%H%M) starting $0 on $HOSTNAME" \
    | tee -a "$LOGFILE"

if (( VERSION ))
then
    # the newest RCS log entry is the first line of this file that matches
    print "$0 is version: $(awk '/Revision/ {print $3; exit}' $0)"
fi

if (( VERBOSE ))
then
    print "INFO: starting collection"
fi
collection

if (( VERBOSE ))
then
    print "INFO: doing dd load"
fi
ddload

if (( VERBOSE ))
then
    print "INFO: doing benchmark load"
fi
cpupercent
benchload

if (( VERBOSE ))
then
    print "INFO: doing cleanup"
fi

print "INFO: $(date +%Y%m%d%H%M) finishing $0 on $HOSTNAME" \
    | tee -a "$LOGFILE"

cleanup

exit 0