#! /bin/sh
#
# Copyright (c) 2018 Red Hat.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
#
# Administrative script for daily sar-style report summaries.
# This is intended to be run from cron, an hour or two after the pmlogger_daily
# cron script has completed, e.g. around 2am would be a suitable time.
#
# Sample crontab entry:
#
# # daily generation of system activity reports
# 0 2 * * * pcp /usr/libexec/pcp/bin/pmlogger_daily_report
#
#
# By default, the daily report will be written to /var/log/pcp/sa/sarXX
# where XX is the day of the month, yesterday.
#
# Exit status convention: the trap installed below always exits with
# $status, so every failure path must set status before calling exit;
# a bare "exit 1" would be turned back into "exit 0" by the trap.
#

. $PCP_DIR/etc/pcp.env
. $PCP_SHARE_DIR/lib/utilproc.sh

status=0
prog=`basename $0`

# optional begin logging to $PCP_LOG_DIR/NOTICES
#
if $PCP_LOG_RC_SCRIPTS
then
    logmsg="begin pid:$$ $prog args:$*"
    if which pstree >/dev/null 2>&1
    then
        logmsg="$logmsg [`_pstree_oneline $$`]"
    fi
    $PCP_BINADM_DIR/pmpost "$logmsg"
fi

# error messages should go to stderr, not the GUI notifiers
unset PCP_STDERR

# default creation mask for files written by this script (rw-r--r--);
# it is relaxed to 002 only around the mkdir of the report directory
# further down, so that directory ends up rwxrwxr-x (775)
#
umask 022

# default message log file
PROGLOG=$PCP_LOG_DIR/pmlogger/$prog.log
USE_SYSLOG=true

# the trap is not installed yet, so a plain "exit 1" is correct here
tmp=`mktemp -d "$PCP_TMPFILE_DIR/pmlogger_daily_report.XXXXXXXXX"` || exit 1

# Exit-time housekeeping: report a failure to syslog (unless -l redirected
# the log), discard an empty log file, and remove the temporary directory.
#
_cleanup()
{
    $USE_SYSLOG && [ $status -ne 0 ] && \
    $PCP_SYSLOG_PROG -p daemon.error "$prog failed - see $PROGLOG"
    if [ "$PROGLOG" != "/dev/tty" ]
    then
        [ -s "$PROGLOG" ] || rm -f "$PROGLOG"
    fi
    rm -rf "$tmp"
}
trap "_cleanup; exit \$status" 0 1 2 3 15

# pmgetopt configuration, also used to produce the usage message.
# NOTE(review): the opening of this here-document (the "<<EOF" line
# through the start of the -a description) was damaged in the copy of
# the file under review; the -a text below is reconstructed from the
# default ARCHIVEPATH computed later in this script - confirm against
# the upstream wording.
#
echo > $tmp/usage
cat >> $tmp/usage <<EOF
Options:
  -a=FILE               input archive file basename (default $PCP_ARCHIVE_DIR/<hostname>/YYYYMMDD)
  -f=FILE               output filename (default "sarXX" in directory specified with -o, for XX yesterdays day of the month)
  -h=HOSTNAME           hostname, affects the default input archive file path (default local hostname)
  -l=FILE,--logfile=FILE send important diagnostic messages to FILE
  -p                    poll and exit if processing already done for today
  -o=DIRECTORY          output directory (default $PCP_SA_DIR, see also -f option)
  -t=TIME               reporting interval, default 10m
  -A                    use start and end times of input archive for the report (default midnight yesterday for 24hours)
  -V,--verbose          verbose messages in log (multiple times for very verbose), see also -l option
  --help
EOF

# Print the usage message and exit with a failure status.
#
_usage()
{
    pmgetopt --progname=$prog --config=$tmp/usage --usage
    status=1
    exit
}

# option parsing
#
VERBOSE=false
ARCHIVETIMES=false
VERY_VERBOSE=false
PFLAG=false
MYARGS=""

ARGS=`pmgetopt --progname=$prog --config=$tmp/usage -- "$@"`
if [ $? != 0 ]
then
    # must go through $status, the exit trap overrides any other code
    status=1
    exit
fi

eval set -- "$ARGS"
while [ $# -gt 0 ]
do
    case "$1"
    in
        -a)     ARCHIVEPATH="$2"
                shift
                ;;
        -f)     REPORTFILE="$2"
                shift
                ;;
        -h)     HOSTNAME="$2"
                shift
                ;;
        -l)     PROGLOG="$2"
                USE_SYSLOG=false
                shift
                ;;
        -o)     REPORTDIR="$2"
                shift
                ;;
        -p)     PFLAG=true
                ;;
        -t)     INTERVAL="$2"
                shift
                ;;
        -A)     ARCHIVETIMES=true
                ;;
        -V)     if $VERBOSE
                then
                    VERY_VERBOSE=true
                else
                    VERBOSE=true
                fi
                MYARGS="$MYARGS -V"
                ;;
        -\?)    _usage
                ;;
    esac
    shift
done

[ $# -ne 0 ] && _usage

# optionally uncompress a compressed archive
#
# $1 is the archive basename.  If any file $1.* carries one of the
# suffixes reported by "pmconfig -L compress_suffixes", every file of
# the archive is copied or uncompressed into $tmp and the basename of
# the copy is written to stdout; otherwise, or if any step fails, $1
# itself is written to stdout.  Diagnostics go to stderr because the
# caller captures stdout.
#
_uncompress()
{
    # stderr is discarded so a failed match cannot put ls error text
    # into the list of files iterated below
    ls -d "$1".* >$tmp/list 2>/dev/null
    rm -f $tmp/uncompress
    for _suff in `pmconfig -L compress_suffixes | sed -e 's/^compress_suffixes=//'`
    do
        if grep "$1.*.$_suff\$" $tmp/list >/dev/null
        then
            touch $tmp/uncompress
            break
        fi
    done
    if [ -f $tmp/uncompress ]
    then
        # at least one of the archive files is compressed, copy and
        # uncompress
        #
        $VERBOSE && echo >&2 "Uncompressing $1"
        touch $tmp/ok
        for _file in `cat $tmp/list`
        do
            # destination name is the source name minus any known
            # compression suffix
            case "$_file"
            in
                *.xz)   _dest=$tmp/`basename "$_file" .xz` ;;
                *.lzma) _dest=$tmp/`basename "$_file" .lzma` ;;
                *.bz2)  _dest=$tmp/`basename "$_file" .bz2` ;;
                *.bz)   _dest=$tmp/`basename "$_file" .bz` ;;
                *.gz)   _dest=$tmp/`basename "$_file" .gz` ;;
                *.Z)    _dest=$tmp/`basename "$_file" .Z` ;;
                *.z)    _dest=$tmp/`basename "$_file" .z` ;;
                *.zst)  _dest=$tmp/`basename "$_file" .zst` ;;
            esac
            # the first failure removes $tmp/ok and abandons the loop
            case "$_file"
            in
                *.xz|*.lzma)
                    if xzcat "$_file" >"$_dest"
                    then
                        :
                    else
                        echo >&2 "Warning: xzcat $_file failed"
                        rm -f $tmp/ok
                        break
                    fi
                    ;;
                *.bz2|*.bz)
                    if bzcat "$_file" >"$_dest"
                    then
                        :
                    else
                        echo >&2 "Warning: bzcat $_file failed"
                        rm -f $tmp/ok
                        break
                    fi
                    ;;
                *.gz|*.Z|*.z)
                    if zcat "$_file" >"$_dest"
                    then
                        :
                    else
                        echo >&2 "Warning: zcat $_file failed"
                        rm -f $tmp/ok
                        break
                    fi
                    ;;
                *.zst)
                    if zstdcat "$_file" >"$_dest"
                    then
                        :
                    else
                        echo >&2 "Warning: zstdcat $_file failed"
                        rm -f $tmp/ok
                        break
                    fi
                    ;;
                *)
                    # not compressed
                    _dest=$tmp/`basename "$_file"`
                    if cp "$_file" "$_dest"
                    then
                        :
                    else
                        echo >&2 "Warning: cp $_file failed"
                        rm -f $tmp/ok
                        break
                    fi
                    ;;
            esac
        done
        if [ -f $tmp/ok ]
        then
            echo $tmp/`basename "$1"`
        else
            $VERBOSE && echo >&2 "Uncompressing failed, reverting to compressed archive"
            echo "$1"
        fi
    else
        # no compression, nothing to be done
        #
        echo "$1"
    fi
}

if $PFLAG
then
    rm -f $tmp/ok
    if [ -f $PCP_LOG_DIR/pmlogger/pmlogger_daily_report.stamp ]
    then
        last_stamp=`sed -e '/^#/d' <$PCP_LOG_DIR/pmlogger/pmlogger_daily_report.stamp`
        if [ -n "$last_stamp" ]
        then
            # Polling happens every 60 mins, so if pmlogger_daily was last
            # run more than 23.5 hours ago, we need to do it again, otherwise
            # exit quietly
            #
            now_stamp=`pmdate %s`
            check=`expr $now_stamp - \( 23 \* 3600 \) - 1800`
            if [ "$last_stamp" -ge "$check" ]
            then
                # nothing to be done, yet
                exit
            fi
            touch $tmp/ok
        fi
    fi
    if [ ! -f $tmp/ok ]
    then
        # special start up logic when pmlogger_daily_report.stamp does not
        # exist ... punt on archive files being below $PCP_LOG_DIR/sa
        #
        if [ -d $PCP_LOG_DIR/sa ]
        then
            find $PCP_LOG_DIR/sa -name "sar`pmdate -1d %d`" >$tmp/tmp
            if [ -s $tmp/tmp ]
            then
                # heuristic match, assume nothing to be done
                exit
            fi
        fi
    fi
fi

# if it looks like we can create/update the date-and-timestamp
# then do it ... this will be checked by -p polling
#
if [ -w $PCP_LOG_DIR/pmlogger ]
then
    if [ ! -f $PCP_LOG_DIR/pmlogger/pmlogger_daily_report.stamp \
         -o -w $PCP_LOG_DIR/pmlogger/pmlogger_daily_report.stamp ]
    then
        if _save_prev_file $PCP_LOG_DIR/pmlogger/pmlogger_daily_report.stamp
        then
            :
        else
            echo "Warning: cannot save previous date-and-timestamp" >&2
        fi
        # only update date-and-timestamp if we can write the file
        #
        pmdate '# %Y-%m-%d %H:%M:%S
%s' >$tmp/stamp
        if cp $tmp/stamp $PCP_LOG_DIR/pmlogger/pmlogger_daily_report.stamp
        then
            :
        else
            echo "Warning: cannot install new date-and-timestamp" >&2
        fi
    fi
fi

if [ "$PROGLOG" = "/dev/tty" ]
then
    # special case for debugging ... no salt away previous
    #
    :
else
    # After argument checking, everything must be logged to ensure no mail is
    # accidentally sent from cron.  Close stdout and stderr, then open stdout
    # as our logfile and redirect stderr there too.
    #
    [ -f "$PROGLOG" ] && mv "$PROGLOG" "$PROGLOG.prev"
    exec 1>"$PROGLOG" 2>&1
fi

# Default hostname is the name of the local host
#
[ -z "$HOSTNAME" ] && HOSTNAME=`hostname`

# Default input archive is the merged archive for yesterday.
# Unfortunately we can't just specify the entire directory
# and rely on multi-archive mode because this script might
# take a long time to run as a result.
#
[ -z "$ARCHIVEPATH" ] && ARCHIVEPATH=$PCP_ARCHIVE_DIR/$HOSTNAME/`pmdate -1d %Y%m%d`

# If input archive has been compressed, then uncompress it
# into temporary files and use these, to avoid repeated
# uncompressing for pmrep below.
#
ORIG_ARCHIVEPATH="$ARCHIVEPATH"
ARCHIVEPATH=`_uncompress "$ARCHIVEPATH"`
$VERBOSE && echo ARCHIVEPATH=$ARCHIVEPATH

# Default output directory
#
[ -z "$REPORTDIR" ] && REPORTDIR="$PCP_SA_DIR"
$VERBOSE && echo REPORTDIR=$REPORTDIR

# Create output directory - if this fails for any reason we exit later
# (when the report file cannot be created)
#
# mode rwxrwxr-x is the default for pcp:pcp dirs
umask 002
[ -d "$REPORTDIR" ] \
|| mkdir -p -m 0775 "$REPORTDIR" 2>/dev/null
# reset the default mode to rw-r--r-- for files
umask 022

# Default output file is the day of month for yesterday in REPORTDIR
#
[ -z "$REPORTFILE" ] && REPORTFILE=$REPORTDIR/sar`pmdate -1d %d`
$VERBOSE && echo REPORTFILE=$REPORTFILE

# Default reporting interval is 10m (same as sysstat uses)
#
[ -z "$INTERVAL" ] && INTERVAL="10m"

# If the input archive doesn't exist, we exit.
#
if [ ! -f "`echo $ARCHIVEPATH.meta*`" ]; then
    # report this to the log
    echo "$prog: FATAL error: Failed to find input archive \"$ARCHIVEPATH\""
    # set status so the exit trap reports the failure (and logs to syslog)
    status=1
    exit
fi

# Common reporting options, including time window: midnight yesterday for 24h
# (kept as a plain string and expanded unquoted below, so that it splits
# into separate pmrep arguments)
#
REPORT_OPTIONS="-a $ARCHIVEPATH -z -E 0 -p -f%H:%M:%S -t$INTERVAL"
if ! $ARCHIVETIMES
then
    # specific start/finish times - midnight yesterday for 24h
    start=`pmdate -1d "@%d-%b-%Y"`
    REPORT_OPTIONS="$REPORT_OPTIONS -S$start -T 24h"
fi
$VERBOSE && echo REPORT_OPTIONS=$REPORT_OPTIONS

# Truncate or create the output file.  Exit if this fails.
#
if ! cp /dev/null "$REPORTFILE"
then
    # $? is 0 here (it reflects the negated test), so set the failure
    # status explicitly
    status=1
    echo "$prog: FATAL error: cannot create \"$REPORTFILE\""
    exit
fi

#
# Common reporting function
#
# $1 is the pmrep configuration section (e.g. :sar-u-ALL), $2 is the
# heading line for that section of the report.  The pmrep output is
# appended to $REPORTFILE; when pmrep produces no output, a one-line
# explanation derived from its stderr is appended instead.
#
_report()
{
    _conf=$1
    _comment="$2"

    $VERBOSE && echo Generating report for $_conf $_comment
    echo >>"$REPORTFILE"; echo >>"$REPORTFILE"
    pmdumplog -z -l "$ARCHIVEPATH" | awk '/commencing/ {print "# ",$2,$3,$4,$5,$6}' >>"$REPORTFILE"
    echo "$_comment" >>"$REPORTFILE"
    $VERBOSE && echo pmrep $REPORT_OPTIONS $_conf
    pmrep $REPORT_OPTIONS $_conf >$tmp/out 2>$tmp/err
    if [ -s $tmp/out ]
    then
        cat $tmp/out >>"$REPORTFILE"
    else
        if grep 'PM_ERR_NAME' $tmp/err >/dev/null 2>&1
        then
            metric=`$PCP_AWK_PROG <$tmp/err '/PM_ERR_NAME/ { print $3; exit }'`
            echo "-- no report for config \"$_conf\" because the metric \"$metric\" is not in the archive" >>"$REPORTFILE"
        elif grep 'PM_ERR_INDOM_LOG' $tmp/err >/dev/null 2>&1
        then
            metric=`$PCP_AWK_PROG <$tmp/err '/PM_ERR_INDOM_LOG/ { print $3; exit }'`
            echo "-- no report for config \"$_conf\" because there are no values for any instance of the metric \"$metric\" in the archive" >>"$REPORTFILE"
        elif grep 'PM_ERR_BADDERIVE' $tmp/err >/dev/null 2>&1
        then
            metric=`$PCP_AWK_PROG <$tmp/err '/PM_ERR_BADDERIVE/ { print $3; exit }'`
            echo "-- no report for config \"$_conf\" because one or more metrics for the derived metric \"$metric\" is not in the archive" >>"$REPORTFILE"
        else
            cat $tmp/err >>"$REPORTFILE"
            echo "-- no report for config \"$_conf\"" >>"$REPORTFILE"
        fi
    fi
}

# report header
echo "System Activity Report" >>"$REPORTFILE"
echo >>"$REPORTFILE"
echo "Host:           $HOSTNAME" >>"$REPORTFILE"
echo "Archive:        $ORIG_ARCHIVEPATH" >>"$REPORTFILE"
echo "Report created: `date`" >>"$REPORTFILE"

# one report section per pmrep configuration
_report :sar-u-ALL '# CPU Utilization statistics, all CPUS'
_report :sar-u-ALL-P-ALL '# CPU Utilization statistics, per-CPU'
_report :vmstat '# virtual memory (vmstat) statistics'
_report :vmstat-a '# virtual memory active/inactive memory statistics'
_report :sar-B '# paging statistics'
_report :sar-b '# I/O and transfer rate statistics'
_report :sar-d-dev '# block device statistics'
_report :sar-d-dm '# device-mapper device statistics'
_report :sar-F '# mounted filesystem statistics'
_report :sar-H '# hugepages utilization statistics'
_report :sar-I-SUM '# interrupt statistics, summed'
_report :sar-n-DEV '# network statistics, per device'
_report :sar-n-EDEV '# network error statistics, per device'
_report :sar-n-NFSv4 '# NFSv4 client and RPC statistics'
_report :sar-n-NFSDv4 '# NFSv4 server and RPC statistics'
_report :sar-n-SOCK '# socket statistics'
_report :sar-n-TCP-ETCP '# TCP statistics'
_report :sar-q '# queue length and load averages'
_report :sar-r '# memory utilization statistics'
_report :sar-S '# swap usage statistics'
_report :sar-W '# swapping statistics'
_report :sar-w '# task creation and system switching statistics'
_report :sar-y '# TTY devices activity'
_report :numa-hint-faults '# NUMA hint fault statistics'
_report :numa-per-node-cpu '# NUMA per-node CPU statistics'
_report :numa-pgmigrate-per-node '# NUMA per-node page migration statistics'

# optional end logging to $PCP_LOG_DIR/NOTICES
#
if $PCP_LOG_RC_SCRIPTS
then
    $PCP_BINADM_DIR/pmpost "end pid:$$ $prog status=$status"
fi

exit