#!/bin/sh
#
# Compression/decompression of files of a PCP archive.
#
# Copyright (c) 2024 Ken McDonell, Inc. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#

. $PCP_DIR/etc/pcp.env

prog=`basename $0`
tmp=/var/tmp/pmlogdecompress.$$

# $status is the exit status reported by the trap below; it stays at 1
# (failure) until the very end of the script, so a bare "exit" anywhere
# means "clean up temporary files and fail".
#
status=1
trap "rm -f $tmp.*; exit \$status" 0 1 2 3 15

# Note -A arg (not -a arg) to avoid confusion with -a archive
#
cat <<'End-of-File' >$tmp.usage
# usage: [options] archive ...

Options:
  -A=ARG, --arg=ARG                argument for compression program
  -c=PROGLIST, --command=PROGLIST  candidate compression program(s)
  -f=PROG, --use=PROG              use this compression program
  -d, --decompress                 decompress files
  -l=LIMIT, --lower-limit=LIMIT    do no compress files smaller than LIMIT
  -N, --show-me                    do nothing, but show me
  -o=TYPE, --optimize=TYPE         choose program to optimize compression
  -t=DIR, --dir=DIR                decompress into directory DIR
  -V, --verbose                    increase verbosity
  -z, --compress                   compress files [default]
  -Z=MIN, --min-zstd-size=MIN      minimum file size for compression with zstd
  --help
# end

More than one -A option is allowed, and more than one -c option is allowed.
PROGLIST may be one command, or multiple commands separated by colons,
[default zstd:xz:bzip2:gzip].
Compression program (without -o or -f) depends on installed programs and
file size (use -N to see what would be used, use -f to force a particular
compression program to be used).
Decompression program is selected based on file extension.
LIMIT is in bytes [default 4096]; use 0 to force compression.
similarly for zstd mimimum file size MIN [default 52428800].
Compression optimization TYPE may be cpu or size.
End-of-File

# Print the size in bytes of the file $1 (the 5th field of "ls -l").
#
_size()
{
    ls -l "$1" | awk '{print $5}'
}

# Run (or with -N just show) a decompression command.
# Arguments: the command and its arguments, one word per argument.
# On failure a message is written to stderr and the script exits (the
# trap reports $status, which is still 1 at that point).
#
_run_decompress()
{
    if $showme
    then
        echo >&2 "+ $*"
    elif ! "$@"
    then
        # "$@" (not $*) above so each argument reaches the command as
        # the caller passed it, with no further word splitting
        echo >&2 "$* file failed"
        exit
    fi
}

# Helper for _optimize: measure one compression tool against $pick.
# Arguments: $1 is the tool name to record in $use_prog, the remaining
# arguments are the command (and options) that compresses to stdout.
# Reads $pick, $optimize, $verbose; updates $pick_size or $pick_cpu and
# $use_prog when this tool beats the best result so far.
#
# NOTE(review): "time -f" is not POSIX, this presumably relies on an
# external time(1) that understands -f (e.g. GNU time) - confirm on the
# target platforms.
#
_optimize_try()
{
    _tool="$1"
    shift
    if time -f '%U %S' "$@" "$pick" >$tmp.tmp 2>$tmp.time
    then
        size=`_size "$tmp.tmp"`
        # user + system CPU time, scaled by 1000
        cpu=`awk <$tmp.time '{ print 1000*($1+$2) }'`
        $verbose && echo >&2 "$pick: $_tool size=$size cpu=$cpu"
        if [ "$optimize" = size ]
        then
            # two separate tests: the numeric comparison must not be
            # evaluated while $pick_size is still empty
            if [ -z "$pick_size" ] || [ "$size" -lt "$pick_size" ]
            then
                pick_size=$size
                use_prog=$_tool
            fi
        else
            if [ -z "$pick_cpu" ] || [ "$cpu" -lt "$pick_cpu" ]
            then
                pick_cpu=$cpu
                use_prog=$_tool
            fi
        fi
    else
        if $verbose
        then
            cat >&2 $tmp.time
            echo >&2 "$pick: optimize test with $_tool failed"
        fi
    fi
}

# component archive filenames are arguments
# - pick largest (that is not already compressed and is at least
#   $min_size bytes)
# - iterate over compression tools measuring size
#   or CPU time
# - set $use_prog for selected compression tool
#
_optimize()
{
    pick=''
    pick_size=0
    for file
    do
        [ -f "$file" ] || continue
        case "$file"
        in
            *.xz|*.lzma|*.bz2|*.bz|*.gz|*.Z|*.z|*.zst)
                # already compressed
                $verbose && echo >&2 "$file: skipped, already compressed"
                continue
                ;;
        esac
        size=`_size "$file"`
        if [ "$size" -ge "$min_size" ] && [ "$size" -gt "$pick_size" ]
        then
            pick="$file"
            pick_size=$size
        fi
    done
    if [ -z "$pick" ]
    then
        $verbose && echo >&2 "optimize: failed to find a candidate file, defaulting to xz"
        use_prog=xz
        return
    fi
    $verbose && echo >&2 "$pick: ($pick_size bytes) selected as candidate for optimize measurements"

    # from here on $pick_size and $pick_cpu hold the best measurement so
    # far, empty means "no tool measured yet"
    #
    pick_size=''
    pick_cpu=''
    use_prog=''
    for tool in `echo $proglist | sed -e 's/:/ /g'`
    do
        case "$tool"
        in
            zstd)
                $have_zstd && _optimize_try zstd zstd -zc -q
                ;;
            xz)
                $have_xz && _optimize_try xz xz -zc -q
                ;;
            bzip2)
                $have_bzip2 && _optimize_try bzip2 bzip2 -zc -q
                ;;
            gzip)
                $have_gzip && _optimize_try gzip gzip -c -q
                ;;
            *)
                echo >&2 "$pick: Botch: no compression recipe for $tool program"
                ;;
        esac
    done
    $verbose && echo >&2 "optimize: pick $use_prog"
}

# Compress the files of one archive.
# Argument: $1 is either a single file or the basename of a PCP archive.
# Files that are already compressed, temporal index (.index) files and
# files smaller than $min_size are skipped.  For each remaining file the
# first usable tool from "$use_prog" followed by $proglist is used; any
# -A arguments ($args) replace the default options for the tool.
# A compression failure terminates the script.
#
_compress()
{
    if [ -f "$1" ]
    then
        # single file ...
        #
        archbase=''
        filelist="$1"
    else
        # assume it is an archive basename
        #
        archbase=`pmlogbasename "$1"`
        $verbose && echo >&2 "archbase=$archbase"
        filelist=`echo ${archbase}*`
    fi

    [ -n "$optimize" ] && _optimize $filelist

    nfile=0
    for file in $filelist
    do
        [ ! -f "$file" ] && continue
        if [ -n "$archbase" ]
        then
            # the ${archbase}* glob may match files of other archives
            filebase=`pmlogbasename "$file"`
            [ "$archbase" != "$filebase" ] && continue
        fi
        nfile=`expr $nfile + 1`
        case "$file"
        in
            *.xz|*.lzma|*.bz2|*.bz|*.gz|*.Z|*.z|*.zst)
                # already compressed
                $verbose && echo >&2 "$file: skipped, already compressed"
                continue
                ;;
            *.index)
                # TI is never compressed
                continue
                ;;
        esac
        size=`_size "$file"`
        if [ "$size" -lt "$min_size" ]
        then
            $verbose && echo >&2 "$file: skipped, size $size < limit $min_size"
            continue
        fi
        for tool in $use_prog `echo $proglist | sed -e 's/:/ /g'`
        do
            case "$tool"
            in
                zstd)
                    if $have_zstd || [ "$use_prog" = zstd ]
                    then
                        if [ "$size" -ge "$min_zstd_size" ] || [ "$use_prog" = zstd ]
                        then
                            largs=" --rm --quiet"
                            [ -n "$args" ] && largs=" $args"
                            if $showme
                            then
                                echo >&2 "+ zstd$largs $file"
                            else
                                if zstd$largs "$file"
                                then
                                    $verbose && echo >&2 "$file: compressed with zstd"
                                else
                                    echo >&2 "$file: zstd failed!"
                                    exit
                                fi
                            fi
                            break
                        else
                            # fall through to the next candidate tool
                            $verbose && echo >&2 "$file: size $size too small for zstd"
                        fi
                    fi
                    ;;

                xz)
                    if $have_xz || [ "$use_prog" = xz ]
                    then
                        # probe the installed xz once only; the result
                        # is cached in $xz_args (set, possibly empty,
                        # after the first trip)
                        #
                        if [ -z "${xz_args+onetrip}" ]
                        then
                            # probe with empty stdin and all output
                            # discarded; naming /dev/null as a file to
                            # compress would make xz skip it with a
                            # warning and a non-zero exit status
                            #
                            if xz -0 --block-size=10MiB </dev/null >/dev/null 2>&1
                            then
                                # want minimal overheads, -0 is the same as --fast ...
                                # these options were selected after extensive analysis
                                # for original settings in pmlogger_daily
                                #
                                xz_args=" -0 --block-size=10MiB"
                            elif xz -0 </dev/null >/dev/null 2>&1
                            then
                                # no --block-size in older versions of xz
                                #
                                xz_args=" -0"
                            else
                                xz_args=''
                            fi
                        fi
                        # always start from the cached xz options,
                        # $largs may hold another tool's options from
                        # an earlier file
                        #
                        largs="$xz_args"
                        [ -n "$args" ] && largs=" $args"
                        if $showme
                        then
                            echo >&2 "+ xz$largs $file"
                        else
                            if xz$largs "$file"
                            then
                                $verbose && echo >&2 "$file: compressed with xz"
                            else
                                echo >&2 "$file: xz failed!"
                                exit
                            fi
                        fi
                        break
                    fi
                    ;;

                bzip2)
                    if $have_bzip2 || [ "$use_prog" = bzip2 ]
                    then
                        largs=''
                        [ -n "$args" ] && largs=" $args"
                        if $showme
                        then
                            echo >&2 "+ bzip2$largs $file"
                        else
                            if bzip2$largs "$file"
                            then
                                $verbose && echo >&2 "$file: compressed with bzip2"
                            else
                                echo >&2 "$file: bzip2 failed!"
                                exit
                            fi
                        fi
                        break
                    fi
                    ;;

                gzip)
                    if $have_gzip || [ "$use_prog" = gzip ]
                    then
                        largs=''
                        [ -n "$args" ] && largs=" $args"
                        if $showme
                        then
                            echo >&2 "+ gzip$largs $file"
                        else
                            if gzip$largs "$file"
                            then
                                $verbose && echo >&2 "$file: compressed with gzip"
                            else
                                echo >&2 "$file: gzip failed!"
                                exit
                            fi
                        fi
                        break
                    fi
                    ;;

                *)
                    echo >&2 "$file: Botch: no compression recipe for $tool program"
                    ;;
            esac
        done
    done

    if [ "$nfile" -eq 0 ]
    then
        echo >&2 "$prog: Warning: no PCP archive files match \"$1\""
        return
    fi
}

# Decompress the files of one archive.
# Argument: $1 is either a single file or the basename of a PCP archive.
# The decompression program is chosen from the file name suffix.
# Without -t the files are decompressed in place; with -t DIR ($dir)
# the decompressed data is written into DIR and files that are not
# compressed are copied there.
# Nothing is done if any decompressed output file already exists.
#
_decompress()
{
    if [ -f "$1" ]
    then
        # single file ...
        #
        archbase=''
        filelist="$1"
    else
        # assume it is an archive basename
        #
        archbase=`pmlogbasename "$1"`
        $verbose && echo >&2 "archbase=$archbase"
        filelist=`echo ${archbase}*`
    fi

    # for all the files that seem to be part of this archive,
    # count the files and the number that are compressed
    #
    nfile=0
    ncompress=0
    for file in $filelist
    do
        [ ! -f "$file" ] && continue
        if [ -n "$archbase" ]
        then
            filebase=`pmlogbasename "$file"`
            [ "$archbase" = "$filebase" ] || continue
        fi
        nfile=`expr $nfile + 1`
        case "$file"
        in
            *.xz|*.lzma|*.bz2|*.bz|*.gz|*.Z|*.z|*.zst)
                ncompress=`expr $ncompress + 1`
                if [ -z "$dir" ]
                then
                    outfile=`echo "$file" | sed -e 's/\.[^.]*$//'`
                else
                    outfile="$dir"/`basename "$file" | sed -e 's/\.[^.]*$//'`
                fi
                if [ -f "$outfile" ]
                then
                    echo >&2 "$prog: $outfile exists and will not be clobbered"
                    return
                fi
                ;;
        esac
    done

    if [ "$nfile" -eq 0 ]
    then
        echo >&2 "$prog: Warning: no PCP archive files match \"$1\""
        return
    fi

    if [ "$ncompress" -eq 0 ]
    then
        $verbose && echo >&2 "No compresssed PCP archive files match \"$1\""
        status=0
        return
    fi

    $verbose && echo >&2 "$ncompress of $nfile files in the archive are compressed"

    for file in $filelist
    do
        [ ! -f "$file" ] && continue
        if [ -n "$archbase" ]
        then
            filebase=`pmlogbasename "$file"`
            [ "$archbase" = "$filebase" ] || continue
        fi

        # select the recipe for this suffix ...
        #   $suffix    file name suffix to strip
        #   $inplace   command to decompress the file where it is
        #   $tostdout  command to decompress to stdout (for -t DIR)
        #
        case "$file"
        in
            *.xz)
                suffix=.xz
                inplace='xz --decompress'
                tostdout='xz --decompress --stdout'
                ;;
            *.lzma)
                suffix=.lzma
                inplace='xz --decompress --format=lzma'
                tostdout='xz --decompress --format=lzma --stdout'
                ;;
            *.bz2)
                suffix=.bz2
                inplace='bzip2 --decompress'
                tostdout='bzip2 --decompress --stdout'
                ;;
            *.bz)
                suffix=.bz
                inplace='bzip2 --decompress'
                tostdout='bzip2 --decompress --stdout'
                ;;
            *.gz)
                suffix=.gz
                inplace='gzip -d'
                tostdout='gzip -dc'
                ;;
            *.z)
                suffix=.z
                inplace='gzip -d'
                tostdout='gzip -dc'
                ;;
            *.Z)
                echo >&2 TODO: $file
                continue
                ;;
            *.zst)
                suffix=.zst
                inplace='unzstd --rm --quiet'
                tostdout='unzstd --stdout --quiet'
                ;;
            *)
                # not compressed ... do nothing unless -t DIR was
                # given, and then copy the file into DIR with cp(1)
                #
                if [ -n "$dir" ]
                then
                    outfile="$dir"/`basename "$file"`
                    if [ -f "$outfile" ]
                    then
                        echo >&2 "$outfile: exists, skip $file decompression"
                    else
                        if $showme
                        then
                            echo >&2 "+ cp $file $outfile"
                        else
                            if ! cp "$file" "$outfile" >>$tmp.err 2>&1
                            then
                                cat >&2 $tmp.err
                                echo >&2 "Failed to copy $file"
                                exit
                            else
                                $verbose && echo >&2 "$outfile: copied"
                            fi
                        fi
                    fi
                fi
                continue
                ;;
        esac

        if [ -z "$dir" ]
        then
            # in place: strip only the suffix so $outfile keeps any
            # directory component of $file
            outfile=`echo "$file" | sed -e 's/\.[^.]*$//'`
        else
            outfile="$dir"/`basename "$file" $suffix`
        fi
        if [ -f "$outfile" ]
        then
            echo >&2 "$outfile: exists, skip $file decompression"
            continue
        fi
        if [ -z "$dir" ]
        then
            _run_decompress $inplace "$file"
        elif $showme
        then
            # -N: no redirection, else an empty $outfile is created
            _run_decompress $tostdout "$file"
        else
            _run_decompress $tostdout "$file" >"$outfile"
        fi
        $verbose && echo >&2 "$outfile: decompressed"
    done
}

# Command line state; an empty value means the option was not given.
#
mode=''
args=''
showme=false
verbose=false
dir=''
optimize=''
min_size=''
min_zstd_size=''
proglist=''
use_prog=''

ARGS=`pmgetopt --progname=$prog --config=$tmp.usage -- "$@"`
[ $? != 0 ] && exit

eval set -- "$ARGS"
while [ $# -gt 0 ]
do
    case "$1"
    in
        -A)     # arg(s) for compression program
                if [ -z "$args" ]
                then
                    args="$2"
                else
                    args="$args $2"
                fi
                shift
                ;;

        -c)     # candidate compression program(s)
                if [ -z "$proglist" ]
                then
                    proglist="$2"
                else
                    proglist="$proglist:$2"
                fi
                shift
                ;;

        -d)     # decompress
                if [ -z "$mode" ]
                then
                    mode=decompress
                elif [ "$mode" != decompress ]
                then
                    echo >&2 "$prog: at most one of -d and -z maybe specified"
                    exit
                fi
                ;;

        -f)     # use this compression program
                use_prog="$2"
                shift
                ;;

        -l)     # lower limit on file size for compression
                min_size="$2"
                shift
                ;;

        -N)     # show me, do nothing
                showme=true
                ;;

        -o)     # optimize compression (option = size|cpu)
                case "$2"
                in
                    size|cpu)
                        optimize="$2"
                        ;;
                    *)
                        echo >&2 "$prog: -o option must be size or cpu"
                        exit
                        ;;
                esac
                shift
                ;;

        -t)     # directory to decompress into
                if [ -z "$2" ]
                then
                    echo >&2 "$prog: -t requires a dir argument"
                    exit
                fi
                if [ ! -d "$2" ]
                then
                    echo >&2 "$prog: -t: $2 is not an existing directory"
                    exit
                fi
                dir="$2"
                shift
                ;;

        -V)     verbose=true
                ;;

        -z)     # compress (default)
                if [ -z "$mode" ]
                then
                    mode=compress
                elif [ "$mode" != compress ]
                then
                    echo >&2 "$prog: at most one of -d and -z maybe specified"
                    exit
                fi
                ;;

        -Z)     # lower limit on file size for zstd compression
                min_zstd_size="$2"
                shift
                ;;

        --)     shift
                break
                ;;

        -\?)    pmgetopt --usage --progname=$prog --config=$tmp.usage
                status=0
                exit
                ;;
    esac
    shift
done

if [ $# -eq 0 ]
then
    pmgetopt --usage --progname=$prog --config=$tmp.usage
    exit
fi

[ -z "$mode" ] && mode=compress

# some command line args are ONLY valid for one or other of the $mode
#
if [ "$mode" = decompress ]
then
    # Disallow: -A -c -f -l -o -Z
    #
    if [ -n "$args" ]
    then
        echo >&2 "$prog: -A [$args] not allowed when decompressing"
        exit
    fi
    if [ -n "$proglist" ]
    then
        echo >&2 "$prog: -c [$proglist] not allowed when decompressing"
        exit
    fi
    if [ -n "$use_prog" ]
    then
        echo >&2 "$prog: -f [$use_prog] not allowed when decompressing"
        exit
    fi
    if [ -n "$min_size" ]
    then
        echo >&2 "$prog: -l [$min_size] not allowed when decompressing"
        exit
    fi
    if [ -n "$optimize" ]
    then
        echo >&2 "$prog: -o [$optimize] not allowed when decompressing"
        exit
    fi
    if [ -n "$min_zstd_size" ]
    then
        echo >&2 "$prog: -Z [$min_zstd_size] not allowed when decompressing"
        exit
    fi
else
    # Compressing ...
    # with -o: disallow: -f
    if [ -n "$optimize" ]
    then
        if [ -n "$use_prog" ]
        then
            echo >&2 "$prog: -f [$use_prog] not allowed when compressing with -o"
            exit
        fi
    fi
fi

# defaults for anything not set on the command line
#
[ -z "$min_size" ] && min_size=4096
[ -z "$min_zstd_size" ] && min_zstd_size=52428800
[ -z "$proglist" ] && [ -z "$use_prog" ] && proglist='zstd:xz:bzip2:gzip'

# $have_<tool> is true if <tool> is named in $proglist and is installed
#
have_zstd=false
have_xz=false
have_bzip2=false
have_gzip=false

if [ -n "$use_prog" ]
then
    # -f so no choice ...
    #
    if ! which "$use_prog" >/dev/null 2>&1
    then
        echo >&2 "$prog: cannot find a compression program ($use_prog)"
        exit
    fi
else
    # work through the -c [or default] list ...
    #
    for try in `echo $proglist | sed -e 's/:/ /g'`
    do
        case "$try"
        in
            zstd)
                which zstd >/dev/null 2>&1 && have_zstd=true
                ;;
            xz)
                which xz >/dev/null 2>&1 && have_xz=true
                ;;
            bzip2)
                which bzip2 >/dev/null 2>&1 && have_bzip2=true
                ;;
            gzip)
                which gzip >/dev/null 2>&1 && have_gzip=true
                ;;
            *)
                echo >&2 "Warning: no clue how to deal with \"compression\" program $try"
                ;;
        esac
    done
    if [ "$have_zstd$have_xz$have_bzip2$have_gzip" = "falsefalsefalsefalse" ]
    then
        echo >&2 "$prog: cannot find a compression program (tried $proglist)"
        exit
    fi
fi

# process each archive (or file) named on the command line
#
while [ $# -gt 0 ]
do
    if [ "$mode" = decompress ]
    then
        _decompress "$1"
    else
        _compress "$1"
    fi
    shift
done

status=0
exit