#!/usr/bin/env bash
# This program is part of Aspersa (http://code.google.com/p/aspersa/)

# ########################################################################
# This script uses strace and lsof to watch a process's IO and print out a table
# of files and I/O activity.
# TODO: make a more complete list of IO functions, such as stat, lstat, unlink,
# chmod, chown, ...
#
# Author: Baron Schwartz
# ########################################################################

# NOTE(review): this file was recovered from a copy in which all line breaks
# had been collapsed and the two awk here-documents had lost their opening
# "<<EOF" plus the head of each awk program.  The shell code below is a
# reformatting of what survived; the reconstructed awk sections are marked
# individually and should be compared against a pristine copy.

# ########################################################################
# Usage functions.
# ########################################################################

# Print a usage message and exit.
# Globals:   OPT_ERR (read) - optional error text printed before the usage.
# Outputs:   the error text (if any) and the usage text, on stdout.
# Returns:   never; always exits with status 1.
usage() {
  if [ "${OPT_ERR:-}" ]; then
    echo "${OPT_ERR}"
  fi
  cat <<USAGE
Usage: $0 [OPTIONS] [FILE]
   $0 does two things: 1) get lsof+strace for -s seconds 2) aggregate the result.
   If you specify a FILE, then step 1) is not performed.
Options:
   -a FUNCTION The aggregate function (default sum); one of the following:
      sum)     # Each cell will contain the sum of the values in it.
      avg)     # Each cell will contain the average of the values in it.
   -b BINARY   The process name to profile (default mysqld).
   -c CELL     The cell contents (default times); one of the following:
      count)   # The cells contain the count of I/O operations.
      sizes)   # The cells contain the sizes of I/O operations.
      times)   # The cells contain the I/O operation timing.
   -g GROUPBY  The group-by item (default filename); one of the following:
      all)     # Summarize into a single line of output.
      filename)# One line of output per filename.
      pid)     # One line of output per process ID.
   -k KEEPFILE A file to hold the raw strace data (default: /tmp/aspersa)
               The default value prevents the file from being saved; any other
               value will result in the file being saved for further analysis.
   -p PID      The process ID to profile; overrides -b.
   -s TIME     The number of seconds to profile (default 30).
USAGE
  exit 1
}

# Read the 'lsof' and 'strace' from the file, and convert it into lines:
#    pid function fd_no size timing filename
# Arguments: the files to tabulate (lsof output followed by strace output).
# Outputs:   one line per recognised I/O call, on stdout.
# Returns:   the exit status of awk.
#
# The awk program is passed inline (single-quoted) instead of being written to
# a fixed file under /tmp, so no shell escaping of "$" is needed and nothing
# predictable is created on disk.
tabulate_strace() {
  awk -- '
    # Print one output row for a completed call, if the descriptor maps to a
    # known file.  Opens carry no transfer size, so they are reported as 0;
    # for everything else the size is the syscall return value, which is the
    # next-to-last field when strace runs with -T.
    function report(pid, funcn, fd,    filename, size, size_field, timing) {
      filename = filename_for[fd];
      if ( filename != "" ) {
        if ( funcn ~ /open/ ) {
          size = 0;
        }
        else {
          size_field = NF - 1;
          size       = $size_field;
        }
        timing = $NF;
        gsub(/[<>]/, "", timing);
        print pid, funcn, fd, size, timing, filename;
      }
    }

    # NOTE(review): everything from here down to the "Continuation" comment
    # was lost from the recovered source and has been reconstructed so that
    # it provides what the surviving code uses (wanted_pat, cwd, pid,
    # filename_for).  The function list in wanted_pat and the lsof column
    # positions are assumptions - confirm against a pristine copy.
    BEGIN {
      # Function names, or partial names, that count as I/O.
      wanted_pat = "read|write|sync|open|close|getdents|seek|fcntl|ftruncate";
      cwd        = "";  # Working directory of the process, for ./ filenames.
      mode       = "";  # Which part of the input we are in: lsof or strace.
    }

    /^COMMAND/            { mode = "lsof";   }
    /^(strace: )?Process/ { mode = "strace"; }

    {
      if ( mode == "lsof" ) {
        # Assumes the lsof -s column layout:
        # COMMAND PID USER FD TYPE DEVICE SIZE NODE NAME
        if ( $5 == "REG" ) {
          fd = $4;
          gsub(/[^0-9].*/, "", fd);     # "3uW" -> "3"; "txt"/"mem" -> ""
          if ( fd != "" ) {
            filename_for[fd] = $9;
          }
        }
        else if ( $5 == "DIR" && $4 == "cwd" ) {
          cwd = $9;
        }
      }
      else if ( mode == "strace" && $1 == "[pid" ) {
        pid = $2;
        sub(/\]$/, "", pid);

        # Continuation of a previously <unfinished ...> function call
        if ( $3 == "<..." ) {
          funcn = $4;
          fd    = unfinished[pid "," funcn];
          if ( fd > 0 ) {
            report(pid, funcn, fd);
          }
        }
        # The beginning of a function call (not resumed).  There are basically
        # two cases here: the whole call is on one line, and it is unfinished
        # and ends on a later line.
        else {
          funcn = substr($3, 1, index($3, "(") - 1);
          if ( funcn ~ wanted_pat ) {
            # Save the file descriptor and name for lookup later.
            if ( funcn ~ /open/ ) {
              filename = substr($3, index($3, "(") + 2);
              filename = substr(filename, 1, index(filename, "\"") - 1);
              if ( "./" == substr(filename, 1, 2) ) {
                # Translate relative filenames into absolute ones.
                filename = cwd substr(filename, 2);
              }
              fd_field         = NF - 1;
              fd               = $fd_field;
              filename_for[fd] = filename;
            }
            else {
              fd = substr($3, index($3, "(") + 1);
              gsub(/[^0-9].*/, "", fd);
            }

            # Save unfinished calls for later
            if ( $NF == "...>" ) {
              unfinished[pid "," funcn] = fd;
            }
            # Function calls that are all on one line, not unfinished
            else {
              report(pid, funcn, fd);
            }
          }
        }
      }
    }
  ' "$@"
}

# Takes as input the output from tabulate_strace and prints the summary table.
# Arguments: -a FUNCTION (sum|avg), -c CELL (count|sizes|times),
#            -g GROUPBY (all|filename|pid), then the tabulate_strace output
#            file(s).  Omitted options default to sum / times / filename.
# Outputs:   the table on stdout.  Grouped modes print a header row followed
#            by one row per group, largest total first; "all" mode prints the
#            TOTAL row followed by one row per function.
# Returns:   1 on a bad option, otherwise the status of the final pipeline.
#
# The options are handed to awk with -v and compared as strings; they are no
# longer spliced into the awk source as identifiers.
#
# NOTE(review): the first half of the original awk program (set-up,
# accumulation, header row) was lost from the recovered source.  It has been
# rewritten to feed the surviving second half, whose row layout - total
# column, one column per function, then the group name - is kept as-is.  The
# header row and the absence of a function-name filter here are assumptions.
summarize_strace() {
  local agg="sum"
  local cell="times"
  local group_by="filename"
  local table

  # Get command-line options.
  while [ $# -gt 0 ]; do
    case "$1" in
      --)
        shift
        break
        ;;
      -a | -c | -g)
        if [ $# -lt 2 ]; then
          echo "summarize_strace: option $1 requires a value" >&2
          return 1
        fi
        case "$1" in
          -a) agg="$2" ;;
          -c) cell="$2" ;;
          -g) group_by="$2" ;;
        esac
        shift 2
        ;;
      *)
        break
        ;;
    esac
  done

  case "${agg}" in
    sum | avg) ;;
    *) echo "summarize_strace: bad -a value: ${agg}" >&2; return 1 ;;
  esac
  case "${cell}" in
    count | sizes | times) ;;
    *) echo "summarize_strace: bad -c value: ${cell}" >&2; return 1 ;;
  esac
  case "${group_by}" in
    all | filename | pid) ;;
    *) echo "summarize_strace: bad -g value: ${group_by}" >&2; return 1 ;;
  esac

  table=$(awk -v agg="${agg}" -v cell="${cell}" -v group_by="${group_by}" -- '
    # Pick the accumulated value that matches the requested cell type.
    function cell_value(key) {
      if ( cell == "count" ) { return count[key]; }
      if ( cell == "sizes" ) { return sizes[key]; }
      return times[key];
    }

    BEGIN {
      # Timings are fractional seconds; counts and sizes are integers.
      col_pat = (cell == "times") ? "%10.6f " : "%10d ";
    }

    {
      # Input columns: pid function fd_no size timing filename
      funcn = $2;
      if      ( group_by == "all" ) { thing = "all"; }
      else if ( group_by == "pid" ) { thing = $1;    }
      else                          { thing = $6;    }

      # Remember functions and groups in first-seen order so that the header
      # and every row list the columns in the same order.
      if ( !(funcn in func_seen) ) {
        func_seen[funcn]        = 1;
        func_names[++num_funcs] = funcn;
      }
      if ( !(thing in groupby) ) {
        groupby[thing]       = 1;
        things[++num_things] = thing;
      }

      key = funcn "," thing;
      count[key]++;
      sizes[key] += $4;
      times[key] += $5;
    }

    END {
      if ( group_by != "all" ) {
        printf("%10s ", "total");
        for ( f = 1; f <= num_funcs; f++ ) {
          printf("%10s ", func_names[f]);
        }
        print group_by;
      }

      for ( t = 1; t <= num_things; t++ ) {
        thing       = things[t];
        total_count = 0;
        total_sizes = 0;
        total_times = 0;
        output      = "";

        for ( f = 1; f <= num_funcs; f++ ) {
          funcn        = func_names[f];
          key          = funcn "," thing;
          total_count += count[key];
          total_sizes += sizes[key];
          total_times += times[key];
          result       = cell_value(key);
          if ( agg == "avg" ) {
            if ( count[key] > 0 ) {
              result /= count[key];
            }
            else {
              result = 0;
            }
          }
          if ( group_by != "all" ) {
            output = output sprintf(col_pat, result);
          }
          else {
            printf(col_pat "%s\n", result, funcn);
          }
        }

        if      ( cell == "count" ) { total_result = total_count; }
        else if ( cell == "sizes" ) { total_result = total_sizes; }
        else                        { total_result = total_times; }
        if ( agg == "avg" ) {
          if ( total_count > 0 ) {
            total_result /= total_count;
          }
          else {
            total_result = 0;
          }
        }
        printf(col_pat, total_result);
        if ( group_by != "all" ) {
          print output thing;
        }
        else {
          print "TOTAL";
        }
      }
    }
  ' "$@") || return

  if [ -z "${table}" ]; then
    return 0
  fi

  # LC_ALL=C keeps "0.000123" numeric for sort regardless of the locale.
  if [ "${group_by}" != "all" ]; then
    printf '%s\n' "${table}" | head -n 1
    printf '%s\n' "${table}" | tail -n +2 | LC_ALL=C sort -rn -k1
  else
    printf '%s\n' "${table}" | grep TOTAL
    printf '%s\n' "${table}" | grep -v TOTAL | LC_ALL=C sort -rn -k1
  fi
}

# Remove the private work directory and stop a leftover strace, if any.
# Installed by main as an EXIT trap so it also runs on early "exit 1" paths.
# Globals:   IOPROFILE_STRACE_PID, IOPROFILE_TMPDIR (read and cleared).
_ioprofile_cleanup() {
  if [ -n "${IOPROFILE_STRACE_PID:-}" ]; then
    kill -s TERM "${IOPROFILE_STRACE_PID}" 2>/dev/null
    IOPROFILE_STRACE_PID=""
  fi
  if [ -n "${IOPROFILE_TMPDIR:-}" ] && [ -d "${IOPROFILE_TMPDIR}" ]; then
    rm -rf -- "${IOPROFILE_TMPDIR}"
  fi
  IOPROFILE_TMPDIR=""
}

# The main code that runs by default.
# Arguments: the command-line options (see usage), optionally followed by
#            FILEs holding previously captured lsof+strace output.
# Globals:   OPT_a OPT_b OPT_c OPT_g OPT_k OPT_p OPT_s (written, exported),
#            OPT_ERR (written on bad options).
# Outputs:   the summary table on stdout; diagnostics on stderr.
# Returns:   0 on success; exits 1 on bad options or when tracing fails.
main() {
  # The documented -k default doubles as "do not keep the raw data".
  local default_keepfile="/tmp/aspersa"
  local raw_file tab_file rc

  # Get command-line options.  Parsing stops at the first non-option, which
  # starts the list of files.
  while [ $# -gt 0 ]; do
    case "$1" in
      --)
        shift
        break
        ;;
      --help)
        usage
        ;;
      -a | -b | -c | -g | -k | -p | -s)
        if [ $# -lt 2 ]; then
          OPT_ERR="Option $1 requires a value."
          usage
        fi
        case "$1" in
          -a) OPT_a="$2" ;;
          -b) OPT_b="$2" ;;
          -c) OPT_c="$2" ;;
          -g) OPT_g="$2" ;;
          -k) OPT_k="$2" ;;
          -p) OPT_p="$2" ;;
          -s) OPT_s="$2" ;;
        esac
        shift 2
        ;;
      -*)
        OPT_ERR="Unknown option ${1}."
        usage
        ;;
      *)
        break
        ;;
    esac
  done

  export OPT_k="${OPT_k:-${default_keepfile}}"
  export OPT_a="${OPT_a:-sum}"
  export OPT_b="${OPT_b:-mysqld}"
  export OPT_p="${OPT_p:-}"
  export OPT_c="${OPT_c:-times}"
  export OPT_s="${OPT_s:-30}"
  export OPT_g="${OPT_g:-filename}"

  case "${OPT_a}" in
    sum | avg) ;;
    *) OPT_ERR="Bad option value"; usage ;;
  esac
  case "${OPT_c}" in
    count | sizes | times) ;;
    *) OPT_ERR="Bad option value"; usage ;;
  esac
  case "${OPT_g}" in
    all | filename | pid) ;;
    *) OPT_ERR="Bad option value"; usage ;;
  esac
  # -s and -p end up in arithmetic and in kill/strace/lsof arguments, so
  # accept digits only.
  case "${OPT_s}" in
    '' | *[!0-9]*) OPT_ERR="Bad option value"; usage ;;
  esac
  case "${OPT_p}" in
    *[!0-9]*) OPT_ERR="Bad option value"; usage ;;
  esac

  # Intermediate files live in a private directory rather than under fixed
  # names in /tmp.
  IOPROFILE_STRACE_PID=""
  IOPROFILE_TMPDIR=$(mktemp -d "${TMPDIR:-/tmp}/ioprofile.XXXXXX") || {
    echo "Error: could not create a temporary directory" >&2
    exit 1
  }
  trap _ioprofile_cleanup EXIT
  tab_file="${IOPROFILE_TMPDIR}/tabulated"
  if [ "${OPT_k}" = "${default_keepfile}" ]; then
    raw_file="${IOPROFILE_TMPDIR}/raw"
  else
    raw_file="${OPT_k}"
  fi

  if [ $# -eq 0 ]; then
    # There's no file to analyze, so we'll make one.
    if ! command -v strace > /dev/null 2>&1; then
      echo "Error: could not find strace in path" >&2
      exit 1
    fi

    if [ -z "${OPT_p}" ]; then
      OPT_p=$(pidof -s "${OPT_b}" 2> /dev/null)
      if [ -z "${OPT_p}" ]; then
        OPT_p=$(pgrep -o -x -- "${OPT_b}" 2> /dev/null)
      fi
      if [ -z "${OPT_p}" ]; then
        # Last resort when pidof/pgrep are unavailable.  Skip this script and
        # its own children, whose command lines also contain the name.
        OPT_p=$(ps -eaf | grep -- "${OPT_b}" | grep -v grep \
          | awk -v self="$$" '$2 != self && $3 != self { print $2; exit }')
      fi
    fi

    date
    if [ -z "${OPT_p}" ]; then
      echo "No such process" >&2
      exit 1
    fi

    echo "Tracing process ID ${OPT_p}"
    lsof -n -P -s -p "${OPT_p}" > "${raw_file}" 2>&1
    rc=$?
    if [ "${rc}" -ne 0 ]; then
      echo "Error: could not execute lsof, error code ${rc}" >&2
      exit 1
    fi

    # The status of a command started with & is always 0, so a failed strace
    # can only be detected by checking that it is still alive a moment later.
    strace -T -s 0 -f -p "${OPT_p}" >> "${raw_file}" 2>&1 &
    IOPROFILE_STRACE_PID=$!

    # Sleep one second then check to make sure the strace is actually running.
    sleep 1
    if ! kill -0 "${IOPROFILE_STRACE_PID}" 2> /dev/null; then
      IOPROFILE_STRACE_PID=""
      echo "Error: could not find strace process.. possible error message follows" >&2
      tail "${raw_file}" >&2
      exit 1
    fi

    # Sleep for interval - 1, since we did a one second sleep before checking
    # for the PID of strace.  10# stops "08" being read as octal.
    if [ $((10#${OPT_s} - 1)) -gt 0 ]; then
      sleep $((10#${OPT_s} - 1))
    fi

    kill -s INT "${IOPROFILE_STRACE_PID}"
    sleep 1
    kill -s TERM "${IOPROFILE_STRACE_PID}" 2> /dev/null
    IOPROFILE_STRACE_PID=""
    # Sometimes strace leaves threads/processes in T status.
    kill -s CONT "${OPT_p}"

    # Summarize the output we just generated.
    tabulate_strace "${raw_file}" > "${tab_file}"
  else
    # Summarize the files the user passed in.
    tabulate_strace "$@" > "${tab_file}"
  fi

  # Assuming all the inputs are filenames that have been processed above, now
  # we ignore $@ and re-send the pre-processed command-line options to the
  # summarize_strace function.
  summarize_strace -a "${OPT_a}" -c "${OPT_c}" -g "${OPT_g}" "${tab_file}"

  _ioprofile_cleanup
  trap - EXIT
}

# Execute the program if it was not included from another file.  This makes it
# possible to include without executing, and thus test.
if [ "$(basename "$0")" = "ioprofile" ] \
  || { [ "$(basename "$0")" = "bash" ] && [ "$_" = "$0" ]; }; then
  main "$@"
fi