#!/usr/bin/env bash
# This program is part of Aspersa (http://code.google.com/p/aspersa/)

# ########################################################################
# This script reads a file that was generated from /proc/diskstats and
# summarizes it, kind of like iostat.  The file should look like this:
#
#    <contents of /proc/diskstats>
#    TS <timestamp>
#    <contents of /proc/diskstats>
#    ... et cetera
#    TS <timestamp>   <-- must end with a TS line.
#
# The second field of each TS line is the timestamp in seconds; anything
# after it is ignored.
#
# Author: Baron Schwartz
# ########################################################################

# ########################################################################
# A bunch of snippets of awk code, to be reused in the functions below.
# Each snippet is plain awk source held in a shell string; the functions
# splice the snippets into a generated awk program.  Because the strings are
# double-quoted, awk field references and awk string quotes are
# backslash-escaped.  The snippets contain awk comments, so the line breaks
# inside them are significant.
# ########################################################################

# Copies the fields of one /proc/diskstats line into named awk variables.
awk_parse_line="
   # The entries in each stat line are as follows:
   # 1 major
   # 2 minor
   # 3 device name
   dev = \$3;
   # 4 reads
   reads = \$4;
   # 5 reads merged
   reads_merged = \$5;
   # 6 read sectors
   read_sectors = \$6;
   # 7 ms spent reading
   ms_spent_reading = \$7;
   # 8 writes
   writes = \$8;
   # 9 writes merged
   writes_merged = \$9;
   # 10 written sectors
   written_sectors = \$10;
   # 11 ms spent writing
   ms_spent_writing = \$11;
   # 12 IOs in progress
   ios_in_progress = \$12;
   # 13 ms spent doing io
   ms_spent_doing_io = \$13;
   # 14 ms spent, weighted by ios_in_progress
   ms_weighted = \$14;
"

# NOTE: this one is necessary in order to get the device listing.  NOTE: the
# 'devs' variable is initialized to 0, but it is pre-incremented, so a) it will
# reflect the accurate number of devices found (after filtering); b) iteration
# must be from 1 to devs, not from 0 to devs-1.
awk_save_sample_to_first="
   # Keep track of the natural order of the devices, so we can print them out
   # nicely later; and also keep the first-ever line of output.  This only
   # executes the first time through.
   devices[dev]++;
   if ( devices[dev] == 1 ) {
      devsort[++devs] = dev;
      first[dev \"_reads\"] = reads;
      first[dev \"_reads_merged\"] = reads_merged;
      first[dev \"_read_sectors\"] = read_sectors;
      first[dev \"_ms_spent_reading\"] = ms_spent_reading;
      first[dev \"_writes\"] = writes;
      first[dev \"_writes_merged\"] = writes_merged;
      first[dev \"_written_sectors\"] = written_sectors;
      first[dev \"_ms_spent_writing\"] = ms_spent_writing;
      first[dev \"_ios_in_progress\"] = ios_in_progress;
      first[dev \"_ms_spent_doing_io\"] = ms_spent_doing_io;
      first[dev \"_ms_weighted\"] = ms_weighted;
   }
"

# Counts the TS lines seen and remembers the first and the current timestamp.
awk_set_iterations_and_timestamp="
   iterations++;
   curr_ts = \$2;
   if ( iterations == 1 ) {
      first[\"ts\"] = curr_ts;
   }
"

# Stores the most recently parsed line as the current sample of its device.
awk_save_sample_to_curr="
   curr[dev \"_reads\"] = reads;
   curr[dev \"_reads_merged\"] = reads_merged;
   curr[dev \"_read_sectors\"] = read_sectors;
   curr[dev \"_ms_spent_reading\"] = ms_spent_reading;
   curr[dev \"_writes\"] = writes;
   curr[dev \"_writes_merged\"] = writes_merged;
   curr[dev \"_written_sectors\"] = written_sectors;
   curr[dev \"_ms_spent_writing\"] = ms_spent_writing;
   curr[dev \"_ios_in_progress\"] = ios_in_progress;
   curr[dev \"_ms_spent_doing_io\"] = ms_spent_doing_io;
   curr[dev \"_ms_weighted\"] = ms_weighted;
"

# Makes the current sample the baseline for the next interval, and adds each
# device's in-progress count to a running per-device sum kept in prev.
awk_save_curr_as_prev="
   curr[\"ts\"] = curr_ts;
   for (i in curr) {
      prev[i] = curr[i];
   }
   for ( i = 1; i <= devs; i++ ) {
      dev = devsort[i];
      prev[dev \"_sum_ios_in_progress\"] += curr[dev \"_ios_in_progress\"];
   }
   ts = curr_ts;
"

# Sets mdev to the width of the device column: the longest device name, but
# never less than 6.
awk_find_max_device_name_length="
   mdev = 6;
   for ( i = 1; i <= devs; i++ ) {
      dlen = length(devsort[i]);
      if ( dlen > mdev ) {
         mdev = dlen;
      }
   }
"

# Computes, for device dev, the change from the first sample to the current one.
awk_get_overall_increments="
   # Get incremental numbers.
   reads = curr[dev \"_reads\"] - first[dev \"_reads\"];
   reads_merged = curr[dev \"_reads_merged\"] - first[dev \"_reads_merged\"];
   read_sectors = curr[dev \"_read_sectors\"] - first[dev \"_read_sectors\"];
   ms_spent_reading = curr[dev \"_ms_spent_reading\"] - first[dev \"_ms_spent_reading\"];
   writes = curr[dev \"_writes\"] - first[dev \"_writes\"];
   writes_merged = curr[dev \"_writes_merged\"] - first[dev \"_writes_merged\"];
   written_sectors = curr[dev \"_written_sectors\"] - first[dev \"_written_sectors\"];
   ms_spent_writing = curr[dev \"_ms_spent_writing\"] - first[dev \"_ms_spent_writing\"];
   ms_spent_doing_io = curr[dev \"_ms_spent_doing_io\"] - first[dev \"_ms_spent_doing_io\"];
   ms_weighted = curr[dev \"_ms_weighted\"] - first[dev \"_ms_weighted\"];
   in_progress = curr[dev \"_ios_in_progress\"];
   tot_in_progress = prev[dev \"_sum_ios_in_progress\"];
"

# Computes, for device dev, the change from the previous baseline sample to the
# current one.
awk_compute_incremental_stats="
   # Get incremental numbers.
   reads = curr[dev \"_reads\"] - prev[dev \"_reads\"];
   reads_merged = curr[dev \"_reads_merged\"] - prev[dev \"_reads_merged\"];
   read_sectors = curr[dev \"_read_sectors\"] - prev[dev \"_read_sectors\"];
   ms_spent_reading = curr[dev \"_ms_spent_reading\"] - prev[dev \"_ms_spent_reading\"];
   writes = curr[dev \"_writes\"] - prev[dev \"_writes\"];
   writes_merged = curr[dev \"_writes_merged\"] - prev[dev \"_writes_merged\"];
   written_sectors = curr[dev \"_written_sectors\"] - prev[dev \"_written_sectors\"];
   ms_spent_writing = curr[dev \"_ms_spent_writing\"] - prev[dev \"_ms_spent_writing\"];
   ms_spent_doing_io = curr[dev \"_ms_spent_doing_io\"] - prev[dev \"_ms_spent_doing_io\"];
   ms_weighted = curr[dev \"_ms_weighted\"] - prev[dev \"_ms_weighted\"];
   in_progress = curr[dev \"_ios_in_progress\"];
   # The running sum lives in prev only.  Reading it through curr would create
   # an empty curr entry that the curr-to-prev copy then writes over the sum.
   tot_in_progress = prev[dev \"_sum_ios_in_progress\"];
"

# Zeroes every t_ accumulator.
awk_reset_accumulators="
   t_reads = 0;
   t_reads_merged = 0;
   t_read_sectors = 0;
   t_ms_spent_reading = 0;
   t_writes = 0;
   t_writes_merged = 0;
   t_written_sectors = 0;
   t_ms_spent_writing = 0;
   t_ms_spent_doing_io = 0;
   t_ms_weighted = 0;
   t_in_progress = 0;
"

# Loads the t_ accumulators from the per-device increments.  t_in_progress is
# not set here; each caller assigns it separately.
awk_copy_variables_to_accumulators="
   t_reads = reads;
   t_reads_merged = reads_merged;
   t_read_sectors = read_sectors;
   t_ms_spent_reading = ms_spent_reading;
   t_writes = writes;
   t_writes_merged = writes_merged;
   t_written_sectors = written_sectors;
   t_ms_spent_writing = ms_spent_writing;
   t_ms_spent_doing_io = ms_spent_doing_io;
   t_ms_weighted = ms_weighted;
"

# Turns the t_ accumulators into the printed statistics.  The caller must have
# set elapsed (seconds, non-zero) and devs_in_group beforehand.
awk_compute_read_write_stats="
   # Compute the per-second stats for reads, writes, and overall.
   reads_sec = t_reads / elapsed;
   read_requests = t_reads_merged + t_reads;
   mbytes_read_sec = t_read_sectors / elapsed / 2048;
   read_conc = t_ms_spent_reading / elapsed / 1000 / devs_in_group;
   if ( t_reads > 0 ) {
      read_rtime = t_ms_spent_reading / t_reads;
      # Sectors are 512-byte units, so sectors / 2 is kilobytes.
      avg_read_sz = t_read_sectors / t_reads / 2;
   }
   else {
      read_rtime = 0;
      avg_read_sz = 0;
   }
   if ( read_requests > 0 ) {
      read_merge_pct = 100 * t_reads_merged / read_requests;
   }
   else {
      read_merge_pct = 0;
   }
   writes_sec = t_writes / elapsed;
   write_requests = t_writes_merged + t_writes;
   mbytes_written_sec = t_written_sectors / elapsed / 2048;
   write_conc = t_ms_spent_writing / elapsed / 1000 / devs_in_group;
   if ( t_writes > 0 ) {
      write_rtime = t_ms_spent_writing / t_writes;
      # Sectors are 512-byte units, so sectors / 2 is kilobytes.
      avg_write_sz = t_written_sectors / t_writes / 2;
   }
   else {
      write_rtime = 0;
      avg_write_sz = 0;
   }
   if ( write_requests > 0 ) {
      write_merge_pct = 100 * t_writes_merged / write_requests;
   }
   else {
      write_merge_pct = 0;
   }
   # Compute the numbers for reads and writes together, the things for
   # which we do not have separate statistics.
   # Busy is what iostat calls %util.  This is the percent of
   # wall-clock time during which the device has I/O happening.
   busy = 100 * t_ms_spent_doing_io / (1000 * elapsed * devs_in_group);
   if ( first[\"ts\"] > 0 ) {
      line_ts = sprintf(\"%5.1f\", curr_ts - first[\"ts\"]);
   }
   else {
      line_ts = sprintf(\"%5.1f\", 0);
   }
"

# Returns true if the column should be displayed.
col_ok() {
   # $1 is a column name.  It is displayed when it matches the awk regex in
   # OPT_c; an empty or unset OPT_c matches every column.  The pattern is
   # handed to awk through the environment rather than spliced into the
   # program text, so a pattern containing a slash cannot break the awk syntax.
   # The awk program has only a BEGIN rule, so it never reads standard input.
   COL_PATTERN="${OPT_c:-.}" awk -v name="$1" \
      'BEGIN { exit ((name ~ ENVIRON["COL_PATTERN"]) ? 0 : 1) }'
}

# Based on which columns match $OPT_c, designs a header and line printf format,
# and a printf statement to print the lines.  The results are left in the
# global variables awk_print_header and awk_print_line, which hold awk source.
design_print_formats() {
   local fmt hdr vars col name width spec var padded

   # For each device, print out the following: The timestamp offset and
   # device name.  Must embed the mdev Awk variable here, because the device
   # name is variable-length.
   fmt="\"%5s %-\" mdev \"s"
   hdr="${fmt}"
   vars=""

   # One entry per column, in display order: name, printed width, printf
   # conversion, awk variable.  Reads come first (per-second reads, average
   # read size in kB, MB read per second, percent merged, concurrency and
   # average response time), then the same for writes, then busy% and the
   # in-progress count.
   for col in \
      "rd_s:7:%7.1f:reads_sec" \
      "rd_avkb:7:%7.1f:avg_read_sz" \
      "rd_mb_s:7:%7.1f:mbytes_read_sec" \
      "rd_mrg:6:%5.0f%%:read_merge_pct" \
      "rd_cnc:6:%6.1f:read_conc" \
      "rd_rt:7:%7.1f:read_rtime" \
      "wr_s:7:%7.1f:writes_sec" \
      "wr_avkb:7:%7.1f:avg_write_sz" \
      "wr_mb_s:7:%7.1f:mbytes_written_sec" \
      "wr_mrg:6:%5.0f%%:write_merge_pct" \
      "wr_cnc:6:%6.1f:write_conc" \
      "wr_rt:7:%7.1f:write_rtime" \
      "busy:4:%3.0f%%:busy" \
      "in_prg:6:%6d:t_in_progress"
   do
      IFS=: read -r name width spec var <<<"${col}"
      if col_ok "${name}"; then
         # Right-align the header to the width of the data column so the
         # header and the numbers line up.
         printf -v padded '%*s' "${width}" "${name}"
         fmt="${fmt} ${spec}"
         hdr="${hdr} ${padded}"
         vars="${vars}, ${var}"
      fi
   done

   # Line-ending.
   fmt="${fmt}\n\""
   hdr="${hdr}\n\""
   awk_print_header="printf(${hdr}, \"#ts\", \"device\");"
   awk_print_line="printf(${fmt}, line_ts, dev${vars});"
}

# Runs the awk program stored in file $1 against the remaining arguments (or
# standard input when there are none), removes the file, and returns awk's
# exit status.
_run_awk_file() {
   local awk_file="$1"
   local status
   shift
   awk -f "${awk_file}" "$@"
   status=$?
   rm -f "${awk_file}"
   return "${status}"
}

# NOTE(review): in the reviewed copy of this file the start of each generated
# awk program below (the BEGIN block and the per-device rule, up to the
# condition of the TS rule) was missing.  It has been reconstructed from the
# awk snippets, which are otherwise never used, and from the comment saying
# devs starts at 0.  Confirm against the upstream source.

# Prints out one line for each disk, summing over the interval from first to
# last sample.
group_by_disk () {
   [ -z "${awk_print_line}" ] && design_print_formats
   local awk_file
   # A private, unpredictable file name instead of a fixed path in /tmp.
   awk_file=$(mktemp "${TMPDIR:-/tmp}/aspersa.awk.XXXXXX") || return 1
   cat > "${awk_file}" <<EOF
      BEGIN {
         devs    = 0;
         devname = "${OPT_d}";
      }
      \$1 !~ /TS/ && \$3 ~ devname {
         ${awk_parse_line}
         ${awk_save_sample_to_first}
         ${awk_save_sample_to_curr}
         # Collect the in-progress counts so they can be averaged at the end.
         in_progress_sum[dev] += ios_in_progress;
         in_progress_samples[dev]++;
      }
      \$1 ~ /TS/ && NR > 1 {
         ${awk_set_iterations_and_timestamp}
      }
      END {
         if ( iterations < 2 ) {
            exit;
         }
         ${awk_find_max_device_name_length}
         ${awk_print_header}
         elapsed = curr_ts - first["ts"];
         for ( i = 1; i <= devs; i++ ) {
            dev = devsort[i];
            ${awk_get_overall_increments}
            ${awk_copy_variables_to_accumulators}
            # The in-progress operations needs to be averaged.  It is a
            # point-in-time count, so average the values that were sampled.
            t_in_progress = in_progress_sum[dev] / in_progress_samples[dev];
            devs_in_group = 1;
            ${awk_compute_read_write_stats}
            line_ts="{" (iterations - 1) "}";
            ${awk_print_line}
         }
      }
EOF
   _run_awk_file "${awk_file}" "$@"
}

# Prints out one line for each sample, summing up all disks together.
group_by_sample() {
   [ -z "${awk_print_line}" ] && design_print_formats
   local awk_file
   awk_file=$(mktemp "${TMPDIR:-/tmp}/aspersa.awk.XXXXXX") || return 1
   cat > "${awk_file}" <<EOF
      BEGIN {
         devs    = 0;
         devname = "${OPT_d}";
      }
      \$1 !~ /TS/ && \$3 ~ devname {
         ${awk_parse_line}
         ${awk_save_sample_to_first}
         ${awk_save_sample_to_curr}
      }
      \$1 ~ /TS/ && NR > 1 {
         ${awk_set_iterations_and_timestamp}
         printed_a_line = 0;
         if ( iterations == 1 ) {
            # The second time we see a timestamp we are ready to print a header.
            mdev = 6;
            if ( devs == 1 ) {
               ${awk_find_max_device_name_length}
            }
            ${awk_print_header}
         }
         elapsed = curr_ts - ts;
         if ( ts > 0 && elapsed > ${OPT_i:-0} ) {
            # Reset the t_ variables to zero.
            ${awk_reset_accumulators}
            for ( i = 1; i <= devs; i++ ) {
               dev = devsort[i];
               # Save the incrementals into named variables.
               ${awk_compute_incremental_stats}
               # Add the increments to the accumulators.
               t_reads += reads;
               t_reads_merged += reads_merged;
               t_read_sectors += read_sectors;
               t_ms_spent_reading += ms_spent_reading;
               t_writes += writes;
               t_writes_merged += writes_merged;
               t_written_sectors += written_sectors;
               t_ms_spent_writing += ms_spent_writing;
               t_ms_spent_doing_io += ms_spent_doing_io;
               t_ms_weighted += ms_weighted;
               t_in_progress += in_progress;
            }
            devs_in_group = devs;
            ${awk_compute_read_write_stats}
            if ( devs > 1 ) {
               dev = "{" devs "}";
            }
            else {
               dev = devsort[1];
            }
            ${awk_print_line}
            printed_a_line = 1;
         }
         if ( iterations == 1 || printed_a_line == 1 ) {
            # We don't save "curr" as "prev" on every sample we see, because if the
            # interval of printing is more than one sample, we want "prev" to be
            # the first sample in the interval, not the previous sample seen.
            ${awk_save_curr_as_prev}
         }
      }
EOF
   _run_awk_file "${awk_file}" "$@"
}

# Prints out one line for each sample, for each disk that matches the pattern.
# TODO: omits the first sample.
group_by_all () {
   [ -z "${awk_print_line}" ] && design_print_formats
   local awk_file
   awk_file=$(mktemp "${TMPDIR:-/tmp}/aspersa.awk.XXXXXX") || return 1
   cat > "${awk_file}" <<EOF
      BEGIN {
         devs    = 0;
         devname = "${OPT_d}";
      }
      \$1 !~ /TS/ && \$3 ~ devname {
         ${awk_parse_line}
         ${awk_save_sample_to_first}
         ${awk_save_sample_to_curr}
      }
      \$1 ~ /TS/ && NR > 1 {
         ${awk_set_iterations_and_timestamp}
         ${awk_find_max_device_name_length}
         if ( iterations > 1 ) {
            # With several devices every sample gets its own header; with one
            # device the header is printed once.
            if ( devs > 1 || iterations == 2 ) {
               ${awk_print_header}
            }
            ${awk_reset_accumulators}
            elapsed = curr_ts - prev["ts"];
            for ( i = 1; i <= devs; i++ ) {
               dev = devsort[i];
               ${awk_compute_incremental_stats}
               ${awk_copy_variables_to_accumulators}
               t_in_progress = curr[dev "_ios_in_progress"];
               devs_in_group = 1;
               ${awk_compute_read_write_stats}
               ${awk_print_line}
            }
         }
         ${awk_save_curr_as_prev}
      }
EOF
   _run_awk_file "${awk_file}" "$@"
}

# Show current help and settings
print_help() {
   cat <<HELP
   You can control this program by key presses:
   ------------------- Key ------------------- ---- Current Setting ----
   A, D, S) Set the group-by mode              ${OPT_g:-(none)}
   c) Enter an awk regex to match column names ${OPT_c:-(none)}
   d) Enter an awk regex to match disk names   ${OPT_d:-(none)}
   i) Set the sample size in seconds           ${OPT_i:-(none)}
   s) Set the redisplay interval in seconds    ${OPT_s:-(none)}
   p) Pause the program
   q) Quit the program
   ------------------- Press any key to continue -----------------------
HELP
}

# Prints OPT_ERR (if set) and the usage text, then exits with status 1.
usage() {
   if [ "${OPT_ERR}" ]; then
      echo "${OPT_ERR}"
   fi
   cat <<USAGE
Usage: $0 [OPTIONS] [FILE]
   $0 does two things: 1) get /proc/diskstats periodically 2) aggregate the result.
   If you specify a FILE, or send input to STDIN, then step 1) is not performed.
Options:
   -c COLS     Awk regex of which columns to include (default cnc|rt|mb|busy|prg).
   -d DEVICES  Awk regex of which devices to include.
   -g GROUPBY  Group-by mode (default disk); specify one of the following:
               disk)   # Each line of output shows one disk device.
               sample) # Each line of output shows one sample of statistics.
               all)    # Each line of output shows one sample and one disk device.
   -i INTERVAL In -g sample mode, include INTERVAL seconds per sample.
   -k KEEPFILE File to save diskstats samples in (default /tmp/aspersa).
               If a non-default filename is used, it will be saved for later analysis.
   -n SAMPLES  When in interactive mode, stop after N samples.
   -s INTERVAL Sample /proc/diskstats every N seconds (default 1).
USAGE
   exit 1
}

# The main code that runs by default.  Arguments are the command-line options.
main() {
   # Get command-line options.  Options must come before any FILE argument;
   # parsing stops at "--" or at the first argument that is not an option.
   # The arguments are consumed as they are examined, so an option's value is
   # never mistaken for another option.
   while [ $# -gt 0 ]; do
      case "$1" in
         --)
            shift
            break
            ;;
         --help)
            usage
            ;;
         -c|-d|-g|-i|-k|-n|-s)
            if [ $# -lt 2 ]; then
               OPT_ERR="Option $1 requires a value."
               usage
            fi
            case "$1" in
               -c) OPT_c="$2" ;;
               -d) OPT_d="$2" ;;
               -g)
                  OPT_g="$2"
                  case "${OPT_g}" in
                     disk|sample|all)
                        ;;
                     *)
                        OPT_ERR="Bad option value"
                        usage
                        ;;
                  esac
                  ;;
               -i) OPT_i="$2" ;;
               -k) OPT_k="$2" ;;
               -n) OPT_n="$2" ;;
               -s) OPT_s="$2" ;;
            esac
            shift 2
            ;;
         -*)
            OPT_ERR="Unknown option $1."
            usage
            ;;
         *)
            break
            ;;
      esac
   done
   OPT_i="${OPT_i:-}"; export OPT_i;
   OPT_k="${OPT_k:-/tmp/aspersa}"; export OPT_k;
   OPT_n="${OPT_n:-}"; export OPT_n;
   OPT_c="${OPT_c:-cnc|rt|mb|busy|prg}"; export OPT_c;
   OPT_d="${OPT_d:-}"; export OPT_d;
   OPT_s="${OPT_s:-1}"; export OPT_s;
   OPT_g="${OPT_g:-disk}"; export OPT_g;

   # We need to "do the right thing."  The user might invoke any of several
   # ways; we get samples every now and then unless there is data on STDIN or a
   # file to read.  Bash checks file descriptor 0 when a file test is given
   # /dev/stdin, so this detects a pipe or a redirected regular file.
   if [ $# -gt 0 ] || [ -p /dev/stdin ] || [ -f /dev/stdin ]; then
      READ_FILE=1
   fi

   # If we are interactive and there's no file, we gather stats to play with.
   if [ -z "${READ_FILE}" ]; then
      PARENT=$$
      loops=1
      while true; do
         cat /proc/diskstats >> "${OPT_k}"
         date +"TS %s.%N %F %T" >> "${OPT_k}"
         if ! ps -p ${PARENT} >/dev/null 2>&1 ; then
            # The parent process doesn't exist anymore -- quit.
            finished="yes"
         elif [ "${OPT_n}" ]; then
            if [ "${loops}" -gt "${OPT_n}" ] ; then
               finished="yes"
            fi
         fi
         if [ "${finished}" ]; then
            if [ "${OPT_k}" = "/tmp/aspersa" ]; then
               rm -f /tmp/aspersa
            fi
            break;
         fi
         sleep "${OPT_s}"
         loops=$(($loops + 1))
      done &
      sampler_pid=$!

      # Sleep until the loop has gathered 2 samples.  The file may not exist
      # yet, so a missing count is treated as 0; stop waiting if the sampler
      # has already exited, because then no more samples will arrive.
      while true; do
         samples=$(grep -c TS "${OPT_k}" 2>/dev/null)
         if [ "${samples:-0}" -ge 2 ]; then
            break
         fi
         if ! kill -0 "${sampler_pid}" 2>/dev/null; then
            break
         fi
         sleep .5
      done
   fi

   if [ -z "${READ_FILE}" ]; then
      group_by_${OPT_g} "${OPT_k}"
   else
      group_by_${OPT_g} "$@"
   fi

   # Don't be "interactive" unless the user actually has control.
   if [ ! -t 0 ] || [ ! -t 1 ]; then
      exit;
   fi

   # We use this in iterative-loop mode
   if [ -z "${READ_FILE}" ]; then
      TAIL_LINES=$(wc -l < /proc/diskstats)
   fi

   while [ -z "${OPT_n}" ] || [ "${i:-0}" -le "${OPT_n:-0}" ]; do
      i=$(( ${i:-1} + 1 ))

      # Re-decide the timeout every loop
      if [ -z "${READ_FILE}" ]; then
         TIMEOUT="-t ${OPT_s}"
      fi
      cmd="" # Must reset, some bash won't clear it after a read times out.
      # TIMEOUT is deliberately unquoted: it is either empty or two words.
      read ${TIMEOUT} -n 1 -s cmd junk
      case "${cmd}" in
         A)
            OPT_g="all"
            FIRST_LOOP="1"
            ;;
         d)
            read -r -p "Enter a disk/device pattern: " OPT_d
            FIRST_LOOP="1"
            ;;
         D)
            OPT_g="disk"
            FIRST_LOOP="1"
            ;;
         c)
            read -r -p "Enter a column pattern: " OPT_c
            FIRST_LOOP="1"
            awk_print_line="" # Make it re-compute the column headers
            ;;
         i)
            read -r -p "Enter a sample size: " OPT_i
            FIRST_LOOP="1"
            ;;
         p)
            read -n 1 -p "Paused - press any key to continue"
            ;;
         q)
            break
            ;;
         s)
            read -r -p "Enter a redisplay interval: " OPT_s
            FIRST_LOOP="1"
            ;;
         S)
            OPT_g="sample"
            FIRST_LOOP="1"
            ;;
         '?')
            print_help; read -n1 -s
            ;;
      esac

      if [ -z "${READ_FILE}" ]; then
         if [ -z "${FIRST_LOOP}" ]; then
            # We only print out what's new since last printout
            N=$(($TAIL_LINES * 2 + 2)) # Extra is for TS lines
            tail -n "${N}" "${OPT_k}" 2>/dev/null | group_by_${OPT_g} | tail -n +2
         else
            group_by_${OPT_g} "${OPT_k}"
         fi
         FIRST_LOOP=""
      else
         group_by_${OPT_g} "$@"
      fi
   done

   if [ "${OPT_k}" = "/tmp/aspersa" ]; then
      rm -f "/tmp/aspersa"
   fi
}

# Execute the program if it was not included from another file.  This makes it
# possible to include without executing, and thus test.
if [ "$(basename "$0")" = "diskstats" ] || [ "$(basename "$0")" = "bash" -a "$_" = "$0" ]; then
   main "$@"
fi