#!/usr/bin/env bash # This program is part of Aspersa (http://code.google.com/p/aspersa/) # ######################################################################## # A tool to do Universal Scalability Law modeling, based on Neil Gunther's # book Guerrilla Capacity Planning. # # Authors: # Baron Schwartz # ######################################################################## # TODO: need to make it optionally make logarithmic X axis graph. Also, apply # -i and -n and so on in the main body, not in the converter itself, so that I # can convert a file and then manipulate it separately. # TODO: I want it to entirely skip samples that have too-large concurrency, as # defined by -m. I don't want it to just average the concurrency across the # other samples; it will introduce skew into the throughput for that sample, # too. # Print a usage message and exit. usage() { if [ "${OPT_ERR}" ]; then echo "${OPT_ERR}" fi cat <<-EOF Usage: $0 [OPTIONS] FILE [FILE...] $0 performs Universal Scalability Law modelling. It expects as input a file with columns N (load; concurrency; independentvar) and C (throughput). The file may contain comment lines beginning with the # character. The tool outputs .png images with the deviation, efficiency, residuals, and model-vs-actual data. It also prints out information from the process of fitting the curve to the data, such as gnuplot's error estimates. Options: -a Set X-axis label to 'node count' (default is concurrency). -c CONVERSION Converts the input file into N-vs-C format as specified: globalstatus Convert from MySQL's SHOW GLOBAL STATUS tcpdump Convert from 'tcpdump -tttt -nnq' format -d Don't delete the gnuplot files used to generate the charts. -e Draw error lines on the final plot. -i INTERVAL When -c is given, group the input into -i second intervals. -k KEEPFILE Save the N-vs-C data in the specified file. -l LIMIT X-axis limit for the final plot. -L COLOR The color for plotting points (default lt rgb "#8B0000"). -m THREADS When -c is given, max valid value of N, to filter outliers. -n ADJUSTMENT Adjust the N variable downwards to compensate for known errors such as error of observation. -o ONLY Only produce plots specified in this comma-separated list. -p PREFIX Prefix for the generated image file names. -P PORT TCP port for when -c tcpdump is used (default 3306). -r Render pdf and eps plots in color. -R Don't re-fit the data; use the results from quadratic fit. -t FILETYPE Type for the image files (png, pdf, eps). -T POINTTYPE Point-type and options for the plots (default 6). -x ADJUSTMENT Multiply the C(1) regression parameter by this factor. -X Include C(1) as a fit parameter for USL regression. EOF exit 1 } # Converts SHOW GLOBAL STATUS into concurrency-vs-throughput. There are # basically three things we are interested in from SHOW GLOBAL STATUS. # Questions 118357171 # Threads_running 8 # Uptime 614909 # # Command-line options (not optional) # -i The time interval over which to aggregate. # -n The number of slaves connected to the server, and # thus running Binlog Dump commands. # -m The max number of threads possible (to filter outliers). # # XXX In the future, when we see Uptime, we need to check whether it is greater # than what we saw the first time. If we see it decrease or stay the same, then # this output probably came from "mysqladmin ext -ri" For now we assume not. convert_globalstatus() { # Get the -n command-line option. for o; do case "${o}" in --) break; ;; -i) shift; INTERVAL="${1}"; shift; ;; -m) shift; THREADS_MAX="${1}"; shift; ;; -n) shift; NUM_SLAVES="${1}"; shift; ;; esac done cat > /tmp/aspersa <<-EOF BEGIN { threads = 0; threads_sum = 0; threads_max = ${THREADS_MAX}; questions = 0; start_questions = 0; samples = 0; skipped = 0; start = 0; } /Threads_running/ { # There will always be at least 1 thread running, the one doing SHOW # STATUS. And if there are slaves doing Binlog Dump, they really do # not count as concurrency, so we remove them too. threads = \$2 - 1 - ${NUM_SLAVES}; if ( threads_max > 0 && threads > threads_max ) { # We do not count these. Later we'll adjust the denominator of the # average thread count accordingly. skipped++; } else { threads_sum += threads; } } /Questions/ { questions = \$2; if ( start_questions == 0 ) { # Initial condition, runs only once. start_questions = questions; } } /Uptime/ { end = \$2; if ( start == 0 ) { # Initial condition, runs only once. start = end; } # This is where the main work takes place. We compute the concurrency # over the interval as the average of Threads_running. elapsed = end - start; if ( elapsed > 0 && elapsed >= ${INTERVAL} && samples > skipped ) { concurrency = threads_sum / (samples - skipped + 1); throughput = (questions - start_questions) / elapsed; printf "%f %f\\n", concurrency, throughput; start_questions = questions; start = end; samples = 0; skipped = 0; threads_sum = threads; if ( threads_max > 0 && threads_sum > threads_max ) { threads_sum = threads_max; } } samples++; } EOF awk -f /tmp/aspersa "$@" } # Converts tcpdump into concurrency-vs-throughput. Use a tcpdump command line # such as the following: # sudo tcpdump -s 384 -i any -nnq -tttt -c 10 'tcp port 3306 and (((ip[2:2] - ((ip[0]&0xf)<<2)) - ((tcp[12]&0xf0)>>2)) != 0)' # # Command-line options (not optional) # -P The TCP port that the server is listening on. convert_tcpdump() { # For ease of testing, this process is split into two parts, and this # function is just a wrapper around helpers that do the main work. convert_tcpdump_tabulate "$@" | convert_tcpdump_tabulated_to_1sec } # Accepts the same input as convert_tcpdump(). Output fields: # 1 - date # 2 - time # 3 - timestamp as a number (NOTE: doesn't handle wrapping past midnight) # 4 - client IP address and port number # 5 - response time # 6 - number of requests still pending / in progress # 7 - total time spent serving requests (increases while #6 is nonzero) # 8 - weighted time spent serving requests (incremented by #6 * elapsed) convert_tcpdump_tabulate() { # Get the command-line options. for o; do case "${o}" in --) break; ;; -P) shift; PORT="${1}"; shift; ;; esac done cat > /tmp/aspersa-convert_tcpdump_tabulate <<-EOF BEGIN { watch_port = ${PORT}; from_pat = "[.]" watch_port "$"; # Matches IP.port combination to_pat = "[.]" watch_port ":$"; pending = 0; # The number of requests that haven't been replied yet. # Other global variables: # current # The queries that have been sent to the server. # last_ts # Used for computing weighted time spent doing queries. # busy # Like Field 10 in /proc/diskstats: increases if pending > 0. # weighted # The time spent doing IO, like Field 11 in /proc/diskstats. } # Ignore any zero-length, it's usually just an ack or something. # ts = \$1 \$2 # from = \$4 # to = \$6 \$NF > 0 { ts = (3600 * substr(\$2, 1, 2)) + (60 * substr(\$2, 4, 2)) + substr(\$2, 7); # Packets from the client. if ( \$6 ~ to_pat ) { client = \$4; if ( !current[client] ) { if ( ${MKDEBUG:-0} > 0 ) { printf "MKDEBUG: new request from '%s' at line %d\\n", client, NR; } current[client] = ts; if ( last_ts ) { elapsed = ts - last_ts; if ( ${MKDEBUG:-0} > 0 ) { printf "MKDEBUG: weighted (%.6f) += %.6f\\n", weighted, elapsed; } weighted += pending * elapsed; if ( pending > 0 ) { if ( ${MKDEBUG:-0} > 0 ) { printf "MKDEBUG: busy (%.6f) += %.6f\\n", busy, elapsed; } busy += (ts - last_ts); } } last_ts = ts; pending++; } else if ( ${MKDEBUG:-0} > 0 ) { print "MKDEBUG: existing request for client"; } } # Packets from the server we're watching to the client. else if ( \$4 ~ from_pat ) { client = substr(\$6, 1, length(\$6) - 1); if ( current[client] ) { if ( ${MKDEBUG:-0} > 0 ) { printf "MKDEBUG: reply to '%s' at line %d\\n", client, NR; } rt = ts - current[client]; weighted += pending * (ts - last_ts); busy += (ts - last_ts); last_ts = ts; pending--; delete current[client]; printf("%s %s %12.6f %-21s %10.6f %3d %10.6f %10.6f\\n", \$1, \$2, ts, client, rt, pending, busy, weighted); } else if ( ${MKDEBUG:-0} > 0 ) { printf "MKDEBUG: reply to '%s' at line %d (DNE)\\n", client, NR; } } } END { if ( ${MKDEBUG:-0} > 0 ) { printf "MKDEBUG: %d sessions currently open\\n", pending; for (c in current) { if ( current[c] ) { printf "MKDEBUG: client '%s' started %.6f\\n", c, current[c]; } } } } EOF awk -f /tmp/aspersa-convert_tcpdump_tabulate "$@" } # Takes in the output of convert_tcpdump_tabulate() and outputs one line per # second, showing the concurrency and throughput for that second. convert_tcpdump_tabulated_to_1sec() { cat > /tmp/aspersa-convert_tcpdump_tabulated_to_1sec <<-EOF { if ( !ts ) { # Initial condition ts = \$3; # timestamp ct = 0; # count of queries bt = \$7; # busy time wt = \$8; # weighted busy time } if ( \$3 >= ts + 1 ) { concurrency = (\$8 - wt) / (\$7 - bt); throughput = ct / (\$7 - bt); printf "%.6f %.6f %s %s\\n", concurrency, throughput, \$1, \$2; ts = \$3; # timestamp ct = 0; # count of queries bt = \$7; # busy time wt = \$8; # weighted busy time } else { ct++; } } EOF awk -f /tmp/aspersa-convert_tcpdump_tabulated_to_1sec "$@" } # To find the deviation from linearity, we have to find the C(N) for N=1 (or if # 1 is not available, then we interpolate, which may require human judgment, as # in Gunther p.94). We take an average. Input columns must be N and C # (throughput). The command-line parameter is the N value we wish to look for. find_C_of_one() { cat > /tmp/aspersa <<-EOF /^[^#]/ { if ( \$1 == $1 ) { count++; sum += \$2; } } END { if ( count > 0 ) { print (sum/count) / $1; } else { print 0; } } EOF awk -f /tmp/aspersa "$2" } # The main code that runs by default. Arguments are the command-line options. main() { echo "# Command-line: $0 $@" # Get command-line options. for o; do case "${o}" in --) break; ;; -a) shift; X_AXIS="node count"; ;; -c) shift; CONV="${1}"; shift; ;; -d) DEL="0"; shift; ;; -e) shift; ERRL="1"; ;; -i) shift; INT="${1}"; shift; ;; -k) shift; KEEP="${1}"; shift; ;; -l) shift; LIM="${1}"; shift; ;; -L) shift; LTY="${1}"; shift; ;; -m) shift; MXT="${1}"; shift; ;; -n) shift; ADJ="${1}"; shift; ;; -o) shift; ONLY="${1}"; shift; ;; -p) shift; PRE="${1}"; shift; ;; -P) shift; PORT="${1}"; shift; ;; -r) shift; COLOR="color"; ;; -R) shift; REFIT=0; ;; -t) shift; TYP="${1}"; shift; ;; -T) shift; PTY="${1}"; shift; ;; -x) shift; XAD="${1}"; shift; ;; -X) shift; FITC1=", C1"; ;; -*) OPT_ERR="Unknown option '${o}'." usage 1 ;; esac done ERRL=${ERRL:-0} ADJ=${ADJ:-0} DEL=${DEL:-1} INT=${INT:-0} MXT=${MXT:-0} TYP=${TYP:-png} XAD=${XAD:-1} FILE="${KEEP:-/tmp/aspersa-N-C}" EXT="${TYP}" LIM="${LIM:-0}" X_AXIS="${X_AXIS:-concurrency}" PORT="${PORT:-3306}" PTY="${PTY:-6}" REFIT="${REFIT:-1}" FITC1="${FITC1:-}" if [ "${COLOR}" ]; then LTY="${LTY:-lt rgb \"#8B0000\"}" BLUE=" lt rgb \"#0000FF\""; fi # ###################################################################### # Set up the gnuplot instructions for filetype. # ###################################################################### case "${TYP}" in png) GNUPLOT_TERM="set terminal ${TYP}" GNUPLOT_SIZE="" GNUPLOT_SIGMA='sigma' GNUPLOT_KAPPA='kappa' ;; eps|pdf) # We make PDFs as EPSs and then convert them. GNUPLOT_TERM="set terminal postscript eps enhanced ${COLOR} solid" GNUPLOT_SIZE="set size 0.6, 0.6" GNUPLOT_SIGMA='{/Symbol s}' GNUPLOT_KAPPA='{/Symbol k}' if [ "${TYP}" = "pdf" ]; then EXT="eps" fi ;; *) echo "Unknown -t value ${TYP}" usage exit 1 ;; esac GNUPLOT_LABEL="set xlabel 'N (${X_AXIS})'; set ylabel 'C (throughput)'" # ###################################################################### # Convert the input file if needed. # ###################################################################### if [ "${CONV}" ]; then case "${CONV}" in globalstatus) convert_globalstatus -i ${INT} -m ${MXT} -n ${ADJ} "$@" \ > "${FILE}" ;; tcpdump) convert_tcpdump -P $PORT "$@" > "${FILE}" ;; *) echo "Unknown -c value ${CONV}" usage exit 1 esac else cat "$@" > "${FILE}" fi # ###################################################################### # From here on, we have an input file with N in the first column, # and C in the second column. # ###################################################################### # We need to find some data points such as C(1) for subsequent operations. min_N=$(awk 'BEGIN{min=999999}/^[^#]/{if($10){min=$1}}END{print min}' \ "${FILE}"); max_N=$(awk '/^[^#]/{if($1>max){max=$1}}END{print max}' "${FILE}"); max_C=$(awk '/^[^#]/{if($2>max){max=$2}}END{print max}' "${FILE}"); # Find C(1) if it exists, else use C(min(N)). N_one=$(awk '{if($1 == 1) {print 1; exit}}' "${FILE}"); if [ "$N_one" = "1" ]; then C_of_one="$(find_C_of_one 1 "${FILE}")" else C_of_one="$(find_C_of_one ${min_N} "${FILE}")" fi echo "# Using $(gnuplot -V)" echo "# Parameters to the model:" echo "min(N) ${min_N}" echo "max(N) ${max_N}" echo "max(C) ${max_C}" echo "C(1) ${C_of_one} (pre-adjustment by ${XAD})" echo "N=1 ??? ${N_one:-no}" # ###################################################################### # Use gnuplot to 1) find the scalability law parameters and 2) plot the # original data with the model. We have to go about the fitting in a funny # way, because gnuplot can get stuck at a local maximum with the fitting if # we don't give it good starting parameters. So we use the quadratic # equation to figure out the starting parameters, and then refit with the USL # equation for the final result. This results in a better fit. # ###################################################################### # Sets up parameters. We'll always load this file. if [ "${XAD}" != "1" ]; then echo "# Adjusting C(1) by ${XAD}" C_of_one="$(echo ${C_of_one} | awk "{print \$1 * ${XAD}}")"; fi cat > /tmp/aspersa-gnuplot0 <<-EOF ${GNUPLOT_TERM} ${GNUPLOT_SIZE} ${GNUPLOT_LABEL} C1 = ${C_of_one} max_N = ${max_N} max_C = ${max_C} max(a,b) = (a > b) ? a : b min(a,b) = (a > b) ? b : a f(x) = a*x*x + b*x g(x) = x*C1/(1+sigma*(x-1) + kappa*x*(x-1)) EOF # Plot the efficiency relative to C(1). cat > /tmp/aspersa-gnuplot1 <<-EOF set output "${PRE}usl-efficiency.${EXT}" set ylabel "Relative Efficiency" plot 1 title 'Unity', \\ "${FILE}" using 1:(\$2/C1/\$1) pt ${PTY}${BLUE} title 'Computed Efficiency' EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 # Fit the deviation from linearity relative to C(1). echo "# Fitting the transformed data against a 2nd-degree polynomial." cat > /tmp/aspersa-gnuplot1 <<-EOF set fit logfile "/dev/null" # defines a_err and b_err set fit errorvariables fit f(x) '${FILE}' using (\$1-1):(\$1/(\$2/C1)-1) via a, b EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 2> /tmp/aspersa-qfit.log # If everything went OK, then the fit should have converged. if ! grep 'the fit converged' /tmp/aspersa-qfit.log >/dev/null ; then echo "The quadratic regression failed, check /tmp/aspersa-qfit.log" exit 1; fi # Now we can check the log and extract the parameters from it. sed -n -e '/the fit converged/,/^b /p' /tmp/aspersa-qfit.log > /tmp/aspersa.params PARAM_A="$(awk '/^a / { print $3 }' /tmp/aspersa.params)" PARAM_A_ERR="$(awk '/^a / { print $5 }' /tmp/aspersa.params)" PARAM_A_PCT="$(awk '/^a / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)" PARAM_B="$(awk '/^b / { print $3 }' /tmp/aspersa.params)" PARAM_B_ERR="$(awk '/^b / { print $5 }' /tmp/aspersa.params)" PARAM_B_PCT="$(awk '/^b / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)" echo " a ${PARAM_A} +/- ${PARAM_A_ERR} (${PARAM_A_PCT}%)" echo " b ${PARAM_B} +/- ${PARAM_B_ERR} (${PARAM_B_PCT}%)" # Now find the coefficient of determination, R^2. Although gnuplot # documentation says it is not the best metric, it's the one everyone is used # to, and it needs to be on the graph. FYI, gnuplot prints the # sum_of_squared_errors as the "final sum of squares of residuals." cat > /tmp/aspersa <<-EOF /^[^#]/{ x = \$1 - 1; y = \$1 / (\$2/${C_of_one}) -1; f = ${PARAM_A}*x*x + ${PARAM_B}*x; sum_of_squares += y * y; sum_of_squared_errors += (f - y) * (f - y); } END { print 1 - (sum_of_squared_errors / sum_of_squares); } EOF R_SQUARED="$(awk -f /tmp/aspersa "${FILE}")" echo " R^2 ${R_SQUARED}" cat > /tmp/aspersa-gnuplot1 <<-EOF set output "${PRE}usl-deviation.${EXT}" set ylabel "Deviation From Linearity" set label "R^2 = ${R_SQUARED}" at graph .1, .5 set label "b = ${PARAM_B}" at graph .1, .57 set label "a = ${PARAM_A}" at graph .1, .64 set key left a = ${PARAM_A} b = ${PARAM_B} plot f(x) title 'Modeled' w lines${BLUE}, \\ "${FILE}" using (\$1-1):(\$1/(\$2/C1)-1) pt ${PTY} ${LTY} title 'Measured' # '' using (\$1-1):(\$1/(\$2/C1)):1 w labels EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 # Plot the residual errors to look for a pattern in them. cat > /tmp/aspersa-gnuplot1 <<-EOF set key off set ylabel "Residual Errors" set output "${PRE}usl-quadratic-residuals.${EXT}" a = ${PARAM_A} b = ${PARAM_B} plot "${FILE}" using (\$1-1):((\$1/(\$2/C1)-1)-f(\$1-1)) pt ${PTY}${BLUE} EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 # Plot the residuals squared, for finding outliers and removing them. cat > /tmp/aspersa-gnuplot1 <<-EOF set key off set ylabel "Residual Squared" set output "${PRE}usl-quadratic-squared.${EXT}" a = ${PARAM_A} b = ${PARAM_B} plot "${FILE}" using (\$1-1):((\$1/(\$2/C1)-1)-f(\$1-1))**2:1 pt ${PTY}${BLUE}, \\ '' using 1:((\$1/(\$2/C1)-1)-f(\$1-1))**2:1 w labels EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 if [ "${REFIT}" = "1" ]; then # Re-fit against the Universal Scalability Law -- it will be more # accurate. Put the C(1) point into a new file for this purpose. echo "# Re-fitting against the USL with (a, b-a) as a starting point." echo "# Treating (1, ${C_of_one}) as a point in original measurements." echo "1 ${C_of_one}" > /tmp/aspersa-usl-input-extended cat "${FILE}" >> /tmp/aspersa-usl-input-extended cat > /tmp/aspersa-gnuplot1 <<-EOF a = ${PARAM_A} b = ${PARAM_B} kappa = a sigma = b - kappa set fit logfile "/dev/null" # defines sigma_err and kappa_err set fit errorvariables fit g(x) '/tmp/aspersa-usl-input-extended' using 1:2 via sigma, kappa${FITC1} EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 2> /tmp/aspersa-ufit.log # If everything went OK, then the fit should have converged. if ! grep 'the fit converged' /tmp/aspersa-ufit.log >/dev/null ; then echo "The USL regression failed, check /tmp/aspersa-ufit.log" exit 1; fi # Now check the log again, and extract the parameters from it. if [ "${FITC1}" ]; then sed -n -e '/the fit converged/,/^C1 /p' /tmp/aspersa-ufit.log > /tmp/aspersa.params PARAM_SIGMA="$(awk '/^sigma / { print $3 }' /tmp/aspersa.params)" PARAM_SIGMA_ERR="$(awk '/^sigma / { print $5 }' /tmp/aspersa.params)" PARAM_SIGMA_PCT="$(awk '/^sigma / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)" PARAM_KAPPA="$(awk '/^kappa / { print $3 }' /tmp/aspersa.params)" PARAM_KAPPA_ERR="$(awk '/^kappa / { print $5 }' /tmp/aspersa.params)" PARAM_KAPPA_PCT="$(awk '/^kappa / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)" PARAM_C_ONE="$(awk '/^C1 / { print $3 }' /tmp/aspersa.params)" PARAM_C_ONE_ERR="$(awk '/^C1 / { print $5 }' /tmp/aspersa.params)" PARAM_C_ONE_PCT="$(awk '/^C1 / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)" echo " sigma ${PARAM_SIGMA} +/- ${PARAM_SIGMA_ERR} (${PARAM_SIGMA_PCT}%)" echo " kappa ${PARAM_KAPPA} +/- ${PARAM_KAPPA_ERR} (${PARAM_KAPPA_PCT}%)" echo " C(1) ${PARAM_C_ONE} +/- ${PARAM_C_ONE_ERR} (${PARAM_C_ONE_PCT}%)" else sed -n -e '/the fit converged/,/^kappa /p' /tmp/aspersa-ufit.log > /tmp/aspersa.params PARAM_SIGMA="$(awk '/^sigma / { print $3 }' /tmp/aspersa.params)" PARAM_SIGMA_ERR="$(awk '/^sigma / { print $5 }' /tmp/aspersa.params)" PARAM_SIGMA_PCT="$(awk '/^sigma / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)" PARAM_KAPPA="$(awk '/^kappa / { print $3 }' /tmp/aspersa.params)" PARAM_KAPPA_ERR="$(awk '/^kappa / { print $5 }' /tmp/aspersa.params)" PARAM_KAPPA_PCT="$(awk '/^kappa / { print substr($6, 2, length($6)-3) }' /tmp/aspersa.params)" PARAM_C_ONE="${C_of_one}" echo " sigma ${PARAM_SIGMA} +/- ${PARAM_SIGMA_ERR} (${PARAM_SIGMA_PCT}%)" echo " kappa ${PARAM_KAPPA} +/- ${PARAM_KAPPA_ERR} (${PARAM_KAPPA_PCT}%)" echo " C(1) ${PARAM_C_ONE} (not a regression parameter)" fi # Now find the coefficient of determination again, this time against the # USL model. cat > /tmp/aspersa <<-EOF /^[^#]/{ x = \$1; y = \$2; g = x*${PARAM_C_ONE}/(1+${PARAM_SIGMA}*(x-1) + ${PARAM_KAPPA}*x*(x-1)); sum_of_squares += y * y; sum_of_squared_errors += (g - y) * (g - y); } END { print 1 - (sum_of_squared_errors / sum_of_squares); } EOF R_SQUARED="$(awk -f /tmp/aspersa "/tmp/aspersa-usl-input-extended")" echo " R^2 ${R_SQUARED}" # Plot the USL residuals squared, for finding outliers and removing them. cat > /tmp/aspersa-gnuplot1 <<-EOF set key off set ylabel "Residual Errors Squared" set output "${PRE}usl-residuals-squared.${EXT}" sigma = ${PARAM_SIGMA} kappa = ${PARAM_KAPPA} plot "/tmp/aspersa-usl-input-extended" using 1:((g(\$1)-\$2)**2) pt ${PTY}${BLUE}, \\ '' using (\$1+1):((g(\$1)-\$2)**2):1 w labels EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 else # We need to set the PARAM_SIGMA and PARAM_KAPPA stuff. PARAM_C_ONE="${C_of_one}" echo | awk "BEGIN{ a = ${PARAM_A}; a_err = ${PARAM_A_ERR}; b = ${PARAM_B}; b_err = ${PARAM_B_ERR}; printf \"PARAM_SIGMA=%.6f\\n\", b - a; printf \"PARAM_KAPPA=%.6f\\n\", a; print \"PARAM_SIGMA_ERR=0\"; print \"PARAM_SIGMA_PCT=0\"; print \"PARAM_KAPPA_ERR=0\"; print \"PARAM_KAPPA_PCT=0\"; }" > /tmp/aspersa . /tmp/aspersa echo " sigma ${PARAM_SIGMA} +/- ${PARAM_SIGMA_ERR} (${PARAM_SIGMA_PCT}%)" echo " kappa ${PARAM_KAPPA} +/- ${PARAM_KAPPA_ERR} (${PARAM_KAPPA_PCT}%)" fi # Plot the original points and the Universal Scalability Law's predictions. cat > /tmp/aspersa-gnuplot1 <<-EOF sigma = ${PARAM_SIGMA} kappa = ${PARAM_KAPPA} C1 = ${PARAM_C_ONE} sigma_best = sigma - ${PARAM_SIGMA_ERR}; sigma_worst = sigma + ${PARAM_SIGMA_ERR}; kappa_best = kappa - ${PARAM_KAPPA}; kappa_worst = kappa + ${PARAM_KAPPA_ERR}; set output "${PRE}usl-model-vs-actual.${EXT}" N_star = floor(sqrt((1-sigma)/kappa)) modelC = N_star*C1 / (1 + sigma*(N_star-1) + kappa*N_star*(N_star-1)) set label sprintf("Peak capacity is C=%d at N=%d", modelC, N_star) at graph 0.1, 0.9 set label sprintf('${GNUPLOT_SIGMA} = %f', sigma) at graph .5, .3 set label sprintf('${GNUPLOT_KAPPA} = %f', kappa) at graph .5, .25 set label sprintf('R^2 = %f', ${R_SQUARED}) at graph .5, .2 set key bottom xlimit = ${LIM} if ( xlimit == 0 ) xlimit = max(N_star, max_N) * 2 set xrange [0.0:xlimit] set yrange [0:max(max_C, modelC * 1.3)] if ( ${ERRL} == 1 ) plot \\ x*C1/(1+sigma_best*(x-1) + kappa_best*x*(x-1)) title '',\\ x*C1/(1+sigma_worst*(x-1) + kappa_worst*x*(x-1)) title '',\\ x*C1/(1+sigma*(x-1) + kappa*x*(x-1)) title 'Modeled' w lines${BLUE}, \\ "${FILE}" using 1:2 pt ${PTY} ${LTY} title 'Measured' if ( ${ERRL} == 0 ) plot \\ x*C1/(1+sigma*(x-1) + kappa*x*(x-1)) title 'Modeled' w lines${BLUE}, \\ "${FILE}" using 1:2 pt ${PTY} ${LTY} title 'Measured' EOF gnuplot /tmp/aspersa-gnuplot0 /tmp/aspersa-gnuplot1 # Remove files not mentioned in -o. if [ "${ONLY}" ]; then for f in deviation efficiency quadratic-residuals residuals-squared \ quadratic-squared model-vs-actual; do if [[ "${ONLY}" != *$f* ]]; then file="${PRE}usl-${f}.${EXT}" rm -f "${file}" fi done fi # The type "pdf" is really just eps, and now we need to convert it to pdf. if [ "${TYP}" = "pdf" ]; then for f in deviation efficiency quadratic-residuals residuals-squared \ quadratic-squared model-vs-actual; do file="${PRE}usl-${f}.eps" if [ -e "${file}" ]; then epstopdf "${file}" rm -f "${file}" fi done fi # Remove temp files. if [ "${DEL}" = "1" ]; then rm -f /tmp/aspersa{,-N-C,.params,-gnuplot0,-gnuplot1,-qfit.log,-ufit.log,-usl-input-extended} fi } # Execute the program if it was not included from another file. This makes it # possible to include without executing, and thus test. if [ "$(basename "$0")" = "usl" ] || [ "$(basename "$0")" = "bash" -a "$_" = "$0" ]; then main "$@" fi