author     Johannes Weiner <hannes@cmpxchg.org>    2012-01-10 15:13:05 +0100
committer  Johannes Weiner <hannes@cmpxchg.org>    2012-01-10 15:23:29 +0100
commit     ff0d8d374a1272fdff5cc8dd7bb929f43ba001a2 (patch)
tree       1a445263d09b16ba821d86ae6f8294147f4d04a3
parent     96ede7370f9108555c95e3ab2b0d43864bca9f36 (diff)
analyze: analyze and present data per spec file description
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
-rwxr-xr-x  analyze                254
-rw-r--r--  examples/time.spec       7
-rw-r--r--  examples/vmstat.spec    26
3 files changed, 287 insertions, 0 deletions
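
For orientation, a hypothetical invocation of the new script against the vmstat example spec added below might look like this; the run names "baseline" and "patched" are illustrative only, and each name must have a matching data file (here baseline-vmstat.data and patched-vmstat.data) as referenced by the spec's readdict() expression:

    ./analyze -s examples/vmstat.spec baseline patched

The first name on the command line serves as the reference column: every item is printed once per name, together with its relative change against that first run.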
diff --git a/analyze b/analyze
new file mode 100755
index 0000000..3906bae
--- /dev/null
+++ b/analyze
@@ -0,0 +1,254 @@
+#!/usr/bin/python
+
+from __future__ import print_function
+import argparse
+import math
+
+def mean(v):
+ return sum(v) / len(v)
+
+def stddev(v):
+ m = mean(v)
+ d = [(n - m) ** 2 for n in v]
+ return math.sqrt(mean(d))
+
+class Dataset(object):
+ def __init__(self, runs):
+ self.runs = runs
+ def __repr__(self):
+ return repr(self.runs)
+ def merge(self):
+ runs = [[]]
+ for run in self.runs:
+ runs[0] += run
+ return Dataset(runs)
+ def unary(self, op):
+ runs = []
+ for run in self.runs:
+ runs.append(op(run))
+ return Dataset(runs)
+ def binary(self, y, op):
+ runs = []
+ for runx, runy in zip(self.runs, y.runs):
+ runs.append(op(runx, runy))
+ return Dataset(runs)
+ def binary_values(self, y, op):
+ return self.binary(y, lambda rx, ry: [op(x, y) for x, y in zip(rx, ry)])
+ def binary_constant(self, c, op):
+ runs = []
+ for run in self.runs:
+ r = []
+ for value in run:
+ r.append(op(value, c))
+ runs.append(r)
+ return Dataset(runs)
+ def __add__(self, y):
+ if type(y) == Dataset:
+ return self.binary_values(y, float.__add__)
+ return self.binary_constant(y, float.__add__)
+ def __sub__(self, y):
+ if type(y) == Dataset:
+ return self.binary_values(y, float.__sub__)
+ return self.binary_constant(y, float.__sub__)
+ def __mul__(self, y):
+ if type(y) == Dataset:
+ return self.binary_values(y, float.__mul__)
+ return self.binary_constant(y, float.__mul__)
+ def __div__(self, y):
+ if type(y) == Dataset:
+ return self.binary_values(y, float.__div__)
+ return self.binary_constant(y, float.__div__)
+
+def builtin_read(filename):
+ """Read a space-separated table of runs into a dictionary.
+
+ A00 B00
+ A01 B01
+
+ A10 B10
+ A11 B11
+
+ { 1: [[A00, A01], [A10, A11]], 2: [[B00, B01], [B10, B11]] }
+ """
+ data = {}
+ run = {}
+ for line in open(filename):
+ if len(line.strip()) == 0:
+ for key in run:
+ if not key in data:
+ data[key] = Dataset([])
+ data[key].runs.append(run[key])
+ run = {}
+ continue
+ for key, value in enumerate(line.split()):
+ value = float(value)
+ if not key in run:
+ run[key] = []
+ run[key].append(value)
+ for key in run:
+ if not key in data:
+ data[key] = Dataset([])
+ data[key].runs.append(run[key])
+ return data
+
+def builtin_readdict(filename):
+ """Read a dictionary style file into a dictionary.
+
+ A A00
+ B B00
+ A A01
+ B B01
+
+ A A10
+ B B10
+ A A11
+ B B11
+
+ { A: [[A00, A01], [A10, A11]], B: [[B00, B01], [B10, B11]] }
+ """
+ data = {}
+ run = {}
+ for line in open(filename):
+ if len(line.strip()) == 0:
+ for key in run:
+ if not key in data:
+ data[key] = Dataset([])
+ data[key].runs.append(run[key])
+ run = {}
+ continue
+ key, value = line.split()
+ value = float(value)
+ if not key in run:
+ run[key] = []
+ run[key].append(value)
+ for key in run:
+ if not key in data:
+ data[key] = Dataset([])
+ data[key].runs.append(run[key])
+ return data
+
+def builtin_merge(s):
+ """Merge all runs into a single one.
+
+ [[v0, ...], ...] -> [[v0, ...]]
+ """
+ return s.merge()
+
+def builtin_fold(s):
+ """Fold each run into the delta between its first and last value.
+
+ [[v0, ...], ...] -> [[vD], ...]
+ """
+ return s.unary(lambda r: [r[-1] - r[0]])
+
+def builtin_mean(s):
+ """Fold each run into the arithmetic mean of its values.
+
+ [[v0, ...], ...] -> [[vmean], ...]
+ """
+ return s.unary(lambda r: [mean(r)])
+
+def builtin_stddev(s):
+ """Fold each run into the standard deviation of its values.
+
+ [[v0, ...], ...] -> [[vstddev], ...]
+ """
+ return s.unary(lambda r: [stddev(r)])
+
+senv = { 'read': builtin_read,
+ 'readdict': builtin_readdict,
+ 'merge': builtin_merge,
+ 'fold': builtin_fold,
+ 'mean': builtin_mean,
+ 'stddev': builtin_stddev }
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-s', '--spec', action='append', default=[])
+parser.add_argument('name', nargs='+')
+
+args = parser.parse_args()
+
+datas = []
+items = []
+
+def save_section(type_, name, body):
+ if type_ == 'data':
+ datas.append((name, body))
+ elif type_ == 'item':
+ items.append((name, body))
+ else:
+ print('WARNING: unknown section type "%s"' % type_)
+
+for spec in args.spec:
+ type_ = None
+ name = None
+ body = ''
+ for line in open(spec):
+ line = line.strip()
+ if len(line) == 0 or line.startswith('#'):
+ continue
+ if line.startswith('%'):
+ if type_:
+ save_section(type_, name, body)
+ parts = line.split(None, 1)
+ type_ = parts[0][1:]
+ name = parts[1]
+ body = ''
+ elif type_:
+ body += line
+ if type_:
+ save_section(type_, name, body)
+
+values = {}
+
+for name in args.name:
+ env = { 'name': name }
+ env.update(senv)
+ for data in datas:
+ try:
+ env[data[0]] = eval(data[1], env)
+ except Exception, e:
+ print('ERROR in data expression "%s"' % data[1])
+ raise e
+ values[name] = {}
+ for item in items:
+ try:
+ data = eval(item[1], env)
+ except Exception, e:
+ print('ERROR in item expression "%s"' % item[1])
+ raise e
+ if len(data.runs) != 1:
+ raise ValueError('more than one run for item "%s": %s' %
+ (item[0], data.runs))
+ if len(data.runs[0]) != 1:
+ raise ValueError('more than one value in run 0 for item "%s": %s' %
+ (item[0], data.runs))
+ values[name][item[0]] = data.runs[0][0]
+
+report = [['']]
+widths = [0]
+
+for name in args.name:
+ report[0].append(name)
+ widths.append(len(name))
+for item in items:
+ row = [item[0]]
+ if len(item[0]) > widths[0]:
+ widths[0] = len(item[0])
+ for i, name in enumerate(args.name, start=1):
+ old = float(values[args.name[0]][item[0]])
+ new = float(values[name][item[0]])
+ delta = (new - old) / (old + 1) * 100
+ cell = '%.2f (%+7.2f%%)' % (values[name][item[0]], delta)
+ row.append(cell)
+ if len(cell) > widths[i]:
+ widths[i] = len(cell)
+ report.append(row)
+
+for row in report:
+ for i, col in enumerate(row):
+ fmt = '%*s'
+ if not i:
+ fmt = '%-*s'
+ print(fmt % (widths[i], col), end='\t')
+ print('')
diff --git a/examples/time.spec b/examples/time.spec
new file mode 100644
index 0000000..573aabd
--- /dev/null
+++ b/examples/time.spec
@@ -0,0 +1,7 @@
+# Parse the timestamps that runtest writes to the .time files
+
+%data time
+read(name + '.time')
+
+%item Walltime
+mean(merge(fold(time[0])))
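
As a sketch of the input this spec assumes, a hypothetical baseline.time written by runtest could hold a single timestamp column, with a blank line separating runs (the values below are placeholders):

    100.0
    142.5

    200.0
    241.7

read() exposes the column as time[0], fold() reduces each run to its last value minus its first, merge() collapses the per-run deltas into a single run, and mean() averages them into the Walltime item.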
diff --git a/examples/vmstat.spec b/examples/vmstat.spec
new file mode 100644
index 0000000..8d0d73c
--- /dev/null
+++ b/examples/vmstat.spec
@@ -0,0 +1,26 @@
+# Parse some reclaim stats from a /proc/vmstat monitor log
+
+%data vmstat
+readdict(name + '-vmstat.data')
+
+%data pgscan
+fold(vmstat['pgscan_kswapd_dma']) +
+fold(vmstat['pgscan_kswapd_dma32']) +
+fold(vmstat['pgscan_kswapd_normal']) +
+fold(vmstat['pgscan_direct_dma']) +
+fold(vmstat['pgscan_direct_dma32']) +
+fold(vmstat['pgscan_direct_normal'])
+
+%data pgsteal
+fold(vmstat['pgsteal_dma']) +
+fold(vmstat['pgsteal_dma32']) +
+fold(vmstat['pgsteal_normal'])
+
+%item Pages scanned
+mean(merge(pgscan))
+
+%item Pages reclaimed
+mean(merge(pgsteal))
+
+%item Reclaim efficiency %
+mean(merge(pgsteal / pgscan * 100))
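
The input format here is likewise an assumption based on readdict(): a hypothetical baseline-vmstat.data would contain '<counter> <value>' lines, with successive /proc/vmstat snapshots stacked within a run and blank lines separating runs (counters truncated and values invented for illustration; a real file needs every counter the spec references):

    pgscan_kswapd_normal 0
    pgsteal_normal 0
    pgscan_kswapd_normal 51200
    pgsteal_normal 46080

fold() then yields each counter's increase over a run, the %data expressions sum the per-zone counters, and the %item expressions report the mean across runs, including reclaim efficiency as the percentage of scanned pages that were actually reclaimed.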