Diffstat (limited to 'scripts/pybootchartgui/pybootchartgui/parsing.py')
-rw-r--r-- scripts/pybootchartgui/pybootchartgui/parsing.py | 223
1 file changed, 223 insertions(+), 0 deletions(-)
diff --git a/scripts/pybootchartgui/pybootchartgui/parsing.py b/scripts/pybootchartgui/pybootchartgui/parsing.py
new file mode 100644
index 0000000000..a350a3eb08
--- /dev/null
+++ b/scripts/pybootchartgui/pybootchartgui/parsing.py
@@ -0,0 +1,223 @@
+from __future__ import with_statement
+
+import os
+import string
+import re
+import tarfile
+from collections import defaultdict
+
+from samples import *
+from process_tree import ProcessTree
+
+class ParseError(Exception):
+ """Represents errors during parse of the bootchart."""
+ def __init__(self, value):
+ self.value = value
+
+ def __str__(self):
+ return self.value
+
+def _parse_headers(file):
+ """Parses the headers of the bootchart."""
+    def parse((headers, last), line):
+        if '=' in line:
+            last, value = map(string.strip, line.split('=', 1))
+        else:
+            value = line.strip()
+        headers[last] += value
+        return headers, last
+ return reduce(parse, file.read().split('\n'), (defaultdict(str),''))[0]
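+
+# Example header block (illustrative; the format is inferred from the parser
+# above, using keys this module actually reads):
+#
+#   system.cpu = Intel(R) Celeron(R) M (1)
+#   profile.process = bootchartd
+#
+# A line without '=' continues the value of the most recent key.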
+
+def _parse_timed_blocks(file):
+ """Parses (ie., splits) a file into so-called timed-blocks. A
+ timed-block consists of a timestamp on a line by itself followed
+ by zero or more lines of data for that point in time."""
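+    # Example block layout (illustrative):
+    #
+    #   100
+    #   <data for time 100>
+    #
+    #   200
+    #   <data for time 200>
+    #
+    # Blocks are separated by blank lines; each starts with an integer
+    # timestamp on a line of its own.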
+ def parse(block):
+ lines = block.split('\n')
+ if not lines:
+            raise ParseError('expected a timed-block consisting of a timestamp followed by data lines')
+ try:
+ return (int(lines[0]), lines[1:])
+ except ValueError:
+ raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0])
+ blocks = file.read().split('\n\n')
+ return [parse(block) for block in blocks if block.strip()]
+
+def _parse_proc_ps_log(file):
+ """
+ * See proc(5) for details.
+ *
+ * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime,
+ * cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack,
+ * kstkesp, kstkeip}
+ """
+ processMap = {}
+ ltime = 0
+ timed_blocks = _parse_timed_blocks(file)
+ for time, lines in timed_blocks:
+ for line in lines:
+ tokens = line.split(' ')
+
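+                # locate the token that closes '(comm)': comm may contain
+                # spaces, so every later field index is shifted by 'offset'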
+ offset = [index for index, token in enumerate(tokens[1:]) if token.endswith(')')][0]
+ pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset])
+                userCpu, sysCpu, stime = int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset])
+
+            if pid in processMap:
+ process = processMap[pid]
+                process.cmd = cmd.replace('(', '').replace(')', '') # keep the most recent name; comm can change (e.g. after exec)
+ else:
+ process = Process(pid, cmd, ppid, min(time, stime))
+ processMap[pid] = process
+
+ if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None:
+ userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, time - ltime)
+ cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0)
+ process.samples.append(ProcessSample(time, state, cpuSample))
+
+ process.last_user_cpu_time = userCpu
+ process.last_sys_cpu_time = sysCpu
+ ltime = time
+
+ startTime = timed_blocks[0][0]
+    avgSampleLength = (ltime - startTime)/max(len(timed_blocks)-1, 1) # guard against a single-sample log
+
+ for process in processMap.values():
+ process.set_parent(processMap)
+
+ for process in processMap.values():
+ process.calc_stats(avgSampleLength)
+
+ return ProcessStats(processMap.values(), avgSampleLength, startTime, ltime)
+
+def _parse_proc_stat_log(file):
+ samples = []
+ ltimes = None
+ for time, lines in _parse_timed_blocks(file):
+ # CPU times {user, nice, system, idle, io_wait, irq, softirq}
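+        # e.g. "cpu  6057 0 2145 92819 3541 127 239" (illustrative values,
+        # in USER_HZ ticks); only the aggregate 'cpu' line is read here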
+        tokens = lines[0].split()
+ times = [ int(token) for token in tokens[1:] ]
+ if ltimes:
+ user = float((times[0] + times[1]) - (ltimes[0] + ltimes[1]))
+ system = float((times[2] + times[5] + times[6]) - (ltimes[2] + ltimes[5] + ltimes[6]))
+ idle = float(times[3] - ltimes[3])
+ iowait = float(times[4] - ltimes[4])
+
+ aSum = max(user + system + idle + iowait, 1)
+ samples.append( CPUSample(time, user/aSum, system/aSum, iowait/aSum) )
+
+ ltimes = times
+ # skip the rest of statistics lines
+ return samples
+
+
+def _parse_proc_disk_stat_log(file, numCpu):
+ """
+    Parse the file for disk stats, but only look at whole disks, e.g. sda,
+ not sda1, sda2 etc. The format of relevant lines should be:
+ {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq}
+ """
+ DISK_REGEX = 'hd.$|sd.$'
+
+ def is_relevant_line(line):
+ return len(line.split()) == 14 and re.match(DISK_REGEX, line.split()[2])
+
+ disk_stat_samples = []
+
+ for time, lines in _parse_timed_blocks(file):
+ sample = DiskStatSample(time)
+ relevant_tokens = [line.split() for line in lines if is_relevant_line(line)]
+
+ for tokens in relevant_tokens:
+ disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12])
+ sample.add_diskdata([rsect, wsect, use])
+
+ disk_stat_samples.append(sample)
+
+ disk_stats = []
+ for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]):
+        interval = sample2.time - sample1.time
+        sums = [ a - b for a, b in zip(sample2.diskdata, sample1.diskdata) ]
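+        # sectors are 512 bytes, so dividing by 2.0 yields KiB; the
+        # * 100.0 / interval factor assumes timestamps count 10 ms ticks,
+        # giving KiB/s (an assumption inferred from the constants used)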
+ readTput = sums[0] / 2.0 * 100.0 / interval
+ writeTput = sums[1] / 2.0 * 100.0 / interval
+ util = float( sums[2] ) / 10 / interval / numCpu
+ util = max(0.0, min(1.0, util))
+ disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util))
+
+ return disk_stats
+
+
+def get_num_cpus(headers):
+ """Get the number of CPUs from the system.cpu header property. As the
+ CPU utilization graphs are relative, the number of CPUs currently makes
+ no difference."""
+ if headers is None:
+ return 1
+ cpu_model = headers.get("system.cpu")
+ if cpu_model is None:
+ return 1
+ mat = re.match(".*\\((\\d+)\\)", cpu_model)
+ if mat is None:
+ return 1
+ return int(mat.group(1))
+
+class ParserState:
+ def __init__(self):
+ self.headers = None
+ self.disk_stats = None
+ self.ps_stats = None
+ self.cpu_stats = None
+
+ def valid(self):
+        return self.headers is not None and self.disk_stats is not None and self.ps_stats is not None and self.cpu_stats is not None
+
+
+_relevant_files = set(["header", "proc_diskstats.log", "proc_ps.log", "proc_stat.log"])
+
+def _do_parse(state, name, file):
+ if name == "header":
+ state.headers = _parse_headers(file)
+ elif name == "proc_diskstats.log":
+ state.disk_stats = _parse_proc_disk_stat_log(file, get_num_cpus(state.headers))
+ elif name == "proc_ps.log":
+ state.ps_stats = _parse_proc_ps_log(file)
+ elif name == "proc_stat.log":
+ state.cpu_stats = _parse_proc_stat_log(file)
+ return state
+
+def parse_file(state, filename):
+ basename = os.path.basename(filename)
+    if basename not in _relevant_files:
+ return state
+ with open(filename, "rb") as file:
+ return _do_parse(state, basename, file)
+
+def parse_paths(state, paths):
+ for path in paths:
+ root,extension = os.path.splitext(path)
+        if not os.path.exists(path):
+ print "warning: path '%s' does not exist, ignoring." % path
+ continue
+ if os.path.isdir(path):
+ files = [ f for f in [os.path.join(path, f) for f in os.listdir(path)] if os.path.isfile(f) ]
+ files.sort()
+ state = parse_paths(state, files)
+ elif extension in [".tar", ".tgz", ".tar.gz"]:
+ tf = None
+ try:
+ tf = tarfile.open(path, 'r:*')
+ for name in tf.getnames():
+ state = _do_parse(state, name, tf.extractfile(name))
+ except tarfile.ReadError, error:
+ raise ParseError("error: could not read tarfile '%s': %s." % (path, error))
+ finally:
+            if tf is not None:
+ tf.close()
+ else:
+ state = parse_file(state, path)
+ return state
+
+def parse(paths, prune):
+ state = parse_paths(ParserState(), paths)
+ if not state.valid():
+ raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths))
+ monitored_app = state.headers.get("profile.process")
+ proc_tree = ProcessTree(state.ps_stats, monitored_app, prune)
+ return (state.headers, state.cpu_stats, state.disk_stats, proc_tree)
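+
+# Example usage (hypothetical caller, for illustration only):
+#
+#   headers, cpu_stats, disk_stats, proc_tree = parse(['bootchart.tgz'], False)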