diff options
Diffstat (limited to 'scripts/pybootchartgui/pybootchartgui/parsing.py')
-rw-r--r-- | scripts/pybootchartgui/pybootchartgui/parsing.py | 223 |
1 files changed, 223 insertions, 0 deletions
diff --git a/scripts/pybootchartgui/pybootchartgui/parsing.py b/scripts/pybootchartgui/pybootchartgui/parsing.py new file mode 100644 index 0000000000..a350a3eb08 --- /dev/null +++ b/scripts/pybootchartgui/pybootchartgui/parsing.py @@ -0,0 +1,223 @@ +from __future__ import with_statement + +import os +import string +import re +import tarfile +from collections import defaultdict + +from samples import * +from process_tree import ProcessTree + +class ParseError(Exception): + """Represents errors during parse of the bootchart.""" + def __init__(self, value): + self.value = value + + def __str__(self): + return self.value + +def _parse_headers(file): + """Parses the headers of the bootchart.""" + def parse((headers,last), line): + if '=' in line: last,value = map(string.strip, line.split('=', 1)) + else: value = line.strip() + headers[last] += value + return headers,last + return reduce(parse, file.read().split('\n'), (defaultdict(str),''))[0] + +def _parse_timed_blocks(file): + """Parses (ie., splits) a file into so-called timed-blocks. A + timed-block consists of a timestamp on a line by itself followed + by zero or more lines of data for that point in time.""" + def parse(block): + lines = block.split('\n') + if not lines: + raise ParseError('expected a timed-block consisting a timestamp followed by data lines') + try: + return (int(lines[0]), lines[1:]) + except ValueError: + raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0]) + blocks = file.read().split('\n\n') + return [parse(block) for block in blocks if block.strip()] + +def _parse_proc_ps_log(file): + """ + * See proc(5) for details. + * + * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime, + * cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack, + * kstkesp, kstkeip} + """ + processMap = {} + ltime = 0 + timed_blocks = _parse_timed_blocks(file) + for time, lines in timed_blocks: + for line in lines: + tokens = line.split(' ') + + offset = [index for index, token in enumerate(tokens[1:]) if token.endswith(')')][0] + pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset]) + userCpu, sysCpu, stime= int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset]) + + if processMap.has_key(pid): + process = processMap[pid] + process.cmd = cmd.replace('(', '').replace(')', '') # why rename after latest name?? + else: + process = Process(pid, cmd, ppid, min(time, stime)) + processMap[pid] = process + + if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None: + userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, time - ltime) + cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0) + process.samples.append(ProcessSample(time, state, cpuSample)) + + process.last_user_cpu_time = userCpu + process.last_sys_cpu_time = sysCpu + ltime = time + + startTime = timed_blocks[0][0] + avgSampleLength = (ltime - startTime)/(len(timed_blocks)-1) + + for process in processMap.values(): + process.set_parent(processMap) + + for process in processMap.values(): + process.calc_stats(avgSampleLength) + + return ProcessStats(processMap.values(), avgSampleLength, startTime, ltime) + +def _parse_proc_stat_log(file): + samples = [] + ltimes = None + for time, lines in _parse_timed_blocks(file): + # CPU times {user, nice, system, idle, io_wait, irq, softirq} + tokens = lines[0].split(); + times = [ int(token) for token in tokens[1:] ] + if ltimes: + user = float((times[0] + times[1]) - (ltimes[0] + ltimes[1])) + system = float((times[2] + times[5] + times[6]) - (ltimes[2] + ltimes[5] + ltimes[6])) + idle = float(times[3] - ltimes[3]) + iowait = float(times[4] - ltimes[4]) + + aSum = max(user + system + idle + iowait, 1) + samples.append( CPUSample(time, user/aSum, system/aSum, iowait/aSum) ) + + ltimes = times + # skip the rest of statistics lines + return samples + + +def _parse_proc_disk_stat_log(file, numCpu): + """ + Parse file for disk stats, but only look at the whole disks, eg. sda, + not sda1, sda2 etc. The format of relevant lines should be: + {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq} + """ + DISK_REGEX = 'hd.$|sd.$' + + def is_relevant_line(line): + return len(line.split()) == 14 and re.match(DISK_REGEX, line.split()[2]) + + disk_stat_samples = [] + + for time, lines in _parse_timed_blocks(file): + sample = DiskStatSample(time) + relevant_tokens = [line.split() for line in lines if is_relevant_line(line)] + + for tokens in relevant_tokens: + disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12]) + sample.add_diskdata([rsect, wsect, use]) + + disk_stat_samples.append(sample) + + disk_stats = [] + for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]): + interval = sample1.time - sample2.time + sums = [ a - b for a, b in zip(sample1.diskdata, sample2.diskdata) ] + readTput = sums[0] / 2.0 * 100.0 / interval + writeTput = sums[1] / 2.0 * 100.0 / interval + util = float( sums[2] ) / 10 / interval / numCpu + util = max(0.0, min(1.0, util)) + disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util)) + + return disk_stats + + +def get_num_cpus(headers): + """Get the number of CPUs from the system.cpu header property. As the + CPU utilization graphs are relative, the number of CPUs currently makes + no difference.""" + if headers is None: + return 1 + cpu_model = headers.get("system.cpu") + if cpu_model is None: + return 1 + mat = re.match(".*\\((\\d+)\\)", cpu_model) + if mat is None: + return 1 + return int(mat.group(1)) + +class ParserState: + def __init__(self): + self.headers = None + self.disk_stats = None + self.ps_stats = None + self.cpu_stats = None + + def valid(self): + return self.headers != None and self.disk_stats != None and self.ps_stats != None and self.cpu_stats != None + + +_relevant_files = set(["header", "proc_diskstats.log", "proc_ps.log", "proc_stat.log"]) + +def _do_parse(state, name, file): + if name == "header": + state.headers = _parse_headers(file) + elif name == "proc_diskstats.log": + state.disk_stats = _parse_proc_disk_stat_log(file, get_num_cpus(state.headers)) + elif name == "proc_ps.log": + state.ps_stats = _parse_proc_ps_log(file) + elif name == "proc_stat.log": + state.cpu_stats = _parse_proc_stat_log(file) + return state + +def parse_file(state, filename): + basename = os.path.basename(filename) + if not(basename in _relevant_files): + return state + with open(filename, "rb") as file: + return _do_parse(state, basename, file) + +def parse_paths(state, paths): + for path in paths: + root,extension = os.path.splitext(path) + if not(os.path.exists(path)): + print "warning: path '%s' does not exist, ignoring." % path + continue + if os.path.isdir(path): + files = [ f for f in [os.path.join(path, f) for f in os.listdir(path)] if os.path.isfile(f) ] + files.sort() + state = parse_paths(state, files) + elif extension in [".tar", ".tgz", ".tar.gz"]: + tf = None + try: + tf = tarfile.open(path, 'r:*') + for name in tf.getnames(): + state = _do_parse(state, name, tf.extractfile(name)) + except tarfile.ReadError, error: + raise ParseError("error: could not read tarfile '%s': %s." % (path, error)) + finally: + if tf != None: + tf.close() + else: + state = parse_file(state, path) + return state + +def parse(paths, prune): + state = parse_paths(ParserState(), paths) + if not state.valid(): + raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths)) + monitored_app = state.headers.get("profile.process") + proc_tree = ProcessTree(state.ps_stats, monitored_app, prune) + return (state.headers, state.cpu_stats, state.disk_stats, proc_tree) |