.process_map.items():
            if value.start_time <= crop_at:
                cropped_map[key] = value

        for proc in cropped_map.values():
            proc.duration = min(proc.duration, crop_at - proc.start_time)
            while len(proc.samples) and proc.samples[-1].time > crop_at:
                proc.samples.pop()

        self.ps_stats.process_map = cropped_map

        return idle


class ParseError(Exception):
    """Represents errors during parse of the bootchart."""
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return self.value


def _parse_headers(file):
    """Parses the headers of the bootchart."""
    def parse(acc, line):
        (headers, last) = acc
        if '=' in line:
            last, value = map(lambda x: x.strip(), line.split('=', 1))
        else:
            value = line.strip()
        headers[last] += value
        return headers, last
    return reduce(parse, file.read().decode('utf-8').split('\n'),
                  (defaultdict(str), ''))[0]


def _parse_timed_blocks(file):
    """Parses (i.e., splits) a file into so-called timed blocks.

    A timed block consists of a timestamp on a line by itself, followed
    by zero or more lines of data for that point in time."""
    def parse(block):
        lines = block.split('\n')
        if not lines:
            raise ParseError('expected a timed block consisting of a '
                             'timestamp followed by data lines')
        try:
            return (int(lines[0]), lines[1:])
        except ValueError:
            raise ParseError("expected a timed block, but timestamp '%s' "
                             "is not an integer" % lines[0])
    blocks = file.read().decode('utf-8').split('\n\n')
    return [parse(block) for block in blocks
            if block.strip() and not block.endswith(' not running\n')]


def _parse_proc_ps_log(writer, file):
    """
    * See proc(5) for details.
    *
    * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt,
    *  cminflt, majflt, cmajflt, utime, stime, cutime, cstime, priority,
    *  nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode,
    *  endcode, startstack, kstkesp, kstkeip}
    """
    processMap = {}
    ltime = 0
    timed_blocks = _parse_timed_blocks(file)
    for time, lines in timed_blocks:
        for line in lines:
            if not line:
                continue
            tokens = line.split(' ')
            if len(tokens) < 21:
                continue

            # comm may contain spaces, so locate the token that closes it
            offset = [index for index, token in enumerate(tokens[1:])
                      if token[-1] == ')'][0]
            pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset])
            userCpu, sysCpu, stime = int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset])

            # magic fixed point-ness ...
            pid *= 1000
            ppid *= 1000
            if pid in processMap:
                process = processMap[pid]
                process.cmd = cmd.strip('()')  # why rename after latest name??
            else:
                process = Process(writer, pid, cmd.strip('()'), ppid, min(time, stime))
                processMap[pid] = process

            if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None:
                userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, max(1, time - ltime))
                cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0)
                process.samples.append(ProcessSample(time, state, cpuSample))

            process.last_user_cpu_time = userCpu
            process.last_sys_cpu_time = sysCpu
        ltime = time

    if len(timed_blocks) < 2:
        return None

    startTime = timed_blocks[0][0]
    avgSampleLength = (ltime - startTime) / (len(timed_blocks) - 1)

    return ProcessStats(writer, processMap, len(timed_blocks), avgSampleLength, startTime, ltime)
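# Illustrative proc_ps.log input (made-up values, fields elided with "..."):
# the file is a sequence of timed blocks as split by _parse_timed_blocks() --
# a timestamp (in jiffies) on its own line, then one /proc/<pid>/stat line
# per process, with blocks separated by a blank line:
#
#   1130
#   1 (init) S 0 1 1 0 -1 4202496 ...
#   2 (kthreadd) S 0 0 0 0 -1 2149613632 ...
#
#   1140
#   1 (init) S 0 1 1 0 -1 4202496 ...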
def _parse_taskstats_log(writer, file):
    """
    * See bootchart-collector.c for details.
    *
    * { pid, ppid, comm, cpu_run_real_total, blkio_delay_total, swapin_delay_total }
    """
    processMap = {}
    pidRewrites = {}
    ltime = None
    timed_blocks = _parse_timed_blocks(file)
    for time, lines in timed_blocks:
        # we have no 'stime' from taskstats, so prep 'init'
        if ltime is None:
            process = Process(writer, 1, '[init]', 0, 0)
            processMap[1000] = process
            ltime = time
#            continue
        for line in lines:
            if not line:
                continue
            tokens = line.split(' ')
            if len(tokens) != 6:
                continue

            opid, ppid, cmd = int(tokens[0]), int(tokens[1]), tokens[2]
            cpu_ns, blkio_delay_ns, swapin_delay_ns = int(tokens[-3]), int(tokens[-2]), int(tokens[-1])

            # make space for trees of pids
            opid *= 1000
            ppid *= 1000

            # when the process name changes, we re-write the pid.
            if opid in pidRewrites:
                pid = pidRewrites[opid]
            else:
                pid = opid

            cmd = cmd.strip('(').strip(')')
            if pid in processMap:
                process = processMap[pid]
                if process.cmd != cmd:
                    pid += 1
                    pidRewrites[opid] = pid
#                    print "process mutation ! '%s' vs '%s' pid %s -> pid %s\n" % (process.cmd, cmd, opid, pid)
                    process = process.split(writer, pid, cmd, ppid, time)
                    processMap[pid] = process
                else:
                    process.cmd = cmd
            else:
                process = Process(writer, pid, cmd, ppid, time)
                processMap[pid] = process

            delta_cpu_ns = float(cpu_ns - process.last_cpu_ns)
            delta_blkio_delay_ns = float(blkio_delay_ns - process.last_blkio_delay_ns)
            delta_swapin_delay_ns = float(swapin_delay_ns - process.last_swapin_delay_ns)

            # make up some state data ...
            if delta_cpu_ns > 0:
                state = "R"
            elif delta_blkio_delay_ns + delta_swapin_delay_ns > 0:
                state = "D"
            else:
                state = "S"

            # retain the ns timing information into a CPUSample - that tries
            # with the old-style to be a %age of CPU used in this time-slice.
            if delta_cpu_ns + delta_blkio_delay_ns + delta_swapin_delay_ns > 0:
#                print "proc %s cpu_ns %g delta_cpu %g" % (cmd, cpu_ns, delta_cpu_ns)
                cpuSample = CPUSample('null', delta_cpu_ns, 0.0,
                                      delta_blkio_delay_ns,
                                      delta_swapin_delay_ns)
                process.samples.append(ProcessSample(time, state, cpuSample))

            process.last_cpu_ns = cpu_ns
            process.last_blkio_delay_ns = blkio_delay_ns
            process.last_swapin_delay_ns = swapin_delay_ns
        ltime = time

    if len(timed_blocks) < 2:
        return None

    startTime = timed_blocks[0][0]
    avgSampleLength = (ltime - startTime) / (len(timed_blocks) - 1)

    return ProcessStats(writer, processMap, len(timed_blocks), avgSampleLength, startTime, ltime)


def _parse_proc_stat_log(file):
    samples = []
    ltimes = None
    for time, lines in _parse_timed_blocks(file):
        # skip empty blocks
        if not lines:
            continue
        # CPU times {user, nice, system, idle, io_wait, irq, softirq}
        tokens = lines[0].split()
        times = [int(token) for token in tokens[1:]]
        if ltimes:
            user = float((times[0] + times[1]) - (ltimes[0] + ltimes[1]))
            system = float((times[2] + times[5] + times[6]) - (ltimes[2] + ltimes[5] + ltimes[6]))
            idle = float(times[3] - ltimes[3])
            iowait = float(times[4] - ltimes[4])

            aSum = max(user + system + idle + iowait, 1)
            samples.append(CPUSample(time, user/aSum, system/aSum, iowait/aSum))

        ltimes = times
        # skip the rest of the statistics lines
    return samples
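# Worked example for _parse_proc_stat_log (made-up numbers): given two
# successive "cpu" lines,
#   cpu  100 0 50 800 50 0 0   ->  ltimes = [100, 0, 50, 800, 50, 0, 0]
#   cpu  180 0 70 880 70 0 0   ->  times  = [180, 0, 70, 880, 70, 0, 0]
# then user = (180+0)-(100+0) = 80, system = (70+0+0)-(50+0+0) = 20,
# idle = 880-800 = 80, iowait = 70-50 = 20, so aSum = 200 and the resulting
# CPUSample holds user=0.4, system=0.1, iowait=0.1.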
def _parse_proc_disk_stat_log(file, numCpu):
    """
    Parse file for disk stats, but only look at the whole device, e.g. sda,
    not sda1, sda2, etc. The format of relevant lines should be:
    {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq}
    """
    disk_regex_re = re.compile(r'^([hsv]d.|mtdblock\d|mmcblk\d|cciss/c\d+d\d+.*)$')

    # this gets called an awful lot.
    def is_relevant_line(linetokens):
        if len(linetokens) != 14:
            return False
        disk = linetokens[2]
        return disk_regex_re.match(disk)

    disk_stat_samples = []

    for time, lines in _parse_timed_blocks(file):
        sample = DiskStatSample(time)
        relevant_tokens = [linetokens for linetokens
                           in map(lambda x: x.split(), lines)
                           if is_relevant_line(linetokens)]

        for tokens in relevant_tokens:
            disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12])
            sample.add_diskdata([rsect, wsect, use])

        disk_stat_samples.append(sample)

    disk_stats = []
    for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]):
        # sample1 precedes sample2; both the interval and the sums below are
        # computed as (earlier - later), so the negative signs cancel in the
        # throughput and utilization ratios.
        interval = sample1.time - sample2.time
        if interval == 0:
            interval = 1

        sums = [a - b for a, b in zip(sample1.diskdata, sample2.diskdata)]

        readTput = sums[0] / 2.0 * 100.0 / interval
        writeTput = sums[1] / 2.0 * 100.0 / interval
        util = float(sums[2]) / 10 / interval / numCpu
        util = max(0.0, min(1.0, util))

        disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util))

    return disk_stats


def _parse_proc_meminfo_log(file):
    """
    Parse file for global memory statistics.
    The format of relevant lines should be: ^key: value( unit)?
    """
    used_values = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree',)

    mem_stats = []
    meminfo_re = re.compile(r'([^ \t:]+):\s*(\d+).*')

    for time, lines in _parse_timed_blocks(file):
        sample = MemSample(time)

        for line in lines:
            match = meminfo_re.match(line)
            if not match:
                raise ParseError("Invalid meminfo line \"%s\"" % line)
            sample.add_value(match.group(1), int(match.group(2)))

        if sample.valid():
            mem_stats.append(sample)

    return mem_stats
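# Illustrative meminfo.log block (made-up values): a timestamp, then raw
# /proc/meminfo lines. A MemSample is kept only if sample.valid() holds -
# presumably a check for the keys listed in used_values above:
#
#   2390
#   MemTotal:        2061288 kB
#   MemFree:         1534020 kB
#   Buffers:           33512 kB
#   Cached:           321584 kB
#   SwapTotal:       2097148 kB
#   SwapFree:        2097148 kB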
# If we boot the kernel with: initcall_debug printk.time=1 we can
# get all manner of interesting data from the dmesg output.
# We turn this into a pseudo-process tree: each initcall becomes a
# short-lived process, parented on the kernel.
#
# We don't try to detect a "kernel finished" state - since the kernel
# continues to do interesting things after init is called.
#
# sample input:
# [    0.000000] ACPI: FACP 3f4fc000 000F4 (v04 INTEL  Napa     00000001 MSFT 01000013)
# ...
# [    0.039993] calling  migration_init+0x0/0x6b @ 1
# [    0.039993] initcall migration_init+0x0/0x6b returned 1 after 0 usecs
def _parse_dmesg(writer, file):
    timestamp_re = re.compile(r"^\[\s*(\d+\.\d+)\s*]\s+(.*)$")
    split_re = re.compile(r"^(\S+)\s+([\S\+_-]+) (.*)$")
    processMap = {}
    idx = 0
    inc = 1.0 / 1000000
    kernel = Process(writer, idx, "k-boot", 0, 0.1)
    processMap['k-boot'] = kernel
    base_ts = False
    max_ts = 0
    for line in file.read().decode('utf-8').split('\n'):
        t = timestamp_re.match(line)
        if t is None:
#            print "duff timestamp " + line
            continue

        time_ms = float(t.group(1)) * 1000
        # It looks like we may have a huge diff after the clock has been
        # set up. This could lead to a graph so huge we would be killed
        # by the OOM killer. So instead of using the plain timestamp we
        # use a delta to the first one, and skip that first one for
        # convenience.
        if max_ts == 0 and not base_ts and time_ms > 1000:
            base_ts = time_ms
            continue
        max_ts = max(time_ms, max_ts)
        if base_ts:
#            print "fscked clock: used %f instead of %f" % (time_ms - base_ts, time_ms)
            time_ms -= base_ts

        m = split_re.match(t.group(2))
        if m is None:
            continue
#        print "match: '%s'" % (m.group(1))
        type = m.group(1)
        func = m.group(2)
        rest = m.group(3)

        if t.group(2).startswith('Write protecting the') or \
           t.group(2).startswith('Freeing unused kernel memory'):
            kernel.duration = time_ms / 10
            continue

#        print "foo: '%s' '%s' '%s'" % (type, func, rest)
        if type == "calling":
            ppid = kernel.pid
            p = re.match(r"\@ (\d+)", rest)
            if p is not None:
                ppid = float(p.group(1)) // 1000
#                print "match: '%s' ('%g') at '%s'" % (func, ppid, time_ms)
            name = func.split('+', 1)[0]
            idx += inc
            processMap[func] = Process(writer, ppid + idx, name, ppid, time_ms / 10)
        elif type == "initcall":
#            print "finished: '%s' at '%s'" % (func, time_ms)
            if func in processMap:
                process = processMap[func]
                process.duration = (time_ms / 10) - process.start_time
            else:
                print("corrupted init call for %s" % (func))

        elif type == "async_waiting" or type == "async_continuing":
            continue  # ignore

    return processMap.values()


#
# Parse binary pacct accounting file output if we have one
# cf. /usr/include/linux/acct.h
#
def _parse_pacct(writer, file):
    # read a little-endian int32; indexing bytes yields ints in Python 3
    def _read_le_int32(file):
        byts = file.read(4)
        return byts[0] | (byts[1] << 8) | (byts[2] << 16) | (byts[3] << 24)

    parent_map = {}
    parent_map[0] = 0
    while file.read(1) != b"":  # ignore flags
        ver = file.read(1)
        if ord(ver) < 3:
            print("Invalid version 0x%x" % (ord(ver)))
            return None

        file.seek(14, 1)          # user, group etc.
        pid = _read_le_int32(file)
        ppid = _read_le_int32(file)
#        print "Parent of %d is %d" % (pid, ppid)
        parent_map[pid] = ppid
        file.seek(4 + 4 + 16, 1)  # timings
        file.seek(16, 1)          # acct_comm
    return parent_map


def _parse_paternity_log(writer, file):
    parent_map = {}
    parent_map[0] = 0
    for line in file.read().decode('utf-8').split('\n'):
        if not line:
            continue
        elems = line.split(' ')  # <Child> <Parent>
        if len(elems) >= 2:
#            print "paternity of %d is %d" % (int(elems[0]), int(elems[1]))
            parent_map[int(elems[0])] = int(elems[1])
        else:
            print("Odd paternity line '%s'" % (line))
    return parent_map


def _parse_cmdline_log(writer, file):
    cmdLines = {}
    for block in file.read().decode('utf-8').split('\n\n'):
        lines = block.split('\n')
        if len(lines) >= 3:
#            print "Lines '%s'" % (lines[0])
            pid = int(lines[0])
            values = {}
            values['exe'] = lines[1].lstrip(':')
            args = lines[2].lstrip(':').split('\0')
            args.pop()
            values['args'] = args
            cmdLines[pid] = values
    return cmdLines
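# Illustrative cmdline log block, as inferred from _parse_cmdline_log above
# (made-up pid and paths): blocks are separated by a blank line; line 1 is
# the pid, line 2 the executable and line 3 the NUL-separated argument
# vector, the latter two prefixed with ':':
#
#   1763
#   : /usr/sbin/sshd
#   : /usr/sbin/sshd\0-D\0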
def get_num_cpus(headers):
    """Get the number of CPUs from the system.cpu header property.

    As the CPU utilization graphs are relative, the number of CPUs
    currently makes no difference."""
    if headers is None:
        return 1
    if headers.get("system.cpu.num"):
        return max(int(headers.get("system.cpu.num")), 1)
    cpu_model = headers.get("system.cpu")
    if cpu_model is None:
        return 1
    mat = re.match(".*\\((\\d+)\\)", cpu_model)
    if mat is None:
        return 1
    return max(int(mat.group(1)), 1)


def _do_parse(writer, state, filename, file):
    writer.info("parsing '%s'" % filename)
    t1 = clock()
    paths = filename.split("/")
    task = paths[-1]
    pn = paths[-2]
    start = None
    end = None
    for line in file:
        line = line.decode('utf-8')  # files arrive opened in binary mode
        if line.startswith("Started:"):
            start = int(float(line.split()[-1]))
        elif line.startswith("Ended:"):
            end = int(float(line.split()[-1]))
    if start and end:
        state.add_process(pn + ":" + task, start, end)
    t2 = clock()
    writer.info(" %s seconds" % str(t2 - t1))
    return state


def parse_file(writer, state, filename):
    if state.filename is None:
        state.filename = filename
    basename = os.path.basename(filename)
    with open(filename, "rb") as file:
        return _do_parse(writer, state, filename, file)


def parse_paths(writer, state, paths):
    for path in paths:
        if state.filename is None:
            state.filename = path
        root, extension = os.path.splitext(path)
        if not os.path.exists(path):
            writer.warn("warning: path '%s' does not exist, ignoring." % path)
            continue
        #state.filename = path
        if os.path.isdir(path):
            files = sorted([os.path.join(path, f) for f in os.listdir(path)])
            state = parse_paths(writer, state, files)
        elif extension in [".tar", ".tgz", ".gz"]:
            if extension == ".gz":
                root, extension = os.path.splitext(root)
                if extension != ".tar":
                    writer.warn("warning: can only handle zipped tar files, not zipped '%s'-files; ignoring" % extension)
                    continue
            tf = None
            try:
                writer.status("parsing '%s'" % path)
                tf = tarfile.open(path, 'r:*')
                for name in tf.getnames():
                    state = _do_parse(writer, state, name, tf.extractfile(name))
            except tarfile.ReadError as error:
                raise ParseError("error: could not read tarfile '%s': %s." % (path, error))
            finally:
                if tf is not None:
                    tf.close()
        else:
            state = parse_file(writer, state, path)
    return state


def split_res(res, options):
    """ Split res into options.num pieces """
    res_list = []
    if options.num > 1:
        s_list = sorted(res.start.keys())
        frag_size = len(s_list) / float(options.num)
        # round up so that the top value is included
        if frag_size > int(frag_size):
            frag_size = int(frag_size + 1)
        else:
            frag_size = int(frag_size)
        start = 0
        end = frag_size
        while start < end:
            state = Trace(None, [], None)
            if options.full_time:
                state.min = min(res.start.keys())
                state.max = max(res.end.keys())
            for i in range(start, end):
                # Add this line for reference
                #state.add_process(pn + ":" + task, start, end)
                for p in res.start[s_list[i]]:
                    state.add_process(p, s_list[i], res.processes[p][1])
            start = end
            end = end + frag_size
            if end > len(s_list):
                end = len(s_list)
            res_list.append(state)
    else:
        res_list.append(res)
    return res_list
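# Usage sketch for split_res (hypothetical caller, not part of this module):
# split a parsed trace into options.num fragments and hand each to the
# renderer. 'render' and its signature are placeholders, not a real API:
#
#   trace = parse_paths(writer, Trace(None, [], None), args)
#   for i, frag in enumerate(split_res(trace, options)):
#       render(writer, frag, options, "bootchart-%02d.png" % i)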