Diffstat (limited to 'scripts/pybootchartgui/pybootchartgui/parsing.py')
 scripts/pybootchartgui/pybootchartgui/parsing.py | 791 +++++++++++++++------
 1 file changed, 628 insertions(+), 163 deletions(-)
diff --git a/scripts/pybootchartgui/pybootchartgui/parsing.py b/scripts/pybootchartgui/pybootchartgui/parsing.py
index 6343fd5a7b..c8a6a5721a 100644
--- a/scripts/pybootchartgui/pybootchartgui/parsing.py
+++ b/scripts/pybootchartgui/pybootchartgui/parsing.py
@@ -1,178 +1,638 @@
+# This file is part of pybootchartgui.
+
+# pybootchartgui is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# pybootchartgui is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with pybootchartgui. If not, see <http://www.gnu.org/licenses/>.
+
+
from __future__ import with_statement
import os
import string
import re
+import sys
import tarfile
+from time import clock
from collections import defaultdict
+from functools import reduce
+
+from .samples import *
+from .process_tree import ProcessTree
+
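+# Python 3 has no separate 'long' type; alias it so the taskstats
+# parser below can keep using long() for 64-bit nanosecond counters.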
+if sys.version_info >= (3, 0):
+ long = int
+
+# Parsing produces as its end result a 'Trace'
+
+class Trace:
+ def __init__(self, writer, paths, options):
+ self.processes = {}
+ self.start = {}
+ self.end = {}
+ self.headers = None
+ self.disk_stats = None
+ self.ps_stats = None
+ self.taskstats = None
+ self.cpu_stats = None
+ self.cmdline = None
+ self.kernel = None
+ self.kernel_tree = None
+ self.filename = None
+ self.parent_map = None
+ self.mem_stats = None
+
+ parse_paths (writer, self, paths, options.mintime)
+ if not self.valid():
+ raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths))
+
+        # Turn that parsed information into something more useful:
+        # link processes into a tree of pointers, calculate statistics
+ self.compile(writer)
+
+ # Crop the chart to the end of the first idle period after the given
+ # process
+ if options.crop_after:
+ idle = self.crop (writer, options.crop_after)
+ else:
+ idle = None
+
+ # Annotate other times as the first start point of given process lists
+ self.times = [ idle ]
+ if options.annotate:
+ for procnames in options.annotate:
+ names = [x[:15] for x in procnames.split(",")]
+ for proc in self.ps_stats.process_map.values():
+ if proc.cmd in names:
+ self.times.append(proc.start_time)
+ break
+ else:
+ self.times.append(None)
+
+ self.proc_tree = ProcessTree(writer, self.kernel, self.ps_stats,
+ self.ps_stats.sample_period,
+ self.headers.get("profile.process"),
+ options.prune, idle, self.taskstats,
+ self.parent_map is not None)
+
+ if self.kernel is not None:
+ self.kernel_tree = ProcessTree(writer, self.kernel, None, 0,
+ self.headers.get("profile.process"),
+ False, None, None, True)
+
+    def valid(self):
+        return len(self.processes) != 0
+
+
+ def compile(self, writer):
+
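+        # Given a real (unscaled) pid, walk up the parent map until we
+        # find an ancestor we actually have process samples for.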
+ def find_parent_id_for(pid):
+            if pid == 0:
+ return 0
+ ppid = self.parent_map.get(pid)
+ if ppid:
+ # many of these double forks are so short lived
+ # that we have no samples, or process info for them
+                # so climb the parent hierarchy to find one
+ if int (ppid * 1000) not in self.ps_stats.process_map:
+# print "Pid '%d' short lived with no process" % ppid
+ ppid = find_parent_id_for (ppid)
+# else:
+# print "Pid '%d' has an entry" % ppid
+ else:
+# print "Pid '%d' missing from pid map" % pid
+ return 0
+ return ppid
+
+ # merge in the cmdline data
+ if self.cmdline is not None:
+ for proc in self.ps_stats.process_map.values():
+ rpid = int (proc.pid // 1000)
+ if rpid in self.cmdline:
+ cmd = self.cmdline[rpid]
+ proc.exe = cmd['exe']
+ proc.args = cmd['args']
+# else:
+# print "proc %d '%s' not in cmdline" % (rpid, proc.exe)
+
+ # re-parent any stray orphans if we can
+ if self.parent_map is not None:
+ for process in self.ps_stats.process_map.values():
+ ppid = find_parent_id_for (int(process.pid // 1000))
+ if ppid:
+ process.ppid = ppid * 1000
+
+ # stitch the tree together with pointers
+ for process in self.ps_stats.process_map.values():
+ process.set_parent (self.ps_stats.process_map)
+
+ # count on fingers variously
+ for process in self.ps_stats.process_map.values():
+ process.calc_stats (self.ps_stats.sample_period)
+
+ def crop(self, writer, crop_after):
+
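+        # Heuristic: a sample counts as idle when its own utilisation is
+        # below 25% and the average over the following ~300 time units
+        # also stays below 25%.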
+ def is_idle_at(util, start, j):
+ k = j + 1
+ while k < len(util) and util[k][0] < start + 300:
+ k += 1
+ k = min(k, len(util)-1)
+
+            if util[j][1] >= 0.25:
+                return False
+
+            avgload = sum(u[1] for u in util[j:k+1]) / (k-j+1)
+            return avgload < 0.25
+
+        def is_idle(util, start):
+            for j in range(0, len(util)):
+                if util[j][0] < start:
+                    continue
+                return is_idle_at(util, start, j)
+            return False
+
+        names = [x[:15] for x in crop_after.split(",")]
+        for proc in self.ps_stats.process_map.values():
+            if proc.cmd in names or proc.exe in names:
+                writer.info("selected proc '%s' from list (start %d)"
+                            % (proc.cmd, proc.start_time))
+                break
+        else:
+            writer.warn("no selected crop proc '%s' in list" % crop_after)
+            return None
+
+ cpu_util = [(sample.time, sample.user + sample.sys + sample.io) for sample in self.cpu_stats]
+ disk_util = [(sample.time, sample.util) for sample in self.disk_stats]
+
+ idle = None
+ for i in range(0, len(cpu_util)):
+ if cpu_util[i][0] < proc.start_time:
+ continue
+ if is_idle_at(cpu_util, cpu_util[i][0], i) \
+ and is_idle(disk_util, cpu_util[i][0]):
+ idle = cpu_util[i][0]
+ break
+
+ if idle is None:
+ writer.warn ("not idle after proc '%s'" % crop_after)
+ return None
+
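+        # pad 300 time units (3 seconds, assuming the usual 100Hz jiffy
+        # clock) past the detected idle point before cropping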
+ crop_at = idle + 300
+ writer.info ("cropping at time %d" % crop_at)
+ while len (self.cpu_stats) \
+ and self.cpu_stats[-1].time > crop_at:
+ self.cpu_stats.pop()
+ while len (self.disk_stats) \
+ and self.disk_stats[-1].time > crop_at:
+ self.disk_stats.pop()
+
+ self.ps_stats.end_time = crop_at
+
+ cropped_map = {}
+ for key, value in self.ps_stats.process_map.items():
+ if (value.start_time <= crop_at):
+ cropped_map[key] = value
+
+ for proc in cropped_map.values():
+ proc.duration = min (proc.duration, crop_at - proc.start_time)
+ while len (proc.samples) \
+ and proc.samples[-1].time > crop_at:
+ proc.samples.pop()
+
+ self.ps_stats.process_map = cropped_map
+
+ return idle
+
-from samples import *
-from process_tree import ProcessTree
class ParseError(Exception):
- """Represents errors during parse of the bootchart."""
- def __init__(self, value):
- self.value = value
+ """Represents errors during parse of the bootchart."""
+ def __init__(self, value):
+ self.value = value
- def __str__(self):
- return self.value
+ def __str__(self):
+ return self.value
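+# Headers are 'key = value' lines; a line without '=' is treated as a
+# continuation of the previous key's value.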
def _parse_headers(file):
- """Parses the headers of the bootchart."""
- def parse((headers,last), line):
- if '=' in line: last,value = map(string.strip, line.split('=', 1))
- else: value = line.strip()
- headers[last] += value
- return headers,last
- return reduce(parse, file.read().split('\n'), (defaultdict(str),''))[0]
+ """Parses the headers of the bootchart."""
+ def parse(acc, line):
+ (headers, last) = acc
+ if '=' in line:
+ last, value = map (lambda x: x.strip(), line.split('=', 1))
+ else:
+ value = line.strip()
+ headers[last] += value
+ return headers, last
+ return reduce(parse, file.read().decode('utf-8').split('\n'), (defaultdict(str),''))[0]
def _parse_timed_blocks(file):
- """Parses (ie., splits) a file into so-called timed-blocks. A
- timed-block consists of a timestamp on a line by itself followed
- by zero or more lines of data for that point in time."""
- def parse(block):
- lines = block.split('\n')
- if not lines:
- raise ParseError('expected a timed-block consisting a timestamp followed by data lines')
- try:
- return (int(lines[0]), lines[1:])
- except ValueError:
- raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0])
- blocks = file.read().split('\n\n')
- return [parse(block) for block in blocks if block.strip()]
-
-def _parse_proc_ps_log(file):
- """
- * See proc(5) for details.
- *
- * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime,
- * cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack,
- * kstkesp, kstkeip}
- """
- processMap = {}
- ltime = 0
- timed_blocks = _parse_timed_blocks(file)
- for time, lines in timed_blocks:
- for line in lines:
- tokens = line.split(' ')
-
- offset = [index for index, token in enumerate(tokens[1:]) if token.endswith(')')][0]
- pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset])
- userCpu, sysCpu, stime= int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset])
-
- if processMap.has_key(pid):
- process = processMap[pid]
- process.cmd = cmd.replace('(', '').replace(')', '') # why rename after latest name??
- else:
- process = Process(pid, cmd, ppid, min(time, stime))
- processMap[pid] = process
-
- if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None:
- userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, time - ltime)
- cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0)
- process.samples.append(ProcessSample(time, state, cpuSample))
-
- process.last_user_cpu_time = userCpu
- process.last_sys_cpu_time = sysCpu
- ltime = time
-
- startTime = timed_blocks[0][0]
- avgSampleLength = (ltime - startTime)/(len(timed_blocks)-1)
-
- for process in processMap.values():
- process.set_parent(processMap)
-
- for process in processMap.values():
- process.calc_stats(avgSampleLength)
-
- return ProcessStats(processMap.values(), avgSampleLength, startTime, ltime)
-
+    """Parses (i.e., splits) a file into so-called timed-blocks. A
+ timed-block consists of a timestamp on a line by itself followed
+ by zero or more lines of data for that point in time."""
+ def parse(block):
+ lines = block.split('\n')
+ if not lines:
+            raise ParseError('expected a timed-block consisting of a timestamp followed by data lines')
+ try:
+ return (int(lines[0]), lines[1:])
+ except ValueError:
+ raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0])
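+    # blocks tagged '... not running' appear to be status notes from the
+    # collector rather than sample data; they are filtered out below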
+ blocks = file.read().decode('utf-8').split('\n\n')
+ return [parse(block) for block in blocks if block.strip() and not block.endswith(' not running\n')]
+
+def _parse_proc_ps_log(writer, file):
+ """
+ * See proc(5) for details.
+ *
+ * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime,
+ * cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack,
+ * kstkesp, kstkeip}
+ """
+ processMap = {}
+ ltime = 0
+ timed_blocks = _parse_timed_blocks(file)
+ for time, lines in timed_blocks:
+ for line in lines:
+ if not line: continue
+ tokens = line.split(' ')
+ if len(tokens) < 21:
+ continue
+
+ offset = [index for index, token in enumerate(tokens[1:]) if token[-1] == ')'][0]
+ pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset])
+ userCpu, sysCpu, stime = int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset])
+
+            # magic fixed point-ness: pids are scaled by 1000 so that
+            # synthetic intermediate pids (e.g. for processes that are
+            # renamed) can be slotted in between real ones
+            pid *= 1000
+            ppid *= 1000
+ if pid in processMap:
+ process = processMap[pid]
+ process.cmd = cmd.strip('()') # why rename after latest name??
+ else:
+ process = Process(writer, pid, cmd.strip('()'), ppid, min(time, stime))
+ processMap[pid] = process
+
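+            # the time delta passed to calc_load is clamped to at least 1
+            # so a repeated timestamp cannot cause a division by zero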
+ if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None:
+ userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, max(1, time - ltime))
+ cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0)
+ process.samples.append(ProcessSample(time, state, cpuSample))
+
+ process.last_user_cpu_time = userCpu
+ process.last_sys_cpu_time = sysCpu
+ ltime = time
+
+ if len (timed_blocks) < 2:
+ return None
+
+ startTime = timed_blocks[0][0]
+ avgSampleLength = (ltime - startTime)/(len (timed_blocks) - 1)
+
+ return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime)
+
+def _parse_taskstats_log(writer, file):
+ """
+ * See bootchart-collector.c for details.
+ *
+ * { pid, ppid, comm, cpu_run_real_total, blkio_delay_total, swapin_delay_total }
+ *
+ """
+ processMap = {}
+ pidRewrites = {}
+ ltime = None
+ timed_blocks = _parse_timed_blocks(file)
+ for time, lines in timed_blocks:
+ # we have no 'stime' from taskstats, so prep 'init'
+ if ltime is None:
+ process = Process(writer, 1, '[init]', 0, 0)
+ processMap[1000] = process
+ ltime = time
+# continue
+ for line in lines:
+ if not line: continue
+ tokens = line.split(' ')
+ if len(tokens) != 6:
+ continue
+
+ opid, ppid, cmd = int(tokens[0]), int(tokens[1]), tokens[2]
+            cpu_ns, blkio_delay_ns, swapin_delay_ns = long(tokens[-3]), long(tokens[-2]), long(tokens[-1])
+
+ # make space for trees of pids
+ opid *= 1000
+ ppid *= 1000
+
+ # when the process name changes, we re-write the pid.
+ if opid in pidRewrites:
+ pid = pidRewrites[opid]
+ else:
+ pid = opid
+
+ cmd = cmd.strip('(').strip(')')
+ if pid in processMap:
+ process = processMap[pid]
+ if process.cmd != cmd:
+ pid += 1
+ pidRewrites[opid] = pid
+# print "process mutation ! '%s' vs '%s' pid %s -> pid %s\n" % (process.cmd, cmd, opid, pid)
+ process = process.split (writer, pid, cmd, ppid, time)
+ processMap[pid] = process
+ else:
+                    process.cmd = cmd
+ else:
+ process = Process(writer, pid, cmd, ppid, time)
+ processMap[pid] = process
+
+            delta_cpu_ns = float(cpu_ns - process.last_cpu_ns)
+            delta_blkio_delay_ns = float(blkio_delay_ns - process.last_blkio_delay_ns)
+            delta_swapin_delay_ns = float(swapin_delay_ns - process.last_swapin_delay_ns)
+
+ # make up some state data ...
+ if delta_cpu_ns > 0:
+ state = "R"
+ elif delta_blkio_delay_ns + delta_swapin_delay_ns > 0:
+ state = "D"
+ else:
+ state = "S"
+
+            # retain the ns timing information in a CPUSample - the slot
+            # that in the old-style format held a %age of CPU used in
+            # this time-slice
+ if delta_cpu_ns + delta_blkio_delay_ns + delta_swapin_delay_ns > 0:
+# print "proc %s cpu_ns %g delta_cpu %g" % (cmd, cpu_ns, delta_cpu_ns)
+ cpuSample = CPUSample('null', delta_cpu_ns, 0.0,
+ delta_blkio_delay_ns,
+ delta_swapin_delay_ns)
+ process.samples.append(ProcessSample(time, state, cpuSample))
+
+ process.last_cpu_ns = cpu_ns
+ process.last_blkio_delay_ns = blkio_delay_ns
+ process.last_swapin_delay_ns = swapin_delay_ns
+ ltime = time
+
+ if len (timed_blocks) < 2:
+ return None
+
+ startTime = timed_blocks[0][0]
+ avgSampleLength = (ltime - startTime)/(len(timed_blocks)-1)
+
+ return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime)
+
def _parse_proc_stat_log(file):
- samples = []
- ltimes = None
- for time, lines in _parse_timed_blocks(file):
- # CPU times {user, nice, system, idle, io_wait, irq, softirq}
- tokens = lines[0].split();
- times = [ int(token) for token in tokens[1:] ]
- if ltimes:
- user = float((times[0] + times[1]) - (ltimes[0] + ltimes[1]))
- system = float((times[2] + times[5] + times[6]) - (ltimes[2] + ltimes[5] + ltimes[6]))
- idle = float(times[3] - ltimes[3])
- iowait = float(times[4] - ltimes[4])
-
- aSum = max(user + system + idle + iowait, 1)
- samples.append( CPUSample(time, user/aSum, system/aSum, iowait/aSum) )
-
- ltimes = times
- # skip the rest of statistics lines
- return samples
-
-
+ samples = []
+ ltimes = None
+ for time, lines in _parse_timed_blocks(file):
+        # skip empty lines
+ if not lines:
+ continue
+ # CPU times {user, nice, system, idle, io_wait, irq, softirq}
+ tokens = lines[0].split()
+ times = [ int(token) for token in tokens[1:] ]
+ if ltimes:
+ user = float((times[0] + times[1]) - (ltimes[0] + ltimes[1]))
+ system = float((times[2] + times[5] + times[6]) - (ltimes[2] + ltimes[5] + ltimes[6]))
+ idle = float(times[3] - ltimes[3])
+ iowait = float(times[4] - ltimes[4])
+
+ aSum = max(user + system + idle + iowait, 1)
+ samples.append( CPUSample(time, user/aSum, system/aSum, iowait/aSum) )
+
+ ltimes = times
+ # skip the rest of statistics lines
+ return samples
+
def _parse_proc_disk_stat_log(file, numCpu):
- """
- Parse file for disk stats, but only look at the whole disks, eg. sda,
- not sda1, sda2 etc. The format of relevant lines should be:
- {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq}
- """
- DISK_REGEX = 'hd.$|sd.$'
-
- def is_relevant_line(line):
- return len(line.split()) == 14 and re.match(DISK_REGEX, line.split()[2])
-
- disk_stat_samples = []
-
- for time, lines in _parse_timed_blocks(file):
- sample = DiskStatSample(time)
- relevant_tokens = [line.split() for line in lines if is_relevant_line(line)]
-
- for tokens in relevant_tokens:
- disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12])
- sample.add_diskdata([rsect, wsect, use])
-
- disk_stat_samples.append(sample)
-
- disk_stats = []
- for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]):
- interval = sample1.time - sample2.time
- sums = [ a - b for a, b in zip(sample1.diskdata, sample2.diskdata) ]
- readTput = sums[0] / 2.0 * 100.0 / interval
- writeTput = sums[1] / 2.0 * 100.0 / interval
- util = float( sums[2] ) / 10 / interval / numCpu
- util = max(0.0, min(1.0, util))
- disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util))
-
- return disk_stats
-
-
+ """
+    Parse file for disk stats, but only look at the whole device, e.g. sda,
+ not sda1, sda2 etc. The format of relevant lines should be:
+ {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq}
+ """
+    disk_regex_re = re.compile (r'^([hsv]d.|mtdblock\d|mmcblk\d|cciss/c\d+d\d+.*)$')
+
+ # this gets called an awful lot.
+ def is_relevant_line(linetokens):
+ if len(linetokens) != 14:
+ return False
+ disk = linetokens[2]
+ return disk_regex_re.match(disk)
+
+ disk_stat_samples = []
+
+ for time, lines in _parse_timed_blocks(file):
+ sample = DiskStatSample(time)
+        relevant_tokens = [tokens for tokens in (line.split() for line in lines) if is_relevant_line(tokens)]
+
+ for tokens in relevant_tokens:
+ disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12])
+ sample.add_diskdata([rsect, wsect, use])
+
+ disk_stat_samples.append(sample)
+
+ disk_stats = []
+ for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]):
+ interval = sample1.time - sample2.time
+ if interval == 0:
+ interval = 1
+ sums = [ a - b for a, b in zip(sample1.diskdata, sample2.diskdata) ]
+ readTput = sums[0] / 2.0 * 100.0 / interval
+ writeTput = sums[1] / 2.0 * 100.0 / interval
+ util = float( sums[2] ) / 10 / interval / numCpu
+ util = max(0.0, min(1.0, util))
+ disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util))
+
+ return disk_stats
+
+def _parse_proc_meminfo_log(file):
+ """
+ Parse file for global memory statistics.
+ The format of relevant lines should be: ^key: value( unit)?
+ """
+ used_values = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree',)
+
+ mem_stats = []
+ meminfo_re = re.compile(r'([^ \t:]+):\s*(\d+).*')
+
+ for time, lines in _parse_timed_blocks(file):
+ sample = MemSample(time)
+
+ for line in lines:
+ match = meminfo_re.match(line)
+ if not match:
+                raise ParseError("Invalid meminfo line \"%s\"" % line)
+ sample.add_value(match.group(1), int(match.group(2)))
+
+ if sample.valid():
+ mem_stats.append(sample)
+
+ return mem_stats
+
+# if we boot the kernel with: initcall_debug printk.time=1 we can
+# get all manner of interesting data from the dmesg output.
+# We turn this into a pseudo-process tree: each initcall event is
+# characterised by a start time and a duration.
+# We don't try to detect a "kernel finished" state - since the kernel
+# continues to do interesting things after init is called.
+#
+# sample input:
+# [ 0.000000] ACPI: FACP 3f4fc000 000F4 (v04 INTEL Napa 00000001 MSFT 01000013)
+# ...
+# [ 0.039993] calling migration_init+0x0/0x6b @ 1
+# [ 0.039993] initcall migration_init+0x0/0x6b returned 1 after 0 usecs
+def _parse_dmesg(writer, file):
+    timestamp_re = re.compile (r"^\[\s*(\d+\.\d+)\s*]\s+(.*)$")
+    split_re = re.compile (r"^(\S+)\s+([\S\+_-]+) (.*)$")
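+    # split_re breaks a message payload into (event-type, symbol, rest),
+    # e.g. "calling  migration_init+0x0/0x6b @ 1" ->
+    #      ("calling", "migration_init+0x0/0x6b", "@ 1")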
+ processMap = {}
+ idx = 0
+ inc = 1.0 / 1000000
+ kernel = Process(writer, idx, "k-boot", 0, 0.1)
+ processMap['k-boot'] = kernel
+ base_ts = False
+ max_ts = 0
+ for line in file.read().decode('utf-8').split('\n'):
+ t = timestamp_re.match (line)
+ if t is None:
+# print "duff timestamp " + line
+ continue
+
+ time_ms = float (t.group(1)) * 1000
+ # looks like we may have a huge diff after the clock
+ # has been set up. This could lead to huge graph:
+ # so huge we will be killed by the OOM.
+ # So instead of using the plain timestamp we will
+ # use a delta to first one and skip the first one
+ # for convenience
+ if max_ts == 0 and not base_ts and time_ms > 1000:
+ base_ts = time_ms
+ continue
+ max_ts = max(time_ms, max_ts)
+ if base_ts:
+# print "fscked clock: used %f instead of %f" % (time_ms - base_ts, time_ms)
+ time_ms -= base_ts
+ m = split_re.match (t.group(2))
+
+ if m is None:
+ continue
+# print "match: '%s'" % (m.group(1))
+ type = m.group(1)
+ func = m.group(2)
+ rest = m.group(3)
+
+ if t.group(2).startswith ('Write protecting the') or \
+ t.group(2).startswith ('Freeing unused kernel memory'):
+ kernel.duration = time_ms / 10
+ continue
+
+# print "foo: '%s' '%s' '%s'" % (type, func, rest)
+ if type == "calling":
+ ppid = kernel.pid
+            p = re.match (r"\@ (\d+)", rest)
+ if p is not None:
+ ppid = float (p.group(1)) // 1000
+# print "match: '%s' ('%g') at '%s'" % (func, ppid, time_ms)
+ name = func.split ('+', 1) [0]
+ idx += inc
+ processMap[func] = Process(writer, ppid + idx, name, ppid, time_ms / 10)
+ elif type == "initcall":
+# print "finished: '%s' at '%s'" % (func, time_ms)
+ if func in processMap:
+ process = processMap[func]
+ process.duration = (time_ms / 10) - process.start_time
+ else:
+ print("corrupted init call for %s" % (func))
+
+ elif type == "async_waiting" or type == "async_continuing":
+ continue # ignore
+
+ return processMap.values()
+
+#
+# Parse binary pacct accounting file output if we have one
+# cf. /usr/include/linux/acct.h
+#
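+# Assuming the acct_v3 record layout: the 14 bytes skipped below are
+# tty(2) + exitcode(4) + uid(4) + gid(4) before the pid/ppid fields,
+# then btime(4) + etime(4) + eight 2-byte comp_t timings, and finally
+# the 16-byte comm field.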
+def _parse_pacct(writer, file):
+ # read LE int32
+    # read LE int32; index with slices so this works on Python 2
+    # strings and Python 3 bytes alike
+    def _read_le_int32(file):
+        byts = file.read(4)
+        return (ord(byts[0:1])) | (ord(byts[1:2]) << 8) | \
+               (ord(byts[2:3]) << 16) | (ord(byts[3:4]) << 24)
+
+ parent_map = {}
+ parent_map[0] = 0
+    while file.read(1) != b"": # ignore flags
+ ver = file.read(1)
+ if ord(ver) < 3:
+ print("Invalid version 0x%x" % (ord(ver)))
+ return None
+
+ file.seek (14, 1) # user, group etc.
+ pid = _read_le_int32 (file)
+ ppid = _read_le_int32 (file)
+# print "Parent of %d is %d" % (pid, ppid)
+ parent_map[pid] = ppid
+ file.seek (4 + 4 + 16, 1) # timings
+ file.seek (16, 1) # acct_comm
+ return parent_map
+
+def _parse_paternity_log(writer, file):
+ parent_map = {}
+ parent_map[0] = 0
+ for line in file.read().decode('utf-8').split('\n'):
+ if not line:
+ continue
+ elems = line.split(' ') # <Child> <Parent>
+ if len (elems) >= 2:
+# print "paternity of %d is %d" % (int(elems[0]), int(elems[1]))
+ parent_map[int(elems[0])] = int(elems[1])
+ else:
+ print("Odd paternity line '%s'" % (line))
+ return parent_map
+
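+# Each cmdline block appears to be: a pid line, a ':'-prefixed exe path
+# line, then a ':'-prefixed line of NUL-separated argv strings.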
+def _parse_cmdline_log(writer, file):
+ cmdLines = {}
+ for block in file.read().decode('utf-8').split('\n\n'):
+ lines = block.split('\n')
+ if len (lines) >= 3:
+# print "Lines '%s'" % (lines[0])
+ pid = int (lines[0])
+ values = {}
+ values['exe'] = lines[1].lstrip(':')
+ args = lines[2].lstrip(':').split('\0')
+ args.pop()
+ values['args'] = args
+ cmdLines[pid] = values
+ return cmdLines
+
def get_num_cpus(headers):
"""Get the number of CPUs from the system.cpu header property. As the
CPU utilization graphs are relative, the number of CPUs currently makes
no difference."""
if headers is None:
return 1
+ if headers.get("system.cpu.num"):
+ return max (int (headers.get("system.cpu.num")), 1)
cpu_model = headers.get("system.cpu")
if cpu_model is None:
return 1
mat = re.match(".*\\((\\d+)\\)", cpu_model)
if mat is None:
return 1
- return int(mat.group(1))
-
-class ParserState:
- def __init__(self):
- self.processes = {}
- self.start = {}
- self.end = {}
+ return max (int(mat.group(1)), 1)
- def valid(self):
- return len(self.processes) != 0
-
-
-_relevant_files = set(["header", "proc_diskstats.log", "proc_ps.log", "proc_stat.log"])
-
-def _do_parse(state, filename, file, mintime):
- #print filename
- #writer.status("parsing '%s'" % filename)
+def _do_parse(writer, state, filename, file, mintime):
+ writer.info("parsing '%s'" % filename)
+ t1 = clock()
paths = filename.split("/")
task = paths[-1]
pn = paths[-2]
@@ -194,44 +654,49 @@ def _do_parse(state, filename, file, mintime):
state.end[end] = []
if k not in state.end[end]:
state.end[end].append(pn + ":" + task)
+ t2 = clock()
+ writer.info(" %s seconds" % str(t2-t1))
return state
-def parse_file(state, filename, mintime):
+def parse_file(writer, state, filename, mintime):
+ if state.filename is None:
+ state.filename = filename
basename = os.path.basename(filename)
with open(filename, "rb") as file:
- return _do_parse(state, filename, file, mintime)
+ return _do_parse(writer, state, filename, file, mintime)
-def parse_paths(state, paths, mintime):
+def parse_paths(writer, state, paths, mintime):
for path in paths:
- root,extension = os.path.splitext(path)
+ if state.filename is None:
+ state.filename = path
+ root, extension = os.path.splitext(path)
if not(os.path.exists(path)):
- print "warning: path '%s' does not exist, ignoring." % path
+ writer.warn("warning: path '%s' does not exist, ignoring." % path)
continue
+ #state.filename = path
if os.path.isdir(path):
files = [ f for f in [os.path.join(path, f) for f in os.listdir(path)] ]
files.sort()
- state = parse_paths(state, files, mintime)
- elif extension in [".tar", ".tgz", ".tar.gz"]:
+ state = parse_paths(writer, state, files, mintime)
+ elif extension in [".tar", ".tgz", ".gz"]:
+ if extension == ".gz":
+ root, extension = os.path.splitext(root)
+ if extension != ".tar":
+ writer.warn("warning: can only handle zipped tar files, not zipped '%s'-files; ignoring" % extension)
+ continue
tf = None
try:
+ writer.status("parsing '%s'" % path)
tf = tarfile.open(path, 'r:*')
for name in tf.getnames():
- state = _do_parse(state, name, tf.extractfile(name))
- except tarfile.ReadError, error:
+                    state = _do_parse(writer, state, name, tf.extractfile(name), mintime)
+ except tarfile.ReadError as error:
raise ParseError("error: could not read tarfile '%s': %s." % (path, error))
finally:
if tf != None:
tf.close()
else:
- state = parse_file(state, path, mintime)
- return state
-
-def parse(paths, prune, mintime):
- state = parse_paths(ParserState(), paths, mintime)
- if not state.valid():
- raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths))
- #monitored_app = state.headers.get("profile.process")
- #proc_tree = ProcessTree(state.ps_stats, monitored_app, prune)
+ state = parse_file(writer, state, path, mintime)
return state
def split_res(res, n):