summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrey Bondarenko <abone27@mail.ru>2014-11-05 01:25:10 +0500
committerArmin Kuster <akuster808@gmail.com>2019-05-03 18:33:14 -0700
commit404bbb2a0fc6442dbe277e877de64f4ee0c4eebb (patch)
treef66d04d4943ae4fa71b55c958a70b34c319592b9
parent6d6baac7c6ccfd72e53e9c5a5c8de528dcc3d842 (diff)
downloadopenembedded-core-contrib-404bbb2a0fc6442dbe277e877de64f4ee0c4eebb.tar.gz
pybootchartgui: _parse_proc_ps_log rewrite with iterator
Iterators use much less memory, so larger bootcharts can be processed without triggering the OOM killer or massive swapping. On a (big) 11MB tarball this incurs a performance penalty of about 10%, but consumes less than half the memory. Before: 23.50user 1.20system 0:24.97elapsed 98%CPU (0avgtext+0avgdata 770048maxresident)k. After: 26.78user 0.44system 0:27.24elapsed 99%CPU (0avgtext+0avgdata 321192maxresident)k. Signed-off-by: Armin Kuster <akuster808@gmail.com>
-rw-r--r--scripts/pybootchartgui/pybootchartgui/parsing.py46
1 files changed, 40 insertions, 6 deletions
diff --git a/scripts/pybootchartgui/pybootchartgui/parsing.py b/scripts/pybootchartgui/pybootchartgui/parsing.py
index 0b5063b4f3..97d28724a4 100644
--- a/scripts/pybootchartgui/pybootchartgui/parsing.py
+++ b/scripts/pybootchartgui/pybootchartgui/parsing.py
@@ -13,6 +13,9 @@
# You should have received a copy of the GNU General Public License
# along with pybootchartgui. If not, see <http://www.gnu.org/licenses/>.
+
+import codecs
+import itertools
import os
import string
import re
@@ -269,6 +272,30 @@ def _parse_headers(file):
return headers, last
return reduce(parse, file.read().decode('utf-8').split('\n'), (defaultdict(str),''))[0]
+def _iter_parse_timed_blocks(file):
+    """Lazily split a file of UTF-8 encoded lines into timed-blocks.
+
+    A timed-block is an integer timestamp on a line by itself followed by
+    zero or more data lines; blank lines separate blocks. Yields
+    (timestamp, stripped data lines) tuples one at a time, skipping blocks
+    whose last line ends with " not running"; raises ParseError when a
+    block does not start with an integer.
+    """
+    def parse(lines):
+        try:
+            return (int(lines[0]), lines[1:])
+        except ValueError:
+            raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0])
+    block = []
+    # The extra "\n" flushes a final block that has no trailing blank line.
+    for line in itertools.chain(codecs.iterdecode(file, "utf-8"), ("\n",)):
+        if line != "\n":
+            block.append(line.strip())
+        elif block:  # repeated blank lines give empty blocks: skip, keep scanning
+            if not block[-1].endswith(" not running"):  # lines are stripped: no "\n"
+                yield parse(block)
+            block = []
+
def _parse_timed_blocks(file):
"""Parses (ie., splits) a file into so-called timed-blocks. A
timed-block consists of a timestamp on a line by itself followed
@@ -292,10 +319,18 @@ def _parse_proc_ps_log(writer, file):
* cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack,
* kstkesp, kstkeip}
"""
+ timed_blocks = _iter_parse_timed_blocks(file)
+ try:
+ first_timed_block = next(timed_blocks)
+ startTime = first_timed_block[0]
+ except StopIteration:
+ return None
+
processMap = {}
ltime = 0
- timed_blocks = _parse_timed_blocks(file)
- for time, lines in timed_blocks:
+ timed_blocks_count = 0
+ for time, lines in itertools.chain((first_timed_block,), timed_blocks):
+ timed_blocks_count += 1
for line in lines:
if not line: continue
tokens = line.split(' ')
@@ -325,13 +360,12 @@ def _parse_proc_ps_log(writer, file):
process.last_sys_cpu_time = sysCpu
ltime = time
- if len (timed_blocks) < 2:
+ if timed_blocks_count < 2:
return None
- startTime = timed_blocks[0][0]
- avgSampleLength = (ltime - startTime)/(len (timed_blocks) - 1)
+ avgSampleLength = (ltime - startTime)/(timed_blocks_count - 1)
- return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime)
+ return ProcessStats (writer, processMap, timed_blocks_count, avgSampleLength, startTime, ltime)
def _parse_taskstats_log(writer, file):
"""