aboutsummaryrefslogtreecommitdiffstats
path: root/lib/bb/codeparser.py
diff options
context:
space:
mode:
authorRichard Purdie <richard.purdie@linuxfoundation.org>2012-03-11 14:30:31 +0000
committerRichard Purdie <richard.purdie@linuxfoundation.org>2012-03-12 02:22:34 +0000
commit2d56dc7b1f0d186e14c4c8a949b280b6b3fc31de (patch)
tree7747426c25a0aee27f24a06af1c793cc920160d4 /lib/bb/codeparser.py
parentcdd5d0dee6ab12326b252b6b505a316a52638cac (diff)
downloadbitbake-2d56dc7b1f0d186e14c4c8a949b280b6b3fc31de.tar.gz
codeparser: Call intern over the set contents for better cache performance
See the comment in the code in the commit for more information. Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'lib/bb/codeparser.py')
-rw-r--r--lib/bb/codeparser.py21
1 files changed, 21 insertions, 0 deletions
diff --git a/lib/bb/codeparser.py b/lib/bb/codeparser.py
index 04a34f944..af2e19411 100644
--- a/lib/bb/codeparser.py
+++ b/lib/bb/codeparser.py
@@ -98,6 +98,12 @@ def parser_cache_save(d):
bb.utils.unlockfile(lf)
bb.utils.unlockfile(glf)
+def internSet(items):
+ new = set()
+ for i in items:
+ new.add(intern(i))
+ return new
+
def parser_cache_savemerge(d):
cachefile = parser_cachefile(d)
if not cachefile:
@@ -133,6 +139,21 @@ def parser_cache_savemerge(d):
data[1][h] = extradata[1][h]
os.unlink(f)
+ # When the dicts are originally created, python calls intern() on the set keys
+ # which significantly improves memory usage. Sadly the pickle/unpickle process
+ # doesn't call intern() on the keys and results in the same strings being duplicated
+ # in memory. This also means pickle will save the same string multiple times in
+ # the cache file. By interning the data here, the cache file shrinks dramatically
+ # meaning faster load times and the reloaded cache files also consume much less
+ # memory. This is worth any performance hit from these loops and the use of the
+ # intern() data storage.
+ # Python 3.x may behave better in this area
+ for h in data[0]:
+ data[0][h]["refs"] = internSet(data[0][h]["refs"])
+ data[0][h]["execs"] = internSet(data[0][h]["execs"])
+ for h in data[1]:
+ data[1][h]["execs"] = internSet(data[1][h]["execs"])
+
p = pickle.Pickler(file(cachefile, "wb"), -1)
p.dump([data, PARSERCACHE_VERSION])