diff options
author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2012-03-11 14:30:31 +0000 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2012-03-12 02:22:34 +0000 |
commit | 2d56dc7b1f0d186e14c4c8a949b280b6b3fc31de (patch) | |
tree | 7747426c25a0aee27f24a06af1c793cc920160d4 /lib/bb | |
parent | cdd5d0dee6ab12326b252b6b505a316a52638cac (diff) | |
download | bitbake-contrib-2d56dc7b1f0d186e14c4c8a949b280b6b3fc31de.tar.gz |
codeparser: Call intern over the set contents for better cache performance
See the comment in the code in the commit for more information.
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'lib/bb')
-rw-r--r-- | lib/bb/codeparser.py | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/lib/bb/codeparser.py b/lib/bb/codeparser.py index 04a34f944..af2e19411 100644 --- a/lib/bb/codeparser.py +++ b/lib/bb/codeparser.py @@ -98,6 +98,12 @@ def parser_cache_save(d): bb.utils.unlockfile(lf) bb.utils.unlockfile(glf) +def internSet(items): + new = set() + for i in items: + new.add(intern(i)) + return new + def parser_cache_savemerge(d): cachefile = parser_cachefile(d) if not cachefile: @@ -133,6 +139,21 @@ def parser_cache_savemerge(d): data[1][h] = extradata[1][h] os.unlink(f) + # When the dicts are originally created, python calls intern() on the set keys + # which significantly improves memory usage. Sadly the pickle/unpickle process + # doesn't call intern() on the keys and results in the same strings being duplicated + # in memory. This also means pickle will save the same string multiple times in + # the cache file. By interning the data here, the cache file shrinks dramatically + # meaning faster load times and the reloaded cache files also consume much less + # memory. This is worth any performance hit from these loops and the use of the + # intern() data storage. + # Python 3.x may behave better in this area + for h in data[0]: + data[0][h]["refs"] = internSet(data[0][h]["refs"]) + data[0][h]["execs"] = internSet(data[0][h]["execs"]) + for h in data[1]: + data[1][h]["execs"] = internSet(data[1][h]["execs"]) + p = pickle.Pickler(file(cachefile, "wb"), -1) p.dump([data, PARSERCACHE_VERSION]) |