author    Paul Eggleton <paul.eggleton@linux.intel.com>  2012-05-23 00:23:31 +0100
committer Richard Purdie <richard.purdie@linuxfoundation.org>  2012-05-23 11:33:18 +0100
commit    d7b818b51f3e6dded0c0885cdfed5a24cda3b428 (patch)
tree      6cd18f70cb71682aad55a6079da32c25f60bcbf7 /bitbake
parent    644b30adfb8fb158a253712033f717aadf6f2c68 (diff)
download  openembedded-core-contrib-d7b818b51f3e6dded0c0885cdfed5a24cda3b428.tar.gz
bitbake: refactor out codeparser cache into a separate class
We want to be able to reuse most of this functionality for the file checksum cache.

(Bitbake rev: 0fe3cb1438d297f90dd0fc6b26362ecbff75c76d)

Signed-off-by: Paul Eggleton <paul.eggleton@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake')
-rw-r--r--  bitbake/lib/bb/cache.py      | 116
-rw-r--r--  bitbake/lib/bb/codeparser.py | 191
2 files changed, 171 insertions(+), 136 deletions(-)
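
[Editor's note: the point of the refactor is that any cache filled by multiple parsing processes can now subclass MultiProcessCache instead of reimplementing the lock-and-merge logic. Below is a minimal sketch of such a subclass, runnable inside a BitBake environment of this era; the class name, cache file name, and checksum logic are hypothetical illustrations (the real file checksum cache landed in a later commit). Only the MultiProcessCache API (cache_file_name, CACHE_VERSION, create_cachedata, cachedata/cachedata_extras) comes from this patch.

    import bb.utils
    from bb.cache import MultiProcessCache

    class ExampleChecksumCache(MultiProcessCache):
        cache_file_name = "bb_example_checksum.dat"  # hypothetical name
        CACHE_VERSION = 1

        # One dict, mapping file path -> checksum. The base class default
        # already returns [{}]; shown here only for clarity.
        def create_cachedata(self):
            return [{}]

        def get_checksum(self, f):
            # Reads hit the merged cache first, then this process's extras.
            entry = self.cachedata[0].get(f) or self.cachedata_extras[0].get(f)
            if entry is None:
                # New entries go into cachedata_extras so that save_extras()
                # can write them out per-pid for the main process to merge.
                entry = bb.utils.md5_file(f)
                self.cachedata_extras[0][f] = entry
            return entry
]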
diff --git a/bitbake/lib/bb/cache.py b/bitbake/lib/bb/cache.py
index 47e814b577..36e6356f51 100644
--- a/bitbake/lib/bb/cache.py
+++ b/bitbake/lib/bb/cache.py
@@ -1,11 +1,12 @@
 # ex:ts=4:sw=4:sts=4:et
 # -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
 #
-# BitBake 'Event' implementation
+# BitBake Cache implementation
 #
 # Caching of bitbake variables before task execution
 
 # Copyright (C) 2006 Richard Purdie
+# Copyright (C) 2012 Intel Corporation
 
 # but small sections based on code from bin/bitbake:
 # Copyright (C) 2003, 2004 Chris Larson
@@ -703,4 +704,115 @@ class CacheData(object):
 
         for info in info_array:
             info.add_cacheData(self, fn)
-
+
+class MultiProcessCache(object):
+    """
+    BitBake multi-process cache implementation
+
+    Used by the codeparser & file checksum caches
+    """
+
+    def __init__(self):
+        self.cachefile = None
+        self.cachedata = self.create_cachedata()
+        self.cachedata_extras = self.create_cachedata()
+
+    def init_cache(self, d):
+        cachedir = (d.getVar("PERSISTENT_DIR", True) or
+                    d.getVar("CACHE", True))
+        if cachedir in [None, '']:
+            return
+        bb.utils.mkdirhier(cachedir)
+        self.cachefile = os.path.join(cachedir, self.__class__.cache_file_name)
+        logger.debug(1, "Using cache in '%s'", self.cachefile)
+
+        try:
+            p = pickle.Unpickler(file(self.cachefile, "rb"))
+            data, version = p.load()
+        except:
+            return
+
+        if version != self.__class__.CACHE_VERSION:
+            return
+
+        self.cachedata = data
+
+    def internSet(self, items):
+        new = set()
+        for i in items:
+            new.add(intern(i))
+        return new
+
+    def compress_keys(self, data):
+        # Override in subclasses if desired
+        return
+
+    def create_cachedata(self):
+        data = [{}]
+        return data
+
+    def save_extras(self, d):
+        if not self.cachefile:
+            return
+
+        glf = bb.utils.lockfile(self.cachefile + ".lock", shared=True)
+
+        i = os.getpid()
+        lf = None
+        while not lf:
+            lf = bb.utils.lockfile(self.cachefile + ".lock." + str(i), retry=False)
+            if not lf or os.path.exists(self.cachefile + "-" + str(i)):
+                if lf:
+                    bb.utils.unlockfile(lf)
+                lf = None
+                i = i + 1
+                continue
+
+            p = pickle.Pickler(file(self.cachefile + "-" + str(i), "wb"), -1)
+            p.dump([self.cachedata_extras, self.__class__.CACHE_VERSION])
+
+        bb.utils.unlockfile(lf)
+        bb.utils.unlockfile(glf)
+
+    def merge_data(self, source, dest):
+        for j in range(0, len(dest)):
+            for h in source[j]:
+                if h not in dest[j]:
+                    dest[j][h] = source[j][h]
+
+    def save_merge(self, d):
+        if not self.cachefile:
+            return
+
+        glf = bb.utils.lockfile(self.cachefile + ".lock")
+
+        try:
+            p = pickle.Unpickler(file(self.cachefile, "rb"))
+            data, version = p.load()
+        except (IOError, EOFError):
+            data, version = None, None
+
+        if version != self.__class__.CACHE_VERSION:
+            data = self.create_cachedata()
+
+        for f in [y for y in os.listdir(os.path.dirname(self.cachefile)) if y.startswith(os.path.basename(self.cachefile) + '-')]:
+            f = os.path.join(os.path.dirname(self.cachefile), f)
+            try:
+                p = pickle.Unpickler(file(f, "rb"))
+                extradata, version = p.load()
+            except (IOError, EOFError):
+                extradata, version = self.create_cachedata(), None
+
+            if version != self.__class__.CACHE_VERSION:
+                continue
+
+            self.merge_data(extradata, data)
+            os.unlink(f)
+
+        self.compress_keys(data)
+
+        p = pickle.Pickler(file(self.cachefile, "wb"), -1)
+        p.dump([data, self.__class__.CACHE_VERSION])
+
+        bb.utils.unlockfile(glf)
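
[Editor's note: worth spelling out before the codeparser half of the diff: save_extras() and save_merge() split the old save logic into a two-phase protocol. Each parsing process takes the shared .lock, claims a private cachefile-<pid> slot (bumping the number if the slot is locked or already written), and dumps only its cachedata_extras there; the main process later takes the .lock exclusively, folds every cachefile-<pid> into the main file via merge_data(), and interns the keys via compress_keys(). A sketch of the intended calling sequence, assuming the usual BitBake datastore d:

    cache = CodeParserCache()   # defined in codeparser.py below
    cache.init_cache(d)         # load bb_codeparser.dat, if present and version-matched

    # ... parsing populates cache.cachedata_extras with new entries ...

    cache.save_extras(d)        # worker processes: write extras to bb_codeparser.dat-<pid>
    cache.save_merge(d)         # main process only: merge the per-pid files back in
]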
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py
index af2e19411c..d7d3f513d9 100644
--- a/bitbake/lib/bb/codeparser.py
+++ b/bitbake/lib/bb/codeparser.py
@@ -5,10 +5,10 @@ import os.path
 import bb.utils, bb.data
 from itertools import chain
 from pysh import pyshyacc, pyshlex, sherrors
+from bb.cache import MultiProcessCache
 
 logger = logging.getLogger('BitBake.CodeParser')
 
-PARSERCACHE_VERSION = 2
 
 try:
     import cPickle as pickle
@@ -32,133 +32,56 @@ def check_indent(codestr):
 
     return codestr
 
-pythonparsecache = {}
-shellparsecache = {}
-pythonparsecacheextras = {}
-shellparsecacheextras = {}
-
-def parser_cachefile(d):
-    cachedir = (d.getVar("PERSISTENT_DIR", True) or
-                d.getVar("CACHE", True))
-    if cachedir in [None, '']:
-        return None
-    bb.utils.mkdirhier(cachedir)
-    cachefile = os.path.join(cachedir, "bb_codeparser.dat")
-    logger.debug(1, "Using cache in '%s' for codeparser cache", cachefile)
-    return cachefile
-
-def parser_cache_init(d):
-    global pythonparsecache
-    global shellparsecache
-
-    cachefile = parser_cachefile(d)
-    if not cachefile:
+class CodeParserCache(MultiProcessCache):
+    cache_file_name = "bb_codeparser.dat"
+    CACHE_VERSION = 2
+
+    def __init__(self):
+        MultiProcessCache.__init__(self)
+        self.pythoncache = self.cachedata[0]
+        self.shellcache = self.cachedata[1]
+        self.pythoncacheextras = self.cachedata_extras[0]
+        self.shellcacheextras = self.cachedata_extras[1]
+
+    def init_cache(self, d):
+        MultiProcessCache.init_cache(self, d)
+
+        # cachedata gets re-assigned in the parent
+        self.pythoncache = self.cachedata[0]
+        self.shellcache = self.cachedata[1]
+
+    def compress_keys(self, data):
+        # When the dicts are originally created, python calls intern() on the set keys
+        # which significantly improves memory usage. Sadly the pickle/unpickle process
+        # doesn't call intern() on the keys and results in the same strings being duplicated
+        # in memory. This also means pickle will save the same string multiple times in
+        # the cache file. By interning the data here, the cache file shrinks dramatically
+        # meaning faster load times and the reloaded cache files also consume much less
+        # memory. This is worth any performance hit from this loops and the use of the
+        # intern() data storage.
+        # Python 3.x may behave better in this area
+        for h in data[0]:
+            data[0][h]["refs"] = self.internSet(data[0][h]["refs"])
+            data[0][h]["execs"] = self.internSet(data[0][h]["execs"])
+        for h in data[1]:
+            data[1][h]["execs"] = self.internSet(data[1][h]["execs"])
         return
 
-    try:
-        p = pickle.Unpickler(file(cachefile, "rb"))
-        data, version = p.load()
-    except:
-        return
+    def create_cachedata(self):
+        data = [{}, {}]
+        return data
 
-    if version != PARSERCACHE_VERSION:
-        return
+codeparsercache = CodeParserCache()
 
-    pythonparsecache = data[0]
-    shellparsecache = data[1]
+def parser_cache_init(d):
+    codeparsercache.init_cache(d)
 
 def parser_cache_save(d):
-    cachefile = parser_cachefile(d)
-    if not cachefile:
-        return
-
-    glf = bb.utils.lockfile(cachefile + ".lock", shared=True)
-
-    i = os.getpid()
-    lf = None
-    while not lf:
-        shellcache = {}
-        pythoncache = {}
-
-        lf = bb.utils.lockfile(cachefile + ".lock." + str(i), retry=False)
-        if not lf or os.path.exists(cachefile + "-" + str(i)):
-            if lf:
-                bb.utils.unlockfile(lf)
-            lf = None
-            i = i + 1
-            continue
-
-        shellcache = shellparsecacheextras
-        pythoncache = pythonparsecacheextras
-
-        p = pickle.Pickler(file(cachefile + "-" + str(i), "wb"), -1)
-        p.dump([[pythoncache, shellcache], PARSERCACHE_VERSION])
-
-    bb.utils.unlockfile(lf)
-    bb.utils.unlockfile(glf)
-
-def internSet(items):
-    new = set()
-    for i in items:
-        new.add(intern(i))
-    return new
+    codeparsercache.save_extras(d)
 
 def parser_cache_savemerge(d):
-    cachefile = parser_cachefile(d)
-    if not cachefile:
-        return
-
-    glf = bb.utils.lockfile(cachefile + ".lock")
-
-    try:
-        p = pickle.Unpickler(file(cachefile, "rb"))
-        data, version = p.load()
-    except (IOError, EOFError):
-        data, version = None, None
-
-    if version != PARSERCACHE_VERSION:
-        data = [{}, {}]
-
-    for f in [y for y in os.listdir(os.path.dirname(cachefile)) if y.startswith(os.path.basename(cachefile) + '-')]:
-        f = os.path.join(os.path.dirname(cachefile), f)
-        try:
-            p = pickle.Unpickler(file(f, "rb"))
-            extradata, version = p.load()
-        except (IOError, EOFError):
-            extradata, version = [{}, {}], None
-
-        if version != PARSERCACHE_VERSION:
-            continue
-
-        for h in extradata[0]:
-            if h not in data[0]:
-                data[0][h] = extradata[0][h]
-        for h in extradata[1]:
-            if h not in data[1]:
-                data[1][h] = extradata[1][h]
-        os.unlink(f)
-
-    # When the dicts are originally created, python calls intern() on the set keys
-    # which significantly improves memory usage. Sadly the pickle/unpickle process
-    # doesn't call intern() on the keys and results in the same strings being duplicated
-    # in memory. This also means pickle will save the same string multiple times in
-    # the cache file. By interning the data here, the cache file shrinks dramatically
-    # meaning faster load times and the reloaded cache files also consume much less
-    # memory. This is worth any performance hit from this loops and the use of the
-    # intern() data storage.
-    # Python 3.x may behave better in this area
-    for h in data[0]:
-        data[0][h]["refs"] = internSet(data[0][h]["refs"])
-        data[0][h]["execs"] = internSet(data[0][h]["execs"])
-    for h in data[1]:
-        data[1][h]["execs"] = internSet(data[1][h]["execs"])
-
-    p = pickle.Pickler(file(cachefile, "wb"), -1)
-    p.dump([data, PARSERCACHE_VERSION])
-
-    bb.utils.unlockfile(glf)
-
+    codeparsercache.save_merge(d)
 
 Logger = logging.getLoggerClass()
 class BufferedLogger(Logger):
@@ -235,14 +158,14 @@ class PythonParser():
     def parse_python(self, node):
         h = hash(str(node))
 
-        if h in pythonparsecache:
-            self.references = pythonparsecache[h]["refs"]
-            self.execs = pythonparsecache[h]["execs"]
+        if h in codeparsercache.pythoncache:
+            self.references = codeparsercache.pythoncache[h]["refs"]
+            self.execs = codeparsercache.pythoncache[h]["execs"]
             return
 
-        if h in pythonparsecacheextras:
-            self.references = pythonparsecacheextras[h]["refs"]
-            self.execs = pythonparsecacheextras[h]["execs"]
+        if h in codeparsercache.pythoncacheextras:
+            self.references = codeparsercache.pythoncacheextras[h]["refs"]
+            self.execs = codeparsercache.pythoncacheextras[h]["execs"]
             return
 
@@ -256,9 +179,9 @@ class PythonParser():
         self.references.update(self.var_references)
         self.references.update(self.var_execs)
 
-        pythonparsecacheextras[h] = {}
-        pythonparsecacheextras[h]["refs"] = self.references
-        pythonparsecacheextras[h]["execs"] = self.execs
+        codeparsercache.pythoncacheextras[h] = {}
+        codeparsercache.pythoncacheextras[h]["refs"] = self.references
+        codeparsercache.pythoncacheextras[h]["execs"] = self.execs
 
 class ShellParser():
     def __init__(self, name, log):
@@ -276,12 +199,12 @@ class ShellParser():
         h = hash(str(value))
 
-        if h in shellparsecache:
-            self.execs = shellparsecache[h]["execs"]
+        if h in codeparsercache.shellcache:
+            self.execs = codeparsercache.shellcache[h]["execs"]
             return self.execs
 
-        if h in shellparsecacheextras:
-            self.execs = shellparsecacheextras[h]["execs"]
+        if h in codeparsercache.shellcacheextras:
+            self.execs = codeparsercache.shellcacheextras[h]["execs"]
             return self.execs
 
         try:
@@ -293,8 +216,8 @@ class ShellParser():
             self.process_tokens(token)
         self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)
 
-        shellparsecacheextras[h] = {}
-        shellparsecacheextras[h]["execs"] = self.execs
+        codeparsercache.shellcacheextras[h] = {}
+        codeparsercache.shellcacheextras[h]["execs"] = self.execs
 
         return self.execs
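
[Editor's note: the compress_keys() comment above describes a real pickle behaviour that is easy to demonstrate. A standalone sketch, not part of the patch; it is written for Python 3, where intern() lives at sys.intern rather than being the builtin this Python 2-era code uses:

    import pickle
    import sys

    # Three equal but distinct string objects, as a parse of many recipes
    # would produce; str.join defeats constant folding, so these are not
    # already the same object and each is written to the pickle stream.
    refs = ["".join(["do_", "compile"]) for _ in range(3)]
    data = pickle.loads(pickle.dumps(refs))

    print(data[0] == data[1])   # True  - same text
    print(data[0] is data[1])   # False - three separate copies in memory

    interned = [sys.intern(s) for s in data]
    print(interned[0] is interned[1])   # True - collapsed to one shared
                                        # object, which is what internSet()
                                        # achieves for the cached sets
]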