aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Richard Purdie <richard.purdie@linuxfoundation.org> 2019-12-12 18:07:14 +0000
committer: Armin Kuster <akuster808@gmail.com> 2020-02-08 13:29:15 -0800
commit: 8f4f6c2f9acab23bc795ffe389c4cd74711d10ff (patch)
tree: b83a7c3e95bfe753ba23e7e1315955182201e23f
parent: c131015f1ac152f1fea4b83a3d451c3e4d05ebec (diff)
download: bitbake-contrib-8f4f6c2f9acab23bc795ffe389c4cd74711d10ff.tar.gz
siggen: Optimise get_unihash disk based cache handling
Currently the cache can grow huge, since any previously used hash is retained in the cache. This change moves to using one hash per task, which improves the speed of the functions considerably.

Currently performance is an issue, as are very large cache files and cache load time.

By moving to a single hash per task, the shortened filename is no longer usable as a key, because the same recipe has multiple variants for the same filename, so this has to change.

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
(cherry picked from commit ed764e7fcf04b6d0ba6b4cac7415b1ee8f492865)
Signed-off-by: Armin Kuster <akuster808@gmail.com>
-rw-r--r-- lib/bb/siggen.py 50
1 files changed, 32 insertions, 18 deletions
diff --git a/lib/bb/siggen.py b/lib/bb/siggen.py
index ded1da020..209a34288 100644
--- a/lib/bb/siggen.py
+++ b/lib/bb/siggen.py
@@ -44,6 +44,7 @@ class SignatureGenerator(object):
self.file_checksum_values = {}
self.taints = {}
self.unitaskhashes = {}
+ self.tidtopn = {}
self.setscenetasks = set()
def finalise(self, fn, d, varient):
@@ -79,19 +80,19 @@ class SignatureGenerator(object):
return
def get_taskdata(self):
- return (self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.setscenetasks)
+ return (self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)
def set_taskdata(self, data):
- self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.setscenetasks = data
+ self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data
def reset(self, data):
self.__init__(data)
def get_taskhashes(self):
- return self.taskhash, self.unitaskhashes
+ return self.taskhash, self.unitaskhashes, self.tidtopn
def set_taskhashes(self, hashes):
- self.taskhash, self.unitaskhashes = hashes
+ self.taskhash, self.unitaskhashes, self.tidtopn = hashes
def save_unitaskhashes(self):
return
@@ -124,9 +125,10 @@ class SignatureGeneratorBasic(SignatureGenerator):
else:
self.checksum_cache = None
- self.unihash_cache = bb.cache.SimpleCache("1")
+ self.unihash_cache = bb.cache.SimpleCache("3")
self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
+ self.tidtopn = {}
def init_rundepcheck(self, data):
self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
@@ -210,6 +212,9 @@ class SignatureGeneratorBasic(SignatureGenerator):
self.runtaskdeps[tid] = []
self.file_checksum_values[tid] = []
recipename = dataCache.pkg_fn[fn]
+
+ self.tidtopn[tid] = recipename
+
for dep in sorted(deps, key=clean_basepath):
(depmc, _, deptaskname, depfn) = bb.runqueue.split_tid_mcfn(dep)
if mc != depmc:
@@ -407,24 +412,35 @@ class SignatureGeneratorUniHashMixIn(object):
self._client = hashserv.create_client(self.server)
return self._client
- def __get_task_unihash_key(self, tid):
- # TODO: The key only *needs* to be the taskhash, the tid is just
- # convenient
- return '%s:%s' % (tid.rsplit("/", 1)[1], self.taskhash[tid])
-
def get_stampfile_hash(self, tid):
if tid in self.taskhash:
# If a unique hash is reported, use it as the stampfile hash. This
# ensures that if a task won't be re-run if the taskhash changes,
# but it would result in the same output hash
- unihash = self.unitaskhashes.get(self.__get_task_unihash_key(tid), None)
+ unihash = self._get_unihash(tid)
if unihash is not None:
return unihash
return super().get_stampfile_hash(tid)
def set_unihash(self, tid, unihash):
- self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash
+ (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
+ key = mc + ":" + self.tidtopn[tid] + ":" + taskname
+ self.unitaskhashes[key] = (self.taskhash[tid], unihash)
+
+ def _get_unihash(self, tid, checkkey=None):
+ if tid not in self.tidtopn:
+ return None
+ (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
+ key = mc + ":" + self.tidtopn[tid] + ":" + taskname
+ if key not in self.unitaskhashes:
+ return None
+ if not checkkey:
+ checkkey = self.taskhash[tid]
+ (key, unihash) = self.unitaskhashes[key]
+ if key != checkkey:
+ return None
+ return unihash
def get_unihash(self, tid):
taskhash = self.taskhash[tid]
@@ -433,11 +449,9 @@ class SignatureGeneratorUniHashMixIn(object):
if self.setscenetasks and tid not in self.setscenetasks:
return taskhash
- key = self.__get_task_unihash_key(tid)
-
# TODO: This cache can grow unbounded. It probably only needs to keep
# for each task
- unihash = self.unitaskhashes.get(key, None)
+ unihash = self._get_unihash(tid)
if unihash is not None:
return unihash
@@ -472,7 +486,7 @@ class SignatureGeneratorUniHashMixIn(object):
except hashserv.client.HashConnectionError as e:
bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
- self.unitaskhashes[key] = unihash
+ self.set_unihash(tid, unihash)
return unihash
def report_unihash(self, path, task, d):
@@ -484,13 +498,13 @@ class SignatureGeneratorUniHashMixIn(object):
tempdir = d.getVar('T')
fn = d.getVar('BB_FILENAME')
tid = fn + ':do_' + task
- key = tid.rsplit("/", 1)[1] + ':' + taskhash
+ key = tid + ':' + taskhash
if self.setscenetasks and tid not in self.setscenetasks:
return
# Sanity checks
- cache_unihash = self.unitaskhashes.get(key, None)
+ cache_unihash = self._get_unihash(tid, checkkey=taskhash)
if cache_unihash is None:
bb.fatal('%s not in unihash cache. Please report this error' % key)