diff options
author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2019-12-11 23:11:50 +0000 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2019-12-12 13:07:44 +0000 |
commit | c4b8440f730c33eaf9f818b856ae81b2f1017fec (patch) | |
tree | ed3aff6329bac02c52fdaa13c08dc5c9c2133206 | |
parent | db033a8f8a276d864bdb2e1eef159ab5794a0658 (diff) | |
download | bitbake-c4b8440f730c33eaf9f818b856ae81b2f1017fec.tar.gz |
siggen: Fix hashequiv performance issues
We're seeing huge slowdowns on large builds on the autobuilder. A qemux86 world
build was running really slowly, a primary feature was lots of rehashing going on
due to an sstate change which caused a rebuild when all output should be identical.
This was traced to the hundreds of thousands of calls to get_unihash() from
get_taskhash(). If we simplify the unitaskhashes data structure, we can bypass
the function call and access the data directly. In local profile charts,
this significantly sped up process_possible_migrations(), both on profiles
and visually.
Whilst this change doesn't aid readability, it does solve an otherwise
huge performance issue.
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r-- | lib/bb/siggen.py | 27 |
1 files changed, 13 insertions, 14 deletions
diff --git a/lib/bb/siggen.py b/lib/bb/siggen.py index dbf510238..517ec7b61 100644 --- a/lib/bb/siggen.py +++ b/lib/bb/siggen.py @@ -121,7 +121,7 @@ class SignatureGeneratorBasic(SignatureGenerator): else: self.checksum_cache = None - self.unihash_cache = bb.cache.SimpleCache("1") + self.unihash_cache = bb.cache.SimpleCache("2") self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {}) def init_rundepcheck(self, data): @@ -216,7 +216,13 @@ class SignatureGeneratorBasic(SignatureGenerator): continue if dep not in self.taskhash: bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep) - data = data + self.get_unihash(dep) + # We bypass the fuction and go straight to the cache here + # as this was a performance bottleneck otherwise + if self.taskhash[dep] in self.unitaskhashes: + unihash = self.unitaskhashes[self.taskhash[dep]] + else: + unihash = self.get_unihash(dep) + data = data + unihash self.runtaskdeps[tid].append(dep) if task in dataCache.file_checksums[fn]: @@ -387,24 +393,19 @@ class SignatureGeneratorUniHashMixIn(object): self._client = hashserv.create_client(self.server) return self._client - def __get_task_unihash_key(self, tid): - # TODO: The key only *needs* to be the taskhash, the tid is just - # convenient - return '%s:%s' % (tid.rsplit("/", 1)[1], self.taskhash[tid]) - def get_stampfile_hash(self, tid): if tid in self.taskhash: # If a unique hash is reported, use it as the stampfile hash. 
This # ensures that if a task won't be re-run if the taskhash changes, # but it would result in the same output hash - unihash = self.unitaskhashes.get(self.__get_task_unihash_key(tid), None) + unihash = self.unitaskhashes.get(self.taskhash[tid], None) if unihash is not None: return unihash return super().get_stampfile_hash(tid) def set_unihash(self, tid, unihash): - self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash + self.unitaskhashes[self.taskhash[tid]] = unihash def get_unihash(self, tid): taskhash = self.taskhash[tid] @@ -413,11 +414,9 @@ class SignatureGeneratorUniHashMixIn(object): if self.setscenetasks and tid not in self.setscenetasks: return taskhash - key = self.__get_task_unihash_key(tid) - # TODO: This cache can grow unbounded. It probably only needs to keep # for each task - unihash = self.unitaskhashes.get(key, None) + unihash = self.unitaskhashes.get(taskhash, None) if unihash is not None: return unihash @@ -449,7 +448,7 @@ class SignatureGeneratorUniHashMixIn(object): except hashserv.client.HashConnectionError as e: bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) - self.unitaskhashes[key] = unihash + self.unitaskhashes[taskhash] = unihash return unihash def report_unihash(self, path, task, d): @@ -467,7 +466,7 @@ class SignatureGeneratorUniHashMixIn(object): return # Sanity checks - cache_unihash = self.unitaskhashes.get(key, None) + cache_unihash = self.unitaskhashes.get(taskhash, None) if cache_unihash is None: bb.fatal('%s not in unihash cache. Please report this error' % key) |