aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRichard Purdie <richard.purdie@linuxfoundation.org>2021-10-09 17:42:44 +0100
committerRichard Purdie <richard.purdie@linuxfoundation.org>2021-10-11 10:58:44 +0100
commit20d6ac753efa364349100cdc863e5eabec8e5b78 (patch)
tree131a11211215ba8a03468b12d40b99ae806f64f6
parentdff5a17558e2476064e85f35bad1fd65fec23600 (diff)
downloadbitbake-20d6ac753efa364349100cdc863e5eabec8e5b78.tar.gz
hashserv: Improve behaviour for better determinism/sstate reuse
We have a choice of policy with hash equivalence: whether to reduce sstate duplication in the sstate feed to a minimum, or to have maximal sstate reuse from the user's perspective. The challenge is that non-matching outhashes are generated due to determinism issues, or due to differences in host gcc version, architecture and so on, and the question is how to reconcile them. The approach before this patch is that any new match is added and matches can update. This has the side effect that a queried value from the server can change due to the replacement, so you may not always get the same value from the server. With the client-side caching bitbake has, this can be suboptimal, and when using the autobuilder sstate feed it results in poor artefact reuse. This patch switches to the other possible behaviour: once a hash is assigned, it doesn't change. This means some sstate artefacts may be duplicated, but dependency chains aren't invalidated, which I suspect may give better overall performance. Update the tests to match the new behaviour. Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r--lib/hashserv/server.py2
-rw-r--r--lib/hashserv/tests.py13
2 files changed, 7 insertions, 8 deletions
diff --git a/lib/hashserv/server.py b/lib/hashserv/server.py
index ef8227d43..d40a2ab8f 100644
--- a/lib/hashserv/server.py
+++ b/lib/hashserv/server.py
@@ -413,7 +413,7 @@ class ServerClient(bb.asyncrpc.AsyncServerConnection):
# A matching output hash was found. Set our taskhash to the
# same unihash since they are equivalent
unihash = row['unihash']
- resolve = Resolve.REPLACE
+ resolve = Resolve.IGNORE
else:
# No matching output hash was found. This is probably the
# first outhash to be added.
diff --git a/lib/hashserv/tests.py b/lib/hashserv/tests.py
index efaf3bdf4..f6b85aed8 100644
--- a/lib/hashserv/tests.py
+++ b/lib/hashserv/tests.py
@@ -392,15 +392,14 @@ class HashEquivalenceCommonTests(object):
result = self.client.report_unihash(taskhash2, self.METHOD, outhash3, unihash2)
self.assertEqual(result['unihash'], unihash2)
- # Report Task 2. This is equivalent to Task 1, so will pick up the
- # unihash from that task
+ # Report Task 2. This is equivalent to Task 1 but there is already a mapping for
+ # taskhash2 so it will report unihash2
result = self.client.report_unihash(taskhash2, self.METHOD, outhash1, unihash2)
- self.assertEqual(result['unihash'], unihash1)
+ self.assertEqual(result['unihash'], unihash2)
- # The originally reported unihash for Task 3 should have been updated
- # with the second report to use the new unihash from Task 1 (because is
- # shares a taskhash with Task 2)
- self.assertClientGetHash(self.client, taskhash2, unihash1)
+ # The originally reported unihash for Task 3 should be unchanged even if it
+ # shares a taskhash with Task 2
+ self.assertClientGetHash(self.client, taskhash2, unihash2)
class TestHashEquivalenceUnixServer(HashEquivalenceTestSetup, HashEquivalenceCommonTests, unittest.TestCase):
def get_server_addr(self, server_idx):