aboutsummaryrefslogtreecommitdiffstats
path: root/lib/bb
diff options
context:
space:
mode:
authorJoshua Watt <jpewhacker@gmail.com>2019-09-17 08:37:11 -0500
committerRichard Purdie <richard.purdie@linuxfoundation.org>2019-09-18 17:48:25 +0100
commit2124eec3a5830afe8e07ffb6f2a0df6a417ac973 (patch)
treeec54ecf492198a28628a7c251b6a3ea524e430c4 /lib/bb
parent22082c7b3ca0cffcedb7d1d8c6681d35286376db (diff)
downloadbitbake-2124eec3a5830afe8e07ffb6f2a0df6a417ac973.tar.gz
bitbake: Rework hash equivalence
Reworks the hash equivalence server to address performance issues that were encountered with the REST mechanism used previously, particularly during the heavy request load encountered during signature generation. Notable changes are: 1) The server protocol is no longer HTTP based. Instead, it uses a simpler JSON over a streaming protocol link. This protocol has much lower overhead than HTTP since it eliminates the HTTP headers. 2) The hash equivalence server can either bind to a TCP port, or a Unix domain socket. Unix domain sockets are more efficient for local communication, and so are preferred if the user enables hash equivalence only for the local build. The arguments to the 'bitbake-hashserve' command have been updated accordingly. 3) The value to which BB_HASHSERVE should be set to enable a local hash equivalence server is changed to "auto" instead of "localhost:0". The latter didn't make sense when the local server was using a Unix domain socket. 4) Clients are expected to keep a persistent connection to the server instead of creating a new connection each time a request is made for optimal performance. 5) Most of the client logic has been moved to the hashserve module in bitbake. This makes it easier to share the client code. 6) A new bitbake command has been added called 'bitbake-hashclient'. This command can be used to query a hash equivalence server, including fetching the statistics and running a performance stress test. 7) The table indexes in the SQLite database have been updated to optimize hash lookups. This change is backward compatible, as the database will delete the old indexes first if they exist. 8) The server has been reworked to use python async to maximize performance with persistently connected clients. This requires Python 3.5 or later. Signed-off-by: Joshua Watt <JPEWhacker@gmail.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'lib/bb')
-rw-r--r--lib/bb/cooker.py17
-rw-r--r--lib/bb/runqueue.py4
-rw-r--r--lib/bb/siggen.py74
-rw-r--r--lib/bb/tests/runqueue.py19
4 files changed, 50 insertions, 64 deletions
diff --git a/lib/bb/cooker.py b/lib/bb/cooker.py
index e46868ddd..0c540028a 100644
--- a/lib/bb/cooker.py
+++ b/lib/bb/cooker.py
@@ -194,7 +194,7 @@ class BBCooker:
self.ui_cmdline = None
self.hashserv = None
- self.hashservport = None
+ self.hashservaddr = None
self.initConfigurationData()
@@ -392,19 +392,20 @@ class BBCooker:
except prserv.serv.PRServiceConfigError as e:
bb.fatal("Unable to start PR Server, exitting")
- if self.data.getVar("BB_HASHSERVE") == "localhost:0":
+ if self.data.getVar("BB_HASHSERVE") == "auto":
+ # Create a new hash server bound to a unix domain socket
if not self.hashserv:
dbfile = (self.data.getVar("PERSISTENT_DIR") or self.data.getVar("CACHE")) + "/hashserv.db"
- self.hashserv = hashserv.create_server(('localhost', 0), dbfile, '')
- self.hashservport = "localhost:" + str(self.hashserv.server_port)
+ self.hashservaddr = "unix://%s/hashserve.sock" % self.data.getVar("TOPDIR")
+ self.hashserv = hashserv.create_server(self.hashservaddr, dbfile, sync=False)
self.hashserv.process = multiprocessing.Process(target=self.hashserv.serve_forever)
self.hashserv.process.daemon = True
self.hashserv.process.start()
- self.data.setVar("BB_HASHSERVE", self.hashservport)
- self.databuilder.origdata.setVar("BB_HASHSERVE", self.hashservport)
- self.databuilder.data.setVar("BB_HASHSERVE", self.hashservport)
+ self.data.setVar("BB_HASHSERVE", self.hashservaddr)
+ self.databuilder.origdata.setVar("BB_HASHSERVE", self.hashservaddr)
+ self.databuilder.data.setVar("BB_HASHSERVE", self.hashservaddr)
for mc in self.databuilder.mcdata:
- self.databuilder.mcdata[mc].setVar("BB_HASHSERVE", self.hashservport)
+ self.databuilder.mcdata[mc].setVar("BB_HASHSERVE", self.hashservaddr)
bb.parse.init_parser(self.data)
diff --git a/lib/bb/runqueue.py b/lib/bb/runqueue.py
index 45bfec8c3..314a30908 100644
--- a/lib/bb/runqueue.py
+++ b/lib/bb/runqueue.py
@@ -1260,7 +1260,7 @@ class RunQueue:
"buildname" : self.cfgData.getVar("BUILDNAME"),
"date" : self.cfgData.getVar("DATE"),
"time" : self.cfgData.getVar("TIME"),
- "hashservport" : self.cooker.hashservport,
+ "hashservaddr" : self.cooker.hashservaddr,
}
worker.stdin.write(b"<cookerconfig>" + pickle.dumps(self.cooker.configuration) + b"</cookerconfig>")
@@ -2174,7 +2174,7 @@ class RunQueueExecute:
ret.add(dep)
return ret
- # We filter out multiconfig dependencies from taskdepdata we pass to the tasks
+ # We filter out multiconfig dependencies from taskdepdata we pass to the tasks
# as most code can't handle them
def build_taskdepdata(self, task):
taskdepdata = {}
diff --git a/lib/bb/siggen.py b/lib/bb/siggen.py
index 8b593a348..e047c217e 100644
--- a/lib/bb/siggen.py
+++ b/lib/bb/siggen.py
@@ -13,6 +13,7 @@ import difflib
import simplediff
from bb.checksum import FileChecksumCache
from bb import runqueue
+import hashserv
logger = logging.getLogger('BitBake.SigGen')
@@ -375,6 +376,11 @@ class SignatureGeneratorUniHashMixIn(object):
self.server, self.method = data[:2]
super().set_taskdata(data[2:])
+ def client(self):
+ if getattr(self, '_client', None) is None:
+ self._client = hashserv.create_client(self.server)
+ return self._client
+
def __get_task_unihash_key(self, tid):
# TODO: The key only *needs* to be the taskhash, the tid is just
# convenient
@@ -395,9 +401,6 @@ class SignatureGeneratorUniHashMixIn(object):
self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash
def get_unihash(self, tid):
- import urllib
- import json
-
taskhash = self.taskhash[tid]
# If its not a setscene task we can return
@@ -428,36 +431,22 @@ class SignatureGeneratorUniHashMixIn(object):
unihash = taskhash
try:
- url = '%s/v1/equivalent?%s' % (self.server,
- urllib.parse.urlencode({'method': self.method, 'taskhash': self.taskhash[tid]}))
-
- request = urllib.request.Request(url)
- response = urllib.request.urlopen(request)
- data = response.read().decode('utf-8')
-
- json_data = json.loads(data)
-
- if json_data:
- unihash = json_data['unihash']
+ data = self.client().get_unihash(self.method, self.taskhash[tid])
+ if data:
+ unihash = data
# A unique hash equal to the taskhash is not very interesting,
# so it is reported it at debug level 2. If they differ, that
# is much more interesting, so it is reported at debug level 1
bb.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
else:
bb.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
- except urllib.error.URLError as e:
- bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
- except (KeyError, json.JSONDecodeError) as e:
- bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e)))
+ except hashserv.HashConnectionError as e:
+ bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
self.unitaskhashes[key] = unihash
return unihash
def report_unihash(self, path, task, d):
- import urllib
- import json
- import tempfile
- import base64
import importlib
taskhash = d.getVar('BB_TASKHASH')
@@ -492,42 +481,31 @@ class SignatureGeneratorUniHashMixIn(object):
outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)
try:
- url = '%s/v1/equivalent' % self.server
- task_data = {
- 'taskhash': taskhash,
- 'method': self.method,
- 'outhash': outhash,
- 'unihash': unihash,
- 'owner': d.getVar('SSTATE_HASHEQUIV_OWNER')
- }
+ extra_data = {}
+
+ owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
+ if owner:
+ extra_data['owner'] = owner
if report_taskdata:
sigfile.seek(0)
- task_data['PN'] = d.getVar('PN')
- task_data['PV'] = d.getVar('PV')
- task_data['PR'] = d.getVar('PR')
- task_data['task'] = task
- task_data['outhash_siginfo'] = sigfile.read().decode('utf-8')
-
- headers = {'content-type': 'application/json'}
-
- request = urllib.request.Request(url, json.dumps(task_data).encode('utf-8'), headers)
- response = urllib.request.urlopen(request)
- data = response.read().decode('utf-8')
+ extra_data['PN'] = d.getVar('PN')
+ extra_data['PV'] = d.getVar('PV')
+ extra_data['PR'] = d.getVar('PR')
+ extra_data['task'] = task
+ extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')
- json_data = json.loads(data)
- new_unihash = json_data['unihash']
+ data = self.client().report_unihash(taskhash, self.method, outhash, unihash, extra_data)
+ new_unihash = data['unihash']
if new_unihash != unihash:
bb.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
else:
bb.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
- except urllib.error.URLError as e:
- bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
- except (KeyError, json.JSONDecodeError) as e:
- bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e)))
+ except hashserv.HashConnectionError as e:
+ bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
finally:
if sigfile:
sigfile.close()
@@ -548,7 +526,7 @@ class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureG
name = "TestEquivHash"
def init_rundepcheck(self, data):
super().init_rundepcheck(data)
- self.server = "http://" + data.getVar('BB_HASHSERVE')
+ self.server = data.getVar('BB_HASHSERVE')
self.method = "sstate_output_hash"
diff --git a/lib/bb/tests/runqueue.py b/lib/bb/tests/runqueue.py
index c7f5e5572..cb4d526f1 100644
--- a/lib/bb/tests/runqueue.py
+++ b/lib/bb/tests/runqueue.py
@@ -11,6 +11,7 @@ import bb
import os
import tempfile
import subprocess
+import sys
#
# TODO:
@@ -232,10 +233,11 @@ class RunQueueTests(unittest.TestCase):
self.assertEqual(set(tasks), set(expected))
+ @unittest.skipIf(sys.version_info < (3, 5, 0), 'Python 3.5 or later required')
def test_hashserv_single(self):
with tempfile.TemporaryDirectory(prefix="runqueuetest") as tempdir:
extraenv = {
- "BB_HASHSERVE" : "localhost:0",
+ "BB_HASHSERVE" : "auto",
"BB_SIGNATURE_HANDLER" : "TestEquivHash"
}
cmd = ["bitbake", "a1", "b1"]
@@ -255,10 +257,11 @@ class RunQueueTests(unittest.TestCase):
'a1:package_write_ipk_setscene', 'a1:package_qa_setscene']
self.assertEqual(set(tasks), set(expected))
+ @unittest.skipIf(sys.version_info < (3, 5, 0), 'Python 3.5 or later required')
def test_hashserv_double(self):
with tempfile.TemporaryDirectory(prefix="runqueuetest") as tempdir:
extraenv = {
- "BB_HASHSERVE" : "localhost:0",
+ "BB_HASHSERVE" : "auto",
"BB_SIGNATURE_HANDLER" : "TestEquivHash"
}
cmd = ["bitbake", "a1", "b1", "e1"]
@@ -278,11 +281,12 @@ class RunQueueTests(unittest.TestCase):
self.assertEqual(set(tasks), set(expected))
+ @unittest.skipIf(sys.version_info < (3, 5, 0), 'Python 3.5 or later required')
def test_hashserv_multiple_setscene(self):
# Runs e1:do_package_setscene twice
with tempfile.TemporaryDirectory(prefix="runqueuetest") as tempdir:
extraenv = {
- "BB_HASHSERVE" : "localhost:0",
+ "BB_HASHSERVE" : "auto",
"BB_SIGNATURE_HANDLER" : "TestEquivHash"
}
cmd = ["bitbake", "a1", "b1", "e1"]
@@ -308,11 +312,12 @@ class RunQueueTests(unittest.TestCase):
else:
self.assertEqual(tasks.count(i), 1, "%s not in task list once" % i)
+ @unittest.skipIf(sys.version_info < (3, 5, 0), 'Python 3.5 or later required')
def test_hashserv_partial_match(self):
# e1:do_package matches initial built but not second hash value
with tempfile.TemporaryDirectory(prefix="runqueuetest") as tempdir:
extraenv = {
- "BB_HASHSERVE" : "localhost:0",
+ "BB_HASHSERVE" : "auto",
"BB_SIGNATURE_HANDLER" : "TestEquivHash"
}
cmd = ["bitbake", "a1", "b1"]
@@ -336,11 +341,12 @@ class RunQueueTests(unittest.TestCase):
expected.remove('e1:package')
self.assertEqual(set(tasks), set(expected))
+ @unittest.skipIf(sys.version_info < (3, 5, 0), 'Python 3.5 or later required')
def test_hashserv_partial_match2(self):
# e1:do_package + e1:do_populate_sysroot matches initial built but not second hash value
with tempfile.TemporaryDirectory(prefix="runqueuetest") as tempdir:
extraenv = {
- "BB_HASHSERVE" : "localhost:0",
+ "BB_HASHSERVE" : "auto",
"BB_SIGNATURE_HANDLER" : "TestEquivHash"
}
cmd = ["bitbake", "a1", "b1"]
@@ -363,13 +369,14 @@ class RunQueueTests(unittest.TestCase):
'e1:package_setscene', 'e1:populate_sysroot_setscene', 'e1:build', 'e1:package_qa', 'e1:package_write_rpm', 'e1:package_write_ipk', 'e1:packagedata']
self.assertEqual(set(tasks), set(expected))
+ @unittest.skipIf(sys.version_info < (3, 5, 0), 'Python 3.5 or later required')
def test_hashserv_partial_match3(self):
# e1:do_package is valid for a1 but not after b1
# In former buggy code, this triggered e1:do_fetch, then e1:do_populate_sysroot to run
# with none of the intermediate tasks which is a serious bug
with tempfile.TemporaryDirectory(prefix="runqueuetest") as tempdir:
extraenv = {
- "BB_HASHSERVE" : "localhost:0",
+ "BB_HASHSERVE" : "auto",
"BB_SIGNATURE_HANDLER" : "TestEquivHash"
}
cmd = ["bitbake", "a1", "b1"]