diff options
-rw-r--r-- | meta/classes/sstate.bbclass | 105 | ||||
-rw-r--r-- | meta/conf/bitbake.conf | 4 | ||||
-rw-r--r-- | meta/lib/oe/sstatesig.py | 167 |
3 files changed, 267 insertions, 9 deletions
diff --git a/meta/classes/sstate.bbclass b/meta/classes/sstate.bbclass
index 59ebc3ab5c..da0807d6e9 100644
--- a/meta/classes/sstate.bbclass
+++ b/meta/classes/sstate.bbclass
@@ -11,7 +11,7 @@ def generate_sstatefn(spec, hash, d):
 SSTATE_PKGARCH = "${PACKAGE_ARCH}"
 SSTATE_PKGSPEC = "sstate:${PN}:${PACKAGE_ARCH}${TARGET_VENDOR}-${TARGET_OS}:${PV}:${PR}:${SSTATE_PKGARCH}:${SSTATE_VERSION}:"
 SSTATE_SWSPEC = "sstate:${PN}::${PV}:${PR}::${SSTATE_VERSION}:"
-SSTATE_PKGNAME = "${SSTATE_EXTRAPATH}${@generate_sstatefn(d.getVar('SSTATE_PKGSPEC'), d.getVar('BB_TASKHASH'), d)}"
+SSTATE_PKGNAME = "${SSTATE_EXTRAPATH}${@generate_sstatefn(d.getVar('SSTATE_PKGSPEC'), d.getVar('BB_UNIHASH'), d)}"
 SSTATE_PKG = "${SSTATE_DIR}/${SSTATE_PKGNAME}"
 SSTATE_EXTRAPATH = ""
 SSTATE_EXTRAPATHWILDCARD = ""
@@ -82,6 +82,23 @@ SSTATE_SIG_PASSPHRASE ?= ""
 # Whether to verify the GnUPG signatures when extracting sstate archives
 SSTATE_VERIFY_SIG ?= "0"
 
+SSTATE_HASHEQUIV_METHOD ?= "OEOuthashBasic"
+SSTATE_HASHEQUIV_METHOD[doc] = "The function used to calculate the output hash \
+    for a task, which in turn is used to determine equivalency. \
+    "
+
+SSTATE_HASHEQUIV_SERVER ?= ""
+SSTATE_HASHEQUIV_SERVER[doc] = "The hash equivalence server. For example, \
+    'http://192.168.0.1:5000'. Do not include a trailing slash \
+    "
+
+SSTATE_HASHEQUIV_REPORT_TASKDATA ?= "0"
+SSTATE_HASHEQUIV_REPORT_TASKDATA[doc] = "Report additional useful data to the \
+    hash equivalency server, such as PN, PV, taskname, etc. This information \
+    is very useful for developers looking at task data, but may leak sensitive \
+    data if the equivalence server is public. \
+    "
+
 python () {
     if bb.data.inherits_class('native', d):
         d.setVar('SSTATE_PKGARCH', d.getVar('BUILD_ARCH', False))
@@ -640,7 +657,7 @@ def sstate_package(ss, d):
         return
 
     for f in (d.getVar('SSTATECREATEFUNCS') or '').split() + \
-             ['sstate_create_package', 'sstate_sign_package'] + \
+             ['sstate_report_unihash', 'sstate_create_package', 'sstate_sign_package'] + \
              (d.getVar('SSTATEPOSTCREATEFUNCS') or '').split():
         # All hooks should run in SSTATE_BUILDDIR.
         bb.build.exec_func(f, d, (sstatebuild,))
@@ -764,6 +781,73 @@ python sstate_sign_package () {
                     d.getVar('SSTATE_SIG_PASSPHRASE'), armor=False)
 }
 
+def OEOuthashBasic(path, sigfile, task, d):
+    import hashlib
+    import stat
+
+    def update_hash(s):
+        s = s.encode('utf-8')
+        h.update(s)
+        if sigfile:
+            sigfile.write(s)
+
+    h = hashlib.sha256()
+    prev_dir = os.getcwd()
+
+    try:
+        os.chdir(path)
+
+        update_hash("OEOuthashBasic\n")
+
+        # It is only currently useful to get equivalent hashes for things that
+        # can be restored from sstate. Since the sstate object is named using
+        # SSTATE_PKGSPEC and the task name, those should be included in the
+        # output hash calculation.
+        update_hash("SSTATE_PKGSPEC=%s\n" % d.getVar('SSTATE_PKGSPEC'))
+        update_hash("task=%s\n" % task)
+
+        for root, dirs, files in os.walk('.', topdown=True):
+            # Sort directories and files to ensure consistent ordering
+            dirs.sort()
+            files.sort()
+
+            for f in files:
+                path = os.path.join(root, f)
+                s = os.lstat(path)
+
+                # Hash file path
+                update_hash(path + '\n')
+
+                # Hash file mode (permission bits) and file type
+                update_hash("\tmode=0x%x\n" % stat.S_IMODE(s.st_mode))
+                update_hash("\ttype=0x%x\n" % stat.S_IFMT(s.st_mode))
+
+                if stat.S_ISBLK(s.st_mode) or stat.S_ISCHR(s.st_mode):
+                    # Hash device major and minor; device nodes are never opened
+                    update_hash("\tdev=%d,%d\n" % (os.major(s.st_rdev), os.minor(s.st_rdev)))
+                elif stat.S_ISLNK(s.st_mode):
+                    # Hash symbolic link target (the link is not followed)
+                    update_hash("\tsymlink=%s\n" % os.readlink(path))
+                else:
+                    fh = hashlib.sha256()
+                    # Hash file contents in chunks; do not rebind the datastore 'd'
+                    with open(path, 'rb') as fobj:
+                        for chunk in iter(lambda: fobj.read(4096), b""):
+                            fh.update(chunk)
+                    update_hash("\tdigest=%s\n" % fh.hexdigest())
+    finally:
+        os.chdir(prev_dir)
+
+    return h.hexdigest()
+
+python sstate_report_unihash() {
+    report_unihash = getattr(bb.parse.siggen, 'report_unihash', None)
+
+    if report_unihash:
+        ss = sstate_state_fromvars(d)
+        report_unihash(os.getcwd(), ss['task'], d)
+}
+
 #
 # Shell function to decompress and prepare a package for installation
 # Will be run from within SSTATE_INSTDIR.
@@ -788,6 +872,11 @@ def sstate_checkhashes(sq_fn, sq_task, sq_hash, sq_hashfn, d, siginfo=False, *,
     if siginfo:
         extension = extension + ".siginfo"
 
+    def gethash(task):
+        if sq_unihash is not None:
+            return sq_unihash[task]
+        return sq_hash[task]
+
     def getpathcomponents(task, d):
         # Magic data from BB_HASHFILENAME
         splithashfn = sq_hashfn[task].split(" ")
@@ -810,7 +899,7 @@ def sstate_checkhashes(sq_fn, sq_task, sq_hash, sq_hashfn, d, siginfo=False, *,
 
         spec, extrapath, tname = getpathcomponents(task, d)
 
-        sstatefile = d.expand("${SSTATE_DIR}/" + extrapath + generate_sstatefn(spec, sq_hash[task], d) + "_" + tname + extension)
+        sstatefile = d.expand("${SSTATE_DIR}/" + extrapath + generate_sstatefn(spec, gethash(task), d) + "_" + tname + extension)
 
         if os.path.exists(sstatefile):
             bb.debug(2, "SState: Found valid sstate file %s" % sstatefile)
@@ -872,7 +961,7 @@ def sstate_checkhashes(sq_fn, sq_task, sq_hash, sq_hashfn, d, siginfo=False, *,
             if task in ret:
                 continue
             spec, extrapath, tname = getpathcomponents(task, d)
-            sstatefile = d.expand(extrapath + generate_sstatefn(spec, sq_hash[task], d) + "_" + tname + extension)
+            sstatefile = d.expand(extrapath + generate_sstatefn(spec, gethash(task), d) + "_" + tname + extension)
             tasklist.append((task, sstatefile))
 
         if tasklist:
@@ -898,12 +987,12 @@ def sstate_checkhashes(sq_fn, sq_task, sq_hash, sq_hashfn, d, siginfo=False, *,
         evdata = {'missed': [], 'found': []};
         for task in missed:
             spec, extrapath, tname = getpathcomponents(task, d)
-            sstatefile = d.expand(extrapath + generate_sstatefn(spec, sq_hash[task], d) + "_" + tname + ".tgz")
-            evdata['missed'].append( (sq_fn[task], sq_task[task], sq_hash[task], sstatefile ) )
+            sstatefile = d.expand(extrapath + generate_sstatefn(spec, gethash(task), d) + "_" + tname + ".tgz")
+            evdata['missed'].append( (sq_fn[task], sq_task[task], gethash(task), sstatefile ) )
         for task in ret:
             spec, extrapath, tname = getpathcomponents(task, d)
-            sstatefile = d.expand(extrapath + generate_sstatefn(spec, sq_hash[task], d) + "_" + tname + ".tgz")
-            evdata['found'].append( (sq_fn[task], sq_task[task], sq_hash[task], sstatefile ) )
+            sstatefile = d.expand(extrapath + generate_sstatefn(spec, gethash(task), d) + "_" + tname + ".tgz")
+            evdata['found'].append( (sq_fn[task], sq_task[task], gethash(task), sstatefile ) )
         bb.event.fire(bb.event.MetadataEvent("MissedSstate", evdata), d)
 
     # Print some summary statistics about the current task completion and how much sstate
diff --git a/meta/conf/bitbake.conf b/meta/conf/bitbake.conf
index 6480062354..e64ce6a6da 100644
--- a/meta/conf/bitbake.conf
+++ b/meta/conf/bitbake.conf
@@ -867,7 +867,9 @@ BB_HASHBASE_WHITELIST ?= "TMPDIR FILE PATH PWD BB_TASKHASH BBPATH BBSERVER DL_DI
     STAMPS_DIR PRSERV_DUMPDIR PRSERV_DUMPFILE PRSERV_LOCKDOWN PARALLEL_MAKE \
     CCACHE_DIR EXTERNAL_TOOLCHAIN CCACHE CCACHE_NOHASHDIR LICENSE_PATH SDKPKGSUFFIX \
     WARN_QA ERROR_QA WORKDIR STAMPCLEAN PKGDATA_DIR BUILD_ARCH SSTATE_PKGARCH \
-    BB_WORKERCONTEXT BB_LIMITEDDEPS extend_recipe_sysroot DEPLOY_DIR"
+    BB_WORKERCONTEXT BB_LIMITEDDEPS BB_UNIHASH extend_recipe_sysroot DEPLOY_DIR \
+    SSTATE_HASHEQUIV_METHOD SSTATE_HASHEQUIV_SERVER SSTATE_HASHEQUIV_REPORT_TASKDATA \
+    SSTATE_HASHEQUIV_OWNER"
 BB_HASHCONFIG_WHITELIST ?= "${BB_HASHBASE_WHITELIST} DATE TIME SSH_AGENT_PID \
     SSH_AUTH_SOCK PSEUDO_BUILD BB_ENV_EXTRAWHITE DISABLE_SANITY_CHECKS \
     PARALLEL_MAKE BB_NUMBER_THREADS BB_ORIGENV BB_INVALIDCONF BBINCLUDED \
diff --git a/meta/lib/oe/sstatesig.py b/meta/lib/oe/sstatesig.py
index 18c5a353a2..059e165c7a 100644
--- a/meta/lib/oe/sstatesig.py
+++ b/meta/lib/oe/sstatesig.py
@@ -263,10 +263,177 @@ class SignatureGeneratorOEBasicHash(bb.siggen.SignatureGeneratorBasicHash):
         if error_msgs:
             bb.fatal("\n".join(error_msgs))
 
+class SignatureGeneratorOEEquivHash(SignatureGeneratorOEBasicHash):
+    name = "OEEquivHash"
+
+    def init_rundepcheck(self, data):
+        super().init_rundepcheck(data)
+        self.server = data.getVar('SSTATE_HASHEQUIV_SERVER')
+        self.method = data.getVar('SSTATE_HASHEQUIV_METHOD')
+        self.unihashes = bb.persist_data.persist('SSTATESIG_UNIHASH_CACHE_v1_' + self.method, data)
+
+    def get_taskdata(self):
+        return (self.server, self.method) + super().get_taskdata()
+
+    def set_taskdata(self, data):
+        self.server, self.method = data[:2]
+        super().set_taskdata(data[2:])
+
+    def __get_task_unihash_key(self, task):
+        # TODO: The key only *needs* to be the taskhash, the task is just
+        # convenient
+        return '%s:%s' % (task, self.taskhash[task])
+
+    def get_stampfile_hash(self, task):
+        if task in self.taskhash:
+            # If a unique hash is reported, use it as the stampfile hash. This
+            # ensures that a task won't be re-run if the taskhash changes but the
+            # task would still produce the same output hash
+            unihash = self.unihashes.get(self.__get_task_unihash_key(task))
+            if unihash is not None:
+                return unihash
+
+        return super().get_stampfile_hash(task)
+
+    def get_unihash(self, task):
+        import urllib.request  # also loads urllib.error and urllib.parse
+        import json
+
+        taskhash = self.taskhash[task]
+
+        key = self.__get_task_unihash_key(task)
+
+        # TODO: This cache can grow unbounded. It probably only needs to keep
+        # for each task
+        unihash = self.unihashes.get(key)
+        if unihash is not None:
+            return unihash
+
+        # In the absence of being able to discover a unique hash from the
+        # server, make it be equivalent to the taskhash. The unique "hash" only
+        # really needs to be a unique string (not even necessarily a hash), but
+        # making it match the taskhash has a few advantages:
+        #
+        # 1) All of the sstate code that assumes hashes can be the same
+        # 2) It provides maximal compatibility with builders that don't use
+        #    an equivalency server
+        # 3) The value is easy for multiple independent builders to derive the
+        #    same unique hash from the same input. This means that if the
+        #    independent builders find the same taskhash, but it isn't reported
+        #    to the server, there is a better chance that they will agree on
+        #    the unique hash.
+        unihash = taskhash
+
+        try:
+            url = '%s/v1/equivalent?%s' % (self.server,
+                    urllib.parse.urlencode({'method': self.method, 'taskhash': self.taskhash[task]}))
+
+            request = urllib.request.Request(url)
+            response = urllib.request.urlopen(request)
+            data = response.read().decode('utf-8')
+
+            json_data = json.loads(data)
+
+            if json_data:
+                unihash = json_data['unihash']
+                # A unique hash equal to the taskhash is not very interesting,
+                # so it is reported at debug level 2. If they differ, that
+                # is much more interesting, so it is reported at debug level 1
+                bb.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, task, self.server))
+            else:
+                bb.debug(2, 'No reported unihash for %s:%s from %s' % (task, taskhash, self.server))
+        except urllib.error.URLError as e:
+            bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
+        except (KeyError, json.JSONDecodeError) as e:
+            bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e)))
+
+        self.unihashes[key] = unihash
+        return unihash
+
+    def report_unihash(self, path, task, d):
+        import urllib.request  # also loads urllib.error and urllib.parse
+        import json
+        import tempfile
+        import base64
+
+        taskhash = d.getVar('BB_TASKHASH')
+        unihash = d.getVar('BB_UNIHASH')
+        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
+        tempdir = d.getVar('T')
+        fn = d.getVar('BB_FILENAME')
+        key = fn + '.do_' + task + ':' + taskhash
+
+        # Sanity checks
+        cache_unihash = self.unihashes.get(key)
+        if cache_unihash is None:
+            bb.fatal('%s not in unihash cache. Please report this error' % key)
+
+        if cache_unihash != unihash:
+            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))
+
+        sigfile = None
+        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
+        sigfile_link = "depsig.do_%s" % task
+
+        try:
+            call = self.method + '(path, sigfile, task, d)'
+            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')
+            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}
+
+            outhash = bb.utils.better_eval(call, locs)
+
+            try:
+                url = '%s/v1/equivalent' % self.server
+                task_data = {
+                    'taskhash': taskhash,
+                    'method': self.method,
+                    'outhash': outhash,
+                    'unihash': unihash,
+                    'owner': d.getVar('SSTATE_HASHEQUIV_OWNER')
+                    }
+
+                if report_taskdata:
+                    sigfile.seek(0)
+
+                    task_data['PN'] = d.getVar('PN')
+                    task_data['PV'] = d.getVar('PV')
+                    task_data['PR'] = d.getVar('PR')
+                    task_data['task'] = task
+                    task_data['outhash_siginfo'] = sigfile.read().decode('utf-8')
+
+                headers = {'content-type': 'application/json'}
+
+                request = urllib.request.Request(url, json.dumps(task_data).encode('utf-8'), headers)
+                response = urllib.request.urlopen(request)
+                data = response.read().decode('utf-8')
+
+                json_data = json.loads(data)
+                new_unihash = json_data['unihash']
+
+                if new_unihash != unihash:
+                    bb.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
+                else:
+                    bb.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
+            except urllib.error.URLError as e:
+                bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
+            except (KeyError, json.JSONDecodeError) as e:
+                bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e)))
+        finally:
+            if sigfile:
+                sigfile.close()
+
+                sigfile_link_path = os.path.join(tempdir, sigfile_link)
+                bb.utils.remove(sigfile_link_path)
+
+                try:
+                    os.symlink(sigfile_name, sigfile_link_path)
+                except OSError:
+                    pass
 
 # Insert these classes into siggen's namespace so it can see and select them
 bb.siggen.SignatureGeneratorOEBasic = SignatureGeneratorOEBasic
 bb.siggen.SignatureGeneratorOEBasicHash = SignatureGeneratorOEBasicHash
+bb.siggen.SignatureGeneratorOEEquivHash = SignatureGeneratorOEEquivHash
 
 
 def find_siginfo(pn, taskname, taskhashlist, d):