summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoshua Watt <JPEWhacker@gmail.com>2022-08-03 09:04:41 -0500
committerRichard Purdie <richard.purdie@linuxfoundation.org>2022-08-09 16:24:42 +0100
commit97955f3c1c738aa4b4478a6ec10a08094ffc689d (patch)
tree8a761edefd9b9d4c959ba29682d949bae41f7afa
parent8efced47fcb47851a370fd6786df6fb377f99963 (diff)
downloadbitbake-97955f3c1c738aa4b4478a6ec10a08094ffc689d.tar.gz
siggen: Fix insufficient entropy in sigtask file names
Signature generation uses mkstemp() to get a file descriptor to a unique file and then write the signature into it. However, the unique file name generation in glibc is based on the system timestamp, which means that with highly parallel builds it is more likely than one might expect that a conflict will occur between two different builder nodes. When operating over NFS (such as a shared sstate cache), this can cause race conditions and rare failures (particularly with NFS servers that may not correctly implement O_EXCL). The signature generation code is particularly susceptible to races since a single "sigtask." prefix is used for all signatures from all tasks, which makes collision even more likely. To work around this, add an internal implementation of mkstemp() that adds additional truly random entropy to the file name to eliminate conflicts. Signed-off-by: Joshua Watt <JPEWhacker@gmail.com> Signed-off-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
-rw-r--r--lib/bb/siggen.py2
-rw-r--r--lib/bb/utils.py21
2 files changed, 22 insertions, 1 deletions
diff --git a/lib/bb/siggen.py b/lib/bb/siggen.py
index 3f3d6df54..bb80343a9 100644
--- a/lib/bb/siggen.py
+++ b/lib/bb/siggen.py
@@ -425,7 +425,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)
- fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
+ fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
try:
with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
diff --git a/lib/bb/utils.py b/lib/bb/utils.py
index 19ed68ea6..b8b90df8d 100644
--- a/lib/bb/utils.py
+++ b/lib/bb/utils.py
@@ -28,6 +28,8 @@ import signal
import collections
import copy
import ctypes
+import random
+import tempfile
from subprocess import getstatusoutput
from contextlib import contextmanager
from ctypes import cdll
@@ -1754,3 +1756,22 @@ def is_local_uid(uid=''):
if str(uid) == line_split[2]:
return True
return False
+
+def mkstemp(suffix=None, prefix=None, dir=None, text=False):
+ """
+ Generates a unique filename, independent of time.
+
+ mkstemp() in glibc (at least) generates unique file names based on the
+ current system time. When combined with highly parallel builds, and
+ operating over NFS (e.g. shared sstate/downloads) this can result in
+ conflicts and race conditions.
+
+ This function adds additional entropy to the file name so that a collision
+ is independent of time and thus extremely unlikely.
+ """
+ entropy = "".join(random.choices("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890", k=20))
+ if prefix:
+ prefix = prefix + entropy
+ else:
+ prefix = tempfile.gettempprefix() + entropy
+ return tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir, text=text)