about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ross Burton <ross.burton@intel.com> 2018-08-13 19:02:25 +0100
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2018-08-29 15:20:13 +0100
commit: 41e6161c8ce8cc90ebc93d72852673ae60fac923 (patch)
tree: db0c08d86a3e3b667c34ecb6d967818104ce117b
parent: 822dd77333896538393c657dd220d8f66d8a46a7 (diff)
download: bitbake-contrib-41e6161c8ce8cc90ebc93d72852673ae60fac923.tar.gz
utils/md5_file: don't iterate line-by-line
Opening a file in binary mode and iterating it seems like the simple solution but will still break on newlines, which for binary files isn't really useful as the size of the chunks could be huge or tiny.

Instead, let's be a bit more clever: we'll be MD5ing lots of files, but we don't want to fill up memory: use mmap() to open the file and read the file in 8k blocks.

Signed-off-by: Ross Burton <ross.burton@intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r-- lib/bb/utils.py | 13
1 files changed, 9 insertions, 4 deletions
diff --git a/lib/bb/utils.py b/lib/bb/utils.py
index 378e699e0..2ff7e8222 100644
--- a/lib/bb/utils.py
+++ b/lib/bb/utils.py
@@ -524,12 +524,17 @@ def md5_file(filename):
"""
Return the hex string representation of the MD5 checksum of filename.
"""
- import hashlib
- m = hashlib.md5()
+ import hashlib, mmap
with open(filename, "rb") as f:
- for line in f:
- m.update(line)
+ m = hashlib.md5()
+ try:
+ with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
+ for chunk in iter(lambda: mm.read(8192), b''):
+ m.update(chunk)
+ except ValueError:
+ # You can't mmap() an empty file so silence this exception
+ pass
return m.hexdigest()
def sha256_file(filename):