aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRichard Purdie <richard.purdie@linuxfoundation.org>2020-04-08 16:16:53 +0100
committerRichard Purdie <richard.purdie@linuxfoundation.org>2020-04-24 14:31:30 +0100
commitc07f374998903359ed55f263c86466d05aa39b68 (patch)
tree8d80879a613b37b7ce858183914e3e5054982454
parent4679d3cdb9cdf23f3962aa61c599ad7474591f9f (diff)
downloadbitbake-c07f374998903359ed55f263c86466d05aa39b68.tar.gz
cache: Fix performance problem with large numbers of source files
Some companies are using large numbers of patch files in SRC_URI. Rightly or wrongly that exposes a performance problem where the code does not handle the large string manipulations in a way which works efficiently in Python. This is a modified version of a patch from z00539568 <zhangyifan46@huawei.com> 153340508@qq.com which addresses the performance problem. I modified it to use a more advanced regex, retain the "*" check and cache the regex. [YOCTO #13824] Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r--lib/bb/cache.py22
1 files changed, 8 insertions, 14 deletions
diff --git a/lib/bb/cache.py b/lib/bb/cache.py
index a5aaf3b99..d1be83617 100644
--- a/lib/bb/cache.py
+++ b/lib/bb/cache.py
@@ -21,6 +21,7 @@ import logging
import pickle
from collections import defaultdict
import bb.utils
+import re
logger = logging.getLogger("BitBake.Cache")
@@ -369,6 +370,7 @@ class Cache(NoCache):
self.data_fn = None
self.cacheclean = True
self.data_hash = data_hash
+ self.filelist_regex = re.compile(r'(?:(?<=:True)|(?<=:False))\s+')
if self.cachedir in [None, '']:
self.has_cache = False
@@ -607,20 +609,12 @@ class Cache(NoCache):
if hasattr(info_array[0], 'file_checksums'):
for _, fl in info_array[0].file_checksums.items():
fl = fl.strip()
- while fl:
- # A .split() would be simpler but means spaces or colons in filenames would break
- a = fl.find(":True")
- b = fl.find(":False")
- if ((a < 0) and b) or ((b > 0) and (b < a)):
- f = fl[:b+6]
- fl = fl[b+7:]
- elif ((b < 0) and a) or ((a > 0) and (a < b)):
- f = fl[:a+5]
- fl = fl[a+6:]
- else:
- break
- fl = fl.strip()
- if "*" in f:
+ if not fl:
+ continue
+ # Have to be careful about spaces and colons in filenames
+ flist = self.filelist_regex.split(fl)
+ for f in flist:
+ if not f or "*" in f:
continue
f, exist = f.split(":")
if (exist == "True" and not os.path.exists(f)) or (exist == "False" and os.path.exists(f)):