From c07f374998903359ed55f263c86466d05aa39b68 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Wed, 8 Apr 2020 16:16:53 +0100 Subject: cache: Fix performance problem with large numbers of source files Some companies are using large numbers of patch files in SRC_URI. Rightly or wrongly, that exposes a performance problem where the code does not handle the large string manipulations in a way which works efficiently in Python. This is a modified version of a patch from z00539568 --- lib/bb/cache.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/lib/bb/cache.py b/lib/bb/cache.py index a5aaf3b99..d1be83617 100644 --- a/lib/bb/cache.py +++ b/lib/bb/cache.py @@ -21,6 +21,7 @@ import logging import pickle from collections import defaultdict import bb.utils +import re logger = logging.getLogger("BitBake.Cache") @@ -369,6 +370,7 @@ class Cache(NoCache): self.data_fn = None self.cacheclean = True self.data_hash = data_hash + self.filelist_regex = re.compile(r'(?:(?<=:True)|(?<=:False))\s+') if self.cachedir in [None, '']: self.has_cache = False @@ -607,20 +609,12 @@ class Cache(NoCache): if hasattr(info_array[0], 'file_checksums'): for _, fl in info_array[0].file_checksums.items(): fl = fl.strip() - while fl: - # A .split() would be simpler but means spaces or colons in filenames would break - a = fl.find(":True") - b = fl.find(":False") - if ((a < 0) and b) or ((b > 0) and (b < a)): - f = fl[:b+6] - fl = fl[b+7:] - elif ((b < 0) and a) or ((a > 0) and (a < b)): - f = fl[:a+5] - fl = fl[a+6:] - else: - break - fl = fl.strip() - if "*" in f: + if not fl: + continue + # Have to be careful about spaces and colons in filenames + flist = self.filelist_regex.split(fl) + for f in flist: + if not f or "*" in f: continue f, exist = f.split(":") if (exist == "True" and not os.path.exists(f)) or (exist == "False" and os.path.exists(f)): -- cgit 1.2.3-korg