From ad36335b8592e0387dd36066920cd5ffefd375f8 Mon Sep 17 00:00:00 2001 From: Konrad Scherer Date: Fri, 27 Sep 2019 14:56:42 -0400 Subject: gen-lockedsig-cache: Replace glob lookup with hash to filename lookup Using the glob function to map signatures to sstate files is very slow when the sstate is large and accessed over nfs. The lookup now only loads the necessary prefixes and doesn't use glob at all. Unfortunately I don't have access to the systems where the performance issue was noticed and on my test system the glob is fast enough that the performance numbers aren't useful. I could verify that the file list returned by the new code is the same. [YOCTO #13539] Signed-off-by: Konrad Scherer Signed-off-by: Richard Purdie --- scripts/gen-lockedsig-cache | 47 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/scripts/gen-lockedsig-cache b/scripts/gen-lockedsig-cache index e3076e11a5..48cb67112f 100755 --- a/scripts/gen-lockedsig-cache +++ b/scripts/gen-lockedsig-cache @@ -5,9 +5,9 @@ import os import sys -import glob import shutil import errno +import time def mkdir(d): try: @@ -16,6 +16,36 @@ def mkdir(d): if e.errno != errno.EEXIST: raise e +# extract the hash from past the last colon to last underscore +def extract_sha(filename): + return filename.split(':')[7].split('_')[0] + +# get all files in a directory, extract hash and make +# a map from hash to list of file with that hash +def map_sha_to_files(dir_, prefix, sha_map): + sstate_prefix_path = dir_ + '/' + prefix + '/' + sstate_files = os.listdir(sstate_prefix_path) + for f in sstate_files: + try: + sha = extract_sha(f) + if sha not in sha_map: + sha_map[sha] = [] + sha_map[sha].append(sstate_prefix_path + f) + except IndexError: + continue + +# given a prefix build a map of hash to list of files +def build_sha_cache(prefix): + sha_map = {} + + sstate_dir = sys.argv[2] + map_sha_to_files(sstate_dir, prefix, sha_map) + + native_sstate_dir = sys.argv[2] + '/' 
+ sys.argv[4] + map_sha_to_files(native_sstate_dir, prefix, sha_map) + + return sha_map + if len(sys.argv) < 5: print("Incorrect number of arguments specified") print("syntax: gen-lockedsig-cache [filterfile]") @@ -41,12 +71,19 @@ with open(sys.argv[1]) as f: sigs.append(sig) print('Gathering file list') +start_time = time.perf_counter() files = set() +sstate_content_cache = {} for s in sigs: - p = sys.argv[2] + "/" + s[:2] + "/*" + s + "*" - files |= set(glob.glob(p)) - p = sys.argv[2] + "/%s/" % sys.argv[4] + s[:2] + "/*" + s + "*" - files |= set(glob.glob(p)) + prefix = s[:2] + if prefix not in sstate_content_cache: + sstate_content_cache[prefix] = build_sha_cache(prefix) + + for f in sstate_content_cache[prefix][s]: + files.add(f) + +elapsed = time.perf_counter() - start_time +print("Gathering file list took %.1fs" % elapsed) print('Processing files') for f in files: -- cgit 1.2.3-korg