diff options
author | Alberto Pianon <alberto@pianon.eu> | 2023-09-21 12:00:35 +0200 |
---|---|---|
committer | Alberto Pianon <alberto@pianon.eu> | 2023-09-21 12:00:35 +0200 |
commit | 9e96450eb2849a26928f48f5df34e4d08bcdd1e5 (patch) | |
tree | eb13448b0fb5237896eaade97397f3d3fe8e81be | |
parent | afbc169e1490a86d6250969f780062c426eb4682 (diff) | |
download | bitbake-contrib-alpianon/srctrace5.tar.gz |
temporary commit (ref: alpianon/srctrace5)
Signed-off-by: Alberto Pianon <alberto@pianon.eu>
-rw-r--r-- | lib/bb/fetch2/__init__.py | 42 | ||||
-rw-r--r-- | lib/bb/fetch2/crate.py | 2 | ||||
-rw-r--r-- | lib/bb/fetch2/git.py | 1 | ||||
-rw-r--r-- | lib/bb/fetch2/gitsm.py | 4 | ||||
-rw-r--r-- | lib/bb/fetch2/hg.py | 1 | ||||
-rw-r--r-- | lib/bb/fetch2/npm.py | 1 | ||||
-rw-r--r-- | lib/bb/fetch2/npmsw.py | 12 | ||||
-rw-r--r-- | lib/bb/fetch2/trace.py | 265 |
8 files changed, 328 insertions, 0 deletions
diff --git a/lib/bb/fetch2/__init__.py b/lib/bb/fetch2/__init__.py index 765aedd51..a8b44a4f3 100644 --- a/lib/bb/fetch2/__init__.py +++ b/lib/bb/fetch2/__init__.py @@ -140,6 +140,35 @@ class MissingChecksumEvent(bb.event.Event): bb.event.Event.__init__(self) +class DummyUnpackTracer(object): + + def start(self, unpackdir, ud_dict, d): + return + + def start_url(self, url): + return + + def unpack(self, unpack_type, destdir, ud=None): + return + + def finish_url(self, url): + return + + def start_module(self, module_type, unpackdir, ud_dict, parent_ud, d): + return + + def module(self, module_type, url, name, path, revision=None): + return + + def finish_module(self, module_type, unpackdir, ud_dict, parent_ud, d): + return + + def complete(self): + return + +unpack_tracer = DummyUnpackTracer() + + class URI(object): """ A class representing a generic URI, with methods for @@ -1578,6 +1607,7 @@ class FetchMethod(object): unpackdir = rootdir if not unpack or not cmd: + urldata.unpack_tracer.unpack("file-copy", unpackdir) # If file == dest, then avoid any copies, as we already put the file into dest! 
dest = os.path.join(unpackdir, os.path.basename(file)) if file != dest and not (os.path.exists(dest) and os.path.samefile(file, dest)): @@ -1591,8 +1621,13 @@ class FetchMethod(object): if urlpath.find("/") != -1: destdir = urlpath.rsplit("/", 1)[0] + '/' bb.utils.mkdirhier("%s/%s" % (unpackdir, destdir)) + urldata.unpack_tracer.unpack( + "file-copy", "%s/%s" % (unpackdir, destdir)) cmd = 'cp -fpPRH "%s" "%s"' % (file, destdir) + else: + urldata.unpack_tracer.unpack("archive-extract", unpackdir) + if not cmd: return @@ -1707,6 +1742,7 @@ class Fetch(object): if url not in self.ud: try: self.ud[url] = FetchData(url, d, localonly) + self.ud[url].unpack_tracer = unpack_tracer except NonLocalMethod: if localonly: self.ud[url] = None @@ -1882,6 +1918,8 @@ class Fetch(object): if not urls: urls = self.urls + unpack_tracer.start(root, self.ud, self.d) + for u in urls: ud = self.ud[u] ud.setup_localpath(self.d) @@ -1889,11 +1927,15 @@ class Fetch(object): if ud.lockfile: lf = bb.utils.lockfile(ud.lockfile) + unpack_tracer.start_url(u) ud.method.unpack(ud, root, self.d) + unpack_tracer.finish_url(u) if ud.lockfile: bb.utils.unlockfile(lf) + unpack_tracer.complete() + def clean(self, urls=None): """ Clean files that the fetcher gets or places diff --git a/lib/bb/fetch2/crate.py b/lib/bb/fetch2/crate.py index 3310ed005..01d49435c 100644 --- a/lib/bb/fetch2/crate.py +++ b/lib/bb/fetch2/crate.py @@ -101,8 +101,10 @@ class Crate(Wget): bp = d.getVar('BP') if bp == ud.parm.get('name'): cmd = "tar -xz --no-same-owner -f %s" % thefile + ud.unpack_tracer.unpack("crate-extract", rootdir) else: cargo_bitbake = self._cargo_bitbake_path(rootdir) + ud.unpack_tracer.unpack("cargo-extract", cargo_bitbake) cmd = "tar -xz --no-same-owner -f %s -C %s" % (thefile, cargo_bitbake) diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py index 4385d0b37..567943da0 100644 --- a/lib/bb/fetch2/git.py +++ b/lib/bb/fetch2/git.py @@ -589,6 +589,7 @@ class Git(FetchMethod): destdir = ud.destdir = 
os.path.join(destdir, destsuffix) if os.path.exists(destdir): bb.utils.prunedir(destdir) + ud.unpack_tracer.unpack("git", destdir, ud) need_lfs = self._need_lfs(ud) diff --git a/lib/bb/fetch2/gitsm.py b/lib/bb/fetch2/gitsm.py index a87361ccf..06dda48b3 100644 --- a/lib/bb/fetch2/gitsm.py +++ b/lib/bb/fetch2/gitsm.py @@ -218,7 +218,9 @@ class GitSM(Git): try: newfetch = Fetch([url], d, cache=False) + ud.unpack_tracer.start_module("git", repo_conf, newfetch.ud, ud, d) newfetch.unpack(root=os.path.dirname(os.path.join(repo_conf, 'modules', module))) + ud.unpack_tracer.finish_module("git", repo_conf, newfetch.ud, ud, d) except Exception as e: logger.error('gitsm: submodule unpack failed: %s %s' % (type(e).__name__, str(e))) raise @@ -238,6 +240,8 @@ class GitSM(Git): logger.error("Unable to set git config core.bare to false for %s" % os.path.join(repo_conf, 'modules', module)) raise + ud.unpack_tracer.module("git", url, module, modpath, d.getVar("SRCREV_%s" % module)) + Git.unpack(self, ud, destdir, d) ret = self.process_submodules(ud, ud.destdir, unpack_submodules, d) diff --git a/lib/bb/fetch2/hg.py b/lib/bb/fetch2/hg.py index 063e13008..cbff8c490 100644 --- a/lib/bb/fetch2/hg.py +++ b/lib/bb/fetch2/hg.py @@ -242,6 +242,7 @@ class Hg(FetchMethod): revflag = "-r %s" % ud.revision subdir = ud.parm.get("destsuffix", ud.module) codir = "%s/%s" % (destdir, subdir) + ud.unpack_tracer.unpack("hg", codir) scmdata = ud.parm.get("scmdata", "") if scmdata != "nokeep": diff --git a/lib/bb/fetch2/npm.py b/lib/bb/fetch2/npm.py index f83485ad8..15f3f19bc 100644 --- a/lib/bb/fetch2/npm.py +++ b/lib/bb/fetch2/npm.py @@ -298,6 +298,7 @@ class Npm(FetchMethod): destsuffix = ud.parm.get("destsuffix", "npm") destdir = os.path.join(rootdir, destsuffix) npm_unpack(ud.localpath, destdir, d) + ud.unpack_tracer.unpack("npm", destdir) def clean(self, ud, d): """Clean any existing full or partial download""" diff --git a/lib/bb/fetch2/npmsw.py b/lib/bb/fetch2/npmsw.py index 4ff2c8ffc..5cab96c42 
100644 --- a/lib/bb/fetch2/npmsw.py +++ b/lib/bb/fetch2/npmsw.py @@ -192,6 +192,7 @@ class NpmShrinkWrap(FetchMethod): raise ParameterError("Unsupported dependency: %s" % name, ud.url) ud.deps.append({ + "name": name, "url": url, "localpath": localpath, "extrapaths": extrapaths, @@ -270,16 +271,27 @@ class NpmShrinkWrap(FetchMethod): destsuffix = ud.parm.get("destsuffix") if destsuffix: destdir = os.path.join(rootdir, destsuffix) + ud.unpack_tracer.unpack("npmsw", destdir) bb.utils.mkdirhier(destdir) bb.utils.copyfile(ud.shrinkwrap_file, os.path.join(destdir, "npm-shrinkwrap.json")) + for dep in ud.deps: + ud.unpack_tracer.module( + "npm", + dep["url"] or dep["localpath"], + dep["name"], + dep["destsuffix"] + ) + auto = [dep["url"] for dep in ud.deps if not dep["localpath"]] manual = [dep for dep in ud.deps if dep["localpath"]] if auto: + ud.unpack_tracer.start_module("npm", destdir, ud.proxy.ud, ud, d) ud.proxy.unpack(destdir, auto) + ud.unpack_tracer.finish_module("npm", destdir, ud.proxy.ud, ud, d) for dep in manual: depdestdir = os.path.join(destdir, dep["destsuffix"]) diff --git a/lib/bb/fetch2/trace.py b/lib/bb/fetch2/trace.py new file mode 100644 index 000000000..40245e6d2 --- /dev/null +++ b/lib/bb/fetch2/trace.py @@ -0,0 +1,265 @@ +import os +import hashlib +import time +import json + +import bb.process +import bb.utils +import bb.compress.zstd + +def is_git_dir(e): + if ".git" in os.listdir(e.path): + try: + bb.process.run( + ["git", "rev-parse", "--is-inside-work-tree"], cwd=e.path) + return True + except bb.process.ExecutionError: + return False + return False + +def scandir(path, exclude=[], skip_git_submodules=False): + + def _scandir(path, tree, excluded_list, exclude, skip_git_submodules): + with os.scandir(path) as scan: + scandir = [ e for e in scan ] + for e in scandir: + if e.name in exclude: + excluded_list.append(e.path) + continue + if e.is_dir() and not e.is_symlink(): + if skip_git_submodules and is_git_dir(e): + 
excluded_list.append(e.path) + continue + _scandir(e.path, tree, excluded_list, exclude, skip_git_submodules) + else: + tree[e.path] = e + + tree = {} + excluded_list = [] + _scandir(path, tree, excluded_list, exclude, skip_git_submodules) + paths = list(tree.keys()) + sorted_tree = {path: tree[path] for path in sorted(paths)} + return sorted_tree, sorted(excluded_list) + + +def calculate_sha1(path): + sha1 = hashlib.sha1() + with open(path, 'rb') as file: + while chunk := file.read(8192): + sha1.update(chunk) + return sha1.hexdigest() + +def get_stats(e): + s = e.stat() + return (s.st_mode, s.st_mtime, s.st_ctime, s.st_size) + +class FileIndexException(Exception): + pass + +class FileIndexEntry(object): + def __init__(self, stats, link, sha1, last_update): + self.stats = stats + self.link = link + self.sha1 = sha1 + self.last_update = last_update + +class FileIndex(object): + + UNCHANGED = 0 + ADDED = 1 + MODIFIED = 2 + REMOVED = 3 + + def __init__(self, root, exclude=[], skip_git_submodules=False): + self.entries = {} + self.root = root + self.exclude = exclude + self.skip_git_submodules = skip_git_submodules + self.update_index(root) + + def _add_or_update_entry(self, e, link, timestamp, stats=None, sha1=None): + self.entries[e.path] = FileIndexEntry( + stats = (stats or get_stats(e)) if not link else None, + link = link, + sha1 = (sha1 or calculate_sha1(e.path)) if not link else None, + last_update = int(timestamp) + ) + + def add_or_update_entry(self, e, timestamp=None): + if not timestamp: + timestamp = time.time() + link = os.readlink(e.path) if e.is_symlink() else None + if e.path in self.entries: + entry = self.entries[e.path] + if link: + if link != entry.link: + self._add_or_update_entry(e, link, timestamp), + return FileIndex.MODIFIED + else: + return FileIndex.UNCHANGED + mode, mtime, ctime, size = stats = get_stats(e) + if entry.stats != stats: + self._add_or_update_entry(e, link, timestamp, stats) + return FileIndex.MODIFIED + elif (entry.last_update 
<= int(mtime) + or entry.last_update <= int(ctime) + ): + sha1 = calculate_sha1(e.path) + if sha1 != entry.sha1: + self._add_or_update_entry(e, link, timestamp, stats, sha1) + return FileIndex.MODIFIED + return FileIndex.UNCHANGED + else: + self._add_or_update_entry(e, link, timestamp) + return FileIndex.ADDED + + def remove_entry(self, path): + if path in self.entries: + del self.entries[path] + return FileIndex.REMOVED + return FileIndex.UNCHANGED + + def update_index(self, path, skip_node_submodules=False, skip_git_submodules=False): + if not path.startswith(self.root): + raise FileIndexException( + "Cannot update index for path %s, because it is not inside" + " index root dir %s" % (path, self.root) + ) + timestamp = time.time() + extra_exclude = ["node_modules"] if skip_node_submodules else [] + exclude = self.exclude + extra_exclude + tree, excluded_list = scandir(path, exclude, skip_git_submodules) + files = {} + links = {} + for p, e in tree.items(): + res = self.index.add_or_update_entry(e, timestamp) + if res in [FileIndex.ADDED, FileIndex.MODIFIED]: + entry = self.index.entries[e.path] + relpath = os.path.relpath(e.path, self.root) + if entry.sha1: + files[relpath] = entry.sha1 + elif entry.link: + links[relpath] = entry.link + removed = [] + for p in self.index.entries: + abspath = os.path.join(self.root, p) + if not abspath.startswith(path): + continue + for excluded_path in excluded_list: + if abspath.startswith(excluded_path): + break + else: + if path not in tree: + removed.append(p) + for p in removed: + res = self.index.remove_entry(p) + return files, links, removed + + +class UrlTraceData(object): + def __init__(self, ud, unpackdir, checkout_dir, is_module): + self.ud = ud + self.unpackdir = unpackdir + self.is_module = is_module + self.module_data = [] + self.unpackdir = None + self.is_extracted_archive = False + +class ModuleData(object): + def __init__(self, url, name, path, parent_path, revision=None): + self.url = url + self.name = name + 
self.path = path + self.parent_path = parent_path + self.revision = revision + +class UnpackTracer(object): + + def __init__(self): + self.url_td = {} + self.file_index = None + self.unpack_trace = [] + self.d = None + self.root_unpackdir = None + self.unpackdir = None + self.url = None + self.is_module = False + + def _start(self, unpackdir, ud_dict, d, is_module=False): + if not self.file_index: + self.root_unpackdir = unpackdir + self.file_index = FileIndex(self.root_unpackdir) + self.d = d + self.unpack_dir = unpackdir + self.is_module = is_module + for url, ud in ud_dict.items(): + url_td.setdefault( + (url, unpackdir), + UrlTraceData(ud, unpackdir, is_module) + ) + + def start(self, unpackdir, ud_dict, d): + self._start(self, unpackdir, ud_dict, d) + + def start_module(self, module_type, unpackdir, ud_dict, parent_ud, d): + if module_type == "git": + self._start(self, unpackdir, ud_dict, d, is_module=True) + + def _get_url_tracedata(self): + return self.url_td[(self.url, self.unpackdir)] + + def start_url(self, url): + self.url = url + + def finish_url(self, url): + if self.is_module: + return + utd = self._get_url_tracedata() + + + + + + def _set_url_tracedata(self, name, value): + utd = self.url_td[(self.url, self.unpackdir)] + setattr(utd, name, value) + + def unpack(self, unpack_type, unpackdir, ud): + self._set_url_tracedata("is_unpacked_archive", unpack_type == "archive-extract") + self._set_url_tracedata("unpackdir", unpackdir) + if unpack_type == "git": + if not hasattr(ud, "trace_checkout_dir"): + ud.trace_checkout_dir = ud.destdir + + def module(self, module_type, url, name, path, revision=None): + utd = self._get_url_tracedata() + if module_type == "git": + parent_path = utd.ud.checkout_dir.rstrip("/") + path = os.path.join(parent_path, path).rstrip("/") + elif module_type == "npm": + path = os.path.join(utd.unpackdir, path) + parent_path = re.sub("/node_modules/"+name+"$", "", path) + utd.module_data.append( + ModuleData(url, name, path, 
parent_path, revision)) + # FIXME module_type == "npm" + + def start_git_module(self, ud, parent_ud, path, d): + ud.checkout_dir = os.path.join(parent_ud.checkout_dir, modpath) + self._start(ud.checkout_dir,) + + + + + + + + + + + + def finish_url(self, url): + pass + + def complete(self): + pass + # NOTE: delete object!
\ No newline at end of file