aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlberto Pianon <alberto@pianon.eu>2023-09-21 12:00:35 +0200
committerAlberto Pianon <alberto@pianon.eu>2023-09-21 12:00:35 +0200
commit9e96450eb2849a26928f48f5df34e4d08bcdd1e5 (patch)
treeeb13448b0fb5237896eaade97397f3d3fe8e81be
parentafbc169e1490a86d6250969f780062c426eb4682 (diff)
downloadbitbake-contrib-alpianon/srctrace5.tar.gz
temporary commitalpianon/srctrace5
Signed-off-by: Alberto Pianon <alberto@pianon.eu>
-rw-r--r--lib/bb/fetch2/__init__.py42
-rw-r--r--lib/bb/fetch2/crate.py2
-rw-r--r--lib/bb/fetch2/git.py1
-rw-r--r--lib/bb/fetch2/gitsm.py4
-rw-r--r--lib/bb/fetch2/hg.py1
-rw-r--r--lib/bb/fetch2/npm.py1
-rw-r--r--lib/bb/fetch2/npmsw.py12
-rw-r--r--lib/bb/fetch2/trace.py265
8 files changed, 328 insertions, 0 deletions
diff --git a/lib/bb/fetch2/__init__.py b/lib/bb/fetch2/__init__.py
index 765aedd51..a8b44a4f3 100644
--- a/lib/bb/fetch2/__init__.py
+++ b/lib/bb/fetch2/__init__.py
@@ -140,6 +140,35 @@ class MissingChecksumEvent(bb.event.Event):
bb.event.Event.__init__(self)
class DummyUnpackTracer(object):
    """Null-object implementation of the unpack tracer API.

    Installed as the module-level default tracer so fetcher code can call
    the tracing hooks unconditionally; every hook is a no-op.
    """

    def start(self, unpackdir, ud_dict, d):
        pass

    def start_url(self, url):
        pass

    def unpack(self, unpack_type, destdir, ud=None):
        pass

    def finish_url(self, url):
        pass

    def start_module(self, module_type, unpackdir, ud_dict, parent_ud, d):
        pass

    def module(self, module_type, url, name, path, revision=None):
        pass

    def finish_module(self, module_type, unpackdir, ud_dict, parent_ud, d):
        pass

    def complete(self):
        pass


unpack_tracer = DummyUnpackTracer()
+
+
class URI(object):
"""
A class representing a generic URI, with methods for
@@ -1578,6 +1607,7 @@ class FetchMethod(object):
unpackdir = rootdir
if not unpack or not cmd:
+ urldata.unpack_tracer.unpack("file-copy", unpackdir)
# If file == dest, then avoid any copies, as we already put the file into dest!
dest = os.path.join(unpackdir, os.path.basename(file))
if file != dest and not (os.path.exists(dest) and os.path.samefile(file, dest)):
@@ -1591,8 +1621,13 @@ class FetchMethod(object):
if urlpath.find("/") != -1:
destdir = urlpath.rsplit("/", 1)[0] + '/'
bb.utils.mkdirhier("%s/%s" % (unpackdir, destdir))
+ urldata.unpack_tracer.unpack(
+ "file-copy", "%s/%s" % (unpackdir, destdir))
cmd = 'cp -fpPRH "%s" "%s"' % (file, destdir)
+ else:
+ urldata.unpack_tracer.unpack("archive-extract", unpackdir)
+
if not cmd:
return
@@ -1707,6 +1742,7 @@ class Fetch(object):
if url not in self.ud:
try:
self.ud[url] = FetchData(url, d, localonly)
+ self.ud[url].unpack_tracer = unpack_tracer
except NonLocalMethod:
if localonly:
self.ud[url] = None
@@ -1882,6 +1918,8 @@ class Fetch(object):
if not urls:
urls = self.urls
+ unpack_tracer.start(root, self.ud, self.d)
+
for u in urls:
ud = self.ud[u]
ud.setup_localpath(self.d)
@@ -1889,11 +1927,15 @@ class Fetch(object):
if ud.lockfile:
lf = bb.utils.lockfile(ud.lockfile)
+ unpack_tracer.start_url(u)
ud.method.unpack(ud, root, self.d)
+ unpack_tracer.finish_url(u)
if ud.lockfile:
bb.utils.unlockfile(lf)
+ unpack_tracer.complete()
+
def clean(self, urls=None):
"""
Clean files that the fetcher gets or places
diff --git a/lib/bb/fetch2/crate.py b/lib/bb/fetch2/crate.py
index 3310ed005..01d49435c 100644
--- a/lib/bb/fetch2/crate.py
+++ b/lib/bb/fetch2/crate.py
@@ -101,8 +101,10 @@ class Crate(Wget):
bp = d.getVar('BP')
if bp == ud.parm.get('name'):
cmd = "tar -xz --no-same-owner -f %s" % thefile
+ ud.unpack_tracer.unpack("crate-extract", rootdir)
else:
cargo_bitbake = self._cargo_bitbake_path(rootdir)
+ ud.unpack_tracer.unpack("cargo-extract", cargo_bitbake)
cmd = "tar -xz --no-same-owner -f %s -C %s" % (thefile, cargo_bitbake)
diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
index 4385d0b37..567943da0 100644
--- a/lib/bb/fetch2/git.py
+++ b/lib/bb/fetch2/git.py
@@ -589,6 +589,7 @@ class Git(FetchMethod):
destdir = ud.destdir = os.path.join(destdir, destsuffix)
if os.path.exists(destdir):
bb.utils.prunedir(destdir)
+ ud.unpack_tracer.unpack("git", destdir, ud)
need_lfs = self._need_lfs(ud)
diff --git a/lib/bb/fetch2/gitsm.py b/lib/bb/fetch2/gitsm.py
index a87361ccf..06dda48b3 100644
--- a/lib/bb/fetch2/gitsm.py
+++ b/lib/bb/fetch2/gitsm.py
@@ -218,7 +218,9 @@ class GitSM(Git):
try:
newfetch = Fetch([url], d, cache=False)
+ ud.unpack_tracer.start_module("git", repo_conf, newfetch.ud, ud, d)
newfetch.unpack(root=os.path.dirname(os.path.join(repo_conf, 'modules', module)))
+ ud.unpack_tracer.finish_module("git", repo_conf, newfetch.ud, ud, d)
except Exception as e:
logger.error('gitsm: submodule unpack failed: %s %s' % (type(e).__name__, str(e)))
raise
@@ -238,6 +240,8 @@ class GitSM(Git):
logger.error("Unable to set git config core.bare to false for %s" % os.path.join(repo_conf, 'modules', module))
raise
+ ud.unpack_tracer.module("git", url, module, modpath, d.getVar("SRCREV_%s" % module))
+
Git.unpack(self, ud, destdir, d)
ret = self.process_submodules(ud, ud.destdir, unpack_submodules, d)
diff --git a/lib/bb/fetch2/hg.py b/lib/bb/fetch2/hg.py
index 063e13008..cbff8c490 100644
--- a/lib/bb/fetch2/hg.py
+++ b/lib/bb/fetch2/hg.py
@@ -242,6 +242,7 @@ class Hg(FetchMethod):
revflag = "-r %s" % ud.revision
subdir = ud.parm.get("destsuffix", ud.module)
codir = "%s/%s" % (destdir, subdir)
+ ud.unpack_tracer.unpack("hg", codir)
scmdata = ud.parm.get("scmdata", "")
if scmdata != "nokeep":
diff --git a/lib/bb/fetch2/npm.py b/lib/bb/fetch2/npm.py
index f83485ad8..15f3f19bc 100644
--- a/lib/bb/fetch2/npm.py
+++ b/lib/bb/fetch2/npm.py
@@ -298,6 +298,7 @@ class Npm(FetchMethod):
destsuffix = ud.parm.get("destsuffix", "npm")
destdir = os.path.join(rootdir, destsuffix)
npm_unpack(ud.localpath, destdir, d)
+ ud.unpack_tracer.unpack("npm", destdir)
def clean(self, ud, d):
"""Clean any existing full or partial download"""
diff --git a/lib/bb/fetch2/npmsw.py b/lib/bb/fetch2/npmsw.py
index 4ff2c8ffc..5cab96c42 100644
--- a/lib/bb/fetch2/npmsw.py
+++ b/lib/bb/fetch2/npmsw.py
@@ -192,6 +192,7 @@ class NpmShrinkWrap(FetchMethod):
raise ParameterError("Unsupported dependency: %s" % name, ud.url)
ud.deps.append({
+ "name": name,
"url": url,
"localpath": localpath,
"extrapaths": extrapaths,
@@ -270,16 +271,27 @@ class NpmShrinkWrap(FetchMethod):
destsuffix = ud.parm.get("destsuffix")
if destsuffix:
destdir = os.path.join(rootdir, destsuffix)
+ ud.unpack_tracer.unpack("npmsw", destdir)
bb.utils.mkdirhier(destdir)
bb.utils.copyfile(ud.shrinkwrap_file,
os.path.join(destdir, "npm-shrinkwrap.json"))
+ for dep in ud.deps:
+ ud.unpack_tracer.module(
+ "npm",
+ dep["url"] or dep["localpath"],
+ dep["name"],
+ dep["destsuffix"]
+ )
+
auto = [dep["url"] for dep in ud.deps if not dep["localpath"]]
manual = [dep for dep in ud.deps if dep["localpath"]]
if auto:
+ ud.unpack_tracer.start_module("npm", destdir, ud.proxy.ud, ud, d)
ud.proxy.unpack(destdir, auto)
+ ud.unpack_tracer.finish_module("npm", destdir, ud.proxy.ud, ud, d)
for dep in manual:
depdestdir = os.path.join(destdir, dep["destsuffix"])
diff --git a/lib/bb/fetch2/trace.py b/lib/bb/fetch2/trace.py
new file mode 100644
index 000000000..40245e6d2
--- /dev/null
+++ b/lib/bb/fetch2/trace.py
@@ -0,0 +1,265 @@
+import os
+import hashlib
+import time
+import json
+
+import bb.process
+import bb.utils
+import bb.compress.zstd
+
def is_git_dir(e):
    """Return True if dir entry *e* is the working tree of a git repository.

    A directory qualifies only if it contains a ".git" entry AND
    `git rev-parse --is-inside-work-tree` succeeds inside it.
    """
    if ".git" not in os.listdir(e.path):
        return False
    try:
        bb.process.run(
            ["git", "rev-parse", "--is-inside-work-tree"], cwd=e.path)
    except bb.process.ExecutionError:
        return False
    return True
+
def scandir(path, exclude=None, skip_git_submodules=False):
    """Recursively scan *path* and collect non-directory entries.

    :param path: root directory to scan
    :param exclude: entry names to skip entirely (files or whole subtrees)
    :param skip_git_submodules: if True, skip subdirectories that are git
        working trees (detected via is_git_dir)
    :return: tuple (tree, excluded) where *tree* maps each file path to its
        os.DirEntry, sorted by path, and *excluded* is the sorted list of
        paths that were skipped
    """
    # Bug fix: `exclude=[]` was a shared mutable default argument.
    exclude = exclude or []
    tree = {}
    excluded = []

    def _walk(curdir):
        with os.scandir(curdir) as it:
            entries = list(it)
        for e in entries:
            if e.name in exclude:
                excluded.append(e.path)
                continue
            if e.is_dir() and not e.is_symlink():
                # symlinked dirs are recorded as entries, not followed
                if skip_git_submodules and is_git_dir(e):
                    excluded.append(e.path)
                    continue
                _walk(e.path)
            else:
                tree[e.path] = e

    _walk(path)
    sorted_tree = {p: tree[p] for p in sorted(tree)}
    return sorted_tree, sorted(excluded)
+
+
def calculate_sha1(path):
    """Return the hex SHA1 digest of the file at *path*, read in 8 KiB chunks."""
    digest = hashlib.sha1()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(8192), b""):
            digest.update(chunk)
    return digest.hexdigest()
+
def get_stats(e):
    """Return (st_mode, st_mtime, st_ctime, st_size) for dir entry *e*."""
    st = e.stat()
    return st.st_mode, st.st_mtime, st.st_ctime, st.st_size
+
class FileIndexException(Exception):
    """Raised when a FileIndex operation targets a path outside its root dir."""
+
class FileIndexEntry(object):
    """Metadata snapshot for one indexed filesystem entry.

    For symlinks only the link target is stored; for regular files the
    stat tuple and a SHA1 content digest are stored instead.
    """

    def __init__(self, stats, link, sha1, last_update):
        # stats: (mode, mtime, ctime, size) tuple, or None for symlinks
        self.stats = stats
        # link: symlink target string, or None for regular files
        self.link = link
        # sha1: content digest, or None for symlinks
        self.sha1 = sha1
        # last_update: integer timestamp of the last index refresh
        self.last_update = last_update
+
class FileIndex(object):
    """Index of file metadata (stats, SHA1 digests, symlink targets) under a
    root directory.

    Successive update_index() calls report which files were added, modified
    or removed since the previous scan.
    """

    # change-status codes returned by the entry-level update methods
    UNCHANGED = 0
    ADDED = 1
    MODIFIED = 2
    REMOVED = 3

    def __init__(self, root, exclude=None, skip_git_submodules=False):
        """Build the initial index by scanning *root*.

        :param root: index root directory
        :param exclude: entry names to always exclude from scans
        :param skip_git_submodules: default for update_index()
        """
        self.entries = {}  # abs path -> FileIndexEntry
        self.root = root
        # Bug fix: `exclude=[]` was a shared mutable default argument.
        self.exclude = exclude if exclude is not None else []
        self.skip_git_submodules = skip_git_submodules
        self.update_index(root)

    def _add_or_update_entry(self, e, link, timestamp, stats=None, sha1=None):
        # Symlinks store only the target; regular files store stats + digest.
        self.entries[e.path] = FileIndexEntry(
            stats=(stats or get_stats(e)) if not link else None,
            link=link,
            sha1=(sha1 or calculate_sha1(e.path)) if not link else None,
            last_update=int(timestamp)
        )

    def add_or_update_entry(self, e, timestamp=None):
        """Record dir entry *e*; return ADDED, MODIFIED or UNCHANGED."""
        if not timestamp:
            timestamp = time.time()
        link = os.readlink(e.path) if e.is_symlink() else None
        if e.path not in self.entries:
            self._add_or_update_entry(e, link, timestamp)
            return FileIndex.ADDED
        entry = self.entries[e.path]
        if link:
            if link != entry.link:
                # Bug fix: removed stray trailing comma after this call.
                self._add_or_update_entry(e, link, timestamp)
                return FileIndex.MODIFIED
            return FileIndex.UNCHANGED
        mode, mtime, ctime, size = stats = get_stats(e)
        if entry.stats != stats:
            self._add_or_update_entry(e, link, timestamp, stats)
            return FileIndex.MODIFIED
        if (entry.last_update <= int(mtime)
                or entry.last_update <= int(ctime)):
            # Stats unchanged but the file timestamps overlap the previous
            # index update: fall back to a content comparison.
            sha1 = calculate_sha1(e.path)
            if sha1 != entry.sha1:
                self._add_or_update_entry(e, link, timestamp, stats, sha1)
                return FileIndex.MODIFIED
        # Bug fix: the original fell through and returned None here.
        return FileIndex.UNCHANGED

    def remove_entry(self, path):
        """Drop *path* from the index; return REMOVED or UNCHANGED."""
        if path in self.entries:
            del self.entries[path]
            return FileIndex.REMOVED
        return FileIndex.UNCHANGED

    def update_index(self, path, skip_node_submodules=False,
                     skip_git_submodules=None):
        """Rescan *path* (which must be inside root) and sync the index.

        :param skip_node_submodules: also exclude "node_modules" dirs
        :param skip_git_submodules: override the instance-level setting;
            defaults to the value given at construction time (bug fix: it
            was stored by __init__ but never used)
        :return: (files, links, removed) — relpath->sha1 for added/modified
            files, relpath->target for added/modified symlinks, and the list
            of index keys removed
        :raises FileIndexException: if *path* is outside the index root
        """
        if not path.startswith(self.root):
            raise FileIndexException(
                "Cannot update index for path %s, because it is not inside"
                " index root dir %s" % (path, self.root)
            )
        if skip_git_submodules is None:
            skip_git_submodules = self.skip_git_submodules
        timestamp = time.time()
        extra_exclude = ["node_modules"] if skip_node_submodules else []
        exclude = self.exclude + extra_exclude
        tree, excluded_list = scandir(path, exclude, skip_git_submodules)
        files = {}
        links = {}
        for p, e in tree.items():
            # Bug fix: was self.index.add_or_update_entry — FileIndex has
            # no `index` attribute (same for .entries/.remove_entry below).
            res = self.add_or_update_entry(e, timestamp)
            if res in (FileIndex.ADDED, FileIndex.MODIFIED):
                entry = self.entries[e.path]
                relpath = os.path.relpath(e.path, self.root)
                if entry.sha1:
                    files[relpath] = entry.sha1
                elif entry.link:
                    links[relpath] = entry.link
        removed = []
        for p in self.entries:
            abspath = os.path.join(self.root, p)
            if not abspath.startswith(path):
                continue  # entry lies outside the rescanned subtree
            for excluded_path in excluded_list:
                if abspath.startswith(excluded_path):
                    break  # entry was excluded from this scan, keep it
            else:
                # Bug fix: was `path not in tree`, which is always true and
                # would flag every indexed entry under *path* as removed.
                if abspath not in tree:
                    removed.append(p)
        for p in removed:
            self.remove_entry(p)
        return files, links, removed
+
+
class UrlTraceData(object):
    """Per-(url, unpackdir) state collected by UnpackTracer."""

    def __init__(self, ud, unpackdir, is_module, checkout_dir=None):
        # Bug fix: the original signature required (ud, unpackdir,
        # checkout_dir, is_module) while the only call site passes three
        # positional args (ud, unpackdir, is_module); checkout_dir is now
        # optional and actually stored.
        self.ud = ud                    # FetchData for this url
        self.unpackdir = unpackdir      # bug fix: was clobbered to None below
        self.checkout_dir = checkout_dir
        self.is_module = is_module      # True when unpacked as a submodule
        self.module_data = []           # ModuleData records for submodules
        self.is_extracted_archive = False
+
class ModuleData(object):
    """Value object describing one unpacked submodule (git or npm)."""

    def __init__(self, url, name, path, parent_path, revision=None):
        self.url, self.name = url, name
        self.path, self.parent_path = path, parent_path
        self.revision = revision
+
class UnpackTracer(object):
    """Trace which source files each fetch url contributes at unpack time.

    Work-in-progress implementation of the unpack tracer hook API (see
    DummyUnpackTracer in fetch2/__init__.py for the full hook set).
    Unfinished parts are flagged with TODO/FIXME below.
    """

    def __init__(self):
        self.url_td = {}          # (url, unpackdir) -> UrlTraceData
        self.file_index = None    # FileIndex rooted at the first unpackdir
        self.unpack_trace = []
        self.d = None             # datastore from the first start() call
        self.root_unpackdir = None
        self.unpackdir = None     # unpackdir of the current (sub)fetch
        self.url = None           # url currently being unpacked
        self.is_module = False

    def _start(self, unpackdir, ud_dict, d, is_module=False):
        # The first start() call defines the index root; later (module)
        # calls reuse the same index.
        if not self.file_index:
            self.root_unpackdir = unpackdir
            self.file_index = FileIndex(self.root_unpackdir)
            self.d = d
        # Bug fix: was `self.unpack_dir` (typo), leaving self.unpackdir
        # None and breaking every url_td lookup keyed on it.
        self.unpackdir = unpackdir
        self.is_module = is_module
        for url, ud in ud_dict.items():
            # Bug fix: was a bare `url_td` (NameError).
            self.url_td.setdefault(
                (url, unpackdir),
                UrlTraceData(ud, unpackdir, is_module)
            )

    def start(self, unpackdir, ud_dict, d):
        """Hook called by Fetch.unpack() before any url is unpacked."""
        # Bug fix: was self._start(self, ...) — double self.
        self._start(unpackdir, ud_dict, d)

    def start_module(self, module_type, unpackdir, ud_dict, parent_ud, d):
        """Hook called before a submodule fetch is unpacked."""
        if module_type == "git":
            # Bug fix: was self._start(self, ...) — double self.
            self._start(unpackdir, ud_dict, d, is_module=True)

    def finish_module(self, module_type, unpackdir, ud_dict, parent_ud, d):
        # Added for interface completeness: gitsm.py calls this hook and
        # DummyUnpackTracer defines it; WIP, nothing to do yet.
        return

    def _get_url_tracedata(self):
        return self.url_td[(self.url, self.unpackdir)]

    def _set_url_tracedata(self, name, value):
        setattr(self._get_url_tracedata(), name, value)

    def start_url(self, url):
        """Hook called just before *url* is unpacked."""
        self.url = url

    def finish_url(self, url):
        # Merged the two duplicate finish_url definitions (the second,
        # a bare pass, shadowed the first); net behavior is unchanged.
        # TODO(WIP): index the files unpacked for this url.
        if self.is_module:
            return

    def unpack(self, unpack_type, unpackdir, ud=None):
        """Hook called by fetcher unpack methods with the real destination.

        Bug fix: ud now defaults to None — most call sites pass only
        (unpack_type, unpackdir).
        """
        # Bug fix: attribute is named is_extracted_archive in UrlTraceData;
        # the original set a never-read "is_unpacked_archive".
        self._set_url_tracedata(
            "is_extracted_archive", unpack_type == "archive-extract")
        self._set_url_tracedata("unpackdir", unpackdir)
        if unpack_type == "git" and ud is not None:
            if not hasattr(ud, "trace_checkout_dir"):
                ud.trace_checkout_dir = ud.destdir

    def module(self, module_type, url, name, path, revision=None):
        """Hook recording one submodule (git) or dependency (npm)."""
        import re  # local import: `re` is missing from the module imports
        utd = self._get_url_tracedata()
        # Bug fix: parent_path was unbound for unknown module types.
        parent_path = None
        if module_type == "git":
            # NOTE(review): reads ud.checkout_dir but unpack() sets
            # ud.trace_checkout_dir — confirm which attribute is intended.
            parent_path = utd.ud.checkout_dir.rstrip("/")
            path = os.path.join(parent_path, path).rstrip("/")
        elif module_type == "npm":
            path = os.path.join(utd.unpackdir, path)
            parent_path = re.sub("/node_modules/" + name + "$", "", path)
        utd.module_data.append(
            ModuleData(url, name, path, parent_path, revision))

    def start_git_module(self, ud, parent_ud, path, d):
        # FIXME(WIP): original referenced an undefined `modpath` and called
        # _start() with only one argument; no caller uses this method yet
        # (callers use start_module). Completed with the apparent intent.
        ud.checkout_dir = os.path.join(parent_ud.checkout_dir, path)
        self._start(ud.checkout_dir, {}, d, is_module=True)

    def complete(self):
        """Hook called after all urls have been unpacked."""
        # NOTE: delete object!
        pass