aboutsummaryrefslogtreecommitdiffstats
path: root/lib/bb/fetch2
diff options
context:
space:
mode:
Diffstat (limited to 'lib/bb/fetch2')
-rw-r--r--lib/bb/fetch2/README57
-rw-r--r--lib/bb/fetch2/__init__.py424
-rw-r--r--lib/bb/fetch2/az.py93
-rw-r--r--lib/bb/fetch2/bzr.py8
-rw-r--r--lib/bb/fetch2/clearcase.py2
-rw-r--r--lib/bb/fetch2/crate.py150
-rw-r--r--lib/bb/fetch2/cvs.py4
-rw-r--r--lib/bb/fetch2/gcp.py102
-rw-r--r--lib/bb/fetch2/git.py333
-rw-r--r--lib/bb/fetch2/gitsm.py55
-rw-r--r--lib/bb/fetch2/hg.py17
-rw-r--r--lib/bb/fetch2/local.py33
-rw-r--r--lib/bb/fetch2/npm.py63
-rw-r--r--lib/bb/fetch2/npmsw.py96
-rw-r--r--lib/bb/fetch2/osc.py61
-rw-r--r--lib/bb/fetch2/perforce.py13
-rw-r--r--lib/bb/fetch2/repo.py2
-rw-r--r--lib/bb/fetch2/s3.py41
-rw-r--r--lib/bb/fetch2/sftp.py2
-rw-r--r--lib/bb/fetch2/ssh.py54
-rw-r--r--lib/bb/fetch2/svn.py21
-rw-r--r--lib/bb/fetch2/wget.py195
22 files changed, 1425 insertions, 401 deletions
diff --git a/lib/bb/fetch2/README b/lib/bb/fetch2/README
new file mode 100644
index 000000000..67b787ef4
--- /dev/null
+++ b/lib/bb/fetch2/README
@@ -0,0 +1,57 @@
+There are expectations of users of the fetcher code. This file attempts to document
+some of the constraints that are present. Some are obvious, some are less so. It is
+documented in the context of how OE uses it but the API calls are generic.
+
+a) network access for sources is only expected to happen in the do_fetch task.
+ This is not enforced or tested but is required so that we can:
+
+ i) audit the sources used (i.e. for license/manifest reasons)
+ ii) support offline builds with a suitable cache
+ iii) allow work to continue even with downtime upstream
+ iv) allow for changes upstream in incompatible ways
+ v) allow rebuilding of the software in X years time
+
+b) network access is not expected in do_unpack task.
+
+c) you can take DL_DIR and use it as a mirror for offline builds.
+
+d) access to the network is only made when explicitly configured in recipes
+ (e.g. use of AUTOREV, or use of git tags which change revision).
+
+e) fetcher output is deterministic (i.e. if you fetch configuration XXX now it
+ will match in future exactly in a clean build with a new DL_DIR).
+ One specific pain point is git tags. They can be replaced and change
+ so the git fetcher has to resolve them with the network. We use git revisions
+ where possible to avoid this and ensure determinism.
+
+f) network access is expected to work with the standard linux proxy variables
+ so that access behind firewalls works (the fetcher sets these in the
+ environment but only in the do_fetch tasks).
+
+g) access during parsing has to be minimal, a "git ls-remote" for an AUTOREV
+ git recipe might be ok but you can't expect to checkout a git tree.
+
+h) we need to provide revision information during parsing such that a version
+ for the recipe can be constructed.
+
+i) versions are expected to increase in a way which sorts correctly, allowing
+ package feeds to operate (a PR server is required for git revisions to sort).
+
+j) an API to query for possible version upgrades of a url is highly desirable to
+ allow our automated upgrade code to function (it is implied this always
+ has network access).
+
+k) Where fixes or changes to behaviour in the fetcher are made, we ask that
+ test cases are added (run with "bitbake-selftest bb.tests.fetch"). We do
+ have fairly extensive test coverage of the fetcher, as it is the only way
+ to track all of its corner cases; sadly, it still doesn't give complete
+ coverage.
+
+l) If using tools during parse time, they will have to be in ASSUME_PROVIDED
+ in OE's context as we can't build git-native, then parse a recipe and use
+ git ls-remote.
+
+Not all fetchers support all features, autorev is optional and doesn't make
+sense for some. Upgrade detection means different things in different contexts
+too.
+
diff --git a/lib/bb/fetch2/__init__.py b/lib/bb/fetch2/__init__.py
index 756f60212..5bf2c4b8c 100644
--- a/lib/bb/fetch2/__init__.py
+++ b/lib/bb/fetch2/__init__.py
@@ -113,7 +113,7 @@ class MissingParameterError(BBFetchException):
self.args = (missing, url)
class ParameterError(BBFetchException):
- """Exception raised when a url cannot be proccessed due to invalid parameters."""
+ """Exception raised when a url cannot be processed due to invalid parameters."""
def __init__(self, message, url):
msg = "URL: '%s' has invalid parameters. %s" % (url, message)
self.url = url
@@ -182,7 +182,7 @@ class URI(object):
Some notes about relative URIs: while it's specified that
a URI beginning with <scheme>:// should either be directly
followed by a hostname or a /, the old URI handling of the
- fetch2 library did not comform to this. Therefore, this URI
+ fetch2 library did not conform to this. Therefore, this URI
class has some kludges to make sure that URIs are parsed in
a way comforming to bitbake's current usage. This URI class
supports the following:
@@ -199,7 +199,7 @@ class URI(object):
file://hostname/absolute/path.diff (would be IETF compliant)
Note that the last case only applies to a list of
- "whitelisted" schemes (currently only file://), that requires
+ explicitly allowed schemes (currently only file://), that requires
its URIs to not have a network location.
"""
@@ -290,12 +290,12 @@ class URI(object):
def _param_str_split(self, string, elmdelim, kvdelim="="):
ret = collections.OrderedDict()
- for k, v in [x.split(kvdelim, 1) for x in string.split(elmdelim)]:
+ for k, v in [x.split(kvdelim, 1) if kvdelim in x else (x, None) for x in string.split(elmdelim) if x]:
ret[k] = v
return ret
def _param_str_join(self, dict_, elmdelim, kvdelim="="):
- return elmdelim.join([kvdelim.join([k, v]) for k, v in dict_.items()])
+ return elmdelim.join([kvdelim.join([k, v]) if v else k for k, v in dict_.items()])
@property
def hostport(self):
@@ -388,7 +388,7 @@ def decodeurl(url):
if s:
if not '=' in s:
raise MalformedUrl(url, "The URL: '%s' is invalid: parameter %s does not specify a value (missing '=')" % (url, s))
- s1, s2 = s.split('=')
+ s1, s2 = s.split('=', 1)
p[s1] = s2
return type, host, urllib.parse.unquote(path), user, pswd, p
@@ -402,24 +402,24 @@ def encodeurl(decoded):
if not type:
raise MissingParameterError('type', "encoded from the data %s" % str(decoded))
- url = '%s://' % type
+ url = ['%s://' % type]
if user and type != "file":
- url += "%s" % user
+ url.append("%s" % user)
if pswd:
- url += ":%s" % pswd
- url += "@"
+ url.append(":%s" % pswd)
+ url.append("@")
if host and type != "file":
- url += "%s" % host
+ url.append("%s" % host)
if path:
# Standardise path to ensure comparisons work
while '//' in path:
path = path.replace("//", "/")
- url += "%s" % urllib.parse.quote(path)
+ url.append("%s" % urllib.parse.quote(path))
if p:
for parm in p:
- url += ";%s=%s" % (parm, p[parm])
+ url.append(";%s=%s" % (parm, p[parm]))
- return url
+ return "".join(url)
def uri_replace(ud, uri_find, uri_replace, replacements, d, mirrortarball=None):
if not ud.url or not uri_find or not uri_replace:
@@ -428,8 +428,9 @@ def uri_replace(ud, uri_find, uri_replace, replacements, d, mirrortarball=None):
uri_decoded = list(decodeurl(ud.url))
uri_find_decoded = list(decodeurl(uri_find))
uri_replace_decoded = list(decodeurl(uri_replace))
- logger.debug(2, "For url %s comparing %s to %s" % (uri_decoded, uri_find_decoded, uri_replace_decoded))
+ logger.debug2("For url %s comparing %s to %s" % (uri_decoded, uri_find_decoded, uri_replace_decoded))
result_decoded = ['', '', '', '', '', {}]
+ # 0 - type, 1 - host, 2 - path, 3 - user, 4- pswd, 5 - params
for loc, i in enumerate(uri_find_decoded):
result_decoded[loc] = uri_decoded[loc]
regexp = i
@@ -449,6 +450,9 @@ def uri_replace(ud, uri_find, uri_replace, replacements, d, mirrortarball=None):
for l in replacements:
uri_replace_decoded[loc][k] = uri_replace_decoded[loc][k].replace(l, replacements[l])
result_decoded[loc][k] = uri_replace_decoded[loc][k]
+ elif (loc == 3 or loc == 4) and uri_replace_decoded[loc]:
+ # User/password in the replacement is just a straight replacement
+ result_decoded[loc] = uri_replace_decoded[loc]
elif (re.match(regexp, uri_decoded[loc])):
if not uri_replace_decoded[loc]:
result_decoded[loc] = ""
@@ -465,16 +469,24 @@ def uri_replace(ud, uri_find, uri_replace, replacements, d, mirrortarball=None):
basename = os.path.basename(mirrortarball)
# Kill parameters, they make no sense for mirror tarballs
uri_decoded[5] = {}
+ uri_find_decoded[5] = {}
elif ud.localpath and ud.method.supports_checksum(ud):
basename = os.path.basename(ud.localpath)
- if basename and not result_decoded[loc].endswith(basename):
- result_decoded[loc] = os.path.join(result_decoded[loc], basename)
+ if basename:
+ uri_basename = os.path.basename(uri_decoded[loc])
+ # Prefix with a slash as a sentinel in case
+ # result_decoded[loc] does not contain one.
+ path = "/" + result_decoded[loc]
+ if uri_basename and basename != uri_basename and path.endswith("/" + uri_basename):
+ result_decoded[loc] = path[1:-len(uri_basename)] + basename
+ elif not path.endswith("/" + basename):
+ result_decoded[loc] = os.path.join(path[1:], basename)
else:
return None
result = encodeurl(result_decoded)
if result == ud.url:
return None
- logger.debug(2, "For url %s returning %s" % (ud.url, result))
+ logger.debug2("For url %s returning %s" % (ud.url, result))
return result
methods = []
@@ -499,14 +511,14 @@ def fetcher_init(d):
# When to drop SCM head revisions controlled by user policy
srcrev_policy = d.getVar('BB_SRCREV_POLICY') or "clear"
if srcrev_policy == "cache":
- logger.debug(1, "Keeping SRCREV cache due to cache policy of: %s", srcrev_policy)
+ logger.debug("Keeping SRCREV cache due to cache policy of: %s", srcrev_policy)
elif srcrev_policy == "clear":
- logger.debug(1, "Clearing SRCREV cache due to cache policy of: %s", srcrev_policy)
+ logger.debug("Clearing SRCREV cache due to cache policy of: %s", srcrev_policy)
revs.clear()
else:
raise FetchError("Invalid SRCREV cache policy of: %s" % srcrev_policy)
- _checksum_cache.init_cache(d)
+ _checksum_cache.init_cache(d.getVar("BB_CACHEDIR"))
for m in methods:
if hasattr(m, "init"):
@@ -534,7 +546,7 @@ def mirror_from_string(data):
bb.warn('Invalid mirror data %s, should have paired members.' % data)
return list(zip(*[iter(mirrors)]*2))
-def verify_checksum(ud, d, precomputed={}):
+def verify_checksum(ud, d, precomputed={}, localpath=None, fatal_nochecksum=True):
"""
verify the MD5 and SHA256 checksum for downloaded src
@@ -548,20 +560,25 @@ def verify_checksum(ud, d, precomputed={}):
file against those in the recipe each time, rather than only after
downloading. See https://bugzilla.yoctoproject.org/show_bug.cgi?id=5571.
"""
-
if ud.ignore_checksums or not ud.method.supports_checksum(ud):
return {}
+ if localpath is None:
+ localpath = ud.localpath
+
def compute_checksum_info(checksum_id):
checksum_name = getattr(ud, "%s_name" % checksum_id)
if checksum_id in precomputed:
checksum_data = precomputed[checksum_id]
else:
- checksum_data = getattr(bb.utils, "%s_file" % checksum_id)(ud.localpath)
+ checksum_data = getattr(bb.utils, "%s_file" % checksum_id)(localpath)
checksum_expected = getattr(ud, "%s_expected" % checksum_id)
+ if checksum_expected == '':
+ checksum_expected = None
+
return {
"id": checksum_id,
"name": checksum_name,
@@ -581,17 +598,13 @@ def verify_checksum(ud, d, precomputed={}):
checksum_lines = ["SRC_URI[%s] = \"%s\"" % (ci["name"], ci["data"])]
# If no checksum has been provided
- if ud.method.recommends_checksum(ud) and all(ci["expected"] is None for ci in checksum_infos):
+ if fatal_nochecksum and ud.method.recommends_checksum(ud) and all(ci["expected"] is None for ci in checksum_infos):
messages = []
strict = d.getVar("BB_STRICT_CHECKSUM") or "0"
# If strict checking enabled and neither sum defined, raise error
if strict == "1":
- messages.append("No checksum specified for '%s', please add at " \
- "least one to the recipe:" % ud.localpath)
- messages.extend(checksum_lines)
- logger.error("\n".join(messages))
- raise NoChecksumError("Missing SRC_URI checksum", ud.url)
+ raise NoChecksumError("\n".join(checksum_lines))
bb.event.fire(MissingChecksumEvent(ud.url, **checksum_event), d)
@@ -612,8 +625,8 @@ def verify_checksum(ud, d, precomputed={}):
for ci in checksum_infos:
if ci["expected"] and ci["expected"] != ci["data"]:
- messages.append("File: '%s' has %s checksum %s when %s was " \
- "expected" % (ud.localpath, ci["id"], ci["data"], ci["expected"]))
+ messages.append("File: '%s' has %s checksum '%s' when '%s' was " \
+ "expected" % (localpath, ci["id"], ci["data"], ci["expected"]))
bad_checksum = ci["data"]
if bad_checksum:
@@ -731,13 +744,16 @@ def subprocess_setup():
# SIGPIPE errors are known issues with gzip/bash
signal.signal(signal.SIGPIPE, signal.SIG_DFL)
-def get_autorev(d):
- # only not cache src rev in autorev case
+def mark_recipe_nocache(d):
if d.getVar('BB_SRCREV_POLICY') != "cache":
d.setVar('BB_DONT_CACHE', '1')
+
+def get_autorev(d):
+ mark_recipe_nocache(d)
+ d.setVar("__BBAUTOREV_SEEN", True)
return "AUTOINC"
-def get_srcrev(d, method_name='sortable_revision'):
+def _get_srcrev(d, method_name='sortable_revision'):
"""
Return the revision string, usually for use in the version string (PV) of the current package
Most packages usually only have one SCM so we just pass on the call.
@@ -751,23 +767,34 @@ def get_srcrev(d, method_name='sortable_revision'):
that fetcher provides a method with the given name and the same signature as sortable_revision.
"""
+ d.setVar("__BBSRCREV_SEEN", "1")
+ recursion = d.getVar("__BBINSRCREV")
+ if recursion:
+ raise FetchError("There are recursive references in fetcher variables, likely through SRC_URI")
+ d.setVar("__BBINSRCREV", True)
+
scms = []
+ revs = []
fetcher = Fetch(d.getVar('SRC_URI').split(), d)
urldata = fetcher.ud
for u in urldata:
if urldata[u].method.supports_srcrev():
scms.append(u)
- if len(scms) == 0:
- raise FetchError("SRCREV was used yet no valid SCM was found in SRC_URI")
+ if not scms:
+ d.delVar("__BBINSRCREV")
+ return "", revs
+
if len(scms) == 1 and len(urldata[scms[0]].names) == 1:
autoinc, rev = getattr(urldata[scms[0]].method, method_name)(urldata[scms[0]], d, urldata[scms[0]].names[0])
+ revs.append(rev)
if len(rev) > 10:
rev = rev[:10]
+ d.delVar("__BBINSRCREV")
if autoinc:
- return "AUTOINC+" + rev
- return rev
+ return "AUTOINC+" + rev, revs
+ return rev, revs
#
# Mutiple SCMs are in SRC_URI so we resort to SRCREV_FORMAT
@@ -783,6 +810,7 @@ def get_srcrev(d, method_name='sortable_revision'):
ud = urldata[scm]
for name in ud.names:
autoinc, rev = getattr(ud.method, method_name)(ud, d, name)
+ revs.append(rev)
seenautoinc = seenautoinc or autoinc
if len(rev) > 10:
rev = rev[:10]
@@ -799,12 +827,70 @@ def get_srcrev(d, method_name='sortable_revision'):
if seenautoinc:
format = "AUTOINC+" + format
- return format
+ d.delVar("__BBINSRCREV")
+ return format, revs
+
+def get_hashvalue(d, method_name='sortable_revision'):
+ pkgv, revs = _get_srcrev(d, method_name=method_name)
+ return " ".join(revs)
+
+def get_pkgv_string(d, method_name='sortable_revision'):
+ pkgv, revs = _get_srcrev(d, method_name=method_name)
+ return pkgv
+
+def get_srcrev(d, method_name='sortable_revision'):
+ pkgv, revs = _get_srcrev(d, method_name=method_name)
+ if not pkgv:
+ raise FetchError("SRCREV was used yet no valid SCM was found in SRC_URI")
+ return pkgv
def localpath(url, d):
fetcher = bb.fetch2.Fetch([url], d)
return fetcher.localpath(url)
+# Need to export PATH as binary could be in metadata paths
+# rather than host provided
+# Also include some other variables.
+FETCH_EXPORT_VARS = ['HOME', 'PATH',
+ 'HTTP_PROXY', 'http_proxy',
+ 'HTTPS_PROXY', 'https_proxy',
+ 'FTP_PROXY', 'ftp_proxy',
+ 'FTPS_PROXY', 'ftps_proxy',
+ 'NO_PROXY', 'no_proxy',
+ 'ALL_PROXY', 'all_proxy',
+ 'GIT_PROXY_COMMAND',
+ 'GIT_SSH',
+ 'GIT_SSH_COMMAND',
+ 'GIT_SSL_CAINFO',
+ 'GIT_SMART_HTTP',
+ 'SSH_AUTH_SOCK', 'SSH_AGENT_PID',
+ 'SOCKS5_USER', 'SOCKS5_PASSWD',
+ 'DBUS_SESSION_BUS_ADDRESS',
+ 'P4CONFIG',
+ 'SSL_CERT_FILE',
+ 'NODE_EXTRA_CA_CERTS',
+ 'AWS_PROFILE',
+ 'AWS_ACCESS_KEY_ID',
+ 'AWS_SECRET_ACCESS_KEY',
+ 'AWS_ROLE_ARN',
+ 'AWS_WEB_IDENTITY_TOKEN_FILE',
+ 'AWS_DEFAULT_REGION',
+ 'AWS_SESSION_TOKEN',
+ 'GIT_CACHE_PATH',
+ 'REMOTE_CONTAINERS_IPC',
+ 'SSL_CERT_DIR']
+
+def get_fetcher_environment(d):
+ newenv = {}
+ origenv = d.getVar("BB_ORIGENV")
+ for name in bb.fetch2.FETCH_EXPORT_VARS:
+ value = d.getVar(name)
+ if not value and origenv:
+ value = origenv.getVar(name)
+ if value:
+ newenv[name] = value
+ return newenv
+
def runfetchcmd(cmd, d, quiet=False, cleanup=None, log=None, workdir=None):
"""
Run cmd returning the command output
@@ -813,25 +899,7 @@ def runfetchcmd(cmd, d, quiet=False, cleanup=None, log=None, workdir=None):
Optionally remove the files/directories listed in cleanup upon failure
"""
- # Need to export PATH as binary could be in metadata paths
- # rather than host provided
- # Also include some other variables.
- # FIXME: Should really include all export varaiables?
- exportvars = ['HOME', 'PATH',
- 'HTTP_PROXY', 'http_proxy',
- 'HTTPS_PROXY', 'https_proxy',
- 'FTP_PROXY', 'ftp_proxy',
- 'FTPS_PROXY', 'ftps_proxy',
- 'NO_PROXY', 'no_proxy',
- 'ALL_PROXY', 'all_proxy',
- 'GIT_PROXY_COMMAND',
- 'GIT_SSH',
- 'GIT_SSL_CAINFO',
- 'GIT_SMART_HTTP',
- 'SSH_AUTH_SOCK', 'SSH_AGENT_PID',
- 'SOCKS5_USER', 'SOCKS5_PASSWD',
- 'DBUS_SESSION_BUS_ADDRESS',
- 'P4CONFIG']
+ exportvars = FETCH_EXPORT_VARS
if not cleanup:
cleanup = []
@@ -853,18 +921,13 @@ def runfetchcmd(cmd, d, quiet=False, cleanup=None, log=None, workdir=None):
if val:
cmd = 'export ' + var + '=\"%s\"; %s' % (val, cmd)
- # Ensure that a _PYTHON_SYSCONFIGDATA_NAME value set by a recipe
- # (for example via python3native.bbclass since warrior) is not set for
- # host Python (otherwise tools like git-make-shallow will fail)
- cmd = 'unset _PYTHON_SYSCONFIGDATA_NAME; ' + cmd
-
# Disable pseudo as it may affect ssh, potentially causing it to hang.
cmd = 'export PSEUDO_DISABLED=1; ' + cmd
if workdir:
- logger.debug(1, "Running '%s' in %s" % (cmd, workdir))
+ logger.debug("Running '%s' in %s" % (cmd, workdir))
else:
- logger.debug(1, "Running %s", cmd)
+ logger.debug("Running %s", cmd)
success = False
error_message = ""
@@ -873,14 +936,17 @@ def runfetchcmd(cmd, d, quiet=False, cleanup=None, log=None, workdir=None):
(output, errors) = bb.process.run(cmd, log=log, shell=True, stderr=subprocess.PIPE, cwd=workdir)
success = True
except bb.process.NotFoundError as e:
- error_message = "Fetch command %s" % (e.command)
+ error_message = "Fetch command %s not found" % (e.command)
except bb.process.ExecutionError as e:
if e.stdout:
output = "output:\n%s\n%s" % (e.stdout, e.stderr)
elif e.stderr:
output = "output:\n%s" % e.stderr
else:
- output = "no output"
+ if log:
+ output = "see logfile for output"
+ else:
+ output = "no output"
error_message = "Fetch command %s failed with exit code %s, %s" % (e.command, e.exitcode, output)
except bb.process.CmdError as e:
error_message = "Fetch command %s could not be run:\n%s" % (e.command, e.msg)
@@ -905,7 +971,7 @@ def check_network_access(d, info, url):
elif not trusted_network(d, url):
raise UntrustedUrl(url, info)
else:
- logger.debug(1, "Fetcher accessed the network with the command %s" % info)
+ logger.debug("Fetcher accessed the network with the command %s" % info)
def build_mirroruris(origud, mirrors, ld):
uris = []
@@ -931,7 +997,7 @@ def build_mirroruris(origud, mirrors, ld):
continue
if not trusted_network(ld, newuri):
- logger.debug(1, "Mirror %s not in the list of trusted networks, skipping" % (newuri))
+ logger.debug("Mirror %s not in the list of trusted networks, skipping" % (newuri))
continue
# Create a local copy of the mirrors minus the current line
@@ -942,10 +1008,11 @@ def build_mirroruris(origud, mirrors, ld):
try:
newud = FetchData(newuri, ld)
+ newud.ignore_checksums = True
newud.setup_localpath(ld)
except bb.fetch2.BBFetchException as e:
- logger.debug(1, "Mirror fetch failure for url %s (original url: %s)" % (newuri, origud.url))
- logger.debug(1, str(e))
+ logger.debug("Mirror fetch failure for url %s (original url: %s)" % (newuri, origud.url))
+ logger.debug(str(e))
try:
# setup_localpath of file:// urls may fail, we should still see
# if mirrors of the url exist
@@ -1048,10 +1115,11 @@ def try_mirror_url(fetch, origud, ud, ld, check = False):
elif isinstance(e, NoChecksumError):
raise
else:
- logger.debug(1, "Mirror fetch failure for url %s (original url: %s)" % (ud.url, origud.url))
- logger.debug(1, str(e))
+ logger.debug("Mirror fetch failure for url %s (original url: %s)" % (ud.url, origud.url))
+ logger.debug(str(e))
try:
- ud.method.clean(ud, ld)
+ if ud.method.cleanup_upon_failure():
+ ud.method.clean(ud, ld)
except UnboundLocalError:
pass
return False
@@ -1062,6 +1130,8 @@ def try_mirror_url(fetch, origud, ud, ld, check = False):
def ensure_symlink(target, link_name):
if not os.path.exists(link_name):
+ dirname = os.path.dirname(link_name)
+ bb.utils.mkdirhier(dirname)
if os.path.islink(link_name):
# Broken symbolic link
os.unlink(link_name)
@@ -1145,11 +1215,11 @@ def srcrev_internal_helper(ud, d, name):
pn = d.getVar("PN")
attempts = []
if name != '' and pn:
- attempts.append("SRCREV_%s_pn-%s" % (name, pn))
+ attempts.append("SRCREV_%s:pn-%s" % (name, pn))
if name != '':
attempts.append("SRCREV_%s" % name)
if pn:
- attempts.append("SRCREV_pn-%s" % pn)
+ attempts.append("SRCREV:pn-%s" % pn)
attempts.append("SRCREV")
for a in attempts:
@@ -1174,6 +1244,7 @@ def srcrev_internal_helper(ud, d, name):
if srcrev == "INVALID" or not srcrev:
raise FetchError("Please set a valid SRCREV for url %s (possible key names are %s, or use a ;rev=X URL parameter)" % (str(attempts), ud.url), ud.url)
if srcrev == "AUTOINC":
+ d.setVar("__BBAUTOREV_ACTED_UPON", True)
srcrev = ud.method.latest_revision(ud, d, name)
return srcrev
@@ -1185,25 +1256,21 @@ def get_checksum_file_list(d):
SRC_URI as a space-separated string
"""
fetch = Fetch([], d, cache = False, localonly = True)
-
- dl_dir = d.getVar('DL_DIR')
filelist = []
for u in fetch.urls:
ud = fetch.ud[u]
-
if ud and isinstance(ud.method, local.Local):
- paths = ud.method.localpaths(ud, d)
+ found = False
+ paths = ud.method.localfile_searchpaths(ud, d)
for f in paths:
pth = ud.decodedurl
- if '*' in pth:
- f = os.path.join(os.path.abspath(f), pth)
- if f.startswith(dl_dir):
- # The local fetcher's behaviour is to return a path under DL_DIR if it couldn't find the file anywhere else
- if os.path.exists(f):
- bb.warn("Getting checksum for %s SRC_URI entry %s: file not found except in DL_DIR" % (d.getVar('PN'), os.path.basename(f)))
- else:
- bb.warn("Unable to get checksum for %s SRC_URI entry %s: file could not be found" % (d.getVar('PN'), os.path.basename(f)))
+ if os.path.exists(f):
+ found = True
filelist.append(f + ":" + str(os.path.exists(f)))
+ if not found:
+ bb.fatal(("Unable to get checksum for %s SRC_URI entry %s: file could not be found"
+ "\nThe following paths were searched:"
+ "\n%s") % (d.getVar('PN'), os.path.basename(f), '\n'.join(paths)))
return " ".join(filelist)
@@ -1250,18 +1317,13 @@ class FetchData(object):
if checksum_name in self.parm:
checksum_expected = self.parm[checksum_name]
- elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3"]:
+ elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3", "az", "crate", "gs"]:
checksum_expected = None
else:
checksum_expected = d.getVarFlag("SRC_URI", checksum_name)
setattr(self, "%s_expected" % checksum_id, checksum_expected)
- for checksum_id in CHECKSUM_LIST:
- configure_checksum(checksum_id)
-
- self.ignore_checksums = False
-
self.names = self.parm.get("name",'default').split(',')
self.method = None
@@ -1283,6 +1345,11 @@ class FetchData(object):
if hasattr(self.method, "urldata_init"):
self.method.urldata_init(self, d)
+ for checksum_id in CHECKSUM_LIST:
+ configure_checksum(checksum_id)
+
+ self.ignore_checksums = False
+
if "localpath" in self.parm:
# if user sets localpath for file, use it instead.
self.localpath = self.parm["localpath"]
@@ -1362,12 +1429,12 @@ class FetchMethod(object):
Is localpath something that can be represented by a checksum?
"""
+ # We cannot compute checksums for None
+ if urldata.localpath is None:
+ return False
# We cannot compute checksums for directories
if os.path.isdir(urldata.localpath):
return False
- if urldata.localpath.find("*") != -1:
- return False
-
return True
def recommends_checksum(self, urldata):
@@ -1377,6 +1444,12 @@ class FetchMethod(object):
"""
return False
+ def cleanup_upon_failure(self):
+ """
+ When a fetch fails, should clean() be called?
+ """
+ return True
+
def verify_donestamp(self, ud, d):
"""
Verify the donestamp file
@@ -1430,11 +1503,6 @@ class FetchMethod(object):
iterate = False
file = urldata.localpath
- # Localpath can't deal with 'dir/*' entries, so it converts them to '.',
- # but it must be corrected back for local files copying
- if urldata.basename == '*' and file.endswith('/.'):
- file = '%s/%s' % (file.rstrip('/.'), urldata.path)
-
try:
unpack = bb.utils.to_boolean(urldata.parm.get('unpack'), True)
except ValueError as exc:
@@ -1449,28 +1517,35 @@ class FetchMethod(object):
cmd = None
if unpack:
+ tar_cmd = 'tar --extract --no-same-owner'
+ if 'striplevel' in urldata.parm:
+ tar_cmd += ' --strip-components=%s' % urldata.parm['striplevel']
if file.endswith('.tar'):
- cmd = 'tar x --no-same-owner -f %s' % file
+ cmd = '%s -f %s' % (tar_cmd, file)
elif file.endswith('.tgz') or file.endswith('.tar.gz') or file.endswith('.tar.Z'):
- cmd = 'tar xz --no-same-owner -f %s' % file
+ cmd = '%s -z -f %s' % (tar_cmd, file)
elif file.endswith('.tbz') or file.endswith('.tbz2') or file.endswith('.tar.bz2'):
- cmd = 'bzip2 -dc %s | tar x --no-same-owner -f -' % file
+ cmd = 'bzip2 -dc %s | %s -f -' % (file, tar_cmd)
elif file.endswith('.gz') or file.endswith('.Z') or file.endswith('.z'):
cmd = 'gzip -dc %s > %s' % (file, efile)
elif file.endswith('.bz2'):
cmd = 'bzip2 -dc %s > %s' % (file, efile)
elif file.endswith('.txz') or file.endswith('.tar.xz'):
- cmd = 'xz -dc %s | tar x --no-same-owner -f -' % file
+ cmd = 'xz -dc %s | %s -f -' % (file, tar_cmd)
elif file.endswith('.xz'):
cmd = 'xz -dc %s > %s' % (file, efile)
elif file.endswith('.tar.lz'):
- cmd = 'lzip -dc %s | tar x --no-same-owner -f -' % file
+ cmd = 'lzip -dc %s | %s -f -' % (file, tar_cmd)
elif file.endswith('.lz'):
cmd = 'lzip -dc %s > %s' % (file, efile)
elif file.endswith('.tar.7z'):
- cmd = '7z x -so %s | tar x --no-same-owner -f -' % file
+ cmd = '7z x -so %s | %s -f -' % (file, tar_cmd)
elif file.endswith('.7z'):
cmd = '7za x -y %s 1>/dev/null' % file
+ elif file.endswith('.tzst') or file.endswith('.tar.zst'):
+ cmd = 'zstd --decompress --stdout %s | %s -f -' % (file, tar_cmd)
+ elif file.endswith('.zst'):
+ cmd = 'zstd --decompress --stdout %s > %s' % (file, efile)
elif file.endswith('.zip') or file.endswith('.jar'):
try:
dos = bb.utils.to_boolean(urldata.parm.get('dos'), False)
@@ -1501,7 +1576,7 @@ class FetchMethod(object):
raise UnpackError("Unable to unpack deb/ipk package - does not contain data.tar.* file", urldata.url)
else:
raise UnpackError("Unable to unpack deb/ipk package - could not list contents", urldata.url)
- cmd = 'ar x %s %s && tar --no-same-owner -xpf %s && rm %s' % (file, datafile, datafile, datafile)
+ cmd = 'ar x %s %s && %s -p -f %s && rm %s' % (file, datafile, tar_cmd, datafile, datafile)
# If 'subdir' param exists, create a dir and use it as destination for unpack cmd
if 'subdir' in urldata.parm:
@@ -1517,6 +1592,7 @@ class FetchMethod(object):
unpackdir = rootdir
if not unpack or not cmd:
+ urldata.unpack_tracer.unpack("file-copy", unpackdir)
# If file == dest, then avoid any copies, as we already put the file into dest!
dest = os.path.join(unpackdir, os.path.basename(file))
if file != dest and not (os.path.exists(dest) and os.path.samefile(file, dest)):
@@ -1530,7 +1606,9 @@ class FetchMethod(object):
if urlpath.find("/") != -1:
destdir = urlpath.rsplit("/", 1)[0] + '/'
bb.utils.mkdirhier("%s/%s" % (unpackdir, destdir))
- cmd = 'cp -fpPRH %s %s' % (file, destdir)
+ cmd = 'cp -fpPRH "%s" "%s"' % (file, destdir)
+ else:
+ urldata.unpack_tracer.unpack("archive-extract", unpackdir)
if not cmd:
return
@@ -1613,8 +1691,6 @@ class FetchMethod(object):
"""
if os.path.exists(ud.localpath):
return True
- if ud.localpath.find("*") != -1:
- return True
return False
def implicit_urldata(self, ud, d):
@@ -1624,12 +1700,61 @@ class FetchMethod(object):
"""
return []
+
+class DummyUnpackTracer(object):
+ """
+ Abstract API definition for a class that traces unpacked source files back
+ to their respective upstream SRC_URI entries, for software composition
+ analysis, license compliance and detailed SBOM generation purposes.
+ User may load their own unpack tracer class (instead of the dummy
+ one) by setting the BB_UNPACK_TRACER_CLASS config parameter.
+ """
+ def start(self, unpackdir, urldata_dict, d):
+ """
+ Start tracing the core Fetch.unpack process, using an index to map
+ unpacked files to each SRC_URI entry.
+ This method is called by Fetch.unpack and it may receive nested calls by
+ gitsm and npmsw fetchers, that expand SRC_URI entries by adding implicit
+ URLs and by recursively calling Fetch.unpack from new (nested) Fetch
+ instances.
+ """
+ return
+ def start_url(self, url):
+ """Start tracing url unpack process.
+ This method is called by Fetch.unpack before the fetcher-specific unpack
+ method starts, and it may receive nested calls by gitsm and npmsw
+ fetchers.
+ """
+ return
+ def unpack(self, unpack_type, destdir):
+ """
+ Set unpack_type and destdir for current url.
+ This method is called by the fetcher-specific unpack method after url
+ tracing started.
+ """
+ return
+ def finish_url(self, url):
+ """Finish tracing url unpack process and update the file index.
+ This method is called by Fetch.unpack after the fetcher-specific unpack
+ method finished its job, and it may receive nested calls by gitsm
+ and npmsw fetchers.
+ """
+ return
+ def complete(self):
+ """
+ Finish tracing the Fetch.unpack process, and check if all nested
+ Fetch.unpack calls (if any) have been completed; if so, save collected
+ metadata.
+ """
+ return
+
+
class Fetch(object):
def __init__(self, urls, d, cache = True, localonly = False, connection_cache = None):
if localonly and cache:
raise Exception("bb.fetch2.Fetch.__init__: cannot set cache and localonly at same time")
- if len(urls) == 0:
+ if not urls:
urls = d.getVar("SRC_URI").split()
self.urls = urls
self.d = d
@@ -1644,10 +1769,30 @@ class Fetch(object):
if key in urldata_cache:
self.ud = urldata_cache[key]
+ # the unpack_tracer object needs to be made available to possible nested
+ # Fetch instances (when those are created by gitsm and npmsw fetchers)
+ # so we set it as a global variable
+ global unpack_tracer
+ try:
+ unpack_tracer
+ except NameError:
+ class_path = d.getVar("BB_UNPACK_TRACER_CLASS")
+ if class_path:
+ # use user-defined unpack tracer class
+ import importlib
+ module_name, _, class_name = class_path.rpartition(".")
+ module = importlib.import_module(module_name)
+ class_ = getattr(module, class_name)
+ unpack_tracer = class_()
+ else:
+ # fall back to the dummy/abstract class
+ unpack_tracer = DummyUnpackTracer()
+
for url in urls:
if url not in self.ud:
try:
self.ud[url] = FetchData(url, d, localonly)
+ self.ud[url].unpack_tracer = unpack_tracer
except NonLocalMethod:
if localonly:
self.ud[url] = None
@@ -1686,6 +1831,7 @@ class Fetch(object):
network = self.d.getVar("BB_NO_NETWORK")
premirroronly = bb.utils.to_boolean(self.d.getVar("BB_FETCH_PREMIRRORONLY"))
+ checksum_missing_messages = []
for u in urls:
ud = self.ud[u]
ud.setup_localpath(self.d)
@@ -1697,11 +1843,10 @@ class Fetch(object):
try:
self.d.setVar("BB_NO_NETWORK", network)
-
if m.verify_donestamp(ud, self.d) and not m.need_update(ud, self.d):
done = True
elif m.try_premirror(ud, self.d):
- logger.debug(1, "Trying PREMIRRORS")
+ logger.debug("Trying PREMIRRORS")
mirrors = mirror_from_string(self.d.getVar('PREMIRRORS'))
done = m.try_mirrors(self, ud, self.d, mirrors)
if done:
@@ -1711,19 +1856,21 @@ class Fetch(object):
m.update_donestamp(ud, self.d)
except ChecksumError as e:
logger.warning("Checksum failure encountered with premirror download of %s - will attempt other sources." % u)
- logger.debug(1, str(e))
+ logger.debug(str(e))
done = False
if premirroronly:
self.d.setVar("BB_NO_NETWORK", "1")
firsterr = None
- verified_stamp = m.verify_donestamp(ud, self.d)
+ verified_stamp = False
+ if done:
+ verified_stamp = m.verify_donestamp(ud, self.d)
if not done and (not verified_stamp or m.need_update(ud, self.d)):
try:
if not trusted_network(self.d, ud.url):
raise UntrustedUrl(ud.url)
- logger.debug(1, "Trying Upstream")
+ logger.debug("Trying Upstream")
m.download(ud, self.d)
if hasattr(m, "build_mirror_data"):
m.build_mirror_data(ud, self.d)
@@ -1738,19 +1885,19 @@ class Fetch(object):
except BBFetchException as e:
if isinstance(e, ChecksumError):
logger.warning("Checksum failure encountered with download of %s - will attempt other sources if available" % u)
- logger.debug(1, str(e))
+ logger.debug(str(e))
if os.path.exists(ud.localpath):
rename_bad_checksum(ud, e.checksum)
elif isinstance(e, NoChecksumError):
raise
else:
logger.warning('Failed to fetch URL %s, attempting MIRRORS if available' % u)
- logger.debug(1, str(e))
+ logger.debug(str(e))
firsterr = e
# Remove any incomplete fetch
- if not verified_stamp:
+ if not verified_stamp and m.cleanup_upon_failure():
m.clean(ud, self.d)
- logger.debug(1, "Trying MIRRORS")
+ logger.debug("Trying MIRRORS")
mirrors = mirror_from_string(self.d.getVar('MIRRORS'))
done = m.try_mirrors(self, ud, self.d, mirrors)
@@ -1767,17 +1914,28 @@ class Fetch(object):
raise ChecksumError("Stale Error Detected")
except BBFetchException as e:
- if isinstance(e, ChecksumError):
+ if isinstance(e, NoChecksumError):
+ (message, _) = e.args
+ checksum_missing_messages.append(message)
+ continue
+ elif isinstance(e, ChecksumError):
logger.error("Checksum failure fetching %s" % u)
raise
finally:
if ud.lockfile:
bb.utils.unlockfile(lf)
+ if checksum_missing_messages:
+ logger.error("Missing SRC_URI checksum, please add those to the recipe: \n%s", "\n".join(checksum_missing_messages))
+ raise BBFetchException("There was some missing checksums in the recipe")
def checkstatus(self, urls=None):
"""
- Check all urls exist upstream
+ Check all URLs exist upstream.
+
+ Returns None if the URLs exist, raises FetchError if the check wasn't
+ successful but there wasn't an error (such as file not found), and
+ raises other exceptions in error cases.
"""
if not urls:
@@ -1787,7 +1945,7 @@ class Fetch(object):
ud = self.ud[u]
ud.setup_localpath(self.d)
m = ud.method
- logger.debug(1, "Testing URL %s", u)
+ logger.debug("Testing URL %s", u)
# First try checking uri, u, from PREMIRRORS
mirrors = mirror_from_string(self.d.getVar('PREMIRRORS'))
ret = m.try_mirrors(self, ud, self.d, mirrors, True)
@@ -1800,7 +1958,7 @@ class Fetch(object):
ret = m.try_mirrors(self, ud, self.d, mirrors, True)
if not ret:
- raise FetchError("URL %s doesn't work" % u, u)
+ raise FetchError("URL doesn't work", u)
def unpack(self, root, urls=None):
"""
@@ -1810,6 +1968,8 @@ class Fetch(object):
if not urls:
urls = self.urls
+ unpack_tracer.start(root, self.ud, self.d)
+
for u in urls:
ud = self.ud[u]
ud.setup_localpath(self.d)
@@ -1817,11 +1977,15 @@ class Fetch(object):
if ud.lockfile:
lf = bb.utils.lockfile(ud.lockfile)
+ unpack_tracer.start_url(u)
ud.method.unpack(ud, root, self.d)
+ unpack_tracer.finish_url(u)
if ud.lockfile:
bb.utils.unlockfile(lf)
+ unpack_tracer.complete()
+
def clean(self, urls=None):
"""
Clean files that the fetcher gets or places
@@ -1921,6 +2085,9 @@ from . import repo
from . import clearcase
from . import npm
from . import npmsw
+from . import az
+from . import crate
+from . import gcp
methods.append(local.Local())
methods.append(wget.Wget())
@@ -1940,3 +2107,6 @@ methods.append(repo.Repo())
methods.append(clearcase.ClearCase())
methods.append(npm.Npm())
methods.append(npmsw.NpmShrinkWrap())
+methods.append(az.Az())
+methods.append(crate.Crate())
+methods.append(gcp.GCP())
diff --git a/lib/bb/fetch2/az.py b/lib/bb/fetch2/az.py
new file mode 100644
index 000000000..3ccc594c2
--- /dev/null
+++ b/lib/bb/fetch2/az.py
@@ -0,0 +1,93 @@
+"""
+BitBake 'Fetch' Azure Storage implementation
+
+"""
+
+# Copyright (C) 2021 Alejandro Hernandez Samaniego
+#
+# Based on bb.fetch2.wget:
+# Copyright (C) 2003, 2004 Chris Larson
+#
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Based on functions from the base bb module, Copyright 2003 Holger Schurig
+
+import shlex
+import os
+import bb
+from bb.fetch2 import FetchError
+from bb.fetch2 import logger
+from bb.fetch2.wget import Wget
+
+
+class Az(Wget):
+
+ def supports(self, ud, d):
+ """
+ Check to see if a given url can be fetched from Azure Storage
+ """
+ return ud.type in ['az']
+
+
+ def checkstatus(self, fetch, ud, d, try_again=True):
+
+ # checkstatus discards parameters either way, we need to do this before adding the SAS
+ ud.url = ud.url.replace('az://','https://').split(';')[0]
+
+ az_sas = d.getVar('AZ_SAS')
+ if az_sas and az_sas not in ud.url:
+ ud.url += az_sas
+
+ return Wget.checkstatus(self, fetch, ud, d, try_again)
+
+ # Override download method, include retries
+ def download(self, ud, d, retries=3):
+ """Fetch urls"""
+
+ # If we're reaching the account transaction limit we might be refused a connection,
+ # retrying allows us to avoid false negatives since the limit changes over time
+ fetchcmd = self.basecmd + ' --retry-connrefused --waitretry=5'
+
+ # We need to provide a localpath to avoid wget using the SAS
+ # ud.localfile either has the downloadfilename or ud.path
+ localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
+ bb.utils.mkdirhier(os.path.dirname(localpath))
+ fetchcmd += " -O %s" % shlex.quote(localpath)
+
+
+ if ud.user and ud.pswd:
+ fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
+
+ # Check if a Shared Access Signature was given and use it
+ az_sas = d.getVar('AZ_SAS')
+
+ if az_sas:
+ azuri = '%s%s%s%s' % ('https://', ud.host, ud.path, az_sas)
+ else:
+ azuri = '%s%s%s' % ('https://', ud.host, ud.path)
+
+ if os.path.exists(ud.localpath):
+ # file exists, but we didn't complete it.. trying again.
+ fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % azuri)
+ else:
+ fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % azuri)
+
+ try:
+ self._runwget(ud, d, fetchcmd, False)
+ except FetchError as e:
+ # Azure fails on handshake sometimes when using wget after some stress, producing a
+ # FetchError from the fetcher, if the artifact exists retrying should succeed
+ if 'Unable to establish SSL connection' in str(e):
+ logger.debug2('Unable to establish SSL connection: Retries remaining: %s, Retrying...' % retries)
+ self.download(ud, d, retries -1)
+
+ # Sanity check since wget can pretend it succeeded when it didn't
+ # Also, this used to happen if sourceforge sent us to the mirror page
+ if not os.path.exists(ud.localpath):
+ raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (azuri, ud.localpath), azuri)
+
+ if os.path.getsize(ud.localpath) == 0:
+ os.remove(ud.localpath)
+ raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (azuri), azuri)
+
+ return True
diff --git a/lib/bb/fetch2/bzr.py b/lib/bb/fetch2/bzr.py
index 566ace9f0..fc558f50b 100644
--- a/lib/bb/fetch2/bzr.py
+++ b/lib/bb/fetch2/bzr.py
@@ -74,16 +74,16 @@ class Bzr(FetchMethod):
if os.access(os.path.join(ud.pkgdir, os.path.basename(ud.pkgdir), '.bzr'), os.R_OK):
bzrcmd = self._buildbzrcommand(ud, d, "update")
- logger.debug(1, "BZR Update %s", ud.url)
+ logger.debug("BZR Update %s", ud.url)
bb.fetch2.check_network_access(d, bzrcmd, ud.url)
runfetchcmd(bzrcmd, d, workdir=os.path.join(ud.pkgdir, os.path.basename(ud.path)))
else:
bb.utils.remove(os.path.join(ud.pkgdir, os.path.basename(ud.pkgdir)), True)
bzrcmd = self._buildbzrcommand(ud, d, "fetch")
bb.fetch2.check_network_access(d, bzrcmd, ud.url)
- logger.debug(1, "BZR Checkout %s", ud.url)
+ logger.debug("BZR Checkout %s", ud.url)
bb.utils.mkdirhier(ud.pkgdir)
- logger.debug(1, "Running %s", bzrcmd)
+ logger.debug("Running %s", bzrcmd)
runfetchcmd(bzrcmd, d, workdir=ud.pkgdir)
scmdata = ud.parm.get("scmdata", "")
@@ -109,7 +109,7 @@ class Bzr(FetchMethod):
"""
Return the latest upstream revision number
"""
- logger.debug(2, "BZR fetcher hitting network for %s", ud.url)
+ logger.debug2("BZR fetcher hitting network for %s", ud.url)
bb.fetch2.check_network_access(d, self._buildbzrcommand(ud, d, "revno"), ud.url)
diff --git a/lib/bb/fetch2/clearcase.py b/lib/bb/fetch2/clearcase.py
index 49d7ae1b0..1a9c86376 100644
--- a/lib/bb/fetch2/clearcase.py
+++ b/lib/bb/fetch2/clearcase.py
@@ -70,7 +70,7 @@ class ClearCase(FetchMethod):
return ud.type in ['ccrc']
def debug(self, msg):
- logger.debug(1, "ClearCase: %s", msg)
+ logger.debug("ClearCase: %s", msg)
def urldata_init(self, ud, d):
"""
diff --git a/lib/bb/fetch2/crate.py b/lib/bb/fetch2/crate.py
new file mode 100644
index 000000000..e611736f0
--- /dev/null
+++ b/lib/bb/fetch2/crate.py
@@ -0,0 +1,150 @@
+# ex:ts=4:sw=4:sts=4:et
+# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
+"""
+BitBake 'Fetch' implementation for crates.io
+"""
+
+# Copyright (C) 2016 Doug Goldstein
+#
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Based on functions from the base bb module, Copyright 2003 Holger Schurig
+
+import hashlib
+import json
+import os
+import subprocess
+import bb
+from bb.fetch2 import logger, subprocess_setup, UnpackError
+from bb.fetch2.wget import Wget
+
+
+class Crate(Wget):
+
+ """Class to fetch crates via wget"""
+
+ def _cargo_bitbake_path(self, rootdir):
+ return os.path.join(rootdir, "cargo_home", "bitbake")
+
+ def supports(self, ud, d):
+ """
+ Check to see if a given url is for this fetcher
+ """
+ return ud.type in ['crate']
+
+ def recommends_checksum(self, urldata):
+ return True
+
+ def urldata_init(self, ud, d):
+ """
+ Sets up to download the respective crate from crates.io
+ """
+
+ if ud.type == 'crate':
+ self._crate_urldata_init(ud, d)
+
+ super(Crate, self).urldata_init(ud, d)
+
+ def _crate_urldata_init(self, ud, d):
+ """
+ Sets up the download for a crate
+ """
+
+ # URL syntax is: crate://HOST/NAME/VERSION
+ # break the URL apart by /
+ parts = ud.url.split('/')
+ if len(parts) < 5:
+ raise bb.fetch2.ParameterError("Invalid URL: Must be crate://HOST/NAME/VERSION", ud.url)
+
+ # version is expected to be the last token
+ # but ignore possible url parameters which will be used
+ # by the top fetcher class
+ version = parts[-1].split(";")[0]
+ # second to last field is name
+ name = parts[-2]
+ # host (this is to allow custom crate registries to be specified)
+ host = '/'.join(parts[2:-2])
+
+ # if using upstream just fix it up nicely
+ if host == 'crates.io':
+ host = 'crates.io/api/v1/crates'
+
+ ud.url = "https://%s/%s/%s/download" % (host, name, version)
+ ud.versionsurl = "https://%s/%s/versions" % (host, name)
+ ud.parm['downloadfilename'] = "%s-%s.crate" % (name, version)
+ if 'name' not in ud.parm:
+ ud.parm['name'] = '%s-%s' % (name, version)
+
+ logger.debug2("Fetching %s to %s" % (ud.url, ud.parm['downloadfilename']))
+
+ def unpack(self, ud, rootdir, d):
+ """
+ Uses the crate to build the necessary paths for cargo to utilize it
+ """
+ if ud.type == 'crate':
+ return self._crate_unpack(ud, rootdir, d)
+ else:
+ super(Crate, self).unpack(ud, rootdir, d)
+
+ def _crate_unpack(self, ud, rootdir, d):
+ """
+ Unpacks a crate
+ """
+ thefile = ud.localpath
+
+ # possible metadata we need to write out
+ metadata = {}
+
+ # change to the rootdir to unpack but save the old working dir
+ save_cwd = os.getcwd()
+ os.chdir(rootdir)
+
+ bp = d.getVar('BP')
+ if bp == ud.parm.get('name'):
+ cmd = "tar -xz --no-same-owner -f %s" % thefile
+ ud.unpack_tracer.unpack("crate-extract", rootdir)
+ else:
+ cargo_bitbake = self._cargo_bitbake_path(rootdir)
+ ud.unpack_tracer.unpack("cargo-extract", cargo_bitbake)
+
+ cmd = "tar -xz --no-same-owner -f %s -C %s" % (thefile, cargo_bitbake)
+
+ # ensure we've got these paths made
+ bb.utils.mkdirhier(cargo_bitbake)
+
+ # generate metadata necessary
+ with open(thefile, 'rb') as f:
+ # get the SHA256 of the original tarball
+ tarhash = hashlib.sha256(f.read()).hexdigest()
+
+ metadata['files'] = {}
+ metadata['package'] = tarhash
+
+ path = d.getVar('PATH')
+ if path:
+ cmd = "PATH=\"%s\" %s" % (path, cmd)
+ bb.note("Unpacking %s to %s/" % (thefile, os.getcwd()))
+
+ ret = subprocess.call(cmd, preexec_fn=subprocess_setup, shell=True)
+
+ os.chdir(save_cwd)
+
+ if ret != 0:
+ raise UnpackError("Unpack command %s failed with return value %s" % (cmd, ret), ud.url)
+
+ # if we have metadata to write out..
+ if len(metadata) > 0:
+ cratepath = os.path.splitext(os.path.basename(thefile))[0]
+ bbpath = self._cargo_bitbake_path(rootdir)
+ mdfile = '.cargo-checksum.json'
+ mdpath = os.path.join(bbpath, cratepath, mdfile)
+ with open(mdpath, "w") as f:
+ json.dump(metadata, f)
+
+ def latest_versionstring(self, ud, d):
+ from functools import cmp_to_key
+ json_data = json.loads(self._fetch_index(ud.versionsurl, ud, d))
+ versions = [(0, i["num"], "") for i in json_data["versions"]]
+ versions = sorted(versions, key=cmp_to_key(bb.utils.vercmp))
+
+ return (versions[-1][1], "")
diff --git a/lib/bb/fetch2/cvs.py b/lib/bb/fetch2/cvs.py
index 22abdef79..01de5ff4c 100644
--- a/lib/bb/fetch2/cvs.py
+++ b/lib/bb/fetch2/cvs.py
@@ -109,7 +109,7 @@ class Cvs(FetchMethod):
cvsupdatecmd = "CVS_RSH=\"%s\" %s" % (cvs_rsh, cvsupdatecmd)
# create module directory
- logger.debug(2, "Fetch: checking for module directory")
+ logger.debug2("Fetch: checking for module directory")
moddir = os.path.join(ud.pkgdir, localdir)
workdir = None
if os.access(os.path.join(moddir, 'CVS'), os.R_OK):
@@ -123,7 +123,7 @@ class Cvs(FetchMethod):
# check out sources there
bb.utils.mkdirhier(ud.pkgdir)
workdir = ud.pkgdir
- logger.debug(1, "Running %s", cvscmd)
+ logger.debug("Running %s", cvscmd)
bb.fetch2.check_network_access(d, cvscmd, ud.url)
cmd = cvscmd
diff --git a/lib/bb/fetch2/gcp.py b/lib/bb/fetch2/gcp.py
new file mode 100644
index 000000000..eb3e0c6a6
--- /dev/null
+++ b/lib/bb/fetch2/gcp.py
@@ -0,0 +1,102 @@
+"""
+BitBake 'Fetch' implementation for Google Cloud Platform Storage.
+
+Class for fetching files from Google Cloud Storage using the
+Google Cloud Storage Python Client. The GCS Python Client must
+be correctly installed, configured and authenticated prior to use.
+Additionally, gsutil must also be installed.
+
+"""
+
+# Copyright (C) 2023, Snap Inc.
+#
+# Based in part on bb.fetch2.s3:
+# Copyright (C) 2017 Andre McCurdy
+#
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Based on functions from the base bb module, Copyright 2003 Holger Schurig
+
+import os
+import bb
+import urllib.parse, urllib.error
+from bb.fetch2 import FetchMethod
+from bb.fetch2 import FetchError
+from bb.fetch2 import logger
+from bb.fetch2 import runfetchcmd
+
+class GCP(FetchMethod):
+ """
+ Class to fetch urls via GCP's Python API.
+ """
+ def __init__(self):
+ self.gcp_client = None
+
+ def supports(self, ud, d):
+ """
+ Check to see if a given url can be fetched with GCP.
+ """
+ return ud.type in ['gs']
+
+ def recommends_checksum(self, urldata):
+ return True
+
+ def urldata_init(self, ud, d):
+ if 'downloadfilename' in ud.parm:
+ ud.basename = ud.parm['downloadfilename']
+ else:
+ ud.basename = os.path.basename(ud.path)
+
+ ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
+ ud.basecmd = "gsutil stat"
+
+ def get_gcp_client(self):
+ from google.cloud import storage
+ self.gcp_client = storage.Client(project=None)
+
+ def download(self, ud, d):
+ """
+ Fetch urls using the GCP API.
+ Assumes localpath was called first.
+ """
+ logger.debug2(f"Trying to download gs://{ud.host}{ud.path} to {ud.localpath}")
+ if self.gcp_client is None:
+ self.get_gcp_client()
+
+ bb.fetch2.check_network_access(d, ud.basecmd, f"gs://{ud.host}{ud.path}")
+ runfetchcmd("%s %s" % (ud.basecmd, f"gs://{ud.host}{ud.path}"), d)
+
+ # Path sometimes has leading slash, so strip it
+ path = ud.path.lstrip("/")
+ blob = self.gcp_client.bucket(ud.host).blob(path)
+ blob.download_to_filename(ud.localpath)
+
+ # Additional sanity checks copied from the wget class (although there
+ # are no known issues which mean these are required, treat the GCP API
+ # tool with a little healthy suspicion).
+ if not os.path.exists(ud.localpath):
+ raise FetchError(f"The GCP API returned success for gs://{ud.host}{ud.path} but {ud.localpath} doesn't exist?!")
+
+ if os.path.getsize(ud.localpath) == 0:
+ os.remove(ud.localpath)
+ raise FetchError(f"The downloaded file for gs://{ud.host}{ud.path} resulted in a zero size file?! Deleting and failing since this isn't right.")
+
+ return True
+
+ def checkstatus(self, fetch, ud, d):
+ """
+ Check the status of a URL.
+ """
+ logger.debug2(f"Checking status of gs://{ud.host}{ud.path}")
+ if self.gcp_client is None:
+ self.get_gcp_client()
+
+ bb.fetch2.check_network_access(d, ud.basecmd, f"gs://{ud.host}{ud.path}")
+ runfetchcmd("%s %s" % (ud.basecmd, f"gs://{ud.host}{ud.path}"), d)
+
+ # Path sometimes has leading slash, so strip it
+ path = ud.path.lstrip("/")
+ if self.gcp_client.bucket(ud.host).blob(path).exists() == False:
+ raise FetchError(f"The GCP API reported that gs://{ud.host}{ud.path} does not exist")
+ else:
+ return True
diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
index 644ba9238..c7ff769fd 100644
--- a/lib/bb/fetch2/git.py
+++ b/lib/bb/fetch2/git.py
@@ -44,13 +44,27 @@ Supported SRC_URI options are:
- nobranch
Don't check the SHA validation for branch. set this option for the recipe
- referring to commit which is valid in tag instead of branch.
+ referring to commit which is valid in any namespace (branch, tag, ...)
+ instead of branch.
The default is "0", set nobranch=1 if needed.
+- subpath
+ Limit the checkout to a specific subpath of the tree.
+ By default, checkout the whole tree, set subpath=<path> if needed
+
+- destsuffix
+ The name of the path in which to place the checkout.
+ By default, the path is git/, set destsuffix=<suffix> if needed
+
- usehead
For local git:// urls to use the current branch HEAD as the revision for use with
AUTOREV. Implies nobranch.
+- lfs
+ Enable the checkout to use LFS for large files. This will download all LFS files
+ in the download step, as the unpack step does not have network access.
+ The default is "1", set lfs=0 to skip.
+
"""
# Copyright (C) 2005 Richard Purdie
@@ -63,14 +77,21 @@ import errno
import fnmatch
import os
import re
+import shlex
+import shutil
import subprocess
import tempfile
import bb
import bb.progress
+from contextlib import contextmanager
from bb.fetch2 import FetchMethod
from bb.fetch2 import runfetchcmd
from bb.fetch2 import logger
+from bb.fetch2 import trusted_network
+
+sha1_re = re.compile(r'^[0-9a-f]{40}$')
+slash_re = re.compile(r"/+")
class GitProgressHandler(bb.progress.LineFilterProgressHandler):
"""Extract progress information from git output"""
@@ -129,6 +150,9 @@ class Git(FetchMethod):
def supports_checksum(self, urldata):
return False
+ def cleanup_upon_failure(self):
+ return False
+
def urldata_init(self, ud, d):
"""
init git specific variable within url data
@@ -140,6 +164,11 @@ class Git(FetchMethod):
ud.proto = 'file'
else:
ud.proto = "git"
+ if ud.host == "github.com" and ud.proto == "git":
+ # github stopped supporting git protocol
+ # https://github.blog/2021-09-01-improving-git-protocol-security-github/#no-more-unauthenticated-git
+ ud.proto = "https"
+ bb.warn("URL: %s uses git protocol which is no longer supported by github. Please change to ;protocol=https in the url." % ud.url)
if not ud.proto in ('git', 'file', 'ssh', 'http', 'https', 'rsync'):
raise bb.fetch2.ParameterError("Invalid protocol type", ud.url)
@@ -163,11 +192,18 @@ class Git(FetchMethod):
ud.nocheckout = 1
ud.unresolvedrev = {}
- branches = ud.parm.get("branch", "master").split(',')
+ branches = ud.parm.get("branch", "").split(',')
+ if branches == [""] and not ud.nobranch:
+ bb.warn("URL: %s does not set any branch parameter. The future default branch used by tools and repositories is uncertain and we will therefore soon require this is set in all git urls." % ud.url)
+ branches = ["master"]
if len(branches) != len(ud.names):
raise bb.fetch2.ParameterError("The number of name and branch parameters is not balanced", ud.url)
- ud.cloneflags = "-s -n"
+ ud.noshared = d.getVar("BB_GIT_NOSHARED") == "1"
+
+ ud.cloneflags = "-n"
+ if not ud.noshared:
+ ud.cloneflags += " -s"
if ud.bareclone:
ud.cloneflags += " --mirror"
@@ -219,9 +255,14 @@ class Git(FetchMethod):
ud.shallow = False
if ud.usehead:
- ud.unresolvedrev['default'] = 'HEAD'
+ # When usehead is set let's associate 'HEAD' with the unresolved
+ # rev of this repository. This will get resolved into a revision
+ # later. If an actual revision happens to have also been provided
+ # then this setting will be overridden.
+ for name in ud.names:
+ ud.unresolvedrev[name] = 'HEAD'
- ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0"
+ ud.basecmd = d.getVar("FETCHCMD_git") or "git -c gc.autoDetach=false -c core.pager=cat -c safe.bareRepository=all"
write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0"
ud.write_tarballs = write_tarballs != "0" or ud.rebaseable
@@ -230,20 +271,20 @@ class Git(FetchMethod):
ud.setup_revisions(d)
for name in ud.names:
- # Ensure anything that doesn't look like a sha256 checksum/revision is translated into one
- if not ud.revisions[name] or len(ud.revisions[name]) != 40 or (False in [c in "abcdef0123456789" for c in ud.revisions[name]]):
+ # Ensure any revision that doesn't look like a SHA-1 is translated into one
+ if not sha1_re.match(ud.revisions[name] or ''):
if ud.revisions[name]:
ud.unresolvedrev[name] = ud.revisions[name]
ud.revisions[name] = self.latest_revision(ud, d, name)
- gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.'))
+ gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_').replace('(', '_').replace(')', '_'))
if gitsrcname.startswith('.'):
gitsrcname = gitsrcname[1:]
- # for rebaseable git repo, it is necessary to keep mirror tar ball
- # per revision, so that even the revision disappears from the
+ # For a rebaseable git repo, it is necessary to keep a mirror tar ball
+ # per revision, so that even if the revision disappears from the
# upstream repo in the future, the mirror will remain intact and still
- # contains the revision
+ # contain the revision
if ud.rebaseable:
for name in ud.names:
gitsrcname = gitsrcname + '_' + ud.revisions[name]
@@ -287,7 +328,10 @@ class Git(FetchMethod):
return ud.clonedir
def need_update(self, ud, d):
- return self.clonedir_need_update(ud, d) or self.shallow_tarball_need_update(ud) or self.tarball_need_update(ud)
+ return self.clonedir_need_update(ud, d) \
+ or self.shallow_tarball_need_update(ud) \
+ or self.tarball_need_update(ud) \
+ or self.lfs_need_update(ud, d)
def clonedir_need_update(self, ud, d):
if not os.path.exists(ud.clonedir):
@@ -299,6 +343,15 @@ class Git(FetchMethod):
return True
return False
+ def lfs_need_update(self, ud, d):
+ if self.clonedir_need_update(ud, d):
+ return True
+
+ for name in ud.names:
+ if not self._lfs_objects_downloaded(ud, d, name, ud.clonedir):
+ return True
+ return False
+
def clonedir_need_shallow_revs(self, ud, d):
for rev in ud.shallow_revs:
try:
@@ -318,6 +371,16 @@ class Git(FetchMethod):
# is not possible
if bb.utils.to_boolean(d.getVar("BB_FETCH_PREMIRRORONLY")):
return True
+ # If the url is not in trusted network, that is, BB_NO_NETWORK is set to 0
+ # and BB_ALLOWED_NETWORKS does not contain the host that ud.url uses, then
+ # we need to try premirrors first as using upstream is destined to fail.
+ if not trusted_network(d, ud.url):
+ return True
+ # the following check is to ensure incremental fetch in downloads, this is
+ # because the premirror might be old and does not contain the new rev required,
+ # and this will cause a total removal and new clone. So if we can reach the
+ # network, we prefer upstream over premirror, though the premirror might contain
+ # the new rev.
if os.path.exists(ud.clonedir):
return False
return True
@@ -331,18 +394,55 @@ class Git(FetchMethod):
if ud.shallow and os.path.exists(ud.fullshallow) and self.need_update(ud, d):
ud.localpath = ud.fullshallow
return
- elif os.path.exists(ud.fullmirror) and not os.path.exists(ud.clonedir):
- bb.utils.mkdirhier(ud.clonedir)
- runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir)
-
+ elif os.path.exists(ud.fullmirror) and self.need_update(ud, d):
+ if not os.path.exists(ud.clonedir):
+ bb.utils.mkdirhier(ud.clonedir)
+ runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir)
+ else:
+ tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
+ runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=tmpdir)
+ output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir)
+ if 'mirror' in output:
+ runfetchcmd("%s remote rm mirror" % ud.basecmd, d, workdir=ud.clonedir)
+ runfetchcmd("%s remote add --mirror=fetch mirror %s" % (ud.basecmd, tmpdir), d, workdir=ud.clonedir)
+ fetch_cmd = "LANG=C %s fetch -f --update-head-ok --progress mirror " % (ud.basecmd)
+ runfetchcmd(fetch_cmd, d, workdir=ud.clonedir)
repourl = self._get_repo_url(ud)
+ needs_clone = False
+ if os.path.exists(ud.clonedir):
+ # The directory may exist, but not be the top level of a bare git
+ # repository in which case it needs to be deleted and re-cloned.
+ try:
+ # Since clones can be bare, use --absolute-git-dir instead of --show-toplevel
+ output = runfetchcmd("LANG=C %s rev-parse --absolute-git-dir" % ud.basecmd, d, workdir=ud.clonedir)
+ toplevel = output.rstrip()
+
+ if not bb.utils.path_is_descendant(toplevel, ud.clonedir):
+ logger.warning("Top level directory '%s' is not a descendant of '%s'. Re-cloning", toplevel, ud.clonedir)
+ needs_clone = True
+ except bb.fetch2.FetchError as e:
+ logger.warning("Unable to get top level for %s (not a git directory?): %s", ud.clonedir, e)
+ needs_clone = True
+ except FileNotFoundError as e:
+ logger.warning("%s", e)
+ needs_clone = True
+
+ if needs_clone:
+ shutil.rmtree(ud.clonedir)
+ else:
+ needs_clone = True
+
# If the repo still doesn't exist, fallback to cloning it
- if not os.path.exists(ud.clonedir):
- # We do this since git will use a "-l" option automatically for local urls where possible
+ if needs_clone:
+ # We do this since git will use a "-l" option automatically for local urls where possible,
+ # but it doesn't work when git/objects is a symlink, only works when it is a directory.
if repourl.startswith("file://"):
- repourl = repourl[7:]
- clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, repourl, ud.clonedir)
+ repourl_path = repourl[7:]
+ objects = os.path.join(repourl_path, 'objects')
+ if os.path.isdir(objects) and not os.path.islink(objects):
+ repourl = repourl_path
+ clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir)
if ud.proto.lower() != 'file':
bb.fetch2.check_network_access(d, clone_cmd, ud.url)
progresshandler = GitProgressHandler(d)
@@ -354,8 +454,12 @@ class Git(FetchMethod):
if "origin" in output:
runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir)
- runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, repourl), d, workdir=ud.clonedir)
- fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, repourl)
+ runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir)
+
+ if ud.nobranch:
+ fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl))
+ else:
+ fetch_cmd = "LANG=C %s fetch -f --progress %s refs/heads/*:refs/heads/* refs/tags/*:refs/tags/*" % (ud.basecmd, shlex.quote(repourl))
if ud.proto.lower() != 'file':
bb.fetch2.check_network_access(d, fetch_cmd, ud.url)
progresshandler = GitProgressHandler(d)
@@ -378,7 +482,47 @@ class Git(FetchMethod):
if missing_rev:
raise bb.fetch2.FetchError("Unable to find revision %s even from upstream" % missing_rev)
+ if self.lfs_need_update(ud, d):
+ # Unpack temporary working copy, use it to run 'git checkout' to force pre-fetching
+ # of all LFS blobs needed at the srcrev.
+ #
+ # It would be nice to just do this inline here by running 'git-lfs fetch'
+ # on the bare clonedir, but that operation requires a working copy on some
+ # releases of Git LFS.
+ with tempfile.TemporaryDirectory(dir=d.getVar('DL_DIR')) as tmpdir:
+ # Do the checkout. This implicitly involves a Git LFS fetch.
+ Git.unpack(self, ud, tmpdir, d)
+
+ # Scoop up a copy of any stuff that Git LFS downloaded. Merge them into
+ # the bare clonedir.
+ #
+ # As this procedure is invoked repeatedly on incremental fetches as
+ # a recipe's SRCREV is bumped throughout its lifetime, this will
+ # result in a gradual accumulation of LFS blobs in <ud.clonedir>/lfs
+ # corresponding to all the blobs reachable from the different revs
+ # fetched across time.
+ #
+ # Only do this if the unpack resulted in a .git/lfs directory being
+ # created; this only happens if at least one blob needed to be
+ # downloaded.
+ if os.path.exists(os.path.join(ud.destdir, ".git", "lfs")):
+ runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/.git" % ud.destdir)
+
def build_mirror_data(self, ud, d):
+
+ # Create as a temp file and move atomically into position to avoid races
+ @contextmanager
+ def create_atomic(filename):
+ fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename))
+ try:
+ yield tfile
+ umask = os.umask(0o666)
+ os.umask(umask)
+ os.chmod(tfile, (0o666 & ~umask))
+ os.rename(tfile, filename)
+ finally:
+ os.close(fd)
+
if ud.shallow and ud.write_shallow_tarballs:
if not os.path.exists(ud.fullshallow):
if os.path.islink(ud.fullshallow):
@@ -389,7 +533,8 @@ class Git(FetchMethod):
self.clone_shallow_local(ud, shallowclone, d)
logger.info("Creating tarball of git repository")
- runfetchcmd("tar -czf %s ." % ud.fullshallow, d, workdir=shallowclone)
+ with create_atomic(ud.fullshallow) as tfile:
+ runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone)
runfetchcmd("touch %s.done" % ud.fullshallow, d)
finally:
bb.utils.remove(tempdir, recurse=True)
@@ -398,7 +543,11 @@ class Git(FetchMethod):
os.unlink(ud.fullmirror)
logger.info("Creating tarball of git repository")
- runfetchcmd("tar -czf %s ." % ud.fullmirror, d, workdir=ud.clonedir)
+ with create_atomic(ud.fullmirror) as tfile:
+ mtime = runfetchcmd("{} log --all -1 --format=%cD".format(ud.basecmd), d,
+ quiet=True, workdir=ud.clonedir)
+ runfetchcmd("tar -czf %s --owner oe:0 --group oe:0 --mtime \"%s\" ."
+ % (tfile, mtime), d, workdir=ud.clonedir)
runfetchcmd("touch %s.done" % ud.fullmirror, d)
def clone_shallow_local(self, ud, dest, d):
@@ -460,20 +609,33 @@ class Git(FetchMethod):
def unpack(self, ud, destdir, d):
""" unpack the downloaded src to destdir"""
- subdir = ud.parm.get("subpath", "")
- if subdir != "":
- readpathspec = ":%s" % subdir
- def_destsuffix = "%s/" % os.path.basename(subdir.rstrip('/'))
- else:
- readpathspec = ""
- def_destsuffix = "git/"
+ subdir = ud.parm.get("subdir")
+ subpath = ud.parm.get("subpath")
+ readpathspec = ""
+ def_destsuffix = "git/"
+
+ if subpath:
+ readpathspec = ":%s" % subpath
+ def_destsuffix = "%s/" % os.path.basename(subpath.rstrip('/'))
+
+ if subdir:
+ # If 'subdir' param exists, create a dir and use it as destination for unpack cmd
+ if os.path.isabs(subdir):
+ if not os.path.realpath(subdir).startswith(os.path.realpath(destdir)):
+ raise bb.fetch2.UnpackError("subdir argument isn't a subdirectory of unpack root %s" % destdir, ud.url)
+ destdir = subdir
+ else:
+ destdir = os.path.join(destdir, subdir)
+ def_destsuffix = ""
destsuffix = ud.parm.get("destsuffix", def_destsuffix)
destdir = ud.destdir = os.path.join(destdir, destsuffix)
if os.path.exists(destdir):
bb.utils.prunedir(destdir)
+ if not ud.bareclone:
+ ud.unpack_tracer.unpack("git", destdir)
- need_lfs = ud.parm.get("lfs", "1") == "1"
+ need_lfs = self._need_lfs(ud)
if not need_lfs:
ud.basecmd = "GIT_LFS_SKIP_SMUDGE=1 " + ud.basecmd
@@ -481,13 +643,12 @@ class Git(FetchMethod):
source_found = False
source_error = []
- if not source_found:
- clonedir_is_up_to_date = not self.clonedir_need_update(ud, d)
- if clonedir_is_up_to_date:
- runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d)
- source_found = True
- else:
- source_error.append("clone directory not available or not up to date: " + ud.clonedir)
+ clonedir_is_up_to_date = not self.clonedir_need_update(ud, d)
+ if clonedir_is_up_to_date:
+ runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d)
+ source_found = True
+ else:
+ source_error.append("clone directory not available or not up to date: " + ud.clonedir)
if not source_found:
if ud.shallow:
@@ -504,16 +665,18 @@ class Git(FetchMethod):
raise bb.fetch2.UnpackError("No up to date source found: " + "; ".join(source_error), ud.url)
repourl = self._get_repo_url(ud)
- runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, repourl), d, workdir=destdir)
+ runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=destdir)
if self._contains_lfs(ud, d, destdir):
if need_lfs and not self._find_git_lfs(d):
raise bb.fetch2.FetchError("Repository %s has LFS content, install git-lfs on host to download (or set lfs=0 to ignore it)" % (repourl))
elif not need_lfs:
bb.note("Repository %s has LFS content but it is not being fetched" % (repourl))
+ else:
+ runfetchcmd("%s lfs install --local" % ud.basecmd, d, workdir=destdir)
if not ud.nocheckout:
- if subdir != "":
+ if subpath:
runfetchcmd("%s read-tree %s%s" % (ud.basecmd, ud.revisions[ud.names[0]], readpathspec), d,
workdir=destdir)
runfetchcmd("%s checkout-index -q -f -a" % ud.basecmd, d, workdir=destdir)
@@ -562,18 +725,54 @@ class Git(FetchMethod):
raise bb.fetch2.FetchError("The command '%s' gave output with more then 1 line unexpectedly, output: '%s'" % (cmd, output))
return output.split()[0] != "0"
+    def _lfs_objects_downloaded(self, ud, d, name, wd):
+        """
+        Verifies whether the LFS objects for requested revisions have already been downloaded
+
+        Returns True when LFS is not requested for this url, when the repository
+        in 'wd' has no LFS content, when 'git lfs ls-files' lists nothing for
+        ud.revisions[name], or when every listed object exists below
+        <wd>/lfs/objects. Returns False as soon as one listed object is missing.
+        """
+        # Bail out early if this repository doesn't use LFS
+        if not self._need_lfs(ud) or not self._contains_lfs(ud, d, wd):
+            return True
+
+        # The Git LFS specification specifies ([1]) the LFS folder layout so it should be safe to check for file
+        # existence.
+        # [1] https://github.com/git-lfs/git-lfs/blob/main/docs/spec.md#intercepting-git
+        cmd = "%s lfs ls-files -l %s" \
+            % (ud.basecmd, ud.revisions[name])
+        output = runfetchcmd(cmd, d, quiet=True, workdir=wd).rstrip()
+        # Do not do any further matching if no objects are managed by LFS
+        if not output:
+            return True
+
+        # Match all lines beginning with the hexadecimal OID
+        oid_regex = re.compile("^(([a-fA-F0-9]{2})([a-fA-F0-9]{2})[A-Fa-f0-9]+)")
+        for line in output.split("\n"):
+            oid = re.search(oid_regex, line)
+            if not oid:
+                bb.warn("git lfs ls-files output '%s' did not match expected format." % line)
+                # There is no OID to look up for this line; skip it rather than
+                # calling .group() on the failed match below.
+                continue
+            if not os.path.exists(os.path.join(wd, "lfs", "objects", oid.group(2), oid.group(3), oid.group(1))):
+                return False
+
+        return True
+
+    def _need_lfs(self, ud):
+        """Return True when the url's 'lfs' parameter is "1" (the default when unset)."""
+        lfs_param = ud.parm.get("lfs", "1")
+        return lfs_param == "1"
+
def _contains_lfs(self, ud, d, wd):
"""
Check if the repository has 'lfs' (large file) content
"""
- if not ud.nobranch:
- branchname = ud.branches[ud.names[0]]
+ if ud.nobranch:
+ # If no branch is specified, use the current git commit
+ refname = self._build_revision(ud, d, ud.names[0])
+ elif wd == ud.clonedir:
+ # The bare clonedir doesn't use the remote names; it has the branch immediately.
+ refname = ud.branches[ud.names[0]]
else:
- branchname = "master"
+ refname = "origin/%s" % ud.branches[ud.names[0]]
- cmd = "%s grep lfs origin/%s:.gitattributes | wc -l" % (
- ud.basecmd, ud.branches[ud.names[0]])
+ cmd = "%s grep lfs %s:.gitattributes | wc -l" % (
+ ud.basecmd, refname)
try:
output = runfetchcmd(cmd, d, quiet=True, workdir=wd)
@@ -594,6 +793,11 @@ class Git(FetchMethod):
"""
Return the repository URL
"""
+ # Note that we do not support passwords directly in the git urls. There are several
+ # reasons. SRC_URI can be written out to things like buildhistory and people don't
+ # want to leak passwords like that. It's also all too easy to share metadata without
+ # removing the password. ssh keys, ~/.netrc and ~/.ssh/config files can be used as
+ # alternatives so we will not take patches adding password support here.
if ud.user:
username = ud.user + '@'
else:
@@ -605,7 +809,6 @@ class Git(FetchMethod):
Return a unique key for the url
"""
# Collapse adjacent slashes
- slash_re = re.compile(r"/+")
return "git:" + ud.host + slash_re.sub(".", ud.path) + ud.unresolvedrev[name]
def _lsremote(self, ud, d, search):
@@ -624,7 +827,7 @@ class Git(FetchMethod):
try:
repourl = self._get_repo_url(ud)
cmd = "%s ls-remote %s %s" % \
- (ud.basecmd, repourl, search)
+ (ud.basecmd, shlex.quote(repourl), search)
if ud.proto.lower() != 'file':
bb.fetch2.check_network_access(d, cmd, repourl)
output = runfetchcmd(cmd, d, True)
@@ -638,6 +841,12 @@ class Git(FetchMethod):
"""
Compute the HEAD revision for the url
"""
+ if not d.getVar("__BBSRCREV_SEEN"):
+ raise bb.fetch2.FetchError("Recipe uses a floating tag/branch '%s' for repo '%s' without a fixed SRCREV yet doesn't call bb.fetch2.get_srcrev() (use SRCPV in PV for OE)." % (ud.unresolvedrev[name], ud.host+ud.path))
+
+ # Ensure we mark as not cached
+ bb.fetch2.mark_recipe_nocache(d)
+
output = self._lsremote(ud, d, "")
# Tags of the form ^{} may not work, need to fallback to other form
if ud.unresolvedrev[name][:5] == "refs/" or ud.usehead:
@@ -662,38 +871,42 @@ class Git(FetchMethod):
"""
pupver = ('', '')
- tagregex = re.compile(d.getVar('UPSTREAM_CHECK_GITTAGREGEX') or r"(?P<pver>([0-9][\.|_]?)+)")
try:
output = self._lsremote(ud, d, "refs/tags/*")
except (bb.fetch2.FetchError, bb.fetch2.NetworkAccess) as e:
bb.note("Could not list remote: %s" % str(e))
return pupver
+ rev_tag_re = re.compile(r"([0-9a-f]{40})\s+refs/tags/(.*)")
+ pver_re = re.compile(d.getVar('UPSTREAM_CHECK_GITTAGREGEX') or r"(?P<pver>([0-9][\.|_]?)+)")
+ nonrel_re = re.compile(r"(alpha|beta|rc|final)+")
+
verstring = ""
- revision = ""
for line in output.split("\n"):
if not line:
break
- tag_head = line.split("/")[-1]
+ m = rev_tag_re.match(line)
+ if not m:
+ continue
+
+ (revision, tag) = m.groups()
+
# Ignore non-released branches
- m = re.search(r"(alpha|beta|rc|final)+", tag_head)
- if m:
+ if nonrel_re.search(tag):
continue
# search for version in the line
- tag = tagregex.search(tag_head)
- if tag is None:
+ m = pver_re.search(tag)
+ if not m:
continue
- tag = tag.group('pver')
- tag = tag.replace("_", ".")
+ pver = m.group('pver').replace("_", ".")
- if verstring and bb.utils.vercmp(("0", tag, ""), ("0", verstring, "")) < 0:
+ if verstring and bb.utils.vercmp(("0", pver, ""), ("0", verstring, "")) < 0:
continue
- verstring = tag
- revision = line.split()[0]
+ verstring = pver
pupver = (verstring, revision)
return pupver
diff --git a/lib/bb/fetch2/gitsm.py b/lib/bb/fetch2/gitsm.py
index d6e5c5c05..f7f3af721 100644
--- a/lib/bb/fetch2/gitsm.py
+++ b/lib/bb/fetch2/gitsm.py
@@ -78,7 +78,7 @@ class GitSM(Git):
module_hash = ""
if not module_hash:
- logger.debug(1, "submodule %s is defined, but is not initialized in the repository. Skipping", m)
+ logger.debug("submodule %s is defined, but is not initialized in the repository. Skipping", m)
continue
submodules.append(m)
@@ -88,9 +88,9 @@ class GitSM(Git):
subrevision[m] = module_hash.split()[2]
# Convert relative to absolute uri based on parent uri
- if uris[m].startswith('..'):
+ if uris[m].startswith('..') or uris[m].startswith('./'):
newud = copy.copy(ud)
- newud.path = os.path.realpath(os.path.join(newud.path, uris[m]))
+ newud.path = os.path.normpath(os.path.join(newud.path, uris[m]))
uris[m] = Git._get_repo_url(self, newud)
for module in submodules:
@@ -115,10 +115,21 @@ class GitSM(Git):
# This has to be a file reference
proto = "file"
url = "gitsm://" + uris[module]
+ if url.endswith("{}{}".format(ud.host, ud.path)):
+ raise bb.fetch2.FetchError("Submodule refers to the parent repository. This will cause deadlock situation in current version of Bitbake." \
+ "Consider using git fetcher instead.")
url += ';protocol=%s' % proto
url += ";name=%s" % module
url += ";subpath=%s" % module
+ url += ";nobranch=1"
+ url += ";lfs=%s" % self._need_lfs(ud)
+ # Note that adding "user=" here to give credentials to the
+ # submodule is not supported. Since using SRC_URI to give a git://
+ # URL a password is not supported, one has to use one of the
+ # recommended ways (e.g. ~/.netrc or SSH config), which do specify
+ # the user (see the comment in git.py).
+ # So, we will not take patches adding "user=" support here.
ld = d.createCopy()
# Not necessary to set SRC_URI, since we're passing the URI to
@@ -140,16 +151,6 @@ class GitSM(Git):
if Git.need_update(self, ud, d):
return True
- try:
- # Check for the nugget dropped by the download operation
- known_srcrevs = runfetchcmd("%s config --get-all bitbake.srcrev" % \
- (ud.basecmd), d, workdir=ud.clonedir)
-
- if ud.revisions[ud.names[0]] in known_srcrevs.split():
- return False
- except bb.fetch2.FetchError:
- pass
-
need_update_list = []
def need_update_submodule(ud, url, module, modpath, workdir, d):
url += ";bareclone=1;nobranch=1"
@@ -172,14 +173,9 @@ class GitSM(Git):
shutil.rmtree(tmpdir)
else:
self.process_submodules(ud, ud.clonedir, need_update_submodule, d)
- if len(need_update_list) == 0:
- # We already have the required commits of all submodules. Drop
- # a nugget so we don't need to check again.
- runfetchcmd("%s config --add bitbake.srcrev %s" % \
- (ud.basecmd, ud.revisions[ud.names[0]]), d, workdir=ud.clonedir)
-
- if len(need_update_list) > 0:
- logger.debug(1, 'gitsm: Submodules requiring update: %s' % (' '.join(need_update_list)))
+
+ if need_update_list:
+ logger.debug('gitsm: Submodules requiring update: %s' % (' '.join(need_update_list)))
return True
return False
@@ -209,9 +205,6 @@ class GitSM(Git):
shutil.rmtree(tmpdir)
else:
self.process_submodules(ud, ud.clonedir, download_submodule, d)
- # Drop a nugget for the srcrev we've fetched (used by need_update)
- runfetchcmd("%s config --add bitbake.srcrev %s" % \
- (ud.basecmd, ud.revisions[ud.names[0]]), d, workdir=ud.clonedir)
def unpack(self, ud, destdir, d):
def unpack_submodules(ud, url, module, modpath, workdir, d):
@@ -225,6 +218,10 @@ class GitSM(Git):
try:
newfetch = Fetch([url], d, cache=False)
+ # modpath is needed by unpack tracer to calculate submodule
+ # checkout dir
+ new_ud = newfetch.ud[url]
+ new_ud.modpath = modpath
newfetch.unpack(root=os.path.dirname(os.path.join(repo_conf, 'modules', module)))
except Exception as e:
logger.error('gitsm: submodule unpack failed: %s %s' % (type(e).__name__, str(e)))
@@ -250,10 +247,12 @@ class GitSM(Git):
ret = self.process_submodules(ud, ud.destdir, unpack_submodules, d)
if not ud.bareclone and ret:
- # All submodules should already be downloaded and configured in the tree. This simply sets
- # up the configuration and checks out the files. The main project config should remain
- # unmodified, and no download from the internet should occur.
- runfetchcmd("%s submodule update --recursive --no-fetch" % (ud.basecmd), d, quiet=True, workdir=ud.destdir)
+ # All submodules should already be downloaded and configured in the tree. This simply
+ # sets up the configuration and checks out the files. The main project config should
+ # remain unmodified, and no download from the internet should occur. As such, lfs smudge
+ # should also be skipped as these files were already smudged in the fetch stage if lfs
+ # was enabled.
+ runfetchcmd("GIT_LFS_SKIP_SMUDGE=1 %s submodule update --recursive --no-fetch" % (ud.basecmd), d, quiet=True, workdir=ud.destdir)
def implicit_urldata(self, ud, d):
import shutil, subprocess, tempfile
diff --git a/lib/bb/fetch2/hg.py b/lib/bb/fetch2/hg.py
index 8f503701e..cbff8c490 100644
--- a/lib/bb/fetch2/hg.py
+++ b/lib/bb/fetch2/hg.py
@@ -150,7 +150,7 @@ class Hg(FetchMethod):
def download(self, ud, d):
"""Fetch url"""
- logger.debug(2, "Fetch: checking for module directory '" + ud.moddir + "'")
+ logger.debug2("Fetch: checking for module directory '" + ud.moddir + "'")
# If the checkout doesn't exist and the mirror tarball does, extract it
if not os.path.exists(ud.pkgdir) and os.path.exists(ud.fullmirror):
@@ -160,7 +160,7 @@ class Hg(FetchMethod):
if os.access(os.path.join(ud.moddir, '.hg'), os.R_OK):
# Found the source, check whether need pull
updatecmd = self._buildhgcommand(ud, d, "update")
- logger.debug(1, "Running %s", updatecmd)
+ logger.debug("Running %s", updatecmd)
try:
runfetchcmd(updatecmd, d, workdir=ud.moddir)
except bb.fetch2.FetchError:
@@ -168,7 +168,7 @@ class Hg(FetchMethod):
pullcmd = self._buildhgcommand(ud, d, "pull")
logger.info("Pulling " + ud.url)
# update sources there
- logger.debug(1, "Running %s", pullcmd)
+ logger.debug("Running %s", pullcmd)
bb.fetch2.check_network_access(d, pullcmd, ud.url)
runfetchcmd(pullcmd, d, workdir=ud.moddir)
try:
@@ -183,14 +183,14 @@ class Hg(FetchMethod):
logger.info("Fetch " + ud.url)
# check out sources there
bb.utils.mkdirhier(ud.pkgdir)
- logger.debug(1, "Running %s", fetchcmd)
+ logger.debug("Running %s", fetchcmd)
bb.fetch2.check_network_access(d, fetchcmd, ud.url)
runfetchcmd(fetchcmd, d, workdir=ud.pkgdir)
# Even when we clone (fetch), we still need to update as hg's clone
# won't checkout the specified revision if its on a branch
updatecmd = self._buildhgcommand(ud, d, "update")
- logger.debug(1, "Running %s", updatecmd)
+ logger.debug("Running %s", updatecmd)
runfetchcmd(updatecmd, d, workdir=ud.moddir)
def clean(self, ud, d):
@@ -242,14 +242,15 @@ class Hg(FetchMethod):
revflag = "-r %s" % ud.revision
subdir = ud.parm.get("destsuffix", ud.module)
codir = "%s/%s" % (destdir, subdir)
+ ud.unpack_tracer.unpack("hg", codir)
scmdata = ud.parm.get("scmdata", "")
if scmdata != "nokeep":
proto = ud.parm.get('protocol', 'http')
if not os.access(os.path.join(codir, '.hg'), os.R_OK):
- logger.debug(2, "Unpack: creating new hg repository in '" + codir + "'")
+ logger.debug2("Unpack: creating new hg repository in '" + codir + "'")
runfetchcmd("%s init %s" % (ud.basecmd, codir), d)
- logger.debug(2, "Unpack: updating source in '" + codir + "'")
+ logger.debug2("Unpack: updating source in '" + codir + "'")
if ud.user and ud.pswd:
runfetchcmd("%s --config auth.default.prefix=* --config auth.default.username=%s --config auth.default.password=%s --config \"auth.default.schemes=%s\" pull %s" % (ud.basecmd, ud.user, ud.pswd, proto, ud.moddir), d, workdir=codir)
else:
@@ -259,5 +260,5 @@ class Hg(FetchMethod):
else:
runfetchcmd("%s up -C %s" % (ud.basecmd, revflag), d, workdir=codir)
else:
- logger.debug(2, "Unpack: extracting source to '" + codir + "'")
+ logger.debug2("Unpack: extracting source to '" + codir + "'")
runfetchcmd("%s archive -t files %s %s" % (ud.basecmd, revflag, codir), d, workdir=ud.moddir)
diff --git a/lib/bb/fetch2/local.py b/lib/bb/fetch2/local.py
index 01d9ff9f8..7d7668110 100644
--- a/lib/bb/fetch2/local.py
+++ b/lib/bb/fetch2/local.py
@@ -17,7 +17,7 @@ import os
import urllib.request, urllib.parse, urllib.error
import bb
import bb.utils
-from bb.fetch2 import FetchMethod, FetchError
+from bb.fetch2 import FetchMethod, FetchError, ParameterError
from bb.fetch2 import logger
class Local(FetchMethod):
@@ -33,15 +33,17 @@ class Local(FetchMethod):
ud.basename = os.path.basename(ud.decodedurl)
ud.basepath = ud.decodedurl
ud.needdonestamp = False
+ if "*" in ud.decodedurl:
+ raise bb.fetch2.ParameterError("file:// urls using globbing are no longer supported. Please place the files in a directory and reference that instead.", ud.url)
return
def localpath(self, urldata, d):
"""
Return the local filename of a given url assuming a successful fetch.
"""
- return self.localpaths(urldata, d)[-1]
+ return self.localfile_searchpaths(urldata, d)[-1]
- def localpaths(self, urldata, d):
+ def localfile_searchpaths(self, urldata, d):
"""
Return the local filename of a given url assuming a successful fetch.
"""
@@ -49,29 +51,17 @@ class Local(FetchMethod):
path = urldata.decodedurl
newpath = path
if path[0] == "/":
+ logger.debug2("Using absolute %s" % (path))
return [path]
filespath = d.getVar('FILESPATH')
if filespath:
- logger.debug(2, "Searching for %s in paths:\n %s" % (path, "\n ".join(filespath.split(":"))))
+ logger.debug2("Searching for %s in paths:\n %s" % (path, "\n ".join(filespath.split(":"))))
newpath, hist = bb.utils.which(filespath, path, history=True)
+ logger.debug2("Using %s for %s" % (newpath, path))
searched.extend(hist)
- if (not newpath or not os.path.exists(newpath)) and path.find("*") != -1:
- # For expressions using '*', best we can do is take the first directory in FILESPATH that exists
- newpath, hist = bb.utils.which(filespath, ".", history=True)
- searched.extend(hist)
- logger.debug(2, "Searching for %s in path: %s" % (path, newpath))
- return searched
- if not os.path.exists(newpath):
- dldirfile = os.path.join(d.getVar("DL_DIR"), path)
- logger.debug(2, "Defaulting to %s for %s" % (dldirfile, path))
- bb.utils.mkdirhier(os.path.dirname(dldirfile))
- searched.append(dldirfile)
- return searched
return searched
def need_update(self, ud, d):
- if ud.url.find("*") != -1:
- return False
if os.path.exists(ud.localpath):
return False
return True
@@ -84,9 +74,7 @@ class Local(FetchMethod):
filespath = d.getVar('FILESPATH')
if filespath:
locations = filespath.split(":")
- locations.append(d.getVar("DL_DIR"))
-
- msg = "Unable to find file " + urldata.url + " anywhere. The paths that were searched were:\n " + "\n ".join(locations)
+ msg = "Unable to find file " + urldata.url + " anywhere to download to " + urldata.localpath + ". The paths that were searched were:\n " + "\n ".join(locations)
raise FetchError(msg)
return True
@@ -95,9 +83,6 @@ class Local(FetchMethod):
"""
Check the status of the url
"""
- if urldata.localpath.find("*") != -1:
- logger.info("URL %s looks like a glob and was therefore not checked.", urldata.url)
- return True
if os.path.exists(urldata.localpath):
return True
return False
diff --git a/lib/bb/fetch2/npm.py b/lib/bb/fetch2/npm.py
index 47898509f..15f3f19bc 100644
--- a/lib/bb/fetch2/npm.py
+++ b/lib/bb/fetch2/npm.py
@@ -44,17 +44,24 @@ def npm_package(package):
"""Convert the npm package name to remove unsupported character"""
# Scoped package names (with the @) use the same naming convention
# as the 'npm pack' command.
- if package.startswith("@"):
- return re.sub("/", "-", package[1:])
- return package
+ name = re.sub("/", "-", package)
+ name = name.lower()
+ name = re.sub(r"[^\-a-z0-9]", "", name)
+ name = name.strip("-")
+ return name
+
def npm_filename(package, version):
"""Get the filename of a npm package"""
return npm_package(package) + "-" + version + ".tgz"
-def npm_localfile(package, version):
+def npm_localfile(package, version=None):
"""Get the local filename of a npm package"""
- return os.path.join("npm2", npm_filename(package, version))
+ if version is not None:
+ filename = npm_filename(package, version)
+ else:
+ filename = package
+ return os.path.join("npm2", filename)
def npm_integrity(integrity):
"""
@@ -69,41 +76,52 @@ def npm_unpack(tarball, destdir, d):
bb.utils.mkdirhier(destdir)
cmd = "tar --extract --gzip --file=%s" % shlex.quote(tarball)
cmd += " --no-same-owner"
+ cmd += " --delay-directory-restore"
cmd += " --strip-components=1"
runfetchcmd(cmd, d, workdir=destdir)
+ runfetchcmd("chmod -R +X '%s'" % (destdir), d, quiet=True, workdir=destdir)
class NpmEnvironment(object):
"""
Using a npm config file seems more reliable than using cli arguments.
This class allows to create a controlled environment for npm commands.
"""
- def __init__(self, d, configs=None):
+ def __init__(self, d, configs=[], npmrc=None):
self.d = d
- self.configs = configs
+
+ self.user_config = tempfile.NamedTemporaryFile(mode="w", buffering=1)
+ for key, value in configs:
+ self.user_config.write("%s=%s\n" % (key, value))
+
+ if npmrc:
+ self.global_config_name = npmrc
+ else:
+ self.global_config_name = "/dev/null"
+
+ def __del__(self):
+ if self.user_config:
+ self.user_config.close()
def run(self, cmd, args=None, configs=None, workdir=None):
"""Run npm command in a controlled environment"""
with tempfile.TemporaryDirectory() as tmpdir:
d = bb.data.createCopy(self.d)
+ d.setVar("PATH", d.getVar("PATH")) # PATH might contain $HOME - evaluate it before patching
d.setVar("HOME", tmpdir)
- cfgfile = os.path.join(tmpdir, "npmrc")
-
if not workdir:
workdir = tmpdir
def _run(cmd):
- cmd = "NPM_CONFIG_USERCONFIG=%s " % cfgfile + cmd
- cmd = "NPM_CONFIG_GLOBALCONFIG=%s " % cfgfile + cmd
+ cmd = "NPM_CONFIG_USERCONFIG=%s " % (self.user_config.name) + cmd
+ cmd = "NPM_CONFIG_GLOBALCONFIG=%s " % (self.global_config_name) + cmd
return runfetchcmd(cmd, d, workdir=workdir)
- if self.configs:
- for key, value in self.configs:
- _run("npm config set %s %s" % (key, shlex.quote(value)))
-
if configs:
+ bb.warn("Use of configs argument of NpmEnvironment.run() function"
+ " is deprecated. Please use args argument instead.")
for key, value in configs:
- _run("npm config set %s %s" % (key, shlex.quote(value)))
+ cmd += " --%s=%s" % (key, shlex.quote(value))
if args:
for key, value in args:
@@ -142,12 +160,12 @@ class Npm(FetchMethod):
raise ParameterError("Invalid 'version' parameter", ud.url)
# Extract the 'registry' part of the url
- ud.registry = re.sub(r"^npm://", "http://", ud.url.split(";")[0])
+ ud.registry = re.sub(r"^npm://", "https://", ud.url.split(";")[0])
# Using the 'downloadfilename' parameter as local filename
# or the npm package name.
if "downloadfilename" in ud.parm:
- ud.localfile = d.expand(ud.parm["downloadfilename"])
+ ud.localfile = npm_localfile(d.expand(ud.parm["downloadfilename"]))
else:
ud.localfile = npm_localfile(ud.package, ud.version)
@@ -165,14 +183,14 @@ class Npm(FetchMethod):
def _resolve_proxy_url(self, ud, d):
def _npm_view():
- configs = []
- configs.append(("json", "true"))
- configs.append(("registry", ud.registry))
+ args = []
+ args.append(("json", "true"))
+ args.append(("registry", ud.registry))
pkgver = shlex.quote(ud.package + "@" + ud.version)
cmd = ud.basecmd + " view %s" % pkgver
env = NpmEnvironment(d)
check_network_access(d, cmd, ud.registry)
- view_string = env.run(cmd, configs=configs)
+ view_string = env.run(cmd, args=args)
if not view_string:
raise FetchError("Unavailable package %s" % pkgver, ud.url)
@@ -280,6 +298,7 @@ class Npm(FetchMethod):
destsuffix = ud.parm.get("destsuffix", "npm")
destdir = os.path.join(rootdir, destsuffix)
npm_unpack(ud.localpath, destdir, d)
+ ud.unpack_tracer.unpack("npm", destdir)
def clean(self, ud, d):
"""Clean any existing full or partial download"""
diff --git a/lib/bb/fetch2/npmsw.py b/lib/bb/fetch2/npmsw.py
index 0c3511d8a..b55e885d7 100644
--- a/lib/bb/fetch2/npmsw.py
+++ b/lib/bb/fetch2/npmsw.py
@@ -24,11 +24,14 @@ import bb
from bb.fetch2 import Fetch
from bb.fetch2 import FetchMethod
from bb.fetch2 import ParameterError
+from bb.fetch2 import runfetchcmd
from bb.fetch2 import URI
from bb.fetch2.npm import npm_integrity
from bb.fetch2.npm import npm_localfile
from bb.fetch2.npm import npm_unpack
from bb.utils import is_semver
+from bb.utils import lockfile
+from bb.utils import unlockfile
def foreach_dependencies(shrinkwrap, callback=None, dev=False):
"""
@@ -38,8 +41,9 @@ def foreach_dependencies(shrinkwrap, callback=None, dev=False):
with:
name = the package name (string)
params = the package parameters (dictionary)
- deptree = the package dependency tree (array of strings)
+ destdir = the destination of the package (string)
"""
+ # For handling old-style dependencies entries in shrinkwrap files
def _walk_deps(deps, deptree):
for name in deps:
subtree = [*deptree, name]
@@ -49,9 +53,22 @@ def foreach_dependencies(shrinkwrap, callback=None, dev=False):
continue
elif deps[name].get("bundled", False):
continue
- callback(name, deps[name], subtree)
-
- _walk_deps(shrinkwrap.get("dependencies", {}), [])
+ destsubdirs = [os.path.join("node_modules", dep) for dep in subtree]
+ destsuffix = os.path.join(*destsubdirs)
+ callback(name, deps[name], destsuffix)
+
+ # packages entry means new style shrinkwrap file, else use dependencies
+ packages = shrinkwrap.get("packages", None)
+ if packages is not None:
+ for package in packages:
+ if package != "":
+ name = package.split('node_modules/')[-1]
+ package_infos = packages.get(package, {})
+ if dev == False and package_infos.get("dev", False):
+ continue
+ callback(name, package_infos, package)
+ else:
+ _walk_deps(shrinkwrap.get("dependencies", {}), [])
class NpmShrinkWrap(FetchMethod):
"""Class to fetch all package from a shrinkwrap file"""
@@ -72,19 +89,22 @@ class NpmShrinkWrap(FetchMethod):
# Resolve the dependencies
ud.deps = []
- def _resolve_dependency(name, params, deptree):
+ def _resolve_dependency(name, params, destsuffix):
url = None
localpath = None
extrapaths = []
- destsubdirs = [os.path.join("node_modules", dep) for dep in deptree]
- destsuffix = os.path.join(*destsubdirs)
+ unpack = True
integrity = params.get("integrity", None)
resolved = params.get("resolved", None)
version = params.get("version", None)
# Handle registry sources
- if is_semver(version) and resolved and integrity:
+ if is_semver(version) and integrity:
+ # Handle duplicate dependencies without url
+ if not resolved:
+ return
+
localfile = npm_localfile(name, version)
uri = URI(resolved)
@@ -109,7 +129,7 @@ class NpmShrinkWrap(FetchMethod):
# Handle http tarball sources
elif version.startswith("http") and integrity:
- localfile = os.path.join("npm2", os.path.basename(version))
+ localfile = npm_localfile(os.path.basename(version))
uri = URI(version)
uri.params["downloadfilename"] = localfile
@@ -121,8 +141,28 @@ class NpmShrinkWrap(FetchMethod):
localpath = os.path.join(d.getVar("DL_DIR"), localfile)
+ # Handle local tarball and link sources
+ elif version.startswith("file"):
+ localpath = version[5:]
+ if not version.endswith(".tgz"):
+ unpack = False
+
# Handle git sources
- elif version.startswith("git"):
+ elif version.startswith(("git", "bitbucket","gist")) or (
+ not version.endswith((".tgz", ".tar", ".tar.gz"))
+ and not version.startswith((".", "@", "/"))
+ and "/" in version
+ ):
+ if version.startswith("github:"):
+ version = "git+https://github.com/" + version[len("github:"):]
+ elif version.startswith("gist:"):
+ version = "git+https://gist.github.com/" + version[len("gist:"):]
+ elif version.startswith("bitbucket:"):
+ version = "git+https://bitbucket.org/" + version[len("bitbucket:"):]
+ elif version.startswith("gitlab:"):
+ version = "git+https://gitlab.com/" + version[len("gitlab:"):]
+ elif not version.startswith(("git+","git:")):
+ version = "git+https://github.com/" + version
regex = re.compile(r"""
^
git\+
@@ -148,15 +188,17 @@ class NpmShrinkWrap(FetchMethod):
url = str(uri)
- # local tarball sources and local link sources are unsupported
else:
raise ParameterError("Unsupported dependency: %s" % name, ud.url)
+ # name is needed by unpack tracer for module mapping
ud.deps.append({
+ "name": name,
"url": url,
"localpath": localpath,
"extrapaths": extrapaths,
"destsuffix": destsuffix,
+ "unpack": unpack,
})
try:
@@ -177,17 +219,23 @@ class NpmShrinkWrap(FetchMethod):
# This fetcher resolves multiple URIs from a shrinkwrap file and then
# forwards it to a proxy fetcher. The management of the donestamp file,
# the lockfile and the checksums are forwarded to the proxy fetcher.
- ud.proxy = Fetch([dep["url"] for dep in ud.deps], data)
+ shrinkwrap_urls = [dep["url"] for dep in ud.deps if dep["url"]]
+ if shrinkwrap_urls:
+ ud.proxy = Fetch(shrinkwrap_urls, data)
ud.needdonestamp = False
@staticmethod
def _foreach_proxy_method(ud, handle):
returns = []
- for proxy_url in ud.proxy.urls:
- proxy_ud = ud.proxy.ud[proxy_url]
- proxy_d = ud.proxy.d
- proxy_ud.setup_localpath(proxy_d)
- returns.append(handle(proxy_ud.method, proxy_ud, proxy_d))
+ # Check if there are dependencies before trying to fetch them
+ if len(ud.deps) > 0:
+ for proxy_url in ud.proxy.urls:
+ proxy_ud = ud.proxy.ud[proxy_url]
+ proxy_d = ud.proxy.d
+ proxy_ud.setup_localpath(proxy_d)
+ lf = lockfile(proxy_ud.lockfile)
+ returns.append(handle(proxy_ud.method, proxy_ud, proxy_d))
+ unlockfile(lf)
return returns
def verify_donestamp(self, ud, d):
@@ -220,10 +268,11 @@ class NpmShrinkWrap(FetchMethod):
def unpack(self, ud, rootdir, d):
"""Unpack the downloaded dependencies"""
- destdir = d.getVar("S")
+ destdir = rootdir
destsuffix = ud.parm.get("destsuffix")
if destsuffix:
destdir = os.path.join(rootdir, destsuffix)
+ ud.unpack_tracer.unpack("npm-shrinkwrap", destdir)
bb.utils.mkdirhier(destdir)
bb.utils.copyfile(ud.shrinkwrap_file,
@@ -237,7 +286,16 @@ class NpmShrinkWrap(FetchMethod):
for dep in manual:
depdestdir = os.path.join(destdir, dep["destsuffix"])
- npm_unpack(dep["localpath"], depdestdir, d)
+ if dep["url"]:
+ npm_unpack(dep["localpath"], depdestdir, d)
+ else:
+ depsrcdir= os.path.join(destdir, dep["localpath"])
+ if dep["unpack"]:
+ npm_unpack(depsrcdir, depdestdir, d)
+ else:
+ bb.utils.mkdirhier(depdestdir)
+ cmd = 'cp -fpPRH "%s/." .' % (depsrcdir)
+ runfetchcmd(cmd, d, workdir=depdestdir)
def clean(self, ud, d):
"""Clean any existing full or partial download"""
diff --git a/lib/bb/fetch2/osc.py b/lib/bb/fetch2/osc.py
index 8f091efd0..495ac8a30 100644
--- a/lib/bb/fetch2/osc.py
+++ b/lib/bb/fetch2/osc.py
@@ -1,4 +1,6 @@
#
+# Copyright BitBake Contributors
+#
# SPDX-License-Identifier: GPL-2.0-only
#
"""
@@ -8,12 +10,16 @@ Based on the svn "Fetch" implementation.
"""
import logging
+import os
+import re
import bb
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import MissingParameterError
from bb.fetch2 import runfetchcmd
+logger = logging.getLogger(__name__)
+
class Osc(FetchMethod):
"""Class to fetch a module or modules from Opensuse build server
repositories."""
@@ -33,6 +39,7 @@ class Osc(FetchMethod):
# Create paths to osc checkouts
oscdir = d.getVar("OSCDIR") or (d.getVar("DL_DIR") + "/osc")
relpath = self._strip_leading_slashes(ud.path)
+ ud.oscdir = oscdir
ud.pkgdir = os.path.join(oscdir, ud.host)
ud.moddir = os.path.join(ud.pkgdir, relpath, ud.module)
@@ -40,13 +47,13 @@ class Osc(FetchMethod):
ud.revision = ud.parm['rev']
else:
pv = d.getVar("PV", False)
- rev = bb.fetch2.srcrev_internal_helper(ud, d)
+ rev = bb.fetch2.srcrev_internal_helper(ud, d, '')
if rev:
ud.revision = rev
else:
ud.revision = ""
- ud.localfile = d.expand('%s_%s_%s.tar.gz' % (ud.module.replace('/', '.'), ud.path.replace('/', '.'), ud.revision))
+ ud.localfile = d.expand('%s_%s_%s.tar.gz' % (ud.module.replace('/', '.'), relpath.replace('/', '.'), ud.revision))
def _buildosccommand(self, ud, d, command):
"""
@@ -56,38 +63,61 @@ class Osc(FetchMethod):
basecmd = d.getVar("FETCHCMD_osc") or "/usr/bin/env osc"
- proto = ud.parm.get('protocol', 'ocs')
+ proto = ud.parm.get('protocol', 'https')
options = []
config = "-c %s" % self.generate_config(ud, d)
- if ud.revision:
+ if getattr(ud, 'revision', ''):
options.append("-r %s" % ud.revision)
coroot = self._strip_leading_slashes(ud.path)
if command == "fetch":
- osccmd = "%s %s co %s/%s %s" % (basecmd, config, coroot, ud.module, " ".join(options))
+ osccmd = "%s %s -A %s://%s co %s/%s %s" % (basecmd, config, proto, ud.host, coroot, ud.module, " ".join(options))
elif command == "update":
- osccmd = "%s %s up %s" % (basecmd, config, " ".join(options))
+ osccmd = "%s %s -A %s://%s up %s" % (basecmd, config, proto, ud.host, " ".join(options))
+ elif command == "api_source":
+ osccmd = "%s %s -A %s://%s api source/%s/%s" % (basecmd, config, proto, ud.host, coroot, ud.module)
else:
raise FetchError("Invalid osc command %s" % command, ud.url)
return osccmd
+    def _latest_revision(self, ud, d, name):
+        """
+        Run the osc "api_source" command for this package and return the
+        number found in the rev="..." attribute of the <directory> element
+        at the start of its output, as a string.
+
+        Raises FetchError when the output does not have that form.
+        """
+        output = runfetchcmd(self._buildosccommand(ud, d, "api_source"), d)
+
+        match = re.match(r'<directory ?.* rev="(\d+)".*>', output)
+        if match is not None:
+            return match.group(1)
+        raise FetchError("Unable to parse osc response", ud.url)
+
+    def _revision_key(self, ud, d, name):
+        """
+        Build the key for this url from the host, the path with every run of
+        slashes replaced by a single dot, the name, and ud.revision
+        ("latest" when ud has no revision attribute).
+        """
+        revision = getattr(ud, 'revision', "latest")
+        # Collapse adjacent slashes
+        dotted_path = re.sub(r"/+", ".", ud.path)
+        return "osc:%s%s.%s.%s" % (ud.host, dotted_path, name, revision)
+
def download(self, ud, d):
"""
Fetch url
"""
- logger.debug(2, "Fetch: checking for module directory '" + ud.moddir + "'")
+ logger.debug2("Fetch: checking for module directory '" + ud.moddir + "'")
- if os.access(os.path.join(d.getVar('OSCDIR'), ud.path, ud.module), os.R_OK):
+ if os.access(ud.moddir, os.R_OK):
oscupdatecmd = self._buildosccommand(ud, d, "update")
logger.info("Update "+ ud.url)
# update sources there
- logger.debug(1, "Running %s", oscupdatecmd)
+ logger.debug("Running %s", oscupdatecmd)
bb.fetch2.check_network_access(d, oscupdatecmd, ud.url)
runfetchcmd(oscupdatecmd, d, workdir=ud.moddir)
else:
@@ -95,7 +125,7 @@ class Osc(FetchMethod):
logger.info("Fetch " + ud.url)
# check out sources there
bb.utils.mkdirhier(ud.pkgdir)
- logger.debug(1, "Running %s", oscfetchcmd)
+ logger.debug("Running %s", oscfetchcmd)
bb.fetch2.check_network_access(d, oscfetchcmd, ud.url)
runfetchcmd(oscfetchcmd, d, workdir=ud.pkgdir)
@@ -111,20 +141,23 @@ class Osc(FetchMethod):
Generate a .oscrc to be used for this run.
"""
- config_path = os.path.join(d.getVar('OSCDIR'), "oscrc")
+ config_path = os.path.join(ud.oscdir, "oscrc")
+ if not os.path.exists(ud.oscdir):
+ bb.utils.mkdirhier(ud.oscdir)
+
if (os.path.exists(config_path)):
os.remove(config_path)
f = open(config_path, 'w')
+ proto = ud.parm.get('protocol', 'https')
f.write("[general]\n")
- f.write("apisrv = %s\n" % ud.host)
- f.write("scheme = http\n")
+ f.write("apiurl = %s://%s\n" % (proto, ud.host))
f.write("su-wrapper = su -c\n")
f.write("build-root = %s\n" % d.getVar('WORKDIR'))
f.write("urllist = %s\n" % d.getVar("OSCURLLIST"))
f.write("extra-pkgs = gzip\n")
f.write("\n")
- f.write("[%s]\n" % ud.host)
+ f.write("[%s://%s]\n" % (proto, ud.host))
f.write("user = %s\n" % ud.parm["user"])
f.write("pass = %s\n" % ud.parm["pswd"])
f.close()
diff --git a/lib/bb/fetch2/perforce.py b/lib/bb/fetch2/perforce.py
index 6f3c95b6c..3b6fa4b1e 100644
--- a/lib/bb/fetch2/perforce.py
+++ b/lib/bb/fetch2/perforce.py
@@ -90,16 +90,16 @@ class Perforce(FetchMethod):
p4port = d.getVar('P4PORT')
if p4port:
- logger.debug(1, 'Using recipe provided P4PORT: %s' % p4port)
+ logger.debug('Using recipe provided P4PORT: %s' % p4port)
ud.host = p4port
else:
- logger.debug(1, 'Trying to use P4CONFIG to automatically set P4PORT...')
+ logger.debug('Trying to use P4CONFIG to automatically set P4PORT...')
ud.usingp4config = True
p4cmd = '%s info | grep "Server address"' % ud.basecmd
bb.fetch2.check_network_access(d, p4cmd, ud.url)
ud.host = runfetchcmd(p4cmd, d, True)
ud.host = ud.host.split(': ')[1].strip()
- logger.debug(1, 'Determined P4PORT to be: %s' % ud.host)
+ logger.debug('Determined P4PORT to be: %s' % ud.host)
if not ud.host:
raise FetchError('Could not determine P4PORT from P4CONFIG')
@@ -119,6 +119,7 @@ class Perforce(FetchMethod):
cleanedpath = ud.path.replace('/...', '').replace('/', '.')
cleanedhost = ud.host.replace(':', '.')
+ cleanedmodule = ""
# Merge the path and module into the final depot location
if ud.module:
if ud.module.find('/') == 0:
@@ -133,7 +134,7 @@ class Perforce(FetchMethod):
ud.setup_revisions(d)
- ud.localfile = d.expand('%s_%s_%s.tar.gz' % (cleanedhost, cleanedpath, ud.revision))
+ ud.localfile = d.expand('%s_%s_%s_%s.tar.gz' % (cleanedhost, cleanedpath, cleanedmodule, ud.revision))
def _buildp4command(self, ud, d, command, depot_filename=None):
"""
@@ -207,7 +208,7 @@ class Perforce(FetchMethod):
for filename in p4fileslist:
item = filename.split(' - ')
lastaction = item[1].split()
- logger.debug(1, 'File: %s Last Action: %s' % (item[0], lastaction[0]))
+ logger.debug('File: %s Last Action: %s' % (item[0], lastaction[0]))
if lastaction[0] == 'delete':
continue
filelist.append(item[0])
@@ -254,7 +255,7 @@ class Perforce(FetchMethod):
raise FetchError('Could not determine the latest perforce changelist')
tipcset = tip.split(' ')[1]
- logger.debug(1, 'p4 tip found to be changelist %s' % tipcset)
+ logger.debug('p4 tip found to be changelist %s' % tipcset)
return tipcset
def sortable_revision(self, ud, d, name):
diff --git a/lib/bb/fetch2/repo.py b/lib/bb/fetch2/repo.py
index 2bdbbd409..fa4cb8149 100644
--- a/lib/bb/fetch2/repo.py
+++ b/lib/bb/fetch2/repo.py
@@ -47,7 +47,7 @@ class Repo(FetchMethod):
"""Fetch url"""
if os.access(os.path.join(d.getVar("DL_DIR"), ud.localfile), os.R_OK):
- logger.debug(1, "%s already exists (or was stashed). Skipping repo init / sync.", ud.localpath)
+ logger.debug("%s already exists (or was stashed). Skipping repo init / sync.", ud.localpath)
return
repodir = d.getVar("REPODIR") or (d.getVar("DL_DIR") + "/repo")
diff --git a/lib/bb/fetch2/s3.py b/lib/bb/fetch2/s3.py
index ffca73c8e..6b8ffd535 100644
--- a/lib/bb/fetch2/s3.py
+++ b/lib/bb/fetch2/s3.py
@@ -18,10 +18,47 @@ The aws tool must be correctly installed and configured prior to use.
import os
import bb
import urllib.request, urllib.parse, urllib.error
+import re
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import runfetchcmd
+def convertToBytes(value, unit):
+ value = float(value)
+ if (unit == "KiB"):
+ value = value*1024.0;
+ elif (unit == "MiB"):
+ value = value*1024.0*1024.0;
+ elif (unit == "GiB"):
+ value = value*1024.0*1024.0*1024.0;
+ return value
+
+class S3ProgressHandler(bb.progress.LineFilterProgressHandler):
+ """
+ Extract progress information from s3 cp output, e.g.:
+ Completed 5.1 KiB/8.8 GiB (12.0 MiB/s) with 1 file(s) remaining
+ """
+ def __init__(self, d):
+ super(S3ProgressHandler, self).__init__(d)
+ # Send an initial progress event so the bar gets shown
+ self._fire_progress(0)
+
+ def writeline(self, line):
+ percs = re.findall(r'^Completed (\d+.{0,1}\d*) (\w+)\/(\d+.{0,1}\d*) (\w+) (\(.+\)) with\s+', line)
+ if percs:
+ completed = (percs[-1][0])
+ completedUnit = (percs[-1][1])
+ total = (percs[-1][2])
+ totalUnit = (percs[-1][3])
+ completed = convertToBytes(completed, completedUnit)
+ total = convertToBytes(total, totalUnit)
+ progress = (completed/total)*100.0
+ rate = percs[-1][4]
+ self.update(progress, rate)
+ return False
+ return True
+
+
class S3(FetchMethod):
"""Class to fetch urls via 'aws s3'"""
@@ -52,7 +89,9 @@ class S3(FetchMethod):
cmd = '%s cp s3://%s%s %s' % (ud.basecmd, ud.host, ud.path, ud.localpath)
bb.fetch2.check_network_access(d, cmd, ud.url)
- runfetchcmd(cmd, d)
+
+ progresshandler = S3ProgressHandler(d)
+ runfetchcmd(cmd, d, False, log=progresshandler)
# Additional sanity checks copied from the wget class (although there
# are no known issues which mean these are required, treat the aws cli
diff --git a/lib/bb/fetch2/sftp.py b/lib/bb/fetch2/sftp.py
index f87f292e5..7884cce94 100644
--- a/lib/bb/fetch2/sftp.py
+++ b/lib/bb/fetch2/sftp.py
@@ -103,7 +103,7 @@ class SFTP(FetchMethod):
if path[:3] == '/~/':
path = path[3:]
- remote = '%s%s:%s' % (user, urlo.hostname, path)
+ remote = '"%s%s:%s"' % (user, urlo.hostname, path)
cmd = '%s %s %s %s' % (basecmd, port, remote, lpath)
diff --git a/lib/bb/fetch2/ssh.py b/lib/bb/fetch2/ssh.py
index 5e982ecf3..0cbb2a6f2 100644
--- a/lib/bb/fetch2/ssh.py
+++ b/lib/bb/fetch2/ssh.py
@@ -31,8 +31,8 @@ IETF secsh internet draft:
#
import re, os
-from bb.fetch2 import FetchMethod
-from bb.fetch2 import runfetchcmd
+from bb.fetch2 import check_network_access, FetchMethod, ParameterError, runfetchcmd
+import urllib
__pattern__ = re.compile(r'''
@@ -41,9 +41,9 @@ __pattern__ = re.compile(r'''
( # Optional username/password block
(?P<user>\S+) # username
(:(?P<pass>\S+))? # colon followed by the password (optional)
- )?
(?P<cparam>(;[^;]+)*)? # connection parameters block (optional)
@
+ )?
(?P<host>\S+?) # non-greedy match of the host
(:(?P<port>[0-9]+))? # colon followed by the port (optional)
/
@@ -65,12 +65,13 @@ class SSH(FetchMethod):
def urldata_init(self, urldata, d):
if 'protocol' in urldata.parm and urldata.parm['protocol'] == 'git':
- raise bb.fetch2.ParameterError(
+ raise ParameterError(
"Invalid protocol - if you wish to fetch from a git " +
"repository using ssh, you need to use " +
"git:// prefix with protocol=ssh", urldata.url)
m = __pattern__.match(urldata.url)
path = m.group('path')
+ path = urllib.parse.unquote(path)
host = m.group('host')
urldata.localpath = os.path.join(d.getVar('DL_DIR'),
os.path.basename(os.path.normpath(path)))
@@ -97,6 +98,11 @@ class SSH(FetchMethod):
fr += '@%s' % host
else:
fr = host
+
+ if path[0] != '~':
+ path = '/%s' % path
+ path = urllib.parse.unquote(path)
+
fr += ':%s' % path
cmd = 'scp -B -r %s %s %s/' % (
@@ -105,7 +111,45 @@ class SSH(FetchMethod):
dldir
)
- bb.fetch2.check_network_access(d, cmd, urldata.url)
+ check_network_access(d, cmd, urldata.url)
+
+ runfetchcmd(cmd, d)
+
+ def checkstatus(self, fetch, urldata, d):
+ """
+ Check the status of the url
+ """
+ m = __pattern__.match(urldata.url)
+ path = m.group('path')
+ host = m.group('host')
+ port = m.group('port')
+ user = m.group('user')
+ password = m.group('pass')
+
+ if port:
+ portarg = '-P %s' % port
+ else:
+ portarg = ''
+
+ if user:
+ fr = user
+ if password:
+ fr += ':%s' % password
+ fr += '@%s' % host
+ else:
+ fr = host
+
+ if path[0] != '~':
+ path = '/%s' % path
+ path = urllib.parse.unquote(path)
+
+ cmd = 'ssh -o BatchMode=true %s %s [ -f %s ]' % (
+ portarg,
+ fr,
+ path
+ )
+ check_network_access(d, cmd, urldata.url)
runfetchcmd(cmd, d)
+ return True
diff --git a/lib/bb/fetch2/svn.py b/lib/bb/fetch2/svn.py
index 971a5add4..0852108e7 100644
--- a/lib/bb/fetch2/svn.py
+++ b/lib/bb/fetch2/svn.py
@@ -57,7 +57,12 @@ class Svn(FetchMethod):
if 'rev' in ud.parm:
ud.revision = ud.parm['rev']
- ud.localfile = d.expand('%s_%s_%s_%s_.tar.gz' % (ud.module.replace('/', '.'), ud.host, ud.path.replace('/', '.'), ud.revision))
+ # Whether to use the @REV peg-revision syntax in the svn command or not
+ ud.pegrevision = True
+ if 'nopegrevision' in ud.parm:
+ ud.pegrevision = False
+
+ ud.localfile = d.expand('%s_%s_%s_%s_%s.tar.gz' % (ud.module.replace('/', '.'), ud.host, ud.path.replace('/', '.'), ud.revision, ["0", "1"][ud.pegrevision]))
def _buildsvncommand(self, ud, d, command):
"""
@@ -86,7 +91,7 @@ class Svn(FetchMethod):
if command == "info":
svncmd = "%s info %s %s://%s/%s/" % (ud.basecmd, " ".join(options), proto, svnroot, ud.module)
elif command == "log1":
- svncmd = "%s log --limit 1 %s %s://%s/%s/" % (ud.basecmd, " ".join(options), proto, svnroot, ud.module)
+ svncmd = "%s log --limit 1 --quiet %s %s://%s/%s/" % (ud.basecmd, " ".join(options), proto, svnroot, ud.module)
else:
suffix = ""
@@ -98,7 +103,8 @@ class Svn(FetchMethod):
if ud.revision:
options.append("-r %s" % ud.revision)
- suffix = "@%s" % (ud.revision)
+ if ud.pegrevision:
+ suffix = "@%s" % (ud.revision)
if command == "fetch":
transportuser = ud.parm.get("transportuser", "")
@@ -116,7 +122,7 @@ class Svn(FetchMethod):
def download(self, ud, d):
"""Fetch url"""
- logger.debug(2, "Fetch: checking for module directory '" + ud.moddir + "'")
+ logger.debug2("Fetch: checking for module directory '" + ud.moddir + "'")
lf = bb.utils.lockfile(ud.svnlock)
@@ -129,7 +135,7 @@ class Svn(FetchMethod):
runfetchcmd(ud.basecmd + " upgrade", d, workdir=ud.moddir)
except FetchError:
pass
- logger.debug(1, "Running %s", svncmd)
+ logger.debug("Running %s", svncmd)
bb.fetch2.check_network_access(d, svncmd, ud.url)
runfetchcmd(svncmd, d, workdir=ud.moddir)
else:
@@ -137,7 +143,7 @@ class Svn(FetchMethod):
logger.info("Fetch " + ud.url)
# check out sources there
bb.utils.mkdirhier(ud.pkgdir)
- logger.debug(1, "Running %s", svncmd)
+ logger.debug("Running %s", svncmd)
bb.fetch2.check_network_access(d, svncmd, ud.url)
runfetchcmd(svncmd, d, workdir=ud.pkgdir)
@@ -204,3 +210,6 @@ class Svn(FetchMethod):
def _build_revision(self, ud, d):
return ud.revision
+
+ def supports_checksum(self, urldata):
+ return False
diff --git a/lib/bb/fetch2/wget.py b/lib/bb/fetch2/wget.py
index f7d1de26b..d76b1d0d3 100644
--- a/lib/bb/fetch2/wget.py
+++ b/lib/bb/fetch2/wget.py
@@ -26,7 +26,6 @@ from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
-from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer
@@ -53,11 +52,23 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
class Wget(FetchMethod):
"""Class to fetch urls via 'wget'"""
+
+ # CDNs like CloudFlare may do a 'browser integrity test' which can fail
+ # with the standard wget/urllib User-Agent, so pretend to be a modern
+ # browser.
+ user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
+
+ def check_certs(self, d):
+ """
+ Should certificates be checked?
+ """
+ return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"
+
def supports(self, ud, d):
"""
Check to see if a given url can be fetched with wget.
"""
- return ud.type in ['http', 'https', 'ftp']
+ return ud.type in ['http', 'https', 'ftp', 'ftps']
def recommends_checksum(self, urldata):
return True
@@ -76,13 +87,19 @@ class Wget(FetchMethod):
if not ud.localfile:
ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
- self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
+ self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30"
+
+ if ud.type == 'ftp' or ud.type == 'ftps':
+ self.basecmd += " --passive-ftp"
+
+ if not self.check_certs(d):
+ self.basecmd += " --no-check-certificate"
def _runwget(self, ud, d, command, quiet, workdir=None):
progresshandler = WgetProgressHandler(d)
- logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
+ logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
bb.fetch2.check_network_access(d, command, ud.url)
runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
@@ -91,13 +108,22 @@ class Wget(FetchMethod):
fetchcmd = self.basecmd
- if 'downloadfilename' in ud.parm:
- localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
- bb.utils.mkdirhier(os.path.dirname(localpath))
- fetchcmd += " -O %s" % shlex.quote(localpath)
+ localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"
+ bb.utils.mkdirhier(os.path.dirname(localpath))
+ fetchcmd += " -O %s" % shlex.quote(localpath)
if ud.user and ud.pswd:
- fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
+ fetchcmd += " --auth-no-challenge"
+ if ud.parm.get("redirectauth", "1") == "1":
+ # An undocumented feature of wget is that if the
+ # username/password are specified on the URI, wget will only
+ # send the Authorization header to the first host and not to
+ # any hosts that it is redirected to. With the increasing
+ # usage of temporary AWS URLs, this difference now matters as
+ # AWS will reject any request that has authentication both in
+ # the query parameters (from the redirect) and in the
+ # Authorization header.
+ fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
uri = ud.url.split(";")[0]
if os.path.exists(ud.localpath):
@@ -110,13 +136,22 @@ class Wget(FetchMethod):
# Sanity check since wget can pretend it succeed when it didn't
# Also, this used to happen if sourceforge sent us to the mirror page
- if not os.path.exists(ud.localpath):
- raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
+ if not os.path.exists(localpath):
+ raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, localpath), uri)
- if os.path.getsize(ud.localpath) == 0:
- os.remove(ud.localpath)
+ if os.path.getsize(localpath) == 0:
+ os.remove(localpath)
raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
+ # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
+ # original file, which might be a race (imagine two recipes referencing the same
+ # source, one with an incorrect checksum)
+ bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)
+
+ # Remove the ".tmp" and move the file into position atomically
+ # Our lock prevents multiple writers but mirroring code may grab incomplete files
+ os.rename(localpath, localpath[:-4])
+
return True
def checkstatus(self, fetch, ud, d, try_again=True):
@@ -203,15 +238,12 @@ class Wget(FetchMethod):
# We let the request fail and expect it to be
# tried once more ("try_again" in check_status()),
# with the dead connection removed from the cache.
- # If it still fails, we give up, which can happend for bad
+ # If it still fails, we give up, which can happen for bad
# HTTP proxy settings.
fetch.connection_cache.remove_connection(h.host, h.port)
raise urllib.error.URLError(err)
else:
- try:
- r = h.getresponse(buffering=True)
- except TypeError: # buffering kw not supported
- r = h.getresponse()
+ r = h.getresponse()
# Pick apart the HTTPResponse object to get the addinfourl
# object initialized properly.
@@ -279,56 +311,76 @@ class Wget(FetchMethod):
newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
newreq.get_method = req.get_method
return newreq
- exported_proxies = export_proxies(d)
-
- handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
- if exported_proxies:
- handlers.append(urllib.request.ProxyHandler())
- handlers.append(CacheHTTPHandler())
- # Since Python 2.7.9 ssl cert validation is enabled by default
- # see PEP-0476, this causes verification errors on some https servers
- # so disable by default.
- import ssl
- if hasattr(ssl, '_create_unverified_context'):
- handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
- opener = urllib.request.build_opener(*handlers)
-
- try:
- uri = ud.url.split(";")[0]
- r = urllib.request.Request(uri)
- r.get_method = lambda: "HEAD"
- # Some servers (FusionForge, as used on Alioth) require that the
- # optional Accept header is set.
- r.add_header("Accept", "*/*")
- r.add_header("User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12")
- def add_basic_auth(login_str, request):
- '''Adds Basic auth to http request, pass in login:password as string'''
- import base64
- encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
- authheader = "Basic %s" % encodeuser
- r.add_header("Authorization", authheader)
-
- if ud.user and ud.pswd:
- add_basic_auth(ud.user + ':' + ud.pswd, r)
- try:
- import netrc
- n = netrc.netrc()
- login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
- add_basic_auth("%s:%s" % (login, password), r)
- except (TypeError, ImportError, IOError, netrc.NetrcParseError):
- pass
-
- with opener.open(r) as response:
- pass
- except urllib.error.URLError as e:
- if try_again:
- logger.debug(2, "checkstatus: trying again")
- return self.checkstatus(fetch, ud, d, False)
+ # We need to update the environment here as both the proxy and HTTPS
+ # handlers need variables set. The proxy needs http_proxy and friends to
+ # be set, and HTTPSHandler ends up calling into openssl to load the
+ # certificates. In buildtools configurations this will be looking at the
+ # wrong place for certificates by default: we set SSL_CERT_FILE to the
+ # right location in the buildtools environment script but as BitBake
+ # prunes the environment this is lost. When binaries are executed
+ # runfetchcmd ensures these values are in the environment, but this is
+ # pure Python so we need to update the environment.
+ #
+ # Avoid trampling the environment too much by using bb.utils.environment
+ # to scope the changes to the build_opener request, which is when the
+ # environment lookups happen.
+ newenv = bb.fetch2.get_fetcher_environment(d)
+
+ with bb.utils.environment(**newenv):
+ import ssl
+
+ if self.check_certs(d):
+ context = ssl.create_default_context()
else:
- # debug for now to avoid spamming the logs in e.g. remote sstate searches
- logger.debug(2, "checkstatus() urlopen failed: %s" % e)
- return False
+ context = ssl._create_unverified_context()
+
+ handlers = [FixedHTTPRedirectHandler,
+ HTTPMethodFallback,
+ urllib.request.ProxyHandler(),
+ CacheHTTPHandler(),
+ urllib.request.HTTPSHandler(context=context)]
+ opener = urllib.request.build_opener(*handlers)
+
+ try:
+ uri_base = ud.url.split(";")[0]
+ uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path)
+ r = urllib.request.Request(uri)
+ r.get_method = lambda: "HEAD"
+ # Some servers (FusionForge, as used on Alioth) require that the
+ # optional Accept header is set.
+ r.add_header("Accept", "*/*")
+ r.add_header("User-Agent", self.user_agent)
+ def add_basic_auth(login_str, request):
+ '''Adds Basic auth to http request, pass in login:password as string'''
+ import base64
+ encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
+ authheader = "Basic %s" % encodeuser
+ r.add_header("Authorization", authheader)
+
+ if ud.user and ud.pswd:
+ add_basic_auth(ud.user + ':' + ud.pswd, r)
+
+ try:
+ import netrc
+ auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname)
+ if auth_data:
+ login, _, password = auth_data
+ add_basic_auth("%s:%s" % (login, password), r)
+ except (FileNotFoundError, netrc.NetrcParseError):
+ pass
+
+ with opener.open(r, timeout=30) as response:
+ pass
+ except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
+ if try_again:
+ logger.debug2("checkstatus: trying again")
+ return self.checkstatus(fetch, ud, d, False)
+ else:
+ # debug for now to avoid spamming the logs in e.g. remote sstate searches
+ logger.debug2("checkstatus() urlopen failed for %s: %s" % (uri,e))
+ return False
+
return True
def _parse_path(self, regex, s):
@@ -404,9 +456,8 @@ class Wget(FetchMethod):
"""
f = tempfile.NamedTemporaryFile()
with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
- agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"
fetchcmd = self.basecmd
- fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
+ fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
try:
self._runwget(ud, d, fetchcmd, True, workdir=workdir)
fetchresult = f.read()
@@ -462,7 +513,7 @@ class Wget(FetchMethod):
version_dir = ['', '', '']
version = ['', '', '']
- dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
+ dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
s = dirver_regex.search(dirver)
if s:
version_dir[1] = s.group('ver')
@@ -538,7 +589,7 @@ class Wget(FetchMethod):
# src.rpm extension was added only for rpm package. Can be removed if the rpm
# packaged will always be considered as having to be manually upgraded
- psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"
+ psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"
# match name, version and archive type of a package
package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
@@ -589,10 +640,10 @@ class Wget(FetchMethod):
# search for version matches on folders inside the path, like:
# "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
- m = dirver_regex.search(path)
+ m = dirver_regex.findall(path)
if m:
pn = d.getVar('PN')
- dirver = m.group('dirver')
+ dirver = m[-1][0]
dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
if not dirver_pn_regex.search(dirver):