Diffstat (limited to 'lib/bb/fetch2')
-rw-r--r--  lib/bb/fetch2/README       |  57
-rw-r--r--  lib/bb/fetch2/__init__.py  | 424
-rw-r--r--  lib/bb/fetch2/az.py        |  93
-rw-r--r--  lib/bb/fetch2/bzr.py       |   8
-rw-r--r--  lib/bb/fetch2/clearcase.py |   2
-rw-r--r--  lib/bb/fetch2/crate.py     | 150
-rw-r--r--  lib/bb/fetch2/cvs.py       |   4
-rw-r--r--  lib/bb/fetch2/gcp.py       | 102
-rw-r--r--  lib/bb/fetch2/git.py       | 333
-rw-r--r--  lib/bb/fetch2/gitsm.py     |  55
-rw-r--r--  lib/bb/fetch2/hg.py        |  17
-rw-r--r--  lib/bb/fetch2/local.py     |  33
-rw-r--r--  lib/bb/fetch2/npm.py       |  63
-rw-r--r--  lib/bb/fetch2/npmsw.py     |  96
-rw-r--r--  lib/bb/fetch2/osc.py       |  61
-rw-r--r--  lib/bb/fetch2/perforce.py  |  13
-rw-r--r--  lib/bb/fetch2/repo.py      |   2
-rw-r--r--  lib/bb/fetch2/s3.py        |  41
-rw-r--r--  lib/bb/fetch2/sftp.py      |   2
-rw-r--r--  lib/bb/fetch2/ssh.py       |  54
-rw-r--r--  lib/bb/fetch2/svn.py       |  21
-rw-r--r--  lib/bb/fetch2/wget.py      | 195
22 files changed, 1425 insertions(+), 401 deletions(-)
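A recurring theme in the diffs below is more defensive URL handling. For
example, decodeurl() in __init__.py now splits each ";key=value" parameter
on the first '=' only, so values that themselves contain '=' are preserved
instead of raising an unpacking error. A minimal standalone sketch of that
behaviour (parse_params is a hypothetical helper for illustration, not the
BitBake API):

    def parse_params(paramstr):
        """Parse ';key=value' fetcher URL parameters."""
        params = {}
        for s in paramstr.split(";"):
            if not s:
                continue
            if "=" not in s:
                raise ValueError("parameter %r is missing '='" % s)
            # maxsplit=1 keeps any further '=' characters in the value,
            # matching the s.split('=', 1) fix in decodeurl().
            k, v = s.split("=", 1)
            params[k] = v
        return params

    # The old single-argument split() crashed on values containing '=':
    print(parse_params("branch=main;tag=v1.0=rc1"))
    # -> {'branch': 'main', 'tag': 'v1.0=rc1'}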
diff --git a/lib/bb/fetch2/README b/lib/bb/fetch2/README new file mode 100644 index 000000000..67b787ef4 --- /dev/null +++ b/lib/bb/fetch2/README @@ -0,0 +1,57 @@ +There are expectations of users of the fetcher code. This file attempts to document +some of the constraints that are present. Some are obvious, some are less so. It is +documented in the context of how OE uses it but the API calls are generic. + +a) network access for sources is only expected to happen in the do_fetch task. + This is not enforced or tested but is required so that we can: + + i) audit the sources used (i.e. for license/manifest reasons) + ii) support offline builds with a suitable cache + iii) allow work to continue even with downtime upstream + iv) allow for changes upstream in incompatible ways + v) allow rebuilding of the software in X years time + +b) network access is not expected in do_unpack task. + +c) you can take DL_DIR and use it as a mirror for offline builds. + +d) access to the network is only made when explicitly configured in recipes + (e.g. use of AUTOREV, or use of git tags which change revision). + +e) fetcher output is deterministic (i.e. if you fetch configuration XXX now it + will match in future exactly in a clean build with a new DL_DIR). + One specific pain point example are git tags. They can be replaced and change + so the git fetcher has to resolve them with the network. We use git revisions + where possible to avoid this and ensure determinism. + +f) network access is expected to work with the standard linux proxy variables + so that access behind firewalls works (the fetcher sets these in the + environment but only in the do_fetch tasks). + +g) access during parsing has to be minimal, a "git ls-remote" for an AUTOREV + git recipe might be ok but you can't expect to checkout a git tree. + +h) we need to provide revision information during parsing such that a version + for the recipe can be constructed. + +i) versions are expected to be able to increase in a way which sorts allowing + package feeds to operate (see PR server required for git revisions to sort). + +j) API to query for possible version upgrades of a url is highly desireable to + allow our automated upgrage code to function (it is implied this does always + have network access). + +k) Where fixes or changes to behaviour in the fetcher are made, we ask that + test cases are added (run with "bitbake-selftest bb.tests.fetch"). We do + have fairly extensive test coverage of the fetcher as it is the only way + to track all of its corner cases, it still doesn't give entire coverage + though sadly. + +l) If using tools during parse time, they will have to be in ASSUME_PROVIDED + in OE's context as we can't build git-native, then parse a recipe and use + git ls-remote. + +Not all fetchers support all features, autorev is optional and doesn't make +sense for some. Upgrade detection means different things in different contexts +too. + diff --git a/lib/bb/fetch2/__init__.py b/lib/bb/fetch2/__init__.py index 756f60212..5bf2c4b8c 100644 --- a/lib/bb/fetch2/__init__.py +++ b/lib/bb/fetch2/__init__.py @@ -113,7 +113,7 @@ class MissingParameterError(BBFetchException): self.args = (missing, url) class ParameterError(BBFetchException): - """Exception raised when a url cannot be proccessed due to invalid parameters.""" + """Exception raised when a url cannot be processed due to invalid parameters.""" def __init__(self, message, url): msg = "URL: '%s' has invalid parameters. 
%s" % (url, message) self.url = url @@ -182,7 +182,7 @@ class URI(object): Some notes about relative URIs: while it's specified that a URI beginning with <scheme>:// should either be directly followed by a hostname or a /, the old URI handling of the - fetch2 library did not comform to this. Therefore, this URI + fetch2 library did not conform to this. Therefore, this URI class has some kludges to make sure that URIs are parsed in a way comforming to bitbake's current usage. This URI class supports the following: @@ -199,7 +199,7 @@ class URI(object): file://hostname/absolute/path.diff (would be IETF compliant) Note that the last case only applies to a list of - "whitelisted" schemes (currently only file://), that requires + explicitly allowed schemes (currently only file://), that requires its URIs to not have a network location. """ @@ -290,12 +290,12 @@ class URI(object): def _param_str_split(self, string, elmdelim, kvdelim="="): ret = collections.OrderedDict() - for k, v in [x.split(kvdelim, 1) for x in string.split(elmdelim)]: + for k, v in [x.split(kvdelim, 1) if kvdelim in x else (x, None) for x in string.split(elmdelim) if x]: ret[k] = v return ret def _param_str_join(self, dict_, elmdelim, kvdelim="="): - return elmdelim.join([kvdelim.join([k, v]) for k, v in dict_.items()]) + return elmdelim.join([kvdelim.join([k, v]) if v else k for k, v in dict_.items()]) @property def hostport(self): @@ -388,7 +388,7 @@ def decodeurl(url): if s: if not '=' in s: raise MalformedUrl(url, "The URL: '%s' is invalid: parameter %s does not specify a value (missing '=')" % (url, s)) - s1, s2 = s.split('=') + s1, s2 = s.split('=', 1) p[s1] = s2 return type, host, urllib.parse.unquote(path), user, pswd, p @@ -402,24 +402,24 @@ def encodeurl(decoded): if not type: raise MissingParameterError('type', "encoded from the data %s" % str(decoded)) - url = '%s://' % type + url = ['%s://' % type] if user and type != "file": - url += "%s" % user + url.append("%s" % user) if pswd: - url += ":%s" % pswd - url += "@" + url.append(":%s" % pswd) + url.append("@") if host and type != "file": - url += "%s" % host + url.append("%s" % host) if path: # Standardise path to ensure comparisons work while '//' in path: path = path.replace("//", "/") - url += "%s" % urllib.parse.quote(path) + url.append("%s" % urllib.parse.quote(path)) if p: for parm in p: - url += ";%s=%s" % (parm, p[parm]) + url.append(";%s=%s" % (parm, p[parm])) - return url + return "".join(url) def uri_replace(ud, uri_find, uri_replace, replacements, d, mirrortarball=None): if not ud.url or not uri_find or not uri_replace: @@ -428,8 +428,9 @@ def uri_replace(ud, uri_find, uri_replace, replacements, d, mirrortarball=None): uri_decoded = list(decodeurl(ud.url)) uri_find_decoded = list(decodeurl(uri_find)) uri_replace_decoded = list(decodeurl(uri_replace)) - logger.debug(2, "For url %s comparing %s to %s" % (uri_decoded, uri_find_decoded, uri_replace_decoded)) + logger.debug2("For url %s comparing %s to %s" % (uri_decoded, uri_find_decoded, uri_replace_decoded)) result_decoded = ['', '', '', '', '', {}] + # 0 - type, 1 - host, 2 - path, 3 - user, 4- pswd, 5 - params for loc, i in enumerate(uri_find_decoded): result_decoded[loc] = uri_decoded[loc] regexp = i @@ -449,6 +450,9 @@ def uri_replace(ud, uri_find, uri_replace, replacements, d, mirrortarball=None): for l in replacements: uri_replace_decoded[loc][k] = uri_replace_decoded[loc][k].replace(l, replacements[l]) result_decoded[loc][k] = uri_replace_decoded[loc][k] + elif (loc == 3 or loc == 4) and 
uri_replace_decoded[loc]: + # User/password in the replacement is just a straight replacement + result_decoded[loc] = uri_replace_decoded[loc] elif (re.match(regexp, uri_decoded[loc])): if not uri_replace_decoded[loc]: result_decoded[loc] = "" @@ -465,16 +469,24 @@ def uri_replace(ud, uri_find, uri_replace, replacements, d, mirrortarball=None): basename = os.path.basename(mirrortarball) # Kill parameters, they make no sense for mirror tarballs uri_decoded[5] = {} + uri_find_decoded[5] = {} elif ud.localpath and ud.method.supports_checksum(ud): basename = os.path.basename(ud.localpath) - if basename and not result_decoded[loc].endswith(basename): - result_decoded[loc] = os.path.join(result_decoded[loc], basename) + if basename: + uri_basename = os.path.basename(uri_decoded[loc]) + # Prefix with a slash as a sentinel in case + # result_decoded[loc] does not contain one. + path = "/" + result_decoded[loc] + if uri_basename and basename != uri_basename and path.endswith("/" + uri_basename): + result_decoded[loc] = path[1:-len(uri_basename)] + basename + elif not path.endswith("/" + basename): + result_decoded[loc] = os.path.join(path[1:], basename) else: return None result = encodeurl(result_decoded) if result == ud.url: return None - logger.debug(2, "For url %s returning %s" % (ud.url, result)) + logger.debug2("For url %s returning %s" % (ud.url, result)) return result methods = [] @@ -499,14 +511,14 @@ def fetcher_init(d): # When to drop SCM head revisions controlled by user policy srcrev_policy = d.getVar('BB_SRCREV_POLICY') or "clear" if srcrev_policy == "cache": - logger.debug(1, "Keeping SRCREV cache due to cache policy of: %s", srcrev_policy) + logger.debug("Keeping SRCREV cache due to cache policy of: %s", srcrev_policy) elif srcrev_policy == "clear": - logger.debug(1, "Clearing SRCREV cache due to cache policy of: %s", srcrev_policy) + logger.debug("Clearing SRCREV cache due to cache policy of: %s", srcrev_policy) revs.clear() else: raise FetchError("Invalid SRCREV cache policy of: %s" % srcrev_policy) - _checksum_cache.init_cache(d) + _checksum_cache.init_cache(d.getVar("BB_CACHEDIR")) for m in methods: if hasattr(m, "init"): @@ -534,7 +546,7 @@ def mirror_from_string(data): bb.warn('Invalid mirror data %s, should have paired members.' % data) return list(zip(*[iter(mirrors)]*2)) -def verify_checksum(ud, d, precomputed={}): +def verify_checksum(ud, d, precomputed={}, localpath=None, fatal_nochecksum=True): """ verify the MD5 and SHA256 checksum for downloaded src @@ -548,20 +560,25 @@ def verify_checksum(ud, d, precomputed={}): file against those in the recipe each time, rather than only after downloading. See https://bugzilla.yoctoproject.org/show_bug.cgi?id=5571. 
""" - if ud.ignore_checksums or not ud.method.supports_checksum(ud): return {} + if localpath is None: + localpath = ud.localpath + def compute_checksum_info(checksum_id): checksum_name = getattr(ud, "%s_name" % checksum_id) if checksum_id in precomputed: checksum_data = precomputed[checksum_id] else: - checksum_data = getattr(bb.utils, "%s_file" % checksum_id)(ud.localpath) + checksum_data = getattr(bb.utils, "%s_file" % checksum_id)(localpath) checksum_expected = getattr(ud, "%s_expected" % checksum_id) + if checksum_expected == '': + checksum_expected = None + return { "id": checksum_id, "name": checksum_name, @@ -581,17 +598,13 @@ def verify_checksum(ud, d, precomputed={}): checksum_lines = ["SRC_URI[%s] = \"%s\"" % (ci["name"], ci["data"])] # If no checksum has been provided - if ud.method.recommends_checksum(ud) and all(ci["expected"] is None for ci in checksum_infos): + if fatal_nochecksum and ud.method.recommends_checksum(ud) and all(ci["expected"] is None for ci in checksum_infos): messages = [] strict = d.getVar("BB_STRICT_CHECKSUM") or "0" # If strict checking enabled and neither sum defined, raise error if strict == "1": - messages.append("No checksum specified for '%s', please add at " \ - "least one to the recipe:" % ud.localpath) - messages.extend(checksum_lines) - logger.error("\n".join(messages)) - raise NoChecksumError("Missing SRC_URI checksum", ud.url) + raise NoChecksumError("\n".join(checksum_lines)) bb.event.fire(MissingChecksumEvent(ud.url, **checksum_event), d) @@ -612,8 +625,8 @@ def verify_checksum(ud, d, precomputed={}): for ci in checksum_infos: if ci["expected"] and ci["expected"] != ci["data"]: - messages.append("File: '%s' has %s checksum %s when %s was " \ - "expected" % (ud.localpath, ci["id"], ci["data"], ci["expected"])) + messages.append("File: '%s' has %s checksum '%s' when '%s' was " \ + "expected" % (localpath, ci["id"], ci["data"], ci["expected"])) bad_checksum = ci["data"] if bad_checksum: @@ -731,13 +744,16 @@ def subprocess_setup(): # SIGPIPE errors are known issues with gzip/bash signal.signal(signal.SIGPIPE, signal.SIG_DFL) -def get_autorev(d): - # only not cache src rev in autorev case +def mark_recipe_nocache(d): if d.getVar('BB_SRCREV_POLICY') != "cache": d.setVar('BB_DONT_CACHE', '1') + +def get_autorev(d): + mark_recipe_nocache(d) + d.setVar("__BBAUTOREV_SEEN", True) return "AUTOINC" -def get_srcrev(d, method_name='sortable_revision'): +def _get_srcrev(d, method_name='sortable_revision'): """ Return the revision string, usually for use in the version string (PV) of the current package Most packages usually only have one SCM so we just pass on the call. @@ -751,23 +767,34 @@ def get_srcrev(d, method_name='sortable_revision'): that fetcher provides a method with the given name and the same signature as sortable_revision. 
""" + d.setVar("__BBSRCREV_SEEN", "1") + recursion = d.getVar("__BBINSRCREV") + if recursion: + raise FetchError("There are recursive references in fetcher variables, likely through SRC_URI") + d.setVar("__BBINSRCREV", True) + scms = [] + revs = [] fetcher = Fetch(d.getVar('SRC_URI').split(), d) urldata = fetcher.ud for u in urldata: if urldata[u].method.supports_srcrev(): scms.append(u) - if len(scms) == 0: - raise FetchError("SRCREV was used yet no valid SCM was found in SRC_URI") + if not scms: + d.delVar("__BBINSRCREV") + return "", revs + if len(scms) == 1 and len(urldata[scms[0]].names) == 1: autoinc, rev = getattr(urldata[scms[0]].method, method_name)(urldata[scms[0]], d, urldata[scms[0]].names[0]) + revs.append(rev) if len(rev) > 10: rev = rev[:10] + d.delVar("__BBINSRCREV") if autoinc: - return "AUTOINC+" + rev - return rev + return "AUTOINC+" + rev, revs + return rev, revs # # Mutiple SCMs are in SRC_URI so we resort to SRCREV_FORMAT @@ -783,6 +810,7 @@ def get_srcrev(d, method_name='sortable_revision'): ud = urldata[scm] for name in ud.names: autoinc, rev = getattr(ud.method, method_name)(ud, d, name) + revs.append(rev) seenautoinc = seenautoinc or autoinc if len(rev) > 10: rev = rev[:10] @@ -799,12 +827,70 @@ def get_srcrev(d, method_name='sortable_revision'): if seenautoinc: format = "AUTOINC+" + format - return format + d.delVar("__BBINSRCREV") + return format, revs + +def get_hashvalue(d, method_name='sortable_revision'): + pkgv, revs = _get_srcrev(d, method_name=method_name) + return " ".join(revs) + +def get_pkgv_string(d, method_name='sortable_revision'): + pkgv, revs = _get_srcrev(d, method_name=method_name) + return pkgv + +def get_srcrev(d, method_name='sortable_revision'): + pkgv, revs = _get_srcrev(d, method_name=method_name) + if not pkgv: + raise FetchError("SRCREV was used yet no valid SCM was found in SRC_URI") + return pkgv def localpath(url, d): fetcher = bb.fetch2.Fetch([url], d) return fetcher.localpath(url) +# Need to export PATH as binary could be in metadata paths +# rather than host provided +# Also include some other variables. +FETCH_EXPORT_VARS = ['HOME', 'PATH', + 'HTTP_PROXY', 'http_proxy', + 'HTTPS_PROXY', 'https_proxy', + 'FTP_PROXY', 'ftp_proxy', + 'FTPS_PROXY', 'ftps_proxy', + 'NO_PROXY', 'no_proxy', + 'ALL_PROXY', 'all_proxy', + 'GIT_PROXY_COMMAND', + 'GIT_SSH', + 'GIT_SSH_COMMAND', + 'GIT_SSL_CAINFO', + 'GIT_SMART_HTTP', + 'SSH_AUTH_SOCK', 'SSH_AGENT_PID', + 'SOCKS5_USER', 'SOCKS5_PASSWD', + 'DBUS_SESSION_BUS_ADDRESS', + 'P4CONFIG', + 'SSL_CERT_FILE', + 'NODE_EXTRA_CA_CERTS', + 'AWS_PROFILE', + 'AWS_ACCESS_KEY_ID', + 'AWS_SECRET_ACCESS_KEY', + 'AWS_ROLE_ARN', + 'AWS_WEB_IDENTITY_TOKEN_FILE', + 'AWS_DEFAULT_REGION', + 'AWS_SESSION_TOKEN', + 'GIT_CACHE_PATH', + 'REMOTE_CONTAINERS_IPC', + 'SSL_CERT_DIR'] + +def get_fetcher_environment(d): + newenv = {} + origenv = d.getVar("BB_ORIGENV") + for name in bb.fetch2.FETCH_EXPORT_VARS: + value = d.getVar(name) + if not value and origenv: + value = origenv.getVar(name) + if value: + newenv[name] = value + return newenv + def runfetchcmd(cmd, d, quiet=False, cleanup=None, log=None, workdir=None): """ Run cmd returning the command output @@ -813,25 +899,7 @@ def runfetchcmd(cmd, d, quiet=False, cleanup=None, log=None, workdir=None): Optionally remove the files/directories listed in cleanup upon failure """ - # Need to export PATH as binary could be in metadata paths - # rather than host provided - # Also include some other variables. - # FIXME: Should really include all export varaiables? 
- exportvars = ['HOME', 'PATH', - 'HTTP_PROXY', 'http_proxy', - 'HTTPS_PROXY', 'https_proxy', - 'FTP_PROXY', 'ftp_proxy', - 'FTPS_PROXY', 'ftps_proxy', - 'NO_PROXY', 'no_proxy', - 'ALL_PROXY', 'all_proxy', - 'GIT_PROXY_COMMAND', - 'GIT_SSH', - 'GIT_SSL_CAINFO', - 'GIT_SMART_HTTP', - 'SSH_AUTH_SOCK', 'SSH_AGENT_PID', - 'SOCKS5_USER', 'SOCKS5_PASSWD', - 'DBUS_SESSION_BUS_ADDRESS', - 'P4CONFIG'] + exportvars = FETCH_EXPORT_VARS if not cleanup: cleanup = [] @@ -853,18 +921,13 @@ def runfetchcmd(cmd, d, quiet=False, cleanup=None, log=None, workdir=None): if val: cmd = 'export ' + var + '=\"%s\"; %s' % (val, cmd) - # Ensure that a _PYTHON_SYSCONFIGDATA_NAME value set by a recipe - # (for example via python3native.bbclass since warrior) is not set for - # host Python (otherwise tools like git-make-shallow will fail) - cmd = 'unset _PYTHON_SYSCONFIGDATA_NAME; ' + cmd - # Disable pseudo as it may affect ssh, potentially causing it to hang. cmd = 'export PSEUDO_DISABLED=1; ' + cmd if workdir: - logger.debug(1, "Running '%s' in %s" % (cmd, workdir)) + logger.debug("Running '%s' in %s" % (cmd, workdir)) else: - logger.debug(1, "Running %s", cmd) + logger.debug("Running %s", cmd) success = False error_message = "" @@ -873,14 +936,17 @@ def runfetchcmd(cmd, d, quiet=False, cleanup=None, log=None, workdir=None): (output, errors) = bb.process.run(cmd, log=log, shell=True, stderr=subprocess.PIPE, cwd=workdir) success = True except bb.process.NotFoundError as e: - error_message = "Fetch command %s" % (e.command) + error_message = "Fetch command %s not found" % (e.command) except bb.process.ExecutionError as e: if e.stdout: output = "output:\n%s\n%s" % (e.stdout, e.stderr) elif e.stderr: output = "output:\n%s" % e.stderr else: - output = "no output" + if log: + output = "see logfile for output" + else: + output = "no output" error_message = "Fetch command %s failed with exit code %s, %s" % (e.command, e.exitcode, output) except bb.process.CmdError as e: error_message = "Fetch command %s could not be run:\n%s" % (e.command, e.msg) @@ -905,7 +971,7 @@ def check_network_access(d, info, url): elif not trusted_network(d, url): raise UntrustedUrl(url, info) else: - logger.debug(1, "Fetcher accessed the network with the command %s" % info) + logger.debug("Fetcher accessed the network with the command %s" % info) def build_mirroruris(origud, mirrors, ld): uris = [] @@ -931,7 +997,7 @@ def build_mirroruris(origud, mirrors, ld): continue if not trusted_network(ld, newuri): - logger.debug(1, "Mirror %s not in the list of trusted networks, skipping" % (newuri)) + logger.debug("Mirror %s not in the list of trusted networks, skipping" % (newuri)) continue # Create a local copy of the mirrors minus the current line @@ -942,10 +1008,11 @@ def build_mirroruris(origud, mirrors, ld): try: newud = FetchData(newuri, ld) + newud.ignore_checksums = True newud.setup_localpath(ld) except bb.fetch2.BBFetchException as e: - logger.debug(1, "Mirror fetch failure for url %s (original url: %s)" % (newuri, origud.url)) - logger.debug(1, str(e)) + logger.debug("Mirror fetch failure for url %s (original url: %s)" % (newuri, origud.url)) + logger.debug(str(e)) try: # setup_localpath of file:// urls may fail, we should still see # if mirrors of the url exist @@ -1048,10 +1115,11 @@ def try_mirror_url(fetch, origud, ud, ld, check = False): elif isinstance(e, NoChecksumError): raise else: - logger.debug(1, "Mirror fetch failure for url %s (original url: %s)" % (ud.url, origud.url)) - logger.debug(1, str(e)) + logger.debug("Mirror fetch failure 
for url %s (original url: %s)" % (ud.url, origud.url)) + logger.debug(str(e)) try: - ud.method.clean(ud, ld) + if ud.method.cleanup_upon_failure(): + ud.method.clean(ud, ld) except UnboundLocalError: pass return False @@ -1062,6 +1130,8 @@ def try_mirror_url(fetch, origud, ud, ld, check = False): def ensure_symlink(target, link_name): if not os.path.exists(link_name): + dirname = os.path.dirname(link_name) + bb.utils.mkdirhier(dirname) if os.path.islink(link_name): # Broken symbolic link os.unlink(link_name) @@ -1145,11 +1215,11 @@ def srcrev_internal_helper(ud, d, name): pn = d.getVar("PN") attempts = [] if name != '' and pn: - attempts.append("SRCREV_%s_pn-%s" % (name, pn)) + attempts.append("SRCREV_%s:pn-%s" % (name, pn)) if name != '': attempts.append("SRCREV_%s" % name) if pn: - attempts.append("SRCREV_pn-%s" % pn) + attempts.append("SRCREV:pn-%s" % pn) attempts.append("SRCREV") for a in attempts: @@ -1174,6 +1244,7 @@ def srcrev_internal_helper(ud, d, name): if srcrev == "INVALID" or not srcrev: raise FetchError("Please set a valid SRCREV for url %s (possible key names are %s, or use a ;rev=X URL parameter)" % (str(attempts), ud.url), ud.url) if srcrev == "AUTOINC": + d.setVar("__BBAUTOREV_ACTED_UPON", True) srcrev = ud.method.latest_revision(ud, d, name) return srcrev @@ -1185,25 +1256,21 @@ def get_checksum_file_list(d): SRC_URI as a space-separated string """ fetch = Fetch([], d, cache = False, localonly = True) - - dl_dir = d.getVar('DL_DIR') filelist = [] for u in fetch.urls: ud = fetch.ud[u] - if ud and isinstance(ud.method, local.Local): - paths = ud.method.localpaths(ud, d) + found = False + paths = ud.method.localfile_searchpaths(ud, d) for f in paths: pth = ud.decodedurl - if '*' in pth: - f = os.path.join(os.path.abspath(f), pth) - if f.startswith(dl_dir): - # The local fetcher's behaviour is to return a path under DL_DIR if it couldn't find the file anywhere else - if os.path.exists(f): - bb.warn("Getting checksum for %s SRC_URI entry %s: file not found except in DL_DIR" % (d.getVar('PN'), os.path.basename(f))) - else: - bb.warn("Unable to get checksum for %s SRC_URI entry %s: file could not be found" % (d.getVar('PN'), os.path.basename(f))) + if os.path.exists(f): + found = True filelist.append(f + ":" + str(os.path.exists(f))) + if not found: + bb.fatal(("Unable to get checksum for %s SRC_URI entry %s: file could not be found" + "\nThe following paths were searched:" + "\n%s") % (d.getVar('PN'), os.path.basename(f), '\n'.join(paths))) return " ".join(filelist) @@ -1250,18 +1317,13 @@ class FetchData(object): if checksum_name in self.parm: checksum_expected = self.parm[checksum_name] - elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3"]: + elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3", "az", "crate", "gs"]: checksum_expected = None else: checksum_expected = d.getVarFlag("SRC_URI", checksum_name) setattr(self, "%s_expected" % checksum_id, checksum_expected) - for checksum_id in CHECKSUM_LIST: - configure_checksum(checksum_id) - - self.ignore_checksums = False - self.names = self.parm.get("name",'default').split(',') self.method = None @@ -1283,6 +1345,11 @@ class FetchData(object): if hasattr(self.method, "urldata_init"): self.method.urldata_init(self, d) + for checksum_id in CHECKSUM_LIST: + configure_checksum(checksum_id) + + self.ignore_checksums = False + if "localpath" in self.parm: # if user sets localpath for file, use it instead. 
self.localpath = self.parm["localpath"] @@ -1362,12 +1429,12 @@ class FetchMethod(object): Is localpath something that can be represented by a checksum? """ + # We cannot compute checksums for None + if urldata.localpath is None: + return False # We cannot compute checksums for directories if os.path.isdir(urldata.localpath): return False - if urldata.localpath.find("*") != -1: - return False - return True def recommends_checksum(self, urldata): @@ -1377,6 +1444,12 @@ class FetchMethod(object): """ return False + def cleanup_upon_failure(self): + """ + When a fetch fails, should clean() be called? + """ + return True + def verify_donestamp(self, ud, d): """ Verify the donestamp file @@ -1430,11 +1503,6 @@ class FetchMethod(object): iterate = False file = urldata.localpath - # Localpath can't deal with 'dir/*' entries, so it converts them to '.', - # but it must be corrected back for local files copying - if urldata.basename == '*' and file.endswith('/.'): - file = '%s/%s' % (file.rstrip('/.'), urldata.path) - try: unpack = bb.utils.to_boolean(urldata.parm.get('unpack'), True) except ValueError as exc: @@ -1449,28 +1517,35 @@ class FetchMethod(object): cmd = None if unpack: + tar_cmd = 'tar --extract --no-same-owner' + if 'striplevel' in urldata.parm: + tar_cmd += ' --strip-components=%s' % urldata.parm['striplevel'] if file.endswith('.tar'): - cmd = 'tar x --no-same-owner -f %s' % file + cmd = '%s -f %s' % (tar_cmd, file) elif file.endswith('.tgz') or file.endswith('.tar.gz') or file.endswith('.tar.Z'): - cmd = 'tar xz --no-same-owner -f %s' % file + cmd = '%s -z -f %s' % (tar_cmd, file) elif file.endswith('.tbz') or file.endswith('.tbz2') or file.endswith('.tar.bz2'): - cmd = 'bzip2 -dc %s | tar x --no-same-owner -f -' % file + cmd = 'bzip2 -dc %s | %s -f -' % (file, tar_cmd) elif file.endswith('.gz') or file.endswith('.Z') or file.endswith('.z'): cmd = 'gzip -dc %s > %s' % (file, efile) elif file.endswith('.bz2'): cmd = 'bzip2 -dc %s > %s' % (file, efile) elif file.endswith('.txz') or file.endswith('.tar.xz'): - cmd = 'xz -dc %s | tar x --no-same-owner -f -' % file + cmd = 'xz -dc %s | %s -f -' % (file, tar_cmd) elif file.endswith('.xz'): cmd = 'xz -dc %s > %s' % (file, efile) elif file.endswith('.tar.lz'): - cmd = 'lzip -dc %s | tar x --no-same-owner -f -' % file + cmd = 'lzip -dc %s | %s -f -' % (file, tar_cmd) elif file.endswith('.lz'): cmd = 'lzip -dc %s > %s' % (file, efile) elif file.endswith('.tar.7z'): - cmd = '7z x -so %s | tar x --no-same-owner -f -' % file + cmd = '7z x -so %s | %s -f -' % (file, tar_cmd) elif file.endswith('.7z'): cmd = '7za x -y %s 1>/dev/null' % file + elif file.endswith('.tzst') or file.endswith('.tar.zst'): + cmd = 'zstd --decompress --stdout %s | %s -f -' % (file, tar_cmd) + elif file.endswith('.zst'): + cmd = 'zstd --decompress --stdout %s > %s' % (file, efile) elif file.endswith('.zip') or file.endswith('.jar'): try: dos = bb.utils.to_boolean(urldata.parm.get('dos'), False) @@ -1501,7 +1576,7 @@ class FetchMethod(object): raise UnpackError("Unable to unpack deb/ipk package - does not contain data.tar.* file", urldata.url) else: raise UnpackError("Unable to unpack deb/ipk package - could not list contents", urldata.url) - cmd = 'ar x %s %s && tar --no-same-owner -xpf %s && rm %s' % (file, datafile, datafile, datafile) + cmd = 'ar x %s %s && %s -p -f %s && rm %s' % (file, datafile, tar_cmd, datafile, datafile) # If 'subdir' param exists, create a dir and use it as destination for unpack cmd if 'subdir' in urldata.parm: @@ -1517,6 +1592,7 @@ class 
FetchMethod(object): unpackdir = rootdir if not unpack or not cmd: + urldata.unpack_tracer.unpack("file-copy", unpackdir) # If file == dest, then avoid any copies, as we already put the file into dest! dest = os.path.join(unpackdir, os.path.basename(file)) if file != dest and not (os.path.exists(dest) and os.path.samefile(file, dest)): @@ -1530,7 +1606,9 @@ class FetchMethod(object): if urlpath.find("/") != -1: destdir = urlpath.rsplit("/", 1)[0] + '/' bb.utils.mkdirhier("%s/%s" % (unpackdir, destdir)) - cmd = 'cp -fpPRH %s %s' % (file, destdir) + cmd = 'cp -fpPRH "%s" "%s"' % (file, destdir) + else: + urldata.unpack_tracer.unpack("archive-extract", unpackdir) if not cmd: return @@ -1613,8 +1691,6 @@ class FetchMethod(object): """ if os.path.exists(ud.localpath): return True - if ud.localpath.find("*") != -1: - return True return False def implicit_urldata(self, ud, d): @@ -1624,12 +1700,61 @@ class FetchMethod(object): """ return [] + +class DummyUnpackTracer(object): + """ + Abstract API definition for a class that traces unpacked source files back + to their respective upstream SRC_URI entries, for software composition + analysis, license compliance and detailed SBOM generation purposes. + User may load their own unpack tracer class (instead of the dummy + one) by setting the BB_UNPACK_TRACER_CLASS config parameter. + """ + def start(self, unpackdir, urldata_dict, d): + """ + Start tracing the core Fetch.unpack process, using an index to map + unpacked files to each SRC_URI entry. + This method is called by Fetch.unpack and it may receive nested calls by + gitsm and npmsw fetchers, that expand SRC_URI entries by adding implicit + URLs and by recursively calling Fetch.unpack from new (nested) Fetch + instances. + """ + return + def start_url(self, url): + """Start tracing url unpack process. + This method is called by Fetch.unpack before the fetcher-specific unpack + method starts, and it may receive nested calls by gitsm and npmsw + fetchers. + """ + return + def unpack(self, unpack_type, destdir): + """ + Set unpack_type and destdir for current url. + This method is called by the fetcher-specific unpack method after url + tracing started. + """ + return + def finish_url(self, url): + """Finish tracing url unpack process and update the file index. + This method is called by Fetch.unpack after the fetcher-specific unpack + method finished its job, and it may receive nested calls by gitsm + and npmsw fetchers. + """ + return + def complete(self): + """ + Finish tracing the Fetch.unpack process, and check if all nested + Fecth.unpack calls (if any) have been completed; if so, save collected + metadata. 
+ """ + return + + class Fetch(object): def __init__(self, urls, d, cache = True, localonly = False, connection_cache = None): if localonly and cache: raise Exception("bb.fetch2.Fetch.__init__: cannot set cache and localonly at same time") - if len(urls) == 0: + if not urls: urls = d.getVar("SRC_URI").split() self.urls = urls self.d = d @@ -1644,10 +1769,30 @@ class Fetch(object): if key in urldata_cache: self.ud = urldata_cache[key] + # the unpack_tracer object needs to be made available to possible nested + # Fetch instances (when those are created by gitsm and npmsw fetchers) + # so we set it as a global variable + global unpack_tracer + try: + unpack_tracer + except NameError: + class_path = d.getVar("BB_UNPACK_TRACER_CLASS") + if class_path: + # use user-defined unpack tracer class + import importlib + module_name, _, class_name = class_path.rpartition(".") + module = importlib.import_module(module_name) + class_ = getattr(module, class_name) + unpack_tracer = class_() + else: + # fall back to the dummy/abstract class + unpack_tracer = DummyUnpackTracer() + for url in urls: if url not in self.ud: try: self.ud[url] = FetchData(url, d, localonly) + self.ud[url].unpack_tracer = unpack_tracer except NonLocalMethod: if localonly: self.ud[url] = None @@ -1686,6 +1831,7 @@ class Fetch(object): network = self.d.getVar("BB_NO_NETWORK") premirroronly = bb.utils.to_boolean(self.d.getVar("BB_FETCH_PREMIRRORONLY")) + checksum_missing_messages = [] for u in urls: ud = self.ud[u] ud.setup_localpath(self.d) @@ -1697,11 +1843,10 @@ class Fetch(object): try: self.d.setVar("BB_NO_NETWORK", network) - if m.verify_donestamp(ud, self.d) and not m.need_update(ud, self.d): done = True elif m.try_premirror(ud, self.d): - logger.debug(1, "Trying PREMIRRORS") + logger.debug("Trying PREMIRRORS") mirrors = mirror_from_string(self.d.getVar('PREMIRRORS')) done = m.try_mirrors(self, ud, self.d, mirrors) if done: @@ -1711,19 +1856,21 @@ class Fetch(object): m.update_donestamp(ud, self.d) except ChecksumError as e: logger.warning("Checksum failure encountered with premirror download of %s - will attempt other sources." 
% u) - logger.debug(1, str(e)) + logger.debug(str(e)) done = False if premirroronly: self.d.setVar("BB_NO_NETWORK", "1") firsterr = None - verified_stamp = m.verify_donestamp(ud, self.d) + verified_stamp = False + if done: + verified_stamp = m.verify_donestamp(ud, self.d) if not done and (not verified_stamp or m.need_update(ud, self.d)): try: if not trusted_network(self.d, ud.url): raise UntrustedUrl(ud.url) - logger.debug(1, "Trying Upstream") + logger.debug("Trying Upstream") m.download(ud, self.d) if hasattr(m, "build_mirror_data"): m.build_mirror_data(ud, self.d) @@ -1738,19 +1885,19 @@ class Fetch(object): except BBFetchException as e: if isinstance(e, ChecksumError): logger.warning("Checksum failure encountered with download of %s - will attempt other sources if available" % u) - logger.debug(1, str(e)) + logger.debug(str(e)) if os.path.exists(ud.localpath): rename_bad_checksum(ud, e.checksum) elif isinstance(e, NoChecksumError): raise else: logger.warning('Failed to fetch URL %s, attempting MIRRORS if available' % u) - logger.debug(1, str(e)) + logger.debug(str(e)) firsterr = e # Remove any incomplete fetch - if not verified_stamp: + if not verified_stamp and m.cleanup_upon_failure(): m.clean(ud, self.d) - logger.debug(1, "Trying MIRRORS") + logger.debug("Trying MIRRORS") mirrors = mirror_from_string(self.d.getVar('MIRRORS')) done = m.try_mirrors(self, ud, self.d, mirrors) @@ -1767,17 +1914,28 @@ class Fetch(object): raise ChecksumError("Stale Error Detected") except BBFetchException as e: - if isinstance(e, ChecksumError): + if isinstance(e, NoChecksumError): + (message, _) = e.args + checksum_missing_messages.append(message) + continue + elif isinstance(e, ChecksumError): logger.error("Checksum failure fetching %s" % u) raise finally: if ud.lockfile: bb.utils.unlockfile(lf) + if checksum_missing_messages: + logger.error("Missing SRC_URI checksum, please add those to the recipe: \n%s", "\n".join(checksum_missing_messages)) + raise BBFetchException("There was some missing checksums in the recipe") def checkstatus(self, urls=None): """ - Check all urls exist upstream + Check all URLs exist upstream. + + Returns None if the URLs exist, raises FetchError if the check wasn't + successful but there wasn't an error (such as file not found), and + raises other exceptions in error cases. """ if not urls: @@ -1787,7 +1945,7 @@ class Fetch(object): ud = self.ud[u] ud.setup_localpath(self.d) m = ud.method - logger.debug(1, "Testing URL %s", u) + logger.debug("Testing URL %s", u) # First try checking uri, u, from PREMIRRORS mirrors = mirror_from_string(self.d.getVar('PREMIRRORS')) ret = m.try_mirrors(self, ud, self.d, mirrors, True) @@ -1800,7 +1958,7 @@ class Fetch(object): ret = m.try_mirrors(self, ud, self.d, mirrors, True) if not ret: - raise FetchError("URL %s doesn't work" % u, u) + raise FetchError("URL doesn't work", u) def unpack(self, root, urls=None): """ @@ -1810,6 +1968,8 @@ class Fetch(object): if not urls: urls = self.urls + unpack_tracer.start(root, self.ud, self.d) + for u in urls: ud = self.ud[u] ud.setup_localpath(self.d) @@ -1817,11 +1977,15 @@ class Fetch(object): if ud.lockfile: lf = bb.utils.lockfile(ud.lockfile) + unpack_tracer.start_url(u) ud.method.unpack(ud, root, self.d) + unpack_tracer.finish_url(u) if ud.lockfile: bb.utils.unlockfile(lf) + unpack_tracer.complete() + def clean(self, urls=None): """ Clean files that the fetcher gets or places @@ -1921,6 +2085,9 @@ from . import repo from . import clearcase from . import npm from . import npmsw +from . 
import az +from . import crate +from . import gcp methods.append(local.Local()) methods.append(wget.Wget()) @@ -1940,3 +2107,6 @@ methods.append(repo.Repo()) methods.append(clearcase.ClearCase()) methods.append(npm.Npm()) methods.append(npmsw.NpmShrinkWrap()) +methods.append(az.Az()) +methods.append(crate.Crate()) +methods.append(gcp.GCP()) diff --git a/lib/bb/fetch2/az.py b/lib/bb/fetch2/az.py new file mode 100644 index 000000000..3ccc594c2 --- /dev/null +++ b/lib/bb/fetch2/az.py @@ -0,0 +1,93 @@ +""" +BitBake 'Fetch' Azure Storage implementation + +""" + +# Copyright (C) 2021 Alejandro Hernandez Samaniego +# +# Based on bb.fetch2.wget: +# Copyright (C) 2003, 2004 Chris Larson +# +# SPDX-License-Identifier: GPL-2.0-only +# +# Based on functions from the base bb module, Copyright 2003 Holger Schurig + +import shlex +import os +import bb +from bb.fetch2 import FetchError +from bb.fetch2 import logger +from bb.fetch2.wget import Wget + + +class Az(Wget): + + def supports(self, ud, d): + """ + Check to see if a given url can be fetched from Azure Storage + """ + return ud.type in ['az'] + + + def checkstatus(self, fetch, ud, d, try_again=True): + + # checkstatus discards parameters either way, we need to do this before adding the SAS + ud.url = ud.url.replace('az://','https://').split(';')[0] + + az_sas = d.getVar('AZ_SAS') + if az_sas and az_sas not in ud.url: + ud.url += az_sas + + return Wget.checkstatus(self, fetch, ud, d, try_again) + + # Override download method, include retries + def download(self, ud, d, retries=3): + """Fetch urls""" + + # If were reaching the account transaction limit we might be refused a connection, + # retrying allows us to avoid false negatives since the limit changes over time + fetchcmd = self.basecmd + ' --retry-connrefused --waitretry=5' + + # We need to provide a localpath to avoid wget using the SAS + # ud.localfile either has the downloadfilename or ud.path + localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + bb.utils.mkdirhier(os.path.dirname(localpath)) + fetchcmd += " -O %s" % shlex.quote(localpath) + + + if ud.user and ud.pswd: + fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd) + + # Check if a Shared Access Signature was given and use it + az_sas = d.getVar('AZ_SAS') + + if az_sas: + azuri = '%s%s%s%s' % ('https://', ud.host, ud.path, az_sas) + else: + azuri = '%s%s%s' % ('https://', ud.host, ud.path) + + if os.path.exists(ud.localpath): + # file exists, but we didnt complete it.. trying again. + fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % azuri) + else: + fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % azuri) + + try: + self._runwget(ud, d, fetchcmd, False) + except FetchError as e: + # Azure fails on handshake sometimes when using wget after some stress, producing a + # FetchError from the fetcher, if the artifact exists retyring should succeed + if 'Unable to establish SSL connection' in str(e): + logger.debug2('Unable to establish SSL connection: Retries remaining: %s, Retrying...' % retries) + self.download(ud, d, retries -1) + + # Sanity check since wget can pretend it succeed when it didn't + # Also, this used to happen if sourceforge sent us to the mirror page + if not os.path.exists(ud.localpath): + raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (azuri, ud.localpath), azuri) + + if os.path.getsize(ud.localpath) == 0: + os.remove(ud.localpath) + raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." 
% (azuri), azuri) + + return True diff --git a/lib/bb/fetch2/bzr.py b/lib/bb/fetch2/bzr.py index 566ace9f0..fc558f50b 100644 --- a/lib/bb/fetch2/bzr.py +++ b/lib/bb/fetch2/bzr.py @@ -74,16 +74,16 @@ class Bzr(FetchMethod): if os.access(os.path.join(ud.pkgdir, os.path.basename(ud.pkgdir), '.bzr'), os.R_OK): bzrcmd = self._buildbzrcommand(ud, d, "update") - logger.debug(1, "BZR Update %s", ud.url) + logger.debug("BZR Update %s", ud.url) bb.fetch2.check_network_access(d, bzrcmd, ud.url) runfetchcmd(bzrcmd, d, workdir=os.path.join(ud.pkgdir, os.path.basename(ud.path))) else: bb.utils.remove(os.path.join(ud.pkgdir, os.path.basename(ud.pkgdir)), True) bzrcmd = self._buildbzrcommand(ud, d, "fetch") bb.fetch2.check_network_access(d, bzrcmd, ud.url) - logger.debug(1, "BZR Checkout %s", ud.url) + logger.debug("BZR Checkout %s", ud.url) bb.utils.mkdirhier(ud.pkgdir) - logger.debug(1, "Running %s", bzrcmd) + logger.debug("Running %s", bzrcmd) runfetchcmd(bzrcmd, d, workdir=ud.pkgdir) scmdata = ud.parm.get("scmdata", "") @@ -109,7 +109,7 @@ class Bzr(FetchMethod): """ Return the latest upstream revision number """ - logger.debug(2, "BZR fetcher hitting network for %s", ud.url) + logger.debug2("BZR fetcher hitting network for %s", ud.url) bb.fetch2.check_network_access(d, self._buildbzrcommand(ud, d, "revno"), ud.url) diff --git a/lib/bb/fetch2/clearcase.py b/lib/bb/fetch2/clearcase.py index 49d7ae1b0..1a9c86376 100644 --- a/lib/bb/fetch2/clearcase.py +++ b/lib/bb/fetch2/clearcase.py @@ -70,7 +70,7 @@ class ClearCase(FetchMethod): return ud.type in ['ccrc'] def debug(self, msg): - logger.debug(1, "ClearCase: %s", msg) + logger.debug("ClearCase: %s", msg) def urldata_init(self, ud, d): """ diff --git a/lib/bb/fetch2/crate.py b/lib/bb/fetch2/crate.py new file mode 100644 index 000000000..e611736f0 --- /dev/null +++ b/lib/bb/fetch2/crate.py @@ -0,0 +1,150 @@ +# ex:ts=4:sw=4:sts=4:et +# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- +""" +BitBake 'Fetch' implementation for crates.io +""" + +# Copyright (C) 2016 Doug Goldstein +# +# SPDX-License-Identifier: GPL-2.0-only +# +# Based on functions from the base bb module, Copyright 2003 Holger Schurig + +import hashlib +import json +import os +import subprocess +import bb +from bb.fetch2 import logger, subprocess_setup, UnpackError +from bb.fetch2.wget import Wget + + +class Crate(Wget): + + """Class to fetch crates via wget""" + + def _cargo_bitbake_path(self, rootdir): + return os.path.join(rootdir, "cargo_home", "bitbake") + + def supports(self, ud, d): + """ + Check to see if a given url is for this fetcher + """ + return ud.type in ['crate'] + + def recommends_checksum(self, urldata): + return True + + def urldata_init(self, ud, d): + """ + Sets up to download the respective crate from crates.io + """ + + if ud.type == 'crate': + self._crate_urldata_init(ud, d) + + super(Crate, self).urldata_init(ud, d) + + def _crate_urldata_init(self, ud, d): + """ + Sets up the download for a crate + """ + + # URL syntax is: crate://NAME/VERSION + # break the URL apart by / + parts = ud.url.split('/') + if len(parts) < 5: + raise bb.fetch2.ParameterError("Invalid URL: Must be crate://HOST/NAME/VERSION", ud.url) + + # version is expected to be the last token + # but ignore possible url parameters which will be used + # by the top fetcher class + version = parts[-1].split(";")[0] + # second to last field is name + name = parts[-2] + # host (this is to allow custom crate registries to be specified + host = '/'.join(parts[2:-2]) + + # if using upstream just 
fix it up nicely + if host == 'crates.io': + host = 'crates.io/api/v1/crates' + + ud.url = "https://%s/%s/%s/download" % (host, name, version) + ud.versionsurl = "https://%s/%s/versions" % (host, name) + ud.parm['downloadfilename'] = "%s-%s.crate" % (name, version) + if 'name' not in ud.parm: + ud.parm['name'] = '%s-%s' % (name, version) + + logger.debug2("Fetching %s to %s" % (ud.url, ud.parm['downloadfilename'])) + + def unpack(self, ud, rootdir, d): + """ + Uses the crate to build the necessary paths for cargo to utilize it + """ + if ud.type == 'crate': + return self._crate_unpack(ud, rootdir, d) + else: + super(Crate, self).unpack(ud, rootdir, d) + + def _crate_unpack(self, ud, rootdir, d): + """ + Unpacks a crate + """ + thefile = ud.localpath + + # possible metadata we need to write out + metadata = {} + + # change to the rootdir to unpack but save the old working dir + save_cwd = os.getcwd() + os.chdir(rootdir) + + bp = d.getVar('BP') + if bp == ud.parm.get('name'): + cmd = "tar -xz --no-same-owner -f %s" % thefile + ud.unpack_tracer.unpack("crate-extract", rootdir) + else: + cargo_bitbake = self._cargo_bitbake_path(rootdir) + ud.unpack_tracer.unpack("cargo-extract", cargo_bitbake) + + cmd = "tar -xz --no-same-owner -f %s -C %s" % (thefile, cargo_bitbake) + + # ensure we've got these paths made + bb.utils.mkdirhier(cargo_bitbake) + + # generate metadata necessary + with open(thefile, 'rb') as f: + # get the SHA256 of the original tarball + tarhash = hashlib.sha256(f.read()).hexdigest() + + metadata['files'] = {} + metadata['package'] = tarhash + + path = d.getVar('PATH') + if path: + cmd = "PATH=\"%s\" %s" % (path, cmd) + bb.note("Unpacking %s to %s/" % (thefile, os.getcwd())) + + ret = subprocess.call(cmd, preexec_fn=subprocess_setup, shell=True) + + os.chdir(save_cwd) + + if ret != 0: + raise UnpackError("Unpack command %s failed with return value %s" % (cmd, ret), ud.url) + + # if we have metadata to write out.. + if len(metadata) > 0: + cratepath = os.path.splitext(os.path.basename(thefile))[0] + bbpath = self._cargo_bitbake_path(rootdir) + mdfile = '.cargo-checksum.json' + mdpath = os.path.join(bbpath, cratepath, mdfile) + with open(mdpath, "w") as f: + json.dump(metadata, f) + + def latest_versionstring(self, ud, d): + from functools import cmp_to_key + json_data = json.loads(self._fetch_index(ud.versionsurl, ud, d)) + versions = [(0, i["num"], "") for i in json_data["versions"]] + versions = sorted(versions, key=cmp_to_key(bb.utils.vercmp)) + + return (versions[-1][1], "") diff --git a/lib/bb/fetch2/cvs.py b/lib/bb/fetch2/cvs.py index 22abdef79..01de5ff4c 100644 --- a/lib/bb/fetch2/cvs.py +++ b/lib/bb/fetch2/cvs.py @@ -109,7 +109,7 @@ class Cvs(FetchMethod): cvsupdatecmd = "CVS_RSH=\"%s\" %s" % (cvs_rsh, cvsupdatecmd) # create module directory - logger.debug(2, "Fetch: checking for module directory") + logger.debug2("Fetch: checking for module directory") moddir = os.path.join(ud.pkgdir, localdir) workdir = None if os.access(os.path.join(moddir, 'CVS'), os.R_OK): @@ -123,7 +123,7 @@ class Cvs(FetchMethod): # check out sources there bb.utils.mkdirhier(ud.pkgdir) workdir = ud.pkgdir - logger.debug(1, "Running %s", cvscmd) + logger.debug("Running %s", cvscmd) bb.fetch2.check_network_access(d, cvscmd, ud.url) cmd = cvscmd diff --git a/lib/bb/fetch2/gcp.py b/lib/bb/fetch2/gcp.py new file mode 100644 index 000000000..eb3e0c6a6 --- /dev/null +++ b/lib/bb/fetch2/gcp.py @@ -0,0 +1,102 @@ +""" +BitBake 'Fetch' implementation for Google Cloup Platform Storage. 
+ +Class for fetching files from Google Cloud Storage using the +Google Cloud Storage Python Client. The GCS Python Client must +be correctly installed, configured and authenticated prior to use. +Additionally, gsutil must also be installed. + +""" + +# Copyright (C) 2023, Snap Inc. +# +# Based in part on bb.fetch2.s3: +# Copyright (C) 2017 Andre McCurdy +# +# SPDX-License-Identifier: GPL-2.0-only +# +# Based on functions from the base bb module, Copyright 2003 Holger Schurig + +import os +import bb +import urllib.parse, urllib.error +from bb.fetch2 import FetchMethod +from bb.fetch2 import FetchError +from bb.fetch2 import logger +from bb.fetch2 import runfetchcmd + +class GCP(FetchMethod): + """ + Class to fetch urls via GCP's Python API. + """ + def __init__(self): + self.gcp_client = None + + def supports(self, ud, d): + """ + Check to see if a given url can be fetched with GCP. + """ + return ud.type in ['gs'] + + def recommends_checksum(self, urldata): + return True + + def urldata_init(self, ud, d): + if 'downloadfilename' in ud.parm: + ud.basename = ud.parm['downloadfilename'] + else: + ud.basename = os.path.basename(ud.path) + + ud.localfile = d.expand(urllib.parse.unquote(ud.basename)) + ud.basecmd = "gsutil stat" + + def get_gcp_client(self): + from google.cloud import storage + self.gcp_client = storage.Client(project=None) + + def download(self, ud, d): + """ + Fetch urls using the GCP API. + Assumes localpath was called first. + """ + logger.debug2(f"Trying to download gs://{ud.host}{ud.path} to {ud.localpath}") + if self.gcp_client is None: + self.get_gcp_client() + + bb.fetch2.check_network_access(d, ud.basecmd, f"gs://{ud.host}{ud.path}") + runfetchcmd("%s %s" % (ud.basecmd, f"gs://{ud.host}{ud.path}"), d) + + # Path sometimes has leading slash, so strip it + path = ud.path.lstrip("/") + blob = self.gcp_client.bucket(ud.host).blob(path) + blob.download_to_filename(ud.localpath) + + # Additional sanity checks copied from the wget class (although there + # are no known issues which mean these are required, treat the GCP API + # tool with a little healthy suspicion). + if not os.path.exists(ud.localpath): + raise FetchError(f"The GCP API returned success for gs://{ud.host}{ud.path} but {ud.localpath} doesn't exist?!") + + if os.path.getsize(ud.localpath) == 0: + os.remove(ud.localpath) + raise FetchError(f"The downloaded file for gs://{ud.host}{ud.path} resulted in a zero size file?! Deleting and failing since this isn't right.") + + return True + + def checkstatus(self, fetch, ud, d): + """ + Check the status of a URL. + """ + logger.debug2(f"Checking status of gs://{ud.host}{ud.path}") + if self.gcp_client is None: + self.get_gcp_client() + + bb.fetch2.check_network_access(d, ud.basecmd, f"gs://{ud.host}{ud.path}") + runfetchcmd("%s %s" % (ud.basecmd, f"gs://{ud.host}{ud.path}"), d) + + # Path sometimes has leading slash, so strip it + path = ud.path.lstrip("/") + if self.gcp_client.bucket(ud.host).blob(path).exists() == False: + raise FetchError(f"The GCP API reported that gs://{ud.host}{ud.path} does not exist") + else: + return True diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py index 644ba9238..c7ff769fd 100644 --- a/lib/bb/fetch2/git.py +++ b/lib/bb/fetch2/git.py @@ -44,13 +44,27 @@ Supported SRC_URI options are: - nobranch Don't check the SHA validation for branch. set this option for the recipe - referring to commit which is valid in tag instead of branch. + referring to commit which is valid in any namespace (branch, tag, ...) + instead of branch. 
The default is "0", set nobranch=1 if needed. +- subpath + Limit the checkout to a specific subpath of the tree. + By default, checkout the whole tree, set subpath=<path> if needed + +- destsuffix + The name of the path in which to place the checkout. + By default, the path is git/, set destsuffix=<suffix> if needed + - usehead For local git:// urls to use the current branch HEAD as the revision for use with AUTOREV. Implies nobranch. +- lfs + Enable the checkout to use LFS for large files. This will download all LFS files + in the download step, as the unpack step does not have network access. + The default is "1", set lfs=0 to skip. + """ # Copyright (C) 2005 Richard Purdie @@ -63,14 +77,21 @@ import errno import fnmatch import os import re +import shlex +import shutil import subprocess import tempfile import bb import bb.progress +from contextlib import contextmanager from bb.fetch2 import FetchMethod from bb.fetch2 import runfetchcmd from bb.fetch2 import logger +from bb.fetch2 import trusted_network + +sha1_re = re.compile(r'^[0-9a-f]{40}$') +slash_re = re.compile(r"/+") class GitProgressHandler(bb.progress.LineFilterProgressHandler): """Extract progress information from git output""" @@ -129,6 +150,9 @@ class Git(FetchMethod): def supports_checksum(self, urldata): return False + def cleanup_upon_failure(self): + return False + def urldata_init(self, ud, d): """ init git specific variable within url data @@ -140,6 +164,11 @@ class Git(FetchMethod): ud.proto = 'file' else: ud.proto = "git" + if ud.host == "github.com" and ud.proto == "git": + # github stopped supporting git protocol + # https://github.blog/2021-09-01-improving-git-protocol-security-github/#no-more-unauthenticated-git + ud.proto = "https" + bb.warn("URL: %s uses git protocol which is no longer supported by github. Please change to ;protocol=https in the url." % ud.url) if not ud.proto in ('git', 'file', 'ssh', 'http', 'https', 'rsync'): raise bb.fetch2.ParameterError("Invalid protocol type", ud.url) @@ -163,11 +192,18 @@ class Git(FetchMethod): ud.nocheckout = 1 ud.unresolvedrev = {} - branches = ud.parm.get("branch", "master").split(',') + branches = ud.parm.get("branch", "").split(',') + if branches == [""] and not ud.nobranch: + bb.warn("URL: %s does not set any branch parameter. The future default branch used by tools and repositories is uncertain and we will therefore soon require this is set in all git urls." % ud.url) + branches = ["master"] if len(branches) != len(ud.names): raise bb.fetch2.ParameterError("The number of name and branch parameters is not balanced", ud.url) - ud.cloneflags = "-s -n" + ud.noshared = d.getVar("BB_GIT_NOSHARED") == "1" + + ud.cloneflags = "-n" + if not ud.noshared: + ud.cloneflags += " -s" if ud.bareclone: ud.cloneflags += " --mirror" @@ -219,9 +255,14 @@ class Git(FetchMethod): ud.shallow = False if ud.usehead: - ud.unresolvedrev['default'] = 'HEAD' + # When usehead is set let's associate 'HEAD' with the unresolved + # rev of this repository. This will get resolved into a revision + # later. If an actual revision happens to have also been provided + # then this setting will be overridden. 
+ for name in ud.names: + ud.unresolvedrev[name] = 'HEAD' - ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0" + ud.basecmd = d.getVar("FETCHCMD_git") or "git -c gc.autoDetach=false -c core.pager=cat -c safe.bareRepository=all" write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0" ud.write_tarballs = write_tarballs != "0" or ud.rebaseable @@ -230,20 +271,20 @@ class Git(FetchMethod): ud.setup_revisions(d) for name in ud.names: - # Ensure anything that doesn't look like a sha256 checksum/revision is translated into one - if not ud.revisions[name] or len(ud.revisions[name]) != 40 or (False in [c in "abcdef0123456789" for c in ud.revisions[name]]): + # Ensure any revision that doesn't look like a SHA-1 is translated into one + if not sha1_re.match(ud.revisions[name] or ''): if ud.revisions[name]: ud.unresolvedrev[name] = ud.revisions[name] ud.revisions[name] = self.latest_revision(ud, d, name) - gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.')) + gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_').replace('(', '_').replace(')', '_')) if gitsrcname.startswith('.'): gitsrcname = gitsrcname[1:] - # for rebaseable git repo, it is necessary to keep mirror tar ball - # per revision, so that even the revision disappears from the + # For a rebaseable git repo, it is necessary to keep a mirror tar ball + # per revision, so that even if the revision disappears from the # upstream repo in the future, the mirror will remain intact and still - # contains the revision + # contain the revision if ud.rebaseable: for name in ud.names: gitsrcname = gitsrcname + '_' + ud.revisions[name] @@ -287,7 +328,10 @@ class Git(FetchMethod): return ud.clonedir def need_update(self, ud, d): - return self.clonedir_need_update(ud, d) or self.shallow_tarball_need_update(ud) or self.tarball_need_update(ud) + return self.clonedir_need_update(ud, d) \ + or self.shallow_tarball_need_update(ud) \ + or self.tarball_need_update(ud) \ + or self.lfs_need_update(ud, d) def clonedir_need_update(self, ud, d): if not os.path.exists(ud.clonedir): @@ -299,6 +343,15 @@ class Git(FetchMethod): return True return False + def lfs_need_update(self, ud, d): + if self.clonedir_need_update(ud, d): + return True + + for name in ud.names: + if not self._lfs_objects_downloaded(ud, d, name, ud.clonedir): + return True + return False + def clonedir_need_shallow_revs(self, ud, d): for rev in ud.shallow_revs: try: @@ -318,6 +371,16 @@ class Git(FetchMethod): # is not possible if bb.utils.to_boolean(d.getVar("BB_FETCH_PREMIRRORONLY")): return True + # If the url is not in trusted network, that is, BB_NO_NETWORK is set to 0 + # and BB_ALLOWED_NETWORKS does not contain the host that ud.url uses, then + # we need to try premirrors first as using upstream is destined to fail. + if not trusted_network(d, ud.url): + return True + # the following check is to ensure incremental fetch in downloads, this is + # because the premirror might be old and does not contain the new rev required, + # and this will cause a total removal and new clone. So if we can reach to + # network, we prefer upstream over premirror, though the premirror might contain + # the new rev. 
if os.path.exists(ud.clonedir): return False return True @@ -331,18 +394,55 @@ class Git(FetchMethod): if ud.shallow and os.path.exists(ud.fullshallow) and self.need_update(ud, d): ud.localpath = ud.fullshallow return - elif os.path.exists(ud.fullmirror) and not os.path.exists(ud.clonedir): - bb.utils.mkdirhier(ud.clonedir) - runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir) - + elif os.path.exists(ud.fullmirror) and self.need_update(ud, d): + if not os.path.exists(ud.clonedir): + bb.utils.mkdirhier(ud.clonedir) + runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir) + else: + tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR')) + runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=tmpdir) + output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir) + if 'mirror' in output: + runfetchcmd("%s remote rm mirror" % ud.basecmd, d, workdir=ud.clonedir) + runfetchcmd("%s remote add --mirror=fetch mirror %s" % (ud.basecmd, tmpdir), d, workdir=ud.clonedir) + fetch_cmd = "LANG=C %s fetch -f --update-head-ok --progress mirror " % (ud.basecmd) + runfetchcmd(fetch_cmd, d, workdir=ud.clonedir) repourl = self._get_repo_url(ud) + needs_clone = False + if os.path.exists(ud.clonedir): + # The directory may exist, but not be the top level of a bare git + # repository in which case it needs to be deleted and re-cloned. + try: + # Since clones can be bare, use --absolute-git-dir instead of --show-toplevel + output = runfetchcmd("LANG=C %s rev-parse --absolute-git-dir" % ud.basecmd, d, workdir=ud.clonedir) + toplevel = output.rstrip() + + if not bb.utils.path_is_descendant(toplevel, ud.clonedir): + logger.warning("Top level directory '%s' is not a descendant of '%s'. Re-cloning", toplevel, ud.clonedir) + needs_clone = True + except bb.fetch2.FetchError as e: + logger.warning("Unable to get top level for %s (not a git directory?): %s", ud.clonedir, e) + needs_clone = True + except FileNotFoundError as e: + logger.warning("%s", e) + needs_clone = True + + if needs_clone: + shutil.rmtree(ud.clonedir) + else: + needs_clone = True + # If the repo still doesn't exist, fallback to cloning it - if not os.path.exists(ud.clonedir): - # We do this since git will use a "-l" option automatically for local urls where possible + if needs_clone: + # We do this since git will use a "-l" option automatically for local urls where possible, + # but it doesn't work when git/objects is a symlink, only works when it is a directory. 
if repourl.startswith("file://"): - repourl = repourl[7:] - clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, repourl, ud.clonedir) + repourl_path = repourl[7:] + objects = os.path.join(repourl_path, 'objects') + if os.path.isdir(objects) and not os.path.islink(objects): + repourl = repourl_path + clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir) if ud.proto.lower() != 'file': bb.fetch2.check_network_access(d, clone_cmd, ud.url) progresshandler = GitProgressHandler(d) @@ -354,8 +454,12 @@ class Git(FetchMethod): if "origin" in output: runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir) - runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, repourl), d, workdir=ud.clonedir) - fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, repourl) + runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir) + + if ud.nobranch: + fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl)) + else: + fetch_cmd = "LANG=C %s fetch -f --progress %s refs/heads/*:refs/heads/* refs/tags/*:refs/tags/*" % (ud.basecmd, shlex.quote(repourl)) if ud.proto.lower() != 'file': bb.fetch2.check_network_access(d, fetch_cmd, ud.url) progresshandler = GitProgressHandler(d) @@ -378,7 +482,47 @@ class Git(FetchMethod): if missing_rev: raise bb.fetch2.FetchError("Unable to find revision %s even from upstream" % missing_rev) + if self.lfs_need_update(ud, d): + # Unpack temporary working copy, use it to run 'git checkout' to force pre-fetching + # of all LFS blobs needed at the srcrev. + # + # It would be nice to just do this inline here by running 'git-lfs fetch' + # on the bare clonedir, but that operation requires a working copy on some + # releases of Git LFS. + with tempfile.TemporaryDirectory(dir=d.getVar('DL_DIR')) as tmpdir: + # Do the checkout. This implicitly involves a Git LFS fetch. + Git.unpack(self, ud, tmpdir, d) + + # Scoop up a copy of any stuff that Git LFS downloaded. Merge them into + # the bare clonedir. + # + # As this procedure is invoked repeatedly on incremental fetches as + # a recipe's SRCREV is bumped throughout its lifetime, this will + # result in a gradual accumulation of LFS blobs in <ud.clonedir>/lfs + # corresponding to all the blobs reachable from the different revs + # fetched across time. + # + # Only do this if the unpack resulted in a .git/lfs directory being + # created; this only happens if at least one blob needed to be + # downloaded. + if os.path.exists(os.path.join(ud.destdir, ".git", "lfs")): + runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/.git" % ud.destdir) + def build_mirror_data(self, ud, d): + + # Create as a temp file and move atomically into position to avoid races + @contextmanager + def create_atomic(filename): + fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename)) + try: + yield tfile + umask = os.umask(0o666) + os.umask(umask) + os.chmod(tfile, (0o666 & ~umask)) + os.rename(tfile, filename) + finally: + os.close(fd) + if ud.shallow and ud.write_shallow_tarballs: if not os.path.exists(ud.fullshallow): if os.path.islink(ud.fullshallow): @@ -389,7 +533,8 @@ class Git(FetchMethod): self.clone_shallow_local(ud, shallowclone, d) logger.info("Creating tarball of git repository") - runfetchcmd("tar -czf %s ." 
% ud.fullshallow, d, workdir=shallowclone) + with create_atomic(ud.fullshallow) as tfile: + runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone) runfetchcmd("touch %s.done" % ud.fullshallow, d) finally: bb.utils.remove(tempdir, recurse=True) @@ -398,7 +543,11 @@ class Git(FetchMethod): os.unlink(ud.fullmirror) logger.info("Creating tarball of git repository") - runfetchcmd("tar -czf %s ." % ud.fullmirror, d, workdir=ud.clonedir) + with create_atomic(ud.fullmirror) as tfile: + mtime = runfetchcmd("{} log --all -1 --format=%cD".format(ud.basecmd), d, + quiet=True, workdir=ud.clonedir) + runfetchcmd("tar -czf %s --owner oe:0 --group oe:0 --mtime \"%s\" ." + % (tfile, mtime), d, workdir=ud.clonedir) runfetchcmd("touch %s.done" % ud.fullmirror, d) def clone_shallow_local(self, ud, dest, d): @@ -460,20 +609,33 @@ class Git(FetchMethod): def unpack(self, ud, destdir, d): """ unpack the downloaded src to destdir""" - subdir = ud.parm.get("subpath", "") - if subdir != "": - readpathspec = ":%s" % subdir - def_destsuffix = "%s/" % os.path.basename(subdir.rstrip('/')) - else: - readpathspec = "" - def_destsuffix = "git/" + subdir = ud.parm.get("subdir") + subpath = ud.parm.get("subpath") + readpathspec = "" + def_destsuffix = "git/" + + if subpath: + readpathspec = ":%s" % subpath + def_destsuffix = "%s/" % os.path.basename(subpath.rstrip('/')) + + if subdir: + # If 'subdir' param exists, create a dir and use it as destination for unpack cmd + if os.path.isabs(subdir): + if not os.path.realpath(subdir).startswith(os.path.realpath(destdir)): + raise bb.fetch2.UnpackError("subdir argument isn't a subdirectory of unpack root %s" % destdir, ud.url) + destdir = subdir + else: + destdir = os.path.join(destdir, subdir) + def_destsuffix = "" destsuffix = ud.parm.get("destsuffix", def_destsuffix) destdir = ud.destdir = os.path.join(destdir, destsuffix) if os.path.exists(destdir): bb.utils.prunedir(destdir) + if not ud.bareclone: + ud.unpack_tracer.unpack("git", destdir) - need_lfs = ud.parm.get("lfs", "1") == "1" + need_lfs = self._need_lfs(ud) if not need_lfs: ud.basecmd = "GIT_LFS_SKIP_SMUDGE=1 " + ud.basecmd @@ -481,13 +643,12 @@ class Git(FetchMethod): source_found = False source_error = [] - if not source_found: - clonedir_is_up_to_date = not self.clonedir_need_update(ud, d) - if clonedir_is_up_to_date: - runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d) - source_found = True - else: - source_error.append("clone directory not available or not up to date: " + ud.clonedir) + clonedir_is_up_to_date = not self.clonedir_need_update(ud, d) + if clonedir_is_up_to_date: + runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d) + source_found = True + else: + source_error.append("clone directory not available or not up to date: " + ud.clonedir) if not source_found: if ud.shallow: @@ -504,16 +665,18 @@ class Git(FetchMethod): raise bb.fetch2.UnpackError("No up to date source found: " + "; ".join(source_error), ud.url) repourl = self._get_repo_url(ud) - runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, repourl), d, workdir=destdir) + runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=destdir) if self._contains_lfs(ud, d, destdir): if need_lfs and not self._find_git_lfs(d): raise bb.fetch2.FetchError("Repository %s has LFS content, install git-lfs on host to download (or set lfs=0 to ignore it)" % (repourl)) elif not need_lfs: bb.note("Repository %s has LFS content but it is not being 
fetched" % (repourl)) + else: + runfetchcmd("%s lfs install --local" % ud.basecmd, d, workdir=destdir) if not ud.nocheckout: - if subdir != "": + if subpath: runfetchcmd("%s read-tree %s%s" % (ud.basecmd, ud.revisions[ud.names[0]], readpathspec), d, workdir=destdir) runfetchcmd("%s checkout-index -q -f -a" % ud.basecmd, d, workdir=destdir) @@ -562,18 +725,54 @@ class Git(FetchMethod): raise bb.fetch2.FetchError("The command '%s' gave output with more then 1 line unexpectedly, output: '%s'" % (cmd, output)) return output.split()[0] != "0" + def _lfs_objects_downloaded(self, ud, d, name, wd): + """ + Verifies whether the LFS objects for requested revisions have already been downloaded + """ + # Bail out early if this repository doesn't use LFS + if not self._need_lfs(ud) or not self._contains_lfs(ud, d, wd): + return True + + # The Git LFS specification specifies ([1]) the LFS folder layout so it should be safe to check for file + # existence. + # [1] https://github.com/git-lfs/git-lfs/blob/main/docs/spec.md#intercepting-git + cmd = "%s lfs ls-files -l %s" \ + % (ud.basecmd, ud.revisions[name]) + output = runfetchcmd(cmd, d, quiet=True, workdir=wd).rstrip() + # Do not do any further matching if no objects are managed by LFS + if not output: + return True + + # Match all lines beginning with the hexadecimal OID + oid_regex = re.compile("^(([a-fA-F0-9]{2})([a-fA-F0-9]{2})[A-Fa-f0-9]+)") + for line in output.split("\n"): + oid = re.search(oid_regex, line) + if not oid: + bb.warn("git lfs ls-files output '%s' did not match expected format." % line) + if not os.path.exists(os.path.join(wd, "lfs", "objects", oid.group(2), oid.group(3), oid.group(1))): + return False + + return True + + def _need_lfs(self, ud): + return ud.parm.get("lfs", "1") == "1" + def _contains_lfs(self, ud, d, wd): """ Check if the repository has 'lfs' (large file) content """ - if not ud.nobranch: - branchname = ud.branches[ud.names[0]] + if ud.nobranch: + # If no branch is specified, use the current git commit + refname = self._build_revision(ud, d, ud.names[0]) + elif wd == ud.clonedir: + # The bare clonedir doesn't use the remote names; it has the branch immediately. + refname = ud.branches[ud.names[0]] else: - branchname = "master" + refname = "origin/%s" % ud.branches[ud.names[0]] - cmd = "%s grep lfs origin/%s:.gitattributes | wc -l" % ( - ud.basecmd, ud.branches[ud.names[0]]) + cmd = "%s grep lfs %s:.gitattributes | wc -l" % ( + ud.basecmd, refname) try: output = runfetchcmd(cmd, d, quiet=True, workdir=wd) @@ -594,6 +793,11 @@ class Git(FetchMethod): """ Return the repository URL """ + # Note that we do not support passwords directly in the git urls. There are several + # reasons. SRC_URI can be written out to things like buildhistory and people don't + # want to leak passwords like that. Its also all too easy to share metadata without + # removing the password. ssh keys, ~/.netrc and ~/.ssh/config files can be used as + # alternatives so we will not take patches adding password support here. 
         if ud.user:
             username = ud.user + '@'
         else:
@@ -605,7 +809,6 @@
         Return a unique key for the url
         """
         # Collapse adjacent slashes
-        slash_re = re.compile(r"/+")
         return "git:" + ud.host + slash_re.sub(".", ud.path) + ud.unresolvedrev[name]
     def _lsremote(self, ud, d, search):
@@ -624,7 +827,7 @@
         try:
             repourl = self._get_repo_url(ud)
             cmd = "%s ls-remote %s %s" % \
-                (ud.basecmd, repourl, search)
+                (ud.basecmd, shlex.quote(repourl), search)
             if ud.proto.lower() != 'file':
                 bb.fetch2.check_network_access(d, cmd, repourl)
             output = runfetchcmd(cmd, d, True)
@@ -638,6 +841,12 @@
         """
         Compute the HEAD revision for the url
         """
+        if not d.getVar("__BBSRCREV_SEEN"):
+            raise bb.fetch2.FetchError("Recipe uses a floating tag/branch '%s' for repo '%s' without a fixed SRCREV yet doesn't call bb.fetch2.get_srcrev() (use SRCPV in PV for OE)." % (ud.unresolvedrev[name], ud.host+ud.path))
+
+        # Ensure we mark as not cached
+        bb.fetch2.mark_recipe_nocache(d)
+
         output = self._lsremote(ud, d, "")
         # Tags of the form ^{} may not work, need to fall back to the other form
         if ud.unresolvedrev[name][:5] == "refs/" or ud.usehead:
@@ -662,38 +871,42 @@
         """
         pupver = ('', '')
-        tagregex = re.compile(d.getVar('UPSTREAM_CHECK_GITTAGREGEX') or r"(?P<pver>([0-9][\.|_]?)+)")
         try:
             output = self._lsremote(ud, d, "refs/tags/*")
         except (bb.fetch2.FetchError, bb.fetch2.NetworkAccess) as e:
             bb.note("Could not list remote: %s" % str(e))
             return pupver
+        rev_tag_re = re.compile(r"([0-9a-f]{40})\s+refs/tags/(.*)")
+        pver_re = re.compile(d.getVar('UPSTREAM_CHECK_GITTAGREGEX') or r"(?P<pver>([0-9][\.|_]?)+)")
+        nonrel_re = re.compile(r"(alpha|beta|rc|final)+")
+
         verstring = ""
-        revision = ""
         for line in output.split("\n"):
             if not line:
                 break
-            tag_head = line.split("/")[-1]
+            m = rev_tag_re.match(line)
+            if not m:
+                continue
+
+            (revision, tag) = m.groups()
+
             # Ignore non-release tags
-            m = re.search(r"(alpha|beta|rc|final)+", tag_head)
-            if m:
+            if nonrel_re.search(tag):
                 continue
             # search for version in the line
-            tag = tagregex.search(tag_head)
-            if tag is None:
+            m = pver_re.search(tag)
+            if not m:
                 continue
-            tag = tag.group('pver')
-            tag = tag.replace("_", ".")
+            pver = m.group('pver').replace("_", ".")
-            if verstring and bb.utils.vercmp(("0", tag, ""), ("0", verstring, "")) < 0:
+            if verstring and bb.utils.vercmp(("0", pver, ""), ("0", verstring, "")) < 0:
                 continue
-            verstring = tag
-            revision = line.split()[0]
+            verstring = pver
             pupver = (verstring, revision)
         return pupver
diff --git a/lib/bb/fetch2/gitsm.py b/lib/bb/fetch2/gitsm.py
index d6e5c5c05..f7f3af721 100644
--- a/lib/bb/fetch2/gitsm.py
+++ b/lib/bb/fetch2/gitsm.py
@@ -78,7 +78,7 @@ class GitSM(Git):
                     module_hash = ""
                 if not module_hash:
-                    logger.debug(1, "submodule %s is defined, but is not initialized in the repository. Skipping", m)
+                    logger.debug("submodule %s is defined, but is not initialized in the repository. Skipping", m)
Skipping", m) continue submodules.append(m) @@ -88,9 +88,9 @@ class GitSM(Git): subrevision[m] = module_hash.split()[2] # Convert relative to absolute uri based on parent uri - if uris[m].startswith('..'): + if uris[m].startswith('..') or uris[m].startswith('./'): newud = copy.copy(ud) - newud.path = os.path.realpath(os.path.join(newud.path, uris[m])) + newud.path = os.path.normpath(os.path.join(newud.path, uris[m])) uris[m] = Git._get_repo_url(self, newud) for module in submodules: @@ -115,10 +115,21 @@ class GitSM(Git): # This has to be a file reference proto = "file" url = "gitsm://" + uris[module] + if url.endswith("{}{}".format(ud.host, ud.path)): + raise bb.fetch2.FetchError("Submodule refers to the parent repository. This will cause deadlock situation in current version of Bitbake." \ + "Consider using git fetcher instead.") url += ';protocol=%s' % proto url += ";name=%s" % module url += ";subpath=%s" % module + url += ";nobranch=1" + url += ";lfs=%s" % self._need_lfs(ud) + # Note that adding "user=" here to give credentials to the + # submodule is not supported. Since using SRC_URI to give git:// + # URL a password is not supported, one have to use one of the + # recommended way (eg. ~/.netrc or SSH config) which does specify + # the user (See comment in git.py). + # So, we will not take patches adding "user=" support here. ld = d.createCopy() # Not necessary to set SRC_URI, since we're passing the URI to @@ -140,16 +151,6 @@ class GitSM(Git): if Git.need_update(self, ud, d): return True - try: - # Check for the nugget dropped by the download operation - known_srcrevs = runfetchcmd("%s config --get-all bitbake.srcrev" % \ - (ud.basecmd), d, workdir=ud.clonedir) - - if ud.revisions[ud.names[0]] in known_srcrevs.split(): - return False - except bb.fetch2.FetchError: - pass - need_update_list = [] def need_update_submodule(ud, url, module, modpath, workdir, d): url += ";bareclone=1;nobranch=1" @@ -172,14 +173,9 @@ class GitSM(Git): shutil.rmtree(tmpdir) else: self.process_submodules(ud, ud.clonedir, need_update_submodule, d) - if len(need_update_list) == 0: - # We already have the required commits of all submodules. Drop - # a nugget so we don't need to check again. 
- runfetchcmd("%s config --add bitbake.srcrev %s" % \ - (ud.basecmd, ud.revisions[ud.names[0]]), d, workdir=ud.clonedir) - - if len(need_update_list) > 0: - logger.debug(1, 'gitsm: Submodules requiring update: %s' % (' '.join(need_update_list))) + + if need_update_list: + logger.debug('gitsm: Submodules requiring update: %s' % (' '.join(need_update_list))) return True return False @@ -209,9 +205,6 @@ class GitSM(Git): shutil.rmtree(tmpdir) else: self.process_submodules(ud, ud.clonedir, download_submodule, d) - # Drop a nugget for the srcrev we've fetched (used by need_update) - runfetchcmd("%s config --add bitbake.srcrev %s" % \ - (ud.basecmd, ud.revisions[ud.names[0]]), d, workdir=ud.clonedir) def unpack(self, ud, destdir, d): def unpack_submodules(ud, url, module, modpath, workdir, d): @@ -225,6 +218,10 @@ class GitSM(Git): try: newfetch = Fetch([url], d, cache=False) + # modpath is needed by unpack tracer to calculate submodule + # checkout dir + new_ud = newfetch.ud[url] + new_ud.modpath = modpath newfetch.unpack(root=os.path.dirname(os.path.join(repo_conf, 'modules', module))) except Exception as e: logger.error('gitsm: submodule unpack failed: %s %s' % (type(e).__name__, str(e))) @@ -250,10 +247,12 @@ class GitSM(Git): ret = self.process_submodules(ud, ud.destdir, unpack_submodules, d) if not ud.bareclone and ret: - # All submodules should already be downloaded and configured in the tree. This simply sets - # up the configuration and checks out the files. The main project config should remain - # unmodified, and no download from the internet should occur. - runfetchcmd("%s submodule update --recursive --no-fetch" % (ud.basecmd), d, quiet=True, workdir=ud.destdir) + # All submodules should already be downloaded and configured in the tree. This simply + # sets up the configuration and checks out the files. The main project config should + # remain unmodified, and no download from the internet should occur. As such, lfs smudge + # should also be skipped as these files were already smudged in the fetch stage if lfs + # was enabled. 
+ runfetchcmd("GIT_LFS_SKIP_SMUDGE=1 %s submodule update --recursive --no-fetch" % (ud.basecmd), d, quiet=True, workdir=ud.destdir) def implicit_urldata(self, ud, d): import shutil, subprocess, tempfile diff --git a/lib/bb/fetch2/hg.py b/lib/bb/fetch2/hg.py index 8f503701e..cbff8c490 100644 --- a/lib/bb/fetch2/hg.py +++ b/lib/bb/fetch2/hg.py @@ -150,7 +150,7 @@ class Hg(FetchMethod): def download(self, ud, d): """Fetch url""" - logger.debug(2, "Fetch: checking for module directory '" + ud.moddir + "'") + logger.debug2("Fetch: checking for module directory '" + ud.moddir + "'") # If the checkout doesn't exist and the mirror tarball does, extract it if not os.path.exists(ud.pkgdir) and os.path.exists(ud.fullmirror): @@ -160,7 +160,7 @@ class Hg(FetchMethod): if os.access(os.path.join(ud.moddir, '.hg'), os.R_OK): # Found the source, check whether need pull updatecmd = self._buildhgcommand(ud, d, "update") - logger.debug(1, "Running %s", updatecmd) + logger.debug("Running %s", updatecmd) try: runfetchcmd(updatecmd, d, workdir=ud.moddir) except bb.fetch2.FetchError: @@ -168,7 +168,7 @@ class Hg(FetchMethod): pullcmd = self._buildhgcommand(ud, d, "pull") logger.info("Pulling " + ud.url) # update sources there - logger.debug(1, "Running %s", pullcmd) + logger.debug("Running %s", pullcmd) bb.fetch2.check_network_access(d, pullcmd, ud.url) runfetchcmd(pullcmd, d, workdir=ud.moddir) try: @@ -183,14 +183,14 @@ class Hg(FetchMethod): logger.info("Fetch " + ud.url) # check out sources there bb.utils.mkdirhier(ud.pkgdir) - logger.debug(1, "Running %s", fetchcmd) + logger.debug("Running %s", fetchcmd) bb.fetch2.check_network_access(d, fetchcmd, ud.url) runfetchcmd(fetchcmd, d, workdir=ud.pkgdir) # Even when we clone (fetch), we still need to update as hg's clone # won't checkout the specified revision if its on a branch updatecmd = self._buildhgcommand(ud, d, "update") - logger.debug(1, "Running %s", updatecmd) + logger.debug("Running %s", updatecmd) runfetchcmd(updatecmd, d, workdir=ud.moddir) def clean(self, ud, d): @@ -242,14 +242,15 @@ class Hg(FetchMethod): revflag = "-r %s" % ud.revision subdir = ud.parm.get("destsuffix", ud.module) codir = "%s/%s" % (destdir, subdir) + ud.unpack_tracer.unpack("hg", codir) scmdata = ud.parm.get("scmdata", "") if scmdata != "nokeep": proto = ud.parm.get('protocol', 'http') if not os.access(os.path.join(codir, '.hg'), os.R_OK): - logger.debug(2, "Unpack: creating new hg repository in '" + codir + "'") + logger.debug2("Unpack: creating new hg repository in '" + codir + "'") runfetchcmd("%s init %s" % (ud.basecmd, codir), d) - logger.debug(2, "Unpack: updating source in '" + codir + "'") + logger.debug2("Unpack: updating source in '" + codir + "'") if ud.user and ud.pswd: runfetchcmd("%s --config auth.default.prefix=* --config auth.default.username=%s --config auth.default.password=%s --config \"auth.default.schemes=%s\" pull %s" % (ud.basecmd, ud.user, ud.pswd, proto, ud.moddir), d, workdir=codir) else: @@ -259,5 +260,5 @@ class Hg(FetchMethod): else: runfetchcmd("%s up -C %s" % (ud.basecmd, revflag), d, workdir=codir) else: - logger.debug(2, "Unpack: extracting source to '" + codir + "'") + logger.debug2("Unpack: extracting source to '" + codir + "'") runfetchcmd("%s archive -t files %s %s" % (ud.basecmd, revflag, codir), d, workdir=ud.moddir) diff --git a/lib/bb/fetch2/local.py b/lib/bb/fetch2/local.py index 01d9ff9f8..7d7668110 100644 --- a/lib/bb/fetch2/local.py +++ b/lib/bb/fetch2/local.py @@ -17,7 +17,7 @@ import os import urllib.request, urllib.parse, 
 import bb
 import bb.utils
-from bb.fetch2 import FetchMethod, FetchError
+from bb.fetch2 import FetchMethod, FetchError, ParameterError
 from bb.fetch2 import logger
 class Local(FetchMethod):
@@ -33,15 +33,17 @@ class Local(FetchMethod):
         ud.basename = os.path.basename(ud.decodedurl)
         ud.basepath = ud.decodedurl
         ud.needdonestamp = False
+        if "*" in ud.decodedurl:
+            raise bb.fetch2.ParameterError("file:// urls using globbing are no longer supported. Please place the files in a directory and reference that instead.", ud.url)
         return
     def localpath(self, urldata, d):
         """
         Return the local filename of a given url assuming a successful fetch.
         """
-        return self.localpaths(urldata, d)[-1]
+        return self.localfile_searchpaths(urldata, d)[-1]
-    def localpaths(self, urldata, d):
+    def localfile_searchpaths(self, urldata, d):
         """
         Return the local filename of a given url assuming a successful fetch.
         """
@@ -49,29 +51,17 @@ class Local(FetchMethod):
         path = urldata.decodedurl
         newpath = path
         if path[0] == "/":
+            logger.debug2("Using absolute %s" % (path))
             return [path]
         filespath = d.getVar('FILESPATH')
         if filespath:
-            logger.debug(2, "Searching for %s in paths:\n    %s" % (path, "\n    ".join(filespath.split(":"))))
+            logger.debug2("Searching for %s in paths:\n    %s" % (path, "\n    ".join(filespath.split(":"))))
             newpath, hist = bb.utils.which(filespath, path, history=True)
+            logger.debug2("Using %s for %s" % (newpath, path))
             searched.extend(hist)
-        if (not newpath or not os.path.exists(newpath)) and path.find("*") != -1:
-            # For expressions using '*', best we can do is take the first directory in FILESPATH that exists
-            newpath, hist = bb.utils.which(filespath, ".", history=True)
-            searched.extend(hist)
-            logger.debug(2, "Searching for %s in path: %s" % (path, newpath))
-            return searched
-        if not os.path.exists(newpath):
-            dldirfile = os.path.join(d.getVar("DL_DIR"), path)
-            logger.debug(2, "Defaulting to %s for %s" % (dldirfile, path))
-            bb.utils.mkdirhier(os.path.dirname(dldirfile))
-            searched.append(dldirfile)
-            return searched
         return searched
     def need_update(self, ud, d):
-        if ud.url.find("*") != -1:
-            return False
         if os.path.exists(ud.localpath):
             return False
         return True
@@ -84,9 +74,7 @@ class Local(FetchMethod):
             filespath = d.getVar('FILESPATH')
             if filespath:
                 locations = filespath.split(":")
-            locations.append(d.getVar("DL_DIR"))
-
-            msg = "Unable to find file " + urldata.url + " anywhere. The paths that were searched were:\n    " + "\n    ".join(locations)
+            msg = "Unable to find file " + urldata.url + " anywhere to download to " + urldata.localpath + ". The paths that were searched were:\n    " + "\n    ".join(locations)
             raise FetchError(msg)
         return True
@@ -95,9 +83,6 @@ class Local(FetchMethod):
         """
         Check the status of the url
         """
-        if urldata.localpath.find("*") != -1:
-            logger.info("URL %s looks like a glob and was therefore not checked.", urldata.url)
-            return True
         if os.path.exists(urldata.localpath):
             return True
         return False
diff --git a/lib/bb/fetch2/npm.py b/lib/bb/fetch2/npm.py
index 47898509f..15f3f19bc 100644
--- a/lib/bb/fetch2/npm.py
+++ b/lib/bb/fetch2/npm.py
@@ -44,17 +44,24 @@
 def npm_package(package):
     """Convert the npm package name to remove unsupported characters"""
     # Scoped package names (with the @) use the same naming convention
     # as the 'npm pack' command.
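(Editorial sketch of the sanitising performed by the npm_package() rewrite just below; the package names are illustrative.)

    npm_package("@Types/Node")  # -> "types-node": "/" becomes "-", "@" is dropped, result lower-cased
    npm_package("lodash")       # -> "lodash" (already conformant)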
- if package.startswith("@"): - return re.sub("/", "-", package[1:]) - return package + name = re.sub("/", "-", package) + name = name.lower() + name = re.sub(r"[^\-a-z0-9]", "", name) + name = name.strip("-") + return name + def npm_filename(package, version): """Get the filename of a npm package""" return npm_package(package) + "-" + version + ".tgz" -def npm_localfile(package, version): +def npm_localfile(package, version=None): """Get the local filename of a npm package""" - return os.path.join("npm2", npm_filename(package, version)) + if version is not None: + filename = npm_filename(package, version) + else: + filename = package + return os.path.join("npm2", filename) def npm_integrity(integrity): """ @@ -69,41 +76,52 @@ def npm_unpack(tarball, destdir, d): bb.utils.mkdirhier(destdir) cmd = "tar --extract --gzip --file=%s" % shlex.quote(tarball) cmd += " --no-same-owner" + cmd += " --delay-directory-restore" cmd += " --strip-components=1" runfetchcmd(cmd, d, workdir=destdir) + runfetchcmd("chmod -R +X '%s'" % (destdir), d, quiet=True, workdir=destdir) class NpmEnvironment(object): """ Using a npm config file seems more reliable than using cli arguments. This class allows to create a controlled environment for npm commands. """ - def __init__(self, d, configs=None): + def __init__(self, d, configs=[], npmrc=None): self.d = d - self.configs = configs + + self.user_config = tempfile.NamedTemporaryFile(mode="w", buffering=1) + for key, value in configs: + self.user_config.write("%s=%s\n" % (key, value)) + + if npmrc: + self.global_config_name = npmrc + else: + self.global_config_name = "/dev/null" + + def __del__(self): + if self.user_config: + self.user_config.close() def run(self, cmd, args=None, configs=None, workdir=None): """Run npm command in a controlled environment""" with tempfile.TemporaryDirectory() as tmpdir: d = bb.data.createCopy(self.d) + d.setVar("PATH", d.getVar("PATH")) # PATH might contain $HOME - evaluate it before patching d.setVar("HOME", tmpdir) - cfgfile = os.path.join(tmpdir, "npmrc") - if not workdir: workdir = tmpdir def _run(cmd): - cmd = "NPM_CONFIG_USERCONFIG=%s " % cfgfile + cmd - cmd = "NPM_CONFIG_GLOBALCONFIG=%s " % cfgfile + cmd + cmd = "NPM_CONFIG_USERCONFIG=%s " % (self.user_config.name) + cmd + cmd = "NPM_CONFIG_GLOBALCONFIG=%s " % (self.global_config_name) + cmd return runfetchcmd(cmd, d, workdir=workdir) - if self.configs: - for key, value in self.configs: - _run("npm config set %s %s" % (key, shlex.quote(value))) - if configs: + bb.warn("Use of configs argument of NpmEnvironment.run() function" + " is deprecated. Please use args argument instead.") for key, value in configs: - _run("npm config set %s %s" % (key, shlex.quote(value))) + cmd += " --%s=%s" % (key, shlex.quote(value)) if args: for key, value in args: @@ -142,12 +160,12 @@ class Npm(FetchMethod): raise ParameterError("Invalid 'version' parameter", ud.url) # Extract the 'registry' part of the url - ud.registry = re.sub(r"^npm://", "http://", ud.url.split(";")[0]) + ud.registry = re.sub(r"^npm://", "https://", ud.url.split(";")[0]) # Using the 'downloadfilename' parameter as local filename # or the npm package name. 
if "downloadfilename" in ud.parm: - ud.localfile = d.expand(ud.parm["downloadfilename"]) + ud.localfile = npm_localfile(d.expand(ud.parm["downloadfilename"])) else: ud.localfile = npm_localfile(ud.package, ud.version) @@ -165,14 +183,14 @@ class Npm(FetchMethod): def _resolve_proxy_url(self, ud, d): def _npm_view(): - configs = [] - configs.append(("json", "true")) - configs.append(("registry", ud.registry)) + args = [] + args.append(("json", "true")) + args.append(("registry", ud.registry)) pkgver = shlex.quote(ud.package + "@" + ud.version) cmd = ud.basecmd + " view %s" % pkgver env = NpmEnvironment(d) check_network_access(d, cmd, ud.registry) - view_string = env.run(cmd, configs=configs) + view_string = env.run(cmd, args=args) if not view_string: raise FetchError("Unavailable package %s" % pkgver, ud.url) @@ -280,6 +298,7 @@ class Npm(FetchMethod): destsuffix = ud.parm.get("destsuffix", "npm") destdir = os.path.join(rootdir, destsuffix) npm_unpack(ud.localpath, destdir, d) + ud.unpack_tracer.unpack("npm", destdir) def clean(self, ud, d): """Clean any existing full or partial download""" diff --git a/lib/bb/fetch2/npmsw.py b/lib/bb/fetch2/npmsw.py index 0c3511d8a..b55e885d7 100644 --- a/lib/bb/fetch2/npmsw.py +++ b/lib/bb/fetch2/npmsw.py @@ -24,11 +24,14 @@ import bb from bb.fetch2 import Fetch from bb.fetch2 import FetchMethod from bb.fetch2 import ParameterError +from bb.fetch2 import runfetchcmd from bb.fetch2 import URI from bb.fetch2.npm import npm_integrity from bb.fetch2.npm import npm_localfile from bb.fetch2.npm import npm_unpack from bb.utils import is_semver +from bb.utils import lockfile +from bb.utils import unlockfile def foreach_dependencies(shrinkwrap, callback=None, dev=False): """ @@ -38,8 +41,9 @@ def foreach_dependencies(shrinkwrap, callback=None, dev=False): with: name = the package name (string) params = the package parameters (dictionary) - deptree = the package dependency tree (array of strings) + destdir = the destination of the package (string) """ + # For handling old style dependencies entries in shinkwrap files def _walk_deps(deps, deptree): for name in deps: subtree = [*deptree, name] @@ -49,9 +53,22 @@ def foreach_dependencies(shrinkwrap, callback=None, dev=False): continue elif deps[name].get("bundled", False): continue - callback(name, deps[name], subtree) - - _walk_deps(shrinkwrap.get("dependencies", {}), []) + destsubdirs = [os.path.join("node_modules", dep) for dep in subtree] + destsuffix = os.path.join(*destsubdirs) + callback(name, deps[name], destsuffix) + + # packages entry means new style shrinkwrap file, else use dependencies + packages = shrinkwrap.get("packages", None) + if packages is not None: + for package in packages: + if package != "": + name = package.split('node_modules/')[-1] + package_infos = packages.get(package, {}) + if dev == False and package_infos.get("dev", False): + continue + callback(name, package_infos, package) + else: + _walk_deps(shrinkwrap.get("dependencies", {}), []) class NpmShrinkWrap(FetchMethod): """Class to fetch all package from a shrinkwrap file""" @@ -72,19 +89,22 @@ class NpmShrinkWrap(FetchMethod): # Resolve the dependencies ud.deps = [] - def _resolve_dependency(name, params, deptree): + def _resolve_dependency(name, params, destsuffix): url = None localpath = None extrapaths = [] - destsubdirs = [os.path.join("node_modules", dep) for dep in deptree] - destsuffix = os.path.join(*destsubdirs) + unpack = True integrity = params.get("integrity", None) resolved = params.get("resolved", None) version = 
params.get("version", None) # Handle registry sources - if is_semver(version) and resolved and integrity: + if is_semver(version) and integrity: + # Handle duplicate dependencies without url + if not resolved: + return + localfile = npm_localfile(name, version) uri = URI(resolved) @@ -109,7 +129,7 @@ class NpmShrinkWrap(FetchMethod): # Handle http tarball sources elif version.startswith("http") and integrity: - localfile = os.path.join("npm2", os.path.basename(version)) + localfile = npm_localfile(os.path.basename(version)) uri = URI(version) uri.params["downloadfilename"] = localfile @@ -121,8 +141,28 @@ class NpmShrinkWrap(FetchMethod): localpath = os.path.join(d.getVar("DL_DIR"), localfile) + # Handle local tarball and link sources + elif version.startswith("file"): + localpath = version[5:] + if not version.endswith(".tgz"): + unpack = False + # Handle git sources - elif version.startswith("git"): + elif version.startswith(("git", "bitbucket","gist")) or ( + not version.endswith((".tgz", ".tar", ".tar.gz")) + and not version.startswith((".", "@", "/")) + and "/" in version + ): + if version.startswith("github:"): + version = "git+https://github.com/" + version[len("github:"):] + elif version.startswith("gist:"): + version = "git+https://gist.github.com/" + version[len("gist:"):] + elif version.startswith("bitbucket:"): + version = "git+https://bitbucket.org/" + version[len("bitbucket:"):] + elif version.startswith("gitlab:"): + version = "git+https://gitlab.com/" + version[len("gitlab:"):] + elif not version.startswith(("git+","git:")): + version = "git+https://github.com/" + version regex = re.compile(r""" ^ git\+ @@ -148,15 +188,17 @@ class NpmShrinkWrap(FetchMethod): url = str(uri) - # local tarball sources and local link sources are unsupported else: raise ParameterError("Unsupported dependency: %s" % name, ud.url) + # name is needed by unpack tracer for module mapping ud.deps.append({ + "name": name, "url": url, "localpath": localpath, "extrapaths": extrapaths, "destsuffix": destsuffix, + "unpack": unpack, }) try: @@ -177,17 +219,23 @@ class NpmShrinkWrap(FetchMethod): # This fetcher resolves multiple URIs from a shrinkwrap file and then # forwards it to a proxy fetcher. The management of the donestamp file, # the lockfile and the checksums are forwarded to the proxy fetcher. 
- ud.proxy = Fetch([dep["url"] for dep in ud.deps], data) + shrinkwrap_urls = [dep["url"] for dep in ud.deps if dep["url"]] + if shrinkwrap_urls: + ud.proxy = Fetch(shrinkwrap_urls, data) ud.needdonestamp = False @staticmethod def _foreach_proxy_method(ud, handle): returns = [] - for proxy_url in ud.proxy.urls: - proxy_ud = ud.proxy.ud[proxy_url] - proxy_d = ud.proxy.d - proxy_ud.setup_localpath(proxy_d) - returns.append(handle(proxy_ud.method, proxy_ud, proxy_d)) + #Check if there are dependencies before try to fetch them + if len(ud.deps) > 0: + for proxy_url in ud.proxy.urls: + proxy_ud = ud.proxy.ud[proxy_url] + proxy_d = ud.proxy.d + proxy_ud.setup_localpath(proxy_d) + lf = lockfile(proxy_ud.lockfile) + returns.append(handle(proxy_ud.method, proxy_ud, proxy_d)) + unlockfile(lf) return returns def verify_donestamp(self, ud, d): @@ -220,10 +268,11 @@ class NpmShrinkWrap(FetchMethod): def unpack(self, ud, rootdir, d): """Unpack the downloaded dependencies""" - destdir = d.getVar("S") + destdir = rootdir destsuffix = ud.parm.get("destsuffix") if destsuffix: destdir = os.path.join(rootdir, destsuffix) + ud.unpack_tracer.unpack("npm-shrinkwrap", destdir) bb.utils.mkdirhier(destdir) bb.utils.copyfile(ud.shrinkwrap_file, @@ -237,7 +286,16 @@ class NpmShrinkWrap(FetchMethod): for dep in manual: depdestdir = os.path.join(destdir, dep["destsuffix"]) - npm_unpack(dep["localpath"], depdestdir, d) + if dep["url"]: + npm_unpack(dep["localpath"], depdestdir, d) + else: + depsrcdir= os.path.join(destdir, dep["localpath"]) + if dep["unpack"]: + npm_unpack(depsrcdir, depdestdir, d) + else: + bb.utils.mkdirhier(depdestdir) + cmd = 'cp -fpPRH "%s/." .' % (depsrcdir) + runfetchcmd(cmd, d, workdir=depdestdir) def clean(self, ud, d): """Clean any existing full or partial download""" diff --git a/lib/bb/fetch2/osc.py b/lib/bb/fetch2/osc.py index 8f091efd0..495ac8a30 100644 --- a/lib/bb/fetch2/osc.py +++ b/lib/bb/fetch2/osc.py @@ -1,4 +1,6 @@ # +# Copyright BitBake Contributors +# # SPDX-License-Identifier: GPL-2.0-only # """ @@ -8,12 +10,16 @@ Based on the svn "Fetch" implementation. 
""" import logging +import os +import re import bb from bb.fetch2 import FetchMethod from bb.fetch2 import FetchError from bb.fetch2 import MissingParameterError from bb.fetch2 import runfetchcmd +logger = logging.getLogger(__name__) + class Osc(FetchMethod): """Class to fetch a module or modules from Opensuse build server repositories.""" @@ -33,6 +39,7 @@ class Osc(FetchMethod): # Create paths to osc checkouts oscdir = d.getVar("OSCDIR") or (d.getVar("DL_DIR") + "/osc") relpath = self._strip_leading_slashes(ud.path) + ud.oscdir = oscdir ud.pkgdir = os.path.join(oscdir, ud.host) ud.moddir = os.path.join(ud.pkgdir, relpath, ud.module) @@ -40,13 +47,13 @@ class Osc(FetchMethod): ud.revision = ud.parm['rev'] else: pv = d.getVar("PV", False) - rev = bb.fetch2.srcrev_internal_helper(ud, d) + rev = bb.fetch2.srcrev_internal_helper(ud, d, '') if rev: ud.revision = rev else: ud.revision = "" - ud.localfile = d.expand('%s_%s_%s.tar.gz' % (ud.module.replace('/', '.'), ud.path.replace('/', '.'), ud.revision)) + ud.localfile = d.expand('%s_%s_%s.tar.gz' % (ud.module.replace('/', '.'), relpath.replace('/', '.'), ud.revision)) def _buildosccommand(self, ud, d, command): """ @@ -56,38 +63,61 @@ class Osc(FetchMethod): basecmd = d.getVar("FETCHCMD_osc") or "/usr/bin/env osc" - proto = ud.parm.get('protocol', 'ocs') + proto = ud.parm.get('protocol', 'https') options = [] config = "-c %s" % self.generate_config(ud, d) - if ud.revision: + if getattr(ud, 'revision', ''): options.append("-r %s" % ud.revision) coroot = self._strip_leading_slashes(ud.path) if command == "fetch": - osccmd = "%s %s co %s/%s %s" % (basecmd, config, coroot, ud.module, " ".join(options)) + osccmd = "%s %s -A %s://%s co %s/%s %s" % (basecmd, config, proto, ud.host, coroot, ud.module, " ".join(options)) elif command == "update": - osccmd = "%s %s up %s" % (basecmd, config, " ".join(options)) + osccmd = "%s %s -A %s://%s up %s" % (basecmd, config, proto, ud.host, " ".join(options)) + elif command == "api_source": + osccmd = "%s %s -A %s://%s api source/%s/%s" % (basecmd, config, proto, ud.host, coroot, ud.module) else: raise FetchError("Invalid osc command %s" % command, ud.url) return osccmd + def _latest_revision(self, ud, d, name): + """ + Fetch latest revision for the given package + """ + api_source_cmd = self._buildosccommand(ud, d, "api_source") + + output = runfetchcmd(api_source_cmd, d) + match = re.match(r'<directory ?.* rev="(\d+)".*>', output) + if match is None: + raise FetchError("Unable to parse osc response", ud.url) + return match.groups()[0] + + def _revision_key(self, ud, d, name): + """ + Return a unique key for the url + """ + # Collapse adjacent slashes + slash_re = re.compile(r"/+") + rev = getattr(ud, 'revision', "latest") + return "osc:%s%s.%s.%s" % (ud.host, slash_re.sub(".", ud.path), name, rev) + def download(self, ud, d): """ Fetch url """ - logger.debug(2, "Fetch: checking for module directory '" + ud.moddir + "'") + logger.debug2("Fetch: checking for module directory '" + ud.moddir + "'") - if os.access(os.path.join(d.getVar('OSCDIR'), ud.path, ud.module), os.R_OK): + if os.access(ud.moddir, os.R_OK): oscupdatecmd = self._buildosccommand(ud, d, "update") logger.info("Update "+ ud.url) # update sources there - logger.debug(1, "Running %s", oscupdatecmd) + logger.debug("Running %s", oscupdatecmd) bb.fetch2.check_network_access(d, oscupdatecmd, ud.url) runfetchcmd(oscupdatecmd, d, workdir=ud.moddir) else: @@ -95,7 +125,7 @@ class Osc(FetchMethod): logger.info("Fetch " + ud.url) # check out sources there 
bb.utils.mkdirhier(ud.pkgdir) - logger.debug(1, "Running %s", oscfetchcmd) + logger.debug("Running %s", oscfetchcmd) bb.fetch2.check_network_access(d, oscfetchcmd, ud.url) runfetchcmd(oscfetchcmd, d, workdir=ud.pkgdir) @@ -111,20 +141,23 @@ class Osc(FetchMethod): Generate a .oscrc to be used for this run. """ - config_path = os.path.join(d.getVar('OSCDIR'), "oscrc") + config_path = os.path.join(ud.oscdir, "oscrc") + if not os.path.exists(ud.oscdir): + bb.utils.mkdirhier(ud.oscdir) + if (os.path.exists(config_path)): os.remove(config_path) f = open(config_path, 'w') + proto = ud.parm.get('protocol', 'https') f.write("[general]\n") - f.write("apisrv = %s\n" % ud.host) - f.write("scheme = http\n") + f.write("apiurl = %s://%s\n" % (proto, ud.host)) f.write("su-wrapper = su -c\n") f.write("build-root = %s\n" % d.getVar('WORKDIR')) f.write("urllist = %s\n" % d.getVar("OSCURLLIST")) f.write("extra-pkgs = gzip\n") f.write("\n") - f.write("[%s]\n" % ud.host) + f.write("[%s://%s]\n" % (proto, ud.host)) f.write("user = %s\n" % ud.parm["user"]) f.write("pass = %s\n" % ud.parm["pswd"]) f.close() diff --git a/lib/bb/fetch2/perforce.py b/lib/bb/fetch2/perforce.py index 6f3c95b6c..3b6fa4b1e 100644 --- a/lib/bb/fetch2/perforce.py +++ b/lib/bb/fetch2/perforce.py @@ -90,16 +90,16 @@ class Perforce(FetchMethod): p4port = d.getVar('P4PORT') if p4port: - logger.debug(1, 'Using recipe provided P4PORT: %s' % p4port) + logger.debug('Using recipe provided P4PORT: %s' % p4port) ud.host = p4port else: - logger.debug(1, 'Trying to use P4CONFIG to automatically set P4PORT...') + logger.debug('Trying to use P4CONFIG to automatically set P4PORT...') ud.usingp4config = True p4cmd = '%s info | grep "Server address"' % ud.basecmd bb.fetch2.check_network_access(d, p4cmd, ud.url) ud.host = runfetchcmd(p4cmd, d, True) ud.host = ud.host.split(': ')[1].strip() - logger.debug(1, 'Determined P4PORT to be: %s' % ud.host) + logger.debug('Determined P4PORT to be: %s' % ud.host) if not ud.host: raise FetchError('Could not determine P4PORT from P4CONFIG') @@ -119,6 +119,7 @@ class Perforce(FetchMethod): cleanedpath = ud.path.replace('/...', '').replace('/', '.') cleanedhost = ud.host.replace(':', '.') + cleanedmodule = "" # Merge the path and module into the final depot location if ud.module: if ud.module.find('/') == 0: @@ -133,7 +134,7 @@ class Perforce(FetchMethod): ud.setup_revisions(d) - ud.localfile = d.expand('%s_%s_%s.tar.gz' % (cleanedhost, cleanedpath, ud.revision)) + ud.localfile = d.expand('%s_%s_%s_%s.tar.gz' % (cleanedhost, cleanedpath, cleanedmodule, ud.revision)) def _buildp4command(self, ud, d, command, depot_filename=None): """ @@ -207,7 +208,7 @@ class Perforce(FetchMethod): for filename in p4fileslist: item = filename.split(' - ') lastaction = item[1].split() - logger.debug(1, 'File: %s Last Action: %s' % (item[0], lastaction[0])) + logger.debug('File: %s Last Action: %s' % (item[0], lastaction[0])) if lastaction[0] == 'delete': continue filelist.append(item[0]) @@ -254,7 +255,7 @@ class Perforce(FetchMethod): raise FetchError('Could not determine the latest perforce changelist') tipcset = tip.split(' ')[1] - logger.debug(1, 'p4 tip found to be changelist %s' % tipcset) + logger.debug('p4 tip found to be changelist %s' % tipcset) return tipcset def sortable_revision(self, ud, d, name): diff --git a/lib/bb/fetch2/repo.py b/lib/bb/fetch2/repo.py index 2bdbbd409..fa4cb8149 100644 --- a/lib/bb/fetch2/repo.py +++ b/lib/bb/fetch2/repo.py @@ -47,7 +47,7 @@ class Repo(FetchMethod): """Fetch url""" if 
os.access(os.path.join(d.getVar("DL_DIR"), ud.localfile), os.R_OK):
-            logger.debug(1, "%s already exists (or was stashed). Skipping repo init / sync.", ud.localpath)
+            logger.debug("%s already exists (or was stashed). Skipping repo init / sync.", ud.localpath)
             return
         repodir = d.getVar("REPODIR") or (d.getVar("DL_DIR") + "/repo")
diff --git a/lib/bb/fetch2/s3.py b/lib/bb/fetch2/s3.py
index ffca73c8e..6b8ffd535 100644
--- a/lib/bb/fetch2/s3.py
+++ b/lib/bb/fetch2/s3.py
@@ -18,10 +18,47 @@ The aws tool must be correctly installed and configured prior to use.
 import os
 import bb
 import urllib.request, urllib.parse, urllib.error
+import re
 from bb.fetch2 import FetchMethod
 from bb.fetch2 import FetchError
 from bb.fetch2 import runfetchcmd
+def convertToBytes(value, unit):
+    value = float(value)
+    if (unit == "KiB"):
+        value = value*1024.0
+    elif (unit == "MiB"):
+        value = value*1024.0*1024.0
+    elif (unit == "GiB"):
+        value = value*1024.0*1024.0*1024.0
+    return value
+
+class S3ProgressHandler(bb.progress.LineFilterProgressHandler):
+    """
+    Extract progress information from s3 cp output, e.g.:
+    Completed 5.1 KiB/8.8 GiB (12.0 MiB/s) with 1 file(s) remaining
+    """
+    def __init__(self, d):
+        super(S3ProgressHandler, self).__init__(d)
+        # Send an initial progress event so the bar gets shown
+        self._fire_progress(0)
+
+    def writeline(self, line):
+        percs = re.findall(r'^Completed (\d+\.?\d*) (\w+)\/(\d+\.?\d*) (\w+) (\(.+\)) with\s+', line)
+        if percs:
+            completed = (percs[-1][0])
+            completedUnit = (percs[-1][1])
+            total = (percs[-1][2])
+            totalUnit = (percs[-1][3])
+            completed = convertToBytes(completed, completedUnit)
+            total = convertToBytes(total, totalUnit)
+            progress = (completed/total)*100.0
+            rate = percs[-1][4]
+            self.update(progress, rate)
+            return False
+        return True
+
 class S3(FetchMethod):
     """Class to fetch urls via 'aws s3'"""
@@ -52,7 +89,9 @@ class S3(FetchMethod):
         cmd = '%s cp s3://%s%s %s' % (ud.basecmd, ud.host, ud.path, ud.localpath)
         bb.fetch2.check_network_access(d, cmd, ud.url)
-        runfetchcmd(cmd, d)
+
+        progresshandler = S3ProgressHandler(d)
+        runfetchcmd(cmd, d, False, log=progresshandler)
         # Additional sanity checks copied from the wget class (although there
         # are no known issues which mean these are required, treat the aws cli
diff --git a/lib/bb/fetch2/sftp.py b/lib/bb/fetch2/sftp.py
index f87f292e5..7884cce94 100644
--- a/lib/bb/fetch2/sftp.py
+++ b/lib/bb/fetch2/sftp.py
@@ -103,7 +103,7 @@ class SFTP(FetchMethod):
         if path[:3] == '/~/':
             path = path[3:]
-        remote = '%s%s:%s' % (user, urlo.hostname, path)
+        remote = '"%s%s:%s"' % (user, urlo.hostname, path)
         cmd = '%s %s %s %s' % (basecmd, port, remote, lpath)
diff --git a/lib/bb/fetch2/ssh.py b/lib/bb/fetch2/ssh.py
index 5e982ecf3..0cbb2a6f2 100644
--- a/lib/bb/fetch2/ssh.py
+++ b/lib/bb/fetch2/ssh.py
@@ -31,8 +31,8 @@ IETF secsh internet draft:
 #
 import re, os
-from bb.fetch2 import FetchMethod
-from bb.fetch2 import runfetchcmd
+from bb.fetch2 import check_network_access, FetchMethod, ParameterError, runfetchcmd
+import urllib
 __pattern__ = re.compile(r'''
@@ -41,9 +41,9 @@ __pattern__ = re.compile(r'''
     (                    # Optional username/password block
     (?P<user>\S+)        # username
     (:(?P<pass>\S+))?    # colon followed by the password (optional)
-    )?
     (?P<cparam>(;[^;]+)*)?  # connection parameters block (optional)
     @
+    )?
     (?P<host>\S+?)       # non-greedy match of the host
     (:(?P<port>[0-9]+))?
# colon followed by the port (optional) / @@ -65,12 +65,13 @@ class SSH(FetchMethod): def urldata_init(self, urldata, d): if 'protocol' in urldata.parm and urldata.parm['protocol'] == 'git': - raise bb.fetch2.ParameterError( + raise ParameterError( "Invalid protocol - if you wish to fetch from a git " + "repository using ssh, you need to use " + "git:// prefix with protocol=ssh", urldata.url) m = __pattern__.match(urldata.url) path = m.group('path') + path = urllib.parse.unquote(path) host = m.group('host') urldata.localpath = os.path.join(d.getVar('DL_DIR'), os.path.basename(os.path.normpath(path))) @@ -97,6 +98,11 @@ class SSH(FetchMethod): fr += '@%s' % host else: fr = host + + if path[0] != '~': + path = '/%s' % path + path = urllib.parse.unquote(path) + fr += ':%s' % path cmd = 'scp -B -r %s %s %s/' % ( @@ -105,7 +111,45 @@ class SSH(FetchMethod): dldir ) - bb.fetch2.check_network_access(d, cmd, urldata.url) + check_network_access(d, cmd, urldata.url) + + runfetchcmd(cmd, d) + + def checkstatus(self, fetch, urldata, d): + """ + Check the status of the url + """ + m = __pattern__.match(urldata.url) + path = m.group('path') + host = m.group('host') + port = m.group('port') + user = m.group('user') + password = m.group('pass') + + if port: + portarg = '-P %s' % port + else: + portarg = '' + + if user: + fr = user + if password: + fr += ':%s' % password + fr += '@%s' % host + else: + fr = host + + if path[0] != '~': + path = '/%s' % path + path = urllib.parse.unquote(path) + + cmd = 'ssh -o BatchMode=true %s %s [ -f %s ]' % ( + portarg, + fr, + path + ) + check_network_access(d, cmd, urldata.url) runfetchcmd(cmd, d) + return True diff --git a/lib/bb/fetch2/svn.py b/lib/bb/fetch2/svn.py index 971a5add4..0852108e7 100644 --- a/lib/bb/fetch2/svn.py +++ b/lib/bb/fetch2/svn.py @@ -57,7 +57,12 @@ class Svn(FetchMethod): if 'rev' in ud.parm: ud.revision = ud.parm['rev'] - ud.localfile = d.expand('%s_%s_%s_%s_.tar.gz' % (ud.module.replace('/', '.'), ud.host, ud.path.replace('/', '.'), ud.revision)) + # Whether to use the @REV peg-revision syntax in the svn command or not + ud.pegrevision = True + if 'nopegrevision' in ud.parm: + ud.pegrevision = False + + ud.localfile = d.expand('%s_%s_%s_%s_%s.tar.gz' % (ud.module.replace('/', '.'), ud.host, ud.path.replace('/', '.'), ud.revision, ["0", "1"][ud.pegrevision])) def _buildsvncommand(self, ud, d, command): """ @@ -86,7 +91,7 @@ class Svn(FetchMethod): if command == "info": svncmd = "%s info %s %s://%s/%s/" % (ud.basecmd, " ".join(options), proto, svnroot, ud.module) elif command == "log1": - svncmd = "%s log --limit 1 %s %s://%s/%s/" % (ud.basecmd, " ".join(options), proto, svnroot, ud.module) + svncmd = "%s log --limit 1 --quiet %s %s://%s/%s/" % (ud.basecmd, " ".join(options), proto, svnroot, ud.module) else: suffix = "" @@ -98,7 +103,8 @@ class Svn(FetchMethod): if ud.revision: options.append("-r %s" % ud.revision) - suffix = "@%s" % (ud.revision) + if ud.pegrevision: + suffix = "@%s" % (ud.revision) if command == "fetch": transportuser = ud.parm.get("transportuser", "") @@ -116,7 +122,7 @@ class Svn(FetchMethod): def download(self, ud, d): """Fetch url""" - logger.debug(2, "Fetch: checking for module directory '" + ud.moddir + "'") + logger.debug2("Fetch: checking for module directory '" + ud.moddir + "'") lf = bb.utils.lockfile(ud.svnlock) @@ -129,7 +135,7 @@ class Svn(FetchMethod): runfetchcmd(ud.basecmd + " upgrade", d, workdir=ud.moddir) except FetchError: pass - logger.debug(1, "Running %s", svncmd) + logger.debug("Running %s", svncmd) 
bb.fetch2.check_network_access(d, svncmd, ud.url) runfetchcmd(svncmd, d, workdir=ud.moddir) else: @@ -137,7 +143,7 @@ class Svn(FetchMethod): logger.info("Fetch " + ud.url) # check out sources there bb.utils.mkdirhier(ud.pkgdir) - logger.debug(1, "Running %s", svncmd) + logger.debug("Running %s", svncmd) bb.fetch2.check_network_access(d, svncmd, ud.url) runfetchcmd(svncmd, d, workdir=ud.pkgdir) @@ -204,3 +210,6 @@ class Svn(FetchMethod): def _build_revision(self, ud, d): return ud.revision + + def supports_checksum(self, urldata): + return False diff --git a/lib/bb/fetch2/wget.py b/lib/bb/fetch2/wget.py index f7d1de26b..d76b1d0d3 100644 --- a/lib/bb/fetch2/wget.py +++ b/lib/bb/fetch2/wget.py @@ -26,7 +26,6 @@ from bb.fetch2 import FetchMethod from bb.fetch2 import FetchError from bb.fetch2 import logger from bb.fetch2 import runfetchcmd -from bb.utils import export_proxies from bs4 import BeautifulSoup from bs4 import SoupStrainer @@ -53,11 +52,23 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler): class Wget(FetchMethod): """Class to fetch urls via 'wget'""" + + # CDNs like CloudFlare may do a 'browser integrity test' which can fail + # with the standard wget/urllib User-Agent, so pretend to be a modern + # browser. + user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0" + + def check_certs(self, d): + """ + Should certificates be checked? + """ + return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0" + def supports(self, ud, d): """ Check to see if a given url can be fetched with wget. """ - return ud.type in ['http', 'https', 'ftp'] + return ud.type in ['http', 'https', 'ftp', 'ftps'] def recommends_checksum(self, urldata): return True @@ -76,13 +87,19 @@ class Wget(FetchMethod): if not ud.localfile: ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", ".")) - self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate" + self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30" + + if ud.type == 'ftp' or ud.type == 'ftps': + self.basecmd += " --passive-ftp" + + if not self.check_certs(d): + self.basecmd += " --no-check-certificate" def _runwget(self, ud, d, command, quiet, workdir=None): progresshandler = WgetProgressHandler(d) - logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command)) + logger.debug2("Fetching %s using command '%s'" % (ud.url, command)) bb.fetch2.check_network_access(d, command, ud.url) runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir) @@ -91,13 +108,22 @@ class Wget(FetchMethod): fetchcmd = self.basecmd - if 'downloadfilename' in ud.parm: - localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) - bb.utils.mkdirhier(os.path.dirname(localpath)) - fetchcmd += " -O %s" % shlex.quote(localpath) + localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp" + bb.utils.mkdirhier(os.path.dirname(localpath)) + fetchcmd += " -O %s" % shlex.quote(localpath) if ud.user and ud.pswd: - fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd) + fetchcmd += " --auth-no-challenge" + if ud.parm.get("redirectauth", "1") == "1": + # An undocumented feature of wget is that if the + # username/password are specified on the URI, wget will only + # send the Authorization header to the first host and not to + # any hosts that it is redirected to. 
With the increasing + # usage of temporary AWS URLs, this difference now matters as + # AWS will reject any request that has authentication both in + # the query parameters (from the redirect) and in the + # Authorization header. + fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd) uri = ud.url.split(";")[0] if os.path.exists(ud.localpath): @@ -110,13 +136,22 @@ class Wget(FetchMethod): # Sanity check since wget can pretend it succeed when it didn't # Also, this used to happen if sourceforge sent us to the mirror page - if not os.path.exists(ud.localpath): - raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri) + if not os.path.exists(localpath): + raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, localpath), uri) - if os.path.getsize(ud.localpath) == 0: - os.remove(ud.localpath) + if os.path.getsize(localpath) == 0: + os.remove(localpath) raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri) + # Try and verify any checksum now, meaning if it isn't correct, we don't remove the + # original file, which might be a race (imagine two recipes referencing the same + # source, one with an incorrect checksum) + bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False) + + # Remove the ".tmp" and move the file into position atomically + # Our lock prevents multiple writers but mirroring code may grab incomplete files + os.rename(localpath, localpath[:-4]) + return True def checkstatus(self, fetch, ud, d, try_again=True): @@ -203,15 +238,12 @@ class Wget(FetchMethod): # We let the request fail and expect it to be # tried once more ("try_again" in check_status()), # with the dead connection removed from the cache. - # If it still fails, we give up, which can happend for bad + # If it still fails, we give up, which can happen for bad # HTTP proxy settings. fetch.connection_cache.remove_connection(h.host, h.port) raise urllib.error.URLError(err) else: - try: - r = h.getresponse(buffering=True) - except TypeError: # buffering kw not supported - r = h.getresponse() + r = h.getresponse() # Pick apart the HTTPResponse object to get the addinfourl # object initialized properly. @@ -279,56 +311,76 @@ class Wget(FetchMethod): newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) newreq.get_method = req.get_method return newreq - exported_proxies = export_proxies(d) - - handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback] - if exported_proxies: - handlers.append(urllib.request.ProxyHandler()) - handlers.append(CacheHTTPHandler()) - # Since Python 2.7.9 ssl cert validation is enabled by default - # see PEP-0476, this causes verification errors on some https servers - # so disable by default. - import ssl - if hasattr(ssl, '_create_unverified_context'): - handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context())) - opener = urllib.request.build_opener(*handlers) - - try: - uri = ud.url.split(";")[0] - r = urllib.request.Request(uri) - r.get_method = lambda: "HEAD" - # Some servers (FusionForge, as used on Alioth) require that the - # optional Accept header is set. 
- r.add_header("Accept", "*/*") - r.add_header("User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12") - def add_basic_auth(login_str, request): - '''Adds Basic auth to http request, pass in login:password as string''' - import base64 - encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8") - authheader = "Basic %s" % encodeuser - r.add_header("Authorization", authheader) - - if ud.user and ud.pswd: - add_basic_auth(ud.user + ':' + ud.pswd, r) - try: - import netrc - n = netrc.netrc() - login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname) - add_basic_auth("%s:%s" % (login, password), r) - except (TypeError, ImportError, IOError, netrc.NetrcParseError): - pass - - with opener.open(r) as response: - pass - except urllib.error.URLError as e: - if try_again: - logger.debug(2, "checkstatus: trying again") - return self.checkstatus(fetch, ud, d, False) + # We need to update the environment here as both the proxy and HTTPS + # handlers need variables set. The proxy needs http_proxy and friends to + # be set, and HTTPSHandler ends up calling into openssl to load the + # certificates. In buildtools configurations this will be looking at the + # wrong place for certificates by default: we set SSL_CERT_FILE to the + # right location in the buildtools environment script but as BitBake + # prunes prunes the environment this is lost. When binaries are executed + # runfetchcmd ensures these values are in the environment, but this is + # pure Python so we need to update the environment. + # + # Avoid tramping the environment too much by using bb.utils.environment + # to scope the changes to the build_opener request, which is when the + # environment lookups happen. + newenv = bb.fetch2.get_fetcher_environment(d) + + with bb.utils.environment(**newenv): + import ssl + + if self.check_certs(d): + context = ssl.create_default_context() else: - # debug for now to avoid spamming the logs in e.g. remote sstate searches - logger.debug(2, "checkstatus() urlopen failed: %s" % e) - return False + context = ssl._create_unverified_context() + + handlers = [FixedHTTPRedirectHandler, + HTTPMethodFallback, + urllib.request.ProxyHandler(), + CacheHTTPHandler(), + urllib.request.HTTPSHandler(context=context)] + opener = urllib.request.build_opener(*handlers) + + try: + uri_base = ud.url.split(";")[0] + uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path) + r = urllib.request.Request(uri) + r.get_method = lambda: "HEAD" + # Some servers (FusionForge, as used on Alioth) require that the + # optional Accept header is set. 
+ r.add_header("Accept", "*/*") + r.add_header("User-Agent", self.user_agent) + def add_basic_auth(login_str, request): + '''Adds Basic auth to http request, pass in login:password as string''' + import base64 + encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8") + authheader = "Basic %s" % encodeuser + r.add_header("Authorization", authheader) + + if ud.user and ud.pswd: + add_basic_auth(ud.user + ':' + ud.pswd, r) + + try: + import netrc + auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname) + if auth_data: + login, _, password = auth_data + add_basic_auth("%s:%s" % (login, password), r) + except (FileNotFoundError, netrc.NetrcParseError): + pass + + with opener.open(r, timeout=30) as response: + pass + except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e: + if try_again: + logger.debug2("checkstatus: trying again") + return self.checkstatus(fetch, ud, d, False) + else: + # debug for now to avoid spamming the logs in e.g. remote sstate searches + logger.debug2("checkstatus() urlopen failed for %s: %s" % (uri,e)) + return False + return True def _parse_path(self, regex, s): @@ -404,9 +456,8 @@ class Wget(FetchMethod): """ f = tempfile.NamedTemporaryFile() with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f: - agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12" fetchcmd = self.basecmd - fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'" + fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'" try: self._runwget(ud, d, fetchcmd, True, workdir=workdir) fetchresult = f.read() @@ -462,7 +513,7 @@ class Wget(FetchMethod): version_dir = ['', '', ''] version = ['', '', ''] - dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))") + dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))") s = dirver_regex.search(dirver) if s: version_dir[1] = s.group('ver') @@ -538,7 +589,7 @@ class Wget(FetchMethod): # src.rpm extension was added only for rpm package. Can be removed if the rpm # packaged will always be considered as having to be manually upgraded - psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)" + psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)" # match name, version and archive type of a package package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)" @@ -589,10 +640,10 @@ class Wget(FetchMethod): # search for version matches on folders inside the path, like: # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/") - m = dirver_regex.search(path) + m = dirver_regex.findall(path) if m: pn = d.getVar('PN') - dirver = m.group('dirver') + dirver = m[-1][0] dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn))) if not dirver_pn_regex.search(dirver): |