diff options
Diffstat (limited to 'lib/bb/fetch2')
-rw-r--r-- | lib/bb/fetch2/README | 57 | ||||
-rw-r--r-- | lib/bb/fetch2/__init__.py | 772 | ||||
-rw-r--r-- | lib/bb/fetch2/az.py | 93 | ||||
-rw-r--r-- | lib/bb/fetch2/bzr.py | 27 | ||||
-rw-r--r-- | lib/bb/fetch2/clearcase.py | 33 | ||||
-rw-r--r-- | lib/bb/fetch2/crate.py | 150 | ||||
-rw-r--r-- | lib/bb/fetch2/cvs.py | 43 | ||||
-rw-r--r-- | lib/bb/fetch2/gcp.py | 102 | ||||
-rw-r--r-- | lib/bb/fetch2/git.py | 455 | ||||
-rw-r--r-- | lib/bb/fetch2/gitannex.py | 16 | ||||
-rw-r--r-- | lib/bb/fetch2/gitsm.py | 319 | ||||
-rw-r--r-- | lib/bb/fetch2/hg.py | 66 | ||||
-rw-r--r-- | lib/bb/fetch2/local.py | 49 | ||||
-rw-r--r-- | lib/bb/fetch2/npm.py | 572 | ||||
-rw-r--r-- | lib/bb/fetch2/npmsw.py | 313 | ||||
-rw-r--r-- | lib/bb/fetch2/osc.py | 75 | ||||
-rw-r--r-- | lib/bb/fetch2/perforce.py | 120 | ||||
-rw-r--r-- | lib/bb/fetch2/repo.py | 30 | ||||
-rw-r--r-- | lib/bb/fetch2/s3.py | 56 | ||||
-rw-r--r-- | lib/bb/fetch2/sftp.py | 17 | ||||
-rw-r--r-- | lib/bb/fetch2/ssh.py | 72 | ||||
-rw-r--r-- | lib/bb/fetch2/svn.py | 126 | ||||
-rw-r--r-- | lib/bb/fetch2/wget.py | 274 |
23 files changed, 2661 insertions, 1176 deletions
diff --git a/lib/bb/fetch2/README b/lib/bb/fetch2/README new file mode 100644 index 000000000..67b787ef4 --- /dev/null +++ b/lib/bb/fetch2/README @@ -0,0 +1,57 @@ +There are expectations of users of the fetcher code. This file attempts to document +some of the constraints that are present. Some are obvious, some are less so. It is +documented in the context of how OE uses it but the API calls are generic. + +a) network access for sources is only expected to happen in the do_fetch task. + This is not enforced or tested but is required so that we can: + + i) audit the sources used (i.e. for license/manifest reasons) + ii) support offline builds with a suitable cache + iii) allow work to continue even with downtime upstream + iv) allow for changes upstream in incompatible ways + v) allow rebuilding of the software in X years time + +b) network access is not expected in do_unpack task. + +c) you can take DL_DIR and use it as a mirror for offline builds. + +d) access to the network is only made when explicitly configured in recipes + (e.g. use of AUTOREV, or use of git tags which change revision). + +e) fetcher output is deterministic (i.e. if you fetch configuration XXX now it + will match in future exactly in a clean build with a new DL_DIR). + One specific pain point example are git tags. They can be replaced and change + so the git fetcher has to resolve them with the network. We use git revisions + where possible to avoid this and ensure determinism. + +f) network access is expected to work with the standard linux proxy variables + so that access behind firewalls works (the fetcher sets these in the + environment but only in the do_fetch tasks). + +g) access during parsing has to be minimal, a "git ls-remote" for an AUTOREV + git recipe might be ok but you can't expect to checkout a git tree. + +h) we need to provide revision information during parsing such that a version + for the recipe can be constructed. + +i) versions are expected to be able to increase in a way which sorts allowing + package feeds to operate (see PR server required for git revisions to sort). + +j) API to query for possible version upgrades of a url is highly desireable to + allow our automated upgrage code to function (it is implied this does always + have network access). + +k) Where fixes or changes to behaviour in the fetcher are made, we ask that + test cases are added (run with "bitbake-selftest bb.tests.fetch"). We do + have fairly extensive test coverage of the fetcher as it is the only way + to track all of its corner cases, it still doesn't give entire coverage + though sadly. + +l) If using tools during parse time, they will have to be in ASSUME_PROVIDED + in OE's context as we can't build git-native, then parse a recipe and use + git ls-remote. + +Not all fetchers support all features, autorev is optional and doesn't make +sense for some. Upgrade detection means different things in different contexts +too. + diff --git a/lib/bb/fetch2/__init__.py b/lib/bb/fetch2/__init__.py index 72d6092de..5bf2c4b8c 100644 --- a/lib/bb/fetch2/__init__.py +++ b/lib/bb/fetch2/__init__.py @@ -1,5 +1,3 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- """ BitBake 'Fetch' implementations @@ -10,18 +8,7 @@ BitBake build tools. # Copyright (C) 2003, 2004 Chris Larson # Copyright (C) 2012 Intel Corporation # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# SPDX-License-Identifier: GPL-2.0-only # # Based on functions from the base bb module, Copyright 2003 Holger Schurig @@ -46,6 +33,9 @@ _checksum_cache = bb.checksum.FileChecksumCache() logger = logging.getLogger("BitBake.Fetcher") +CHECKSUM_LIST = [ "md5", "sha256", "sha1", "sha384", "sha512" ] +SHOWN_CHECKSUM_LIST = ["sha256"] + class BBFetchException(Exception): """Class all fetch exceptions inherit from""" def __init__(self, message): @@ -123,7 +113,7 @@ class MissingParameterError(BBFetchException): self.args = (missing, url) class ParameterError(BBFetchException): - """Exception raised when a url cannot be proccessed due to invalid parameters.""" + """Exception raised when a url cannot be processed due to invalid parameters.""" def __init__(self, message, url): msg = "URL: '%s' has invalid parameters. %s" % (url, message) self.url = url @@ -144,10 +134,9 @@ class NonLocalMethod(Exception): Exception.__init__(self) class MissingChecksumEvent(bb.event.Event): - def __init__(self, url, md5sum, sha256sum): + def __init__(self, url, **checksums): self.url = url - self.checksums = {'md5sum': md5sum, - 'sha256sum': sha256sum} + self.checksums = checksums bb.event.Event.__init__(self) @@ -193,7 +182,7 @@ class URI(object): Some notes about relative URIs: while it's specified that a URI beginning with <scheme>:// should either be directly followed by a hostname or a /, the old URI handling of the - fetch2 library did not comform to this. Therefore, this URI + fetch2 library did not conform to this. Therefore, this URI class has some kludges to make sure that URIs are parsed in a way comforming to bitbake's current usage. This URI class supports the following: @@ -210,7 +199,7 @@ class URI(object): file://hostname/absolute/path.diff (would be IETF compliant) Note that the last case only applies to a list of - "whitelisted" schemes (currently only file://), that requires + explicitly allowed schemes (currently only file://), that requires its URIs to not have a network location. """ @@ -256,7 +245,7 @@ class URI(object): # Identify if the URI is relative or not if urlp.scheme in self._relative_schemes and \ - re.compile("^\w+:(?!//)").match(uri): + re.compile(r"^\w+:(?!//)").match(uri): self.relative = True if not self.relative: @@ -301,12 +290,12 @@ class URI(object): def _param_str_split(self, string, elmdelim, kvdelim="="): ret = collections.OrderedDict() - for k, v in [x.split(kvdelim, 1) for x in string.split(elmdelim)]: + for k, v in [x.split(kvdelim, 1) if kvdelim in x else (x, None) for x in string.split(elmdelim) if x]: ret[k] = v return ret def _param_str_join(self, dict_, elmdelim, kvdelim="="): - return elmdelim.join([kvdelim.join([k, v]) for k, v in dict_.items()]) + return elmdelim.join([kvdelim.join([k, v]) if v else k for k, v in dict_.items()]) @property def hostport(self): @@ -383,7 +372,7 @@ def decodeurl(url): path = location else: host = location - path = "" + path = "/" if user: m = re.compile('(?P<user>[^:]+)(:?(?P<pswd>.*))').match(user) if m: @@ -399,7 +388,7 @@ def decodeurl(url): if s: if not '=' in s: raise MalformedUrl(url, "The URL: '%s' is invalid: parameter %s does not specify a value (missing '=')" % (url, s)) - s1, s2 = s.split('=') + s1, s2 = s.split('=', 1) p[s1] = s2 return type, host, urllib.parse.unquote(path), user, pswd, p @@ -413,24 +402,24 @@ def encodeurl(decoded): if not type: raise MissingParameterError('type', "encoded from the data %s" % str(decoded)) - url = '%s://' % type + url = ['%s://' % type] if user and type != "file": - url += "%s" % user + url.append("%s" % user) if pswd: - url += ":%s" % pswd - url += "@" + url.append(":%s" % pswd) + url.append("@") if host and type != "file": - url += "%s" % host + url.append("%s" % host) if path: # Standardise path to ensure comparisons work while '//' in path: path = path.replace("//", "/") - url += "%s" % urllib.parse.quote(path) + url.append("%s" % urllib.parse.quote(path)) if p: for parm in p: - url += ";%s=%s" % (parm, p[parm]) + url.append(";%s=%s" % (parm, p[parm])) - return url + return "".join(url) def uri_replace(ud, uri_find, uri_replace, replacements, d, mirrortarball=None): if not ud.url or not uri_find or not uri_replace: @@ -439,8 +428,9 @@ def uri_replace(ud, uri_find, uri_replace, replacements, d, mirrortarball=None): uri_decoded = list(decodeurl(ud.url)) uri_find_decoded = list(decodeurl(uri_find)) uri_replace_decoded = list(decodeurl(uri_replace)) - logger.debug(2, "For url %s comparing %s to %s" % (uri_decoded, uri_find_decoded, uri_replace_decoded)) + logger.debug2("For url %s comparing %s to %s" % (uri_decoded, uri_find_decoded, uri_replace_decoded)) result_decoded = ['', '', '', '', '', {}] + # 0 - type, 1 - host, 2 - path, 3 - user, 4- pswd, 5 - params for loc, i in enumerate(uri_find_decoded): result_decoded[loc] = uri_decoded[loc] regexp = i @@ -452,14 +442,17 @@ def uri_replace(ud, uri_find, uri_replace, replacements, d, mirrortarball=None): # Handle URL parameters if i: # Any specified URL parameters must match - for k in uri_replace_decoded[loc]: - if uri_decoded[loc][k] != uri_replace_decoded[loc][k]: + for k in uri_find_decoded[loc]: + if uri_decoded[loc][k] != uri_find_decoded[loc][k]: return None # Overwrite any specified replacement parameters for k in uri_replace_decoded[loc]: for l in replacements: uri_replace_decoded[loc][k] = uri_replace_decoded[loc][k].replace(l, replacements[l]) result_decoded[loc][k] = uri_replace_decoded[loc][k] + elif (loc == 3 or loc == 4) and uri_replace_decoded[loc]: + # User/password in the replacement is just a straight replacement + result_decoded[loc] = uri_replace_decoded[loc] elif (re.match(regexp, uri_decoded[loc])): if not uri_replace_decoded[loc]: result_decoded[loc] = "" @@ -476,16 +469,24 @@ def uri_replace(ud, uri_find, uri_replace, replacements, d, mirrortarball=None): basename = os.path.basename(mirrortarball) # Kill parameters, they make no sense for mirror tarballs uri_decoded[5] = {} + uri_find_decoded[5] = {} elif ud.localpath and ud.method.supports_checksum(ud): basename = os.path.basename(ud.localpath) - if basename and not result_decoded[loc].endswith(basename): - result_decoded[loc] = os.path.join(result_decoded[loc], basename) + if basename: + uri_basename = os.path.basename(uri_decoded[loc]) + # Prefix with a slash as a sentinel in case + # result_decoded[loc] does not contain one. + path = "/" + result_decoded[loc] + if uri_basename and basename != uri_basename and path.endswith("/" + uri_basename): + result_decoded[loc] = path[1:-len(uri_basename)] + basename + elif not path.endswith("/" + basename): + result_decoded[loc] = os.path.join(path[1:], basename) else: return None result = encodeurl(result_decoded) if result == ud.url: return None - logger.debug(2, "For url %s returning %s" % (ud.url, result)) + logger.debug2("For url %s returning %s" % (ud.url, result)) return result methods = [] @@ -497,22 +498,27 @@ def fetcher_init(d): Called to initialize the fetchers once the configuration data is known. Calls before this must not hit the cache. """ + + revs = bb.persist_data.persist('BB_URI_HEADREVS', d) + try: + # fetcher_init is called multiple times, so make sure we only save the + # revs the first time it is called. + if not bb.fetch2.saved_headrevs: + bb.fetch2.saved_headrevs = dict(revs) + except: + pass + # When to drop SCM head revisions controlled by user policy srcrev_policy = d.getVar('BB_SRCREV_POLICY') or "clear" if srcrev_policy == "cache": - logger.debug(1, "Keeping SRCREV cache due to cache policy of: %s", srcrev_policy) + logger.debug("Keeping SRCREV cache due to cache policy of: %s", srcrev_policy) elif srcrev_policy == "clear": - logger.debug(1, "Clearing SRCREV cache due to cache policy of: %s", srcrev_policy) - revs = bb.persist_data.persist('BB_URI_HEADREVS', d) - try: - bb.fetch2.saved_headrevs = revs.items() - except: - pass + logger.debug("Clearing SRCREV cache due to cache policy of: %s", srcrev_policy) revs.clear() else: raise FetchError("Invalid SRCREV cache policy of: %s" % srcrev_policy) - _checksum_cache.init_cache(d) + _checksum_cache.init_cache(d.getVar("BB_CACHEDIR")) for m in methods: if hasattr(m, "init"): @@ -524,24 +530,14 @@ def fetcher_parse_save(): def fetcher_parse_done(): _checksum_cache.save_merge() -def fetcher_compare_revisions(): +def fetcher_compare_revisions(d): """ - Compare the revisions in the persistant cache with current values and - return true/false on whether they've changed. + Compare the revisions in the persistent cache with the saved values from + when bitbake was started and return true if they have changed. """ - data = bb.persist_data.persist('BB_URI_HEADREVS', d).items() - data2 = bb.fetch2.saved_headrevs - - changed = False - for key in data: - if key not in data2 or data2[key] != data[key]: - logger.debug(1, "%s changed", key) - changed = True - return True - else: - logger.debug(2, "%s did not change", key) - return False + headrevs = dict(bb.persist_data.persist('BB_URI_HEADREVS', d)) + return headrevs != bb.fetch2.saved_headrevs def mirror_from_string(data): mirrors = (data or "").replace('\\n',' ').split() @@ -550,7 +546,7 @@ def mirror_from_string(data): bb.warn('Invalid mirror data %s, should have paired members.' % data) return list(zip(*[iter(mirrors)]*2)) -def verify_checksum(ud, d, precomputed={}): +def verify_checksum(ud, d, precomputed={}, localpath=None, fatal_nochecksum=True): """ verify the MD5 and SHA256 checksum for downloaded src @@ -564,72 +560,86 @@ def verify_checksum(ud, d, precomputed={}): file against those in the recipe each time, rather than only after downloading. See https://bugzilla.yoctoproject.org/show_bug.cgi?id=5571. """ - - _MD5_KEY = "md5" - _SHA256_KEY = "sha256" - if ud.ignore_checksums or not ud.method.supports_checksum(ud): return {} - if _MD5_KEY in precomputed: - md5data = precomputed[_MD5_KEY] - else: - md5data = bb.utils.md5_file(ud.localpath) + if localpath is None: + localpath = ud.localpath - if _SHA256_KEY in precomputed: - sha256data = precomputed[_SHA256_KEY] - else: - sha256data = bb.utils.sha256_file(ud.localpath) + def compute_checksum_info(checksum_id): + checksum_name = getattr(ud, "%s_name" % checksum_id) - if ud.method.recommends_checksum(ud) and not ud.md5_expected and not ud.sha256_expected: - # If strict checking enabled and neither sum defined, raise error + if checksum_id in precomputed: + checksum_data = precomputed[checksum_id] + else: + checksum_data = getattr(bb.utils, "%s_file" % checksum_id)(localpath) + + checksum_expected = getattr(ud, "%s_expected" % checksum_id) + + if checksum_expected == '': + checksum_expected = None + + return { + "id": checksum_id, + "name": checksum_name, + "data": checksum_data, + "expected": checksum_expected + } + + checksum_infos = [] + for checksum_id in CHECKSUM_LIST: + checksum_infos.append(compute_checksum_info(checksum_id)) + + checksum_dict = {ci["id"] : ci["data"] for ci in checksum_infos} + checksum_event = {"%ssum" % ci["id"] : ci["data"] for ci in checksum_infos} + + for ci in checksum_infos: + if ci["id"] in SHOWN_CHECKSUM_LIST: + checksum_lines = ["SRC_URI[%s] = \"%s\"" % (ci["name"], ci["data"])] + + # If no checksum has been provided + if fatal_nochecksum and ud.method.recommends_checksum(ud) and all(ci["expected"] is None for ci in checksum_infos): + messages = [] strict = d.getVar("BB_STRICT_CHECKSUM") or "0" + + # If strict checking enabled and neither sum defined, raise error if strict == "1": - logger.error('No checksum specified for %s, please add at least one to the recipe:\n' - 'SRC_URI[%s] = "%s"\nSRC_URI[%s] = "%s"' % - (ud.localpath, ud.md5_name, md5data, - ud.sha256_name, sha256data)) - raise NoChecksumError('Missing SRC_URI checksum', ud.url) + raise NoChecksumError("\n".join(checksum_lines)) - bb.event.fire(MissingChecksumEvent(ud.url, md5data, sha256data), d) + bb.event.fire(MissingChecksumEvent(ud.url, **checksum_event), d) if strict == "ignore": - return { - _MD5_KEY: md5data, - _SHA256_KEY: sha256data - } + return checksum_dict # Log missing sums so user can more easily add them - logger.warning('Missing md5 SRC_URI checksum for %s, consider adding to the recipe:\n' - 'SRC_URI[%s] = "%s"', - ud.localpath, ud.md5_name, md5data) - logger.warning('Missing sha256 SRC_URI checksum for %s, consider adding to the recipe:\n' - 'SRC_URI[%s] = "%s"', - ud.localpath, ud.sha256_name, sha256data) + messages.append("Missing checksum for '%s', consider adding at " \ + "least one to the recipe:" % ud.localpath) + messages.extend(checksum_lines) + logger.warning("\n".join(messages)) # We want to alert the user if a checksum is defined in the recipe but # it does not match. - msg = "" - mismatch = False - if ud.md5_expected and ud.md5_expected != md5data: - msg = msg + "\nFile: '%s' has %s checksum %s when %s was expected" % (ud.localpath, 'md5', md5data, ud.md5_expected) - mismatch = True; - - if ud.sha256_expected and ud.sha256_expected != sha256data: - msg = msg + "\nFile: '%s' has %s checksum %s when %s was expected" % (ud.localpath, 'sha256', sha256data, ud.sha256_expected) - mismatch = True; - - if mismatch: - msg = msg + '\nIf this change is expected (e.g. you have upgraded to a new version without updating the checksums) then you can use these lines within the recipe:\nSRC_URI[%s] = "%s"\nSRC_URI[%s] = "%s"\nOtherwise you should retry the download and/or check with upstream to determine if the file has become corrupted or otherwise unexpectedly modified.\n' % (ud.md5_name, md5data, ud.sha256_name, sha256data) - - if len(msg): - raise ChecksumError('Checksum mismatch!%s' % msg, ud.url, md5data) - - return { - _MD5_KEY: md5data, - _SHA256_KEY: sha256data - } - + messages = [] + messages.append("Checksum mismatch!") + bad_checksum = None + + for ci in checksum_infos: + if ci["expected"] and ci["expected"] != ci["data"]: + messages.append("File: '%s' has %s checksum '%s' when '%s' was " \ + "expected" % (localpath, ci["id"], ci["data"], ci["expected"])) + bad_checksum = ci["data"] + + if bad_checksum: + messages.append("If this change is expected (e.g. you have upgraded " \ + "to a new version without updating the checksums) " \ + "then you can use these lines within the recipe:") + messages.extend(checksum_lines) + messages.append("Otherwise you should retry the download and/or " \ + "check with upstream to determine if the file has " \ + "become corrupted or otherwise unexpectedly modified.") + raise ChecksumError("\n".join(messages), ud.url, bad_checksum) + + return checksum_dict def verify_donestamp(ud, d, origud=None): """ @@ -734,13 +744,16 @@ def subprocess_setup(): # SIGPIPE errors are known issues with gzip/bash signal.signal(signal.SIGPIPE, signal.SIG_DFL) -def get_autorev(d): - # only not cache src rev in autorev case +def mark_recipe_nocache(d): if d.getVar('BB_SRCREV_POLICY') != "cache": d.setVar('BB_DONT_CACHE', '1') + +def get_autorev(d): + mark_recipe_nocache(d) + d.setVar("__BBAUTOREV_SEEN", True) return "AUTOINC" -def get_srcrev(d, method_name='sortable_revision'): +def _get_srcrev(d, method_name='sortable_revision'): """ Return the revision string, usually for use in the version string (PV) of the current package Most packages usually only have one SCM so we just pass on the call. @@ -754,30 +767,42 @@ def get_srcrev(d, method_name='sortable_revision'): that fetcher provides a method with the given name and the same signature as sortable_revision. """ + d.setVar("__BBSRCREV_SEEN", "1") + recursion = d.getVar("__BBINSRCREV") + if recursion: + raise FetchError("There are recursive references in fetcher variables, likely through SRC_URI") + d.setVar("__BBINSRCREV", True) + scms = [] + revs = [] fetcher = Fetch(d.getVar('SRC_URI').split(), d) urldata = fetcher.ud for u in urldata: if urldata[u].method.supports_srcrev(): scms.append(u) - if len(scms) == 0: - raise FetchError("SRCREV was used yet no valid SCM was found in SRC_URI") + if not scms: + d.delVar("__BBINSRCREV") + return "", revs + if len(scms) == 1 and len(urldata[scms[0]].names) == 1: autoinc, rev = getattr(urldata[scms[0]].method, method_name)(urldata[scms[0]], d, urldata[scms[0]].names[0]) + revs.append(rev) if len(rev) > 10: rev = rev[:10] + d.delVar("__BBINSRCREV") if autoinc: - return "AUTOINC+" + rev - return rev + return "AUTOINC+" + rev, revs + return rev, revs # # Mutiple SCMs are in SRC_URI so we resort to SRCREV_FORMAT # format = d.getVar('SRCREV_FORMAT') if not format: - raise FetchError("The SRCREV_FORMAT variable must be set when multiple SCMs are used.") + raise FetchError("The SRCREV_FORMAT variable must be set when multiple SCMs are used.\n"\ + "The SCMs are:\n%s" % '\n'.join(scms)) name_to_rev = {} seenautoinc = False @@ -785,6 +810,7 @@ def get_srcrev(d, method_name='sortable_revision'): ud = urldata[scm] for name in ud.names: autoinc, rev = getattr(ud.method, method_name)(ud, d, name) + revs.append(rev) seenautoinc = seenautoinc or autoinc if len(rev) > 10: rev = rev[:10] @@ -801,12 +827,70 @@ def get_srcrev(d, method_name='sortable_revision'): if seenautoinc: format = "AUTOINC+" + format - return format + d.delVar("__BBINSRCREV") + return format, revs + +def get_hashvalue(d, method_name='sortable_revision'): + pkgv, revs = _get_srcrev(d, method_name=method_name) + return " ".join(revs) + +def get_pkgv_string(d, method_name='sortable_revision'): + pkgv, revs = _get_srcrev(d, method_name=method_name) + return pkgv + +def get_srcrev(d, method_name='sortable_revision'): + pkgv, revs = _get_srcrev(d, method_name=method_name) + if not pkgv: + raise FetchError("SRCREV was used yet no valid SCM was found in SRC_URI") + return pkgv def localpath(url, d): fetcher = bb.fetch2.Fetch([url], d) return fetcher.localpath(url) +# Need to export PATH as binary could be in metadata paths +# rather than host provided +# Also include some other variables. +FETCH_EXPORT_VARS = ['HOME', 'PATH', + 'HTTP_PROXY', 'http_proxy', + 'HTTPS_PROXY', 'https_proxy', + 'FTP_PROXY', 'ftp_proxy', + 'FTPS_PROXY', 'ftps_proxy', + 'NO_PROXY', 'no_proxy', + 'ALL_PROXY', 'all_proxy', + 'GIT_PROXY_COMMAND', + 'GIT_SSH', + 'GIT_SSH_COMMAND', + 'GIT_SSL_CAINFO', + 'GIT_SMART_HTTP', + 'SSH_AUTH_SOCK', 'SSH_AGENT_PID', + 'SOCKS5_USER', 'SOCKS5_PASSWD', + 'DBUS_SESSION_BUS_ADDRESS', + 'P4CONFIG', + 'SSL_CERT_FILE', + 'NODE_EXTRA_CA_CERTS', + 'AWS_PROFILE', + 'AWS_ACCESS_KEY_ID', + 'AWS_SECRET_ACCESS_KEY', + 'AWS_ROLE_ARN', + 'AWS_WEB_IDENTITY_TOKEN_FILE', + 'AWS_DEFAULT_REGION', + 'AWS_SESSION_TOKEN', + 'GIT_CACHE_PATH', + 'REMOTE_CONTAINERS_IPC', + 'SSL_CERT_DIR'] + +def get_fetcher_environment(d): + newenv = {} + origenv = d.getVar("BB_ORIGENV") + for name in bb.fetch2.FETCH_EXPORT_VARS: + value = d.getVar(name) + if not value and origenv: + value = origenv.getVar(name) + if value: + newenv[name] = value + return newenv + def runfetchcmd(cmd, d, quiet=False, cleanup=None, log=None, workdir=None): """ Run cmd returning the command output @@ -815,36 +899,21 @@ def runfetchcmd(cmd, d, quiet=False, cleanup=None, log=None, workdir=None): Optionally remove the files/directories listed in cleanup upon failure """ - # Need to export PATH as binary could be in metadata paths - # rather than host provided - # Also include some other variables. - # FIXME: Should really include all export varaiables? - exportvars = ['HOME', 'PATH', - 'HTTP_PROXY', 'http_proxy', - 'HTTPS_PROXY', 'https_proxy', - 'FTP_PROXY', 'ftp_proxy', - 'FTPS_PROXY', 'ftps_proxy', - 'NO_PROXY', 'no_proxy', - 'ALL_PROXY', 'all_proxy', - 'GIT_PROXY_COMMAND', - 'GIT_SSL_CAINFO', - 'GIT_SMART_HTTP', - 'SSH_AUTH_SOCK', 'SSH_AGENT_PID', - 'SOCKS5_USER', 'SOCKS5_PASSWD', - 'DBUS_SESSION_BUS_ADDRESS', - 'P4CONFIG'] + exportvars = FETCH_EXPORT_VARS if not cleanup: cleanup = [] - # If PATH contains WORKDIR which contains PV which contains SRCPV we + # If PATH contains WORKDIR which contains PV-PR which contains SRCPV we # can end up in circular recursion here so give the option of breaking it # in a data store copy. try: d.getVar("PV") + d.getVar("PR") except bb.data_smart.ExpansionError: d = bb.data.createCopy(d) d.setVar("PV", "fetcheravoidrecurse") + d.setVar("PR", "fetcheravoidrecurse") origenv = d.getVar("BB_ORIGENV", False) for var in exportvars: @@ -855,7 +924,10 @@ def runfetchcmd(cmd, d, quiet=False, cleanup=None, log=None, workdir=None): # Disable pseudo as it may affect ssh, potentially causing it to hang. cmd = 'export PSEUDO_DISABLED=1; ' + cmd - logger.debug(1, "Running %s", cmd) + if workdir: + logger.debug("Running '%s' in %s" % (cmd, workdir)) + else: + logger.debug("Running %s", cmd) success = False error_message = "" @@ -864,14 +936,17 @@ def runfetchcmd(cmd, d, quiet=False, cleanup=None, log=None, workdir=None): (output, errors) = bb.process.run(cmd, log=log, shell=True, stderr=subprocess.PIPE, cwd=workdir) success = True except bb.process.NotFoundError as e: - error_message = "Fetch command %s" % (e.command) + error_message = "Fetch command %s not found" % (e.command) except bb.process.ExecutionError as e: if e.stdout: output = "output:\n%s\n%s" % (e.stdout, e.stderr) elif e.stderr: output = "output:\n%s" % e.stderr else: - output = "no output" + if log: + output = "see logfile for output" + else: + output = "no output" error_message = "Fetch command %s failed with exit code %s, %s" % (e.command, e.exitcode, output) except bb.process.CmdError as e: error_message = "Fetch command %s could not be run:\n%s" % (e.command, e.msg) @@ -891,12 +966,12 @@ def check_network_access(d, info, url): log remote network access, and error if BB_NO_NETWORK is set or the given URI is untrusted """ - if d.getVar("BB_NO_NETWORK") == "1": + if bb.utils.to_boolean(d.getVar("BB_NO_NETWORK")): raise NetworkAccess(url, info) elif not trusted_network(d, url): raise UntrustedUrl(url, info) else: - logger.debug(1, "Fetcher accessed the network with the command %s" % info) + logger.debug("Fetcher accessed the network with the command %s" % info) def build_mirroruris(origud, mirrors, ld): uris = [] @@ -922,7 +997,7 @@ def build_mirroruris(origud, mirrors, ld): continue if not trusted_network(ld, newuri): - logger.debug(1, "Mirror %s not in the list of trusted networks, skipping" % (newuri)) + logger.debug("Mirror %s not in the list of trusted networks, skipping" % (newuri)) continue # Create a local copy of the mirrors minus the current line @@ -933,10 +1008,11 @@ def build_mirroruris(origud, mirrors, ld): try: newud = FetchData(newuri, ld) + newud.ignore_checksums = True newud.setup_localpath(ld) except bb.fetch2.BBFetchException as e: - logger.debug(1, "Mirror fetch failure for url %s (original url: %s)" % (newuri, origud.url)) - logger.debug(1, str(e)) + logger.debug("Mirror fetch failure for url %s (original url: %s)" % (newuri, origud.url)) + logger.debug(str(e)) try: # setup_localpath of file:// urls may fail, we should still see # if mirrors of the url exist @@ -963,7 +1039,8 @@ def rename_bad_checksum(ud, suffix): new_localpath = "%s_bad-checksum_%s" % (ud.localpath, suffix) bb.warn("Renaming %s to %s" % (ud.localpath, new_localpath)) - bb.utils.movefile(ud.localpath, new_localpath) + if not bb.utils.movefile(ud.localpath, new_localpath): + bb.warn("Renaming %s to %s failed, grep movefile in log.do_fetch to see why" % (ud.localpath, new_localpath)) def try_mirror_url(fetch, origud, ud, ld, check = False): @@ -1016,16 +1093,7 @@ def try_mirror_url(fetch, origud, ud, ld, check = False): origud.method.build_mirror_data(origud, ld) return origud.localpath # Otherwise the result is a local file:// and we symlink to it - if not os.path.exists(origud.localpath): - if os.path.islink(origud.localpath): - # Broken symbolic link - os.unlink(origud.localpath) - - # As per above, in case two tasks end up here simultaneously. - try: - os.symlink(ud.localpath, origud.localpath) - except FileExistsError: - pass + ensure_symlink(ud.localpath, origud.localpath) update_stamp(origud, ld) return ud.localpath @@ -1033,7 +1101,7 @@ def try_mirror_url(fetch, origud, ud, ld, check = False): raise except IOError as e: - if e.errno in [os.errno.ESTALE]: + if e.errno in [errno.ESTALE]: logger.warning("Stale Error Observed %s." % ud.url) return False raise @@ -1047,10 +1115,11 @@ def try_mirror_url(fetch, origud, ud, ld, check = False): elif isinstance(e, NoChecksumError): raise else: - logger.debug(1, "Mirror fetch failure for url %s (original url: %s)" % (ud.url, origud.url)) - logger.debug(1, str(e)) + logger.debug("Mirror fetch failure for url %s (original url: %s)" % (ud.url, origud.url)) + logger.debug(str(e)) try: - ud.method.clean(ud, ld) + if ud.method.cleanup_upon_failure(): + ud.method.clean(ud, ld) except UnboundLocalError: pass return False @@ -1059,6 +1128,24 @@ def try_mirror_url(fetch, origud, ud, ld, check = False): bb.utils.unlockfile(lf) +def ensure_symlink(target, link_name): + if not os.path.exists(link_name): + dirname = os.path.dirname(link_name) + bb.utils.mkdirhier(dirname) + if os.path.islink(link_name): + # Broken symbolic link + os.unlink(link_name) + + # In case this is executing without any file locks held (as is + # the case for file:// URLs), two tasks may end up here at the + # same time, in which case we do not want the second task to + # fail when the link has already been created by the first task. + try: + os.symlink(target, link_name) + except FileExistsError: + pass + + def try_mirrors(fetch, d, origud, mirrors, check = False): """ Try to use a mirrored version of the sources. @@ -1074,7 +1161,7 @@ def try_mirrors(fetch, d, origud, mirrors, check = False): for index, uri in enumerate(uris): ret = try_mirror_url(fetch, origud, uds[index], ld, check) - if ret != False: + if ret: return ret return None @@ -1084,11 +1171,13 @@ def trusted_network(d, url): BB_ALLOWED_NETWORKS is set globally or for a specific recipe. Note: modifies SRC_URI & mirrors. """ - if d.getVar('BB_NO_NETWORK') == "1": + if bb.utils.to_boolean(d.getVar("BB_NO_NETWORK")): return True pkgname = d.expand(d.getVar('PN', False)) - trusted_hosts = d.getVarFlag('BB_ALLOWED_NETWORKS', pkgname, False) + trusted_hosts = None + if pkgname: + trusted_hosts = d.getVarFlag('BB_ALLOWED_NETWORKS', pkgname, False) if not trusted_hosts: trusted_hosts = d.getVar('BB_ALLOWED_NETWORKS') @@ -1126,11 +1215,11 @@ def srcrev_internal_helper(ud, d, name): pn = d.getVar("PN") attempts = [] if name != '' and pn: - attempts.append("SRCREV_%s_pn-%s" % (name, pn)) + attempts.append("SRCREV_%s:pn-%s" % (name, pn)) if name != '': attempts.append("SRCREV_%s" % name) if pn: - attempts.append("SRCREV_pn-%s" % pn) + attempts.append("SRCREV:pn-%s" % pn) attempts.append("SRCREV") for a in attempts: @@ -1155,6 +1244,7 @@ def srcrev_internal_helper(ud, d, name): if srcrev == "INVALID" or not srcrev: raise FetchError("Please set a valid SRCREV for url %s (possible key names are %s, or use a ;rev=X URL parameter)" % (str(attempts), ud.url), ud.url) if srcrev == "AUTOINC": + d.setVar("__BBAUTOREV_ACTED_UPON", True) srcrev = ud.method.latest_revision(ud, d, name) return srcrev @@ -1166,36 +1256,32 @@ def get_checksum_file_list(d): SRC_URI as a space-separated string """ fetch = Fetch([], d, cache = False, localonly = True) - - dl_dir = d.getVar('DL_DIR') filelist = [] for u in fetch.urls: ud = fetch.ud[u] - if ud and isinstance(ud.method, local.Local): - paths = ud.method.localpaths(ud, d) + found = False + paths = ud.method.localfile_searchpaths(ud, d) for f in paths: pth = ud.decodedurl - if '*' in pth: - f = os.path.join(os.path.abspath(f), pth) - if f.startswith(dl_dir): - # The local fetcher's behaviour is to return a path under DL_DIR if it couldn't find the file anywhere else - if os.path.exists(f): - bb.warn("Getting checksum for %s SRC_URI entry %s: file not found except in DL_DIR" % (d.getVar('PN'), os.path.basename(f))) - else: - bb.warn("Unable to get checksum for %s SRC_URI entry %s: file could not be found" % (d.getVar('PN'), os.path.basename(f))) + if os.path.exists(f): + found = True filelist.append(f + ":" + str(os.path.exists(f))) + if not found: + bb.fatal(("Unable to get checksum for %s SRC_URI entry %s: file could not be found" + "\nThe following paths were searched:" + "\n%s") % (d.getVar('PN'), os.path.basename(f), '\n'.join(paths))) return " ".join(filelist) -def get_file_checksums(filelist, pn): +def get_file_checksums(filelist, pn, localdirsexclude): """Get a list of the checksums for a list of local files Returns the checksums for a list of local files, caching the results as it proceeds """ - return _checksum_cache.get_checksums(filelist, pn) + return _checksum_cache.get_checksums(filelist, pn, localdirsexclude) class FetchData(object): @@ -1221,25 +1307,22 @@ class FetchData(object): self.pswd = self.parm["pswd"] self.setup = False - if "name" in self.parm: - self.md5_name = "%s.md5sum" % self.parm["name"] - self.sha256_name = "%s.sha256sum" % self.parm["name"] - else: - self.md5_name = "md5sum" - self.sha256_name = "sha256sum" - if self.md5_name in self.parm: - self.md5_expected = self.parm[self.md5_name] - elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3"]: - self.md5_expected = None - else: - self.md5_expected = d.getVarFlag("SRC_URI", self.md5_name) - if self.sha256_name in self.parm: - self.sha256_expected = self.parm[self.sha256_name] - elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3"]: - self.sha256_expected = None - else: - self.sha256_expected = d.getVarFlag("SRC_URI", self.sha256_name) - self.ignore_checksums = False + def configure_checksum(checksum_id): + if "name" in self.parm: + checksum_name = "%s.%ssum" % (self.parm["name"], checksum_id) + else: + checksum_name = "%ssum" % checksum_id + + setattr(self, "%s_name" % checksum_id, checksum_name) + + if checksum_name in self.parm: + checksum_expected = self.parm[checksum_name] + elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3", "az", "crate", "gs"]: + checksum_expected = None + else: + checksum_expected = d.getVarFlag("SRC_URI", checksum_name) + + setattr(self, "%s_expected" % checksum_id, checksum_expected) self.names = self.parm.get("name",'default').split(',') @@ -1262,6 +1345,11 @@ class FetchData(object): if hasattr(self.method, "urldata_init"): self.method.urldata_init(self, d) + for checksum_id in CHECKSUM_LIST: + configure_checksum(checksum_id) + + self.ignore_checksums = False + if "localpath" in self.parm: # if user sets localpath for file, use it instead. self.localpath = self.parm["localpath"] @@ -1341,12 +1429,12 @@ class FetchMethod(object): Is localpath something that can be represented by a checksum? """ - # We cannot compute checksums for directories - if os.path.isdir(urldata.localpath) == True: + # We cannot compute checksums for None + if urldata.localpath is None: return False - if urldata.localpath.find("*") != -1: + # We cannot compute checksums for directories + if os.path.isdir(urldata.localpath): return False - return True def recommends_checksum(self, urldata): @@ -1356,6 +1444,24 @@ class FetchMethod(object): """ return False + def cleanup_upon_failure(self): + """ + When a fetch fails, should clean() be called? + """ + return True + + def verify_donestamp(self, ud, d): + """ + Verify the donestamp file + """ + return verify_donestamp(ud, d) + + def update_donestamp(self, ud, d): + """ + Update the donestamp file + """ + update_stamp(ud, d) + def _strip_leading_slashes(self, relpath): """ Remove leading slash as os.path.join can't cope @@ -1391,17 +1497,12 @@ class FetchMethod(object): Fetch urls Assumes localpath was called first """ - raise NoMethodError(url) + raise NoMethodError(urldata.url) def unpack(self, urldata, rootdir, data): iterate = False file = urldata.localpath - # Localpath can't deal with 'dir/*' entries, so it converts them to '.', - # but it must be corrected back for local files copying - if urldata.basename == '*' and file.endswith('/.'): - file = '%s/%s' % (file.rstrip('/.'), urldata.path) - try: unpack = bb.utils.to_boolean(urldata.parm.get('unpack'), True) except ValueError as exc: @@ -1416,28 +1517,35 @@ class FetchMethod(object): cmd = None if unpack: + tar_cmd = 'tar --extract --no-same-owner' + if 'striplevel' in urldata.parm: + tar_cmd += ' --strip-components=%s' % urldata.parm['striplevel'] if file.endswith('.tar'): - cmd = 'tar x --no-same-owner -f %s' % file + cmd = '%s -f %s' % (tar_cmd, file) elif file.endswith('.tgz') or file.endswith('.tar.gz') or file.endswith('.tar.Z'): - cmd = 'tar xz --no-same-owner -f %s' % file + cmd = '%s -z -f %s' % (tar_cmd, file) elif file.endswith('.tbz') or file.endswith('.tbz2') or file.endswith('.tar.bz2'): - cmd = 'bzip2 -dc %s | tar x --no-same-owner -f -' % file + cmd = 'bzip2 -dc %s | %s -f -' % (file, tar_cmd) elif file.endswith('.gz') or file.endswith('.Z') or file.endswith('.z'): cmd = 'gzip -dc %s > %s' % (file, efile) elif file.endswith('.bz2'): cmd = 'bzip2 -dc %s > %s' % (file, efile) elif file.endswith('.txz') or file.endswith('.tar.xz'): - cmd = 'xz -dc %s | tar x --no-same-owner -f -' % file + cmd = 'xz -dc %s | %s -f -' % (file, tar_cmd) elif file.endswith('.xz'): cmd = 'xz -dc %s > %s' % (file, efile) elif file.endswith('.tar.lz'): - cmd = 'lzip -dc %s | tar x --no-same-owner -f -' % file + cmd = 'lzip -dc %s | %s -f -' % (file, tar_cmd) elif file.endswith('.lz'): cmd = 'lzip -dc %s > %s' % (file, efile) elif file.endswith('.tar.7z'): - cmd = '7z x -so %s | tar x --no-same-owner -f -' % file + cmd = '7z x -so %s | %s -f -' % (file, tar_cmd) elif file.endswith('.7z'): cmd = '7za x -y %s 1>/dev/null' % file + elif file.endswith('.tzst') or file.endswith('.tar.zst'): + cmd = 'zstd --decompress --stdout %s | %s -f -' % (file, tar_cmd) + elif file.endswith('.zst'): + cmd = 'zstd --decompress --stdout %s > %s' % (file, efile) elif file.endswith('.zip') or file.endswith('.jar'): try: dos = bb.utils.to_boolean(urldata.parm.get('dos'), False) @@ -1457,7 +1565,7 @@ class FetchMethod(object): else: cmd = 'rpm2cpio.sh %s | cpio -id' % (file) elif file.endswith('.deb') or file.endswith('.ipk'): - output = subprocess.check_output('ar -t %s' % file, preexec_fn=subprocess_setup, shell=True) + output = subprocess.check_output(['ar', '-t', file], preexec_fn=subprocess_setup) datafile = None if output: for line in output.decode().splitlines(): @@ -1468,7 +1576,7 @@ class FetchMethod(object): raise UnpackError("Unable to unpack deb/ipk package - does not contain data.tar.* file", urldata.url) else: raise UnpackError("Unable to unpack deb/ipk package - could not list contents", urldata.url) - cmd = 'ar x %s %s && tar --no-same-owner -xpf %s && rm %s' % (file, datafile, datafile, datafile) + cmd = 'ar x %s %s && %s -p -f %s && rm %s' % (file, datafile, tar_cmd, datafile, datafile) # If 'subdir' param exists, create a dir and use it as destination for unpack cmd if 'subdir' in urldata.parm: @@ -1484,6 +1592,7 @@ class FetchMethod(object): unpackdir = rootdir if not unpack or not cmd: + urldata.unpack_tracer.unpack("file-copy", unpackdir) # If file == dest, then avoid any copies, as we already put the file into dest! dest = os.path.join(unpackdir, os.path.basename(file)) if file != dest and not (os.path.exists(dest) and os.path.samefile(file, dest)): @@ -1497,7 +1606,9 @@ class FetchMethod(object): if urlpath.find("/") != -1: destdir = urlpath.rsplit("/", 1)[0] + '/' bb.utils.mkdirhier("%s/%s" % (unpackdir, destdir)) - cmd = 'cp -fpPRH %s %s' % (file, destdir) + cmd = 'cp -fpPRH "%s" "%s"' % (file, destdir) + else: + urldata.unpack_tracer.unpack("archive-extract", unpackdir) if not cmd: return @@ -1530,12 +1641,18 @@ class FetchMethod(object): """ return True + def try_mirrors(self, fetch, urldata, d, mirrors, check=False): + """ + Try to use a mirror + """ + return bool(try_mirrors(fetch, d, urldata, mirrors, check)) + def checkstatus(self, fetch, urldata, d): """ Check the status of a URL Assumes localpath was called first """ - logger.info("URL %s could not be checked for status since no method exists.", url) + logger.info("URL %s could not be checked for status since no method exists.", urldata.url) return True def latest_revision(self, ud, d, name): @@ -1543,7 +1660,7 @@ class FetchMethod(object): Look in the cache for the latest revision, if not present ask the SCM. """ if not hasattr(self, "_latest_revision"): - raise ParameterError("The fetcher for this URL does not support _latest_revision", url) + raise ParameterError("The fetcher for this URL does not support _latest_revision", ud.url) revs = bb.persist_data.persist('BB_URI_HEADREVS', d) key = self.generate_revision_key(ud, d, name) @@ -1558,8 +1675,7 @@ class FetchMethod(object): return True, str(latest_rev) def generate_revision_key(self, ud, d, name): - key = self._revision_key(ud, d, name) - return "%s-%s" % (key, d.getVar("PN") or "") + return self._revision_key(ud, d, name) def latest_versionstring(self, ud, d): """ @@ -1569,12 +1685,76 @@ class FetchMethod(object): """ return ('', '') + def done(self, ud, d): + """ + Is the download done ? + """ + if os.path.exists(ud.localpath): + return True + return False + + def implicit_urldata(self, ud, d): + """ + Get a list of FetchData objects for any implicit URLs that will also + be downloaded when we fetch the given URL. + """ + return [] + + +class DummyUnpackTracer(object): + """ + Abstract API definition for a class that traces unpacked source files back + to their respective upstream SRC_URI entries, for software composition + analysis, license compliance and detailed SBOM generation purposes. + User may load their own unpack tracer class (instead of the dummy + one) by setting the BB_UNPACK_TRACER_CLASS config parameter. + """ + def start(self, unpackdir, urldata_dict, d): + """ + Start tracing the core Fetch.unpack process, using an index to map + unpacked files to each SRC_URI entry. + This method is called by Fetch.unpack and it may receive nested calls by + gitsm and npmsw fetchers, that expand SRC_URI entries by adding implicit + URLs and by recursively calling Fetch.unpack from new (nested) Fetch + instances. + """ + return + def start_url(self, url): + """Start tracing url unpack process. + This method is called by Fetch.unpack before the fetcher-specific unpack + method starts, and it may receive nested calls by gitsm and npmsw + fetchers. + """ + return + def unpack(self, unpack_type, destdir): + """ + Set unpack_type and destdir for current url. + This method is called by the fetcher-specific unpack method after url + tracing started. + """ + return + def finish_url(self, url): + """Finish tracing url unpack process and update the file index. + This method is called by Fetch.unpack after the fetcher-specific unpack + method finished its job, and it may receive nested calls by gitsm + and npmsw fetchers. + """ + return + def complete(self): + """ + Finish tracing the Fetch.unpack process, and check if all nested + Fecth.unpack calls (if any) have been completed; if so, save collected + metadata. + """ + return + + class Fetch(object): def __init__(self, urls, d, cache = True, localonly = False, connection_cache = None): if localonly and cache: raise Exception("bb.fetch2.Fetch.__init__: cannot set cache and localonly at same time") - if len(urls) == 0: + if not urls: urls = d.getVar("SRC_URI").split() self.urls = urls self.d = d @@ -1583,20 +1763,43 @@ class Fetch(object): fn = d.getVar('FILE') mc = d.getVar('__BBMULTICONFIG') or "" - if cache and fn and mc + fn in urldata_cache: - self.ud = urldata_cache[mc + fn] + key = None + if cache and fn: + key = mc + fn + str(id(d)) + if key in urldata_cache: + self.ud = urldata_cache[key] + + # the unpack_tracer object needs to be made available to possible nested + # Fetch instances (when those are created by gitsm and npmsw fetchers) + # so we set it as a global variable + global unpack_tracer + try: + unpack_tracer + except NameError: + class_path = d.getVar("BB_UNPACK_TRACER_CLASS") + if class_path: + # use user-defined unpack tracer class + import importlib + module_name, _, class_name = class_path.rpartition(".") + module = importlib.import_module(module_name) + class_ = getattr(module, class_name) + unpack_tracer = class_() + else: + # fall back to the dummy/abstract class + unpack_tracer = DummyUnpackTracer() for url in urls: if url not in self.ud: try: self.ud[url] = FetchData(url, d, localonly) + self.ud[url].unpack_tracer = unpack_tracer except NonLocalMethod: if localonly: self.ud[url] = None pass - if fn and cache: - urldata_cache[mc + fn] = self.ud + if key: + urldata_cache[key] = self.ud def localpath(self, url): if url not in self.urls: @@ -1626,53 +1829,55 @@ class Fetch(object): urls = self.urls network = self.d.getVar("BB_NO_NETWORK") - premirroronly = (self.d.getVar("BB_FETCH_PREMIRRORONLY") == "1") + premirroronly = bb.utils.to_boolean(self.d.getVar("BB_FETCH_PREMIRRORONLY")) + checksum_missing_messages = [] for u in urls: ud = self.ud[u] ud.setup_localpath(self.d) m = ud.method - localpath = "" + done = False if ud.lockfile: lf = bb.utils.lockfile(ud.lockfile) try: self.d.setVar("BB_NO_NETWORK", network) - - if verify_donestamp(ud, self.d) and not m.need_update(ud, self.d): - localpath = ud.localpath + if m.verify_donestamp(ud, self.d) and not m.need_update(ud, self.d): + done = True elif m.try_premirror(ud, self.d): - logger.debug(1, "Trying PREMIRRORS") + logger.debug("Trying PREMIRRORS") mirrors = mirror_from_string(self.d.getVar('PREMIRRORS')) - localpath = try_mirrors(self, self.d, ud, mirrors, False) - if localpath: + done = m.try_mirrors(self, ud, self.d, mirrors) + if done: try: # early checksum verification so that if the checksum of the premirror # contents mismatch the fetcher can still try upstream and mirrors - update_stamp(ud, self.d) + m.update_donestamp(ud, self.d) except ChecksumError as e: logger.warning("Checksum failure encountered with premirror download of %s - will attempt other sources." % u) - logger.debug(1, str(e)) - localpath = "" + logger.debug(str(e)) + done = False if premirroronly: self.d.setVar("BB_NO_NETWORK", "1") firsterr = None - verified_stamp = verify_donestamp(ud, self.d) - if not localpath and (not verified_stamp or m.need_update(ud, self.d)): + verified_stamp = False + if done: + verified_stamp = m.verify_donestamp(ud, self.d) + if not done and (not verified_stamp or m.need_update(ud, self.d)): try: if not trusted_network(self.d, ud.url): raise UntrustedUrl(ud.url) - logger.debug(1, "Trying Upstream") + logger.debug("Trying Upstream") m.download(ud, self.d) if hasattr(m, "build_mirror_data"): m.build_mirror_data(ud, self.d) - localpath = ud.localpath + done = True # early checksum verify, so that if checksum mismatched, # fetcher still have chance to fetch from mirror - update_stamp(ud, self.d) + m.update_donestamp(ud, self.d) except bb.fetch2.NetworkAccess: raise @@ -1680,46 +1885,57 @@ class Fetch(object): except BBFetchException as e: if isinstance(e, ChecksumError): logger.warning("Checksum failure encountered with download of %s - will attempt other sources if available" % u) - logger.debug(1, str(e)) + logger.debug(str(e)) if os.path.exists(ud.localpath): rename_bad_checksum(ud, e.checksum) elif isinstance(e, NoChecksumError): raise else: logger.warning('Failed to fetch URL %s, attempting MIRRORS if available' % u) - logger.debug(1, str(e)) + logger.debug(str(e)) firsterr = e # Remove any incomplete fetch - if not verified_stamp: + if not verified_stamp and m.cleanup_upon_failure(): m.clean(ud, self.d) - logger.debug(1, "Trying MIRRORS") + logger.debug("Trying MIRRORS") mirrors = mirror_from_string(self.d.getVar('MIRRORS')) - localpath = try_mirrors(self, self.d, ud, mirrors) + done = m.try_mirrors(self, ud, self.d, mirrors) - if not localpath or ((not os.path.exists(localpath)) and localpath.find("*") == -1): + if not done or not m.done(ud, self.d): if firsterr: logger.error(str(firsterr)) raise FetchError("Unable to fetch URL from any source.", u) - update_stamp(ud, self.d) + m.update_donestamp(ud, self.d) except IOError as e: - if e.errno in [os.errno.ESTALE]: + if e.errno in [errno.ESTALE]: logger.error("Stale Error Observed %s." % u) raise ChecksumError("Stale Error Detected") except BBFetchException as e: - if isinstance(e, ChecksumError): + if isinstance(e, NoChecksumError): + (message, _) = e.args + checksum_missing_messages.append(message) + continue + elif isinstance(e, ChecksumError): logger.error("Checksum failure fetching %s" % u) raise finally: if ud.lockfile: bb.utils.unlockfile(lf) + if checksum_missing_messages: + logger.error("Missing SRC_URI checksum, please add those to the recipe: \n%s", "\n".join(checksum_missing_messages)) + raise BBFetchException("There was some missing checksums in the recipe") def checkstatus(self, urls=None): """ - Check all urls exist upstream + Check all URLs exist upstream. + + Returns None if the URLs exist, raises FetchError if the check wasn't + successful but there wasn't an error (such as file not found), and + raises other exceptions in error cases. """ if not urls: @@ -1729,20 +1945,20 @@ class Fetch(object): ud = self.ud[u] ud.setup_localpath(self.d) m = ud.method - logger.debug(1, "Testing URL %s", u) + logger.debug("Testing URL %s", u) # First try checking uri, u, from PREMIRRORS mirrors = mirror_from_string(self.d.getVar('PREMIRRORS')) - ret = try_mirrors(self, self.d, ud, mirrors, True) + ret = m.try_mirrors(self, ud, self.d, mirrors, True) if not ret: # Next try checking from the original uri, u ret = m.checkstatus(self, ud, self.d) if not ret: # Finally, try checking uri, u, from MIRRORS mirrors = mirror_from_string(self.d.getVar('MIRRORS')) - ret = try_mirrors(self, self.d, ud, mirrors, True) + ret = m.try_mirrors(self, ud, self.d, mirrors, True) if not ret: - raise FetchError("URL %s doesn't work" % u, u) + raise FetchError("URL doesn't work", u) def unpack(self, root, urls=None): """ @@ -1752,6 +1968,8 @@ class Fetch(object): if not urls: urls = self.urls + unpack_tracer.start(root, self.ud, self.d) + for u in urls: ud = self.ud[u] ud.setup_localpath(self.d) @@ -1759,11 +1977,15 @@ class Fetch(object): if ud.lockfile: lf = bb.utils.lockfile(ud.lockfile) + unpack_tracer.start_url(u) ud.method.unpack(ud, root, self.d) + unpack_tracer.finish_url(u) if ud.lockfile: bb.utils.unlockfile(lf) + unpack_tracer.complete() + def clean(self, urls=None): """ Clean files that the fetcher gets or places @@ -1774,7 +1996,7 @@ class Fetch(object): for url in urls: if url not in self.ud: - self.ud[url] = FetchData(url, d) + self.ud[url] = FetchData(url, self.d) ud = self.ud[url] ud.setup_localpath(self.d) @@ -1791,6 +2013,24 @@ class Fetch(object): if ud.lockfile: bb.utils.unlockfile(lf) + def expanded_urldata(self, urls=None): + """ + Get an expanded list of FetchData objects covering both the given + URLS and any additional implicit URLs that are added automatically by + the appropriate FetchMethod. + """ + + if not urls: + urls = self.urls + + urldata = [] + for url in urls: + ud = self.ud[url] + urldata.append(ud) + urldata += ud.method.implicit_urldata(ud, self.d) + + return urldata + class FetchConnectionCache(object): """ A class which represents an container for socket connections. @@ -1844,6 +2084,10 @@ from . import osc from . import repo from . import clearcase from . import npm +from . import npmsw +from . import az +from . import crate +from . import gcp methods.append(local.Local()) methods.append(wget.Wget()) @@ -1862,3 +2106,7 @@ methods.append(osc.Osc()) methods.append(repo.Repo()) methods.append(clearcase.ClearCase()) methods.append(npm.Npm()) +methods.append(npmsw.NpmShrinkWrap()) +methods.append(az.Az()) +methods.append(crate.Crate()) +methods.append(gcp.GCP()) diff --git a/lib/bb/fetch2/az.py b/lib/bb/fetch2/az.py new file mode 100644 index 000000000..3ccc594c2 --- /dev/null +++ b/lib/bb/fetch2/az.py @@ -0,0 +1,93 @@ +""" +BitBake 'Fetch' Azure Storage implementation + +""" + +# Copyright (C) 2021 Alejandro Hernandez Samaniego +# +# Based on bb.fetch2.wget: +# Copyright (C) 2003, 2004 Chris Larson +# +# SPDX-License-Identifier: GPL-2.0-only +# +# Based on functions from the base bb module, Copyright 2003 Holger Schurig + +import shlex +import os +import bb +from bb.fetch2 import FetchError +from bb.fetch2 import logger +from bb.fetch2.wget import Wget + + +class Az(Wget): + + def supports(self, ud, d): + """ + Check to see if a given url can be fetched from Azure Storage + """ + return ud.type in ['az'] + + + def checkstatus(self, fetch, ud, d, try_again=True): + + # checkstatus discards parameters either way, we need to do this before adding the SAS + ud.url = ud.url.replace('az://','https://').split(';')[0] + + az_sas = d.getVar('AZ_SAS') + if az_sas and az_sas not in ud.url: + ud.url += az_sas + + return Wget.checkstatus(self, fetch, ud, d, try_again) + + # Override download method, include retries + def download(self, ud, d, retries=3): + """Fetch urls""" + + # If were reaching the account transaction limit we might be refused a connection, + # retrying allows us to avoid false negatives since the limit changes over time + fetchcmd = self.basecmd + ' --retry-connrefused --waitretry=5' + + # We need to provide a localpath to avoid wget using the SAS + # ud.localfile either has the downloadfilename or ud.path + localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + bb.utils.mkdirhier(os.path.dirname(localpath)) + fetchcmd += " -O %s" % shlex.quote(localpath) + + + if ud.user and ud.pswd: + fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd) + + # Check if a Shared Access Signature was given and use it + az_sas = d.getVar('AZ_SAS') + + if az_sas: + azuri = '%s%s%s%s' % ('https://', ud.host, ud.path, az_sas) + else: + azuri = '%s%s%s' % ('https://', ud.host, ud.path) + + if os.path.exists(ud.localpath): + # file exists, but we didnt complete it.. trying again. + fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % azuri) + else: + fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % azuri) + + try: + self._runwget(ud, d, fetchcmd, False) + except FetchError as e: + # Azure fails on handshake sometimes when using wget after some stress, producing a + # FetchError from the fetcher, if the artifact exists retyring should succeed + if 'Unable to establish SSL connection' in str(e): + logger.debug2('Unable to establish SSL connection: Retries remaining: %s, Retrying...' % retries) + self.download(ud, d, retries -1) + + # Sanity check since wget can pretend it succeed when it didn't + # Also, this used to happen if sourceforge sent us to the mirror page + if not os.path.exists(ud.localpath): + raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (azuri, ud.localpath), azuri) + + if os.path.getsize(ud.localpath) == 0: + os.remove(ud.localpath) + raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (azuri), azuri) + + return True diff --git a/lib/bb/fetch2/bzr.py b/lib/bb/fetch2/bzr.py index 16123f8af..fc558f50b 100644 --- a/lib/bb/fetch2/bzr.py +++ b/lib/bb/fetch2/bzr.py @@ -10,22 +10,10 @@ BitBake 'Fetch' implementation for bzr. # BitBake build tools. # Copyright (C) 2003, 2004 Chris Larson # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# SPDX-License-Identifier: GPL-2.0-only # -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. import os -import sys -import logging import bb from bb.fetch2 import FetchMethod from bb.fetch2 import FetchError @@ -41,8 +29,9 @@ class Bzr(FetchMethod): init bzr specific variable within url data """ # Create paths to bzr checkouts + bzrdir = d.getVar("BZRDIR") or (d.getVar("DL_DIR") + "/bzr") relpath = self._strip_leading_slashes(ud.path) - ud.pkgdir = os.path.join(d.expand('${BZRDIR}'), ud.host, relpath) + ud.pkgdir = os.path.join(bzrdir, ud.host, relpath) ud.setup_revisions(d) @@ -57,7 +46,7 @@ class Bzr(FetchMethod): command is "fetch", "update", "revno" """ - basecmd = d.expand('${FETCHCMD_bzr}') + basecmd = d.getVar("FETCHCMD_bzr") or "/usr/bin/env bzr" proto = ud.parm.get('protocol', 'http') @@ -85,16 +74,16 @@ class Bzr(FetchMethod): if os.access(os.path.join(ud.pkgdir, os.path.basename(ud.pkgdir), '.bzr'), os.R_OK): bzrcmd = self._buildbzrcommand(ud, d, "update") - logger.debug(1, "BZR Update %s", ud.url) + logger.debug("BZR Update %s", ud.url) bb.fetch2.check_network_access(d, bzrcmd, ud.url) runfetchcmd(bzrcmd, d, workdir=os.path.join(ud.pkgdir, os.path.basename(ud.path))) else: bb.utils.remove(os.path.join(ud.pkgdir, os.path.basename(ud.pkgdir)), True) bzrcmd = self._buildbzrcommand(ud, d, "fetch") bb.fetch2.check_network_access(d, bzrcmd, ud.url) - logger.debug(1, "BZR Checkout %s", ud.url) + logger.debug("BZR Checkout %s", ud.url) bb.utils.mkdirhier(ud.pkgdir) - logger.debug(1, "Running %s", bzrcmd) + logger.debug("Running %s", bzrcmd) runfetchcmd(bzrcmd, d, workdir=ud.pkgdir) scmdata = ud.parm.get("scmdata", "") @@ -120,7 +109,7 @@ class Bzr(FetchMethod): """ Return the latest upstream revision number """ - logger.debug(2, "BZR fetcher hitting network for %s", ud.url) + logger.debug2("BZR fetcher hitting network for %s", ud.url) bb.fetch2.check_network_access(d, self._buildbzrcommand(ud, d, "revno"), ud.url) diff --git a/lib/bb/fetch2/clearcase.py b/lib/bb/fetch2/clearcase.py index 36beab6a5..1a9c86376 100644 --- a/lib/bb/fetch2/clearcase.py +++ b/lib/bb/fetch2/clearcase.py @@ -1,5 +1,3 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- """ BitBake 'Fetch' clearcase implementation @@ -47,29 +45,18 @@ User credentials: """ # Copyright (C) 2014 Siemens AG # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# SPDX-License-Identifier: GPL-2.0-only # import os -import sys import shutil import bb from bb.fetch2 import FetchMethod from bb.fetch2 import FetchError +from bb.fetch2 import MissingParameterError +from bb.fetch2 import ParameterError from bb.fetch2 import runfetchcmd from bb.fetch2 import logger -from distutils import spawn class ClearCase(FetchMethod): """Class to fetch urls via 'clearcase'""" @@ -83,7 +70,7 @@ class ClearCase(FetchMethod): return ud.type in ['ccrc'] def debug(self, msg): - logger.debug(1, "ClearCase: %s", msg) + logger.debug("ClearCase: %s", msg) def urldata_init(self, ud, d): """ @@ -93,7 +80,7 @@ class ClearCase(FetchMethod): if 'protocol' in ud.parm: ud.proto = ud.parm['protocol'] if not ud.proto in ('http', 'https'): - raise fetch2.ParameterError("Invalid protocol type", ud.url) + raise ParameterError("Invalid protocol type", ud.url) ud.vob = '' if 'vob' in ud.parm: @@ -107,7 +94,7 @@ class ClearCase(FetchMethod): else: ud.module = "" - ud.basecmd = d.getVar("FETCHCMD_ccrc") or spawn.find_executable("cleartool") or spawn.find_executable("rcleartool") + ud.basecmd = d.getVar("FETCHCMD_ccrc") or "/usr/bin/env cleartool || rcleartool" if d.getVar("SRCREV") == "INVALID": raise FetchError("Set a valid SRCREV for the clearcase fetcher in your recipe, e.g. SRCREV = \"/main/LATEST\" or any other label of your choice.") @@ -157,18 +144,18 @@ class ClearCase(FetchMethod): basecmd = "%s %s" % (ud.basecmd, command) - if command is 'mkview': + if command == 'mkview': if not "rcleartool" in ud.basecmd: # Cleartool needs a -snapshot view options.append("-snapshot") options.append("-tag %s" % ud.viewname) options.append(ud.viewdir) - elif command is 'rmview': + elif command == 'rmview': options.append("-force") options.append("%s" % ud.viewdir) - elif command is 'setcs': + elif command == 'setcs': options.append("-overwrite") options.append(ud.configspecfile) @@ -250,7 +237,7 @@ class ClearCase(FetchMethod): # Clean clearcase meta-data before tar - runfetchcmd('tar -czf "%s" .' % (ud.localpath), d, cleanup = [ud.localpath]) + runfetchcmd('tar -czf "%s" .' % (ud.localpath), d, cleanup = [ud.localpath], workdir = ud.viewdir) # Clean up so we can create a new view next time self.clean(ud, d); diff --git a/lib/bb/fetch2/crate.py b/lib/bb/fetch2/crate.py new file mode 100644 index 000000000..e611736f0 --- /dev/null +++ b/lib/bb/fetch2/crate.py @@ -0,0 +1,150 @@ +# ex:ts=4:sw=4:sts=4:et +# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- +""" +BitBake 'Fetch' implementation for crates.io +""" + +# Copyright (C) 2016 Doug Goldstein +# +# SPDX-License-Identifier: GPL-2.0-only +# +# Based on functions from the base bb module, Copyright 2003 Holger Schurig + +import hashlib +import json +import os +import subprocess +import bb +from bb.fetch2 import logger, subprocess_setup, UnpackError +from bb.fetch2.wget import Wget + + +class Crate(Wget): + + """Class to fetch crates via wget""" + + def _cargo_bitbake_path(self, rootdir): + return os.path.join(rootdir, "cargo_home", "bitbake") + + def supports(self, ud, d): + """ + Check to see if a given url is for this fetcher + """ + return ud.type in ['crate'] + + def recommends_checksum(self, urldata): + return True + + def urldata_init(self, ud, d): + """ + Sets up to download the respective crate from crates.io + """ + + if ud.type == 'crate': + self._crate_urldata_init(ud, d) + + super(Crate, self).urldata_init(ud, d) + + def _crate_urldata_init(self, ud, d): + """ + Sets up the download for a crate + """ + + # URL syntax is: crate://NAME/VERSION + # break the URL apart by / + parts = ud.url.split('/') + if len(parts) < 5: + raise bb.fetch2.ParameterError("Invalid URL: Must be crate://HOST/NAME/VERSION", ud.url) + + # version is expected to be the last token + # but ignore possible url parameters which will be used + # by the top fetcher class + version = parts[-1].split(";")[0] + # second to last field is name + name = parts[-2] + # host (this is to allow custom crate registries to be specified + host = '/'.join(parts[2:-2]) + + # if using upstream just fix it up nicely + if host == 'crates.io': + host = 'crates.io/api/v1/crates' + + ud.url = "https://%s/%s/%s/download" % (host, name, version) + ud.versionsurl = "https://%s/%s/versions" % (host, name) + ud.parm['downloadfilename'] = "%s-%s.crate" % (name, version) + if 'name' not in ud.parm: + ud.parm['name'] = '%s-%s' % (name, version) + + logger.debug2("Fetching %s to %s" % (ud.url, ud.parm['downloadfilename'])) + + def unpack(self, ud, rootdir, d): + """ + Uses the crate to build the necessary paths for cargo to utilize it + """ + if ud.type == 'crate': + return self._crate_unpack(ud, rootdir, d) + else: + super(Crate, self).unpack(ud, rootdir, d) + + def _crate_unpack(self, ud, rootdir, d): + """ + Unpacks a crate + """ + thefile = ud.localpath + + # possible metadata we need to write out + metadata = {} + + # change to the rootdir to unpack but save the old working dir + save_cwd = os.getcwd() + os.chdir(rootdir) + + bp = d.getVar('BP') + if bp == ud.parm.get('name'): + cmd = "tar -xz --no-same-owner -f %s" % thefile + ud.unpack_tracer.unpack("crate-extract", rootdir) + else: + cargo_bitbake = self._cargo_bitbake_path(rootdir) + ud.unpack_tracer.unpack("cargo-extract", cargo_bitbake) + + cmd = "tar -xz --no-same-owner -f %s -C %s" % (thefile, cargo_bitbake) + + # ensure we've got these paths made + bb.utils.mkdirhier(cargo_bitbake) + + # generate metadata necessary + with open(thefile, 'rb') as f: + # get the SHA256 of the original tarball + tarhash = hashlib.sha256(f.read()).hexdigest() + + metadata['files'] = {} + metadata['package'] = tarhash + + path = d.getVar('PATH') + if path: + cmd = "PATH=\"%s\" %s" % (path, cmd) + bb.note("Unpacking %s to %s/" % (thefile, os.getcwd())) + + ret = subprocess.call(cmd, preexec_fn=subprocess_setup, shell=True) + + os.chdir(save_cwd) + + if ret != 0: + raise UnpackError("Unpack command %s failed with return value %s" % (cmd, ret), ud.url) + + # if we have metadata to write out.. + if len(metadata) > 0: + cratepath = os.path.splitext(os.path.basename(thefile))[0] + bbpath = self._cargo_bitbake_path(rootdir) + mdfile = '.cargo-checksum.json' + mdpath = os.path.join(bbpath, cratepath, mdfile) + with open(mdpath, "w") as f: + json.dump(metadata, f) + + def latest_versionstring(self, ud, d): + from functools import cmp_to_key + json_data = json.loads(self._fetch_index(ud.versionsurl, ud, d)) + versions = [(0, i["num"], "") for i in json_data["versions"]] + versions = sorted(versions, key=cmp_to_key(bb.utils.vercmp)) + + return (versions[-1][1], "") diff --git a/lib/bb/fetch2/cvs.py b/lib/bb/fetch2/cvs.py index 490c95471..01de5ff4c 100644 --- a/lib/bb/fetch2/cvs.py +++ b/lib/bb/fetch2/cvs.py @@ -1,5 +1,3 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- """ BitBake 'Fetch' implementations @@ -10,24 +8,12 @@ BitBake build tools. # Copyright (C) 2003, 2004 Chris Larson # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# SPDX-License-Identifier: GPL-2.0-only # -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -#Based on functions from the base bb module, Copyright 2003 Holger Schurig +# Based on functions from the base bb module, Copyright 2003 Holger Schurig # import os -import logging import bb from bb.fetch2 import FetchMethod, FetchError, MissingParameterError, logger from bb.fetch2 import runfetchcmd @@ -65,6 +51,10 @@ class Cvs(FetchMethod): ud.localfile = d.expand('%s_%s_%s_%s%s%s.tar.gz' % (ud.module.replace('/', '.'), ud.host, ud.tag, ud.date, norecurse, fullpath)) + pkg = d.getVar('PN') + cvsdir = d.getVar("CVSDIR") or (d.getVar("DL_DIR") + "/cvs") + ud.pkgdir = os.path.join(cvsdir, pkg) + def need_update(self, ud, d): if (ud.date == "now"): return True @@ -110,7 +100,7 @@ class Cvs(FetchMethod): if ud.tag: options.append("-r %s" % ud.tag) - cvsbasecmd = d.getVar("FETCHCMD_cvs") + cvsbasecmd = d.getVar("FETCHCMD_cvs") or "/usr/bin/env cvs" cvscmd = cvsbasecmd + " '-d" + cvsroot + "' co " + " ".join(options) + " " + ud.module cvsupdatecmd = cvsbasecmd + " '-d" + cvsroot + "' update -d -P " + " ".join(options) @@ -119,10 +109,8 @@ class Cvs(FetchMethod): cvsupdatecmd = "CVS_RSH=\"%s\" %s" % (cvs_rsh, cvsupdatecmd) # create module directory - logger.debug(2, "Fetch: checking for module directory") - pkg = d.getVar('PN') - pkgdir = os.path.join(d.getVar('CVSDIR'), pkg) - moddir = os.path.join(pkgdir, localdir) + logger.debug2("Fetch: checking for module directory") + moddir = os.path.join(ud.pkgdir, localdir) workdir = None if os.access(os.path.join(moddir, 'CVS'), os.R_OK): logger.info("Update " + ud.url) @@ -133,9 +121,9 @@ class Cvs(FetchMethod): else: logger.info("Fetch " + ud.url) # check out sources there - bb.utils.mkdirhier(pkgdir) - workdir = pkgdir - logger.debug(1, "Running %s", cvscmd) + bb.utils.mkdirhier(ud.pkgdir) + workdir = ud.pkgdir + logger.debug("Running %s", cvscmd) bb.fetch2.check_network_access(d, cvscmd, ud.url) cmd = cvscmd @@ -153,7 +141,7 @@ class Cvs(FetchMethod): # tar them up to a defined filename workdir = None if 'fullpath' in ud.parm: - workdir = pkgdir + workdir = ud.pkgdir cmd = "tar %s -czf %s %s" % (tar_flags, ud.localpath, localdir) else: workdir = os.path.dirname(os.path.realpath(moddir)) @@ -164,9 +152,6 @@ class Cvs(FetchMethod): def clean(self, ud, d): """ Clean CVS Files and tarballs """ - pkg = d.getVar('PN') - pkgdir = os.path.join(d.getVar("CVSDIR"), pkg) - - bb.utils.remove(pkgdir, True) + bb.utils.remove(ud.pkgdir, True) bb.utils.remove(ud.localpath) diff --git a/lib/bb/fetch2/gcp.py b/lib/bb/fetch2/gcp.py new file mode 100644 index 000000000..eb3e0c6a6 --- /dev/null +++ b/lib/bb/fetch2/gcp.py @@ -0,0 +1,102 @@ +""" +BitBake 'Fetch' implementation for Google Cloup Platform Storage. + +Class for fetching files from Google Cloud Storage using the +Google Cloud Storage Python Client. The GCS Python Client must +be correctly installed, configured and authenticated prior to use. +Additionally, gsutil must also be installed. + +""" + +# Copyright (C) 2023, Snap Inc. +# +# Based in part on bb.fetch2.s3: +# Copyright (C) 2017 Andre McCurdy +# +# SPDX-License-Identifier: GPL-2.0-only +# +# Based on functions from the base bb module, Copyright 2003 Holger Schurig + +import os +import bb +import urllib.parse, urllib.error +from bb.fetch2 import FetchMethod +from bb.fetch2 import FetchError +from bb.fetch2 import logger +from bb.fetch2 import runfetchcmd + +class GCP(FetchMethod): + """ + Class to fetch urls via GCP's Python API. + """ + def __init__(self): + self.gcp_client = None + + def supports(self, ud, d): + """ + Check to see if a given url can be fetched with GCP. + """ + return ud.type in ['gs'] + + def recommends_checksum(self, urldata): + return True + + def urldata_init(self, ud, d): + if 'downloadfilename' in ud.parm: + ud.basename = ud.parm['downloadfilename'] + else: + ud.basename = os.path.basename(ud.path) + + ud.localfile = d.expand(urllib.parse.unquote(ud.basename)) + ud.basecmd = "gsutil stat" + + def get_gcp_client(self): + from google.cloud import storage + self.gcp_client = storage.Client(project=None) + + def download(self, ud, d): + """ + Fetch urls using the GCP API. + Assumes localpath was called first. + """ + logger.debug2(f"Trying to download gs://{ud.host}{ud.path} to {ud.localpath}") + if self.gcp_client is None: + self.get_gcp_client() + + bb.fetch2.check_network_access(d, ud.basecmd, f"gs://{ud.host}{ud.path}") + runfetchcmd("%s %s" % (ud.basecmd, f"gs://{ud.host}{ud.path}"), d) + + # Path sometimes has leading slash, so strip it + path = ud.path.lstrip("/") + blob = self.gcp_client.bucket(ud.host).blob(path) + blob.download_to_filename(ud.localpath) + + # Additional sanity checks copied from the wget class (although there + # are no known issues which mean these are required, treat the GCP API + # tool with a little healthy suspicion). + if not os.path.exists(ud.localpath): + raise FetchError(f"The GCP API returned success for gs://{ud.host}{ud.path} but {ud.localpath} doesn't exist?!") + + if os.path.getsize(ud.localpath) == 0: + os.remove(ud.localpath) + raise FetchError(f"The downloaded file for gs://{ud.host}{ud.path} resulted in a zero size file?! Deleting and failing since this isn't right.") + + return True + + def checkstatus(self, fetch, ud, d): + """ + Check the status of a URL. + """ + logger.debug2(f"Checking status of gs://{ud.host}{ud.path}") + if self.gcp_client is None: + self.get_gcp_client() + + bb.fetch2.check_network_access(d, ud.basecmd, f"gs://{ud.host}{ud.path}") + runfetchcmd("%s %s" % (ud.basecmd, f"gs://{ud.host}{ud.path}"), d) + + # Path sometimes has leading slash, so strip it + path = ud.path.lstrip("/") + if self.gcp_client.bucket(ud.host).blob(path).exists() == False: + raise FetchError(f"The GCP API reported that gs://{ud.host}{ud.path} does not exist") + else: + return True diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py index 3de83bed1..c7ff769fd 100644 --- a/lib/bb/fetch2/git.py +++ b/lib/bb/fetch2/git.py @@ -1,5 +1,3 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- """ BitBake 'Fetch' git implementation @@ -46,43 +44,54 @@ Supported SRC_URI options are: - nobranch Don't check the SHA validation for branch. set this option for the recipe - referring to commit which is valid in tag instead of branch. + referring to commit which is valid in any namespace (branch, tag, ...) + instead of branch. The default is "0", set nobranch=1 if needed. +- subpath + Limit the checkout to a specific subpath of the tree. + By default, checkout the whole tree, set subpath=<path> if needed + +- destsuffix + The name of the path in which to place the checkout. + By default, the path is git/, set destsuffix=<suffix> if needed + - usehead For local git:// urls to use the current branch HEAD as the revision for use with AUTOREV. Implies nobranch. +- lfs + Enable the checkout to use LFS for large files. This will download all LFS files + in the download step, as the unpack step does not have network access. + The default is "1", set lfs=0 to skip. + """ -#Copyright (C) 2005 Richard Purdie +# Copyright (C) 2005 Richard Purdie # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# SPDX-License-Identifier: GPL-2.0-only # -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. import collections import errno import fnmatch import os import re +import shlex +import shutil import subprocess import tempfile import bb import bb.progress +from contextlib import contextmanager from bb.fetch2 import FetchMethod from bb.fetch2 import runfetchcmd from bb.fetch2 import logger +from bb.fetch2 import trusted_network + +sha1_re = re.compile(r'^[0-9a-f]{40}$') +slash_re = re.compile(r"/+") class GitProgressHandler(bb.progress.LineFilterProgressHandler): """Extract progress information from git output""" @@ -141,6 +150,9 @@ class Git(FetchMethod): def supports_checksum(self, urldata): return False + def cleanup_upon_failure(self): + return False + def urldata_init(self, ud, d): """ init git specific variable within url data @@ -152,6 +164,11 @@ class Git(FetchMethod): ud.proto = 'file' else: ud.proto = "git" + if ud.host == "github.com" and ud.proto == "git": + # github stopped supporting git protocol + # https://github.blog/2021-09-01-improving-git-protocol-security-github/#no-more-unauthenticated-git + ud.proto = "https" + bb.warn("URL: %s uses git protocol which is no longer supported by github. Please change to ;protocol=https in the url." % ud.url) if not ud.proto in ('git', 'file', 'ssh', 'http', 'https', 'rsync'): raise bb.fetch2.ParameterError("Invalid protocol type", ud.url) @@ -175,11 +192,18 @@ class Git(FetchMethod): ud.nocheckout = 1 ud.unresolvedrev = {} - branches = ud.parm.get("branch", "master").split(',') + branches = ud.parm.get("branch", "").split(',') + if branches == [""] and not ud.nobranch: + bb.warn("URL: %s does not set any branch parameter. The future default branch used by tools and repositories is uncertain and we will therefore soon require this is set in all git urls." % ud.url) + branches = ["master"] if len(branches) != len(ud.names): raise bb.fetch2.ParameterError("The number of name and branch parameters is not balanced", ud.url) - ud.cloneflags = "-s -n" + ud.noshared = d.getVar("BB_GIT_NOSHARED") == "1" + + ud.cloneflags = "-n" + if not ud.noshared: + ud.cloneflags += " -s" if ud.bareclone: ud.cloneflags += " --mirror" @@ -199,7 +223,7 @@ class Git(FetchMethod): depth_default = 1 ud.shallow_depths = collections.defaultdict(lambda: depth_default) - revs_default = d.getVar("BB_GIT_SHALLOW_REVS", True) + revs_default = d.getVar("BB_GIT_SHALLOW_REVS") ud.shallow_revs = [] ud.branches = {} for pos, name in enumerate(ud.names): @@ -231,9 +255,14 @@ class Git(FetchMethod): ud.shallow = False if ud.usehead: - ud.unresolvedrev['default'] = 'HEAD' + # When usehead is set let's associate 'HEAD' with the unresolved + # rev of this repository. This will get resolved into a revision + # later. If an actual revision happens to have also been provided + # then this setting will be overridden. + for name in ud.names: + ud.unresolvedrev[name] = 'HEAD' - ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0" + ud.basecmd = d.getVar("FETCHCMD_git") or "git -c gc.autoDetach=false -c core.pager=cat -c safe.bareRepository=all" write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0" ud.write_tarballs = write_tarballs != "0" or ud.rebaseable @@ -242,26 +271,26 @@ class Git(FetchMethod): ud.setup_revisions(d) for name in ud.names: - # Ensure anything that doesn't look like a sha256 checksum/revision is translated into one - if not ud.revisions[name] or len(ud.revisions[name]) != 40 or (False in [c in "abcdef0123456789" for c in ud.revisions[name]]): + # Ensure any revision that doesn't look like a SHA-1 is translated into one + if not sha1_re.match(ud.revisions[name] or ''): if ud.revisions[name]: ud.unresolvedrev[name] = ud.revisions[name] ud.revisions[name] = self.latest_revision(ud, d, name) - gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.')) + gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_').replace('(', '_').replace(')', '_')) if gitsrcname.startswith('.'): gitsrcname = gitsrcname[1:] - # for rebaseable git repo, it is necessary to keep mirror tar ball - # per revision, so that even the revision disappears from the + # For a rebaseable git repo, it is necessary to keep a mirror tar ball + # per revision, so that even if the revision disappears from the # upstream repo in the future, the mirror will remain intact and still - # contains the revision + # contain the revision if ud.rebaseable: for name in ud.names: gitsrcname = gitsrcname + '_' + ud.revisions[name] dl_dir = d.getVar("DL_DIR") - gitdir = d.getVar("GITDIR") or (dl_dir + "/git2/") + gitdir = d.getVar("GITDIR") or (dl_dir + "/git2") ud.clonedir = os.path.join(gitdir, gitsrcname) ud.localfile = ud.clonedir @@ -299,22 +328,59 @@ class Git(FetchMethod): return ud.clonedir def need_update(self, ud, d): + return self.clonedir_need_update(ud, d) \ + or self.shallow_tarball_need_update(ud) \ + or self.tarball_need_update(ud) \ + or self.lfs_need_update(ud, d) + + def clonedir_need_update(self, ud, d): if not os.path.exists(ud.clonedir): return True + if ud.shallow and ud.write_shallow_tarballs and self.clonedir_need_shallow_revs(ud, d): + return True for name in ud.names: if not self._contains_ref(ud, d, name, ud.clonedir): return True - if ud.shallow and ud.write_shallow_tarballs and not os.path.exists(ud.fullshallow): - return True - if ud.write_tarballs and not os.path.exists(ud.fullmirror): + return False + + def lfs_need_update(self, ud, d): + if self.clonedir_need_update(ud, d): return True + + for name in ud.names: + if not self._lfs_objects_downloaded(ud, d, name, ud.clonedir): + return True return False + def clonedir_need_shallow_revs(self, ud, d): + for rev in ud.shallow_revs: + try: + runfetchcmd('%s rev-parse -q --verify %s' % (ud.basecmd, rev), d, quiet=True, workdir=ud.clonedir) + except bb.fetch2.FetchError: + return rev + return None + + def shallow_tarball_need_update(self, ud): + return ud.shallow and ud.write_shallow_tarballs and not os.path.exists(ud.fullshallow) + + def tarball_need_update(self, ud): + return ud.write_tarballs and not os.path.exists(ud.fullmirror) + def try_premirror(self, ud, d): # If we don't do this, updating an existing checkout with only premirrors # is not possible - if d.getVar("BB_FETCH_PREMIRRORONLY") is not None: + if bb.utils.to_boolean(d.getVar("BB_FETCH_PREMIRRORONLY")): + return True + # If the url is not in trusted network, that is, BB_NO_NETWORK is set to 0 + # and BB_ALLOWED_NETWORKS does not contain the host that ud.url uses, then + # we need to try premirrors first as using upstream is destined to fail. + if not trusted_network(d, ud.url): return True + # the following check is to ensure incremental fetch in downloads, this is + # because the premirror might be old and does not contain the new rev required, + # and this will cause a total removal and new clone. So if we can reach to + # network, we prefer upstream over premirror, though the premirror might contain + # the new rev. if os.path.exists(ud.clonedir): return False return True @@ -322,45 +388,78 @@ class Git(FetchMethod): def download(self, ud, d): """Fetch url""" - no_clone = not os.path.exists(ud.clonedir) - need_update = no_clone or self.need_update(ud, d) - # A current clone is preferred to either tarball, a shallow tarball is # preferred to an out of date clone, and a missing clone will use # either tarball. - if ud.shallow and os.path.exists(ud.fullshallow) and need_update: + if ud.shallow and os.path.exists(ud.fullshallow) and self.need_update(ud, d): ud.localpath = ud.fullshallow return - elif os.path.exists(ud.fullmirror) and no_clone: - bb.utils.mkdirhier(ud.clonedir) - runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir) - + elif os.path.exists(ud.fullmirror) and self.need_update(ud, d): + if not os.path.exists(ud.clonedir): + bb.utils.mkdirhier(ud.clonedir) + runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir) + else: + tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR')) + runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=tmpdir) + output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir) + if 'mirror' in output: + runfetchcmd("%s remote rm mirror" % ud.basecmd, d, workdir=ud.clonedir) + runfetchcmd("%s remote add --mirror=fetch mirror %s" % (ud.basecmd, tmpdir), d, workdir=ud.clonedir) + fetch_cmd = "LANG=C %s fetch -f --update-head-ok --progress mirror " % (ud.basecmd) + runfetchcmd(fetch_cmd, d, workdir=ud.clonedir) repourl = self._get_repo_url(ud) + needs_clone = False + if os.path.exists(ud.clonedir): + # The directory may exist, but not be the top level of a bare git + # repository in which case it needs to be deleted and re-cloned. + try: + # Since clones can be bare, use --absolute-git-dir instead of --show-toplevel + output = runfetchcmd("LANG=C %s rev-parse --absolute-git-dir" % ud.basecmd, d, workdir=ud.clonedir) + toplevel = output.rstrip() + + if not bb.utils.path_is_descendant(toplevel, ud.clonedir): + logger.warning("Top level directory '%s' is not a descendant of '%s'. Re-cloning", toplevel, ud.clonedir) + needs_clone = True + except bb.fetch2.FetchError as e: + logger.warning("Unable to get top level for %s (not a git directory?): %s", ud.clonedir, e) + needs_clone = True + except FileNotFoundError as e: + logger.warning("%s", e) + needs_clone = True + + if needs_clone: + shutil.rmtree(ud.clonedir) + else: + needs_clone = True + # If the repo still doesn't exist, fallback to cloning it - if not os.path.exists(ud.clonedir): - # We do this since git will use a "-l" option automatically for local urls where possible + if needs_clone: + # We do this since git will use a "-l" option automatically for local urls where possible, + # but it doesn't work when git/objects is a symlink, only works when it is a directory. if repourl.startswith("file://"): - repourl = repourl[7:] - clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, repourl, ud.clonedir) + repourl_path = repourl[7:] + objects = os.path.join(repourl_path, 'objects') + if os.path.isdir(objects) and not os.path.islink(objects): + repourl = repourl_path + clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir) if ud.proto.lower() != 'file': bb.fetch2.check_network_access(d, clone_cmd, ud.url) progresshandler = GitProgressHandler(d) runfetchcmd(clone_cmd, d, log=progresshandler) # Update the checkout if needed - needupdate = False - for name in ud.names: - if not self._contains_ref(ud, d, name, ud.clonedir): - needupdate = True - if needupdate: - try: - runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir) - except bb.fetch2.FetchError: - logger.debug(1, "No Origin") + if self.clonedir_need_update(ud, d): + output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir) + if "origin" in output: + runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir) - runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, repourl), d, workdir=ud.clonedir) - fetch_cmd = "LANG=C %s fetch -f --prune --progress %s refs/*:refs/*" % (ud.basecmd, repourl) + runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir) + + if ud.nobranch: + fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl)) + else: + fetch_cmd = "LANG=C %s fetch -f --progress %s refs/heads/*:refs/heads/* refs/tags/*:refs/tags/*" % (ud.basecmd, shlex.quote(repourl)) if ud.proto.lower() != 'file': bb.fetch2.check_network_access(d, fetch_cmd, ud.url) progresshandler = GitProgressHandler(d) @@ -373,11 +472,57 @@ class Git(FetchMethod): except OSError as exc: if exc.errno != errno.ENOENT: raise + for name in ud.names: if not self._contains_ref(ud, d, name, ud.clonedir): raise bb.fetch2.FetchError("Unable to find revision %s in branch %s even from upstream" % (ud.revisions[name], ud.branches[name])) + if ud.shallow and ud.write_shallow_tarballs: + missing_rev = self.clonedir_need_shallow_revs(ud, d) + if missing_rev: + raise bb.fetch2.FetchError("Unable to find revision %s even from upstream" % missing_rev) + + if self.lfs_need_update(ud, d): + # Unpack temporary working copy, use it to run 'git checkout' to force pre-fetching + # of all LFS blobs needed at the srcrev. + # + # It would be nice to just do this inline here by running 'git-lfs fetch' + # on the bare clonedir, but that operation requires a working copy on some + # releases of Git LFS. + with tempfile.TemporaryDirectory(dir=d.getVar('DL_DIR')) as tmpdir: + # Do the checkout. This implicitly involves a Git LFS fetch. + Git.unpack(self, ud, tmpdir, d) + + # Scoop up a copy of any stuff that Git LFS downloaded. Merge them into + # the bare clonedir. + # + # As this procedure is invoked repeatedly on incremental fetches as + # a recipe's SRCREV is bumped throughout its lifetime, this will + # result in a gradual accumulation of LFS blobs in <ud.clonedir>/lfs + # corresponding to all the blobs reachable from the different revs + # fetched across time. + # + # Only do this if the unpack resulted in a .git/lfs directory being + # created; this only happens if at least one blob needed to be + # downloaded. + if os.path.exists(os.path.join(ud.destdir, ".git", "lfs")): + runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/.git" % ud.destdir) + def build_mirror_data(self, ud, d): + + # Create as a temp file and move atomically into position to avoid races + @contextmanager + def create_atomic(filename): + fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename)) + try: + yield tfile + umask = os.umask(0o666) + os.umask(umask) + os.chmod(tfile, (0o666 & ~umask)) + os.rename(tfile, filename) + finally: + os.close(fd) + if ud.shallow and ud.write_shallow_tarballs: if not os.path.exists(ud.fullshallow): if os.path.islink(ud.fullshallow): @@ -388,7 +533,8 @@ class Git(FetchMethod): self.clone_shallow_local(ud, shallowclone, d) logger.info("Creating tarball of git repository") - runfetchcmd("tar -czf %s ." % ud.fullshallow, d, workdir=shallowclone) + with create_atomic(ud.fullshallow) as tfile: + runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone) runfetchcmd("touch %s.done" % ud.fullshallow, d) finally: bb.utils.remove(tempdir, recurse=True) @@ -397,7 +543,11 @@ class Git(FetchMethod): os.unlink(ud.fullmirror) logger.info("Creating tarball of git repository") - runfetchcmd("tar -czf %s ." % ud.fullmirror, d, workdir=ud.clonedir) + with create_atomic(ud.fullmirror) as tfile: + mtime = runfetchcmd("{} log --all -1 --format=%cD".format(ud.basecmd), d, + quiet=True, workdir=ud.clonedir) + runfetchcmd("tar -czf %s --owner oe:0 --group oe:0 --mtime \"%s\" ." + % (tfile, mtime), d, workdir=ud.clonedir) runfetchcmd("touch %s.done" % ud.fullmirror, d) def clone_shallow_local(self, ud, dest, d): @@ -459,29 +609,74 @@ class Git(FetchMethod): def unpack(self, ud, destdir, d): """ unpack the downloaded src to destdir""" - subdir = ud.parm.get("subpath", "") - if subdir != "": - readpathspec = ":%s" % subdir - def_destsuffix = "%s/" % os.path.basename(subdir.rstrip('/')) - else: - readpathspec = "" - def_destsuffix = "git/" + subdir = ud.parm.get("subdir") + subpath = ud.parm.get("subpath") + readpathspec = "" + def_destsuffix = "git/" + + if subpath: + readpathspec = ":%s" % subpath + def_destsuffix = "%s/" % os.path.basename(subpath.rstrip('/')) + + if subdir: + # If 'subdir' param exists, create a dir and use it as destination for unpack cmd + if os.path.isabs(subdir): + if not os.path.realpath(subdir).startswith(os.path.realpath(destdir)): + raise bb.fetch2.UnpackError("subdir argument isn't a subdirectory of unpack root %s" % destdir, ud.url) + destdir = subdir + else: + destdir = os.path.join(destdir, subdir) + def_destsuffix = "" destsuffix = ud.parm.get("destsuffix", def_destsuffix) destdir = ud.destdir = os.path.join(destdir, destsuffix) if os.path.exists(destdir): bb.utils.prunedir(destdir) + if not ud.bareclone: + ud.unpack_tracer.unpack("git", destdir) - if ud.shallow and (not os.path.exists(ud.clonedir) or self.need_update(ud, d)): - bb.utils.mkdirhier(destdir) - runfetchcmd("tar -xzf %s" % ud.fullshallow, d, workdir=destdir) - else: + need_lfs = self._need_lfs(ud) + + if not need_lfs: + ud.basecmd = "GIT_LFS_SKIP_SMUDGE=1 " + ud.basecmd + + source_found = False + source_error = [] + + clonedir_is_up_to_date = not self.clonedir_need_update(ud, d) + if clonedir_is_up_to_date: runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d) + source_found = True + else: + source_error.append("clone directory not available or not up to date: " + ud.clonedir) + + if not source_found: + if ud.shallow: + if os.path.exists(ud.fullshallow): + bb.utils.mkdirhier(destdir) + runfetchcmd("tar -xzf %s" % ud.fullshallow, d, workdir=destdir) + source_found = True + else: + source_error.append("shallow clone not available: " + ud.fullshallow) + else: + source_error.append("shallow clone not enabled") + + if not source_found: + raise bb.fetch2.UnpackError("No up to date source found: " + "; ".join(source_error), ud.url) repourl = self._get_repo_url(ud) - runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, repourl), d, workdir=destdir) + runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=destdir) + + if self._contains_lfs(ud, d, destdir): + if need_lfs and not self._find_git_lfs(d): + raise bb.fetch2.FetchError("Repository %s has LFS content, install git-lfs on host to download (or set lfs=0 to ignore it)" % (repourl)) + elif not need_lfs: + bb.note("Repository %s has LFS content but it is not being fetched" % (repourl)) + else: + runfetchcmd("%s lfs install --local" % ud.basecmd, d, workdir=destdir) + if not ud.nocheckout: - if subdir != "": + if subpath: runfetchcmd("%s read-tree %s%s" % (ud.basecmd, ud.revisions[ud.names[0]], readpathspec), d, workdir=destdir) runfetchcmd("%s checkout-index -q -f -a" % ud.basecmd, d, workdir=destdir) @@ -499,9 +694,17 @@ class Git(FetchMethod): def clean(self, ud, d): """ clean the git directory """ - bb.utils.remove(ud.localpath, True) - bb.utils.remove(ud.fullmirror) - bb.utils.remove(ud.fullmirror + ".done") + to_remove = [ud.localpath, ud.fullmirror, ud.fullmirror + ".done"] + # The localpath is a symlink to clonedir when it is cloned from a + # mirror, so remove both of them. + if os.path.islink(ud.localpath): + clonedir = os.path.realpath(ud.localpath) + to_remove.append(clonedir) + + for r in to_remove: + if os.path.exists(r): + bb.note('Removing %s' % r) + bb.utils.remove(r, True) def supports_srcrev(self): return True @@ -522,10 +725,79 @@ class Git(FetchMethod): raise bb.fetch2.FetchError("The command '%s' gave output with more then 1 line unexpectedly, output: '%s'" % (cmd, output)) return output.split()[0] != "0" + def _lfs_objects_downloaded(self, ud, d, name, wd): + """ + Verifies whether the LFS objects for requested revisions have already been downloaded + """ + # Bail out early if this repository doesn't use LFS + if not self._need_lfs(ud) or not self._contains_lfs(ud, d, wd): + return True + + # The Git LFS specification specifies ([1]) the LFS folder layout so it should be safe to check for file + # existence. + # [1] https://github.com/git-lfs/git-lfs/blob/main/docs/spec.md#intercepting-git + cmd = "%s lfs ls-files -l %s" \ + % (ud.basecmd, ud.revisions[name]) + output = runfetchcmd(cmd, d, quiet=True, workdir=wd).rstrip() + # Do not do any further matching if no objects are managed by LFS + if not output: + return True + + # Match all lines beginning with the hexadecimal OID + oid_regex = re.compile("^(([a-fA-F0-9]{2})([a-fA-F0-9]{2})[A-Fa-f0-9]+)") + for line in output.split("\n"): + oid = re.search(oid_regex, line) + if not oid: + bb.warn("git lfs ls-files output '%s' did not match expected format." % line) + if not os.path.exists(os.path.join(wd, "lfs", "objects", oid.group(2), oid.group(3), oid.group(1))): + return False + + return True + + def _need_lfs(self, ud): + return ud.parm.get("lfs", "1") == "1" + + def _contains_lfs(self, ud, d, wd): + """ + Check if the repository has 'lfs' (large file) content + """ + + if ud.nobranch: + # If no branch is specified, use the current git commit + refname = self._build_revision(ud, d, ud.names[0]) + elif wd == ud.clonedir: + # The bare clonedir doesn't use the remote names; it has the branch immediately. + refname = ud.branches[ud.names[0]] + else: + refname = "origin/%s" % ud.branches[ud.names[0]] + + cmd = "%s grep lfs %s:.gitattributes | wc -l" % ( + ud.basecmd, refname) + + try: + output = runfetchcmd(cmd, d, quiet=True, workdir=wd) + if int(output) > 0: + return True + except (bb.fetch2.FetchError,ValueError): + pass + return False + + def _find_git_lfs(self, d): + """ + Return True if git-lfs can be found, False otherwise. + """ + import shutil + return shutil.which("git-lfs", path=d.getVar('PATH')) is not None + def _get_repo_url(self, ud): """ Return the repository URL """ + # Note that we do not support passwords directly in the git urls. There are several + # reasons. SRC_URI can be written out to things like buildhistory and people don't + # want to leak passwords like that. Its also all too easy to share metadata without + # removing the password. ssh keys, ~/.netrc and ~/.ssh/config files can be used as + # alternatives so we will not take patches adding password support here. if ud.user: username = ud.user + '@' else: @@ -536,7 +808,8 @@ class Git(FetchMethod): """ Return a unique key for the url """ - return "git:" + ud.host + ud.path.replace('/', '.') + ud.unresolvedrev[name] + # Collapse adjacent slashes + return "git:" + ud.host + slash_re.sub(".", ud.path) + ud.unresolvedrev[name] def _lsremote(self, ud, d, search): """ @@ -554,7 +827,7 @@ class Git(FetchMethod): try: repourl = self._get_repo_url(ud) cmd = "%s ls-remote %s %s" % \ - (ud.basecmd, repourl, search) + (ud.basecmd, shlex.quote(repourl), search) if ud.proto.lower() != 'file': bb.fetch2.check_network_access(d, cmd, repourl) output = runfetchcmd(cmd, d, True) @@ -568,6 +841,12 @@ class Git(FetchMethod): """ Compute the HEAD revision for the url """ + if not d.getVar("__BBSRCREV_SEEN"): + raise bb.fetch2.FetchError("Recipe uses a floating tag/branch '%s' for repo '%s' without a fixed SRCREV yet doesn't call bb.fetch2.get_srcrev() (use SRCPV in PV for OE)." % (ud.unresolvedrev[name], ud.host+ud.path)) + + # Ensure we mark as not cached + bb.fetch2.mark_recipe_nocache(d) + output = self._lsremote(ud, d, "") # Tags of the form ^{} may not work, need to fallback to other form if ud.unresolvedrev[name][:5] == "refs/" or ud.usehead: @@ -592,38 +871,42 @@ class Git(FetchMethod): """ pupver = ('', '') - tagregex = re.compile(d.getVar('UPSTREAM_CHECK_GITTAGREGEX') or "(?P<pver>([0-9][\.|_]?)+)") try: output = self._lsremote(ud, d, "refs/tags/*") except (bb.fetch2.FetchError, bb.fetch2.NetworkAccess) as e: bb.note("Could not list remote: %s" % str(e)) return pupver + rev_tag_re = re.compile(r"([0-9a-f]{40})\s+refs/tags/(.*)") + pver_re = re.compile(d.getVar('UPSTREAM_CHECK_GITTAGREGEX') or r"(?P<pver>([0-9][\.|_]?)+)") + nonrel_re = re.compile(r"(alpha|beta|rc|final)+") + verstring = "" - revision = "" for line in output.split("\n"): if not line: break - tag_head = line.split("/")[-1] + m = rev_tag_re.match(line) + if not m: + continue + + (revision, tag) = m.groups() + # Ignore non-released branches - m = re.search("(alpha|beta|rc|final)+", tag_head) - if m: + if nonrel_re.search(tag): continue # search for version in the line - tag = tagregex.search(tag_head) - if tag == None: + m = pver_re.search(tag) + if not m: continue - tag = tag.group('pver') - tag = tag.replace("_", ".") + pver = m.group('pver').replace("_", ".") - if verstring and bb.utils.vercmp(("0", tag, ""), ("0", verstring, "")) < 0: + if verstring and bb.utils.vercmp(("0", pver, ""), ("0", verstring, "")) < 0: continue - verstring = tag - revision = line.split()[0] + verstring = pver pupver = (verstring, revision) return pupver diff --git a/lib/bb/fetch2/gitannex.py b/lib/bb/fetch2/gitannex.py index a9b69caab..80a808d88 100644 --- a/lib/bb/fetch2/gitannex.py +++ b/lib/bb/fetch2/gitannex.py @@ -1,5 +1,3 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- """ BitBake 'Fetch' git annex implementation """ @@ -7,24 +5,12 @@ BitBake 'Fetch' git annex implementation # Copyright (C) 2014 Otavio Salvador # Copyright (C) 2014 O.S. Systems Software LTDA. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# SPDX-License-Identifier: GPL-2.0-only # -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -import os import bb from bb.fetch2.git import Git from bb.fetch2 import runfetchcmd -from bb.fetch2 import logger class GitANNEX(Git): def supports(self, ud, d): diff --git a/lib/bb/fetch2/gitsm.py b/lib/bb/fetch2/gitsm.py index 0aff1008e..f7f3af721 100644 --- a/lib/bb/fetch2/gitsm.py +++ b/lib/bb/fetch2/gitsm.py @@ -1,5 +1,3 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- """ BitBake 'Fetch' git submodules implementation @@ -16,24 +14,18 @@ NOTE: Switching a SRC_URI from "git://" to "gitsm://" requires a clean of your r # Copyright (C) 2013 Richard Purdie # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# SPDX-License-Identifier: GPL-2.0-only # -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. import os import bb +import copy +import shutil +import tempfile from bb.fetch2.git import Git from bb.fetch2 import runfetchcmd from bb.fetch2 import logger +from bb.fetch2 import Fetch class GitSM(Git): def supports(self, ud, d): @@ -42,94 +34,243 @@ class GitSM(Git): """ return ud.type in ['gitsm'] - def uses_submodules(self, ud, d, wd): - for name in ud.names: - try: - runfetchcmd("%s show %s:.gitmodules" % (ud.basecmd, ud.revisions[name]), d, quiet=True, workdir=wd) - return True - except bb.fetch.FetchError: - pass - return False - - def _set_relative_paths(self, repopath): + def process_submodules(self, ud, workdir, function, d): """ - Fix submodule paths to be relative instead of absolute, - so that when we move the repo it doesn't break - (In Git 1.7.10+ this is done automatically) + Iterate over all of the submodules in this repository and execute + the 'function' for each of them. """ + submodules = [] - with open(os.path.join(repopath, '.gitmodules'), 'r') as f: - for line in f.readlines(): + paths = {} + revision = {} + uris = {} + subrevision = {} + + def parse_gitmodules(gitmodules): + modules = {} + module = "" + for line in gitmodules.splitlines(): if line.startswith('[submodule'): - submodules.append(line.split('"')[1]) + module = line.split('"')[1] + modules[module] = {} + elif module and line.strip().startswith('path'): + path = line.split('=')[1].strip() + modules[module]['path'] = path + elif module and line.strip().startswith('url'): + url = line.split('=')[1].strip() + modules[module]['url'] = url + return modules + + # Collect the defined submodules, and their attributes + for name in ud.names: + try: + gitmodules = runfetchcmd("%s show %s:.gitmodules" % (ud.basecmd, ud.revisions[name]), d, quiet=True, workdir=workdir) + except: + # No submodules to update + continue + + for m, md in parse_gitmodules(gitmodules).items(): + try: + module_hash = runfetchcmd("%s ls-tree -z -d %s %s" % (ud.basecmd, ud.revisions[name], md['path']), d, quiet=True, workdir=workdir) + except: + # If the command fails, we don't have a valid file to check. If it doesn't + # fail -- it still might be a failure, see next check... + module_hash = "" + + if not module_hash: + logger.debug("submodule %s is defined, but is not initialized in the repository. Skipping", m) + continue + + submodules.append(m) + paths[m] = md['path'] + revision[m] = ud.revisions[name] + uris[m] = md['url'] + subrevision[m] = module_hash.split()[2] + + # Convert relative to absolute uri based on parent uri + if uris[m].startswith('..') or uris[m].startswith('./'): + newud = copy.copy(ud) + newud.path = os.path.normpath(os.path.join(newud.path, uris[m])) + uris[m] = Git._get_repo_url(self, newud) for module in submodules: - repo_conf = os.path.join(repopath, module, '.git') - if os.path.exists(repo_conf): - with open(repo_conf, 'r') as f: - lines = f.readlines() - newpath = '' - for i, line in enumerate(lines): - if line.startswith('gitdir:'): - oldpath = line.split(': ')[-1].rstrip() - if oldpath.startswith('/'): - newpath = '../' * (module.count('/') + 1) + '.git/modules/' + module - lines[i] = 'gitdir: %s\n' % newpath - break - if newpath: - with open(repo_conf, 'w') as f: - for line in lines: - f.write(line) - - repo_conf2 = os.path.join(repopath, '.git', 'modules', module, 'config') - if os.path.exists(repo_conf2): - with open(repo_conf2, 'r') as f: - lines = f.readlines() - newpath = '' - for i, line in enumerate(lines): - if line.lstrip().startswith('worktree = '): - oldpath = line.split(' = ')[-1].rstrip() - if oldpath.startswith('/'): - newpath = '../' * (module.count('/') + 3) + module - lines[i] = '\tworktree = %s\n' % newpath - break - if newpath: - with open(repo_conf2, 'w') as f: - for line in lines: - f.write(line) - - def update_submodules(self, ud, d): - # We have to convert bare -> full repo, do the submodule bit, then convert back - tmpclonedir = ud.clonedir + ".tmp" - gitdir = tmpclonedir + os.sep + ".git" - bb.utils.remove(tmpclonedir, True) - os.mkdir(tmpclonedir) - os.rename(ud.clonedir, gitdir) - runfetchcmd("sed " + gitdir + "/config -i -e 's/bare.*=.*true/bare = false/'", d) - runfetchcmd(ud.basecmd + " reset --hard", d, workdir=tmpclonedir) - runfetchcmd(ud.basecmd + " checkout -f " + ud.revisions[ud.names[0]], d, workdir=tmpclonedir) - runfetchcmd(ud.basecmd + " submodule update --init --recursive", d, workdir=tmpclonedir) - self._set_relative_paths(tmpclonedir) - runfetchcmd("sed " + gitdir + "/config -i -e 's/bare.*=.*false/bare = true/'", d, workdir=tmpclonedir) - os.rename(gitdir, ud.clonedir,) - bb.utils.remove(tmpclonedir, True) + # Translate the module url into a SRC_URI + + if "://" in uris[module]: + # Properly formated URL already + proto = uris[module].split(':', 1)[0] + url = uris[module].replace('%s:' % proto, 'gitsm:', 1) + else: + if ":" in uris[module]: + # Most likely an SSH style reference + proto = "ssh" + if ":/" in uris[module]: + # Absolute reference, easy to convert.. + url = "gitsm://" + uris[module].replace(':/', '/', 1) + else: + # Relative reference, no way to know if this is right! + logger.warning("Submodule included by %s refers to relative ssh reference %s. References may fail if not absolute." % (ud.url, uris[module])) + url = "gitsm://" + uris[module].replace(':', '/', 1) + else: + # This has to be a file reference + proto = "file" + url = "gitsm://" + uris[module] + if url.endswith("{}{}".format(ud.host, ud.path)): + raise bb.fetch2.FetchError("Submodule refers to the parent repository. This will cause deadlock situation in current version of Bitbake." \ + "Consider using git fetcher instead.") + + url += ';protocol=%s' % proto + url += ";name=%s" % module + url += ";subpath=%s" % module + url += ";nobranch=1" + url += ";lfs=%s" % self._need_lfs(ud) + # Note that adding "user=" here to give credentials to the + # submodule is not supported. Since using SRC_URI to give git:// + # URL a password is not supported, one have to use one of the + # recommended way (eg. ~/.netrc or SSH config) which does specify + # the user (See comment in git.py). + # So, we will not take patches adding "user=" support here. + + ld = d.createCopy() + # Not necessary to set SRC_URI, since we're passing the URI to + # Fetch. + #ld.setVar('SRC_URI', url) + ld.setVar('SRCREV_%s' % module, subrevision[module]) + + # Workaround for issues with SRCPV/SRCREV_FORMAT errors + # error refer to 'multiple' repositories. Only the repository + # in the original SRC_URI actually matters... + ld.setVar('SRCPV', d.getVar('SRCPV')) + ld.setVar('SRCREV_FORMAT', module) + + function(ud, url, module, paths[module], workdir, ld) + + return submodules != [] + + def need_update(self, ud, d): + if Git.need_update(self, ud, d): + return True + + need_update_list = [] + def need_update_submodule(ud, url, module, modpath, workdir, d): + url += ";bareclone=1;nobranch=1" + + try: + newfetch = Fetch([url], d, cache=False) + new_ud = newfetch.ud[url] + if new_ud.method.need_update(new_ud, d): + need_update_list.append(modpath) + except Exception as e: + logger.error('gitsm: submodule update check failed: %s %s' % (type(e).__name__, str(e))) + need_update_result = True + + # If we're using a shallow mirror tarball it needs to be unpacked + # temporarily so that we can examine the .gitmodules file + if ud.shallow and os.path.exists(ud.fullshallow) and not os.path.exists(ud.clonedir): + tmpdir = tempfile.mkdtemp(dir=d.getVar("DL_DIR")) + runfetchcmd("tar -xzf %s" % ud.fullshallow, d, workdir=tmpdir) + self.process_submodules(ud, tmpdir, need_update_submodule, d) + shutil.rmtree(tmpdir) + else: + self.process_submodules(ud, ud.clonedir, need_update_submodule, d) + + if need_update_list: + logger.debug('gitsm: Submodules requiring update: %s' % (' '.join(need_update_list))) + return True + + return False def download(self, ud, d): - Git.download(self, ud, d) + def download_submodule(ud, url, module, modpath, workdir, d): + url += ";bareclone=1;nobranch=1" - if not ud.shallow or ud.localpath != ud.fullshallow: - submodules = self.uses_submodules(ud, d, ud.clonedir) - if submodules: - self.update_submodules(ud, d) + # Is the following still needed? + #url += ";nocheckout=1" - def clone_shallow_local(self, ud, dest, d): - super(GitSM, self).clone_shallow_local(ud, dest, d) + try: + newfetch = Fetch([url], d, cache=False) + newfetch.download() + except Exception as e: + logger.error('gitsm: submodule download failed: %s %s' % (type(e).__name__, str(e))) + raise + + Git.download(self, ud, d) - runfetchcmd('cp -fpPRH "%s/modules" "%s/"' % (ud.clonedir, os.path.join(dest, '.git')), d) + # If we're using a shallow mirror tarball it needs to be unpacked + # temporarily so that we can examine the .gitmodules file + if ud.shallow and os.path.exists(ud.fullshallow) and self.need_update(ud, d): + tmpdir = tempfile.mkdtemp(dir=d.getVar("DL_DIR")) + runfetchcmd("tar -xzf %s" % ud.fullshallow, d, workdir=tmpdir) + self.process_submodules(ud, tmpdir, download_submodule, d) + shutil.rmtree(tmpdir) + else: + self.process_submodules(ud, ud.clonedir, download_submodule, d) def unpack(self, ud, destdir, d): + def unpack_submodules(ud, url, module, modpath, workdir, d): + url += ";bareclone=1;nobranch=1" + + # Figure out where we clone over the bare submodules... + if ud.bareclone: + repo_conf = ud.destdir + else: + repo_conf = os.path.join(ud.destdir, '.git') + + try: + newfetch = Fetch([url], d, cache=False) + # modpath is needed by unpack tracer to calculate submodule + # checkout dir + new_ud = newfetch.ud[url] + new_ud.modpath = modpath + newfetch.unpack(root=os.path.dirname(os.path.join(repo_conf, 'modules', module))) + except Exception as e: + logger.error('gitsm: submodule unpack failed: %s %s' % (type(e).__name__, str(e))) + raise + + local_path = newfetch.localpath(url) + + # Correct the submodule references to the local download version... + runfetchcmd("%(basecmd)s config submodule.%(module)s.url %(url)s" % {'basecmd': ud.basecmd, 'module': module, 'url' : local_path}, d, workdir=ud.destdir) + + if ud.shallow: + runfetchcmd("%(basecmd)s config submodule.%(module)s.shallow true" % {'basecmd': ud.basecmd, 'module': module}, d, workdir=ud.destdir) + + # Ensure the submodule repository is NOT set to bare, since we're checking it out... + try: + runfetchcmd("%s config core.bare false" % (ud.basecmd), d, quiet=True, workdir=os.path.join(repo_conf, 'modules', module)) + except: + logger.error("Unable to set git config core.bare to false for %s" % os.path.join(repo_conf, 'modules', module)) + raise + Git.unpack(self, ud, destdir, d) - if self.uses_submodules(ud, d, ud.destdir): - runfetchcmd(ud.basecmd + " checkout " + ud.revisions[ud.names[0]], d, workdir=ud.destdir) - runfetchcmd(ud.basecmd + " submodule update --init --recursive", d, workdir=ud.destdir) + ret = self.process_submodules(ud, ud.destdir, unpack_submodules, d) + + if not ud.bareclone and ret: + # All submodules should already be downloaded and configured in the tree. This simply + # sets up the configuration and checks out the files. The main project config should + # remain unmodified, and no download from the internet should occur. As such, lfs smudge + # should also be skipped as these files were already smudged in the fetch stage if lfs + # was enabled. + runfetchcmd("GIT_LFS_SKIP_SMUDGE=1 %s submodule update --recursive --no-fetch" % (ud.basecmd), d, quiet=True, workdir=ud.destdir) + + def implicit_urldata(self, ud, d): + import shutil, subprocess, tempfile + + urldata = [] + def add_submodule(ud, url, module, modpath, workdir, d): + url += ";bareclone=1;nobranch=1" + newfetch = Fetch([url], d, cache=False) + urldata.extend(newfetch.expanded_urldata()) + + # If we're using a shallow mirror tarball it needs to be unpacked + # temporarily so that we can examine the .gitmodules file + if ud.shallow and os.path.exists(ud.fullshallow) and ud.method.need_update(ud, d): + tmpdir = tempfile.mkdtemp(dir=d.getVar("DL_DIR")) + subprocess.check_call("tar -xzf %s" % ud.fullshallow, cwd=tmpdir, shell=True) + self.process_submodules(ud, tmpdir, add_submodule, d) + shutil.rmtree(tmpdir) + else: + self.process_submodules(ud, ud.clonedir, add_submodule, d) + + return urldata diff --git a/lib/bb/fetch2/hg.py b/lib/bb/fetch2/hg.py index d0857e63f..cbff8c490 100644 --- a/lib/bb/fetch2/hg.py +++ b/lib/bb/fetch2/hg.py @@ -1,5 +1,3 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- """ BitBake 'Fetch' implementation for mercurial DRCS (hg). @@ -9,24 +7,12 @@ BitBake 'Fetch' implementation for mercurial DRCS (hg). # Copyright (C) 2004 Marcin Juszkiewicz # Copyright (C) 2007 Robert Schuster # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# SPDX-License-Identifier: GPL-2.0-only # # Based on functions from the base bb module, Copyright 2003 Holger Schurig +# import os -import sys -import logging import bb import errno from bb.fetch2 import FetchMethod @@ -66,13 +52,6 @@ class Hg(FetchMethod): else: ud.proto = "hg" - ud.setup_revisions(d) - - if 'rev' in ud.parm: - ud.revision = ud.parm['rev'] - elif not ud.revision: - ud.revision = self.latest_revision(ud, d) - # Create paths to mercurial checkouts hgsrcname = '%s_%s_%s' % (ud.module.replace('/', '.'), \ ud.host, ud.path.replace('/', '.')) @@ -80,12 +59,19 @@ class Hg(FetchMethod): ud.fullmirror = os.path.join(d.getVar("DL_DIR"), mirrortarball) ud.mirrortarballs = [mirrortarball] - hgdir = d.getVar("HGDIR") or (d.getVar("DL_DIR") + "/hg/") + hgdir = d.getVar("HGDIR") or (d.getVar("DL_DIR") + "/hg") ud.pkgdir = os.path.join(hgdir, hgsrcname) ud.moddir = os.path.join(ud.pkgdir, ud.module) ud.localfile = ud.moddir ud.basecmd = d.getVar("FETCHCMD_hg") or "/usr/bin/env hg" + ud.setup_revisions(d) + + if 'rev' in ud.parm: + ud.revision = ud.parm['rev'] + elif not ud.revision: + ud.revision = self.latest_revision(ud, d) + ud.write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") def need_update(self, ud, d): @@ -99,7 +85,7 @@ class Hg(FetchMethod): def try_premirror(self, ud, d): # If we don't do this, updating an existing checkout with only premirrors # is not possible - if d.getVar("BB_FETCH_PREMIRRORONLY") is not None: + if bb.utils.to_boolean(d.getVar("BB_FETCH_PREMIRRORONLY")): return True if os.path.exists(ud.moddir): return False @@ -151,7 +137,7 @@ class Hg(FetchMethod): cmd = "%s --config auth.default.prefix=* --config auth.default.username=%s --config auth.default.password=%s --config \"auth.default.schemes=%s\" pull" % (ud.basecmd, ud.user, ud.pswd, proto) else: cmd = "%s pull" % (ud.basecmd) - elif command == "update": + elif command == "update" or command == "up": if ud.user and ud.pswd: cmd = "%s --config auth.default.prefix=* --config auth.default.username=%s --config auth.default.password=%s --config \"auth.default.schemes=%s\" update -C %s" % (ud.basecmd, ud.user, ud.pswd, proto, " ".join(options)) else: @@ -164,7 +150,7 @@ class Hg(FetchMethod): def download(self, ud, d): """Fetch url""" - logger.debug(2, "Fetch: checking for module directory '" + ud.moddir + "'") + logger.debug2("Fetch: checking for module directory '" + ud.moddir + "'") # If the checkout doesn't exist and the mirror tarball does, extract it if not os.path.exists(ud.pkgdir) and os.path.exists(ud.fullmirror): @@ -174,7 +160,7 @@ class Hg(FetchMethod): if os.access(os.path.join(ud.moddir, '.hg'), os.R_OK): # Found the source, check whether need pull updatecmd = self._buildhgcommand(ud, d, "update") - logger.debug(1, "Running %s", updatecmd) + logger.debug("Running %s", updatecmd) try: runfetchcmd(updatecmd, d, workdir=ud.moddir) except bb.fetch2.FetchError: @@ -182,7 +168,7 @@ class Hg(FetchMethod): pullcmd = self._buildhgcommand(ud, d, "pull") logger.info("Pulling " + ud.url) # update sources there - logger.debug(1, "Running %s", pullcmd) + logger.debug("Running %s", pullcmd) bb.fetch2.check_network_access(d, pullcmd, ud.url) runfetchcmd(pullcmd, d, workdir=ud.moddir) try: @@ -197,14 +183,14 @@ class Hg(FetchMethod): logger.info("Fetch " + ud.url) # check out sources there bb.utils.mkdirhier(ud.pkgdir) - logger.debug(1, "Running %s", fetchcmd) + logger.debug("Running %s", fetchcmd) bb.fetch2.check_network_access(d, fetchcmd, ud.url) runfetchcmd(fetchcmd, d, workdir=ud.pkgdir) # Even when we clone (fetch), we still need to update as hg's clone # won't checkout the specified revision if its on a branch updatecmd = self._buildhgcommand(ud, d, "update") - logger.debug(1, "Running %s", updatecmd) + logger.debug("Running %s", updatecmd) runfetchcmd(updatecmd, d, workdir=ud.moddir) def clean(self, ud, d): @@ -256,15 +242,23 @@ class Hg(FetchMethod): revflag = "-r %s" % ud.revision subdir = ud.parm.get("destsuffix", ud.module) codir = "%s/%s" % (destdir, subdir) + ud.unpack_tracer.unpack("hg", codir) scmdata = ud.parm.get("scmdata", "") if scmdata != "nokeep": + proto = ud.parm.get('protocol', 'http') if not os.access(os.path.join(codir, '.hg'), os.R_OK): - logger.debug(2, "Unpack: creating new hg repository in '" + codir + "'") + logger.debug2("Unpack: creating new hg repository in '" + codir + "'") runfetchcmd("%s init %s" % (ud.basecmd, codir), d) - logger.debug(2, "Unpack: updating source in '" + codir + "'") - runfetchcmd("%s pull %s" % (ud.basecmd, ud.moddir), d, workdir=codir) - runfetchcmd("%s up -C %s" % (ud.basecmd, revflag), d, workdir=codir) + logger.debug2("Unpack: updating source in '" + codir + "'") + if ud.user and ud.pswd: + runfetchcmd("%s --config auth.default.prefix=* --config auth.default.username=%s --config auth.default.password=%s --config \"auth.default.schemes=%s\" pull %s" % (ud.basecmd, ud.user, ud.pswd, proto, ud.moddir), d, workdir=codir) + else: + runfetchcmd("%s pull %s" % (ud.basecmd, ud.moddir), d, workdir=codir) + if ud.user and ud.pswd: + runfetchcmd("%s --config auth.default.prefix=* --config auth.default.username=%s --config auth.default.password=%s --config \"auth.default.schemes=%s\" up -C %s" % (ud.basecmd, ud.user, ud.pswd, proto, revflag), d, workdir=codir) + else: + runfetchcmd("%s up -C %s" % (ud.basecmd, revflag), d, workdir=codir) else: - logger.debug(2, "Unpack: extracting source to '" + codir + "'") + logger.debug2("Unpack: extracting source to '" + codir + "'") runfetchcmd("%s archive -t files %s %s" % (ud.basecmd, revflag, codir), d, workdir=ud.moddir) diff --git a/lib/bb/fetch2/local.py b/lib/bb/fetch2/local.py index a114ac12e..7d7668110 100644 --- a/lib/bb/fetch2/local.py +++ b/lib/bb/fetch2/local.py @@ -1,5 +1,3 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- """ BitBake 'Fetch' implementations @@ -10,26 +8,16 @@ BitBake build tools. # Copyright (C) 2003, 2004 Chris Larson # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# SPDX-License-Identifier: GPL-2.0-only # # Based on functions from the base bb module, Copyright 2003 Holger Schurig +# import os import urllib.request, urllib.parse, urllib.error import bb import bb.utils -from bb.fetch2 import FetchMethod, FetchError +from bb.fetch2 import FetchMethod, FetchError, ParameterError from bb.fetch2 import logger class Local(FetchMethod): @@ -45,15 +33,17 @@ class Local(FetchMethod): ud.basename = os.path.basename(ud.decodedurl) ud.basepath = ud.decodedurl ud.needdonestamp = False + if "*" in ud.decodedurl: + raise bb.fetch2.ParameterError("file:// urls using globbing are no longer supported. Please place the files in a directory and reference that instead.", ud.url) return def localpath(self, urldata, d): """ Return the local filename of a given url assuming a successful fetch. """ - return self.localpaths(urldata, d)[-1] + return self.localfile_searchpaths(urldata, d)[-1] - def localpaths(self, urldata, d): + def localfile_searchpaths(self, urldata, d): """ Return the local filename of a given url assuming a successful fetch. """ @@ -61,29 +51,17 @@ class Local(FetchMethod): path = urldata.decodedurl newpath = path if path[0] == "/": + logger.debug2("Using absolute %s" % (path)) return [path] filespath = d.getVar('FILESPATH') if filespath: - logger.debug(2, "Searching for %s in paths:\n %s" % (path, "\n ".join(filespath.split(":")))) + logger.debug2("Searching for %s in paths:\n %s" % (path, "\n ".join(filespath.split(":")))) newpath, hist = bb.utils.which(filespath, path, history=True) + logger.debug2("Using %s for %s" % (newpath, path)) searched.extend(hist) - if (not newpath or not os.path.exists(newpath)) and path.find("*") != -1: - # For expressions using '*', best we can do is take the first directory in FILESPATH that exists - newpath, hist = bb.utils.which(filespath, ".", history=True) - searched.extend(hist) - logger.debug(2, "Searching for %s in path: %s" % (path, newpath)) - return searched - if not os.path.exists(newpath): - dldirfile = os.path.join(d.getVar("DL_DIR"), path) - logger.debug(2, "Defaulting to %s for %s" % (dldirfile, path)) - bb.utils.mkdirhier(os.path.dirname(dldirfile)) - searched.append(dldirfile) - return searched return searched def need_update(self, ud, d): - if ud.url.find("*") != -1: - return False if os.path.exists(ud.localpath): return False return True @@ -96,9 +74,7 @@ class Local(FetchMethod): filespath = d.getVar('FILESPATH') if filespath: locations = filespath.split(":") - locations.append(d.getVar("DL_DIR")) - - msg = "Unable to find file " + urldata.url + " anywhere. The paths that were searched were:\n " + "\n ".join(locations) + msg = "Unable to find file " + urldata.url + " anywhere to download to " + urldata.localpath + ". The paths that were searched were:\n " + "\n ".join(locations) raise FetchError(msg) return True @@ -107,9 +83,6 @@ class Local(FetchMethod): """ Check the status of the url """ - if urldata.localpath.find("*") != -1: - logger.info("URL %s looks like a glob and was therefore not checked.", urldata.url) - return True if os.path.exists(urldata.localpath): return True return False diff --git a/lib/bb/fetch2/npm.py b/lib/bb/fetch2/npm.py index 730c346a9..15f3f19bc 100644 --- a/lib/bb/fetch2/npm.py +++ b/lib/bb/fetch2/npm.py @@ -1,309 +1,315 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- +# Copyright (C) 2020 Savoir-Faire Linux +# +# SPDX-License-Identifier: GPL-2.0-only +# """ -BitBake 'Fetch' NPM implementation +BitBake 'Fetch' npm implementation -The NPM fetcher is used to retrieve files from the npmjs repository +npm fetcher support the SRC_URI with format of: +SRC_URI = "npm://some.registry.url;OptionA=xxx;OptionB=xxx;..." -Usage in the recipe: +Supported SRC_URI options are: - SRC_URI = "npm://registry.npmjs.org/;name=${PN};version=${PV}" - Suported SRC_URI options are: +- package + The npm package name. This is a mandatory parameter. - - name - - version +- version + The npm package version. This is a mandatory parameter. - npm://registry.npmjs.org/${PN}/-/${PN}-${PV}.tgz would become npm://registry.npmjs.org;name=${PN};version=${PV} - The fetcher all triggers off the existence of ud.localpath. If that exists and has the ".done" stamp, its assumed the fetch is good/done +- downloadfilename + Specifies the filename used when storing the downloaded file. +- destsuffix + Specifies the directory to use to unpack the package (default: npm). """ -import os -import sys -import urllib.request, urllib.parse, urllib.error +import base64 import json -import subprocess -import signal +import os +import re +import shlex +import tempfile import bb -from bb.fetch2 import FetchMethod -from bb.fetch2 import FetchError -from bb.fetch2 import ChecksumError -from bb.fetch2 import runfetchcmd -from bb.fetch2 import logger -from bb.fetch2 import UnpackError -from bb.fetch2 import ParameterError -from distutils import spawn - -def subprocess_setup(): - # Python installs a SIGPIPE handler by default. This is usually not what - # non-Python subprocesses expect. - # SIGPIPE errors are known issues with gzip/bash - signal.signal(signal.SIGPIPE, signal.SIG_DFL) +from bb.fetch2 import Fetch +from bb.fetch2 import FetchError +from bb.fetch2 import FetchMethod +from bb.fetch2 import MissingParameterError +from bb.fetch2 import ParameterError +from bb.fetch2 import URI +from bb.fetch2 import check_network_access +from bb.fetch2 import runfetchcmd +from bb.utils import is_semver + +def npm_package(package): + """Convert the npm package name to remove unsupported character""" + # Scoped package names (with the @) use the same naming convention + # as the 'npm pack' command. + name = re.sub("/", "-", package) + name = name.lower() + name = re.sub(r"[^\-a-z0-9]", "", name) + name = name.strip("-") + return name + + +def npm_filename(package, version): + """Get the filename of a npm package""" + return npm_package(package) + "-" + version + ".tgz" + +def npm_localfile(package, version=None): + """Get the local filename of a npm package""" + if version is not None: + filename = npm_filename(package, version) + else: + filename = package + return os.path.join("npm2", filename) + +def npm_integrity(integrity): + """ + Get the checksum name and expected value from the subresource integrity + https://www.w3.org/TR/SRI/ + """ + algo, value = integrity.split("-", maxsplit=1) + return "%ssum" % algo, base64.b64decode(value).hex() + +def npm_unpack(tarball, destdir, d): + """Unpack a npm tarball""" + bb.utils.mkdirhier(destdir) + cmd = "tar --extract --gzip --file=%s" % shlex.quote(tarball) + cmd += " --no-same-owner" + cmd += " --delay-directory-restore" + cmd += " --strip-components=1" + runfetchcmd(cmd, d, workdir=destdir) + runfetchcmd("chmod -R +X '%s'" % (destdir), d, quiet=True, workdir=destdir) + +class NpmEnvironment(object): + """ + Using a npm config file seems more reliable than using cli arguments. + This class allows to create a controlled environment for npm commands. + """ + def __init__(self, d, configs=[], npmrc=None): + self.d = d + + self.user_config = tempfile.NamedTemporaryFile(mode="w", buffering=1) + for key, value in configs: + self.user_config.write("%s=%s\n" % (key, value)) + + if npmrc: + self.global_config_name = npmrc + else: + self.global_config_name = "/dev/null" -class Npm(FetchMethod): + def __del__(self): + if self.user_config: + self.user_config.close() - """Class to fetch urls via 'npm'""" - def init(self, d): - pass + def run(self, cmd, args=None, configs=None, workdir=None): + """Run npm command in a controlled environment""" + with tempfile.TemporaryDirectory() as tmpdir: + d = bb.data.createCopy(self.d) + d.setVar("PATH", d.getVar("PATH")) # PATH might contain $HOME - evaluate it before patching + d.setVar("HOME", tmpdir) - def supports(self, ud, d): - """ - Check to see if a given url can be fetched with npm - """ - return ud.type in ['npm'] + if not workdir: + workdir = tmpdir - def debug(self, msg): - logger.debug(1, "NpmFetch: %s", msg) + def _run(cmd): + cmd = "NPM_CONFIG_USERCONFIG=%s " % (self.user_config.name) + cmd + cmd = "NPM_CONFIG_GLOBALCONFIG=%s " % (self.global_config_name) + cmd + return runfetchcmd(cmd, d, workdir=workdir) - def clean(self, ud, d): - logger.debug(2, "Calling cleanup %s" % ud.pkgname) - bb.utils.remove(ud.localpath, False) - bb.utils.remove(ud.pkgdatadir, True) - bb.utils.remove(ud.fullmirror, False) + if configs: + bb.warn("Use of configs argument of NpmEnvironment.run() function" + " is deprecated. Please use args argument instead.") + for key, value in configs: + cmd += " --%s=%s" % (key, shlex.quote(value)) + + if args: + for key, value in args: + cmd += " --%s=%s" % (key, shlex.quote(value)) + + return _run(cmd) + +class Npm(FetchMethod): + """Class to fetch a package from a npm registry""" + + def supports(self, ud, d): + """Check if a given url can be fetched with npm""" + return ud.type in ["npm"] def urldata_init(self, ud, d): - """ - init NPM specific variable within url data - """ - if 'downloadfilename' in ud.parm: - ud.basename = ud.parm['downloadfilename'] - else: - ud.basename = os.path.basename(ud.path) - - # can't call it ud.name otherwise fetcher base class will start doing sha1stuff - # TODO: find a way to get an sha1/sha256 manifest of pkg & all deps - ud.pkgname = ud.parm.get("name", None) - if not ud.pkgname: - raise ParameterError("NPM fetcher requires a name parameter", ud.url) - ud.version = ud.parm.get("version", None) + """Init npm specific variables within url data""" + ud.package = None + ud.version = None + ud.registry = None + + # Get the 'package' parameter + if "package" in ud.parm: + ud.package = ud.parm.get("package") + + if not ud.package: + raise MissingParameterError("Parameter 'package' required", ud.url) + + # Get the 'version' parameter + if "version" in ud.parm: + ud.version = ud.parm.get("version") + if not ud.version: - raise ParameterError("NPM fetcher requires a version parameter", ud.url) - ud.bbnpmmanifest = "%s-%s.deps.json" % (ud.pkgname, ud.version) - ud.bbnpmmanifest = ud.bbnpmmanifest.replace('/', '-') - ud.registry = "http://%s" % (ud.url.replace('npm://', '', 1).split(';'))[0] - prefixdir = "npm/%s" % ud.pkgname - ud.pkgdatadir = d.expand("${DL_DIR}/%s" % prefixdir) - if not os.path.exists(ud.pkgdatadir): - bb.utils.mkdirhier(ud.pkgdatadir) - ud.localpath = d.expand("${DL_DIR}/npm/%s" % ud.bbnpmmanifest) - - self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -O -t 2 -T 30 -nv --passive-ftp --no-check-certificate " - ud.prefixdir = prefixdir - - ud.write_tarballs = ((d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0") != "0") - mirrortarball = 'npm_%s-%s.tar.xz' % (ud.pkgname, ud.version) - mirrortarball = mirrortarball.replace('/', '-') - ud.fullmirror = os.path.join(d.getVar("DL_DIR"), mirrortarball) - ud.mirrortarballs = [mirrortarball] + raise MissingParameterError("Parameter 'version' required", ud.url) - def need_update(self, ud, d): - if os.path.exists(ud.localpath): - return False - return True - - def _runwget(self, ud, d, command, quiet): - logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command)) - bb.fetch2.check_network_access(d, command, ud.url) - dldir = d.getVar("DL_DIR") - runfetchcmd(command, d, quiet, workdir=dldir) - - def _unpackdep(self, ud, pkg, data, destdir, dldir, d): - file = data[pkg]['tgz'] - logger.debug(2, "file to extract is %s" % file) - if file.endswith('.tgz') or file.endswith('.tar.gz') or file.endswith('.tar.Z'): - cmd = 'tar xz --strip 1 --no-same-owner --warning=no-unknown-keyword -f %s/%s' % (dldir, file) - else: - bb.fatal("NPM package %s downloaded not a tarball!" % file) - - # Change to subdir before executing command - if not os.path.exists(destdir): - os.makedirs(destdir) - path = d.getVar('PATH') - if path: - cmd = "PATH=\"%s\" %s" % (path, cmd) - bb.note("Unpacking %s to %s/" % (file, destdir)) - ret = subprocess.call(cmd, preexec_fn=subprocess_setup, shell=True, cwd=destdir) - - if ret != 0: - raise UnpackError("Unpack command %s failed with return value %s" % (cmd, ret), ud.url) - - if 'deps' not in data[pkg]: - return - for dep in data[pkg]['deps']: - self._unpackdep(ud, dep, data[pkg]['deps'], "%s/node_modules/%s" % (destdir, dep), dldir, d) - - - def unpack(self, ud, destdir, d): - dldir = d.getVar("DL_DIR") - with open("%s/npm/%s" % (dldir, ud.bbnpmmanifest)) as datafile: - workobj = json.load(datafile) - dldir = "%s/%s" % (os.path.dirname(ud.localpath), ud.pkgname) - - if 'subdir' in ud.parm: - unpackdir = '%s/%s' % (destdir, ud.parm.get('subdir')) + if not is_semver(ud.version) and not ud.version == "latest": + raise ParameterError("Invalid 'version' parameter", ud.url) + + # Extract the 'registry' part of the url + ud.registry = re.sub(r"^npm://", "https://", ud.url.split(";")[0]) + + # Using the 'downloadfilename' parameter as local filename + # or the npm package name. + if "downloadfilename" in ud.parm: + ud.localfile = npm_localfile(d.expand(ud.parm["downloadfilename"])) else: - unpackdir = '%s/npmpkg' % destdir - - self._unpackdep(ud, ud.pkgname, workobj, unpackdir, dldir, d) - - def _parse_view(self, output): - ''' - Parse the output of npm view --json; the last JSON result - is assumed to be the one that we're interested in. - ''' - pdata = None - outdeps = {} - datalines = [] - bracelevel = 0 - for line in output.splitlines(): - if bracelevel: - datalines.append(line) - elif '{' in line: - datalines = [] - datalines.append(line) - bracelevel = bracelevel + line.count('{') - line.count('}') - if datalines: - pdata = json.loads('\n'.join(datalines)) - return pdata - - def _getdependencies(self, pkg, data, version, d, ud, optional=False, fetchedlist=None): - if fetchedlist is None: - fetchedlist = [] - pkgfullname = pkg - if version != '*' and not '/' in version: - pkgfullname += "@'%s'" % version - logger.debug(2, "Calling getdeps on %s" % pkg) - fetchcmd = "npm view %s --json --registry %s" % (pkgfullname, ud.registry) - output = runfetchcmd(fetchcmd, d, True) - pdata = self._parse_view(output) - if not pdata: - raise FetchError("The command '%s' returned no output" % fetchcmd) - if optional: - pkg_os = pdata.get('os', None) - if pkg_os: - if not isinstance(pkg_os, list): - pkg_os = [pkg_os] - blacklist = False - for item in pkg_os: - if item.startswith('!'): - blacklist = True - break - if (not blacklist and 'linux' not in pkg_os) or '!linux' in pkg_os: - logger.debug(2, "Skipping %s since it's incompatible with Linux" % pkg) - return - #logger.debug(2, "Output URL is %s - %s - %s" % (ud.basepath, ud.basename, ud.localfile)) - outputurl = pdata['dist']['tarball'] - data[pkg] = {} - data[pkg]['tgz'] = os.path.basename(outputurl) - if outputurl in fetchedlist: - return - - self._runwget(ud, d, "%s --directory-prefix=%s %s" % (self.basecmd, ud.prefixdir, outputurl), False) - fetchedlist.append(outputurl) - - dependencies = pdata.get('dependencies', {}) - optionalDependencies = pdata.get('optionalDependencies', {}) - dependencies.update(optionalDependencies) - depsfound = {} - optdepsfound = {} - data[pkg]['deps'] = {} - for dep in dependencies: - if dep in optionalDependencies: - optdepsfound[dep] = dependencies[dep] + ud.localfile = npm_localfile(ud.package, ud.version) + + # Get the base 'npm' command + ud.basecmd = d.getVar("FETCHCMD_npm") or "npm" + + # This fetcher resolves a URI from a npm package name and version and + # then forwards it to a proxy fetcher. A resolve file containing the + # resolved URI is created to avoid unwanted network access (if the file + # already exists). The management of the donestamp file, the lockfile + # and the checksums are forwarded to the proxy fetcher. + ud.proxy = None + ud.needdonestamp = False + ud.resolvefile = self.localpath(ud, d) + ".resolved" + + def _resolve_proxy_url(self, ud, d): + def _npm_view(): + args = [] + args.append(("json", "true")) + args.append(("registry", ud.registry)) + pkgver = shlex.quote(ud.package + "@" + ud.version) + cmd = ud.basecmd + " view %s" % pkgver + env = NpmEnvironment(d) + check_network_access(d, cmd, ud.registry) + view_string = env.run(cmd, args=args) + + if not view_string: + raise FetchError("Unavailable package %s" % pkgver, ud.url) + + try: + view = json.loads(view_string) + + error = view.get("error") + if error is not None: + raise FetchError(error.get("summary"), ud.url) + + if ud.version == "latest": + bb.warn("The npm package %s is using the latest " \ + "version available. This could lead to " \ + "non-reproducible builds." % pkgver) + elif ud.version != view.get("version"): + raise ParameterError("Invalid 'version' parameter", ud.url) + + return view + + except Exception as e: + raise FetchError("Invalid view from npm: %s" % str(e), ud.url) + + def _get_url(view): + tarball_url = view.get("dist", {}).get("tarball") + + if tarball_url is None: + raise FetchError("Invalid 'dist.tarball' in view", ud.url) + + uri = URI(tarball_url) + uri.params["downloadfilename"] = ud.localfile + + integrity = view.get("dist", {}).get("integrity") + shasum = view.get("dist", {}).get("shasum") + + if integrity is not None: + checksum_name, checksum_expected = npm_integrity(integrity) + uri.params[checksum_name] = checksum_expected + elif shasum is not None: + uri.params["sha1sum"] = shasum else: - depsfound[dep] = dependencies[dep] - for dep, version in optdepsfound.items(): - self._getdependencies(dep, data[pkg]['deps'], version, d, ud, optional=True, fetchedlist=fetchedlist) - for dep, version in depsfound.items(): - self._getdependencies(dep, data[pkg]['deps'], version, d, ud, fetchedlist=fetchedlist) - - def _getshrinkeddependencies(self, pkg, data, version, d, ud, lockdown, manifest, toplevel=True): - logger.debug(2, "NPM shrinkwrap file is %s" % data) - if toplevel: - name = data.get('name', None) - if name and name != pkg: - for obj in data.get('dependencies', []): - if obj == pkg: - self._getshrinkeddependencies(obj, data['dependencies'][obj], data['dependencies'][obj]['version'], d, ud, lockdown, manifest, False) - return - outputurl = "invalid" - if ('resolved' not in data) or (not data['resolved'].startswith('http')): - # will be the case for ${PN} - fetchcmd = "npm view %s@%s dist.tarball --registry %s" % (pkg, version, ud.registry) - logger.debug(2, "Found this matching URL: %s" % str(fetchcmd)) - outputurl = runfetchcmd(fetchcmd, d, True) - else: - outputurl = data['resolved'] - self._runwget(ud, d, "%s --directory-prefix=%s %s" % (self.basecmd, ud.prefixdir, outputurl), False) - manifest[pkg] = {} - manifest[pkg]['tgz'] = os.path.basename(outputurl).rstrip() - manifest[pkg]['deps'] = {} - - if pkg in lockdown: - sha1_expected = lockdown[pkg][version] - sha1_data = bb.utils.sha1_file("npm/%s/%s" % (ud.pkgname, manifest[pkg]['tgz'])) - if sha1_expected != sha1_data: - msg = "\nFile: '%s' has %s checksum %s when %s was expected" % (manifest[pkg]['tgz'], 'sha1', sha1_data, sha1_expected) - raise ChecksumError('Checksum mismatch!%s' % msg) - else: - logger.debug(2, "No lockdown data for %s@%s" % (pkg, version)) + raise FetchError("Invalid 'dist.integrity' in view", ud.url) + + return str(uri) + + url = _get_url(_npm_view()) + + bb.utils.mkdirhier(os.path.dirname(ud.resolvefile)) + with open(ud.resolvefile, "w") as f: + f.write(url) + + def _setup_proxy(self, ud, d): + if ud.proxy is None: + if not os.path.exists(ud.resolvefile): + self._resolve_proxy_url(ud, d) + + with open(ud.resolvefile, "r") as f: + url = f.read() + + # Avoid conflicts between the environment data and: + # - the proxy url checksum + data = bb.data.createCopy(d) + data.delVarFlags("SRC_URI") + ud.proxy = Fetch([url], data) - if 'dependencies' in data: - for obj in data['dependencies']: - logger.debug(2, "Found dep is %s" % str(obj)) - self._getshrinkeddependencies(obj, data['dependencies'][obj], data['dependencies'][obj]['version'], d, ud, lockdown, manifest[pkg]['deps'], False) + def _get_proxy_method(self, ud, d): + self._setup_proxy(ud, d) + proxy_url = ud.proxy.urls[0] + proxy_ud = ud.proxy.ud[proxy_url] + proxy_d = ud.proxy.d + proxy_ud.setup_localpath(proxy_d) + return proxy_ud.method, proxy_ud, proxy_d + + def verify_donestamp(self, ud, d): + """Verify the donestamp file""" + proxy_m, proxy_ud, proxy_d = self._get_proxy_method(ud, d) + return proxy_m.verify_donestamp(proxy_ud, proxy_d) + + def update_donestamp(self, ud, d): + """Update the donestamp file""" + proxy_m, proxy_ud, proxy_d = self._get_proxy_method(ud, d) + proxy_m.update_donestamp(proxy_ud, proxy_d) + + def need_update(self, ud, d): + """Force a fetch, even if localpath exists ?""" + if not os.path.exists(ud.resolvefile): + return True + if ud.version == "latest": + return True + proxy_m, proxy_ud, proxy_d = self._get_proxy_method(ud, d) + return proxy_m.need_update(proxy_ud, proxy_d) + + def try_mirrors(self, fetch, ud, d, mirrors): + """Try to use a mirror""" + proxy_m, proxy_ud, proxy_d = self._get_proxy_method(ud, d) + return proxy_m.try_mirrors(fetch, proxy_ud, proxy_d, mirrors) def download(self, ud, d): """Fetch url""" - jsondepobj = {} - shrinkobj = {} - lockdown = {} - - if not os.listdir(ud.pkgdatadir) and os.path.exists(ud.fullmirror): - dest = d.getVar("DL_DIR") - bb.utils.mkdirhier(dest) - runfetchcmd("tar -xJf %s" % (ud.fullmirror), d, workdir=dest) - return - - if ud.parm.get("noverify", None) != '1': - shwrf = d.getVar('NPM_SHRINKWRAP') - logger.debug(2, "NPM shrinkwrap file is %s" % shwrf) - if shwrf: - try: - with open(shwrf) as datafile: - shrinkobj = json.load(datafile) - except Exception as e: - raise FetchError('Error loading NPM_SHRINKWRAP file "%s" for %s: %s' % (shwrf, ud.pkgname, str(e))) - elif not ud.ignore_checksums: - logger.warning('Missing shrinkwrap file in NPM_SHRINKWRAP for %s, this will lead to unreliable builds!' % ud.pkgname) - lckdf = d.getVar('NPM_LOCKDOWN') - logger.debug(2, "NPM lockdown file is %s" % lckdf) - if lckdf: - try: - with open(lckdf) as datafile: - lockdown = json.load(datafile) - except Exception as e: - raise FetchError('Error loading NPM_LOCKDOWN file "%s" for %s: %s' % (lckdf, ud.pkgname, str(e))) - elif not ud.ignore_checksums: - logger.warning('Missing lockdown file in NPM_LOCKDOWN for %s, this will lead to unreproducible builds!' % ud.pkgname) - - if ('name' not in shrinkobj): - self._getdependencies(ud.pkgname, jsondepobj, ud.version, d, ud) - else: - self._getshrinkeddependencies(ud.pkgname, shrinkobj, ud.version, d, ud, lockdown, jsondepobj) - - with open(ud.localpath, 'w') as outfile: - json.dump(jsondepobj, outfile) - - def build_mirror_data(self, ud, d): - # Generate a mirror tarball if needed - if ud.write_tarballs and not os.path.exists(ud.fullmirror): - # it's possible that this symlink points to read-only filesystem with PREMIRROR - if os.path.islink(ud.fullmirror): - os.unlink(ud.fullmirror) - - dldir = d.getVar("DL_DIR") - logger.info("Creating tarball of npm data") - runfetchcmd("tar -cJf %s npm/%s npm/%s" % (ud.fullmirror, ud.bbnpmmanifest, ud.pkgname), d, - workdir=dldir) - runfetchcmd("touch %s.done" % (ud.fullmirror), d, workdir=dldir) + self._setup_proxy(ud, d) + ud.proxy.download() + + def unpack(self, ud, rootdir, d): + """Unpack the downloaded archive""" + destsuffix = ud.parm.get("destsuffix", "npm") + destdir = os.path.join(rootdir, destsuffix) + npm_unpack(ud.localpath, destdir, d) + ud.unpack_tracer.unpack("npm", destdir) + + def clean(self, ud, d): + """Clean any existing full or partial download""" + if os.path.exists(ud.resolvefile): + self._setup_proxy(ud, d) + ud.proxy.clean() + bb.utils.remove(ud.resolvefile) + + def done(self, ud, d): + """Is the download done ?""" + if not os.path.exists(ud.resolvefile): + return False + proxy_m, proxy_ud, proxy_d = self._get_proxy_method(ud, d) + return proxy_m.done(proxy_ud, proxy_d) diff --git a/lib/bb/fetch2/npmsw.py b/lib/bb/fetch2/npmsw.py new file mode 100644 index 000000000..ff5f8dc75 --- /dev/null +++ b/lib/bb/fetch2/npmsw.py @@ -0,0 +1,313 @@ +# Copyright (C) 2020 Savoir-Faire Linux +# +# SPDX-License-Identifier: GPL-2.0-only +# +""" +BitBake 'Fetch' npm shrinkwrap implementation + +npm fetcher support the SRC_URI with format of: +SRC_URI = "npmsw://some.registry.url;OptionA=xxx;OptionB=xxx;..." + +Supported SRC_URI options are: + +- dev + Set to 1 to also install devDependencies. + +- destsuffix + Specifies the directory to use to unpack the dependencies (default: ${S}). +""" + +import json +import os +import re +import bb +from bb.fetch2 import Fetch +from bb.fetch2 import FetchMethod +from bb.fetch2 import ParameterError +from bb.fetch2 import runfetchcmd +from bb.fetch2 import URI +from bb.fetch2.npm import npm_integrity +from bb.fetch2.npm import npm_localfile +from bb.fetch2.npm import npm_unpack +from bb.utils import is_semver +from bb.utils import lockfile +from bb.utils import unlockfile + +def foreach_dependencies(shrinkwrap, callback=None, dev=False): + """ + Run a callback for each dependencies of a shrinkwrap file. + The callback is using the format: + callback(name, params, deptree) + with: + name = the package name (string) + params = the package parameters (dictionary) + destdir = the destination of the package (string) + """ + # For handling old style dependencies entries in shinkwrap files + def _walk_deps(deps, deptree): + for name in deps: + subtree = [*deptree, name] + _walk_deps(deps[name].get("dependencies", {}), subtree) + if callback is not None: + if deps[name].get("dev", False) and not dev: + continue + elif deps[name].get("bundled", False): + continue + destsubdirs = [os.path.join("node_modules", dep) for dep in subtree] + destsuffix = os.path.join(*destsubdirs) + callback(name, deps[name], destsuffix) + + # packages entry means new style shrinkwrap file, else use dependencies + packages = shrinkwrap.get("packages", None) + if packages is not None: + for package in packages: + if package != "": + name = package.split('node_modules/')[-1] + package_infos = packages.get(package, {}) + if dev == False and package_infos.get("dev", False): + continue + callback(name, package_infos, package) + else: + _walk_deps(shrinkwrap.get("dependencies", {}), []) + +class NpmShrinkWrap(FetchMethod): + """Class to fetch all package from a shrinkwrap file""" + + def supports(self, ud, d): + """Check if a given url can be fetched with npmsw""" + return ud.type in ["npmsw"] + + def urldata_init(self, ud, d): + """Init npmsw specific variables within url data""" + + # Get the 'shrinkwrap' parameter + ud.shrinkwrap_file = re.sub(r"^npmsw://", "", ud.url.split(";")[0]) + + # Get the 'dev' parameter + ud.dev = bb.utils.to_boolean(ud.parm.get("dev"), False) + + # Resolve the dependencies + ud.deps = [] + + def _resolve_dependency(name, params, destsuffix): + url = None + localpath = None + extrapaths = [] + unpack = True + + integrity = params.get("integrity", None) + resolved = params.get("resolved", None) + version = params.get("version", None) + + # Handle registry sources + if is_semver(version) and integrity: + # Handle duplicate dependencies without url + if not resolved: + return + + localfile = npm_localfile(name, version) + + uri = URI(resolved) + uri.params["downloadfilename"] = localfile + + checksum_name, checksum_expected = npm_integrity(integrity) + uri.params[checksum_name] = checksum_expected + + url = str(uri) + + localpath = os.path.join(d.getVar("DL_DIR"), localfile) + + # Create a resolve file to mimic the npm fetcher and allow + # re-usability of the downloaded file. + resolvefile = localpath + ".resolved" + + bb.utils.mkdirhier(os.path.dirname(resolvefile)) + with open(resolvefile, "w") as f: + f.write(url) + + extrapaths.append(resolvefile) + + # Handle http tarball sources + elif version.startswith("http") and integrity: + localfile = npm_localfile(os.path.basename(version)) + + uri = URI(version) + uri.params["downloadfilename"] = localfile + + checksum_name, checksum_expected = npm_integrity(integrity) + uri.params[checksum_name] = checksum_expected + + url = str(uri) + + localpath = os.path.join(d.getVar("DL_DIR"), localfile) + + # Handle local tarball and link sources + elif version.startswith("file"): + localpath = version[5:] + if not version.endswith(".tgz"): + unpack = False + + # Handle git sources + elif version.startswith(("git", "bitbucket","gist")) or ( + not version.endswith((".tgz", ".tar", ".tar.gz")) + and not version.startswith((".", "@", "/")) + and "/" in version + ): + if version.startswith("github:"): + version = "git+https://github.com/" + version[len("github:"):] + elif version.startswith("gist:"): + version = "git+https://gist.github.com/" + version[len("gist:"):] + elif version.startswith("bitbucket:"): + version = "git+https://bitbucket.org/" + version[len("bitbucket:"):] + elif version.startswith("gitlab:"): + version = "git+https://gitlab.com/" + version[len("gitlab:"):] + elif not version.startswith(("git+","git:")): + version = "git+https://github.com/" + version + regex = re.compile(r""" + ^ + git\+ + (?P<protocol>[a-z]+) + :// + (?P<url>[^#]+) + \# + (?P<rev>[0-9a-f]+) + $ + """, re.VERBOSE) + + match = regex.match(version) + + if not match: + raise ParameterError("Invalid git url: %s" % version, ud.url) + + groups = match.groupdict() + + uri = URI("git://" + str(groups["url"])) + uri.params["protocol"] = str(groups["protocol"]) + uri.params["rev"] = str(groups["rev"]) + uri.params["destsuffix"] = destsuffix + + url = str(uri) + + else: + raise ParameterError("Unsupported dependency: %s" % name, ud.url) + + # name is needed by unpack tracer for module mapping + ud.deps.append({ + "name": name, + "url": url, + "localpath": localpath, + "extrapaths": extrapaths, + "destsuffix": destsuffix, + "unpack": unpack, + }) + + try: + with open(ud.shrinkwrap_file, "r") as f: + shrinkwrap = json.load(f) + except Exception as e: + raise ParameterError("Invalid shrinkwrap file: %s" % str(e), ud.url) + + foreach_dependencies(shrinkwrap, _resolve_dependency, ud.dev) + + # Avoid conflicts between the environment data and: + # - the proxy url revision + # - the proxy url checksum + data = bb.data.createCopy(d) + data.delVar("SRCREV") + data.delVarFlags("SRC_URI") + + # This fetcher resolves multiple URIs from a shrinkwrap file and then + # forwards it to a proxy fetcher. The management of the donestamp file, + # the lockfile and the checksums are forwarded to the proxy fetcher. + shrinkwrap_urls = [dep["url"] for dep in ud.deps if dep["url"]] + if shrinkwrap_urls: + ud.proxy = Fetch(shrinkwrap_urls, data) + ud.needdonestamp = False + + @staticmethod + def _foreach_proxy_method(ud, handle): + returns = [] + #Check if there are dependencies before try to fetch them + if len(ud.deps) > 0: + for proxy_url in ud.proxy.urls: + proxy_ud = ud.proxy.ud[proxy_url] + proxy_d = ud.proxy.d + proxy_ud.setup_localpath(proxy_d) + lf = lockfile(proxy_ud.lockfile) + returns.append(handle(proxy_ud.method, proxy_ud, proxy_d)) + unlockfile(lf) + return returns + + def verify_donestamp(self, ud, d): + """Verify the donestamp file""" + def _handle(m, ud, d): + return m.verify_donestamp(ud, d) + return all(self._foreach_proxy_method(ud, _handle)) + + def update_donestamp(self, ud, d): + """Update the donestamp file""" + def _handle(m, ud, d): + m.update_donestamp(ud, d) + self._foreach_proxy_method(ud, _handle) + + def need_update(self, ud, d): + """Force a fetch, even if localpath exists ?""" + def _handle(m, ud, d): + return m.need_update(ud, d) + return all(self._foreach_proxy_method(ud, _handle)) + + def try_mirrors(self, fetch, ud, d, mirrors): + """Try to use a mirror""" + def _handle(m, ud, d): + return m.try_mirrors(fetch, ud, d, mirrors) + return all(self._foreach_proxy_method(ud, _handle)) + + def download(self, ud, d): + """Fetch url""" + ud.proxy.download() + + def unpack(self, ud, rootdir, d): + """Unpack the downloaded dependencies""" + destdir = d.getVar("S") + destsuffix = ud.parm.get("destsuffix") + if destsuffix: + destdir = os.path.join(rootdir, destsuffix) + ud.unpack_tracer.unpack("npm-shrinkwrap", destdir) + + bb.utils.mkdirhier(destdir) + bb.utils.copyfile(ud.shrinkwrap_file, + os.path.join(destdir, "npm-shrinkwrap.json")) + + auto = [dep["url"] for dep in ud.deps if not dep["localpath"]] + manual = [dep for dep in ud.deps if dep["localpath"]] + + if auto: + ud.proxy.unpack(destdir, auto) + + for dep in manual: + depdestdir = os.path.join(destdir, dep["destsuffix"]) + if dep["url"]: + npm_unpack(dep["localpath"], depdestdir, d) + else: + depsrcdir= os.path.join(destdir, dep["localpath"]) + if dep["unpack"]: + npm_unpack(depsrcdir, depdestdir, d) + else: + bb.utils.mkdirhier(depdestdir) + cmd = 'cp -fpPRH "%s/." .' % (depsrcdir) + runfetchcmd(cmd, d, workdir=depdestdir) + + def clean(self, ud, d): + """Clean any existing full or partial download""" + ud.proxy.clean() + + # Clean extra files + for dep in ud.deps: + for path in dep["extrapaths"]: + bb.utils.remove(path) + + def done(self, ud, d): + """Is the download done ?""" + def _handle(m, ud, d): + return m.done(ud, d) + return all(self._foreach_proxy_method(ud, _handle)) diff --git a/lib/bb/fetch2/osc.py b/lib/bb/fetch2/osc.py index 2b4f7d9c1..495ac8a30 100644 --- a/lib/bb/fetch2/osc.py +++ b/lib/bb/fetch2/osc.py @@ -1,20 +1,25 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- +# +# Copyright BitBake Contributors +# +# SPDX-License-Identifier: GPL-2.0-only +# """ Bitbake "Fetch" implementation for osc (Opensuse build service client). Based on the svn "Fetch" implementation. """ -import os -import sys import logging +import os +import re import bb from bb.fetch2 import FetchMethod from bb.fetch2 import FetchError from bb.fetch2 import MissingParameterError from bb.fetch2 import runfetchcmd +logger = logging.getLogger(__name__) + class Osc(FetchMethod): """Class to fetch a module or modules from Opensuse build server repositories.""" @@ -32,21 +37,23 @@ class Osc(FetchMethod): ud.module = ud.parm["module"] # Create paths to osc checkouts + oscdir = d.getVar("OSCDIR") or (d.getVar("DL_DIR") + "/osc") relpath = self._strip_leading_slashes(ud.path) - ud.pkgdir = os.path.join(d.getVar('OSCDIR'), ud.host) + ud.oscdir = oscdir + ud.pkgdir = os.path.join(oscdir, ud.host) ud.moddir = os.path.join(ud.pkgdir, relpath, ud.module) if 'rev' in ud.parm: ud.revision = ud.parm['rev'] else: pv = d.getVar("PV", False) - rev = bb.fetch2.srcrev_internal_helper(ud, d) - if rev and rev != True: + rev = bb.fetch2.srcrev_internal_helper(ud, d, '') + if rev: ud.revision = rev else: ud.revision = "" - ud.localfile = d.expand('%s_%s_%s.tar.gz' % (ud.module.replace('/', '.'), ud.path.replace('/', '.'), ud.revision)) + ud.localfile = d.expand('%s_%s_%s.tar.gz' % (ud.module.replace('/', '.'), relpath.replace('/', '.'), ud.revision)) def _buildosccommand(self, ud, d, command): """ @@ -54,40 +61,63 @@ class Osc(FetchMethod): command is "fetch", "update", "info" """ - basecmd = d.expand('${FETCHCMD_osc}') + basecmd = d.getVar("FETCHCMD_osc") or "/usr/bin/env osc" - proto = ud.parm.get('protocol', 'ocs') + proto = ud.parm.get('protocol', 'https') options = [] config = "-c %s" % self.generate_config(ud, d) - if ud.revision: + if getattr(ud, 'revision', ''): options.append("-r %s" % ud.revision) coroot = self._strip_leading_slashes(ud.path) if command == "fetch": - osccmd = "%s %s co %s/%s %s" % (basecmd, config, coroot, ud.module, " ".join(options)) + osccmd = "%s %s -A %s://%s co %s/%s %s" % (basecmd, config, proto, ud.host, coroot, ud.module, " ".join(options)) elif command == "update": - osccmd = "%s %s up %s" % (basecmd, config, " ".join(options)) + osccmd = "%s %s -A %s://%s up %s" % (basecmd, config, proto, ud.host, " ".join(options)) + elif command == "api_source": + osccmd = "%s %s -A %s://%s api source/%s/%s" % (basecmd, config, proto, ud.host, coroot, ud.module) else: raise FetchError("Invalid osc command %s" % command, ud.url) return osccmd + def _latest_revision(self, ud, d, name): + """ + Fetch latest revision for the given package + """ + api_source_cmd = self._buildosccommand(ud, d, "api_source") + + output = runfetchcmd(api_source_cmd, d) + match = re.match(r'<directory ?.* rev="(\d+)".*>', output) + if match is None: + raise FetchError("Unable to parse osc response", ud.url) + return match.groups()[0] + + def _revision_key(self, ud, d, name): + """ + Return a unique key for the url + """ + # Collapse adjacent slashes + slash_re = re.compile(r"/+") + rev = getattr(ud, 'revision', "latest") + return "osc:%s%s.%s.%s" % (ud.host, slash_re.sub(".", ud.path), name, rev) + def download(self, ud, d): """ Fetch url """ - logger.debug(2, "Fetch: checking for module directory '" + ud.moddir + "'") + logger.debug2("Fetch: checking for module directory '" + ud.moddir + "'") - if os.access(os.path.join(d.getVar('OSCDIR'), ud.path, ud.module), os.R_OK): + if os.access(ud.moddir, os.R_OK): oscupdatecmd = self._buildosccommand(ud, d, "update") logger.info("Update "+ ud.url) # update sources there - logger.debug(1, "Running %s", oscupdatecmd) + logger.debug("Running %s", oscupdatecmd) bb.fetch2.check_network_access(d, oscupdatecmd, ud.url) runfetchcmd(oscupdatecmd, d, workdir=ud.moddir) else: @@ -95,7 +125,7 @@ class Osc(FetchMethod): logger.info("Fetch " + ud.url) # check out sources there bb.utils.mkdirhier(ud.pkgdir) - logger.debug(1, "Running %s", oscfetchcmd) + logger.debug("Running %s", oscfetchcmd) bb.fetch2.check_network_access(d, oscfetchcmd, ud.url) runfetchcmd(oscfetchcmd, d, workdir=ud.pkgdir) @@ -111,20 +141,23 @@ class Osc(FetchMethod): Generate a .oscrc to be used for this run. """ - config_path = os.path.join(d.getVar('OSCDIR'), "oscrc") + config_path = os.path.join(ud.oscdir, "oscrc") + if not os.path.exists(ud.oscdir): + bb.utils.mkdirhier(ud.oscdir) + if (os.path.exists(config_path)): os.remove(config_path) f = open(config_path, 'w') + proto = ud.parm.get('protocol', 'https') f.write("[general]\n") - f.write("apisrv = %s\n" % ud.host) - f.write("scheme = http\n") + f.write("apiurl = %s://%s\n" % (proto, ud.host)) f.write("su-wrapper = su -c\n") f.write("build-root = %s\n" % d.getVar('WORKDIR')) f.write("urllist = %s\n" % d.getVar("OSCURLLIST")) f.write("extra-pkgs = gzip\n") f.write("\n") - f.write("[%s]\n" % ud.host) + f.write("[%s://%s]\n" % (proto, ud.host)) f.write("user = %s\n" % ud.parm["user"]) f.write("pass = %s\n" % ud.parm["pswd"]) f.close() diff --git a/lib/bb/fetch2/perforce.py b/lib/bb/fetch2/perforce.py index 3debad59f..3b6fa4b1e 100644 --- a/lib/bb/fetch2/perforce.py +++ b/lib/bb/fetch2/perforce.py @@ -1,36 +1,66 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- """ BitBake 'Fetch' implementation for perforce +Supported SRC_URI options are: + +- module + The top-level location to fetch while preserving the remote paths + + The value of module can point to either a directory or a file. The result, + in both cases, is that the fetcher will preserve all file paths starting + from the module path. That is, the top-level directory in the module value + will also be the top-level directory in P4DIR. + +- remotepath + If the value "keep" is given, the full depot location of each file is + preserved in P4DIR. This option overrides the effect of the module option. + """ # Copyright (C) 2003, 2004 Chris Larson # Copyright (C) 2016 Kodak Alaris, Inc. # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# SPDX-License-Identifier: GPL-2.0-only # # Based on functions from the base bb module, Copyright 2003 Holger Schurig import os -import logging import bb from bb.fetch2 import FetchMethod from bb.fetch2 import FetchError from bb.fetch2 import logger from bb.fetch2 import runfetchcmd +class PerforceProgressHandler (bb.progress.BasicProgressHandler): + """ + Implements basic progress information for perforce, based on the number of + files to be downloaded. + + The p4 print command will print one line per file, therefore it can be used + to "count" the number of files already completed and give an indication of + the progress. + """ + def __init__(self, d, num_files): + self._num_files = num_files + self._count = 0 + super(PerforceProgressHandler, self).__init__(d) + + # Send an initial progress event so the bar gets shown + self._fire_progress(-1) + + def write(self, string): + self._count = self._count + 1 + + percent = int(100.0 * float(self._count) / float(self._num_files)) + + # In case something goes wrong, we try to preserve our sanity + if percent > 100: + percent = 100 + + self.update(percent) + + super(PerforceProgressHandler, self).write(string) + class Perforce(FetchMethod): """ Class to fetch from perforce repositories """ def supports(self, ud, d): @@ -43,13 +73,9 @@ class Perforce(FetchMethod): provided by the env, use it. If P4PORT is specified by the recipe, use its values, which may override the settings in P4CONFIG. """ - ud.basecmd = d.getVar('FETCHCMD_p4') - if not ud.basecmd: - ud.basecmd = "/usr/bin/env p4" + ud.basecmd = d.getVar("FETCHCMD_p4") or "/usr/bin/env p4" - ud.dldir = d.getVar('P4DIR') - if not ud.dldir: - ud.dldir = '%s/%s' % (d.getVar('DL_DIR'), 'p4') + ud.dldir = d.getVar("P4DIR") or (d.getVar("DL_DIR") + "/p4") path = ud.url.split('://')[1] path = path.split(';')[0] @@ -64,31 +90,51 @@ class Perforce(FetchMethod): p4port = d.getVar('P4PORT') if p4port: - logger.debug(1, 'Using recipe provided P4PORT: %s' % p4port) + logger.debug('Using recipe provided P4PORT: %s' % p4port) ud.host = p4port else: - logger.debug(1, 'Trying to use P4CONFIG to automatically set P4PORT...') + logger.debug('Trying to use P4CONFIG to automatically set P4PORT...') ud.usingp4config = True p4cmd = '%s info | grep "Server address"' % ud.basecmd bb.fetch2.check_network_access(d, p4cmd, ud.url) ud.host = runfetchcmd(p4cmd, d, True) ud.host = ud.host.split(': ')[1].strip() - logger.debug(1, 'Determined P4PORT to be: %s' % ud.host) + logger.debug('Determined P4PORT to be: %s' % ud.host) if not ud.host: raise FetchError('Could not determine P4PORT from P4CONFIG') - + + # Fetcher options + ud.module = ud.parm.get('module') + ud.keepremotepath = (ud.parm.get('remotepath', '') == 'keep') + if ud.path.find('/...') >= 0: ud.pathisdir = True else: ud.pathisdir = False + # Avoid using the "/..." syntax in SRC_URI when a module value is given + if ud.pathisdir and ud.module: + raise FetchError('SRC_URI depot path cannot not end in /... when a module value is given') + cleanedpath = ud.path.replace('/...', '').replace('/', '.') cleanedhost = ud.host.replace(':', '.') + + cleanedmodule = "" + # Merge the path and module into the final depot location + if ud.module: + if ud.module.find('/') == 0: + raise FetchError('module cannot begin with /') + ud.path = os.path.join(ud.path, ud.module) + + # Append the module path to the local pkg name + cleanedmodule = ud.module.replace('/...', '').replace('/', '.') + cleanedpath += '--%s' % cleanedmodule + ud.pkgdir = os.path.join(ud.dldir, cleanedhost, cleanedpath) ud.setup_revisions(d) - ud.localfile = d.expand('%s_%s_%s.tar.gz' % (cleanedhost, cleanedpath, ud.revision)) + ud.localfile = d.expand('%s_%s_%s_%s.tar.gz' % (cleanedhost, cleanedpath, cleanedmodule, ud.revision)) def _buildp4command(self, ud, d, command, depot_filename=None): """ @@ -113,16 +159,26 @@ class Perforce(FetchMethod): pathnrev = '%s' % (ud.path) if depot_filename: - if ud.pathisdir: # Remove leading path to obtain filename + if ud.keepremotepath: + # preserve everything, remove the leading // + filename = depot_filename.lstrip('/') + elif ud.module: + # remove everything up to the module path + modulepath = ud.module.rstrip('/...') + filename = depot_filename[depot_filename.rfind(modulepath):] + elif ud.pathisdir: + # Remove leading (visible) path to obtain the filepath filename = depot_filename[len(ud.path)-1:] else: + # Remove everything, except the filename filename = depot_filename[depot_filename.rfind('/'):] + filename = filename[:filename.find('#')] # Remove trailing '#rev' if command == 'changes': p4cmd = '%s%s changes -m 1 //%s' % (ud.basecmd, p4opt, pathnrev) elif command == 'print': - if depot_filename != None: + if depot_filename is not None: p4cmd = '%s%s print -o "p4/%s" "%s"' % (ud.basecmd, p4opt, filename, depot_filename) else: raise FetchError('No depot file name provided to p4 %s' % command, ud.url) @@ -152,7 +208,7 @@ class Perforce(FetchMethod): for filename in p4fileslist: item = filename.split(' - ') lastaction = item[1].split() - logger.debug(1, 'File: %s Last Action: %s' % (item[0], lastaction[0])) + logger.debug('File: %s Last Action: %s' % (item[0], lastaction[0])) if lastaction[0] == 'delete': continue filelist.append(item[0]) @@ -168,10 +224,12 @@ class Perforce(FetchMethod): bb.utils.remove(ud.pkgdir, True) bb.utils.mkdirhier(ud.pkgdir) + progresshandler = PerforceProgressHandler(d, len(filelist)) + for afile in filelist: p4fetchcmd = self._buildp4command(ud, d, 'print', afile) bb.fetch2.check_network_access(d, p4fetchcmd, ud.url) - runfetchcmd(p4fetchcmd, d, workdir=ud.pkgdir) + runfetchcmd(p4fetchcmd, d, workdir=ud.pkgdir, log=progresshandler) runfetchcmd('tar -czf %s p4' % (ud.localpath), d, cleanup=[ud.localpath], workdir=ud.pkgdir) @@ -197,7 +255,7 @@ class Perforce(FetchMethod): raise FetchError('Could not determine the latest perforce changelist') tipcset = tip.split(' ')[1] - logger.debug(1, 'p4 tip found to be changelist %s' % tipcset) + logger.debug('p4 tip found to be changelist %s' % tipcset) return tipcset def sortable_revision(self, ud, d, name): diff --git a/lib/bb/fetch2/repo.py b/lib/bb/fetch2/repo.py index c22d9b557..fa4cb8149 100644 --- a/lib/bb/fetch2/repo.py +++ b/lib/bb/fetch2/repo.py @@ -1,5 +1,3 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- """ BitBake "Fetch" repo (git) implementation @@ -8,20 +6,10 @@ BitBake "Fetch" repo (git) implementation # Copyright (C) 2009 Tom Rini <trini@embeddedalley.com> # # Based on git.py which is: -#Copyright (C) 2005 Richard Purdie +# Copyright (C) 2005 Richard Purdie # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# SPDX-License-Identifier: GPL-2.0-only # -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. import os import bb @@ -45,6 +33,8 @@ class Repo(FetchMethod): "master". """ + ud.basecmd = d.getVar("FETCHCMD_repo") or "/usr/bin/env repo" + ud.proto = ud.parm.get('protocol', 'git') ud.branch = ud.parm.get('branch', 'master') ud.manifest = ud.parm.get('manifest', 'default.xml') @@ -57,11 +47,11 @@ class Repo(FetchMethod): """Fetch url""" if os.access(os.path.join(d.getVar("DL_DIR"), ud.localfile), os.R_OK): - logger.debug(1, "%s already exists (or was stashed). Skipping repo init / sync.", ud.localpath) + logger.debug("%s already exists (or was stashed). Skipping repo init / sync.", ud.localpath) return + repodir = d.getVar("REPODIR") or (d.getVar("DL_DIR") + "/repo") gitsrcname = "%s%s" % (ud.host, ud.path.replace("/", ".")) - repodir = d.getVar("REPODIR") or os.path.join(d.getVar("DL_DIR"), "repo") codir = os.path.join(repodir, gitsrcname, ud.manifest) if ud.user: @@ -72,11 +62,11 @@ class Repo(FetchMethod): repodir = os.path.join(codir, "repo") bb.utils.mkdirhier(repodir) if not os.path.exists(os.path.join(repodir, ".repo")): - bb.fetch2.check_network_access(d, "repo init -m %s -b %s -u %s://%s%s%s" % (ud.manifest, ud.branch, ud.proto, username, ud.host, ud.path), ud.url) - runfetchcmd("repo init -m %s -b %s -u %s://%s%s%s" % (ud.manifest, ud.branch, ud.proto, username, ud.host, ud.path), d, workdir=repodir) + bb.fetch2.check_network_access(d, "%s init -m %s -b %s -u %s://%s%s%s" % (ud.basecmd, ud.manifest, ud.branch, ud.proto, username, ud.host, ud.path), ud.url) + runfetchcmd("%s init -m %s -b %s -u %s://%s%s%s" % (ud.basecmd, ud.manifest, ud.branch, ud.proto, username, ud.host, ud.path), d, workdir=repodir) - bb.fetch2.check_network_access(d, "repo sync %s" % ud.url, ud.url) - runfetchcmd("repo sync", d, workdir=repodir) + bb.fetch2.check_network_access(d, "%s sync %s" % (ud.basecmd, ud.url), ud.url) + runfetchcmd("%s sync" % ud.basecmd, d, workdir=repodir) scmdata = ud.parm.get("scmdata", "") if scmdata == "keep": diff --git a/lib/bb/fetch2/s3.py b/lib/bb/fetch2/s3.py index 162928862..6b8ffd535 100644 --- a/lib/bb/fetch2/s3.py +++ b/lib/bb/fetch2/s3.py @@ -1,5 +1,3 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- """ BitBake 'Fetch' implementation for Amazon AWS S3. @@ -13,28 +11,54 @@ The aws tool must be correctly installed and configured prior to use. # Based in part on bb.fetch2.wget: # Copyright (C) 2003, 2004 Chris Larson # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# SPDX-License-Identifier: GPL-2.0-only # # Based on functions from the base bb module, Copyright 2003 Holger Schurig import os import bb import urllib.request, urllib.parse, urllib.error +import re from bb.fetch2 import FetchMethod from bb.fetch2 import FetchError from bb.fetch2 import runfetchcmd +def convertToBytes(value, unit): + value = float(value) + if (unit == "KiB"): + value = value*1024.0; + elif (unit == "MiB"): + value = value*1024.0*1024.0; + elif (unit == "GiB"): + value = value*1024.0*1024.0*1024.0; + return value + +class S3ProgressHandler(bb.progress.LineFilterProgressHandler): + """ + Extract progress information from s3 cp output, e.g.: + Completed 5.1 KiB/8.8 GiB (12.0 MiB/s) with 1 file(s) remaining + """ + def __init__(self, d): + super(S3ProgressHandler, self).__init__(d) + # Send an initial progress event so the bar gets shown + self._fire_progress(0) + + def writeline(self, line): + percs = re.findall(r'^Completed (\d+.{0,1}\d*) (\w+)\/(\d+.{0,1}\d*) (\w+) (\(.+\)) with\s+', line) + if percs: + completed = (percs[-1][0]) + completedUnit = (percs[-1][1]) + total = (percs[-1][2]) + totalUnit = (percs[-1][3]) + completed = convertToBytes(completed, completedUnit) + total = convertToBytes(total, totalUnit) + progress = (completed/total)*100.0 + rate = percs[-1][4] + self.update(progress, rate) + return False + return True + + class S3(FetchMethod): """Class to fetch urls via 'aws s3'""" @@ -65,7 +89,9 @@ class S3(FetchMethod): cmd = '%s cp s3://%s%s %s' % (ud.basecmd, ud.host, ud.path, ud.localpath) bb.fetch2.check_network_access(d, cmd, ud.url) - runfetchcmd(cmd, d) + + progresshandler = S3ProgressHandler(d) + runfetchcmd(cmd, d, False, log=progresshandler) # Additional sanity checks copied from the wget class (although there # are no known issues which mean these are required, treat the aws cli diff --git a/lib/bb/fetch2/sftp.py b/lib/bb/fetch2/sftp.py index 81884a6aa..7884cce94 100644 --- a/lib/bb/fetch2/sftp.py +++ b/lib/bb/fetch2/sftp.py @@ -1,5 +1,3 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- """ BitBake SFTP Fetch implementation @@ -44,18 +42,7 @@ SRC_URI = "sftp://user@host.example.com/dir/path.file.txt" # Based in part on bb.fetch2.wget: # Copyright (C) 2003, 2004 Chris Larson # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# SPDX-License-Identifier: GPL-2.0-only # # Based on functions from the base bb module, Copyright 2003 Holger Schurig @@ -116,7 +103,7 @@ class SFTP(FetchMethod): if path[:3] == '/~/': path = path[3:] - remote = '%s%s:%s' % (user, urlo.hostname, path) + remote = '"%s%s:%s"' % (user, urlo.hostname, path) cmd = '%s %s %s %s' % (basecmd, port, remote, lpath) diff --git a/lib/bb/fetch2/ssh.py b/lib/bb/fetch2/ssh.py index 6047ee417..0cbb2a6f2 100644 --- a/lib/bb/fetch2/ssh.py +++ b/lib/bb/fetch2/ssh.py @@ -1,5 +1,3 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- ''' BitBake 'Fetch' implementations @@ -29,24 +27,12 @@ IETF secsh internet draft: # Copyright 2003 Holger Schurig # # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# SPDX-License-Identifier: GPL-2.0-only # -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. import re, os -from bb.fetch2 import FetchMethod -from bb.fetch2 import FetchError -from bb.fetch2 import logger -from bb.fetch2 import runfetchcmd +from bb.fetch2 import check_network_access, FetchMethod, ParameterError, runfetchcmd +import urllib __pattern__ = re.compile(r''' @@ -55,9 +41,9 @@ __pattern__ = re.compile(r''' ( # Optional username/password block (?P<user>\S+) # username (:(?P<pass>\S+))? # colon followed by the password (optional) - )? (?P<cparam>(;[^;]+)*)? # connection parameters block (optional) @ + )? (?P<host>\S+?) # non-greedy match of the host (:(?P<port>[0-9]+))? # colon followed by the port (optional) / @@ -72,19 +58,20 @@ class SSH(FetchMethod): '''Class to fetch a module or modules via Secure Shell''' def supports(self, urldata, d): - return __pattern__.match(urldata.url) != None + return __pattern__.match(urldata.url) is not None def supports_checksum(self, urldata): return False def urldata_init(self, urldata, d): if 'protocol' in urldata.parm and urldata.parm['protocol'] == 'git': - raise bb.fetch2.ParameterError( + raise ParameterError( "Invalid protocol - if you wish to fetch from a git " + "repository using ssh, you need to use " + "git:// prefix with protocol=ssh", urldata.url) m = __pattern__.match(urldata.url) path = m.group('path') + path = urllib.parse.unquote(path) host = m.group('host') urldata.localpath = os.path.join(d.getVar('DL_DIR'), os.path.basename(os.path.normpath(path))) @@ -111,6 +98,11 @@ class SSH(FetchMethod): fr += '@%s' % host else: fr = host + + if path[0] != '~': + path = '/%s' % path + path = urllib.parse.unquote(path) + fr += ':%s' % path cmd = 'scp -B -r %s %s %s/' % ( @@ -119,7 +111,45 @@ class SSH(FetchMethod): dldir ) - bb.fetch2.check_network_access(d, cmd, urldata.url) + check_network_access(d, cmd, urldata.url) + + runfetchcmd(cmd, d) + + def checkstatus(self, fetch, urldata, d): + """ + Check the status of the url + """ + m = __pattern__.match(urldata.url) + path = m.group('path') + host = m.group('host') + port = m.group('port') + user = m.group('user') + password = m.group('pass') + + if port: + portarg = '-P %s' % port + else: + portarg = '' + + if user: + fr = user + if password: + fr += ':%s' % password + fr += '@%s' % host + else: + fr = host + + if path[0] != '~': + path = '/%s' % path + path = urllib.parse.unquote(path) + + cmd = 'ssh -o BatchMode=true %s %s [ -f %s ]' % ( + portarg, + fr, + path + ) + check_network_access(d, cmd, urldata.url) runfetchcmd(cmd, d) + return True diff --git a/lib/bb/fetch2/svn.py b/lib/bb/fetch2/svn.py index 3f172eec9..0852108e7 100644 --- a/lib/bb/fetch2/svn.py +++ b/lib/bb/fetch2/svn.py @@ -1,5 +1,3 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- """ BitBake 'Fetch' implementation for svn. @@ -8,24 +6,11 @@ BitBake 'Fetch' implementation for svn. # Copyright (C) 2003, 2004 Chris Larson # Copyright (C) 2004 Marcin Juszkiewicz # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# SPDX-License-Identifier: GPL-2.0-only # # Based on functions from the base bb module, Copyright 2003 Holger Schurig import os -import sys -import logging import bb import re from bb.fetch2 import FetchMethod @@ -49,7 +34,7 @@ class Svn(FetchMethod): if not "module" in ud.parm: raise MissingParameterError('module', ud.url) - ud.basecmd = d.getVar('FETCHCMD_svn') + ud.basecmd = d.getVar("FETCHCMD_svn") or "/usr/bin/env svn --non-interactive --trust-server-cert" ud.module = ud.parm["module"] @@ -59,16 +44,25 @@ class Svn(FetchMethod): ud.path_spec = ud.parm["path_spec"] # Create paths to svn checkouts + svndir = d.getVar("SVNDIR") or (d.getVar("DL_DIR") + "/svn") relpath = self._strip_leading_slashes(ud.path) - ud.pkgdir = os.path.join(d.expand('${SVNDIR}'), ud.host, relpath) - ud.moddir = os.path.join(ud.pkgdir, ud.module) + ud.pkgdir = os.path.join(svndir, ud.host, relpath) + ud.moddir = os.path.join(ud.pkgdir, ud.path_spec) + # Protects the repository from concurrent updates, e.g. from two + # recipes fetching different revisions at the same time + ud.svnlock = os.path.join(ud.pkgdir, "svn.lock") ud.setup_revisions(d) if 'rev' in ud.parm: ud.revision = ud.parm['rev'] - ud.localfile = d.expand('%s_%s_%s_%s_.tar.gz' % (ud.module.replace('/', '.'), ud.host, ud.path.replace('/', '.'), ud.revision)) + # Whether to use the @REV peg-revision syntax in the svn command or not + ud.pegrevision = True + if 'nopegrevision' in ud.parm: + ud.pegrevision = False + + ud.localfile = d.expand('%s_%s_%s_%s_%s.tar.gz' % (ud.module.replace('/', '.'), ud.host, ud.path.replace('/', '.'), ud.revision, ["0", "1"][ud.pegrevision])) def _buildsvncommand(self, ud, d, command): """ @@ -97,12 +91,20 @@ class Svn(FetchMethod): if command == "info": svncmd = "%s info %s %s://%s/%s/" % (ud.basecmd, " ".join(options), proto, svnroot, ud.module) elif command == "log1": - svncmd = "%s log --limit 1 %s %s://%s/%s/" % (ud.basecmd, " ".join(options), proto, svnroot, ud.module) + svncmd = "%s log --limit 1 --quiet %s %s://%s/%s/" % (ud.basecmd, " ".join(options), proto, svnroot, ud.module) else: suffix = "" + + # externals may be either 'allowed' or 'nowarn', but not both. Allowed + # will not issue a warning, but will log to the debug buffer what has likely + # been downloaded by SVN. + if not ("externals" in ud.parm and ud.parm["externals"] == "allowed"): + options.append("--ignore-externals") + if ud.revision: options.append("-r %s" % ud.revision) - suffix = "@%s" % (ud.revision) + if ud.pegrevision: + suffix = "@%s" % (ud.revision) if command == "fetch": transportuser = ud.parm.get("transportuser", "") @@ -120,37 +122,54 @@ class Svn(FetchMethod): def download(self, ud, d): """Fetch url""" - logger.debug(2, "Fetch: checking for module directory '" + ud.moddir + "'") - - if os.access(os.path.join(ud.moddir, '.svn'), os.R_OK): - svnupdatecmd = self._buildsvncommand(ud, d, "update") - logger.info("Update " + ud.url) - # We need to attempt to run svn upgrade first in case its an older working format - try: - runfetchcmd(ud.basecmd + " upgrade", d, workdir=ud.moddir) - except FetchError: - pass - logger.debug(1, "Running %s", svnupdatecmd) - bb.fetch2.check_network_access(d, svnupdatecmd, ud.url) - runfetchcmd(svnupdatecmd, d, workdir=ud.moddir) - else: - svnfetchcmd = self._buildsvncommand(ud, d, "fetch") - logger.info("Fetch " + ud.url) - # check out sources there - bb.utils.mkdirhier(ud.pkgdir) - logger.debug(1, "Running %s", svnfetchcmd) - bb.fetch2.check_network_access(d, svnfetchcmd, ud.url) - runfetchcmd(svnfetchcmd, d, workdir=ud.pkgdir) - - scmdata = ud.parm.get("scmdata", "") - if scmdata == "keep": - tar_flags = "" - else: - tar_flags = "--exclude='.svn'" + logger.debug2("Fetch: checking for module directory '" + ud.moddir + "'") + + lf = bb.utils.lockfile(ud.svnlock) + + try: + if os.access(os.path.join(ud.moddir, '.svn'), os.R_OK): + svncmd = self._buildsvncommand(ud, d, "update") + logger.info("Update " + ud.url) + # We need to attempt to run svn upgrade first in case its an older working format + try: + runfetchcmd(ud.basecmd + " upgrade", d, workdir=ud.moddir) + except FetchError: + pass + logger.debug("Running %s", svncmd) + bb.fetch2.check_network_access(d, svncmd, ud.url) + runfetchcmd(svncmd, d, workdir=ud.moddir) + else: + svncmd = self._buildsvncommand(ud, d, "fetch") + logger.info("Fetch " + ud.url) + # check out sources there + bb.utils.mkdirhier(ud.pkgdir) + logger.debug("Running %s", svncmd) + bb.fetch2.check_network_access(d, svncmd, ud.url) + runfetchcmd(svncmd, d, workdir=ud.pkgdir) + + if not ("externals" in ud.parm and ud.parm["externals"] == "nowarn"): + # Warn the user if this had externals (won't catch them all) + output = runfetchcmd("svn propget svn:externals || true", d, workdir=ud.moddir) + if output: + if "--ignore-externals" in svncmd.split(): + bb.warn("%s contains svn:externals." % ud.url) + bb.warn("These should be added to the recipe SRC_URI as necessary.") + bb.warn("svn fetch has ignored externals:\n%s" % output) + bb.warn("To disable this warning add ';externals=nowarn' to the url.") + else: + bb.debug(1, "svn repository has externals:\n%s" % output) + + scmdata = ud.parm.get("scmdata", "") + if scmdata == "keep": + tar_flags = "" + else: + tar_flags = "--exclude='.svn'" - # tar them up to a defined filename - runfetchcmd("tar %s -czf %s %s" % (tar_flags, ud.localpath, ud.path_spec), d, - cleanup=[ud.localpath], workdir=ud.pkgdir) + # tar them up to a defined filename + runfetchcmd("tar %s -czf %s %s" % (tar_flags, ud.localpath, ud.path_spec), d, + cleanup=[ud.localpath], workdir=ud.pkgdir) + finally: + bb.utils.unlockfile(lf) def clean(self, ud, d): """ Clean SVN specific files and dirs """ @@ -191,3 +210,6 @@ class Svn(FetchMethod): def _build_revision(self, ud, d): return ud.revision + + def supports_checksum(self, urldata): + return False diff --git a/lib/bb/fetch2/wget.py b/lib/bb/fetch2/wget.py index 8f505b6de..fbfa6938a 100644 --- a/lib/bb/fetch2/wget.py +++ b/lib/bb/fetch2/wget.py @@ -1,5 +1,3 @@ -# ex:ts=4:sw=4:sts=4:et -# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- """ BitBake 'Fetch' implementations @@ -10,35 +8,24 @@ BitBake build tools. # Copyright (C) 2003, 2004 Chris Larson # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# SPDX-License-Identifier: GPL-2.0-only # # Based on functions from the base bb module, Copyright 2003 Holger Schurig +import shlex import re import tempfile -import subprocess import os -import logging import errno import bb import bb.progress +import socket +import http.client import urllib.request, urllib.parse, urllib.error from bb.fetch2 import FetchMethod from bb.fetch2 import FetchError from bb.fetch2 import logger from bb.fetch2 import runfetchcmd -from bb.utils import export_proxies from bs4 import BeautifulSoup from bs4 import SoupStrainer @@ -65,11 +52,23 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler): class Wget(FetchMethod): """Class to fetch urls via 'wget'""" + + # CDNs like CloudFlare may do a 'browser integrity test' which can fail + # with the standard wget/urllib User-Agent, so pretend to be a modern + # browser. + user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0" + + def check_certs(self, d): + """ + Should certificates be checked? + """ + return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0" + def supports(self, ud, d): """ Check to see if a given url can be fetched with wget. """ - return ud.type in ['http', 'https', 'ftp'] + return ud.type in ['http', 'https', 'ftp', 'ftps'] def recommends_checksum(self, urldata): return True @@ -88,13 +87,19 @@ class Wget(FetchMethod): if not ud.localfile: ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", ".")) - self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate" + self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30" + + if ud.type == 'ftp' or ud.type == 'ftps': + self.basecmd += " --passive-ftp" + + if not self.check_certs(d): + self.basecmd += " --no-check-certificate" def _runwget(self, ud, d, command, quiet, workdir=None): progresshandler = WgetProgressHandler(d) - logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command)) + logger.debug2("Fetching %s using command '%s'" % (ud.url, command)) bb.fetch2.check_network_access(d, command, ud.url) runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir) @@ -103,13 +108,22 @@ class Wget(FetchMethod): fetchcmd = self.basecmd - if 'downloadfilename' in ud.parm: - dldir = d.getVar("DL_DIR") - bb.utils.mkdirhier(os.path.dirname(dldir + os.sep + ud.localfile)) - fetchcmd += " -O " + dldir + os.sep + ud.localfile + localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp" + bb.utils.mkdirhier(os.path.dirname(localpath)) + fetchcmd += " -O %s" % shlex.quote(localpath) if ud.user and ud.pswd: - fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd) + fetchcmd += " --auth-no-challenge" + if ud.parm.get("redirectauth", "1") == "1": + # An undocumented feature of wget is that if the + # username/password are specified on the URI, wget will only + # send the Authorization header to the first host and not to + # any hosts that it is redirected to. With the increasing + # usage of temporary AWS URLs, this difference now matters as + # AWS will reject any request that has authentication both in + # the query parameters (from the redirect) and in the + # Authorization header. + fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd) uri = ud.url.split(";")[0] if os.path.exists(ud.localpath): @@ -120,6 +134,15 @@ class Wget(FetchMethod): self._runwget(ud, d, fetchcmd, False) + # Try and verify any checksum now, meaning if it isn't correct, we don't remove the + # original file, which might be a race (imagine two recipes referencing the same + # source, one with an incorrect checksum) + bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False) + + # Remove the ".tmp" and move the file into position atomically + # Our lock prevents multiple writers but mirroring code may grab incomplete files + os.rename(localpath, localpath[:-4]) + # Sanity check since wget can pretend it succeed when it didn't # Also, this used to happen if sourceforge sent us to the mirror page if not os.path.exists(ud.localpath): @@ -132,10 +155,6 @@ class Wget(FetchMethod): return True def checkstatus(self, fetch, ud, d, try_again=True): - import urllib.request, urllib.error, urllib.parse, socket, http.client - from urllib.response import addinfourl - from bb.fetch2 import FetchConnectionCache - class HTTPConnectionCache(http.client.HTTPConnection): if fetch.connection_cache: def connect(self): @@ -168,7 +187,7 @@ class Wget(FetchMethod): """ host = req.host if not host: - raise urlllib2.URLError('no host given') + raise urllib.error.URLError('no host given') h = http_class(host, timeout=req.timeout) # will parse host:port h.set_debuglevel(self._debuglevel) @@ -185,7 +204,7 @@ class Wget(FetchMethod): # request. # Don't close connection when connection_cache is enabled, - if fetch.connection_cache is None: + if fetch.connection_cache is None: headers["Connection"] = "close" else: headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0 @@ -219,15 +238,12 @@ class Wget(FetchMethod): # We let the request fail and expect it to be # tried once more ("try_again" in check_status()), # with the dead connection removed from the cache. - # If it still fails, we give up, which can happend for bad + # If it still fails, we give up, which can happen for bad # HTTP proxy settings. fetch.connection_cache.remove_connection(h.host, h.port) raise urllib.error.URLError(err) else: - try: - r = h.getresponse(buffering=True) - except TypeError: # buffering kw not supported - r = h.getresponse() + r = h.getresponse() # Pick apart the HTTPResponse object to get the addinfourl # object initialized properly. @@ -252,7 +268,7 @@ class Wget(FetchMethod): pass closed = False - resp = addinfourl(fp_dummy(), r.msg, req.get_full_url()) + resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url()) resp.code = r.status resp.msg = r.reason @@ -271,17 +287,18 @@ class Wget(FetchMethod): fp.read() fp.close() - newheaders = dict((k,v) for k,v in list(req.headers.items()) - if k.lower() not in ("content-length", "content-type")) - return self.parent.open(urllib.request.Request(req.get_full_url(), - headers=newheaders, - origin_req_host=req.origin_req_host, - unverifiable=True)) + if req.get_method() != 'GET': + newheaders = dict((k, v) for k, v in list(req.headers.items()) + if k.lower() not in ("content-length", "content-type")) + return self.parent.open(urllib.request.Request(req.get_full_url(), + headers=newheaders, + origin_req_host=req.origin_req_host, + unverifiable=True)) - """ - Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403 - Forbidden when they actually mean 405 Method Not Allowed. - """ + raise urllib.request.HTTPError(req, code, msg, headers, None) + + # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403 + # Forbidden when they actually mean 405 Method Not Allowed. http_error_403 = http_error_405 @@ -292,57 +309,78 @@ class Wget(FetchMethod): """ def redirect_request(self, req, fp, code, msg, headers, newurl): newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) - newreq.get_method = lambda: req.get_method() + newreq.get_method = req.get_method return newreq - exported_proxies = export_proxies(d) - - handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback] - if export_proxies: - handlers.append(urllib.request.ProxyHandler()) - handlers.append(CacheHTTPHandler()) - # XXX: Since Python 2.7.9 ssl cert validation is enabled by default - # see PEP-0476, this causes verification errors on some https servers - # so disable by default. - import ssl - if hasattr(ssl, '_create_unverified_context'): - handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context())) - opener = urllib.request.build_opener(*handlers) - - try: - uri = ud.url.split(";")[0] - r = urllib.request.Request(uri) - r.get_method = lambda: "HEAD" - # Some servers (FusionForge, as used on Alioth) require that the - # optional Accept header is set. - r.add_header("Accept", "*/*") - def add_basic_auth(login_str, request): - '''Adds Basic auth to http request, pass in login:password as string''' - import base64 - encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8") - authheader = "Basic %s" % encodeuser - r.add_header("Authorization", authheader) - - if ud.user: - add_basic_auth(ud.user, r) - try: - import netrc, urllib.parse - n = netrc.netrc() - login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname) - add_basic_auth("%s:%s" % (login, password), r) - except (TypeError, ImportError, IOError, netrc.NetrcParseError): - pass - - with opener.open(r) as response: - pass - except urllib.error.URLError as e: - if try_again: - logger.debug(2, "checkstatus: trying again") - return self.checkstatus(fetch, ud, d, False) + # We need to update the environment here as both the proxy and HTTPS + # handlers need variables set. The proxy needs http_proxy and friends to + # be set, and HTTPSHandler ends up calling into openssl to load the + # certificates. In buildtools configurations this will be looking at the + # wrong place for certificates by default: we set SSL_CERT_FILE to the + # right location in the buildtools environment script but as BitBake + # prunes prunes the environment this is lost. When binaries are executed + # runfetchcmd ensures these values are in the environment, but this is + # pure Python so we need to update the environment. + # + # Avoid tramping the environment too much by using bb.utils.environment + # to scope the changes to the build_opener request, which is when the + # environment lookups happen. + newenv = bb.fetch2.get_fetcher_environment(d) + + with bb.utils.environment(**newenv): + import ssl + + if self.check_certs(d): + context = ssl.create_default_context() else: - # debug for now to avoid spamming the logs in e.g. remote sstate searches - logger.debug(2, "checkstatus() urlopen failed: %s" % e) - return False + context = ssl._create_unverified_context() + + handlers = [FixedHTTPRedirectHandler, + HTTPMethodFallback, + urllib.request.ProxyHandler(), + CacheHTTPHandler(), + urllib.request.HTTPSHandler(context=context)] + opener = urllib.request.build_opener(*handlers) + + try: + uri_base = ud.url.split(";")[0] + uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path) + r = urllib.request.Request(uri) + r.get_method = lambda: "HEAD" + # Some servers (FusionForge, as used on Alioth) require that the + # optional Accept header is set. + r.add_header("Accept", "*/*") + r.add_header("User-Agent", self.user_agent) + def add_basic_auth(login_str, request): + '''Adds Basic auth to http request, pass in login:password as string''' + import base64 + encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8") + authheader = "Basic %s" % encodeuser + r.add_header("Authorization", authheader) + + if ud.user and ud.pswd: + add_basic_auth(ud.user + ':' + ud.pswd, r) + + try: + import netrc + auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname) + if auth_data: + login, _, password = auth_data + add_basic_auth("%s:%s" % (login, password), r) + except (FileNotFoundError, netrc.NetrcParseError): + pass + + with opener.open(r, timeout=30) as response: + pass + except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e: + if try_again: + logger.debug2("checkstatus: trying again") + return self.checkstatus(fetch, ud, d, False) + else: + # debug for now to avoid spamming the logs in e.g. remote sstate searches + logger.debug2("checkstatus() urlopen failed for %s: %s" % (uri,e)) + return False + return True def _parse_path(self, regex, s): @@ -396,18 +434,14 @@ class Wget(FetchMethod): (oldpn, oldpv, oldsuffix) = old (newpn, newpv, newsuffix) = new - """ - Check for a new suffix type that we have never heard of before - """ - if (newsuffix): + # Check for a new suffix type that we have never heard of before + if newsuffix: m = self.suffix_regex_comp.search(newsuffix) if not m: bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix)) return False - """ - Not our package so ignore it - """ + # Not our package so ignore it if oldpn != newpn: return False @@ -422,9 +456,8 @@ class Wget(FetchMethod): """ f = tempfile.NamedTemporaryFile() with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f: - agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12" fetchcmd = self.basecmd - fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'" + fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'" try: self._runwget(ud, d, fetchcmd, True, workdir=workdir) fetchresult = f.read() @@ -473,15 +506,14 @@ class Wget(FetchMethod): return "" - def _check_latest_version_by_dir(self, dirver, package, package_regex, - current_version, ud, d): + def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d): """ - Scan every directory in order to get upstream version. + Scan every directory in order to get upstream version. """ version_dir = ['', '', ''] version = ['', '', ''] - dirver_regex = re.compile("(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))") + dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))") s = dirver_regex.search(dirver) if s: version_dir[1] = s.group('ver') @@ -541,26 +573,26 @@ class Wget(FetchMethod): gst-fluendo-mp3 """ # match most patterns which uses "-" as separator to version digits - pn_prefix1 = "[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]" + pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]" # a loose pattern such as for unzip552.tar.gz - pn_prefix2 = "[a-zA-Z]+" + pn_prefix2 = r"[a-zA-Z]+" # a loose pattern such as for 80325-quicky-0.4.tar.gz - pn_prefix3 = "[0-9]+[-]?[a-zA-Z]+" + pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+" # Save the Package Name (pn) Regex for use later - pn_regex = "(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3) + pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3) # match version - pver_regex = "(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)" + pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)" # match arch parch_regex = "-source|_all_" # src.rpm extension was added only for rpm package. Can be removed if the rpm # packaged will always be considered as having to be manually upgraded - psuffix_regex = "(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)" + psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)" # match name, version and archive type of a package - package_regex_comp = re.compile("(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)" + package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)" % (pn_regex, pver_regex, parch_regex, psuffix_regex)) self.suffix_regex_comp = re.compile(psuffix_regex) @@ -572,7 +604,7 @@ class Wget(FetchMethod): version = self._parse_path(package_regex_comp, package) if version: package_custom_regex_comp = re.compile( - "(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" % + r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" % (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex)) else: package_custom_regex_comp = None @@ -589,7 +621,7 @@ class Wget(FetchMethod): current_version = ['', d.getVar('PV'), ''] """possible to have no version in pkg name, such as spectrum-fw""" - if not re.search("\d+", package): + if not re.search(r"\d+", package): current_version[1] = re.sub('_', '.', current_version[1]) current_version[1] = re.sub('-', '.', current_version[1]) return (current_version[1], '') @@ -607,13 +639,13 @@ class Wget(FetchMethod): # search for version matches on folders inside the path, like: # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz - dirver_regex = re.compile("(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/") - m = dirver_regex.search(path) + dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/") + m = dirver_regex.findall(path) if m: pn = d.getVar('PN') - dirver = m.group('dirver') + dirver = m[-1][0] - dirver_pn_regex = re.compile("%s\d?" % (re.escape(pn))) + dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn))) if not dirver_pn_regex.search(dirver): return (self._check_latest_version_by_dir(dirver, package, package_regex, current_version, ud, d), '') |