From ee26ecf58277560459dd01992bb3f486f92c1531 Mon Sep 17 00:00:00 2001 From: Aníbal Limón Date: Fri, 10 Jun 2016 10:12:10 -0500 Subject: oe/distro_check.py: Fixes for python3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit create_socket: Use urllib because urllib2 is now urllib in python3, and passing proxies as an argument is deprecated, so export them in the environment instead. get_links_from_url: Replace the use of sgmllib for parsing HTML because it is deprecated in python 3; use bs4 instead, which is already imported in the bitbake tree. [YOCTO #9744] Signed-off-by: Aníbal Limón Signed-off-by: Ross Burton --- meta/lib/oe/distro_check.py | 82 +++++++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 48 deletions(-) diff --git a/meta/lib/oe/distro_check.py b/meta/lib/oe/distro_check.py index f1f1fbbb28..87c52fae9c 100644 --- a/meta/lib/oe/distro_check.py +++ b/meta/lib/oe/distro_check.py @@ -1,53 +1,35 @@ from contextlib import contextmanager -@contextmanager + +from bb.utils import export_proxies + def create_socket(url, d): - import urllib.request, urllib.parse, urllib.error - socket = urllib.request.urlopen(url, proxies=get_proxies(d)) + import urllib + + socket = None try: - yield socket - finally: - socket.close() + export_proxies(d) + socket = urllib.request.urlopen(url) + except: + bb.warn("distro_check: create_socket url %s can't access" % url) -def get_proxies(d): - proxies = {} - for key in ['http', 'https', 'ftp', 'ftps', 'no', 'all']: - proxy = d.getVar(key + '_proxy', True) - if proxy: - proxies[key] = proxy - return proxies + return socket def get_links_from_url(url, d): "Return all the href links found on the web location" - import sgmllib - - class LinksParser(sgmllib.SGMLParser): - def parse(self, s): - "Parse the given string 's'." - self.feed(s) - self.close() - - def __init__(self, verbose=0): - "Initialise an object passing 'verbose' to the superclass."
- sgmllib.SGMLParser.__init__(self, verbose) - self.hyperlinks = [] - - def start_a(self, attributes): - "Process a hyperlink and its 'attributes'." - for name, value in attributes: - if name == "href": - self.hyperlinks.append(value.strip('/')) - - def get_hyperlinks(self): - "Return the list of hyperlinks." - return self.hyperlinks + from bs4 import BeautifulSoup, SoupStrainer - with create_socket(url,d) as sock: + hyperlinks = [] + + webpage = '' + sock = create_socket(url,d) + if sock: webpage = sock.read() - linksparser = LinksParser() - linksparser.parse(webpage) - return linksparser.get_hyperlinks() + soup = BeautifulSoup(webpage, "html.parser", parse_only=SoupStrainer("a")) + for line in soup.find_all('a', href=True): + hyperlinks.append(line['href'].strip('/')) + return hyperlinks def find_latest_numeric_release(url, d): "Find the latest listed numeric release on the given url" @@ -162,14 +144,18 @@ def find_latest_debian_release(url, d): def get_debian_style_source_package_list(url, section, d): "Return the list of package-names stored in the debian style Sources.gz file" - with create_socket(url,d) as sock: - webpage = sock.read() - import tempfile - tmpfile = tempfile.NamedTemporaryFile(mode='wb', prefix='oecore.', suffix='.tmp', delete=False) - tmpfilename=tmpfile.name - tmpfile.write(sock.read()) - tmpfile.close() + import tempfile import gzip + + webpage = '' + sock = create_socket(url,d) + if sock: + webpage = sock.read() + + tmpfile = tempfile.NamedTemporaryFile(mode='wb', prefix='oecore.', suffix='.tmp', delete=False) + tmpfilename=tmpfile.name + tmpfile.write(sock.read()) + tmpfile.close() bb.note("Reading %s: %s" % (url, section)) f = gzip.open(tmpfilename) @@ -266,9 +252,9 @@ def update_distro_data(distro_check_dir, datetime, d): import fcntl try: if not os.path.exists(datetime_file): - open(datetime_file, 'w+b').close() # touch the file so that the next open won't fail + open(datetime_file, 'w+').close() # touch the file so that the next open 
won't fail - f = open(datetime_file, "r+b") + f = open(datetime_file, "r+") fcntl.lockf(f, fcntl.LOCK_EX) saved_datetime = f.read() if saved_datetime[0:8] != datetime[0:8]: -- cgit 1.2.3-korg