diff options
author | Alexander Kanavin <alexander.kanavin@linux.intel.com> | 2015-12-04 13:00:20 +0200 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2015-12-07 16:59:33 +0000 |
commit | 7546d4aeb3ba8fda9832081b84d93138dc5e58d6 (patch) | |
tree | 9bec332142732f8d443224c80faa439b1f4a37e6 | |
parent | 649d563f95ffc57d0fe3dbf494e7dbffcc99a1b4 (diff) | |
download | bitbake-7546d4aeb3ba8fda9832081b84d93138dc5e58d6.tar.gz |
wget.py: parse only <a> tags
For two reasons:
1) The important one: we hit the following bug when doing upstream version checks
on some webpages:
https://bugs.launchpad.net/beautifulsoup/+bug/1471755
2) Also, the Beautiful Soup documentation states that memory usage and
speed are improved that way.
Signed-off-by: Alexander Kanavin <alexander.kanavin@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r-- | lib/bb/fetch2/wget.py | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/lib/bb/fetch2/wget.py b/lib/bb/fetch2/wget.py
index bd2a8972a..c185f5b5f 100644
--- a/lib/bb/fetch2/wget.py
+++ b/lib/bb/fetch2/wget.py
@@ -38,6 +38,7 @@ from bb.fetch2 import FetchError
 from bb.fetch2 import logger
 from bb.fetch2 import runfetchcmd
 from bs4 import BeautifulSoup
+from bs4 import SoupStrainer
 
 class Wget(FetchMethod):
     """Class to fetch urls via 'wget'"""
@@ -367,7 +368,7 @@ class Wget(FetchMethod):
         version = ['', '', '']
 
         bb.debug(3, "VersionURL: %s" % (url))
-        soup = BeautifulSoup(self._fetch_index(url, ud, d))
+        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
         if not soup:
             bb.debug(3, "*** %s NO SOUP" % (url))
             return ""
@@ -417,7 +418,7 @@ class Wget(FetchMethod):
                 ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
         bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))
 
-        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d))
+        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
         if not soup:
             return version[1]
 