aboutsummaryrefslogtreecommitdiffstats
path: root/scripts/lib/mic/3rdparty/pykickstart/urlgrabber
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/lib/mic/3rdparty/pykickstart/urlgrabber')
-rw-r--r--scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py53
-rw-r--r--scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py463
-rw-r--r--scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py1477
-rw-r--r--scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py617
-rw-r--r--scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py458
-rw-r--r--scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py530
-rw-r--r--scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py90
7 files changed, 3688 insertions, 0 deletions
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py
new file mode 100644
index 0000000000..7bcd9d5541
--- /dev/null
+++ b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py
@@ -0,0 +1,53 @@
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+# Copyright 2002-2006 Michael D. Stenner, Ryan Tomayko
+
+# $Id: __init__.py,v 1.20 2006/09/22 00:58:55 mstenner Exp $
+
+"""A high-level cross-protocol url-grabber.
+
+Using urlgrabber, data can be fetched in three basic ways:
+
+ urlgrab(url) copy the file to the local filesystem
+ urlopen(url) open the remote file and return a file object
+ (like urllib2.urlopen)
+ urlread(url) return the contents of the file as a string
+
+When using these functions (or methods), urlgrabber supports the
+following features:
+
+ * identical behavior for http://, ftp://, and file:// urls
+ * http keepalive - faster downloads of many files by using
+ only a single connection
+ * byte ranges - fetch only a portion of the file
+ * reget - for a urlgrab, resume a partial download
+ * progress meters - the ability to report download progress
+ automatically, even when using urlopen!
+ * throttling - restrict bandwidth usage
+ * retries - automatically retry a download if it fails. The
+ number of retries and failure types are configurable.
+ * authenticated server access for http and ftp
+ * proxy support - support for authenticated http and ftp proxies
+ * mirror groups - treat a list of mirrors as a single source,
+ automatically switching mirrors if there is a failure.
+"""
+
+__version__ = '3.1.0'
+__date__ = '2006/09/21'
+__author__ = 'Michael D. Stenner <mstenner@linux.duke.edu>, ' \
+ 'Ryan Tomayko <rtomayko@naeblis.cx>'
+__url__ = 'http://linux.duke.edu/projects/urlgrabber/'
+
+from grabber import urlgrab, urlopen, urlread
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py
new file mode 100644
index 0000000000..001b4e32d6
--- /dev/null
+++ b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py
@@ -0,0 +1,463 @@
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
+# Boston, MA 02111-1307 USA
+
+# This file is part of urlgrabber, a high-level cross-protocol url-grabber
+# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+
+# $Id: byterange.py,v 1.12 2006/07/20 20:15:58 mstenner Exp $
+
+import os
+import stat
+import urllib
+import urllib2
+import rfc822
+
+DEBUG = None
+
+try:
+ from cStringIO import StringIO
+except ImportError, msg:
+ from StringIO import StringIO
+
+class RangeError(IOError):
+ """Error raised when an unsatisfiable range is requested."""
+ pass
+
+class HTTPRangeHandler(urllib2.BaseHandler):
+ """Handler that enables HTTP Range headers.
+
+ This was extremely simple. The Range header is a HTTP feature to
+ begin with so all this class does is tell urllib2 that the
+ "206 Partial Content" reponse from the HTTP server is what we
+ expected.
+
+ Example:
+ import urllib2
+ import byterange
+
+ range_handler = range.HTTPRangeHandler()
+ opener = urllib2.build_opener(range_handler)
+
+ # install it
+ urllib2.install_opener(opener)
+
+ # create Request and set Range header
+ req = urllib2.Request('http://www.python.org/')
+ req.header['Range'] = 'bytes=30-50'
+ f = urllib2.urlopen(req)
+ """
+
+ def http_error_206(self, req, fp, code, msg, hdrs):
+ # 206 Partial Content Response
+ r = urllib.addinfourl(fp, hdrs, req.get_full_url())
+ r.code = code
+ r.msg = msg
+ return r
+
+ def http_error_416(self, req, fp, code, msg, hdrs):
+ # HTTP's Range Not Satisfiable error
+ raise RangeError('Requested Range Not Satisfiable')
+
+class HTTPSRangeHandler(HTTPRangeHandler):
+ """ Range Header support for HTTPS. """
+
+ def https_error_206(self, req, fp, code, msg, hdrs):
+ return self.http_error_206(req, fp, code, msg, hdrs)
+
+ def https_error_416(self, req, fp, code, msg, hdrs):
+ self.https_error_416(req, fp, code, msg, hdrs)
+
+class RangeableFileObject:
+ """File object wrapper to enable raw range handling.
+ This was implemented primarilary for handling range
+ specifications for file:// urls. This object effectively makes
+ a file object look like it consists only of a range of bytes in
+ the stream.
+
+ Examples:
+ # expose 10 bytes, starting at byte position 20, from
+ # /etc/aliases.
+ >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
+ # seek seeks within the range (to position 23 in this case)
+ >>> fo.seek(3)
+ # tell tells where your at _within the range_ (position 3 in
+ # this case)
+ >>> fo.tell()
+ # read EOFs if an attempt is made to read past the last
+ # byte in the range. the following will return only 7 bytes.
+ >>> fo.read(30)
+ """
+
+ def __init__(self, fo, rangetup):
+ """Create a RangeableFileObject.
+ fo -- a file like object. only the read() method need be
+ supported but supporting an optimized seek() is
+ preferable.
+ rangetup -- a (firstbyte,lastbyte) tuple specifying the range
+ to work over.
+ The file object provided is assumed to be at byte offset 0.
+ """
+ self.fo = fo
+ (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup)
+ self.realpos = 0
+ self._do_seek(self.firstbyte)
+
+ def __getattr__(self, name):
+ """This effectively allows us to wrap at the instance level.
+ Any attribute not found in _this_ object will be searched for
+ in self.fo. This includes methods."""
+ if hasattr(self.fo, name):
+ return getattr(self.fo, name)
+ raise AttributeError, name
+
+ def tell(self):
+ """Return the position within the range.
+ This is different from fo.seek in that position 0 is the
+ first byte position of the range tuple. For example, if
+ this object was created with a range tuple of (500,899),
+ tell() will return 0 when at byte position 500 of the file.
+ """
+ return (self.realpos - self.firstbyte)
+
+ def seek(self,offset,whence=0):
+ """Seek within the byte range.
+ Positioning is identical to that described under tell().
+ """
+ assert whence in (0, 1, 2)
+ if whence == 0: # absolute seek
+ realoffset = self.firstbyte + offset
+ elif whence == 1: # relative seek
+ realoffset = self.realpos + offset
+ elif whence == 2: # absolute from end of file
+ # XXX: are we raising the right Error here?
+ raise IOError('seek from end of file not supported.')
+
+ # do not allow seek past lastbyte in range
+ if self.lastbyte and (realoffset >= self.lastbyte):
+ realoffset = self.lastbyte
+
+ self._do_seek(realoffset - self.realpos)
+
+ def read(self, size=-1):
+ """Read within the range.
+ This method will limit the size read based on the range.
+ """
+ size = self._calc_read_size(size)
+ rslt = self.fo.read(size)
+ self.realpos += len(rslt)
+ return rslt
+
+ def readline(self, size=-1):
+ """Read lines within the range.
+ This method will limit the size read based on the range.
+ """
+ size = self._calc_read_size(size)
+ rslt = self.fo.readline(size)
+ self.realpos += len(rslt)
+ return rslt
+
+ def _calc_read_size(self, size):
+ """Handles calculating the amount of data to read based on
+ the range.
+ """
+ if self.lastbyte:
+ if size > -1:
+ if ((self.realpos + size) >= self.lastbyte):
+ size = (self.lastbyte - self.realpos)
+ else:
+ size = (self.lastbyte - self.realpos)
+ return size
+
+ def _do_seek(self,offset):
+ """Seek based on whether wrapped object supports seek().
+ offset is relative to the current position (self.realpos).
+ """
+ assert offset >= 0
+ if not hasattr(self.fo, 'seek'):
+ self._poor_mans_seek(offset)
+ else:
+ self.fo.seek(self.realpos + offset)
+ self.realpos+= offset
+
+ def _poor_mans_seek(self,offset):
+ """Seek by calling the wrapped file objects read() method.
+ This is used for file like objects that do not have native
+ seek support. The wrapped objects read() method is called
+ to manually seek to the desired position.
+ offset -- read this number of bytes from the wrapped
+ file object.
+ raise RangeError if we encounter EOF before reaching the
+ specified offset.
+ """
+ pos = 0
+ bufsize = 1024
+ while pos < offset:
+ if (pos + bufsize) > offset:
+ bufsize = offset - pos
+ buf = self.fo.read(bufsize)
+ if len(buf) != bufsize:
+ raise RangeError('Requested Range Not Satisfiable')
+ pos+= bufsize
+
+class FileRangeHandler(urllib2.FileHandler):
+ """FileHandler subclass that adds Range support.
+ This class handles Range headers exactly like an HTTP
+ server would.
+ """
+ def open_local_file(self, req):
+ import mimetypes
+ import mimetools
+ host = req.get_host()
+ file = req.get_selector()
+ localfile = urllib.url2pathname(file)
+ stats = os.stat(localfile)
+ size = stats[stat.ST_SIZE]
+ modified = rfc822.formatdate(stats[stat.ST_MTIME])
+ mtype = mimetypes.guess_type(file)[0]
+ if host:
+ host, port = urllib.splitport(host)
+ if port or socket.gethostbyname(host) not in self.get_names():
+ raise urllib2.URLError('file not on local host')
+ fo = open(localfile,'rb')
+ brange = req.headers.get('Range',None)
+ brange = range_header_to_tuple(brange)
+ assert brange != ()
+ if brange:
+ (fb,lb) = brange
+ if lb == '': lb = size
+ if fb < 0 or fb > size or lb > size:
+ raise RangeError('Requested Range Not Satisfiable')
+ size = (lb - fb)
+ fo = RangeableFileObject(fo, (fb,lb))
+ headers = mimetools.Message(StringIO(
+ 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
+ (mtype or 'text/plain', size, modified)))
+ return urllib.addinfourl(fo, headers, 'file:'+file)
+
+
+# FTP Range Support
+# Unfortunately, a large amount of base FTP code had to be copied
+# from urllib and urllib2 in order to insert the FTP REST command.
+# Code modifications for range support have been commented as
+# follows:
+# -- range support modifications start/end here
+
+from urllib import splitport, splituser, splitpasswd, splitattr, \
+ unquote, addclosehook, addinfourl
+import ftplib
+import socket
+import sys
+import ftplib
+import mimetypes
+import mimetools
+
+class FTPRangeHandler(urllib2.FTPHandler):
+ def ftp_open(self, req):
+ host = req.get_host()
+ if not host:
+ raise IOError, ('ftp error', 'no host given')
+ host, port = splitport(host)
+ if port is None:
+ port = ftplib.FTP_PORT
+
+ # username/password handling
+ user, host = splituser(host)
+ if user:
+ user, passwd = splitpasswd(user)
+ else:
+ passwd = None
+ host = unquote(host)
+ user = unquote(user or '')
+ passwd = unquote(passwd or '')
+
+ try:
+ host = socket.gethostbyname(host)
+ except socket.error, msg:
+ raise urllib2.URLError(msg)
+ path, attrs = splitattr(req.get_selector())
+ dirs = path.split('/')
+ dirs = map(unquote, dirs)
+ dirs, file = dirs[:-1], dirs[-1]
+ if dirs and not dirs[0]:
+ dirs = dirs[1:]
+ try:
+ fw = self.connect_ftp(user, passwd, host, port, dirs)
+ type = file and 'I' or 'D'
+ for attr in attrs:
+ attr, value = splitattr(attr)
+ if attr.lower() == 'type' and \
+ value in ('a', 'A', 'i', 'I', 'd', 'D'):
+ type = value.upper()
+
+ # -- range support modifications start here
+ rest = None
+ range_tup = range_header_to_tuple(req.headers.get('Range',None))
+ assert range_tup != ()
+ if range_tup:
+ (fb,lb) = range_tup
+ if fb > 0: rest = fb
+ # -- range support modifications end here
+
+ fp, retrlen = fw.retrfile(file, type, rest)
+
+ # -- range support modifications start here
+ if range_tup:
+ (fb,lb) = range_tup
+ if lb == '':
+ if retrlen is None or retrlen == 0:
+ raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
+ lb = retrlen
+ retrlen = lb - fb
+ if retrlen < 0:
+ # beginning of range is larger than file
+ raise RangeError('Requested Range Not Satisfiable')
+ else:
+ retrlen = lb - fb
+ fp = RangeableFileObject(fp, (0,retrlen))
+ # -- range support modifications end here
+
+ headers = ""
+ mtype = mimetypes.guess_type(req.get_full_url())[0]
+ if mtype:
+ headers += "Content-Type: %s\n" % mtype
+ if retrlen is not None and retrlen >= 0:
+ headers += "Content-Length: %d\n" % retrlen
+ sf = StringIO(headers)
+ headers = mimetools.Message(sf)
+ return addinfourl(fp, headers, req.get_full_url())
+ except ftplib.all_errors, msg:
+ raise IOError, ('ftp error', msg), sys.exc_info()[2]
+
+ def connect_ftp(self, user, passwd, host, port, dirs):
+ fw = ftpwrapper(user, passwd, host, port, dirs)
+ return fw
+
+class ftpwrapper(urllib.ftpwrapper):
+ # range support note:
+ # this ftpwrapper code is copied directly from
+ # urllib. The only enhancement is to add the rest
+ # argument and pass it on to ftp.ntransfercmd
+ def retrfile(self, file, type, rest=None):
+ self.endtransfer()
+ if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
+ else: cmd = 'TYPE ' + type; isdir = 0
+ try:
+ self.ftp.voidcmd(cmd)
+ except ftplib.all_errors:
+ self.init()
+ self.ftp.voidcmd(cmd)
+ conn = None
+ if file and not isdir:
+ # Use nlst to see if the file exists at all
+ try:
+ self.ftp.nlst(file)
+ except ftplib.error_perm, reason:
+ raise IOError, ('ftp error', reason), sys.exc_info()[2]
+ # Restore the transfer mode!
+ self.ftp.voidcmd(cmd)
+ # Try to retrieve as a file
+ try:
+ cmd = 'RETR ' + file
+ conn = self.ftp.ntransfercmd(cmd, rest)
+ except ftplib.error_perm, reason:
+ if str(reason)[:3] == '501':
+ # workaround for REST not supported error
+ fp, retrlen = self.retrfile(file, type)
+ fp = RangeableFileObject(fp, (rest,''))
+ return (fp, retrlen)
+ elif str(reason)[:3] != '550':
+ raise IOError, ('ftp error', reason), sys.exc_info()[2]
+ if not conn:
+ # Set transfer mode to ASCII!
+ self.ftp.voidcmd('TYPE A')
+ # Try a directory listing
+ if file: cmd = 'LIST ' + file
+ else: cmd = 'LIST'
+ conn = self.ftp.ntransfercmd(cmd)
+ self.busy = 1
+ # Pass back both a suitably decorated object and a retrieval length
+ return (addclosehook(conn[0].makefile('rb'),
+ self.endtransfer), conn[1])
+
+
+####################################################################
+# Range Tuple Functions
+# XXX: These range tuple functions might go better in a class.
+
+_rangere = None
+def range_header_to_tuple(range_header):
+ """Get a (firstbyte,lastbyte) tuple from a Range header value.
+
+ Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
+ function pulls the firstbyte and lastbyte values and returns
+ a (firstbyte,lastbyte) tuple. If lastbyte is not specified in
+ the header value, it is returned as an empty string in the
+ tuple.
+
+ Return None if range_header is None
+ Return () if range_header does not conform to the range spec
+ pattern.
+
+ """
+ global _rangere
+ if range_header is None: return None
+ if _rangere is None:
+ import re
+ _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
+ match = _rangere.match(range_header)
+ if match:
+ tup = range_tuple_normalize(match.group(1,2))
+ if tup and tup[1]:
+ tup = (tup[0],tup[1]+1)
+ return tup
+ return ()
+
+def range_tuple_to_header(range_tup):
+ """Convert a range tuple to a Range header value.
+ Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None
+ if no range is needed.
+ """
+ if range_tup is None: return None
+ range_tup = range_tuple_normalize(range_tup)
+ if range_tup:
+ if range_tup[1]:
+ range_tup = (range_tup[0],range_tup[1] - 1)
+ return 'bytes=%s-%s' % range_tup
+
+def range_tuple_normalize(range_tup):
+ """Normalize a (first_byte,last_byte) range tuple.
+ Return a tuple whose first element is guaranteed to be an int
+ and whose second element will be '' (meaning: the last byte) or
+ an int. Finally, return None if the normalized tuple == (0,'')
+ as that is equivelant to retrieving the entire file.
+ """
+ if range_tup is None: return None
+ # handle first byte
+ fb = range_tup[0]
+ if fb in (None,''): fb = 0
+ else: fb = int(fb)
+ # handle last byte
+ try: lb = range_tup[1]
+ except IndexError: lb = ''
+ else:
+ if lb is None: lb = ''
+ elif lb != '': lb = int(lb)
+ # check if range is over the entire file
+ if (fb,lb) == (0,''): return None
+ # check that the range is valid
+ if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb))
+ return (fb,lb)
+
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py
new file mode 100644
index 0000000000..fefdab36f6
--- /dev/null
+++ b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py
@@ -0,0 +1,1477 @@
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
+# Boston, MA 02111-1307 USA
+
+# This file is part of urlgrabber, a high-level cross-protocol url-grabber
+# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+
+"""A high-level cross-protocol url-grabber.
+
+GENERAL ARGUMENTS (kwargs)
+
+ Where possible, the module-level default is indicated, and legal
+ values are provided.
+
+ copy_local = 0 [0|1]
+
+ ignored except for file:// urls, in which case it specifies
+ whether urlgrab should still make a copy of the file, or simply
+ point to the existing copy. The module level default for this
+ option is 0.
+
+ close_connection = 0 [0|1]
+
+ tells URLGrabber to close the connection after a file has been
+ transfered. This is ignored unless the download happens with the
+ http keepalive handler (keepalive=1). Otherwise, the connection
+ is left open for further use. The module level default for this
+ option is 0 (keepalive connections will not be closed).
+
+ keepalive = 1 [0|1]
+
+ specifies whether keepalive should be used for HTTP/1.1 servers
+ that support it. The module level default for this option is 1
+ (keepalive is enabled).
+
+ progress_obj = None
+
+ a class instance that supports the following methods:
+ po.start(filename, url, basename, length, text)
+ # length will be None if unknown
+ po.update(read) # read == bytes read so far
+ po.end()
+
+ text = None
+
+ specifies an alternativ text item in the beginning of the progress
+ bar line. If not given, the basename of the file is used.
+
+ throttle = 1.0
+
+ a number - if it's an int, it's the bytes/second throttle limit.
+ If it's a float, it is first multiplied by bandwidth. If throttle
+ == 0, throttling is disabled. If None, the module-level default
+ (which can be set on default_grabber.throttle) is used. See
+ BANDWIDTH THROTTLING for more information.
+
+ timeout = None
+
+ a positive float expressing the number of seconds to wait for socket
+ operations. If the value is None or 0.0, socket operations will block
+ forever. Setting this option causes urlgrabber to call the settimeout
+ method on the Socket object used for the request. See the Python
+ documentation on settimeout for more information.
+ http://www.python.org/doc/current/lib/socket-objects.html
+
+ bandwidth = 0
+
+ the nominal max bandwidth in bytes/second. If throttle is a float
+ and bandwidth == 0, throttling is disabled. If None, the
+ module-level default (which can be set on
+ default_grabber.bandwidth) is used. See BANDWIDTH THROTTLING for
+ more information.
+
+ range = None
+
+ a tuple of the form (first_byte, last_byte) describing a byte
+ range to retrieve. Either or both of the values may set to
+ None. If first_byte is None, byte offset 0 is assumed. If
+ last_byte is None, the last byte available is assumed. Note that
+ the range specification is python-like in that (0,10) will yeild
+ the first 10 bytes of the file.
+
+ If set to None, no range will be used.
+
+ reget = None [None|'simple'|'check_timestamp']
+
+ whether to attempt to reget a partially-downloaded file. Reget
+ only applies to .urlgrab and (obviously) only if there is a
+ partially downloaded file. Reget has two modes:
+
+ 'simple' -- the local file will always be trusted. If there
+ are 100 bytes in the local file, then the download will always
+ begin 100 bytes into the requested file.
+
+ 'check_timestamp' -- the timestamp of the server file will be
+ compared to the timestamp of the local file. ONLY if the
+ local file is newer than or the same age as the server file
+ will reget be used. If the server file is newer, or the
+ timestamp is not returned, the entire file will be fetched.
+
+ NOTE: urlgrabber can do very little to verify that the partial
+ file on disk is identical to the beginning of the remote file.
+ You may want to either employ a custom "checkfunc" or simply avoid
+ using reget in situations where corruption is a concern.
+
+ user_agent = 'urlgrabber/VERSION'
+
+ a string, usually of the form 'AGENT/VERSION' that is provided to
+ HTTP servers in the User-agent header. The module level default
+ for this option is "urlgrabber/VERSION".
+
+ http_headers = None
+
+ a tuple of 2-tuples, each containing a header and value. These
+ will be used for http and https requests only. For example, you
+ can do
+ http_headers = (('Pragma', 'no-cache'),)
+
+ ftp_headers = None
+
+ this is just like http_headers, but will be used for ftp requests.
+
+ proxies = None
+
+ a dictionary that maps protocol schemes to proxy hosts. For
+ example, to use a proxy server on host "foo" port 3128 for http
+ and https URLs:
+ proxies={ 'http' : 'http://foo:3128', 'https' : 'http://foo:3128' }
+ note that proxy authentication information may be provided using
+ normal URL constructs:
+ proxies={ 'http' : 'http://user:host@foo:3128' }
+ Lastly, if proxies is None, the default environment settings will
+ be used.
+
+ prefix = None
+
+ a url prefix that will be prepended to all requested urls. For
+ example:
+ g = URLGrabber(prefix='http://foo.com/mirror/')
+ g.urlgrab('some/file.txt')
+ ## this will fetch 'http://foo.com/mirror/some/file.txt'
+ This option exists primarily to allow identical behavior to
+ MirrorGroup (and derived) instances. Note: a '/' will be inserted
+ if necessary, so you cannot specify a prefix that ends with a
+ partial file or directory name.
+
+ opener = None
+
+ Overrides the default urllib2.OpenerDirector provided to urllib2
+ when making requests. This option exists so that the urllib2
+ handler chain may be customized. Note that the range, reget,
+ proxy, and keepalive features require that custom handlers be
+ provided to urllib2 in order to function properly. If an opener
+ option is provided, no attempt is made by urlgrabber to ensure
+ chain integrity. You are responsible for ensuring that any
+ extension handlers are present if said features are required.
+
+ data = None
+
+ Only relevant for the HTTP family (and ignored for other
+ protocols), this allows HTTP POSTs. When the data kwarg is
+ present (and not None), an HTTP request will automatically become
+ a POST rather than GET. This is done by direct passthrough to
+ urllib2. If you use this, you may also want to set the
+ 'Content-length' and 'Content-type' headers with the http_headers
+ option. Note that python 2.2 handles the case of these
+ badly and if you do not use the proper case (shown here), your
+ values will be overridden with the defaults.
+
+
+RETRY RELATED ARGUMENTS
+
+ retry = None
+
+ the number of times to retry the grab before bailing. If this is
+ zero, it will retry forever. This was intentional... really, it
+ was :). If this value is not supplied or is supplied but is None
+ retrying does not occur.
+
+ retrycodes = [-1,2,4,5,6,7]
+
+ a sequence of errorcodes (values of e.errno) for which it should
+ retry. See the doc on URLGrabError for more details on this. You
+ might consider modifying a copy of the default codes rather than
+ building yours from scratch so that if the list is extended in the
+ future (or one code is split into two) you can still enjoy the
+ benefits of the default list. You can do that with something like
+ this:
+
+ retrycodes = urlgrabber.grabber.URLGrabberOptions().retrycodes
+ if 12 not in retrycodes:
+ retrycodes.append(12)
+
+ checkfunc = None
+
+ a function to do additional checks. This defaults to None, which
+ means no additional checking. The function should simply return
+ on a successful check. It should raise URLGrabError on an
+ unsuccessful check. Raising of any other exception will be
+ considered immediate failure and no retries will occur.
+
+ If it raises URLGrabError, the error code will determine the retry
+ behavior. Negative error numbers are reserved for use by these
+ passed in functions, so you can use many negative numbers for
+ different types of failure. By default, -1 results in a retry,
+ but this can be customized with retrycodes.
+
+ If you simply pass in a function, it will be given exactly one
+ argument: a CallbackObject instance with the .url attribute
+ defined and either .filename (for urlgrab) or .data (for urlread).
+ For urlgrab, .filename is the name of the local file. For
+ urlread, .data is the actual string data. If you need other
+ arguments passed to the callback (program state of some sort), you
+ can do so like this:
+
+ checkfunc=(function, ('arg1', 2), {'kwarg': 3})
+
+ if the downloaded file has filename /tmp/stuff, then this will
+ result in this call (for urlgrab):
+
+ function(obj, 'arg1', 2, kwarg=3)
+ # obj.filename = '/tmp/stuff'
+ # obj.url = 'http://foo.com/stuff'
+
+ NOTE: both the "args" tuple and "kwargs" dict must be present if
+ you use this syntax, but either (or both) can be empty.
+
+ failure_callback = None
+
+ The callback that gets called during retries when an attempt to
+ fetch a file fails. The syntax for specifying the callback is
+ identical to checkfunc, except for the attributes defined in the
+ CallbackObject instance. The attributes for failure_callback are:
+
+ exception = the raised exception
+ url = the url we're trying to fetch
+ tries = the number of tries so far (including this one)
+ retry = the value of the retry option
+
+ The callback is present primarily to inform the calling program of
+ the failure, but if it raises an exception (including the one it's
+ passed) that exception will NOT be caught and will therefore cause
+ future retries to be aborted.
+
+ The callback is called for EVERY failure, including the last one.
+ On the last try, the callback can raise an alternate exception,
+ but it cannot (without severe trickiness) prevent the exception
+ from being raised.
+
+ interrupt_callback = None
+
+ This callback is called if KeyboardInterrupt is received at any
+ point in the transfer. Basically, this callback can have three
+ impacts on the fetch process based on the way it exits:
+
+ 1) raise no exception: the current fetch will be aborted, but
+ any further retries will still take place
+
+ 2) raise a URLGrabError: if you're using a MirrorGroup, then
+ this will prompt a failover to the next mirror according to
+ the behavior of the MirrorGroup subclass. It is recommended
+ that you raise URLGrabError with code 15, 'user abort'. If
+ you are NOT using a MirrorGroup subclass, then this is the
+ same as (3).
+
+ 3) raise some other exception (such as KeyboardInterrupt), which
+ will not be caught at either the grabber or mirror levels.
+ That is, it will be raised up all the way to the caller.
+
+ This callback is very similar to failure_callback. They are
+ passed the same arguments, so you could use the same function for
+ both.
+
+ urlparser = URLParser()
+
+ The URLParser class handles pre-processing of URLs, including
+ auth-handling for user/pass encoded in http urls, file handing
+ (that is, filenames not sent as a URL), and URL quoting. If you
+ want to override any of this behavior, you can pass in a
+ replacement instance. See also the 'quote' option.
+
+ quote = None
+
+ Whether or not to quote the path portion of a url.
+ quote = 1 -> quote the URLs (they're not quoted yet)
+ quote = 0 -> do not quote them (they're already quoted)
+ quote = None -> guess what to do
+
+ This option only affects proper urls like 'file:///etc/passwd'; it
+ does not affect 'raw' filenames like '/etc/passwd'. The latter
+ will always be quoted as they are converted to URLs. Also, only
+ the path part of a url is quoted. If you need more fine-grained
+ control, you should probably subclass URLParser and pass it in via
+ the 'urlparser' option.
+
+BANDWIDTH THROTTLING
+
+ urlgrabber supports throttling via two values: throttle and
+ bandwidth Between the two, you can either specify and absolute
+ throttle threshold or specify a theshold as a fraction of maximum
+ available bandwidth.
+
+ throttle is a number - if it's an int, it's the bytes/second
+ throttle limit. If it's a float, it is first multiplied by
+ bandwidth. If throttle == 0, throttling is disabled. If None, the
+ module-level default (which can be set with set_throttle) is used.
+
+ bandwidth is the nominal max bandwidth in bytes/second. If throttle
+ is a float and bandwidth == 0, throttling is disabled. If None, the
+ module-level default (which can be set with set_bandwidth) is used.
+
+ THROTTLING EXAMPLES:
+
+ Lets say you have a 100 Mbps connection. This is (about) 10^8 bits
+ per second, or 12,500,000 Bytes per second. You have a number of
+ throttling options:
+
+ *) set_bandwidth(12500000); set_throttle(0.5) # throttle is a float
+
+ This will limit urlgrab to use half of your available bandwidth.
+
+ *) set_throttle(6250000) # throttle is an int
+
+ This will also limit urlgrab to use half of your available
+ bandwidth, regardless of what bandwidth is set to.
+
+ *) set_throttle(6250000); set_throttle(1.0) # float
+
+ Use half your bandwidth
+
+ *) set_throttle(6250000); set_throttle(2.0) # float
+
+ Use up to 12,500,000 Bytes per second (your nominal max bandwidth)
+
+ *) set_throttle(6250000); set_throttle(0) # throttle = 0
+
+ Disable throttling - this is more efficient than a very large
+ throttle setting.
+
+ *) set_throttle(0); set_throttle(1.0) # throttle is float, bandwidth = 0
+
+ Disable throttling - this is the default when the module is loaded.
+
+ SUGGESTED AUTHOR IMPLEMENTATION (THROTTLING)
+
+ While this is flexible, it's not extremely obvious to the user. I
+ suggest you implement a float throttle as a percent to make the
+ distinction between absolute and relative throttling very explicit.
+
+ Also, you may want to convert the units to something more convenient
+ than bytes/second, such as kbps or kB/s, etc.
+
+"""
+
+# $Id: grabber.py,v 1.48 2006/09/22 00:58:05 mstenner Exp $
+
+import os
+import os.path
+import sys
+import urlparse
+import rfc822
+import time
+import string
+import urllib
+import urllib2
+from stat import * # S_* and ST_*
+
+########################################################################
+# MODULE INITIALIZATION
+########################################################################
+try:
+ exec('from ' + (__name__.split('.'))[0] + ' import __version__')
+except:
+ __version__ = '???'
+
+import sslfactory
+
+auth_handler = urllib2.HTTPBasicAuthHandler( \
+ urllib2.HTTPPasswordMgrWithDefaultRealm())
+
+try:
+ from i18n import _
+except ImportError, msg:
+ def _(st): return st
+
+try:
+ from httplib import HTTPException
+except ImportError, msg:
+ HTTPException = None
+
+try:
+ # This is a convenient way to make keepalive optional.
+ # Just rename the module so it can't be imported.
+ import keepalive
+ from keepalive import HTTPHandler, HTTPSHandler
+ have_keepalive = True
+except ImportError, msg:
+ have_keepalive = False
+
+try:
+ # add in range support conditionally too
+ import byterange
+ from byterange import HTTPRangeHandler, HTTPSRangeHandler, \
+ FileRangeHandler, FTPRangeHandler, range_tuple_normalize, \
+ range_tuple_to_header, RangeError
+except ImportError, msg:
+ range_handlers = ()
+ RangeError = None
+ have_range = 0
+else:
+ range_handlers = (HTTPRangeHandler(), HTTPSRangeHandler(),
+ FileRangeHandler(), FTPRangeHandler())
+ have_range = 1
+
+
+# check whether socket timeout support is available (Python >= 2.3)
+import socket
+try:
+ TimeoutError = socket.timeout
+ have_socket_timeout = True
+except AttributeError:
+ TimeoutError = None
+ have_socket_timeout = False
+
+########################################################################
+# functions for debugging output. These functions are here because they
+# are also part of the module initialization.
+DEBUG = None
+def set_logger(DBOBJ):
+ """Set the DEBUG object. This is called by _init_default_logger when
+ the environment variable URLGRABBER_DEBUG is set, but can also be
+ called by a calling program. Basically, if the calling program uses
+ the logging module and would like to incorporate urlgrabber logging,
+ then it can do so this way. It's probably not necessary as most
+ internal logging is only for debugging purposes.
+
+ The passed-in object should be a logging.Logger instance. It will
+ be pushed into the keepalive and byterange modules if they're
+ being used. The mirror module pulls this object in on import, so
+ you will need to manually push into it. In fact, you may find it
+ tidier to simply push your logging object (or objects) into each
+ of these modules independently.
+ """
+
+ global DEBUG
+ DEBUG = DBOBJ
+ if have_keepalive and keepalive.DEBUG is None:
+ keepalive.DEBUG = DBOBJ
+ if have_range and byterange.DEBUG is None:
+ byterange.DEBUG = DBOBJ
+ if sslfactory.DEBUG is None:
+ sslfactory.DEBUG = DBOBJ
+
+def _init_default_logger():
+ '''Examines the environment variable URLGRABBER_DEBUG and creates
+ a logging object (logging.logger) based on the contents. It takes
+ the form
+
+ URLGRABBER_DEBUG=level,filename
+
+ where "level" can be either an integer or a log level from the
+ logging module (DEBUG, INFO, etc). If the integer is zero or
+ less, logging will be disabled. Filename is the filename where
+ logs will be sent. If it is "-", then stdout will be used. If
+ the filename is empty or missing, stderr will be used. If the
+ variable cannot be processed or the logging module cannot be
+ imported (python < 2.3) then logging will be disabled. Here are
+ some examples:
+
+ URLGRABBER_DEBUG=1,debug.txt # log everything to debug.txt
+ URLGRABBER_DEBUG=WARNING,- # log warning and higher to stdout
+ URLGRABBER_DEBUG=INFO # log info and higher to stderr
+
+ This funtion is called during module initialization. It is not
+ intended to be called from outside. The only reason it is a
+ function at all is to keep the module-level namespace tidy and to
+ collect the code into a nice block.'''
+
+ try:
+ dbinfo = os.environ['URLGRABBER_DEBUG'].split(',')
+ import logging
+ level = logging._levelNames.get(dbinfo[0], int(dbinfo[0]))
+ if level < 1: raise ValueError()
+
+ formatter = logging.Formatter('%(asctime)s %(message)s')
+ if len(dbinfo) > 1: filename = dbinfo[1]
+ else: filename = ''
+ if filename == '': handler = logging.StreamHandler(sys.stderr)
+ elif filename == '-': handler = logging.StreamHandler(sys.stdout)
+ else: handler = logging.FileHandler(filename)
+ handler.setFormatter(formatter)
+ DBOBJ = logging.getLogger('urlgrabber')
+ DBOBJ.addHandler(handler)
+ DBOBJ.setLevel(level)
+ except (KeyError, ImportError, ValueError):
+ DBOBJ = None
+ set_logger(DBOBJ)
+
+_init_default_logger()
+########################################################################
+# END MODULE INITIALIZATION
+########################################################################
+
+
+
+class URLGrabError(IOError):
+ """
+ URLGrabError error codes:
+
+ URLGrabber error codes (0 -- 255)
+ 0 - everything looks good (you should never see this)
+ 1 - malformed url
+ 2 - local file doesn't exist
+ 3 - request for non-file local file (dir, etc)
+ 4 - IOError on fetch
+ 5 - OSError on fetch
+ 6 - no content length header when we expected one
+ 7 - HTTPException
+ 8 - Exceeded read limit (for urlread)
+ 9 - Requested byte range not satisfiable.
+ 10 - Byte range requested, but range support unavailable
+ 11 - Illegal reget mode
+ 12 - Socket timeout
+ 13 - malformed proxy url
+ 14 - HTTPError (includes .code and .exception attributes)
+ 15 - user abort
+
+ MirrorGroup error codes (256 -- 511)
+ 256 - No more mirrors left to try
+
+ Custom (non-builtin) classes derived from MirrorGroup (512 -- 767)
+ [ this range reserved for application-specific error codes ]
+
+ Retry codes (< 0)
+ -1 - retry the download, unknown reason
+
+ Note: to test which group a code is in, you can simply do integer
+ division by 256: e.errno / 256
+
+ Negative codes are reserved for use by functions passed in to
+ retrygrab with checkfunc. The value -1 is built in as a generic
+ retry code and is already included in the retrycodes list.
+ Therefore, you can create a custom check function that simply
+ returns -1 and the fetch will be re-tried. For more customized
+ retries, you can use other negative number and include them in
+ retry-codes. This is nice for outputting useful messages about
+ what failed.
+
+ You can use these error codes like so:
+ try: urlgrab(url)
+ except URLGrabError, e:
+ if e.errno == 3: ...
+ # or
+ print e.strerror
+ # or simply
+ print e #### print '[Errno %i] %s' % (e.errno, e.strerror)
+ """
+ pass
+
+class CallbackObject:
+ """Container for returned callback data.
+
+ This is currently a dummy class into which urlgrabber can stuff
+ information for passing to callbacks. This way, the prototype for
+ all callbacks is the same, regardless of the data that will be
+ passed back. Any function that accepts a callback function as an
+ argument SHOULD document what it will define in this object.
+
+ It is possible that this class will have some greater
+ functionality in the future.
+ """
+ def __init__(self, **kwargs):
+ self.__dict__.update(kwargs)
+
+def urlgrab(url, filename=None, **kwargs):
+ """grab the file at <url> and make a local copy at <filename>
+ If filename is none, the basename of the url is used.
+ urlgrab returns the filename of the local file, which may be different
+ from the passed-in filename if the copy_local kwarg == 0.
+
+ See module documentation for a description of possible kwargs.
+ """
+ return default_grabber.urlgrab(url, filename, **kwargs)
+
+def urlopen(url, **kwargs):
+ """open the url and return a file object
+ If a progress object or throttle specifications exist, then
+ a special file object will be returned that supports them.
+ The file object can be treated like any other file object.
+
+ See module documentation for a description of possible kwargs.
+ """
+ return default_grabber.urlopen(url, **kwargs)
+
+def urlread(url, limit=None, **kwargs):
+ """read the url into a string, up to 'limit' bytes
+ If the limit is exceeded, an exception will be thrown. Note that urlread
+ is NOT intended to be used as a way of saying "I want the first N bytes"
+ but rather 'read the whole file into memory, but don't use too much'
+
+ See module documentation for a description of possible kwargs.
+ """
+ return default_grabber.urlread(url, limit, **kwargs)
+
+
+class URLParser:
+ """Process the URLs before passing them to urllib2.
+
+ This class does several things:
+
+ * add any prefix
+ * translate a "raw" file to a proper file: url
+ * handle any http or https auth that's encoded within the url
+ * quote the url
+
+ Only the "parse" method is called directly, and it calls sub-methods.
+
+ An instance of this class is held in the options object, which
+ means that it's easy to change the behavior by sub-classing and
+ passing the replacement in. It need only have a method like:
+
+ url, parts = urlparser.parse(url, opts)
+ """
+
+ def parse(self, url, opts):
+ """parse the url and return the (modified) url and its parts
+
+ Note: a raw file WILL be quoted when it's converted to a URL.
+ However, other urls (ones which come with a proper scheme) may
+ or may not be quoted according to opts.quote
+
+ opts.quote = 1 --> quote it
+ opts.quote = 0 --> do not quote it
+ opts.quote = None --> guess
+ """
+ quote = opts.quote
+
+ if opts.prefix:
+ url = self.add_prefix(url, opts.prefix)
+
+ parts = urlparse.urlparse(url)
+ (scheme, host, path, parm, query, frag) = parts
+
+ if not scheme or (len(scheme) == 1 and scheme in string.letters):
+ # if a scheme isn't specified, we guess that it's "file:"
+ if url[0] not in '/\\': url = os.path.abspath(url)
+ url = 'file:' + urllib.pathname2url(url)
+ parts = urlparse.urlparse(url)
+ quote = 0 # pathname2url quotes, so we won't do it again
+
+ if scheme in ['http', 'https']:
+ parts = self.process_http(parts)
+
+ if quote is None:
+ quote = self.guess_should_quote(parts)
+ if quote:
+ parts = self.quote(parts)
+
+ url = urlparse.urlunparse(parts)
+ return url, parts
+
+ def add_prefix(self, url, prefix):
+ if prefix[-1] == '/' or url[0] == '/':
+ url = prefix + url
+ else:
+ url = prefix + '/' + url
+ return url
+
+ def process_http(self, parts):
+ (scheme, host, path, parm, query, frag) = parts
+
+ if '@' in host and auth_handler:
+ try:
+ user_pass, host = host.split('@', 1)
+ if ':' in user_pass:
+ user, password = user_pass.split(':', 1)
+ except ValueError, e:
+ raise URLGrabError(1, _('Bad URL: %s') % url)
+ if DEBUG: DEBUG.info('adding HTTP auth: %s, XXXXXXXX', user)
+ auth_handler.add_password(None, host, user, password)
+
+ return (scheme, host, path, parm, query, frag)
+
+ def quote(self, parts):
+ """quote the URL
+
+ This method quotes ONLY the path part. If you need to quote
+ other parts, you should override this and pass in your derived
+ class. The other alternative is to quote other parts before
+ passing into urlgrabber.
+ """
+ (scheme, host, path, parm, query, frag) = parts
+ path = urllib.quote(path)
+ return (scheme, host, path, parm, query, frag)
+
+ hexvals = '0123456789ABCDEF'
+ def guess_should_quote(self, parts):
+ """
+ Guess whether we should quote a path. This amounts to
+ guessing whether it's already quoted.
+
+ find ' ' -> 1
+ find '%' -> 1
+ find '%XX' -> 0
+ else -> 1
+ """
+ (scheme, host, path, parm, query, frag) = parts
+ if ' ' in path:
+ return 1
+ ind = string.find(path, '%')
+ if ind > -1:
+ while ind > -1:
+ if len(path) < ind+3:
+ return 1
+ code = path[ind+1:ind+3].upper()
+ if code[0] not in self.hexvals or \
+ code[1] not in self.hexvals:
+ return 1
+ ind = string.find(path, '%', ind+1)
+ return 0
+ return 1
+
+class URLGrabberOptions:
+ """Class to ease kwargs handling."""
+
+ def __init__(self, delegate=None, **kwargs):
+ """Initialize URLGrabberOptions object.
+ Set default values for all options and then update options specified
+ in kwargs.
+ """
+ self.delegate = delegate
+ if delegate is None:
+ self._set_defaults()
+ self._set_attributes(**kwargs)
+
+ def __getattr__(self, name):
+ if self.delegate and hasattr(self.delegate, name):
+ return getattr(self.delegate, name)
+ raise AttributeError, name
+
+ def raw_throttle(self):
+ """Calculate raw throttle value from throttle and bandwidth
+ values.
+ """
+ if self.throttle <= 0:
+ return 0
+ elif type(self.throttle) == type(0):
+ return float(self.throttle)
+ else: # throttle is a float
+ return self.bandwidth * self.throttle
+
+ def derive(self, **kwargs):
+ """Create a derived URLGrabberOptions instance.
+ This method creates a new instance and overrides the
+ options specified in kwargs.
+ """
+ return URLGrabberOptions(delegate=self, **kwargs)
+
+ def _set_attributes(self, **kwargs):
+ """Update object attributes with those provided in kwargs."""
+ self.__dict__.update(kwargs)
+ if have_range and kwargs.has_key('range'):
+ # normalize the supplied range value
+ self.range = range_tuple_normalize(self.range)
+ if not self.reget in [None, 'simple', 'check_timestamp']:
+ raise URLGrabError(11, _('Illegal reget mode: %s') \
+ % (self.reget, ))
+
+ def _set_defaults(self):
+ """Set all options to their default values.
+ When adding new options, make sure a default is
+ provided here.
+ """
+ self.progress_obj = None
+ self.throttle = 1.0
+ self.bandwidth = 0
+ self.retry = None
+ self.retrycodes = [-1,2,4,5,6,7]
+ self.checkfunc = None
+ self.copy_local = 0
+ self.close_connection = 0
+ self.range = None
+ self.user_agent = 'urlgrabber/%s' % __version__
+ self.keepalive = 1
+ self.proxies = None
+ self.reget = None
+ self.failure_callback = None
+ self.interrupt_callback = None
+ self.prefix = None
+ self.opener = None
+ self.cache_openers = True
+ self.timeout = None
+ self.text = None
+ self.http_headers = None
+ self.ftp_headers = None
+ self.data = None
+ self.urlparser = URLParser()
+ self.quote = None
+ self.ssl_ca_cert = None
+ self.ssl_context = None
+
+class URLGrabber:
+ """Provides easy opening of URLs with a variety of options.
+
+ All options are specified as kwargs. Options may be specified when
+ the class is created and may be overridden on a per request basis.
+
+ New objects inherit default values from default_grabber.
+ """
+
+ def __init__(self, **kwargs):
+ self.opts = URLGrabberOptions(**kwargs)
+
+ def _retry(self, opts, func, *args):
+ tries = 0
+ while 1:
+ # there are only two ways out of this loop. The second has
+ # several "sub-ways"
+ # 1) via the return in the "try" block
+ # 2) by some exception being raised
+ # a) an excepton is raised that we don't "except"
+ # b) a callback raises ANY exception
+ # c) we're not retry-ing or have run out of retries
+ # d) the URLGrabError code is not in retrycodes
+ # beware of infinite loops :)
+ tries = tries + 1
+ exception = None
+ retrycode = None
+ callback = None
+ if DEBUG: DEBUG.info('attempt %i/%s: %s',
+ tries, opts.retry, args[0])
+ try:
+ r = apply(func, (opts,) + args, {})
+ if DEBUG: DEBUG.info('success')
+ return r
+ except URLGrabError, e:
+ exception = e
+ callback = opts.failure_callback
+ retrycode = e.errno
+ except KeyboardInterrupt, e:
+ exception = e
+ callback = opts.interrupt_callback
+
+ if DEBUG: DEBUG.info('exception: %s', exception)
+ if callback:
+ if DEBUG: DEBUG.info('calling callback: %s', callback)
+ cb_func, cb_args, cb_kwargs = self._make_callback(callback)
+ obj = CallbackObject(exception=exception, url=args[0],
+ tries=tries, retry=opts.retry)
+ cb_func(obj, *cb_args, **cb_kwargs)
+
+ if (opts.retry is None) or (tries == opts.retry):
+ if DEBUG: DEBUG.info('retries exceeded, re-raising')
+ raise
+
+ if (retrycode is not None) and (retrycode not in opts.retrycodes):
+ if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising',
+ retrycode, opts.retrycodes)
+ raise
+
+ def urlopen(self, url, **kwargs):
+ """open the url and return a file object
+ If a progress object or throttle value specified when this
+ object was created, then a special file object will be
+ returned that supports them. The file object can be treated
+ like any other file object.
+ """
+ opts = self.opts.derive(**kwargs)
+ (url,parts) = opts.urlparser.parse(url, opts)
+ def retryfunc(opts, url):
+ return URLGrabberFileObject(url, filename=None, opts=opts)
+ return self._retry(opts, retryfunc, url)
+
+ def urlgrab(self, url, filename=None, **kwargs):
+ """grab the file at <url> and make a local copy at <filename>
+ If filename is none, the basename of the url is used.
+ urlgrab returns the filename of the local file, which may be
+ different from the passed-in filename if copy_local == 0.
+ """
+ opts = self.opts.derive(**kwargs)
+ (url,parts) = opts.urlparser.parse(url, opts)
+ (scheme, host, path, parm, query, frag) = parts
+ if filename is None:
+ filename = os.path.basename( urllib.unquote(path) )
+ if scheme == 'file' and not opts.copy_local:
+ # just return the name of the local file - don't make a
+ # copy currently
+ path = urllib.url2pathname(path)
+ if host:
+ path = os.path.normpath('//' + host + path)
+ if not os.path.exists(path):
+ raise URLGrabError(2,
+ _('Local file does not exist: %s') % (path, ))
+ elif not os.path.isfile(path):
+ raise URLGrabError(3,
+ _('Not a normal file: %s') % (path, ))
+ elif not opts.range:
+ return path
+
+ def retryfunc(opts, url, filename):
+ fo = URLGrabberFileObject(url, filename, opts)
+ try:
+ fo._do_grab()
+ if not opts.checkfunc is None:
+ cb_func, cb_args, cb_kwargs = \
+ self._make_callback(opts.checkfunc)
+ obj = CallbackObject()
+ obj.filename = filename
+ obj.url = url
+ apply(cb_func, (obj, )+cb_args, cb_kwargs)
+ finally:
+ fo.close()
+ return filename
+
+ return self._retry(opts, retryfunc, url, filename)
+
+ def urlread(self, url, limit=None, **kwargs):
+ """read the url into a string, up to 'limit' bytes
+ If the limit is exceeded, an exception will be thrown. Note
+ that urlread is NOT intended to be used as a way of saying
+ "I want the first N bytes" but rather 'read the whole file
+ into memory, but don't use too much'
+ """
+ opts = self.opts.derive(**kwargs)
+ (url,parts) = opts.urlparser.parse(url, opts)
+ if limit is not None:
+ limit = limit + 1
+
+ def retryfunc(opts, url, limit):
+ fo = URLGrabberFileObject(url, filename=None, opts=opts)
+ s = ''
+ try:
+ # this is an unfortunate thing. Some file-like objects
+ # have a default "limit" of None, while the built-in (real)
+ # file objects have -1. They each break the other, so for
+ # now, we just force the default if necessary.
+ if limit is None: s = fo.read()
+ else: s = fo.read(limit)
+
+ if not opts.checkfunc is None:
+ cb_func, cb_args, cb_kwargs = \
+ self._make_callback(opts.checkfunc)
+ obj = CallbackObject()
+ obj.data = s
+ obj.url = url
+ apply(cb_func, (obj, )+cb_args, cb_kwargs)
+ finally:
+ fo.close()
+ return s
+
+ s = self._retry(opts, retryfunc, url, limit)
+ if limit and len(s) > limit:
+ raise URLGrabError(8,
+ _('Exceeded limit (%i): %s') % (limit, url))
+ return s
+
+ def _make_callback(self, callback_obj):
+ if callable(callback_obj):
+ return callback_obj, (), {}
+ else:
+ return callback_obj
+
+# create the default URLGrabber used by urlXXX functions.
+# NOTE: actual defaults are set in URLGrabberOptions
+default_grabber = URLGrabber()
+
+class URLGrabberFileObject:
+ """This is a file-object wrapper that supports progress objects
+ and throttling.
+
+ This exists to solve the following problem: lets say you want to
+ drop-in replace a normal open with urlopen. You want to use a
+ progress meter and/or throttling, but how do you do that without
+ rewriting your code? Answer: urlopen will return a wrapped file
+ object that does the progress meter and-or throttling internally.
+ """
+
+ def __init__(self, url, filename, opts):
+ self.url = url
+ self.filename = filename
+ self.opts = opts
+ self.fo = None
+ self._rbuf = ''
+ self._rbufsize = 1024*8
+ self._ttime = time.time()
+ self._tsize = 0
+ self._amount_read = 0
+ self._opener = None
+ self._do_open()
+
+ def __getattr__(self, name):
+ """This effectively allows us to wrap at the instance level.
+ Any attribute not found in _this_ object will be searched for
+ in self.fo. This includes methods."""
+ if hasattr(self.fo, name):
+ return getattr(self.fo, name)
+ raise AttributeError, name
+
+ def _get_opener(self):
+ """Build a urllib2 OpenerDirector based on request options."""
+ if self.opts.opener:
+ return self.opts.opener
+ elif self._opener is None:
+ handlers = []
+ need_keepalive_handler = (have_keepalive and self.opts.keepalive)
+ need_range_handler = (range_handlers and \
+ (self.opts.range or self.opts.reget))
+ # if you specify a ProxyHandler when creating the opener
+ # it _must_ come before all other handlers in the list or urllib2
+ # chokes.
+ if self.opts.proxies:
+ handlers.append( CachedProxyHandler(self.opts.proxies) )
+
+ # -------------------------------------------------------
+ # OK, these next few lines are a serious kludge to get
+ # around what I think is a bug in python 2.2's
+ # urllib2. The basic idea is that default handlers
+ # get applied first. If you override one (like a
+ # proxy handler), then the default gets pulled, but
+ # the replacement goes on the end. In the case of
+ # proxies, this means the normal handler picks it up
+ # first and the proxy isn't used. Now, this probably
+ # only happened with ftp or non-keepalive http, so not
+ # many folks saw it. The simple approach to fixing it
+ # is just to make sure you override the other
+ # conflicting defaults as well. I would LOVE to see
+ # these go way or be dealt with more elegantly. The
+ # problem isn't there after 2.2. -MDS 2005/02/24
+ if not need_keepalive_handler:
+ handlers.append( urllib2.HTTPHandler() )
+ if not need_range_handler:
+ handlers.append( urllib2.FTPHandler() )
+ # -------------------------------------------------------
+
+ ssl_factory = sslfactory.get_factory(self.opts.ssl_ca_cert,
+ self.opts.ssl_context)
+
+ if need_keepalive_handler:
+ handlers.append(HTTPHandler())
+ handlers.append(HTTPSHandler(ssl_factory))
+ if need_range_handler:
+ handlers.extend( range_handlers )
+ handlers.append( auth_handler )
+ if self.opts.cache_openers:
+ self._opener = CachedOpenerDirector(ssl_factory, *handlers)
+ else:
+ self._opener = ssl_factory.create_opener(*handlers)
+ # OK, I don't like to do this, but otherwise, we end up with
+ # TWO user-agent headers.
+ self._opener.addheaders = []
+ return self._opener
+
+ def _do_open(self):
+ opener = self._get_opener()
+
+ req = urllib2.Request(self.url, self.opts.data) # build request object
+ self._add_headers(req) # add misc headers that we need
+ self._build_range(req) # take care of reget and byterange stuff
+
+ fo, hdr = self._make_request(req, opener)
+ if self.reget_time and self.opts.reget == 'check_timestamp':
+ # do this if we have a local file with known timestamp AND
+ # we're in check_timestamp reget mode.
+ fetch_again = 0
+ try:
+ modified_tuple = hdr.getdate_tz('last-modified')
+ modified_stamp = rfc822.mktime_tz(modified_tuple)
+ if modified_stamp > self.reget_time: fetch_again = 1
+ except (TypeError,):
+ fetch_again = 1
+
+ if fetch_again:
+ # the server version is newer than the (incomplete) local
+ # version, so we should abandon the version we're getting
+ # and fetch the whole thing again.
+ fo.close()
+ self.opts.reget = None
+ del req.headers['Range']
+ self._build_range(req)
+ fo, hdr = self._make_request(req, opener)
+
+ (scheme, host, path, parm, query, frag) = urlparse.urlparse(self.url)
+ path = urllib.unquote(path)
+ if not (self.opts.progress_obj or self.opts.raw_throttle() \
+ or self.opts.timeout):
+ # if we're not using the progress_obj, throttling, or timeout
+ # we can get a performance boost by going directly to
+ # the underlying fileobject for reads.
+ self.read = fo.read
+ if hasattr(fo, 'readline'):
+ self.readline = fo.readline
+ elif self.opts.progress_obj:
+ try:
+ length = int(hdr['Content-Length'])
+ length = length + self._amount_read # Account for regets
+ except (KeyError, ValueError, TypeError):
+ length = None
+
+ self.opts.progress_obj.start(str(self.filename),
+ urllib.unquote(self.url),
+ os.path.basename(path),
+ length, text=self.opts.text)
+ self.opts.progress_obj.update(0)
+ (self.fo, self.hdr) = (fo, hdr)
+
+ def _add_headers(self, req):
+ if self.opts.user_agent:
+ req.add_header('User-agent', self.opts.user_agent)
+ try: req_type = req.get_type()
+ except ValueError: req_type = None
+ if self.opts.http_headers and req_type in ('http', 'https'):
+ for h, v in self.opts.http_headers:
+ req.add_header(h, v)
+ if self.opts.ftp_headers and req_type == 'ftp':
+ for h, v in self.opts.ftp_headers:
+ req.add_header(h, v)
+
+ def _build_range(self, req):
+ self.reget_time = None
+ self.append = 0
+ reget_length = 0
+ rt = None
+ if have_range and self.opts.reget and type(self.filename) == type(''):
+ # we have reget turned on and we're dumping to a file
+ try:
+ s = os.stat(self.filename)
+ except OSError:
+ pass
+ else:
+ self.reget_time = s[ST_MTIME]
+ reget_length = s[ST_SIZE]
+
+ # Set initial length when regetting
+ self._amount_read = reget_length
+
+ rt = reget_length, ''
+ self.append = 1
+
+ if self.opts.range:
+ if not have_range:
+ raise URLGrabError(10, _('Byte range requested but range '\
+ 'support unavailable'))
+ rt = self.opts.range
+ if rt[0]: rt = (rt[0] + reget_length, rt[1])
+
+ if rt:
+ header = range_tuple_to_header(rt)
+ if header: req.add_header('Range', header)
+
+ def _make_request(self, req, opener):
+ try:
+ if have_socket_timeout and self.opts.timeout:
+ old_to = socket.getdefaulttimeout()
+ socket.setdefaulttimeout(self.opts.timeout)
+ try:
+ fo = opener.open(req)
+ finally:
+ socket.setdefaulttimeout(old_to)
+ else:
+ fo = opener.open(req)
+ hdr = fo.info()
+ except ValueError, e:
+ raise URLGrabError(1, _('Bad URL: %s') % (e, ))
+ except RangeError, e:
+ raise URLGrabError(9, str(e))
+ except urllib2.HTTPError, e:
+ new_e = URLGrabError(14, str(e))
+ new_e.code = e.code
+ new_e.exception = e
+ raise new_e
+ except IOError, e:
+ if hasattr(e, 'reason') and have_socket_timeout and \
+ isinstance(e.reason, TimeoutError):
+ raise URLGrabError(12, _('Timeout: %s') % (e, ))
+ else:
+ raise URLGrabError(4, _('IOError: %s') % (e, ))
+ except OSError, e:
+ raise URLGrabError(5, _('OSError: %s') % (e, ))
+ except HTTPException, e:
+ raise URLGrabError(7, _('HTTP Exception (%s): %s') % \
+ (e.__class__.__name__, e))
+ else:
+ return (fo, hdr)
+
+ def _do_grab(self):
+ """dump the file to self.filename."""
+ if self.append: new_fo = open(self.filename, 'ab')
+ else: new_fo = open(self.filename, 'wb')
+ bs = 1024*8
+ size = 0
+
+ block = self.read(bs)
+ size = size + len(block)
+ while block:
+ new_fo.write(block)
+ block = self.read(bs)
+ size = size + len(block)
+
+ new_fo.close()
+ try:
+ modified_tuple = self.hdr.getdate_tz('last-modified')
+ modified_stamp = rfc822.mktime_tz(modified_tuple)
+ os.utime(self.filename, (modified_stamp, modified_stamp))
+ except (TypeError,), e: pass
+
+ return size
+
+ def _fill_buffer(self, amt=None):
+ """fill the buffer to contain at least 'amt' bytes by reading
+ from the underlying file object. If amt is None, then it will
+ read until it gets nothing more. It updates the progress meter
+ and throttles after every self._rbufsize bytes."""
+ # the _rbuf test is only in this first 'if' for speed. It's not
+ # logically necessary
+ if self._rbuf and not amt is None:
+ L = len(self._rbuf)
+ if amt > L:
+ amt = amt - L
+ else:
+ return
+
+ # if we've made it here, then we don't have enough in the buffer
+ # and we need to read more.
+
+ buf = [self._rbuf]
+ bufsize = len(self._rbuf)
+ while amt is None or amt:
+ # first, delay if necessary for throttling reasons
+ if self.opts.raw_throttle():
+ diff = self._tsize/self.opts.raw_throttle() - \
+ (time.time() - self._ttime)
+ if diff > 0: time.sleep(diff)
+ self._ttime = time.time()
+
+ # now read some data, up to self._rbufsize
+ if amt is None: readamount = self._rbufsize
+ else: readamount = min(amt, self._rbufsize)
+ try:
+ new = self.fo.read(readamount)
+ except socket.error, e:
+ raise URLGrabError(4, _('Socket Error: %s') % (e, ))
+ except TimeoutError, e:
+ raise URLGrabError(12, _('Timeout: %s') % (e, ))
+ except IOError, e:
+ raise URLGrabError(4, _('IOError: %s') %(e,))
+ newsize = len(new)
+ if not newsize: break # no more to read
+
+ if amt: amt = amt - newsize
+ buf.append(new)
+ bufsize = bufsize + newsize
+ self._tsize = newsize
+ self._amount_read = self._amount_read + newsize
+ if self.opts.progress_obj:
+ self.opts.progress_obj.update(self._amount_read)
+
+ self._rbuf = string.join(buf, '')
+ return
+
+ def read(self, amt=None):
+ self._fill_buffer(amt)
+ if amt is None:
+ s, self._rbuf = self._rbuf, ''
+ else:
+ s, self._rbuf = self._rbuf[:amt], self._rbuf[amt:]
+ return s
+
+ def readline(self, limit=-1):
+ i = string.find(self._rbuf, '\n')
+ while i < 0 and not (0 < limit <= len(self._rbuf)):
+ L = len(self._rbuf)
+ self._fill_buffer(L + self._rbufsize)
+ if not len(self._rbuf) > L: break
+ i = string.find(self._rbuf, '\n', L)
+
+ if i < 0: i = len(self._rbuf)
+ else: i = i+1
+ if 0 <= limit < len(self._rbuf): i = limit
+
+ s, self._rbuf = self._rbuf[:i], self._rbuf[i:]
+ return s
+
+ def close(self):
+ if self.opts.progress_obj:
+ self.opts.progress_obj.end(self._amount_read)
+ self.fo.close()
+ if self.opts.close_connection:
+ try: self.fo.close_connection()
+ except: pass
+
+_handler_cache = []
+def CachedOpenerDirector(ssl_factory = None, *handlers):
+ for (cached_handlers, opener) in _handler_cache:
+ if cached_handlers == handlers:
+ for handler in opener.handlers:
+ handler.add_parent(opener)
+ return opener
+ if not ssl_factory:
+ ssl_factory = sslfactory.get_factory()
+ opener = ssl_factory.create_opener(*handlers)
+ _handler_cache.append( (handlers, opener) )
+ return opener
+
+_proxy_cache = []
+def CachedProxyHandler(proxies):
+ for (pdict, handler) in _proxy_cache:
+ if pdict == proxies:
+ if DEBUG: DEBUG.debug('re-using proxy settings: %s', proxies)
+ break
+ else:
+ for k, v in proxies.items():
+ utype, url = urllib.splittype(v)
+ host, other = urllib.splithost(url)
+ if (utype is None) or (host is None):
+ raise URLGrabError(13, _('Bad proxy URL: %s') % v)
+
+ if DEBUG: DEBUG.info('creating new proxy handler: %s', proxies)
+ handler = urllib2.ProxyHandler(proxies)
+ _proxy_cache.append( (proxies, handler) )
+ return handler
+
+#####################################################################
+# DEPRECATED FUNCTIONS
+def set_throttle(new_throttle):
+ """Deprecated. Use: default_grabber.throttle = new_throttle"""
+ default_grabber.throttle = new_throttle
+
+def set_bandwidth(new_bandwidth):
+ """Deprecated. Use: default_grabber.bandwidth = new_bandwidth"""
+ default_grabber.bandwidth = new_bandwidth
+
+def set_progress_obj(new_progress_obj):
+ """Deprecated. Use: default_grabber.progress_obj = new_progress_obj"""
+ default_grabber.progress_obj = new_progress_obj
+
+def set_user_agent(new_user_agent):
+ """Deprecated. Use: default_grabber.user_agent = new_user_agent"""
+ default_grabber.user_agent = new_user_agent
+
+def retrygrab(url, filename=None, copy_local=0, close_connection=0,
+ progress_obj=None, throttle=None, bandwidth=None,
+ numtries=3, retrycodes=[-1,2,4,5,6,7], checkfunc=None):
+ """Deprecated. Use: urlgrab() with the retry arg instead"""
+ kwargs = {'copy_local' : copy_local,
+ 'close_connection' : close_connection,
+ 'progress_obj' : progress_obj,
+ 'throttle' : throttle,
+ 'bandwidth' : bandwidth,
+ 'retry' : numtries,
+ 'retrycodes' : retrycodes,
+ 'checkfunc' : checkfunc
+ }
+ return urlgrab(url, filename, **kwargs)
+
+
+#####################################################################
+# TESTING
+def _main_test():
+ import sys
+ try: url, filename = sys.argv[1:3]
+ except ValueError:
+ print 'usage:', sys.argv[0], \
+ '<url> <filename> [copy_local=0|1] [close_connection=0|1]'
+ sys.exit()
+
+ kwargs = {}
+ for a in sys.argv[3:]:
+ k, v = string.split(a, '=', 1)
+ kwargs[k] = int(v)
+
+ set_throttle(1.0)
+ set_bandwidth(32 * 1024)
+ print "throttle: %s, throttle bandwidth: %s B/s" % (default_grabber.throttle,
+ default_grabber.bandwidth)
+
+ try: from progress import text_progress_meter
+ except ImportError, e: pass
+ else: kwargs['progress_obj'] = text_progress_meter()
+
+ try: name = apply(urlgrab, (url, filename), kwargs)
+ except URLGrabError, e: print e
+ else: print 'LOCAL FILE:', name
+
+
+def _retry_test():
+ import sys
+ try: url, filename = sys.argv[1:3]
+ except ValueError:
+ print 'usage:', sys.argv[0], \
+ '<url> <filename> [copy_local=0|1] [close_connection=0|1]'
+ sys.exit()
+
+ kwargs = {}
+ for a in sys.argv[3:]:
+ k, v = string.split(a, '=', 1)
+ kwargs[k] = int(v)
+
+ try: from progress import text_progress_meter
+ except ImportError, e: pass
+ else: kwargs['progress_obj'] = text_progress_meter()
+
+ def cfunc(filename, hello, there='foo'):
+ print hello, there
+ import random
+ rnum = random.random()
+ if rnum < .5:
+ print 'forcing retry'
+ raise URLGrabError(-1, 'forcing retry')
+ if rnum < .75:
+ print 'forcing failure'
+ raise URLGrabError(-2, 'forcing immediate failure')
+ print 'success'
+ return
+
+ kwargs['checkfunc'] = (cfunc, ('hello',), {'there':'there'})
+ try: name = apply(retrygrab, (url, filename), kwargs)
+ except URLGrabError, e: print e
+ else: print 'LOCAL FILE:', name
+
+def _file_object_test(filename=None):
+ import random, cStringIO, sys
+ if filename is None:
+ filename = __file__
+ print 'using file "%s" for comparisons' % filename
+ fo = open(filename)
+ s_input = fo.read()
+ fo.close()
+
+ for testfunc in [_test_file_object_smallread,
+ _test_file_object_readall,
+ _test_file_object_readline,
+ _test_file_object_readlines]:
+ fo_input = cStringIO.StringIO(s_input)
+ fo_output = cStringIO.StringIO()
+ wrapper = URLGrabberFileObject(fo_input, None, 0)
+ print 'testing %-30s ' % testfunc.__name__,
+ testfunc(wrapper, fo_output)
+ s_output = fo_output.getvalue()
+ if s_output == s_input: print 'passed'
+ else: print 'FAILED'
+
+def _test_file_object_smallread(wrapper, fo_output):
+ while 1:
+ s = wrapper.read(23)
+ fo_output.write(s)
+ if not s: return
+
+def _test_file_object_readall(wrapper, fo_output):
+ s = wrapper.read()
+ fo_output.write(s)
+
+def _test_file_object_readline(wrapper, fo_output):
+ while 1:
+ s = wrapper.readline()
+ fo_output.write(s)
+ if not s: return
+
+def _test_file_object_readlines(wrapper, fo_output):
+ li = wrapper.readlines()
+ fo_output.write(string.join(li, ''))
+
+if __name__ == '__main__':
+ _main_test()
+ _retry_test()
+ _file_object_test('test')
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py
new file mode 100644
index 0000000000..71393e2b8d
--- /dev/null
+++ b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py
@@ -0,0 +1,617 @@
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
+# Boston, MA 02111-1307 USA
+
+# This file is part of urlgrabber, a high-level cross-protocol url-grabber
+# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+
+"""An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
+
+>>> import urllib2
+>>> from keepalive import HTTPHandler
+>>> keepalive_handler = HTTPHandler()
+>>> opener = urllib2.build_opener(keepalive_handler)
+>>> urllib2.install_opener(opener)
+>>>
+>>> fo = urllib2.urlopen('http://www.python.org')
+
+If a connection to a given host is requested, and all of the existing
+connections are still in use, another connection will be opened. If
+the handler tries to use an existing connection but it fails in some
+way, it will be closed and removed from the pool.
+
+To remove the handler, simply re-run build_opener with no arguments, and
+install that opener.
+
+You can explicitly close connections by using the close_connection()
+method of the returned file-like object (described below) or you can
+use the handler methods:
+
+ close_connection(host)
+ close_all()
+ open_connections()
+
+NOTE: using the close_connection and close_all methods of the handler
+should be done with care when using multiple threads.
+ * there is nothing that prevents another thread from creating new
+ connections immediately after connections are closed
+ * no checks are done to prevent in-use connections from being closed
+
+>>> keepalive_handler.close_all()
+
+EXTRA ATTRIBUTES AND METHODS
+
+ Upon a status of 200, the object returned has a few additional
+ attributes and methods, which should not be used if you want to
+ remain consistent with the normal urllib2-returned objects:
+
+ close_connection() - close the connection to the host
+ readlines() - you know, readlines()
+ status - the return status (ie 404)
+ reason - english translation of status (ie 'File not found')
+
+ If you want the best of both worlds, use this inside an
+ AttributeError-catching try:
+
+ >>> try: status = fo.status
+ >>> except AttributeError: status = None
+
+ Unfortunately, these are ONLY there if status == 200, so it's not
+ easy to distinguish between non-200 responses. The reason is that
+ urllib2 tries to do clever things with error codes 301, 302, 401,
+ and 407, and it wraps the object upon return.
+
+ For python versions earlier than 2.4, you can avoid this fancy error
+ handling by setting the module-level global HANDLE_ERRORS to zero.
+ You see, prior to 2.4, it's the HTTP Handler's job to determine what
+ to handle specially, and what to just pass up. HANDLE_ERRORS == 0
+ means "pass everything up". In python 2.4, however, this job no
+ longer belongs to the HTTP Handler and is now done by a NEW handler,
+ HTTPErrorProcessor. Here's the bottom line:
+
+ python version < 2.4
+ HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
+ errors
+ HANDLE_ERRORS == 0 pass everything up, error processing is
+ left to the calling code
+ python version >= 2.4
+ HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
+ HANDLE_ERRORS == 0 (default) pass everything up, let the
+ other handlers (specifically,
+ HTTPErrorProcessor) decide what to do
+
+ In practice, setting the variable either way makes little difference
+ in python 2.4, so for the most consistent behavior across versions,
+ you probably just want to use the defaults, which will give you
+ exceptions on errors.
+
+"""
+
+# $Id: keepalive.py,v 1.16 2006/09/22 00:58:05 mstenner Exp $
+
+import urllib2
+import httplib
+import socket
+import thread
+
+DEBUG = None
+
+import sslfactory
+
+import sys
+if sys.version_info < (2, 4): HANDLE_ERRORS = 1
+else: HANDLE_ERRORS = 0
+
+class ConnectionManager:
+ """
+ The connection manager must be able to:
+ * keep track of all existing
+ """
+ def __init__(self):
+ self._lock = thread.allocate_lock()
+ self._hostmap = {} # map hosts to a list of connections
+ self._connmap = {} # map connections to host
+ self._readymap = {} # map connection to ready state
+
+ def add(self, host, connection, ready):
+ self._lock.acquire()
+ try:
+ if not self._hostmap.has_key(host): self._hostmap[host] = []
+ self._hostmap[host].append(connection)
+ self._connmap[connection] = host
+ self._readymap[connection] = ready
+ finally:
+ self._lock.release()
+
+ def remove(self, connection):
+ self._lock.acquire()
+ try:
+ try:
+ host = self._connmap[connection]
+ except KeyError:
+ pass
+ else:
+ del self._connmap[connection]
+ del self._readymap[connection]
+ self._hostmap[host].remove(connection)
+ if not self._hostmap[host]: del self._hostmap[host]
+ finally:
+ self._lock.release()
+
+ def set_ready(self, connection, ready):
+ try: self._readymap[connection] = ready
+ except KeyError: pass
+
+ def get_ready_conn(self, host):
+ conn = None
+ self._lock.acquire()
+ try:
+ if self._hostmap.has_key(host):
+ for c in self._hostmap[host]:
+ if self._readymap[c]:
+ self._readymap[c] = 0
+ conn = c
+ break
+ finally:
+ self._lock.release()
+ return conn
+
+ def get_all(self, host=None):
+ if host:
+ return list(self._hostmap.get(host, []))
+ else:
+ return dict(self._hostmap)
+
+class KeepAliveHandler:
+ def __init__(self):
+ self._cm = ConnectionManager()
+
+ #### Connection Management
+ def open_connections(self):
+ """return a list of connected hosts and the number of connections
+ to each. [('foo.com:80', 2), ('bar.org', 1)]"""
+ return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
+
+ def close_connection(self, host):
+ """close connection(s) to <host>
+ host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
+ no error occurs if there is no connection to that host."""
+ for h in self._cm.get_all(host):
+ self._cm.remove(h)
+ h.close()
+
+ def close_all(self):
+ """close all open connections"""
+ for host, conns in self._cm.get_all().items():
+ for h in conns:
+ self._cm.remove(h)
+ h.close()
+
+ def _request_closed(self, request, host, connection):
+ """tells us that this request is now closed and the the
+ connection is ready for another request"""
+ self._cm.set_ready(connection, 1)
+
+ def _remove_connection(self, host, connection, close=0):
+ if close: connection.close()
+ self._cm.remove(connection)
+
+ #### Transaction Execution
+ def do_open(self, req):
+ host = req.get_host()
+ if not host:
+ raise urllib2.URLError('no host given')
+
+ try:
+ h = self._cm.get_ready_conn(host)
+ while h:
+ r = self._reuse_connection(h, req, host)
+
+ # if this response is non-None, then it worked and we're
+ # done. Break out, skipping the else block.
+ if r: break
+
+ # connection is bad - possibly closed by server
+ # discard it and ask for the next free connection
+ h.close()
+ self._cm.remove(h)
+ h = self._cm.get_ready_conn(host)
+ else:
+ # no (working) free connections were found. Create a new one.
+ h = self._get_connection(host)
+ if DEBUG: DEBUG.info("creating new connection to %s (%d)",
+ host, id(h))
+ self._cm.add(host, h, 0)
+ self._start_transaction(h, req)
+ r = h.getresponse()
+ except (socket.error, httplib.HTTPException), err:
+ raise urllib2.URLError(err)
+
+ # if not a persistent connection, don't try to reuse it
+ if r.will_close: self._cm.remove(h)
+
+ if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
+ r._handler = self
+ r._host = host
+ r._url = req.get_full_url()
+ r._connection = h
+ r.code = r.status
+ r.headers = r.msg
+ r.msg = r.reason
+
+ if r.status == 200 or not HANDLE_ERRORS:
+ return r
+ else:
+ return self.parent.error('http', req, r,
+ r.status, r.msg, r.headers)
+
+ def _reuse_connection(self, h, req, host):
+ """start the transaction with a re-used connection
+ return a response object (r) upon success or None on failure.
+ This DOES not close or remove bad connections in cases where
+ it returns. However, if an unexpected exception occurs, it
+ will close and remove the connection before re-raising.
+ """
+ try:
+ self._start_transaction(h, req)
+ r = h.getresponse()
+ # note: just because we got something back doesn't mean it
+ # worked. We'll check the version below, too.
+ except (socket.error, httplib.HTTPException):
+ r = None
+ except:
+ # adding this block just in case we've missed
+ # something we will still raise the exception, but
+ # lets try and close the connection and remove it
+ # first. We previously got into a nasty loop
+ # where an exception was uncaught, and so the
+ # connection stayed open. On the next try, the
+ # same exception was raised, etc. The tradeoff is
+ # that it's now possible this call will raise
+ # a DIFFERENT exception
+ if DEBUG: DEBUG.error("unexpected exception - closing " + \
+ "connection to %s (%d)", host, id(h))
+ self._cm.remove(h)
+ h.close()
+ raise
+
+ if r is None or r.version == 9:
+ # httplib falls back to assuming HTTP 0.9 if it gets a
+ # bad header back. This is most likely to happen if
+ # the socket has been closed by the server since we
+ # last used the connection.
+ if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
+ host, id(h))
+ r = None
+ else:
+ if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))
+
+ return r
+
+ def _start_transaction(self, h, req):
+ try:
+ if req.has_data():
+ data = req.get_data()
+ h.putrequest('POST', req.get_selector())
+ if not req.headers.has_key('Content-type'):
+ h.putheader('Content-type',
+ 'application/x-www-form-urlencoded')
+ if not req.headers.has_key('Content-length'):
+ h.putheader('Content-length', '%d' % len(data))
+ else:
+ h.putrequest('GET', req.get_selector())
+ except (socket.error, httplib.HTTPException), err:
+ raise urllib2.URLError(err)
+
+ for args in self.parent.addheaders:
+ h.putheader(*args)
+ for k, v in req.headers.items():
+ h.putheader(k, v)
+ h.endheaders()
+ if req.has_data():
+ h.send(data)
+
+ def _get_connection(self, host):
+ return NotImplementedError
+
+class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
+ def __init__(self):
+ KeepAliveHandler.__init__(self)
+
+ def http_open(self, req):
+ return self.do_open(req)
+
+ def _get_connection(self, host):
+ return HTTPConnection(host)
+
+class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler):
+ def __init__(self, ssl_factory=None):
+ KeepAliveHandler.__init__(self)
+ if not ssl_factory:
+ ssl_factory = sslfactory.get_factory()
+ self._ssl_factory = ssl_factory
+
+ def https_open(self, req):
+ return self.do_open(req)
+
+ def _get_connection(self, host):
+ return self._ssl_factory.get_https_connection(host)
+
+class HTTPResponse(httplib.HTTPResponse):
+ # we need to subclass HTTPResponse in order to
+ # 1) add readline() and readlines() methods
+ # 2) add close_connection() methods
+ # 3) add info() and geturl() methods
+
+ # in order to add readline(), read must be modified to deal with a
+ # buffer. example: readline must read a buffer and then spit back
+ # one line at a time. The only real alternative is to read one
+ # BYTE at a time (ick). Once something has been read, it can't be
+ # put back (ok, maybe it can, but that's even uglier than this),
+ # so if you THEN do a normal read, you must first take stuff from
+ # the buffer.
+
+ # the read method wraps the original to accomodate buffering,
+ # although read() never adds to the buffer.
+ # Both readline and readlines have been stolen with almost no
+ # modification from socket.py
+
+
+ def __init__(self, sock, debuglevel=0, strict=0, method=None):
+ if method: # the httplib in python 2.3 uses the method arg
+ httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
+ else: # 2.2 doesn't
+ httplib.HTTPResponse.__init__(self, sock, debuglevel)
+ self.fileno = sock.fileno
+ self.code = None
+ self._rbuf = ''
+ self._rbufsize = 8096
+ self._handler = None # inserted by the handler later
+ self._host = None # (same)
+ self._url = None # (same)
+ self._connection = None # (same)
+
+ _raw_read = httplib.HTTPResponse.read
+
+ def close(self):
+ if self.fp:
+ self.fp.close()
+ self.fp = None
+ if self._handler:
+ self._handler._request_closed(self, self._host,
+ self._connection)
+
+ def close_connection(self):
+ self._handler._remove_connection(self._host, self._connection, close=1)
+ self.close()
+
+ def info(self):
+ return self.headers
+
+ def geturl(self):
+ return self._url
+
+ def read(self, amt=None):
+ # the _rbuf test is only in this first if for speed. It's not
+ # logically necessary
+ if self._rbuf and not amt is None:
+ L = len(self._rbuf)
+ if amt > L:
+ amt -= L
+ else:
+ s = self._rbuf[:amt]
+ self._rbuf = self._rbuf[amt:]
+ return s
+
+ s = self._rbuf + self._raw_read(amt)
+ self._rbuf = ''
+ return s
+
+ def readline(self, limit=-1):
+ data = ""
+ i = self._rbuf.find('\n')
+ while i < 0 and not (0 < limit <= len(self._rbuf)):
+ new = self._raw_read(self._rbufsize)
+ if not new: break
+ i = new.find('\n')
+ if i >= 0: i = i + len(self._rbuf)
+ self._rbuf = self._rbuf + new
+ if i < 0: i = len(self._rbuf)
+ else: i = i+1
+ if 0 <= limit < len(self._rbuf): i = limit
+ data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
+ return data
+
+ def readlines(self, sizehint = 0):
+ total = 0
+ list = []
+ while 1:
+ line = self.readline()
+ if not line: break
+ list.append(line)
+ total += len(line)
+ if sizehint and total >= sizehint:
+ break
+ return list
+
+
+class HTTPConnection(httplib.HTTPConnection):
+ # use the modified response class
+ response_class = HTTPResponse
+
+class HTTPSConnection(httplib.HTTPSConnection):
+ response_class = HTTPResponse
+
+#########################################################################
+##### TEST FUNCTIONS
+#########################################################################
+
+def error_handler(url):
+ global HANDLE_ERRORS
+ orig = HANDLE_ERRORS
+ keepalive_handler = HTTPHandler()
+ opener = urllib2.build_opener(keepalive_handler)
+ urllib2.install_opener(opener)
+ pos = {0: 'off', 1: 'on'}
+ for i in (0, 1):
+ print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
+ HANDLE_ERRORS = i
+ try:
+ fo = urllib2.urlopen(url)
+ foo = fo.read()
+ fo.close()
+ try: status, reason = fo.status, fo.reason
+ except AttributeError: status, reason = None, None
+ except IOError, e:
+ print " EXCEPTION: %s" % e
+ raise
+ else:
+ print " status = %s, reason = %s" % (status, reason)
+ HANDLE_ERRORS = orig
+ hosts = keepalive_handler.open_connections()
+ print "open connections:", hosts
+ keepalive_handler.close_all()
+
+def continuity(url):
+ import md5
+ format = '%25s: %s'
+
+ # first fetch the file with the normal http handler
+ opener = urllib2.build_opener()
+ urllib2.install_opener(opener)
+ fo = urllib2.urlopen(url)
+ foo = fo.read()
+ fo.close()
+ m = md5.new(foo)
+ print format % ('normal urllib', m.hexdigest())
+
+ # now install the keepalive handler and try again
+ opener = urllib2.build_opener(HTTPHandler())
+ urllib2.install_opener(opener)
+
+ fo = urllib2.urlopen(url)
+ foo = fo.read()
+ fo.close()
+ m = md5.new(foo)
+ print format % ('keepalive read', m.hexdigest())
+
+ fo = urllib2.urlopen(url)
+ foo = ''
+ while 1:
+ f = fo.readline()
+ if f: foo = foo + f
+ else: break
+ fo.close()
+ m = md5.new(foo)
+ print format % ('keepalive readline', m.hexdigest())
+
+def comp(N, url):
+ print ' making %i connections to:\n %s' % (N, url)
+
+ sys.stdout.write(' first using the normal urllib handlers')
+ # first use normal opener
+ opener = urllib2.build_opener()
+ urllib2.install_opener(opener)
+ t1 = fetch(N, url)
+ print ' TIME: %.3f s' % t1
+
+ sys.stdout.write(' now using the keepalive handler ')
+ # now install the keepalive handler and try again
+ opener = urllib2.build_opener(HTTPHandler())
+ urllib2.install_opener(opener)
+ t2 = fetch(N, url)
+ print ' TIME: %.3f s' % t2
+ print ' improvement factor: %.2f' % (t1/t2, )
+
+def fetch(N, url, delay=0):
+ import time
+ lens = []
+ starttime = time.time()
+ for i in range(N):
+ if delay and i > 0: time.sleep(delay)
+ fo = urllib2.urlopen(url)
+ foo = fo.read()
+ fo.close()
+ lens.append(len(foo))
+ diff = time.time() - starttime
+
+ j = 0
+ for i in lens[1:]:
+ j = j + 1
+ if not i == lens[0]:
+ print "WARNING: inconsistent length on read %i: %i" % (j, i)
+
+ return diff
+
+def test_timeout(url):
+ global DEBUG
+ dbbackup = DEBUG
+ class FakeLogger:
+ def debug(self, msg, *args): print msg % args
+ info = warning = error = debug
+ DEBUG = FakeLogger()
+ print " fetching the file to establish a connection"
+ fo = urllib2.urlopen(url)
+ data1 = fo.read()
+ fo.close()
+
+ i = 20
+ print " waiting %i seconds for the server to close the connection" % i
+ while i > 0:
+ sys.stdout.write('\r %2i' % i)
+ sys.stdout.flush()
+ time.sleep(1)
+ i -= 1
+ sys.stderr.write('\r')
+
+ print " fetching the file a second time"
+ fo = urllib2.urlopen(url)
+ data2 = fo.read()
+ fo.close()
+
+ if data1 == data2:
+ print ' data are identical'
+ else:
+ print ' ERROR: DATA DIFFER'
+
+ DEBUG = dbbackup
+
+
+def test(url, N=10):
+ print "checking error hander (do this on a non-200)"
+ try: error_handler(url)
+ except IOError, e:
+ print "exiting - exception will prevent further tests"
+ sys.exit()
+ print
+ print "performing continuity test (making sure stuff isn't corrupted)"
+ continuity(url)
+ print
+ print "performing speed comparison"
+ comp(N, url)
+ print
+ print "performing dropped-connection check"
+ test_timeout(url)
+
+if __name__ == '__main__':
+ import time
+ import sys
+ try:
+ N = int(sys.argv[1])
+ url = sys.argv[2]
+ except:
+ print "%s <integer> <url>" % sys.argv[0]
+ else:
+ test(url, N)
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py
new file mode 100644
index 0000000000..9664c6b5c5
--- /dev/null
+++ b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py
@@ -0,0 +1,458 @@
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
+# Boston, MA 02111-1307 USA
+
+# This file is part of urlgrabber, a high-level cross-protocol url-grabber
+# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+
+"""Module for downloading files from a pool of mirrors
+
+DESCRIPTION
+
+ This module provides support for downloading files from a pool of
+ mirrors with configurable failover policies. To a large extent, the
+ failover policy is chosen by using different classes derived from
+ the main class, MirrorGroup.
+
+ Instances of MirrorGroup (and cousins) act very much like URLGrabber
+ instances in that they have urlread, urlgrab, and urlopen methods.
+ They can therefore, be used in very similar ways.
+
+ from urlgrabber.grabber import URLGrabber
+ from urlgrabber.mirror import MirrorGroup
+ gr = URLGrabber()
+ mg = MirrorGroup(gr, ['http://foo.com/some/directory/',
+ 'http://bar.org/maybe/somewhere/else/',
+ 'ftp://baz.net/some/other/place/entirely/']
+ mg.urlgrab('relative/path.zip')
+
+ The assumption is that all mirrors are identical AFTER the base urls
+ specified, so that any mirror can be used to fetch any file.
+
+FAILOVER
+
+ The failover mechanism is designed to be customized by subclassing
+ from MirrorGroup to change the details of the behavior. In general,
+ the classes maintain a master mirror list and a "current mirror"
+ index. When a download is initiated, a copy of this list and index
+ is created for that download only. The specific failover policy
+ depends on the class used, and so is documented in the class
+ documentation. Note that ANY behavior of the class can be
+ overridden, so any failover policy at all is possible (although
+ you may need to change the interface in extreme cases).
+
+CUSTOMIZATION
+
+ Most customization of a MirrorGroup object is done at instantiation
+ time (or via subclassing). There are four major types of
+ customization:
+
+ 1) Pass in a custom urlgrabber - The passed in urlgrabber will be
+ used (by default... see #2) for the grabs, so options to it
+ apply for the url-fetching
+
+ 2) Custom mirror list - Mirror lists can simply be a list of
+ stings mirrors (as shown in the example above) but each can
+ also be a dict, allowing for more options. For example, the
+ first mirror in the list above could also have been:
+
+ {'mirror': 'http://foo.com/some/directory/',
+ 'grabber': <a custom grabber to be used for this mirror>,
+ 'kwargs': { <a dict of arguments passed to the grabber> }}
+
+ All mirrors are converted to this format internally. If
+ 'grabber' is omitted, the default grabber will be used. If
+ kwargs are omitted, then (duh) they will not be used.
+
+ 3) Pass keyword arguments when instantiating the mirror group.
+ See, for example, the failure_callback argument.
+
+ 4) Finally, any kwargs passed in for the specific file (to the
+ urlgrab method, for example) will be folded in. The options
+ passed into the grabber's urlXXX methods will override any
+ options specified in a custom mirror dict.
+
+"""
+
+# $Id: mirror.py,v 1.14 2006/02/22 18:26:46 mstenner Exp $
+
+import random
+import thread # needed for locking to make this threadsafe
+
+from grabber import URLGrabError, CallbackObject, DEBUG
+
+try:
+ from i18n import _
+except ImportError, msg:
+ def _(st): return st
+
+class GrabRequest:
+ """This is a dummy class used to hold information about the specific
+ request. For example, a single file. By maintaining this information
+ separately, we can accomplish two things:
+
+ 1) make it a little easier to be threadsafe
+ 2) have request-specific parameters
+ """
+ pass
+
+class MirrorGroup:
+ """Base Mirror class
+
+ Instances of this class are built with a grabber object and a list
+ of mirrors. Then all calls to urlXXX should be passed relative urls.
+ The requested file will be searched for on the first mirror. If the
+ grabber raises an exception (possibly after some retries) then that
+ mirror will be removed from the list, and the next will be attempted.
+ If all mirrors are exhausted, then an exception will be raised.
+
+ MirrorGroup has the following failover policy:
+
+ * downloads begin with the first mirror
+
+ * by default (see default_action below) a failure (after retries)
+ causes it to increment the local AND master indices. Also,
+ the current mirror is removed from the local list (but NOT the
+ master list - the mirror can potentially be used for other
+ files)
+
+ * if the local list is ever exhausted, a URLGrabError will be
+ raised (errno=256, no more mirrors)
+
+ OPTIONS
+
+ In addition to the required arguments "grabber" and "mirrors",
+ MirrorGroup also takes the following optional arguments:
+
+ default_action
+
+ A dict that describes the actions to be taken upon failure
+ (after retries). default_action can contain any of the
+ following keys (shown here with their default values):
+
+ default_action = {'increment': 1,
+ 'increment_master': 1,
+ 'remove': 1,
+ 'remove_master': 0,
+ 'fail': 0}
+
+ In this context, 'increment' means "use the next mirror" and
+ 'remove' means "never use this mirror again". The two
+ 'master' values refer to the instance-level mirror list (used
+ for all files), whereas the non-master values refer to the
+ current download only.
+
+ The 'fail' option will cause immediate failure by re-raising
+ the exception and no further attempts to get the current
+ download.
+
+ This dict can be set at instantiation time,
+ mg = MirrorGroup(grabber, mirrors, default_action={'fail':1})
+ at method-execution time (only applies to current fetch),
+ filename = mg.urlgrab(url, default_action={'increment': 0})
+ or by returning an action dict from the failure_callback
+ return {'fail':0}
+ in increasing precedence.
+
+ If all three of these were done, the net result would be:
+ {'increment': 0, # set in method
+ 'increment_master': 1, # class default
+ 'remove': 1, # class default
+ 'remove_master': 0, # class default
+ 'fail': 0} # set at instantiation, reset
+ # from callback
+
+ failure_callback
+
+ this is a callback that will be called when a mirror "fails",
+ meaning the grabber raises some URLGrabError. If this is a
+ tuple, it is interpreted to be of the form (cb, args, kwargs)
+ where cb is the actual callable object (function, method,
+ etc). Otherwise, it is assumed to be the callable object
+ itself. The callback will be passed a grabber.CallbackObject
+ instance along with args and kwargs (if present). The following
+ attributes are defined withing the instance:
+
+ obj.exception = < exception that was raised >
+ obj.mirror = < the mirror that was tried >
+ obj.relative_url = < url relative to the mirror >
+ obj.url = < full url that failed >
+ # .url is just the combination of .mirror
+ # and .relative_url
+
+ The failure callback can return an action dict, as described
+ above.
+
+ Like default_action, the failure_callback can be set at
+ instantiation time or when the urlXXX method is called. In
+ the latter case, it applies only for that fetch.
+
+ The callback can re-raise the exception quite easily. For
+ example, this is a perfectly adequate callback function:
+
+ def callback(obj): raise obj.exception
+
+ WARNING: do not save the exception object (or the
+ CallbackObject instance). As they contain stack frame
+ references, they can lead to circular references.
+
+ Notes:
+ * The behavior can be customized by deriving and overriding the
+ 'CONFIGURATION METHODS'
+ * The 'grabber' instance is kept as a reference, not copied.
+ Therefore, the grabber instance can be modified externally
+ and changes will take effect immediately.
+ """
+
+ # notes on thread-safety:
+
+ # A GrabRequest should never be shared by multiple threads because
+ # it's never saved inside the MG object and never returned outside it.
+ # therefore, it should be safe to access/modify grabrequest data
+ # without a lock. However, accessing the mirrors and _next attributes
+ # of the MG itself must be done when locked to prevent (for example)
+ # removal of the wrong mirror.
+
+ ##############################################################
+ # CONFIGURATION METHODS - intended to be overridden to
+ # customize behavior
+ def __init__(self, grabber, mirrors, **kwargs):
+ """Initialize the MirrorGroup object.
+
+ REQUIRED ARGUMENTS
+
+ grabber - URLGrabber instance
+ mirrors - a list of mirrors
+
+ OPTIONAL ARGUMENTS
+
+ failure_callback - callback to be used when a mirror fails
+ default_action - dict of failure actions
+
+ See the module-level and class level documentation for more
+ details.
+ """
+
+ # OVERRIDE IDEAS:
+ # shuffle the list to randomize order
+ self.grabber = grabber
+ self.mirrors = self._parse_mirrors(mirrors)
+ self._next = 0
+ self._lock = thread.allocate_lock()
+ self.default_action = None
+ self._process_kwargs(kwargs)
+
+ # if these values are found in **kwargs passed to one of the urlXXX
+ # methods, they will be stripped before getting passed on to the
+ # grabber
+ options = ['default_action', 'failure_callback']
+
+ def _process_kwargs(self, kwargs):
+ self.failure_callback = kwargs.get('failure_callback')
+ self.default_action = kwargs.get('default_action')
+
+ def _parse_mirrors(self, mirrors):
+ parsed_mirrors = []
+ for m in mirrors:
+ if type(m) == type(''): m = {'mirror': m}
+ parsed_mirrors.append(m)
+ return parsed_mirrors
+
+ def _load_gr(self, gr):
+ # OVERRIDE IDEAS:
+ # shuffle gr list
+ self._lock.acquire()
+ gr.mirrors = list(self.mirrors)
+ gr._next = self._next
+ self._lock.release()
+
+ def _get_mirror(self, gr):
+ # OVERRIDE IDEAS:
+ # return a random mirror so that multiple mirrors get used
+ # even without failures.
+ if not gr.mirrors:
+ raise URLGrabError(256, _('No more mirrors to try.'))
+ return gr.mirrors[gr._next]
+
+ def _failure(self, gr, cb_obj):
+ # OVERRIDE IDEAS:
+ # inspect the error - remove=1 for 404, remove=2 for connection
+ # refused, etc. (this can also be done via
+ # the callback)
+ cb = gr.kw.get('failure_callback') or self.failure_callback
+ if cb:
+ if type(cb) == type( () ):
+ cb, args, kwargs = cb
+ else:
+ args, kwargs = (), {}
+ action = cb(cb_obj, *args, **kwargs) or {}
+ else:
+ action = {}
+ # XXXX - decide - there are two ways to do this
+ # the first is action-overriding as a whole - use the entire action
+ # or fall back on module level defaults
+ #action = action or gr.kw.get('default_action') or self.default_action
+ # the other is to fall through for each element in the action dict
+ a = dict(self.default_action or {})
+ a.update(gr.kw.get('default_action', {}))
+ a.update(action)
+ action = a
+ self.increment_mirror(gr, action)
+ if action and action.get('fail', 0): raise
+
+ def increment_mirror(self, gr, action={}):
+ """Tell the mirror object increment the mirror index
+
+ This increments the mirror index, which amounts to telling the
+ mirror object to use a different mirror (for this and future
+ downloads).
+
+ This is a SEMI-public method. It will be called internally,
+ and you may never need to call it. However, it is provided
+ (and is made public) so that the calling program can increment
+ the mirror choice for methods like urlopen. For example, with
+ urlopen, there's no good way for the mirror group to know that
+ an error occurs mid-download (it's already returned and given
+ you the file object).
+
+ remove --- can have several values
+ 0 do not remove the mirror from the list
+ 1 remove the mirror for this download only
+ 2 remove the mirror permanently
+
+ beware of remove=0 as it can lead to infinite loops
+ """
+ badmirror = gr.mirrors[gr._next]
+
+ self._lock.acquire()
+ try:
+ ind = self.mirrors.index(badmirror)
+ except ValueError:
+ pass
+ else:
+ if action.get('remove_master', 0):
+ del self.mirrors[ind]
+ elif self._next == ind and action.get('increment_master', 1):
+ self._next += 1
+ if self._next >= len(self.mirrors): self._next = 0
+ self._lock.release()
+
+ if action.get('remove', 1):
+ del gr.mirrors[gr._next]
+ elif action.get('increment', 1):
+ gr._next += 1
+ if gr._next >= len(gr.mirrors): gr._next = 0
+
+ if DEBUG:
+ grm = [m['mirror'] for m in gr.mirrors]
+ DEBUG.info('GR mirrors: [%s] %i', ' '.join(grm), gr._next)
+ selfm = [m['mirror'] for m in self.mirrors]
+ DEBUG.info('MAIN mirrors: [%s] %i', ' '.join(selfm), self._next)
+
+ #####################################################################
+ # NON-CONFIGURATION METHODS
+ # these methods are designed to be largely workhorse methods that
+ # are not intended to be overridden. That doesn't mean you can't;
+ # if you want to, feel free, but most things can be done by
+ # by overriding the configuration methods :)
+
+ def _join_url(self, base_url, rel_url):
+ if base_url.endswith('/') or rel_url.startswith('/'):
+ return base_url + rel_url
+ else:
+ return base_url + '/' + rel_url
+
+ def _mirror_try(self, func, url, kw):
+ gr = GrabRequest()
+ gr.func = func
+ gr.url = url
+ gr.kw = dict(kw)
+ self._load_gr(gr)
+
+ for k in self.options:
+ try: del kw[k]
+ except KeyError: pass
+
+ while 1:
+ mirrorchoice = self._get_mirror(gr)
+ fullurl = self._join_url(mirrorchoice['mirror'], gr.url)
+ kwargs = dict(mirrorchoice.get('kwargs', {}))
+ kwargs.update(kw)
+ grabber = mirrorchoice.get('grabber') or self.grabber
+ func_ref = getattr(grabber, func)
+ if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl)
+ try:
+ return func_ref( *(fullurl,), **kwargs )
+ except URLGrabError, e:
+ if DEBUG: DEBUG.info('MIRROR: failed')
+ obj = CallbackObject()
+ obj.exception = e
+ obj.mirror = mirrorchoice['mirror']
+ obj.relative_url = gr.url
+ obj.url = fullurl
+ self._failure(gr, obj)
+
+ def urlgrab(self, url, filename=None, **kwargs):
+ kw = dict(kwargs)
+ kw['filename'] = filename
+ func = 'urlgrab'
+ return self._mirror_try(func, url, kw)
+
+ def urlopen(self, url, **kwargs):
+ kw = dict(kwargs)
+ func = 'urlopen'
+ return self._mirror_try(func, url, kw)
+
+ def urlread(self, url, limit=None, **kwargs):
+ kw = dict(kwargs)
+ kw['limit'] = limit
+ func = 'urlread'
+ return self._mirror_try(func, url, kw)
+
+
+class MGRandomStart(MirrorGroup):
+ """A mirror group that starts at a random mirror in the list.
+
+ This behavior of this class is identical to MirrorGroup, except that
+ it starts at a random location in the mirror list.
+ """
+
+ def __init__(self, grabber, mirrors, **kwargs):
+ """Initialize the object
+
+ The arguments for intialization are the same as for MirrorGroup
+ """
+ MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
+ self._next = random.randrange(len(mirrors))
+
+class MGRandomOrder(MirrorGroup):
+ """A mirror group that uses mirrors in a random order.
+
+ This behavior of this class is identical to MirrorGroup, except that
+ it uses the mirrors in a random order. Note that the order is set at
+ initialization time and fixed thereafter. That is, it does not pick a
+ random mirror after each failure.
+ """
+
+ def __init__(self, grabber, mirrors, **kwargs):
+ """Initialize the object
+
+ The arguments for intialization are the same as for MirrorGroup
+ """
+ MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
+ random.shuffle(self.mirrors)
+
+if __name__ == '__main__':
+ pass
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py
new file mode 100644
index 0000000000..02db524e76
--- /dev/null
+++ b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py
@@ -0,0 +1,530 @@
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
+# Boston, MA 02111-1307 USA
+
+# This file is part of urlgrabber, a high-level cross-protocol url-grabber
+# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+
+# $Id: progress.py,v 1.7 2005/08/19 21:59:07 mstenner Exp $
+
+import sys
+import time
+import math
+import thread
+
+class BaseMeter:
+ def __init__(self):
+ self.update_period = 0.3 # seconds
+
+ self.filename = None
+ self.url = None
+ self.basename = None
+ self.text = None
+ self.size = None
+ self.start_time = None
+ self.last_amount_read = 0
+ self.last_update_time = None
+ self.re = RateEstimator()
+
+ def start(self, filename=None, url=None, basename=None,
+ size=None, now=None, text=None):
+ self.filename = filename
+ self.url = url
+ self.basename = basename
+ self.text = text
+
+ #size = None ######### TESTING
+ self.size = size
+ if not size is None: self.fsize = format_number(size) + 'B'
+
+ if now is None: now = time.time()
+ self.start_time = now
+ self.re.start(size, now)
+ self.last_amount_read = 0
+ self.last_update_time = now
+ self._do_start(now)
+
+ def _do_start(self, now=None):
+ pass
+
+ def update(self, amount_read, now=None):
+ # for a real gui, you probably want to override and put a call
+ # to your mainloop iteration function here
+ if now is None: now = time.time()
+ if (now >= self.last_update_time + self.update_period) or \
+ not self.last_update_time:
+ self.re.update(amount_read, now)
+ self.last_amount_read = amount_read
+ self.last_update_time = now
+ self._do_update(amount_read, now)
+
+ def _do_update(self, amount_read, now=None):
+ pass
+
+ def end(self, amount_read, now=None):
+ if now is None: now = time.time()
+ self.re.update(amount_read, now)
+ self.last_amount_read = amount_read
+ self.last_update_time = now
+ self._do_end(amount_read, now)
+
+ def _do_end(self, amount_read, now=None):
+ pass
+
+class TextMeter(BaseMeter):
+ def __init__(self, fo=sys.stderr):
+ BaseMeter.__init__(self)
+ self.fo = fo
+
+ def _do_update(self, amount_read, now=None):
+ etime = self.re.elapsed_time()
+ fetime = format_time(etime)
+ fread = format_number(amount_read)
+ #self.size = None
+ if self.text is not None:
+ text = self.text
+ else:
+ text = self.basename
+ if self.size is None:
+ out = '\r%-60.60s %5sB %s ' % \
+ (text, fread, fetime)
+ else:
+ rtime = self.re.remaining_time()
+ frtime = format_time(rtime)
+ frac = self.re.fraction_read()
+ bar = '='*int(25 * frac)
+
+ out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ETA ' % \
+ (text, frac*100, bar, fread, frtime)
+
+ self.fo.write(out)
+ self.fo.flush()
+
+ def _do_end(self, amount_read, now=None):
+ total_time = format_time(self.re.elapsed_time())
+ total_size = format_number(amount_read)
+ if self.text is not None:
+ text = self.text
+ else:
+ text = self.basename
+ if self.size is None:
+ out = '\r%-60.60s %5sB %s ' % \
+ (text, total_size, total_time)
+ else:
+ bar = '='*25
+ out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ' % \
+ (text, 100, bar, total_size, total_time)
+ self.fo.write(out + '\n')
+ self.fo.flush()
+
+text_progress_meter = TextMeter
+
+class MultiFileHelper(BaseMeter):
+ def __init__(self, master):
+ BaseMeter.__init__(self)
+ self.master = master
+
+ def _do_start(self, now):
+ self.master.start_meter(self, now)
+
+ def _do_update(self, amount_read, now):
+ # elapsed time since last update
+ self.master.update_meter(self, now)
+
+ def _do_end(self, amount_read, now):
+ self.ftotal_time = format_time(now - self.start_time)
+ self.ftotal_size = format_number(self.last_amount_read)
+ self.master.end_meter(self, now)
+
+ def failure(self, message, now=None):
+ self.master.failure_meter(self, message, now)
+
+ def message(self, message):
+ self.master.message_meter(self, message)
+
+class MultiFileMeter:
+ helperclass = MultiFileHelper
+ def __init__(self):
+ self.meters = []
+ self.in_progress_meters = []
+ self._lock = thread.allocate_lock()
+ self.update_period = 0.3 # seconds
+
+ self.numfiles = None
+ self.finished_files = 0
+ self.failed_files = 0
+ self.open_files = 0
+ self.total_size = None
+ self.failed_size = 0
+ self.start_time = None
+ self.finished_file_size = 0
+ self.last_update_time = None
+ self.re = RateEstimator()
+
+ def start(self, numfiles=None, total_size=None, now=None):
+ if now is None: now = time.time()
+ self.numfiles = numfiles
+ self.finished_files = 0
+ self.failed_files = 0
+ self.open_files = 0
+ self.total_size = total_size
+ self.failed_size = 0
+ self.start_time = now
+ self.finished_file_size = 0
+ self.last_update_time = now
+ self.re.start(total_size, now)
+ self._do_start(now)
+
+ def _do_start(self, now):
+ pass
+
+ def end(self, now=None):
+ if now is None: now = time.time()
+ self._do_end(now)
+
+ def _do_end(self, now):
+ pass
+
+ def lock(self): self._lock.acquire()
+ def unlock(self): self._lock.release()
+
+ ###########################################################
+ # child meter creation and destruction
+ def newMeter(self):
+ newmeter = self.helperclass(self)
+ self.meters.append(newmeter)
+ return newmeter
+
+ def removeMeter(self, meter):
+ self.meters.remove(meter)
+
+ ###########################################################
+ # child functions - these should only be called by helpers
+ def start_meter(self, meter, now):
+ if not meter in self.meters:
+ raise ValueError('attempt to use orphaned meter')
+ self._lock.acquire()
+ try:
+ if not meter in self.in_progress_meters:
+ self.in_progress_meters.append(meter)
+ self.open_files += 1
+ finally:
+ self._lock.release()
+ self._do_start_meter(meter, now)
+
+ def _do_start_meter(self, meter, now):
+ pass
+
+ def update_meter(self, meter, now):
+ if not meter in self.meters:
+ raise ValueError('attempt to use orphaned meter')
+ if (now >= self.last_update_time + self.update_period) or \
+ not self.last_update_time:
+ self.re.update(self._amount_read(), now)
+ self.last_update_time = now
+ self._do_update_meter(meter, now)
+
+ def _do_update_meter(self, meter, now):
+ pass
+
+ def end_meter(self, meter, now):
+ if not meter in self.meters:
+ raise ValueError('attempt to use orphaned meter')
+ self._lock.acquire()
+ try:
+ try: self.in_progress_meters.remove(meter)
+ except ValueError: pass
+ self.open_files -= 1
+ self.finished_files += 1
+ self.finished_file_size += meter.last_amount_read
+ finally:
+ self._lock.release()
+ self._do_end_meter(meter, now)
+
+ def _do_end_meter(self, meter, now):
+ pass
+
+ def failure_meter(self, meter, message, now):
+ if not meter in self.meters:
+ raise ValueError('attempt to use orphaned meter')
+ self._lock.acquire()
+ try:
+ try: self.in_progress_meters.remove(meter)
+ except ValueError: pass
+ self.open_files -= 1
+ self.failed_files += 1
+ if meter.size and self.failed_size is not None:
+ self.failed_size += meter.size
+ else:
+ self.failed_size = None
+ finally:
+ self._lock.release()
+ self._do_failure_meter(meter, message, now)
+
+ def _do_failure_meter(self, meter, message, now):
+ pass
+
+ def message_meter(self, meter, message):
+ pass
+
+ ########################################################
+ # internal functions
+ def _amount_read(self):
+ tot = self.finished_file_size
+ for m in self.in_progress_meters:
+ tot += m.last_amount_read
+ return tot
+
+
+class TextMultiFileMeter(MultiFileMeter):
+ def __init__(self, fo=sys.stderr):
+ self.fo = fo
+ MultiFileMeter.__init__(self)
+
+ # files: ###/### ###% data: ######/###### ###% time: ##:##:##/##:##:##
+ def _do_update_meter(self, meter, now):
+ self._lock.acquire()
+ try:
+ format = "files: %3i/%-3i %3i%% data: %6.6s/%-6.6s %3i%% " \
+ "time: %8.8s/%8.8s"
+ df = self.finished_files
+ tf = self.numfiles or 1
+ pf = 100 * float(df)/tf + 0.49
+ dd = self.re.last_amount_read
+ td = self.total_size
+ pd = 100 * (self.re.fraction_read() or 0) + 0.49
+ dt = self.re.elapsed_time()
+ rt = self.re.remaining_time()
+ if rt is None: tt = None
+ else: tt = dt + rt
+
+ fdd = format_number(dd) + 'B'
+ ftd = format_number(td) + 'B'
+ fdt = format_time(dt, 1)
+ ftt = format_time(tt, 1)
+
+ out = '%-79.79s' % (format % (df, tf, pf, fdd, ftd, pd, fdt, ftt))
+ self.fo.write('\r' + out)
+ self.fo.flush()
+ finally:
+ self._lock.release()
+
+ def _do_end_meter(self, meter, now):
+ self._lock.acquire()
+ try:
+ format = "%-30.30s %6.6s %8.8s %9.9s"
+ fn = meter.basename
+ size = meter.last_amount_read
+ fsize = format_number(size) + 'B'
+ et = meter.re.elapsed_time()
+ fet = format_time(et, 1)
+ frate = format_number(size / et) + 'B/s'
+
+ out = '%-79.79s' % (format % (fn, fsize, fet, frate))
+ self.fo.write('\r' + out + '\n')
+ finally:
+ self._lock.release()
+ self._do_update_meter(meter, now)
+
+ def _do_failure_meter(self, meter, message, now):
+ self._lock.acquire()
+ try:
+ format = "%-30.30s %6.6s %s"
+ fn = meter.basename
+ if type(message) in (type(''), type(u'')):
+ message = message.splitlines()
+ if not message: message = ['']
+ out = '%-79s' % (format % (fn, 'FAILED', message[0] or ''))
+ self.fo.write('\r' + out + '\n')
+ for m in message[1:]: self.fo.write(' ' + m + '\n')
+ self._lock.release()
+ finally:
+ self._do_update_meter(meter, now)
+
+ def message_meter(self, meter, message):
+ self._lock.acquire()
+ try:
+ pass
+ finally:
+ self._lock.release()
+
+ def _do_end(self, now):
+ self._do_update_meter(None, now)
+ self._lock.acquire()
+ try:
+ self.fo.write('\n')
+ self.fo.flush()
+ finally:
+ self._lock.release()
+
+######################################################################
+# support classes and functions
+
+class RateEstimator:
+ def __init__(self, timescale=5.0):
+ self.timescale = timescale
+
+ def start(self, total=None, now=None):
+ if now is None: now = time.time()
+ self.total = total
+ self.start_time = now
+ self.last_update_time = now
+ self.last_amount_read = 0
+ self.ave_rate = None
+
+ def update(self, amount_read, now=None):
+ if now is None: now = time.time()
+ if amount_read == 0:
+ # if we just started this file, all bets are off
+ self.last_update_time = now
+ self.last_amount_read = 0
+ self.ave_rate = None
+ return
+
+ #print 'times', now, self.last_update_time
+ time_diff = now - self.last_update_time
+ read_diff = amount_read - self.last_amount_read
+ self.last_update_time = now
+ self.last_amount_read = amount_read
+ self.ave_rate = self._temporal_rolling_ave(\
+ time_diff, read_diff, self.ave_rate, self.timescale)
+ #print 'results', time_diff, read_diff, self.ave_rate
+
+ #####################################################################
+ # result methods
+ def average_rate(self):
+ "get the average transfer rate (in bytes/second)"
+ return self.ave_rate
+
+ def elapsed_time(self):
+ "the time between the start of the transfer and the most recent update"
+ return self.last_update_time - self.start_time
+
+ def remaining_time(self):
+ "estimated time remaining"
+ if not self.ave_rate or not self.total: return None
+ return (self.total - self.last_amount_read) / self.ave_rate
+
+ def fraction_read(self):
+ """the fraction of the data that has been read
+ (can be None for unknown transfer size)"""
+ if self.total is None: return None
+ elif self.total == 0: return 1.0
+ else: return float(self.last_amount_read)/self.total
+
+ #########################################################################
+ # support methods
+ def _temporal_rolling_ave(self, time_diff, read_diff, last_ave, timescale):
+ """a temporal rolling average performs smooth averaging even when
+ updates come at irregular intervals. This is performed by scaling
+ the "epsilon" according to the time since the last update.
+ Specifically, epsilon = time_diff / timescale
+
+ As a general rule, the average will take on a completely new value
+ after 'timescale' seconds."""
+ epsilon = time_diff / timescale
+ if epsilon > 1: epsilon = 1.0
+ return self._rolling_ave(time_diff, read_diff, last_ave, epsilon)
+
+ def _rolling_ave(self, time_diff, read_diff, last_ave, epsilon):
+ """perform a "rolling average" iteration
+ a rolling average "folds" new data into an existing average with
+ some weight, epsilon. epsilon must be between 0.0 and 1.0 (inclusive)
+ a value of 0.0 means only the old value (initial value) counts,
+ and a value of 1.0 means only the newest value is considered."""
+
+ try:
+ recent_rate = read_diff / time_diff
+ except ZeroDivisionError:
+ recent_rate = None
+ if last_ave is None: return recent_rate
+ elif recent_rate is None: return last_ave
+
+ # at this point, both last_ave and recent_rate are numbers
+ return epsilon * recent_rate + (1 - epsilon) * last_ave
+
+ def _round_remaining_time(self, rt, start_time=15.0):
+ """round the remaining time, depending on its size
+ If rt is between n*start_time and (n+1)*start_time round downward
+ to the nearest multiple of n (for any counting number n).
+ If rt < start_time, round down to the nearest 1.
+ For example (for start_time = 15.0):
+ 2.7 -> 2.0
+ 25.2 -> 25.0
+ 26.4 -> 26.0
+ 35.3 -> 34.0
+ 63.6 -> 60.0
+ """
+
+ if rt < 0: return 0.0
+ shift = int(math.log(rt/start_time)/math.log(2))
+ rt = int(rt)
+ if shift <= 0: return rt
+ return float(int(rt) >> shift << shift)
+
+
+def format_time(seconds, use_hours=0):
+ if seconds is None or seconds < 0:
+ if use_hours: return '--:--:--'
+ else: return '--:--'
+ else:
+ seconds = int(seconds)
+ minutes = seconds / 60
+ seconds = seconds % 60
+ if use_hours:
+ hours = minutes / 60
+ minutes = minutes % 60
+ return '%02i:%02i:%02i' % (hours, minutes, seconds)
+ else:
+ return '%02i:%02i' % (minutes, seconds)
+
+def format_number(number, SI=0, space=' '):
+ """Turn numbers into human-readable metric-like numbers"""
+ symbols = ['', # (none)
+ 'k', # kilo
+ 'M', # mega
+ 'G', # giga
+ 'T', # tera
+ 'P', # peta
+ 'E', # exa
+ 'Z', # zetta
+ 'Y'] # yotta
+
+ if SI: step = 1000.0
+ else: step = 1024.0
+
+ thresh = 999
+ depth = 0
+ max_depth = len(symbols) - 1
+
+ # we want numbers between 0 and thresh, but don't exceed the length
+ # of our list. In that event, the formatting will be screwed up,
+ # but it'll still show the right number.
+ while number > thresh and depth < max_depth:
+ depth = depth + 1
+ number = number / step
+
+ if type(number) == type(1) or type(number) == type(1L):
+ # it's an int or a long, which means it didn't get divided,
+ # which means it's already short enough
+ format = '%i%s%s'
+ elif number < 9.95:
+ # must use 9.95 for proper sizing. For example, 9.99 will be
+ # rounded to 10.0 with the .1f format string (which is too long)
+ format = '%.1f%s%s'
+ else:
+ format = '%.0f%s%s'
+
+ return(format % (float(number or 0), space, symbols[depth]))
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py
new file mode 100644
index 0000000000..07848dac7c
--- /dev/null
+++ b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py
@@ -0,0 +1,90 @@
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
+# Boston, MA 02111-1307 USA
+
+# This file is part of urlgrabber, a high-level cross-protocol url-grabber
+
+import httplib
+import urllib2
+
+try:
+ from M2Crypto import SSL
+ from M2Crypto import httpslib
+ from M2Crypto import m2urllib2
+
+ SSL.Connection.clientPostConnectionCheck = None
+ have_m2crypto = True
+except ImportError:
+ have_m2crypto = False
+
+DEBUG = None
+
+if have_m2crypto:
+
+ class M2SSLFactory:
+
+ def __init__(self, ssl_ca_cert, ssl_context):
+ self.ssl_context = self._get_ssl_context(ssl_ca_cert, ssl_context)
+
+ def _get_ssl_context(self, ssl_ca_cert, ssl_context):
+ """
+ Create an ssl context using the CA cert file or ssl context.
+
+ The CA cert is used first if it was passed as an option. If not,
+ then the supplied ssl context is used. If no ssl context was supplied,
+ None is returned.
+ """
+ if ssl_ca_cert:
+ context = SSL.Context()
+ context.load_verify_locations(ssl_ca_cert)
+ context.set_verify(SSL.verify_none, -1)
+ return context
+ else:
+ return ssl_context
+
+ def create_https_connection(self, host, response_class = None):
+ connection = httplib.HTTPSConnection(host, self.ssl_context)
+ if response_class:
+ connection.response_class = response_class
+ return connection
+
+ def create_opener(self, *handlers):
+ return m2urllib2.build_opener(self.ssl_context, *handlers)
+
+
+class SSLFactory:
+
+ def create_https_connection(self, host, response_class = None):
+ connection = httplib.HTTPSConnection(host)
+ if response_class:
+ connection.response_class = response_class
+ return connection
+
+ def create_opener(self, *handlers):
+ return urllib2.build_opener(*handlers)
+
+
+
+def get_factory(ssl_ca_cert = None, ssl_context = None):
+ """ Return an SSLFactory, based on if M2Crypto is available. """
+ if have_m2crypto:
+ return M2SSLFactory(ssl_ca_cert, ssl_context)
+ else:
+ # Log here if someone provides the args but we don't use them.
+ if ssl_ca_cert or ssl_context:
+ if DEBUG:
+ DEBUG.warning("SSL arguments supplied, but M2Crypto is not available. "
+ "Using Python SSL.")
+ return SSLFactory()