about summary refs log tree commit diff stats
path: root/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py')
-rw-r--r-- scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py 458
1 files changed, 458 insertions, 0 deletions
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py
new file mode 100644
index 0000000000..9664c6b5c5
--- /dev/null
+++ b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py
@@ -0,0 +1,458 @@
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
+# Boston, MA 02111-1307 USA
+
+# This file is part of urlgrabber, a high-level cross-protocol url-grabber
+# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+
+"""Module for downloading files from a pool of mirrors
+
+DESCRIPTION
+
+ This module provides support for downloading files from a pool of
+ mirrors with configurable failover policies. To a large extent, the
+ failover policy is chosen by using different classes derived from
+ the main class, MirrorGroup.
+
+ Instances of MirrorGroup (and cousins) act very much like URLGrabber
+ instances in that they have urlread, urlgrab, and urlopen methods.
+ They can, therefore, be used in very similar ways.
+
+ from urlgrabber.grabber import URLGrabber
+ from urlgrabber.mirror import MirrorGroup
+ gr = URLGrabber()
+ mg = MirrorGroup(gr, ['http://foo.com/some/directory/',
+ 'http://bar.org/maybe/somewhere/else/',
+ 'ftp://baz.net/some/other/place/entirely/'])
+ mg.urlgrab('relative/path.zip')
+
+ The assumption is that all mirrors are identical AFTER the base urls
+ specified, so that any mirror can be used to fetch any file.
+
+FAILOVER
+
+ The failover mechanism is designed to be customized by subclassing
+ from MirrorGroup to change the details of the behavior. In general,
+ the classes maintain a master mirror list and a "current mirror"
+ index. When a download is initiated, a copy of this list and index
+ is created for that download only. The specific failover policy
+ depends on the class used, and so is documented in the class
+ documentation. Note that ANY behavior of the class can be
+ overridden, so any failover policy at all is possible (although
+ you may need to change the interface in extreme cases).
+
+CUSTOMIZATION
+
+ Most customization of a MirrorGroup object is done at instantiation
+ time (or via subclassing). There are four major types of
+ customization:
+
+ 1) Pass in a custom urlgrabber - The passed in urlgrabber will be
+ used (by default... see #2) for the grabs, so options to it
+ apply for the url-fetching
+
+ 2) Custom mirror list - Mirror lists can simply be a list of
+ mirror strings (as shown in the example above) but each can
+ also be a dict, allowing for more options. For example, the
+ first mirror in the list above could also have been:
+
+ {'mirror': 'http://foo.com/some/directory/',
+ 'grabber': <a custom grabber to be used for this mirror>,
+ 'kwargs': { <a dict of arguments passed to the grabber> }}
+
+ All mirrors are converted to this format internally. If
+ 'grabber' is omitted, the default grabber will be used. If
+ kwargs are omitted, then (duh) they will not be used.
+
+ 3) Pass keyword arguments when instantiating the mirror group.
+ See, for example, the failure_callback argument.
+
+ 4) Finally, any kwargs passed in for the specific file (to the
+ urlgrab method, for example) will be folded in. The options
+ passed into the grabber's urlXXX methods will override any
+ options specified in a custom mirror dict.
+
+"""
+
+# $Id: mirror.py,v 1.14 2006/02/22 18:26:46 mstenner Exp $
+
+import random
+import thread # needed for locking to make this threadsafe
+
+from grabber import URLGrabError, CallbackObject, DEBUG
+
+# Use the "_" function from the i18n module when it can be imported;
+# otherwise fall back to an identity function so that message strings
+# are returned unchanged.
+try:
+ from i18n import _
+except ImportError, msg:
+ def _(st): return st
+
+class GrabRequest:
+    """Plain attribute holder describing one specific request.
+
+    A fresh instance is created for every fetch (for example, a single
+    file) and carries the state that belongs to that fetch only.
+    Keeping this state apart from the mirror group itself serves two
+    purposes:
+
+      1) it makes thread safety a little easier to achieve
+      2) it allows request-specific parameters
+    """
+
+class MirrorGroup:
+    """Base Mirror class
+
+    Instances of this class are built with a grabber object and a list
+    of mirrors.  Then all calls to urlXXX should be passed relative urls.
+    The requested file will be searched for on the first mirror.  If the
+    grabber raises an exception (possibly after some retries) then that
+    mirror will be removed from the list, and the next will be attempted.
+    If all mirrors are exhausted, then an exception will be raised.
+
+    MirrorGroup has the following failover policy:
+
+      * downloads begin with the first mirror
+
+      * by default (see default_action below) a failure (after retries)
+        causes it to increment the local AND master indices.  Also,
+        the current mirror is removed from the local list (but NOT the
+        master list - the mirror can potentially be used for other
+        files)
+
+      * if the local list is ever exhausted, a URLGrabError will be
+        raised (errno=256, no more mirrors)
+
+    OPTIONS
+
+      In addition to the required arguments "grabber" and "mirrors",
+      MirrorGroup also takes the following optional arguments:
+
+      default_action
+
+        A dict that describes the actions to be taken upon failure
+        (after retries).  default_action can contain any of the
+        following keys (shown here with their default values):
+
+          default_action = {'increment': 1,
+                            'increment_master': 1,
+                            'remove': 1,
+                            'remove_master': 0,
+                            'fail': 0}
+
+        In this context, 'increment' means "use the next mirror" and
+        'remove' means "never use this mirror again".  The two
+        'master' values refer to the instance-level mirror list (used
+        for all files), whereas the non-master values refer to the
+        current download only.
+
+        The 'fail' option will cause immediate failure by re-raising
+        the exception and no further attempts to get the current
+        download.
+
+        This dict can be set at instantiation time,
+          mg = MirrorGroup(grabber, mirrors, default_action={'fail':1})
+        at method-execution time (only applies to current fetch),
+          filename = mg.urlgrab(url, default_action={'increment': 0})
+        or by returning an action dict from the failure_callback
+          return {'fail':0}
+        in increasing precedence.
+
+        If all three of these were done, the net result would be:
+              {'increment': 0,         # set in method
+               'increment_master': 1,  # class default
+               'remove': 1,            # class default
+               'remove_master': 0,     # class default
+               'fail': 0}              # set at instantiation, reset
+                                       # from callback
+
+      failure_callback
+
+        this is a callback that will be called when a mirror "fails",
+        meaning the grabber raises some URLGrabError.  If this is a
+        tuple, it is interpreted to be of the form (cb, args, kwargs)
+        where cb is the actual callable object (function, method,
+        etc).  Otherwise, it is assumed to be the callable object
+        itself.  The callback will be passed a grabber.CallbackObject
+        instance along with args and kwargs (if present).  The following
+        attributes are defined within the instance:
+
+           obj.exception    = < exception that was raised >
+           obj.mirror       = < the mirror that was tried >
+           obj.relative_url = < url relative to the mirror >
+           obj.url          = < full url that failed >
+                              # .url is just the combination of .mirror
+                              # and .relative_url
+
+        The failure callback can return an action dict, as described
+        above.
+
+        Like default_action, the failure_callback can be set at
+        instantiation time or when the urlXXX method is called.  In
+        the latter case, it applies only for that fetch.
+
+        The callback can re-raise the exception quite easily.  For
+        example, this is a perfectly adequate callback function:
+
+          def callback(obj): raise obj.exception
+
+        WARNING: do not save the exception object (or the
+        CallbackObject instance).  As they contain stack frame
+        references, they can lead to circular references.
+
+    Notes:
+      * The behavior can be customized by deriving and overriding the
+        'CONFIGURATION METHODS'
+      * The 'grabber' instance is kept as a reference, not copied.
+        Therefore, the grabber instance can be modified externally
+        and changes will take effect immediately.
+    """
+
+    # notes on thread-safety:
+
+    #   A GrabRequest should never be shared by multiple threads because
+    #   it's never saved inside the MG object and never returned outside it.
+    #   therefore, it should be safe to access/modify grabrequest data
+    #   without a lock.  However, accessing the mirrors and _next attributes
+    #   of the MG itself must be done when locked to prevent (for example)
+    #   removal of the wrong mirror.
+
+    ##############################################################
+    #  CONFIGURATION METHODS  -  intended to be overridden to
+    #                            customize behavior
+    def __init__(self, grabber, mirrors, **kwargs):
+        """Initialize the MirrorGroup object.
+
+        REQUIRED ARGUMENTS
+
+          grabber  - URLGrabber instance
+          mirrors  - a list of mirrors (strings or mirror dicts)
+
+        OPTIONAL ARGUMENTS
+
+          failure_callback  - callback to be used when a mirror fails
+          default_action    - dict of failure actions
+
+        See the module-level and class level documentation for more
+        details.
+        """
+
+        # OVERRIDE IDEAS:
+        #   shuffle the list to randomize order
+        self.grabber = grabber
+        self.mirrors = self._parse_mirrors(mirrors)
+        self._next = 0
+        self._lock = thread.allocate_lock()
+        # Give both options a value up front so that a subclass overriding
+        # _process_kwargs cannot leave either attribute undefined.
+        self.default_action = None
+        self.failure_callback = None
+        self._process_kwargs(kwargs)
+
+    # if these values are found in **kwargs passed to one of the urlXXX
+    # methods, they will be stripped before getting passed on to the
+    # grabber
+    options = ['default_action', 'failure_callback']
+
+    def _process_kwargs(self, kwargs):
+        """Store the instance-level options found in kwargs.
+
+        Options that are missing from kwargs are stored as None.
+        """
+        self.failure_callback = kwargs.get('failure_callback')
+        self.default_action = kwargs.get('default_action')
+
+    def _parse_mirrors(self, mirrors):
+        """Return a new list with every mirror in its dict form.
+
+        String entries (str or unicode) are wrapped as {'mirror': url};
+        any other entry is assumed to already be a mirror dict and is
+        kept as-is.
+        """
+        parsed_mirrors = []
+        for m in mirrors:
+            # basestring also matches unicode urls, which the former
+            # exact-type comparison against str let through unwrapped.
+            if isinstance(m, basestring):
+                m = {'mirror': m}
+            parsed_mirrors.append(m)
+        return parsed_mirrors
+
+    def _load_gr(self, gr):
+        """Copy the master mirror list and index into the request gr."""
+        # OVERRIDE IDEAS:
+        #   shuffle gr list
+        self._lock.acquire()
+        try:
+            gr.mirrors = list(self.mirrors)
+            gr._next = self._next
+        finally:
+            # always release, otherwise one failure here would block
+            # every later fetch on this group
+            self._lock.release()
+
+    def _get_mirror(self, gr):
+        """Return the mirror dict the request gr should try next.
+
+        Raises URLGrabError (errno 256) when the request has no
+        mirrors left.
+        """
+        # OVERRIDE IDEAS:
+        #   return a random mirror so that multiple mirrors get used
+        #   even without failures.
+        if not gr.mirrors:
+            raise URLGrabError(256, _('No more mirrors to try.'))
+        return gr.mirrors[gr._next]
+
+    def _failure(self, gr, cb_obj):
+        """Handle a failed attempt for the request gr.
+
+        Calls the failure callback (if any) with cb_obj, merges the
+        resulting action dict with the configured defaults, advances
+        the mirror indices accordingly and, if the merged action has a
+        true 'fail' value, re-raises the exception currently being
+        handled.  This must therefore be called from inside an except
+        block.
+        """
+        # OVERRIDE IDEAS:
+        #   inspect the error - remove=1 for 404, remove=2 for connection
+        #                       refused, etc. (this can also be done via
+        #                       the callback)
+        cb = gr.kw.get('failure_callback') or self.failure_callback
+        if cb:
+            if isinstance(cb, tuple):
+                cb, args, kwargs = cb
+            else:
+                args, kwargs = (), {}
+            action = cb(cb_obj, *args, **kwargs) or {}
+        else:
+            action = {}
+
+        # Each key falls through individually, in increasing precedence:
+        # instance-level default_action, then the per-call default_action,
+        # then whatever the callback returned.  "or {}" tolerates an
+        # explicit default_action=None passed to a urlXXX method.
+        merged = dict(self.default_action or {})
+        merged.update(gr.kw.get('default_action') or {})
+        merged.update(action)
+
+        self.increment_mirror(gr, merged)
+        # bare raise: re-raise the exception being handled by our caller
+        if merged.get('fail', 0): raise
+
+    def increment_mirror(self, gr, action=None):
+        """Tell the mirror object increment the mirror index
+
+        This increments the mirror index, which amounts to telling the
+        mirror object to use a different mirror (for this and future
+        downloads).
+
+        This is a SEMI-public method.  It will be called internally,
+        and you may never need to call it.  However, it is provided
+        (and is made public) so that the calling program can increment
+        the mirror choice for methods like urlopen.  For example, with
+        urlopen, there's no good way for the mirror group to know that
+        an error occurs mid-download (it's already returned and given
+        you the file object).
+
+        action is a dict with any of the following keys (a missing
+        key takes the default shown):
+
+          remove_master     (0)  delete the mirror from the master list
+          increment_master  (1)  otherwise, advance the master index if
+                                 it currently points at this mirror
+          remove            (1)  delete the mirror from this request
+          increment         (1)  otherwise, advance this request's index
+
+        beware of remove=0 as it can lead to infinite loops
+        """
+        if action is None:
+            action = {}
+        badmirror = gr.mirrors[gr._next]
+
+        self._lock.acquire()
+        try:
+            try:
+                ind = self.mirrors.index(badmirror)
+            except ValueError:
+                # already gone from the master list; nothing to update
+                pass
+            else:
+                if action.get('remove_master', 0):
+                    del self.mirrors[ind]
+                    # entries after ind moved down one slot; keep the
+                    # master index on the mirror it was pointing at
+                    if ind < self._next:
+                        self._next -= 1
+                elif self._next == ind and action.get('increment_master', 1):
+                    self._next += 1
+                if self._next >= len(self.mirrors): self._next = 0
+        finally:
+            self._lock.release()
+
+        if action.get('remove', 1):
+            del gr.mirrors[gr._next]
+        elif action.get('increment', 1):
+            gr._next += 1
+        if gr._next >= len(gr.mirrors): gr._next = 0
+
+        if DEBUG:
+            grm = [m['mirror'] for m in gr.mirrors]
+            DEBUG.info('GR mirrors: [%s] %i', ' '.join(grm), gr._next)
+            selfm = [m['mirror'] for m in self.mirrors]
+            DEBUG.info('MAIN mirrors: [%s] %i', ' '.join(selfm), self._next)
+
+    #####################################################################
+    # NON-CONFIGURATION METHODS
+    # these methods are designed to be largely workhorse methods that
+    # are not intended to be overridden.  That doesn't mean you can't;
+    # if you want to, feel free, but most things can be done by
+    # by overriding the configuration methods :)
+
+    def _join_url(self, base_url, rel_url):
+        """Join base_url and rel_url, adding a '/' only if neither has one."""
+        if base_url.endswith('/') or rel_url.startswith('/'):
+            return base_url + rel_url
+        else:
+            return base_url + '/' + rel_url
+
+    def _mirror_try(self, func, url, kw):
+        """Call the grabber method named func for url, mirror by mirror.
+
+        kw holds the keyword arguments of the urlXXX call.  The mirror
+        group options (see self.options) are kept on the request object
+        and stripped before the rest is handed to the grabber.  Returns
+        whatever the grabber method returns on the first success.  On
+        URLGrabError the failure handling runs and the loop moves on;
+        it ends when _get_mirror raises because no mirrors are left or
+        when _failure re-raises.
+        """
+        gr = GrabRequest()
+        gr.func = func
+        gr.url = url
+        gr.kw = dict(kw)
+        self._load_gr(gr)
+
+        # work on a copy so the caller's dict is left untouched
+        kw = dict(kw)
+        for k in self.options:
+            kw.pop(k, None)
+
+        while 1:
+            mirrorchoice = self._get_mirror(gr)
+            fullurl = self._join_url(mirrorchoice['mirror'], gr.url)
+            # per-mirror kwargs first, so per-call kwargs override them
+            kwargs = dict(mirrorchoice.get('kwargs', {}))
+            kwargs.update(kw)
+            grabber = mirrorchoice.get('grabber') or self.grabber
+            func_ref = getattr(grabber, func)
+            if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl)
+            try:
+                return func_ref(fullurl, **kwargs)
+            except URLGrabError, e:
+                if DEBUG: DEBUG.info('MIRROR: failed')
+                obj = CallbackObject()
+                obj.exception = e
+                obj.mirror = mirrorchoice['mirror']
+                obj.relative_url = gr.url
+                obj.url = fullurl
+                self._failure(gr, obj)
+
+    def urlgrab(self, url, filename=None, **kwargs):
+        """Run the grabber's urlgrab for the relative url via the mirrors."""
+        kw = dict(kwargs)
+        kw['filename'] = filename
+        func = 'urlgrab'
+        return self._mirror_try(func, url, kw)
+
+    def urlopen(self, url, **kwargs):
+        """Run the grabber's urlopen for the relative url via the mirrors."""
+        kw = dict(kwargs)
+        func = 'urlopen'
+        return self._mirror_try(func, url, kw)
+
+    def urlread(self, url, limit=None, **kwargs):
+        """Run the grabber's urlread for the relative url via the mirrors."""
+        kw = dict(kwargs)
+        kw['limit'] = limit
+        func = 'urlread'
+        return self._mirror_try(func, url, kw)
+
+
+class MGRandomStart(MirrorGroup):
+    """A mirror group that starts at a random mirror in the list.
+
+    This behavior of this class is identical to MirrorGroup, except that
+    it starts at a random location in the mirror list.
+    """
+
+    def __init__(self, grabber, mirrors, **kwargs):
+        """Initialize the object
+
+        The arguments for initialization are the same as for MirrorGroup
+        """
+        MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
+        # Pick the start from the parsed list rather than the raw argument,
+        # and skip the pick for an empty group: randrange(0) would raise
+        # ValueError, whereas MirrorGroup accepts an empty list and only
+        # reports "no more mirrors" when a fetch is attempted.
+        if self.mirrors:
+            self._next = random.randrange(len(self.mirrors))
+
+class MGRandomOrder(MirrorGroup):
+    """A mirror group that walks its mirrors in a random order.
+
+    Everything works as in MirrorGroup; the only difference is that the
+    mirror list is shuffled once, when the object is created.  The
+    order then stays fixed - no new random mirror is picked after a
+    failure.
+    """
+
+    def __init__(self, grabber, mirrors, **kwargs):
+        """Initialize the object
+
+        Takes the same arguments as MirrorGroup.
+        """
+        MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
+        # shuffle the parsed list in place
+        mirror_list = self.mirrors
+        random.shuffle(mirror_list)
+
+# Running this module directly as a script does nothing.
+if __name__ == '__main__':
+ pass