diff options
Diffstat (limited to 'scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py')
-rw-r--r-- | scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py | 458 |
1 files changed, 458 insertions, 0 deletions
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py new file mode 100644 index 0000000000..9664c6b5c5 --- /dev/null +++ b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py @@ -0,0 +1,458 @@ +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, +# Boston, MA 02111-1307 USA + +# This file is part of urlgrabber, a high-level cross-protocol url-grabber +# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko + +"""Module for downloading files from a pool of mirrors + +DESCRIPTION + + This module provides support for downloading files from a pool of + mirrors with configurable failover policies. To a large extent, the + failover policy is chosen by using different classes derived from + the main class, MirrorGroup. + + Instances of MirrorGroup (and cousins) act very much like URLGrabber + instances in that they have urlread, urlgrab, and urlopen methods. + They can therefore, be used in very similar ways. + + from urlgrabber.grabber import URLGrabber + from urlgrabber.mirror import MirrorGroup + gr = URLGrabber() + mg = MirrorGroup(gr, ['http://foo.com/some/directory/', + 'http://bar.org/maybe/somewhere/else/', + 'ftp://baz.net/some/other/place/entirely/'] + mg.urlgrab('relative/path.zip') + + The assumption is that all mirrors are identical AFTER the base urls + specified, so that any mirror can be used to fetch any file. + +FAILOVER + + The failover mechanism is designed to be customized by subclassing + from MirrorGroup to change the details of the behavior. In general, + the classes maintain a master mirror list and a "current mirror" + index. When a download is initiated, a copy of this list and index + is created for that download only. The specific failover policy + depends on the class used, and so is documented in the class + documentation. Note that ANY behavior of the class can be + overridden, so any failover policy at all is possible (although + you may need to change the interface in extreme cases). + +CUSTOMIZATION + + Most customization of a MirrorGroup object is done at instantiation + time (or via subclassing). There are four major types of + customization: + + 1) Pass in a custom urlgrabber - The passed in urlgrabber will be + used (by default... see #2) for the grabs, so options to it + apply for the url-fetching + + 2) Custom mirror list - Mirror lists can simply be a list of + stings mirrors (as shown in the example above) but each can + also be a dict, allowing for more options. For example, the + first mirror in the list above could also have been: + + {'mirror': 'http://foo.com/some/directory/', + 'grabber': <a custom grabber to be used for this mirror>, + 'kwargs': { <a dict of arguments passed to the grabber> }} + + All mirrors are converted to this format internally. If + 'grabber' is omitted, the default grabber will be used. If + kwargs are omitted, then (duh) they will not be used. + + 3) Pass keyword arguments when instantiating the mirror group. + See, for example, the failure_callback argument. + + 4) Finally, any kwargs passed in for the specific file (to the + urlgrab method, for example) will be folded in. The options + passed into the grabber's urlXXX methods will override any + options specified in a custom mirror dict. + +""" + +# $Id: mirror.py,v 1.14 2006/02/22 18:26:46 mstenner Exp $ + +import random +import thread # needed for locking to make this threadsafe + +from grabber import URLGrabError, CallbackObject, DEBUG + +try: + from i18n import _ +except ImportError, msg: + def _(st): return st + +class GrabRequest: + """This is a dummy class used to hold information about the specific + request. For example, a single file. By maintaining this information + separately, we can accomplish two things: + + 1) make it a little easier to be threadsafe + 2) have request-specific parameters + """ + pass + +class MirrorGroup: + """Base Mirror class + + Instances of this class are built with a grabber object and a list + of mirrors. Then all calls to urlXXX should be passed relative urls. + The requested file will be searched for on the first mirror. If the + grabber raises an exception (possibly after some retries) then that + mirror will be removed from the list, and the next will be attempted. + If all mirrors are exhausted, then an exception will be raised. + + MirrorGroup has the following failover policy: + + * downloads begin with the first mirror + + * by default (see default_action below) a failure (after retries) + causes it to increment the local AND master indices. Also, + the current mirror is removed from the local list (but NOT the + master list - the mirror can potentially be used for other + files) + + * if the local list is ever exhausted, a URLGrabError will be + raised (errno=256, no more mirrors) + + OPTIONS + + In addition to the required arguments "grabber" and "mirrors", + MirrorGroup also takes the following optional arguments: + + default_action + + A dict that describes the actions to be taken upon failure + (after retries). default_action can contain any of the + following keys (shown here with their default values): + + default_action = {'increment': 1, + 'increment_master': 1, + 'remove': 1, + 'remove_master': 0, + 'fail': 0} + + In this context, 'increment' means "use the next mirror" and + 'remove' means "never use this mirror again". The two + 'master' values refer to the instance-level mirror list (used + for all files), whereas the non-master values refer to the + current download only. + + The 'fail' option will cause immediate failure by re-raising + the exception and no further attempts to get the current + download. + + This dict can be set at instantiation time, + mg = MirrorGroup(grabber, mirrors, default_action={'fail':1}) + at method-execution time (only applies to current fetch), + filename = mg.urlgrab(url, default_action={'increment': 0}) + or by returning an action dict from the failure_callback + return {'fail':0} + in increasing precedence. + + If all three of these were done, the net result would be: + {'increment': 0, # set in method + 'increment_master': 1, # class default + 'remove': 1, # class default + 'remove_master': 0, # class default + 'fail': 0} # set at instantiation, reset + # from callback + + failure_callback + + this is a callback that will be called when a mirror "fails", + meaning the grabber raises some URLGrabError. If this is a + tuple, it is interpreted to be of the form (cb, args, kwargs) + where cb is the actual callable object (function, method, + etc). Otherwise, it is assumed to be the callable object + itself. The callback will be passed a grabber.CallbackObject + instance along with args and kwargs (if present). The following + attributes are defined withing the instance: + + obj.exception = < exception that was raised > + obj.mirror = < the mirror that was tried > + obj.relative_url = < url relative to the mirror > + obj.url = < full url that failed > + # .url is just the combination of .mirror + # and .relative_url + + The failure callback can return an action dict, as described + above. + + Like default_action, the failure_callback can be set at + instantiation time or when the urlXXX method is called. In + the latter case, it applies only for that fetch. + + The callback can re-raise the exception quite easily. For + example, this is a perfectly adequate callback function: + + def callback(obj): raise obj.exception + + WARNING: do not save the exception object (or the + CallbackObject instance). As they contain stack frame + references, they can lead to circular references. + + Notes: + * The behavior can be customized by deriving and overriding the + 'CONFIGURATION METHODS' + * The 'grabber' instance is kept as a reference, not copied. + Therefore, the grabber instance can be modified externally + and changes will take effect immediately. + """ + + # notes on thread-safety: + + # A GrabRequest should never be shared by multiple threads because + # it's never saved inside the MG object and never returned outside it. + # therefore, it should be safe to access/modify grabrequest data + # without a lock. However, accessing the mirrors and _next attributes + # of the MG itself must be done when locked to prevent (for example) + # removal of the wrong mirror. + + ############################################################## + # CONFIGURATION METHODS - intended to be overridden to + # customize behavior + def __init__(self, grabber, mirrors, **kwargs): + """Initialize the MirrorGroup object. + + REQUIRED ARGUMENTS + + grabber - URLGrabber instance + mirrors - a list of mirrors + + OPTIONAL ARGUMENTS + + failure_callback - callback to be used when a mirror fails + default_action - dict of failure actions + + See the module-level and class level documentation for more + details. + """ + + # OVERRIDE IDEAS: + # shuffle the list to randomize order + self.grabber = grabber + self.mirrors = self._parse_mirrors(mirrors) + self._next = 0 + self._lock = thread.allocate_lock() + self.default_action = None + self._process_kwargs(kwargs) + + # if these values are found in **kwargs passed to one of the urlXXX + # methods, they will be stripped before getting passed on to the + # grabber + options = ['default_action', 'failure_callback'] + + def _process_kwargs(self, kwargs): + self.failure_callback = kwargs.get('failure_callback') + self.default_action = kwargs.get('default_action') + + def _parse_mirrors(self, mirrors): + parsed_mirrors = [] + for m in mirrors: + if type(m) == type(''): m = {'mirror': m} + parsed_mirrors.append(m) + return parsed_mirrors + + def _load_gr(self, gr): + # OVERRIDE IDEAS: + # shuffle gr list + self._lock.acquire() + gr.mirrors = list(self.mirrors) + gr._next = self._next + self._lock.release() + + def _get_mirror(self, gr): + # OVERRIDE IDEAS: + # return a random mirror so that multiple mirrors get used + # even without failures. + if not gr.mirrors: + raise URLGrabError(256, _('No more mirrors to try.')) + return gr.mirrors[gr._next] + + def _failure(self, gr, cb_obj): + # OVERRIDE IDEAS: + # inspect the error - remove=1 for 404, remove=2 for connection + # refused, etc. (this can also be done via + # the callback) + cb = gr.kw.get('failure_callback') or self.failure_callback + if cb: + if type(cb) == type( () ): + cb, args, kwargs = cb + else: + args, kwargs = (), {} + action = cb(cb_obj, *args, **kwargs) or {} + else: + action = {} + # XXXX - decide - there are two ways to do this + # the first is action-overriding as a whole - use the entire action + # or fall back on module level defaults + #action = action or gr.kw.get('default_action') or self.default_action + # the other is to fall through for each element in the action dict + a = dict(self.default_action or {}) + a.update(gr.kw.get('default_action', {})) + a.update(action) + action = a + self.increment_mirror(gr, action) + if action and action.get('fail', 0): raise + + def increment_mirror(self, gr, action={}): + """Tell the mirror object increment the mirror index + + This increments the mirror index, which amounts to telling the + mirror object to use a different mirror (for this and future + downloads). + + This is a SEMI-public method. It will be called internally, + and you may never need to call it. However, it is provided + (and is made public) so that the calling program can increment + the mirror choice for methods like urlopen. For example, with + urlopen, there's no good way for the mirror group to know that + an error occurs mid-download (it's already returned and given + you the file object). + + remove --- can have several values + 0 do not remove the mirror from the list + 1 remove the mirror for this download only + 2 remove the mirror permanently + + beware of remove=0 as it can lead to infinite loops + """ + badmirror = gr.mirrors[gr._next] + + self._lock.acquire() + try: + ind = self.mirrors.index(badmirror) + except ValueError: + pass + else: + if action.get('remove_master', 0): + del self.mirrors[ind] + elif self._next == ind and action.get('increment_master', 1): + self._next += 1 + if self._next >= len(self.mirrors): self._next = 0 + self._lock.release() + + if action.get('remove', 1): + del gr.mirrors[gr._next] + elif action.get('increment', 1): + gr._next += 1 + if gr._next >= len(gr.mirrors): gr._next = 0 + + if DEBUG: + grm = [m['mirror'] for m in gr.mirrors] + DEBUG.info('GR mirrors: [%s] %i', ' '.join(grm), gr._next) + selfm = [m['mirror'] for m in self.mirrors] + DEBUG.info('MAIN mirrors: [%s] %i', ' '.join(selfm), self._next) + + ##################################################################### + # NON-CONFIGURATION METHODS + # these methods are designed to be largely workhorse methods that + # are not intended to be overridden. That doesn't mean you can't; + # if you want to, feel free, but most things can be done by + # by overriding the configuration methods :) + + def _join_url(self, base_url, rel_url): + if base_url.endswith('/') or rel_url.startswith('/'): + return base_url + rel_url + else: + return base_url + '/' + rel_url + + def _mirror_try(self, func, url, kw): + gr = GrabRequest() + gr.func = func + gr.url = url + gr.kw = dict(kw) + self._load_gr(gr) + + for k in self.options: + try: del kw[k] + except KeyError: pass + + while 1: + mirrorchoice = self._get_mirror(gr) + fullurl = self._join_url(mirrorchoice['mirror'], gr.url) + kwargs = dict(mirrorchoice.get('kwargs', {})) + kwargs.update(kw) + grabber = mirrorchoice.get('grabber') or self.grabber + func_ref = getattr(grabber, func) + if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl) + try: + return func_ref( *(fullurl,), **kwargs ) + except URLGrabError, e: + if DEBUG: DEBUG.info('MIRROR: failed') + obj = CallbackObject() + obj.exception = e + obj.mirror = mirrorchoice['mirror'] + obj.relative_url = gr.url + obj.url = fullurl + self._failure(gr, obj) + + def urlgrab(self, url, filename=None, **kwargs): + kw = dict(kwargs) + kw['filename'] = filename + func = 'urlgrab' + return self._mirror_try(func, url, kw) + + def urlopen(self, url, **kwargs): + kw = dict(kwargs) + func = 'urlopen' + return self._mirror_try(func, url, kw) + + def urlread(self, url, limit=None, **kwargs): + kw = dict(kwargs) + kw['limit'] = limit + func = 'urlread' + return self._mirror_try(func, url, kw) + + +class MGRandomStart(MirrorGroup): + """A mirror group that starts at a random mirror in the list. + + This behavior of this class is identical to MirrorGroup, except that + it starts at a random location in the mirror list. + """ + + def __init__(self, grabber, mirrors, **kwargs): + """Initialize the object + + The arguments for intialization are the same as for MirrorGroup + """ + MirrorGroup.__init__(self, grabber, mirrors, **kwargs) + self._next = random.randrange(len(mirrors)) + +class MGRandomOrder(MirrorGroup): + """A mirror group that uses mirrors in a random order. + + This behavior of this class is identical to MirrorGroup, except that + it uses the mirrors in a random order. Note that the order is set at + initialization time and fixed thereafter. That is, it does not pick a + random mirror after each failure. + """ + + def __init__(self, grabber, mirrors, **kwargs): + """Initialize the object + + The arguments for intialization are the same as for MirrorGroup + """ + MirrorGroup.__init__(self, grabber, mirrors, **kwargs) + random.shuffle(self.mirrors) + +if __name__ == '__main__': + pass |