aboutsummaryrefslogtreecommitdiffstats
path: root/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py')
-rw-r--r--scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py458
1 files changed, 0 insertions, 458 deletions
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py
deleted file mode 100644
index 9664c6b5c5..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py
+++ /dev/null
@@ -1,458 +0,0 @@
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
-# Boston, MA 02111-1307 USA
-
-# This file is part of urlgrabber, a high-level cross-protocol url-grabber
-# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
-
-"""Module for downloading files from a pool of mirrors
-
-DESCRIPTION
-
- This module provides support for downloading files from a pool of
- mirrors with configurable failover policies. To a large extent, the
- failover policy is chosen by using different classes derived from
- the main class, MirrorGroup.
-
- Instances of MirrorGroup (and cousins) act very much like URLGrabber
- instances in that they have urlread, urlgrab, and urlopen methods.
- They can, therefore, be used in very similar ways.
-
- from urlgrabber.grabber import URLGrabber
- from urlgrabber.mirror import MirrorGroup
- gr = URLGrabber()
- mg = MirrorGroup(gr, ['http://foo.com/some/directory/',
- 'http://bar.org/maybe/somewhere/else/',
- 'ftp://baz.net/some/other/place/entirely/'])
- mg.urlgrab('relative/path.zip')
-
- The assumption is that all mirrors are identical AFTER the base urls
- specified, so that any mirror can be used to fetch any file.
-
-FAILOVER
-
- The failover mechanism is designed to be customized by subclassing
- from MirrorGroup to change the details of the behavior. In general,
- the classes maintain a master mirror list and a "current mirror"
- index. When a download is initiated, a copy of this list and index
- is created for that download only. The specific failover policy
- depends on the class used, and so is documented in the class
- documentation. Note that ANY behavior of the class can be
- overridden, so any failover policy at all is possible (although
- you may need to change the interface in extreme cases).
-
-CUSTOMIZATION
-
- Most customization of a MirrorGroup object is done at instantiation
- time (or via subclassing). There are four major types of
- customization:
-
- 1) Pass in a custom urlgrabber - The passed in urlgrabber will be
- used (by default... see #2) for the grabs, so options to it
- apply for the url-fetching
-
- 2) Custom mirror list - Mirror lists can simply be a list of
- string mirrors (as shown in the example above) but each can
- also be a dict, allowing for more options. For example, the
- first mirror in the list above could also have been:
-
- {'mirror': 'http://foo.com/some/directory/',
- 'grabber': <a custom grabber to be used for this mirror>,
- 'kwargs': { <a dict of arguments passed to the grabber> }}
-
- All mirrors are converted to this format internally. If
- 'grabber' is omitted, the default grabber will be used. If
- kwargs are omitted, then (duh) they will not be used.
-
- 3) Pass keyword arguments when instantiating the mirror group.
- See, for example, the failure_callback argument.
-
- 4) Finally, any kwargs passed in for the specific file (to the
- urlgrab method, for example) will be folded in. The options
- passed into the grabber's urlXXX methods will override any
- options specified in a custom mirror dict.
-
-"""
-
-# $Id: mirror.py,v 1.14 2006/02/22 18:26:46 mstenner Exp $
-
-import random
-import thread # needed for locking to make this threadsafe
-
-from grabber import URLGrabError, CallbackObject, DEBUG
-
-try:
-    from i18n import _
-except ImportError:
-    # The i18n module could not be imported: fall back to an identity
-    # function so that messages wrapped in _() are returned unchanged.
-    def _(st):
-        return st
-
-class GrabRequest:
-    """Plain attribute holder describing one specific request.
-
-    One instance is created per download (for example, a single file)
-    and carries the state that belongs to that download only.  Keeping
-    this state apart from the mirror group buys two things:
-
-      1) thread-safety becomes a little easier
-      2) parameters can be specific to the request
-    """
-
-class MirrorGroup:
-    """Base Mirror class
-
-    Instances of this class are built with a grabber object and a list
-    of mirrors.  Then all calls to urlXXX should be passed relative urls.
-    The requested file will be searched for on the first mirror.  If the
-    grabber raises an exception (possibly after some retries) then that
-    mirror will be removed from the list, and the next will be attempted.
-    If all mirrors are exhausted, then an exception will be raised.
-
-    MirrorGroup has the following failover policy:
-
-      * downloads begin with the first mirror
-
-      * by default (see default_action below) a failure (after retries)
-        causes it to increment the local AND master indices.  Also,
-        the current mirror is removed from the local list (but NOT the
-        master list - the mirror can potentially be used for other
-        files)
-
-      * if the local list is ever exhausted, a URLGrabError will be
-        raised (errno=256, no more mirrors)
-
-    OPTIONS
-
-      In addition to the required arguments "grabber" and "mirrors",
-      MirrorGroup also takes the following optional arguments:
-
-      default_action
-
-        A dict that describes the actions to be taken upon failure
-        (after retries).  default_action can contain any of the
-        following keys (shown here with their default values):
-
-          default_action = {'increment': 1,
-                            'increment_master': 1,
-                            'remove': 1,
-                            'remove_master': 0,
-                            'fail': 0}
-
-        In this context, 'increment' means "use the next mirror" and
-        'remove' means "never use this mirror again".  The two
-        'master' values refer to the instance-level mirror list (used
-        for all files), whereas the non-master values refer to the
-        current download only.
-
-        The 'fail' option will cause immediate failure by re-raising
-        the exception and no further attempts to get the current
-        download.
-
-        This dict can be set at instantiation time,
-          mg = MirrorGroup(grabber, mirrors, default_action={'fail':1})
-        at method-execution time (only applies to current fetch),
-          filename = mg.urlgrab(url, default_action={'increment': 0})
-        or by returning an action dict from the failure_callback
-          return {'fail':0}
-        in increasing precedence.
-
-        If all three of these were done, the net result would be:
-              {'increment': 0,         # set in method
-               'increment_master': 1,  # class default
-               'remove': 1,            # class default
-               'remove_master': 0,     # class default
-               'fail': 0}              # set at instantiation, reset
-                                       # from callback
-
-      failure_callback
-
-        this is a callback that will be called when a mirror "fails",
-        meaning the grabber raises some URLGrabError.  If this is a
-        tuple, it is interpreted to be of the form (cb, args, kwargs)
-        where cb is the actual callable object (function, method,
-        etc).  Otherwise, it is assumed to be the callable object
-        itself.  The callback will be passed a grabber.CallbackObject
-        instance along with args and kwargs (if present).  The following
-        attributes are defined within the instance:
-
-           obj.exception    = < exception that was raised >
-           obj.mirror       = < the mirror that was tried >
-           obj.relative_url = < url relative to the mirror >
-           obj.url          = < full url that failed >
-                              # .url is just the combination of .mirror
-                              # and .relative_url
-
-        The failure callback can return an action dict, as described
-        above.
-
-        Like default_action, the failure_callback can be set at
-        instantiation time or when the urlXXX method is called.  In
-        the latter case, it applies only for that fetch.
-
-        The callback can re-raise the exception quite easily.  For
-        example, this is a perfectly adequate callback function:
-
-          def callback(obj): raise obj.exception
-
-        WARNING: do not save the exception object (or the
-        CallbackObject instance).  As they contain stack frame
-        references, they can lead to circular references.
-
-    Notes:
-      * The behavior can be customized by deriving and overriding the
-        'CONFIGURATION METHODS'
-      * The 'grabber' instance is kept as a reference, not copied.
-        Therefore, the grabber instance can be modified externally
-        and changes will take effect immediately.
-    """
-
-    # notes on thread-safety:
-
-    #   A GrabRequest should never be shared by multiple threads because
-    #   it's never saved inside the MG object and never returned outside it.
-    #   therefore, it should be safe to access/modify grabrequest data
-    #   without a lock.  However, accessing the mirrors and _next attributes
-    #   of the MG itself must be done when locked to prevent (for example)
-    #   removal of the wrong mirror.
-
-    ##############################################################
-    #  CONFIGURATION METHODS  -  intended to be overridden to
-    #                            customize behavior
-    def __init__(self, grabber, mirrors, **kwargs):
-        """Initialize the MirrorGroup object.
-
-        REQUIRED ARGUMENTS
-
-          grabber  - URLGrabber instance
-          mirrors  - a list of mirrors
-
-        OPTIONAL ARGUMENTS
-
-          failure_callback  - callback to be used when a mirror fails
-          default_action    - dict of failure actions
-
-        See the module-level and class level documentation for more
-        details.
-        """
-
-        # OVERRIDE IDEAS:
-        #   shuffle the list to randomize order
-        self.grabber = grabber
-        self.mirrors = self._parse_mirrors(mirrors)
-        self._next = 0
-        self._lock = thread.allocate_lock()
-        self.default_action = None
-        self._process_kwargs(kwargs)
-
-    # if these values are found in **kwargs passed to one of the urlXXX
-    # methods, they will be stripped before getting passed on to the
-    # grabber
-    options = ['default_action', 'failure_callback']
-
-    def _process_kwargs(self, kwargs):
-        """Store the instance-level options; a missing key becomes None."""
-        self.failure_callback = kwargs.get('failure_callback')
-        self.default_action = kwargs.get('default_action')
-
-    def _parse_mirrors(self, mirrors):
-        """Return a new list with every mirror in dict form.
-
-        String entries become {'mirror': <string>}; all other entries
-        are passed through unchanged.
-        """
-        parsed_mirrors = []
-        for m in mirrors:
-            # isinstance (rather than an exact type comparison) so that
-            # unicode strings and str subclasses are wrapped as well
-            if isinstance(m, basestring):
-                m = {'mirror': m}
-            parsed_mirrors.append(m)
-        return parsed_mirrors
-
-    def _load_gr(self, gr):
-        """Give gr its own copy of the master mirror list and index."""
-        # OVERRIDE IDEAS:
-        #   shuffle gr list
-        self._lock.acquire()
-        try:
-            gr.mirrors = list(self.mirrors)
-            gr._next = self._next
-        finally:
-            # always release, even if the copy raises
-            self._lock.release()
-
-    def _get_mirror(self, gr):
-        """Return the mirror dict gr should try next.
-
-        Raises URLGrabError (errno 256) when gr has no mirrors left.
-        """
-        # OVERRIDE IDEAS:
-        #   return a random mirror so that multiple mirrors get used
-        #   even without failures.
-        if not gr.mirrors:
-            raise URLGrabError(256, _('No more mirrors to try.'))
-        return gr.mirrors[gr._next]
-
-    def _failure(self, gr, cb_obj):
-        """React to a failed fetch: run the callback, then move on.
-
-        The failure callback is taken from the per-call kwargs (gr.kw)
-        or, failing that, from the instance.  The effective action dict
-        is built key by key, in increasing precedence, from the instance
-        default_action, the per-call default_action, and whatever the
-        callback returned.  increment_mirror() is then called with it.
-
-        If the resulting action has a true 'fail' value, the exception
-        currently being handled is re-raised.  The bare raise relies on
-        this method being called from inside the except block in
-        _mirror_try.
-        """
-        # OVERRIDE IDEAS:
-        #   inspect the error - remove=1 for 404, remove=2 for connection
-        #                       refused, etc. (this can also be done via
-        #                       the callback)
-        cb = gr.kw.get('failure_callback') or self.failure_callback
-        if cb:
-            if isinstance(cb, tuple):
-                cb, args, kwargs = cb
-            else:
-                args, kwargs = (), {}
-            action = cb(cb_obj, *args, **kwargs) or {}
-        else:
-            action = {}
-        # XXXX - decide - there are two ways to do this
-        # the first is action-overriding as a whole - use the entire action
-        # or fall back on module level defaults
-        #action = action or gr.kw.get('default_action') or self.default_action
-        # the other is to fall through for each element in the action dict
-        a = dict(self.default_action or {})
-        # "or {}" so that an explicit default_action=None is tolerated
-        a.update(gr.kw.get('default_action') or {})
-        a.update(action)
-        action = a
-        self.increment_mirror(gr, action)
-        if action.get('fail', 0): raise
-
-    def increment_mirror(self, gr, action=None):
-        """Tell the mirror object to increment the mirror index
-
-        This increments the mirror index, which amounts to telling the
-        mirror object to use a different mirror (for this and future
-        downloads).
-
-        This is a SEMI-public method.  It will be called internally,
-        and you may never need to call it.  However, it is provided
-        (and is made public) so that the calling program can increment
-        the mirror choice for methods like urlopen.  For example, with
-        urlopen, there's no good way for the mirror group to know that
-        an error occurs mid-download (it's already returned and given
-        you the file object).
-
-        gr is the request whose current mirror, gr.mirrors[gr._next],
-        is treated as the bad one.  action is an optional dict; the
-        keys consulted are (with the value assumed when a key is absent):
-
-          'remove_master'    (0)  delete the mirror from the master list
-          'increment_master' (1)  otherwise, advance the master index if
-                                  it currently points at this mirror
-          'remove'           (1)  delete the mirror from gr's own list
-          'increment'        (1)  otherwise, advance gr's own index
-
-        Either index wraps to 0 when it runs past the end of its list.
-
-        beware of remove=0 as it can lead to infinite loops
-        """
-        if action is None:
-            action = {}
-        badmirror = gr.mirrors[gr._next]
-
-        self._lock.acquire()
-        try:
-            try:
-                ind = self.mirrors.index(badmirror)
-            except ValueError:
-                # no longer present in the master list; nothing to update
-                pass
-            else:
-                if action.get('remove_master', 0):
-                    del self.mirrors[ind]
-                elif self._next == ind and action.get('increment_master', 1):
-                    self._next += 1
-                if self._next >= len(self.mirrors): self._next = 0
-        finally:
-            # always release, even if something above raises
-            self._lock.release()
-
-        if action.get('remove', 1):
-            del gr.mirrors[gr._next]
-        elif action.get('increment', 1):
-            gr._next += 1
-        if gr._next >= len(gr.mirrors): gr._next = 0
-
-        if DEBUG:
-            grm = [m['mirror'] for m in gr.mirrors]
-            DEBUG.info('GR   mirrors: [%s] %i', ' '.join(grm), gr._next)
-            selfm = [m['mirror'] for m in self.mirrors]
-            DEBUG.info('MAIN mirrors: [%s] %i', ' '.join(selfm), self._next)
-
-    #####################################################################
-    # NON-CONFIGURATION METHODS
-    # these methods are designed to be largely workhorse methods that
-    # are not intended to be overridden.  That doesn't mean you can't;
-    # if you want to, feel free, but most things can be done by
-    # overriding the configuration methods :)
-
-    def _join_url(self, base_url, rel_url):
-        """Concatenate base_url and rel_url.
-
-        A '/' is inserted only when base_url does not end with one and
-        rel_url does not start with one.
-        """
-        if base_url.endswith('/') or rel_url.startswith('/'):
-            return base_url + rel_url
-        else:
-            return base_url + '/' + rel_url
-
-    def _mirror_try(self, func, url, kw):
-        """Call the grabber method named func on mirrors until one works.
-
-        func - name of the grabber method ('urlgrab', 'urlopen', ...)
-        url  - url relative to the mirror
-        kw   - keyword arguments for the fetch; the entries named in
-               self.options are consumed here and not passed on
-
-        Returns whatever the grabber method returns.  A URLGrabError
-        from the grabber is handed to _failure(), after which the loop
-        asks _get_mirror() for the next mirror to try.
-        """
-        gr = GrabRequest()
-        gr.func = func
-        gr.url = url
-        gr.kw = dict(kw)
-        self._load_gr(gr)
-
-        # strip our own options from a copy, leaving the caller's dict alone
-        grab_kw = dict(kw)
-        for k in self.options:
-            grab_kw.pop(k, None)
-
-        while 1:
-            mirrorchoice = self._get_mirror(gr)
-            fullurl = self._join_url(mirrorchoice['mirror'], gr.url)
-            # per-mirror kwargs first, so that per-call kwargs override them
-            kwargs = dict(mirrorchoice.get('kwargs', {}))
-            kwargs.update(grab_kw)
-            grabber = mirrorchoice.get('grabber') or self.grabber
-            func_ref = getattr(grabber, func)
-            if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl)
-            try:
-                return func_ref(fullurl, **kwargs)
-            except URLGrabError, e:
-                if DEBUG: DEBUG.info('MIRROR: failed')
-                obj = CallbackObject()
-                obj.exception = e
-                obj.mirror = mirrorchoice['mirror']
-                obj.relative_url = gr.url
-                obj.url = fullurl
-                self._failure(gr, obj)
-
-    def urlgrab(self, url, filename=None, **kwargs):
-        """Run the grabber's urlgrab for url against the mirrors."""
-        kw = dict(kwargs)
-        kw['filename'] = filename
-        func = 'urlgrab'
-        return self._mirror_try(func, url, kw)
-
-    def urlopen(self, url, **kwargs):
-        """Run the grabber's urlopen for url against the mirrors."""
-        kw = dict(kwargs)
-        func = 'urlopen'
-        return self._mirror_try(func, url, kw)
-
-    def urlread(self, url, limit=None, **kwargs):
-        """Run the grabber's urlread for url against the mirrors."""
-        kw = dict(kwargs)
-        kw['limit'] = limit
-        func = 'urlread'
-        return self._mirror_try(func, url, kw)
-
-
-class MGRandomStart(MirrorGroup):
-    """A mirror group that starts at a random mirror in the list.
-
-    The behavior of this class is identical to MirrorGroup, except that
-    it starts at a random location in the mirror list.
-    """
-
-    def __init__(self, grabber, mirrors, **kwargs):
-        """Initialize the object
-
-        The arguments for initialization are the same as for MirrorGroup
-        """
-        MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
-        # Use the parsed list rather than the raw argument, and leave the
-        # index at the value set by MirrorGroup.__init__ when the list is
-        # empty: randrange(0) would raise ValueError.
-        if self.mirrors:
-            self._next = random.randrange(len(self.mirrors))
-
-class MGRandomOrder(MirrorGroup):
-    """A mirror group that walks its mirrors in a shuffled order.
-
-    Apart from the ordering, this class behaves exactly like
-    MirrorGroup.  The shuffle happens once, during initialization, and
-    the resulting order is kept from then on; no new random mirror is
-    picked after a failure.
-    """
-
-    def __init__(self, grabber, mirrors, **kwargs):
-        """Set up the group, then shuffle its mirror list in place.
-
-        Takes the same arguments as MirrorGroup.
-        """
-        MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
-        random.shuffle(self.mirrors)
-
-if __name__ == '__main__':
- pass