index : reflector32 | |
Archlinux32 fork of reflector | gitolite user |
summary | refs | log | tree | commit | diff |
author | Erich Eckner <git@eckner.net> | 2018-06-15 09:19:34 +0200 |
---|---|---|
committer | Erich Eckner <git@eckner.net> | 2018-06-15 09:19:34 +0200 |
commit | d5038733b8d45c155bcfb5ba0035c2f2ca34552e (patch) | |
tree | c56d67940f41dda40b5ad658dd302792d21fa323 |
-rw-r--r-- | CHANGELOG | 32 | ||||
-rw-r--r-- | COPYING | 339 | ||||
-rw-r--r-- | Reflector.py | 782 | ||||
-rwxr-xr-x | reflector | 2 | ||||
-rw-r--r-- | setup.py | 14 |
diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..46f5f74 --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,32 @@ +# 2017-06-13 +* Added `--score` option. +* Remove old-style message formatting. +* Use logging module instead of custom methods. + +# 2017-01-07 +* case-insensitive country name matching + +# 2014-08-15 +* added `--info` option for displaying info about each mirror +* added support for filtering by country code + +# 2013-12-15 +* added code to time rsync downloads (suggested by Teo Guo Ci, implemented with a slightly different approach) +* `get_mirrorlist` method now accepts an unquoted command list. +* updated string formatting commands (e.g. `'foo %s %s' % (x, y)` to `'foo {} {}'.format(x, y)`) +* changed format of generated mirrorlist header +* fixed help message for `--verbose` (STDOUT -> STDERR) + +# 2013-01-04 +* added `--include` and `--exclude` filter options +* removed `--grep` option (replaced by `--include`) + +# 2012-06-28 +* added JSON data caching to avoid redundant connections to archlinux.org +* added `--connection-timeout` and `--cache-timeout` options + +# 2012-03-24 +* added "delay" and "score" to sort options + +# 2011-03-25 +* rewritten in Python 3 following the deprecation of the perl-xyne-arch package diff --git a/COPYING b/COPYING new file mode 100644 --- /dev/null +++ b/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. 
This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. 
We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. 
+ +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) 
+ +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. 
Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. 
Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. 
It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. 
Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/Reflector.py b/Reflector.py new file mode 100644 index 0000000..b4e9efb --- /dev/null +++ b/Reflector.py @@ -0,0 +1,782 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2012, 2013 Xyne +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# (version 2) as published by the Free Software Foundation. +# +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +import argparse +import calendar +import datetime +import errno +import getpass +import http.client +import json +import logging +import os +import pipes +import queue +import re +import socket +import subprocess +import sys +import tempfile +import threading +import time +import urllib.error +import urllib.request + + + +# Generic MirrorStatus Exception +class MirrorStatusError(Exception): + def __init__(self, msg): + self.msg = msg + def __str__(self): + return repr(self.msg) + + + +def get_cache_file(): + bname = 'mirrorstatus.json' + path = os.getenv('XDG_CACHE_HOME') + if path: + try: + os.makedirs(path, exist_ok=True) + # Raised if permissions do not match umask + except FileExistsError: + pass + return os.path.join(path, bname) + else: + return '/tmp/.{}.{}'.format(getpass.getuser(), bname) + + + +class MirrorStatus(object): + # JSON URI + URL = 'https://www.archlinux.org/mirrors/status/json/' + # Mirror URL format. Accepts server base URL, repository, and architecture. + MIRROR_URL_FORMAT = '{0}{1}/os/{2}' + MIRRORLIST_ENTRY_FORMAT = "Server = " + MIRROR_URL_FORMAT + "\n" + DISPLAY_TIME_FORMAT = '%Y-%m-%d %H:%M:%S UTC' + PARSE_TIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ' + # Required for the last_check field, which oddly includes microseconds. + PARSE_TIME_FORMAT_WITH_USEC = '%Y-%m-%dT%H:%M:%S.%fZ' + # Recognized list sort types and their descriptions. + SORT_TYPES = { + 'age' : 'last server synchronization', + 'rate' : 'download rate', + 'country': 'server\'s location', + 'score' : 'MirrorStatus score', + 'delay' : 'MirrorStatus delay', + } + # Known repositories, i.e. those that should be on each mirror. + # Used to replace the "$repo" variable. + # TODO + # Avoid using a hard-coded list. 
+ # See https://bugs.archlinux.org/task/32895 + REPOSITORIES = ( + 'community', + 'community-staging', + 'community-testing', + 'core', + 'extra', + 'gnome-unstable', + 'kde-unstable', + 'multilib', + 'multilib-testing' + 'staging', + 'testing' + ) + + # Known system architectures, as used to replace the "$arch" variable. + ARCHITECTURES = ['x86_64'] + + # Initialize + # refresh_interval: + # The cached list will be replaced after this many seconds have passed. + # 0 effectively disables caching. + # Caching is only useful if the object persists, e.g. if it were embedded + # in a server. + def __init__( + self, + refresh_interval=0, + verbose=False, + connection_timeout=5, +# download_timeout=None, + cache_timeout=300, + min_completion_pct=1., + threads=5 + ): + self.refresh_interval = refresh_interval + + # Last modification time of the json object. + self.json_mtime = 0 + # The parsed JSON object. + self.json_obj = {} + # Display extra information. + self.verbose = verbose + # Connection timeout + self.connection_timeout = connection_timeout + # Download timeout +# self.download_timeout = download_timeout + # Cache timeout + self.cache_timeout = cache_timeout + # Minimum completion percent, for filtering mirrors. 
+ self.min_completion_pct = min_completion_pct + # Threads + self.threads = threads + + + + def retrieve(self): + """Retrieve the current mirror status JSON data.""" + self.json_obj = None + json_str = None + save_json = False + + cache_file = get_cache_file() + if self.cache_timeout > 0: + save_json = True + try: + mtime = os.path.getmtime(cache_file) + if time.time() - mtime < self.cache_timeout: + try: + with open(cache_file) as f: + self.json_obj = json.load(f) + self.json_mtime = mtime + save_json = False + except IOError as e: + raise MirrorStatusError('failed to load cached JSON data ({})'.format(e)) + except OSError as e: + if e.errno != errno.ENOENT: + raise MirrorStatusError('failed to get cache file mtime ({})'.format(e)) + + if not self.json_obj: + try: + with urllib.request.urlopen(MirrorStatus.URL, None, self.connection_timeout) as f: + json_str = f.read() + self.json_obj = json.loads(json_str.decode()) + self.json_mtime = time.time() + except (urllib.error.URLError, socket.timeout) as e: + raise MirrorStatusError('failed to retrieve mirror data: ({})'.format(e)) + except ValueError as e: + raise MirrorStatusError('failed to parse retrieved mirror data: ({})'.format(e)) + + try: + # Remove servers that have not synced, and parse the "last_sync" times for + # comparison later. + mirrors = self.json_obj['urls'] + # Filter incomplete mirrors and mirrors that haven't synced. + mirrors = list( + m for m in mirrors + if m['last_sync'] + and m['completion_pct'] >= self.min_completion_pct + ) + # Parse 'last_sync' times for future comparison. 
+ for mirror in mirrors: + mirror['last_sync'] = calendar.timegm( + time.strptime(mirror['last_sync'], + MirrorStatus.PARSE_TIME_FORMAT) + ) + self.json_obj['urls'] = mirrors + except KeyError: + raise MirrorStatusError('failed to parse retrieved mirror data (the format may have changed or there may be a transient error)') + + if save_json and json_str: + try: + with open(cache_file, 'wb') as f: + f.write(json_str) + except IOError as e: + raise MirrorStatusError('failed to cache JSON data ({})'.format(e)) + + + + + + def get_obj(self): + """Return the JSON object, retrieving new data if necessary.""" + if not self.json_obj \ + or time.time() > (self.json_mtime + self.refresh_interval): + self.retrieve() + + return self.json_obj + + + def get_mirrors(self): + """Get the mirrors.""" + return self.get_obj()['urls'] + + + + def filter( + self, + mirrors=None, + countries=None, + regexes=None, # TODO: remove + include=None, + exclude=None, + age=None, + protocols=None + ): + """Filter using different parameters.""" + # TODO: remove + if regexes: +# raise MirrorStatusError('The "regexes" keyword has been deprecated and replaced by "include" and "exclude".') + if not include: + include = regexes + sys.stderr.write('''WARNING: The "regexes" keyword has been deprecated and replaced by "include" and "exclude". + Support will be soon removed without further warning.''') + if mirrors is None: + mirrors = self.get_mirrors() + + t = time.time() + n = 0 + + # Make country arguments case-insensitive. + uc_countries = tuple(c.upper() for c in countries) if countries else None + for mirror in mirrors: + # Filter by country. + if countries \ + and not ( \ + mirror['country'].upper() in uc_countries or \ + mirror['country_code'].upper() in uc_countries \ + ): + continue + # Filter by protocol. + if protocols and not mirror['protocol'] in protocols: + continue + # Filter by regex. + # TODO: Find a better way to do this. 
+ if include: + for regex in include: + if re.search(regex, mirror['url']): + break + else: + continue + if exclude: + discard = False + for regex in exclude: + if re.search(regex, mirror['url']): + discard = True + break + if discard: + continue + # Filter by hours since last sync. + if age and t > (age * 60**2 + mirror['last_sync']): + continue + + # Yield if we're still here. + yield mirror + + + + def sort(self, mirrors=None, by=None): + """Sort using different parameters.""" + if mirrors is None: + mirrors = self.get_mirrors() + # Ensure that "mirrors" is a list that can be sorted. + if not isinstance(mirrors, list): + mirrors = list(mirrors) + + if by == 'age': + mirrors.sort(key=lambda m: m['last_sync'], reverse=True) + elif by == 'rate': + mirrors = self.rate(mirrors) + elif by in ('country', 'country_code', 'delay', 'score'): + mirrors.sort(key=lambda m: m[by]) + return mirrors + + + + # Sort mirrors by download speed. Download speed will be calculated from the + # download time of the [core] database from each server. + # TODO: Consider ways to improve this. + # TODO: Consider the effects of threading (do the threads affect the results + # by competing for bandwidth?) + def rate(self, mirrors=None, threads=5): + if mirrors is None: + mirrors = self.get_mirrors() + if not threads: + threads = self.threads + # Ensure that "mirrors" is a list and not a generator. + if not isinstance(mirrors, list): + mirrors = list(mirrors) + + # Ensure a sane number of threads. + if threads < 1: + threads = 1 + else: + threads = min(threads, len(mirrors)) + + rates = {} + + # URL input queue.Queue + q_in = queue.Queue() + # URL and rate output queue.Queue + q_out = queue.Queue() + def worker(): + while True: + url = q_in.get() + db_subpath = 'core/os/x86_64/core.db' + db_url = url + db_subpath + scheme = urllib.parse.urlparse(url).scheme + # Leave the rate as 0 if the connection fails. + # TODO: Consider more graceful error handling. 
+ rate = 0 + dt = float('NaN') + + # urllib cannot handle rsync protocol + if scheme == 'rsync': + rsync_cmd = [ + 'rsync', + '-avL', '--no-h', '--no-motd', + '--contimeout={}'.format(self.connection_timeout), + db_url + ] + try: + with tempfile.TemporaryDirectory() as tmpdir: + t0 = time.time() + subprocess.check_call( + rsync_cmd + [tmpdir], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL + ) + dt = time.time() - t0 + size = os.path.getsize(os.path.join( + tmpdir, + os.path.basename(db_subpath) + )) + rate = size / dt + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): + pass + else: + req = urllib.request.Request(url=db_url) + try: + t0 = time.time() + with urllib.request.urlopen(req, None, self.connection_timeout) as f: + size = len(f.read()) + dt = time.time() - t0 + rate = size / (dt) + except (OSError, urllib.error.HTTPError, http.client.HTTPException): + pass + q_out.put((url, rate, dt)) + q_in.task_done() + + # Launch threads + for i in range(threads): + t = threading.Thread(target=worker) + t.daemon = True + t.start() + + + # Load the input queue.Queue + url_len = max(len(m['url']) for m in mirrors) + for mirror in mirrors: + logging.info("rating {}".format(mirror['url'])) + q_in.put(mirror['url']) + + q_in.join() + + + # Get the results + # The "in mirrors" loop is just used to ensure that the right number of + # items is retrieved. + + # Display some extra data. + header_fmt = '{{:{:d}s}} {{:>14s}} {{:>9s}}'.format(url_len) + logging.info(header_fmt.format('Server', 'Rate', 'Time')) + fmt = '{{:{:d}s}} {{:8.2f}} KiB/s {{:7.2f}} s'.format(url_len) + + # Loop over the mirrors just to ensure that we get the rate for each mirror. + # The value in the loop does not (necessarily) correspond to the mirror. + for mirror in mirrors: + url, rate, dt = q_out.get() + kibps = rate / 1024.0 + logging.info(fmt.format(url, kibps, dt)) + rates[url] = rate + q_out.task_done() + + + # Sort by rate. 
+ rated_mirrors = [m for m in mirrors if rates[m['url']] > 0] + rated_mirrors.sort(key=lambda m: rates[m['url']], reverse=True) + + return rated_mirrors + [m for m in mirrors if rates[m['url']] == 0] + + + + def display_time(self, t=None): + '''Format a time for display.''' + return time.strftime(self.DISPLAY_TIME_FORMAT, t) + + + + + # Return a Pacman-formatted mirrorlist + # TODO: Reconsider the assumption that self.json_obj has been retrieved. + def get_mirrorlist(self, mirrors=None, include_country=False, cmd=None): + if mirrors is None: + mirrors = self.get_mirrors() + if cmd is None: + cmd = '?' + else: + cmd = 'reflector ' + ' '.join(pipes.quote(x) for x in cmd) + + last_check = self.json_obj['last_check'] + # For some reason the "last_check" field included microseconds. + try: + parsed_last_check = datetime.datetime.strptime( + last_check, + self.PARSE_TIME_FORMAT_WITH_USEC, + ).timetuple() + except ValueError: + parsed_last_check = datetime.datetime.strptime( + last_check, + self.PARSE_TIME_FORMAT, + ).timetuple() + + width = 80 + colw = 11 + header = '# Arch Linux mirrorlist generated by Reflector #'.center(width, '#') + border = '#' * len(header) + mirrorlist = '{}\n{}\n{}\n'.format(border, header, border) + \ + '\n' + \ + '\n'.join( + '# {{:<{:d}s}} {{}}'.format(colw).format(k, v) for k, v in ( + ('With:', cmd), + ('When:', self.display_time(time.gmtime())), + ('From:', MirrorStatus.URL), + ('Retrieved:', self.display_time(time.gmtime(self.json_mtime))), + ('Last Check:', self.display_time(parsed_last_check)), + ) + ) + \ + '\n\n' + + country = None + + # mirrors may be a generator so "if mirrors" will not work + no_mirrors = True + for mirror in mirrors: + no_mirrors = False + # Include country tags. This is intended for lists that are sorted by + # country. 
+ if include_country: + c = '{} [{}]'.format(mirror['country'], mirror['country_code']) + if c != country: + if country: + mirrorlist += '\n' + mirrorlist += '# {}\n'.format(c) + country = c + mirrorlist += MirrorStatus.MIRRORLIST_ENTRY_FORMAT.format(mirror['url'], '$repo', '$arch') + + if no_mirrors: + return None + else: + return mirrorlist + + + + def list_countries(self): + countries = dict() + for m in self.get_mirrors(): + k = (m['country'], m['country_code']) + try: + countries[k] += 1 + except KeyError: + countries[k] = 1 + return countries + + + + + + +class ListCountries(argparse.Action): + ''' + Action to list countries along with the number of mirrors in each. + ''' + def __call__(self, parser, namespace, values, option_string=None): + ms = MirrorStatus() + countries = ms.list_countries() + w = max(len(c) for c, cc in countries) + n = len(str(max(countries.values()))) + fmt = '{{:{:d}s}} {{}} {{:{:d}d}}'.format(w, n) + for (c, cc), n in sorted(countries.items(), key=lambda x: x[0][0]): + print(fmt.format(c, cc, n)) + sys.exit(0) + + + +def print_mirror_info(mirrors, time_fmt=MirrorStatus.DISPLAY_TIME_FORMAT): + ''' + Print information about each mirror to STDOUT. + ''' + if mirrors: + if not isinstance(mirrors, list): + mirrors = list(mirrors) + ks = sorted(k for k in mirrors[0].keys() if k != 'url') + l = max(len(k) for k in ks) + fmt = '{{:{:d}s}} : {{}}'.format(l) + for m in mirrors: + print('{}$repo/os/$arch'.format(m['url'])) + for k in ks: + v = m[k] + if k == 'last_sync': + v = time.strftime(time_fmt, time.gmtime(v)) + print(fmt.format(k, v)) + print() + + + +def add_arguments(parser): + ''' + Add reflector arguments to the argument parser. + ''' + parser = argparse.ArgumentParser(description='retrieve and filter a list of the latest Arch Linux mirrors') + + parser.add_argument( + '--connection-timeout', type=int, metavar='n', default=5, + help='The number of seconds to wait before a connection times out.' 
+ ) + +# parser.add_argument( +# '--download-timeout', type=int, metavar='n', +# help='The number of seconds to wait before a download times out. The threshold is checked after each chunk is read, so the actual timeout may take longer.' +# ) + + parser.add_argument( + '--list-countries', action=ListCountries, nargs=0, + help='Display a table of the distribution of servers by country.' + ) + + parser.add_argument( + '--cache-timeout', type=int, metavar='n', default=300, + help='The cache timeout in seconds for the data retrieved from the Arch Linux Mirror Status API. The default is 300 (5 minutes).' + ) + + parser.add_argument( + '--save', metavar='<filepath>', + help='Save the mirrorlist to the given path.' + ) + + sort_help = '; '.join('"{}": {}'.format(k, v) for k, v in MirrorStatus.SORT_TYPES.items()) + parser.add_argument( + '--sort', choices=MirrorStatus.SORT_TYPES, + help='Sort the mirrorlist. {}.'.format(sort_help) + ) + + parser.add_argument( + '--threads', type=int, metavar='n', + help='The number of threads to use when rating mirrors.' + ) + + parser.add_argument( + '--verbose', action='store_true', + help='Print extra information to STDERR. Only works with some options.' + ) + + parser.add_argument( + '--info', action='store_true', + help='Print mirror information instead of a mirror list. Filter options apply.' + ) + + + + filters = parser.add_argument_group( + 'filters', + 'The following filters are inclusive, i.e. the returned list will only contain mirrors for which all of the given conditions are met.' + ) + + filters.add_argument( + '-a', '--age', type=float, metavar='n', + help='Only return mirrors that have synchronized in the last n hours. n may be an integer or a decimal number.' + ) + + filters.add_argument( + '-c', '--country', dest='countries', action='append', metavar='<country>', + help='Match one of the given countries (case-sensitive). Use "--list-countries" to see which are available.' 
+ ) + + filters.add_argument( + '-f', '--fastest', type=int, metavar='n', + help='Return the n fastest mirrors that meet the other criteria. Do not use this option without other filtering options.' + ) + + filters.add_argument( + '-i', '--include', metavar='<regex>', action='append', + help='Include servers that match <regex>, where <regex> is a Python regular express.' + ) + + filters.add_argument( + '-x', '--exclude', metavar='<regex>', action='append', + help='Exclude servers that match <regex>, where <regex> is a Python regular express.' + ) + + filters.add_argument( + '-l', '--latest', type=int, metavar='n', + help='Limit the list to the n most recently synchronized servers.' + ) + + filters.add_argument( + '--score', type=int, metavar='n', + help='Limit the list to the n servers with the highest score.' + ) + + filters.add_argument( + '-n', '--number', type=int, metavar='n', + help='Return at most n mirrors.' + ) + + filters.add_argument( + '-p', '--protocol', dest='protocols', action='append', metavar='<protocol>', + help='Match one of the given protocols, e.g. "http", "ftp".' + ) + + filters.add_argument( + '--completion-percent', type=float, metavar='[0-100]', default=100., + help='Set the minimum completion percent for the returned mirrors. Check the mirrorstatus webpage for the meaning of this parameter. Default value: %(default)s.' + ) + + return parser + + + +def parse_args(args=None): + ''' + Parse command-line arguments. + ''' + parser = argparse.ArgumentParser( + description='retrieve and filter a list of the latest Arch Linux mirrors' + ) + parser = add_arguments(parser) + options = parser.parse_args(args) + return options + + + +# Process options +def process_options(options, ms=None, mirrors=None): + ''' + Process options. + + Optionally accepts a MirrorStatus object and/or the mirrors as returned by + the MirrorStatus.get_mirrors method. 
+ ''' + if not ms: + ms = MirrorStatus( + verbose=options.verbose, + connection_timeout=options.connection_timeout, +# download_timeout=options.download_timeout, + cache_timeout=options.cache_timeout, + min_completion_pct=(options.completion_percent/100.), + threads=options.threads + ) + + if mirrors is None: + mirrors = ms.get_mirrors() + + # Filter + mirrors = ms.filter( + mirrors, + countries=options.countries, + include=options.include, + exclude=options.exclude, + age=options.age, + protocols=options.protocols + ) + + if options.latest and options.latest > 0: + mirrors = ms.sort(mirrors, by='age') + mirrors = mirrors[:options.latest] + + if options.score and options.score > 0: + mirrors = ms.sort(mirrors, by='score') + mirrors = mirrors[:options.score] + + if options.fastest and options.fastest > 0: + mirrors = ms.sort(mirrors, by='rate') + mirrors = mirrors[:options.fastest] + + if options.sort and not (options.sort == 'rate' and options.fastest): + mirrors = ms.sort(mirrors, by=options.sort) + + if options.number: + mirrors = list(mirrors)[:options.number] + + return ms, mirrors + + + + + +def main(args=None, configure_logging=False): + if args: + cmd = tuple(args) + else: + cmd = sys.argv[1:] + + options = parse_args(args) + + if configure_logging: + if options.verbose: + level = logging.INFO + else: + level = logging.WARNING + logging.basicConfig( + format='[{asctime:s}] {levelname:s}: {message:s}', + style='{', + datefmt='%Y-%m-%d %H:%M:%S', + level=level + ) + + try: + ms, mirrors = process_options(options) + if mirrors is not None and not isinstance(mirrors, list): + mirrors = list(mirrors) + if not mirrors: + sys.exit('error: no mirrors found') + include_country = options.sort == 'country' + # Convert the generator object to a list for re-use later. 
+ if options.info: + print_mirror_info(mirrors) + return + else: + mirrorlist = ms.get_mirrorlist(mirrors, include_country=include_country, cmd=cmd) + if mirrorlist is None: + sys.exit('error: no mirrors found') + except MirrorStatusError as e: + sys.exit('error: {}\n'.format(e.msg)) + + if options.save: + try: + with open(options.save, 'w') as f: + f.write(mirrorlist) + except IOError as e: + sys.exit('error: {}\n'.format(e.strerror)) + else: + print(mirrorlist) + + + +def run_main(args=None, **kwargs): + try: + main(args, **kwargs) + except KeyboardInterrupt: + pass + + +if __name__ == "__main__": + run_main(configure_logging=True) + diff --git a/reflector b/reflector new file mode 100755 index 0000000..ddfe468 --- /dev/null +++ b/reflector @@ -0,0 +1,2 @@ +#!/bin/bash +python3 -m Reflector "$@"
\ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..af28090 --- /dev/null +++ b/setup.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 + +from distutils.core import setup +import time + +setup( + name='''Reflector''', + version=time.strftime('%Y.%m.%d.%H.%M.%S', time.gmtime(1511733550)), + description='''A Python 3 module and script to retrieve and filter the latest Pacman mirror list.''', + author='''Xyne''', + author_email='''ac xunilhcra enyx, backwards''', + url='''http://xyne.archlinux.ca/projects/reflector''', + py_modules=['''Reflector'''], +) |