upstream version 2019

author: Erich Eckner <git@eckner.net> 2019-03-21 09:11:08 +0100
committer: Erich Eckner <git@eckner.net> 2019-03-21 09:11:08 +0100
commit: c944bb0374256554698c3190b3470bfae70d21ba (patch)
tree: 762ef167b8ff65d6f046e38c9858507ef04d70ba
parent: 800b09afa97ea885bf07085dd71863ae6dd0a078 (diff)
3 files changed, 539 insertions, 405 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 46f5f74..1fa5c7e 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,7 @@
+# 2019-03-02
+* Refactored code to make it more modular. The MirrorStatus class remains with all of its functions for backwards compatibility but this will either be removed for further refactored in the future.
+* Added `--isos`, `--ipv4` and `--ipv6` options.
+
 # 2017-06-13
 * Added `--score` option.
 * Remove old-style message formatting.
diff --git a/Reflector.py b/Reflector.py
index b81f910..bcbb82f 100644
--- a/Reflector.py
+++ b/Reflector.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (C) 2012, 2013  Xyne
+# Copyright (C) 2012-2019  Xyne
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -22,6 +22,7 @@ import datetime
 import errno
 import getpass
 import http.client
+import itertools
 import json
 import logging
 import os
@@ -37,475 +38,580 @@ import time
 import urllib.error
 import urllib.request
 
+################################## Constants ###################################
 
+NAME = 'Reflector'
 
-# Generic MirrorStatus Exception
-class MirrorStatusError(Exception):
-  def __init__(self, msg):
-    self.msg = msg
-  def __str__(self):
-    return repr(self.msg)
+URL = 'https://www.archlinux.org/mirrors/status/json/'
+
+DISPLAY_TIME_FORMAT = '%Y-%m-%d %H:%M:%S UTC'
+PARSE_TIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
+PARSE_TIME_FORMAT_WITH_USEC = '%Y-%m-%dT%H:%M:%S.%fZ'
+
+DB_SUBPATH = 'core/os/x86_64/core.db'
 
+MIRROR_URL_FORMAT = '{0}{1}/os/{2}'
+MIRRORLIST_ENTRY_FORMAT = "Server = " + MIRROR_URL_FORMAT + "\n"
 
+DEFAULT_CONNECTION_TIMEOUT = 5
+DEFAULT_CACHE_TIMEOUT = 300
+DEFAULT_N_THREADS = 5
+
+SORT_TYPES = {
+  'age'    : 'last server synchronization',
+  'rate'   : 'download rate',
+  'country': 'server\'s location',
+  'score'  : 'MirrorStatus score',
+  'delay'  : 'MirrorStatus delay',
+}
+
+
+################################# IO Functions #################################
 
 def get_cache_file():
-  bname = 'mirrorstatus.json'
-  path = os.getenv('XDG_CACHE_HOME')
-  if path:
-    try:
-      os.makedirs(path, exist_ok=True)
-    # Raised if permissions do not match umask
-    except FileExistsError:
-      pass
-    return os.path.join(path, bname)
-  else:
-    return '/tmp/.{}.{}'.format(getpass.getuser(), bname)
+  '''
+  Get a nearly XDG-compliant cache directory. PyXDG is not used to  avoid the
+  external dependency. It is not fully compliant because it omits the
+  application name, but the mirror status file can be reused by other
+  applications and this stores no other files.
+  '''
+  base_name = 'mirrorstatus.json'
+  cache_dir = os.getenv('XDG_CACHE_HOME', default=os.path.expanduser('~/.cache'))
+  try:
+    os.makedirs(cache_dir, exist_ok=True)
+  # Raised by makedirs if permissions do not match umask
+  except FileExistsError:
+    pass
+  return os.path.join(cache_dir, base_name)
 
 
 
-class MirrorStatus(object):
-  # JSON URI
-  URL = 'https://www.archlinux.org/mirrors/status/json/'
-  # Mirror URL format. Accepts server base URL, repository, and architecture.
-  MIRROR_URL_FORMAT = '{0}{1}/os/{2}'
-  MIRRORLIST_ENTRY_FORMAT = "Server = " + MIRROR_URL_FORMAT + "\n"
-  DISPLAY_TIME_FORMAT = '%Y-%m-%d %H:%M:%S UTC'
-  PARSE_TIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
-  # Required for the last_check field, which oddly includes microseconds.
-  PARSE_TIME_FORMAT_WITH_USEC = '%Y-%m-%dT%H:%M:%S.%fZ'
-  # Recognized list sort types and their descriptions.
-  SORT_TYPES = {
-    'age'    : 'last server synchronization',
-    'rate'   : 'download rate',
-    'country': 'server\'s location',
-    'score'  : 'MirrorStatus score',
-    'delay'  : 'MirrorStatus delay',
-  }
-  # Known repositories, i.e. those that should be on each mirror.
-  # Used to replace the "$repo" variable.
-  # TODO
-  # Avoid using a hard-coded list.
-  # See https://bugs.archlinux.org/task/32895
-  REPOSITORIES = (
-    'community',
-    'community-staging',
-    'community-testing',
-    'core',
-    'extra',
-    'gnome-unstable',
-    'kde-unstable',
-    'multilib',
-    'multilib-testing'
-    'staging',
-    'testing'
-  )
+def get_mirrorstatus(
+  connection_timeout=DEFAULT_CONNECTION_TIMEOUT,
+  cache_timeout=DEFAULT_CACHE_TIMEOUT
+):
+  '''
+  Retrieve the mirror status JSON object. The downloaded data will be cached
+  locally and re-used within the cache timeout period. Returns the object and
+  the local cache's modification time.
+  '''
+  cache_path = get_cache_file()
+  try:
+    mtime = os.path.getmtime(cache_path)
+    invalid = (time.time() - mtime) > cache_timeout
+  except FileNotFoundError:
+    mtime = None
+    invalid = True
 
-  # Known system architectures, as used to replace the "$arch" variable.
-  ARCHITECTURES = ['x86_64']
+  try:
+    if invalid:
+      with urllib.request.urlopen(URL, None, connection_timeout) as h:
+        obj = json.loads(h.read().decode())
+      with open(cache_path, 'w') as h:
+        json.dump(obj, h, sort_keys=True, indent=2)
+      mtime = time.time()
+    else:
+      with open(cache_path, 'r') as h:
+        obj = json.load(h)
 
-  # Initialize
-  # refresh_interval:
-  #   The cached list will be replaced after this many seconds have passed.
-  #   0 effectively disables caching.
-  #   Caching is only useful if the object persists, e.g. if it were embedded
-  #   in a server.
-  def __init__(
-    self,
-    refresh_interval=0,
-    verbose=False,
-    connection_timeout=5,
-#     download_timeout=None,
-    cache_timeout=300,
-    min_completion_pct=1.,
-    threads=5
-  ):
-    self.refresh_interval = refresh_interval
+    return obj, mtime
+  except (IOError, urllib.error.URLError, socket.timeout) as e:
+    raise MirrorStatusError(str(e))
 
-    # Last modification time of the json object.
-    self.json_mtime = 0
-    # The parsed JSON object.
-    self.json_obj = {}
-    # Display extra information.
-    self.verbose = verbose
-    # Connection timeout
-    self.connection_timeout = connection_timeout
-    # Download timeout
-#     self.download_timeout = download_timeout
-    # Cache timeout
-    self.cache_timeout = cache_timeout
-    # Minimum completion percent, for filtering mirrors.
-    self.min_completion_pct = min_completion_pct
-    # Threads
-    self.threads = threads
 
 
+################################ Miscellaneous #################################
+
+def get_logger():
+  '''
+  Get the logger used by this module. Use this to be sure that the right logger
+  is used.
+  '''
+  return logging.getLogger(NAME)
 
-  def retrieve(self):
-    """Retrieve the current mirror status JSON data."""
-    self.json_obj = None
-    json_str = None
-    save_json = False
 
-    cache_file = get_cache_file()
-    if self.cache_timeout > 0:
-      save_json = True
-      try:
-        mtime = os.path.getmtime(cache_file)
-        if time.time() - mtime < self.cache_timeout:
-          try:
-            with open(cache_file) as f:
-              self.json_obj = json.load(f)
-            self.json_mtime = mtime
-            save_json = False
-          except IOError as e:
-            raise MirrorStatusError('failed to load cached JSON data ({})'.format(e))
-      except OSError as e:
-        if e.errno != errno.ENOENT:
-          raise MirrorStatusError('failed to get cache file mtime ({})'.format(e))
 
-    if not self.json_obj:
-      try:
-        with urllib.request.urlopen(MirrorStatus.URL, None, self.connection_timeout) as f:
-          json_str = f.read()
-          self.json_obj = json.loads(json_str.decode())
-          self.json_mtime = time.time()
-      except (urllib.error.URLError, socket.timeout) as e:
-        raise MirrorStatusError('failed to retrieve mirror data: ({})'.format(e))
-      except ValueError as e:
-        raise MirrorStatusError('failed to parse retrieved mirror data: ({})'.format(e))
+def format_last_sync(mirrors):
+  '''
+  Parse and format the "last_sync" field.
+  '''
+  for m in mirrors:
+    last_sync = calendar.timegm(time.strptime(m['last_sync'], PARSE_TIME_FORMAT))
+    m.update(last_sync=last_sync)
+    yield m
+
 
+
+def count_countries(mirrors):
+  '''
+  Count the mirrors in each country.
+  '''
+  countries = dict()
+  for m in mirrors:
+    k = (m['country'], m['country_code'])
+    if not any(k):
+      continue
     try:
-      # Remove servers that have not synced, and parse the "last_sync" times for
-      # comparison later.
-      mirrors = self.json_obj['urls']
-      # Filter incomplete mirrors  and mirrors that haven't synced.
-      mirrors = list(
-        m for m in mirrors
-        if m['last_sync']
-        and m['completion_pct'] >= self.min_completion_pct
-      )
-      # Parse 'last_sync' times for future comparison.
-      for mirror in mirrors:
-        mirror['last_sync'] = calendar.timegm(
-          time.strptime(mirror['last_sync'],
-          MirrorStatus.PARSE_TIME_FORMAT)
-        )
-      self.json_obj['urls'] = mirrors
+      countries[k] += 1
     except KeyError:
-      raise MirrorStatusError('failed to parse retrieved mirror data (the format may have changed or there may be a transient error)')
+      countries[k] = 1
+  return countries
 
-    if save_json and json_str:
-      try:
-        with open(cache_file, 'wb') as f:
-          f.write(json_str)
-      except IOError as e:
-        raise MirrorStatusError('failed to cache JSON data ({})'.format(e))
 
 
+################################### Sorting ####################################
 
+def sort(mirrors, by=None, n_threads=DEFAULT_N_THREADS):
+  '''
+  Sort mirrors by different criteria.
+  '''
+  # Ensure that "mirrors" is a list that can be sorted.
+  if not isinstance(mirrors, list):
+    mirrors = list(mirrors)
 
+  if by == 'age':
+    mirrors.sort(key=lambda m: m['last_sync'], reverse=True)
 
-  def get_obj(self):
-    """Return the JSON object, retrieving new data if necessary."""
-    if not self.json_obj \
-    or time.time() > (self.json_mtime + self.refresh_interval):
-      self.retrieve()
+  elif by == 'rate':
+    rates = rate(mirrors, n_threads=n_threads)
+    mirrors = sorted(mirrors, key=lambda m: rates[m['url']], reverse=True)
+
+  else:
+    try:
+      mirrors.sort(key=lambda m: m[by])
+    except KeyError:
+      raise MirrorStatusError('attempted to sort mirrors by unrecognized criterion: "{}"'.format(by))
 
-    return self.json_obj
+  return mirrors
 
 
-  def get_mirrors(self):
-    """Get the mirrors."""
-    return self.get_obj()['urls']
+#################################### Rating ####################################
+
+def rate_rsync(db_url, connection_timeout=DEFAULT_CONNECTION_TIMEOUT):
+  '''
+  Download a database via rsync and return the time and rate of the download.
+  '''
+  rsync_cmd = [
+    'rsync',
+    '-avL', '--no-h', '--no-motd',
+    '--contimeout={}'.format(connection_timeout),
+    db_url
+  ]
+  try:
+    with tempfile.TemporaryDirectory() as tmpdir:
+      t0 = time.time()
+      subprocess.check_call(
+        rsync_cmd + [tmpdir],
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL
+      )
+      dt = time.time() - t0
+      size = os.path.getsize(
+        os.path.join(tmpdir, os.path.basename(DB_SUBPATH))
+      )
+      r = size / dt
+      return dt, r
+  except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError):
+    return None, 0
+
+
+
+def rate_http(db_url, connection_timeout=DEFAULT_CONNECTION_TIMEOUT):
+  '''
+  Download a database via any protocol supported by urlopen and return the time
+  and rate of the download.
+  '''
+  req = urllib.request.Request(url=db_url)
+  try:
+    with urllib.request.urlopen(req, None, connection_timeout) as f:
+      t0 = time.time()
+      size = len(f.read())
+      dt = time.time() - t0
+    r = size / (dt)
+    return dt, r
+  except (OSError, urllib.error.HTTPError, http.client.HTTPException):
+    return None, 0
+
+
+
+def rate(mirrors, n_threads=DEFAULT_N_THREADS, connection_timeout=DEFAULT_CONNECTION_TIMEOUT):
+  '''
+  Rate mirrors by timing the download the core repo's database for each one.
+  '''
+  # Ensure that mirrors is not a generator so that its length can be determined.
+  if not isinstance(mirrors, tuple):
+    mirrors = tuple(mirrors)
+
+  if not mirrors:
+    return None
+
+  # At least 1 thread and not more than the number of mirrors.
+  n_threads = max(1, min(n_threads, len(mirrors)))
+
+  # URL input queue.
+  q_in = queue.Queue()
+  # URL, elapsed time and rate output queue.
+  q_out = queue.Queue()
+
+
+  def worker():
+    while True:
+      # To stop a thread, an integer will be inserted in the input queue. Each
+      # thread will increment it and re-insert it until it equals the
+      # threadcount. After encountering the integer, the thread exits the loop.
+      url = q_in.get()
+
+      if isinstance(url, int):
+        if url < n_threads:
+          q_in.put(url + 1)
+
+      else:
+        db_url = url + DB_SUBPATH
+        scheme = urllib.parse.urlparse(url).scheme
+
+        if scheme == 'rsync':
+          dt, r = rate_rsync(db_url, connection_timeout)
+        else:
+          dt, r = rate_http(db_url, connection_timeout)
+
+        q_out.put((url, dt, r))
+
+      q_in.task_done()
+
+
+  workers = tuple(threading.Thread(target=worker) for _ in range(n_threads))
+  for w in workers:
+    w.daemon = True
+    w.start()
+
+  url_len = max(len(m['url']) for m in mirrors)
+  logger = get_logger()
+  for m in mirrors:
+    url = m['url']
+    logger.info("rating {}".format(url))
+    q_in.put(url)
+
+  # To exit the threads.
+  q_in.put(0)
+  q_in.join()
+
+  header_fmt = '{{:{:d}s}}  {{:>14s}}  {{:>9s}}'.format(url_len)
+  logger.info(header_fmt.format('Server', 'Rate', 'Time'))
+  fmt = '{{:{:d}s}}  {{:8.2f}} KiB/s  {{:7.2f}} s'.format(url_len)
+
+  # Loop over the mirrors just to ensure that we get the rate for each mirror.
+  # The value in the loop does not (necessarily) correspond to the mirror.
+  rates = dict()
+  for _ in mirrors:
+    url, dt, r = q_out.get()
+    kibps = r / 1024.0
+    logger.info(fmt.format(url, kibps, dt))
+    rates[url] = r
+    q_out.task_done()
+
+  return rates
+
+
+
+############################## MirrorStatusError ###############################
+
+class MirrorStatusError(Exception):
+  '''
+  Common base exception raised by this module.
+  '''
+  def __init__(self, msg):
+    self.msg = msg
+  def __str__(self):
+    return repr(self.msg)
+
 
 
 
-  def filter(
+############################## MirrorStatusFilter ##############################
+
+class MirrorStatusFilter():
+
+  def __init__(
     self,
-    mirrors=None,
+    min_completion_pct=1.0,
     countries=None,
-    regexes=None, # TODO: remove
+    protocols=None,
     include=None,
     exclude=None,
     age=None,
-    protocols=None
+    isos=False,
+    ipv4=False,
+    ipv6=False
   ):
-    """Filter using different parameters."""
-    # TODO: remove
-    if regexes:
-#       raise MirrorStatusError('The "regexes" keyword has been deprecated and replaced by "include" and "exclude".')
-      if not include:
-        include = regexes
-      sys.stderr.write('''WARNING: The "regexes" keyword has been deprecated and replaced by "include" and "exclude".
-         Support will be soon removed without further warning.''')
-    if mirrors is None:
-      mirrors = self.get_mirrors()
+    self.min_completion_pct = min_completion_pct
+    self.countries = tuple(c.upper() for c in countries) if countries else None
+    self.protocols = protocols
+    self.include = tuple(re.compile(r) for r in include) if include else None
+    self.exclude = tuple(re.compile(r) for r in exclude) if exclude else None
+    self.age = age
+    self.isos = isos
+    self.ipv4 = ipv4
+    self.ipv6 = ipv6
 
-    t = time.time()
-    n = 0
 
-    # Make country arguments case-insensitive.
-    uc_countries = tuple(c.upper() for c in countries) if countries else None
-    for mirror in mirrors:
-      # Filter by country.
-      if countries \
-      and not ( \
-        mirror['country'].upper() in uc_countries or \
-        mirror['country_code'].upper() in uc_countries \
-      ):
-        continue
-      # Filter by protocol.
-      if protocols and not mirror['protocol'] in protocols:
-        continue
-      # Filter by regex.
-      # TODO: Find a better way to do this.
-      if include:
-        for regex in include:
-          if re.search(regex, mirror['url']):
-            break
-        else:
-          continue
-      if exclude:
-        discard = False
-        for regex in exclude:
-          if re.search(regex, mirror['url']):
-            discard = True
-            break
-        if discard:
-          continue
-      # Filter by hours since last sync.
-      if age and t > (age * 60**2 + mirror['last_sync']):
-        continue
+  def filter_mirrors(self, mirrors):
+    # Filter unsynced mirrors.
+    mirrors = (m for m in mirrors if m['last_sync'])
 
-      # Yield if we're still here.
-      yield mirror
+    # Parse the last sync time.
+    mirrors = format_last_sync(mirrors)
 
+    # Filter by completion "percent" [0-1].
+    mirrors = (m for m in mirrors if m['completion_pct'] >= self.min_completion_pct)
 
+    # Filter by countries.
+    if self.countries:
+      mirrors = (
+        m for m in mirrors
+        if m['country'].upper() in self.countries
+        or m['country_code'].upper() in self.countries
+      )
 
-  def sort(self, mirrors=None, by=None):
-    """Sort using different parameters."""
-    if mirrors is None:
-      mirrors = self.get_mirrors()
-    # Ensure that "mirrors" is a list that can be sorted.
-    if not isinstance(mirrors, list):
-      mirrors = list(mirrors)
+    # Filter by protocols.
+    if self.protocols:
+      mirrors = (m for m in mirrors if m['protocol'] in self.protocols)
 
-    if by == 'age':
-      mirrors.sort(key=lambda m: m['last_sync'], reverse=True)
-    elif by == 'rate':
-      mirrors = self.rate(mirrors)
-    elif by in ('country', 'country_code', 'delay', 'score'):
-      mirrors.sort(key=lambda m: m[by])
-    return mirrors
+    # Filter by include expressions.
+    if self.include:
+      mirrors = (m for m in mirrors if any(r.search(m['url']) for r in self.include))
 
+    # Filter by exclude expressions.
+    if self.exclude:
+      mirrors = (m for m in mirrors if not any(r.search(m['url']) for r in self.exclude))
 
+    # Filter by age. The age is given in hours and converted to seconds. Servers
+    # with a last refresh older than the age are omitted.
+    if self.age and self.age > 0:
+      t = time.time()
+      a = self.age * 60**2
+      mirrors = (m for m in mirrors if (m['last_sync'] + a) >= t)
 
-  # Sort mirrors by download speed. Download speed will be calculated from the
-  # download time of the [core] database from each server.
-  # TODO: Consider ways to improve this.
-  # TODO: Consider the effects of threading (do the threads affect the results
-  #       by competing for bandwidth?)
-  def rate(self, mirrors=None, threads=5):
-    if mirrors is None:
-      mirrors = self.get_mirrors()
-    if not threads:
-      threads = self.threads
-    # Ensure that "mirrors" is a list and not a generator.
-    if not isinstance(mirrors, list):
-      mirrors = list(mirrors)
+    # The following does not work. Only the final iteration affects "mirrors".
+    # TODO: Understand exactly why the code above works but the loop doesn't.
+    #  for field in ('isos', 'ipv4', 'ipv6'):
+    #    if getattr(self, field):
+    #      mirrors = (m for m in mirrors if m[field])
 
-    if not mirrors:
-      logging.warning('no mirrors selected for rating')
-      return mirrors
+    # Filter by ISO hosing.
+    if self.isos:
+      mirrors = (m for m in mirrors if m['isos'])
 
-    # Ensure a sane number of threads.
-    if threads < 1:
-      threads = 1
-    else:
-      threads = min(threads, len(mirrors))
+    # Filter by IPv4 support.
+    if self.ipv4:
+      mirrors = (m for m in mirrors if m['ipv4'])
 
-    rates = {}
+    # Filter by IPv6 support.
+    if self.ipv6:
+      mirrors = (m for m in mirrors if m['ipv6'])
 
-    # URL input queue.Queue
-    q_in = queue.Queue()
-    # URL and rate output queue.Queue
-    q_out = queue.Queue()
-    def worker():
-      while True:
-        url = q_in.get()
-        db_subpath = 'core/os/x86_64/core.db'
-        db_url = url + db_subpath
-        scheme = urllib.parse.urlparse(url).scheme
-        # Leave the rate as 0 if the connection fails.
-        # TODO: Consider more graceful error handling.
-        rate = 0
-        dt = float('NaN')
+    yield from mirrors
 
-        # urllib cannot handle rsync protocol
-        if scheme == 'rsync':
-          rsync_cmd = [
-            'rsync',
-            '-avL', '--no-h', '--no-motd',
-            '--contimeout={}'.format(self.connection_timeout),
-            db_url
-          ]
-          try:
-            with tempfile.TemporaryDirectory() as tmpdir:
-              t0 = time.time()
-              subprocess.check_call(
-                rsync_cmd + [tmpdir],
-                stdout=subprocess.DEVNULL,
-                stderr=subprocess.DEVNULL
-              )
-              dt = time.time() - t0
-              size = os.path.getsize(os.path.join(
-                tmpdir,
-                os.path.basename(db_subpath)
-              ))
-              rate = size / dt
-          except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError):
-            pass
-        else:
-          req = urllib.request.Request(url=db_url)
-          try:
-            t0 = time.time()
-            with urllib.request.urlopen(req, None, self.connection_timeout) as f:
-              size = len(f.read())
-              dt = time.time() - t0
-              rate = size / (dt)
-          except (OSError, urllib.error.HTTPError, http.client.HTTPException):
-            pass
-        q_out.put((url, rate, dt))
-        q_in.task_done()
 
-    # Launch threads
-    for i in range(threads):
-      t = threading.Thread(target=worker)
-      t.daemon = True
-      t.start()
 
+################################## Formatting ##################################
 
-    # Load the input queue.Queue
-    url_len = max(len(m['url']) for m in mirrors)
-    for mirror in mirrors:
-      logging.info("rating {}".format(mirror['url']))
-      q_in.put(mirror['url'])
+def format_mirrorlist(mirror_status, mtime, include_country=False, command=None):
+  if command is None:
+    command = '?'
+  else:
+    command = 'reflector ' + ' '.join(pipes.quote(x) for x in command)
 
-    q_in.join()
+  last_check = mirror_status['last_check']
+  # For some reason the "last_check" field included microseconds.
+  try:
+    parsed_last_check = datetime.datetime.strptime(
+      last_check,
+      PARSE_TIME_FORMAT_WITH_USEC,
+    ).timetuple()
+  except ValueError:
+    parsed_last_check = datetime.datetime.strptime(
+      last_check,
+      PARSE_TIME_FORMAT,
+    ).timetuple()
 
+  width = 80
+  colw = 11
+  header = '# Arch Linux mirrorlist generated by Reflector #'.center(width, '#')
+  border = '#' * len(header)
+  mirrorlist = ''
+  mirrorlist = '{}\n{}\n{}\n'.format(border, header, border) + \
+    '\n' + \
+    '\n'.join(
+      '# {{:<{:d}s}} {{}}'.format(colw).format(k, v) for k, v in (
+        ('With:', command),
+        ('When:', time.strftime(DISPLAY_TIME_FORMAT, time.gmtime())),
+        ('From:', URL),
+        ('Retrieved:', time.strftime(DISPLAY_TIME_FORMAT, time.gmtime(mtime))),
+        ('Last Check:', time.strftime(DISPLAY_TIME_FORMAT, parsed_last_check))
+      )
+    ) + \
+    '\n\n'
 
-    # Get the results
-    # The "in mirrors" loop is just used to ensure that the right number of
-    # items is retrieved.
+  country = None
 
-    # Display some extra data.
-    header_fmt = '{{:{:d}s}}  {{:>14s}}  {{:>9s}}'.format(url_len)
-    logging.info(header_fmt.format('Server', 'Rate', 'Time'))
-    fmt = '{{:{:d}s}}  {{:8.2f}} KiB/s  {{:7.2f}} s'.format(url_len)
+  mirrors = mirror_status['urls']
+  for mirror in mirrors:
+    # Include country tags. This is intended for lists that are sorted by
+    # country.
+    if include_country:
+      c = '{} [{}]'.format(mirror['country'], mirror['country_code'])
+      if c != country:
+        if country:
+          mirrorlist += '\n'
+        mirrorlist += '# {}\n'.format(c)
+        country = c
+    mirrorlist += MIRRORLIST_ENTRY_FORMAT.format(mirror['url'], '$repo', '$arch')
 
-    # Loop over the mirrors just to ensure that we get the rate for each mirror.
-    # The value in the loop does not (necessarily) correspond to the mirror.
-    for _ in mirrors:
-      url, rate, dt = q_out.get()
-      kibps = rate / 1024.0
-      logging.info(fmt.format(url, kibps, dt))
-      rates[url] = rate
-      q_out.task_done()
+  if mirrors:
+    return mirrorlist
+  else:
+    return None
 
 
-    # Sort by rate.
-    rated_mirrors = [m for m in mirrors if rates[m['url']] > 0]
-    rated_mirrors.sort(key=lambda m: rates[m['url']], reverse=True)
 
-    return rated_mirrors + [m for m in mirrors if rates[m['url']] == 0]
 
 
+############################ MirrorStatus Retriever ############################
 
-  def display_time(self, t=None):
-    '''Format a time for display.'''
-    return time.strftime(self.DISPLAY_TIME_FORMAT, t)
+class MirrorStatus():
+  '''
+  This is a legacy class that will likely be removed in the future. It
+  previously held most of this module's functionality until it was refactored
+  into more modular functions. Seemingly pointless code is still used by
+  importers of this module.
+  '''
 
+  # TODO: move these to another module or remove them completely
+  # Related: https://bugs.archlinux.org/task/32895
+  REPOSITORIES = (
+    'community',
+    'community-staging',
+    'community-testing',
+    'core',
+    'extra',
+    'gnome-unstable',
+    'kde-unstable',
+    'multilib',
+    'multilib-testing'
+    'staging',
+    'testing'
+  )
+  # Officially supported system architectures.
+  ARCHITECTURES = ['x86_64']
 
+  MIRROR_URL_FORMAT = MIRROR_URL_FORMAT
+  MIRRORLIST_ENTRY_FORMAT = MIRRORLIST_ENTRY_FORMAT
 
 
-  # Return a Pacman-formatted mirrorlist
-  # TODO: Reconsider the assumption that self.json_obj has been retrieved.
-  def get_mirrorlist(self, mirrors=None, include_country=False, cmd=None):
+  def __init__(
+    self,
+    connection_timeout=DEFAULT_CONNECTION_TIMEOUT,
+    cache_timeout=DEFAULT_CACHE_TIMEOUT,
+    min_completion_pct=1.0,
+    threads=DEFAULT_N_THREADS
+  ):
+    self.connection_timeout = connection_timeout
+    self.cache_timeout = cache_timeout
+    self.min_completion_pct = min_completion_pct
+    self.threads = threads
+
+    self.mirror_status = None
+    self.ms_mtime = 0
+
+
+  def retrieve(self):
+    self.mirror_status, self.ms_mtime = get_mirrorstatus(
+      connection_timeout=self.connection_timeout,
+      cache_timeout=self.cache_timeout
+    )
+
+
+  def get_obj(self):
+    '''
+    Get the JSON mirror status.
+    '''
+    t = time.time()
+    if (t - self.ms_mtime) > self.cache_timeout:
+      self.retrieve()
+    return self.mirror_status
+
+
+  def get_mirrors(self):
+    '''
+    Get the mirror from the mirror status.
+    '''
+    obj = self.get_obj()
+    try:
+      return obj['urls']
+    except KeyError:
+      raise MirrorStatusError('no mirrors detected in mirror status output')
+
+
+
+  def filter(self, mirrors=None, **kwargs):
+    '''
+    Filter mirrors by various criteria.
+    '''
     if mirrors is None:
       mirrors = self.get_mirrors()
-    if cmd is None:
-      cmd = '?'
-    else:
-      cmd = 'reflector ' + ' '.join(pipes.quote(x) for x in cmd)
+    msf = MirrorStatusFilter(min_completion_pct=self.min_completion_pct, **kwargs)
+    yield from msf.filter_mirrors(mirrors)
 
-    last_check = self.json_obj['last_check']
-    # For some reason the "last_check" field included microseconds.
-    try:
-      parsed_last_check = datetime.datetime.strptime(
-        last_check,
-        self.PARSE_TIME_FORMAT_WITH_USEC,
-      ).timetuple()
-    except ValueError:
-      parsed_last_check = datetime.datetime.strptime(
-        last_check,
-        self.PARSE_TIME_FORMAT,
-      ).timetuple()
 
-    width = 80
-    colw = 11
-    header = '# Arch Linux mirrorlist generated by Reflector #'.center(width, '#')
-    border = '#' * len(header)
-    mirrorlist = '{}\n{}\n{}\n'.format(border, header, border) + \
-      '\n' + \
-      '\n'.join(
-        '# {{:<{:d}s}} {{}}'.format(colw).format(k, v) for k, v in (
-          ('With:', cmd),
-          ('When:', self.display_time(time.gmtime())),
-          ('From:', MirrorStatus.URL),
-          ('Retrieved:', self.display_time(time.gmtime(self.json_mtime))),
-          ('Last Check:', self.display_time(parsed_last_check)),
-        )
-      ) + \
-      '\n\n'
 
-    country = None
+  def sort(self, mirrors=None, **kwargs):
+    '''
+    Sort mirrors by various criteria.
+    '''
+    if mirrors is None:
+      mirrors = self.get_mirrors()
+    yield from sort(mirrors, n_threads=self.threads, **kwargs)
+
+
+  def rate(self, mirrors=None, **kwargs):
+    '''
+    Sort mirrors by download speed.
+    '''
+    if mirrors is None:
+      mirrors = self.get_mirrors()
+    yield from sort(mirrors, n_threads=self.threads, by='rate', **kwargs)
 
-    # mirrors may be a generator so "if mirrors" will not work
-    no_mirrors = True
-    for mirror in mirrors:
-      no_mirrors = False
-      # Include country tags. This is intended for lists that are sorted by
-      # country.
-      if include_country:
-        c = '{} [{}]'.format(mirror['country'], mirror['country_code'])
-        if c != country:
-          if country:
-            mirrorlist += '\n'
-          mirrorlist += '# {}\n'.format(c)
-          country = c
-      mirrorlist += MirrorStatus.MIRRORLIST_ENTRY_FORMAT.format(mirror['url'], '$repo', '$arch')
 
-    if no_mirrors:
-      return None
-    else:
-      return mirrorlist
+
+  def display_time(self, t=None):
+    '''Format a time for display.'''
+    return time.strftime(self.DISPLAY_TIME_FORMAT, t)
 
 
 
-  def list_countries(self):
-    countries = dict()
-    for m in self.get_mirrors():
-      k = (m['country'], m['country_code'])
-      try:
-        countries[k] += 1
-      except KeyError:
-        countries[k] = 1
-    return countries
+  def get_mirrorlist(self, mirrors=None, include_country=False, cmd=None):
+    '''
+    Get a Pacman-formatted mirrorlist.
+    '''
+    obj = self.get_obj().copy()
+    if mirrors is not None:
+      if not isinstance(mirrors, list):
+        mirrors = list(mirrors)
+      obj['urls'] = mirrors
+    return format_mirrorlist(obj, self.ms_mtime, include_country=include_country, command=cmd)
 
 
 
+  def list_countries(self):
+    '''
+    List countries along with a server count for each one.
+    '''
+    mirrors = self.get_mirrors()
+    return count_countries(mirrors)
+
 
 
+############################### argparse Actions ###############################
 
 class ListCountries(argparse.Action):
   '''
@@ -523,11 +629,12 @@ class ListCountries(argparse.Action):
 
 
 
-def print_mirror_info(mirrors, time_fmt=MirrorStatus.DISPLAY_TIME_FORMAT):
+def print_mirror_info(mirrors, time_fmt=DISPLAY_TIME_FORMAT):
   '''
   Print information about each mirror to STDOUT.
   '''
   if mirrors:
+    #  mirrors = format_last_sync(mirrors)
     if not isinstance(mirrors, list):
       mirrors = list(mirrors)
     ks = sorted(k for k in mirrors[0].keys() if k != 'url')
@@ -551,8 +658,8 @@ def add_arguments(parser):
   parser = argparse.ArgumentParser(description='retrieve and filter a list of the latest Arch Linux mirrors')
 
   parser.add_argument(
-    '--connection-timeout', type=int, metavar='n', default=5,
-    help='The number of seconds to wait before a connection times out.'
+    '--connection-timeout', type=int, metavar='n', default=DEFAULT_CONNECTION_TIMEOUT,
+    help='The number of seconds to wait before a connection times out. Default: %(default)s'
   )
 
 #   parser.add_argument(
@@ -566,8 +673,8 @@ def add_arguments(parser):
   )
 
   parser.add_argument(
-    '--cache-timeout', type=int, metavar='n', default=300,
-    help='The cache timeout in seconds for the data retrieved from the Arch Linux Mirror Status API. The default is 300 (5 minutes).'
+    '--cache-timeout', type=int, metavar='n', default=DEFAULT_CACHE_TIMEOUT,
+    help='The cache timeout in seconds for the data retrieved from the Arch Linux Mirror Status API. The default is %(default)s.'
   )
 
   parser.add_argument(
@@ -575,15 +682,15 @@ def add_arguments(parser):
     help='Save the mirrorlist to the given path.'
   )
 
-  sort_help = '; '.join('"{}": {}'.format(k, v) for k, v in MirrorStatus.SORT_TYPES.items())
+  sort_help = '; '.join('"{}": {}'.format(k, v) for k, v in SORT_TYPES.items())
   parser.add_argument(
-    '--sort', choices=MirrorStatus.SORT_TYPES,
+    '--sort', choices=SORT_TYPES,
     help='Sort the mirrorlist. {}.'.format(sort_help)
   )
 
   parser.add_argument(
-    '--threads', type=int, metavar='n',
-    help='The number of threads to use when rating mirrors.'
+    '--threads', type=int, metavar='n', default=DEFAULT_N_THREADS,
+    help='The maximum number of threads to use when rating mirrors. Default: %(default)s'
   )
 
   parser.add_argument(
@@ -653,6 +760,21 @@ def add_arguments(parser):
     help='Set the minimum completion percent for the returned mirrors. Check the mirrorstatus webpage for the meaning of this parameter. Default value: %(default)s.'
   )
 
+  filters.add_argument(
+    '--isos', action='store_true',
+    help='Only return mirrors that host ISOs.'
+  )
+
+  filters.add_argument(
+    '--ipv4', action='store_true',
+    help='Only return mirrors that support IPv4.'
+  )
+
+  filters.add_argument(
+    '--ipv6', action='store_true',
+    help='Only return mirrors that support IPv6.'
+  )
+
   return parser
 
 
@@ -680,7 +802,6 @@ def process_options(options, ms=None, mirrors=None):
   '''
   if not ms:
     ms = MirrorStatus(
-      verbose=options.verbose,
       connection_timeout=options.connection_timeout,
 #       download_timeout=options.download_timeout,
       cache_timeout=options.cache_timeout,
@@ -698,20 +819,23 @@ def process_options(options, ms=None, mirrors=None):
     include=options.include,
     exclude=options.exclude,
     age=options.age,
-    protocols=options.protocols
+    protocols=options.protocols,
+    isos=options.isos,
+    ipv4=options.ipv4,
+    ipv6=options.ipv6
   )
 
   if options.latest and options.latest > 0:
     mirrors = ms.sort(mirrors, by='age')
-    mirrors = mirrors[:options.latest]
+    mirrors = itertools.islice(mirrors, options.latest)
 
   if options.score and options.score > 0:
     mirrors = ms.sort(mirrors, by='score')
-    mirrors = mirrors[:options.score]
+    mirrors = itertools.islice(mirrors, options.score)
 
   if options.fastest and options.fastest > 0:
     mirrors = ms.sort(mirrors, by='rate')
-    mirrors = mirrors[:options.fastest]
+    mirrors = itertools.islice(mirrors, options.fastest)
 
   if options.sort and not (options.sort == 'rate' and options.fastest):
     mirrors = ms.sort(mirrors, by=options.sort)
@@ -723,8 +847,6 @@ def process_options(options, ms=None, mirrors=None):
 
 
 
-
-
 def main(args=None, configure_logging=False):
   if args:
     cmd = tuple(args)
@@ -733,17 +855,25 @@ def main(args=None, configure_logging=False):
 
   options = parse_args(args)
 
+  # Configure logging.
+  logger = get_logger()
+
   if configure_logging:
     if options.verbose:
       level = logging.INFO
     else:
       level = logging.WARNING
-    logging.basicConfig(
-      format='[{asctime:s}] {levelname:s}: {message:s}',
+
+    logger.setLevel(level)
+    ch = logging.StreamHandler()
+    formatter = logging.Formatter(
+      fmt='[{asctime:s}] {levelname:s}: {message:s}',
       style='{',
-      datefmt='%Y-%m-%d %H:%M:%S',
-      level=level
+      datefmt='%Y-%m-%d %H:%M:%S'
     )
+    ch.setFormatter(formatter)
+    logger.addHandler(ch)
+
 
   try:
     ms, mirrors = process_options(options)
diff --git a/setup.py b/setup.py
index ca52b36..e06748f 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ import time
 
 setup(
   name='''Reflector''',
-  version=time.strftime('%Y.%m.%d.%H.%M.%S', time.gmtime(1520114494)),
+  version=time.strftime('%Y.%m.%d.%H.%M.%S', time.gmtime(1551526611)),
   description='''A Python 3 module and script to retrieve and filter the latest Pacman mirror list.''',
   author='''Xyne''',
   author_email='''ac xunilhcra enyx, backwards''',
author	Erich Eckner <git@eckner.net>	2019-03-21 09:11:08 +0100
committer	Erich Eckner <git@eckner.net>	2019-03-21 09:11:08 +0100
commit	c944bb0374256554698c3190b3470bfae70d21ba (patch)
tree	762ef167b8ff65d6f046e38c9858507ef04d70ba
parent	800b09afa97ea885bf07085dd71863ae6dd0a078 (diff)