#!/usr/bin/python
"""Mirror freshness checks.

Each check class fetches a timestamp file from a mirror (over HTTP/FTP or
rsync), compares it with the local clock and reports whether the mirror is
lagging by more than an allowed amount.
"""

import logging
import os
import re
import shutil
import socket
import subprocess
import tempfile
import time

try:  # Python 2 module names
    import rfc822
    import urllib2
    import urlparse
except ImportError:  # Python 3: same functions live in different modules
    import email.utils as rfc822
    import urllib.parse as urlparse
    import urllib.request as urllib2


class GardCheck:
    """Base class which provides helper functions shared by all checks."""

    def __init__(self, url, log=True, timeout=30):
        """Remember the mirror URL and configure logging and timeouts.

        url     -- base URL of the mirror; trailing '/' characters are removed
        log     -- when false, log_info()/log_error() become no-ops
        timeout -- value passed to socket.setdefaulttimeout() (process-wide)
        """
        # Munge trailing '/' so we can sanely concatenate paths later
        self.url = url.rstrip('/')

        if log:
            self.logger = logging.getLogger()
        else:
            self.logger = None

        # By default urllib2 has no timeout, so we need to set a sane
        # global maximum
        socket.setdefaulttimeout(timeout)

    def log_info(self, msg):
        """Log msg at INFO level unless logging was disabled."""
        if self.logger is not None:
            self.logger.info(msg)

    def log_error(self, msg):
        """Log msg at ERROR level unless logging was disabled."""
        if self.logger is not None:
            self.logger.error(msg)

    def check_file_exists(self, url):
        """Return True if url can be fetched and has a non-empty body.

        Any fetch error is treated as "does not exist".
        """
        try:
            f = urllib2.urlopen(url)
            try:
                return len(f.read()) > 0
            finally:
                f.close()
        except Exception:
            # Best effort: an unreachable or missing file simply "isn't there".
            # (Exception, not a bare except, so Ctrl-C still works.)
            return False

    # Converts an rsync URL in the rsync://server/module form to a string
    # that can be passed to the rsync command (server::module)
    def _rsync_url_to_cmd(self, url, path):
        """Return 'host::module/.../path' for the rsync URL url.

        path is taken relative to the path component of url.
        """
        urlp = urlparse.urlparse(url)
        if len(urlp.path) > 1:
            # strip leading '/' from URL path
            path = urlp.path[1:] + '/' + path
        return '%s::%s' % (urlp.netloc, path)

    # Gets a file over rsync and puts it in a temporary directory,
    # if specified (assumes URL is the form rsync://server/module
    # and takes path relative to this)
    def get_file_rsync(self, path, dir='.'):
        """Fetch path from the mirror with rsync into directory dir.

        Returns True on success and False (after logging an error) when
        rsync exits with a non-zero status, including death by signal.
        """
        target = self._rsync_url_to_cmd(self.url, path)
        retcode = subprocess.call(['rsync', '-aqPz', '--no-motd',
                                   '--contimeout=30', target, dir])
        # subprocess reports "killed by signal N" as -N, so test != 0
        if retcode != 0:
            self.log_error('rsync returned %d while fetching %s'
                           % (retcode, target))
            return False
        return True

    def check_rsync_writable(self, path):
        """Try to upload a dummy file to path on the mirror via rsync.

        Returns True when rsync exits with a positive status (the upload
        was refused) and False otherwise.
        """
        target = self._rsync_url_to_cmd(self.url, path)

        # Create a test file
        tmp = tempfile.NamedTemporaryFile()
        try:
            tmp.write(b'THIS_SHOULD_NOT_WORK')
            tmp.flush()
            # rsync needs the file's path, not the file object
            retcode = subprocess.call(['rsync', '-aqP', '--no-motd',
                                       '--contimeout=30', tmp.name, target])
        finally:
            tmp.close()

        return retcode > 0

    # Takes the URL to a timestamp.{chk|x} file and returns the
    # corresponding time stamp in seconds
    def _get_timestamp_from_url(self, url):
        """Return the timestamp stored at url in seconds, or None on failure.

        The content is first parsed as an RFC 822 date (timestamp.chk
        format); if that fails, the text before the first space is parsed
        as a number (timestamp.x format).
        """
        try:
            # Relies on the global socket timeout set in __init__
            f = urllib2.urlopen(url)
            try:
                date = f.read()
            finally:
                f.close()

            if not date:
                return None
            if not isinstance(date, str):
                # urlopen() yields bytes where str is a text type
                date = date.decode('ascii', 'replace')

            try:
                # timestamp.chk format
                return self.timestamp_to_secs(date)
            except Exception:
                # timestamp.x format?
                return float(date.split(' ')[0])
        except Exception:
            # Unreachable mirror or unparsable content: no timestamp
            return None

    def get_lag(self, url):
        """Return seconds between now and the timestamp at url.

        Returns None when the timestamp cannot be read or lies in the future.
        """
        ts = self._get_timestamp_from_url(url)
        now = time.time()
        if ts is None or now < ts:
            return None
        return now - ts

    def _get_lag_rsync(self, path):
        """Fetch path via rsync into a private temp dir and return its lag.

        Returns None if the download fails. The temporary directory is
        always removed afterwards.
        """
        tmpdir = tempfile.mkdtemp()
        try:
            if not self.get_file_rsync(path, tmpdir):
                return None
            # This is Linux-specific, but who's going to run this on
            # Windows anyway? :D
            local = os.path.join(tmpdir, os.path.basename(path))
            return self.get_lag('file://' + local)
        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)

    def humanize_time(self, secs):
        """Format a duration in seconds as 'DDd HHh MMm SSs'."""
        mins, secs = divmod(secs, 60)
        hours, mins = divmod(mins, 60)
        days, hours = divmod(hours, 24)
        return '%02dd %02dh %02dm %02ds' % (days, hours, mins, secs)

    def timestamp_to_secs(self, ts):
        """Convert an RFC 822 date string to seconds since the epoch."""
        return rfc822.mktime_tz(rfc822.parsedate_tz(ts))

    # OPTIONAL: Override or supplement these in child classes if desired.

    def check_name(self):
        """Return the class name without its trailing 'Check'."""
        return re.sub(r'Check$', '', self.__class__.__name__)

    def check(self, maxlag):
        """Return True if the mirror's lag is known and at most maxlag seconds.

        Logs an error and returns False when the lag cannot be determined
        or exceeds maxlag.
        """
        lag = self.lag()

        if lag is None:
            self.log_error('Could not get %s timestamp for %s'
                           % (self.check_name(), self.url))
            return False

        if lag > maxlag:
            self.log_error('%s at %s is lagging (delta is %s)'
                           % (self.check_name(), self.url,
                              self.humanize_time(lag)))
            return False

        return True

    # REQUIRED: You must override these in child classes

    def lag(self):
        """Return the mirror's lag in seconds, or None if unknown."""
        return None


# Check distfiles mirrors
class DistfilesCheck(GardCheck):
    """Lag check for distfiles mirrors reachable via HTTP(S), FTP or rsync."""

    def lag(self):
        """Return the distfiles lag in seconds, or None if unavailable.

        Unsupported URL schemes yield None.
        """
        scheme = urlparse.urlparse(self.url).scheme

        if scheme in ('http', 'https', 'ftp'):
            return self.get_lag(self.url + '/distfiles/timestamp.chk')
        if scheme == 'rsync':
            return self._get_lag_rsync('distfiles/timestamp.x')
        return None


# Check experimental mirrors
class ExperimentalCheck(GardCheck):
    """Lag check for experimental mirrors."""

    def lag(self):
        """Return the lag of the experimental timestamp file."""
        path = '/experimental/.timestamp-experimental.x'
        return self.get_lag(self.url + path)


# Check snapshot mirrors
class SnapshotsCheck(GardCheck):
    """Lag check for snapshot mirrors."""

    def lag(self):
        """Return the lag of the snapshots timestamp file."""
        path = '/snapshots/.timestamp-snapshots.x'
        return self.get_lag(self.url + path)


# Check releases mirrors
class ReleasesCheck(GardCheck):
    """Lag and permission check for releases mirrors."""

    def lag(self):
        """Return the lag of the releases timestamp file."""
        path = '/releases/.test/timestamp.x'
        return self.get_lag(self.url + path)

    def check(self, maxlag):
        """Run the base lag check, then the permission check.

        Returns False if either fails.
        """
        # Call the superclass first
        ret = GardCheck.check(self, maxlag)

        # Verify that releases/.test/THIS-FILE-SHOULD-NOT-BE-PUBLIC.txt
        # is not world readable
        if self.check_file_exists(
                self.url + '/releases/.test/THIS-FILE-SHOULD-NOT-BE-PUBLIC.txt'):
            self.log_error('Releases permission check failed on %s' % self.url)
            ret = False

        return ret


class PortageCheck(GardCheck):
    """Lag check for rsync mirrors of the portage tree."""

    # Takes a URL in rsync://host/module/path form
    def lag(self):
        """Return the lag of gentoo-portage/metadata/timestamp.chk, or None."""
        return self._get_lag_rsync('gentoo-portage/metadata/timestamp.chk')