aboutsummaryrefslogtreecommitdiff
blob: 7326c86034ca6514ab8c1a5cb99f46725752ed13 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""Mirrorselect 2.x
 Tool for selecting Gentoo source and rsync mirrors.

Copyright 2005-2012 Gentoo Foundation

	Copyright (C) 2005 Colin Kingsley <tercel@gentoo.org>
	Copyright (C) 2008 Zac Medico <zmedico@gentoo.org>
	Copyright (C) 2009 Sebastian Pipping <sebastian@pipping.org>
	Copyright (C) 2009 Christian Ruppert <idl0r@gentoo.org>
	Copyright (C) 2012 Brian Dolbec <dolsen@gentoo.org>

Distributed under the terms of the GNU General Public License v2
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, version 2 of the License.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.

"""

import os

import requests

from mirrorselect.mirrorparser3 import MirrorParser3
from mirrorselect.version import version

# Value sent as the User-Agent header when the mirror list is downloaded.
USERAGENT = f"Mirrorselect-{version}"


class Extractor:
    """Fetch the mirror list and keep only the mirrors that should be tested.

    A MirrorParser3 object parses the downloaded list; the result is then
    narrowed by the country/region/protocol options supplied by the user.

    Attributes set by the constructor:
        output: object used for all messages (print_info, print_err, write).
        proxies: mapping of scheme ("http"/"https") to the proxy URL that is
            handed to requests.
        unfiltered_hosts: every (uri, data) pair returned by the parser.
        hosts: the (uri, data) pairs left after filtering.
    """

    def __init__(self, list_url, options, output):
        """Download the mirror list from list_url and filter it.

        options is read for the attributes country, region, ftp, http,
        https and proxy.  output receives all progress messages.
        """
        self.output = output
        self.output.print_info(f"Using url: {list_url}\n")

        filters = {}
        for opt in ("country", "region"):
            value = getattr(options, opt)
            if value is not None:
                filters[opt] = value
                self.output.print_info(f'Limiting test to "{opt}={value}" hosts. \n')
        # Only one protocol can be filtered on: when several of these flags
        # are set, the last one in this order overwrites the earlier ones.
        for opt in ("ftp", "http", "https"):
            if getattr(options, opt):
                filters["proto"] = opt
                self.output.print_info(f"Limiting test to {opt} hosts. \n")

        self.proxies = {}
        for env_name in ("http_proxy", "https_proxy"):
            scheme = env_name.split("_")[0]
            # An explicit proxy option whose text contains "<scheme>:" takes
            # precedence over the matching environment variable.
            if options.proxy and scheme + ":" in options.proxy:
                self.proxies[scheme] = options.proxy
            else:
                env_value = os.getenv(env_name)
                if env_value:
                    self.proxies[scheme] = env_value

        parser = MirrorParser3()
        self.hosts = []

        self.unfiltered_hosts = self.getlist(parser, list_url)

        self.hosts = self.filter_hosts(filters, self.unfiltered_hosts)

        self.output.write(
            f"Extractor(): fetched mirrors, {len(self.hosts)} hosts after filtering\n",
            2,
        )

    @staticmethod
    def filter_hosts(filters, hosts):
        """Return the hosts whose data matches every entry in filters.

        filters maps a data key to the value it must have; hosts is an
        iterable of (uri, data) pairs where data is indexable by those keys.
        With no filters the hosts object is returned unchanged; otherwise a
        new list of the matching (uri, data) pairs is returned.
        """
        if not filters:
            return hosts
        # all() stops at the first mismatching filter for a host.
        return [
            (uri, data)
            for uri, data in hosts
            if all(data[key] == wanted for key, wanted in filters.items())
        ]

    def getlist(self, parser, url):
        """Download url, feed it to parser and return parser.tuples().

        A failed download (connection error, timeout, proxy error, ...) or
        an unsuccessful HTTP status leaves the parser untouched, so the
        empty-list error below is reported instead of a traceback.
        """

        self.output.write("getlist(): fetching " + url + "\n", 2)

        self.output.print_info("Downloading a list of mirrors...\n")

        try:
            response = requests.get(
                url,
                timeout=60,
                proxies=self.proxies,
                headers={"User-Agent": USERAGENT},
            )
        except requests.exceptions.RequestException as err:
            # Record the cause, then fall through to the common error path.
            self.output.write(f"getlist(): download failed: {err}\n", 2)
            response = None

        # A requests Response is falsy for 4xx/5xx status codes.
        if response:
            parser.parse(response.text)

        mirrors = parser.tuples()

        if not mirrors:
            self.output.print_err(
                "Could not get mirror list. Check your internet connection."
            )

        self.output.write(" Got %d mirrors.\n" % len(mirrors))

        return mirrors