import re
import urlparse
from cherrypy.lib.tidy import html_break

from web.lib.links import viewcvs_link, \
                          bugzilla_bug_link, \
                          ciavc_link

# We use short variable names!
# pylint: disable-msg=C0103

# Header line of a ChangeLog entry: "<date ending in a 20xx year>; Name <user@host>".
# Any year 2000-2099 is accepted; the earlier pattern stopped at 2019.
re_author = re.compile(r' 20\d\d; ([^<]+)? *<([^@> ]+)@[^>]')
# Fallback: any "user@" preceded by a space (and an optional '<').
re_author2 = re.compile(r' <?([^<@ ]+)@')
def extract_changelog_entry_author(line):
    """From the first line of a changelog entry,
        extract the author name and userid.

        Returns a tuple (authorname, authorid):
        - If the line has the full "20xx; Name <user@host>" form, the
          stripped name and the part of the address before the '@'.
          authorname stays None when the name part is missing.
        - Otherwise authorname is 'Unknown' and authorid is whatever
          precedes the first ' user@' found, or '' if there is none."""
    full = re_author.search(line)
    if full is not None:
        authorname = authorid = None
        if full.group(1):
            authorname = full.group(1).strip()
        if full.group(2):
            authorid = full.group(2).strip()
        return (authorname, authorid)

    # No well-formed header: try harder to find a username only.
    authorname = 'Unknown'
    authorid = ''
    partial = re_author2.search(line)
    if partial is not None and partial.group(1):
        authorid = partial.group(1).strip()
    return (authorname, authorid)

def _single_pass_re_loop(reo, callback, instr):
    """For every match of the given regex, replace the entire match
    with the string given by callback.
    Callback takes the re.match object.

    reo is a compiled regular expression, instr the string to process;
    the new string is returned.

    Each match is substituted at its own position in a single pass, so
    a match that happens to be a substring of another match (e.g.
    "bug 1" inside "bug 12") can no longer corrupt the longer one, and
    text produced by the callback is never rescanned."""
    # A callable replacement is used verbatim by sub(): backslashes in
    # the callback's return value are not interpreted.
    return reo.sub(callback, instr)

re_email1 = re.compile(r'<([^@ ]+)@gentoo.org>')
re_email2 = re.compile(r'([^@ ]+)@gentoo.org')
re_file = re.compile(r'([\+-]?)(\S+)([:,]|[:,]$)')
# Case is spelled out by hand:
# re.IGNORECASE does not work on Unicode strings in 2.4!
re_bugid = re.compile(r'([Bb][uU][gG]\s+?#?)(\d+)')
re_url_base = r'(https?://[^\s/)>]+(?:/[\S]+)?)'
re_url = re.compile(r"([\s<(]*)" + re_url_base + r'''([\s>)"']+?.?|$)?''')
re_url_notend = re.compile(r'[\s.)>\'"]+$')

def _bug_markup(m):
    """Wrap the number of a 'bug #123' match in __BUG__ markers."""
    return '%s__BUG__%s__/BUG__' % (m.group(1), m.group(2))

def _url_markup(m):
    """Wrap the URL of a re_url match in __URL__ markers, moving any
    trailing whitespace/punctuation out of the URL and into the suffix."""
    prefix = m.group(1) or ''
    url = m.group(2)
    suffix = m.group(3) or ''
    trailing = re_url_notend.search(url)
    if trailing:
        extra_suffix = trailing.group(0)
        suffix = extra_suffix + suffix
        url = url[0:-len(extra_suffix)]
    return '%s__URL__%s__/URL__%s' % (prefix, url, suffix)

def _pretty_changelog_pass1(cat, pn, changelog):
    """Changelog prettification, pass1: replace text with markers.

    cat and pn are only used in the diagnostic printed for an entry
    whose author id cannot be determined; changelog is the raw text.

    Leading lines starting with '*' are left untouched. After that:
    - until both the author and the file list have been seen, lines are
      processed word by word: the first word containing '@gentoo.org'
      gets __CIA_VC__ markers, and the words after it get __FILE__
      markers until a line ending in ':' closes the file list;
    - every later line gets __BUG__ and __URL__ markers.

    Returns (marked-up changelog, authorname, authorid); the author
    fields are None when no author word was found. An empty result is
    replaced by "No changelog entry available"."""

    changelog = changelog.strip()
    changelog_lines = changelog.splitlines()
    total = len(changelog_lines)

    # Skip the leading '*' lines.
    i = 0
    while i < total and changelog_lines[i].startswith('*'):
        i += 1

    seen_files = False
    seen_author = False
    authorname = None
    authorid = None
    while i < total:
        oldline = changelog_lines[i]
        if not seen_author or not seen_files:
            # Header part of the entry. Note that this rejoins the words
            # with single spaces, normalising the line's whitespace.
            newline = []
            for w in oldline.split():
                if not seen_author and '@gentoo.org' in w:
                    (authorname, authorid) = \
                            extract_changelog_entry_author(oldline)
                    if authorid == '':
                        # Parenthesised single argument: prints the same
                        # text whether print is a statement or a function.
                        print('Bad changelog entry for %s/%s = "%r"'
                              % (cat, pn, changelog))
                    w = re_email1.sub('__CIA_VC__\\1__/CIA_VC__', w)
                    w = re_email2.sub(' __CIA_VC__\\1__/CIA_VC__', w)
                    seen_author = True
                elif not seen_files and seen_author:
                    w = re_file.sub('\\1__FILE__\\2__/FILE__\\3', w)
                newline.append(w)
            changelog_lines[i] = ' '.join(newline)
        else:
            # Body of the entry: mark bug references, then URLs.
            newline = _single_pass_re_loop(re_bugid, _bug_markup, oldline)
            newline = _single_pass_re_loop(re_url, _url_markup, newline)
            changelog_lines[i] = newline
        # A line ending in ':' terminates the list of changed files.
        if oldline.endswith(':'):
            seen_files = True
        i += 1

    # str.strip() returns a new string, so the result must be kept
    # (re_email2 can leave a leading space on the first line).
    changelog = "\n".join(changelog_lines).strip()
    if not changelog:
        changelog = "No changelog entry available"
    return (changelog, authorname, authorid)

def _pretty_changelog_pass2(changelog):
    """Pass 2: run the marked-up text through html_break and return
        the result. This is the step intended to turn any lurking
        entities, as well as \\n, into HTML."""
    return html_break(changelog)

re_m_ciavc = re.compile(r'__CIA_VC__(\S+)__/CIA_VC__ ?')
def _pretty_changelog_pass3(changelog):
    """Pass 3: turn every __CIA_VC__ author marker into a
        parenthesised link built with ciavc_link."""

    def author_link(match):
        # The captured text is the user id placed between the markers.
        userid = match.group(1)
        href = ciavc_link(userid)
        return '(<a href="%s">%s</a>) ' % (href, userid)

    return _single_pass_re_loop(re_m_ciavc, author_link, changelog)

re_m_file = re.compile(r'__FILE__(\S+)__/FILE__')
def _pretty_changelog_pass4(cat, pn, changelog):
    """Pass 4: turn every __FILE__ marker into a link produced by
        viewcvs_link for the path cat/pn/<filename>."""

    def file_link(match):
        name = match.group(1)
        href = viewcvs_link('%s/%s/%s' % (cat, pn, name))
        return '<a href="%s">%s</a>' % (href, name)

    return _single_pass_re_loop(re_m_file, file_link, changelog)

re_m_bug = re.compile(r'__BUG__(\d+)__/BUG__')
def _pretty_changelog_pass5(changelog):
    """Pass 5: turn every __BUG__ marker into a link produced by
        bugzilla_bug_link, keeping the bug number as the link text."""

    def bug_link(match):
        number = match.group(1)
        # The link helper is given the id as an integer.
        href = bugzilla_bug_link(int(number))
        return '<a href="%s">%s</a>' % (href, number)

    return _single_pass_re_loop(re_m_bug, bug_link, changelog)

re_m_url = re.compile(r'__URL__(\S+)__/URL__')
def _pretty_changelog_pass6(changelog):
    """Pass 6: turn every __URL__ marker into an anchor whose target
        and text are both the URL."""

    def url_link(match):
        target = match.group(1)
        # Scheme-less addresses are assumed to be plain http.
        if not urlparse.urlparse(target).scheme:
            target = 'http://' + target
        return '<a href="%s">%s</a>' % (target, target)

    return _single_pass_re_loop(re_m_url, url_link, changelog)


def pretty_changelog(cat, pn, changelog):
    """Render a changelog snippet as HTML by running it through the
        six prettification passes in order:
        1. plain text -> intermediate markers (also finds the author)
        2. HTML conversion of the remaining text
        3. author markers -> CIA.vc links
        4. file markers -> ViewCVS links
        5. bug markers -> Bugzilla links
        6. URL markers -> real links
        Returns (html, authorname, authorid), the author fields being
        those reported by the first pass."""
    html, authorname, authorid = _pretty_changelog_pass1(cat, pn, changelog)
    html = _pretty_changelog_pass2(html)
    html = _pretty_changelog_pass3(html)
    # Only the file pass needs the category and package name.
    html = _pretty_changelog_pass4(cat, pn, html)
    html = _pretty_changelog_pass5(html)
    html = _pretty_changelog_pass6(html)
    return (html, authorname, authorid)

def _collapse_tail(text, length, ellipsis):
    """Cut text down to `length` characters, the last of which is the
    ellipsis marker; a non-positive length yields an empty string."""
    if length <= 0:
        return ''
    return text[:length - 1] + ellipsis

def optimal_collapse(atom, pnlength, pvlength, ellipsis = '^'):
    """Shrink the PN-PV string using well-placed ellipsis characters so
        that the maximum length of the string does not exceed the sum of
        the max specified PN and PV lengths (plus one for the '-'
        separator). Retain the maximum amount of information.

        atom must provide `package` and `fullver` attributes; a fullver
        of None means there is no version and no separator.
        The version is shortened first, down to at most pvlength
        characters, and only then the package name, down to pnlength.
        Shortening stops as soon as the whole string fits, so a short
        version leaves more room for the name. A shortened part ends
        with `ellipsis`, which is counted as one character.

        The amount to remove is computed directly, so arbitrarily long
        inputs are always brought within the limit."""
    pn = atom.package
    pv = atom.fullver
    sep = '-'
    if pv is None:
        pv = ''
        sep = ''

    maxlength = pnlength + pvlength + 1
    excess = len(pn) + len(sep) + len(pv) - maxlength
    if excess > 0:
        # Take as much as allowed from the version, the rest from the name.
        pv_cut = min(excess, max(len(pv) - pvlength, 0))
        pn_cut = min(excess - pv_cut, max(len(pn) - pnlength, 0))
        if pv_cut:
            pv = _collapse_tail(pv, len(pv) - pv_cut, ellipsis)
        if pn_cut:
            pn = _collapse_tail(pn, len(pn) - pn_cut, ellipsis)
    return "%s%s%s" % (pn, sep, pv)

# vim:ts=4 et ft=python: