aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'cvs2svn_rcsparse/common.py')
-rw-r--r--cvs2svn_rcsparse/common.py324
1 files changed, 324 insertions, 0 deletions
diff --git a/cvs2svn_rcsparse/common.py b/cvs2svn_rcsparse/common.py
new file mode 100644
index 0000000..3eed600
--- /dev/null
+++ b/cvs2svn_rcsparse/common.py
@@ -0,0 +1,324 @@
+# -*-python-*-
+#
+# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
+#
+# By using this file, you agree to the terms and conditions set forth in
+# the LICENSE.html file which can be found at the top level of the ViewVC
+# distribution or at http://viewvc.org/license-1.html.
+#
+# For more information, visit http://viewvc.org/
+#
+# -----------------------------------------------------------------------
+
+"""common.py: common classes and functions for the RCS parsing tools."""
+
+import calendar
+import string
+
+class Sink:
+ def set_head_revision(self, revision):
+ pass
+
+ def set_principal_branch(self, branch_name):
+ pass
+
+ def set_access(self, accessors):
+ pass
+
+ def define_tag(self, name, revision):
+ pass
+
+ def set_locker(self, revision, locker):
+ pass
+
+ def set_locking(self, mode):
+ """Used to signal locking mode.
+
+ Called with mode argument 'strict' if strict locking
+ Not called when no locking used."""
+
+ pass
+
+ def set_comment(self, comment):
+ pass
+
+ def set_expansion(self, mode):
+ pass
+
+ def admin_completed(self):
+ pass
+
+ def define_revision(self, revision, timestamp, author, state,
+ branches, next):
+ pass
+
+ def tree_completed(self):
+ pass
+
+ def set_description(self, description):
+ pass
+
+ def set_revision_info(self, revision, log, text):
+ pass
+
+ def parse_completed(self):
+ pass
+
+
+# --------------------------------------------------------------------------
+#
+# EXCEPTIONS USED BY RCSPARSE
+#
+
+class RCSParseError(Exception):
+ pass
+
+
+class RCSIllegalCharacter(RCSParseError):
+ pass
+
+
+class RCSExpected(RCSParseError):
+ def __init__(self, got, wanted):
+ RCSParseError.__init__(
+ self,
+ 'Unexpected parsing error in RCS file.\n'
+ 'Expected token: %s, but saw: %s'
+ % (wanted, got)
+ )
+
+
+class RCSStopParser(Exception):
+ pass
+
+
+# --------------------------------------------------------------------------
+#
+# STANDARD TOKEN STREAM-BASED PARSER
+#
+
+class _Parser:
+ stream_class = None # subclasses need to define this
+
+ def _read_until_semicolon(self):
+ """Read all tokens up to and including the next semicolon token.
+
+ Return the tokens (not including the semicolon) as a list."""
+
+ tokens = []
+
+ while 1:
+ token = self.ts.get()
+ if token == ';':
+ break
+ tokens.append(token)
+
+ return tokens
+
+ def _parse_admin_head(self, token):
+ rev = self.ts.get()
+ if rev == ';':
+ # The head revision is not specified. Just drop the semicolon
+ # on the floor.
+ pass
+ else:
+ self.sink.set_head_revision(rev)
+ self.ts.match(';')
+
+ def _parse_admin_branch(self, token):
+ branch = self.ts.get()
+ if branch != ';':
+ self.sink.set_principal_branch(branch)
+ self.ts.match(';')
+
+ def _parse_admin_access(self, token):
+ accessors = self._read_until_semicolon()
+ if accessors:
+ self.sink.set_access(accessors)
+
+ def _parse_admin_symbols(self, token):
+ while 1:
+ tag_name = self.ts.get()
+ if tag_name == ';':
+ break
+ self.ts.match(':')
+ tag_rev = self.ts.get()
+ self.sink.define_tag(tag_name, tag_rev)
+
+ def _parse_admin_locks(self, token):
+ while 1:
+ locker = self.ts.get()
+ if locker == ';':
+ break
+ self.ts.match(':')
+ rev = self.ts.get()
+ self.sink.set_locker(rev, locker)
+
+ def _parse_admin_strict(self, token):
+ self.sink.set_locking("strict")
+ self.ts.match(';')
+
+ def _parse_admin_comment(self, token):
+ self.sink.set_comment(self.ts.get())
+ self.ts.match(';')
+
+ def _parse_admin_expand(self, token):
+ expand_mode = self.ts.get()
+ self.sink.set_expansion(expand_mode)
+ self.ts.match(';')
+
+ admin_token_map = {
+ 'head' : _parse_admin_head,
+ 'branch' : _parse_admin_branch,
+ 'access' : _parse_admin_access,
+ 'symbols' : _parse_admin_symbols,
+ 'locks' : _parse_admin_locks,
+ 'strict' : _parse_admin_strict,
+ 'comment' : _parse_admin_comment,
+ 'expand' : _parse_admin_expand,
+ 'desc' : None,
+ }
+
+ def parse_rcs_admin(self):
+ while 1:
+ # Read initial token at beginning of line
+ token = self.ts.get()
+
+ try:
+ f = self.admin_token_map[token]
+ except KeyError:
+ # We're done once we reach the description of the RCS tree
+ if token[0] in string.digits:
+ self.ts.unget(token)
+ return
+ else:
+ # Chew up "newphrase"
+ # warn("Unexpected RCS token: $token\n")
+ pass
+ else:
+ if f is None:
+ self.ts.unget(token)
+ return
+ else:
+ f(self, token)
+
+ def _parse_rcs_tree_entry(self, revision):
+ # Parse date
+ self.ts.match('date')
+ date = self.ts.get()
+ self.ts.match(';')
+
+ # Convert date into timestamp
+ date_fields = string.split(date, '.')
+ # According to rcsfile(5): the year "contains just the last two
+ # digits of the year for years from 1900 through 1999, and all the
+ # digits of years thereafter".
+ if len(date_fields[0]) == 2:
+ date_fields[0] = '19' + date_fields[0]
+ date_fields = map(string.atoi, date_fields)
+ EPOCH = 1970
+ if date_fields[0] < EPOCH:
+ raise ValueError, 'invalid year'
+ timestamp = calendar.timegm(tuple(date_fields) + (0, 0, 0,))
+
+ # Parse author
+ ### NOTE: authors containing whitespace are violations of the
+ ### RCS specification. We are making an allowance here because
+ ### CVSNT is known to produce these sorts of authors.
+ self.ts.match('author')
+ author = ' '.join(self._read_until_semicolon())
+
+ # Parse state
+ self.ts.match('state')
+ state = ''
+ while 1:
+ token = self.ts.get()
+ if token == ';':
+ break
+ state = state + token + ' '
+ state = state[:-1] # toss the trailing space
+
+ # Parse branches
+ self.ts.match('branches')
+ branches = self._read_until_semicolon()
+
+ # Parse revision of next delta in chain
+ self.ts.match('next')
+ next = self.ts.get()
+ if next == ';':
+ next = None
+ else:
+ self.ts.match(';')
+
+ # there are some files with extra tags in them. for example:
+ # owner 640;
+ # group 15;
+ # permissions 644;
+ # hardlinks @configure.in@;
+ # this is "newphrase" in RCSFILE(5). we just want to skip over these.
+ while 1:
+ token = self.ts.get()
+ if token == 'desc' or token[0] in string.digits:
+ self.ts.unget(token)
+ break
+ # consume everything up to the semicolon
+ self._read_until_semicolon()
+
+ self.sink.define_revision(revision, timestamp, author, state, branches,
+ next)
+
+ def parse_rcs_tree(self):
+ while 1:
+ revision = self.ts.get()
+
+ # End of RCS tree description ?
+ if revision == 'desc':
+ self.ts.unget(revision)
+ return
+
+ self._parse_rcs_tree_entry(revision)
+
+ def parse_rcs_description(self):
+ self.ts.match('desc')
+ self.sink.set_description(self.ts.get())
+
+ def parse_rcs_deltatext(self):
+ while 1:
+ revision = self.ts.get()
+ if revision is None:
+ # EOF
+ break
+ text, sym2, log, sym1 = self.ts.mget(4)
+ if sym1 != 'log':
+ print `text[:100], sym2[:100], log[:100], sym1[:100]`
+ raise RCSExpected(sym1, 'log')
+ if sym2 != 'text':
+ raise RCSExpected(sym2, 'text')
+ ### need to add code to chew up "newphrase"
+ self.sink.set_revision_info(revision, log, text)
+
+ def parse(self, file, sink):
+ self.ts = self.stream_class(file)
+ self.sink = sink
+
+ self.parse_rcs_admin()
+
+ # let sink know when the admin section has been completed
+ self.sink.admin_completed()
+
+ self.parse_rcs_tree()
+
+ # many sinks want to know when the tree has been completed so they can
+ # do some work to prep for the arrival of the deltatext
+ self.sink.tree_completed()
+
+ self.parse_rcs_description()
+ self.parse_rcs_deltatext()
+
+ # easiest for us to tell the sink it is done, rather than worry about
+ # higher level software doing it.
+ self.sink.parse_completed()
+
+ self.ts = self.sink = None
+
+# --------------------------------------------------------------------------