From 2919cf0d03b37050c6624d97547653d1fffa033d Mon Sep 17 00:00:00 2001 From: Brian Harring Date: Sat, 13 Oct 2012 17:49:44 -0700 Subject: import of content; note rcsparse has had my old http://cvs2svn.tigris.org/nonav/issues/showattachment.cgi/64/rcparse_redundant_work.patch patch applied. --- .gitignore | 2 + config | 588 ++++++++ cvs2svn_lib/__init__.py | 18 + cvs2svn_lib/apple_single_filter.py | 292 ++++ cvs2svn_lib/artifact.py | 59 + cvs2svn_lib/artifact_manager.py | 256 ++++ cvs2svn_lib/bzr_run_options.py | 175 +++ cvs2svn_lib/changeset.py | 269 ++++ cvs2svn_lib/changeset_database.py | 70 + cvs2svn_lib/changeset_graph.py | 456 ++++++ cvs2svn_lib/changeset_graph_link.py | 149 ++ cvs2svn_lib/changeset_graph_node.py | 50 + cvs2svn_lib/check_dependencies_pass.py | 144 ++ cvs2svn_lib/checkout_internal.py | 778 +++++++++++ cvs2svn_lib/collect_data.py | 1431 +++++++++++++++++++ cvs2svn_lib/common.py | 409 ++++++ cvs2svn_lib/config.py | 221 +++ cvs2svn_lib/context.py | 93 ++ cvs2svn_lib/cvs_file.py | 287 ++++ cvs2svn_lib/cvs_file_database.py | 75 + cvs2svn_lib/cvs_file_items.py | 1075 +++++++++++++++ cvs2svn_lib/cvs_item.py | 901 ++++++++++++ cvs2svn_lib/cvs_item_database.py | 248 ++++ cvs2svn_lib/cvs_revision_manager.py | 85 ++ cvs2svn_lib/database.py | 322 +++++ cvs2svn_lib/dumpfile_delegate.py | 510 +++++++ cvs2svn_lib/fill_source.py | 192 +++ cvs2svn_lib/fulltext_revision_recorder.py | 127 ++ cvs2svn_lib/git_output_option.py | 658 +++++++++ cvs2svn_lib/git_revision_recorder.py | 114 ++ cvs2svn_lib/git_run_options.py | 274 ++++ cvs2svn_lib/key_generator.py | 45 + cvs2svn_lib/log.py | 174 +++ cvs2svn_lib/main.py | 117 ++ cvs2svn_lib/man_writer.py | 197 +++ cvs2svn_lib/metadata.py | 26 + cvs2svn_lib/metadata_database.py | 102 ++ cvs2svn_lib/openings_closings.py | 236 ++++ cvs2svn_lib/output_option.py | 85 ++ cvs2svn_lib/pass_manager.py | 215 +++ cvs2svn_lib/passes.py | 1837 +++++++++++++++++++++++++ cvs2svn_lib/persistence_manager.py | 106 ++ cvs2svn_lib/process.py | 
116 ++ cvs2svn_lib/project.py | 219 +++ cvs2svn_lib/property_setters.py | 385 ++++++ cvs2svn_lib/rcs_revision_manager.py | 51 + cvs2svn_lib/rcs_stream.py | 149 ++ cvs2svn_lib/record_table.py | 399 ++++++ cvs2svn_lib/repository_delegate.py | 98 ++ cvs2svn_lib/repository_mirror.py | 897 ++++++++++++ cvs2svn_lib/revision_manager.py | 189 +++ cvs2svn_lib/run_options.py | 1035 ++++++++++++++ cvs2svn_lib/serializer.py | 146 ++ cvs2svn_lib/stats_keeper.py | 189 +++ cvs2svn_lib/stdout_delegate.py | 107 ++ cvs2svn_lib/svn_commit.py | 381 +++++ cvs2svn_lib/svn_commit_creator.py | 217 +++ cvs2svn_lib/svn_commit_item.py | 50 + cvs2svn_lib/svn_output_option.py | 753 ++++++++++ cvs2svn_lib/svn_repository_delegate.py | 121 ++ cvs2svn_lib/svn_revision_range.py | 171 +++ cvs2svn_lib/svn_run_options.py | 543 ++++++++ cvs2svn_lib/symbol.py | 246 ++++ cvs2svn_lib/symbol_database.py | 68 + cvs2svn_lib/symbol_statistics.py | 521 +++++++ cvs2svn_lib/symbol_strategy.py | 685 +++++++++ cvs2svn_lib/symbol_transform.py | 236 ++++ cvs2svn_lib/time_range.py | 44 + cvs2svn_lib/version.py | 27 + cvs2svn_rcsparse/__init__.py | 26 + cvs2svn_rcsparse/common.py | 324 +++++ cvs2svn_rcsparse/debug.py | 122 ++ cvs2svn_rcsparse/default.py | 172 +++ cvs2svn_rcsparse/parse_rcs_file.py | 73 + cvs2svn_rcsparse/rcparse_redundant_work.patch | 99 ++ cvs2svn_rcsparse/run-tests.py | 73 + cvs2svn_rcsparse/texttools.py | 348 +++++ 77 files changed, 22748 insertions(+) create mode 100644 .gitignore create mode 100644 config create mode 100644 cvs2svn_lib/__init__.py create mode 100644 cvs2svn_lib/apple_single_filter.py create mode 100644 cvs2svn_lib/artifact.py create mode 100644 cvs2svn_lib/artifact_manager.py create mode 100644 cvs2svn_lib/bzr_run_options.py create mode 100644 cvs2svn_lib/changeset.py create mode 100644 cvs2svn_lib/changeset_database.py create mode 100644 cvs2svn_lib/changeset_graph.py create mode 100644 cvs2svn_lib/changeset_graph_link.py create mode 100644 cvs2svn_lib/changeset_graph_node.py 
create mode 100644 cvs2svn_lib/check_dependencies_pass.py create mode 100644 cvs2svn_lib/checkout_internal.py create mode 100644 cvs2svn_lib/collect_data.py create mode 100644 cvs2svn_lib/common.py create mode 100644 cvs2svn_lib/config.py create mode 100644 cvs2svn_lib/context.py create mode 100644 cvs2svn_lib/cvs_file.py create mode 100644 cvs2svn_lib/cvs_file_database.py create mode 100644 cvs2svn_lib/cvs_file_items.py create mode 100644 cvs2svn_lib/cvs_item.py create mode 100644 cvs2svn_lib/cvs_item_database.py create mode 100644 cvs2svn_lib/cvs_revision_manager.py create mode 100644 cvs2svn_lib/database.py create mode 100644 cvs2svn_lib/dumpfile_delegate.py create mode 100644 cvs2svn_lib/fill_source.py create mode 100644 cvs2svn_lib/fulltext_revision_recorder.py create mode 100644 cvs2svn_lib/git_output_option.py create mode 100644 cvs2svn_lib/git_revision_recorder.py create mode 100644 cvs2svn_lib/git_run_options.py create mode 100644 cvs2svn_lib/key_generator.py create mode 100644 cvs2svn_lib/log.py create mode 100644 cvs2svn_lib/main.py create mode 100644 cvs2svn_lib/man_writer.py create mode 100644 cvs2svn_lib/metadata.py create mode 100644 cvs2svn_lib/metadata_database.py create mode 100644 cvs2svn_lib/openings_closings.py create mode 100644 cvs2svn_lib/output_option.py create mode 100644 cvs2svn_lib/pass_manager.py create mode 100644 cvs2svn_lib/passes.py create mode 100644 cvs2svn_lib/persistence_manager.py create mode 100644 cvs2svn_lib/process.py create mode 100644 cvs2svn_lib/project.py create mode 100644 cvs2svn_lib/property_setters.py create mode 100644 cvs2svn_lib/rcs_revision_manager.py create mode 100644 cvs2svn_lib/rcs_stream.py create mode 100644 cvs2svn_lib/record_table.py create mode 100644 cvs2svn_lib/repository_delegate.py create mode 100644 cvs2svn_lib/repository_mirror.py create mode 100644 cvs2svn_lib/revision_manager.py create mode 100644 cvs2svn_lib/run_options.py create mode 100644 cvs2svn_lib/serializer.py create mode 100644 
cvs2svn_lib/stats_keeper.py create mode 100644 cvs2svn_lib/stdout_delegate.py create mode 100644 cvs2svn_lib/svn_commit.py create mode 100644 cvs2svn_lib/svn_commit_creator.py create mode 100644 cvs2svn_lib/svn_commit_item.py create mode 100644 cvs2svn_lib/svn_output_option.py create mode 100644 cvs2svn_lib/svn_repository_delegate.py create mode 100644 cvs2svn_lib/svn_revision_range.py create mode 100644 cvs2svn_lib/svn_run_options.py create mode 100644 cvs2svn_lib/symbol.py create mode 100644 cvs2svn_lib/symbol_database.py create mode 100644 cvs2svn_lib/symbol_statistics.py create mode 100644 cvs2svn_lib/symbol_strategy.py create mode 100644 cvs2svn_lib/symbol_transform.py create mode 100644 cvs2svn_lib/time_range.py create mode 100644 cvs2svn_lib/version.py create mode 100644 cvs2svn_rcsparse/__init__.py create mode 100644 cvs2svn_rcsparse/common.py create mode 100644 cvs2svn_rcsparse/debug.py create mode 100644 cvs2svn_rcsparse/default.py create mode 100644 cvs2svn_rcsparse/parse_rcs_file.py create mode 100644 cvs2svn_rcsparse/rcparse_redundant_work.patch create mode 100644 cvs2svn_rcsparse/run-tests.py create mode 100644 cvs2svn_rcsparse/texttools.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8b5efc7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +cvs-repo +output diff --git a/config b/config new file mode 100644 index 0000000..94c17d7 --- /dev/null +++ b/config @@ -0,0 +1,588 @@ +# (Be in -*- mode: python; coding: utf-8 -*- mode.) +# +# ==================================================================== +# Copyright (c) 2006-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. 
+# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +# ##################### +# ## PLEASE READ ME! ## +# ##################### +# +# This is a template for an options file that can be used to configure +# cvs2svn to convert to git rather than to Subversion. See +# www/cvs2git.html and www/cvs2svn.html for general information, and +# see the comments in this file for information about what options are +# available and how they can be set. +# +# The program that is run to convert from CVS to git is called +# cvs2git. Run it with the --options option, passing it this file +# like this: +# +# cvs2git --options=cvs2git-example.options +# +# The output of cvs2git is a blob file and a dump file that can be +# loaded into git using the "git fast-import" command. Please read +# www/cvs2git.html for more information. +# +# Many options do not have defaults, so it is easier to copy this file +# and modify what you need rather than creating a new options file +# from scratch. This file is in Python syntax, but you don't need to +# know Python to modify it. But if you *do* know Python, then you +# will be happy to know that you can use arbitary Python constructs to +# do fancy configuration tricks. +# +# But please be aware of the following: +# +# * In many places, leading whitespace is significant in Python (it is +# used instead of curly braces to group statements together). +# Therefore, if you don't know what you are doing, it is best to +# leave the whitespace as it is. +# +# * In normal strings, Python treats a backslash ("\") as an escape +# character. 
Therefore, if you want to specify a string that +# contains a backslash, you need either to escape the backslash with +# another backslash ("\\"), or use a "raw string", as in one if the +# following equivalent examples: +# +# ctx.sort_executable = 'c:\\windows\\system32\\sort.exe' +# ctx.sort_executable = r'c:\windows\system32\sort.exe' +# +# See http://docs.python.org/tutorial/introduction.html#strings for +# more information. +# +# Two identifiers will have been defined before this file is executed, +# and can be used freely within this file: +# +# ctx -- a Ctx object (see cvs2svn_lib/context.py), which holds +# many configuration options +# +# run_options -- an instance of the GitRunOptions class (see +# cvs2svn_lib/git_run_options.py), which holds some variables +# governing how cvs2git is run + + +# Import some modules that are used in setting the options: +import re + +from cvs2svn_lib import config +from cvs2svn_lib import changeset_database +from cvs2svn_lib.common import CVSTextDecoder +from cvs2svn_lib.log import Log +from cvs2svn_lib.project import Project +from cvs2svn_lib.git_revision_recorder import GitRevisionRecorder +from cvs2svn_lib.git_output_option import GitRevisionMarkWriter +from cvs2svn_lib.git_output_option import GitOutputOption +from cvs2svn_lib.revision_manager import NullRevisionRecorder +from cvs2svn_lib.revision_manager import NullRevisionExcluder +from cvs2svn_lib.fulltext_revision_recorder \ + import SimpleFulltextRevisionRecorderAdapter +from cvs2svn_lib.rcs_revision_manager import RCSRevisionReader +from cvs2svn_lib.cvs_revision_manager import CVSRevisionReader +from cvs2svn_lib.checkout_internal import InternalRevisionRecorder +from cvs2svn_lib.checkout_internal import InternalRevisionExcluder +from cvs2svn_lib.checkout_internal import InternalRevisionReader +from cvs2svn_lib.symbol_strategy import AllBranchRule +from cvs2svn_lib.symbol_strategy import AllTagRule +from cvs2svn_lib.symbol_strategy import BranchIfCommitsRule +from 
cvs2svn_lib.symbol_strategy import ExcludeRegexpStrategyRule +from cvs2svn_lib.symbol_strategy import ForceBranchRegexpStrategyRule +from cvs2svn_lib.symbol_strategy import ForceTagRegexpStrategyRule +from cvs2svn_lib.symbol_strategy import ExcludeTrivialImportBranchRule +from cvs2svn_lib.symbol_strategy import ExcludeVendorBranchRule +from cvs2svn_lib.symbol_strategy import HeuristicStrategyRule +from cvs2svn_lib.symbol_strategy import UnambiguousUsageRule +from cvs2svn_lib.symbol_strategy import HeuristicPreferredParentRule +from cvs2svn_lib.symbol_strategy import SymbolHintsFileRule +from cvs2svn_lib.symbol_transform import ReplaceSubstringsSymbolTransform +from cvs2svn_lib.symbol_transform import RegexpSymbolTransform +from cvs2svn_lib.symbol_transform import IgnoreSymbolTransform +from cvs2svn_lib.symbol_transform import NormalizePathsSymbolTransform +from cvs2svn_lib.property_setters import AutoPropsPropertySetter +from cvs2svn_lib.property_setters import CVSBinaryFileDefaultMimeTypeSetter +from cvs2svn_lib.property_setters import CVSBinaryFileEOLStyleSetter +from cvs2svn_lib.property_setters import CVSRevisionNumberSetter +from cvs2svn_lib.property_setters import DefaultEOLStyleSetter +from cvs2svn_lib.property_setters import EOLStyleFromMimeTypeSetter +from cvs2svn_lib.property_setters import ExecutablePropertySetter +from cvs2svn_lib.property_setters import KeywordsPropertySetter +from cvs2svn_lib.property_setters import MimeMapper +from cvs2svn_lib.property_setters import SVNBinaryFileKeywordsPropertySetter + +# To choose the level of logging output, uncomment one of the +# following lines: +#Log().log_level = Log.WARN +#Log().log_level = Log.QUIET +#Log().log_level = Log.NORMAL +#Log().log_level = Log.VERBOSE +Log().log_level = Log.DEBUG + + +# During CollectRevsPass, cvs2git records the contents of file +# revisions into a "blob" file in git-fast-import format. 
This option +# configures that process: +ctx.revision_recorder = SimpleFulltextRevisionRecorderAdapter( + # The following option specifies how the revision contents of the RCS + # files should be read. + # + # RCSRevisionReader uses RCS's "co" program to extract the revision + # contents of the RCS files during CollectRevsPass. The constructor + # argument specifies how to invoke the "co" executable. + # + # CVSRevisionReader uses the "cvs" program to extract the revision + # contents out of the RCS files during OutputPass. This option is + # considerably slower than RCSRevisionReader because "cvs" is + # considerably slower than "co". However, it works in some situations + # where RCSRevisionReader fails; see the HTML documentation of the + # "--use-cvs" option for details. The constructor argument specifies + # how to invoke the "co" executable. + # + # Uncomment one of the two following lines: + RCSRevisionReader(co_executable=r'co'), + #CVSRevisionReader(cvs_executable=r'cvs'), + + # The file in which to write the git-fast-import stream that + # contains the file revision contents: + GitRevisionRecorder('cvs2svn-tmp/git-blob.dat'), + ) + +# cvs2git does not need to keep track of what revisions will be +# excluded, so leave this option unchanged: +ctx.revision_excluder = NullRevisionExcluder() + +# cvs2git doesn't need a revision reader because OutputPass only +# refers to blobs that were output during CollectRevsPass, so leave +# this option set to None. +ctx.revision_reader = None + +# Set the name (and optionally the path) of some other executables +# required by cvs2svn: +ctx.sort_executable = r'sort' + +# Change the following line to True if the conversion should only +# include the trunk of the repository (i.e., all branches and tags +# should be omitted from the conversion): +ctx.trunk_only = False + +# How to convert CVS author names, log messages, and filenames to +# Unicode. 
The first argument to CVSTextDecoder is a list of encoders +# that are tried in order in 'strict' mode until one of them succeeds. +# If none of those succeeds, then fallback_encoder (if it is +# specified) is used in lossy 'replace' mode. Setting a fallback +# encoder ensures that the encoder always succeeds, but it can cause +# information loss. +ctx.cvs_author_decoder = CVSTextDecoder( + [ + #'latin1', + #'utf8', + 'ascii', + ], + fallback_encoding='latin1' + ) +ctx.cvs_log_decoder = CVSTextDecoder( + [ + #'latin1', + #'utf8', + 'ascii', + ], + fallback_encoding='latin1' + ) +# You might want to be especially strict when converting filenames to +# Unicode (e.g., maybe not specify a fallback_encoding). +ctx.cvs_filename_decoder = CVSTextDecoder( + [ + #'latin1', + #'utf8', + 'ascii', + ], + #fallback_encoding='ascii' + ) + +# Template for the commit message to be used for initial project +# commits. +ctx.initial_project_commit_message = ( + 'Standard project directories initialized by cvs2svn.' + ) + +# Template for the commit message to be used for post commits, in +# which modifications to a vendor branch are copied back to trunk. +# This message can use '%(revnum)d' to include the SVN revision number +# of the revision that included the change to the vendor branch +# (admittedly rather pointless in a cvs2git conversion). +ctx.post_commit_message = ( + 'This commit was generated by cvs2svn to track changes on a CVS ' + 'vendor branch.' + ) + +# Template for the commit message to be used for commits in which +# symbols are created. This message can use '%(symbol_type)d' to +# include the type of the symbol ('branch' or 'tag') or +# '%(symbol_name)' to include the name of the symbol. +ctx.symbol_commit_message = ( + "This commit was manufactured by cvs2svn to create %(symbol_type)s " + "'%(symbol_name)s'." 
+ ) + +# Some CVS clients for MacOS store resource fork data into CVS along +# with the file contents itself by wrapping it all up in a container +# format called "AppleSingle". Subversion currently does not support +# MacOS resource forks. Nevertheless, sometimes the resource fork +# information is not necessary and can be discarded. Set the +# following option to True if you would like cvs2svn to identify files +# whose contents are encoded in AppleSingle format, and discard all +# but the data fork for such files before committing them to +# Subversion. (Please note that AppleSingle contents are identified +# by the AppleSingle magic number as the first four bytes of the file. +# This check is not failproof, so only set this option if you think +# you need it.) +ctx.decode_apple_single = False + +# This option can be set to the name of a filename to which are stored +# statistics and conversion decisions about the CVS symbols. +ctx.symbol_info_filename = None +#ctx.symbol_info_filename = 'symbol-info.txt' + +# cvs2svn uses "symbol strategy rules" to help decide how to handle +# CVS symbols. The rules in a project's symbol_strategy_rules are +# applied in order, and each rule is allowed to modify the symbol. +# The result (after each of the rules has been applied) is used for +# the conversion. +# +# 1. A CVS symbol might be used as a tag in one file and as a branch +# in another file. cvs2svn has to decide whether to convert such a +# symbol as a tag or as a branch. cvs2svn uses a series of +# heuristic rules to decide how to convert a symbol. The user can +# override the default rules for specific symbols or symbols +# matching regular expressions. +# +# 2. cvs2svn is also capable of excluding symbols from the conversion +# (provided no other symbols depend on them. +# +# 3. CVS does not record unambiguously the line of development from +# which a symbol sprouted. cvs2svn uses a heuristic to choose a +# symbol's "preferred parents". 
+# +# The standard branch/tag/exclude StrategyRules do not change a symbol +# that has already been processed by an earlier rule, so in effect the +# first matching rule is the one that is used. + +global_symbol_strategy_rules = [ + # It is possible to specify manually exactly how symbols should be + # converted and what line of development should be used as the + # preferred parent. To do so, create a file containing the symbol + # hints and enable the following option. + # + # The format of the hints file is described in the documentation + # for the --symbol-hints command-line option. The file output by + # the --write-symbol-info (i.e., ctx.symbol_info_filename) option + # is in the same format. The simplest way to use this option is + # to run the conversion through CollateSymbolsPass with + # --write-symbol-info option, copy the symbol info and edit it to + # create a hints file, then re-start the conversion at + # CollateSymbolsPass with this option enabled. + #SymbolHintsFileRule('symbol-hints.txt'), + + # To force all symbols matching a regular expression to be + # converted as branches, add rules like the following: + #ForceBranchRegexpStrategyRule(r'branch.*'), + + # To force all symbols matching a regular expression to be + # converted as tags, add rules like the following: + #ForceTagRegexpStrategyRule(r'tag.*'), + + # To force all symbols matching a regular expression to be + # excluded from the conversion, add rules like the following: + #ExcludeRegexpStrategyRule(r'unknown-.*'), + + # Sometimes people use "cvs import" to get their own source code + # into CVS. This practice creates a vendor branch 1.1.1 and + # imports the code onto the vendor branch as 1.1.1.1, then copies + # the same content to the trunk as version 1.1. Normally, such + # vendor branches are useless and they complicate the SVN history + # unnecessarily. 
The following rule excludes any branches that + # only existed as a vendor branch with a single import (leaving + # only the 1.1 revision). If you want to retain such branches, + # comment out the following line. (Please note that this rule + # does not exclude vendor *tags*, as they are not so easy to + # identify.) + ExcludeTrivialImportBranchRule(), + + # To exclude all vendor branches (branches that had "cvs import"s + # on them bug no other kinds of commits), uncomment the following + # line: + #ExcludeVendorBranchRule(), + + # Usually you want this rule, to convert unambiguous symbols + # (symbols that were only ever used as tags or only ever used as + # branches in CVS) the same way they were used in CVS: + UnambiguousUsageRule(), + + # If there was ever a commit on a symbol, then it cannot be + # converted as a tag. This rule causes all such symbols to be + # converted as branches. If you would like to resolve such + # ambiguities manually, comment out the following line: + BranchIfCommitsRule(), + + # Last in the list can be a catch-all rule that is used for + # symbols that were not matched by any of the more specific rules + # above. (Assuming that BranchIfCommitsRule() was included above, + # then the symbols that are still indeterminate at this point can + # sensibly be converted as branches or tags.) Include at most one + # of these lines. If none of these catch-all rules are included, + # then the presence of any ambiguous symbols (that haven't been + # disambiguated above) is an error: + + # Convert ambiguous symbols based on whether they were used more + # often as branches or as tags: + HeuristicStrategyRule(), + # Convert all ambiguous symbols as branches: + #AllBranchRule(), + # Convert all ambiguous symbols as tags: + #AllTagRule(), + + # The last rule is here to choose the preferred parent of branches + # and tags, that is, the line of development from which the symbol + # sprouts. 
+ HeuristicPreferredParentRule(), + ] + +# Specify a username to be used for commits for which CVS doesn't +# record the original author (for example, the creation of a branch). +# This should be a simple (unix-style) username, but it can be +# translated into a git-style name by the author_transforms map. +ctx.username = 'cvs2svn' + +# ctx.svn_property_setters contains a list of rules used to set the +# svn properties on files in the converted archive. For each file, +# the rules are tried one by one. Any rule can add or suppress one or +# more svn properties. Typically the rules will not overwrite +# properties set by a previous rule (though they are free to do so). +# +# Obviously, SVN properties per se are not interesting for a cvs2git +# conversion, but some of these properties have side-effects that do +# affect the git output. FIXME: Document this in more detail. +ctx.svn_property_setters.extend([ + # To read auto-props rules from a file, uncomment the following line + # and specify a filename. The boolean argument specifies whether + # case should be ignored when matching filenames to the filename + # patterns found in the auto-props file: + #AutoPropsPropertySetter( + # r'/home/username/.subversion/config', + # ignore_case=True, + # ), + + # To read mime types from a file, uncomment the following line and + # specify a filename: + #MimeMapper(r'/etc/mime.types'), + + # Omit the svn:eol-style property from any files that are listed + # as binary (i.e., mode '-kb') in CVS: + CVSBinaryFileEOLStyleSetter(), + + # If the file is binary and its svn:mime-type property is not yet + # set, set svn:mime-type to 'application/octet-stream'. + CVSBinaryFileDefaultMimeTypeSetter(), + + # To try to determine the eol-style from the mime type, uncomment + # the following line: + #EOLStyleFromMimeTypeSetter(), + + # Choose one of the following lines to set the default + # svn:eol-style if none of the above rules applied. 
The argument + # is the svn:eol-style that should be applied, or None if no + # svn:eol-style should be set (i.e., the file should be treated as + # binary). + # + # The default is to treat all files as binary unless one of the + # previous rules has determined otherwise, because this is the + # safest approach. However, if you have been diligent about + # marking binary files with -kb in CVS and/or you have used the + # above rules to definitely mark binary files as binary, then you + # might prefer to use 'native' as the default, as it is usually + # the most convenient setting for text files. Other possible + # options: 'CRLF', 'CR', 'LF'. + DefaultEOLStyleSetter(None), + #DefaultEOLStyleSetter('native'), + + # Prevent svn:keywords from being set on files that have + # svn:eol-style unset. + SVNBinaryFileKeywordsPropertySetter(), + + # If svn:keywords has not been set yet, set it based on the file's + # CVS mode: + KeywordsPropertySetter(config.SVN_KEYWORDS_VALUE), + + # Set the svn:executable flag on any files that are marked in CVS as + # being executable: + ExecutablePropertySetter(), + + ]) + +# The directory to use for temporary files: +ctx.tmpdir = r'cvs2svn-tmp' + +# To skip the cleanup of temporary files, uncomment the following +# option: +#ctx.skip_cleanup = True + + +# In CVS, it is perfectly possible to make a single commit that +# affects more than one project or more than one branch of a single +# project. Subversion also allows such commits. Therefore, by +# default, when cvs2svn sees what looks like a cross-project or +# cross-branch CVS commit, it converts it into a +# cross-project/cross-branch Subversion commit. +# +# However, other tools and SCMs have trouble representing +# cross-project or cross-branch commits. (For example, Trac's Revtree +# plugin, http://www.trac-hacks.org/wiki/RevtreePlugin is confused by +# such commits.) Therefore, we provide the following two options to +# allow cross-project/cross-branch commits to be suppressed. 
+ +# cvs2git only supports single-project conversions (multiple-project +# conversions wouldn't really make sense for git anyway). So this +# option must be set to False: +ctx.cross_project_commits = False + +# git itself doesn't allow commits that affect more than one branch, +# so this option must be set to False: +ctx.cross_branch_commits = False + +# cvs2git does not yet handle translating .cvsignore files into +# .gitignore files, so by default, the .cvsignore files are included +# in the conversion output. If you would like to omit the .cvsignore +# files from the output, set this option to False: +ctx.keep_cvsignore = True + +# By default, it is a fatal error for a CVS ",v" file to appear both +# inside and outside of an "Attic" subdirectory (this should never +# happen, but frequently occurs due to botched repository +# administration). If you would like to retain both versions of such +# files, change the following option to True, and the attic version of +# the file will be written to a subdirectory called "Attic" in the +# output repository: +ctx.retain_conflicting_attic_files = False + +# CVS uses unix login names as author names whereas git requires +# author names to be of the form "foo ". The default is to set +# the git author to "cvsauthor ". author_transforms can be +# used to map cvsauthor names (e.g., "jrandom") to a true name and +# email address (e.g., "J. Random " for the +# example shown). All values should be either Unicode strings (i.e., +# with "u" as a prefix) or 8-bit strings in the utf-8 encoding. +# Please substitute your own project's usernames here to use with the +# author_transforms option of GitOutputOption below. +author_transforms={ + 'jrandom' : ('J. 
Random', 'jrandom@example.com'), + 'mhagger' : ('Michael Haggerty', 'mhagger@alum.mit.edu'), + 'brane' : (u'Branko Čibej', 'brane@xbc.nu'), + 'ringstrom' : ('Tobias Ringström', 'tobias@ringstrom.mine.nu'), + 'dionisos' : (u'Erik Hülsmann', 'e.huelsmann@gmx.net'), + + # This one will be used for commits for which CVS doesn't record + # the original author, as explained above. + 'cvs2svn' : ('cvs2svn', 'admin@example.com'), + } + +# This is the main option that causes cvs2svn to output to a +# "fastimport"-format dumpfile rather than to Subversion: +ctx.output_option = GitOutputOption( + # The file in which to write the git-fast-import stream that + # contains the changesets and branch/tag information: + 'cvs2svn-tmp/git-dump.dat', + + # The blobs will be written via the revision recorder, so in + # OutputPass we only have to emit references to the blob marks: + GitRevisionMarkWriter(), + + # This option can be set to an integer to limit the number of + # revisions that are merged with the main parent in any commit. + # For git output, this can be set to None (unlimited), though due + # to the limitations of other tools you might want to set it to a + # smaller number (e.g., 16). For Mercurial output, this should be + # set to 1. + max_merges=None, + #max_merges=1, + + # Optional map from CVS author names to git author names: + author_transforms=author_transforms, + ) + +# Change this option to True to turn on profiling of cvs2svn (for +# debugging purposes): +run_options.profiling = False + + +# Should CVSItem -> Changeset database files be memory mapped? In +# some tests, using memory mapping speeded up the overall conversion +# by about 5%. But this option can cause the conversion to fail with +# an out of memory error if the conversion computer runs out of +# virtual address space (e.g., when running a very large conversion on +# a 32-bit operating system). Therefore it is disabled by default. 
+# Uncomment the following line to allow these database files to be +# memory mapped. +changeset_database.use_mmap_for_cvs_item_to_changeset_table = True + +# Now set the project to be converted to git. cvs2git only supports +# single-project conversions, so this method must only be called +# once: +run_options.set_project( + # The filesystem path to the part of the CVS repository (*not* a + # CVS working copy) that should be converted. This may be a + # subdirectory (i.e., a module) within a larger CVS repository. + r'cvs-repo', + + # A list of symbol transformations that can be used to rename + # symbols in this project. + symbol_transforms=[ + # Use IgnoreSymbolTransforms like the following to completely + # ignore symbols matching a regular expression when parsing + # the CVS repository, for example to avoid warnings about + # branches with two names and to choose the preferred name. + # It is *not* recommended to use this instead of + # ExcludeRegexpStrategyRule; though more efficient, + # IgnoreSymbolTransforms are less flexible and don't exclude + # branches correctly. The argument is a Python-style regular + # expression that has to match the *whole* CVS symbol name: + #IgnoreSymbolTransform(r'nightly-build-tag-.*') + + # RegexpSymbolTransforms transform symbols textually using a + # regular expression. The first argument is a Python regular + # expression pattern and the second is a replacement pattern. + # The pattern is matched against each symbol name. If it + # matches the whole symbol name, then the symbol name is + # replaced with the corresponding replacement text. The + # replacement can include substitution patterns (e.g., r'\1' + # or r'\g'). Typically you will want to use raw strings + # (strings with a preceding 'r', like shown in the examples) + # for the regexp and its replacement to avoid backslash + # substitution within those strings. 
+ #RegexpSymbolTransform(r'release-(\d+)_(\d+)', + # r'release-\1.\2'), + #RegexpSymbolTransform(r'release-(\d+)_(\d+)_(\d+)', + # r'release-\1.\2.\3'), + + # Simple 1:1 character replacements can also be done. The + # following transform, which converts backslashes into forward + # slashes, should usually be included: + ReplaceSubstringsSymbolTransform('\\','/'), + + # This last rule eliminates leading, trailing, and repeated + # slashes within the output symbol names: + NormalizePathsSymbolTransform(), + ], + + # See the definition of global_symbol_strategy_rules above for a + # description of this option: + symbol_strategy_rules=global_symbol_strategy_rules, + ) + diff --git a/cvs2svn_lib/__init__.py b/cvs2svn_lib/__init__.py new file mode 100644 index 0000000..838d4c6 --- /dev/null +++ b/cvs2svn_lib/__init__.py @@ -0,0 +1,18 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This package contains modules that support cvs2svn.""" + diff --git a/cvs2svn_lib/apple_single_filter.py b/cvs2svn_lib/apple_single_filter.py new file mode 100644 index 0000000..95fa9cb --- /dev/null +++ b/cvs2svn_lib/apple_single_filter.py @@ -0,0 +1,292 @@ +# (Be in -*- python -*- mode.) 
+# +# ==================================================================== +# Copyright (c) 2007-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""A stream filter for extracting the data fork from AppleSingle data. + +Some Macintosh CVS clients store resource fork data along with the +contents of the file (called the data fork) by encoding both in an +'AppleSingle' data stream before storing them to CVS. This file +contains a stream filter for extracting the data fork from such data +streams. (Any other forks are discarded.) 
+ +See the following for some random information about this format and +how it is used by Macintosh CVS clients: + + http://users.phg-online.de/tk/netatalk/doc/Apple/v1/ + http://rfc.net/rfc1740.html + http://ximbiot.com/cvs/cvshome/cyclic/cvs/dev-mac.html + http://www.maccvs.org/faq.html#resfiles + http://www.heilancoo.net/MacCVSClient/MacCVSClientDoc/storage-formats.html + +""" + + +import struct +from cStringIO import StringIO + + +class AppleSingleFormatError(IOError): + """The stream was not in correct AppleSingle format.""" + + pass + + +class AppleSingleIncorrectMagicError(AppleSingleFormatError): + """The file didn't start with the correct magic number.""" + + def __init__(self, data_read, eof): + AppleSingleFormatError.__init__(self) + self.data_read = data_read + self.eof = eof + + +class AppleSingleEOFError(AppleSingleFormatError): + """EOF was reached where AppleSingle doesn't allow it.""" + + pass + + +class AppleSingleFilter(object): + """A stream that reads the data fork from an AppleSingle stream. + + If the constructor discovers that the file is not a legitimate + AppleSingle stream, then it raises an AppleSingleFormatError. In + the special case that the magic number is incorrect, it raises + AppleSingleIncorrectMagicError with data_read set to the data that + have been read so far from the input stream. 
(This allows the + caller the option to fallback to treating the input stream as a + normal binary data stream.)""" + + # The header is: + # + # Magic number 4 bytes + # Version number 4 bytes + # File system or filler 16 bytes + # Number of entries 2 bytes + magic_struct = '>i' + magic_len = struct.calcsize(magic_struct) + + # The part of the header after the magic number: + rest_of_header_struct = '>i16sH' + rest_of_header_len = struct.calcsize(rest_of_header_struct) + + # Each entry is: + # + # Entry ID 4 bytes + # Offset 4 bytes + # Length 4 bytes + entry_struct = '>iii' + entry_len = struct.calcsize(entry_struct) + + apple_single_magic = 0x00051600 + apple_single_version_1 = 0x00010000 + apple_single_version_2 = 0x00020000 + apple_single_filler = '\0' * 16 + + apple_single_data_fork_entry_id = 1 + + def __init__(self, stream): + self.stream = stream + + # Check for the AppleSingle magic number: + s = self._read_exactly(self.magic_len) + if len(s) < self.magic_len: + raise AppleSingleIncorrectMagicError(s, True) + + (magic,) = struct.unpack(self.magic_struct, s) + if magic != self.apple_single_magic: + raise AppleSingleIncorrectMagicError(s, False) + + # Read the rest of the header: + s = self._read_exactly(self.rest_of_header_len) + if len(s) < self.rest_of_header_len: + raise AppleSingleEOFError('AppleSingle header incomplete') + + (version, filler, num_entries) = \ + struct.unpack(self.rest_of_header_struct, s) + + if version == self.apple_single_version_1: + self._prepare_apple_single_v1_file(num_entries) + elif version == self.apple_single_version_2: + if filler != self.apple_single_filler: + raise AppleSingleFormatError('Incorrect filler') + self._prepare_apple_single_v2_file(num_entries) + else: + raise AppleSingleFormatError('Unknown AppleSingle version') + + def _read_exactly(self, size): + """Read and return exactly SIZE characters from the stream. 
+ + This method is to deal with the fact that stream.read(size) is + allowed to return less than size characters. If EOF is reached + before SIZE characters have been read, return the characters that + have been read so far.""" + + retval = [] + length_remaining = size + while length_remaining > 0: + s = self.stream.read(length_remaining) + if not s: + break + retval.append(s) + length_remaining -= len(s) + + return ''.join(retval) + + def _prepare_apple_single_file(self, num_entries): + entries = self._read_exactly(num_entries * self.entry_len) + if len(entries) < num_entries * self.entry_len: + raise AppleSingleEOFError('Incomplete entries list') + + for i in range(num_entries): + entry = entries[i * self.entry_len : (i + 1) * self.entry_len] + (entry_id, offset, length) = struct.unpack(self.entry_struct, entry) + if entry_id == self.apple_single_data_fork_entry_id: + break + else: + raise AppleSingleFormatError('No data fork found') + + # The data fork is located at [offset : offset + length]. 
Read up + # to the start of the data: + n = offset - self.magic_len - self.rest_of_header_len - len(entries) + if n < 0: + raise AppleSingleFormatError('Invalid offset to AppleSingle data fork') + + max_chunk_size = 65536 + while n > 0: + s = self.stream.read(min(n, max_chunk_size)) + if not s: + raise AppleSingleEOFError( + 'Offset to AppleSingle data fork past end of file' + ) + n -= len(s) + + self.length_remaining = length + + def _prepare_apple_single_v1_file(self, num_entries): + self._prepare_apple_single_file(num_entries) + + def _prepare_apple_single_v2_file(self, num_entries): + self._prepare_apple_single_file(num_entries) + + def read(self, size=-1): + if size == 0 or self.length_remaining == 0: + return '' + elif size < 0: + s = self._read_exactly(self.length_remaining) + if len(s) < self.length_remaining: + raise AppleSingleEOFError('AppleSingle data fork truncated') + self.length_remaining = 0 + return s + else: + # The length of this read is allowed to be shorter than the + # requested size: + s = self.stream.read(min(size, self.length_remaining)) + if not s: + raise AppleSingleEOFError() + self.length_remaining -= len(s) + return s + + def close(self): + self.stream.close() + self.stream = None + + +class CompoundStream(object): + """A stream that reads from a series of streams, one after the other.""" + + def __init__(self, *streams): + self.streams = list(streams) + self.stream_index = 0 + + def read(self, size=-1): + if size < 0: + retval = [] + while self.stream_index < len(self.streams): + retval.append(self.streams[self.stream_index].read()) + self.stream_index += 1 + return ''.join(retval) + else: + while self.stream_index < len(self.streams): + s = self.streams[self.stream_index].read(size) + if s: + # This may not be the full size requested, but that is OK: + return s + else: + # That stream was empty; proceed to the next stream: + self.stream_index += 1 + + # No streams are left: + return '' + + def close(self): + for stream in 
self.streams: + stream.close() + self.streams = None + + +def get_maybe_apple_single_stream(stream): + """Treat STREAM as AppleSingle if possible; otherwise treat it literally. + + If STREAM is in AppleSingle format, then return a stream that will + output the data fork of the original stream. Otherwise, return a + stream that will output the original file contents literally. + + Be careful not to read from STREAM after it has already hit EOF.""" + + try: + return AppleSingleFilter(stream) + except AppleSingleIncorrectMagicError, e: + # This is OK; the file is not AppleSingle, so we read it normally: + string_io = StringIO(e.data_read) + if e.eof: + # The original stream already reached EOF, so the part already + # read contains the complete file contents: + return string_io + else: + # The stream needs to output the part already read followed by + # whatever hasn't been read of the original stream: + return CompoundStream(string_io, stream) + + +if __name__ == '__main__': + # For fun and testing, allow use of this file as a pipe if it is + # invoked as a script. Specifically, if stdin is in AppleSingle + # format, then output only its data fork; otherwise, output it + # unchanged. + # + # This might not work on systems where sys.stdin is opened in text + # mode. + # + # Remember to set PYTHONPATH to point to the main cvs2svn directory. + + import sys + + #CHUNK_SIZE = -1 + CHUNK_SIZE = 100 + + f = get_maybe_apple_single_stream(sys.stdin) + + if CHUNK_SIZE < 0: + sys.stdout.write(f.read()) + else: + while True: + s = f.read(CHUNK_SIZE) + if not s: + break + sys.stdout.write(s) + + diff --git a/cvs2svn_lib/artifact.py b/cvs2svn_lib/artifact.py new file mode 100644 index 0000000..99d6945 --- /dev/null +++ b/cvs2svn_lib/artifact.py @@ -0,0 +1,59 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. 
+# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module defines Artifact types to be used with an ArtifactManager.""" + + +import os + +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.log import Log + + +class Artifact(object): + """An object that is created, used across passes, then cleaned up.""" + + def __init__(self): + # The set of passes that need this artifact. This field is + # maintained by ArtifactManager. + self._passes_needed = set() + + def cleanup(self): + """This artifact is no longer needed; clean it up.""" + + pass + + +class TempFile(Artifact): + """A temporary file that can be used across cvs2svn passes.""" + + def __init__(self, basename): + Artifact.__init__(self) + self.basename = basename + + def _get_filename(self): + return Ctx().get_temp_filename(self.basename) + + filename = property(_get_filename) + + def cleanup(self): + Log().verbose("Deleting", self.filename) + os.unlink(self.filename) + + def __str__(self): + return 'Temporary file %r' % (self.filename,) + + diff --git a/cvs2svn_lib/artifact_manager.py b/cvs2svn_lib/artifact_manager.py new file mode 100644 index 0000000..08f0ec7 --- /dev/null +++ b/cvs2svn_lib/artifact_manager.py @@ -0,0 +1,256 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. 
+# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module manages the artifacts produced by conversion passes.""" + + +from cvs2svn_lib.log import Log +from cvs2svn_lib.artifact import TempFile + + +class ArtifactNotActiveError(Exception): + """An artifact was requested when no passes that have registered + that they need it are active.""" + + def __init__(self, artifact_name): + Exception.__init__( + self, 'Artifact %s is not currently active' % artifact_name) + + +class ArtifactManager: + """Manage artifacts that are created by one pass but needed by others. + + This class is responsible for cleaning up artifacts once they are no + longer needed. The trick is that cvs2svn can be run pass by pass, + so not all passes might be executed during a specific program run. + + To use this class: + + - Call artifact_manager.set_artifact(name, artifact) once for each + known artifact. + + - Call artifact_manager.creates(which_pass, artifact) to indicate + that WHICH_PASS is the pass that creates ARTIFACT. + + - Call artifact_manager.uses(which_pass, artifact) to indicate that + WHICH_PASS needs to use ARTIFACT. + + There are also helper methods register_temp_file(), + register_artifact_needed(), and register_temp_file_needed() which + combine some useful operations. + + Then, in pass order: + + - Call pass_skipped() for any passes that were already executed + during a previous cvs2svn run. 
+ + - Call pass_started() when a pass is about to start execution. + + - If a pass that has been started will be continued during the next + program run, then call pass_continued(). + + - If a pass that has been started finishes execution, call + pass_done(), to allow any artifacts that won't be needed anymore + to be cleaned up. + + - Call pass_deferred() for any passes that have been deferred to a + future cvs2svn run. + + Finally: + + - Call check_clean() to verify that all artifacts have been + accounted for.""" + + def __init__(self): + # A map { artifact_name : artifact } of known artifacts. + self._artifacts = { } + + # A map { pass : set_of_artifacts }, where set_of_artifacts is a + # set of artifacts needed by the pass. + self._pass_needs = { } + + # A set of passes that are currently being executed. + self._active_passes = set() + + def set_artifact(self, name, artifact): + """Add ARTIFACT to the list of artifacts that we manage. + + Store it under NAME.""" + + assert name not in self._artifacts + self._artifacts[name] = artifact + + def get_artifact(self, name): + """Return the artifact with the specified name. + + If the artifact does not currently exist, raise a KeyError. If it + is not registered as being needed by one of the active passes, + raise an ArtifactNotActiveError.""" + + artifact = self._artifacts[name] + for active_pass in self._active_passes: + if artifact in self._pass_needs[active_pass]: + # OK + return artifact + else: + raise ArtifactNotActiveError(name) + + def creates(self, which_pass, artifact): + """Register that WHICH_PASS creates ARTIFACT. + + ARTIFACT must already have been registered.""" + + # An artifact is automatically "needed" in the pass in which it is + # created: + self.uses(which_pass, artifact) + + def uses(self, which_pass, artifact): + """Register that WHICH_PASS uses ARTIFACT. 
+ + ARTIFACT must already have been registered.""" + + artifact._passes_needed.add(which_pass) + if which_pass in self._pass_needs: + self._pass_needs[which_pass].add(artifact) + else: + self._pass_needs[which_pass] = set([artifact]) + + def register_temp_file(self, basename, which_pass): + """Register a temporary file with base name BASENAME as an artifact. + + Return the filename of the temporary file.""" + + artifact = TempFile(basename) + self.set_artifact(basename, artifact) + self.creates(which_pass, artifact) + + def get_temp_file(self, basename): + """Return the filename of the temporary file with the specified BASENAME. + + If the temporary file is not an existing, registered TempFile, + raise a KeyError.""" + + return self.get_artifact(basename).filename + + def register_artifact_needed(self, artifact_name, which_pass): + """Register that WHICH_PASS uses the artifact named ARTIFACT_NAME. + + An artifact with this name must already have been registered.""" + + artifact = self._artifacts[artifact_name] + artifact._passes_needed.add(which_pass) + if which_pass in self._pass_needs: + self._pass_needs[which_pass].add(artifact) + else: + self._pass_needs[which_pass] = set([artifact,]) + + def register_temp_file_needed(self, basename, which_pass): + """Register that a temporary file is needed by WHICH_PASS. + + Register that the temporary file with base name BASENAME is needed + by WHICH_PASS.""" + + self.register_artifact_needed(basename, which_pass) + + def _unregister_artifacts(self, which_pass): + """Unregister any artifacts that were needed for WHICH_PASS. 
+ + Return a list of artifacts that are no longer needed at all.""" + + try: + artifacts = list(self._pass_needs[which_pass]) + except KeyError: + # No artifacts were needed for that pass: + return [] + + del self._pass_needs[which_pass] + + unneeded_artifacts = [] + for artifact in artifacts: + artifact._passes_needed.remove(which_pass) + if not artifact._passes_needed: + unneeded_artifacts.append(artifact) + + return unneeded_artifacts + + def pass_skipped(self, which_pass): + """WHICH_PASS was executed during a previous cvs2svn run. + + Its artifacts were created then, and any artifacts that would + normally be cleaned up after this pass have already been cleaned + up.""" + + self._unregister_artifacts(which_pass) + + def pass_started(self, which_pass): + """WHICH_PASS is starting.""" + + self._active_passes.add(which_pass) + + def pass_continued(self, which_pass): + """WHICH_PASS will be continued during the next program run. + + WHICH_PASS, which has already been started, will be continued + during the next program run. Unregister any artifacts that would + be cleaned up at the end of WHICH_PASS without actually cleaning + them up.""" + + self._active_passes.remove(which_pass) + self._unregister_artifacts(which_pass) + + def pass_done(self, which_pass, skip_cleanup): + """WHICH_PASS is done. + + Clean up all artifacts that are no longer needed. If SKIP_CLEANUP + is True, then just do the bookkeeping without actually calling + artifact.cleanup().""" + + self._active_passes.remove(which_pass) + artifacts = self._unregister_artifacts(which_pass) + if not skip_cleanup: + for artifact in artifacts: + artifact.cleanup() + + def pass_deferred(self, which_pass): + """WHICH_PASS is being deferred until a future cvs2svn run. + + Unregister any artifacts that would be cleaned up during + WHICH_PASS.""" + + self._unregister_artifacts(which_pass) + + def check_clean(self): + """All passes have been processed. 
+ + Output a warning messages if all artifacts have not been accounted + for. (This is mainly a consistency check, that no artifacts were + registered under nonexistent passes.)""" + + unclean_artifacts = [ + str(artifact) + for artifact in self._artifacts.values() + if artifact._passes_needed] + + if unclean_artifacts: + Log().warn( + 'INTERNAL: The following artifacts were not cleaned up:\n %s\n' + % ('\n '.join(unclean_artifacts))) + + +# The default ArtifactManager instance: +artifact_manager = ArtifactManager() + + diff --git a/cvs2svn_lib/bzr_run_options.py b/cvs2svn_lib/bzr_run_options.py new file mode 100644 index 0000000..5332dff --- /dev/null +++ b/cvs2svn_lib/bzr_run_options.py @@ -0,0 +1,175 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
+# ==================================================================== + +"""This module manages cvs2bzr run options.""" + + +import sys +import datetime +import codecs + +from cvs2svn_lib.version import VERSION +from cvs2svn_lib.common import FatalError +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.run_options import not_both +from cvs2svn_lib.run_options import RunOptions +from cvs2svn_lib.run_options import ContextOption +from cvs2svn_lib.run_options import IncompatibleOption +from cvs2svn_lib.run_options import authors +from cvs2svn_lib.man_writer import ManWriter +from cvs2svn_lib.rcs_revision_manager import RCSRevisionReader +from cvs2svn_lib.cvs_revision_manager import CVSRevisionReader +from cvs2svn_lib.git_run_options import GitRunOptions +from cvs2svn_lib.git_output_option import GitRevisionInlineWriter +from cvs2svn_lib.git_output_option import GitOutputOption +from cvs2svn_lib.revision_manager import NullRevisionRecorder +from cvs2svn_lib.revision_manager import NullRevisionExcluder + + +short_desc = 'convert a cvs repository into a Bazaar repository' + +synopsis = """\ +.B cvs2bzr +[\\fIOPTION\\fR]... \\fIOUTPUT-OPTIONS CVS-REPOS-PATH\\fR +.br +.B cvs2bzr +[\\fIOPTION\\fR]... \\fI--options=PATH\\fR +""" + +description="""\ +Convert a CVS repository into a Bazaar repository, including history. + +""" +long_desc = """\ +Create a new Bazaar repository based on the version history stored in a +CVS repository. Each CVS commit will be mirrored in the Bazaar +repository, including such information as date of commit and id of the +committer. +.P +The output of this program is a "fast-import dumpfile", which +can be loaded into a Bazaar repository using the Bazaar FastImport +Plugin, available from https://launchpad.net/bzr-fastimport. + +.P +\\fICVS-REPOS-PATH\\fR is the filesystem path of the part of the CVS +repository that you want to convert. 
This path doesn't have to be the +top level directory of a CVS repository; it can point at a project +within a repository, in which case only that project will be +converted. This path or one of its parent directories has to contain +a subdirectory called CVSROOT (though the CVSROOT directory can be +empty). +.P +It is not possible directly to convert a CVS repository to which you +only have remote access, but the FAQ describes tools that may be used +to create a local copy of a remote CVS repository. +""" + +files = """\ +A directory called \\fIcvs2svn-tmp\\fR (or the directory specified by +\\fB--tmpdir\\fR) is used as scratch space for temporary data files. +""" + +see_also = [ + ('cvs', '1'), + ('bzr', '1'), + ] + + +class BzrRunOptions(GitRunOptions): + + def get_description(self): + return description + + def _get_output_options_group(self): + group = RunOptions._get_output_options_group(self) + + group.add_option(IncompatibleOption( + '--dumpfile', type='string', + action='store', + help='path to which the data should be written', + man_help=( + 'Write the blobs and revision data to \\fIpath\\fR.' + ), + metavar='PATH', + )) + group.add_option(ContextOption( + '--dry-run', + action='store_true', + help=( + 'do not create any output; just print what would happen.' + ), + man_help=( + 'Do not create any output; just print what would happen.' + ), + )) + + return group + + def callback_manpage(self, option, opt_str, value, parser): + f = codecs.getwriter('utf_8')(sys.stdout) + ManWriter( + parser, + section='1', + date=datetime.date.today(), + source='Version %s' % (VERSION,), + manual='User Commands', + short_desc=short_desc, + synopsis=synopsis, + long_desc=long_desc, + files=files, + authors=authors, + see_also=see_also, + ).write_manpage(f) + sys.exit(0) + + def process_io_options(self): + """Process input/output options. 
+ + Process options related to extracting data from the CVS repository + and writing to a Bazaar-friendly fast-import file.""" + + ctx = Ctx() + options = self.options + + not_both(options.use_rcs, '--use-rcs', + options.use_cvs, '--use-cvs') + + if options.use_rcs: + revision_reader = RCSRevisionReader( + co_executable=options.co_executable + ) + else: + # --use-cvs is the default: + revision_reader = CVSRevisionReader( + cvs_executable=options.cvs_executable + ) + + if not ctx.dry_run and not options.dumpfile: + raise FatalError("must pass '--dry-run' or '--dumpfile' option.") + + ctx.revision_recorder = NullRevisionRecorder() + ctx.revision_excluder = NullRevisionExcluder() + ctx.revision_reader = None + + ctx.output_option = GitOutputOption( + options.dumpfile, + GitRevisionInlineWriter(revision_reader), + max_merges=None, + # Optional map from CVS author names to bzr author names: + author_transforms={}, # FIXME + ) + + diff --git a/cvs2svn_lib/changeset.py b/cvs2svn_lib/changeset.py new file mode 100644 index 0000000..1022e0a --- /dev/null +++ b/cvs2svn_lib/changeset.py @@ -0,0 +1,269 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2006-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
+# ==================================================================== + +"""Manage change sets.""" + + +from cvs2svn_lib.common import InternalError +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.symbol import Branch +from cvs2svn_lib.symbol import Tag +from cvs2svn_lib.time_range import TimeRange +from cvs2svn_lib.changeset_graph_node import ChangesetGraphNode + + +class Changeset(object): + """A set of cvs_items that might potentially form a single change set.""" + + def __init__(self, id, cvs_item_ids): + self.id = id + self.cvs_item_ids = list(cvs_item_ids) + + def iter_cvs_items(self): + """Yield the CVSItems within this Changeset.""" + + for (id, cvs_item) in Ctx()._cvs_items_db.get_many(self.cvs_item_ids): + assert cvs_item is not None + yield cvs_item + + def get_projects_opened(self): + """Return the set of projects that might be opened by this changeset.""" + + raise NotImplementedError() + + def create_graph_node(self, cvs_item_to_changeset_id): + """Return a ChangesetGraphNode for this Changeset.""" + + raise NotImplementedError() + + def create_split_changeset(self, id, cvs_item_ids): + """Return a Changeset with the specified contents. + + This method is only implemented for changesets that can be split. 
+ The type of the new changeset should be the same as that of SELF, + and any other information from SELF should also be copied to the + new changeset.""" + + raise NotImplementedError() + + def __getstate__(self): + return (self.id, self.cvs_item_ids,) + + def __setstate__(self, state): + (self.id, self.cvs_item_ids,) = state + + def __cmp__(self, other): + raise NotImplementedError() + + def __str__(self): + raise NotImplementedError() + + def __repr__(self): + return '%s [%s]' % ( + self, ', '.join(['%x' % id for id in self.cvs_item_ids]),) + + +class RevisionChangeset(Changeset): + """A Changeset consisting of CVSRevisions.""" + + _sort_order = 3 + + def create_graph_node(self, cvs_item_to_changeset_id): + time_range = TimeRange() + pred_ids = set() + succ_ids = set() + + for cvs_item in self.iter_cvs_items(): + time_range.add(cvs_item.timestamp) + + for pred_id in cvs_item.get_pred_ids(): + changeset_id = cvs_item_to_changeset_id.get(pred_id) + if changeset_id is not None: + pred_ids.add(changeset_id) + + for succ_id in cvs_item.get_succ_ids(): + changeset_id = cvs_item_to_changeset_id.get(succ_id) + if changeset_id is not None: + succ_ids.add(changeset_id) + + return ChangesetGraphNode(self, time_range, pred_ids, succ_ids) + + def create_split_changeset(self, id, cvs_item_ids): + return RevisionChangeset(id, cvs_item_ids) + + def __cmp__(self, other): + return cmp(self._sort_order, other._sort_order) \ + or cmp(self.id, other.id) + + def __str__(self): + return 'RevisionChangeset<%x>' % (self.id,) + + +class OrderedChangeset(Changeset): + """A Changeset of CVSRevisions whose preliminary order is known. + + The first changeset ordering involves only RevisionChangesets, and + results in a full ordering of RevisionChangesets (i.e., a linear + chain of dependencies with the order consistent with the + dependencies). 
These OrderedChangesets form the skeleton for the + full topological sort that includes SymbolChangesets as well.""" + + _sort_order = 2 + + def __init__(self, id, cvs_item_ids, ordinal, prev_id, next_id): + Changeset.__init__(self, id, cvs_item_ids) + + # The order of this changeset among all OrderedChangesets: + self.ordinal = ordinal + + # The changeset id of the previous OrderedChangeset, or None if + # this is the first OrderedChangeset: + self.prev_id = prev_id + + # The changeset id of the next OrderedChangeset, or None if this + # is the last OrderedChangeset: + self.next_id = next_id + + def get_projects_opened(self): + retval = set() + for cvs_item in self.iter_cvs_items(): + retval.add(cvs_item.cvs_file.project) + return retval + + def create_graph_node(self, cvs_item_to_changeset_id): + time_range = TimeRange() + + pred_ids = set() + succ_ids = set() + + if self.prev_id is not None: + pred_ids.add(self.prev_id) + + if self.next_id is not None: + succ_ids.add(self.next_id) + + for cvs_item in self.iter_cvs_items(): + time_range.add(cvs_item.timestamp) + + for pred_id in cvs_item.get_symbol_pred_ids(): + changeset_id = cvs_item_to_changeset_id.get(pred_id) + if changeset_id is not None: + pred_ids.add(changeset_id) + + for succ_id in cvs_item.get_symbol_succ_ids(): + changeset_id = cvs_item_to_changeset_id.get(succ_id) + if changeset_id is not None: + succ_ids.add(changeset_id) + + return ChangesetGraphNode(self, time_range, pred_ids, succ_ids) + + def __getstate__(self): + return ( + Changeset.__getstate__(self), + self.ordinal, self.prev_id, self.next_id,) + + def __setstate__(self, state): + (changeset_state, self.ordinal, self.prev_id, self.next_id,) = state + Changeset.__setstate__(self, changeset_state) + + def __cmp__(self, other): + return cmp(self._sort_order, other._sort_order) \ + or cmp(self.id, other.id) + + def __str__(self): + return 'OrderedChangeset<%x(%d)>' % (self.id, self.ordinal,) + + +class SymbolChangeset(Changeset): + """A 
class SymbolChangeset(Changeset):
  """A Changeset consisting of CVSSymbols."""

  def __init__(self, id, symbol, cvs_item_ids):
    Changeset.__init__(self, id, cvs_item_ids)
    # The Symbol (a Branch or Tag) that the member CVSSymbols refer to:
    self.symbol = symbol

  def get_projects_opened(self):
    # A SymbolChangeset can never open a project.
    return set()

  def create_graph_node(self, cvs_item_to_changeset_id):
    """Return a ChangesetGraphNode for this changeset.

    Symbol changesets carry no timestamps, so the node receives an
    empty TimeRange."""

    preds = set()
    succs = set()
    lookup = cvs_item_to_changeset_id.get

    for cvs_item in self.iter_cvs_items():
      for item_id in cvs_item.get_pred_ids():
        mapped = lookup(item_id)
        if mapped is not None:
          preds.add(mapped)
      for item_id in cvs_item.get_succ_ids():
        mapped = lookup(item_id)
        if mapped is not None:
          succs.add(mapped)

    return ChangesetGraphNode(self, TimeRange(), preds, succs)

  def __cmp__(self, other):
    # Order by changeset type, then symbol, then id:
    return cmp(self._sort_order, other._sort_order) \
           or cmp(self.symbol, other.symbol) \
           or cmp(self.id, other.id)

  def __getstate__(self):
    # Pickle only the symbol's id; the Symbol instance is looked up
    # again from the symbol database on unpickling:
    return (Changeset.__getstate__(self), self.symbol.id,)

  def __setstate__(self, state):
    (changeset_state, symbol_id) = state
    Changeset.__setstate__(self, changeset_state)
    self.symbol = Ctx()._symbol_db.get_symbol(symbol_id)


class BranchChangeset(SymbolChangeset):
  """A Changeset consisting of CVSBranches."""

  _sort_order = 1

  def create_split_changeset(self, id, cvs_item_ids):
    """Return a new BranchChangeset with ID, holding CVS_ITEM_IDS."""

    return BranchChangeset(id, self.symbol, cvs_item_ids)

  def __str__(self):
    return 'BranchChangeset<%x>("%s")' % (self.id, self.symbol,)


class TagChangeset(SymbolChangeset):
  """A Changeset consisting of CVSTags."""

  _sort_order = 0

  def create_split_changeset(self, id, cvs_item_ids):
    """Return a new TagChangeset with ID, holding CVS_ITEM_IDS."""

    return TagChangeset(id, self.symbol, cvs_item_ids)

  def __str__(self):
    return 'TagChangeset<%x>("%s")' % (self.id, self.symbol,)
def create_symbol_changeset(id, symbol, cvs_item_ids):
  """Factory function for SymbolChangesets.

  Return a BranchChangeset or TagChangeset, depending on the type of
  SYMBOL.  SYMBOL must be a Branch or Tag; any other type raises
  InternalError."""

  for (symbol_type, changeset_type) in (
      (Branch, BranchChangeset),
      (Tag, TagChangeset),
      ):
    if isinstance(symbol, symbol_type):
      return changeset_type(id, symbol, cvs_item_ids)

  raise InternalError('Unknown symbol type %s' % (symbol,))
# Should the CVSItemToChangesetTable database files be memory mapped?
# This speeds up the conversion but can cause the computer's virtual
# address space to be exhausted.  This option can be changed
# externally, affecting any CVSItemToChangesetTables opened subsequent
# to the change:
use_mmap_for_cvs_item_to_changeset_table = False


def CVSItemToChangesetTable(filename, mode):
  """Open and return the map from CVSItem ids to changeset ids.

  The record table is memory-mapped iff
  use_mmap_for_cvs_item_to_changeset_table is set at call time."""

  if use_mmap_for_cvs_item_to_changeset_table:
    table_type = MmapRecordTable
  else:
    table_type = RecordTable
  return table_type(filename, mode, UnsignedIntegerPacker())


class ChangesetDatabase(IndexedStore):
  """An IndexedStore of Changeset instances, keyed by changeset id."""

  def __init__(self, filename, index_filename, mode):
    # Prime the pickler with all concrete Changeset classes so their
    # definitions are not repeated in every stored record:
    primer = (
        Changeset,
        RevisionChangeset,
        OrderedChangeset,
        SymbolChangeset,
        BranchChangeset,
        TagChangeset,
        )
    IndexedStore.__init__(
        self, filename, index_filename, mode,
        PrimedPickleSerializer(primer))

  def store(self, changeset):
    """Record CHANGESET under its own id."""

    self.add(changeset)

  def keys(self):
    """Return a list of all stored changeset ids."""

    return list(self.iterkeys())

  def close(self):
    IndexedStore.close(self)
class CycleInGraphException(Exception):
  """A dependency cycle was found in the changeset graph."""

  def __init__(self, cycle):
    # Repeat the first changeset at the end so that the rendered path
    # visibly closes the loop: a -> b -> a
    closed_path = cycle + [cycle[0]]
    Exception.__init__(
        self,
        'Cycle found in graph: %s'
        % ' -> '.join(str(changeset) for changeset in closed_path))


class NoPredNodeInGraphException(Exception):
  """A node unexpectedly has no predecessors."""

  def __init__(self, node):
    Exception.__init__(self, 'Node %s has no predecessors' % (node,))
class _NoPredNodes:
  """Manage changesets that are to be processed.

  Output the changesets in order by time and changeset type.

  The implementation of this class is crude: as changesets are added,
  they are appended to a list.  When one is needed, the list is sorted
  in reverse order and then the last changeset in the list is
  returned.  To reduce the number of sorts that are needed, the class
  keeps track of whether the list is currently sorted.

  All this repeated sorting is wasteful and unnecessary.  We should
  instead use a heap to output the changeset order, which would
  require O(lg N) work per add()/get() rather than O(1) and O(N lg N)
  as in the current implementation [1].  But: (1) the lame interface
  of heapq doesn't allow an arbitrary compare function, so we would
  have to store extra information in the array elements; (2) in
  practice, the number of items in the list at any time is only a tiny
  fraction of the total number of changesets; and (3) testing showed
  that the heapq implementation is no faster than this one (perhaps
  because of the increased memory usage).

  [1] According to Objects/listsort.txt in the Python source code, the
  Python list-sorting code is heavily optimized for arrays that have
  runs of already-sorted elements, so the current cost of get() is
  probably closer to O(N) than O(N lg N)."""

  def __init__(self, changeset_db):
    # Used to look up the Changeset for a node's id in add():
    self.changeset_db = changeset_db
    # A list [(node, changeset,)] of nodes with no predecessors:
    self._nodes = []
    # True iff self._nodes is currently sorted (in reverse order);
    # lets get() skip re-sorting when nothing was added in between:
    self._sorted = True

  def __len__(self):
    return len(self._nodes)

  @staticmethod
  def _compare((node_1, changeset_1), (node_2, changeset_2)):
    """Define a (reverse) ordering on self._nodes."""

    # Arguments are swapped deliberately: sorting newest-first leaves
    # the entry to be emitted next at the END of the list, where it
    # can be removed with a cheap pop():
    return cmp(node_2.time_range, node_1.time_range) \
           or cmp(changeset_2, changeset_1)

  def add(self, node):
    # Pair the node with its changeset now so that sorting can compare
    # changesets without any further database lookups:
    self._nodes.append( (node, self.changeset_db[node.id],) )
    self._sorted = False

  def get(self):
    """Return (node, changeset,) of the smallest node.

    'Smallest' is defined by self._compare()."""

    if not self._sorted:
      # Sort lazily, only when an element is actually requested:
      self._nodes.sort(self._compare)
      self._sorted = True
    return self._nodes.pop()
class ChangesetGraph(object):
  """A graph of changesets and their dependencies."""

  def __init__(self, changeset_db, cvs_item_to_changeset_id):
    # Persistent store of Changeset instances, keyed by changeset id:
    self._changeset_db = changeset_db
    # Persistent map from CVSItem id to containing changeset id:
    self._cvs_item_to_changeset_id = cvs_item_to_changeset_id
    # A map { id : ChangesetGraphNode }
    self.nodes = {}

  def close(self):
    """Close the underlying databases and drop the references to them."""

    self._cvs_item_to_changeset_id.close()
    self._cvs_item_to_changeset_id = None
    self._changeset_db.close()
    self._changeset_db = None

  def add_changeset(self, changeset):
    """Add CHANGESET to this graph.

    Determine and record any dependencies to changesets that are
    already in the graph.  This method does not affect the databases."""

    node = changeset.create_graph_node(self._cvs_item_to_changeset_id)

    # Now tie the node into our graph.  If a changeset referenced by
    # node is already in our graph, then add the backwards connection
    # from the other node to the new one.  If not, then delete the
    # changeset from node.

    # (Iterate over copies because the id sets are mutated in place.)
    for pred_id in list(node.pred_ids):
      pred_node = self.nodes.get(pred_id)
      if pred_node is not None:
        pred_node.succ_ids.add(node.id)
      else:
        node.pred_ids.remove(pred_id)

    for succ_id in list(node.succ_ids):
      succ_node = self.nodes.get(succ_id)
      if succ_node is not None:
        succ_node.pred_ids.add(node.id)
      else:
        node.succ_ids.remove(succ_id)

    self.nodes[node.id] = node

  def store_changeset(self, changeset):
    """Record CHANGESET in the databases (the graph is not affected)."""

    for cvs_item_id in changeset.cvs_item_ids:
      self._cvs_item_to_changeset_id[cvs_item_id] = changeset.id
    self._changeset_db.store(changeset)

  def add_new_changeset(self, changeset):
    """Add the new CHANGESET to the graph and also to the databases."""

    if Log().is_on(Log.DEBUG):
      Log().debug('Adding changeset %r' % (changeset,))

    self.add_changeset(changeset)
    self.store_changeset(changeset)

  def delete_changeset(self, changeset):
    """Remove CHANGESET from the graph and also from the databases.

    In fact, we don't remove CHANGESET from
    self._cvs_item_to_changeset_id, because in practice the CVSItems
    in CHANGESET are always added again as part of a new CHANGESET,
    which will cause the old values to be overwritten."""

    if Log().is_on(Log.DEBUG):
      Log().debug('Removing changeset %r' % (changeset,))

    del self[changeset.id]
    del self._changeset_db[changeset.id]

  def __nonzero__(self):
    """Instances are considered True iff they contain any nodes."""

    return bool(self.nodes)

  def __contains__(self, id):
    """Return True if the specified ID is contained in this graph."""

    return id in self.nodes

  def __getitem__(self, id):
    # Raises KeyError if ID is not in the graph:
    return self.nodes[id]

  def get(self, id):
    # Return the node with ID, or None if it is not in the graph:
    return self.nodes.get(id)

  def __delitem__(self, id):
    """Remove the node corresponding to ID.

    Also remove references to it from other nodes.  This method does
    not change pred_ids or succ_ids of the node being deleted, nor
    does it affect the databases."""

    node = self[id]

    for succ_id in node.succ_ids:
      succ = self[succ_id]
      succ.pred_ids.remove(node.id)

    for pred_id in node.pred_ids:
      pred = self[pred_id]
      pred.succ_ids.remove(node.id)

    del self.nodes[node.id]

  def keys(self):
    return self.nodes.keys()

  def __iter__(self):
    # Iterates over the nodes themselves, not their ids:
    return self.nodes.itervalues()

  def _get_path(self, reachable_changesets, starting_node_id, ending_node_id):
    """Return the shortest path from ENDING_NODE_ID to STARTING_NODE_ID.

    Find a path from ENDING_NODE_ID to STARTING_NODE_ID in
    REACHABLE_CHANGESETS, where STARTING_NODE_ID is the id of a
    changeset that depends on the changeset with ENDING_NODE_ID.  (See
    the comment in search_for_path() for a description of the format
    of REACHABLE_CHANGESETS.)

    Return a list of changesets, where the 0th one has ENDING_NODE_ID
    and the last one has STARTING_NODE_ID.  If there is no such path
    described in REACHABLE_CHANGESETS, return None."""

    if ending_node_id not in reachable_changesets:
      return None

    # Walk the predecessor chain that the breadth-first search in
    # search_for_path() recorded:
    path = [self._changeset_db[ending_node_id]]
    id = reachable_changesets[ending_node_id][1]
    while id != starting_node_id:
      path.append(self._changeset_db[id])
      id = reachable_changesets[id][1]
    path.append(self._changeset_db[starting_node_id])
    return path

  def search_for_path(self, starting_node_id, stop_set):
    """Search for paths to prerequisites of STARTING_NODE_ID.

    Try to find the shortest dependency path that causes the changeset
    with STARTING_NODE_ID to depend (directly or indirectly) on one of
    the changesets whose ids are contained in STOP_SET.

    We consider direct and indirect dependencies in the sense that the
    changeset can be reached by following a chain of predecessor nodes.

    When one of the changeset_ids in STOP_SET is found, terminate the
    search and return the path from that changeset_id to
    STARTING_NODE_ID.  If no path is found to a node in STOP_SET,
    return None."""

    # A map {node_id : (steps, next_node_id)} where NODE_ID can be
    # reached from STARTING_NODE_ID in STEPS steps, and NEXT_NODE_ID
    # is the id of the previous node in the path.  STARTING_NODE_ID is
    # only included as a key if there is a loop leading back to it.
    reachable_changesets = {}

    # A list of (node_id, steps) that still have to be investigated,
    # and STEPS is the number of steps to get to NODE_ID.
    open_nodes = [(starting_node_id, 0)]
    # A breadth-first search:
    while open_nodes:
      (id, steps) = open_nodes.pop(0)
      steps += 1
      node = self[id]
      for pred_id in node.pred_ids:
        # Since the search is breadth-first, we only have to set steps
        # that don't already exist.
        if pred_id not in reachable_changesets:
          reachable_changesets[pred_id] = (steps, id)
          open_nodes.append((pred_id, steps))

          # See if we can stop now:
          if pred_id in stop_set:
            return self._get_path(
                reachable_changesets, starting_node_id, pred_id
                )

    return None

  def consume_nopred_nodes(self):
    """Remove and yield changesets in dependency order.

    Each iteration, this generator yields a (changeset, time_range)
    tuple for the oldest changeset in the graph that doesn't have any
    predecessor nodes (i.e., it is ready to be committed).  This is
    continued until there are no more nodes without predecessors
    (either because the graph has been emptied, or because of cycles
    in the graph).

    Among the changesets that are ready to be processed, the earliest
    one (according to the sorting of the TimeRange class) is yielded
    each time.  (This is the order in which the changesets should be
    committed.)

    The graph should not be otherwise altered while this generator is
    running."""

    # Find a list of (node,changeset,) where the node has no
    # predecessors:
    nopred_nodes = _NoPredNodes(self._changeset_db)
    for node in self.nodes.itervalues():
      if not node.pred_ids:
        nopred_nodes.add(node)

    while nopred_nodes:
      (node, changeset,) = nopred_nodes.get()
      del self[node.id]
      # See if any successors are now ready for extraction:
      for succ_id in node.succ_ids:
        succ = self[succ_id]
        if not succ.pred_ids:
          nopred_nodes.add(succ)
      yield (changeset, node.time_range)

  def find_cycle(self, starting_node_id):
    """Find a cycle in the dependency graph and return it.

    Use STARTING_NODE_ID as the place to start looking.  This routine
    must only be called after all nopred_nodes have been removed.
    Return the list of changesets that are involved in the cycle
    (ordered such that cycle[n-1] is a predecessor of cycle[n] and
    cycle[-1] is a predecessor of cycle[0])."""

    # Since there are no nopred nodes in the graph, all nodes in the
    # graph must either be involved in a cycle or depend (directly or
    # indirectly) on nodes that are in a cycle.

    # Pick an arbitrary node:
    node = self[starting_node_id]

    seen_nodes = [node]

    # Follow it backwards until a node is seen a second time; then we
    # have our cycle.
    while True:
      # Pick an arbitrary predecessor of node.  It must exist, because
      # there are no nopred nodes:
      try:
        node_id = node.pred_ids.__iter__().next()
      except StopIteration:
        raise NoPredNodeInGraphException(node)
      node = self[node_id]
      try:
        i = seen_nodes.index(node)
      except ValueError:
        seen_nodes.append(node)
      else:
        # NODE was already visited; the tail of seen_nodes (from its
        # first occurrence) is the cycle, recorded backwards:
        seen_nodes = seen_nodes[i:]
        seen_nodes.reverse()
        return [self._changeset_db[node.id] for node in seen_nodes]

  def consume_graph(self, cycle_breaker=None):
    """Remove and yield changesets from this graph in dependency order.

    Each iteration, this generator yields a (changeset, time_range)
    tuple for the oldest changeset in the graph that doesn't have any
    predecessor nodes.  If CYCLE_BREAKER is specified, then call
    CYCLE_BREAKER(cycle) whenever a cycle is encountered, where cycle
    is the list of changesets that are involved in the cycle (ordered
    such that cycle[n-1] is a predecessor of cycle[n] and cycle[-1] is
    a predecessor of cycle[0]).  CYCLE_BREAKER should break the cycle
    in place then return.

    If a cycle is found and CYCLE_BREAKER was not specified, raise
    CycleInGraphException."""

    while True:
      for (changeset, time_range) in self.consume_nopred_nodes():
        yield (changeset, time_range)

      # If there are any nodes left in the graph, then there must be
      # at least one cycle.  Find a cycle and process it.

      # This might raise StopIteration, but that indicates that the
      # graph has been fully consumed, so we just let the exception
      # escape.
      start_node_id = self.nodes.iterkeys().next()

      cycle = self.find_cycle(start_node_id)

      if cycle_breaker is not None:
        cycle_breaker(cycle)
      else:
        raise CycleInGraphException(cycle)

  def __repr__(self):
    """For convenience only.  The format is subject to change at any time."""

    if self.nodes:
      return 'ChangesetGraph:\n%s' \
             % ''.join(['  %r\n' % node for node in self])
    else:
      return 'ChangesetGraph:\n  EMPTY\n'

  # Fill colors used to render each changeset type in DOT output:
  node_colors = {
    RevisionChangeset : 'lightgreen',
    OrderedChangeset : 'cyan',
    BranchChangeset : 'orange',
    TagChangeset : 'yellow',
    }

  def output_coarse_dot(self, f):
    """Output the graph in DOT format to file-like object f.

    Such a file can be rendered into a visual representation of the
    graph using tools like graphviz.  Include only changesets in the
    graph, and the dependencies between changesets."""

    f.write('digraph G {\n')
    for node in self:
      f.write(
          '  C%x [style=filled, fillcolor=%s];\n' % (
              node.id,
              self.node_colors[self._changeset_db[node.id].__class__],
              )
          )
    f.write('\n')

    for node in self:
      for succ_id in node.succ_ids:
        f.write('  C%x -> C%x\n' % (node.id, succ_id,))
    f.write('\n')

    f.write('}\n')

  def output_fine_dot(self, f):
    """Output the graph in DOT format to file-like object f.

    Such a file can be rendered into a visual representation of the
    graph using tools like graphviz.  Include all CVSItems and the
    CVSItem-CVSItem dependencies in the graph.  Group the CVSItems
    into clusters by changeset."""

    f.write('digraph G {\n')
    for node in self:
      f.write('  subgraph cluster_%x {\n' % (node.id,))
      f.write('    label = "C%x";\n' % (node.id,))
      changeset = self._changeset_db[node.id]
      for item_id in changeset.cvs_item_ids:
        f.write('    I%x;\n' % (item_id,))
      f.write('    style=filled;\n')
      f.write(
          '    fillcolor=%s;\n'
          % (self.node_colors[self._changeset_db[node.id].__class__],))
      f.write('  }\n\n')

    for node in self:
      changeset = self._changeset_db[node.id]
      for cvs_item in changeset.iter_cvs_items():
        for succ_id in cvs_item.get_succ_ids():
          f.write('  I%x -> I%x;\n' % (cvs_item.id, succ_id,))

    f.write('\n')

    f.write('}\n')
# A cvs_item doesn't depend on any cvs_items in either pred or succ:
LINK_NONE = 0

# A cvs_item depends on one or more cvs_items in pred but none in succ:
LINK_PRED = 1

# A cvs_item depends on one or more cvs_items in succ but none in pred:
LINK_SUCC = 2

# A cvs_item depends on one or more cvs_items in both pred and succ:
LINK_PASSTHRU = LINK_PRED | LINK_SUCC


class ChangesetGraphLink(object):
  """A PRED -> CHANGESET -> SUCC link in a changeset graph loop."""

  def __init__(self, pred, changeset, succ):
    """Represent a link in a loop in a changeset graph.

    This is the link that goes from PRED -> CHANGESET -> SUCC.

    We are mainly concerned with how many CVSItems have LINK_PRED,
    LINK_SUCC, and LINK_PASSTHRU type links to the neighboring
    commitsets.  If necessary, this class can also break up CHANGESET
    into multiple changesets."""

    self.pred = pred
    self.pred_ids = set(pred.cvs_item_ids)

    self.changeset = changeset

    self.succ_ids = set(succ.cvs_item_ids)
    self.succ = succ

    # A count of each type of link for cvs_items in changeset
    # (indexed by LINK_* constants):
    link_counts = [0] * 4

    # Iterate directly; there is no need to materialize the items into
    # a list for a single traversal:
    for cvs_item in changeset.iter_cvs_items():
      link_counts[self.get_link_type(cvs_item)] += 1

    [self.pred_links, self.succ_links, self.passthru_links] = link_counts[1:]

  def get_link_type(self, cvs_item):
    """Return the type of links from CVS_ITEM to self.PRED and self.SUCC.

    The return value is one of LINK_NONE, LINK_PRED, LINK_SUCC, or
    LINK_PASSTHRU."""

    retval = LINK_NONE

    if cvs_item.get_pred_ids() & self.pred_ids:
      retval |= LINK_PRED
    if cvs_item.get_succ_ids() & self.succ_ids:
      retval |= LINK_SUCC

    return retval

  def get_links_to_move(self):
    """Return the number of items that would be moved to split changeset."""

    # If items are linked on both sides, the smaller side moves; if
    # only one side has links, that side moves:
    return min(self.pred_links, self.succ_links) \
           or max(self.pred_links, self.succ_links)

  def is_breakable(self):
    """Return True iff breaking the changeset will do any good."""

    return self.pred_links != 0 or self.succ_links != 0

  def __cmp__(self, other):
    """Compare SELF with OTHER in terms of which would be better to break.

    The one that is better to break is considered the lesser."""

    return (
        - cmp(int(self.is_breakable()), int(other.is_breakable()))
        or cmp(self.passthru_links, other.passthru_links)
        or cmp(self.get_links_to_move(), other.get_links_to_move())
        )

  def break_changeset(self, changeset_key_generator):
    """Break up self.changeset and return the fragments.

    Break it up in such a way that the link is weakened as efficiently
    as possible.  Raise ValueError if the changeset is not breakable."""

    if not self.is_breakable():
      raise ValueError('Changeset is not breakable: %r' % self.changeset)

    pred_items = []
    succ_items = []

    # For each link type, should such CVSItems be moved to the
    # changeset containing the predecessor items or the one containing
    # the successor items?
    destination = {
      LINK_PRED : pred_items,
      LINK_SUCC : succ_items,
      }

    # Decide where the LINK_NONE and LINK_PASSTHRU items should go:
    if self.pred_links == 0:
      destination[LINK_NONE] = pred_items
      destination[LINK_PASSTHRU] = pred_items
    elif self.succ_links == 0:
      destination[LINK_NONE] = succ_items
      destination[LINK_PASSTHRU] = succ_items
    elif self.pred_links < self.succ_links:
      destination[LINK_NONE] = succ_items
      destination[LINK_PASSTHRU] = succ_items
    else:
      destination[LINK_NONE] = pred_items
      destination[LINK_PASSTHRU] = pred_items

    for cvs_item in self.changeset.iter_cvs_items():
      link_type = self.get_link_type(cvs_item)
      destination[link_type].append(cvs_item.id)

    # Create new changesets of the same type as the old one:
    return [
      self.changeset.create_split_changeset(
          changeset_key_generator.gen_id(), pred_items),
      self.changeset.create_split_changeset(
          changeset_key_generator.gen_id(), succ_items),
      ]

  def __str__(self):
    return 'Link<%x>(%d, %d, %d)' % (
        self.changeset.id,
        self.pred_links, self.succ_links, self.passthru_links)
class ChangesetGraphNode(object):
  """A node in the changeset dependency graph."""

  # One node exists per changeset, so keep instances lean:
  __slots__ = ['id', 'time_range', 'pred_ids', 'succ_ids']

  def __init__(self, changeset, time_range, pred_ids, succ_ids):
    """Create a node for CHANGESET with the given edges.

    TIME_RANGE is the range of times of CVSItems within the changeset;
    PRED_IDS/SUCC_IDS are sets of changeset ids of direct
    predecessors/successors."""

    # The node shares its id with the changeset it represents:
    self.id = changeset.id

    # The range of times of CVSItems within this Changeset:
    self.time_range = time_range

    # The set of changeset ids of changesets that are direct
    # predecessors of this one:
    self.pred_ids = pred_ids

    # The set of changeset ids of changesets that are direct
    # successors of this one:
    self.succ_ids = succ_ids

  def __repr__(self):
    """For convenience only.  The format is subject to change at any time."""

    pred_str = ','.join(['%x' % pred_id for pred_id in self.pred_ids])
    succ_str = ','.join(['%x' % succ_id for succ_id in self.succ_ids])
    return '%x; pred=[%s]; succ=[%s]' % (self.id, pred_str, succ_str)
class CheckDependenciesPass(Pass):
  """Check that the dependencies are self-consistent."""

  def __init__(self):
    Pass.__init__(self)

  def register_artifacts(self):
    # These artifacts are needed to resolve files and symbols while
    # checking item dependencies:
    self._register_temp_file_needed(config.PROJECTS)
    self._register_temp_file_needed(config.SYMBOL_DB)
    self._register_temp_file_needed(config.CVS_FILES_DB)

  def iter_cvs_items(self):
    """Iterate over every CVSItem to be checked.

    Abstract; concrete subclasses supply the item source."""

    raise NotImplementedError()

  def get_cvs_item(self, item_id):
    """Return the CVSItem with id ITEM_ID.

    Abstract; concrete subclasses supply the item source."""

    raise NotImplementedError()

  def run(self, run_options, stats_keeper):
    # Install the global context that CVSItem methods rely on:
    Ctx()._projects = read_projects(
        artifact_manager.get_temp_file(config.PROJECTS)
        )
    Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ)
    self.symbol_db = SymbolDatabase()
    Ctx()._symbol_db = self.symbol_db

    Log().quiet("Checking dependency consistency...")

    fatal_errors = []
    for cvs_item in self.iter_cvs_items():
      # Check that the pred_ids and succ_ids are mutually consistent:
      for pred_id in cvs_item.get_pred_ids():
        pred = self.get_cvs_item(pred_id)
        if not cvs_item.id in pred.get_succ_ids():
          fatal_errors.append(
              '%s lists pred=%s, but not vice versa.' % (cvs_item, pred,))

      for succ_id in cvs_item.get_succ_ids():
        succ = self.get_cvs_item(succ_id)
        if not cvs_item.id in succ.get_pred_ids():
          fatal_errors.append(
              '%s lists succ=%s, but not vice versa.' % (cvs_item, succ,))

    if fatal_errors:
      raise FatalException(
          'Dependencies inconsistent:\n'
          '%s\n'
          'Exited due to fatal error(s).'
          % ('\n'.join(fatal_errors),)
          )

    self.symbol_db.close()
    self.symbol_db = None
    Ctx()._cvs_file_db.close()
    Log().quiet("Done")


class CheckItemStoreDependenciesPass(CheckDependenciesPass):
  """Check dependencies of CVSItems read from an OldCVSItemStore."""

  def __init__(self, cvs_items_store_file):
    CheckDependenciesPass.__init__(self)
    self.cvs_items_store_file = cvs_items_store_file

  def register_artifacts(self):
    CheckDependenciesPass.register_artifacts(self)
    self._register_temp_file_needed(self.cvs_items_store_file)

  def iter_cvs_items(self):
    cvs_item_store = OldCVSItemStore(
        artifact_manager.get_temp_file(self.cvs_items_store_file))

    for cvs_file_items in cvs_item_store.iter_cvs_file_items():
      # Remember the current file's items so that get_cvs_item() can
      # resolve ids within the same file while we yield its items:
      self.current_cvs_file_items = cvs_file_items
      for cvs_item in cvs_file_items.values():
        yield cvs_item

    del self.current_cvs_file_items

    cvs_item_store.close()

  def get_cvs_item(self, item_id):
    return self.current_cvs_file_items[item_id]


class CheckIndexedItemStoreDependenciesPass(CheckDependenciesPass):
  """Check dependencies of CVSItems read from an IndexedCVSItemStore."""

  def __init__(self, cvs_items_store_file, cvs_items_store_index_file):
    CheckDependenciesPass.__init__(self)
    self.cvs_items_store_file = cvs_items_store_file
    self.cvs_items_store_index_file = cvs_items_store_index_file

  def register_artifacts(self):
    CheckDependenciesPass.register_artifacts(self)
    self._register_temp_file_needed(self.cvs_items_store_file)
    self._register_temp_file_needed(self.cvs_items_store_index_file)

  def iter_cvs_items(self):
    return self.cvs_item_store.itervalues()

  def get_cvs_item(self, item_id):
    # The indexed store supports random access by item id:
    return self.cvs_item_store[item_id]

  def run(self, run_options, stats_keeper):
    # Open the indexed store before the base-class run() iterates it:
    self.cvs_item_store = IndexedCVSItemStore(
        artifact_manager.get_temp_file(self.cvs_items_store_file),
        artifact_manager.get_temp_file(self.cvs_items_store_index_file),
        DB_OPEN_READ)

    CheckDependenciesPass.run(self, run_options, stats_keeper)

    self.cvs_item_store.close()
    self.cvs_item_store = None
To +generate the text for a typical revision, we need the revision's delta +text plus the fulltext of the previous revision. Therefore, we +maintain a checkout database containing a copy of the fulltext of any +revision for which subsequent revisions still need to be retrieved. +It is crucial to remove text from this database as soon as it is no +longer needed, to prevent it from growing enormous. + +There are two reasons that the text from a revision can be needed: (1) +because the revision itself still needs to be output to a dumpfile; +(2) because another revision needs it as the base of its delta. We +maintain a reference count for each revision, which includes *both* +possibilities. The first time a revision's text is needed, it is +generated by applying the revision's deltatext to the previous +revision's fulltext, and the resulting fulltext is stored in the +checkout database. Each time a revision's fulltext is retrieved, its +reference count is decremented. When the reference count goes to +zero, then the fulltext is deleted from the checkout database. + +The administrative data for managing this consists of one TextRecord +entry for each revision. Each TextRecord has an id, which is the same +id as used for the corresponding CVSRevision instance. It also +maintains a count of the times it is expected to be retrieved. +TextRecords come in several varieties: + +FullTextRecord -- Used for revisions whose fulltext is contained + directly in the RCS file, and therefore available during + CollectRevsPass (i.e., typically revision 1.1 of each file). + +DeltaTextRecord -- Used for revisions that are defined via a delta + relative to some other TextRecord. These records record the id of + the TextRecord that holds the base text against which the delta is + defined. When the text for a DeltaTextRecord is retrieved, the + DeltaTextRecord instance is deleted and a CheckedOutTextRecord + instance is created to take its place. 
+ +CheckedOutTextRecord -- Used during OutputPass for a revision that + started out as a DeltaTextRecord, but has already been retrieved + (and therefore its fulltext is stored in the checkout database). + +While a file is being processed during CollectRevsPass, the fulltext +and deltas are stored to the delta database, and TextRecord instances +are created to keep track of things. The reference counts are all +initialized to zero. + +After CollectRevsPass has done any preliminary tree mangling, its +_FileDataCollector.parse_completed(), method calls +RevisionRecorder.finish_file(), passing it the CVSFileItems instance +that describes the revisions in the file. At this point the reference +counts for the file's TextRecords are updated: each record referred to +by a delta has its refcount incremented, and each record that +corresponds to a non-delete CVSRevision is incremented. After that, +any records with refcount==0 are removed. When one record is removed, +that can cause another record's reference count to go to zero and be +removed too, recursively. When a TextRecord is deleted at this stage, +its deltatext is also deleted from the delta database. 
class TextRecord(object):
  """Bookkeeping data for the text of a single CVSRevision."""

  __slots__ = ['id', 'refcount']

  def __init__(self, id):
    # The cvs_rev_id of the revision whose text this is.
    self.id = id

    # How many times this revision's text still has to be retrieved.
    self.refcount = 0

  def __getstate__(self):
    return (self.id, self.refcount,)

  def __setstate__(self, state):
    (self.id, self.refcount,) = state

  def increment_dependency_refcounts(self, db):
    """Increment the refcounts of any records that this one depends on."""

    pass

  def decrement_refcount(self, db):
    """Note one retrieval of our text.

    When the reference count reaches zero, ask DB to discard this
    record."""

    self.refcount -= 1
    if self.refcount == 0:
      db.discard(self.id)

  def checkout(self, db):
    """Workhorse of the checkout process.

    Return the text for this revision, decrement our reference count,
    and update the databases depending on whether there will be future
    checkouts."""

    raise NotImplementedError()

  def free(self, db):
    """This instance will never again be checked out; free it.

    Also free any associated resources and decrement the refcounts of
    any other TextRecords that this one depends on."""

    raise NotImplementedError()


class FullTextRecord(TextRecord):
  """A TextRecord whose fulltext is stored directly in the delta database.

  Typically used for revision 1.1 of each file, whose fulltext is
  available during CollectRevsPass."""

  __slots__ = []

  def __getstate__(self):
    return (self.id, self.refcount,)

  def __setstate__(self, state):
    (self.id, self.refcount,) = state

  def checkout(self, db):
    retval = db.delta_db[self.id]
    self.decrement_refcount(db)
    return retval

  def free(self, db):
    del db.delta_db[self.id]

  def __str__(self):
    return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)


class DeltaTextRecord(TextRecord):
  """A TextRecord defined as a delta relative to another TextRecord."""

  __slots__ = ['pred_id']

  def __init__(self, id, pred_id):
    TextRecord.__init__(self, id)

    # The cvs_rev_id of the revision relative to which this delta is
    # defined.
    self.pred_id = pred_id

  def __getstate__(self):
    return (self.id, self.refcount, self.pred_id,)

  def __setstate__(self, state):
    (self.id, self.refcount, self.pred_id,) = state

  def increment_dependency_refcounts(self, db):
    # We hold a reference to the record our delta is based on:
    db[self.pred_id].refcount += 1

  def checkout(self, db):
    stream = RCSStream(db[self.pred_id].checkout(db))
    stream.apply_diff(db.delta_db[self.id])
    text = stream.get_text()
    del stream
    self.refcount -= 1
    if self.refcount == 0:
      # This text will never be needed again; just delete ourselves
      # without ever having stored the fulltext to the checkout
      # database:
      del db[self.id]
    else:
      # Cache the fulltext and swap in a CheckedOutTextRecord in place
      # of ourselves:
      db.checkout_db['%x' % self.id] = text
      replacement = CheckedOutTextRecord(self.id)
      replacement.refcount = self.refcount
      db.replace(replacement)
    return text

  def free(self, db):
    del db.delta_db[self.id]
    db[self.pred_id].decrement_refcount(db)

  def __str__(self):
    return 'DeltaTextRecord(%x -> %x, %d)' \
           % (self.pred_id, self.id, self.refcount,)


class CheckedOutTextRecord(TextRecord):
  """A TextRecord whose fulltext is already cached in the checkout db.

  Created during OutputPass for a revision that started out as a
  DeltaTextRecord but has already been retrieved once."""

  __slots__ = []

  def __getstate__(self):
    return (self.id, self.refcount,)

  def __setstate__(self, state):
    (self.id, self.refcount,) = state

  def checkout(self, db):
    retval = db.checkout_db['%x' % self.id]
    self.decrement_refcount(db)
    return retval

  def free(self, db):
    del db.checkout_db['%x' % self.id]

  def __str__(self):
    return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)
class NullDatabase(object):
  """A do-nothing database that can be used with TextRecordDatabase.

  Use this when you don't actually want to allow anything to be
  deleted."""

  def __delitem__(self, id):
    pass


class TextRecordDatabase:
  """Hold the TextRecord instances that are currently live.

  During CollectRevsPass and FilterSymbolsPass, files are processed
  one by one and a new TextRecordDatabase instance is used for each
  file.  During OutputPass, a single TextRecordDatabase instance is
  used for the duration of OutputPass; individual records are added
  and removed when they are active."""

  def __init__(self, delta_db, checkout_db):
    # A map { cvs_rev_id -> TextRecord }.
    self.text_records = {}

    # A database-like object using cvs_rev_ids as keys and containing
    # fulltext/deltatext strings as values.  Its __getitem__() method
    # is used to retrieve deltas when they are needed, and its
    # __delitem__() method is used to delete deltas when they can be
    # freed.  The modifiability of the delta database varies from pass
    # to pass, so the object stored here varies as well:
    #
    # CollectRevsPass: a fully-functional IndexedDatabase, so deltas
    #     that will not be needed can be deleted.
    #
    # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
    #     modified during this pass, and deltas are never retrieved,
    #     so a dummy object suffices.
    #
    # OutputPass: an IndexedDatabase whose __delitem__() has been
    #     disabled, because deltas must be retrieved but the delta
    #     database must not be modified.
    self.delta_db = delta_db

    # A database-like object using cvs_rev_ids as keys and containing
    # fulltext strings as values.  Only set during OutputPass.
    self.checkout_db = checkout_db

    # Either None, or a list of ids of text_records awaiting deletion.
    # When it is a list, discard() only queues ids on it instead of
    # deleting them immediately; when it is None, discard() processes
    # deletions itself.  See discard() for why.
    self.deferred_deletes = None

  def __getstate__(self):
    return (list(self.text_records.values()),)

  def __setstate__(self, state):
    (text_records,) = state
    self.text_records = {}
    for text_record in text_records:
      self.add(text_record)
    # Unpickled instances get inert databases:
    self.delta_db = NullDatabase()
    self.checkout_db = NullDatabase()
    self.deferred_deletes = None

  def add(self, text_record):
    """Add TEXT_RECORD to our database.

    There must not already be a record with the same id."""

    assert text_record.id not in self.text_records

    self.text_records[text_record.id] = text_record

  def __getitem__(self, id):
    return self.text_records[id]

  def __delitem__(self, id):
    """Free the record with the specified ID."""

    del self.text_records[id]

  def replace(self, text_record):
    """Store TEXT_RECORD in place of the existing record with the same id.

    Do not do anything with the old record."""

    assert text_record.id in self.text_records
    self.text_records[text_record.id] = text_record

  def discard(self, *ids):
    """The text records with IDS are no longer needed; discard them.

    This involves calling their free() methods and also removing them
    from SELF.

    If SELF.deferred_deletes is not None, a discard is already being
    processed higher up the call stack, so the ids are only queued on
    that list.  This iterative scheme prevents a stack overflow from
    the avalanche of deletes that can result from discarding a long
    chain of revisions."""

    if self.deferred_deletes is not None:
      # An outer-level discard is in progress; let it do the work.
      self.deferred_deletes.extend(ids)
      return

    # This is the outer-level call; drain the queue iteratively.
    self.deferred_deletes = list(ids)
    while self.deferred_deletes:
      id = self.deferred_deletes.pop()
      text_record = self[id]
      if text_record.refcount != 0:
        raise InternalError(
            'TextRecordDatabase.discard(%s) called with refcount = %d'
            % (text_record, text_record.refcount,)
            )
      # This call might cause other text_record ids to be added to
      # self.deferred_deletes:
      text_record.free(self)
      del self[id]
    self.deferred_deletes = None

  def itervalues(self):
    return iter(self.text_records.values())

  def recompute_refcounts(self, cvs_file_items):
    """Recompute the refcounts of the contained TextRecords.

    Use CVS_FILE_ITEMS to determine which records will be needed by
    cvs2svn."""

    # Reset every count, then re-derive the two kinds of references:
    for text_record in self.itervalues():
      text_record.refcount = 0

    # (1) Records needed as the base of another record's delta:
    for text_record in self.itervalues():
      text_record.increment_dependency_refcounts(self.text_records)

    # (2) Records whose text cvs2svn will actually check out:
    for lod_items in cvs_file_items.iter_lods():
      for cvs_rev in lod_items.cvs_revisions:
        if isinstance(cvs_rev, CVSRevisionModification):
          self[cvs_rev.id].refcount += 1

  def free_unused(self):
    """Free any TextRecords whose reference counts are zero.

    The deletion of some of these text records might cause others to
    become unused, in which case they are deleted automatically.  The
    initially-unused records are not referred to by any others, so
    none of them can be deleted before we get to them; but it *is*
    crucial to snapshot the whole unused list before starting."""

    doomed = [
        text_record.id
        for text_record in self.itervalues()
        if text_record.refcount == 0
        ]

    self.discard(*doomed)

  def log_leftovers(self):
    """If any TextRecords still exist, log them."""

    if self.text_records:
      Log().warn(
          "%s: internal problem: leftover revisions in the checkout cache:"
          % warning_prefix)
      for text_record in self.itervalues():
        Log().warn('  %s' % (text_record,))

  def __repr__(self):
    """Debugging output of the current contents of the TextRecordDatabase."""

    lines = ['TextRecordDatabase:']
    lines.extend('  %s' % (text_record,) for text_record in self.itervalues())
    return '\n'.join(lines)
+ def record_text(self, cvs_rev, log, text): + if isinstance(cvs_rev.lod, Trunk): + # On trunk, revisions are encountered in reverse order (1. + # ... 1.1) and deltas are inverted. The first text that we see + # is the fulltext for the HEAD revision. After that, the text + # corresponding to revision 1.N is the delta (1. -> + # 1.)). We have to invert the deltas here so that we can + # read the revisions out in dependency order; that is, for + # revision 1.1 we want the fulltext, and for revision 1. we + # want the delta (1. -> 1.). This means that we can't + # compute the delta for a revision until we see its logical + # parent. When we finally see revision 1.1 (which is recognized + # because it doesn't have a parent), we can record the diff (1.1 + # -> 1.2) for revision 1.2, and also the fulltext for 1.1. + + if cvs_rev.next_id is None: + # This is HEAD, as fulltext. Initialize the RCSStream so + # that we can compute deltas backwards in time. + self._stream = RCSStream(text) + else: + # Any other trunk revision is a backward delta. Apply the + # delta to the RCSStream to mutate it to the contents of this + # revision, and also to get the reverse delta, which we store + # as the forward delta of our child revision. + try: + text = self._stream.invert_diff(text) + except MalformedDeltaException, (msg): + Log().error('Malformed RCS delta in %s, revision %s: %s' + % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, + msg)) + raise RuntimeError + text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id) + self._writeout(text_record, text) + + if cvs_rev.prev_id is None: + # This is revision 1.1. Write its fulltext: + text_record = FullTextRecord(cvs_rev.id) + self._writeout(text_record, self._stream.get_text()) + + # There will be no more trunk revisions delivered, so free the + # RCSStream. + del self._stream + + else: + # On branches, revisions are encountered in logical order + # (.1 ... .) and the text corresponding to + # revision . is the forward delta (. 
-> + # .). That's what we need, so just store it. + + # FIXME: It would be nice to avoid writing out branch deltas + # when --trunk-only. (They will be deleted when finish_file() + # is called, but if the delta db is in an IndexedDatabase the + # deletions won't actually recover any disk space.) + text_record = DeltaTextRecord(cvs_rev.id, cvs_rev.prev_id) + self._writeout(text_record, text) + + return None + + def _writeout(self, text_record, text): + self.text_record_db.add(text_record) + self._rcs_deltas[text_record.id] = text + + def finish_file(self, cvs_file_items): + """Finish processing of the current file. + + Compute the initial text record refcounts, discard any records + that are unneeded, and store the text records for the file to the + _rcs_trees database.""" + + # Delete our copy of the preliminary CVSFileItems: + del self._cvs_file_items + + self.text_record_db.recompute_refcounts(cvs_file_items) + self.text_record_db.free_unused() + self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db + del self.text_record_db + + def finish(self): + self._rcs_deltas.close() + self._rcs_trees.close() + + +class InternalRevisionExcluder(RevisionExcluder): + """The RevisionExcluder used by InternalRevisionReader.""" + + def register_artifacts(self, which_pass): + artifact_manager.register_temp_file_needed( + config.RCS_TREES_STORE, which_pass + ) + artifact_manager.register_temp_file_needed( + config.RCS_TREES_INDEX_TABLE, which_pass + ) + artifact_manager.register_temp_file( + config.RCS_TREES_FILTERED_STORE, which_pass + ) + artifact_manager.register_temp_file( + config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass + ) + + def start(self): + self._tree_db = IndexedDatabase( + artifact_manager.get_temp_file(config.RCS_TREES_STORE), + artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE), + DB_OPEN_READ) + primer = (FullTextRecord, DeltaTextRecord) + self._new_tree_db = IndexedDatabase( + 
artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE), + artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE), + DB_OPEN_NEW, PrimedPickleSerializer(primer)) + + def process_file(self, cvs_file_items): + text_record_db = self._tree_db[cvs_file_items.cvs_file.id] + text_record_db.recompute_refcounts(cvs_file_items) + text_record_db.free_unused() + self._new_tree_db[cvs_file_items.cvs_file.id] = text_record_db + + def finish(self): + self._tree_db.close() + self._new_tree_db.close() + + +class _KeywordExpander: + """A class whose instances provide substitutions for CVS keywords. + + This class is used via its __call__() method, which should be called + with a match object representing a match for a CVS keyword string. + The method returns the replacement for the matched text. + + The __call__() method works by calling the method with the same name + as that of the CVS keyword (converted to lower case). + + Instances of this class can be passed as the REPL argument to + re.sub().""" + + date_fmt_old = "%Y/%m/%d %H:%M:%S" # CVS 1.11, rcs + date_fmt_new = "%Y-%m-%d %H:%M:%S" # CVS 1.12 + + date_fmt = date_fmt_new + + @classmethod + def use_old_date_format(klass): + """Class method to ensure exact compatibility with CVS 1.11 + output. 
Use this if you want to verify your conversion and you're + using CVS 1.11.""" + klass.date_fmt = klass.date_fmt_old + + def __init__(self, cvs_rev): + self.cvs_rev = cvs_rev + + def __call__(self, match): + return '$%s: %s $' % \ + (match.group(1), getattr(self, match.group(1).lower())(),) + + def author(self): + return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author + + def date(self): + return time.strftime(self.date_fmt, + time.gmtime(self.cvs_rev.timestamp)) + + def header(self): + return '%s %s %s %s Exp' % \ + (self.source(), self.cvs_rev.rev, self.date(), self.author()) + + def id(self): + return '%s %s %s %s Exp' % \ + (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author()) + + def locker(self): + # Handle kvl like kv, as a converted repo is supposed to have no + # locks. + return '' + + def log(self): + # Would need some special handling. + return 'not supported by cvs2svn' + + def name(self): + # Cannot work, as just creating a new symbol does not check out + # the revision again. + return 'not supported by cvs2svn' + + def rcsfile(self): + return self.cvs_rev.cvs_file.basename + ",v" + + def revision(self): + return self.cvs_rev.rev + + def source(self): + project = self.cvs_rev.cvs_file.project + return project.cvs_repository_root + '/' + project.cvs_module + \ + self.cvs_rev.cvs_file.cvs_path + ",v" + + def state(self): + # We check out only live revisions. 
class InternalRevisionReader(RevisionReader):
  """A RevisionReader that reads the contents from an own delta store."""

  # Pattern fragments matching the CVS keywords we know how to expand:
  _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
  # Matches only already-expanded keywords (with a ':value' part):
  _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
  # Matches keywords whether expanded or not:
  _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')

  def __init__(self, compress):
    self._compress = compress

  def register_artifacts(self, which_pass):
    artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
    for artifact in [
        config.RCS_DELTAS_STORE,
        config.RCS_DELTAS_INDEX_TABLE,
        config.RCS_TREES_FILTERED_STORE,
        config.RCS_TREES_FILTERED_INDEX_TABLE,
        ]:
      artifact_manager.register_temp_file_needed(artifact, which_pass)

  def start(self):
    """Open the delta, tree, and checkout databases."""

    self._delta_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
        artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
        DB_OPEN_READ)
    # The delta database must not be modified during this pass, so
    # turn its deletions into no-ops:
    self._delta_db.__delitem__ = lambda id: None
    self._tree_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
        DB_OPEN_READ)
    serializer = MarshalSerializer()
    if self._compress:
      serializer = CompressingSerializer(serializer)
    self._co_db = Database(
        artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
        serializer)

    # The set of CVSFile instances whose TextRecords have already been
    # read:
    self._loaded_files = set()

    # Bookkeeping for the TextRecords of files that currently have
    # live revisions:
    self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)

  def _get_text_record(self, cvs_rev):
    """Return the TextRecord instance for CVS_REV.

    If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
    do so now."""

    if cvs_rev.cvs_file not in self._loaded_files:
      for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
        self._text_record_db.add(text_record)
      self._loaded_files.add(cvs_rev.cvs_file)

    return self._text_record_db[cvs_rev.id]

  def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
    """Check out the text for revision CVS_REV from the repository.

    Return the text wrapped in a readable file object.  If
    SUPPRESS_KEYWORD_SUBSTITUTION is True, any RCS keywords will be
    _un_expanded prior to returning the file content.  Note that $Log$
    never actually generates a log (which makes test 'requires_cvs()'
    fail).

    Revisions may be requested in any order, but if they are not
    requested in dependency order the checkout database will become
    very large.  Revisions may be skipped.  Each revision may be
    requested only once."""

    try:
      text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
    except MalformedDeltaException as e:
      raise FatalError('Malformed RCS delta in %s, revision %s: %s'
                       % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, e))
    mode = cvs_rev.cvs_file.mode
    if mode != 'b' and mode != 'o':
      # Not binary and not 'o' (no keyword substitution at all):
      if suppress_keyword_substitution or mode == 'k':
        # Collapse expanded keywords back to their bare form:
        text = self._kw_re.sub(r'$\1$', text)
      else:
        # Expand keywords for this revision:
        text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)

    return cStringIO.StringIO(text)

  def finish(self):
    """Log any leftover revisions and close all databases."""

    self._text_record_db.log_leftovers()

    del self._text_record_db
    self._delta_db.close()
    self._tree_db.close()
    self._co_db.close()
+# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""Data collection classes. + +This module contains the code used to collect data from the CVS +repository. It parses *,v files, recording all useful information +except for the actual file contents (though even the file contents +might be recorded by the RevisionRecorder if one is configured). + +As a *,v file is parsed, the information pertaining to the file is +accumulated in memory, mostly in _RevisionData, _BranchData, and +_TagData objects. When parsing is complete, a final pass is made over +the data to create some final dependency links, collect statistics, +etc., then the _*Data objects are converted into CVSItem objects +(CVSRevision, CVSBranch, and CVSTag respectively) and the CVSItems are +dumped into databases. + +During the data collection, persistent unique ids are allocated to +many types of objects: CVSFile, Symbol, and CVSItems. CVSItems are a +special case. CVSItem ids are unique across all CVSItem types, and +the ids are carried over from the corresponding data collection +objects: + + _RevisionData -> CVSRevision + + _BranchData -> CVSBranch + + _TagData -> CVSTag + +In a later pass it is possible to convert tags <-> branches. But even +if this occurs, the new branch or tag uses the same id as the old tag +or branch. 
+ +""" + + +import os +import stat +import re + +from cvs2svn_lib import config +from cvs2svn_lib.common import DB_OPEN_NEW +from cvs2svn_lib.common import FatalError +from cvs2svn_lib.common import warning_prefix +from cvs2svn_lib.common import error_prefix +from cvs2svn_lib.common import IllegalSVNPathError +from cvs2svn_lib.common import verify_svn_filename_legal +from cvs2svn_lib.log import Log +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.artifact_manager import artifact_manager +from cvs2svn_lib.project import FileInAndOutOfAtticException +from cvs2svn_lib.cvs_file import CVSPath +from cvs2svn_lib.cvs_file import CVSDirectory +from cvs2svn_lib.cvs_file import CVSFile +from cvs2svn_lib.symbol import Symbol +from cvs2svn_lib.symbol import Trunk +from cvs2svn_lib.cvs_item import CVSRevision +from cvs2svn_lib.cvs_item import CVSBranch +from cvs2svn_lib.cvs_item import CVSTag +from cvs2svn_lib.cvs_item import cvs_revision_type_map +from cvs2svn_lib.cvs_file_items import VendorBranchError +from cvs2svn_lib.cvs_file_items import CVSFileItems +from cvs2svn_lib.key_generator import KeyGenerator +from cvs2svn_lib.cvs_item_database import NewCVSItemStore +from cvs2svn_lib.symbol_statistics import SymbolStatisticsCollector +from cvs2svn_lib.metadata_database import MetadataDatabase +from cvs2svn_lib.metadata_database import MetadataLogger + +import cvs2svn_rcsparse + + +# A regular expression defining "valid" revision numbers (used to +# check that symbol definitions are reasonable). +_valid_revision_re = re.compile(r''' + ^ + (?:\d+\.)+ # Digit groups with trailing dots + \d+ # And the last digit group. + $ + ''', re.VERBOSE) + +_branch_revision_re = re.compile(r''' + ^ + ((?:\d+\.\d+\.)+) # A nonzero even number of digit groups w/trailing dot + (?:0\.)? # CVS sticks an extra 0 here; RCS does not + (\d+) # And the last digit group + $ + ''', re.VERBOSE) + + +def rev_tuple(rev): + """Return a tuple of integers corresponding to revision number REV. 
def rev_tuple(rev):
  """Return a tuple of integers corresponding to revision number REV.

  For example, if REV is '1.2.3.4', then return (1,2,3,4)."""

  return tuple(int(part) for part in rev.split('.'))


def is_trunk_revision(rev):
  """Return True iff REV is a trunk revision.

  REV is a revision number corresponding to a specific revision (i.e.,
  not a whole branch)."""

  # Trunk revisions have exactly two components (e.g., '1.5'):
  return rev.count('.') == 1


def is_branch_revision_number(rev):
  """Return True iff REV is a branch revision number.

  REV is a CVS revision number in canonical form (i.e., with zeros
  removed).  Return True iff it refers to a whole branch, as opposed
  to a single revision."""

  # A whole branch has an odd number of components, hence an even
  # number of dots (e.g., '1.5.2'):
  return rev.count('.') % 2 == 0


def is_same_line_of_development(rev1, rev2):
  """Return True iff REV1 and REV2 are on the same line of development.

  That is, return True iff both are on trunk, or both are on the same
  branch; return False otherwise.  Either REV1 or REV2 can be None, in
  which case automatically return False."""

  if rev1 is None or rev2 is None:
    return False
  if is_trunk_revision(rev1) and is_trunk_revision(rev2):
    return True
  # Two revisions share a line of development iff they have the same
  # prefix up to (but not including) the last dot:
  return rev1[:rev1.rfind('.')] == rev2[:rev2.rfind('.')]
+ self.parent_branch_data = None + + # The revision number of the parent of this revision along the + # same line of development, if any. For the first revision R on a + # branch, we consider the revision from which R sprouted to be the + # 'parent'. If this is the root revision in the file's revision + # tree, then this field is None. + # + # Note that this revision can't be determined arithmetically (due + # to cvsadmin -o), which is why this field is necessary. + self.parent = None + + # The revision number of the primary child of this revision (the + # child along the same line of development), if any; otherwise, + # None. + self.child = None + + # The _BranchData instances of branches that sprout from this + # revision, sorted in ascending order by branch number. It would + # be inconvenient to initialize it here because we would have to + # scan through all branches known by the _SymbolDataCollector to + # find the ones having us as the parent. Instead, this + # information is filled in by + # _FileDataCollector._resolve_dependencies() and sorted by + # _FileDataCollector._sort_branches(). + self.branches_data = [] + + # The revision numbers of the first commits on any branches on + # which commits occurred. This dependency is kept explicitly + # because otherwise a revision-only topological sort would miss + # the dependency that exists via branches_data. + self.branches_revs_data = [] + + # The _TagData instances of tags that are connected to this + # revision. + self.tags_data = [] + + # A token that may be returned from + # RevisionRecorder.record_text(). It can be used by + # RevisionReader to obtain the text again. + self.revision_recorder_token = None + + def get_first_on_branch_id(self): + return self.parent_branch_data and self.parent_branch_data.id + + +class _SymbolData: + """Collection area for information about a symbol in a single CVSFile. 
+ + SYMBOL is an instance of Symbol, undifferentiated as a Branch or a + Tag regardless of whether self is a _BranchData or a _TagData.""" + + def __init__(self, id, symbol): + """Initialize an object for SYMBOL.""" + + # The unique id that will be used for this particular symbol in + # this particular file. This same id will be used for the CVSItem + # that is derived from this instance. + self.id = id + + # An instance of Symbol. + self.symbol = symbol + + +class _BranchData(_SymbolData): + """Collection area for information about a Branch in a single CVSFile.""" + + def __init__(self, id, symbol, branch_number): + _SymbolData.__init__(self, id, symbol) + + # The branch number (e.g., '1.5.2') of this branch. + self.branch_number = branch_number + + # The revision number of the revision from which this branch + # sprouts (e.g., '1.5'). + self.parent = self.branch_number[:self.branch_number.rindex(".")] + + # The revision number of the first commit on this branch, if any + # (e.g., '1.5.2.1'); otherwise, None. + self.child = None + + +class _TagData(_SymbolData): + """Collection area for information about a Tag in a single CVSFile.""" + + def __init__(self, id, symbol, rev): + _SymbolData.__init__(self, id, symbol) + + # The revision number being tagged (e.g., '1.5.2.3'). + self.rev = rev + + +class _SymbolDataCollector(object): + """Collect information about symbols in a single CVSFile.""" + + def __init__(self, fdc, cvs_file): + self.fdc = fdc + self.cvs_file = cvs_file + + self.pdc = self.fdc.pdc + self.collect_data = self.fdc.collect_data + + # A list [(name, revision), ...] of symbols defined in the header + # of the file. The name has already been transformed using the + # symbol transform rules. If the symbol transform rules indicate + # that the symbol should be ignored, then it is never added to + # this list. This list is processed then deleted in + # process_symbols(). 
+ self._symbol_defs = [] + + # A set containing the transformed names of symbols in this file + # (used to detect duplicats during processing of unlabeled + # branches): + self._defined_symbols = set() + + # Map { branch_number : _BranchData }, where branch_number has an + # odd number of digits. + self.branches_data = { } + + # Map { revision : [ tag_data ] }, where revision has an even + # number of digits, and the value is a list of _TagData objects + # for tags that apply to that revision. + self.tags_data = { } + + def _add_branch(self, name, branch_number): + """Record that BRANCH_NUMBER is the branch number for branch NAME, + and derive and record the revision from which NAME sprouts. + BRANCH_NUMBER is an RCS branch number with an odd number of + components, for example '1.7.2' (never '1.7.0.2'). Return the + _BranchData instance (which is usually newly-created).""" + + branch_data = self.branches_data.get(branch_number) + + if branch_data is not None: + Log().warn( + "%s: in '%s':\n" + " branch '%s' already has name '%s',\n" + " cannot also have name '%s', ignoring the latter\n" + % (warning_prefix, + self.cvs_file.filename, branch_number, + branch_data.symbol.name, name) + ) + return branch_data + + symbol = self.pdc.get_symbol(name) + branch_data = _BranchData( + self.collect_data.item_key_generator.gen_id(), symbol, branch_number + ) + self.branches_data[branch_number] = branch_data + return branch_data + + def _construct_distinct_name(self, name, original_name): + """Construct a distinct symbol name from NAME. + + If NAME is distinct, return it. 
If it is already used in this + file (as determined from its presence in self._defined_symbols), + construct and return a new name that is not already used.""" + + if name not in self._defined_symbols: + return name + else: + index = 1 + while True: + dup_name = '%s-DUPLICATE-%d' % (name, index,) + if dup_name not in self._defined_symbols: + self.collect_data.record_fatal_error( + "Symbol name '%s' is already used in '%s'.\n" + "The unlabeled branch '%s' must be renamed using " + "--symbol-transform." + % (name, self.cvs_file.filename, original_name,) + ) + return dup_name + + def _add_unlabeled_branch(self, branch_number): + original_name = "unlabeled-" + branch_number + name = self.transform_symbol(original_name, branch_number) + if name is None: + self.collect_data.record_fatal_error( + "The unlabeled branch '%s' in '%s' contains commits.\n" + "It may not be ignored via a symbol transform. (Use --exclude " + "instead.)" + % (original_name, self.cvs_file.filename,) + ) + # Retain the original name to allow the conversion to continue: + name = original_name + + distinct_name = self._construct_distinct_name(name, original_name) + self._defined_symbols.add(distinct_name) + return self._add_branch(distinct_name, branch_number) + + def _add_tag(self, name, revision): + """Record that tag NAME refers to the specified REVISION.""" + + symbol = self.pdc.get_symbol(name) + tag_data = _TagData( + self.collect_data.item_key_generator.gen_id(), symbol, revision + ) + self.tags_data.setdefault(revision, []).append(tag_data) + return tag_data + + def transform_symbol(self, name, revision): + """Transform a symbol according to the project's symbol transforms. + + Transform the symbol with the original name NAME and canonicalized + revision number REVISION. Return the new symbol name or None if + the symbol should be ignored entirely. 
+ + Log the results of the symbol transform if necessary.""" + + old_name = name + # Apply any user-defined symbol transforms to the symbol name: + name = self.cvs_file.project.transform_symbol( + self.cvs_file, name, revision + ) + + if name is None: + # Ignore symbol: + self.pdc.log_symbol_transform(old_name, None) + Log().verbose( + " symbol '%s'=%s ignored in %s" + % (old_name, revision, self.cvs_file.filename,) + ) + else: + if name != old_name: + self.pdc.log_symbol_transform(old_name, name) + Log().verbose( + " symbol '%s'=%s transformed to '%s' in %s" + % (old_name, revision, name, self.cvs_file.filename,) + ) + + return name + + def define_symbol(self, name, revision): + """Record a symbol definition for later processing.""" + + # Canonicalize the revision number: + revision = _branch_revision_re.sub(r'\1\2', revision) + + # Apply any user-defined symbol transforms to the symbol name: + name = self.transform_symbol(name, revision) + + if name is not None: + # Verify that the revision number is valid: + if _valid_revision_re.match(revision): + # The revision number is valid; record it for later processing: + self._symbol_defs.append( (name, revision) ) + else: + Log().warn( + 'In %r:\n' + ' branch %r references invalid revision %s\n' + ' and will be ignored.' + % (self.cvs_file.filename, name, revision,) + ) + + def _eliminate_trivial_duplicate_defs(self, symbol_defs): + """Iterate through SYMBOL_DEFS, Removing identical duplicate definitions. + + Duplicate definitions of symbol names have been seen in the wild, + and they can also happen when --symbol-transform is used. If a + symbol is defined to the same revision number repeatedly, then + ignore all but the last definition.""" + + # Make a copy, since we have to iterate through the definitions + # twice: + symbol_defs = list(symbol_defs) + + # A map { (name, revision) : [index,...] 
} of the indexes where + # symbol definitions name=revision were found: + known_definitions = {} + for (i, symbol_def) in enumerate(symbol_defs): + known_definitions.setdefault(symbol_def, []).append(i) + + # A set of the indexes of entries that have to be removed from + # symbol_defs: + dup_indexes = set() + for ((name, revision), indexes) in known_definitions.iteritems(): + if len(indexes) > 1: + Log().verbose( + "in %r:\n" + " symbol %s:%s defined multiple times; ignoring duplicates\n" + % (self.cvs_file.filename, name, revision,) + ) + dup_indexes.update(indexes[:-1]) + + for (i, symbol_def) in enumerate(symbol_defs): + if i not in dup_indexes: + yield symbol_def + + def _process_duplicate_defs(self, symbol_defs): + """Iterate through SYMBOL_DEFS, processing duplicate names. + + Duplicate definitions of symbol names have been seen in the wild, + and they can also happen when --symbol-transform is used. If a + symbol is defined multiple times, then it is a fatal error. This + method should be called after _eliminate_trivial_duplicate_defs().""" + + # Make a copy, since we have to access multiple times: + symbol_defs = list(symbol_defs) + + # A map {name : [index,...]} mapping the names of symbols to a + # list of their definitions' indexes in symbol_defs: + known_symbols = {} + for (i, (name, revision)) in enumerate(symbol_defs): + known_symbols.setdefault(name, []).append(i) + + known_symbols = known_symbols.items() + known_symbols.sort() + dup_indexes = set() + for (name, indexes) in known_symbols: + if len(indexes) > 1: + # This symbol was defined multiple times. 
+        self.collect_data.record_fatal_error(
+            "Multiple definitions of the symbol '%s' in '%s': %s" % (
+                name, self.cvs_file.filename,
+                ' '.join([symbol_defs[i][1] for i in indexes]),
+                )
+            )
+        # Ignore all but the last definition for now, to allow the
+        # conversion to proceed:
+        dup_indexes.update(indexes[:-1])
+
+    for (i, symbol_def) in enumerate(symbol_defs):
+      if i not in dup_indexes:
+        yield symbol_def
+
+  def _process_symbol(self, name, revision):
+    """Process a symbol called NAME, which is associated with REVISION.
+
+    REVISION is a canonical revision number with zeros removed, for
+    example: '1.7', '1.7.2', or '1.1.1' or '1.1.1.1'.  NAME is a
+    transformed branch or tag name."""
+
+    # Add symbol to our records:
+    if is_branch_revision_number(revision):
+      self._add_branch(name, revision)
+    else:
+      self._add_tag(name, revision)
+
+  def process_symbols(self):
+    """Process the symbol definitions from SELF._symbol_defs."""
+
+    symbol_defs = self._symbol_defs
+    del self._symbol_defs
+
+    symbol_defs = self._eliminate_trivial_duplicate_defs(symbol_defs)
+    symbol_defs = self._process_duplicate_defs(symbol_defs)
+
+    for (name, revision) in symbol_defs:
+      self._defined_symbols.add(name)
+      self._process_symbol(name, revision)
+
+  @staticmethod
+  def rev_to_branch_number(revision):
+    """Return the branch_number of the branch on which REVISION lies.
+
+    REVISION is a branch revision number with an even number of
+    components; for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
+    The return value is the branch number (for example, '1.7.2').
+    Return None iff REVISION is a trunk revision such as '1.2'."""
+
+    if is_trunk_revision(revision):
+      return None
+    return revision[:revision.rindex(".")]
+
+  def rev_to_branch_data(self, revision):
+    """Return the branch_data of the branch on which REVISION lies.
+
+    REVISION must be a branch revision number with an even number of
+    components; for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
+ Raise KeyError iff REVISION is unknown.""" + + assert not is_trunk_revision(revision) + + return self.branches_data[self.rev_to_branch_number(revision)] + + def rev_to_lod(self, revision): + """Return the line of development on which REVISION lies. + + REVISION must be a revision number with an even number of + components. Raise KeyError iff REVISION is unknown.""" + + if is_trunk_revision(revision): + return self.pdc.trunk + else: + return self.rev_to_branch_data(revision).symbol + + +class _FileDataCollector(cvs2svn_rcsparse.Sink): + """Class responsible for collecting RCS data for a particular file. + + Any collected data that need to be remembered are stored into the + referenced CollectData instance.""" + + def __init__(self, pdc, cvs_file): + """Create an object that is prepared to receive data for CVS_FILE. + CVS_FILE is a CVSFile instance. COLLECT_DATA is used to store the + information collected about the file.""" + + self.pdc = pdc + self.cvs_file = cvs_file + + self.collect_data = self.pdc.collect_data + self.project = self.cvs_file.project + + # A place to store information about the symbols in this file: + self.sdc = _SymbolDataCollector(self, self.cvs_file) + + # { revision : _RevisionData instance } + self._rev_data = { } + + # Lists [ (parent, child) ] of revision number pairs indicating + # that revision child depends on revision parent along the main + # line of development. + self._primary_dependencies = [] + + # If set, this is an RCS branch number -- rcsparse calls this the + # "principal branch", but CVS and RCS refer to it as the "default + # branch", so that's what we call it, even though the rcsparse API + # setter method is still 'set_principal_branch'. + self.default_branch = None + + # True iff revision 1.1 of the file appears to have been imported + # (as opposed to added normally). 
+ self._file_imported = False + + def _get_rev_id(self, revision): + if revision is None: + return None + return self._rev_data[revision].cvs_rev_id + + def set_principal_branch(self, branch): + """This is a callback method declared in Sink.""" + + if branch.find('.') == -1: + # This just sets the default branch to trunk. Normally this + # shouldn't occur, but it has been seen in at least one CVS + # repository. Just ignore it. + pass + else: + self.default_branch = branch + + def set_expansion(self, mode): + """This is a callback method declared in Sink.""" + + self.cvs_file.mode = mode + + def define_tag(self, name, revision): + """Remember the symbol name and revision, but don't process them yet. + + This is a callback method declared in Sink.""" + + self.sdc.define_symbol(name, revision) + + def admin_completed(self): + """This is a callback method declared in Sink.""" + + self.sdc.process_symbols() + + def define_revision(self, revision, timestamp, author, state, + branches, next): + """This is a callback method declared in Sink.""" + + for branch in branches: + try: + branch_data = self.sdc.rev_to_branch_data(branch) + except KeyError: + # Normally we learn about the branches from the branch names + # and numbers parsed from the symbolic name header. But this + # must have been an unlabeled branch that slipped through the + # net. Generate a name for it and create a _BranchData record + # for it now. + branch_data = self.sdc._add_unlabeled_branch( + self.sdc.rev_to_branch_number(branch)) + + assert branch_data.child is None + branch_data.child = branch + + if revision in self._rev_data: + # This revision has already been seen. + Log().error('File %r contains duplicate definitions of revision %s.' 
+                  % (self.cvs_file.filename, revision,))
+      raise RuntimeError
+
+    # Record basic information about the revision:
+    rev_data = _RevisionData(
+        self.collect_data.item_key_generator.gen_id(),
+        revision, int(timestamp), author, state)
+    self._rev_data[revision] = rev_data
+
+    # When on trunk, the RCS 'next' revision number points to what
+    # humans might consider to be the 'previous' revision number.  For
+    # example, 1.3's RCS 'next' is 1.2.
+    #
+    # However, on a branch, the RCS 'next' revision number really does
+    # point to what humans would consider to be the 'next' revision
+    # number.  For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
+    #
+    # In other words, in RCS, 'next' always means "where to find the next
+    # deltatext that you need this revision to retrieve."
+    #
+    # That said, we don't *want* RCS's behavior here, so we determine
+    # whether we're on trunk or a branch and set the dependencies
+    # accordingly.
+    if next:
+      if is_trunk_revision(revision):
+        self._primary_dependencies.append( (next, revision,) )
+      else:
+        self._primary_dependencies.append( (revision, next,) )
+
+  def _resolve_primary_dependencies(self):
+    """Resolve the dependencies listed in self._primary_dependencies."""
+
+    for (parent, child,) in self._primary_dependencies:
+      parent_data = self._rev_data[parent]
+      assert parent_data.child is None
+      parent_data.child = child
+
+      child_data = self._rev_data[child]
+      assert child_data.parent is None
+      child_data.parent = parent
+
+  def _resolve_branch_dependencies(self):
+    """Resolve dependencies involving branches."""
+
+    for branch_data in self.sdc.branches_data.values():
+      # The branch_data's parent has the branch as a child regardless
+      # of whether the branch had any subsequent commits:
+      try:
+        parent_data = self._rev_data[branch_data.parent]
+      except KeyError:
+        Log().warn(
+            'In %r:\n'
+            '    branch %r references non-existing revision %s\n'
+            '    and will be ignored.'
+ % (self.cvs_file.filename, branch_data.symbol.name, + branch_data.parent,)) + del self.sdc.branches_data[branch_data.branch_number] + else: + parent_data.branches_data.append(branch_data) + + # If the branch has a child (i.e., something was committed on + # the branch), then we store a reference to the branch_data + # there, define the child's parent to be the branch's parent, + # and list the child in the branch parent's branches_revs_data: + if branch_data.child is not None: + child_data = self._rev_data[branch_data.child] + assert child_data.parent_branch_data is None + child_data.parent_branch_data = branch_data + assert child_data.parent is None + child_data.parent = branch_data.parent + parent_data.branches_revs_data.append(branch_data.child) + + def _sort_branches(self): + """Sort the branches sprouting from each revision in creation order. + + Creation order is taken to be the reverse of the order that they + are listed in the symbols part of the RCS file. (If a branch is + created then deleted, a later branch can be assigned the recycled + branch number; therefore branch numbers are not an indication of + creation order.)""" + + for rev_data in self._rev_data.values(): + rev_data.branches_data.sort(lambda a, b: - cmp(a.id, b.id)) + + def _resolve_tag_dependencies(self): + """Resolve dependencies involving tags.""" + + for (rev, tag_data_list) in self.sdc.tags_data.items(): + try: + parent_data = self._rev_data[rev] + except KeyError: + Log().warn( + 'In %r:\n' + ' the following tag(s) reference non-existing revision %s\n' + ' and will be ignored:\n' + ' %s' % ( + self.cvs_file.filename, rev, + ', '.join([repr(tag_data.symbol.name) + for tag_data in tag_data_list]),)) + del self.sdc.tags_data[rev] + else: + for tag_data in tag_data_list: + assert tag_data.rev == rev + # The tag_data's rev has the tag as a child: + parent_data.tags_data.append(tag_data) + + def _determine_operation(self, rev_data): + prev_rev_data = self._rev_data.get(rev_data.parent) + 
return cvs_revision_type_map[( + rev_data.state != 'dead', + prev_rev_data is not None and prev_rev_data.state != 'dead', + )] + + def _get_cvs_revision(self, rev_data): + """Create and return a CVSRevision for REV_DATA.""" + + branch_ids = [ + branch_data.id + for branch_data in rev_data.branches_data + ] + + branch_commit_ids = [ + self._get_rev_id(rev) + for rev in rev_data.branches_revs_data + ] + + tag_ids = [ + tag_data.id + for tag_data in rev_data.tags_data + ] + + revision_type = self._determine_operation(rev_data) + + return revision_type( + self._get_rev_id(rev_data.rev), self.cvs_file, + rev_data.timestamp, None, + self._get_rev_id(rev_data.parent), + self._get_rev_id(rev_data.child), + rev_data.rev, + True, + self.sdc.rev_to_lod(rev_data.rev), + rev_data.get_first_on_branch_id(), + False, None, None, + tag_ids, branch_ids, branch_commit_ids, + rev_data.revision_recorder_token) + + def _get_cvs_revisions(self): + """Generate the CVSRevisions present in this file.""" + + for rev_data in self._rev_data.itervalues(): + yield self._get_cvs_revision(rev_data) + + def _get_cvs_branches(self): + """Generate the CVSBranches present in this file.""" + + for branch_data in self.sdc.branches_data.values(): + yield CVSBranch( + branch_data.id, self.cvs_file, branch_data.symbol, + branch_data.branch_number, + self.sdc.rev_to_lod(branch_data.parent), + self._get_rev_id(branch_data.parent), + self._get_rev_id(branch_data.child), + None, + ) + + def _get_cvs_tags(self): + """Generate the CVSTags present in this file.""" + + for tags_data in self.sdc.tags_data.values(): + for tag_data in tags_data: + yield CVSTag( + tag_data.id, self.cvs_file, tag_data.symbol, + self.sdc.rev_to_lod(tag_data.rev), + self._get_rev_id(tag_data.rev), + None, + ) + + def tree_completed(self): + """The revision tree has been parsed. + + Analyze it for consistency and connect some loose ends. 
+ + This is a callback method declared in Sink.""" + + self._resolve_primary_dependencies() + self._resolve_branch_dependencies() + self._sort_branches() + self._resolve_tag_dependencies() + + # Compute the preliminary CVSFileItems for this file: + cvs_items = [] + cvs_items.extend(self._get_cvs_revisions()) + cvs_items.extend(self._get_cvs_branches()) + cvs_items.extend(self._get_cvs_tags()) + self._cvs_file_items = CVSFileItems( + self.cvs_file, self.pdc.trunk, cvs_items + ) + + self._cvs_file_items.check_link_consistency() + + # Tell the revision recorder about the file dependency tree. + self.collect_data.revision_recorder.start_file(self._cvs_file_items) + + def set_revision_info(self, revision, log, text): + """This is a callback method declared in Sink.""" + + rev_data = self._rev_data[revision] + cvs_rev = self._cvs_file_items[rev_data.cvs_rev_id] + + if cvs_rev.metadata_id is not None: + # Users have reported problems with repositories in which the + # deltatext block for revision 1.1 appears twice. It is not + # known whether this results from a CVS/RCS bug, or from botched + # hand-editing of the repository. In any case, empirically, cvs + # and rcs both use the first version when checking out data, so + # that's what we will do. (For the record: "cvs log" fails on + # such a file; "rlog" prints the log message from the first + # block and ignores the second one.) 
+ Log().warn( + "%s: in '%s':\n" + " Deltatext block for revision %s appeared twice;\n" + " ignoring the second occurrence.\n" + % (warning_prefix, self.cvs_file.filename, revision,) + ) + return + + if is_trunk_revision(revision): + branch_name = None + else: + branch_name = self.sdc.rev_to_branch_data(revision).symbol.name + + cvs_rev.metadata_id = self.collect_data.metadata_logger.store( + self.project, branch_name, rev_data.author, log + ) + cvs_rev.deltatext_exists = bool(text) + + # If this is revision 1.1, determine whether the file appears to + # have been created via 'cvs add' instead of 'cvs import'. The + # test is that the log message CVS uses for 1.1 in imports is + # "Initial revision\n" with no period. (This fact helps determine + # whether this file might have had a default branch in the past.) + if revision == '1.1': + self._file_imported = (log == 'Initial revision\n') + + cvs_rev.revision_recorder_token = \ + self.collect_data.revision_recorder.record_text(cvs_rev, log, text) + + def parse_completed(self): + """Finish the processing of this file. + + This is a callback method declared in Sink.""" + + # Make sure that there was an info section for each revision: + for cvs_item in self._cvs_file_items.values(): + if isinstance(cvs_item, CVSRevision) and cvs_item.metadata_id is None: + self.collect_data.record_fatal_error( + '%r has no deltatext section for revision %s' + % (self.cvs_file.filename, cvs_item.rev,) + ) + + def _process_ntdbrs(self): + """Fix up any non-trunk default branch revisions (if present). + + If a non-trunk default branch is determined to have existed, yield + the _RevisionData.ids for all revisions that were once non-trunk + default revisions, in dependency order. + + There are two cases to handle: + + One case is simple. The RCS file lists a default branch + explicitly in its header, such as '1.1.1'. In this case, we know + that every revision on the vendor branch is to be treated as head + of trunk at that point in time. 
+ + But there's also a degenerate case. The RCS file does not + currently have a default branch, yet we can deduce that for some + period in the past it probably *did* have one. For example, the + file has vendor revisions 1.1.1.1 -> 1.1.1.96, all of which are + dated before 1.2, and then it has 1.1.1.97 -> 1.1.1.100 dated + after 1.2. In this case, we should record 1.1.1.96 as the last + vendor revision to have been the head of the default branch. + + If any non-trunk default branch revisions are found: + + - Set their ntdbr members to True. + + - Connect the last one with revision 1.2. + + - Remove revision 1.1 if it is not needed. + + """ + + try: + if self.default_branch: + vendor_cvs_branch_id = self.sdc.branches_data[self.default_branch].id + vendor_lod_items = self._cvs_file_items.get_lod_items( + self._cvs_file_items[vendor_cvs_branch_id] + ) + if not self._cvs_file_items.process_live_ntdb(vendor_lod_items): + return + elif self._file_imported: + vendor_branch_data = self.sdc.branches_data.get('1.1.1') + if vendor_branch_data is None: + return + else: + vendor_lod_items = self._cvs_file_items.get_lod_items( + self._cvs_file_items[vendor_branch_data.id] + ) + if not self._cvs_file_items.process_historical_ntdb( + vendor_lod_items + ): + return + else: + return + except VendorBranchError, e: + self.collect_data.record_fatal_error(str(e)) + return + + if self._file_imported: + self._cvs_file_items.imported_remove_1_1(vendor_lod_items) + + self._cvs_file_items.check_link_consistency() + + def get_cvs_file_items(self): + """Finish up and return a CVSFileItems instance for this file. + + This method must only be called once.""" + + self._process_ntdbrs() + + # Break a circular reference loop, allowing the memory for self + # and sdc to be freed. 
+    del self.sdc
+
+    return self._cvs_file_items
+
+
+class _ProjectDataCollector:
+  def __init__(self, collect_data, project):
+    self.collect_data = collect_data
+    self.project = project
+    self.num_files = 0
+
+    # The Trunk LineOfDevelopment object for this project:
+    self.trunk = Trunk(
+        self.collect_data.symbol_key_generator.gen_id(), self.project
+        )
+    self.project.trunk_id = self.trunk.id
+
+    # This causes a record for self.trunk to spring into existence:
+    self.collect_data.symbol_stats[self.trunk]
+
+    # A map { name -> Symbol } for all known symbols in this project.
+    # The symbols listed here are undifferentiated into Branches and
+    # Tags because the same name might appear as a branch in one file
+    # and a tag in another.
+    self.symbols = {}
+
+    # A map { (old_name, new_name) : count } indicating how many files
+    # were affected by each symbol name transformation:
+    self.symbol_transform_counts = {}
+
+  def get_symbol(self, name):
+    """Return the Symbol object for the symbol named NAME in this project.
+
+    If such a symbol does not yet exist, allocate a new symbol_id,
+    create a Symbol instance, store it in self.symbols, and return it."""
+
+    symbol = self.symbols.get(name)
+    if symbol is None:
+      symbol = Symbol(
+          self.collect_data.symbol_key_generator.gen_id(),
+          self.project, name)
+      self.symbols[name] = symbol
+    return symbol
+
+  def log_symbol_transform(self, old_name, new_name):
+    """Record that OLD_NAME was transformed to NEW_NAME in one file.
+ + This information is used to generated a statistical summary of + symbol transforms.""" + + try: + self.symbol_transform_counts[old_name, new_name] += 1 + except KeyError: + self.symbol_transform_counts[old_name, new_name] = 1 + + def summarize_symbol_transforms(self): + if self.symbol_transform_counts and Log().is_on(Log.NORMAL): + log = Log() + log.normal('Summary of symbol transforms:') + transforms = self.symbol_transform_counts.items() + transforms.sort() + for ((old_name, new_name), count) in transforms: + if new_name is None: + log.normal(' "%s" ignored in %d files' % (old_name, count,)) + else: + log.normal( + ' "%s" transformed to "%s" in %d files' + % (old_name, new_name, count,) + ) + + def _process_cvs_file_items(self, cvs_file_items): + """Process the CVSFileItems from one CVSFile.""" + + # Remove CVSRevisionDeletes that are not needed: + cvs_file_items.remove_unneeded_deletes(self.collect_data.metadata_db) + + # Remove initial branch deletes that are not needed: + cvs_file_items.remove_initial_branch_deletes( + self.collect_data.metadata_db + ) + + # If this is a --trunk-only conversion, discard all branches and + # tags, then draft any non-trunk default branch revisions to + # trunk: + if Ctx().trunk_only: + cvs_file_items.exclude_non_trunk() + + self.collect_data.revision_recorder.finish_file(cvs_file_items) + self.collect_data.add_cvs_file_items(cvs_file_items) + self.collect_data.symbol_stats.register(cvs_file_items) + + def process_file(self, cvs_file): + Log().normal(cvs_file.filename) + fdc = _FileDataCollector(self, cvs_file) + try: + cvs2svn_rcsparse.parse(open(cvs_file.filename, 'rb'), fdc) + except (cvs2svn_rcsparse.common.RCSParseError, ValueError, RuntimeError): + self.collect_data.record_fatal_error( + "%r is not a valid ,v file" % (cvs_file.filename,) + ) + # Abort the processing of this file, but let the pass continue + # with other files: + return + except: + Log().warn("Exception occurred while parsing %s" % cvs_file.filename) + 
raise + else: + self.num_files += 1 + + cvs_file_items = fdc.get_cvs_file_items() + + del fdc + + self._process_cvs_file_items(cvs_file_items) + + +class CollectData: + """Repository for data collected by parsing the CVS repository files. + + This class manages the databases into which information collected + from the CVS repository is stored. The data are stored into this + class by _FileDataCollector instances, one of which is created for + each file to be parsed.""" + + def __init__(self, revision_recorder, stats_keeper): + self.revision_recorder = revision_recorder + self._cvs_item_store = NewCVSItemStore( + artifact_manager.get_temp_file(config.CVS_ITEMS_STORE)) + self.metadata_db = MetadataDatabase( + artifact_manager.get_temp_file(config.METADATA_STORE), + artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE), + DB_OPEN_NEW, + ) + self.metadata_logger = MetadataLogger(self.metadata_db) + self.fatal_errors = [] + self.num_files = 0 + self.symbol_stats = SymbolStatisticsCollector() + self.stats_keeper = stats_keeper + + # Key generator for CVSFiles: + self.file_key_generator = KeyGenerator() + + # Key generator for CVSItems: + self.item_key_generator = KeyGenerator() + + # Key generator for Symbols: + self.symbol_key_generator = KeyGenerator() + + self.revision_recorder.start() + + def record_fatal_error(self, err): + """Record that fatal error ERR was found. + + ERR is a string (without trailing newline) describing the error. + Output the error to stderr immediately, and record a copy to be + output again in a summary at the end of CollectRevsPass.""" + + err = '%s: %s' % (error_prefix, err,) + Log().error(err + '\n') + self.fatal_errors.append(err) + + def add_cvs_directory(self, cvs_directory): + """Record CVS_DIRECTORY.""" + + Ctx()._cvs_file_db.log_file(cvs_directory) + + def add_cvs_file_items(self, cvs_file_items): + """Record the information from CVS_FILE_ITEMS. 
+ + Store the CVSFile to _cvs_file_db under its persistent id, store + the CVSItems, and record the CVSItems to self.stats_keeper.""" + + Ctx()._cvs_file_db.log_file(cvs_file_items.cvs_file) + self._cvs_item_store.add(cvs_file_items) + + self.stats_keeper.record_cvs_file(cvs_file_items.cvs_file) + for cvs_item in cvs_file_items.values(): + self.stats_keeper.record_cvs_item(cvs_item) + + def _get_cvs_file( + self, parent_directory, basename, file_in_attic, leave_in_attic=False + ): + """Return a CVSFile describing the file with name BASENAME. + + PARENT_DIRECTORY is the CVSDirectory instance describing the + directory that physically holds this file in the filesystem. + BASENAME must be the base name of a *,v file within + PARENT_DIRECTORY. + + FILE_IN_ATTIC is a boolean telling whether the specified file is + in an Attic subdirectory. If FILE_IN_ATTIC is True, then: + + - If LEAVE_IN_ATTIC is True, then leave the 'Attic' component in + the filename. + + - Otherwise, raise FileInAndOutOfAtticException if a file with the + same filename appears outside of Attic. + + The CVSFile is assigned a new unique id. All of the CVSFile + information is filled in except mode (which can only be determined + by parsing the file). 
+ + Raise FatalError if the resulting filename would not be legal in + SVN.""" + + filename = os.path.join(parent_directory.filename, basename) + try: + verify_svn_filename_legal(basename[:-2]) + except IllegalSVNPathError, e: + raise FatalError( + 'File %r would result in an illegal SVN filename: %s' + % (filename, e,) + ) + + if file_in_attic and not leave_in_attic: + in_attic = True + logical_parent_directory = parent_directory.parent_directory + + # If this file also exists outside of the attic, it's a fatal + # error: + non_attic_filename = os.path.join( + logical_parent_directory.filename, basename, + ) + if os.path.exists(non_attic_filename): + raise FileInAndOutOfAtticException(non_attic_filename, filename) + else: + in_attic = False + logical_parent_directory = parent_directory + + file_stat = os.stat(filename) + + # The size of the file in bytes: + file_size = file_stat[stat.ST_SIZE] + + # Whether or not the executable bit is set: + file_executable = bool(file_stat[0] & stat.S_IXUSR) + + # mode is not known, so we temporarily set it to None. + return CVSFile( + self.file_key_generator.gen_id(), + parent_directory.project, logical_parent_directory, basename[:-2], + in_attic, file_executable, file_size, None + ) + + def _get_attic_file(self, parent_directory, basename): + """Return a CVSFile object for the Attic file at BASENAME. + + PARENT_DIRECTORY is the CVSDirectory that physically contains the + file on the filesystem (i.e., the Attic directory). It is not + necessarily the parent_directory of the CVSFile that will be + returned. 
+ + Return CVSFile, whose parent directory is usually + PARENT_DIRECTORY.parent_directory, but might be PARENT_DIRECTORY + iff CVSFile will remain in the Attic directory.""" + + try: + return self._get_cvs_file(parent_directory, basename, True) + except FileInAndOutOfAtticException, e: + if Ctx().retain_conflicting_attic_files: + Log().warn( + "%s: %s;\n" + " storing the latter into 'Attic' subdirectory.\n" + % (warning_prefix, e) + ) + else: + self.record_fatal_error(str(e)) + + # Either way, return a CVSFile object so that the rest of the + # file processing can proceed: + return self._get_cvs_file( + parent_directory, basename, True, leave_in_attic=True + ) + + def _generate_attic_cvs_files(self, cvs_directory): + """Generate CVSFiles for the files in Attic directory CVS_DIRECTORY. + + Also add CVS_DIRECTORY to self if any files are being retained in + that directory.""" + + retained_attic_file = False + + fnames = os.listdir(cvs_directory.filename) + fnames.sort() + for fname in fnames: + pathname = os.path.join(cvs_directory.filename, fname) + if os.path.isdir(pathname): + Log().warn("Directory %s found within Attic; ignoring" % (pathname,)) + elif fname.endswith(',v'): + cvs_file = self._get_attic_file(cvs_directory, fname) + if cvs_file.parent_directory == cvs_directory: + # This file will be retained in the Attic directory. + retained_attic_file = True + yield cvs_file + + if retained_attic_file: + # If any files were retained in the Attic directory, then write + # the Attic directory to CVSFileDatabase: + self.add_cvs_directory(cvs_directory) + + def _get_non_attic_file(self, parent_directory, basename): + """Return a CVSFile object for the non-Attic file at BASENAME.""" + + return self._get_cvs_file(parent_directory, basename, False) + + def _generate_cvs_files(self, cvs_directory): + """Generate the CVSFiles under non-Attic directory CVS_DIRECTORY. + + Process directories recursively, including Attic directories. 
+ Also create and register CVSDirectories as they are found, and + look for conflicts between the filenames that will result from + files, attic files, and subdirectories.""" + + self.add_cvs_directory(cvs_directory) + + # Map {cvs_file.basename : cvs_file.filename} for files directly + # in cvs_directory: + rcsfiles = {} + + attic_dir = None + + # Non-Attic subdirectories of cvs_directory (to be recursed into): + dirs = [] + + fnames = os.listdir(cvs_directory.filename) + fnames.sort() + for fname in fnames: + pathname = os.path.join(cvs_directory.filename, fname) + if os.path.isdir(pathname): + if fname == 'Attic': + attic_dir = fname + else: + dirs.append(fname) + elif fname.endswith(',v'): + cvs_file = self._get_non_attic_file(cvs_directory, fname) + rcsfiles[cvs_file.basename] = cvs_file.filename + yield cvs_file + else: + # Silently ignore other files: + pass + + # Map {cvs_file.basename : cvs_file.filename} for files in an + # Attic directory within cvs_directory: + attic_rcsfiles = {} + + if attic_dir is not None: + attic_directory = CVSDirectory( + self.file_key_generator.gen_id(), + cvs_directory.project, cvs_directory, 'Attic', + ) + + for cvs_file in self._generate_attic_cvs_files(attic_directory): + if cvs_file.parent_directory == cvs_directory: + attic_rcsfiles[cvs_file.basename] = cvs_file.filename + yield cvs_file + + alldirs = dirs + [attic_dir] + else: + alldirs = dirs + + # Check for conflicts between directory names and the filenames + # that will result from the rcs files (both in this directory and + # in attic). (We recurse into the subdirectories nevertheless, to + # try to detect more problems.) + for fname in alldirs: + pathname = os.path.join(cvs_directory.filename, fname) + for rcsfile_list in [rcsfiles, attic_rcsfiles]: + if fname in rcsfile_list: + self.record_fatal_error( + 'Directory name conflicts with filename. 
Please remove or ' + 'rename one\n' + 'of the following:\n' + ' "%s"\n' + ' "%s"' + % (pathname, rcsfile_list[fname],) + ) + + # Now recurse into the other subdirectories: + for fname in dirs: + dirname = os.path.join(cvs_directory.filename, fname) + + # Verify that the directory name does not contain any illegal + # characters: + try: + verify_svn_filename_legal(fname) + except IllegalSVNPathError, e: + raise FatalError( + 'Directory %r would result in an illegal SVN path name: %s' + % (dirname, e,) + ) + + sub_directory = CVSDirectory( + self.file_key_generator.gen_id(), + cvs_directory.project, cvs_directory, fname, + ) + + for cvs_file in self._generate_cvs_files(sub_directory): + yield cvs_file + + def process_project(self, project): + Ctx()._projects[project.id] = project + + root_cvs_directory = CVSDirectory( + self.file_key_generator.gen_id(), project, None, '' + ) + project.root_cvs_directory_id = root_cvs_directory.id + pdc = _ProjectDataCollector(self, project) + + found_rcs_file = False + for cvs_file in self._generate_cvs_files(root_cvs_directory): + pdc.process_file(cvs_file) + found_rcs_file = True + + if not found_rcs_file: + self.record_fatal_error( + 'No RCS files found under %r!\n' + 'Are you absolutely certain you are pointing cvs2svn\n' + 'at a CVS repository?\n' + % (project.project_cvs_repos_path,) + ) + + pdc.summarize_symbol_transforms() + + self.num_files += pdc.num_files + Log().verbose('Processed', self.num_files, 'files') + + def _set_cvs_path_ordinals(self): + cvs_files = list(Ctx()._cvs_file_db.itervalues()) + cvs_files.sort(CVSPath.slow_compare) + for (i, cvs_file) in enumerate(cvs_files): + cvs_file.ordinal = i + + def close(self): + """Close the data structures associated with this instance. + + Return a list of fatal errors encountered while processing input. 
+ Each list entry is a string describing one fatal error.""" + + self.revision_recorder.finish() + self.symbol_stats.purge_ghost_symbols() + self.symbol_stats.close() + self.symbol_stats = None + self.metadata_logger = None + self.metadata_db.close() + self.metadata_db = None + self._cvs_item_store.close() + self._cvs_item_store = None + self._set_cvs_path_ordinals() + self.revision_recorder = None + retval = self.fatal_errors + self.fatal_errors = None + return retval + + diff --git a/cvs2svn_lib/common.py b/cvs2svn_lib/common.py new file mode 100644 index 0000000..8400907 --- /dev/null +++ b/cvs2svn_lib/common.py @@ -0,0 +1,409 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains common facilities used by cvs2svn.""" + + +import re +import time +import codecs + +from cvs2svn_lib.log import Log + + +# Always use these constants for opening databases. +DB_OPEN_READ = 'r' +DB_OPEN_WRITE = 'w' +DB_OPEN_NEW = 'n' + + +SVN_INVALID_REVNUM = -1 + + +# Warnings and errors start with these strings. They are typically +# followed by a colon and a space, as in "%s: " ==> "WARNING: ". +warning_prefix = "WARNING" +error_prefix = "ERROR" + + +class FatalException(Exception): + """Exception thrown on a non-recoverable error. 
def path_join(*components):
  """Join pathname COMPONENTS into an SVN path, inserting '/' as needed.

  Components that are empty (or otherwise false) are skipped, so the
  result contains no empty segments or doubled slashes."""

  nonempty = [component for component in components if component]
  return '/'.join(nonempty)
# Characters that Subversion cannot represent in filenames (the ASCII
# control characters):
ctrl_characters_regexp = re.compile('[\\\x00-\\\x1f\\\x7f]')


def verify_svn_filename_legal(filename):
  """Check that FILENAME can be used as a Subversion path component.

  FILENAME is a path component of a CVS path.  Raise
  IllegalSVNPathError if it would choke SVN, i.e. if it is empty, is
  one of the special names '.' or '..', or contains any control
  character.  Return None on success."""

  if filename == '':
    raise IllegalSVNPathError("Empty filename component.")

  if filename in ['.', '..']:
    raise IllegalSVNPathError("Illegal filename component %r." % (filename,))

  match = ctrl_characters_regexp.search(filename)
  if match is not None:
    raise IllegalSVNPathError(
        "Character %r in filename %r is not supported by Subversion."
        % (match.group(), filename,)
        )
class PathRepeatedException(Exception):
  """Raised when the same path appears more than once in a path list."""

  def __init__(self, path, count):
    self.path = path
    self.count = count
    message = 'Path %s is repeated %d times' % (self.path, self.count,)
    Exception.__init__(self, message)


class PathsNestedException(Exception):
  """Raised when one path contains other paths from the same list."""

  def __init__(self, nest, nestlings):
    self.nest = nest
    self.nestlings = nestlings
    message = (
        'Path %s contains the following other paths: %s'
        % (self.nest, ', '.join(self.nestlings),)
        )
    Exception.__init__(self, message)


class PathsNotDisjointException(FatalException):
  """A FatalException that collects multiple disjointness exceptions."""

  def __init__(self, problems):
    self.problems = problems
    details = '\n '.join([str(problem) for problem in self.problems])
    Exception.__init__(
        self,
        'The following paths are not disjoint:\n'
        ' %s\n'
        % (details,)
        )
def format_date(date):
  """Return DATE (seconds since epoch) as an svn-compatible date string.

  Subversion formats dates like '2002-09-29T14:44:59.000000Z'; the
  fractional-second part produced here is always zero."""

  utc = time.gmtime(date)
  return time.strftime("%Y-%m-%dT%H:%M:%S", utc) + ".000000Z"
If it is unknown, raise a + LookupError.""" + + for (name, decoder) in self.decoders: + if name == encoding: + return + else: + self.decoders.append( (encoding, codecs.lookup(encoding)[1]) ) + + def set_fallback_encoding(self, encoding): + """Set the fallback encoding, to be tried in 'replace' mode. + + ENCODING is the name of an encoding. If it is unknown, raise a + LookupError.""" + + if encoding is None: + self.fallback_decoder = None + else: + self.fallback_decoder = (encoding, codecs.lookup(encoding)[1]) + + def __call__(self, s): + """Try to decode string S using our configured source encodings. + + Return the string as a Unicode string. If S is already a unicode + string, do nothing. + + Raise UnicodeError if the string cannot be decoded using any of + the source encodings and no fallback encoding was specified.""" + + if isinstance(s, unicode): + return s + for (name, decoder) in self.decoders: + try: + return decoder(s)[0] + except ValueError: + Log().verbose("Encoding '%s' failed for string %r" % (name, s)) + + if self.fallback_decoder is not None: + (name, decoder) = self.fallback_decoder + return decoder(s, 'replace')[0] + else: + raise UnicodeError + + +class Timestamper: + """Return monotonic timestamps derived from changeset timestamps.""" + + def __init__(self): + # The last timestamp that has been returned: + self.timestamp = 0.0 + + # The maximum timestamp that is considered reasonable: + self.max_timestamp = time.time() + 24.0 * 60.0 * 60.0 + + def get(self, timestamp, change_expected): + """Return a reasonable timestamp derived from TIMESTAMP. + + Push TIMESTAMP into the future if necessary to ensure that it is + at least one second later than every other timestamp that has been + returned by previous calls to this method. + + If CHANGE_EXPECTED is not True, then log a message if the + timestamp has to be changed.""" + + if timestamp > self.max_timestamp: + # If a timestamp is in the future, it is assumed that it is + # bogus. 
Shift it backwards in time to prevent it forcing other + # timestamps to be pushed even further in the future. + + # Note that this is not nearly a complete solution to the bogus + # timestamp problem. A timestamp in the future still affects + # the ordering of changesets, and a changeset having such a + # timestamp will not be committed until all changesets with + # earlier timestamps have been committed, even if other + # changesets with even earlier timestamps depend on this one. + self.timestamp = self.timestamp + 1.0 + if not change_expected: + Log().warn( + 'Timestamp "%s" is in the future; changed to "%s".' + % (time.asctime(time.gmtime(timestamp)), + time.asctime(time.gmtime(self.timestamp)),) + ) + elif timestamp < self.timestamp + 1.0: + self.timestamp = self.timestamp + 1.0 + if not change_expected and Log().is_on(Log.VERBOSE): + Log().verbose( + 'Timestamp "%s" adjusted to "%s" to ensure monotonicity.' + % (time.asctime(time.gmtime(timestamp)), + time.asctime(time.gmtime(self.timestamp)),) + ) + else: + self.timestamp = timestamp + + return self.timestamp + + diff --git a/cvs2svn_lib/config.py b/cvs2svn_lib/config.py new file mode 100644 index 0000000..b313b2c --- /dev/null +++ b/cvs2svn_lib/config.py @@ -0,0 +1,221 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
+# ==================================================================== + +"""This module contains various configuration constants used by cvs2svn.""" + + +SVN_KEYWORDS_VALUE = 'Author Date Id Revision' + +# The default names for the trunk/branches/tags directory for each +# project: +DEFAULT_TRUNK_BASE = 'trunk' +DEFAULT_BRANCHES_BASE = 'branches' +DEFAULT_TAGS_BASE = 'tags' + +SVNADMIN_EXECUTABLE = 'svnadmin' +CO_EXECUTABLE = 'co' +CVS_EXECUTABLE = 'cvs' +SORT_EXECUTABLE = 'sort' + +# A pickled list of the projects defined for this conversion. +PROJECTS = 'projects.pck' + +# A file holding the Serializer to be used for +# CVS_REVS_SUMMARY_*_DATAFILE and CVS_SYMBOLS_SYMMARY_*_DATAFILE: +SUMMARY_SERIALIZER = 'summary-serializer.pck' + +# The first file contains enough information about each CVSRevision to +# deduce preliminary Changesets. The second file is a sorted version +# of the first. +CVS_REVS_SUMMARY_DATAFILE = 'revs-summary.txt' +CVS_REVS_SUMMARY_SORTED_DATAFILE = 'revs-summary-s.txt' + +# The first file contains enough information about each CVSSymbol to +# deduce preliminary Changesets. The second file is a sorted version +# of the first. +CVS_SYMBOLS_SUMMARY_DATAFILE = 'symbols-summary.txt' +CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE = 'symbols-summary-s.txt' + +# A mapping from CVSItem id to Changeset id. +CVS_ITEM_TO_CHANGESET = 'cvs-item-to-changeset.dat' + +# A mapping from CVSItem id to Changeset id, after the +# RevisionChangeset loops have been broken. +CVS_ITEM_TO_CHANGESET_REVBROKEN = 'cvs-item-to-changeset-revbroken.dat' + +# A mapping from CVSItem id to Changeset id, after the SymbolChangeset +# loops have been broken. +CVS_ITEM_TO_CHANGESET_SYMBROKEN = 'cvs-item-to-changeset-symbroken.dat' + +# A mapping from CVSItem id to Changeset id, after all Changeset +# loops have been broken. +CVS_ITEM_TO_CHANGESET_ALLBROKEN = 'cvs-item-to-changeset-allbroken.dat' + +# A mapping from id to Changeset. 
+CHANGESETS_INDEX = 'changesets-index.dat' +CHANGESETS_STORE = 'changesets.pck' + +# A mapping from id to Changeset, after the RevisionChangeset loops +# have been broken. +CHANGESETS_REVBROKEN_INDEX = 'changesets-revbroken-index.dat' +CHANGESETS_REVBROKEN_STORE = 'changesets-revbroken.pck' + +# A mapping from id to Changeset, after the RevisionChangesets have +# been sorted and converted into OrderedChangesets. +CHANGESETS_REVSORTED_INDEX = 'changesets-revsorted-index.dat' +CHANGESETS_REVSORTED_STORE = 'changesets-revsorted.pck' + +# A mapping from id to Changeset, after the SymbolChangeset loops have +# been broken. +CHANGESETS_SYMBROKEN_INDEX = 'changesets-symbroken-index.dat' +CHANGESETS_SYMBROKEN_STORE = 'changesets-symbroken.pck' + +# A mapping from id to Changeset, after all Changeset loops have been +# broken. +CHANGESETS_ALLBROKEN_INDEX = 'changesets-allbroken-index.dat' +CHANGESETS_ALLBROKEN_STORE = 'changesets-allbroken.pck' + +# The RevisionChangesets in commit order. Each line contains the +# changeset id and timestamp of one changeset, in hexadecimal, in the +# order that the changesets should be committed to svn. +CHANGESETS_SORTED_DATAFILE = 'changesets-s.txt' + +# A file containing a marshalled copy of all the statistics that have +# been gathered so far is written at the end of each pass as a +# marshalled dictionary. This is the pattern used to generate the +# filenames. +STATISTICS_FILE = 'statistics-%02d.pck' + +# This text file contains records (1 per line) that describe openings +# and closings for copies to tags and branches. The format is as +# follows: +# +# SYMBOL_ID SVN_REVNUM TYPE CVS_SYMBOL_ID +# +# where type is either OPENING or CLOSING. CVS_SYMBOL_ID is the id of +# the CVSSymbol whose opening or closing is being described (in hex). +SYMBOL_OPENINGS_CLOSINGS = 'symbolic-names.txt' +# A sorted version of the above file. SYMBOL_ID and SVN_REVNUM are +# the primary and secondary sorting criteria. 
It is important that +# SYMBOL_IDs be located together to make it quick to read them at +# once. The order of SVN_REVNUM is only important because it is +# assumed by some internal consistency checks. +SYMBOL_OPENINGS_CLOSINGS_SORTED = 'symbolic-names-s.txt' + +# Skeleton version of the repository filesystem. See class +# RepositoryMirror for how these work. +MIRROR_NODES_INDEX_TABLE = 'mirror-nodes-index.dat' +MIRROR_NODES_STORE = 'mirror-nodes.pck' + +# Offsets pointing to the beginning of each symbol's records in +# SYMBOL_OPENINGS_CLOSINGS_SORTED. This file contains a pickled map +# from symbol_id to file offset. +SYMBOL_OFFSETS_DB = 'symbol-offsets.pck' + +# Pickled map of CVSFile.id to instance. +CVS_FILES_DB = 'cvs-files.pck' + +# A series of records. The first is a pickled serializer. Each +# subsequent record is a serialized list of all CVSItems applying to a +# CVSFile. +CVS_ITEMS_STORE = 'cvs-items.pck' + +# The same as above, but with the CVSItems ordered in groups based on +# their initial changesets. CVSItems will usually be accessed one +# changeset at a time, so this ordering helps disk locality (even +# though some of the changesets will later be broken up). +CVS_ITEMS_SORTED_INDEX_TABLE = 'cvs-items-sorted-index.dat' +CVS_ITEMS_SORTED_STORE = 'cvs-items-sorted.pck' + +# A record of all symbolic names that will be processed in the +# conversion. This file contains a pickled list of TypedSymbol +# objects. +SYMBOL_DB = 'symbols.pck' + +# A pickled list of the statistics for all symbols. Each entry in the +# list is an instance of cvs2svn_lib.symbol_statistics._Stats. +SYMBOL_STATISTICS = 'symbol-statistics.pck' + +# These two databases provide a bidirectional mapping between +# CVSRevision.ids (in hex) and Subversion revision numbers. +# +# The first maps CVSRevision.id to the SVN revision number of which it +# is a part (more than one CVSRevision can map to the same SVN +# revision number). 
+# +# The second maps Subversion revision numbers (as hex strings) to +# pickled SVNCommit instances. +CVS_REVS_TO_SVN_REVNUMS = 'cvs-revs-to-svn-revnums.dat' + +# This database maps Subversion revision numbers to pickled SVNCommit +# instances. +SVN_COMMITS_INDEX_TABLE = 'svn-commits-index.dat' +SVN_COMMITS_STORE = 'svn-commits.pck' + +# How many bytes to read at a time from a pipe. 128 kiB should be +# large enough to be efficient without wasting too much memory. +PIPE_READ_SIZE = 128 * 1024 + +# Records the author and log message for each changeset. The database +# contains a map metadata_id -> (author, logmessage). Each +# CVSRevision that is eligible to be combined into the same SVN commit +# is assigned the same id. Note that the (author, logmessage) pairs +# are not necessarily all distinct; other data are taken into account +# when constructing ids. +METADATA_INDEX_TABLE = 'metadata-index.dat' +METADATA_STORE = 'metadata.pck' + +# The same, after it has been cleaned up for the chosen output option: +METADATA_CLEAN_INDEX_TABLE = 'metadata-clean-index.dat' +METADATA_CLEAN_STORE = 'metadata-clean.pck' + +# The following four databases are used in conjunction with --use-internal-co. + +# Records the RCS deltas for all CVS revisions. The deltas are to be +# applied forward, i.e. those from trunk are reversed wrt RCS. +RCS_DELTAS_INDEX_TABLE = 'rcs-deltas-index.dat' +RCS_DELTAS_STORE = 'rcs-deltas.pck' + +# Records the revision tree of each RCS file. The format is a list of +# list of integers. The outer list holds lines of development, the inner list +# revisions within the LODs, revisions are CVSItem ids. Branches "closer +# to the trunk" appear later. Revisions are sorted by reverse chronological +# order. The last revision of each branch is the revision it sprouts from. +# Revisions that represent deletions at the end of a branch are omitted. 
+RCS_TREES_INDEX_TABLE = 'rcs-trees-index.dat' +RCS_TREES_STORE = 'rcs-trees.pck' + +# Records the revision tree of each RCS file after removing revisions +# belonging to excluded branches. Note that the branch ordering is arbitrary +# in this file. +RCS_TREES_FILTERED_INDEX_TABLE = 'rcs-trees-filtered-index.dat' +RCS_TREES_FILTERED_STORE = 'rcs-trees-filtered.pck' + +# At any given time during OutputPass, holds the full text of each CVS +# revision that was checked out already and still has descendants that will +# be checked out. +CVS_CHECKOUT_DB = 'cvs-checkout.db' + +# End of DBs related to --use-internal-co. + +# If this run will output directly to a Subversion repository, then +# this is the name of the file that each revision will temporarily be +# written to prior to writing it into the repository. +DUMPFILE = 'svn.dump' + +# flush a commit if a 5 minute gap occurs. +COMMIT_THRESHOLD = 5 * 60 + diff --git a/cvs2svn_lib/context.py b/cvs2svn_lib/context.py new file mode 100644 index 0000000..89dc16a --- /dev/null +++ b/cvs2svn_lib/context.py @@ -0,0 +1,93 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
+# ==================================================================== + +"""Store the context (options, etc) for a cvs2svn run.""" + + +import os + +from cvs2svn_lib import config +from cvs2svn_lib.common import CVSTextDecoder + + +class Ctx: + """Session state for this run of cvs2svn. For example, run-time + options are stored here. This class is a Borg (see + http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531).""" + + __shared_state = { } + + def __init__(self): + self.__dict__ = self.__shared_state + if self.__dict__: + return + # Else, initialize to defaults. + self.set_defaults() + + def set_defaults(self): + """Set all parameters to their default values.""" + + self.output_option = None + self.dry_run = False + self.revision_recorder = None + self.revision_excluder = None + self.revision_reader = None + self.svnadmin_executable = config.SVNADMIN_EXECUTABLE + self.sort_executable = config.SORT_EXECUTABLE + self.trunk_only = False + self.prune = True + self.cvs_author_decoder = CVSTextDecoder(['ascii']) + self.cvs_log_decoder = CVSTextDecoder(['ascii']) + self.cvs_filename_decoder = CVSTextDecoder(['ascii']) + self.decode_apple_single = False + self.symbol_info_filename = None + self.username = None + self.svn_property_setters = [] + self.tmpdir = 'cvs2svn-tmp' + self.skip_cleanup = False + self.keep_cvsignore = False + self.cross_project_commits = True + self.cross_branch_commits = True + self.retain_conflicting_attic_files = False + + self.initial_project_commit_message = ( + 'Standard project directories initialized by cvs2svn.' + ) + self.post_commit_message = ( + 'This commit was generated by cvs2svn to compensate for ' + 'changes in r%(revnum)d, which included commits to RCS files ' + 'with non-trunk default branches.' + ) + self.symbol_commit_message = ( + "This commit was manufactured by cvs2svn to create %(symbol_type)s " + "'%(symbol_name)s'." 
class CVSPath(object):
  """A CVS file or directory within a project.

  Members:

    id -- (int) unique ID for this CVSPath.  At any moment there is
        at most one CVSPath instance with a particular ID, so object
        identity coincides with object equality and instances can be
        used as map keys even without a __hash__() method.

    project -- (Project) the project containing this CVSPath.

    parent_directory -- (CVSDirectory or None) the CVSDirectory that
        contains this CVSPath.

    basename -- (string) base name of this CVSPath, without any ',v'
        suffix.  The project root directory has basename ''.

    ordinal -- (int) sort position of this instance relative to other
        CVSPath instances.  It is assigned by CollectData (using the
        ordering of slow_compare()) after all CVSFiles have been
        processed; __cmp__() simply compares ordinals.

  """

  __slots__ = [
      'id',
      'project',
      'parent_directory',
      'basename',
      'ordinal',
      ]

  def __init__(self, id, project, parent_directory, basename):
    self.id = id
    self.project = project
    self.parent_directory = parent_directory
    self.basename = basename

  def __getstate__(self):
    """This method must only be called after ordinal has been set."""

    return (
        self.id, self.project.id,
        self.parent_directory, self.basename,
        self.ordinal,
        )

  def __setstate__(self, state):
    (
        self.id, project_id,
        self.parent_directory, self.basename,
        self.ordinal,
        ) = state
    # Re-resolve the project from its ID via the Borg context.
    self.project = Ctx()._projects[project_id]

  def get_ancestry(self):
    """Return a list of the CVSPaths leading from the root path to SELF.

    The list starts with self.project.get_root_cvs_directory() and
    ends with self."""

    chain = []
    node = self
    while node is not None:
      # Prepend so the root ends up first without a final reverse().
      chain.insert(0, node)
      node = node.parent_directory
    return chain

  def get_cvs_path(self):
    """Return the canonical path of SELF within the Project.

    The canonical path:

    - Uses forward slashes

    - Doesn't include ',v' for files

    - Omits the 'Attic' path segment unless the file is to be left in
      an Attic directory in the SVN repository; i.e., unless the
      filename exists both in and out of Attic and the
      --retain-conflicting-attic-files option was specified.

    """

    return path_join(*self._get_dir_components())

  cvs_path = property(get_cvs_path)

  def _get_dir_components(self):
    """Return the path components leading to SELF.

    The list holds the basenames of every ancestor below the root
    directory, ending with SELF's own basename."""

    return [node.basename for node in self.get_ancestry()[1:]]

  def __eq__(a, b):
    """Equality is object identity (see the class docstring).

    Supplied so that equality tests do not fall back on __cmp__()."""

    return a is b

  def slow_compare(a, b):
    """Full comparison by project, then by path components."""

    by_project = cmp(a.project, b.project)
    if by_project:
      return by_project
    return cmp(a._get_dir_components(), b._get_dir_components())

  def __cmp__(a, b):
    """This method must only be called after ordinal has been set."""

    return cmp(a.ordinal, b.ordinal)
class CVSDirectory(CVSPath):
  """A CVS directory.

  Members:

    id -- (int or None) unique id for this directory; if None, a new
        id is generated.

    project -- (Project) the project containing this directory.

    parent_directory -- (CVSDirectory or None) the CVSDirectory that
        contains this one.

    basename -- (string) base name of this CVSDirectory (no ',v').

  """

  __slots__ = []

  def __init__(self, id, project, parent_directory, basename):
    """Initialize a new CVSDirectory object."""

    CVSPath.__init__(self, id, project, parent_directory, basename)

  def get_filename(self):
    """Return the filesystem path to this CVSPath in the CVS repository."""

    parent = self.parent_directory
    if parent is None:
      # The project root maps directly to the repository path.
      return self.project.project_cvs_repos_path
    return os.path.join(parent.get_filename(), self.basename)

  filename = property(get_filename)

  def __getstate__(self):
    return CVSPath.__getstate__(self)

  def __setstate__(self, state):
    CVSPath.__setstate__(self, state)

  def __str__(self):
    """For convenience only.  The format is subject to change at any time."""

    return self.cvs_path + '/'

  def __repr__(self):
    return 'CVSDirectory<%x>(%r)' % (self.id, str(self),)
class CVSFile(CVSPath):
  """A CVS file.

  Members:

    id -- (int) unique id for this file.

    project -- (Project) the project containing this file.

    parent_directory -- (CVSDirectory) the CVSDirectory containing
        this CVSFile.

    basename -- (string) base name of this CVSFile (no ',v').

    _in_attic -- (bool) True if the RCS file is in an Attic
        subdirectory that is not considered the parent directory.
        (If a file is in-and-out-of-attic and one copy is to be left
        in Attic after the conversion, then the Attic directory is
        that file's PARENT_DIRECTORY and _IN_ATTIC is False.)

    executable -- (bool) True iff the RCS file has its executable bit
        set.

    file_size -- (long) size of the RCS file in bytes.

    mode -- (string or None) RCS keyword-expansion mode, e.g. 'kkv'
        or 'kb'.

  PARENT_DIRECTORY may contain an 'Attic' component if it should be
  retained in the SVN repository; i.e., if the same filename exists
  out of Attic and the --retain-conflicting-attic-files option was
  specified.

  """

  __slots__ = [
      '_in_attic',
      'executable',
      'file_size',
      'mode',
      ]

  def __init__(
        self, id, project, parent_directory, basename, in_attic,
        executable, file_size, mode
        ):
    """Initialize a new CVSFile object."""

    CVSPath.__init__(self, id, project, parent_directory, basename)
    self._in_attic = in_attic
    self.executable = executable
    self.file_size = file_size
    self.mode = mode

    # Unlike a directory, a file always has a parent.
    assert self.parent_directory is not None

  def get_filename(self):
    """Return the filesystem path to this CVSPath in the CVS repository."""

    rcs_basename = self.basename + ',v'
    if self._in_attic:
      return os.path.join(
          self.parent_directory.filename, 'Attic', rcs_basename
          )
    return os.path.join(self.parent_directory.filename, rcs_basename)

  filename = property(get_filename)

  def __getstate__(self):
    return (
        CVSPath.__getstate__(self),
        self._in_attic, self.executable, self.file_size, self.mode,
        )

  def __setstate__(self, state):
    (
        cvs_path_state,
        self._in_attic, self.executable, self.file_size, self.mode,
        ) = state
    CVSPath.__setstate__(self, cvs_path_state)

  def __str__(self):
    """For convenience only.  The format is subject to change at any time."""

    return self.cvs_path

  def __repr__(self):
    return 'CVSFile<%x>(%r)' % (self.id, str(self),)
class CVSFileDatabase:
  """A database to store CVSFile objects and retrieve them by their id."""

  def __init__(self, mode):
    """Initialize an instance, opening database in MODE (where MODE is
    either DB_OPEN_NEW or DB_OPEN_READ).

    Raise RuntimeError for any other MODE."""

    self.mode = mode

    # A map { id : CVSFile }
    self._cvs_files = {}

    if self.mode == DB_OPEN_NEW:
      pass
    elif self.mode == DB_OPEN_READ:
      f = open(artifact_manager.get_temp_file(config.CVS_FILES_DB), 'rb')
      try:
        cvs_files = cPickle.load(f)
      finally:
        # Close the handle even if unpickling fails.  (The original
        # code never closed the read handle at all, leaking it.)
        f.close()
      for cvs_file in cvs_files:
        self._cvs_files[cvs_file.id] = cvs_file
    else:
      raise RuntimeError('Invalid mode %r' % self.mode)

  def log_file(self, cvs_file):
    """Add CVS_FILE, a CVSFile instance, to the database.

    Raise RuntimeError if the database was opened read-only."""

    if self.mode == DB_OPEN_READ:
      raise RuntimeError('Cannot write items in mode %r' % self.mode)

    self._cvs_files[cvs_file.id] = cvs_file

  def itervalues(self):
    """Iterate over the stored CVSFile instances (arbitrary order)."""

    for value in self._cvs_files.itervalues():
      yield value

  def get_file(self, id):
    """Return the CVSFile with the specified ID."""

    return self._cvs_files[id]

  def close(self):
    """Write the database to disk (DB_OPEN_NEW only) and drop the map."""

    if self.mode == DB_OPEN_NEW:
      f = open(artifact_manager.get_temp_file(config.CVS_FILES_DB), 'wb')
      try:
        cPickle.dump(self._cvs_files.values(), f, -1)
      finally:
        f.close()

    # Release the (possibly large) map; further use is an error.
    self._cvs_files = None
class LODItems(object):
  """The CVSItems that belong to one line of development in one file."""

  def __init__(self, lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags):
    # The LineOfDevelopment described by this instance.
    self.lod = lod

    # The CVSBranch starting this LOD, if any; otherwise, None.
    self.cvs_branch = cvs_branch

    # The CVSRevisions on this LOD (possibly none), in dependency
    # order.
    self.cvs_revisions = cvs_revisions

    # The CVSBranches sprouting from this LOD (either from cvs_branch
    # or from one of the CVSRevisions).
    self.cvs_branches = cvs_branches

    # The CVSTags sprouting from this LOD (either from cvs_branch or
    # from one of the CVSRevisions).
    self.cvs_tags = cvs_tags

  def is_trivial_import(self):
    """Return True iff this LOD is a trivial import branch in this file.

    A trivial import branch is a branch that was used for a single
    import and nothing else.  Such a branch is eligible for being
    grafted onto trunk, even if it has branch blockers."""

    revisions = self.cvs_revisions
    return len(revisions) == 1 and revisions[0].ntdbr

  def is_pure_ntdb(self):
    """Return True iff this LOD is a pure NTDB in this file.

    A pure non-trunk default branch is defined to be a branch that
    contains only NTDB revisions (and at least one of them).  Such a
    branch is eligible for being grafted onto trunk, even if it has
    branch blockers."""

    revisions = self.cvs_revisions
    return revisions and revisions[-1].ntdbr

  def iter_blockers(self):
    """Yield the CVSTags/CVSBranches that block excluding this LOD.

    A pure NTDB has no blockers, because its blockers can be grafted
    to trunk.  Any other branch is blocked exactly by the symbols
    that sprout from its non-NTDB revisions."""

    if self.is_pure_ntdb():
      return

    blocking_sources = set(
        cvs_revision.id
        for cvs_revision in self.cvs_revisions
        if not cvs_revision.ntdbr
        )

    for cvs_tag in self.cvs_tags:
      if cvs_tag.source_id in blocking_sources:
        yield cvs_tag

    for cvs_branch in self.cvs_branches:
      if cvs_branch.source_id in blocking_sources:
        yield cvs_branch
+ pass + else: + # Other branches are only blocked by symbols that sprout from + # non-NTDB revisions: + non_ntdbr_revision_ids = set() + for cvs_revision in self.cvs_revisions: + if not cvs_revision.ntdbr: + non_ntdbr_revision_ids.add(cvs_revision.id) + + for cvs_tag in self.cvs_tags: + if cvs_tag.source_id in non_ntdbr_revision_ids: + yield cvs_tag + + for cvs_branch in self.cvs_branches: + if cvs_branch.source_id in non_ntdbr_revision_ids: + yield cvs_branch + + +class CVSFileItems(object): + def __init__(self, cvs_file, trunk, cvs_items): + # The file whose data this instance holds. + self.cvs_file = cvs_file + + # The symbol that represents "Trunk" in this file. + self.trunk = trunk + + # A map from CVSItem.id to CVSItem: + self._cvs_items = {} + + # The cvs_item_id of each root in the CVSItem forest. (A root is + # defined to be any CVSRevision with no prev_id.) + self.root_ids = set() + + for cvs_item in cvs_items: + self.add(cvs_item) + if isinstance(cvs_item, CVSRevision) and cvs_item.prev_id is None: + self.root_ids.add(cvs_item.id) + + def __getstate__(self): + return (self.cvs_file.id, self.values(),) + + def __setstate__(self, state): + (cvs_file_id, cvs_items,) = state + cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id) + CVSFileItems.__init__( + self, cvs_file, cvs_file.project.get_trunk(), cvs_items, + ) + + def add(self, cvs_item): + self._cvs_items[cvs_item.id] = cvs_item + + def __getitem__(self, id): + """Return the CVSItem with the specified ID.""" + + return self._cvs_items[id] + + def get(self, id, default=None): + return self._cvs_items.get(id, default) + + def __delitem__(self, id): + assert id not in self.root_ids + del self._cvs_items[id] + + def values(self): + return self._cvs_items.values() + + def check_link_consistency(self): + """Check that the CVSItems are linked correctly with each other.""" + + for cvs_item in self.values(): + try: + cvs_item.check_links(self) + except AssertionError: + Log().error( + 'Link consistency error in 
%s\n' + 'This is probably a bug internal to cvs2svn. Please file a bug\n' + 'report including the following stack trace (see FAQ for more ' + 'info).' + % (cvs_item,)) + raise + + def _get_lod(self, lod, cvs_branch, start_id): + """Return the indicated LODItems. + + LOD is the corresponding LineOfDevelopment. CVS_BRANCH is the + CVSBranch instance that starts the LOD if any; otherwise it is + None. START_ID is the id of the first CVSRevision on this LOD, or + None if there are none.""" + + cvs_revisions = [] + cvs_branches = [] + cvs_tags = [] + + def process_subitems(cvs_item): + """Process the branches and tags that are rooted in CVS_ITEM. + + CVS_ITEM can be a CVSRevision or a CVSBranch.""" + + for branch_id in cvs_item.branch_ids[:]: + cvs_branches.append(self[branch_id]) + + for tag_id in cvs_item.tag_ids: + cvs_tags.append(self[tag_id]) + + if cvs_branch is not None: + # Include the symbols sprouting directly from the CVSBranch: + process_subitems(cvs_branch) + + id = start_id + while id is not None: + cvs_rev = self[id] + cvs_revisions.append(cvs_rev) + process_subitems(cvs_rev) + id = cvs_rev.next_id + + return LODItems(lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags) + + def get_lod_items(self, cvs_branch): + """Return an LODItems describing the branch that starts at CVS_BRANCH. + + CVS_BRANCH must be an instance of CVSBranch contained in this + CVSFileItems.""" + + return self._get_lod(cvs_branch.symbol, cvs_branch, cvs_branch.next_id) + + def iter_root_lods(self): + """Iterate over the LODItems for all root LODs (non-recursively).""" + + for id in list(self.root_ids): + cvs_item = self[id] + if isinstance(cvs_item, CVSRevision): + # This LOD doesn't have a CVSBranch associated with it. + # Either it is Trunk, or it is a branch whose CVSBranch has + # been deleted. + yield self._get_lod(cvs_item.lod, None, id) + elif isinstance(cvs_item, CVSBranch): + # This is a Branch that has been severed from the rest of the + # tree. 
+ yield self._get_lod(cvs_item.symbol, cvs_item, cvs_item.next_id) + else: + raise InternalError('Unexpected root item: %s' % (cvs_item,)) + + def _iter_tree(self, lod, cvs_branch, start_id): + """Iterate over the tree that starts at the specified line of development. + + LOD is the LineOfDevelopment where the iteration should start. + CVS_BRANCH is the CVSBranch instance that starts the LOD if any; + otherwise it is None. ID is the id of the first CVSRevision on + this LOD, or None if there are none. + + There are two cases handled by this routine: trunk (where LOD is a + Trunk instance, CVS_BRANCH is None, and ID is the id of the 1.1 + revision) and a branch (where LOD is a Branch instance, CVS_BRANCH + is a CVSBranch instance, and ID is either the id of the first + CVSRevision on the branch or None if there are no CVSRevisions on + the branch). Note that CVS_BRANCH and ID cannot simultaneously be + None. + + Yield an LODItems instance for each line of development.""" + + cvs_revisions = [] + cvs_branches = [] + cvs_tags = [] + + def process_subitems(cvs_item): + """Process the branches and tags that are rooted in CVS_ITEM. + + CVS_ITEM can be a CVSRevision or a CVSBranch.""" + + for branch_id in cvs_item.branch_ids[:]: + # Recurse into the branch: + branch = self[branch_id] + for lod_items in self._iter_tree( + branch.symbol, branch, branch.next_id + ): + yield lod_items + # The caller might have deleted the branch that we just + # yielded. If it is no longer present, then do not add it to + # the list of cvs_branches. 
+ try: + cvs_branches.append(self[branch_id]) + except KeyError: + pass + + for tag_id in cvs_item.tag_ids: + cvs_tags.append(self[tag_id]) + + if cvs_branch is not None: + # Include the symbols sprouting directly from the CVSBranch: + for lod_items in process_subitems(cvs_branch): + yield lod_items + + id = start_id + while id is not None: + cvs_rev = self[id] + cvs_revisions.append(cvs_rev) + + for lod_items in process_subitems(cvs_rev): + yield lod_items + + id = cvs_rev.next_id + + yield LODItems(lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags) + + def iter_lods(self): + """Iterate over LinesOfDevelopment in this file, in depth-first order. + + For each LOD, yield an LODItems instance. The traversal starts at + each root node but returns the LODs in depth-first order. + + It is allowed to modify the CVSFileItems instance while the + traversal is occurring, but only in ways that don't affect the + tree structure above (i.e., towards the trunk from) the current + LOD.""" + + # Make a list out of root_ids so that callers can change it: + for id in list(self.root_ids): + cvs_item = self[id] + if isinstance(cvs_item, CVSRevision): + # This LOD doesn't have a CVSBranch associated with it. + # Either it is Trunk, or it is a branch whose CVSBranch has + # been deleted. + lod = cvs_item.lod + cvs_branch = None + elif isinstance(cvs_item, CVSBranch): + # This is a Branch that has been severed from the rest of the + # tree. + lod = cvs_item.symbol + id = cvs_item.next_id + cvs_branch = cvs_item + else: + raise InternalError('Unexpected root item: %s' % (cvs_item,)) + + for lod_items in self._iter_tree(lod, cvs_branch, id): + yield lod_items + + def iter_deltatext_ancestors(self, cvs_rev): + """Generate the delta-dependency ancestors of CVS_REV. 
+ + Generate then ancestors of CVS_REV in deltatext order; i.e., back + along branches towards trunk, then outwards along trunk towards + HEAD.""" + + while True: + # Determine the next candidate source revision: + if isinstance(cvs_rev.lod, Trunk): + if cvs_rev.next_id is None: + # HEAD has no ancestors, so we are done: + return + else: + cvs_rev = self[cvs_rev.next_id] + else: + cvs_rev = self[cvs_rev.prev_id] + + yield cvs_rev + + def _sever_branch(self, lod_items): + """Sever the branch from its source and discard the CVSBranch. + + LOD_ITEMS describes a branch that should be severed from its + source, deleting the CVSBranch and creating a new root. Also set + LOD_ITEMS.cvs_branch to none. + + This method can only be used before symbols have been grafted onto + CVSBranches. It does not adjust NTDBR, NTDBR_PREV_ID or + NTDBR_NEXT_ID even if LOD_ITEMS describes a NTDB.""" + + cvs_branch = lod_items.cvs_branch + assert cvs_branch is not None + assert not cvs_branch.tag_ids + assert not cvs_branch.branch_ids + source_rev = self[cvs_branch.source_id] + + # We only cover the following case, even though after + # FilterSymbolsPass cvs_branch.source_id might refer to another + # CVSBranch. 
+ assert isinstance(source_rev, CVSRevision) + + # Delete the CVSBranch itself: + lod_items.cvs_branch = None + del self[cvs_branch.id] + + # Delete the reference from the source revision to the CVSBranch: + source_rev.branch_ids.remove(cvs_branch.id) + + # Delete the reference from the first revision on the branch to + # the CVSBranch: + if lod_items.cvs_revisions: + first_rev = lod_items.cvs_revisions[0] + + # Delete the reference from first_rev to the CVSBranch: + first_rev.first_on_branch_id = None + + # Delete the reference from the source revision to the first + # revision on the branch: + source_rev.branch_commit_ids.remove(first_rev.id) + + # ...and vice versa: + first_rev.prev_id = None + + # Change the type of first_rev (e.g., from Change to Add): + first_rev.__class__ = cvs_revision_type_map[ + (isinstance(first_rev, CVSRevisionModification), False,) + ] + + # Now first_rev is a new root: + self.root_ids.add(first_rev.id) + + def adjust_ntdbrs(self, ntdbr_cvs_revs): + """Adjust the specified non-trunk default branch revisions. + + NTDBR_CVS_REVS is a list of CVSRevision instances in this file + that have been determined to be non-trunk default branch + revisions. + + The first revision on the default branch is handled strangely by + CVS. If a file is imported (as opposed to being added), CVS + creates a 1.1 revision, then creates a vendor branch 1.1.1 based + on 1.1, then creates a 1.1.1.1 revision that is identical to the + 1.1 revision (i.e., its deltatext is empty). The log message that + the user typed when importing is stored with the 1.1.1.1 revision. + The 1.1 revision always contains a standard, generated log + message, 'Initial revision\n'. + + When we detect a straightforward import like this, we want to + handle it by deleting the 1.1 revision (which doesn't contain any + useful information) and making 1.1.1.1 into an independent root in + the file's dependency tree. 
In SVN, 1.1.1.1 will be added + directly to the vendor branch with its initial content. Then in a + special 'post-commit', the 1.1.1.1 revision is copied back to + trunk. + + If the user imports again to the same vendor branch, then CVS + creates revisions 1.1.1.2, 1.1.1.3, etc. on the vendor branch, + *without* counterparts in trunk (even though these revisions + effectively play the role of trunk revisions). So after we add + such revisions to the vendor branch, we also copy them back to + trunk in post-commits. + + Set the ntdbr members of the revisions listed in NTDBR_CVS_REVS to + True. Also, if there is a 1.2 revision, then set that revision to + depend on the last non-trunk default branch revision and possibly + adjust its type accordingly.""" + + for cvs_rev in ntdbr_cvs_revs: + cvs_rev.ntdbr = True + + # Look for a 1.2 revision: + rev_1_1 = self[ntdbr_cvs_revs[0].prev_id] + + rev_1_2 = self.get(rev_1_1.next_id) + if rev_1_2 is not None: + # Revision 1.2 logically follows the imported revisions, not + # 1.1. Accordingly, connect it to the last NTDBR and possibly + # change its type. + last_ntdbr = ntdbr_cvs_revs[-1] + rev_1_2.ntdbr_prev_id = last_ntdbr.id + last_ntdbr.ntdbr_next_id = rev_1_2.id + rev_1_2.__class__ = cvs_revision_type_map[( + isinstance(rev_1_2, CVSRevisionModification), + isinstance(last_ntdbr, CVSRevisionModification), + )] + + def process_live_ntdb(self, vendor_lod_items): + """VENDOR_LOD_ITEMS is a live default branch; process it. + + In this case, all revisions on the default branch are NTDBRs and + it is an error if there is also a '1.2' revision. + + Return True iff this transformation really does something. 
Raise + a VendorBranchError if there is a '1.2' revision.""" + + rev_1_1 = self[vendor_lod_items.cvs_branch.source_id] + rev_1_2_id = rev_1_1.next_id + if rev_1_2_id is not None: + raise VendorBranchError( + 'File \'%s\' has default branch=%s but also a revision %s' + % (self.cvs_file.filename, + vendor_lod_items.cvs_branch.branch_number, self[rev_1_2_id].rev,) + ) + + ntdbr_cvs_revs = list(vendor_lod_items.cvs_revisions) + + if ntdbr_cvs_revs: + self.adjust_ntdbrs(ntdbr_cvs_revs) + return True + else: + return False + + def process_historical_ntdb(self, vendor_lod_items): + """There appears to have been a non-trunk default branch in the past. + + There is currently no default branch, but the branch described by + file appears to have been imported. So our educated guess is that + all revisions on the '1.1.1' branch (described by + VENDOR_LOD_ITEMS) with timestamps prior to the timestamp of '1.2' + were non-trunk default branch revisions. + + Return True iff this transformation really does something. + + This really only handles standard '1.1.1.*'-style vendor + revisions. One could conceivably have a file whose default branch + is 1.1.3 or whatever, or was that at some point in time, with + vendor revisions 1.1.3.1, 1.1.3.2, etc. But with the default + branch gone now, we'd have no basis for assuming that the + non-standard vendor branch had ever been the default branch + anyway. + + Note that we rely on comparisons between the timestamps of the + revisions on the vendor branch and that of revision 1.2, even + though the timestamps might be incorrect due to clock skew. We + could do a slightly better job if we used the changeset + timestamps, as it is possible that the dependencies that went into + determining those timestamps are more accurate. 
But that would + require an extra pass or two.""" + + rev_1_1 = self[vendor_lod_items.cvs_branch.source_id] + rev_1_2_id = rev_1_1.next_id + + if rev_1_2_id is None: + rev_1_2_timestamp = None + else: + rev_1_2_timestamp = self[rev_1_2_id].timestamp + + ntdbr_cvs_revs = [] + for cvs_rev in vendor_lod_items.cvs_revisions: + if rev_1_2_timestamp is not None \ + and cvs_rev.timestamp >= rev_1_2_timestamp: + # That's the end of the once-default branch. + break + ntdbr_cvs_revs.append(cvs_rev) + + if ntdbr_cvs_revs: + self.adjust_ntdbrs(ntdbr_cvs_revs) + return True + else: + return False + + def imported_remove_1_1(self, vendor_lod_items): + """This file was imported. Remove the 1.1 revision if possible. + + VENDOR_LOD_ITEMS is the LODItems instance for the vendor branch. + See adjust_ntdbrs() for more information.""" + + assert vendor_lod_items.cvs_revisions + cvs_rev = vendor_lod_items.cvs_revisions[0] + + if isinstance(cvs_rev, CVSRevisionModification) \ + and not cvs_rev.deltatext_exists: + cvs_branch = vendor_lod_items.cvs_branch + rev_1_1 = self[cvs_branch.source_id] + assert isinstance(rev_1_1, CVSRevision) + Log().debug('Removing unnecessary revision %s' % (rev_1_1,)) + + # Delete the 1.1.1 CVSBranch and sever the vendor branch from trunk: + self._sever_branch(vendor_lod_items) + + # Delete rev_1_1: + self.root_ids.remove(rev_1_1.id) + del self[rev_1_1.id] + rev_1_2_id = rev_1_1.next_id + if rev_1_2_id is not None: + rev_1_2 = self[rev_1_2_id] + rev_1_2.prev_id = None + self.root_ids.add(rev_1_2.id) + + # Move any tags and branches from rev_1_1 to cvs_rev: + cvs_rev.tag_ids.extend(rev_1_1.tag_ids) + for id in rev_1_1.tag_ids: + cvs_tag = self[id] + cvs_tag.source_lod = cvs_rev.lod + cvs_tag.source_id = cvs_rev.id + cvs_rev.branch_ids[0:0] = rev_1_1.branch_ids + for id in rev_1_1.branch_ids: + cvs_branch = self[id] + cvs_branch.source_lod = cvs_rev.lod + cvs_branch.source_id = cvs_rev.id + cvs_rev.branch_commit_ids[0:0] = rev_1_1.branch_commit_ids + for id in 
rev_1_1.branch_commit_ids: + cvs_rev2 = self[id] + cvs_rev2.prev_id = cvs_rev.id + + def _delete_unneeded(self, cvs_item, metadata_db): + if isinstance(cvs_item, CVSRevisionNoop) \ + and cvs_item.rev == '1.1' \ + and isinstance(cvs_item.lod, Trunk) \ + and len(cvs_item.branch_ids) >= 1 \ + and self[cvs_item.branch_ids[0]].next_id is not None \ + and not cvs_item.closed_symbols \ + and not cvs_item.ntdbr: + # FIXME: This message will not match if the RCS file was renamed + # manually after it was created. + log_msg = metadata_db[cvs_item.metadata_id].log_msg + cvs_generated_msg = 'file %s was initially added on branch %s.\n' % ( + self.cvs_file.basename, + self[cvs_item.branch_ids[0]].symbol.name,) + return log_msg == cvs_generated_msg + else: + return False + + def remove_unneeded_deletes(self, metadata_db): + """Remove unneeded deletes for this file. + + If a file is added on a branch, then a trunk revision is added at + the same time in the 'Dead' state. This revision doesn't do + anything useful, so delete it.""" + + for id in self.root_ids: + cvs_item = self[id] + if self._delete_unneeded(cvs_item, metadata_db): + Log().debug('Removing unnecessary delete %s' % (cvs_item,)) + + # Delete cvs_item: + self.root_ids.remove(cvs_item.id) + del self[id] + if cvs_item.next_id is not None: + cvs_rev_next = self[cvs_item.next_id] + cvs_rev_next.prev_id = None + self.root_ids.add(cvs_rev_next.id) + + # Delete all CVSBranches rooted at this revision. If there is + # a CVSRevision on the branch, it should already be an add so + # it doesn't have to be changed. 
+ for cvs_branch_id in cvs_item.branch_ids: + cvs_branch = self[cvs_branch_id] + del self[cvs_branch.id] + + if cvs_branch.next_id is not None: + cvs_branch_next = self[cvs_branch.next_id] + cvs_branch_next.first_on_branch_id = None + cvs_branch_next.prev_id = None + self.root_ids.add(cvs_branch_next.id) + + # Tagging a dead revision doesn't do anything, so remove any + # tags that were set on 1.1: + for cvs_tag_id in cvs_item.tag_ids: + del self[cvs_tag_id] + + # This can only happen once per file, and we might have just + # changed self.root_ids, so break out of the loop: + break + + def _initial_branch_delete_unneeded(self, lod_items, metadata_db): + """Return True iff the initial revision in LOD_ITEMS can be deleted.""" + + if lod_items.cvs_branch is not None \ + and lod_items.cvs_branch.source_id is not None \ + and len(lod_items.cvs_revisions) >= 2: + cvs_revision = lod_items.cvs_revisions[0] + cvs_rev_source = self[lod_items.cvs_branch.source_id] + if isinstance(cvs_revision, CVSRevisionAbsent) \ + and not cvs_revision.tag_ids \ + and not cvs_revision.branch_ids \ + and abs(cvs_revision.timestamp - cvs_rev_source.timestamp) <= 2: + # FIXME: This message will not match if the RCS file was renamed + # manually after it was created. + log_msg = metadata_db[cvs_revision.metadata_id].log_msg + return bool(re.match( + r'file %s was added on branch .* on ' + r'\d{4}\-\d{2}\-\d{2} \d{2}\:\d{2}\:\d{2}( [\+\-]\d{4})?' + '\n' % (re.escape(self.cvs_file.basename),), + log_msg, + )) + return False + + def remove_initial_branch_deletes(self, metadata_db): + """If the first revision on a branch is an unnecessary delete, remove it. + + If a file is added on a branch (whether or not it already existed + on trunk), then new versions of CVS add a first branch revision in + the 'dead' state (to indicate that the file did not exist on the + branch when the branch was created) followed by the second branch + revision, which is an add. 
When we encounter this situation, we + sever the branch from trunk and delete the first branch + revision.""" + + for lod_items in self.iter_lods(): + if self._initial_branch_delete_unneeded(lod_items, metadata_db): + cvs_revision = lod_items.cvs_revisions[0] + Log().debug( + 'Removing unnecessary initial branch delete %s' % (cvs_revision,) + ) + cvs_branch = lod_items.cvs_branch + cvs_rev_source = self[cvs_branch.source_id] + cvs_rev_next = lod_items.cvs_revisions[1] + + # Delete cvs_revision: + del self[cvs_revision.id] + cvs_rev_next.prev_id = None + self.root_ids.add(cvs_rev_next.id) + cvs_rev_source.branch_commit_ids.remove(cvs_revision.id) + + # Delete the CVSBranch on which it is located: + del self[cvs_branch.id] + cvs_rev_source.branch_ids.remove(cvs_branch.id) + + def _exclude_tag(self, cvs_tag): + """Exclude the specified CVS_TAG.""" + + del self[cvs_tag.id] + + # A CVSTag is the successor of the CVSRevision that it + # sprouts from. Delete this tag from that revision's + # tag_ids: + self[cvs_tag.source_id].tag_ids.remove(cvs_tag.id) + + def _exclude_branch(self, lod_items): + """Exclude the branch described by LOD_ITEMS, including its revisions. + + (Do not update the LOD_ITEMS instance itself.) + + If the LOD starts with non-trunk default branch revisions, leave + the branch and the NTDB revisions in place, but delete any + subsequent revisions that are not NTDB revisions. 
In this case, + return True; otherwise return False""" + + if lod_items.cvs_revisions and lod_items.cvs_revisions[0].ntdbr: + for cvs_rev in lod_items.cvs_revisions: + if not cvs_rev.ntdbr: + # We've found the first non-NTDBR, and it's stored in cvs_rev: + break + else: + # There was no revision following the NTDBRs: + cvs_rev = None + + if cvs_rev: + last_ntdbr = self[cvs_rev.prev_id] + last_ntdbr.next_id = None + while True: + del self[cvs_rev.id] + if cvs_rev.next_id is None: + break + cvs_rev = self[cvs_rev.next_id] + + return True + + else: + if lod_items.cvs_branch is not None: + # Delete the CVSBranch itself: + cvs_branch = lod_items.cvs_branch + + del self[cvs_branch.id] + + # A CVSBranch is the successor of the CVSRevision that it + # sprouts from. Delete this branch from that revision's + # branch_ids: + self[cvs_branch.source_id].branch_ids.remove(cvs_branch.id) + + if lod_items.cvs_revisions: + # The first CVSRevision on the branch has to be either detached + # from the revision from which the branch sprang, or removed + # from self.root_ids: + cvs_rev = lod_items.cvs_revisions[0] + if cvs_rev.prev_id is None: + self.root_ids.remove(cvs_rev.id) + else: + self[cvs_rev.prev_id].branch_commit_ids.remove(cvs_rev.id) + + for cvs_rev in lod_items.cvs_revisions: + del self[cvs_rev.id] + + return False + + def graft_ntdbr_to_trunk(self): + """Graft the non-trunk default branch revisions to trunk. 
+ + They should already be alone on a branch that may or may not have + a CVSBranch connecting it to trunk.""" + + for lod_items in self.iter_lods(): + if lod_items.cvs_revisions and lod_items.cvs_revisions[0].ntdbr: + assert lod_items.is_pure_ntdb() + + first_rev = lod_items.cvs_revisions[0] + last_rev = lod_items.cvs_revisions[-1] + rev_1_1 = self.get(first_rev.prev_id) + rev_1_2 = self.get(last_rev.ntdbr_next_id) + + if lod_items.cvs_branch is not None: + self._sever_branch(lod_items) + + if rev_1_1 is not None: + rev_1_1.next_id = first_rev.id + first_rev.prev_id = rev_1_1.id + + self.root_ids.remove(first_rev.id) + + first_rev.__class__ = cvs_revision_type_map[( + isinstance(first_rev, CVSRevisionModification), + isinstance(rev_1_1, CVSRevisionModification), + )] + + if rev_1_2 is not None: + rev_1_2.ntdbr_prev_id = None + last_rev.ntdbr_next_id = None + + if rev_1_2.prev_id is None: + self.root_ids.remove(rev_1_2.id) + + rev_1_2.prev_id = last_rev.id + last_rev.next_id = rev_1_2.id + + # The effective_pred_id of rev_1_2 was not changed, so we + # don't have to change rev_1_2's type. + + for cvs_rev in lod_items.cvs_revisions: + cvs_rev.ntdbr = False + cvs_rev.lod = self.trunk + + for cvs_branch in lod_items.cvs_branches: + cvs_branch.source_lod = self.trunk + + for cvs_tag in lod_items.cvs_tags: + cvs_tag.source_lod = self.trunk + + return + + def exclude_non_trunk(self): + """Delete all tags and branches.""" + + ntdbr_excluded = False + for lod_items in self.iter_lods(): + for cvs_tag in lod_items.cvs_tags[:]: + self._exclude_tag(cvs_tag) + lod_items.cvs_tags.remove(cvs_tag) + + if not isinstance(lod_items.lod, Trunk): + assert not lod_items.cvs_branches + + ntdbr_excluded |= self._exclude_branch(lod_items) + + if ntdbr_excluded: + self.graft_ntdbr_to_trunk() + + def filter_excluded_symbols(self, revision_excluder): + """Delete any excluded symbols and references to them. 
+ + Call the revision_excluder's callback methods to let it know what + is being excluded.""" + + ntdbr_excluded = False + for lod_items in self.iter_lods(): + # Delete any excluded tags: + for cvs_tag in lod_items.cvs_tags[:]: + if isinstance(cvs_tag.symbol, ExcludedSymbol): + self._exclude_tag(cvs_tag) + + lod_items.cvs_tags.remove(cvs_tag) + + # Delete the whole branch if it is to be excluded: + if isinstance(lod_items.lod, ExcludedSymbol): + # A symbol can only be excluded if no other symbols spring + # from it. This was already checked in CollateSymbolsPass, so + # these conditions should already be satisfied. + assert not list(lod_items.iter_blockers()) + + ntdbr_excluded |= self._exclude_branch(lod_items) + + if ntdbr_excluded: + self.graft_ntdbr_to_trunk() + + revision_excluder.process_file(self) + + def _mutate_branch_to_tag(self, cvs_branch): + """Mutate the branch CVS_BRANCH into a tag.""" + + if cvs_branch.next_id is not None: + # This shouldn't happen because it was checked in + # CollateSymbolsPass: + raise FatalError('Attempt to exclude a branch with commits.') + cvs_tag = CVSTag( + cvs_branch.id, cvs_branch.cvs_file, cvs_branch.symbol, + cvs_branch.source_lod, cvs_branch.source_id, + cvs_branch.revision_recorder_token, + ) + self.add(cvs_tag) + cvs_revision = self[cvs_tag.source_id] + cvs_revision.branch_ids.remove(cvs_tag.id) + cvs_revision.tag_ids.append(cvs_tag.id) + + def _mutate_tag_to_branch(self, cvs_tag): + """Mutate the tag into a branch.""" + + cvs_branch = CVSBranch( + cvs_tag.id, cvs_tag.cvs_file, cvs_tag.symbol, + None, cvs_tag.source_lod, cvs_tag.source_id, None, + cvs_tag.revision_recorder_token, + ) + self.add(cvs_branch) + cvs_revision = self[cvs_branch.source_id] + cvs_revision.tag_ids.remove(cvs_branch.id) + cvs_revision.branch_ids.append(cvs_branch.id) + + def _mutate_symbol(self, cvs_symbol): + """Mutate CVS_SYMBOL if necessary.""" + + symbol = cvs_symbol.symbol + if isinstance(cvs_symbol, CVSBranch) and isinstance(symbol, Tag): 
+ self._mutate_branch_to_tag(cvs_symbol) + elif isinstance(cvs_symbol, CVSTag) and isinstance(symbol, Branch): + self._mutate_tag_to_branch(cvs_symbol) + + def mutate_symbols(self): + """Force symbols to be tags/branches based on self.symbol_db.""" + + for cvs_item in self.values(): + if isinstance(cvs_item, CVSRevision): + # This CVSRevision may be affected by the mutation of any + # CVSSymbols that it references, but there is nothing to do + # here directly. + pass + elif isinstance(cvs_item, CVSSymbol): + self._mutate_symbol(cvs_item) + else: + raise RuntimeError('Unknown cvs item type') + + def _adjust_tag_parent(self, cvs_tag): + """Adjust the parent of CVS_TAG if possible and preferred. + + CVS_TAG is an instance of CVSTag. This method must be called in + leaf-to-trunk order.""" + + # The Symbol that cvs_tag would like to have as a parent: + preferred_parent = Ctx()._symbol_db.get_symbol( + cvs_tag.symbol.preferred_parent_id) + + if cvs_tag.source_lod == preferred_parent: + # The preferred parent is already the parent. + return + + # The CVSRevision that is its direct parent: + source = self[cvs_tag.source_id] + assert isinstance(source, CVSRevision) + + if isinstance(preferred_parent, Trunk): + # It is not possible to graft *onto* Trunk: + return + + # Try to find the preferred parent among the possible parents: + for branch_id in source.branch_ids: + if self[branch_id].symbol == preferred_parent: + # We found it! + break + else: + # The preferred parent is not a possible parent in this file. + return + + parent = self[branch_id] + assert isinstance(parent, CVSBranch) + + Log().debug('Grafting %s from %s (on %s) onto %s' % ( + cvs_tag, source, source.lod, parent,)) + # Switch parent: + source.tag_ids.remove(cvs_tag.id) + parent.tag_ids.append(cvs_tag.id) + cvs_tag.source_lod = parent.symbol + cvs_tag.source_id = parent.id + + def _adjust_branch_parents(self, cvs_branch): + """Adjust the parent of CVS_BRANCH if possible and preferred. 
+ + CVS_BRANCH is an instance of CVSBranch. This method must be + called in leaf-to-trunk order.""" + + # The Symbol that cvs_branch would like to have as a parent: + preferred_parent = Ctx()._symbol_db.get_symbol( + cvs_branch.symbol.preferred_parent_id) + + if cvs_branch.source_lod == preferred_parent: + # The preferred parent is already the parent. + return + + # The CVSRevision that is its direct parent: + source = self[cvs_branch.source_id] + # This is always a CVSRevision because we haven't adjusted it yet: + assert isinstance(source, CVSRevision) + + if isinstance(preferred_parent, Trunk): + # It is not possible to graft *onto* Trunk: + return + + # Try to find the preferred parent among the possible parents: + for branch_id in source.branch_ids: + possible_parent = self[branch_id] + if possible_parent.symbol == preferred_parent: + # We found it! + break + elif possible_parent.symbol == cvs_branch.symbol: + # Only branches that precede the branch to be adjusted are + # considered possible parents. Leave parentage unchanged: + return + else: + # This point should never be reached. + raise InternalError( + 'Possible parent search did not terminate as expected') + + parent = possible_parent + assert isinstance(parent, CVSBranch) + + Log().debug('Grafting %s from %s (on %s) onto %s' % ( + cvs_branch, source, source.lod, parent,)) + # Switch parent: + source.branch_ids.remove(cvs_branch.id) + parent.branch_ids.append(cvs_branch.id) + cvs_branch.source_lod = parent.symbol + cvs_branch.source_id = parent.id + + def adjust_parents(self): + """Adjust the parents of symbols to their preferred parents. 
+ + If a CVSSymbol has a preferred parent that is different than its + current parent, and if the preferred parent is an allowed parent + of the CVSSymbol in this file, then graft the CVSSymbol onto its + preferred parent.""" + + for lod_items in self.iter_lods(): + for cvs_tag in lod_items.cvs_tags: + self._adjust_tag_parent(cvs_tag) + + for cvs_branch in lod_items.cvs_branches: + self._adjust_branch_parents(cvs_branch) + + def _get_revision_source(self, cvs_symbol): + """Return the CVSRevision that is the ultimate source of CVS_SYMBOL.""" + + while True: + cvs_item = self[cvs_symbol.source_id] + if isinstance(cvs_item, CVSRevision): + return cvs_item + else: + cvs_symbol = cvs_item + + def refine_symbols(self): + """Refine the types of the CVSSymbols in this file. + + Adjust the symbol types based on whether the source exists: + CVSBranch vs. CVSBranchNoop and CVSTag vs. CVSTagNoop.""" + + for lod_items in self.iter_lods(): + for cvs_tag in lod_items.cvs_tags: + source = self._get_revision_source(cvs_tag) + cvs_tag.__class__ = cvs_tag_type_map[ + isinstance(source, CVSRevisionModification) + ] + + for cvs_branch in lod_items.cvs_branches: + source = self._get_revision_source(cvs_branch) + cvs_branch.__class__ = cvs_branch_type_map[ + isinstance(source, CVSRevisionModification) + ] + + def record_opened_symbols(self): + """Set CVSRevision.opened_symbols for the surviving revisions.""" + + for cvs_item in self.values(): + if isinstance(cvs_item, (CVSRevision, CVSBranch)): + cvs_item.opened_symbols = [] + for cvs_symbol_opened_id in cvs_item.get_cvs_symbol_ids_opened(): + cvs_symbol_opened = self[cvs_symbol_opened_id] + cvs_item.opened_symbols.append( + (cvs_symbol_opened.symbol.id, cvs_symbol_opened.id,) + ) + + def record_closed_symbols(self): + """Set CVSRevision.closed_symbols for the surviving revisions. + + A CVSRevision closes the symbols that were opened by the CVSItems + that the CVSRevision closes. Got it? 
+ + This method must be called after record_opened_symbols().""" + + for cvs_item in self.values(): + if isinstance(cvs_item, CVSRevision): + cvs_item.closed_symbols = [] + for cvs_item_closed_id in cvs_item.get_ids_closed(): + cvs_item_closed = self[cvs_item_closed_id] + cvs_item.closed_symbols.extend(cvs_item_closed.opened_symbols) + + diff --git a/cvs2svn_lib/cvs_item.py b/cvs2svn_lib/cvs_item.py new file mode 100644 index 0000000..5c01a24 --- /dev/null +++ b/cvs2svn_lib/cvs_item.py @@ -0,0 +1,901 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains classes to store atomic CVS events. + +A CVSItem is a single event, pertaining to a single file, that can be +determined to have occured based on the information in the CVS +repository. 
+ +The inheritance tree is as follows: + +CVSItem +| ++--CVSRevision +| | +| +--CVSRevisionModification (* -> 'Exp') +| | | +| | +--CVSRevisionAdd ('dead' -> 'Exp') +| | | +| | +--CVSRevisionChange ('Exp' -> 'Exp') +| | +| +--CVSRevisionAbsent (* -> 'dead') +| | +| +--CVSRevisionDelete ('Exp' -> 'dead') +| | +| +--CVSRevisionNoop ('dead' -> 'dead') +| ++--CVSSymbol + | + +--CVSBranch + | | + | +--CVSBranchNoop + | + +--CVSTag + | + +--CVSTagNoop + +""" + + +from cvs2svn_lib.context import Ctx + + +class CVSItem(object): + __slots__ = [ + 'id', + 'cvs_file', + 'revision_recorder_token', + ] + + def __init__(self, id, cvs_file, revision_recorder_token): + self.id = id + self.cvs_file = cvs_file + self.revision_recorder_token = revision_recorder_token + + def __eq__(self, other): + return self.id == other.id + + def __cmp__(self, other): + return cmp(self.id, other.id) + + def __hash__(self): + return self.id + + def __getstate__(self): + raise NotImplementedError() + + def __setstate__(self, data): + raise NotImplementedError() + + def get_svn_path(self): + """Return the SVN path associated with this CVSItem.""" + + raise NotImplementedError() + + def get_pred_ids(self): + """Return the CVSItem.ids of direct predecessors of SELF. + + A predecessor is defined to be a CVSItem that has to have been + committed before this one.""" + + raise NotImplementedError() + + def get_succ_ids(self): + """Return the CVSItem.ids of direct successors of SELF. + + A direct successor is defined to be a CVSItem that has this one as + a direct predecessor.""" + + raise NotImplementedError() + + def get_cvs_symbol_ids_opened(self): + """Return an iterable over the ids of CVSSymbols that this item opens. + + The definition of 'open' is that the path corresponding to this + CVSItem will have to be copied when filling the corresponding + symbol.""" + + raise NotImplementedError() + + def get_ids_closed(self): + """Return an iterable over the CVSItem.ids of CVSItems closed by this one. 
+ + A CVSItem A is said to close a CVSItem B if committing A causes B + to be overwritten or deleted (no longer available) in the SVN + repository. This is interesting because it sets the last SVN + revision number from which the contents of B can be copied (for + example, to fill a symbol). See the concrete implementations of + this method for the exact rules about what closes what.""" + + raise NotImplementedError() + + def check_links(self, cvs_file_items): + """Check for consistency of links to other CVSItems. + + Other items can be looked up in CVS_FILE_ITEMS, which is an + instance of CVSFileItems. Raise an AssertionError if there is a + problem.""" + + raise NotImplementedError() + + def __repr__(self): + return '%s(%s)' % (self.__class__.__name__, self,) + + +class CVSRevision(CVSItem): + """Information about a single CVS revision. + + A CVSRevision holds the information known about a single version of + a single file. + + Members: + + id -- (int) unique ID for this revision. + + cvs_file -- (CVSFile) CVSFile affected by this revision. + + timestamp -- (int) date stamp for this revision. + + metadata_id -- (int) id of metadata instance record in + metadata_db. + + prev_id -- (int) id of the logically previous CVSRevision, either + on the same or the source branch (or None). + + next_id -- (int) id of the logically next CVSRevision (or None). + + rev -- (string) the CVS revision number, e.g., '1.3'. + + deltatext_exists -- (bool) true iff this revision's deltatext is + not empty. + + lod -- (LineOfDevelopment) LOD on which this revision occurred. + + first_on_branch_id -- (int or None) if this revision is the first + on its branch, the cvs_branch_id of that branch; else, None. + + ntdbr -- (bool) true iff this is a non-trunk default branch + revision. + + ntdbr_prev_id -- (int or None) Iff this is the 1.2 revision after + the end of a default branch, the id of the last rev on the + default branch; else, None. 
+ + ntdbr_next_id -- (int or None) Iff this is the last revision on a + default branch preceding a 1.2 rev, the id of the 1.2 + revision; else, None. + + tag_ids -- (list of int) ids of all CVSTags rooted at this + CVSRevision. + + branch_ids -- (list of int) ids of all CVSBranches rooted at this + CVSRevision. + + branch_commit_ids -- (list of int) ids of first CVSRevision + committed on each branch rooted in this revision (for branches + with commits). + + opened_symbols -- (None or list of (symbol_id, cvs_symbol_id) + tuples) information about all CVSSymbols opened by this + revision. This member is set in FilterSymbolsPass; before + then, it is None. + + closed_symbols -- (None or list of (symbol_id, cvs_symbol_id) + tuples) information about all CVSSymbols closed by this + revision. This member is set in FilterSymbolsPass; before + then, it is None. + + revision_recorder_token -- (arbitrary) a token that can be set by + RevisionRecorder for the later use of RevisionReader. + + """ + + __slots__ = [ + 'timestamp', + 'metadata_id', + 'prev_id', + 'next_id', + 'rev', + 'deltatext_exists', + 'lod', + 'first_on_branch_id', + 'ntdbr', + 'ntdbr_prev_id', + 'ntdbr_next_id', + 'tag_ids', + 'branch_ids', + 'branch_commit_ids', + 'opened_symbols', + 'closed_symbols', + ] + + def __init__(self, + id, cvs_file, + timestamp, metadata_id, + prev_id, next_id, + rev, deltatext_exists, + lod, first_on_branch_id, ntdbr, + ntdbr_prev_id, ntdbr_next_id, + tag_ids, branch_ids, branch_commit_ids, + revision_recorder_token): + """Initialize a new CVSRevision object.""" + + CVSItem.__init__(self, id, cvs_file, revision_recorder_token) + + self.timestamp = timestamp + self.metadata_id = metadata_id + self.prev_id = prev_id + self.next_id = next_id + self.rev = rev + self.deltatext_exists = deltatext_exists + self.lod = lod + self.first_on_branch_id = first_on_branch_id + self.ntdbr = ntdbr + self.ntdbr_prev_id = ntdbr_prev_id + self.ntdbr_next_id = ntdbr_next_id + self.tag_ids = 
tag_ids + self.branch_ids = branch_ids + self.branch_commit_ids = branch_commit_ids + self.opened_symbols = None + self.closed_symbols = None + + def _get_cvs_path(self): + return self.cvs_file.cvs_path + + cvs_path = property(_get_cvs_path) + + def get_svn_path(self): + return self.lod.get_path(self.cvs_file.cvs_path) + + def __getstate__(self): + """Return the contents of this instance, for pickling. + + The presence of this method improves the space efficiency of + pickling CVSRevision instances.""" + + return ( + self.id, self.cvs_file.id, + self.timestamp, self.metadata_id, + self.prev_id, self.next_id, + self.rev, + self.deltatext_exists, + self.lod.id, + self.first_on_branch_id, + self.ntdbr, + self.ntdbr_prev_id, self.ntdbr_next_id, + self.tag_ids, self.branch_ids, self.branch_commit_ids, + self.opened_symbols, self.closed_symbols, + self.revision_recorder_token, + ) + + def __setstate__(self, data): + (self.id, cvs_file_id, + self.timestamp, self.metadata_id, + self.prev_id, self.next_id, + self.rev, + self.deltatext_exists, + lod_id, + self.first_on_branch_id, + self.ntdbr, + self.ntdbr_prev_id, self.ntdbr_next_id, + self.tag_ids, self.branch_ids, self.branch_commit_ids, + self.opened_symbols, self.closed_symbols, + self.revision_recorder_token) = data + self.cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id) + self.lod = Ctx()._symbol_db.get_symbol(lod_id) + + def get_effective_prev_id(self): + """Return the ID of the effective predecessor of this item. 
+ + This is the ID of the item that determines whether the object + existed before this CVSRevision.""" + + if self.ntdbr_prev_id is not None: + return self.ntdbr_prev_id + else: + return self.prev_id + + def get_symbol_pred_ids(self): + """Return the pred_ids for symbol predecessors.""" + + retval = set() + if self.first_on_branch_id is not None: + retval.add(self.first_on_branch_id) + return retval + + def get_pred_ids(self): + retval = self.get_symbol_pred_ids() + if self.prev_id is not None: + retval.add(self.prev_id) + if self.ntdbr_prev_id is not None: + retval.add(self.ntdbr_prev_id) + return retval + + def get_symbol_succ_ids(self): + """Return the succ_ids for symbol successors.""" + + retval = set() + for id in self.branch_ids + self.tag_ids: + retval.add(id) + return retval + + def get_succ_ids(self): + retval = self.get_symbol_succ_ids() + if self.next_id is not None: + retval.add(self.next_id) + if self.ntdbr_next_id is not None: + retval.add(self.ntdbr_next_id) + for id in self.branch_commit_ids: + retval.add(id) + return retval + + def get_ids_closed(self): + # Special handling is needed in the case of non-trunk default + # branches. The following cases have to be handled: + # + # Case 1: Revision 1.1 not deleted; revision 1.2 exists: + # + # 1.1 -----------------> 1.2 + # \ ^ ^ / + # \ | | / + # 1.1.1.1 -> 1.1.1.2 + # + # * 1.1.1.1 closes 1.1 (because its post-commit overwrites 1.1 + # on trunk) + # + # * 1.1.1.2 closes 1.1.1.1 + # + # * 1.2 doesn't close anything (the post-commit from 1.1.1.1 + # already closed 1.1, and no symbols can sprout from the + # post-commit of 1.1.1.2) + # + # Case 2: Revision 1.1 not deleted; revision 1.2 does not exist: + # + # 1.1 .................. + # \ ^ ^ + # \ | | + # 1.1.1.1 -> 1.1.1.2 + # + # * 1.1.1.1 closes 1.1 (because its post-commit overwrites 1.1 + # on trunk) + # + # * 1.1.1.2 closes 1.1.1.1 + # + # Case 3: Revision 1.1 deleted; revision 1.2 exists: + # + # ............... 
1.2 + # ^ ^ / + # | | / + # 1.1.1.1 -> 1.1.1.2 + # + # * 1.1.1.1 doesn't close anything + # + # * 1.1.1.2 closes 1.1.1.1 + # + # * 1.2 doesn't close anything (no symbols can sprout from the + # post-commit of 1.1.1.2) + # + # Case 4: Revision 1.1 deleted; revision 1.2 doesn't exist: + # + # ............... + # ^ ^ + # | | + # 1.1.1.1 -> 1.1.1.2 + # + # * 1.1.1.1 doesn't close anything + # + # * 1.1.1.2 closes 1.1.1.1 + + if self.first_on_branch_id is not None: + # The first CVSRevision on a branch is considered to close the + # branch: + yield self.first_on_branch_id + if self.ntdbr: + # If the 1.1 revision was not deleted, the 1.1.1.1 revision is + # considered to close it: + yield self.prev_id + elif self.ntdbr_prev_id is not None: + # This is the special case of a 1.2 revision that follows a + # non-trunk default branch. Either 1.1 was deleted or the first + # default branch revision closed 1.1, so we don't have to close + # 1.1. Technically, we close the revision on trunk that was + # copied from the last non-trunk default branch revision in a + # post-commit, but for now no symbols can sprout from that + # revision so we ignore that one, too. + pass + elif self.prev_id is not None: + # Since this CVSRevision is not the first on a branch, its + # prev_id is on the same LOD and this item closes that one: + yield self.prev_id + + def _get_branch_ids_recursively(self, cvs_file_items): + """Return the set of all CVSBranches that sprout from this CVSRevision. + + After parent adjustment in FilterSymbolsPass, it is possible for + branches to sprout directly from a CVSRevision, or from those + branches, etc. 
Return all branches that sprout from this + CVSRevision, directly or indirectly.""" + + retval = set() + branch_ids_to_process = list(self.branch_ids) + while branch_ids_to_process: + branch = cvs_file_items[branch_ids_to_process.pop()] + retval.add(branch) + branch_ids_to_process.extend(branch.branch_ids) + + return retval + + def check_links(self, cvs_file_items): + assert self.cvs_file == cvs_file_items.cvs_file + + prev = cvs_file_items.get(self.prev_id) + next = cvs_file_items.get(self.next_id) + first_on_branch = cvs_file_items.get(self.first_on_branch_id) + ntdbr_next = cvs_file_items.get(self.ntdbr_next_id) + ntdbr_prev = cvs_file_items.get(self.ntdbr_prev_id) + effective_prev = cvs_file_items.get(self.get_effective_prev_id()) + + if prev is None: + # This is the first CVSRevision on trunk or a detached branch: + assert self.id in cvs_file_items.root_ids + elif first_on_branch is not None: + # This is the first CVSRevision on an existing branch: + assert isinstance(first_on_branch, CVSBranch) + assert first_on_branch.symbol == self.lod + assert first_on_branch.next_id == self.id + cvs_revision_source = first_on_branch.get_cvs_revision_source( + cvs_file_items + ) + assert cvs_revision_source.id == prev.id + assert self.id in prev.branch_commit_ids + else: + # This revision follows another revision on the same LOD: + assert prev.next_id == self.id + assert prev.lod == self.lod + + if next is not None: + assert next.prev_id == self.id + assert next.lod == self.lod + + if ntdbr_next is not None: + assert self.ntdbr + assert ntdbr_next.ntdbr_prev_id == self.id + + if ntdbr_prev is not None: + assert ntdbr_prev.ntdbr_next_id == self.id + + for tag_id in self.tag_ids: + tag = cvs_file_items[tag_id] + assert isinstance(tag, CVSTag) + assert tag.source_id == self.id + assert tag.source_lod == self.lod + + for branch_id in self.branch_ids: + branch = cvs_file_items[branch_id] + assert isinstance(branch, CVSBranch) + assert branch.source_id == self.id + assert 
branch.source_lod == self.lod + + branch_commit_ids = list(self.branch_commit_ids) + + for branch in self._get_branch_ids_recursively(cvs_file_items): + assert isinstance(branch, CVSBranch) + if branch.next_id is not None: + assert branch.next_id in branch_commit_ids + branch_commit_ids.remove(branch.next_id) + + assert not branch_commit_ids + + assert self.__class__ == cvs_revision_type_map[( + isinstance(self, CVSRevisionModification), + effective_prev is not None + and isinstance(effective_prev, CVSRevisionModification), + )] + + def __str__(self): + """For convenience only. The format is subject to change at any time.""" + + return '%s:%s<%x>' % (self.cvs_file, self.rev, self.id,) + + +class CVSRevisionModification(CVSRevision): + """Base class for CVSRevisionAdd or CVSRevisionChange.""" + + __slots__ = [] + + def get_cvs_symbol_ids_opened(self): + return self.tag_ids + self.branch_ids + + +class CVSRevisionAdd(CVSRevisionModification): + """A CVSRevision that creates a file that previously didn't exist. + + The file might have never existed on this LOD, or it might have + existed previously but been deleted by a CVSRevisionDelete.""" + + __slots__ = [] + + +class CVSRevisionChange(CVSRevisionModification): + """A CVSRevision that modifies a file that already existed on this LOD.""" + + __slots__ = [] + + +class CVSRevisionAbsent(CVSRevision): + """A CVSRevision for which the file is nonexistent on this LOD.""" + + __slots__ = [] + + def get_cvs_symbol_ids_opened(self): + return [] + + +class CVSRevisionDelete(CVSRevisionAbsent): + """A CVSRevision that deletes a file that existed on this LOD.""" + + __slots__ = [] + + +class CVSRevisionNoop(CVSRevisionAbsent): + """A CVSRevision that doesn't do anything. + + The revision was 'dead' and the predecessor either didn't exist or + was also 'dead'. 
These revisions can't necessarily be thrown away + because (1) they impose ordering constraints on other items; (2) + they might have a nontrivial log message that we don't want to throw + away.""" + + __slots__ = [] + + +# A map +# +# {(nondead(cvs_rev), nondead(prev_cvs_rev)) : cvs_revision_subtype} +# +# , where nondead() means that the cvs revision exists and is not +# 'dead', and CVS_REVISION_SUBTYPE is the subtype of CVSRevision that +# should be used for CVS_REV. +cvs_revision_type_map = { + (False, False) : CVSRevisionNoop, + (False, True) : CVSRevisionDelete, + (True, False) : CVSRevisionAdd, + (True, True) : CVSRevisionChange, + } + + +class CVSSymbol(CVSItem): + """Represent a symbol on a particular CVSFile. + + This is the base class for CVSBranch and CVSTag. + + Members: + + id -- (int) unique ID for this item. + + cvs_file -- (CVSFile) CVSFile affected by this item. + + symbol -- (Symbol) the symbol affected by this CVSSymbol. + + source_lod -- (LineOfDevelopment) the LOD that is the source for + this CVSSymbol. + + source_id -- (int) the ID of the CVSRevision or CVSBranch that is + the source for this item. This initially points to a + CVSRevision, but can be changed to a CVSBranch via parent + adjustment in FilterSymbolsPass. + + revision_recorder_token -- (arbitrary) a token that can be set by + RevisionRecorder for the later use of RevisionReader. 
+ + """ + + __slots__ = [ + 'symbol', + 'source_lod', + 'source_id', + ] + + def __init__( + self, id, cvs_file, symbol, source_lod, source_id, + revision_recorder_token + ): + """Initialize a CVSSymbol object.""" + + CVSItem.__init__(self, id, cvs_file, revision_recorder_token) + + self.symbol = symbol + self.source_lod = source_lod + self.source_id = source_id + + def get_cvs_revision_source(self, cvs_file_items): + """Return the CVSRevision that is the ultimate source of this symbol.""" + + cvs_source = cvs_file_items[self.source_id] + while not isinstance(cvs_source, CVSRevision): + cvs_source = cvs_file_items[cvs_source.source_id] + + return cvs_source + + def get_svn_path(self): + return self.symbol.get_path(self.cvs_file.cvs_path) + + def get_ids_closed(self): + # A Symbol does not close any other CVSItems: + return [] + + +class CVSBranch(CVSSymbol): + """Represent the creation of a branch in a particular CVSFile. + + Members: + + id -- (int) unique ID for this item. + + cvs_file -- (CVSFile) CVSFile affected by this item. + + symbol -- (Symbol) the symbol affected by this CVSSymbol. + + branch_number -- (string) the number of this branch (e.g., + '1.3.4'), or None if this is a converted CVSTag. + + source_lod -- (LineOfDevelopment) the LOD that is the source for + this CVSSymbol. + + source_id -- (int) id of the CVSRevision or CVSBranch from which + this branch sprouts. This initially points to a CVSRevision, + but can be changed to a CVSBranch via parent adjustment in + FilterSymbolsPass. + + next_id -- (int or None) id of first CVSRevision on this branch, + if any; else, None. + + tag_ids -- (list of int) ids of all CVSTags rooted at this + CVSBranch (can be set due to parent adjustment in + FilterSymbolsPass). + + branch_ids -- (list of int) ids of all CVSBranches rooted at this + CVSBranch (can be set due to parent adjustment in + FilterSymbolsPass). 
+ + opened_symbols -- (None or list of (symbol_id, cvs_symbol_id) + tuples) information about all CVSSymbols opened by this + branch. This member is set in FilterSymbolsPass; before then, + it is None. + + revision_recorder_token -- (arbitrary) a token that can be set by + RevisionRecorder for the later use of RevisionReader. + + """ + + __slots__ = [ + 'branch_number', + 'next_id', + 'tag_ids', + 'branch_ids', + 'opened_symbols', + ] + + def __init__( + self, id, cvs_file, symbol, branch_number, + source_lod, source_id, next_id, + revision_recorder_token, + ): + """Initialize a CVSBranch.""" + + CVSSymbol.__init__( + self, id, cvs_file, symbol, source_lod, source_id, + revision_recorder_token + ) + self.branch_number = branch_number + self.next_id = next_id + self.tag_ids = [] + self.branch_ids = [] + self.opened_symbols = None + + def __getstate__(self): + return ( + self.id, self.cvs_file.id, + self.symbol.id, self.branch_number, + self.source_lod.id, self.source_id, self.next_id, + self.tag_ids, self.branch_ids, + self.opened_symbols, + self.revision_recorder_token, + ) + + def __setstate__(self, data): + ( + self.id, cvs_file_id, + symbol_id, self.branch_number, + source_lod_id, self.source_id, self.next_id, + self.tag_ids, self.branch_ids, + self.opened_symbols, + self.revision_recorder_token, + ) = data + self.cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id) + self.symbol = Ctx()._symbol_db.get_symbol(symbol_id) + self.source_lod = Ctx()._symbol_db.get_symbol(source_lod_id) + + def get_pred_ids(self): + return set([self.source_id]) + + def get_succ_ids(self): + retval = set(self.tag_ids + self.branch_ids) + if self.next_id is not None: + retval.add(self.next_id) + return retval + + def get_cvs_symbol_ids_opened(self): + return self.tag_ids + self.branch_ids + + def check_links(self, cvs_file_items): + source = cvs_file_items.get(self.source_id) + next = cvs_file_items.get(self.next_id) + + assert self.id in source.branch_ids + if isinstance(source, 
CVSRevision): + assert self.source_lod == source.lod + elif isinstance(source, CVSBranch): + assert self.source_lod == source.symbol + else: + assert False + + if next is not None: + assert isinstance(next, CVSRevision) + assert next.lod == self.symbol + assert next.first_on_branch_id == self.id + + for tag_id in self.tag_ids: + tag = cvs_file_items[tag_id] + assert isinstance(tag, CVSTag) + assert tag.source_id == self.id + assert tag.source_lod == self.symbol + + for branch_id in self.branch_ids: + branch = cvs_file_items[branch_id] + assert isinstance(branch, CVSBranch) + assert branch.source_id == self.id + assert branch.source_lod == self.symbol + + def __str__(self): + """For convenience only. The format is subject to change at any time.""" + + return '%s:%s:%s<%x>' \ + % (self.cvs_file, self.symbol, self.branch_number, self.id,) + + +class CVSBranchNoop(CVSBranch): + """A CVSBranch whose source is a CVSRevisionAbsent.""" + + __slots__ = [] + + def get_cvs_symbol_ids_opened(self): + return [] + + +# A map +# +# {nondead(source_cvs_rev) : cvs_branch_subtype} +# +# , where nondead() means that the cvs revision exists and is not +# 'dead', and CVS_BRANCH_SUBTYPE is the subtype of CVSBranch that +# should be used. +cvs_branch_type_map = { + False : CVSBranchNoop, + True : CVSBranch, + } + + +class CVSTag(CVSSymbol): + """Represent the creation of a tag on a particular CVSFile. + + Members: + + id -- (int) unique ID for this item. + + cvs_file -- (CVSFile) CVSFile affected by this item. + + symbol -- (Symbol) the symbol affected by this CVSSymbol. + + source_lod -- (LineOfDevelopment) the LOD that is the source for + this CVSSymbol. + + source_id -- (int) the ID of the CVSRevision or CVSBranch that is + being tagged. This initially points to a CVSRevision, but can + be changed to a CVSBranch via parent adjustment in + FilterSymbolsPass. + + revision_recorder_token -- (arbitrary) a token that can be set by + RevisionRecorder for the later use of RevisionReader. 
+ + """ + + __slots__ = [] + + def __init__( + self, id, cvs_file, symbol, source_lod, source_id, + revision_recorder_token, + ): + """Initialize a CVSTag.""" + + CVSSymbol.__init__( + self, id, cvs_file, symbol, source_lod, source_id, + revision_recorder_token, + ) + + def __getstate__(self): + return ( + self.id, self.cvs_file.id, self.symbol.id, + self.source_lod.id, self.source_id, + self.revision_recorder_token, + ) + + def __setstate__(self, data): + ( + self.id, cvs_file_id, symbol_id, source_lod_id, self.source_id, + self.revision_recorder_token, + ) = data + self.cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id) + self.symbol = Ctx()._symbol_db.get_symbol(symbol_id) + self.source_lod = Ctx()._symbol_db.get_symbol(source_lod_id) + + def get_pred_ids(self): + return set([self.source_id]) + + def get_succ_ids(self): + return set() + + def get_cvs_symbol_ids_opened(self): + return [] + + def check_links(self, cvs_file_items): + source = cvs_file_items.get(self.source_id) + + assert self.id in source.tag_ids + if isinstance(source, CVSRevision): + assert self.source_lod == source.lod + elif isinstance(source, CVSBranch): + assert self.source_lod == source.symbol + else: + assert False + + def __str__(self): + """For convenience only. The format is subject to change at any time.""" + + return '%s:%s<%x>' \ + % (self.cvs_file, self.symbol, self.id,) + + +class CVSTagNoop(CVSTag): + """A CVSTag whose source is a CVSRevisionAbsent.""" + + __slots__ = [] + + +# A map +# +# {nondead(source_cvs_rev) : cvs_tag_subtype} +# +# , where nondead() means that the cvs revision exists and is not +# 'dead', and CVS_TAG_SUBTYPE is the subtype of CVSTag that should be +# used. +cvs_tag_type_map = { + False : CVSTagNoop, + True : CVSTag, + } + + diff --git a/cvs2svn_lib/cvs_item_database.py b/cvs2svn_lib/cvs_item_database.py new file mode 100644 index 0000000..f072252 --- /dev/null +++ b/cvs2svn_lib/cvs_item_database.py @@ -0,0 +1,248 @@ +# (Be in -*- python -*- mode.) 
#
# ====================================================================
# Copyright (c) 2000-2008 CollabNet. All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""This module contains a database that can store arbitrary CVSItems."""


import re
import cPickle

from cvs2svn_lib.cvs_item import CVSRevisionAdd
from cvs2svn_lib.cvs_item import CVSRevisionChange
from cvs2svn_lib.cvs_item import CVSRevisionDelete
from cvs2svn_lib.cvs_item import CVSRevisionNoop
from cvs2svn_lib.cvs_item import CVSBranch
from cvs2svn_lib.cvs_item import CVSBranchNoop
from cvs2svn_lib.cvs_item import CVSTag
from cvs2svn_lib.cvs_item import CVSTagNoop
from cvs2svn_lib.cvs_file_items import CVSFileItems
from cvs2svn_lib.serializer import Serializer
from cvs2svn_lib.serializer import PrimedPickleSerializer
from cvs2svn_lib.database import IndexedStore


# The concrete CVSItem classes that are "primed" into the pickler's
# memo so that per-record pickles need not repeat class metadata:
cvs_item_primer = (
    CVSRevisionAdd, CVSRevisionChange,
    CVSRevisionDelete, CVSRevisionNoop,
    CVSBranch, CVSBranchNoop,
    CVSTag, CVSTagNoop,
    )


class NewCVSItemStore:
  """A file of sequential CVSItems, grouped by CVSFile.

  The file consists of a sequence of pickles. The zeroth one is a
  Serializer as described in the serializer module. Subsequent ones
  are pickled lists of CVSItems, each list containing all of the
  CVSItems for a single file.

  We don't use a single pickler for all items because the memo would
  grow too large."""

  def __init__(self, filename):
    """Initialize an instance, creating the file and writing the primer."""

    self.f = open(filename, 'wb')

    # Prime the serializer with every class that can appear in a
    # record so that later records stay small:
    self.serializer = PrimedPickleSerializer(
        cvs_item_primer + (CVSFileItems,)
        )
    # The serializer itself is the zeroth pickle in the file; readers
    # (OldCVSItemStore) load it back before reading any records:
    cPickle.dump(self.serializer, self.f, -1)

  def add(self, cvs_file_items):
    """Write CVS_FILE_ITEMS into the database."""

    self.serializer.dumpf(self.f, cvs_file_items)

  def close(self):
    # Invalidate self.f so that any use after close fails loudly:
    self.f.close()
    self.f = None


class OldCVSItemStore:
  """Read a file created by NewCVSItemStore.

  The file must be read sequentially, one CVSFileItems instance at a
  time."""

  def __init__(self, filename):
    self.f = open(filename, 'rb')

    # Read the memo from the first pickle:
    self.serializer = cPickle.load(self.f)

  def iter_cvs_file_items(self):
    """Iterate through the CVSFileItems instances, one file at a time.

    Each time yield a CVSFileItems instance for one CVSFile."""

    try:
      while True:
        yield self.serializer.loadf(self.f)
    except EOFError:
      # EOF simply marks the end of the record stream:
      return

  def close(self):
    self.f.close()
    self.f = None


class LinewiseSerializer(Serializer):
  """A serializer that writes exactly one line for each object.

  The actual serialization is done by a wrapped serializer; this class
  only escapes any newlines in the serialized data then appends a
  single newline."""

  def __init__(self, wrapee):
    # wrapee -- (Serializer) does the real (de)serialization work.
    self.wrapee = wrapee

  @staticmethod
  def _encode_newlines(s):
    """Return S with newlines and backslashes encoded.

    The string is returned with the following character transformations:

      LF   -> '\\n'
      CR   -> '\\r'
      ^Z   -> '\\z' (needed for Windows)
      '\\' -> '\\\\'

    """

    # Backslash must be escaped first, or it would re-escape the
    # sequences produced by the subsequent replacements:
    return s.replace('\\', '\\\\') \
        .replace('\n', '\\n') \
        .replace('\r', '\\r') \
        .replace('\x1a', '\\z')

  # Matches exactly one encoded escape sequence; used by
  # _decode_newlines():
  _escape_re = re.compile(r'(\\\\|\\n|\\r|\\z)')
  _subst = {'\\n' : '\n', '\\r' : '\r', '\\z' : '\x1a', '\\\\' : '\\'}

  @staticmethod
  def _decode_newlines(s):
    """Return S with newlines and backslashes decoded.

    This function reverses the encoding of _encode_newlines().

    """

    def repl(m):
      return LinewiseSerializer._subst[m.group(1)]

    return LinewiseSerializer._escape_re.sub(repl, s)

  def dumpf(self, f, object):
    f.write(self.dumps(object))

  def dumps(self, object):
    return self._encode_newlines(self.wrapee.dumps(object)) + '\n'

  def loadf(self, f):
    return self.loads(f.readline())

  def loads(self, s):
    # s[:-1] strips the trailing newline appended by dumps():
    return self.wrapee.loads(self._decode_newlines(s[:-1]))


class NewSortableCVSRevisionDatabase(object):
  """A serially-accessible, sortable file for holding CVSRevisions.

  This class creates such files."""

  def __init__(self, filename, serializer):
    self.f = open(filename, 'w')
    self.serializer = LinewiseSerializer(serializer)

  def add(self, cvs_rev):
    # One line per revision, prefixed by the sort keys (metadata id
    # and fixed-width timestamp) so the file can be sorted textually:
    self.f.write(
        '%x %08x %s' % (
            cvs_rev.metadata_id, cvs_rev.timestamp,
            self.serializer.dumps(cvs_rev),
            )
        )

  def close(self):
    self.f.close()
    self.f = None


class OldSortableCVSRevisionDatabase(object):
  """A serially-accessible, sortable file for holding CVSRevisions.

  This class reads such files."""

  def __init__(self, filename, serializer):
    self.filename = filename
    self.serializer = LinewiseSerializer(serializer)

  def __iter__(self):
    f = open(self.filename, 'r')
    for l in f:
      # Strip the two leading sort-key fields; the remainder is the
      # serialized record:
      s = l.split(' ', 2)[-1]
      yield self.serializer.loads(s)
    f.close()

  def close(self):
    pass


class NewSortableCVSSymbolDatabase(object):
  """A serially-accessible, sortable file for holding CVSSymbols.

  This class creates such files."""

  def __init__(self, filename, serializer):
    self.f = open(filename, 'w')
    self.serializer = LinewiseSerializer(serializer)

  def add(self, cvs_symbol):
    # One line per symbol, prefixed by the symbol id as the sort key:
    self.f.write(
        '%x %s' % (cvs_symbol.symbol.id, self.serializer.dumps(cvs_symbol))
        )

  def close(self):
    self.f.close()
    self.f = None


class OldSortableCVSSymbolDatabase(object):
  """A serially-accessible, sortable file for holding CVSSymbols.

  This class reads such files."""

  def __init__(self, filename, serializer):
    self.filename = filename
    self.serializer = LinewiseSerializer(serializer)

  def __iter__(self):
    f = open(self.filename, 'r')
    for l in f:
      # Strip the single leading sort-key field:
      s = l.split(' ', 1)[-1]
      yield self.serializer.loads(s)
    f.close()

  def close(self):
    pass


def IndexedCVSItemStore(filename, index_filename, mode):
  """Return an IndexedStore for CVSItems, primed with the CVSItem classes."""

  return IndexedStore(
      filename, index_filename, mode,
      PrimedPickleSerializer(cvs_item_primer)
      )


diff --git a/cvs2svn_lib/cvs_revision_manager.py b/cvs2svn_lib/cvs_revision_manager.py
new file mode 100644
index 0000000..6f5de3b
--- /dev/null
+++ b/cvs2svn_lib/cvs_revision_manager.py
@@ -0,0 +1,85 @@
# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2000-2008 CollabNet. All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""Access the CVS repository via CVS's 'cvs' command.""" + + +from cvs2svn_lib.common import FatalError +from cvs2svn_lib.process import check_command_runs +from cvs2svn_lib.process import PipeStream +from cvs2svn_lib.process import CommandFailedException +from cvs2svn_lib.revision_manager import RevisionReader + + +class CVSRevisionReader(RevisionReader): + """A RevisionReader that reads the contents via CVS.""" + + # Different versions of CVS support different global arguments. + # Here are the global arguments that we try to use, in order of + # decreasing preference: + _possible_global_arguments = [ + ['-q', '-R', '-f'], + ['-q', '-R'], + ['-q', '-f'], + ['-q'], + ] + + def __init__(self, cvs_executable): + self.cvs_executable = cvs_executable + + for global_arguments in self._possible_global_arguments: + try: + self._check_cvs_runs(global_arguments) + except CommandFailedException, e: + pass + else: + # Those global arguments were OK; use them for all CVS invocations. + self.global_arguments = global_arguments + break + else: + raise FatalError( + '%s\n' + 'Please check that cvs is installed and in your PATH.' % (e,) + ) + + def _check_cvs_runs(self, global_arguments): + """Check that CVS can be started. + + Try running 'cvs --version' with the current setting for + self.cvs_executable and the specified global_arguments. 
If not + successful, raise a CommandFailedException.""" + + check_command_runs( + [self.cvs_executable] + global_arguments + ['--version'], + self.cvs_executable, + ) + + def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False): + project = cvs_rev.cvs_file.project + pipe_cmd = [ + self.cvs_executable + ] + self.global_arguments + [ + '-d', project.cvs_repository_root, + 'co', + '-r' + cvs_rev.rev, + '-p' + ] + if suppress_keyword_substitution: + pipe_cmd.append('-kk') + pipe_cmd.append(project.cvs_module + cvs_rev.cvs_path) + return PipeStream(pipe_cmd) + + diff --git a/cvs2svn_lib/database.py b/cvs2svn_lib/database.py new file mode 100644 index 0000000..9db9be2 --- /dev/null +++ b/cvs2svn_lib/database.py @@ -0,0 +1,322 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
# ====================================================================

"""This module contains database facilities used by cvs2svn."""


import sys
import os
import cPickle

from cvs2svn_lib.common import DB_OPEN_READ
from cvs2svn_lib.common import DB_OPEN_WRITE
from cvs2svn_lib.common import DB_OPEN_NEW
from cvs2svn_lib.common import warning_prefix
from cvs2svn_lib.common import error_prefix
from cvs2svn_lib.log import Log
from cvs2svn_lib.record_table import FileOffsetPacker
from cvs2svn_lib.record_table import RecordTable


# DBM module selection.  NOTE: this import-time code has module-level
# side effects (it patches sys.modules and anydbm._defaultmod), so it
# must run before any anydbm.open() call elsewhere in the program.

# 1. If we have bsddb3, it is probably newer than bsddb. Fake bsddb = bsddb3,
# so that the dbhash module used by anydbm will use bsddb3.
try:
  import bsddb3
  sys.modules['bsddb'] = sys.modules['bsddb3']
except ImportError:
  pass

# 2. These DBM modules are not good for cvs2svn.
import anydbm
if anydbm._defaultmod.__name__ in ['dumbdbm', 'dbm']:
  Log().error(
      '%s: cvs2svn uses the anydbm package, which depends on lower level '
      'dbm\n'
      'libraries. Your system has %s, with which cvs2svn is known to have\n'
      'problems. To use cvs2svn, you must install a Python dbm library '
      'other than\n'
      'dumbdbm or dbm. See '
      'http://python.org/doc/current/lib/module-anydbm.html\n'
      'for more information.\n'
      % (error_prefix, anydbm._defaultmod.__name__,)
      )
  sys.exit(1)

# 3. If we are using the old bsddb185 module, then try prefer gdbm instead.
# Unfortunately, gdbm appears not to be trouble free, either.
if hasattr(anydbm._defaultmod, 'bsddb') \
    and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
  try:
    gdbm = __import__('gdbm')
  except ImportError:
    Log().warn(
        '%s: The version of the bsddb module found on your computer '
        'has been\n'
        'reported to malfunction on some datasets, causing KeyError '
        'exceptions.\n'
        % (warning_prefix,)
        )
  else:
    anydbm._defaultmod = gdbm


class Database:
  """A database that uses a Serializer to store objects of a certain type.

  The serializer is stored in the database under the key
  self.serializer_key. (This implies that self.serializer_key may not
  be used as a key for normal entries.)

  The backing database is an anydbm-based DBM.

  """

  # An arbitrary string that is unlikely to collide with a real entry
  # key; the pickled serializer is stored under this key:
  serializer_key = '_.%$1\t;_ '

  def __init__(self, filename, mode, serializer=None):
    """Constructor.

    The database stores its Serializer, so none needs to be supplied
    when opening an existing database."""

    # pybsddb3 has a bug which prevents it from working with
    # Berkeley DB 4.2 if you open the db with 'n' ("new"). This
    # causes the DB_TRUNCATE flag to be passed, which is disallowed
    # for databases protected by lock and transaction support
    # (bsddb databases use locking from bsddb version 4.2.4 onwards).
    #
    # Therefore, manually perform the removal (we can do this, because
    # we know that for bsddb - but *not* anydbm in general - the database
    # consists of one file with the name we specify, rather than several
    # based on that name).
    if mode == DB_OPEN_NEW and anydbm._defaultmod.__name__ == 'dbhash':
      if os.path.isfile(filename):
        os.unlink(filename)
      self.db = anydbm.open(filename, 'c')
    else:
      self.db = anydbm.open(filename, mode)

    # Import implementations for many mapping interface methods.  The
    # methods defined below on this class are fallbacks, used only
    # when the underlying DBM object does not supply its own:
    for meth_name in ('__delitem__',
        '__iter__', 'has_key', '__contains__', 'iterkeys', 'clear'):
      meth_ref = getattr(self.db, meth_name, None)
      if meth_ref:
        setattr(self, meth_name, meth_ref)

    if mode == DB_OPEN_NEW:
      # A new database records its serializer for later reopening:
      self.serializer = serializer
      self.db[self.serializer_key] = cPickle.dumps(self.serializer)
    else:
      # An existing database supplies its own serializer:
      self.serializer = cPickle.loads(self.db[self.serializer_key])

  def __getitem__(self, key):
    return self.serializer.loads(self.db[key])

  def __setitem__(self, key, value):
    self.db[key] = self.serializer.dumps(value)

  def __delitem__(self, key):
    # gdbm defines a __delitem__ method, but it cannot be assigned. So
    # this method provides a fallback definition via explicit delegation:
    del self.db[key]

  def keys(self):
    # Hide the serializer's reserved key from callers:
    retval = self.db.keys()
    retval.remove(self.serializer_key)
    return retval

  def __iter__(self):
    for key in self.keys():
      yield key

  def has_key(self, key):
    # EAFP probe; avoids assuming the DBM provides has_key itself:
    try:
      self.db[key]
      return True
    except KeyError:
      return False

  def __contains__(self, key):
    return self.has_key(key)

  def iterkeys(self):
    return self.__iter__()

  def clear(self):
    # Deletes only normal entries; the serializer entry is preserved
    # because keys() filters it out:
    for key in self.keys():
      del self[key]

  def items(self):
    return [(key, self[key],) for key in self.keys()]

  def values(self):
    return [self[key] for key in self.keys()]

  def get(self, key, default=None):
    try:
      return self[key]
    except KeyError:
      return default

  def close(self):
    self.db.close()
    self.db = None


class IndexedDatabase:
  """A file of objects that are written sequentially and read randomly.

  The objects are indexed by small non-negative integers, and a
  RecordTable is used to store the index -> fileoffset map.
  fileoffset=0 is used to represent an empty record. (An offset of 0
  cannot occur for a legitimate record because the serializer is
  written there.)

  The main file consists of a sequence of pickles (or other serialized
  data format). The zeroth record is a pickled Serializer.
  Subsequent ones are objects serialized using the serializer. The
  offset of each object in the file is stored to an index table so
  that the data can later be retrieved randomly.

  Objects are always stored to the end of the file. If an object is
  deleted or overwritten, the fact is recorded in the index_table but
  the space in the pickle file is not garbage collected. This has the
  advantage that one can create a modified version of a database that
  shares the main data file with an old version by copying the index
  file. But it has the disadvantage that space is wasted whenever
  objects are written multiple times."""

  def __init__(self, filename, index_filename, mode, serializer=None):
    """Initialize an IndexedDatabase, writing the serializer if necessary.

    SERIALIZER is only used if MODE is DB_OPEN_NEW; otherwise the
    serializer is read from the file."""

    self.filename = filename
    self.index_filename = index_filename
    self.mode = mode
    if self.mode == DB_OPEN_NEW:
      self.f = open(self.filename, 'wb+')
    elif self.mode == DB_OPEN_WRITE:
      self.f = open(self.filename, 'rb+')
    elif self.mode == DB_OPEN_READ:
      self.f = open(self.filename, 'rb')
    else:
      raise RuntimeError('Invalid mode %r' % self.mode)

    self.index_table = RecordTable(
        self.index_filename, self.mode, FileOffsetPacker()
        )

    if self.mode == DB_OPEN_NEW:
      assert serializer is not None
      self.serializer = serializer
      cPickle.dump(self.serializer, self.f, -1)
    else:
      # Read the memo from the first pickle:
      self.serializer = cPickle.load(self.f)

    # Seek to the end of the file, and record that position:
    self.f.seek(0, 2)
    # self.fp tracks the current physical file position (None when
    # unknown); self.eofp tracks the end-of-file offset where the
    # next record will be appended:
    self.fp = self.f.tell()
    self.eofp = self.fp

  def __setitem__(self, index, item):
    """Write ITEM into the database indexed by INDEX."""

    # Make sure we're at the end of the file:
    if self.fp != self.eofp:
      self.f.seek(self.eofp)
    self.index_table[index] = self.eofp
    s = self.serializer.dumps(item)
    self.f.write(s)
    self.eofp += len(s)
    self.fp = self.eofp

  def _fetch(self, offset):
    # Seek only when necessary; sequential reads avoid the syscall:
    if self.fp != offset:
      self.f.seek(offset)

    # There is no easy way to tell how much data will be read, so just
    # indicate that we don't know the current file pointer:
    self.fp = None

    return self.serializer.loadf(self.f)

  def iterkeys(self):
    return self.index_table.iterkeys()

  def itervalues(self):
    for offset in self.index_table.itervalues():
      yield self._fetch(offset)

  def __getitem__(self, index):
    offset = self.index_table[index]
    return self._fetch(offset)

  def get(self, item, default=None):
    try:
      return self[item]
    except KeyError:
      return default

  def get_many(self, indexes, default=None):
    """Yield (index,item) tuples for INDEXES, in arbitrary order.

    Yield (index,default) for indexes with no defined values."""

    offsets = []
    for (index, offset) in self.index_table.get_many(indexes):
      if offset is None:
        yield (index, default)
      else:
        offsets.append((offset, index))

    # Sort the offsets to reduce disk seeking:
    offsets.sort()
    for (offset,index) in offsets:
      yield (index, self._fetch(offset))

  def __delitem__(self, index):
    # We don't actually free the data in self.f.
    del self.index_table[index]

  def close(self):
    self.index_table.close()
    self.index_table = None
    self.f.close()
    self.f = None

  def __str__(self):
    return 'IndexedDatabase(%r)' % (self.filename,)


class IndexedStore(IndexedDatabase):
  """A file of items that is written sequentially and read randomly.

  This is just like IndexedDatabase, except that it has an additional
  add() method which assumes that the object to be written to the
  database has an 'id' member, which is used as its database index.
  See IndexedDatabase for more information."""

  def add(self, item):
    """Write ITEM into the database indexed by ITEM.id."""

    self[item.id] = item


diff --git a/cvs2svn_lib/dumpfile_delegate.py b/cvs2svn_lib/dumpfile_delegate.py
new file mode 100644
index 0000000..092cfca
--- /dev/null
+++ b/cvs2svn_lib/dumpfile_delegate.py
@@ -0,0 +1,510 @@
# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2000-2009 CollabNet. All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""This module contains database facilities used by cvs2svn."""


try:
  from hashlib import md5
except ImportError:
  from md5 import new as md5


from cvs2svn_lib import config
from cvs2svn_lib.common import FatalError
from cvs2svn_lib.common import InternalError
from cvs2svn_lib.common import path_split
from cvs2svn_lib.context import Ctx
from cvs2svn_lib.cvs_file import CVSDirectory
from cvs2svn_lib.cvs_file import CVSFile
from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate
from cvs2svn_lib.apple_single_filter import get_maybe_apple_single_stream


# Things that can happen to a file.
+OP_ADD = 'add' +OP_CHANGE = 'change' + + +class DumpfileDelegate(SVNRepositoryDelegate): + """Create a Subversion dumpfile.""" + + def __init__(self, revision_reader, dumpfile_path): + """Return a new DumpfileDelegate instance, attached to a dumpfile + DUMPFILE_PATH, using Ctx().cvs_filename_decoder().""" + + self._revision_reader = revision_reader + self.dumpfile_path = dumpfile_path + + self.dumpfile = open(self.dumpfile_path, 'wb') + self._write_dumpfile_header(self.dumpfile) + + # A set of the basic project infrastructure project directories + # that have been created so far, as SVN paths. (The root + # directory is considered to be present at initialization.) This + # includes all of the LOD paths, and all of their parent + # directories etc. + self._basic_directories = set(['']) + + def _write_dumpfile_header(self, dumpfile): + # Initialize the dumpfile with the standard headers. + # + # Since the CVS repository doesn't have a UUID, and the Subversion + # repository will be created with one anyway, we don't specify a + # UUID in the dumpflie + dumpfile.write('SVN-fs-dump-format-version: 2\n\n') + + def _utf8_path(self, path): + """Return a copy of PATH encoded in UTF-8.""" + + # Convert each path component separately (as they may each use + # different encodings). + try: + return '/'.join([ + Ctx().cvs_filename_decoder(piece).encode('utf8') + for piece in path.split('/') + ]) + except UnicodeError: + raise FatalError( + "Unable to convert a path '%s' to internal encoding.\n" + "Consider rerunning with one or more '--encoding' parameters or\n" + "with '--fallback-encoding'." 
+ % (path,)) + + def _string_for_prop(self, name, value): + """Return a property in the form needed for the dumpfile.""" + + return 'K %d\n%s\nV %d\n%s\n' % (len(name), name, len(value), value) + + def start_commit(self, revnum, revprops): + """Emit the start of SVN_COMMIT (an SVNCommit).""" + + self.revision = revnum + + # The start of a new commit typically looks like this: + # + # Revision-number: 1 + # Prop-content-length: 129 + # Content-length: 129 + # + # K 7 + # svn:log + # V 27 + # Log message for revision 1. + # K 10 + # svn:author + # V 7 + # jrandom + # K 8 + # svn:date + # V 27 + # 2003-04-22T22:57:58.132837Z + # PROPS-END + # + # Notice that the length headers count everything -- not just the + # length of the data but also the lengths of the lengths, including + # the 'K ' or 'V ' prefixes. + # + # The reason there are both Prop-content-length and Content-length + # is that the former includes just props, while the latter includes + # everything. That's the generic header form for any entity in a + # dumpfile. But since revisions only have props, the two lengths + # are always the same for revisions. + + # Calculate the output needed for the property definitions. 
+ prop_names = revprops.keys() + prop_names.sort() + prop_strings = [] + for propname in prop_names: + if revprops[propname] is not None: + prop_strings.append( + self._string_for_prop(propname, revprops[propname])) + + all_prop_strings = ''.join(prop_strings) + 'PROPS-END\n' + total_len = len(all_prop_strings) + + # Print the revision header and revprops + self.dumpfile.write( + 'Revision-number: %d\n' + 'Prop-content-length: %d\n' + 'Content-length: %d\n' + '\n' + '%s' + '\n' + % (self.revision, total_len, total_len, all_prop_strings) + ) + + def end_commit(self): + pass + + def _make_any_dir(self, path): + """Emit the creation of directory PATH.""" + + self.dumpfile.write( + "Node-path: %s\n" + "Node-kind: dir\n" + "Node-action: add\n" + "\n" + "\n" + % self._utf8_path(path) + ) + + def _register_basic_directory(self, path, create): + """Register the creation of PATH if it is not already there. + + Create any parent directories that do not already exist. If + CREATE is set, also create PATH if it doesn't already exist. This + method should only be used for the LOD paths and the directories + containing them, not for directories within an LOD path.""" + + if path not in self._basic_directories: + # Make sure that the parent directory is present: + self._register_basic_directory(path_split(path)[0], True) + if create: + self._make_any_dir(path) + self._basic_directories.add(path) + + def initialize_project(self, project): + """Create any initial directories for the project. + + The trunk, tags, and branches directories directories are created + the first time the project is seen. 
Be sure not to create parent
    directories that already exist (e.g., because two directories
    share part of their paths either within or across projects)."""

    for path in project.get_initial_directories():
      self._register_basic_directory(path, True)

  def initialize_lod(self, lod):
    # Create the root directory for LOD (trunk/branch/tag), together
    # with any missing parent directories:
    lod_path = lod.get_path()
    if lod_path:
      self._register_basic_directory(lod_path, True)

  def mkdir(self, lod, cvs_directory):
    """Emit the creation of CVS_DIRECTORY within LOD."""

    self._make_any_dir(lod.get_path(cvs_directory.cvs_path))

  def _add_or_change_path(self, s_item, op):
    """Emit the addition or change corresponding to S_ITEM.

    OP is either the constant OP_ADD or OP_CHANGE."""

    assert op in [OP_ADD, OP_CHANGE]

    # Convenience variables
    cvs_rev = s_item.cvs_rev

    # The property handling here takes advantage of an undocumented
    # but IMHO consistent feature of the Subversion dumpfile-loading
    # code.  When a node's properties aren't mentioned (that is, the
    # "Prop-content-length:" header is absent, no properties are
    # listed at all, and there is no "PROPS-END\n" line) then no
    # change is made to the node's properties.
    #
    # This is consistent with the way dumpfiles behave w.r.t. text
    # content changes, so I'm comfortable relying on it.  If you
    # commit a change to *just* the properties of some node that
    # already has text contents from a previous revision, then in the
    # dumpfile output for the prop change, no "Text-content-length:"
    # nor "Text-content-md5:" header will be present, and the text of
    # the file will not be given.  But this does not cause the file's
    # text to be erased!  It simply remains unchanged.
    #
    # This works out great for cvs2svn, due to lucky coincidences:
    #
    # For files, the only properties we ever set are set in the first
    # revision; all other revisions (including on branches) inherit
    # from that.  After the first revision, we never change file
    # properties, therefore, there is no need to remember the full set
    # of properties on a given file once we've set it.
    #
    # For directories, the only property we set is "svn:ignore", and
    # while we may change it after the first revision, we always do so
    # based on the contents of a ".cvsignore" file -- in other words,
    # CVS is doing the remembering for us, so we still don't have to
    # preserve the previous value of the property ourselves.

    # Calculate the (sorted-by-name) property string and length, if any.
    if s_item.svn_props_changed:
      svn_props = s_item.svn_props
      prop_contents = ''
      prop_names = svn_props.keys()
      prop_names.sort()
      for pname in prop_names:
        pvalue = svn_props[pname]
        if pvalue is not None:
          prop_contents += self._string_for_prop(pname, pvalue)
      prop_contents += 'PROPS-END\n'
      props_header = 'Prop-content-length: %d\n' % len(prop_contents)
    else:
      prop_contents = ''
      props_header = ''

    # If the file has keywords, we must prevent CVS/RCS from expanding
    # the keywords because they must be unexpanded in the repository,
    # or Subversion will get confused.
    stream = self._revision_reader.get_content_stream(
        cvs_rev, suppress_keyword_substitution=s_item.has_keywords()
        )

    if Ctx().decode_apple_single:
      # Insert a filter to decode any files that are in AppleSingle
      # format:
      stream = get_maybe_apple_single_stream(stream)

    # Insert a filter to convert all EOLs to LFs if necessary

    eol_style = s_item.svn_props.get('svn:eol-style', None)
    if eol_style:
      stream = LF_EOL_Filter(stream, eol_style)

    buf = None

    # treat .cvsignore as a directory property
    dir_path, basename = path_split(cvs_rev.get_svn_path())
    if basename == '.cvsignore':
      buf = stream.read()
      ignore_vals = generate_ignores(buf)
      ignore_contents = '\n'.join(ignore_vals)
      if ignore_contents:
        ignore_contents += '\n'
      ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n' % \
          (len(ignore_contents), ignore_contents))
      ignore_contents += 'PROPS-END\n'
      ignore_len = len(ignore_contents)

      # write headers, then props
      self.dumpfile.write(
          'Node-path: %s\n'
          'Node-kind: dir\n'
          'Node-action: change\n'
          'Prop-content-length: %d\n'
          'Content-length: %d\n'
          '\n'
          '%s'
          % (self._utf8_path(dir_path),
             ignore_len, ignore_len, ignore_contents)
          )
      if not Ctx().keep_cvsignore:
        stream.close()
        return

    self.dumpfile.write(
        'Node-path: %s\n'
        'Node-kind: file\n'
        'Node-action: %s\n'
        '%s'  # no property header if no props
        % (self._utf8_path(cvs_rev.get_svn_path()), op, props_header)
        )

    # Remember where the (placeholder) content headers start, so they
    # can be overwritten with the real length/checksum values below:
    pos = self.dumpfile.tell()

    # Fixed-width fields so the rewritten header occupies exactly the
    # same number of bytes as the placeholder:
    content_header_fmt = (
        'Text-content-length: %16d\n'
        'Text-content-md5: %32s\n'
        'Content-length: %16d\n'
        '\n'
        )

    self.dumpfile.write(content_header_fmt % (0, '', 0,))

    if prop_contents:
      self.dumpfile.write(prop_contents)

    # Insert the rev contents, calculating length and checksum as we go.
    checksum = md5()
    length = 0
    if buf is None:
      buf = stream.read(config.PIPE_READ_SIZE)
    while buf != '':
      checksum.update(buf)
      length += len(buf)
      self.dumpfile.write(buf)
      buf = stream.read(config.PIPE_READ_SIZE)

    stream.close()

    # Go back to overwrite the length and checksum headers with the
    # correct values.  The content length is the length of property
    # data, text data, and any metadata around/inside them:
    self.dumpfile.seek(pos, 0)
    self.dumpfile.write(
        content_header_fmt
        % (length, checksum.hexdigest(), length + len(prop_contents),)
        )

    # Jump back to the end of the stream
    self.dumpfile.seek(0, 2)

    # This record is done (write two newlines -- one to terminate
    # contents that weren't themselves newline-terminated, one to
    # provide a blank line for readability).
    self.dumpfile.write('\n\n')

  def add_path(self, s_item):
    """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""

    self._add_or_change_path(s_item, OP_ADD)

  def change_path(self, s_item):
    """Emit the change corresponding to S_ITEM, an SVNCommitItem."""

    self._add_or_change_path(s_item, OP_CHANGE)

  def delete_lod(self, lod):
    """Emit the deletion of LOD."""

    self.dumpfile.write(
        'Node-path: %s\n'
        'Node-action: delete\n'
        '\n'
        % (self._utf8_path(lod.get_path()),)
        )
    self._basic_directories.remove(lod.get_path())

  def delete_path(self, lod, cvs_path):
    """Emit the deletion of CVS_PATH within LOD."""

    dir_path, basename = path_split(lod.get_path(cvs_path.get_cvs_path()))
    if basename == '.cvsignore':
      # When a .cvsignore file is deleted, the directory's svn:ignore
      # property needs to be deleted.
      ignore_contents = 'PROPS-END\n'
      ignore_len = len(ignore_contents)

      # write headers, then props
      self.dumpfile.write(
          'Node-path: %s\n'
          'Node-kind: dir\n'
          'Node-action: change\n'
          'Prop-content-length: %d\n'
          'Content-length: %d\n'
          '\n'
          '%s'
          % (self._utf8_path(dir_path),
             ignore_len, ignore_len, ignore_contents)
          )
      if not Ctx().keep_cvsignore:
        return

    self.dumpfile.write(
        'Node-path: %s\n'
        'Node-action: delete\n'
        '\n'
        % (self._utf8_path(lod.get_path(cvs_path.cvs_path)),)
        )

  def copy_lod(self, src_lod, dest_lod, src_revnum):
    """Emit the copy of SRC_LOD at SRC_REVNUM to DEST_LOD."""

    # Register the main LOD directory, and create parent directories
    # as needed:
    self._register_basic_directory(dest_lod.get_path(), False)

    self.dumpfile.write(
        'Node-path: %s\n'
        'Node-kind: dir\n'
        'Node-action: add\n'
        'Node-copyfrom-rev: %d\n'
        'Node-copyfrom-path: %s\n'
        '\n'
        % (self._utf8_path(dest_lod.get_path()),
           src_revnum, self._utf8_path(src_lod.get_path()))
        )

  def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum):
    """Emit the copy of CVS_PATH from SRC_LOD at SRC_REVNUM to DEST_LOD."""

    if isinstance(cvs_path, CVSFile):
      node_kind = 'file'
      if cvs_path.basename == '.cvsignore':
        # FIXME: Here we have to adjust the containing directory's
        # svn:ignore property to reflect the addition of the
        # .cvsignore file to the LOD!  This is awkward because we
        # don't have the contents of the .cvsignore file available.
        if not Ctx().keep_cvsignore:
          return
    elif isinstance(cvs_path, CVSDirectory):
      node_kind = 'dir'
    else:
      raise InternalError()

    self.dumpfile.write(
        'Node-path: %s\n'
        'Node-kind: %s\n'
        'Node-action: add\n'
        'Node-copyfrom-rev: %d\n'
        'Node-copyfrom-path: %s\n'
        '\n'
        % (
            self._utf8_path(dest_lod.get_path(cvs_path.cvs_path)),
            node_kind,
            src_revnum,
            self._utf8_path(src_lod.get_path(cvs_path.cvs_path))
            )
        )

  def finish(self):
    """Perform any cleanup necessary after all revisions have been
    committed."""

    self.dumpfile.close()


def generate_ignores(raw_ignore_val):
  """Parse RAW_IGNORE_VAL (a .cvsignore file body) into a pattern list."""

  ignore_vals = [ ]
  for ignore in raw_ignore_val.split():
    # Reset the list if we encounter a '!'
    # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
    if ignore == '!':
      ignore_vals = [ ]
    else:
      ignore_vals.append(ignore)
  return ignore_vals


class LF_EOL_Filter:
  """Filter a stream and convert all end-of-line markers (CRLF, CR or LF)
  into the appropriate canonical eol style."""

  eol_style_replacements = {
      'LF' : '\n',
      'CR' : '\r',
      'CRLF' : '\r\n',
      'native' : '\n',
      }

  def __init__(self, stream, eol_style):
    self.stream = stream
    self.replacement = self.eol_style_replacements[eol_style]
    # True iff the previous chunk ended with a CR that might be the
    # first half of a CRLF pair spanning a chunk boundary:
    self.carry_cr = False
    self.eof = False

  def read(self, size=-1):
    # Loop until there is something to return, or EOF is reached (a
    # chunk can become empty after a trailing CR is carried over):
    while True:
      buf = self.stream.read(size)
      self.eof = len(buf) == 0
      if self.carry_cr:
        buf = '\r' + buf
        self.carry_cr = False
      if not self.eof and buf[-1] == '\r':
        # Hold the trailing CR back in case the next chunk starts
        # with LF:
        self.carry_cr = True
        buf = buf[:-1]
      # Normalize all line endings to LF first, then to the requested
      # style if it differs:
      buf = buf.replace('\r\n', '\n')
      buf = buf.replace('\r', '\n')
      if self.replacement != '\n':
        buf = buf.replace('\n', self.replacement)
      if buf or self.eof:
        return buf

  def close(self):
    self.stream.close()
    self.stream = None
#
# ====================================================================
# Copyright (c) 2000-2008 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""This module contains classes describing the sources of symbol fills."""


from cvs2svn_lib.common import InternalError
from cvs2svn_lib.common import FatalError
from cvs2svn_lib.common import SVN_INVALID_REVNUM
from cvs2svn_lib.svn_revision_range import SVNRevisionRange
from cvs2svn_lib.svn_revision_range import RevisionScores


class FillSource:
  """Representation of a fill source.

  A FillSource keeps track of the paths that have to be filled in a
  particular symbol fill.

  This class holds a SVNRevisionRange instance for each CVSFile that
  has to be filled within the subtree of the repository rooted at
  self.cvs_path.  The SVNRevisionRange objects are stored in a tree
  in which the directory nodes are dictionaries mapping CVSPaths to
  subnodes and the leaf nodes are the SVNRevisionRange objects telling
  for what source_lod and what range of revisions the leaf could serve
  as a source.

  FillSource objects are able to compute the score for arbitrary
  source LODs and source revision numbers.

  These objects are used by the symbol filler in SVNOutputOption."""

  def __init__(self, cvs_path, symbol, node_tree):
    """Create a fill source.

    The best LOD and SVN REVNUM to use as the copy source can be
    determined by calling compute_best_source().

    Members:

      cvs_path -- (CVSPath): the CVSPath described by this FillSource.

      _symbol -- (Symbol) the symbol to be filled.

      _node_tree -- (dict) a tree stored as a map { CVSPath : node },
          where subnodes have the same form.  Leaves are
          SVNRevisionRange instances telling the source_lod and range
          of SVN revision numbers from which the CVSPath can be
          copied.

    """

    self.cvs_path = cvs_path
    self._symbol = symbol
    self._node_tree = node_tree

  def _set_node(self, cvs_file, svn_revision_range):
    # Record SVN_REVISION_RANGE as the leaf for CVS_FILE, creating any
    # missing intermediate directory nodes along the way:
    parent_node = self._get_node(cvs_file.parent_directory, create=True)
    if cvs_file in parent_node:
      raise InternalError(
          '%s appeared twice in sources for %s' % (cvs_file, self._symbol)
          )
    parent_node[cvs_file] = svn_revision_range

  def _get_node(self, cvs_path, create=False):
    # Recurse from CVS_PATH up to self.cvs_path (the tree root), then
    # look the path component up in its parent node on the way back
    # down.  If CREATE is set, missing directory nodes are created;
    # otherwise a missing node raises KeyError.
    if cvs_path == self.cvs_path:
      return self._node_tree
    else:
      parent_node = self._get_node(cvs_path.parent_directory, create=create)
      try:
        return parent_node[cvs_path]
      except KeyError:
        if create:
          node = {}
          parent_node[cvs_path] = node
          return node
        else:
          raise

  def compute_best_source(self, preferred_source):
    """Determine the best source_lod and subversion revision number to copy.

    Return the best source found, as an SVNRevisionRange instance.  If
    PREFERRED_SOURCE is not None and its opening is among the sources
    with the best scores, return it; otherwise, return the oldest such
    revision on the first such source_lod (ordered by the natural LOD
    sort order).  The return value's source_lod is the best LOD to
    copy from, and its opening_revnum is the best SVN revision."""

    # Aggregate openings and closings from our rev tree
    svn_revision_ranges = self._get_revision_ranges(self._node_tree)

    # Score the lists
    revision_scores = RevisionScores(svn_revision_ranges)

    best_source_lod, best_revnum, best_score = \
        revision_scores.get_best_revnum()

    # Prefer the caller-supplied source if it scores as well as the
    # best one found:
    if (
        preferred_source is not None
        and revision_scores.get_score(preferred_source) == best_score
        ):
      best_source_lod = preferred_source.source_lod
      best_revnum = preferred_source.opening_revnum

    if best_revnum == SVN_INVALID_REVNUM:
      raise FatalError(
          "failed to find a revision to copy from when copying %s"
          % self._symbol.name
          )

    return SVNRevisionRange(best_source_lod, best_revnum)

  def _get_revision_ranges(self, node):
    """Return a list of all the SVNRevisionRanges at and under NODE.

    Include duplicates.  This is a helper method used by
    compute_best_source()."""

    if isinstance(node, SVNRevisionRange):
      # It is a leaf node.
      return [ node ]
    else:
      # It is an intermediate node.
      revision_ranges = []
      for key, subnode in node.items():
        revision_ranges.extend(self._get_revision_ranges(subnode))
      return revision_ranges

  def get_subsources(self):
    """Generate (CVSPath, FillSource) for all direct subsources."""

    # A leaf (SVNRevisionRange) has no subsources:
    if not isinstance(self._node_tree, SVNRevisionRange):
      for cvs_path, node in self._node_tree.items():
        fill_source = FillSource(cvs_path, self._symbol, node)
        yield (cvs_path, fill_source)

  def get_subsource_map(self):
    """Return the map {CVSPath : FillSource} of direct subsources."""

    src_entries = {}

    for (cvs_path, fill_subsource) in self.get_subsources():
      src_entries[cvs_path] = fill_subsource

    return src_entries

  def __str__(self):
    """For convenience only.  The format is subject to change at any time."""

    return '%s(%s:%s)' % (
        self.__class__.__name__, self._symbol, self.cvs_path,
        )

  def __repr__(self):
    """For convenience only.  The format is subject to change at any time."""

    return '%s%r' % (self, self._node_tree,)


def get_source_set(symbol, range_map):
  """Return a FillSource describing the fill sources for RANGE_MAP.

  SYMBOL is either a Branch or a Tag.  RANGE_MAP is a map { CVSSymbol
  : SVNRevisionRange } as returned by
  SymbolingsReader.get_range_map().

  Use the SVNRevisionRanges from RANGE_MAP to create a FillSource
  instance describing the sources for filling SYMBOL."""

  root_cvs_directory = symbol.project.get_root_cvs_directory()
  fill_source = FillSource(root_cvs_directory, symbol, {})

  for cvs_symbol, svn_revision_range in range_map.items():
    fill_source._set_node(cvs_symbol.cvs_file, svn_revision_range)

  return fill_source
It calls its record_fulltext() method with the full text of every
revision.  This method should be overridden to do something with the
fulltext and possibly return a revision_recorder_token."""


from cvs2svn_lib.revision_manager import RevisionRecorder


class FulltextRevisionRecorder:
  """Similar to a RevisionRecorder, but it requires the fulltext."""

  def register_artifacts(self, which_pass):
    pass

  def start(self):
    pass

  def start_file(self, cvs_file_items):
    pass

  def record_fulltext(self, cvs_rev, log, fulltext):
    """Record the fulltext for CVS_REV.

    CVS_REV has the log message LOG and the fulltext FULLTEXT.  This
    method should be overridden to do something sensible with them."""

    raise NotImplementedError()

  def finish_file(self, cvs_file_items):
    pass

  def finish(self):
    pass


class FulltextRevisionRecorderAdapter(RevisionRecorder):
  """Reconstruct the fulltext and pass it to a FulltextRevisionRecorder.

  This class implements RevisionRecorder (so it can be passed directly
  to CollectRevsPass).  But it doesn't actually record anything.
  Instead, it reconstructs the fulltext of each revision, and passes
  the fulltext to a fulltext_revision_recorder."""

  def __init__(self, fulltext_revision_recorder):
    RevisionRecorder.__init__(self)
    self.fulltext_revision_recorder = fulltext_revision_recorder

  def register_artifacts(self, which_pass):
    self.fulltext_revision_recorder.register_artifacts(which_pass)

  def start(self):
    self.fulltext_revision_recorder.start()

  def start_file(self, cvs_file_items):
    self.fulltext_revision_recorder.start_file(cvs_file_items)

  def record_text(self, cvs_rev, log, text):
    """This method should be overridden.

    It should determine the fulltext of CVS_REV, then pass it to
    self.fulltext_revision_recorder.record_fulltext() and return the
    result."""

    raise NotImplementedError()

  def finish_file(self, cvs_file_items):
    self.fulltext_revision_recorder.finish_file(cvs_file_items)

  def finish(self):
    self.fulltext_revision_recorder.finish()


class SimpleFulltextRevisionRecorderAdapter(FulltextRevisionRecorderAdapter):
  """Reconstruct the fulltext using a RevisionReader.

  To create the fulltext, this class simply uses a RevisionReader (for
  example, RCSRevisionReader or CVSRevisionReader).  This is not quite
  as wasteful as using one of these RevisionReaders in OutputPass,
  because the same RCS file will be read over and over (and so
  presumably stay in the disk cache).  But it is still pretty silly,
  considering that we have all the RCS deltas available to us."""

  def __init__(self, revision_reader, fulltext_revision_recorder):
    FulltextRevisionRecorderAdapter.__init__(self, fulltext_revision_recorder)
    self.revision_reader = revision_reader

  def register_artifacts(self, which_pass):
    FulltextRevisionRecorderAdapter.register_artifacts(self, which_pass)
    self.revision_reader.register_artifacts(which_pass)

  def start(self):
    FulltextRevisionRecorderAdapter.start(self)
    self.revision_reader.start()

  def record_text(self, cvs_rev, log, text):
    # FIXME: We have to decide what to do about keyword substitution
    # and eol_style here:
    fulltext = self.revision_reader.get_content_stream(
        cvs_rev, suppress_keyword_substitution=False
        ).read()
    return self.fulltext_revision_recorder.record_fulltext(
        cvs_rev, log, fulltext
        )

  def finish(self):
    FulltextRevisionRecorderAdapter.finish(self)
    self.revision_reader.finish()
# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2007-2009 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""Classes for outputting the converted repository to git.

For information about the format allowed by git-fast-import, see:

  http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html

"""

import bisect

from cvs2svn_lib import config
from cvs2svn_lib.common import InternalError
from cvs2svn_lib.common import FatalError
from cvs2svn_lib.log import Log
from cvs2svn_lib.context import Ctx
from cvs2svn_lib.artifact_manager import artifact_manager
from cvs2svn_lib.openings_closings import SymbolingsReader
from cvs2svn_lib.symbol import Trunk
from cvs2svn_lib.symbol import Branch
from cvs2svn_lib.symbol import Tag
from cvs2svn_lib.cvs_item import CVSRevisionAdd
from cvs2svn_lib.cvs_item import CVSRevisionChange
from cvs2svn_lib.cvs_item import CVSRevisionDelete
from cvs2svn_lib.cvs_item import CVSRevisionNoop
from cvs2svn_lib.cvs_item import CVSSymbol
from cvs2svn_lib.output_option import OutputOption
from cvs2svn_lib.svn_revision_range import RevisionScores
from cvs2svn_lib.repository_mirror import RepositoryMirror
from cvs2svn_lib.key_generator import KeyGenerator


# The branch name to use for the "tag fixup branches".  The
# git-fast-import documentation suggests using 'TAG_FIXUP' (outside of
# the refs/heads namespace), but this is currently broken.  Use a name
# containing '.', which is not allowed in CVS symbols, to avoid
# conflicts (though of course a conflict could still result if the
# user requests symbol transformations).
FIXUP_BRANCH_NAME = 'refs/heads/TAG.FIXUP'


class ExpectedDirectoryError(Exception):
  """A file was found where a directory was expected."""

  pass


class ExpectedFileError(Exception):
  """A directory was found where a file was expected."""

  pass


class GitRevisionWriter(object):
  """Base class for writing file modifications to a git-fast-import stream.

  Subclasses override _modify_file() to emit either inline file
  contents or a reference to a previously-written blob mark."""

  def register_artifacts(self, which_pass):
    pass

  def start(self, f, mirror):
    # f is the git-fast-import output stream; mirror is the
    # RepositoryMirror used to track the current tree state:
    self.f = f
    self._mirror = mirror

  def _modify_file(self, cvs_item, post_commit):
    raise NotImplementedError()

  def _mkdir_p(self, cvs_directory, lod):
    """Make sure that CVS_DIRECTORY exists in LOD.

    If not, create it.  Return the node for CVS_DIRECTORY."""

    try:
      node = self._mirror.get_current_lod_directory(lod)
    except KeyError:
      node = self._mirror.add_lod(lod)

    for sub_path in cvs_directory.get_ancestry()[1:]:
      try:
        node = node[sub_path]
      except KeyError:
        node = node.mkdir(sub_path)
      if node is None:
        # In the mirror, a None entry represents a file:
        raise ExpectedDirectoryError(
            'File found at \'%s\' where directory was expected.' % (sub_path,)
            )

    return node

  def add_file(self, cvs_rev, post_commit):
    cvs_file = cvs_rev.cvs_file
    # Post-commit revisions are applied to trunk rather than to the
    # revision's own LOD:
    if post_commit:
      lod = cvs_file.project.get_trunk()
    else:
      lod = cvs_rev.lod
    parent_node = self._mkdir_p(cvs_file.parent_directory, lod)
    parent_node.add_file(cvs_file)
    self._modify_file(cvs_rev, post_commit)

  def modify_file(self, cvs_rev, post_commit):
    cvs_file = cvs_rev.cvs_file
    if post_commit:
      lod = cvs_file.project.get_trunk()
    else:
      lod = cvs_rev.lod
    # Files are represented in the mirror as None; a non-None entry
    # means a directory is in the way:
    if self._mirror.get_current_path(cvs_file, lod) is not None:
      raise ExpectedFileError(
          'Directory found at \'%s\' where file was expected.' % (cvs_file,)
          )
    self._modify_file(cvs_rev, post_commit)

  def delete_file(self, cvs_rev, post_commit):
    cvs_file = cvs_rev.cvs_file
    if post_commit:
      lod = cvs_file.project.get_trunk()
    else:
      lod = cvs_rev.lod
    parent_node = self._mirror.get_current_path(
        cvs_file.parent_directory, lod
        )
    if parent_node[cvs_file] is not None:
      raise ExpectedFileError(
          'Directory found at \'%s\' where file was expected.' % (cvs_file,)
          )
    del parent_node[cvs_file]
    self.f.write('D %s\n' % (cvs_rev.cvs_file.cvs_path,))

  def process_revision(self, cvs_rev, post_commit):
    # Dispatch on the concrete CVSRevision type:
    if isinstance(cvs_rev, CVSRevisionAdd):
      self.add_file(cvs_rev, post_commit)
    elif isinstance(cvs_rev, CVSRevisionChange):
      self.modify_file(cvs_rev, post_commit)
    elif isinstance(cvs_rev, CVSRevisionDelete):
      self.delete_file(cvs_rev, post_commit)
    elif isinstance(cvs_rev, CVSRevisionNoop):
      pass
    else:
      raise InternalError('Unexpected CVSRevision type: %s' % (cvs_rev,))

  def branch_file(self, cvs_symbol):
    cvs_file = cvs_symbol.cvs_file
    parent_node = self._mkdir_p(cvs_file.parent_directory, cvs_symbol.symbol)
    parent_node.add_file(cvs_file)
    self._modify_file(cvs_symbol, post_commit=False)

  def finish(self):
    del self._mirror
    del self.f


class GitRevisionMarkWriter(GitRevisionWriter):
  """Write file modifications as references to pre-recorded blob marks."""

  def _modify_file(self, cvs_item, post_commit):
    if cvs_item.cvs_file.executable:
      mode = '100755'
    else:
      mode = '100644'

    self.f.write(
        'M %s :%d %s\n'
        % (mode, cvs_item.revision_recorder_token,
           cvs_item.cvs_file.cvs_path,)
        )


class GitRevisionInlineWriter(GitRevisionWriter):
  """Write file modifications with the file contents inline, read via a
  RevisionReader."""

  def __init__(self, revision_reader):
    self.revision_reader = revision_reader

  def register_artifacts(self, which_pass):
    GitRevisionWriter.register_artifacts(self, which_pass)
    self.revision_reader.register_artifacts(which_pass)

  def start(self, f, mirror):
    GitRevisionWriter.start(self, f, mirror)
    self.revision_reader.start()

  def _modify_file(self, cvs_item, post_commit):
    if cvs_item.cvs_file.executable:
      mode = '100755'
    else:
      mode = '100644'

    self.f.write(
        'M %s inline %s\n'
        % (mode, cvs_item.cvs_file.cvs_path,)
        )

    # For a symbol, the contents come from the revision from which the
    # symbol sprouts:
    if isinstance(cvs_item, CVSSymbol):
      cvs_rev = cvs_item.get_cvs_revision_source(Ctx()._cvs_items_db)
    else:
      cvs_rev = cvs_item

    # FIXME: We have to decide what to do about keyword substitution
    # and eol_style here:
    fulltext = self.revision_reader.get_content_stream(
        cvs_rev, suppress_keyword_substitution=False
        ).read()

    self.f.write('data %d\n' % (len(fulltext),))
    self.f.write(fulltext)
    self.f.write('\n')

  def finish(self):
    GitRevisionWriter.finish(self)
    self.revision_reader.finish()


def get_chunks(iterable, chunk_size):
  """Generate lists containing chunks of the output of ITERABLE.

  Each list contains at most CHUNK_SIZE items.  If CHUNK_SIZE is None,
  yield the whole contents of ITERABLE in one list."""

  if chunk_size is None:
    yield list(iterable)
  else:
    it = iter(iterable)
    while True:
      # If this call to it.next() raises StopIteration, then we have
      # no more chunks to emit, so simply pass the exception through:
      chunk = [it.next()]

      # Now try filling the rest of the chunk:
      try:
        while len(chunk) < chunk_size:
          chunk.append(it.next())
      except StopIteration:
        # The iterator was exhausted while filling chunk, but chunk
        # contains at least one element.  Yield it, then we're done.
        yield chunk
        break

      # Yield the full chunk then continue with the next chunk:
      yield chunk
      del chunk


class GitOutputOption(OutputOption):
  """An OutputOption that outputs to a git-fast-import formatted file.

  Members:

    dump_filename -- (string) the name of the file to which the
        git-fast-import commands for defining revisions will be
        written.

    author_transforms -- a map {cvsauthor : (fullname, email)} from
        CVS author names to git full name and email address.  All of
        the contents are 8-bit strings encoded as UTF-8.
+ + """ + + # The first mark number used for git-fast-import commit marks. This + # value needs to be large to avoid conflicts with blob marks. + _first_commit_mark = 1000000000 + + def __init__( + self, dump_filename, revision_writer, + max_merges=None, author_transforms=None, + ): + """Constructor. + + DUMP_FILENAME is the name of the file to which the git-fast-import + commands for defining revisions should be written. (Please note + that depending on the style of revision writer, the actual file + contents might not be written to this file.) + + REVISION_WRITER is a GitRevisionWriter that is used to output + either the content of revisions or a mark that was previously used + to label a blob. + + MAX_MERGES can be set to an integer telling the maximum number of + parents that can be merged into a commit at once (aside from the + natural parent). If it is set to None, then there is no limit. + + AUTHOR_TRANSFORMS is a map {cvsauthor : (fullname, email)} from + CVS author names to git full name and email address. All of the + contents should either be Unicode strings or 8-bit strings encoded + as UTF-8. 
+ + """ + + self.dump_filename = dump_filename + self.revision_writer = revision_writer + self.max_merges = max_merges + + def to_utf8(s): + if isinstance(s, unicode): + return s.encode('utf8') + else: + return s + + self.author_transforms = {} + if author_transforms is not None: + for (cvsauthor, (name, email,)) in author_transforms.iteritems(): + cvsauthor = to_utf8(cvsauthor) + name = to_utf8(name) + email = to_utf8(email) + self.author_transforms[cvsauthor] = (name, email,) + + self._mirror = RepositoryMirror() + + self._mark_generator = KeyGenerator(GitOutputOption._first_commit_mark) + + def register_artifacts(self, which_pass): + # These artifacts are needed for SymbolingsReader: + artifact_manager.register_temp_file_needed( + config.SYMBOL_OPENINGS_CLOSINGS_SORTED, which_pass + ) + artifact_manager.register_temp_file_needed( + config.SYMBOL_OFFSETS_DB, which_pass + ) + self.revision_writer.register_artifacts(which_pass) + self._mirror.register_artifacts(which_pass) + + def check(self): + if Ctx().cross_project_commits: + raise FatalError( + 'Git output is not supported with cross-project commits' + ) + if Ctx().cross_branch_commits: + raise FatalError( + 'Git output is not supported with cross-branch commits' + ) + if Ctx().username is None: + raise FatalError( + 'Git output requires a default commit username' + ) + + def check_symbols(self, symbol_map): + # FIXME: What constraints does git impose on symbols? + pass + + def setup(self, svn_rev_count): + self._symbolings_reader = SymbolingsReader() + self.f = open(self.dump_filename, 'wb') + + # The youngest revnum that has been committed so far: + self._youngest = 0 + + # A map {lod : [(revnum, mark)]} giving each of the revision + # numbers in which there was a commit to lod, and the mark active + # at the end of the revnum. 
+ self._marks = {} + + self._mirror.open() + self.revision_writer.start(self.f, self._mirror) + + def _create_commit_mark(self, lod, revnum): + mark = self._mark_generator.gen_id() + self._set_lod_mark(lod, revnum, mark) + return mark + + def _set_lod_mark(self, lod, revnum, mark): + """Record MARK as the status of LOD for REVNUM. + + If there is already an entry for REVNUM, overwrite it. If not, + append a new entry to the self._marks list for LOD.""" + + assert revnum >= self._youngest + entry = (revnum, mark) + try: + modifications = self._marks[lod] + except KeyError: + # This LOD hasn't appeared before; create a new list and add the + # entry: + self._marks[lod] = [entry] + else: + # A record exists, so it necessarily has at least one element: + if modifications[-1][0] == revnum: + modifications[-1] = entry + else: + modifications.append(entry) + self._youngest = revnum + + def _get_author(self, svn_commit): + """Return the author to be used for SVN_COMMIT. + + Return the author in the form needed by git; that is, 'foo '.""" + + author = svn_commit.get_author() + (name, email,) = self.author_transforms.get(author, (author, author,)) + return '%s <%s>' % (name, email,) + + @staticmethod + def _get_log_msg(svn_commit): + return svn_commit.get_log_msg() + + def process_initial_project_commit(self, svn_commit): + self._mirror.start_commit(svn_commit.revnum) + self._mirror.end_commit() + + def process_primary_commit(self, svn_commit): + author = self._get_author(svn_commit) + log_msg = self._get_log_msg(svn_commit) + + lods = set() + for cvs_rev in svn_commit.get_cvs_items(): + lods.add(cvs_rev.lod) + if len(lods) != 1: + raise InternalError('Commit affects %d LODs' % (len(lods),)) + lod = lods.pop() + + self._mirror.start_commit(svn_commit.revnum) + if isinstance(lod, Trunk): + # FIXME: is this correct?: + self.f.write('commit refs/heads/master\n') + else: + self.f.write('commit refs/heads/%s\n' % (lod.name,)) + self.f.write( + 'mark :%d\n' + % 
(self._create_commit_mark(lod, svn_commit.revnum),) + ) + self.f.write( + 'committer %s %d +0000\n' % (author, svn_commit.date,) + ) + self.f.write('data %d\n' % (len(log_msg),)) + self.f.write('%s\n' % (log_msg,)) + for cvs_rev in svn_commit.get_cvs_items(): + self.revision_writer.process_revision(cvs_rev, post_commit=False) + + self.f.write('\n') + self._mirror.end_commit() + + def process_post_commit(self, svn_commit): + author = self._get_author(svn_commit) + log_msg = self._get_log_msg(svn_commit) + + source_lods = set() + for cvs_rev in svn_commit.cvs_revs: + source_lods.add(cvs_rev.lod) + if len(source_lods) != 1: + raise InternalError('Commit is from %d LODs' % (len(source_lods),)) + source_lod = source_lods.pop() + + self._mirror.start_commit(svn_commit.revnum) + # FIXME: is this correct?: + self.f.write('commit refs/heads/master\n') + self.f.write( + 'mark :%d\n' + % (self._create_commit_mark(None, svn_commit.revnum),) + ) + self.f.write( + 'committer %s %d +0000\n' % (author, svn_commit.date,) + ) + self.f.write('data %d\n' % (len(log_msg),)) + self.f.write('%s\n' % (log_msg,)) + self.f.write( + 'merge :%d\n' + % (self._get_source_mark(source_lod, svn_commit.revnum),) + ) + for cvs_rev in svn_commit.cvs_revs: + self.revision_writer.process_revision(cvs_rev, post_commit=True) + + self.f.write('\n') + self._mirror.end_commit() + + def _get_source_groups(self, svn_commit): + """Return groups of sources for SVN_COMMIT. + + SVN_COMMIT is an instance of SVNSymbolCommit. Yield tuples + (source_lod, svn_revnum, cvs_symbols) where source_lod is the line + of development and svn_revnum is the revision that should serve as + a source, and cvs_symbols is a list of CVSSymbolItems that can be + copied from that source. 
The groups are returned in arbitrary + order.""" + + # Get a map {CVSSymbol : SVNRevisionRange}: + range_map = self._symbolings_reader.get_range_map(svn_commit) + + # range_map, split up into one map per LOD; i.e., {LOD : + # {CVSSymbol : SVNRevisionRange}}: + lod_range_maps = {} + + for (cvs_symbol, range) in range_map.iteritems(): + lod_range_map = lod_range_maps.get(range.source_lod) + if lod_range_map is None: + lod_range_map = {} + lod_range_maps[range.source_lod] = lod_range_map + lod_range_map[cvs_symbol] = range + + # Sort the sources so that the branch that serves most often as + # parent is processed first: + lod_ranges = lod_range_maps.items() + lod_ranges.sort( + lambda (lod1,lod_range_map1),(lod2,lod_range_map2): + -cmp(len(lod_range_map1), len(lod_range_map2)) or cmp(lod1, lod2) + ) + + for (lod, lod_range_map) in lod_ranges: + while lod_range_map: + revision_scores = RevisionScores(lod_range_map.values()) + (source_lod, revnum, score) = revision_scores.get_best_revnum() + assert source_lod == lod + cvs_symbols = [] + for (cvs_symbol, range) in lod_range_map.items(): + if revnum in range: + cvs_symbols.append(cvs_symbol) + del lod_range_map[cvs_symbol] + yield (lod, revnum, cvs_symbols) + + def _get_all_files(self, node): + """Generate all of the CVSFiles under NODE.""" + + for cvs_path in node: + subnode = node[cvs_path] + if subnode is None: + yield cvs_path + else: + for sub_cvs_path in self._get_all_files(subnode): + yield sub_cvs_path + + def _is_simple_copy(self, svn_commit, source_groups): + """Return True iff SVN_COMMIT can be created as a simple copy. + + SVN_COMMIT is an SVNTagCommit. 
Return True iff it can be created + as a simple copy from an existing revision (i.e., if the fixup + branch can be avoided for this tag creation).""" + + # The first requirement is that there be exactly one source: + if len(source_groups) != 1: + return False + + (source_lod, svn_revnum, cvs_symbols) = source_groups[0] + + # The second requirement is that the destination LOD not already + # exist: + try: + self._mirror.get_current_lod_directory(svn_commit.symbol) + except KeyError: + # The LOD doesn't already exist. This is good. + pass + else: + # The LOD already exists. It cannot be created by a copy. + return False + + # The third requirement is that the source LOD contains exactly + # the same files as we need to add to the symbol: + try: + source_node = self._mirror.get_old_lod_directory(source_lod, svn_revnum) + except KeyError: + raise InternalError('Source %r does not exist' % (source_lod,)) + return ( + set([cvs_symbol.cvs_file for cvs_symbol in cvs_symbols]) + == set(self._get_all_files(source_node)) + ) + + def _get_source_mark(self, source_lod, revnum): + """Return the mark active on SOURCE_LOD at the end of REVNUM.""" + + modifications = self._marks[source_lod] + i = bisect.bisect_left(modifications, (revnum + 1,)) - 1 + (revnum, mark) = modifications[i] + return mark + + def _process_symbol_commit( + self, svn_commit, git_branch, source_groups, mark + ): + author = self._get_author(svn_commit) + log_msg = self._get_log_msg(svn_commit) + + self.f.write('commit %s\n' % (git_branch,)) + self.f.write('mark :%d\n' % (mark,)) + self.f.write('committer %s %d +0000\n' % (author, svn_commit.date,)) + self.f.write('data %d\n' % (len(log_msg),)) + self.f.write('%s\n' % (log_msg,)) + + for (source_lod, source_revnum, cvs_symbols,) in source_groups: + self.f.write( + 'merge :%d\n' + % (self._get_source_mark(source_lod, source_revnum),) + ) + + for (source_lod, source_revnum, cvs_symbols,) in source_groups: + for cvs_symbol in cvs_symbols: + 
self.revision_writer.branch_file(cvs_symbol) + + self.f.write('\n') + + def process_branch_commit(self, svn_commit): + self._mirror.start_commit(svn_commit.revnum) + source_groups = list(self._get_source_groups(svn_commit)) + for groups in get_chunks(source_groups, self.max_merges): + self._process_symbol_commit( + svn_commit, 'refs/heads/%s' % (svn_commit.symbol.name,), + groups, + self._create_commit_mark(svn_commit.symbol, svn_commit.revnum), + ) + self._mirror.end_commit() + + def _set_symbol(self, symbol, mark): + if isinstance(symbol, Branch): + category = 'heads' + elif isinstance(symbol, Tag): + category = 'tags' + else: + raise InternalError() + self.f.write('reset refs/%s/%s\n' % (category, symbol.name,)) + self.f.write('from :%d\n' % (mark,)) + + def process_tag_commit(self, svn_commit): + # FIXME: For now we create a fixup branch with the same name as + # the tag, then the tag. We never delete the fixup branch. Also, + # a fixup branch is created even if the tag could be created from + # a single source. 
+ self._mirror.start_commit(svn_commit.revnum) + + source_groups = list(self._get_source_groups(svn_commit)) + if self._is_simple_copy(svn_commit, source_groups): + (source_lod, source_revnum, cvs_symbols) = source_groups[0] + Log().debug( + '%s will be created via a simple copy from %s:r%d' + % (svn_commit.symbol, source_lod, source_revnum,) + ) + mark = self._get_source_mark(source_lod, source_revnum) + self._set_symbol(svn_commit.symbol, mark) + else: + Log().debug( + '%s will be created via a fixup branch' % (svn_commit.symbol,) + ) + + # Create the fixup branch (which might involve making more than + # one commit): + for groups in get_chunks(source_groups, self.max_merges): + mark = self._create_commit_mark(svn_commit.symbol, svn_commit.revnum) + self._process_symbol_commit( + svn_commit, FIXUP_BRANCH_NAME, groups, mark + ) + + # Store the mark of the last commit to the fixup branch as the + # value of the tag: + self._set_symbol(svn_commit.symbol, mark) + self.f.write('reset %s\n' % (FIXUP_BRANCH_NAME,)) + self.f.write('\n') + + self._mirror.end_commit() + + def cleanup(self): + self.revision_writer.finish() + self._mirror.close() + self.f.close() + del self.f + self._symbolings_reader.close() + del self._symbolings_reader + + diff --git a/cvs2svn_lib/git_revision_recorder.py b/cvs2svn_lib/git_revision_recorder.py new file mode 100644 index 0000000..604f8ac --- /dev/null +++ b/cvs2svn_lib/git_revision_recorder.py @@ -0,0 +1,114 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2007-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. 
+# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""Write file contents to a stream of git-fast-import blobs.""" + +import itertools + +from cvs2svn_lib.symbol import Trunk +from cvs2svn_lib.cvs_item import CVSRevisionDelete +from cvs2svn_lib.cvs_item import CVSSymbol +from cvs2svn_lib.fulltext_revision_recorder import FulltextRevisionRecorder +from cvs2svn_lib.key_generator import KeyGenerator + + +class GitRevisionRecorder(FulltextRevisionRecorder): + """Output file revisions to git-fast-import.""" + + def __init__(self, blob_filename): + self.blob_filename = blob_filename + + def start(self): + self.dump_file = open(self.blob_filename, 'wb') + self._mark_generator = KeyGenerator() + + def start_file(self, cvs_file_items): + self._cvs_file_items = cvs_file_items + + def _get_original_source(self, cvs_rev): + """Return the original source of the contents of CVS_REV. + + Return the first non-delete CVSRevision with the same contents as + CVS_REV. 'First' here refers to deltatext order; i.e., the very + first revision is HEAD on trunk, then backwards to the root of a + branch, then out to the tip of a branch. + + The candidates are all revisions along the CVS delta-dependency + chain until the next one that has a deltatext (inclusive). Of the + candidates, CVSRevisionDeletes are disqualified because, even + though CVS records their contents, it is impossible to extract + their fulltext using commands like 'cvs checkout -p'. 
+ + If there is no other CVSRevision that has the same content, return + CVS_REV itself.""" + + # Keep track of the "best" source CVSRevision found so far: + best_source_rev = None + + for cvs_rev in itertools.chain( + [cvs_rev], self._cvs_file_items.iter_deltatext_ancestors(cvs_rev) + ): + if not isinstance(cvs_rev, CVSRevisionDelete): + best_source_rev = cvs_rev + + if cvs_rev.deltatext_exists: + break + + return best_source_rev + + def record_fulltext(self, cvs_rev, log, fulltext): + """Write the fulltext to a blob if it is original and not a delete. + + The reason we go to this trouble is to avoid writing the same file + contents multiple times for a string of revisions that don't have + deltatexts (as, for example, happens with dead revisions and + imported revisions).""" + + if isinstance(cvs_rev, CVSRevisionDelete): + # There is no need to record a delete revision, and its token + # will never be needed: + return None + + source = self._get_original_source(cvs_rev) + + if source.id == cvs_rev.id: + # Revision is its own source; write it out: + mark = self._mark_generator.gen_id() + self.dump_file.write('blob\n') + self.dump_file.write('mark :%d\n' % (mark,)) + self.dump_file.write('data %d\n' % (len(fulltext),)) + self.dump_file.write(fulltext) + self.dump_file.write('\n') + return mark + else: + # Return as revision_recorder_token the CVSRevision.id of the + # original source revision: + return source.revision_recorder_token + + def finish_file(self, cvs_file_items): + # Determine the original source of each CVSSymbol, and store it as + # the symbol's revision_recorder_token. 
+ for cvs_item in cvs_file_items.values(): + if isinstance(cvs_item, CVSSymbol): + cvs_source = cvs_item.get_cvs_revision_source(cvs_file_items) + cvs_item.revision_recorder_token = cvs_source.revision_recorder_token + + del self._cvs_file_items + + def finish(self): + self.dump_file.close() + + diff --git a/cvs2svn_lib/git_run_options.py b/cvs2svn_lib/git_run_options.py new file mode 100644 index 0000000..726b127 --- /dev/null +++ b/cvs2svn_lib/git_run_options.py @@ -0,0 +1,274 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
# ====================================================================

"""This module manages cvs2git run options."""


import sys
import datetime
import codecs

from cvs2svn_lib.version import VERSION
from cvs2svn_lib.common import error_prefix
from cvs2svn_lib.common import FatalError
from cvs2svn_lib.log import Log
from cvs2svn_lib.context import Ctx
from cvs2svn_lib.run_options import not_both
from cvs2svn_lib.run_options import RunOptions
from cvs2svn_lib.run_options import ContextOption
from cvs2svn_lib.run_options import IncompatibleOption
from cvs2svn_lib.run_options import authors
from cvs2svn_lib.man_writer import ManWriter
from cvs2svn_lib.project import Project
from cvs2svn_lib.rcs_revision_manager import RCSRevisionReader
from cvs2svn_lib.cvs_revision_manager import CVSRevisionReader
from cvs2svn_lib.git_revision_recorder import GitRevisionRecorder
from cvs2svn_lib.git_output_option import GitRevisionMarkWriter
from cvs2svn_lib.git_output_option import GitOutputOption
from cvs2svn_lib.revision_manager import NullRevisionRecorder
from cvs2svn_lib.revision_manager import NullRevisionExcluder
from cvs2svn_lib.fulltext_revision_recorder \
    import SimpleFulltextRevisionRecorderAdapter


# The following constants hold the text used to build the cvs2git man
# page (see ManWriter); the strings contain embedded groff markup.

short_desc = 'convert a cvs repository into a git repository'

synopsis = """\
.B cvs2git
[\\fIOPTION\\fR]... \\fIOUTPUT-OPTIONS CVS-REPOS-PATH\\fR
.br
.B cvs2git
[\\fIOPTION\\fR]... \\fI--options=PATH\\fR
"""

long_desc = """\
Create a new git repository based on the version history stored in a
CVS repository. Each CVS commit will be mirrored in the git
repository, including such information as date of commit and id of the
committer.
.P
The output of this program are a "blobfile" and a "dumpfile", which
together can be loaded into a git repository using "git fast-import".
.P
\\fICVS-REPOS-PATH\\fR is the filesystem path of the part of the CVS
repository that you want to convert. This path doesn't have to be the
top level directory of a CVS repository; it can point at a project
within a repository, in which case only that project will be
converted. This path or one of its parent directories has to contain
a subdirectory called CVSROOT (though the CVSROOT directory can be
empty).
.P
It is not possible directly to convert a CVS repository to which you
only have remote access, but the FAQ describes tools that may be used
to create a local copy of a remote CVS repository.
"""

files = """\
A directory called \\fIcvs2svn-tmp\\fR (or the directory specified by
\\fB--tmpdir\\fR) is used as scratch space for temporary data files.
"""

# (name, man-page-section) pairs for the SEE ALSO section:
see_also = [
  ('cvs', '1'),
  ('git', '1'),
  ('git-fast-import', '1'),
  ]


class GitRunOptions(RunOptions):
  """Process the command-line options for cvs2git."""

  def __init__(self, progname, cmd_args, pass_manager):
    # In cvs2git, commits may never span projects or branches:
    Ctx().cross_project_commits = False
    Ctx().cross_branch_commits = False
    RunOptions.__init__(self, progname, cmd_args, pass_manager)

  def _get_output_options_group(self):
    """Return the base output OptionGroup with cvs2git's options added."""

    group = RunOptions._get_output_options_group(self)

    group.add_option(IncompatibleOption(
        '--blobfile', type='string',
        action='store',
        help='path to which the "blob" data should be written',
        man_help=(
            'Write the "blob" data (containing revision contents) to '
            '\\fIpath\\fR.'
            ),
        metavar='PATH',
        ))
    group.add_option(IncompatibleOption(
        '--dumpfile', type='string',
        action='store',
        help='path to which the revision data should be written',
        man_help=(
            'Write the revision data (branches and commits) to \\fIpath\\fR.'
            ),
        metavar='PATH',
        ))
    group.add_option(ContextOption(
        '--dry-run',
        action='store_true',
        help=(
            'do not create any output; just print what would happen.'
            ),
        man_help=(
            'Do not create any output; just print what would happen.'
+ ), + )) + + return group + + def _get_extraction_options_group(self): + group = RunOptions._get_extraction_options_group(self) + + self.parser.set_default('use_cvs', False) + group.add_option(IncompatibleOption( + '--use-cvs', + action='store_true', + help=( + 'use CVS to extract revision contents (slower than ' + '--use-rcs but more reliable) (default)' + ), + man_help=( + 'Use CVS to extract revision contents. This option is slower ' + 'than \\fB--use-rcs\\fR but more reliable.' + ), + )) + self.parser.set_default('use_rcs', False) + group.add_option(IncompatibleOption( + '--use-rcs', + action='store_true', + help=( + 'use RCS to extract revision contents (faster than ' + '--use-cvs but fails in some cases)' + ), + man_help=( + 'Use RCS \'co\' to extract revision contents. This option is ' + 'faster than \\fB--use-cvs\\fR but fails in some cases.' + ), + )) + + return group + + def callback_manpage(self, option, opt_str, value, parser): + f = codecs.getwriter('utf_8')(sys.stdout) + ManWriter( + parser, + section='1', + date=datetime.date.today(), + source='Version %s' % (VERSION,), + manual='User Commands', + short_desc=short_desc, + synopsis=synopsis, + long_desc=long_desc, + files=files, + authors=authors, + see_also=see_also, + ).write_manpage(f) + sys.exit(0) + + def process_io_options(self): + """Process input/output options. 
+ + Process options related to extracting data from the CVS repository + and writing to 'git fast-import'-formatted files.""" + + ctx = Ctx() + options = self.options + + not_both(options.use_rcs, '--use-rcs', + options.use_cvs, '--use-cvs') + + if options.use_rcs: + revision_reader = RCSRevisionReader( + co_executable=options.co_executable + ) + else: + # --use-cvs is the default: + revision_reader = CVSRevisionReader( + cvs_executable=options.cvs_executable + ) + + if ctx.dry_run: + ctx.revision_recorder = NullRevisionRecorder() + else: + if not (options.blobfile and options.dumpfile): + raise FatalError("must pass '--blobfile' and '--dumpfile' options.") + ctx.revision_recorder = SimpleFulltextRevisionRecorderAdapter( + revision_reader, + GitRevisionRecorder(options.blobfile), + ) + + ctx.revision_excluder = NullRevisionExcluder() + ctx.revision_reader = None + + ctx.output_option = GitOutputOption( + options.dumpfile, + GitRevisionMarkWriter(), + max_merges=None, + # Optional map from CVS author names to git author names: + author_transforms={}, # FIXME + ) + + def set_project( + self, + project_cvs_repos_path, + symbol_transforms=None, + symbol_strategy_rules=[], + ): + """Set the project to be converted. + + If a project had already been set, overwrite it. + + Most arguments are passed straight through to the Project + constructor. SYMBOL_STRATEGY_RULES is an iterable of + SymbolStrategyRules that will be applied to symbols in this + project.""" + + symbol_strategy_rules = list(symbol_strategy_rules) + + project = Project( + 0, + project_cvs_repos_path, + symbol_transforms=symbol_transforms, + ) + + self.projects = [project] + self.project_symbol_strategy_rules = [symbol_strategy_rules] + + def process_options(self): + # Consistency check for options and arguments. 
+ if len(self.args) == 0: + self.usage() + sys.exit(1) + + if len(self.args) > 1: + Log().error(error_prefix + ": must pass only one CVS repository.\n") + self.usage() + sys.exit(1) + + cvsroot = self.args[0] + + self.process_io_options() + self.process_symbol_strategy_options() + self.process_property_setter_options() + + # Create the project: + self.set_project( + cvsroot, + symbol_transforms=self.options.symbol_transforms, + symbol_strategy_rules=self.options.symbol_strategy_rules, + ) + + diff --git a/cvs2svn_lib/key_generator.py b/cvs2svn_lib/key_generator.py new file mode 100644 index 0000000..d580d6b --- /dev/null +++ b/cvs2svn_lib/key_generator.py @@ -0,0 +1,45 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains the KeyGenerator class.""" + + +class KeyGenerator: + """Generate a series of unique keys.""" + + def __init__(self, first_id=1): + """Initialize a KeyGenerator with the specified FIRST_ID. 
+ + FIRST_ID should be an int or long, and the generated keys will be + of the same type.""" + + self._key_base = first_id + self._last_id = None + + def gen_id(self): + """Generate and return a previously-unused key, as an integer.""" + + self._last_id = self._key_base + self._key_base += 1 + + return self._last_id + + def get_last_id(self): + """Return the last id that was generated, as an integer.""" + + return self._last_id + + diff --git a/cvs2svn_lib/log.py b/cvs2svn_lib/log.py new file mode 100644 index 0000000..798350c --- /dev/null +++ b/cvs2svn_lib/log.py @@ -0,0 +1,174 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains a simple logging facility for cvs2svn.""" + + +import sys +import time +import threading + + +class Log: + """A Simple logging facility. + + If self.log_level is DEBUG or higher, each line will be timestamped + with the number of wall-clock seconds since the time when this + module was first imported. + + If self.use_timestamps is True, each line will be timestamped with a + human-readable clock time. + + The public methods of this class are thread-safe. 
+ + This class is a Borg; see + http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531.""" + + # These constants represent the log levels that this class supports. + # The increase_verbosity() and decrease_verbosity() methods rely on + # these constants being consecutive integers: + ERROR = -2 + WARN = -1 + QUIET = 0 + NORMAL = 1 + VERBOSE = 2 + DEBUG = 3 + + start_time = time.time() + + __shared_state = {} + + def __init__(self): + self.__dict__ = self.__shared_state + if self.__dict__: + return + + self.log_level = Log.NORMAL + + # Set this to True if you want to see timestamps on each line output. + self.use_timestamps = False + + # The output file to use for errors: + self._err = sys.stderr + + # The output file to use for lower-priority messages: + self._out = sys.stdout + + # Lock to serialize writes to the log: + self.lock = threading.Lock() + + def increase_verbosity(self): + self.lock.acquire() + try: + self.log_level = min(self.log_level + 1, Log.DEBUG) + finally: + self.lock.release() + + def decrease_verbosity(self): + self.lock.acquire() + try: + self.log_level = max(self.log_level - 1, Log.ERROR) + finally: + self.lock.release() + + def is_on(self, level): + """Return True iff messages at the specified LEVEL are currently on. + + LEVEL should be one of the constants Log.WARN, Log.QUIET, etc.""" + + return self.log_level >= level + + def _timestamp(self): + """Return a timestamp if needed, as a string with a trailing space.""" + + retval = [] + + if self.log_level >= Log.DEBUG: + retval.append('%f: ' % (time.time() - self.start_time,)) + + if self.use_timestamps: + retval.append(time.strftime('[%Y-%m-%d %I:%M:%S %Z] - ')) + + return ''.join(retval) + + def _write(self, out, *args): + """Write a message to OUT. + + If there are multiple ARGS, they will be separated by spaces. 
If + there are multiple lines, they will be output one by one with the + same timestamp prefix.""" + + timestamp = self._timestamp() + s = ' '.join(map(str, args)) + lines = s.split('\n') + if lines and not lines[-1]: + del lines[-1] + + self.lock.acquire() + try: + for s in lines: + out.write('%s%s\n' % (timestamp, s,)) + # Ensure that log output doesn't get out-of-order with respect to + # stderr output. + out.flush() + finally: + self.lock.release() + + def write(self, *args): + """Write a message to SELF._out. + + This is a public method to use for writing to the output log + unconditionally.""" + + self._write(self._out, *args) + + def error(self, *args): + """Log a message at the ERROR level.""" + + if self.is_on(Log.ERROR): + self._write(self._err, *args) + + def warn(self, *args): + """Log a message at the WARN level.""" + + if self.is_on(Log.WARN): + self._write(self._out, *args) + + def quiet(self, *args): + """Log a message at the QUIET level.""" + + if self.is_on(Log.QUIET): + self._write(self._out, *args) + + def normal(self, *args): + """Log a message at the NORMAL level.""" + + if self.is_on(Log.NORMAL): + self._write(self._out, *args) + + def verbose(self, *args): + """Log a message at the VERBOSE level.""" + + if self.is_on(Log.VERBOSE): + self._write(self._out, *args) + + def debug(self, *args): + """Log a message at the DEBUG level.""" + + if self.is_on(Log.DEBUG): + self._write(self._out, *args) + + diff --git a/cvs2svn_lib/main.py b/cvs2svn_lib/main.py new file mode 100644 index 0000000..492c49e --- /dev/null +++ b/cvs2svn_lib/main.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python2 +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. 
+# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +import os +import errno +import gc + +try: + # Try to get access to a bunch of encodings for use with --encoding. + # See http://cjkpython.i18n.org/ for details. + import iconv_codec +except ImportError: + pass + +from cvs2svn_lib.common import FatalError +from cvs2svn_lib.svn_run_options import SVNRunOptions +from cvs2svn_lib.git_run_options import GitRunOptions +from cvs2svn_lib.bzr_run_options import BzrRunOptions +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.pass_manager import PassManager +from cvs2svn_lib.passes import passes + + +def main(progname, run_options, pass_manager): + # Disable garbage collection, as we try not to create any circular + # data structures: + gc.disable() + + # Convenience var, so we don't have to keep instantiating this Borg. + ctx = Ctx() + + # Make sure the tmp directory exists. Note that we don't check if + # it's empty -- we want to be able to use, for example, "." to hold + # tempfiles. But if we *did* want check if it were empty, we'd do + # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-). + if not os.path.exists(ctx.tmpdir): + erase_tmpdir = True + os.mkdir(ctx.tmpdir) + elif not os.path.isdir(ctx.tmpdir): + raise FatalError( + "cvs2svn tried to use '%s' for temporary files, but that path\n" + " exists and is not a directory. Please make it be a directory,\n" + " or specify some other directory for temporary files." + % (ctx.tmpdir,)) + else: + erase_tmpdir = False + + # But do lock the tmpdir, to avoid process clash. 
+ try: + os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock')) + except OSError, e: + if e.errno == errno.EACCES: + raise FatalError("Permission denied:" + + " No write access to directory '%s'." % ctx.tmpdir) + if e.errno == errno.EEXIST: + raise FatalError( + "cvs2svn is using directory '%s' for temporary files, but\n" + " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n" + " cvs2svn process is currently using '%s' as its temporary\n" + " workspace. If you are certain that is not the case,\n" + " then remove the '%s/cvs2svn.lock' subdirectory." + % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,)) + raise + + try: + if run_options.profiling: + import hotshot + prof = hotshot.Profile('cvs2svn.hotshot') + prof.runcall(pass_manager.run, run_options) + prof.close() + else: + pass_manager.run(run_options) + finally: + try: + os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock')) + except: + pass + + if erase_tmpdir: + try: + os.rmdir(ctx.tmpdir) + except: + pass + + +def svn_main(progname, cmd_args): + pass_manager = PassManager(passes) + run_options = SVNRunOptions(progname, cmd_args, pass_manager) + main(progname, run_options, pass_manager) + + +def git_main(progname, cmd_args): + pass_manager = PassManager(passes) + run_options = GitRunOptions(progname, cmd_args, pass_manager) + main(progname, run_options, pass_manager) + + +def bzr_main(progname, cmd_args): + pass_manager = PassManager(passes) + run_options = BzrRunOptions(progname, cmd_args, pass_manager) + main(progname, run_options, pass_manager) + + diff --git a/cvs2svn_lib/man_writer.py b/cvs2svn_lib/man_writer.py new file mode 100644 index 0000000..3cca8c9 --- /dev/null +++ b/cvs2svn_lib/man_writer.py @@ -0,0 +1,197 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2009 CollabNet. All rights reserved. 
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""This module contains the ManWriter class for outputting manpages."""


import datetime
import optparse
import re


# Matches any run of whitespace (used to normalize text before wrapping):
whitespace_re = re.compile(r'\s+')


def wrap(s, width=70):
  """Wrap S into lines of at most WIDTH characters, for roff output.

  All whitespace runs in S are first collapsed to single spaces; words
  are never split (a word longer than WIDTH is emitted unbroken).
  Output lines that would begin with a roff control character ("'" or
  ".") are escaped with a backslash so roff renders them literally."""

  # Convert all whitespace substrings to single spaces:
  s = whitespace_re.sub(' ', s)
  s = s.strip()
  retval = []
  while s:
    if len(s) <= width:
      retval.append(s)
      break
    i = s.rfind(' ', 0, width + 1)
    if i == -1:
      # There were no spaces within the first width+1 characters; break
      # at the next space after width:
      i = s.find(' ', width + 1)
      if i == -1:
        # There were no spaces in s at all.
        retval.append(s)
        break

    retval.append(s[:i].rstrip())
    s = s[i+1:].lstrip()

  for (i, line) in enumerate(retval):
    if line.startswith('\'') or line.startswith('.'):
      # These are roff control characters and have to be escaped:
      retval[i] = '\\' + line

  return '\n'.join(retval)


class ManOption(optparse.Option):
  """An optparse.Option that holds an explicit string for the man page.

  The required MAN_HELP keyword argument is stored on the option and is
  used by ManWriter in preference to the option's normal help text."""

  def __init__(self, *args, **kw):
    self.man_help = kw.pop('man_help')
    optparse.Option.__init__(self, *args, **kw)


class ManWriter(object):
  """Write a Unix manpage (roff source) describing an optparse parser."""

  def __init__(
        self,
        parser,
        section, date, source, manual,
        short_desc, synopsis, long_desc, files, authors, see_also,
        ):
    # PARSER is the optparse.OptionParser whose options are to be
    # documented.  SECTION is the manpage section (e.g., '1'); DATE is
    # a datetime.date used in the '.TH' header.  The remaining
    # arguments are preformatted roff strings that are emitted
    # verbatim, except SEE_ALSO, which is a list of (name, section)
    # pairs:
    self.parser = parser
    self.section = section
    self.date = date
    self.source = source
    self.manual = manual
    self.short_desc = short_desc
    self.synopsis = synopsis
    self.long_desc = long_desc
    self.files = files
    self.authors = authors
    self.see_also = see_also

  def write_title(self, f):
    """Write the groff processing hint and the '.TH' header to F."""

    f.write('.\\" Process this file with\n')
    f.write(
        '.\\" groff -man -Tascii %s.%s\n' % (
            self.parser.get_prog_name(),
            self.section,
            )
        )
    f.write(
        '.TH %s "%s" "%s" "%s" "%s"\n' % (
            self.parser.get_prog_name().upper(),
            self.section,
            self.date.strftime('%b %d, %Y'),
            self.source,
            self.manual,
            )
        )

  def write_name(self, f):
    """Write the NAME section to F."""

    f.write('.SH "NAME"\n')
    f.write(
        '%s \- %s\n' % (
            self.parser.get_prog_name(),
            self.short_desc,
            )
        )

  def write_synopsis(self, f):
    """Write the SYNOPSIS section to F."""

    f.write('.SH "SYNOPSIS"\n')
    f.write(self.synopsis)

  def write_description(self, f):
    """Write the DESCRIPTION section to F."""

    f.write('.SH "DESCRIPTION"\n')
    f.write(self.long_desc)

  def _get_option_strings(self, option):
    """Return a list of option strings formatted with their metavariables.

    This method is very similar to
    optparse.HelpFormatter.format_option_strings().

    """

    if option.takes_value():
      metavar = (option.metavar or option.dest).lower()
      short_opts = [
          '\\fB%s\\fR \\fI%s\\fR' % (opt, metavar)
          for opt in option._short_opts
          ]
      long_opts = [
          '\\fB%s\\fR=\\fI%s\\fR' % (opt, metavar)
          for opt in option._long_opts
          ]
    else:
      short_opts = [
          '\\fB%s\\fR' % (opt,)
          for opt in option._short_opts
          ]
      long_opts = [
          '\\fB%s\\fR' % (opt,)
          for opt in option._long_opts
          ]

    return short_opts + long_opts

  def _write_option(self, f, option):
    """Write a single '.IP' entry for OPTION to F."""

    # Prefer the explicit man_help text (see ManOption); fall back to
    # the option's normal help string:
    man_help = getattr(option, 'man_help', option.help)

    if man_help is not optparse.SUPPRESS_HELP:
      man_help = wrap(man_help)
      f.write('.IP "%s"\n' % (', '.join(self._get_option_strings(option)),))
      f.write('%s\n' % (man_help,))

  def _write_container_help(self, f, container):
    """Write entries for all non-suppressed options in CONTAINER to F."""

    for option in container.option_list:
      if option.help is not optparse.SUPPRESS_HELP:
        self._write_option(f, option)

  def write_options(self, f):
    """Write the OPTIONS section (plus one section per group) to F."""

    f.write('.SH "OPTIONS"\n')
    if self.parser.option_list:
      self._write_container_help(f, self.parser)
    for group in self.parser.option_groups:
      f.write('.SH "%s"\n' % (group.title.upper(),))
      if group.description:
        # BUGFIX: this used to call self.format_description(), which
        # does not exist on ManWriter and raised AttributeError for any
        # option group that had a description.  Format the description
        # with wrap(), consistent with how option help texts are
        # formatted:
        f.write(wrap(group.description) + '\n')
      self._write_container_help(f, group)

  def write_files(self, f):
    """Write the FILES section to F."""

    f.write('.SH "FILES"\n')
    f.write(self.files)

  def write_authors(self, f):
    """Write the AUTHORS section to F."""

    f.write('.SH "AUTHORS"\n')
    f.write(self.authors)

  def write_see_also(self, f):
    """Write the SEE ALSO section to F."""

    f.write('.SH "SEE ALSO"\n')
    f.write(', '.join([
        '%s(%s)' % (name, section,)
        for (name, section,) in self.see_also
        ]) + '\n')

  def write_manpage(self, f):
    """Write the complete manpage to the open file object F."""

    self.write_title(f)
    self.write_name(f)
    self.write_synopsis(f)
    self.write_description(f)
    self.write_options(f)
    self.write_files(f)
    self.write_authors(f)
    self.write_see_also(f)
# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2008 CollabNet. All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""Represent CVSRevision metadata."""


class Metadata(object):
  """A simple value object holding the metadata for CVSRevisions.

  Instances are stored in the metadata database keyed by their unique
  numeric ID."""

  def __init__(self, id, author, log_msg):
    # The unique numeric key under which this record is stored:
    self.id = id
    # The author of the revision(s):
    self.author = author
    # The log message of the revision(s):
    self.log_msg = log_msg
# ====================================================================

"""This module contains classes to manage CVSRevision metadata."""


try:
  # Python >= 2.5:
  from hashlib import sha1
except ImportError:
  # Fallback for older Pythons:
  from sha import new as sha1

from cvs2svn_lib.context import Ctx
from cvs2svn_lib.database import IndexedDatabase
from cvs2svn_lib.key_generator import KeyGenerator
from cvs2svn_lib.serializer import PrimedPickleSerializer
from cvs2svn_lib.metadata import Metadata


def MetadataDatabase(store_filename, index_table_filename, mode):
  """A database to store Metadata instances that describe CVSRevisions.

  This database manages a map

      id -> Metadata instance

  where id is a unique identifier for the metadata."""

  serializer = PrimedPickleSerializer((Metadata,))
  return IndexedDatabase(
      store_filename, index_table_filename, mode, serializer,
      )


class MetadataLogger:
  """Store and generate IDs for the metadata associated with CVSRevisions.

  CVSRevisions that might be combinable must share a metadata ID, so we
  need a one-to-one relationship id <-> metadata.  A literal map
  {metadata : id} would grow too large, so instead we digest the
  significant parts of the metadata and keep a map {digest : id}.

  To get the ID for a new set of metadata, first create the digest.  If
  an ID is already registered for that digest, return it; otherwise
  generate a new ID, store the metadata under that ID, record the
  mapping {digest : id}, and return the new ID.

  The digest covers the author and log_msg, plus the project_id if
  Ctx().cross_project_commits is not set and the branch_name if
  Ctx().cross_branch_commits is not set."""

  def __init__(self, metadata_db):
    self._metadata_db = metadata_db

    # A map { digest : id }:
    self._digest_to_id = {}

    # Generates ids for metadata combinations not seen before:
    self.key_generator = KeyGenerator()

  def store(self, project, branch_name, author, log_msg):
    """Store the metadata and return its id.

    Locate the record for a commit with the specified (PROJECT,
    BRANCH_NAME, AUTHOR, LOG_MSG) and return its id.  (Depending on
    policy, not all of these items are necessarily used when creating
    the unique id.)  If there is no such record, create one and return
    its newly-generated id."""

    key = [author, log_msg]
    if not Ctx().cross_project_commits:
      key.append('%x' % project.id)
    if not Ctx().cross_branch_commits:
      key.append(branch_name or '')

    digest = sha1('\0'.join(key)).digest()
    id = self._digest_to_id.get(digest)
    if id is None:
      # Not seen before; assign a fresh id and record the metadata:
      id = self.key_generator.gen_id()
      self._digest_to_id[digest] = id
      self._metadata_db[id] = Metadata(id, author, log_msg)
    return id
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""This module contains classes to keep track of symbol openings/closings."""


import cPickle

from cvs2svn_lib import config
from cvs2svn_lib.common import InternalError
from cvs2svn_lib.artifact_manager import artifact_manager
from cvs2svn_lib.svn_revision_range import SVNRevisionRange


# Constants used in SYMBOL_OPENINGS_CLOSINGS
OPENING = 'O'
CLOSING = 'C'


class SymbolingsLogger:
  """Manage the file that contains lines for symbol openings and closings.

  This data will later be used to determine valid SVNRevision ranges
  from which a file can be copied when creating a branch or tag in
  Subversion.  Do this by finding 'Openings' and 'Closings' for each
  file copied onto a branch or tag.

  An 'Opening' is the beginning of the lifetime of the source
  (CVSRevision or CVSBranch) from which a given CVSSymbol sprouts.

  The 'Closing' is the SVN revision when the source is deleted or
  overwritten.

  For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
  obviously sprouts from revision 1.2.  Therefore, the SVN revision
  when 1.2 is committed is the opening for BEE on path 'foo.c', and
  the SVN revision when 1.3 is committed is the closing for BEE on
  path 'foo.c'.  Note that there may be many revisions chronologically
  between 1.2 and 1.3, for example, revisions on branches of 'foo.c',
  perhaps even including on branch BEE itself.  But 1.3 is the next
  revision *on the same line* as 1.2, that is why it is the closing
  revision for those symbolic names of which 1.2 is the opening.

  The reason for doing all this hullabaloo is (1) to determine what
  range of SVN revision numbers can be used as the source of a copy of
  a particular file onto a branch/tag, and (2) to minimize the number
  of copies and deletes per creation by choosing source SVN revision
  numbers that can be used for as many files as possible.

  For example, revisions 1.2 and 1.3 of foo.c might correspond to
  revisions 17 and 30 in Subversion.  That means that when creating
  branch BEE, foo.c has to be copied from a Subversion revision number
  in the range 17 <= revnum < 30.  Now if there were another file,
  'bar.c', in the same directory, and 'bar.c's opening and closing for
  BEE correspond to revisions 24 and 39 in Subversion, then we can
  kill two birds with one stone by copying the whole directory from
  somewhere in the range 24 <= revnum < 30."""

  def __init__(self):
    self.symbolings = open(
        artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS), 'w')

  def log_revision(self, cvs_rev, svn_revnum):
    """Log any openings and closings found in CVS_REV."""

    for (symbol_id, cvs_symbol_id,) in cvs_rev.opened_symbols:
      self._log_opening(symbol_id, cvs_symbol_id, svn_revnum)

    for (symbol_id, cvs_symbol_id) in cvs_rev.closed_symbols:
      self._log_closing(symbol_id, cvs_symbol_id, svn_revnum)

  def log_branch_revision(self, cvs_branch, svn_revnum):
    """Log any openings and closings found in CVS_BRANCH."""

    for (symbol_id, cvs_symbol_id,) in cvs_branch.opened_symbols:
      self._log_opening(symbol_id, cvs_symbol_id, svn_revnum)

  def _log(self, symbol_id, cvs_symbol_id, svn_revnum, event_type):
    """Log an opening or closing to self.symbolings.

    Write out a single line to the symbol_openings_closings file
    representing that SVN_REVNUM is either the opening or closing
    (EVENT_TYPE) of CVS_SYMBOL_ID for SYMBOL_ID.

    EVENT_TYPE should be one of the following constants: OPENING or
    CLOSING."""

    # Note: renamed from 'type' to avoid shadowing the builtin.
    self.symbolings.write(
        '%x %d %s %x\n' % (symbol_id, svn_revnum, event_type, cvs_symbol_id)
        )

  def _log_opening(self, symbol_id, cvs_symbol_id, svn_revnum):
    """Log an opening to self.symbolings.

    See _log() for more information."""

    self._log(symbol_id, cvs_symbol_id, svn_revnum, OPENING)

  def _log_closing(self, symbol_id, cvs_symbol_id, svn_revnum):
    """Log a closing to self.symbolings.

    See _log() for more information."""

    self._log(symbol_id, cvs_symbol_id, svn_revnum, CLOSING)

  def close(self):
    self.symbolings.close()
    self.symbolings = None


class SymbolingsReader:
  """Provides an interface to retrieve symbol openings and closings.

  This class accesses the SYMBOL_OPENINGS_CLOSINGS_SORTED file and the
  SYMBOL_OFFSETS_DB.  Does the heavy lifting of finding and returning
  the correct opening and closing Subversion revision numbers for a
  given symbolic name and SVN revision number range."""

  def __init__(self):
    """Opens the SYMBOL_OPENINGS_CLOSINGS_SORTED for reading, and
    reads the offsets database into memory."""

    self.symbolings = open(
        artifact_manager.get_temp_file(
            config.SYMBOL_OPENINGS_CLOSINGS_SORTED),
        'r')
    # The offsets_db is really small, and we need to read and write
    # from it a fair bit, so suck it into memory.  (Use open() rather
    # than the deprecated file() constructor, for consistency with the
    # rest of this module.)
    offsets_db = open(
        artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'rb')
    # A map from symbol_id to offset.  The values of this map are
    # incremented as the openings and closings for a symbol are
    # consumed.
    self.offsets = cPickle.load(offsets_db)
    offsets_db.close()

  def close(self):
    self.symbolings.close()
    del self.symbolings
    del self.offsets

  def _generate_lines(self, symbol):
    """Generate the lines for SYMBOL.

    SYMBOL is a TypedSymbol instance.  Yield the tuple (revnum, type,
    cvs_symbol_id) for all openings and closings for SYMBOL."""

    if symbol.id in self.offsets:
      # Set our read offset for self.symbolings to the offset for this
      # symbol:
      self.symbolings.seek(self.offsets[symbol.id])

      while True:
        line = self.symbolings.readline().rstrip()
        if not line:
          break
        (id, revnum, event_type, cvs_symbol_id) = line.split()
        id = int(id, 16)
        revnum = int(revnum)
        if id != symbol.id:
          # We have read beyond this symbol's lines:
          break
        cvs_symbol_id = int(cvs_symbol_id, 16)

        yield (revnum, event_type, cvs_symbol_id)

  def get_range_map(self, svn_symbol_commit):
    """Return the ranges of all CVSSymbols in SVN_SYMBOL_COMMIT.

    Return a map { CVSSymbol : SVNRevisionRange }."""

    # A map { cvs_symbol_id : CVSSymbol }:
    cvs_symbol_map = {}
    for cvs_symbol in svn_symbol_commit.get_cvs_items():
      cvs_symbol_map[cvs_symbol.id] = cvs_symbol

    range_map = {}

    for (revnum, event_type, cvs_symbol_id) \
            in self._generate_lines(svn_symbol_commit.symbol):
      cvs_symbol = cvs_symbol_map.get(cvs_symbol_id)
      if cvs_symbol is None:
        # This CVSSymbol is not part of SVN_SYMBOL_COMMIT.
        continue
      # Note: renamed from 'range' to avoid shadowing the builtin.
      rev_range = range_map.get(cvs_symbol)
      if event_type == OPENING:
        if rev_range is not None:
          raise InternalError(
              'Multiple openings logged for %r' % (cvs_symbol,)
              )
        range_map[cvs_symbol] = SVNRevisionRange(
            cvs_symbol.source_lod, revnum
            )
      else:
        if rev_range is None:
          raise InternalError(
              'Closing precedes opening for %r' % (cvs_symbol,)
              )
        if rev_range.closing_revnum is not None:
          raise InternalError(
              'Multiple closings logged for %r' % (cvs_symbol,)
              )
        rev_range.add_closing(revnum)

    # Make sure that all CVSSymbols are accounted for, and adjust the
    # closings to be not later than svn_symbol_commit.revnum.
    for cvs_symbol in cvs_symbol_map.itervalues():
      try:
        rev_range = range_map[cvs_symbol]
      except KeyError:
        raise InternalError('No opening for %s' % (cvs_symbol,))

      if rev_range.opening_revnum >= svn_symbol_commit.revnum:
        raise InternalError(
            'Opening in r%d not ready for %s in r%d'
            % (rev_range.opening_revnum, cvs_symbol,
               svn_symbol_commit.revnum,)
            )

      if rev_range.closing_revnum is not None \
             and rev_range.closing_revnum > svn_symbol_commit.revnum:
        rev_range.closing_revnum = None

    return range_map


class OutputOption:
  """Represents an output choice for a run of cvs2svn."""

  def register_artifacts(self, which_pass):
    """Register artifacts that will be needed for this output option.

    WHICH_PASS is the pass that will call our callbacks, so it should
    be used to do the registering (e.g., call
    WHICH_PASS.register_temp_file() and/or
    WHICH_PASS.register_temp_file_needed())."""

    pass

  def check(self):
    """Check that the options stored in SELF are sensible.

    This might include the existence of a repository on disk, etc."""

    raise NotImplementedError()

  def check_symbols(self, symbol_map):
    """Check that the symbols in SYMBOL_MAP are OK for this output option.

    SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)},
    indicating how each symbol is planned to be converted.  Raise a
    FatalError if the symbol plan is not acceptable for this output
    option."""

    raise NotImplementedError()

  def setup(self, svn_rev_count):
    """Prepare this output option."""

    raise NotImplementedError()

  def process_initial_project_commit(self, svn_commit):
    """Process SVN_COMMIT, which is an SVNInitialProjectCommit."""

    raise NotImplementedError()

  def process_primary_commit(self, svn_commit):
    """Process SVN_COMMIT, which is an SVNPrimaryCommit."""

    raise NotImplementedError()

  def process_post_commit(self, svn_commit):
    """Process SVN_COMMIT, which is an SVNPostCommit."""

    raise NotImplementedError()

  def process_branch_commit(self, svn_commit):
    """Process SVN_COMMIT, which is an SVNBranchCommit."""

    raise NotImplementedError()

  def process_tag_commit(self, svn_commit):
    """Process SVN_COMMIT, which is an SVNTagCommit."""

    raise NotImplementedError()

  def cleanup(self):
    """Perform any required cleanup related to this output option."""

    raise NotImplementedError()
+# +# ==================================================================== +# Copyright (c) 2000-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains tools to manage the passes of a conversion.""" + + +import time +import gc + +from cvs2svn_lib import config +from cvs2svn_lib.common import FatalError +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.log import Log +from cvs2svn_lib.stats_keeper import StatsKeeper +from cvs2svn_lib.stats_keeper import read_stats_keeper +from cvs2svn_lib.artifact_manager import artifact_manager + + +class InvalidPassError(FatalError): + def __init__(self, msg): + FatalError.__init__( + self, msg + '\nUse --help-passes for more information.') + + +def check_for_garbage(): + # We've turned off the garbage collector because we shouldn't + # need it (we don't create circular dependencies) and because it + # is therefore a waste of time. 
So here we check for any + # unreachable objects and generate a debug-level warning if any + # occur: + gc.set_debug(gc.DEBUG_SAVEALL) + gc_count = gc.collect() + if gc_count: + if Log().is_on(Log.DEBUG): + Log().debug( + 'INTERNAL: %d unreachable object(s) were garbage collected:' + % (gc_count,) + ) + for g in gc.garbage: + Log().debug(' %s' % (g,)) + del gc.garbage[:] + + +class Pass(object): + """Base class for one step of the conversion.""" + + def __init__(self): + # By default, use the pass object's class name as the pass name: + self.name = self.__class__.__name__ + + def register_artifacts(self): + """Register artifacts (created and needed) in artifact_manager.""" + + raise NotImplementedError + + def _register_temp_file(self, basename): + """Helper method; for brevity only.""" + + artifact_manager.register_temp_file(basename, self) + + def _register_temp_file_needed(self, basename): + """Helper method; for brevity only.""" + + artifact_manager.register_temp_file_needed(basename, self) + + def run(self, run_options, stats_keeper): + """Carry out this step of the conversion. + + RUN_OPTIONS is an instance of RunOptions. STATS_KEEPER is an + instance of StatsKeeper.""" + + raise NotImplementedError + + +class PassManager: + """Manage a list of passes that can be executed separately or all at once. + + Passes are numbered starting with 1.""" + + def __init__(self, passes): + """Construct a PassManager with the specified PASSES. + + Internally, passes are numbered starting with 1. So PASSES[0] is + considered to be pass number 1.""" + + self.passes = passes + self.num_passes = len(self.passes) + + def get_pass_number(self, pass_name, default=None): + """Return the number of the pass indicated by PASS_NAME. + + PASS_NAME should be a string containing the name or number of a + pass. If a number, it should be in the range 1 <= value <= + self.num_passes. Return an integer in the same range. 
If + PASS_NAME is the empty string and DEFAULT is specified, return + DEFAULT. Raise InvalidPassError if PASS_NAME cannot be converted + into a valid pass number.""" + + if not pass_name and default is not None: + assert 1 <= default <= self.num_passes + return default + + try: + # Does pass_name look like an integer? + pass_number = int(pass_name) + if not 1 <= pass_number <= self.num_passes: + raise InvalidPassError( + 'illegal value (%d) for pass number. Must be 1 through %d or\n' + 'the name of a known pass.' + % (pass_number,self.num_passes,)) + return pass_number + except ValueError: + # Is pass_name the name of one of the passes? + for (i, the_pass) in enumerate(self.passes): + if the_pass.name == pass_name: + return i + 1 + raise InvalidPassError('Unknown pass name (%r).' % (pass_name,)) + + def run(self, run_options): + """Run the specified passes, one after another. + + RUN_OPTIONS will be passed to the Passes' run() methods. + RUN_OPTIONS.start_pass is the number of the first pass that should + be run. RUN_OPTIONS.end_pass is the number of the last pass that + should be run. It must be that 1 <= RUN_OPTIONS.start_pass <= + RUN_OPTIONS.end_pass <= self.num_passes.""" + + # Convert start_pass and end_pass into the indices of the passes + # to execute, using the Python index range convention (i.e., first + # pass executed and first pass *after* the ones that should be + # executed). 
+ index_start = run_options.start_pass - 1 + index_end = run_options.end_pass + + # Inform the artifact manager when artifacts are created and used: + for (i, the_pass) in enumerate(self.passes): + the_pass.register_artifacts() + # Each pass creates a new version of the statistics file: + artifact_manager.register_temp_file( + config.STATISTICS_FILE % (i + 1,), the_pass + ) + if i != 0: + # Each pass subsequent to the first reads the statistics file + # from the preceding pass: + artifact_manager.register_temp_file_needed( + config.STATISTICS_FILE % (i + 1 - 1,), the_pass + ) + + # Tell the artifact manager about passes that are being skipped this run: + for the_pass in self.passes[0:index_start]: + artifact_manager.pass_skipped(the_pass) + + start_time = time.time() + for i in range(index_start, index_end): + the_pass = self.passes[i] + Log().quiet('----- pass %d (%s) -----' % (i + 1, the_pass.name,)) + artifact_manager.pass_started(the_pass) + + if i == 0: + stats_keeper = StatsKeeper() + else: + stats_keeper = read_stats_keeper( + artifact_manager.get_temp_file( + config.STATISTICS_FILE % (i + 1 - 1,) + ) + ) + + the_pass.run(run_options, stats_keeper) + end_time = time.time() + stats_keeper.log_duration_for_pass( + end_time - start_time, i + 1, the_pass.name + ) + Log().normal(stats_keeper.single_pass_timing(i + 1)) + stats_keeper.archive( + artifact_manager.get_temp_file(config.STATISTICS_FILE % (i + 1,)) + ) + start_time = end_time + Ctx().clean() + # Allow the artifact manager to clean up artifacts that are no + # longer needed: + artifact_manager.pass_done(the_pass, Ctx().skip_cleanup) + + check_for_garbage() + + # Tell the artifact manager about passes that are being deferred: + for the_pass in self.passes[index_end:]: + artifact_manager.pass_deferred(the_pass) + + Log().quiet(stats_keeper) + Log().normal(stats_keeper.timings()) + + # Consistency check: + artifact_manager.check_clean() + + def help_passes(self): + """Output (to sys.stdout) the indices and 
names of available passes.""" + + print 'PASSES:' + for (i, the_pass) in enumerate(self.passes): + print '%5d : %s' % (i + 1, the_pass.name,) + + diff --git a/cvs2svn_lib/passes.py b/cvs2svn_lib/passes.py new file mode 100644 index 0000000..af14692 --- /dev/null +++ b/cvs2svn_lib/passes.py @@ -0,0 +1,1837 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module defines the passes that make up a conversion.""" + + +import sys +import os +import shutil +import cPickle + +from cvs2svn_lib import config +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.common import warning_prefix +from cvs2svn_lib.common import FatalException +from cvs2svn_lib.common import FatalError +from cvs2svn_lib.common import InternalError +from cvs2svn_lib.common import DB_OPEN_NEW +from cvs2svn_lib.common import DB_OPEN_READ +from cvs2svn_lib.common import DB_OPEN_WRITE +from cvs2svn_lib.common import Timestamper +from cvs2svn_lib.log import Log +from cvs2svn_lib.pass_manager import Pass +from cvs2svn_lib.serializer import PrimedPickleSerializer +from cvs2svn_lib.artifact_manager import artifact_manager +from cvs2svn_lib.cvs_file_database import CVSFileDatabase +from cvs2svn_lib.metadata_database import MetadataDatabase +from cvs2svn_lib.project import read_projects +from 
cvs2svn_lib.project import write_projects +from cvs2svn_lib.symbol import LineOfDevelopment +from cvs2svn_lib.symbol import Trunk +from cvs2svn_lib.symbol import Symbol +from cvs2svn_lib.symbol import Branch +from cvs2svn_lib.symbol import Tag +from cvs2svn_lib.symbol import ExcludedSymbol +from cvs2svn_lib.symbol_database import SymbolDatabase +from cvs2svn_lib.symbol_database import create_symbol_database +from cvs2svn_lib.symbol_statistics import SymbolPlanError +from cvs2svn_lib.symbol_statistics import IndeterminateSymbolException +from cvs2svn_lib.symbol_statistics import SymbolStatistics +from cvs2svn_lib.cvs_item import CVSRevision +from cvs2svn_lib.cvs_item import CVSSymbol +from cvs2svn_lib.cvs_item_database import OldCVSItemStore +from cvs2svn_lib.cvs_item_database import IndexedCVSItemStore +from cvs2svn_lib.cvs_item_database import cvs_item_primer +from cvs2svn_lib.cvs_item_database import NewSortableCVSRevisionDatabase +from cvs2svn_lib.cvs_item_database import OldSortableCVSRevisionDatabase +from cvs2svn_lib.cvs_item_database import NewSortableCVSSymbolDatabase +from cvs2svn_lib.cvs_item_database import OldSortableCVSSymbolDatabase +from cvs2svn_lib.key_generator import KeyGenerator +from cvs2svn_lib.changeset import RevisionChangeset +from cvs2svn_lib.changeset import OrderedChangeset +from cvs2svn_lib.changeset import SymbolChangeset +from cvs2svn_lib.changeset import BranchChangeset +from cvs2svn_lib.changeset import create_symbol_changeset +from cvs2svn_lib.changeset_graph import ChangesetGraph +from cvs2svn_lib.changeset_graph_link import ChangesetGraphLink +from cvs2svn_lib.changeset_database import ChangesetDatabase +from cvs2svn_lib.changeset_database import CVSItemToChangesetTable +from cvs2svn_lib.svn_commit import SVNRevisionCommit +from cvs2svn_lib.openings_closings import SymbolingsLogger +from cvs2svn_lib.svn_commit_creator import SVNCommitCreator +from cvs2svn_lib.persistence_manager import PersistenceManager +from 
cvs2svn_lib.collect_data import CollectData +from cvs2svn_lib.process import call_command +from cvs2svn_lib.check_dependencies_pass \ + import CheckItemStoreDependenciesPass +from cvs2svn_lib.check_dependencies_pass \ + import CheckIndexedItemStoreDependenciesPass + + +def sort_file(infilename, outfilename, options=[]): + """Sort file INFILENAME, storing the results to OUTFILENAME. + + OPTIONS is an optional list of strings that are passed as additional + options to the sort command.""" + + # GNU sort will sort our dates differently (incorrectly!) if our + # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set + # it to 'C' + lc_all_tmp = os.environ.get('LC_ALL', None) + os.environ['LC_ALL'] = 'C' + + # The -T option to sort has a nice side effect. The Win32 sort is + # case insensitive and cannot be used, and since it does not + # understand the -T option and dies if we try to use it, there is no + # risk that we use that sort by accident. + command = [ + Ctx().sort_executable, + '-T', Ctx().tmpdir + ] + options + [ + infilename + ] + + try: + # Under Windows, the subprocess module uses the Win32 + # CreateProcess, which always looks in the Windows system32 + # directory before it looks in the directories listed in the PATH + # environment variable. Since the Windows sort.exe is in the + # system32 directory it will always be chosen. A simple + # workaround is to launch the sort in a shell. When the shell + # (cmd.exe) searches it only examines the directories in the PATH + # so putting the directory with GNU sort ahead of the Windows + # system32 directory will cause GNU sort to be chosen. + call_command( + command, stdout=open(outfilename, 'w'), shell=(sys.platform=='win32') + ) + finally: + if lc_all_tmp is None: + del os.environ['LC_ALL'] + else: + os.environ['LC_ALL'] = lc_all_tmp + + # On some versions of Windows, os.system() does not return an error + # if the command fails. 
So add little consistency tests here that + # the output file was created and has the right size: + + if not os.path.exists(outfilename): + raise FatalError('Sort output file missing: %r' % (outfilename,)) + + if os.path.getsize(outfilename) != os.path.getsize(infilename): + raise FatalError( + 'Sort input and output file sizes differ:\n' + ' %r (%d bytes)\n' + ' %r (%d bytes)' % ( + infilename, os.path.getsize(infilename), + outfilename, os.path.getsize(outfilename), + ) + ) + + +class CollectRevsPass(Pass): + """This pass was formerly known as pass1.""" + + def register_artifacts(self): + self._register_temp_file(config.PROJECTS) + self._register_temp_file(config.SYMBOL_STATISTICS) + self._register_temp_file(config.METADATA_INDEX_TABLE) + self._register_temp_file(config.METADATA_STORE) + self._register_temp_file(config.CVS_FILES_DB) + self._register_temp_file(config.CVS_ITEMS_STORE) + Ctx().revision_recorder.register_artifacts(self) + + def run(self, run_options, stats_keeper): + Log().quiet("Examining all CVS ',v' files...") + Ctx()._projects = {} + Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_NEW) + cd = CollectData(Ctx().revision_recorder, stats_keeper) + for project in run_options.projects: + cd.process_project(project) + run_options.projects = None + + fatal_errors = cd.close() + + if fatal_errors: + raise FatalException("Pass 1 complete.\n" + + "=" * 75 + "\n" + + "Error summary:\n" + + "\n".join(fatal_errors) + "\n" + + "Exited due to fatal error(s).") + + Ctx()._cvs_file_db.close() + write_projects(artifact_manager.get_temp_file(config.PROJECTS)) + Log().quiet("Done") + + +class CleanMetadataPass(Pass): + """Clean up CVS revision metadata and write it to a new database.""" + + def register_artifacts(self): + self._register_temp_file(config.METADATA_CLEAN_INDEX_TABLE) + self._register_temp_file(config.METADATA_CLEAN_STORE) + self._register_temp_file_needed(config.METADATA_INDEX_TABLE) + self._register_temp_file_needed(config.METADATA_STORE) + + def 
_get_clean_author(self, author): + """Return AUTHOR, converted appropriately to UTF8. + + Raise a UnicodeException if it cannot be converted using the + configured cvs_author_decoder.""" + + try: + return self._authors[author] + except KeyError: + pass + + try: + clean_author = Ctx().cvs_author_decoder(author) + except UnicodeError: + self._authors[author] = author + raise UnicodeError('Problem decoding author \'%s\'' % (author,)) + + try: + clean_author = clean_author.encode('utf8') + except UnicodeError: + self._authors[author] = author + raise UnicodeError('Problem encoding author \'%s\'' % (author,)) + + self._authors[author] = clean_author + return clean_author + + def _get_clean_log_msg(self, log_msg): + """Return LOG_MSG, converted appropriately to UTF8. + + Raise a UnicodeException if it cannot be converted using the + configured cvs_log_decoder.""" + + try: + clean_log_msg = Ctx().cvs_log_decoder(log_msg) + except UnicodeError: + raise UnicodeError( + 'Problem decoding log message:\n' + '%s\n' + '%s\n' + '%s' + % ('-' * 75, log_msg, '-' * 75,) + ) + + try: + return clean_log_msg.encode('utf8') + except UnicodeError: + raise UnicodeError( + 'Problem encoding log message:\n' + '%s\n' + '%s\n' + '%s' + % ('-' * 75, log_msg, '-' * 75,) + ) + + def _clean_metadata(self, metadata): + """Clean up METADATA by overwriting its members as necessary.""" + + try: + metadata.author = self._get_clean_author(metadata.author) + except UnicodeError, e: + Log().warn('%s: %s' % (warning_prefix, e,)) + self.warnings = True + + try: + metadata.log_msg = self._get_clean_log_msg(metadata.log_msg) + except UnicodeError, e: + Log().warn('%s: %s' % (warning_prefix, e,)) + self.warnings = True + + def run(self, run_options, stats_keeper): + Log().quiet("Converting metadata to UTF8...") + metadata_db = MetadataDatabase( + artifact_manager.get_temp_file(config.METADATA_STORE), + artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE), + DB_OPEN_READ, + ) + metadata_clean_db = 
MetadataDatabase( + artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE), + artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE), + DB_OPEN_NEW, + ) + + self.warnings = False + + # A map {author : clean_author} for those known (to avoid + # repeating warnings): + self._authors = {} + + for id in metadata_db.iterkeys(): + metadata = metadata_db[id] + + # Record the original author name because it might be needed for + # expanding CVS keywords: + metadata.original_author = metadata.author + + self._clean_metadata(metadata) + + metadata_clean_db[id] = metadata + + if self.warnings: + raise FatalError( + 'There were warnings converting author names and/or log messages\n' + 'to Unicode (see messages above). Please restart this pass\n' + 'with one or more \'--encoding\' parameters or with\n' + '\'--fallback-encoding\'.' + ) + + metadata_clean_db.close() + metadata_db.close() + Log().quiet("Done") + + +class CollateSymbolsPass(Pass): + """Divide symbols into branches, tags, and excludes.""" + + conversion_names = { + Trunk : 'trunk', + Branch : 'branch', + Tag : 'tag', + ExcludedSymbol : 'exclude', + Symbol : '.', + } + + def register_artifacts(self): + self._register_temp_file(config.SYMBOL_DB) + self._register_temp_file_needed(config.PROJECTS) + self._register_temp_file_needed(config.SYMBOL_STATISTICS) + + def get_symbol(self, run_options, stats): + """Use StrategyRules to decide what to do with a symbol. + + STATS is an instance of symbol_statistics._Stats describing an + instance of Symbol or Trunk. To determine how the symbol is to be + converted, consult the StrategyRules in the project's + symbol_strategy_rules. Each rule is allowed a chance to change + the way the symbol will be converted. 
If the symbol is not a + Trunk or TypedSymbol after all rules have run, raise + IndeterminateSymbolException.""" + + symbol = stats.lod + rules = run_options.project_symbol_strategy_rules[symbol.project.id] + for rule in rules: + symbol = rule.get_symbol(symbol, stats) + assert symbol is not None + + stats.check_valid(symbol) + + return symbol + + def log_symbol_summary(self, stats, symbol): + if not self.symbol_info_file: + return + + if isinstance(symbol, Trunk): + name = '.trunk.' + preferred_parent_name = '.' + else: + name = stats.lod.name + if symbol.preferred_parent_id is None: + preferred_parent_name = '.' + else: + preferred_parent = self.symbol_stats[symbol.preferred_parent_id].lod + if isinstance(preferred_parent, Trunk): + preferred_parent_name = '.trunk.' + else: + preferred_parent_name = preferred_parent.name + + if isinstance(symbol, LineOfDevelopment) and symbol.base_path: + symbol_path = symbol.base_path + else: + symbol_path = '.' + + self.symbol_info_file.write( + '%-5d %-30s %-10s %s %s\n' % ( + stats.lod.project.id, + name, + self.conversion_names[symbol.__class__], + symbol_path, + preferred_parent_name, + ) + ) + self.symbol_info_file.write(' # %s\n' % (stats,)) + parent_counts = stats.possible_parents.items() + if parent_counts: + self.symbol_info_file.write(' # Possible parents:\n') + parent_counts.sort(lambda a,b: cmp((b[1], a[0]), (a[1], b[0]))) + for (pp, count) in parent_counts: + if isinstance(pp, Trunk): + self.symbol_info_file.write( + ' # .trunk. : %d\n' % (count,) + ) + else: + self.symbol_info_file.write( + ' # %s : %d\n' % (pp.name, count,) + ) + + def get_symbols(self, run_options): + """Return a map telling how to convert symbols. + + The return value is a map {AbstractSymbol : (Trunk|TypedSymbol)}, + indicating how each symbol should be converted. Trunk objects in + SYMBOL_STATS are passed through unchanged. One object is included + in the return value for each line of development described in + SYMBOL_STATS. 
+ + Raise FatalError if there was an error.""" + + errors = [] + mismatches = [] + + if Ctx().symbol_info_filename is not None: + self.symbol_info_file = open(Ctx().symbol_info_filename, 'w') + self.symbol_info_file.write( + '# Columns: project_id symbol_name conversion symbol_path ' + 'preferred_parent_name\n' + ) + else: + self.symbol_info_file = None + + # Initialize each symbol strategy rule a single time, even if it + # is used in more than one project. First define a map from + # object id to symbol strategy rule: + rules = {} + for rule_list in run_options.project_symbol_strategy_rules: + for rule in rule_list: + rules[id(rule)] = rule + + for rule in rules.itervalues(): + rule.start(self.symbol_stats) + + retval = {} + + for stats in self.symbol_stats: + try: + symbol = self.get_symbol(run_options, stats) + except IndeterminateSymbolException, e: + self.log_symbol_summary(stats, stats.lod) + mismatches.append(e.stats) + except SymbolPlanError, e: + self.log_symbol_summary(stats, stats.lod) + errors.append(e) + else: + self.log_symbol_summary(stats, symbol) + retval[stats.lod] = symbol + + for rule in rules.itervalues(): + rule.finish() + + if self.symbol_info_file: + self.symbol_info_file.close() + + del self.symbol_info_file + + if errors or mismatches: + s = ['Problems determining how symbols should be converted:\n'] + for e in errors: + s.append('%s\n' % (e,)) + if mismatches: + s.append( + 'It is not clear how the following symbols ' + 'should be converted.\n' + 'Use --symbol-hints, --force-tag, --force-branch, --exclude, ' + 'and/or\n' + '--symbol-default to resolve the ambiguity.\n' + ) + for stats in mismatches: + s.append(' %s\n' % (stats,)) + raise FatalError(''.join(s)) + else: + return retval + + def run(self, run_options, stats_keeper): + Ctx()._projects = read_projects( + artifact_manager.get_temp_file(config.PROJECTS) + ) + self.symbol_stats = SymbolStatistics( + artifact_manager.get_temp_file(config.SYMBOL_STATISTICS) + ) + + symbol_map = 
self.get_symbols(run_options) + + # Check the symbols for consistency and bail out if there were errors: + self.symbol_stats.check_consistency(symbol_map) + + # Check that the symbols all have SVN paths set and that the paths + # are disjoint: + Ctx().output_option.check_symbols(symbol_map) + + for symbol in symbol_map.itervalues(): + if isinstance(symbol, ExcludedSymbol): + self.symbol_stats.exclude_symbol(symbol) + + create_symbol_database(symbol_map.values()) + + del self.symbol_stats + + Log().quiet("Done") + + +class FilterSymbolsPass(Pass): + """Delete any branches/tags that are to be excluded. + + Also delete revisions on excluded branches, and delete other + references to the excluded symbols.""" + + def register_artifacts(self): + self._register_temp_file(config.SUMMARY_SERIALIZER) + self._register_temp_file(config.CVS_REVS_SUMMARY_DATAFILE) + self._register_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE) + self._register_temp_file_needed(config.PROJECTS) + self._register_temp_file_needed(config.SYMBOL_DB) + self._register_temp_file_needed(config.CVS_FILES_DB) + self._register_temp_file_needed(config.CVS_ITEMS_STORE) + Ctx().revision_excluder.register_artifacts(self) + + def run(self, run_options, stats_keeper): + Ctx()._projects = read_projects( + artifact_manager.get_temp_file(config.PROJECTS) + ) + Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) + Ctx()._symbol_db = SymbolDatabase() + cvs_item_store = OldCVSItemStore( + artifact_manager.get_temp_file(config.CVS_ITEMS_STORE)) + + cvs_item_serializer = PrimedPickleSerializer(cvs_item_primer) + f = open(artifact_manager.get_temp_file(config.SUMMARY_SERIALIZER), 'wb') + cPickle.dump(cvs_item_serializer, f, -1) + f.close() + + rev_db = NewSortableCVSRevisionDatabase( + artifact_manager.get_temp_file(config.CVS_REVS_SUMMARY_DATAFILE), + cvs_item_serializer, + ) + + symbol_db = NewSortableCVSSymbolDatabase( + artifact_manager.get_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE), + cvs_item_serializer, + ) + + 
revision_excluder = Ctx().revision_excluder + + Log().quiet("Filtering out excluded symbols and summarizing items...") + + stats_keeper.reset_cvs_rev_info() + revision_excluder.start() + + # Process the cvs items store one file at a time: + for cvs_file_items in cvs_item_store.iter_cvs_file_items(): + Log().verbose(cvs_file_items.cvs_file.filename) + cvs_file_items.filter_excluded_symbols(revision_excluder) + cvs_file_items.mutate_symbols() + cvs_file_items.adjust_parents() + cvs_file_items.refine_symbols() + cvs_file_items.record_opened_symbols() + cvs_file_items.record_closed_symbols() + cvs_file_items.check_link_consistency() + + # Store whatever is left to the new file and update statistics: + stats_keeper.record_cvs_file(cvs_file_items.cvs_file) + for cvs_item in cvs_file_items.values(): + stats_keeper.record_cvs_item(cvs_item) + + if isinstance(cvs_item, CVSRevision): + rev_db.add(cvs_item) + elif isinstance(cvs_item, CVSSymbol): + symbol_db.add(cvs_item) + + stats_keeper.set_stats_reflect_exclude(True) + + rev_db.close() + symbol_db.close() + revision_excluder.finish() + cvs_item_store.close() + Ctx()._symbol_db.close() + Ctx()._cvs_file_db.close() + + Log().quiet("Done") + + +class SortRevisionSummaryPass(Pass): + """Sort the revision summary file.""" + + def register_artifacts(self): + self._register_temp_file(config.CVS_REVS_SUMMARY_SORTED_DATAFILE) + self._register_temp_file_needed(config.CVS_REVS_SUMMARY_DATAFILE) + + def run(self, run_options, stats_keeper): + Log().quiet("Sorting CVS revision summaries...") + sort_file( + artifact_manager.get_temp_file(config.CVS_REVS_SUMMARY_DATAFILE), + artifact_manager.get_temp_file( + config.CVS_REVS_SUMMARY_SORTED_DATAFILE)) + Log().quiet("Done") + + +class SortSymbolSummaryPass(Pass): + """Sort the symbol summary file.""" + + def register_artifacts(self): + self._register_temp_file(config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE) + self._register_temp_file_needed(config.CVS_SYMBOLS_SUMMARY_DATAFILE) + + def run(self, 
run_options, stats_keeper): + Log().quiet("Sorting CVS symbol summaries...") + sort_file( + artifact_manager.get_temp_file(config.CVS_SYMBOLS_SUMMARY_DATAFILE), + artifact_manager.get_temp_file( + config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE)) + Log().quiet("Done") + + +class InitializeChangesetsPass(Pass): + """Create preliminary CommitSets.""" + + def register_artifacts(self): + self._register_temp_file(config.CVS_ITEM_TO_CHANGESET) + self._register_temp_file(config.CHANGESETS_STORE) + self._register_temp_file(config.CHANGESETS_INDEX) + self._register_temp_file(config.CVS_ITEMS_SORTED_STORE) + self._register_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE) + self._register_temp_file_needed(config.PROJECTS) + self._register_temp_file_needed(config.SYMBOL_DB) + self._register_temp_file_needed(config.CVS_FILES_DB) + self._register_temp_file_needed(config.SUMMARY_SERIALIZER) + self._register_temp_file_needed(config.CVS_REVS_SUMMARY_SORTED_DATAFILE) + self._register_temp_file_needed( + config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE) + + def get_revision_changesets(self): + """Generate revision changesets, one at a time. + + Each time, yield a list of CVSRevisions that might potentially + constitute a changeset.""" + + # Create changesets for CVSRevisions: + old_metadata_id = None + old_timestamp = None + changeset_items = [] + + db = OldSortableCVSRevisionDatabase( + artifact_manager.get_temp_file( + config.CVS_REVS_SUMMARY_SORTED_DATAFILE + ), + self.cvs_item_serializer, + ) + + for cvs_rev in db: + if cvs_rev.metadata_id != old_metadata_id \ + or cvs_rev.timestamp > old_timestamp + config.COMMIT_THRESHOLD: + # Start a new changeset. 
First finish up the old changeset, + # if any: + if changeset_items: + yield changeset_items + changeset_items = [] + old_metadata_id = cvs_rev.metadata_id + changeset_items.append(cvs_rev) + old_timestamp = cvs_rev.timestamp + + # Finish up the last changeset, if any: + if changeset_items: + yield changeset_items + + def get_symbol_changesets(self): + """Generate symbol changesets, one at a time. + + Each time, yield a list of CVSSymbols that might potentially + constitute a changeset.""" + + old_symbol_id = None + changeset_items = [] + + db = OldSortableCVSSymbolDatabase( + artifact_manager.get_temp_file( + config.CVS_SYMBOLS_SUMMARY_SORTED_DATAFILE + ), + self.cvs_item_serializer, + ) + + for cvs_symbol in db: + if cvs_symbol.symbol.id != old_symbol_id: + # Start a new changeset. First finish up the old changeset, + # if any: + if changeset_items: + yield changeset_items + changeset_items = [] + old_symbol_id = cvs_symbol.symbol.id + changeset_items.append(cvs_symbol) + + # Finish up the last changeset, if any: + if changeset_items: + yield changeset_items + + @staticmethod + def compare_items(a, b): + return ( + cmp(a.timestamp, b.timestamp) + or cmp(a.cvs_file.cvs_path, b.cvs_file.cvs_path) + or cmp([int(x) for x in a.rev.split('.')], + [int(x) for x in b.rev.split('.')]) + or cmp(a.id, b.id)) + + def break_internal_dependencies(self, changeset_items): + """Split up CHANGESET_ITEMS if necessary to break internal dependencies. + + CHANGESET_ITEMS is a list of CVSRevisions that could possibly + belong in a single RevisionChangeset, but there might be internal + dependencies among the items. Return a list of lists, where each + sublist is a list of CVSRevisions and at least one internal + dependency has been eliminated. Iff CHANGESET_ITEMS does not have + to be split, then the return value will contain a single value, + namely the original value of CHANGESET_ITEMS. 
Split + CHANGESET_ITEMS at most once, even though the resulting changesets + might themselves have internal dependencies.""" + + # We only look for succ dependencies, since by doing so we + # automatically cover pred dependencies as well. First create a + # list of tuples (pred, succ) of id pairs for CVSItems that depend + # on each other. + dependencies = [] + changeset_cvs_item_ids = set([cvs_rev.id for cvs_rev in changeset_items]) + for cvs_item in changeset_items: + for next_id in cvs_item.get_succ_ids(): + if next_id in changeset_cvs_item_ids: + # Sanity check: a CVSItem should never depend on itself: + if next_id == cvs_item.id: + raise InternalError('Item depends on itself: %s' % (cvs_item,)) + + dependencies.append((cvs_item.id, next_id,)) + + if dependencies: + # Sort the changeset_items in a defined order (chronological to the + # extent that the timestamps are correct and unique). + changeset_items.sort(self.compare_items) + indexes = {} + for (i, changeset_item) in enumerate(changeset_items): + indexes[changeset_item.id] = i + # How many internal dependencies would be broken by breaking the + # Changeset after a particular index? + breaks = [0] * len(changeset_items) + for (pred, succ,) in dependencies: + pred_index = indexes[pred] + succ_index = indexes[succ] + breaks[min(pred_index, succ_index)] += 1 + breaks[max(pred_index, succ_index)] -= 1 + best_i = None + best_count = -1 + best_time = 0 + for i in range(1, len(breaks)): + breaks[i] += breaks[i - 1] + for i in range(0, len(breaks) - 1): + if breaks[i] > best_count: + best_i = i + best_count = breaks[i] + best_time = (changeset_items[i + 1].timestamp + - changeset_items[i].timestamp) + elif breaks[i] == best_count \ + and (changeset_items[i + 1].timestamp + - changeset_items[i].timestamp) < best_time: + best_i = i + best_count = breaks[i] + best_time = (changeset_items[i + 1].timestamp + - changeset_items[i].timestamp) + # Reuse the old changeset.id for the first of the split changesets. 
+ return [changeset_items[:best_i + 1], changeset_items[best_i + 1:]] + else: + return [changeset_items] + + def break_all_internal_dependencies(self, changeset_items): + """Keep breaking CHANGESET_ITEMS up to break all internal dependencies. + + CHANGESET_ITEMS is a list of CVSRevisions that could conceivably + be part of a single changeset. Break this list into sublists, + where the CVSRevisions in each sublist are free of mutual + dependencies.""" + + # This method is written non-recursively to avoid any possible + # problems with recursion depth. + + changesets_to_split = [changeset_items] + while changesets_to_split: + changesets = self.break_internal_dependencies(changesets_to_split.pop()) + if len(changesets) == 1: + [changeset_items] = changesets + yield changeset_items + else: + # The changeset had to be split; see if either of the + # fragments have to be split: + changesets.reverse() + changesets_to_split.extend(changesets) + + def get_changesets(self): + """Generate (Changeset, [CVSItem,...]) for all changesets. + + The Changesets already have their internal dependencies broken. + The [CVSItem,...] 
list is the list of CVSItems in the + corresponding Changeset.""" + + for changeset_items in self.get_revision_changesets(): + for split_changeset_items \ + in self.break_all_internal_dependencies(changeset_items): + yield ( + RevisionChangeset( + self.changeset_key_generator.gen_id(), + [cvs_rev.id for cvs_rev in split_changeset_items] + ), + split_changeset_items, + ) + + for changeset_items in self.get_symbol_changesets(): + yield ( + create_symbol_changeset( + self.changeset_key_generator.gen_id(), + changeset_items[0].symbol, + [cvs_symbol.id for cvs_symbol in changeset_items] + ), + changeset_items, + ) + + def run(self, run_options, stats_keeper): + Log().quiet("Creating preliminary commit sets...") + + Ctx()._projects = read_projects( + artifact_manager.get_temp_file(config.PROJECTS) + ) + Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) + Ctx()._symbol_db = SymbolDatabase() + + f = open(artifact_manager.get_temp_file(config.SUMMARY_SERIALIZER), 'rb') + self.cvs_item_serializer = cPickle.load(f) + f.close() + + changeset_db = ChangesetDatabase( + artifact_manager.get_temp_file(config.CHANGESETS_STORE), + artifact_manager.get_temp_file(config.CHANGESETS_INDEX), + DB_OPEN_NEW, + ) + cvs_item_to_changeset_id = CVSItemToChangesetTable( + artifact_manager.get_temp_file(config.CVS_ITEM_TO_CHANGESET), + DB_OPEN_NEW, + ) + + self.sorted_cvs_items_db = IndexedCVSItemStore( + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), + DB_OPEN_NEW) + + self.changeset_key_generator = KeyGenerator() + + for (changeset, changeset_items) in self.get_changesets(): + if Log().is_on(Log.DEBUG): + Log().debug(repr(changeset)) + changeset_db.store(changeset) + for cvs_item in changeset_items: + self.sorted_cvs_items_db.add(cvs_item) + cvs_item_to_changeset_id[cvs_item.id] = changeset.id + + self.sorted_cvs_items_db.close() + cvs_item_to_changeset_id.close() + changeset_db.close() + 
Ctx()._symbol_db.close() + Ctx()._cvs_file_db.close() + + del self.cvs_item_serializer + + Log().quiet("Done") + + +class ProcessedChangesetLogger: + def __init__(self): + self.processed_changeset_ids = [] + + def log(self, changeset_id): + if Log().is_on(Log.DEBUG): + self.processed_changeset_ids.append(changeset_id) + + def flush(self): + if self.processed_changeset_ids: + Log().debug( + 'Consumed changeset ids %s' + % (', '.join(['%x' % id for id in self.processed_changeset_ids]),)) + + del self.processed_changeset_ids[:] + + +class BreakRevisionChangesetCyclesPass(Pass): + """Break up any dependency cycles involving only RevisionChangesets.""" + + def register_artifacts(self): + self._register_temp_file(config.CHANGESETS_REVBROKEN_STORE) + self._register_temp_file(config.CHANGESETS_REVBROKEN_INDEX) + self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_REVBROKEN) + self._register_temp_file_needed(config.PROJECTS) + self._register_temp_file_needed(config.SYMBOL_DB) + self._register_temp_file_needed(config.CVS_FILES_DB) + self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE) + self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE) + self._register_temp_file_needed(config.CHANGESETS_STORE) + self._register_temp_file_needed(config.CHANGESETS_INDEX) + self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET) + + def get_source_changesets(self): + old_changeset_db = ChangesetDatabase( + artifact_manager.get_temp_file(config.CHANGESETS_STORE), + artifact_manager.get_temp_file(config.CHANGESETS_INDEX), + DB_OPEN_READ) + + changeset_ids = old_changeset_db.keys() + + for changeset_id in changeset_ids: + yield old_changeset_db[changeset_id] + + old_changeset_db.close() + del old_changeset_db + + def break_cycle(self, cycle): + """Break up one or more changesets in CYCLE to help break the cycle. 
+ + CYCLE is a list of Changesets where + + cycle[i] depends on cycle[i - 1] + + Break up one or more changesets in CYCLE to make progress towards + breaking the cycle. Update self.changeset_graph accordingly. + + It is not guaranteed that the cycle will be broken by one call to + this routine, but at least some progress must be made.""" + + self.processed_changeset_logger.flush() + best_i = None + best_link = None + for i in range(len(cycle)): + # It's OK if this index wraps to -1: + link = ChangesetGraphLink( + cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)]) + + if best_i is None or link < best_link: + best_i = i + best_link = link + + if Log().is_on(Log.DEBUG): + Log().debug( + 'Breaking cycle %s by breaking node %x' % ( + ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]), + best_link.changeset.id,)) + + new_changesets = best_link.break_changeset(self.changeset_key_generator) + + self.changeset_graph.delete_changeset(best_link.changeset) + + for changeset in new_changesets: + self.changeset_graph.add_new_changeset(changeset) + + def run(self, run_options, stats_keeper): + Log().quiet("Breaking revision changeset dependency cycles...") + + Ctx()._projects = read_projects( + artifact_manager.get_temp_file(config.PROJECTS) + ) + Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) + Ctx()._symbol_db = SymbolDatabase() + Ctx()._cvs_items_db = IndexedCVSItemStore( + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), + DB_OPEN_READ) + + shutil.copyfile( + artifact_manager.get_temp_file( + config.CVS_ITEM_TO_CHANGESET), + artifact_manager.get_temp_file( + config.CVS_ITEM_TO_CHANGESET_REVBROKEN)) + cvs_item_to_changeset_id = CVSItemToChangesetTable( + artifact_manager.get_temp_file( + config.CVS_ITEM_TO_CHANGESET_REVBROKEN), + DB_OPEN_WRITE) + + changeset_db = ChangesetDatabase( + artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE), + 
artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX), + DB_OPEN_NEW) + + self.changeset_graph = ChangesetGraph( + changeset_db, cvs_item_to_changeset_id + ) + + max_changeset_id = 0 + for changeset in self.get_source_changesets(): + changeset_db.store(changeset) + if isinstance(changeset, RevisionChangeset): + self.changeset_graph.add_changeset(changeset) + max_changeset_id = max(max_changeset_id, changeset.id) + + self.changeset_key_generator = KeyGenerator(max_changeset_id + 1) + + self.processed_changeset_logger = ProcessedChangesetLogger() + + # Consume the graph, breaking cycles using self.break_cycle(): + for (changeset, time_range) in self.changeset_graph.consume_graph( + cycle_breaker=self.break_cycle + ): + self.processed_changeset_logger.log(changeset.id) + + self.processed_changeset_logger.flush() + del self.processed_changeset_logger + + self.changeset_graph.close() + self.changeset_graph = None + Ctx()._cvs_items_db.close() + Ctx()._symbol_db.close() + Ctx()._cvs_file_db.close() + + Log().quiet("Done") + + +class RevisionTopologicalSortPass(Pass): + """Sort RevisionChangesets into commit order. 
+ + Also convert them to OrderedChangesets, without changing their ids.""" + + def register_artifacts(self): + self._register_temp_file(config.CHANGESETS_REVSORTED_STORE) + self._register_temp_file(config.CHANGESETS_REVSORTED_INDEX) + self._register_temp_file_needed(config.PROJECTS) + self._register_temp_file_needed(config.SYMBOL_DB) + self._register_temp_file_needed(config.CVS_FILES_DB) + self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE) + self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE) + self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_STORE) + self._register_temp_file_needed(config.CHANGESETS_REVBROKEN_INDEX) + self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN) + + def get_source_changesets(self, changeset_db): + changeset_ids = changeset_db.keys() + + for changeset_id in changeset_ids: + yield changeset_db[changeset_id] + + def get_changesets(self): + changeset_db = ChangesetDatabase( + artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_STORE), + artifact_manager.get_temp_file(config.CHANGESETS_REVBROKEN_INDEX), + DB_OPEN_READ, + ) + + changeset_graph = ChangesetGraph( + changeset_db, + CVSItemToChangesetTable( + artifact_manager.get_temp_file( + config.CVS_ITEM_TO_CHANGESET_REVBROKEN + ), + DB_OPEN_READ, + ) + ) + + for changeset in self.get_source_changesets(changeset_db): + if isinstance(changeset, RevisionChangeset): + changeset_graph.add_changeset(changeset) + else: + yield changeset + + changeset_ids = [] + + # Sentry: + changeset_ids.append(None) + + for (changeset, time_range) in changeset_graph.consume_graph(): + changeset_ids.append(changeset.id) + + # Sentry: + changeset_ids.append(None) + + for i in range(1, len(changeset_ids) - 1): + changeset = changeset_db[changeset_ids[i]] + yield OrderedChangeset( + changeset.id, changeset.cvs_item_ids, i - 1, + changeset_ids[i - 1], changeset_ids[i + 1]) + + changeset_graph.close() + + def run(self, run_options, stats_keeper): + 
Log().quiet("Generating CVSRevisions in commit order...") + + Ctx()._projects = read_projects( + artifact_manager.get_temp_file(config.PROJECTS) + ) + Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) + Ctx()._symbol_db = SymbolDatabase() + Ctx()._cvs_items_db = IndexedCVSItemStore( + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), + DB_OPEN_READ) + + changesets_revordered_db = ChangesetDatabase( + artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE), + artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX), + DB_OPEN_NEW) + + for changeset in self.get_changesets(): + changesets_revordered_db.store(changeset) + + changesets_revordered_db.close() + Ctx()._cvs_items_db.close() + Ctx()._symbol_db.close() + Ctx()._cvs_file_db.close() + + Log().quiet("Done") + + +class BreakSymbolChangesetCyclesPass(Pass): + """Break up any dependency cycles involving only SymbolChangesets.""" + + def register_artifacts(self): + self._register_temp_file(config.CHANGESETS_SYMBROKEN_STORE) + self._register_temp_file(config.CHANGESETS_SYMBROKEN_INDEX) + self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN) + self._register_temp_file_needed(config.PROJECTS) + self._register_temp_file_needed(config.SYMBOL_DB) + self._register_temp_file_needed(config.CVS_FILES_DB) + self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE) + self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE) + self._register_temp_file_needed(config.CHANGESETS_REVSORTED_STORE) + self._register_temp_file_needed(config.CHANGESETS_REVSORTED_INDEX) + self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_REVBROKEN) + + def get_source_changesets(self): + old_changeset_db = ChangesetDatabase( + artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_STORE), + artifact_manager.get_temp_file(config.CHANGESETS_REVSORTED_INDEX), + DB_OPEN_READ) + + changeset_ids = 
old_changeset_db.keys() + + for changeset_id in changeset_ids: + yield old_changeset_db[changeset_id] + + old_changeset_db.close() + + def break_cycle(self, cycle): + """Break up one or more changesets in CYCLE to help break the cycle. + + CYCLE is a list of Changesets where + + cycle[i] depends on cycle[i - 1] + + Break up one or more changesets in CYCLE to make progress towards + breaking the cycle. Update self.changeset_graph accordingly. + + It is not guaranteed that the cycle will be broken by one call to + this routine, but at least some progress must be made.""" + + self.processed_changeset_logger.flush() + best_i = None + best_link = None + for i in range(len(cycle)): + # It's OK if this index wraps to -1: + link = ChangesetGraphLink( + cycle[i - 1], cycle[i], cycle[i + 1 - len(cycle)]) + + if best_i is None or link < best_link: + best_i = i + best_link = link + + if Log().is_on(Log.DEBUG): + Log().debug( + 'Breaking cycle %s by breaking node %x' % ( + ' -> '.join(['%x' % node.id for node in (cycle + [cycle[0]])]), + best_link.changeset.id,)) + + new_changesets = best_link.break_changeset(self.changeset_key_generator) + + self.changeset_graph.delete_changeset(best_link.changeset) + + for changeset in new_changesets: + self.changeset_graph.add_new_changeset(changeset) + + def run(self, run_options, stats_keeper): + Log().quiet("Breaking symbol changeset dependency cycles...") + + Ctx()._projects = read_projects( + artifact_manager.get_temp_file(config.PROJECTS) + ) + Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) + Ctx()._symbol_db = SymbolDatabase() + Ctx()._cvs_items_db = IndexedCVSItemStore( + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), + DB_OPEN_READ) + + shutil.copyfile( + artifact_manager.get_temp_file( + config.CVS_ITEM_TO_CHANGESET_REVBROKEN), + artifact_manager.get_temp_file( + config.CVS_ITEM_TO_CHANGESET_SYMBROKEN)) + cvs_item_to_changeset_id = 
CVSItemToChangesetTable( + artifact_manager.get_temp_file( + config.CVS_ITEM_TO_CHANGESET_SYMBROKEN), + DB_OPEN_WRITE) + + changeset_db = ChangesetDatabase( + artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE), + artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX), + DB_OPEN_NEW) + + self.changeset_graph = ChangesetGraph( + changeset_db, cvs_item_to_changeset_id + ) + + max_changeset_id = 0 + for changeset in self.get_source_changesets(): + changeset_db.store(changeset) + if isinstance(changeset, SymbolChangeset): + self.changeset_graph.add_changeset(changeset) + max_changeset_id = max(max_changeset_id, changeset.id) + + self.changeset_key_generator = KeyGenerator(max_changeset_id + 1) + + self.processed_changeset_logger = ProcessedChangesetLogger() + + # Consume the graph, breaking cycles using self.break_cycle(): + for (changeset, time_range) in self.changeset_graph.consume_graph( + cycle_breaker=self.break_cycle + ): + self.processed_changeset_logger.log(changeset.id) + + self.processed_changeset_logger.flush() + del self.processed_changeset_logger + + self.changeset_graph.close() + self.changeset_graph = None + Ctx()._cvs_items_db.close() + Ctx()._symbol_db.close() + Ctx()._cvs_file_db.close() + + Log().quiet("Done") + + +class BreakAllChangesetCyclesPass(Pass): + """Break up any dependency cycles that are closed by SymbolChangesets.""" + + def register_artifacts(self): + self._register_temp_file(config.CHANGESETS_ALLBROKEN_STORE) + self._register_temp_file(config.CHANGESETS_ALLBROKEN_INDEX) + self._register_temp_file(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN) + self._register_temp_file_needed(config.PROJECTS) + self._register_temp_file_needed(config.SYMBOL_DB) + self._register_temp_file_needed(config.CVS_FILES_DB) + self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE) + self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE) + self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_STORE) + 
self._register_temp_file_needed(config.CHANGESETS_SYMBROKEN_INDEX) + self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_SYMBROKEN) + + def get_source_changesets(self): + old_changeset_db = ChangesetDatabase( + artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_STORE), + artifact_manager.get_temp_file(config.CHANGESETS_SYMBROKEN_INDEX), + DB_OPEN_READ) + + changeset_ids = old_changeset_db.keys() + + for changeset_id in changeset_ids: + yield old_changeset_db[changeset_id] + + old_changeset_db.close() + + def _split_retrograde_changeset(self, changeset): + """CHANGESET is retrograde. Split it into non-retrograde changesets.""" + + Log().debug('Breaking retrograde changeset %x' % (changeset.id,)) + + self.changeset_graph.delete_changeset(changeset) + + # A map { cvs_branch_id : (max_pred_ordinal, min_succ_ordinal) } + ordinal_limits = {} + for cvs_branch in changeset.iter_cvs_items(): + max_pred_ordinal = 0 + min_succ_ordinal = sys.maxint + + for pred_id in cvs_branch.get_pred_ids(): + pred_ordinal = self.ordinals.get( + self.cvs_item_to_changeset_id[pred_id], 0) + max_pred_ordinal = max(max_pred_ordinal, pred_ordinal) + + for succ_id in cvs_branch.get_succ_ids(): + succ_ordinal = self.ordinals.get( + self.cvs_item_to_changeset_id[succ_id], sys.maxint) + min_succ_ordinal = min(min_succ_ordinal, succ_ordinal) + + assert max_pred_ordinal < min_succ_ordinal + ordinal_limits[cvs_branch.id] = (max_pred_ordinal, min_succ_ordinal,) + + # Find the earliest successor ordinal: + min_min_succ_ordinal = sys.maxint + for (max_pred_ordinal, min_succ_ordinal) in ordinal_limits.values(): + min_min_succ_ordinal = min(min_min_succ_ordinal, min_succ_ordinal) + + early_item_ids = [] + late_item_ids = [] + for (id, (max_pred_ordinal, min_succ_ordinal)) in ordinal_limits.items(): + if max_pred_ordinal >= min_min_succ_ordinal: + late_item_ids.append(id) + else: + early_item_ids.append(id) + + assert early_item_ids + assert late_item_ids + + early_changeset = 
changeset.create_split_changeset( + self.changeset_key_generator.gen_id(), early_item_ids) + late_changeset = changeset.create_split_changeset( + self.changeset_key_generator.gen_id(), late_item_ids) + + self.changeset_graph.add_new_changeset(early_changeset) + self.changeset_graph.add_new_changeset(late_changeset) + + early_split = self._split_if_retrograde(early_changeset.id) + + # Because of the way we constructed it, the early changeset should + # not have to be split: + assert not early_split + + self._split_if_retrograde(late_changeset.id) + + def _split_if_retrograde(self, changeset_id): + node = self.changeset_graph[changeset_id] + pred_ordinals = [ + self.ordinals[id] + for id in node.pred_ids + if id in self.ordinals + ] + pred_ordinals.sort() + succ_ordinals = [ + self.ordinals[id] + for id in node.succ_ids + if id in self.ordinals + ] + succ_ordinals.sort() + if pred_ordinals and succ_ordinals \ + and pred_ordinals[-1] >= succ_ordinals[0]: + self._split_retrograde_changeset(self.changeset_db[node.id]) + return True + else: + return False + + def break_segment(self, segment): + """Break a changeset in SEGMENT[1:-1]. + + The range SEGMENT[1:-1] is not empty, and all of the changesets in + that range are SymbolChangesets.""" + + best_i = None + best_link = None + for i in range(1, len(segment) - 1): + link = ChangesetGraphLink(segment[i - 1], segment[i], segment[i + 1]) + + if best_i is None or link < best_link: + best_i = i + best_link = link + + if Log().is_on(Log.DEBUG): + Log().debug( + 'Breaking segment %s by breaking node %x' % ( + ' -> '.join(['%x' % node.id for node in segment]), + best_link.changeset.id,)) + + new_changesets = best_link.break_changeset(self.changeset_key_generator) + + self.changeset_graph.delete_changeset(best_link.changeset) + + for changeset in new_changesets: + self.changeset_graph.add_new_changeset(changeset) + + def break_cycle(self, cycle): + """Break up one or more SymbolChangesets in CYCLE to help break the cycle. 
+ + CYCLE is a list of SymbolChangesets where + + cycle[i] depends on cycle[i - 1] + + . Break up one or more changesets in CYCLE to make progress + towards breaking the cycle. Update self.changeset_graph + accordingly. + + It is not guaranteed that the cycle will be broken by one call to + this routine, but at least some progress must be made.""" + + if Log().is_on(Log.DEBUG): + Log().debug( + 'Breaking cycle %s' % ( + ' -> '.join(['%x' % changeset.id + for changeset in cycle + [cycle[0]]]),)) + + # Unwrap the cycle into a segment then break the segment: + self.break_segment([cycle[-1]] + cycle + [cycle[0]]) + + def run(self, run_options, stats_keeper): + Log().quiet("Breaking CVSSymbol dependency loops...") + + Ctx()._projects = read_projects( + artifact_manager.get_temp_file(config.PROJECTS) + ) + Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) + Ctx()._symbol_db = SymbolDatabase() + Ctx()._cvs_items_db = IndexedCVSItemStore( + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), + DB_OPEN_READ) + + shutil.copyfile( + artifact_manager.get_temp_file( + config.CVS_ITEM_TO_CHANGESET_SYMBROKEN), + artifact_manager.get_temp_file( + config.CVS_ITEM_TO_CHANGESET_ALLBROKEN)) + self.cvs_item_to_changeset_id = CVSItemToChangesetTable( + artifact_manager.get_temp_file( + config.CVS_ITEM_TO_CHANGESET_ALLBROKEN), + DB_OPEN_WRITE) + + self.changeset_db = ChangesetDatabase( + artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE), + artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX), + DB_OPEN_NEW) + + self.changeset_graph = ChangesetGraph( + self.changeset_db, self.cvs_item_to_changeset_id + ) + + # A map {changeset_id : ordinal} for OrderedChangesets: + self.ordinals = {} + # A map {ordinal : changeset_id}: + ordered_changeset_map = {} + # A list of all BranchChangeset ids: + branch_changeset_ids = [] + max_changeset_id = 0 + for changeset in 
self.get_source_changesets(): + self.changeset_db.store(changeset) + self.changeset_graph.add_changeset(changeset) + if isinstance(changeset, OrderedChangeset): + ordered_changeset_map[changeset.ordinal] = changeset.id + self.ordinals[changeset.id] = changeset.ordinal + elif isinstance(changeset, BranchChangeset): + branch_changeset_ids.append(changeset.id) + max_changeset_id = max(max_changeset_id, changeset.id) + + # An array of ordered_changeset ids, indexed by ordinal: + ordered_changesets = [] + for ordinal in range(len(ordered_changeset_map)): + id = ordered_changeset_map[ordinal] + ordered_changesets.append(id) + + ordered_changeset_ids = set(ordered_changeset_map.values()) + del ordered_changeset_map + + self.changeset_key_generator = KeyGenerator(max_changeset_id + 1) + + # First we scan through all BranchChangesets looking for + # changesets that are individually "retrograde" and splitting + # those up: + for changeset_id in branch_changeset_ids: + self._split_if_retrograde(changeset_id) + + del self.ordinals + + next_ordered_changeset = 0 + + self.processed_changeset_logger = ProcessedChangesetLogger() + + while self.changeset_graph: + # Consume any nodes that don't have predecessors: + for (changeset, time_range) \ + in self.changeset_graph.consume_nopred_nodes(): + self.processed_changeset_logger.log(changeset.id) + if changeset.id in ordered_changeset_ids: + next_ordered_changeset += 1 + ordered_changeset_ids.remove(changeset.id) + + self.processed_changeset_logger.flush() + + if not self.changeset_graph: + break + + # Now work on the next ordered changeset that has not yet been + # processed. 
BreakSymbolChangesetCyclesPass has broken any + # cycles involving only SymbolChangesets, so the presence of a + # cycle implies that there is at least one ordered changeset + # left in the graph: + assert next_ordered_changeset < len(ordered_changesets) + + id = ordered_changesets[next_ordered_changeset] + path = self.changeset_graph.search_for_path(id, ordered_changeset_ids) + if path: + if Log().is_on(Log.DEBUG): + Log().debug('Breaking path from %s to %s' % (path[0], path[-1],)) + self.break_segment(path) + else: + # There were no ordered changesets among the reachable + # predecessors, so do generic cycle-breaking: + if Log().is_on(Log.DEBUG): + Log().debug( + 'Breaking generic cycle found from %s' + % (self.changeset_db[id],) + ) + self.break_cycle(self.changeset_graph.find_cycle(id)) + + del self.processed_changeset_logger + self.changeset_graph.close() + self.changeset_graph = None + self.cvs_item_to_changeset_id = None + self.changeset_db = None + + Log().quiet("Done") + + +class TopologicalSortPass(Pass): + """Sort changesets into commit order.""" + + def register_artifacts(self): + self._register_temp_file(config.CHANGESETS_SORTED_DATAFILE) + self._register_temp_file_needed(config.PROJECTS) + self._register_temp_file_needed(config.SYMBOL_DB) + self._register_temp_file_needed(config.CVS_FILES_DB) + self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE) + self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE) + self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE) + self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX) + self._register_temp_file_needed(config.CVS_ITEM_TO_CHANGESET_ALLBROKEN) + + def get_source_changesets(self, changeset_db): + for changeset_id in changeset_db.keys(): + yield changeset_db[changeset_id] + + def get_changesets(self): + """Generate (changeset, timestamp) pairs in commit order.""" + + changeset_db = ChangesetDatabase( + 
artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE), + artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX), + DB_OPEN_READ) + + changeset_graph = ChangesetGraph( + changeset_db, + CVSItemToChangesetTable( + artifact_manager.get_temp_file( + config.CVS_ITEM_TO_CHANGESET_ALLBROKEN + ), + DB_OPEN_READ, + ), + ) + symbol_changeset_ids = set() + + for changeset in self.get_source_changesets(changeset_db): + changeset_graph.add_changeset(changeset) + if isinstance(changeset, SymbolChangeset): + symbol_changeset_ids.add(changeset.id) + + # Ensure a monotonically-increasing timestamp series by keeping + # track of the previous timestamp and ensuring that the following + # one is larger. + timestamper = Timestamper() + + for (changeset, time_range) in changeset_graph.consume_graph(): + timestamp = timestamper.get( + time_range.t_max, changeset.id in symbol_changeset_ids + ) + yield (changeset, timestamp) + + changeset_graph.close() + + def run(self, run_options, stats_keeper): + Log().quiet("Generating CVSRevisions in commit order...") + + Ctx()._projects = read_projects( + artifact_manager.get_temp_file(config.PROJECTS) + ) + Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) + Ctx()._symbol_db = SymbolDatabase() + Ctx()._cvs_items_db = IndexedCVSItemStore( + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), + DB_OPEN_READ) + + sorted_changesets = open( + artifact_manager.get_temp_file(config.CHANGESETS_SORTED_DATAFILE), + 'w') + + for (changeset, timestamp) in self.get_changesets(): + sorted_changesets.write('%x %08x\n' % (changeset.id, timestamp,)) + + sorted_changesets.close() + + Ctx()._cvs_items_db.close() + Ctx()._symbol_db.close() + Ctx()._cvs_file_db.close() + + Log().quiet("Done") + + +class CreateRevsPass(Pass): + """Generate the SVNCommit <-> CVSRevision mapping databases. 
+ + SVNCommitCreator also calls SymbolingsLogger to register + CVSRevisions that represent an opening or closing for a path on a + branch or tag. See SymbolingsLogger for more details. + + This pass was formerly known as pass5.""" + + def register_artifacts(self): + self._register_temp_file(config.SVN_COMMITS_INDEX_TABLE) + self._register_temp_file(config.SVN_COMMITS_STORE) + self._register_temp_file(config.CVS_REVS_TO_SVN_REVNUMS) + self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS) + self._register_temp_file_needed(config.PROJECTS) + self._register_temp_file_needed(config.CVS_FILES_DB) + self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE) + self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE) + self._register_temp_file_needed(config.SYMBOL_DB) + self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_STORE) + self._register_temp_file_needed(config.CHANGESETS_ALLBROKEN_INDEX) + self._register_temp_file_needed(config.CHANGESETS_SORTED_DATAFILE) + + def get_changesets(self): + """Generate (changeset,timestamp,) tuples in commit order.""" + + changeset_db = ChangesetDatabase( + artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_STORE), + artifact_manager.get_temp_file(config.CHANGESETS_ALLBROKEN_INDEX), + DB_OPEN_READ) + + for line in file( + artifact_manager.get_temp_file( + config.CHANGESETS_SORTED_DATAFILE)): + [changeset_id, timestamp] = [int(s, 16) for s in line.strip().split()] + yield (changeset_db[changeset_id], timestamp) + + changeset_db.close() + + def get_svn_commits(self, creator): + """Generate the SVNCommits, in order.""" + + for (changeset, timestamp) in self.get_changesets(): + for svn_commit in creator.process_changeset(changeset, timestamp): + yield svn_commit + + def log_svn_commit(self, svn_commit): + """Output information about SVN_COMMIT.""" + + Log().normal( + 'Creating Subversion r%d (%s)' + % (svn_commit.revnum, svn_commit.get_description(),) + ) + + if isinstance(svn_commit, SVNRevisionCommit): 
+ for cvs_rev in svn_commit.cvs_revs: + Log().verbose(' %s %s' % (cvs_rev.cvs_path, cvs_rev.rev,)) + + def run(self, run_options, stats_keeper): + Log().quiet("Mapping CVS revisions to Subversion commits...") + + Ctx()._projects = read_projects( + artifact_manager.get_temp_file(config.PROJECTS) + ) + Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) + Ctx()._symbol_db = SymbolDatabase() + Ctx()._cvs_items_db = IndexedCVSItemStore( + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), + DB_OPEN_READ) + + Ctx()._symbolings_logger = SymbolingsLogger() + + persistence_manager = PersistenceManager(DB_OPEN_NEW) + + creator = SVNCommitCreator() + for svn_commit in self.get_svn_commits(creator): + self.log_svn_commit(svn_commit) + persistence_manager.put_svn_commit(svn_commit) + + stats_keeper.set_svn_rev_count(creator.revnum_generator.get_last_id()) + del creator + + persistence_manager.close() + Ctx()._symbolings_logger.close() + Ctx()._cvs_items_db.close() + Ctx()._symbol_db.close() + Ctx()._cvs_file_db.close() + + Log().quiet("Done") + + +class SortSymbolsPass(Pass): + """This pass was formerly known as pass6.""" + + def register_artifacts(self): + self._register_temp_file(config.SYMBOL_OPENINGS_CLOSINGS_SORTED) + self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS) + + def run(self, run_options, stats_keeper): + Log().quiet("Sorting symbolic name source revisions...") + + sort_file( + artifact_manager.get_temp_file(config.SYMBOL_OPENINGS_CLOSINGS), + artifact_manager.get_temp_file( + config.SYMBOL_OPENINGS_CLOSINGS_SORTED), + options=['-k', '1,1', '-k', '2,2n', '-k', '3'], + ) + Log().quiet("Done") + + +class IndexSymbolsPass(Pass): + """This pass was formerly known as pass7.""" + + def register_artifacts(self): + self._register_temp_file(config.SYMBOL_OFFSETS_DB) + self._register_temp_file_needed(config.PROJECTS) + self._register_temp_file_needed(config.SYMBOL_DB) + 
self._register_temp_file_needed(config.SYMBOL_OPENINGS_CLOSINGS_SORTED) + + def generate_offsets_for_symbolings(self): + """This function iterates through all the lines in + SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping + SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED + where SYMBOLIC_NAME is first encountered. This will allow us to + seek to the various offsets in the file and sequentially read only + the openings and closings that we need.""" + + offsets = {} + + f = open( + artifact_manager.get_temp_file( + config.SYMBOL_OPENINGS_CLOSINGS_SORTED), + 'r') + old_id = None + while True: + fpos = f.tell() + line = f.readline() + if not line: + break + id, svn_revnum, ignored = line.split(" ", 2) + id = int(id, 16) + if id != old_id: + Log().verbose(' ', Ctx()._symbol_db.get_symbol(id).name) + old_id = id + offsets[id] = fpos + + f.close() + + offsets_db = file( + artifact_manager.get_temp_file(config.SYMBOL_OFFSETS_DB), 'wb') + cPickle.dump(offsets, offsets_db, -1) + offsets_db.close() + + def run(self, run_options, stats_keeper): + Log().quiet("Determining offsets for all symbolic names...") + Ctx()._projects = read_projects( + artifact_manager.get_temp_file(config.PROJECTS) + ) + Ctx()._symbol_db = SymbolDatabase() + self.generate_offsets_for_symbolings() + Ctx()._symbol_db.close() + Log().quiet("Done.") + + +class OutputPass(Pass): + """This pass was formerly known as pass8.""" + + def register_artifacts(self): + self._register_temp_file_needed(config.PROJECTS) + self._register_temp_file_needed(config.CVS_FILES_DB) + self._register_temp_file_needed(config.CVS_ITEMS_SORTED_STORE) + self._register_temp_file_needed(config.CVS_ITEMS_SORTED_INDEX_TABLE) + self._register_temp_file_needed(config.SYMBOL_DB) + self._register_temp_file_needed(config.METADATA_CLEAN_INDEX_TABLE) + self._register_temp_file_needed(config.METADATA_CLEAN_STORE) + self._register_temp_file_needed(config.SVN_COMMITS_INDEX_TABLE) + 
self._register_temp_file_needed(config.SVN_COMMITS_STORE) + self._register_temp_file_needed(config.CVS_REVS_TO_SVN_REVNUMS) + Ctx().output_option.register_artifacts(self) + + def get_svn_commits(self): + """Generate the SVNCommits in commit order.""" + + persistence_manager = PersistenceManager(DB_OPEN_READ) + + svn_revnum = 1 # The first non-trivial commit + + # Peek at the first revision to find the date to use to initialize + # the repository: + svn_commit = persistence_manager.get_svn_commit(svn_revnum) + + while svn_commit: + yield svn_commit + svn_revnum += 1 + svn_commit = persistence_manager.get_svn_commit(svn_revnum) + + persistence_manager.close() + + def run(self, run_options, stats_keeper): + Ctx()._projects = read_projects( + artifact_manager.get_temp_file(config.PROJECTS) + ) + Ctx()._cvs_file_db = CVSFileDatabase(DB_OPEN_READ) + Ctx()._metadata_db = MetadataDatabase( + artifact_manager.get_temp_file(config.METADATA_CLEAN_STORE), + artifact_manager.get_temp_file(config.METADATA_CLEAN_INDEX_TABLE), + DB_OPEN_READ, + ) + Ctx()._cvs_items_db = IndexedCVSItemStore( + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_STORE), + artifact_manager.get_temp_file(config.CVS_ITEMS_SORTED_INDEX_TABLE), + DB_OPEN_READ) + Ctx()._symbol_db = SymbolDatabase() + + Ctx().output_option.setup(stats_keeper.svn_rev_count()) + + for svn_commit in self.get_svn_commits(): + svn_commit.output(Ctx().output_option) + + Ctx().output_option.cleanup() + + Ctx()._symbol_db.close() + Ctx()._cvs_items_db.close() + Ctx()._metadata_db.close() + Ctx()._cvs_file_db.close() + + +# The list of passes constituting a run of cvs2svn: +passes = [ + CollectRevsPass(), + CleanMetadataPass(), + CollateSymbolsPass(), + #CheckItemStoreDependenciesPass(config.CVS_ITEMS_STORE), + FilterSymbolsPass(), + SortRevisionSummaryPass(), + SortSymbolSummaryPass(), + InitializeChangesetsPass(), + #CheckIndexedItemStoreDependenciesPass( + # config.CVS_ITEMS_SORTED_STORE, + # 
config.CVS_ITEMS_SORTED_INDEX_TABLE), + BreakRevisionChangesetCyclesPass(), + RevisionTopologicalSortPass(), + BreakSymbolChangesetCyclesPass(), + BreakAllChangesetCyclesPass(), + TopologicalSortPass(), + CreateRevsPass(), + SortSymbolsPass(), + IndexSymbolsPass(), + OutputPass(), + ] + + diff --git a/cvs2svn_lib/persistence_manager.py b/cvs2svn_lib/persistence_manager.py new file mode 100644 index 0000000..8a622ab --- /dev/null +++ b/cvs2svn_lib/persistence_manager.py @@ -0,0 +1,106 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
+# ==================================================================== + +"""This module contains class PersistenceManager.""" + + +from cvs2svn_lib import config +from cvs2svn_lib.common import DB_OPEN_NEW +from cvs2svn_lib.common import DB_OPEN_READ +from cvs2svn_lib.common import SVN_INVALID_REVNUM +from cvs2svn_lib.artifact_manager import artifact_manager +from cvs2svn_lib.record_table import SignedIntegerPacker +from cvs2svn_lib.record_table import RecordTable +from cvs2svn_lib.serializer import PrimedPickleSerializer +from cvs2svn_lib.database import IndexedDatabase +from cvs2svn_lib.svn_commit import SVNRevisionCommit +from cvs2svn_lib.svn_commit import SVNInitialProjectCommit +from cvs2svn_lib.svn_commit import SVNPrimaryCommit +from cvs2svn_lib.svn_commit import SVNBranchCommit +from cvs2svn_lib.svn_commit import SVNTagCommit +from cvs2svn_lib.svn_commit import SVNPostCommit + + +class PersistenceManager: + """The PersistenceManager allows us to effectively store SVNCommits + to disk and retrieve them later using only their subversion revision + number as the key. It also returns the subversion revision number + for a given CVSRevision's unique key. + + All information pertinent to each SVNCommit is stored in a series of + on-disk databases so that SVNCommits can be retrieved on-demand. + + MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ. + In 'new' mode, PersistenceManager will initialize a new set of on-disk + databases and be fully-featured. 
+ In 'read' mode, PersistenceManager will open existing on-disk databases + and the set_* methods will be unavailable.""" + + def __init__(self, mode): + self.mode = mode + if mode not in (DB_OPEN_NEW, DB_OPEN_READ): + raise RuntimeError, "Invalid 'mode' argument to PersistenceManager" + primer = ( + SVNInitialProjectCommit, + SVNPrimaryCommit, + SVNPostCommit, + SVNBranchCommit, + SVNTagCommit, + ) + serializer = PrimedPickleSerializer(primer) + self.svn_commit_db = IndexedDatabase( + artifact_manager.get_temp_file(config.SVN_COMMITS_INDEX_TABLE), + artifact_manager.get_temp_file(config.SVN_COMMITS_STORE), + mode, serializer) + self.cvs2svn_db = RecordTable( + artifact_manager.get_temp_file(config.CVS_REVS_TO_SVN_REVNUMS), + mode, SignedIntegerPacker(SVN_INVALID_REVNUM)) + + def get_svn_revnum(self, cvs_rev_id): + """Return the Subversion revision number in which CVS_REV_ID was + committed, or SVN_INVALID_REVNUM if there is no mapping for + CVS_REV_ID.""" + + return self.cvs2svn_db.get(cvs_rev_id, SVN_INVALID_REVNUM) + + def get_svn_commit(self, svn_revnum): + """Return an SVNCommit that corresponds to SVN_REVNUM. 
+ + If no SVNCommit exists for revnum SVN_REVNUM, then return None.""" + + return self.svn_commit_db.get(svn_revnum, None) + + def put_svn_commit(self, svn_commit): + """Record the bidirectional mapping between SVN_REVNUM and + CVS_REVS and record associated attributes.""" + + if self.mode == DB_OPEN_READ: + raise RuntimeError, \ + 'Write operation attempted on read-only PersistenceManager' + + self.svn_commit_db[svn_commit.revnum] = svn_commit + + if isinstance(svn_commit, SVNRevisionCommit): + for cvs_rev in svn_commit.cvs_revs: + self.cvs2svn_db[cvs_rev.id] = svn_commit.revnum + + def close(self): + self.cvs2svn_db.close() + self.cvs2svn_db = None + self.svn_commit_db.close() + self.svn_commit_db = None + + diff --git a/cvs2svn_lib/process.py b/cvs2svn_lib/process.py new file mode 100644 index 0000000..56469ce --- /dev/null +++ b/cvs2svn_lib/process.py @@ -0,0 +1,116 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains generic utilities used by cvs2svn.""" + + +import subprocess + +from cvs2svn_lib.common import FatalError +from cvs2svn_lib.common import CommandError + + +def call_command(command, **kw): + """Call the specified command, checking that it exits successfully. 
+ + Raise a FatalError if the command cannot be executed, or if it exits + with a non-zero exit code. Pass KW as keyword arguments to + subprocess.call().""" + + try: + retcode = subprocess.call(command, **kw) + if retcode < 0: + raise FatalError( + 'Command terminated by signal %d: "%s"' + % (-retcode, ' '.join(command),) + ) + elif retcode > 0: + raise FatalError( + 'Command failed with return code %d: "%s"' + % (retcode, ' '.join(command),) + ) + except OSError, e: + raise FatalError( + 'Command execution failed (%s): "%s"' + % (e, ' '.join(command),) + ) + + +class CommandFailedException(Exception): + """Exception raised if check_command_runs() fails.""" + + pass + + +def check_command_runs(cmd, cmdname): + """Check whether the command CMD can be executed without errors. + + CMD is a list or string, as accepted by subprocess.Popen(). CMDNAME + is the name of the command as it should be included in exception + error messages. + + This function checks three things: (1) the command can be run + without throwing an OSError; (2) it exits with status=0; (3) it + doesn't output anything to stderr. 
If any of these conditions is + not met, raise a CommandFailedException describing the problem.""" + + try: + pipe = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + except OSError, e: + raise CommandFailedException('error executing %s: %s' % (cmdname, e,)) + pipe.stdin.close() + pipe.stdout.read() + errmsg = pipe.stderr.read() + status = pipe.wait() + if status or errmsg: + msg = 'error executing %s: status %s' % (cmdname, status,) + if errmsg: + msg += ', error output:\n%s' % (errmsg,) + raise CommandFailedException(msg) + + +class PipeStream(object): + """A file-like object from which revision contents can be read.""" + + def __init__(self, pipe_command): + self._pipe_command_str = ' '.join(pipe_command) + self.pipe = subprocess.Popen( + pipe_command, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + self.pipe.stdin.close() + + def read(self, size=None): + if size is None: + return self.pipe.stdout.read() + else: + return self.pipe.stdout.read(size) + + def close(self): + self.pipe.stdout.close() + error_output = self.pipe.stderr.read() + exit_status = self.pipe.wait() + if exit_status: + raise CommandError(self._pipe_command_str, exit_status, error_output) + + diff --git a/cvs2svn_lib/project.py b/cvs2svn_lib/project.py new file mode 100644 index 0000000..0fe92df --- /dev/null +++ b/cvs2svn_lib/project.py @@ -0,0 +1,219 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. 
+# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains database facilities used by cvs2svn.""" + + +import re +import os +import cPickle + +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.common import FatalError +from cvs2svn_lib.common import IllegalSVNPathError +from cvs2svn_lib.common import normalize_svn_path +from cvs2svn_lib.common import verify_paths_disjoint +from cvs2svn_lib.symbol_transform import CompoundSymbolTransform + + +class FileInAndOutOfAtticException(Exception): + def __init__(self, non_attic_path, attic_path): + Exception.__init__( + self, + "A CVS repository cannot contain both %s and %s" + % (non_attic_path, attic_path)) + + self.non_attic_path = non_attic_path + self.attic_path = attic_path + + +def normalize_ttb_path(opt, path, allow_empty=False): + try: + return normalize_svn_path(path, allow_empty) + except IllegalSVNPathError, e: + raise FatalError('Problem with %s: %s' % (opt, e,)) + + +class Project(object): + """A project within a CVS repository.""" + + def __init__( + self, id, project_cvs_repos_path, + initial_directories=[], + symbol_transforms=None, + ): + """Create a new Project record. + + ID is a unique id for this project. PROJECT_CVS_REPOS_PATH is the + main CVS directory for this project (within the filesystem). + + INITIAL_DIRECTORIES is an iterable of all SVN directories that + should be created when the project is first created. Normally, + this should include the trunk, branches, and tags directory. 
+ + SYMBOL_TRANSFORMS is an iterable of SymbolTransform instances + which will be used to transform any symbol names within this + project.""" + + self.id = id + + self.project_cvs_repos_path = os.path.normpath(project_cvs_repos_path) + if not os.path.isdir(self.project_cvs_repos_path): + raise FatalError("The specified CVS repository path '%s' is not an " + "existing directory." % self.project_cvs_repos_path) + + self.cvs_repository_root, self.cvs_module = \ + self.determine_repository_root( + os.path.abspath(self.project_cvs_repos_path)) + + # A regexp matching project_cvs_repos_path plus an optional separator: + self.project_prefix_re = re.compile( + r'^' + re.escape(self.project_cvs_repos_path) + + r'(' + re.escape(os.sep) + r'|$)') + + # The SVN directories to add when the project is first created: + self._initial_directories = [] + + for path in initial_directories: + try: + path = normalize_svn_path(path, False) + except IllegalSVNPathError, e: + raise FatalError( + 'Initial directory %r is not a legal SVN path: %s' + % (path, e,) + ) + self._initial_directories.append(path) + + verify_paths_disjoint(*self._initial_directories) + + # A list of transformation rules (regexp, replacement) applied to + # symbol names in this project. + if symbol_transforms is None: + symbol_transforms = [] + + self.symbol_transform = CompoundSymbolTransform(symbol_transforms) + + # The ID of the Trunk instance for this Project. This member is + # filled in during CollectRevsPass. + self.trunk_id = None + + # The ID of the CVSDirectory representing the root directory of + # this project. This member is filled in during CollectRevsPass. 
+ self.root_cvs_directory_id = None + + def __eq__(self, other): + return self.id == other.id + + def __cmp__(self, other): + return cmp(self.cvs_module, other.cvs_module) \ + or cmp(self.id, other.id) + + def __hash__(self): + return self.id + + @staticmethod + def determine_repository_root(path): + """Ascend above the specified PATH if necessary to find the + cvs_repository_root (a directory containing a CVSROOT directory) + and the cvs_module (the path of the conversion root within the cvs + repository). Return the root path and the module path of this + project relative to the root. + + NB: cvs_module must be seperated by '/', *not* by os.sep.""" + + def is_cvs_repository_root(path): + return os.path.isdir(os.path.join(path, 'CVSROOT')) + + original_path = path + cvs_module = '' + while not is_cvs_repository_root(path): + # Step up one directory: + prev_path = path + path, module_component = os.path.split(path) + if path == prev_path: + # Hit the root (of the drive, on Windows) without finding a + # CVSROOT dir. + raise FatalError( + "the path '%s' is not a CVS repository, nor a path " + "within a CVS repository. A CVS repository contains " + "a CVSROOT directory within its root directory." + % (original_path,)) + + cvs_module = module_component + "/" + cvs_module + + return path, cvs_module + + def transform_symbol(self, cvs_file, symbol_name, revision): + """Transform the symbol SYMBOL_NAME. + + SYMBOL_NAME refers to revision number REVISION in CVS_FILE. + REVISION is the CVS revision number as a string, with zeros + removed (e.g., '1.7' or '1.7.2'). Use the renaming rules + specified with --symbol-transform to possibly rename the symbol. + Return the transformed symbol name, the original name if it should + not be transformed, or None if the symbol should be omitted from + the conversion.""" + + return self.symbol_transform.transform(cvs_file, symbol_name, revision) + + def get_trunk(self): + """Return the Trunk instance for this project. 
def read_projects(filename):
  """Read the pickled list of Project instances from FILENAME.

  Return a map {project.id : project}.  The file handle is closed
  deterministically instead of being leaked as before."""

  f = open(filename, 'rb')
  try:
    projects = cPickle.load(f)
  finally:
    f.close()
  retval = {}
  for project in projects:
    retval[project.id] = project
  return retval


def write_projects(filename):
  """Pickle all Project instances registered in Ctx() to FILENAME.

  Uses the highest pickle protocol (-1).  The file handle is closed
  deterministically instead of being leaked as before."""

  f = open(filename, 'wb')
  try:
    cPickle.dump(Ctx()._projects.values(), f, -1)
  finally:
    f.close()
class SVNPropertySetter:
  """Abstract base class for objects that set properties on an SVNCommitItem."""

  def set_properties(self, s_item):
    """Set any properties that can be determined for S_ITEM.

    S_ITEM is an instance of SVNCommitItem.  This method should modify
    S_ITEM.svn_props in place."""

    raise NotImplementedError


class CVSRevisionNumberSetter(SVNPropertySetter):
  """Record the CVS revision number in the cvs2svn:cvs-rev property."""

  propname = 'cvs2svn:cvs-rev'

  def set_properties(self, s_item):
    # Respect a value that an earlier setter has already chosen:
    if self.propname not in s_item.svn_props:
      s_item.svn_props[self.propname] = s_item.cvs_rev.rev
      s_item.svn_props_changed = True


class ExecutablePropertySetter(SVNPropertySetter):
  """Set svn:executable when the CVS file was marked executable."""

  propname = 'svn:executable'

  def set_properties(self, s_item):
    if self.propname in s_item.svn_props:
      return
    if s_item.cvs_rev.cvs_file.executable:
      s_item.svn_props[self.propname] = '*'


class CVSBinaryFileEOLStyleSetter(SVNPropertySetter):
  """Force svn:eol-style to remain unset for files with CVS mode '-kb'."""

  propname = 'svn:eol-style'

  def set_properties(self, s_item):
    if self.propname in s_item.svn_props:
      return
    # None marks the property as deliberately unset:
    if s_item.cvs_rev.cvs_file.mode == 'b':
      s_item.svn_props[self.propname] = None
class MimeMapper(SVNPropertySetter):
  """A class that provides mappings from file names to MIME types."""

  propname = 'svn:mime-type'

  def __init__(self, mime_types_file):
    """Parse MIME_TYPES_FILE (mime.types format) into self.mappings."""

    self.mappings = { }

    # Use open() (not the deprecated file() builtin) and close the
    # handle deterministically instead of leaking it:
    f = open(mime_types_file)
    try:
      for line in f:
        if line.startswith("#"):
          continue

        # format of a line is something like
        # text/plain c h cpp
        extensions = line.split()
        if len(extensions) < 2:
          continue
        # Named mime_type to avoid shadowing the builtin 'type':
        mime_type = extensions.pop(0)
        for ext in extensions:
          if ext in self.mappings and self.mappings[ext] != mime_type:
            Log().error(
                "%s: ambiguous MIME mapping for *.%s (%s or %s)\n"
                % (warning_prefix, ext, self.mappings[ext], mime_type)
                )
          self.mappings[ext] = mime_type
    finally:
      f.close()

  def set_properties(self, s_item):
    """Set svn:mime-type on S_ITEM based on its file name."""

    if self.propname in s_item.svn_props:
      return

    basename, extension = os.path.splitext(s_item.cvs_rev.cvs_file.basename)

    # Extension includes the dot, so strip it (will leave extension
    # empty if filename ends with a dot, which is ok):
    extension = extension[1:]

    # If there is no extension (or the file ends with a period), use
    # the base name for mapping.  This allows us to set mappings for
    # files such as README or Makefile:
    if not extension:
      extension = basename

    mime_type = self.mappings.get(extension, None)
    if mime_type is not None:
      s_item.svn_props[self.propname] = mime_type
class AutoPropsPropertySetter(SVNPropertySetter):
  """Set arbitrary svn properties based on an auto-props configuration.

  This class supports case-sensitive or case-insensitive pattern
  matching.  The command-line default is case-insensitive behavior,
  consistent with Subversion (see
  http://subversion.tigris.org/issues/show_bug.cgi?id=2036).

  As a special extension to Subversion's auto-props handling, if a
  property name is preceded by a '!' then that property is forced to
  be left unset.

  If a property specified in auto-props has already been set to a
  different value, print a warning and leave the old property value
  unchanged.

  Python's treatment of whitespaces in the ConfigParser module is
  buggy and inconsistent.  Usually spaces are preserved, but if there
  is at least one semicolon in the value, and the *first* semicolon is
  preceded by a space, then that is treated as the start of a comment
  and the rest of the line is silently discarded."""

  # The named groups 'name' and 'value' are required by the
  # m.group('name') / m.group('value') calls below; they had been lost
  # from these patterns during extraction and are restored here.
  property_name_pattern = r'(?P<name>[^\!\=\s]+)'
  # Matches '! propname' (force the property to remain unset):
  property_unset_re = re.compile(
      r'^\!\s*' + property_name_pattern + r'$'
      )
  # Matches 'propname = value':
  property_set_re = re.compile(
      r'^' + property_name_pattern + r'\s*\=\s*(?P<value>.*)$'
      )
  # Matches a bare 'propname' (set to the empty string):
  property_novalue_re = re.compile(
      r'^' + property_name_pattern + r'$'
      )

  # A pattern or value wrapped in matching quotation marks:
  quoted_re = re.compile(
      r'^([\'\"]).*\1$'
      )
  # A space followed by a semicolon can start a ConfigParser comment:
  comment_re = re.compile(r'\s;')

  class Pattern:
    """Describes the properties to be set for files matching a pattern."""

    def __init__(self, pattern, propdict):
      # A glob-like pattern:
      self.pattern = pattern
      # A dictionary of properties that should be set:
      self.propdict = propdict

    def match(self, basename):
      """Does the file with the specified basename match pattern?"""

      return fnmatch.fnmatch(basename, self.pattern)

  def __init__(self, configfilename, ignore_case=True):
    """Read auto-props rules from CONFIGFILENAME.

    If IGNORE_CASE, match file names case-insensitively."""

    config = ConfigParser.ConfigParser()
    if ignore_case:
      self.transform_case = self.squash_case
    else:
      config.optionxform = self.preserve_case
      self.transform_case = self.preserve_case

    configtext = open(configfilename).read()
    if self.comment_re.search(configtext):
      Log().warn(
          '%s: Please be aware that a space followed by a\n'
          'semicolon is sometimes treated as a comment in configuration\n'
          'files.  This pattern was seen in\n'
          '    %s\n'
          'Please make sure that you have not inadvertently commented\n'
          'out part of an important line.'
          % (warning_prefix, configfilename,)
          )

    config.readfp(StringIO(configtext), configfilename)
    self.patterns = []
    sections = config.sections()
    sections.sort()
    for section in sections:
      if self.transform_case(section) == 'auto-props':
        patterns = config.options(section)
        patterns.sort()
        for pattern in patterns:
          value = config.get(section, pattern)
          if value:
            self._add_pattern(pattern, value)

  def squash_case(self, s):
    return s.lower()

  def preserve_case(self, s):
    return s

  def _add_pattern(self, pattern, props):
    """Parse PROPS (a ';'-separated rule string) and record a Pattern."""

    propdict = {}
    if self.quoted_re.match(pattern):
      Log().warn(
          '%s: Quoting is not supported in auto-props; please verify rule\n'
          'for %r.  (Using pattern including quotation marks.)\n'
          % (warning_prefix, pattern,)
          )
    for prop in props.split(';'):
      prop = prop.strip()
      m = self.property_unset_re.match(prop)
      if m:
        name = m.group('name')
        Log().debug(
            'auto-props: For %r, leaving %r unset.' % (pattern, name,)
            )
        propdict[name] = None
        continue

      m = self.property_set_re.match(prop)
      if m:
        name = m.group('name')
        value = m.group('value')
        if self.quoted_re.match(value):
          Log().warn(
              '%s: Quoting is not supported in auto-props; please verify\n'
              'rule %r for pattern %r.  (Using value\n'
              'including quotation marks.)\n'
              % (warning_prefix, prop, pattern,)
              )
        Log().debug(
            'auto-props: For %r, setting %r to %r.'
            % (pattern, name, value,)
            )
        propdict[name] = value
        continue

      m = self.property_novalue_re.match(prop)
      if m:
        name = m.group('name')
        Log().debug(
            'auto-props: For %r, setting %r to the empty string'
            % (pattern, name,)
            )
        propdict[name] = ''
        continue

      Log().warn(
          '%s: in auto-props line for %r, value %r cannot be parsed (ignored)'
          % (warning_prefix, pattern, prop,)
          )

    self.patterns.append(self.Pattern(self.transform_case(pattern), propdict))

  def get_propdict(self, cvs_file):
    """Return the combined property dict for CVS_FILE over all patterns.

    Warn about (and keep the first of) contradictory values."""

    basename = self.transform_case(cvs_file.basename)
    propdict = {}
    for pattern in self.patterns:
      if pattern.match(basename):
        for (key,value) in pattern.propdict.items():
          if key in propdict:
            if propdict[key] != value:
              Log().warn(
                  "Contradictory values set for property '%s' for file %s."
                  % (key, cvs_file,))
          else:
            propdict[key] = value

    return propdict

  def set_properties(self, s_item):
    """Apply matching auto-props rules to S_ITEM, without overriding
    properties already set by earlier setters."""

    propdict = self.get_propdict(s_item.cvs_rev.cvs_file)
    for (k,v) in propdict.items():
      if k in s_item.svn_props:
        if s_item.svn_props[k] != v:
          Log().warn(
              "Property '%s' already set to %r for file %s; "
              "auto-props value (%r) ignored."
              % (k, s_item.svn_props[k], s_item.cvs_rev.cvs_path, v,))
      else:
        s_item.svn_props[k] = v


class CVSBinaryFileDefaultMimeTypeSetter(SVNPropertySetter):
  """If the file is binary and its svn:mime-type property is not yet
  set, set it to 'application/octet-stream'."""

  propname = 'svn:mime-type'

  def set_properties(self, s_item):
    if self.propname in s_item.svn_props:
      return

    if s_item.cvs_rev.cvs_file.mode == 'b':
      s_item.svn_props[self.propname] = 'application/octet-stream'
class EOLStyleFromMimeTypeSetter(SVNPropertySetter):
  """Set svn:eol-style based on svn:mime-type.

  If svn:mime-type is known but svn:eol-style is not, then set
  svn:eol-style based on svn:mime-type as follows: if svn:mime-type
  starts with 'text/', then set svn:eol-style to native; otherwise,
  force it to remain unset.  See also issue #39."""

  propname = 'svn:eol-style'

  def set_properties(self, s_item):
    if self.propname in s_item.svn_props:
      return

    mime_type = s_item.svn_props.get('svn:mime-type', None)
    if mime_type is None:
      # No MIME type known; leave eol-style for a later setter.
      return
    if mime_type.startswith("text/"):
      s_item.svn_props[self.propname] = 'native'
    else:
      # None marks the property as deliberately unset:
      s_item.svn_props[self.propname] = None


class DefaultEOLStyleSetter(SVNPropertySetter):
  """Fallback setter: apply a fixed eol-style if none was chosen yet."""

  propname = 'svn:eol-style'

  def __init__(self, value):
    """Initialize with the specified default VALUE."""

    self.value = value

  def set_properties(self, s_item):
    if self.propname not in s_item.svn_props:
      s_item.svn_props[self.propname] = self.value


class SVNBinaryFileKeywordsPropertySetter(SVNPropertySetter):
  """Turn off svn:keywords for files with binary svn:eol-style."""

  propname = 'svn:keywords'

  def set_properties(self, s_item):
    if self.propname in s_item.svn_props:
      return

    # An eol-style that is missing, None, or empty indicates binary:
    if not s_item.svn_props.get('svn:eol-style'):
      s_item.svn_props[self.propname] = None
See issue #2.""" + + propname = 'svn:keywords' + + def __init__(self, value): + """Use VALUE for the value of the svn:keywords property if it is + to be set.""" + + self.value = value + + def set_properties(self, s_item): + if self.propname in s_item.svn_props: + return + + if s_item.cvs_rev.cvs_file.mode in [None, 'kv', 'kvl']: + s_item.svn_props[self.propname] = self.value + + diff --git a/cvs2svn_lib/rcs_revision_manager.py b/cvs2svn_lib/rcs_revision_manager.py new file mode 100644 index 0000000..1c2dfcf --- /dev/null +++ b/cvs2svn_lib/rcs_revision_manager.py @@ -0,0 +1,51 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
+# ==================================================================== + +"""Access the CVS repository via RCS's 'co' command.""" + + +from cvs2svn_lib.common import FatalError +from cvs2svn_lib.process import check_command_runs +from cvs2svn_lib.process import PipeStream +from cvs2svn_lib.process import CommandFailedException +from cvs2svn_lib.revision_manager import RevisionReader + + +class RCSRevisionReader(RevisionReader): + """A RevisionReader that reads the contents via RCS.""" + + def __init__(self, co_executable): + self.co_executable = co_executable + try: + check_command_runs([self.co_executable, '-V'], self.co_executable) + except CommandFailedException, e: + raise FatalError('%s\n' + 'Please check that co is installed and in your PATH\n' + '(it is a part of the RCS software).' % (e,)) + + def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False): + pipe_cmd = [ + self.co_executable, + '-q', + '-x,v', + '-p%s' % (cvs_rev.rev,) + ] + if suppress_keyword_substitution: + pipe_cmd.append('-kk') + pipe_cmd.append(cvs_rev.cvs_file.filename) + return PipeStream(pipe_cmd) + + diff --git a/cvs2svn_lib/rcs_stream.py b/cvs2svn_lib/rcs_stream.py new file mode 100644 index 0000000..b893819 --- /dev/null +++ b/cvs2svn_lib/rcs_stream.py @@ -0,0 +1,149 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2007 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
def msplit(s):
  """Split S into an array of lines.

  Only the newline character is a line separator; the line endings
  are part of the lines.  The result never contains a trailing empty
  string (so msplit('') == [])."""

  # s.splitlines(True) is not used because it also splits on carriage
  # returns, which must be preserved verbatim inside RCS deltas.
  # Renamed the local from 're' to 'lines' -- the old name shadowed
  # the imported 're' module.
  lines = [ i + "\n" for i in s.split("\n") ]
  # The split manufactures one extra newline on the final element;
  # strip it, and drop the element entirely if it is now empty
  # (i.e., S was empty or ended with a newline).
  lines[-1] = lines[-1][:-1]
  if not lines[-1]:
    del lines[-1]
  return lines


class MalformedDeltaException(Exception):
  """A malformed RCS delta was encountered."""

  pass
  def invert_diff(self, diff):
    """Apply the RCS diff DIFF to the current file content and simultaneously
    generate an RCS diff suitable for reverting the change.

    Raise MalformedDeltaException if DIFF is not a well-formed RCS
    delta relative to the current content."""

    # The new line list being built:
    ntexts = []
    # Index into self._texts of the first line not yet copied:
    ooff = 0
    diffs = msplit(diff)
    # The inverse delta commands being accumulated:
    ndiffs = []
    # Running difference between line numbers in the new text and the
    # old text, used to translate positions for the inverse commands:
    adjust = 0
    i = 0
    while i < len(diffs):
      admatch = self.ad_command.match(diffs[i])
      if not admatch:
        raise MalformedDeltaException('Bad ed command')
      i += 1
      sl = int(admatch.group(2))
      cn = int(admatch.group(3))
      if admatch.group(1) == 'd': # "d" - Delete command
        sl -= 1
        if sl < ooff:
          raise MalformedDeltaException('Deletion before last edit')
        if sl > len(self._texts):
          raise MalformedDeltaException('Deletion past file end')
        if sl + cn > len(self._texts):
          raise MalformedDeltaException('Deletion beyond file end')
        # Handle substitution explicitly, as add must come after del
        # (last add may end in no newline, so no command can follow).
        if i < len(diffs):
          amatch = self.a_command.match(diffs[i])
        else:
          amatch = None
        if amatch and int(amatch.group(1)) == sl + cn:
          # A delete immediately followed by an add at the same spot is
          # a substitution; its inverse is the opposite substitution,
          # re-inserting the deleted lines:
          cn2 = int(amatch.group(2))
          i += 1
          ndiffs += ["d%d %d\na%d %d\n" % \
              (sl + 1 + adjust, cn2, sl + adjust + cn2, cn)] + \
              self._texts[sl:sl + cn]
          ntexts += self._texts[ooff:sl] + diffs[i:i + cn2]
          adjust += cn2 - cn
          i += cn2
        else:
          # The inverse of a plain delete is an add of the deleted lines:
          ndiffs += ["a%d %d\n" % (sl + adjust, cn)] + \
              self._texts[sl:sl + cn]
          ntexts += self._texts[ooff:sl]
          adjust -= cn
        ooff = sl + cn
      else: # "a" - Add command
        if sl < ooff: # Also catches same place
          raise MalformedDeltaException('Insertion before last edit')
        if sl > len(self._texts):
          raise MalformedDeltaException('Insertion past file end')
        # The inverse of an add is a delete of the added range:
        ndiffs += ["d%d %d\n" % (sl + 1 + adjust, cn)]
        ntexts += self._texts[ooff:sl] + diffs[i:i + cn]
        ooff = sl
        adjust += cn
        i += cn
    self._texts = ntexts + self._texts[ooff:]
    return "".join(ndiffs)
0000000..41ab84a --- /dev/null +++ b/cvs2svn_lib/record_table.py @@ -0,0 +1,399 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""Classes to manage Databases of fixed-length records. + +The databases map small, non-negative integers to fixed-size records. +The records are written in index order to a disk file. Gaps in the +index sequence leave gaps in the data file, so for best space +efficiency the indexes of existing records should be approximately +continuous. + +To use a RecordTable, you need a class derived from Packer which can +serialize/deserialize your records into fixed-size strings. Deriving +classes have to specify how to pack records into strings and unpack +strings into records by overwriting the pack() and unpack() methods +respectively. + +Note that these classes keep track of gaps in the records that have +been written by filling them with packer.empty_value. 
# A unique value that can be used to stand for "unset" without
# preventing the use of None.
_unset = object()


class Packer(object):
  """Abstract base class for objects that (un)pack records to/from
  fixed-length strings of self.record_len bytes.

  EMPTY_VALUE, if given, is the packed string used to fill gaps in the
  record file; it defaults to record_len NUL bytes."""

  def __init__(self, record_len, empty_value=None):
    self.record_len = record_len
    if empty_value is None:
      self.empty_value = '\0' * self.record_len
    else:
      # isinstance(..., bytes) is equivalent to the old
      # 'type(...) is types.StringType' under Python 2 (where bytes is
      # str) while also being correct for packed values on Python 3:
      assert isinstance(empty_value, bytes)
      assert len(empty_value) == self.record_len
      self.empty_value = empty_value

  def pack(self, v):
    """Pack record V into a string of length self.record_len."""

    raise NotImplementedError()

  def unpack(self, s):
    """Unpack string S into a record."""

    raise NotImplementedError()


class StructPacker(Packer):
  """A Packer that uses the struct module with format string FORMAT.

  EMPTY_VALUE is given as an *unpacked* record (it is packed here);
  the _unset sentinel distinguishes 'not given' from None."""

  def __init__(self, format, empty_value=_unset):
    self.format = format
    if empty_value is not _unset:
      empty_value = self.pack(empty_value)
    else:
      empty_value = None

    Packer.__init__(self, struct.calcsize(self.format),
                    empty_value=empty_value)

  def pack(self, v):
    return struct.pack(self.format, v)

  def unpack(self, v):
    return struct.unpack(self.format, v)[0]


class UnsignedIntegerPacker(StructPacker):
  """Pack records that are single native unsigned 32-bit integers."""

  def __init__(self, empty_value=0):
    StructPacker.__init__(self, '=I', empty_value)


class SignedIntegerPacker(StructPacker):
  """Pack records that are single native signed 32-bit integers."""

  def __init__(self, empty_value=0):
    StructPacker.__init__(self, '=i', empty_value)
Of course if the computer + doesn't have large file support, only the lowest 31 bits can be + nonzero, and the offsets are limited to 2 GiB.""" + + # Convert file offsets to 8-bit little-endian unsigned longs... + INDEX_FORMAT = '= self._max_memory_cache: + self.flush() + self._limit = max(self._limit, i + 1) + + def _get_packed_record(self, i): + try: + return self._cache[i][1] + except KeyError: + if not 0 <= i < self._limit_written: + raise KeyError(i) + self.f.seek(i * self._record_len) + s = self.f.read(self._record_len) + self._cache[i] = (False, s) + if len(self._cache) >= self._max_memory_cache: + self.flush() + + return s + + def close(self): + self.flush() + self._cache = None + self.f.close() + self.f = None + + +class MmapRecordTable(AbstractRecordTable): + GROWTH_INCREMENT = 65536 + + def __init__(self, filename, mode, packer): + AbstractRecordTable.__init__(self, filename, mode, packer) + if self.mode == DB_OPEN_NEW: + self.python_file = open(self.filename, 'wb+') + self.python_file.write('\0' * self.GROWTH_INCREMENT) + self.python_file.flush() + self._filesize = self.GROWTH_INCREMENT + self.f = mmap.mmap( + self.python_file.fileno(), self._filesize, + access=mmap.ACCESS_WRITE + ) + + # The index just beyond the last record ever written: + self._limit = 0 + elif self.mode == DB_OPEN_WRITE: + self.python_file = open(self.filename, 'rb+') + self._filesize = os.path.getsize(self.filename) + self.f = mmap.mmap( + self.python_file.fileno(), self._filesize, + access=mmap.ACCESS_WRITE + ) + + # The index just beyond the last record ever written: + self._limit = os.path.getsize(self.filename) // self._record_len + elif self.mode == DB_OPEN_READ: + self.python_file = open(self.filename, 'rb') + self._filesize = os.path.getsize(self.filename) + self.f = mmap.mmap( + self.python_file.fileno(), self._filesize, + access=mmap.ACCESS_READ + ) + + # The index just beyond the last record ever written: + self._limit = os.path.getsize(self.filename) // self._record_len 
  def _set_packed_record(self, i, s):
    """Store the packed record S (self._record_len bytes) at index I.

    Raise RecordTableAccessError if the table was opened read-only and
    KeyError for a negative index.  Writing beyond the current limit
    grows the mmap'ed file (in GROWTH_INCREMENT chunks) and pads any
    intervening gap with packer.empty_value."""

    if self.mode == DB_OPEN_READ:
      raise RecordTableAccessError()
    if i < 0:
      raise KeyError()
    if i >= self._limit:
      # This write extends the range of valid indices.  First check
      # whether the file has to be enlarged:
      new_size = (i + 1) * self._record_len
      if new_size > self._filesize:
        # Round up to a multiple of GROWTH_INCREMENT so the mmap is
        # not resized for every single append:
        self._filesize = (
            (new_size + self.GROWTH_INCREMENT - 1)
            // self.GROWTH_INCREMENT
            * self.GROWTH_INCREMENT
            )
        self.f.resize(self._filesize)
      if i > self._limit:
        # Pad up to the new record with empty_value:
        self.f[self._limit * self._record_len:i * self._record_len] = \
            self.packer.empty_value * (i - self._limit)
      self._limit = i + 1

    self.f[i * self._record_len:(i + 1) * self._record_len] = s
  def __init__(self, revision_reader, target):
    """Create the target repository loader.

    TARGET is the path of the Subversion repository to be loaded via
    'svnadmin load'.  A temporary dumpfile is created (and later
    removed by finish()) to buffer one revision at a time."""

    self.target = target

    # Since the output of this run is a repository, not a dumpfile,
    # the temporary dumpfiles we create should go in the tmpdir.  But
    # since we delete it ourselves, we don't want to use
    # artifact_manager.
    DumpfileDelegate.__init__(
        self, revision_reader, Ctx().get_temp_filename(DUMPFILE)
        )

    self.dumpfile = open(self.dumpfile_path, 'w+b')
    self.loader_pipe = subprocess.Popen(
        [Ctx().svnadmin_executable, 'load', '-q', self.target],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        )
    # svnadmin's stdout is not used; close it immediately.
    # NOTE(review): stderr is a PIPE but is only read on failure or in
    # finish(); if svnadmin emits a lot of stderr output earlier, the
    # pipe buffer could fill and block -- confirm this is acceptable.
    self.loader_pipe.stdout.close()
    try:
      self._write_dumpfile_header(self.loader_pipe.stdin)
    except IOError:
      # svnadmin died before accepting the header; surface its stderr.
      raise FatalError(
          'svnadmin failed with the following output while '
          'loading the dumpfile:\n%s'
          % (self.loader_pipe.stderr.read(),)
          )
self.dumpfile.seek(0) + self.dumpfile.truncate() + + def finish(self): + """Clean up.""" + + self.dumpfile.close() + self.loader_pipe.stdin.close() + error_output = self.loader_pipe.stderr.read() + exit_status = self.loader_pipe.wait() + del self.loader_pipe + if exit_status: + raise CommandError('svnadmin load', exit_status, error_output) + os.remove(self.dumpfile_path) + + diff --git a/cvs2svn_lib/repository_mirror.py b/cvs2svn_lib/repository_mirror.py new file mode 100644 index 0000000..72e2ba1 --- /dev/null +++ b/cvs2svn_lib/repository_mirror.py @@ -0,0 +1,897 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains the RepositoryMirror class and supporting classes. + +RepositoryMirror represents the skeleton of a versioned file tree with +multiple lines of development ('LODs'). It records the presence or +absence of files and directories, but not their contents. Given three +values (revnum, lod, cvs_path), it can tell you whether the specified +CVSPath existed on the specified LOD in the given revision number. +The file trees corresponding to the most recent revision can be +modified. + +The individual file trees are stored using immutable tree structures. 
+Each directory node is represented as a MirrorDirectory instance, +which is basically a map {cvs_path : node_id}, where cvs_path is a +CVSPath within the directory, and node_id is an integer ID that +uniquely identifies another directory node if that node is a +CVSDirectory, or None if that node is a CVSFile. If a directory node +is to be modified, then first a new node is created with a copy of the +original node's contents, then the copy is modified. A reference to +the copy also has to be stored in the parent node, meaning that the +parent node needs to be modified, and so on recursively to the root +node of the file tree. This data structure allows cheap deep copies, +which is useful for tagging and branching. + +The class must also be able to find the root directory node +corresponding to a particular (revnum, lod). This is done by keeping +an LODHistory instance for each LOD, which can determine the root +directory node ID for that LOD for any revnum. It does so by +recording changes to the root directory node ID only for revisions in +which it changed. Thus it stores two arrays, revnums (a list of the +revision numbers when the ID changed), and ids (a list of the +corresponding IDs). To find the ID for a particular revnum, first a +binary search is done in the revnums array to find the index of the +last change preceding revnum, then the corresponding ID is read from +the ids array. Since most revisions change only one LOD, this allows +storage of the history of potentially tens of thousands of LODs over +hundreds of thousands of revisions in an amount of space that scales +as O(numberOfLODs + numberOfRevisions), rather than O(numberOfLODs * +numberOfRevisions) as would be needed if the information were stored +in the equivalent of a 2D array. + +The internal operation of these classes is somewhat intricate, but the +interface attempts to hide the complexity, enforce the usage rules, +and allow efficient access. 
The most important facts to remember are +(1) that a directory node can be used for multiple purposes (for +multiple branches and for multiple revisions on a single branch), (2) +that only a node that has been created within the current revision is +allowed to be mutated, and (3) that the current revision can include +nodes carried over from prior revisions, which are immutable. + +This leads to a bewildering variety of MirrorDirectory classes. The +most important distinction is between OldMirrorDirectories and +CurrentMirrorDirectories. A single node can be represented multiple +ways in memory at the same time, depending on whether it was looked up +as part of the current revision or part of an old revision: + + MirrorDirectory -- the base class for all MirrorDirectory nodes. + This class allows lookup of subnodes and iteration over + subnodes. + + OldMirrorDirectory -- a MirrorDirectory that was looked up for an + old revision. These instances are immutable, as only the + current revision is allowed to be modified. + + CurrentMirrorDirectory -- a MirrorDirectory that was looked up for + the current revision. Such an instance is always logically + mutable, though mutating it might require the node to be + copied first. Such an instance might represent a node that + has already been copied during this revision and can therefore + be modified freely (such nodes implement + _WritableMirrorDirectoryMixin), or it might represent a node + that was carried over from an old revision and hasn't been + copied yet (such nodes implement + _ReadOnlyMirrorDirectoryMixin). If the latter, then the node + copies itself (and bubbles up the change) before allowing + itself to be modified. But the distinction is managed + internally; client classes should not have to worry about it. + + CurrentMirrorLODDirectory -- A CurrentMirrorDirectory representing + the root directory of a line of development in the current + revision. 
This class has two concrete subclasses, + _CurrentMirrorReadOnlyLODDirectory and + _CurrentMirrorWritableLODDirectory, depending on whether the + node has already been copied during this revision. + + + CurrentMirrorSubdirectory -- A CurrentMirrorDirectory representing + a subdirectory within a line of development's directory tree + in the current revision. This class has two concrete + subclasses, _CurrentMirrorReadOnlySubdirectory and + _CurrentMirrorWritableSubdirectory, depending on whether the + node has already been copied during this revision. + + DeletedCurrentMirrorDirectory -- a MirrorDirectory that has been + deleted. Such an instance is disabled so that it cannot + accidentally be used. + +While a revision is being processed, RepositoryMirror._new_nodes holds +every writable CurrentMirrorDirectory instance (i.e., every node that +has been created in the revision). Since these nodes are mutable, it +is important that there be exactly one instance associated with each +node; otherwise there would be problems keeping the instances +synchronized. These are written to the database by +RepositoryMirror.end_commit(). + +OldMirrorDirectory and read-only CurrentMirrorDirectory instances are +*not* cached; they are recreated whenever they are referenced. There +might be multiple instances referring to the same node. A read-only +CurrentMirrorDirectory instance is mutated in place into a writable +CurrentMirrorDirectory instance if it needs to be modified. + +FIXME: The rules for when a MirrorDirectory instance can continue to +be used vs. when it has to be read again (because it has been modified +indirectly and therefore copied) are confusing and error-prone. +Probably the semantics should be changed. 
+ +""" + + +import bisect + +from cvs2svn_lib import config +from cvs2svn_lib.common import DB_OPEN_NEW +from cvs2svn_lib.common import InternalError +from cvs2svn_lib.log import Log +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.cvs_file import CVSFile +from cvs2svn_lib.cvs_file import CVSDirectory +from cvs2svn_lib.key_generator import KeyGenerator +from cvs2svn_lib.artifact_manager import artifact_manager +from cvs2svn_lib.serializer import MarshalSerializer +from cvs2svn_lib.database import IndexedDatabase + + +class RepositoryMirrorError(Exception): + """An error related to the RepositoryMirror.""" + + pass + + +class LODExistsError(RepositoryMirrorError): + """The LOD already exists in the repository. + + Exception raised if an attempt is made to add an LOD to the + repository mirror and that LOD already exists in the youngest + revision of the repository.""" + + pass + + +class PathExistsError(RepositoryMirrorError): + """The path already exists in the repository. + + Exception raised if an attempt is made to add a path to the + repository mirror and that path already exists in the youngest + revision of the repository.""" + + pass + + +class DeletedNodeReusedError(RepositoryMirrorError): + """The MirrorDirectory has already been deleted and shouldn't be reused.""" + + pass + + +class CopyFromCurrentNodeError(RepositoryMirrorError): + """A CurrentMirrorDirectory cannot be copied to the current revision.""" + + pass + + +class MirrorDirectory(object): + """Represent a node within the RepositoryMirror. + + Instances of this class act like a map {CVSPath : MirrorDirectory}, + where CVSPath is an item within this directory (i.e., a file or + subdirectory within this directory). 
class MirrorDirectory(object):
  """A directory node within the RepositoryMirror's skeleton file tree.

  An instance behaves like a read-only map {CVSPath : MirrorDirectory},
  mapping each CVSPath contained in this directory either to another
  MirrorDirectory instance (for a subdirectory) or to None (for a
  file)."""

  def __init__(self, repo, id, entries):
    # The RepositoryMirror that owns this node:
    self.repo = repo

    # The integer ID identifying this node:
    self.id = id

    # Map {CVSPath : node_id}; the node_id is an integer for a
    # CVSDirectory entry and None for a CVSFile entry:
    self._entries = entries

  def __getitem__(self, cvs_path):
    """Return the MirrorDirectory associated with the specified subnode.

    Return a MirrorDirectory instance if the subnode is a
    CVSDirectory; None if it is a CVSFile.  Raise KeyError if the
    specified subnode does not exist."""

    raise NotImplementedError()

  def __len__(self):
    """Return the number of CVSPaths within this node."""

    return len(self._entries)

  def __contains__(self, cvs_path):
    """Return True iff CVS_PATH is contained in this node."""

    return cvs_path in self._entries

  def __iter__(self):
    """Iterate over the CVSPaths within this node."""

    return iter(self._entries)

  def _format_entries(self):
    """Render self._entries for use in subclasses' __repr__() methods."""

    def format_item(key, value):
      if value is None:
        return str(key)
      return '%s -> %x' % (key, value,)

    formatted = [
        format_item(*entry) for entry in sorted(self._entries.items())
        ]
    return '{%s}' % (', '.join(formatted),)

  def __str__(self):
    """For convenience only.  The format is subject to change at any time."""

    return '%s<%x>' % (self.__class__.__name__, self.id,)


class OldMirrorDirectory(MirrorDirectory):
  """A (read-only) directory node looked up in an old revision."""

  def __getitem__(self, cvs_path):
    sub_id = self._entries[cvs_path]
    if sub_id is None:
      # A leaf node (file); files have no node of their own.
      return None
    return OldMirrorDirectory(self.repo, sub_id, self.repo._node_db[sub_id])

  def __repr__(self):
    """For convenience only.  The format is subject to change at any time."""

    return '%s(%s)' % (self, self._format_entries(),)


class CurrentMirrorDirectory(MirrorDirectory):
  """A directory node as it exists in the mirror's current revision."""

  def __init__(self, repo, id, lod, cvs_path, entries):
    MirrorDirectory.__init__(self, repo, id, entries)
    self.lod = lod
    self.cvs_path = cvs_path

  def __getitem__(self, cvs_path):
    sub_id = self._entries[cvs_path]
    if sub_id is None:
      # A leaf node (file); files have no node of their own.
      return None
    try:
      return self.repo._new_nodes[sub_id]
    except KeyError:
      return _CurrentMirrorReadOnlySubdirectory(
          self.repo, sub_id, self.lod, cvs_path, self,
          self.repo._node_db[sub_id]
          )

  def __setitem__(self, cvs_path, node):
    """Create or overwrite a subnode of this node.

    CVS_PATH is the path of the subnode.  NODE will be the new value
    of the node; for CVSDirectories it should be a MirrorDirectory
    instance; for CVSFiles it should be None.  Deleted nodes and nodes
    created within the current revision are rejected."""

    if isinstance(node, DeletedCurrentMirrorDirectory):
      raise DeletedNodeReusedError(
          '%r has already been deleted and should not be reused' % (node,)
          )
    if isinstance(node, CurrentMirrorDirectory):
      raise CopyFromCurrentNodeError(
          '%r was created in the current node and cannot be copied' % (node,)
          )
    self._set_entry(cvs_path, node)

  def __delitem__(self, cvs_path):
    """Remove the subnode of this node at CVS_PATH.

    If the node does not exist, then raise a KeyError."""

    node = self[cvs_path]
    self._del_entry(cvs_path)
    if isinstance(node, _WritableMirrorDirectoryMixin):
      node._mark_deleted()

  def mkdir(self, cvs_directory):
    """Create an empty subdirectory of this node at CVS_DIRECTORY.

    Return the CurrentDirectory that was created."""

    assert isinstance(cvs_directory, CVSDirectory)
    if cvs_directory in self:
      raise PathExistsError(
          'Attempt to create directory \'%s\' in %s in repository mirror '
          'when it already exists.'
          % (cvs_directory, self.lod,)
          )

    subdir = _CurrentMirrorWritableSubdirectory(
        self.repo, self.repo._key_generator.gen_id(), self.lod,
        cvs_directory, self, {}
        )
    self._set_entry(cvs_directory, subdir)
    self.repo._new_nodes[subdir.id] = subdir
    return subdir

  def add_file(self, cvs_file):
    """Record the existence of CVS_FILE within this node."""

    assert isinstance(cvs_file, CVSFile)
    if cvs_file in self:
      raise PathExistsError(
          'Attempt to create file \'%s\' in %s in repository mirror '
          'when it already exists.'
          % (cvs_file, self.lod,)
          )

    self._set_entry(cvs_file, None)

  def __repr__(self):
    """For convenience only.  The format is subject to change at any time."""

    return '%s(%r, %r, %s)' % (
        self, self.lod, self.cvs_path, self._format_entries(),
        )


class DeletedCurrentMirrorDirectory(object):
  """The carcass of a deleted, formerly-writable MirrorDirectory.

  When a _WritableMirrorDirectoryMixin node is deleted, its instance
  is reclassed to this type so that any further (accidental) use of
  the stale reference fails instead of silently mutating it."""

  pass
+ + A MirrorDirectory is writable if it has already been recreated + during the current revision.""" + + def _set_entry(self, cvs_path, node): + """Create or overwrite a subnode of this node, with no checks.""" + + if node is None: + self._entries[cvs_path] = None + else: + self._entries[cvs_path] = node.id + + def _del_entry(self, cvs_path): + """Remove the subnode of this node at CVS_PATH, with no checks.""" + + del self._entries[cvs_path] + + def _mark_deleted(self): + """Mark this object and any writable descendants as being deleted.""" + + self.__class__ = DeletedCurrentMirrorDirectory + + for (cvs_path, id) in self._entries.iteritems(): + if id in self.repo._new_nodes: + node = self[cvs_path] + if isinstance(node, _WritableMirrorDirectoryMixin): + # Mark deleted and recurse: + node._mark_deleted() + + +class _ReadOnlyMirrorDirectoryMixin: + """Mixin for a CurrentMirrorDirectory that hasn't yet been made writable.""" + + def _make_writable(self): + raise NotImplementedError() + + def _set_entry(self, cvs_path, node): + """Create or overwrite a subnode of this node, with no checks.""" + + self._make_writable() + self._set_entry(cvs_path, node) + + def _del_entry(self, cvs_path): + """Remove the subnode of this node at CVS_PATH, with no checks.""" + + self._make_writable() + self._del_entry(cvs_path) + + +class CurrentMirrorLODDirectory(CurrentMirrorDirectory): + """Represent an LOD's main directory in the mirror's current version.""" + + def __init__(self, repo, id, lod, entries): + CurrentMirrorDirectory.__init__( + self, repo, id, lod, lod.project.get_root_cvs_directory(), entries + ) + + def delete(self): + """Remove the directory represented by this object.""" + + lod_history = self.repo._get_lod_history(self.lod) + assert lod_history.exists() + lod_history.update(self.repo._youngest, None) + self._mark_deleted() + + +class _CurrentMirrorReadOnlyLODDirectory( + CurrentMirrorLODDirectory, _ReadOnlyMirrorDirectoryMixin + ): + """Represent an LOD's main 
class CurrentMirrorLODDirectory(CurrentMirrorDirectory):
  """Represent an LOD's main directory in the mirror's current version."""

  def __init__(self, repo, id, lod, entries):
    CurrentMirrorDirectory.__init__(
        self, repo, id, lod, lod.project.get_root_cvs_directory(), entries
        )

  def delete(self):
    """Remove the directory represented by this object."""

    lod_history = self.repo._get_lod_history(self.lod)
    assert lod_history.exists()
    lod_history.update(self.repo._youngest, None)
    self._mark_deleted()


class _CurrentMirrorReadOnlyLODDirectory(
      CurrentMirrorLODDirectory, _ReadOnlyMirrorDirectoryMixin
      ):
  """An LOD's main directory that has not yet been copied this revision."""

  def _make_writable(self):
    # Convert this instance in place into the writable variant, under
    # a fresh node ID, and record the new root in the LOD's history:
    self.__class__ = _CurrentMirrorWritableLODDirectory
    self.id = self.repo._key_generator.gen_id()
    self.repo._new_nodes[self.id] = self
    self.repo._get_lod_history(self.lod).update(self.repo._youngest, self.id)
    # Copy the entries so the old (shared, immutable) node is untouched:
    self._entries = self._entries.copy()


class _CurrentMirrorWritableLODDirectory(
      CurrentMirrorLODDirectory, _WritableMirrorDirectoryMixin
      ):
  """An LOD's main directory that has already been copied this revision."""

  pass


class CurrentMirrorSubdirectory(CurrentMirrorDirectory):
  """Represent a subdirectory in the mirror's current version."""

  def __init__(self, repo, id, lod, cvs_path, parent_mirror_dir, entries):
    CurrentMirrorDirectory.__init__(self, repo, id, lod, cvs_path, entries)
    self.parent_mirror_dir = parent_mirror_dir

  def delete(self):
    """Remove the directory represented by this object."""

    del self.parent_mirror_dir[self.cvs_path]


class _CurrentMirrorReadOnlySubdirectory(
      CurrentMirrorSubdirectory, _ReadOnlyMirrorDirectoryMixin
      ):
  """A subdirectory that has not yet been copied this revision."""

  def _make_writable(self):
    # Convert this instance in place into the writable variant, under
    # a fresh node ID, and register the new ID with the parent:
    self.__class__ = _CurrentMirrorWritableSubdirectory
    self.id = self.repo._key_generator.gen_id()
    self.repo._new_nodes[self.id] = self
    self.parent_mirror_dir._set_entry(self.cvs_path, self)
    # Copy the entries so the old (shared, immutable) node is untouched:
    self._entries = self._entries.copy()


class _CurrentMirrorWritableSubdirectory(
      CurrentMirrorSubdirectory, _WritableMirrorDirectoryMixin
      ):
  """A subdirectory that has already been copied this revision."""

  pass
class LODHistory(object):
  """The history of root directory node IDs for a line of development.

  Records, for one LineOfDevelopment, the node ID of the LOD's root
  directory as a function of revision number.  Only revisions in which
  the root ID actually changed are stored:

    _mirror -- (RepositoryMirror) the RepositoryMirror that manages
        this LODHistory.

    lod -- (LineOfDevelopment) the LOD described by this LODHistory.

    revnums -- (list of int) revision numbers in which the ID changed,
        in increasing order.

    ids -- (list of (int or None)) the corresponding root node IDs; an
        entry of None means the LOD did not exist as of that revision.

  A lookup for revision REVNUM bisects REVNUMS for the most recent
  change not after REVNUM and returns the ID stored at the same index.
  Index 0 of both lists holds a sentinel recording that the LOD does
  not exist in r0."""

  __slots__ = ['_mirror', 'lod', 'revnums', 'ids']

  def __init__(self, mirror, lod):
    self._mirror = mirror
    self.lod = lod
    self.revnums = [0]
    self.ids = [None]

  def get_id(self, revnum):
    """Get the ID of the root path for this LOD in REVNUM.

    Raise KeyError if this LOD didn't exist in REVNUM."""

    i = bisect.bisect_right(self.revnums, revnum) - 1
    id = self.ids[i]
    if id is None:
      raise KeyError()
    return id

  def get_current_id(self):
    """Get the ID of the root path for this LOD in the current revision.

    Raise KeyError if this LOD doesn't currently exist."""

    id = self.ids[-1]
    if id is None:
      raise KeyError()
    return id

  def exists(self):
    """Return True iff LOD exists in the current revision."""

    return self.ids[-1] is not None

  def update(self, revnum, id):
    """Indicate that the root node of this LOD changed to ID at REVNUM.

    REVNUM must be no older than the last recorded change; if it
    equals the last recorded revision, that entry is overwritten
    (subject to sanity checks), otherwise a new entry is appended.  ID
    may be None to record that this LOD ceased to exist in REVNUM."""

    last_revnum = self.revnums[-1]
    if revnum < last_revnum:
      raise KeyError()

    if revnum > last_revnum:
      self.revnums.append(revnum)
      self.ids.append(id)
      return

    # This is an attempt to overwrite an entry that was already
    # updated during this revision.  Don't allow the replacement
    # None -> None or allow one new id to be replaced with another:
    old_id = self.ids[-1]
    if old_id is None and id is None:
      raise InternalError(
          'ID changed from None -> None for %s, r%d' % (self.lod, revnum,)
          )
    elif (old_id is not None and id is not None
          and old_id in self._mirror._new_nodes):
      raise InternalError(
          'ID changed from %x -> %x for %s, r%d'
          % (old_id, id, self.lod, revnum,)
          )
    self.ids[-1] = id
Don't allow the replacement + # None -> None or allow one new id to be replaced with another: + old_id = self.ids[-1] + if old_id is None and id is None: + raise InternalError( + 'ID changed from None -> None for %s, r%d' % (self.lod, revnum,) + ) + elif (old_id is not None and id is not None + and old_id in self._mirror._new_nodes): + raise InternalError( + 'ID changed from %x -> %x for %s, r%d' + % (old_id, id, self.lod, revnum,) + ) + self.ids[-1] = id + else: + self.revnums.append(revnum) + self.ids.append(id) + + +class _NodeDatabase(object): + """A database storing all of the directory nodes. + + The nodes are written in groups every time write_new_nodes() is + called. To the database is written a dictionary {node_id : + [(cvs_path.id, node_id),...]}, where the keys are the node_ids of + the new nodes. When a node is read, its whole group is read and + cached under the assumption that the other nodes in the group are + likely to be needed soon. The cache is retained across revisions + and cleared when _cache_max_size is exceeded. + + The dictionaries for nodes that have been read from the database + during the current revision are cached by node_id in the _cache + member variable. The corresponding dictionaries are *not* copied + when read. To avoid cross-talk between distinct MirrorDirectory + instances that have the same node_id, users of these dictionaries + have to copy them before modification.""" + + # How many entries should be allowed in the cache for each + # CVSDirectory in the repository. (This number is very roughly the + # number of complete lines of development that can be stored in the + # cache at one time.) 
+ CACHE_SIZE_MULTIPLIER = 5 + + # But the cache will never be limited to less than this number: + MIN_CACHE_LIMIT = 5000 + + def __init__(self): + self.cvs_file_db = Ctx()._cvs_file_db + self.db = IndexedDatabase( + artifact_manager.get_temp_file(config.MIRROR_NODES_STORE), + artifact_manager.get_temp_file(config.MIRROR_NODES_INDEX_TABLE), + DB_OPEN_NEW, serializer=MarshalSerializer(), + ) + + # A list of the maximum node_id stored by each call to + # write_new_nodes(): + self._max_node_ids = [0] + + # A map {node_id : {cvs_path : node_id}}: + self._cache = {} + + # The number of directories in the repository: + num_dirs = len([ + cvs_path + for cvs_path in self.cvs_file_db.itervalues() + if isinstance(cvs_path, CVSDirectory) + ]) + + self._cache_max_size = max( + int(self.CACHE_SIZE_MULTIPLIER * num_dirs), + self.MIN_CACHE_LIMIT, + ) + + def _load(self, items): + retval = {} + for (id, value) in items: + retval[self.cvs_file_db.get_file(id)] = value + return retval + + def _dump(self, node): + return [ + (cvs_path.id, value) + for (cvs_path, value) in node.iteritems() + ] + + def _determine_index(self, id): + """Return the index of the record holding the node with ID.""" + + return bisect.bisect_left(self._max_node_ids, id) + + def __getitem__(self, id): + try: + items = self._cache[id] + except KeyError: + index = self._determine_index(id) + for (node_id, items) in self.db[index].items(): + self._cache[node_id] = self._load(items) + items = self._cache[id] + + return items + + def write_new_nodes(self, nodes): + """Write NODES to the database. + + NODES is an iterable of writable CurrentMirrorDirectory instances.""" + + if len(self._cache) > self._cache_max_size: + # The size of the cache has exceeded the threshold. 
class RepositoryMirror:
  """Mirror a repository's directory skeleton, one revision at a time.

  For each LineOfDevelopment seen so far, an LODHistory instance in
  self._lod_histories tracks each revnum in which files were added to
  or deleted from that LOD, along with the node ID of the root of the
  tree describing the LOD's contents at that revision.

  The LOD trees themselves are stored in the _node_db database, which
  maps node IDs to nodes; a node is a map from CVSPath to the IDs of
  the corresponding subnodes.  _node_db resides on disk, and each
  access is expensive; it only holds nodes from already-committed
  revisions.  The nodes of the revision currently being constructed
  are kept in the in-memory _new_nodes map, which is cheap to access,
  until end_commit() flushes them.

  You must invoke start_commit() before each commit and end_commit()
  afterwards."""

  def register_artifacts(self, which_pass):
    """Register the artifacts that will be needed for this object."""

    artifact_manager.register_temp_file(
        config.MIRROR_NODES_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(
        config.MIRROR_NODES_STORE, which_pass
        )

  def open(self):
    """Set up the RepositoryMirror and prepare it for commits."""

    self._key_generator = KeyGenerator()

    # LODHistory instances for all LODs referenced so far:
    self._lod_histories = {}

    # This corresponds to the 'nodes' table in a Subversion fs.  (No
    # 'representations' or 'strings' tables are needed because only
    # file existence is tracked, not file contents.)
    self._node_db = _NodeDatabase()

    # Start at revision 0 without a root node:
    self._youngest = 0

  def start_commit(self, revnum):
    """Start a new commit for revision REVNUM."""

    assert revnum > self._youngest
    self._youngest = revnum

    # Nodes created within this revision, as a map
    # {node_id : _WritableMirrorDirectoryMixin}:
    self._new_nodes = {}

  def end_commit(self):
    """Called at the end of each commit.

    Copy the nodes created during this revision to the on-disk node
    database, skipping any that were subsequently deleted."""

    self._node_db.write_new_nodes([
        node
        for node in self._new_nodes.values()
        if not isinstance(node, DeletedCurrentMirrorDirectory)
        ])

    del self._new_nodes

  def _get_lod_history(self, lod):
    """Return the LODHistory instance describing LOD.

    Create a new (empty) LODHistory if it doesn't yet exist."""

    try:
      return self._lod_histories[lod]
    except KeyError:
      lod_history = LODHistory(self, lod)
      self._lod_histories[lod] = lod_history
      return lod_history

  def get_old_lod_directory(self, lod, revnum):
    """Return the directory for the root path of LOD at revision REVNUM.

    Return an instance of MirrorDirectory if the path exists;
    otherwise, raise KeyError."""

    id = self._get_lod_history(lod).get_id(revnum)
    return OldMirrorDirectory(self, id, self._node_db[id])

  def get_old_path(self, cvs_path, lod, revnum):
    """Return the node for CVS_PATH from LOD at REVNUM.

    If CVS_PATH is a CVSDirectory, then return an instance of
    OldMirrorDirectory.  If CVS_PATH is a CVSFile, return None.

    If CVS_PATH does not exist in the specified LOD and REVNUM, raise
    KeyError."""

    node = self.get_old_lod_directory(lod, revnum)
    for sub_path in cvs_path.get_ancestry()[1:]:
      node = node[sub_path]
    return node

  def get_current_lod_directory(self, lod):
    """Return the directory for the root path of LOD in the current revision.

    Return an instance of CurrentMirrorDirectory.  Raise KeyError if
    the path doesn't already exist."""

    id = self._get_lod_history(lod).get_current_id()
    try:
      return self._new_nodes[id]
    except KeyError:
      return _CurrentMirrorReadOnlyLODDirectory(
          self, id, lod, self._node_db[id]
          )

  def get_current_path(self, cvs_path, lod):
    """Return the node for CVS_PATH from LOD in the current revision.

    If CVS_PATH is a CVSDirectory, then return an instance of
    CurrentMirrorDirectory.  If CVS_PATH is a CVSFile, return None.

    If CVS_PATH does not exist in the current revision of the
    specified LOD, raise KeyError."""

    node = self.get_current_lod_directory(lod)
    for sub_path in cvs_path.get_ancestry()[1:]:
      node = node[sub_path]
    return node

  def add_lod(self, lod):
    """Create a new LOD in this repository.

    Return the CurrentMirrorDirectory that was created.  If the LOD
    already exists, raise LODExistsError."""

    lod_history = self._get_lod_history(lod)
    if lod_history.exists():
      raise LODExistsError(
          'Attempt to create %s in repository mirror when it already exists.'
          % (lod,)
          )
    new_node = _CurrentMirrorWritableLODDirectory(
        self, self._key_generator.gen_id(), lod, {}
        )
    lod_history.update(self._youngest, new_node.id)
    self._new_nodes[new_node.id] = new_node
    return new_node

  def copy_lod(self, src_lod, dest_lod, src_revnum):
    """Copy all of SRC_LOD at SRC_REVNUM to DEST_LOD.

    In the youngest revision of the repository, the destination LOD
    *must not* already exist; raise LODExistsError if it does.

    Return the new node at DEST_LOD, as a CurrentMirrorDirectory."""

    src_node = self.get_old_lod_directory(src_lod, src_revnum)

    dest_lod_history = self._get_lod_history(dest_lod)
    if dest_lod_history.exists():
      raise LODExistsError(
          'Attempt to copy to %s in repository mirror when it already exists.'
          % (dest_lod,)
          )

    # The copy is cheap: the destination LOD simply starts referencing
    # the source's (immutable) root node:
    dest_lod_history.update(self._youngest, src_node.id)

    # Return src_node, packaged up as a CurrentMirrorDirectory:
    return self.get_current_lod_directory(dest_lod)

  def close(self):
    """Free resources and close databases."""

    self._lod_histories = None
    self._node_db.close()
    self._node_db = None
class RevisionRecorder:
  """An object that can record text and deltas from CVS files."""

  def __init__(self):
    """Initialize the RevisionRecorder.

    A RevisionRecorder is instantiated in every program run, even if
    the data-collection pass will not be executed (this allows it to
    register the artifacts that it produces).  Therefore this method
    should not do much; more substantial preparation for use (like
    actually creating the artifacts) should be done in start()."""

    pass

  def register_artifacts(self, which_pass):
    """Register artifacts that will be needed during data recording.

    WHICH_PASS is the pass that will call our callbacks, so it should
    be used to do the registering (e.g., call
    WHICH_PASS.register_temp_file() and/or
    WHICH_PASS.register_temp_file_needed())."""

    pass

  def start(self):
    """Data will soon start being collected.

    Any non-idempotent initialization should be done here."""

    pass

  def start_file(self, cvs_file_items):
    """Prepare to receive data for the file with the given CVS_FILE_ITEMS.

    CVS_FILE_ITEMS is an instance of CVSFileItems describing the file
    dependency topology right after the file tree was parsed out of
    the RCS file (i.e., it reflects the original CVS dependency
    structure).  Note that the CVSFileItems instance will be changed
    later."""

    pass

  def record_text(self, cvs_rev, log, text):
    """Record information about a revision and optionally return a token.

    CVS_REV is a CVSRevision instance describing a revision that has
    log message LOG and text TEXT (as retrieved from the RCS file).
    (TEXT is full text for the HEAD revision, and deltas for other
    revisions.)"""

    raise NotImplementedError()

  def finish_file(self, cvs_file_items):
    """The current file is finished; finish and clean up.

    CVS_FILE_ITEMS is a CVSFileItems instance describing the file's
    items at the end of processing of the RCS file in CollectRevsPass.
    It may be modified relative to the CVS_FILE_ITEMS instance passed
    to the corresponding start_file() call (revisions might be
    deleted, topology changed, etc)."""

    pass

  def finish(self):
    """All recording is done; clean up."""

    pass


class NullRevisionRecorder(RevisionRecorder):
  """A do-nothing variety of RevisionRecorder."""

  def record_text(self, cvs_rev, log, text):
    return None


class RevisionExcluder:
  """An interface for informing a RevisionReader about excluded revisions.

  Currently, revisions can be excluded via the --exclude option and
  various fixups for CVS peculiarities.  This interface can be used to
  inform the associated RevisionReader about CVSItems that are being
  excluded.  (The recorder might use that information to free some
  temporary data or adjust its expectations about which revisions will
  later be read.)"""

  def __init__(self):
    """Initialize the RevisionExcluder.

    A RevisionExcluder is instantiated in every program run, even if
    the branch-exclusion pass will not be executed (this allows its
    register_artifacts() method to be called).  Therefore this method
    should not do much; more substantial preparation for use (like
    actually creating the artifacts) should be done in start()."""

    pass

  def register_artifacts(self, which_pass):
    """Register artifacts that will be needed during branch exclusion.

    WHICH_PASS is the pass that will call our callbacks, so it should
    be used to do the registering (e.g., call
    WHICH_PASS.register_temp_file() and/or
    WHICH_PASS.register_temp_file_needed())."""

    pass

  def start(self):
    """Prepare to handle branch exclusions."""

    pass

  def process_file(self, cvs_file_items):
    """Called for files whose trees were modified in FilterSymbolsPass.

    This callback is called once for each CVSFile whose topology was
    modified in FilterSymbolsPass."""

    raise NotImplementedError()

  def finish(self):
    """Called after all branch exclusions for all files are done."""

    pass


class NullRevisionExcluder(RevisionExcluder):
  """A do-nothing variety of RevisionExcluder."""

  def process_file(self, cvs_file_items):
    pass


class RevisionReader(object):
  """An object that can read the contents of CVSRevisions."""

  def register_artifacts(self, which_pass):
    """Register artifacts that will be needed while reading revisions.

    WHICH_PASS is the pass that will call our callbacks, so it should
    be used to do the registering (e.g., call
    WHICH_PASS.register_temp_file() and/or
    WHICH_PASS.register_temp_file_needed())."""

    pass

  def start(self):
    """Prepare for calls to get_content_stream()."""

    pass

  def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
    """Return a file-like object from which the contents of CVS_REV
    can be read.

    CVS_REV is a CVSRevision.  If SUPPRESS_KEYWORD_SUBSTITUTION is
    True, then suppress the substitution of RCS/CVS keywords in the
    output."""

    raise NotImplementedError

  def finish(self):
    """Inform the reader that all calls to get_content_stream are done.
    Start may be called again at a later point."""

    pass
+# +# ==================================================================== +# Copyright (c) 2000-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains classes to set common cvs2xxx run options.""" + +import sys +import re +import optparse +from optparse import OptionGroup +import time + +from cvs2svn_lib.version import VERSION +from cvs2svn_lib import config +from cvs2svn_lib.common import warning_prefix +from cvs2svn_lib.common import error_prefix +from cvs2svn_lib.common import FatalError +from cvs2svn_lib.common import CVSTextDecoder +from cvs2svn_lib.log import Log +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.man_writer import ManOption +from cvs2svn_lib.pass_manager import InvalidPassError +from cvs2svn_lib.symbol_strategy import AllBranchRule +from cvs2svn_lib.symbol_strategy import AllTagRule +from cvs2svn_lib.symbol_strategy import BranchIfCommitsRule +from cvs2svn_lib.symbol_strategy import ExcludeRegexpStrategyRule +from cvs2svn_lib.symbol_strategy import ForceBranchRegexpStrategyRule +from cvs2svn_lib.symbol_strategy import ForceTagRegexpStrategyRule +from cvs2svn_lib.symbol_strategy import ExcludeTrivialImportBranchRule +from cvs2svn_lib.symbol_strategy import HeuristicStrategyRule +from cvs2svn_lib.symbol_strategy import UnambiguousUsageRule +from cvs2svn_lib.symbol_strategy import HeuristicPreferredParentRule +from cvs2svn_lib.symbol_strategy import 
SymbolHintsFileRule +from cvs2svn_lib.symbol_transform import ReplaceSubstringsSymbolTransform +from cvs2svn_lib.symbol_transform import RegexpSymbolTransform +from cvs2svn_lib.symbol_transform import NormalizePathsSymbolTransform +from cvs2svn_lib.property_setters import AutoPropsPropertySetter +from cvs2svn_lib.property_setters import CVSBinaryFileDefaultMimeTypeSetter +from cvs2svn_lib.property_setters import CVSBinaryFileEOLStyleSetter +from cvs2svn_lib.property_setters import CVSRevisionNumberSetter +from cvs2svn_lib.property_setters import DefaultEOLStyleSetter +from cvs2svn_lib.property_setters import EOLStyleFromMimeTypeSetter +from cvs2svn_lib.property_setters import ExecutablePropertySetter +from cvs2svn_lib.property_setters import KeywordsPropertySetter +from cvs2svn_lib.property_setters import MimeMapper +from cvs2svn_lib.property_setters import SVNBinaryFileKeywordsPropertySetter + + +usage = """\ +Usage: %prog --options OPTIONFILE + %prog [OPTION...] OUTPUT-OPTION CVS-REPOS-PATH""" + +description="""\ +Convert a CVS repository into a Subversion repository, including history. +""" + +authors = u"""\ +Main authors are: +.br +C. Michael Pilato +.br +Greg Stein +.br +Branko \u010cibej +.br +Blair Zajac +.br +Max Bowsher +.br +Brian Fitzpatrick +.br +Tobias Ringstr\u00f6m +.br +Karl Fogel +.br +Erik H\u00fclsmann +.br +David Summers +.br +Michael Haggerty +.PP +Manpage was written for the Debian GNU/Linux system by +Laszlo 'GCS' Boszormenyi (but may be used by others). +""" + + +class IncompatibleOption(ManOption): + """A ManOption that is incompatible with the --options option. 
+ + Record that the option was used so that error checking can later be + done.""" + + def __init__(self, *args, **kw): + ManOption.__init__(self, *args, **kw) + + def take_action(self, action, dest, opt, value, values, parser): + oio = parser.values.options_incompatible_options + if opt not in oio: + oio.append(opt) + return ManOption.take_action( + self, action, dest, opt, value, values, parser + ) + + +class ContextOption(ManOption): + """A ManOption that stores its value to Ctx.""" + + def __init__(self, *args, **kw): + if kw.get('action') not in self.STORE_ACTIONS: + raise ValueError('Invalid action: %s' % (kw['action'],)) + + self.__compatible_with_option = kw.pop('compatible_with_option', False) + self.__action = kw.pop('action') + try: + self.__dest = kw.pop('dest') + except KeyError: + opt = args[0] + if not opt.startswith('--'): + raise ValueError + self.__dest = opt[2:].replace('-', '_') + if 'const' in kw: + self.__const = kw.pop('const') + + kw['action'] = 'callback' + kw['callback'] = self.__callback + + ManOption.__init__(self, *args, **kw) + + def __callback(self, option, opt_str, value, parser): + if not self.__compatible_with_option: + oio = parser.values.options_incompatible_options + if opt_str not in oio: + oio.append(opt_str) + + action = self.__action + dest = self.__dest + + if action == "store": + setattr(Ctx(), dest, value) + elif action == "store_const": + setattr(Ctx(), dest, self.__const) + elif action == "store_true": + setattr(Ctx(), dest, True) + elif action == "store_false": + setattr(Ctx(), dest, False) + elif action == "append": + getattr(Ctx(), dest).append(value) + elif action == "count": + setattr(Ctx(), dest, getattr(Ctx(), dest, 0) + 1) + else: + raise RuntimeError("unknown action %r" % self.__action) + + return 1 + + +class IncompatibleOptionsException(FatalError): + pass + + +# Options that are not allowed to be used with --trunk-only: +SYMBOL_OPTIONS = [ + '--symbol-transform', + '--symbol-hints', + '--force-branch', + 
'--force-tag', + '--exclude', + '--keep-trivial-imports', + '--symbol-default', + '--no-cross-branch-commits', + ] + +class SymbolOptionsWithTrunkOnlyException(IncompatibleOptionsException): + def __init__(self): + IncompatibleOptionsException.__init__( + self, + 'The following symbol-related options cannot be used together\n' + 'with --trunk-only:\n' + ' %s' + % ('\n '.join(SYMBOL_OPTIONS),) + ) + + +def not_both(opt1val, opt1name, opt2val, opt2name): + """Raise an exception if both opt1val and opt2val are set.""" + if opt1val and opt2val: + raise IncompatibleOptionsException( + "cannot pass both '%s' and '%s'." % (opt1name, opt2name,) + ) + + +class RunOptions(object): + """A place to store meta-options that are used to start the conversion.""" + + def __init__(self, progname, cmd_args, pass_manager): + """Process the command-line options, storing run options to SELF. + + PROGNAME is the name of the program, used in the usage string. + CMD_ARGS is the list of command-line arguments passed to the + program. 
PASS_MANAGER is an instance of PassManager, needed to + help process the -p and --help-passes options.""" + + self.progname = progname + self.cmd_args = cmd_args + self.pass_manager = pass_manager + self.start_pass = 1 + self.end_pass = self.pass_manager.num_passes + self.profiling = False + + self.projects = [] + + # A list of one list of SymbolStrategyRules for each project: + self.project_symbol_strategy_rules = [] + + parser = self.parser = optparse.OptionParser( + usage=usage, + description=self.get_description(), + add_help_option=False, + ) + # A place to record any options used that are incompatible with + # --options: + parser.set_default('options_incompatible_options', []) + + # Populate the options parser with the options, one group at a + # time: + parser.add_option_group(self._get_options_file_options_group()) + parser.add_option_group(self._get_output_options_group()) + parser.add_option_group(self._get_conversion_options_group()) + parser.add_option_group(self._get_symbol_handling_options_group()) + parser.add_option_group(self._get_subversion_properties_options_group()) + parser.add_option_group(self._get_extraction_options_group()) + parser.add_option_group(self._get_environment_options_group()) + parser.add_option_group(self._get_partial_conversion_options_group()) + parser.add_option_group(self._get_information_options_group()) + + (self.options, self.args) = parser.parse_args(args=self.cmd_args) + + # Now the log level has been set; log the time when the run started: + Log().verbose( + time.strftime( + 'Conversion start time: %Y-%m-%d %I:%M:%S %Z', + time.localtime(Log().start_time) + ) + ) + + if self.options.options_file_found: + # Check that no options that are incompatible with --options + # were used: + self.verify_option_compatibility() + else: + # --options was not specified. 
So do the main initialization + # based on other command-line options: + self.process_options() + + # Check for problems with the options: + self.check_options() + + def get_description(self): + return description + + def _get_options_file_options_group(self): + group = OptionGroup( + self.parser, 'Configuration via options file' + ) + self.parser.set_default('options_file_found', False) + group.add_option(ManOption( + '--options', type='string', + action='callback', callback=self.callback_options, + help=( + 'read the conversion options from PATH. This ' + 'method allows more flexibility than using ' + 'command-line options. See documentation for info' + ), + man_help=( + 'Read the conversion options from \\fIpath\\fR instead of from ' + 'the command line. This option allows far more conversion ' + 'flexibility than can be achieved using the command-line alone. ' + 'See the documentation for more information. Only the following ' + 'command-line options are allowed in combination with ' + '\\fB--options\\fR: \\fB-h\\fR/\\fB--help\\fR, ' + '\\fB--help-passes\\fR, \\fB--version\\fR, ' + '\\fB-v\\fR/\\fB--verbose\\fR, \\fB-q\\fR/\\fB--quiet\\fR, ' + '\\fB-p\\fR/\\fB--pass\\fR/\\fB--passes\\fR, \\fB--dry-run\\fR, ' + '\\fB--profile\\fR, \\fB--sort\\fR, \\fB--trunk-only\\fR, ' + '\\fB--encoding\\fR, and \\fB--fallback-encoding\\fR. ' + 'Options are processed in the order specified on the command ' + 'line.' + ), + metavar='PATH', + )) + return group + + def _get_output_options_group(self): + group = OptionGroup(self.parser, 'Output options') + return group + + def _get_conversion_options_group(self): + group = OptionGroup(self.parser, 'Conversion options') + group.add_option(ContextOption( + '--trunk-only', + action='store_true', + compatible_with_option=True, + help='convert only trunk commits, not tags nor branches', + man_help=( + 'Convert only trunk commits, not tags nor branches.' 
+            ),
+        ))
+    group.add_option(ManOption(
+        '--encoding', type='string',
+        action='callback', callback=self.callback_encoding,
+        help=(
+            'encoding for paths and log messages in CVS repos. '
+            'If option is specified multiple times, encoders '
+            'are tried in order until one succeeds. See '
+            'http://docs.python.org/lib/standard-encodings.html '
+            'for a list of standard Python encodings.'
+            ),
+        man_help=(
+            'Use \\fIencoding\\fR as the encoding for filenames, log '
+            'messages, and author names in the CVS repos. This option '
+            'may be specified multiple times, in which case the encodings '
+            'are tried in order until one succeeds. Default: ascii. See '
+            'http://docs.python.org/lib/standard-encodings.html for a list '
+            'of other standard encodings.'
+            ),
+        metavar='ENC',
+        ))
+    group.add_option(ManOption(
+        '--fallback-encoding', type='string',
+        action='callback', callback=self.callback_fallback_encoding,
+        help='If all --encodings fail, use lossy encoding with ENC',
+        man_help=(
+            'If none of the encodings specified with \\fB--encoding\\fR '
+            'succeed in decoding an author name or log message, then fall '
+            'back to using \\fIencoding\\fR in lossy \'replace\' mode. '
+            'Use of this option may cause information to be lost, but at '
+            'least it allows the conversion to run to completion. This '
+            'option only affects the encoding of log messages and author '
+            'names; there is no fallback encoding for filenames. (By '
+            'using an \\fB--options\\fR file, it is possible to specify '
+            'a fallback encoding for filenames.) Default: disabled.'
+            ),
+        metavar='ENC',
+        ))
+    group.add_option(ContextOption(
+        '--retain-conflicting-attic-files',
+        action='store_true',
+        help=(
+            'if a file appears both in and out of '
+            'the CVS Attic, then leave the attic version in a '
+            'SVN directory called "Attic"'
+            ),
+        man_help=(
+            'If a file appears both inside and outside of the CVS attic, '
+            'retain the attic version in an SVN subdirectory called '
+            '\'Attic\'. 
(Normally this situation is treated as a fatal '
+            'error.)'
+            ),
+        ))
+
+    return group
+
+  def _get_symbol_handling_options_group(self):
+    group = OptionGroup(self.parser, 'Symbol handling')
+    self.parser.set_default('symbol_transforms', [])
+    group.add_option(IncompatibleOption(
+        '--symbol-transform', type='string',
+        action='callback', callback=self.callback_symbol_transform,
+        help=(
+            'transform symbol names from P to S, where P and S '
+            'use Python regexp and reference syntax '
+            'respectively. P must match the whole symbol name'
+            ),
+        man_help=(
+            'Transform RCS/CVS symbol names before entering them into '
+            'Subversion. \\fIpattern\\fR is a Python regexp pattern that '
+            'is matched against the entire symbol name; \\fIreplacement\\fR '
+            'is a replacement using Python\'s regexp reference syntax. '
+            'You may specify any number of these options; they will be '
+            'applied in the order given on the command line.'
+            ),
+        metavar='P:S',
+        ))
+    self.parser.set_default('symbol_strategy_rules', [])
+    group.add_option(IncompatibleOption(
+        '--symbol-hints', type='string',
+        action='callback', callback=self.callback_symbol_hints,
+        help='read symbol conversion hints from PATH',
+        man_help=(
+            'Read symbol conversion hints from \\fIpath\\fR. The format of '
+            '\\fIpath\\fR is the same as the format output by '
+            '\\fB--write-symbol-info\\fR, namely a text file with four '
+            'whitespace-separated columns: \\fIproject-id\\fR, '
+            '\\fIsymbol\\fR, \\fIconversion\\fR, and '
+            '\\fIparent-lod-name\\fR. \\fIproject-id\\fR is the numerical '
+            'ID of the project to which the symbol belongs, counting from '
+            '0. \\fIproject-id\\fR can be set to \'.\' if '
+            'project-specificity is not needed. \\fIsymbol-name\\fR is the '
+            'name of the symbol being specified. \\fIconversion\\fR '
+            'specifies how the symbol should be converted, and can be one '
+            'of the values \'branch\', \'tag\', or \'exclude\'. 
If ' + '\\fIconversion\\fR is \'.\', then this rule does not affect ' + 'how the symbol is converted. \\fIparent-lod-name\\fR is the ' + 'name of the symbol from which this symbol should sprout, or ' + '\'.trunk.\' if the symbol should sprout from trunk. If ' + '\\fIparent-lod-name\\fR is omitted or \'.\', then this rule ' + 'does not affect the preferred parent of this symbol. The file ' + 'may contain blank lines or comment lines (lines whose first ' + 'non-whitespace character is \'#\').' + ), + metavar='PATH', + )) + self.parser.set_default('symbol_default', 'heuristic') + group.add_option(IncompatibleOption( + '--symbol-default', type='choice', + choices=['heuristic', 'strict', 'branch', 'tag'], + action='store', + help=( + 'specify how ambiguous symbols are converted. ' + 'OPT is "heuristic" (default), "strict", "branch", ' + 'or "tag"' + ), + man_help=( + 'Specify how to convert ambiguous symbols (those that appear in ' + 'the CVS archive as both branches and tags). \\fIopt\\fR must ' + 'be \'heuristic\' (decide how to treat each ambiguous symbol ' + 'based on whether it was used more often as a branch/tag in ' + 'CVS), \'strict\' (no default; every ambiguous symbol has to be ' + 'resolved manually using \\fB--force-branch\\fR, ' + '\\fB--force-tag\\fR, or \\fB--exclude\\fR), \'branch\' (treat ' + 'every ambiguous symbol as a branch), or \'tag\' (treat every ' + 'ambiguous symbol as a tag). The default is \'heuristic\'.' + ), + metavar='OPT', + )) + group.add_option(IncompatibleOption( + '--force-branch', type='string', + action='callback', callback=self.callback_force_branch, + help='force symbols matching REGEXP to be branches', + man_help=( + 'Force symbols whose names match \\fIregexp\\fR to be branches. ' + '\\fIregexp\\fR must match the whole symbol name.' 
+ ), + metavar='REGEXP', + )) + group.add_option(IncompatibleOption( + '--force-tag', type='string', + action='callback', callback=self.callback_force_tag, + help='force symbols matching REGEXP to be tags', + man_help=( + 'Force symbols whose names match \\fIregexp\\fR to be tags. ' + '\\fIregexp\\fR must match the whole symbol name.' + ), + metavar='REGEXP', + )) + group.add_option(IncompatibleOption( + '--exclude', type='string', + action='callback', callback=self.callback_exclude, + help='exclude branches and tags matching REGEXP', + man_help=( + 'Exclude branches and tags whose names match \\fIregexp\\fR ' + 'from the conversion. \\fIregexp\\fR must match the whole ' + 'symbol name.' + ), + metavar='REGEXP', + )) + self.parser.set_default('keep_trivial_imports', False) + group.add_option(IncompatibleOption( + '--keep-trivial-imports', + action='store_true', + help=( + 'do not exclude branches that were only used for ' + 'a single import (usually these are unneeded)' + ), + man_help=( + 'Do not exclude branches that were only used for a single ' + 'import. (By default such branches are excluded because they ' + 'are usually created by the inappropriate use of \\fBcvs ' + 'import\\fR.)' + ), + )) + + return group + + def _get_subversion_properties_options_group(self): + group = OptionGroup(self.parser, 'Subversion properties') + group.add_option(ContextOption( + '--username', type='string', + action='store', + help='username for cvs2svn-synthesized commits', + man_help=( + 'Set the default username to \\fIname\\fR when cvs2svn needs ' + 'to generate a commit for which CVS does not record the ' + 'original username. This happens when a branch or tag is ' + 'created. The default is to use no author at all for such ' + 'commits.' 
+ ), + metavar='NAME', + )) + self.parser.set_default('auto_props_files', []) + group.add_option(IncompatibleOption( + '--auto-props', type='string', + action='append', dest='auto_props_files', + help=( + 'set file properties from the auto-props section ' + 'of a file in svn config format' + ), + man_help=( + 'Specify a file in the format of Subversion\'s config file, ' + 'whose [auto-props] section can be used to set arbitrary ' + 'properties on files in the Subversion repository based on ' + 'their filenames. (The [auto-props] section header must be ' + 'present; other sections of the config file, including the ' + 'enable-auto-props setting, are ignored.) Filenames are matched ' + 'to the filename patterns case-insensitively.' + + ), + metavar='FILE', + )) + self.parser.set_default('mime_types_files', []) + group.add_option(IncompatibleOption( + '--mime-types', type='string', + action='append', dest='mime_types_files', + help=( + 'specify an apache-style mime.types file for setting ' + 'svn:mime-type' + ), + man_help=( + 'Specify an apache-style mime.types \\fIfile\\fR for setting ' + 'svn:mime-type.' + ), + metavar='FILE', + )) + self.parser.set_default('eol_from_mime_type', False) + group.add_option(IncompatibleOption( + '--eol-from-mime-type', + action='store_true', + help='set svn:eol-style from mime type if known', + man_help=( + 'For files that don\'t have the kb expansion mode but have a ' + 'known mime type, set the eol-style based on the mime type. ' + 'For such files, set svn:eol-style to "native" if the mime type ' + 'begins with "text/", and leave it unset (i.e., no EOL ' + 'translation) otherwise. Files with unknown mime types are ' + 'not affected by this option. This option has no effect ' + 'unless the \\fB--mime-types\\fR option is also specified.' 
+ ), + )) + group.add_option(IncompatibleOption( + '--default-eol', type='choice', + choices=['binary', 'native', 'CRLF', 'LF', 'CR'], + action='store', + help=( + 'default svn:eol-style for non-binary files with ' + 'undetermined mime types. STYLE is "binary" ' + '(default), "native", "CRLF", "LF", or "CR"' + ), + man_help=( + 'Set svn:eol-style to \\fIstyle\\fR for files that don\'t have ' + 'the CVS \'kb\' expansion mode and whose end-of-line ' + 'translation mode hasn\'t been determined by one of the other ' + 'options. \\fIstyle\\fR must be \'binary\' (default), ' + '\'native\', \'CRLF\', \'LF\', or \'CR\'.' + ), + metavar='STYLE', + )) + self.parser.set_default('keywords_off', False) + group.add_option(IncompatibleOption( + '--keywords-off', + action='store_true', + help=( + 'don\'t set svn:keywords on any files (by default, ' + 'cvs2svn sets svn:keywords on non-binary files to "%s")' + % (config.SVN_KEYWORDS_VALUE,) + ), + man_help=( + 'By default, cvs2svn sets svn:keywords on CVS files to "author ' + 'id date" if the mode of the RCS file in question is either kv, ' + 'kvl or unset. If you use the --keywords-off switch, cvs2svn ' + 'will not set svn:keywords for any file. While this will not ' + 'touch the keywords in the contents of your files, Subversion ' + 'will not expand them.' + ), + )) + group.add_option(ContextOption( + '--keep-cvsignore', + action='store_true', + help=( + 'keep .cvsignore files (in addition to creating ' + 'the analogous svn:ignore properties)' + ), + man_help=( + 'Include \\fI.cvsignore\\fR files in the output. (Normally ' + 'they are unneeded because cvs2svn sets the corresponding ' + '\\fIsvn:ignore\\fR properties.)' + ), + )) + group.add_option(IncompatibleOption( + '--cvs-revnums', + action='callback', callback=self.callback_cvs_revnums, + help='record CVS revision numbers as file properties', + man_help=( + 'Record CVS revision numbers as file properties in the ' + 'Subversion repository. 
(Note that unless it is removed ' + 'explicitly, the last CVS revision number will remain ' + 'associated with the file even after the file is changed ' + 'within Subversion.)' + ), + )) + + # Deprecated options: + group.add_option(IncompatibleOption( + '--no-default-eol', + action='store_const', dest='default_eol', const=None, + help=optparse.SUPPRESS_HELP, + man_help=optparse.SUPPRESS_HELP, + )) + self.parser.set_default('auto_props_ignore_case', True) + # True is the default now, so this option has no effect: + group.add_option(IncompatibleOption( + '--auto-props-ignore-case', + action='store_true', + help=optparse.SUPPRESS_HELP, + man_help=optparse.SUPPRESS_HELP, + )) + + return group + + def _get_extraction_options_group(self): + group = OptionGroup(self.parser, 'Extraction options') + + return group + + def _get_environment_options_group(self): + group = OptionGroup(self.parser, 'Environment options') + group.add_option(ContextOption( + '--tmpdir', type='string', + action='store', + help=( + 'directory to use for temporary data files ' + '(default "cvs2svn-tmp")' + ), + man_help=( + 'Set the \\fIpath\\fR to use for temporary data. Default ' + 'is a directory called \\fIcvs2svn-tmp\\fR under the current ' + 'directory.' + ), + metavar='PATH', + )) + self.parser.set_default('co_executable', config.CO_EXECUTABLE) + group.add_option(IncompatibleOption( + '--co', type='string', + action='store', dest='co_executable', + help='path to the "co" program (required if --use-rcs)', + man_help=( + 'Path to the \\fIco\\fR program. (\\fIco\\fR is needed if the ' + '\\fB--use-rcs\\fR option is used.)' + ), + metavar='PATH', + )) + self.parser.set_default('cvs_executable', config.CVS_EXECUTABLE) + group.add_option(IncompatibleOption( + '--cvs', type='string', + action='store', dest='cvs_executable', + help='path to the "cvs" program (required if --use-cvs)', + man_help=( + 'Path to the \\fIcvs\\fR program. 
(\\fIcvs\\fR is needed if the ' + '\\fB--use-cvs\\fR option is used.)' + ), + metavar='PATH', + )) + group.add_option(ContextOption( + '--sort', type='string', + action='store', dest='sort_executable', + compatible_with_option=True, + help='path to the GNU "sort" program', + man_help=( + 'Path to the GNU \\fIsort\\fR program. (cvs2svn requires GNU ' + 'sort.)' + ), + metavar='PATH', + )) + + return group + + def _get_partial_conversion_options_group(self): + group = OptionGroup(self.parser, 'Partial conversions') + group.add_option(ManOption( + '--pass', type='string', + action='callback', callback=self.callback_passes, + help='execute only specified PASS of conversion', + man_help=( + 'Execute only pass \\fIpass\\fR of the conversion. ' + '\\fIpass\\fR can be specified by name or by number (see ' + '\\fB--help-passes\\fR).' + ), + metavar='PASS', + )) + group.add_option(ManOption( + '--passes', '-p', type='string', + action='callback', callback=self.callback_passes, + help=( + 'execute passes START through END, inclusive (PASS, ' + 'START, and END can be pass names or numbers)' + ), + man_help=( + 'Execute passes \\fIstart\\fR through \\fIend\\fR of the ' + 'conversion (inclusive). \\fIstart\\fR and \\fIend\\fR can be ' + 'specified by name or by number (see \\fB--help-passes\\fR). ' + 'If \\fIstart\\fR or \\fIend\\fR is missing, it defaults to ' + 'the first or last pass, respectively. For this to work the ' + 'earlier passes must have been completed before on the ' + 'same CVS repository, and the generated data files must be ' + 'in the temporary directory (see \\fB--tmpdir\\fR).' 
+ ), + metavar='[START]:[END]', + )) + + return group + + def _get_information_options_group(self): + group = OptionGroup(self.parser, 'Information options') + group.add_option(ManOption( + '--version', + action='callback', callback=self.callback_version, + help='print the version number', + man_help='Print the version number.', + )) + group.add_option(ManOption( + '--help', '-h', + action="help", + help='print this usage message and exit with success', + man_help='Print the usage message and exit with success.', + )) + group.add_option(ManOption( + '--help-passes', + action='callback', callback=self.callback_help_passes, + help='list the available passes and their numbers', + man_help=( + 'Print the numbers and names of the conversion passes and ' + 'exit with success.' + ), + )) + group.add_option(ManOption( + '--man', + action='callback', callback=self.callback_manpage, + help='write the manpage for this program to standard output', + man_help=( + 'Output the unix-style manpage for this program to standard ' + 'output.' + ), + )) + group.add_option(ManOption( + '--verbose', '-v', + action='callback', callback=self.callback_verbose, + help='verbose (may be specified twice for debug output)', + man_help=( + 'Print more information while running. This option may be ' + 'specified twice to output voluminous debugging information.' + ), + )) + group.add_option(ManOption( + '--quiet', '-q', + action='callback', callback=self.callback_quiet, + help='quiet (may be specified twice for very quiet)', + man_help=( + 'Print less information while running. This option may be ' + 'specified twice to suppress all non-error output.' + ), + )) + group.add_option(ContextOption( + '--write-symbol-info', type='string', + action='store', dest='symbol_info_filename', + help='write information and statistics about CVS symbols to PATH.', + man_help=( + 'Write to \\fIpath\\fR symbol statistics and information about ' + 'how symbols were converted during CollateSymbolsPass.' 
+ ), + metavar='PATH', + )) + group.add_option(ContextOption( + '--skip-cleanup', + action='store_true', + help='prevent the deletion of intermediate files', + man_help='Prevent the deletion of temporary files.', + )) + group.add_option(ManOption( + '--profile', + action='callback', callback=self.callback_profile, + help='profile with \'hotshot\' (into file cvs2svn.hotshot)', + man_help=( + 'Profile with \'hotshot\' (into file \\fIcvs2svn.hotshot\\fR).' + ), + )) + + return group + + def callback_options(self, option, opt_str, value, parser): + parser.values.options_file_found = True + self.process_options_file(value) + + def callback_encoding(self, option, opt_str, value, parser): + ctx = Ctx() + + try: + ctx.cvs_author_decoder.add_encoding(value) + ctx.cvs_log_decoder.add_encoding(value) + ctx.cvs_filename_decoder.add_encoding(value) + except LookupError, e: + raise FatalError(str(e)) + + def callback_fallback_encoding(self, option, opt_str, value, parser): + ctx = Ctx() + + try: + ctx.cvs_author_decoder.set_fallback_encoding(value) + ctx.cvs_log_decoder.set_fallback_encoding(value) + # Don't use fallback_encoding for filenames. 
+ except LookupError, e: + raise FatalError(str(e)) + + def callback_help_passes(self, option, opt_str, value, parser): + self.pass_manager.help_passes() + sys.exit(0) + + def callback_manpage(self, option, opt_str, value, parser): + raise NotImplementedError() + + def callback_version(self, option, opt_str, value, parser): + sys.stdout.write( + '%s version %s\n' % (self.progname, VERSION) + ) + sys.exit(0) + + def callback_verbose(self, option, opt_str, value, parser): + Log().increase_verbosity() + + def callback_quiet(self, option, opt_str, value, parser): + Log().decrease_verbosity() + + def callback_passes(self, option, opt_str, value, parser): + if value.find(':') >= 0: + start_pass, end_pass = value.split(':') + self.start_pass = self.pass_manager.get_pass_number(start_pass, 1) + self.end_pass = self.pass_manager.get_pass_number( + end_pass, self.pass_manager.num_passes + ) + else: + self.end_pass = \ + self.start_pass = \ + self.pass_manager.get_pass_number(value) + + def callback_profile(self, option, opt_str, value, parser): + self.profiling = True + + def callback_symbol_hints(self, option, opt_str, value, parser): + parser.values.symbol_strategy_rules.append(SymbolHintsFileRule(value)) + + def callback_force_branch(self, option, opt_str, value, parser): + parser.values.symbol_strategy_rules.append( + ForceBranchRegexpStrategyRule(value) + ) + + def callback_force_tag(self, option, opt_str, value, parser): + parser.values.symbol_strategy_rules.append( + ForceTagRegexpStrategyRule(value) + ) + + def callback_exclude(self, option, opt_str, value, parser): + parser.values.symbol_strategy_rules.append( + ExcludeRegexpStrategyRule(value) + ) + + def callback_cvs_revnums(self, option, opt_str, value, parser): + Ctx().svn_property_setters.append(CVSRevisionNumberSetter()) + + def callback_symbol_transform(self, option, opt_str, value, parser): + [pattern, replacement] = value.split(":") + try: + parser.values.symbol_transforms.append( + 
RegexpSymbolTransform(pattern, replacement) + ) + except re.error: + raise FatalError("'%s' is not a valid regexp." % (pattern,)) + + def process_symbol_strategy_options(self): + """Process symbol strategy-related options.""" + + ctx = Ctx() + options = self.options + + # Add the standard symbol name cleanup rules: + self.options.symbol_transforms.extend([ + ReplaceSubstringsSymbolTransform('\\','/'), + # Remove leading, trailing, and repeated slashes: + NormalizePathsSymbolTransform(), + ]) + + if ctx.trunk_only: + if options.symbol_strategy_rules or options.keep_trivial_imports: + raise SymbolOptionsWithTrunkOnlyException() + + else: + if not options.keep_trivial_imports: + options.symbol_strategy_rules.append(ExcludeTrivialImportBranchRule()) + + options.symbol_strategy_rules.append(UnambiguousUsageRule()) + if options.symbol_default == 'strict': + pass + elif options.symbol_default == 'branch': + options.symbol_strategy_rules.append(AllBranchRule()) + elif options.symbol_default == 'tag': + options.symbol_strategy_rules.append(AllTagRule()) + elif options.symbol_default == 'heuristic': + options.symbol_strategy_rules.append(BranchIfCommitsRule()) + options.symbol_strategy_rules.append(HeuristicStrategyRule()) + else: + assert False + + # Now add a rule whose job it is to pick the preferred parents of + # branches and tags: + options.symbol_strategy_rules.append(HeuristicPreferredParentRule()) + + def process_property_setter_options(self): + """Process the options that set SVN properties.""" + + ctx = Ctx() + options = self.options + + for value in options.auto_props_files: + ctx.svn_property_setters.append( + AutoPropsPropertySetter(value, options.auto_props_ignore_case) + ) + + for value in options.mime_types_files: + ctx.svn_property_setters.append(MimeMapper(value)) + + ctx.svn_property_setters.append(CVSBinaryFileEOLStyleSetter()) + + ctx.svn_property_setters.append(CVSBinaryFileDefaultMimeTypeSetter()) + + if options.eol_from_mime_type: + 
ctx.svn_property_setters.append(EOLStyleFromMimeTypeSetter()) + + ctx.svn_property_setters.append( + DefaultEOLStyleSetter(options.default_eol) + ) + + ctx.svn_property_setters.append(SVNBinaryFileKeywordsPropertySetter()) + + if not options.keywords_off: + ctx.svn_property_setters.append( + KeywordsPropertySetter(config.SVN_KEYWORDS_VALUE)) + + ctx.svn_property_setters.append(ExecutablePropertySetter()) + + def process_options(self): + """Do the main configuration based on command-line options. + + This method is only called if the --options option was not + specified.""" + + raise NotImplementedError() + + def check_options(self): + """Check the the run options are OK. + + This should only be called after all options have been processed.""" + + # Convenience var, so we don't have to keep instantiating this Borg. + ctx = Ctx() + + if not self.start_pass <= self.end_pass: + raise InvalidPassError( + 'Ending pass must not come before starting pass.') + + if not ctx.dry_run and ctx.output_option is None: + raise FatalError('No output option specified.') + + if ctx.output_option is not None: + ctx.output_option.check() + + if not self.projects: + raise FatalError('No project specified.') + + def verify_option_compatibility(self): + """Verify that no options incompatible with --options were used. + + The --options option was specified. 
Verify that no incompatible + options or arguments were specified.""" + + if self.options.options_incompatible_options or self.args: + if self.options.options_incompatible_options: + oio = self.options.options_incompatible_options + Log().error( + '%s: The following options cannot be used in combination with ' + 'the --options\n' + 'option:\n' + ' %s\n' + % (error_prefix, '\n '.join(oio)) + ) + if self.args: + Log().error( + '%s: No cvs-repos-path arguments are allowed with the --options ' + 'option.\n' + % (error_prefix,) + ) + sys.exit(1) + + def process_options_file(self, options_filename): + """Read options from the file named OPTIONS_FILENAME. + + Store the run options to SELF.""" + + g = { + 'ctx' : Ctx(), + 'run_options' : self, + } + execfile(options_filename, g) + + def usage(self): + self.parser.print_help() + + diff --git a/cvs2svn_lib/serializer.py b/cvs2svn_lib/serializer.py new file mode 100644 index 0000000..24bd81c --- /dev/null +++ b/cvs2svn_lib/serializer.py @@ -0,0 +1,146 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
+# ==================================================================== + +"""Picklers and unpicklers that are primed with known objects.""" + + +import cStringIO +import marshal +import cPickle +import zlib + + +class Serializer: + """An object able to serialize/deserialize some class of objects.""" + + def dumpf(self, f, object): + """Serialize OBJECT to file-like object F.""" + + raise NotImplementedError() + + def dumps(self, object): + """Return a string containing OBJECT in serialized form.""" + + raise NotImplementedError() + + def loadf(self, f): + """Return the next object deserialized from file-like object F.""" + + raise NotImplementedError() + + def loads(self, s): + """Return the object deserialized from string S.""" + + raise NotImplementedError() + + +class MarshalSerializer(Serializer): + """This class uses the marshal module to serialize/deserialize. + + This means that it shares the limitations of the marshal module, + namely only being able to serialize a few simple python data types + without reference loops.""" + + def dumpf(self, f, object): + marshal.dump(object, f) + + def dumps(self, object): + return marshal.dumps(object) + + def loadf(self, f): + return marshal.load(f) + + def loads(self, s): + return marshal.loads(s) + + +class PrimedPickleSerializer(Serializer): + """This class acts as a pickler/unpickler with a pre-initialized memo. + + The picklers and unpicklers are 'pre-trained' to recognize the + objects that are in the primer. If objects are recognized + from PRIMER, then only their persistent IDs need to be pickled + instead of the whole object. (Note that the memos needed for + pickling and unpickling are different.) + + A new pickler/unpickler is created for each use, each time with the + memo initialized appropriately for pickling or unpickling.""" + + def __init__(self, primer): + """Prepare to make picklers/unpicklers with the specified primer. 
+ + The Pickler and Unpickler are 'primed' by pre-pickling PRIMER, + which can be an arbitrary object (e.g., a list of objects that are + expected to occur frequently in the objects to be serialized).""" + + f = cStringIO.StringIO() + pickler = cPickle.Pickler(f, -1) + pickler.dump(primer) + self.pickler_memo = pickler.memo + + unpickler = cPickle.Unpickler(cStringIO.StringIO(f.getvalue())) + unpickler.load() + self.unpickler_memo = unpickler.memo + + def dumpf(self, f, object): + """Serialize OBJECT to file-like object F.""" + + pickler = cPickle.Pickler(f, -1) + pickler.memo = self.pickler_memo.copy() + pickler.dump(object) + + def dumps(self, object): + """Return a string containing OBJECT in serialized form.""" + + f = cStringIO.StringIO() + self.dumpf(f, object) + return f.getvalue() + + def loadf(self, f): + """Return the next object deserialized from file-like object F.""" + + unpickler = cPickle.Unpickler(f) + unpickler.memo = self.unpickler_memo.copy() + return unpickler.load() + + def loads(self, s): + """Return the object deserialized from string S.""" + + return self.loadf(cStringIO.StringIO(s)) + + +class CompressingSerializer(Serializer): + """This class wraps other Serializers to compress their serialized data.""" + + def __init__(self, wrapee): + """Constructor. WRAPEE is the Serializer whose bitstream ought to be + compressed.""" + + self.wrapee = wrapee + + def dumpf(self, f, object): + marshal.dump(zlib.compress(self.wrapee.dumps(object), 9), f) + + def dumps(self, object): + return marshal.dumps(zlib.compress(self.wrapee.dumps(object), 9)) + + def loadf(self, f): + return self.wrapee.loads(zlib.decompress(marshal.load(f))) + + def loads(self, s): + return self.wrapee.loads(zlib.decompress(marshal.loads(s))) + + diff --git a/cvs2svn_lib/stats_keeper.py b/cvs2svn_lib/stats_keeper.py new file mode 100644 index 0000000..1a82540 --- /dev/null +++ b/cvs2svn_lib/stats_keeper.py @@ -0,0 +1,189 @@ +# (Be in -*- python -*- mode.) 
+# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains the StatsKeeper class. + +A StatsKeeper can pickle itself to a STATISTICS_FILE. This module +also includes a function to read a StatsKeeper from a STATISTICS_FILE.""" + + +import time +import cPickle +from cStringIO import StringIO + +from cvs2svn_lib.cvs_item import CVSRevision +from cvs2svn_lib.cvs_item import CVSBranch +from cvs2svn_lib.cvs_item import CVSTag + + +class StatsKeeper: + def __init__(self): + self._svn_rev_count = None + self._first_rev_date = 1L<<32 + self._last_rev_date = 0 + self._pass_timings = { } + self._stats_reflect_exclude = False + self.reset_cvs_rev_info() + + def log_duration_for_pass(self, duration, pass_num, pass_name): + self._pass_timings[pass_num] = (pass_name, duration,) + + def set_stats_reflect_exclude(self, value): + self._stats_reflect_exclude = value + + def reset_cvs_rev_info(self): + self._repos_file_count = 0 + self._repos_size = 0 + self._cvs_revs_count = 0 + self._cvs_branches_count = 0 + self._cvs_tags_count = 0 + + # A set of tag_ids seen: + self._tag_ids = set() + + # A set of branch_ids seen: + self._branch_ids = set() + + def record_cvs_file(self, cvs_file): + self._repos_file_count += 1 + self._repos_size += cvs_file.file_size + + def _record_cvs_rev(self, cvs_rev): + 
self._cvs_revs_count += 1 + + if cvs_rev.timestamp < self._first_rev_date: + self._first_rev_date = cvs_rev.timestamp + + if cvs_rev.timestamp > self._last_rev_date: + self._last_rev_date = cvs_rev.timestamp + + def _record_cvs_branch(self, cvs_branch): + self._cvs_branches_count += 1 + self._branch_ids.add(cvs_branch.symbol.id) + + def _record_cvs_tag(self, cvs_tag): + self._cvs_tags_count += 1 + self._tag_ids.add(cvs_tag.symbol.id) + + def record_cvs_item(self, cvs_item): + if isinstance(cvs_item, CVSRevision): + self._record_cvs_rev(cvs_item) + elif isinstance(cvs_item, CVSBranch): + self._record_cvs_branch(cvs_item) + elif isinstance(cvs_item, CVSTag): + self._record_cvs_tag(cvs_item) + else: + raise RuntimeError('Unknown CVSItem type') + + def set_svn_rev_count(self, count): + self._svn_rev_count = count + + def svn_rev_count(self): + return self._svn_rev_count + + def __getstate__(self): + state = self.__dict__.copy() + # This can get kinda large, so we don't store it: + return state + + def archive(self, filename): + f = open(filename, 'wb') + cPickle.dump(self, f) + f.close() + + def __str__(self): + f = StringIO() + f.write('\n') + f.write('cvs2svn Statistics:\n') + f.write('------------------\n') + f.write('Total CVS Files: %10i\n' % (self._repos_file_count,)) + f.write('Total CVS Revisions: %10i\n' % (self._cvs_revs_count,)) + f.write('Total CVS Branches: %10i\n' % (self._cvs_branches_count,)) + f.write('Total CVS Tags: %10i\n' % (self._cvs_tags_count,)) + f.write('Total Unique Tags: %10i\n' % (len(self._tag_ids),)) + f.write('Total Unique Branches: %10i\n' % (len(self._branch_ids),)) + f.write('CVS Repos Size in KB: %10i\n' % ((self._repos_size / 1024),)) + + if self._svn_rev_count is not None: + f.write('Total SVN Commits: %10i\n' % self._svn_rev_count) + + f.write( + 'First Revision Date: %s\n' % (time.ctime(self._first_rev_date),) + ) + f.write( + 'Last Revision Date: %s\n' % (time.ctime(self._last_rev_date),) + ) + f.write('------------------') + + 
if not self._stats_reflect_exclude: + f.write( + '\n' + '(These are unaltered CVS repository stats and do not\n' + ' reflect tags or branches excluded via --exclude)\n' + ) + + return f.getvalue() + + @staticmethod + def _get_timing_format(value): + # Output times with up to 3 decimal places: + decimals = max(0, 4 - len('%d' % int(value))) + length = len(('%%.%df' % decimals) % value) + return '%%%d.%df' % (length, decimals,) + + def single_pass_timing(self, pass_num): + (pass_name, duration,) = self._pass_timings[pass_num] + format = self._get_timing_format(duration) + time_string = format % (duration,) + return ( + 'Time for pass%d (%s): %s seconds.' + % (pass_num, pass_name, time_string,) + ) + + def timings(self): + passes = self._pass_timings.keys() + passes.sort() + f = StringIO() + f.write('Timings (seconds):\n') + f.write('------------------\n') + + total = 0.0 + for pass_num in passes: + (pass_name, duration,) = self._pass_timings[pass_num] + total += duration + + format = self._get_timing_format(total) + + for pass_num in passes: + (pass_name, duration,) = self._pass_timings[pass_num] + f.write( + (format + ' pass%-2d %s\n') % (duration, pass_num, pass_name,) + ) + + f.write((format + ' total') % total) + return f.getvalue() + + +def read_stats_keeper(filename): + """Factory function: Return a _StatsKeeper instance. + + Read the instance from FILENAME as written by StatsKeeper.archive().""" + + f = open(filename, 'rb') + retval = cPickle.load(f) + f.close() + return retval + diff --git a/cvs2svn_lib/stdout_delegate.py b/cvs2svn_lib/stdout_delegate.py new file mode 100644 index 0000000..2b4e228 --- /dev/null +++ b/cvs2svn_lib/stdout_delegate.py @@ -0,0 +1,107 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. 
The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains database facilities used by cvs2svn.""" + + +from cvs2svn_lib.log import Log +from cvs2svn_lib.svn_repository_delegate import SVNRepositoryDelegate + + +class StdoutDelegate(SVNRepositoryDelegate): + """Makes no changes to the disk, but writes out information to + STDOUT about what is happening in the SVN output. Of course, our + print statements will state that we're doing something, when in + reality, we aren't doing anything other than printing out that we're + doing something. Kind of zen, really.""" + + def __init__(self, total_revs): + self.total_revs = total_revs + + def start_commit(self, revnum, revprops): + """Prints out the Subversion revision number of the commit that is + being started.""" + + Log().verbose("=" * 60) + Log().normal("Starting Subversion r%d / %d" % (revnum, self.total_revs)) + + def end_commit(self): + pass + + def initialize_project(self, project): + Log().verbose(" Initializing project %s" % (project,)) + + def initialize_lod(self, lod): + Log().verbose(" Initializing %s" % (lod,)) + + def mkdir(self, lod, cvs_directory): + Log().verbose( + " New Directory %s" % (lod.get_path(cvs_directory.cvs_path),) + ) + + def add_path(self, s_item): + """Print a line stating what path we are 'adding'.""" + + Log().verbose(" Adding %s" % (s_item.cvs_rev.get_svn_path(),)) + + def change_path(self, s_item): + """Print a line stating what path we are 'changing'.""" + + Log().verbose(" Changing %s" % (s_item.cvs_rev.get_svn_path(),)) + + def delete_lod(self, lod): + """Print 
a line stating that we are 'deleting' LOD.""" + + Log().verbose(" Deleting %s" % (lod.get_path(),)) + + def delete_path(self, lod, cvs_path): + """Print a line stating that we are 'deleting' PATH.""" + + Log().verbose(" Deleting %s" % (lod.get_path(cvs_path.cvs_path),)) + + def _show_copy(self, src_path, dest_path, src_revnum): + """Print a line stating that we are 'copying' revision SRC_REVNUM + of SRC_PATH to DEST_PATH.""" + + Log().verbose( + " Copying revision %d of %s\n" + " to %s\n" + % (src_revnum, src_path, dest_path,) + ) + + def copy_lod(self, src_lod, dest_lod, src_revnum): + """Print a line stating that we are 'copying' revision SRC_REVNUM + of SRC_PATH to DEST_PATH.""" + + self._show_copy(src_lod.get_path(), dest_lod.get_path(), src_revnum) + + def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum): + """Print a line stating that we are 'copying' revision SRC_REVNUM + of CVS_PATH from SRC_LOD to DEST_LOD.""" + + self._show_copy( + src_lod.get_path(cvs_path.cvs_path), + dest_lod.get_path(cvs_path.cvs_path), + src_revnum, + ) + + def finish(self): + """State that we are done creating our repository.""" + + Log().verbose("Finished creating Subversion repository.") + Log().quiet("Done.") + + diff --git a/cvs2svn_lib/svn_commit.py b/cvs2svn_lib/svn_commit.py new file mode 100644 index 0000000..25dc38e --- /dev/null +++ b/cvs2svn_lib/svn_commit.py @@ -0,0 +1,381 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. 
For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains the SVNCommit classes. + +There are five types of SVNCommits: + + SVNInitialProjectCommit -- Initializes a project (creates its trunk, + branches, and tags directories). + + SVNPrimaryCommit -- Commits one or more CVSRevisions on one or more + lines of development. + + SVNBranchCommit -- Creates or fills a branch; that is, copies files + from a source line of development to a target branch. + + SVNTagCommit -- Creates or fills a tag; that is, copies files from a + source line of development to a target tag. + + SVNPostCommit -- Updates trunk to reflect changes on a non-trunk + default branch. + +""" + + +import textwrap + +from cvs2svn_lib.common import InternalError +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.symbol import Branch +from cvs2svn_lib.symbol import Tag + + +class SVNCommit: + """This represents one commit to the Subversion Repository.""" + + # textwrap.TextWrapper instance to be used for wrapping log messages: + text_wrapper = textwrap.TextWrapper(width=76) + + def __init__(self, date, revnum): + """Instantiate an SVNCommit. + + REVNUM is the SVN revision number of this commit.""" + + # The date of the commit, as an integer. While the SVNCommit is + # being built up, this contains the latest date seen so far. This + # member is set externally. + self.date = date + + # The SVN revision number of this commit, as an integer. + self.revnum = revnum + + def __getstate__(self): + return (self.date, self.revnum,) + + def __setstate__(self, state): + (self.date, self.revnum,) = state + + def get_cvs_items(self): + """Return a list containing the CVSItems in this commit.""" + + raise NotImplementedError() + + def get_author(self): + """Return the author or this commit, or None if none is to be used. 
+ + The return value is exactly as the author appeared in the RCS + file, with undefined character encoding.""" + + raise NotImplementedError() + + def get_log_msg(self): + """Return a log message for this commit. + + The return value is exactly as the log message appeared in the RCS + file, with undefined character encoding.""" + + raise NotImplementedError() + + def get_warning_summary(self): + """Return a summary of this commit that can be used in warnings.""" + + return '(subversion rev %s)' % (self.revnum,) + + def get_description(self): + """Return a partial description of this SVNCommit, for logging.""" + + raise NotImplementedError() + + def output(self, output_option): + """Cause this commit to be output to OUTPUT_OPTION. + + This method is used for double-dispatch. Derived classes should + call the OutputOption.process_*_commit() method appropriate for + the type of SVNCommit.""" + + raise NotImplementedError() + + def __str__(self): + """ Print a human-readable description of this SVNCommit. 
+ + This description is not intended to be machine-parseable.""" + + ret = "SVNCommit #: " + str(self.revnum) + "\n" + ret += " debug description: " + self.get_description() + "\n" + return ret + + +class SVNInitialProjectCommit(SVNCommit): + def __init__(self, date, projects, revnum): + SVNCommit.__init__(self, date, revnum) + self.projects = list(projects) + + def __getstate__(self): + return ( + SVNCommit.__getstate__(self), + [project.id for project in self.projects], + ) + + def __setstate__(self, state): + (svn_commit_state, project_ids,) = state + SVNCommit.__setstate__(self, svn_commit_state) + self.projects = [ + Ctx()._projects[project_id] for project_id in project_ids + ] + + def get_cvs_items(self): + return [] + + def get_author(self): + return Ctx().username + + def get_log_msg(self): + return self.text_wrapper.fill( + Ctx().initial_project_commit_message % {} + ) + + def get_description(self): + return 'Project initialization' + + def output(self, output_option): + output_option.process_initial_project_commit(self) + + +class SVNRevisionCommit(SVNCommit): + """A SVNCommit that includes actual CVS revisions.""" + + def __init__(self, cvs_revs, date, revnum): + SVNCommit.__init__(self, date, revnum) + + self.cvs_revs = list(cvs_revs) + + # This value is set lazily by _get_metadata(): + self._metadata = None + + def __getstate__(self): + """Return the part of the state represented by this mixin.""" + + return ( + SVNCommit.__getstate__(self), + [cvs_rev.id for cvs_rev in self.cvs_revs], + ) + + def __setstate__(self, state): + """Restore the part of the state represented by this mixin.""" + + (svn_commit_state, cvs_rev_ids) = state + SVNCommit.__setstate__(self, svn_commit_state) + + self.cvs_revs = [ + cvs_rev + for (id, cvs_rev) in Ctx()._cvs_items_db.get_many(cvs_rev_ids) + ] + self._metadata = None + + def get_cvs_items(self): + return self.cvs_revs + + def _get_metadata(self): + """Return the Metadata instance for this commit.""" + + if 
self._metadata is None: + # Set self._metadata for this commit from that of the first cvs + # revision. + if not self.cvs_revs: + raise InternalError('SVNPrimaryCommit contains no CVS revisions') + + metadata_id = self.cvs_revs[0].metadata_id + self._metadata = Ctx()._metadata_db[metadata_id] + + return self._metadata + + def get_author(self): + return self._get_metadata().author + + def get_warning_summary(self): + retval = [] + retval.append(SVNCommit.get_warning_summary(self) + ' Related files:') + for cvs_rev in self.cvs_revs: + retval.append(' ' + cvs_rev.cvs_file.filename) + return '\n'.join(retval) + + def __str__(self): + """Return the revision part of a description of this SVNCommit. + + Derived classes should append the output of this method to the + output of SVNCommit.__str__().""" + + ret = [] + ret.append(SVNCommit.__str__(self)) + ret.append(' cvs_revs:\n') + for cvs_rev in self.cvs_revs: + ret.append(' %x\n' % (cvs_rev.id,)) + return ''.join(ret) + + +class SVNPrimaryCommit(SVNRevisionCommit): + def __init__(self, cvs_revs, date, revnum): + SVNRevisionCommit.__init__(self, cvs_revs, date, revnum) + + def get_log_msg(self): + """Return the actual log message for this commit.""" + + return self._get_metadata().log_msg + + def get_description(self): + return 'commit' + + def output(self, output_option): + output_option.process_primary_commit(self) + + +class SVNPostCommit(SVNRevisionCommit): + def __init__(self, motivating_revnum, cvs_revs, date, revnum): + SVNRevisionCommit.__init__(self, cvs_revs, date, revnum) + + # The subversion revision number of the *primary* commit where the + # default branch changes actually happened. (NOTE: Secondary + # commits that fill branches and tags also have a motivating + # commit, but we do not record it because it is (currently) not + # needed for anything.) motivating_revnum is used when generating + # the log message for the commit that synchronizes the default + # branch with trunk. 
+ # + # It is possible for multiple synchronization commits to refer to + # the same motivating commit revision number, and it is possible + # for a single synchronization commit to contain CVSRevisions on + # multiple different default branches. + self.motivating_revnum = motivating_revnum + + def __getstate__(self): + return ( + SVNRevisionCommit.__getstate__(self), + self.motivating_revnum, + ) + + def __setstate__(self, state): + (rev_state, self.motivating_revnum,) = state + SVNRevisionCommit.__setstate__(self, rev_state) + + def get_cvs_items(self): + # It might seem that we should return + # SVNRevisionCommit.get_cvs_items(self) here, but this commit + # doesn't really include those CVSItems, but rather followup + # commits to those. + return [] + + def get_log_msg(self): + """Return a manufactured log message for this commit.""" + + return self.text_wrapper.fill( + Ctx().post_commit_message % {'revnum' : self.motivating_revnum} + ) + + def get_description(self): + return 'post-commit default branch(es)' + + def output(self, output_option): + output_option.process_post_commit(self) + + +class SVNSymbolCommit(SVNCommit): + def __init__(self, symbol, cvs_symbol_ids, date, revnum): + SVNCommit.__init__(self, date, revnum) + + # The TypedSymbol that is filled in this SVNCommit. 
+ self.symbol = symbol + + self.cvs_symbol_ids = cvs_symbol_ids + + def __getstate__(self): + return ( + SVNCommit.__getstate__(self), + self.symbol.id, self.cvs_symbol_ids, + ) + + def __setstate__(self, state): + (svn_commit_state, symbol_id, self.cvs_symbol_ids) = state + SVNCommit.__setstate__(self, svn_commit_state) + self.symbol = Ctx()._symbol_db.get_symbol(symbol_id) + + def get_cvs_items(self): + return [ + cvs_symbol + for (id, cvs_symbol) + in Ctx()._cvs_items_db.get_many(self.cvs_symbol_ids) + ] + + def _get_symbol_type(self): + """Return the type of the self.symbol ('branch' or 'tag').""" + + raise NotImplementedError() + + def get_author(self): + return Ctx().username + + def get_log_msg(self): + """Return a manufactured log message for this commit.""" + + return self.text_wrapper.fill( + Ctx().symbol_commit_message % { + 'symbol_type' : self._get_symbol_type(), + 'symbol_name' : self.symbol.name, + } + ) + + def get_description(self): + return 'copying to %s %r' % (self._get_symbol_type(), self.symbol.name,) + + def __str__(self): + """ Print a human-readable description of this SVNCommit. 
+ + This description is not intended to be machine-parseable.""" + + return ( + SVNCommit.__str__(self) + + " symbolic name: %s\n" % (self.symbol.name,) + ) + + +class SVNBranchCommit(SVNSymbolCommit): + def __init__(self, symbol, cvs_symbol_ids, date, revnum): + if not isinstance(symbol, Branch): + raise InternalError('Incorrect symbol type %r' % (symbol,)) + + SVNSymbolCommit.__init__(self, symbol, cvs_symbol_ids, date, revnum) + + def _get_symbol_type(self): + return 'branch' + + def output(self, output_option): + output_option.process_branch_commit(self) + + +class SVNTagCommit(SVNSymbolCommit): + def __init__(self, symbol, cvs_symbol_ids, date, revnum): + if not isinstance(symbol, Tag): + raise InternalError('Incorrect symbol type %r' % (symbol,)) + + SVNSymbolCommit.__init__(self, symbol, cvs_symbol_ids, date, revnum) + + def _get_symbol_type(self): + return 'tag' + + def output(self, output_option): + output_option.process_tag_commit(self) + + diff --git a/cvs2svn_lib/svn_commit_creator.py b/cvs2svn_lib/svn_commit_creator.py new file mode 100644 index 0000000..c87db38 --- /dev/null +++ b/cvs2svn_lib/svn_commit_creator.py @@ -0,0 +1,217 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
+# ==================================================================== + +"""This module contains the SVNCommitCreator class.""" + + +import time + +from cvs2svn_lib.common import InternalError +from cvs2svn_lib.log import Log +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.cvs_item import CVSRevisionNoop +from cvs2svn_lib.cvs_item import CVSBranchNoop +from cvs2svn_lib.cvs_item import CVSTagNoop +from cvs2svn_lib.changeset import OrderedChangeset +from cvs2svn_lib.changeset import BranchChangeset +from cvs2svn_lib.changeset import TagChangeset +from cvs2svn_lib.svn_commit import SVNInitialProjectCommit +from cvs2svn_lib.svn_commit import SVNPrimaryCommit +from cvs2svn_lib.svn_commit import SVNPostCommit +from cvs2svn_lib.svn_commit import SVNBranchCommit +from cvs2svn_lib.svn_commit import SVNTagCommit +from cvs2svn_lib.key_generator import KeyGenerator + + +class SVNCommitCreator: + """This class creates and yields SVNCommits via process_changeset().""" + + def __init__(self): + # The revision number to assign to the next new SVNCommit. + self.revnum_generator = KeyGenerator() + + # A set containing the Projects that have already been + # initialized: + self._initialized_projects = set() + + def _post_commit(self, cvs_revs, motivating_revnum, timestamp): + """Generate any SVNCommits needed to follow CVS_REVS. + + That is, handle non-trunk default branches. A revision on a CVS + non-trunk default branch is visible in a default CVS checkout of + HEAD. So we copy such commits over to Subversion's trunk so that + checking out SVN trunk gives the same output as checking out of + CVS's default branch.""" + + cvs_revs = [ + cvs_rev + for cvs_rev in cvs_revs + if cvs_rev.ntdbr and not isinstance(cvs_rev, CVSRevisionNoop) + ] + + if cvs_revs: + cvs_revs.sort( + lambda a, b: cmp(a.cvs_file.filename, b.cvs_file.filename) + ) + # Generate an SVNCommit for all of our default branch cvs_revs. 
+ yield SVNPostCommit( + motivating_revnum, cvs_revs, timestamp, + self.revnum_generator.gen_id(), + ) + + def _process_revision_changeset(self, changeset, timestamp): + """Process CHANGESET, using TIMESTAMP as the commit time. + + Create and yield one or more SVNCommits in the process. CHANGESET + must be an OrderedChangeset. TIMESTAMP is used as the timestamp + for any resulting SVNCommits.""" + + if not changeset.cvs_item_ids: + Log().warn('Changeset has no items: %r' % changeset) + return + + Log().verbose('-' * 60) + Log().verbose('CVS Revision grouping:') + Log().verbose(' Time: %s' % time.ctime(timestamp)) + + # Generate an SVNCommit unconditionally. Even if the only change in + # this group of CVSRevisions is a deletion of an already-deleted + # file (that is, a CVS revision in state 'dead' whose predecessor + # was also in state 'dead'), the conversion will still generate a + # Subversion revision containing the log message for the second dead + # revision, because we don't want to lose that information. + + cvs_revs = list(changeset.iter_cvs_items()) + if cvs_revs: + cvs_revs.sort(lambda a, b: cmp(a.cvs_file.filename, b.cvs_file.filename)) + svn_commit = SVNPrimaryCommit( + cvs_revs, timestamp, self.revnum_generator.gen_id() + ) + + yield svn_commit + + for cvs_rev in cvs_revs: + Ctx()._symbolings_logger.log_revision(cvs_rev, svn_commit.revnum) + + # Generate an SVNPostCommit if we have default branch revs. If + # some of the revisions in this commit happened on a non-trunk + # default branch, then those files have to be copied into trunk + # manually after being changed on the branch (because the RCS + # "default branch" appears as head, i.e., trunk, in practice). + # Unfortunately, Subversion doesn't support copies with sources + # in the current txn. All copies must be based in committed + # revisions. Therefore, we generate the copies in a new + # revision. 
+ for svn_post_commit in self._post_commit( + cvs_revs, svn_commit.revnum, timestamp + ): + yield svn_post_commit + + def _process_tag_changeset(self, changeset, timestamp): + """Process TagChangeset CHANGESET, producing a SVNTagCommit. + + Filter out CVSTagNoops. If no CVSTags are left, don't generate a + SVNTagCommit.""" + + if Ctx().trunk_only: + raise InternalError( + 'TagChangeset encountered during a --trunk-only conversion') + + cvs_tag_ids = [ + cvs_tag.id + for cvs_tag in changeset.iter_cvs_items() + if not isinstance(cvs_tag, CVSTagNoop) + ] + if cvs_tag_ids: + yield SVNTagCommit( + changeset.symbol, cvs_tag_ids, timestamp, + self.revnum_generator.gen_id(), + ) + else: + Log().debug( + 'Omitting %r because it contains only CVSTagNoops' % (changeset,) + ) + + def _process_branch_changeset(self, changeset, timestamp): + """Process BranchChangeset CHANGESET, producing a SVNBranchCommit. + + Filter out CVSBranchNoops. If no CVSBranches are left, don't + generate a SVNBranchCommit.""" + + if Ctx().trunk_only: + raise InternalError( + 'BranchChangeset encountered during a --trunk-only conversion') + + cvs_branches = [ + cvs_branch + for cvs_branch in changeset.iter_cvs_items() + if not isinstance(cvs_branch, CVSBranchNoop) + ] + if cvs_branches: + svn_commit = SVNBranchCommit( + changeset.symbol, + [cvs_branch.id for cvs_branch in cvs_branches], + timestamp, + self.revnum_generator.gen_id(), + ) + yield svn_commit + for cvs_branch in cvs_branches: + Ctx()._symbolings_logger.log_branch_revision( + cvs_branch, svn_commit.revnum + ) + else: + Log().debug( + 'Omitting %r because it contains only CVSBranchNoops' % (changeset,) + ) + + def process_changeset(self, changeset, timestamp): + """Process CHANGESET, using TIMESTAMP for all of its entries. + + Return a generator that generates the resulting SVNCommits. 
+ + The changesets must be fed to this function in proper dependency + order.""" + + # First create any new projects that might be opened by the + # changeset: + projects_opened = \ + changeset.get_projects_opened() - self._initialized_projects + if projects_opened: + if Ctx().cross_project_commits: + yield SVNInitialProjectCommit( + timestamp, projects_opened, self.revnum_generator.gen_id() + ) + else: + for project in projects_opened: + yield SVNInitialProjectCommit( + timestamp, [project], self.revnum_generator.gen_id() + ) + self._initialized_projects.update(projects_opened) + + if isinstance(changeset, OrderedChangeset): + for svn_commit \ + in self._process_revision_changeset(changeset, timestamp): + yield svn_commit + elif isinstance(changeset, TagChangeset): + for svn_commit in self._process_tag_changeset(changeset, timestamp): + yield svn_commit + elif isinstance(changeset, BranchChangeset): + for svn_commit in self._process_branch_changeset(changeset, timestamp): + yield svn_commit + else: + raise TypeError('Illegal changeset %r' % changeset) + + diff --git a/cvs2svn_lib/svn_commit_item.py b/cvs2svn_lib/svn_commit_item.py new file mode 100644 index 0000000..8bc9015 --- /dev/null +++ b/cvs2svn_lib/svn_commit_item.py @@ -0,0 +1,50 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
"""This module contains class SVNCommitItem."""


class SVNCommitItem:
  """A wrapper class for CVSRevision objects upon which
  Subversion-related data (such as properties) may be hung."""

  def __init__(self, cvs_rev, svn_props_changed):
    """Initialize instance and record the properties for this file.

    SVN_PROPS_CHANGED indicates whether the svn: properties are known
    to have changed since the last revision.

    The properties are set by the SVNPropertySetters in
    Ctx().svn_property_setters."""

    # Imported at the use site (rather than at module level) so that
    # this lightweight wrapper module can be imported without pulling
    # in the full conversion context machinery.
    from cvs2svn_lib.context import Ctx

    self.cvs_rev = cvs_rev
    # Did the svn properties change for this file (i.e., do they have
    # to be written to the dumpfile?)
    self.svn_props_changed = svn_props_changed

    # The properties for this item as a map { key : value }.  If VALUE
    # is None, the property should be left unset.
    self.svn_props = { }

    for svn_property_setter in Ctx().svn_property_setters:
      svn_property_setter.set_properties(self)

  def has_keywords(self):
    """Return True iff this item has a non-empty svn:keywords property."""

    return bool(self.svn_props.get('svn:keywords', None))
"""Classes for outputting the converted repository to SVN."""


import os

from cvs2svn_lib import config
from cvs2svn_lib.common import InternalError
from cvs2svn_lib.common import FatalError
from cvs2svn_lib.common import FatalException
from cvs2svn_lib.common import error_prefix
from cvs2svn_lib.common import format_date
from cvs2svn_lib.common import PathsNotDisjointException
from cvs2svn_lib.common import verify_paths_disjoint
from cvs2svn_lib.log import Log
from cvs2svn_lib.context import Ctx
from cvs2svn_lib.artifact_manager import artifact_manager
from cvs2svn_lib.process import CommandFailedException
from cvs2svn_lib.process import check_command_runs
from cvs2svn_lib.process import call_command
from cvs2svn_lib.cvs_file import CVSDirectory
from cvs2svn_lib.symbol import Trunk
from cvs2svn_lib.symbol import LineOfDevelopment
from cvs2svn_lib.cvs_item import CVSRevisionAdd
from cvs2svn_lib.cvs_item import CVSRevisionChange
from cvs2svn_lib.cvs_item import CVSRevisionDelete
from cvs2svn_lib.cvs_item import CVSRevisionNoop
from cvs2svn_lib.repository_mirror import RepositoryMirror
from cvs2svn_lib.repository_mirror import PathExistsError
from cvs2svn_lib.svn_commit_item import SVNCommitItem
from cvs2svn_lib.openings_closings import SymbolingsReader
from cvs2svn_lib.fill_source import get_source_set
from cvs2svn_lib.stdout_delegate import StdoutDelegate
from cvs2svn_lib.dumpfile_delegate import DumpfileDelegate
from cvs2svn_lib.repository_delegate import RepositoryDelegate
from cvs2svn_lib.output_option import OutputOption


class SVNOutputOption(OutputOption):
  """An OutputOption appropriate for output to Subversion."""

  class ParentMissingError(Exception):
    """The parent of a path is missing.

    Exception raised if an attempt is made to add a path to the
    repository mirror but the parent's path doesn't exist in the
    youngest revision of the repository."""

    pass

  class ExpectedDirectoryError(Exception):
    """A file was found where a directory was expected."""

    pass

  def __init__(self, author_transforms=None):
    self._mirror = RepositoryMirror()

    def to_utf8(s):
      # NOTE(review): 'unicode' is Python-2-only; this module predates
      # Python 3 and assumes a Python 2 interpreter.
      if isinstance(s, unicode):
        return s.encode('utf8')
      else:
        return s

    # Map of CVS author name -> SVN author name, with both sides
    # normalized to utf8 byte strings.
    self.author_transforms = {}
    if author_transforms is not None:
      for (cvsauthor, name) in author_transforms.iteritems():
        cvsauthor = to_utf8(cvsauthor)
        name = to_utf8(name)
        self.author_transforms[cvsauthor] = name

  def register_artifacts(self, which_pass):
    # These artifacts are needed for SymbolingsReader:
    artifact_manager.register_temp_file_needed(
        config.SYMBOL_OPENINGS_CLOSINGS_SORTED, which_pass
        )
    artifact_manager.register_temp_file_needed(
        config.SYMBOL_OFFSETS_DB, which_pass
        )

    self._mirror.register_artifacts(which_pass)
    Ctx().revision_reader.register_artifacts(which_pass)

  def check_symbols(self, symbol_map):
    """Check that the paths of all included LODs are set and disjoint."""

    error_found = False

    # Check that all included LODs have their base paths set, and
    # collect the paths into a list:
    paths = []
    for lod in symbol_map.itervalues():
      if isinstance(lod, LineOfDevelopment):
        if lod.base_path is None:
          Log().error('%s: No path was set for %r\n' % (error_prefix, lod,))
          error_found = True
        else:
          paths.append(lod.base_path)

    # Check that the SVN paths of all LODs are disjoint:
    try:
      verify_paths_disjoint(*paths)
    except PathsNotDisjointException as e:
      Log().error(str(e))
      error_found = True

    if error_found:
      raise FatalException(
          'Please fix the above errors and restart CollateSymbolsPass'
          )

  def setup(self, svn_rev_count):
    self._symbolings_reader = SymbolingsReader()
    self._mirror.open()
    self._delegates = []
    Ctx().revision_reader.start()
    self.add_delegate(StdoutDelegate(svn_rev_count))

  def _get_author(self, svn_commit):
    # Apply the author_transforms mapping, falling back to the raw
    # CVS author name when no transform is registered.
    author = svn_commit.get_author()
    name = self.author_transforms.get(author, author)
    return name

  def _get_revprops(self, svn_commit):
    """Return the Subversion revprops for this SVNCommit."""

    return {
        'svn:author' : self._get_author(svn_commit),
        'svn:log' : svn_commit.get_log_msg(),
        'svn:date' : format_date(svn_commit.date),
        }

  def start_commit(self, revnum, revprops):
    """Start a new commit."""

    self._mirror.start_commit(revnum)
    self._invoke_delegates('start_commit', revnum, revprops)

  def end_commit(self):
    """Called at the end of each commit.

    This method copies the newly created nodes to the on-disk nodes
    db."""

    self._mirror.end_commit()
    self._invoke_delegates('end_commit')

  def delete_lod(self, lod):
    """Delete the main path for LOD from the tree.

    The path must currently exist.  Silently refuse to delete trunk
    paths."""

    if isinstance(lod, Trunk):
      # Never delete a Trunk path.
      return

    self._mirror.get_current_lod_directory(lod).delete()
    self._invoke_delegates('delete_lod', lod)

  def delete_path(self, cvs_path, lod, should_prune=False):
    """Delete CVS_PATH from LOD."""

    if cvs_path.parent_directory is None:
      self.delete_lod(lod)
      return

    parent_node = self._mirror.get_current_path(
        cvs_path.parent_directory, lod
        )
    del parent_node[cvs_path]
    self._invoke_delegates('delete_path', lod, cvs_path)

    if should_prune:
      # Walk up the tree, deleting directories that have become empty.
      while parent_node is not None and len(parent_node) == 0:
        # A drawback of this code is that we issue a delete for each
        # path and not just a single delete for the topmost directory
        # pruned.
        node = parent_node
        cvs_path = node.cvs_path
        if cvs_path.parent_directory is None:
          parent_node = None
          self.delete_lod(lod)
        else:
          parent_node = node.parent_mirror_dir
          node.delete()
          self._invoke_delegates('delete_path', lod, cvs_path)

  def initialize_project(self, project):
    """Create the basic structure for PROJECT."""

    self._invoke_delegates('initialize_project', project)

    # Don't invoke delegates.
    self._mirror.add_lod(project.get_trunk())

  def change_path(self, cvs_rev):
    """Register a change in self._youngest for the CVS_REV's svn_path."""

    # We do not have to update the nodes because our mirror is only
    # concerned with the presence or absence of paths, and a file
    # content change does not cause any path changes.
    self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, False))

  def _mkdir_p(self, cvs_directory, lod):
    """Make sure that CVS_DIRECTORY exists in LOD.

    If not, create it, calling delegates.  Return the node for
    CVS_DIRECTORY."""

    try:
      node = self._mirror.get_current_lod_directory(lod)
    except KeyError:
      node = self._mirror.add_lod(lod)
      self._invoke_delegates('initialize_lod', lod)

    for sub_path in cvs_directory.get_ancestry()[1:]:
      try:
        node = node[sub_path]
      except KeyError:
        node = node.mkdir(sub_path)
        self._invoke_delegates('mkdir', lod, sub_path)
      if node is None:
        raise self.ExpectedDirectoryError(
            'File found at \'%s\' where directory was expected.' % (sub_path,)
            )

    return node

  def add_path(self, cvs_rev):
    """Add the CVS_REV's svn_path to the repository mirror.

    Create any missing intermediate paths."""

    cvs_file = cvs_rev.cvs_file
    parent_path = cvs_file.parent_directory
    lod = cvs_rev.lod
    parent_node = self._mkdir_p(parent_path, lod)
    parent_node.add_file(cvs_file)
    self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, True))

  def copy_lod(self, src_lod, dest_lod, src_revnum):
    """Copy all of SRC_LOD at SRC_REVNUM to DST_LOD.

    In the youngest revision of the repository, the destination LOD
    *must not* already exist.

    Return the new node at DEST_LOD.  Note that this node is not
    necessarily writable, though its parent node necessarily is."""

    node = self._mirror.copy_lod(src_lod, dest_lod, src_revnum)
    self._invoke_delegates('copy_lod', src_lod, dest_lod, src_revnum)
    return node

  def copy_path(
        self, cvs_path, src_lod, dest_lod, src_revnum, create_parent=False
        ):
    """Copy CVS_PATH from SRC_LOD at SRC_REVNUM to DST_LOD.

    In the youngest revision of the repository, the destination's
    parent *must* exist unless CREATE_PARENT is specified.  But the
    destination itself *must not* exist.

    Return the new node at (CVS_PATH, DEST_LOD), as a
    CurrentMirrorDirectory."""

    if cvs_path.parent_directory is None:
      return self.copy_lod(src_lod, dest_lod, src_revnum)

    # Get the node of our source, or None if it is a file:
    src_node = self._mirror.get_old_path(cvs_path, src_lod, src_revnum)

    # Get the parent path of the destination:
    if create_parent:
      dest_parent_node = self._mkdir_p(cvs_path.parent_directory, dest_lod)
    else:
      try:
        dest_parent_node = self._mirror.get_current_path(
            cvs_path.parent_directory, dest_lod
            )
      except KeyError:
        raise self.ParentMissingError(
            'Attempt to add path \'%s\' to repository mirror, '
            'but its parent directory doesn\'t exist in the mirror.'
            % (dest_lod.get_path(cvs_path.cvs_path),)
            )

    if cvs_path in dest_parent_node:
      raise PathExistsError(
          'Attempt to add path \'%s\' to repository mirror '
          'when it already exists in the mirror.'
          % (dest_lod.get_path(cvs_path.cvs_path),)
          )

    dest_parent_node[cvs_path] = src_node
    self._invoke_delegates(
        'copy_path',
        cvs_path, src_lod, dest_lod, src_revnum
        )

    return dest_parent_node[cvs_path]

  def fill_symbol(self, svn_symbol_commit, fill_source):
    """Perform all copies for the CVSSymbols in SVN_SYMBOL_COMMIT.

    The symbolic name is guaranteed to exist in the Subversion
    repository by the end of this call, even if there are no paths
    under it."""

    symbol = svn_symbol_commit.symbol

    try:
      dest_node = self._mirror.get_current_lod_directory(symbol)
    except KeyError:
      self._fill_directory(symbol, None, fill_source, None)
    else:
      self._fill_directory(symbol, dest_node, fill_source, None)

  def _fill_directory(self, symbol, dest_node, fill_source, parent_source):
    """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE.

    Use items from FILL_SOURCE, and recurse into the child items.

    Fill SYMBOL starting at the path FILL_SOURCE.cvs_path.  DEST_NODE
    is the node of this destination path, or None if the destination
    does not yet exist.  All directories above this path have already
    been filled.  FILL_SOURCE is a FillSource instance describing the
    items within a subtree of the repository that still need to be
    copied to the destination.

    PARENT_SOURCE is the SVNRevisionRange that was used to copy the
    parent directory, if it was copied in this commit.  We prefer to
    copy from the same source as was used for the parent, since it
    typically requires less touching-up.  If PARENT_SOURCE is None,
    then the parent directory was not copied in this commit, so no
    revision is preferable to any other."""

    copy_source = fill_source.compute_best_source(parent_source)

    # Figure out if we shall copy to this destination and delete any
    # destination path that is in the way.
    if dest_node is None:
      # The destination does not exist at all, so it definitely has to
      # be copied:
      dest_node = self.copy_path(
          fill_source.cvs_path, copy_source.source_lod,
          symbol, copy_source.opening_revnum
          )
    elif (parent_source is not None) and (
          copy_source.source_lod != parent_source.source_lod
          or copy_source.opening_revnum != parent_source.opening_revnum
          ):
      # The parent path was copied from a different source than we
      # need to use, so we have to delete the version that was copied
      # with the parent then re-copy from the correct source:
      self.delete_path(fill_source.cvs_path, symbol)
      dest_node = self.copy_path(
          fill_source.cvs_path, copy_source.source_lod,
          symbol, copy_source.opening_revnum
          )
    else:
      copy_source = parent_source

    # The map {CVSPath : FillSource} of entries within this directory
    # that need filling:
    src_entries = fill_source.get_subsource_map()

    if copy_source is not None:
      self._prune_extra_entries(
          fill_source.cvs_path, symbol, dest_node, src_entries
          )

    return self._cleanup_filled_directory(
        symbol, dest_node, src_entries, copy_source
        )

  def _cleanup_filled_directory(
        self, symbol, dest_node, src_entries, copy_source
        ):
    """The directory at DEST_NODE has been filled and pruned; recurse.

    Recurse into the SRC_ENTRIES, in alphabetical order.  If DEST_NODE
    was copied in this revision, COPY_SOURCE should indicate where it
    was copied from; otherwise, COPY_SOURCE should be None."""

    # sorted() works in both Python 2 and 3 (the old keys()+sort()
    # idiom relied on Python 2 returning a list).
    for cvs_path in sorted(src_entries):
      if isinstance(cvs_path, CVSDirectory):
        # Path is a CVSDirectory:
        try:
          dest_subnode = dest_node[cvs_path]
        except KeyError:
          # Path doesn't exist yet; it has to be created:
          dest_node = self._fill_directory(
              symbol, None, src_entries[cvs_path], None
              ).parent_mirror_dir
        else:
          # Path already exists, but might have to be cleaned up:
          dest_node = self._fill_directory(
              symbol, dest_subnode, src_entries[cvs_path], copy_source
              ).parent_mirror_dir
      else:
        # Path is a CVSFile:
        self._fill_file(
            symbol, cvs_path in dest_node, src_entries[cvs_path], copy_source
            )
        # Reread dest_node since the call to _fill_file() might have
        # made it writable:
        dest_node = self._mirror.get_current_path(
            dest_node.cvs_path, dest_node.lod
            )

    return dest_node

  def _fill_file(self, symbol, dest_existed, fill_source, parent_source):
    """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE.

    Use items from FILL_SOURCE.

    Fill SYMBOL at path FILL_SOURCE.cvs_path.  DEST_EXISTED indicates
    whether the destination path already exists.  All directories
    above this path have already been filled as needed.  FILL_SOURCE
    is a FillSource instance describing the item that needs to be
    copied to the destination.

    PARENT_SOURCE is the source from which the parent directory was
    copied, or None if the parent directory was not copied during this
    commit.  We prefer to copy from PARENT_SOURCE, since it typically
    requires less touching-up.  If PARENT_SOURCE is None, then the
    parent directory was not copied in this commit, so no revision is
    preferable to any other."""

    copy_source = fill_source.compute_best_source(parent_source)

    # Figure out if we shall copy to this destination and delete any
    # destination path that is in the way.
    if not dest_existed:
      # The destination does not exist at all, so it definitely has to
      # be copied:
      self.copy_path(
          fill_source.cvs_path, copy_source.source_lod,
          symbol, copy_source.opening_revnum
          )
    elif (parent_source is not None) and (
          copy_source.source_lod != parent_source.source_lod
          or copy_source.opening_revnum != parent_source.opening_revnum
          ):
      # The parent path was copied from a different source than we
      # need to use, so we have to delete the version that was copied
      # with the parent and then re-copy from the correct source:
      self.delete_path(fill_source.cvs_path, symbol)
      self.copy_path(
          fill_source.cvs_path, copy_source.source_lod,
          symbol, copy_source.opening_revnum
          )

  def _prune_extra_entries(
        self, dest_cvs_path, symbol, dest_node, src_entries
        ):
    """Delete any entries in DEST_NODE that are not in SRC_ENTRIES."""

    delete_list = [
        cvs_path
        for cvs_path in dest_node
        if cvs_path not in src_entries
        ]

    # Sort the delete list so that the output is in a consistent
    # order:
    delete_list.sort()
    for cvs_path in delete_list:
      del dest_node[cvs_path]
      self._invoke_delegates('delete_path', symbol, cvs_path)

  def add_delegate(self, delegate):
    """Adds DELEGATE to self._delegates.

    For every delegate you add, whenever a repository action method is
    performed, delegate's corresponding repository action method is
    called.  Multiple delegates will be called in the order that they
    are added.  See SVNRepositoryDelegate for more information."""

    self._delegates.append(delegate)

  def _invoke_delegates(self, method, *args):
    """Invoke a method on each delegate.

    Iterate through each of our delegates, in the order that they were
    added, and call the delegate's method named METHOD with the
    arguments in ARGS."""

    for delegate in self._delegates:
      getattr(delegate, method)(*args)

  def process_initial_project_commit(self, svn_commit):
    self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit))

    for project in svn_commit.projects:
      self.initialize_project(project)

    self.end_commit()

  def process_primary_commit(self, svn_commit):
    self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit))

    # This actually commits CVSRevisions
    if len(svn_commit.cvs_revs) > 1:
      plural = "s"
    else:
      plural = ""
    Log().verbose("Committing %d CVSRevision%s"
                  % (len(svn_commit.cvs_revs), plural))
    for cvs_rev in svn_commit.cvs_revs:
      if isinstance(cvs_rev, CVSRevisionNoop):
        pass

      elif isinstance(cvs_rev, CVSRevisionDelete):
        self.delete_path(cvs_rev.cvs_file, cvs_rev.lod, Ctx().prune)

      elif isinstance(cvs_rev, CVSRevisionAdd):
        self.add_path(cvs_rev)

      elif isinstance(cvs_rev, CVSRevisionChange):
        self.change_path(cvs_rev)

      else:
        # Fail loudly rather than silently skipping an unknown type
        # (consistent with process_post_commit):
        raise InternalError('Unexpected CVSRevision type: %s' % (cvs_rev,))

    self.end_commit()

  def process_post_commit(self, svn_commit):
    self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit))

    Log().verbose(
        'Synchronizing default branch motivated by %d'
        % (svn_commit.motivating_revnum,)
        )

    for cvs_rev in svn_commit.cvs_revs:
      trunk = cvs_rev.cvs_file.project.get_trunk()
      if isinstance(cvs_rev, CVSRevisionAdd):
        # Copy from branch to trunk:
        self.copy_path(
            cvs_rev.cvs_file, cvs_rev.lod, trunk,
            svn_commit.motivating_revnum, True
            )
      elif isinstance(cvs_rev, CVSRevisionChange):
        # Delete old version of the path on trunk...
        self.delete_path(cvs_rev.cvs_file, trunk)
        # ...and copy the new version over from branch:
        self.copy_path(
            cvs_rev.cvs_file, cvs_rev.lod, trunk,
            svn_commit.motivating_revnum, True
            )
      elif isinstance(cvs_rev, CVSRevisionDelete):
        # Delete trunk path:
        self.delete_path(cvs_rev.cvs_file, trunk)
      elif isinstance(cvs_rev, CVSRevisionNoop):
        # Do nothing
        pass
      else:
        raise InternalError('Unexpected CVSRevision type: %s' % (cvs_rev,))

    self.end_commit()

  def process_branch_commit(self, svn_commit):
    self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit))
    Log().verbose('Filling branch:', svn_commit.symbol.name)

    # Get the set of sources for the symbolic name:
    source_set = get_source_set(
        svn_commit.symbol,
        self._symbolings_reader.get_range_map(svn_commit),
        )

    self.fill_symbol(svn_commit, source_set)

    self.end_commit()

  def process_tag_commit(self, svn_commit):
    self.start_commit(svn_commit.revnum, self._get_revprops(svn_commit))
    Log().verbose('Filling tag:', svn_commit.symbol.name)

    # Get the set of sources for the symbolic name:
    source_set = get_source_set(
        svn_commit.symbol,
        self._symbolings_reader.get_range_map(svn_commit),
        )

    self.fill_symbol(svn_commit, source_set)

    self.end_commit()

  def cleanup(self):
    self._invoke_delegates('finish')
    self._mirror.close()
    self._mirror = None
    Ctx().revision_reader.finish()
    self._symbolings_reader.close()
    del self._symbolings_reader


class DumpfileOutputOption(SVNOutputOption):
  """Output the result of the conversion into a dumpfile."""

  def __init__(self, dumpfile_path, author_transforms=None):
    SVNOutputOption.__init__(self, author_transforms)
    self.dumpfile_path = dumpfile_path

  def check(self):
    pass

  def setup(self, svn_rev_count):
    Log().quiet("Starting Subversion Dumpfile.")
    SVNOutputOption.setup(self, svn_rev_count)
    if not Ctx().dry_run:
      self.add_delegate(
          DumpfileDelegate(Ctx().revision_reader, self.dumpfile_path)
          )


class RepositoryOutputOption(SVNOutputOption):
  """Output the result of the conversion into an SVN repository."""

  def __init__(self, target, author_transforms=None):
    SVNOutputOption.__init__(self, author_transforms)
    self.target = target

  def check(self):
    if not Ctx().dry_run:
      # Verify that svnadmin can be executed.  The 'help' subcommand
      # should be harmless.
      try:
        check_command_runs([Ctx().svnadmin_executable, 'help'], 'svnadmin')
      except CommandFailedException as e:
        raise FatalError(
            '%s\n'
            'svnadmin could not be executed.  Please ensure that it is\n'
            'installed and/or use the --svnadmin option.' % (e,))

  def setup(self, svn_rev_count):
    Log().quiet("Starting Subversion Repository.")
    SVNOutputOption.setup(self, svn_rev_count)
    if not Ctx().dry_run:
      self.add_delegate(
          RepositoryDelegate(Ctx().revision_reader, self.target)
          )


class NewRepositoryOutputOption(RepositoryOutputOption):
  """Output the result of the conversion into a new SVN repository."""

  def __init__(
        self, target, fs_type=None, bdb_txn_nosync=None,
        author_transforms=None, create_options=None
        ):
    # CREATE_OPTIONS defaults to None (not a mutable []) to avoid the
    # shared-mutable-default-argument pitfall; None means "no extra
    # options".
    RepositoryOutputOption.__init__(self, target, author_transforms)
    self.bdb_txn_nosync = bdb_txn_nosync

    # Determine the options to be passed to "svnadmin create":
    if not fs_type:
      # User didn't say what kind of repository (bdb, fsfs, etc).  We
      # still pass --bdb-txn-nosync.  It's a no-op if the default
      # repository type doesn't support it, but we definitely want it
      # if BDB is the default.
      self.create_options = ['--bdb-txn-nosync']
    elif fs_type == 'bdb':
      # User explicitly specified bdb.
      #
      # Since this is a BDB repository, pass --bdb-txn-nosync, because
      # it gives us a 4-5x speed boost (if cvs2svn is creating the
      # repository, cvs2svn should be the only program accessing the
      # svn repository until cvs2svn is done).  But we'll turn no-sync
      # off in self.finish(), unless instructed otherwise.
      self.create_options = ['--fs-type=bdb', '--bdb-txn-nosync']
    else:
      # User specified something other than bdb.
      self.create_options = ['--fs-type=%s' % fs_type]

    # Now append the user's explicitly-set create options:
    self.create_options += (create_options or [])

  def check(self):
    RepositoryOutputOption.check(self)
    if not Ctx().dry_run and os.path.exists(self.target):
      raise FatalError("the svn-repos-path '%s' exists.\n"
                       "Remove it, or pass '--existing-svnrepos'."
                       % self.target)

  def setup(self, svn_rev_count):
    Log().normal("Creating new repository '%s'" % (self.target))
    if Ctx().dry_run:
      # Do not actually create repository:
      pass
    else:
      call_command([
          Ctx().svnadmin_executable, 'create',
          ] + self.create_options + [
          self.target
          ])

    RepositoryOutputOption.setup(self, svn_rev_count)

  def cleanup(self):
    RepositoryOutputOption.cleanup(self)

    # If this is a BDB repository, and we created the repository, and
    # --bdb-no-sync wasn't passed, then comment out the DB_TXN_NOSYNC
    # line in the DB_CONFIG file, because txn syncing should be on by
    # default in BDB repositories.
    #
    # We determine if this is a BDB repository by looking for the
    # DB_CONFIG file, which doesn't exist in FSFS, rather than by
    # checking self.fs_type.  That way this code will Do The Right
    # Thing in all circumstances.
    db_config = os.path.join(self.target, "db/DB_CONFIG")
    if Ctx().dry_run:
      # Do not change repository:
      pass
    elif not self.bdb_txn_nosync and os.path.exists(db_config):
      no_sync = 'set_flags DB_TXN_NOSYNC\n'

      # Close the file handles explicitly (the old code leaked them):
      f = open(db_config, 'r')
      contents = f.readlines()
      f.close()
      # NOTE(review): raises ValueError if the expected line is absent
      # from DB_CONFIG -- preserved from the original behavior.
      index = contents.index(no_sync)
      contents[index] = '# ' + no_sync
      f = open(db_config, 'w')
      f.writelines(contents)
      f.close()


class ExistingRepositoryOutputOption(RepositoryOutputOption):
  """Output the result of the conversion into an existing SVN repository."""

  def __init__(self, target, author_transforms=None):
    RepositoryOutputOption.__init__(self, target, author_transforms)

  def check(self):
    RepositoryOutputOption.check(self)
    if not os.path.isdir(self.target):
      raise FatalError("the svn-repos-path '%s' is not an "
                       "existing directory." % self.target)
+ + Subclasses must implement all of the methods below. + + For each method, a subclass implements, in its own way, the + Subversion operation implied by the method's name. For example, for + the add_path method, the DumpfileDelegate would write out a + 'Node-add:' command to a Subversion dumpfile, the StdoutDelegate + would merely print that the path is being added to the repository, + and the RepositoryDelegate would actually cause the path to be added + to the Subversion repository that it is creating.""" + + def start_commit(self, revnum, revprops): + """An SVN commit is starting. + + Perform any actions needed to start an SVN commit with revision + number REVNUM and revision properties REVPROPS.""" + + raise NotImplementedError() + + def end_commit(self): + """An SVN commit is ending.""" + + raise NotImplementedError() + + def initialize_project(self, project): + """Initialize PROJECT. + + For Subversion, this means to create the trunk, branches, and tags + directories for PROJECT.""" + + raise NotImplementedError() + + def initialize_lod(self, lod): + """Initialize LOD with no contents. + + LOD is an instance of LineOfDevelopment. It is also possible for + an LOD to be created by copying from another LOD; such events are + indicated via the copy_lod() callback.""" + + raise NotImplementedError() + + def mkdir(self, lod, cvs_directory): + """Create CVS_DIRECTORY within LOD. + + LOD is a LineOfDevelopment; CVS_DIRECTORY is a CVSDirectory.""" + + raise NotImplementedError() + + def add_path(self, s_item): + """Add the path corresponding to S_ITEM to the repository. + + S_ITEM is an SVNCommitItem.""" + + raise NotImplementedError() + + def change_path(self, s_item): + """Change the path corresponding to S_ITEM in the repository. + + S_ITEM is an SVNCommitItem.""" + + raise NotImplementedError() + + def delete_lod(self, lod): + """Delete LOD from the repository. 
+ + LOD is a LineOfDevelopment instance.""" + + raise NotImplementedError() + + def delete_path(self, lod, cvs_path): + """Delete CVS_PATH from LOD. + + LOD is a LineOfDevelopment; CVS_PATH is a CVSPath.""" + + raise NotImplementedError() + + def copy_lod(self, src_lod, dest_lod, src_revnum): + """Copy SRC_LOD in SRC_REVNUM to DEST_LOD. + + SRC_LOD and DEST_LOD are both LODs, and SRC_REVNUM is a subversion + revision number (int).""" + + raise NotImplementedError() + + def copy_path(self, cvs_path, src_lod, dest_lod, src_revnum): + """Copy CVS_PATH in SRC_LOD@SRC_REVNUM to DEST_LOD. + + CVS_PATH is a CVSPath, SRC_LOD and DEST_LOD are LODs, and + SRC_REVNUM is a subversion revision number (int).""" + + raise NotImplementedError() + + def finish(self): + """All SVN revisions have been committed. + + Perform any necessary cleanup.""" + + raise NotImplementedError() + + diff --git a/cvs2svn_lib/svn_revision_range.py b/cvs2svn_lib/svn_revision_range.py new file mode 100644 index 0000000..04ba7fa --- /dev/null +++ b/cvs2svn_lib/svn_revision_range.py @@ -0,0 +1,171 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
# ====================================================================

"""This module contains the SVNRevisionRange class."""


import bisect

from cvs2svn_lib.common import SVN_INVALID_REVNUM


class SVNRevisionRange:
  """The range of subversion revision numbers from which a path can be
  copied.

  self.opening_revnum is the number of the earliest such revision, and
  self.closing_revnum is one higher than the number of the last such
  revision.  If self.closing_revnum is None, then no closings were
  registered."""

  def __init__(self, source_lod, opening_revnum):
    # The LOD from which the copy would be made:
    self.source_lod = source_lod
    # The first revnum from which the copy is possible:
    self.opening_revnum = opening_revnum
    # One more than the last revnum from which the copy is possible,
    # or None if no closing has been registered:
    self.closing_revnum = None

  def add_closing(self, closing_revnum):
    """Register that the range closes at CLOSING_REVNUM.

    When we have a non-trunk default branch, we may have multiple
    closings--only register the first closing we encounter."""

    if self.closing_revnum is None:
      self.closing_revnum = closing_revnum

  def __contains__(self, revnum):
    """Return True iff REVNUM is contained in the range."""

    return (
        self.opening_revnum <= revnum
        and (self.closing_revnum is None or revnum < self.closing_revnum)
        )

  def __str__(self):
    if self.closing_revnum is None:
      return '[%d:]' % (self.opening_revnum,)
    else:
      return '[%d:%d]' % (self.opening_revnum, self.closing_revnum,)

  def __repr__(self):
    return str(self)


class RevisionScores:
  """Represent the scores for a range of revisions."""

  def __init__(self, svn_revision_ranges):
    """Initialize based on SVN_REVISION_RANGES.

    SVN_REVISION_RANGES is a list of SVNRevisionRange objects.

    The score of an svn source is defined to be the number of
    SVNRevisionRanges on that LOD that include the revision.  A score
    thus indicates that copying the corresponding revision (or any
    following revision up to the next revision in the list) of the
    object in question would yield that many correct paths at or
    underneath the object.  There may be other paths underneath it
    that are not correct and would need to be deleted or recopied;
    those can only be detected by descending and examining their
    scores.

    If SVN_REVISION_RANGES is empty, then all scores are undefined."""

    # A map {source_lod : [(revnum, +1 or -1), ...]} recording where
    # ranges open (+1) and close (-1):
    deltas_map = {}

    # NOTE: the loop variable is deliberately not called 'range' (as
    # in the original), to avoid shadowing the builtin; and
    # dict.setdefault replaces the original bare 'except:' clause,
    # which would also have swallowed KeyboardInterrupt/SystemExit and
    # hidden genuine programming errors.
    for rev_range in svn_revision_ranges:
      deltas = deltas_map.setdefault(rev_range.source_lod, [])
      deltas.append((rev_range.opening_revnum, +1))
      if rev_range.closing_revnum is not None:
        deltas.append((rev_range.closing_revnum, -1))

    # A map:
    #
    #   {SOURCE_LOD : [(REV1 SCORE1), (REV2 SCORE2), (REV3 SCORE3), ...]}
    #
    # where the tuples are sorted by revision number and the revision
    # numbers are distinct.  Score is the number of correct paths that
    # would result from using the specified SOURCE_LOD and revision
    # number (or any other revision preceding the next revision
    # listed) as a source.  For example, the score of any revision REV
    # in the range REV2 <= REV < REV3 is equal to SCORE2.
    self._scores_map = {}

    for (source_lod, deltas) in deltas_map.items():
      # Sort by revision number:
      deltas.sort()

      # Initialize output list with zeroth element of deltas.  This
      # element must exist, because it was verified that
      # svn_revision_ranges (and therefore openings) is not empty.
      scores = [ deltas[0] ]
      total = deltas[0][1]
      for (rev, change) in deltas[1:]:
        total += change
        if rev == scores[-1][0]:
          # Same revision as last entry; modify last entry:
          scores[-1] = (rev, total)
        else:
          # Previously-unseen revision; create new entry:
          scores.append((rev, total))
      self._scores_map[source_lod] = scores

  def get_score(self, range):
    """Return the score for RANGE's opening revision.

    If RANGE doesn't appear explicitly in self.scores, use the score
    of the highest revision preceding RANGE.  If there are no
    preceding revisions, then the score for RANGE is unknown; in this
    case, return -1."""

    try:
      scores = self._scores_map[range.source_lod]
    except KeyError:
      return -1

    # Remember, according to the tuple sorting rules,
    #
    #    (revnum, anything,) < (revnum+1,) < (revnum+1, anything,)
    predecessor_index = bisect.bisect_right(
        scores, (range.opening_revnum + 1,)
        ) - 1

    if predecessor_index < 0:
      # There are no revisions preceding RANGE's opening revision:
      return -1

    return scores[predecessor_index][1]

  def get_best_revnum(self):
    """Find the revnum with the highest score.

    Return a triple (source_lod, revnum, score) for the revnum with
    the highest score.  (The original docstring claimed a pair was
    returned, but the method has always returned three values.)  If
    the highest score is shared by multiple revisions, select the
    smallest source LOD and, within it, the oldest revision: LODs are
    scanned in sorted order and the strict '>' comparison below makes
    earlier candidates win ties."""

    best_source_lod = None
    best_revnum = SVN_INVALID_REVNUM
    best_score = 0

    for source_lod in sorted(self._scores_map):
      for revnum, score in self._scores_map[source_lod]:
        if score > best_score:
          best_source_lod = source_lod
          best_score = score
          best_revnum = revnum
    return best_source_lod, best_revnum, best_score


# ---- original patch metadata (next file in the diff) ----
# diff --git a/cvs2svn_lib/svn_run_options.py b/cvs2svn_lib/svn_run_options.py
# new file mode 100644
# index 0000000..e757730
# --- /dev/null
# +++ b/cvs2svn_lib/svn_run_options.py
# @@ -0,0 +1,543 @@

# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2000-2009 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
+# ==================================================================== + +"""This module manages cvs2svn run options.""" + + +import sys +import optparse +import datetime +import codecs + +from cvs2svn_lib.version import VERSION +from cvs2svn_lib import config +from cvs2svn_lib.common import warning_prefix +from cvs2svn_lib.common import error_prefix +from cvs2svn_lib.common import FatalError +from cvs2svn_lib.common import normalize_svn_path +from cvs2svn_lib.log import Log +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.run_options import not_both +from cvs2svn_lib.run_options import RunOptions +from cvs2svn_lib.run_options import ContextOption +from cvs2svn_lib.run_options import IncompatibleOption +from cvs2svn_lib.run_options import authors +from cvs2svn_lib.man_writer import ManWriter +from cvs2svn_lib.project import Project +from cvs2svn_lib.svn_output_option import DumpfileOutputOption +from cvs2svn_lib.svn_output_option import ExistingRepositoryOutputOption +from cvs2svn_lib.svn_output_option import NewRepositoryOutputOption +from cvs2svn_lib.revision_manager import NullRevisionRecorder +from cvs2svn_lib.revision_manager import NullRevisionExcluder +from cvs2svn_lib.rcs_revision_manager import RCSRevisionReader +from cvs2svn_lib.cvs_revision_manager import CVSRevisionReader +from cvs2svn_lib.checkout_internal import InternalRevisionRecorder +from cvs2svn_lib.checkout_internal import InternalRevisionExcluder +from cvs2svn_lib.checkout_internal import InternalRevisionReader +from cvs2svn_lib.symbol_strategy import TrunkPathRule +from cvs2svn_lib.symbol_strategy import BranchesPathRule +from cvs2svn_lib.symbol_strategy import TagsPathRule + + +short_desc = 'convert a cvs repository into a subversion repository' + +synopsis = """\ +.B cvs2svn +[\\fIOPTION\\fR]... \\fIOUTPUT-OPTION CVS-REPOS-PATH\\fR +.br +.B cvs2svn +[\\fIOPTION\\fR]... 
\\fI--options=PATH\\fR +""" + +long_desc = """\ +Create a new Subversion repository based on the version history stored in a +CVS repository. Each CVS commit will be mirrored in the Subversion +repository, including such information as date of commit and id of the +committer. +.P +\\fICVS-REPOS-PATH\\fR is the filesystem path of the part of the CVS +repository that you want to convert. It is not possible to convert a +CVS repository to which you only have remote access; see the FAQ for +more information. This path doesn't have to be the top level +directory of a CVS repository; it can point at a project within a +repository, in which case only that project will be converted. This +path or one of its parent directories has to contain a subdirectory +called CVSROOT (though the CVSROOT directory can be empty). +.P +Multiple CVS repositories can be converted into a single Subversion +repository in a single run of cvs2svn, but only by using an +\\fB--options\\fR file. +""" + +files = """\ +A directory called \\fIcvs2svn-tmp\\fR (or the directory specified by +\\fB--tmpdir\\fR) is used as scratch space for temporary data files. +""" + +see_also = [ + ('cvs', '1'), + ('svn', '1'), + ('svnadmin', '1'), + ] + + +class SVNRunOptions(RunOptions): + def _get_output_options_group(self): + group = RunOptions._get_output_options_group(self) + + group.add_option(IncompatibleOption( + '--svnrepos', '-s', type='string', + action='store', + help='path where SVN repos should be created', + man_help=( + 'Write the output of the conversion into a Subversion repository ' + 'located at \\fIpath\\fR. This option causes a new Subversion ' + 'repository to be created at \\fIpath\\fR unless the ' + '\\fB--existing-svnrepos\\fR option is also used.' 
+ ), + metavar='PATH', + )) + self.parser.set_default('existing_svnrepos', False) + group.add_option(IncompatibleOption( + '--existing-svnrepos', + action='store_true', + help='load into existing SVN repository (for use with --svnrepos)', + man_help=( + 'Load the converted CVS repository into an existing Subversion ' + 'repository, instead of creating a new repository. (This option ' + 'should be used in combination with ' + '\\fB-s\\fR/\\fB--svnrepos\\fR.) The repository must either be ' + 'empty or contain no paths that overlap with those that will ' + 'result from the conversion. Please note that you need write ' + 'permission for the repository files.' + ), + )) + group.add_option(IncompatibleOption( + '--fs-type', type='string', + action='store', + help=( + 'pass --fs-type=TYPE to "svnadmin create" (for use with ' + '--svnrepos)' + ), + man_help=( + 'Pass \\fI--fs-type\\fR=\\fItype\\fR to "svnadmin create" when ' + 'creating a new repository.' + ), + metavar='TYPE', + )) + self.parser.set_default('bdb_txn_nosync', False) + group.add_option(IncompatibleOption( + '--bdb-txn-nosync', + action='store_true', + help=( + 'pass --bdb-txn-nosync to "svnadmin create" (for use with ' + '--svnrepos)' + ), + man_help=( + 'Pass \\fI--bdb-txn-nosync\\fR to "svnadmin create" when ' + 'creating a new BDB-style Subversion repository.' + ), + )) + self.parser.set_default('create_options', []) + group.add_option(IncompatibleOption( + '--create-option', type='string', + action='append', dest='create_options', + help='pass OPT to "svnadmin create" (for use with --svnrepos)', + man_help=( + 'Pass \\fIopt\\fR to "svnadmin create" when creating a new ' + 'Subversion repository (can be specified multiple times to ' + 'pass multiple options).' 
+ ), + metavar='OPT', + )) + group.add_option(IncompatibleOption( + '--dumpfile', type='string', + action='store', + help='just produce a dumpfile; don\'t commit to a repos', + man_help=( + 'Just produce a dumpfile; don\'t commit to an SVN repository. ' + 'Write the dumpfile to \\fIpath\\fR.' + ), + metavar='PATH', + )) + + group.add_option(ContextOption( + '--dry-run', + action='store_true', + help=( + 'do not create a repository or a dumpfile; just print what ' + 'would happen.' + ), + man_help=( + 'Do not create a repository or a dumpfile; just print the ' + 'details of what cvs2svn would do if it were really converting ' + 'your repository.' + ), + )) + + # Deprecated options: + self.parser.set_default('dump_only', False) + group.add_option(IncompatibleOption( + '--dump-only', + action='callback', callback=self.callback_dump_only, + help=optparse.SUPPRESS_HELP, + man_help=optparse.SUPPRESS_HELP, + )) + group.add_option(IncompatibleOption( + '--create', + action='callback', callback=self.callback_create, + help=optparse.SUPPRESS_HELP, + man_help=optparse.SUPPRESS_HELP, + )) + + return group + + def _get_conversion_options_group(self): + group = RunOptions._get_conversion_options_group(self) + + self.parser.set_default('trunk_base', config.DEFAULT_TRUNK_BASE) + group.add_option(IncompatibleOption( + '--trunk', type='string', + action='store', dest='trunk_base', + help=( + 'path for trunk (default: %s)' + % (config.DEFAULT_TRUNK_BASE,) + ), + man_help=( + 'Set the top-level path to use for trunk in the Subversion ' + 'repository. The default is \\fI%s\\fR.' 
+ % (config.DEFAULT_TRUNK_BASE,) + ), + metavar='PATH', + )) + self.parser.set_default('branches_base', config.DEFAULT_BRANCHES_BASE) + group.add_option(IncompatibleOption( + '--branches', type='string', + action='store', dest='branches_base', + help=( + 'path for branches (default: %s)' + % (config.DEFAULT_BRANCHES_BASE,) + ), + man_help=( + 'Set the top-level path to use for branches in the Subversion ' + 'repository. The default is \\fI%s\\fR.' + % (config.DEFAULT_BRANCHES_BASE,) + ), + metavar='PATH', + )) + self.parser.set_default('tags_base', config.DEFAULT_TAGS_BASE) + group.add_option(IncompatibleOption( + '--tags', type='string', + action='store', dest='tags_base', + help=( + 'path for tags (default: %s)' + % (config.DEFAULT_TAGS_BASE,) + ), + man_help=( + 'Set the top-level path to use for tags in the Subversion ' + 'repository. The default is \\fI%s\\fR.' + % (config.DEFAULT_TAGS_BASE,) + ), + metavar='PATH', + )) + group.add_option(ContextOption( + '--no-prune', + action='store_false', dest='prune', + help='don\'t prune empty directories', + man_help=( + 'When all files are deleted from a directory in the Subversion ' + 'repository, don\'t delete the empty directory (the default is ' + 'to delete any empty directories).' + ), + )) + group.add_option(ContextOption( + '--no-cross-branch-commits', + action='store_false', dest='cross_branch_commits', + help='prevent the creation of cross-branch commits', + man_help=( + 'Prevent the creation of commits that affect files on multiple ' + 'branches at once.' + ), + )) + + return group + + def _get_extraction_options_group(self): + group = RunOptions._get_extraction_options_group(self) + + self.parser.set_default('use_internal_co', False) + group.add_option(IncompatibleOption( + '--use-internal-co', + action='store_true', + help=( + 'use internal code to extract revision contents ' + '(fastest but disk space intensive) (default)' + ), + man_help=( + 'Use internal code to extract revision contents. 
This ' + 'is up to 50% faster than using \\fB--use-rcs\\fR, but needs ' + 'a lot of disk space: roughly the size of your CVS repository ' + 'plus the peak size of a complete checkout of the repository ' + 'with all branches that existed and still had commits pending ' + 'at a given time. This option is the default.' + ), + )) + self.parser.set_default('use_cvs', False) + group.add_option(IncompatibleOption( + '--use-cvs', + action='store_true', + help=( + 'use CVS to extract revision contents (slower than ' + '--use-internal-co or --use-rcs)' + ), + man_help=( + 'Use CVS to extract revision contents. This option is slower ' + 'than \\fB--use-internal-co\\fR or \\fB--use-rcs\\fR.' + ), + )) + self.parser.set_default('use_rcs', False) + group.add_option(IncompatibleOption( + '--use-rcs', + action='store_true', + help=( + 'use RCS to extract revision contents (faster than ' + '--use-cvs but fails in some cases)' + ), + man_help=( + 'Use RCS \'co\' to extract revision contents. This option is ' + 'faster than \\fB--use-cvs\\fR but fails in some cases.' + ), + )) + + return group + + def _get_environment_options_group(self): + group = RunOptions._get_environment_options_group(self) + + group.add_option(ContextOption( + '--svnadmin', type='string', + action='store', dest='svnadmin_executable', + help='path to the "svnadmin" program', + man_help=( + 'Path to the \\fIsvnadmin\\fR program. 
(\\fIsvnadmin\\fR is ' + 'needed when the \\fB-s\\fR/\\fB--svnrepos\\fR output option is ' + 'used.)' + ), + metavar='PATH', + )) + + return group + + def callback_dump_only(self, option, opt_str, value, parser): + parser.values.dump_only = True + Log().error( + warning_prefix + + ': The --dump-only option is deprecated (it is implied ' + 'by --dumpfile).\n' + ) + + def callback_create(self, option, opt_str, value, parser): + Log().error( + warning_prefix + + ': The behaviour produced by the --create option is now the ' + 'default;\n' + 'passing the option is deprecated.\n' + ) + + def callback_manpage(self, option, opt_str, value, parser): + f = codecs.getwriter('utf_8')(sys.stdout) + ManWriter( + parser, + section='1', + date=datetime.date.today(), + source='Version %s' % (VERSION,), + manual='User Commands', + short_desc=short_desc, + synopsis=synopsis, + long_desc=long_desc, + files=files, + authors=authors, + see_also=see_also, + ).write_manpage(f) + sys.exit(0) + + def process_extraction_options(self): + """Process options related to extracting data from the CVS repository.""" + + ctx = Ctx() + options = self.options + + not_both(options.use_rcs, '--use-rcs', + options.use_cvs, '--use-cvs') + + not_both(options.use_rcs, '--use-rcs', + options.use_internal_co, '--use-internal-co') + + not_both(options.use_cvs, '--use-cvs', + options.use_internal_co, '--use-internal-co') + + if options.use_rcs: + ctx.revision_recorder = NullRevisionRecorder() + ctx.revision_excluder = NullRevisionExcluder() + ctx.revision_reader = RCSRevisionReader(options.co_executable) + elif options.use_cvs: + ctx.revision_recorder = NullRevisionRecorder() + ctx.revision_excluder = NullRevisionExcluder() + ctx.revision_reader = CVSRevisionReader(options.cvs_executable) + else: + # --use-internal-co is the default: + ctx.revision_recorder = InternalRevisionRecorder(compress=True) + ctx.revision_excluder = InternalRevisionExcluder() + ctx.revision_reader = InternalRevisionReader(compress=True) 
+ + def process_output_options(self): + """Process the options related to SVN output.""" + + ctx = Ctx() + options = self.options + + if options.dump_only and not options.dumpfile: + raise FatalError("'--dump-only' requires '--dumpfile' to be specified.") + + if not options.svnrepos and not options.dumpfile and not ctx.dry_run: + raise FatalError("must pass one of '-s' or '--dumpfile'.") + + not_both(options.svnrepos, '-s', + options.dumpfile, '--dumpfile') + + not_both(options.dumpfile, '--dumpfile', + options.existing_svnrepos, '--existing-svnrepos') + + not_both(options.bdb_txn_nosync, '--bdb-txn-nosync', + options.existing_svnrepos, '--existing-svnrepos') + + not_both(options.dumpfile, '--dumpfile', + options.bdb_txn_nosync, '--bdb-txn-nosync') + + not_both(options.fs_type, '--fs-type', + options.existing_svnrepos, '--existing-svnrepos') + + if ( + options.fs_type + and options.fs_type != 'bdb' + and options.bdb_txn_nosync + ): + raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s." + % options.fs_type) + + if options.svnrepos: + if options.existing_svnrepos: + ctx.output_option = ExistingRepositoryOutputOption(options.svnrepos) + else: + ctx.output_option = NewRepositoryOutputOption( + options.svnrepos, + fs_type=options.fs_type, bdb_txn_nosync=options.bdb_txn_nosync, + create_options=options.create_options) + else: + ctx.output_option = DumpfileOutputOption(options.dumpfile) + + def add_project( + self, + project_cvs_repos_path, + trunk_path=None, branches_path=None, tags_path=None, + initial_directories=[], + symbol_transforms=None, + symbol_strategy_rules=[], + ): + """Add a project to be converted. + + Most arguments are passed straight through to the Project + constructor. 
SYMBOL_STRATEGY_RULES is an iterable of + SymbolStrategyRules that will be applied to symbols in this + project.""" + + if trunk_path is not None: + trunk_path = normalize_svn_path(trunk_path, allow_empty=True) + if branches_path is not None: + branches_path = normalize_svn_path(branches_path, allow_empty=False) + if tags_path is not None: + tags_path = normalize_svn_path(tags_path, allow_empty=False) + + initial_directories = [ + path + for path in [trunk_path, branches_path, tags_path] + if path + ] + [ + normalize_svn_path(path) + for path in initial_directories + ] + + symbol_strategy_rules = list(symbol_strategy_rules) + + # Add rules to set the SVN paths for LODs depending on whether + # they are the trunk, tags, or branches: + if trunk_path is not None: + symbol_strategy_rules.append(TrunkPathRule(trunk_path)) + if branches_path is not None: + symbol_strategy_rules.append(BranchesPathRule(branches_path)) + if tags_path is not None: + symbol_strategy_rules.append(TagsPathRule(tags_path)) + + id = len(self.projects) + project = Project( + id, + project_cvs_repos_path, + initial_directories=initial_directories, + symbol_transforms=symbol_transforms, + ) + + self.projects.append(project) + self.project_symbol_strategy_rules.append(symbol_strategy_rules) + + def clear_projects(self): + """Clear the list of projects to be converted. + + This method is for the convenience of options files, which may + want to import one another.""" + + del self.projects[:] + del self.project_symbol_strategy_rules[:] + + def process_options(self): + # Consistency check for options and arguments. 
+ if len(self.args) == 0: + self.usage() + sys.exit(1) + + if len(self.args) > 1: + Log().error(error_prefix + ": must pass only one CVS repository.\n") + self.usage() + sys.exit(1) + + cvsroot = self.args[0] + + self.process_extraction_options() + self.process_output_options() + self.process_symbol_strategy_options() + self.process_property_setter_options() + + # Create the default project (using ctx.trunk, ctx.branches, and + # ctx.tags): + self.add_project( + cvsroot, + trunk_path=self.options.trunk_base, + branches_path=self.options.branches_base, + tags_path=self.options.tags_base, + symbol_transforms=self.options.symbol_transforms, + symbol_strategy_rules=self.options.symbol_strategy_rules, + ) + + diff --git a/cvs2svn_lib/symbol.py b/cvs2svn_lib/symbol.py new file mode 100644 index 0000000..e3a6b35 --- /dev/null +++ b/cvs2svn_lib/symbol.py @@ -0,0 +1,246 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains classes that represent trunk, branches, and tags. + +The classes in this module represent several concepts related to +symbols and lines of development in the abstract; that is, not within +a particular file, but across all files in a project. 
+ +The classes in this module are organized into the following class +hierarchy: + +AbstractSymbol + | + +--LineOfDevelopment + | | + | +--Trunk + | | + | +--IncludedSymbol (also inherits from TypedSymbol) + | | + | +--Branch + | | + | +--Tag + | + +--Symbol + | + +--TypedSymbol + | + +--IncludedSymbol (also inherits from LineOfDevelopment) + | | + | +--Branch + | | + | +--Tag + | + +--ExcludedSymbol + +Please note the use of multiple inheritance. + +All AbstractSymbols contain an id that is globally unique across all +AbstractSymbols. Moreover, the id of an AbstractSymbol remains the +same even if the symbol is mutated (as described below), and two +AbstractSymbols are considered equal iff their ids are the same, even +if the two instances have different types. Symbols in different +projects always have different ids and are therefore always distinct. +(Indeed, this is pretty much the defining characteristic of a +project.) Even if, for example, two projects each have branches with +the same name, the Symbols representing the branches are distinct and +have distinct ids. (This is important to avoid having to rewrite +databases with new symbol ids in CollateSymbolsPass.) + +AbstractSymbols are all initially created in CollectRevsPass as either +Trunk or Symbol instances. A Symbol instance is essentially an +undifferentiated Symbol. + +In CollateSymbolsPass, it is decided which symbols will be converted +as branches, which as tags, and which excluded altogether. At the +beginning of this pass, the symbols are all represented by instances +of the non-specific Symbol class. During CollateSymbolsPass, each +Symbol instance is replaced by an instance of Branch, Tag, or +ExcludedSymbol with the same id. (Trunk instances are left +unchanged.) At the end of CollateSymbolsPass, all ExcludedSymbols are +discarded and processing continues with only Trunk, Branch, and Tag +instances. 
These three classes inherit from LineOfDevelopment; +therefore, in later passes the term LineOfDevelopment (abbreviated to +LOD) is used to refer to such objects.""" + + +from cvs2svn_lib.context import Ctx +from cvs2svn_lib.common import path_join + + +class AbstractSymbol: + """Base class for all other classes in this file.""" + + def __init__(self, id, project): + self.id = id + self.project = project + + def __hash__(self): + return self.id + + def __eq__(self, other): + return self.id == other.id + + +class LineOfDevelopment(AbstractSymbol): + """Base class for Trunk, Branch, and Tag. + + This is basically the abstraction for what will be a root tree in + the Subversion repository.""" + + def __init__(self, id, project): + AbstractSymbol.__init__(self, id, project) + self.base_path = None + + def get_path(self, *components): + """Return the svn path for this LineOfDevelopment.""" + + return path_join(self.base_path, *components) + + +class Trunk(LineOfDevelopment): + """Represent the main line of development.""" + + def __getstate__(self): + return (self.id, self.project.id, self.base_path,) + + def __setstate__(self, state): + (self.id, project_id, self.base_path,) = state + self.project = Ctx()._projects[project_id] + + def __cmp__(self, other): + if isinstance(other, Trunk): + return cmp(self.project, other.project) + elif isinstance(other, Symbol): + # Allow Trunk to compare less than Symbols: + return -1 + else: + raise NotImplementedError() + + def __str__(self): + """For convenience only. The format is subject to change at any time.""" + + return 'Trunk' + + def __repr__(self): + return '%s<%x>' % (self, self.id,) + + +class Symbol(AbstractSymbol): + """Represents a symbol within one project in the CVS repository. + + Instance of the Symbol class itself are used to represent symbols + from the CVS repository. CVS, of course, distinguishes between + normal tags and branch tags, but we allow symbol types to be changed + in CollateSymbolsPass. 
Therefore, we store all CVS symbols as + Symbol instances at the beginning of the conversion. + + In CollateSymbolsPass, Symbols are replaced by Branches, Tags, and + ExcludedSymbols (the latter being discarded at the end of that + pass).""" + + def __init__(self, id, project, name, preferred_parent_id=None): + AbstractSymbol.__init__(self, id, project) + self.name = name + + # If this symbol has a preferred parent, this member is the id of + # the LineOfDevelopment instance representing it. If the symbol + # never appeared in a CVSTag or CVSBranch (for example, because + # all of the branches on this LOD have been detached from the + # dependency tree), then this field is set to None. This field is + # set during FilterSymbolsPass. + self.preferred_parent_id = preferred_parent_id + + def __getstate__(self): + return (self.id, self.project.id, self.name, self.preferred_parent_id,) + + def __setstate__(self, state): + (self.id, project_id, self.name, self.preferred_parent_id,) = state + self.project = Ctx()._projects[project_id] + + def __cmp__(self, other): + if isinstance(other, Symbol): + return cmp(self.project, other.project) \ + or cmp(self.name, other.name) \ + or cmp(self.id, other.id) + elif isinstance(other, Trunk): + # Allow Symbols to compare greater than Trunk: + return +1 + else: + raise NotImplementedError() + + def __str__(self): + return self.name + + def __repr__(self): + return '%s<%x>' % (self, self.id,) + + +class TypedSymbol(Symbol): + """A Symbol whose type (branch, tag, or excluded) has been decided.""" + + def __init__(self, symbol): + Symbol.__init__( + self, symbol.id, symbol.project, symbol.name, + symbol.preferred_parent_id, + ) + + +class IncludedSymbol(TypedSymbol, LineOfDevelopment): + """A TypedSymbol that will be included in the conversion.""" + + def __init__(self, symbol): + TypedSymbol.__init__(self, symbol) + # We can't call the LineOfDevelopment constructor, so initialize + # its extra member explicitly: + try: + # If the old 
symbol had a base_path set, then use it: + self.base_path = symbol.base_path + except AttributeError: + self.base_path = None + + def __getstate__(self): + return (TypedSymbol.__getstate__(self), self.base_path,) + + def __setstate__(self, state): + (super_state, self.base_path,) = state + TypedSymbol.__setstate__(self, super_state) + + +class Branch(IncludedSymbol): + """An object that describes a CVS branch.""" + + def __str__(self): + """For convenience only. The format is subject to change at any time.""" + + return 'Branch(%r)' % (self.name,) + + +class Tag(IncludedSymbol): + def __str__(self): + """For convenience only. The format is subject to change at any time.""" + + return 'Tag(%r)' % (self.name,) + + +class ExcludedSymbol(TypedSymbol): + def __str__(self): + """For convenience only. The format is subject to change at any time.""" + + return 'ExcludedSymbol(%r)' % (self.name,) + + diff --git a/cvs2svn_lib/symbol_database.py b/cvs2svn_lib/symbol_database.py new file mode 100644 index 0000000..824f97b --- /dev/null +++ b/cvs2svn_lib/symbol_database.py @@ -0,0 +1,68 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
# ====================================================================

"""This module contains the SymbolDatabase class."""


import cPickle

from cvs2svn_lib import config
from cvs2svn_lib.artifact_manager import artifact_manager


class SymbolDatabase:
  """Read-only access to symbol database.

  This class allows iteration and lookups id -> symbol, where symbol
  is a TypedSymbol instance.  The whole database is read into memory
  upon construction."""

  def __init__(self):
    # A map { id : TypedSymbol }
    self._symbols = {}

    f = open(artifact_manager.get_temp_file(config.SYMBOL_DB), 'rb')
    try:
      symbols = cPickle.load(f)
    finally:
      # Close the file even if unpickling raises (the original leaked
      # the file handle in that case).
      f.close()
    for symbol in symbols:
      self._symbols[symbol.id] = symbol

  def get_symbol(self, id):
    """Return the symbol instance with id ID.

    Raise KeyError if the symbol is not known."""

    return self._symbols[id]

  def __iter__(self):
    """Iterate over the Symbol instances within this database."""

    return self._symbols.itervalues()

  def close(self):
    """Release the in-memory symbol map.

    The instance must not be used after this is called."""

    self._symbols = None


def create_symbol_database(symbols):
  """Create and fill a symbol database.

  Record each symbol that is listed in SYMBOLS, which is an iterable
  containing Trunk and TypedSymbol objects."""

  f = open(artifact_manager.get_temp_file(config.SYMBOL_DB), 'wb')
  try:
    # -1 selects the highest available pickle protocol:
    cPickle.dump(symbols, f, -1)
  finally:
    # Guarantee the handle is closed even if pickling raises:
    f.close()


# ---- original patch metadata (next file in the diff) ----
# diff --git a/cvs2svn_lib/symbol_statistics.py b/cvs2svn_lib/symbol_statistics.py
# new file mode 100644
# index 0000000..0d35a50
# --- /dev/null
# +++ b/cvs2svn_lib/symbol_statistics.py
# @@ -0,0 +1,521 @@

# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2000-2008 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
+# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module gathers and processes statistics about lines of development.""" + +import cPickle + +from cvs2svn_lib import config +from cvs2svn_lib.common import error_prefix +from cvs2svn_lib.common import FatalException +from cvs2svn_lib.log import Log +from cvs2svn_lib.artifact_manager import artifact_manager +from cvs2svn_lib.symbol import Trunk +from cvs2svn_lib.symbol import IncludedSymbol +from cvs2svn_lib.symbol import Branch +from cvs2svn_lib.symbol import Tag +from cvs2svn_lib.symbol import ExcludedSymbol + + +class SymbolPlanError(FatalException): + pass + + +class SymbolPlanException(SymbolPlanError): + def __init__(self, stats, symbol, msg): + self.stats = stats + self.symbol = symbol + SymbolPlanError.__init__( + self, + 'Cannot convert the following symbol to %s: %s\n %s' + % (symbol, msg, self.stats,) + ) + + +class IndeterminateSymbolException(SymbolPlanException): + def __init__(self, stats, symbol): + SymbolPlanException.__init__(self, stats, symbol, 'Indeterminate type') + + +class _Stats: + """A summary of information about a symbol (tag or branch). + + Members: + + lod -- the LineOfDevelopment instance of the lod being described + + tag_create_count -- the number of files in which this lod appears + as a tag + + branch_create_count -- the number of files in which this lod + appears as a branch + + branch_commit_count -- the number of files in which there were + commits on this lod + + trivial_import_count -- the number of files in which this branch + was purely a non-trunk default branch containing exactly one + revision. 
+ + pure_ntdb_count -- the number of files in which this branch was + purely a non-trunk default branch (consisting only of + non-trunk default branch revisions). + + branch_blockers -- a set of Symbol instances for any symbols that + sprout from a branch with this name. + + possible_parents -- a map {LineOfDevelopment : count} indicating + in how many files each LOD could have served as the parent of + self.lod.""" + + def __init__(self, lod): + self.lod = lod + self.tag_create_count = 0 + self.branch_create_count = 0 + self.branch_commit_count = 0 + self.branch_blockers = set() + self.trivial_import_count = 0 + self.pure_ntdb_count = 0 + self.possible_parents = { } + + def register_tag_creation(self): + """Register the creation of this lod as a tag.""" + + self.tag_create_count += 1 + + def register_branch_creation(self): + """Register the creation of this lod as a branch.""" + + self.branch_create_count += 1 + + def register_branch_commit(self): + """Register that there were commit(s) on this branch in one file.""" + + self.branch_commit_count += 1 + + def register_branch_blocker(self, blocker): + """Register BLOCKER as preventing this symbol from being deleted. + + BLOCKER is a tag or a branch that springs from a revision on this + symbol.""" + + self.branch_blockers.add(blocker) + + def register_trivial_import(self): + """Register that this branch is a trivial import branch in one file.""" + + self.trivial_import_count += 1 + + def register_pure_ntdb(self): + """Register that this branch is a pure import branch in one file.""" + + self.pure_ntdb_count += 1 + + def register_possible_parent(self, lod): + """Register that LOD was a possible parent for SELF.lod in a file.""" + + self.possible_parents[lod] = self.possible_parents.get(lod, 0) + 1 + + def register_branch_possible_parents(self, cvs_branch, cvs_file_items): + """Register any possible parents of this symbol from CVS_BRANCH.""" + + # This routine is a bottleneck. 
So we define some local variables + # to speed up access to frequently-needed variables. + register = self.register_possible_parent + parent_cvs_rev = cvs_file_items[cvs_branch.source_id] + + # The "obvious" parent of a branch is the branch holding the + # revision where the branch is rooted: + register(parent_cvs_rev.lod) + + # Any other branches that are rooted at the same revision and + # were committed earlier than the branch are also possible + # parents: + symbol = cvs_branch.symbol + for branch_id in parent_cvs_rev.branch_ids: + parent_symbol = cvs_file_items[branch_id].symbol + # A branch cannot be its own parent, nor can a branch's + # parent be a branch that was created after it. So we stop + # iterating when we reached the branch whose parents we are + # collecting: + if parent_symbol == symbol: + break + register(parent_symbol) + + def register_tag_possible_parents(self, cvs_tag, cvs_file_items): + """Register any possible parents of this symbol from CVS_TAG.""" + + # This routine is a bottleneck. So use local variables to speed + # up access to frequently-needed objects. + register = self.register_possible_parent + parent_cvs_rev = cvs_file_items[cvs_tag.source_id] + + # The "obvious" parent of a tag is the branch holding the + # revision where the branch is rooted: + register(parent_cvs_rev.lod) + + # Branches that are rooted at the same revision are also + # possible parents: + for branch_id in parent_cvs_rev.branch_ids: + parent_symbol = cvs_file_items[branch_id].symbol + register(parent_symbol) + + def is_ghost(self): + """Return True iff this lod never really existed.""" + + return ( + not isinstance(self.lod, Trunk) + and self.branch_commit_count == 0 + and not self.branch_blockers + and not self.possible_parents + ) + + def check_valid(self, symbol): + """Check whether SYMBOL is a valid conversion of SELF.lod. + + It is planned to convert SELF.lod as SYMBOL. 
Verify that SYMBOL + is a TypedSymbol and that the information that it contains is + consistent with that stored in SELF.lod. (This routine does not + do higher-level tests of whether the chosen conversion is actually + sensible.) If there are any problems, raise a + SymbolPlanException.""" + + if not isinstance(symbol, (Trunk, Branch, Tag, ExcludedSymbol)): + raise IndeterminateSymbolException(self, symbol) + + if symbol.id != self.lod.id: + raise SymbolPlanException(self, symbol, 'IDs must match') + + if symbol.project != self.lod.project: + raise SymbolPlanException(self, symbol, 'Projects must match') + + if isinstance(symbol, IncludedSymbol) and symbol.name != self.lod.name: + raise SymbolPlanException(self, symbol, 'Names must match') + + def check_preferred_parent_allowed(self, symbol): + """Check that SYMBOL's preferred_parent_id is an allowed parent. + + SYMBOL is the planned conversion of SELF.lod. Verify that its + preferred_parent_id is a possible parent of SELF.lod. If not, + raise a SymbolPlanException describing the problem.""" + + if isinstance(symbol, IncludedSymbol) \ + and symbol.preferred_parent_id is not None: + for pp in self.possible_parents.keys(): + if pp.id == symbol.preferred_parent_id: + return + else: + raise SymbolPlanException( + self, symbol, + 'The selected parent is not among the symbol\'s ' + 'possible parents.' 
+ ) + + def __str__(self): + return ( + '\'%s\' is ' + 'a tag in %d files, ' + 'a branch in %d files, ' + 'a trivial import in %d files, ' + 'a pure import in %d files, ' + 'and has commits in %d files' + % (self.lod, self.tag_create_count, self.branch_create_count, + self.trivial_import_count, self.pure_ntdb_count, + self.branch_commit_count) + ) + + def __repr__(self): + retval = ['%s\n possible parents:\n' % (self,)] + parent_counts = self.possible_parents.items() + parent_counts.sort(lambda a,b: - cmp(a[1], b[1])) + for (symbol, count) in parent_counts: + if isinstance(symbol, Trunk): + retval.append(' trunk : %d\n' % count) + else: + retval.append(' \'%s\' : %d\n' % (symbol.name, count)) + if self.branch_blockers: + blockers = list(self.branch_blockers) + blockers.sort() + retval.append(' blockers:\n') + for blocker in blockers: + retval.append(' \'%s\'\n' % (blocker,)) + return ''.join(retval) + + +class SymbolStatisticsCollector: + """Collect statistics about lines of development. + + Record a summary of information about each line of development in + the RCS files for later storage into a database. The database is + created in CollectRevsPass and it is used in CollateSymbolsPass (via + the SymbolStatistics class). + + collect_data._SymbolDataCollector inserts information into instances + of this class by by calling its register_*() methods. + + Its main purpose is to assist in the decisions about which symbols + can be treated as branches and tags and which may be excluded. + + The data collected by this class can be written to the file + config.SYMBOL_STATISTICS.""" + + def __init__(self): + # A map { lod -> _Stats } for all lines of development: + self._stats = { } + + def __getitem__(self, lod): + """Return the _Stats record for line of development LOD. 
+ + Create and register a new one if necessary.""" + + try: + return self._stats[lod] + except KeyError: + stats = _Stats(lod) + self._stats[lod] = stats + return stats + + def register(self, cvs_file_items): + """Register the statistics for each symbol in CVS_FILE_ITEMS.""" + + for lod_items in cvs_file_items.iter_lods(): + if lod_items.lod is not None: + branch_stats = self[lod_items.lod] + + branch_stats.register_branch_creation() + + if lod_items.cvs_revisions: + branch_stats.register_branch_commit() + + if lod_items.is_trivial_import(): + branch_stats.register_trivial_import() + + if lod_items.is_pure_ntdb(): + branch_stats.register_pure_ntdb() + + for cvs_symbol in lod_items.iter_blockers(): + branch_stats.register_branch_blocker(cvs_symbol.symbol) + + if lod_items.cvs_branch is not None: + branch_stats.register_branch_possible_parents( + lod_items.cvs_branch, cvs_file_items + ) + + for cvs_tag in lod_items.cvs_tags: + tag_stats = self[cvs_tag.symbol] + + tag_stats.register_tag_creation() + + tag_stats.register_tag_possible_parents(cvs_tag, cvs_file_items) + + def purge_ghost_symbols(self): + """Purge any symbols that don't have any activity. + + Such ghost symbols can arise if a symbol was defined in an RCS + file but pointed at a non-existent revision.""" + + for stats in self._stats.values(): + if stats.is_ghost(): + Log().warn('Deleting ghost symbol: %s' % (stats.lod,)) + del self._stats[stats.lod] + + def close(self): + """Store the stats database to the SYMBOL_STATISTICS file.""" + + f = open(artifact_manager.get_temp_file(config.SYMBOL_STATISTICS), 'wb') + cPickle.dump(self._stats.values(), f, -1) + f.close() + self._stats = None + + +class SymbolStatistics: + """Read and handle line of development statistics. + + The statistics are read from a database created by + SymbolStatisticsCollector. This class has methods to process the + statistics information and help with decisions about: + + 1. What tags and branches should be processed/excluded + + 2. 
What tags should be forced to be branches and vice versa (this + class maintains some statistics to help the user decide) + + 3. Are there inconsistencies? + + - A symbol that is sometimes a branch and sometimes a tag + + - A forced branch with commit(s) on it + + - A non-excluded branch depends on an excluded branch + + The data in this class is read from a pickle file.""" + + def __init__(self, filename): + """Read the stats database from FILENAME.""" + + # A map { LineOfDevelopment -> _Stats } for all lines of + # development: + self._stats = { } + + # A map { LineOfDevelopment.id -> _Stats } for all lines of + # development: + self._stats_by_id = { } + + stats_list = cPickle.load(open(filename, 'rb')) + + for stats in stats_list: + self._stats[stats.lod] = stats + self._stats_by_id[stats.lod.id] = stats + + def __len__(self): + return len(self._stats) + + def __getitem__(self, lod_id): + return self._stats_by_id[lod_id] + + def get_stats(self, lod): + """Return the _Stats object for LineOfDevelopment instance LOD. + + Raise KeyError if no such lod exists.""" + + return self._stats[lod] + + def __iter__(self): + return self._stats.itervalues() + + def _check_blocked_excludes(self, symbol_map): + """Check for any excluded LODs that are blocked by non-excluded symbols. 
+ + If any are found, describe the problem to Log().error() and raise + a FatalException.""" + + # A list of (lod,[blocker,...]) tuples for excludes that are + # blocked by the specified non-excluded blockers: + problems = [] + + for lod in symbol_map.itervalues(): + if isinstance(lod, ExcludedSymbol): + # Symbol is excluded; make sure that its blockers are also + # excluded: + lod_blockers = [] + for blocker in self.get_stats(lod).branch_blockers: + if isinstance(symbol_map.get(blocker, None), IncludedSymbol): + lod_blockers.append(blocker) + if lod_blockers: + problems.append((lod, lod_blockers)) + + if problems: + s = [] + for (lod, lod_blockers) in problems: + s.append( + '%s: %s cannot be excluded because the following symbols ' + 'depend on it:\n' + % (error_prefix, lod,) + ) + for blocker in lod_blockers: + s.append(' %s\n' % (blocker,)) + s.append('\n') + Log().error(''.join(s)) + + raise FatalException() + + def _check_invalid_tags(self, symbol_map): + """Check for commits on any symbols that are to be converted as tags. + + SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)} + indicating how each AbstractSymbol is to be converted. If there + is a commit on a symbol, then it cannot be converted as a tag. 
If + any tags with commits are found, output error messages describing + the problems then raise a FatalException.""" + + Log().quiet("Checking for forced tags with commits...") + + invalid_tags = [ ] + for symbol in symbol_map.itervalues(): + if isinstance(symbol, Tag): + stats = self.get_stats(symbol) + if stats.branch_commit_count > 0: + invalid_tags.append(symbol) + + if not invalid_tags: + # No problems found: + return + + s = [] + s.append( + '%s: The following branches cannot be forced to be tags ' + 'because they have commits:\n' + % (error_prefix,) + ) + for tag in invalid_tags: + s.append(' %s\n' % (tag.name)) + s.append('\n') + Log().error(''.join(s)) + + raise FatalException() + + def check_consistency(self, symbol_map): + """Check the plan for how to convert symbols for consistency. + + SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)} + indicating how each AbstractSymbol is to be converted. If any + problems are detected, describe the problem to Log().error() and + raise a FatalException.""" + + # We want to do all of the consistency checks even if one of them + # fails, so that the user gets as much feedback as possible. Set + # this variable to True if any errors are found. 
+ error_found = False + + # Check that the planned preferred parents are OK for all + # IncludedSymbols: + for lod in symbol_map.itervalues(): + if isinstance(lod, IncludedSymbol): + stats = self.get_stats(lod) + try: + stats.check_preferred_parent_allowed(lod) + except SymbolPlanException, e: + Log().error('%s\n' % (e,)) + error_found = True + + try: + self._check_blocked_excludes(symbol_map) + except FatalException: + error_found = True + + try: + self._check_invalid_tags(symbol_map) + except FatalException: + error_found = True + + if error_found: + raise FatalException( + 'Please fix the above errors and restart CollateSymbolsPass' + ) + + def exclude_symbol(self, symbol): + """SYMBOL has been excluded; remove it from our statistics.""" + + del self._stats[symbol] + del self._stats_by_id[symbol.id] + + # Remove references to this symbol from other statistics objects: + for stats in self._stats.itervalues(): + stats.branch_blockers.discard(symbol) + if symbol in stats.possible_parents: + del stats.possible_parents[symbol] + + diff --git a/cvs2svn_lib/symbol_strategy.py b/cvs2svn_lib/symbol_strategy.py new file mode 100644 index 0000000..9d562a8 --- /dev/null +++ b/cvs2svn_lib/symbol_strategy.py @@ -0,0 +1,685 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2000-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
+# ==================================================================== + +"""SymbolStrategy classes determine how to convert symbols.""" + +import re + +from cvs2svn_lib.common import FatalError +from cvs2svn_lib.common import path_join +from cvs2svn_lib.common import normalize_svn_path +from cvs2svn_lib.log import Log +from cvs2svn_lib.symbol import Trunk +from cvs2svn_lib.symbol import TypedSymbol +from cvs2svn_lib.symbol import Branch +from cvs2svn_lib.symbol import Tag +from cvs2svn_lib.symbol import ExcludedSymbol +from cvs2svn_lib.symbol_statistics import SymbolPlanError + + +class StrategyRule: + """A single rule that might determine how to convert a symbol.""" + + def start(self, symbol_statistics): + """This method is called once before get_symbol() is ever called. + + The StrategyRule can override this method to do whatever it wants + to prepare itself for work. SYMBOL_STATISTICS is an instance of + SymbolStatistics containing the statistics for all symbols in all + projects.""" + + pass + + def get_symbol(self, symbol, stats): + """Return an object describing what to do with the symbol in STATS. + + SYMBOL holds a Trunk or Symbol object as it has been determined so + far. Hopefully one of these method calls will turn any naked + Symbol instances into TypedSymbols. + + If this rule applies to the SYMBOL (whose statistics are collected + in STATS), then return a new or modified AbstractSymbol object. + If this rule doesn't apply, return SYMBOL unchanged.""" + + raise NotImplementedError() + + def finish(self): + """This method is called once after get_symbol() is done being called. + + The StrategyRule can override this method do whatever it wants to + release resources, etc.""" + + pass + + +class _RegexpStrategyRule(StrategyRule): + """A Strategy rule that bases its decisions on regexp matches. 
+ + If self.regexp matches a symbol name, return self.action(symbol); + otherwise, return the symbol unchanged.""" + + def __init__(self, pattern, action): + """Initialize a _RegexpStrategyRule. + + PATTERN is a string that will be treated as a regexp pattern. + PATTERN must match a full symbol name for the rule to apply (i.e., + it is anchored at the beginning and end of the symbol name). + + ACTION is the class representing how the symbol should be + converted. It should be one of the classes Branch, Tag, or + ExcludedSymbol. + + If PATTERN matches a symbol name, then get_symbol() returns + ACTION(name, id); otherwise it returns SYMBOL unchanged.""" + + try: + self.regexp = re.compile('^' + pattern + '$') + except re.error: + raise FatalError("%r is not a valid regexp." % (pattern,)) + + self.action = action + + def log(self, symbol): + raise NotImplementedError() + + def get_symbol(self, symbol, stats): + if isinstance(symbol, (Trunk, TypedSymbol)): + return symbol + elif self.regexp.match(symbol.name): + self.log(symbol) + return self.action(symbol) + else: + return symbol + + +class ForceBranchRegexpStrategyRule(_RegexpStrategyRule): + """Force symbols matching pattern to be branches.""" + + def __init__(self, pattern): + _RegexpStrategyRule.__init__(self, pattern, Branch) + + def log(self, symbol): + Log().verbose( + 'Converting symbol %s as a branch because it matches regexp "%s".' + % (symbol, self.regexp.pattern,) + ) + + +class ForceTagRegexpStrategyRule(_RegexpStrategyRule): + """Force symbols matching pattern to be tags.""" + + def __init__(self, pattern): + _RegexpStrategyRule.__init__(self, pattern, Tag) + + def log(self, symbol): + Log().verbose( + 'Converting symbol %s as a tag because it matches regexp "%s".' 
+ % (symbol, self.regexp.pattern,) + ) + + +class ExcludeRegexpStrategyRule(_RegexpStrategyRule): + """Exclude symbols matching pattern.""" + + def __init__(self, pattern): + _RegexpStrategyRule.__init__(self, pattern, ExcludedSymbol) + + def log(self, symbol): + Log().verbose( + 'Excluding symbol %s because it matches regexp "%s".' + % (symbol, self.regexp.pattern,) + ) + + +class ExcludeTrivialImportBranchRule(StrategyRule): + """If a symbol is a trivial import branch, exclude it. + + A trivial import branch is defined to be a branch that only had a + single import on it (no other kinds of commits) in every file in + which it appeared. In most cases these branches are worthless.""" + + def get_symbol(self, symbol, stats): + if isinstance(symbol, (Trunk, TypedSymbol)): + return symbol + if stats.tag_create_count == 0 \ + and stats.branch_create_count == stats.trivial_import_count: + Log().verbose( + 'Excluding branch %s because it is a trivial import branch.' + % (symbol,) + ) + return ExcludedSymbol(symbol) + else: + return symbol + + +class ExcludeVendorBranchRule(StrategyRule): + """If a symbol is a pure vendor branch, exclude it. + + A pure vendor branch is defined to be a branch that only had imports + on it (no other kinds of commits) in every file in which it + appeared.""" + + def get_symbol(self, symbol, stats): + if isinstance(symbol, (Trunk, TypedSymbol)): + return symbol + if stats.tag_create_count == 0 \ + and stats.branch_create_count == stats.pure_ntdb_count: + Log().verbose( + 'Excluding branch %s because it is a pure vendor branch.' 
+ % (symbol,) + ) + return ExcludedSymbol(symbol) + else: + return symbol + + +class UnambiguousUsageRule(StrategyRule): + """If a symbol is used unambiguously as a tag/branch, convert it as such.""" + + def get_symbol(self, symbol, stats): + if isinstance(symbol, (Trunk, TypedSymbol)): + return symbol + is_tag = stats.tag_create_count > 0 + is_branch = stats.branch_create_count > 0 or stats.branch_commit_count > 0 + if is_tag and is_branch: + # Can't decide + return symbol + elif is_branch: + Log().verbose( + 'Converting symbol %s as a branch because it is always used ' + 'as a branch.' + % (symbol,) + ) + return Branch(symbol) + elif is_tag: + Log().verbose( + 'Converting symbol %s as a tag because it is always used ' + 'as a tag.' + % (symbol,) + ) + return Tag(symbol) + else: + # The symbol didn't appear at all: + return symbol + + +class BranchIfCommitsRule(StrategyRule): + """If there was ever a commit on the symbol, convert it as a branch.""" + + def get_symbol(self, symbol, stats): + if isinstance(symbol, (Trunk, TypedSymbol)): + return symbol + elif stats.branch_commit_count > 0: + Log().verbose( + 'Converting symbol %s as a branch because there are commits on it.' + % (symbol,) + ) + return Branch(symbol) + else: + return symbol + + +class HeuristicStrategyRule(StrategyRule): + """Convert symbol based on how often it was used as a branch/tag. + + Whichever happened more often determines how the symbol is + converted.""" + + def get_symbol(self, symbol, stats): + if isinstance(symbol, (Trunk, TypedSymbol)): + return symbol + elif stats.tag_create_count >= stats.branch_create_count: + Log().verbose( + 'Converting symbol %s as a tag because it is more often used ' + 'as a tag.' + % (symbol,) + ) + return Tag(symbol) + else: + Log().verbose( + 'Converting symbol %s as a branch because it is more often used ' + 'as a branch.' + % (symbol,) + ) + return Branch(symbol) + + +class AllBranchRule(StrategyRule): + """Convert all symbols as branches. 
+ + Usually this rule will appear after a list of more careful rules + (including a general rule like UnambiguousUsageRule) and will + therefore only apply to the symbols not handled earlier.""" + + def get_symbol(self, symbol, stats): + if isinstance(symbol, (Trunk, TypedSymbol)): + return symbol + else: + Log().verbose( + 'Converting symbol %s as a branch because no other rules applied.' + % (symbol,) + ) + return Branch(symbol) + + +class AllTagRule(StrategyRule): + """Convert all symbols as tags. + + We don't worry about conflicts here; they will be caught later by + SymbolStatistics.check_consistency(). + + Usually this rule will appear after a list of more careful rules + (including a general rule like UnambiguousUsageRule) and will + therefore only apply to the symbols not handled earlier.""" + + def get_symbol(self, symbol, stats): + if isinstance(symbol, (Trunk, TypedSymbol)): + return symbol + else: + Log().verbose( + 'Converting symbol %s as a tag because no other rules applied.' 
+ % (symbol,) + ) + return Tag(symbol) + + +class TrunkPathRule(StrategyRule): + """Set the base path for Trunk.""" + + def __init__(self, trunk_path): + self.trunk_path = trunk_path + + def get_symbol(self, symbol, stats): + if isinstance(symbol, Trunk) and symbol.base_path is None: + symbol.base_path = self.trunk_path + + return symbol + + +class SymbolPathRule(StrategyRule): + """Set the base paths for symbol LODs.""" + + def __init__(self, symbol_type, base_path): + self.symbol_type = symbol_type + self.base_path = base_path + + def get_symbol(self, symbol, stats): + if isinstance(symbol, self.symbol_type) and symbol.base_path is None: + symbol.base_path = path_join(self.base_path, symbol.name) + + return symbol + + +class BranchesPathRule(SymbolPathRule): + """Set the base paths for Branch LODs.""" + + def __init__(self, branch_path): + SymbolPathRule.__init__(self, Branch, branch_path) + + +class TagsPathRule(SymbolPathRule): + """Set the base paths for Tag LODs.""" + + def __init__(self, tag_path): + SymbolPathRule.__init__(self, Tag, tag_path) + + +class HeuristicPreferredParentRule(StrategyRule): + """Use a heuristic rule to pick preferred parents. + + Pick the parent that should be preferred for any TypedSymbols. As + parent, use the symbol that appeared most often as a possible parent + of the symbol in question. If multiple symbols are tied, choose the + one that comes first according to the Symbol class's natural sort + order.""" + + def _get_preferred_parent(self, stats): + """Return the LODs that are most often possible parents in STATS. + + Return the set of LinesOfDevelopment that appeared most often as + possible parents. 
The return value might contain multiple symbols + if multiple LinesOfDevelopment appeared the same number of times.""" + + best_count = -1 + best_symbol = None + for (symbol, count) in stats.possible_parents.items(): + if count > best_count or (count == best_count and symbol < best_symbol): + best_count = count + best_symbol = symbol + + if best_symbol is None: + return None + else: + return best_symbol + + def get_symbol(self, symbol, stats): + if isinstance(symbol, TypedSymbol) and symbol.preferred_parent_id is None: + preferred_parent = self._get_preferred_parent(stats) + if preferred_parent is None: + Log().verbose('%s has no preferred parent' % (symbol,)) + else: + symbol.preferred_parent_id = preferred_parent.id + Log().verbose( + 'The preferred parent of %s is %s' % (symbol, preferred_parent,) + ) + + return symbol + + +class ManualTrunkRule(StrategyRule): + """Change the SVN path of Trunk LODs. + + Members: + + project_id -- (int or None) The id of the project whose trunk + should be affected by this rule. If project_id is None, then + the rule is not project-specific. + + svn_path -- (str) The SVN path that should be used as the base + directory for this trunk. This member must not be None, + though it may be the empty string for a single-project, + trunk-only conversion. + + """ + + def __init__(self, project_id, svn_path): + self.project_id = project_id + self.svn_path = normalize_svn_path(svn_path, allow_empty=True) + + def get_symbol(self, symbol, stats): + if (self.project_id is not None + and self.project_id != stats.lod.project.id): + return symbol + + if isinstance(symbol, Trunk): + symbol.base_path = self.svn_path + + return symbol + + +def convert_as_branch(symbol): + Log().verbose( + 'Converting symbol %s as a branch because of manual setting.' + % (symbol,) + ) + return Branch(symbol) + + +def convert_as_tag(symbol): + Log().verbose( + 'Converting symbol %s as a tag because of manual setting.' 
+ % (symbol,) + ) + return Tag(symbol) + + +def exclude(symbol): + Log().verbose( + 'Excluding symbol %s because of manual setting.' + % (symbol,) + ) + return ExcludedSymbol(symbol) + + +class ManualSymbolRule(StrategyRule): + """Change how particular symbols are converted. + + Members: + + project_id -- (int or None) The id of the project whose trunk + should be affected by this rule. If project_id is None, then + the rule is not project-specific. + + symbol_name -- (str) The name of the symbol that should be + affected by this rule. + + conversion -- (callable or None) A callable that converts the + symbol to its preferred output type. This should normally be + one of (convert_as_branch, convert_as_tag, exclude). If this + member is None, then this rule does not affect the symbol's + output type. + + svn_path -- (str) The SVN path that should be used as the base + directory for this trunk. This member must not be None, + though it may be the empty string for a single-project, + trunk-only conversion. + + parent_lod_name -- (str or None) The name of the line of + development that should be preferred as the parent of this + symbol. (The preferred parent is the line of development from + which the symbol should sprout.) If this member is set to the + string '.trunk.', then the symbol will be set to sprout + directly from trunk. If this member is set to None, then this + rule won't affect the symbol's parent. + + """ + + def __init__( + self, project_id, symbol_name, conversion, svn_path, parent_lod_name + ): + self.project_id = project_id + self.symbol_name = symbol_name + self.conversion = conversion + if svn_path is None: + self.svn_path = None + else: + self.svn_path = normalize_svn_path(svn_path, allow_empty=True) + self.parent_lod_name = parent_lod_name + + def _get_parent_by_id(self, parent_lod_name, stats): + """Return the LOD object for the parent with name PARENT_LOD_NAME. 
+ + STATS is the _Stats object describing a symbol whose parent needs + to be determined from its name. If none of its possible parents + has name PARENT_LOD_NAME, raise a SymbolPlanError.""" + + for pp in stats.possible_parents.keys(): + if isinstance(pp, Trunk): + pass + elif pp.name == parent_lod_name: + return pp + else: + parent_counts = stats.possible_parents.items() + parent_counts.sort(lambda a,b: - cmp(a[1], b[1])) + lines = [ + '%s is not a valid parent for %s;' + % (parent_lod_name, stats.lod,), + ' possible parents (with counts):' + ] + for (symbol, count) in parent_counts: + if isinstance(symbol, Trunk): + lines.append(' .trunk. : %d' % count) + else: + lines.append(' %s : %d' % (symbol.name, count)) + raise SymbolPlanError('\n'.join(lines)) + + def get_symbol(self, symbol, stats): + if (self.project_id is not None + and self.project_id != stats.lod.project.id): + return symbol + + elif isinstance(symbol, Trunk): + return symbol + + elif self.symbol_name == stats.lod.name: + if self.conversion is not None: + symbol = self.conversion(symbol) + + if self.parent_lod_name is None: + pass + elif self.parent_lod_name == '.trunk.': + symbol.preferred_parent_id = stats.lod.project.trunk_id + else: + symbol.preferred_parent_id = self._get_parent_by_id( + self.parent_lod_name, stats + ).id + + if self.svn_path is not None: + symbol.base_path = self.svn_path + + return symbol + + +class SymbolHintsFileRule(StrategyRule): + """Use manual symbol configurations read from a file. + + The input file is line-oriented with the following format: + + [ []] + + Where the fields are separated by whitespace and + + project-id -- the numerical id of the Project to which the + symbol belongs (numbered starting with 0). This field can + be '.' if the rule is not project-specific. + + symbol-name -- the name of the symbol being specified, or + '.trunk.' if the rule should apply to trunk. + + conversion -- how the symbol should be treated in the + conversion. 
This is one of the following values: 'branch', + 'tag', or 'exclude'. This field can be '.' if the rule + shouldn't affect how the symbol is treated in the + conversion. + + svn-path -- the SVN path that should serve as the root path of + this LOD. The path should be expressed as a path relative + to the SVN root directory, with or without a leading '/'. + This field can be omitted or '.' if the rule shouldn't + affect the LOD's SVN path. + + parent-lod-name -- the name of the LOD that should serve as this + symbol's parent. This field can be omitted or '.' if the + rule shouldn't affect the symbol's parent, or it can be + '.trunk.' to indicate that the symbol should sprout from the + project's trunk.""" + + comment_re = re.compile(r'^(\#|$)') + + conversion_map = { + 'branch' : convert_as_branch, + 'tag' : convert_as_tag, + 'exclude' : exclude, + '.' : None, + } + + def __init__(self, filename): + self.filename = filename + + def start(self, symbol_statistics): + self._rules = [] + + f = open(self.filename, 'r') + for l in f: + l = l.rstrip() + s = l.lstrip() + if self.comment_re.match(s): + continue + fields = s.split() + + if len(fields) < 3: + raise FatalError( + 'The following line in "%s" cannot be parsed:\n "%s"' + % (self.filename, l,) + ) + + project_id = fields.pop(0) + symbol_name = fields.pop(0) + conversion = fields.pop(0) + + if fields: + svn_path = fields.pop(0) + if svn_path == '.': + svn_path = None + elif svn_path[0] == '/': + svn_path = svn_path[1:] + else: + svn_path = None + + if fields: + parent_lod_name = fields.pop(0) + else: + parent_lod_name = '.' 
+ + if fields: + raise FatalError( + 'The following line in "%s" cannot be parsed:\n "%s"' + % (self.filename, l,) + ) + + if project_id == '.': + project_id = None + else: + try: + project_id = int(project_id) + except ValueError: + raise FatalError( + 'Illegal project_id in the following line:\n "%s"' % (l,) + ) + + if symbol_name == '.trunk.': + if conversion not in ['.', 'trunk']: + raise FatalError('Trunk cannot be converted as a different type') + + if parent_lod_name != '.': + raise FatalError('Trunk\'s parent cannot be set') + + if svn_path is None: + # This rule doesn't do anything: + pass + else: + self._rules.append(ManualTrunkRule(project_id, svn_path)) + + else: + try: + conversion = self.conversion_map[conversion] + except KeyError: + raise FatalError( + 'Illegal conversion in the following line:\n "%s"' % (l,) + ) + + if parent_lod_name == '.': + parent_lod_name = None + + if conversion is None \ + and svn_path is None \ + and parent_lod_name is None: + # There is nothing to be done: + pass + else: + self._rules.append( + ManualSymbolRule( + project_id, symbol_name, + conversion, svn_path, parent_lod_name + ) + ) + + for rule in self._rules: + rule.start(symbol_statistics) + + def get_symbol(self, symbol, stats): + for rule in self._rules: + symbol = rule.get_symbol(symbol, stats) + + return symbol + + def finish(self): + for rule in self._rules: + rule.finish() + + del self._rules + + diff --git a/cvs2svn_lib/symbol_transform.py b/cvs2svn_lib/symbol_transform.py new file mode 100644 index 0000000..a4995b8 --- /dev/null +++ b/cvs2svn_lib/symbol_transform.py @@ -0,0 +1,236 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2006-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. 
+# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains classes to transform symbol names.""" + + +import os +import re + +from cvs2svn_lib.log import Log +from cvs2svn_lib.common import FatalError +from cvs2svn_lib.common import IllegalSVNPathError +from cvs2svn_lib.common import normalize_svn_path + + +class SymbolTransform: + """Transform symbol names arbitrarily.""" + + def transform(self, cvs_file, symbol_name, revision): + """Possibly transform SYMBOL_NAME, which was found in CVS_FILE. + + Return the transformed symbol name. If this SymbolTransform + doesn't apply, return the original SYMBOL_NAME. If this symbol + should be ignored entirely, return None. (Please note that + ignoring a branch via this mechanism only causes the branch *name* + to be ignored; the branch contents will still be converted. + Usually branches should be excluded using --exclude.) + + REVISION contains the CVS revision number to which the symbol was + attached in the file as a string (with zeros removed). + + This method is free to use the information in CVS_FILE (including + CVS_FILE.project) to decide whether and/or how to transform + SYMBOL_NAME.""" + + raise NotImplementedError() + + +class ReplaceSubstringsSymbolTransform(SymbolTransform): + """Replace specific substrings in symbol names. 
+ + If the substring occurs multiple times, replace all copies.""" + + def __init__(self, old, new): + self.old = old + self.new = new + + def transform(self, cvs_file, symbol_name, revision): + return symbol_name.replace(self.old, self.new) + + +class NormalizePathsSymbolTransform(SymbolTransform): + def transform(self, cvs_file, symbol_name, revision): + try: + return normalize_svn_path(symbol_name) + except IllegalSVNPathError, e: + raise FatalError('Problem with %s: %s' % (symbol_name, e,)) + + +class CompoundSymbolTransform(SymbolTransform): + """A SymbolTransform that applies other SymbolTransforms in series. + + Each of the contained SymbolTransforms is applied, one after the + other. If any of them returns None, then None is returned (the + following SymbolTransforms are ignored).""" + + def __init__(self, symbol_transforms): + """Ininitialize a CompoundSymbolTransform. + + SYMBOL_TRANSFORMS is an iterable of SymbolTransform instances.""" + + self.symbol_transforms = list(symbol_transforms) + + def transform(self, cvs_file, symbol_name, revision): + for symbol_transform in self.symbol_transforms: + symbol_name = symbol_transform.transform( + cvs_file, symbol_name, revision + ) + if symbol_name is None: + # Don't continue with other symbol transforms: + break + + return symbol_name + + +class RegexpSymbolTransform(SymbolTransform): + """Transform symbols by using a regexp textual substitution.""" + + def __init__(self, pattern, replacement): + """Create a SymbolTransform that transforms symbols matching PATTERN. + + PATTERN is a regular expression that should match the whole symbol + name. 
REPLACEMENT is the replacement text, which may include + patterns like r'\1' or r'\g<1>' or r'\g' (where 'name' is a + reference to a named substring in the pattern of the form + r'(?P...)').""" + + self.pattern = re.compile('^' + pattern + '$') + self.replacement = replacement + + def transform(self, cvs_file, symbol_name, revision): + return self.pattern.sub(self.replacement, symbol_name) + + +class SymbolMapper(SymbolTransform): + """A SymbolTransform that transforms specific symbol definitions. + + The user has to specify the exact CVS filename, symbol name, and + revision number to be transformed, and the new name (or None if the + symbol should be ignored). The mappings can be set via a + constructor argument or by calling __setitem__().""" + + def __init__(self, items=[]): + """Initialize the mapper. + + ITEMS is a list of tuples (cvs_filename, symbol_name, revision, + new_name) which will be set as mappings.""" + + # A map {(cvs_filename, symbol_name, revision) : new_name}: + self._map = {} + + for (cvs_filename, symbol_name, revision, new_name) in items: + self[cvs_filename, symbol_name, revision] = new_name + + def __setitem__(self, (cvs_filename, symbol_name, revision), new_name): + """Set a mapping for a particular file, symbol, and revision.""" + + cvs_filename = os.path.normcase(os.path.normpath(cvs_filename)) + key = (cvs_filename, symbol_name, revision) + if key in self._map: + Log().warn( + 'Overwriting symbol transform for\n' + ' filename=%r symbol=%s revision=%s' + % (cvs_filename, symbol_name, revision,) + ) + self._map[key] = new_name + + def transform(self, cvs_file, symbol_name, revision): + cvs_filename = os.path.normcase(os.path.normpath(cvs_file.filename)) + return self._map.get( + (cvs_filename, symbol_name, revision), symbol_name + ) + + +class SubtreeSymbolMapper(SymbolTransform): + """A SymbolTransform that transforms symbols within a whole repo subtree. 
+ + The user has to specify a CVS repository path (a filename or + directory) and the original symbol name. All symbols under that + path will be renamed to the specified new name (which can be None if + the symbol should be ignored). The mappings can be set via a + constructor argument or by calling __setitem__(). Only the most + specific rule is applied.""" + + def __init__(self, items=[]): + """Initialize the mapper. + + ITEMS is a list of tuples (cvs_path, symbol_name, new_name) + which will be set as mappings. cvs_path is a string naming a + directory within the CVS repository.""" + + # A map {symbol_name : {cvs_path : new_name}}: + self._map = {} + + for (cvs_path, symbol_name, new_name) in items: + self[cvs_path, symbol_name] = new_name + + def __setitem__(self, (cvs_path, symbol_name), new_name): + """Set a mapping for a particular file and symbol.""" + + try: + symbol_map = self._map[symbol_name] + except KeyError: + symbol_map = {} + self._map[symbol_name] = symbol_map + + cvs_path = os.path.normcase(os.path.normpath(cvs_path)) + if cvs_path in symbol_map: + Log().warn( + 'Overwriting symbol transform for\n' + ' directory=%r symbol=%s' + % (cvs_path, symbol_name,) + ) + symbol_map[cvs_path] = new_name + + def transform(self, cvs_file, symbol_name, revision): + try: + symbol_map = self._map[symbol_name] + except KeyError: + # No rules for that symbol name + return symbol_name + + cvs_path = os.path.normcase(os.path.normpath(cvs_file.filename)) + while True: + try: + return symbol_map[cvs_path] + except KeyError: + new_cvs_path = os.path.dirname(cvs_path) + if new_cvs_path == cvs_path: + # No rules found for that path; return symbol name unaltered. + return symbol_name + else: + cvs_path = new_cvs_path + + +class IgnoreSymbolTransform(SymbolTransform): + """Ignore symbols matching a specified regular expression.""" + + def __init__(self, pattern): + """Create an SymbolTransform that ignores symbols matching PATTERN. 
+ + PATTERN is a regular expression that should match the whole symbol + name.""" + + self.pattern = re.compile('^' + pattern + '$') + + def transform(self, cvs_file, symbol_name, revision): + if self.pattern.match(symbol_name): + return None + else: + return symbol_name + + diff --git a/cvs2svn_lib/time_range.py b/cvs2svn_lib/time_range.py new file mode 100644 index 0000000..f7dc234 --- /dev/null +++ b/cvs2svn_lib/time_range.py @@ -0,0 +1,44 @@ +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2006-2008 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +"""This module contains a class to manage time ranges.""" + + +class TimeRange(object): + __slots__ = ('t_min', 't_max') + + def __init__(self): + # Start out with a t_min higher than any incoming time T, and a + # t_max lower than any incoming T. This way the first T will push + # t_min down to T, and t_max up to T, naturally (without any + # special-casing), and successive times will then ratchet them + # outward as appropriate. + self.t_min = 1L<<32 + self.t_max = 0 + + def add(self, timestamp): + """Expand the range to encompass TIMESTAMP.""" + + if timestamp < self.t_min: + self.t_min = timestamp + if timestamp > self.t_max: + self.t_max = timestamp + + def __cmp__(self, other): + # Sorted by t_max, and break ties using t_min. 
+ return cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min) + + diff --git a/cvs2svn_lib/version.py b/cvs2svn_lib/version.py new file mode 100644 index 0000000..7900964 --- /dev/null +++ b/cvs2svn_lib/version.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python2 +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2007-2009 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. +# ==================================================================== + +# The version of cvs2svn: +VERSION = '2.3.0' + + +# If this file is run as a script, print the cvs2svn version number to +# stdout: +if __name__ == '__main__': + print VERSION + + diff --git a/cvs2svn_rcsparse/__init__.py b/cvs2svn_rcsparse/__init__.py new file mode 100644 index 0000000..829c117 --- /dev/null +++ b/cvs2svn_rcsparse/__init__.py @@ -0,0 +1,26 @@ +# -*-python-*- +# +# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved. +# +# By using this file, you agree to the terms and conditions set forth in +# the LICENSE.html file which can be found at the top level of the ViewVC +# distribution or at http://viewvc.org/license-1.html. 
+# +# For more information, visit http://viewvc.org/ +# +# ----------------------------------------------------------------------- + +"""This package provides parsing tools for RCS files.""" + +from common import * + +try: + from tparse import parse +except ImportError: + try: + from texttools import Parser + except ImportError: + from default import Parser + + def parse(file, sink): + return Parser().parse(file, sink) diff --git a/cvs2svn_rcsparse/common.py b/cvs2svn_rcsparse/common.py new file mode 100644 index 0000000..3eed600 --- /dev/null +++ b/cvs2svn_rcsparse/common.py @@ -0,0 +1,324 @@ +# -*-python-*- +# +# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved. +# +# By using this file, you agree to the terms and conditions set forth in +# the LICENSE.html file which can be found at the top level of the ViewVC +# distribution or at http://viewvc.org/license-1.html. +# +# For more information, visit http://viewvc.org/ +# +# ----------------------------------------------------------------------- + +"""common.py: common classes and functions for the RCS parsing tools.""" + +import calendar +import string + +class Sink: + def set_head_revision(self, revision): + pass + + def set_principal_branch(self, branch_name): + pass + + def set_access(self, accessors): + pass + + def define_tag(self, name, revision): + pass + + def set_locker(self, revision, locker): + pass + + def set_locking(self, mode): + """Used to signal locking mode. 
+ + Called with mode argument 'strict' if strict locking + Not called when no locking used.""" + + pass + + def set_comment(self, comment): + pass + + def set_expansion(self, mode): + pass + + def admin_completed(self): + pass + + def define_revision(self, revision, timestamp, author, state, + branches, next): + pass + + def tree_completed(self): + pass + + def set_description(self, description): + pass + + def set_revision_info(self, revision, log, text): + pass + + def parse_completed(self): + pass + + +# -------------------------------------------------------------------------- +# +# EXCEPTIONS USED BY RCSPARSE +# + +class RCSParseError(Exception): + pass + + +class RCSIllegalCharacter(RCSParseError): + pass + + +class RCSExpected(RCSParseError): + def __init__(self, got, wanted): + RCSParseError.__init__( + self, + 'Unexpected parsing error in RCS file.\n' + 'Expected token: %s, but saw: %s' + % (wanted, got) + ) + + +class RCSStopParser(Exception): + pass + + +# -------------------------------------------------------------------------- +# +# STANDARD TOKEN STREAM-BASED PARSER +# + +class _Parser: + stream_class = None # subclasses need to define this + + def _read_until_semicolon(self): + """Read all tokens up to and including the next semicolon token. + + Return the tokens (not including the semicolon) as a list.""" + + tokens = [] + + while 1: + token = self.ts.get() + if token == ';': + break + tokens.append(token) + + return tokens + + def _parse_admin_head(self, token): + rev = self.ts.get() + if rev == ';': + # The head revision is not specified. Just drop the semicolon + # on the floor. 
+ pass + else: + self.sink.set_head_revision(rev) + self.ts.match(';') + + def _parse_admin_branch(self, token): + branch = self.ts.get() + if branch != ';': + self.sink.set_principal_branch(branch) + self.ts.match(';') + + def _parse_admin_access(self, token): + accessors = self._read_until_semicolon() + if accessors: + self.sink.set_access(accessors) + + def _parse_admin_symbols(self, token): + while 1: + tag_name = self.ts.get() + if tag_name == ';': + break + self.ts.match(':') + tag_rev = self.ts.get() + self.sink.define_tag(tag_name, tag_rev) + + def _parse_admin_locks(self, token): + while 1: + locker = self.ts.get() + if locker == ';': + break + self.ts.match(':') + rev = self.ts.get() + self.sink.set_locker(rev, locker) + + def _parse_admin_strict(self, token): + self.sink.set_locking("strict") + self.ts.match(';') + + def _parse_admin_comment(self, token): + self.sink.set_comment(self.ts.get()) + self.ts.match(';') + + def _parse_admin_expand(self, token): + expand_mode = self.ts.get() + self.sink.set_expansion(expand_mode) + self.ts.match(';') + + admin_token_map = { + 'head' : _parse_admin_head, + 'branch' : _parse_admin_branch, + 'access' : _parse_admin_access, + 'symbols' : _parse_admin_symbols, + 'locks' : _parse_admin_locks, + 'strict' : _parse_admin_strict, + 'comment' : _parse_admin_comment, + 'expand' : _parse_admin_expand, + 'desc' : None, + } + + def parse_rcs_admin(self): + while 1: + # Read initial token at beginning of line + token = self.ts.get() + + try: + f = self.admin_token_map[token] + except KeyError: + # We're done once we reach the description of the RCS tree + if token[0] in string.digits: + self.ts.unget(token) + return + else: + # Chew up "newphrase" + # warn("Unexpected RCS token: $token\n") + pass + else: + if f is None: + self.ts.unget(token) + return + else: + f(self, token) + + def _parse_rcs_tree_entry(self, revision): + # Parse date + self.ts.match('date') + date = self.ts.get() + self.ts.match(';') + + # Convert date into 
timestamp + date_fields = string.split(date, '.') + # According to rcsfile(5): the year "contains just the last two + # digits of the year for years from 1900 through 1999, and all the + # digits of years thereafter". + if len(date_fields[0]) == 2: + date_fields[0] = '19' + date_fields[0] + date_fields = map(string.atoi, date_fields) + EPOCH = 1970 + if date_fields[0] < EPOCH: + raise ValueError, 'invalid year' + timestamp = calendar.timegm(tuple(date_fields) + (0, 0, 0,)) + + # Parse author + ### NOTE: authors containing whitespace are violations of the + ### RCS specification. We are making an allowance here because + ### CVSNT is known to produce these sorts of authors. + self.ts.match('author') + author = ' '.join(self._read_until_semicolon()) + + # Parse state + self.ts.match('state') + state = '' + while 1: + token = self.ts.get() + if token == ';': + break + state = state + token + ' ' + state = state[:-1] # toss the trailing space + + # Parse branches + self.ts.match('branches') + branches = self._read_until_semicolon() + + # Parse revision of next delta in chain + self.ts.match('next') + next = self.ts.get() + if next == ';': + next = None + else: + self.ts.match(';') + + # there are some files with extra tags in them. for example: + # owner 640; + # group 15; + # permissions 644; + # hardlinks @configure.in@; + # this is "newphrase" in RCSFILE(5). we just want to skip over these. + while 1: + token = self.ts.get() + if token == 'desc' or token[0] in string.digits: + self.ts.unget(token) + break + # consume everything up to the semicolon + self._read_until_semicolon() + + self.sink.define_revision(revision, timestamp, author, state, branches, + next) + + def parse_rcs_tree(self): + while 1: + revision = self.ts.get() + + # End of RCS tree description ? 
+ if revision == 'desc': + self.ts.unget(revision) + return + + self._parse_rcs_tree_entry(revision) + + def parse_rcs_description(self): + self.ts.match('desc') + self.sink.set_description(self.ts.get()) + + def parse_rcs_deltatext(self): + while 1: + revision = self.ts.get() + if revision is None: + # EOF + break + text, sym2, log, sym1 = self.ts.mget(4) + if sym1 != 'log': + print `text[:100], sym2[:100], log[:100], sym1[:100]` + raise RCSExpected(sym1, 'log') + if sym2 != 'text': + raise RCSExpected(sym2, 'text') + ### need to add code to chew up "newphrase" + self.sink.set_revision_info(revision, log, text) + + def parse(self, file, sink): + self.ts = self.stream_class(file) + self.sink = sink + + self.parse_rcs_admin() + + # let sink know when the admin section has been completed + self.sink.admin_completed() + + self.parse_rcs_tree() + + # many sinks want to know when the tree has been completed so they can + # do some work to prep for the arrival of the deltatext + self.sink.tree_completed() + + self.parse_rcs_description() + self.parse_rcs_deltatext() + + # easiest for us to tell the sink it is done, rather than worry about + # higher level software doing it. + self.sink.parse_completed() + + self.ts = self.sink = None + +# -------------------------------------------------------------------------- diff --git a/cvs2svn_rcsparse/debug.py b/cvs2svn_rcsparse/debug.py new file mode 100644 index 0000000..cfeaf2b --- /dev/null +++ b/cvs2svn_rcsparse/debug.py @@ -0,0 +1,122 @@ +# -*-python-*- +# +# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved. +# +# By using this file, you agree to the terms and conditions set forth in +# the LICENSE.html file which can be found at the top level of the ViewVC +# distribution or at http://viewvc.org/license-1.html. 
+# +# For more information, visit http://viewvc.org/ +# +# ----------------------------------------------------------------------- + +"""debug.py: various debugging tools for the rcsparse package.""" + +import time + +from __init__ import parse +import common + + +class DebugSink(common.Sink): + def set_head_revision(self, revision): + print 'head:', revision + + def set_principal_branch(self, branch_name): + print 'branch:', branch_name + + def define_tag(self, name, revision): + print 'tag:', name, '=', revision + + def set_comment(self, comment): + print 'comment:', comment + + def set_description(self, description): + print 'description:', description + + def define_revision(self, revision, timestamp, author, state, + branches, next): + print 'revision:', revision + print ' timestamp:', timestamp + print ' author:', author + print ' state:', state + print ' branches:', branches + print ' next:', next + + def set_revision_info(self, revision, log, text): + print 'revision:', revision + print ' log:', log + print ' text:', text[:100], '...' + + +class DumpSink(common.Sink): + """Dump all the parse information directly to stdout. + + The output is relatively unformatted and untagged. It is intended as a + raw dump of the data in the RCS file. A copy can be saved, then changes + made to the parsing engine, then a comparison of the new output against + the old output. 
+ """ + def __init__(self): + global sha + import sha + + def set_head_revision(self, revision): + print revision + + def set_principal_branch(self, branch_name): + print branch_name + + def define_tag(self, name, revision): + print name, revision + + def set_comment(self, comment): + print comment + + def set_description(self, description): + print description + + def define_revision(self, revision, timestamp, author, state, + branches, next): + print revision, timestamp, author, state, branches, next + + def set_revision_info(self, revision, log, text): + print revision, sha.new(log).hexdigest(), sha.new(text).hexdigest() + + def tree_completed(self): + print 'tree_completed' + + def parse_completed(self): + print 'parse_completed' + + +def dump_file(fname): + parse(open(fname, 'rb'), DumpSink()) + +def time_file(fname): + f = open(fname, 'rb') + s = common.Sink() + t = time.time() + parse(f, s) + t = time.time() - t + print t + +def _usage(): + print 'This is normally a module for importing, but it has a couple' + print 'features for testing as an executable script.' + print 'USAGE: %s COMMAND filename,v' % sys.argv[0] + print ' where COMMAND is one of:' + print ' dump: filename is "dumped" to stdout' + print ' time: filename is parsed with the time written to stdout' + sys.exit(1) + +if __name__ == '__main__': + import sys + if len(sys.argv) != 3: + _usage() + if sys.argv[1] == 'dump': + dump_file(sys.argv[2]) + elif sys.argv[1] == 'time': + time_file(sys.argv[2]) + else: + _usage() diff --git a/cvs2svn_rcsparse/default.py b/cvs2svn_rcsparse/default.py new file mode 100644 index 0000000..57f9fc6 --- /dev/null +++ b/cvs2svn_rcsparse/default.py @@ -0,0 +1,172 @@ +# -*-python-*- +# +# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved. +# +# By using this file, you agree to the terms and conditions set forth in +# the LICENSE.html file which can be found at the top level of the ViewVC +# distribution or at http://viewvc.org/license-1.html. 
+# +# For more information, visit http://viewvc.org/ +# +# ----------------------------------------------------------------------- +# +# This file was originally based on portions of the blame.py script by +# Curt Hagenlocher. +# +# ----------------------------------------------------------------------- + +import string +import common + +class _TokenStream: + token_term = frozenset(string.whitespace + ';:') + + # the algorithm is about the same speed for any CHUNK_SIZE chosen. + # grab a good-sized chunk, but not too large to overwhelm memory. + # note: we use a multiple of a standard block size + CHUNK_SIZE = 192 * 512 # about 100k + +# CHUNK_SIZE = 5 # for debugging, make the function grind... + + def __init__(self, file): + self.rcsfile = file + self.idx = 0 + self.buf = self.rcsfile.read(self.CHUNK_SIZE) + if self.buf == '': + raise RuntimeError, 'EOF' + + def get(self): + "Get the next token from the RCS file." + + # Note: we can afford to loop within Python, examining individual + # characters. For the whitespace and tokens, the number of iterations + # is typically quite small. Thus, a simple iterative loop will beat + # out more complex solutions. 
+ + buf = self.buf + lbuf = len(buf) + idx = self.idx + + while 1: + if idx == lbuf: + buf = self.rcsfile.read(self.CHUNK_SIZE) + if buf == '': + # signal EOF by returning None as the token + del self.buf # so we fail if get() is called again + return None + lbuf = len(buf) + idx = 0 + + if buf[idx] not in string.whitespace: + break + + idx = idx + 1 + + if buf[idx] in ';:': + self.buf = buf + self.idx = idx + 1 + return buf[idx] + + if buf[idx] != '@': + end = idx + 1 + token = '' + while 1: + # find token characters in the current buffer + while end < lbuf and buf[end] not in self.token_term: + end = end + 1 + token = token + buf[idx:end] + + if end < lbuf: + # we stopped before the end, so we have a full token + idx = end + break + + # we stopped at the end of the buffer, so we may have a partial token + buf = self.rcsfile.read(self.CHUNK_SIZE) + lbuf = len(buf) + idx = end = 0 + + self.buf = buf + self.idx = idx + return token + + # a "string" which starts with the "@" character. we'll skip it when we + # search for content. + idx = idx + 1 + + chunks = [ ] + + while 1: + if idx == lbuf: + idx = 0 + buf = self.rcsfile.read(self.CHUNK_SIZE) + if buf == '': + raise RuntimeError, 'EOF' + lbuf = len(buf) + i = string.find(buf, '@', idx) + if i == -1: + chunks.append(buf[idx:]) + idx = lbuf + continue + if i == lbuf - 1: + chunks.append(buf[idx:i]) + idx = 0 + buf = '@' + self.rcsfile.read(self.CHUNK_SIZE) + if buf == '@': + raise RuntimeError, 'EOF' + lbuf = len(buf) + continue + if buf[i + 1] == '@': + chunks.append(buf[idx:i+1]) + idx = i + 2 + continue + + chunks.append(buf[idx:i]) + + self.buf = buf + self.idx = i + 1 + + return ''.join(chunks) + +# _get = get +# def get(self): + token = self._get() + print 'T:', `token` + return token + + def match(self, match): + "Try to match the next token from the input buffer." 
+ + token = self.get() + if token != match: + raise common.RCSExpected(token, match) + + def unget(self, token): + "Put this token back, for the next get() to return." + + # Override the class' .get method with a function which clears the + # overridden method then returns the pushed token. Since this function + # will not be looked up via the class mechanism, it should be a "normal" + # function, meaning it won't have "self" automatically inserted. + # Therefore, we need to pass both self and the token thru via defaults. + + # note: we don't put this into the input buffer because it may have been + # @-unescaped already. + + def give_it_back(self=self, token=token): + del self.get + return token + + self.get = give_it_back + + def mget(self, count): + "Return multiple tokens. 'next' is at the end." + result = [ ] + for i in range(count): + result.append(self.get()) + result.reverse() + return result + + +class Parser(common._Parser): + stream_class = _TokenStream diff --git a/cvs2svn_rcsparse/parse_rcs_file.py b/cvs2svn_rcsparse/parse_rcs_file.py new file mode 100644 index 0000000..215845d --- /dev/null +++ b/cvs2svn_rcsparse/parse_rcs_file.py @@ -0,0 +1,73 @@ +#!/usr/bin/python2 + +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2006-2007 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://cvs2svn.tigris.org/. 
+# ==================================================================== + +"""Parse an RCS file, showing the rcsparse callbacks that are called. + +This program is useful to see whether an RCS file has a problem (in +the sense of not being parseable by rcsparse) and also to illuminate +the correspondence between RCS file contents and rcsparse callbacks. + +The output of this program can also be considered to be a kind of +'canonical' format for RCS files, at least in so far as rcsparse +returns all relevant information in the file and provided that the +order of callbacks is always the same.""" + + +import sys +import os + + +class Logger: + def __init__(self, f, name): + self.f = f + self.name = name + + def __call__(self, *args): + self.f.write( + '%s(%s)\n' % (self.name, ', '.join(['%r' % arg for arg in args]),) + ) + + +class LoggingSink: + def __init__(self, f): + self.f = f + + def __getattr__(self, name): + return Logger(self.f, name) + + +if __name__ == '__main__': + # Since there is nontrivial logic in __init__.py, we have to import + # parse() via that file. 
First make sure that the directory + # containing this script is in the path: + sys.path.insert(0, os.path.dirname(sys.argv[0])) + + from __init__ import parse + + if sys.argv[1:]: + for path in sys.argv[1:]: + if os.path.isfile(path) and path.endswith(',v'): + parse( + open(path, 'rb'), LoggingSink(sys.stdout) + ) + else: + sys.stderr.write('%r is being ignored.\n' % path) + else: + parse(sys.stdin, LoggingSink(sys.stdout)) + + diff --git a/cvs2svn_rcsparse/rcparse_redundant_work.patch b/cvs2svn_rcsparse/rcparse_redundant_work.patch new file mode 100644 index 0000000..b574dd2 --- /dev/null +++ b/cvs2svn_rcsparse/rcparse_redundant_work.patch @@ -0,0 +1,99 @@ +=== modified file 'cvs2svn_rcsparse/default.py' +--- cvs2svn_rcsparse/default.py 2007-11-18 23:05:32 +0000 ++++ cvs2svn_rcsparse/default.py 2010-01-23 10:21:47 +0000 +@@ -19,7 +19,7 @@ + import common + + class _TokenStream: +- token_term = string.whitespace + ';:' ++ token_term = frozenset(string.whitespace + ';:') + + # the algorithm is about the same speed for any CHUNK_SIZE chosen. + # grab a good-sized chunk, but not too large to overwhelm memory. +@@ -44,15 +44,17 @@ + # out more complex solutions. 
+ + buf = self.buf ++ lbuf = len(buf) + idx = self.idx + + while 1: +- if idx == len(buf): ++ if idx == lbuf: + buf = self.rcsfile.read(self.CHUNK_SIZE) + if buf == '': + # signal EOF by returning None as the token + del self.buf # so we fail if get() is called again + return None ++ lbuf = len(buf) + idx = 0 + + if buf[idx] not in string.whitespace: +@@ -60,7 +62,7 @@ + + idx = idx + 1 + +- if buf[idx] == ';' or buf[idx] == ':': ++ if buf[idx] in ';:': + self.buf = buf + self.idx = idx + 1 + return buf[idx] +@@ -70,17 +72,18 @@ + token = '' + while 1: + # find token characters in the current buffer +- while end < len(buf) and buf[end] not in self.token_term: ++ while end < lbuf and buf[end] not in self.token_term: + end = end + 1 + token = token + buf[idx:end] + +- if end < len(buf): ++ if end < lbuf: + # we stopped before the end, so we have a full token + idx = end + break + + # we stopped at the end of the buffer, so we may have a partial token + buf = self.rcsfile.read(self.CHUNK_SIZE) ++ lbuf = len(buf) + idx = end = 0 + + self.buf = buf +@@ -94,22 +97,24 @@ + chunks = [ ] + + while 1: +- if idx == len(buf): ++ if idx == lbuf: + idx = 0 + buf = self.rcsfile.read(self.CHUNK_SIZE) + if buf == '': + raise RuntimeError, 'EOF' ++ lbuf = len(buf) + i = string.find(buf, '@', idx) + if i == -1: + chunks.append(buf[idx:]) +- idx = len(buf) ++ idx = lbuf + continue +- if i == len(buf) - 1: ++ if i == lbuf - 1: + chunks.append(buf[idx:i]) + idx = 0 + buf = '@' + self.rcsfile.read(self.CHUNK_SIZE) + if buf == '@': + raise RuntimeError, 'EOF' ++ lbuf = len(buf) + continue + if buf[i + 1] == '@': + chunks.append(buf[idx:i+1]) +@@ -121,7 +126,7 @@ + self.buf = buf + self.idx = i + 1 + +- return string.join(chunks, '') ++ return ''.join(chunks) + + # _get = get + # def get(self): + diff --git a/cvs2svn_rcsparse/run-tests.py b/cvs2svn_rcsparse/run-tests.py new file mode 100644 index 0000000..eb9c3ea --- /dev/null +++ b/cvs2svn_rcsparse/run-tests.py @@ -0,0 +1,73 @@ 
+#!/usr/bin/python2 + +# (Be in -*- python -*- mode.) +# +# ==================================================================== +# Copyright (c) 2007 CollabNet. All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://subversion.tigris.org/license-1.html. +# If newer versions of this license are posted there, you may use a +# newer version instead, at your option. +# +# This software consists of voluntary contributions made by many +# individuals. For exact contribution history, see the revision +# history and logs, available at http://viewvc.tigris.org/. +# ==================================================================== + +"""Run tests of rcsparse code.""" + +import sys +import os +import glob +from cStringIO import StringIO +from difflib import Differ + +# Since there is nontrivial logic in __init__.py, we have to import +# parse() via that file. 
First make sure that the directory +# containing this script is in the path: +script_dir = os.path.dirname(sys.argv[0]) +sys.path.insert(0, script_dir) + +from __init__ import parse +from parse_rcs_file import LoggingSink + + +test_dir = os.path.join(script_dir, 'test-data') + +filelist = glob.glob(os.path.join(test_dir, '*,v')) +filelist.sort() + +all_tests_ok = 1 + +for filename in filelist: + sys.stderr.write('%s: ' % (filename,)) + f = StringIO() + try: + parse(open(filename, 'rb'), LoggingSink(f)) + except Exception, e: + sys.stderr.write('Error parsing file: %s!\n' % (e,)) + all_tests_ok = 0 + else: + output = f.getvalue() + + expected_output_filename = filename[:-2] + '.out' + expected_output = open(expected_output_filename, 'rb').read() + + if output == expected_output: + sys.stderr.write('OK\n') + else: + sys.stderr.write('Output does not match expected output!\n') + differ = Differ() + for diffline in differ.compare( + expected_output.splitlines(1), output.splitlines(1) + ): + sys.stderr.write(diffline) + all_tests_ok = 0 + +if all_tests_ok: + sys.exit(0) +else: + sys.exit(1) + diff --git a/cvs2svn_rcsparse/texttools.py b/cvs2svn_rcsparse/texttools.py new file mode 100644 index 0000000..7c713eb --- /dev/null +++ b/cvs2svn_rcsparse/texttools.py @@ -0,0 +1,348 @@ +# -*-python-*- +# +# Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved. +# +# By using this file, you agree to the terms and conditions set forth in +# the LICENSE.html file which can be found at the top level of the ViewVC +# distribution or at http://viewvc.org/license-1.html. +# +# For more information, visit http://viewvc.org/ +# +# ----------------------------------------------------------------------- + +import string + +# note: this will raise an ImportError if it isn't available. the rcsparse +# package will recognize this and switch over to the default parser. 
+from mx import TextTools + +import common + + +# for convenience +_tt = TextTools + +_idchar_list = map(chr, range(33, 127)) + map(chr, range(160, 256)) +_idchar_list.remove('$') +_idchar_list.remove(',') +#_idchar_list.remove('.') # leave as part of 'num' symbol +_idchar_list.remove(':') +_idchar_list.remove(';') +_idchar_list.remove('@') +_idchar = string.join(_idchar_list, '') +_idchar_set = _tt.set(_idchar) + +_onechar_token_set = _tt.set(':;') + +_not_at_set = _tt.invset('@') + +_T_TOKEN = 30 +_T_STRING_START = 40 +_T_STRING_SPAN = 60 +_T_STRING_END = 70 + +_E_COMPLETE = 100 # ended on a complete token +_E_TOKEN = 110 # ended mid-token +_E_STRING_SPAN = 130 # ended within a string +_E_STRING_END = 140 # ended with string-end ('@') (could be mid-@@) + +_SUCCESS = +100 + +_EOF = 'EOF' +_CONTINUE = 'CONTINUE' +_UNUSED = 'UNUSED' + + +# continuation of a token over a chunk boundary +_c_token_table = ( + (_T_TOKEN, _tt.AllInSet, _idchar_set), + ) + +class _mxTokenStream: + + # the algorithm is about the same speed for any CHUNK_SIZE chosen. + # grab a good-sized chunk, but not too large to overwhelm memory. + # note: we use a multiple of a standard block size + CHUNK_SIZE = 192 * 512 # about 100k + +# CHUNK_SIZE = 5 # for debugging, make the function grind... + + def __init__(self, file): + self.rcsfile = file + self.tokens = [ ] + self.partial = None + + self.string_end = None + + def _parse_chunk(self, buf, start=0): + "Get the next token from the RCS file." + + buflen = len(buf) + + assert start < buflen + + # construct a tag table which refers to the buffer we need to parse. + table = ( + #1: ignore whitespace. with or without whitespace, move to the next rule. + (None, _tt.AllInSet, _tt.whitespace_set, +1), + + #2 + (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS), + + #3: accumulate token text and exit, or move to the next rule. 
+ (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _idchar_set, +2), + + #4 + (_E_TOKEN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -3, _SUCCESS), + + #5: single character tokens exit immediately, or move to the next rule + (_UNUSED, _tt.IsInSet + _tt.AppendMatch, _onechar_token_set, +2), + + #6 + (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, -5, _SUCCESS), + + #7: if this isn't an '@' symbol, then we have a syntax error (go to a + # negative index to indicate that condition). otherwise, suck it up + # and move to the next rule. + (_T_STRING_START, _tt.Is + _tt.AppendTagobj, '@'), + + #8 + (None, _tt.Is, '@', +4, +1), + #9 + (buf, _tt.Is, '@', +1, -1), + #10 + (_T_STRING_END, _tt.Skip + _tt.AppendTagobj, 0, 0, +1), + #11 + (_E_STRING_END, _tt.EOF + _tt.AppendTagobj, _tt.Here, -10, _SUCCESS), + + #12 + (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS), + + #13: suck up everything that isn't an AT. go to next rule to look for EOF + (buf, _tt.AllInSet, _not_at_set, 0, +1), + + #14: go back to look for double AT if we aren't at the end of the string + (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -6, _SUCCESS), + ) + + # Fast, texttools may be, but it's somewhat lacking in clarity. + # Here's an attempt to document the logic encoded in the table above: + # + # Flowchart: + # _____ + # / /\ + # 1 -> 2 -> 3 -> 5 -> 7 -> 8 -> 9 -> 10 -> 11 + # | \/ \/ \/ /\ \/ + # \ 4 6 12 14 / + # \_______/_____/ \ / / + # \ 13 / + # \__________________________________________/ + # + # #1: Skip over any whitespace. + # #2: If now EOF, exit with code _E_COMPLETE. + # #3: If we have a series of characters in _idchar_set, then: + # #4: Output them as a token, and go back to #1. + # #5: If we have a character in _onechar_token_set, then: + # #6: Output it as a token, and go back to #1. + # #7: If we do not have an '@', then error. + # If we do, then log a _T_STRING_START and continue. + # #8: If we have another '@', continue on to #9. 
Otherwise: + # #12: If now EOF, exit with code _E_STRING_SPAN. + # #13: Record the slice up to the next '@' (or EOF). + # #14: If now EOF, exit with code _E_STRING_SPAN. + # Otherwise, go back to #8. + # #9: If we have another '@', then we've just seen an escaped + # (by doubling) '@' within an @-string. Record a slice including + # just one '@' character, and jump back to #8. + # Otherwise, we've *either* seen the terminating '@' of an @-string, + # *or* we've seen one half of an escaped @@ sequence that just + # happened to be split over a chunk boundary - in either case, + # we continue on to #10. + # #10: Log a _T_STRING_END. + # #11: If now EOF, exit with _E_STRING_END. Otherwise, go back to #1. + + success, taglist, idx = _tt.tag(buf, table, start) + + if not success: + ### need a better way to report this error + raise common.RCSIllegalCharacter() + assert idx == buflen + + # pop off the last item + last_which = taglist.pop() + + i = 0 + tlen = len(taglist) + while i < tlen: + if taglist[i] == _T_STRING_START: + j = i + 1 + while j < tlen: + if taglist[j] == _T_STRING_END: + s = _tt.join(taglist, '', i+1, j) + del taglist[i:j] + tlen = len(taglist) + taglist[i] = s + break + j = j + 1 + else: + assert last_which == _E_STRING_SPAN + s = _tt.join(taglist, '', i+1) + del taglist[i:] + self.partial = (_T_STRING_SPAN, [ s ]) + break + i = i + 1 + + # figure out whether we have a partial last-token + if last_which == _E_TOKEN: + self.partial = (_T_TOKEN, [ taglist.pop() ]) + elif last_which == _E_COMPLETE: + pass + elif last_which == _E_STRING_SPAN: + assert self.partial + else: + assert last_which == _E_STRING_END + self.partial = (_T_STRING_END, [ taglist.pop() ]) + + taglist.reverse() + taglist.extend(self.tokens) + self.tokens = taglist + + def _set_end(self, taglist, text, l, r, subtags): + self.string_end = l + + def _handle_partial(self, buf): + which, chunks = self.partial + if which == _T_TOKEN: + success, taglist, idx = _tt.tag(buf, _c_token_table) + if 
not success: + # The start of this buffer was not a token. So the end of the + # prior buffer was a complete token. + self.tokens.insert(0, string.join(chunks, '')) + else: + assert len(taglist) == 1 and taglist[0][0] == _T_TOKEN \ + and taglist[0][1] == 0 and taglist[0][2] == idx + if idx == len(buf): + # + # The whole buffer was one huge token, so we may have a + # partial token again. + # + # Note: this modifies the list of chunks in self.partial + # + chunks.append(buf) + + # consumed the whole buffer + return len(buf) + + # got the rest of the token. + chunks.append(buf[:idx]) + self.tokens.insert(0, string.join(chunks, '')) + + # no more partial token + self.partial = None + + return idx + + if which == _T_STRING_END: + if buf[0] != '@': + self.tokens.insert(0, string.join(chunks, '')) + return 0 + chunks.append('@') + start = 1 + else: + start = 0 + + self.string_end = None + string_table = ( + (None, _tt.Is, '@', +3, +1), + (_UNUSED, _tt.Is + _tt.AppendMatch, '@', +1, -1), + (self._set_end, _tt.Skip + _tt.CallTag, 0, 0, _SUCCESS), + + (None, _tt.EOF, _tt.Here, +1, _SUCCESS), + + # suck up everything that isn't an AT. 
move to next rule to look + # for EOF + (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _not_at_set, 0, +1), + + # go back to look for double AT if we aren't at the end of the string + (None, _tt.EOF, _tt.Here, -5, _SUCCESS), + ) + + success, unused, idx = _tt.tag(buf, string_table, + start, len(buf), chunks) + + # must have matched at least one item + assert success + + if self.string_end is None: + assert idx == len(buf) + self.partial = (_T_STRING_SPAN, chunks) + elif self.string_end < len(buf): + self.partial = None + self.tokens.insert(0, string.join(chunks, '')) + else: + self.partial = (_T_STRING_END, chunks) + + return idx + + def _parse_more(self): + buf = self.rcsfile.read(self.CHUNK_SIZE) + if not buf: + return _EOF + + if self.partial: + idx = self._handle_partial(buf) + if idx is None: + return _CONTINUE + if idx < len(buf): + self._parse_chunk(buf, idx) + else: + self._parse_chunk(buf) + + return _CONTINUE + + def get(self): + try: + return self.tokens.pop() + except IndexError: + pass + + while not self.tokens: + action = self._parse_more() + if action == _EOF: + return None + + return self.tokens.pop() + + +# _get = get +# def get(self): + token = self._get() + print 'T:', `token` + return token + + def match(self, match): + if self.tokens: + token = self.tokens.pop() + else: + token = self.get() + + if token != match: + raise common.RCSExpected(token, match) + + def unget(self, token): + self.tokens.append(token) + + def mget(self, count): + "Return multiple tokens. 'next' is at the end." + while len(self.tokens) < count: + action = self._parse_more() + if action == _EOF: + ### fix this + raise RuntimeError, 'EOF hit while expecting tokens' + result = self.tokens[-count:] + del self.tokens[-count:] + return result + + +class Parser(common._Parser): + stream_class = _mxTokenStream -- cgit v1.2.3-65-gdbad