reposurgeon diff=python
repodiffer diff=python
goreposurgeon
repocutter
repomapper
pkg
golang.org
github.com
gitlab.com
*.1
*.html
MANIFEST
*.tar.gz
*.xz
*.md5
cyreposurgeon
cyreposurgeon.c
cyreposurgeon.o
cy*
cy*.c
cy*.o
*~
.rs*/
image: golang:1.11
before_script:
- source ci/prepare.sh
test:
script:
- make -k check
[submodule "externals/agito"]
path = externals/agito
url = git@github.com:fragglet/agito.git
url = git://github.com/fragglet/agito.git
[submodule "externals/svn2git"]
path = externals/svn2git
url = git@github.com:nirvdrum/svn2git.git
url = git://github.com/nirvdrum/svn2git.git
\ No newline at end of file
@@ -10,6 +10,7 @@ Julien "_FrnchFrgg_" RIVAUD <frnchfrgg@free.fr>
Daniel Brooks <db48x@db48x.net>
Date unit testing, improvements for split and expunge commands.
Assist Python to Go port.
Chris Lemmons <alficles@gmail.com>
Solved some problems with inline blobs, improved interoperability with
@@ -17,3 +18,21 @@ Chris Lemmons <alficles@gmail.com>
Edward Cree <ec429@cantab.net>
Wrote the Hg extractor class and its test.
Richard Hansen <rhansen@rhansen.org>
Selections as ordered rather than compulsorily sorted sets.
The generalized reparent command.
Improvements in regression-test infrastructure.
Peter Donis <peterdonis@alum.mit.edu>
Python 3 port and Python2/3 interoperability.
Eric Sunshine <sunshine@sunshineco.com>
Review of seldom-used features, test improvements, bug-fixing.
Generalized selection expression parser for use-cases other than events.
Converted selection parser, which evaluated an expression while parsing it,
to a compile/evaluate paradigm in which a selection expression can be
compiled once and evaluated many times.
Added 'attribution' command.
Added 'reorder' command.
Assist Python to Go port.
BSD LICENSE
Copyright (c) 2015, Eric S. Raymond
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
modification, are permitted provided that the following conditions are
met:
Redistributions of source code must retain the above copyright
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
Neither the name of this project nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
THE BLUE STURGEON OF DOOM
FROM buildpack-deps:jessie
RUN mkdir -p /usr/local/src/reposurgeon/
WORKDIR /usr/local/src/reposurgeon/
COPY ci/requirements.txt /usr/local/src/reposurgeon/
COPY ci/prepare.sh /usr/local/src/reposurgeon/ci/
RUN bash ci/prepare.sh
COPY . /usr/local/src/reposurgeon/
RUN make install
CMD reposurgeon
= Helping with the Go translation =
We presently need help in two areas:
1. Fixing bugs in the Go translation
2. Speed tuning
== Fixing translation bugs ==
You can help by fixing command handlers to have correct behavior on a
regression test that succeeds under the Python implementation but
fails under Go.
To build the Go tools, simply run "make" in the toplevel directory.
This also builds the suite documentation.
'make gotest' runs the Go unit tests. These are pretty stable.
'make goregress' runs the full regression-test suite using the
Go binary. It will bail out on the first error. Presently it
dies on the x-blob-id test; this is expected.
There are tools and makefile productions for finer-grained testing.
Their behavior is controlled by two variables:
REPOSURGEON: either reposurgeon (the default) or goreposurgeon
STOPOUT: 1 (the default) produces a bailout after the first failed
test. Set it to 0 to continue after failed tests - you have to do
this on the make command line, e.g. "cd test; make STOPOUT=0
fi-regress". Note that the output from this may be voluminous and
not very useful.
make goregress forces REPOSURGEON=goreposurgeon. It runs fi-regress, which is
the inner production for general regression tests.
To run a single test under Go,
cd test; REPOSURGEON=../goreposurgeon ./singletest x-foo
will check the actual result of the script in x-foo.tst against its expected
output in x-foo.chk. No output other than the test notice is good. The
Python version passes all these tests.
The following can't yet be tested as they rely on the Subversion dump reader:
x-blob-id.tst
x-branchify_map.tst
x-branchify.tst
x-debranch.tst
x-debranch2.tst
x-expunge-deletion.tst
x-expunge.tst
x-gitify.tst
x-lint.tst
x-nesting.tst
x-references.tst
x-userignores.tst
x-split-dir.tst
x-split.tst
x-squash-id.tst
x-subdir.tst
x-tagretract.tst
x-treecontent
The following tests presently fail under Go. Reducing this list to
empty is a near-term goal.
x-debranch3: unknown failure
x-divide: divide logic appears broken
x-liftlog: Minor failure
x-macro: Spurious EOF in single-line expansion, and multiline doesn't work.
x-multiroot: ill-formed stream error
x-pathrename: ancestry check is failing.
x-pathrename-with-set: rename operation fails.
x-reorder: toposort code is busted.
x-reparent: stack overflow
x-simple: several command failures, none look very difficult
The next step after these will be testing extractor classes.
== Tuning for speed ==
The goal of this port is to improve conversion performance on large
repositories by an order of magnitude or more, with the horrible
example being the GCC subversion history. Trial runs with the Python
version were taking 9-10 hours!
If you are already a Go expert, you can help by tuning for speed. The
most important single operations to speed up are fast-import stream reads and
(when the Subversion support is working) Subversion dump stream reads.
The first thing to do is to make a test load. The reposurgeon history itself is
large enough to be a useful one. So:
$ goreposurgeon "read ." "write >reposurgeon.fi"
The ability to dump profile data is built into reposurgeon itself:
$ goreposurgeon "verbose 1" "profile reposurgeon.prof" "read <rs.fi" "profile"
Once you have the profile data you can sic the profile viewer on it.
Have graphviz installed and do
go tool pprof goreposurgeon reposurgeon.prof
There are lots of ways to explore the data, but the single most interesting one
is to enter "web" and look at the result in your browser. The size of each box is proportional
to the number of profiler samples it appears in. "top10" gives you the same
data in tabular form:
flat flat% sum% cum cum%
1.41s 22.45% 22.45% 1.43s 22.77% syscall.Syscall
1.11s 17.68% 40.13% 2.22s 35.35% runtime.scanobject
0.75s 11.94% 52.07% 0.75s 11.94% runtime.greyobject
0.58s 9.24% 61.31% 0.58s 9.24% runtime.memmove
This tells us that disk I/O (syscall.Syscall) is costly, but garbage-collection
overhead dominates (runtime.scanobject and runtime.greyobject, 47%).
The runtime.memmove entry is probably array copies during append operations.
To go faster we need to exercise the allocator less. In a way this is
good news - it suggests we don't have a big-O/algorithmic problem.
The obvious thing to do first is a search-and-destroy for heap escapes.
We can't avoid doing a lot of allocation; what we can do is avoid creating
lots of short-lived heap objects that will churn heap storage and trigger GC.
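A practical way to start the hunt is to ask the compiler for its escape
analysis ("go build -gcflags=-m ./go-reposurgeon", then look for lines saying
"escapes to heap"). The sketch below is illustrative only - it is not code
from the port - but it shows the preallocation idiom that removes one common
class of short-lived allocations:

// heapchurn.go - illustrative sketch, not part of the suite.
package main

import "fmt"

func main() {
	const n = 100000
	// Appending into a zero-capacity slice forces repeated reallocation
	// and copying as it grows; each abandoned backing array becomes
	// short-lived garbage for the collector to scan:
	//     grown := []int{}
	// Preallocating when the final size is predictable does one
	// allocation up front and keeps the GC out of the inner loop.
	grown := make([]int, 0, n)
	for i := 0; i < n; i++ {
		grown = append(grown, i)
	}
	fmt.Println(len(grown), cap(grown))
}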
Some references:
https://blog.golang.org/profiling-go-programs
https://github.com/google/pprof/blob/master/doc/README.md
https://www.signalfx.com/blog/a-pattern-for-optimizing-go-2/
http://www.agardner.me/golang/garbage/collection/gc/escape/analysis/2015/10/18/go-escape-analysis.html
#
# makefile for reposurgeon
#
INSTALL=install
XMLTO=xmlto
XMLTOOPTS=-m docbook-extra.xml
ASCIIDOC=asciidoc
PYLINT=pylint
prefix?=/usr/local
mandir?=share/man
target=$(DESTDIR)$(prefix)
CYTHON?=cython
PYVERSION=2.7
pyinclude?=$(shell pkg-config --cflags python-$(PYVERSION) || echo "-I/usr/include/python$(PYVERSION)")
pylib?=$(shell pkg-config --libs python-$(PYVERSION) || echo "-lpython$(PYVERSION)")
VERS=$(shell sed <reposurgeon -n -e '/version=\(.*\)/s//\1/p')
SOURCES = README NEWS AUTHORS COPYING TODO
VERS=$(shell sed <reposurgeon -n -e '/version=\"\(.*\)\"/s//\1/p')
SOURCES += docbook-extra.xml nofooter.conf
SOURCES += \
reposurgeon reposurgeon.xml \
repotool repotool.xml \
repodiffer repodiffer.xml \
conversion.mk features.asc \
dvcs-migration-guide.asc \
go-repocutter/repocutter.go \
go-repomapper/repomapper.go \
go-reposurgeon/goreposurgeon.go \
go-reposurgeon/goreposurgeon_test.go \
go-reposurgeon/intern.go \
repomapper.xml repocutter.xml \
reporting-bugs.adoc features.adoc dvcs-migration-guide.adoc \
reposurgeon-mode.el
SOURCES += Makefile control reposturgeon.png
SOURCES += Makefile control reposturgeon.png reposurgeon-git-aliases
SOURCES += Dockerfile ci/prepare.sh ci/requirements.txt .gitlab-ci.yml
DOCS = README.adoc NEWS TODO
STOPOUT=1
.PHONY: all install clean uninstall version pylint check zip release refresh \
docker-build docker-check docker-check-noscm \
govet gotest goformat gofmt golint
all: reposurgeon.1 repotool.1 repodiffer.1 \
reposurgeon.html repotool.html repodiffer.html \
dvcs-migration-guide.html features.html
BINARIES = reposurgeon repotool repomapper repocutter
MANPAGES = reposurgeon.1 repotool.1 repomapper.1 repocutter.1
HTMLFILES = $(MANPAGES:.1=.html) \
dvcs-migration-guide.html features.html reporting-bugs.html
SHARED = $(DOCS) reposurgeon-git-aliases $(HTMLFILES)
reposurgeon.1: reposurgeon.xml
xmlto man reposurgeon.xml
# The following would produce reproducible builds, but it breaks Gitlab CI.
#GOFLAGS=-gcflags 'all=-N -l -trimpath $(GOPATH)/src' -asmflags 'all=-trimpath $(GOPATH)/src'
reposurgeon.html: reposurgeon.xml
xmlto html-nochunks reposurgeon.xml
GOFLAGS=-gcflags '-N -l'
all: $(MANPAGES) $(HTMLFILES)
go build $(GOFLAGS) -o repocutter ./go-repocutter
go build $(GOFLAGS) -o repomapper ./go-repomapper
go build $(GOFLAGS) -o goreposurgeon ./go-reposurgeon
repotool.1: repotool.xml
xmlto man repotool.xml
%.1: %.xml
$(XMLTO) $(XMLTOOPTS) man $<
repotool.html: repotool.xml
xmlto html-nochunks repotool.xml
%.html: %.xml
$(XMLTO) $(XMLTOOPTS) html-nochunks $<
repodiffer.1: repodiffer.xml
xmlto man repodiffer.xml
dvcs-migration-guide.html: ASCIIDOC_ARGS=-a toc -f nofooter.conf
%.html: %.adoc
$(ASCIIDOC) $(ASCIIDOC_ARGS) $<
repodiffer.html: repodiffer.xml
xmlto html-nochunks repodiffer.xml
#
# Auxiliary Go productions
#
features.html: features.asc
asciidoc features.asc
# Temporary; it's here to track which test sections have succeeded
goregress: gotest
cd test; $(MAKE) STOPOUT=$(STOPOUT) REPOSURGEON=goreposurgeon
reporting-bugs.html: reporting-bugs.asc
asciidoc reporting-bugs.asc
govet:
go vet ./go-repocutter
go vet ./go-repomapper
go vet ./go-reposurgeon
dvcs-migration-guide.html: dvcs-migration-guide.asc
asciidoc -a toc dvcs-migration-guide.asc
gotest:
go test $(TESTOPTS) ./go-reposurgeon
cyreposurgeon: reposurgeon
$(CYTHON) --embed reposurgeon -o cyreposurgeon.c
${CC} ${CFLAGS} $(pyinclude) -c cyreposurgeon.c -o cyreposurgeon.o
${CC} ${CFLAGS} ${LDFLAGS} cyreposurgeon.o $(pylib) -o cyreposurgeon
gofmt goformat:
gofmt -w ./go-repocutter/
gofmt -w ./go-repomapper/
gofmt -w ./go-reposurgeon/
golint:
golint ./go-repocutter | ./lintfilter 2>&1
golint ./go-repomapper | ./lintfilter 2>&1
golint ./go-reposurgeon | ./lintfilter 2>&1
#
# Installation
#
install: all
$(INSTALL) -d "$(target)/bin"
$(INSTALL) -d "$(target)/share/doc/reposurgeon"
$(INSTALL) -d "$(target)/$(mandir)/man1"
$(INSTALL) -m 755 reposurgeon repotool repodiffer "$(target)/bin"
$(INSTALL) -m 644 README NEWS TODO conversion.mk *.html \
"$(target)/share/doc/reposurgeon"
$(INSTALL) -m 644 *.1 "$(target)/$(mandir)/man1"
install-cyreposurgeon: cyreposurgeon
$(INSTALL) -d "$(target)/bin"
$(INSTALL) -m 755 cyreposurgeon "$(target)/bin"
$(INSTALL) -m 755 $(BINARIES) "$(target)/bin"
$(INSTALL) -m 644 $(SHARED) "$(target)/share/doc/reposurgeon"
$(INSTALL) -m 644 $(MANPAGES) "$(target)/$(mandir)/man1"
clean:
rm -fr *~ *.1 *.html *.tar.gz MANIFEST *.md5
rm -fr goreposurgeon repocutter repomapper
rm -fr *~ *.1 *.html *.tar.xz MANIFEST *.md5
rm -fr .rs .rs* test/.rs test/.rs*
rm -f typescript test/typescript *.pyc
rm -f cyreposurgeon.c cyreposurgeon.o cyreposurgeon
reposurgeon-$(VERS).tar.gz: $(SOURCES)
@ls $(SOURCES) | sed s:^:reposurgeon-$(VERS)/: >MANIFEST
@(cd ..; ln -s reposurgeon reposurgeon-$(VERS))
(cd ..; tar -czf reposurgeon/reposurgeon-$(VERS).tar.gz `cat reposurgeon/MANIFEST`)
@(cd ..; rm reposurgeon-$(VERS))
# Uninstallation
INSTALLED_BINARIES := $(BINARIES:%="$(target)/bin/%")
INSTALLED_SHARED := $(SHARED:%="$(target)/share/doc/reposurgeon/%")
INSTALLED_MANPAGES := $(MANPAGES:%="$(target)/$(mandir)/man1/%")
uninstall:
rm -f $(INSTALLED_BINARIES)
rm -f $(INSTALLED_MANPAGES)
rm -f $(INSTALLED_SHARED)
rmdir "$(target)/share/doc/reposurgeon"
version:
@echo $(VERS)
#
# Code validation
#
COMMON_PYLINT = --rcfile=/dev/null --reports=n \
--msg-template="{path}:{line}: [{msg_id}({symbol}), {obj}] {msg}" \
--dummy-variables-rgx='^_'
PYLINTOPTS1 = "C0103,C0111,C0301,C0302,C0322,C0324,C0325,C0321,C0323,C0330,C1001,R0201,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,W0108,W0110,W0123,W0141,W0142,W0212,W0233,W0603,W0632,W0640,W0511,W0611,E1101,E1103,E1124,I0011,F0401"
PYLINTOPTS2 = "C0103,C0111,C0301,C0326,C0330,C1001,W0603,W0621,E1101,E1103,R0902,R0903,R0912,R0914,R0915"
PYLINTOPTS1 = "C0103,C0111,C0301,C0302,C0322,C0324,C0325,C0321,C0323,C0330,C0410,C0411,C0412,C0413,C1001,C1801,R0201,R0101,R0204,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R1705,W0108,W0110,W0123,W0122,W0141,W0142,W0212,W0221,W0232,W0233,W0603,W0632,W0633,W0640,W0511,W0611,E0611,E1101,E1103,E1124,E1133,I0011,F0401"
pylint:
@pylint $(COMMON_PYLINT) --disable=$(PYLINTOPTS1) reposurgeon
@pylint $(COMMON_PYLINT) --disable=$(PYLINTOPTS2) repodiffer
@$(PYLINT) $(COMMON_PYLINT) --disable=$(PYLINTOPTS1) reposurgeon
check:
cd test; $(MAKE) --quiet
$(MAKE) all; cd test; $(MAKE) --quiet check
portcheck:
cd test; $(MAKE) --quiet portcheck
#
# Continuous integration. More specifics are in the ci/ directory
#
docker-build: $(SOURCES)
docker build -t reposurgeon .
docker-check: docker-build
docker run --rm -i -e "MAKEFLAGS=$(MAKEFLAGS)" -e "MAKEOVERRIDES=$(MAKEOVERRIDES)" reposurgeon make check
docker-check-only-%: docker-build
docker run --rm -i -e "MAKEFLAGS=$(MAKEFLAGS)" -e "MAKEOVERRIDES=$(MAKEOVERRIDES)" reposurgeon bash -c "make -C ci install-only-$(*) && make check"
docker-check-no-%: docker-build
docker run --rm -i -e "MAKEFLAGS=$(MAKEFLAGS)" -e "MAKEOVERRIDES=$(MAKEOVERRIDES)" reposurgeon bash -c "make -C ci install-no-$(*) && make check"
# Test that support for each VCS stands on its own and test without legacy
# VCS installed
docker-check-noscm: docker-check-only-bzr docker-check-only-cvs \
docker-check-only-git docker-check-only-mercurial \
docker-check-only-subversion docker-check-no-cvs
# Due to many tests depending on git, docker-check-only-mercurial is a very poor
# test of Mercurial
#
# Release shipping.
#
reposurgeon-$(VERS).tar.xz: $(SOURCES) $(DOCS)
tar --transform='s:^:reposurgeon-$(VERS)/:' --show-transformed-names -cJf reposurgeon-$(VERS).tar.xz $(SOURCES) $(DOCS) test
dist: reposurgeon-$(VERS).tar.gz reposurgeon.1 repotool.1 repodiffer.1
dist: reposurgeon-$(VERS).tar.xz reposurgeon.1 repocutter.1 repotool.1 repomapper.1
reposurgeon-$(VERS).md5: reposurgeon-$(VERS).tar.gz
@md5sum reposurgeon-$(VERS).tar.gz >reposurgeon-$(VERS).md5
reposurgeon-$(VERS).md5: reposurgeon-$(VERS).tar.xz
@md5sum reposurgeon-$(VERS).tar.xz >reposurgeon-$(VERS).md5
zip: $(SOURCES)
zip -r reposurgeon-$(VERS).zip $(SOURCES)
zip: $(SOURCES) $(DOCS)
zip -r reposurgeon-$(VERS).zip $(SOURCES) $(DOCS)
release: reposurgeon-$(VERS).tar.gz reposurgeon-$(VERS).md5 reposurgeon.html repodiffer.html reporting-bugs.html dvcs-migration-guide.html features.html
release: reposurgeon-$(VERS).tar.xz reposurgeon-$(VERS).md5 reposurgeon.html repocutter.html repomapper.html reporting-bugs.html dvcs-migration-guide.html features.html
shipper version=$(VERS) | sh -e -x
refresh: reposurgeon.html repodiffer.html reporting-bugs.html features.html
refresh: reposurgeon.html repocutter.html repomapper.html reporting-bugs.html features.html
shipper -N -w version=$(VERS) | sh -e -x
reposurgeon - a repository surgeon
reposurgeon enables risky operations that version-control systems don't
want to let you do, such as (a) editing past comments and metadata,
(b) excising commits, (c) coalescing commits, and (d) removing files
and subtrees from repo history. The original motivation for
reposurgeon was to clean up artifacts created by repository
conversions.
reposurgeon is also useful for scripting very high-quality conversions
from Subversion. It is better than git-svn at tag lifting,
automatically cleaning up cvs2svn conversion artifacts, dealing with
nonstandard repository layouts, recognizing branch merges, handling
mixed-branch commits, and generally at coping with Subversion's many
odd corner cases. Normally Subversion repos should be analyzed at a
rate of upwards of ten thousand commits per minute.
repodiffer is a program that reports differences between repository
histories. It uses a diff(1)-like algorithm to identify spans of
identical revisions, and to pick out revisions that have been
changed or deleted or inserted. It may be useful for comparing the
output of different repository-conversion tools in detail.
Another auxiliary program, repopuller, assists in mirroring Subversion
repositories.
This distribution also includes a generic Makefile describing a
repeatable conversion workflow using these tools.
Finally, an Emacs Lisp mode with useful functions for editing large
comment mailboxes is included.
There is an extensive regression-test suite in the test/ directory.
To test the correctness of this software, ensure that pylint
is installed and then type 'make check'.
See reporting-bugs.asc for advice on how to troubleshoot problems
with reposurgeon and report bugs.
= reposurgeon - a repository surgeon =
`reposurgeon` enables risky operations that version-control systems
don't want to let you do, such as (a) editing past comments and metadata,
(b) excising commits, (c) coalescing commits, and (d) removing files and
subtrees from repo history. The original motivation for `reposurgeon`
was to clean up artifacts created by repository conversions.
`reposurgeon` is also useful for scripting very high-quality
conversions from Subversion. It is better than `git-svn` at tag
lifting, automatically cleaning up `cvs2svn` conversion artifacts,
dealing with nonstandard repository layouts, recognizing branch
merges, handling mixed-branch commits, and generally at coping with
Subversion's many odd corner cases. Normally Subversion repos should
be analyzed at a rate of upwards of ten thousand commits per minute,
though that rate can fall significantly on extremely large
repositories.
An auxiliary program, `repotool`, performs various useful
operations such as checkouts and tag listing in a VCS-independent
manner. Yet another, `repomapper`, assists in automatically preparing
contributor maps of CVS and SVN repositories.
The `repocutter` program is available for some specialized operations on
Subversion dumpfiles; it may be useful in extracting portions of
particularly gnarly Subversion repositories for conversion with
reposurgeon.
This distribution supports a generic conversion workflow using these
tools, and includes the DVCS Migration Guide that describes how to use it.
The file `reposurgeon-git-aliases` can be appended to your `~/.gitconfig` to
support working directly with action stamps in git.
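One way to do that, from the top level of the source distribution, is:

    cat reposurgeon-git-aliases >>~/.gitconfig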
Finally, an Emacs Lisp mode with useful functions for editing large
comment message-boxes is included.
The hashbang lines in these tools invoke PyPy, an alternative Python
implementation that performs much better than CPython on this code. You
can use python2 or python3 to run these scripts, but it is best to
have pypy installed. The performance difference is dramatic on large
repositories.
We are in the process of moving this suite of tools from Python to Go
for large performance increases. The subdirectory 'src' contains the
Go source. Do your builds through the Makefile, which sets the GOPATH
variable to work around the fact that the tree is not embedded in a
normal Go workspace.
To build the Go tools, simply run "make" in the toplevel directory.
This also builds the suite documentation. Before running it the
*first* time, run "make gosetup" to install package dependencies.
There is an extensive regression-test suite in the `test/` directory.
To test the correctness of this software, ensure that `pypy` and
`pylint` and `golang` are installed and then type `make check`. For a
portability check that includes Python 2 and Python 3 as well, do
"make portcheck".
See `reporting-bugs.asc` for advice on how to troubleshoot problems
with `reposurgeon` and report bugs.
The main `reposurgeon` website along with the documentation in HTML files
lives at http://www.catb.org/esr/reposurgeon/[www.catb.org/esr/reposurgeon/].
The files Dockerfile, .dockerignore, .gitlab-ci.yml, and the contents of ci/
are not distributed; they are configuration for test builds on GitLab's
CI machinery.
= TO-DO =
* Someday, port fully to Python 3. At that point, reapply the commit
"Ensure BZ2File only work on binary data".
* Weird combinations of Subversion copy-delete operations can produce
content-mismatch failures detectable by repotool compare. In the test
suite, this problem is exhibited by agito.svn, fleetwood.svn, and
references.svn.
* The Subversion dumper is pretty weak, more a proof of concept than
anything useful for production.
* Some attempt should be made to create merge links when a deleteall is followed
by a directory copy from another branch.
* Can gitspace merges be deduced from svk:merge and svnmerge properties?
* The Subversion dumper is pretty weak, more a proof of concept than
anything useful for production. Improving it might be useful.
#!/bin/bash
# Install dependencies that aren't included in buildpack-deps:jessie
REPOSURGEON_DIR=$(pwd)
apt-get update -qy && apt-get install -qy --no-install-recommends \
asciidoc \
bison \
cvs \
flex \
golang \
libpcre3-dev \
pypy \
python2.7 \
python3 \
python-pip \
subversion \
time \
xmlto \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Install cvs-fast-export - this is the only reason bison and flex are installed
mkdir -p /usr/local/src/
cd /usr/local/src/
git clone https://gitlab.com/esr/cvs-fast-export.git && \
cd cvs-fast-export/ && \
make install
cd $REPOSURGEON_DIR
pip install -r ci/requirements.txt
echo
echo ============= Dependency install complete =============
echo
@@ -8,10 +8,12 @@ Description: A tool for editing version-control repository history.
don't want to let you do, such as editing past comments and metadata
and removing commits. It works with any version control system that
can export and import git fast-import streams, including git, hg,
fossil, bzr, CVS, RCS, and src. It can also read Subversion dump
fossil, bzr, CVS, RCS, bk, and src. It can also read Subversion dump
files directly and can thus be used to script production of very
high-quality conversions from Subversion to any supported DVCS.
XBS-Destinations: mailto:ubuntu-devel-discuss@lists.ubuntu.com
Homepage: http://www.catb.org/~esr/reposurgeon
XBS-HTML-Target: index.html
@@ -22,9 +24,9 @@ XBS-IRC-Channel: irc://chat.freenode.net/#reposurgeon
XBS-OpenHub-URL: https://www.ohloh.net/p/reposurgeon
XBS-Logo: reposturgeon.png
XBS-Debian-Packages: reposurgeon
XBS-Web-Extras: conversion.mk
XBS-Logo: reposturgeon.png
XBS-VC-Tag-Template: %(version)s
# Generic makefile for DVCS conversions using reposurgeon
#
# Steps to using this:
# 0. Copy this into a scratch directory as Makefile.
# 1. Make sure reposurgeon, repostreamer, and repopuller are on your $PATH.
# 2. Set PROJECT to the name of your project.
# 3. Set SOURCE_VCS to svn or cvs.
# 4. Set TARGET_VCS to git, hg, or bzr.
# 5. For svn, set REMOTE_URL to point at the remote repository
# you want to convert.
# 6. For cvs, set CVS_HOST to the repo hostname and CVS_MODULE to the module,
# then uncomment the line that builds REMOTE_URL
# Note: for CVS hosts other than Sourceforge or Savannah you will need to
# include the path to the CVS modules directory after the hostname.
# 7. Create a $(PROJECT).lift script for your custom commands, initially empty.
# 8. Run 'make stubmap' to create a stub author map.
# 9. (Optional) set REPOSURGEON to point at a faster cython build of the tool.
# 10. Run 'make' to build a converted repository.
#
# The reason both first- and second-stage stream files are generated is that,
# especially with Subversion, making the first-stage stream file is often
# painfully slow. By splitting the process, we lower the overhead of
# experiments with the lift script.
#
# For a production-quality conversion you will need to edit the map
# file and the lift script. During the process you can set EXTRAS to
# name extra metadata such as a comments mailbox.
#
# After the conversion, you may be able to perform a sanity check with
# 'make diff' (supported for CVS and svn). You can check
# individual tags or branches with 'make diff-tag'
#
# Note that CVS-checkout directories not matched in a conversion may be
# historical relics containing only CVSROOT directories.
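# As a purely hypothetical illustration (the project name is made up), a
# Sourceforge-hosted CVS module named "frobnicate" would be configured as:
#   PROJECT = frobnicate
#   SOURCE_VCS = cvs
#   TARGET_VCS = git
#   CVS_HOST = cvs.sourceforge.net
#   CVS_MODULE = frobnicate
#   REMOTE_URL = cvs://$(CVS_HOST)/$(PROJECT)#$(CVS_MODULE)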
PROJECT = foo
SOURCE_VCS = svn
TARGET_VCS = git
EXTRAS =
REMOTE_URL = svn://svn.debian.org/$(PROJECT)
#REMOTE_URL = https://$(PROJECT).googlecode.com/svn/
CVS_HOST = cvs.sourceforge.net
#CVS_HOST = cvs.savannah.gnu.org
CVS_MODULE = $(PROJECT)
#REMOTE_URL = cvs://$(CVS_HOST)/$(PROJECT)#$(CVS_MODULE)
VERBOSITY = "verbose 1"
REPOSURGEON = reposurgeon
# Configuration ends here
.PHONY: local-clobber remote-clobber gitk gc compare clean dist stubmap diff
# Tell make not to auto-remove tag directories, because it only tries rm and hence fails
.PRECIOUS: $(PROJECT)-%-checkout $(PROJECT)-%-$(TARGET_VCS)
default: $(PROJECT)-$(TARGET_VCS)
# Build the converted repo from the second-stage fast-import stream
$(PROJECT)-$(TARGET_VCS): $(PROJECT).fi
rm -fr $(PROJECT)-$(TARGET_VCS); $(REPOSURGEON) "read <$(PROJECT).fi" "prefer $(TARGET_VCS)" "rebuild $(PROJECT)-$(TARGET_VCS)"
# Build the second-stage fast-import stream from the first-stage stream dump
$(PROJECT).fi: $(PROJECT).$(SOURCE_VCS) $(PROJECT).lift $(PROJECT).map $(EXTRAS)
$(REPOSURGEON) $(VERBOSITY) "read <$(PROJECT).$(SOURCE_VCS)" "authors read <$(PROJECT).map" "sourcetype $(SOURCE_VCS)" "prefer git" "script $(PROJECT).lift" "legacy write >$(PROJECT).fo" "write >$(PROJECT).fi"
# Build the first-stage stream dump from the local mirror
$(PROJECT).$(SOURCE_VCS): $(PROJECT)-mirror
repotool mirror $(PROJECT)-mirror
(cd $(PROJECT)-mirror/ >/dev/null; repotool export) >$(PROJECT).$(SOURCE_VCS)
# Build a local mirror of the remote repository
$(PROJECT)-mirror:
repotool mirror $(REMOTE_URL) $(PROJECT)-mirror
# Get a list of tags from the project mirror
$(PROJECT)-tags.txt: $(PROJECT)-mirror
cd $(PROJECT)-mirror >/dev/null; repotool tags
# Make a local checkout of the source mirror for inspection
$(PROJECT)-checkout: $(PROJECT)-mirror
cd $(PROJECT)-mirror >/dev/null; repotool checkout ../$(PROJECT)-checkout
# Make a local checkout of the source mirror for inspection at a specific tag
$(PROJECT)-%-checkout: $(PROJECT)-mirror
cd $(PROJECT)-mirror >/dev/null; repotool checkout ../$(PROJECT)-$*-checkout $*
# Force rebuild of first-stage stream from the local mirror on the next make
local-clobber: clean
rm -fr $(PROJECT).fi $(PROJECT)-$(TARGET_VCS) *~ .rs* $(PROJECT)-conversion.tar.gz $(PROJECT)-*-$(TARGET_VCS)
# Force full rebuild from the remote repo on the next make.
remote-clobber: local-clobber
rm -fr $(PROJECT).$(SOURCE_VCS) $(PROJECT)-mirror $(PROJECT)-checkout $(PROJECT)-*-checkout
# Get the (empty) state of the author mapping from the first-stage stream
stubmap: $(PROJECT).$(SOURCE_VCS)
$(REPOSURGEON) "read <$(PROJECT).$(SOURCE_VCS)" "authors write >$(PROJECT).map"
# Compare the histories of the unconverted and converted repositories at head
# and all tags.
EXCLUDE = -x CVS -x .$(SOURCE_VCS) -x .$(TARGET_VCS)
EXCLUDE += -x .$(SOURCE_VCS)ignore -x .$(TARGET_VCS)ignore
headcompare:
repotool compare $(EXCLUDE) $(PROJECT)-checkout $(PROJECT)-$(TARGET_VCS)
tagscompare:
repotool compare-tags $(EXCLUDE) $(PROJECT)-checkout $(PROJECT)-$(TARGET_VCS)
# General cleanup and utility
clean:
rm -fr *~ .rs* $(PROJECT)-conversion.tar.gz *.$(SOURCE_VCS) *.fi *.fo
# Bundle up the conversion metadata for shipping
SOURCES = Makefile $(PROJECT).lift $(PROJECT).map $(EXTRAS)
$(PROJECT)-conversion.tar.gz: $(SOURCES)
tar --dereference --transform 's:^:$(PROJECT)-conversion/:' -czvf $(PROJECT)-conversion.tar.gz $(SOURCES)
dist: $(PROJECT)-conversion.tar.gz
#
# The following productions are git-specific
#
ifeq ($(TARGET_VCS),git)
# Browse the generated git repository
gitk: $(PROJECT)-git
cd $(PROJECT)-git; gitk --all
# Run a garbage-collect on the generated git repository. Import doesn't do this for you.
# This repack call is the active part of gc --aggressive. This call is
# tuned for very large repositories.
gc: $(PROJECT)-git
cd $(PROJECT)-git; time git -c pack.threads=1 repack -AdF --window=1250 --depth=250
endif
<?xml version='1.0'?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:param name="variablelist.term.separator"></xsl:param>
<xsl:param name="variablelist.term.break.after">1</xsl:param>
</xsl:stylesheet>
= How reposurgeon wins =
There are many tools for converting repositories between
version-control systems out there. This file explains why
reposurgeon is the best of breed by comparing it to the
competition.
The problems other repository-translation tools have come from
@@ -10,7 +10,7 @@ ontological mismatches between their source and target systems -
models of changesets, branching and tagging can differ in complicated
ways. While these gaps can often be bridged by careful analysis, the
techniques for doing so are algorithmically complex, difficult to
test, and have ugly edge cases.
Furthermore, doing a really high-quality translation often requires
human judgment about how to move artifacts - and what to discard.
@@ -34,7 +34,7 @@ Here are some specific symptoms of evasion that are common enough to
deserve tags for later reference.
LINEAR: One very common form of evasion is only handling linear
histories.
NO_IGNORES: There are many different mechanisms for ignoring files -
.cvsignore, Subversion svn:ignore properties, .gitignore and their
@@ -78,7 +78,7 @@ additional bug tags:
CONFIGURATION: Requires elaborate configuration even for cases that
ought to be simple.
ABANDONED: Effectively abandoned by its maintainer. Some tools with
this tag are still nominally maintained but have not been updated
or released in years.
@@ -86,7 +86,7 @@ NO_DOCUMENTATION: Poorly (if at all) documented.
!FOO means the tool is known not to have problem FOO.
?FOO means I have not tried the tool but have strong reason to
suspect the problem is present based on other things I know about it.
You should assume that none of these tools do reference-lifting.
@@ -95,8 +95,8 @@ You should assume that none of these tools do reference-lifting.
http://cvs2svn.tigris.org/features.html
Just after the turn of the 21st century, when Subversion was the
new thing in version control, most projects that were using version
control were using CVS, and cvs2svn was about the only migration path.
Early cvs2svn had problems on every level, only some of which have
@@ -160,13 +160,16 @@ repository would most naturally be structured is minimal. But for
conformability with Subversion, git-svn cannot (practically speaking)
use git's annotated-tag facility in the local mirror; instead,
Subversion tags have to be represented in the local mirror as git
branches even if they have no changes after the initial branch copy.
Another thing the live-gatewaying use case prevents is
reference-lifting. Subversion references like "r1234" in commit
comments have to be left as-is to avoid creating pain for users of the
same Subversion remote not going through git-svn.
git-svn is used by both the Google Code exporter and GitHub importer
web services. Depending on these services is not recommended.
!ABANDONED, MIXEDBRANCH, NO_TAGS, NO_IGNORES.
=== git-svnimport ===
@@ -181,7 +184,8 @@ MIXEDBRANCH, NO_TAGS, NO_IGNORES, ABANDONED.
https://github.com/stevenharman/git-svn-import
A trivial wrapper around git-svn.
A trivial wrapper around git-svn. All the reasons not to use git-svn
apply to it as well.
MIXEDBRANCH, NO_TAGS, NO_IGNORES, !ABANDONED.
@@ -215,7 +219,7 @@ It even handles mixed-branch commits correctly.
!LINEAR, !NO_TAGS, !MIXEDBRANCH, CONFIGURATION.
If you cannot use reposurgeon for some reason, this is one of
the best alternatives.
=== svn2git (jcoglan/nirvdrum version) ===
@@ -278,6 +282,40 @@ diverged in 2008.
LINEAR, NO_TAGS, NO_IGNORES, NO_DOCUMENTATION, ABANDONED.
=== SubGit ===
http://www.subgit.com/
Nearly unique for this category of software in being closed-source. Beyond
an evaluation period, users have to register, possibly at a cost
(it's supposed to be free of charge for certain uses: open-source
projects, education, and ``startups'' -- history with BitKeeper shows
that these arrangements should not be trusted).
The intended outcome of this program is to provide a server that both
Subversion and Git users can interact with at once. This may be of
little value overall: new developers are frequently unfamiliar with
Subversion (and old ones forget the usage patterns!), fundamental
differences in the design of the two VCSes interfere with the quality
of both views, and confusion arises over the preferred mode of
contribution.
The quality of SubGit's conversion is rather poor. It fails to
properly translate at least half of the reposurgeon *.svn regression
tests, even some of the simpler ones - although it does translate
trickier cases such as agito.svn correctly. Large real-world Subversion repos
will exhibit multiple issues that SubGit may, silently or otherwise,
trip over.
This program will forever contain compromises for the same reasons
git-svn does. Its non-open-source nature leaves little hope of having
such issues repaired by skilled community members.
Atlassian's BitBucket service relies on this for Subversion-to-Git
migration. Depending on this service is not recommended.
!MIXEDBRANCH, !LINEAR, CONFIGURATION, DOCUMENTATION
== reposurgeon success stories ==
reposurgeon has been used for successful conversion on projects
@@ -326,6 +364,12 @@ Emacs::
the history and number of layers makes it the most complex
conversion yet.
ntp::
I did BitKeeper to git using a derivative of Tridge's SourcePuller
as a front end, done in early 2015. Nothing especially taxing
about the reposurgeon side of things, the magic was all in the
front end.
pdfrw, playtag, pyeda, rson::
Four small Subversion projects by Patrick Maupin, converted in
two hours' work in May 2015. No significant difficulties.
@@ -333,4 +377,13 @@ pdfrw, playtag, pyeda, rson::
workflow in conversion.mk is fast and effective for a wide
range of projects.
mh-e::
The Emacs interface for MH. Converted by Bill Wohler in late 2015.
He reports that the standard conversion workflow worked fine.
GNUPLOT::
CVS to git, 30 years of history with some early releases recovered
from tarballs. Converted by me in late 2017. Somewhat messy due to
vendor-branch issues.
//end
// Package repomapper - update and manipulate contributor maps
package main
// SPDX-License-Identifier: BSD-2-Clause
import (
"bufio"
"flag"
"fmt"
"log"
"os"
"regexp"
"sort"
"strings"
)
// Contributor - associate a username with a DVCS-style ID
type Contributor struct {
name string
fullname string
email string
tz string
}
// Does this entry need completion?
func (cb *Contributor) incomplete() bool {
return cb.name == cb.fullname || !strings.Contains(cb.email, "@")
}
// Stringer - render a Contributor in rereadable form
func (cb *Contributor) Stringer() string {
out := fmt.Sprintf("%s = %s <%s>", cb.name, cb.fullname, cb.email)
if cb.tz != "" {
out += " " + cb.tz
}
out += "\n"
return out
}
// ContribMap - a map of contributors.
type ContribMap map[string]Contributor
/* apply a specified function to each line of a file */
func bylines(fn string, hook func(string)) {
file, err := os.Open(fn)
if err != nil {
log.Fatal(err)
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
hook(scanner.Text())
}
if err := scanner.Err(); err != nil {
log.Fatal(err)
}
}
// NewContribMap - initialize a new contributor map from a file
func NewContribMap(fn string) ContribMap {
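// Map lines look like "name = Full Name <email> [timezone]"; the four
// capture groups below pick out the name, full name, email, and
// optional timezone fields, in that order.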
re := regexp.MustCompile("([^ ]+) *= ([^<]+)*<([^<]+)> *(.*)")
cm := make(map[string]Contributor)
digest := func(line string) {
groups := re.FindAllStringSubmatch(line, -1)
if groups == nil {
log.Fatal("repomapper: ill-formed map line.\n")
}
firstmatch := groups[0]
v := Contributor{
name: firstmatch[1],
fullname: strings.Trim(firstmatch[2], " \t"),
email: firstmatch[3],
tz: firstmatch[4],
}
cm[v.name] = v
}
bylines(fn, digest)
return cm
}
// Suffix - add an address suffix to entries lacking one.
func (cm *ContribMap) Suffix(addr string) {
for k, obj := range *cm {
if !strings.Contains(obj.email, "@") {
obj.email += "@" + addr
(*cm)[k] = obj
}
}
}
/* Write the current state of this contrib map. */
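// When incomplete is true, only entries that still lack a real full name
// or a valid email address are written, so you can see what remains to fix.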
func (cm *ContribMap) Write(fp *os.File, incomplete bool) {
keys := make([]string, 0)
for k := range *cm {
keys = append(keys, k)
}
sort.Strings(keys)
for _, name := range keys {
item := (*cm)[name]
if incomplete && !item.incomplete() {
continue
}
fmt.Fprint(fp, item.Stringer())
}
}
// Manifest constants describing the Unix password DSV format
const pwdFLDSEP = ":" // field separator
const pwdNAME = 0 // field index of username
const pwdGECOS = 4 // field index of fullname
const pwdFLDCOUNT = 7 // required number of fields
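// A typical /etc/passwd line, for illustration only:
//   jdoe:x:1000:1000:Jane Doe,Room 101,555-0100:/home/jdoe:/bin/sh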
func main() {
var host string
var passwdfile string
var updatefile string
var incomplete bool
flag.StringVar(&host, "h", "", "set host for suffixing")
flag.StringVar(&passwdfile, "p", "", "specify password file")
flag.StringVar(&updatefile, "u", "", "specify update file")
flag.BoolVar(&incomplete, "i", false, "dump incomplete entries")
flag.Parse()
if flag.NArg() == 0 {
fmt.Fprintf(os.Stderr,
"repomapper: requires a contrib-map file argument.\n")
os.Exit(1)
}
// Read in an ordered dictionary of existing attributions.
contribmap := NewContribMap(flag.Arg(0))
// Apply the -h option
if host != "" {
contribmap.Suffix(host)
}
// With -p, read the password data
if passwdfile != "" {
passwd := make(map[string]string)
eatline := func(line string) {
fields := strings.Split(line, pwdFLDSEP)
if len(fields) != pwdFLDCOUNT {
fmt.Fprintf(os.Stderr,
"repomapper: ill-formed passwd line\n")
os.Exit(1)
}
name := fields[pwdNAME]
gecos := fields[pwdGECOS]
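// The GECOS field may carry extra subfields (office, phone) after a
// comma; keep only the full-name part before the first comma.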
if strings.Contains(gecos, ",") {
gecos = strings.Split(gecos, ",")[0]
}
passwd[name] = gecos
}
bylines(passwdfile, eatline)
// Attempt to fill in the contribmap
for name, obj := range contribmap {
_, ok := passwd[name]
if !ok {
fmt.Fprintf(os.Stderr,