git-archimport.perl 36 KB
Newer Older
1 2 3 4 5
#!/usr/bin/perl -w
#
# This tool is copyright (c) 2005, Martin Langhoff.
# It is released under the Gnu Public License, version 2.
#
Junio C Hamano's avatar
Junio C Hamano committed
6 7
# The basic idea is to walk the output of tla abrowse,
# fetch the changesets and apply them.
8
#
9

10 11
=head1 Invocation

12
    git archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ]
Junio C Hamano's avatar
Junio C Hamano committed
13
	[ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
14

15 16
Imports a project from one or more Arch repositories. It will follow branches
and repositories within the namespaces defined by the <archive/branch>
17
parameters supplied. If it cannot find the remote branch a merge comes from
Junio C Hamano's avatar
Junio C Hamano committed
18
it will just import it as a regular commit. If it can find it, it will mark it
19
as a merge whenever possible.
20

21
See man (1) git-archimport for more details.
22

23
=head1 TODO
24

25
 - create tag objects instead of ref tags
26
 - audit shell-escaping of filenames
27
 - hide our private tags somewhere smarter
Junio C Hamano's avatar
Junio C Hamano committed
28
 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
29 30 31
 - sort and apply patches by graphing ancestry relations instead of just
   relying in dates supplied in the changeset itself.
   tla ancestry-graph -m could be helpful here...
32 33 34

=head1 Devel tricks

Junio C Hamano's avatar
Junio C Hamano committed
35
Add print in front of the shell commands invoked via backticks.
36

37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
=head1 Devel Notes

There are several places where Arch and git terminology are intermixed
and potentially confused.

The notion of a "branch" in git is approximately equivalent to
a "archive/category--branch--version" in Arch.  Also, it should be noted
that the "--branch" portion of "archive/category--branch--version" is really
optional in Arch although not many people (nor tools!) seem to know this.
This means that "archive/category--version" is also a valid "branch"
in git terms.

We always refer to Arch names by their fully qualified variant (which
means the "archive" name is prefixed.

For people unfamiliar with Arch, an "archive" is the term for "repository",
and can contain multiple, unrelated branches.

55 56 57 58 59
=cut

use strict;
use warnings;
use Getopt::Std;
60
use File::Temp qw(tempdir);
61
use File::Path qw(mkpath rmtree);
62 63 64 65 66 67 68
use File::Basename qw(basename dirname);
use Data::Dumper qw/ Dumper /;
use IPC::Open2;

$SIG{'PIPE'}="IGNORE";
$ENV{'TZ'}="UTC";

69 70
my $git_dir = $ENV{"GIT_DIR"} || ".git";
$ENV{"GIT_DIR"} = $git_dir;
71
my $ptag_dir = "$git_dir/archimport/tags";
72

73
our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
74 75 76

sub usage() {
    print STDERR <<END;
77
Usage: git archimport     # fetch/update GIT from Arch
78
       [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
79 80 81 82 83
       repository/arch-branch [ repository/arch-branch] ...
END
    exit(1);
}

84
getopts("fThvat:D:") or usage();
85 86 87
usage if $opt_h;

@ARGV >= 1 or usage();
88 89 90
# $arch_branches:
# values associated with keys:
#   =1 - Arch version / git 'branch' detected via abrowse on a limit
91
#   >1 - Arch version / git 'branch' of an auxiliary branch we've merged
92 93 94 95 96
my %arch_branches = map { my $branch = $_; $branch =~ s/:[^:]*$//; $branch => 1 } @ARGV;

# $branch_name_map:
# maps arch branches to git branch names
my %branch_name_map = map { m/^(.*):([^:]*)$/; $1 => $2 } grep { m/:/ } @ARGV;
97

98 99
$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
100
$opt_v && print "+ Using $tmp as temporary directory\n";
101

102 103 104 105 106 107 108 109 110
unless (-d $git_dir) { # initial import needs empty directory
    opendir DIR, '.' or die "Unable to open current directory: $!\n";
    while (my $entry = readdir DIR) {
        $entry =~ /^\.\.?$/ or
            die "Initial import needs an empty current working directory.\n"
    }
    closedir DIR
}

111
my $default_archive;		# default Arch archive
112 113
my %reachable = ();             # Arch repositories we can access
my %unreachable = ();           # Arch repositories we can't access :<
114
my @psets  = ();                # the collection
115
my %psets  = ();                # the collection, by name
116 117 118 119
my %stats  = (			# Track which strategy we used to import:
	get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
        simple_changeset => 0, import_or_tag => 0
);
120 121 122

my %rptags = ();                # my reverse private tags
                                # to map a SHA1 to a commitid
123
my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
124

125 126 127 128
sub do_abrowse {
    my $stage = shift;
    while (my ($limit, $level) = each %arch_branches) {
        next unless $level == $stage;
Junio C Hamano's avatar
Junio C Hamano committed
129 130

	open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
131
                                or die "Problems with tla abrowse: $!";
Junio C Hamano's avatar
Junio C Hamano committed
132

133 134
        my %ps        = ();         # the current one
        my $lastseen  = '';
Junio C Hamano's avatar
Junio C Hamano committed
135

136 137
        while (<ABROWSE>) {
            chomp;
Junio C Hamano's avatar
Junio C Hamano committed
138

139 140 141 142 143 144 145 146 147 148 149
            # first record padded w 8 spaces
            if (s/^\s{8}\b//) {
                my ($id, $type) = split(m/\s+/, $_, 2);

                my %last_ps;
                # store the record we just captured
                if (%ps && !exists $psets{ $ps{id} }) {
                    %last_ps = %ps; # break references
                    push (@psets, \%last_ps);
                    $psets{ $last_ps{id} } = \%last_ps;
                }
Junio C Hamano's avatar
Junio C Hamano committed
150

151 152 153 154 155
                my $branch = extract_versionname($id);
                %ps = ( id => $id, branch => $branch );
                if (%last_ps && ($last_ps{branch} eq $branch)) {
                    $ps{parent_id} = $last_ps{id};
                }
Junio C Hamano's avatar
Junio C Hamano committed
156

157 158 159 160 161 162 163 164
                $arch_branches{$branch} = 1;
                $lastseen = 'id';

                # deal with types (should work with baz or tla):
                if ($type =~ m/\(.*changeset\)/) {
                    $ps{type} = 's';
                } elsif ($type =~ /\(.*import\)/) {
                    $ps{type} = 'i';
165
                } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
166 167
                    $ps{type} = 't';
                    # read which revision we've tagged when we parse the log
168
                    $ps{tag}  = $1;
Junio C Hamano's avatar
Junio C Hamano committed
169
                } else {
170 171 172 173 174
                    warn "Unknown type $type";
                }

                $arch_branches{$branch} = 1;
                $lastseen = 'id';
Junio C Hamano's avatar
Junio C Hamano committed
175 176
            } elsif (s/^\s{10}//) {
                # 10 leading spaces or more
177
                # indicate commit metadata
Junio C Hamano's avatar
Junio C Hamano committed
178

179 180 181 182 183 184 185 186 187 188
                # date
                if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
                    $ps{date}   = $1;
                    $lastseen = 'date';
                } elsif ($_ eq 'merges in:') {
                    $ps{merges} = [];
                    $lastseen = 'merges';
                } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
                    my $id = $_;
                    push (@{$ps{merges}}, $id);
Junio C Hamano's avatar
Junio C Hamano committed
189

190 191 192 193
                    # aggressive branch finding:
                    if ($opt_D) {
                        my $branch = extract_versionname($id);
                        my $repo = extract_reponame($branch);
Junio C Hamano's avatar
Junio C Hamano committed
194

195 196 197 198 199 200 201
                        if (archive_reachable($repo) &&
                                !defined $arch_branches{$branch}) {
                            $arch_branches{$branch} = $stage + 1;
                        }
                    }
                } else {
                    warn "more metadata after merges!?: $_\n" unless /^\s*$/;
202 203 204 205
                }
            }
        }

206 207 208 209 210
        if (%ps && !exists $psets{ $ps{id} }) {
            my %temp = %ps;         # break references
            if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
                $temp{parent_id} = $psets[$#psets]{id};
            }
Junio C Hamano's avatar
Junio C Hamano committed
211
            push (@psets, \%temp);
212
            $psets{ $temp{id} } = \%temp;
Junio C Hamano's avatar
Junio C Hamano committed
213 214
        }

215 216
        close ABROWSE or die "$TLA abrowse failed on $limit\n";
    }
217 218
}                               # end foreach $root

219 220 221 222 223 224 225 226
do_abrowse(1);
my $depth = 2;
$opt_D ||= 0;
while ($depth <= $opt_D) {
    do_abrowse($depth);
    $depth++;
}

227
## Order patches by time
228 229 230
# FIXME see if we can find a more optimal way to do this by graphing
# the ancestry data and walking it, that way we won't have to rely on
# client-supplied dates
231 232 233 234 235 236 237 238
@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;

#print Dumper \@psets;

##
## TODO cleanup irrelevant patches
##      and put an initial import
##      or a full tag
239
my $import = 0;
240
unless (-d $git_dir) { # initial import
241 242
    if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
        print "Starting import from $psets[0]{id}\n";
243
	`git-init`;
244 245
	die $! if $?;
	$import = 1;
246 247 248
    } else {
        die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
    }
249 250
} else {    # progressing an import
    # load the rptags
251
    opendir(DIR, $ptag_dir)
252 253
	|| die "can't opendir: $!";
    while (my $file = readdir(DIR)) {
254 255
        # skip non-interesting-files
        next unless -f "$ptag_dir/$file";
Junio C Hamano's avatar
Junio C Hamano committed
256

257 258 259 260 261 262 263 264
        # convert first '--' to '/' from old git-archimport to use
        # as an archivename/c--b--v private tag
        if ($file !~ m!,!) {
            my $oldfile = $file;
            $file =~ s!--!,!;
            print STDERR "converting old tag $oldfile to $file\n";
            rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
        }
265 266 267 268 269
	my $sha = ptag($file);
	chomp $sha;
	$rptags{$sha} = $file;
    }
    closedir DIR;
270 271
}

272
# process patchsets
273 274 275 276 277
# extract the Arch repository name (Arch "archive" in Arch-speak)
sub extract_reponame {
    my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
    return (split(/\//, $fq_cvbr))[0];
}
Junio C Hamano's avatar
Junio C Hamano committed
278

279 280 281 282 283
sub extract_versionname {
    my $name = shift;
    $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
    return $name;
}
284

285
# convert a fully-qualified revision or version to a unique dirname:
Junio C Hamano's avatar
Junio C Hamano committed
286
#   normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302
# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
#
# the git notion of a branch is closer to
# archive/category--branch--version than archive/category--branch, so we
# use this to convert to git branch names.
# Also, keep archive names but replace '/' with ',' since it won't require
# subdirectories, and is safer than swapping '--' which could confuse
# reverse-mapping when dealing with bastard branches that
# are just archive/category--version  (no --branch)
sub tree_dirname {
    my $revision = shift;
    my $name = extract_versionname($revision);
    $name =~ s#/#,#;
    return $name;
}

303 304 305 306 307 308 309 310
# old versions of git-archimport just use the <category--branch> part:
sub old_style_branchname {
    my $id = shift;
    my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
    chomp $ret;
    return $ret;
}

311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338
*git_default_branchname = $opt_o ? *old_style_branchname : *tree_dirname;

# retrieve default archive, since $branch_name_map keys might not include it
sub get_default_archive {
    if (!defined $default_archive) {
        $default_archive = safe_pipe_capture($TLA,'my-default-archive');
        chomp $default_archive;
    }
    return $default_archive;
}

sub git_branchname {
    my $revision = shift;
    my $name = extract_versionname($revision);

    if (exists $branch_name_map{$name}) {
	return $branch_name_map{$name};

    } elsif ($name =~ m#^([^/]*)/(.*)$#
	     && $1 eq get_default_archive()
	     && exists $branch_name_map{$2}) {
	# the names given in the command-line lacked the archive.
	return $branch_name_map{$2};

    } else {
	return git_default_branchname($revision);
    }
}
339

340 341
sub process_patchset_accurate {
    my $ps = shift;
Junio C Hamano's avatar
Junio C Hamano committed
342

343 344 345
    # switch to that branch if we're not already in that branch:
    if (-e "$git_dir/refs/heads/$ps->{branch}") {
       system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
346

347 348 349
       # remove any old stuff that got leftover:
       my $rm = safe_pipe_capture('git-ls-files','--others','-z');
       rmtree(split(/\0/,$rm)) if $rm;
350
    }
Junio C Hamano's avatar
Junio C Hamano committed
351

352 353 354 355 356 357 358 359 360 361 362 363
    # Apply the import/changeset/merge into the working tree
    my $dir = sync_to_ps($ps);
    # read the new log entry:
    my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
    die "Error in cat-log: $!" if $?;
    chomp @commitlog;

    # grab variables we want from the log, new fields get added to $ps:
    # (author, date, email, summary, message body ...)
    parselog($ps, \@commitlog);

    if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
Junio C Hamano's avatar
Junio C Hamano committed
364
        # this should work when importing continuations
365
        if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
Junio C Hamano's avatar
Junio C Hamano committed
366

367
            # find where we are supposed to branch from
368 369 370 371 372 373 374 375 376 377 378 379 380 381
	    if (! -e "$git_dir/refs/heads/$ps->{branch}") {
		system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";

		# We trust Arch with the fact that this is just a tag,
		# and it does not affect the state of the tree, so
		# we just tag and move on.  If the user really wants us
		# to consolidate more branches into one, don't tag because
		# the tag name would be already taken.
		tag($ps->{id}, $branchpoint);
		ptag($ps->{id}, $branchpoint);
		print " * Tagged $ps->{id} at $branchpoint\n";
	    }
	    system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";

382 383 384 385 386 387 388 389 390
            # remove any old stuff that got leftover:
            my $rm = safe_pipe_capture('git-ls-files','--others','-z');
            rmtree(split(/\0/,$rm)) if $rm;
            return 0;
        } else {
            warn "Tagging from unknown id unsupported\n" if $ps->{tag};
        }
        # allow multiple bases/imports here since Arch supports cherry-picks
        # from unrelated trees
Junio C Hamano's avatar
Junio C Hamano committed
391 392
    }

393
    # update the index with all the changes we got
394 395
    system('git-diff-files --name-only -z | '.
            'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
396 397 398 399
    system('git-ls-files --others -z | '.
            'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
    return 1;
}
400

401 402 403 404
# the native changeset processing strategy.  This is very fast, but
# does not handle permissions or any renames involving directories
sub process_patchset_fast {
    my $ps = shift;
Junio C Hamano's avatar
Junio C Hamano committed
405
    #
406 407
    # create the branch if needed
    #
408 409
    if ($ps->{type} eq 'i' && !$import) {
        die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
410 411
    }

412
    unless ($import) { # skip for import
413
        if ( -e "$git_dir/refs/heads/$ps->{branch}") {
414
            # we know about this branch
415
            system('git-checkout',$ps->{branch});
416 417 418 419
        } else {
            # new branch! we need to verify a few things
            die "Branch on a non-tag!" unless $ps->{type} eq 't';
            my $branchpoint = ptag($ps->{tag});
Junio C Hamano's avatar
Junio C Hamano committed
420
            die "Tagging from unknown id unsupported: $ps->{tag}"
421
                unless $branchpoint;
Junio C Hamano's avatar
Junio C Hamano committed
422

423
            # find where we are supposed to branch from
424 425 426 427 428 429 430 431 432 433 434 435 436
	    if (! -e "$git_dir/refs/heads/$ps->{branch}") {
		system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";

		# We trust Arch with the fact that this is just a tag,
		# and it does not affect the state of the tree, so
		# we just tag and move on.  If the user really wants us
		# to consolidate more branches into one, don't tag because
		# the tag name would be already taken.
		tag($ps->{id}, $branchpoint);
		ptag($ps->{id}, $branchpoint);
		print " * Tagged $ps->{id} at $branchpoint\n";
            }
            system('git-checkout',$ps->{branch}) == 0 or die "$! $?\n";
437
            return 0;
Junio C Hamano's avatar
Junio C Hamano committed
438
        }
439
        die $! if $?;
Junio C Hamano's avatar
Junio C Hamano committed
440
    }
441 442 443

    #
    # Apply the import/changeset/merge into the working tree
Junio C Hamano's avatar
Junio C Hamano committed
444
    #
445 446
    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
        apply_import($ps) or die $!;
447
        $stats{import_or_tag}++;
448
        $import=0;
449 450
    } elsif ($ps->{type} eq 's') {
        apply_cset($ps);
451
        $stats{simple_changeset}++;
452 453 454 455 456 457
    }

    #
    # prepare update git's index, based on what arch knows
    # about the pset, resolve parents, etc
    #
Junio C Hamano's avatar
Junio C Hamano committed
458 459

    my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
460
    die "Error in cat-archive-log: $!" if $?;
Junio C Hamano's avatar
Junio C Hamano committed
461

462
    parselog($ps,\@commitlog);
463 464 465

    # imports don't give us good info
    # on added files. Shame on them
466 467 468
    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
        system('git-ls-files --deleted -z | '.
                'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
469 470
        system('git-ls-files --others -z | '.
                'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
471 472
    }

473
    # TODO: handle removed_directories and renamed_directories:
474

475 476
    if (my $del = $ps->{removed_files}) {
        unlink @$del;
477 478
        while (@$del) {
            my @slice = splice(@$del, 0, 100);
479 480
            system('git-update-index','--remove','--',@slice) == 0 or
                            die "Error in git-update-index --remove: $! $?\n";
481 482
        }
    }
483 484

    if (my $ren = $ps->{renamed_files}) {                # renamed
485 486 487
        if (@$ren % 2) {
            die "Odd number of entries in rename!?";
        }
Junio C Hamano's avatar
Junio C Hamano committed
488

489
        while (@$ren) {
490
            my $from = shift @$ren;
Junio C Hamano's avatar
Junio C Hamano committed
491
            my $to   = shift @$ren;
492 493 494 495

            unless (-d dirname($to)) {
                mkpath(dirname($to)); # will die on err
            }
496
            # print "moving $from $to";
497 498 499 500 501
            rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
            system('git-update-index','--remove','--',$from) == 0 or
                            die "Error in git-update-index --remove: $! $?\n";
            system('git-update-index','--add','--',$to) == 0 or
                            die "Error in git-update-index --add: $! $?\n";
502 503
        }
    }
504

505 506 507 508 509 510 511 512
    if (my $add = $ps->{new_files}) {
        while (@$add) {
            my @slice = splice(@$add, 0, 100);
            system('git-update-index','--add','--',@slice) == 0 or
                            die "Error in git-update-index --add: $! $?\n";
        }
    }

513
    if (my $mod = $ps->{modified_files}) {
514 515
        while (@$mod) {
            my @slice = splice(@$mod, 0, 100);
516 517
            system('git-update-index','--',@slice) == 0 or
                            die "Error in git-update-index: $! $?\n";
518 519
        }
    }
520 521 522 523 524 525 526 527 528 529 530 531
    return 1; # we successfully applied the changeset
}

if ($opt_f) {
    print "Will import patchsets using the fast strategy\n",
            "Renamed directories and permission changes will be missed\n";
    *process_patchset = *process_patchset_fast;
} else {
    print "Using the default (accurate) import strategy.\n",
            "Things may be a bit slow\n";
    *process_patchset = *process_patchset_accurate;
}
Junio C Hamano's avatar
Junio C Hamano committed
532

533 534 535 536 537
foreach my $ps (@psets) {
    # process patchsets
    $ps->{branch} = git_branchname($ps->{id});

    #
Junio C Hamano's avatar
Junio C Hamano committed
538 539
    # ensure we have a clean state
    #
540 541 542 543 544
    if (my $dirty = `git-diff-files`) {
        die "Unclean tree when about to process $ps->{id} " .
            " - did we fail to commit cleanly before?\n$dirty";
    }
    die $! if $?;
Junio C Hamano's avatar
Junio C Hamano committed
545

546 547 548 549 550
    #
    # skip commits already in repo
    #
    if (ptag($ps->{id})) {
      $opt_v && print " * Skipping already imported: $ps->{id}\n";
551
      next;
552 553 554 555 556 557
    }

    print " * Starting to work on $ps->{id}\n";

    process_patchset($ps) or next;

Junio C Hamano's avatar
Junio C Hamano committed
558
    # warn "errors when running git-update-index! $!";
559
    my $tree = `git-write-tree`;
560 561
    die "cannot write tree $!" if $?;
    chomp $tree;
Junio C Hamano's avatar
Junio C Hamano committed
562

563 564 565 566
    #
    # Who's your daddy?
    #
    my @par;
567
    if ( -e "$git_dir/refs/heads/$ps->{branch}") {
568
        if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
569 570 571 572
            my $p = <HEAD>;
            close HEAD;
            chomp $p;
            push @par, '-p', $p;
Junio C Hamano's avatar
Junio C Hamano committed
573
        } else {
574 575 576 577 578
            if ($ps->{type} eq 's') {
                warn "Could not find the right head for the branch $ps->{branch}";
            }
        }
    }
Junio C Hamano's avatar
Junio C Hamano committed
579

580 581 582
    if ($ps->{merges}) {
        push @par, find_parents($ps);
    }
583

Junio C Hamano's avatar
Junio C Hamano committed
584
    #
585 586 587 588 589 590 591 592 593 594
    # Commit, tag and clean state
    #
    $ENV{TZ}                  = 'GMT';
    $ENV{GIT_AUTHOR_NAME}     = $ps->{author};
    $ENV{GIT_AUTHOR_EMAIL}    = $ps->{email};
    $ENV{GIT_AUTHOR_DATE}     = $ps->{date};
    $ENV{GIT_COMMITTER_NAME}  = $ps->{author};
    $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
    $ENV{GIT_COMMITTER_DATE}  = $ps->{date};

Junio C Hamano's avatar
Junio C Hamano committed
595
    my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
596
        or die $!;
597
    print WRITER $ps->{summary},"\n\n";
598 599 600 601 602

    # only print message if it's not empty, to avoid a spurious blank line;
    # also append an extra newline, so there's a blank line before the
    # following "git-archimport-id:" line.
    print WRITER $ps->{message},"\n\n" if ($ps->{message} ne "");
Junio C Hamano's avatar
Junio C Hamano committed
603

604 605
    # make it easy to backtrack and figure out which Arch revision this was:
    print WRITER 'git-archimport-id: ',$ps->{id},"\n";
Junio C Hamano's avatar
Junio C Hamano committed
606

607 608 609 610 611 612 613 614 615 616 617
    close WRITER;
    my $commitid = <READER>;    # read
    chomp $commitid;
    close READER;
    waitpid $pid,0;             # close;

    if (length $commitid != 40) {
        die "Something went wrong with the commit! $! $commitid";
    }
    #
    # Update the branch
Junio C Hamano's avatar
Junio C Hamano committed
618
    #
619
    open  HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
620 621
    print HEAD $commitid;
    close HEAD;
622
    system('git-update-ref', 'HEAD', "$ps->{branch}");
623 624 625 626 627 628 629 630 631

    # tag accordingly
    ptag($ps->{id}, $commitid); # private tag
    if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
        tag($ps->{id}, $commitid);
    }
    print " * Committed $ps->{id}\n";
    print "   + tree   $tree\n";
    print "   + commit $commitid\n";
632
    $opt_v && print "   + commit date is  $ps->{date} \n";
633
    $opt_v && print "   + parents:  ",join(' ',@par),"\n";
634 635 636 637 638 639 640 641 642 643 644 645 646
}

if ($opt_v) {
    foreach (sort keys %stats) {
        print" $_: $stats{$_}\n";
    }
}
exit 0;

# used by the accurate strategy:
sub sync_to_ps {
    my $ps = shift;
    my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
Junio C Hamano's avatar
Junio C Hamano committed
647

648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680
    $opt_v && print "sync_to_ps($ps->{id}) method: ";

    if (-d $tree_dir) {
        if ($ps->{type} eq 't') {
	    $opt_v && print "get (tag)\n";
            # looks like a tag-only or (worse,) a mixed tags/changeset branch,
            # can't rely on replay to work correctly on these
            rmtree($tree_dir);
            safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
            $stats{get_tag}++;
        } else {
                my $tree_id = arch_tree_id($tree_dir);
                if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
                    # the common case (hopefully)
		    $opt_v && print "replay\n";
                    safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
                    $stats{replay}++;
                } else {
                    # getting one tree is usually faster than getting two trees
                    # and applying the delta ...
                    rmtree($tree_dir);
		    $opt_v && print "apply-delta\n";
                    safe_pipe_capture($TLA,'get','--no-pristine',
                                        $ps->{id},$tree_dir);
                    $stats{get_delta}++;
                }
        }
    } else {
        # new branch work
        $opt_v && print "get (new tree)\n";
        safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
        $stats{get_new}++;
    }
Junio C Hamano's avatar
Junio C Hamano committed
681

682 683 684 685 686 687 688
    # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
    system('rsync','-aI','--delete','--exclude',$git_dir,
#               '--exclude','.arch-inventory',
                '--exclude','.arch-ids','--exclude','{arch}',
                '--exclude','+*','--exclude',',*',
                "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
    return $tree_dir;
689 690 691 692
}

sub apply_import {
    my $ps = shift;
693
    my $bname = git_branchname($ps->{id});
694

695
    mkpath($tmp);
696

697
    safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
Junio C Hamano's avatar
Junio C Hamano committed
698
    die "Cannot get import: $!" if $?;
699 700 701
    system('rsync','-aI','--delete', '--exclude',$git_dir,
		'--exclude','.arch-ids','--exclude','{arch}',
		"$tmp/import/", './');
702
    die "Cannot rsync import:$!" if $?;
Junio C Hamano's avatar
Junio C Hamano committed
703

704
    rmtree("$tmp/import");
705
    die "Cannot remove tempdir: $!" if $?;
Junio C Hamano's avatar
Junio C Hamano committed
706

707 708 709 710 711 712 713

    return 1;
}

sub apply_cset {
    my $ps = shift;

714
    mkpath($tmp);
715 716

    # get the changeset
717
    safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
718
    die "Cannot get changeset: $!" if $?;
Junio C Hamano's avatar
Junio C Hamano committed
719

720 721 722 723
    # apply patches
    if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
        # this can be sped up considerably by doing
        #    (find | xargs cat) | patch
724
        # but that can get mucked up by patches
Junio C Hamano's avatar
Junio C Hamano committed
725
        # with missing trailing newlines or the standard
726 727 728 729 730 731 732 733 734 735 736 737 738 739 740
        # 'missing newline' flag in the patch - possibly
        # produced with an old/buggy diff.
        # slow and safe, we invoke patch once per patchfile
        `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
        die "Problem applying patches! $!" if $?;
    }

    # apply changed binary files
    if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
        foreach my $mod (@modified) {
            chomp $mod;
            my $orig = $mod;
            $orig =~ s/\.modified$//; # lazy
            $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
            #print "rsync -p '$mod' '$orig'";
741
            system('rsync','-p',$mod,"./$orig");
742 743 744 745 746
            die "Problem applying binary changes! $!" if $?;
        }
    }

    # bring in new files
747
    system('rsync','-aI','--exclude',$git_dir,
Junio C Hamano's avatar
Junio C Hamano committed
748
		'--exclude','.arch-ids',
749 750
		'--exclude', '{arch}',
		"$tmp/changeset/new-files-archive/",'./');
751 752 753

    # deleted files are hinted from the commitlog processing

754
    rmtree("$tmp/changeset");
755 756 757 758
}


# =for reference
759 760 761
# notes: *-files/-directories keys cannot have spaces, they're always
# pika-escaped.  Everything after the first newline
# A log entry looks like:
762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778
# Revision: moodle-org--moodle--1.3.3--patch-15
# Archive: arch-eduforge@catalyst.net.nz--2004
# Creator: Penny Leach <penny@catalyst.net.nz>
# Date: Wed May 25 14:15:34 NZST 2005
# Standard-date: 2005-05-25 02:15:34 GMT
# New-files: lang/de/.arch-ids/block_glossary_random.php.id
#     lang/de/.arch-ids/block_html.php.id
# New-directories: lang/de/help/questionnaire
#     lang/de/help/questionnaire/.arch-ids
# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
#    db_sears.sql db/db_sears.sql
# Removed-files: lang/be/docs/.arch-ids/release.html.id
#     lang/be/docs/.arch-ids/releaseold.html.id
# Modified-files: admin/cron.php admin/delete.php
#     admin/editor.html backup/lib.php backup/restore.php
# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
779
#   summary can be multiline with a leading space just like the above fields
780 781 782 783
# Keywords:
#
# Updating yadda tadda tadda madda
sub parselog {
784 785 786 787 788 789 790 791 792 793 794 795
    my ($ps, $log) = @_;
    my $key = undef;

    # headers we want that contain filenames:
    my %want_headers = (
        new_files => 1,
        modified_files => 1,
        renamed_files => 1,
        renamed_directories => 1,
        removed_files => 1,
        removed_directories => 1,
    );
Junio C Hamano's avatar
Junio C Hamano committed
796

797 798 799 800 801 802
    chomp (@$log);
    while ($_ = shift @$log) {
        if (/^Continuation-of:\s*(.*)/) {
            $ps->{tag} = $1;
            $key = undef;
        } elsif (/^Summary:\s*(.*)$/ ) {
803 804
            # summary can be multiline as long as it has a leading space.
	    # we squeeze it onto a single line, though.
805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833
            $ps->{summary} = [ $1 ];
            $key = 'summary';
        } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
            $ps->{author} = $1;
            $ps->{email} = $2;
            $key = undef;
        # any *-files or *-directories can be read here:
        } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
            my $val = $2;
            $key = lc $1;
            $key =~ tr/-/_/; # too lazy to quote :P
            if ($want_headers{$key}) {
                push @{$ps->{$key}}, split(/\s+/, $val);
            } else {
                $key = undef;
            }
        } elsif (/^$/) {
            last; # remainder of @$log that didn't get shifted off is message
        } elsif ($key) {
            if (/^\s+(.*)$/) {
                if ($key eq 'summary') {
                    push @{$ps->{$key}}, $1;
                } else { # files/directories:
                    push @{$ps->{$key}}, split(/\s+/, $1);
                }
            } else {
                $key = undef;
            }
        }
834
    }
Junio C Hamano's avatar
Junio C Hamano committed
835

836 837 838 839 840 841 842 843 844 845 846 847
    # drop leading empty lines from the log message
    while (@$log && $log->[0] eq '') {
	shift @$log;
    }
    if (exists $ps->{summary} && @{$ps->{summary}}) {
	$ps->{summary} = join(' ', @{$ps->{summary}});
    }
    elsif (@$log == 0) {
	$ps->{summary} = 'empty commit message';
    } else {
	$ps->{summary} = $log->[0] . '...';
    }
848
    $ps->{message} = join("\n",@$log);
Junio C Hamano's avatar
Junio C Hamano committed
849

850 851 852
    # skip Arch control files, unescape pika-escaped files
    foreach my $k (keys %want_headers) {
        next unless (defined $ps->{$k});
853
        my @tmp = ();
854 855 856 857 858 859
        foreach my $t (@{$ps->{$k}}) {
           next unless length ($t);
           next if $t =~ m!\{arch\}/!;
           next if $t =~ m!\.arch-ids/!;
           # should we skip this?
           next if $t =~ m!\.arch-inventory$!;
860 861
           # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
           # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
862
           if ($t =~ /\\/ ){
863
               $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
864
           }
865
           push @tmp, $t;
866
        }
867
        $ps->{$k} = \@tmp;
868 869 870 871 872 873
    }
}

# write/read a tag
sub tag {
    my ($tag, $commit) = @_;
Junio C Hamano's avatar
Junio C Hamano committed
874

875 876 877
    if ($opt_o) {
        $tag =~ s|/|--|g;
    } else {
878 879 880
	my $patchname = $tag;
	$patchname =~ s/.*--//;
        $tag = git_branchname ($tag) . '--' . $patchname;
881
    }
Junio C Hamano's avatar
Junio C Hamano committed
882

883
    if ($commit) {
884
        open(C,">","$git_dir/refs/tags/$tag")
885 886 887 888 889
            or die "Cannot create tag $tag: $!\n";
        print C "$commit\n"
            or die "Cannot write tag $tag: $!\n";
        close(C)
            or die "Cannot write tag $tag: $!\n";
890
        print " * Created tag '$tag' on '$commit'\n" if $opt_v;
891
    } else {                    # read
892
        open(C,"<","$git_dir/refs/tags/$tag")
893 894 895 896 897 898 899 900 901 902 903 904 905 906
            or die "Cannot read tag $tag: $!\n";
        $commit = <C>;
        chomp $commit;
        die "Error reading tag $tag: $!\n" unless length $commit == 40;
        close(C)
            or die "Cannot read tag $tag: $!\n";
        return $commit;
    }
}

# write/read a private tag
# reads fail softly if the tag isn't there
sub ptag {
    my ($tag, $commit) = @_;
907 908

    # don't use subdirs for tags yet, it could screw up other porcelains
Junio C Hamano's avatar
Junio C Hamano committed
909 910
    $tag =~ s|/|,|g;

911 912 913
    my $tag_file = "$ptag_dir/$tag";
    my $tag_branch_dir = dirname($tag_file);
    mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
914 915

    if ($commit) {              # write
916
        open(C,">",$tag_file)
917 918 919 920 921
            or die "Cannot create tag $tag: $!\n";
        print C "$commit\n"
            or die "Cannot write tag $tag: $!\n";
        close(C)
            or die "Cannot write tag $tag: $!\n";
Junio C Hamano's avatar
Junio C Hamano committed
922
	$rptags{$commit} = $tag
923
	    unless $tag =~ m/--base-0$/;
924 925
    } else {                    # read
        # if the tag isn't there, return 0
926
        unless ( -s $tag_file) {
927 928
            return 0;
        }
929
        open(C,"<",$tag_file)
930 931 932 933 934 935
            or die "Cannot read tag $tag: $!\n";
        $commit = <C>;
        chomp $commit;
        die "Error reading tag $tag: $!\n" unless length $commit == 40;
        close(C)
            or die "Cannot read tag $tag: $!\n";
936 937 938
	unless (defined $rptags{$commit}) {
	    $rptags{$commit} = $tag;
	}
939 940 941
        return $commit;
    }
}
942 943 944 945 946 947

sub find_parents {
    #
    # Identify what branches are merging into me
    # and whether we are fully merged
    # git-merge-base <headsha> <headsha> should tell
Junio C Hamano's avatar
Junio C Hamano committed
948
    # me what the base of the merge should be
949 950 951 952 953 954 955 956 957 958 959 960 961
    #
    my $ps = shift;

    my %branches; # holds an arrayref per branch
                  # the arrayref contains a list of
                  # merged patches between the base
                  # of the merge and the current head

    my @parents;  # parents found for this commit

    # simple loop to split the merges
    # per branch
    foreach my $merge (@{$ps->{merges}}) {
962
	my $branch = git_branchname($merge);
963 964 965 966 967 968 969
	unless (defined $branches{$branch} ){
	    $branches{$branch} = [];
	}
	push @{$branches{$branch}}, $merge;
    }

    #
Junio C Hamano's avatar
Junio C Hamano committed
970
    # foreach branch find a merge base and walk it to the
971 972 973 974 975 976
    # head where we are, collecting the merged patchsets that
    # Arch has recorded. Keep that in @have
    # Compare that with the commits on the other branch
    # between merge-base and the tip of the branch (@need)
    # and see if we have a series of consecutive patches
    # starting from the merge base. The tip of the series
Junio C Hamano's avatar
Junio C Hamano committed
977
    # of consecutive patches merged is our new parent for
978 979 980
    # that branch.
    #
    foreach my $branch (keys %branches) {
981 982 983 984

	# check that we actually know about the branch
	next unless -e "$git_dir/refs/heads/$branch";

985
	my $mergebase = `git-merge-base $branch $ps->{branch}`;
Junio C Hamano's avatar
Junio C Hamano committed
986 987 988 989 990 991 992
	if ($?) {
	    # Don't die here, Arch supports one-way cherry-picking
	    # between branches with no common base (or any relationship
	    # at all beforehand)
	    warn "Cannot find merge base for $branch and $ps->{branch}";
	    next;
	}
993 994 995 996
	chomp $mergebase;

	# now walk up to the mergepoint collecting what patches we have
	my $branchtip = git_rev_parse($ps->{branch});
997
	my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`;
998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016
	my %have; # collected merges this branch has
	foreach my $merge (@{$ps->{merges}}) {
	    $have{$merge} = 1;
	}
	my %ancestorshave;
	foreach my $par (@ancestors) {
	    $par = commitid2pset($par);
	    if (defined $par->{merges}) {
		foreach my $merge (@{$par->{merges}}) {
		    $ancestorshave{$merge}=1;
		}
	    }
	}
	# print "++++ Merges in $ps->{id} are....\n";
	# my @have = sort keys %have;	print Dumper(\@have);

	# merge what we have with what ancestors have
	%have = (%have, %ancestorshave);

Junio C Hamano's avatar
Junio C Hamano committed
1017
	# see what the remote branch has - these are the merges we
1018 1019
	# will want to have in a consecutive series from the mergebase
	my $otherbranchtip = git_rev_parse($branch);
1020
	my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`;
1021 1022 1023 1024
	my @need;
	foreach my $needps (@needraw) { 	# get the psets
	    $needps = commitid2pset($needps);
	    # git-rev-list will also
Junio C Hamano's avatar
Junio C Hamano committed
1025
	    # list commits merged in via earlier
1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060
	    # merges. we are only interested in commits
	    # from the branch we're looking at
	    if ($branch eq $needps->{branch}) {
		push @need, $needps->{id};
	    }
	}

	# print "++++ Merges from $branch we want are....\n";
	# print Dumper(\@need);

	my $newparent;
	while (my $needed_commit = pop @need) {
	    if ($have{$needed_commit}) {
		$newparent = $needed_commit;
	    } else {
		last; # break out of the while
	    }
	}
	if ($newparent) {
	    push @parents, $newparent;
	}


    } # end foreach branch

    # prune redundant parents
    my %parents;
    foreach my $p (@parents) {
	$parents{$p} = 1;
    }
    foreach my $p (@parents) {
	next unless exists $psets{$p}{merges};
	next unless ref    $psets{$p}{merges};
	my @merges = @{$psets{$p}{merges}};
	foreach my $merge (@merges) {
Junio C Hamano's avatar
Junio C Hamano committed
1061
	    if ($parents{$merge}) {
1062 1063 1064 1065
		delete $parents{$merge};
	    }
	}
    }
1066

1067 1068 1069 1070
    @parents = ();
    foreach (keys %parents) {
        push @parents, '-p', ptag($_);
    }
1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085
    return @parents;
}

sub git_rev_parse {
    my $name = shift;
    my $val  = `git-rev-parse $name`;
    die "Error: git-rev-parse $name" if $?;
    chomp $val;
    return $val;
}

# resolve a SHA1 to a known patchset
sub commitid2pset {
    my $commitid = shift;
    chomp $commitid;
Junio C Hamano's avatar
Junio C Hamano committed
1086
    my $name = $rptags{$commitid}
1087
	|| die "Cannot find reverse tag mapping for $commitid";
1088
    $name =~ s|,|/|;
Junio C Hamano's avatar
Junio C Hamano committed
1089
    my $ps   = $psets{$name}
1090 1091 1092
	|| (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
    return $ps;
}
1093

1094

1095
# an alternative to `command` that allows input to be passed as an array
1096 1097 1098 1099 1100 1101 1102
# to work around shell problems with weird characters in arguments
sub safe_pipe_capture {
    my @output;
    if (my $pid = open my $child, '-|') {
        @output = (<$child>);
        close $child or die join(' ',@_).": $! $?";
    } else {
1103
	exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
1104 1105 1106 1107
    }
    return wantarray ? @output : join('',@output);
}

1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118
# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
sub arch_tree_id {
    my $dir = shift;
    chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
    return $ret;
}

sub archive_reachable {
    my $archive = shift;
    return 1 if $reachable{$archive};
    return 0 if $unreachable{$archive};
Junio C Hamano's avatar
Junio C Hamano committed
1119

1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133
    if (system "$TLA whereis-archive $archive >/dev/null") {
        if ($opt_a && (system($TLA,'register-archive',
                      "http://mirrors.sourcecontrol.net/$archive") == 0)) {
            $reachable{$archive} = 1;
            return 1;
        }
        print STDERR "Archive is unreachable: $archive\n";
        $unreachable{$archive} = 1;
        return 0;
    } else {
        $reachable{$archive} = 1;
        return 1;
    }
}