negra-sentence-selector: don't remove $ in tags, more documentation

......@@ -35,14 +35,14 @@ sub main {
pod2usage(1) if $help;
pod2usage(2) if $man;
pod2usage(3) if $man;
# This is useless at the moment. It could be used if the script should write
# to just one file.
die "ERROR: testfile is useless if all sentences go to the trainfile!"
if ( $testfilename && ! $numsentences);
open(my $inputfile, $infile) or die "couldn't open input file!\n";
open(my $inputfile, $infile) or die "couldn't open input file! Use --help for help\n";
open(my $goldfile, '>', $goldfilename);
open(my $testfile, '>', $testfilename);
open(my $trainfile, '>', $trainfilename);
......@@ -74,7 +74,7 @@ sub process {
my $n = 0; # the sentence number
while ($line = <$inputfile>) {
if ($line !~ /^[#*%].*/) {
$line =~ s/\t\$/\t/;
# $line =~ s/\t\$/\t/;
$line = join(' ', (split(/\s+/, $line))[0,1])."\n";
$sentences[$n] .= $line;
if ($line =~ /\.$/) {
......@@ -114,7 +114,7 @@ training out of a file that is in negra format.
=head1 SYNOPSIS -in infile -out outfile [options] -in infile -trainfile trainfile -testfile testfile -goldfile goldfile [options]
......@@ -132,6 +132,16 @@ Options:
-testfile [file]: This is where the test sentences go to
-goldfile [file]: This is where the gold sentences go to
The train file contains sentences with tags, for training.
The test file contains all other sentences, without tags, for testing.
The gold file contains the same sentences as the test file, but with tags.
=head1 AUTHOR
Arne Köhn, E<lt>arne@arne-koehn.eeE<gt>
