Commit 087e65b6 authored by Arne Koehn

negra-sentence-selector: don't remove $ in tags, more documentation

parent 708d218e
......@@ -35,14 +35,14 @@ sub main {
);
pod2usage(1) if $help;
pod2usage(2) if $man;
pod2usage(3) if $man;
# This is useless at the moment. Could be used when the script should write to
# just one file.
die "ERROR: testfile is useless if all sentences go to the trainfile!"
if ( $testfilename && ! $numsentences);
open(my $inputfile, $infile) or die "couldn't open input file!\n";
open(my $inputfile, $infile) or die "couldn't open input file! Use --help for help\n";
open(my $goldfile, '>', $goldfilename);
open(my $testfile, '>', $testfilename);
open(my $trainfile, '>', $trainfilename);
......@@ -74,7 +74,7 @@ sub process {
my $n = 0; # the sentence number
while ($line = <$inputfile>) {
if ($line !~ /^[#*%].*/) {
$line =~ s/\t\$/\t/;
# $line =~ s/\t\$/\t/;
$line = join(' ', (split(/\s+/, $line))[0,1])."\n";
$sentences[$n] .= $line;
if ($line =~ /\.$/) {
......@@ -114,7 +114,7 @@ training out of a file that is in negra format.
=head1 SYNOPSIS
negra-sentence-selector.pl -in infile -out outfile [options]
negra-sentence-selector.pl -in infile -trainfile trainfile -testfile testfile -goldfile goldfile [options]
Options:
......@@ -132,6 +132,16 @@ Options:
-testfile [file]: This is where the test sentences go to
-goldfile [file]: This is where the gold sentences go to
=head1 TRAIN-TEST-GOLD
The train file contains sentences with tags for training.
The test file contains all other sentences without tags for testing.
The gold file contains the same sentences as the test file, but with tags.
=head1 AUTHOR
Arne Köhn, E<lt>arne@arne-koehn.eeE<gt>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment