Commit 75eb581c authored by Arne Köhn's avatar Arne Köhn

Testing of results now works, new tagger svmt-standard is able to train, but not test.

parent b3c6f0ae
#! /bin/sh
# usage: train [trainfile] [modelname]
# where modelname is the name of this experiment
#
# Writes an SVMTlearn configuration for this experiment to
# tmpfiles/config-<modelname>.tmp (next to this script) and then runs
# SVMTlearn on that configuration from inside the models/ directory.

# Both arguments end up in the generated config; refuse to write a broken one.
if [ $# -lt 2 ]
then
    echo "usage: $0 trainfile modelname" >&2
    exit 1
fi

# Resolve the directory of this script. Changing into it first and asking
# for pwd works for both relative and absolute invocations of $0.
WORKDIR=$(cd "$(dirname "$0")" && pwd) || exit 1
cd "$WORKDIR" || exit 1
# with this you get $SVMDIR
# NOTE(review): $SVMTOOLDIR is used below as well, so svmt-config is
# presumably expected to define it too -- confirm.
. ../svmt-config
mkdir -p tmpfiles models || exit 1
cat > "tmpfiles/config-$2.tmp" <<EOF
TRAINSET = $1
SVMDIR = $SVMDIR
NAME = $2
do M0 LRL
EOF
# Run the learner from models/ so that relative output lands there.
cd models || exit 1
"$SVMTOOLDIR/SVMTlearn" -V 1 "$WORKDIR/tmpfiles/config-$2.tmp"
\ No newline at end of file
......@@ -11,8 +11,11 @@ corpus = /path/to/negra-corpus.export
# The database file to use
dbfile = database.db
# path to SVMTeval, used for evaluating the taggers
svmteval = /home/arne/uni/bachelorarbeit/svm/SVMTool-1.3/bin/SVMTeval
# The names of the taggers to use, as a comma-separated list
taggers = tnt-standard
taggers = svmt-standard,tnt-standard
# The hosts on which the work should be done. You need to have
# ssh-access with a passwordless public key to those hosts. You can
......
......@@ -101,7 +101,11 @@ my $buttonbox = $win1->add(
{
-label => 'train',
-onpress => \&schedule_train,
}
},
{
-label => 'test',
-onpress => \&schedule_test,
},
],
-y => 22,
);
......@@ -137,7 +141,16 @@ $dbh->do('CREATE TABLE IF NOT EXISTS train
(id INTEGER PRIMARY KEY ASC,
tagger TEXT,
tset INTEGER);');
# Create the table that stores one row per evaluated test run, unless it
# already exists. Each row names the tagger and the training/test set, and
# has columns for the tagging time, the setup time, a numeric result and a
# free-text info blob. created_on defaults to the time the row is inserted.
$dbh->do('CREATE TABLE IF NOT EXISTS results
(id INTEGER PRIMARY KEY ASC,
tagger TEXT,
tset INTEGER,
tagtime REAL,
setuptime REAL,
result REAL,
infoblob TEXT,
created_on DEFAULT CURRENT_TIMESTAMP);');
# Names of the configured taggers, obtained by splitting the 'taggers'
# entry of %cfg at every comma.
my @taggers;
@taggers=split(/,/,$cfg{'taggers'});
......@@ -155,8 +168,14 @@ sub manage_jobs {
while ( $num_pending and $free_hosts->pending()) {
my $host = $free_hosts->dequeue();
my $job = $pending_jobs->dequeue();
push @threadlist,
threads->create(\&train_tagger, $job->{'tagger'}, $job->{'tset'}, $host);
if ($job->{'type'} eq 'train') {
push @threadlist,
threads->create(\&train_tagger, $job->{'tagger'}, $job->{'tset'}, $host);
}
if ( $job->{'type'} eq 'test') {
push @threadlist,
threads->create(\&test_tagger, $job->{'tagger'}, $job->{'tset'}, $host);
}
$num_running++;
$num_pending = $pending_jobs->pending();
}
......@@ -170,14 +189,34 @@ sub manage_jobs {
}
while ( $job_results->pending()) {
my $job = $job_results->dequeue();
$free_hosts->enqueue($job->{'host'});
$num_running--;
my $tagger = $job->{'tagger'};
my $tset = $job->{'tset'};
process_jobresult($job);
}
}
# Record the outcome of one finished job and hand its host back to the pool.
#
# Argument: the hash reference a worker put on the result queue. It carries
# the keys 'host', 'type', 'tagger' and 'tset'; jobs of type 'test'
# additionally carry 'tagtime' and 'setuptime'.
#
# For a 'train' job a row is added to the train table. For a 'test' job the
# external evaluator configured as $cfg{'svmteval'} is run on the tagger
# output, the overall accuracy is extracted from its report, and everything
# is stored in the results table.
#
# Dies if the running-job counter drops below zero.
sub process_jobresult {
    my $job = shift;

    # The host is idle again regardless of what kind of job it ran.
    $free_hosts->enqueue($job->{'host'});
    $num_running--;

    my $type   = $job->{'type'};
    my $tagger = $job->{'tagger'};
    my $tset   = $job->{'tset'};

    if ( $type eq 'train' ) {
        # Placeholders let the driver do the quoting of the values.
        $dbh->do('INSERT INTO train (tagger, tset) VALUES (?, ?);',
                 undef, $tagger, $tset);
    }
    elsif ( $type eq 'test') {
        my $tagtime   = $job->{'tagtime'};
        my $setuptime = $job->{'setuptime'};

        # FIXME warn if svmt-standard is not trained on this tset
        my $info = `$cfg{'svmteval'} 0 \$(pwd)/taggers/svmt-standard/models/$tset tfiles/$tset/test tmp/$tagger-$tset`;
        $info = '' unless defined $info;

        # List-context match: $result stays undef (stored as NULL) when the
        # report has no accuracy line, instead of picking up a stale $1.
        my ($result) = $info =~ m/=== OVERALL ACCURACY ===.*? (\d+\.\d+)%/s;

        # The report is arbitrary text and may contain quotes, so it must
        # be bound as a parameter rather than pasted into the statement.
        $dbh->do('INSERT INTO results
                  (tagger, tset, tagtime, setuptime, result, infoblob) VALUES
                  (?, ?, ?, ?, ?, ?);',
                 undef, $tagger, $tset, $tagtime, $setuptime, $result, $info);
    }

    die "negative running jobs?!" if ( $num_running<0);
}
$cui->set_timer('manage_jobs',\&manage_jobs,1);
## END JOBMANAGEMENT
......@@ -205,7 +244,16 @@ update_tsetbox();
# Button callback: queue one training job for every combination of the
# taggers and training sets currently selected in the two list boxes, then
# run the job manager once so that free hosts pick up work right away.
sub schedule_train {
    foreach my $tagger ($taggerbox->get() ) {
        foreach my $tset ($tsetbox->get()) {
            # Every job must carry a 'type': the job manager dispatches on
            # it, and an untyped job would take a host without ever
            # starting a worker thread.
            $pending_jobs->enqueue({'tagger'=>$tagger, 'tset'=>$tset, 'type'=>'train'});
        }
    }
    manage_jobs();
}
sub schedule_test {
foreach my $tagger ($taggerbox->get() ) {
foreach my $tset ($tsetbox->get()) {
$pending_jobs->enqueue({'tagger'=>$tagger, 'tset'=>$tset, 'type'=>'test'});
}
}
manage_jobs();
......@@ -220,7 +268,35 @@ sub train_tagger {
my $command = "sh taggers/$tagger/train \$(pwd)/tfiles/$tset/train $tset";
print STDERR "running ",$command;
`$command`;
$job_results->enqueue({'tagger'=>$tagger,'tset'=> $tset,'host'=> $host});
$job_results->enqueue({'type'=>'train','tagger'=>$tagger,'tset'=> $tset,'host'=> $host});
} else {
die "remote train not yet implemented";
}
}
sub test_tagger {
my $tagger = shift;
my $tset = shift;
my $host = shift;
if ( $host eq 'local') {
my $command = "sh taggers/$tagger/tag $tset \$(pwd)/tfiles/$tset/test \$(pwd)/tmp/$tagger-$tset";
print STDERR "running ",$command;
my $time = `$command`;
my $setuptime = -1;
my $tagtime = -1;
if ($time =~ m/SETUPTIME=(\d+)/) {
$setuptime = $1
}
if ($time =~ m/TAGTIME=(\d+)/) {
$tagtime = $1
}
$job_results->enqueue({'type'=>'test',
'tagger'=>$tagger,
'tset'=> $tset,
'host'=> $host,
'tagtime'=> $tagtime,
'setuptime'=> $setuptime,
});
} else {
die "remote train not yet implemented";
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment