Commit f2de7e9d authored by Arne Köhn's avatar Arne Köhn

sample configuration file, first steps for job scheduling

parent 43391782
# This is the configuration file for train-evaluate.pl.
#
# Copyright (C) 2009 by Arne Köhn licensed under the GPLv2 or later.
#
# You can also use and reproduce this program under the same terms as
# Perl itself.
# Path to the corpus file. For now it has to be in a NEGRA export format.
corpus = /path/to/negra-corpus.export
# The database file to use
dbfile = database.db
# The names of the taggers to use, as a comma-separated list
taggers = tnt-standard
# The hosts on which the work should be done. You need to have
# ssh-access with a passwordless public key to those hosts. You can
# also use 'local' to enable running locally, without ssh. If the same
# host is listed multiple times, it will be used for multiple
# processes.
# Example: run three processes on the local machine
hosts = local,local,local
......@@ -9,6 +9,8 @@ use warnings;
use strict;
use Getopt::Long;
use Curses::UI;
use threads;
use Thread::Queue;
# This imports POSTaggerEvaluator::NegraProcess
require 'negra-sentence-selector.pl';
......@@ -21,6 +23,7 @@ my $pick_random;
my $cui = Curses::UI->new (clear_on_exit => 1, -color_support => 1);
sub exit_dialog() {
my $return = $cui->dialog(
-message => "Do you really want to quit?",
......@@ -62,16 +65,64 @@ my $win1 = $cui->add(
$cui->set_binding(sub {$menu->focus()}, "\cT");
$cui->set_binding( \&exit_dialog , "\cQ");
# GetOptions('corpus=s'=>\$corpus,
# 'num=i'=>\$numsentences,
# 'train'=>\$do_train,
# 'evaluate'=>\$do_evaluate,
# 'tagger'=>\$tagger,
# 'random'=>\$pick_random
# );
# Status line in the main window; manage_jobs() rewrites its text.
my $statuslabel = $win1->add(
    'statuslabel', 'Label',
    -width => 40,
    -bold  => 1,
    -text  => 'Hello, world!',
);

# Options common to both multi-select list boxes below.  The (initially
# empty) value list is deliberately not part of this, so that each box
# gets its own array.
my @listbox_layout = (
    -multi      => 1,
    -height     => 10,
    -border     => 1,
    -vscrollbar => 'right',
);

# List of the configured taggers.
my $taggerbox = $win1->add(
    'taggerbox', 'Listbox',
    @listbox_layout,
    -values => [],
    -y      => 2,
    -title  => 'your taggers',
);

# List of the tsets, placed below the tagger list.
my $tsetbox = $win1->add(
    'tsetbox', 'Listbox',
    @listbox_layout,
    -values => [],
    -y      => 12,
    -title  => 'your tsets',
);
## BEGIN JOBMANAGEMENT
# Thread::Queue objects used to exchange job state between manage_jobs()
# and the worker threads it starts.
# Jobs that manage_jobs() has handed to a worker thread.
my $running_jobs = Thread::Queue->new();
# Jobs that are waiting for a free host.
my $pending_jobs = Thread::Queue->new();
# Filled by train_tagger() when a job is done; drained by manage_jobs().
my $job_results = Thread::Queue->new();
# Hosts (from the 'hosts' configuration entry) that can take a job.
my $free_hosts = Thread::Queue->new();
# Scheduler tick; registered as a Curses::UI callback.
#
# Returns the hosts of finished jobs to the pool of free hosts, starts a
# worker thread for every pending job that can be paired with a free host,
# and refreshes the status label.
#
# Queue formats:
#   $pending_jobs - one hash ref per job with the keys 'tagger' and 'tset'
#                   (NOTE(review): no producer is visible here - confirm)
#   $job_results  - three consecutive items (tagger, tset, host) per
#                   finished job, as enqueued by train_tagger()
#   $running_jobs - one entry per started job; only its length is used
sub manage_jobs {
    # Collect results first, so that freed hosts can be reused in this very
    # tick and the numbers shown below are up to date.  A result is a group
    # of three queue items; enqueue() adds such a group atomically.
    while ( $job_results->pending() >= 3 ) {
        my ( undef, undef, $host ) = $job_results->dequeue(3);
        $free_hosts->enqueue($host);

        # One job less is running.  Which entry is dropped does not matter,
        # because only the number of entries is ever looked at.
        $running_jobs->dequeue_nb();
    }

    while ( $pending_jobs->pending() and $free_hosts->pending() ) {
        my $host = $free_hosts->dequeue();

        # dequeue() returns exactly one item, so a job has to travel through
        # the queue as a single reference, not as a flattened hash.
        my $job = $pending_jobs->dequeue();

        my $worker = threads->create(
            \&train_tagger, $job->{'tagger'}, $job->{'tset'}, $host
        );
        if ( not $worker ) {
            # Could not start a thread: put everything back and retry on
            # the next tick instead of losing the job and the host.
            $free_hosts->enqueue($host);
            $pending_jobs->enqueue($job);
            last;
        }

        # The result comes back through $job_results, nobody joins the
        # thread; detach it so that it is cleaned up when it finishes.
        $worker->detach();
        $running_jobs->enqueue($job);
    }

    my $num_pending = $pending_jobs->pending();
    my $num_running = $running_jobs->pending();
    if ( $num_pending or $num_running ) {
        $statuslabel->text("$num_running Jobs running, $num_pending Jobs pending,");
    }
    else {
        $statuslabel->text("No jobs running...");
    }
    return;
}
$cui->add_callback ('manage_jobs',\&manage_jobs);
## END JOBMANAGEMENT
# die "Nothing to do..."if not ($do_train or $do_evaluate);
# die "No tagger given" if not $tagger;
my %cfg;
......@@ -85,22 +136,41 @@ while (<CONFIG>) {
next unless length; # anything left?
my ($var, $value) = split(/\s*=\s*/, $_, 2);
$cfg{$var} = $value;
}
};
# Connect to the db and create the necessary tables if they don't
# exist already
# Fail with the DBI error message right away; otherwise the first do()
# below would die with an unhelpful "method on undefined value" error.
my $dbh = DBI->connect("dbi:SQLite:dbname=$cfg{'dbfile'}","","")
    or die "Cannot open database $cfg{'dbfile'}: $DBI::errstr";
$dbh->do('CREATE TABLE IF NOT EXISTS tsets
(id INTEGER PRIMARY KEY ASC,
numtrain INTEGER,
created_on DEFAULT CURRENT_TIMESTAMP,
infoblob DEFAULT NULL);');
$dbh->do('CREATE TABLE IF NOT EXISTS taggers
(id INTEGER PRIMARY KEY ASC, name);');
$dbh->do('CREATE TABLE IF NOT EXISTS train
(id INTEGER PRIMARY KEY ASC,
tagger INTEGER,
tset INTEGER);');
# Both configuration entries are comma-separated lists; blanks around the
# commas are tolerated.
my @taggers = split(/\s*,\s*/, $cfg{'taggers'});
# The list box takes its values as an array reference (like the -values
# option used when the widget was created), not as a flat list.
$taggerbox->values(\@taggers);
# Every entry of 'hosts' is one job slot; a host listed several times can
# run several jobs at once.
$free_hosts->enqueue(split(/\s*,\s*/, $cfg{'hosts'}));
# Creates the 'tsets' table in the database behind $dbh unless it exists
# already.  Returns whatever $dbh->do() returns.
sub create_db() {
    # The column list has to be closed with ')'; without it SQLite rejects
    # the statement as incomplete.
    return $dbh->do(
        'CREATE TABLE IF NOT EXISTS tsets (id INTEGER PRIMARY KEY ASC, numtrain INTEGER, created_on DEFAULT CURRENT_TIMESTAMP, infoblob DEFAULT NULL)'
    );
}
sub train {
# Worker thread entry point: trains one tagger on one training set.
#
# Parameters:
#   $tagger - tagger name; its trainer is the program taggers/$tagger/train
#   $tset   - training set name; the trainer gets tfiles/$tset/train as its
#             only argument
#   $host   - where to run; only 'local' is implemented so far
#
# When the trainer has run (successfully or not), the three items
# (tagger, tset, host) are put on $job_results so that manage_jobs() can
# hand the host to the next job.  For any other host the thread dies and
# nothing is reported, i.e. that host does not return to the pool.
sub train_tagger {
    my ( $tagger, $tset, $host ) = @_;

    die "train not yet implemented" if $host ne 'local';

    # TODO: measure time of execution
    # List-form pipe open runs the trainer without a shell, so blanks or
    # shell metacharacters in the names cannot change the command.
    my $trainer_cmd = "taggers/$tagger/train";
    if ( open my $trainer, '-|', $trainer_cmd, "tfiles/$tset/train" ) {
        # The trainer's output is not used; read it to the end so that the
        # trainer never blocks on a full pipe.
        1 while <$trainer>;

        # close() on a pipe is false when the command exited non-zero.
        close $trainer
            or warn "$trainer_cmd failed (wait status $?)\n";
    }
    else {
        warn "Cannot run $trainer_cmd: $!\n";
    }

    # Report back even after a failure, otherwise the host would stay
    # blocked forever.
    $job_results->enqueue( $tagger, $tset, $host );
    return;
}
sub get_or_restore_tset {
......@@ -143,7 +213,6 @@ sub create_tset {
);
close $test;
close $train;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment