Commit b4353e5d authored by Arne Koehn's avatar Arne Koehn

added some utility programs

parent 17eb16ae
#! /usr/bin/env python
from sqlobject import *
import sys,os,re
# The first command-line argument names the SQLite database file; it is made
# absolute before being turned into a connection URI.
db_filename = os.path.abspath(sys.argv[1])
connection_string = ''.join(('sqlite:', db_filename))

# Install the connection on sqlhub so it becomes the process connection
# (presumably the default used by the model class below -- see SQLObject docs).
sqlhub.processConnection = connection = connectionForURI(connection_string)
class Heise(SQLObject):
    """SQLObject model for one stored tagger result.

    The free-text report lives in ``infoblob``; the three percentage columns
    at the end (``unknown``, ``knownambig``, ``knownunambig``) are filled in
    from that report by the loop further down in this script.
    """

    class sqlmeta:
        # Attribute assignments are buffered and only written to the database
        # when syncUpdate() is called on the row.
        lazyUpdate = True

    # --- data recorded for the run ---
    tagger = StringCol()
    tset = IntCol()
    tagtime = FloatCol()
    setuptime = FloatCol()
    result = FloatCol()
    infoblob = StringCol()
    createdOn = TimestampCol()

    # --- percentages extracted from infoblob ---
    unknown = FloatCol()
    knownambig = FloatCol()
    knownunambig = FloatCol()
# Patterns that pull a percentage out of the report stored in ``infoblob``.
# Each one matches a section header line, then a line consisting of two
# integers followed by "<number>%", and captures the number.
unknown = re.compile(r'==== unknown =*\n\s*\d+\s+\d+\s+(\d+\.?\d*)%')
knownambig = re.compile(r'--- known ambiguous tokens -*\n\s*\d+\s+\d+\s+(\d+\.?\d*)%')
knownunambig = re.compile(r'--- known unambiguous tokens -*\n\s*\d+\s+\d+\s+(\d+\.?\d*)%')


def _percentage(pattern, blob):
    """Return the percentage *pattern* captures in *blob* as a float.

    Returns None when *blob* is empty/None or the section is not present, so
    the caller can decide what to do instead of hitting an AttributeError on
    a failed ``search``.
    """
    match = pattern.search(blob or '')
    if match is None:
        return None
    return float(match.group(1))


# Fill the three percentage columns of every row from its report text.
updated = 0
for res in Heise.select():
    print(res.tagger)
    values = {
        'unknown': _percentage(unknown, res.infoblob),
        'knownambig': _percentage(knownambig, res.infoblob),
        'knownunambig': _percentage(knownunambig, res.infoblob),
    }
    missing = sorted(name for name, value in values.items() if value is None)
    if missing:
        # Leave the row untouched rather than writing a partial update.
        sys.stderr.write("skipping %s: no match for %s\n"
                         % (res.tagger, ", ".join(missing)))
        continue
    res.unknown = values['unknown']
    res.knownambig = values['knownambig']
    res.knownunambig = values['knownunambig']
    # lazyUpdate is enabled on the model, so the assignments above are only
    # written out by this call.
    res.syncUpdate()
    updated += 1

# Number of rows that were updated (the original called len() on a row
# object, which has no length).
print(updated)
# inst.syncUpdate()
# unknown.search(Results.get(1).infoblob).groups()[0]
#! /usr/bin/env python
def get_errors(goldfile, testfile):
    """Count tags and tagging errors between a gold file and a test file.

    Both arguments are open file objects that are read line by line in
    lockstep.  A usable line has the form ``"<word> <tag>"`` (exactly one
    space).  A pair of lines is skipped when either of them does not split
    into exactly two fields, e.g. because it is blank.  Reading stops at the
    end of the gold file.

    Returns a tuple ``(numgold, numtest, errors)`` of dictionaries:
    ``numgold`` and ``numtest`` map each tag to the number of times it occurs
    in the gold and test file respectively; ``errors`` maps the string
    ``"<gold tag> tagged as <test tag>"`` to the number of times that
    confusion occurred.

    Raises SystemExit("words don't match!") as soon as the words on two
    corresponding lines differ.
    """
    numgold = {}
    numtest = {}
    errors = {}
    while True:
        gl = goldfile.readline()
        tl = testfile.readline()
        if not gl:
            break  # end of the gold file
        # gt = gold tag, tt = test tag
        try:
            (gword, gt) = gl.strip().split(" ")
            (tword, tt) = tl.strip().split(" ")
        except ValueError:
            continue  # blank or malformed line: skip this pair
        # This check must live outside the try block: a bare ``except`` would
        # swallow the SystemExit and the mismatch would go unnoticed.
        if tword != gword:
            raise SystemExit("words don't match!")
        numgold[gt] = numgold.get(gt, 0) + 1
        numtest[tt] = numtest.get(tt, 0) + 1
        if gt != tt:
            key = gt + " tagged as " + tt
            errors[key] = errors.get(key, 0) + 1
    return (numgold, numtest, errors)
if __name__ == "__main__":
    # Usage: <script> GOLDFILE TESTFILE
    import os
    import sys
    from operator import itemgetter

    # Open both inputs with context managers so they are closed even when
    # get_errors() aborts.
    with open(os.path.abspath(sys.argv[1])) as goldfile:
        with open(os.path.abspath(sys.argv[2])) as testfile:
            (g, t, e) = get_errors(goldfile, testfile)

    # Print the confusions, most frequent first, as (description, count)
    # tuples.  The parenthesised single-argument print produces the same
    # output on Python 2 and Python 3.
    for error in sorted(e.items(), key=itemgetter(1), reverse=True):
        print(error)
#!/usr/bin/perl -w
# plot2latextable.pl --- Converts the gnuplot plot files to a latex table
# Author: Arne Köhn <arne@arne-koehn.de>
# Created: 05 Oct 2009
# Version: 0.01
use warnings;
use strict;
# Directory holding the data files; it is the only command-line argument.
my $dir = $ARGV[0];
die "usage: $0 DIRECTORY\n" unless defined $dir;
chdir $dir or die "cannot chdir to '$dir': $!\n";

# All non-hidden entries of the directory whose name does not contain "plot",
# in the (sorted) order glob returns them.  Using glob instead of parsing the
# output of `ls | grep -v plot` avoids spawning a shell.
my @files = grep { !/plot/ } glob('*');

# Emit one LaTeX table row per file: the file name, then one cell per line.
foreach my $tagger (@files) {
    print $tagger;
    open my $fh, '<', $tagger or die "cannot open '$tagger': $!\n";
    while (my $line = <$fh>) {
        # For a line of the form "<number> <value> ..." keep only <value>;
        # any other line is printed unchanged.
        $line =~ s/\d+ ([^ ]+) .*/$1/;
        chomp $line;
        print " & ", $line;
    }
    close $fh;
    print " \\\\ \n";
}
__END__
=head1 NAME
plot2latextable.pl - converts the gnuplot plot files to a LaTeX table
=head1 SYNOPSIS
plot2latextable.pl DIRECTORY
DIRECTORY   directory containing the data files; entries whose name contains "plot" are skipped
=head1 DESCRIPTION
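Changes into the given directory and prints one LaTeX table row per file found there (skipping names that contain "plot"): the file name, then, for every line of the form "<number> <value> ...", the <value>, separated by " & " and terminated by "\\".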
=head1 AUTHOR
Arne Köhn, E<lt>arne@arne-koehn.deE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2009 by Arne Köhn
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.2 or,
at your option, any later version of Perl 5 you may have available.
Do what you want with this Program. GPLv3 or Later, Apache, PD...
=head1 BUGS
None reported... yet.
=cut
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment