Commit d36757e3 authored by Arne Köhn

more tagger wrappers

parent 703a27e5
#! /bin/sh
# usage: test [modelname] [inputfile] [outputfile]
#
# Pipes [inputfile] through ../incr-helper/taginput, the HunPos tagger
# (../hunpos/tagger.native with ../hunpos-standard/models/[modelname]) and
# ../incr-helper/tagoutput --l 1.  Every resulting line is cut after its
# second field, tabs become spaces, runs of spaces are squeezed to one, and
# the result is written to [outputfile].
# Prints SETUPTIME=<t> and TAGTIME=<t>, parsed from the tagger's stderr.
# The script changes into its own directory first, so relative
# [inputfile]/[outputfile] paths are resolved from there.

if [ "$#" -lt 3 ]; then
  echo "usage: ${0##*/} [modelname] [inputfile] [outputfile]" >&2
  exit 2
fi

# Directory containing this script; all tool paths below are relative to it.
WORKDIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) || exit 1
cd "$WORKDIR" || exit 1

# The tagger's stderr is captured here; removed again on any exit path.
TMPOUT=$(mktemp "${TMPDIR:-/tmp}/tmp-out.XXXXXX") || exit 1
trap 'rm -f -- "$TMPOUT"' EXIT
trap 'exit 1' HUP INT TERM

# NOTE: \t inside the sed expressions relies on GNU sed.
# The last expression has TWO spaces before the '*': one literal space
# followed by zero or more, so only real runs of spaces are squeezed.
../incr-helper/taginput <"$2" \
  | ../hunpos/tagger.native "$WORKDIR/../hunpos-standard/models/$1" 2>"$TMPOUT" \
  | ../incr-helper/tagoutput --l 1 \
  | sed -e 's/\([^\t ]*[\t ]*[^\t ]*\).*/\1/' -e 's/\t/ /g' -e 's/  */ /g' \
  >"$3"

TAGTIME=$(grep "Tagging: " "$TMPOUT")
SETUPTIME=$(grep "Setup: " "$TMPOUT")

# The inner echo is unquoted on purpose: word splitting squeezes whitespace
# so the sed patterns can rely on single spaces.
# shellcheck disable=SC2086
printf 'SETUPTIME=%s\n' "$(echo $SETUPTIME | sed 's/.* \([0-9:\.]*\).*/\1/')"
# shellcheck disable=SC2086
printf 'TAGTIME=%s\n' "$(echo $TAGTIME | sed 's/.*real \([0-9:\.]*\).*/\1/')"
#! /bin/sh
# usage: train [trainfile] [modelname]
# where modelname is the name of this experiment
#
# Training stub: prints a notice and does nothing else; both arguments are
# ignored.  The HunPos test wrappers from this commit load their model from
# ../hunpos-standard/models/, so the notice names hunpos-standard (it used to
# say svmt-standard).
echo "ERROR hunpos-incremental uses hunpos-standard's models. No need for training!"
#! /bin/sh
# usage: test [modelname] [inputfile] [outputfile]
#
# Pipes [inputfile] through ../incr-helper/taginput, the HunPos tagger
# (../hunpos/tagger.native with ../hunpos-standard/models/[modelname]) and
# ../incr-helper/tagoutput --l 2.  Every resulting line is cut after its
# second field, tabs become spaces, runs of spaces are squeezed to one, and
# the result is written to [outputfile].
# Prints SETUPTIME=<t> and TAGTIME=<t>, parsed from the tagger's stderr.
# The script changes into its own directory first, so relative
# [inputfile]/[outputfile] paths are resolved from there.

if [ "$#" -lt 3 ]; then
  echo "usage: ${0##*/} [modelname] [inputfile] [outputfile]" >&2
  exit 2
fi

# Directory containing this script; all tool paths below are relative to it.
WORKDIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) || exit 1
cd "$WORKDIR" || exit 1

# The tagger's stderr is captured here; removed again on any exit path.
TMPOUT=$(mktemp "${TMPDIR:-/tmp}/tmp-out.XXXXXX") || exit 1
trap 'rm -f -- "$TMPOUT"' EXIT
trap 'exit 1' HUP INT TERM

# NOTE: \t inside the sed expressions relies on GNU sed.
# The last expression has TWO spaces before the '*': one literal space
# followed by zero or more, so only real runs of spaces are squeezed.
../incr-helper/taginput <"$2" \
  | ../hunpos/tagger.native "$WORKDIR/../hunpos-standard/models/$1" 2>"$TMPOUT" \
  | ../incr-helper/tagoutput --l 2 \
  | sed -e 's/\([^\t ]*[\t ]*[^\t ]*\).*/\1/' -e 's/\t/ /g' -e 's/  */ /g' \
  >"$3"

TAGTIME=$(grep "Tagging: " "$TMPOUT")
SETUPTIME=$(grep "Setup: " "$TMPOUT")

# The inner echo is unquoted on purpose: word splitting squeezes whitespace
# so the sed patterns can rely on single spaces.
# shellcheck disable=SC2086
printf 'SETUPTIME=%s\n' "$(echo $SETUPTIME | sed 's/.* \([0-9:\.]*\).*/\1/')"
# shellcheck disable=SC2086
printf 'TAGTIME=%s\n' "$(echo $TAGTIME | sed 's/.*real \([0-9:\.]*\).*/\1/')"
#! /bin/sh
# usage: train [trainfile] [modelname]
# where modelname is the name of this experiment
#
# Training stub: prints a notice and does nothing else; both arguments are
# ignored.  The HunPos test wrappers from this commit load their model from
# ../hunpos-standard/models/, so the notice names hunpos-standard (it used to
# say svmt-standard).
echo "ERROR hunpos-incremental uses hunpos-standard's models. No need for training!"
This directory should contain:
- tagger.native
- trainer.native
HunPOS can be obtained from http://gitorious.org/hunpos
#! /bin/sh
# usage: test [modelname] [inputfile] [outputfile]
#
# Runs ../hunpos/tagger.native -z -c incremental on [inputfile] with the model
# ../hunpos-standard/models/[modelname].  From the tagger output, the
# space-separated numeric field after each tab-prefixed token is removed,
# tabs become spaces, runs of spaces are squeezed to one, and the result is
# written to [outputfile].
# Prints SETUPTIME=<t> and TAGTIME=<t>, parsed from the tagger's stderr.
# The script changes into its own directory first, so relative
# [inputfile]/[outputfile] paths are resolved from there.

if [ "$#" -lt 3 ]; then
  echo "usage: ${0##*/} [modelname] [inputfile] [outputfile]" >&2
  exit 2
fi

# Directory containing this script; all tool paths below are relative to it.
WORKDIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) || exit 1
cd "$WORKDIR" || exit 1

# The tagger's stderr is captured here; removed again on any exit path.
TMPOUT=$(mktemp "${TMPDIR:-/tmp}/tmp-out.XXXXXX") || exit 1
trap 'rm -f -- "$TMPOUT"' EXIT
trap 'exit 1' HUP INT TERM

# NOTE: \t inside the sed expressions relies on GNU sed.
# The last expression has TWO spaces before the '*': one literal space
# followed by zero or more, so only real runs of spaces are squeezed.
../hunpos/tagger.native -z -c incremental \
    "$WORKDIR/../hunpos-standard/models/$1" <"$2" 2>"$TMPOUT" \
  | sed -e 's/\(\t[^ \t]*\) [0-9.e-]*/\1/g' -e 's/\t/ /g' -e 's/  */ /g' \
  >"$3"

TAGTIME=$(grep "Tagging: " "$TMPOUT")
SETUPTIME=$(grep "Setup: " "$TMPOUT")

# The inner echo is unquoted on purpose: word splitting squeezes whitespace
# so the sed patterns can rely on single spaces.
# shellcheck disable=SC2086
printf 'SETUPTIME=%s\n' "$(echo $SETUPTIME | sed 's/.* \([0-9:\.]*\).*/\1/')"
# shellcheck disable=SC2086
printf 'TAGTIME=%s\n' "$(echo $TAGTIME | sed 's/.*real \([0-9:\.]*\).*/\1/')"
#! /bin/sh
# usage: train [trainfile] [modelname]
# where modelname is the name of this experiment
#
# Training stub: prints a notice and does nothing else; both arguments are
# ignored.  The HunPos test wrappers from this commit load their model from
# ../hunpos-standard/models/, so the notice names hunpos-standard (it used to
# say svmt-standard).
echo "ERROR hunpos-incremental uses hunpos-standard's models. No need for training!"
#! /usr/bin/env python
# usage: test [modelname] [inputfile] [outputfile] [num]
#
# Runs "SVMTagger -A" on [inputfile] with the model
# ../svmt-c_values-no_lookahead/models/[modelname] (relative to this script)
# and writes, for every tagger output line, the word followed by up to [num]
# of its tags, ordered by descending numeric value, to [outputfile].
# Works with both Python 2 and Python 3.
import sys, os, re, subprocess

# Leading "field1 field2" pair of a tagger line; only field1 (the word) is used.
wordtagre = re.compile(r'^([^ ]*) ([^ ]*)')
# Every following " TAG_VALUE" candidate; VALUE must parse as a float.
tagsre = re.compile(r' ([^ _]*)_([^ \n]*)')

# Directory holding the SVMTagger executable.  The original hard-coded path is
# kept as the fallback; set SVMTOOLDIR in the environment to override it.
svmtooldir = os.environ.get(
    "SVMTOOLDIR", "/home/arne/uni/bachelorarbeit/svm/SVMTool-1.3/bin/")


def outfunc(res, num, out=None):
    """Write one "word tag1 ... tagN" line per tagger output line.

    res -- iterable of tagger output lines, each including its newline
    num -- maximum number of tags to keep per word
    out -- writable text file object; defaults to sys.stdout

    Lines consisting only of a newline are copied through.  Lines that do not
    start with a "field1 field2" pair are reported on stdout as
    "ERROR <line> ERROR" and skipped.
    """
    if out is None:
        out = sys.stdout
    for l in res:
        if l == "\n":
            out.write("\n")
            continue
        m = wordtagre.match(l)
        if m is None:
            sys.stdout.write("ERROR %s ERROR\n" % l)
            continue
        word = m.group(1)
        tags = tagsre.findall(l[m.end():])
        # Highest value first; the sort is stable, so ties keep input order.
        tags.sort(key=lambda t: float(t[1]), reverse=True)
        out.write(word + " " + " ".join([t[0] for t in tags[:num]]) + "\n")


def main(argv):
    """Run the tagger and return the exit status for this script."""
    if len(argv) < 5:
        sys.stderr.write(
            "usage: %s [modelname] [inputfile] [outputfile] [num]\n" % argv[0])
        return 2
    modelname, inputfile, outputfile = argv[1:4]
    num = int(argv[4])
    workdir = os.path.abspath(os.path.dirname(argv[0]))
    # Argument list instead of a shell string: paths with spaces stay intact.
    comm = [os.path.join(svmtooldir, "SVMTagger"), "-A",
            os.path.join(workdir, "..", "svmt-c_values-no_lookahead",
                         "models", modelname)]
    with open(inputfile) as infile:
        with open(outputfile, 'w') as out:
            # universal_newlines=True yields text lines on Python 2 and 3.
            res = subprocess.Popen(comm, stdin=infile,
                                   stdout=subprocess.PIPE,
                                   universal_newlines=True)
            # Consume stdout before waiting so the child cannot block on a
            # full pipe.
            outfunc(res.stdout, num, out)
            res.stdout.close()
            return res.wait()


if __name__ == "__main__":
    sys.exit(main(sys.argv))

# Commented-out shell commands carried over unchanged from the original file:
# for l in inputfile.readlines():
# abspath="$(cd "${0%/*}" 2>/dev/null; echo "$PWD"/"${0##*/}")"
# WORKDIR=$(dirname $abspath)
# cd $WORKDIR
# . ../svmt-config
# $SVMTOOLDIR/SVMTagger -T 5 -S GLRL $WORKDIR/../svmt-c_values/models/$1 <$2 2>tmp-out-$1 >$3
# SETUPTIME=$(grep "START-UP: " tmp-out-$1)
# TAGTIME=$(grep "TAGGING: " tmp-out-$1)
# echo SETUPTIME=$(echo $SETUPTIME | sed "s/.* START-UP: \([0-9\.]*\).*/\1/")
# echo TAGTIME=$(echo $TAGTIME | sed "s/.* TAGGING: \([0-9\.]*\).*/\1/")
# rm tmp-out-$1
#! /bin/sh
# usage: train [trainfile] [modelname]
# where modelname is the name of this experiment
#
# Training stub: both arguments are ignored; the script only prints the
# notice below on stdout.
# NOTE(review): the notice names svmt-c_values, while the Python wrapper in
# the same commit reads ../svmt-c_values-no_lookahead/models/ -- confirm
# which directory is meant.
printf '%s\n' "ERROR svmt-c_values-viterbi uses svmt-c_values' models. No need for training!"
#! /bin/sh
# usage: tag [modelname] [inputfile] [outputfile]
#
# Pipes [inputfile] through ../incr-helper/taginput, ../tnt/tnt (model
# ../tnt-standard/models/[modelname], input read from "-") and
# ../incr-helper/tagoutput --l 1.  Lines starting with "%% " are dropped,
# tabs become spaces, runs of spaces are squeezed to one, and the result is
# written to [outputfile].
# Prints SETUPTIME=<t> and TAGTIME=<t>, parsed from tnt's stderr.
# The script changes into its own directory first, so relative
# [inputfile]/[outputfile] paths are resolved from there.

if [ "$#" -lt 3 ]; then
  echo "usage: ${0##*/} [modelname] [inputfile] [outputfile]" >&2
  exit 2
fi

# Directory containing this script; all tool paths below are relative to it.
WORKDIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) || exit 1
cd "$WORKDIR" || exit 1

# tnt's stderr is captured here; removed again on any exit path.
TMPOUT=$(mktemp "${TMPDIR:-/tmp}/tmp-out.XXXXXX") || exit 1
trap 'rm -f -- "$TMPOUT"' EXIT
trap 'exit 1' HUP INT TERM

# NOTE: \t inside the sed expressions relies on GNU sed.
# The last expression has TWO spaces before the '*': one literal space
# followed by zero or more, so only real runs of spaces are squeezed.
../incr-helper/taginput <"$2" \
  | ../tnt/tnt "$WORKDIR/../tnt-standard/models/$1" - 2>"$TMPOUT" \
  | ../incr-helper/tagoutput --l 1 \
  | grep -v "^%% " \
  | sed -e 's/\t/ /g' -e 's/  */ /g' \
  >"$3"

TAGTIME=$(grep "Tagging: " "$TMPOUT")
SETUPTIME=$(grep "Setup: " "$TMPOUT")

# The inner echo is unquoted on purpose: word splitting squeezes whitespace
# so the sed patterns can rely on single spaces.
# shellcheck disable=SC2086
printf 'SETUPTIME=%s\n' "$(echo $SETUPTIME | sed 's/.* \([0-9:\.]*\).*/\1/')"
# shellcheck disable=SC2086
printf 'TAGTIME=%s\n' "$(echo $TAGTIME | sed 's/.*real \([0-9:\.]*\).*/\1/')"
#! /bin/sh
# Stub: takes no action beyond printing a one-line notice on stdout.
printf '%s\n' 'needs tnt-standard'
#! /bin/sh
# usage: tag [modelname] [inputfile] [outputfile]
#
# Pipes [inputfile] through ../incr-helper/taginput, ../tnt/tnt (model
# ../tnt-standard/models/[modelname], input read from "-") and
# ../incr-helper/tagoutput --l 2.  Lines starting with "%% " are dropped,
# tabs become spaces, runs of spaces are squeezed to one, and the result is
# written to [outputfile].
# Prints SETUPTIME=<t> and TAGTIME=<t>, parsed from tnt's stderr.
# The script changes into its own directory first, so relative
# [inputfile]/[outputfile] paths are resolved from there.

if [ "$#" -lt 3 ]; then
  echo "usage: ${0##*/} [modelname] [inputfile] [outputfile]" >&2
  exit 2
fi

# Directory containing this script; all tool paths below are relative to it.
WORKDIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) || exit 1
cd "$WORKDIR" || exit 1

# tnt's stderr is captured here; removed again on any exit path.
TMPOUT=$(mktemp "${TMPDIR:-/tmp}/tmp-out.XXXXXX") || exit 1
trap 'rm -f -- "$TMPOUT"' EXIT
trap 'exit 1' HUP INT TERM

# NOTE: \t inside the sed expressions relies on GNU sed.
# The last expression has TWO spaces before the '*': one literal space
# followed by zero or more, so only real runs of spaces are squeezed.
../incr-helper/taginput <"$2" \
  | ../tnt/tnt "$WORKDIR/../tnt-standard/models/$1" - 2>"$TMPOUT" \
  | ../incr-helper/tagoutput --l 2 \
  | grep -v "^%% " \
  | sed -e 's/\t/ /g' -e 's/  */ /g' \
  >"$3"

TAGTIME=$(grep "Tagging: " "$TMPOUT")
SETUPTIME=$(grep "Setup: " "$TMPOUT")

# The inner echo is unquoted on purpose: word splitting squeezes whitespace
# so the sed patterns can rely on single spaces.
# shellcheck disable=SC2086
printf 'SETUPTIME=%s\n' "$(echo $SETUPTIME | sed 's/.* \([0-9:\.]*\).*/\1/')"
# shellcheck disable=SC2086
printf 'TAGTIME=%s\n' "$(echo $TAGTIME | sed 's/.*real \([0-9:\.]*\).*/\1/')"
#! /bin/sh
# Stub: takes no action beyond printing a one-line notice on stdout.
printf '%s\n' 'needs tnt-standard'
#! /bin/sh
# usage: tag [modelname] [inputfile] [outputfile]
#
# Pipes [inputfile] through ../incr-helper/taginput, ../tnt/tnt (model
# ../tnt-standard/models/[modelname], input read from "-") and
# ../incr-helper/tagoutput (no options).  Lines starting with "%% " are
# dropped, tabs become spaces, runs of spaces are squeezed to one, and the
# result is written to [outputfile].
# Prints SETUPTIME=<t> and TAGTIME=<t>, parsed from tnt's stderr.
# The script changes into its own directory first, so relative
# [inputfile]/[outputfile] paths are resolved from there.

if [ "$#" -lt 3 ]; then
  echo "usage: ${0##*/} [modelname] [inputfile] [outputfile]" >&2
  exit 2
fi

# Directory containing this script; all tool paths below are relative to it.
WORKDIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) || exit 1
cd "$WORKDIR" || exit 1

# tnt's stderr is captured here; removed again on any exit path.
TMPOUT=$(mktemp "${TMPDIR:-/tmp}/tmp-out.XXXXXX") || exit 1
trap 'rm -f -- "$TMPOUT"' EXIT
trap 'exit 1' HUP INT TERM

# NOTE: \t inside the sed expressions relies on GNU sed.
# The last expression has TWO spaces before the '*': one literal space
# followed by zero or more, so only real runs of spaces are squeezed.
../incr-helper/taginput <"$2" \
  | ../tnt/tnt "$WORKDIR/../tnt-standard/models/$1" - 2>"$TMPOUT" \
  | ../incr-helper/tagoutput \
  | grep -v "^%% " \
  | sed -e 's/\t/ /g' -e 's/  */ /g' \
  >"$3"

TAGTIME=$(grep "Tagging: " "$TMPOUT")
SETUPTIME=$(grep "Setup: " "$TMPOUT")

# The inner echo is unquoted on purpose: word splitting squeezes whitespace
# so the sed patterns can rely on single spaces.
# shellcheck disable=SC2086
printf 'SETUPTIME=%s\n' "$(echo $SETUPTIME | sed 's/.* \([0-9:\.]*\).*/\1/')"
# shellcheck disable=SC2086
printf 'TAGTIME=%s\n' "$(echo $TAGTIME | sed 's/.*real \([0-9:\.]*\).*/\1/')"
#! /bin/sh
# Stub: takes no action beyond printing a one-line notice on stdout.
printf '%s\n' 'needs tnt-standard'
#! /bin/sh
# usage: test [modelname] [inputfile] [outputfile]
#
# Pipes [inputfile] through ../incr-helper/taginput, ../tnt/tnt -z 100 (model
# ../tnt-standard/models/[modelname], input read from "-") and
# ../incr-helper/tagoutput.  Lines starting with "%% " are dropped, the
# tab-separated numeric field after each tab-prefixed token is removed, tabs
# become spaces, runs of spaces are squeezed to one, and the result is
# written to [outputfile].
# Prints SETUPTIME=<t> and TAGTIME=<t>, parsed from tnt's stderr.
# The script changes into its own directory first, so relative
# [inputfile]/[outputfile] paths are resolved from there.

if [ "$#" -lt 3 ]; then
  echo "usage: ${0##*/} [modelname] [inputfile] [outputfile]" >&2
  exit 2
fi

# Directory containing this script; all tool paths below are relative to it.
WORKDIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) || exit 1
cd "$WORKDIR" || exit 1

# tnt's stderr is captured here; removed again on any exit path.
TMPOUT=$(mktemp "${TMPDIR:-/tmp}/tmp-out.XXXXXX") || exit 1
trap 'rm -f -- "$TMPOUT"' EXIT
trap 'exit 1' HUP INT TERM

# NOTE: \t inside the sed expressions relies on GNU sed.
# The last expression has TWO spaces before the '*': one literal space
# followed by zero or more, so only real runs of spaces are squeezed.
../incr-helper/taginput <"$2" \
  | ../tnt/tnt -z 100 "$WORKDIR/../tnt-standard/models/$1" - 2>"$TMPOUT" \
  | ../incr-helper/tagoutput \
  | grep -v "^%% " \
  | sed -e 's/\(\t\t*[^ \t]*\)\t\t*[0-9.e+-]*/\1/g' -e 's/\t/ /g' -e 's/  */ /g' \
  >"$3"

TAGTIME=$(grep "Tagging: " "$TMPOUT")
SETUPTIME=$(grep "Setup: " "$TMPOUT")

# The inner echo is unquoted on purpose: word splitting squeezes whitespace
# so the sed patterns can rely on single spaces.
# shellcheck disable=SC2086
printf 'SETUPTIME=%s\n' "$(echo $SETUPTIME | sed 's/.* \([0-9:\.]*\).*/\1/')"
# shellcheck disable=SC2086
printf 'TAGTIME=%s\n' "$(echo $TAGTIME | sed 's/.*real \([0-9:\.]*\).*/\1/')"
#! /bin/sh
# usage: train [trainfile] [modelname]
# where modelname is the name of this experiment
#
# Training stub: both arguments are ignored; the script only prints the
# notice below on stdout.
printf '%s\n' "ERROR tnt-greedy uses tnt-standard's models. No need for training!"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment