Commit 2eb648cb authored by Arne Koehn

some more svmt-based taggers

parent a46a546a
#! /bin/sh
# usage: test [modelname] [inputfile] [outputfile]
#
# Runs SVMTagger with the model models/<modelname> found next to this script,
# reading the text to tag from inputfile and writing the tagger's stdout to
# outputfile. The tagger's stderr is captured in a scratch file so that the
# START-UP and TAGGING figures can be reported on stdout as
#   SETUPTIME=<number>
#   TAGTIME=<number>
# The script changes into its own directory before opening inputfile and
# outputfile, so relative paths are resolved against that directory.

# Absolute path of this script; if the cd into $0's directory part fails, the
# error is suppressed and the current directory is used instead.
abspath="$(cd "${0%/*}" 2>/dev/null; echo "$PWD"/"${0##*/}")"
WORKDIR=$(dirname "$abspath")
# Stop here rather than run the tagger from an unexpected directory.
cd "$WORKDIR" || exit 1
# Expected to define $SVMTOOLDIR.
. ../svmt-config

# Paths and arguments are quoted so that directories or file names containing
# whitespace are passed through intact.
"$SVMTOOLDIR/SVMTagger" "$WORKDIR/models/$1" <"$2" 2>"tmp-out-$1" >"$3"

SETUPTIME=$(grep "START-UP: " "tmp-out-$1")
TAGTIME=$(grep "TAGGING: " "tmp-out-$1")
# $SETUPTIME / $TAGTIME are deliberately left unquoted below: the matched
# lines are collapsed to single spaces before sed extracts the number.
echo SETUPTIME=$(echo $SETUPTIME | sed "s/.* START-UP: \([0-9\.]*\).*/\1/")
echo TAGTIME=$(echo $TAGTIME | sed "s/.* TAGGING: \([0-9\.]*\).*/\1/")
rm "tmp-out-$1"
#! /bin/sh
# usage: train [trainfile] [modelname]
# where modelname is the name of this experiment
#
# Writes an SVMTlearn configuration to tmpfiles/config-<modelname>.tmp next to
# this script and runs SVMTlearn on it from inside the models/ directory.
# This variant uses the window "W = 4 2", the action "do M0 LRL" and explicit
# A0k / A0u feature lists.
# NOTE(review): SVMTlearn is started with models/ as its working directory, so
# a relative trainfile is presumably resolved from there -- confirm, or pass an
# absolute path.

# Absolute path of this script; if the cd into $0's directory part fails, the
# error is suppressed and the current directory is used instead.
abspath="$(cd "${0%/*}" 2>/dev/null; echo "$PWD"/"${0##*/}")"
WORKDIR=$(dirname "$abspath")
# Stop here rather than create files in an unexpected directory.
cd "$WORKDIR" || exit 1
# with this you get $SVMDIR and $SVMTOOLDIR
. ../svmt-config

# Create the scratch and output directories if they are missing; give up if
# they cannot be created.
mkdir -p tmpfiles models || exit 1

# The heredoc delimiter is unquoted on purpose: $1, $2 and $SVMDIR are
# substituted into the generated configuration.
cat > "tmpfiles/config-$2.tmp" <<EOF
TRAINSET = $1
SVMDIR = $SVMDIR
NAME = $2
# --------------- window definition (lenght, core_position) ---
W = 4 2
# --------------- feature filtering (count_cut_off, max_mapping_size)
F = 2 100000
# --------------- default C Parameter values ------------------
CK = 0.1086
CU = 0.07975
# --------------- % of unknown words expected (3 by default) -
# X = 3
# --------------- weight filtering for known words ------------
Kfilter = 0
# --------------- weight filtering for unknown words ----------
Ufilter = 0
# --------------- remove intermediate files -------------------
REMOVE_FILES = 1
# --------------- action items --------------------------------
do M0 LRL
A0k = C(0;-2) C(0;-1) C(0;0) C(0;1) C(0;-2,-1) C(0;-1,0) C(0;0,1) C(0;-1,1) C(0;-2,-1,0) C(0;-2,-1,1) C(0;-1,0,1) C(1;-2) C(1;-1) C(1;-2,-1) C(1;-1,1) C(1;-2,-1,1) k(0) k(1) m(0) m(1)
A0u = C(0;-2) C(0;-1) C(0;0) C(0;1) C(0;-2,-1) C(0;-1,0) C(0;0,1) C(0;-1,1) C(0;-2,-1,0) C(0;-2,-1,1) C(0;-1,0,1) C(1;-2) C(1;-1) C(1;-2,-1) C(1;-1,1) C(1;-2,-1,1) k(0) k(1) m(0) m(1) a(2) a(3) a(4) z(2) z(3) z(4) ca(1) cz(1) L SA AA SN CA CAA CP CC CN MW
# -----------------------------------------------------------------------------------
EOF

# Run the learner from models/; the configuration is passed by absolute path.
cd models || exit 1
"$SVMTOOLDIR/SVMTlearn" -V 1 "$WORKDIR/tmpfiles/config-$2.tmp"
#! /bin/sh
# usage: test [modelname] [inputfile] [outputfile]
#
# Tags inputfile with SVMTagger using models/<modelname> from this script's
# directory and stores the tagged result in outputfile. The tagger's stderr is
# kept in a scratch file (tmp-out-<modelname>) just long enough to print the
# START-UP and TAGGING figures as SETUPTIME=<number> and TAGTIME=<number>.
# inputfile and outputfile are opened after changing into the script's
# directory, so relative paths are taken from there.

# Absolute path of this script; if the cd into $0's directory part fails, the
# error is suppressed and the current directory is used instead.
abspath="$(cd "${0%/*}" 2>/dev/null; echo "$PWD"/"${0##*/}")"
WORKDIR=$(dirname "$abspath")
# Abort if the script directory is unreachable instead of tagging elsewhere.
cd "$WORKDIR" || exit 1
# Expected to define $SVMTOOLDIR.
. ../svmt-config

# Quoted so that whitespace in the tool path, model name or file arguments
# does not split them into several words.
"$SVMTOOLDIR/SVMTagger" "$WORKDIR/models/$1" <"$2" 2>"tmp-out-$1" >"$3"

SETUPTIME=$(grep "START-UP: " "tmp-out-$1")
TAGTIME=$(grep "TAGGING: " "tmp-out-$1")
# The inner expansions stay unquoted on purpose so the grep output is
# whitespace-normalised before sed picks out the number.
echo SETUPTIME=$(echo $SETUPTIME | sed "s/.* START-UP: \([0-9\.]*\).*/\1/")
echo TAGTIME=$(echo $TAGTIME | sed "s/.* TAGGING: \([0-9\.]*\).*/\1/")
rm "tmp-out-$1"
#! /bin/sh
# usage: train [trainfile] [modelname]
# where modelname is the name of this experiment
#
# Generates tmpfiles/config-<modelname>.tmp beside this script and trains a
# model by running SVMTlearn on it from within models/.
# This variant uses the window "W = 3 2", the action "do M0 LR" and A0k / A0u
# feature lists that only reference positions up to 0.
# NOTE(review): SVMTlearn runs with models/ as its working directory, so a
# relative trainfile is presumably looked up from there -- confirm, or pass an
# absolute path.

# Absolute path of this script; if the cd into $0's directory part fails, the
# error is suppressed and the current directory is used instead.
abspath="$(cd "${0%/*}" 2>/dev/null; echo "$PWD"/"${0##*/}")"
WORKDIR=$(dirname "$abspath")
# Abort if the script directory is unreachable instead of writing elsewhere.
cd "$WORKDIR" || exit 1
# with this you get $SVMDIR and $SVMTOOLDIR
. ../svmt-config

# Make sure both working directories exist; stop if they cannot be created.
mkdir -p tmpfiles models || exit 1

# Unquoted delimiter: $1, $2 and $SVMDIR are expanded into the config text.
cat > "tmpfiles/config-$2.tmp" <<EOF
TRAINSET = $1
SVMDIR = $SVMDIR
NAME = $2
# --------------- window definition (lenght, core_position) ---
W = 3 2
# --------------- feature filtering (count_cut_off, max_mapping_size)
F = 2 100000
# --------------- default C Parameter values ------------------
CK = 0.1086
CU = 0.07975
# --------------- % of unknown words expected (3 by default) -
# X = 3
# --------------- weight filtering for known words ------------
Kfilter = 0
# --------------- weight filtering for unknown words ----------
Ufilter = 0
# --------------- remove intermediate files -------------------
REMOVE_FILES = 1
# --------------- action items --------------------------------
do M0 LR
#ambiguous-right [default]
A0k = C(0;-2) C(0;-1) C(0;0) C(0;-2,-1) C(0;-1,0) C(0;-2,-1,0) C(1;-2) C(1;-1) C(1;-2,-1) k(0) m(0)
A0u = C(0;-2) C(0;-1) C(0;0) C(0;-2,-1) C(0;-1,0) C(0;-2,-1,0) C(1;-2) C(1;-1) C(1;-2,-1) k(0) m(0) a(2) a(3) a(4) z(2) z(3) z(4) ca(1) cz(1) L SA AA SN CA CAA CP CC CN MW
# -----------------------------------------------------------------------------------
EOF

# SVMTlearn is started inside models/ and given the config by absolute path.
cd models || exit 1
"$SVMTOOLDIR/SVMTlearn" -V 1 "$WORKDIR/tmpfiles/config-$2.tmp"
#! /bin/sh
# usage: test [modelname] [inputfile] [outputfile]
#
# Runs SVMTagger with the options "-T 5 -S GLRL" on inputfile and writes the
# tagged text to outputfile. The model is read from the sibling experiment
# directory ../svmt-c_values/models/<modelname>, not from a models/ directory
# of this script. The tagger's stderr is kept in tmp-out-<modelname> until the
# START-UP and TAGGING figures have been printed as SETUPTIME=<number> and
# TAGTIME=<number>.
# inputfile and outputfile are opened after changing into the script's
# directory, so relative paths are taken from there.

# Absolute path of this script; if the cd into $0's directory part fails, the
# error is suppressed and the current directory is used instead.
abspath="$(cd "${0%/*}" 2>/dev/null; echo "$PWD"/"${0##*/}")"
WORKDIR=$(dirname "$abspath")
# The model path below is relative to $WORKDIR, so a failed cd must be fatal.
cd "$WORKDIR" || exit 1
# Expected to define $SVMTOOLDIR.
. ../svmt-config

# Quoted so that whitespace in the tool path, model name or file arguments
# does not split them into several words.
"$SVMTOOLDIR/SVMTagger" -T 5 -S GLRL "$WORKDIR/../svmt-c_values/models/$1" <"$2" 2>"tmp-out-$1" >"$3"

SETUPTIME=$(grep "START-UP: " "tmp-out-$1")
TAGTIME=$(grep "TAGGING: " "tmp-out-$1")
# The inner expansions stay unquoted on purpose so the grep output is
# whitespace-normalised before sed picks out the number.
echo SETUPTIME=$(echo $SETUPTIME | sed "s/.* START-UP: \([0-9\.]*\).*/\1/")
echo TAGTIME=$(echo $TAGTIME | sed "s/.* TAGGING: \([0-9\.]*\).*/\1/")
rm "tmp-out-$1"
#! /bin/sh
# usage: train [trainfile] [modelname]
# where modelname is the name of this experiment
#
# Placeholder: this tagger variant has no training step of its own. Both
# arguments are ignored; the script only prints a notice on stdout.
printf '%s\n' "ERROR svmt-c_values-viterbi uses svmt-c_values' models. No need for training!"
#! /bin/sh
# usage: test [modelname] [inputfile] [outputfile]
#
# Applies the SVMTool model models/<modelname> (relative to this script) to
# inputfile via SVMTagger and saves the tagger's stdout in outputfile.
# The tagger's stderr goes to the scratch file tmp-out-<modelname>, from which
# the START-UP and TAGGING figures are printed on stdout as
# SETUPTIME=<number> and TAGTIME=<number>; the scratch file is then removed.
# Relative inputfile/outputfile paths are resolved after the script has
# changed into its own directory.

# Absolute path of this script; if the cd into $0's directory part fails, the
# error is suppressed and the current directory is used instead.
abspath="$(cd "${0%/*}" 2>/dev/null; echo "$PWD"/"${0##*/}")"
WORKDIR=$(dirname "$abspath")
# Do not continue from the wrong directory if the cd fails.
cd "$WORKDIR" || exit 1
# Expected to define $SVMTOOLDIR.
. ../svmt-config

# Every path is quoted so embedded whitespace cannot split an argument or a
# redirection target.
"$SVMTOOLDIR/SVMTagger" "$WORKDIR/models/$1" <"$2" 2>"tmp-out-$1" >"$3"

SETUPTIME=$(grep "START-UP: " "tmp-out-$1")
TAGTIME=$(grep "TAGGING: " "tmp-out-$1")
# Left unquoted intentionally: the shell collapses the matched lines to
# single-space-separated words before sed extracts the number.
echo SETUPTIME=$(echo $SETUPTIME | sed "s/.* START-UP: \([0-9\.]*\).*/\1/")
echo TAGTIME=$(echo $TAGTIME | sed "s/.* TAGGING: \([0-9\.]*\).*/\1/")
rm "tmp-out-$1"
#! /bin/sh
# usage: train [trainfile] [modelname]
# where modelname is the name of this experiment
#
# Builds tmpfiles/config-<modelname>.tmp in this script's directory and runs
# SVMTlearn on it from inside models/.
# This variant uses the window "W = 5 2" and the action "do M0 LRL"; it writes
# no A0k / A0u feature lists into the configuration.
# NOTE(review): without A0k / A0u lines SVMTlearn presumably falls back to its
# built-in feature set -- confirm against the SVMTool documentation.
# NOTE(review): SVMTlearn runs with models/ as its working directory, so a
# relative trainfile is presumably looked up from there -- confirm, or pass an
# absolute path.

# Absolute path of this script; if the cd into $0's directory part fails, the
# error is suppressed and the current directory is used instead.
abspath="$(cd "${0%/*}" 2>/dev/null; echo "$PWD"/"${0##*/}")"
WORKDIR=$(dirname "$abspath")
# Do not continue from the wrong directory if the cd fails.
cd "$WORKDIR" || exit 1
# with this you get $SVMDIR and $SVMTOOLDIR
. ../svmt-config

# Ensure the scratch and model directories exist; bail out if they cannot be
# created.
mkdir -p tmpfiles models || exit 1

# The delimiter is unquoted so $1, $2 and $SVMDIR are filled in.
cat > "tmpfiles/config-$2.tmp" <<EOF
TRAINSET = $1
SVMDIR = $SVMDIR
NAME = $2
# --------------- window definition (lenght, core_position) ---
W = 5 2
# --------------- feature filtering (count_cut_off, max_mapping_size)
F = 2 100000
# --------------- default C Parameter values ------------------
CK = 0.1086
CU = 0.07975
# --------------- % of unknown words expected (3 by default) -
# X = 3
# --------------- weight filtering for known words ------------
Kfilter = 0
# --------------- weight filtering for unknown words ----------
Ufilter = 0
# --------------- remove intermediate files -------------------
REMOVE_FILES = 1
# --------------- action items --------------------------------
do M0 LRL
EOF

# Train from within models/; the config file is referenced by absolute path.
cd models || exit 1
"$SVMTOOLDIR/SVMTlearn" -V 1 "$WORKDIR/tmpfiles/config-$2.tmp"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment