Commit 66213c85 authored by Anna Vernerová's avatar Anna Vernerová Committed by Martin Rypar
Browse files

Pos (attribute capturing the part of speech)

parent 0b73b935
Pipeline #188814811 passed with stages
in 11 minutes and 28 seconds
......@@ -235,7 +235,7 @@ User Interface (ui)
- Set the window icon / favicon. [Jonathan L. Verner]
- Fix search not working on windows. [Jonathan L. Verner]
The query user enters is anded with a query constructed from the
The query user enters is added to a query constructed from the
pathnames of the lexicons to search through. On windows, these pathnames
typically contain '\' characters, which have special meaning in a
regular expression. We must therefore escape them (and others).
......@@ -309,7 +309,7 @@ Library (lib)
- Parse derivedN, Numbered attributes + attributes with refs. [Anša Vernerová]
- Rename ParamFunctor to ValencySlot and Scope to FunctorCombination.
[Anša Vernerová]
- Rename noun, verb, computed reflexive attrs to IsNoun, isVerb, isReflexverb.
- Rename noun, verb, computed reflexive attrs to isNoun, isVerb, isReflexverb.
[Anša Vernerová]
- Don't crash if frame has bad format. [Jonathan L. Verner]
- When saving, do not put additional newlines after lexemes. [Jonathan
......
......@@ -62,7 +62,7 @@ noun_forms_typical = defaultdict(lambda: {'prod': [], 'noprod': []}) # type: ig
noun_forms_special = defaultdict(lambda: {'prod': [], 'noprod': []}) # type: ignore
for lu in lexicon_coll.lexical_units:
if not lu.dynamic_attrs['isNoun']._data or not 'valdiff' in lu.attribs:
if not (lu.dynamic_attrs['pos']._data in ('stem noun', 'root noun')) or not 'valdiff' in lu.attribs:
continue
derived = lu.attribs.get('derivedV', {})._data.get('ids', []) # type: ignore
if not derived:
......@@ -76,8 +76,8 @@ for lu in lexicon_coll.lexical_units:
spec = lu.attribs['valdiff']
type_ = 'prod' if lu.dynamic_attrs['productive']._data else 'noprod'
class_ = lu.attribs['class']._data.strip().strip('?').split(' ')[0].split('/')[0].strip() if 'class' in lu.attribs else 'unspecified'
type_ = 'prod' if lu.dynamic_attrs['pos']._data == 'stem noun' else 'noprod'
class_ = lu.attribs['class']._data[0].strip().strip('?').split(' ')[0].split('/')[0].strip() if 'class' in lu.attribs else 'unspecified'
verb_forms[class_][type_].extend(sum([verb.match_key_values(['frame', funct, 'forms']) for funct in ACTANT_FUNCTORS], []))
......
import logging
from vallex.log import log
import sys
import os # for better debugging
import re
from vallex import Attrib
def compute_isNoun(lu):
    """Attach the dynamic ``isNoun`` flag to the lexical unit *lu*.

    The flag is true exactly when the substring ``'blu-n'`` occurs in
    ``lu._id``.  The resulting attribute is stored in ``lu.dynamic_attrs``
    under the attribute's own name.
    """
    id_marks_noun = 'blu-n' in lu._id
    noun_flag = Attrib(
        'isNoun',
        dynamic=True,
        help='Is it a noun (True/False)?',
        data=id_marks_noun,
    )
    lu.dynamic_attrs[noun_flag.name] = noun_flag
def compute_isVerb(lu):
attr = Attrib('isVerb', dynamic=True, help='Is it a verb (True/False)?', data='blu-v' in lu._id)
# Translates a lemma key ('V', 'N', 'Nx', 'A', 'D') into the part-of-speech
# label stored in the dynamic 'pos' attribute.  compute_pos consults this
# table for lexical units whose id contains 'v-w'
# (presumably PDT-Vallex entries, judging by the name -- TODO confirm).
PDTVALLEX_POS_MAPPING = {
'V': 'verb',
'N': 'stem noun',
'Nx': 'root noun',
'A': 'adjective',
'D': 'adverb'
}
def compute_pos(lu):
    """Attach the dynamic ``pos`` (part of speech) attribute to *lu*.

    The value is derived from the lexical unit's id:

    * id contains ``'blu-v'`` -> ``'verb'``
    * id contains ``'blu-n'`` -> ``'root noun'`` if the lemma data has a
      ``no-aspect`` key (optionally suffixed ``1``..``4``), otherwise
      ``'stem noun'``
    * id contains ``'v-w'``   -> the first lemma key looked up in
      ``PDTVALLEX_POS_MAPPING``
    * anything else (or an empty / unmapped lemma key) -> ``'unknown'``

    The attribute is stored in ``lu.dynamic_attrs`` under its own name.
    """
    attr = Attrib('pos', dynamic=True, help='Detailed part of speech (verb / stem noun / root noun)')
    attr._data = 'unknown'
    if 'blu-v' in lu._id:
        attr._data = 'verb'
    elif 'blu-n' in lu._id:
        lemma_keys = lu.lemma._data.keys()
        lacks_aspect = any(
            'no-aspect' + var in lemma_keys
            for var in ('', '1', '2', '3', '4')
        )
        attr._data = 'root noun' if lacks_aspect else 'stem noun'
    elif 'v-w' in lu._id:
        # dict.keys() returns a view in Python 3 and cannot be indexed with
        # [0]; take the first key through an iterator instead.  An empty
        # lemma or a key missing from the mapping falls back to 'unknown'
        # rather than raising.
        first_key = next(iter(lu.lemma._data.keys()), None)
        attr._data = PDTVALLEX_POS_MAPPING.get(first_key, 'unknown')
    lu.dynamic_attrs[attr.name] = attr
# Pattern: any text, then 'T' optionally followed by '1' or '2', at least one
# whitespace character, then 'SI' or 'SE', then any text.
REFLVERB_RGX = re.compile(r'.*T[12]?\s+S[IE].*')
"""A regexp for recognizing reflexive verbs from the id of their parent lexeme."""
def compute_isReflexverb(lu):
    """Attach the dynamic ``isReflexverb`` flag to the lexical unit *lu*.

    The flag is ``True`` when *lu* has a (truthy) parent whose ``_id`` is
    matched by ``REFLVERB_RGX``, and ``False`` otherwise.  The attribute is
    stored in ``lu.dynamic_attrs`` under its own name.
    """
    reflex_flag = Attrib(
        'isReflexverb',
        dynamic=True,
        help='Is it a reflexive verb (True/False)?',
    )
    if lu._parent and REFLVERB_RGX.match(lu._parent._id):
        reflex_flag._data = True
    else:
        reflex_flag._data = False
    lu.dynamic_attrs[reflex_flag.name] = reflex_flag
# Pattern: the string must end (ignoring trailing whitespace) with a
# whitespace-separated word 'se' or 'si', optionally followed by one digit.
REFLEXIVE_RGX = re.compile(r'.*\s+\bs[ei]\d?\b\s*$')
"""A regexp for recognizing reflexive lemmas."""
# Same shape as above, but the 'se'/'si' particle (with optional digit) is
# wrapped in literal parentheses.
OPT_REFLEXIVE_RGX = re.compile(r'.*\s+\(s[ei]\d?\)\s*$')
"""A regexp for recognizing optionally reflexive lemmas."""
def compute_productive(lu):
if 'blu-n' in lu._id:
attr = Attrib('productive', dynamic=True, help='Is it a productive noun (True/False)?')
attr._data = True
for var in ['', '1', '2', '3', '4']:
if 'no-aspect'+var in lu.lemma._data.keys():
attr._data = False
break
def compute_isReflexive(lu):
    """Attach the dynamic ``isReflexive`` attribute to the lexical unit *lu*.

    The value is one of three strings:

    * ``'always'``     -- at least one lemma value matches ``REFLEXIVE_RGX``
    * ``'optionally'`` -- none matches ``REFLEXIVE_RGX`` but at least one
      matches ``OPT_REFLEXIVE_RGX``
    * ``'never'``      -- no lemma value matches either pattern

    The attribute is stored in ``lu.dynamic_attrs`` under its own name.
    """
    attr = Attrib('isReflexive', dynamic=True, help='Is the lemma reflexive (always/optionally/never)?')
    # Materialize once so both checks see the same values.
    lemma_forms = list(lu.lemma._data.values())
    # any() stops at the first match instead of counting every match.
    if any(REFLEXIVE_RGX.match(form) for form in lemma_forms):
        attr._data = 'always'
    elif any(OPT_REFLEXIVE_RGX.match(form) for form in lemma_forms):
        attr._data = 'optionally'
    else:
        attr._data = 'never'
    lu.dynamic_attrs[attr.name] = attr
......@@ -30,9 +30,9 @@
from vallex.scripts.mapreduce import emit
def map_functor_count(lu):
if lu.isNoun == ['True']:
if lu.dynamic_attrs['pos']._data in ('stem noun','root noun'):
emit(('noun',), len(lu.frame.functor))
elif lu.isVerb == ['True']:
elif lu.dynamic_attrs['pos']._data == 'verb':
emit(('verb',), len(lu.frame.functor))
def reduce_functor_count(key, resuts):
......
......@@ -20,11 +20,11 @@ from vallex.scripts.mapreduce import emit
@requires('collection')
def map_table1_noun_verb_forms_summary(lu, collection):
if lu.isNoun == ['False'] or not lu.valdiff:
if not (lu.dynamic_attrs['pos']._data in ('stem noun', 'root noun') and lu.valdiff):
raise TestDoesNotApply
noun = lu
type_ = 'prod' if noun.productive == ['True'] else 'noprod'
type_ = 'prod' if noun.pos == ['stem noun'] else 'noprod'
class_ = noun.class_[0] if noun.class_ else 'unspecified'
for verb in [collection.id2lu(id) for id in noun.derivedV.ids]:
......@@ -54,11 +54,11 @@ def map_table1_noun_verb_forms_summary(lu, collection):
@requires('collection')
def map_table1b_differing_actant_summary(lu, collection):
if lu.isNoun == ['False'] or not lu.valdiff:
if not (lu.dynamic_attrs['pos']._data in ('stem noun', 'root noun') and lu.valdiff):
raise TestDoesNotApply
noun = lu
type_ = 'prod' if noun.productive == ['True'] else 'noprod'
type_ = 'prod' if noun.pos == ['stem noun'] else 'noprod'
class_ = noun.class_[0] if noun.class_ else 'unspecified'
for verb in [collection.id2lu(id) for id in noun.derivedV.ids]:
......@@ -80,10 +80,10 @@ def map_table1b_differing_actant_summary(lu, collection):
@requires('collection')
def map_table2_spec_nom_forms(lu, collection):
if lu.isNoun == ['False'] or not lu.valdiff:
if not (lu.dynamic_attrs['pos']._data in ('stem noun', 'root noun') and lu.valdiff):
raise TestDoesNotApply
type_ = 'prod' if lu.productive == ['True'] else 'noprod'
type_ = 'prod' if lu.pos == ['stem noun'] else 'noprod'
class_ = lu.class_[0] if lu.class_ else 'unspecified'
for funct in lu.valdiff.actant.eq:
......@@ -100,10 +100,10 @@ def reduce_table2_spec_nom_forms(key, results):
@requires('collection')
def map_table3a_actant_spec_forms(lu, collection):
if lu.isNoun == ['False'] or not lu.valdiff:
if not (lu.dynamic_attrs['pos']._data in ('stem noun', 'root noun') and lu.valdiff):
raise TestDoesNotApply
type_ = 'prod' if lu.productive == ['True'] else 'noprod'
type_ = 'prod' if lu.pos == ['stem noun'] else 'noprod'
class_ = lu.class_[0] if lu.class_ else 'unspecified'
# Actant functors which have at least one specific form
......@@ -114,10 +114,10 @@ def map_table3a_actant_spec_forms(lu, collection):
@requires('collection')
def map_table3b_actant_spec_forms(lu, collection):
if lu.isNoun == ['False'] or not lu.valdiff:
if not (lu.dynamic_attrs['pos']._data in ('stem noun', 'root noun') and lu.valdiff):
raise TestDoesNotApply
type_ = 'prod' if lu.productive == ['True'] else 'noprod'
type_ = 'prod' if lu.pos == ['stem noun'] else 'noprod'
class_ = lu.class_[0] if lu.class_ else 'unspecified'
# Actant functors which specific forms in parentheses (joined together by ',')
......
......@@ -20,7 +20,7 @@ from vallex.scripts import changes, requires, TestDoesNotApply, TestFailed
@changes('valdiff')
@requires('lumap')
def transform_lu_add_valdiff(lu, lumap):
if 'isNoun' not in lu.dynamic_attrs or not lu.dynamic_attrs['isNoun']._data:
if 'pos' not in lu.dynamic_attrs or not lu.dynamic_attrs['pos']._data in ('stem noun', 'root noun'):
raise TestDoesNotApply
if 'derivedV' not in lu.attribs or lu.attribs['derivedV']._data['ids'] == []:
raise TestDoesNotApply
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment