Commit 66213c85, authored by Anna Vernerová, committed by Martin Rypar
Browse files

Pos (attribute capturing the part of speech)

parent 0b73b935
Pipeline #188814811 passed with stages
in 11 minutes and 28 seconds
...@@ -235,7 +235,7 @@ User Interface (ui) ...@@ -235,7 +235,7 @@ User Interface (ui)
- Set the window icon / favicon. [Jonathan L. Verner] - Set the window icon / favicon. [Jonathan L. Verner]
- Fix search not working on windows. [Jonathan L. Verner] - Fix search not working on windows. [Jonathan L. Verner]
The query user enters is anded with a query constructed from the The query user enters is added to a query constructed from the
pathnames of the lexicons to search through. On windows, these pathnames pathnames of the lexicons to search through. On windows, these pathnames
typically contain '\' characters, which have special meaning in a typically contain '\' characters, which have special meaning in a
regular expression. We must therefore escape them (and others). regular expression. We must therefore escape them (and others).
...@@ -309,7 +309,7 @@ Library (lib) ...@@ -309,7 +309,7 @@ Library (lib)
- Parse derivedN, Numbered attributes + attributes with refs. [Anša Vernerová] - Parse derivedN, Numbered attributes + attributes with refs. [Anša Vernerová]
- Rename ParamFunctor to ValencySlot and Scope to FunctorCombination. - Rename ParamFunctor to ValencySlot and Scope to FunctorCombination.
[Anša Vernerová] [Anša Vernerová]
- Rename noun, verb, computed reflexive attrs to IsNoun, isVerb, isReflexverb. - Rename noun, verb, computed reflexive attrs to isNoun, isVerb, isReflexverb.
[Anša Vernerová] [Anša Vernerová]
- Don't crash if frame has bad format. [Jonathan L. Verner] - Don't crash if frame has bad format. [Jonathan L. Verner]
- When saving, do not put additional newlines after lexemes. [Jonathan - When saving, do not put additional newlines after lexemes. [Jonathan
......
...@@ -62,7 +62,7 @@ noun_forms_typical = defaultdict(lambda: {'prod': [], 'noprod': []}) # type: ig ...@@ -62,7 +62,7 @@ noun_forms_typical = defaultdict(lambda: {'prod': [], 'noprod': []}) # type: ig
noun_forms_special = defaultdict(lambda: {'prod': [], 'noprod': []}) # type: ignore noun_forms_special = defaultdict(lambda: {'prod': [], 'noprod': []}) # type: ignore
for lu in lexicon_coll.lexical_units: for lu in lexicon_coll.lexical_units:
if not lu.dynamic_attrs['isNoun']._data or not 'valdiff' in lu.attribs: if not (lu.dynamic_attrs['pos']._data in ('stem noun', 'root noun')) or not 'valdiff' in lu.attribs:
continue continue
derived = lu.attribs.get('derivedV', {})._data.get('ids', []) # type: ignore derived = lu.attribs.get('derivedV', {})._data.get('ids', []) # type: ignore
if not derived: if not derived:
...@@ -76,8 +76,8 @@ for lu in lexicon_coll.lexical_units: ...@@ -76,8 +76,8 @@ for lu in lexicon_coll.lexical_units:
spec = lu.attribs['valdiff'] spec = lu.attribs['valdiff']
type_ = 'prod' if lu.dynamic_attrs['productive']._data else 'noprod' type_ = 'prod' if lu.dynamic_attrs['pos']._data == 'stem noun' else 'noprod'
class_ = lu.attribs['class']._data.strip().strip('?').split(' ')[0].split('/')[0].strip() if 'class' in lu.attribs else 'unspecified' class_ = lu.attribs['class']._data[0].strip().strip('?').split(' ')[0].split('/')[0].strip() if 'class' in lu.attribs else 'unspecified'
verb_forms[class_][type_].extend(sum([verb.match_key_values(['frame', funct, 'forms']) for funct in ACTANT_FUNCTORS], [])) verb_forms[class_][type_].extend(sum([verb.match_key_values(['frame', funct, 'forms']) for funct in ACTANT_FUNCTORS], []))
......
import logging
from vallex.log import log
import sys
import os # for better debugging
import re import re
from vallex import Attrib from vallex import Attrib
PDTVALLEX_POS_MAPPING = {
def compute_isNoun(lu): 'V': 'verb',
attr = Attrib('isNoun', dynamic=True, help='Is it a noun (True/False)?', data='blu-n' in lu._id) 'N': 'stem noun',
lu.dynamic_attrs[attr.name] = attr 'Nx': 'root noun',
'A': 'adjective',
'D': 'adverb'
def compute_isVerb(lu): }
attr = Attrib('isVerb', dynamic=True, help='Is it a verb (True/False)?', data='blu-v' in lu._id)
def compute_pos(lu):
    """Attach a dynamic ``pos`` attribute (detailed part of speech) to *lu*.

    The value is derived from the lexical unit's id:

    * id contains ``'blu-v'``: ``'verb'``
    * id contains ``'blu-n'``: ``'stem noun'``, or ``'root noun'`` when the
      lemma data has a key ``'no-aspect'`` or ``'no-aspect1'`` ..
      ``'no-aspect4'``
    * id contains ``'v-w'``: the first key of the lemma data is looked up in
      ``PDTVALLEX_POS_MAPPING``
    * anything else: ``'unknown'``

    The resulting attribute is stored in ``lu.dynamic_attrs['pos']``.
    """
    attr = Attrib('pos', dynamic=True, help='Detailed part of speech (verb / stem noun / root noun)')
    attr._data = 'unknown'
    if 'blu-v' in lu._id:
        attr._data = 'verb'
    elif 'blu-n' in lu._id:
        attr._data = 'stem noun'
        # NOTE(review): nesting of this loop under the 'blu-n' branch is
        # reconstructed (indentation is lost in this view) -- confirm.
        if any('no-aspect' + var in lu.lemma._data for var in ('', '1', '2', '3', '4')):
            attr._data = 'root noun'
    elif 'v-w' in lu._id:
        # ``dict.keys()`` returns a view on Python 3 and cannot be indexed
        # with ``[0]``; take the first key through an iterator instead.
        first_key = next(iter(lu.lemma._data.keys()), None)
        # Keep the 'unknown' default when there is no key or no mapping for it.
        attr._data = PDTVALLEX_POS_MAPPING.get(first_key, attr._data)
    lu.dynamic_attrs[attr.name] = attr
REFLVERB_RGX = re.compile(r'.*T[12]?\s+S[IE].*') REFLEXIVE_RGX = re.compile(r'.*\s+\bs[ei]\d?\b\s*$')
"""A regexp for recognizing reflexive verbs from the id of their parent lexeme.""" """A regexp for recognizing reflexive lemmas."""
OPT_REFLEXIVE_RGX = re.compile(r'.*\s+\(s[ei]\d?\)\s*$')
"""A regexp for recognizing optionally reflexive lemmas."""
def compute_isReflexive(lu):
    """Attach a dynamic ``isReflexive`` attribute to *lu*.

    The lemma values are tested against ``REFLEXIVE_RGX`` first; if at least
    one matches, the attribute is ``'always'``.  Otherwise, if at least one
    matches ``OPT_REFLEXIVE_RGX``, it is ``'optionally'``.  In all other
    cases it is ``'never'``.  The attribute is stored in
    ``lu.dynamic_attrs['isReflexive']``.
    """
    attr = Attrib('isReflexive', dynamic=True, help='Is the lemma reflexive (always/optionally/never)?')
    if [form for form in lu.lemma._data.values() if REFLEXIVE_RGX.match(form)]:
        reflexivity = 'always'
    elif [form for form in lu.lemma._data.values() if OPT_REFLEXIVE_RGX.match(form)]:
        reflexivity = 'optionally'
    else:
        reflexivity = 'never'
    attr._data = reflexivity
    lu.dynamic_attrs[attr.name] = attr
def compute_productive(lu):
    """Attach a dynamic ``productive`` attribute to *lu* if its id contains ``'blu-n'``.

    The attribute starts as ``True`` and is set to ``False`` when the lemma
    data has a key ``'no-aspect'`` or ``'no-aspect1'`` .. ``'no-aspect4'``.
    """
    if 'blu-n' in lu._id:
        attr = Attrib('productive', dynamic=True, help='Is it a productive noun (True/False)?')
        attr._data = True
        # Check the bare key and its numbered variants.
        for var in ['', '1', '2', '3', '4']:
            if 'no-aspect'+var in lu.lemma._data.keys():
                attr._data = False
                break
        # NOTE(review): indentation is lost in this view; the registration is
        # placed inside the ``if`` because ``attr`` is only bound there -- confirm.
        lu.dynamic_attrs[attr.name] = attr
...@@ -30,9 +30,9 @@ ...@@ -30,9 +30,9 @@
from vallex.scripts.mapreduce import emit from vallex.scripts.mapreduce import emit
def map_functor_count(lu): def map_functor_count(lu):
if lu.isNoun == ['True']: if lu.dynamic_attrs['pos']._data in ('stem noun','root noun'):
emit(('noun',), len(lu.frame.functor)) emit(('noun',), len(lu.frame.functor))
elif lu.isVerb == ['True']: elif lu.dynamic_attrs['pos']._data == 'verb':
emit(('verb',), len(lu.frame.functor)) emit(('verb',), len(lu.frame.functor))
def reduce_functor_count(key, resuts): def reduce_functor_count(key, resuts):
......
...@@ -20,11 +20,11 @@ from vallex.scripts.mapreduce import emit ...@@ -20,11 +20,11 @@ from vallex.scripts.mapreduce import emit
@requires('collection') @requires('collection')
def map_table1_noun_verb_forms_summary(lu, collection): def map_table1_noun_verb_forms_summary(lu, collection):
if lu.isNoun == ['False'] or not lu.valdiff: if not (lu.dynamic_attrs['pos']._data in ('stem noun', 'root noun') and lu.valdiff):
raise TestDoesNotApply raise TestDoesNotApply
noun = lu noun = lu
type_ = 'prod' if noun.productive == ['True'] else 'noprod' type_ = 'prod' if noun.pos == ['stem noun'] else 'noprod'
class_ = noun.class_[0] if noun.class_ else 'unspecified' class_ = noun.class_[0] if noun.class_ else 'unspecified'
for verb in [collection.id2lu(id) for id in noun.derivedV.ids]: for verb in [collection.id2lu(id) for id in noun.derivedV.ids]:
...@@ -54,11 +54,11 @@ def map_table1_noun_verb_forms_summary(lu, collection): ...@@ -54,11 +54,11 @@ def map_table1_noun_verb_forms_summary(lu, collection):
@requires('collection') @requires('collection')
def map_table1b_differing_actant_summary(lu, collection): def map_table1b_differing_actant_summary(lu, collection):
if lu.isNoun == ['False'] or not lu.valdiff: if not (lu.dynamic_attrs['pos']._data in ('stem noun', 'root noun') and lu.valdiff):
raise TestDoesNotApply raise TestDoesNotApply
noun = lu noun = lu
type_ = 'prod' if noun.productive == ['True'] else 'noprod' type_ = 'prod' if noun.pos == ['stem noun'] else 'noprod'
class_ = noun.class_[0] if noun.class_ else 'unspecified' class_ = noun.class_[0] if noun.class_ else 'unspecified'
for verb in [collection.id2lu(id) for id in noun.derivedV.ids]: for verb in [collection.id2lu(id) for id in noun.derivedV.ids]:
...@@ -80,10 +80,10 @@ def map_table1b_differing_actant_summary(lu, collection): ...@@ -80,10 +80,10 @@ def map_table1b_differing_actant_summary(lu, collection):
@requires('collection') @requires('collection')
def map_table2_spec_nom_forms(lu, collection): def map_table2_spec_nom_forms(lu, collection):
if lu.isNoun == ['False'] or not lu.valdiff: if not (lu.dynamic_attrs['pos']._data in ('stem noun', 'root noun') and lu.valdiff):
raise TestDoesNotApply raise TestDoesNotApply
type_ = 'prod' if lu.productive == ['True'] else 'noprod' type_ = 'prod' if lu.pos == ['stem noun'] else 'noprod'
class_ = lu.class_[0] if lu.class_ else 'unspecified' class_ = lu.class_[0] if lu.class_ else 'unspecified'
for funct in lu.valdiff.actant.eq: for funct in lu.valdiff.actant.eq:
...@@ -100,10 +100,10 @@ def reduce_table2_spec_nom_forms(key, results): ...@@ -100,10 +100,10 @@ def reduce_table2_spec_nom_forms(key, results):
@requires('collection') @requires('collection')
def map_table3a_actant_spec_forms(lu, collection): def map_table3a_actant_spec_forms(lu, collection):
if lu.isNoun == ['False'] or not lu.valdiff: if not (lu.dynamic_attrs['pos']._data in ('stem noun', 'root noun') and lu.valdiff):
raise TestDoesNotApply raise TestDoesNotApply
type_ = 'prod' if lu.productive == ['True'] else 'noprod' type_ = 'prod' if lu.pos == ['stem noun'] else 'noprod'
class_ = lu.class_[0] if lu.class_ else 'unspecified' class_ = lu.class_[0] if lu.class_ else 'unspecified'
# Actant functors which have at least one specific form # Actant functors which have at least one specific form
...@@ -114,10 +114,10 @@ def map_table3a_actant_spec_forms(lu, collection): ...@@ -114,10 +114,10 @@ def map_table3a_actant_spec_forms(lu, collection):
@requires('collection') @requires('collection')
def map_table3b_actant_spec_forms(lu, collection): def map_table3b_actant_spec_forms(lu, collection):
if lu.isNoun == ['False'] or not lu.valdiff: if not (lu.dynamic_attrs['pos']._data in ('stem noun', 'root noun') and lu.valdiff):
raise TestDoesNotApply raise TestDoesNotApply
type_ = 'prod' if lu.productive == ['True'] else 'noprod' type_ = 'prod' if lu.pos == ['stem noun'] else 'noprod'
class_ = lu.class_[0] if lu.class_ else 'unspecified' class_ = lu.class_[0] if lu.class_ else 'unspecified'
# Actant functors which specific forms in parentheses (joined together by ',') # Actant functors which specific forms in parentheses (joined together by ',')
......
...@@ -20,7 +20,7 @@ from vallex.scripts import changes, requires, TestDoesNotApply, TestFailed ...@@ -20,7 +20,7 @@ from vallex.scripts import changes, requires, TestDoesNotApply, TestFailed
@changes('valdiff') @changes('valdiff')
@requires('lumap') @requires('lumap')
def transform_lu_add_valdiff(lu, lumap): def transform_lu_add_valdiff(lu, lumap):
if 'isNoun' not in lu.dynamic_attrs or not lu.dynamic_attrs['isNoun']._data: if 'pos' not in lu.dynamic_attrs or not lu.dynamic_attrs['pos']._data in ('stem noun', 'root noun'):
raise TestDoesNotApply raise TestDoesNotApply
if 'derivedV' not in lu.attribs or lu.attribs['derivedV']._data['ids'] == []: if 'derivedV' not in lu.attribs or lu.attribs['derivedV']._data['ids'] == []:
raise TestDoesNotApply raise TestDoesNotApply
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment