Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
9
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Switch to GitLab Next
Sign in / Register
Toggle navigation
Open sidebar
Jonathan L. Verner
pyvallex
Commits
66213c85
Commit
66213c85
authored
Sep 11, 2020
by
Anna Vernerová
Committed by
Martin Rypar
Sep 11, 2020
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Pos (attribute capturing the part of speech)
parent
0b73b935
Pipeline
#188814811
passed with stages
in 11 minutes and 28 seconds
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
57 additions
and
42 deletions
+57
-42
CHANGELOG.rst
CHANGELOG.rst
+2
-2
doc/examples/compute-slovko.py
doc/examples/compute-slovko.py
+3
-3
vallex/scripts/dynamic_properties/misc_info.py
vallex/scripts/dynamic_properties/misc_info.py
+39
-24
vallex/scripts/mapreduce.py
vallex/scripts/mapreduce.py
+2
-2
vallex/scripts/mapreducers/slovko.py
vallex/scripts/mapreducers/slovko.py
+10
-10
vallex/scripts/transforms/add_valdiff.py
vallex/scripts/transforms/add_valdiff.py
+1
-1
No files found.
CHANGELOG.rst
View file @
66213c85
...
...
@@ -235,7 +235,7 @@ User Interface (ui)
- Set the window icon / favicon. [Jonathan L. Verner]
- Fix search not working on windows. [Jonathan L. Verner]
The query user enters is a
n
ded
with
a query constructed from the
The query user enters is a
d
ded
to
a query constructed from the
pathnames of the lexicons to search through. On windows, these pathnames
typically contain '\' characters, which have special meaning in a
regular expression. We must therefore escape them (and others).
...
...
@@ -309,7 +309,7 @@ Library (lib)
- Parse derivedN, Numbered attributes + attributes with refs. [Anša Vernerová]
- Rename ParamFunctor to ValencySlot and Scope to FunctorCombination.
[Anša Vernerová]
- Rename noun, verb, computed reflexive attrs to
I
sNoun, isVerb, isReflexverb.
- Rename noun, verb, computed reflexive attrs to
i
sNoun, isVerb, isReflexverb.
[Anša Vernerová]
- Don't crash if frame has bad format. [Jonathan L. Verner]
- When saving, do not put additional newlines after lexemes. [Jonathan
...
...
doc/examples/compute-slovko.py
View file @
66213c85
...
...
@@ -62,7 +62,7 @@ noun_forms_typical = defaultdict(lambda: {'prod': [], 'noprod': []}) # type: ig
noun_forms_special
=
defaultdict
(
lambda
:
{
'prod'
:
[],
'noprod'
:
[]})
# type: ignore
for
lu
in
lexicon_coll
.
lexical_units
:
if
not
lu
.
dynamic_attrs
[
'
isNoun
'
].
_data
or
not
'valdiff'
in
lu
.
attribs
:
if
not
(
lu
.
dynamic_attrs
[
'
pos
'
].
_data
in
(
'stem noun'
,
'root noun'
))
or
not
'valdiff'
in
lu
.
attribs
:
continue
derived
=
lu
.
attribs
.
get
(
'derivedV'
,
{}).
_data
.
get
(
'ids'
,
[])
# type: ignore
if
not
derived
:
...
...
@@ -76,8 +76,8 @@ for lu in lexicon_coll.lexical_units:
spec
=
lu
.
attribs
[
'valdiff'
]
type_
=
'prod'
if
lu
.
dynamic_attrs
[
'p
roductive'
].
_data
else
'noprod'
class_
=
lu
.
attribs
[
'class'
].
_data
.
strip
().
strip
(
'?'
).
split
(
' '
)[
0
].
split
(
'/'
)[
0
].
strip
()
if
'class'
in
lu
.
attribs
else
'unspecified'
type_
=
'prod'
if
lu
.
dynamic_attrs
[
'p
os'
].
_data
==
'stem noun'
else
'noprod'
class_
=
lu
.
attribs
[
'class'
].
_data
[
0
]
.
strip
().
strip
(
'?'
).
split
(
' '
)[
0
].
split
(
'/'
)[
0
].
strip
()
if
'class'
in
lu
.
attribs
else
'unspecified'
verb_forms
[
class_
][
type_
].
extend
(
sum
([
verb
.
match_key_values
([
'frame'
,
funct
,
'forms'
])
for
funct
in
ACTANT_FUNCTORS
],
[]))
...
...
vallex/scripts/dynamic_properties/misc_info.py
View file @
66213c85
import
logging
from
vallex.log
import
log
import
sys
import
os
# for better debugging
import
re
from
vallex
import
Attrib
def compute_isNoun(lu):
    """Attach the dynamic ``isNoun`` attribute to the lexical unit *lu*.

    The attribute's data is ``True`` exactly when the substring ``'blu-n'``
    occurs in ``lu._id``; the result is stored in ``lu.dynamic_attrs`` under
    the attribute's name.
    """
    is_noun = 'blu-n' in lu._id
    noun_attr = Attrib(
        'isNoun',
        dynamic=True,
        help='Is it a noun (True/False)?',
        data=is_noun,
    )
    lu.dynamic_attrs[noun_attr.name] = noun_attr
def compute_isVerb(lu):
    """Attach the dynamic ``isVerb`` attribute to the lexical unit *lu*.

    The attribute's data is ``True`` exactly when the substring ``'blu-v'``
    occurs in ``lu._id``.
    """
    attr = Attrib(
        'isVerb',
        dynamic=True,
        help='Is it a verb (True/False)?',
        data='blu-v' in lu._id,
    )
    # Register the attribute on the lexical unit; without this the freshly
    # computed value would be thrown away when the function returns.
    lu.dynamic_attrs[attr.name] = attr
# Lookup table from short part-of-speech codes to the descriptive labels
# stored in the dynamic ``pos`` attribute (presumably the codes used by
# PDT-Vallex, going by the name -- confirm against the data).
PDTVALLEX_POS_MAPPING = {
    'V': 'verb',
    'N': 'stem noun',
    'Nx': 'root noun',
    'A': 'adjective',
    'D': 'adverb',
}
def compute_pos(lu):
    """Attach the dynamic ``pos`` (detailed part of speech) attribute to *lu*.

    The value is derived from ``lu._id`` and the keys of ``lu.lemma._data``:

    * ``'verb'`` when the id contains ``'blu-v'``;
    * for ids containing ``'blu-n'``: ``'root noun'`` when the lemma has a
      ``no-aspect`` key (optionally suffixed ``1``-``4``), otherwise
      ``'stem noun'``;
    * for ids containing ``'v-w'``: the label that ``PDTVALLEX_POS_MAPPING``
      assigns to the first key of the lemma data;
    * ``'unknown'`` in every other case, including an empty lemma or a lemma
      key that is missing from the mapping.

    The attribute is stored in ``lu.dynamic_attrs`` under its name.
    """
    attr = Attrib(
        'pos',
        dynamic=True,
        help='Detailed part of speech (verb / stem noun / root noun)',
    )
    attr._data = 'unknown'

    if 'blu-v' in lu._id:
        attr._data = 'verb'
    elif 'blu-n' in lu._id:
        lemma_keys = lu.lemma._data.keys()
        has_no_aspect = any(
            'no-aspect' + var in lemma_keys for var in ('', '1', '2', '3', '4')
        )
        attr._data = 'root noun' if has_no_aspect else 'stem noun'
    elif 'v-w' in lu._id:
        # dict views cannot be indexed on Python 3 (``keys()[0]`` raises
        # TypeError), so take the first key through an iterator instead.
        first_key = next(iter(lu.lemma._data.keys()), None)
        # Codes that are not in the mapping keep the 'unknown' default
        # rather than aborting the computation with a KeyError.
        attr._data = PDTVALLEX_POS_MAPPING.get(first_key, 'unknown')

    lu.dynamic_attrs[attr.name] = attr
REFLVERB_RGX = re.compile(r'.*T[12]?\s+S[IE].*')
"""A regexp for recognizing reflexive verbs from the id of their parent lexeme."""

REFLEXIVE_RGX = re.compile(r'.*\s+\bs[ei]\d?\b\s*$')
"""A regexp for recognizing reflexive lemmas."""

OPT_REFLEXIVE_RGX = re.compile(r'.*\s+\(s[ei]\d?\)\s*$')
"""A regexp for recognizing optionally reflexive lemmas."""
def compute_isReflexverb(lu):
    """Attach the dynamic ``isReflexverb`` attribute to the lexical unit *lu*.

    The attribute's data is ``True`` when *lu* has a parent whose ``_id``
    matches ``REFLVERB_RGX`` and ``False`` otherwise (including when
    ``lu._parent`` is falsy).
    """
    attr = Attrib(
        'isReflexverb',
        dynamic=True,
        help='Is it a reflexive verb (True/False)?',
    )
    attr._data = bool(lu._parent and REFLVERB_RGX.match(lu._parent._id))
    # Register the attribute on the lexical unit; without this the freshly
    # computed value would be thrown away when the function returns.
    lu.dynamic_attrs[attr.name] = attr
def compute_isReflexive(lu):
    """Attach the dynamic ``isReflexive`` attribute to the lexical unit *lu*.

    The values of ``lu.lemma._data`` are tested against two patterns:

    * ``'always'`` if any value matches ``REFLEXIVE_RGX``;
    * ``'optionally'`` if none does but some value matches
      ``OPT_REFLEXIVE_RGX``;
    * ``'never'`` otherwise.

    The attribute is stored in ``lu.dynamic_attrs`` under its name.
    """
    attr = Attrib(
        'isReflexive',
        dynamic=True,
        help='Is the lemma reflexive (always/optionally/never)?',
    )

    def some_form_matches(rgx):
        # True when at least one lemma value matches the given pattern.
        return any(rgx.match(form) for form in lu.lemma._data.values())

    if some_form_matches(REFLEXIVE_RGX):
        attr._data = 'always'
    elif some_form_matches(OPT_REFLEXIVE_RGX):
        attr._data = 'optionally'
    else:
        attr._data = 'never'

    lu.dynamic_attrs[attr.name] = attr
def compute_productive(lu):
    """Attach the dynamic ``productive`` attribute to noun lexical units.

    Nothing happens unless ``lu._id`` contains ``'blu-n'``.  For such units
    the attribute's data is ``False`` when the lemma data has a ``no-aspect``
    key (optionally suffixed ``1``-``4``) and ``True`` otherwise; the
    attribute is stored in ``lu.dynamic_attrs`` under its name.
    """
    if 'blu-n' not in lu._id:
        return

    attr = Attrib(
        'productive',
        dynamic=True,
        help='Is it a productive noun (True/False)?',
    )
    lemma_keys = lu.lemma._data.keys()
    has_no_aspect = any(
        'no-aspect' + suffix in lemma_keys for suffix in ['', '1', '2', '3', '4']
    )
    attr._data = not has_no_aspect
    lu.dynamic_attrs[attr.name] = attr
vallex/scripts/mapreduce.py
View file @
66213c85
...
...
@@ -30,9 +30,9 @@
from vallex.scripts.mapreduce import emit
def map_functor_count(lu):
if lu.
isNoun == ['True']
:
if lu.
dynamic_attrs['pos']._data in ('stem noun','root noun')
:
emit(('noun',), len(lu.frame.functor))
elif lu.
isVerb == ['True']
:
elif lu.
dynamic_attrs['pos']._data == 'verb'
:
emit(('verb',), len(lu.frame.functor))
def reduce_functor_count(key, resuts):
...
...
vallex/scripts/mapreducers/slovko.py
View file @
66213c85
...
...
@@ -20,11 +20,11 @@ from vallex.scripts.mapreduce import emit
@
requires
(
'collection'
)
def
map_table1_noun_verb_forms_summary
(
lu
,
collection
):
if
lu
.
isNoun
==
[
'False'
]
or
not
lu
.
valdiff
:
if
not
(
lu
.
dynamic_attrs
[
'pos'
].
_data
in
(
'stem noun'
,
'root noun'
)
and
lu
.
valdiff
)
:
raise
TestDoesNotApply
noun
=
lu
type_
=
'prod'
if
noun
.
p
roductive
==
[
'True
'
]
else
'noprod'
type_
=
'prod'
if
noun
.
p
os
==
[
'stem noun
'
]
else
'noprod'
class_
=
noun
.
class_
[
0
]
if
noun
.
class_
else
'unspecified'
for
verb
in
[
collection
.
id2lu
(
id
)
for
id
in
noun
.
derivedV
.
ids
]:
...
...
@@ -54,11 +54,11 @@ def map_table1_noun_verb_forms_summary(lu, collection):
@
requires
(
'collection'
)
def
map_table1b_differing_actant_summary
(
lu
,
collection
):
if
lu
.
isNoun
==
[
'False'
]
or
not
lu
.
valdiff
:
if
not
(
lu
.
dynamic_attrs
[
'pos'
].
_data
in
(
'stem noun'
,
'root noun'
)
and
lu
.
valdiff
)
:
raise
TestDoesNotApply
noun
=
lu
type_
=
'prod'
if
noun
.
p
roductive
==
[
'True
'
]
else
'noprod'
type_
=
'prod'
if
noun
.
p
os
==
[
'stem noun
'
]
else
'noprod'
class_
=
noun
.
class_
[
0
]
if
noun
.
class_
else
'unspecified'
for
verb
in
[
collection
.
id2lu
(
id
)
for
id
in
noun
.
derivedV
.
ids
]:
...
...
@@ -80,10 +80,10 @@ def map_table1b_differing_actant_summary(lu, collection):
@
requires
(
'collection'
)
def
map_table2_spec_nom_forms
(
lu
,
collection
):
if
lu
.
isNoun
==
[
'False'
]
or
not
lu
.
valdiff
:
if
not
(
lu
.
dynamic_attrs
[
'pos'
].
_data
in
(
'stem noun'
,
'root noun'
)
and
lu
.
valdiff
)
:
raise
TestDoesNotApply
type_
=
'prod'
if
lu
.
p
roductive
==
[
'True
'
]
else
'noprod'
type_
=
'prod'
if
lu
.
p
os
==
[
'stem noun
'
]
else
'noprod'
class_
=
lu
.
class_
[
0
]
if
lu
.
class_
else
'unspecified'
for
funct
in
lu
.
valdiff
.
actant
.
eq
:
...
...
@@ -100,10 +100,10 @@ def reduce_table2_spec_nom_forms(key, results):
@
requires
(
'collection'
)
def
map_table3a_actant_spec_forms
(
lu
,
collection
):
if
lu
.
isNoun
==
[
'False'
]
or
not
lu
.
valdiff
:
if
not
(
lu
.
dynamic_attrs
[
'pos'
].
_data
in
(
'stem noun'
,
'root noun'
)
and
lu
.
valdiff
)
:
raise
TestDoesNotApply
type_
=
'prod'
if
lu
.
p
roductive
==
[
'True
'
]
else
'noprod'
type_
=
'prod'
if
lu
.
p
os
==
[
'stem noun
'
]
else
'noprod'
class_
=
lu
.
class_
[
0
]
if
lu
.
class_
else
'unspecified'
# Actant functors which have at least one specific form
...
...
@@ -114,10 +114,10 @@ def map_table3a_actant_spec_forms(lu, collection):
@
requires
(
'collection'
)
def
map_table3b_actant_spec_forms
(
lu
,
collection
):
if
lu
.
isNoun
==
[
'False'
]
or
not
lu
.
valdiff
:
if
not
(
lu
.
dynamic_attrs
[
'pos'
].
_data
in
(
'stem noun'
,
'root noun'
)
and
lu
.
valdiff
)
:
raise
TestDoesNotApply
type_
=
'prod'
if
lu
.
p
roductive
==
[
'True
'
]
else
'noprod'
type_
=
'prod'
if
lu
.
p
os
==
[
'stem noun
'
]
else
'noprod'
class_
=
lu
.
class_
[
0
]
if
lu
.
class_
else
'unspecified'
# Actant functors with their specific forms in parentheses (joined together by ',')
...
...
vallex/scripts/transforms/add_valdiff.py
View file @
66213c85
...
...
@@ -20,7 +20,7 @@ from vallex.scripts import changes, requires, TestDoesNotApply, TestFailed
@
changes
(
'valdiff'
)
@
requires
(
'lumap'
)
def
transform_lu_add_valdiff
(
lu
,
lumap
):
if
'
isNoun
'
not
in
lu
.
dynamic_attrs
or
not
lu
.
dynamic_attrs
[
'
isNoun
'
].
_data
:
if
'
pos
'
not
in
lu
.
dynamic_attrs
or
not
lu
.
dynamic_attrs
[
'
pos
'
].
_data
in
(
'stem noun'
,
'root noun'
)
:
raise
TestDoesNotApply
if
'derivedV'
not
in
lu
.
attribs
or
lu
.
attribs
[
'derivedV'
].
_data
[
'ids'
]
==
[]:
raise
TestDoesNotApply
...
...
Anna Vernerová
@Ansa211
mentioned in commit
921ed47f
·
Sep 15, 2020
mentioned in commit
921ed47f
mentioned in commit 921ed47fc7dff84cacc6189a773bfd9eb3e2fc05
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment