Commit b8f7ac8c authored by Davide Liga

Add a further check on term identification

parent 30169b75
......@@ -19,35 +19,24 @@ class ParagraphQualifier(SpacyModelUser):
If yes, a new label is returned, along with the subject (He or They)
If not, the original label is preserved and the subject returned will be None
"""
# Checking if it can be a verb using Wordnet
original_token = nlp(token)[0]
lemmatized = original_token.lemma_
token = nlp(lemmatized)[0]
verb_possible = False
token = nlp(token.text)[0]
for i in token._.wordnet.synsets():
if i._pos == 'v':
verb_possible = True
# Adding a subject to extract the correct POS tag
if verb_possible:
if singular:
subject = "He"
else:
subject = "They"
if singular:
subject = "He"
else:
subject = "They"
new_token = subject + " " + token
new_token = subject + " " + original_token.text
new_label = "None"
if len(self.get_postag(new_token)) >= 2:
new_label = self.get_postag(new_token)[1].tag_ # (checking the 2nd token)
if new_label.startswith("V"):
return [subject, new_label]
elif singular == False: # If we already tried plural
return self.can_be_verb(token.text, original_label, True) # Recursively checking the singular
else:
return [None, original_label] # If neither singular nor plural subjects work, return the original label
if new_label.startswith("V"):
return [subject, new_label]
elif singular == False: # If we already tried plural
return self.can_be_verb(token, original_label, True) # Recursively checking the singular
else:
return [None, original_label]
return [None, original_label] # If neither singular nor plural subjects work, return the original label
def get_postag(self, p):
"""
......@@ -1114,6 +1103,11 @@ class ParagraphQualifier(SpacyModelUser):
# =================
qual = self.qualify(p)
qualification, term, term_slice, offsets = qual
if term is not None:
if "".join(p[offsets[0]:offsets[1]].split()).lower() != term.lower():
term = None
res = (qualification, offsets, term)
list_to_return.append(res)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment