Commit 5fa41620 authored by Biagio

Merge branch 'body_parser'

parents fd4d5364 9fdd9377
......@@ -130,16 +130,20 @@ class ParagraphQualifier(SpacyModelUser):
return first_tokens
TOKEN_SLOT_SIZE = 2
def qualify(self, p):
"""
Qualify a given paragraph string as preambular or operational.
"""
if p == '':
return [None, None, None, (0, 0)]
p = p.strip()
qualification = None
term = None
TOKEN_SLOT_SIZE = 2
list_of_prep = [
'at',
'by',
......@@ -232,8 +236,9 @@ class ParagraphQualifier(SpacyModelUser):
case = "__ING"
# [[ Considering paragraphs of at least 4 tokens ]]
if len(first_tokens) >= 4 * 2:
if len(first_tokens) >= 4 * TOKEN_SLOT_SIZE:
# e.g. "Continuing to underline"
if first_tokens.get('label_2', '') == "TO" and first_tokens.get('label_3', '').lower().startswith('v'):
term = term + first_tokens.get('token_2', '').capitalize() + first_tokens.get('token_3',
......@@ -461,11 +466,11 @@ class ParagraphQualifier(SpacyModelUser):
"PRP"): # token_1.lower() == "we": # per prendere "further + V.*" che inizia con un RB !
qualification = "operational"
case = "__PRP"
if len(first_tokens) >= 2*TOKEN_SLOT_SIZE and len(first_tokens) >= 3*TOKEN_SLOT_SIZE:
if first_tokens.get('token_2', '').lower() == "also" and first_tokens.get('label_3', '').lower().startswith("v"):
if first_tokens.get('label_2', '') is not None and first_tokens.get('label_3', '') is not None:
if first_tokens.get('token_2', '').lower() == "also" and first_tokens.get('label_3',
'').lower().startswith("v"):
term = first_tokens.get('token_2', '').capitalize() + first_tokens.get('token_3', '').capitalize()
terms_join = " ".join(
[first_tokens.get('token_2', '').capitalize(), first_tokens.get('token_3', '').capitalize()])
......@@ -800,8 +805,8 @@ class ParagraphQualifier(SpacyModelUser):
if qualification is not None:
# Checking in positions 2-3
if len(first_tokens) >= 3 * 2:
if len(first_tokens) >= 3 * TOKEN_SLOT_SIZE:
# with concern
if first_tokens.get('token_2', '').lower() == 'with' and first_tokens.get('token_3',
'').lower() in list_of_nouns:
......@@ -825,8 +830,8 @@ class ParagraphQualifier(SpacyModelUser):
case = case + "_JJVBNVBDnns"
# Checking in positions 2-3-4
if len(first_tokens) >= 4 * 2:
if len(first_tokens) >= 4 * TOKEN_SLOT_SIZE:
# with deep concern
if first_tokens.get('token_2', '').lower() == 'with' and (
first_tokens.get('label_3', '').lower() in ['jj', 'vbn', 'vbd'] or first_tokens.get("token_3", "").lower() in list_of_adjectives) and first_tokens.get('token_4',
......@@ -864,8 +869,8 @@ class ParagraphQualifier(SpacyModelUser):
case = case + "_JJVBNVBDnns"
# Checking in positions 3-4-5
if len(first_tokens) >= 5 * 2:
if len(first_tokens) >= 5 * TOKEN_SLOT_SIZE:
# with deep concern
if first_tokens.get('token_3', '').lower() == 'with' and (
first_tokens.get('label_4', '').lower() in ['jj', 'vbn', 'vbd'] or first_tokens.get("token_4", "").lower() in list_of_adjectives) and first_tokens.get('token_5',
......@@ -903,8 +908,8 @@ class ParagraphQualifier(SpacyModelUser):
case = case + "_JJVBNVBDnns"
# Checking in positions 4-5-6 [rare]
if len(first_tokens) >= 6 * 2:
if len(first_tokens) >= 6 * TOKEN_SLOT_SIZE:
# with deep concern
if first_tokens.get('token_4', '').lower() == 'with' and (
first_tokens.get('label_5', '').lower() in ['jj', 'vbn', 'vbd'] or first_tokens.get("token_5", "").lower() in list_of_adjectives) and first_tokens.get('token_6',
......@@ -942,7 +947,7 @@ class ParagraphQualifier(SpacyModelUser):
case = case + "_JJVBNVBDnns"
# Checking in positions 5-6-7 [rare]
if len(first_tokens) >= 7 * 2:
if len(first_tokens) >= 7 * TOKEN_SLOT_SIZE:
# with deep concern
if first_tokens.get('token_5', '').lower() == 'with' and (
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment