Commit 31c10da8 authored by Tobias Sterbak
Browse files

Merge branch '44-bug-multiple-changed-laws-with-the-same-title' into 'main'

Resolve "Multiple changed laws with the same name should be distinguishable in the selection dropdown"

Closes #44

See merge request nototast/lawinprogress!35
parents 44f9a089 1ed90145
......@@ -65,7 +65,9 @@ def generate_diff(request: Request, change_law_pdf: UploadFile = Form(...)):
logger.info(f"Processing {change_law_pdf.filename}...")
results, n_changes, n_success = [], [], []
for law_title, change_law_text in zip(law_titles, proposals_list):
for law_idx, (law_title, change_law_text) in enumerate(
zip(law_titles, proposals_list)
):
logger.info(f"Started processing change for {law_title}...")
# find and load the source law
source_law = retrieve_source_law(law_title)
......@@ -98,13 +100,14 @@ def generate_diff(request: Request, change_law_pdf: UploadFile = Form(...)):
parsed_law_tree.to_text(),
res_law_tree.to_text(),
applied_change_results,
title=law_title,
title=f"{law_idx+1}. {law_title}",
)
results.append(html_side_by_side)
n_changes.append(len(change_requests))
n_success.append(n_succesfull_applied_changes)
# prepare the html output and return it
law_titles = [f"{idx+1}. {title}" for idx, title in enumerate(law_titles)]
result = list(zip(law_titles, n_changes, n_success, results))
return templates.TemplateResponse(
"results_index.html",
......
......@@ -22,6 +22,7 @@ def sentencize(string: str) -> List[str]:
def align_seqs(
seq_a: List[str], seq_b: List[str], fill: str = ""
) -> Tuple[List[str], List[str]]:
"""Align two sequences with a filler."""
out_a, out_b = [], []
seqmatcher = difflib.SequenceMatcher(a=seq_a, b=seq_b, autojunk=False)
for _, idx_a0, idx_a1, idx_b0, idx_b1 in seqmatcher.get_opcodes():
......
......@@ -117,10 +117,7 @@ def html_sidebyside(
change.append('<div class="change-bg change"></div>')
new.append(f'<div style="padding: 2px;" class="new">{right}</div>')
# return lines
return [
(old_i, change_i, new_i) for old_i, change_i, new_i in zip(old, change, new)
]
return list(zip(old, change, new))
def html_diffs(
......
......@@ -6,9 +6,15 @@ import regex as re
from lawinprogress.parsing.lawtree import LawTextNode
HTML_PATTERN = re.compile(r"<.*?>")
def clean_up_structured_string(string: str) -> str:
"""Remove table structure and replace by appropriate newlines for parsing."""
# remove footnote links
string = re.sub(r"<SUP.*?SUP\>", "", string)
# remove other structured tags
string = re.sub(r"<DL.*?>", "", string)
string = re.sub(r"</DL>", "", string)
strings = [
......@@ -25,7 +31,6 @@ def parse_source_law(source_law: List[dict], law_title: str) -> LawTextNode:
source_law_tree = LawTextNode(text=law_title, bulletpoint="Titel:")
source_law_tree._id = None
html_pattern = re.compile(r'<.*?>')
for law_item in source_law:
# find the parent node
parent_node = anytree.search.findall(
......@@ -65,11 +70,7 @@ def parse_source_law(source_law: List[dict], law_title: str) -> LawTextNode:
)
else:
# else just clean and add a new node
law_text = (
html_pattern.sub("", law_text)
if law_text
else "(weggefallen)"
)
law_text = HTML_PATTERN.sub("", law_text) if law_text else "(weggefallen)"
new_node = LawTextNode(
text=law_text,
......@@ -108,7 +109,9 @@ def parse_source_law_tree(text: str, source_node: LawTextNode) -> LawTextNode:
split_text = re.split(pattern, text)
for idx, match in enumerate(re.finditer(pattern, text)):
new_node = LawTextNode(
text=re.split("|".join(patterns), split_text[idx + 1].strip())[0],
text=HTML_PATTERN.sub(
"", re.split("|".join(patterns), split_text[idx + 1].strip())[0]
),
# store the text for this bullet point on this level
bulletpoint=text[match.span()[0] : match.span()[1]].strip(),
# apply the function recursively to get all levels
......
This diff is collapsed.
......@@ -89,8 +89,7 @@
<p>Gefördert von</p>
</div>
<div class="message-body content has-text-centered">
<img src="imgs/logo_prototypefund.png" alt="Prototype Fund Logo" width="100">
<img src="imgs/BMBF_gefoerdert_2021_en.png" alt="BMBF Logo" width="100">
<a href="https://prototypefund.de/en/project/law-in-progress/" target="_blank" rel="noopener noreferrer"><object alt="prototype fund logo" data="imgs/pf_funding_logos.svg" width="364" height="105"> </object></a>
</div>
</article>
</div>
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment