Commit 9fdd9377 authored by Davide Liga

some fix

parents 6c6b9c44 862a3883
......@@ -12,10 +12,10 @@
## Usage
* download all the documents: python run.py --download
* to parse one document: python run.py --parse <docpath>
* to parse all the documents: python run.py --parseall
* to use with a GUI: python run.py --gui [--port: port_no]
* download all the documents: `python run.py --download`
* to parse one document: `python run.py --parse <filepath>`
* to parse all the documents: `python run.py --parseall`
* to use with a GUI: `python run.py --gui [--port: port_no]`
(it will return *.akn zip archives, also saved locally in
`keld/server/converted/`)
......
......@@ -46,7 +46,7 @@ def parse_main(main_text, document):
continue
m = engine.search(p)
if m is not None:
qualification, (qstart, qend), qterm = qualify_paragraph(p) # qualifier.qualify_paragraph(p)[0]
qualification, (qstart, qend), qterm = qualify_paragraph(p[m.end:].lstrip()) # qualifier.qualify_paragraph(p)[0]
if m.type == "numbers":
first_group = m.groups()[0].key
if first_group == "doc_num":
......
......@@ -9,6 +9,12 @@ from keld.body_parser import parse as parse_doc
from keld.doc_scraper import download_everything
from tqdm import tqdm
from keld.commons import TEST_DOCS_DIR
from keld.commons.logging import setup_logger
here = os.path.abspath(os.path.dirname(os.path.abspath(__file__)))
tqdm.monitor_interval = 0
logger = setup_logger(f"keld", os.path.join(here, f"keld.log"))
def parse(filepath, output_dir=None):
......@@ -61,7 +67,14 @@ def parse_all(batch_no=None):
if not docname.endswith((".doc", ".DOC")):
continue
docpath = os.path.join(TEST_DOCS_DIR, docname)
parse(docpath)
try:
parse(docpath)
except KeyboardInterrupt:
raise
except Exception as e:
# raise
logger.exception(f"Fatal error for {docname}")
tqdm.write(f">>>> Fatal error for {docname}: {e}")
pbar.set_description(docname)
pbar.close()
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment