Loading python_ags4/AGS4.py +20 −18 Original line number Diff line number Diff line Loading @@ -97,6 +97,10 @@ def AGS4_to_dict(filepath_or_buffer, encoding='utf-8', get_line_numbers=False, r if _is_bytebuffer(line): line = line.decode(encoding) else: # Strip byte-order mark from line, if present line = _remove_byte_order_mark(line, encoding) line = list(csv.reader(StringIO(line), quotechar='"'))[0] if len(line) == 0: Loading Loading @@ -887,24 +891,6 @@ def check_file(filepath_or_buffer, standard_AGS4_dictionary=None, rename_duplica 'Could not complete validation. Please fix listed errors and try again.') ags_errors = check.add_error_msg(ags_errors, 'Validator Process Error', '-', '', str(err)) except UnboundLocalError as err: logger.exception(err) # The presence of a byte-order-mark (BOM) in the same row as first # "GROUP" line can cause this exception. This will be caught by line # checks for Rule 1 (since the BOM is not an ASCII character) and Rule 3 # (since the BOM precedes the string "GROUP"). The BOM encoding can be # ignored by setting the 'encoding' argument to 'utf-8-sig'. f.seek(0) tables, headings, line_numbers = AGS4_to_dataframe(f, encoding='utf-8-sig', get_line_numbers=True, rename_duplicate_headers=rename_duplicate_headers) # Add warning to error log msg = 'This file seems to be encoded with a byte-order-mark (BOM). It is highly recommended that the '\ 'file be saved without BOM encoding to avoid issues with other software.' ags_errors = check.add_error_msg(ags_errors, 'General', '', '', msg) except Exception as err: logger.exception(err) Loading Loading @@ -1171,6 +1157,22 @@ def _is_bytebuffer(obj): return False def _remove_byte_order_mark(string, encoding): """Remove byte-order mark from string. 
""" import codecs string_without_BOM = string.encode(encoding)\ .strip(codecs.BOM_UTF8)\ .strip(codecs.BOM)\ .strip(codecs.BOM_BE)\ .strip(codecs.BOM_LE)\ .decode(encoding) return string_without_BOM class AGS4Error(Exception): """Exception class for AGS4 parsing errors. """ Loading python_ags4/check.py +3 −0 Original line number Diff line number Diff line Loading @@ -372,6 +372,9 @@ def rule_1(line, line_number=0, ags_errors={}, encoding='utf-8'): msg = f"Has Non-ASCII character(s) (assuming that file encoding is '{encoding}') and/or a byte-order-mark (BOM)." add_error_msg(ags_errors, 'AGS Format Rule 1', line_number, '', msg) fyi_msg = "If a BOM is present, then it is highly recommended that the file be saved without BOM encoding to avoid issues with other software." add_error_msg(ags_errors, 'FYI (Related to Rule 1)', line_number, '', fyi_msg) else: msg = f"Has Non-ASCII character(s) (assuming that file encoding is '{encoding}')." add_error_msg(ags_errors, 'AGS Format Rule 1', line_number, '', msg) Loading tests/test_check.py +6 −4 Original line number Diff line number Diff line Loading @@ -764,12 +764,14 @@ def test_rule_AGS3(): def test_file_with_BOM(): error_list = AGS4.check_file('tests/test_files/File_with_BOM.ags', standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags') msg1 = 'This file seems to be encoded with a byte-order-mark (BOM). It is highly recommended that the '\ msg1 = "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8') and/or a byte-order-mark (BOM)." assert msg1 in error_list['AGS Format Rule 1'][0]['desc'] msg2 = 'If a BOM is present, then it is highly recommended that the '\ 'file be saved without BOM encoding to avoid issues with other software.' msg2 = "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8') and/or a byte-order-mark (BOM)." 
assert msg1 in error_list['General'][1]['desc'] assert msg2 in error_list['AGS Format Rule 1'][0]['desc'] assert msg2 in error_list['FYI (Related to Rule 1)'][0]['desc'] def test_file_with_invalid_TRAN_AGS(): Loading Loading
python_ags4/AGS4.py +20 −18 Original line number Diff line number Diff line Loading @@ -97,6 +97,10 @@ def AGS4_to_dict(filepath_or_buffer, encoding='utf-8', get_line_numbers=False, r if _is_bytebuffer(line): line = line.decode(encoding) else: # Strip byte-order mark from line, if present line = _remove_byte_order_mark(line, encoding) line = list(csv.reader(StringIO(line), quotechar='"'))[0] if len(line) == 0: Loading Loading @@ -887,24 +891,6 @@ def check_file(filepath_or_buffer, standard_AGS4_dictionary=None, rename_duplica 'Could not complete validation. Please fix listed errors and try again.') ags_errors = check.add_error_msg(ags_errors, 'Validator Process Error', '-', '', str(err)) except UnboundLocalError as err: logger.exception(err) # The presence of a byte-order-mark (BOM) in the same row as first # "GROUP" line can cause this exception. This will be caught by line # checks for Rule 1 (since the BOM is not an ASCII character) and Rule 3 # (since the BOM precedes the string "GROUP"). The BOM encoding can be # ignored by setting the 'encoding' argument to 'utf-8-sig'. f.seek(0) tables, headings, line_numbers = AGS4_to_dataframe(f, encoding='utf-8-sig', get_line_numbers=True, rename_duplicate_headers=rename_duplicate_headers) # Add warning to error log msg = 'This file seems to be encoded with a byte-order-mark (BOM). It is highly recommended that the '\ 'file be saved without BOM encoding to avoid issues with other software.' ags_errors = check.add_error_msg(ags_errors, 'General', '', '', msg) except Exception as err: logger.exception(err) Loading Loading @@ -1171,6 +1157,22 @@ def _is_bytebuffer(obj): return False def _remove_byte_order_mark(string, encoding): """Remove byte-order mark from string. 
""" import codecs string_without_BOM = string.encode(encoding)\ .strip(codecs.BOM_UTF8)\ .strip(codecs.BOM)\ .strip(codecs.BOM_BE)\ .strip(codecs.BOM_LE)\ .decode(encoding) return string_without_BOM class AGS4Error(Exception): """Exception class for AGS4 parsing errors. """ Loading
python_ags4/check.py +3 −0 Original line number Diff line number Diff line Loading @@ -372,6 +372,9 @@ def rule_1(line, line_number=0, ags_errors={}, encoding='utf-8'): msg = f"Has Non-ASCII character(s) (assuming that file encoding is '{encoding}') and/or a byte-order-mark (BOM)." add_error_msg(ags_errors, 'AGS Format Rule 1', line_number, '', msg) fyi_msg = "If a BOM is present, then it is highly recommended that the file be saved without BOM encoding to avoid issues with other software." add_error_msg(ags_errors, 'FYI (Related to Rule 1)', line_number, '', fyi_msg) else: msg = f"Has Non-ASCII character(s) (assuming that file encoding is '{encoding}')." add_error_msg(ags_errors, 'AGS Format Rule 1', line_number, '', msg) Loading
def test_file_with_BOM():
    """Check that a file saved with a byte-order mark is reported by check_file.

    The BOM is expected to be reported under 'AGS Format Rule 1', together
    with a separate 'FYI (Related to Rule 1)' entry recommending that the
    file be saved without BOM encoding.
    """
    error_list = AGS4.check_file('tests/test_files/File_with_BOM.ags',
                                 standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags')

    # Rule 1 flags the BOM because it is not an ASCII character.
    msg1 = "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8') and/or a byte-order-mark (BOM)."
    assert msg1 in error_list['AGS Format Rule 1'][0]['desc']

    # The recommendation is logged under its own FYI key rather than 'General'.
    msg2 = 'If a BOM is present, then it is highly recommended that the '\
           'file be saved without BOM encoding to avoid issues with other software.'
    assert msg2 in error_list['FYI (Related to Rule 1)'][0]['desc']