Commit 8dc9ae1e authored by Asitha Senanayake's avatar Asitha Senanayake
Browse files

Add functionality to summarize data

parent 03a08b4f
Loading
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -897,6 +897,10 @@ def check_file(filepath_or_buffer, standard_AGS4_dictionary=None, rename_duplica
        # Warnings
        ags_errors = check.warning_16_1(tables, headings, tables_std_dict['ABBR'], ags_errors=ags_errors)

        # Add summary of data
        for val in check.get_data_summary(tables):
            ags_errors = check.add_error_msg(ags_errors, 'Summary of data', '', '', val)

    except AGS4Error as err:
        if print_output:
            logger.exception(err)
@@ -1003,6 +1007,14 @@ def write_error_report(ags_errors, output_file, show_warnings=False, show_fyi=Fa
                    f.write(f'''\r\n  {msg}\r\n''')
                f.write('\r\n')

            # Print 'Summary of data' if present
            if 'Summary of data' in ags_errors.keys():
                f.write('Summary of data:\r\n')
                for entry in ags_errors['Summary of data']:
                    msg = '\r\n  '.join(textwrap.wrap(entry['desc'], width=100))
                    f.write(f'''  {msg}\r\n''')
                f.write('\r\n')

            # Write other AGS Format error messages
            for key in [x for x in ags_errors if 'AGS Format Rule' in x]:
                f.write(f'{key}:\r\n')
+8 −0
Original line number Diff line number Diff line
@@ -318,6 +318,14 @@ def print_to_screen(ags_errors, show_warnings=False, show_fyi=False):
                console.print(f'''  {msg}''')
                console.print('')

        # Print 'Summary of data' if present
        if 'Summary of data' in ags_errors.keys():
            console.print('[underline]Summary of data[/underline]:')
            for entry in ags_errors['Summary of data']:
                msg = '\r\n  '.join(textwrap.wrap(entry['desc'], width=100))
                console.print(f'''  {msg}''')
            console.print('')

        # Write other AGS Format error messages
        for key in [x for x in ags_errors if 'AGS Format Rule' in x]:
            console.print(f'''[white underline]{key}[/white underline]:''')
+40 −0
Original line number Diff line number Diff line
@@ -276,6 +276,46 @@ def add_meta_data(filepath_or_buffer, standard_dictionary, ags_errors={}, encodi
    return ags_errors


def get_data_summary(tables):
    '''Build a short, human-readable summary of the groups in an AGS4 file.

    Parameters
    ----------
    tables : dict of dataframes
      Dictionary of Pandas dataframes keyed by group name (output from
      'AGS4_to_dataframe()'). Every dataframe is filtered on its 'HEADING'
      column, so that column has to be present.

    Returns
    -------
    list
      Summary messages (str) in this order: total group count with group
      names, groups without any 'DATA' rows (only if there are any), number
      of 'DATA' rows in LOCA (only if LOCA is present), and whether the
      optional DICT and FILE groups are present.
    '''

    def count_data_rows(group):
        # Number of rows in the group whose HEADING value equals 'DATA'
        return tables[group].query(" HEADING.eq('DATA') ").shape[0]

    group_names = list(tables)

    # First entry: how many groups were found and what they are called
    messages = [f"{len(group_names)} groups identified in file: {' '.join(group_names)}"]

    # Groups that have no 'DATA' rows at all (reported only when some exist)
    empty_groups = [name for name in group_names if count_data_rows(name) == 0]
    if empty_groups:
        messages.append(f"{len(empty_groups)} group(s) do not have any data: {' '.join(empty_groups)}")

    # Row counts for the groups of particular interest
    for name in ('LOCA',):
        if name in tables:
            messages.append(f"{count_data_rows(name)} data row(s) in {name} group")

    # Flag the presence of optional groups
    messages.extend([
        f"Optional DICT group present? {'DICT' in tables}",
        f"Optional FILE group present? {'FILE' in tables}",
    ])

    return messages


def is_ags_ascii(s):
    '''Check if character is in the "extended" ASCII set.

+7 −0
Original line number Diff line number Diff line
@@ -772,3 +772,10 @@ def test_duplicate_groups_raises_error():
    assert 'AGS Format Rule ?' in error_list.keys()
    msg = 'SAMP group duplicated in Line 42. Cannot parse file without overwriting data, therefore please combine all duplicate groups first.'
    assert error_list['AGS Format Rule ?'][0]['desc'] == msg


def test_data_summary():
    '''Check that check_file() reports a 'Summary of data' entry listing the groups found.'''

    report = AGS4.check_file(
        'tests/test_files/4.1-rule2.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    # The summary section must exist ...
    assert 'Summary of data' in report

    # ... and its first entry is the group count followed by the group names
    first_entry = report['Summary of data'][0]
    assert first_entry['desc'] == '7 groups identified in file: PROJ ABBR TRAN TYPE UNIT LOCA SAMP'