Commit 7a857034 authored by Asitha Senanayake
Browse files

refactor: Remove rprint statements

parent 40f16710
Loading
Loading
Loading
Loading
+10 −56
Original line number Diff line number Diff line
@@ -64,8 +64,6 @@ def AGS4_to_dict(filepath_or_buffer, encoding='utf-8', get_line_numbers=False, r
        function.
    """

    from rich import print as rprint

    if _is_file_like(filepath_or_buffer):
        f = filepath_or_buffer
        f.seek(0)
@@ -105,7 +103,6 @@ def AGS4_to_dict(filepath_or_buffer, encoding='utf-8', get_line_numbers=False, r
                    msg = f"{group} group duplicated in Line {i}. Cannot parse file without overwriting data, "\
                           "therefore please combine all duplicate groups first."

                    rprint(f"[red]  ERROR: {msg}[/red]")
                    logger.error(msg)
                    raise AGS4Error(msg)

@@ -126,7 +123,6 @@ def AGS4_to_dict(filepath_or_buffer, encoding='utf-8', get_line_numbers=False, r
                    if rename_duplicate_headers is False:
                        raise AGS4Error(f"HEADER row in {group} (Line {i}) has duplicate entries")

                    rprint(f"[yellow]  WARNING: HEADER row in [bold]{group}[/bold] (Line {i}) has duplicate entries.[/yellow]")
                    logger.warning(f"HEADER row in {group} (Line {i}) has duplicate entries.")

                    # Rename duplicate headers by appending a number
@@ -142,9 +138,6 @@ def AGS4_to_dict(filepath_or_buffer, encoding='utf-8', get_line_numbers=False, r

                            temp[i] = temp[i]+'_'+str(item_count[item]['count'])

                            rprint(f'[blue]  INFO: Duplicate column {item} found and renamed as {item}_{count}.[/blue]')
                            rprint('[blue]        Automatically renamed columns do not conform to AGS4 Rules 19a and 19b.[/blue]')
                            rprint('[blue]        Therefore, please review the data and rename or drop duplicate columns as appropriate.[/blue]')
                            logger.info(f'Duplicate column {item} found and renamed as {item}_{count}. '
                                        'Automatically renamed columns do not conform to AGS4 Rules 19a and 19b. '
                                        'Therefore, please review the data and rename or drop duplicate columns as appropriate.')
@@ -170,7 +163,7 @@ def AGS4_to_dict(filepath_or_buffer, encoding='utf-8', get_line_numbers=False, r
                # Check whether line has the same number of entries as the
                # number of headings in the group. If not, print error and exit.
                if len(temp) != len(headings[group]):
                    rprint(f"[red]  Error: Line {i} does not have the same number of entries as the HEADING row in [bold]{group}[/bold].[/red]")
                    logger.error(f"Line {i} does not have the same number of entries as the HEADING row in {group}.")
                    raise AGS4Error(f"Line {i} does not have the same number of entries as the HEADING row in {group}.")

                for i in range(0, len(temp)):
@@ -282,7 +275,6 @@ def AGS4_to_excel(input_file, output_file, encoding='utf-8', rename_duplicate_he
    """

    from pandas import ExcelWriter
    from rich import print as rprint
    from openpyxl.utils import get_column_letter

    # Extract AGS4 file into a dictionary of dictionaries
@@ -295,7 +287,6 @@ def AGS4_to_excel(input_file, output_file, encoding='utf-8', rename_duplicate_he
                        'alphabetical': 'alphabetically',
                        'hierarchical': 'according to the hierarchy defined in the dictionary'}
        msg = f'WARNING: Worksheets in Excel file will be sorted {sorting_desc[sorting_strategy]}. The original group order will be lost.'
        rprint(f"[yellow]{msg}[/yellow]")
        logger.warning(f"{msg}")

        list_of_tables = sort_groups(tables, sorting_strategy=sorting_strategy)
@@ -305,20 +296,20 @@ def AGS4_to_excel(input_file, output_file, encoding='utf-8', rename_duplicate_he

    # Exit if there are no AGS4 tables in the input file
    if len(list_of_tables) == 0:
        rprint('[red]  ERROR: No valid AGS4 data found in input file.[/red]')
        logger.error('No valid AGS4 data found in input file.')
        raise AGS4Error('No valid AGS4 data found in input file.')

    # Write to Excel file
    with ExcelWriter(output_file, engine='openpyxl') as writer:
        for key in list_of_tables:
            rprint(f'[green]Writing data from... [bold]{key}[/bold][/green]')
            logger.info(f'Writing data from... {key}')

            # Check table size and issue warning for large files that could crash the program
            if 25000 < tables[key].shape[0] < 100000:
                rprint(f'[blue]  INFO: {key} has {tables[key].shape[0]} rows, so it will take about a minute to export.[/blue]')
                logger.info(f'{key} has {tables[key].shape[0]} rows, so it will take about a minute to export.')
            elif tables[key].shape[0] > 100000:
                rprint(f'[yellow]  WARNING: {key} has {tables[key].shape[0]} rows, so it may take a few minutes to export.[/yellow]')
                rprint('[yellow]           The program will terminate if it runs out of memory in the process.[/yellow]')
                logger.warning(f'{key} has {tables[key].shape[0]} rows, so it may take a few minutes to export. '
                               'The program will terminate if it runs out of memory in the process.')

            tables[key].to_excel(writer, sheet_name=key, index=False)

@@ -362,8 +353,6 @@ def dataframe_to_AGS4(tables, headings, filepath, mode='w', index=False, encodin
    None
    """

    from rich import print as rprint

    # Open file and write/append data
    with open(filepath, mode, newline='', encoding=encoding) as f:
        for key in tables:
@@ -384,16 +373,10 @@ def dataframe_to_AGS4(tables, headings, filepath, mode='w', index=False, encodin
                df.loc[mask, :] = df.loc[mask, :].apply(lambda x: x.str.replace('""', '"'))

            # Write table to file
            rprint(f'[green]Writing data from... [bold]{key}[/bold][green]')
            logger.info(f'Writing data from... {key}')
            f.write('"GROUP"'+","+'"'+key+'"'+'\r\n')

            if key not in headings:
                if warnings is True:
                    rprint(f"[yellow]  WARNING: Input 'headings' dictionary does not have an entry named [bold]{key}[/bold].[/yellow]")
                    rprint(f"[italic yellow]           All columns in the {key} table will be exported in the default order.[/italic yellow]")
                    rprint("[italic yellow]           Please check column order and ensure AGS4 Rule 7 is still satisfied.[/italic yellow]")

                logger.warning(f"Input 'headings' dictionary does not have an entry named {key}. "
                               f"All columns in the {key} table will be exported in the default order. "
                               "Please check column order and ensure AGS4 Rule 7 is still satisfied.")
@@ -412,10 +395,6 @@ def dataframe_to_AGS4(tables, headings, filepath, mode='w', index=False, encodin
                missing_cols = set(headings[key]).difference(set(df.columns))
                columns = [x for x in headings[key] if x not in missing_cols]

                if warnings is True:
                    rprint(f"[yellow]  WARNING: Columns {', '.join(missing_cols)} not found in the {key} table"
                           " although they are in the headings dictionary..[/yellow]")

                logger.warning(f"Columns {', '.join(missing_cols)} not found in the {key} table although they are in the headings dictionary.")

                df.to_csv(f, index=index, quoting=1, columns=columns, lineterminator='\r\n', encoding=encoding)
@@ -450,7 +429,6 @@ def excel_to_AGS4(input_file, output_file, format_numeric_columns=True, dictiona
    """

    from pandas import read_excel
    from rich import print as rprint

    # Read data from Excel file in to a dictionary of dataframes
    tables = read_excel(input_file, sheet_name=None, engine='openpyxl')
@@ -464,13 +442,11 @@ def excel_to_AGS4(input_file, output_file, format_numeric_columns=True, dictiona
        if 'HEADING' in df:
            valid_tables.append(key)
        else:
            rprint(f'[yellow]  WARNING: Worksheet [bold]{key}[/bold] dropped as it does not have a HEADING column.[/yellow]')
            logger.warning(f'Worksheet {key} dropped as it does not have a HEADING column.')
            continue

        # List column names that don't conform to Rule 19 (using a negative look-ahead regex)
        for col_name in df.filter(regex=r'^(?!HEADING|^[A-Z0-9]{4}_[A-Z0-9]{1,4}$)', axis='columns'):
            rprint(f'[yellow]  WARNING: Column [bold]{col_name}[/bold] dropped as name does not conform to AGS4 Rule 19.[/yellow]')
            logger.warning(f'Column {col_name} dropped as name does not conform to AGS4 Rule 19.')

        # Drop columns that don't conform to Rule 19
@@ -481,13 +457,11 @@ def excel_to_AGS4(input_file, output_file, format_numeric_columns=True, dictiona

        # Finally format numeric column if required
        if format_numeric_columns is True:
            rprint(f'[green]Formatting columns in... [bold]{key}[/bold][/green]')
            logger.info(f'Formatting columns in... {key}')
            tables[key] = convert_to_text(df, dictionary=dictionary)

    # Export dictionary of DataFrames to AGS4 file
    if len(valid_tables) == 0:
        rprint('[red]  ERROR: No valid AGS4 data found in input file. Please see warning messages above.[/red]')
        logger.warning('No valid AGS4 data found in input file. Please see warning messages above.')
    else:
        dataframe_to_AGS4({key: tables[key] for key in valid_tables}, {}, output_file, warnings=False)
@@ -562,7 +536,6 @@ def convert_to_text(dataframe, dictionary=None):
    """

    from python_ags4 import check
    from rich import print as rprint

    # Make copy of dataframe and reset index to make sure numbering
    # starts from zero
@@ -579,8 +552,6 @@ def convert_to_text(dataframe, dictionary=None):
                df = format_numeric_column(df, col, TYPE)

        else:
            rprint("[red]  ERROR: Cannot convert to text as UNIT and/or TYPE row(s) are missing.")
            rprint("[red]         Please provide dictonary file or add UNIT & TYPE rows to input file to proceed.[/red]")
            logger.error('Cannot convert to text as UNIT and/or TYPE row(s) are missing. '
                         'Please provide dictonary file or add UNIT & TYPE rows to input file to proceed.')
            raise AGS4Error("Cannot convert to text as UNIT and/or TYPE row(s) are missing. "
@@ -637,7 +608,6 @@ def convert_to_text(dataframe, dictionary=None):
                    df = format_numeric_column(df, col, TYPE)

                except IndexError:
                    rprint(f"[yellow]  WARNING: [bold]{col}[/bold] not found in the dictionary file.[/yellow]")
                    logger.warning(f'{col} not found in the dictionary file.')

    return df.sort_index().reset_index(drop=True)
@@ -661,8 +631,6 @@ def format_numeric_column(dataframe, column_name, TYPE):
        Dataframe with formatted data.
    '''

    from rich import print as rprint

    df = dataframe.copy()
    col = column_name

@@ -692,11 +660,9 @@ def format_numeric_column(dataframe, column_name, TYPE):
            pass

    except ValueError:
        rprint(f"[yellow]  WARNING: Numeric data in [bold]{col:<9}[/bold] not reformatted as it had one or more non-numeric entries.[/yellow]")
        logger.warning(f"Numeric data in {col:<9} not reformatted as it had one or more non-numeric entries.")

    except TypeError:
        rprint(f"[yellow]  WARNING: Numeric data in [bold]{col:<9}[/bold] not reformatted as it had one or more non-numeric entries.[/yellow]")
        logger.warning(f"Numeric data in {col:<9} not reformatted as it had one or more non-numeric entries.")

    return df
@@ -750,7 +716,6 @@ def check_file(filepath_or_buffer, standard_AGS4_dictionary=None, rename_duplica
    """

    from python_ags4 import check
    from rich import print as rprint
    import traceback

    ags_errors = {}
@@ -787,7 +752,7 @@ def check_file(filepath_or_buffer, standard_AGS4_dictionary=None, rename_duplica

        logger.info('Checking lines...')
        if print_output:
            rprint('[green]  Checking lines...[/green]')
            logger.info('Checking lines...')

        for i, line in enumerate(f, start=1):

@@ -835,16 +800,12 @@ def check_file(filepath_or_buffer, standard_AGS4_dictionary=None, rename_duplica

        # Import data into Pandas dataframes to run group checks
        logger.info('Loading tables...')
        if print_output:
            rprint('[green]  Loading tables...[/green]')

        f.seek(0)
        tables, headings, line_numbers = AGS4_to_dataframe(f, get_line_numbers=True, rename_duplicate_headers=rename_duplicate_headers)

        # Group Checks
        logger.info('Checking headings and groups...')
        if print_output:
            rprint('[green]  Checking headings and groups...[/green]')

        ags_errors = check.rule_2(tables, headings, line_numbers, ags_errors=ags_errors)
        ags_errors = check.rule_2b(tables, headings, line_numbers, ags_errors=ags_errors)
@@ -878,8 +839,6 @@ def check_file(filepath_or_buffer, standard_AGS4_dictionary=None, rename_duplica
        dictionary = check.combine_DICT_tables(tables_std_dict, tables)

        logger.info('Checking file schema...')
        if print_output:
            rprint('[green]  Checking file schema...[/green]')

        ags_errors = check.rule_7_2(headings, dictionary, line_numbers, ags_errors=ags_errors)
        ags_errors = check.rule_9(headings, dictionary, line_numbers, ags_errors=ags_errors)
@@ -928,8 +887,6 @@ def check_file(filepath_or_buffer, standard_AGS4_dictionary=None, rename_duplica

    except Exception as err:
        logger.exception(err)
        if print_output:
            rprint(f'[red]\n{traceback.format_exc()}[/red]')

        ags_errors = check.add_error_msg(ags_errors, 'General', '-', '',
                                         'Could not complete validation. Please fix listed errors and try again.')
@@ -966,7 +923,6 @@ def write_error_report(ags_errors, output_file, show_warnings=False, show_fyi=Fa
    None
    '''

    from rich import print as rprint
    import textwrap

    error_count, warnings_count, fyi_count = count_errors(ags_errors)
@@ -1045,11 +1001,11 @@ def write_error_report(ags_errors, output_file, show_warnings=False, show_fyi=Fa
                        f.write(f'''  Line {entry['line']:<8} {entry['group'].strip('"'):<7} {entry['desc']}\r\n''')
                    f.write('\r\n')

        rprint(f'\n[yellow]Error report saved in {output_file}[/yellow]\n')
        logger.info(f'Error report saved in {output_file}')

    except FileNotFoundError:
        rprint('[red]\nERROR: Invalid output file path. Error report could not be saved.[/red]')
        rprint('[red]       Please ensure that the specified directory exists.[/red]')
        logger.error('Invalid output file path. Error report could not be saved. '
                     'Please ensure that the specified directory exists.')

    except TypeError:
        # Nothing to do if output_file is None
@@ -1104,7 +1060,6 @@ def sort_groups(tables, sorting_strategy='dictionary'):
    """

    from .check import pick_standard_dictionary, combine_DICT_tables
    from rich import print as rprint

    # Combine standard dictionary with DICT table in input file to create an extended dictionary
    # This extended dictionary is used to check the table order
@@ -1146,7 +1101,6 @@ def sort_groups(tables, sorting_strategy='dictionary'):
    for item in sorted(set(tables.keys()).difference(set(sorted_tables.keys()))):
        msg = f'WARNING:Table {item} appended to the end as it was either not found in the dictionary '\
              'or its parent group is not defined under DICT_PGRP.'
        rprint(f"[yellow]{msg}[/yellow]")
        logger.warning(f"{msg}")
        sorted_tables[item] = tables[item]

+0 −10
Original line number Diff line number Diff line
@@ -96,7 +96,6 @@ def combine_DICT_tables(*ags_tables):

    from pandas import DataFrame, concat
    from .AGS4 import AGS4Error
    from rich import print as rprint

    # Initialize DataFrame to hold all dictionary entries
    master_DICT = DataFrame()
@@ -108,15 +107,12 @@ def combine_DICT_tables(*ags_tables):

        except KeyError:
            # KeyError if there is no DICT table in an input file
            rprint('[yellow]  WARNING: DICT group not found in input file.[/yellow]')
            logger.warning('DICT group not found in input file.')

    # Check whether master_DICT is empty
    if master_DICT.shape[0] == 0:
        msg = 'No DICT groups available to proceed with checking. '\
              'Please ensure the input file has a DICT group or provide file with standard AGS4 dictionary.'

        rprint(f'[red]  ERROR: {msg}[/red]')
        logger.error(msg)

        raise AGS4Error(msg)
@@ -198,7 +194,6 @@ def pick_standard_dictionary(tables=None, dict_version=None):
    """

    from pathlib import Path
    from rich import print as rprint

    # Select standard dictionary based on TRAN_AGS
    try:
@@ -210,27 +205,22 @@ def pick_standard_dictionary(tables=None, dict_version=None):
            path_to_standard_dictionary = Path(__file__).parent / STANDARD_DICT_FILES[dict_version]

        else:
            rprint('[yellow]  WARNING: Standard dictionary for AGS4 version specified in TRAN_AGS not available.[/yellow]')
            rprint(f'[yellow]           Defaulting to standard dictionary v{LATEST_DICT_VERSION}.[/yellow]')
            logger.warning('Standard dictionary for AGS4 version specified in TRAN_AGS not available. '
                           f'Defaulting to standard dictionary v{LATEST_DICT_VERSION}.')
            path_to_standard_dictionary = Path(__file__).parent / STANDARD_DICT_FILES[LATEST_DICT_VERSION]

    except KeyError:
        # TRAN table not in file
        rprint(f'[yellow]  WARNING: TRAN_AGS not found. Defaulting to standard dictionary v{LATEST_DICT_VERSION}.[/yellow]')
        logger.warning(f'TRAN_AGS not found. Defaulting to standard dictionary v{LATEST_DICT_VERSION}.')
        path_to_standard_dictionary = Path(__file__).parent / STANDARD_DICT_FILES[LATEST_DICT_VERSION]

    except IndexError:
        # No DATA rows in TRAN table
        rprint(f'[yellow]  WARNING: TRAN_AGS not found. Defaulting to standard dictionary v{LATEST_DICT_VERSION}.[/yellow]')
        logger.warning(f'TRAN_AGS not found. Defaulting to standard dictionary v{LATEST_DICT_VERSION}.')
        path_to_standard_dictionary = Path(__file__).parent / STANDARD_DICT_FILES[LATEST_DICT_VERSION]

    except TypeError:
        # TRAN table not found and dict_version not valid
        rprint(f'[yellow]  WARNING: Neither TRAN_AGS nor dict_version is valid. Defaulting to standard dictionary v{LATEST_DICT_VERSION}.[/yellow]')
        logger.warning(f'TRAN_AGS not found. Defaulting to standard dictionary v{LATEST_DICT_VERSION}.')
        path_to_standard_dictionary = Path(__file__).parent / STANDARD_DICT_FILES[LATEST_DICT_VERSION]