Commit aa35d9bf authored by Bryan Eriksson
Browse files

Use openpyxl for .xlsx files

Check for Excel file type and use xlrd or openpyxl accordingly
parent 6b5448e6
Loading
Loading
Loading
Loading
+42 −12
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@ import json
from contextlib import suppress

import xlrd
import openpyxl

from data_formats import request_format

@@ -67,14 +68,36 @@ class Converter:
            excel_file: The path to the excel file
        """
        self._excel_file = excel_file
        
        if self._excel_file.endswith('.xls'):
            self._kind = 'xls'
        elif self._excel_file.endswith('.xlsx'):
            self._kind = 'xlsx'
        else:
            raise FormatUnsupportedError(f'File must be of format ".xls" or ".xlsx".')
        
        if self._kind=='xls':
            self._file = xlrd.open_workbook(excel_file)
        else:
            self._file = openpyxl.load_workbook(excel_file)

    def _get_columns(self, sheet_name, start=0):
        """Yield the cell values of each column of a sheet, minus the first row.

        Works for both workbook backends: ``xlrd`` (``.xls``) and
        ``openpyxl`` (``.xlsx``), selected through ``self._kind``.

        Args:
            sheet_name: Name of the worksheet to read.
            start: Zero-based index of the first column to yield.

        Yields:
            A list holding the values of one column with the first (index)
            row sliced off. For ``.xlsx`` files empty cells are reported as
            ``''`` and columns that are empty throughout are skipped.
        """
        if self._kind == 'xls':
            sheet = self._file.sheet_by_name(sheet_name)  # xlrd style
            for colx in range(start, sheet.ncols):
                # ipysheet didn't allow editable column names, so an index
                # row was added to the sheets; it is sliced off here.
                yield sheet.col_values(colx)[1:]
        elif self._kind == 'xlsx':
            sheet = self._file[sheet_name]  # openpyxl style
            for index, cells in enumerate(sheet.columns):
                # sheet.columns always begins at the first column, so the
                # leading ones are skipped by hand until `start` is reached.
                if index < start:
                    continue

                column = ['' if cell.value is None else cell.value
                          for cell in cells]
                # openpyxl also hands back columns that hold no data at all.
                if all(value == '' for value in column):
                    continue
                yield column[1:]


    def _get_general(self):
        """Hook for reading the general settings.

        The base class provides no implementation.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError()
@@ -129,15 +152,22 @@ class Converter:


class _NewFormatConverter(Converter):
    def _get_columns(self, sheet_name, start=1):
        """Yield the data columns of a sheet, skipping the row-name column.

        Args:
            sheet_name: Name of the worksheet to read.
            start: Zero-based index of the first column to yield. Defaults
                to 1 because the first column holds the names of the rows.

        Returns:
            Whatever the parent class's ``_get_columns`` returns for the
            given sheet and start column.
        """
        # The first column holds all the names for the rows
        return super()._get_columns(sheet_name, start=start)

    def _get_general(self):
        """Read the general settings from the 'General' sheet.

        Returns:
            A dict with the key ``'interest_rate'`` holding the value of the
            cell in the third row, second column, converted to ``float``.
            Returns ``None`` when ``self._kind`` is neither ``'xls'`` nor
            ``'xlsx'``.
        """
        # Both lookups address the same cell. All sheets moved down one row,
        # so the value sits in the third row of the second column. xlrd
        # counts rows/columns from 0, openpyxl counts them from 1.
        if self._kind == 'xls':  # xlrd style
            cell = self._file.sheet_by_name('General').cell(2, 1)
        elif self._kind == 'xlsx':  # openpyxl style
            cell = self._file['General'].cell(3, 2)
        else:
            # Any other kind falls through and yields None, as before.
            return None

        return {
            'interest_rate': float(cell.value),
        }

    def _get_capacities(self):