Commit a7eb026d authored by Bryan Eriksson
Browse files

Use openpyxl for .xlsx files

Removed extra row from many test .xlsx files
More checks for file type
parent fa8e1caf
Loading
Loading
Loading
Loading
Loading
+7 −5
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ import argparse
import json
from contextlib import suppress

import pathlib
import xlrd
import openpyxl

@@ -69,9 +70,11 @@ class Converter:
        """
        self._excel_file = excel_file
        
        if self._excel_file.endswith('.xls'):
        # Determine the file extension (Path.suffix keeps the leading dot)
        file_extension = pathlib.Path(excel_file).suffix
        if file_extension == '.xls':
            self._kind = 'xls'
        elif self._excel_file.endswith('.xlsx'):
        elif file_extension == '.xlsx':
            self._kind = 'xlsx'
        else:
            raise FormatUnsupportedError(f'File must be of format ".xls" or ".xlsx".')
@@ -81,7 +84,6 @@ class Converter:
        else:
            self._file = openpyxl.load_workbook(excel_file)


    def _get_columns(self, sheet_name, start=0):
        if self._kind=='xls':
            sheet = self._file.sheet_by_name(sheet_name) # xlrd style
@@ -376,7 +378,7 @@ class _NewFormatConverter(Converter):
                    'type': series_type,
                    'stream': stream,
                    'units': units,
                    'data': [float(d) for d in data if d is not None],
                    'data': [float(d) for d in data if d is not None and d != ''],
                }

                if source:
+48 −12
Original line number Diff line number Diff line
@@ -5,6 +5,9 @@ The Excel file has to be in one of the supported formats, which examples can
be found in the data_formats directory.
"""
import xlrd
import openpyxl
import pathlib

from contextlib import suppress


@@ -18,13 +21,36 @@ def convert(excel_file):
class Converter:
    def __init__(self, excel_file):
        """Open *excel_file* with the reader that matches its extension.

        ``.xls`` workbooks are opened with ``xlrd`` and ``.xlsx`` workbooks
        with ``openpyxl``.  The extension is compared case-insensitively.

        Args:
            excel_file: Path of the workbook to convert.

        Raises:
            FormatUnsupportedError: If the extension is neither ``.xls``
                nor ``.xlsx``.
        """
        self._excel_file = excel_file

        # Path.suffix keeps the leading dot (e.g. ".xlsx"); lower-casing it
        # also accepts names such as "REPORT.XLSX".
        file_extension = pathlib.Path(excel_file).suffix.lower()
        if file_extension == '.xls':
            self._kind = 'xls'
            self._file = xlrd.open_workbook(excel_file)
        elif file_extension == '.xlsx':
            self._kind = 'xlsx'
            self._file = openpyxl.load_workbook(excel_file)
        else:
            raise FormatUnsupportedError('File must be of format ".xls" or ".xlsx".')

    def _get_columns(self, sheet_name, start=0):
        """Yield the columns of sheet *sheet_name* as lists of cell values.

        The first cell of every column is dropped before the column is
        yielded.

        Args:
            sheet_name: Name of the worksheet to read.
            start: Zero-based index of the first column to yield.

        Yields:
            list: The values of one column without its first cell.  For
            ``.xlsx`` workbooks empty cells are reported as ``''`` and
            columns made up only of empty cells are skipped.
        """
        if self._kind == 'xls':
            sheet = self._file.sheet_by_name(sheet_name)  # xlrd style
            for colx in range(start, sheet.ncols):
                # ipysheet didn't allow editable column names, so an index
                # row was added to the sheets; it is sliced off here.
                yield sheet.col_values(colx)[1:]
        elif self._kind == 'xlsx':
            sheet = self._file[sheet_name]  # openpyxl style
            for index, cells in enumerate(sheet.columns):
                # Columns are only iterated here, so the ones before
                # ``start`` are skipped inside the loop.
                if index < start:
                    continue

                column = ['' if cell.value is None else cell.value for cell in cells]
                # Skip columns that contain nothing but empty cells.
                if all(value == '' for value in column):
                    continue
                yield column[1:]

    def _get_capacities(self):
        raise NotImplementedError
@@ -44,8 +70,8 @@ class Converter:


class NewFormatConverter(Converter):
    def _get_columns(self, sheet_name, start=1):
        """Yield the columns of *sheet_name*, skipping the first by default.

        Delegates to the parent implementation; the only difference is
        that ``start`` defaults to 1 here.

        Args:
            sheet_name: Name of the worksheet to read.
            start: Zero-based index of the first column to yield.
        """
        return super()._get_columns(sheet_name, start=start)

    def _get_capacities(self):
        capacities = []
@@ -129,11 +155,21 @@ class NewFormatConverter(Converter):
        return links

    def _get_network(self):
        """Read the network cost parameters from the ``Network`` sheet.

        Returns:
            dict: The keys ``fixed_network_investment_cost`` and
            ``link_proportional_cost``, each converted to ``float``.
        """
        if self._kind == 'xls':
            sheet = self._file.sheet_by_name('Network')  # xlrd style
        else:
            sheet = self._file['Network']  # openpyxl style

        # Shifting the sheets one row down required changing the row
        # indices used here.
        # NOTE(review): xlrd's cell() is 0-based while openpyxl's cell() is
        # 1-based, so the same (row, column) pair addresses different cells
        # for the two kinds -- confirm this matches the .xlsx sheet layout.
        return {
            'fixed_network_investment_cost': float(sheet.cell(1, 1).value),
            'link_proportional_cost': float(sheet.cell(2, 1).value),
        }

        



+6.14 KiB (32.7 KiB)

File changed.

No diff preview for this file type.

Loading