Commit a8a88852 authored by Bryan Eriksson's avatar Bryan Eriksson
Browse files

Merge branch '4-excel-xlsx-file-not-supported' into 'master'

Resolve "Excel xlsx file; not supported"

Closes #4

See merge request !10
parents 1b5ed539 377c3203
Loading
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -2,6 +2,9 @@
__pycache__/
*.pckl

# Notebooks
.ipynb_checkpoints/

# PyCharm
.idea/

+1 −1
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@ variables:
  PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
  GLPK_CACHE_DIR: "$CI_PROJECT_DIR/.chache/glpk"

  LD_LIBRARY_PATH: "/lib:/usr/lib:/usr/local/lib"
  LD_LIBRARY_PATH: "/usr/lib:/usr/local/lib"

cache:
  paths:
+40 −13
Original line number Diff line number Diff line
@@ -18,7 +18,9 @@ import argparse
import json
from contextlib import suppress

import pathlib
import xlrd
import openpyxl

from data_formats import request_format

@@ -67,14 +69,32 @@ class Converter:
            excel_file: The path to the excel file
        """
        self._excel_file = excel_file

        self._file_extension = pathlib.Path(excel_file).suffix
        if self._file_extension not in [".xls", ".xlsx"]:
            raise FormatUnsupportedError(f'File must be of format ".xls" or ".xlsx".')
        
        if self._file_extension=='.xls':
            self._file = xlrd.open_workbook(excel_file)
        else:
            self._file = openpyxl.load_workbook(excel_file)

    def _get_columns(self, sheet_name, start=0):
        """Yield the values of each column of a sheet, without the first row.

        Works for both workbook back-ends: xlrd for ".xls" files and
        openpyxl for ".xlsx" files.

        Args:
            sheet_name: Name of the worksheet to read.
            start: Zero-based index of the first column to yield.

        Yields:
            One list of cell values per column, excluding the column's first
            cell. For ".xlsx" files empty cells are reported as '' and
            columns that are entirely empty are skipped.
        """
        if self._file_extension == '.xls':
            sheet = self._file.sheet_by_name(sheet_name)  # xlrd style
            for colx in range(start, sheet.ncols):
                # ipysheet didn't allow editable column names, so an index
                # row was added to the sheets; it is sliced off here.
                yield sheet.col_values(colx)[1:]
            return

        sheet = self._file[sheet_name]  # openpyxl style
        for index, cells in enumerate(sheet.columns):
            # `sheet.columns` is a generator, so leading columns can only be
            # skipped while iterating.
            if index < start:
                continue

            # Mirror xlrd, which reports empty cells as ''.
            column = ['' if cell.value is None else cell.value for cell in cells]
            # openpyxl may report trailing columns that hold no data at all.
            if all(value == '' for value in column):
                continue
            yield column[1:]


    def _get_general(self):
        """Placeholder that always raises; format-specific subclasses override it.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError
@@ -129,15 +149,22 @@ class Converter:


class _NewFormatConverter(Converter):
    def _get_columns(self, sheet_name, start=1):
        """Yield the data columns of a sheet, skipping the first column by default.

        Args:
            sheet_name: Name of the worksheet to read.
            start: Zero-based index of the first column to yield.

        Returns:
            The column generator produced by the parent implementation.
        """
        # The first column holds all the names for the rows
        return super()._get_columns(sheet_name, start=start)

    def _get_general(self):
        """Read the general settings from the 'General' sheet.

        Returns:
            A dict with the key 'interest_rate' holding the rate as a float.
        """
        if self._file_extension == '.xls':  # xlrd style
            sheet = self._file.sheet_by_name('General')
            # xlrd indices are zero-based; since all sheets moved down one
            # row, the value sits at (2, 1).
            interest_rate = sheet.cell(2, 1).value
        else:  # openpyxl style
            sheet = self._file['General']
            # openpyxl indices are one-based, so the same cell is (3, 2).
            interest_rate = sheet.cell(row=3, column=2).value

        return {
            'interest_rate': float(interest_rate),
        }

    def _get_capacities(self):
@@ -346,7 +373,7 @@ class _NewFormatConverter(Converter):
                    'type': series_type,
                    'stream': stream,
                    'units': units,
                    'data': [float(d) for d in data if d is not None],
                    'data': [float(d) for d in data if d is not None and d != ''],
                }

                if source:
+42 −12
Original line number Diff line number Diff line
@@ -5,6 +5,9 @@ The Excel file has to be in one of the supported formats, which examples can
be found in the data_formats directory.
"""
import xlrd
import openpyxl
import pathlib

from contextlib import suppress


@@ -18,13 +21,31 @@ def convert(excel_file):
class Converter:
    def __init__(self, excel_file):
        """Open the Excel workbook with the reader matching its extension.

        Args:
            excel_file: The path to the Excel file (".xls" or ".xlsx").

        Raises:
            FormatUnsupportedError: If the extension is neither ".xls" nor
                ".xlsx" (compared case-insensitively).
        """
        self._excel_file = excel_file

        # Normalise the case so that e.g. "DATA.XLSX" is accepted and the
        # `== '.xls'` dispatch checks in the reader methods keep working.
        self._file_extension = pathlib.Path(excel_file).suffix.lower()
        if self._file_extension not in ('.xls', '.xlsx'):
            raise FormatUnsupportedError('File must be of format ".xls" or ".xlsx".')

        if self._file_extension == '.xls':
            self._file = xlrd.open_workbook(excel_file)
        else:
            # NOTE(review): openpyxl returns formula text rather than cached
            # values unless data_only=True is passed - confirm whether the
            # input sheets contain formulas.
            self._file = openpyxl.load_workbook(excel_file)

    def _get_columns(self, sheet_name, start=0):
        """Yield the values of each column of a sheet, without the first row.

        Works for both workbook back-ends: xlrd for ".xls" files and
        openpyxl for ".xlsx" files.

        Args:
            sheet_name: Name of the worksheet to read.
            start: Zero-based index of the first column to yield.

        Yields:
            One list of cell values per column, excluding the column's first
            cell. For ".xlsx" files empty cells are reported as '' and
            columns that are entirely empty are skipped.
        """
        if self._file_extension == '.xls':
            sheet = self._file.sheet_by_name(sheet_name)  # xlrd style
            for colx in range(start, sheet.ncols):
                # ipysheet didn't allow editable column names, so an index
                # row was added to the sheets; it is sliced off here.
                yield sheet.col_values(colx)[1:]
            return

        sheet = self._file[sheet_name]  # openpyxl style
        for index, cells in enumerate(sheet.columns):
            # `sheet.columns` is a generator, so leading columns can only be
            # skipped while iterating.
            if index < start:
                continue

            # Mirror xlrd, which reports empty cells as ''.
            column = ['' if cell.value is None else cell.value for cell in cells]
            # openpyxl may report trailing columns that hold no data at all.
            if all(value == '' for value in column):
                continue
            yield column[1:]

    def _get_capacities(self):
        """Placeholder that always raises; format-specific subclasses override it.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError
@@ -44,8 +65,8 @@ class Converter:


class NewFormatConverter(Converter):
    def _get_columns(self, sheet_name, start=1):
        """Yield the columns of a sheet, skipping the first column by default.

        Args:
            sheet_name: Name of the worksheet to read.
            start: Zero-based index of the first column to yield.

        Returns:
            The column generator produced by the parent implementation.
        """
        return super()._get_columns(sheet_name, start=start)

    def _get_capacities(self):
        capacities = []
@@ -129,11 +150,20 @@ class NewFormatConverter(Converter):
        return links

    def _get_network(self):
        """Read the network cost parameters from the 'Network' sheet.

        Returns:
            A dict with the float entries 'fixed_network_investment_cost'
            and 'link_proportional_cost'.
        """
        # Shifting the sheets one row down required changing the row indices
        # used here.
        if self._file_extension == '.xls':
            sheet = self._file.sheet_by_name('Network')  # xlrd style
            # xlrd indices are zero-based.
            fixed_cost = sheet.cell(1, 1).value
            proportional_cost = sheet.cell(2, 1).value
        else:
            sheet = self._file["Network"]  # openpyxl style
            # openpyxl indices are one-based, so the same cells are one
            # row and one column further.
            fixed_cost = sheet.cell(row=2, column=2).value
            proportional_cost = sheet.cell(row=3, column=2).value

        return {
            'fixed_network_investment_cost': float(fixed_cost),
            'link_proportional_cost': float(proportional_cost),
        }

        



Loading