Commit f391acbc authored by Jan-Peter Ceglarek's avatar Jan-Peter Ceglarek

erased flake8 and pylint problems

parent 87352a87
Pipeline #129248954 failed with stages
in 4 minutes and 3 seconds
......@@ -8,14 +8,13 @@ import mlflow
import mlflow.tracking
import numpy as np
import pandas as pd
import xgboost as xgb
from fets.pipeline import FeatureUnion2DF
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
import xgboost as xgb
class FeatureImportanceOptimization(BaseEstimator, TransformerMixin):
......@@ -24,7 +23,7 @@ class FeatureImportanceOptimization(BaseEstimator, TransformerMixin):
def __init__(self, list_of_transformers, selection_methods):
def __init__(self, list_of_transformers):
""" The constructor will help parameterize all options of this
......@@ -38,7 +37,7 @@ class FeatureImportanceOptimization(BaseEstimator, TransformerMixin):
# List of best features finally selected after fit()
self.best_features = []
self.best_features = None
# Building eventually cascading pipelines
......@@ -254,7 +253,8 @@ class FeatureImportanceOptimization(BaseEstimator, TransformerMixin):
return dataset
def find_best_method(self, selection_methods, input_x, input_y):
def find_best_method(self, selection_methods, input_x, input_y,
""" Finding best feature selection method by making predictions with
each selection method, logging their validation values and returning
the method name with the best result.
......@@ -272,37 +272,37 @@ class FeatureImportanceOptimization(BaseEstimator, TransformerMixin):
ml_path = "../mlflow"
# Create directory to store the MLflow experiment in
if (Path(ml_path).exists() is False):
if Path(ml_path).exists() is False:
# Establish experiment id for the whole MLflow Tracking so that it can
# be found in MLflow easily
exp_id = mlflow.create_experiment("feat_select_method_validation",
mlflow.create_experiment("feat_select_method_validation", ml_path)
# Prepare train and test data here, so every method is tested
# with the same set
X_train, x_test, Y_train, y_test = train_test_split(input_x,
x_train, x_test, y_train, y_test = train_test_split(input_x,
shuffle is False)
best_method = None
best_method = "fist_best" # for testing
# best_method = None
# best_method = "fist_best" # for testing
for method in selection_methods:
# Start experiment with each run being one feature selection method
mlflow.set_tags(method, rmse)
mlflow.set_tag("Feature Selection Method", method)
mlflow.set_tag("Validation Method", validation_method)
# Get feature list filtered by corresponding method
feature_list = self.filter_importances(list_of_fimp, method)
# Train a model with filtered dataset
self.models[-1].fit(X_train, Y_train)
self.models[-1].fit(x_train, y_train)
# Predict
y_test_prediction = model.predict(x_test)
y_test_prediction = self.models[-1].predict(x_test)
# Evaluation
rmse = mean_squared_error(y_test, y_test_prediction)
......@@ -316,7 +316,7 @@ class FeatureImportanceOptimization(BaseEstimator, TransformerMixin):
# ! #todo: get best prediction method/score from mlflow
# t o d o : get best prediction method/score from mlflow
# sort method_and_score_log by score value and pick top score
# best_method = $top_pick$
......@@ -366,9 +366,11 @@ class FeatureImportanceOptimization(BaseEstimator, TransformerMixin):
# selection_methods = ["first_best"]
# find best feature selection method
# right spot to call this not yet defined
find_best_method(selection_methods, input_dataset, input_y)
self.find_best_method(method, input_dataset, input_y, list_of_fimp)
return self
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment