# Importing Libraries:

import pandas as pd
import numpy as np

from scipy.stats import shapiro, normaltest, kurtosis, skew
from statsmodels.stats.diagnostic import lilliefors
from statsmodels.api import qqplot

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, KFold, cross_validate
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.metrics import r2_score, mean_squared_error

from sklearn.linear_model import Lasso, Ridge
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor

from sklearn.model_selection import GridSearchCV
from sklearn.base import clone
from sklearn.feature_selection import SelectKBest, f_regression, RFECV

import joblib
import os


# Importing data:

df_raw = pd.read_csv("Solar_Power_Plant_Data.csv")


# Copy of the dataset:

df = df_raw.copy()


# Looking at the first 5 rolls:

df.head()


# Information about the dataset:

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8760 entries, 0 to 8759
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Date-Hour(NMT)       8760 non-null   object 
 1   WindSpeed            8760 non-null   float64
 2   Sunshine             8760 non-null   int64  
 3   AirPressure          8760 non-null   float64
 4   Radiation            8760 non-null   float64
 5   AirTemperature       8760 non-null   float64
 6   RelativeAirHumidity  8760 non-null   int64  
 7   SystemProduction     8760 non-null   float64
dtypes: float64(5), int64(2), object(1)
memory usage: 547.6+ KB


# Missing data:

df.isna().sum()

Date-Hour(NMT)         0
WindSpeed              0
Sunshine               0
AirPressure            0
Radiation              0
AirTemperature         0
RelativeAirHumidity    0
SystemProduction       0
dtype: int64


# Changing Date-Hour(NMT) column type:

df['Date-Hour(NMT)'] = pd.to_datetime(df['Date-Hour(NMT)'], format="%d.%m.%Y-%H:%M")

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8760 entries, 0 to 8759
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Date-Hour(NMT)       8760 non-null   datetime64[ns]
 1   WindSpeed            8760 non-null   float64       
 2   Sunshine             8760 non-null   int64         
 3   AirPressure          8760 non-null   float64       
 4   Radiation            8760 non-null   float64       
 5   AirTemperature       8760 non-null   float64       
 6   RelativeAirHumidity  8760 non-null   int64         
 7   SystemProduction     8760 non-null   float64       
dtypes: datetime64[ns](1), float64(5), int64(2)
memory usage: 547.6 KB


# Setting the Date-Hour(NUMT) as the index:

df.set_index("Date-Hour(NMT)", inplace=True)


# Let's change the type of the numeric variables:

df['Sunshine'] = df['Sunshine'].astype("int16")
df['RelativeAirHumidity'] = df['RelativeAirHumidity'].astype("int16")
df['WindSpeed'] = df['WindSpeed'].astype("float32")
df['Radiation'] = df['Radiation'].astype("float32")
df['AirTemperature'] = df['AirTemperature'].astype("float32")


# Information about the dataset:

df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 8760 entries, 2017-01-01 00:00:00 to 2017-12-31 23:00:00
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   WindSpeed            8760 non-null   float32
 1   Sunshine             8760 non-null   int16  
 2   AirPressure          8760 non-null   float64
 3   Radiation            8760 non-null   float32
 4   AirTemperature       8760 non-null   float32
 5   RelativeAirHumidity  8760 non-null   int16  
 6   SystemProduction     8760 non-null   float64
dtypes: float32(3), float64(2), int16(2)
memory usage: 342.2 KB


# Descriptive Statistics:

df.describe()


# Range of datetime column:

print(f"Range of Datetime column: ({df.index.min()}) to ({df.index.max()})")

Range of Datetime column: (2017-01-01 00:00:00) to (2017-12-31 23:00:00)


# Let's see how many data points have negative Radiation:

df[df["Radiation"] < 0].shape

(4464, 7)


# Analysing average monthly Radiation and Sunshine:

df["Months"] = df.index.month_name()
df["Month_number"] = df.index.month
monthly_rad_sun = df.groupby(["Month_number", "Months"]).agg({"Radiation":"mean", "Sunshine":"mean"}).\
    droplevel(level="Month_number")


# Line plot of the Average monthly Radiation and Sushine

fig, ax = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(13, 5))
fig.suptitle("Average monthly Radiation and Sunshine")
plt.xticks(rotation=45)

sns.lineplot(monthly_rad_sun["Radiation"], ax=ax[0])
sns.lineplot(monthly_rad_sun["Sunshine"], ax=ax[1], c="r");


# Dropping Months and Month_number columns:

df.drop(columns=["Months", "Month_number"], inplace=True)


# Histograms:

fig, axes = plt.subplots(nrows=3, ncols=3, sharey=False, figsize=(20, 15))
fig.suptitle("Histograms of the Variables")

sns.histplot(df["SystemProduction"].values, ax=axes[0, 0], kde=True)
axes[0, 0].set_title("SystemProduction")

sns.histplot(df["WindSpeed"].values, ax=axes[0, 1], kde=True)
axes[0, 1].set_title("WindSpeed")

sns.histplot(df["Sunshine"].values, ax=axes[0][2], kde=True)
axes[0][2].set_title("Sunshine")

sns.histplot(df["RelativeAirHumidity"].values, ax=axes[1, 0], kde=True)
axes[1, 0].set_title("RelativeAirHumidity")

sns.histplot(df["Radiation"].values, ax=axes[1, 1], kde=True, legend=False)
axes[1, 1].set_title("Radiation")

sns.histplot(df["AirTemperature"].values, ax=axes[1, 2], kde=True)
axes[1, 2].set_title("AirTemperature")

sns.histplot(df["AirPressure"].values, ax=axes[2, 0], kde=True)
axes[2, 0].set_title("AirPressure");

axes[2, 1].set_visible(False)
axes[2, 2].set_visible(False)


# BoxPlots:

fig, ax = plt.subplots(3, 3, figsize=(20, 15), sharey=False)
fig.suptitle("Boxplots")

sns.boxplot(y=df["SystemProduction"].values, ax=ax[0, 0])
ax[0, 0].set_xlabel("SystemProduction")

sns.boxplot(y=df["WindSpeed"].values, ax=ax[0, 1])
ax[0, 1].set_xlabel("WindSpeed")

sns.boxplot(y=df["Sunshine"].values, ax=ax[0, 2])
ax[0, 2].set_xlabel("Sunshine")

sns.boxplot(y=df["RelativeAirHumidity"].values, ax=ax[1, 0])
ax[1, 0].set_xlabel("RelativeAirHumidity")

sns.boxplot(y=df["Radiation"].values, ax=ax[1, 1])
ax[1, 1].set_xlabel("Radiation")

sns.boxplot(y=df["AirTemperature"].values, ax=ax[1, 2])
ax[1, 2].set_xlabel("AirTemperature")

sns.boxplot(y=df["AirPressure"].values, ax=ax[2, 0])
ax[2, 0].set_xlabel("AirPressure");

ax[2, 1].set_visible(False)
ax[2, 2].set_visible(False)


# QQ Plots:

# Defining subplots:
fig, axe = plt.subplots(3, 3, sharey=False, figsize=(20, 20))
fig.suptitle("Quantile-Quantile Plots")

# Plotting SystemProduction data:
qqplot(df["SystemProduction"], ax=axe[0, 0], line="s");
axe[0, 0].set_title("SystemProduction")

# Plotting WindSpeed data:
qqplot(df["WindSpeed"], ax=axe[0, 1], line="s")
axe[0, 1].set_title("WindSpeed")

# Plotting Sunshine data:
qqplot(df["Sunshine"], ax=axe[0, 2], line="s")
axe[0, 2].set_title("Sunshine")

# Ploting RelativeAirHumidity:
qqplot(df["RelativeAirHumidity"], ax=axe[1, 0], line="s")
axe[1, 0].set_title("RelativeAirHumidity")

# Radiation:
qqplot(df["Radiation"], ax=axe[1, 1], line="s")
axe[1, 1].set_title("Radiation")

# AirTemperature:
qqplot(df["AirTemperature"], ax=axe[1, 2], line="s")
axe[1, 2].set_title("AirTemperature")

# AirPressure
qqplot(df["AirPressure"], ax=axe[2, 0], line="s")
axe[2, 0].set_title("AirPressure");

axe[2, 1].set_visible(False)
axe[2, 2].set_visible(False)


# Function that Calculates Shapiro-Wilk, Lilliefors and D'Agostino_K2 tests:

def normality_tests(df: any):
    tests_names = ["Shapiro-Wilk", "Lilliefors", "D'Agostino_K2"]
    extern_index = np.array(sorted(tests_names*2))
    intern_index = np.array(["statistic", "p-value"]*len(tests_names))

    mult_index = [
        extern_index,
        intern_index
    ]

    results = pd.DataFrame(index=mult_index, columns=df.columns)
   
    for c in df.columns:
        
        # First D'Agostino's K-squared test:
        k2, k2_p = normaltest(df[c])

        # Secondly we will use the Lilliefors test:
        lilliefors_result = lilliefors(df[c])
        ksstat, lilliefours_p = lilliefors_result

        #  We will check the shapiro-Wilk test:
        shapiro_result = shapiro(df[c])
        shapiro_statistic, shapiro_p = shapiro_result.statistic, shapiro_result.pvalue

        results[c] = [k2, k2_p, ksstat, lilliefours_p, shapiro_statistic, shapiro_p]

    return results


# Function that calculates kurtosis and skewness of a dataset:

def kurtosis_skewness(dataset: any):
    index = ["Kurtosis", "Skewness"]
    results = pd.DataFrame(index=index, columns=dataset.columns)
    for c in dataset.columns:
        kurt = kurtosis(dataset[c])
        skewness = skew(dataset[c])
        results[c] = [kurt, skewness]

    return results


# Normality test results:

normality_tests(df)

c:\Users\caios\AppData\Local\Programs\Python\Python311\Lib\site-packages\scipy\stats\_morestats.py:1816: UserWarning: p-value may not be accurate for N > 5000.
  warnings.warn("p-value may not be accurate for N > 5000.")


# Kurtosis and Skewness:

kurtosis_skewness(df)


# Correlation Matrix:

def heatmap_cor(df):
    cor = df.corr()
    mascara = np.zeros_like(cor)
    mascara[np.triu_indices_from(mascara)] = True
    sns.heatmap(cor, mask=mascara, cbar=True, annot=True, cmap="crest")
    

heatmap_cor(df)


# Separating variables (features and target)

X = df.drop(columns="SystemProduction")
y = df["SystemProduction"]


# Divinding dataset in train and test:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Class that put together many feature selection techniques:

class feature_selector:
    seed = 42
    def __init__(self, X, y) -> None:
        self.X_train = X
        self.y_train = y
    
    def  randomforestR_imp(self) -> None:
        model = RandomForestRegressor(random_state=feature_selector.seed)
        model.fit(self.X_train, self.y_train)
        series = pd.Series(index=model.feature_names_in_, data=model.feature_importances_).sort_values(ascending=False)

        # Plotting RandomForest Regression Importance:
        plt.title("RandomForest Importance")
        plt.xlabel("Importance")
        sns.barplot(x=series.values, y=series.index)
        

    def xgbR_imp(self) -> None:
        model = XGBRegressor(random_state=feature_selector.seed)
        model.fit(self.X_train, self.y_train)
        series = pd.Series(index=model.feature_names_in_, data=model.feature_importances_).sort_values(ascending=False)

        # Plotting XGboost Regression Feature Importance:
        plt.title("XGBoost Feature Importance")
        plt.xlabel("Importance")
        sns.barplot(y=series.index, x=series.values)

    # Univariate feature selection:
    def univariate(self, statistic, n="all") -> None:
        selector = SelectKBest(score_func=statistic, k=n)
        selector.fit(self.X_train, self.y_train)
        
        series = pd.Series(index=selector.feature_names_in_, data=selector.scores_).\
            sort_values(ascending=False)
        plt.title("F-Regression Filtering")
        plt.xlabel("F-score")
        sns.barplot(y=series.index, x=series.values)

        return selector
    
    # Wrapper method for feature selection:
    def refcv(self):
        
        models = {
            "Lasso":Lasso(random_state=feature_selector.seed),
            "Ridge":Ridge(random_state=feature_selector.seed),
            "RandomForestR":RandomForestRegressor(random_state=feature_selector.seed),
            "ExtraTreeR":ExtraTreesRegressor(random_state=feature_selector.seed),
            "XGB":XGBRegressor(random_state=feature_selector.seed)
        }

        splits=10
        cross = KFold(n_splits=splits, random_state=feature_selector.seed, shuffle=True)
        ind = [f"Columns {i}" for i in range(1, len(self.X_train.columns) + 1)]
        df = pd.DataFrame(index=ind)
        
        minimo = np.inf
        name = ""
        for key, model in models.items():
            rfecv = RFECV(estimator=model, step=1, cv=cross, min_features_to_select=1, scoring="neg_mean_squared_error")
            rfecv.fit(self.X_train, self.y_train)
            root_mean = np.sqrt(-rfecv.cv_results_["mean_test_score"])
            df[key] = root_mean
            best_value = root_mean[np.argmin(root_mean)]

            if minimo > best_value:
                minimo = best_value
                best_features = rfecv.support_
                name = key

        df_features = pd.DataFrame(columns=self.X_train.columns, data=best_features.reshape(1, -1), index=[name])
    
        return df,  df_features


# Feature selector object:

feature = feature_selector(X_train, y_train)


# Filtering feature selection using f_regression test:

selector_filtering = feature.univariate(statistic=f_regression)


# Plotting Best features accordingly to the Random Forest Algorithm:

feature.randomforestR_imp()


# Plotting the best features accordingly the XGBoost Algorithm:

feature.xgbR_imp()


# Mean test score for all of the Algorithms and for each number of columns:

df_results, df_features = feature.refcv()


# Recursive Feature Elimination results:

df_results


# Features selected using the best Algorithm (ExtraTreeRegression):

df_features


# MinMax Scaler Transformation:

min_max = MinMaxScaler()
X_train_min_max = min_max.fit_transform(X_train)
X_test_min_max = min_max.transform(X_test)


# Standard Scaler Transformation:

std = StandardScaler()
X_train_std = std.fit_transform(X_train)
X_test_std = std.transform(X_test)


# Robust Scaler Transformation:

rob = RobustScaler()
X_train_rb = rob.fit_transform(X_train)
X_test_rb = rob.fit_transform(X_test)


# Function used to evaluate the best algorithms:

def melhor_modelo(X_train, y_train):

    seed = 42
    cv = 5
    score = ['neg_root_mean_squared_error', 'r2']
    result_rmse = {}
    result_r2 = {}

    dicionario = { 
                "Lasso":Lasso(random_state=seed),
                "Ridge":Ridge(random_state=seed),
                "SVR":SVR(),
                "RandomForestR":RandomForestRegressor(random_state=seed),
                "ExtraTreeR":ExtraTreesRegressor(random_state=seed),
                "XGB":XGBRegressor(random_state=seed),
                "MLP":MLPRegressor(random_state=seed, max_iter=2000)
                 }


    for name, model in dicionario.items():
        k_fold = KFold(n_splits=cv, random_state=seed, shuffle=True)
        result = cross_validate(model, X_train, y_train, cv=k_fold, scoring=score)

        result_rmse[name] = -result['test_neg_root_mean_squared_error']
        result_r2[name] = result['test_r2']
        
        
    result_pd_rmse = pd.DataFrame(data=result_rmse)
    result_pd_r2 = pd.DataFrame(data=result_r2)
    
    return result_pd_rmse, result_pd_r2


# Best model:

resultado_rms, resultado_r2 = melhor_modelo(X_train_min_max, y_train)


# Root mean squared error results:

resultado_rms.describe()


# R2 results:

resultado_r2.describe()


# PLotting model's performance:

plt.figure(figsize=(15, 8))
plt.title("Models's Performance - MinMax Scaler")
sns.boxplot(resultado_rms);


# Best model:

resultado_rms, resultado_r2 = melhor_modelo(X_train_std, y_train)


# Root mean squared error results:

resultado_rms.describe()


# R2 results:

resultado_r2.describe()


# PLotting model's performance:

plt.figure(figsize=(15, 8))
plt.title("Models's Performance - Standard Scaler")
sns.boxplot(resultado_rms);


# Best model:

resultado_rms, resultado_r2 = melhor_modelo(X_train_rb, y_train)


# Root mean squared error results:

resultado_rms.describe()


# R2 results:

resultado_r2.describe()


# PLotting model's performance:

plt.figure(figsize=(15, 8))
plt.title("Models's Performance - Robust Scaler")
sns.boxplot(resultado_rms);


# Function for fine tuning an arbitrary model:

def tuning(X_train, y_train, modelo, params):
    
    cv = 5
    score = "neg_root_mean_squared_error"
    grid  = GridSearchCV(modelo, cv=cv, param_grid=params, 
                         scoring=score, 
                         n_jobs=-1,
                         return_train_score=True,
                         )

    grid.fit(X_train, y_train)

    best_index = grid.best_index_
    result = grid.cv_results_

    train_score = -result['mean_train_score'][best_index]
    left_out = -result['mean_test_score'][best_index]



    print(f"Train score: {train_score}")
    print(f"Left out data score: {left_out}")

    return grid.best_estimator_


def save_model(model):
    try:
        joblib.dump(model, "modelo/extra_tree_model.joblib")
    except:
        os.makedirs("modelo")
        path = "modelo/extra_tree_model.joblib"
        joblib.dump(model, path)


# Hypeparameters grid:

params = {"n_estimators":[100, 110, 120],
          "max_depth":[4, 5, 6, 7],
          "max_features":[0.5, 0.6, 0.7],
          "min_samples_split":[3, 5, 6, 7, 10]}


# Fine Tuning an ExtraTreeRegressor:

extra_tree = ExtraTreesRegressor(random_state=42)
best_estimator = tuning(X_train_rb, y_train, extra_tree, params)

Train score: 765.3644951871554
Left out data score: 835.5946923397969


# Saving the best model:

save_model(best_estimator)


# Training the best model using the whole training set:

best_model = clone(best_estimator)
best_model.fit(X_train_rb, y_train)

ExtraTreesRegressor(max_depth=7, max_features=0.7, min_samples_split=3,
                    n_estimators=110, random_state=42)

ExtraTreesRegressor(max_depth=7, max_features=0.7, min_samples_split=3,
                    n_estimators=110, random_state=42)


# Computing Metrics for the training set:

y_pred = best_model.predict(X_train_rb)
rms_train = np.sqrt(mean_squared_error(y_pred, y_train))
r2_train = r2_score(y_pred, y_train)

print("Train set:")
print(f"RMSE: {rms_train}")
print(f"R2: {r2_train}")

Train set:
RMSE: 777.204892038056
R2: 0.5682232687504819


# Computing Metrics for the test set:

y_pred = best_model.predict(X_test_rb)
rmse_test = np.sqrt(mean_squared_error(y_pred, y_test))
r2_test = r2_score(y_pred, y_test)

print("Test set:")
print(f"RMSE: {rmse_test}")
print(f"R2: {r2_test}")

Test set:
RMSE: 819.7307706495565
R2: 0.5347937839096278

	Date-Hour(NMT)	WindSpeed	AirPressure	Radiation	AirTemperature	RelativeAirHumidity
0	01.01.2017-00:00	0.6	1003.8	-7.4	0.1	97
1	01.01.2017-01:00	1.7	1003.5	-7.4	-0.2	98
2	01.01.2017-02:00	0.6	1003.4	-6.7	-1.2	99
3	01.01.2017-03:00	2.4	1003.3	-7.2	-1.3	99
4	01.01.2017-04:00	4.0	1003.1	-6.3	3.6	67

	WindSpeed	Sunshine	AirPressure	Radiation	AirTemperature	RelativeAirHumidity	SystemProduction
count	8760.000000	8760.000000	8760.000000	8760.000000	8760.000000	8760.000000	8760.000000
mean	2.639823	11.180479	1010.361781	97.538498	6.978892	76.719406	684.746071
std	1.628754	21.171295	12.793971	182.336029	7.604266	19.278996	1487.454665
min	0.000000	0.000000	965.900000	-9.300000	-12.400000	13.000000	0.000000
25%	1.400000	0.000000	1002.800000	-6.200000	0.500000	64.000000	0.000000
50%	2.300000	0.000000	1011.000000	-1.400000	6.400000	82.000000	0.000000
75%	3.600000	7.000000	1018.200000	115.599998	13.400000	93.000000	464.249950
max	10.900000	60.000000	1047.300000	899.700012	27.100000	100.000000	7701.000000

		WindSpeed	Sunshine	AirPressure	Radiation	AirTemperature	RelativeAirHumidity	SystemProduction
D'Agostino_K2	statistic	9.806524e+02	2095.571343	6.722747e+01	3444.346422	2.245390e+03	7.997811e+02	4487.420697
D'Agostino_K2	p-value	1.132518e-213	0.000000	2.521971e-15	0.000000	0.000000e+00	2.136654e-174	0.000000
Lilliefors	statistic	8.657087e-02	0.422402	4.527111e-02	0.278957	8.151557e-02	1.136078e-01	0.322633
Lilliefors	p-value	1.000000e-03	0.001000	1.000000e-03	0.001000	1.000000e-03	1.000000e-03	0.001000
Shapiro-Wilk	statistic	9.421549e-01	0.559635	9.922053e-01	0.640642	9.712175e-01	9.158114e-01	0.532115
Shapiro-Wilk	p-value	0.000000e+00	0.000000	1.640946e-21	0.000000	1.376139e-38	0.000000e+00	0.000000

	WindSpeed	Sunshine	AirPressure	Radiation	AirTemperature	RelativeAirHumidity	SystemProduction
Kurtosis	0.645685	0.794225	0.413192	3.634802	-1.020777	-0.506118	5.930889
Skewness	0.904461	1.593429	-0.126781	2.078086	0.078606	-0.727069	2.568234

	Lasso	Ridge	RandomForestR	ExtraTreeR	XGB
Columns 1	1467.860960	1477.986141	1133.246002	1207.972781	1045.061217
Columns 2	1301.266660	1312.583376	974.506010	1003.782575	973.252195
Columns 3	1155.614811	1158.127170	849.627134	934.740169	944.589000
Columns 4	1124.590002	1124.589720	819.966015	864.464221	848.063361
Columns 5	905.993187	905.993069	795.131463	785.949747	823.702505
Columns 6	903.620263	903.620225	772.033482	770.559293	788.551322

1) Importing Libraries and Data Loading¶

2) Data Cleaning¶

3) Exploratory Data Analysis¶

3.1) Exploring the characteristics¶

3.2) Distribution¶

3.3) Normality tests¶

3.4) Correlation¶

4) Preprocessing¶

4.1) Separating into training and testing sets¶

4.2) Feature Selection¶

4.2.1) Filtering selection¶

4.2.2) Random Forest Importance¶

4.2.3) XGBoost Importance¶

4.2.4) Recursive Feature Elimination (RFE)¶

4.3) Transformations¶

4.3.1) MinMax Scaler¶

4.3.2) Standard Scaler¶

4.3.3) Robust Scaler¶

4.4) Selecting the best Algorithm and Transformation¶

4.4.1) Algorithms using MinMax Scaler¶

4.4.2) Algorithms using Standard Scaler¶

4.4.3) Algorithms using Robust Scaler¶

5) Fine Tuning¶

6) Predictions¶

	Lasso	Ridge	SVR	RandomForestR	ExtraTreeR	XGB	MLP
count	5.000000	5.000000	5.000000	5.000000	5.000000	5.000000	5.000000
mean	903.485007	903.400786	1491.625424	777.359187	775.023285	796.631552	903.149462
std	15.562091	15.628135	76.724616	44.754904	37.517289	56.469978	15.249867
min	883.515021	883.085192	1368.718235	709.711976	709.776868	710.759720	884.950591
25%	897.526534	897.819708	1474.371484	767.563919	776.598713	767.368293	895.889170
50%	898.300154	898.192638	1515.347110	781.613578	793.488589	827.759964	897.119584
75%	915.613173	915.621307	1529.832640	796.014928	795.642869	834.921067	916.230928
max	922.470155	922.285084	1569.857651	831.891534	799.609385	842.348716	921.557038

	Lasso	Ridge	SVR	RandomForestR	ExtraTreeR	XGB	MLP
count	5.000000	5.000000	5.000000	5.000000	5.000000	5.000000	5.000000
mean	0.638294	0.638374	0.017998	0.732798	0.734620	0.719589	0.638508
std	0.026434	0.026218	0.015911	0.021213	0.012827	0.025073	0.027293
min	0.599661	0.600050	0.001504	0.704749	0.713481	0.687616	0.598359
25%	0.636953	0.637099	0.003063	0.716051	0.731379	0.707674	0.637671
50%	0.638390	0.638476	0.020332	0.741676	0.741629	0.712968	0.639340
75%	0.642333	0.642327	0.025882	0.748647	0.742695	0.740913	0.641850
max	0.674132	0.673919	0.039210	0.752867	0.743915	0.748775	0.675320

	Lasso	Ridge	SVR	RandomForestR	ExtraTreeR	XGB	MLP
count	5.000000	5.000000	5.000000	5.000000	5.000000	5.000000	5.000000
mean	903.358839	903.348724	1530.475777	777.358424	775.054472	796.703441	862.939395
std	15.613713	15.632424	77.459091	44.998844	37.425748	57.132829	22.959748
min	883.396953	883.376841	1407.559380	709.420058	709.974316	709.015132	830.902836
25%	897.503104	897.532799	1509.000499	766.761334	776.598713	768.916041	856.534749
50%	897.863026	897.762911	1557.105336	782.169400	793.488589	826.964465	863.601966
75%	915.705262	915.761676	1569.520962	796.460193	795.601359	834.373150	869.038325
max	922.325850	922.309391	1609.192706	831.981138	799.609385	844.248418	894.619098

	Lasso	Ridge	SVR	RandomForestR	ExtraTreeR	XGB	MLP
count	5.000000	5.000000	5.000000	5.000000	5.000000	5.000000	5.000000
mean	0.638397	0.638406	-0.033882	0.732799	0.734596	0.719575	0.670359
std	0.026397	0.026382	0.015623	0.021337	0.012812	0.024960	0.019834
min	0.599768	0.599786	-0.050976	0.704685	0.713481	0.688026	0.645921
25%	0.637066	0.637079	-0.047522	0.715733	0.731379	0.708236	0.658544
50%	0.638742	0.638822	-0.034406	0.741889	0.741485	0.711672	0.671233
75%	0.642261	0.642217	-0.020414	0.749172	0.742695	0.742183	0.677795
max	0.674149	0.674128	-0.016094	0.752515	0.743942	0.747761	0.698301

	Lasso	Ridge	SVR	RandomForestR	ExtraTreeR	XGB	MLP
count	5.000000	5.000000	5.000000	5.000000	5.000000	5.000000	5.000000
mean	903.355648	903.348361	1482.596356	777.415145	775.018174	796.668225	876.956954
std	15.623197	15.632835	78.002979	44.754342	37.423964	57.775051	14.926458
min	883.412700	883.384740	1357.623438	709.669560	709.974316	706.881966	863.292220
25%	897.497262	897.526871	1464.431309	767.355707	776.417221	770.807039	867.246465
50%	897.797461	897.752247	1507.997183	781.161161	793.488589	826.768020	870.690222
75%	915.742087	915.767967	1521.297606	797.640675	795.601359	834.138524	883.666721
max	922.328731	922.309982	1561.632242	831.248623	799.609385	844.745576	899.889140