In [1]: import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns from sklearn.l...
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
In [2]:
# Load the dataset from an Excel workbook addressed by a relative path, so the
# file must sit in the notebook's working directory. The note at the end of
# this notebook gives the download location:
# https://github.com/Jakesh-Bohaju/Python-Data-Science/blob/master/Folds5x2_pp.xlsx
df = pd.read_excel("Folds5x2_pp.xlsx")
In [3]:
# Preview only the first rows; echoing the bare frame renders a long table
# that bloats the saved notebook without adding information.
df.head()
Out[3]:
In [4]:
# Per-column means. Use the DataFrame method rather than np.mean(df): NumPy
# forwards the call to DataFrame.mean(axis=None), which pandas >= 2.0 reduces
# over both axes into a single scalar instead of one value per column.
df.mean()
Out[4]:
In [5]:
# Median of each column, shown as the cell's output.
df.median()
Out[5]:
In [6]:
# Standard deviation of each column, shown as the cell's output.
df.std()
Out[6]:
In [7]:
# Pairwise correlation between the frame's columns (pandas' default method),
# shown as a table.
df.corr()
Out[7]:
In [8]:
# Draw the correlation matrix as a heatmap; annot=True prints each
# coefficient inside its cell.
corr_matrix = df.corr()
sns.heatmap(data=corr_matrix, annot=True)
plt.show()
In [9]:
# Bind each column of the frame to a descriptive name for the plots below.
(
    ambient_temperature,
    exhaust_vacuum,
    ambient_pressure,
    relative_humidity,
    plant_energy_output,
) = (df[column] for column in ("AT", "V", "AP", "RH", "PE"))
In [10]:
# Create a 2 x 3 grid of axes; `ccpp` is indexed as ccpp[row, col].
fig, ccpp = plt.subplots(nrows=2, ncols=3)
In [11]:
# One box plot per variable on the 2 x 3 grid of axes. Each panel gets a
# title so the figure can be read on its own.
boxplot_panels = [
    (ccpp[0, 0], ambient_temperature, "Ambient temperature (AT)"),
    (ccpp[0, 1], exhaust_vacuum, "Exhaust vacuum (V)"),
    (ccpp[0, 2], ambient_pressure, "Ambient pressure (AP)"),
    (ccpp[1, 0], relative_humidity, "Relative humidity (RH)"),
    (ccpp[1, 1], plant_energy_output, "Plant energy output (PE)"),
]
for panel, values, label in boxplot_panels:
    panel.boxplot(values)
    panel.set_title(label)

# Only five variables are plotted, so hide the unused sixth panel.
ccpp[1, 2].set_visible(False)

# End on the figure itself so the notebook renders the plots rather than the
# dict of artists returned by the last boxplot() call.
fig
Out[11]:
In [12]:
# Enlarge the existing box-plot figure and display it. Calling
# plt.figure(figsize=(20, 8)) here would open a new, empty figure, so the
# requested size would never reach the box plots.
fig.set_size_inches(20, 8)
fig
In [13]:
# Histogram of ambient temperature (AT)
In [14]:
# Distribution of ambient temperature in 10 bins, with a title and axis
# labels so the figure stands alone when the notebook is skimmed.
plt.hist(ambient_temperature, bins=10)
plt.title("Distribution of ambient temperature (AT)")
plt.xlabel("AT")
plt.ylabel("Number of observations")
plt.show()
In [15]:
# Histogram of ambient pressure (AP)
In [16]:
# Distribution of ambient pressure in 20 bins, with a title and axis labels
# so the figure stands alone when the notebook is skimmed.
plt.hist(ambient_pressure, bins=20)
plt.title("Distribution of ambient pressure (AP)")
plt.xlabel("AP")
plt.ylabel("Number of observations")
plt.show()
In [17]:
# Distribution of exhaust vacuum in 10 bins, with a title and axis labels so
# the figure stands alone when the notebook is skimmed.
plt.hist(exhaust_vacuum, bins=10)
plt.title("Distribution of exhaust vacuum (V)")
plt.xlabel("V")
plt.ylabel("Number of observations")
plt.show()
In [18]:
# Distribution of relative humidity in 10 bins, with a title and axis labels
# so the figure stands alone when the notebook is skimmed.
plt.hist(relative_humidity, bins=10)
plt.title("Distribution of relative humidity (RH)")
plt.xlabel("RH")
plt.ylabel("Number of observations")
plt.show()
In [19]:
# Distribution of plant energy output in 20 bins, with a title and axis
# labels so the figure stands alone when the notebook is skimmed.
plt.hist(plant_energy_output, bins=20)
plt.title("Distribution of plant energy output (PE)")
plt.xlabel("PE")
plt.ylabel("Number of observations")
plt.show()
In [20]:
# Use the first 80% of the rows as the training set and the remaining 20% as the test set.
In [21]:
# Training split: the first 7655 rows of the frame. The first four columns
# are the predictors; everything from the fifth column onward is the target.
train_rows = df.iloc[:7655]
xtrain = train_rows.iloc[:, :4]
ytrain = train_rows.iloc[:, 4:]
In [22]:
# Test split: every row from position 7655 onward. The first four columns are
# the predictors; everything from the fifth column onward is the target.
test_rows = df.iloc[7655:]
xtest = test_rows.iloc[:, :4]
ytest = test_rows.iloc[:, 4:]
In [23]:
# Baseline model: scikit-learn LinearRegression with its default settings.
# The estimator is unfitted until .fit() is called in a later cell.
lin_model = LinearRegression()
In [24]:
# Fit the baseline linear model on the training predictors and target.
lin_model.fit(X=xtrain, y=ytrain)
Out[24]:
In [25]:
# Predictions of the fitted baseline model for the held-out rows, shown as
# the cell's output.
lin_model.predict(X=xtest)
Out[25]:
In [26]:
# Score of the baseline model on the held-out rows (the R^2 coefficient
# reported by scikit-learn regressors).
lin_model.score(X=xtest, y=ytest)
Out[26]:
In [27]:
# Scatter of PE against AT with seaborn's fitted regression line overlaid.
sns.lmplot(x="AT", y="PE", data=df)
# Title spelling corrected ("Ambident Temerature" -> "Ambient Temperature").
plt.title('Ambient Temperature and Plant Energy Relation')
plt.show()
In [28]:
# Scatter of PE against AP with seaborn's fitted regression line overlaid.
sns.lmplot(x="AP", y="PE", data=df)
# Title spelling corrected ("Ambident" -> "Ambient").
plt.title('Ambient Pressure and Plant Energy Relation')
plt.show()
In [29]:
# Scatter of PE against V with seaborn's fitted regression line overlaid.
sns.lmplot(x="V", y="PE", data=df)
# The V column is bound to `exhaust_vacuum` earlier in this notebook, so the
# title says "Vacuum" rather than "Volume".
plt.title('Exhaust Vacuum and Plant Energy Relation')
plt.show()
In [30]:
# Relative humidity against energy output, with seaborn's regression fit
# drawn over the scatter.
sns.lmplot(data=df, x="RH", y="PE")
plt.title('Relative Humidity and Plant Energy Relation')
plt.show()
In [31]:
# Degree-2 polynomial regression: expand the predictors into polynomial
# terms, fit a linear model on the expanded training matrix, and report the
# score on the held-out rows.
quad_feature = PolynomialFeatures(degree=2)
# Fit the expansion on the training data only, then reuse it for the test data.
xtrain_quad = quad_feature.fit_transform(xtrain)
xtest_quad = quad_feature.transform(xtest)
# fit() returns the estimator itself, so construction and fitting are chained.
quad_model = LinearRegression().fit(xtrain_quad, ytrain)
# The former predict() call was removed: its result was discarded, and
# score() makes its own predictions. The score is the cell's output.
quad_model.score(xtest_quad, ytest)
Out[31]:
In [32]:
# Degree-3 polynomial regression, scored on the held-out rows.
# NOTE: the quad_* names are kept so the kernel namespace is unchanged, even
# though this fit is cubic rather than quadratic.
quad_feature = PolynomialFeatures(degree=3)
# Fit the expansion on the training data only, then reuse it for the test data.
xtrain_quad = quad_feature.fit_transform(xtrain)
xtest_quad = quad_feature.transform(xtest)
# fit() returns the estimator itself, so construction and fitting are chained.
quad_model = LinearRegression().fit(xtrain_quad, ytrain)
# The former predict() call was removed: its result was discarded, and
# score() makes its own predictions. The score is the cell's output.
quad_model.score(xtest_quad, ytest)
Out[32]:
In [33]:
# Degree-4 polynomial regression, scored on the held-out rows.
# NOTE: the quad_* names are kept so the kernel namespace is unchanged, even
# though this fit is degree 4 rather than quadratic.
quad_feature = PolynomialFeatures(degree=4)
# Fit the expansion on the training data only, then reuse it for the test data.
xtrain_quad = quad_feature.fit_transform(xtrain)
xtest_quad = quad_feature.transform(xtest)
# fit() returns the estimator itself, so construction and fitting are chained.
quad_model = LinearRegression().fit(xtrain_quad, ytrain)
# The former predict() call was removed: its result was discarded, and
# score() makes its own predictions. The score is the cell's output.
quad_model.score(xtest_quad, ytest)
Out[33]:
In [34]:
# Degree-5 polynomial regression, scored on the held-out rows.
# NOTE: the quad_* names are kept so the kernel namespace is unchanged, even
# though this fit is degree 5 rather than quadratic.
quad_feature = PolynomialFeatures(degree=5)
# Fit the expansion on the training data only, then reuse it for the test data.
xtrain_quad = quad_feature.fit_transform(xtrain)
xtest_quad = quad_feature.transform(xtest)
# fit() returns the estimator itself, so construction and fitting are chained.
quad_model = LinearRegression().fit(xtrain_quad, ytrain)
# The former predict() call was removed: its result was discarded, and
# score() makes its own predictions. The score is the cell's output.
quad_model.score(xtest_quad, ytest)
Out[34]:
In [35]:
# Degree-6 polynomial regression, scored on the held-out rows.
# NOTE: the quad_* names are kept so the kernel namespace is unchanged, even
# though this fit is degree 6 rather than quadratic.
quad_feature = PolynomialFeatures(degree=6)
# Fit the expansion on the training data only, then reuse it for the test data.
xtrain_quad = quad_feature.fit_transform(xtrain)
xtest_quad = quad_feature.transform(xtest)
# fit() returns the estimator itself, so construction and fitting are chained.
quad_model = LinearRegression().fit(xtrain_quad, ytrain)
# The former predict() call was removed: its result was discarded, and
# score() makes its own predictions. The score is the cell's output.
quad_model.score(xtest_quad, ytest)
Out[35]:
In [36]:
# Degree-7 polynomial regression, scored on the held-out rows.
# NOTE: the quad_* names are kept so the kernel namespace is unchanged, even
# though this fit is degree 7 rather than quadratic.
quad_feature = PolynomialFeatures(degree=7)
# Fit the expansion on the training data only, then reuse it for the test data.
xtrain_quad = quad_feature.fit_transform(xtrain)
xtest_quad = quad_feature.transform(xtest)
# fit() returns the estimator itself, so construction and fitting are chained.
quad_model = LinearRegression().fit(xtrain_quad, ytrain)
# The former predict() call was removed: its result was discarded, and
# score() makes its own predictions. The score is the cell's output.
quad_model.score(xtest_quad, ytest)
Out[36]:
In [37]:
# Degree-8 polynomial regression, scored on the held-out rows.
# NOTE: the quad_* names are kept so the kernel namespace is unchanged, even
# though this fit is degree 8 rather than quadratic.
quad_feature = PolynomialFeatures(degree=8)
# Fit the expansion on the training data only, then reuse it for the test data.
xtrain_quad = quad_feature.fit_transform(xtrain)
xtest_quad = quad_feature.transform(xtest)
# fit() returns the estimator itself, so construction and fitting are chained.
quad_model = LinearRegression().fit(xtrain_quad, ytrain)
# The former predict() call was removed: its result was discarded, and
# score() makes its own predictions. The score is the cell's output.
quad_model.score(xtest_quad, ytest)
Out[37]:
In [38]:
# Degree-9 polynomial regression, scored on the held-out rows.
# NOTE: the quad_* names are kept so the kernel namespace is unchanged, even
# though this fit is degree 9 rather than quadratic.
quad_feature = PolynomialFeatures(degree=9)
# Fit the expansion on the training data only, then reuse it for the test data.
xtrain_quad = quad_feature.fit_transform(xtrain)
xtest_quad = quad_feature.transform(xtest)
# fit() returns the estimator itself, so construction and fitting are chained.
quad_model = LinearRegression().fit(xtrain_quad, ytrain)
# The former predict() call was removed: its result was discarded, and
# score() makes its own predictions. The score is the cell's output.
quad_model.score(xtest_quad, ytest)
Out[38]:
In [39]:
# Degree-10 polynomial regression, scored on the held-out rows.
# NOTE: the quad_* names are kept so the kernel namespace is unchanged, even
# though this fit is degree 10 rather than quadratic.
# NOTE(review): a degree-10 expansion of unscaled predictors may be badly
# conditioned and prone to overfitting -- confirm the score is trustworthy.
quad_feature = PolynomialFeatures(degree=10)
# Fit the expansion on the training data only, then reuse it for the test data.
xtrain_quad = quad_feature.fit_transform(xtrain)
xtest_quad = quad_feature.transform(xtest)
# fit() returns the estimator itself, so construction and fitting are chained.
quad_model = LinearRegression().fit(xtrain_quad, ytrain)
# The former predict() call was removed: its result was discarded, and
# score() makes its own predictions. The score is the cell's output.
quad_model.score(xtest_quad, ytest)
Out[39]:
In [40]:
# Scatter of the training rows: the AT predictor on the x-axis against the
# training target values on the y-axis.
scatter_fig, scatter_ax = plt.subplots()
scatter_ax.scatter(xtrain['AT'], ytrain)
plt.show()
In [ ]:
Get dataset from "https://github.com/Jakesh-Bohaju/Python-Data-Science/blob/master/Folds5x2_pp.xlsx"
COMMENTS