Shashank Shanu
a year ago
import pandas as pd
import numpy as np
data = pd.read_csv("Salary_Data.csv")   # load the dataset (YearsExperience, Salary)
data
data.shape
(30, 2)
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   YearsExperience  30 non-null     float64
 1   Salary           30 non-null     float64
dtypes: float64(2)
memory usage: 608.0 bytes
data.isnull().count()
YearsExperience 30
Salary 30
dtype: int64
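Note that isnull().count() returns the total number of entries per column, not the number of missing values. If the goal is to count missing entries, the usual pandas idiom (a small optional check on the same DataFrame) is:
data.isnull().sum()   # number of NaN entries per column; should be 0 for both columns here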
X = data.iloc[:,:-1].values
Y = data.iloc[:,1].values
X
array([[ 1.1],
[ 1.3],
[ 1.5],
[ 2. ],
[ 2.2],
[ 2.9],
[ 3. ],
[ 3.2],
[ 3.2],
[ 3.7],
[ 3.9],
[ 4. ],
[ 4. ],
[ 4.1],
[ 4.5],
[ 4.9],
[ 5.1],
[ 5.3],
[ 5.9],
[ 6. ],
[ 6.8],
[ 7.1],
[ 7.9],
[ 8.2],
[ 8.7],
[ 9. ],
[ 9.5],
[ 9.6],
[10.3],
[10.5]])
Y
array([ 39343., 46205., 37731., 43525., 39891., 56642., 60150.,
54445., 64445., 57189., 63218., 55794., 56957., 57081.,
61111., 67938., 66029., 83088., 81363., 93940., 91738.,
98273., 101302., 113812., 109431., 105582., 116969., 112635.,
122391., 121872.])
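The slicing above keeps X as a 2-D array of shape (30, 1), which is what scikit-learn estimators expect for the feature matrix, while Y is a 1-D target vector. An equivalent, arguably more explicit selection (a sketch using the column names shown by data.info()) would be:
X = data[["YearsExperience"]].values   # double brackets keep a 2-D (30, 1) feature matrix
Y = data["Salary"].values              # 1-D target vector of shape (30,)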
import matplotlib.pyplot as plt
plt.scatter(X,Y,color = "blue")
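The quick scatter above shows a clear linear trend between experience and salary, which motivates fitting a simple linear regression. Optionally, the same plot can be drawn with a title and axis labels (a cosmetic touch, matching the labelled plots further below):
plt.scatter(X, Y, color="blue")
plt.title("Salary vs Experience (Full Dataset)")
plt.xlabel("Years of Experience")
plt.ylabel("Salary")
plt.show()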
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(X,Y,test_size=0.33,random_state = 0)
x_train
array([[ 2.9],
[ 5.1],
[ 3.2],
[ 4.5],
[ 8.2],
[ 6.8],
[ 1.3],
[10.5],
[ 3. ],
[ 2.2],
[ 5.9],
[ 6. ],
[ 3.7],
[ 3.2],
[ 9. ],
[ 2. ],
[ 1.1],
[ 7.1],
[ 4.9],
[ 4. ]])
y_train
array([ 56642., 66029., 64445., 61111., 113812., 91738., 46205.,
121872., 60150., 39891., 81363., 93940., 57189., 54445.,
105582., 43525., 39343., 98273., 67938., 56957.])
y_test
array([ 37731., 122391., 57081., 63218., 116969., 109431., 112635.,
55794., 83088., 101302.])
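With 30 samples, test_size=0.33 leaves 20 rows for training and 10 for testing, and random_state=0 makes the split reproducible. A quick optional shape check confirms this:
print(x_train.shape, x_test.shape)   # expected: (20, 1) (10, 1)
print(y_train.shape, y_test.shape)   # expected: (20,) (10,)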
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(x_train,y_train)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
y_pred = regressor.predict(x_test)
y_pred
array([ 40835.10590871, 123079.39940819, 65134.55626083, 63265.36777221,
115602.64545369, 108125.8914992 , 116537.23969801, 64199.96201652,
76349.68719258, 100649.1375447 ])
y_test
array([ 37731., 122391., 57081., 63218., 116969., 109431., 112635.,
55794., 83088., 101302.])
residue = y_pred - y_test   # residuals: predicted minus actual salary on the test set
residue
array([ 3104.10590871, 688.39940819, 8053.55626083, 47.36777221,
-1366.35454631, -1305.1085008 , 3902.23969801, 8405.96201652,
-6738.31280742, -652.8624553 ])
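A quick residual plot (an optional diagnostic, not part of the original notebook) helps confirm that the errors are scattered around zero with no obvious pattern:
plt.scatter(x_test, residue, color="blue")
plt.axhline(y=0, color="red", linestyle="--")   # zero-error reference line
plt.title("Residuals vs Years of Experience (Test Set)")
plt.xlabel("Years of Experience")
plt.ylabel("Residual (predicted - actual)")
plt.show()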
plt.scatter(x_train,y_train,color="red")
plt.plot(x_train,regressor.predict(x_train),color="yellow")
plt.title("Salary VS Experience(Training Set)")
plt.xlabel("Years of Experience")
plt.ylabel("Salary")
plt.show()
plt.scatter(x_test,y_test,color="Red")
plt.plot(x_train,regressor.predict(x_train),color="yellow")
plt.title("Salary VS Experience(Training Set)")
plt.xlabel("Years of Experience")
plt.ylabel("Salary")
plt.show()
print(regressor.coef_)
print(regressor.intercept_)
[9345.94244312]
26816.192244031176
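The fitted line is therefore approximately Salary = 26816.19 + 9345.94 * YearsExperience. As a quick sanity check (a hypothetical example with 5 years of experience), the manual calculation should match the model's prediction:
years = 5.0
manual = regressor.intercept_ + regressor.coef_[0] * years   # about 26816.19 + 9345.94 * 5 ≈ 73545.9
print(manual, regressor.predict([[years]])[0])               # both should print the same value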
y_test.shape
(10,)
y_pred.shape
(10,)
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(y_test,y_pred))
r2 = r2_score(y_test,y_pred)   # R-squared (coefficient of determination)
print("RMSE =", rmse)
print("R2 Score=",r2)
RMSE = 4585.415720467589
R2 Score= 0.9749154407708353
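As a cross-check, both metrics can be recomputed directly from their definitions (a short sketch; the values should match the library output above up to rounding): RMSE is the square root of the mean squared residual, and R2 is one minus the ratio of the residual sum of squares to the total sum of squares of the test targets.
rmse_manual = np.sqrt(np.mean((y_test - y_pred) ** 2))            # root mean squared error
ss_res = np.sum((y_test - y_pred) ** 2)                           # residual sum of squares
ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)                  # total sum of squares
r2_manual = 1 - ss_res / ss_tot                                   # coefficient of determination
print("RMSE (manual) =", rmse_manual)
print("R2 (manual) =", r2_manual)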