Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ridge Regression Algorithm Added to Regression files and Also updated readme #102

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions 3_Regression/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

[6. Random Forest Regression](Random_Forest_Regression)

[7. Ridge Regression](Ridge_Regression)

## Comparing Regression Models and their performance

### R Squared Intuition for Simple Linear Regression
Expand Down
8 changes: 8 additions & 0 deletions 3_Regression/Ridge_Regression/python/.idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions 3_Regression/Ridge_Regression/python/.idea/.name

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions 3_Regression/Ridge_Regression/python/.idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions 3_Regression/Ridge_Regression/python/.idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions 3_Regression/Ridge_Regression/python/.idea/python.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions 3_Regression/Ridge_Regression/python/Salary_Data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
YearsExperience,Salary
1.1,39343
1.3,46205
1.5,37731
2,43525
2.2,39891
2.9,56642
3,60150
3.2,54445
3.2,64445
3.7,57189
3.9,63218
4,55794
4,56957
4.1,57081
4.5,61111
4.9,67938
5.1,66029
5.3,83088
5.9,81363
6,93940
6.8,91738
7.1,98273
7.9,101302
8.2,113812
8.7,109431
9,105582
9.5,116969
9.6,112635
10.3,122391
10.5,121872
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Importing libraries

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt


# Ridge Regression

class RidgeRegression():
    """Linear regression with an L2 (ridge) penalty, fitted by gradient descent.

    Each call to ``update_weights`` performs one full-batch step on the
    gradients computed below; the bias term ``b`` is not penalised.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient update.
    iterations : int
        Number of full-batch updates performed by ``fit``.
    l2_penality : float
        Strength of the L2 penalty applied to the weights ``W``.

    Attributes set by ``fit``
    -------------------------
    W : numpy.ndarray of shape (n_features,)
    b : float
    m, n : number of training samples / features
    X, Y : the (float-coerced) training data
    """

    def __init__(self, learning_rate, iterations, l2_penality):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.l2_penality = l2_penality

    def fit(self, X, Y):
        """Train the model on ``X`` (m samples x n features) and targets ``Y``.

        ``X`` and ``Y`` may be any array-like. ``Y`` is flattened to one
        dimension so that a column vector of shape (m, 1) is handled the same
        way as a flat vector of shape (m,).

        Returns
        -------
        RidgeRegression
            ``self``, to allow call chaining.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, contains no samples, or its number of rows
            differs from the number of targets.
        """
        X = np.asarray(X, dtype=float)
        # Flatten Y: an (m, 1) target would otherwise broadcast against the
        # (m,) prediction vector into an (m, m) matrix and corrupt the update.
        Y = np.asarray(Y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (n_samples, n_features); "
                "got %d dimension(s)" % X.ndim)
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if X.shape[0] != Y.shape[0]:
            raise ValueError(
                "X and Y must have the same number of samples; "
                "got %d and %d" % (X.shape[0], Y.shape[0]))

        # no_of_training_examples, no_of_features
        self.m, self.n = X.shape

        # weight initialization
        self.W = np.zeros(self.n)
        self.b = 0.0
        self.X = X
        self.Y = Y

        # gradient descent learning
        for _ in range(self.iterations):
            self.update_weights()
        return self

    def update_weights(self):
        """Apply one gradient-descent step to ``W`` and ``b``; return ``self``."""
        residual = self.Y - self.predict(self.X)

        # Gradients of the penalised squared error, averaged over m samples.
        dW = (-2 * self.X.T.dot(residual)
              + 2 * self.l2_penality * self.W) / self.m
        db = -2 * np.sum(residual) / self.m

        self.W = self.W - self.learning_rate * dW
        self.b = self.b - self.learning_rate * db
        return self

    def predict(self, X):
        """Return the linear hypothesis ``X . W + b`` for array-like ``X``."""
        return np.asarray(X, dtype=float).dot(self.W) + self.b

def train(X, Y, *, iterations=1000, learning_rate=0.01, l2_penality=1,
          test_size=1 / 3, random_state=0):
    """Split the data, fit a ``RidgeRegression`` model and return it.

    The keyword-only arguments default to the values this script has always
    used, so ``train(X, Y)`` behaves exactly as before.

    Parameters
    ----------
    X, Y : array-like
        Feature matrix and target vector for the whole dataset.
    iterations, learning_rate, l2_penality
        Hyperparameters forwarded to ``RidgeRegression``.
    test_size, random_state
        Forwarded to ``train_test_split``. NOTE: ``visualize`` re-creates the
        split with ``test_size=1 / 3, random_state=0``; keep these defaults if
        its plots should match the split used here.

    Returns
    -------
    tuple
        ``(model, X_test, Y_test)`` - the fitted model and the held-out data.
    """
    # Splitting dataset into train and test set
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state)

    # Model training
    model = RidgeRegression(iterations=iterations,
                            learning_rate=learning_rate,
                            l2_penality=l2_penality)
    model.fit(X_train, Y_train)
    return model, X_test, Y_test


def visualize(X, Y, Y_pred, col):
    """Plot the training data and the validation predictions in two panels.

    Parameters
    ----------
    X, Y : array-like
        The full dataset. It is re-split here with ``test_size=1 / 3`` and
        ``random_state=0``.
    Y_pred : array-like
        Predictions for the test part of that split, in the same order as the
        test rows. Assumes the caller produced them from the same split -
        verify against the caller if the split settings are changed.
    col : sequence of str
        ``col[0]`` labels the x axis and ``col[1]`` labels the y axis.
    """
    # Splitting dataset into train and test set
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=1 / 3, random_state=0)

    # Lines are drawn in x-order; the split shuffles the rows, and joining
    # shuffled points produces a zigzag instead of a readable curve.
    x_train = np.ravel(X_train)
    x_test = np.ravel(X_test)
    train_order = np.argsort(x_train)
    test_order = np.argsort(x_test)

    fig, axs = plt.subplots(2)

    axs[0].scatter(x_train, Y_train, color='blue')
    axs[0].plot(x_train[train_order], np.asarray(Y_train)[train_order],
                color='orange')
    axs[0].set_title('training')

    # Actual test targets as points, model predictions as the line, so the
    # panel really compares prediction against ground truth.
    axs[1].scatter(x_test, Y_test, color='blue')
    axs[1].plot(x_test[test_order], np.asarray(Y_pred)[test_order],
                color='orange')
    axs[1].set_title('validation')

    for ax in axs.flat:
        ax.set(xlabel=col[0], ylabel=col[1])

    fig.tight_layout()
    plt.show()

# Driver code

def main():
    """Load the salary data, train the ridge model, then report and plot."""
    # Importing dataset
    salary_df = pd.read_csv("../Salary_Data.csv")
    col = salary_df.columns
    X = salary_df.iloc[:, :-1].values
    Y = salary_df.iloc[:, 1].values

    # Fit the model and predict on the held-out rows it returns
    model, X_test, Y_test = train(X, Y)
    Y_pred = model.predict(X_test)

    # Show a few predictions next to the real targets, then the parameters
    predicted_head = np.round(Y_pred[:3], 2)
    real_head = Y_test[:3]
    weight = round(model.W[0], 2)
    bias = round(model.b, 2)
    print("Predicted values ", predicted_head)
    print("Real values ", real_head)
    print("Trained W ", weight)
    print("Trained b ", bias)

    # Visualization on test set
    visualize(X, Y, Y_pred, col)


if __name__ == "__main__":
main()
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
53 changes: 53 additions & 0 deletions 3_Regression/Ridge_Regression/python/ridge_regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
""" Ridge Regression
"""

# Importing the libraries
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge


def _plot_fit(X_points, y_points, X_line, y_line, title):
    """Scatter the observed points and overlay the fitted line, then show it."""
    plt.scatter(X_points, y_points, color='red')
    plt.plot(X_line, y_line, color='blue')
    plt.title(title)
    plt.xlabel('Years of Experience')
    plt.ylabel('Salary')
    plt.show()


def main():
    """Fit scikit-learn's ``Ridge`` on the salary data and plot the result.

    Reads ``Salary_Data.csv`` from the current working directory, holds out a
    third of the rows as a test set, fits ``Ridge()`` with its default
    settings on the rest, and shows two figures: the training points and the
    test points, each against the line fitted on the training set.
    """
    # Importing the dataset
    dataset = pd.read_csv('Salary_Data.csv')
    X = dataset.iloc[:, :-1].values
    y = dataset.iloc[:, 1].values

    # Splitting the dataset into the Training set and Test set
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=1 / 3, random_state=0)

    # Fitting Ridge Regression to the Training set
    regressor = Ridge()
    regressor.fit(X_train, y_train)

    # Both figures show the same fitted line, so compute it once.
    train_fit = regressor.predict(X_train)

    # Visualising the Training set results
    _plot_fit(X_train, y_train, X_train, train_fit,
              'Salary vs Experience (Training set)')

    # Visualising the Test set results
    _plot_fit(X_test, y_test, X_train, train_fit,
              'Salary vs Experience (Test set)')


if __name__ == '__main__':
main()
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
* [Support Vector Regression](3_Regression/Support_Vector_Regression)
* [Decision Tree Regression](3_Regression/Decision_Tree_Regression)
* [Random Forest Regression](3_Regression/Random_Forest_Regression)
* [Ridge Regression](3_Regression/Ridge_Regression)

[**3. Classification**](4_Classification/README.md)
* [Logistic Regression](4_Classification/Logistic_Regression)
Expand Down Expand Up @@ -109,4 +110,4 @@ Check the official MIT License [here](LICENSE).
- **[@pragyakapoor](https://github.com/pragyakapoor)**

<!-- DO NOT REMOVE - contributor_list:end -->
<!-- prettier-ignore-end -->
<!-- prettier-ignore-end -->