The Algorithms logo
The Algorithms
AboutDonate

Logistic Regression

R
H
W
A
R
E
T
C
and 2 more contributors
#!/usr/bin/python

# Logistic Regression from scratch

# In[62]:

# In[63]:

# importing all the required libraries

"""
Implementing logistic regression for classification problem
Helpful resources:
Coursera ML course
https://medium.com/@martinpella/logistic-regression-from-scratch-in-python-124c5636b8ac
"""
import numpy as np
from matplotlib import pyplot as plt
from sklearn import datasets

# get_ipython().run_line_magic('matplotlib', 'inline')


# In[67]:

# sigmoid function or logistic function is used as a hypothesis function in
# classification problems


def sigmoid_function(z):
    """
    Evaluate the logistic (sigmoid) function 1 / (1 + exp(-z)) element-wise.

    The value is computed as exp(-log(1 + exp(-z))) with ``np.logaddexp``
    supplying the inner logarithm, so large-magnitude negative inputs no
    longer overflow inside ``np.exp`` (the naive form emits a RuntimeWarning
    there).

    :param z: real scalar or numpy array
    :return: sigmoid of z, each value in [0, 1], same shape as z

    >>> round(float(sigmoid_function(0)), 6)
    0.5
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)), evaluated stably
    return np.exp(-np.logaddexp(0, -z))


def cost_function(h, y):
    """
    Mean binary cross-entropy between predicted probabilities and labels.

    Computes mean(-y * log(h) - (1 - y) * log(1 - h)). Predictions are clipped
    a tiny distance away from 0 and 1 first, so a saturated prediction yields
    a large finite loss instead of nan/inf from log(0).

    :param h: numpy array of predicted probabilities
    :param y: numpy array of binary labels (0 or 1), same shape as h
    :return: the mean loss as a scalar
    """
    eps = 1e-15  # only affects predictions within eps of exactly 0 or 1
    h = np.clip(h, eps, 1 - eps)
    return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()


def log_likelihood(X, Y, weights):
    """
    Log-likelihood of the labels under a logistic model.

    Computes sum(Y * s - log(1 + exp(s))) with s = X . weights. The
    log(1 + exp(s)) term is evaluated with ``np.logaddexp`` so that large
    scores do not overflow to inf.

    :param X: feature matrix, one row per sample
    :param Y: binary labels (0 or 1), one per sample
    :param weights: weight vector, one entry per feature
    :return: the summed log-likelihood as a scalar
    """
    scores = np.dot(X, weights)
    # logaddexp(0, s) == log(exp(0) + exp(s)) == log(1 + exp(s)), evaluated stably
    return np.sum(Y * scores - np.logaddexp(0, scores))


# Here alpha is the learning rate, X is the feature matrix, and y is the target vector.
def logistic_reg(alpha, X, y, max_iterations=70000):
    """
    Fit logistic-regression weights with batch gradient descent.

    Weights start at zero and are updated ``max_iterations`` times. On every
    100th iteration (starting with iteration 0) the loss of the freshly
    updated weights is printed. No intercept term is added here; X is used
    exactly as given.

    :param alpha: learning rate (step size of each update)
    :param X: feature matrix, one row per sample
    :param y: binary targets, one per sample
    :param max_iterations: number of gradient-descent updates to perform
    :return: the learned weight vector, one entry per column of X
    """
    theta = np.zeros(X.shape[1])

    for iteration in range(max_iterations):
        h = sigmoid_function(np.dot(X, theta))
        gradient = np.dot(X.T, h - y) / y.size
        theta = theta - alpha * gradient  # updating the weights
        if iteration % 100 == 0:
            # The loss is only needed when it is reported, so the extra
            # forward pass with the updated weights is done just here.
            J = cost_function(sigmoid_function(np.dot(X, theta)), y)
            print(f"loss: {J} \t")  # printing the loss after every 100 iterations
    return theta


# In[68]:

if __name__ == "__main__":
    # Binary task on iris: class 0 versus the rest, using the first two features.
    iris = datasets.load_iris()
    X = iris.data[:, :2]
    y = (iris.target != 0) * 1

    alpha = 0.1
    theta = logistic_reg(alpha, X, y, max_iterations=70000)
    print("theta: ", theta)  # the learned weight vector

    def predict_prob(X):
        """Return the model's predicted probability for each row of X."""
        scores = np.dot(X, theta)
        return sigmoid_function(scores)

    plt.figure(figsize=(10, 6))
    # One scatter series per class, in the same order and colours as before.
    for target, colour, legend_name in ((0, "b", "0"), (1, "r", "1")):
        members = X[y == target]
        plt.scatter(members[:, 0], members[:, 1], color=colour, label=legend_name)

    # Evaluate the model on a grid spanning the observed range of both features.
    x1_axis = np.linspace(X[:, 0].min(), X[:, 0].max())
    x2_axis = np.linspace(X[:, 1].min(), X[:, 1].max())
    xx1, xx2 = np.meshgrid(x1_axis, x2_axis)
    grid = np.c_[xx1.ravel(), xx2.ravel()]
    probs = predict_prob(grid).reshape(xx1.shape)
    # The 0.5 probability level is the decision boundary.
    plt.contour(xx1, xx2, probs, [0.5], linewidths=1, colors="black")

    plt.legend()
    plt.show()
About this Algorithm
import numpy as np
import pandas as pd
import sklearn

Importing the MNIST Dataset to perform Multi-Class Classification.

MNIST is a large database of handwritten digits that is commonly used for training various image processing systems.

More Information in Wikipedia

from sklearn.datasets import fetch_openml
# as_frame=False requests NumPy arrays. Recent scikit-learn versions otherwise
# return a pandas DataFrame for this dataset, and the positional X[i] row
# indexing used further down would then fail.
X, y = fetch_openml('mnist_784', return_X_y=True, as_frame=False)

Shape of the dataset

X.shape
(70000, 784)

Visualizing the first 5 images in the dataset

We reshape each array into a 28x28 image so that we can plot it.

import matplotlib.pyplot as plt
# Show the first five samples, one figure each.
# NOTE(review): X[i] is treated as a flat row that reshapes to 28x28, which
# assumes X is a NumPy array with positional row indexing — confirm.
for i in range(5):
    plt.imshow(X[i].reshape((28,28)))
    plt.show()

Importing the Logistic Regression Model from Scikit-Learn and Data Processing

  1. We are decreasing the size of the dataset for training.
  2. Then, we scale the data.
  3. Finally, we split the dataset into training and test sets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

# STEP 1
# Keep only the first 10,000 samples and labels as a smaller working set.
# This rebinds X and y, so re-running the cell slices the already-sliced data.
X, y = X[:10_000], y[:10_000]

# STEP 2
# Scale the features with sklearn's preprocessing.scale.
# NOTE(review): scaling is applied to all rows before the train/test split, so
# rows that end up in the test set also contribute to the scaling statistics.
X_scaled = preprocessing.scale(X)

# STEP 3
# A fixed random_state makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, random_state = 42)

Then, we finally train the Logistic Regression Model.

# Allow the solver up to 2000 iterations; random_state is fixed at 0.
model = LogisticRegression(random_state=0, max_iter=2000)
# Fit on the training portion only; the test portion is kept for evaluation.
model.fit(X_train, y_train)
print("Model Training Completed!")
Model Training Completed!

We now measure the accuracy score of the model that we have trained.

from sklearn.metrics import accuracy_score
# Predict labels for the test set and compare them with the true labels.
preds = model.predict(X_test)
score = accuracy_score(y_test, preds) 
# Report the score as a percentage rounded to two decimal places.
print(f"Model's accuracy score is : {round(score*100, 2)} %")
Model's accuracy score is : 89.76 %

Demonstrating the model's prediction on one of the images from our test set.

# Reshape the single test row into a 2-D array with 784 columns before
# passing it to predict.
pred = model.predict(X_test[45].reshape(-1, 784))
print(f"Model's Prediction : {pred}")
print("*"*30)

# Reshape the same row to 28x28 for display; X_test holds the scaled feature
# values, so the picture shows scaled rather than raw pixel values.
plt.imshow(X_test[45].reshape(28, 28))
plt.show()
Model's Prediction : ['6']
******************************