-
Notifications
You must be signed in to change notification settings - Fork 14
Description
import numpy as np
from keras.datasets import mnist
from sklearn.model_selection import train_test_split
# Load MNIST dataset
(X, y), (X_test, y_test) = mnist.load_data()

# Subset data to use only class 0 and class 1
indices = np.logical_or(y == 0, y == 1)
X = X[indices]
y = y[indices]

# Reshape images to 1D vectors
X = X.reshape(X.shape[0], -1)

# Pick the train/validation rows before computing any statistics, so the
# validation rows cannot influence the preprocessing (no data leakage).
# Splitting an index array with the same test_size/random_state is expected
# to select the same rows as splitting the data arrays directly.
train_idx, val_idx = train_test_split(
    np.arange(X.shape[0]), test_size=0.2, random_state=42
)

# Standardize dataset with one global mean/std estimated from training rows
mean = np.mean(X[train_idx])
std = np.std(X[train_idx])
X_std = (X - mean) / std

# Split data into training and validation sets
X_train, X_val = X_std[train_idx], X_std[val_idx]
y_train, y_val = y[train_idx], y[val_idx]
# Define the sigmoid function
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), element-wise.

    The naive formula overflows in ``exp`` for large negative ``z``. This
    version only ever exponentiates non-positive numbers, so it cannot
    overflow, and it keeps full precision in both tails.

    Args:
        z: Scalar or array-like of real values.

    Returns:
        Float value(s) in [0, 1] with the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) lies in (0, 1] for every finite z.
    exp_neg_abs = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function.
    numerator = np.where(z >= 0, 1.0, exp_neg_abs)
    return numerator / (1.0 + exp_neg_abs)
# Hyperparameters shared by the training loops in this script
learning_rate = 0.01
num_iterations = 1000

# Use L1 regularization with gradient descent optimizer:
lambdas = [0.001, 0.01]
for lam in lambdas:
    # Initialize the parameters
    w = np.zeros(X_train.shape[1])
    b = 0

    for i in range(num_iterations):
        # Forward pass
        z = np.dot(X_train, w) + b
        y_pred = sigmoid(z)

        # Compute the cost. Probabilities are clipped away from 0 and 1 so a
        # saturated sigmoid cannot produce log(0) and turn the cost into
        # nan/-inf; the gradient below still uses the unclipped predictions.
        y_safe = np.clip(y_pred, 1e-12, 1 - 1e-12)
        cross_entropy = -np.mean(
            y_train * np.log(y_safe) + (1 - y_train) * np.log(1 - y_safe)
        )
        cost = cross_entropy + lam * np.sum(np.abs(w))

        # Backward pass (np.sign(w) is the L1 subgradient; it is 0 at w == 0)
        dz = y_pred - y_train
        dw = np.dot(X_train.T, dz) / X_train.shape[0] + lam * np.sign(w)
        db = np.mean(dz)

        # Update the parameters
        w = w - learning_rate * dw
        b = b - learning_rate * db

    # Evaluate the model on the validation data (once per lambda)
    z_val = np.dot(X_val, w) + b
    y_val_pred = (sigmoid(z_val) >= 0.5).astype(float)
    accuracy = np.mean(y_val_pred == y_val)
    print("Lambda: {}, Validation accuracy: {}".format(lam, accuracy))
# Use mini-batch gradient descent optimizer:
batch_sizes = [128, 64]


# Define the mini-batch generator function
def minibatch_generator(X, y, batch_size):
    """Yield shuffled ``(X_batch, y_batch)`` pairs of exactly ``batch_size`` rows.

    Row order is reshuffled with ``np.random.shuffle`` on every call. Only
    full batches are produced: the trailing ``num_samples % batch_size`` rows
    of the shuffled order are dropped, and nothing is yielded when
    ``batch_size`` exceeds the number of rows.

    Args:
        X: Array whose first axis indexes samples.
        y: Array of targets aligned with ``X`` along the first axis.
        batch_size: Positive number of rows per batch.

    Yields:
        Tuples ``(X[rows], y[rows])`` for consecutive slices of the shuffled
        row order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (raised when the
            generator is first advanced).
    """
    if batch_size < 1:
        # A negative size used to yield nothing silently; fail loudly instead.
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    num_samples = X.shape[0]
    indices = np.arange(num_samples)
    np.random.shuffle(indices)

    # Stop early enough that every slice holds exactly batch_size rows.
    for start_idx in range(0, num_samples - batch_size + 1, batch_size):
        excerpt = indices[start_idx:start_idx + batch_size]
        yield X[excerpt], y[excerpt]
# Train the logistic regression model using mini-batch gradient descent
for batch_size in batch_sizes:
    print(f"Batch size: {batch_size}")
    # Start every batch-size run from the same zero initialization
    w = np.zeros(X_train.shape[1])
    b = 0

    # Each outer iteration is one full shuffled pass over the training set
    for i in range(num_iterations):
        # Mini-batch generator
        batch_generator = minibatch_generator(X_train, y_train, batch_size)
        for batch_X, batch_y in batch_generator:
            # Forward pass
            z = np.dot(batch_X, w) + b
            y_pred = sigmoid(z)

            # Compute the cost. Probabilities are clipped away from 0 and 1
            # so a saturated sigmoid cannot produce log(0) (nan/-inf cost).
            y_safe = np.clip(y_pred, 1e-12, 1 - 1e-12)
            cost = -np.mean(
                batch_y * np.log(y_safe) + (1 - batch_y) * np.log(1 - y_safe)
            )

            # Backward pass; normalize by the rows actually in this batch
            dz = y_pred - batch_y
            dw = np.dot(batch_X.T, dz) / batch_X.shape[0]
            db = np.mean(dz)

            # Update the parameters
            w = w - learning_rate * dw
            b = b - learning_rate * db

    # Evaluate the model on the validation data (once per batch size)
    z_val = np.dot(X_val, w) + b
    y_val_pred = (sigmoid(z_val) >= 0.5).astype(float)
    accuracy = np.mean(y_val_pred == y_val)
    print("Validation accuracy:", accuracy)
# RMSprop optimizer
eps = 1e-8
beta = 0.9

# Re-initialize the parameters: without this, RMSprop would continue from the
# weights left behind by the previous training run instead of from scratch,
# and its validation accuracy would not measure this optimizer on its own.
w = np.zeros(X_train.shape[1])
b = 0

# Running averages of the squared gradients
s_w = np.zeros(X_train.shape[1])
s_b = 0

for i in range(num_iterations):
    # Forward pass
    z = np.dot(X_train, w) + b
    y_pred = sigmoid(z)

    # Compute the cost. Probabilities are clipped away from 0 and 1 so a
    # saturated sigmoid cannot produce log(0) (nan/-inf cost).
    y_safe = np.clip(y_pred, 1e-12, 1 - 1e-12)
    cost = -np.mean(
        y_train * np.log(y_safe) + (1 - y_train) * np.log(1 - y_safe)
    )

    # Backward pass
    dz = y_pred - y_train
    dw = np.dot(X_train.T, dz) / X_train.shape[0]
    db = np.mean(dz)

    # Update the RMSprop parameters
    s_w = beta * s_w + (1 - beta) * np.square(dw)
    s_b = beta * s_b + (1 - beta) * np.square(db)

    # Update the parameters using RMSprop optimizer
    # (eps sits inside the square root to keep the denominator non-zero)
    w = w - learning_rate * dw / np.sqrt(s_w + eps)
    b = b - learning_rate * db / np.sqrt(s_b + eps)

# Evaluate the model on the validation data
z_val = np.dot(X_val, w) + b
y_val_pred = (sigmoid(z_val) >= 0.5).astype(float)
accuracy = np.mean(y_val_pred == y_val)
print("Validation accuracy:", accuracy)
# Adam optimizer:
# Initialize Adam optimizer parameters
eps = 1e-8
beta1 = 0.9
beta2 = 0.999

# Re-initialize the parameters: without this, Adam would continue from the
# weights left behind by the previous training run instead of from scratch,
# and its validation accuracy would not measure this optimizer on its own.
w = np.zeros(X_train.shape[1])
b = 0

# First (m_*) and second (v_*) moment estimates of the gradients
m_w = np.zeros(X_train.shape[1])
m_b = 0
v_w = np.zeros(X_train.shape[1])
v_b = 0

for i in range(num_iterations):
    # Forward pass
    z = np.dot(X_train, w) + b
    y_pred = sigmoid(z)

    # Compute the cost. Probabilities are clipped away from 0 and 1 so a
    # saturated sigmoid cannot produce log(0) (nan/-inf cost).
    y_safe = np.clip(y_pred, 1e-12, 1 - 1e-12)
    cost = -np.mean(
        y_train * np.log(y_safe) + (1 - y_train) * np.log(1 - y_safe)
    )

    # Backward pass
    dz = y_pred - y_train
    dw = np.dot(X_train.T, dz) / X_train.shape[0]
    db = np.mean(dz)

    # Update the Adam optimizer parameters
    m_w = beta1 * m_w + (1 - beta1) * dw
    m_b = beta1 * m_b + (1 - beta1) * db
    v_w = beta2 * v_w + (1 - beta2) * np.square(dw)
    v_b = beta2 * v_b + (1 - beta2) * np.square(db)

    # Bias-correct the moment estimates; the step count is i + 1 because the
    # loop index starts at 0
    m_w_hat = m_w / (1 - beta1**(i + 1))
    m_b_hat = m_b / (1 - beta1**(i + 1))
    v_w_hat = v_w / (1 - beta2**(i + 1))
    v_b_hat = v_b / (1 - beta2**(i + 1))

    # Update the parameters using Adam optimizer
    w = w - learning_rate * m_w_hat / (np.sqrt(v_w_hat) + eps)
    b = b - learning_rate * m_b_hat / (np.sqrt(v_b_hat) + eps)

# Evaluate the model on the validation data
z_val = np.dot(X_val, w) + b
y_val_pred = (sigmoid(z_val) >= 0.5).astype(float)
accuracy = np.mean(y_val_pred == y_val)
print("Validation accuracy:", accuracy)