optimizers #23

@16shery

Description

import numpy as np
from keras.datasets import mnist
from sklearn.model_selection import train_test_split

# Load the MNIST dataset

(X, y), (X_test, y_test) = mnist.load_data()

# Subset the data to use only class 0 and class 1

indices = np.logical_or(y == 0, y == 1)
X = X[indices]
y = y[indices]

# Reshape the 28x28 images into 1D vectors

X = X.reshape(X.shape[0], -1)

# Standardize the dataset

mean = np.mean(X)
std = np.std(X)
X_std = (X - mean) / std

# Split the data into training and validation sets

X_train, X_val, y_train, y_val = train_test_split(X_std, y, test_size=0.2, random_state=42)
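
A note on the preprocessing: the mean and standard deviation above are computed over the whole dataset before splitting, and as single scalars across all pixels. A minimal alternative sketch (using the X, y and train_test_split already imported above; the variable names are mine) splits first and then standardizes each pixel with training-set statistics only, so nothing from the validation set leaks into the preprocessing:

# Split first, then standardize per pixel using training statistics only (illustrative variant)
X_train_raw, X_val_raw, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
pixel_mean = X_train_raw.mean(axis=0)
pixel_std = X_train_raw.std(axis=0) + 1e-8   # guard against all-zero border pixels
X_train = (X_train_raw - pixel_mean) / pixel_std
X_val = (X_val_raw - pixel_mean) / pixel_std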

# Define the sigmoid function

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Shared hyperparameters for all optimizers
learning_rate = 0.01
num_iterations = 1000
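
One practical caveat: with 784 inputs, z can in principle grow large enough that np.exp(-z) overflows, and once a prediction saturates at exactly 0 or 1 the np.log terms in the cost become NaN (the parameter updates themselves still work, since they only use y_pred - y_train). A small, optional guard, sketched here with clip bounds of my own choosing:

def stable_sigmoid(z):
    # Clip the logits so np.exp never overflows float64
    z = np.clip(z, -500, 500)
    return 1 / (1 + np.exp(-z))

def safe_cross_entropy(y_true, y_prob, eps=1e-12):
    # Keep the probabilities strictly inside (0, 1) before taking logs
    y_prob = np.clip(y_prob, eps, 1 - eps)
    return -np.mean(y_true * np.log(y_prob) + (1 - y_true) * np.log(1 - y_prob))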

Use L1 regularization with the gradient descent optimizer:

lambdas = [0.001, 0.01]

for l in lambdas:
    # Initialize the parameters
    w = np.zeros(X_train.shape[1])
    b = 0

    for i in range(num_iterations):
        # Forward pass
        z = np.dot(X_train, w) + b
        y_pred = sigmoid(z)

        # Compute the cost (cross-entropy plus the L1 penalty)
        cost = -np.mean(y_train * np.log(y_pred) + (1 - y_train) * np.log(1 - y_pred)) + l * np.sum(np.abs(w))

        # Backward pass (np.sign(w) is the subgradient of the L1 term)
        dz = y_pred - y_train
        dw = np.dot(X_train.T, dz) / X_train.shape[0] + l * np.sign(w)
        db = np.mean(dz)

        # Update the parameters
        w = w - learning_rate * dw
        b = b - learning_rate * db

    # Evaluate the model on the validation data
    z_val = np.dot(X_val, w) + b
    y_val_pred = sigmoid(z_val)
    y_val_pred[y_val_pred >= 0.5] = 1
    y_val_pred[y_val_pred < 0.5] = 0
    accuracy = np.mean(y_val_pred == y_val)
    print("Lambda: {}, Validation accuracy: {}".format(l, accuracy))

Use the mini-batch gradient descent optimizer:

batch_sizes = [128, 64]

# Define the mini-batch generator function

def minibatch_generator(X, y, batch_size):
    num_samples = X.shape[0]
    indices = np.arange(num_samples)
    np.random.shuffle(indices)
    # Yield successive shuffled batches; samples that do not fill a full batch are dropped
    for start_idx in range(0, num_samples - batch_size + 1, batch_size):
        excerpt = indices[start_idx:start_idx + batch_size]
        yield X[excerpt], y[excerpt]
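
As noted in the comment, the generator drops any samples left over after the last full batch. If those samples should be used too, a slightly different version (my variant, not from the issue) can yield a final partial batch:

def minibatch_generator_with_remainder(X, y, batch_size):
    # Shuffle once per epoch and yield every sample, including a last partial batch
    indices = np.random.permutation(X.shape[0])
    for start_idx in range(0, X.shape[0], batch_size):
        excerpt = indices[start_idx:start_idx + batch_size]
        yield X[excerpt], y[excerpt]

If this version is used, dw should be divided by batch_X.shape[0] rather than the nominal batch_size, since the last batch can be smaller.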

# Train the logistic regression model using mini-batch gradient descent

for batch_size in batch_sizes:
    print(f"Batch size: {batch_size}")
    # Initialize the parameters
    w = np.zeros(X_train.shape[1])
    b = 0
    for i in range(num_iterations):
        # Re-shuffle and rebuild the mini-batches each epoch
        batch_generator = minibatch_generator(X_train, y_train, batch_size)

        for batch_X, batch_y in batch_generator:
            # Forward pass
            z = np.dot(batch_X, w) + b
            y_pred = sigmoid(z)

            # Compute the cost
            cost = -np.mean(batch_y * np.log(y_pred) + (1 - batch_y) * np.log(1 - y_pred))

            # Backward pass
            dz = y_pred - batch_y
            dw = np.dot(batch_X.T, dz) / batch_size
            db = np.mean(dz)

            # Update the parameters
            w = w - learning_rate * dw
            b = b - learning_rate * db

    # Evaluate the model on the validation data
    z_val = np.dot(X_val, w) + b
    y_val_pred = sigmoid(z_val)
    y_val_pred[y_val_pred >= 0.5] = 1
    y_val_pred[y_val_pred < 0.5] = 0
    accuracy = np.mean(y_val_pred == y_val)
    print("Validation accuracy:", accuracy)

RMSprop optimizer:

# Initialize the RMSprop optimizer parameters
eps = 1e-8
beta = 0.9
# Re-initialize the model parameters so this run does not start from the mini-batch solution
w = np.zeros(X_train.shape[1])
b = 0
s_w = np.zeros(X_train.shape[1])
s_b = 0

for i in range(num_iterations):
    # Forward pass
    z = np.dot(X_train, w) + b
    y_pred = sigmoid(z)

    # Compute the cost
    cost = -np.mean(y_train * np.log(y_pred) + (1 - y_train) * np.log(1 - y_pred))

    # Backward pass
    dz = y_pred - y_train
    dw = np.dot(X_train.T, dz) / X_train.shape[0]
    db = np.mean(dz)

    # Update the running averages of the squared gradients
    s_w = beta * s_w + (1 - beta) * np.square(dw)
    s_b = beta * s_b + (1 - beta) * np.square(db)

    # Update the parameters using the RMSprop rule
    w = w - learning_rate * dw / np.sqrt(s_w + eps)
    b = b - learning_rate * db / np.sqrt(s_b + eps)

# Evaluate the model on the validation data

z_val = np.dot(X_val, w) + b
y_val_pred = sigmoid(z_val)
y_val_pred[y_val_pred >= 0.5] = 1
y_val_pred[y_val_pred < 0.5] = 0
accuracy = np.mean(y_val_pred == y_val)
print("Validation accuracy:", accuracy)

Adam optimizer:

# Initialize the Adam optimizer parameters
eps = 1e-8
beta1 = 0.9
beta2 = 0.999
# Re-initialize the model parameters for a fresh run
w = np.zeros(X_train.shape[1])
b = 0
m_w = np.zeros(X_train.shape[1])
m_b = 0
v_w = np.zeros(X_train.shape[1])
v_b = 0

for i in range(num_iterations):
    # Forward pass
    z = np.dot(X_train, w) + b
    y_pred = sigmoid(z)

    # Compute the cost
    cost = -np.mean(y_train * np.log(y_pred) + (1 - y_train) * np.log(1 - y_pred))

    # Backward pass
    dz = y_pred - y_train
    dw = np.dot(X_train.T, dz) / X_train.shape[0]
    db = np.mean(dz)

    # Update the first- and second-moment estimates
    m_w = beta1 * m_w + (1 - beta1) * dw
    m_b = beta1 * m_b + (1 - beta1) * db
    v_w = beta2 * v_w + (1 - beta2) * np.square(dw)
    v_b = beta2 * v_b + (1 - beta2) * np.square(db)

    # Bias-correct the moment estimates
    m_w_hat = m_w / (1 - beta1**(i+1))
    m_b_hat = m_b / (1 - beta1**(i+1))
    v_w_hat = v_w / (1 - beta2**(i+1))
    v_b_hat = v_b / (1 - beta2**(i+1))

    # Update the parameters using the Adam rule
    w = w - learning_rate * m_w_hat / (np.sqrt(v_w_hat) + eps)
    b = b - learning_rate * m_b_hat / (np.sqrt(v_b_hat) + eps)

# Evaluate the model on the validation data

z_val = np.dot(X_val, w) + b
y_val_pred = sigmoid(z_val)
y_val_pred[y_val_pred >= 0.5] = 1
y_val_pred[y_val_pred < 0.5] = 0
accuracy = np.mean(y_val_pred == y_val)
print("Validation accuracy:", accuracy)
