optimizers #23

@16shery

Description

import numpy as np
from keras.datasets import mnist
from sklearn.model_selection import train_test_split

# Load the MNIST dataset

(X, y), (X_test, y_test) = mnist.load_data()

# Subset the data to use only class 0 and class 1

indices = np.logical_or(y == 0, y == 1)
X = X[indices]
y = y[indices]

# Reshape the 28x28 images into 1D vectors

X = X.reshape(X.shape[0], -1)

# Standardize the dataset

mean = np.mean(X)
std = np.std(X)
X_std = (X - mean) / std

# Split the data into training and validation sets

X_train, X_val, y_train, y_val = train_test_split(X_std, y, test_size=0.2, random_state=42)
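
A note on the preprocessing: the mean and standard deviation above are computed over the whole dataset before splitting, and as single scalars across all pixels. A minimal alternative sketch (using the X, y and train_test_split already imported above; the variable names are mine) splits first and then standardizes each pixel with training-set statistics only, so nothing from the validation set leaks into the preprocessing:

# Split first, then standardize per pixel using training statistics only (illustrative variant)
X_train_raw, X_val_raw, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
pixel_mean = X_train_raw.mean(axis=0)
pixel_std = X_train_raw.std(axis=0) + 1e-8   # guard against all-zero border pixels
X_train = (X_train_raw - pixel_mean) / pixel_std
X_val = (X_val_raw - pixel_mean) / pixel_std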

# Define the sigmoid function

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Shared hyperparameters for all optimizers
learning_rate = 0.01
num_iterations = 1000
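
One practical caveat: with 784 inputs, z can in principle grow large enough that np.exp(-z) overflows, and once a prediction saturates at exactly 0 or 1 the np.log terms in the cost become NaN (the parameter updates themselves still work, since they only use y_pred - y_train). A small, optional guard, sketched here with clip bounds of my own choosing:

def stable_sigmoid(z):
    # Clip the logits so np.exp never overflows float64
    z = np.clip(z, -500, 500)
    return 1 / (1 + np.exp(-z))

def safe_cross_entropy(y_true, y_prob, eps=1e-12):
    # Keep the probabilities strictly inside (0, 1) before taking logs
    y_prob = np.clip(y_prob, eps, 1 - eps)
    return -np.mean(y_true * np.log(y_prob) + (1 - y_true) * np.log(1 - y_prob))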

Use L1 regularization with the gradient descent optimizer:

lambdas = [0.001, 0.01]

for l in lambdas:
    # Initialize the parameters
    w = np.zeros(X_train.shape[1])
    b = 0

    for i in range(num_iterations):
        # Forward pass
        z = np.dot(X_train, w) + b
        y_pred = sigmoid(z)

        # Compute the cost (cross-entropy plus the L1 penalty)
        cost = -np.mean(y_train * np.log(y_pred) + (1 - y_train) * np.log(1 - y_pred)) + l * np.sum(np.abs(w))

        # Backward pass (np.sign(w) is the subgradient of the L1 term)
        dz = y_pred - y_train
        dw = np.dot(X_train.T, dz) / X_train.shape[0] + l * np.sign(w)
        db = np.mean(dz)

        # Update the parameters
        w = w - learning_rate * dw
        b = b - learning_rate * db

    # Evaluate the model on the validation data
    z_val = np.dot(X_val, w) + b
    y_val_pred = sigmoid(z_val)
    y_val_pred[y_val_pred >= 0.5] = 1
    y_val_pred[y_val_pred < 0.5] = 0
    accuracy = np.mean(y_val_pred == y_val)
    print("Lambda: {}, Validation accuracy: {}".format(l, accuracy))

Use the mini-batch gradient descent optimizer:

batch_sizes = [128, 64]

# Define the mini-batch generator function

def minibatch_generator(X, y, batch_size):
    num_samples = X.shape[0]
    indices = np.arange(num_samples)
    np.random.shuffle(indices)
    # Yield successive shuffled batches; samples that do not fill a full batch are dropped
    for start_idx in range(0, num_samples - batch_size + 1, batch_size):
        excerpt = indices[start_idx:start_idx + batch_size]
        yield X[excerpt], y[excerpt]
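
As noted in the comment, the generator drops any samples left over after the last full batch. If those samples should be used too, a slightly different version (my variant, not from the issue) can yield a final partial batch:

def minibatch_generator_with_remainder(X, y, batch_size):
    # Shuffle once per epoch and yield every sample, including a last partial batch
    indices = np.random.permutation(X.shape[0])
    for start_idx in range(0, X.shape[0], batch_size):
        excerpt = indices[start_idx:start_idx + batch_size]
        yield X[excerpt], y[excerpt]

If this version is used, dw should be divided by batch_X.shape[0] rather than the nominal batch_size, since the last batch can be smaller.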

# Train the logistic regression model using mini-batch gradient descent

for batch_size in batch_sizes:
    print(f"Batch size: {batch_size}")
    # Initialize the parameters
    w = np.zeros(X_train.shape[1])
    b = 0
    for i in range(num_iterations):
        # Re-shuffle and rebuild the mini-batches each epoch
        batch_generator = minibatch_generator(X_train, y_train, batch_size)

        for batch_X, batch_y in batch_generator:
            # Forward pass
            z = np.dot(batch_X, w) + b
            y_pred = sigmoid(z)

            # Compute the cost
            cost = -np.mean(batch_y * np.log(y_pred) + (1 - batch_y) * np.log(1 - y_pred))

            # Backward pass
            dz = y_pred - batch_y
            dw = np.dot(batch_X.T, dz) / batch_size
            db = np.mean(dz)

            # Update the parameters
            w = w - learning_rate * dw
            b = b - learning_rate * db

    # Evaluate the model on the validation data
    z_val = np.dot(X_val, w) + b
    y_val_pred = sigmoid(z_val)
    y_val_pred[y_val_pred >= 0.5] = 1
    y_val_pred[y_val_pred < 0.5] = 0
    accuracy = np.mean(y_val_pred == y_val)
    print("Validation accuracy:", accuracy)

RMSprop optimizer:

# Initialize the RMSprop optimizer parameters
eps = 1e-8
beta = 0.9
# Re-initialize the model parameters so this run does not start from the mini-batch solution
w = np.zeros(X_train.shape[1])
b = 0
s_w = np.zeros(X_train.shape[1])
s_b = 0

for i in range(num_iterations):
    # Forward pass
    z = np.dot(X_train, w) + b
    y_pred = sigmoid(z)

    # Compute the cost
    cost = -np.mean(y_train * np.log(y_pred) + (1 - y_train) * np.log(1 - y_pred))

    # Backward pass
    dz = y_pred - y_train
    dw = np.dot(X_train.T, dz) / X_train.shape[0]
    db = np.mean(dz)

    # Update the running averages of the squared gradients
    s_w = beta * s_w + (1 - beta) * np.square(dw)
    s_b = beta * s_b + (1 - beta) * np.square(db)

    # Update the parameters using the RMSprop rule
    w = w - learning_rate * dw / np.sqrt(s_w + eps)
    b = b - learning_rate * db / np.sqrt(s_b + eps)

# Evaluate the model on the validation data

z_val = np.dot(X_val, w) + b
y_val_pred = sigmoid(z_val)
y_val_pred[y_val_pred >= 0.5] = 1
y_val_pred[y_val_pred < 0.5] = 0
accuracy = np.mean(y_val_pred == y_val)
print("Validation accuracy:", accuracy)

Adam optimizer:

# Initialize the Adam optimizer parameters
eps = 1e-8
beta1 = 0.9
beta2 = 0.999
# Re-initialize the model parameters for a fresh run
w = np.zeros(X_train.shape[1])
b = 0
m_w = np.zeros(X_train.shape[1])
m_b = 0
v_w = np.zeros(X_train.shape[1])
v_b = 0

for i in range(num_iterations):
    # Forward pass
    z = np.dot(X_train, w) + b
    y_pred = sigmoid(z)

    # Compute the cost
    cost = -np.mean(y_train * np.log(y_pred) + (1 - y_train) * np.log(1 - y_pred))

    # Backward pass
    dz = y_pred - y_train
    dw = np.dot(X_train.T, dz) / X_train.shape[0]
    db = np.mean(dz)

    # Update the first- and second-moment estimates
    m_w = beta1 * m_w + (1 - beta1) * dw
    m_b = beta1 * m_b + (1 - beta1) * db
    v_w = beta2 * v_w + (1 - beta2) * np.square(dw)
    v_b = beta2 * v_b + (1 - beta2) * np.square(db)

    # Bias-correct the moment estimates
    m_w_hat = m_w / (1 - beta1**(i+1))
    m_b_hat = m_b / (1 - beta1**(i+1))
    v_w_hat = v_w / (1 - beta2**(i+1))
    v_b_hat = v_b / (1 - beta2**(i+1))

    # Update the parameters using the Adam rule
    w = w - learning_rate * m_w_hat / (np.sqrt(v_w_hat) + eps)
    b = b - learning_rate * m_b_hat / (np.sqrt(v_b_hat) + eps)

# Evaluate the model on the validation data

z_val = np.dot(X_val, w) + b
y_val_pred = sigmoid(z_val)
y_val_pred[y_val_pred >= 0.5] = 1
y_val_pred[y_val_pred < 0.5] = 0
accuracy = np.mean(y_val_pred == y_val)
print("Validation accuracy:", accuracy)
