From 306a9dbaa4391f0b86855a44ef4e008dbca7725b Mon Sep 17 00:00:00 2001 From: Yuqi Wu Date: Tue, 1 Jul 2025 11:09:43 -0400 Subject: [PATCH 1/2] Add Modified_code folder with updated BIOT scripts --- src/BIOT/Modified_code/BIOT_CPU_modified.py | 267 ++++++++++++++++++ src/BIOT/Modified_code/BIOT_GPU_modified.py | 296 ++++++++++++++++++++ src/BIOT/Modified_code/TorchL1.py | 252 +++++++++++++++++ 3 files changed, 815 insertions(+) create mode 100644 src/BIOT/Modified_code/BIOT_CPU_modified.py create mode 100644 src/BIOT/Modified_code/BIOT_GPU_modified.py create mode 100644 src/BIOT/Modified_code/TorchL1.py diff --git a/src/BIOT/Modified_code/BIOT_CPU_modified.py b/src/BIOT/Modified_code/BIOT_CPU_modified.py new file mode 100644 index 0000000..0875788 --- /dev/null +++ b/src/BIOT/Modified_code/BIOT_CPU_modified.py @@ -0,0 +1,267 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Functions and classes for the BIOT module + +""" +import pandas as pd +import numpy as np +import argparse +import time +import math + +from scipy.stats import mannwhitneyu +from sklearn.linear_model import Lasso +from sklearn.preprocessing import StandardScaler +from sklearn.metrics import mean_squared_error +from sklearn.model_selection import KFold +from sklearn.preprocessing import StandardScaler + +############################################################################### +# Functions and classes +def Get_W_Lasso (X, Y, lam, fit_intercept = False, mode='cpu'): + k = Y.shape[1] + d = X.shape[1] + W = np.zeros((d, k)) + w0 = np.zeros(k) + if fit_intercept: + mode = 'cpu' + print('.', end='', flush=True) + for dim_index in range(k): + model = Lasso(alpha = lam, fit_intercept = fit_intercept, max_iter = 5000, tol = 1e-6) + model.fit(X = X, y = Y[:, dim_index]) + W[:, dim_index] = model.coef_ + if fit_intercept: + w0[dim_index] = model.intercept_ + return W, w0 + +def calc_max_lam (X, Y): + n = X.shape[0] + sc = StandardScaler() + X_norm = sc.fit_transform(X) + sc = StandardScaler(with_std=False) + Y_norm = sc.fit_transform(Y) + max_lam = np.max(np.absolute(X_norm.T @ Y_norm))/n + + return max_lam + +def normalize(X, mean, std): + return (X - mean) / std + +def Global_L1_Norm (W): + k = W.shape[1] + norm_val = 0 + for dim_index in range(k): + norm_val += np.linalg.norm(W[:, dim_index], ord = 1) + + return norm_val + +def BIOT_Crit (X, Y, R, W, w0, lam): + n = X.shape[0] + diffs = (Y @ R) - (np.tile(w0, (n, 1)) + (X @ W)) + LS = np.linalg.norm(diffs)**2 + L1 = Global_L1_Norm(W) + + crit = ((1/(2*n)) * LS) + (lam * L1) + + return crit + +def BIOT(X, Y, lam, max_iter=200, eps=1e-6, rotation=False, R=None, fit_intercept=False, mode='cpu'): + print('lambda:', lam) + R = np.eye(6) + d = X.shape[1] + n = X.shape[0] + lam_norm = lam / np.sqrt(d) + + + W, w0 = Get_W_Lasso(X=X, Y=Y, lam=lam_norm, fit_intercept=fit_intercept, mode=mode) + + diff = math.inf + iter_index = 0 + crit_list = [math.inf] + + while iter_index < max_iter and diff > eps: + # Compute De matrix + intercept_matrix = np.tile(w0, (n, 1)) # shape (n, m) + De = (1 / (2 * n)) * Y.T @ (intercept_matrix + X @ W) # shape (m, m) + + # SVD + U, S, Vh = np.linalg.svd(De, full_matrices=False) + + if rotation: + sv = S.copy() + smallest = np.argmin(sv) + sv[smallest] = np.sign(np.linalg.det(U @ Vh)) + sv[sv != sv[smallest]] = 1 + R = U @ np.diag(sv) @ Vh + else: + R = U @ Vh + + # Update W + YR = Y @ R + W, w0 = Get_W_Lasso(X=X, Y=YR, lam=lam_norm, fit_intercept=fit_intercept, mode=mode) + + # Convergence check + crit_list.append(BIOT_Crit(X=X, Y=Y, R=R, W=W, 
w0=w0, lam=lam_norm)) + diff = abs(crit_list[iter_index] - crit_list[iter_index + 1]) + iter_index += 1 + + # Sparsity + num_nonzero = np.sum(W != 0) + total_elements = W.size + ratio = num_nonzero / total_elements + + return R, W, w0, ratio + +def standardize_features(train, test): + mean = train.mean(axis=0) + std = train.std(axis=0, ddof=1) + std[std == 0] = 1.0 + train_std = (train - mean) / std + test_std = (test - mean) / std + return train_std, test_std + +def center_targets(train, test): + mean = train.mean(axis=0) + train_centered = train - mean + test_centered = test - mean + return train_centered, test_centered + + +def CV_BIOT(Fe, X, feature_names, lam_list, fit_intercept=False, + num_folds=10, random_state=1, R=None, rotation=False, scoring='neg_mean_squared_error', mode='cpu'): + + start_all = time.time() + kf = KFold(n_splits=num_folds, shuffle=True, random_state=random_state) + results = [] + mse_matrix = [] + + for lam in lam_list: + fold_mse = [] + fold_sparsity = [] + + for train_idx, test_idx in kf.split(Fe): + X_train = X[train_idx, :] + X_test = X[test_idx, :] + Fe_train = Fe[train_idx, :] + Fe_test = Fe[test_idx, :] + + Fe_train, Fe_test = standardize_features(Fe_train, Fe_test) + X_train, X_test = center_targets(X_train, X_test) + + R_, W_, w0_, ratio = BIOT(X=Fe_train, Y=X_train, lam=lam) + + intercept = np.tile(w0_, (Fe_test.shape[0], 1)) + X_pred = (intercept + Fe_test @ W_) @ R_.T + + mse = mean_squared_error(X_test, X_pred) / 2 + fold_mse.append(mse) + fold_sparsity.append(ratio) + + avg_mse = np.mean(fold_mse) + avg_ratio = np.mean(fold_sparsity) + results.append((lam, avg_mse, avg_ratio)) + mse_matrix.append(fold_mse) + + end_all = time.time() + print(f"CV BIOT completed in {end_all - start_all:.2f}s") + + df = pd.DataFrame(results, columns=["Lambda", "AvgMSE", "AvgSparsity"]) + search_array = np.array(mse_matrix).T + print("Search array (fold x lambda):") + print(search_array) + # best_idx = df["AvgMSE"].idxmin() + + # Wilcoxon rank-sum test + mean_mse_per_lambda = search_array.mean(axis=0) + min_idx = mean_mse_per_lambda.argmin() + minimum_mse = search_array[:, min_idx] + pval = 0 + best_idx = 0 + + for idx in range(min_idx + 1, len(lam_list)): + current_mse = search_array[:, idx] + pval = 1 + if np.sum(minimum_mse - np.abs(current_mse)) != 0: + result = mannwhitneyu(minimum_mse, current_mse, alternative='two-sided', method='exact', use_continuity=True) + pval = result[1] + print(f"{idx}\t{pval:.4f}") + if pval < 0.05: + break + best_idx = idx + + print(f"The lambda value that produces the sparsest model not significantly different from the model with the lowest mse is at index: {best_idx}") + + # Final BIOT on full data + start_final = time.time() + Fe_norm, _= standardize_features(Fe, Fe) + X_norm, _ = center_targets(X, X) + + R, W, w0, ratio = BIOT( + X=Fe_norm, Y=X_norm, lam=lam_list[best_idx], + max_iter=1, eps=1e-6, + rotation=rotation, R=None, + fit_intercept=fit_intercept + ) + + end_final = time.time() + print(f"Final BIOT time: {end_final - start_final:.2f}s") + + return best_idx, R, W, w0, ratio, df + +########################################################################################################### +parser = argparse.ArgumentParser() +parser.add_argument('-e','--embeddingFile', help='csv file containing the embeddings to be predicted', default="embeddings2.csv") +parser.add_argument('-p','--predictorFile', help='csv file containing the predictors to use to predict the embeddings', default="features2.csv") 
+parser.add_argument('-d','--dataPath', help='common path for the embedding and predictor files', default="./datasets") +parser.add_argument('-o','--outputPath', help='path to location in which to store the output files', default="./output") +parser.add_argument('-s','--suffix', help='suffix to identify output files from those in other runs', default="best") +parser.add_argument('-l','--numLambdas', help='number of lamba values to calculate', type=int, default=10) +parser.add_argument('-m','--min', help='minimum lambda value', type=float, default=.0001) +parser.add_argument('-x','--max', help='maximum lambda value', type=float, default=3.5) +parser.add_argument('-f','--numFolds', help='maximum lambda value', type=int, default=10) +parser.add_argument('-a','--random_state', help='random state to use', type=int, default=1) +parser.add_argument('-c','--scoring', help='scoring algorithm to use in CV', default='neg_mean_squared_error') +parser.add_argument('-r','--rotation', help='Force rotation not orthogonal transformation', action="store_true") +parser.add_argument('-t','--fit_intercept', help='Force lasso to fit intercept', action="store_true") +parser.add_argument('-u','--mode', help='Processing mode (cpu or gpu)', default='cpu') + +args = parser.parse_args() + +embeddingPath = f"{args.dataPath}/{args.embeddingFile}" +predictorPath = f"{args.dataPath}/{args.predictorFile}" +X = pd.read_csv(embeddingPath,header=None).values +Fe = pd.read_csv(predictorPath,header=0).values + +lam_list = np.geomspace(args.min, args.max, args.numLambdas) +print('Lambda values:') +print(lam_list) + +random_state = 1 +R = None + +if isinstance(Fe, pd.DataFrame): + feature_names = Fe.columns +else: + feature_names = np.array(range(Fe.shape[1])) + +best_idx, R, W, w0, ratio, df = CV_BIOT (Fe, X, feature_names, lam_list, fit_intercept = args.fit_intercept, num_folds=args.numFolds, random_state = args.random_state, R = R, rotation = args.rotation, scoring = args.scoring, mode=args.mode) + +Weights = pd.DataFrame(W) +Weights.index = feature_names +weightfile = f"{args.outputPath}/weights_{args.suffix}.csv" +Weights.to_csv(weightfile) + +print(f"Sparsity ratio: {ratio:.4f}") + +rotationfile = f"{args.outputPath}/rotation_{args.suffix}.csv" +Rotation = pd.DataFrame(R) +Rotation.to_csv(rotationfile) + +interceptfile = f"{args.outputPath}/intercepts_{args.suffix}.csv" +Intercepts = pd.DataFrame(w0) +Intercepts.to_csv(interceptfile) + +dffile = f"{args.outputPath}/avgMSE_{args.suffix}.csv" +df.to_csv(dffile, index=False) + diff --git a/src/BIOT/Modified_code/BIOT_GPU_modified.py b/src/BIOT/Modified_code/BIOT_GPU_modified.py new file mode 100644 index 0000000..f82e939 --- /dev/null +++ b/src/BIOT/Modified_code/BIOT_GPU_modified.py @@ -0,0 +1,296 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Functions and classes for the BIOT module +""" +import warnings +warnings.filterwarnings("ignore") + +import time +import pandas as pd +import numpy as np +import torch +from TorchL1 import TorchL1 +from sklearn.metrics import mean_squared_error +from sklearn.linear_model import Lasso +from sklearn.preprocessing import StandardScaler +from sklearn.compose import TransformedTargetRegressor +import math +from scipy.stats import mannwhitneyu + +from sklearn.base import BaseEstimator, RegressorMixin +from sklearn.utils.validation import check_array, check_is_fitted + +from sklearn.model_selection import KFold +from sklearn.model_selection import GridSearchCV +from sklearn.pipeline import Pipeline + +from sklearn.preprocessing 
import StandardScaler +import argparse + +############################################################################### +# Functions and classes +def Get_W_Lasso (X, Y, lam, fit_intercept = False, mode='cuda'): + device = torch.device("cuda") + + k = Y.shape[1] + d = X.shape[1] + w0 = torch.zeros(k).double().to(device) + + if fit_intercept: + print(f"Do not support this as of now.") + exit() + print('.', end='', flush=True) + + model = TorchL1(alpha=lam, max_iter=5000, tol=1e-6) + model.coef_ = torch.zeros(d, k).double().to(device) + model.alpha *= Y.shape[0] + model.fit(X=X, y=Y) + + return model.coef_, w0 + +def calc_max_lam (X, Y): + + n = X.shape[0] + sc = StandardScaler() + X_norm = sc.fit_transform(X) + sc = StandardScaler(with_std=False) + Y_norm = sc.fit_transform(Y) + max_lam = np.max(np.absolute(X_norm.T @ Y_norm))/n + + return max_lam + +def normalize(X, mean, std): + return (X - mean) / std + +def Global_L1_Norm (W): + + k = W.shape[1] + norm_val = 0 + for dim_index in range(k): + norm_val += torch.norm(W[:, dim_index], p=1) + + return norm_val + +def BIOT_Crit (X, Y, R, W, w0, lam): + + n = X.shape[0] + diffs = (Y @ R) - (w0.unsqueeze(0).repeat(n, 1) + (X @ W)) + LS = torch.norm(diffs)**2 + L1 = Global_L1_Norm(W) + crit = ((1 / (2 * n)) * LS) + (lam * L1) + + return crit + +def BIOT (X, Y, lam, max_iter = 200, eps = 1e-6, rotation = False, R = None, fit_intercept = False, mode='cuda'): + print('lambda: ' + str(lam)) + d = X.shape[1] + n = X.shape[0] + lam_norm = lam/np.sqrt(d) + + # If R is provided, get Lasso solution only + if R is not None: + YR = Y @ R + W, w0 = Get_W_Lasso(X = X, Y = YR, lam = lam_norm, fit_intercept=fit_intercept, mode=mode) + + # Otherwise, run BIOT iterations + else: + W, w0 = Get_W_Lasso(X = X, Y = Y, lam = lam_norm, fit_intercept=fit_intercept, mode=mode) + + diff = math.inf + iter_index = 0 + crit_list = [math.inf] + + while iter_index < max_iter and diff > eps: + + De = (1/(2*n)) * Y.T @ (w0.unsqueeze(0).repeat(n, 1) + (X @ W)) + U, S, Vh = torch.linalg.svd(De) + + # rotation matrix desired (counterclockwise) + if rotation: + sv = S + smallest = torch.argmin(sv) + sv = sv.clone() + sv[smallest] = torch.sign(torch.det(torch.mm(U, Vh.T))) + sv[sv != sv[smallest]] = 1 + + # Construct the rotation matrix + R = torch.mm(U, torch.mm(torch.diag(sv), Vh.T)) + else: + R = U @ Vh + + YR = Y @ R + W, w0 = Get_W_Lasso(X = X, Y = YR, lam = lam_norm, fit_intercept = fit_intercept, mode=mode) + + crit_list.append(BIOT_Crit(X = X, Y = Y, R = R, W = W, w0 = w0, lam = lam_norm)) + diff = torch.absolute(crit_list[iter_index] - crit_list[iter_index + 1]) + iter_index += 1 + + num_nonzero = (W != 0).sum().item() + total_elements = W.numel() + ratio = num_nonzero / total_elements + + return R, W, w0, ratio + +def standardize_features(train, test): + mean = train.mean(axis=0) + std = train.std(axis=0, ddof=1) + std[std == 0] = 1.0 + train_std = (train - mean) / std + test_std = (test - mean) / std + return train_std, test_std + +def center_targets(train, test): + mean = train.mean(axis=0) + train_centered = train - mean + test_centered = test - mean + return train_centered, test_centered + +def CV_BIOT(Fe, X, feature_names, lam_list, fit_intercept=False, + num_folds=10, random_state=1, R=None, rotation=False, scoring='neg_mean_squared_error', mode='cuda'): + + start_all = time.time() + + kf = KFold(n_splits=num_folds, shuffle=True, random_state=random_state) + results = [] + mse_matrix = [] + + for lam in lam_list: + fold_mse = [] + fold_sparsity = [] + + for 
train_idx, test_idx in kf.split(Fe): + X_train = X[train_idx, :] + X_test = X[test_idx, :] + Fe_train = Fe[train_idx, :] + Fe_test = Fe[test_idx, :] + + Fe_train, Fe_test = standardize_features(Fe_train, Fe_test) + X_train, X_test = center_targets(X_train, X_test) + + Fe_train_tensor = torch.tensor(Fe_train).to(mode) + X_train_tensor = torch.tensor(X_train).to(mode) + + R_, W_, w0_, ratio = BIOT( + X=Fe_train_tensor, Y=X_train_tensor, lam=lam, rotation=rotation, R=R, + fit_intercept=fit_intercept, mode=mode) + + W_cpu = W_.cpu().numpy() + w0_cpu = w0_.cpu().numpy() + R_cpu = R_.cpu().numpy() + + intercept = np.tile(w0_cpu, (Fe_test.shape[0], 1)) + X_pred = (intercept + Fe_test @ W_cpu) @ R_cpu.T + + mse = mean_squared_error(X_test, X_pred) / 2 + fold_mse.append(mse) + fold_sparsity.append(ratio) + + avg_mse = np.mean(fold_mse) + avg_ratio = np.mean(fold_sparsity) + results.append((lam, avg_mse, avg_ratio)) + mse_matrix.append(fold_mse) + + end_all = time.time() + print(f"CV BIOT completed in {end_all - start_all:.2f}s") + + df = pd.DataFrame(results, columns=["Lambda", "AvgMSE", "AvgSparsity"]) + search_array = np.array(mse_matrix).T + print("Search array (fold x lambda):") + print(search_array) + # best_idx = df["AvgMSE"].idxmin() + + # Wilcoxon rank-sum test + mean_mse_per_lambda = search_array.mean(axis=0) + min_idx = mean_mse_per_lambda.argmin() + minimum_mse = search_array[:, min_idx] + pval = 0 + best_idx = 0 + + for idx in range(min_idx + 1, len(lam_list)): + current_mse = search_array[:, idx] + pval = 1 + if np.sum(minimum_mse - np.abs(current_mse)) != 0: + result = mannwhitneyu(minimum_mse, current_mse, alternative='two-sided', method='exact', use_continuity=True) + pval = result[1] + print(f"{idx}\t{pval:.4f}") + if pval < 0.05: + break + best_idx = idx + + print(f"The lambda value that produces the sparsest model not significantly different from the model with the lowest mse is at index: {best_idx}") + + # Final BIOT on full data + start_final = time.time() + Fe_norm, _= standardize_features(Fe, Fe) + X_norm, _ = center_targets(X, X) + + X_norm = torch.tensor(X_norm).to(mode) + Fe_norm = torch.tensor(Fe_norm).to(mode) + + R, W, w0, ratio= BIOT (Fe_norm, X_norm, lam_list[best_idx], max_iter = 1, eps = 1e-6, rotation = False, R = None, fit_intercept = False,mode='gpu') + + end_final = time.time() + print(f"Final BIOT time: {end_final - start_final:.2f}s") + + return best_idx, R, W, w0, ratio, df + +######################################################################################################## +parser = argparse.ArgumentParser() +parser.add_argument('-e','--embeddingFile', help='csv file containing the embeddings to be predicted', default="embeddings.csv") +parser.add_argument('-p','--predictorFile', help='csv file containing the predictors to use to predict the embeddings', default="features.csv") +parser.add_argument('-d','--dataPath', help='common path for the embedding and predictor files', default="/raid/home/rajivratn/hemant_rajivratn/ets/BIOT/gpu/datasets/") +parser.add_argument('-o','--outputPath', help='path to location in which to store the output files', default="./output") +parser.add_argument('-s','--suffix', help='suffix to identify output files from those in other runs', default="best") +parser.add_argument('-l','--numLambdas', help='number of lamba values to calculate', type=int, default=10) +parser.add_argument('-m','--min', help='minimum lambda value', type=float, default=.0001) +parser.add_argument('-x','--max', help='maximum lambda value', type=float, 
default=3.5) +parser.add_argument('-f','--numFolds', help='maximum lambda value', type=int, default=10) +parser.add_argument('-a','--random_state', help='random state to use', type=int, default=1) +parser.add_argument('-c','--scoring', help='scoring algorithm to use in CV', default='neg_mean_squared_error') +parser.add_argument('-r','--rotation', help='Force rotation not orthogonal transformation', action="store_true") +parser.add_argument('-t','--fit_intercept', help='Force lasso to fit intercept', action="store_true") +parser.add_argument('-u','--mode', help='Processing mode (cpu or gpu)', default='cuda') + +args = parser.parse_args() + +embeddingPath = f"{args.dataPath}/{args.embeddingFile}" +predictorPath = f"{args.dataPath}/{args.predictorFile}" +Y = pd.read_csv(embeddingPath,header=None).values +X = pd.read_csv(predictorPath,header=0).values + +print(f"Check on big dataset") +print('Embedding shape:', Y.shape) +print('Predictor shape:', X.shape) + +lam_list = np.geomspace(args.min, args.max, args.numLambdas) +print('Lambda values:') +print(lam_list) + +random_state = 1 +R = None + +if isinstance(X, pd.DataFrame): + feature_names = X.columns +else: + feature_names = np.array(range(X.shape[1])) + +best_idx, R, W, w0, ratio, df = CV_BIOT (X, Y, feature_names, lam_list, fit_intercept = args.fit_intercept, num_folds=args.numFolds, random_state = args.random_state, R = R, rotation = args.rotation, scoring = args.scoring, mode=args.mode) + +Weights = pd.DataFrame(W.cpu().numpy()) +Weights.index = feature_names +weightfile = f"{args.outputPath}/weights_{args.suffix}.csv" +Weights.to_csv(weightfile) +print(f"Sparsity ratio: {ratio:.4f}") + +rotationfile = f"{args.outputPath}/rotation_{args.suffix}.csv" +Rotation = pd.DataFrame(R.cpu().numpy()) +Rotation.to_csv(rotationfile) + +interceptfile = f"{args.outputPath}/intercepts_{args.suffix}.csv" +Intercepts = pd.DataFrame(w0.cpu().numpy()) +Intercepts.to_csv(interceptfile) + +dffile = f"{args.outputPath}/avgMSE_{args.suffix}.csv" +df.to_csv(dffile, index=False) + diff --git a/src/BIOT/Modified_code/TorchL1.py b/src/BIOT/Modified_code/TorchL1.py new file mode 100644 index 0000000..716833f --- /dev/null +++ b/src/BIOT/Modified_code/TorchL1.py @@ -0,0 +1,252 @@ +import torch + + +#@torch.jit.script +class TorchL1: + def __init__(self, alpha=1.0, max_iter=1000, tol=1e-6): + self.alpha = alpha + self.max_iter = max_iter + self.tol = tol + self.coef_ = None + + def fit(self, X, y): + """ + Fit model using coordinate descent for L1-regularized linear regression. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Training data. + y : array-like of shape (n_samples,) or (n_samples, n_targets) + Target values. + + Returns + ------- + self : object + Fitted estimator. 
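+
+        Notes
+        -----
+        A minimal usage sketch (the shapes, alpha value, and CUDA device
+        are illustrative; ``coef_`` must be initialized before fitting,
+        which is how BIOT_GPU.py drives this class):
+
+        >>> import torch
+        >>> X = torch.randn(100, 20, dtype=torch.float64, device="cuda")
+        >>> Y = torch.randn(100, 6, dtype=torch.float64, device="cuda")
+        >>> model = TorchL1(alpha=0.1, max_iter=5000, tol=1e-6)
+        >>> model.coef_ = torch.zeros(20, 6, dtype=torch.float64, device="cuda")
+        >>> model.fit(X=X, y=Y)
+        >>> W = model.coef_  # learned (n_features, n_targets) weights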
+ """ + # self._coordinate_descent(X, y, self.alpha, self.coef_) + # self._fista(X, y, self.alpha, self.coef_) + self._fasta(X, y, self.alpha, self.coef_) + + def _fasta(self, X, Y, lam, W, tau1=10.0, max_iter=1000, w=10, + backtrack=True, record_iterates=False, + stepsize_shrink=0.5, eps_n=1e-15, tol=1e-10): + n, p = X.shape + _, m = Y.shape + device = X.device + + # x0 = torch.zeros((p, m), device=device) + x0 = W + x1 = x0.clone() + d1 = x1.clone() + + f = lambda B: 0.5 * torch.norm(X @ B - Y, p='fro')**2 + gradf = lambda B: X.T @ (X @ B - Y) + g = lambda B: lam * torch.norm(B, p=1) + proxg = lambda B, tau: torch.sign(B) * torch.clamp(torch.abs(B) - tau * lam, min=0.0) + + residual = torch.zeros(max_iter) + normalized_resid = torch.zeros(max_iter) + taus = torch.zeros(max_iter) + fvals = torch.zeros(max_iter) + objective = torch.zeros(max_iter + 1) + + f1 = f(d1) + gradf1 = gradf(d1) + fvals[0] = f1 + objective[0] = f1 + g(x0) + min_obj = objective[0].item() + best_beta = x1.clone() + + if record_iterates: + iterates = torch.zeros((p, max_iter + 1)) + iterates[:, 0] = x1.squeeze() + else: + iterates = None + + for i in range(max_iter): + x0 = x1.clone() + gradf0 = gradf1.clone() + tau0 = tau1 + # print (tau0) + + x1hat = x0 - tau0 * gradf0 + x1 = proxg(x1hat, tau0) + Dx = x1 - x0 + d1 = x1.clone() + f1 = f(d1) + + if backtrack: + if i > 0: + M = torch.max(fvals[max(i - w, 0):i]) + else: + M = fvals[0] + # print (M) + + prev_f1 = f1 + + backtrack_count = 0 + prop = (f1 - 1e-12 > M + torch.sum(Dx * gradf0) + 0.5 * torch.norm(Dx)**2 / tau0) + + while prop and backtrack_count < 20: + tau0 *= stepsize_shrink + x1hat = x0 - tau0 * gradf0 + x1 = proxg(x1hat, tau0) + d1 = x1.clone() + f1 = f(d1) + Dx = x1 - x0 + backtrack_count += 1 + prop = (f1 - 1e-12 > M + torch.sum(Dx * gradf0) + 0.5 * torch.norm(Dx)**2 / tau0) + + taus[i] = tau0 + residual[i] = torch.norm(Dx) / tau0 + normalizer = max( + torch.norm(gradf0).item(), + torch.norm(x1 - x1hat).item() / tau0 + ) + eps_n + normalized_resid[i] = residual[i].item() / normalizer + fvals[i] = f1 + objective[i + 1] = f1 + g(x1) + + if objective[i + 1] < min_obj: + best_beta = x1.clone() + min_obj = objective[i + 1].item() + + if record_iterates: + iterates[:, i + 1] = x1.squeeze() + + gradf1 = gradf(d1) + Dg = gradf1 + (x1hat - x0) / tau0 + dotprod = torch.sum(Dx * Dg) + + if abs(dotprod.item()) < 1e-15: + break + + tau_s = torch.norm(Dx)**2 / dotprod + tau_m = dotprod / (torch.norm(Dg)**2 + eps_n) + tau_m = max(tau_m.item(), 0) + + tau1 = tau_m if 2 * tau_m > tau_s else tau_s - 0.5 * tau_m + if tau1 <= 0 or not torch.isfinite(torch.as_tensor(tau1)): + tau1 = tau0 * 1.5 + + if torch.max(torch.abs(Dx)) < tol * (torch.max(torch.abs(x1))): + break + + self.coef_ = best_beta + + def _fista(self, X, Y, lam, W, max_iter=1000, tol=1e-10): + """ + FISTA algorithm for L1-regularized regression (multi-output). + + Parameters + ---------- + X : torch.Tensor, shape (n_samples, n_features) + Y : torch.Tensor, shape (n_samples, n_targets) + lam : float + L1 regularization strength. + W : torch.Tensor, shape (n_features, n_targets) + Initial weights. 
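+
+        Notes
+        -----
+        Each iteration takes a proximal gradient step with fixed step size
+        1/L, where L = ||X||_2^2 (squared spectral norm), followed by a
+        Nesterov momentum update:
+
+            W_{k+1} = soft(Z_k - grad f(Z_k) / L, lam / L)
+            t_{k+1} = (1 + sqrt(1 + 4 t_k^2)) / 2
+            Z_{k+1} = W_{k+1} + ((t_k - 1) / t_{k+1}) (W_{k+1} - W_k)
+
+        with f(Z) = ||X Z - Y||_F^2 / (2n) and the soft-thresholding
+        operator soft(v, c) = sign(v) * max(|v| - c, 0).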
+ """ + n, a = X.shape + _, b = Y.shape + + # Estimate Lipschitz constant L = ||X||^2 + L = torch.linalg.norm(X, ord=2).item() ** 2 + + W_old = W.clone() + Z = W.clone() + t = 1 + + for _ in range(max_iter): + W_prev = W.clone() + + # Gradient: Xᵀ (X Z - Y) + grad = X.T @ (X @ Z - Y) / n + + # Gradient step + temp = Z - grad / L + + # Soft-thresholding + W = torch.sign(temp) * torch.maximum(torch.abs(temp) - lam / L, torch.zeros_like(temp)) + + # FISTA update + t_next = 0.5 * (1 + torch.sqrt(torch.tensor(1 + 4 * t ** 2))) + Z = W + ((t - 1) / t_next) * (W - W_prev) + t = t_next + + # stopping criteria + if ( torch.max(torch.abs(W - W_old)) < tol * torch.max(torch.abs(W)) ): + # + break + + self.coef_ = W + + def _coordinate_descent(self, X, Y, lam, W, max_iter = 1000, tol = 1e-10): + """ + Lasso coordinate descent algorithm. + + Parameters + ---------- + X : torch.Tensor of shape (n_samples, n_features) + Training data. + Y : torch.Tensor of shape (n_samples, n_targets) + Target values. + lam : torch.Tensor + Regularization parameter (lambda). + W : torch.Tensor of shape (n_features, n_targets) + Coefficients/weights. + max_iter : int + Maximum number of iterations. + tol : float + Tolerance for stopping criteria. + + Returns + ------- + W : torch.Tensor + Updated weights. + """ + + _, a = X.shape + _, b = Y.shape + # W shape = (a, b) + + summation_ = torch.sum(X ** 2, dim=0) # shape = (a,) + + for _ in range(max_iter): + W_old = W.clone() + + for k in range(a): + + if summation_[k] == 0: + continue + + residual = Y - torch.matmul(X, W) + torch.matmul(X[:, k].unsqueeze(-1), W[k, :].unsqueeze(0)) # shape = (n, b) + + rho = torch.mv(residual.T, X[:, k]).unsqueeze(-1) # shape = (b,1) + W[k,:] = (torch.sign(rho) * torch.maximum( torch.abs(rho) - lam, torch.zeros_like(rho)) ).squeeze() / summation_[k] + + # stopping criteria + if ( torch.max(torch.abs(W - W_old)) < tol * torch.max(torch.abs(W)) ): + # + break + + self.coef_ = W + + def predict(self, X): + """ + Predict using the linear model. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Samples. + + Returns + ------- + C : array-like of shape (n_samples,) + Returns predicted values. 
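+
+        Notes
+        -----
+        This computes ``X @ self.W``, while ``fit`` stores the learned
+        weights in ``self.coef_``; assign ``model.W = model.coef_`` (or
+        use ``X @ model.coef_`` directly) before calling ``predict``.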
+ """ + + return torch.matmul(X, self.W) From da12883458b9d924db518e02d266e09365990c0b Mon Sep 17 00:00:00 2001 From: Yuqi Wu Date: Tue, 8 Jul 2025 10:18:11 -0400 Subject: [PATCH 2/2] Update python code --- src/BIOT/Modified_code/BIOT_CPU.py | 504 +++++++++++++++++++ src/BIOT/Modified_code/BIOT_CPU_modified.py | 267 ---------- src/BIOT/Modified_code/BIOT_GPU.py | 519 ++++++++++++++++++++ src/BIOT/Modified_code/BIOT_GPU_modified.py | 296 ----------- src/BIOT/Modified_code/TorchL1.py | 121 ++++- 5 files changed, 1118 insertions(+), 589 deletions(-) create mode 100644 src/BIOT/Modified_code/BIOT_CPU.py delete mode 100644 src/BIOT/Modified_code/BIOT_CPU_modified.py create mode 100644 src/BIOT/Modified_code/BIOT_GPU.py delete mode 100644 src/BIOT/Modified_code/BIOT_GPU_modified.py diff --git a/src/BIOT/Modified_code/BIOT_CPU.py b/src/BIOT/Modified_code/BIOT_CPU.py new file mode 100644 index 0000000..d685bb5 --- /dev/null +++ b/src/BIOT/Modified_code/BIOT_CPU.py @@ -0,0 +1,504 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Functions and classes for the BIOT module + +""" +import pandas as pd +import numpy as np +import argparse +import time +import math + +from scipy.stats import mannwhitneyu +from sklearn.linear_model import Lasso +from sklearn.metrics import mean_squared_error +from sklearn.model_selection import KFold + +############################################################################### + +# Functions and classes + +def Get_W_Lasso (X, Y, lam, fit_intercept = False): + """ + Estimates a separate Lasso regression model for each column of Y. + + Parameters + ---------- + X : numpy.ndarray of shape (n_samples, n_features) + Feature matrix with d features (columns) used to predict Y. + + Y : numpy.ndarray of shape (n_samples, n_targets) + Target matrix with k target variables (embedding dimensions). + + lam : float + Regularization strength (L1 penalty). Higher values yield sparser solutions. + + fit_intercept : bool, optional (default=False) + If True, the model includes an intercept term. + + Returns + ------- + W : numpy.ndarray of shape (n_features, n_targets) + Coefficient matrix. Each column corresponds to the Lasso solution for one dimension of Y. + + w0 : numpy.ndarray of shape (n_targets,) + Intercept vector. Only non-zero if `fit_intercept=True`; otherwise all zeros. + """ + + k = Y.shape[1] + d = X.shape[1] + W = np.zeros((d, k)) + w0 = np.zeros(k) + if fit_intercept: + print(f"Do not support this as of now.") + exit() + print('.', end='', flush=True) + + for dim_index in range(k): + model = Lasso(alpha = lam, fit_intercept = fit_intercept, max_iter = 5000, tol = 1e-6) + model.fit(X = X, y = Y[:, dim_index]) + W[:, dim_index] = model.coef_ + if fit_intercept: + w0[dim_index] = model.intercept_ + return W, w0 + +def normalize(X, mean, std): + return (X - mean) / std + +def Global_L1_Norm (W): + """ + Compute the global L1 norm across all columns of the weight matrix W. + + This is the sum of the L1 norms (i.e., sum of absolute values) of each column in W. + + Parameters + ---------- + W : numpy.ndarray of shape (n_features, n_targets) + Weight matrix, where each column corresponds to the coefficients for one target. + + Returns + ------- + float + Total L1 norm summed across all columns of W. + """ + + k = W.shape[1] + norm_val = 0 + for dim_index in range(k): + norm_val += np.linalg.norm(W[:, dim_index], ord = 1) + + return norm_val + +def BIOT_Crit (X, Y, R, W, w0, lam): + """ + Compute the BIOT objective function value. 
+ + The BIOT criterion includes a least-squares loss term and an L1 regularization term + on the model weights. This function assumes an orthogonal transformation R has already + been applied to the embedding dimensions. + + Parameters + ---------- + X : numpy.ndarray of shape (n_samples, n_features) + Input feature matrix used to predict the rotated embedding. + + Y : numpy.ndarray of shape (n_samples, n_targets) + Original embedding matrix before rotation. + + R : numpy.ndarray of shape (n_targets, n_targets) + Orthogonal rotation matrix applied to Y. + + W : numpy.ndarray of shape (n_features, n_targets) + Weight matrix for the linear model (one target per column). + + w0 : numpy.ndarray of shape (n_targets,) + Intercept vector for each target dimension. + + lam : float + L1 regularization strength (sparsity penalty). + + Returns + ------- + float + The value of the BIOT objective function: + (1/2n) * ||Y R - (X W + w0)||^2 + λ * ||W||_1 + """ + + n = X.shape[0] + diffs = (Y @ R) - (np.tile(w0, (n, 1)) + (X @ W)) + LS = np.linalg.norm(diffs)**2 + L1 = Global_L1_Norm(W) + + crit = ((1/(2*n)) * LS) + (lam * L1) + + return crit + +def BIOT(X, Y, lam, max_iter=200, eps=1e-6, rotation=False, R=None, fit_intercept=False): + """ + Fit a BIOT (Bilinear Orthogonal Transformation) model using an alternating optimization procedure. + + The BIOT model finds a sparse linear mapping W from input features X to a rotated version of + the target embedding Y @ R, where R is an orthogonal (or rotation) matrix. + The optimization alternates between: + - Solving Lasso regression to estimate W (sparse weights) + - Updating R using orthogonal Procrustes alignment (via SVD) + + Parameters + ---------- + X : numpy.ndarray of shape (n_samples, n_features) + Input feature matrix. + + Y : numpy.ndarray of shape (n_samples, n_targets) + Target embedding matrix to be explained. + + lam : float + Sparsity hyperparameter. Controls the L1 penalty in Lasso regression. + + max_iter : int, optional (default=200) + Maximum number of alternating optimization steps. + + eps : float, optional (default=1e-6) + Convergence threshold for the objective function. Optimization stops when the + change in objective value is below this threshold. + + rotation : bool, optional (default=False) + If True, constrain the transformation matrix R to be a proper rotation matrix + (i.e., det(R) = +1). Otherwise, any orthogonal matrix is allowed. + + R : numpy.ndarray of shape (n_targets, n_targets), optional (default=None) + If provided, use this as a fixed orthogonal transformation matrix. + If None, R is initialized and optimized during training. + + fit_intercept : bool, optional (default=False) + If True, includes a bias term (intercept) in the Lasso regression. + + Returns + ------- + R : numpy.ndarray of shape (n_targets, n_targets) + Learned orthogonal (or rotation) matrix applied to Y. + + W : numpy.ndarray of shape (n_features, n_targets) + Sparse weight matrix mapping X to the rotated embedding Y @ R. + + w0 : numpy.ndarray of shape (n_targets,) + Intercept vector (zeros if fit_intercept=False). + + ratio : float + Sparsity ratio of W, i.e., proportion of non-zero elements. 
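+
+    Examples
+    --------
+    A minimal sketch on synthetic data (shapes, seed, and the lambda
+    value are illustrative only):
+
+    >>> import numpy as np
+    >>> rng = np.random.default_rng(0)
+    >>> Fe = rng.standard_normal((50, 10))  # predictors
+    >>> E = rng.standard_normal((50, 4))    # embedding to explain
+    >>> R, W, w0, ratio = BIOT(X=Fe, Y=E, lam=0.1)
+    >>> W.shape, R.shape                    # ((10, 4), (4, 4))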
+ """ + + print('lambda:', lam) + d = X.shape[1] + n = X.shape[0] + lam_norm = lam / np.sqrt(d) + + # If R is provided, get Lasso solution only + if R is not None: + YR = Y @ R + W, w0 = Get_W_Lasso(X = X, Y = YR, lam = lam_norm, fit_intercept=fit_intercept) + # Otherwise, run BIOT iterations + else: + # Init W + W, w0 = Get_W_Lasso(X=X, Y=Y, lam=lam_norm, fit_intercept=fit_intercept) + + diff = math.inf + iter_index = 0 + crit_list = [math.inf] + + while iter_index < max_iter and diff > eps: + + # UPDATE R + intercept_matrix = np.tile(w0, (n, 1)) + De = (1 / (2 * n)) * Y.T @ (intercept_matrix + X @ W) + U, S, Vh = np.linalg.svd(De, full_matrices=False) + + # rotation matrix desired (counterclockwise) + if rotation: + sv = S.copy() + smallest = np.argmin(sv) + sv[smallest] = np.sign(np.linalg.det(U @ Vh)) + sv[sv != sv[smallest]] = 1 + R = U @ np.diag(sv) @ Vh + # orthogonal transformation matrix desired + else: + R = U @ Vh + + # UPDATE W + YR = Y @ R + W, w0 = Get_W_Lasso(X=X, Y=YR, lam=lam_norm, fit_intercept=fit_intercept) + + # CHECK CONVERGENCE + crit_list.append(BIOT_Crit(X=X, Y=Y, R=R, W=W, w0=w0, lam=lam_norm)) + diff = abs(crit_list[iter_index] - crit_list[iter_index + 1]) + iter_index += 1 + + num_nonzero = np.sum(W != 0) + total_elements = W.size + ratio = num_nonzero / total_elements + + return R, W, w0, ratio + +def standardize_features(train, test): + """ + Standardize features in the training and test sets using the training set statistics. + + Each feature is transformed to have zero mean and unit variance, based on the + training data. Features with zero standard deviation are left unchanged (std set to 1.0). + + Parameters + ---------- + train : numpy.ndarray of shape (n_samples_train, n_features) + Training feature matrix. + + test : numpy.ndarray of shape (n_samples_test, n_features) + Test feature matrix to be standardized using training mean and std. + + Returns + ------- + train_std : numpy.ndarray of shape (n_samples_train, n_features) + Standardized training feature matrix. + + test_std : numpy.ndarray of shape (n_samples_test, n_features) + Standardized test feature matrix. + """ + + mean = train.mean(axis=0) + std = train.std(axis=0, ddof=1) + std[std == 0] = 1.0 + train_std = (train - mean) / std + test_std = (test - mean) / std + return train_std, test_std + +def center_targets(train, test): + """ + Center target variables in the training and test sets using the training set mean. + + Each target variable (column) is shifted to have zero mean in the training set. + The same mean is subtracted from the test set to ensure consistency. + + Parameters + ---------- + train : numpy.ndarray of shape (n_samples_train, n_targets) + Training target matrix. + + test : numpy.ndarray of shape (n_samples_test, n_targets) + Test target matrix to be centered using training means. + + Returns + ------- + train_centered : numpy.ndarray of shape (n_samples_train, n_targets) + Centered training targets. + + test_centered : numpy.ndarray of shape (n_samples_test, n_targets) + Centered test targets. + """ + + mean = train.mean(axis=0) + train_centered = train - mean + test_centered = test - mean + return train_centered, test_centered + + +def CV_BIOT(Fe, X, feature_names, lam_list, fit_intercept=False, + num_folds=10, random_state=1, R=None, rotation=False, scoring='neg_mean_squared_error'): + """ + Cross-validated BIOT. + + Performs K-fold cross-validation to select the best lambda for the BIOT model, + then fits a final BIOT model on the full dataset using the selected lambda. 
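+
+    Lambda selection starts from the lambda with the lowest mean fold MSE
+    and walks toward larger (sparser) lambdas, keeping each one whose
+    fold-wise MSEs are not significantly different from the best lambda's
+    under a two-sided Mann-Whitney U test, and stopping at the first
+    p < 0.05.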
+ + Parameters + ---------- + Fe : np.ndarray of shape (n_samples, n_features) + Predictor variables (input features) for BIOT. + + X : np.ndarray of shape (n_samples, n_targets) + Target embeddings to be explained (Y in BIOT). + + feature_names : list + List of feature names (used for returning weights as DataFrame). + + lam_list : list of float + List of lambda values (L1 penalties) to try in cross-validation. + + fit_intercept : bool, default=False + Whether to fit an intercept in the Lasso regressions. + + num_folds : int, default=10 + Number of folds in K-fold cross-validation. + + random_state : int, default=1 + Seed for reproducibility. + + R : np.ndarray or None + Optional fixed orthogonal transformation matrix. If None, R is optimized. + + rotation : bool, default=False + If True, constrain R to be a proper rotation matrix (det=+1). + + scoring : str, default='neg_mean_squared_error' + Scoring method for model selection. Currently unused (MSE is hardcoded). + + + Returns + ------- + best_idx : int + Index in `lam_list` of the selected lambda. + + R : np.ndarray of shape (n_targets, n_targets) + Final orthogonal transformation matrix. + + W : pandas.DataFrame of shape (n_features, n_targets) + Final weight matrix with feature names as index. + + w0 : np.ndarray of shape (n_targets,) + Intercept vector for the final model. + + ratio : float + Sparsity ratio (non-zero elements / total) of the final weight matrix. + + df : pandas.DataFrame + Cross-validation summary with columns: ["Lambda", "AvgMSE", "AvgSparsity"] + """ + + start_all = time.time() + kf = KFold(n_splits=num_folds, shuffle=True, random_state=random_state) + results = [] # Store (lambda, avg MSE, avg sparsity) + mse_matrix = [] # Store fold-wise MSEs for statistical testing + + for lam in lam_list: + fold_mse = [] + fold_sparsity = [] + + for train_idx, test_idx in kf.split(Fe): + # Split folds + X_train, X_test = X[train_idx], X[test_idx] + Fe_train, Fe_test = Fe[train_idx], Fe[test_idx] + + # Standardize Fe and center X (targets) + Fe_train, Fe_test = standardize_features(Fe_train, Fe_test) + X_train, X_test = center_targets(X_train, X_test) + + # Run BIOT on training fold + R_, W_, w0_, ratio = BIOT( + X=Fe_train, Y=X_train, lam=lam, + fit_intercept=fit_intercept, + R=R, rotation=rotation + ) + + # Predict on test fold + intercept = np.tile(w0_, (Fe_test.shape[0], 1)) + X_pred = (intercept + Fe_test @ W_) @ R_.T + + # Compute MSE (scaled by 1/2 for consistency with BIOT loss) + mse = mean_squared_error(X_test, X_pred) / 2 + fold_mse.append(mse) + fold_sparsity.append(ratio) + + # Store average results + avg_mse = np.mean(fold_mse) + avg_ratio = np.mean(fold_sparsity) + results.append((lam, avg_mse, avg_ratio)) + mse_matrix.append(fold_mse) + + end_all = time.time() + print(f"CV BIOT completed in {end_all - start_all:.2f}s") + + # Cross-validation summary + df = pd.DataFrame(results, columns=["Lambda", "AvgMSE", "AvgSparsity"]) + search_array = np.array(mse_matrix).T + print("Search array (fold x lambda):") + print(search_array) + + # Statistical test: select sparsest model not significantly worse than best + mean_mse_per_lambda = search_array.mean(axis=0) + min_idx = mean_mse_per_lambda.argmin() + minimum_mse = search_array[:, min_idx] + pval = 0 + best_idx = 0 + + for idx in range(min_idx + 1, len(lam_list)): + current_mse = search_array[:, idx] + pval = 1 + if np.sum(minimum_mse - np.abs(current_mse)) != 0: + result = mannwhitneyu(minimum_mse, current_mse, alternative='two-sided', method='exact', 
use_continuity=True) + pval = result[1] + print(f"{idx}\t{pval:.4f}") + if pval < 0.05: + break + best_idx = idx + print(f"The lambda value that produces the sparsest model not significantly different from the model with the lowest mse is at index: {best_idx}") + + # Fit final model using best lambda on full data + start_final = time.time() + Fe_norm, _= standardize_features(Fe, Fe) + X_norm, _ = center_targets(X, X) + + R, W, w0, ratio = BIOT( + X=Fe_norm, Y=X_norm, lam=lam_list[best_idx], + max_iter=200, eps=1e-6, + rotation=rotation, R=None, + fit_intercept=fit_intercept + ) + + end_final = time.time() + print(f"Final BIOT time: {end_final - start_final:.2f}s") + + return best_idx, R, W, w0, ratio, df + +########################################################################################################### +parser = argparse.ArgumentParser() +parser.add_argument('-e','--embeddingFile', help='csv file containing the embeddings to be predicted', default="embeddings2.csv") +parser.add_argument('-p','--predictorFile', help='csv file containing the predictors to use to predict the embeddings', default="features2.csv") +parser.add_argument('-d','--dataPath', help='common path for the embedding and predictor files', default="./datasets") +parser.add_argument('-o','--outputPath', help='path to location in which to store the output files', default="./output") +parser.add_argument('-s','--suffix', help='suffix to identify output files from those in other runs', default="best") +parser.add_argument('-l','--numLambdas', help='number of lamba values to calculate', type=int, default=10) +parser.add_argument('-m','--min', help='minimum lambda value', type=float, default=.0001) +parser.add_argument('-x','--max', help='maximum lambda value', type=float, default=3.5) +parser.add_argument('-f','--numFolds', help='maximum lambda value', type=int, default=10) +parser.add_argument('-a','--random_state', help='random state to use', type=int, default=1) +parser.add_argument('-c','--scoring', help='scoring algorithm to use in CV', default='neg_mean_squared_error') +parser.add_argument('-r','--rotation', help='Force rotation not orthogonal transformation', action="store_true") +parser.add_argument('-t','--fit_intercept', help='Force lasso to fit intercept', action="store_true") + +args = parser.parse_args() + +embeddingPath = f"{args.dataPath}/{args.embeddingFile}" +predictorPath = f"{args.dataPath}/{args.predictorFile}" +X = pd.read_csv(embeddingPath,header=None).values +Fe = pd.read_csv(predictorPath,header=0).values + +lam_list = np.geomspace(args.min, args.max, args.numLambdas) +print('Lambda values:') +print(lam_list) + +random_state = 1 +R = None + +if isinstance(Fe, pd.DataFrame): + feature_names = Fe.columns +else: + feature_names = np.array(range(Fe.shape[1])) + +best_idx, R, W, w0, ratio, df = CV_BIOT (Fe, X, feature_names, lam_list, fit_intercept = args.fit_intercept, num_folds=args.numFolds, random_state = args.random_state, R = R, rotation = args.rotation, scoring = args.scoring) + +Weights = pd.DataFrame(W) +Weights.index = feature_names +weightfile = f"{args.outputPath}/weights_{args.suffix}.csv" +Weights.to_csv(weightfile) + +rotationfile = f"{args.outputPath}/rotation_{args.suffix}.csv" +Rotation = pd.DataFrame(R) +Rotation.to_csv(rotationfile) + +interceptfile = f"{args.outputPath}/intercepts_{args.suffix}.csv" +Intercepts = pd.DataFrame(w0) +Intercepts.to_csv(interceptfile) + +dffile = f"{args.outputPath}/avgMSE_{args.suffix}.csv" +df.to_csv(dffile, index=False) +print(f"Sparsity ratio: 
{ratio:.4f}") + +# python3 src/BIOT/BIOT_CPU.py -e mite_6d_embedding.csv -p mite_F.csv -d test/inputs/Mite -o test/outputs -s mite_6d_cd_CPU_fixed -l 20 -m 0.0001 -f 5 -x 3.5 -a 42 -c neg_mean_squared_error| tee mite_6d_cd_CPU_fixed.txt \ No newline at end of file diff --git a/src/BIOT/Modified_code/BIOT_CPU_modified.py b/src/BIOT/Modified_code/BIOT_CPU_modified.py deleted file mode 100644 index 0875788..0000000 --- a/src/BIOT/Modified_code/BIOT_CPU_modified.py +++ /dev/null @@ -1,267 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Functions and classes for the BIOT module - -""" -import pandas as pd -import numpy as np -import argparse -import time -import math - -from scipy.stats import mannwhitneyu -from sklearn.linear_model import Lasso -from sklearn.preprocessing import StandardScaler -from sklearn.metrics import mean_squared_error -from sklearn.model_selection import KFold -from sklearn.preprocessing import StandardScaler - -############################################################################### -# Functions and classes -def Get_W_Lasso (X, Y, lam, fit_intercept = False, mode='cpu'): - k = Y.shape[1] - d = X.shape[1] - W = np.zeros((d, k)) - w0 = np.zeros(k) - if fit_intercept: - mode = 'cpu' - print('.', end='', flush=True) - for dim_index in range(k): - model = Lasso(alpha = lam, fit_intercept = fit_intercept, max_iter = 5000, tol = 1e-6) - model.fit(X = X, y = Y[:, dim_index]) - W[:, dim_index] = model.coef_ - if fit_intercept: - w0[dim_index] = model.intercept_ - return W, w0 - -def calc_max_lam (X, Y): - n = X.shape[0] - sc = StandardScaler() - X_norm = sc.fit_transform(X) - sc = StandardScaler(with_std=False) - Y_norm = sc.fit_transform(Y) - max_lam = np.max(np.absolute(X_norm.T @ Y_norm))/n - - return max_lam - -def normalize(X, mean, std): - return (X - mean) / std - -def Global_L1_Norm (W): - k = W.shape[1] - norm_val = 0 - for dim_index in range(k): - norm_val += np.linalg.norm(W[:, dim_index], ord = 1) - - return norm_val - -def BIOT_Crit (X, Y, R, W, w0, lam): - n = X.shape[0] - diffs = (Y @ R) - (np.tile(w0, (n, 1)) + (X @ W)) - LS = np.linalg.norm(diffs)**2 - L1 = Global_L1_Norm(W) - - crit = ((1/(2*n)) * LS) + (lam * L1) - - return crit - -def BIOT(X, Y, lam, max_iter=200, eps=1e-6, rotation=False, R=None, fit_intercept=False, mode='cpu'): - print('lambda:', lam) - R = np.eye(6) - d = X.shape[1] - n = X.shape[0] - lam_norm = lam / np.sqrt(d) - - - W, w0 = Get_W_Lasso(X=X, Y=Y, lam=lam_norm, fit_intercept=fit_intercept, mode=mode) - - diff = math.inf - iter_index = 0 - crit_list = [math.inf] - - while iter_index < max_iter and diff > eps: - # Compute De matrix - intercept_matrix = np.tile(w0, (n, 1)) # shape (n, m) - De = (1 / (2 * n)) * Y.T @ (intercept_matrix + X @ W) # shape (m, m) - - # SVD - U, S, Vh = np.linalg.svd(De, full_matrices=False) - - if rotation: - sv = S.copy() - smallest = np.argmin(sv) - sv[smallest] = np.sign(np.linalg.det(U @ Vh)) - sv[sv != sv[smallest]] = 1 - R = U @ np.diag(sv) @ Vh - else: - R = U @ Vh - - # Update W - YR = Y @ R - W, w0 = Get_W_Lasso(X=X, Y=YR, lam=lam_norm, fit_intercept=fit_intercept, mode=mode) - - # Convergence check - crit_list.append(BIOT_Crit(X=X, Y=Y, R=R, W=W, w0=w0, lam=lam_norm)) - diff = abs(crit_list[iter_index] - crit_list[iter_index + 1]) - iter_index += 1 - - # Sparsity - num_nonzero = np.sum(W != 0) - total_elements = W.size - ratio = num_nonzero / total_elements - - return R, W, w0, ratio - -def standardize_features(train, test): - mean = train.mean(axis=0) - std = train.std(axis=0, 
ddof=1) - std[std == 0] = 1.0 - train_std = (train - mean) / std - test_std = (test - mean) / std - return train_std, test_std - -def center_targets(train, test): - mean = train.mean(axis=0) - train_centered = train - mean - test_centered = test - mean - return train_centered, test_centered - - -def CV_BIOT(Fe, X, feature_names, lam_list, fit_intercept=False, - num_folds=10, random_state=1, R=None, rotation=False, scoring='neg_mean_squared_error', mode='cpu'): - - start_all = time.time() - kf = KFold(n_splits=num_folds, shuffle=True, random_state=random_state) - results = [] - mse_matrix = [] - - for lam in lam_list: - fold_mse = [] - fold_sparsity = [] - - for train_idx, test_idx in kf.split(Fe): - X_train = X[train_idx, :] - X_test = X[test_idx, :] - Fe_train = Fe[train_idx, :] - Fe_test = Fe[test_idx, :] - - Fe_train, Fe_test = standardize_features(Fe_train, Fe_test) - X_train, X_test = center_targets(X_train, X_test) - - R_, W_, w0_, ratio = BIOT(X=Fe_train, Y=X_train, lam=lam) - - intercept = np.tile(w0_, (Fe_test.shape[0], 1)) - X_pred = (intercept + Fe_test @ W_) @ R_.T - - mse = mean_squared_error(X_test, X_pred) / 2 - fold_mse.append(mse) - fold_sparsity.append(ratio) - - avg_mse = np.mean(fold_mse) - avg_ratio = np.mean(fold_sparsity) - results.append((lam, avg_mse, avg_ratio)) - mse_matrix.append(fold_mse) - - end_all = time.time() - print(f"CV BIOT completed in {end_all - start_all:.2f}s") - - df = pd.DataFrame(results, columns=["Lambda", "AvgMSE", "AvgSparsity"]) - search_array = np.array(mse_matrix).T - print("Search array (fold x lambda):") - print(search_array) - # best_idx = df["AvgMSE"].idxmin() - - # Wilcoxon rank-sum test - mean_mse_per_lambda = search_array.mean(axis=0) - min_idx = mean_mse_per_lambda.argmin() - minimum_mse = search_array[:, min_idx] - pval = 0 - best_idx = 0 - - for idx in range(min_idx + 1, len(lam_list)): - current_mse = search_array[:, idx] - pval = 1 - if np.sum(minimum_mse - np.abs(current_mse)) != 0: - result = mannwhitneyu(minimum_mse, current_mse, alternative='two-sided', method='exact', use_continuity=True) - pval = result[1] - print(f"{idx}\t{pval:.4f}") - if pval < 0.05: - break - best_idx = idx - - print(f"The lambda value that produces the sparsest model not significantly different from the model with the lowest mse is at index: {best_idx}") - - # Final BIOT on full data - start_final = time.time() - Fe_norm, _= standardize_features(Fe, Fe) - X_norm, _ = center_targets(X, X) - - R, W, w0, ratio = BIOT( - X=Fe_norm, Y=X_norm, lam=lam_list[best_idx], - max_iter=1, eps=1e-6, - rotation=rotation, R=None, - fit_intercept=fit_intercept - ) - - end_final = time.time() - print(f"Final BIOT time: {end_final - start_final:.2f}s") - - return best_idx, R, W, w0, ratio, df - -########################################################################################################### -parser = argparse.ArgumentParser() -parser.add_argument('-e','--embeddingFile', help='csv file containing the embeddings to be predicted', default="embeddings2.csv") -parser.add_argument('-p','--predictorFile', help='csv file containing the predictors to use to predict the embeddings', default="features2.csv") -parser.add_argument('-d','--dataPath', help='common path for the embedding and predictor files', default="./datasets") -parser.add_argument('-o','--outputPath', help='path to location in which to store the output files', default="./output") -parser.add_argument('-s','--suffix', help='suffix to identify output files from those in other runs', default="best") 
-parser.add_argument('-l','--numLambdas', help='number of lamba values to calculate', type=int, default=10) -parser.add_argument('-m','--min', help='minimum lambda value', type=float, default=.0001) -parser.add_argument('-x','--max', help='maximum lambda value', type=float, default=3.5) -parser.add_argument('-f','--numFolds', help='maximum lambda value', type=int, default=10) -parser.add_argument('-a','--random_state', help='random state to use', type=int, default=1) -parser.add_argument('-c','--scoring', help='scoring algorithm to use in CV', default='neg_mean_squared_error') -parser.add_argument('-r','--rotation', help='Force rotation not orthogonal transformation', action="store_true") -parser.add_argument('-t','--fit_intercept', help='Force lasso to fit intercept', action="store_true") -parser.add_argument('-u','--mode', help='Processing mode (cpu or gpu)', default='cpu') - -args = parser.parse_args() - -embeddingPath = f"{args.dataPath}/{args.embeddingFile}" -predictorPath = f"{args.dataPath}/{args.predictorFile}" -X = pd.read_csv(embeddingPath,header=None).values -Fe = pd.read_csv(predictorPath,header=0).values - -lam_list = np.geomspace(args.min, args.max, args.numLambdas) -print('Lambda values:') -print(lam_list) - -random_state = 1 -R = None - -if isinstance(Fe, pd.DataFrame): - feature_names = Fe.columns -else: - feature_names = np.array(range(Fe.shape[1])) - -best_idx, R, W, w0, ratio, df = CV_BIOT (Fe, X, feature_names, lam_list, fit_intercept = args.fit_intercept, num_folds=args.numFolds, random_state = args.random_state, R = R, rotation = args.rotation, scoring = args.scoring, mode=args.mode) - -Weights = pd.DataFrame(W) -Weights.index = feature_names -weightfile = f"{args.outputPath}/weights_{args.suffix}.csv" -Weights.to_csv(weightfile) - -print(f"Sparsity ratio: {ratio:.4f}") - -rotationfile = f"{args.outputPath}/rotation_{args.suffix}.csv" -Rotation = pd.DataFrame(R) -Rotation.to_csv(rotationfile) - -interceptfile = f"{args.outputPath}/intercepts_{args.suffix}.csv" -Intercepts = pd.DataFrame(w0) -Intercepts.to_csv(interceptfile) - -dffile = f"{args.outputPath}/avgMSE_{args.suffix}.csv" -df.to_csv(dffile, index=False) - diff --git a/src/BIOT/Modified_code/BIOT_GPU.py b/src/BIOT/Modified_code/BIOT_GPU.py new file mode 100644 index 0000000..d234e94 --- /dev/null +++ b/src/BIOT/Modified_code/BIOT_GPU.py @@ -0,0 +1,519 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Functions and classes for the BIOT module +""" +import warnings +warnings.filterwarnings("ignore") + +import time +import pandas as pd +import numpy as np +import torch +import math +import argparse + +from TorchL1 import TorchL1 +from sklearn.metrics import mean_squared_error +from scipy.stats import mannwhitneyu +from sklearn.model_selection import KFold + +############################################################################### + +# Functions and classes + +def Get_W_Lasso (X, Y, lam, fit_intercept = False): + """ + Estimate multi-target Lasso regression weights using a GPU-accelerated TorchL1 model. + + Each column in Y is treated as a separate regression target, and a shared Lasso model + is fit across all targets simultaneously using coordinate descent (or other solver in TorchL1). + + Parameters + ---------- + X : torch.Tensor of shape (n_samples, n_features) + Feature matrix used to predict Y. Must be moved to GPU beforehand or will be done internally. + + Y : torch.Tensor of shape (n_samples, n_targets) + Target matrix, with each column representing one target dimension. 
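+        Must be on the same device and of the same dtype as X; the calling
+        code passes double-precision CUDA tensors.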
+ + lam : float + Regularization parameter (L1 penalty). Controls sparsity of the weight matrix. + Note: Will be scaled internally by the number of samples. + + fit_intercept : bool, optional (default=False) + If True, the model would fit an intercept. Currently not supported; the function will exit if True. + + Returns + ------- + coef_ : torch.Tensor of shape (n_features, n_targets) + Lasso regression coefficient matrix. + + w0 : torch.Tensor of shape (n_targets,) + Intercept vector (currently all zeros, as intercept fitting is not implemented). + """ + + device = torch.device("cuda") + + k = Y.shape[1] + d = X.shape[1] + w0 = torch.zeros(k).double().to(device) + if fit_intercept: + print(f"Do not support this as of now.") + exit() + print('.', end='', flush=True) + + model = TorchL1(alpha=lam, max_iter=5000, tol=1e-6) + model.coef_ = torch.zeros(d, k).double().to(device) + model.alpha *= Y.shape[0] + model.fit(X=X, y=Y) + return model.coef_, w0 + +def normalize(X, mean, std): + return (X - mean) / std + +def Global_L1_Norm (W): + """ + Compute the global L1 norm across all columns of the weight matrix W. + + This is the sum of the L1 norms (i.e., sum of absolute values) of each column in W. + + Parameters + ---------- + W : numpy.ndarray of shape (n_features, n_targets) + Weight matrix, where each column corresponds to the coefficients for one target. + + Returns + ------- + float + Total L1 norm summed across all columns of W. + """ + + k = W.shape[1] + norm_val = 0 + for dim_index in range(k): + norm_val += torch.norm(W[:, dim_index], p=1) + + return norm_val + +def BIOT_Crit (X, Y, R, W, w0, lam): + """ + Compute the BIOT objective function value. + + The BIOT criterion includes a least-squares loss term and an L1 regularization term + on the model weights. This function assumes an orthogonal transformation R has already + been applied to the embedding dimensions. + + Parameters + ---------- + X : numpy.ndarray of shape (n_samples, n_features) + Input feature matrix used to predict the rotated embedding. + + Y : numpy.ndarray of shape (n_samples, n_targets) + Original embedding matrix before rotation. + + R : numpy.ndarray of shape (n_targets, n_targets) + Orthogonal rotation matrix applied to Y. + + W : numpy.ndarray of shape (n_features, n_targets) + Weight matrix for the linear model (one target per column). + + w0 : numpy.ndarray of shape (n_targets,) + Intercept vector for each target dimension. + + lam : float + L1 regularization strength (sparsity penalty). + + Returns + ------- + float + The value of the BIOT objective function: + (1/2n) * ||Y R - (X W + w0)||^2 + λ * ||W||_1 + """ + + n = X.shape[0] + diffs = (Y @ R) - (w0.unsqueeze(0).repeat(n, 1) + (X @ W)) + LS = torch.norm(diffs)**2 + L1 = Global_L1_Norm(W) + crit = ((1 / (2 * n)) * LS) + (lam * L1) + + return crit + +def BIOT (X, Y, lam, max_iter = 200, eps = 1e-6, rotation = False, R = None, fit_intercept = False): + """ + Fit a BIOT (Bilinear Orthogonal Transformation) model using an alternating optimization + procedure on GPU with PyTorch. + + BIOT seeks a sparse linear mapping W that maps input features X to a rotated version of + the target matrix Y @ R, where R is an orthogonal (or rotation) matrix. The optimization + alternates between: + + - Solving multi-target Lasso regression to estimate W (sparse weights) + - Updating R via orthogonal Procrustes alignment using SVD + + This version is GPU-compatible and uses PyTorch tensors and operations throughout. 
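+
+    A minimal call sketch (shapes, lambda value, and explicit CUDA device
+    are illustrative; inputs are expected as double-precision GPU tensors):
+
+    >>> import torch
+    >>> Fe = torch.randn(50, 10, dtype=torch.float64, device="cuda")
+    >>> E = torch.randn(50, 4, dtype=torch.float64, device="cuda")
+    >>> R, W, w0, ratio = BIOT(X=Fe, Y=E, lam=0.1)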
+
+    Parameters
+    ----------
+    X : torch.Tensor of shape (n_samples, n_features)
+        Input feature matrix (must be on GPU).
+
+    Y : torch.Tensor of shape (n_samples, n_targets)
+        Target matrix to be explained (must be on GPU).
+
+    lam : float
+        Sparsity regularization parameter (L1 penalty). Internally scaled by 1 / sqrt(d).
+
+    max_iter : int, optional (default=200)
+        Maximum number of alternating optimization iterations.
+
+    eps : float, optional (default=1e-6)
+        Convergence threshold for the BIOT objective value.
+
+    rotation : bool, optional (default=False)
+        If True, constrain R to be a proper rotation matrix (i.e., det(R) = +1).
+        If False, R is any orthogonal matrix.
+
+    R : torch.Tensor of shape (n_targets, n_targets), optional (default=None)
+        If provided, R is used directly and not optimized. Otherwise, R is updated during training.
+
+    fit_intercept : bool, optional (default=False)
+        Whether to fit an intercept in the Lasso regression (must currently be False;
+        intercept fitting is not supported).
+
+    Returns
+    -------
+    R : torch.Tensor of shape (n_targets, n_targets)
+        Learned orthogonal or rotation matrix.
+
+    W : torch.Tensor of shape (n_features, n_targets)
+        Sparse regression coefficient matrix.
+
+    w0 : torch.Tensor of shape (n_targets,)
+        Intercept vector (zeros if fit_intercept=False).
+
+    ratio : float
+        Sparsity ratio of W, i.e., proportion of non-zero entries in W.
+    """
+
+    print('lambda: ' + str(lam))
+    d = X.shape[1]
+    n = X.shape[0]
+    lam_norm = lam/np.sqrt(d)
+
+    # If R is provided, get Lasso solution only
+    if R is not None:
+        YR = Y @ R
+        W, w0 = Get_W_Lasso(X = X, Y = YR, lam = lam_norm, fit_intercept=fit_intercept)
+    # Otherwise, run BIOT iterations
+    else:
+        W, w0 = Get_W_Lasso(X = X, Y = Y, lam = lam_norm, fit_intercept=fit_intercept)
+
+        diff = math.inf
+        iter_index = 0
+        crit_list = [math.inf]
+
+        while iter_index < max_iter and diff > eps:
+
+            # UPDATE R
+            De = (1/(2*n)) * Y.T @ (w0.unsqueeze(0).repeat(n, 1) + (X @ W))
+            U, S, Vh = torch.linalg.svd(De)
+
+            # proper rotation matrix desired (det(R) = +1)
+            if rotation:
+                sv = S.clone()
+                smallest = torch.argmin(sv)
+                sv[smallest] = torch.sign(torch.det(U @ Vh))
+                sv[sv != sv[smallest]] = 1
+                R = U @ torch.diag(sv) @ Vh
+            # orthogonal transformation matrix desired
+            else:
+                R = U @ Vh
+
+            # UPDATE W
+            YR = Y @ R
+            W, w0 = Get_W_Lasso(X = X, Y = YR, lam = lam_norm, fit_intercept = fit_intercept)
+
+            # CHECK CONVERGENCE
+            crit_list.append(BIOT_Crit(X = X, Y = Y, R = R, W = W, w0 = w0, lam = lam_norm))
+            diff = torch.absolute(crit_list[iter_index] - crit_list[iter_index + 1])
+            iter_index += 1
+
+    num_nonzero = (W != 0).sum().item()
+    total_elements = W.numel()
+    ratio = num_nonzero / total_elements
+
+    return R, W, w0, ratio
+
+def standardize_features(train, test):
+    """
+    Standardize features in the training and test sets using the training set statistics.
+
+    Each feature is transformed to have zero mean and unit variance, based on the
+    training data. Features with zero standard deviation are left unchanged (std set to 1.0).
+
+    Parameters
+    ----------
+    train : numpy.ndarray of shape (n_samples_train, n_features)
+        Training feature matrix.
+
+    test : numpy.ndarray of shape (n_samples_test, n_features)
+        Test feature matrix to be standardized using training mean and std.
+
+    Returns
+    -------
+    train_std : numpy.ndarray of shape (n_samples_train, n_features)
+        Standardized training feature matrix.
+
+    test_std : numpy.ndarray of shape (n_samples_test, n_features)
+        Standardized test feature matrix.
+    """
+
+    mean = train.mean(axis=0)
+    std = train.std(axis=0, ddof=1)
+    std[std == 0] = 1.0
+    train_std = (train - mean) / std
+    test_std = (test - mean) / std
+    return train_std, test_std
+
+def center_targets(train, test):
+    """
+    Center target variables in the training and test sets using the training set mean.
+
+    Each target variable (column) is shifted to have zero mean in the training set.
+    The same mean is subtracted from the test set to ensure consistency.
+
+    Parameters
+    ----------
+    train : numpy.ndarray of shape (n_samples_train, n_targets)
+        Training target matrix.
+
+    test : numpy.ndarray of shape (n_samples_test, n_targets)
+        Test target matrix to be centered using training means.
+
+    Returns
+    -------
+    train_centered : numpy.ndarray of shape (n_samples_train, n_targets)
+        Centered training targets.
+
+    test_centered : numpy.ndarray of shape (n_samples_test, n_targets)
+        Centered test targets.
+    """
+
+    mean = train.mean(axis=0)
+    train_centered = train - mean
+    test_centered = test - mean
+    return train_centered, test_centered
+
+def CV_BIOT(Fe, X, feature_names, lam_list, fit_intercept=False,
+            num_folds=10, random_state=1, R=None, rotation=False, scoring='neg_mean_squared_error'):
+    """
+    Cross-validated BIOT.
+
+    Performs K-fold cross-validation to select the best lambda for the BIOT model,
+    then fits a final BIOT model on the full dataset using the selected lambda.
+
+    Parameters
+    ----------
+    Fe : np.ndarray of shape (n_samples, n_features)
+        Predictor variables (input features) for BIOT.
+
+    X : np.ndarray of shape (n_samples, n_targets)
+        Target embeddings to be explained (Y in BIOT).
+
+    feature_names : list
+        Feature names. Not used inside this function; the caller attaches them to the
+        returned weight matrix.
+
+    lam_list : list of float
+        List of lambda values (L1 penalties) to try in cross-validation.
+
+    fit_intercept : bool, default=False
+        Whether to fit an intercept in the Lasso regressions.
+
+    num_folds : int, default=10
+        Number of folds in K-fold cross-validation.
+
+    random_state : int, default=1
+        Seed for reproducibility.
+
+    R : torch.Tensor or None
+        Optional fixed orthogonal transformation matrix. If None, R is optimized.
+
+    rotation : bool, default=False
+        If True, constrain R to be a proper rotation matrix (det=+1).
+
+    scoring : str, default='neg_mean_squared_error'
+        Scoring method for model selection. Currently unused (MSE is hardcoded).
+
+    Returns
+    -------
+    best_idx : int
+        Index in `lam_list` of the selected lambda.
+
+    R : torch.Tensor of shape (n_targets, n_targets)
+        Final orthogonal transformation matrix (on GPU).
+
+    W : torch.Tensor of shape (n_features, n_targets)
+        Final weight matrix (on GPU).
+
+    w0 : torch.Tensor of shape (n_targets,)
+        Intercept vector for the final model (on GPU).
+
+    ratio : float
+        Sparsity ratio (non-zero elements / total) of the final weight matrix.
+
+    df : pandas.DataFrame
+        Cross-validation summary with columns: ["Lambda", "AvgMSE", "AvgSparsity"]
+    """
+
+    start_all = time.time()
+    kf = KFold(n_splits=num_folds, shuffle=True, random_state=random_state)
+    results = []      # Store (lambda, avg MSE, avg sparsity)
+    mse_matrix = []   # Store fold-wise MSEs for statistical testing
+
+    for lam in lam_list:
+        fold_mse = []
+        fold_sparsity = []
+
+        for train_idx, test_idx in kf.split(Fe):
+            # Split folds
+            X_train, X_test = X[train_idx], X[test_idx]
+            Fe_train, Fe_test = Fe[train_idx], Fe[test_idx]
+
+            # Standardize Fe and center X (targets)
+            Fe_train, Fe_test = standardize_features(Fe_train, Fe_test)
+            X_train, X_test = center_targets(X_train, X_test)
+
+            Fe_train_tensor = torch.tensor(Fe_train).to("cuda")
+            X_train_tensor = torch.tensor(X_train).to("cuda")
+
+            # Run BIOT on training fold
+            R_, W_, w0_, ratio = BIOT(
+                X=Fe_train_tensor, Y=X_train_tensor, lam=lam,
+                rotation=rotation, R=R,
+                fit_intercept=fit_intercept
+            )
+
+            W_cpu = W_.cpu().numpy()
+            w0_cpu = w0_.cpu().numpy()
+            R_cpu = R_.cpu().numpy()
+
+            # Predict on test fold
+            intercept = np.tile(w0_cpu, (Fe_test.shape[0], 1))
+            X_pred = (intercept + Fe_test @ W_cpu) @ R_cpu.T
+
+            # Compute MSE (scaled by 1/2 for consistency with BIOT loss)
+            mse = mean_squared_error(X_test, X_pred) / 2
+            fold_mse.append(mse)
+            fold_sparsity.append(ratio)
+
+        # Store average results
+        avg_mse = np.mean(fold_mse)
+        avg_ratio = np.mean(fold_sparsity)
+        results.append((lam, avg_mse, avg_ratio))
+        mse_matrix.append(fold_mse)
+
+    end_all = time.time()
+    print(f"CV BIOT completed in {end_all - start_all:.2f}s")
+
+    # Cross-validation summary
+    df = pd.DataFrame(results, columns=["Lambda", "AvgMSE", "AvgSparsity"])
+    search_array = np.array(mse_matrix).T
+    print("Search array (fold x lambda):")
+    print(search_array)
+
+    # Statistical test: select sparsest model not significantly worse than best
+    mean_mse_per_lambda = search_array.mean(axis=0)
+    min_idx = mean_mse_per_lambda.argmin()
+    minimum_mse = search_array[:, min_idx]
+    # Fall back to the lowest-MSE lambda if no sparser model qualifies
+    best_idx = min_idx
+
+    for idx in range(min_idx + 1, len(lam_list)):
+        current_mse = search_array[:, idx]
+        pval = 1
+        # Only run the test when the fold-wise MSEs actually differ
+        if np.any(minimum_mse != current_mse):
+            result = mannwhitneyu(minimum_mse, current_mse, alternative='two-sided', method='exact', use_continuity=True)
+            pval = result[1]
+        print(f"{idx}\t{pval:.4f}")
+        if pval < 0.05:
+            break
+        best_idx = idx
+    print(f"The lambda value that produces the sparsest model not significantly different from the model with the lowest mse is at index: {best_idx}")
+
+    # Fit final model using best lambda on full data
+    start_final = time.time()
+    Fe_norm, _ = standardize_features(Fe, Fe)
+    X_norm, _ = center_targets(X, X)
+    X_norm = torch.tensor(X_norm).to("cuda")
+    Fe_norm = torch.tensor(Fe_norm).to("cuda")
+
+    R, W, w0, ratio = BIOT(
+        X=Fe_norm, Y=X_norm, lam=lam_list[best_idx],
+        max_iter=200, eps=1e-6,
+        rotation=rotation, R=None,
+        fit_intercept=fit_intercept
+    )
+
+    end_final = time.time()
+    print(f"Final BIOT time: {end_final - start_final:.2f}s")
+
+    return best_idx, R, W, w0, ratio, df
+
+########################################################################################################
+parser = argparse.ArgumentParser()
+parser.add_argument('-e','--embeddingFile', help='csv file containing the embeddings to be predicted', default="embeddings.csv")
+parser.add_argument('-p','--predictorFile', help='csv file containing the predictors to use to predict the embeddings', default="features.csv")
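+# Note on expected inputs (inferred from the pd.read_csv calls below, stated here
+# as an assumption rather than a guarantee): the embedding CSV is read with
+# header=None, i.e. a plain numeric matrix, while the predictor CSV is read with
+# header=0, i.e. its first row holds the feature names used to label the weights.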
+parser.add_argument('-d','--dataPath', help='common path for the embedding and predictor files', default="/raid/home/rajivratn/hemant_rajivratn/ets/BIOT/gpu/datasets/")
+parser.add_argument('-o','--outputPath', help='path to location in which to store the output files', default="./output")
+parser.add_argument('-s','--suffix', help='suffix to identify output files from those in other runs', default="best")
+parser.add_argument('-l','--numLambdas', help='number of lambda values to calculate', type=int, default=10)
+parser.add_argument('-m','--min', help='minimum lambda value', type=float, default=.0001)
+parser.add_argument('-x','--max', help='maximum lambda value', type=float, default=3.5)
+parser.add_argument('-f','--numFolds', help='number of folds to use in cross-validation', type=int, default=10)
+parser.add_argument('-a','--random_state', help='random state to use', type=int, default=1)
+parser.add_argument('-c','--scoring', help='scoring algorithm to use in CV', default='neg_mean_squared_error')
+parser.add_argument('-r','--rotation', help='Force rotation not orthogonal transformation', action="store_true")
+parser.add_argument('-t','--fit_intercept', help='Force lasso to fit intercept', action="store_true")
+
+args = parser.parse_args()
+
+embeddingPath = f"{args.dataPath}/{args.embeddingFile}"
+predictorPath = f"{args.dataPath}/{args.predictorFile}"
+X = pd.read_csv(embeddingPath, header=None).values
+# Keep the predictor DataFrame so its header row can supply feature names
+Fe_df = pd.read_csv(predictorPath, header=0)
+feature_names = Fe_df.columns
+Fe = Fe_df.values
+
+lam_list = np.geomspace(args.min, args.max, args.numLambdas)
+print('Lambda values:')
+print(lam_list)
+
+random_state = 1
+R = None
+
+best_idx, R, W, w0, ratio, df = CV_BIOT (Fe, X, feature_names, lam_list, fit_intercept = args.fit_intercept, num_folds=args.numFolds, random_state = args.random_state, R = R, rotation = args.rotation, scoring = args.scoring)
+
+Weights = pd.DataFrame(W.cpu().numpy())
+Weights.index = feature_names
+weightfile = f"{args.outputPath}/weights_{args.suffix}.csv"
+Weights.to_csv(weightfile)
+
+rotationfile = f"{args.outputPath}/rotation_{args.suffix}.csv"
+Rotation = pd.DataFrame(R.cpu().numpy())
+Rotation.to_csv(rotationfile)
+
+interceptfile = f"{args.outputPath}/intercepts_{args.suffix}.csv"
+Intercepts = pd.DataFrame(w0.cpu().numpy())
+Intercepts.to_csv(interceptfile)
+
+dffile = f"{args.outputPath}/avgMSE_{args.suffix}.csv"
+df.to_csv(dffile, index=False)
+print(f"Sparsity ratio: {ratio:.4f}")
+
+# python3 src/BIOT/BIOT_GPU.py -e mite_6d_embedding.csv -p mite_F.csv -d test/inputs/Mite -o test/outputs -s mite_6d_fasta_fixed -l 20 -m 0.0001 -f 5 -x 3.5 -a 42 -c neg_mean_squared_error | tee mite_6d_fasta_fixed.txt
+
\ No newline at end of file
diff --git a/src/BIOT/Modified_code/BIOT_GPU_modified.py b/src/BIOT/Modified_code/BIOT_GPU_modified.py
deleted file mode 100644
index f82e939..0000000
--- a/src/BIOT/Modified_code/BIOT_GPU_modified.py
+++ /dev/null
@@ -1,296 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Functions and classes for the BIOT module
-"""
-import warnings
-warnings.filterwarnings("ignore")
-
-import time
-import pandas as pd
-import numpy as np
-import torch
-from TorchL1 import TorchL1
-from sklearn.metrics import mean_squared_error
-from sklearn.linear_model import Lasso
-from sklearn.preprocessing import StandardScaler
-from sklearn.compose import TransformedTargetRegressor
-import math
-from scipy.stats import mannwhitneyu
-
-from sklearn.base import BaseEstimator,
RegressorMixin -from sklearn.utils.validation import check_array, check_is_fitted - -from sklearn.model_selection import KFold -from sklearn.model_selection import GridSearchCV -from sklearn.pipeline import Pipeline - -from sklearn.preprocessing import StandardScaler -import argparse - -############################################################################### -# Functions and classes -def Get_W_Lasso (X, Y, lam, fit_intercept = False, mode='cuda'): - device = torch.device("cuda") - - k = Y.shape[1] - d = X.shape[1] - w0 = torch.zeros(k).double().to(device) - - if fit_intercept: - print(f"Do not support this as of now.") - exit() - print('.', end='', flush=True) - - model = TorchL1(alpha=lam, max_iter=5000, tol=1e-6) - model.coef_ = torch.zeros(d, k).double().to(device) - model.alpha *= Y.shape[0] - model.fit(X=X, y=Y) - - return model.coef_, w0 - -def calc_max_lam (X, Y): - - n = X.shape[0] - sc = StandardScaler() - X_norm = sc.fit_transform(X) - sc = StandardScaler(with_std=False) - Y_norm = sc.fit_transform(Y) - max_lam = np.max(np.absolute(X_norm.T @ Y_norm))/n - - return max_lam - -def normalize(X, mean, std): - return (X - mean) / std - -def Global_L1_Norm (W): - - k = W.shape[1] - norm_val = 0 - for dim_index in range(k): - norm_val += torch.norm(W[:, dim_index], p=1) - - return norm_val - -def BIOT_Crit (X, Y, R, W, w0, lam): - - n = X.shape[0] - diffs = (Y @ R) - (w0.unsqueeze(0).repeat(n, 1) + (X @ W)) - LS = torch.norm(diffs)**2 - L1 = Global_L1_Norm(W) - crit = ((1 / (2 * n)) * LS) + (lam * L1) - - return crit - -def BIOT (X, Y, lam, max_iter = 200, eps = 1e-6, rotation = False, R = None, fit_intercept = False, mode='cuda'): - print('lambda: ' + str(lam)) - d = X.shape[1] - n = X.shape[0] - lam_norm = lam/np.sqrt(d) - - # If R is provided, get Lasso solution only - if R is not None: - YR = Y @ R - W, w0 = Get_W_Lasso(X = X, Y = YR, lam = lam_norm, fit_intercept=fit_intercept, mode=mode) - - # Otherwise, run BIOT iterations - else: - W, w0 = Get_W_Lasso(X = X, Y = Y, lam = lam_norm, fit_intercept=fit_intercept, mode=mode) - - diff = math.inf - iter_index = 0 - crit_list = [math.inf] - - while iter_index < max_iter and diff > eps: - - De = (1/(2*n)) * Y.T @ (w0.unsqueeze(0).repeat(n, 1) + (X @ W)) - U, S, Vh = torch.linalg.svd(De) - - # rotation matrix desired (counterclockwise) - if rotation: - sv = S - smallest = torch.argmin(sv) - sv = sv.clone() - sv[smallest] = torch.sign(torch.det(torch.mm(U, Vh.T))) - sv[sv != sv[smallest]] = 1 - - # Construct the rotation matrix - R = torch.mm(U, torch.mm(torch.diag(sv), Vh.T)) - else: - R = U @ Vh - - YR = Y @ R - W, w0 = Get_W_Lasso(X = X, Y = YR, lam = lam_norm, fit_intercept = fit_intercept, mode=mode) - - crit_list.append(BIOT_Crit(X = X, Y = Y, R = R, W = W, w0 = w0, lam = lam_norm)) - diff = torch.absolute(crit_list[iter_index] - crit_list[iter_index + 1]) - iter_index += 1 - - num_nonzero = (W != 0).sum().item() - total_elements = W.numel() - ratio = num_nonzero / total_elements - - return R, W, w0, ratio - -def standardize_features(train, test): - mean = train.mean(axis=0) - std = train.std(axis=0, ddof=1) - std[std == 0] = 1.0 - train_std = (train - mean) / std - test_std = (test - mean) / std - return train_std, test_std - -def center_targets(train, test): - mean = train.mean(axis=0) - train_centered = train - mean - test_centered = test - mean - return train_centered, test_centered - -def CV_BIOT(Fe, X, feature_names, lam_list, fit_intercept=False, - num_folds=10, random_state=1, R=None, rotation=False, 
scoring='neg_mean_squared_error', mode='cuda'): - - start_all = time.time() - - kf = KFold(n_splits=num_folds, shuffle=True, random_state=random_state) - results = [] - mse_matrix = [] - - for lam in lam_list: - fold_mse = [] - fold_sparsity = [] - - for train_idx, test_idx in kf.split(Fe): - X_train = X[train_idx, :] - X_test = X[test_idx, :] - Fe_train = Fe[train_idx, :] - Fe_test = Fe[test_idx, :] - - Fe_train, Fe_test = standardize_features(Fe_train, Fe_test) - X_train, X_test = center_targets(X_train, X_test) - - Fe_train_tensor = torch.tensor(Fe_train).to(mode) - X_train_tensor = torch.tensor(X_train).to(mode) - - R_, W_, w0_, ratio = BIOT( - X=Fe_train_tensor, Y=X_train_tensor, lam=lam, rotation=rotation, R=R, - fit_intercept=fit_intercept, mode=mode) - - W_cpu = W_.cpu().numpy() - w0_cpu = w0_.cpu().numpy() - R_cpu = R_.cpu().numpy() - - intercept = np.tile(w0_cpu, (Fe_test.shape[0], 1)) - X_pred = (intercept + Fe_test @ W_cpu) @ R_cpu.T - - mse = mean_squared_error(X_test, X_pred) / 2 - fold_mse.append(mse) - fold_sparsity.append(ratio) - - avg_mse = np.mean(fold_mse) - avg_ratio = np.mean(fold_sparsity) - results.append((lam, avg_mse, avg_ratio)) - mse_matrix.append(fold_mse) - - end_all = time.time() - print(f"CV BIOT completed in {end_all - start_all:.2f}s") - - df = pd.DataFrame(results, columns=["Lambda", "AvgMSE", "AvgSparsity"]) - search_array = np.array(mse_matrix).T - print("Search array (fold x lambda):") - print(search_array) - # best_idx = df["AvgMSE"].idxmin() - - # Wilcoxon rank-sum test - mean_mse_per_lambda = search_array.mean(axis=0) - min_idx = mean_mse_per_lambda.argmin() - minimum_mse = search_array[:, min_idx] - pval = 0 - best_idx = 0 - - for idx in range(min_idx + 1, len(lam_list)): - current_mse = search_array[:, idx] - pval = 1 - if np.sum(minimum_mse - np.abs(current_mse)) != 0: - result = mannwhitneyu(minimum_mse, current_mse, alternative='two-sided', method='exact', use_continuity=True) - pval = result[1] - print(f"{idx}\t{pval:.4f}") - if pval < 0.05: - break - best_idx = idx - - print(f"The lambda value that produces the sparsest model not significantly different from the model with the lowest mse is at index: {best_idx}") - - # Final BIOT on full data - start_final = time.time() - Fe_norm, _= standardize_features(Fe, Fe) - X_norm, _ = center_targets(X, X) - - X_norm = torch.tensor(X_norm).to(mode) - Fe_norm = torch.tensor(Fe_norm).to(mode) - - R, W, w0, ratio= BIOT (Fe_norm, X_norm, lam_list[best_idx], max_iter = 1, eps = 1e-6, rotation = False, R = None, fit_intercept = False,mode='gpu') - - end_final = time.time() - print(f"Final BIOT time: {end_final - start_final:.2f}s") - - return best_idx, R, W, w0, ratio, df - -######################################################################################################## -parser = argparse.ArgumentParser() -parser.add_argument('-e','--embeddingFile', help='csv file containing the embeddings to be predicted', default="embeddings.csv") -parser.add_argument('-p','--predictorFile', help='csv file containing the predictors to use to predict the embeddings', default="features.csv") -parser.add_argument('-d','--dataPath', help='common path for the embedding and predictor files', default="/raid/home/rajivratn/hemant_rajivratn/ets/BIOT/gpu/datasets/") -parser.add_argument('-o','--outputPath', help='path to location in which to store the output files', default="./output") -parser.add_argument('-s','--suffix', help='suffix to identify output files from those in other runs', default="best") 
-parser.add_argument('-l','--numLambdas', help='number of lamba values to calculate', type=int, default=10) -parser.add_argument('-m','--min', help='minimum lambda value', type=float, default=.0001) -parser.add_argument('-x','--max', help='maximum lambda value', type=float, default=3.5) -parser.add_argument('-f','--numFolds', help='maximum lambda value', type=int, default=10) -parser.add_argument('-a','--random_state', help='random state to use', type=int, default=1) -parser.add_argument('-c','--scoring', help='scoring algorithm to use in CV', default='neg_mean_squared_error') -parser.add_argument('-r','--rotation', help='Force rotation not orthogonal transformation', action="store_true") -parser.add_argument('-t','--fit_intercept', help='Force lasso to fit intercept', action="store_true") -parser.add_argument('-u','--mode', help='Processing mode (cpu or gpu)', default='cuda') - -args = parser.parse_args() - -embeddingPath = f"{args.dataPath}/{args.embeddingFile}" -predictorPath = f"{args.dataPath}/{args.predictorFile}" -Y = pd.read_csv(embeddingPath,header=None).values -X = pd.read_csv(predictorPath,header=0).values - -print(f"Check on big dataset") -print('Embedding shape:', Y.shape) -print('Predictor shape:', X.shape) - -lam_list = np.geomspace(args.min, args.max, args.numLambdas) -print('Lambda values:') -print(lam_list) - -random_state = 1 -R = None - -if isinstance(X, pd.DataFrame): - feature_names = X.columns -else: - feature_names = np.array(range(X.shape[1])) - -best_idx, R, W, w0, ratio, df = CV_BIOT (X, Y, feature_names, lam_list, fit_intercept = args.fit_intercept, num_folds=args.numFolds, random_state = args.random_state, R = R, rotation = args.rotation, scoring = args.scoring, mode=args.mode) - -Weights = pd.DataFrame(W.cpu().numpy()) -Weights.index = feature_names -weightfile = f"{args.outputPath}/weights_{args.suffix}.csv" -Weights.to_csv(weightfile) -print(f"Sparsity ratio: {ratio:.4f}") - -rotationfile = f"{args.outputPath}/rotation_{args.suffix}.csv" -Rotation = pd.DataFrame(R.cpu().numpy()) -Rotation.to_csv(rotationfile) - -interceptfile = f"{args.outputPath}/intercepts_{args.suffix}.csv" -Intercepts = pd.DataFrame(w0.cpu().numpy()) -Intercepts.to_csv(interceptfile) - -dffile = f"{args.outputPath}/avgMSE_{args.suffix}.csv" -df.to_csv(dffile, index=False) - diff --git a/src/BIOT/Modified_code/TorchL1.py b/src/BIOT/Modified_code/TorchL1.py index 716833f..f3cc433 100644 --- a/src/BIOT/Modified_code/TorchL1.py +++ b/src/BIOT/Modified_code/TorchL1.py @@ -1,6 +1,6 @@ import torch - + #@torch.jit.script class TorchL1: def __init__(self, alpha=1.0, max_iter=1000, tol=1e-6): @@ -32,15 +32,58 @@ def fit(self, X, y): def _fasta(self, X, Y, lam, W, tau1=10.0, max_iter=1000, w=10, backtrack=True, record_iterates=False, stepsize_shrink=0.5, eps_n=1e-15, tol=1e-10): - n, p = X.shape - _, m = Y.shape - device = X.device + """ + Solves a multi-target Lasso problem using the FASTA algorithm (Forward-Backward Splitting with Adaptive step size). + + Objective: + min_B (1/2) * ||X B - Y||_F^2 + lam * ||B||_1 + + Parameters + ---------- + X : torch.Tensor of shape (n_samples, n_features) + Input feature matrix. + + Y : torch.Tensor of shape (n_samples, n_targets) + Target matrix for regression. + + lam : float + L1 penalty (sparsity control). + + W : torch.Tensor of shape (n_features, n_targets) + Initialization of the coefficient matrix. + + tau1 : float, optional (default=10.0) + Initial step size. + + max_iter : int, optional (default=1000) + Maximum number of iterations. 
+ + w : int, optional (default=10) + Number of previous function values to consider for backtracking. + + backtrack : bool, optional (default=True) + Whether to use backtracking line search. + + record_iterates : bool, optional (default=False) + Whether to store the iterate history (useful for debugging/analysis). + + stepsize_shrink : float, optional (default=0.5) + Step size shrinkage factor during backtracking. + + eps_n : float, optional (default=1e-15) + Small constant to avoid division by zero. + + tol : float, optional (default=1e-10) + Convergence tolerance based on relative parameter change. + """ - # x0 = torch.zeros((p, m), device=device) + # Initialize variables + _, p = X.shape x0 = W x1 = x0.clone() d1 = x1.clone() - + + # Define loss and proximal operators f = lambda B: 0.5 * torch.norm(X @ B - Y, p='fro')**2 gradf = lambda B: X.T @ (X @ B - Y) g = lambda B: lam * torch.norm(B, p=1) @@ -51,14 +94,16 @@ def _fasta(self, X, Y, lam, W, tau1=10.0, max_iter=1000, w=10, taus = torch.zeros(max_iter) fvals = torch.zeros(max_iter) objective = torch.zeros(max_iter + 1) - + + # Initial objective f1 = f(d1) gradf1 = gradf(d1) fvals[0] = f1 objective[0] = f1 + g(x0) min_obj = objective[0].item() best_beta = x1.clone() - + + # Optional: record all iterates if record_iterates: iterates = torch.zeros((p, max_iter + 1)) iterates[:, 0] = x1.squeeze() @@ -69,22 +114,21 @@ def _fasta(self, X, Y, lam, W, tau1=10.0, max_iter=1000, w=10, x0 = x1.clone() gradf0 = gradf1.clone() tau0 = tau1 - # print (tau0) - + + # Forward step (gradient descent) x1hat = x0 - tau0 * gradf0 + # Backward step (proximal operator) x1 = proxg(x1hat, tau0) Dx = x1 - x0 d1 = x1.clone() f1 = f(d1) - + + # Optional backtracking line search if backtrack: if i > 0: M = torch.max(fvals[max(i - w, 0):i]) else: M = fvals[0] - # print (M) - - prev_f1 = f1 backtrack_count = 0 prop = (f1 - 1e-12 > M + torch.sum(Dx * gradf0) + 0.5 * torch.norm(Dx)**2 / tau0) @@ -99,6 +143,7 @@ def _fasta(self, X, Y, lam, W, tau1=10.0, max_iter=1000, w=10, backtrack_count += 1 prop = (f1 - 1e-12 > M + torch.sum(Dx * gradf0) + 0.5 * torch.norm(Dx)**2 / tau0) + # Store diagnostics taus[i] = tau0 residual[i] = torch.norm(Dx) / tau0 normalizer = max( @@ -108,14 +153,16 @@ def _fasta(self, X, Y, lam, W, tau1=10.0, max_iter=1000, w=10, normalized_resid[i] = residual[i].item() / normalizer fvals[i] = f1 objective[i + 1] = f1 + g(x1) - + + # Track best solution (objective-wise) if objective[i + 1] < min_obj: best_beta = x1.clone() min_obj = objective[i + 1].item() if record_iterates: iterates[:, i + 1] = x1.squeeze() - + + # Update step size using Barzilai–Borwein rule gradf1 = gradf(d1) Dg = gradf1 + (x1hat - x0) / tau0 dotprod = torch.sum(Dx * Dg) @@ -130,7 +177,8 @@ def _fasta(self, X, Y, lam, W, tau1=10.0, max_iter=1000, w=10, tau1 = tau_m if 2 * tau_m > tau_s else tau_s - 0.5 * tau_m if tau1 <= 0 or not torch.isfinite(torch.as_tensor(tau1)): tau1 = tau0 * 1.5 - + + # Check for convergence if torch.max(torch.abs(Dx)) < tol * (torch.max(torch.abs(x1))): break @@ -138,19 +186,40 @@ def _fasta(self, X, Y, lam, W, tau1=10.0, max_iter=1000, w=10, def _fista(self, X, Y, lam, W, max_iter=1000, tol=1e-10): """ - FISTA algorithm for L1-regularized regression (multi-output). - + FISTA algorithm for solving the L1-regularized least squares problem (multi-target Lasso). 
+ + Solves the following optimization problem: + min_W (1/2n) * ||XW - Y||_F^2 + lam * ||W||_1 + + using the Fast Iterative Shrinkage-Thresholding Algorithm (FISTA), which accelerates + proximal gradient descent with a momentum term. + Parameters ---------- - X : torch.Tensor, shape (n_samples, n_features) - Y : torch.Tensor, shape (n_samples, n_targets) + X : torch.Tensor of shape (n_samples, n_features) + Design matrix. + + Y : torch.Tensor of shape (n_samples, n_targets) + Target matrix for regression. + lam : float - L1 regularization strength. - W : torch.Tensor, shape (n_features, n_targets) - Initial weights. + L1 regularization strength (controls sparsity). + + W : torch.Tensor of shape (n_features, n_targets) + Initial weight matrix (used as warm start). + + max_iter : int, optional (default=1000) + Maximum number of FISTA iterations. + + tol : float, optional (default=1e-10) + Tolerance for convergence. Optimization stops when relative change is small. + + Returns + ------- + None + The solution is stored in `self.coef_`, a tensor of shape (n_features, n_targets). """ - n, a = X.shape - _, b = Y.shape + n, _ = X.shape # Estimate Lipschitz constant L = ||X||^2 L = torch.linalg.norm(X, ord=2).item() ** 2
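+        # Illustrative sketch of the iteration this constant drives (the names Z, G,
+        # t_old, t_new, and W_old are placeholders, not variables defined here): with
+        # step size 1/L, each FISTA iteration takes a gradient step on the smooth term,
+        #     G = Z - (1.0 / L) * (X.T @ (X @ Z - Y))
+        # applies soft-thresholding as the proximal step for the L1 term,
+        #     W = torch.sign(G) * torch.clamp(G.abs() - lam / L, min=0)
+        # and then forms the Nesterov momentum combination
+        #     t_new = (1 + math.sqrt(1 + 4 * t_old**2)) / 2
+        #     Z = W + ((t_old - 1) / t_new) * (W - W_old)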