Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 0 additions & 33 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,39 +46,6 @@ jobs:
- name: Check unit test code coverage
run: pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml codecov

ci_osx-64:
name: osx-64 CI
runs-on: macos-13
strategy:
fail-fast: false
matrix:
environment: ["r43", "r44"]

steps:
- name: Checkout pull request branch
uses: actions/checkout@v5
with:
fetch-depth: 0

- name: Create TOML from recipe
run: |
.github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE}
mkdir /tmp/pixi
mv ${GITHUB_WORKSPACE}/pixi.toml /tmp/pixi

- name: Setup pixi
uses: prefix-dev/setup-pixi@v0.9.2
with:
manifest-path: /tmp/pixi/pixi.toml

- name: Run unit tests
run: pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml devtools_test

- name: Run R CMD CHECK
run: |
pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml build
pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml rcmdcheck

ci_osx-arm64:
name: osx-arm64 CI
runs-on: macos-14
Expand Down
33 changes: 31 additions & 2 deletions R/colocboost.R
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,17 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
return(xx)
})
}


# Remove duplicates and report: if duplicate columns of X
X <- lapply(seq_along(X), function(i) {
df <- X[[i]]
if (anyDuplicated(colnames(df))) {
message(paste("Removed duplicate columns from X matrix ", i))
df[, !duplicated(colnames(df)), drop = FALSE]
} else {
df
}
})
keep_variable_individual <- lapply(X, colnames)
if (!is.list(X) & !is.list(Y)) {
warning("Error: Input X and Y must be the list containing genotype matrics and all phenotype vectors!")
Expand Down Expand Up @@ -333,11 +343,19 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
warning("Error: effect_est and effect_se should be the same dimension! Please check!")
return(NULL)
}
variables <- rownames(effect_est)
effect_est <- as.matrix(effect_est)
effect_se <- as.matrix(effect_se)
variables <- rownames(effect_est)
if (is.null(variables)) {
variables <- paste0("variant_", 1:nrow(effect_est))
} else {
# add - if duplciates occurs, only keep one
if (anyDuplicated(variables)) {
keep_idx <- !duplicated(variables)
variables <- variables[keep_idx]
effect_est <- effect_est[keep_idx, , drop = FALSE]
effect_se <- effect_se[keep_idx, , drop = FALSE]
}
}
sumstat <- list()
for (sum_iy in 1:ncol(effect_est)) {
Expand Down Expand Up @@ -388,6 +406,17 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
})
}
}

# Remove duplicates and report: if duplicate variant in summary statistics
sumstat <- lapply(seq_along(sumstat), function(i) {
xx <- sumstat[[i]]
if (anyDuplicated(xx$variant)) {
message(paste("Removed duplicate variants from sumstat", i))
xx[!duplicated(xx$variant), , drop = FALSE]
} else {
xx
}
})
keep_variable_sumstat <- lapply(sumstat, function(xx) {
xx$variant
})
Expand Down
6 changes: 5 additions & 1 deletion R/colocboost_assemble.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ colocboost_assemble <- function(cb_obj,
)
# - save model and all coloc and single information for diagnostic
if (output_level != 1) {
tmp <- get_full_output(cb_obj = cb_obj, past_out = NULL, variables = NULL)
tmp <- get_full_output(cb_obj = cb_obj, past_out = NULL, variables = data_info$variables)
if (output_level == 2) {
cb_output <- c(cb_output, list("ucos_details" = tmp$ucos_detials))
cb_output <- cb_output[c("cos_summary", "vcp", "cos_details", "data_info", "model_info", "ucos_details")]
Expand All @@ -79,6 +79,7 @@ colocboost_assemble <- function(cb_obj,
} else if (cb_obj$cb_model_para$model_used == "one_causal"){
# fixme later
check_null_max <- check_null_max * check_null
check_null_max_ucos <- check_null_max_ucos * check_null
}
cb_obj <- get_max_profile(cb_obj, check_null_max = check_null_max,
check_null_max_ucos = check_null_max_ucos,
Expand Down Expand Up @@ -209,6 +210,9 @@ colocboost_assemble <- function(cb_obj,
# - colocalization results
cb_obj$cb_model_para$weight_fudge_factor <- weight_fudge_factor
cb_obj$cb_model_para$coverage <- coverage
cb_obj$cb_model_para$min_abs_corr <- min_abs_corr
cb_obj$cb_model_para$median_abs_corr <- median_abs_corr
cb_obj$cb_model_para$n_purity <- n_purity
cos_results <- get_cos_details(cb_obj, coloc_out = past_out$cos$cos, data_info = data_info)
cb_output <- list(
"vcp" = cos_results$vcp,
Expand Down
91 changes: 60 additions & 31 deletions R/colocboost_check_update_jk.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,15 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) {

# -- define jk and jk_each
cor_vals_each <- do.call(cbind, lapply(model_update, function(cc) cc$correlation))
abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`)
# abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`)
abs_cor_vals_each <- abs(cor_vals_each) * rep(adj_dep, each = nrow(cor_vals_each))
abs_cor_vals <- rowSums(abs_cor_vals_each)

jk <- which(abs_cor_vals == max(abs_cor_vals))
jk <- ifelse(length(jk) == 1, jk, sample(jk, 1))
jk_each <- apply(abs_cor_vals_each, 2, function(temp) {
jk_temp <- which(temp == max(temp))
return(ifelse(length(jk_temp) == 1, jk_temp, sample(jk_temp, 1)))
})
# jk <- which(abs_cor_vals == max(abs_cor_vals))
# jk <- ifelse(length(jk) == 1, jk, sample(jk, 1))
# jk_each <- apply(abs_cor_vals_each, 2, which.max)
jk <- which.max(abs_cor_vals)
jk_each <- sapply(1:ncol(abs_cor_vals_each), function(j) which.max(abs_cor_vals_each[, j]))
update_jk[c(1, pos.update + 1)] <- c(jk, jk_each)


Expand Down Expand Up @@ -142,7 +142,7 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) {
update_status[pos.update[true_update]] <- 1
# - re-define jk_star
abs_cor_vals_redefine <- rowSums(abs_cor_vals_each[, true_update, drop = FALSE])
jk_redefine <- which(abs_cor_vals_redefine == max(abs_cor_vals_redefine))
jk_redefine <- which.max(abs_cor_vals_redefine == max(abs_cor_vals_redefine))
jk_redefine <- ifelse(length(jk_redefine) == 1, jk_redefine, sample(jk_redefine, 1))
real_update_jk[pos.update[true_update]] <- jk_redefine
} else {
Expand Down Expand Up @@ -351,11 +351,10 @@ boost_check_update_jk_focal <- function(cb_model, cb_model_para, cb_data,

# -- define jk and jk_each
cor_vals_each <- do.call(cbind, lapply(model_update, function(cc) as.vector(cc$correlation)))
abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`)
jk_each <- apply(abs_cor_vals_each, 2, function(temp) {
jk_temp <- which(temp == max(temp))
return(ifelse(length(jk_temp) == 1, jk_temp, sample(jk_temp, 1)))
})
# abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`)
abs_cor_vals_each <- abs(cor_vals_each) * rep(adj_dep, each = nrow(cor_vals_each))
# jk_each <- apply(abs_cor_vals_each, 2, which.max)
jk_each <- sapply(1:ncol(abs_cor_vals_each), function(j) which.max(abs_cor_vals_each[, j]))
pp_focal <- which(pos.update == focal_outcome_idx)
jk_focal <- jk_each[pp_focal]

Expand Down Expand Up @@ -503,8 +502,8 @@ get_LD_jk1_jk2 <- function(jk1, jk2,
if (length(XtX) == 1){
LD_temp <- 0
} else {
jk1.remain <- which(remain_jk == jk1)
jk2.remain <- which(remain_jk == jk2)
jk1.remain <- match(jk1, remain_jk)
jk2.remain <- match(jk2, remain_jk)
LD_temp <- XtX[jk1.remain, jk2.remain]
}
}
Expand Down Expand Up @@ -545,29 +544,59 @@ check_pair_jkeach <- function(jk_each,
cb_data, X_dict,
jk_equiv_corr = 0.8,
jk_equiv_loglik = 0.001) {


#' @importFrom stats cor
get_LD_jk_each <- function(jk_each,
X = NULL, XtX = NULL, N = NULL,
remain_jk = NULL) {
if (!is.null(X)) {
LD_temp <- suppressWarnings({
get_cormat(X[, jk_each])
})
LD_temp[which(is.na(LD_temp))] <- 0
# LD_temp <- LD_temp[1, 2]
} else if (!is.null(XtX)) {
if (length(XtX) == 1){
LD_temp <- matrix(0, length(jk_each), length(jk_each))
} else {
jk.remain <- match(jk_each, remain_jk)
LD_temp <- XtX[jk.remain, jk.remain]
LD_temp[which(is.na(LD_temp))] <- 0
}
}
return(LD_temp)
}

detect_func <- function(idx, LD_all, jk_i, jk_j, i, j){
change_log_jk_i <- model_update[[idx]]$change_loglike[jk_i]
change_log_jk_j <- model_update[[idx]]$change_loglike[jk_j]
change_each <- abs(change_log_jk_i - change_log_jk_j)
LD_temp <- LD_all[[idx]][i, j]
return((change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr))
}

data_update <- cb_data$data[pos.update]
LD_all <- lapply(1:length(jk_each), function(idx){
get_LD_jk_each(jk_each,
X = cb_data$data[[X_dict[idx]]]$X,
XtX = cb_data$data[[X_dict[idx]]]$XtX,
N = data_update[[idx]]$N,
remain_jk = setdiff(1:length(model_update[[idx]]$res), data_update[[idx]]$variable_miss)
)
})

# -- check if jk_i ~ jk_j
change_each_pair <- matrix(FALSE, nrow = length(jk_each), ncol = length(jk_each))
for (i in 1:length(jk_each)) {

jk_i <- jk_each[i]
change_log_jk_i <- model_update[[i]]$change_loglike[jk_i]
for (j in 1:length(jk_each)) {
for (j in i:length(jk_each)) {
if (j != i) {
jk_j <- jk_each[j]

if (!(jk_i %in% data_update[[i]]$variable_miss) & !(jk_j %in% data_update[[i]]$variable_miss)) {
change_log_jk_j <- model_update[[i]]$change_loglike[jk_j]
change_each <- abs(change_log_jk_i - change_log_jk_j)
LD_temp <- get_LD_jk1_jk2(jk_i, jk_j,
X = cb_data$data[[X_dict[i]]]$X,
XtX = cb_data$data[[X_dict[i]]]$XtX,
N = data_update[[i]]$N,
remain_jk = setdiff(1:length(model_update[[i]]$res), data_update[[i]]$variable_miss)
)
change_each_pair[i, j] <- (change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr)
} else {
change_each_pair[i, j] <- FALSE
change_each_pair[i, j] <- detect_func(idx = i, LD_all, jk_i, jk_j, i, j)
# if jk_i and jk_j are equivalent on dataset i, then we don't need to check dataset j
if ( !change_each_pair[i, j] ){
change_each_pair[j, i] <- detect_func(idx = j, LD_all, jk_i, jk_j, i, j)
}
} else {
change_each_pair[i, j] <- FALSE
Expand Down
20 changes: 13 additions & 7 deletions R/colocboost_inference.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ get_modularity <- function(Weight, B) {

if (m == 0) return(0)

cate <- B %*% t(B)
# cate <- B %*% t(B)
cate <- tcrossprod(B)

if (m_pos == 0 & m_neg == 0) return(0)

Expand Down Expand Up @@ -190,12 +191,12 @@ get_n_cluster <- function(hc, Sigma, m = ncol(Sigma), min_cluster_corr = 0.8) {
#' @noRd
w_purity <- function(weights, X = NULL, Xcorr = NULL, N = NULL, n = 100, coverage = 0.95,
min_abs_corr = 0.5, median_abs_corr = NULL, miss_idx = NULL) {
tmp <- apply(weights, 2, w_cs, coverage = coverage)
tmp_purity <- apply(tmp, 2, function(tt) {
pos <- which(tt == 1)

tmp_purity <- apply(weights, 2, function(w) {
pos <- w_cs(w, coverage = coverage)
# deal with missing snp here
if (!is.null(Xcorr)) {
pos <- match(pos, setdiff(1:length(tmp), miss_idx))
pos <- match(pos, setdiff(1:length(w), miss_idx))
}
get_purity(pos, X = X, Xcorr = Xcorr, N = N, n = n)
})
Expand Down Expand Up @@ -373,7 +374,12 @@ check_null_post <- function(cb_obj,
}
if (!weaker_effect) {
check_cs_change <- cs_change
check_null_tmp <- sapply(1:cb_obj$cb_model_para$L, function(j) cb_obj$cb_model[[j]]$check_null_max)
if (cb_obj$cb_model_para$L == 1){
check_null_max_tmp <- cb_obj$cb_model[[j]]$check_null_max_ucos
} else {
check_null_max_tmp <- cb_obj$cb_model[[j]]$check_null_max
}
check_null_tmp <- sapply(1:cb_obj$cb_model_para$L, function(j) check_null_max_tmp)
} else {
check_null_tmp <- rep(check_null, cb_obj$cb_model_para$L)
}
Expand Down Expand Up @@ -420,7 +426,7 @@ get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100) {
corr[which(is.na(corr))] <- 0
value <- abs(get_upper_tri(corr))
} else {
if (sum(Xcorr) == 1){
if (length(Xcorr) == 1){
value <- 0
} else {
Xcorr <- Xcorr # if (!is.null(N)) Xcorr/(N-1) else Xcorr
Expand Down
5 changes: 3 additions & 2 deletions R/colocboost_init.R
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ colocboost_init_model <- function(cb_data,
tmp <- list(
"res" = NULL,
"beta" = rep(0, P),
"weights_path" = c(),
# "weights_path" = c(),
"weights_path" = list(),
"profile_loglike_each" = NULL,
"obj_path" = 999999,
"obj_single" = 999999,
Expand Down Expand Up @@ -619,7 +620,7 @@ process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dic
Z_extend[pos_target] <- current_z[pos_z]

# Calculate submatrix for each unique entry (not duplicates)
if (sum(current_ld_matrix) == 1){
if (length(current_ld_matrix) == 1){
ld_submatrix <- current_ld_matrix
} else {
ld_submatrix <- NULL
Expand Down
10 changes: 4 additions & 6 deletions R/colocboost_one_causal.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,8 @@ colocboost_one_iteration <- function(cb_model, cb_model_para, cb_data) {
cb_model_para[rm_elements] <- NULL
for (i in 1:length(cb_model)) {
cb_model[[i]]$obj_path <- as.numeric(unlist(cb_model[[i]]$obj_path[-1]))
}
for (i in 1:length(cb_model)) {
cb_model[[i]]$obj_single <- as.numeric(unlist(cb_model[[i]]$obj_single[-1]))
cb_model[[i]]$weights_path <- do.call(rbind, cb_model[[i]]$weights_path)
}

cb_obj <- list("cb_data" = cb_data, "cb_model" = cb_model, "cb_model_para" = cb_model_para)
Expand Down Expand Up @@ -238,7 +237,7 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data) {
cb_model_para,
cb_data
)
weights <- rbind(weights, cb_model_tmp[[iy]]$weights_path)
weights <- rbind(weights, unlist(cb_model_tmp[[iy]]$weights_path))
}
###### overlap weights
overlap_pair <- check_pair_overlap(weights, coverage = 0.95)
Expand Down Expand Up @@ -307,11 +306,10 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data) {
cb_model_para[rm_elements] <- NULL
for (i in 1:length(cb_model)) {
cb_model[[i]]$obj_path <- as.numeric(unlist(cb_model[[i]]$obj_path[-1]))
}
for (i in 1:length(cb_model)) {
cb_model[[i]]$obj_single <- as.numeric(unlist(cb_model[[i]]$obj_single[-1]))
cb_model[[i]]$weights_path <- do.call(rbind, cb_model[[i]]$weights_path)
}

cb_obj <- list("cb_data" = cb_data, "cb_model" = cb_model, "cb_model_para" = cb_model_para)
class(cb_obj) <- "colocboost"
return(cb_obj)
Expand Down
Loading
Loading