From f294f57cfc8d9150d56ceeb4ce59ad375597e367 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Fri, 12 Sep 2025 20:58:33 -0400 Subject: [PATCH 01/13] improve computational Using profvis to check the computational time for each part of functions. Improve on several way: 1. check_pair_jkeach: instead of checking every pair, we check the upper tri first to reduce the burden. 2. Still some LD checking issue caused the LD-free mode. 3. change matrix multiplication using tcrossprod function --- R/colocboost_check_update_jk.R | 105 ++++++++++++++++++++++++--------- R/colocboost_inference.R | 13 ++-- R/colocboost_init.R | 2 +- R/colocboost_utils.R | 20 +++++-- 4 files changed, 99 insertions(+), 41 deletions(-) diff --git a/R/colocboost_check_update_jk.R b/R/colocboost_check_update_jk.R index 623faa8..05390c8 100644 --- a/R/colocboost_check_update_jk.R +++ b/R/colocboost_check_update_jk.R @@ -66,15 +66,15 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) { # -- define jk and jk_each cor_vals_each <- do.call(cbind, lapply(model_update, function(cc) cc$correlation)) - abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`) + # abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`) + abs_cor_vals_each <- abs(cor_vals_each) * rep(adj_dep, each = nrow(cor_vals_each)) abs_cor_vals <- rowSums(abs_cor_vals_each) - jk <- which(abs_cor_vals == max(abs_cor_vals)) - jk <- ifelse(length(jk) == 1, jk, sample(jk, 1)) - jk_each <- apply(abs_cor_vals_each, 2, function(temp) { - jk_temp <- which(temp == max(temp)) - return(ifelse(length(jk_temp) == 1, jk_temp, sample(jk_temp, 1))) - }) + # jk <- which(abs_cor_vals == max(abs_cor_vals)) + # jk <- ifelse(length(jk) == 1, jk, sample(jk, 1)) + # jk_each <- apply(abs_cor_vals_each, 2, which.max) + jk <- which.max(abs_cor_vals) + jk_each <- sapply(1:ncol(abs_cor_vals_each), function(j) which.max(abs_cor_vals_each[, j])) update_jk[c(1, pos.update + 1)] <- c(jk, jk_each) @@ -142,7 +142,7 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) { update_status[pos.update[true_update]] <- 1 # - re-define jk_star abs_cor_vals_redefine <- rowSums(abs_cor_vals_each[, true_update, drop = FALSE]) - jk_redefine <- which(abs_cor_vals_redefine == max(abs_cor_vals_redefine)) + jk_redefine <- which.max(abs_cor_vals_redefine == max(abs_cor_vals_redefine)) jk_redefine <- ifelse(length(jk_redefine) == 1, jk_redefine, sample(jk_redefine, 1)) real_update_jk[pos.update[true_update]] <- jk_redefine } else { @@ -351,11 +351,10 @@ boost_check_update_jk_focal <- function(cb_model, cb_model_para, cb_data, # -- define jk and jk_each cor_vals_each <- do.call(cbind, lapply(model_update, function(cc) as.vector(cc$correlation))) - abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`) - jk_each <- apply(abs_cor_vals_each, 2, function(temp) { - jk_temp <- which(temp == max(temp)) - return(ifelse(length(jk_temp) == 1, jk_temp, sample(jk_temp, 1))) - }) + # abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`) + abs_cor_vals_each <- abs(cor_vals_each) * rep(adj_dep, each = nrow(cor_vals_each)) + # jk_each <- apply(abs_cor_vals_each, 2, which.max) + jk_each <- sapply(1:ncol(abs_cor_vals_each), function(j) which.max(abs_cor_vals_each[, j])) pp_focal <- which(pos.update == focal_outcome_idx) jk_focal <- jk_each[pp_focal] @@ -503,8 +502,8 @@ get_LD_jk1_jk2 <- function(jk1, jk2, if (length(XtX) == 1){ LD_temp <- 0 } else { - jk1.remain <- which(remain_jk == jk1) - jk2.remain <- which(remain_jk == jk2) + jk1.remain <- match(jk1, remain_jk) + jk2.remain <- match(jk2, remain_jk) LD_temp <- XtX[jk1.remain, jk2.remain] } } @@ -539,33 +538,83 @@ check_jk_jkeach <- function(jk, jk_each, return(judge) } +# check_pair_jkeach <- function(jk_each, +# pos.update, +# model_update, +# cb_data, X_dict, +# jk_equiv_corr = 0.8, +# jk_equiv_loglik = 0.001) { +# data_update <- cb_data$data[pos.update] +# # -- check if jk_i ~ jk_j +# change_each_pair <- matrix(FALSE, nrow = length(jk_each), ncol = length(jk_each)) +# for (i in 1:length(jk_each)) { +# +# jk_i <- jk_each[i] +# change_log_jk_i <- model_update[[i]]$change_loglike[jk_i] +# for (j in 1:length(jk_each)) { +# if (j != i) { +# jk_j <- jk_each[j] +# +# if (!(jk_i %in% data_update[[i]]$variable_miss) & !(jk_j %in% data_update[[i]]$variable_miss)) { +# change_log_jk_j <- model_update[[i]]$change_loglike[jk_j] +# change_each <- abs(change_log_jk_i - change_log_jk_j) +# LD_temp <- get_LD_jk1_jk2(jk_i, jk_j, +# X = cb_data$data[[X_dict[i]]]$X, +# XtX = cb_data$data[[X_dict[i]]]$XtX, +# N = data_update[[i]]$N, +# remain_jk = setdiff(1:length(model_update[[i]]$res), data_update[[i]]$variable_miss) +# ) +# change_each_pair[i, j] <- (change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr) +# } else { +# change_each_pair[i, j] <- FALSE +# } +# } else { +# change_each_pair[i, j] <- FALSE +# } +# } +# } +# change_each_pair <- change_each_pair + t(change_each_pair) +# return(change_each_pair) +# } + check_pair_jkeach <- function(jk_each, pos.update, model_update, cb_data, X_dict, jk_equiv_corr = 0.8, jk_equiv_loglik = 0.001) { + + detect_func <- function(idx, jk_i, jk_j){ + change_log_jk_i <- model_update[[idx]]$change_loglike[jk_i] + change_log_jk_j <- model_update[[idx]]$change_loglike[jk_j] + change_each <- abs(change_log_jk_i - change_log_jk_j) + LD_temp <- get_LD_jk1_jk2(jk_i, jk_j, + X = cb_data$data[[X_dict[idx]]]$X, + XtX = cb_data$data[[X_dict[idx]]]$XtX, + N = data_update[[idx]]$N, + remain_jk = setdiff(1:length(model_update[[idx]]$res), data_update[[idx]]$variable_miss) + ) + return((change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr)) + } + data_update <- cb_data$data[pos.update] # -- check if jk_i ~ jk_j change_each_pair <- matrix(FALSE, nrow = length(jk_each), ncol = length(jk_each)) for (i in 1:length(jk_each)) { - jk_i <- jk_each[i] - change_log_jk_i <- model_update[[i]]$change_loglike[jk_i] - for (j in 1:length(jk_each)) { + for (j in i:length(jk_each)) { if (j != i) { jk_j <- jk_each[j] - if (!(jk_i %in% data_update[[i]]$variable_miss) & !(jk_j %in% data_update[[i]]$variable_miss)) { - change_log_jk_j <- model_update[[i]]$change_loglike[jk_j] - change_each <- abs(change_log_jk_i - change_log_jk_j) - LD_temp <- get_LD_jk1_jk2(jk_i, jk_j, - X = cb_data$data[[X_dict[i]]]$X, - XtX = cb_data$data[[X_dict[i]]]$XtX, - N = data_update[[i]]$N, - remain_jk = setdiff(1:length(model_update[[i]]$res), data_update[[i]]$variable_miss) - ) - change_each_pair[i, j] <- (change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr) + change_each_pair[i, j] <- detect_func(i, jk_i, jk_j) + # if jk_i and jk_j are equivalent on dataset i, then we don't need to check dataset j + if ( !change_each_pair[i, j] ){ + if (!(jk_i %in% data_update[[j]]$variable_miss) & !(jk_j %in% data_update[[j]]$variable_miss)) { + change_each_pair[j, i] <- detect_func(j, jk_i, jk_j) + } else { + change_each_pair[j, i] <- FALSE + } + } } else { change_each_pair[i, j] <- FALSE } diff --git a/R/colocboost_inference.R b/R/colocboost_inference.R index dc53908..e76d708 100644 --- a/R/colocboost_inference.R +++ b/R/colocboost_inference.R @@ -132,7 +132,8 @@ get_modularity <- function(Weight, B) { if (m == 0) return(0) - cate <- B %*% t(B) + # cate <- B %*% t(B) + cate <- tcrossprod(B) if (m_pos == 0 & m_neg == 0) return(0) @@ -190,12 +191,12 @@ get_n_cluster <- function(hc, Sigma, m = ncol(Sigma), min_cluster_corr = 0.8) { #' @noRd w_purity <- function(weights, X = NULL, Xcorr = NULL, N = NULL, n = 100, coverage = 0.95, min_abs_corr = 0.5, median_abs_corr = NULL, miss_idx = NULL) { - tmp <- apply(weights, 2, w_cs, coverage = coverage) - tmp_purity <- apply(tmp, 2, function(tt) { - pos <- which(tt == 1) + + tmp_purity <- apply(weights, 2, function(w) { + pos <- w_cs(w, coverage = coverage) # deal with missing snp here if (!is.null(Xcorr)) { - pos <- match(pos, setdiff(1:length(tmp), miss_idx)) + pos <- match(pos, setdiff(1:length(w), miss_idx)) } get_purity(pos, X = X, Xcorr = Xcorr, N = N, n = n) }) @@ -420,7 +421,7 @@ get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100) { corr[which(is.na(corr))] <- 0 value <- abs(get_upper_tri(corr)) } else { - if (sum(Xcorr) == 1){ + if (length(Xcorr) == 1){ value <- 0 } else { Xcorr <- Xcorr # if (!is.null(N)) Xcorr/(N-1) else Xcorr diff --git a/R/colocboost_init.R b/R/colocboost_init.R index 8685b01..ea27ce7 100644 --- a/R/colocboost_init.R +++ b/R/colocboost_init.R @@ -619,7 +619,7 @@ process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dic Z_extend[pos_target] <- current_z[pos_z] # Calculate submatrix for each unique entry (not duplicates) - if (sum(current_ld_matrix) == 1){ + if (length(current_ld_matrix) == 1){ ld_submatrix <- current_ld_matrix } else { ld_submatrix <- NULL diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R index c4da011..0407633 100644 --- a/R/colocboost_utils.R +++ b/R/colocboost_utils.R @@ -426,15 +426,20 @@ get_max_profile <- function(cb_obj, check_null_max = 0.025, } +# ### Function for check cs for each weight +# w_cs <- function(weights, coverage = 0.95) { +# indices <- unlist(get_in_cos(weights, coverage = coverage)) +# result <- rep(0, length(weights)) +# result[indices] <- 1 +# return(result) +# } + ### Function for check cs for each weight w_cs <- function(weights, coverage = 0.95) { indices <- unlist(get_in_cos(weights, coverage = coverage)) - result <- rep(0, length(weights)) - result[indices] <- 1 - return(result) + return(indices) } - #' Pure R implementation (fallback) #' @noRd get_merge_ordered_with_indices <- function(vector_list) { @@ -505,12 +510,14 @@ get_merge_ordered_with_indices <- function(vector_list) { # Step 4: Topological sort using Kahn's algorithm # Start with nodes that have no incoming edges queue <- all_elements[sapply(all_elements, function(elem) in_degree[[elem]] == 0)] - result <- character() + result <- list() + result_idx <- 1 while (length(queue) > 0) { # Take the first element from the queue current <- queue[1] queue <- queue[-1] - result <- c(result, current) + result[[result_idx]] <- current # List assignment is fast + result_idx <- result_idx + 1 # Process all neighbors (elements that must come after current) neighbors <- graph[[current]] for (next_elem in neighbors) { @@ -520,6 +527,7 @@ get_merge_ordered_with_indices <- function(vector_list) { } } } + result <- unlist(result) # Step 5: Check for cycles and use fallback if needed if (length(result) != n_elements) { # Different variable orders detected - use priority-based fallback From 4fd1b757c44604d9e732f5eba673150bd25b5e43 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Fri, 12 Sep 2025 21:36:13 -0400 Subject: [PATCH 02/13] Enhanced Highlight Functionality with Flexible Styling Options The highlighting feature has been updated to provide greater flexibility in visualization options. In previous versions, additional highlight variables in the colocboost plot could only be displayed as vertical lines by setting add_vertical = TRUE. The updated colocboost_plot function now offers more versatile highlighting capabilities through the new add_highlight = TRUE parameter, which allows users to display highlight variables as either vertical lines or red star markers, providing improved customization for data visualization needs. --- R/colocboost_plot.R | 148 ++++++++++-------- tests/testthat/test_plot.R | 4 +- tests/testthat/test_utils.R | 6 +- vignettes/Visualization_ColocBoost_Output.Rmd | 11 +- 4 files changed, 94 insertions(+), 75 deletions(-) diff --git a/R/colocboost_plot.R b/R/colocboost_plot.R index 90141d6..1f0f7c1 100644 --- a/R/colocboost_plot.R +++ b/R/colocboost_plot.R @@ -14,8 +14,9 @@ #' @param plot_focal_cos_outcome_only Logical, if TRUE only plots colocalization including at least on colocalized outcome with focal outcome, default is FALSE. #' @param points_color Background color for non-colocalized variables, default is "grey80". #' @param cos_color Optional custom colors for CoS. -#' @param add_vertical Logical, if TRUE adds vertical lines at specified positions, default is FALSE -#' @param add_vertical_idx Optional indices for vertical lines. +#' @param add_highlight Logical, if TRUE adds vertical lines at specified positions, default is FALSE +#' @param add_highlight_style Optional style of add_highlight variables, default is "vertical_lines", other choice is "star" - red star. +#' @param add_highlight_idx Optional indices for add_highlight variables. #' @param outcome_names Optional vector of outcomes names for the subtitle of each figure. \code{outcome_names=NULL} for the outcome name shown in \code{data_info}. #' @param plot_cols Number of columns in the plot grid, default is 2. If you have many colocalization. please consider increasing this. #' @param variant_coord Logical, if TRUE uses variant coordinates on x-axis, default is FALSE. This is required the variable names including position information. @@ -78,8 +79,9 @@ colocboost_plot <- function(cb_output, y = "log10p", plot_focal_cos_outcome_only = FALSE, points_color = "grey80", cos_color = NULL, - add_vertical = FALSE, - add_vertical_idx = NULL, + add_highlight = FALSE, + add_highlight_idx = NULL, + add_highlight_style = "vertical_lines", outcome_names = NULL, plot_cols = 2, variant_coord = FALSE, @@ -102,33 +104,34 @@ colocboost_plot <- function(cb_output, y = "log10p", if (!inherits(cb_output, "colocboost")) { stop("Input of colocboost_plot must be a 'colocboost' object!") } - + # get cb_plot_input data from colocboost results cb_plot_input <- get_input_plot(cb_output, - plot_cos_idx = plot_cos_idx, - variant_coord = variant_coord, - outcome_names = outcome_names, - plot_focal_only = plot_focal_only, - plot_focal_cos_outcome_only = plot_focal_cos_outcome_only, - show_cos_to_uncoloc = show_cos_to_uncoloc, - show_cos_to_uncoloc_idx = show_cos_to_uncoloc_idx, - show_cos_to_uncoloc_outcome = show_cos_to_uncoloc_outcome, - plot_ucos = plot_ucos, plot_ucos_idx = plot_ucos_idx + plot_cos_idx = plot_cos_idx, + variant_coord = variant_coord, + outcome_names = outcome_names, + plot_focal_only = plot_focal_only, + plot_focal_cos_outcome_only = plot_focal_cos_outcome_only, + show_cos_to_uncoloc = show_cos_to_uncoloc, + show_cos_to_uncoloc_idx = show_cos_to_uncoloc_idx, + show_cos_to_uncoloc_outcome = show_cos_to_uncoloc_outcome, + plot_ucos = plot_ucos, plot_ucos_idx = plot_ucos_idx ) # get initial set up of plot cb_plot_init <- plot_initial(cb_plot_input, - y = y, points_color = points_color, cos_color = cos_color, - ylim_each = ylim_each, title_specific = title_specific, - outcome_legend_pos = outcome_legend_pos, outcome_legend_size = outcome_legend_size, - cos_legend_pos = cos_legend_pos, plot_ucos = plot_ucos, - show_variable = show_variable, lab_style = lab_style, axis_style = axis_style, - title_style = title_style, ... + y = y, points_color = points_color, cos_color = cos_color, + ylim_each = ylim_each, title_specific = title_specific, + outcome_legend_pos = outcome_legend_pos, outcome_legend_size = outcome_legend_size, + cos_legend_pos = cos_legend_pos, plot_ucos = plot_ucos, + show_variable = show_variable, lab_style = lab_style, axis_style = axis_style, + title_style = title_style, ... ) - + colocboost_plot_basic <- function(cb_plot_input, cb_plot_init, outcome_idx = NULL, plot_all_outcome = FALSE, grange = NULL, plot_cols = 2, - add_vertical = FALSE, add_vertical_idx = NULL, + add_highlight = FALSE, add_highlight_idx = NULL, + add_highlight_style = "vertical_lines", show_top_variables = TRUE, ...) { args <- list(...) @@ -147,12 +150,12 @@ colocboost_plot <- function(cb_output, y = "log10p", args$font.lab <- cb_plot_init$lab_face # - change position cb_plot_init$outcome_legend_pos <- switch(cb_plot_init$outcome_legend_pos, - "right" = 4, - "left" = 2, - "top" = 3, - "bottom" = 1 + "right" = 4, + "left" = 2, + "top" = 3, + "bottom" = 1 ) - + # - begin plotting coloc_cos <- cb_plot_input$cos outcomes <- cb_plot_input$outcomes @@ -229,12 +232,7 @@ colocboost_plot <- function(cb_output, y = "log10p", cex = cb_plot_init$outcome_legend_size, font = 1 ) } - if (add_vertical) { - for (iii in 1:length(add_vertical_idx)) { - abline(v = add_vertical_idx[iii], col = "#E31A1C", lwd = 1.5, lty = "dashed") - } - } - + # mark variables in CoS to colocalized outcomes if (!is.null(coloc_cos)) { n.coloc <- length(coloc_cos) @@ -242,7 +240,7 @@ colocboost_plot <- function(cb_output, y = "log10p", if (length(y)==1){coloc_index = lapply(coloc_index, function(i) 1 )} legend_text <- list(col = vector()) legend_text$col <- head(cb_plot_init$col, n.coloc) - + # check which coloc set for this outcome p.coloc <- sapply(coloc_index, function(idx) sum(idx == iy) != 0) p.coloc <- which(p.coloc) @@ -261,7 +259,7 @@ colocboost_plot <- function(cb_output, y = "log10p", points(x_hits, y_hits, pch = 21, bg = legend_text$col[i.cs], col = "#E31A1C", cex = 3, lwd = 3) } } - + # mark variables in CoS to uncolocalized outcomes uncoloc <- cb_plot_input$uncoloc if (!is.null(uncoloc)) { @@ -276,8 +274,8 @@ colocboost_plot <- function(cb_output, y = "log10p", x0 <- intersect(args$x, cs) y1 <- args$y[match(x0, args$x)] points(x0, y1, - pch = 4, col = adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 0.4), - cex = 1.5, lwd = 1.5 + pch = 4, col = adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 0.4), + cex = 1.5, lwd = 1.5 ) shape_col <- c(shape_col, adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 1)) texts_col <- c(texts_col, adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 0.8)) @@ -294,35 +292,51 @@ colocboost_plot <- function(cb_output, y = "log10p", if (length(texts) == 0) { next } - + # Get current plot area coordinates usr <- par("usr") x_pos <- usr[1] + cb_plot_init$cos_legend_pos[1] * (usr[2] - usr[1]) # 5% from left edge y_pos <- usr[3] + cb_plot_init$cos_legend_pos[2] * (usr[4] - usr[3]) # 50% from bottom edge legend(x = x_pos, y = y_pos, texts, - bty = "n", col = shape_col, text.col = texts_col, - cex = 1.5, pt.cex = 1.5, pch = 4, x.intersp = cos_legend_pos[3], y.intersp = cos_legend_pos[4] + bty = "n", col = shape_col, text.col = texts_col, + cex = 1.5, pt.cex = 1.5, pch = 4, x.intersp = cos_legend_pos[3], y.intersp = cos_legend_pos[4] ) } } + if (add_highlight) { + for (iii in 1:length(add_highlight_idx)) { + if ( !(add_highlight_style %in% c("vertical_lines", "star")) ){ + message("add_highlight_style is not 'vertical_lines' and 'star', defaulting to 'vertical_lines'") + add_highlight_style = "vertical_lines" + } + if (add_highlight_style == "vertical_lines"){ + abline(v = add_highlight_idx[iii], col = "#E31A1C", lwd = 1.5, lty = "dashed") + } else if (add_highlight_style == "star") { + points(add_highlight_idx[iii], args$y[match(add_highlight_idx[iii],args$x)], + pch = 8, col = "#E31A1C", lwd = 2, lty = "dashed", cex = 2.5) + } + + } + } } if (!is.null(cb_plot_init$title)) { mtext(cb_plot_init$title, - side = 3, line = 0, outer = TRUE, - cex = cb_plot_init$title_size, font = cb_plot_init$title_face + side = 3, line = 0, outer = TRUE, + cex = cb_plot_init$title_size, font = cb_plot_init$title_face ) } return(invisible()) } - + colocboost_plot_basic(cb_plot_input, cb_plot_init, - grange = grange, - outcome_idx = outcome_idx, - plot_all_outcome = plot_all_outcome, - plot_cols = plot_cols, - add_vertical = add_vertical, add_vertical_idx = add_vertical_idx, - show_top_variables = show_top_variables, - ... + grange = grange, + outcome_idx = outcome_idx, + plot_all_outcome = plot_all_outcome, + plot_cols = plot_cols, + add_highlight = add_highlight, add_highlight_idx = add_highlight_idx, + add_highlight_style = add_highlight_style, + show_top_variables = show_top_variables, + ... ) } @@ -339,7 +353,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, show_cos_to_uncoloc_outcome = NULL, plot_ucos = FALSE, plot_ucos_idx = NULL) { - + # check ucos exists if (plot_ucos && !"ucos_details" %in% names(cb_output)) { warning( @@ -354,7 +368,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, cb_output$data_info$outcome_info$outcome_names <- outcome_names names(cb_output$data_info$z) <- outcome_names } - + # extract results from colocboost analysis_outcome <- cb_output$data_info$outcome_info$outcome_names focal_outcome_idx <- which(cb_output$data_info$outcome_info$is_focal) @@ -371,7 +385,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, Z <- cb_output$data_info$z coef <- cb_output$data_info$coef vcp <- list(as.numeric(cb_output$vcp)) - + # if finemapping if (cb_output$data_info$n_outcomes == 1) { cb_output$cos_details$cos$cos_variables <- cb_output$ucos_details$ucos$ucos_variables @@ -406,7 +420,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, } }) ncos <- length(cb_output$cos_details$cos$cos_index) - + select_cs <- 1:ncos if (!is.null(plot_cos_idx)) { if (length(setdiff(plot_cos_idx, c(1:ncos))) != 0) { @@ -477,7 +491,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, "coloc_index" = coloc_index, "select_cos" = select_cs ) - + # check plot uncolocalizaed confidence sets from ucos_details if (plot_ucos & cb_output$data_info$n_outcomes > 1){ ucos_details <- cb_output$ucos_details @@ -528,7 +542,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, plot_input$ucos_cos_int_weights <- combined } } - + # check if plot cos to uncolocalized outcome # use the updated coloc_cos and related components from plot_input if available coloc_cos <- plot_input$cos @@ -613,7 +627,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, ) plot_input$uncoloc <- uncoloc } - + class(plot_input) <- "colocboost" return(plot_input) } @@ -634,11 +648,11 @@ plot_initial <- function(cb_plot_input, y = "log10p", ...) { args <- list(...) if (!exists("pch", args)) args$pch <- 16 - + # - set background point color and cos color pools args$bg <- points_color if (is.null(cos_color)) { - + # cos_color <- c( # "#377EB8", "#E69F00", "#33A02C", "#984EA3", "#F46D43", # "#A65628", "#1F4E79", "#B2182B", "#D73027", "#F781BF", @@ -688,12 +702,12 @@ plot_initial <- function(cb_plot_input, y = "log10p", "#D5D8DC", "#BDC3C7", "#95A5A6", "#7F8C8D", "#17202A", "#808080", "#A9A9A9", "#C0C0C0", "#696969", "#778899" ) - - + + # cos_color <- c("#1F70A9", "#33A02C", "#CAB2D6", "#EA7827") } args$col <- cos_color[cb_plot_input$select_cos] - + # - set data and x-lab and y-lab if (y == "log10p") { plot_data <- lapply(cb_plot_input$Zscores, function(z) { @@ -729,12 +743,12 @@ plot_initial <- function(cb_plot_input, y = "log10p", if (!exists("ylab", args)) args$ylab <- ylab args$lab_size <- as.numeric(lab_style[1]) args$lab_face <- lab_style[2] - + # - set title format args$title <- title_specific args$title_size <- as.numeric(title_style[1]) args$title_face <- title_style[2] - + # - set x-axis and y-axis args$x <- cb_plot_input$x$pos args$y <- plot_data @@ -744,7 +758,7 @@ plot_initial <- function(cb_plot_input, y = "log10p", } args$axis_size <- as.numeric(axis_style[1]) args$axis_face <- axis_style[2] - + # - set ylim for each subfigure if (exists("ylim", args)) { ymax <- rep(args$ylim[2], length(args$y)) @@ -776,7 +790,7 @@ plot_initial <- function(cb_plot_input, y = "log10p", } args$ymax <- ymax args$ymin <- ymin - + # - set legend text position and format args$outcome_legend_pos <- outcome_legend_pos args$outcome_legend_size <- outcome_legend_size @@ -788,7 +802,7 @@ plot_initial <- function(cb_plot_input, y = "log10p", args$outcome_legend_angle <- 0 } args$cos_legend_pos <- cos_legend_pos - + return(args) } diff --git a/tests/testthat/test_plot.R b/tests/testthat/test_plot.R index 775d3fe..d49aace 100644 --- a/tests/testthat/test_plot.R +++ b/tests/testthat/test_plot.R @@ -176,7 +176,9 @@ test_that("colocboost_plot handles layout options", { test_that("colocboost_plot handles additional visualization options", { # Test with vertical line options - expect_error(suppressWarnings(colocboost_plot(cb_res, add_vertical = TRUE, add_vertical_idx = c(5, 10))), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, add_highlight = TRUE, add_highlight_idx = c(5, 10))), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, add_highlight = TRUE, add_highlight_idx = c(5, 10), add_highlight_style = "vertical_lines")), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, add_highlight = TRUE, add_highlight_idx = c(5, 10), add_highlight_style = "star")), NA) # Test with show_top_variables option expect_error(suppressWarnings(colocboost_plot(cb_res, show_top_variables = TRUE)), NA) diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index a622540..d3d480d 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -85,12 +85,12 @@ test_that("w_cs correctly identifies confidence set for weight vector", { result <- w_cs(w, coverage = 0.8) # Expected result for 80% coverage: w[1] + w[2] = 0.8, so first 2 elements should be 1 - expected <- c(1, 1, 0, 0) + expected <- c(1,2) expect_equal(result, expected) # Test with different coverage result2 <- w_cs(w, coverage = 0.9) - expected2 <- c(1, 1, 1, 1) # First 4 elements cover 90% since 3 and 4 with the same 0.1 weight + expected2 <- c(1,2,3,4) # First 4 elements cover 90% since 3 and 4 with the same 0.1 weight expect_equal(result2, expected2) }) @@ -490,7 +490,7 @@ test_that("get_merge_ordered_with_indices handles conflicting variable orders co test_that("get_merge_ordered_with_indices handles edge cases", { # Test with NULL inputs - expect_equal(get_merge_ordered_with_indices(list()), character(0)) + expect_equal(get_merge_ordered_with_indices(list()), NULL) # Test with single vector expect_equal(get_merge_ordered_with_indices(list(c("A", "B", "C"))), c("A", "B", "C")) diff --git a/vignettes/Visualization_ColocBoost_Output.Rmd b/vignettes/Visualization_ColocBoost_Output.Rmd index 1ffd7d9..8ed3e5b 100644 --- a/vignettes/Visualization_ColocBoost_Output.Rmd +++ b/vignettes/Visualization_ColocBoost_Output.Rmd @@ -94,9 +94,11 @@ There are three options available for plotting the CoS variants to uncolocalized colocboost_plot(res, show_cos_to_uncoloc = TRUE) ``` -### 2.4. Plot with an added vertical line +### 2.4. Plot with added highlight points -You can add a vertical line to the plot by setting `add_vertical = TRUE` and `add_vertical_idx = **`. This will add a vertical line at the specified index. +You can highlight specific variants in the plot by setting `add_highlight = TRUE` and `add_highlight_idx = **`. +This will add red dashed vertical lines (defult `add_highlight_style = "vertical_lines"`) at the specified index you want to highlight. +Alternatively, you can use `add_highlight_style = "star"` to change the highlight style to the red star for the specified variants. For example, to add a vertical line at true causal variants, you can set `add_vertical_idx = unique(unlist(Ind_5traits$true_effect_variants))`. Following plot also shows the top variants. @@ -104,8 +106,9 @@ Following plot also shows the top variants. ```{r vertical-plot} colocboost_plot( res, show_top_variables = TRUE, - add_vertical = TRUE, - add_vertical_idx = unique(unlist(Ind_5traits$true_effect_variants)) + add_highlight = TRUE, + add_highlight_idx = unique(unlist(Ind_5traits$true_effect_variants)), + add_highlight_style = "star" ) ``` From 220f5f7f9783a0872685df5b4e32162c8c0ef1aa Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Sun, 14 Sep 2025 19:47:17 -0400 Subject: [PATCH 03/13] Optimized LD calculation for Outcome-Specific Model Updates To enhance computational efficiency, we pre-calculate LD values for each pair of outcome-specific best parameter updates at each boost update. --- R/colocboost_check_update_jk.R | 100 +++++++++++++-------------------- R/colocboost_init.R | 3 +- R/colocboost_one_causal.R | 10 ++-- R/colocboost_update.R | 3 +- R/colocboost_workhorse.R | 3 +- 5 files changed, 49 insertions(+), 70 deletions(-) diff --git a/R/colocboost_check_update_jk.R b/R/colocboost_check_update_jk.R index 05390c8..d449b03 100644 --- a/R/colocboost_check_update_jk.R +++ b/R/colocboost_check_update_jk.R @@ -538,66 +538,54 @@ check_jk_jkeach <- function(jk, jk_each, return(judge) } -# check_pair_jkeach <- function(jk_each, -# pos.update, -# model_update, -# cb_data, X_dict, -# jk_equiv_corr = 0.8, -# jk_equiv_loglik = 0.001) { -# data_update <- cb_data$data[pos.update] -# # -- check if jk_i ~ jk_j -# change_each_pair <- matrix(FALSE, nrow = length(jk_each), ncol = length(jk_each)) -# for (i in 1:length(jk_each)) { -# -# jk_i <- jk_each[i] -# change_log_jk_i <- model_update[[i]]$change_loglike[jk_i] -# for (j in 1:length(jk_each)) { -# if (j != i) { -# jk_j <- jk_each[j] -# -# if (!(jk_i %in% data_update[[i]]$variable_miss) & !(jk_j %in% data_update[[i]]$variable_miss)) { -# change_log_jk_j <- model_update[[i]]$change_loglike[jk_j] -# change_each <- abs(change_log_jk_i - change_log_jk_j) -# LD_temp <- get_LD_jk1_jk2(jk_i, jk_j, -# X = cb_data$data[[X_dict[i]]]$X, -# XtX = cb_data$data[[X_dict[i]]]$XtX, -# N = data_update[[i]]$N, -# remain_jk = setdiff(1:length(model_update[[i]]$res), data_update[[i]]$variable_miss) -# ) -# change_each_pair[i, j] <- (change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr) -# } else { -# change_each_pair[i, j] <- FALSE -# } -# } else { -# change_each_pair[i, j] <- FALSE -# } -# } -# } -# change_each_pair <- change_each_pair + t(change_each_pair) -# return(change_each_pair) -# } - check_pair_jkeach <- function(jk_each, pos.update, model_update, cb_data, X_dict, jk_equiv_corr = 0.8, jk_equiv_loglik = 0.001) { - - detect_func <- function(idx, jk_i, jk_j){ + + + #' @importFrom stats cor + get_LD_jk_each <- function(jk_each, + X = NULL, XtX = NULL, N = NULL, + remain_jk = NULL) { + if (!is.null(X)) { + LD_temp <- suppressWarnings({ + get_cormat(X[, jk_each]) + }) + LD_temp[which(is.na(LD_temp))] <- 0 + # LD_temp <- LD_temp[1, 2] + } else if (!is.null(XtX)) { + if (length(XtX) == 1){ + LD_temp <- matrix(0, length(jk_each), length(jk_each)) + } else { + jk.remain <- match(jk_each, remain_jk) + LD_temp <- XtX[jk.remain, jk.remain] + LD_temp[which(is.na(LD_temp))] <- 0 + } + } + return(LD_temp) + } + + detect_func <- function(idx, LD_all, jk_i, jk_j, i, j){ change_log_jk_i <- model_update[[idx]]$change_loglike[jk_i] change_log_jk_j <- model_update[[idx]]$change_loglike[jk_j] change_each <- abs(change_log_jk_i - change_log_jk_j) - LD_temp <- get_LD_jk1_jk2(jk_i, jk_j, - X = cb_data$data[[X_dict[idx]]]$X, - XtX = cb_data$data[[X_dict[idx]]]$XtX, - N = data_update[[idx]]$N, - remain_jk = setdiff(1:length(model_update[[idx]]$res), data_update[[idx]]$variable_miss) - ) + LD_temp <- LD_all[[idx]][i, j] return((change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr)) } - + data_update <- cb_data$data[pos.update] + LD_all <- lapply(1:length(jk_each), function(idx){ + get_LD_jk_each(jk_each, + X = cb_data$data[[X_dict[idx]]]$X, + XtX = cb_data$data[[X_dict[idx]]]$XtX, + N = data_update[[idx]]$N, + remain_jk = setdiff(1:length(model_update[[idx]]$res), data_update[[idx]]$variable_miss) + ) + }) + # -- check if jk_i ~ jk_j change_each_pair <- matrix(FALSE, nrow = length(jk_each), ncol = length(jk_each)) for (i in 1:length(jk_each)) { @@ -605,18 +593,10 @@ check_pair_jkeach <- function(jk_each, for (j in i:length(jk_each)) { if (j != i) { jk_j <- jk_each[j] - if (!(jk_i %in% data_update[[i]]$variable_miss) & !(jk_j %in% data_update[[i]]$variable_miss)) { - change_each_pair[i, j] <- detect_func(i, jk_i, jk_j) - # if jk_i and jk_j are equivalent on dataset i, then we don't need to check dataset j - if ( !change_each_pair[i, j] ){ - if (!(jk_i %in% data_update[[j]]$variable_miss) & !(jk_j %in% data_update[[j]]$variable_miss)) { - change_each_pair[j, i] <- detect_func(j, jk_i, jk_j) - } else { - change_each_pair[j, i] <- FALSE - } - } - } else { - change_each_pair[i, j] <- FALSE + change_each_pair[i, j] <- detect_func(idx = i, LD_all, jk_i, jk_j, i, j) + # if jk_i and jk_j are equivalent on dataset i, then we don't need to check dataset j + if ( !change_each_pair[i, j] ){ + change_each_pair[j, i] <- detect_func(idx = j, LD_all, jk_i, jk_j, i, j) } } else { change_each_pair[i, j] <- FALSE diff --git a/R/colocboost_init.R b/R/colocboost_init.R index ea27ce7..edca937 100644 --- a/R/colocboost_init.R +++ b/R/colocboost_init.R @@ -163,7 +163,8 @@ colocboost_init_model <- function(cb_data, tmp <- list( "res" = NULL, "beta" = rep(0, P), - "weights_path" = c(), + # "weights_path" = c(), + "weights_path" = list(), "profile_loglike_each" = NULL, "obj_path" = 999999, "obj_single" = 999999, diff --git a/R/colocboost_one_causal.R b/R/colocboost_one_causal.R index 0ed96b5..09692d5 100644 --- a/R/colocboost_one_causal.R +++ b/R/colocboost_one_causal.R @@ -79,9 +79,8 @@ colocboost_one_iteration <- function(cb_model, cb_model_para, cb_data) { cb_model_para[rm_elements] <- NULL for (i in 1:length(cb_model)) { cb_model[[i]]$obj_path <- as.numeric(unlist(cb_model[[i]]$obj_path[-1])) - } - for (i in 1:length(cb_model)) { cb_model[[i]]$obj_single <- as.numeric(unlist(cb_model[[i]]$obj_single[-1])) + cb_model[[i]]$weights_path <- do.call(rbind, cb_model[[i]]$weights_path) } cb_obj <- list("cb_data" = cb_data, "cb_model" = cb_model, "cb_model_para" = cb_model_para) @@ -238,7 +237,7 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data) { cb_model_para, cb_data ) - weights <- rbind(weights, cb_model_tmp[[iy]]$weights_path) + weights <- rbind(weights, unlist(cb_model_tmp[[iy]]$weights_path)) } ###### overlap weights overlap_pair <- check_pair_overlap(weights, coverage = 0.95) @@ -307,11 +306,10 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data) { cb_model_para[rm_elements] <- NULL for (i in 1:length(cb_model)) { cb_model[[i]]$obj_path <- as.numeric(unlist(cb_model[[i]]$obj_path[-1])) - } - for (i in 1:length(cb_model)) { cb_model[[i]]$obj_single <- as.numeric(unlist(cb_model[[i]]$obj_single[-1])) + cb_model[[i]]$weights_path <- do.call(rbind, cb_model[[i]]$weights_path) } - + cb_obj <- list("cb_data" = cb_data, "cb_model" = cb_model, "cb_model_para" = cb_model_para) class(cb_obj) <- "colocboost" return(cb_obj) diff --git a/R/colocboost_update.R b/R/colocboost_update.R index 6100828..734bcb1 100644 --- a/R/colocboost_update.R +++ b/R/colocboost_update.R @@ -74,7 +74,8 @@ colocboost_update <- function(cb_model, cb_model_para, cb_data) { # weights <- adj_dep * ld_feature / scaling_factor * exp_abs_cor / sum(exp_abs_cor) weights <- adj_dep * obj_ld * exp_abs_cor / sum(exp_abs_cor) weights <- weights / sum(weights) - cb_model[[i]]$weights_path <- rbind(cb_model[[i]]$weights_path, as.vector(weights)) + # cb_model[[i]]$weights_path <- rbind(cb_model[[i]]$weights_path, as.vector(weights)) + cb_model[[i]]$weights_path <- c(cb_model[[i]]$weights_path, list(as.vector(weights))) ########## END: MAIN CALCULATION ################### diff --git a/R/colocboost_workhorse.R b/R/colocboost_workhorse.R index 7583eee..5bfe645 100644 --- a/R/colocboost_workhorse.R +++ b/R/colocboost_workhorse.R @@ -236,9 +236,8 @@ colocboost_workhorse <- function(cb_data, cb_model_para$num_updates <- m for (i in 1:length(cb_model)) { cb_model[[i]]$obj_path <- as.numeric(unlist(cb_model[[i]]$obj_path[-1])) - } - for (i in 1:length(cb_model)) { cb_model[[i]]$obj_single <- as.numeric(unlist(cb_model[[i]]$obj_single[-1])) + cb_model[[i]]$weights_path <- do.call(rbind, cb_model[[i]]$weights_path) } if (m == M) { From 040b1783fefbf6110f00788b65b13c20fdc13e41 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:04:05 -0400 Subject: [PATCH 04/13] Fix purity issue for merged CoS For the merged CoS, we recalibrate the CoS variants based on the merged weight. We should recalculate purity for the merged CoS and filter unpured merged CoS. --- R/colocboost_assemble.R | 3 +++ R/colocboost_utils.R | 52 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/R/colocboost_assemble.R b/R/colocboost_assemble.R index aba5405..e849872 100644 --- a/R/colocboost_assemble.R +++ b/R/colocboost_assemble.R @@ -209,6 +209,9 @@ colocboost_assemble <- function(cb_obj, # - colocalization results cb_obj$cb_model_para$weight_fudge_factor <- weight_fudge_factor cb_obj$cb_model_para$coverage <- coverage + cb_obj$cb_model_para$min_abs_corr <- min_abs_corr + cb_obj$cb_model_para$median_abs_corr <- median_abs_corr + cb_obj$cb_model_para$n_purity <- n_purity cos_results <- get_cos_details(cb_obj, coloc_out = past_out$cos$cos, data_info = data_info) cb_output <- list( "vcp" = cos_results$vcp, diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R index 0407633..1a13eeb 100644 --- a/R/colocboost_utils.R +++ b/R/colocboost_utils.R @@ -665,8 +665,6 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL) { }) int_weight <- lapply(cos_weights, get_integrated_weight, weight_fudge_factor = cb_obj$cb_model_para$weight_fudge_factor) names(int_weight) <- names(cos_weights) <- colocset_names - vcp <- as.vector(1 - apply(1 - do.call(cbind, int_weight), 1, prod)) - names(vcp) <- data_info$variables # - resummary results cos_re_idx <- lapply(int_weight, function(w) { @@ -676,7 +674,55 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL) { data_info$variables[idx] }) coloc_csets <- list("cos_index" = cos_re_idx, "cos_variables" = cos_re_var) - + + # - recalculate purity + purity <- vector(mode = "list", length = length(cos_re_idx)) + for (ee in 1:length(cos_re_idx)) { + coloc_t <- coloc_outcome_index[[ee]] + p_tmp <- c() + for (i3 in coloc_t) { + pos <- cos_re_idx[[ee]] + X_dict <- cb_obj$cb_data$dict[i3] + if (!is.null(cb_obj$cb_data$data[[X_dict]]$XtX)) { + pos <- match(pos, setdiff(1:cb_obj$cb_model_para$P, cb_obj$cb_data$data[[i3]]$variable_miss)) + } + tmp <- matrix(get_purity(pos, + X = cb_obj$cb_data$data[[X_dict]]$X, + Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, + N = cb_obj$cb_data$data[[i3]]$N, n = cb_obj$cb_model_para$n_purity + ), 1, 3) + p_tmp <- rbind(p_tmp, tmp) + } + purity[[ee]] <- matrix(apply(p_tmp, 2, max), 1, 3) + } + purity_all <- do.call(rbind, purity) + purity_all <- as.data.frame(purity_all) + colnames(purity_all) <- c("min_abs_corr", "mean_abs_corr", "median_abs_corr") + coloc_out$purity <- purity_all + if (is.null(cb_obj$cb_model_para$median_abs_corr)) { + is_pure <- which(purity_all[, 1] >= cb_obj$cb_model_para$min_abs_corr) + } else { + is_pure <- which(purity_all[, 1] >= cb_obj$cb_model_para$min_abs_corr | purity_all[, 3] >= cb_obj$cb_model_para$median_abs_corr) + } + if (length(is_pure)==0){ + coloc_results <- NULL + vcp <- NULL + return(list("cos_results" = coloc_results, "vcp" = vcp)) + } else if (length(is_pure)!=length(cos_re_idx)){ + int_weight = int_weight[is_pure] + coloc_csets <- lapply(coloc_csets, function(cs) cs[is_pure]) + coloc_outcomes <- lapply(coloc_outcomes, function(cs) cs[is_pure]) + normalization_evidence <- normalization_evidence[is_pure] + npc <- npc[is_pure] + cos_min_npc_outcome <- cos_min_npc_outcome[is_pure] + cos_weights <- cos_weights[is_pure] + coloc_out$purity <- purity_all[is_pure,,drop = FALSE] + colocset_names <- colocset_names[is_pure] + } + vcp <- as.vector(1 - apply(1 - do.call(cbind, int_weight), 1, prod)) + names(vcp) <- data_info$variables + + # - hits variables in each csets coloc_hits <- coloc_hits_variablenames <- coloc_hits_names <- c() for (i in 1:length(int_weight)) { From 5c19c59e237191c9f4452749970e330a6cf24413 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Thu, 2 Oct 2025 02:13:57 -0400 Subject: [PATCH 05/13] minor fix fix error when focal outcome does not pass multiple testing correction at the first place. --- R/colocboost_assemble.R | 2 +- R/colocboost_utils.R | 2 +- R/colocboost_workhorse.R | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/R/colocboost_assemble.R b/R/colocboost_assemble.R index e849872..4ddd67d 100644 --- a/R/colocboost_assemble.R +++ b/R/colocboost_assemble.R @@ -58,7 +58,7 @@ colocboost_assemble <- function(cb_obj, ) # - save model and all coloc and single information for diagnostic if (output_level != 1) { - tmp <- get_full_output(cb_obj = cb_obj, past_out = NULL, variables = NULL) + tmp <- get_full_output(cb_obj = cb_obj, past_out = NULL, variables = data_info$variables) if (output_level == 2) { cb_output <- c(cb_output, list("ucos_details" = tmp$ucos_detials)) cb_output <- cb_output[c("cos_summary", "vcp", "cos_details", "data_info", "model_info", "ucos_details")] diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R index 1a13eeb..0d57e32 100644 --- a/R/colocboost_utils.R +++ b/R/colocboost_utils.R @@ -893,7 +893,7 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output cb_model_para <- cb_obj$cb_model_para ## - obtain the order of variables based on the variables names if it has position information - if (!is.null(variables)) { + if (is.null(variables)) { ordered <- 1:length(cb_obj$cb_model_para$variables) } else { ordered <- match(variables, cb_obj$cb_model_para$variables) diff --git a/R/colocboost_workhorse.R b/R/colocboost_workhorse.R index 5bfe645..4462680 100644 --- a/R/colocboost_workhorse.R +++ b/R/colocboost_workhorse.R @@ -103,6 +103,9 @@ colocboost_workhorse <- function(cb_data, } if (sum(cb_model_para$update_y == 1) == 0) { + for (i in 1:length(cb_model)){ + cb_model[[i]]$weights_path <- matrix(0, ncol = cb_model_para$P) + } cb_obj <- list("cb_data" = cb_data, "cb_model" = cb_model, "cb_model_para" = cb_model_para) class(cb_obj) <- "colocboost" return(cb_obj) From 8c0dad2856c15f6224d6a5347f259d9e635177bb Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Wed, 22 Oct 2025 13:40:06 -0400 Subject: [PATCH 06/13] Update colocboost_utils.R fix bug for choosing top variable in ucos details --- R/colocboost_utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R index 0d57e32..9bfa3fd 100644 --- a/R/colocboost_utils.R +++ b/R/colocboost_utils.R @@ -987,7 +987,7 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output # - hits variables in each csets cs_hits <- sapply(1:length(specific_w), function(jj) { inw <- specific_w[[jj]] - sample(which(inw == max(inw)), 1) + which(inw == max(inw))[1] }) cs_hits_variablenames <- sapply(cs_hits, function(ch) variables[ch]) specific_cs_hits <- data.frame("top_index" = cs_hits, "top_variables" = cs_hits_variablenames) # save From 37b33ad3ccfe67a259074a0649457d07060029fb Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Wed, 22 Oct 2025 14:05:32 -0400 Subject: [PATCH 07/13] fineboost one iteration issue fix fine-mapping one iteration issue --- R/colocboost_assemble.R | 1 + R/colocboost_inference.R | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/R/colocboost_assemble.R b/R/colocboost_assemble.R index 4ddd67d..94cbbe5 100644 --- a/R/colocboost_assemble.R +++ b/R/colocboost_assemble.R @@ -79,6 +79,7 @@ colocboost_assemble <- function(cb_obj, } else if (cb_obj$cb_model_para$model_used == "one_causal"){ # fixme later check_null_max <- check_null_max * check_null + check_null_max_ucos <- check_null_max_ucos * check_null } cb_obj <- get_max_profile(cb_obj, check_null_max = check_null_max, check_null_max_ucos = check_null_max_ucos, diff --git a/R/colocboost_inference.R b/R/colocboost_inference.R index e76d708..882fc7a 100644 --- a/R/colocboost_inference.R +++ b/R/colocboost_inference.R @@ -374,7 +374,12 @@ check_null_post <- function(cb_obj, } if (!weaker_effect) { check_cs_change <- cs_change - check_null_tmp <- sapply(1:cb_obj$cb_model_para$L, function(j) cb_obj$cb_model[[j]]$check_null_max) + if (cb_obj$cb_model_para$L == 1){ + check_null_max_tmp <- cb_obj$cb_model[[j]]$check_null_max_ucos + } else { + check_null_max_tmp <- cb_obj$cb_model[[j]]$check_null_max + } + check_null_tmp <- sapply(1:cb_obj$cb_model_para$L, function(j) check_null_max_tmp) } else { check_null_tmp <- rep(check_null, cb_obj$cb_model_para$L) } From c0878aa26d7fe2e8b38f1d94b890c7de386d3823 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Sat, 1 Nov 2025 08:18:40 -0400 Subject: [PATCH 08/13] Update ColocBoost_Wrapper_Pipeline.Rmd Comment out to avoid running this code here, as we do not have real data files in examples --- vignettes/ColocBoost_Wrapper_Pipeline.Rmd | 173 ++++++++++++---------- 1 file changed, 92 insertions(+), 81 deletions(-) diff --git a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd index 6b5a5a4..49bc4c1 100644 --- a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd +++ b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd @@ -85,23 +85,25 @@ imiss_cutoff = 0.9 # More advanced parameters see pecotmr::load_multitask_regional_data() -region_data_individual <- load_multitask_regional_data( - region = region, - genotype_list = genotype_list, - phenotype_list = phenotype_list, - covariate_list = covariate_list, - conditions_list_individual = conditions_list_individual, - match_geno_pheno = match_geno_pheno, - association_window = association_window, - region_name_col = region_name_col, - extract_region_name = extract_region_name, - keep_indel = keep_indel, - keep_samples = keep_samples, - maf_cutoff = maf_cutoff, - mac_cutoff = mac_cutoff, - xvar_cutoff = xvar_cutoff, - imiss_cutoff = imiss_cutoff -) +#### Comment out to avoid running this code here, as we do not have real data files in this example #### +# region_data_individual <- load_multitask_regional_data( +# region = region, +# genotype_list = genotype_list, +# phenotype_list = phenotype_list, +# covariate_list = covariate_list, +# conditions_list_individual = conditions_list_individual, +# match_geno_pheno = match_geno_pheno, +# association_window = association_window, +# region_name_col = region_name_col, +# extract_region_name = extract_region_name, +# keep_indel = keep_indel, +# keep_samples = keep_samples, +# maf_cutoff = maf_cutoff, +# mac_cutoff = mac_cutoff, +# xvar_cutoff = xvar_cutoff, +# imiss_cutoff = imiss_cutoff +# ) +#### End of comment out ``` @@ -144,17 +146,19 @@ n_controls = c(0, 40000) # More advanced parameters see pecotmr::load_multitask_regional_data() -region_data_sumstat <- load_multitask_regional_data( - sumstat_path_list = sumstat_path_list, - column_file_path_list = column_file_path_list, - LD_meta_file_path_list = LD_meta_file_path_list, - conditions_list_sumstat = conditions_list_sumstat, - match_LD_sumstat = match_LD_sumstat, - association_window = association_window, - n_samples = n_samples, - n_cases = n_cases, - n_controls = n_controls -) +#### Comment out to avoid running this code here, as we do not have real data files in this example #### +# region_data_sumstat <- load_multitask_regional_data( +# sumstat_path_list = sumstat_path_list, +# column_file_path_list = column_file_path_list, +# LD_meta_file_path_list = LD_meta_file_path_list, +# conditions_list_sumstat = conditions_list_sumstat, +# match_LD_sumstat = match_LD_sumstat, +# association_window = association_window, +# n_samples = n_samples, +# n_cases = n_cases, +# n_controls = n_controls +# ) +#### End of comment out ``` @@ -223,58 +227,65 @@ outputs: - **`colocboost_results`**: List of colocboost objects (with `xqtl_coloc`, `joint_gwas`, `separate_gwas`); Output of the `colocboost_analysis_pipeline` function. If the mode is not run, the corresponding element will be `NULL`. ```{r, colocboost-analysis} -# load in individual-level and sumstat data -region_data_combined <- load_multitask_regional_data( - region = region, - genotype_list = genotype_list, - phenotype_list = phenotype_list, - covariate_list = covariate_list, - conditions_list_individual = conditions_list_individual, - match_geno_pheno = match_geno_pheno, - association_window = association_window, - region_name_col = region_name_col, - extract_region_name = extract_region_name, - keep_indel = keep_indel, - keep_samples = keep_samples, - maf_cutoff = maf_cutoff, - mac_cutoff = mac_cutoff, - xvar_cutoff = xvar_cutoff, - imiss_cutoff = imiss_cutoff, - sumstat_path_list = sumstat_path_list, - column_file_path_list = column_file_path_list, - LD_meta_file_path_list = LD_meta_file_path_list, - conditions_list_sumstat = conditions_list_sumstat, - match_LD_sumstat = match_LD_sumstat, - n_samples = n_samples, - n_cases = n_cases, - n_controls = n_controls -) - -maf_cutoff = 0.01 -pip_cutoff_to_skip_ind = rep(0, length(phenotype_list)) -pip_cutoff_to_skip_sumstat = rep(0, length(sumstat_path_list)) -qc_method = "rss_qc" - -# run colocboost analysis -colocboost_results <- colocboost_analysis_pipeline( - region_data_combined, - maf_cutoff = maf_cutoff, - pip_cutoff_to_skip_ind = pip_cutoff_to_skip_ind, - pip_cutoff_to_skip_sumstat = pip_cutoff_to_skip_sumstat, - qc_method = qc_method, - xqtl_coloc = TRUE, - joint_gwas = TRUE, - separate_gwas = TRUE -) - -# visualize results for xQTL-only mode -colocboost_plot(colocboost_results$xqtl_coloc) - -# visualize results for joint GWAS mode -colocboost_plot(colocboost_results$joint_gwas) -# visualize results for separate GWAS mode -for (i in 1:length(colocboost_results$separate_gwas)) { - colocboost_plot(colocboost_results$separate_gwas[[i]]) -} +#### Comment out to avoid running this code here, as we do not have real data files in this example #### +#### Please check the example code below #### + +# # load in individual-level and sumstat data +# region_data_combined <- load_multitask_regional_data( +# region = region, +# genotype_list = genotype_list, +# phenotype_list = phenotype_list, +# covariate_list = covariate_list, +# conditions_list_individual = conditions_list_individual, +# match_geno_pheno = match_geno_pheno, +# association_window = association_window, +# region_name_col = region_name_col, +# extract_region_name = extract_region_name, +# keep_indel = keep_indel, +# keep_samples = keep_samples, +# maf_cutoff = maf_cutoff, +# mac_cutoff = mac_cutoff, +# xvar_cutoff = xvar_cutoff, +# imiss_cutoff = imiss_cutoff, +# sumstat_path_list = sumstat_path_list, +# column_file_path_list = column_file_path_list, +# LD_meta_file_path_list = LD_meta_file_path_list, +# conditions_list_sumstat = conditions_list_sumstat, +# match_LD_sumstat = match_LD_sumstat, +# n_samples = n_samples, +# n_cases = n_cases, +# n_controls = n_controls +# ) + +# maf_cutoff = 0.01 +# pip_cutoff_to_skip_ind = rep(0, length(phenotype_list)) +# pip_cutoff_to_skip_sumstat = rep(0, length(sumstat_path_list)) +# qc_method = "rss_qc" + +# # run colocboost analysis +# colocboost_results <- colocboost_analysis_pipeline( +# region_data_combined, +# maf_cutoff = maf_cutoff, +# pip_cutoff_to_skip_ind = pip_cutoff_to_skip_ind, +# pip_cutoff_to_skip_sumstat = pip_cutoff_to_skip_sumstat, +# qc_method = qc_method, +# xqtl_coloc = TRUE, +# joint_gwas = TRUE, +# separate_gwas = TRUE +# ) + +# # visualize results for xQTL-only mode +# colocboost_plot(colocboost_results$xqtl_coloc) + +# # visualize results for joint GWAS mode +# colocboost_plot(colocboost_results$joint_gwas) + +# # visualize results for separate GWAS mode +# for (i in 1:length(colocboost_results$separate_gwas)) { +# colocboost_plot(colocboost_results$separate_gwas[[i]]) +# } + + +#### End of comment out ``` \ No newline at end of file From b6c16f0f6995c912eb48bbf256da025ff67f3342 Mon Sep 17 00:00:00 2001 From: xueweic Date: Sat, 1 Nov 2025 12:47:07 +0000 Subject: [PATCH 09/13] Update documentation --- man/colocboost_plot.Rd | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/man/colocboost_plot.Rd b/man/colocboost_plot.Rd index fe155fc..8e5c7df 100644 --- a/man/colocboost_plot.Rd +++ b/man/colocboost_plot.Rd @@ -18,8 +18,9 @@ colocboost_plot( plot_focal_cos_outcome_only = FALSE, points_color = "grey80", cos_color = NULL, - add_vertical = FALSE, - add_vertical_idx = NULL, + add_highlight = FALSE, + add_highlight_idx = NULL, + add_highlight_style = "vertical_lines", outcome_names = NULL, plot_cols = 2, variant_coord = FALSE, @@ -62,9 +63,11 @@ colocboost_plot( \item{cos_color}{Optional custom colors for CoS.} -\item{add_vertical}{Logical, if TRUE adds vertical lines at specified positions, default is FALSE} +\item{add_highlight}{Logical, if TRUE adds vertical lines at specified positions, default is FALSE} -\item{add_vertical_idx}{Optional indices for vertical lines.} +\item{add_highlight_idx}{Optional indices for add_highlight variables.} + +\item{add_highlight_style}{Optional style of add_highlight variables, default is "vertical_lines", other choice is "star" - red star.} \item{outcome_names}{Optional vector of outcomes names for the subtitle of each figure. \code{outcome_names=NULL} for the outcome name shown in \code{data_info}.} From 7747844d130c7132d189fabbd25186b98211ff59 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Sat, 1 Nov 2025 15:40:46 -0400 Subject: [PATCH 10/13] Update colocboost.R handle duplication variables - remove one of them --- R/colocboost.R | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/R/colocboost.R b/R/colocboost.R index 81e8490..6cb1721 100644 --- a/R/colocboost.R +++ b/R/colocboost.R @@ -247,7 +247,17 @@ colocboost <- function(X = NULL, Y = NULL, # individual data return(xx) }) } - + + # Remove duplicates and report: if duplicate columns of X + X <- lapply(seq_along(X), function(i) { + df <- X[[i]] + if (anyDuplicated(colnames(df))) { + message(paste("Removed duplicate columns from X matrix ", i)) + df[, !duplicated(colnames(df)), drop = FALSE] + } else { + df + } + }) keep_variable_individual <- lapply(X, colnames) if (!is.list(X) & !is.list(Y)) { warning("Error: Input X and Y must be the list containing genotype matrics and all phenotype vectors!") @@ -333,11 +343,19 @@ colocboost <- function(X = NULL, Y = NULL, # individual data warning("Error: effect_est and effect_se should be the same dimension! Please check!") return(NULL) } - variables <- rownames(effect_est) effect_est <- as.matrix(effect_est) effect_se <- as.matrix(effect_se) + variables <- rownames(effect_est) if (is.null(variables)) { variables <- paste0("variant_", 1:nrow(effect_est)) + } else { + # add - if duplciates occurs, only keep one + if (anyDuplicated(variables)) { + keep_idx <- !duplicated(variables) + variables <- variables[keep_idx] + effect_est <- effect_est[keep_idx, , drop = FALSE] + effect_se <- effect_se[keep_idx, , drop = FALSE] + } } sumstat <- list() for (sum_iy in 1:ncol(effect_est)) { @@ -388,6 +406,17 @@ colocboost <- function(X = NULL, Y = NULL, # individual data }) } } + + # Remove duplicates and report: if duplicate variant in summary statistics + sumstat <- lapply(seq_along(sumstat), function(i) { + xx <- sumstat[[i]] + if (anyDuplicated(xx$variant)) { + message(paste("Removed duplicate variants from sumstat", i)) + xx[!duplicated(xx$variant), , drop = FALSE] + } else { + xx + } + }) keep_variable_sumstat <- lapply(sumstat, function(xx) { xx$variant }) From d66edefc6abef08d16ded54ff84ab3f881fae33a Mon Sep 17 00:00:00 2001 From: Daniel Nachun Date: Tue, 4 Nov 2025 10:46:39 -0800 Subject: [PATCH 11/13] drop osx-64 --- .github/workflows/ci.yml | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ee5ae5a..b35dcd3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,39 +46,6 @@ jobs: - name: Check unit test code coverage run: pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml codecov - ci_osx-64: - name: osx-64 CI - runs-on: macos-13 - strategy: - fail-fast: false - matrix: - environment: ["r43", "r44"] - - steps: - - name: Checkout pull request branch - uses: actions/checkout@v5 - with: - fetch-depth: 0 - - - name: Create TOML from recipe - run: | - .github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE} - mkdir /tmp/pixi - mv ${GITHUB_WORKSPACE}/pixi.toml /tmp/pixi - - - name: Setup pixi - uses: prefix-dev/setup-pixi@v0.9.2 - with: - manifest-path: /tmp/pixi/pixi.toml - - - name: Run unit tests - run: pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml devtools_test - - - name: Run R CMD CHECK - run: | - pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml build - pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml rcmdcheck - ci_osx-arm64: name: osx-arm64 CI runs-on: macos-14 From 4e4c0c42e74e6af24ac8933dfd117ed93375149f Mon Sep 17 00:00:00 2001 From: Daniel Nachun Date: Tue, 4 Nov 2025 11:03:39 -0800 Subject: [PATCH 12/13] actually skip vignettes --- vignettes/ColocBoost_Wrapper_Pipeline.Rmd | 185 ++++++++++------------ 1 file changed, 87 insertions(+), 98 deletions(-) diff --git a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd index 49bc4c1..31ebaa9 100644 --- a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd +++ b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd @@ -63,7 +63,7 @@ outputs: The following example demonstrates how to set up input data with 3 phenotypes and 2 cohorts. The first cohort has 2 phenotypes and the second cohort has 1 phenotype. The first phenotype has 2 genes and the second phenotype has 1 gene. -```{r, data-loader-individual} +```{r, data-loader-individual, eval = FALSE} # Example of loading individual-level data region = "chr1:1000000-2000000" genotype_list = c("plink_cohort1.1", "plink_cohort1.2") @@ -85,26 +85,23 @@ imiss_cutoff = 0.9 # More advanced parameters see pecotmr::load_multitask_regional_data() -#### Comment out to avoid running this code here, as we do not have real data files in this example #### -# region_data_individual <- load_multitask_regional_data( -# region = region, -# genotype_list = genotype_list, -# phenotype_list = phenotype_list, -# covariate_list = covariate_list, -# conditions_list_individual = conditions_list_individual, -# match_geno_pheno = match_geno_pheno, -# association_window = association_window, -# region_name_col = region_name_col, -# extract_region_name = extract_region_name, -# keep_indel = keep_indel, -# keep_samples = keep_samples, -# maf_cutoff = maf_cutoff, -# mac_cutoff = mac_cutoff, -# xvar_cutoff = xvar_cutoff, -# imiss_cutoff = imiss_cutoff -# ) -#### End of comment out - +region_data_individual <- load_multitask_regional_data( + region = region, + genotype_list = genotype_list, + phenotype_list = phenotype_list, + covariate_list = covariate_list, + conditions_list_individual = conditions_list_individual, + match_geno_pheno = match_geno_pheno, + association_window = association_window, + region_name_col = region_name_col, + extract_region_name = extract_region_name, + keep_indel = keep_indel, + keep_samples = keep_samples, + maf_cutoff = maf_cutoff, + mac_cutoff = mac_cutoff, + xvar_cutoff = xvar_cutoff, + imiss_cutoff = imiss_cutoff +) ``` @@ -129,7 +126,7 @@ outputs: The following example demonstrates how to set up input data with 2 summary statistics and one LD reference. -```{r, data-loader-sumstat} +```{r, data-loader-sumstat, eval = FALSE} # Example of loading summary statistics sumstat_path_list = c("sumstat1.tsv.gz", "sumstat2.tsv.gz") column_file_path_list = c("column_mapping_sumstat1.yml", "column_mapping_sumstat2.yml") @@ -146,19 +143,17 @@ n_controls = c(0, 40000) # More advanced parameters see pecotmr::load_multitask_regional_data() -#### Comment out to avoid running this code here, as we do not have real data files in this example #### -# region_data_sumstat <- load_multitask_regional_data( -# sumstat_path_list = sumstat_path_list, -# column_file_path_list = column_file_path_list, -# LD_meta_file_path_list = LD_meta_file_path_list, -# conditions_list_sumstat = conditions_list_sumstat, -# match_LD_sumstat = match_LD_sumstat, -# association_window = association_window, -# n_samples = n_samples, -# n_cases = n_cases, -# n_controls = n_controls -# ) -#### End of comment out +region_data_sumstat <- load_multitask_regional_data( + sumstat_path_list = sumstat_path_list, + column_file_path_list = column_file_path_list, + LD_meta_file_path_list = LD_meta_file_path_list, + conditions_list_sumstat = conditions_list_sumstat, + match_LD_sumstat = match_LD_sumstat, + association_window = association_window, + n_samples = n_samples, + n_cases = n_cases, + n_controls = n_controls +) ``` @@ -226,66 +221,60 @@ Example: for sQTL, `list(type_pattern = ".*clu_(\\d+_[+-?]).*", valid_pattern = outputs: - **`colocboost_results`**: List of colocboost objects (with `xqtl_coloc`, `joint_gwas`, `separate_gwas`); Output of the `colocboost_analysis_pipeline` function. If the mode is not run, the corresponding element will be `NULL`. -```{r, colocboost-analysis} - -#### Comment out to avoid running this code here, as we do not have real data files in this example #### -#### Please check the example code below #### - -# # load in individual-level and sumstat data -# region_data_combined <- load_multitask_regional_data( -# region = region, -# genotype_list = genotype_list, -# phenotype_list = phenotype_list, -# covariate_list = covariate_list, -# conditions_list_individual = conditions_list_individual, -# match_geno_pheno = match_geno_pheno, -# association_window = association_window, -# region_name_col = region_name_col, -# extract_region_name = extract_region_name, -# keep_indel = keep_indel, -# keep_samples = keep_samples, -# maf_cutoff = maf_cutoff, -# mac_cutoff = mac_cutoff, -# xvar_cutoff = xvar_cutoff, -# imiss_cutoff = imiss_cutoff, -# sumstat_path_list = sumstat_path_list, -# column_file_path_list = column_file_path_list, -# LD_meta_file_path_list = LD_meta_file_path_list, -# conditions_list_sumstat = conditions_list_sumstat, -# match_LD_sumstat = match_LD_sumstat, -# n_samples = n_samples, -# n_cases = n_cases, -# n_controls = n_controls -# ) - -# maf_cutoff = 0.01 -# pip_cutoff_to_skip_ind = rep(0, length(phenotype_list)) -# pip_cutoff_to_skip_sumstat = rep(0, length(sumstat_path_list)) -# qc_method = "rss_qc" - -# # run colocboost analysis -# colocboost_results <- colocboost_analysis_pipeline( -# region_data_combined, -# maf_cutoff = maf_cutoff, -# pip_cutoff_to_skip_ind = pip_cutoff_to_skip_ind, -# pip_cutoff_to_skip_sumstat = pip_cutoff_to_skip_sumstat, -# qc_method = qc_method, -# xqtl_coloc = TRUE, -# joint_gwas = TRUE, -# separate_gwas = TRUE -# ) - -# # visualize results for xQTL-only mode -# colocboost_plot(colocboost_results$xqtl_coloc) - -# # visualize results for joint GWAS mode -# colocboost_plot(colocboost_results$joint_gwas) - -# # visualize results for separate GWAS mode -# for (i in 1:length(colocboost_results$separate_gwas)) { -# colocboost_plot(colocboost_results$separate_gwas[[i]]) -# } - - -#### End of comment out -``` \ No newline at end of file +```{r, colocboost-analysis, eval = FALSE} + +# load in individual-level and sumstat data +region_data_combined <- load_multitask_regional_data( + region = region, + genotype_list = genotype_list, + phenotype_list = phenotype_list, + covariate_list = covariate_list, + conditions_list_individual = conditions_list_individual, + match_geno_pheno = match_geno_pheno, + association_window = association_window, + region_name_col = region_name_col, + extract_region_name = extract_region_name, + keep_indel = keep_indel, + keep_samples = keep_samples, + maf_cutoff = maf_cutoff, + mac_cutoff = mac_cutoff, + xvar_cutoff = xvar_cutoff, + imiss_cutoff = imiss_cutoff, + sumstat_path_list = sumstat_path_list, + column_file_path_list = column_file_path_list, + LD_meta_file_path_list = LD_meta_file_path_list, + conditions_list_sumstat = conditions_list_sumstat, + match_LD_sumstat = match_LD_sumstat, + n_samples = n_samples, + n_cases = n_cases, + n_controls = n_controls +) + +maf_cutoff = 0.01 +pip_cutoff_to_skip_ind = rep(0, length(phenotype_list)) +pip_cutoff_to_skip_sumstat = rep(0, length(sumstat_path_list)) +qc_method = "rss_qc" + +# run colocboost analysis +colocboost_results <- colocboost_analysis_pipeline( + region_data_combined, + maf_cutoff = maf_cutoff, + pip_cutoff_to_skip_ind = pip_cutoff_to_skip_ind, + pip_cutoff_to_skip_sumstat = pip_cutoff_to_skip_sumstat, + qc_method = qc_method, + xqtl_coloc = TRUE, + joint_gwas = TRUE, + separate_gwas = TRUE +) + +# visualize results for xQTL-only mode +colocboost_plot(colocboost_results$xqtl_coloc) + +# visualize results for joint GWAS mode +colocboost_plot(colocboost_results$joint_gwas) + +# visualize results for separate GWAS mode +for (i in 1:length(colocboost_results$separate_gwas)) { + colocboost_plot(colocboost_results$separate_gwas[[i]]) +} +``` From 281e0763bfb11b44e1c0f54c7bd2d716a5303999 Mon Sep 17 00:00:00 2001 From: Daniel Nachun Date: Tue, 4 Nov 2025 11:27:23 -0800 Subject: [PATCH 13/13] actually skip vignettes --- vignettes/ColocBoost_Wrapper_Pipeline.Rmd | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd index 31ebaa9..c23bc2e 100644 --- a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd +++ b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd @@ -84,7 +84,7 @@ xvar_cutoff = 0 imiss_cutoff = 0.9 # More advanced parameters see pecotmr::load_multitask_regional_data() - +\dontrun{ region_data_individual <- load_multitask_regional_data( region = region, genotype_list = genotype_list, @@ -102,6 +102,7 @@ region_data_individual <- load_multitask_regional_data( xvar_cutoff = xvar_cutoff, imiss_cutoff = imiss_cutoff ) +} ``` @@ -142,7 +143,7 @@ n_controls = c(0, 40000) # More advanced parameters see pecotmr::load_multitask_regional_data() - +\dontrun{ region_data_sumstat <- load_multitask_regional_data( sumstat_path_list = sumstat_path_list, column_file_path_list = column_file_path_list, @@ -154,6 +155,7 @@ region_data_sumstat <- load_multitask_regional_data( n_cases = n_cases, n_controls = n_controls ) +} ``` @@ -224,6 +226,7 @@ outputs: ```{r, colocboost-analysis, eval = FALSE} # load in individual-level and sumstat data +\dontrun{ region_data_combined <- load_multitask_regional_data( region = region, genotype_list = genotype_list, @@ -249,6 +252,7 @@ region_data_combined <- load_multitask_regional_data( n_cases = n_cases, n_controls = n_controls ) +} maf_cutoff = 0.01 pip_cutoff_to_skip_ind = rep(0, length(phenotype_list))