From f294f57cfc8d9150d56ceeb4ce59ad375597e367 Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Fri, 12 Sep 2025 20:58:33 -0400
Subject: [PATCH 01/13] improve computational

Using profvis to check the computational time for each part of functions. Improve on several way:
1. check_pair_jkeach: instead of checking every pair, we check the upper tri first to reduce the burden.
2. Still some LD checking issue caused the LD-free mode.
3. change matrix multiplication using tcrossprod function
---
 R/colocboost_check_update_jk.R | 105 ++++++++++++++++++++++++---------
 R/colocboost_inference.R       |  13 ++--
 R/colocboost_init.R            |   2 +-
 R/colocboost_utils.R           |  20 +++++--
 4 files changed, 99 insertions(+), 41 deletions(-)

diff --git a/R/colocboost_check_update_jk.R b/R/colocboost_check_update_jk.R
index 623faa8..05390c8 100644
--- a/R/colocboost_check_update_jk.R
+++ b/R/colocboost_check_update_jk.R
@@ -66,15 +66,15 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) {
 
     # -- define jk and jk_each
     cor_vals_each <- do.call(cbind, lapply(model_update, function(cc) cc$correlation))
-    abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`)
+    # abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`)
+    abs_cor_vals_each <- abs(cor_vals_each) * rep(adj_dep, each = nrow(cor_vals_each))
     abs_cor_vals <- rowSums(abs_cor_vals_each)
 
-    jk <- which(abs_cor_vals == max(abs_cor_vals))
-    jk <- ifelse(length(jk) == 1, jk, sample(jk, 1))
-    jk_each <- apply(abs_cor_vals_each, 2, function(temp) {
-      jk_temp <- which(temp == max(temp))
-      return(ifelse(length(jk_temp) == 1, jk_temp, sample(jk_temp, 1)))
-    })
+    # jk <- which(abs_cor_vals == max(abs_cor_vals))
+    # jk <- ifelse(length(jk) == 1, jk, sample(jk, 1))
+    # jk_each <- apply(abs_cor_vals_each, 2, which.max)
+    jk <- which.max(abs_cor_vals)
+    jk_each <- sapply(1:ncol(abs_cor_vals_each), function(j) which.max(abs_cor_vals_each[, j]))
     update_jk[c(1, pos.update + 1)] <- c(jk, jk_each)
 
     
@@ -142,7 +142,7 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) {
         update_status[pos.update[true_update]] <- 1
         # - re-define jk_star
         abs_cor_vals_redefine <- rowSums(abs_cor_vals_each[, true_update, drop = FALSE])
-        jk_redefine <- which(abs_cor_vals_redefine == max(abs_cor_vals_redefine))
+        jk_redefine <- which.max(abs_cor_vals_redefine == max(abs_cor_vals_redefine))
         jk_redefine <- ifelse(length(jk_redefine) == 1, jk_redefine, sample(jk_redefine, 1))
         real_update_jk[pos.update[true_update]] <- jk_redefine
       } else {
@@ -351,11 +351,10 @@ boost_check_update_jk_focal <- function(cb_model, cb_model_para, cb_data,
 
     # -- define jk and jk_each
     cor_vals_each <- do.call(cbind, lapply(model_update, function(cc) as.vector(cc$correlation)))
-    abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`)
-    jk_each <- apply(abs_cor_vals_each, 2, function(temp) {
-      jk_temp <- which(temp == max(temp))
-      return(ifelse(length(jk_temp) == 1, jk_temp, sample(jk_temp, 1)))
-    })
+    # abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`)
+    abs_cor_vals_each <- abs(cor_vals_each) * rep(adj_dep, each = nrow(cor_vals_each))
+    # jk_each <- apply(abs_cor_vals_each, 2, which.max)
+    jk_each <- sapply(1:ncol(abs_cor_vals_each), function(j) which.max(abs_cor_vals_each[, j]))
     pp_focal <- which(pos.update == focal_outcome_idx)
     jk_focal <- jk_each[pp_focal]
 
@@ -503,8 +502,8 @@ get_LD_jk1_jk2 <- function(jk1, jk2,
     if (length(XtX) == 1){
       LD_temp <- 0
     } else {
-      jk1.remain <- which(remain_jk == jk1)
-      jk2.remain <- which(remain_jk == jk2)
+      jk1.remain <- match(jk1, remain_jk)
+      jk2.remain <- match(jk2, remain_jk)
       LD_temp <- XtX[jk1.remain, jk2.remain]
     }
   }
@@ -539,33 +538,83 @@ check_jk_jkeach <- function(jk, jk_each,
   return(judge)
 }
 
+# check_pair_jkeach <- function(jk_each,
+#                               pos.update,
+#                               model_update,
+#                               cb_data, X_dict,
+#                               jk_equiv_corr = 0.8,
+#                               jk_equiv_loglik = 0.001) {
+#   data_update <- cb_data$data[pos.update]
+#   # -- check if jk_i ~ jk_j
+#   change_each_pair <- matrix(FALSE, nrow = length(jk_each), ncol = length(jk_each))
+#   for (i in 1:length(jk_each)) {
+#     
+#     jk_i <- jk_each[i]
+#     change_log_jk_i <- model_update[[i]]$change_loglike[jk_i]
+#     for (j in 1:length(jk_each)) {
+#       if (j != i) {
+#         jk_j <- jk_each[j]
+# 
+#         if (!(jk_i %in% data_update[[i]]$variable_miss) & !(jk_j %in% data_update[[i]]$variable_miss)) {
+#           change_log_jk_j <- model_update[[i]]$change_loglike[jk_j]
+#           change_each <- abs(change_log_jk_i - change_log_jk_j)
+#           LD_temp <- get_LD_jk1_jk2(jk_i, jk_j,
+#             X = cb_data$data[[X_dict[i]]]$X,
+#             XtX = cb_data$data[[X_dict[i]]]$XtX,
+#             N = data_update[[i]]$N,
+#             remain_jk = setdiff(1:length(model_update[[i]]$res), data_update[[i]]$variable_miss)
+#           )
+#           change_each_pair[i, j] <- (change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr)
+#         } else {
+#           change_each_pair[i, j] <- FALSE
+#         }
+#       } else {
+#         change_each_pair[i, j] <- FALSE
+#       }
+#     }
+#   }
+#   change_each_pair <- change_each_pair + t(change_each_pair)
+#   return(change_each_pair)
+# }
+
 check_pair_jkeach <- function(jk_each,
                               pos.update,
                               model_update,
                               cb_data, X_dict,
                               jk_equiv_corr = 0.8,
                               jk_equiv_loglik = 0.001) {
+  
+  detect_func <- function(idx, jk_i, jk_j){
+    change_log_jk_i <- model_update[[idx]]$change_loglike[jk_i]
+    change_log_jk_j <- model_update[[idx]]$change_loglike[jk_j]
+    change_each <- abs(change_log_jk_i - change_log_jk_j)
+    LD_temp <- get_LD_jk1_jk2(jk_i, jk_j,
+                              X = cb_data$data[[X_dict[idx]]]$X,
+                              XtX = cb_data$data[[X_dict[idx]]]$XtX,
+                              N = data_update[[idx]]$N,
+                              remain_jk = setdiff(1:length(model_update[[idx]]$res), data_update[[idx]]$variable_miss)
+    )
+    return((change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr))
+  }
+  
   data_update <- cb_data$data[pos.update]
   # -- check if jk_i ~ jk_j
   change_each_pair <- matrix(FALSE, nrow = length(jk_each), ncol = length(jk_each))
   for (i in 1:length(jk_each)) {
-    
     jk_i <- jk_each[i]
-    change_log_jk_i <- model_update[[i]]$change_loglike[jk_i]
-    for (j in 1:length(jk_each)) {
+    for (j in i:length(jk_each)) {
       if (j != i) {
         jk_j <- jk_each[j]
-
         if (!(jk_i %in% data_update[[i]]$variable_miss) & !(jk_j %in% data_update[[i]]$variable_miss)) {
-          change_log_jk_j <- model_update[[i]]$change_loglike[jk_j]
-          change_each <- abs(change_log_jk_i - change_log_jk_j)
-          LD_temp <- get_LD_jk1_jk2(jk_i, jk_j,
-            X = cb_data$data[[X_dict[i]]]$X,
-            XtX = cb_data$data[[X_dict[i]]]$XtX,
-            N = data_update[[i]]$N,
-            remain_jk = setdiff(1:length(model_update[[i]]$res), data_update[[i]]$variable_miss)
-          )
-          change_each_pair[i, j] <- (change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr)
+          change_each_pair[i, j] <- detect_func(i, jk_i, jk_j)
+          # if jk_i and jk_j are equivalent on dataset i, then we don't need to check dataset j
+          if ( !change_each_pair[i, j] ){
+            if (!(jk_i %in% data_update[[j]]$variable_miss) & !(jk_j %in% data_update[[j]]$variable_miss)) {
+              change_each_pair[j, i] <- detect_func(j, jk_i, jk_j)
+            } else {
+              change_each_pair[j, i] <- FALSE
+            }
+          }
         } else {
           change_each_pair[i, j] <- FALSE
         }
diff --git a/R/colocboost_inference.R b/R/colocboost_inference.R
index dc53908..e76d708 100644
--- a/R/colocboost_inference.R
+++ b/R/colocboost_inference.R
@@ -132,7 +132,8 @@ get_modularity <- function(Weight, B) {
   
   if (m == 0) return(0)
   
-  cate <- B %*% t(B)
+  # cate <- B %*% t(B)
+  cate <- tcrossprod(B)
   
   if (m_pos == 0 & m_neg == 0) return(0)
   
@@ -190,12 +191,12 @@ get_n_cluster <- function(hc, Sigma, m = ncol(Sigma), min_cluster_corr = 0.8) {
 #' @noRd
 w_purity <- function(weights, X = NULL, Xcorr = NULL, N = NULL, n = 100, coverage = 0.95,
                      min_abs_corr = 0.5, median_abs_corr = NULL, miss_idx = NULL) {
-  tmp <- apply(weights, 2, w_cs, coverage = coverage)
-  tmp_purity <- apply(tmp, 2, function(tt) {
-    pos <- which(tt == 1)
+  
+  tmp_purity <- apply(weights, 2, function(w) {
+    pos <- w_cs(w, coverage = coverage)
     # deal with missing snp here
     if (!is.null(Xcorr)) {
-      pos <- match(pos, setdiff(1:length(tmp), miss_idx))
+      pos <- match(pos, setdiff(1:length(w), miss_idx))
     }
     get_purity(pos, X = X, Xcorr = Xcorr, N = N, n = n)
   })
@@ -420,7 +421,7 @@ get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100) {
       corr[which(is.na(corr))] <- 0
       value <- abs(get_upper_tri(corr))
     } else {
-      if (sum(Xcorr) == 1){
+      if (length(Xcorr) == 1){
         value <- 0
       } else {
         Xcorr <- Xcorr # if (!is.null(N)) Xcorr/(N-1) else Xcorr
diff --git a/R/colocboost_init.R b/R/colocboost_init.R
index 8685b01..ea27ce7 100644
--- a/R/colocboost_init.R
+++ b/R/colocboost_init.R
@@ -619,7 +619,7 @@ process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dic
     Z_extend[pos_target] <- current_z[pos_z]
 
     # Calculate submatrix for each unique entry (not duplicates)
-    if (sum(current_ld_matrix) == 1){
+    if (length(current_ld_matrix) == 1){
       ld_submatrix <- current_ld_matrix
     } else {
       ld_submatrix <- NULL
diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R
index c4da011..0407633 100644
--- a/R/colocboost_utils.R
+++ b/R/colocboost_utils.R
@@ -426,15 +426,20 @@ get_max_profile <- function(cb_obj, check_null_max = 0.025,
 }
 
 
+# ### Function for check cs for each weight
+# w_cs <- function(weights, coverage = 0.95) {
+#   indices <- unlist(get_in_cos(weights, coverage = coverage))
+#   result <- rep(0, length(weights))
+#   result[indices] <- 1
+#   return(result)
+# }
+
 ### Function for check cs for each weight
 w_cs <- function(weights, coverage = 0.95) {
   indices <- unlist(get_in_cos(weights, coverage = coverage))
-  result <- rep(0, length(weights))
-  result[indices] <- 1
-  return(result)
+  return(indices)
 }
 
-
 #' Pure R implementation (fallback)
 #' @noRd
 get_merge_ordered_with_indices <- function(vector_list) {
@@ -505,12 +510,14 @@ get_merge_ordered_with_indices <- function(vector_list) {
   # Step 4: Topological sort using Kahn's algorithm
   # Start with nodes that have no incoming edges
   queue <- all_elements[sapply(all_elements, function(elem) in_degree[[elem]] == 0)]
-  result <- character()
+  result <- list()
+  result_idx <- 1
   while (length(queue) > 0) {
     # Take the first element from the queue
     current <- queue[1]
     queue <- queue[-1]
-    result <- c(result, current)
+    result[[result_idx]] <- current  # List assignment is fast
+    result_idx <- result_idx + 1
     # Process all neighbors (elements that must come after current)
     neighbors <- graph[[current]]
     for (next_elem in neighbors) {
@@ -520,6 +527,7 @@ get_merge_ordered_with_indices <- function(vector_list) {
       }
     }
   }
+  result <- unlist(result)
   # Step 5: Check for cycles and use fallback if needed
   if (length(result) != n_elements) {
     # Different variable orders detected - use priority-based fallback

From 4fd1b757c44604d9e732f5eba673150bd25b5e43 Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Fri, 12 Sep 2025 21:36:13 -0400
Subject: [PATCH 02/13] Enhanced Highlight Functionality with Flexible Styling
 Options

The highlighting feature has been updated to provide greater flexibility in visualization options. In previous versions, additional highlight variables in the colocboost plot could only be displayed as vertical lines by setting add_vertical = TRUE.
The updated colocboost_plot function now offers more versatile highlighting capabilities through the new add_highlight = TRUE parameter, which allows users to display highlight variables as either vertical lines or red star markers, providing improved customization for data visualization needs.
---
 R/colocboost_plot.R                           | 148 ++++++++++--------
 tests/testthat/test_plot.R                    |   4 +-
 tests/testthat/test_utils.R                   |   6 +-
 vignettes/Visualization_ColocBoost_Output.Rmd |  11 +-
 4 files changed, 94 insertions(+), 75 deletions(-)

diff --git a/R/colocboost_plot.R b/R/colocboost_plot.R
index 90141d6..1f0f7c1 100644
--- a/R/colocboost_plot.R
+++ b/R/colocboost_plot.R
@@ -14,8 +14,9 @@
 #' @param plot_focal_cos_outcome_only Logical, if TRUE only plots colocalization including at least on colocalized outcome with focal outcome, default is FALSE.
 #' @param points_color Background color for non-colocalized variables, default is "grey80".
 #' @param cos_color Optional custom colors for CoS.
-#' @param add_vertical Logical, if TRUE adds vertical lines at specified positions, default is FALSE
-#' @param add_vertical_idx Optional indices for vertical lines.
+#' @param add_highlight Logical, if TRUE adds vertical lines at specified positions, default is FALSE
+#' @param add_highlight_style Optional style of add_highlight variables, default is "vertical_lines", other choice is "star" - red star.
+#' @param add_highlight_idx Optional indices for add_highlight variables.
 #' @param outcome_names Optional vector of outcomes names for the subtitle of each figure. \code{outcome_names=NULL} for the outcome name shown in \code{data_info}.
 #' @param plot_cols Number of columns in the plot grid, default is 2. If you have many colocalization. please consider increasing this.
 #' @param variant_coord Logical, if TRUE uses variant coordinates on x-axis, default is FALSE. This is required the variable names including position information.
@@ -78,8 +79,9 @@ colocboost_plot <- function(cb_output, y = "log10p",
                             plot_focal_cos_outcome_only = FALSE,
                             points_color = "grey80",
                             cos_color = NULL,
-                            add_vertical = FALSE,
-                            add_vertical_idx = NULL,
+                            add_highlight = FALSE,
+                            add_highlight_idx = NULL,
+                            add_highlight_style = "vertical_lines",
                             outcome_names = NULL,
                             plot_cols = 2,
                             variant_coord = FALSE,
@@ -102,33 +104,34 @@ colocboost_plot <- function(cb_output, y = "log10p",
   if (!inherits(cb_output, "colocboost")) {
     stop("Input of colocboost_plot must be a 'colocboost' object!")
   }
-
+  
   # get cb_plot_input data from colocboost results
   cb_plot_input <- get_input_plot(cb_output,
-    plot_cos_idx = plot_cos_idx,
-    variant_coord = variant_coord,
-    outcome_names = outcome_names,
-    plot_focal_only = plot_focal_only,
-    plot_focal_cos_outcome_only = plot_focal_cos_outcome_only,
-    show_cos_to_uncoloc = show_cos_to_uncoloc,
-    show_cos_to_uncoloc_idx = show_cos_to_uncoloc_idx,
-    show_cos_to_uncoloc_outcome = show_cos_to_uncoloc_outcome,
-    plot_ucos = plot_ucos, plot_ucos_idx = plot_ucos_idx
+                                  plot_cos_idx = plot_cos_idx,
+                                  variant_coord = variant_coord,
+                                  outcome_names = outcome_names,
+                                  plot_focal_only = plot_focal_only,
+                                  plot_focal_cos_outcome_only = plot_focal_cos_outcome_only,
+                                  show_cos_to_uncoloc = show_cos_to_uncoloc,
+                                  show_cos_to_uncoloc_idx = show_cos_to_uncoloc_idx,
+                                  show_cos_to_uncoloc_outcome = show_cos_to_uncoloc_outcome,
+                                  plot_ucos = plot_ucos, plot_ucos_idx = plot_ucos_idx
   )
   # get initial set up of plot
   cb_plot_init <- plot_initial(cb_plot_input,
-    y = y, points_color = points_color, cos_color = cos_color,
-    ylim_each = ylim_each, title_specific = title_specific,
-    outcome_legend_pos = outcome_legend_pos, outcome_legend_size = outcome_legend_size,
-    cos_legend_pos = cos_legend_pos, plot_ucos = plot_ucos,
-    show_variable = show_variable, lab_style = lab_style, axis_style = axis_style,
-    title_style = title_style, ...
+                               y = y, points_color = points_color, cos_color = cos_color,
+                               ylim_each = ylim_each, title_specific = title_specific,
+                               outcome_legend_pos = outcome_legend_pos, outcome_legend_size = outcome_legend_size,
+                               cos_legend_pos = cos_legend_pos, plot_ucos = plot_ucos,
+                               show_variable = show_variable, lab_style = lab_style, axis_style = axis_style,
+                               title_style = title_style, ...
   )
-
+  
   colocboost_plot_basic <- function(cb_plot_input, cb_plot_init,
                                     outcome_idx = NULL, plot_all_outcome = FALSE,
                                     grange = NULL, plot_cols = 2,
-                                    add_vertical = FALSE, add_vertical_idx = NULL,
+                                    add_highlight = FALSE, add_highlight_idx = NULL,
+                                    add_highlight_style = "vertical_lines",
                                     show_top_variables = TRUE,
                                     ...) {
     args <- list(...)
@@ -147,12 +150,12 @@ colocboost_plot <- function(cb_output, y = "log10p",
     args$font.lab <- cb_plot_init$lab_face
     # - change position
     cb_plot_init$outcome_legend_pos <- switch(cb_plot_init$outcome_legend_pos,
-      "right" = 4,
-      "left" = 2,
-      "top" = 3,
-      "bottom" = 1
+                                              "right" = 4,
+                                              "left" = 2,
+                                              "top" = 3,
+                                              "bottom" = 1
     )
-
+    
     # - begin plotting
     coloc_cos <- cb_plot_input$cos
     outcomes <- cb_plot_input$outcomes
@@ -229,12 +232,7 @@ colocboost_plot <- function(cb_output, y = "log10p",
               cex = cb_plot_init$outcome_legend_size, font = 1
         )
       }
-      if (add_vertical) {
-        for (iii in 1:length(add_vertical_idx)) {
-          abline(v = add_vertical_idx[iii], col = "#E31A1C", lwd = 1.5, lty = "dashed")
-        }
-      }
-
+      
       # mark variables in CoS to colocalized outcomes
       if (!is.null(coloc_cos)) {
         n.coloc <- length(coloc_cos)
@@ -242,7 +240,7 @@ colocboost_plot <- function(cb_output, y = "log10p",
         if (length(y)==1){coloc_index = lapply(coloc_index, function(i) 1 )}
         legend_text <- list(col = vector())
         legend_text$col <- head(cb_plot_init$col, n.coloc)
-
+        
         # check which coloc set for this outcome
         p.coloc <- sapply(coloc_index, function(idx) sum(idx == iy) != 0)
         p.coloc <- which(p.coloc)
@@ -261,7 +259,7 @@ colocboost_plot <- function(cb_output, y = "log10p",
             points(x_hits, y_hits, pch = 21, bg = legend_text$col[i.cs], col = "#E31A1C", cex = 3, lwd = 3)
           }
         }
-
+        
         # mark variables in CoS to uncolocalized outcomes
         uncoloc <- cb_plot_input$uncoloc
         if (!is.null(uncoloc)) {
@@ -276,8 +274,8 @@ colocboost_plot <- function(cb_output, y = "log10p",
               x0 <- intersect(args$x, cs)
               y1 <- args$y[match(x0, args$x)]
               points(x0, y1,
-                pch = 4, col = adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 0.4),
-                cex = 1.5, lwd = 1.5
+                     pch = 4, col = adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 0.4),
+                     cex = 1.5, lwd = 1.5
               )
               shape_col <- c(shape_col, adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 1))
               texts_col <- c(texts_col, adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 0.8))
@@ -294,35 +292,51 @@ colocboost_plot <- function(cb_output, y = "log10p",
           if (length(texts) == 0) {
             next
           }
-
+          
           # Get current plot area coordinates
           usr <- par("usr")
           x_pos <- usr[1] + cb_plot_init$cos_legend_pos[1] * (usr[2] - usr[1])  # 5% from left edge
           y_pos <- usr[3] + cb_plot_init$cos_legend_pos[2] * (usr[4] - usr[3])  # 50% from bottom edge
           legend(x = x_pos, y = y_pos, texts,
-            bty = "n", col = shape_col, text.col = texts_col,
-            cex = 1.5, pt.cex = 1.5, pch = 4, x.intersp = cos_legend_pos[3], y.intersp = cos_legend_pos[4]
+                 bty = "n", col = shape_col, text.col = texts_col,
+                 cex = 1.5, pt.cex = 1.5, pch = 4, x.intersp = cos_legend_pos[3], y.intersp = cos_legend_pos[4]
           )
         }
       }
+      if (add_highlight) {
+        for (iii in 1:length(add_highlight_idx)) {
+          if ( !(add_highlight_style %in% c("vertical_lines", "star")) ){
+            message("add_highlight_style is not 'vertical_lines' and 'star', defaulting to 'vertical_lines'")
+            add_highlight_style = "vertical_lines"
+          }
+          if (add_highlight_style == "vertical_lines"){
+            abline(v = add_highlight_idx[iii], col = "#E31A1C", lwd = 1.5, lty = "dashed")
+          } else if (add_highlight_style == "star") {
+            points(add_highlight_idx[iii], args$y[match(add_highlight_idx[iii],args$x)], 
+                   pch = 8, col = "#E31A1C", lwd = 2, lty = "dashed", cex = 2.5)
+          } 
+          
+        }
+      }
     }
     if (!is.null(cb_plot_init$title)) {
       mtext(cb_plot_init$title,
-        side = 3, line = 0, outer = TRUE,
-        cex = cb_plot_init$title_size, font = cb_plot_init$title_face
+            side = 3, line = 0, outer = TRUE,
+            cex = cb_plot_init$title_size, font = cb_plot_init$title_face
       )
     }
     return(invisible())
   }
-
+  
   colocboost_plot_basic(cb_plot_input, cb_plot_init,
-    grange = grange,
-    outcome_idx = outcome_idx, 
-    plot_all_outcome = plot_all_outcome,
-    plot_cols = plot_cols,
-    add_vertical = add_vertical, add_vertical_idx = add_vertical_idx,
-    show_top_variables = show_top_variables,
-    ...
+                        grange = grange,
+                        outcome_idx = outcome_idx, 
+                        plot_all_outcome = plot_all_outcome,
+                        plot_cols = plot_cols,
+                        add_highlight = add_highlight, add_highlight_idx = add_highlight_idx,
+                        add_highlight_style = add_highlight_style,
+                        show_top_variables = show_top_variables,
+                        ...
   )
 }
 
@@ -339,7 +353,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL,
                            show_cos_to_uncoloc_outcome = NULL,
                            plot_ucos = FALSE,
                            plot_ucos_idx = NULL) {
-
+  
   # check ucos exists
   if (plot_ucos && !"ucos_details" %in% names(cb_output)) {
     warning(
@@ -354,7 +368,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL,
     cb_output$data_info$outcome_info$outcome_names <- outcome_names
     names(cb_output$data_info$z) <- outcome_names
   }
-
+  
   # extract results from colocboost
   analysis_outcome <- cb_output$data_info$outcome_info$outcome_names
   focal_outcome_idx <- which(cb_output$data_info$outcome_info$is_focal)
@@ -371,7 +385,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL,
   Z <- cb_output$data_info$z
   coef <- cb_output$data_info$coef
   vcp <- list(as.numeric(cb_output$vcp))
-
+  
   # if finemapping
   if (cb_output$data_info$n_outcomes == 1) {
     cb_output$cos_details$cos$cos_variables <- cb_output$ucos_details$ucos$ucos_variables
@@ -406,7 +420,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL,
       }
     })
     ncos <- length(cb_output$cos_details$cos$cos_index)
-
+    
     select_cs <- 1:ncos
     if (!is.null(plot_cos_idx)) {
       if (length(setdiff(plot_cos_idx, c(1:ncos))) != 0) {
@@ -477,7 +491,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL,
     "coloc_index" = coloc_index,
     "select_cos" = select_cs
   )
-
+  
   # check plot uncolocalizaed confidence sets from ucos_details
   if (plot_ucos & cb_output$data_info$n_outcomes > 1){
     ucos_details <- cb_output$ucos_details
@@ -528,7 +542,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL,
       plot_input$ucos_cos_int_weights <- combined
     }
   }
-
+  
   # check if plot cos to uncolocalized outcome
   # use the updated coloc_cos and related components from plot_input if available
   coloc_cos <- plot_input$cos
@@ -613,7 +627,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL,
     )
     plot_input$uncoloc <- uncoloc
   }
-
+  
   class(plot_input) <- "colocboost"
   return(plot_input)
 }
@@ -634,11 +648,11 @@ plot_initial <- function(cb_plot_input, y = "log10p",
                          ...) {
   args <- list(...)
   if (!exists("pch", args)) args$pch <- 16
-
+  
   # - set background point color and cos color pools
   args$bg <- points_color
   if (is.null(cos_color)) {
-
+    
     # cos_color <- c(
     #   "#377EB8", "#E69F00", "#33A02C", "#984EA3", "#F46D43",
     #   "#A65628", "#1F4E79", "#B2182B", "#D73027", "#F781BF",
@@ -688,12 +702,12 @@ plot_initial <- function(cb_plot_input, y = "log10p",
       "#D5D8DC", "#BDC3C7", "#95A5A6", "#7F8C8D", "#17202A",
       "#808080", "#A9A9A9", "#C0C0C0", "#696969", "#778899"
     )
-
-
+    
+    
     # cos_color <- c("#1F70A9", "#33A02C", "#CAB2D6", "#EA7827")
   }
   args$col <- cos_color[cb_plot_input$select_cos]
-
+  
   # - set data and x-lab and y-lab
   if (y == "log10p") {
     plot_data <- lapply(cb_plot_input$Zscores, function(z) {
@@ -729,12 +743,12 @@ plot_initial <- function(cb_plot_input, y = "log10p",
   if (!exists("ylab", args)) args$ylab <- ylab
   args$lab_size <- as.numeric(lab_style[1])
   args$lab_face <- lab_style[2]
-
+  
   # - set title format
   args$title <- title_specific
   args$title_size <- as.numeric(title_style[1])
   args$title_face <- title_style[2]
-
+  
   # - set x-axis and y-axis
   args$x <- cb_plot_input$x$pos
   args$y <- plot_data
@@ -744,7 +758,7 @@ plot_initial <- function(cb_plot_input, y = "log10p",
   }
   args$axis_size <- as.numeric(axis_style[1])
   args$axis_face <- axis_style[2]
-
+  
   # - set ylim for each subfigure
   if (exists("ylim", args)) {
     ymax <- rep(args$ylim[2], length(args$y))
@@ -776,7 +790,7 @@ plot_initial <- function(cb_plot_input, y = "log10p",
   }
   args$ymax <- ymax
   args$ymin <- ymin
-
+  
   # - set legend text position and format
   args$outcome_legend_pos <- outcome_legend_pos
   args$outcome_legend_size <- outcome_legend_size
@@ -788,7 +802,7 @@ plot_initial <- function(cb_plot_input, y = "log10p",
     args$outcome_legend_angle <- 0
   }
   args$cos_legend_pos <- cos_legend_pos
-
+  
   return(args)
 }
 
diff --git a/tests/testthat/test_plot.R b/tests/testthat/test_plot.R
index 775d3fe..d49aace 100644
--- a/tests/testthat/test_plot.R
+++ b/tests/testthat/test_plot.R
@@ -176,7 +176,9 @@ test_that("colocboost_plot handles layout options", {
 test_that("colocboost_plot handles additional visualization options", {
   
   # Test with vertical line options
-  expect_error(suppressWarnings(colocboost_plot(cb_res, add_vertical = TRUE, add_vertical_idx = c(5, 10))), NA)
+  expect_error(suppressWarnings(colocboost_plot(cb_res, add_highlight = TRUE, add_highlight_idx = c(5, 10))), NA)
+  expect_error(suppressWarnings(colocboost_plot(cb_res, add_highlight = TRUE, add_highlight_idx = c(5, 10), add_highlight_style = "vertical_lines")), NA)
+  expect_error(suppressWarnings(colocboost_plot(cb_res, add_highlight = TRUE, add_highlight_idx = c(5, 10), add_highlight_style = "star")), NA)
   
   # Test with show_top_variables option
   expect_error(suppressWarnings(colocboost_plot(cb_res, show_top_variables = TRUE)), NA)
diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R
index a622540..d3d480d 100644
--- a/tests/testthat/test_utils.R
+++ b/tests/testthat/test_utils.R
@@ -85,12 +85,12 @@ test_that("w_cs correctly identifies confidence set for weight vector", {
   result <- w_cs(w, coverage = 0.8)
   
   # Expected result for 80% coverage: w[1] + w[2] = 0.8, so first 2 elements should be 1
-  expected <- c(1, 1, 0, 0)
+  expected <- c(1,2)
   expect_equal(result, expected)
   
   # Test with different coverage
   result2 <- w_cs(w, coverage = 0.9)
-  expected2 <- c(1, 1, 1, 1)  # First 4 elements cover 90% since 3 and 4 with the same 0.1 weight 
+  expected2 <- c(1,2,3,4)  # First 4 elements cover 90% since 3 and 4 with the same 0.1 weight 
   expect_equal(result2, expected2)
 })
 
@@ -490,7 +490,7 @@ test_that("get_merge_ordered_with_indices handles conflicting variable orders co
 test_that("get_merge_ordered_with_indices handles edge cases", {
   
   # Test with NULL inputs
-  expect_equal(get_merge_ordered_with_indices(list()), character(0))
+  expect_equal(get_merge_ordered_with_indices(list()), NULL)
   
   # Test with single vector
   expect_equal(get_merge_ordered_with_indices(list(c("A", "B", "C"))), c("A", "B", "C"))
diff --git a/vignettes/Visualization_ColocBoost_Output.Rmd b/vignettes/Visualization_ColocBoost_Output.Rmd
index 1ffd7d9..8ed3e5b 100644
--- a/vignettes/Visualization_ColocBoost_Output.Rmd
+++ b/vignettes/Visualization_ColocBoost_Output.Rmd
@@ -94,9 +94,11 @@ There are three options available for plotting the CoS variants to uncolocalized
 colocboost_plot(res, show_cos_to_uncoloc = TRUE)
 ```
 
-### 2.4. Plot with an added vertical line
+### 2.4. Plot with added highlight points
 
-You can add a vertical line to the plot by setting `add_vertical = TRUE` and `add_vertical_idx = **`. This will add a vertical line at the specified index.
+You can highlight specific variants in the plot by setting `add_highlight = TRUE` and `add_highlight_idx = **`. 
+This will add red dashed vertical lines (defult `add_highlight_style = "vertical_lines"`) at the specified index you want to highlight. 
+Alternatively, you can use `add_highlight_style = "star"` to change the highlight style to the red star for the specified variants.
 For example, to add a vertical line at true causal variants, you can set `add_vertical_idx = unique(unlist(Ind_5traits$true_effect_variants))`.
 Following plot also shows the top variants.
 
@@ -104,8 +106,9 @@ Following plot also shows the top variants.
 ```{r vertical-plot}
 colocboost_plot(
   res, show_top_variables = TRUE, 
-  add_vertical = TRUE, 
-  add_vertical_idx = unique(unlist(Ind_5traits$true_effect_variants))
+  add_highlight = TRUE, 
+  add_highlight_idx = unique(unlist(Ind_5traits$true_effect_variants)),
+  add_highlight_style = "star"
 )
 ```
 

From 220f5f7f9783a0872685df5b4e32162c8c0ef1aa Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Sun, 14 Sep 2025 19:47:17 -0400
Subject: [PATCH 03/13] Optimized LD calculation for Outcome-Specific Model
 Updates

To enhance computational efficiency, we pre-calculate LD values for each pair of outcome-specific best parameter updates at each boost update.
---
 R/colocboost_check_update_jk.R | 100 +++++++++++++--------------------
 R/colocboost_init.R            |   3 +-
 R/colocboost_one_causal.R      |  10 ++--
 R/colocboost_update.R          |   3 +-
 R/colocboost_workhorse.R       |   3 +-
 5 files changed, 49 insertions(+), 70 deletions(-)

diff --git a/R/colocboost_check_update_jk.R b/R/colocboost_check_update_jk.R
index 05390c8..d449b03 100644
--- a/R/colocboost_check_update_jk.R
+++ b/R/colocboost_check_update_jk.R
@@ -538,66 +538,54 @@ check_jk_jkeach <- function(jk, jk_each,
   return(judge)
 }
 
-# check_pair_jkeach <- function(jk_each,
-#                               pos.update,
-#                               model_update,
-#                               cb_data, X_dict,
-#                               jk_equiv_corr = 0.8,
-#                               jk_equiv_loglik = 0.001) {
-#   data_update <- cb_data$data[pos.update]
-#   # -- check if jk_i ~ jk_j
-#   change_each_pair <- matrix(FALSE, nrow = length(jk_each), ncol = length(jk_each))
-#   for (i in 1:length(jk_each)) {
-#     
-#     jk_i <- jk_each[i]
-#     change_log_jk_i <- model_update[[i]]$change_loglike[jk_i]
-#     for (j in 1:length(jk_each)) {
-#       if (j != i) {
-#         jk_j <- jk_each[j]
-# 
-#         if (!(jk_i %in% data_update[[i]]$variable_miss) & !(jk_j %in% data_update[[i]]$variable_miss)) {
-#           change_log_jk_j <- model_update[[i]]$change_loglike[jk_j]
-#           change_each <- abs(change_log_jk_i - change_log_jk_j)
-#           LD_temp <- get_LD_jk1_jk2(jk_i, jk_j,
-#             X = cb_data$data[[X_dict[i]]]$X,
-#             XtX = cb_data$data[[X_dict[i]]]$XtX,
-#             N = data_update[[i]]$N,
-#             remain_jk = setdiff(1:length(model_update[[i]]$res), data_update[[i]]$variable_miss)
-#           )
-#           change_each_pair[i, j] <- (change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr)
-#         } else {
-#           change_each_pair[i, j] <- FALSE
-#         }
-#       } else {
-#         change_each_pair[i, j] <- FALSE
-#       }
-#     }
-#   }
-#   change_each_pair <- change_each_pair + t(change_each_pair)
-#   return(change_each_pair)
-# }
-
 check_pair_jkeach <- function(jk_each,
                               pos.update,
                               model_update,
                               cb_data, X_dict,
                               jk_equiv_corr = 0.8,
                               jk_equiv_loglik = 0.001) {
-  
-  detect_func <- function(idx, jk_i, jk_j){
+
+
+  #' @importFrom stats cor
+  get_LD_jk_each <- function(jk_each,
+                             X = NULL, XtX = NULL, N = NULL,
+                             remain_jk = NULL) {
+    if (!is.null(X)) {
+      LD_temp <- suppressWarnings({
+        get_cormat(X[, jk_each])
+      })
+      LD_temp[which(is.na(LD_temp))] <- 0
+      # LD_temp <- LD_temp[1, 2]
+    } else if (!is.null(XtX)) {
+      if (length(XtX) == 1){
+        LD_temp <- matrix(0, length(jk_each), length(jk_each))
+      } else {
+        jk.remain <- match(jk_each, remain_jk)
+        LD_temp <- XtX[jk.remain, jk.remain]
+        LD_temp[which(is.na(LD_temp))] <- 0
+      }
+    }
+    return(LD_temp)
+  }
+
+  detect_func <- function(idx, LD_all, jk_i, jk_j, i, j){
     change_log_jk_i <- model_update[[idx]]$change_loglike[jk_i]
     change_log_jk_j <- model_update[[idx]]$change_loglike[jk_j]
     change_each <- abs(change_log_jk_i - change_log_jk_j)
-    LD_temp <- get_LD_jk1_jk2(jk_i, jk_j,
-                              X = cb_data$data[[X_dict[idx]]]$X,
-                              XtX = cb_data$data[[X_dict[idx]]]$XtX,
-                              N = data_update[[idx]]$N,
-                              remain_jk = setdiff(1:length(model_update[[idx]]$res), data_update[[idx]]$variable_miss)
-    )
+    LD_temp <- LD_all[[idx]][i, j]
     return((change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr))
   }
-  
+
   data_update <- cb_data$data[pos.update]
+  LD_all <- lapply(1:length(jk_each), function(idx){
+    get_LD_jk_each(jk_each,
+                   X = cb_data$data[[X_dict[idx]]]$X,
+                   XtX = cb_data$data[[X_dict[idx]]]$XtX,
+                   N = data_update[[idx]]$N,
+                   remain_jk = setdiff(1:length(model_update[[idx]]$res), data_update[[idx]]$variable_miss)
+    )
+  })
+
   # -- check if jk_i ~ jk_j
   change_each_pair <- matrix(FALSE, nrow = length(jk_each), ncol = length(jk_each))
   for (i in 1:length(jk_each)) {
@@ -605,18 +593,10 @@ check_pair_jkeach <- function(jk_each,
     for (j in i:length(jk_each)) {
       if (j != i) {
         jk_j <- jk_each[j]
-        if (!(jk_i %in% data_update[[i]]$variable_miss) & !(jk_j %in% data_update[[i]]$variable_miss)) {
-          change_each_pair[i, j] <- detect_func(i, jk_i, jk_j)
-          # if jk_i and jk_j are equivalent on dataset i, then we don't need to check dataset j
-          if ( !change_each_pair[i, j] ){
-            if (!(jk_i %in% data_update[[j]]$variable_miss) & !(jk_j %in% data_update[[j]]$variable_miss)) {
-              change_each_pair[j, i] <- detect_func(j, jk_i, jk_j)
-            } else {
-              change_each_pair[j, i] <- FALSE
-            }
-          }
-        } else {
-          change_each_pair[i, j] <- FALSE
+        change_each_pair[i, j] <- detect_func(idx = i, LD_all, jk_i, jk_j, i, j)
+        # if jk_i and jk_j are equivalent on dataset i, then we don't need to check dataset j
+        if ( !change_each_pair[i, j] ){
+          change_each_pair[j, i] <- detect_func(idx = j, LD_all, jk_i, jk_j, i, j)
         }
       } else {
         change_each_pair[i, j] <- FALSE
diff --git a/R/colocboost_init.R b/R/colocboost_init.R
index ea27ce7..edca937 100644
--- a/R/colocboost_init.R
+++ b/R/colocboost_init.R
@@ -163,7 +163,8 @@ colocboost_init_model <- function(cb_data,
     tmp <- list(
       "res" = NULL,
       "beta" = rep(0, P),
-      "weights_path" = c(),
+      # "weights_path" = c(),
+      "weights_path" = list(),
       "profile_loglike_each" = NULL,
       "obj_path" = 999999,
       "obj_single" = 999999,
diff --git a/R/colocboost_one_causal.R b/R/colocboost_one_causal.R
index 0ed96b5..09692d5 100644
--- a/R/colocboost_one_causal.R
+++ b/R/colocboost_one_causal.R
@@ -79,9 +79,8 @@ colocboost_one_iteration <- function(cb_model, cb_model_para, cb_data) {
   cb_model_para[rm_elements] <- NULL
   for (i in 1:length(cb_model)) {
     cb_model[[i]]$obj_path <- as.numeric(unlist(cb_model[[i]]$obj_path[-1]))
-  }
-  for (i in 1:length(cb_model)) {
     cb_model[[i]]$obj_single <- as.numeric(unlist(cb_model[[i]]$obj_single[-1]))
+    cb_model[[i]]$weights_path <- do.call(rbind, cb_model[[i]]$weights_path)
   }
 
   cb_obj <- list("cb_data" = cb_data, "cb_model" = cb_model, "cb_model_para" = cb_model_para)
@@ -238,7 +237,7 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data) {
         cb_model_para, 
         cb_data
       )
-      weights <- rbind(weights, cb_model_tmp[[iy]]$weights_path)
+      weights <- rbind(weights, unlist(cb_model_tmp[[iy]]$weights_path))
     }
     ###### overlap weights
     overlap_pair <- check_pair_overlap(weights, coverage = 0.95)
@@ -307,11 +306,10 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data) {
   cb_model_para[rm_elements] <- NULL
   for (i in 1:length(cb_model)) {
     cb_model[[i]]$obj_path <- as.numeric(unlist(cb_model[[i]]$obj_path[-1]))
-  }
-  for (i in 1:length(cb_model)) {
     cb_model[[i]]$obj_single <- as.numeric(unlist(cb_model[[i]]$obj_single[-1]))
+    cb_model[[i]]$weights_path <- do.call(rbind, cb_model[[i]]$weights_path)
   }
-
+  
   cb_obj <- list("cb_data" = cb_data, "cb_model" = cb_model, "cb_model_para" = cb_model_para)
   class(cb_obj) <- "colocboost"
   return(cb_obj)
diff --git a/R/colocboost_update.R b/R/colocboost_update.R
index 6100828..734bcb1 100644
--- a/R/colocboost_update.R
+++ b/R/colocboost_update.R
@@ -74,7 +74,8 @@ colocboost_update <- function(cb_model, cb_model_para, cb_data) {
     # weights <- adj_dep * ld_feature / scaling_factor * exp_abs_cor / sum(exp_abs_cor)
     weights <- adj_dep * obj_ld * exp_abs_cor / sum(exp_abs_cor)
     weights <- weights / sum(weights)
-    cb_model[[i]]$weights_path <- rbind(cb_model[[i]]$weights_path, as.vector(weights))
+    # cb_model[[i]]$weights_path <- rbind(cb_model[[i]]$weights_path, as.vector(weights))
+    cb_model[[i]]$weights_path <- c(cb_model[[i]]$weights_path, list(as.vector(weights)))
     ########## END: MAIN CALCULATION ###################
 
 
diff --git a/R/colocboost_workhorse.R b/R/colocboost_workhorse.R
index 7583eee..5bfe645 100644
--- a/R/colocboost_workhorse.R
+++ b/R/colocboost_workhorse.R
@@ -236,9 +236,8 @@ colocboost_workhorse <- function(cb_data,
     cb_model_para$num_updates <- m
     for (i in 1:length(cb_model)) {
       cb_model[[i]]$obj_path <- as.numeric(unlist(cb_model[[i]]$obj_path[-1]))
-    }
-    for (i in 1:length(cb_model)) {
       cb_model[[i]]$obj_single <- as.numeric(unlist(cb_model[[i]]$obj_single[-1]))
+      cb_model[[i]]$weights_path <- do.call(rbind, cb_model[[i]]$weights_path)
     }
 
     if (m == M) {

From 040b1783fefbf6110f00788b65b13c20fdc13e41 Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Tue, 30 Sep 2025 15:04:05 -0400
Subject: [PATCH 04/13] Fix purity issue for merged CoS

For the merged CoS, we recalibrate the CoS variants based on the merged weight. We should recalculate purity for the merged CoS and filter unpured merged CoS.
---
 R/colocboost_assemble.R |  3 +++
 R/colocboost_utils.R    | 52 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/R/colocboost_assemble.R b/R/colocboost_assemble.R
index aba5405..e849872 100644
--- a/R/colocboost_assemble.R
+++ b/R/colocboost_assemble.R
@@ -209,6 +209,9 @@ colocboost_assemble <- function(cb_obj,
     # - colocalization results
     cb_obj$cb_model_para$weight_fudge_factor <- weight_fudge_factor
     cb_obj$cb_model_para$coverage <- coverage
+    cb_obj$cb_model_para$min_abs_corr <- min_abs_corr
+    cb_obj$cb_model_para$median_abs_corr <- median_abs_corr
+    cb_obj$cb_model_para$n_purity <- n_purity
     cos_results <- get_cos_details(cb_obj, coloc_out = past_out$cos$cos, data_info = data_info)
     cb_output <- list(
       "vcp" = cos_results$vcp,
diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R
index 0407633..1a13eeb 100644
--- a/R/colocboost_utils.R
+++ b/R/colocboost_utils.R
@@ -665,8 +665,6 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL) {
     })
     int_weight <- lapply(cos_weights, get_integrated_weight, weight_fudge_factor = cb_obj$cb_model_para$weight_fudge_factor)
     names(int_weight) <- names(cos_weights) <- colocset_names
-    vcp <- as.vector(1 - apply(1 - do.call(cbind, int_weight), 1, prod))
-    names(vcp) <- data_info$variables
 
     # - resummary results
     cos_re_idx <- lapply(int_weight, function(w) {
@@ -676,7 +674,55 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL) {
       data_info$variables[idx]
     })
     coloc_csets <- list("cos_index" = cos_re_idx, "cos_variables" = cos_re_var)
-
+    
+    # - recalculate purity
+    purity <- vector(mode = "list", length = length(cos_re_idx))
+    for (ee in 1:length(cos_re_idx)) {
+      coloc_t <- coloc_outcome_index[[ee]]
+      p_tmp <- c()
+      for (i3 in coloc_t) {
+        pos <- cos_re_idx[[ee]]
+        X_dict <- cb_obj$cb_data$dict[i3]
+        if (!is.null(cb_obj$cb_data$data[[X_dict]]$XtX)) {
+          pos <- match(pos, setdiff(1:cb_obj$cb_model_para$P, cb_obj$cb_data$data[[i3]]$variable_miss))
+        }
+        tmp <- matrix(get_purity(pos,
+                                 X = cb_obj$cb_data$data[[X_dict]]$X,
+                                 Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX,
+                                 N = cb_obj$cb_data$data[[i3]]$N, n = cb_obj$cb_model_para$n_purity
+        ), 1, 3)
+        p_tmp <- rbind(p_tmp, tmp)
+      }
+      purity[[ee]] <- matrix(apply(p_tmp, 2, max), 1, 3)
+    }
+    purity_all <- do.call(rbind, purity)
+    purity_all <- as.data.frame(purity_all)
+    colnames(purity_all) <- c("min_abs_corr", "mean_abs_corr", "median_abs_corr")
+    coloc_out$purity <- purity_all
+    if (is.null(cb_obj$cb_model_para$median_abs_corr)) {
+      is_pure <- which(purity_all[, 1] >= cb_obj$cb_model_para$min_abs_corr)
+    } else {
+      is_pure <- which(purity_all[, 1] >= cb_obj$cb_model_para$min_abs_corr | purity_all[, 3] >= cb_obj$cb_model_para$median_abs_corr)
+    }
+    if (length(is_pure)==0){
+      coloc_results <- NULL
+      vcp <- NULL
+      return(list("cos_results" = coloc_results, "vcp" = vcp))
+    } else if (length(is_pure)!=length(cos_re_idx)){
+      int_weight = int_weight[is_pure]
+      coloc_csets <- lapply(coloc_csets, function(cs) cs[is_pure])
+      coloc_outcomes <- lapply(coloc_outcomes, function(cs) cs[is_pure])
+      normalization_evidence <- normalization_evidence[is_pure]
+      npc <- npc[is_pure]
+      cos_min_npc_outcome <- cos_min_npc_outcome[is_pure]
+      cos_weights <- cos_weights[is_pure]
+      coloc_out$purity <- purity_all[is_pure,,drop = FALSE]
+      colocset_names <- colocset_names[is_pure]
+    }
+    vcp <- as.vector(1 - apply(1 - do.call(cbind, int_weight), 1, prod))
+    names(vcp) <- data_info$variables
+    
+    
     # - hits variables in each csets
     coloc_hits <- coloc_hits_variablenames <- coloc_hits_names <- c()
     for (i in 1:length(int_weight)) {

From 5c19c59e237191c9f4452749970e330a6cf24413 Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Thu, 2 Oct 2025 02:13:57 -0400
Subject: [PATCH 05/13] minor fix

fix error when focal outcome does not pass multiple testing correction at the first place.
---
 R/colocboost_assemble.R  | 2 +-
 R/colocboost_utils.R     | 2 +-
 R/colocboost_workhorse.R | 3 +++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/R/colocboost_assemble.R b/R/colocboost_assemble.R
index e849872..4ddd67d 100644
--- a/R/colocboost_assemble.R
+++ b/R/colocboost_assemble.R
@@ -58,7 +58,7 @@ colocboost_assemble <- function(cb_obj,
     )
     # - save model and all coloc and single information for diagnostic
     if (output_level != 1) {
-      tmp <- get_full_output(cb_obj = cb_obj, past_out = NULL, variables = NULL)
+      tmp <- get_full_output(cb_obj = cb_obj, past_out = NULL, variables = data_info$variables)
       if (output_level == 2) {
         cb_output <- c(cb_output, list("ucos_details" = tmp$ucos_detials))
         cb_output <- cb_output[c("cos_summary", "vcp", "cos_details", "data_info", "model_info", "ucos_details")]
diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R
index 1a13eeb..0d57e32 100644
--- a/R/colocboost_utils.R
+++ b/R/colocboost_utils.R
@@ -893,7 +893,7 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output
   cb_model_para <- cb_obj$cb_model_para
 
   ## - obtain the order of variables based on the variables names if it has position information
-  if (!is.null(variables)) {
+  if (is.null(variables)) {
     ordered <- 1:length(cb_obj$cb_model_para$variables)
   } else {
     ordered <- match(variables, cb_obj$cb_model_para$variables)
diff --git a/R/colocboost_workhorse.R b/R/colocboost_workhorse.R
index 5bfe645..4462680 100644
--- a/R/colocboost_workhorse.R
+++ b/R/colocboost_workhorse.R
@@ -103,6 +103,9 @@ colocboost_workhorse <- function(cb_data,
   }
 
   if (sum(cb_model_para$update_y == 1) == 0) {
+    for (i in 1:length(cb_model)){
+      cb_model[[i]]$weights_path <- matrix(0, ncol = cb_model_para$P)
+    }
     cb_obj <- list("cb_data" = cb_data, "cb_model" = cb_model, "cb_model_para" = cb_model_para)
     class(cb_obj) <- "colocboost"
     return(cb_obj)

From 8c0dad2856c15f6224d6a5347f259d9e635177bb Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Wed, 22 Oct 2025 13:40:06 -0400
Subject: [PATCH 06/13] Update colocboost_utils.R

fix bug for choosing top variable in ucos details
---
 R/colocboost_utils.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R
index 0d57e32..9bfa3fd 100644
--- a/R/colocboost_utils.R
+++ b/R/colocboost_utils.R
@@ -987,7 +987,7 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output
         # - hits variables in each csets
         cs_hits <- sapply(1:length(specific_w), function(jj) {
           inw <- specific_w[[jj]]
-          sample(which(inw == max(inw)), 1)
+          which(inw == max(inw))[1]
         })
         cs_hits_variablenames <- sapply(cs_hits, function(ch) variables[ch])
         specific_cs_hits <- data.frame("top_index" = cs_hits, "top_variables" = cs_hits_variablenames) # save

From 37b33ad3ccfe67a259074a0649457d07060029fb Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Wed, 22 Oct 2025 14:05:32 -0400
Subject: [PATCH 07/13] fineboost one iteration issue

fix fine-mapping one iteration issue
---
 R/colocboost_assemble.R  | 1 +
 R/colocboost_inference.R | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/R/colocboost_assemble.R b/R/colocboost_assemble.R
index 4ddd67d..94cbbe5 100644
--- a/R/colocboost_assemble.R
+++ b/R/colocboost_assemble.R
@@ -79,6 +79,7 @@ colocboost_assemble <- function(cb_obj,
     } else if (cb_obj$cb_model_para$model_used == "one_causal"){
       # fixme later
       check_null_max <- check_null_max * check_null
+      check_null_max_ucos <- check_null_max_ucos * check_null
     }
     cb_obj <- get_max_profile(cb_obj, check_null_max = check_null_max, 
                               check_null_max_ucos = check_null_max_ucos, 
diff --git a/R/colocboost_inference.R b/R/colocboost_inference.R
index e76d708..882fc7a 100644
--- a/R/colocboost_inference.R
+++ b/R/colocboost_inference.R
@@ -374,7 +374,12 @@ check_null_post <- function(cb_obj,
   }
   if (!weaker_effect) {
     check_cs_change <- cs_change
-    check_null_tmp <- sapply(1:cb_obj$cb_model_para$L, function(j) cb_obj$cb_model[[j]]$check_null_max)
+    if (cb_obj$cb_model_para$L == 1){
+      check_null_max_tmp <- cb_obj$cb_model[[j]]$check_null_max_ucos
+    } else {
+      check_null_max_tmp <- cb_obj$cb_model[[j]]$check_null_max
+    }
+    check_null_tmp <- sapply(1:cb_obj$cb_model_para$L, function(j) check_null_max_tmp)
   } else {
     check_null_tmp <- rep(check_null, cb_obj$cb_model_para$L)
   }

From c0878aa26d7fe2e8b38f1d94b890c7de386d3823 Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Sat, 1 Nov 2025 08:18:40 -0400
Subject: [PATCH 08/13] Update ColocBoost_Wrapper_Pipeline.Rmd

Comment out to avoid running this code here, as we do not have real data files in examples
---
 vignettes/ColocBoost_Wrapper_Pipeline.Rmd | 173 ++++++++++++----------
 1 file changed, 92 insertions(+), 81 deletions(-)

diff --git a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd
index 6b5a5a4..49bc4c1 100644
--- a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd
+++ b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd
@@ -85,23 +85,25 @@ imiss_cutoff = 0.9
 
 # More advanced parameters see pecotmr::load_multitask_regional_data()
 
-region_data_individual <- load_multitask_regional_data(
-    region = region,
-    genotype_list = genotype_list,
-    phenotype_list = phenotype_list,
-    covariate_list = covariate_list,
-    conditions_list_individual = conditions_list_individual,
-    match_geno_pheno = match_geno_pheno,
-    association_window = association_window,
-    region_name_col = region_name_col,
-    extract_region_name = extract_region_name,
-    keep_indel = keep_indel,
-    keep_samples = keep_samples,
-    maf_cutoff = maf_cutoff,
-    mac_cutoff = mac_cutoff,
-    xvar_cutoff = xvar_cutoff,
-    imiss_cutoff = imiss_cutoff
-)
+#### Comment out to avoid running this code here, as we do not have real data files in this example ####
+# region_data_individual <- load_multitask_regional_data(
+#     region = region,
+#     genotype_list = genotype_list,
+#     phenotype_list = phenotype_list,
+#     covariate_list = covariate_list,
+#     conditions_list_individual = conditions_list_individual,
+#     match_geno_pheno = match_geno_pheno,
+#     association_window = association_window,
+#     region_name_col = region_name_col,
+#     extract_region_name = extract_region_name,
+#     keep_indel = keep_indel,
+#     keep_samples = keep_samples,
+#     maf_cutoff = maf_cutoff,
+#     mac_cutoff = mac_cutoff,
+#     xvar_cutoff = xvar_cutoff,
+#     imiss_cutoff = imiss_cutoff
+# )
+#### End of comment out
 
 ```
 
@@ -144,17 +146,19 @@ n_controls = c(0, 40000)
 
 # More advanced parameters see pecotmr::load_multitask_regional_data()
 
-region_data_sumstat <- load_multitask_regional_data(
-    sumstat_path_list = sumstat_path_list,
-    column_file_path_list = column_file_path_list,
-    LD_meta_file_path_list = LD_meta_file_path_list,
-    conditions_list_sumstat = conditions_list_sumstat,
-    match_LD_sumstat = match_LD_sumstat,
-    association_window = association_window,
-    n_samples = n_samples,
-    n_cases = n_cases,
-    n_controls = n_controls
-)
+#### Comment out to avoid running this code here, as we do not have real data files in this example ####
+# region_data_sumstat <- load_multitask_regional_data(
+#     sumstat_path_list = sumstat_path_list,
+#     column_file_path_list = column_file_path_list,
+#     LD_meta_file_path_list = LD_meta_file_path_list,
+#     conditions_list_sumstat = conditions_list_sumstat,
+#     match_LD_sumstat = match_LD_sumstat,
+#     association_window = association_window,
+#     n_samples = n_samples,
+#     n_cases = n_cases,
+#     n_controls = n_controls
+# )
+#### End of comment out
 ```
 
 
@@ -223,58 +227,65 @@ outputs:
 - **`colocboost_results`**: List of colocboost objects (with `xqtl_coloc`, `joint_gwas`, `separate_gwas`); Output of the `colocboost_analysis_pipeline` function. If the mode is not run, the corresponding element will be `NULL`.
 
 ```{r, colocboost-analysis}
-# load in individual-level and sumstat data
-region_data_combined <- load_multitask_regional_data(
-    region = region,
-    genotype_list = genotype_list,
-    phenotype_list = phenotype_list,
-    covariate_list = covariate_list,
-    conditions_list_individual = conditions_list_individual,
-    match_geno_pheno = match_geno_pheno,
-    association_window = association_window,
-    region_name_col = region_name_col,
-    extract_region_name = extract_region_name,
-    keep_indel = keep_indel,
-    keep_samples = keep_samples,
-    maf_cutoff = maf_cutoff,
-    mac_cutoff = mac_cutoff,
-    xvar_cutoff = xvar_cutoff,
-    imiss_cutoff = imiss_cutoff,
-    sumstat_path_list = sumstat_path_list,
-    column_file_path_list = column_file_path_list,
-    LD_meta_file_path_list = LD_meta_file_path_list,
-    conditions_list_sumstat = conditions_list_sumstat,
-    match_LD_sumstat = match_LD_sumstat,
-    n_samples = n_samples,
-    n_cases = n_cases,
-    n_controls = n_controls
-)
-
-maf_cutoff = 0.01
-pip_cutoff_to_skip_ind = rep(0, length(phenotype_list))
-pip_cutoff_to_skip_sumstat = rep(0, length(sumstat_path_list))
-qc_method = "rss_qc" 
-
-# run colocboost analysis
-colocboost_results <- colocboost_analysis_pipeline(
-    region_data_combined,
-    maf_cutoff = maf_cutoff, 
-    pip_cutoff_to_skip_ind = pip_cutoff_to_skip_ind,
-    pip_cutoff_to_skip_sumstat = pip_cutoff_to_skip_sumstat,
-    qc_method = qc_method,
-    xqtl_coloc = TRUE,
-    joint_gwas = TRUE,
-    separate_gwas = TRUE
-)
-
-# visualize results for xQTL-only mode
-colocboost_plot(colocboost_results$xqtl_coloc)
-
-# visualize results for joint GWAS mode
-colocboost_plot(colocboost_results$joint_gwas)
 
-# visualize results for separate GWAS mode
-for (i in 1:length(colocboost_results$separate_gwas)) {
-    colocboost_plot(colocboost_results$separate_gwas[[i]])
-}
+#### Comment out to avoid running this code here, as we do not have real data files in this example ####
+#### Please check the example code below ####
+
+# # load in individual-level and sumstat data
+# region_data_combined <- load_multitask_regional_data(
+#     region = region,
+#     genotype_list = genotype_list,
+#     phenotype_list = phenotype_list,
+#     covariate_list = covariate_list,
+#     conditions_list_individual = conditions_list_individual,
+#     match_geno_pheno = match_geno_pheno,
+#     association_window = association_window,
+#     region_name_col = region_name_col,
+#     extract_region_name = extract_region_name,
+#     keep_indel = keep_indel,
+#     keep_samples = keep_samples,
+#     maf_cutoff = maf_cutoff,
+#     mac_cutoff = mac_cutoff,
+#     xvar_cutoff = xvar_cutoff,
+#     imiss_cutoff = imiss_cutoff,
+#     sumstat_path_list = sumstat_path_list,
+#     column_file_path_list = column_file_path_list,
+#     LD_meta_file_path_list = LD_meta_file_path_list,
+#     conditions_list_sumstat = conditions_list_sumstat,
+#     match_LD_sumstat = match_LD_sumstat,
+#     n_samples = n_samples,
+#     n_cases = n_cases,
+#     n_controls = n_controls
+# )
+
+# maf_cutoff = 0.01
+# pip_cutoff_to_skip_ind = rep(0, length(phenotype_list))
+# pip_cutoff_to_skip_sumstat = rep(0, length(sumstat_path_list))
+# qc_method = "rss_qc" 
+
+# # run colocboost analysis
+# colocboost_results <- colocboost_analysis_pipeline(
+#     region_data_combined,
+#     maf_cutoff = maf_cutoff, 
+#     pip_cutoff_to_skip_ind = pip_cutoff_to_skip_ind,
+#     pip_cutoff_to_skip_sumstat = pip_cutoff_to_skip_sumstat,
+#     qc_method = qc_method,
+#     xqtl_coloc = TRUE,
+#     joint_gwas = TRUE,
+#     separate_gwas = TRUE
+# )
+
+# # visualize results for xQTL-only mode
+# colocboost_plot(colocboost_results$xqtl_coloc)
+
+# # visualize results for joint GWAS mode
+# colocboost_plot(colocboost_results$joint_gwas)
+
+# # visualize results for separate GWAS mode
+# for (i in 1:length(colocboost_results$separate_gwas)) {
+#     colocboost_plot(colocboost_results$separate_gwas[[i]])
+# }
+
+
+#### End of comment out
 ```
\ No newline at end of file

From b6c16f0f6995c912eb48bbf256da025ff67f3342 Mon Sep 17 00:00:00 2001
From: xueweic <xueweic@users.noreply.github.com>
Date: Sat, 1 Nov 2025 12:47:07 +0000
Subject: [PATCH 09/13] Update documentation

---
 man/colocboost_plot.Rd | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/man/colocboost_plot.Rd b/man/colocboost_plot.Rd
index fe155fc..8e5c7df 100644
--- a/man/colocboost_plot.Rd
+++ b/man/colocboost_plot.Rd
@@ -18,8 +18,9 @@ colocboost_plot(
   plot_focal_cos_outcome_only = FALSE,
   points_color = "grey80",
   cos_color = NULL,
-  add_vertical = FALSE,
-  add_vertical_idx = NULL,
+  add_highlight = FALSE,
+  add_highlight_idx = NULL,
+  add_highlight_style = "vertical_lines",
   outcome_names = NULL,
   plot_cols = 2,
   variant_coord = FALSE,
@@ -62,9 +63,11 @@ colocboost_plot(
 
 \item{cos_color}{Optional custom colors for CoS.}
 
-\item{add_vertical}{Logical, if TRUE adds vertical lines at specified positions, default is FALSE}
+\item{add_highlight}{Logical, if TRUE adds vertical lines at specified positions, default is FALSE}
 
-\item{add_vertical_idx}{Optional indices for vertical lines.}
+\item{add_highlight_idx}{Optional indices for add_highlight variables.}
+
+\item{add_highlight_style}{Optional style of add_highlight variables, default is "vertical_lines", other choice is "star" - red star.}
 
 \item{outcome_names}{Optional vector of outcomes names for the subtitle of each figure. \code{outcome_names=NULL} for the outcome name shown in \code{data_info}.}
 

From 7747844d130c7132d189fabbd25186b98211ff59 Mon Sep 17 00:00:00 2001
From: xuewei cao <36172337+xueweic@users.noreply.github.com>
Date: Sat, 1 Nov 2025 15:40:46 -0400
Subject: [PATCH 10/13] Update colocboost.R

handle duplication variables - remove one of them
---
 R/colocboost.R | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/R/colocboost.R b/R/colocboost.R
index 81e8490..6cb1721 100644
--- a/R/colocboost.R
+++ b/R/colocboost.R
@@ -247,7 +247,17 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
         return(xx)
       })
     }
-
+    
+    # Remove duplicates and report: if duplicate columns of X
+    X <- lapply(seq_along(X), function(i) {
+      df <- X[[i]]
+      if (anyDuplicated(colnames(df))) {
+        message(paste("Removed duplicate columns from X matrix ", i))
+        df[, !duplicated(colnames(df)), drop = FALSE]
+      } else {
+        df
+      }
+    })
     keep_variable_individual <- lapply(X, colnames)
     if (!is.list(X) & !is.list(Y)) {
       warning("Error: Input X and Y must be the list containing genotype matrics and all phenotype vectors!")
@@ -333,11 +343,19 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
         warning("Error: effect_est and effect_se should be the same dimension! Please check!")
         return(NULL)
       }
-      variables <- rownames(effect_est)
       effect_est <- as.matrix(effect_est)
       effect_se <- as.matrix(effect_se)
+      variables <- rownames(effect_est)
       if (is.null(variables)) {
         variables <- paste0("variant_", 1:nrow(effect_est))
+      } else {
+        # add - if duplciates occurs, only keep one
+        if (anyDuplicated(variables)) {
+          keep_idx <- !duplicated(variables)
+          variables <- variables[keep_idx]
+          effect_est <- effect_est[keep_idx, , drop = FALSE]
+          effect_se <- effect_se[keep_idx, , drop = FALSE]
+        }
       }
       sumstat <- list()
       for (sum_iy in 1:ncol(effect_est)) {
@@ -388,6 +406,17 @@ colocboost <- function(X = NULL, Y = NULL, # individual data
         })
       }
     }
+    
+    # Remove duplicates and report: if duplicate variant in summary statistics
+    sumstat <- lapply(seq_along(sumstat), function(i) {
+      xx <- sumstat[[i]]
+      if (anyDuplicated(xx$variant)) {
+        message(paste("Removed duplicate variants from sumstat", i))
+        xx[!duplicated(xx$variant), , drop = FALSE]
+      } else {
+        xx
+      }
+    })
     keep_variable_sumstat <- lapply(sumstat, function(xx) {
       xx$variant
     })

From d66edefc6abef08d16ded54ff84ab3f881fae33a Mon Sep 17 00:00:00 2001
From: Daniel Nachun <daniel.nachun@gmail.com>
Date: Tue, 4 Nov 2025 10:46:39 -0800
Subject: [PATCH 11/13] drop osx-64

---
 .github/workflows/ci.yml | 33 ---------------------------------
 1 file changed, 33 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ee5ae5a..b35dcd3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -46,39 +46,6 @@ jobs:
       - name: Check unit test code coverage
         run: pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml codecov
 
-  ci_osx-64:
-    name: osx-64 CI
-    runs-on: macos-13
-    strategy:
-      fail-fast: false
-      matrix:
-        environment: ["r43", "r44"]
-
-    steps:
-      - name: Checkout pull request branch
-        uses: actions/checkout@v5
-        with:
-          fetch-depth: 0
-
-      - name: Create TOML from recipe
-        run: |
-          .github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE}
-          mkdir /tmp/pixi
-          mv ${GITHUB_WORKSPACE}/pixi.toml /tmp/pixi
-
-      - name: Setup pixi
-        uses: prefix-dev/setup-pixi@v0.9.2
-        with:
-          manifest-path: /tmp/pixi/pixi.toml
-
-      - name: Run unit tests
-        run: pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml devtools_test
-
-      - name: Run R CMD CHECK
-        run: |
-          pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml build
-          pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml rcmdcheck
-  
   ci_osx-arm64:
     name: osx-arm64 CI
     runs-on: macos-14

From 4e4c0c42e74e6af24ac8933dfd117ed93375149f Mon Sep 17 00:00:00 2001
From: Daniel Nachun <daniel.nachun@gmail.com>
Date: Tue, 4 Nov 2025 11:03:39 -0800
Subject: [PATCH 12/13] actually skip vignettes

---
 vignettes/ColocBoost_Wrapper_Pipeline.Rmd | 185 ++++++++++------------
 1 file changed, 87 insertions(+), 98 deletions(-)

diff --git a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd
index 49bc4c1..31ebaa9 100644
--- a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd
+++ b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd
@@ -63,7 +63,7 @@ outputs:
 
 The following example demonstrates how to set up input data with 3 phenotypes and 2 cohorts. The first cohort has 2 phenotypes and the second cohort has 1 phenotype. The first phenotype has 2 genes and the second phenotype has 1 gene.
 
-```{r, data-loader-individual}
+```{r, data-loader-individual, eval = FALSE}
 # Example of loading individual-level data
 region = "chr1:1000000-2000000"
 genotype_list = c("plink_cohort1.1", "plink_cohort1.2")
@@ -85,26 +85,23 @@ imiss_cutoff = 0.9
 
 # More advanced parameters see pecotmr::load_multitask_regional_data()
 
-#### Comment out to avoid running this code here, as we do not have real data files in this example ####
-# region_data_individual <- load_multitask_regional_data(
-#     region = region,
-#     genotype_list = genotype_list,
-#     phenotype_list = phenotype_list,
-#     covariate_list = covariate_list,
-#     conditions_list_individual = conditions_list_individual,
-#     match_geno_pheno = match_geno_pheno,
-#     association_window = association_window,
-#     region_name_col = region_name_col,
-#     extract_region_name = extract_region_name,
-#     keep_indel = keep_indel,
-#     keep_samples = keep_samples,
-#     maf_cutoff = maf_cutoff,
-#     mac_cutoff = mac_cutoff,
-#     xvar_cutoff = xvar_cutoff,
-#     imiss_cutoff = imiss_cutoff
-# )
-#### End of comment out
-
+region_data_individual <- load_multitask_regional_data(
+    region = region,
+    genotype_list = genotype_list,
+    phenotype_list = phenotype_list,
+    covariate_list = covariate_list,
+    conditions_list_individual = conditions_list_individual,
+    match_geno_pheno = match_geno_pheno,
+    association_window = association_window,
+    region_name_col = region_name_col,
+    extract_region_name = extract_region_name,
+    keep_indel = keep_indel,
+    keep_samples = keep_samples,
+    maf_cutoff = maf_cutoff,
+    mac_cutoff = mac_cutoff,
+    xvar_cutoff = xvar_cutoff,
+    imiss_cutoff = imiss_cutoff
+)
 ```
 
 
@@ -129,7 +126,7 @@ outputs:
 
 The following example demonstrates how to set up input data with 2 summary statistics and one LD reference.
 
-```{r, data-loader-sumstat}
+```{r, data-loader-sumstat, eval = FALSE}
 # Example of loading summary statistics
 sumstat_path_list = c("sumstat1.tsv.gz", "sumstat2.tsv.gz")
 column_file_path_list = c("column_mapping_sumstat1.yml", "column_mapping_sumstat2.yml")
@@ -146,19 +143,17 @@ n_controls = c(0, 40000)
 
 # More advanced parameters see pecotmr::load_multitask_regional_data()
 
-#### Comment out to avoid running this code here, as we do not have real data files in this example ####
-# region_data_sumstat <- load_multitask_regional_data(
-#     sumstat_path_list = sumstat_path_list,
-#     column_file_path_list = column_file_path_list,
-#     LD_meta_file_path_list = LD_meta_file_path_list,
-#     conditions_list_sumstat = conditions_list_sumstat,
-#     match_LD_sumstat = match_LD_sumstat,
-#     association_window = association_window,
-#     n_samples = n_samples,
-#     n_cases = n_cases,
-#     n_controls = n_controls
-# )
-#### End of comment out
+region_data_sumstat <- load_multitask_regional_data(
+    sumstat_path_list = sumstat_path_list,
+    column_file_path_list = column_file_path_list,
+    LD_meta_file_path_list = LD_meta_file_path_list,
+    conditions_list_sumstat = conditions_list_sumstat,
+    match_LD_sumstat = match_LD_sumstat,
+    association_window = association_window,
+    n_samples = n_samples,
+    n_cases = n_cases,
+    n_controls = n_controls
+)
 ```
 
 
@@ -226,66 +221,60 @@ Example: for sQTL, `list(type_pattern = ".*clu_(\\d+_[+-?]).*", valid_pattern =
 outputs:
 - **`colocboost_results`**: List of colocboost objects (with `xqtl_coloc`, `joint_gwas`, `separate_gwas`); Output of the `colocboost_analysis_pipeline` function. If the mode is not run, the corresponding element will be `NULL`.
 
-```{r, colocboost-analysis}
-
-#### Comment out to avoid running this code here, as we do not have real data files in this example ####
-#### Please check the example code below ####
-
-# # load in individual-level and sumstat data
-# region_data_combined <- load_multitask_regional_data(
-#     region = region,
-#     genotype_list = genotype_list,
-#     phenotype_list = phenotype_list,
-#     covariate_list = covariate_list,
-#     conditions_list_individual = conditions_list_individual,
-#     match_geno_pheno = match_geno_pheno,
-#     association_window = association_window,
-#     region_name_col = region_name_col,
-#     extract_region_name = extract_region_name,
-#     keep_indel = keep_indel,
-#     keep_samples = keep_samples,
-#     maf_cutoff = maf_cutoff,
-#     mac_cutoff = mac_cutoff,
-#     xvar_cutoff = xvar_cutoff,
-#     imiss_cutoff = imiss_cutoff,
-#     sumstat_path_list = sumstat_path_list,
-#     column_file_path_list = column_file_path_list,
-#     LD_meta_file_path_list = LD_meta_file_path_list,
-#     conditions_list_sumstat = conditions_list_sumstat,
-#     match_LD_sumstat = match_LD_sumstat,
-#     n_samples = n_samples,
-#     n_cases = n_cases,
-#     n_controls = n_controls
-# )
-
-# maf_cutoff = 0.01
-# pip_cutoff_to_skip_ind = rep(0, length(phenotype_list))
-# pip_cutoff_to_skip_sumstat = rep(0, length(sumstat_path_list))
-# qc_method = "rss_qc" 
-
-# # run colocboost analysis
-# colocboost_results <- colocboost_analysis_pipeline(
-#     region_data_combined,
-#     maf_cutoff = maf_cutoff, 
-#     pip_cutoff_to_skip_ind = pip_cutoff_to_skip_ind,
-#     pip_cutoff_to_skip_sumstat = pip_cutoff_to_skip_sumstat,
-#     qc_method = qc_method,
-#     xqtl_coloc = TRUE,
-#     joint_gwas = TRUE,
-#     separate_gwas = TRUE
-# )
-
-# # visualize results for xQTL-only mode
-# colocboost_plot(colocboost_results$xqtl_coloc)
-
-# # visualize results for joint GWAS mode
-# colocboost_plot(colocboost_results$joint_gwas)
-
-# # visualize results for separate GWAS mode
-# for (i in 1:length(colocboost_results$separate_gwas)) {
-#     colocboost_plot(colocboost_results$separate_gwas[[i]])
-# }
-
-
-#### End of comment out
-```
\ No newline at end of file
+```{r, colocboost-analysis, eval = FALSE}
+
+# load in individual-level and sumstat data
+region_data_combined <- load_multitask_regional_data(
+    region = region,
+    genotype_list = genotype_list,
+    phenotype_list = phenotype_list,
+    covariate_list = covariate_list,
+    conditions_list_individual = conditions_list_individual,
+    match_geno_pheno = match_geno_pheno,
+    association_window = association_window,
+    region_name_col = region_name_col,
+    extract_region_name = extract_region_name,
+    keep_indel = keep_indel,
+    keep_samples = keep_samples,
+    maf_cutoff = maf_cutoff,
+    mac_cutoff = mac_cutoff,
+    xvar_cutoff = xvar_cutoff,
+    imiss_cutoff = imiss_cutoff,
+    sumstat_path_list = sumstat_path_list,
+    column_file_path_list = column_file_path_list,
+    LD_meta_file_path_list = LD_meta_file_path_list,
+    conditions_list_sumstat = conditions_list_sumstat,
+    match_LD_sumstat = match_LD_sumstat,
+    n_samples = n_samples,
+    n_cases = n_cases,
+    n_controls = n_controls
+)
+
+maf_cutoff = 0.01
+pip_cutoff_to_skip_ind = rep(0, length(phenotype_list))
+pip_cutoff_to_skip_sumstat = rep(0, length(sumstat_path_list))
+qc_method = "rss_qc" 
+
+# run colocboost analysis
+colocboost_results <- colocboost_analysis_pipeline(
+    region_data_combined,
+    maf_cutoff = maf_cutoff, 
+    pip_cutoff_to_skip_ind = pip_cutoff_to_skip_ind,
+    pip_cutoff_to_skip_sumstat = pip_cutoff_to_skip_sumstat,
+    qc_method = qc_method,
+    xqtl_coloc = TRUE,
+    joint_gwas = TRUE,
+    separate_gwas = TRUE
+)
+
+# visualize results for xQTL-only mode
+colocboost_plot(colocboost_results$xqtl_coloc)
+
+# visualize results for joint GWAS mode
+colocboost_plot(colocboost_results$joint_gwas)
+
+# visualize results for separate GWAS mode
+for (i in 1:length(colocboost_results$separate_gwas)) {
+    colocboost_plot(colocboost_results$separate_gwas[[i]])
+}
+```

From 281e0763bfb11b44e1c0f54c7bd2d716a5303999 Mon Sep 17 00:00:00 2001
From: Daniel Nachun <daniel.nachun@gmail.com>
Date: Tue, 4 Nov 2025 11:27:23 -0800
Subject: [PATCH 13/13] actually skip vignettes

---
 vignettes/ColocBoost_Wrapper_Pipeline.Rmd | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd
index 31ebaa9..c23bc2e 100644
--- a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd
+++ b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd
@@ -84,7 +84,7 @@ xvar_cutoff = 0
 imiss_cutoff = 0.9
 
 # More advanced parameters see pecotmr::load_multitask_regional_data()
-
+\dontrun{
 region_data_individual <- load_multitask_regional_data(
     region = region,
     genotype_list = genotype_list,
@@ -102,6 +102,7 @@ region_data_individual <- load_multitask_regional_data(
     xvar_cutoff = xvar_cutoff,
     imiss_cutoff = imiss_cutoff
 )
+}
 ```
 
 
@@ -142,7 +143,7 @@ n_controls = c(0, 40000)
 
 
 # More advanced parameters see pecotmr::load_multitask_regional_data()
-
+\dontrun{
 region_data_sumstat <- load_multitask_regional_data(
     sumstat_path_list = sumstat_path_list,
     column_file_path_list = column_file_path_list,
@@ -154,6 +155,7 @@ region_data_sumstat <- load_multitask_regional_data(
     n_cases = n_cases,
     n_controls = n_controls
 )
+}
 ```
 
 
@@ -224,6 +226,7 @@ outputs:
 ```{r, colocboost-analysis, eval = FALSE}
 
 # load in individual-level and sumstat data
+\dontrun{
 region_data_combined <- load_multitask_regional_data(
     region = region,
     genotype_list = genotype_list,
@@ -249,6 +252,7 @@ region_data_combined <- load_multitask_regional_data(
     n_cases = n_cases,
     n_controls = n_controls
 )
+}
 
 maf_cutoff = 0.01
 pip_cutoff_to_skip_ind = rep(0, length(phenotype_list))