diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ee5ae5a..b35dcd3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,39 +46,6 @@ jobs: - name: Check unit test code coverage run: pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml codecov - ci_osx-64: - name: osx-64 CI - runs-on: macos-13 - strategy: - fail-fast: false - matrix: - environment: ["r43", "r44"] - - steps: - - name: Checkout pull request branch - uses: actions/checkout@v5 - with: - fetch-depth: 0 - - - name: Create TOML from recipe - run: | - .github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE} - mkdir /tmp/pixi - mv ${GITHUB_WORKSPACE}/pixi.toml /tmp/pixi - - - name: Setup pixi - uses: prefix-dev/setup-pixi@v0.9.2 - with: - manifest-path: /tmp/pixi/pixi.toml - - - name: Run unit tests - run: pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml devtools_test - - - name: Run R CMD CHECK - run: | - pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml build - pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml rcmdcheck - ci_osx-arm64: name: osx-arm64 CI runs-on: macos-14 diff --git a/R/colocboost.R b/R/colocboost.R index 81e8490..6cb1721 100644 --- a/R/colocboost.R +++ b/R/colocboost.R @@ -247,7 +247,17 @@ colocboost <- function(X = NULL, Y = NULL, # individual data return(xx) }) } - + + # Remove duplicates and report: if duplicate columns of X + X <- lapply(seq_along(X), function(i) { + df <- X[[i]] + if (anyDuplicated(colnames(df))) { + message(paste("Removed duplicate columns from X matrix ", i)) + df[, !duplicated(colnames(df)), drop = FALSE] + } else { + df + } + }) keep_variable_individual <- lapply(X, colnames) if (!is.list(X) & !is.list(Y)) { warning("Error: Input X and Y must be the list containing genotype matrics and all phenotype vectors!") @@ -333,11 +343,19 @@ colocboost <- function(X = NULL, Y = NULL, # individual data warning("Error: effect_est and effect_se should be the same dimension! Please check!") return(NULL) } - variables <- rownames(effect_est) effect_est <- as.matrix(effect_est) effect_se <- as.matrix(effect_se) + variables <- rownames(effect_est) if (is.null(variables)) { variables <- paste0("variant_", 1:nrow(effect_est)) + } else { + # add - if duplciates occurs, only keep one + if (anyDuplicated(variables)) { + keep_idx <- !duplicated(variables) + variables <- variables[keep_idx] + effect_est <- effect_est[keep_idx, , drop = FALSE] + effect_se <- effect_se[keep_idx, , drop = FALSE] + } } sumstat <- list() for (sum_iy in 1:ncol(effect_est)) { @@ -388,6 +406,17 @@ colocboost <- function(X = NULL, Y = NULL, # individual data }) } } + + # Remove duplicates and report: if duplicate variant in summary statistics + sumstat <- lapply(seq_along(sumstat), function(i) { + xx <- sumstat[[i]] + if (anyDuplicated(xx$variant)) { + message(paste("Removed duplicate variants from sumstat", i)) + xx[!duplicated(xx$variant), , drop = FALSE] + } else { + xx + } + }) keep_variable_sumstat <- lapply(sumstat, function(xx) { xx$variant }) diff --git a/R/colocboost_assemble.R b/R/colocboost_assemble.R index aba5405..94cbbe5 100644 --- a/R/colocboost_assemble.R +++ b/R/colocboost_assemble.R @@ -58,7 +58,7 @@ colocboost_assemble <- function(cb_obj, ) # - save model and all coloc and single information for diagnostic if (output_level != 1) { - tmp <- get_full_output(cb_obj = cb_obj, past_out = NULL, variables = NULL) + tmp <- get_full_output(cb_obj = cb_obj, past_out = NULL, variables = data_info$variables) if (output_level == 2) { cb_output <- c(cb_output, list("ucos_details" = tmp$ucos_detials)) cb_output <- cb_output[c("cos_summary", "vcp", "cos_details", "data_info", "model_info", "ucos_details")] @@ -79,6 +79,7 @@ colocboost_assemble <- function(cb_obj, } else if (cb_obj$cb_model_para$model_used == "one_causal"){ # fixme later check_null_max <- check_null_max * check_null + check_null_max_ucos <- check_null_max_ucos * check_null } cb_obj <- get_max_profile(cb_obj, check_null_max = check_null_max, check_null_max_ucos = check_null_max_ucos, @@ -209,6 +210,9 @@ colocboost_assemble <- function(cb_obj, # - colocalization results cb_obj$cb_model_para$weight_fudge_factor <- weight_fudge_factor cb_obj$cb_model_para$coverage <- coverage + cb_obj$cb_model_para$min_abs_corr <- min_abs_corr + cb_obj$cb_model_para$median_abs_corr <- median_abs_corr + cb_obj$cb_model_para$n_purity <- n_purity cos_results <- get_cos_details(cb_obj, coloc_out = past_out$cos$cos, data_info = data_info) cb_output <- list( "vcp" = cos_results$vcp, diff --git a/R/colocboost_check_update_jk.R b/R/colocboost_check_update_jk.R index 623faa8..d449b03 100644 --- a/R/colocboost_check_update_jk.R +++ b/R/colocboost_check_update_jk.R @@ -66,15 +66,15 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) { # -- define jk and jk_each cor_vals_each <- do.call(cbind, lapply(model_update, function(cc) cc$correlation)) - abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`) + # abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`) + abs_cor_vals_each <- abs(cor_vals_each) * rep(adj_dep, each = nrow(cor_vals_each)) abs_cor_vals <- rowSums(abs_cor_vals_each) - jk <- which(abs_cor_vals == max(abs_cor_vals)) - jk <- ifelse(length(jk) == 1, jk, sample(jk, 1)) - jk_each <- apply(abs_cor_vals_each, 2, function(temp) { - jk_temp <- which(temp == max(temp)) - return(ifelse(length(jk_temp) == 1, jk_temp, sample(jk_temp, 1))) - }) + # jk <- which(abs_cor_vals == max(abs_cor_vals)) + # jk <- ifelse(length(jk) == 1, jk, sample(jk, 1)) + # jk_each <- apply(abs_cor_vals_each, 2, which.max) + jk <- which.max(abs_cor_vals) + jk_each <- sapply(1:ncol(abs_cor_vals_each), function(j) which.max(abs_cor_vals_each[, j])) update_jk[c(1, pos.update + 1)] <- c(jk, jk_each) @@ -142,7 +142,7 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) { update_status[pos.update[true_update]] <- 1 # - re-define jk_star abs_cor_vals_redefine <- rowSums(abs_cor_vals_each[, true_update, drop = FALSE]) - jk_redefine <- which(abs_cor_vals_redefine == max(abs_cor_vals_redefine)) + jk_redefine <- which.max(abs_cor_vals_redefine == max(abs_cor_vals_redefine)) jk_redefine <- ifelse(length(jk_redefine) == 1, jk_redefine, sample(jk_redefine, 1)) real_update_jk[pos.update[true_update]] <- jk_redefine } else { @@ -351,11 +351,10 @@ boost_check_update_jk_focal <- function(cb_model, cb_model_para, cb_data, # -- define jk and jk_each cor_vals_each <- do.call(cbind, lapply(model_update, function(cc) as.vector(cc$correlation))) - abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`) - jk_each <- apply(abs_cor_vals_each, 2, function(temp) { - jk_temp <- which(temp == max(temp)) - return(ifelse(length(jk_temp) == 1, jk_temp, sample(jk_temp, 1))) - }) + # abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`) + abs_cor_vals_each <- abs(cor_vals_each) * rep(adj_dep, each = nrow(cor_vals_each)) + # jk_each <- apply(abs_cor_vals_each, 2, which.max) + jk_each <- sapply(1:ncol(abs_cor_vals_each), function(j) which.max(abs_cor_vals_each[, j])) pp_focal <- which(pos.update == focal_outcome_idx) jk_focal <- jk_each[pp_focal] @@ -503,8 +502,8 @@ get_LD_jk1_jk2 <- function(jk1, jk2, if (length(XtX) == 1){ LD_temp <- 0 } else { - jk1.remain <- which(remain_jk == jk1) - jk2.remain <- which(remain_jk == jk2) + jk1.remain <- match(jk1, remain_jk) + jk2.remain <- match(jk2, remain_jk) LD_temp <- XtX[jk1.remain, jk2.remain] } } @@ -545,29 +544,59 @@ check_pair_jkeach <- function(jk_each, cb_data, X_dict, jk_equiv_corr = 0.8, jk_equiv_loglik = 0.001) { + + + #' @importFrom stats cor + get_LD_jk_each <- function(jk_each, + X = NULL, XtX = NULL, N = NULL, + remain_jk = NULL) { + if (!is.null(X)) { + LD_temp <- suppressWarnings({ + get_cormat(X[, jk_each]) + }) + LD_temp[which(is.na(LD_temp))] <- 0 + # LD_temp <- LD_temp[1, 2] + } else if (!is.null(XtX)) { + if (length(XtX) == 1){ + LD_temp <- matrix(0, length(jk_each), length(jk_each)) + } else { + jk.remain <- match(jk_each, remain_jk) + LD_temp <- XtX[jk.remain, jk.remain] + LD_temp[which(is.na(LD_temp))] <- 0 + } + } + return(LD_temp) + } + + detect_func <- function(idx, LD_all, jk_i, jk_j, i, j){ + change_log_jk_i <- model_update[[idx]]$change_loglike[jk_i] + change_log_jk_j <- model_update[[idx]]$change_loglike[jk_j] + change_each <- abs(change_log_jk_i - change_log_jk_j) + LD_temp <- LD_all[[idx]][i, j] + return((change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr)) + } + data_update <- cb_data$data[pos.update] + LD_all <- lapply(1:length(jk_each), function(idx){ + get_LD_jk_each(jk_each, + X = cb_data$data[[X_dict[idx]]]$X, + XtX = cb_data$data[[X_dict[idx]]]$XtX, + N = data_update[[idx]]$N, + remain_jk = setdiff(1:length(model_update[[idx]]$res), data_update[[idx]]$variable_miss) + ) + }) + # -- check if jk_i ~ jk_j change_each_pair <- matrix(FALSE, nrow = length(jk_each), ncol = length(jk_each)) for (i in 1:length(jk_each)) { - jk_i <- jk_each[i] - change_log_jk_i <- model_update[[i]]$change_loglike[jk_i] - for (j in 1:length(jk_each)) { + for (j in i:length(jk_each)) { if (j != i) { jk_j <- jk_each[j] - - if (!(jk_i %in% data_update[[i]]$variable_miss) & !(jk_j %in% data_update[[i]]$variable_miss)) { - change_log_jk_j <- model_update[[i]]$change_loglike[jk_j] - change_each <- abs(change_log_jk_i - change_log_jk_j) - LD_temp <- get_LD_jk1_jk2(jk_i, jk_j, - X = cb_data$data[[X_dict[i]]]$X, - XtX = cb_data$data[[X_dict[i]]]$XtX, - N = data_update[[i]]$N, - remain_jk = setdiff(1:length(model_update[[i]]$res), data_update[[i]]$variable_miss) - ) - change_each_pair[i, j] <- (change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr) - } else { - change_each_pair[i, j] <- FALSE + change_each_pair[i, j] <- detect_func(idx = i, LD_all, jk_i, jk_j, i, j) + # if jk_i and jk_j are equivalent on dataset i, then we don't need to check dataset j + if ( !change_each_pair[i, j] ){ + change_each_pair[j, i] <- detect_func(idx = j, LD_all, jk_i, jk_j, i, j) } } else { change_each_pair[i, j] <- FALSE diff --git a/R/colocboost_inference.R b/R/colocboost_inference.R index dc53908..882fc7a 100644 --- a/R/colocboost_inference.R +++ b/R/colocboost_inference.R @@ -132,7 +132,8 @@ get_modularity <- function(Weight, B) { if (m == 0) return(0) - cate <- B %*% t(B) + # cate <- B %*% t(B) + cate <- tcrossprod(B) if (m_pos == 0 & m_neg == 0) return(0) @@ -190,12 +191,12 @@ get_n_cluster <- function(hc, Sigma, m = ncol(Sigma), min_cluster_corr = 0.8) { #' @noRd w_purity <- function(weights, X = NULL, Xcorr = NULL, N = NULL, n = 100, coverage = 0.95, min_abs_corr = 0.5, median_abs_corr = NULL, miss_idx = NULL) { - tmp <- apply(weights, 2, w_cs, coverage = coverage) - tmp_purity <- apply(tmp, 2, function(tt) { - pos <- which(tt == 1) + + tmp_purity <- apply(weights, 2, function(w) { + pos <- w_cs(w, coverage = coverage) # deal with missing snp here if (!is.null(Xcorr)) { - pos <- match(pos, setdiff(1:length(tmp), miss_idx)) + pos <- match(pos, setdiff(1:length(w), miss_idx)) } get_purity(pos, X = X, Xcorr = Xcorr, N = N, n = n) }) @@ -373,7 +374,12 @@ check_null_post <- function(cb_obj, } if (!weaker_effect) { check_cs_change <- cs_change - check_null_tmp <- sapply(1:cb_obj$cb_model_para$L, function(j) cb_obj$cb_model[[j]]$check_null_max) + if (cb_obj$cb_model_para$L == 1){ + check_null_max_tmp <- cb_obj$cb_model[[j]]$check_null_max_ucos + } else { + check_null_max_tmp <- cb_obj$cb_model[[j]]$check_null_max + } + check_null_tmp <- sapply(1:cb_obj$cb_model_para$L, function(j) check_null_max_tmp) } else { check_null_tmp <- rep(check_null, cb_obj$cb_model_para$L) } @@ -420,7 +426,7 @@ get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100) { corr[which(is.na(corr))] <- 0 value <- abs(get_upper_tri(corr)) } else { - if (sum(Xcorr) == 1){ + if (length(Xcorr) == 1){ value <- 0 } else { Xcorr <- Xcorr # if (!is.null(N)) Xcorr/(N-1) else Xcorr diff --git a/R/colocboost_init.R b/R/colocboost_init.R index 8685b01..edca937 100644 --- a/R/colocboost_init.R +++ b/R/colocboost_init.R @@ -163,7 +163,8 @@ colocboost_init_model <- function(cb_data, tmp <- list( "res" = NULL, "beta" = rep(0, P), - "weights_path" = c(), + # "weights_path" = c(), + "weights_path" = list(), "profile_loglike_each" = NULL, "obj_path" = 999999, "obj_single" = 999999, @@ -619,7 +620,7 @@ process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dic Z_extend[pos_target] <- current_z[pos_z] # Calculate submatrix for each unique entry (not duplicates) - if (sum(current_ld_matrix) == 1){ + if (length(current_ld_matrix) == 1){ ld_submatrix <- current_ld_matrix } else { ld_submatrix <- NULL diff --git a/R/colocboost_one_causal.R b/R/colocboost_one_causal.R index 0ed96b5..09692d5 100644 --- a/R/colocboost_one_causal.R +++ b/R/colocboost_one_causal.R @@ -79,9 +79,8 @@ colocboost_one_iteration <- function(cb_model, cb_model_para, cb_data) { cb_model_para[rm_elements] <- NULL for (i in 1:length(cb_model)) { cb_model[[i]]$obj_path <- as.numeric(unlist(cb_model[[i]]$obj_path[-1])) - } - for (i in 1:length(cb_model)) { cb_model[[i]]$obj_single <- as.numeric(unlist(cb_model[[i]]$obj_single[-1])) + cb_model[[i]]$weights_path <- do.call(rbind, cb_model[[i]]$weights_path) } cb_obj <- list("cb_data" = cb_data, "cb_model" = cb_model, "cb_model_para" = cb_model_para) @@ -238,7 +237,7 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data) { cb_model_para, cb_data ) - weights <- rbind(weights, cb_model_tmp[[iy]]$weights_path) + weights <- rbind(weights, unlist(cb_model_tmp[[iy]]$weights_path)) } ###### overlap weights overlap_pair <- check_pair_overlap(weights, coverage = 0.95) @@ -307,11 +306,10 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data) { cb_model_para[rm_elements] <- NULL for (i in 1:length(cb_model)) { cb_model[[i]]$obj_path <- as.numeric(unlist(cb_model[[i]]$obj_path[-1])) - } - for (i in 1:length(cb_model)) { cb_model[[i]]$obj_single <- as.numeric(unlist(cb_model[[i]]$obj_single[-1])) + cb_model[[i]]$weights_path <- do.call(rbind, cb_model[[i]]$weights_path) } - + cb_obj <- list("cb_data" = cb_data, "cb_model" = cb_model, "cb_model_para" = cb_model_para) class(cb_obj) <- "colocboost" return(cb_obj) diff --git a/R/colocboost_plot.R b/R/colocboost_plot.R index 90141d6..1f0f7c1 100644 --- a/R/colocboost_plot.R +++ b/R/colocboost_plot.R @@ -14,8 +14,9 @@ #' @param plot_focal_cos_outcome_only Logical, if TRUE only plots colocalization including at least on colocalized outcome with focal outcome, default is FALSE. #' @param points_color Background color for non-colocalized variables, default is "grey80". #' @param cos_color Optional custom colors for CoS. -#' @param add_vertical Logical, if TRUE adds vertical lines at specified positions, default is FALSE -#' @param add_vertical_idx Optional indices for vertical lines. +#' @param add_highlight Logical, if TRUE adds vertical lines at specified positions, default is FALSE +#' @param add_highlight_style Optional style of add_highlight variables, default is "vertical_lines", other choice is "star" - red star. +#' @param add_highlight_idx Optional indices for add_highlight variables. #' @param outcome_names Optional vector of outcomes names for the subtitle of each figure. \code{outcome_names=NULL} for the outcome name shown in \code{data_info}. #' @param plot_cols Number of columns in the plot grid, default is 2. If you have many colocalization. please consider increasing this. #' @param variant_coord Logical, if TRUE uses variant coordinates on x-axis, default is FALSE. This is required the variable names including position information. @@ -78,8 +79,9 @@ colocboost_plot <- function(cb_output, y = "log10p", plot_focal_cos_outcome_only = FALSE, points_color = "grey80", cos_color = NULL, - add_vertical = FALSE, - add_vertical_idx = NULL, + add_highlight = FALSE, + add_highlight_idx = NULL, + add_highlight_style = "vertical_lines", outcome_names = NULL, plot_cols = 2, variant_coord = FALSE, @@ -102,33 +104,34 @@ colocboost_plot <- function(cb_output, y = "log10p", if (!inherits(cb_output, "colocboost")) { stop("Input of colocboost_plot must be a 'colocboost' object!") } - + # get cb_plot_input data from colocboost results cb_plot_input <- get_input_plot(cb_output, - plot_cos_idx = plot_cos_idx, - variant_coord = variant_coord, - outcome_names = outcome_names, - plot_focal_only = plot_focal_only, - plot_focal_cos_outcome_only = plot_focal_cos_outcome_only, - show_cos_to_uncoloc = show_cos_to_uncoloc, - show_cos_to_uncoloc_idx = show_cos_to_uncoloc_idx, - show_cos_to_uncoloc_outcome = show_cos_to_uncoloc_outcome, - plot_ucos = plot_ucos, plot_ucos_idx = plot_ucos_idx + plot_cos_idx = plot_cos_idx, + variant_coord = variant_coord, + outcome_names = outcome_names, + plot_focal_only = plot_focal_only, + plot_focal_cos_outcome_only = plot_focal_cos_outcome_only, + show_cos_to_uncoloc = show_cos_to_uncoloc, + show_cos_to_uncoloc_idx = show_cos_to_uncoloc_idx, + show_cos_to_uncoloc_outcome = show_cos_to_uncoloc_outcome, + plot_ucos = plot_ucos, plot_ucos_idx = plot_ucos_idx ) # get initial set up of plot cb_plot_init <- plot_initial(cb_plot_input, - y = y, points_color = points_color, cos_color = cos_color, - ylim_each = ylim_each, title_specific = title_specific, - outcome_legend_pos = outcome_legend_pos, outcome_legend_size = outcome_legend_size, - cos_legend_pos = cos_legend_pos, plot_ucos = plot_ucos, - show_variable = show_variable, lab_style = lab_style, axis_style = axis_style, - title_style = title_style, ... + y = y, points_color = points_color, cos_color = cos_color, + ylim_each = ylim_each, title_specific = title_specific, + outcome_legend_pos = outcome_legend_pos, outcome_legend_size = outcome_legend_size, + cos_legend_pos = cos_legend_pos, plot_ucos = plot_ucos, + show_variable = show_variable, lab_style = lab_style, axis_style = axis_style, + title_style = title_style, ... ) - + colocboost_plot_basic <- function(cb_plot_input, cb_plot_init, outcome_idx = NULL, plot_all_outcome = FALSE, grange = NULL, plot_cols = 2, - add_vertical = FALSE, add_vertical_idx = NULL, + add_highlight = FALSE, add_highlight_idx = NULL, + add_highlight_style = "vertical_lines", show_top_variables = TRUE, ...) { args <- list(...) @@ -147,12 +150,12 @@ colocboost_plot <- function(cb_output, y = "log10p", args$font.lab <- cb_plot_init$lab_face # - change position cb_plot_init$outcome_legend_pos <- switch(cb_plot_init$outcome_legend_pos, - "right" = 4, - "left" = 2, - "top" = 3, - "bottom" = 1 + "right" = 4, + "left" = 2, + "top" = 3, + "bottom" = 1 ) - + # - begin plotting coloc_cos <- cb_plot_input$cos outcomes <- cb_plot_input$outcomes @@ -229,12 +232,7 @@ colocboost_plot <- function(cb_output, y = "log10p", cex = cb_plot_init$outcome_legend_size, font = 1 ) } - if (add_vertical) { - for (iii in 1:length(add_vertical_idx)) { - abline(v = add_vertical_idx[iii], col = "#E31A1C", lwd = 1.5, lty = "dashed") - } - } - + # mark variables in CoS to colocalized outcomes if (!is.null(coloc_cos)) { n.coloc <- length(coloc_cos) @@ -242,7 +240,7 @@ colocboost_plot <- function(cb_output, y = "log10p", if (length(y)==1){coloc_index = lapply(coloc_index, function(i) 1 )} legend_text <- list(col = vector()) legend_text$col <- head(cb_plot_init$col, n.coloc) - + # check which coloc set for this outcome p.coloc <- sapply(coloc_index, function(idx) sum(idx == iy) != 0) p.coloc <- which(p.coloc) @@ -261,7 +259,7 @@ colocboost_plot <- function(cb_output, y = "log10p", points(x_hits, y_hits, pch = 21, bg = legend_text$col[i.cs], col = "#E31A1C", cex = 3, lwd = 3) } } - + # mark variables in CoS to uncolocalized outcomes uncoloc <- cb_plot_input$uncoloc if (!is.null(uncoloc)) { @@ -276,8 +274,8 @@ colocboost_plot <- function(cb_output, y = "log10p", x0 <- intersect(args$x, cs) y1 <- args$y[match(x0, args$x)] points(x0, y1, - pch = 4, col = adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 0.4), - cex = 1.5, lwd = 1.5 + pch = 4, col = adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 0.4), + cex = 1.5, lwd = 1.5 ) shape_col <- c(shape_col, adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 1)) texts_col <- c(texts_col, adjustcolor(legend_text$col[uncoloc$cos_idx_to_uncoloc[i.uncoloc]], alpha.f = 0.8)) @@ -294,35 +292,51 @@ colocboost_plot <- function(cb_output, y = "log10p", if (length(texts) == 0) { next } - + # Get current plot area coordinates usr <- par("usr") x_pos <- usr[1] + cb_plot_init$cos_legend_pos[1] * (usr[2] - usr[1]) # 5% from left edge y_pos <- usr[3] + cb_plot_init$cos_legend_pos[2] * (usr[4] - usr[3]) # 50% from bottom edge legend(x = x_pos, y = y_pos, texts, - bty = "n", col = shape_col, text.col = texts_col, - cex = 1.5, pt.cex = 1.5, pch = 4, x.intersp = cos_legend_pos[3], y.intersp = cos_legend_pos[4] + bty = "n", col = shape_col, text.col = texts_col, + cex = 1.5, pt.cex = 1.5, pch = 4, x.intersp = cos_legend_pos[3], y.intersp = cos_legend_pos[4] ) } } + if (add_highlight) { + for (iii in 1:length(add_highlight_idx)) { + if ( !(add_highlight_style %in% c("vertical_lines", "star")) ){ + message("add_highlight_style is not 'vertical_lines' and 'star', defaulting to 'vertical_lines'") + add_highlight_style = "vertical_lines" + } + if (add_highlight_style == "vertical_lines"){ + abline(v = add_highlight_idx[iii], col = "#E31A1C", lwd = 1.5, lty = "dashed") + } else if (add_highlight_style == "star") { + points(add_highlight_idx[iii], args$y[match(add_highlight_idx[iii],args$x)], + pch = 8, col = "#E31A1C", lwd = 2, lty = "dashed", cex = 2.5) + } + + } + } } if (!is.null(cb_plot_init$title)) { mtext(cb_plot_init$title, - side = 3, line = 0, outer = TRUE, - cex = cb_plot_init$title_size, font = cb_plot_init$title_face + side = 3, line = 0, outer = TRUE, + cex = cb_plot_init$title_size, font = cb_plot_init$title_face ) } return(invisible()) } - + colocboost_plot_basic(cb_plot_input, cb_plot_init, - grange = grange, - outcome_idx = outcome_idx, - plot_all_outcome = plot_all_outcome, - plot_cols = plot_cols, - add_vertical = add_vertical, add_vertical_idx = add_vertical_idx, - show_top_variables = show_top_variables, - ... + grange = grange, + outcome_idx = outcome_idx, + plot_all_outcome = plot_all_outcome, + plot_cols = plot_cols, + add_highlight = add_highlight, add_highlight_idx = add_highlight_idx, + add_highlight_style = add_highlight_style, + show_top_variables = show_top_variables, + ... ) } @@ -339,7 +353,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, show_cos_to_uncoloc_outcome = NULL, plot_ucos = FALSE, plot_ucos_idx = NULL) { - + # check ucos exists if (plot_ucos && !"ucos_details" %in% names(cb_output)) { warning( @@ -354,7 +368,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, cb_output$data_info$outcome_info$outcome_names <- outcome_names names(cb_output$data_info$z) <- outcome_names } - + # extract results from colocboost analysis_outcome <- cb_output$data_info$outcome_info$outcome_names focal_outcome_idx <- which(cb_output$data_info$outcome_info$is_focal) @@ -371,7 +385,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, Z <- cb_output$data_info$z coef <- cb_output$data_info$coef vcp <- list(as.numeric(cb_output$vcp)) - + # if finemapping if (cb_output$data_info$n_outcomes == 1) { cb_output$cos_details$cos$cos_variables <- cb_output$ucos_details$ucos$ucos_variables @@ -406,7 +420,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, } }) ncos <- length(cb_output$cos_details$cos$cos_index) - + select_cs <- 1:ncos if (!is.null(plot_cos_idx)) { if (length(setdiff(plot_cos_idx, c(1:ncos))) != 0) { @@ -477,7 +491,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, "coloc_index" = coloc_index, "select_cos" = select_cs ) - + # check plot uncolocalizaed confidence sets from ucos_details if (plot_ucos & cb_output$data_info$n_outcomes > 1){ ucos_details <- cb_output$ucos_details @@ -528,7 +542,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, plot_input$ucos_cos_int_weights <- combined } } - + # check if plot cos to uncolocalized outcome # use the updated coloc_cos and related components from plot_input if available coloc_cos <- plot_input$cos @@ -613,7 +627,7 @@ get_input_plot <- function(cb_output, plot_cos_idx = NULL, ) plot_input$uncoloc <- uncoloc } - + class(plot_input) <- "colocboost" return(plot_input) } @@ -634,11 +648,11 @@ plot_initial <- function(cb_plot_input, y = "log10p", ...) { args <- list(...) if (!exists("pch", args)) args$pch <- 16 - + # - set background point color and cos color pools args$bg <- points_color if (is.null(cos_color)) { - + # cos_color <- c( # "#377EB8", "#E69F00", "#33A02C", "#984EA3", "#F46D43", # "#A65628", "#1F4E79", "#B2182B", "#D73027", "#F781BF", @@ -688,12 +702,12 @@ plot_initial <- function(cb_plot_input, y = "log10p", "#D5D8DC", "#BDC3C7", "#95A5A6", "#7F8C8D", "#17202A", "#808080", "#A9A9A9", "#C0C0C0", "#696969", "#778899" ) - - + + # cos_color <- c("#1F70A9", "#33A02C", "#CAB2D6", "#EA7827") } args$col <- cos_color[cb_plot_input$select_cos] - + # - set data and x-lab and y-lab if (y == "log10p") { plot_data <- lapply(cb_plot_input$Zscores, function(z) { @@ -729,12 +743,12 @@ plot_initial <- function(cb_plot_input, y = "log10p", if (!exists("ylab", args)) args$ylab <- ylab args$lab_size <- as.numeric(lab_style[1]) args$lab_face <- lab_style[2] - + # - set title format args$title <- title_specific args$title_size <- as.numeric(title_style[1]) args$title_face <- title_style[2] - + # - set x-axis and y-axis args$x <- cb_plot_input$x$pos args$y <- plot_data @@ -744,7 +758,7 @@ plot_initial <- function(cb_plot_input, y = "log10p", } args$axis_size <- as.numeric(axis_style[1]) args$axis_face <- axis_style[2] - + # - set ylim for each subfigure if (exists("ylim", args)) { ymax <- rep(args$ylim[2], length(args$y)) @@ -776,7 +790,7 @@ plot_initial <- function(cb_plot_input, y = "log10p", } args$ymax <- ymax args$ymin <- ymin - + # - set legend text position and format args$outcome_legend_pos <- outcome_legend_pos args$outcome_legend_size <- outcome_legend_size @@ -788,7 +802,7 @@ plot_initial <- function(cb_plot_input, y = "log10p", args$outcome_legend_angle <- 0 } args$cos_legend_pos <- cos_legend_pos - + return(args) } diff --git a/R/colocboost_update.R b/R/colocboost_update.R index 6100828..734bcb1 100644 --- a/R/colocboost_update.R +++ b/R/colocboost_update.R @@ -74,7 +74,8 @@ colocboost_update <- function(cb_model, cb_model_para, cb_data) { # weights <- adj_dep * ld_feature / scaling_factor * exp_abs_cor / sum(exp_abs_cor) weights <- adj_dep * obj_ld * exp_abs_cor / sum(exp_abs_cor) weights <- weights / sum(weights) - cb_model[[i]]$weights_path <- rbind(cb_model[[i]]$weights_path, as.vector(weights)) + # cb_model[[i]]$weights_path <- rbind(cb_model[[i]]$weights_path, as.vector(weights)) + cb_model[[i]]$weights_path <- c(cb_model[[i]]$weights_path, list(as.vector(weights))) ########## END: MAIN CALCULATION ################### diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R index c4da011..9bfa3fd 100644 --- a/R/colocboost_utils.R +++ b/R/colocboost_utils.R @@ -426,15 +426,20 @@ get_max_profile <- function(cb_obj, check_null_max = 0.025, } +# ### Function for check cs for each weight +# w_cs <- function(weights, coverage = 0.95) { +# indices <- unlist(get_in_cos(weights, coverage = coverage)) +# result <- rep(0, length(weights)) +# result[indices] <- 1 +# return(result) +# } + ### Function for check cs for each weight w_cs <- function(weights, coverage = 0.95) { indices <- unlist(get_in_cos(weights, coverage = coverage)) - result <- rep(0, length(weights)) - result[indices] <- 1 - return(result) + return(indices) } - #' Pure R implementation (fallback) #' @noRd get_merge_ordered_with_indices <- function(vector_list) { @@ -505,12 +510,14 @@ get_merge_ordered_with_indices <- function(vector_list) { # Step 4: Topological sort using Kahn's algorithm # Start with nodes that have no incoming edges queue <- all_elements[sapply(all_elements, function(elem) in_degree[[elem]] == 0)] - result <- character() + result <- list() + result_idx <- 1 while (length(queue) > 0) { # Take the first element from the queue current <- queue[1] queue <- queue[-1] - result <- c(result, current) + result[[result_idx]] <- current # List assignment is fast + result_idx <- result_idx + 1 # Process all neighbors (elements that must come after current) neighbors <- graph[[current]] for (next_elem in neighbors) { @@ -520,6 +527,7 @@ get_merge_ordered_with_indices <- function(vector_list) { } } } + result <- unlist(result) # Step 5: Check for cycles and use fallback if needed if (length(result) != n_elements) { # Different variable orders detected - use priority-based fallback @@ -657,8 +665,6 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL) { }) int_weight <- lapply(cos_weights, get_integrated_weight, weight_fudge_factor = cb_obj$cb_model_para$weight_fudge_factor) names(int_weight) <- names(cos_weights) <- colocset_names - vcp <- as.vector(1 - apply(1 - do.call(cbind, int_weight), 1, prod)) - names(vcp) <- data_info$variables # - resummary results cos_re_idx <- lapply(int_weight, function(w) { @@ -668,7 +674,55 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL) { data_info$variables[idx] }) coloc_csets <- list("cos_index" = cos_re_idx, "cos_variables" = cos_re_var) - + + # - recalculate purity + purity <- vector(mode = "list", length = length(cos_re_idx)) + for (ee in 1:length(cos_re_idx)) { + coloc_t <- coloc_outcome_index[[ee]] + p_tmp <- c() + for (i3 in coloc_t) { + pos <- cos_re_idx[[ee]] + X_dict <- cb_obj$cb_data$dict[i3] + if (!is.null(cb_obj$cb_data$data[[X_dict]]$XtX)) { + pos <- match(pos, setdiff(1:cb_obj$cb_model_para$P, cb_obj$cb_data$data[[i3]]$variable_miss)) + } + tmp <- matrix(get_purity(pos, + X = cb_obj$cb_data$data[[X_dict]]$X, + Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, + N = cb_obj$cb_data$data[[i3]]$N, n = cb_obj$cb_model_para$n_purity + ), 1, 3) + p_tmp <- rbind(p_tmp, tmp) + } + purity[[ee]] <- matrix(apply(p_tmp, 2, max), 1, 3) + } + purity_all <- do.call(rbind, purity) + purity_all <- as.data.frame(purity_all) + colnames(purity_all) <- c("min_abs_corr", "mean_abs_corr", "median_abs_corr") + coloc_out$purity <- purity_all + if (is.null(cb_obj$cb_model_para$median_abs_corr)) { + is_pure <- which(purity_all[, 1] >= cb_obj$cb_model_para$min_abs_corr) + } else { + is_pure <- which(purity_all[, 1] >= cb_obj$cb_model_para$min_abs_corr | purity_all[, 3] >= cb_obj$cb_model_para$median_abs_corr) + } + if (length(is_pure)==0){ + coloc_results <- NULL + vcp <- NULL + return(list("cos_results" = coloc_results, "vcp" = vcp)) + } else if (length(is_pure)!=length(cos_re_idx)){ + int_weight = int_weight[is_pure] + coloc_csets <- lapply(coloc_csets, function(cs) cs[is_pure]) + coloc_outcomes <- lapply(coloc_outcomes, function(cs) cs[is_pure]) + normalization_evidence <- normalization_evidence[is_pure] + npc <- npc[is_pure] + cos_min_npc_outcome <- cos_min_npc_outcome[is_pure] + cos_weights <- cos_weights[is_pure] + coloc_out$purity <- purity_all[is_pure,,drop = FALSE] + colocset_names <- colocset_names[is_pure] + } + vcp <- as.vector(1 - apply(1 - do.call(cbind, int_weight), 1, prod)) + names(vcp) <- data_info$variables + + # - hits variables in each csets coloc_hits <- coloc_hits_variablenames <- coloc_hits_names <- c() for (i in 1:length(int_weight)) { @@ -839,7 +893,7 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output cb_model_para <- cb_obj$cb_model_para ## - obtain the order of variables based on the variables names if it has position information - if (!is.null(variables)) { + if (is.null(variables)) { ordered <- 1:length(cb_obj$cb_model_para$variables) } else { ordered <- match(variables, cb_obj$cb_model_para$variables) @@ -933,7 +987,7 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output # - hits variables in each csets cs_hits <- sapply(1:length(specific_w), function(jj) { inw <- specific_w[[jj]] - sample(which(inw == max(inw)), 1) + which(inw == max(inw))[1] }) cs_hits_variablenames <- sapply(cs_hits, function(ch) variables[ch]) specific_cs_hits <- data.frame("top_index" = cs_hits, "top_variables" = cs_hits_variablenames) # save diff --git a/R/colocboost_workhorse.R b/R/colocboost_workhorse.R index 7583eee..4462680 100644 --- a/R/colocboost_workhorse.R +++ b/R/colocboost_workhorse.R @@ -103,6 +103,9 @@ colocboost_workhorse <- function(cb_data, } if (sum(cb_model_para$update_y == 1) == 0) { + for (i in 1:length(cb_model)){ + cb_model[[i]]$weights_path <- matrix(0, ncol = cb_model_para$P) + } cb_obj <- list("cb_data" = cb_data, "cb_model" = cb_model, "cb_model_para" = cb_model_para) class(cb_obj) <- "colocboost" return(cb_obj) @@ -236,9 +239,8 @@ colocboost_workhorse <- function(cb_data, cb_model_para$num_updates <- m for (i in 1:length(cb_model)) { cb_model[[i]]$obj_path <- as.numeric(unlist(cb_model[[i]]$obj_path[-1])) - } - for (i in 1:length(cb_model)) { cb_model[[i]]$obj_single <- as.numeric(unlist(cb_model[[i]]$obj_single[-1])) + cb_model[[i]]$weights_path <- do.call(rbind, cb_model[[i]]$weights_path) } if (m == M) { diff --git a/man/colocboost_plot.Rd b/man/colocboost_plot.Rd index fe155fc..8e5c7df 100644 --- a/man/colocboost_plot.Rd +++ b/man/colocboost_plot.Rd @@ -18,8 +18,9 @@ colocboost_plot( plot_focal_cos_outcome_only = FALSE, points_color = "grey80", cos_color = NULL, - add_vertical = FALSE, - add_vertical_idx = NULL, + add_highlight = FALSE, + add_highlight_idx = NULL, + add_highlight_style = "vertical_lines", outcome_names = NULL, plot_cols = 2, variant_coord = FALSE, @@ -62,9 +63,11 @@ colocboost_plot( \item{cos_color}{Optional custom colors for CoS.} -\item{add_vertical}{Logical, if TRUE adds vertical lines at specified positions, default is FALSE} +\item{add_highlight}{Logical, if TRUE adds vertical lines at specified positions, default is FALSE} -\item{add_vertical_idx}{Optional indices for vertical lines.} +\item{add_highlight_idx}{Optional indices for add_highlight variables.} + +\item{add_highlight_style}{Optional style of add_highlight variables, default is "vertical_lines", other choice is "star" - red star.} \item{outcome_names}{Optional vector of outcomes names for the subtitle of each figure. \code{outcome_names=NULL} for the outcome name shown in \code{data_info}.} diff --git a/tests/testthat/test_plot.R b/tests/testthat/test_plot.R index 775d3fe..d49aace 100644 --- a/tests/testthat/test_plot.R +++ b/tests/testthat/test_plot.R @@ -176,7 +176,9 @@ test_that("colocboost_plot handles layout options", { test_that("colocboost_plot handles additional visualization options", { # Test with vertical line options - expect_error(suppressWarnings(colocboost_plot(cb_res, add_vertical = TRUE, add_vertical_idx = c(5, 10))), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, add_highlight = TRUE, add_highlight_idx = c(5, 10))), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, add_highlight = TRUE, add_highlight_idx = c(5, 10), add_highlight_style = "vertical_lines")), NA) + expect_error(suppressWarnings(colocboost_plot(cb_res, add_highlight = TRUE, add_highlight_idx = c(5, 10), add_highlight_style = "star")), NA) # Test with show_top_variables option expect_error(suppressWarnings(colocboost_plot(cb_res, show_top_variables = TRUE)), NA) diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index a622540..d3d480d 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -85,12 +85,12 @@ test_that("w_cs correctly identifies confidence set for weight vector", { result <- w_cs(w, coverage = 0.8) # Expected result for 80% coverage: w[1] + w[2] = 0.8, so first 2 elements should be 1 - expected <- c(1, 1, 0, 0) + expected <- c(1,2) expect_equal(result, expected) # Test with different coverage result2 <- w_cs(w, coverage = 0.9) - expected2 <- c(1, 1, 1, 1) # First 4 elements cover 90% since 3 and 4 with the same 0.1 weight + expected2 <- c(1,2,3,4) # First 4 elements cover 90% since 3 and 4 with the same 0.1 weight expect_equal(result2, expected2) }) @@ -490,7 +490,7 @@ test_that("get_merge_ordered_with_indices handles conflicting variable orders co test_that("get_merge_ordered_with_indices handles edge cases", { # Test with NULL inputs - expect_equal(get_merge_ordered_with_indices(list()), character(0)) + expect_equal(get_merge_ordered_with_indices(list()), NULL) # Test with single vector expect_equal(get_merge_ordered_with_indices(list(c("A", "B", "C"))), c("A", "B", "C")) diff --git a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd index 6b5a5a4..c23bc2e 100644 --- a/vignettes/ColocBoost_Wrapper_Pipeline.Rmd +++ b/vignettes/ColocBoost_Wrapper_Pipeline.Rmd @@ -63,7 +63,7 @@ outputs: The following example demonstrates how to set up input data with 3 phenotypes and 2 cohorts. The first cohort has 2 phenotypes and the second cohort has 1 phenotype. The first phenotype has 2 genes and the second phenotype has 1 gene. -```{r, data-loader-individual} +```{r, data-loader-individual, eval = FALSE} # Example of loading individual-level data region = "chr1:1000000-2000000" genotype_list = c("plink_cohort1.1", "plink_cohort1.2") @@ -84,7 +84,7 @@ xvar_cutoff = 0 imiss_cutoff = 0.9 # More advanced parameters see pecotmr::load_multitask_regional_data() - +\dontrun{ region_data_individual <- load_multitask_regional_data( region = region, genotype_list = genotype_list, @@ -102,7 +102,7 @@ region_data_individual <- load_multitask_regional_data( xvar_cutoff = xvar_cutoff, imiss_cutoff = imiss_cutoff ) - +} ``` @@ -127,7 +127,7 @@ outputs: The following example demonstrates how to set up input data with 2 summary statistics and one LD reference. -```{r, data-loader-sumstat} +```{r, data-loader-sumstat, eval = FALSE} # Example of loading summary statistics sumstat_path_list = c("sumstat1.tsv.gz", "sumstat2.tsv.gz") column_file_path_list = c("column_mapping_sumstat1.yml", "column_mapping_sumstat2.yml") @@ -143,7 +143,7 @@ n_controls = c(0, 40000) # More advanced parameters see pecotmr::load_multitask_regional_data() - +\dontrun{ region_data_sumstat <- load_multitask_regional_data( sumstat_path_list = sumstat_path_list, column_file_path_list = column_file_path_list, @@ -155,6 +155,7 @@ region_data_sumstat <- load_multitask_regional_data( n_cases = n_cases, n_controls = n_controls ) +} ``` @@ -222,8 +223,10 @@ Example: for sQTL, `list(type_pattern = ".*clu_(\\d+_[+-?]).*", valid_pattern = outputs: - **`colocboost_results`**: List of colocboost objects (with `xqtl_coloc`, `joint_gwas`, `separate_gwas`); Output of the `colocboost_analysis_pipeline` function. If the mode is not run, the corresponding element will be `NULL`. -```{r, colocboost-analysis} +```{r, colocboost-analysis, eval = FALSE} + # load in individual-level and sumstat data +\dontrun{ region_data_combined <- load_multitask_regional_data( region = region, genotype_list = genotype_list, @@ -249,6 +252,7 @@ region_data_combined <- load_multitask_regional_data( n_cases = n_cases, n_controls = n_controls ) +} maf_cutoff = 0.01 pip_cutoff_to_skip_ind = rep(0, length(phenotype_list)) @@ -277,4 +281,4 @@ colocboost_plot(colocboost_results$joint_gwas) for (i in 1:length(colocboost_results$separate_gwas)) { colocboost_plot(colocboost_results$separate_gwas[[i]]) } -``` \ No newline at end of file +``` diff --git a/vignettes/Visualization_ColocBoost_Output.Rmd b/vignettes/Visualization_ColocBoost_Output.Rmd index 1ffd7d9..8ed3e5b 100644 --- a/vignettes/Visualization_ColocBoost_Output.Rmd +++ b/vignettes/Visualization_ColocBoost_Output.Rmd @@ -94,9 +94,11 @@ There are three options available for plotting the CoS variants to uncolocalized colocboost_plot(res, show_cos_to_uncoloc = TRUE) ``` -### 2.4. Plot with an added vertical line +### 2.4. Plot with added highlight points -You can add a vertical line to the plot by setting `add_vertical = TRUE` and `add_vertical_idx = **`. This will add a vertical line at the specified index. +You can highlight specific variants in the plot by setting `add_highlight = TRUE` and `add_highlight_idx = **`. +This will add red dashed vertical lines (defult `add_highlight_style = "vertical_lines"`) at the specified index you want to highlight. +Alternatively, you can use `add_highlight_style = "star"` to change the highlight style to the red star for the specified variants. For example, to add a vertical line at true causal variants, you can set `add_vertical_idx = unique(unlist(Ind_5traits$true_effect_variants))`. Following plot also shows the top variants. @@ -104,8 +106,9 @@ Following plot also shows the top variants. ```{r vertical-plot} colocboost_plot( res, show_top_variables = TRUE, - add_vertical = TRUE, - add_vertical_idx = unique(unlist(Ind_5traits$true_effect_variants)) + add_highlight = TRUE, + add_highlight_idx = unique(unlist(Ind_5traits$true_effect_variants)), + add_highlight_style = "star" ) ```