Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions R/LD.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ extract_file_paths <- function(genomic_data, intersection_rows, column_to_extrac
#' @importFrom dplyr select
#' @importFrom vroom vroom
#' @noRd
get_regional_ld_meta <- function(ld_reference_meta_file, region, complete_coverage_required = FALSE) {
genomic_data <- vroom(ld_reference_meta_file)
get_regional_ld_meta <- function(ld_reference_meta_file, region, complete_coverage_required = FALSE, num_threads = 1) {
genomic_data <- vroom(ld_reference_meta_file, num_threads = num_threads)
region <- parse_region(region)
# Set column names
names(genomic_data) <- c("chrom", "start", "end", "path")
Expand Down Expand Up @@ -318,9 +318,9 @@ create_combined_LD_matrix <- function(LD_matrices, variants) {
#' further partitioning if needed.}
#' }
#' @export
load_LD_matrix <- function(LD_meta_file_path, region, extract_coordinates = NULL) {
load_LD_matrix <- function(LD_meta_file_path, region, extract_coordinates = NULL, num_threads = 1) {
# Intersect LD metadata with specified regions using updated function
intersected_LD_files <- get_regional_ld_meta(LD_meta_file_path, region)
intersected_LD_files <- get_regional_ld_meta(LD_meta_file_path, region, num_threads = num_threads)

# Extract file paths for LD and bim files
LD_file_paths <- intersected_LD_files$intersections$LD_file_paths
Expand Down Expand Up @@ -403,7 +403,7 @@ load_LD_matrix <- function(LD_meta_file_path, region, extract_coordinates = NULL
#' @importFrom dplyr select group_by summarise
#' @importFrom vroom vroom
#' @export
filter_variants_by_ld_reference <- function(variant_ids, ld_reference_meta_file, keep_indel = TRUE) {
filter_variants_by_ld_reference <- function(variant_ids, ld_reference_meta_file, keep_indel = TRUE, num_threads = 1) {
# Step 1: Process variant IDs into a data frame and filter out non-standard nucleotides
variants_df <- do.call(rbind, lapply(strsplit(variant_ids, ":"), function(x) {
data.frame(chrom = x[1], pos = as.integer(x[2]), ref = x[3], alt = x[4])
Expand All @@ -418,11 +418,11 @@ filter_variants_by_ld_reference <- function(variant_ids, ld_reference_meta_file,
group_by(chrom) %>%
summarise(start = min(pos), end = max(pos))
# Step 3: Call get_regional_ld_meta to get bim_file_paths
bim_file_paths <- get_regional_ld_meta(ld_reference_meta_file, region_df)$intersections$bim_file_paths
bim_file_paths <- get_regional_ld_meta(ld_reference_meta_file, region_df, num_threads = num_threads)$intersections$bim_file_paths

# Step 4: Load bim files and consolidate into a single data frame
bim_data <- lapply(bim_file_paths, function(path) {
bim_df <- vroom(path, col_names = FALSE)
bim_df <- vroom(path, col_names = FALSE, num_threads = num_threads)
data.frame(chrom = bim_df$X1, pos = bim_df$X4, stringsAsFactors = FALSE)
}) %>%
do.call("rbind", .)
Expand Down
17 changes: 15 additions & 2 deletions R/allele_qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,22 +76,29 @@ allele_qc <- function(target_data, ref_variants, col_to_flip = NULL,
} else {
target_data <- variant_id_to_df(target_data)
}

ref_variants <- variant_id_to_df(ref_variants)

columns_to_remove <- c("chromosome", "position", "ref", "alt", "variant_id")

# Check if any of the specified columns are present
if (any(columns_to_remove %in% colnames(target_data))) {
target_data <- select(target_data, -any_of(columns_to_remove))
}

match_result <- merge(target_data, ref_variants, by = c("chrom", "pos"), all = FALSE, suffixes = c(".target", ".ref")) %>% as.data.frame()
if (nrow(match_result) == 0) {
warning("No matching variants found between target data and reference variants.")
return(list(target_data_qced = match_result, qc_summary = match_result))
}
# match target & ref by chrom and position

# match target & ref by chrom and position
match_result = match_result %>%
mutate(variants_id_original = paste(chrom, pos, A2.target, A1.target, sep = ":")) %>%
mutate(variants_id_qced = paste(chrom, pos, A2.ref, A1.ref, sep = ":")) %>%
mutate(variants_id_qced = paste(chrom, pos, A2.ref, A1.ref, sep = ":"))


match_result = match_result %>%
# filter out totally same rows.
filter(duplicated(.) | !duplicated(.)) %>%
# upper case target/reference A1 A2
Expand Down Expand Up @@ -175,12 +182,18 @@ allele_qc <- function(target_data, ref_variants, col_to_flip = NULL,
}
}

# Normalize variant_id to chr{chrom}:{pos}:{A2}:{A1} format
result$variant_id <- normalize_variant_id(result$variant_id)

if (!remove_unmatched) {
match_variant <- result %>% pull(variants_id_original)
match_result <- select(match_result, -(flip1.ref:keep)) %>%
select(-variants_id_original, -A1.target, -A2.target) %>%
rename(A1 = A1.ref, A2 = A2.ref, variant_id = variants_id_qced)
# Normalize variant_id to chr{chrom}:{pos}:{A2}:{A1} format
match_result$variant_id <- normalize_variant_id(match_result$variant_id)
target_data <- target_data %>% mutate(variant_id = paste(chrom, pos, A2, A1, sep = ":"))
target_data$variant_id <- normalize_variant_id(target_data$variant_id)
if (length(setdiff(target_data %>% pull(variant_id), match_variant)) > 0) {
unmatch_data <- target_data %>% filter(!variant_id %in% match_variant)
result <- rbind(result, unmatch_data %>% mutate(variants_id_original = variant_id))
Expand Down
Loading
Loading