From f0ec689822426ab2743d7be27520dc62eebbc10b Mon Sep 17 00:00:00 2001 From: jonperdomo Date: Tue, 1 Apr 2025 09:16:53 -0400 Subject: [PATCH 1/2] calculate tin with strandedness --- include/tin.h | 2 +- src/hts_reader.cpp | 4 ++-- src/tin.cpp | 19 ++++++++++++++++--- tests/test_general.py | 6 +++--- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/include/tin.h b/include/tin.h index 195596e..fd4c330 100644 --- a/include/tin.h +++ b/include/tin.h @@ -17,7 +17,7 @@ typedef std::unordered_map getReadDepths(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end); +std::unordered_map getReadDepths(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end, bool transcript_strand); bool checkMinReads(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end, int min_reads); diff --git a/src/hts_reader.cpp b/src/hts_reader.cpp index 70a6410..844fbc0 100644 --- a/src/hts_reader.cpp +++ b/src/hts_reader.cpp @@ -81,10 +81,10 @@ int HTSReader::updateReadAndBaseCounts(bam1_t* record, Basic_Seq_Statistics& bas break; case 'N': basic_qc.total_n_cnt++; - std::cerr << "Warning: N base found in read " << bam_get_qname(record) << std::endl; + // std::cerr << "Warning: N base found in read " << bam_get_qname(record) << std::endl; break; default: - printError("Invalid base: " + std::to_string(base)); + // printError("Invalid base: " + std::to_string(base)); break; } } diff --git a/src/tin.cpp b/src/tin.cpp index 3beae15..d31650f 100644 --- a/src/tin.cpp +++ b/src/tin.cpp @@ -13,7 +13,7 @@ #include "tin_stats.h" -std::unordered_map getReadDepths(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end) +std::unordered_map getReadDepths(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end, bool transcript_strand) { // Set up the region to fetch reads (1-based) std::string region = chr + ":" + std::to_string(start) + "-" + std::to_string(end); @@ -46,6 +46,13 @@ std::unordered_map getReadDepths(htsFile* bam_file, hts_idx_t* idx, ba continue; } + // Skip if on a different strand + bool read_strand = (record->core.flag & BAM_FREVERSE) ? false : true; + if (read_strand != transcript_strand) { + skip_count++; + continue; + } + read_count++; // Clear positions for each read @@ -242,6 +249,12 @@ void calculateTIN(TINStats* tin_stats, const std::string& gene_bed, const std::s continue; } + std::cout << "Calculating TIN for transcript " << name << " with strand " << strand << std::endl; + bool strand_bool = true; + if (strand == "-") { + strand_bool = false; + } + // Remove the last comma from the exon sizes and starts strings if (exon_sizes_str.back() == ',') { exon_sizes_str.pop_back(); @@ -278,7 +291,7 @@ void calculateTIN(TINStats* tin_stats, const std::string& gene_bed, const std::s int exon_end = exon_start + exon_sizes[i] - 1; // Get the depths and cumulative depths for the region - std::unordered_map exon_depths = getReadDepths(bam_file, index, header, chrom, exon_start, exon_end); + std::unordered_map exon_depths = getReadDepths(bam_file, index, header, chrom, exon_start, exon_end, strand_bool); for (const auto& depth : exon_depths) { C[depth.first] = depth.second; } @@ -287,7 +300,7 @@ void calculateTIN(TINStats* tin_stats, const std::string& gene_bed, const std::s // Get the read depths for the transcript start+1 and end std::vector transcript_positions = {start + 1, end}; for (const auto& position : transcript_positions) { - std::unordered_map transcript_depths = getReadDepths(bam_file, index, header, chrom, position, position); + std::unordered_map transcript_depths = getReadDepths(bam_file, index, header, chrom, position, position, strand_bool); for (const auto& depth : transcript_depths) { C[depth.first] = depth.second; } diff --git a/tests/test_general.py b/tests/test_general.py index 98b2562..c0a8420 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -755,19 +755,19 @@ def test_tin_mean(self, rnaseq_bam_output): output_statistics = rnaseq_bam_output[1] input_file = rnaseq_bam_output[2] tin_mean = output_statistics.getTINMean(input_file) - assert round(tin_mean, 1) == 63.6 + assert round(tin_mean, 1) == 60.2 @pytest.mark.dependency(depends=["TestRNASeqBAM::test_success"]) def test_tin_median(self, rnaseq_bam_output): output_statistics = rnaseq_bam_output[1] input_file = rnaseq_bam_output[2] tin_median = output_statistics.getTINMedian(input_file) - assert round(tin_median, 1) == 83.7 + assert round(tin_median, 1) == 70.0 @pytest.mark.dependency(depends=["TestRNASeqBAM::test_success"]) def test_tin_stddev(self, rnaseq_bam_output): output_statistics = rnaseq_bam_output[1] input_file = rnaseq_bam_output[2] tin_stddev = output_statistics.getTINStdDev(input_file) - assert round(tin_stddev, 1) == 32.6 + assert round(tin_stddev, 1) == 31.0 \ No newline at end of file From 27a26f28ce8f373ba52ecc2b21833d9fd77c80e3 Mon Sep 17 00:00:00 2001 From: jonperdomo Date: Tue, 1 Apr 2025 10:01:15 -0400 Subject: [PATCH 2/2] bam index error handling --- src/tin.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/tin.cpp b/src/tin.cpp index d31650f..da945a1 100644 --- a/src/tin.cpp +++ b/src/tin.cpp @@ -201,10 +201,13 @@ void calculateTIN(TINStats* tin_stats, const std::string& gene_bed, const std::s // Get the index for the BAM file hts_idx_t* index = sam_index_load(bam_file, bam_filepath.c_str()); + if (index == NULL) { + std::cerr << "Error loading BAM index" << std::endl; + exit(1); + } // Read the gene BED file std::ifstream gene_bed_file(gene_bed); - if (!gene_bed_file.is_open()) { std::cerr << "Error opening gene BED file" << std::endl; exit(1);