Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/tin.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ typedef std::unordered_map<std::string, std::tuple<std::string, int, int, double
// (Reference: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-0922-z#Sec11)
void calculateTIN(TINStats* tin_stats, const std::string& gene_bed, const std::string& bam_filepath, int min_cov, int sample_size, const std::string& output_folder, int thread_count);

std::unordered_map<int, int> getReadDepths(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end);
std::unordered_map<int, int> getReadDepths(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end, bool transcript_strand);

bool checkMinReads(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end, int min_reads);

Expand Down
4 changes: 2 additions & 2 deletions src/hts_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,10 @@ int HTSReader::updateReadAndBaseCounts(bam1_t* record, Basic_Seq_Statistics& bas
break;
case 'N':
basic_qc.total_n_cnt++;
std::cerr << "Warning: N base found in read " << bam_get_qname(record) << std::endl;
// std::cerr << "Warning: N base found in read " << bam_get_qname(record) << std::endl;
break;
default:
printError("Invalid base: " + std::to_string(base));
// printError("Invalid base: " + std::to_string(base));
break;
}
}
Expand Down
24 changes: 20 additions & 4 deletions src/tin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

#include "tin_stats.h"

std::unordered_map<int, int> getReadDepths(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end)
std::unordered_map<int, int> getReadDepths(htsFile* bam_file, hts_idx_t* idx, bam_hdr_t* header, std::string chr, int start, int end, bool transcript_strand)
{
// Set up the region to fetch reads (1-based)
std::string region = chr + ":" + std::to_string(start) + "-" + std::to_string(end);
Expand Down Expand Up @@ -46,6 +46,13 @@ std::unordered_map<int, int> getReadDepths(htsFile* bam_file, hts_idx_t* idx, ba
continue;
}

// Skip if on a different strand
bool read_strand = (record->core.flag & BAM_FREVERSE) ? false : true;
if (read_strand != transcript_strand) {
skip_count++;
continue;
}

read_count++;

// Clear positions for each read
Expand Down Expand Up @@ -194,10 +201,13 @@ void calculateTIN(TINStats* tin_stats, const std::string& gene_bed, const std::s

// Get the index for the BAM file
hts_idx_t* index = sam_index_load(bam_file, bam_filepath.c_str());
if (index == NULL) {
std::cerr << "Error loading BAM index" << std::endl;
exit(1);
}

// Read the gene BED file
std::ifstream gene_bed_file(gene_bed);

if (!gene_bed_file.is_open()) {
std::cerr << "Error opening gene BED file" << std::endl;
exit(1);
Expand Down Expand Up @@ -242,6 +252,12 @@ void calculateTIN(TINStats* tin_stats, const std::string& gene_bed, const std::s
continue;
}

std::cout << "Calculating TIN for transcript " << name << " with strand " << strand << std::endl;
bool strand_bool = true;
if (strand == "-") {
strand_bool = false;
}

// Remove the last comma from the exon sizes and starts strings
if (exon_sizes_str.back() == ',') {
exon_sizes_str.pop_back();
Expand Down Expand Up @@ -278,7 +294,7 @@ void calculateTIN(TINStats* tin_stats, const std::string& gene_bed, const std::s
int exon_end = exon_start + exon_sizes[i] - 1;

// Get the depths and cumulative depths for the region
std::unordered_map<int, int> exon_depths = getReadDepths(bam_file, index, header, chrom, exon_start, exon_end);
std::unordered_map<int, int> exon_depths = getReadDepths(bam_file, index, header, chrom, exon_start, exon_end, strand_bool);
for (const auto& depth : exon_depths) {
C[depth.first] = depth.second;
}
Expand All @@ -287,7 +303,7 @@ void calculateTIN(TINStats* tin_stats, const std::string& gene_bed, const std::s
// Get the read depths for the transcript start+1 and end
std::vector<int> transcript_positions = {start + 1, end};
for (const auto& position : transcript_positions) {
std::unordered_map<int, int> transcript_depths = getReadDepths(bam_file, index, header, chrom, position, position);
std::unordered_map<int, int> transcript_depths = getReadDepths(bam_file, index, header, chrom, position, position, strand_bool);
for (const auto& depth : transcript_depths) {
C[depth.first] = depth.second;
}
Expand Down
6 changes: 3 additions & 3 deletions tests/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,19 +755,19 @@ def test_tin_mean(self, rnaseq_bam_output):
output_statistics = rnaseq_bam_output[1]
input_file = rnaseq_bam_output[2]
tin_mean = output_statistics.getTINMean(input_file)
assert round(tin_mean, 1) == 63.6
assert round(tin_mean, 1) == 60.2

@pytest.mark.dependency(depends=["TestRNASeqBAM::test_success"])
def test_tin_median(self, rnaseq_bam_output):
output_statistics = rnaseq_bam_output[1]
input_file = rnaseq_bam_output[2]
tin_median = output_statistics.getTINMedian(input_file)
assert round(tin_median, 1) == 83.7
assert round(tin_median, 1) == 70.0

@pytest.mark.dependency(depends=["TestRNASeqBAM::test_success"])
def test_tin_stddev(self, rnaseq_bam_output):
output_statistics = rnaseq_bam_output[1]
input_file = rnaseq_bam_output[2]
tin_stddev = output_statistics.getTINStdDev(input_file)
assert round(tin_stddev, 1) == 32.6
assert round(tin_stddev, 1) == 31.0

Loading