From 9e786ac8ab4382049ad758c5e3cbf346d5630866 Mon Sep 17 00:00:00 2001
From: WardDeb
Date: Mon, 13 Jan 2025 08:35:43 +0100
Subject: [PATCH 1/2] make gitignore more comprehensive

---
 .gitignore | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/.gitignore b/.gitignore
index 27e213de80..f6d59b9041 100755
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 *.tar.gz
+*.npz
 *.py[cod]
 
 # C extensions
@@ -60,3 +61,12 @@ env.yml
 # Rust stuff
 Cargo.lock
 target*
+
+# snakemake
+.snakemake
+*.smk
+
+# jupyter notebooks
+.ipynb_checkpoints
+*.ipynb_checkpoints
+*.ipynb
\ No newline at end of file

From b789d1db8f489301740bbd51cf66bb564eea3246 Mon Sep 17 00:00:00 2001
From: WardDeb
Date: Fri, 17 Jan 2025 16:08:53 +0100
Subject: [PATCH 2/2] init alignmentSieve

---
 deeptools4.0.0_changes.md                |  28 +-
 pydeeptools/deeptools/alignmentSieve2.py | 363 +++++++++++++++++++++++
 pyproject.toml                           |   1 +
 src/alignmentsieve.rs                    | 295 ++++++++++++++++++
 src/bamcoverage.rs                       |   4 +-
 src/computematrix.rs                     |   8 +-
 src/covcalc.rs                           |   4 +-
 src/filehandler.rs                       |   4 +-
 src/lib.rs                               |   1 +
 9 files changed, 687 insertions(+), 21 deletions(-)
 create mode 100644 pydeeptools/deeptools/alignmentSieve2.py

diff --git a/deeptools4.0.0_changes.md b/deeptools4.0.0_changes.md
index 909f155eb2..37ea18f016 100644
--- a/deeptools4.0.0_changes.md
+++ b/deeptools4.0.0_changes.md
@@ -1,4 +1,6 @@
-# computeMatrix
+# Changes
+
+## computeMatrix
 
 - --sortRegions 'no' option no longer exists
 - Sorting ascend / descend no longer has subsorting by position.
@@ -6,17 +8,14 @@
 - bed files in computeMatrix no longer support '#' to define groups.
 - 'chromosome matching' i.e. chr1 <-> 1, chrMT <-> MT is no longer performed.
 
-# normalization
+## normalization
 
-Exactscaling is no longer an option, it's always performed.
+ - Exact scaling is no longer an option; it is always performed.
 
-# Todo
-
- - allow multithreaded bw writing
- - properly divide region work over threads -> region sorting & taking size into account
- - calc for computeMatrix functions -> Struct / Enum
- - filehanlder bed file could all be &str, not clones
+## alignmentSieve
+- The options label, smartLabels and genomeChunkLength are removed.
+- ignoreDuplicates is removed; if needed, duplicate filtering should be done via the samFlagExclude option.
 
 # Testing
 
 - skipZeros
 - duplicate renaming _r1, _r2, ...
 - GTF, BED3, BED6, BED12, mixedBED (?)
- - scaleRegions, un5, un3, regionbodylength, metagene
\ No newline at end of file
+ - scaleRegions, un5, un3, regionbodylength, metagene
+
+ ## alignmentSieve
+
+ - unmapped reads to unfiltered_out
+# Todo
+
+- alignmentSieve: shift, BED output, optimization.
+- bamCoverage / bamCompare: filtering, extend.
\ No newline at end of file diff --git a/pydeeptools/deeptools/alignmentSieve2.py b/pydeeptools/deeptools/alignmentSieve2.py new file mode 100644 index 0000000000..49957f81ca --- /dev/null +++ b/pydeeptools/deeptools/alignmentSieve2.py @@ -0,0 +1,363 @@ +#!/usr/bin/env python +import argparse +import pysam +import os +import sys + +from deeptools import parserCommon +from deeptools.bamHandler import openBam +from deeptools.mapReduce import mapReduce +from deeptools.utilities import getTLen, smartLabels, getTempFileName +from importlib.metadata import version +from deeptools.hp import r_alignmentsieve + +def parseArguments(): + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description="This tool filters alignments in a BAM/CRAM file according the the specified parameters. It can optionally output to BEDPE format.", + usage='alignmentSieve -b sample1.bam -o sample1.filtered.bam --minMappingQuality 10 --filterMetrics log.txt\n' + 'help: alignmentSieve -h / alignmentSieve --help') + + required = parser.add_argument_group('Required arguments') + required.add_argument('--bam', '-b', + metavar='FILE1', + help='An indexed BAM file.', + required=True) + + required.add_argument('--outFile', '-o', + help='The file to write results to. These are the alignments or fragments that pass the filtering criteria.') + + general = parser.add_argument_group('General arguments') + general.add_argument('--numberOfProcessors', '-p', + help='Number of processors to use. Type "max/2" to ' + 'use half the maximum number of processors or "max" ' + 'to use all available processors. (Default: %(default)s)', + metavar="INT", + type=parserCommon.numberOfProcessors, + default=1, + required=False) + + general.add_argument('--filterMetrics', + metavar="FILE.log", + default="None", + help="The number of entries in total and filtered are saved to this file") + + general.add_argument('--filteredOutReads', + metavar="filtered.bam", + default="None", + help="If desired, all reads NOT passing the filtering criteria can be written to this file.") + + general.add_argument('--verbose', '-v', + help='Set to see processing messages.', + action='store_true') + + general.add_argument('--version', action='version', + version='%(prog)s {}'.format(version('deeptools'))) + + general.add_argument('--shift', + nargs='+', + type=int, + help='Shift the left and right end of a read (for BAM files) or a fragment (for BED files). A positive value shift an end to the right (on the + strand) and a negative value shifts a fragment to the left. Either 2 or 4 integers can be provided. For example, "2 -3" will shift the left-most fragment end two bases to the right and the right-most end 3 bases to the left. If 4 integers are provided, then the first and last two refer to fragments whose read 1 is on the left or right, respectively. Consequently, it is possible to take strand into consideration for strand-specific protocols. A fragment whose length falls below 1 due to shifting will not be written to the output. See the online documentation for graphical examples. Note that non-properly-paired reads will be filtered.') + + general.add_argument('--ATACshift', + action='store_true', + help='Shift the produced BAM file or BEDPE regions as commonly done for ATAC-seq. This is equivalent to --shift 4 -5 5 -4.') + + output = parser.add_argument_group('Output arguments') + output.add_argument('--BED', + action='store_true', + help='Instead of producing BAM files, write output in BEDPE format (as defined by MACS2). 
Note that only reads/fragments passing filtering criterion are written in BEDPE format.') + + filtering = parser.add_argument_group('Optional arguments') + + filtering.add_argument('--filterRNAstrand', + help='Selects RNA-seq reads (single-end or paired-end) in ' + 'the given strand. (Default: %(default)s)', + choices=['forward', 'reverse', 'None'], + default='None') + + filtering.add_argument('--minMappingQuality', + metavar='INT', + help='If set, only reads that have a mapping ' + 'quality score of at least this are ' + 'considered.', + default=0, + type=int) + + filtering.add_argument('--samFlagInclude', + help='Include reads based on the SAM flag. For example, ' + 'to get only reads that are the first mate, use a flag of 64. ' + 'This is useful to count properly paired reads only once, ' + 'as otherwise the second mate will be also considered for the ' + 'coverage.', + metavar='INT', + type=int, + default=0, + required=False) + + filtering.add_argument('--samFlagExclude', + help='Exclude reads based on the SAM flag. For example, ' + 'to get only reads that map to the forward strand, use ' + '--samFlagExclude 16, where 16 is the SAM flag for reads ' + 'that map to the reverse strand.', + metavar='INT', + default=0, + type=int, + required=False) + + filtering.add_argument('--blackListFileName', '-bl', + help="A BED or GTF file containing regions that should be excluded from all analyses. Currently this works by rejecting genomic chunks that happen to overlap an entry. Consequently, for BAM files, if a read partially overlaps a blacklisted region or a fragment spans over it, then the read/fragment might still be considered. Please note that you should adjust the effective genome size, if relevant.", + metavar="BED file", + nargs="+", + default="None", + required=False) + + filtering.add_argument('--minFragmentLength', + help='The minimum fragment length needed for read/pair ' + 'inclusion. This option is primarily useful ' + 'in ATACseq experiments, for filtering mono- or ' + 'di-nucleosome fragments. (Default: %(default)s)', + metavar='INT', + default=0, + type=int, + required=False) + + filtering.add_argument('--maxFragmentLength', + help='The maximum fragment length needed for read/pair ' + 'inclusion. A value of 0 indicates no limit. 
(Default: %(default)s)', + metavar='INT', + default=0, + type=int, + required=False) + + return parser + + +def shiftRead(b, chromDict, args): + if not b.is_proper_pair: + return None + tLen = getTLen(b, notAbs=True) + start = b.pos + end = start + b.query_alignment_end + if b.is_reverse and not b.is_read2: + end -= args.shift[2] + deltaTLen = args.shift[3] - args.shift[2] + elif b.is_reverse and b.is_read2: + end += args.shift[1] + deltaTLen = args.shift[1] - args.shift[0] + elif not b.is_reverse and not b.is_read2: + start += args.shift[0] + deltaTLen = args.shift[1] - args.shift[0] + else: + start -= args.shift[3] + deltaTLen = args.shift[3] - args.shift[2] + + # Sanity check + if end - start < 1: + if b.is_reverse: + start = end - 1 + else: + end = start + 1 + if start < 0: + start = 0 + if end > chromDict[b.reference_name]: + end = chromDict[b.reference_name] + if end - start < 1: + return None + + # create a new read + b2 = pysam.AlignedSegment() + b2.query_name = b.query_name + b2.flag = b.flag + b2.reference_id = b.reference_id + b2.reference_start = start + b2.mapping_quality = b.mapping_quality + b2.cigar = ((0, end - start),) # Returned cigar is only matches + if tLen < 0: + b2.template_length = tLen - deltaTLen + else: + b2.template_length = tLen + deltaTLen + b2.next_reference_id = b.next_reference_id + b2.next_reference_start = b.next_reference_start + if b.is_proper_pair: + if b2.is_read2 and b2.is_reverse: + b2.next_reference_start += args.shift[0] + elif not b2.is_read2 and b2.is_reverse: + b2.next_reference_start -= args.shift[3] + + return b2 + + +def filterWorker(arglist): + chrom, start, end, args, chromDict = arglist + fh = openBam(args.bam) + mode = 'wb' + oname = getTempFileName(suffix='.bam') + if args.filteredOutReads: + onameFiltered = getTempFileName(suffix='.bam') + else: + onameFiltered = None + ofh = pysam.AlignmentFile(oname, mode=mode, template=fh) + if onameFiltered: + ofiltered = pysam.AlignmentFile(onameFiltered, mode=mode, template=fh) + else: + ofiltered = None + + prev_pos = set() + lpos = None + + nFiltered = 0 + total = 0 + for read in fh.fetch(chrom, start, end): + if read.pos < start: + # ensure that we never double count (in case distanceBetweenBins == 0) + continue + + total += 1 + if read.flag & 4: + # Ignore unmapped reads, they were counted already + nFiltered += 1 + if ofiltered: + ofiltered.write(read) + continue + + if args.minMappingQuality and read.mapq < args.minMappingQuality: + nFiltered += 1 + if ofiltered: + ofiltered.write(read) + continue + + if args.samFlagInclude and read.flag & args.samFlagInclude != args.samFlagInclude: + nFiltered += 1 + if ofiltered: + ofiltered.write(read) + continue + if args.samFlagExclude and read.flag & args.samFlagExclude != 0: + nFiltered += 1 + if ofiltered: + ofiltered.write(read) + continue + + tLen = getTLen(read) + if args.minFragmentLength > 0 and tLen < args.minFragmentLength: + nFiltered += 1 + if ofiltered: + ofiltered.write(read) + continue + if args.maxFragmentLength > 0 and tLen > args.maxFragmentLength: + nFiltered += 1 + if ofiltered: + ofiltered.write(read) + continue + + if args.ignoreDuplicates: + # Assuming more or less concordant reads, use the fragment bounds, otherwise the start positions + if tLen >= 0: + s = read.pos + e = s + tLen + else: + s = read.pnext + e = s - tLen + if read.reference_id != read.next_reference_id: + e = read.pnext + if lpos is not None and lpos == read.reference_start \ + and (s, e, read.next_reference_id, read.is_reverse) in prev_pos: + nFiltered += 1 + 
if ofiltered: + ofiltered.write(read) + continue + if lpos != read.reference_start: + prev_pos.clear() + lpos = read.reference_start + prev_pos.add((s, e, read.next_reference_id, read.is_reverse)) + + # filterRNAstrand + if args.filterRNAstrand: + if read.is_paired: + if args.filterRNAstrand == 'forward': + if read.flag & 144 == 128 or read.flag & 96 == 64: + pass + else: + nFiltered += 1 + if ofiltered: + ofiltered.write(read) + continue + elif args.filterRNAstrand == 'reverse': + if read.flag & 144 == 144 or read.flag & 96 == 96: + pass + else: + nFiltered += 1 + if ofiltered: + ofiltered.write(read) + continue + else: + if args.filterRNAstrand == 'forward': + if read.flag & 16 == 16: + pass + else: + nFiltered += 1 + if ofiltered: + ofiltered.write(read) + continue + elif args.filterRNAstrand == 'reverse': + if read.flag & 16 == 0: + pass + else: + nFiltered += 1 + if ofiltered: + ofiltered.write(read) + continue + + if args.shift: + read = shiftRead(read, chromDict, args) + if not read: + continue + + # Read survived filtering + ofh.write(read) + + # The results from the workers will get sorted, so get the TID + tid = fh.get_tid(chrom) + + ofh.close() + if ofiltered: + ofiltered.close() + fh.close() + return tid, start, total, nFiltered, oname, onameFiltered + + +def main(args=None): + args = parseArguments().parse_args(args) + if args.shift: + if len(args.shift) not in [2, 4]: + sys.exit("The --shift option can accept either 2 or 4 values only.") + if len(args.shift) == 2: + args.shift.extend([-args.shift[1], -args.shift[0]]) + else: + args.shift = [] + if args.ATACshift: + if args.shift: + print("Warning! The --ATACshift option is used, but a --shift option is provided as well. The latter will be ignored in favor of 4 -5 5 -4.") + args.shift = [4, -5, 5, -4] + + # Remove args: + # label, smartLabels, genomeChunkLength, ignoreDuplicates. + + print(args) + r_alignmentsieve( + args.bam, + args.outFile, + args.numberOfProcessors, + args.filterMetrics, + args.filteredOutReads, + args.verbose, + args.shift, + args.BED, + args.filterRNAstrand, + args.minMappingQuality, + args.samFlagInclude, + args.samFlagExclude, + args.blackListFileName, + args.minFragmentLength, + args.maxFragmentLength, + ) diff --git a/pyproject.toml b/pyproject.toml index 5d7f9c8849..b45ecb2153 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,3 +86,4 @@ plotProfile = "deeptools.plotProfile:main" bamCoverage2 = "deeptools.bamCoverage2:main" bamCompare2 = "deeptools.bamCompare2:main" computeMatrix2 = "deeptools.computeMatrix2:main" +alignmentSieve2 = "deeptools.alignmentSieve2:main" \ No newline at end of file diff --git a/src/alignmentsieve.rs b/src/alignmentsieve.rs index e69de29bb2..195179d93d 100644 --- a/src/alignmentsieve.rs +++ b/src/alignmentsieve.rs @@ -0,0 +1,295 @@ +use bigtools::utils::misc::Name; +use flate2::write; +use pyo3::prelude::*; +use pyo3::types::PyList; +use rayon::prelude::*; +use rayon::ThreadPoolBuilder; +use rust_htslib::bam::{self, record, Header, IndexedReader, Read, Reader, Writer}; +use tempfile::{Builder, TempPath, NamedTempFile}; +use std::fs::File; +use std::io::Write; +use crate::covcalc::{parse_regions, Alignmentfilters}; + +#[pyfunction] +pub fn r_alignmentsieve( + bamifile: &str, // input bamfile + ofile: &str, // output file + nproc: usize, // threads + filter_metrics: &str, // filter metrics file. + filtered_out_readsfile: &str, // filtered_out_reads bam/bedfile. + verbose: bool, // verbose + shift: Py, // python list of the shift to perform. 
+    _bed: bool, // output format in BEDPE.
+    filter_rna_strand: &str, // "forward", "reverse" or "None".
+    min_mapping_quality: u8, // minimum mapping quality.
+    sam_flag_incl: u16, // sam flag include
+    sam_flag_excl: u16, // sam flag exclude
+    _blacklist: &str, // blacklist file name.
+    min_fragment_length: u32, // minimum fragment length.
+    max_fragment_length: u32, // maximum fragment length.
+) -> PyResult<()> {
+    // Input bam file
+    let mut bam = Reader::from_path(bamifile).unwrap();
+    let header = Header::from_template(bam.header());
+    let header_view = bam.header().clone();
+
+    let mut write_filters: bool = false;
+    if filtered_out_readsfile != "None" {
+        write_filters = true;
+    }
+    let mut readshift: Vec<i64> = Vec::new();
+    Python::with_gil(|py| {
+        readshift = shift.extract(py).expect("Failed to extract shift.");
+    });
+    // shift is of length 0, 2, or 4.
+
+    // Define regions
+    let (regions, chromsizes) = parse_regions(&Vec::new(), bamifile);
+
+    let filters = Alignmentfilters{
+        minmappingquality: min_mapping_quality,
+        samflaginclude: sam_flag_incl,
+        samflagexclude: sam_flag_excl,
+        minfraglen: min_fragment_length,
+        maxfraglen: max_fragment_length
+    };
+    let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
+    let (sieve, filtersieve, totalreads, filteredreads) = pool.install(|| {
+        regions.par_iter()
+            .map(|i| sieve_bamregion(bamifile, i, &filters, filter_rna_strand, &readshift, write_filters, nproc, verbose))
+            .reduce(
+                || (Vec::new(), Vec::new(), 0, 0),
+                |(mut _sieve, mut _filtersieve, mut _total, mut _filter), (sieve, filtersieve, total, filter)| {
+                    _sieve.extend(sieve);
+                    _filtersieve.extend(filtersieve);
+                    _total += total;
+                    _filter += filter;
+                    (_sieve, _filtersieve, _total, _filter)
+                }
+            )
+    });
+
+    // write output
+    let mut obam = Writer::from_path(ofile, &header, bam::Format::Bam).unwrap();
+    obam.set_threads(nproc).unwrap();
+    for sb in sieve.into_iter() {
+        if let Some(sb) = sb {
+            let mut bam = Reader::from_path(&sb).unwrap();
+            for result in bam.records() {
+                let record = result.unwrap();
+                obam.write(&record).unwrap();
+            }
+        }
+    }
+    // write filtered reads if necessary
+    if write_filters {
+        let mut ofilterbam = Writer::from_path(filtered_out_readsfile, &header, bam::Format::Bam).unwrap();
+        ofilterbam.set_threads(nproc).unwrap();
+        for sb in filtersieve.into_iter() {
+            if let Some(sb) = sb {
+                let mut bam = Reader::from_path(&sb).unwrap();
+                for result in bam.records() {
+                    let record = result.unwrap();
+                    ofilterbam.write(&record).unwrap();
+                }
+            }
+        }
+    }
+
+    if filter_metrics != "None" {
+        let mut of = File::create(filter_metrics).unwrap();
+        // write header
+        writeln!(of, "#bamFilterReads --filterMetrics").unwrap();
+        writeln!(of, "#File\tReads Remaining\tTotal Initial Reads").unwrap();
+        writeln!(of, "{}\t{}\t{}", bamifile, totalreads-filteredreads, totalreads).unwrap();
+    }
+
+    Ok(())
+}
+
+
+fn sieve_bamregion(ibam: &str, region: &(String, u32, u32), alfilters: &Alignmentfilters, filter_rna_strand: &str, shift: &Vec<i64>, write_filters: bool, nproc: usize, verbose: bool) -> (Vec<Option<TempPath>>, Vec<Option<TempPath>>, u64, u64) {
+    let mut total_reads: u64 = 0;
+    let mut filtered_reads: u64 = 0;
+    let mut bam = IndexedReader::from_path(ibam).unwrap();
+    let header = Header::from_template(bam.header());
+
+    let mut written = false;
+    let mut filterwritten = false;
+
+    let sievebam = Builder::new()
+        .prefix("deeptoolstmp_alsieve_")
+        .suffix(".bam")
+        .rand_bytes(12)
+        .tempfile()
.expect("Failed to create temporary file."); + + let sievebam_path = sievebam.into_temp_path(); + let mut sievebamout = Writer::from_path(&sievebam_path, &header, bam::Format::Bam).unwrap(); + + let filterbam = Builder::new() + .prefix("deeptoolstmp_alsieve_filtered_") + .suffix(".bam") + .rand_bytes(12) + .tempfile() + .expect("Failed to create temporary file."); + let filterbam_path = filterbam.into_temp_path(); + let mut filterbamout = if write_filters { + Some(Writer::from_path(&filterbam_path, &header, bam::Format::Bam).unwrap()) + } else { + None + }; + if nproc > 4 { + let readthreads = 2; + let writethreads = nproc - 2; + bam.set_threads(readthreads); + sievebamout.set_threads(writethreads); + if verbose { + println!("Reading = {}, Writing = {}", readthreads, writethreads); + } + } + + bam.fetch((region.0.as_str(), region.1, region.2)).unwrap(); + + for result in bam.records() { + let record = result.unwrap(); + total_reads += 1; + + // Filter reads + // Filter unmapped reads. + if record.is_unmapped() { + filtered_reads += 1; + if let Some(filterbamout) = &mut filterbamout { + filterbamout.write(&record).unwrap(); + filterwritten = true; + } + continue; + } + // Mapping qualities. + if record.mapq() < alfilters.minmappingquality { + filtered_reads += 1; + if let Some(filterbamout) = &mut filterbamout { + filterbamout.write(&record).unwrap(); + filterwritten = true; + } + continue; + } + + // SAM flags + if alfilters.samflaginclude != 0 && (record.flags() & alfilters.samflaginclude) == 0 { + filtered_reads += 1; + if let Some(filterbamout) = &mut filterbamout { + filterbamout.write(&record).unwrap(); + filterwritten = true; + } + continue; + } + if alfilters.samflagexclude != 0 && (record.flags() & alfilters.samflagexclude) != 0 { + filtered_reads += 1; + if let Some(filterbamout) = &mut filterbamout { + filterbamout.write(&record).unwrap(); + filterwritten = true; + } + continue; + } + + // fragment length + if alfilters.minfraglen != 0 || alfilters.maxfraglen != 0 { + if record.is_paired() { + if record.insert_size().abs() < alfilters.minfraglen as i64 || record.insert_size().abs() > alfilters.maxfraglen as i64 { + filtered_reads += 1; + if let Some(filterbamout) = &mut filterbamout { + filterbamout.write(&record).unwrap(); + filterwritten = true; + } + continue; + } + } else { + // Parse cigartuples + let mut tlen: u32 = 0; + for cig in record.cigar().iter() { + match cig { + bam::record::Cigar::Match(len) => tlen += len, + bam::record::Cigar::Del(len) => tlen += len, + bam::record::Cigar::Equal(len) => tlen += len, + bam::record::Cigar::Diff(len) => tlen += len, + _ => (), + } + } + if tlen < alfilters.minfraglen || tlen > alfilters.maxfraglen { + filtered_reads += 1; + if let Some(filterbamout) = &mut filterbamout { + filterbamout.write(&record).unwrap(); + filterwritten = true; + } + continue; + } + } + } + if filter_rna_strand != "None" { + match (filter_rna_strand, record.is_paired()) { + ("forward", true) => { + if !((record.flags() & 144 == 128) || (record.flags() & 96 == 64)) { + filtered_reads += 1; + if let Some(filterbamout) = &mut filterbamout { + filterbamout.write(&record).unwrap(); + filterwritten = true; + } + continue; + } + }, + ("forward", false) => { + if !(record.flags() & 16 == 16) { + filtered_reads += 1; + if let Some(filterbamout) = &mut filterbamout { + filterbamout.write(&record).unwrap(); + filterwritten = true; + } + continue; + } + }, + ("reverse", true) => { + if !((record.flags() & 144 == 144) || (record.flags() & 96 == 96)) { + filtered_reads += 
1; + if let Some(filterbamout) = &mut filterbamout { + filterbamout.write(&record).unwrap(); + filterwritten = true; + } + continue; + } + }, + ("reverse", false) => { + if !(record.flags() & 16 == 0) { + filtered_reads += 1; + if let Some(filterbamout) = &mut filterbamout { + filterbamout.write(&record).unwrap(); + filterwritten = true; + } + continue; + } + }, + _ => {}, + } + } + sievebamout.write(&record).unwrap(); + written = true; + } + + match (written, filterwritten) { + (true, true) => { + (vec![Some(sievebam_path)], vec![Some(filterbam_path)], total_reads, filtered_reads) + }, + (true, false) => { + (vec![Some(sievebam_path)], vec![None], total_reads, filtered_reads) + }, + (false, true) => { + (vec![None], vec![Some(filterbam_path)], total_reads, filtered_reads) + }, + (false, false) => { + (vec![None], vec![None], total_reads, filtered_reads) + } + } + +} \ No newline at end of file diff --git a/src/bamcoverage.rs b/src/bamcoverage.rs index 52496b6cd2..256cd34b18 100644 --- a/src/bamcoverage.rs +++ b/src/bamcoverage.rs @@ -3,9 +3,9 @@ use pyo3::types::PyList; use rayon::prelude::*; use rayon::ThreadPoolBuilder; use std::io::prelude::*; -use std::io::{BufReader}; +use std::io::BufReader; use std::fs::File; -use bigtools::{Value}; +use bigtools::Value; use crate::covcalc::{bam_pileup, parse_regions, Alignmentfilters}; use crate::filehandler::{bam_ispaired, write_covfile}; use crate::normalization::scale_factor; diff --git a/src/computematrix.rs b/src/computematrix.rs index d1274ccd6f..5ff64f7c19 100644 --- a/src/computematrix.rs +++ b/src/computematrix.rs @@ -1027,7 +1027,6 @@ impl Region { }, _ => panic!("Strand should either be + or - or . {:?} is not supported.", self.strand), } - Vec::new() } } @@ -1319,14 +1318,13 @@ fn matrix_dump( regions.len(), "Length of sorted indices does not match regions length: {} ! = {}", sortedix.len(), regions.len() ); - let mut sortedmatrix: Vec> = Vec::new(); - let mut sortedregions: Vec = Vec::new(); + // Reorder matrix & regions - sortedmatrix = sortedix + let sortedmatrix = sortedix .iter() .map(|ix| matrix[*ix].clone()) .collect(); - sortedregions = sortedix + let sortedregions = sortedix .into_iter() .map(|ix| regions[ix].clone()) .collect(); diff --git a/src/covcalc.rs b/src/covcalc.rs index 270b6a3cba..0013a7da5e 100644 --- a/src/covcalc.rs +++ b/src/covcalc.rs @@ -60,7 +60,7 @@ pub fn bam_pileup<'a>( binsize: &u32, ispe: &bool, ignorechr: &Vec, - filters: &Alignmentfilters, + _filters: &Alignmentfilters, collapse: bool, ) -> ( Vec, // temp bedgraph file. 
@@ -98,7 +98,7 @@ pub fn bam_pileup<'a>(
     if binsize > &1 {
         // populate the bg vector with 0 counts over all bins
         let mut counts: Vec = vec![0.0; (region.2 - region.1).div_ceil(*binsize) as usize];
-        let mut binstart = region.1;
+        // let mut binstart = region.1;
         let mut binix: u32 = 0;
 
         for record in bam.records() {
diff --git a/src/filehandler.rs b/src/filehandler.rs
index 8972cb7fdd..0a900bb90e 100644
--- a/src/filehandler.rs
+++ b/src/filehandler.rs
@@ -344,7 +344,7 @@ pub fn chrombounds_from_bw(bwfile: &str) -> HashMap<String, u32> {
     // define chromsizes hashmap
     let mut chromsizes: HashMap<String, u32> = HashMap::new();
     let bwf = File::open(bwfile).expect("Failed to open bw file.");
-    let mut reader = BigWigRead::open(bwf).unwrap();
+    let reader = BigWigRead::open(bwf).unwrap();
     for chrom in reader.chroms() {
         chromsizes.insert(chrom.name.clone(), chrom.length);
     }
@@ -531,7 +531,7 @@ pub fn header_matrix(scale_regions: &Scalingregions, regionsizes: HashMap
     let mut samplebounds: Vec<u64> = Vec::new();
     samplebounds.push(0);
     let mut cumsum: u64 = 0;
-    for i in 0..scale_regions.bwfiles {
+    for _ in 0..scale_regions.bwfiles {
         cumsum += colsize_per_sample as u64;
         samplebounds.push(cumsum);
     }
diff --git a/src/lib.rs b/src/lib.rs
index a323de108c..6f7ad448a7 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -13,5 +13,6 @@ fn hp(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(bamcoverage::r_bamcoverage, m)?)?;
     m.add_function(wrap_pyfunction!(bamcompare::r_bamcompare, m)?)?;
    m.add_function(wrap_pyfunction!(computematrix::r_computematrix, m)?)?;
+    m.add_function(wrap_pyfunction!(alignmentsieve::r_alignmentsieve, m)?)?;
     Ok(())
 }
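
Note (not part of the patch): a minimal, hypothetical smoke test for the new entry point. It assumes the compiled deeptools.hp extension has been built and installed; the file names are placeholders. Every flag shown is defined in the parser added in pydeeptools/deeptools/alignmentSieve2.py, and --samFlagExclude 1024 (the SAM duplicate bit) stands in for the removed --ignoreDuplicates, as described in the changelog above.

    # Illustrative sketch only -- paths are placeholders.
    from deeptools.alignmentSieve2 import main

    main([
        "--bam", "sample1.bam",                # indexed BAM file
        "--outFile", "sample1.filtered.bam",   # alignments passing all filters
        "--filteredOutReads", "dropped.bam",   # alignments failing a filter
        "--filterMetrics", "metrics.log",      # total vs. remaining read counts
        "--minMappingQuality", "10",
        "--samFlagExclude", "1024",            # duplicate filtering via SAM flag
        "--ATACshift",                         # expanded to --shift 4 -5 5 -4 in main()
    ])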