Skip to content
Merged

1.3 #32

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 3 additions & 120 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -568,149 +568,32 @@ if(params.run_umi_dedup) {

if(params.run_calc_crosslinks) {
process {
withName: '.*CROSSLINKS:BEDTOOLS_BAMTOBED' {
publishDir = [
enabled: false
]
}

withName: '.*CROSSLINKS:BEDTOOLS_SHIFT' {
ext.args = '-m 1 -p -1'
publishDir = [
enabled: false
]
}

withName: '.*CROSSLINKS:BEDTOOLS_GENOMECOV_POS' {
ext.args = '-dz -strand + -5'
publishDir = [
enabled: false
]
}

withName: '.*CROSSLINKS:BEDTOOLS_GENOMECOV_NEG' {
ext.args = '-dz -strand - -5'
publishDir = [
enabled: false
]
}

withName: '.*CROSSLINKS:SELECT_BED_POS' {
ext.cmd1 = 'awk \'{OFS="\\t"}{print \$1, \$2, \$2+1, ".", \$3, "+"}\''
ext.suffix = '.pos'
ext.ext = 'bed'
publishDir = [
enabled: false
]
}

withName: '.*CROSSLINKS:SELECT_BED_NEG' {
ext.cmd1 = 'awk \'{OFS="\\t"}{print \$1, \$2, \$2+1, ".", \$3, "-"}\''
ext.suffix = '.neg'
ext.ext = 'bed'
publishDir = [
enabled: false
]
}

withName: 'CLIPSEQ:CALC_GENOME_CROSSLINKS:MERGE_AND_SORT' {
ext.cmd1 = 'sort -k1,1 -k2,2n'
withName: 'CLIPSEQ:CALC_GENOME_CROSSLINKS' {
ext.suffix = '.genome'
ext.ext = 'bed'
publishDir = [
path: { "${params.outdir}/04_crosslinks" },
mode: "${params.publish_dir_mode}",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'CLIPSEQ:CALC_GENOME_CROSSLINKS:CROSSLINK_COVERAGE' {
ext.cmd1 = 'awk \'{OFS = "\t"}{if (\$6 == "+") {print \$1, \$2, \$3, \$5} else {print \$1, \$2, \$3, -\$5}}\' | sort -k1,1 -k2,2n'
ext.suffix = '.genome'
ext.ext = 'bedgraph'
publishDir = [
path: { "${params.outdir}/04_crosslinks" },
mode: "${params.publish_dir_mode}",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'CLIPSEQ:CALC_GENOME_CROSSLINKS:CROSSLINK_NORMCOVERAGE' {
ext.cmd1 = 'awk -v total=\$CMD2 \'{printf "%s\\t%i\\t%i\\t%s\\t%f\\t%s\\n", \$1, \$2, \$3, \$4, 1000000*\$5/total, \$6}\' | awk \'{OFS = "\t"}{if (\$6 == "+") {print \$1, \$2, \$3, \$5} else {print \$1, \$2, \$3, -\$5}}\' | sort -k1,1 -k2,2n'
ext.cmd2 = 'awk \'BEGIN {total=0} {total=total+\$5} END {print total}\''
ext.suffix = '.norm.genome'
ext.ext = 'bedgraph'
publishDir = [
path: { "${params.outdir}/04_crosslinks" },
mode: "${params.publish_dir_mode}",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'CLIPSEQ:CALC_TRANSCRIPT_CROSSLINKS:MERGE_AND_SORT' {
ext.cmd1 = 'sort -k1,1 -k2,2n'
withName: 'CLIPSEQ:CALC_TRANSCRIPT_CROSSLINKS' {
ext.suffix = '.transcript'
ext.ext = 'bed'
publishDir = [
path: { "${params.outdir}/04_crosslinks" },
mode: "${params.publish_dir_mode}",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'CLIPSEQ:CALC_TRANSCRIPT_CROSSLINKS:CROSSLINK_COVERAGE' {
ext.cmd1 = 'awk \'{OFS = "\t"}{if (\$6 == "+") {print \$1, \$2, \$3, \$5} else {print \$1, \$2, \$3, -\$5}}\' | sort -k1,1 -k2,2n'
ext.suffix = '.transcript'
ext.ext = 'bedgraph'
publishDir = [
path: { "${params.outdir}/04_crosslinks" },
mode: "${params.publish_dir_mode}",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'CLIPSEQ:CALC_TRANSCRIPT_CROSSLINKS:CROSSLINK_NORMCOVERAGE' {
ext.cmd1 = 'awk -v total=\$CMD2 \'{printf "%s\\t%i\\t%i\\t%s\\t%f\\t%s\\n", \$1, \$2, \$3, \$4, 1000000*\$5/total, \$6}\' | awk \'{OFS = "\t"}{if (\$6 == "+") {print \$1, \$2, \$3, \$5} else {print \$1, \$2, \$3, -\$5}}\' | sort -k1,1 -k2,2n'
ext.cmd2 = 'awk \'BEGIN {total=0} {total=total+\$5} END {print total}\''
ext.suffix = '.norm.transcript'
ext.ext = 'bedgraph'
publishDir = [
path: { "${params.outdir}/04_crosslinks" },
mode: "${params.publish_dir_mode}",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
withName: 'CLIPSEQ:CALC_SMRNA_K1_CROSSLINKS:MERGE_AND_SORT' {
ext.cmd1 = 'sort -k1,1 -k2,2n'
ext.suffix = '.smrna_withk1'
ext.ext = 'bed'
publishDir = [
path: { "${params.outdir}/04_crosslinks" },
mode: "${params.publish_dir_mode}",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'CLIPSEQ:CALC_SMRNA_K1_CROSSLINKS:CROSSLINK_COVERAGE' {
ext.cmd1 = 'awk \'{OFS = "\t"}{if (\$6 == "+") {print \$1, \$2, \$3, \$5} else {print \$1, \$2, \$3, -\$5}}\' | sort -k1,1 -k2,2n'
withName: 'CLIPSEQ:CALC_SMRNA_K1_CROSSLINKS' {
ext.suffix = '.smrna_withk1'
ext.ext = 'bedgraph'
publishDir = [
path: { "${params.outdir}/04_crosslinks" },
mode: "${params.publish_dir_mode}",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'CLIPSEQ:CALC_SMRNA_K1_CROSSLINKS:CROSSLINK_NORMCOVERAGE' {
ext.cmd1 = 'awk -v total=\$CMD2 \'{printf "%s\\t%i\\t%i\\t%s\\t%f\\t%s\\n", \$1, \$2, \$3, \$4, 1000000*\$5/total, \$6}\' | awk \'{OFS = "\t"}{if (\$6 == "+") {print \$1, \$2, \$3, \$5} else {print \$1, \$2, \$3, -\$5}}\' | sort -k1,1 -k2,2n'
ext.cmd2 = 'awk \'BEGIN {total=0} {total=total+\$5} END {print total}\''
ext.suffix = '.norm.smrna_withk1'
ext.ext = 'bedgraph'
publishDir = [
enabled: false
]
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ params {
max_time = '6.h'

// Input data
samplesheet = './tests/data/samplesheets/small-single-sample-se.csv'
samplesheet = './tests/data/samplesheets/small-dual-sample-se.csv'
fasta = './tests/data/genome/yeast_MitoV.fa.gz'
smrna_fasta = './tests/data/genome/homosapiens_smallRNA.fa.gz'
gtf = './tests/data/genome/yeast_MitoV.gtf.gz'
Expand Down
34 changes: 24 additions & 10 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists:
//

include { MULTIQC } from './modules/local/multiqc'
include { GET_CROSSLINKS as CALC_SMRNA_K1_CROSSLINKS } from './modules/local/get_crosslinks'
include { GET_CROSSLINKS as CALC_GENOME_CROSSLINKS } from './modules/local/get_crosslinks'
include { GET_CROSSLINKS as CALC_TRANSCRIPT_CROSSLINKS } from './modules/local/get_crosslinks'


//
// SUBWORKFLOWS
Expand All @@ -116,6 +120,7 @@ include { CLIPPY as CLIPPY_TRANSCRIPT } from './modules/goodwrigh
include { PEKA } from './modules/goodwright/peka/main'
include { DUMP_SOFTWARE_VERSIONS } from './modules/goodwright/dump_software_versions/main'
include { CLIPSEQ_CLIPQC } from './modules/goodwright/clipseq/clipqc/main'
include { ENCODE_MOVEUMI } from './modules/goodwright/clipseq/encode_moveumi/main'

//
// SUBWORKFLOWS
Expand All @@ -130,9 +135,6 @@ include { BAM_DEDUP_SAMTOOLS_UMITOOLS as GENOME_MULTI_DEDUP } from './subwor
include { BAM_DEDUP_SAMTOOLS_UMITOOLS as SMRNA_DEDUP } from './subworkflows/goodwright/bam_dedup_samtools_umitools/main'
include { BAM_DEDUP_SAMTOOLS_UMITOOLS as SMRNA_K1_DEDUP } from './subworkflows/goodwright/bam_dedup_samtools_umitools/main'
include { BAM_DEDUP_SAMTOOLS_UMITOOLS as TRANSCRIPT_DEDUP } from './subworkflows/goodwright/bam_dedup_samtools_umitools/main'
include { CLIP_CALC_CROSSLINKS as CALC_SMRNA_K1_CROSSLINKS } from './subworkflows/goodwright/clip_calc_crosslinks/main'
include { CLIP_CALC_CROSSLINKS as CALC_GENOME_CROSSLINKS } from './subworkflows/goodwright/clip_calc_crosslinks/main'
include { CLIP_CALC_CROSSLINKS as CALC_TRANSCRIPT_CROSSLINKS } from './subworkflows/goodwright/clip_calc_crosslinks/main'
include { PARACLU_ANALYSE as PARACLU_ANALYSE_GENOME } from './subworkflows/goodwright/paraclu_analyse/main'
include { PARACLU_ANALYSE as PARACLU_ANALYSE_TRANSCRIPT } from './subworkflows/goodwright/paraclu_analyse/main'
include { ICOUNT_ANALYSE } from './subworkflows/goodwright/icount_analyse/main'
Expand Down Expand Up @@ -277,7 +279,13 @@ workflow CLIPSEQ {
}
//EXAMPLE CHANNEL STRUCT: [[id:h3k27me3_R1, group:h3k27me3, replicate:1, single_end:false], [FASTQ]]
//ch_fastq | view

if(params.encode_eclip){
ENCODE_MOVEUMI (
ch_fastq
)
ch_versions = ch_versions.mix(ENCODE_MOVEUMI.out.versions)
ch_fastq = ENCODE_MOVEUMI.out.reads
}
if(params.run_move_umi_to_header){
UMITOOLS_EXTRACT (
ch_fastq
Expand Down Expand Up @@ -433,6 +441,9 @@ workflow CLIPSEQ {
ch_versions = ch_versions.mix(TRANSCRIPT_DEDUP.out.versions)
ch_transcript_bam = TRANSCRIPT_DEDUP.out.bam
ch_transcript_bai = TRANSCRIPT_DEDUP.out.bai
} else {
ch_genome_bam = ch_genome_unique_bam
ch_genome_bai = ch_genome_unique_bai
}

ch_genome_crosslink_bed = Channel.empty()
Expand All @@ -446,8 +457,9 @@ workflow CLIPSEQ {
* MODULE: Run crosslink calculation for smRNA with -k 1
*/
CALC_SMRNA_K1_CROSSLINKS (
ch_smrna_k1_bam,
ch_smrna_fasta_fai.collect{ it[1] }
ch_smrna_k1_bam.join(ch_smrna_k1_bai),
ch_smrna_fasta_fai.collect{ it[1] },
params.crosslink_position
)
ch_versions = ch_versions.mix(CALC_SMRNA_K1_CROSSLINKS.out.versions)
ch_smrna_crosslink_bed = CALC_SMRNA_K1_CROSSLINKS.out.bed
Expand All @@ -458,8 +470,9 @@ workflow CLIPSEQ {
* MODULE: Run crosslink calculation for genome
*/
CALC_GENOME_CROSSLINKS (
ch_genome_bam,
ch_fasta_fai.collect{ it[1] }
ch_genome_bam.join(ch_genome_bai),
ch_fasta_fai.collect{ it[1] },
params.crosslink_position
)
ch_versions = ch_versions.mix(CALC_GENOME_CROSSLINKS.out.versions)
ch_genome_crosslink_bed = CALC_GENOME_CROSSLINKS.out.bed
Expand All @@ -470,8 +483,9 @@ workflow CLIPSEQ {
* MODULE: Run crosslink calculation for transcripts
*/
CALC_TRANSCRIPT_CROSSLINKS (
ch_transcript_bam,
ch_longest_transcript_fai.collect{ it[1] }
ch_transcript_bam.join(ch_transcript_bai),
ch_longest_transcript_fai.collect{ it[1] },
params.crosslink_position
)
ch_versions = ch_versions.mix(CALC_TRANSCRIPT_CROSSLINKS.out.versions)
ch_trans_crosslink_bed = CALC_TRANSCRIPT_CROSSLINKS.out.bed
Expand Down
22 changes: 22 additions & 0 deletions modules/goodwright/clipseq/encode_moveumi/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Runs the 'encode_moveumi.py' template on one FASTQ input and emits the
// rewritten, compressed reads together with a versions.yml file.
process ENCODE_MOVEUMI {
    label "process_single"

    conda "bioconda::biopython=1.78 pigz=2.6"
    container "quay.io/biocontainers/mulled-v2-877c4e5a8fad685ea5bde487e04924ac447923b9:b7daa641364165419b9a87d9988bc803f913c5b6-0"

    input:
    // meta: sample map (meta.id is the default output prefix); reads: input FASTQ
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.fastq.gz"), emit: reads
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    shell:
    // NOTE(review): these two are left un-`def`'d, presumably so that the
    // template can resolve them as placeholders -- confirm before adding `def`.
    prefix = task.ext.prefix ?: "${meta.id}"
    process_name = task.process
    template 'encode_moveumi.py'
}
40 changes: 40 additions & 0 deletions modules/goodwright/clipseq/encode_moveumi/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Module metadata for ENCODE_MOVEUMI; inputs and outputs mirror the process
# definition in main.nf of this module.
name: encode_moveumi
description: Rewrites FASTQ read names so that the first colon-separated field of each name is moved to the end as "_rbc:<field>"; names whose last field already contains an underscore are left unchanged
keywords:
  - eCLIP
  - ENCODE
  - CLIP
  - UMI
input:
  - meta:
      type: map
      description: Groovy map of sample information; meta.id is used as the default output prefix
  - reads:
      type: file
      description: Gzip-compressed FASTQ file whose read names are to be rewritten
output:
  - meta:
      type: map
      description: Groovy map of sample information, passed through unchanged
  - reads:
      type: file
      description: Gzip-compressed FASTQ file with rewritten read names
      pattern: "*.fastq.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@chris-cheshire"
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env python
"""Move the leading field of each FASTQ read name to the end of the name.

Reads a gzip-compressed FASTQ file, rewrites read names of the form
``<field0>:<rest>`` to ``<rest>_rbc:<field0>`` (field0 is presumably the UMI,
going by the module name), writes every record to a plain FASTQ file,
compresses that file with pigz and finally records the Python and Biopython
versions in ``versions.yml``.

The input path, the output prefix and the process name are placeholders that
the workflow engine substitutes before the script is executed.
"""

import os
import sys
import gzip
import platform
import subprocess
from Bio import SeqIO
import Bio

input_fq = "!{reads}"
output_fq = "!{prefix}.umi.fastq"

with gzip.open(input_fq, mode = 'rt') as f_in, open(output_fq, mode = 'w') as f_out:
    for record in SeqIO.parse(f_in, 'fastq'):
        header = record.id.split(":")
        # An underscore in the last field is taken to mean the name has
        # already been rearranged, so such records pass through untouched.
        if '_' not in header[-1]:
            rearranged = ":".join(header[1:]) + '_rbc:' + header[0]
            # id, name and description are all overwritten so that the
            # written header line consists of the rearranged name only.
            record.id = rearranged
            record.name = rearranged
            record.description = rearranged
        # Every record is written, whether or not it was rearranged.
        SeqIO.write(record, f_out, 'fastq')

# Argument list instead of a shell string: a prefix containing spaces or shell
# metacharacters cannot alter the command, and check=True turns a pigz failure
# into an error instead of silently leaving an uncompressed file behind.
subprocess.run(['pigz', output_fq], check=True)

with open("versions.yml", "w") as out_f:
    out_f.write("!{process_name}" + ":\n")
    out_f.write(" python: " + platform.python_version() + "\n")
    out_f.write(" biopython: " + Bio.__version__ + "\n")
Loading
Loading