From e1728d7afb67f3f560f29e721bd518a489d0ed30 Mon Sep 17 00:00:00 2001 From: Glenn Hickey Date: Wed, 7 Jun 2023 16:55:45 -0400 Subject: [PATCH 1/2] stop doing fat ancestor with --pangenome --- src/cactus/setup/cactus_align.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cactus/setup/cactus_align.py b/src/cactus/setup/cactus_align.py index 4d58e3195..a87878078 100644 --- a/src/cactus/setup/cactus_align.py +++ b/src/cactus/setup/cactus_align.py @@ -311,8 +311,6 @@ def make_align_job(options, toil, config_wrapper=None, chrom_name=None): cafNode.attrib["runMapQFiltering"] = "0" # more iterations here helps quite a bit to reduce underalignment cafNode.attrib["maxRecoverableChainsIterations"] = "50" - # turn down minimum block degree to get a fat ancestor - barNode.attrib["minimumBlockDegree"] = "1" # turn off POA seeding poaNode.attrib["partialOrderAlignmentDisableSeeding"] = "1" From 8d99782669d96c4cedd6e84b33db9a60bc1017d3 Mon Sep 17 00:00:00 2001 From: Glenn Hickey Date: Wed, 7 Jun 2023 17:07:00 -0400 Subject: [PATCH 2/2] add cli options to assign pangenome outgroups --- src/cactus/refmap/cactus_pangenome.py | 3 ++- src/cactus/setup/cactus_align.py | 9 ++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/cactus/refmap/cactus_pangenome.py b/src/cactus/refmap/cactus_pangenome.py index 465bec15a..f5b31c80b 100644 --- a/src/cactus/refmap/cactus_pangenome.py +++ b/src/cactus/refmap/cactus_pangenome.py @@ -52,7 +52,8 @@ def main(): parser.add_argument("seqFile", help = "Seq file (will be modified if necessary to include graph Fasta sequence)") parser.add_argument("--outDir", help = "Output directory", required=True) parser.add_argument("--outName", help = "Output name (without extension)", required=True) - parser.add_argument("--reference", required=True, nargs='+', type=str, help = "Reference event name(s). The first will be the \"true\" reference and will be left unclipped and uncollapsed. It also should have been used with --reference in all upstream commands. Other names will be promoted to reference paths in vg") + parser.add_argument("--reference", required=True, nargs='+', type=str, help = "Reference event name(s). The first will be the \"true\" reference and will be left unclipped and uncollapsed. It also should have been used with --reference in all upstream commands. Other names will be promoted to reference paths in vg") + parser.add_argument("--outgroup", type=str, nargs='+', help = "Use given genome as outgroup. Multiple allowed") # cactus-minigraph options parser.add_argument("--mgCores", type=int, help = "Number of cores for minigraph construction (defaults to the same as --maxCores).") diff --git a/src/cactus/setup/cactus_align.py b/src/cactus/setup/cactus_align.py index a87878078..967c29a37 100644 --- a/src/cactus/setup/cactus_align.py +++ b/src/cactus/setup/cactus_align.py @@ -69,6 +69,7 @@ def main(): parser.add_argument("--outGFA", action="store_true", help = "export pangenome grpah in GFA (.gfa.gz) in addition to HAL") parser.add_argument("--batch", action="store_true", help = "Launch batch of alignments. Input seqfile is expected to be chromfile as generated by cactus-graphmap-slit. IMPORTANT: paffile argument should bot be specified when using this option") parser.add_argument("--reference", type=str, help = "Ensure that given genome is acyclic by deleting all paralogy edges in postprocessing, also do not mask its PAF mappings") + parser.add_argument("--outgroup", type=str, nargs='+', help = "Use given genome as outgroup. Only works with --pangenome. Multiple allowed") #Progressive Cactus Options parser.add_argument("--configFile", dest="configFile", @@ -130,7 +131,10 @@ def main(): raise RuntimeError('--consCores required for non single_machine batch systems') if options.maxCores is not None and options.consCores > int(options.maxCores): raise RuntimeError('--consCores must be <= --maxCores') - + + if options.outgroup and not options.pangenome: + raise RuntimeError('--outgroup can only be used with --pangenome') + options.buildHal = True options.buildFasta = True @@ -261,6 +265,9 @@ def make_align_job(options, toil, config_wrapper=None, chrom_name=None): if options.reference and options.pangenome: # validate the sample names check_sample_names(input_seq_map.keys(), options.reference) + + if options.outgroup and options.pangenome: + og_map[ options.root if options.root else mc_tree.getRootName()] = options.outgroup # apply path overrides. this was necessary for wdl which doesn't take kindly to # text files of local paths (ie seqfile). one way to fix would be to add support