diff --git a/src/cactus/refmap/cactus_pangenome.py b/src/cactus/refmap/cactus_pangenome.py index 465bec15a..f5b31c80b 100644 --- a/src/cactus/refmap/cactus_pangenome.py +++ b/src/cactus/refmap/cactus_pangenome.py @@ -52,7 +52,8 @@ def main(): parser.add_argument("seqFile", help = "Seq file (will be modified if necessary to include graph Fasta sequence)") parser.add_argument("--outDir", help = "Output directory", required=True) parser.add_argument("--outName", help = "Output name (without extension)", required=True) - parser.add_argument("--reference", required=True, nargs='+', type=str, help = "Reference event name(s). The first will be the \"true\" reference and will be left unclipped and uncollapsed. It also should have been used with --reference in all upstream commands. Other names will be promoted to reference paths in vg") + parser.add_argument("--reference", required=True, nargs='+', type=str, help = "Reference event name(s). The first will be the \"true\" reference and will be left unclipped and uncollapsed. It also should have been used with --reference in all upstream commands. Other names will be promoted to reference paths in vg") + parser.add_argument("--outgroup", type=str, nargs='+', help = "Use given genome as outgroup. Multiple allowed") # cactus-minigraph options parser.add_argument("--mgCores", type=int, help = "Number of cores for minigraph construction (defaults to the same as --maxCores).") diff --git a/src/cactus/setup/cactus_align.py b/src/cactus/setup/cactus_align.py index 4d58e3195..967c29a37 100644 --- a/src/cactus/setup/cactus_align.py +++ b/src/cactus/setup/cactus_align.py @@ -69,6 +69,7 @@ def main(): parser.add_argument("--outGFA", action="store_true", help = "export pangenome grpah in GFA (.gfa.gz) in addition to HAL") parser.add_argument("--batch", action="store_true", help = "Launch batch of alignments. Input seqfile is expected to be chromfile as generated by cactus-graphmap-slit. IMPORTANT: paffile argument should bot be specified when using this option") parser.add_argument("--reference", type=str, help = "Ensure that given genome is acyclic by deleting all paralogy edges in postprocessing, also do not mask its PAF mappings") + parser.add_argument("--outgroup", type=str, nargs='+', help = "Use given genome as outgroup. Only works with --pangenome. Multiple allowed") #Progressive Cactus Options parser.add_argument("--configFile", dest="configFile", @@ -130,7 +131,10 @@ def main(): raise RuntimeError('--consCores required for non single_machine batch systems') if options.maxCores is not None and options.consCores > int(options.maxCores): raise RuntimeError('--consCores must be <= --maxCores') - + + if options.outgroup and not options.pangenome: + raise RuntimeError('--outgroup can only be used with --pangenome') + options.buildHal = True options.buildFasta = True @@ -261,6 +265,9 @@ def make_align_job(options, toil, config_wrapper=None, chrom_name=None): if options.reference and options.pangenome: # validate the sample names check_sample_names(input_seq_map.keys(), options.reference) + + if options.outgroup and options.pangenome: + og_map[ options.root if options.root else mc_tree.getRootName()] = options.outgroup # apply path overrides. this was necessary for wdl which doesn't take kindly to # text files of local paths (ie seqfile). one way to fix would be to add support @@ -311,8 +318,6 @@ def make_align_job(options, toil, config_wrapper=None, chrom_name=None): cafNode.attrib["runMapQFiltering"] = "0" # more iterations here helps quite a bit to reduce underalignment cafNode.attrib["maxRecoverableChainsIterations"] = "50" - # turn down minimum block degree to get a fat ancestor - barNode.attrib["minimumBlockDegree"] = "1" # turn off POA seeding poaNode.attrib["partialOrderAlignmentDisableSeeding"] = "1"