From 90335f1ef0205a16b2620f5056a0a9a34e716637 Mon Sep 17 00:00:00 2001 From: Glenn Hickey Date: Wed, 17 Jul 2024 13:16:25 -0400 Subject: [PATCH] disable outgroup computation for pangenomes --- src/cactus/refmap/cactus_graphmap.py | 3 +-- src/cactus/refmap/cactus_refmap.py | 3 +-- src/cactus/setup/cactus_align.py | 7 ++++++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/cactus/refmap/cactus_graphmap.py b/src/cactus/refmap/cactus_graphmap.py index 24ee6ca4d..74cf0f695 100644 --- a/src/cactus/refmap/cactus_graphmap.py +++ b/src/cactus/refmap/cactus_graphmap.py @@ -114,8 +114,7 @@ def graph_map(options): # load up the seqfile and figure out the outgroups and schedule config_wrapper.substituteAllPredefinedConstantsWithLiterals(options) mc_tree, input_seq_map, og_candidates = parse_seqfile(options.seqFile, config_wrapper, pangenome=True) - og_map = compute_outgroups(mc_tree, config_wrapper, set(og_candidates)) - event_set = get_event_set(mc_tree, config_wrapper, og_map, mc_tree.getRootName()) + event_set = get_event_set(mc_tree, config_wrapper, {}, mc_tree.getRootName()) # apply path overrides. this was necessary for wdl which doesn't take kindly to # text files of local paths (ie seqfile). one way to fix would be to add support diff --git a/src/cactus/refmap/cactus_refmap.py b/src/cactus/refmap/cactus_refmap.py index f6f451bd3..35e2bd31f 100644 --- a/src/cactus/refmap/cactus_refmap.py +++ b/src/cactus/refmap/cactus_refmap.py @@ -297,8 +297,7 @@ def main(): config_wrapper = ConfigWrapper(config_node) config_wrapper.substituteAllPredefinedConstantsWithLiterals(options) mc_tree, input_seq_map, og_candidates = parse_seqfile(options.seqFile, config_wrapper) - og_map = compute_outgroups(mc_tree, config_wrapper, set(og_candidates)) - event_set = get_event_set(mc_tree, config_wrapper, og_map, mc_tree.getRootName()) + event_set = get_event_set(mc_tree, config_wrapper, {}, mc_tree.getRootName()) # apply path overrides. this was necessary for wdl which doesn't take kindly to # text files of local paths (ie seqfile). one way to fix would be to add support diff --git a/src/cactus/setup/cactus_align.py b/src/cactus/setup/cactus_align.py index 1b1e44143..b49d27444 100644 --- a/src/cactus/setup/cactus_align.py +++ b/src/cactus/setup/cactus_align.py @@ -260,7 +260,12 @@ def make_align_job(options, toil, config_wrapper=None, chrom_name=None): mc_tree, input_seq_map, og_candidates = parse_seqfile(options.seqFile, config_wrapper, pangenome=options.pangenome) - og_map = compute_outgroups(mc_tree, config_wrapper, set(og_candidates), chrom_info_file = options.chromInfo) + if options.pangenome: + # outgroups not supported in pangenomes + # also, compute_outgroups() uses about 300 * N^2 bytes which can be huge for big pangenomes + og_map = {} + else: + og_map = compute_outgroups(mc_tree, config_wrapper, set(og_candidates), chrom_info_file = options.chromInfo) event_set = get_event_set(mc_tree, config_wrapper, og_map, options.root if options.root else mc_tree.getRootName()) if options.includeRoot: if options.root not in input_seq_map: