diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index bfb71465b..e85469e5b 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -358,7 +358,7 @@ echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" echo "Install hifiasm" echo "" cd $LKSRC_DIR -if [[ ! -e ${LKTOOLS_DIR}/primer3_core || ! -z $FORCE_REINSTALL ]]; +if [[ ! -e ${LKTOOLS_DIR}/hifiasm || ! -z $FORCE_REINSTALL ]]; then echo "Cleaning up previous installs" rm -Rf $LKTOOLS_DIR/hifiasm* diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/OutputIntegrationTests.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/OutputIntegrationTests.java index 7fdb138e1..41b45779e 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/OutputIntegrationTests.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/OutputIntegrationTests.java @@ -63,7 +63,7 @@ public static class VariantProcessingTest extends SequenceIntegrationTests.Abstr private static final String PROJECT_NAME = "VariantProcessingTestProject"; @BeforeClass - public static void initialSetUp() throws Exception + public static void initialSetUp() { doInitialSetUp(PROJECT_NAME); } @@ -91,12 +91,12 @@ public void testVariantProcessing() throws Exception //create VCF, import as outputfile String basename = "TestFile_" + FileUtil.getTimestamp(); - File vcf = new File(_pipelineRoot, basename + ".vcf.gz"); + File vcf = FileUtil.appendName(getPipelineRoot(_project), basename + ".vcf.gz"); Integer outputFileId = createTestVcf(genomeId, vcf); //make job params String jobName = "TestVariantProcessing"; - JSONObject config = substituteParams(new File(_sampleData, VARIANT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, VARIANT_JOB), jobName); Set outputFileIds = Collections.singleton(outputFileId); TableInfo ti = QueryService.get().getUserSchema(TestContext.get().getUser(), _project, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_OUTPUTFILES, null); @@ -129,7 +129,7 @@ public void testVariantProcessing() throws Exception } } - protected Set createOutputHandlerJob(String jobName, JSONObject config, Class handlerClass, Set outputFileIDs) throws Exception + protected Set createOutputHandlerJob(String jobName, JSONObject config, Class handlerClass, Set outputFileIDs) throws Exception { Map headers = new HashMap<>(); headers.put("Content-Type", "application/json"); @@ -200,7 +200,7 @@ private int createTestVcf(int genomeId, File vcf) Integer dataId = new TableSelector(SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_REF_LIBRARIES), PageFlowUtil.set("fasta_file"), new SimpleFilter(FieldKey.fromString("rowid"), genomeId), null).getObject(Integer.class); ExpData data = ExperimentService.get().getExpData(dataId); - File dictFile = new File(data.getFile().getParent(), FileUtil.getBaseName(data.getFile().getName()) + ".dict"); + File dictFile = FileUtil.appendName(data.getFile().getParentFile(), FileUtil.getBaseName(data.getFile().getName()) + ".dict"); if (dictFile.exists()) { SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(dictFile.toPath()); @@ -222,7 +222,7 @@ private int createTestVcf(int genomeId, File vcf) writer.add(vcb.make()); } - ExpData d = createExpData(vcf); + ExpData d = createExpData(vcf, _project); Map params = new CaseInsensitiveHashMap<>(); params.put("name", "TestVcf"); 
params.put("description", "Description"); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java index dc40b1ee9..47c9041ed 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java @@ -1924,7 +1924,7 @@ else if (o.has("relPath") || o.has("fileName")) if (f == null || !f.exists()) { - throw new PipelineValidationException("Unknown file: " + o.getString("relPath") + " / " + o.getString("fileName")); + throw new PipelineValidationException("Unknown file: " + o.optString("relPath") + " / " + o.optString("fileName")); } ret.add(f); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceIntegrationTests.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceIntegrationTests.java index 02e3e0dce..45d1737d4 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceIntegrationTests.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceIntegrationTests.java @@ -9,11 +9,12 @@ import htsjdk.samtools.fastq.FastqRecord; import htsjdk.samtools.fastq.FastqWriter; import htsjdk.samtools.fastq.FastqWriterFactory; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.io.FileUtils; import org.apache.commons.io.filefilter.IOFileFilter; import org.apache.commons.lang3.StringUtils; -import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.json.JSONArray; import org.json.JSONObject; @@ -29,6 +30,7 @@ import org.labkey.api.data.Table; import org.labkey.api.data.TableInfo; import org.labkey.api.data.TableSelector; +import org.labkey.api.data.WorkbookContainerType; import org.labkey.api.exp.api.DataType; import org.labkey.api.exp.api.ExpData; import org.labkey.api.exp.api.ExperimentService; @@ -60,6 +62,7 @@ import org.labkey.api.util.Pair; import org.labkey.api.util.Path; import org.labkey.api.util.TestContext; +import org.labkey.api.util.logging.LogHelper; import org.labkey.api.view.ViewServlet; import org.labkey.sequenceanalysis.model.BarcodeModel; import org.labkey.sequenceanalysis.pipeline.ReferenceLibraryPipelineJob; @@ -70,8 +73,6 @@ import org.springframework.mock.web.MockHttpServletResponse; import org.springframework.web.bind.annotation.RequestMethod; -import jakarta.servlet.http.HttpServletRequest; -import jakarta.servlet.http.HttpServletResponse; import java.io.BufferedReader; import java.io.File; import java.io.IOException; @@ -81,12 +82,12 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -212,17 +213,14 @@ abstract public static class AbstractPipelineTestCase extends Assert protected final String READSET_JOB = "readsetJob.json"; protected final String ALIGNMENT_JOB = "alignmentJob.json"; protected final String VARIANT_JOB = "variantProcessingJob.json"; - protected final String IMPORT_TASKID = "org.labkey.api.pipeline.file.FileAnalysisTaskPipeline:sequenceImportPipeline"; - protected final String ANALYSIS_TASKID = 
"org.labkey.api.pipeline.file.FileAnalysisTaskPipeline:sequenceAnalysisPipeline"; protected Container _project; protected TestContext _context; - protected File _pipelineRoot; protected File _sampleData; protected Boolean _isExternalPipelineEnabled = null; - protected static final Logger _log = LogManager.getLogger(AbstractPipelineTestCase.class); + protected static final Logger _log = LogHelper.getLogger(AbstractPipelineTestCase.class, "Messages related to SequenceIntegrationTests"); protected void writeJobLogToLabKeyLog(File log, String jobName) throws IOException { @@ -238,7 +236,12 @@ protected static boolean doSkipCleanup() return "1".equals(TestContext.get().getRequest().getParameter("skipTestCleanup")); } - protected static void doInitialSetUp(String projectName) throws Exception + protected Container createWorkbook() + { + return ContainerManager.createContainer(_project, null, "New Workbook", null, WorkbookContainerType.NAME, TestContext.get().getUser()); + } + + protected static void doInitialSetUp(String projectName) { //pre-clean doCleanup(projectName); @@ -248,11 +251,10 @@ protected static void doInitialSetUp(String projectName) throws Exception { project = ContainerManager.createContainer(ContainerManager.getRoot(), projectName, TestContext.get().getUser()); - //disable search so we dont get conflicts when deleting folder quickly + //disable search so we don't get conflicts when deleting folder quickly ContainerManager.updateSearchable(project, false, TestContext.get().getUser()); - Set modules = new HashSet<>(); - modules.addAll(project.getActiveModules()); + Set modules = new HashSet<>(project.getActiveModules()); modules.add(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.NAME)); project.setFolderType(FolderTypeManager.get().getFolderType("Laboratory Folder"), TestContext.get().getUser()); project.setActiveModules(modules); @@ -266,9 +268,7 @@ public void setUp() throws Exception { _context = TestContext.get(); _sampleData = getSampleDataDir(); - _project = ContainerManager.getForPath(getProjectName()); - _pipelineRoot = PipelineService.get().getPipelineRootSetting(_project).getRootPath(); String path = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath(SequencePipelineService.SEQUENCE_TOOLS_PARAM); path = StringUtils.trimToNull(path); @@ -314,13 +314,15 @@ private File getSampleDataDir() throws Exception return file; } - protected void ensureFilesPresent(String prefix) throws Exception + protected void ensureFilesPresent(String prefix, Container c) throws Exception { - File file1 = new File(_pipelineRoot, prefix + DUAL_BARCODE_FILENAME); + File rootPath = PipelineService.get().getPipelineRootSetting(c).getRootPath(); + + File file1 = FileUtil.appendName(rootPath, prefix + DUAL_BARCODE_FILENAME); if (!file1.exists()) { //debug intermittent failure - File orig = new File(_sampleData, DUAL_BARCODE_FILENAME + ".gz"); + File orig = FileUtil.appendName(_sampleData, DUAL_BARCODE_FILENAME + ".gz"); if (!orig.exists()) { _log.info("missing file: " + orig.getPath()); @@ -332,43 +334,43 @@ protected void ensureFilesPresent(String prefix) throws Exception } FileUtils.copyFile(orig, file1); - Compress.decompressGzip(new File(_sampleData, DUAL_BARCODE_FILENAME + ".gz"), file1); + Compress.decompressGzip(FileUtil.appendName(_sampleData, DUAL_BARCODE_FILENAME + ".gz"), file1); } - File file2 = new File(_pipelineRoot, prefix + SAMPLE_SFF_FILENAME); + File file2 = FileUtil.appendName(rootPath, prefix + SAMPLE_SFF_FILENAME); if (!file2.exists()) - 
FileUtils.copyFile(new File(_sampleData, SAMPLE_SFF_FILENAME), file2); + FileUtils.copyFile(FileUtil.appendName(_sampleData, SAMPLE_SFF_FILENAME), file2); for (String fn : Arrays.asList(PAIRED_FILENAME1, PAIRED_FILENAME_L1a, PAIRED_FILENAME_L1b, PAIRED_FILENAME_L2)) { - File file3 = new File(_pipelineRoot, prefix + fn); + File file3 = FileUtil.appendName(rootPath, prefix + fn); if (!file3.exists()) - FileUtils.copyFile(new File(_sampleData, PAIRED_FILENAME1), file3); + FileUtils.copyFile(FileUtil.appendName(_sampleData, PAIRED_FILENAME1), file3); } for (String fn : Arrays.asList(PAIRED_FILENAME2, PAIRED_FILENAME2_L1a, PAIRED_FILENAME2_L1b, PAIRED_FILENAME2_L2)) { - File file4 = new File(_pipelineRoot, prefix + fn); + File file4 = FileUtil.appendName(rootPath, prefix + fn); if (!file4.exists()) - FileUtils.copyFile(new File(_sampleData, PAIRED_FILENAME2), file4); + FileUtils.copyFile(FileUtil.appendName(_sampleData, PAIRED_FILENAME2), file4); } - File file5 = new File(_pipelineRoot, prefix + UNZIPPED_PAIRED_FILENAME1); + File file5 = FileUtil.appendName(rootPath, prefix + UNZIPPED_PAIRED_FILENAME1); if (!file5.exists()) { - decompressAndCleanFastq(new File(_sampleData, PAIRED_FILENAME1), file5); + decompressAndCleanFastq(FileUtil.appendName(_sampleData, PAIRED_FILENAME1), file5); } - File file6 = new File(_pipelineRoot, prefix + UNZIPPED_PAIRED_FILENAME2); + File file6 = FileUtil.appendName(rootPath, prefix + UNZIPPED_PAIRED_FILENAME2); if (!file6.exists()) { - decompressAndCleanFastq(new File(_sampleData, PAIRED_FILENAME2), file6); + decompressAndCleanFastq(FileUtil.appendName(_sampleData, PAIRED_FILENAME2), file6); } - File file7 = new File(_pipelineRoot, prefix + UNPAIRED_FILENAME); + File file7 = FileUtil.appendName(rootPath, prefix + UNPAIRED_FILENAME); if (!file7.exists()) { - FileUtils.copyFile(new File(_sampleData, UNPAIRED_FILENAME), file7); + FileUtils.copyFile(FileUtil.appendName(_sampleData, UNPAIRED_FILENAME), file7); } } @@ -381,8 +383,8 @@ protected void decompressAndCleanFastq(File input, File output) while (reader.hasNext()) { FastqRecord rec = reader.next(); - String header = rec.getReadHeader(); - if (rec.getReadHeader().endsWith("/1") || rec.getReadHeader().endsWith("/2")) + String header = rec.getReadName(); + if (rec.getReadName().endsWith("/1") || rec.getReadName().endsWith("/2")) { header = header.substring(0, header.lastIndexOf("/")); } @@ -391,14 +393,16 @@ protected void decompressAndCleanFastq(File input, File output) } } - protected void verifyFileInputs(File basedir, String[] fileNames, JSONObject config, String prefix) + protected void verifyFileInputs(File basedir, String[] fileNames, JSONObject config, String prefix, Container jobContainer) { + File pipelineRoot = PipelineService.get().findPipelineRoot(jobContainer).getRootPath(); + String handling = config.getString("inputFileTreatment"); if ("none".equals(handling)) { for (String fn : fileNames) { - File input = new File(_pipelineRoot, prefix + fn); + File input = FileUtil.appendName(pipelineRoot, prefix + fn); Assert.assertTrue("Input file missing: " + input.getPath(), input.exists()); } } @@ -408,14 +412,14 @@ else if ("compress".equals(handling)) for (String fn : fileNames) { - File input = new File(_pipelineRoot, prefix + fn); + File input = FileUtil.appendName(pipelineRoot, prefix + fn); Assert.assertFalse("Input file still exists: " + input.getPath(), input.exists()); File compressed; if (gz.isType(fn)) - compressed = new File(basedir, prefix + fn); + compressed = FileUtil.appendName(basedir, prefix 
+ fn); else - compressed = new File(basedir, FileUtil.getBaseName(prefix + fn) + ".fastq.gz"); + compressed = FileUtil.appendName(basedir, FileUtil.getBaseName(prefix + fn) + ".fastq.gz"); Assert.assertTrue("Compressed file missing: " + compressed.getPath(), compressed.exists()); } @@ -424,7 +428,7 @@ else if ("delete".equals(handling)) { for (String fn : fileNames) { - File input = new File(_pipelineRoot, prefix + fn); + File input = FileUtil.appendName(pipelineRoot, prefix + fn); Assert.assertFalse("Input file still present: " + input.getPath(), input.exists()); } } @@ -490,7 +494,7 @@ public boolean accept(File dir, String name) Assert.assertEquals("Incorrect number of outputs created", expectedOutputs.size(), files.size()); } - protected Set createPipelineJob(String jobName, JSONObject config, SequenceAnalysisController.AnalyzeForm.TYPE type) throws Exception + protected Set createPipelineJob(String jobName, JSONObject config, SequenceAnalysisController.AnalyzeForm.TYPE type, Container pipelineJobContainer) throws Exception { Map headers = new HashMap<>(); headers.put("Content-Type", "application/json"); @@ -503,7 +507,7 @@ protected Set createPipelineJob(String jobName, JSONObject config, json.put("type", type.name()); String requestContent = json.toString(); - HttpServletRequest request = ViewServlet.mockRequest(RequestMethod.POST.name(), DetailsURL.fromString("/sequenceanalysis/startPipelineJob.view").copy(_project).getActionURL(), _context.getUser(), headers, requestContent); + HttpServletRequest request = ViewServlet.mockRequest(RequestMethod.POST.name(), DetailsURL.fromString("/sequenceanalysis/startPipelineJob.view").copy(pipelineJobContainer).getActionURL(), _context.getUser(), headers, requestContent); MockHttpServletResponse response = ViewServlet.mockDispatch(request, null); JSONObject responseJson = new JSONObject(response.getContentAsString()); @@ -615,7 +619,7 @@ private boolean isJobDone(PipelineJob job) throws Exception _log.error("No log file present for sequence pipeline job"); } - throw new Exception("There was an error running job: " + (job == null ? 
"PipelineJob was null" : job.getDescription())); + throw new Exception("There was an error running job: " + job.getDescription()); } return false; //job != null && job.getActiveTaskId() != null; @@ -694,11 +698,6 @@ protected void appendSamplesForImport(JSONObject config, List files) } config.put("inputFiles", inputFiles); - - if (config.getBoolean("inputfile.barcode")) - { - //NOTE: this cannot automatically be inferred based on the other info in the config, so we just skip it - } } protected static void doCleanup(String projectName) @@ -738,6 +737,11 @@ protected static void doCleanup(String projectName) ContainerManager.deleteAll(project, TestContext.get().getUser()); } } + + protected File getPipelineRoot(Container c) + { + return PipelineService.get().getPipelineRootSetting(c).getRootPath(); + } } public static class SequenceImportPipelineTestCase extends AbstractPipelineTestCase @@ -745,7 +749,7 @@ public static class SequenceImportPipelineTestCase extends AbstractPipelineTestC private static final String PROJECT_NAME = "SequenceImportTestProject"; @BeforeClass - public static void initialSetUp() throws Exception + public static void initialSetUp() { doInitialSetUp(PROJECT_NAME); } @@ -770,11 +774,12 @@ protected String getProjectName() public void basicTest() throws Exception { String prefix = "BasicTest_"; - ensureFilesPresent(prefix); + Container workbook = createWorkbook(); + ensureFilesPresent(prefix, workbook); String jobName = prefix + System.currentTimeMillis(); String[] fileNames = new String[]{DUAL_BARCODE_FILENAME}; - JSONObject config = substituteParams(new File(_sampleData, READSET_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, READSET_JOB), jobName); FileGroup g = new FileGroup(); g.name = "Group1"; g.filePairs = new ArrayList<>(); @@ -783,22 +788,22 @@ public void basicTest() throws Exception appendSamplesForImport(config, List.of(g)); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport, workbook); waitForJobs(jobs); Set expectedOutputs = new HashSet<>(); File basedir = getBaseDir(jobs.iterator().next()); - File fq = new File(basedir, prefix + DUAL_BARCODE_FILENAME + ".gz"); + File fq = FileUtil.appendName(basedir, prefix + DUAL_BARCODE_FILENAME + ".gz"); expectedOutputs.add(fq); - expectedOutputs.add(new File(basedir, "sequenceImport.json")); - expectedOutputs.add(new File(basedir, "sequenceSupport.json.gz")); - expectedOutputs.add(new File(basedir, basedir.getName() + ".pipe.xar.xml")); - File log = new File(basedir, jobName + ".log"); + expectedOutputs.add(FileUtil.appendName(basedir, "sequenceImport.json")); + expectedOutputs.add(FileUtil.appendName(basedir, "sequenceSupport.json.gz")); + expectedOutputs.add(FileUtil.appendName(basedir, basedir.getName() + ".pipe.xar.xml")); + File log = FileUtil.appendName(basedir, jobName + ".log"); expectedOutputs.add(log); try { verifyFileOutputs(basedir, expectedOutputs); - verifyFileInputs(basedir, fileNames, config, prefix); + verifyFileInputs(basedir, fileNames, config, prefix, workbook); validateReadsets(jobs, config); Assert.assertEquals("Incorrect read number", 3260L, FastqUtils.getSequenceCount(fq)); @@ -814,11 +819,12 @@ public void basicTest() throws Exception public void leaveInPlaceTest() throws Exception { String prefix = "BasicTest_"; - ensureFilesPresent(prefix); + Container c = createWorkbook(); + 
ensureFilesPresent(prefix, c); String jobName = prefix + System.currentTimeMillis(); String[] fileNames = new String[]{PAIRED_FILENAME1}; - JSONObject config = substituteParams(new File(_sampleData, READSET_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, READSET_JOB), jobName); FileGroup g = new FileGroup(); g.name = "Group1"; g.filePairs = new ArrayList<>(); @@ -828,23 +834,23 @@ public void leaveInPlaceTest() throws Exception appendSamplesForImport(config, List.of(g)); config.put("inputFileTreatment", "leaveInPlace"); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport, c); waitForJobs(jobs); Set expectedOutputs = new HashSet<>(); File basedir = getBaseDir(jobs.iterator().next()); - Assert.assertFalse("Unexpected file found", new File(basedir, prefix + PAIRED_FILENAME1).exists()); - File fq = new File(_pipelineRoot, prefix + PAIRED_FILENAME1); + Assert.assertFalse("Unexpected file found", FileUtil.appendName(basedir, prefix + PAIRED_FILENAME1).exists()); + File fq = FileUtil.appendName(getPipelineRoot(c), prefix + PAIRED_FILENAME1); Assert.assertTrue("File not found", fq.exists()); - expectedOutputs.add(new File(basedir, "sequenceImport.json")); - expectedOutputs.add(new File(basedir, "sequenceSupport.json.gz")); - expectedOutputs.add(new File(basedir, basedir.getName() + ".pipe.xar.xml")); - File log = new File(basedir, jobName + ".log"); + expectedOutputs.add(FileUtil.appendName(basedir, "sequenceImport.json")); + expectedOutputs.add(FileUtil.appendName(basedir, "sequenceSupport.json.gz")); + expectedOutputs.add(FileUtil.appendName(basedir, basedir.getName() + ".pipe.xar.xml")); + File log = FileUtil.appendName(basedir, jobName + ".log"); expectedOutputs.add(log); try { verifyFileOutputs(basedir, expectedOutputs); - verifyFileInputs(basedir, fileNames, config, prefix); + verifyFileInputs(basedir, fileNames, config, prefix, c); validateReadsets(jobs, config); Assert.assertEquals("Incorrect read number", 211L, FastqUtils.getSequenceCount(fq)); @@ -858,7 +864,7 @@ public void leaveInPlaceTest() throws Exception private void runMergePipelineJob(String jobName, boolean deleteIntermediates, String prefix) throws Exception { - JSONObject config = substituteParams(new File(_sampleData, READSET_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, READSET_JOB), jobName); FileGroup g = new FileGroup(); g.name = "Group1"; @@ -906,54 +912,49 @@ private void runMergePipelineJob(String jobName, boolean deleteIntermediates, St config.put("inputfile.runFastqc", true); appendSamplesForImport(config, Arrays.asList(g, g2, g3)); - Set jobsUnsorted = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport); + Set jobsUnsorted = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport, _project); waitForJobs(jobsUnsorted); List jobs = new ArrayList<>(jobsUnsorted); - Collections.sort(jobs, new Comparator<>() - { - @Override - public int compare(PipelineJob o1, PipelineJob o2) - { - JSONObject j1 = new JSONObject(o1.getParameters().get("fileGroup_1")); - JSONObject j2 = new JSONObject(o2.getParameters().get("fileGroup_1")); + jobs.sort((o1, o2) -> { + JSONObject j1 = new JSONObject(o1.getParameters().get("fileGroup_1")); + JSONObject j2 = new JSONObject(o2.getParameters().get("fileGroup_1")); - return 
j1.getString("name").compareTo(j2.getString("name")); - } + return j1.getString("name").compareTo(j2.getString("name")); }); //job1: g1 Set expectedOutputs = new HashSet<>(); File basedir = getBaseDir(jobs.get(0)); - File normalizeDir = new File(basedir, "Normalization"); + File normalizeDir = FileUtil.appendName(basedir, "Normalization"); expectedOutputs.add(normalizeDir); - File merge1 = new File(normalizeDir, prefix + SequenceTaskHelper.getUnzippedBaseName(PAIRED_FILENAME_L1a) + ".merged.fastq.gz"); + File merge1 = FileUtil.appendName(normalizeDir, prefix + SequenceTaskHelper.getUnzippedBaseName(PAIRED_FILENAME_L1a) + ".merged.fastq.gz"); expectedOutputs.add(merge1); - expectedOutputs.add(new File(normalizeDir, FileUtil.getBaseName(FileUtil.getBaseName(merge1)) + "_fastqc.html.gz")); - expectedOutputs.add(new File(normalizeDir, FileUtil.getBaseName(FileUtil.getBaseName(merge1)) + "_fastqc.zip")); - File merge2 = new File(normalizeDir, prefix + SequenceTaskHelper.getUnzippedBaseName(PAIRED_FILENAME2_L1a) + ".merged.fastq.gz"); + expectedOutputs.add(FileUtil.appendName(normalizeDir, FileUtil.getBaseName(FileUtil.getBaseName(merge1)) + "_fastqc.html.gz")); + expectedOutputs.add(FileUtil.appendName(normalizeDir, FileUtil.getBaseName(FileUtil.getBaseName(merge1)) + "_fastqc.zip")); + File merge2 = FileUtil.appendName(normalizeDir, prefix + SequenceTaskHelper.getUnzippedBaseName(PAIRED_FILENAME2_L1a) + ".merged.fastq.gz"); expectedOutputs.add(merge2); - expectedOutputs.add(new File(normalizeDir, FileUtil.getBaseName(FileUtil.getBaseName(merge2)) + "_fastqc.html.gz")); - expectedOutputs.add(new File(merge2.getParentFile(), FileUtil.getBaseName(FileUtil.getBaseName(merge2)) + "_fastqc.zip")); + expectedOutputs.add(FileUtil.appendName(normalizeDir, FileUtil.getBaseName(FileUtil.getBaseName(merge2)) + "_fastqc.html.gz")); + expectedOutputs.add(FileUtil.appendName(merge2.getParentFile(), FileUtil.getBaseName(FileUtil.getBaseName(merge2)) + "_fastqc.zip")); - expectedOutputs.add(new File(basedir, prefix + PAIRED_FILENAME_L2)); - expectedOutputs.add(new File(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME_L2)) + "_fastqc.html.gz")); - expectedOutputs.add(new File(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME_L2)) + "_fastqc.zip")); - expectedOutputs.add(new File(basedir, prefix + PAIRED_FILENAME2_L2)); - expectedOutputs.add(new File(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME2_L2)) + "_fastqc.html.gz")); - expectedOutputs.add(new File(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME2_L2)) + "_fastqc.zip")); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + PAIRED_FILENAME_L2)); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME_L2)) + "_fastqc.html.gz")); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME_L2)) + "_fastqc.zip")); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + PAIRED_FILENAME2_L2)); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME2_L2)) + "_fastqc.html.gz")); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME2_L2)) + "_fastqc.zip")); //these will be merged if (!deleteIntermediates) { - expectedOutputs.add(new File(basedir, prefix + PAIRED_FILENAME_L1a)); - expectedOutputs.add(new 
File(basedir, prefix + PAIRED_FILENAME2_L1a)); - expectedOutputs.add(new File(basedir, prefix + PAIRED_FILENAME_L1b)); - expectedOutputs.add(new File(basedir, prefix + PAIRED_FILENAME2_L1b)); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + PAIRED_FILENAME_L1a)); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + PAIRED_FILENAME2_L1a)); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + PAIRED_FILENAME_L1b)); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + PAIRED_FILENAME2_L1b)); } - verifyJob(basedir, jobName, expectedOutputs, new String[]{PAIRED_FILENAME_L1a, PAIRED_FILENAME2_L1a, PAIRED_FILENAME_L1b, PAIRED_FILENAME2_L1b, PAIRED_FILENAME_L2, PAIRED_FILENAME2_L2}, prefix, config); + verifyJob(basedir, jobName, expectedOutputs, new String[]{PAIRED_FILENAME_L1a, PAIRED_FILENAME2_L1a, PAIRED_FILENAME_L1b, PAIRED_FILENAME2_L1b, PAIRED_FILENAME_L2, PAIRED_FILENAME2_L2}, prefix, config, _project); Assert.assertEquals("Incorrect read number", 422L, FastqUtils.getSequenceCount(merge1)); Assert.assertEquals("Incorrect read number", 422L, FastqUtils.getSequenceCount(merge2)); @@ -962,39 +963,39 @@ public int compare(PipelineJob o1, PipelineJob o2) expectedOutputs = new HashSet<>(); basedir = getBaseDir(jobs.get(1)); - expectedOutputs.add(new File(basedir, prefix + PAIRED_FILENAME1)); - expectedOutputs.add(new File(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME1)) + "_fastqc.html.gz")); - expectedOutputs.add(new File(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME1)) + "_fastqc.zip")); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + PAIRED_FILENAME1)); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME1)) + "_fastqc.html.gz")); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME1)) + "_fastqc.zip")); - expectedOutputs.add(new File(basedir, prefix + PAIRED_FILENAME2)); - expectedOutputs.add(new File(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME2)) + "_fastqc.html.gz")); - expectedOutputs.add(new File(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME2)) + "_fastqc.zip")); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + PAIRED_FILENAME2)); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME2)) + "_fastqc.html.gz")); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(PAIRED_FILENAME2)) + "_fastqc.zip")); - verifyJob(basedir, jobName, expectedOutputs, new String[]{PAIRED_FILENAME1, PAIRED_FILENAME2}, prefix, config); + verifyJob(basedir, jobName, expectedOutputs, new String[]{PAIRED_FILENAME1, PAIRED_FILENAME2}, prefix, config, _project); //job3: g3 expectedOutputs = new HashSet<>(); basedir = getBaseDir(jobs.get(2)); - expectedOutputs.add(new File(basedir, prefix + UNPAIRED_FILENAME)); - expectedOutputs.add(new File(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(UNPAIRED_FILENAME)) + "_fastqc.html.gz")); - expectedOutputs.add(new File(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(UNPAIRED_FILENAME)) + "_fastqc.zip")); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + UNPAIRED_FILENAME)); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(UNPAIRED_FILENAME)) + "_fastqc.html.gz")); + 
expectedOutputs.add(FileUtil.appendName(basedir, prefix + FileUtil.getBaseName(FileUtil.getBaseName(UNPAIRED_FILENAME)) + "_fastqc.zip")); - verifyJob(basedir, jobName, expectedOutputs, new String[]{UNPAIRED_FILENAME}, prefix, config); + verifyJob(basedir, jobName, expectedOutputs, new String[]{UNPAIRED_FILENAME}, prefix, config, _project); validateReadsets(jobs, config, 1); //we expect one per job, total of 3 } - private void verifyJob(File basedir, String jobName, Set expectedOutputs, String[] fileNames, String prefix, JSONObject config) throws Exception + private void verifyJob(File basedir, String jobName, Set expectedOutputs, String[] fileNames, String prefix, JSONObject config, Container c) throws Exception { - expectedOutputs.add(new File(basedir, basedir.getName() + ".pipe.xar.xml")); - expectedOutputs.add(new File(basedir, "sequenceImport.json")); - expectedOutputs.add(new File(basedir, "sequenceSupport.json.gz")); + expectedOutputs.add(FileUtil.appendName(basedir, basedir.getName() + ".pipe.xar.xml")); + expectedOutputs.add(FileUtil.appendName(basedir, "sequenceImport.json")); + expectedOutputs.add(FileUtil.appendName(basedir, "sequenceSupport.json.gz")); - File log = new File(basedir, jobName + ".log"); + File log = FileUtil.appendName(basedir, jobName + ".log"); expectedOutputs.add(log); try { verifyFileOutputs(basedir, expectedOutputs); - verifyFileInputs(basedir, fileNames, config, prefix); + verifyFileInputs(basedir, fileNames, config, prefix, c); } catch (Exception e) { @@ -1014,7 +1015,7 @@ public void mergeTest() throws Exception return; String prefix = "MergeTestLanes_"; - ensureFilesPresent(prefix); + ensureFilesPresent(prefix, _project); String jobName = prefix + System.currentTimeMillis(); runMergePipelineJob(jobName, false, prefix); @@ -1030,7 +1031,7 @@ public void mergeTestDeletingIntermediates() throws Exception return; String prefix = "MergeDeletingIntermediates_"; - ensureFilesPresent(prefix); + ensureFilesPresent(prefix, _project); String jobName = prefix + System.currentTimeMillis(); runMergePipelineJob(jobName, true, prefix); @@ -1038,7 +1039,7 @@ public void mergeTestDeletingIntermediates() throws Exception private JSONObject getBarcodeConfig(String jobName, String[] fileNames, String prefix) throws Exception { - JSONObject config = substituteParams(new File(_sampleData, READSET_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, READSET_JOB), jobName); FileGroup g = new FileGroup(); g.name = "Group1"; @@ -1077,26 +1078,26 @@ private Set getBarcodeOutputs(File basedir, String jobName, String prefix) { Set expectedOutputs = new HashSet<>(); - expectedOutputs.add(new File(basedir, "sequenceImport.json")); - expectedOutputs.add(new File(basedir, "sequenceSupport.json.gz")); + expectedOutputs.add(FileUtil.appendName(basedir, "sequenceImport.json")); + expectedOutputs.add(FileUtil.appendName(basedir, "sequenceSupport.json.gz")); - expectedOutputs.add(new File(basedir, basedir.getName() + ".pipe.xar.xml")); - expectedOutputs.add(new File(basedir, jobName + ".log")); - expectedOutputs.add(new File(basedir, "extraBarcodes.txt")); + expectedOutputs.add(FileUtil.appendName(basedir, basedir.getName() + ".pipe.xar.xml")); + expectedOutputs.add(FileUtil.appendName(basedir, jobName + ".log")); + expectedOutputs.add(FileUtil.appendName(basedir, "extraBarcodes.txt")); - File normalizationDir = new File(basedir, "Normalization"); + File normalizationDir = FileUtil.appendName(basedir, "Normalization"); expectedOutputs.add(normalizationDir); 
- normalizationDir = new File(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME)); + normalizationDir = FileUtil.appendName(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME)); expectedOutputs.add(normalizationDir); - expectedOutputs.add(new File(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME) + "_MID001_MID001.fastq.gz")); - expectedOutputs.add(new File(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME) + "_MID002_MID001.fastq.gz")); - expectedOutputs.add(new File(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME) + "_MID003_MID001.fastq.gz")); - expectedOutputs.add(new File(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME) + "_MID004_MID001.fastq.gz")); - expectedOutputs.add(new File(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME) + "_unknowns.fastq.gz")); - expectedOutputs.add(new File(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME) + "_unknowns.fastq.gz.metrics")); + expectedOutputs.add(FileUtil.appendName(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME) + "_MID001_MID001.fastq.gz")); + expectedOutputs.add(FileUtil.appendName(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME) + "_MID002_MID001.fastq.gz")); + expectedOutputs.add(FileUtil.appendName(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME) + "_MID003_MID001.fastq.gz")); + expectedOutputs.add(FileUtil.appendName(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME) + "_MID004_MID001.fastq.gz")); + expectedOutputs.add(FileUtil.appendName(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME) + "_unknowns.fastq.gz")); + expectedOutputs.add(FileUtil.appendName(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME) + "_unknowns.fastq.gz.metrics")); - expectedOutputs.add(new File(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME) + ".barcode-summary.txt.gz")); + expectedOutputs.add(FileUtil.appendName(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME) + ".barcode-summary.txt.gz")); return expectedOutputs; } @@ -1111,27 +1112,28 @@ public void barcodeTest() throws Exception return; String prefix = "BarcodeTest_"; - ensureFilesPresent(prefix); + Container workbook = createWorkbook(); + ensureFilesPresent(prefix, workbook); String jobName = prefix + System.currentTimeMillis(); String[] fileNames = new String[]{DUAL_BARCODE_FILENAME}; JSONObject config = getBarcodeConfig(jobName, fileNames, prefix); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport, workbook); waitForJobs(jobs); File basedir = getBaseDir(jobs.iterator().next()); Set expectedOutputs = getBarcodeOutputs(basedir, jobName, prefix); - File normalizationDir = new File(basedir, "Normalization"); + File normalizationDir = FileUtil.appendName(basedir, "Normalization"); expectedOutputs.add(normalizationDir); - normalizationDir = new File(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME)); - expectedOutputs.add(new File(normalizationDir, prefix + DUAL_BARCODE_FILENAME + ".gz")); + normalizationDir = FileUtil.appendName(normalizationDir, prefix + FileUtil.getBaseName(DUAL_BARCODE_FILENAME)); + expectedOutputs.add(FileUtil.appendName(normalizationDir, prefix + DUAL_BARCODE_FILENAME + 
".gz")); - File log = new File(basedir, jobName + ".log"); + File log = FileUtil.appendName(basedir, jobName + ".log"); try { verifyFileOutputs(basedir, expectedOutputs); - verifyFileInputs(basedir, fileNames, config, prefix); + verifyFileInputs(basedir, fileNames, config, prefix, workbook); validateReadsets(jobs, config, 4); validateBarcodeFastqs(expectedOutputs); } @@ -1157,7 +1159,7 @@ public void barcodeTestDeletingIntermediates() throws Exception return; String prefix = "BarcodeDeletingIntermediates_"; - ensureFilesPresent(prefix); + ensureFilesPresent(prefix, _project); String jobName = prefix + System.currentTimeMillis(); String[] fileNames = new String[]{DUAL_BARCODE_FILENAME}; @@ -1166,18 +1168,18 @@ public void barcodeTestDeletingIntermediates() throws Exception config.put("deleteIntermediateFiles", true); config.put("inputFileTreatment", "compress"); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport, _project); waitForJobs(jobs); File basedir = getBaseDir(jobs.iterator().next()); Set expectedOutputs = getBarcodeOutputs(basedir, jobName, prefix); - expectedOutputs.add(new File(basedir, prefix + "dualBarcodes_SIV.fastq.gz")); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + "dualBarcodes_SIV.fastq.gz")); - File log = new File(basedir, jobName + ".log"); + File log = FileUtil.appendName(basedir, jobName + ".log"); try { verifyFileOutputs(basedir, expectedOutputs); - verifyFileInputs(basedir, fileNames, config, prefix); + verifyFileInputs(basedir, fileNames, config, prefix, _project); validateReadsets(jobs, config, 4); validateBarcodeFastqs(expectedOutputs); } @@ -1193,16 +1195,19 @@ private void validateBarcodeFastqs(Set expectedOutputs) throws Exception { for (File f : expectedOutputs) { - if (f.getName().equals("dualBarcodes_SIV_MID001_MID001.fastq.gz")) - Assert.assertEquals("Incorrect read number", 303L, FastqUtils.getSequenceCount(f)); - else if (f.getName().equals("dualBarcodes_SIV_MID002_MID001.fastq.gz")) - Assert.assertEquals("Incorrect read number", 236L, FastqUtils.getSequenceCount(f)); - else if (f.getName().equals("dualBarcodes_SIV_MID003_MID001.fastq.gz")) - Assert.assertEquals("Incorrect read number", 235L, FastqUtils.getSequenceCount(f)); - else if (f.getName().equals("dualBarcodes_SIV_MID004_MID001.fastq.gz")) - Assert.assertEquals("Incorrect read number", 98L, FastqUtils.getSequenceCount(f)); - else if (f.getName().equals("dualBarcodes_SIV_unknowns.fastq.gz")) - Assert.assertEquals("Incorrect read number", 2388L, FastqUtils.getSequenceCount(f)); + switch (f.getName()) + { + case "dualBarcodes_SIV_MID001_MID001.fastq.gz" -> + Assert.assertEquals("Incorrect read number", 303L, FastqUtils.getSequenceCount(f)); + case "dualBarcodes_SIV_MID002_MID001.fastq.gz" -> + Assert.assertEquals("Incorrect read number", 236L, FastqUtils.getSequenceCount(f)); + case "dualBarcodes_SIV_MID003_MID001.fastq.gz" -> + Assert.assertEquals("Incorrect read number", 235L, FastqUtils.getSequenceCount(f)); + case "dualBarcodes_SIV_MID004_MID001.fastq.gz" -> + Assert.assertEquals("Incorrect read number", 98L, FastqUtils.getSequenceCount(f)); + case "dualBarcodes_SIV_unknowns.fastq.gz" -> + Assert.assertEquals("Incorrect read number", 2388L, FastqUtils.getSequenceCount(f)); + } } } @@ -1213,11 +1218,12 @@ else if (f.getName().equals("dualBarcodes_SIV_unknowns.fastq.gz")) public void pairedEndTest() throws Exception { 
String prefix = "PairedEndTest_"; - ensureFilesPresent(prefix); + Container c = createWorkbook(); + ensureFilesPresent(prefix, c); String jobName = prefix + System.currentTimeMillis(); String[] fileNames = new String[]{PAIRED_FILENAME1, PAIRED_FILENAME2}; - JSONObject config = substituteParams(new File(_sampleData, READSET_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, READSET_JOB), jobName); FileGroup g = new FileGroup(); g.name = "Group1"; g.filePairs = new ArrayList<>(); @@ -1227,23 +1233,23 @@ public void pairedEndTest() throws Exception appendSamplesForImport(config, List.of(g)); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport, c); waitForJobs(jobs); Set expectedOutputs = new HashSet<>(); File basedir = getBaseDir(jobs.iterator().next()); - expectedOutputs.add(new File(basedir, "sequenceImport.json")); - expectedOutputs.add(new File(basedir, "sequenceSupport.json.gz")); - expectedOutputs.add(new File(basedir, basedir.getName() + ".pipe.xar.xml")); - expectedOutputs.add(new File(basedir, prefix + PAIRED_FILENAME1)); - expectedOutputs.add(new File(basedir, prefix + PAIRED_FILENAME2)); + expectedOutputs.add(FileUtil.appendName(basedir, "sequenceImport.json")); + expectedOutputs.add(FileUtil.appendName(basedir, "sequenceSupport.json.gz")); + expectedOutputs.add(FileUtil.appendName(basedir, basedir.getName() + ".pipe.xar.xml")); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + PAIRED_FILENAME1)); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + PAIRED_FILENAME2)); - File log = new File(basedir, jobName + ".log"); + File log = FileUtil.appendName(basedir, jobName + ".log"); expectedOutputs.add(log); try { verifyFileOutputs(basedir, expectedOutputs); - verifyFileInputs(basedir, fileNames, config, prefix); + verifyFileInputs(basedir, fileNames, config, prefix, c); validateReadsets(jobs, config); } catch (Exception e) @@ -1261,11 +1267,12 @@ public void pairedEndTest() throws Exception public void pairedEndTestMovingInputs() throws Exception { String prefix = "PairedEndMovingInputs_"; - ensureFilesPresent(prefix); + Container workbook = createWorkbook(); + ensureFilesPresent(prefix, workbook); String jobName = prefix + System.currentTimeMillis(); String[] fileNames = new String[]{PAIRED_FILENAME1, PAIRED_FILENAME2, UNZIPPED_PAIRED_FILENAME1, UNZIPPED_PAIRED_FILENAME2}; - JSONObject config = substituteParams(new File(_sampleData, READSET_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, READSET_JOB), jobName); config.put("inputFileTreatment", "compress"); FileGroup g = new FileGroup(); @@ -1281,26 +1288,26 @@ public void pairedEndTestMovingInputs() throws Exception appendSamplesForImport(config, List.of(g)); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport, workbook); waitForJobs(jobs); Set expectedOutputs = new HashSet<>(); File basedir = getBaseDir(jobs.iterator().next()); - expectedOutputs.add(new File(basedir, "sequenceImport.json")); - expectedOutputs.add(new File(basedir, "sequenceSupport.json.gz")); - expectedOutputs.add(new File(basedir, basedir.getName() + ".pipe.xar.xml")); + expectedOutputs.add(FileUtil.appendName(basedir, "sequenceImport.json")); + 
expectedOutputs.add(FileUtil.appendName(basedir, "sequenceSupport.json.gz")); + expectedOutputs.add(FileUtil.appendName(basedir, basedir.getName() + ".pipe.xar.xml")); - expectedOutputs.add(new File(basedir, prefix + UNZIPPED_PAIRED_FILENAME1 + ".gz")); - expectedOutputs.add(new File(basedir, prefix + UNZIPPED_PAIRED_FILENAME2 + ".gz")); - expectedOutputs.add(new File(basedir, prefix + PAIRED_FILENAME1)); - expectedOutputs.add(new File(basedir, prefix + PAIRED_FILENAME2)); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + UNZIPPED_PAIRED_FILENAME1 + ".gz")); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + UNZIPPED_PAIRED_FILENAME2 + ".gz")); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + PAIRED_FILENAME1)); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + PAIRED_FILENAME2)); - File log = new File(basedir, jobName + ".log"); + File log = FileUtil.appendName(basedir, jobName + ".log"); expectedOutputs.add(log); try { verifyFileOutputs(basedir, expectedOutputs); - verifyFileInputs(basedir, fileNames, config, prefix); + verifyFileInputs(basedir, fileNames, config, prefix, workbook); validateReadsets(jobs, config); } catch (Exception e) @@ -1317,11 +1324,12 @@ public void pairedEndTestMovingInputs() throws Exception public void pairedEndTestDeletingInputs() throws Exception { String prefix = "PairedEndDeleting_"; - ensureFilesPresent(prefix); + Container workbook = createWorkbook(); + ensureFilesPresent(prefix, workbook); String jobName = prefix + System.currentTimeMillis(); String[] fileNames = new String[]{PAIRED_FILENAME1, PAIRED_FILENAME2, UNZIPPED_PAIRED_FILENAME1, UNZIPPED_PAIRED_FILENAME2}; - JSONObject config = substituteParams(new File(_sampleData, READSET_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, READSET_JOB), jobName); config.put("inputFileTreatment", "delete"); FileGroup g = new FileGroup(); @@ -1337,27 +1345,27 @@ public void pairedEndTestDeletingInputs() throws Exception appendSamplesForImport(config, List.of(g)); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.readsetImport, workbook); waitForJobs(jobs); Set expectedOutputs = new HashSet<>(); File basedir = getBaseDir(jobs.iterator().next()); - expectedOutputs.add(new File(basedir, "sequenceImport.json")); - expectedOutputs.add(new File(basedir, "sequenceSupport.json.gz")); - expectedOutputs.add(new File(basedir, basedir.getName() + ".pipe.xar.xml")); + expectedOutputs.add(FileUtil.appendName(basedir, "sequenceImport.json")); + expectedOutputs.add(FileUtil.appendName(basedir, "sequenceSupport.json.gz")); + expectedOutputs.add(FileUtil.appendName(basedir, basedir.getName() + ".pipe.xar.xml")); - expectedOutputs.add(new File(basedir, prefix + UNZIPPED_PAIRED_FILENAME1 + ".gz")); - expectedOutputs.add(new File(basedir, prefix + UNZIPPED_PAIRED_FILENAME2 + ".gz")); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + UNZIPPED_PAIRED_FILENAME1 + ".gz")); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + UNZIPPED_PAIRED_FILENAME2 + ".gz")); - expectedOutputs.add(new File(basedir, prefix + PAIRED_FILENAME1)); - expectedOutputs.add(new File(basedir, prefix + PAIRED_FILENAME2)); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + PAIRED_FILENAME1)); + expectedOutputs.add(FileUtil.appendName(basedir, prefix + PAIRED_FILENAME2)); - File log = new File(basedir, jobName + 
".log"); + File log = FileUtil.appendName(basedir, jobName + ".log"); expectedOutputs.add(log); try { verifyFileOutputs(basedir, expectedOutputs); - verifyFileInputs(basedir, fileNames, config, prefix); + verifyFileInputs(basedir, fileNames, config, prefix, workbook); validateReadsets(jobs, config); } catch (Exception e) @@ -1381,7 +1389,7 @@ private void validateReadsets(Collection jobs, JSONObject config, I int numberExpected = expected != null ? expected : inferExpectedReadsets(config); Assert.assertEquals("Incorrect number of readsets created", numberExpected, models.length); validateSamples(models, config); - validateQualityMetrics(models, config); + validateQualityMetrics(models); } } @@ -1434,7 +1442,7 @@ private void validateSamples(SequenceReadsetImpl[] models, JSONObject config) } } - private void validateQualityMetrics(SequenceReadsetImpl[] models, JSONObject config) + private void validateQualityMetrics(SequenceReadsetImpl[] models) { TableInfo ti = SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_QUALITY_METRICS); @@ -1492,8 +1500,7 @@ protected void validateAlignmentJob(Set jobs, Collection ad File basedir = job.getAnalysisDirectory().toNioPathForRead().toFile(); String outDir = SequenceTaskHelper.getUnzippedBaseName(rs.getReadDataImpl().get(0).getFile1()); - Set expectedOutputs = new HashSet<>(); - expectedOutputs.addAll(addDefaultAlignmentOutputs(basedir, job.getProtocolName(), rs, outDir)); + Set expectedOutputs = new HashSet<>(addDefaultAlignmentOutputs(basedir, job.getProtocolName(), rs, outDir)); additionalFiles = new HashSet<>(additionalFiles); additionalFiles.add("Shared"); if (includeRefFiles) @@ -1505,17 +1512,17 @@ protected void validateAlignmentJob(Set jobs, Collection ad for (String fn : additionalFiles) { - expectedOutputs.add(new File(basedir, fn)); + expectedOutputs.add(FileUtil.appendPath(basedir, Path.parse(fn))); } - File bam = new File(basedir, outDir + "/Alignment/" + rs.getName() + ".bam"); + File bam = FileUtil.appendPath(basedir, Path.parse(outDir + "/Alignment/" + rs.getName() + ".bam")); expectedOutputs.add(bam); - expectedOutputs.add(new File(basedir, outDir + "/Alignment/" + rs.getName() + ".bam.bai")); + expectedOutputs.add(FileUtil.appendPath(basedir, Path.parse(outDir + "/Alignment/" + rs.getName() + ".bam.bai"))); - expectedOutputs.add(new File(basedir, outDir + "/Alignment/idxstats.txt")); + expectedOutputs.add(FileUtil.appendPath(basedir, Path.parse(outDir + "/Alignment/idxstats.txt"))); - File log = new File(basedir, job.getProtocolName() + ".log"); + File log = FileUtil.appendName(basedir, job.getProtocolName() + ".log"); try { validateInputs(); @@ -1533,22 +1540,16 @@ protected Collection addDefaultAlignmentOutputs(File basedir, String jobNa { List extraFiles = new ArrayList<>(); - extraFiles.add(new File(basedir, jobName + ".log")); - extraFiles.add(new File(basedir, "sequenceAnalysis.json")); - extraFiles.add(new File(basedir, "sequenceSupport.json.gz")); - extraFiles.add(new File(basedir, basedir.getName() + ".pipe.xar.xml")); + extraFiles.add(FileUtil.appendName(basedir, jobName + ".log")); + extraFiles.add(FileUtil.appendName(basedir, "sequenceAnalysis.json")); + extraFiles.add(FileUtil.appendName(basedir, "sequenceSupport.json.gz")); + extraFiles.add(FileUtil.appendName(basedir, basedir.getName() + ".pipe.xar.xml")); - extraFiles.add(new File(basedir, outDir)); - extraFiles.add(new File(basedir, outDir + "/Alignment")); - extraFiles.add(new File(basedir, outDir + "/Alignment/" + rs.getName() + ".summary.metrics")); - if 
(rs.getReadData().get(0).getFile2() != null)
-        {
-            //TODO
-            //extraFiles.add(new File(basedir, outDir + "/Alignment/" + rs.getName() + ".insertsize.metrics"));
-            //extraFiles.add(new File(basedir, outDir + "/Alignment/" + rs.getName() + ".insertsize.metrics.pdf"));
-        }
+        extraFiles.add(FileUtil.appendName(basedir, outDir));
+        extraFiles.add(FileUtil.appendPath(basedir, Path.parse(outDir + "/Alignment")));
+        extraFiles.add(FileUtil.appendPath(basedir, Path.parse(outDir + "/Alignment/" + rs.getName() + ".summary.metrics")));
-        extraFiles.add(new File(basedir, outDir + "/Alignment/" + rs.getName() + ".bam.bai"));
+        extraFiles.add(FileUtil.appendPath(basedir, Path.parse(outDir + "/Alignment/" + rs.getName() + ".bam.bai")));

         return extraFiles;
     }
@@ -1575,54 +1576,57 @@ public void setUp() throws Exception
     protected void copyInputFiles() throws Exception
     {
-        File file3 = new File(_pipelineRoot, PAIRED_FILENAME1);
+        File pipelineRoot = getPipelineRoot(_project);
+        File file3 = FileUtil.appendName(pipelineRoot, PAIRED_FILENAME1);
         if (!file3.exists())
-            FileUtils.copyFile(new File(_sampleData, PAIRED_FILENAME1), file3);
+            FileUtils.copyFile(FileUtil.appendName(_sampleData, PAIRED_FILENAME1), file3);

-        File file4 = new File(_pipelineRoot, PAIRED_FILENAME2);
+        File file4 = FileUtil.appendName(pipelineRoot, PAIRED_FILENAME2);
         if (!file4.exists())
-            FileUtils.copyFile(new File(_sampleData, PAIRED_FILENAME2), file4);
+            FileUtils.copyFile(FileUtil.appendName(_sampleData, PAIRED_FILENAME2), file4);

-        File file5 = new File(_pipelineRoot, UNZIPPED_PAIRED_FILENAME1);
+        File file5 = FileUtil.appendName(pipelineRoot, UNZIPPED_PAIRED_FILENAME1);
         if (!file5.exists())
         {
-            decompressAndCleanFastq(new File(_sampleData, PAIRED_FILENAME1), file5);
+            decompressAndCleanFastq(FileUtil.appendName(_sampleData, PAIRED_FILENAME1), file5);
         }

-        File file6 = new File(_pipelineRoot, UNZIPPED_PAIRED_FILENAME2);
+        File file6 = FileUtil.appendName(pipelineRoot, UNZIPPED_PAIRED_FILENAME2);
         if (!file6.exists())
         {
-            decompressAndCleanFastq(new File(_sampleData, PAIRED_FILENAME2), file6);
+            decompressAndCleanFastq(FileUtil.appendName(_sampleData, PAIRED_FILENAME2), file6);
         }
     }

     protected List<SequenceReadsetImpl> createReadsets() throws Exception
     {
         List<SequenceReadsetImpl> models = new ArrayList<>();
+        File pipelineRoot = getPipelineRoot(_project);
+        File file1 = FileUtil.appendName(pipelineRoot, PAIRED_FILENAME1);
+        File file2 = FileUtil.appendName(pipelineRoot, PAIRED_FILENAME2);
+        models.add(createReadset("TestReadset1", List.of(Pair.of(file1, file2)), false));

-        File file1 = new File(_pipelineRoot, PAIRED_FILENAME1);
-        File file2 = new File(_pipelineRoot, PAIRED_FILENAME2);
-        models.add(createReadset("TestReadset1", List.of(Pair.of(file1, file2))));
+        File file3 = FileUtil.appendName(pipelineRoot, UNZIPPED_PAIRED_FILENAME1);
+        models.add(createReadset("TestReadset2", List.of(Pair.of(file3, null)), true));

-        File file3 = new File(_pipelineRoot, UNZIPPED_PAIRED_FILENAME1);
-        models.add(createReadset("TestReadset2", List.of(Pair.of(file3, null))));
-
-        File file4 = new File(_pipelineRoot, UNZIPPED_PAIRED_FILENAME2);
-        models.add(createReadset("TestReadset3", List.of(Pair.of(file4, null))));
+        File file4 = FileUtil.appendName(pipelineRoot, UNZIPPED_PAIRED_FILENAME2);
+        models.add(createReadset("TestReadset3", List.of(Pair.of(file4, null)), true));

         return models;
     }

-    protected synchronized SequenceReadsetImpl createReadset(String name, List<Pair<File, File>> fileList) throws Exception
+    protected synchronized SequenceReadsetImpl createReadset(String name, List<Pair<File, File>> fileList, boolean useWorkbook)
     {
         TableInfo ti = SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_READSETS);
         TableInfo readData = SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_READ_DATA);
         SequenceReadsetImpl readset1 = new SequenceReadsetImpl();
+        Container target = useWorkbook ? ContainerManager.createContainer(_project, null, "RS_WB", null, WorkbookContainerType.NAME, TestContext.get().getUser()): _project;
+
         readset1.setName(name);
-        readset1.setContainer(_project.getId());
+        readset1.setContainer(target.getId());
         readset1.setCreated(new Date());
         readset1.setCreatedBy(_context.getUser().getUserId());
         readset1 = Table.insert(_context.getUser(), ti, readset1);
@@ -1631,11 +1635,11 @@ protected synchronized SequenceReadsetImpl createReadset(String name, List<Pair<File, File>> fileList) throws Exception
         for (Pair<File, File> p : fileList)
         {
             ReadDataImpl rd = new ReadDataImpl();
-            ExpData d1 = createExpData(p.first);
-            ExpData d2 = p.second == null ? null : createExpData(p.second);
+            ExpData d1 = createExpData(p.first, target);
+            ExpData d2 = p.second == null ? null : createExpData(p.second, target);
             rd.setReadset(readset1.getReadsetId());
             rd.setFileId1(d1.getRowId());
-            rd.setContainer(_project.getId());
+            rd.setContainer(target.getId());
             rd.setCreatedBy(_context.getUser().getUserId());
             rd.setCreated(new Date());
             rd.setModifiedBy(_context.getUser().getUserId());
@@ -1654,33 +1658,15 @@ protected synchronized SequenceReadsetImpl createReadset(String name, List<Pair<File, File>> fileList) throws Exception
-        List<String> files = new ArrayList<>();
-        for (SequenceReadsetImpl m : _readsets)
-        {
-            for (ReadDataImpl rd : m.getReadDataImpl())
-            {
-                files.add(rd.getFile1().getName());
-                if (rd.getFileId2() != null)
-                {
-                    files.add(rd.getFile2().getName());
-                }
-            }
-        }
-
-        return files.toArray(new String[0]);
-    }

     protected void appendSamplesForAlignment(JSONObject config, List<SequenceReadsetImpl> readsets)
     {
         JSONArray ret = new JSONArray();
@@ -1693,20 +1679,20 @@ protected void appendSamplesForAlignment(JSONObject config, List expectedOutputs, File basedir, String fn)
     {
-        File f = new File(basedir, fn);
+        File f = FileUtil.appendName(basedir, fn);
         if (f.exists())
         {
             expectedOutputs.add(fn);
@@ -1801,7 +1787,7 @@ public static class SequenceAnalysisPipelineTestCase1 extends AbstractAnalysisPi
     private static final String PROJECT_NAME = "SequenceAnalysisTestProject1";

     @BeforeClass
-    public static void initialSetUp() throws Exception
+    public static void initialSetUp()
     {
         doInitialSetUp(PROJECT_NAME);
     }
@@ -1825,7 +1811,7 @@ public void testMosaik() throws Exception
             return;

         String jobName = "TestMosaik_" + System.currentTimeMillis();
-        JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName);
+        JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName);
         config.put("alignment", "Mosaik");
         config.put("alignment.Mosaik.banded_smith_waterman", 51);
         config.put("alignment.Mosaik.max_mismatch_pct", 0.20); //this is primary here to ensure it doesnt get copied into the build command.
20% should include everything @@ -1834,7 +1820,7 @@ public void testMosaik() throws Exception appendSamplesForAlignment(config, _readsets); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, createWorkbook()); waitForJobs(jobs); validateInputs(); @@ -1876,17 +1862,16 @@ public void testMosaikWithBamPostProcessing() throws Exception return; String jobName = "TestMosaikWithPostProcess_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "Mosaik"); config.put("bamPostProcessing", "AddOrReplaceReadGroups;CallMdTags;CleanSam;FixMateInformation;MarkDuplicates;SortSam"); appendSamplesForAlignment(config, _readsets); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, createWorkbook()); waitForJobs(jobs); - List extraFiles = new ArrayList<>(); - extraFiles.addAll(Arrays.asList( + List extraFiles = new ArrayList<>(Arrays.asList( "Shared/Mosaik", "Shared/Mosaik/SIVmac239_Test.mosaik", "paired1/Alignment/paired1.mosaikreads", @@ -1974,14 +1959,14 @@ public void testMosaikWithBamPostProcessingAndDelete() throws Exception String jobName = "TestMosaikWithPostProcessAndDelete_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "Mosaik"); config.put("deleteIntermediateFiles", true); config.put("bamPostProcessing", "AddOrReplaceReadGroups;CallMdTags;CleanSam;FixMateInformation;MarkDuplicates;SortSam"); appendSamplesForAlignment(config, _readsets); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, createWorkbook()); waitForJobs(jobs); Set extraFiles = new HashSet<>(); @@ -2035,12 +2020,12 @@ public void testMosaikDeletingIntermediates() throws Exception return; String jobName = "MosaikDeletingIntermediates_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "Mosaik"); config.put("deleteIntermediateFiles", true); appendSamplesForAlignment(config, _readsets); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, createWorkbook()); waitForJobs(jobs); Set extraFiles = new HashSet<>(); @@ -2090,11 +2075,11 @@ public void testBWASW() throws Exception return; String jobName = "TestBWASW_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "BWA-SW"); appendSamplesForAlignment(config, _readsets); - Set jobs = createPipelineJob(jobName, config, 
SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, createWorkbook()); waitForJobs(jobs); Set extraFiles = new HashSet<>(); @@ -2154,11 +2139,11 @@ public void testBWAMem() throws Exception return; String jobName = "TestBWAMem_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "BWA-Mem"); appendSamplesForAlignment(config, _readsets); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, createWorkbook()); waitForJobs(jobs); Set extraFiles = new HashSet<>(); @@ -2218,7 +2203,7 @@ public void testBWAWithAdapters() throws Exception return; String jobName = "TestBWAWithAdapters_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "BWA"); config.put("fastqProcessing", "AdapterTrimming"); @@ -2226,7 +2211,7 @@ public void testBWAWithAdapters() throws Exception appendSamplesForAlignment(config, _readsets); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, createWorkbook()); waitForJobs(jobs); Set extraFiles = new HashSet<>(); @@ -2323,11 +2308,11 @@ public void testBWA() throws Exception return; String jobName = "TestBWA_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "BWA"); appendSamplesForAlignment(config, _readsets); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, createWorkbook()); waitForJobs(jobs); Set extraFiles = new HashSet<>(); @@ -2393,11 +2378,11 @@ public void testBowtie() throws Exception return; String jobName = "TestBowtie_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "Bowtie"); appendSamplesForAlignment(config, _readsets); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, createWorkbook()); waitForJobs(jobs); Set extraFiles = new HashSet<>(); @@ -2463,12 +2448,12 @@ public void testBowtieDeletingIntermediates() throws Exception return; String jobName = "TestBowtieDeleting_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "Bowtie"); config.put("deleteIntermediateFiles", true); appendSamplesForAlignment(config, _readsets); 
- Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, createWorkbook()); waitForJobs(jobs); Set extraFiles = new HashSet<>(); @@ -2525,24 +2510,24 @@ public void testBwaMemWithSavedLibrary() throws Exception Integer libraryId = createSavedLibrary(); Integer dataId = new TableSelector(SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_REF_LIBRARIES), PageFlowUtil.set("fasta_file"), new SimpleFilter(FieldKey.fromString("rowid"), libraryId), null).getObject(Integer.class); ExpData data = ExperimentService.get().getExpData(dataId); - File alignmentIndexDir = new File(data.getFile().getParentFile(), AlignerIndexUtil.INDEX_DIR + "/bwa"); + File alignmentIndexDir = FileUtil.appendPath(data.getFile().getParentFile(), Path.parse(AlignerIndexUtil.INDEX_DIR + "/bwa")); if (alignmentIndexDir.exists()) { FileUtils.deleteDirectory(alignmentIndexDir); } String jobName = "TestBWAMemWithSavedLibrary_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "BWA-Mem"); config.put("referenceLibraryCreation", "SavedLibrary"); config.put("referenceLibraryCreation.SavedLibrary.libraryId", libraryId); appendSamplesForAlignment(config, Collections.singletonList(_readsets.get(0))); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, createWorkbook()); waitForJobs(jobs); //we expect the index to get copied back to the reference library location - assert alignmentIndexDir.exists() && alignmentIndexDir.listFiles().length > 0 : "Aligner index was not cached"; + assert alignmentIndexDir.exists() && Objects.requireNonNull(alignmentIndexDir.listFiles()).length > 0 : "Aligner index was not cached"; Set extraFiles = new HashSet<>(); extraFiles.add(jobName + ".log"); @@ -2613,13 +2598,13 @@ public void testBwaMemWithSavedLibrary2() throws Exception //run using this library Integer libraryId = createSavedLibrary(); String jobName = "TestBWAMemWithSavedLibrary2_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "BWA-Mem"); config.put("referenceLibraryCreation", "SavedLibrary"); config.put("referenceLibraryCreation.SavedLibrary.libraryId", libraryId); appendSamplesForAlignment(config, _readsets); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, _project); waitForJobs(jobs); Set extraFiles = new HashSet<>(); @@ -2671,33 +2656,34 @@ public void testMergedAlignments() throws Exception return; String jobName = "TestBWAMemMergedAlign_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "BWA-Mem"); + File pipelineRoot = getPipelineRoot(_project); for (String fn : Arrays.asList(PAIRED_FILENAME_L1a, 
PAIRED_FILENAME_L2)) { - File file3 = new File(_pipelineRoot, fn); + File file3 = FileUtil.appendName(pipelineRoot, fn); if (!file3.exists()) - FileUtils.copyFile(new File(_sampleData, PAIRED_FILENAME1), file3); + FileUtils.copyFile(FileUtil.appendName(_sampleData, PAIRED_FILENAME1), file3); } for (String fn : Arrays.asList(PAIRED_FILENAME2_L1a, PAIRED_FILENAME2_L2)) { - File file4 = new File(_pipelineRoot, fn); + File file4 = FileUtil.appendName(pipelineRoot, fn); if (!file4.exists()) - FileUtils.copyFile(new File(_sampleData, PAIRED_FILENAME2), file4); + FileUtils.copyFile(FileUtil.appendName(_sampleData, PAIRED_FILENAME2), file4); } List models = new ArrayList<>(); models.add(createReadset("TestMergedReadset", Arrays.asList( - Pair.of(new File(_pipelineRoot, PAIRED_FILENAME_L1a), new File(_pipelineRoot, PAIRED_FILENAME2_L1a)), - Pair.of(new File(_pipelineRoot, PAIRED_FILENAME_L2), new File(_pipelineRoot, PAIRED_FILENAME2_L2)) - ))); + Pair.of(FileUtil.appendName(pipelineRoot, PAIRED_FILENAME_L1a), FileUtil.appendName(pipelineRoot, PAIRED_FILENAME2_L1a)), + Pair.of(FileUtil.appendName(pipelineRoot, PAIRED_FILENAME_L2), FileUtil.appendName(pipelineRoot, PAIRED_FILENAME2_L2)) + ), true)); appendSamplesForAlignment(config, models); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, createWorkbook()); waitForJobs(jobs); Set extraFiles = new HashSet<>(); @@ -2745,11 +2731,11 @@ public void testBowtie2() throws Exception return; String jobName = "TestBowtie2_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "Bowtie2"); appendSamplesForAlignment(config, _readsets); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, createWorkbook()); waitForJobs(jobs); Set extraFiles = new HashSet<>(); @@ -2837,73 +2823,6 @@ protected String getProjectName() return PROJECT_NAME; } - //NOTE: there is an issue that seems specific to this genome. 
disable for now - //@Test - public void testStar() throws Exception - { - if (!isExternalPipelineEnabled()) - return; - - String jobName = "TestStar_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); - config.put("alignment", "STAR"); - appendSamplesForAlignment(config, _readsets); - - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); - waitForJobs(jobs); - - Set extraFiles = new HashSet<>(); - extraFiles.add(jobName + ".log"); - extraFiles.add("sequenceAnalysis.json"); - - extraFiles.add("Shared"); - extraFiles.add("Shared/SIVmac239_Test.fasta"); - extraFiles.add("Shared/SIVmac239_Test.fasta.fai"); - extraFiles.add("Shared/SIVmac239_Test.idKey.txt"); - - extraFiles.add("Shared/Bowtie"); - extraFiles.add("Shared/Bowtie/SIVmac239_Test.bowtie.index.1.ebwt"); - extraFiles.add("Shared/Bowtie/SIVmac239_Test.bowtie.index.2.ebwt"); - extraFiles.add("Shared/Bowtie/SIVmac239_Test.bowtie.index.3.ebwt"); - extraFiles.add("Shared/Bowtie/SIVmac239_Test.bowtie.index.4.ebwt"); - extraFiles.add("Shared/Bowtie/SIVmac239_Test.bowtie.index.rev.1.ebwt"); - extraFiles.add("Shared/Bowtie/SIVmac239_Test.bowtie.index.rev.2.ebwt"); - - Set job1Files = new HashSet<>(extraFiles); - job1Files.add("paired1"); - job1Files.add("paired1/Alignment"); - job1Files.add("paired1/Alignment/TestReadset1.bam"); - job1Files.add("paired1/Alignment/TestReadset1.summary.metrics"); - job1Files.add("paired1/Alignment/TestReadset1.insertsize.metrics"); - job1Files.add("paired1/Alignment/TestReadset1.insertsize.metrics.pdf"); - job1Files.add("paired1/Alignment/TestReadset1.bam.bai"); - job1Files.add("paired1/Alignment/paired1.bowtie.unaligned_1.fastq"); - job1Files.add("paired1/Alignment/paired1.bowtie.unaligned_2.fastq"); - - Set job2Files = new HashSet<>(extraFiles); - job2Files.add("paired3"); - job2Files.add("paired3/Alignment"); - job2Files.add("paired3/Alignment/TestReadset2.bam"); - job2Files.add("paired3/Alignment/TestReadset2.summary.metrics"); - //job2Files.add("paired3/Alignment/TestReadset2.insertsize.metrics"); - job2Files.add("paired3/Alignment/TestReadset2.bam.bai"); - job2Files.add("paired3/Alignment/paired3.bowtie.unaligned.fastq"); - - Set job3Files = new HashSet<>(extraFiles); - job3Files.add("paired4"); - job3Files.add("paired4/Alignment"); - job3Files.add("paired4/Alignment/TestReadset3.bam"); - job3Files.add("paired4/Alignment/TestReadset3.summary.metrics"); - //job3Files.add("paired4/Alignment/TestReadset3.insertsize.metrics"); - job3Files.add("paired4/Alignment/TestReadset3.bam.bai"); - job3Files.add("paired4/Alignment/paired4.bowtie.unaligned.fastq"); - - //this is probably due to adapters - validateAlignmentJob(jobs, job1Files, _readsets.get(0), 0, 422); - validateAlignmentJob(jobs, job2Files, _readsets.get(1), 155, 56); - validateAlignmentJob(jobs, job3Files, _readsets.get(2), 154, 57); - } - @Test public void testBismarkWithSavedLibraryAndAdapters() throws Exception { @@ -2914,14 +2833,14 @@ public void testBismarkWithSavedLibraryAndAdapters() throws Exception Integer libraryId = createSavedLibrary(); Integer dataId = new TableSelector(SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_REF_LIBRARIES), PageFlowUtil.set("fasta_file"), new SimpleFilter(FieldKey.fromString("rowid"), libraryId), null).getObject(Integer.class); ExpData data = ExperimentService.get().getExpData(dataId); - File alignmentIndexDir = new File(data.getFile().getParentFile(), AlignerIndexUtil.INDEX_DIR + 
"/Bismark"); + File alignmentIndexDir = FileUtil.appendPath(data.getFile().getParentFile(), Path.parse(AlignerIndexUtil.INDEX_DIR + "/Bismark")); if (alignmentIndexDir.exists()) { FileUtils.deleteDirectory(alignmentIndexDir); } String jobName = "TestBismarkWithSavedLibraryAndAdapters_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "Bismark"); config.put("alignment.Bismark.seed_length", "30"); config.put("alignment.Bismark.max_seed_mismatches", "1"); @@ -2939,7 +2858,7 @@ public void testBismarkWithSavedLibraryAndAdapters() throws Exception config.put("fastqProcessing.AdapterTrimming.adapters", "[[\"Nextera Transposon Adapter A\",\"AGATGTGTATAAGAGACAG\",true,true]]"); appendSamplesForAlignment(config, _readsets); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, createWorkbook()); waitForJobs(jobs); //we expect the index to get copied back to the reference library location @@ -3064,7 +2983,7 @@ public void testBismarkWithSavedLibraryAndAdapters() throws Exception FileUtils.deleteDirectory(alignmentIndexDir); } - public void testBismarkWithSavedLibraryAdaptersAndDelete() throws Exception + private void testBismarkWithSavedLibraryAdaptersAndDelete() throws Exception { if (!isExternalPipelineEnabled()) return; @@ -3072,7 +2991,7 @@ public void testBismarkWithSavedLibraryAdaptersAndDelete() throws Exception //run using this library Integer libraryId = createSavedLibrary(); String jobName = "TestBismarkWithSavedLibraryAndDelete_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "Bismark"); config.put("alignment.Bismark.seed_length", "30"); config.put("alignment.Bismark.max_seed_mismatches", "1"); @@ -3091,7 +3010,7 @@ public void testBismarkWithSavedLibraryAdaptersAndDelete() throws Exception config.put("fastqProcessing.AdapterTrimming.adapters", "[[\"Nextera Transposon Adapter A\",\"AGATGTGTATAAGAGACAG\",true,true]]"); appendSamplesForAlignment(config, _readsets); - Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment); + Set jobs = createPipelineJob(jobName, config, SequenceAnalysisController.AnalyzeForm.TYPE.alignment, createWorkbook()); waitForJobs(jobs); Set extraFiles = new HashSet<>(); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceRemoteIntegrationTests.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceRemoteIntegrationTests.java index 3a1ecf590..57923f571 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceRemoteIntegrationTests.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceRemoteIntegrationTests.java @@ -15,6 +15,7 @@ import org.labkey.api.pipeline.TaskId; import org.labkey.api.pipeline.WorkDirectory; import org.labkey.api.reader.Readers; +import org.labkey.api.util.FileUtil; import org.labkey.api.writer.PrintWriters; import org.labkey.sequenceanalysis.pipeline.AlignmentInitTask; import org.labkey.sequenceanalysis.pipeline.PrepareAlignerIndexesTask; @@ -25,6 +26,7 @@ import java.io.IOException; import java.io.PrintWriter; import 
java.util.List; +import java.util.Objects; import static org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService.SEQUENCE_TOOLS_PARAM; @@ -40,35 +42,35 @@ public void setUp() throws Exception } @BeforeClass - public static void initialSetUp() throws Exception + public static void initialSetUp() { doInitialSetUp(PROJECT_NAME); } private File setupConfigDir(File outDir) throws IOException { - File baseDir = new File(outDir, "config"); + File baseDir = FileUtil.appendName(outDir, "config"); if (baseDir.exists()) { FileUtils.deleteDirectory(baseDir); } - baseDir.mkdirs(); + FileUtil.mkdirs(baseDir); if (_sampleData == null) { throw new IOException("_sampleData was null"); } - File source = new File(_sampleData, "remotePipeline"); + File source = FileUtil.appendName(_sampleData, "remotePipeline"); if (!source.exists()) { throw new IOException("Unable to find file: " + source.getPath()); } - FileUtils.copyFile(new File(source, "sequenceanalysisConfig.xml"), new File(baseDir, "sequenceanalysisConfig.xml")); + FileUtils.copyFile(FileUtil.appendName(source, "sequenceanalysisConfig.xml"), FileUtil.appendName(baseDir, "sequenceanalysisConfig.xml")); - try (PrintWriter writer = PrintWriters.getPrintWriter(new File(baseDir, "pipelineConfig.xml")); BufferedReader reader = Readers.getReader(new File(source, "pipelineConfig.xml"))) + try (PrintWriter writer = PrintWriters.getPrintWriter(FileUtil.appendName(baseDir, "pipelineConfig.xml")); BufferedReader reader = Readers.getReader(FileUtil.appendName(source, "pipelineConfig.xml"))) { String line; while ((line = reader.readLine()) != null) @@ -83,12 +85,10 @@ private File setupConfigDir(File outDir) throws IOException path = path.replaceAll("\\\\", "/"); line = line.replaceAll("@@SEQUENCEANALYSIS_TOOLS@@", path); - _log.info("Writing to pipelineConfig.xml: " + line); } else if (line.contains("@@WORK_DIR@@")) { line = line.replaceAll("@@WORK_DIR@@", outDir.getPath().replaceAll("\\\\", "/")); - _log.info("Writing to pipelineConfig.xml: " + line); } writer.println(line); @@ -113,13 +113,13 @@ protected String getProjectName() @Test public void BasicRemoteJob() throws Exception { - File outDir = new File(_pipelineRoot, "clusterBootstrap"); + File outDir = FileUtil.appendName(getPipelineRoot(_project), "clusterBootstrap"); if (outDir.exists()) { FileUtils.deleteDirectory(outDir); } - outDir.mkdirs(); + FileUtil.mkdirs(outDir); executeJobRemote(outDir, null); @@ -143,19 +143,19 @@ public void RunBwaRemote() throws Exception return; String jobName = "TestBWAMem_" + System.currentTimeMillis(); - JSONObject config = substituteParams(new File(_sampleData, ALIGNMENT_JOB), jobName); + JSONObject config = substituteParams(FileUtil.appendName(_sampleData, ALIGNMENT_JOB), jobName); config.put("alignment", "BWA-Mem"); appendSamplesForAlignment(config, _readsets); SequenceAlignmentJob job = SequenceAlignmentJob.createForReadsets(_project, _context.getUser(), "RemoteJob1", "Test of remote pipeline", config, config.getJSONArray("readsetIds"), false).get(0); - File outDir = new File(_pipelineRoot, "remoteBwa"); + File outDir = FileUtil.appendName(getPipelineRoot(_project), "remoteBwa"); if (outDir.exists()) { FileUtils.deleteDirectory(outDir); } - outDir.mkdirs(); - job.getLogFile().getParentFile().mkdirs(); + FileUtil.mkdirs(outDir); + FileUtil.mkdirs(job.getLogFile().getParentFile()); _readsets.forEach(rs -> job.getSequenceSupport().cacheReadset(rs)); @@ -171,7 +171,7 @@ public void RunBwaRemote() throws Exception //Now move to remote tasks job.setActiveTaskId(new 
TaskId(PrepareAlignerIndexesTask.class)); - File jobFile = new File(outDir, "bwaRemote.job.json.txt"); + File jobFile = FileUtil.appendName(outDir, "bwaRemote.job.json.txt"); job.writeToFile(jobFile); executeJobRemote(outDir, jobFile); @@ -191,7 +191,7 @@ public void RunBwaRemote() throws Exception writeJobLogToLog(job); _log.info("Files in job folder: " + job.getLogFile().getParentFile().getPath()); - for (File f : job.getLogFile().getParentFile().listFiles()) + for (File f : Objects.requireNonNull(job.getLogFile().getParentFile().listFiles())) { _log.info(f.getName()); } @@ -215,14 +215,14 @@ protected void executeJobRemote(File workDir, @Nullable File jobJson) throws IOE ProcessBuilder pb = new ProcessBuilder(args); pb.directory(workDir); - _log.info("Executing job in '" + pb.directory().getAbsolutePath() + "': " + String.join(" ", pb.command())); + _log.info("Executing job in '{}': {}", pb.directory().getAbsolutePath(), String.join(" ", pb.command())); Process proc; try { pb.redirectErrorStream(true); proc = pb.start(); - File logFile = new File(workDir, "clusterBootstrap.txt"); + File logFile = FileUtil.appendName(workDir, "clusterBootstrap.txt"); try (BufferedReader procReader = Readers.getReader(proc.getInputStream());PrintWriter writer = PrintWriters.getPrintWriter(logFile)) { String line; diff --git a/singlecell/resources/chunks/CalculateUCellScores.R b/singlecell/resources/chunks/CalculateUCellScores.R index c11640eea..a7669bf4c 100644 --- a/singlecell/resources/chunks/CalculateUCellScores.R +++ b/singlecell/resources/chunks/CalculateUCellScores.R @@ -9,7 +9,7 @@ for (datasetId in names(seuratObjects)) { seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) message(paste0('Loading dataset ', datasetId, ', with total cells: ', ncol(seuratObj))) - seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = storeRanks, assayName = assayName, forceRecalculate = forceRecalculate, ncores = nCores) + seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = storeRanks, assayName = assayName, forceRecalculate = forceRecalculate, ncores = nCores, dropAllExistingUcells = dropAllExistingUcells) saveData(seuratObj, datasetId) diff --git a/singlecell/resources/chunks/PerformTcrClustering.R b/singlecell/resources/chunks/PerformTcrClustering.R new file mode 100644 index 000000000..5ada2327a --- /dev/null +++ b/singlecell/resources/chunks/PerformTcrClustering.R @@ -0,0 +1,34 @@ +for (datasetId in names(seuratObjects)) { + printName(datasetId) + seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) + + seuratObj <- tcrClustR::CalculateTcrDistances( + inputData = seuratObj, + chains = c('TRA', 'TRB', 'TRG', 'TRD'), + organism = organism, + minimumCloneSize = 2, + calculateChainPairs = TRUE + ) + + seuratObj <- tcrClustR::RunTcrClustering( + seuratObj_TCR = seuratObj, + dianaHeight = 20, + clusterSizeThreshold = 1 + ) + + print(paste0('Summary of distances: ')) + if (!'TCR_Distances' %in% names(seuratObj@misc)) { + warning('No TCR_Distances were found, this could indicate a problem with processing') + } else { + for (an in names(seuratObj@misc$TCR_Distances)) { + ad <- seuratObj@misc$TCR_Distances[[an]] + print(paste0('Assay: ', an, ', total clones: ', nrow(ad))) + } + } + + saveData(seuratObj, datasetId) + + # Cleanup + rm(seuratObj) + gc() +} \ No newline at end of file diff --git a/singlecell/src/org/labkey/singlecell/SingleCellModule.java b/singlecell/src/org/labkey/singlecell/SingleCellModule.java index 92efcb752..f1ce9357e 100644 --- 
a/singlecell/src/org/labkey/singlecell/SingleCellModule.java +++ b/singlecell/src/org/labkey/singlecell/SingleCellModule.java @@ -29,7 +29,6 @@ import org.labkey.api.singlecell.CellHashingService; import org.labkey.api.singlecell.pipeline.SingleCellRawDataStep; import org.labkey.api.singlecell.pipeline.SingleCellStep; -import org.labkey.api.util.PageFlowUtil; import org.labkey.api.view.WebPartFactory; import org.labkey.singlecell.analysis.AbstractSingleCellHandler; import org.labkey.singlecell.analysis.CellRangerRawDataHandler; @@ -76,6 +75,7 @@ import org.labkey.singlecell.pipeline.singlecell.NormalizeAndScale; import org.labkey.singlecell.pipeline.singlecell.PerformDefaultNimbleAppend; import org.labkey.singlecell.pipeline.singlecell.PerformMhcDimRedux; +import org.labkey.singlecell.pipeline.singlecell.PerformTcrClustering; import org.labkey.singlecell.pipeline.singlecell.PhenotypePlots; import org.labkey.singlecell.pipeline.singlecell.PlotAssayFeatures; import org.labkey.singlecell.pipeline.singlecell.PlotAverageCiteSeqCounts; @@ -305,6 +305,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new CalculateTcrRepertoireStats.Provider()); SequencePipelineService.get().registerPipelineStep(new PredictTcellActivation.Provider()); SequencePipelineService.get().registerPipelineStep(new IdentifyAndStoreActiveClonotypes.Provider()); + SequencePipelineService.get().registerPipelineStep(new PerformTcrClustering.Provider()); SequenceAnalysisService.get().registerReadsetListener(new SingleCellReadsetListener()); } diff --git a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java index 7656f92a2..e9edcf5b3 100644 --- a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java +++ b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java @@ -963,6 +963,7 @@ public static String getOutputDescription(JSONObject jsonParams, Logger log, Fil int lowOrNegative = 0; int totalDoublet = 0; double totalSaturation = 0.0; + Set subjectIds = new HashSet<>(); int hashingIdx = -1; int saturationIdx = -1; @@ -972,6 +973,7 @@ public static String getOutputDescription(JSONObject jsonParams, Logger log, Fil int trbIdx = -1; int trdIdx = -1; int trgIdx = -1; + int subjectIdIdx = -1; int totalTNK = 0; int cellsWithTRA = 0; @@ -998,6 +1000,7 @@ public static String getOutputDescription(JSONObject jsonParams, Logger log, Fil trdIdx = Arrays.asList(line).indexOf("TRD"); trgIdx = Arrays.asList(line).indexOf("TRG"); riraIdx = Arrays.asList(line).indexOf("RIRA_Immune_v2.cellclass"); + subjectIdIdx = Arrays.asList(line).indexOf("SubjectIdId"); } else { @@ -1085,6 +1088,15 @@ else if ("NotUsed".equals(val)) } } } + + if (subjectIdIdx > 0) + { + String subjectId = StringUtils.trimToNull(line[subjectIdIdx]); + if (subjectId != null && !"NA".equals(subjectId)) + { + subjectIds.add(subjectId); + } + } } } @@ -1126,6 +1138,11 @@ else if (riraIdx == -1 || traIdx == -1) { descriptions.add("TCR information not present"); } + + if (!subjectIds.isEmpty()) + { + descriptions.add("Distinct SubjectIds: " + subjectIds.size()); + } } catch (IOException e) { diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AbstractTcrClustRStep.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AbstractTcrClustRStep.java new file mode 100644 index 000000000..dd49dc268 --- /dev/null +++ 
b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AbstractTcrClustRStep.java @@ -0,0 +1,37 @@ +package org.labkey.singlecell.pipeline.singlecell; + +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; +import org.labkey.api.singlecell.pipeline.AbstractSingleCellPipelineStep; +import org.labkey.api.util.PageFlowUtil; + +import java.util.Collection; + +abstract public class AbstractTcrClustRStep extends AbstractSingleCellPipelineStep +{ + public static String CONTAINER_NAME = "ghcr.io/bimberlabinternal/tcrclustr:latest"; + + public AbstractTcrClustRStep(PipelineStepProvider provider, PipelineContext ctx) + { + super(provider, ctx); + } + + @Override + public Collection getRLibraries() + { + return PageFlowUtil.set("tcrClustR"); + } + + @Override + public String getDockerContainerName() + { + return CONTAINER_NAME; + } + + // NOTE: ExperimentHub and similar packages default to saving data to the user's home dir. Set an explicit directory to avoid issues when not running the container as root + @Override + public String getDockerHomeDir() + { + return "/dockerHomeDir"; + } +} diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateUCellScores.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateUCellScores.java index f8ce868d3..ca98b22da 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateUCellScores.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateUCellScores.java @@ -23,13 +23,16 @@ public Provider() { super("CalculateUCellScores", "Calculate UCell Scores", "Seurat", "This will generate UCell scores for a set of pre-defined gene modules", Arrays.asList( SeuratToolParameter.create("storeRanks", "Store Ranks", "Passed directly to UCell::AddModuleScore_UCell.", "checkbox", new JSONObject(){{ - put("checked", true); + put("checked", false); }}, true), SeuratToolParameter.create("assayName", "Assay Name", "Passed directly to UCell::AddModuleScore_UCell.", "textfield", new JSONObject(){{ }}, "RNA"), SeuratToolParameter.create("forceRecalculate", "Force Recalculate", "If checked, the UCell score will always be re-calculated.", "checkbox", new JSONObject(){{ + }}, false), + SeuratToolParameter.create("dropAllExistingUcells", "Drop Existing UCells?", "If checked, this will drop all columns ending in _UCell.
This implies forceRecalculate.", "checkbox", new JSONObject(){{ + }}, false) ), null, null); } diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PerformTcrClustering.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PerformTcrClustering.java new file mode 100644 index 000000000..919b64cf0 --- /dev/null +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PerformTcrClustering.java @@ -0,0 +1,47 @@ +package org.labkey.singlecell.pipeline.singlecell; + +import org.json.JSONObject; +import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.singlecell.pipeline.SeuratToolParameter; +import org.labkey.api.singlecell.pipeline.SingleCellStep; + +import java.util.List; + +public class PerformTcrClustering extends AbstractTcrClustRStep +{ + public PerformTcrClustering(PipelineContext ctx, PerformTcrClustering.Provider provider) + { + super(provider, ctx); + } + + public static class Provider extends AbstractPipelineStepProvider + { + public Provider() + { + super("PerformTcrClustering", "tcrClustR", "tcrClustR", "This will run tcrClustR to cluster TCRs by similarity.", List.of( + SeuratToolParameter.create("organism", "Organism", "The organism to use", "ldk-simplecombo", new JSONObject() + {{ + put("multiSelect", false); + put("allowBlank", false); + put("storeValues", "human;rhesus;mouse"); + put("initialValues", "human"); + put("delimiter", ";"); + put("joinReturnValue", true); + }}, null) + ), null, "https://github.com/bimberlabinternal/tcrClustR/"); + } + + @Override + public PerformTcrClustering create(PipelineContext ctx) + { + return new PerformTcrClustering(ctx, this); + } + } + + @Override + public String getFileSuffix() + { + return "tcr"; + } +} diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/VireoHandler.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/VireoHandler.java index 3976c6155..e7a6ae2f2 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/VireoHandler.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/VireoHandler.java @@ -370,7 +370,7 @@ else if (outFiles.length > 1) StringBuilder description = new StringBuilder(); if (vcfFile > -1) { - description.append("Reference VCF ID: \n").append(vcfFile); + description.append("Reference VCF ID: \n").append(vcfFile).append("\n"); } File summary = new File(ctx.getOutputDir(), "summary.tsv"); diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java index 980918b56..07cd1eeea 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java @@ -116,7 +116,7 @@ private File createNimbleBam(AlignmentOutputImpl output, Readset rs, List private File getCachedLoupeFile(Readset rs, boolean throwIfNotFound) throws PipelineJobException { - Map map = getPipelineCtx().getSequenceSupport().getCachedObject(CACHE_KEY, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(Map.class, Long.class, Long.class)); + Map map = getPipelineCtx().getSequenceSupport().getCachedObject(CACHE_KEY, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(HashMap.class, Long.class, Long.class)); Long dataId = map.get(rs.getReadsetId()); if (dataId == null) {
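
Note on the file-handling changes above: the bulk of the mechanical edits in this diff replace new File(parent, child) with FileUtil.appendName (single path segment) or FileUtil.appendPath plus Path.parse (multi-segment relative path), both from org.labkey.api.util, as shown in the changed lines. A minimal sketch of the convention, assuming those APIs behave as used in the diff; resolveExpectedBai and its arguments are hypothetical stand-ins, not code from this change:

    import org.labkey.api.util.FileUtil;
    import org.labkey.api.util.Path;
    import java.io.File;

    // Hypothetical helper mirroring calls that appear throughout the diff:
    private File resolveExpectedBai(File basedir, String outDir, String readsetName)
    {
        // Multi-segment relative paths go through Path.parse + appendPath
        File alignmentDir = FileUtil.appendPath(basedir, Path.parse(outDir + "/Alignment"));

        // A single child name uses appendName(parent, name) in place of new File(parent, name)
        return FileUtil.appendName(alignmentDir, readsetName + ".bam.bai");
    }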
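Several spots above (the saved-library index assertion, the remote-job log listing) now wrap File.listFiles() in Objects.requireNonNull. listFiles() returns null rather than an empty array when the path is not a directory or an I/O error occurs, so the wrapper fails fast with a clear message instead of a deferred NullPointerException. Shown in isolation (standalone sketch, not code from this diff):

    import java.io.File;
    import java.util.Objects;

    public class ListFilesNullCheck
    {
        public static void main(String[] args)
        {
            File dir = new File(args.length > 0 ? args[0] : ".");

            // File.listFiles() returns null (not an empty array) if 'dir' is not a
            // directory or an I/O error occurs; requireNonNull surfaces that immediately.
            File[] children = Objects.requireNonNull(dir.listFiles(), "Not a listable directory: " + dir);

            System.out.println(children.length + " entries under " + dir.getAbsolutePath());
        }
    }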
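The NimbleAlignmentStep change builds the cached map's Jackson JavaType from the concrete HashMap class instead of the Map interface. A standalone sketch of that TypeFactory call using plain Jackson; the JSON payload and class name here are invented for illustration:

    import com.fasterxml.jackson.databind.JavaType;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import java.util.HashMap;
    import java.util.Map;

    public class CachedMapType
    {
        public static void main(String[] args) throws Exception
        {
            ObjectMapper mapper = new ObjectMapper();

            // Parameterize the concrete HashMap, as the diff does for the readset -> dataId cache
            JavaType type = mapper.getTypeFactory().constructParametricType(HashMap.class, Long.class, Long.class);

            // JSON object keys are strings; the Long key type above drives their conversion
            Map<Long, Long> cache = mapper.readValue("{\"101\": 202}", type);
            System.out.println(cache.get(101L)); // prints 202
        }
    }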