diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3578fca0b..eb3c0794e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -31,6 +31,11 @@ jobs: echo "DEFAULT_BRANCH=${DEFAULT_BRANCH}" >> $GITHUB_ENV id: default-branch + # Note: use slight delay in case there are associated commits across repos + - name: "Sleep for 30 seconds" + run: sleep 30s + shell: bash + - name: "Build DISCVR" uses: bimberlabinternal/DevOps/githubActions/discvr-build@master with: diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index 8ecd60f37..8aa3df5a5 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -325,11 +325,11 @@ then echo "Cleaning up previous installs" rm -Rf $LKTOOLS_DIR/sawfish* - wget https://github.com/PacificBiosciences/sawfish/releases/download/v2.0.0/sawfish-v2.0.0-x86_64-unknown-linux-gnu.tar.gz - tar -xzf sawfish-v2.0.0-x86_64-unknown-linux-gnu.tar.gz + wget https://github.com/PacificBiosciences/sawfish/releases/download/v2.2.0/sawfish-v2.2.0-x86_64-unknown-linux-gnu.tar.gz + tar -xzf sawfish-v2.2.0-x86_64-unknown-linux-gnu.tar.gz - mv sawfish-v2.0.0-x86_64-unknown-linux-gnu $LKTOOLS_DIR/ - ln -s $LKTOOLS_DIR/sawfish-v2.0.0/bin/sawfish $LKTOOLS_DIR/ + mv sawfish-v2.2.0-x86_64-unknown-linux-gnu $LKTOOLS_DIR/ + ln -s $LKTOOLS_DIR/sawfish-v2.2.0-x86_64-unknown-linux-gnu/bin/sawfish $LKTOOLS_DIR/ else echo "Already installed" fi diff --git a/SequenceAnalysis/resources/queries/sequenceanalysis/sequence_readsets.js b/SequenceAnalysis/resources/queries/sequenceanalysis/sequence_readsets.js index 4556ab827..9c3c14cb4 100644 --- a/SequenceAnalysis/resources/queries/sequenceanalysis/sequence_readsets.js +++ b/SequenceAnalysis/resources/queries/sequenceanalysis/sequence_readsets.js @@ -1,11 +1,15 @@ -/* - * Copyright (c) 2012 LabKey Corporation - * - * Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 - */ +var LABKEY = require("labkey"); + +var triggerHelper = new org.labkey.sequenceanalysis.query.SequenceTriggerHelper(LABKEY.Security.currentUser.id, LABKEY.Security.currentContainer.id); function beforeDelete(row, errors){ if (!this.extraContext.deleteFromServer){ errors._form = 'You cannot directly delete readsets. 
To delete these records, use the delete button above the readset grid.'; } +} + +function afterInsert(row, errors) { + if (row.sraAccessions) { + triggerHelper.createReaddataForSra(row.rowid, row.sraAccessions); + } } \ No newline at end of file diff --git a/SequenceAnalysis/resources/queries/sequenceanalysis/sequence_readsets/Assigned to Run Lacking Data.qview.xml b/SequenceAnalysis/resources/queries/sequenceanalysis/sequence_readsets/Assigned to Run Lacking Data.qview.xml index 2a9801077..a9bfce5ec 100644 --- a/SequenceAnalysis/resources/queries/sequenceanalysis/sequence_readsets/Assigned to Run Lacking Data.qview.xml +++ b/SequenceAnalysis/resources/queries/sequenceanalysis/sequence_readsets/Assigned to Run Lacking Data.qview.xml @@ -1,6 +1,6 @@ - + diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceImportPanel.js b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceImportPanel.js index 711144294..17212476a 100644 --- a/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceImportPanel.js +++ b/SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceImportPanel.js @@ -2436,7 +2436,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceImportPanel', { },{ xtype: 'textfield', fieldLabel: 'Delimiter', - value: '_', + value: '[_-]', itemId: 'delimiter' }], buttons: [{ @@ -2455,7 +2455,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceImportPanel', { if (prefix) { fg = fg.replace(new RegExp('^' + prefix), ''); } - fg = fg.split(delim); + fg = fg.split(RegExp(delim)); var id = fg[0]; if (Ext4.isNumeric(id)) { r.set('readset', id); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/AlignmentInitTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/AlignmentInitTask.java index 78f6a3332..249832cff 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/AlignmentInitTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/AlignmentInitTask.java @@ -44,6 +44,7 @@ public static class Factory extends AbstractSequenceTaskFactory public Factory() { super(AlignmentInitTask.class); + setLocation("webserver-high-priority"); setJoin(true); } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CreateReferenceLibraryTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CreateReferenceLibraryTask.java index 6e7fcc526..ee424a3a5 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CreateReferenceLibraryTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CreateReferenceLibraryTask.java @@ -165,6 +165,11 @@ public RecordedActionSet run() throws PipelineJobException libraryMembers = new TableSelector(libraryMembersTable, new SimpleFilter(FieldKey.fromString("library_id"), getPipelineJob().getLibraryId()), new Sort("ref_nt_id/name")).getArrayList(ReferenceLibraryMember.class); } + if (libraryMembers == null) + { + throw new PipelineJobException("There are no sequences in the library: " + getPipelineJob().getLibraryId()); + } + getJob().getLogger().info("there are " + libraryMembers.size() + " sequences to process"); //make sure sequence names are unique diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ReadsetInitTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ReadsetInitTask.java index 92ae0ec75..d0e442ef7 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ReadsetInitTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ReadsetInitTask.java @@ 
-430,6 +430,10 @@ else if (TaskFileManager.InputFileTreatment.compress == inputFileTreatment) moveInputToAnalysisDir(compressed, job, actions, unalteredInputs, outputFiles); } } + else + { + job.getLogger().debug("Input file does not exist, may have already been moved: " + input.getPath()); + } } } else @@ -450,23 +454,40 @@ private static void moveInputToAnalysisDir(File input, SequenceJob job, Collecti File outputDir = job.getAnalysisDirectory(); File output = new File(outputDir, input.getName()); job.getLogger().debug("Destination: " + output.getPath()); + boolean alreadyMoved = false; if (output.exists()) { + job.getLogger().debug("output already exists"); if (unalteredInputs != null && unalteredInputs.contains(output)) { job.getLogger().debug("\tThis input was unaltered during normalization and a copy already exists in the analysis folder so the original will be discarded"); input.delete(); - TaskFileManagerImpl.swapFilesInRecordedActions(job.getLogger(), input, output, actions, job, null); - return; + alreadyMoved = true; } else { - output = new File(outputDir, FileUtil.getBaseName(input.getName()) + ".orig.gz"); - job.getLogger().debug("\tA file with the expected output name already exists, so the original will be renamed: " + output.getPath()); + if (input.length() == output.length() && input.lastModified() == output.lastModified()) + { + job.getLogger().info("Output exists, but has the same size/modified timestamp. Deleting original"); + input.delete(); + alreadyMoved = true; + } + else if (input.exists() && input.length() > output.length() && input.lastModified() == output.lastModified()) + { + job.getLogger().info("Output exists with same timestamp, but with smaller file size. This probably indicates a truncated/failed copy. Deleting this file."); + output.delete(); + } + else + { + throw new PipelineJobException("A file with the expected output name already exists: " + output.getPath()); + } } } - FileUtils.moveFile(input, output); + if (!alreadyMoved) + { + FileUtils.moveFile(input, output); + } if (!output.exists()) { throw new PipelineJobException("Unable to move file: " + input.getPath()); @@ -488,7 +509,7 @@ private static void moveInputToAnalysisDir(File input, SequenceJob job, Collecti TaskFileManagerImpl.swapFilesInRecordedActions(job.getLogger(), input, output, actions, job, null); } - catch (IOException e) + catch (Exception e) { throw new PipelineJobException(e); } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceOutputHandlerInitTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceOutputHandlerInitTask.java index ad8a6011a..905b05169 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceOutputHandlerInitTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceOutputHandlerInitTask.java @@ -35,7 +35,7 @@ public static class Factory extends AbstractTaskFactory 1) + if (readdataToSra.get(accession).size() == 1) + { + SimpleFilter filter = new SimpleFilter(FieldKey.fromString("readset"), rs.getRowId()); + filter.addCondition(FieldKey.fromString("category"), "Readset"); + filter.addCondition(FieldKey.fromString("container"), rs.getContainer()); + filter.addCondition(FieldKey.fromString("dataId"), toMerge.get(0).getFileId1()); + boolean hasMetrics = new TableSelector(SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_QUALITY_METRICS), PageFlowUtil.set("RowId"), filter, null).exists(); + if (!hasMetrics) + { + job.getLogger().debug("No existing metrics found 
for: " + accession); + updatedAccessions.add(accession); + } + } + else { job.getLogger().debug("Consolidating multiple readdata for: " + accession); @@ -299,6 +348,12 @@ public void complete(PipelineJob job, List readsets, List readsets, List toAdd = new ArrayList<>(); toAdd.add(rd.getFileId1()); + if (rd.getFileId2() != null) { toAdd.add(rd.getFileId2()); @@ -384,12 +440,18 @@ public void processFilesRemote(List readsets, JobContext ctx) throws Un File expectedFile1 = ctx.getSequenceSupport().getCachedData(rd.getFileId1()); File expectedFile2 = rd.getFileId2() == null ? null : ctx.getSequenceSupport().getCachedData(rd.getFileId2()); + if (!expectedFile1.getParentFile().exists()) + { + ctx.getLogger().info("Creating folder: " + expectedFile1.getParentFile().getPath()); + expectedFile1.getParentFile().mkdirs(); + } + FastqDumpWrapper wrapper = new FastqDumpWrapper(ctx.getLogger()); Pair files = wrapper.downloadSra(accession, ctx.getOutputDir(), rd.isPairedEnd(), false); long lines1 = SequenceUtil.getLineCount(files.first) / 4; ctx.getJob().getLogger().debug("Reads in " + files.first.getName() + ": " + lines1); - if (lines1 != accessionToReads.get(accession)) + if (accessionToReads.containsKey(accession) && accessionToReads.get(accession) > 0 && lines1 != accessionToReads.get(accession)) { throw new PipelineJobException("Reads found in file, " + lines1 + ", does not match expected: " + accessionToReads.get(accession) + " for file: " + files.first.getPath()); } @@ -398,7 +460,7 @@ public void processFilesRemote(List readsets, JobContext ctx) throws Un { long lines2 = SequenceUtil.getLineCount(files.second) / 4; ctx.getJob().getLogger().debug("Reads in " + files.second.getName() + ": " + lines2); - if (lines2 != accessionToReads.get(accession)) + if (accessionToReads.containsKey(accession) && accessionToReads.get(accession) > 0 && lines2 != accessionToReads.get(accession)) { throw new PipelineJobException("Reads found in file, " + lines2 + ", does not match expected: " + accessionToReads.get(accession) + " for file: " + files.second.getPath()); } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishAnalysis.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishAnalysis.java index 8039ff338..2bb9cf5dc 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishAnalysis.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishAnalysis.java @@ -33,7 +33,7 @@ public static class Provider extends AbstractAnalysisStepProvider args = new ArrayList<>(); args.add(getExe().getPath()); args.add("discover"); args.add("--bam"); - args.add(inputBam.getPath()); + args.add(inputFile.getPath()); // NOTE: sawfish stores the absolute path of the FASTA in the output JSON, so dont rely on working copies: args.add("--ref"); @@ -102,4 +124,41 @@ private File getExe() { return SequencePipelineService.get().getExeForPackage("SAWFISHPATH", "sawfish"); } + + private static class CramToBam extends SamtoolsRunner + { + public CramToBam(Logger log) + { + super(log); + } + + public void convert(File inputCram, File outputBam, File fasta, @Nullable Integer threads) throws PipelineJobException + { + getLogger().info("Converting CRAM to BAM"); + + execute(getParams(inputCram, outputBam, fasta, threads)); + } + + private List getParams(File inputCram, File outputBam, File fasta, @Nullable Integer threads) + { + List params = new ArrayList<>(); + params.add(getSamtoolsPath().getPath()); + params.add("view"); + params.add("-b"); + 
params.add("-T"); + params.add(fasta.getPath()); + params.add("-o"); + params.add(outputBam.getPath()); + + if (threads != null) + { + params.add("-@"); + params.add(String.valueOf(threads)); + } + + params.add(inputCram.getPath()); + + return params; + } + } } \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishJointCallingHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishJointCallingHandler.java index 9beae27c6..270c1488f 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishJointCallingHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishJointCallingHandler.java @@ -1,7 +1,12 @@ package org.labkey.sequenceanalysis.run.analysis; +import au.com.bytecode.opencsv.CSVWriter; +import htsjdk.samtools.util.IOUtil; import org.apache.commons.io.FileUtils; import org.json.JSONObject; +import org.labkey.api.exp.api.ExpData; +import org.labkey.api.exp.api.ExpRun; +import org.labkey.api.exp.api.ExperimentService; import org.labkey.api.module.ModuleLoader; import org.labkey.api.pipeline.PipelineJob; import org.labkey.api.pipeline.PipelineJobException; @@ -9,6 +14,7 @@ import org.labkey.api.sequenceanalysis.SequenceAnalysisService; import org.labkey.api.sequenceanalysis.SequenceOutputFile; import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; @@ -66,6 +72,38 @@ public SequenceOutputProcessor getProcessor() public static class Processor implements SequenceOutputProcessor { + @Override + public void init(JobContext ctx, List inputFiles, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + try (CSVWriter csv = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(getSampleCsvFile(ctx)))) + { + for (SequenceOutputFile so : inputFiles) + { + if (so.getRunId() == null) + { + throw new PipelineJobException("Unable to find ExperimentRun for: " + so.getRowid()); + } + + ExpRun run = ExperimentService.get().getExpRun(so.getRunId()); + List inputs = run.getInputDatas("Input BAM File", null); + if (inputs.isEmpty()) + { + throw new PipelineJobException("Unable to find input BAMs for: " + so.getRowid()); + } + else if (inputs.size() > 1) + { + throw new PipelineJobException("More than one input BAM found for ExperimentRun: " + so.getRunId()); + } + + csv.writeNext(new String[]{so.getFile().getParentFile().getPath(), inputs.get(0).getFile().getPath()}); + } + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + @Override public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException { @@ -89,8 +127,6 @@ public void processFilesRemote(List inputFiles, JobContext c outputBaseName = outputBaseName.replaceAll(".vcf$", ""); } - File expectedFinalOutput = new File(ctx.getOutputDir(), outputBaseName + ".vcf.gz"); - File ouputVcf = runSawfishCall(ctx, filesToProcess, genome, outputBaseName); SequenceOutputFile so = new SequenceOutputFile(); @@ -102,6 +138,11 @@ public void processFilesRemote(List inputFiles, JobContext c 
ctx.addSequenceOutput(so); } + private File getSampleCsvFile(PipelineContext ctx) + { + return new File(ctx.getSourceDirectory(), "sawfish.samples.csv"); + } + private File runSawfishCall(JobContext ctx, List inputs, ReferenceGenome genome, String outputBaseName) throws PipelineJobException { if (inputs.isEmpty()) @@ -126,6 +167,9 @@ private File runSawfishCall(JobContext ctx, List inputs, ReferenceGenome g args.add(sample.getParentFile().getPath()); } + args.add("--sample-csv"); + args.add(getSampleCsvFile(ctx).getPath()); + File outDir = new File(ctx.getOutputDir(), "sawfish"); args.add("--output-dir"); args.add(outDir.getPath()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/RnaSeQCWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/RnaSeQCWrapper.java index aeb02da02..0f21d45d6 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/RnaSeQCWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/RnaSeQCWrapper.java @@ -28,6 +28,8 @@ */ public class RnaSeQCWrapper extends AbstractCommandWrapper { + final static int MAX_WARNINGS = 100; + public RnaSeQCWrapper(@Nullable Logger logger) { super(logger); @@ -122,12 +124,32 @@ public File execute(List inputBams, List sampleIds, @Nullable List } else if (!line.contains("transcript_id")) { - getLogger().info("skipping GTF line " + lineNo + " because it lacks transcript_id"); + if (filteredLines <= MAX_WARNINGS) + { + if (filteredLines == MAX_WARNINGS) + { + getLogger().info("skipping GTF line " + lineNo + " because it lacks transcript_id. No additional warnings will be printed"); + } + else + { + getLogger().info("skipping GTF line " + lineNo + " because it lacks transcript_id"); + } + } filteredLines++; } else if (!line.contains("gene_id")) { - getLogger().info("skipping GTF line " + lineNo + " because it lacks gene_id"); + if (filteredLines <= MAX_WARNINGS) + { + if (filteredLines == MAX_WARNINGS) + { + getLogger().info("skipping GTF line " + lineNo + " because it lacks gene_id. 
No additional warnings will be printed"); + } + else + { + getLogger().info("skipping GTF line " + lineNo + " because it lacks gene_id"); + } + } filteredLines++; } else diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SNPEffStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SNPEffStep.java index e55bf1915..16903cb05 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SNPEffStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SNPEffStep.java @@ -75,13 +75,17 @@ public static File checkOrCreateIndex(SequenceAnalysisJobSupport support, Logger SnpEffWrapper wrapper = new SnpEffWrapper(log); File snpEffIndexDir = wrapper.getExpectedIndexDir(snpEffBaseDir, genome.getGenomeId(), geneFileId); - if (!snpEffIndexDir.exists()) + if (snpEffIndexDir.exists()) { - wrapper.buildIndex(snpEffBaseDir, genome, geneFile, geneFileId); + log.debug("previously created index found, re-using: " + snpEffIndexDir.getPath()); + return snpEffBaseDir; } - else + + File binFile = new File(snpEffIndexDir, "snpEffectPredictor.bin"); + if (!binFile.exists()) { - log.debug("previously created index found, re-using: " + snpEffIndexDir.getPath()); + log.debug("existing index not found, expected: " + binFile.getPath()); + wrapper.buildIndex(snpEffBaseDir, genome, geneFile, geneFileId); } return snpEffBaseDir; diff --git a/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java b/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java index 1e066c54f..6c8f67114 100644 --- a/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java +++ b/SequenceAnalysis/test/src/org/labkey/test/tests/external/labModules/SequenceTest.java @@ -196,6 +196,44 @@ private void importReadsetMetadata() log("verifying readset count correct"); waitForText("Sequence Readsets"); waitForElement(LabModuleHelper.getNavPanelItem("Sequence Readsets:", _readsetCt.toString())); + + // Repeat, adding SRA accessions: + goToProjectHome(); + waitAndClick(Locator.linkWithText("Plan Sequence Run (Create Readsets)")); + new Window.WindowFinder(getDriver()).withTitle("Create Readsets").waitFor(); + waitAndClickAndWait(Ext4Helper.Locators.ext4Button("Submit")); + + _helper.waitForField("Sample Id", WAIT_FOR_PAGE); + _ext4Helper.clickTabContainingText("Import Spreadsheet"); + waitForText("Copy/Paste Data"); + + setFormElementJS(Locator.name("text"), getIlluminaSRANames()); + + waitAndClick(Ext4Helper.Locators.ext4Button("Upload")); + new Window.WindowFinder(getDriver()).withTitle("Success").waitFor(); + _readsetCt += 3; + assertTextPresent("Success!"); + waitAndClickAndWait(Ext4Helper.Locators.ext4Button("OK")); + + // This is scoped to this workbook: + log("verifying readset count correct"); + waitForText("Sequence Readsets"); + waitAndClickAndWait(LabModuleHelper.getNavPanelItem("Sequence Readsets:", "3")); + + DataRegionTable.DataRegion(getDriver()).withName("query").waitFor(); + + //verify CSV file creation + DataRegionTable.DataRegion(getDriver()).find().goToView("SRA Info"); + DataRegionTable dr = DataRegionTable.DataRegion(getDriver()).withName("query").waitFor(); + waitForElement(Locator.tagContainingText("a", "SRA0")); + waitForElement(Locator.tagContainingText("a", "SRA1")); + waitForElement(Locator.tagContainingText("a", "SRA2")); + + dr.checkAllOnPage(); + dr.clickHeaderButtonAndWait("Delete"); + clickButton("OK"); + + _readsetCt -= 3; } /** @@ -330,6 +368,18 @@ private String 
getIlluminaNames() return sb.toString(); } + private String getIlluminaSRANames() + { + StringBuilder sb = new StringBuilder("Name\tPlatform\tsraAccessions\n"); + int i = 0; + while (i < 3) + { + sb.append("IlluminaSRA" + (i + 1) + "\tILLUMINA\tSRA" + i + "\n"); + i++; + } + return sb.toString(); + } + /** * This test will kick off a pipeline import using the illumina pipeline. Verification of the result * is performed by readsetFeaturesTest() diff --git a/Studies/api-src/org/labkey/api/studies/StudiesService.java b/Studies/api-src/org/labkey/api/studies/StudiesService.java index 848fde966..700355741 100644 --- a/Studies/api-src/org/labkey/api/studies/StudiesService.java +++ b/Studies/api-src/org/labkey/api/studies/StudiesService.java @@ -2,6 +2,7 @@ import org.labkey.api.data.Container; import org.labkey.api.data.TableCustomizer; +import org.labkey.api.data.triggers.TriggerFactory; import org.labkey.api.module.Module; import org.labkey.api.resource.Resource; import org.labkey.api.security.User; @@ -37,4 +38,6 @@ static public void setInstance(StudiesService instance) abstract public List getEventProviders(Container c); abstract public TableCustomizer getStudiesTableCustomizer(); + + abstract public TriggerFactory getStudiesTriggerFactory(); } diff --git a/Studies/resources/schemas/studies.xml b/Studies/resources/schemas/studies.xml index 5dbd8d185..c28038ff9 100644 --- a/Studies/resources/schemas/studies.xml +++ b/Studies/resources/schemas/studies.xml @@ -140,6 +140,7 @@ Study Name + false Label @@ -196,6 +197,7 @@ DETAILED + Cohort ID true false false @@ -207,7 +209,7 @@ studies studies rowId - label + labelOrName @@ -286,7 +288,7 @@ studies studies rowId - label + labelOrName @@ -351,7 +353,7 @@ studies studies rowId - label + labelOrName @@ -360,7 +362,7 @@ studies studyCohorts rowId - label + labelOrName diff --git a/Studies/resources/views/studiesOverview.html b/Studies/resources/views/studiesOverview.html index 2d1d6a280..5fd144f1e 100644 --- a/Studies/resources/views/studiesOverview.html +++ b/Studies/resources/views/studiesOverview.html @@ -43,15 +43,15 @@ }); dataByCategory[row['CategoryId/Label']] = dataByCategory[row['CategoryId/Label']] || [] - dataByCategory[row['CategoryId/Label']].push(`
<li>${row.Label}</li>`)
+ dataByCategory[row['CategoryId/Label']].push($(`<li>${row.Label}</li>`))
})

Object.keys(dataByCategory).sort().forEach(category => {
div.append('<h3>' + category + '</h3>').append('<ul>')
+ const ul = div.children('ul')[0]
dataByCategory[category].forEach(item => {
- div.append(item)
+ ul.append(item[0])
})
- div.append('</ul>
    ') }) $('#' + webpart.wrapperDivId).append(div) diff --git a/Studies/src/org/labkey/studies/StudiesManager.java b/Studies/src/org/labkey/studies/StudiesManager.java index 4c0d087bb..eedaedba3 100644 --- a/Studies/src/org/labkey/studies/StudiesManager.java +++ b/Studies/src/org/labkey/studies/StudiesManager.java @@ -226,7 +226,7 @@ private Map studyToMap(StudyDefinition s) Map m = new HashMap<>(); if (s.getRowId() != null) m.put("rowId", s.getRowId()); - m.put("name", s.getStudyName()); + m.put("studyName", s.getStudyName()); m.put("label", s.getLabel()); m.put("category", s.getCategory()); m.put("description", s.getDescription()); diff --git a/Studies/src/org/labkey/studies/StudiesServiceImpl.java b/Studies/src/org/labkey/studies/StudiesServiceImpl.java index e19ed526a..93f35a867 100644 --- a/Studies/src/org/labkey/studies/StudiesServiceImpl.java +++ b/Studies/src/org/labkey/studies/StudiesServiceImpl.java @@ -5,6 +5,7 @@ import org.labkey.api.data.Container; import org.labkey.api.data.TableCustomizer; import org.labkey.api.data.TableInfo; +import org.labkey.api.data.triggers.TriggerFactory; import org.labkey.api.module.Module; import org.labkey.api.pipeline.PipeRoot; import org.labkey.api.pipeline.PipelineService; @@ -26,6 +27,7 @@ import org.labkey.api.util.logging.LogHelper; import org.labkey.studies.query.StudiesTableCustomizer; import org.labkey.vfs.FileLike; +import org.labkey.studies.query.StudiesTriggerFactory; import java.io.FileNotFoundException; import java.io.IOException; @@ -51,6 +53,12 @@ private StudiesServiceImpl() } + @Override + public TriggerFactory getStudiesTriggerFactory() + { + return new StudiesTriggerFactory(); + } + @Override public void importFolderDefinition(Container container, User user, Module m, Path sourceFolderDirPath) throws IOException { diff --git a/Studies/src/org/labkey/studies/query/StudiesTriggerFactory.java b/Studies/src/org/labkey/studies/query/StudiesTriggerFactory.java index b8805dd35..c2ca102f2 100644 --- a/Studies/src/org/labkey/studies/query/StudiesTriggerFactory.java +++ b/Studies/src/org/labkey/studies/query/StudiesTriggerFactory.java @@ -38,49 +38,71 @@ public void beforeInsert(TableInfo table, Container c, User user, @Nullable Map< @Override public void beforeInsert(TableInfo table, Container c, User user, @Nullable Map newRow, ValidationException errors, Map extraContext, @Nullable Map existingRecord) throws ValidationException { - possiblyResolveStudy(newRow, c); + possiblyResolveStudy(table, newRow, existingRecord, c); } @Override public void beforeUpdate(TableInfo table, Container c, User user, @Nullable Map newRow, @Nullable Map oldRow, ValidationException errors, Map extraContext) throws ValidationException { - possiblyResolveStudy(newRow, c); + possiblyResolveStudy(table, newRow, oldRow, c); } /** * This allows incoming data to specify the study using the string name, which is resolved into the rowId */ - private void possiblyResolveStudy(@Nullable Map row, Container c) + private void possiblyResolveStudy(TableInfo table, @Nullable Map row, @Nullable Map oldRow, Container c) { if (row == null) { return; } - possiblyResolveStudy(row, c, "studyId"); - if (row.get("studyId") == null & row.get("studyName") != null) + if (table.getColumn("studyId") != null) { - possiblyResolveStudy(row, c, "studyName"); + possiblyResolveStudy(row, c, "studyId"); + if (row.get("studyId") == null & row.get("studyName") != null) + { + possiblyResolveStudy(row, c, "studyName"); + } + } + + if (table.getColumn("cohortId") != null) + { + 
possiblyResolveCohort(row, c, "cohortId"); + if (row.get("cohortId") == null & row.get("cohortName") != null) + { + possiblyResolveCohort(row, c, "cohortName"); + } } } private void possiblyResolveStudy(@Nullable Map row, Container c, String sourceProperty) + { + possiblyResolveStudyOrCohort(StudiesSchema.TABLE_STUDIES, row, c, sourceProperty, "studyId", "studyName"); + } + + private void possiblyResolveCohort(@Nullable Map row, Container c, String sourceProperty) + { + possiblyResolveStudyOrCohort(StudiesSchema.TABLE_COHORTS, row, c, sourceProperty, "cohortId", "cohortName"); + } + + private void possiblyResolveStudyOrCohort(String tableToQuery, @Nullable Map row, Container c, String sourceProperty, String targetFieldName, String filterFieldName) { if (row == null) { return; } - if (row.get(sourceProperty) != null & row.get(sourceProperty) instanceof String) + if (row.get(sourceProperty) != null & row.get(sourceProperty) instanceof String & !String.valueOf(row.get(sourceProperty)).isEmpty()) { if (!NumberUtils.isCreatable(row.get(sourceProperty).toString())) { Container target = c.isWorkbookOrTab() ? c.getParent() : c; - SimpleFilter filter = new SimpleFilter(FieldKey.fromString("container"), target.getEntityId()).addCondition(FieldKey.fromString("studyName"), row.get(sourceProperty)); - List rowIds = new TableSelector(StudiesSchema.getInstance().getSchema().getTable(StudiesSchema.TABLE_STUDIES), PageFlowUtil.set("rowId"), filter, null).getArrayList(Integer.class); + SimpleFilter filter = new SimpleFilter(FieldKey.fromString("container"), target.getEntityId()).addCondition(FieldKey.fromString(filterFieldName), row.get(sourceProperty)); + List rowIds = new TableSelector(StudiesSchema.getInstance().getSchema().getTable(tableToQuery), PageFlowUtil.set("rowId"), filter, null).getArrayList(Integer.class); if (rowIds.size() == 1) { - row.put("studyId", rowIds.get(0)); + row.put(targetFieldName, rowIds.get(0)); } } } diff --git a/Studies/src/org/labkey/studies/query/StudiesUserSchema.java b/Studies/src/org/labkey/studies/query/StudiesUserSchema.java index 928d882c6..00401b524 100644 --- a/Studies/src/org/labkey/studies/query/StudiesUserSchema.java +++ b/Studies/src/org/labkey/studies/query/StudiesUserSchema.java @@ -138,11 +138,11 @@ else if (TABLE_LOOKUPS.equalsIgnoreCase(name)) } else if (TABLE_STUDIES.equalsIgnoreCase(name)) { - return createStudiesTable(name, cf, false); + return createStudiesTable(name, cf); } else if (TABLE_COHORTS.equalsIgnoreCase(name)) { - return createStudyDesignTable(name, cf, true); + return createCohortsTable(name, cf); } else if (TABLE_ANCHOR_EVENTS.equalsIgnoreCase(name)) { @@ -169,18 +169,42 @@ else if (TABLE_EVENT_TYPES.equalsIgnoreCase(name)) return super.createTable(name, cf); } - private TableInfo createStudiesTable(String name, ContainerFilter cf, boolean addTriggers) + private TableInfo createCohortsTable(String name, ContainerFilter cf) + { + CustomPermissionsTable ret = createStudyDesignTable(name, cf, true); + + SQLFragment lastTerm = ret.getSqlDialect().concatenate(new SQLFragment("'Cohort-'"), new SQLFragment("CAST(" + ExprColumn.STR_TABLE_ALIAS + ".rowId AS VARCHAR)")); + SQLFragment sql2 = new SQLFragment("coalesce(" + ExprColumn.STR_TABLE_ALIAS + ".label, " + ExprColumn.STR_TABLE_ALIAS + ".cohortName, ").append(lastTerm).append(new SQLFragment(")")); + ExprColumn col2 = new ExprColumn(ret, "labelOrName", sql2, JdbcType.VARCHAR, ret.getColumn("cohortName"), ret.getColumn("label")); + col2.setLabel("Cohort Name"); + col2.setHidden(true); + 
col2.setDescription("This column lists the cohort label, and the name if label is blank"); + + ret.addColumn(col2); + + return ret; + } + + private TableInfo createStudiesTable(String name, ContainerFilter cf) { - CustomPermissionsTable ret = createStudyDesignTable(name, cf, addTriggers); + CustomPermissionsTable ret = createStudyDesignTable(name, cf, false); final String chr = ret.getSqlDialect().isPostgreSQL() ? "chr" : "char"; - SQLFragment sql1 = new SQLFragment("(SELECT ").append(ret.getSqlDialect().getGroupConcat(new SQLFragment("c.label"), true, true, new SQLFragment(chr + "(10)"))).append(" as expr FROM " + StudiesSchema.NAME + "." + TABLE_COHORTS + " c WHERE c.studyId = " + ExprColumn.STR_TABLE_ALIAS + ".rowId)"); + SQLFragment sql1 = new SQLFragment("(SELECT ").append(ret.getSqlDialect().getGroupConcat(new SQLFragment("coalesce(c.label, c.cohortName)"), true, true, new SQLFragment(chr + "(10)"))).append(" as expr FROM " + StudiesSchema.NAME + "." + TABLE_COHORTS + " c WHERE c.studyId = " + ExprColumn.STR_TABLE_ALIAS + ".rowId)"); ExprColumn col1 = new ExprColumn(ret, "cohorts", sql1, JdbcType.VARCHAR, ret.getColumn("rowid")); col1.setLabel("Cohort(s)"); col1.setDescription("This column lists the cohort labels for this study"); ret.addColumn(col1); + SQLFragment sql2 = new SQLFragment("coalesce(" + ExprColumn.STR_TABLE_ALIAS + ".label, " + ExprColumn.STR_TABLE_ALIAS + ".studyName)"); + ExprColumn col2 = new ExprColumn(ret, "labelOrName", sql2, JdbcType.VARCHAR, ret.getColumn("studyName"), ret.getColumn("label")); + col2.setLabel("Study Name"); + col2.setHidden(true); + col2.setDescription("This column lists the study label, and the name if label is blank"); + + ret.addColumn(col2); + return ret; } diff --git a/discvrcore/src/org/labkey/discvrcore/DiscvrCoreController.java b/discvrcore/src/org/labkey/discvrcore/DiscvrCoreController.java index cd83a749c..f948c987c 100644 --- a/discvrcore/src/org/labkey/discvrcore/DiscvrCoreController.java +++ b/discvrcore/src/org/labkey/discvrcore/DiscvrCoreController.java @@ -58,7 +58,6 @@ import java.util.Map; import java.util.TreeMap; -import static javax.swing.Spring.width; import static org.labkey.api.util.DOM.Attribute.valign; import static org.labkey.api.util.DOM.at; import static org.labkey.api.util.DOM.cl; diff --git a/jbrowse/package-lock.json b/jbrowse/package-lock.json index b7ba7390c..f0e5f599c 100644 --- a/jbrowse/package-lock.json +++ b/jbrowse/package-lock.json @@ -3176,9 +3176,10 @@ } }, "node_modules/@labkey/api": { - "version": "1.42.1", - "resolved": "https://labkey.jfrog.io/artifactory/api/npm/libs-client/@labkey/api/-/@labkey/api-1.42.1.tgz", - "integrity": "sha512-rT+Q/ZM6bE6bU8HDj/7f3DIFuq538e+LZAvBw8P3qJjuAnyO+O+ItZz/YukAKCXXiN2GdedOXDJbt1Ms0bgLsg==" + "version": "1.43.0", + "resolved": "https://labkey.jfrog.io/artifactory/api/npm/libs-client/@labkey/api/-/@labkey/api-1.43.0.tgz", + "integrity": "sha512-4hOQz+pM/QaCey6ooJEmEbElnR9+TDEzWG+8caFfeIX1iAg1335NXW3+/Xzs6a+L9ysRKds8bNgFPu2sxjPzfg==", + "license": "Apache-2.0" }, "node_modules/@labkey/build": { "version": "8.6.0", @@ -3217,12 +3218,13 @@ } }, "node_modules/@labkey/components": { - "version": "6.58.5", - "resolved": "https://labkey.jfrog.io/artifactory/api/npm/libs-client/@labkey/components/-/@labkey/components-6.58.5.tgz", - "integrity": "sha512-N+WHs9QDAkJ5p2NIwCpWSp3O0Q5bNeBXqoizaZRZh9uEM9iSxg0I4GD/dd1jCk1PFGHs05mPLZ13yrPeixDiHQ==", + "version": "6.63.0", + "resolved": 
"https://labkey.jfrog.io/artifactory/api/npm/libs-client/@labkey/components/-/@labkey/components-6.63.0.tgz", + "integrity": "sha512-E1tFkF6/erpzefo3b2Ot+jwFSlgRfeGalIuHGKXENPrubHOr5XkLXmu9lgGeyPD95fCx9Ff3dNHGxiokxsupwQ==", + "license": "SEE LICENSE IN LICENSE.txt", "dependencies": { "@hello-pangea/dnd": "18.0.1", - "@labkey/api": "1.42.1", + "@labkey/api": "1.43.0", "@testing-library/dom": "~10.4.0", "@testing-library/jest-dom": "~6.6.3", "@testing-library/react": "~16.3.0", @@ -4253,6 +4255,12 @@ "@types/node": "*" } }, + "node_modules/@types/pako": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@types/pako/-/pako-2.0.4.tgz", + "integrity": "sha512-VWDCbrLeVXJM9fihYodcLiIv0ku+AlOa/TQ1SvYOaBuyrSKgEcro95LJyIsJ4vSo6BXIxOKxiJAat04CmST9Fw==", + "license": "MIT" + }, "node_modules/@types/parse-json": { "version": "4.0.2", "license": "MIT" @@ -4761,16 +4769,6 @@ "util": "^0.12.5" } }, - "node_modules/atob": { - "version": "2.1.2", - "license": "(MIT OR Apache-2.0)", - "bin": { - "atob": "bin/atob.js" - }, - "engines": { - "node": ">= 4.5.0" - } - }, "node_modules/available-typed-arrays": { "version": "1.0.7", "license": "MIT", @@ -5219,16 +5217,6 @@ "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" } }, - "node_modules/btoa": { - "version": "1.2.1", - "license": "(MIT OR Apache-2.0)", - "bin": { - "btoa": "bin/btoa.js" - }, - "engines": { - "node": ">= 0.4.0" - } - }, "node_modules/buffer": { "version": "6.0.3", "funding": [ @@ -6736,6 +6724,23 @@ "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", "dev": true }, + "node_modules/fast-png": { + "version": "6.4.0", + "resolved": "https://registry.npmjs.org/fast-png/-/fast-png-6.4.0.tgz", + "integrity": "sha512-kAqZq1TlgBjZcLr5mcN6NP5Rv4V2f22z00c3g8vRrwkcqjerx7BEhPbOnWCPqaHUl2XWQBJQvOT/FQhdMT7X/Q==", + "license": "MIT", + "dependencies": { + "@types/pako": "^2.0.3", + "iobuffer": "^5.3.2", + "pako": "^2.1.0" + } + }, + "node_modules/fast-png/node_modules/pako": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/pako/-/pako-2.1.0.tgz", + "integrity": "sha512-w+eufiZ1WuJYgPXbV/PO3NCMEc3xqylkKHzp8bxp1uW4qaSNQUkwmLLEc3kKsfz8lpV1F8Ht3U1Cm+9Srog2ug==", + "license": "(MIT AND Zlib)" + }, "node_modules/fast-uri": { "version": "3.0.1", "license": "MIT" @@ -7723,6 +7728,12 @@ "node": ">=10.13.0" } }, + "node_modules/iobuffer": { + "version": "5.4.0", + "resolved": "https://registry.npmjs.org/iobuffer/-/iobuffer-5.4.0.tgz", + "integrity": "sha512-DRebOWuqDvxunfkNJAlc3IzWIPD5xVxwUNbHr7xKB8E6aLJxIPfNX3CoMJghcFjpv6RWQsrcJbghtEwSPoJqMA==", + "license": "MIT" + }, "node_modules/ipaddr.js": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.2.0.tgz", @@ -8126,13 +8137,13 @@ } }, "node_modules/jspdf": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/jspdf/-/jspdf-3.0.1.tgz", - "integrity": "sha512-qaGIxqxetdoNnFQQXxTKUD9/Z7AloLaw94fFsOiJMxbfYdBbrBuhWmbzI8TVjrw7s3jBY1PFHofBKMV/wZPapg==", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/jspdf/-/jspdf-3.0.3.tgz", + "integrity": "sha512-eURjAyz5iX1H8BOYAfzvdPfIKK53V7mCpBTe7Kb16PaM8JSXEcUQNBQaiWMI8wY5RvNOPj4GccMjTlfwRBd+oQ==", + "license": "MIT", "dependencies": { - "@babel/runtime": "^7.26.7", - "atob": "^2.1.2", - "btoa": "^1.2.1", + "@babel/runtime": "^7.26.9", + "fast-png": "^6.2.0", "fflate": "^0.8.1" }, "optionalDependencies": { diff --git 
a/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantAdapter/ExtendedVariantAdapter.ts b/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantAdapter/ExtendedVariantAdapter.ts index d7973773b..035790c92 100644 --- a/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantAdapter/ExtendedVariantAdapter.ts +++ b/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantAdapter/ExtendedVariantAdapter.ts @@ -1,14 +1,57 @@ import QuickLRU from '@jbrowse/core/util/QuickLRU'; -import { BaseOptions } from '@jbrowse/core/data_adapters/BaseAdapter'; +import { BaseOptions, BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter'; import { NoAssemblyRegion } from '@jbrowse/core/util/types'; import { ObservableCreate } from '@jbrowse/core/util/rxjs'; import { Feature } from '@jbrowse/core/util/simpleFeature'; import ExtendedVcfFeature from './ExtendedVcfFeature'; import { VcfFeature } from '@jbrowse/plugin-variants'; -import { default as VcfTabixAdapter } from './VcfTabixAdapter'; -export default class extends VcfTabixAdapter { +export default class extends BaseFeatureDataAdapter { protected featureCache = new QuickLRU({ maxSize: 20 }) + private subAdapterP?: Promise + + constructor(...args: any[]) { + super(...args) + + // Return a Proxy that forwards any unknown member access to the sub-adapter. + // This avoids re-implementing methods like getHeader/getRefNames/getMetadata/etc. + const self = this + return new Proxy(this, { + get(target, prop, receiver) { + // If we have it already (e.g., getFeatures, getFeaturesAsArray, BaseFeatureDataAdapter-derived properties), use it directly + if (prop in target || typeof prop === 'symbol') { + return Reflect.get(target, prop, receiver) + } + + // Otherwise, forward to the VcfTabixAdapter sub-adapter + return async (...callArgs: any[]) => { + const sub = await self.getVcfSubAdapter() + const value = (sub as any)[prop] + + // If it’s a method, call it; otherwise return the property value + if (typeof value === 'function') { + return value.apply(sub, callArgs) + } + return value + } + }, + }) + } + + private async getVcfSubAdapter(): Promise { + if (!this.subAdapterP) { + const vcfGzLocation = this.getConf('vcfGzLocation') + const index = this.getConf(['index']) + const vcfAdapterConf = { type: 'VcfTabixAdapter', vcfGzLocation, index } + this.subAdapterP = this.getSubAdapter!(vcfAdapterConf) + .then(({ dataAdapter }) => dataAdapter) + .catch(e => { + this.subAdapterP = undefined + throw e + }) + } + return this.subAdapterP + } public getFeatures(query: NoAssemblyRegion, opts: BaseOptions = {}) { return ObservableCreate(async observer => { @@ -50,4 +93,29 @@ export default class extends VcfTabixAdapter { return features } + + // Typescript errors at compile time without these stubs + async configure(opts?: BaseOptions) { + const sub = await this.getVcfSubAdapter() + return sub.configure(opts) + } + + async getRefNames(opts: BaseOptions = {}) { + const sub = await this.getVcfSubAdapter() + return sub.getRefNames(opts) + } + + async getHeader(opts?: BaseOptions) { + const sub = await this.getVcfSubAdapter() + return sub.getHeader(opts) + } + + async getMetadata(opts?: BaseOptions) { + const sub = await this.getVcfSubAdapter() + return sub.getMetadata(opts) + } + + freeResources(): void { + void this.getVcfSubAdapter().then(sub => sub.freeResources?.()) + } } \ No newline at end of file diff --git 
a/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantAdapter/VcfTabixAdapter.ts b/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantAdapter/VcfTabixAdapter.ts deleted file mode 100644 index fca6a3e64..000000000 --- a/jbrowse/src/client/JBrowse/Browser/plugins/ExtendedVariantPlugin/ExtendedVariantAdapter/VcfTabixAdapter.ts +++ /dev/null @@ -1,159 +0,0 @@ -import { TabixIndexedFile } from '@gmod/tabix' -import VcfParser from '@gmod/vcf' -import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter' -import { - fetchAndMaybeUnzipText, - updateStatus, -} from '@jbrowse/core/util' -import { openLocation } from '@jbrowse/core/util/io' -import { ObservableCreate } from '@jbrowse/core/util/rxjs' - -import type { BaseOptions } from '@jbrowse/core/data_adapters/BaseAdapter' -import type { Feature } from '@jbrowse/core/util' -import type { NoAssemblyRegion } from '@jbrowse/core/util/types' -import { VcfFeature } from '@jbrowse/plugin-variants'; - -function shorten2(name: string, max = 70) { - return name.length > max ? `${name.slice(0, max)}...` : name -} - -export default class VcfTabixAdapter extends BaseFeatureDataAdapter { - private configured?: Promise<{ - vcf: TabixIndexedFile - parser: VcfParser - }> - - private async configurePre(_opts?: BaseOptions) { - const vcfGzLocation = this.getConf('vcfGzLocation') - const location = this.getConf(['index', 'location']) - const indexType = this.getConf(['index', 'indexType']) - - const filehandle = openLocation(vcfGzLocation, this.pluginManager) - const isCSI = indexType === 'CSI' - const vcf = new TabixIndexedFile({ - filehandle, - csiFilehandle: isCSI - ? openLocation(location, this.pluginManager) - : undefined, - tbiFilehandle: !isCSI - ? 
openLocation(location, this.pluginManager) - : undefined, - chunkCacheSize: 50 * 2 ** 20, - }) - - return { - vcf, - parser: new VcfParser({ - header: await vcf.getHeader(), - }), - } - } - - protected async configurePre2() { - if (!this.configured) { - this.configured = this.configurePre().catch((e: unknown) => { - this.configured = undefined - throw e - }) - } - return this.configured - } - - async configure(opts?: BaseOptions) { - const { statusCallback = () => {} } = opts || {} - return updateStatus('Downloading index', statusCallback, () => - this.configurePre2(), - ) - } - public async getRefNames(opts: BaseOptions = {}) { - const { vcf } = await this.configure(opts) - return vcf.getReferenceSequenceNames(opts) - } - - async getHeader(opts?: BaseOptions) { - const { vcf } = await this.configure(opts) - return vcf.getHeader() - } - - async getMetadata(opts?: BaseOptions) { - const { parser } = await this.configure(opts) - return parser.getMetadata() - } - - public getFeatures(query: NoAssemblyRegion, opts: BaseOptions = {}) { - return ObservableCreate(async observer => { - const { refName, start, end } = query - const { statusCallback = () => {} } = opts - const { vcf, parser } = await this.configure(opts) - - await updateStatus('Downloading variants', statusCallback, () => - vcf.getLines(refName, start, end, { - lineCallback: (line, fileOffset) => { - observer.next( - new VcfFeature({ - variant: parser.parseLine(line), - parser, - id: `${this.id}-vcf-${fileOffset}`, - }), - ) - }, - ...opts, - }), - ) - observer.complete() - }, opts.stopToken) - } - - async getSources() { - const conf = this.getConf('samplesTsvLocation') - if (conf.uri === '' || conf.uri === '/path/to/samples.tsv') { - const { parser } = await this.configure() - return parser.samples.map(name => ({ - name, - })) - } else { - const txt = await fetchAndMaybeUnzipText( - openLocation(conf, this.pluginManager), - ) - const lines = txt.split(/\n|\r\n|\r/) - const header = lines[0]!.split('\t') - const { parser } = await this.configure() - const metadataLines = lines - .slice(1) - .filter(f => !!f) - .map(line => { - const [name, ...rest] = line.split('\t') - return { - ...Object.fromEntries( - // force col 0 to be called name - rest.map((c, idx) => [header[idx + 1]!, c] as const), - ), - name: name!, - } - }) - const vcfSampleSet = new Set(parser.samples) - const metadataSet = new Set(metadataLines.map(r => r.name)) - const metadataNotInVcfSamples = [...metadataSet].filter( - f => !vcfSampleSet.has(f), - ) - const vcfSamplesNotInMetadata = [...vcfSampleSet].filter( - f => !metadataSet.has(f), - ) - if (metadataNotInVcfSamples.length) { - console.warn( - `There are ${metadataNotInVcfSamples.length} samples in metadata file (${metadataLines.length} lines) not in VCF (${parser.samples.length} samples):`, - shorten2(metadataNotInVcfSamples.join(',')), - ) - } - if (vcfSamplesNotInMetadata.length) { - console.warn( - `There are ${vcfSamplesNotInMetadata.length} samples in VCF file (${parser.samples.length} samples) not in metadata file (${metadataLines.length} lines):`, - shorten2(vcfSamplesNotInMetadata.map(m => m).join(',')), - ) - } - return metadataLines.filter(f => vcfSampleSet.has(f.name)) - } - } - - public freeResources(/* { region } */): void {} -} diff --git a/singlecell/resources/chunks/CommonFilters.R b/singlecell/resources/chunks/CommonFilters.R index 7e76aafbc..94aef6815 100644 --- a/singlecell/resources/chunks/CommonFilters.R +++ b/singlecell/resources/chunks/CommonFilters.R @@ -171,7 +171,10 @@ for (datasetId 
in names(seuratObjects)) { } toDrop <- is.na(seuratObj@meta.data$scGateConsensus) - if (sum(toDrop) > 0) { + if (sum(toDrop) == ncol(seuratObj)) { + print(paste0('There were no cells remaining after dropping cells without a scGateConsensus value')) + seuratObj <- NULL + } else if (sum(toDrop) > 0) { cells <- colnames(seuratObj)[!is.na(seuratObj@meta.data$scGateConsensus)] seuratObj <- subset(seuratObj, cells = cells) print(paste0('After dropping cells without scGateConsensus: ', length(colnames(x = seuratObj)))) diff --git a/singlecell/resources/chunks/IdentifyAndStoreActiveClonotypes.R new file mode 100644 index 000000000..0e4d474c2 --- /dev/null +++ b/singlecell/resources/chunks/IdentifyAndStoreActiveClonotypes.R @@ -0,0 +1,22 @@ +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) +} + +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) +Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) + +for (datasetId in names(seuratObjects)) { + printName(datasetId) + seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) + + Rdiscvr::IdentifyAndStoreActiveClonotypes(seuratObj, chain = 'TRA', storeStimLevelData = FALSE) + Rdiscvr::IdentifyAndStoreActiveClonotypes(seuratObj, chain = 'TRB') + + saveData(seuratObj, datasetId) + + # Cleanup + rm(seuratObj) + gc() +} \ No newline at end of file diff --git a/singlecell/resources/chunks/RunCsCore.R deleted file mode 100644 index b7c7fd7a8..000000000 --- a/singlecell/resources/chunks/RunCsCore.R +++ /dev/null @@ -1,14 +0,0 @@ -for (datasetId in names(seuratObjects)) { - printName(datasetId) - seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - - outFile <- paste0(outputPrefix, '.', makeLegalFileName(datasetId), '.markers.txt') - module_list <- CellMembrane::RunCsCore(seuratObj, saveFile = paste0(outFile, '.cscore.rds')) - saveRDS(module_list, paste0(outFile, '.cscore.wgcna.rds')) - - saveData(seuratObj, datasetId) - - # Cleanup - rm(seuratObj) - gc() -} \ No newline at end of file diff --git a/singlecell/resources/chunks/RunTricycle.R deleted file mode 100644 index b04162f1d..000000000 --- a/singlecell/resources/chunks/RunTricycle.R +++ /dev/null @@ -1,12 +0,0 @@ - for (datasetId in names(seuratObjects)) { - printName(datasetId) - seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - - seuratObj <- CellMembrane::RunTricycle(seuratObj) - - saveData(seuratObj, datasetId) - - # Cleanup - rm(seuratObj) - gc() -} \ No newline at end of file diff --git a/singlecell/resources/chunks/StudyMetadata.R index f5fb694ff..45bb03cef 100644 --- a/singlecell/resources/chunks/StudyMetadata.R +++ b/singlecell/resources/chunks/StudyMetadata.R @@ -32,6 +32,8 @@ for (datasetId in names(seuratObjects)) { seuratObj <- Rdiscvr::ApplyEC_Metadata(seuratObj, errorIfUnknownIdsFound = errorIfUnknownIdsFound) } else if (studyName == 'PPG_Stims') { seuratObj <- Rdiscvr::ApplyPPG_Stim_Metadata(seuratObj, errorIfUnknownIdsFound = errorIfUnknownIdsFound) + } else if (studyName == 'IMPAC_TB_Human') { + seuratObj <- Rdiscvr::ApplyIMPAC_TB_Human_Metadata(seuratObj, errorIfUnknownIdsFound = errorIfUnknownIdsFound) } else { stop(paste0('Unknown study: ', studyName)) } diff --git 
a/singlecell/resources/queries/singlecell/samples.js b/singlecell/resources/queries/singlecell/samples.js index f66be9f90..6b3ec2600 100644 --- a/singlecell/resources/queries/singlecell/samples.js +++ b/singlecell/resources/queries/singlecell/samples.js @@ -23,7 +23,7 @@ function beforeUpsert(row, oldRow, errors){ else if (['No stim', 'No Stim'].indexOf(row.stim) !== -1){ row.stim = 'NoStim'; } - else if (['Infected cells: SIV+', 'Infected Cells: SIV+'].indexOf(row.stim) !== -1){ + else if (['SIV+', 'Infected cells: SIV+', 'Infected Cells: SIV+'].indexOf(row.stim) !== -1){ row.stim = 'SIV-Infected CD4s'; } else if (['Infected cells: SIV-', 'Infected Cells: SIV-'].indexOf(row.stim) !== -1){ diff --git a/singlecell/src/org/labkey/singlecell/SingleCellModule.java b/singlecell/src/org/labkey/singlecell/SingleCellModule.java index 29ec88138..92efcb752 100644 --- a/singlecell/src/org/labkey/singlecell/SingleCellModule.java +++ b/singlecell/src/org/labkey/singlecell/SingleCellModule.java @@ -70,6 +70,7 @@ import org.labkey.singlecell.pipeline.singlecell.FilterRawCounts; import org.labkey.singlecell.pipeline.singlecell.FindClustersAndDimRedux; import org.labkey.singlecell.pipeline.singlecell.FindMarkers; +import org.labkey.singlecell.pipeline.singlecell.IdentifyAndStoreActiveClonotypes; import org.labkey.singlecell.pipeline.singlecell.IntegrateData; import org.labkey.singlecell.pipeline.singlecell.MergeSeurat; import org.labkey.singlecell.pipeline.singlecell.NormalizeAndScale; @@ -86,7 +87,6 @@ import org.labkey.singlecell.pipeline.singlecell.RunCelltypist; import org.labkey.singlecell.pipeline.singlecell.RunCelltypistCustomModel; import org.labkey.singlecell.pipeline.singlecell.RunConga; -import org.labkey.singlecell.pipeline.singlecell.RunCsCore; import org.labkey.singlecell.pipeline.singlecell.RunDecoupler; import org.labkey.singlecell.pipeline.singlecell.RunEscape; import org.labkey.singlecell.pipeline.singlecell.RunLDA; @@ -97,7 +97,6 @@ import org.labkey.singlecell.pipeline.singlecell.RunScGate; import org.labkey.singlecell.pipeline.singlecell.RunScGateBuiltin; import org.labkey.singlecell.pipeline.singlecell.RunSingleR; -import org.labkey.singlecell.pipeline.singlecell.RunTricycle; import org.labkey.singlecell.pipeline.singlecell.RunVision; import org.labkey.singlecell.pipeline.singlecell.ScoreCellCycle; import org.labkey.singlecell.pipeline.singlecell.SeuratPrototype; @@ -115,6 +114,7 @@ import org.labkey.singlecell.run.CellRangerVDJWrapper; import org.labkey.singlecell.run.NimbleAlignmentStep; import org.labkey.singlecell.run.NimbleAnalysis; +import org.labkey.singlecell.run.NimbleBulkAlignmentStep; import org.labkey.singlecell.run.RepeatNimbleReportHandler; import org.labkey.singlecell.run.VelocytoAlignmentStep; import org.labkey.singlecell.run.VelocytoAnalysisStep; @@ -220,6 +220,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new CellRangerVDJWrapper.VDJProvider()); SequencePipelineService.get().registerPipelineStep(new NimbleAlignmentStep.Provider()); SequencePipelineService.get().registerPipelineStep(new NimbleAnalysis.Provider()); + SequencePipelineService.get().registerPipelineStep(new NimbleBulkAlignmentStep.Provider()); SequencePipelineService.get().registerPipelineStep(new VelocytoAlignmentStep.Provider()); SequencePipelineService.get().registerPipelineStep(new VelocytoAnalysisStep.Provider()); @@ -294,17 +295,16 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new 
TrainScTour.Provider()); SequencePipelineService.get().registerPipelineStep(new PredictScTour.Provider()); SequencePipelineService.get().registerPipelineStep(new RunEscape.Provider()); - SequencePipelineService.get().registerPipelineStep(new RunCsCore.Provider()); SequencePipelineService.get().registerPipelineStep(new CustomGSEA.Provider()); SequencePipelineService.get().registerPipelineStep(new StudyMetadata.Provider()); SequencePipelineService.get().registerPipelineStep(new UpdateSeuratPrototype.Provider()); SequencePipelineService.get().registerPipelineStep(new RunDecoupler.Provider()); SequencePipelineService.get().registerPipelineStep(new PerformDefaultNimbleAppend.Provider()); SequencePipelineService.get().registerPipelineStep(new PerformMhcDimRedux.Provider()); - SequencePipelineService.get().registerPipelineStep(new RunTricycle.Provider()); SequencePipelineService.get().registerPipelineStep(new ApplyKnownClonotypicData.Provider()); SequencePipelineService.get().registerPipelineStep(new CalculateTcrRepertoireStats.Provider()); SequencePipelineService.get().registerPipelineStep(new PredictTcellActivation.Provider()); + SequencePipelineService.get().registerPipelineStep(new IdentifyAndStoreActiveClonotypes.Provider()); SequenceAnalysisService.get().registerReadsetListener(new SingleCellReadsetListener()); } diff --git a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java index 45ce396c2..7656f92a2 100644 --- a/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java +++ b/singlecell/src/org/labkey/singlecell/analysis/AbstractSingleCellHandler.java @@ -396,7 +396,20 @@ public void processFilesRemote(List inputFiles, JobContext c currentFiles = new ArrayList<>(); for (SequenceOutputFile so : inputFiles) { - String datasetId = FileUtil.makeLegalName(so.getReadset() != null ? ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() : so.getName()); + String datasetId; + if ("Seurat Object Prototype".equals(so.getCategory())) + { + datasetId = FileUtil.makeLegalName(ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName()); + } + else if (_doProcessRawCounts) + { + datasetId = FileUtil.makeLegalName(so.getReadset() == null ? 
diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateGeneComponentScores.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateGeneComponentScores.java
index 43be39118..4bc652684 100644
--- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateGeneComponentScores.java
+++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CalculateGeneComponentScores.java
@@ -24,7 +24,7 @@ public Provider()
         super("CalculateGeneComponentScores", "Calculate Gene Module Scores", "RIRA", "This will generate UCell scores for a set of pre-defined gene modules", Collections.singletonList(
                 SeuratToolParameter.create("savedComponent", "Saved Component(s)", "This is the name of the saved component (from RIRA) to apply", "ldk-simplecombo", new JSONObject() {{
-                    put("storeValues", "Tcell_EffectorDifferentiation;TCR_EarlyStimulationComponent;TCR_StimulationComponent1;PLS_Score_1;PLS_Score_2;PLS_Score_3;PLS_Score_4;PLS_Score_5;PLS_Score_6");
+                    put("storeValues", "Tcell_EffectorDifferentiation;TCR_EarlyStimulationComponent;CD4_Activation_Axis");
                     put("multiSelect", true);
                     put("allowBlank", false);
                     put("joinReturnValue", true);
diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CommonFilters.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CommonFilters.java
index 88ccb8696..25365bfec 100644
--- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CommonFilters.java
+++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/CommonFilters.java
@@ -27,12 +27,12 @@ public Provider()
                     put("minValue", 0);
                     put("maxValue", 1);
                     put("decimalPrecision", 3);
-                }}, 0.5),
+                }}, 0.1),
                 SeuratToolParameter.create("saturation.RNA.max", "Saturation.RNA Max", "Saturation.RNA max value", "ldk-numberfield", new JSONObject(){{
                     put("minValue", 0);
                     put("maxValue", 1);
                     put("decimalPrecision", 3);
-                }}, 0.9),
+                }}, 0.99),
                 SeuratToolParameter.create("saturation.ADT.min", "Saturation.ADT Min", "Saturation.ADT min value", "ldk-numberfield", new JSONObject(){{
                     put("minValue", 0);
                     put("maxValue", 1);
diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/IdentifyAndStoreActiveClonotypes.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/IdentifyAndStoreActiveClonotypes.java
new file mode 100644
index 000000000..f5c67047f
--- /dev/null
+++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/IdentifyAndStoreActiveClonotypes.java
@@ -0,0 +1,37 @@
+package org.labkey.singlecell.pipeline.singlecell;
+
+import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider;
+import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
+import org.labkey.api.singlecell.pipeline.SingleCellStep;
+
+import java.util.List;
+
+public class IdentifyAndStoreActiveClonotypes extends AbstractRDiscvrStep
+{
+    public IdentifyAndStoreActiveClonotypes(PipelineContext ctx, IdentifyAndStoreActiveClonotypes.Provider provider)
+    {
+        super(provider, ctx);
+    }
+
+    public static class Provider extends AbstractPipelineStepProvider<SingleCellStep>
+    {
+        public Provider()
+        {
+            super("IdentifyAndStoreActiveClonotypes", "Identify And Store Active Clonotypes", "Rdiscvr", "This uses RDiscvr::IdentifyAndStoreActiveClonotypes to predict TCR-triggered T cells and save the results to the database", List.of(), null, null);
+        }
+
+        @Override
+        public IdentifyAndStoreActiveClonotypes create(PipelineContext ctx)
+        {
+            return new IdentifyAndStoreActiveClonotypes(ctx, this);
+        }
+    }
+
+    @Override
+    public String getFileSuffix()
+    {
+        return "is";
+    }
+}
diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunCsCore.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunCsCore.java
deleted file mode 100644
index f3a2dded2..000000000
--- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunCsCore.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package org.labkey.singlecell.pipeline.singlecell;
-
-import org.labkey.api.pipeline.PipelineJobException;
-import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider;
-import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
-import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
-import org.labkey.api.singlecell.pipeline.SingleCellStep;
-
-import java.io.File;
-import java.util.Collections;
-import java.util.List;
-
-public class RunCsCore extends AbstractCellMembraneStep
-{
-    public RunCsCore(PipelineContext ctx, Provider provider)
-    {
-        super(provider, ctx);
-    }
-
-    public static class Provider extends AbstractPipelineStepProvider<SingleCellStep>
-    {
-        public Provider()
-        {
-            super("RunCsCore", "CS-CORE", "CS-CORE", "Run CS-CORE on the seurat object to identify gene modules.", Collections.emptyList(), null, null);
-        }
-
-        @Override
-        public RunCsCore create(PipelineContext ctx)
-        {
-            return new RunCsCore(ctx, this);
-        }
-    }
-
-    @Override
-    public boolean createsSeuratObjects()
-    {
-        return false;
-    }
-
-    @Override
-    public String getFileSuffix()
-    {
-        return "cscore";
-    }
-
-    @Override
-    public Output execute(SequenceOutputHandler.JobContext ctx, List inputObjects, String outputPrefix) throws PipelineJobException
-    {
-        Output output = super.execute(ctx, inputObjects, outputPrefix);
-
-        // Add the RDS files:
-        File[] outputs = ctx.getOutputDir().listFiles(f -> f.isDirectory() && f.getName().endsWith(".cscore.wgcna.rds"));
-        if (outputs == null || outputs.length == 0)
-        {
-            return output;
-        }
-
-        for (File rds : outputs)
-        {
-            String sn = rds.getName().replaceAll(".cscore.wgcna.rds", "");
-
-            output.addSequenceOutput(rds, "CS-CORE: " + sn, "CS-CORE Results", inputObjects.get(0).getReadsetId(), null, ctx.getSequenceSupport().getCachedGenomes().iterator().next().getGenomeId(), null);
-        }
-
-        return output;
-    }
-}
\ No newline at end of file
diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunTricycle.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunTricycle.java
deleted file mode 100644
index 4e79d7673..000000000
--- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunTricycle.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package org.labkey.singlecell.pipeline.singlecell;
-
-import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider;
-import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
-import org.labkey.api.singlecell.pipeline.SingleCellStep;
-
-import java.util.Arrays;
-
-public class RunTricycle extends AbstractCellMembraneStep
-{
-    public RunTricycle(PipelineContext ctx, RunTricycle.Provider provider)
-    {
-        super(provider, ctx);
-    }
-
-    public static class Provider extends AbstractPipelineStepProvider<SingleCellStep>
-    {
-        public Provider()
-        {
-            super("RunTricycle", "Run Tricycle", "CellMembrane/Tricycle", "This will run tricycle on the input object(s) to score cell cycle, and save the results in metadata.", Arrays.asList(
-
-            ), null, null);
-        }
-
-        @Override
-        public RunTricycle create(PipelineContext ctx)
-        {
-            return new RunTricycle(ctx, this);
-        }
-    }
-
-    @Override
-    public String getFileSuffix()
-    {
-        return "tricycle";
-    }
-}
diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/StudyMetadata.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/StudyMetadata.java
index 27050d587..82ffbd953 100644
--- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/StudyMetadata.java
+++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/StudyMetadata.java
@@ -24,7 +24,7 @@ public Provider()
                 {{
                     put("multiSelect", false);
                     put("allowBlank", false);
-                    put("storeValues", "PC475;PC531;TB;Malaria;AcuteNx;EC;PPG_Stims");
+                    put("storeValues", "PC475;PC531;TB;Malaria;AcuteNx;EC;PPG_Stims;IMPAC_TB_Human");
                     put("delimiter", ";");
                 }}, null, null, false, false),
                 SeuratToolParameter.create("errorIfUnknownIdsFound", "Error If Unknown Ids Found", "If true, the job will fail if the seurat object contains IDs not present in the metadata", "checkbox", null, true)
diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/SubsetSeurat.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/SubsetSeurat.java
index 7a646eb38..a17c151d8 100644
--- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/SubsetSeurat.java
+++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/SubsetSeurat.java
@@ -34,7 +34,7 @@ public Provider()
                     put("width", 600);
                     put("delimiter", DELIM);
                 }}, null),
-                ToolParameterDescriptor.create("useDplyr", "Use dplyr", "If checked, the subset will be executed using dplyr::filter rather than Seurat::subset. This should allow more complex expressions to be used, including negations", "checkbox", null, false)
+                ToolParameterDescriptor.create("useDplyr", "Use dplyr", "If checked, the subset will be executed using dplyr::filter rather than Seurat::subset. This should allow more complex expressions to be used, including negations", "checkbox", null, true)
         ), List.of("/sequenceanalysis/field/TrimmingTextArea.js"), null);
     }
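Regarding the useDplyr default flipped above: dplyr::filter evaluates an ordinary logical expression against the object's metadata, whereas Seurat::subset is stricter about which expressions it will accept. The parameter description's point about negations is that a compound filter such as !is.na(ClusterNames) & Tissue != 'Spleen' (hypothetical column names) is the kind of expression that works under dplyr::filter but can fail under Seurat::subset.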
diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/VireoHandler.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/VireoHandler.java
index d4ddc7326..3976c6155 100644
--- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/VireoHandler.java
+++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/VireoHandler.java
@@ -72,7 +72,7 @@ public boolean doSplitJobs()
     @Override
     public boolean canProcess(SequenceOutputFile o)
     {
-        return CellRangerGexCountStep.LOUPE_CATEGORY.equals(o.getCategory()) & o.getFile().getName().endsWith("cloupe.cloupe");
+        return CellRangerGexCountStep.LOUPE_CATEGORY.equals(o.getCategory()) && o.getFile() != null && o.getFile().getName().endsWith("cloupe.cloupe");
     }

     @Override
diff --git a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java
index 2bfb601e4..c7b5f284e 100644
--- a/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java
+++ b/singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java
@@ -61,7 +61,7 @@ public class CellRangerGexCountStep extends AbstractAlignmentPipelineStep
     public CellRangerGexCountStep(AlignmentStepProvider provider, PipelineContext ctx, CellRangerWrapper wrapper)
     {
         super(provider, ctx, wrapper);
     }
@@ -328,7 +328,7 @@ private boolean shouldDiscardBam()
             return false;
         }

-        return !_alwaysRetainBam && getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
+        return !_alwaysRetainBam && getProvider().getParameterByName(AbstractAlignmentStepProvider.DISCARD_BAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
     }

     private boolean _alwaysRetainBam = false;
@@ -349,7 +349,7 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu
         AbstractAlignmentStepProvider.ALIGNMENT_MODE mode = AbstractAlignmentStepProvider.ALIGNMENT_MODE.valueOf(alignmentMode);

         List<Pair<File, File>> inputFastqs = new ArrayList<>();
-        for (int i = 0; i < inputFastqs1.size();i++)
+        for (int i = 0; i < inputFastqs1.size(); i++)
         {
             File inputFastq1 = inputFastqs1.get(i);
             File inputFastq2 = inputFastqs2.get(i);
@@ -395,9 +395,9 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu
         File outdir = new File(outputDirectory, id);
         outdir = new File(outdir, "outs");

+        File bam = new File(outdir, "possorted_genome_bam.bam");
         if (!shouldDiscardBam())
         {
-            File bam = new File(outdir, "possorted_genome_bam.bam");
             if (!bam.exists())
             {
                 throw new PipelineJobException("Unable to find file: " + bam.getPath());
diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java
index 0dcb21bf9..9c0a3e785 100644
--- a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java
+++ b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java
@@ -3,7 +3,18 @@
 import org.apache.commons.io.FileUtils;
 import org.jetbrains.annotations.Nullable;
 import org.json.JSONObject;
+import org.labkey.api.data.Container;
+import org.labkey.api.data.SimpleFilter;
+import org.labkey.api.data.Sort;
+import org.labkey.api.data.TableInfo;
+import org.labkey.api.data.TableSelector;
+import org.labkey.api.exp.api.ExpData;
+import org.labkey.api.exp.api.ExperimentService;
+import org.labkey.api.pipeline.PipelineJob;
 import org.labkey.api.pipeline.PipelineJobException;
+import org.labkey.api.query.FieldKey;
+import org.labkey.api.query.QueryService;
+import org.labkey.api.query.UserSchema;
 import org.labkey.api.sequenceanalysis.model.Readset;
 import org.labkey.api.sequenceanalysis.pipeline.AbstractAlignmentStepProvider;
 import org.labkey.api.sequenceanalysis.pipeline.AlignmentOutputImpl;
@@ -14,19 +25,22 @@
 import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
 import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
 import org.labkey.api.util.PageFlowUtil;
+import org.labkey.singlecell.SingleCellSchema;

 import java.io.File;
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
+import java.util.Map;

 public class NimbleAlignmentStep extends AbstractCellRangerDependentStep
 {
     public static final String REF_GENOMES = "refGenomes";
     public static final String MAX_HITS_TO_REPORT = "maxHitsToReport";
-    public static final String ALIGN_OUTPUT = "alignmentOutput";
     public static final String STRANDEDNESS = "strandedness";
+    public static final String REQUIRE_CACHED_BARCODES = "requireCachedBarcodes";

     public NimbleAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx, CellRangerWrapper wrapper)
     {
@@ -37,7 +51,7 @@ public static class Provider extends AbstractAlignmentStepProvider
-        (PageFlowUtil.set("sequenceanalysis/field/GenomeField.js", "singlecell/panel/NimbleAlignPanel.js")), null, true, false, ALIGNMENT_MODE.MERGE_THEN_ALIGN);
+        super("Nimble", "This will run Nimble to generate a supplemental scRNA-seq feature count matrix for the provided libraries", getCellRangerGexParams(getToolParameters()), new LinkedHashSet<>(PageFlowUtil.set("sequenceanalysis/field/GenomeField.js", "singlecell/panel/NimbleAlignPanel.js")), null, true, false, ALIGNMENT_MODE.MERGE_THEN_ALIGN);
     }

     @Override
@@ -59,7 +73,10 @@ public static List getToolParameters()
                 }}, null),
                 ToolParameterDescriptor.create(MAX_HITS_TO_REPORT, "Max Hits To Report", "If a given hit has more than this number of references, it is discarded", "ldk-integerfield", new JSONObject(){{
                     put("minValue", 0);
-                }}, 4)
+                }}, 4),
+                ToolParameterDescriptor.create(REQUIRE_CACHED_BARCODES, "Fail Unless Cached Barcodes Present", "If checked, the pipeline will expect a previously computed map of cell barcodes and UMIs to already exist. By default, if this file is missing, cellranger will be re-run to create it. This flag can be helpful to avoid that computation if you expect the barcode file to exist.", "checkbox", new JSONObject(){{
+
+                }}, false)
         );
     }
@@ -68,6 +85,84 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu
     {
         AlignmentOutputImpl output = new AlignmentOutputImpl();

+        boolean throwIfNotFound = getProvider().getParameterByName(REQUIRE_CACHED_BARCODES).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
+        File cachedBarcodes = getCachedBarcodeFile(rs, throwIfNotFound);
+
+        File localBam;
+        if (cachedBarcodes == null)
+        {
+            localBam = performCellRangerAlignment(output, rs, inputFastqs1, inputFastqs2, outputDirectory, referenceGenome, basename, readGroupId, platformUnit);
+        }
+        else
+        {
+            localBam = createNimbleBam(output, rs, inputFastqs1, inputFastqs2);
+        }
+
+        // Now run nimble itself:
+        NimbleHelper helper = new NimbleHelper(getPipelineCtx(), getProvider(), getStepIdx());
+        helper.doNimbleAlign(localBam, output, rs, basename);
+        output.setBAM(localBam);
+
+        return output;
+    }
+
+    private File createNimbleBam(AlignmentOutputImpl output, Readset rs, List<File> inputFastqs1, List<File> inputFastqs2) throws PipelineJobException
+    {
+        File cellBarcodeUmiMap = getCachedBarcodeFile(rs, true);
+
+        return NimbleHelper.runFastqToBam(output, getPipelineCtx(), rs, inputFastqs1, inputFastqs2, cellBarcodeUmiMap);
+    }
+
+    private File getCachedBarcodeFile(Readset rs, boolean throwIfNotFound) throws PipelineJobException
+    {
+        Map<Integer, Integer> map = getPipelineCtx().getSequenceSupport().getCachedObject(CACHE_KEY, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(Map.class, Integer.class, Integer.class));
+        Integer dataId = map.get(rs.getReadsetId());
+        if (dataId == null)
+        {
+            if (throwIfNotFound)
+            {
+                throw new PipelineJobException("No cached data found for readset: " + rs.getReadsetId());
+            }
+
+            return null;
+        }
+
+        File ret = getPipelineCtx().getSequenceSupport().getCachedData(dataId);
+        if (ret == null || !ret.exists())
+        {
+            throw new PipelineJobException("Missing cached cellbarcode/UMI file: " + dataId);
+        }
+
+        return ret;
+    }
+
+    private ExpData findCellBarcodeFiles(Readset rs) throws PipelineJobException
+    {
+        Container targetContainer = getPipelineCtx().getJob().getContainer().isWorkbookOrTab() ? getPipelineCtx().getJob().getContainer().getParent() : getPipelineCtx().getJob().getContainer();
+        UserSchema us = QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), targetContainer, SingleCellSchema.SEQUENCE_SCHEMA_NAME);
+        TableInfo ti = us.getTable("outputfiles");
+
+        SimpleFilter sf = new SimpleFilter(FieldKey.fromString("readset"), rs.getRowId());
+        sf.addCondition(FieldKey.fromString("category"), NimbleHelper.CATEGORY_CB);
+        List<Integer> cbs = new TableSelector(ti, PageFlowUtil.set("dataid"), sf, new Sort("-rowid")).getArrayList(Integer.class);
+        if (!cbs.isEmpty())
+        {
+            int dataId = cbs.get(0);
+            ExpData d = ExperimentService.get().getExpData(dataId);
+            if (d == null || d.getFile() == null)
+            {
+                throw new PipelineJobException("Output lacks a file: " + dataId);
+            }
+
+            return d;
+        }
+
+        return null;
+    }
+
+    private File performCellRangerAlignment(AlignmentOutputImpl output, Readset rs, List<File> inputFastqs1, @Nullable List<File> inputFastqs2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException
+    {
         // We need to ensure we keep the BAM for post-processing:
         setAlwaysRetainBam(true);
@@ -87,14 +182,7 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu
             }
         }

-        NimbleHelper.write10xBarcodes(localBam, getWrapper().getLogger(), rs, referenceGenome, output);
-
-        // Now run nimble itself:
-        NimbleHelper helper = new NimbleHelper(getPipelineCtx(), getProvider(), getStepIdx());
-        helper.doNimbleAlign(localBam, output, rs, basename);
-        output.setBAM(localBam);
-
-        return output;
+        return localBam;
     }

     @Override
@@ -109,5 +197,21 @@ public void init(SequenceAnalysisJobSupport support) throws PipelineJobException
         {
             helper.prepareGenome(id);
         }
+
+        // Try to find 10x barcodes:
+        HashMap<Integer, Integer> readsetToBarcodes = new HashMap<>();
+        for (Readset rs : support.getCachedReadsets())
+        {
+            ExpData f = findCellBarcodeFiles(rs);
+            if (f != null)
+            {
+                support.cacheExpData(f);
+                readsetToBarcodes.put(rs.getReadsetId(), f.getRowId());
+            }
+        }
+
+        support.cacheObject(CACHE_KEY, readsetToBarcodes);
     }
+
+    private static final String CACHE_KEY = "nimble.cb";
 }
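A note on the init()/performAlignment() split in the file above: init() runs on the webserver, where database access is available, locates each readset's most recent cell-barcode output via findCellBarcodeFiles(), and serializes a readsetId-to-ExpData-rowId map into the job; performAlignment() later executes on the pipeline node and can only read back what was cached. Below is a minimal sketch of that round-trip; the class and method names are hypothetical, and only the job-support calls shown in this patch are assumed.

    import org.labkey.api.pipeline.PipelineJob;
    import org.labkey.api.pipeline.PipelineJobException;
    import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;

    import java.util.HashMap;
    import java.util.Map;

    class BarcodeCacheSketch
    {
        static final String CACHE_KEY = "nimble.cb";

        // Webserver side (init): store a simple map under a string key.
        void init(SequenceAnalysisJobSupport support) throws PipelineJobException
        {
            Map<Integer, Integer> readsetToBarcodes = new HashMap<>();
            readsetToBarcodes.put(1234, 5678); // readsetId -> ExpData rowId (example values)
            support.cacheObject(CACHE_KEY, readsetToBarcodes);
        }

        // Remote side (performAlignment): the map returns through Jackson, so the
        // generic type must be reconstructed explicitly when reading it back:
        Integer lookup(SequenceAnalysisJobSupport support, int readsetId) throws PipelineJobException
        {
            Map<Integer, Integer> map = support.getCachedObject(CACHE_KEY,
                    PipelineJob.createObjectMapper().getTypeFactory()
                            .constructParametricType(Map.class, Integer.class, Integer.class));
            return map.get(readsetId);
        }
    }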
diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleAnalysis.java b/singlecell/src/org/labkey/singlecell/run/NimbleAnalysis.java
index 8fa4390b7..b24d546b2 100644
--- a/singlecell/src/org/labkey/singlecell/run/NimbleAnalysis.java
+++ b/singlecell/src/org/labkey/singlecell/run/NimbleAnalysis.java
@@ -29,7 +29,7 @@ public static class Provider extends AbstractAnalysisStepProvider
-        (PageFlowUtil.set("sequenceanalysis/field/GenomeField.js", "singlecell/panel/NimbleAlignPanel.js")), null);
+        super("NimbleAnalysis", "Nimble", null, "This will run Nimble to generate a supplemental feature count matrix for the provided libraries. This should work using either CellRanger/scRNA-seq or bulk input data.", NimbleAlignmentStep.getToolParameters(), new LinkedHashSet<>(PageFlowUtil.set("sequenceanalysis/field/GenomeField.js", "singlecell/panel/NimbleAlignPanel.js")), null);
     }

     @Override
@@ -58,8 +58,6 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
         NimbleHelper helper = new NimbleHelper(getPipelineCtx(), getProvider(), getStepIdx());
         helper.doNimbleAlign(inputBam, output, rs, FileUtil.getBaseName(inputBam));

-        NimbleHelper.write10xBarcodes(inputBam, getPipelineCtx().getLogger(), rs, referenceGenome, output);
-
         return output;
     }
diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleBulkAlignmentStep.java b/singlecell/src/org/labkey/singlecell/run/NimbleBulkAlignmentStep.java
new file mode 100644
index 000000000..4311d3e8b
--- /dev/null
+++ b/singlecell/src/org/labkey/singlecell/run/NimbleBulkAlignmentStep.java
@@ -0,0 +1,184 @@
+package org.labkey.singlecell.run;
+
+import org.apache.logging.log4j.Logger;
+import org.jetbrains.annotations.Nullable;
+import org.labkey.api.pipeline.PipelineJobException;
+import org.labkey.api.sequenceanalysis.model.Readset;
+import org.labkey.api.sequenceanalysis.pipeline.AbstractAlignmentStepProvider;
+import org.labkey.api.sequenceanalysis.pipeline.AlignmentOutputImpl;
+import org.labkey.api.sequenceanalysis.pipeline.AlignmentStep;
+import org.labkey.api.sequenceanalysis.pipeline.AlignmentStepProvider;
+import org.labkey.api.sequenceanalysis.pipeline.IndexOutputImpl;
+import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
+import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
+import org.labkey.api.sequenceanalysis.pipeline.SamtoolsRunner;
+import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
+import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
+import org.labkey.api.sequenceanalysis.run.AbstractAlignmentPipelineStep;
+import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
+import org.labkey.api.util.FileUtil;
+import org.labkey.api.util.PageFlowUtil;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.List;
+
+public class NimbleBulkAlignmentStep extends AbstractAlignmentPipelineStep implements AlignmentStep
+{
+    public static class Provider extends AbstractAlignmentStepProvider
+    {
+        public Provider()
+        {
+            super("Nimble-Bulk",
+                    "This will run Nimble to generate a supplemental feature count matrix for the provided libraries. This version is intended for bulk input data. Please use the CellRanger/Nimble version for scRNA-seq",
+                    NimbleAlignmentStep.getToolParameters(),
+                    new LinkedHashSet<>(PageFlowUtil.set("sequenceanalysis/field/GenomeField.js", "singlecell/panel/NimbleAlignPanel.js")),
+                    null,
+                    true, false, ALIGNMENT_MODE.MERGE_THEN_ALIGN);
+        }
+
+        @Override
+        public NimbleBulkAlignmentStep create(PipelineContext ctx)
+        {
+            return new NimbleBulkAlignmentStep(this, ctx, new NimbleBulkWrapper(ctx.getLogger()));
+        }
+    }
+
+    public NimbleBulkAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx, NimbleBulkAlignmentStep.NimbleBulkWrapper wrapper)
+    {
+        super(provider, ctx, wrapper);
+    }
+
+    @Override
+    public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException
+    {
+        return new IndexOutputImpl(referenceGenome);
+    }
+
+    @Override
+    public void init(SequenceAnalysisJobSupport support) throws PipelineJobException
+    {
+        NimbleHelper helper = new NimbleHelper(getPipelineCtx(), getProvider(), getStepIdx());
+
+        List<Integer> genomeIds = helper.getGenomeIds();
+        for (int id : genomeIds)
+        {
+            helper.prepareGenome(id);
+        }
+    }
+
+    @Override
+    public AlignmentOutput performAlignment(Readset rs, List<File> inputFastqs1, @Nullable List<File> inputFastqs2, File outputDirectory, ReferenceGenome referenceGenome, String basename, String readGroupId, @Nullable String platformUnit) throws PipelineJobException
+    {
+        AlignmentOutputImpl output = new AlignmentOutputImpl();
+        SamtoolsRunner st = new SamtoolsRunner(getPipelineCtx().getLogger());
+
+        List<File> outputBams = new ArrayList<>();
+        int bamIdx = 0;
+        while (bamIdx < inputFastqs1.size())
+        {
+            File outputBam = new File(getPipelineCtx().getWorkingDirectory(), FileUtil.makeLegalName(rs.getName()) + ".unmapped." + bamIdx + ".bam");
+            List<String> args = new ArrayList<>(Arrays.asList(st.getSamtoolsPath().getPath(), "import", "-o", outputBam.getPath()));
+            args.add("-r");
+            args.add("ID:" + readGroupId);
+
+            args.add("-r");
+            args.add("LB:" + rs.getReadsetId().toString());
+
+            args.add("-r");
+            args.add("PL:" + (rs.getPlatform() == null ? "ILLUMINA" : rs.getPlatform()));
+
+            args.add("-r");
+            args.add("PU:" + (platformUnit == null ? rs.getReadsetId().toString() : platformUnit));
+
+            args.add("-r");
+            args.add("SM:" + rs.getName().replaceAll(" ", "_"));
+
+            if (inputFastqs2 == null || inputFastqs2.isEmpty())
+            {
+                args.add("-O");
+                args.add(inputFastqs1.get(bamIdx).getPath());
+            }
+            else
+            {
+                args.add("-1");
+                args.add(inputFastqs1.get(bamIdx).getPath());
+
+                if (bamIdx >= inputFastqs2.size())
+                {
+                    throw new PipelineJobException("Unequal lengths for first/second pair FASTQs");
+                }
+
+                args.add("-2");
+                args.add(inputFastqs2.get(bamIdx).getPath());
+            }
+            bamIdx++;
+
+            st.execute(args);
+            outputBams.add(outputBam);
+        }
+
+        File outputBam;
+        if (outputBams.size() > 1)
+        {
+            outputBam = new File(getPipelineCtx().getWorkingDirectory(), FileUtil.makeLegalName(rs.getName()) + ".unmapped.bam");
+            outputBams.forEach(output::addIntermediateFile);
+
+            List<String> args = new ArrayList<>(Arrays.asList(st.getSamtoolsPath().getPath(), "merge", "-o", outputBam.getPath(), "-f"));
+            Integer maxThreads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
+            if (maxThreads != null)
+            {
+                args.add("-@");
+                args.add(maxThreads.toString());
+            }
+
+            outputBams.forEach(bam -> args.add(bam.getPath()));
+            st.execute(args);
+        }
+        else
+        {
+            outputBam = outputBams.get(0);
+        }
+
+        // Now run nimble itself:
+        NimbleHelper helper = new NimbleHelper(getPipelineCtx(), getProvider(), getStepIdx());
+        helper.doNimbleAlign(outputBam, output, rs, basename);
+        output.setBAM(outputBam);
+
+        return output;
+    }
+
+    @Override
+    public boolean doAddReadGroups()
+    {
+        return false;
+    }
+
+    @Override
+    public boolean doSortIndexBam()
+    {
+        return false;
+    }
+
+    @Override
+    public boolean alwaysCopyIndexToWorkingDir()
+    {
+        return false;
+    }
+
+    @Override
+    public boolean supportsGzipFastqs()
+    {
+        return true;
+    }
+
+    public static class NimbleBulkWrapper extends AbstractCommandWrapper
+    {
+        public NimbleBulkWrapper(Logger log)
+        {
+            super(log);
+        }
+    }
+}
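For orientation, the argument lists assembled by performAlignment() above amount to one samtools import per FASTQ pair, followed by a samtools merge when the readset spans multiple lanes. The following self-contained sketch prints the equivalent command lines; all paths, read-group values, and the thread count are illustrative, not taken from the patch.

    // Illustrative only: the samtools commands NimbleBulkAlignmentStep assembles
    // for a hypothetical paired-end readset split across two lanes.
    import java.util.List;

    class NimbleBulkCommandSketch
    {
        public static void main(String[] args)
        {
            for (int i = 0; i < 2; i++)
            {
                // One unmapped BAM per FASTQ pair, with read-group fields supplied via -r:
                List<String> importCmd = List.of("samtools", "import",
                        "-o", "MyReadset.unmapped." + i + ".bam",
                        "-r", "ID:rg1", "-r", "LB:1234", "-r", "PL:ILLUMINA", "-r", "PU:1234", "-r", "SM:MyReadset",
                        "-1", "lane" + (i + 1) + "_R1.fastq.gz",
                        "-2", "lane" + (i + 1) + "_R2.fastq.gz");
                System.out.println(String.join(" ", importCmd));
            }

            // The per-lane BAMs are then combined with samtools merge:
            List<String> mergeCmd = List.of("samtools", "merge",
                    "-o", "MyReadset.unmapped.bam", "-f", "-@", "8",
                    "MyReadset.unmapped.0.bam", "MyReadset.unmapped.1.bam");
            System.out.println(String.join(" ", mergeCmd));
        }
    }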
diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java
index 6afdbe366..19379d318 100644
--- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java
+++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java
@@ -27,9 +27,11 @@
 import org.labkey.api.sequenceanalysis.pipeline.PipelineStepOutput;
 import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
 import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
+import org.labkey.api.sequenceanalysis.pipeline.SamtoolsRunner;
 import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
 import org.labkey.api.sequenceanalysis.run.DISCVRSeqRunner;
 import org.labkey.api.sequenceanalysis.run.DockerWrapper;
+import org.labkey.api.util.FileUtil;
 import org.labkey.api.util.PageFlowUtil;
 import org.labkey.api.writer.PrintWriters;
@@ -496,26 +498,23 @@ private Map doAlignment(List genomes, List
         barcodeArgs = new ArrayList<>(runner.getBaseArgs("Save10xBarcodes"));
         barcodeArgs.add("-I");
         barcodeArgs.add(bam.getPath());

-        File cbOutput = new File(bam.getParentFile(), SequenceAnalysisService.get().getUnzippedBaseName(bam.getName()) + "cb.txt.gz");
-        barcodeArgs.add("--cbOutput");
-        barcodeArgs.add(cbOutput.getPath());
-
-        File umiOutput = new File(bam.getParentFile(), SequenceAnalysisService.get().getUnzippedBaseName(bam.getName()) + "umi.txt.gz");
-        barcodeArgs.add("--umiOutput");
-        barcodeArgs.add(umiOutput.getPath());
+        File bcOutput = new File(bam.getParentFile(), SequenceAnalysisService.get().getUnzippedBaseName(bam.getName()) + ".cb.txt.gz");
+        barcodeArgs.add("--output");
+        barcodeArgs.add(bcOutput.getPath());

         runner.execute(barcodeArgs);

-        output.addSequenceOutput(cbOutput, "10x CellBarcode Map: " + rs.getName(), "10x CellBarcode Map", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
-        output.addSequenceOutput(umiOutput, "10x UMI Map: " + rs.getName(), "10x UMI Map", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
+        output.addSequenceOutput(bcOutput, "10x CellBarcode Map: " + rs.getName(), CATEGORY_CB, rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
     }

     public static File runNimbleReport(File alignResultsGz, int genomeId, PipelineStepOutput output, PipelineContext ctx) throws PipelineJobException
@@ -595,6 +594,75 @@ private static File getNimbleDoneFile(File parentDir, String resumeString)
         return new File(parentDir, "nimble." + resumeString + ".done");
     }

+    public static File runFastqToBam(PipelineStepOutput output, PipelineContext ctx, Readset rs, List<File> inputFastqs1, List<File> inputFastqs2, File cellBarcodeUmiMap) throws PipelineJobException
+    {
+        List<File> outputBams = new ArrayList<>();
+        int bamIdx = 0;
+        while (bamIdx < inputFastqs1.size())
+        {
+            File outputBam = new File(ctx.getWorkingDirectory(), FileUtil.makeLegalName(rs.getName()) + ".unmapped." + bamIdx + ".bam");
+
+            List<String> args = new ArrayList<>();
+            args.add("python3");
+            args.add("-m");
+            args.add("nimble");
+
+            args.add("fastq-to-bam");
+
+            Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
+            if (maxThreads != null)
+            {
+                args.add("-c");
+                args.add(maxThreads.toString());
+            }
+
+            args.add("--r1-fastq");
+            args.add(inputFastqs1.get(bamIdx).getPath());
+            if (bamIdx >= inputFastqs2.size())
+            {
+                throw new PipelineJobException("Unequal lengths for first/second pair FASTQs");
+            }
+
+            args.add("--r2-fastq");
+            args.add(inputFastqs2.get(bamIdx).getPath());
+
+            args.add("--map");
+            args.add(cellBarcodeUmiMap.getPath());
+
+            args.add("--output");
+            args.add(outputBam.getPath());
+
+            runUsingDocker(args, output, "nimble.fastq-to-bam." + bamIdx, ctx);
+            outputBams.add(outputBam);
+            bamIdx++;
+        }
+
+        File outputBam;
+        if (outputBams.size() > 1)
+        {
+            outputBam = new File(ctx.getWorkingDirectory(), FileUtil.makeLegalName(rs.getName()) + ".unmapped.bam");
+            outputBams.forEach(output::addIntermediateFile);
+
+            SamtoolsRunner st = new SamtoolsRunner(ctx.getLogger());
+            List<String> args = new ArrayList<>(Arrays.asList(st.getSamtoolsPath().getPath(), "merge", "-o", outputBam.getPath(), "-f"));
+            Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
+            if (maxThreads != null)
+            {
+                args.add("-@");
+                args.add(maxThreads.toString());
+            }
+
+            outputBams.forEach(bam -> args.add(bam.getPath()));
+            st.execute(args);
+        }
+        else
+        {
+            outputBam = outputBams.get(0);
+        }
+
+        return outputBam;
+    }
+
 public static String DOCKER_CONTAINER_NAME = "ghcr.io/bimberlab/nimble:latest";

 private boolean runUsingDocker(List nimbleArgs, PipelineStepOutput output, @Nullable String resumeString) throws PipelineJobException