diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index 8aa3df5a5..5ba95e5dd 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -333,3 +333,20 @@ then else echo "Already installed" fi + + +if [[ ! -e ${LKTOOLS_DIR}/primer3_core || ! -z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf $LKTOOLS_DIR/primer3_core* + rm -Rf primer3* + rm -Rf v2.6.1.tar.gz + + wget https://github.com/primer3-org/primer3/archive/refs/tags/v2.6.1.tar.gz + tar -xf v2.6.1.tar.gz + cd primer3-2.6.1/src + make + install primer3_core $LKTOOLS_DIR/ +else + echo "Already installed" +fi diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/TagPcrSummaryStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/TagPcrSummaryStep.java index 5aa9885ec..a71ee865b 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/TagPcrSummaryStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/TagPcrSummaryStep.java @@ -72,7 +72,7 @@ public Provider() put("checked", true); }}, true), ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("--insert-name"), "insertType", "Insert Type", "The type of insert to detect.", "ldk-simplecombo", new JSONObject(){{ - put("storeValues", "PiggyBac;Lentivirus;PREDICT"); + put("storeValues", "PiggyBac;Lentivirus;PREDICT;BxBI_attP"); put("allowBlank", false); }}, null), ToolParameterDescriptor.create(DESIGN_PRIMERS, "Design Primers", "If selected, Primer3 will be used to design primers to flank integration sites", "checkbox", new JSONObject(){{ diff --git a/Studies/resources/queries/studies/studyCohorts/.qview.xml b/Studies/resources/queries/studies/studyCohorts/.qview.xml new file mode 100644 index 000000000..c2eff2380 --- /dev/null +++ b/Studies/resources/queries/studies/studyCohorts/.qview.xml @@ -0,0 +1,22 @@ + \ No newline at end of file diff --git a/Studies/resources/schemas/dbscripts/postgresql/studies-23.005-23.006.sql b/Studies/resources/schemas/dbscripts/postgresql/studies-23.005-23.006.sql new file mode 100644 index 000000000..72c5798d2 --- /dev/null +++ b/Studies/resources/schemas/dbscripts/postgresql/studies-23.005-23.006.sql @@ -0,0 +1 @@ +ALTER TABLE studies.subjectAnchorDates ADD COLUMN sourceRecord varchar(1000); diff --git a/Studies/resources/schemas/dbscripts/sqlserver/studies-23.005-23.006.sql b/Studies/resources/schemas/dbscripts/sqlserver/studies-23.005-23.006.sql new file mode 100644 index 000000000..1acef5825 --- /dev/null +++ b/Studies/resources/schemas/dbscripts/sqlserver/studies-23.005-23.006.sql @@ -0,0 +1 @@ +ALTER TABLE studies.subjectAnchorDates ADD sourceRecord varchar(1000); diff --git a/Studies/resources/schemas/studies.xml b/Studies/resources/schemas/studies.xml index c28038ff9..b6f4e992f 100644 --- a/Studies/resources/schemas/studies.xml +++ b/Studies/resources/schemas/studies.xml @@ -526,6 +526,10 @@ label + + Source Record + true + Data Source diff --git a/Studies/src/org/labkey/studies/StudiesModule.java b/Studies/src/org/labkey/studies/StudiesModule.java index c6c14306f..fd7553fc4 100644 --- a/Studies/src/org/labkey/studies/StudiesModule.java +++ b/Studies/src/org/labkey/studies/StudiesModule.java @@ -34,7 +34,7 @@ public String getName() @Override public @Nullable Double getSchemaVersion() { - return 23.005; + return 23.006; } @Override diff --git a/Studies/src/org/labkey/studies/query/StudiesTriggerFactory.java b/Studies/src/org/labkey/studies/query/StudiesTriggerFactory.java index c2ca102f2..bcec14717 100644 --- a/Studies/src/org/labkey/studies/query/StudiesTriggerFactory.java +++ b/Studies/src/org/labkey/studies/query/StudiesTriggerFactory.java @@ -93,6 +93,11 @@ private void possiblyResolveStudyOrCohort(String tableToQuery, @Nullable Map implements AlignmentStep { @@ -613,4 +618,88 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co } } } + + public enum Chemistry + { + // See: https://kb.10xgenomics.com/s/article/115004506263-What-is-a-barcode-inclusion-list-formerly-barcode-whitelist + // cellranger-x.y.z/lib/python/cellranger/barcodes/ + FivePE_V3("Single Cell 5' PE v3", "3M-5pgex-jan-2023.txt.gz"), + FivePE_V2("Single Cell 5' PE v2", "737k-august-2016.txt"); + + final String _label; + final String _inclusionListFile; + + Chemistry(String label, String inclusionListFile) + { + _label = label; + _inclusionListFile = inclusionListFile; + } + + public File getInclusionListFile(Logger logger) throws PipelineJobException + { + File exe = new CellRangerWrapper(logger).getExe(); + if (Files.isSymbolicLink(exe.toPath())) + { + try + { + exe = Files.readSymbolicLink(exe.toPath()).toFile(); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + File il = new File(exe.getParentFile(), "lib/python/cellranger/barcodes/" + _inclusionListFile); + if (!il.exists()) + { + throw new PipelineJobException("Unable to find file: " + il.getPath()); + } + + return il; + } + + public static Chemistry getByLabel(String label) + { + for (Chemistry c : Chemistry.values()) + { + if (c._label.equals(label)) + { + return c; + } + } + + throw new IllegalArgumentException("Unknown chemistry: " + label); + } + } + + public static Chemistry inferChemistry(File cloupeFile) throws PipelineJobException + { + File html = new File(cloupeFile.getPath().replaceAll("_cloupe.cloupe$", "_web_summary.html")); + if (!html.exists()) + { + throw new IllegalArgumentException("Missing file: " + html.getPath()); + } + + final Pattern pattern = Pattern.compile("\\[\"Chemistry\",\"(.*?)\"],"); + try (BufferedReader reader = Readers.getReader(html)) + { + String line; + while ((line = reader.readLine()) != null) + { + Matcher m = pattern.matcher(line); + if (m.find()) + { + String chem = m.group(1); + return Chemistry.getByLabel(chem); + } + } + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + throw new IllegalArgumentException("Unable to infer chemistry for file: " + html.getPath()); + } } diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java index 9c0a3e785..980918b56 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java @@ -86,10 +86,10 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu AlignmentOutputImpl output = new AlignmentOutputImpl(); boolean throwIfNotFound = getProvider().getParameterByName(REQUIRE_CACHED_BARCODES).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false); - File cachedBarcodes = getCachedBarcodeFile(rs, throwIfNotFound); + File loupeFile = getCachedLoupeFile(rs, throwIfNotFound); File localBam; - if (cachedBarcodes == null) + if (loupeFile == null) { localBam = performCellRangerAlignment(output, rs, inputFastqs1, inputFastqs2, outputDirectory, referenceGenome, basename, readGroupId, platformUnit); } @@ -109,15 +109,15 @@ public AlignmentOutput performAlignment(Readset rs, List inputFastqs1, @Nu private File createNimbleBam(AlignmentOutputImpl output, Readset rs, List inputFastqs1, List inputFastqs2) throws PipelineJobException { - File cellBarcodeUmiMap = getCachedBarcodeFile(rs, true); + File loupeFile = getCachedLoupeFile(rs, true); - return NimbleHelper.runFastqToBam(output, getPipelineCtx(), rs, inputFastqs1, inputFastqs2, cellBarcodeUmiMap); + return NimbleHelper.runFastqToBam(output, getPipelineCtx(), rs, inputFastqs1, inputFastqs2, loupeFile); } - private File getCachedBarcodeFile(Readset rs, boolean throwIfNotFound) throws PipelineJobException + private File getCachedLoupeFile(Readset rs, boolean throwIfNotFound) throws PipelineJobException { - Map map = getPipelineCtx().getSequenceSupport().getCachedObject(CACHE_KEY, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(Map.class, Integer.class, Integer.class)); - Integer dataId = map.get(rs.getReadsetId()); + Map map = getPipelineCtx().getSequenceSupport().getCachedObject(CACHE_KEY, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(Map.class, Long.class, Long.class)); + Long dataId = map.get(rs.getReadsetId()); if (dataId == null) { if (throwIfNotFound) @@ -137,14 +137,14 @@ private File getCachedBarcodeFile(Readset rs, boolean throwIfNotFound) throws Pi return ret; } - private ExpData findCellBarcodeFiles(Readset rs) throws PipelineJobException + private ExpData findLoupeFile(Readset rs) throws PipelineJobException { Container targetContainer = getPipelineCtx().getJob().getContainer().isWorkbookOrTab() ? getPipelineCtx().getJob().getContainer().getParent() : getPipelineCtx().getJob().getContainer(); UserSchema us = QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), targetContainer, SingleCellSchema.SEQUENCE_SCHEMA_NAME); TableInfo ti = us.getTable("outputfiles"); SimpleFilter sf = new SimpleFilter(FieldKey.fromString("readset"), rs.getRowId()); - sf.addCondition(FieldKey.fromString("category"), NimbleHelper.CATEGORY_CB); + sf.addCondition(FieldKey.fromString("category"), CellRangerGexCountStep.LOUPE_CATEGORY); List cbs = new TableSelector(ti, PageFlowUtil.set("dataid"), sf, new Sort("-rowid")).getArrayList(Integer.class); if (!cbs.isEmpty()) { @@ -199,19 +199,19 @@ public void init(SequenceAnalysisJobSupport support) throws PipelineJobException } // Try to find 10x barcodes: - HashMap readsetToBarcodes = new HashMap<>(); + HashMap readsetToLoupe = new HashMap<>(); for (Readset rs : support.getCachedReadsets()) { - ExpData f = findCellBarcodeFiles(rs); + ExpData f = findLoupeFile(rs); if (f != null) { support.cacheExpData(f); - readsetToBarcodes.put(rs.getReadsetId(), f.getRowId()); + readsetToLoupe.put(rs.getReadsetId(), f.getRowId()); } } - support.cacheObject(CACHE_KEY, readsetToBarcodes); + support.cacheObject(CACHE_KEY, readsetToLoupe); } - private static final String CACHE_KEY = "nimble.cb"; + private static final String CACHE_KEY = "nimble.loupe"; } diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 19379d318..7e30115b5 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -594,7 +594,7 @@ private static File getNimbleDoneFile(File parentDir, String resumeString) return new File(parentDir, "nimble." + resumeString + ".done"); } - public static File runFastqToBam(PipelineStepOutput output, PipelineContext ctx, Readset rs, List inputFastqs1, List inputFastqs2, File cellBarcodeUmiMap) throws PipelineJobException + public static File runFastqToBam(PipelineStepOutput output, PipelineContext ctx, Readset rs, List inputFastqs1, List inputFastqs2, File loupeFile) throws PipelineJobException { List outputBams = new ArrayList<>(); int bamIdx = 0; @@ -627,7 +627,8 @@ public static File runFastqToBam(PipelineStepOutput output, PipelineContext ctx, args.add(inputFastqs2.get(bamIdx).getPath()); args.add("--map"); - args.add(cellBarcodeUmiMap.getPath()); + CellRangerGexCountStep.Chemistry chem = CellRangerGexCountStep.inferChemistry(loupeFile); + args.add(chem.getInclusionListFile(ctx.getLogger()).getPath()); args.add("--output"); args.add(outputBam.getPath());