diff --git a/ehr/api-src/org/labkey/api/ehr/EHRService.java b/ehr/api-src/org/labkey/api/ehr/EHRService.java index 550935660..2d7e8b092 100644 --- a/ehr/api-src/org/labkey/api/ehr/EHRService.java +++ b/ehr/api-src/org/labkey/api/ehr/EHRService.java @@ -15,6 +15,7 @@ */ package org.labkey.api.ehr; +import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.labkey.api.data.AbstractTableInfo; @@ -31,6 +32,7 @@ import org.labkey.api.ehr.history.*; import org.labkey.api.ldk.table.ButtonConfigFactory; import org.labkey.api.module.Module; +import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.query.BatchValidationException; import org.labkey.api.query.DetailsURL; import org.labkey.api.query.ExprColumn; @@ -43,6 +45,7 @@ import org.labkey.api.view.ActionURL; import org.labkey.api.view.template.ClientDependency; +import java.io.File; import java.io.IOException; import java.util.Collection; import java.util.Date; @@ -326,4 +329,14 @@ public EHRQCState getQCState(@NotNull Container c) abstract public List ensureStudyQCStates(Container c, final User u, final boolean commitChanges); abstract public void registerLabWorkOverrides(Module module, String fromType, LabworkType toType); + + /** + * The EHR has a built-in GeneticCalculations pipeline job that computes inbreeding and kinship based on the pedigree. + * These are normally calculated in R, saved as TSVs, and imported using java code. This method is a separate entrypoint + * that allows other code to perform the calculations, save the results as TSVs, and then trigger import here. + * + * A use case is a separate pipeline server that performs the R computation on a cluster, and then triggers the main webserver to import + * those results. 
+ */ + abstract public void standaloneProcessKinshipAndInbreeding(Container c, User u, File pipelineDir, Logger log) throws PipelineJobException; } diff --git a/ehr/resources/pipeline/kinship/populateInbreeding.r b/ehr/resources/pipeline/kinship/populateInbreeding.r index 9bf71ba85..bc5b6b95e 100644 --- a/ehr/resources/pipeline/kinship/populateInbreeding.r +++ b/ehr/resources/pipeline/kinship/populateInbreeding.r @@ -7,47 +7,43 @@ # This R script will calculate and store inbreeding coefficients for all animals in the colony. This data will be compared against # the information currently stored in the DB and the minimal number of inserts/updates/deletes are then performed. This script is designed # to run as a daily cron job. - - -options(error = dump.frames); -library(pedigree); -library(getopt); -library(Matrix); +library(pedigree) +library(getopt) +library(Matrix) library(dplyr) spec <- matrix(c( -'inputFile', '-f', 1, "character" -), ncol=4, byrow=TRUE); -opts = getopt(spec, commandArgs(trailingOnly = TRUE)); + 'inputFile', '-f', 1, 'character' +), ncol=4, byrow=TRUE) +opts <- getopt(spec, commandArgs(trailingOnly = TRUE)) -allPed <- read.table(opts$inputFile); +allPed <- read.table(opts$inputFile) colnames(allPed)<-c('Id', 'Dam', 'Sire', 'Gender') -is.na(allPed$Id)<-which(allPed$Id=="") -is.na(allPed$Dam)<-which(allPed$Dam=="") -is.na(allPed$Sire)<-which(allPed$Sire=="") -is.na(allPed$Gender)<-which(allPed$Gender=="") - -df <- data.frame(id=as.character(allPed$Id), 'id parent1'=allPed$Dam, 'id parent2'=allPed$Sire, stringsAsFactors=FALSE); -colnames(df)<-c("id", "id parent1", "id parent2") +allPed$Id[allPed$Id == ""] <- NA +allPed$Dam[allPed$Dam == ""] <- NA +allPed$Sire[allPed$Sire == ""] <- NA +allPed$Gender[allPed$Gender == ""] <- NA -originalIds <-as.data.frame(df[,1,drop=FALSE]) +df <- data.frame(id=as.character(allPed$Id), 'id parent1'=allPed$Dam, 'id parent2'=allPed$Sire, stringsAsFactors=FALSE) +originalIds <- df$id +print(paste0('Input IDs: ', 
nrow(df))) #this is a function in the pedigree package designed to add missing parents to the dataframe #see pedigree package documentation for more detail -df <- add.Inds(df); +df <- add.Inds(df) ord <- orderPed(df) df <- df[order(ord),] #use an existing package to calculate inbreeding -ib = calcInbreeding(df); - -newRecords <- data.frame(Id=as.character(df$id), coefficient=ib, stringsAsFactors=FALSE); +ib <- calcInbreeding(df) #only calculate inbreeding for Ids at the center -newRecords <- dplyr::filter(newRecords, Id %in% originalIds$id) +newRecords <- data.frame(Id=as.character(df$id), coefficient=ib, stringsAsFactors=FALSE) %>% + dplyr::filter(Id %in% originalIds) + +if (nrow(newRecords) != length(originalIds)) { + stop(paste0('Output dataframe and input IDs not the same length! Expected: ', length(originalIds), ', was: ', nrow(newRecords))) +} -# write TSV to disk -print("Output table:"); -print(str(newRecords)); -write.table(newRecords, file = "inbreeding.txt", append = FALSE,row.names=F,quote=F,sep="\t"); \ No newline at end of file +write.table(newRecords, file = "inbreeding.txt", append = FALSE, row.names=F, quote=F, sep="\t") \ No newline at end of file diff --git a/ehr/resources/pipeline/kinship/populateKinship.r b/ehr/resources/pipeline/kinship/populateKinship.r index abffbd120..fc2cad6be 100644 --- a/ehr/resources/pipeline/kinship/populateKinship.r +++ b/ehr/resources/pipeline/kinship/populateKinship.r @@ -7,67 +7,64 @@ # This R script will calculate and store kinship coefficients (aka. relatedness) for all animals in the colony. This is a large, sparse matrix. # The matrix is converted into a very long 3-column dataframe (animal1, animal2, coefficient). 
This dataframe is output to a TSV file, # which is normally imported into ehr.kinship by java code in GeneticCalculationsImportTask - - -#options(echo=TRUE); -options(error = dump.frames); -library(methods); -library(kinship2); -library(getopt); -library(Matrix); -library(dplyr); +library(kinship2) +library(getopt) +library(Matrix) +library(dplyr) spec <- matrix(c( -#'containerPath', '-c', 1, "character", -#'baseUrl', '-b', 1, "character" -'inputFile', '-f', 1, "character" -), ncol=4, byrow=TRUE); -opts = getopt(spec, commandArgs(trailingOnly = TRUE)); + 'inputFile', '-f', 1, 'character' +), ncol=4, byrow=TRUE) +opts <- getopt(spec, commandArgs(trailingOnly = TRUE)) -allPed <- read.table(opts$inputFile, quote="\""); -colnames(allPed)<-c('Id', 'Dam', 'Sire', 'Gender', 'Species'); +allPed <- read.table(opts$inputFile, quote="\"") +colnames(allPed)<-c('Id', 'Dam', 'Sire', 'Gender', 'Species') -is.na(allPed$Id)<-which(allPed$Id=="") -is.na(allPed$Dam)<-which(allPed$Dam=="") -is.na(allPed$Sire)<-which(allPed$Sire=="") -is.na(allPed$Gender)<-which(allPed$Gender=="") +allPed$Id[allPed$Id == ""] <- NA +allPed$Dam[allPed$Dam == ""] <- NA +allPed$Sire[allPed$Sire == ""] <- NA +allPed$Gender[allPed$Gender == "" | is.na(allPed$Gender)] <- 3 # 3 = unknown allPed$Species <- as.character(allPed$Species) allPed$Species[is.na(allPed$Species)] <- c('Unknown') allPed$Species <- as.factor(allPed$Species) -# In order to reduce the max matrix size, calculate famids using makefamid, then analyze each group separately -# It resizes the biggest matrix from 12000^2 to 8200^2 thus reduces the memory used by half -newRecords=NULL +if (any(allPed$Species == 'Unknown')) { + print(paste0('There are ', sum(allPed$Species == 'Unknown'), ' Ids with species = Unknown')) +} + +newRecords <- NULL for (species in unique(allPed$Species)){ - print(paste0('processing species: ', species)) - allRecordsForSpecies <- allPed[allPed$Species == species,] + allRecordsForSpecies <- allPed[allPed$Species %in% 
species,] + print(paste0('Processing species: ', species, ', with ', nrow(allRecordsForSpecies), ' IDs')) + if (nrow(allRecordsForSpecies) == 1) { + print('single record, skipping') + next + } # Add missing parents for accurate kinship calculations fixedRecords <- with(allRecordsForSpecies, fixParents(id = Id, dadid = Sire, momid = Dam, sex = Gender)) # Kinship is expecting records to be sorted IAW it's own pedigree function - recordsForSpecies <- with(fixedRecords, pedigree(id=id,dadid=dadid,momid=momid,sex=sex,missid=0)) + recordsForSpecies <- with(fixedRecords, pedigree(id = id, dadid = dadid, momid = momid, sex = sex, missid = 0)) - temp.kin=kinship(recordsForSpecies) + temp.kin <- kinship(recordsForSpecies) # Add rownames to make matrix symmetric, which is required downstream rownames(temp.kin) <- colnames(temp.kin) # Convert kinship matrix to a triplet list of two ids and their coefficient - summaryDf = as.data.frame(summary(as(temp.kin, "dgCMatrix"))) + summaryDf <- as.data.frame(summary(as(temp.kin, "dgCMatrix"))) idList <- rownames(temp.kin) - temp.tri = data.frame(Id=idList[summaryDf$i], Id2=idList[summaryDf$j], coefficient=summaryDf$x) + temp.tri <- data.frame(Id=idList[summaryDf$i], Id2=idList[summaryDf$j], coefficient=summaryDf$x) # Now filter out parents added for kinship calculation temp.tri <- dplyr::filter(temp.tri, grepl("^(?!addin).*$", Id, perl = TRUE)) temp.tri <- dplyr::filter(temp.tri, grepl("^(?!addin).*$", Id2, perl = TRUE)) + temp.tri <- merge(temp.tri, allRecordsForSpecies[c('Id', 'Species')], by = 'Id', all.x = TRUE) - newRecords=rbind(newRecords,temp.tri) - print(paste0('total subjects: ', nrow(allRecordsForSpecies))) + newRecords <- dplyr::bind_rows(newRecords,temp.tri) } # write TSV to disk -print("Output table:"); -print(str(newRecords)); -write.table(newRecords, file = "kinship.txt", append = FALSE,row.names=F,quote=F,sep="\t"); \ No newline at end of file +write.table(newRecords, file = "kinship.txt", append = FALSE, row.names = 
FALSE, quote = FALSE, sep = '\t') \ No newline at end of file diff --git a/ehr/resources/web/ehr/panel/GeneticCalculationSettingsPanel.js b/ehr/resources/web/ehr/panel/GeneticCalculationSettingsPanel.js index 0474ed15b..b5e217dd1 100644 --- a/ehr/resources/web/ehr/panel/GeneticCalculationSettingsPanel.js +++ b/ehr/resources/web/ehr/panel/GeneticCalculationSettingsPanel.js @@ -24,11 +24,15 @@ Ext4.define('EHR.panel.GeneticCalculationSettingsPanel', { xtype: 'checkbox', fieldLabel: 'Is Enabled?', itemId: 'enabled' + },{ + xtype: 'checkbox', + fieldLabel: 'Allow Import During Business Hours?', + itemId: 'allowImportDuringBusinessHours' },{ xtype: 'checkbox', fieldLabel: 'Kinship validation?', itemId: 'kinshipValidation', - listeners : { + listeners: { render: function(c) { Ext4.create('Ext.tip.ToolTip', { target: c.getEl(), @@ -94,6 +98,7 @@ Ext4.define('EHR.panel.GeneticCalculationSettingsPanel', { this.down('#hourOfDay').setValue(results.hourOfDay); this.down('#containerPath').setValue(results.containerPath); this.down('#kinshipValidation').setValue(results.kinshipValidation); + this.down('#allowImportDuringBusinessHours').setValue(results.allowImportDuringBusinessHours) }, saveData: function(){ @@ -104,7 +109,8 @@ Ext4.define('EHR.panel.GeneticCalculationSettingsPanel', { containerPath: this.down('#containerPath').getValue(), enabled: this.down('#enabled').getValue(), hourOfDay: this.down('#hourOfDay').getValue(), - kinshipValidation: this.down('#kinshipValidation').getValue() + kinshipValidation: this.down('#kinshipValidation').getValue(), + allowImportDuringBusinessHours: this.down('#allowImportDuringBusinessHours').getValue() }, method : 'POST', scope: this, diff --git a/ehr/src/org/labkey/ehr/EHRController.java b/ehr/src/org/labkey/ehr/EHRController.java index 15b665ed9..2de880002 100644 --- a/ehr/src/org/labkey/ehr/EHRController.java +++ b/ehr/src/org/labkey/ehr/EHRController.java @@ -83,6 +83,7 @@ import org.labkey.api.settings.AppProps; import 
org.labkey.api.study.DatasetTable; import org.labkey.api.util.ExceptionUtil; +import org.labkey.api.util.HtmlString; import org.labkey.api.util.HtmlStringBuilder; import org.labkey.api.util.PageFlowUtil; import org.labkey.api.util.Path; @@ -639,7 +640,7 @@ public ApiResponse execute(ScheduleGeneticCalculationForm form, BindException er errors.reject(ERROR_MSG, "Unable to find container for path: " + form.getContainerPath()); return null; } - GeneticCalculationsJob.setProperties(form.isEnabled(), c, form.getHourOfDay(), form.isKinshipValidation()); + GeneticCalculationsJob.setProperties(form.isEnabled(), c, form.getHourOfDay(), form.isKinshipValidation(), form.isAllowImportDuringBusinessHours()); return new ApiSimpleResponse("success", true); } @@ -759,6 +760,7 @@ public static class ScheduleGeneticCalculationForm private int hourOfDay; private boolean _kinshipValidation; + private boolean _allowImportDuringBusinessHours; public boolean isEnabled() { @@ -799,6 +801,16 @@ public void setKinshipValidation(boolean kinshipValidation) { _kinshipValidation = kinshipValidation; } + + public boolean isAllowImportDuringBusinessHours() + { + return _allowImportDuringBusinessHours; + } + + public void setAllowImportDuringBusinessHours(boolean allowImportDuringBusinessHours) + { + _allowImportDuringBusinessHours = allowImportDuringBusinessHours; + } } @RequiresPermission(AdminPermission.class) @@ -817,6 +829,7 @@ public ApiResponse execute(ScheduleGeneticCalculationForm form, BindException er ret.put("enabled", GeneticCalculationsJob.isEnabled()); ret.put("hourOfDay", GeneticCalculationsJob.getHourOfDay()); ret.put("kinshipValidation", GeneticCalculationsJob.isKinshipValidation()); + ret.put("allowImportDuringBusinessHours", GeneticCalculationsJob.isAllowImportDuringBusinessHours()); return new ApiSimpleResponse(ret); } @@ -1250,7 +1263,7 @@ public void validateCommand(Object form, Errors errors) @Override public ModelAndView getConfirmView(Object form, BindException errors) { 
- return new HtmlView("This will cause the system to recalculate kinship and inbreeding coefficients on the colony. Do you want to continue?"); + return new HtmlView(HtmlString.of("This will cause the system to recalculate kinship and inbreeding coefficients on the colony. Do you want to continue?")); } @Override diff --git a/ehr/src/org/labkey/ehr/EHRServiceImpl.java b/ehr/src/org/labkey/ehr/EHRServiceImpl.java index 039a34b10..0b9baed00 100644 --- a/ehr/src/org/labkey/ehr/EHRServiceImpl.java +++ b/ehr/src/org/labkey/ehr/EHRServiceImpl.java @@ -50,6 +50,7 @@ import org.labkey.api.module.ModuleLoader; import org.labkey.api.module.ModuleProperty; import org.labkey.api.pipeline.PipeRoot; +import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.pipeline.PipelineService; import org.labkey.api.query.BatchValidationException; import org.labkey.api.query.DetailsURL; @@ -78,10 +79,12 @@ import org.labkey.ehr.history.DefaultObservationsDataSource; import org.labkey.ehr.history.DefaultPregnanciesDataSource; import org.labkey.ehr.history.LabworkManager; +import org.labkey.ehr.pipeline.GeneticCalculationsImportTask; import org.labkey.ehr.security.EHRSecurityManager; import org.labkey.ehr.table.DefaultEHRCustomizer; import org.labkey.ehr.table.SNOMEDCodesDisplayColumn; +import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; @@ -1061,4 +1064,10 @@ public Map> getLabWorkOverrides() { return _labWorkOverrides; } + + @Override + public void standaloneProcessKinshipAndInbreeding(Container c, User u, File pipelineDir, Logger log) throws PipelineJobException + { + GeneticCalculationsImportTask.standaloneProcessKinshipAndInbreeding(c, u, pipelineDir, log); + } } diff --git a/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsImportTask.java b/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsImportTask.java index 28d277270..217ee28b3 100644 --- 
a/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsImportTask.java +++ b/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsImportTask.java @@ -16,6 +16,7 @@ package org.labkey.ehr.pipeline; import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.labkey.api.collections.CaseInsensitiveHashMap; @@ -50,6 +51,7 @@ import org.labkey.api.query.QueryUpdateServiceException; import org.labkey.api.query.UserSchema; import org.labkey.api.query.ValidationException; +import org.labkey.api.reader.Readers; import org.labkey.api.security.User; import org.labkey.api.util.FileType; import org.labkey.api.util.PageFlowUtil; @@ -116,7 +118,7 @@ public List getProtocolActionNames() } @Override - public PipelineJob.Task createTask(PipelineJob job) + public PipelineJob.Task createTask(PipelineJob job) { GeneticCalculationsImportTask task = new GeneticCalculationsImportTask(this, job); setJoin(false); @@ -145,34 +147,42 @@ public RecordedActionSet run() throws PipelineJobException } else { - processInbreeding(); - processKinship(); + PipelineJob job = getJob(); + FileAnalysisJobSupport support = (FileAnalysisJobSupport) job; + + processInbreeding(job.getContainer(), job.getUser(), support.getAnalysisDirectoryPath().toFile(), job.getLogger()); + processKinship(job.getContainer(), job.getUser(), support.getAnalysisDirectoryPath().toFile(), job.getLogger()); if (GeneticCalculationsJob.isKinshipValidation()) + { validateKinship(); + } } return new RecordedActionSet(actions); } - private void processKinship() throws PipelineJobException + public static void standaloneProcessKinshipAndInbreeding(Container c, User u, File pipelineDir, Logger log) throws PipelineJobException { - PipelineJob job = getJob(); - FileAnalysisJobSupport support = (FileAnalysisJobSupport) job; + processInbreeding(c, u, pipelineDir, log); + processKinship(c, u, pipelineDir, log); + } - File 
output = new File(support.getAnalysisDirectory(), KINSHIP_FILE); + private static void processKinship(Container c, User u, File pipelineDir, Logger log) throws PipelineJobException + { + File output = new File(pipelineDir, KINSHIP_FILE); if (!output.exists()) throw new PipelineJobException("Unable to find file: " + output.getPath()); DbSchema ehrSchema = EHRSchema.getInstance().getSchema(); TableInfo kinshipTable = ehrSchema.getTable("kinship"); - getJob().getLogger().info("Inspecting file length: " + output.getPath()); + log.info("Inspecting file length: " + output.getPath()); try { try (DbScope.Transaction transaction = ExperimentService.get().ensureTransaction(); - LineNumberReader lnr = new LineNumberReader(new BufferedReader(new FileReader(output)))) + LineNumberReader lnr = new LineNumberReader(Readers.getReader(output))) { while (lnr.readLine() != null) { @@ -180,19 +190,19 @@ private void processKinship() throws PipelineJobException break; } int lineNumber = lnr.getLineNumber(); - lnr.close(); - if (lineNumber < 3) + { throw new PipelineJobException("Too few lines found in output. Line count was: " + lineNumber); + } //delete all previous records - getJob().getLogger().info("Deleting existing rows"); - Table.delete(kinshipTable, new SimpleFilter(FieldKey.fromString("container"), getJob().getContainerId(), CompareType.EQUAL)); + log.info("Deleting existing rows"); + Table.delete(kinshipTable, new SimpleFilter(FieldKey.fromString("container"), c.getId(), CompareType.EQUAL)); //NOTE: this process creates and deletes a ton of rows each day. 
the rowId can balloon very quickly, so we reset it here SqlSelector ss = new SqlSelector(kinshipTable.getSchema(), new SQLFragment("SELECT max(rowid) as expt FROM " + kinshipTable.getSelectName())); List ret = ss.getArrayList(Long.class); - Integer maxVal; + int maxVal; if (ret.isEmpty()) { maxVal = 0; @@ -230,9 +240,9 @@ else if (kinshipTable.getSqlDialect().isPostgreSQL()) } try (DbScope.Transaction transaction = ExperimentService.get().ensureTransaction(); - BufferedReader reader = new BufferedReader(new FileReader(output))) + BufferedReader reader = Readers.getReader(output)) { - getJob().getLogger().info("Inserting rows"); + log.info("Inserting rows"); String line = null; int lineNum = 0; while ((line = reader.readLine()) != null) @@ -246,7 +256,7 @@ else if (kinshipTable.getSqlDialect().isPostgreSQL()) if (fields[0].equalsIgnoreCase(fields[1])) continue; //dont import self-kinship - Map row = new HashMap(); + Map row = new HashMap<>(); assert fields[0].length() < 80 : "Field Id value too long: [" + fields[0] + ']'; assert fields[1].length() < 80 : "Field Id2 value too long: [" + fields[1] + "]"; @@ -261,19 +271,19 @@ else if (kinshipTable.getSqlDialect().isPostgreSQL()) throw new PipelineJobException("Invalid kinship coefficient on line " + (lineNum + 1) + " for IDs " + fields[0] + " and " + fields[1] + ": " + fields[2], e); } - row.put("container", job.getContainer().getId()); + row.put("container", c.getId()); row.put("created", new Date()); - row.put("createdby", job.getUser().getUserId()); - Table.insert(job.getUser(), kinshipTable, row); + row.put("createdby", u.getUserId()); + Table.insert(u, kinshipTable, row); lineNum++; if (lineNum % 100000 == 0) { - getJob().getLogger().info("processed " + lineNum + " rows"); + log.info("processed " + lineNum + " rows"); } } - job.getLogger().info("Inserted " + lineNum + " rows into ehr.kinship"); + log.info("Inserted " + lineNum + " rows into ehr.kinship"); transaction.commit(); } } @@ -431,7 +441,7 @@ private void 
validateSetOfRelations(TableInfo kinshipTable, Map familyMembers) { - PipelineJob job = getJob(); - TableSelector familyTs = new TableSelector(familyTable, new LinkedHashSet(familyMembers)); + TableSelector familyTs = new TableSelector(familyTable, new LinkedHashSet<>(familyMembers)); Map> relations = new HashMap<>(); familyTs.forEach(rs -> { @@ -564,7 +572,7 @@ private boolean validateKinship() return true; } - private TableInfo getRealTable(TableInfo ti) + private static TableInfo getRealTable(TableInfo ti) { Domain domain = ti.getDomain(); if (domain != null) @@ -575,48 +583,44 @@ private TableInfo getRealTable(TableInfo ti) return null; } - private void processInbreeding() throws PipelineJobException + private static void processInbreeding(Container c, User u, File pipelineDir, Logger log) throws PipelineJobException { - PipelineJob job = getJob(); - FileAnalysisJobSupport support = (FileAnalysisJobSupport) job; - - File output = new File(support.getAnalysisDirectory(), INBREEDING_FILE); + File output = new File(pipelineDir, INBREEDING_FILE); if (!output.exists()) throw new PipelineJobException("Unable to find file: " + output.getPath()); - UserSchema us = QueryService.get().getUserSchema(job.getUser(), job.getContainer(), "study"); + UserSchema us = QueryService.get().getUserSchema(u, c, "study"); TableInfo ti = us.getTable("Inbreeding Coefficients"); if (ti == null) { - getJob().getLogger().warn("Unable to find table study.inbreeding coefficients"); + log.warn("Unable to find table study.inbreeding coefficients"); return; } QueryUpdateService qus = ti.getUpdateService(); qus.setBulkLoad(true); - LineNumberReader lnr = null; - BufferedReader reader = null; - - try + try (BufferedReader reader = Readers.getReader(output)) { try (DbScope.Transaction transaction = ExperimentService.get().ensureTransaction()) { - getJob().getLogger().info("Inspecting file length: " + output.getPath()); - lnr = new LineNumberReader(new BufferedReader(new FileReader(output))); - 
while (lnr.readLine() != null) + log.info("Inspecting file length: " + output.getPath()); + try (LineNumberReader lnr = new LineNumberReader(Readers.getReader(output))) { - if (lnr.getLineNumber() > 3) - break; + while (lnr.readLine() != null) + { + if (lnr.getLineNumber() > 3) + break; + } + int lineNumber = lnr.getLineNumber(); + if (lineNumber < 3) + { + throw new PipelineJobException("Too few lines found in inbreeding output. Line count was: " + lineNumber); + } } - int lineNumber = lnr.getLineNumber(); - lnr.close(); - - if (lineNumber < 3) - throw new PipelineJobException("Too few lines found in inbreeding output. Line count was: " + lineNumber); //delete all previous records - getJob().getLogger().info("Deleting existing rows"); + log.info("Deleting existing rows"); TableInfo realTable = getRealTable(ti); if (realTable == null) { @@ -628,14 +632,12 @@ private void processInbreeding() throws PipelineJobException transaction.commit(); } - reader = new BufferedReader(new FileReader(output)); - String line; int lineNum = 0; List> rows = new ArrayList<>(); Date date = new Date(); - getJob().getLogger().info("Reading file"); + log.info("Reading file"); while ((line = reader.readLine()) != null){ String[] fields = line.split("\t"); if (fields.length < 2) @@ -643,11 +645,11 @@ private void processInbreeding() throws PipelineJobException if ("coefficient".equalsIgnoreCase(fields[1])) continue; //skip header - Map row = new CaseInsensitiveHashMap(); + Map row = new CaseInsensitiveHashMap<>(); String subjectId = StringUtils.trimToNull(fields[0]); if (subjectId == null) { - getJob().getLogger().error("Missing subjectId on row " + lineNum); + log.error("Missing subjectId on row " + lineNum); continue; } @@ -660,22 +662,22 @@ private void processInbreeding() throws PipelineJobException lineNum++; } - getJob().getLogger().info("Inserting rows"); + log.info("Inserting rows"); BatchValidationException errors = new BatchValidationException(); Map options = new HashMap<>(); - 
options.put(QueryUpdateService.ConfigParameters.Logger, getJob().getLogger()); + options.put(QueryUpdateService.ConfigParameters.Logger, log); try (DbScope.Transaction transaction = ExperimentService.get().ensureTransaction()) { - qus.insertRows(getJob().getUser(), getJob().getContainer(), rows, errors, options, new HashMap()); + qus.insertRows(u, c, rows, errors, options, new HashMap<>()); if (errors.hasErrors()) throw errors; transaction.commit(); } - job.getLogger().info("Inserted " + lineNum + " rows into inbreeding coefficients table"); + log.info("Inserted " + lineNum + " rows into inbreeding coefficients table"); } catch (DuplicateKeyException | SQLException | IOException | QueryUpdateServiceException e) @@ -684,21 +686,13 @@ private void processInbreeding() throws PipelineJobException } catch (BatchValidationException e) { - getJob().getLogger().info("error inserting rows"); + log.info("error inserting rows"); for (ValidationException ve : e.getRowErrors()) { - getJob().getLogger().info(ve.getMessage()); + log.info(ve.getMessage()); } throw new PipelineJobException(e); } - finally - { - if (lnr != null) - try{lnr.close();}catch (Exception ignored){} - - if (reader != null) - try{reader.close();}catch (Exception ignored){} - } } } diff --git a/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsInitTask.java b/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsInitTask.java index 14cb4948f..fb6482853 100644 --- a/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsInitTask.java +++ b/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsInitTask.java @@ -33,6 +33,7 @@ import org.labkey.api.query.UserSchema; import org.labkey.api.util.FileType; import org.labkey.api.util.PageFlowUtil; +import org.labkey.api.writer.PrintWriters; import org.springframework.jdbc.BadSqlGrammarException; import java.io.File; @@ -84,7 +85,7 @@ public List getProtocolActionNames() } @Override - public PipelineJob.Task createTask(PipelineJob job) + public PipelineJob.Task 
createTask(PipelineJob job) { GeneticCalculationsInitTask task = new GeneticCalculationsInitTask(this, job); setJoin(false); @@ -138,7 +139,7 @@ public RecordedActionSet run() throws PipelineJobException File outputFile = new File(support.getAnalysisDirectory(), GeneticCalculationsImportTask.PEDIGREE_FILE); - try (CSVWriter writer = new CSVWriter(new OutputStreamWriter(new FileOutputStream(outputFile)), '\t', CSVWriter.DEFAULT_QUOTE_CHARACTER)) + try (CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(outputFile), '\t', CSVWriter.DEFAULT_QUOTE_CHARACTER)) { long count = ts.getRowCount(); if (count > 0) diff --git a/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsJob.java b/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsJob.java index f6bb04170..3b486dee0 100644 --- a/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsJob.java +++ b/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsJob.java @@ -122,6 +122,16 @@ public static boolean isKinshipValidation() return false; } + public static boolean isAllowImportDuringBusinessHours() + { + Map saved = PropertyManager.getProperties(GENETICCALCULATIONS_PROPERTY_DOMAIN); + + if (saved.containsKey("allowImportDuringBusinessHours")) + return Boolean.parseBoolean(saved.get("allowImportDuringBusinessHours")); + else + return false; + } + public static boolean isEnabled() { Map saved = PropertyManager.getProperties(GENETICCALCULATIONS_PROPERTY_DOMAIN); @@ -152,13 +162,14 @@ public static Integer getHourOfDay() return null; } - public static void setProperties(Boolean isEnabled, Container c, Integer hourOfDay, Boolean isKinshipValidation) + public static void setProperties(Boolean isEnabled, Container c, Integer hourOfDay, Boolean isKinshipValidation, Boolean allowImportDuringBusinessHours) { PropertyManager.PropertyMap props = PropertyManager.getWritableProperties(GENETICCALCULATIONS_PROPERTY_DOMAIN, true); props.put("enabled", isEnabled.toString()); props.put("container", c.getId()); props.put("hourOfDay", 
hourOfDay.toString()); props.put("kinshipValidation", isKinshipValidation.toString()); + props.put("allowImportDuringBusinessHours", allowImportDuringBusinessHours.toString()); props.save(); //unschedule in case settings have changed @@ -182,7 +193,7 @@ public void execute(JobExecutionContext context) throws JobExecutionException try { _log.info("Running Scheduled Genetic Calculations Job"); - new GeneticCalculationsRunnable().run(c, false); + new GeneticCalculationsRunnable().run(c, isAllowImportDuringBusinessHours()); } catch (PipelineJobException e) { diff --git a/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsRTask.java b/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsRTask.java index 7a075a391..59a0e8b86 100644 --- a/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsRTask.java +++ b/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsRTask.java @@ -17,6 +17,7 @@ import org.apache.commons.lang3.StringUtils; import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; import org.labkey.api.module.Module; import org.labkey.api.module.ModuleLoader; import org.labkey.api.pipeline.AbstractTaskFactory; @@ -82,7 +83,7 @@ public List getProtocolActionNames() } @Override - public PipelineJob.Task createTask(PipelineJob job) + public PipelineJob.Task createTask(PipelineJob job) { GeneticCalculationsRTask task = new GeneticCalculationsRTask(this, job); setJoin(false); diff --git a/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsRunnable.java b/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsRunnable.java index 4d6f0bf5b..738310145 100644 --- a/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsRunnable.java +++ b/ehr/src/org/labkey/ehr/pipeline/GeneticCalculationsRunnable.java @@ -31,6 +31,7 @@ import org.labkey.api.pipeline.file.FileAnalysisTaskPipeline; import org.labkey.api.security.User; import org.labkey.api.util.ConfigurationException; +import org.labkey.api.util.StringUtilsLabKey; import org.labkey.api.util.logging.LogHelper; 
import org.labkey.api.view.ActionURL; import org.labkey.api.view.ViewBackgroundInfo; @@ -73,21 +74,21 @@ private void startCalculation(User u, Container c, boolean allowRunningDuringDay { String taskIdString = FileAnalysisTaskPipeline.class.getName() + ":" + KINSHIP_PIPELINE_NAME; TaskId taskId = new TaskId(taskIdString); - TaskPipeline taskPipeline = PipelineJobService.get().getTaskPipeline(taskId); + TaskPipeline taskPipeline = PipelineJobService.get().getTaskPipeline(taskId); if (taskPipeline == null) throw new PipelineJobException("Unable to find kinship pipeline: " + taskId); AbstractFileAnalysisProvider provider = (AbstractFileAnalysisProvider) PipelineService.get().getPipelineProvider("File Analysis"); - AbstractFileAnalysisProtocolFactory factory = provider.getProtocolFactory(taskPipeline); + AbstractFileAnalysisProtocolFactory factory = provider.getProtocolFactory(taskPipeline); ViewBackgroundInfo bg = new ViewBackgroundInfo(c, u, new ActionURL()); PipeRoot root = PipelineService.get().getPipelineRootSetting(c); String protocolName = "EHR Kinship Calculation"; String xml = "\n" + "\n" + - (allowRunningDuringDay ? 
"\ttrue" : "") + + "\t" + allowRunningDuringDay + "" + ""; - AbstractFileAnalysisProtocol protocol = factory.createProtocolInstance(protocolName, "", xml); + AbstractFileAnalysisProtocol protocol = factory.createProtocolInstance(protocolName, "", xml); if (protocol == null) { return; @@ -111,7 +112,7 @@ private void startCalculation(User u, Container c, boolean allowRunningDuringDay defaultXml.getParentFile().mkdirs(); defaultXml.createNewFile(); - try (FileWriter w = new FileWriter(defaultXml)) + try (FileWriter w = new FileWriter(defaultXml, StringUtilsLabKey.DEFAULT_CHARSET)) { w.write(xml); } @@ -135,11 +136,7 @@ private void startCalculation(User u, Container c, boolean allowRunningDuringDay { throw new ConfigurationException("The EHR kinship pipeline has not been configured on this server", e); } - catch (IOException e) - { - throw new PipelineJobException(e); - } - catch (PipelineValidationException e) + catch (IOException | PipelineValidationException e) { throw new PipelineJobException(e); }