From 0e87d99bf7a779c32cb614f72400fecb047cc9f4 Mon Sep 17 00:00:00 2001
From: Anna Schroder
Date: Thu, 16 Jun 2022 16:38:03 +0000
Subject: [PATCH 1/3] Initial structure for uncertainty metric

---
 InnerEye/ML/run_ml.py                |  5 +-
 InnerEye/ML/uncertainty_aggregate.py | 82 ++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 1 deletion(-)
 create mode 100644 InnerEye/ML/uncertainty_aggregate.py

diff --git a/InnerEye/ML/run_ml.py b/InnerEye/ML/run_ml.py
index 70a9a396d..bf9e2aa88 100644
--- a/InnerEye/ML/run_ml.py
+++ b/InnerEye/ML/run_ml.py
@@ -346,7 +346,10 @@ def run(self) -> None:
             # We specify the ModelProcessing as DEFAULT here even if the run_recovery points to an ensemble run,
             # because the current run is a single one. See the documentation of ModelProcessing for more details.
             self.run_inference(checkpoint_paths_for_testing, ModelProcessing.DEFAULT)
-
+
+            #if self.model_config.train_uncertainty_aggregate:
+                # run function to train
+
             if self.container.generate_report:
                 self.generate_report(ModelProcessing.DEFAULT)
 
diff --git a/InnerEye/ML/uncertainty_aggregate.py b/InnerEye/ML/uncertainty_aggregate.py
new file mode 100644
index 000000000..1004f339f
--- /dev/null
+++ b/InnerEye/ML/uncertainty_aggregate.py
@@ -0,0 +1,82 @@
+import nibabel as nib
+import numpy as np
+from sklearn.ensemble import RandomForestRegressor
+import pandas as pd
+from radiomics import featureextractor
+from pathlib import Path
+#from InnerEye.ML.utils.split_dataset import DatasetSplits
+from sklearn.model_selection import GroupKFold
+from sklearn.utils import shuffle
+from sklearn import metrics
+import scipy
+import matplotlib.pyplot as plt
+
+import SimpleITK as sitk
+
+# think about what the inputs will be:
+# output trained random forest model
+
+
+class uncertainty_aggregate():
+
+    def __init__(self):
+        pass
+
+    def trainRandomForest(x,y):
+        # requires dataset: subjects, groups
+        mdl = RandomForestRegressor(n_estimators=10,criterion='mse',max_depth=None)
+        mdl.fit(x, y)
+        return mdl
+
+    '''
+    def splitKfolds():
+        # maybe we don't need this...
+        # we only need this if we want to report a value on how good the random forest is at predicting the dice score
+        # i.e. how trustworthy is our uncertainty metric
+        subject_ids = features['subjects'].values
+        groups = features['groupID'].values
+        k_folds = GroupKFold(n_splits=5)
+        return k_folds.split(subject_ids, groups=groups)
+    '''
+
+    def UncertaintyMapFeatures():
+        '''extract PyRadiomics features from the uncertainty map'''
+        for subject in subjects:
+            # extract pyradiomics features
+            pass
+
+        # remove unused features
+        return features
+
+    def load_uncertainty_from_nifti(fname):
+        img = sitk.ReadImage(fname)
+        return sitk.GetArrayFromImage(img)
+
+    def average_dice_across_structures(subjects,metrics):
+        avgDice=[]
+        for subj in subjects:
+            avgDice.append(np.mean(metrics.Dice[metrics.Patient==subj]))
+        return avgDice
+
+    def run_and_save():
+        # iterate through uncertainty maps
+        # outputdir =
+        # metrics =
+
+        dice=[]
+        for subject in subjects:
+            uncertainty = load_uncertainty_from_nifti(subject)
+            features = UncertaintyMapFeatures(testSet)
+            dice.append(average_dice_across_structures())
+
+        # cross-validation to estimate model performance
+
+        # train model with all data
+        mdl = trainRandomForest(features,dice)
+
+        # save model to disk - save in outputs file? Where will it be seen by the inference model?
+        fname = 'uncertainty_metric_model'
+        pickle.dump(mdl, open(fname, 'wb'))
+
+        # could run cross-validation and save some metrics in the outputs file to report how well the model predicts the Dice score
+
+        return
\ No newline at end of file

From 0290cd1d353c82841f76fc9407823b7fe68e38bb Mon Sep 17 00:00:00 2001
From: Anna Schroder
Date: Fri, 17 Jun 2022 17:34:43 +0000
Subject: [PATCH 2/3] Added an optional parameter to config.py to determine
 whether or not to train the uncertainty aggregation metric. Progress on
 'uncertainty_aggregate.py' functions

---
 InnerEye/ML/config.py                |   6 +
 InnerEye/ML/run_ml.py                |   9 +-
 InnerEye/ML/uncertainty_aggregate.py | 170 +++++++++++++++++----------
 3 files changed, 119 insertions(+), 66 deletions(-)

diff --git a/InnerEye/ML/config.py b/InnerEye/ML/config.py
index ff709da4d..aed505491 100644
--- a/InnerEye/ML/config.py
+++ b/InnerEye/ML/config.py
@@ -495,6 +495,12 @@ class SegmentationModelBase(ModelConfigBase):
         "some test patient data with missing ground truth data is allowed and will be reflected in the patient "
         "counts in the metrics and report.")
 
+    #: If true, train a model to predict the segmentation Dice score. This can be used to identify failure cases for review.
+    train_uncertainty_aggregate: bool = param.Boolean(
+        False,
+        doc="If true, train a model to predict the segmentation Dice score. This can be used to identify failure cases "
+            "for review")
+
     def __init__(self, center_size: Optional[TupleInt3] = None,
                  inference_stride_size: Optional[TupleInt3] = None,
                  min_l_rate: float = 0,

diff --git a/InnerEye/ML/run_ml.py b/InnerEye/ML/run_ml.py
index bf9e2aa88..ab4866c87 100644
--- a/InnerEye/ML/run_ml.py
+++ b/InnerEye/ML/run_ml.py
@@ -49,6 +49,7 @@ from InnerEye.ML.model_config_base import ModelConfigBase
 from InnerEye.ML.model_inference_config import ModelInferenceConfig
 from InnerEye.ML.model_testing import model_test
+from InnerEye.ML.uncertainty_aggregate import UncertaintyAggregate
 from InnerEye.ML.model_training import create_lightning_trainer, model_train
 from InnerEye.ML.reports.notebook_report import generate_classification_crossval_notebook, \
     generate_classification_multilabel_notebook, generate_classification_notebook, generate_segmentation_notebook, \
@@ -347,9 +348,6 @@ def run(self) -> None:
             # because the current run is a single one. See the documentation of ModelProcessing for more details.
             self.run_inference(checkpoint_paths_for_testing, ModelProcessing.DEFAULT)
 
-            #if self.model_config.train_uncertainty_aggregate:
-                # run function to train
-
             if self.container.generate_report:
                 self.generate_report(ModelProcessing.DEFAULT)
 
@@ -361,6 +359,11 @@ def run(self) -> None:
                 if should_wait_for_other_child_runs:
                     self.wait_for_runs_to_finish()
                     self.create_ensemble_model_and_run_inference()
+
+            if self.model_config.train_uncertainty_aggregate:
+                # run function to train
+                UncertaintyAggregate(self.model_config)
+
         else:
             # Inference for all models that are specified via LightningContainers.
             with logging_section("Model inference"):

diff --git a/InnerEye/ML/uncertainty_aggregate.py b/InnerEye/ML/uncertainty_aggregate.py
index 1004f339f..a795dc40b 100644
--- a/InnerEye/ML/uncertainty_aggregate.py
+++ b/InnerEye/ML/uncertainty_aggregate.py
@@ -12,71 +12,115 @@ import matplotlib.pyplot as plt
 
 import SimpleITK as sitk
+from InnerEye.Common.common_util import BEST_EPOCH_FOLDER_NAME, METRICS_AGGREGATES_FILE, ModelProcessing, \
+    SUBJECT_METRICS_FILE_NAME, get_best_epoch_results_path, is_linux, logging_section
+from InnerEye.ML.common import ModelExecutionMode
 
 # think about what the inputs will be:
 # output trained random forest model
 
+# possibility to add extra functionality to simulate poor segmentations
+
+'''
+class UncertaintyAggregate():
+
+    def __init__(self,config):
+        self.outputs_folder = config.outputs_folder
+'''
+
+def trainRandomForest(x,y):
+    # requires dataset: subjects, groups
+    mdl = RandomForestRegressor(n_estimators=10,criterion='mse',max_depth=None)
+    mdl.fit(x, y)
+    return mdl
+
+'''
+def splitKfolds():
+    # maybe we don't need this...
+    # we only need this if we want to report a value on how good the random forest is at predicting the dice score
+    # i.e. how trustworthy is our uncertainty metric
+    subject_ids = features['subjects'].values
+    groups = features['groupID'].values
+    k_folds = GroupKFold(n_splits=5)
+    return k_folds.split(subject_ids, groups=groups)
+'''
+
+def UncertaintyMapFeatures(self,subjects):
+    '''extract PyRadiomics features from the uncertainty map'''
+
+    for i,subject in enumerate(subjects):
+        subject_dir = self.outputs_folder / f"{int(patient_id):03d}"
+        extractor = featureextractor.RadiomicsFeatureExtractor(imageType='Original',binWidth=25)
+        result=pd.Series(extractor.execute(
+            str(subject_dir / 'uncertainty.nii.gz'),
+            str(subject_dir / 'segmentation.nii.gz')))
+        featureVector=self.df[subj_i]
+        featureVector=featureVector.append(result)
+        featureVector.name=subj
+
+        features = features.join(featureVector, how='outer')
+
+    # remove unused features
+    return features
+
+def PostProcessFeatures(features):
+    features = features.T
+
+    features=features.drop(['diagnostics_Versions_PyRadiomics',
+        'diagnostics_Versions_Numpy', 'diagnostics_Versions_SimpleITK',
+        'diagnostics_Versions_PyWavelet', 'diagnostics_Versions_Python',
+        'diagnostics_Configuration_Settings',
+        'diagnostics_Configuration_EnabledImageTypes',
+        'diagnostics_Image-original_Hash',
+        'diagnostics_Image-original_Dimensionality',
+        'diagnostics_Image-original_Spacing', 'diagnostics_Image-original_Size',
+        'diagnostics_Image-original_Mean', 'diagnostics_Image-original_Minimum',
+        'diagnostics_Image-original_Maximum', 'diagnostics_Mask-original_Hash',
+        'diagnostics_Mask-original_Spacing', 'diagnostics_Mask-original_Size',
+        'diagnostics_Mask-original_BoundingBox',
+        'diagnostics_Mask-original_VoxelNum',
+        'diagnostics_Mask-original_VolumeNum',
+        'diagnostics_Mask-original_CenterOfMassIndex',
+        'diagnostics_Mask-original_CenterOfMass','original_ngtdm_Busyness',
+        'original_ngtdm_Coarseness', 'original_ngtdm_Complexity',
+        'original_ngtdm_Contrast', 'original_ngtdm_Strength'],1)
+
+    # shuffle rows - currently ordered by ascending Dice score
+    #features = shuffle(features,random_state=0)
+
+
+def load_uncertainty_from_nifti(fname):
+    img = sitk.ReadImage(fname)
+    return sitk.GetArrayFromImage(img)
+
+def average_dice_across_structures(subjects,metrics):
+    avgDice=[]
+    for subjedct_id in subjects:
+        avgDice.append(np.mean(metrics.Dice[metrics.Patient==subj]))
+    return avgDice
+
+def UncertaintyAggregate(config):
+    # iterate through uncertainty maps
+    ###### need to correct this: file path is different for single and ensemble models ####
+    outputs_folder = config.outputs_folder
+    metrics_df = pd.read_csv(outputs_folder / 'best_validation_epoch/Test/metrics.csv')
+    subjects = metrics_df.Patient.unique()
+
+    features = pd.DataFrame()
+    df.dice = average_dice_across_structures(self,subjects,metrics_df)
+    features = UncertaintyMapFeatures(self, features, subject_id, i)
+    #dice.append(average_dice_across_structures())
+
+    features = self.PostProcessFeatures(features)
+    # cross-validation to estimate model performance
+
+    # train model with all data
+    mdl = trainRandomForest(features,dice)
+
+    # save model to disk - save in outputs file? Where will it be seen by the inference model?
+    fname = 'uncertainty_metric_model'
+    pickle.dump(mdl, open(fname, 'wb'))
+
+    # could run cross-validation and save some metrics in the outputs file to report how well the model predicts the Dice score
+
+    return
\ No newline at end of file

From ed8f6df82564aeaeb5004c10e1a2aff98668a3f2 Mon Sep 17 00:00:00 2001
From: Anna Schroder
Date: Mon, 20 Jun 2022 13:34:31 +0000
Subject: [PATCH 3/3] Fixes to uncertainty_aggregate.py

---
 InnerEye/ML/uncertainty_aggregate.py | 36 +++++++++++++---------------
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/InnerEye/ML/uncertainty_aggregate.py b/InnerEye/ML/uncertainty_aggregate.py
index a795dc40b..4a1b7fb82 100644
--- a/InnerEye/ML/uncertainty_aggregate.py
+++ b/InnerEye/ML/uncertainty_aggregate.py
@@ -15,18 +15,13 @@ from InnerEye.Common.common_util import BEST_EPOCH_FOLDER_NAME, METRICS_AGGREGATES_FILE, ModelProcessing, \
     SUBJECT_METRICS_FILE_NAME, get_best_epoch_results_path, is_linux, logging_section
 from InnerEye.ML.common import ModelExecutionMode
+import pickle
 
 # think about what the inputs will be:
 # output trained random forest model
 
 # possibility to add extra functionality to simulate poor segmentations
 
-'''
-class UncertaintyAggregate():
-
-    def __init__(self,config):
-        self.outputs_folder = config.outputs_folder
-'''
 
 def trainRandomForest(x,y):
     # requires dataset: subjects, groups
     mdl = RandomForestRegressor(n_estimators=10,criterion='mse',max_depth=None)
     mdl.fit(x, y)
     return mdl
 
 '''
 def splitKfolds():
     # maybe we don't need this...
     # we only need this if we want to report a value on how good the random forest is at predicting the dice score
     # i.e. how trustworthy is our uncertainty metric
     subject_ids = features['subjects'].values
     groups = features['groupID'].values
     k_folds = GroupKFold(n_splits=5)
     return k_folds.split(subject_ids, groups=groups)
 '''
 
-def UncertaintyMapFeatures(self,subjects):
+def UncertaintyMapFeatures(dice,outputs_folder,subjects):
     '''extract PyRadiomics features from the uncertainty map'''
-
+    features = pd.DataFrame()
     for i,subject in enumerate(subjects):
-        subject_dir = self.outputs_folder / f"{int(patient_id):03d}"
+        subject_dir = outputs_folder / f"{int(subject):03d}"
         extractor = featureextractor.RadiomicsFeatureExtractor(imageType='Original',binWidth=25)
         result=pd.Series(extractor.execute(
             str(subject_dir / 'uncertainty.nii.gz'),
             str(subject_dir / 'segmentation.nii.gz')))
-        featureVector=self.df[subj_i]
+        featureVector=df[subj_i]
         featureVector=featureVector.append(result)
         featureVector.name=subj
 
         features = features.join(featureVector, how='outer')
 
     # remove unused features
     return features
 
 def PostProcessFeatures(features):
+    '''drop unused PyRadiomics features
+    TODO: pyradiomics website, create .yml file which selects the desired features'''
+
     features = features.T
 
     features=features.drop(['diagnostics_Versions_PyRadiomics',
         'diagnostics_Versions_Numpy', 'diagnostics_Versions_SimpleITK',
         'diagnostics_Versions_PyWavelet', 'diagnostics_Versions_Python',
         'diagnostics_Configuration_Settings',
         'diagnostics_Configuration_EnabledImageTypes',
         'diagnostics_Image-original_Hash',
         'diagnostics_Image-original_Dimensionality',
         'diagnostics_Image-original_Spacing', 'diagnostics_Image-original_Size',
         'diagnostics_Image-original_Mean', 'diagnostics_Image-original_Minimum',
         'diagnostics_Image-original_Maximum', 'diagnostics_Mask-original_Hash',
         'diagnostics_Mask-original_Spacing', 'diagnostics_Mask-original_Size',
         'diagnostics_Mask-original_BoundingBox',
         'diagnostics_Mask-original_VoxelNum',
         'diagnostics_Mask-original_VolumeNum',
         'diagnostics_Mask-original_CenterOfMassIndex',
         'diagnostics_Mask-original_CenterOfMass','original_ngtdm_Busyness',
         'original_ngtdm_Coarseness', 'original_ngtdm_Complexity',
         'original_ngtdm_Contrast', 'original_ngtdm_Strength'],1)
 
     # shuffle rows - currently ordered by ascending Dice score
     #features = shuffle(features,random_state=0)
+    return features
 
 
 def load_uncertainty_from_nifti(fname):
     img = sitk.ReadImage(fname)
     return sitk.GetArrayFromImage(img)
 
 def average_dice_across_structures(subjects,metrics):
     avgDice=[]
-    for subjedct_id in subjects:
-        avgDice.append(np.mean(metrics.Dice[metrics.Patient==subj]))
+    for subject_id in subjects:
+        avgDice.append(np.mean(metrics.Dice[metrics.Patient==subject_id]))
     return avgDice
 
 def UncertaintyAggregate(config):
     # iterate through uncertainty maps
     ###### need to correct this: file path is different for single and ensemble models ####
     outputs_folder = config.outputs_folder
     metrics_df = pd.read_csv(outputs_folder / 'best_validation_epoch/Test/metrics.csv')
     subjects = metrics_df.Patient.unique()
 
-    features = pd.DataFrame()
-    df.dice = average_dice_across_structures(self,subjects,metrics_df)
-    features = UncertaintyMapFeatures(self, features, subject_id, i)
-    #dice.append(average_dice_across_structures())
-
-    features = self.PostProcessFeatures(features)
+    # why are two dataframes needed???
+    dice = pd.DataFrame(average_dice_across_structures(subjects,metrics_df))
+    features = UncertaintyMapFeatures(dice,outputs_folder,subjects)
+    features = PostProcessFeatures(features)
     # cross-validation to estimate model performance
 
     # train model with all data
-    mdl = trainRandomForest(features,dice)
+    mdl = trainRandomForest(features.drop(['dice'], axis=1), features.dice)
 
     # save model to disk - save in outputs file? Where will it be seen by the inference model?
     fname = 'uncertainty_metric_model'
     pickle.dump(mdl, open(fname, 'wb'))
 
     # could run cross-validation and save some metrics in the outputs file to report how well the model predicts the Dice score
-
-    return
\ No newline at end of file
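
A possible follow-up, sketched below as a rough illustration rather than as part of the patches above: once UncertaintyAggregate has pickled the random forest, the saved model could be loaded at inference time and applied to the PyRadiomics features of a new uncertainty map to flag likely segmentation failures, which is the open question in the "save model to disk" comments. The helper names, file locations and the 0.8 review threshold are assumptions, not existing InnerEye code.

# Hypothetical sketch: consuming 'uncertainty_metric_model' at inference time.
import pickle
from pathlib import Path

import pandas as pd
from radiomics import featureextractor


def extract_uncertainty_features(uncertainty_nifti: Path, segmentation_nifti: Path) -> pd.DataFrame:
    """Extract PyRadiomics features from one uncertainty map, using the same extractor settings as training."""
    extractor = featureextractor.RadiomicsFeatureExtractor(binWidth=25)
    result = pd.Series(extractor.execute(str(uncertainty_nifti), str(segmentation_nifti)))
    # Keep only numeric radiomics features; drop the diagnostics_* metadata entries,
    # mirroring what PostProcessFeatures does in uncertainty_aggregate.py.
    result = result[[k for k in result.index if not k.startswith('diagnostics_')]]
    return result.astype(float).to_frame().T


def predict_dice_from_uncertainty(model_file: Path, uncertainty_nifti: Path, segmentation_nifti: Path) -> float:
    """Load the pickled RandomForestRegressor and predict the average Dice score for one subject."""
    with open(model_file, 'rb') as f:
        mdl = pickle.load(f)
    features = extract_uncertainty_features(uncertainty_nifti, segmentation_nifti)
    return float(mdl.predict(features)[0])


if __name__ == '__main__':
    # Illustrative paths and threshold only; the real locations depend on where the training
    # run stores 'uncertainty_metric_model' and the per-subject segmentation outputs.
    predicted = predict_dice_from_uncertainty(Path('outputs/uncertainty_metric_model'),
                                              Path('outputs/001/uncertainty.nii.gz'),
                                              Path('outputs/001/segmentation.nii.gz'))
    if predicted < 0.8:
        print(f"Predicted Dice {predicted:.2f}: flag this segmentation for manual review")

Note that this assumes the feature columns seen at prediction time match those used when the random forest was trained; in practice the selected feature names would need to be saved alongside the pickled model.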