diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 86ca0b445..2d65938a4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,11 +2,10 @@ name: CI
 on:
   push:
-    branches: master
+    branches: mhof_dev_merge
   pull_request:
-    branches: master
+    branches: mhof_dev_merge
   workflow_dispatch:
-
 jobs:
   test:
     name: Run tests
diff --git a/a_reproduce_pacs_diva.yaml b/a_reproduce_pacs_diva.yaml
new file mode 100644
index 000000000..db3c234eb
--- /dev/null
+++ b/a_reproduce_pacs_diva.yaml
@@ -0,0 +1,24 @@
+te_d: sketch
+tpath: examples/tasks/task_pacs_aug.py
+bs: 32
+model: diva
+trainer: fbopt
+gamma_y: 1.0
+ini_setpoint_ratio: 0.99
+str_diva_multiplier_type: gammad_recon
+coeff_ma_output_state: 0.1
+coeff_ma_setpoint: 0.9
+exp_shoulder_clip: 5
+mu_init: 0.000001
+k_i_gain_ratio: 0.5
+mu_clip: 10
+epos: 1000
+epos_min: 200
+npath: examples/nets/resnet50domainbed.py
+npath_dom: examples/nets/resnet50domainbed.py
+es: 2
+lr: 0.00005
+zx_dim: 0
+zy_dim: 64
+zd_dim: 64
+force_setpoint_change_once: True
diff --git a/domainlab/algos/builder_fbopt_dial.py b/domainlab/algos/builder_fbopt_dial.py
new file mode 100644
index 000000000..f1faad96b
--- /dev/null
+++ b/domainlab/algos/builder_fbopt_dial.py
@@ -0,0 +1,21 @@
+"""
+builder for feedback optimization of dial
+"""
+from domainlab.algos.builder_diva import NodeAlgoBuilderDIVA
+from domainlab.algos.trainers.train_fbopt_b import TrainerFbOpt
+
+
+class NodeAlgoBuilderFbOptDial(NodeAlgoBuilderDIVA):
+    """
+    builder for feedback optimization of dial
+    """
+
+    def init_business(self, exp):
+        """
+        return trainer, model, observer, device
+        """
+        trainer_in, model, observer, device = super().init_business(exp)
+        trainer_in.init_business(model, exp.task, observer, device, exp.args)
+        trainer = TrainerFbOpt()
+        trainer.init_business(trainer_in, exp.task, observer, device, exp.args)
+        return trainer, model, observer, device
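The builder above first initializes the plain DIVA trainer, then hands it to `TrainerFbOpt` as its decoratee so that each epoch call can be forwarded down the chain. A minimal, self-contained sketch of that decoration pattern (toy classes, not the DomainLab API):

```python
# Toy stand-ins illustrating the trainer decoration used in
# NodeAlgoBuilderFbOptDial.init_business above: the outer trainer
# receives the already-initialized inner trainer and delegates to it.
class InnerTrainer:
    def __init__(self, model):
        self.model = model

    def tr_epoch(self, epoch):
        print(f"inner trainer: epoch {epoch} on model {self.model!r}")
        return False  # "not converged yet"


class OuterTrainer:
    """decorates another trainer: adds behavior, then delegates"""

    def __init__(self, decoratee):
        self.decoratee = decoratee

    def tr_epoch(self, epoch):
        print("outer trainer: update multipliers before delegating")
        return self.decoratee.tr_epoch(epoch)


if __name__ == "__main__":
    trainer = OuterTrainer(InnerTrainer(model="diva"))
    trainer.tr_epoch(0)
```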
f"{self._oracle_last_setpoint_sel_te_acc} to " + # self.sel_model_te_acc defined as a property + # in a_msel, which returns self.msel.sel_model_te_acc + # is the validation acc based model selection, which + # does not take setpoint into account + f"{self.sel_model_te_acc}" + ) + logger.info(log_message) + self._oracle_last_setpoint_sel_te_acc = self.sel_model_te_acc + # let decoratee decide if model should be selected or not + flag = self.msel.update(clear_counter) + return flag diff --git a/domainlab/algos/msels/c_msel_val_top_k.py b/domainlab/algos/msels/c_msel_val_top_k.py new file mode 100644 index 000000000..f557c7dc1 --- /dev/null +++ b/domainlab/algos/msels/c_msel_val_top_k.py @@ -0,0 +1,61 @@ +""" +Model Selection should be decoupled from +""" +from domainlab.algos.msels.c_msel_val import MSelValPerf +from domainlab.utils.logger import Logger + + +class MSelValPerfTopK(MSelValPerf): + """ + 1. Model selection using validation performance + 2. Visitor pattern to trainer + """ + + def __init__(self, max_es, top_k=2): + super().__init__(max_es) # construct self.tr_obs (observer) + self.top_k = top_k + self.list_top_k_acc = [0.0 for _ in range(top_k)] + + def update(self, clear_counter=False): + """ + if the best model should be updated + """ + flag_super = super().update(clear_counter) + metric_val_current = self.tr_obs.metric_val[self.tr_obs.str_metric4msel] + acc_min = min(self.list_top_k_acc) + if metric_val_current > acc_min: + # overwrite + logger = Logger.get_logger() + logger.info( + f"top k validation acc: {self.list_top_k_acc} \ + overwriting/reset counter" + ) + self.es_c = 0 # restore counter + ind = self.list_top_k_acc.index(acc_min) + # avoid having identical values + if metric_val_current not in self.list_top_k_acc: + self.list_top_k_acc[ind] = metric_val_current + logger.info( + f"top k validation acc updated: \ + {self.list_top_k_acc}" + ) + # overwrite to ensure consistency + # issue #569: initially self.list_top_k_acc will be [xx, 0] and it does not matter since 0 will be overwriten by second epoch validation acc. 
diff --git a/domainlab/algos/msels/c_msel_val_top_k.py b/domainlab/algos/msels/c_msel_val_top_k.py
new file mode 100644
index 000000000..f557c7dc1
--- /dev/null
+++ b/domainlab/algos/msels/c_msel_val_top_k.py
@@ -0,0 +1,61 @@
+"""
+model selection should be decoupled from training: here, top-k validation
+performance based model selection
+"""
+from domainlab.algos.msels.c_msel_val import MSelValPerf
+from domainlab.utils.logger import Logger
+
+
+class MSelValPerfTopK(MSelValPerf):
+    """
+    1. Model selection using validation performance
+    2. Visitor pattern to trainer
+    """
+
+    def __init__(self, max_es, top_k=2):
+        super().__init__(max_es)  # construct self.tr_obs (observer)
+        self.top_k = top_k
+        self.list_top_k_acc = [0.0 for _ in range(top_k)]
+
+    def update(self, clear_counter=False):
+        """
+        decide if the best model should be updated
+        """
+        flag_super = super().update(clear_counter)
+        metric_val_current = self.tr_obs.metric_val[self.tr_obs.str_metric4msel]
+        acc_min = min(self.list_top_k_acc)
+        if metric_val_current > acc_min:
+            # overwrite
+            logger = Logger.get_logger()
+            logger.info(
+                f"top k validation acc: {self.list_top_k_acc}, "
+                f"overwriting/resetting counter"
+            )
+            self.es_c = 0  # reset early-stopping counter
+            ind = self.list_top_k_acc.index(acc_min)
+            # avoid having identical values
+            if metric_val_current not in self.list_top_k_acc:
+                self.list_top_k_acc[ind] = metric_val_current
+                logger.info(f"top k validation acc updated: {self.list_top_k_acc}")
+            # overwrite to ensure consistency
+            # issue #569: initially self.list_top_k_acc will be [xx, 0],
+            # which does not matter since the 0 will be overwritten by the
+            # second epoch's validation acc. After epoch 1, most often,
+            # self._best_val_acc will be the higher value of
+            # self.list_top_k_acc and gets overwritten here by
+            # min(self.list_top_k_acc)
+            logger.info(
+                f"top-k val sel: overwriting best val acc from {self._best_val_acc} to "
+                f"minimum of {self.list_top_k_acc} which is {min(self.list_top_k_acc)} "
+                f"to ensure consistency"
+            )
+            self._best_val_acc = min(self.list_top_k_acc)
+            # overwrite the test acc; this does not depend on whether the
+            # top-k val acc has been overwritten or not
+            metric_te_current = self.tr_obs.metric_te[self.tr_obs.str_metric4msel]
+            if self._sel_model_te_acc != metric_te_current:
+                # this can only happen if the validation acc has decreased and
+                # the current val acc is only bigger than
+                # min(self.list_top_k_acc) but lower than max(self.list_top_k_acc)
+                logger.info(
+                    f"top-k val sel: overwriting selected model test acc from "
+                    f"{self._sel_model_te_acc} to {metric_te_current} to ensure consistency"
+                )
+                self._sel_model_te_acc = metric_te_current
+            return True  # branch: metric_val_current > acc_min
+        return flag_super
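The bookkeeping rule above in compact, runnable form, with made-up accuracies: the effective best validation accuracy is the minimum over the k best values seen so far, which is more tolerant than a strict argmax.

```python
# Self-contained mini-version of the top-k update in MSelValPerfTopK
# (illustrative numbers, k = 2).
top_k = 2
list_top_k_acc = [0.0 for _ in range(top_k)]

def observe(val_acc):
    """replace the worst kept value if beaten; return the selection threshold"""
    acc_min = min(list_top_k_acc)
    if val_acc > acc_min and val_acc not in list_top_k_acc:
        list_top_k_acc[list_top_k_acc.index(acc_min)] = val_acc
    return min(list_top_k_acc)

for acc in (0.70, 0.80, 0.75, 0.72):
    print(observe(acc), list_top_k_acc)
# printed thresholds: 0.0, 0.7, 0.75, 0.75
# 0.72 is below min([0.75, 0.8]), so the last observation changes nothing
```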
default="no", + help="setpoing_rewind, for benchmark, use yes or no", + ) + + parser.add_argument( + "--str_diva_multiplier_type", + type=str, + default="gammad_recon", + help="which penalty to tune", + ) + + return parser diff --git a/domainlab/algos/trainers/fbopt_mu_controller.py b/domainlab/algos/trainers/fbopt_mu_controller.py new file mode 100644 index 000000000..824638461 --- /dev/null +++ b/domainlab/algos/trainers/fbopt_mu_controller.py @@ -0,0 +1,280 @@ +""" +update hyper-parameters during training +""" +import os +import warnings + +import numpy as np +from torch.utils.tensorboard import SummaryWriter + +from domainlab.algos.trainers.fbopt_setpoint_ada import ( + FbOptSetpointController, + if_list_sign_agree, +) +from domainlab.utils.logger import Logger + + +class StubSummaryWriter: + """ + # stub writer for tensorboard that ignores all messages + """ + + def add_scalar(self, *args, **kwargs): + """ + stub, pass do nothing + """ + + def add_scalars(self, *args, **kwargs): + """ + stub, pass, do nothing + """ + + +class HyperSchedulerFeedback: + # pylint: disable=too-many-instance-attributes + """ + design $\\mu$$ sequence based on state of penalized loss + """ + + def __init__(self, trainer, **kwargs): + """ + kwargs is a dictionary with key the hyper-parameter name and its value + """ + self.trainer = trainer + self.init_mu = trainer.aconf.mu_init + self.mu_min = trainer.aconf.mu_min + self.mu_clip = trainer.aconf.mu_clip + + self.mmu = kwargs + # force initial value of mu + self.mmu = {key: self.init_mu for key, val in self.mmu.items()} + self.set_point_controller = FbOptSetpointController(args=self.trainer.aconf) + + self.k_i_control = trainer.aconf.k_i_gain + self.k_i_gain_ratio = None + self.overshoot_rewind = trainer.aconf.overshoot_rewind == "yes" + self.delta_epsilon_r = None + + # NOTE: this value will be set according to initial evaluation of + # neural network + self.activation_clip = trainer.aconf.exp_shoulder_clip + self.coeff_ma = trainer.aconf.coeff_ma + # NOTE: + # print(copy.deepcopy(self.model)) + # TypeError: cannot pickle '_thread.lock' object + if trainer.aconf.no_tensorboard: + self.writer = StubSummaryWriter() + else: + str_job_id = os.environ.get("SLURM_JOB_ID", "") + self.writer = SummaryWriter(comment=str_job_id) + + def set_k_i_gain(self, epo_reg_loss): + if self.k_i_gain_ratio is None: + return + # NOTE: do not use self.cal_delta4control!!!! which will change + # class member variables self.delta_epsilon_r! + list_setpoint = self.get_setpoint4r() + if_list_sign_agree(epo_reg_loss, list_setpoint) + delta_epsilon_r = [a - b for a, b in zip(epo_reg_loss, list_setpoint)] + + # to calculate self.delta_epsilon_r + k_i_gain_saturate = [ + a / b for a, b in zip(self.activation_clip, delta_epsilon_r) + ] + k_i_gain_saturate_min = min(k_i_gain_saturate) + # NOTE: here we override the commandline arguments specification + # for k_i_control, so k_i_control is not a hyperparameter anymore + self.k_i_control = self.k_i_gain_ratio * k_i_gain_saturate_min + warnings.warn( + f"hyperparameter k_i_gain disabled! \ + replace with {self.k_i_control}" + ) + # FIXME: change this to 1-self.ini_setpoint_ratio, i.e. 
+    def get_setpoint4r(self):
+        """
+        get the setpoint list
+        """
+        return self.set_point_controller.setpoint4R
+
+    def set_setpoint(self, list_setpoint4r, setpoint4ell):
+        """
+        set the setpoint
+        """
+        self.set_point_controller.setpoint4R = list_setpoint4r
+        self.set_point_controller.setpoint4ell = setpoint4ell
+
+    def cal_delta4control(self, list1, list_setpoint):
+        """
+        list difference between output and setpoint
+        """
+        if_list_sign_agree(list1, list_setpoint)
+        delta_epsilon_r = [a - b for a, b in zip(list1, list_setpoint)]
+        if self.delta_epsilon_r is None:
+            self.delta_epsilon_r = delta_epsilon_r
+        else:
+            # PI control:
+            # self.delta_epsilon_r is the previous time step,
+            # delta_epsilon_r is the current time step
+            self.delta_epsilon_r = self.cal_delta_integration(
+                self.delta_epsilon_r, delta_epsilon_r, self.coeff_ma
+            )
+
+    def cal_delta_integration(self, list_old, list_new, coeff):
+        """
+        moving average of the delta
+        """
+        return [(1 - coeff) * a + coeff * b for a, b in zip(list_old, list_new)]
+
+    def tackle_overshoot(self, activation, epo_reg_loss, list_str_multiplier_na):
+        """
+        zero the activation of components that overshot the setpoint
+        """
+        list_overshoot = [
+            i if (a - b) * (self.delta_epsilon_r[i]) < 0 else None
+            for i, (a, b) in enumerate(
+                zip(epo_reg_loss, self.set_point_controller.setpoint4R)
+            )
+        ]
+        for ind in list_overshoot:
+            if ind is not None:
+                logger = Logger.get_logger(
+                    logger_name="main_out_logger", loglevel="INFO"
+                )
+                logger.info(f"delta integration: {self.delta_epsilon_r}")
+                logger.info(
+                    f"overshooting at pos {ind} of activation: {activation}"
+                )
+                logger.info(f"name reg loss: {list_str_multiplier_na}")
+                if self.overshoot_rewind:
+                    activation[ind] = 0.0
+                    logger.info(
+                        f"PID controller set to zero now, new activation: {activation}"
+                    )
+        return activation
+
+    def cal_activation(self):
+        """
+        calculate the activation on the exponential shoulder
+        """
+        setpoint = self.get_setpoint4r()
+        activation = [
+            self.k_i_control * val if setpoint[i] > 0 else self.k_i_control * (-val)
+            for i, val in enumerate(self.delta_epsilon_r)
+        ]
+        if self.activation_clip is not None:
+            activation = [
+                np.clip(
+                    val, a_min=-1 * self.activation_clip, a_max=self.activation_clip
+                )
+                for val in activation
+            ]
+        return activation
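Putting `cal_delta4control`, `cal_activation` and the multiplicative update of `search_mu` (shown below) together, one controller step looks as follows; the numbers are made up and the snippet is a self-contained sketch, not the class itself.

```python
import numpy as np

coeff_ma, k_i, shoulder_clip = 0.5, 0.01, 5.0
mu_min, mu_clip = 1e-6, 1e4

names = ["recon", "gamma_d"]
mu = {"recon": 1e-6, "gamma_d": 1e-6}
setpoint = [198.0, 49.5]
epo_reg_loss = [210.0, 60.0]  # current reg losses, above the setpoint
delta_prev = [10.0, 9.0]      # integrator state from the previous epoch

# the moving average integrates the control error over epochs
delta = [a - b for a, b in zip(epo_reg_loss, setpoint)]
delta_int = [(1 - coeff_ma) * p + coeff_ma * d for p, d in zip(delta_prev, delta)]
# clip on the exponential "shoulder", then mu <- clip(mu * exp(k_i * delta))
activation = [float(np.clip(k_i * d, -shoulder_clip, shoulder_clip)) for d in delta_int]
mu = {
    na: float(np.clip(mu[na] * np.exp(act), mu_min, mu_clip))
    for na, act in zip(names, activation)
}
print(mu)  # both multipliers grow, since both losses exceed the setpoint
```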
+    def search_mu(
+        self, epo_reg_loss, epo_task_loss, epo_loss_tr, list_str_multiplier_na, miter
+    ):
+        # pylint: disable=too-many-locals, too-many-arguments
+        """
+        start from the parameter dictionary dict_theta: {"layer": tensor},
+        enlarge mu w.r.t. its current value to see if the criterion is met:
+        $$\\mu^{k+1} = \\mu^{k} \\exp(k_i [R(\\theta^{k}) - R_{setpoint}])$$
+        """
+        logger = Logger.get_logger(logger_name="main_out_logger", loglevel="INFO")
+        logger.info(f"before controller: current mu: {self.mmu}")
+        logger.info(f"epo reg loss: {epo_reg_loss}")
+        logger.info(f"name reg loss: {list_str_multiplier_na}")
+        self.cal_delta4control(epo_reg_loss, self.get_setpoint4r())
+        activation = self.cal_activation()
+        # overshoot handling
+        activation = self.tackle_overshoot(
+            activation, epo_reg_loss, list_str_multiplier_na
+        )
+        list_gain = np.exp(activation)
+        dict_gain = dict(zip(list_str_multiplier_na, list_gain))
+        target = self.dict_multiply(self.mmu, dict_gain)
+        self.mmu = self.dict_clip(target)
+        logger.info(f"after controller: current mu: {self.mmu}")
+
+        for key, val in self.mmu.items():
+            self.writer.add_scalar(f"dyn_mu/{key}", val, miter)
+            self.writer.add_scalar(f"controller_gain/{key}", dict_gain[key], miter)
+            ind = list_str_multiplier_na.index(key)
+            self.writer.add_scalar(f"delta/{key}", self.delta_epsilon_r[ind], miter)
+
+        if list_str_multiplier_na:
+            for i, (reg_dyn, reg_set) in enumerate(
+                zip(epo_reg_loss, self.get_setpoint4r())
+            ):
+                self.writer.add_scalar(
+                    f"lossrd/dyn_{list_str_multiplier_na[i]}", reg_dyn, miter
+                )
+                self.writer.add_scalar(
+                    f"lossrs/setpoint_{list_str_multiplier_na[i]}", reg_set, miter
+                )
+                self.writer.add_scalars(
+                    f"loss_rds/loss_{list_str_multiplier_na[i]}_w_setpoint",
+                    {
+                        f"lossr/loss_{list_str_multiplier_na[i]}": reg_dyn,
+                        f"lossr/setpoint_{list_str_multiplier_na[i]}": reg_set,
+                    },
+                    miter,
+                )
+                self.writer.add_scalar(
+                    f"x_ell_y_r/loss_{list_str_multiplier_na[i]}", reg_dyn, epo_task_loss
+                )
+        else:
+            logger.info("No multiplier provided")
+        self.writer.add_scalar("loss_task/penalized", epo_loss_tr, miter)
+        self.writer.add_scalar("loss_task/ell", epo_task_loss, miter)
+        acc_te = 0
+        acc_val = 0
+        acc_sel = 0
+        acc_set = 0
+
+        if miter > 1:
+            acc_te = self.trainer.observer.metric_te["acc"]
+            acc_val = self.trainer.observer.metric_val["acc"]
+            acc_sel = self.trainer.observer.model_sel.sel_model_te_acc
+            acc_set = self.trainer.observer.model_sel.oracle_last_setpoint_sel_te_acc
+        self.writer.add_scalar("acc/te", acc_te, miter)
+        self.writer.add_scalar("acc/val", acc_val, miter)
+        self.writer.add_scalar("acc/sel", acc_sel, miter)
+        self.writer.add_scalar("acc/setpoint", acc_set, miter)
+
+    def dict_clip(self, dict_base):
+        """
+        clip each entry of mu according to the pre-set self.mu_clip
+        """
+        return {
+            key: np.clip(val, a_min=self.mu_min, a_max=self.mu_clip)
+            for key, val in dict_base.items()
+        }
+
+    def dict_is_zero(self, dict_mu):
+        """
+        check if any hyper-parameter is zero
+        """
+        for key in dict_mu.keys():
+            if dict_mu[key] == 0.0:
+                return True
+        return False
+
+    def dict_multiply(self, dict_base, dict_multiplier):
+        """
+        element-wise product of two dictionaries sharing the same keys
+        """
+        return {key: val * dict_multiplier[key] for key, val in dict_base.items()}
+
+    def update_setpoint(self, epo_reg_loss, epo_task_loss):
+        """
+        delegate the setpoint update to the setpoint controller
+        """
+        return self.set_point_controller.observe(epo_reg_loss, epo_task_loss)
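The setpoint logic in the next file compares loss vectors with a sign-aware convention: for non-negative pairs smaller is better, while for pairs that are both negative (e.g. a negative ELBO term) the comparison is flipped. A single-pair illustration of that convention:

```python
def is_less(a, b):
    """a is 'better' than reference b under the sign-aware convention"""
    if a * b < 0:
        raise RuntimeError(f"{a} and {b} cannot be compared!")
    return a < b if a >= 0 and b >= 0 else a > b

print(is_less(90.0, 100.0))    # True: positive losses, smaller is better
print(is_less(-90.0, -100.0))  # True: negative pair, comparison is flipped
```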
diff --git a/domainlab/algos/trainers/fbopt_setpoint_ada.py b/domainlab/algos/trainers/fbopt_setpoint_ada.py
new file mode 100644
index 000000000..c3c0193ce
--- /dev/null
+++ b/domainlab/algos/trainers/fbopt_setpoint_ada.py
@@ -0,0 +1,314 @@
+"""
+update the setpoint during training
+"""
+import numpy as np
+
+from domainlab.utils.logger import Logger
+
+
+def list_true(list1):
+    """
+    return the positions of a list whose elements are True
+    """
+    arr_pos = np.arange(len(list1))[list1]
+    return list(arr_pos)
+
+
+def list_add(list1, list2):
+    """
+    element-wise addition of two lists
+    """
+    return [a + b for a, b in zip(list1, list2)]
+
+
+def list_multiply(list1, coeff):
+    """
+    multiply a list by a scalar
+    """
+    return [ele * coeff for ele in list1]
+
+
+def if_list_sign_agree(list1, list2):
+    """
+    each pair must have the same sign
+    """
+    list_agree = [a * b >= 0 for a, b in zip(list1, list2)]
+    if not all(list_agree):
+        raise RuntimeError(f"{list1} and {list2} cannot be compared!")
+
+
+def is_less_list_any(list1, list2):
+    """
+    judge if any component of one list is less than the other;
+    for negative pairs the comparison is flipped
+    """
+    if_list_sign_agree(list1, list2)
+    list_comparison = [
+        a < b if a >= 0 and b >= 0 else a > b for a, b in zip(list1, list2)
+    ]
+    return any(list_comparison), list_true(list_comparison)
+
+
+def is_less_list_all(list1, list2, flag_eq=False):
+    """
+    judge if all components of one list are less than the other;
+    for negative pairs the comparison is flipped
+    """
+    if_list_sign_agree(list1, list2)
+    list_comparison = [
+        a < b if a >= 0 and b >= 0 else a > b for a, b in zip(list1, list2)
+    ]
+    if flag_eq:
+        list_comparison = [
+            a <= b if a >= 0 and b >= 0 else a >= b for a, b in zip(list1, list2)
+        ]
+    return all(list_comparison)
+
+
+def list_ma(list_state, list_input, coeff):
+    """
+    moving average of a list
+    """
+    return [a * coeff + b * (1 - coeff) for a, b in zip(list_state, list_input)]
+
+
+class SetpointRewinder:
+    """
+    rewind the setpoint if the exponential moving average of the current loss
+    is bigger than the setpoint
+    """
+
+    def __init__(self, host):
+        self.host = host
+        self.counter = None
+        self.epo_ma = None
+        self.ref = None
+        self.coeff_ma = 0.5
+        self.setpoint_rewind = host.flag_setpoint_rewind
+
+    def reset(self, epo_reg_loss):
+        """
+        reset the state when the setpoint is adjusted
+        """
+        self.counter = 0
+        self.epo_ma = [0.0 for _ in range(10)]  # FIXME: assumes at most 10 losses
+        self.ref = epo_reg_loss
+
+    def observe(self, epo_reg_loss):
+        """
+        update the moving average
+        """
+        if self.ref is None:
+            self.reset(epo_reg_loss)
+        self.epo_ma = list_ma(self.epo_ma, epo_reg_loss, self.coeff_ma)
+        list_comparison_increase = [a < b for a, b in zip(self.ref, self.epo_ma)]
+        list_comparison_above_setpoint = [
+            a < b for a, b in zip(self.host.setpoint4R, self.epo_ma)
+        ]
+        flag_increase = any(list_comparison_increase)
+        flag_above_setpoint = any(list_comparison_above_setpoint)
+        if flag_increase and flag_above_setpoint:
+            self.counter += 1
+        else:
+            self.counter = 0
+            self.reset(epo_reg_loss)
+
+        if self.setpoint_rewind:
+            if self.counter > 2 and self.counter <= 3:
+                # only rewind while the counter is exactly 3
+                list_pos = list_true(list_comparison_above_setpoint)
+                print(f"\n\n\n!!!!!!! setpoint too low at {list_pos}!\n\n\n")
+                for pos in list_pos:
+                    print(
+                        f"\n\n\n!!!!!!! rewinding setpoint at pos {pos} "
+                        f"from {self.host.setpoint4R[pos]} to "
+                        f"{self.epo_ma[pos]}!\n\n\n"
+                    )
+                    self.host.setpoint4R[pos] = self.epo_ma[pos]
+
+        if self.counter > 3:
+            self.host.transition_to(FixedSetpoint())
+            self.counter = np.inf  # FIXME: freezes the counter for good
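A toy trace of the `SetpointRewinder` logic above (pure Python, illustrative numbers): the EMA of the reg loss is tracked, and each consecutive epoch in which it both rises above the reference recorded at the last reset and sits above the setpoint increments a counter that eventually triggers the rewind and the fallback to a fixed setpoint.

```python
coeff_ma = 0.5
setpoint = [100.0]
ref = [105.0]      # reference loss recorded at the last reset
epo_ma = [105.0]
counter = 0

for observed in ([110.0], [112.0], [115.0], [118.0]):
    epo_ma = [m * coeff_ma + o * (1 - coeff_ma) for m, o in zip(epo_ma, observed)]
    increased = any(r < m for r, m in zip(ref, epo_ma))
    above_setpoint = any(s < m for s, m in zip(setpoint, epo_ma))
    counter = counter + 1 if (increased and above_setpoint) else 0
    print(counter, [round(m, 2) for m in epo_ma])
# the counter climbs 1, 2, 3, 4; in the code above, counter > 3 makes the
# controller fall back to a fixed setpoint
```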
+class FbOptSetpointController:
+    # pylint: disable=too-many-instance-attributes
+    """
+    update the setpoint for mu
+    """
+
+    def __init__(self, state=None, args=None):
+        """
+        the state argument implements the state pattern for setpoint updates
+        """
+        if state is None:
+            if args is not None and args.no_setpoint_update:
+                state = FixedSetpoint()
+            else:
+                state = DominateAllComponent()
+        self.transition_to(state)
+        self.flag_setpoint_rewind = args.setpoint_rewind == "yes"
+        self.setpoint_rewinder = SetpointRewinder(self)
+        self.state_task_loss = 0.0
+        self.state_epo_reg_loss = [
+            0.0 for _ in range(10)
+        ]  # FIXME: 10 is the maximum number of losses here
+        self.coeff_ma_setpoint = args.coeff_ma_setpoint
+        self.coeff_ma_output = args.coeff_ma_output_state
+        # the initial value will be set via the trainer
+        self.setpoint4R = None
+        self.setpoint4ell = None
+        self.host = None
+
+    def transition_to(self, state):
+        """
+        change the internal state
+        """
+        self.state_updater = state
+        self.state_updater.accept(self)
+
+    def update_setpoint_ma(self, list_target, list_pos):
+        """
+        update the setpoint via a moving average, only at positions list_pos
+        """
+        target_ma = [
+            self.coeff_ma_setpoint * a + (1 - self.coeff_ma_setpoint) * b
+            for a, b in zip(self.setpoint4R, list_target)
+        ]
+        self.setpoint4R = [
+            target_ma[i] if i in list_pos else self.setpoint4R[i]
+            for i in range(len(target_ma))
+        ]
+
+    def observe(self, epo_reg_loss, epo_task_loss):
+        """
+        read the current epo_reg_loss continuously
+        """
+        self.state_epo_reg_loss = [
+            self.coeff_ma_output * a + (1 - self.coeff_ma_output) * b if a != 0.0 else b
+            for a, b in zip(self.state_epo_reg_loss, epo_reg_loss)
+        ]
+        if self.state_task_loss == 0.0:
+            self.state_task_loss = epo_task_loss
+        self.state_task_loss = (
+            self.coeff_ma_output * self.state_task_loss
+            + (1 - self.coeff_ma_output) * epo_task_loss
+        )
+        self.setpoint_rewinder.observe(self.state_epo_reg_loss)
+        flag_update, list_pos = self.state_updater.update_setpoint()
+        if flag_update:
+            self.setpoint_rewinder.reset(self.state_epo_reg_loss)
+            logger = Logger.get_logger(logger_name="main_out_logger", loglevel="INFO")
+            logger.info(f"!!!!! setpoint old value {self.setpoint4R}!")
+            self.update_setpoint_ma(self.state_epo_reg_loss, list_pos)
+            logger.info(f"!!!!! setpoint updated to {self.setpoint4R}!")
+            return True
+        return False
+
+
+class FbOptSetpointControllerState:
+    # pylint: disable=too-few-public-methods
+    """
+    abstract state for the state pattern
+    """
+
+    def __init__(self):
+        self.host = None
+
+    def accept(self, controller):
+        """
+        set the host for the state
+        """
+        self.host = controller
+
+
+class FixedSetpoint(FbOptSetpointControllerState):
+    """
+    do not update the setpoint
+    """
+
+    def update_setpoint(self):
+        """
+        always return False so the setpoint is never updated
+        """
+        return False, None
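A worked example of `update_setpoint_ma` above, with made-up values: only the positions flagged by the active state move, and they move as a convex combination of the old setpoint and the observed loss state, so the setpoint shrinks conservatively.

```python
coeff_ma_setpoint = 0.9
setpoint4R = [100.0, 50.0, 10.0]
state_epo_reg_loss = [80.0, 45.0, 12.0]
list_pos = [0, 1]  # third component did not qualify for an update

target_ma = [
    coeff_ma_setpoint * s + (1 - coeff_ma_setpoint) * t
    for s, t in zip(setpoint4R, state_epo_reg_loss)
]
setpoint4R = [
    target_ma[i] if i in list_pos else setpoint4R[i]
    for i in range(len(setpoint4R))
]
print(setpoint4R)  # [98.0, 49.5, 10.0]: a slow, conservative shrink
```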
+class SliderAllComponent(FbOptSetpointControllerState):
+    """
+    concrete state of the state pattern
+    """
+
+    def update_setpoint(self):
+        """
+        all components of R decreased, regardless of whether ell decreased
+        """
+        logger = Logger.get_logger(logger_name="main_out_logger", loglevel="INFO")
+        logger.info(
+            f"comparing output vs setpoint: \n"
+            f"{self.host.state_epo_reg_loss} \n"
+            f"{self.host.setpoint4R}"
+        )
+        if is_less_list_all(
+            self.host.state_epo_reg_loss, self.host.setpoint4R, flag_eq=True
+        ):
+            logger.info(
+                "!!!!! In SliderAllComponent: current value of R is better "
+                "than the current setpoint!"
+            )
+            return True, list(range(len(self.host.setpoint4R)))
+        return False, None
+
+
+class SliderAnyComponent(FbOptSetpointControllerState):
+    """
+    concrete state of the state pattern
+    """
+
+    def update_setpoint(self):
+        """
+        any component of R decreased, regardless of whether ell decreased
+        """
+        flag, list_pos = is_less_list_any(
+            self.host.state_epo_reg_loss, self.host.setpoint4R
+        )
+        return flag, list_pos
+
+    def transit(self):
+        """
+        transit to the state where all components must decrease
+        """
+        self.host.transition_to(SliderAllComponent())
+
+
+class DominateAnyComponent(SliderAnyComponent):
+    """
+    concrete state of the state pattern
+    """
+
+    def update_setpoint(self):
+        """
+        any component of the R loss decreased together with the ell loss
+        """
+        flag1, list_pos = super().update_setpoint()
+        flag2 = self.host.state_task_loss < self.host.setpoint4ell
+        if flag2:
+            self.host.setpoint4ell = self.host.state_task_loss
+        return flag1 and flag2, list_pos
+
+
+class DominateAllComponent(SliderAllComponent):
+    """
+    concrete state of the state pattern
+    """
+
+    def update_setpoint(self):
+        """
+        each component of the R loss decreased and the ell loss also decreased
+        """
+        flag1, list_pos = super().update_setpoint()
+        flag2 = self.host.state_task_loss < self.host.setpoint4ell
+        if flag2:
+            logger = Logger.get_logger(logger_name="main_out_logger", loglevel="INFO")
+            logger.info(
+                f"best ell loss: from {self.host.setpoint4ell} to "
+                f"{self.host.state_task_loss}"
+            )
+            self.host.setpoint4ell = self.host.state_task_loss
+        return flag1 and flag2, list_pos
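The trainer in the next file accumulates per-batch summed losses into epoch averages inside `eval_r_loss`; a runnable mini-version with made-up batch losses instead of model calls:

```python
from operator import add

def list_divide(list_val, scalar):
    """divide a list by a scalar, as in the trainer module below"""
    return [ele / scalar for ele in list_val]

# two fake batches: (list of summed reg losses, summed task loss)
batches = [([4.0, 2.0], 1.0), ([6.0, 4.0], 3.0)]
epo_reg_loss, epo_task_loss, counter = [], 0.0, 0.0
for b_reg, b_task in batches:
    epo_reg_loss = b_reg if not epo_reg_loss else list(map(add, epo_reg_loss, b_reg))
    epo_task_loss += b_task
    counter += 1.0
print(list_divide(epo_reg_loss, counter), epo_task_loss / counter)
# -> [5.0, 3.0] 2.0
```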
diff --git a/domainlab/algos/trainers/train_fbopt_b.py b/domainlab/algos/trainers/train_fbopt_b.py
new file mode 100644
index 000000000..1efe3ce58
--- /dev/null
+++ b/domainlab/algos/trainers/train_fbopt_b.py
@@ -0,0 +1,177 @@
+"""
+update hyper-parameters during training
+"""
+from operator import add
+
+import torch
+
+from domainlab.algos.trainers.fbopt_mu_controller import HyperSchedulerFeedback
+from domainlab.algos.trainers.hyper_scheduler import HyperSchedulerWarmupLinear
+from domainlab.algos.trainers.train_basic import TrainerBasic
+from domainlab.utils.logger import Logger
+
+
+def list_divide(list_val, scalar):
+    """
+    divide a list by a scalar
+    """
+    return [ele / scalar for ele in list_val]
+
+
+class HyperSetter:
+    # pylint: disable=too-few-public-methods
+    """
+    mock object to force hyper-parameters into the model
+    """
+
+    def __init__(self, dict_hyper):
+        self.dict_hyper = dict_hyper
+
+    def __call__(self, epoch=None):
+        return self.dict_hyper
+
+
+class TrainerFbOpt(TrainerBasic):
+    """
+    trainer that tunes the multipliers via feedback control
+    """
+
+    def set_scheduler(self, scheduler):
+        """
+        Args:
+            scheduler: the class of the scheduler; an object of this class
+            will be created inside the model
+        """
+        # model.hyper_init registers the hyper-parameters of the model
+        # with the scheduler
+        self.hyper_scheduler = self.model.hyper_init(scheduler, trainer=self)
+
+    def eval_r_loss(self):
+        """
+        evaluate the regularization loss and the ERM loss with respect to the
+        parameters dict_theta, on all available training data
+        # TODO: normalize loss via batchsize
+        """
+        self.model.eval()
+        # mock the model hyper-parameters to be from dict4mu
+        epo_reg_loss = []
+        epo_task_loss = 0
+        epo_p_loss = 0
+        counter = 0.0
+        with torch.no_grad():
+            for _, (tensor_x, vec_y, vec_d, *others) in enumerate(
+                self.loader_tr_no_drop
+            ):
+                tensor_x, vec_y, vec_d = (
+                    tensor_x.to(self.device),
+                    vec_y.to(self.device),
+                    vec_d.to(self.device),
+                )
+                tuple_reg_loss = self.model.cal_reg_loss(tensor_x, vec_y, vec_d, others)
+                p_loss, *_ = self.model.cal_loss(tensor_x, vec_y, vec_d, others)
+                # tuple_reg_loss[0] is the list of per-loss mini-batch tensors
+                list_b_reg_loss = tuple_reg_loss[0]
+                list_b_reg_loss_summed = [
+                    ele.sum().detach().item() for ele in list_b_reg_loss
+                ]
+                if len(epo_reg_loss) == 0:
+                    epo_reg_loss = list_b_reg_loss_summed
+                else:
+                    epo_reg_loss = list(map(add, epo_reg_loss, list_b_reg_loss_summed))
+                b_task_loss = (
+                    self.model.cal_task_loss(tensor_x, vec_y).sum().detach().item()
+                )
+                # sum() kills the dimension of the mini-batch
+                epo_task_loss += b_task_loss
+                epo_p_loss += p_loss.sum().detach().item()
+                counter += 1.0
+        return (
+            list_divide(epo_reg_loss, counter),
+            epo_task_loss / counter,
+            epo_p_loss / counter,
+        )
+
+    def before_batch(self, epoch, ind_batch):
+        """
+        if hyper-parameters should be updated per batch, then the step
+        should be set to epoch * self.num_batches + ind_batch
+        """
+        if self.flag_update_hyper_per_batch:
+            # NOTE: if not updating per batch, do not update here
+            self.model.hyper_update(
+                epoch * self.num_batches + ind_batch, self.hyper_scheduler
+            )
+        return super().after_batch(epoch, ind_batch)
+
+    def before_tr(self):
+        self.flag_setpoint_updated = False
+        if self.aconf.force_feedforward:
+            self.set_scheduler(scheduler=HyperSchedulerWarmupLinear)
+        else:
+            self.set_scheduler(scheduler=HyperSchedulerFeedback)
+
+        self.set_model_with_mu()  # very small value
+        if self.aconf.tr_with_init_mu:
+            self.tr_with_init_mu()
+
+        (
+            self.epo_reg_loss_tr,
+            self.epo_task_loss_tr,
+            self.epo_loss_tr,
+        ) = self.eval_r_loss()
+        self.hyper_scheduler.set_setpoint(
+            [
+                ele * self.aconf.ini_setpoint_ratio
+                if ele > 0
+                else ele / self.aconf.ini_setpoint_ratio
+                for ele in self.epo_reg_loss_tr
+            ],
+            self.epo_task_loss_tr,
+        )  # setpoint w.r.t. the random initialization of the neural network
+        self.hyper_scheduler.set_k_i_gain(self.epo_reg_loss_tr)
+
+    @property
+    def list_str_multiplier_na(self):
+        """
+        return the names of the multipliers
+        """
+        return self.model.list_str_multiplier_na
+
+    def tr_with_init_mu(self):
+        """
+        one ERM epoch with the very small initial mu
+        """
+        super().tr_epoch(-1)
+
+    def set_model_with_mu(self):
+        """
+        set the model multipliers
+        """
+        self.model.hyper_update(
+            epoch=None, fun_scheduler=HyperSetter(self.hyper_scheduler.mmu)
+        )
+
+    def tr_epoch(self, epoch, flag_info=False):
+        """
+        update the multipliers only once per epoch
+        """
+        self.hyper_scheduler.search_mu(
+            self.epo_reg_loss_tr,
+            self.epo_task_loss_tr,
+            self.epo_loss_tr,
+            self.list_str_multiplier_na,
+            miter=epoch,
+        )
+        self.set_model_with_mu()
+        if hasattr(self.model, "dict_multiplier"):
+            logger = Logger.get_logger()
+            logger.info(f"current multiplier: {self.model.dict_multiplier}")
+
+        if self._decoratee is not None:
+            flag = self._decoratee.tr_epoch(epoch, self.flag_setpoint_updated)
+        else:
+            flag = super().tr_epoch(epoch, self.flag_setpoint_updated)
+        # is it good to update the setpoint after we know the new value of
+        # each loss?
+        self.flag_setpoint_updated = self.hyper_scheduler.update_setpoint(
+            self.epo_reg_loss_tr, self.epo_task_loss_tr
+        )
+        return flag
diff --git a/domainlab/utils/generate_fbopt_phase_portrait.py b/domainlab/utils/generate_fbopt_phase_portrait.py
new file mode 100644
index 000000000..ef4b28806
--- /dev/null
+++ b/domainlab/utils/generate_fbopt_phase_portrait.py
@@ -0,0 +1,402 @@
+"""
+This file generates phase portraits from tensorboard event files.
+""" +import argparse +import glob +import os +import numpy as np +import re +from tensorboard.backend.event_processing.event_accumulator import EventAccumulator + +import matplotlib +import matplotlib.pyplot as plt +from matplotlib.backends.backend_pdf import PdfPages +matplotlib.rcParams['pdf.fonttype'] = 42 +matplotlib.rcParams['text.usetex'] = True +plt.rcParams['text.latex.preamble'] = r'\usepackage{amsmath}' +font = {'size': 20} +matplotlib.rc('font', **font) + + +def sav2pdfpage(fig, fname): + pdf_page = PdfPages(fname) + pdf_page.savefig(fig, bbox_inches="tight") + pdf_page.close() + +def latex_to_nonlatex(latex_string): + nonlatex_string = re.sub(r'[{$}]', '', latex_string) + nonlatex_string = nonlatex_string.replace("\\", "") + return nonlatex_string + +class ListFileHandler: + def __init__(self, file_path): + self.file_path = file_path + + def write_lists_to_file(self, list1, list2=None): + with open(self.file_path, 'w') as file: + if list2 is None: + for val1 in list1: + file.write(f"{val1}\n") + else: + for val1, val2 in zip(list1, list2): + file.write(f"{val1} {val2}\n") + + def read_lists_from_file(self): + list1 = [] + list2 = [] + with open(self.file_path, 'r') as file: + for line in file: + values = list(map(float, line.strip().split())) + if len(values) == 1: + list1.append(values[0]) + elif len(values) == 2: + list1.append(values[0]) + list2.append(values[1]) + return list1, list2 + + +# pylint: disable=too-many-arguments +def get_xy_from_event_file( + event_file, + plot1, + plot2=None, + tf_size_guidance=None, + sanity_check=False, + verbose=True, +): + """ + extract x and y values from a tensorboard event file + """ + if tf_size_guidance is None: + # settings for which/how much data is loaded from the + # tensorboard event files + tf_size_guidance = { + "compressedHistograms": 0, + "images": 0, + "scalars": 1e10, # keep unlimited number + "histograms": 0, + } + # load event file + event = EventAccumulator(event_file, tf_size_guidance) + event.Reload() + # print names of available plots + if verbose: + print(f"Event file {event_file} -- available plots:") + print(event.Tags()["scalars"]) + if plot2: + # extract the plot2 values (e.g., reg/dyn0) + y_event = event.Scalars(plot2) + y = [s.value for s in y_event] + x_int = [s.step for s in y_event] + # the .step data are saved as ints in tensorboard, + # (so, in case of phase portrait, we re-extact from 'task') + else: + y = None + # extract the corresponding plot1 values (e.g., 'task') + x_event = event.Scalars(plot1) + x = [s.value for s in x_event] + # sanity check (originally added for the reg/dyn0 vs. 
task phase portrait;
+    # shouldn't be needed if plot1 and plot2 represent something else):
+    if sanity_check:
+        for i in range(len(x)):
+            assert int(x[i]) == x_int[i]
+
+    return x, y
+
+
+# pylint: disable=too-many-arguments, too-many-locals, redefined-outer-name, unused-argument
+def phase_portrait_combined(
+    event_files,
+    colors,
+    plot1,
+    plot2,
+    legend1=None,
+    legend2=None,
+    plot_len=None,
+    skip_n_steps=1,
+    output_dir=".",
+):
+    """
+    combined phase portrait for multiple (at least one) tensorboard
+    event files in the same plot
+    """
+    fig = plt.figure()
+
+    for event_i in range(len(event_files)):
+        x, y = get_xy_from_event_file(event_files[event_i], plot1=plot1, plot2=plot2)
+
+        assert len(x) == len(y)
+        if plot_len is None:
+            plot_len = len(x)
+        # truncate x and y to the desired length:
+        x = x[:plot_len]
+        y = y[:plot_len]
+        # skip every n steps
+        x = x[0::skip_n_steps]
+        y = y[0::skip_n_steps]
+        # mirror negative values so the log-log axes stay valid
+        x = [-ele if ele < 0 else ele for ele in x]
+        y = [-ele if ele < 0 else ele for ele in y]
+
+        head_w_glob = min((max(x) - min(x)) / plot_len, (max(y) - min(y)) / plot_len)
+        head_w_glob *= skip_n_steps
+        for i in range(len(x) - 1):
+            xy_dist = np.sqrt((x[i + 1] - x[i]) ** 2 + (y[i + 1] - y[i]) ** 2)
+            head_l = xy_dist / plot_len * skip_n_steps
+            # let the width be one tenth of the length
+            head_w = min(head_l / 10.0, head_w_glob)
+            plt.arrow(
+                x[i],
+                y[i],
+                (x[i + 1] - x[i]),
+                (y[i + 1] - y[i]),
+                head_width=head_w,
+                head_length=head_l,
+                length_includes_head=True,
+                fc=colors[event_i],
+                ec=colors[event_i],
+                alpha=0.8,
+            )
+            # the combination of head_width and head_length makes the arrow
+            # more visible.
+            # length_includes_head=False makes the arrow stick out too far
+            # beyond the point, so True is used.
+
+        # use a finite palette:
+        # colors = ["red", "green", "blue", "yellow", "purple"]
+        # list_color = [colors[i % len(colors)] for i, h in enumerate(x)]
+        # use numerical colors instead
+        colors = np.arange(0, plot_len, skip_n_steps)
+        plt.plot(x[0], y[0], "ko")
+        # plt.scatter(x, y, s=1, c=np.array(list_color))
+        plt.scatter(x, y, s=10, c=colors, cmap='viridis')
+        plt.yscale("log")
+        plt.xscale("log")
+        plt.colorbar()
+
+    if legend1 is None:
+        legend1 = plot1
+    if legend2 is None:
+        legend2 = plot2
+    plt.xlabel(legend1)
+    plt.ylabel(legend2)
+    plt.title("output portrait")
+
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    legend22 = legend2.split(os.sep)[-1]
+
+    fname_legend = latex_to_nonlatex(legend22)
+
+    # write x and y data to a text file:
+    txt_name = os.path.join(output_dir, f"phase_portrait_{fname_legend}.txt")
+    fh = ListFileHandler(txt_name)
+    fh.write_lists_to_file(x, y)
+
+    # save figures
+    fname = os.path.join(output_dir, f"phase_portrait_{fname_legend}")
+    plt.savefig(fname + ".png", dpi=300, bbox_inches="tight")
+    plt.savefig(fname + ".pdf", format="pdf", bbox_inches="tight")
+    plt.savefig(fname + ".svg", format="svg", bbox_inches="tight")
+    sav2pdfpage(fig, fname + "_pdfpage.pdf")
+
+
+def two_curves_combined(
+    event_files,
+    colors,
+    plot1,
+    plot2,
+    legend1=None,
+    legend2=None,
+    output_dir=".",
+    title=None,
+    logscale=False,
+    neg=False,
+    prefix="output_r_",
+    plot_len=None):
+    """
+    FIXME: the colors parameter is not used
+    """
+    fig = plt.figure()
+    for event_i in range(len(event_files)):
+        x, y = get_xy_from_event_file(event_files[event_i], plot1=plot1, plot2=plot2)
+        if plot_len is None:
+            plot_len = len(x)
+        # truncate x and y to the desired length:
+        x = x[:plot_len]
+        y = y[:plot_len]
+
+        if neg:
+            plt.plot(-np.array(x), color="blue")
plt.plot(-np.array(y), color="red") + else: + plt.plot(x, color="blue") + plt.plot(y, color="red") + if logscale: + plt.yscale("log") + plt.xlabel("Epoch") + # plt.ylabel("loss") + if title is not None: + plt.title(title) + if legend1 is None: + legend1 = plot1 + if legend2 is None: + legend2 = plot2 + plt.legend([legend1, legend2]) + + legend11 = legend1.replace(os.sep, "_") + legend22 = legend2.replace(os.sep, "_") + + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + fname_legend = latex_to_nonlatex(legend11) + fname_legend += latex_to_nonlatex(legend22) + # write x and y data to a text file: + txt_name = os.path.join(output_dir, prefix+f"{fname_legend}.txt") + fh = ListFileHandler(txt_name) + fh.write_lists_to_file(x, y) + + # save figures + fname_logscale = "_logscale" if logscale else "" + fname = os.path.join(output_dir, prefix+f"{fname_legend}") + plt.savefig(fname+fname_logscale+".png", dpi=300, bbox_inches="tight") + plt.savefig(fname+fname_logscale+".pdf", format="pdf", bbox_inches="tight") + plt.savefig(fname+fname_logscale+".svg", format="svg", bbox_inches="tight") + pdf_page = PdfPages(fname+fname_logscale+"_pdfpage.pdf") + pdf_page.savefig(fig, bbox_inches="tight") + pdf_page.close() + + + + +def plot_single_curve(event_files, colors, plot1, legend1=None, output_dir=".", plot_len=None): + """ + FIXME: colors parameter is not used + """ + fig = plt.figure() + for event_i in range(len(event_files)): + x, _ = get_xy_from_event_file(event_files[event_i], plot1=plot1) + if plot_len is None: + plot_len = len(x) + # truncate x and y to the desired length: + x = x[:plot_len] + plt.plot(x) + plt.yscale("log") + plt.xlabel("Epoch") + if legend1 is None: + legend1 = plot1 + plt.ylabel(legend1) + # plt.title("timecourse") + + legend11 = legend1.replace(os.sep, "_") + fname_legend = latex_to_nonlatex(legend11) + + # save figures + if not os.path.exists(output_dir): + os.makedirs(output_dir) + plt.savefig(os.path.join(output_dir, f"single_timecourse_{fname_legend}.png"), dpi=300, bbox_inches="tight") + plt.savefig(os.path.join(output_dir, f"single_timecourse_{fname_legend}.pdf"), format="pdf", bbox_inches="tight") + plt.savefig(os.path.join(output_dir, f"single_timecourse_{fname_legend}.svg"), format="svg", bbox_inches="tight") + pdf_page = PdfPages(os.path.join(output_dir, f"single_timecourse_{fname_legend}_pdfpage.pdf")) + pdf_page.savefig(fig, bbox_inches="tight") + pdf_page.close() + + # write x and y data to a text file: + txt_name = os.path.join(output_dir, f"single_timecourse_{fname_legend}.txt") + fh = ListFileHandler(txt_name) + fh.write_lists_to_file(list(range(len(x))), x) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="plot") + parser.add_argument("-plot1", "--plot1", default=None, type=str) + parser.add_argument("-plot2", "--plot2", default=None, type=str) + parser.add_argument("-legend1", "--legend1", default=None, type=str) + parser.add_argument("-legend2", "--legend2", default=None, type=str) + parser.add_argument("-plot_len", "--plot_len", default=None, type=int) + parser.add_argument("-skip_n_steps", "--skip_n_steps", default=None, type=int) + parser.add_argument("-title", "--title", default=None, type=str) + parser.add_argument("--output_dir", default=".", type=str) + parser.add_argument("--runs_dir", default="runs", type=str) + parser.add_argument( + "--neg", + action="store_true", + help="if true, plot negative of a list", + ) + parser.add_argument( + "--phase_portrait", + action="store_true", + help="if True plots a 
phase portrait,\ + otherwise a curve (default)", + ) + args = parser.parse_args() + + # get event files from all available runs + # Tensorboard: * could be the date information, this intermediate directory + # always exist + # events* means all the event folders + # this would combine plots from all subfolders in the runs directory (i.e., all graphs combined in each plot): + #event_files = glob.glob(f"{args.runs_dir}/*/events*") + # this needs the user to specify a specific run (subfolder in the runs directory): + event_files = glob.glob(f"{args.runs_dir}/events*") + if not os.path.isdir(args.runs_dir): raise RuntimeError("runs_dir should be a directory.") + print( + "Using the following tensorboard event files:\n{}".format( + "\n".join(event_files) + ) + ) + + # Different colors for the different runs + cmap = plt.get_cmap("tab10") # Choose a colormap + colors = [cmap(i) for i in range(len(event_files))] + + if args.phase_portrait: + phase_portrait_combined( + event_files, + colors, + plot1=args.plot1, + plot2=args.plot2, + legend1=args.legend1, + legend2=args.legend2, + plot_len=args.plot_len, + skip_n_steps=args.skip_n_steps, + output_dir=args.output_dir, + ) + else: + if args.plot2: + # two curves per plot + two_curves_combined( + event_files, + colors, + plot1=args.plot1, + plot2=args.plot2, + legend1=args.legend1, + legend2=args.legend2, + output_dir=args.output_dir, + title=args.title, + neg=args.neg + ) + two_curves_combined( + event_files, + colors, + plot1=args.plot1, + plot2=args.plot2, + legend1=args.legend1, + legend2=args.legend2, + output_dir=args.output_dir, + title=args.title, + neg=args.neg, + logscale=True + ) + + else: + # one curve per plot + plot_single_curve( + event_files, + colors, + plot1=args.plot1, + legend1=args.legend1, + output_dir=args.output_dir, + ) diff --git a/examples/benchmark/benchmark_pacs_resnet_grid_jigen.yaml b/examples/benchmark/benchmark_pacs_resnet_grid_jigen.yaml new file mode 100644 index 000000000..28c6705e3 --- /dev/null +++ b/examples/benchmark/benchmark_pacs_resnet_grid_jigen.yaml @@ -0,0 +1,52 @@ +# test benchmark config. 
+ +mode: grid + +test_domains: + - sketch + +output_dir: zoutput/benchmarks/pacs_benchmark_grid + +startseed: 0 +endseed: 1 # currently included + + +domainlab_args: + tpath: examples/tasks/task_pacs_path_list.py + dmem: False + lr: 5e-5 + epos: 500 + es: 1 + bs: 32 + npath: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + san_check: False + + +Shared params: + gamma_reg: + min: 0.01 + max: 10 + step: 0.1 + distribution: loguniform + num: 3 + + +jigen: # name + model: jigen + shared: + - gamma_reg + + hyperparameters: + # probability of permutating the tiles of an image, pperm = 0 -> pure classification + pperm: + min: 0.7 + max: 1 + step: 0.1 + distribution: uniform + num: 3 + + +erm: + model: erm diff --git a/examples/benchmark/mnist_dann_fbopt.yaml b/examples/benchmark/mnist_dann_fbopt.yaml new file mode 100644 index 000000000..8bdbe444c --- /dev/null +++ b/examples/benchmark/mnist_dann_fbopt.yaml @@ -0,0 +1,62 @@ +mode: grid + +output_dir: zoutput/benchmarks/benchmark_fbopt + +sampling_seed: 0 +startseed: 0 +endseed: 2 + +test_domains: + - 0 + + +domainlab_args: + task: mnistcolor10 + tr_d: [1, 2] + dmem: False + lr: 0.0001 + epos: 500 + es: 100 + bs: 64 + nname: conv_bn_pool_2 + san_check: False + exp_shoulder_clip: 10 + mu_clip: 10_000 + coeff_ma: 0.5 + no_tensorboard: False + + + +Shared params: + ini_setpoint_ratio: + min: 0.9 + max: 0.99 + num: 3 + step: 0.05 + distribution: uniform + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + step: 0.0001 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + num: 2 + distribution: uniform + +# Test fbopt with different hyperparameter configurations + +dann_fbopt: + model: dann + trainer: fbopt + ini_setpoint_ratio: 0.9 + shared: + - k_i_gain + - mu_init + +erm: + model: erm diff --git a/examples/benchmark/mnist_diva_fbopt_alone.yaml b/examples/benchmark/mnist_diva_fbopt_alone.yaml new file mode 100644 index 000000000..c483b0e68 --- /dev/null +++ b/examples/benchmark/mnist_diva_fbopt_alone.yaml @@ -0,0 +1,92 @@ +mode: grid + +output_dir: zoutput/benchmarks/mnist_diva_fbopt_alone + +sampling_seed: 0 +startseed: 0 +endseed: 10 + +test_domains: + - 0 + + +domainlab_args: + task: mnistcolor10 + tr_d: [1, 2] + dmem: False + lr: 0.001 + epos: 500 + epos_min: 20 + es: 5 + bs: 64 + zx_dim: 0 + zy_dim: 32 + zd_dim: 32 + nname: conv_bn_pool_2 + nname_dom: conv_bn_pool_2 + nname_encoder_x2topic_h: conv_bn_pool_2 + nname_encoder_sandwich_x2h4zd: conv_bn_pool_2 + san_check: False + coeff_ma: 0.5 + no_tensorboard: False + + + +Shared params: + ini_setpoint_ratio: + min: 0.9 + max: 0.99 + num: 3 + distribution: uniform + + k_i_gain: + min: 1e-4 + max: 1e-3 + num: 2 + distribution: loguniform + + mu_init: + min: 0.000001 + max: 0.00001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + mu_clip: + distribution: categorical + datatype: float + values: + - 1000 + - 100 + - 10 + - 1 + +# Test fbopt with different hyperparameter configurations + +diva_fbopt_a: + model: diva + trainer: fbopt + str_diva_multiplier_type: gammad_recon + gamma_y: 1.0 + init_setpoint_ratio: 0.99 + exp_shoulder_clip: 1 + mu_init: 1e-6 + shared: + - k_i_gain + - mu_clip + +erm: + model: erm diff --git a/examples/benchmark/mnist_diva_fbopt_and_baselines.yaml 
b/examples/benchmark/mnist_diva_fbopt_and_baselines.yaml new file mode 100644 index 000000000..b687b69f4 --- /dev/null +++ b/examples/benchmark/mnist_diva_fbopt_and_baselines.yaml @@ -0,0 +1,122 @@ +mode: grid + +output_dir: zoutput/benchmarks/mnist_diva_fbopt_and_baselines + +sampling_seed: 0 +startseed: 0 +endseed: 10 + +test_domains: + - 0 + + +domainlab_args: + task: mnistcolor10 + tr_d: [1, 2] + dmem: False + lr: 0.001 + epos: 5000 + epos_min: 500 + es: 5 + bs: 64 + zx_dim: 0 + zy_dim: 32 + zd_dim: 32 + nname: conv_bn_pool_2 + nname_dom: conv_bn_pool_2 + nname_encoder_x2topic_h: conv_bn_pool_2 + nname_encoder_sandwich_x2h4zd: conv_bn_pool_2 + san_check: False + coeff_ma: 0.5 + no_tensorboard: False + + + +Shared params: + ini_setpoint_ratio: + min: 0.9 + max: 0.99 + num: 3 + distribution: uniform + + k_i_gain: + min: 1e-4 + max: 1e-3 + num: 2 + distribution: loguniform + + k_i_gain_ratio: + min: 0.1 + max: 1 + num: 5 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + mu_clip: + distribution: categorical + datatype: float + values: + - 1000 + - 100 + - 10 + - 1 + +# Test fbopt with different hyperparameter configurations + +diva_fbopt_a: + model: diva + trainer: fbopt + str_diva_multiplier_type: gammad_recon + gamma_y: 1.0 + init_setpoint_ratio: 0.99 + exp_shoulder_clip: 1 + mu_init: 1e-6 + shared: + - k_i_gain_ratio + - mu_clip + +diva_feedforward_a: + model: diva + trainer: hyperscheduler + str_diva_multiplier_type: gammad_recon + gamma_y: 1.0 + shared: + - gamma_d + +diva_default: + model: diva + trainer: hyperscheduler + str_diva_multiplier_type: default + shared: + - gamma_d + - gamma_y + +diva_fixed_penalty: + model: diva + trainer: basic + str_diva_multiplier_type: default + shared: + - gamma_d + - gamma_y + +erm: + model: erm diff --git a/examples/benchmark/mnist_jigen_fbopt_alone.yaml b/examples/benchmark/mnist_jigen_fbopt_alone.yaml new file mode 100644 index 000000000..388bdcf13 --- /dev/null +++ b/examples/benchmark/mnist_jigen_fbopt_alone.yaml @@ -0,0 +1,79 @@ +mode: grid + +output_dir: zoutput/benchmarks/benchmark_fbopt + +sampling_seed: 0 +startseed: 0 +endseed: 4 + +test_domains: + - 0 + + +domainlab_args: + task: mnistcolor10 + tr_d: [1, 2] + dmem: False + lr: 0.001 + epos: 1000 + es: 100 + bs: 64 + nname: conv_bn_pool_2 + san_check: False + exp_shoulder_clip: 10 + mu_clip: 10 + coeff_ma: 0.5 + no_tensorboard: False + pperm: 0.5 + + + +Shared params: + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + coeff_ma_setpoint: + distribution: uniform + min: 0.0 + max: 0.9 + num: 2 + setpoint_rewind: + distribution: categorical + datatype: str + values: + - "yes" + - "no" + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + num: 2 + distribution: loguniform + + gamma_reg: + min: 0.01 + max: 10_000 + num: 10 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations + +jigen_feedback: + model: jigen + trainer: fbopt + ini_setpoint_ratio: 0.99 + coeff_ma_output_state: 0.5 + coeff_ma_setpoint: 0.5 + shared: + - k_i_gain + - mu_init diff --git a/examples/benchmark/mnist_jigen_fbopt_and_others.yaml b/examples/benchmark/mnist_jigen_fbopt_and_others.yaml new file mode 100644 index 000000000..bd4857610 --- /dev/null +++ 
b/examples/benchmark/mnist_jigen_fbopt_and_others.yaml @@ -0,0 +1,80 @@ +mode: grid + +output_dir: zoutput/benchmarks/mnist_fbopt_and_others + +sampling_seed: 0 +startseed: 0 +endseed: 4 + +test_domains: + - 0 + + +domainlab_args: + task: mnistcolor10 + tr_d: [1, 2] + dmem: False + lr: 0.001 + epos: 2000 + epos_min: 100 + es: 1 + bs: 64 + nname: conv_bn_pool_2 + san_check: False + no_tensorboard: False + pperm: 0.5 + + + +Shared params: + k_i_gain: + min: 1e-4 + max: 1e-3 + num: 2 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + num: 3 + distribution: loguniform + + gamma_reg: + min: 0.01 + max: 1e4 + num: 3 + distribution: loguniform + + mu_clip: + min: 0.01 + max: 1e4 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations + +jigen_feedback: + model: jigen + trainer: fbopt + ini_setpoint_ratio: 0.99 + mu_init: 1e-6 + shared: + - k_i_gain + - mu_clip + +jigen_feedforward: + model: jigen + trainer: hyperscheduler + shared: + - gamma_reg + +jigen_fixed_penalty: + model: jigen + trainer: basic + shared: + - gamma_reg + +erm: + model: erm diff --git a/examples/benchmark/pacs_dann_fbopt.yaml b/examples/benchmark/pacs_dann_fbopt.yaml new file mode 100644 index 000000000..b5c743033 --- /dev/null +++ b/examples/benchmark/pacs_dann_fbopt.yaml @@ -0,0 +1,53 @@ +mode: grid + +output_dir: zoutput/benchmarks/benchmark_fbopt_pacs + +sampling_seed: 0 +startseed: 0 +endseed: 2 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_path_list.py + dmem: False + lr: 5e-5 + epos: 1 + es: 5 + bs: 64 + san_check: True + npath: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + exp_shoulder_clip: 10 + mu_clip: 10_000 + coeff_ma: 0.5 + no_tensorboard: False + + + +Shared params: + ini_setpoint_ratio: + min: 0.5 + max: 0.99 + num: 2 + step: 0.05 + distribution: uniform + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + step: 0.0001 + distribution: uniform + +# Test fbopt with different hyperparameter configurations + +dann_fbopt: + model: dann + trainer: fbopt + shared: + - ini_setpoint_ratio + - k_i_gain + - es diff --git a/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki.yaml b/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki.yaml new file mode 100644 index 000000000..35c93c236 --- /dev/null +++ b/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki.yaml @@ -0,0 +1,106 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_diva_fbopt_alone_zx + +sampling_seed: 0 + +startseed: 0 +endseed: 2 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 5 + bs: 32 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + zx_dim: 16 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + + mu_clip: + distribution: categorical + datatype: float + values: + - 10 + - 1000 + - 1 + - 100 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + k_i_gain_ratio: + min: 0.1 + max: 1 + num: 3 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + 
step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +diva_fbopt_full: + model: diva + trainer: fbopt + force_setpoint_change_once: True + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + str_diva_multiplier_type: gammad_recon + mu_init: 1e-6 + shared: + - k_i_gain_ratio + - mu_clip diff --git a/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki_1run.yaml b/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki_1run.yaml new file mode 100644 index 000000000..9ec43ac7b --- /dev/null +++ b/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki_1run.yaml @@ -0,0 +1,107 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_diva_fbopt_alone_single_run + +sampling_seed: 0 + +startseed: 0 +endseed: 0 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 5 + bs: 32 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + zx_dim: 16 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + + mu_clip: + distribution: categorical + datatype: float + values: + - 10 + - 1000 + - 1 + - 100 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + k_i_gain_ratio: + min: 0.1 + max: 1 + num: 3 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +diva_fbopt_full: + model: diva + trainer: fbopt + force_setpoint_change_once: True + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + str_diva_multiplier_type: gammad_recon + mu_init: 1e-6 + k_i_gain_ratio: 0.5 + mu_clip: 10 + coeff_ma_output_state: 0.0 + coeff_ma_setpoint: 0.0 diff --git a/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki_output_ma_9.yaml b/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki_output_ma_9.yaml new file mode 100644 index 000000000..36fd10554 --- /dev/null +++ b/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki_output_ma_9.yaml @@ -0,0 +1,108 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_diva_fbopt_autoki_aug + +sampling_seed: 0 + +startseed: 0 +endseed: 10 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 1 + bs: 64 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + 
str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + - 0.8 + + mu_clip: + distribution: categorical + datatype: float + values: + - 10 + - 1000 + - 1 + - 100 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + k_i_gain_ratio: + min: 0.1 + max: 10 + num: 10 + distribution: loguniform + + mu_init: + min: 0.000001 + max: 0.00001 + step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +diva_fbopt_full: + model: diva + trainer: fbopt + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + str_diva_multiplier_type: gammad_recon + mu_init: 1e-6 + shared: + - k_i_gain_ratio + - coeff_ma_output_state diff --git a/examples/benchmark/pacs_diva_fbopt_alone_es1_random_ki.yaml b/examples/benchmark/pacs_diva_fbopt_alone_es1_random_ki.yaml new file mode 100644 index 000000000..24177c0bc --- /dev/null +++ b/examples/benchmark/pacs_diva_fbopt_alone_es1_random_ki.yaml @@ -0,0 +1,102 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_diva_fbopt_alone + +sampling_seed: 0 + +startseed: 0 +endseed: 2 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 200 + epos_min: 20 + es: 1 + bs: 32 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + + mu_clip: + distribution: categorical + datatype: float + values: + - 10 + - 1000 + - 1 + - 100 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +diva_fbopt_full: + model: diva + trainer: fbopt + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + str_diva_multiplier_type: gammad_recon + coeff_ma_output_state: 0.1 + mu_init: 0.000001 + shared: + - k_i_gain + - mu_clip diff --git a/examples/benchmark/pacs_diva_fbopt_alone_fixed.yaml b/examples/benchmark/pacs_diva_fbopt_alone_fixed.yaml new file mode 100644 index 000000000..e2a78230a --- /dev/null +++ b/examples/benchmark/pacs_diva_fbopt_alone_fixed.yaml @@ -0,0 +1,97 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_diva_fbopt_alone + +sampling_seed: 0 + +startseed: 0 +endseed: 5 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_path_list.py + dmem: False + lr: 5e-5 + epos: 
200 + epos_min: 20 + es: 5 + bs: 64 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + + mu_clip: + distribution: categorical + datatype: int + values: + - 10 + - 1000 + - 1000_000 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 3 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +diva_fbopt_full: + model: diva + trainer: fbopt + exp_shoulder_clip: 5 + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + mu_init: 0.000001 diff --git a/examples/benchmark/pacs_diva_fbopt_and_baselines.yaml b/examples/benchmark/pacs_diva_fbopt_and_baselines.yaml new file mode 100644 index 000000000..7ea54939e --- /dev/null +++ b/examples/benchmark/pacs_diva_fbopt_and_baselines.yaml @@ -0,0 +1,119 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_diva_fbopt_and_baselines_aug + +sampling_seed: 0 + +startseed: 0 +endseed: 6 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 5 + bs: 32 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + + + +Shared params: + ini_setpoint_ratio: + min: 0.9 + max: 0.99 + num: 3 + distribution: uniform + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 3 + distribution: uniform + + k_i_gain_ratio: + min: 0.1 + max: 1 + num: 3 + distribution: uniform + + + mu_init: + min: 0.000001 + max: 0.9 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + mu_clip: + distribution: categorical + datatype: float + values: + - 1 + - 10 + - 100 + - 1000 + +# Test fbopt with different hyperparameter configurations +diva_fbopt_a: + model: diva + trainer: fbopt + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + force_setpoint_change_once: True + str_diva_multiplier_type: gammad_recon + coeff_ma_output_state: 0.1 + mu_init: 1e-6 + shared: + - k_i_gain_ratio + - mu_clip + +diva_feedforward_full: + model: diva + trainer: hyperscheduler + str_diva_multiplier_type: gammad_recon + gamma_y: 1.0 + shared: + - gamma_d + +diva_default: + model: diva + trainer: hyperscheduler + str_diva_multiplier_type: default + shared: + - gamma_d + - gamma_y + +diva_fixed_penalty: + model: diva + trainer: basic + str_diva_multiplier_type: default + shared: + - gamma_d + - gamma_y + +erm: + model: erm diff --git a/examples/benchmark/pacs_diva_others.yaml b/examples/benchmark/pacs_diva_others.yaml new file mode 100644 index 000000000..55d6a7f37 --- /dev/null +++ 
b/examples/benchmark/pacs_diva_others.yaml @@ -0,0 +1,68 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_diva_others + +sampling_seed: 0 + +startseed: 0 +endseed: 6 +test_domains: - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 5 + bs: 32 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + + + +Shared params: + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + +diva_feedforward_full: + model: diva + trainer: hyperscheduler + str_diva_multiplier_type: gammad_recon + shared: + - gamma_d + - gamma_y + +diva_default: + model: diva + trainer: hyperscheduler + str_diva_multiplier_type: default + shared: + - gamma_d + - gamma_y + +diva_fixed_penalty: + model: diva + trainer: basic + str_diva_multiplier_type: default + shared: + - gamma_d + - gamma_y diff --git a/examples/benchmark/pacs_fbopt_dial_diva.yaml b/examples/benchmark/pacs_fbopt_dial_diva.yaml new file mode 100644 index 000000000..ca2cf3921 --- /dev/null +++ b/examples/benchmark/pacs_fbopt_dial_diva.yaml @@ -0,0 +1,93 @@ +mode: grid + +output_dir: zoutput/benchmarks/benchmark_fbopt_dial_diva_pacs + +sampling_seed: 0 +startseed: 0 +endseed: 2 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 5 + bs: 16 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + + + +Shared params: + ini_setpoint_ratio: + min: 0.9 + max: 0.99 + num: 2 + step: 0.05 + distribution: uniform + + k_i_gain_ratio: + min: 0.01 + max: 0.90 + num: 3 + step: 0.0001 + distribution: uniform + + dial_lr: + min: 1e-5 + max: 1e-3 + num: 2 + step: 1e-5 + distribution: uniform + + dial_epsilon: + min: 1e-5 + max: 1e-3 + num: 2 + step: 1e-5 + distribution: uniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + +# Test fbopt with different hyperparameter configurations + +dial_fbopt: + model: diva + trainer: fbopt_dial + gamma_y: 1.0 + shared: + - ini_setpoint_ratio + - k_i_gain_ratio + - dial_lr + - dial_epsilon + +# dial: +# model: diva +# trainer: dial +# shared: +# - dial_lr +# - dial_epsilon +# - gamma_y +# - gamma_d diff --git a/examples/benchmark/pacs_fbopt_fishr_erm.yaml b/examples/benchmark/pacs_fbopt_fishr_erm.yaml new file mode 100644 index 000000000..781a2518e --- /dev/null +++ b/examples/benchmark/pacs_fbopt_fishr_erm.yaml @@ -0,0 +1,66 @@ +mode: grid + +output_dir: zoutput/benchmarks/benchmark_fbopt_fishr_erm_pacs + +sampling_seed: 0 +startseed: 0 +endseed: 0 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 10 + epos_min: 2 + es: 5 + bs: 32 + san_check: False + nname: alexnet + nname_dom: alexnet + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + + + +Shared params: + ini_setpoint_ratio: + min: 0.5 + max: 0.99 + num: 2 + step: 0.05 + distribution: uniform + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + step: 0.0001 + distribution: uniform + + gamma_reg: + min: 0.01 + max: 1e4 + num: 3 + distribution: loguniform + + +# Test fbopt with different hyperparameter configurations + 
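The two arms defined next contrast the feedback-controlled trainer (`fbopt_fishr`), which adapts the penalty multiplier online, with plain `fishr`, where the sampled `gamma_reg` stays fixed. As a rough sketch of the feedback idea behind `k_i_gain`, `ini_setpoint_ratio`, `mu_clip`, and `exp_shoulder_clip` (a hypothetical distillation, not the exact update in the fbopt trainer code):

```python
import math

def integral_step(mu, reg_loss, setpoint, k_i_gain, mu_clip, shoulder=5.0):
    """One integral-controller step on a penalty multiplier (sketch).

    Raises mu while the regularization loss sits above its setpoint and
    lowers it below; the shoulder clip keeps a single step from exploding.
    """
    err = k_i_gain * (reg_loss - setpoint)
    err = max(-shoulder, min(shoulder, err))  # clip before exponentiating
    return min(mu * math.exp(err), mu_clip)   # mu_clip caps the multiplier

mu = 1e-6  # mu_init
for reg_loss in (2.0, 1.5, 1.1, 0.9):  # reg loss drifting toward setpoint 1.0
    mu = integral_step(mu, reg_loss, setpoint=1.0, k_i_gain=0.01, mu_clip=10.0)
    print(f"mu = {mu:.4e}")
```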
+fbopt_fishr_erm: + model: erm + trainer: fbopt_fishr + shared: + - ini_setpoint_ratio + - k_i_gain + - gamma_reg + +fishr_erm: + model: erm + trainer: fishr + shared: + - gamma_reg diff --git a/examples/benchmark/pacs_hduva_baselines.yaml b/examples/benchmark/pacs_hduva_baselines.yaml new file mode 100644 index 000000000..cbdb704eb --- /dev/null +++ b/examples/benchmark/pacs_hduva_baselines.yaml @@ -0,0 +1,111 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_hduva_fbopt_and_baselines + +sampling_seed: 0 + +startseed: 0 +endseed: 10 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 1 + bs: 16 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + - 0.8 + + mu_clip: + distribution: categorical + datatype: float + values: + - 10 + - 1000 + - 1 + - 100 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + k_i_gain_ratio: + min: 0.1 + max: 10 + num: 10 + distribution: loguniform + + mu_init: + min: 0.000001 + max: 0.00001 + step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +hduva_beta_warmup: + model: hduva + shared: + - gamma_y + +hduva_fbopt_full: + model: hduva + trainer: fbopt + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + mu_init: 1e-6 + shared: + - k_i_gain_ratio diff --git a/examples/benchmark/pacs_hduva_fbopt_alone_es1_autoki_aug.yaml b/examples/benchmark/pacs_hduva_fbopt_alone_es1_autoki_aug.yaml new file mode 100644 index 000000000..d773cb25b --- /dev/null +++ b/examples/benchmark/pacs_hduva_fbopt_alone_es1_autoki_aug.yaml @@ -0,0 +1,107 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_hduva_fbopt_alone_aug + +sampling_seed: 0 + +startseed: 0 +endseed: 10 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 100 + es: 10 + bs: 16 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + - 0.8 + + mu_clip: + distribution: categorical + datatype: float + values: + - 10 + - 1000 + - 1 + - 100 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + k_i_gain_ratio: + 
min: 0.1 + max: 10 + num: 10 + distribution: loguniform + + mu_init: + min: 0.000001 + max: 0.00001 + step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +diva_fbopt_full: + model: hduva + trainer: fbopt + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + mu_init: 1e-6 + mu_clip: 10 + shared: + - k_i_gain_ratio diff --git a/examples/benchmark/pacs_hduva_matchdg.yaml b/examples/benchmark/pacs_hduva_matchdg.yaml new file mode 100644 index 000000000..f8c99d6d3 --- /dev/null +++ b/examples/benchmark/pacs_hduva_matchdg.yaml @@ -0,0 +1,112 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_hduva_fbopt_alone_aug + +sampling_seed: 0 + +startseed: 0 +endseed: 10 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 100 + es: 10 + bs: 16 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + - 0.8 + + mu_clip: + distribution: categorical + datatype: float + values: + - 10 + - 1000 + - 1 + - 100 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + k_i_gain_ratio: + min: 0.1 + max: 10 + num: 10 + distribution: loguniform + + mu_init: + min: 0.000001 + max: 0.00001 + step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_reg: + min: 0.01 + max: 10 + distribution: loguniform + num: 3 + + + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +match_duva: + model: matchhduva + epochs_ctr: 10 + shared: + - gamma_y + - gamma_reg diff --git a/examples/benchmark/pacs_jigen_baslines4fbopt.yaml b/examples/benchmark/pacs_jigen_baslines4fbopt.yaml new file mode 100644 index 000000000..8c4d99d3d --- /dev/null +++ b/examples/benchmark/pacs_jigen_baslines4fbopt.yaml @@ -0,0 +1,74 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_jigen_fbopt_baselines + +sampling_seed: 0 + +startseed: 0 +endseed: 4 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug_noflip.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 100 + es: 1 + bs: 64 + san_check: True + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + k_i_gain: + min: 0.0001 + max: 0.01 + num: 3 + distribution: loguniform + + mu_init: + min: 0.000001 + max: 0.00005 + num: 3 + distribution: 
loguniform + + pperm: + min: 0.1 + max: 0.9 + num: 3 + distribution: uniform + + gamma_reg: + min: 0.01 + max: 10 + num: 5 + distribution: loguniform + +# Test fbopt with different hyperparameter configurations + + +jigen_feedforward: + model: jigen + trainer: hyperscheduler + shared: + - gamma_reg + - pperm + +jigen_fixed_penalty: + model: jigen + trainer: basic + shared: + - gamma_reg + - pperm + +erm: + model: erm diff --git a/examples/benchmark/pacs_jigen_fbopt_alone.yaml b/examples/benchmark/pacs_jigen_fbopt_alone.yaml new file mode 100644 index 000000000..3107894ed --- /dev/null +++ b/examples/benchmark/pacs_jigen_fbopt_alone.yaml @@ -0,0 +1,85 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_jigen_fbopt_alone + +sampling_seed: 0 + +startseed: 0 +endseed: 3 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug_noflip.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 1 + bs: 64 + san_check: True + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + k_i_gain: + min: 0.0001 + max: 0.01 + num: 3 + distribution: loguniform + + k_i_gain_ratio: + min: 0.1 + max: 10 + num: 3 + distribution: loguniform + + + + mu_init: + min: 0.000001 + max: 0.00005 + num: 3 + distribution: loguniform + + pperm: + min: 0.1 + max: 0.7 + num: 3 + distribution: uniform + + gamma_reg: + min: 0.01 + max: 10_000 + num: 10 + distribution: loguniform + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + - 0.9 + +# Test fbopt with different hyperparameter configurations + +jigen_feedback: + model: jigen + trainer: fbopt + ini_setpoint_ratio: 0.99 + coeff_ma: 0.5 + mu_init: 1e-6 + shared: + - k_i_gain_ratio + - pperm + +erm: + model: erm diff --git a/examples/benchmark/pacs_jigen_fbopt_alone_autoki.yaml b/examples/benchmark/pacs_jigen_fbopt_alone_autoki.yaml new file mode 100644 index 000000000..3c70d07b6 --- /dev/null +++ b/examples/benchmark/pacs_jigen_fbopt_alone_autoki.yaml @@ -0,0 +1,92 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_jigen_fbopt_alone + +sampling_seed: 0 + +startseed: 0 +endseed: 3 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_path_list.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 50 + force_setpoint_change_once: True + es: 1 + bs: 64 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + pperm: 0.1 + # pperm correspond to 1-bias_wholeimage in https://github.com/fmcarlucci/JigenDG + + +Shared params: + k_i_gain: + min: 0.0001 + max: 0.01 + num: 3 + distribution: loguniform + + k_i_gain_ratio: + min: 0.1 + max: 1 + num: 5 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00005 + num: 3 + distribution: loguniform + + pperm: + min: 0.1 + max: 0.9 + num: 3 + distribution: uniform + + gamma_reg: + min: 0.01 + max: 10_000 + num: 10 + distribution: loguniform + + coeff_ma_setpoint: + distribution: categorical + datatype: float + values: + - 0.0 + - 0.5 + - 0.9 + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.0 + - 0.5 + - 0.9 + +# Test fbopt with different 
hyperparameter configurations + +jigen_feedback: + model: jigen + trainer: fbopt + ini_setpoint_ratio: 0.99 + mu_init: 1e-6 + + shared: + - k_i_gain_ratio + - coeff_ma_output_state + - coeff_ma_setpoint diff --git a/examples/benchmark/pacs_jigen_fbopt_and_baselines.yaml b/examples/benchmark/pacs_jigen_fbopt_and_baselines.yaml new file mode 100644 index 000000000..1421913b3 --- /dev/null +++ b/examples/benchmark/pacs_jigen_fbopt_and_baselines.yaml @@ -0,0 +1,90 @@ +mode: grid + +output_dir: zoutput/benchmarks/benchmark_fbopt_pacs_full + +sampling_seed: 0 + +startseed: 0 +endseed: 5 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_path_list.py + dmem: False + lr: 5e-5 + epos: 200 + epos_min: 20 + es: 1 + bs: 64 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + pperm: 0.5 + + +Shared params: + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: loguniform + + mu_init: + min: 0.000001 + max: 0.00005 + num: 2 + distribution: loguniform + + pperm: + min: 0.1 + max: 0.9 + num: 3 + distribution: uniform + + gamma_reg: + min: 0.01 + max: 10_000 + num: 4 + distribution: loguniform + + mu_clip: + distribution: categorical + datatype: float + values: + - 1 + - 10 + - 100 + - 1000 + +# Test fbopt with different hyperparameter configurations + +jigen_feedback: + model: jigen + trainer: fbopt + ini_setpoint_ratio: 0.99 + mu_init: 0.000001 + shared: + - k_i_gain + - mu_clip + +jigen_feedforward: + model: jigen + trainer: hyperscheduler + shared: + - gamma_reg + +jigen_fixed_penalty: + model: jigen + trainer: basic + shared: + - gamma_reg + +erm: + model: erm diff --git a/examples/benchmark/pacs_jigen_fbopt_and_baselines_aug.yaml b/examples/benchmark/pacs_jigen_fbopt_and_baselines_aug.yaml new file mode 100644 index 000000000..3b0f8dba6 --- /dev/null +++ b/examples/benchmark/pacs_jigen_fbopt_and_baselines_aug.yaml @@ -0,0 +1,105 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_aug_jigen + +sampling_seed: 0 + +startseed: 0 +endseed: 3 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug_noflip.py + dmem: False + epos: 500 + epos_min: 200 + es: 1 + bs: 64 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + pperm: 0.1 + + +Shared params: + lr: + distribution: categorical + values: + - 5e-5 + - 1e-3 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: loguniform + + k_i_gain_ratio: + min: 0.1 + max: 1 + num: 4 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00005 + num: 2 + distribution: loguniform + + pperm: + min: 0.1 + max: 0.9 + num: 3 + distribution: uniform + + gamma_reg: + min: 0.01 + max: 10 + num: 4 + distribution: loguniform + + mu_clip: + distribution: categorical + datatype: float + values: + - 0.01 + - 0.1 + - 1.0 + - 10 + +# Test fbopt with different hyperparameter configurations + +jigen_feedback: + model: jigen + trainer: fbopt + ini_setpoint_ratio: 0.99 + mu_init: 1e-6 + force_setpoint_change_once: True + shared: + - k_i_gain_ratio + - mu_clip + - lr + +jigen_feedforward: + model: jigen + trainer: hyperscheduler + shared: + - gamma_reg + - 
lr + +jigen_fixed_penalty: + model: jigen + trainer: basic + shared: + - gamma_reg + - lr + +erm: + model: erm diff --git a/examples/benchmark/test_benchmark_fbopt.yaml b/examples/benchmark/test_benchmark_fbopt.yaml new file mode 100644 index 000000000..87ce24a8d --- /dev/null +++ b/examples/benchmark/test_benchmark_fbopt.yaml @@ -0,0 +1,70 @@ +mode: grid + +output_dir: zoutput/benchmarks/benchmark_fbopt + +num_param_samples: 8 +sampling_seed: 0 +startseed: 0 +endseed: 2 + +test_domains: + - 3 + - 0 + + +domainlab_args: + task: mnistcolor10 + tr_d: [1, 2] + dmem: False + lr: 0.001 + epos: 3 + es: 5 + bs: 64 + nname: conv_bn_pool_2 + san_check: True + + +Shared params: + ini_setpoint_ratio: + min: 0.5 + max: 0.99 + num: 2 + step: 0.001 + distribution: uniform + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + step: 0.0001 + distribution: uniform + + exp_shoulder_clip: + min: 5 + max: 10 + num: 2 + step: 1 + distribution: uniform + + mu_clip: + min: 0.001 + max: 1e4 + num: 2 + step: 10 + distribution: loguniform + + coeff_ma: + min: 0.001 + max: 0.99 + num: 2 + step: 0.001 + distribution: uniform + +# Test fbopt with different hyperparameter configurations + +jigen_fbopt: + model: jigen + trainer: fbopt + + shared: + - ini_setpoint_ratio diff --git a/fbopt_mnist_diva_pixel.sh b/fbopt_mnist_diva_pixel.sh new file mode 100644 index 000000000..bac129db9 --- /dev/null +++ b/fbopt_mnist_diva_pixel.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occurring +# pytest -s tests/test_fbopt.py + +python main_out.py --te_d=1 --tr_d 0 3 --task=mnistcolor10 --bs=16 --model=diva --trainer=fbopt --nname=conv_bn_pool_2 --epos=2000 --es=2000 --mu_init=0.00001 --gamma_y=1.0 --mu_clip=10 --str_diva_multiplier_type=gammad_recon_per_pixel diff --git a/requirements_notorch.txt b/requirements_notorch.txt new file mode 100644 index 000000000..5aec65167 --- /dev/null +++ b/requirements_notorch.txt @@ -0,0 +1,79 @@ +appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0" +attrs==23.1.0 ; python_version >= "3.9" and python_version < "4.0" +beautifulsoup4==4.12.2 ; python_version >= "3.9" and python_version < "4.0" +certifi==2023.7.22 ; python_version >= "3.9" and python_version < "4.0" +charset-normalizer==3.2.0 ; python_version >= "3.9" and python_version < "4.0" +colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0" and platform_system == "Windows" +configargparse==1.7 ; python_version >= "3.9" and python_version < "4.0" +connection-pool==0.0.3 ; python_version >= "3.9" and python_version < "4.0" +contourpy==1.1.0 ; python_version >= "3.9" and python_version < "4.0" +cycler==0.11.0 ; python_version >= "3.9" and python_version < "4.0" +datrie==0.8.2 ; python_version >= "3.9" and python_version < "4.0" +docutils==0.20.1 ; python_version >= "3.9" and python_version < "4.0" +dpath==2.1.6 ; python_version >= "3.9" and python_version < "4.0" +fastjsonschema==2.18.0 ; python_version >= "3.9" and python_version < "4.0" +filelock==3.12.2 ; python_version >= "3.9" and python_version < "4.0" +fonttools==4.42.0 ; python_version >= "3.9" and python_version < "4.0" +gdown==4.7.1 ; python_version >= "3.9" and python_version < "4.0" +gitdb==4.0.10 ; python_version >= "3.9" and python_version < "4.0" +gitpython==3.1.32 ; python_version >= "3.9" and python_version < "4.0"
+humanfriendly==10.0 ; python_version >= "3.9" and python_version < "4.0" +idna==3.4 ; python_version >= "3.9" and python_version < "4.0" +importlib-resources==6.0.1 ; python_version >= "3.9" and python_version < "3.10" +jinja2==3.1.2 ; python_version >= "3.9" and python_version < "4.0" +joblib==1.3.2 ; python_version >= "3.9" and python_version < "4.0" +jsonschema-specifications==2023.7.1 ; python_version >= "3.9" and python_version < "4.0" +jsonschema==4.19.0 ; python_version >= "3.9" and python_version < "4.0" +jupyter-core==5.3.1 ; python_version >= "3.9" and python_version < "4.0" +kiwisolver==1.4.4 ; python_version >= "3.9" and python_version < "4.0" +markdown-it-py==3.0.0 ; python_version >= "3.9" and python_version < "4.0" +markupsafe==2.1.3 ; python_version >= "3.9" and python_version < "4.0" +matplotlib==3.7.2 ; python_version >= "3.9" and python_version < "4.0" +mdurl==0.1.2 ; python_version >= "3.9" and python_version < "4.0" +nbformat==5.9.2 ; python_version >= "3.9" and python_version < "4.0" +numpy==1.25.2 ; python_version < "4.0" and python_version >= "3.9" +packaging==23.1 ; python_version >= "3.9" and python_version < "4.0" +pandas==1.5.3 ; python_version >= "3.9" and python_version < "4.0" +pillow==9.5.0 ; python_version >= "3.9" and python_version < "4.0" +plac==1.3.5 ; python_version >= "3.9" and python_version < "4.0" +platformdirs==3.10.0 ; python_version >= "3.9" and python_version < "4.0" +psutil==5.9.5 ; python_version >= "3.9" and python_version < "4.0" +pulp==2.7.0 ; python_version >= "3.9" and python_version < "4.0" +pygments==2.16.1 ; python_version >= "3.9" and python_version < "4.0" +pyparsing==3.0.9 ; python_version >= "3.9" and python_version < "4.0" +pyreadline3==3.4.1 ; sys_platform == "win32" and python_version >= "3.9" and python_version < "4.0" +pysocks==1.7.1 ; python_version >= "3.9" and python_version < "4.0" +python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0" +pytz==2023.3 ; python_version >= "3.9" and python_version < "4.0" +pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.9" and python_version < "4.0" +pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "4.0" +referencing==0.30.2 ; python_version >= "3.9" and python_version < "4.0" +requests==2.31.0 ; python_version >= "3.9" and python_version < "4.0" +requests[socks]==2.31.0 ; python_version >= "3.9" and python_version < "4.0" +reretry==0.11.8 ; python_version >= "3.9" and python_version < "4.0" +rich==13.5.2 ; python_version >= "3.9" and python_version < "4.0" +rpds-py==0.9.2 ; python_version >= "3.9" and python_version < "4.0" +scikit-learn==1.3.0 ; python_version >= "3.9" and python_version < "4.0" +scipy==1.9.3 ; python_version >= "3.9" and python_version < "4.0" +seaborn==0.12.2 ; python_version >= "3.9" and python_version < "4.0" +setuptools-scm==7.1.0 ; python_version >= "3.9" and python_version < "4.0" +setuptools==68.0.0 ; python_version >= "3.9" and python_version < "4.0" +six==1.16.0 ; python_version >= "3.9" and python_version < "4.0" +smart-open==6.3.0 ; python_version >= "3.9" and python_version < "4.0" +smmap==5.0.0 ; python_version >= "3.9" and python_version < "4.0" +snakemake==7.32.4 ; python_version >= "3.9" and python_version < "4.0" +soupsieve==2.4.1 ; python_version >= "3.9" and python_version < "4.0" +stopit==1.1.2 ; python_version >= "3.9" and python_version < "4.0" +tabulate==0.9.0 ; python_version >= "3.9" and python_version < "4.0" +threadpoolctl==3.2.0 ; python_version >= 
"3.9" and python_version < "4.0" +throttler==1.2.2 ; python_version >= "3.9" and python_version < "4.0" +tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11" +toposort==1.10 ; python_version >= "3.9" and python_version < "4.0" +tqdm==4.66.1 ; python_version >= "3.9" and python_version < "4.0" +tensorboard==2.14.0 ; python_version >= "3.9" and python_version < "4.0" +traitlets==5.9.0 ; python_version >= "3.9" and python_version < "4.0" +typing-extensions==4.7.1 ; python_version >= "3.9" and python_version < "4.0" +urllib3==2.0.4 ; python_version >= "3.9" and python_version < "4.0" +wrapt==1.15.0 ; python_version >= "3.9" and python_version < "4.0" +yte==1.5.1 ; python_version >= "3.9" and python_version < "4.0" +zipp==3.16.2 ; python_version >= "3.9" and python_version < "3.10" diff --git a/run_fbopt_hduva b/run_fbopt_hduva new file mode 100644 index 000000000..c1add075f --- /dev/null +++ b/run_fbopt_hduva @@ -0,0 +1 @@ +python main_out.py --te_d 0 1 2 --tr_d 3 7 --task=mnistcolor10 --bs=8 --model=hduva --trainer=fbopt --nname=conv_bn_pool_2 --gamma_y=7e5 --nname_encoder_x2topic_h=conv_bn_pool_2 --nname_encoder_sandwich_x2h4zd=conv_bn_pool_2 --gamma_y=3 --epos=2 diff --git a/script_generate_all_figures_diva.sh b/script_generate_all_figures_diva.sh new file mode 100755 index 000000000..e7f9617d2 --- /dev/null +++ b/script_generate_all_figures_diva.sh @@ -0,0 +1,76 @@ +#!/bin/bash -x -v + +STR_LOSS_ELL="loss_task/ell" +OUT_DIR="./figures_diva" +# Number of points to plot: +phase_portrait_plot_len=120 + +LOSS_GAMMA_D="$\mathbb{E}_{q_{\phi_d}(z_d|x)}[\log q_{\omega_d}(d|z_d)]$" + + +# README: +# The following scripts will check event files from the 'runs' folder of the working directory. +# To generate example tensorboard 'runs' folder, one could execute e.g. `sh run_fbopt_mnist_diva_autoki.sh` such that there will be 'runs' folder. 
+ +if [ -z "$1" ]; then + # Check if an argument is provided + runs_dir="runs/*" +else + # Use the provided argument + runs_dir=$1 +fi + + +# a command line argument can be passed to this script, in order to skip the first few large jumps on the phase plots; if no argument is provided then all points will be plotted: +if [ -z "$2" ]; then + # Check if an argument is provided + skip_n=0 +else + # Use the provided argument + skip_n=$2 +fi + + + + +# Phase portraits +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot2="lossrd/dyn_gamma_d" --plot1="loss_task/ell" --legend2="\$R_{\gamma_d}(\cdot)\$" --legend1="\$\ell(\cdot)\$" --plot_len $phase_portrait_plot_len --skip_n_steps $skip_n --output_dir=$OUT_DIR --phase_portrait + +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot2="lossrd/dyn_mu_recon" --plot1="loss_task/ell" --legend2="\$R_{\mu_{recon}}(\cdot)\$" --legend1="\$\ell(\cdot)\$" --plot_len $phase_portrait_plot_len --skip_n_steps $skip_n --output_dir=$OUT_DIR --phase_portrait + +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot2="lossrd/dyn_beta_d" --plot1="loss_task/ell" --legend2="\$R_{\beta_d}(\cdot)\$" --legend1="\$\ell(\cdot)\$" --plot_len $phase_portrait_plot_len --skip_n_steps $skip_n --output_dir=$OUT_DIR --phase_portrait + +# python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot2="lossrd/dyn_beta_x" --plot1="loss_task/ell" --legend2="KL (beta_x)" --legend1="ell" --plot_len $phase_portrait_plot_len --skip_n_steps $skip_n --output_dir=$OUT_DIR --phase_portrait + +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot2="lossrd/dyn_beta_y" --plot1="loss_task/ell" --legend2="\$R_{beta_y}(\cdot)\$" --legend1="\$\ell(\cdot)\$" --plot_len $phase_portrait_plot_len --skip_n_steps $skip_n --output_dir=$OUT_DIR --phase_portrait + + + + +# Plot R and the corresponding set point curves (both in the same figure) +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="lossrd/dyn_gamma_d" --plot2="lossrs/setpoint_gamma_d" --legend1="\$R_{\gamma_d}\$" --legend2="setpoint" --output_dir=$OUT_DIR + +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="lossrd/dyn_mu_recon" --plot2="lossrs/setpoint_mu_recon" --legend1="\$R_{\mu_{recon}}(\cdot)\$" --legend2="setpoint" --output_dir=$OUT_DIR + +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="lossrd/dyn_beta_d" --plot2="lossrs/setpoint_beta_d" --legend1="\$R_{\beta_d}(\cdot)\$" --legend2="setpoint" --output_dir=$OUT_DIR --neg + +# python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="lossrd/dyn_beta_x" --plot2="lossrs/setpoint_beta_x" --legend1="KL (beta_x)" --legend2="setpoint" --output_dir=$OUT_DIR + +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="lossrd/dyn_beta_y" --plot2="lossrs/setpoint_beta_y" --legend1="\$R_{\beta_y}(\cdot)\$" --legend2="setpoint" --output_dir=$OUT_DIR --neg + + + # One curve per figure + values=('controller_gain/beta_d' 'controller_gain/beta_y' 'controller_gain/beta_x' 'controller_gain/gamma_d' 'controller_gain/mu_recon' 'dyn_mu/beta_d' 'delta/beta_d' 'dyn_mu/beta_y' 'delta/beta_y' 'dyn_mu/beta_x' 'delta/beta_x' 'dyn_mu/gamma_d' 'delta/gamma_d' 'dyn_mu/mu_recon' 'delta/mu_recon' 'loss_task/penalized' 'loss_task/ell' 'acc/te' 'acc/val' 'acc/sel' 'acc/setpoint') + # Loop over the array + for val in 
"${values[@]}" + do + python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="$val" --legend1="$val" --output_dir=$OUT_DIR + done + + +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="dyn_mu/mu_recon" --legend1="\$\mu_{recon}\$" --output_dir=$OUT_DIR +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="dyn_mu/gamma_d" --legend1="\$\gamma_d\$" --output_dir=$OUT_DIR +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="dyn_mu/beta_y" --legend1="\$\beta_y\$" --output_dir=$OUT_DIR +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="loss_task/ell" --legend1="\$\ell(\cdot)\$" --output_dir=$OUT_DIR +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="loss_task/penalized" --legend1="\$\ell(\cdot)+\mu^TR(\cdot)\$" --output_dir=$OUT_DIR +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="controller_gain/beta_y" --legend1="controller gain for \$\beta_y\$" --output_dir=$OUT_DIR diff --git a/script_jigen_plot.sh b/script_jigen_plot.sh new file mode 100755 index 000000000..5c47a68f8 --- /dev/null +++ b/script_jigen_plot.sh @@ -0,0 +1,4 @@ +python domainlab/utils/generate_fbopt_phase_portrait.py --plot2="lossrd/dyn_alpha" --plot1="loss_task/ell" --legend2="regularization loss jigen" --legend1="classification loss" --output_dir="." --phase_portrait + + +python domainlab/utils/generate_fbopt_phase_portrait.py --plot1="lossrs/setpoint_alpha" --plot2="lossrd/dyn_alpha" --legend2="regularization loss jigen" --legend1="setpoint" --output_dir="." diff --git a/scripts_fbopt/run_erm.sh b/scripts_fbopt/run_erm.sh new file mode 100644 index 000000000..f5285811f --- /dev/null +++ b/scripts_fbopt/run_erm.sh @@ -0,0 +1 @@ +python main_out.py --te_d=1 --tr_d 0 3 --task=mnistcolor10 --bs=16 --model=erm --nname=conv_bn_pool_2 --epos=10 diff --git a/scripts_fbopt/run_fbopt_dann.sh b/scripts_fbopt/run_fbopt_dann.sh new file mode 100644 index 000000000..c75fb071c --- /dev/null +++ b/scripts_fbopt/run_fbopt_dann.sh @@ -0,0 +1 @@ +python main_out.py --te_d=caltech --task=mini_vlcs --bs=16 --model=dann --trainer=fbopt --nname=alexnet --epos=200 --es=200 --no_setpoint_update diff --git a/scripts_fbopt/run_fbopt_diva.sh b/scripts_fbopt/run_fbopt_diva.sh new file mode 100644 index 000000000..dc48bce9b --- /dev/null +++ b/scripts_fbopt/run_fbopt_diva.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py +python main_out.py --te_d=caltech --task=mini_vlcs --bs=8 --model=diva --trainer=fbopt --nname=alexnet --nname_dom=alexnet --gamma_d=3 --gamma_y=3 --epos=200 diff --git a/scripts_fbopt/run_fbopt_diva_cpu.sh b/scripts_fbopt/run_fbopt_diva_cpu.sh new file mode 100644 index 000000000..59d0c592a --- /dev/null +++ b/scripts_fbopt/run_fbopt_diva_cpu.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py +python main_out.py --te_d=caltech --task=mini_vlcs --bs=8 --model=diva 
--trainer=fbopt --nname=alexnet --nname_dom=alexnet --gamma_d=3 --gamma_y=3 --epos=200 --es=100 diff --git a/scripts_fbopt/run_fbopt_hduva_cpu.sh b/scripts_fbopt/run_fbopt_hduva_cpu.sh new file mode 100644 index 000000000..54b7d5995 --- /dev/null +++ b/scripts_fbopt/run_fbopt_hduva_cpu.sh @@ -0,0 +1,2 @@ +export CUDA_VISIBLE_DEVICES="" +python main_out.py --te_d 0 1 2 --tr_d 3 7 --task=mnistcolor10 --bs=8 --model=hduva --trainer=fbopt --nname=conv_bn_pool_2 --gamma_y=7e5 --nname_encoder_x2topic_h=conv_bn_pool_2 --nname_encoder_sandwich_x2h4zd=conv_bn_pool_2 --gamma_y=3 --epos=2 diff --git a/scripts_fbopt/run_fbopt_match_diva.sh b/scripts_fbopt/run_fbopt_match_diva.sh new file mode 100644 index 000000000..c1547567c --- /dev/null +++ b/scripts_fbopt/run_fbopt_match_diva.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py +python main_out.py --te_d=caltech --task=mini_vlcs --bs=8 --model=diva --trainer=fbopt_matchdg --nname=alexnet --nname_dom=alexnet --gamma_d=3 --gamma_y=3 --epos=200 --es=100 diff --git a/scripts_fbopt/run_fbopt_mnist.sh b/scripts_fbopt/run_fbopt_mnist.sh new file mode 100644 index 000000000..2e3edc424 --- /dev/null +++ b/scripts_fbopt/run_fbopt_mnist.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py + +python main_out.py --te_d=0 --tr_d 1 2 --task=mnistcolor10 --bs=16 --model=jigen --trainer=fbopt --nname=conv_bn_pool_2 --epos=2000 --es=0 --mu_init=0.00001 --coeff_ma_setpoint=0.5 --coeff_ma_output_state=0.99 --force_setpoint_change_once diff --git a/scripts_fbopt/run_fbopt_mnist_diva.sh b/scripts_fbopt/run_fbopt_mnist_diva.sh new file mode 100644 index 000000000..fd5c2b8cf --- /dev/null +++ b/scripts_fbopt/run_fbopt_mnist_diva.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py + +python main_out.py --te_d=0 --tr_d 1 2 --task=mnistcolor10 --bs=16 --model=diva --trainer=fbopt --nname=conv_bn_pool_2 --epos=2000 --es=500 --mu_init=0.000001 --gamma_y=1.0 diff --git a/scripts_fbopt/run_fbopt_mnist_diva_autoki.sh b/scripts_fbopt/run_fbopt_mnist_diva_autoki.sh new file mode 100644 index 000000000..64c19e102 --- /dev/null +++ b/scripts_fbopt/run_fbopt_mnist_diva_autoki.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py + +python main_out.py --te_d=0 --tr_d 1 2 --task=mnistcolor10 --bs=16 --model=diva --trainer=fbopt --nname=conv_bn_pool_2 --epos=5000 --es=5 --mu_init=1e-6 --gamma_y=1.0 --k_i_gain_ratio=0.9 --coeff_ma_output_state=0 --coeff_ma_setpoint=0 --epos_min=1000 --force_setpoint_change_once diff --git 
a/scripts_fbopt/run_fbopt_mnist_feedforward.sh b/scripts_fbopt/run_fbopt_mnist_feedforward.sh new file mode 100644 index 000000000..b04819c61 --- /dev/null +++ b/scripts_fbopt/run_fbopt_mnist_feedforward.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py + +python main_out.py --te_d=0 --tr_d 1 2 --task=mnistcolor10 --bs=16 --model=jigen --trainer=fbopt --nname=conv_bn_pool_2 --epos=2000 --epos_min=100 --es=1 --force_feedforward diff --git a/scripts_fbopt/run_fbopt_mnist_jigen_autoki.sh b/scripts_fbopt/run_fbopt_mnist_jigen_autoki.sh new file mode 100644 index 000000000..8b346e011 --- /dev/null +++ b/scripts_fbopt/run_fbopt_mnist_jigen_autoki.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py + +python main_out.py --te_d=0 --tr_d 1 2 --task=mnistcolor10 --bs=16 --model=jigen --trainer=fbopt --nname=conv_bn_pool_2 --epos=2000 --es=1 --epos_min=500 --mu_init=1e-6 --coeff_ma_output_state=0.99 --k_i_gain_ratio=0.99 diff --git a/scripts_fbopt/run_fbopt_small_pacs.sh b/scripts_fbopt/run_fbopt_small_pacs.sh new file mode 100644 index 000000000..fc3ab6bc7 --- /dev/null +++ b/scripts_fbopt/run_fbopt_small_pacs.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py +python main_out.py --te_d=sketch --tpath=examples/tasks/demo_task_path_list_small.py --bs=16 --model=jigen --trainer=fbopt --nname=alexnet --epos=200 --es=100 --init_mu=0.01 diff --git a/scripts_fbopt/run_mnist_jigen.sh b/scripts_fbopt/run_mnist_jigen.sh new file mode 100644 index 000000000..0bc854c5e --- /dev/null +++ b/scripts_fbopt/run_mnist_jigen.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py + +python main_out.py --te_d=caltech --task=mini_vlcs --bs=16 --model=jigen --trainer=fbopt --nname=alexnet --epos=200 --es=200 --mu_init=1.0 --coeff_ma_output=0 --coeff_ma_setpoint=0 --coeff_ma_output=0 diff --git a/scripts_fbopt/run_pacs_diva_fbopt.sh b/scripts_fbopt/run_pacs_diva_fbopt.sh new file mode 100644 index 000000000..74d1f0cd3 --- /dev/null +++ b/scripts_fbopt/run_pacs_diva_fbopt.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py +python main_out.py --te_d=sketch --bs=32 --model=diva --trainer=fbopt --epos=200 --es=200 --npath_dom=examples/nets/resnet50domainbed.py 
--tpath=examples/tasks/task_pacs_path_list.py --npath=examples/nets/resnet50domainbed.py --gamma_y=1.0 --mu_init=1e-6 --lr=5e-5 --zx_dim=0 diff --git a/scripts_fbopt/run_pacs_jigen_fbopt.sh b/scripts_fbopt/run_pacs_jigen_fbopt.sh new file mode 100644 index 000000000..99663ee61 --- /dev/null +++ b/scripts_fbopt/run_pacs_jigen_fbopt.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occurring +# pytest -s tests/test_fbopt.py +python main_out.py --te_d=sketch --tpath=examples/tasks/task_pacs_path_list.py --model=jigen --trainer=fbopt --bs=64 --epos=200 --es=200 --npath=examples/nets/resnet50domainbed.py --mu_init=1e-6 --lr=5e-5 --coeff_ma_output_state=0.1 diff --git a/test_fbopt_dial.sh b/test_fbopt_dial.sh new file mode 100644 index 000000000..4bf0c669b --- /dev/null +++ b/test_fbopt_dial.sh @@ -0,0 +1,2 @@ +export CUDA_VISIBLE_DEVICES="" +python main_out.py --te_d=caltech --task=mini_vlcs --bs=16 --model=fboptdial --trainer=dial --nname=alexnet --nname_dom=alexnet --gamma_y=1e6 --gamma_d=1e6 diff --git a/test_match_duva.sh b/test_match_duva.sh new file mode 100644 index 000000000..9f3e9951e --- /dev/null +++ b/test_match_duva.sh @@ -0,0 +1,4 @@ +python main_out.py --te_d 0 1 2 --tr_d 3 7 --task=mnistcolor10 --debug --bs=2 --model=matchhduva \ + --epochs_ctr=3 --epos=6 --nname=conv_bn_pool_2 --gamma_y=7e5 \ + --nname_encoder_x2topic_h=conv_bn_pool_2 \ + --nname_encoder_sandwich_x2h4zd=conv_bn_pool_2 diff --git a/test_match_duva_vlcs.sh b/test_match_duva_vlcs.sh new file mode 100644 index 000000000..a47e76c36 --- /dev/null +++ b/test_match_duva_vlcs.sh @@ -0,0 +1,4 @@ +python main_out.py --te_d=caltech --task=mini_vlcs --debug --bs=2 --model=matchhduva \ + --epochs_ctr=3 --epos=6 --npath=examples/nets/resnet.py --gamma_y=7e5 \ + --npath_encoder_x2topic_h=examples/nets/resnet.py \ + --npath_encoder_sandwich_x2h4zd=examples/nets/resnet.py diff --git a/tests/test_fbopt.py b/tests/test_fbopt.py new file mode 100644 index 000000000..c442bf090 --- /dev/null +++ b/tests/test_fbopt.py @@ -0,0 +1,42 @@ +""" +unit and end-to-end tests for the fbopt trainer across models +""" +from tests.utils_test import utils_test_algo + + +def test_dann_fbopt(): + """ + dann + """ + args = "--te_d=caltech --task=mini_vlcs --debug --bs=2 --model=dann --trainer=fbopt --nname=alexnet --epos=3" + utils_test_algo(args) + + +def test_jigen_fbopt(): + """ + jigen + """ + args = "--te_d=caltech --task=mini_vlcs --debug --bs=2 --model=jigen --trainer=fbopt --nname=alexnet --epos=3" + utils_test_algo(args) + + +def test_diva_fbopt(): + """ + diva + """ + args = "--te_d=caltech --task=mini_vlcs --debug --bs=2 --model=diva --gamma_y=1.0 --trainer=fbopt --nname=alexnet --epos=3" + utils_test_algo(args) + +def test_erm_fbopt(): + """ + erm + """ + args = "--te_d=caltech --task=mini_vlcs --debug --bs=2 --model=erm --trainer=fbopt --nname=alexnet --epos=3" # pylint: disable=line-too-long + utils_test_algo(args) + +def test_forcesetpoint_fbopt(): + """ + jigen with forced setpoint change + """ + args = "--te_d=0 --tr_d 1 2 --task=mnistcolor10 --bs=16 --model=jigen --trainer=fbopt --nname=conv_bn_pool_2 --epos=10 --es=0 --mu_init=0.00001 --coeff_ma_setpoint=0.5 --coeff_ma_output_state=0.99 --force_setpoint_change_once" + utils_test_algo(args) diff --git a/tests/test_fbopt_setpoint_ada.py b/tests/test_fbopt_setpoint_ada.py new file mode 100644 index 
000000000..4b8029056 --- /dev/null +++ b/tests/test_fbopt_setpoint_ada.py @@ -0,0 +1,9 @@ +from domainlab.algos.trainers.fbopt_setpoint_ada import is_less_list_all + + +def test_less_than(): + a = [3, 4, -9, -8] + b = [1, 0.5, -1, -0.5] + c = [0.5, 0.25, -0.5, -0.25] + assert not is_less_list_all(a, b) + assert is_less_list_all(c, b) diff --git a/tests/test_fbopt_setpoint_rewind.py b/tests/test_fbopt_setpoint_rewind.py new file mode 100644 index 000000000..3c1011bab --- /dev/null +++ b/tests/test_fbopt_setpoint_rewind.py @@ -0,0 +1,12 @@ +""" +end-to-end test for jigen with fbopt setpoint rewinding +""" +from tests.utils_test import utils_test_algo + + +def test_jigen_fbopt(): + """ + jigen with setpoint rewind + """ + args = "--te_d=caltech --task=mini_vlcs --debug --bs=2 --model=jigen --trainer=fbopt --nname=alexnet --epos=300 --setpoint_rewind=yes" + utils_test_algo(args)
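For orientation: the comparison pinned down by `test_less_than` above is magnitude-wise, not plain element-wise `<` (note `-0.5 < -1` is false, yet the test expects `True` for `c` vs `b`). A hypothetical reconstruction consistent with the test; the shipped implementation lives in `domainlab/algos/trainers/fbopt_setpoint_ada.py`:

```python
def is_less_list_all(list1, list2):
    """True iff every entry of list1 is smaller in magnitude than the
    corresponding entry of list2 (sketch reconstructed from the test)."""
    return all(abs(ele1) < abs(ele2) for ele1, ele2 in zip(list1, list2))

# mirrors the assertions in tests/test_fbopt_setpoint_ada.py
assert not is_less_list_all([3, 4, -9, -8], [1, 0.5, -1, -0.5])
assert is_less_list_all([0.5, 0.25, -0.5, -0.25], [1, 0.5, -1, -0.5])
```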