diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 86ca0b445..2d65938a4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,11 +2,10 @@ name: CI
 on:
   push:
-    branches: master
+    branches: mhof_dev_merge
   pull_request:
-    branches: master
+    branches: mhof_dev_merge
   workflow_dispatch:
-
 jobs:
   test:
     name: Run tests
diff --git a/a_reproduce_pacs_diva.yaml b/a_reproduce_pacs_diva.yaml
new file mode 100644
index 000000000..db3c234eb
--- /dev/null
+++ b/a_reproduce_pacs_diva.yaml
@@ -0,0 +1,24 @@
+te_d: sketch
+tpath: examples/tasks/task_pacs_aug.py
+bs: 32
+model: diva
+trainer: fbopt
+gamma_y: 1.0
+ini_setpoint_ratio: 0.99
+str_diva_multiplier_type: gammad_recon
+coeff_ma_output_state: 0.1
+coeff_ma_setpoint: 0.9
+exp_shoulder_clip: 5
+mu_init: 0.000001
+k_i_gain_ratio: 0.5
+mu_clip: 10
+epos: 1000
+epos_min: 200
+npath: examples/nets/resnet50domainbed.py
+npath_dom: examples/nets/resnet50domainbed.py
+es: 2
+lr: 0.00005
+zx_dim: 0
+zy_dim: 64
+zd_dim: 64
+force_setpoint_change_once: True
diff --git a/domainlab/algos/builder_fbopt_dial.py b/domainlab/algos/builder_fbopt_dial.py
new file mode 100644
index 000000000..f1faad96b
--- /dev/null
+++ b/domainlab/algos/builder_fbopt_dial.py
@@ -0,0 +1,21 @@
+"""
+builder for feedback optimization of dial
+"""
+from domainlab.algos.builder_diva import NodeAlgoBuilderDIVA
+from domainlab.algos.trainers.train_fbopt_b import TrainerFbOpt
+
+
+class NodeAlgoBuilderFbOptDial(NodeAlgoBuilderDIVA):
+    """
+    builder for feedback optimization of dial
+    """
+
+    def init_business(self, exp):
+        """
+        return trainer, model, observer, device
+        """
+        trainer_in, model, observer, device = super().init_business(exp)
+        trainer_in.init_business(model, exp.task, observer, device, exp.args)
+        trainer = TrainerFbOpt()
+        trainer.init_business(trainer_in, exp.task, observer, device, exp.args)
+        return trainer, model, observer, device
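The builder above first initializes the plain DIVA trainer, then hands it to `TrainerFbOpt` as its decoratee so that each epoch call can be forwarded down the chain. A minimal, self-contained sketch of that decoration pattern (toy classes, not the DomainLab API):

```python
# Toy stand-ins illustrating the trainer decoration used in
# NodeAlgoBuilderFbOptDial.init_business above: the outer trainer
# receives the already-initialized inner trainer and delegates to it.
class InnerTrainer:
    def __init__(self, model):
        self.model = model

    def tr_epoch(self, epoch):
        print(f"inner trainer: epoch {epoch} on model {self.model!r}")
        return False  # "not converged yet"


class OuterTrainer:
    """decorates another trainer: adds behavior, then delegates"""

    def __init__(self, decoratee):
        self.decoratee = decoratee

    def tr_epoch(self, epoch):
        print("outer trainer: update multipliers before delegating")
        return self.decoratee.tr_epoch(epoch)


if __name__ == "__main__":
    trainer = OuterTrainer(InnerTrainer(model="diva"))
    trainer.tr_epoch(0)
```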
f"{self._oracle_last_setpoint_sel_te_acc} to " + # self.sel_model_te_acc defined as a property + # in a_msel, which returns self.msel.sel_model_te_acc + # is the validation acc based model selection, which + # does not take setpoint into account + f"{self.sel_model_te_acc}" + ) + logger.info(log_message) + self._oracle_last_setpoint_sel_te_acc = self.sel_model_te_acc + # let decoratee decide if model should be selected or not + flag = self.msel.update(clear_counter) + return flag diff --git a/domainlab/algos/msels/c_msel_val_top_k.py b/domainlab/algos/msels/c_msel_val_top_k.py new file mode 100644 index 000000000..f557c7dc1 --- /dev/null +++ b/domainlab/algos/msels/c_msel_val_top_k.py @@ -0,0 +1,61 @@ +""" +Model Selection should be decoupled from +""" +from domainlab.algos.msels.c_msel_val import MSelValPerf +from domainlab.utils.logger import Logger + + +class MSelValPerfTopK(MSelValPerf): + """ + 1. Model selection using validation performance + 2. Visitor pattern to trainer + """ + + def __init__(self, max_es, top_k=2): + super().__init__(max_es) # construct self.tr_obs (observer) + self.top_k = top_k + self.list_top_k_acc = [0.0 for _ in range(top_k)] + + def update(self, clear_counter=False): + """ + if the best model should be updated + """ + flag_super = super().update(clear_counter) + metric_val_current = self.tr_obs.metric_val[self.tr_obs.str_metric4msel] + acc_min = min(self.list_top_k_acc) + if metric_val_current > acc_min: + # overwrite + logger = Logger.get_logger() + logger.info( + f"top k validation acc: {self.list_top_k_acc} \ + overwriting/reset counter" + ) + self.es_c = 0 # restore counter + ind = self.list_top_k_acc.index(acc_min) + # avoid having identical values + if metric_val_current not in self.list_top_k_acc: + self.list_top_k_acc[ind] = metric_val_current + logger.info( + f"top k validation acc updated: \ + {self.list_top_k_acc}" + ) + # overwrite to ensure consistency + # issue #569: initially self.list_top_k_acc will be [xx, 0] and it does not matter since 0 will be overwriten by second epoch validation acc. 
diff --git a/domainlab/algos/msels/c_msel_val_top_k.py b/domainlab/algos/msels/c_msel_val_top_k.py
new file mode 100644
index 000000000..f557c7dc1
--- /dev/null
+++ b/domainlab/algos/msels/c_msel_val_top_k.py
@@ -0,0 +1,61 @@
+"""
+model selection should be decoupled from training: here, top-k validation
+performance based model selection
+"""
+from domainlab.algos.msels.c_msel_val import MSelValPerf
+from domainlab.utils.logger import Logger
+
+
+class MSelValPerfTopK(MSelValPerf):
+    """
+    1. Model selection using validation performance
+    2. Visitor pattern to trainer
+    """
+
+    def __init__(self, max_es, top_k=2):
+        super().__init__(max_es)  # construct self.tr_obs (observer)
+        self.top_k = top_k
+        self.list_top_k_acc = [0.0 for _ in range(top_k)]
+
+    def update(self, clear_counter=False):
+        """
+        decide if the best model should be updated
+        """
+        flag_super = super().update(clear_counter)
+        metric_val_current = self.tr_obs.metric_val[self.tr_obs.str_metric4msel]
+        acc_min = min(self.list_top_k_acc)
+        if metric_val_current > acc_min:
+            # overwrite
+            logger = Logger.get_logger()
+            logger.info(
+                f"top k validation acc: {self.list_top_k_acc}, "
+                f"overwriting/resetting counter"
+            )
+            self.es_c = 0  # reset early-stopping counter
+            ind = self.list_top_k_acc.index(acc_min)
+            # avoid having identical values
+            if metric_val_current not in self.list_top_k_acc:
+                self.list_top_k_acc[ind] = metric_val_current
+                logger.info(f"top k validation acc updated: {self.list_top_k_acc}")
+            # overwrite to ensure consistency
+            # issue #569: initially self.list_top_k_acc will be [xx, 0],
+            # which does not matter since the 0 will be overwritten by the
+            # second epoch's validation acc. After epoch 1, most often,
+            # self._best_val_acc will be the higher value of
+            # self.list_top_k_acc and gets overwritten here by
+            # min(self.list_top_k_acc)
+            logger.info(
+                f"top-k val sel: overwriting best val acc from {self._best_val_acc} to "
+                f"minimum of {self.list_top_k_acc} which is {min(self.list_top_k_acc)} "
+                f"to ensure consistency"
+            )
+            self._best_val_acc = min(self.list_top_k_acc)
+            # overwrite the test acc; this does not depend on whether the
+            # top-k val acc has been overwritten or not
+            metric_te_current = self.tr_obs.metric_te[self.tr_obs.str_metric4msel]
+            if self._sel_model_te_acc != metric_te_current:
+                # this can only happen if the validation acc has decreased and
+                # the current val acc is only bigger than
+                # min(self.list_top_k_acc) but lower than max(self.list_top_k_acc)
+                logger.info(
+                    f"top-k val sel: overwriting selected model test acc from "
+                    f"{self._sel_model_te_acc} to {metric_te_current} to ensure consistency"
+                )
+                self._sel_model_te_acc = metric_te_current
+            return True  # branch: metric_val_current > acc_min
+        return flag_super
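The bookkeeping rule above in compact, runnable form, with made-up accuracies: the effective best validation accuracy is the minimum over the k best values seen so far, which is more tolerant than a strict argmax.

```python
# Self-contained mini-version of the top-k update in MSelValPerfTopK
# (illustrative numbers, k = 2).
top_k = 2
list_top_k_acc = [0.0 for _ in range(top_k)]

def observe(val_acc):
    """replace the worst kept value if beaten; return the selection threshold"""
    acc_min = min(list_top_k_acc)
    if val_acc > acc_min and val_acc not in list_top_k_acc:
        list_top_k_acc[list_top_k_acc.index(acc_min)] = val_acc
    return min(list_top_k_acc)

for acc in (0.70, 0.80, 0.75, 0.72):
    print(observe(acc), list_top_k_acc)
# printed thresholds: 0.0, 0.7, 0.75, 0.75
# 0.72 is below min([0.75, 0.8]), so the last observation changes nothing
```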
default="no", + help="setpoing_rewind, for benchmark, use yes or no", + ) + + parser.add_argument( + "--str_diva_multiplier_type", + type=str, + default="gammad_recon", + help="which penalty to tune", + ) + + return parser diff --git a/domainlab/algos/trainers/fbopt_mu_controller.py b/domainlab/algos/trainers/fbopt_mu_controller.py new file mode 100644 index 000000000..824638461 --- /dev/null +++ b/domainlab/algos/trainers/fbopt_mu_controller.py @@ -0,0 +1,280 @@ +""" +update hyper-parameters during training +""" +import os +import warnings + +import numpy as np +from torch.utils.tensorboard import SummaryWriter + +from domainlab.algos.trainers.fbopt_setpoint_ada import ( + FbOptSetpointController, + if_list_sign_agree, +) +from domainlab.utils.logger import Logger + + +class StubSummaryWriter: + """ + # stub writer for tensorboard that ignores all messages + """ + + def add_scalar(self, *args, **kwargs): + """ + stub, pass do nothing + """ + + def add_scalars(self, *args, **kwargs): + """ + stub, pass, do nothing + """ + + +class HyperSchedulerFeedback: + # pylint: disable=too-many-instance-attributes + """ + design $\\mu$$ sequence based on state of penalized loss + """ + + def __init__(self, trainer, **kwargs): + """ + kwargs is a dictionary with key the hyper-parameter name and its value + """ + self.trainer = trainer + self.init_mu = trainer.aconf.mu_init + self.mu_min = trainer.aconf.mu_min + self.mu_clip = trainer.aconf.mu_clip + + self.mmu = kwargs + # force initial value of mu + self.mmu = {key: self.init_mu for key, val in self.mmu.items()} + self.set_point_controller = FbOptSetpointController(args=self.trainer.aconf) + + self.k_i_control = trainer.aconf.k_i_gain + self.k_i_gain_ratio = None + self.overshoot_rewind = trainer.aconf.overshoot_rewind == "yes" + self.delta_epsilon_r = None + + # NOTE: this value will be set according to initial evaluation of + # neural network + self.activation_clip = trainer.aconf.exp_shoulder_clip + self.coeff_ma = trainer.aconf.coeff_ma + # NOTE: + # print(copy.deepcopy(self.model)) + # TypeError: cannot pickle '_thread.lock' object + if trainer.aconf.no_tensorboard: + self.writer = StubSummaryWriter() + else: + str_job_id = os.environ.get("SLURM_JOB_ID", "") + self.writer = SummaryWriter(comment=str_job_id) + + def set_k_i_gain(self, epo_reg_loss): + if self.k_i_gain_ratio is None: + return + # NOTE: do not use self.cal_delta4control!!!! which will change + # class member variables self.delta_epsilon_r! + list_setpoint = self.get_setpoint4r() + if_list_sign_agree(epo_reg_loss, list_setpoint) + delta_epsilon_r = [a - b for a, b in zip(epo_reg_loss, list_setpoint)] + + # to calculate self.delta_epsilon_r + k_i_gain_saturate = [ + a / b for a, b in zip(self.activation_clip, delta_epsilon_r) + ] + k_i_gain_saturate_min = min(k_i_gain_saturate) + # NOTE: here we override the commandline arguments specification + # for k_i_control, so k_i_control is not a hyperparameter anymore + self.k_i_control = self.k_i_gain_ratio * k_i_gain_saturate_min + warnings.warn( + f"hyperparameter k_i_gain disabled! \ + replace with {self.k_i_control}" + ) + # FIXME: change this to 1-self.ini_setpoint_ratio, i.e. 
+    def get_setpoint4r(self):
+        """
+        get the setpoint list
+        """
+        return self.set_point_controller.setpoint4R
+
+    def set_setpoint(self, list_setpoint4r, setpoint4ell):
+        """
+        set the setpoint
+        """
+        self.set_point_controller.setpoint4R = list_setpoint4r
+        self.set_point_controller.setpoint4ell = setpoint4ell
+
+    def cal_delta4control(self, list1, list_setpoint):
+        """
+        list difference between output and setpoint
+        """
+        if_list_sign_agree(list1, list_setpoint)
+        delta_epsilon_r = [a - b for a, b in zip(list1, list_setpoint)]
+        if self.delta_epsilon_r is None:
+            self.delta_epsilon_r = delta_epsilon_r
+        else:
+            # PI control:
+            # self.delta_epsilon_r is the previous time step,
+            # delta_epsilon_r is the current time step
+            self.delta_epsilon_r = self.cal_delta_integration(
+                self.delta_epsilon_r, delta_epsilon_r, self.coeff_ma
+            )
+
+    def cal_delta_integration(self, list_old, list_new, coeff):
+        """
+        moving average of the delta
+        """
+        return [(1 - coeff) * a + coeff * b for a, b in zip(list_old, list_new)]
+
+    def tackle_overshoot(self, activation, epo_reg_loss, list_str_multiplier_na):
+        """
+        zero the activation of components that overshot the setpoint
+        """
+        list_overshoot = [
+            i if (a - b) * (self.delta_epsilon_r[i]) < 0 else None
+            for i, (a, b) in enumerate(
+                zip(epo_reg_loss, self.set_point_controller.setpoint4R)
+            )
+        ]
+        for ind in list_overshoot:
+            if ind is not None:
+                logger = Logger.get_logger(
+                    logger_name="main_out_logger", loglevel="INFO"
+                )
+                logger.info(f"delta integration: {self.delta_epsilon_r}")
+                logger.info(
+                    f"overshooting at pos {ind} of activation: {activation}"
+                )
+                logger.info(f"name reg loss: {list_str_multiplier_na}")
+                if self.overshoot_rewind:
+                    activation[ind] = 0.0
+                    logger.info(
+                        f"PID controller set to zero now, new activation: {activation}"
+                    )
+        return activation
+
+    def cal_activation(self):
+        """
+        calculate the activation on the exponential shoulder
+        """
+        setpoint = self.get_setpoint4r()
+        activation = [
+            self.k_i_control * val if setpoint[i] > 0 else self.k_i_control * (-val)
+            for i, val in enumerate(self.delta_epsilon_r)
+        ]
+        if self.activation_clip is not None:
+            activation = [
+                np.clip(
+                    val, a_min=-1 * self.activation_clip, a_max=self.activation_clip
+                )
+                for val in activation
+            ]
+        return activation
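Putting `cal_delta4control`, `cal_activation` and the multiplicative update of `search_mu` (shown below) together, one controller step looks as follows; the numbers are made up and the snippet is a self-contained sketch, not the class itself.

```python
import numpy as np

coeff_ma, k_i, shoulder_clip = 0.5, 0.01, 5.0
mu_min, mu_clip = 1e-6, 1e4

names = ["recon", "gamma_d"]
mu = {"recon": 1e-6, "gamma_d": 1e-6}
setpoint = [198.0, 49.5]
epo_reg_loss = [210.0, 60.0]  # current reg losses, above the setpoint
delta_prev = [10.0, 9.0]      # integrator state from the previous epoch

# the moving average integrates the control error over epochs
delta = [a - b for a, b in zip(epo_reg_loss, setpoint)]
delta_int = [(1 - coeff_ma) * p + coeff_ma * d for p, d in zip(delta_prev, delta)]
# clip on the exponential "shoulder", then mu <- clip(mu * exp(k_i * delta))
activation = [float(np.clip(k_i * d, -shoulder_clip, shoulder_clip)) for d in delta_int]
mu = {
    na: float(np.clip(mu[na] * np.exp(act), mu_min, mu_clip))
    for na, act in zip(names, activation)
}
print(mu)  # both multipliers grow, since both losses exceed the setpoint
```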
+    def search_mu(
+        self, epo_reg_loss, epo_task_loss, epo_loss_tr, list_str_multiplier_na, miter
+    ):
+        # pylint: disable=too-many-locals, too-many-arguments
+        """
+        start from the parameter dictionary dict_theta: {"layer": tensor},
+        enlarge mu w.r.t. its current value to see if the criterion is met:
+        $$\\mu^{k+1} = \\mu^{k} \\exp(k_i [R(\\theta^{k}) - R_{setpoint}])$$
+        """
+        logger = Logger.get_logger(logger_name="main_out_logger", loglevel="INFO")
+        logger.info(f"before controller: current mu: {self.mmu}")
+        logger.info(f"epo reg loss: {epo_reg_loss}")
+        logger.info(f"name reg loss: {list_str_multiplier_na}")
+        self.cal_delta4control(epo_reg_loss, self.get_setpoint4r())
+        activation = self.cal_activation()
+        # overshoot handling
+        activation = self.tackle_overshoot(
+            activation, epo_reg_loss, list_str_multiplier_na
+        )
+        list_gain = np.exp(activation)
+        dict_gain = dict(zip(list_str_multiplier_na, list_gain))
+        target = self.dict_multiply(self.mmu, dict_gain)
+        self.mmu = self.dict_clip(target)
+        logger.info(f"after controller: current mu: {self.mmu}")
+
+        for key, val in self.mmu.items():
+            self.writer.add_scalar(f"dyn_mu/{key}", val, miter)
+            self.writer.add_scalar(f"controller_gain/{key}", dict_gain[key], miter)
+            ind = list_str_multiplier_na.index(key)
+            self.writer.add_scalar(f"delta/{key}", self.delta_epsilon_r[ind], miter)
+
+        if list_str_multiplier_na:
+            for i, (reg_dyn, reg_set) in enumerate(
+                zip(epo_reg_loss, self.get_setpoint4r())
+            ):
+                self.writer.add_scalar(
+                    f"lossrd/dyn_{list_str_multiplier_na[i]}", reg_dyn, miter
+                )
+                self.writer.add_scalar(
+                    f"lossrs/setpoint_{list_str_multiplier_na[i]}", reg_set, miter
+                )
+                self.writer.add_scalars(
+                    f"loss_rds/loss_{list_str_multiplier_na[i]}_w_setpoint",
+                    {
+                        f"lossr/loss_{list_str_multiplier_na[i]}": reg_dyn,
+                        f"lossr/setpoint_{list_str_multiplier_na[i]}": reg_set,
+                    },
+                    miter,
+                )
+                self.writer.add_scalar(
+                    f"x_ell_y_r/loss_{list_str_multiplier_na[i]}", reg_dyn, epo_task_loss
+                )
+        else:
+            logger.info("No multiplier provided")
+        self.writer.add_scalar("loss_task/penalized", epo_loss_tr, miter)
+        self.writer.add_scalar("loss_task/ell", epo_task_loss, miter)
+        acc_te = 0
+        acc_val = 0
+        acc_sel = 0
+        acc_set = 0
+
+        if miter > 1:
+            acc_te = self.trainer.observer.metric_te["acc"]
+            acc_val = self.trainer.observer.metric_val["acc"]
+            acc_sel = self.trainer.observer.model_sel.sel_model_te_acc
+            acc_set = self.trainer.observer.model_sel.oracle_last_setpoint_sel_te_acc
+        self.writer.add_scalar("acc/te", acc_te, miter)
+        self.writer.add_scalar("acc/val", acc_val, miter)
+        self.writer.add_scalar("acc/sel", acc_sel, miter)
+        self.writer.add_scalar("acc/setpoint", acc_set, miter)
+
+    def dict_clip(self, dict_base):
+        """
+        clip each entry of mu according to the pre-set self.mu_clip
+        """
+        return {
+            key: np.clip(val, a_min=self.mu_min, a_max=self.mu_clip)
+            for key, val in dict_base.items()
+        }
+
+    def dict_is_zero(self, dict_mu):
+        """
+        check if any hyper-parameter is zero
+        """
+        for key in dict_mu.keys():
+            if dict_mu[key] == 0.0:
+                return True
+        return False
+
+    def dict_multiply(self, dict_base, dict_multiplier):
+        """
+        element-wise product of two dictionaries sharing the same keys
+        """
+        return {key: val * dict_multiplier[key] for key, val in dict_base.items()}
+
+    def update_setpoint(self, epo_reg_loss, epo_task_loss):
+        """
+        delegate the setpoint update to the setpoint controller
+        """
+        return self.set_point_controller.observe(epo_reg_loss, epo_task_loss)
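The setpoint logic in the next file compares loss vectors with a sign-aware convention: for non-negative pairs smaller is better, while for pairs that are both negative (e.g. a negative ELBO term) the comparison is flipped. A single-pair illustration of that convention:

```python
def is_less(a, b):
    """a is 'better' than reference b under the sign-aware convention"""
    if a * b < 0:
        raise RuntimeError(f"{a} and {b} cannot be compared!")
    return a < b if a >= 0 and b >= 0 else a > b

print(is_less(90.0, 100.0))    # True: positive losses, smaller is better
print(is_less(-90.0, -100.0))  # True: negative pair, comparison is flipped
```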
diff --git a/domainlab/algos/trainers/fbopt_setpoint_ada.py b/domainlab/algos/trainers/fbopt_setpoint_ada.py
new file mode 100644
index 000000000..c3c0193ce
--- /dev/null
+++ b/domainlab/algos/trainers/fbopt_setpoint_ada.py
@@ -0,0 +1,314 @@
+"""
+update the setpoint during training
+"""
+import numpy as np
+
+from domainlab.utils.logger import Logger
+
+
+def list_true(list1):
+    """
+    return the positions of a list whose elements are True
+    """
+    arr_pos = np.arange(len(list1))[list1]
+    return list(arr_pos)
+
+
+def list_add(list1, list2):
+    """
+    element-wise addition of two lists
+    """
+    return [a + b for a, b in zip(list1, list2)]
+
+
+def list_multiply(list1, coeff):
+    """
+    multiply a list by a scalar
+    """
+    return [ele * coeff for ele in list1]
+
+
+def if_list_sign_agree(list1, list2):
+    """
+    each pair must have the same sign
+    """
+    list_agree = [a * b >= 0 for a, b in zip(list1, list2)]
+    if not all(list_agree):
+        raise RuntimeError(f"{list1} and {list2} cannot be compared!")
+
+
+def is_less_list_any(list1, list2):
+    """
+    judge if any component of one list is less than the other;
+    for negative pairs the comparison is flipped
+    """
+    if_list_sign_agree(list1, list2)
+    list_comparison = [
+        a < b if a >= 0 and b >= 0 else a > b for a, b in zip(list1, list2)
+    ]
+    return any(list_comparison), list_true(list_comparison)
+
+
+def is_less_list_all(list1, list2, flag_eq=False):
+    """
+    judge if all components of one list are less than the other;
+    for negative pairs the comparison is flipped
+    """
+    if_list_sign_agree(list1, list2)
+    list_comparison = [
+        a < b if a >= 0 and b >= 0 else a > b for a, b in zip(list1, list2)
+    ]
+    if flag_eq:
+        list_comparison = [
+            a <= b if a >= 0 and b >= 0 else a >= b for a, b in zip(list1, list2)
+        ]
+    return all(list_comparison)
+
+
+def list_ma(list_state, list_input, coeff):
+    """
+    moving average of a list
+    """
+    return [a * coeff + b * (1 - coeff) for a, b in zip(list_state, list_input)]
+
+
+class SetpointRewinder:
+    """
+    rewind the setpoint if the exponential moving average of the current loss
+    is bigger than the setpoint
+    """
+
+    def __init__(self, host):
+        self.host = host
+        self.counter = None
+        self.epo_ma = None
+        self.ref = None
+        self.coeff_ma = 0.5
+        self.setpoint_rewind = host.flag_setpoint_rewind
+
+    def reset(self, epo_reg_loss):
+        """
+        reset the state when the setpoint is adjusted
+        """
+        self.counter = 0
+        self.epo_ma = [0.0 for _ in range(10)]  # FIXME: assumes at most 10 losses
+        self.ref = epo_reg_loss
+
+    def observe(self, epo_reg_loss):
+        """
+        update the moving average
+        """
+        if self.ref is None:
+            self.reset(epo_reg_loss)
+        self.epo_ma = list_ma(self.epo_ma, epo_reg_loss, self.coeff_ma)
+        list_comparison_increase = [a < b for a, b in zip(self.ref, self.epo_ma)]
+        list_comparison_above_setpoint = [
+            a < b for a, b in zip(self.host.setpoint4R, self.epo_ma)
+        ]
+        flag_increase = any(list_comparison_increase)
+        flag_above_setpoint = any(list_comparison_above_setpoint)
+        if flag_increase and flag_above_setpoint:
+            self.counter += 1
+        else:
+            self.counter = 0
+            self.reset(epo_reg_loss)
+
+        if self.setpoint_rewind:
+            if self.counter > 2 and self.counter <= 3:
+                # only rewind while the counter is exactly 3
+                list_pos = list_true(list_comparison_above_setpoint)
+                print(f"\n\n\n!!!!!!! setpoint too low at {list_pos}!\n\n\n")
+                for pos in list_pos:
+                    print(
+                        f"\n\n\n!!!!!!! rewinding setpoint at pos {pos} "
+                        f"from {self.host.setpoint4R[pos]} to "
+                        f"{self.epo_ma[pos]}!\n\n\n"
+                    )
+                    self.host.setpoint4R[pos] = self.epo_ma[pos]
+
+        if self.counter > 3:
+            self.host.transition_to(FixedSetpoint())
+            self.counter = np.inf  # FIXME: freezes the counter for good
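A toy trace of the `SetpointRewinder` logic above (pure Python, illustrative numbers): the EMA of the reg loss is tracked, and each consecutive epoch in which it both rises above the reference recorded at the last reset and sits above the setpoint increments a counter that eventually triggers the rewind and the fallback to a fixed setpoint.

```python
coeff_ma = 0.5
setpoint = [100.0]
ref = [105.0]      # reference loss recorded at the last reset
epo_ma = [105.0]
counter = 0

for observed in ([110.0], [112.0], [115.0], [118.0]):
    epo_ma = [m * coeff_ma + o * (1 - coeff_ma) for m, o in zip(epo_ma, observed)]
    increased = any(r < m for r, m in zip(ref, epo_ma))
    above_setpoint = any(s < m for s, m in zip(setpoint, epo_ma))
    counter = counter + 1 if (increased and above_setpoint) else 0
    print(counter, [round(m, 2) for m in epo_ma])
# the counter climbs 1, 2, 3, 4; in the code above, counter > 3 makes the
# controller fall back to a fixed setpoint
```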
+class FbOptSetpointController:
+    # pylint: disable=too-many-instance-attributes
+    """
+    update the setpoint for mu
+    """
+
+    def __init__(self, state=None, args=None):
+        """
+        the state argument implements the state pattern for setpoint updates
+        """
+        if state is None:
+            if args is not None and args.no_setpoint_update:
+                state = FixedSetpoint()
+            else:
+                state = DominateAllComponent()
+        self.transition_to(state)
+        self.flag_setpoint_rewind = args.setpoint_rewind == "yes"
+        self.setpoint_rewinder = SetpointRewinder(self)
+        self.state_task_loss = 0.0
+        self.state_epo_reg_loss = [
+            0.0 for _ in range(10)
+        ]  # FIXME: 10 is the maximum number of losses here
+        self.coeff_ma_setpoint = args.coeff_ma_setpoint
+        self.coeff_ma_output = args.coeff_ma_output_state
+        # the initial value will be set via the trainer
+        self.setpoint4R = None
+        self.setpoint4ell = None
+        self.host = None
+
+    def transition_to(self, state):
+        """
+        change the internal state
+        """
+        self.state_updater = state
+        self.state_updater.accept(self)
+
+    def update_setpoint_ma(self, list_target, list_pos):
+        """
+        update the setpoint via a moving average, only at positions list_pos
+        """
+        target_ma = [
+            self.coeff_ma_setpoint * a + (1 - self.coeff_ma_setpoint) * b
+            for a, b in zip(self.setpoint4R, list_target)
+        ]
+        self.setpoint4R = [
+            target_ma[i] if i in list_pos else self.setpoint4R[i]
+            for i in range(len(target_ma))
+        ]
+
+    def observe(self, epo_reg_loss, epo_task_loss):
+        """
+        read the current epo_reg_loss continuously
+        """
+        self.state_epo_reg_loss = [
+            self.coeff_ma_output * a + (1 - self.coeff_ma_output) * b if a != 0.0 else b
+            for a, b in zip(self.state_epo_reg_loss, epo_reg_loss)
+        ]
+        if self.state_task_loss == 0.0:
+            self.state_task_loss = epo_task_loss
+        self.state_task_loss = (
+            self.coeff_ma_output * self.state_task_loss
+            + (1 - self.coeff_ma_output) * epo_task_loss
+        )
+        self.setpoint_rewinder.observe(self.state_epo_reg_loss)
+        flag_update, list_pos = self.state_updater.update_setpoint()
+        if flag_update:
+            self.setpoint_rewinder.reset(self.state_epo_reg_loss)
+            logger = Logger.get_logger(logger_name="main_out_logger", loglevel="INFO")
+            logger.info(f"!!!!! setpoint old value {self.setpoint4R}!")
+            self.update_setpoint_ma(self.state_epo_reg_loss, list_pos)
+            logger.info(f"!!!!! setpoint updated to {self.setpoint4R}!")
+            return True
+        return False
+
+
+class FbOptSetpointControllerState:
+    # pylint: disable=too-few-public-methods
+    """
+    abstract state for the state pattern
+    """
+
+    def __init__(self):
+        self.host = None
+
+    def accept(self, controller):
+        """
+        set the host for the state
+        """
+        self.host = controller
+
+
+class FixedSetpoint(FbOptSetpointControllerState):
+    """
+    do not update the setpoint
+    """
+
+    def update_setpoint(self):
+        """
+        always return False so the setpoint is never updated
+        """
+        return False, None
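A worked example of `update_setpoint_ma` above, with made-up values: only the positions flagged by the active state move, and they move as a convex combination of the old setpoint and the observed loss state, so the setpoint shrinks conservatively.

```python
coeff_ma_setpoint = 0.9
setpoint4R = [100.0, 50.0, 10.0]
state_epo_reg_loss = [80.0, 45.0, 12.0]
list_pos = [0, 1]  # third component did not qualify for an update

target_ma = [
    coeff_ma_setpoint * s + (1 - coeff_ma_setpoint) * t
    for s, t in zip(setpoint4R, state_epo_reg_loss)
]
setpoint4R = [
    target_ma[i] if i in list_pos else setpoint4R[i]
    for i in range(len(setpoint4R))
]
print(setpoint4R)  # [98.0, 49.5, 10.0]: a slow, conservative shrink
```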
+class SliderAllComponent(FbOptSetpointControllerState):
+    """
+    concrete state of the state pattern
+    """
+
+    def update_setpoint(self):
+        """
+        all components of R decreased, regardless of whether ell decreased
+        """
+        logger = Logger.get_logger(logger_name="main_out_logger", loglevel="INFO")
+        logger.info(
+            f"comparing output vs setpoint: \n"
+            f"{self.host.state_epo_reg_loss} \n"
+            f"{self.host.setpoint4R}"
+        )
+        if is_less_list_all(
+            self.host.state_epo_reg_loss, self.host.setpoint4R, flag_eq=True
+        ):
+            logger.info(
+                "!!!!! In SliderAllComponent: current value of R is better "
+                "than the current setpoint!"
+            )
+            return True, list(range(len(self.host.setpoint4R)))
+        return False, None
+
+
+class SliderAnyComponent(FbOptSetpointControllerState):
+    """
+    concrete state of the state pattern
+    """
+
+    def update_setpoint(self):
+        """
+        any component of R decreased, regardless of whether ell decreased
+        """
+        flag, list_pos = is_less_list_any(
+            self.host.state_epo_reg_loss, self.host.setpoint4R
+        )
+        return flag, list_pos
+
+    def transit(self):
+        """
+        transit to the state where all components must decrease
+        """
+        self.host.transition_to(SliderAllComponent())
+
+
+class DominateAnyComponent(SliderAnyComponent):
+    """
+    concrete state of the state pattern
+    """
+
+    def update_setpoint(self):
+        """
+        any component of the R loss decreased together with the ell loss
+        """
+        flag1, list_pos = super().update_setpoint()
+        flag2 = self.host.state_task_loss < self.host.setpoint4ell
+        if flag2:
+            self.host.setpoint4ell = self.host.state_task_loss
+        return flag1 and flag2, list_pos
+
+
+class DominateAllComponent(SliderAllComponent):
+    """
+    concrete state of the state pattern
+    """
+
+    def update_setpoint(self):
+        """
+        each component of the R loss decreased and the ell loss also decreased
+        """
+        flag1, list_pos = super().update_setpoint()
+        flag2 = self.host.state_task_loss < self.host.setpoint4ell
+        if flag2:
+            logger = Logger.get_logger(logger_name="main_out_logger", loglevel="INFO")
+            logger.info(
+                f"best ell loss: from {self.host.setpoint4ell} to "
+                f"{self.host.state_task_loss}"
+            )
+            self.host.setpoint4ell = self.host.state_task_loss
+        return flag1 and flag2, list_pos
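The trainer in the next file accumulates per-batch summed losses into epoch averages inside `eval_r_loss`; a runnable mini-version with made-up batch losses instead of model calls:

```python
from operator import add

def list_divide(list_val, scalar):
    """divide a list by a scalar, as in the trainer module below"""
    return [ele / scalar for ele in list_val]

# two fake batches: (list of summed reg losses, summed task loss)
batches = [([4.0, 2.0], 1.0), ([6.0, 4.0], 3.0)]
epo_reg_loss, epo_task_loss, counter = [], 0.0, 0.0
for b_reg, b_task in batches:
    epo_reg_loss = b_reg if not epo_reg_loss else list(map(add, epo_reg_loss, b_reg))
    epo_task_loss += b_task
    counter += 1.0
print(list_divide(epo_reg_loss, counter), epo_task_loss / counter)
# -> [5.0, 3.0] 2.0
```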
diff --git a/domainlab/algos/trainers/train_fbopt_b.py b/domainlab/algos/trainers/train_fbopt_b.py
new file mode 100644
index 000000000..1efe3ce58
--- /dev/null
+++ b/domainlab/algos/trainers/train_fbopt_b.py
@@ -0,0 +1,177 @@
+"""
+update hyper-parameters during training
+"""
+from operator import add
+
+import torch
+
+from domainlab.algos.trainers.fbopt_mu_controller import HyperSchedulerFeedback
+from domainlab.algos.trainers.hyper_scheduler import HyperSchedulerWarmupLinear
+from domainlab.algos.trainers.train_basic import TrainerBasic
+from domainlab.utils.logger import Logger
+
+
+def list_divide(list_val, scalar):
+    """
+    divide a list by a scalar
+    """
+    return [ele / scalar for ele in list_val]
+
+
+class HyperSetter:
+    # pylint: disable=too-few-public-methods
+    """
+    mock object to force hyper-parameters into the model
+    """
+
+    def __init__(self, dict_hyper):
+        self.dict_hyper = dict_hyper
+
+    def __call__(self, epoch=None):
+        return self.dict_hyper
+
+
+class TrainerFbOpt(TrainerBasic):
+    """
+    trainer that tunes the multipliers via feedback control
+    """
+
+    def set_scheduler(self, scheduler):
+        """
+        Args:
+            scheduler: the class of the scheduler; an object of this class
+            will be created inside the model
+        """
+        # model.hyper_init registers the hyper-parameters of the model
+        # with the scheduler
+        self.hyper_scheduler = self.model.hyper_init(scheduler, trainer=self)
+
+    def eval_r_loss(self):
+        """
+        evaluate the regularization loss and the ERM loss with respect to the
+        parameters dict_theta, on all available training data
+        # TODO: normalize loss via batchsize
+        """
+        self.model.eval()
+        # mock the model hyper-parameters to be from dict4mu
+        epo_reg_loss = []
+        epo_task_loss = 0
+        epo_p_loss = 0
+        counter = 0.0
+        with torch.no_grad():
+            for _, (tensor_x, vec_y, vec_d, *others) in enumerate(
+                self.loader_tr_no_drop
+            ):
+                tensor_x, vec_y, vec_d = (
+                    tensor_x.to(self.device),
+                    vec_y.to(self.device),
+                    vec_d.to(self.device),
+                )
+                tuple_reg_loss = self.model.cal_reg_loss(tensor_x, vec_y, vec_d, others)
+                p_loss, *_ = self.model.cal_loss(tensor_x, vec_y, vec_d, others)
+                # tuple_reg_loss[0] is the list of per-loss mini-batch tensors
+                list_b_reg_loss = tuple_reg_loss[0]
+                list_b_reg_loss_summed = [
+                    ele.sum().detach().item() for ele in list_b_reg_loss
+                ]
+                if len(epo_reg_loss) == 0:
+                    epo_reg_loss = list_b_reg_loss_summed
+                else:
+                    epo_reg_loss = list(map(add, epo_reg_loss, list_b_reg_loss_summed))
+                b_task_loss = (
+                    self.model.cal_task_loss(tensor_x, vec_y).sum().detach().item()
+                )
+                # sum() kills the dimension of the mini-batch
+                epo_task_loss += b_task_loss
+                epo_p_loss += p_loss.sum().detach().item()
+                counter += 1.0
+        return (
+            list_divide(epo_reg_loss, counter),
+            epo_task_loss / counter,
+            epo_p_loss / counter,
+        )
+
+    def before_batch(self, epoch, ind_batch):
+        """
+        if hyper-parameters should be updated per batch, then the step
+        should be set to epoch * self.num_batches + ind_batch
+        """
+        if self.flag_update_hyper_per_batch:
+            # NOTE: if not updating per batch, do not update here
+            self.model.hyper_update(
+                epoch * self.num_batches + ind_batch, self.hyper_scheduler
+            )
+        return super().after_batch(epoch, ind_batch)
+
+    def before_tr(self):
+        self.flag_setpoint_updated = False
+        if self.aconf.force_feedforward:
+            self.set_scheduler(scheduler=HyperSchedulerWarmupLinear)
+        else:
+            self.set_scheduler(scheduler=HyperSchedulerFeedback)
+
+        self.set_model_with_mu()  # very small value
+        if self.aconf.tr_with_init_mu:
+            self.tr_with_init_mu()
+
+        (
+            self.epo_reg_loss_tr,
+            self.epo_task_loss_tr,
+            self.epo_loss_tr,
+        ) = self.eval_r_loss()
+        self.hyper_scheduler.set_setpoint(
+            [
+                ele * self.aconf.ini_setpoint_ratio
+                if ele > 0
+                else ele / self.aconf.ini_setpoint_ratio
+                for ele in self.epo_reg_loss_tr
+            ],
+            self.epo_task_loss_tr,
+        )  # setpoint w.r.t. the random initialization of the neural network
+        self.hyper_scheduler.set_k_i_gain(self.epo_reg_loss_tr)
+
+    @property
+    def list_str_multiplier_na(self):
+        """
+        return the names of the multipliers
+        """
+        return self.model.list_str_multiplier_na
+
+    def tr_with_init_mu(self):
+        """
+        one ERM epoch with the very small initial mu
+        """
+        super().tr_epoch(-1)
+
+    def set_model_with_mu(self):
+        """
+        set the model multipliers
+        """
+        self.model.hyper_update(
+            epoch=None, fun_scheduler=HyperSetter(self.hyper_scheduler.mmu)
+        )
+
+    def tr_epoch(self, epoch, flag_info=False):
+        """
+        update the multipliers only once per epoch
+        """
+        self.hyper_scheduler.search_mu(
+            self.epo_reg_loss_tr,
+            self.epo_task_loss_tr,
+            self.epo_loss_tr,
+            self.list_str_multiplier_na,
+            miter=epoch,
+        )
+        self.set_model_with_mu()
+        if hasattr(self.model, "dict_multiplier"):
+            logger = Logger.get_logger()
+            logger.info(f"current multiplier: {self.model.dict_multiplier}")
+
+        if self._decoratee is not None:
+            flag = self._decoratee.tr_epoch(epoch, self.flag_setpoint_updated)
+        else:
+            flag = super().tr_epoch(epoch, self.flag_setpoint_updated)
+        # is it good to update the setpoint after we know the new value of
+        # each loss?
+        self.flag_setpoint_updated = self.hyper_scheduler.update_setpoint(
+            self.epo_reg_loss_tr, self.epo_task_loss_tr
+        )
+        return flag
diff --git a/domainlab/utils/generate_fbopt_phase_portrait.py b/domainlab/utils/generate_fbopt_phase_portrait.py
new file mode 100644
index 000000000..ef4b28806
--- /dev/null
+++ b/domainlab/utils/generate_fbopt_phase_portrait.py
@@ -0,0 +1,402 @@
+"""
+This file generates phase portraits from tensorboard event files.
+""" +import argparse +import glob +import os +import numpy as np +import re +from tensorboard.backend.event_processing.event_accumulator import EventAccumulator + +import matplotlib +import matplotlib.pyplot as plt +from matplotlib.backends.backend_pdf import PdfPages +matplotlib.rcParams['pdf.fonttype'] = 42 +matplotlib.rcParams['text.usetex'] = True +plt.rcParams['text.latex.preamble'] = r'\usepackage{amsmath}' +font = {'size': 20} +matplotlib.rc('font', **font) + + +def sav2pdfpage(fig, fname): + pdf_page = PdfPages(fname) + pdf_page.savefig(fig, bbox_inches="tight") + pdf_page.close() + +def latex_to_nonlatex(latex_string): + nonlatex_string = re.sub(r'[{$}]', '', latex_string) + nonlatex_string = nonlatex_string.replace("\\", "") + return nonlatex_string + +class ListFileHandler: + def __init__(self, file_path): + self.file_path = file_path + + def write_lists_to_file(self, list1, list2=None): + with open(self.file_path, 'w') as file: + if list2 is None: + for val1 in list1: + file.write(f"{val1}\n") + else: + for val1, val2 in zip(list1, list2): + file.write(f"{val1} {val2}\n") + + def read_lists_from_file(self): + list1 = [] + list2 = [] + with open(self.file_path, 'r') as file: + for line in file: + values = list(map(float, line.strip().split())) + if len(values) == 1: + list1.append(values[0]) + elif len(values) == 2: + list1.append(values[0]) + list2.append(values[1]) + return list1, list2 + + +# pylint: disable=too-many-arguments +def get_xy_from_event_file( + event_file, + plot1, + plot2=None, + tf_size_guidance=None, + sanity_check=False, + verbose=True, +): + """ + extract x and y values from a tensorboard event file + """ + if tf_size_guidance is None: + # settings for which/how much data is loaded from the + # tensorboard event files + tf_size_guidance = { + "compressedHistograms": 0, + "images": 0, + "scalars": 1e10, # keep unlimited number + "histograms": 0, + } + # load event file + event = EventAccumulator(event_file, tf_size_guidance) + event.Reload() + # print names of available plots + if verbose: + print(f"Event file {event_file} -- available plots:") + print(event.Tags()["scalars"]) + if plot2: + # extract the plot2 values (e.g., reg/dyn0) + y_event = event.Scalars(plot2) + y = [s.value for s in y_event] + x_int = [s.step for s in y_event] + # the .step data are saved as ints in tensorboard, + # (so, in case of phase portrait, we re-extact from 'task') + else: + y = None + # extract the corresponding plot1 values (e.g., 'task') + x_event = event.Scalars(plot1) + x = [s.value for s in x_event] + # sanity check (originally added for the reg/dyn0 vs. 
task phase portrait;
+    # shouldn't be needed if plot1 and plot2 represent something else):
+    if sanity_check:
+        for i in range(len(x)):
+            assert int(x[i]) == x_int[i]
+
+    return x, y
+
+
+# pylint: disable=too-many-arguments, too-many-locals, redefined-outer-name, unused-argument
+def phase_portrait_combined(
+    event_files,
+    colors,
+    plot1,
+    plot2,
+    legend1=None,
+    legend2=None,
+    plot_len=None,
+    skip_n_steps=1,
+    output_dir=".",
+):
+    """
+    combined phase portrait for multiple (at least one) tensorboard
+    event files in the same plot
+    """
+    fig = plt.figure()
+
+    for event_i in range(len(event_files)):
+        x, y = get_xy_from_event_file(event_files[event_i], plot1=plot1, plot2=plot2)
+
+        assert len(x) == len(y)
+        if plot_len is None:
+            plot_len = len(x)
+        # truncate x and y to the desired length:
+        x = x[:plot_len]
+        y = y[:plot_len]
+        # skip every n steps
+        x = x[0::skip_n_steps]
+        y = y[0::skip_n_steps]
+        # mirror negative values so the log-log axes stay valid
+        x = [-ele if ele < 0 else ele for ele in x]
+        y = [-ele if ele < 0 else ele for ele in y]
+
+        head_w_glob = min((max(x) - min(x)) / plot_len, (max(y) - min(y)) / plot_len)
+        head_w_glob *= skip_n_steps
+        for i in range(len(x) - 1):
+            xy_dist = np.sqrt((x[i + 1] - x[i]) ** 2 + (y[i + 1] - y[i]) ** 2)
+            head_l = xy_dist / plot_len * skip_n_steps
+            # let the width be one tenth of the length
+            head_w = min(head_l / 10.0, head_w_glob)
+            plt.arrow(
+                x[i],
+                y[i],
+                (x[i + 1] - x[i]),
+                (y[i + 1] - y[i]),
+                head_width=head_w,
+                head_length=head_l,
+                length_includes_head=True,
+                fc=colors[event_i],
+                ec=colors[event_i],
+                alpha=0.8,
+            )
+            # the combination of head_width and head_length makes the arrow
+            # more visible.
+            # length_includes_head=False makes the arrow stick out too far
+            # beyond the point, so True is used.
+
+        # use a finite palette:
+        # colors = ["red", "green", "blue", "yellow", "purple"]
+        # list_color = [colors[i % len(colors)] for i, h in enumerate(x)]
+        # use numerical colors instead
+        colors = np.arange(0, plot_len, skip_n_steps)
+        plt.plot(x[0], y[0], "ko")
+        # plt.scatter(x, y, s=1, c=np.array(list_color))
+        plt.scatter(x, y, s=10, c=colors, cmap='viridis')
+        plt.yscale("log")
+        plt.xscale("log")
+        plt.colorbar()
+
+    if legend1 is None:
+        legend1 = plot1
+    if legend2 is None:
+        legend2 = plot2
+    plt.xlabel(legend1)
+    plt.ylabel(legend2)
+    plt.title("output portrait")
+
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    legend22 = legend2.split(os.sep)[-1]
+
+    fname_legend = latex_to_nonlatex(legend22)
+
+    # write x and y data to a text file:
+    txt_name = os.path.join(output_dir, f"phase_portrait_{fname_legend}.txt")
+    fh = ListFileHandler(txt_name)
+    fh.write_lists_to_file(x, y)
+
+    # save figures
+    fname = os.path.join(output_dir, f"phase_portrait_{fname_legend}")
+    plt.savefig(fname + ".png", dpi=300, bbox_inches="tight")
+    plt.savefig(fname + ".pdf", format="pdf", bbox_inches="tight")
+    plt.savefig(fname + ".svg", format="svg", bbox_inches="tight")
+    sav2pdfpage(fig, fname + "_pdfpage.pdf")
+
+
+def two_curves_combined(
+    event_files,
+    colors,
+    plot1,
+    plot2,
+    legend1=None,
+    legend2=None,
+    output_dir=".",
+    title=None,
+    logscale=False,
+    neg=False,
+    prefix="output_r_",
+    plot_len=None):
+    """
+    FIXME: the colors parameter is not used
+    """
+    fig = plt.figure()
+    for event_i in range(len(event_files)):
+        x, y = get_xy_from_event_file(event_files[event_i], plot1=plot1, plot2=plot2)
+        if plot_len is None:
+            plot_len = len(x)
+        # truncate x and y to the desired length:
+        x = x[:plot_len]
+        y = y[:plot_len]
+
+        if neg:
+            plt.plot(-np.array(x), color="blue")
plt.plot(-np.array(y), color="red") + else: + plt.plot(x, color="blue") + plt.plot(y, color="red") + if logscale: + plt.yscale("log") + plt.xlabel("Epoch") + # plt.ylabel("loss") + if title is not None: + plt.title(title) + if legend1 is None: + legend1 = plot1 + if legend2 is None: + legend2 = plot2 + plt.legend([legend1, legend2]) + + legend11 = legend1.replace(os.sep, "_") + legend22 = legend2.replace(os.sep, "_") + + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + fname_legend = latex_to_nonlatex(legend11) + fname_legend += latex_to_nonlatex(legend22) + # write x and y data to a text file: + txt_name = os.path.join(output_dir, prefix+f"{fname_legend}.txt") + fh = ListFileHandler(txt_name) + fh.write_lists_to_file(x, y) + + # save figures + fname_logscale = "_logscale" if logscale else "" + fname = os.path.join(output_dir, prefix+f"{fname_legend}") + plt.savefig(fname+fname_logscale+".png", dpi=300, bbox_inches="tight") + plt.savefig(fname+fname_logscale+".pdf", format="pdf", bbox_inches="tight") + plt.savefig(fname+fname_logscale+".svg", format="svg", bbox_inches="tight") + pdf_page = PdfPages(fname+fname_logscale+"_pdfpage.pdf") + pdf_page.savefig(fig, bbox_inches="tight") + pdf_page.close() + + + + +def plot_single_curve(event_files, colors, plot1, legend1=None, output_dir=".", plot_len=None): + """ + FIXME: colors parameter is not used + """ + fig = plt.figure() + for event_i in range(len(event_files)): + x, _ = get_xy_from_event_file(event_files[event_i], plot1=plot1) + if plot_len is None: + plot_len = len(x) + # truncate x and y to the desired length: + x = x[:plot_len] + plt.plot(x) + plt.yscale("log") + plt.xlabel("Epoch") + if legend1 is None: + legend1 = plot1 + plt.ylabel(legend1) + # plt.title("timecourse") + + legend11 = legend1.replace(os.sep, "_") + fname_legend = latex_to_nonlatex(legend11) + + # save figures + if not os.path.exists(output_dir): + os.makedirs(output_dir) + plt.savefig(os.path.join(output_dir, f"single_timecourse_{fname_legend}.png"), dpi=300, bbox_inches="tight") + plt.savefig(os.path.join(output_dir, f"single_timecourse_{fname_legend}.pdf"), format="pdf", bbox_inches="tight") + plt.savefig(os.path.join(output_dir, f"single_timecourse_{fname_legend}.svg"), format="svg", bbox_inches="tight") + pdf_page = PdfPages(os.path.join(output_dir, f"single_timecourse_{fname_legend}_pdfpage.pdf")) + pdf_page.savefig(fig, bbox_inches="tight") + pdf_page.close() + + # write x and y data to a text file: + txt_name = os.path.join(output_dir, f"single_timecourse_{fname_legend}.txt") + fh = ListFileHandler(txt_name) + fh.write_lists_to_file(list(range(len(x))), x) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="plot") + parser.add_argument("-plot1", "--plot1", default=None, type=str) + parser.add_argument("-plot2", "--plot2", default=None, type=str) + parser.add_argument("-legend1", "--legend1", default=None, type=str) + parser.add_argument("-legend2", "--legend2", default=None, type=str) + parser.add_argument("-plot_len", "--plot_len", default=None, type=int) + parser.add_argument("-skip_n_steps", "--skip_n_steps", default=None, type=int) + parser.add_argument("-title", "--title", default=None, type=str) + parser.add_argument("--output_dir", default=".", type=str) + parser.add_argument("--runs_dir", default="runs", type=str) + parser.add_argument( + "--neg", + action="store_true", + help="if true, plot negative of a list", + ) + parser.add_argument( + "--phase_portrait", + action="store_true", + help="if True plots a 
phase portrait,\ + otherwise a curve (default)", + ) + args = parser.parse_args() + + # get event files from all available runs + # Tensorboard: * could be the date information, this intermediate directory + # always exist + # events* means all the event folders + # this would combine plots from all subfolders in the runs directory (i.e., all graphs combined in each plot): + #event_files = glob.glob(f"{args.runs_dir}/*/events*") + # this needs the user to specify a specific run (subfolder in the runs directory): + event_files = glob.glob(f"{args.runs_dir}/events*") + if not os.path.isdir(args.runs_dir): raise RuntimeError("runs_dir should be a directory.") + print( + "Using the following tensorboard event files:\n{}".format( + "\n".join(event_files) + ) + ) + + # Different colors for the different runs + cmap = plt.get_cmap("tab10") # Choose a colormap + colors = [cmap(i) for i in range(len(event_files))] + + if args.phase_portrait: + phase_portrait_combined( + event_files, + colors, + plot1=args.plot1, + plot2=args.plot2, + legend1=args.legend1, + legend2=args.legend2, + plot_len=args.plot_len, + skip_n_steps=args.skip_n_steps, + output_dir=args.output_dir, + ) + else: + if args.plot2: + # two curves per plot + two_curves_combined( + event_files, + colors, + plot1=args.plot1, + plot2=args.plot2, + legend1=args.legend1, + legend2=args.legend2, + output_dir=args.output_dir, + title=args.title, + neg=args.neg + ) + two_curves_combined( + event_files, + colors, + plot1=args.plot1, + plot2=args.plot2, + legend1=args.legend1, + legend2=args.legend2, + output_dir=args.output_dir, + title=args.title, + neg=args.neg, + logscale=True + ) + + else: + # one curve per plot + plot_single_curve( + event_files, + colors, + plot1=args.plot1, + legend1=args.legend1, + output_dir=args.output_dir, + ) diff --git a/examples/benchmark/benchmark_pacs_resnet_grid_jigen.yaml b/examples/benchmark/benchmark_pacs_resnet_grid_jigen.yaml new file mode 100644 index 000000000..28c6705e3 --- /dev/null +++ b/examples/benchmark/benchmark_pacs_resnet_grid_jigen.yaml @@ -0,0 +1,52 @@ +# test benchmark config. 
+ +mode: grid + +test_domains: + - sketch + +output_dir: zoutput/benchmarks/pacs_benchmark_grid + +startseed: 0 +endseed: 1 # currently included + + +domainlab_args: + tpath: examples/tasks/task_pacs_path_list.py + dmem: False + lr: 5e-5 + epos: 500 + es: 1 + bs: 32 + npath: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + san_check: False + + +Shared params: + gamma_reg: + min: 0.01 + max: 10 + step: 0.1 + distribution: loguniform + num: 3 + + +jigen: # name + model: jigen + shared: + - gamma_reg + + hyperparameters: + # probability of permutating the tiles of an image, pperm = 0 -> pure classification + pperm: + min: 0.7 + max: 1 + step: 0.1 + distribution: uniform + num: 3 + + +erm: + model: erm diff --git a/examples/benchmark/mnist_dann_fbopt.yaml b/examples/benchmark/mnist_dann_fbopt.yaml new file mode 100644 index 000000000..8bdbe444c --- /dev/null +++ b/examples/benchmark/mnist_dann_fbopt.yaml @@ -0,0 +1,62 @@ +mode: grid + +output_dir: zoutput/benchmarks/benchmark_fbopt + +sampling_seed: 0 +startseed: 0 +endseed: 2 + +test_domains: + - 0 + + +domainlab_args: + task: mnistcolor10 + tr_d: [1, 2] + dmem: False + lr: 0.0001 + epos: 500 + es: 100 + bs: 64 + nname: conv_bn_pool_2 + san_check: False + exp_shoulder_clip: 10 + mu_clip: 10_000 + coeff_ma: 0.5 + no_tensorboard: False + + + +Shared params: + ini_setpoint_ratio: + min: 0.9 + max: 0.99 + num: 3 + step: 0.05 + distribution: uniform + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + step: 0.0001 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + num: 2 + distribution: uniform + +# Test fbopt with different hyperparameter configurations + +dann_fbopt: + model: dann + trainer: fbopt + ini_setpoint_ratio: 0.9 + shared: + - k_i_gain + - mu_init + +erm: + model: erm diff --git a/examples/benchmark/mnist_diva_fbopt_alone.yaml b/examples/benchmark/mnist_diva_fbopt_alone.yaml new file mode 100644 index 000000000..c483b0e68 --- /dev/null +++ b/examples/benchmark/mnist_diva_fbopt_alone.yaml @@ -0,0 +1,92 @@ +mode: grid + +output_dir: zoutput/benchmarks/mnist_diva_fbopt_alone + +sampling_seed: 0 +startseed: 0 +endseed: 10 + +test_domains: + - 0 + + +domainlab_args: + task: mnistcolor10 + tr_d: [1, 2] + dmem: False + lr: 0.001 + epos: 500 + epos_min: 20 + es: 5 + bs: 64 + zx_dim: 0 + zy_dim: 32 + zd_dim: 32 + nname: conv_bn_pool_2 + nname_dom: conv_bn_pool_2 + nname_encoder_x2topic_h: conv_bn_pool_2 + nname_encoder_sandwich_x2h4zd: conv_bn_pool_2 + san_check: False + coeff_ma: 0.5 + no_tensorboard: False + + + +Shared params: + ini_setpoint_ratio: + min: 0.9 + max: 0.99 + num: 3 + distribution: uniform + + k_i_gain: + min: 1e-4 + max: 1e-3 + num: 2 + distribution: loguniform + + mu_init: + min: 0.000001 + max: 0.00001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + mu_clip: + distribution: categorical + datatype: float + values: + - 1000 + - 100 + - 10 + - 1 + +# Test fbopt with different hyperparameter configurations + +diva_fbopt_a: + model: diva + trainer: fbopt + str_diva_multiplier_type: gammad_recon + gamma_y: 1.0 + init_setpoint_ratio: 0.99 + exp_shoulder_clip: 1 + mu_init: 1e-6 + shared: + - k_i_gain + - mu_clip + +erm: + model: erm diff --git a/examples/benchmark/mnist_diva_fbopt_and_baselines.yaml 
b/examples/benchmark/mnist_diva_fbopt_and_baselines.yaml new file mode 100644 index 000000000..b687b69f4 --- /dev/null +++ b/examples/benchmark/mnist_diva_fbopt_and_baselines.yaml @@ -0,0 +1,122 @@ +mode: grid + +output_dir: zoutput/benchmarks/mnist_diva_fbopt_and_baselines + +sampling_seed: 0 +startseed: 0 +endseed: 10 + +test_domains: + - 0 + + +domainlab_args: + task: mnistcolor10 + tr_d: [1, 2] + dmem: False + lr: 0.001 + epos: 5000 + epos_min: 500 + es: 5 + bs: 64 + zx_dim: 0 + zy_dim: 32 + zd_dim: 32 + nname: conv_bn_pool_2 + nname_dom: conv_bn_pool_2 + nname_encoder_x2topic_h: conv_bn_pool_2 + nname_encoder_sandwich_x2h4zd: conv_bn_pool_2 + san_check: False + coeff_ma: 0.5 + no_tensorboard: False + + + +Shared params: + ini_setpoint_ratio: + min: 0.9 + max: 0.99 + num: 3 + distribution: uniform + + k_i_gain: + min: 1e-4 + max: 1e-3 + num: 2 + distribution: loguniform + + k_i_gain_ratio: + min: 0.1 + max: 1 + num: 5 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + mu_clip: + distribution: categorical + datatype: float + values: + - 1000 + - 100 + - 10 + - 1 + +# Test fbopt with different hyperparameter configurations + +diva_fbopt_a: + model: diva + trainer: fbopt + str_diva_multiplier_type: gammad_recon + gamma_y: 1.0 + init_setpoint_ratio: 0.99 + exp_shoulder_clip: 1 + mu_init: 1e-6 + shared: + - k_i_gain_ratio + - mu_clip + +diva_feedforward_a: + model: diva + trainer: hyperscheduler + str_diva_multiplier_type: gammad_recon + gamma_y: 1.0 + shared: + - gamma_d + +diva_default: + model: diva + trainer: hyperscheduler + str_diva_multiplier_type: default + shared: + - gamma_d + - gamma_y + +diva_fixed_penalty: + model: diva + trainer: basic + str_diva_multiplier_type: default + shared: + - gamma_d + - gamma_y + +erm: + model: erm diff --git a/examples/benchmark/mnist_jigen_fbopt_alone.yaml b/examples/benchmark/mnist_jigen_fbopt_alone.yaml new file mode 100644 index 000000000..388bdcf13 --- /dev/null +++ b/examples/benchmark/mnist_jigen_fbopt_alone.yaml @@ -0,0 +1,79 @@ +mode: grid + +output_dir: zoutput/benchmarks/benchmark_fbopt + +sampling_seed: 0 +startseed: 0 +endseed: 4 + +test_domains: + - 0 + + +domainlab_args: + task: mnistcolor10 + tr_d: [1, 2] + dmem: False + lr: 0.001 + epos: 1000 + es: 100 + bs: 64 + nname: conv_bn_pool_2 + san_check: False + exp_shoulder_clip: 10 + mu_clip: 10 + coeff_ma: 0.5 + no_tensorboard: False + pperm: 0.5 + + + +Shared params: + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + coeff_ma_setpoint: + distribution: uniform + min: 0.0 + max: 0.9 + num: 2 + setpoint_rewind: + distribution: categorical + datatype: str + values: + - "yes" + - "no" + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + num: 2 + distribution: loguniform + + gamma_reg: + min: 0.01 + max: 10_000 + num: 10 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations + +jigen_feedback: + model: jigen + trainer: fbopt + ini_setpoint_ratio: 0.99 + coeff_ma_output_state: 0.5 + coeff_ma_setpoint: 0.5 + shared: + - k_i_gain + - mu_init diff --git a/examples/benchmark/mnist_jigen_fbopt_and_others.yaml b/examples/benchmark/mnist_jigen_fbopt_and_others.yaml new file mode 100644 index 000000000..bd4857610 --- /dev/null +++ 
b/examples/benchmark/mnist_jigen_fbopt_and_others.yaml @@ -0,0 +1,80 @@ +mode: grid + +output_dir: zoutput/benchmarks/mnist_fbopt_and_others + +sampling_seed: 0 +startseed: 0 +endseed: 4 + +test_domains: + - 0 + + +domainlab_args: + task: mnistcolor10 + tr_d: [1, 2] + dmem: False + lr: 0.001 + epos: 2000 + epos_min: 100 + es: 1 + bs: 64 + nname: conv_bn_pool_2 + san_check: False + no_tensorboard: False + pperm: 0.5 + + + +Shared params: + k_i_gain: + min: 1e-4 + max: 1e-3 + num: 2 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + num: 3 + distribution: loguniform + + gamma_reg: + min: 0.01 + max: 1e4 + num: 3 + distribution: loguniform + + mu_clip: + min: 0.01 + max: 1e4 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations + +jigen_feedback: + model: jigen + trainer: fbopt + ini_setpoint_ratio: 0.99 + mu_init: 1e-6 + shared: + - k_i_gain + - mu_clip + +jigen_feedforward: + model: jigen + trainer: hyperscheduler + shared: + - gamma_reg + +jigen_fixed_penalty: + model: jigen + trainer: basic + shared: + - gamma_reg + +erm: + model: erm diff --git a/examples/benchmark/pacs_dann_fbopt.yaml b/examples/benchmark/pacs_dann_fbopt.yaml new file mode 100644 index 000000000..b5c743033 --- /dev/null +++ b/examples/benchmark/pacs_dann_fbopt.yaml @@ -0,0 +1,53 @@ +mode: grid + +output_dir: zoutput/benchmarks/benchmark_fbopt_pacs + +sampling_seed: 0 +startseed: 0 +endseed: 2 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_path_list.py + dmem: False + lr: 5e-5 + epos: 1 + es: 5 + bs: 64 + san_check: True + npath: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + exp_shoulder_clip: 10 + mu_clip: 10_000 + coeff_ma: 0.5 + no_tensorboard: False + + + +Shared params: + ini_setpoint_ratio: + min: 0.5 + max: 0.99 + num: 2 + step: 0.05 + distribution: uniform + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + step: 0.0001 + distribution: uniform + +# Test fbopt with different hyperparameter configurations + +dann_fbopt: + model: dann + trainer: fbopt + shared: + - ini_setpoint_ratio + - k_i_gain + - es diff --git a/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki.yaml b/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki.yaml new file mode 100644 index 000000000..35c93c236 --- /dev/null +++ b/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki.yaml @@ -0,0 +1,106 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_diva_fbopt_alone_zx + +sampling_seed: 0 + +startseed: 0 +endseed: 2 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 5 + bs: 32 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + zx_dim: 16 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + + mu_clip: + distribution: categorical + datatype: float + values: + - 10 + - 1000 + - 1 + - 100 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + k_i_gain_ratio: + min: 0.1 + max: 1 + num: 3 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + 
step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +diva_fbopt_full: + model: diva + trainer: fbopt + force_setpoint_change_once: True + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + str_diva_multiplier_type: gammad_recon + mu_init: 1e-6 + shared: + - k_i_gain_ratio + - mu_clip diff --git a/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki_1run.yaml b/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki_1run.yaml new file mode 100644 index 000000000..9ec43ac7b --- /dev/null +++ b/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki_1run.yaml @@ -0,0 +1,107 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_diva_fbopt_alone_single_run + +sampling_seed: 0 + +startseed: 0 +endseed: 0 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 5 + bs: 32 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + zx_dim: 16 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + + mu_clip: + distribution: categorical + datatype: float + values: + - 10 + - 1000 + - 1 + - 100 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + k_i_gain_ratio: + min: 0.1 + max: 1 + num: 3 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +diva_fbopt_full: + model: diva + trainer: fbopt + force_setpoint_change_once: True + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + str_diva_multiplier_type: gammad_recon + mu_init: 1e-6 + k_i_gain_ratio: 0.5 + mu_clip: 10 + coeff_ma_output_state: 0.0 + coeff_ma_setpoint: 0.0 diff --git a/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki_output_ma_9.yaml b/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki_output_ma_9.yaml new file mode 100644 index 000000000..36fd10554 --- /dev/null +++ b/examples/benchmark/pacs_diva_fbopt_alone_es1_autoki_output_ma_9.yaml @@ -0,0 +1,108 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_diva_fbopt_autoki_aug + +sampling_seed: 0 + +startseed: 0 +endseed: 10 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 1 + bs: 64 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + 
str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + - 0.8 + + mu_clip: + distribution: categorical + datatype: float + values: + - 10 + - 1000 + - 1 + - 100 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + k_i_gain_ratio: + min: 0.1 + max: 10 + num: 10 + distribution: loguniform + + mu_init: + min: 0.000001 + max: 0.00001 + step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +diva_fbopt_full: + model: diva + trainer: fbopt + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + str_diva_multiplier_type: gammad_recon + mu_init: 1e-6 + shared: + - k_i_gain_ratio + - coeff_ma_output_state diff --git a/examples/benchmark/pacs_diva_fbopt_alone_es1_random_ki.yaml b/examples/benchmark/pacs_diva_fbopt_alone_es1_random_ki.yaml new file mode 100644 index 000000000..24177c0bc --- /dev/null +++ b/examples/benchmark/pacs_diva_fbopt_alone_es1_random_ki.yaml @@ -0,0 +1,102 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_diva_fbopt_alone + +sampling_seed: 0 + +startseed: 0 +endseed: 2 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 200 + epos_min: 20 + es: 1 + bs: 32 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + + mu_clip: + distribution: categorical + datatype: float + values: + - 10 + - 1000 + - 1 + - 100 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +diva_fbopt_full: + model: diva + trainer: fbopt + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + str_diva_multiplier_type: gammad_recon + coeff_ma_output_state: 0.1 + mu_init: 0.000001 + shared: + - k_i_gain + - mu_clip diff --git a/examples/benchmark/pacs_diva_fbopt_alone_fixed.yaml b/examples/benchmark/pacs_diva_fbopt_alone_fixed.yaml new file mode 100644 index 000000000..e2a78230a --- /dev/null +++ b/examples/benchmark/pacs_diva_fbopt_alone_fixed.yaml @@ -0,0 +1,97 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_diva_fbopt_alone + +sampling_seed: 0 + +startseed: 0 +endseed: 5 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_path_list.py + dmem: False + lr: 5e-5 + epos: 
200 + epos_min: 20 + es: 5 + bs: 64 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + + mu_clip: + distribution: categorical + datatype: int + values: + - 10 + - 1000 + - 1000_000 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 3 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00001 + step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +diva_fbopt_full: + model: diva + trainer: fbopt + exp_shoulder_clip: 5 + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + mu_init: 0.000001 diff --git a/examples/benchmark/pacs_diva_fbopt_and_baselines.yaml b/examples/benchmark/pacs_diva_fbopt_and_baselines.yaml new file mode 100644 index 000000000..7ea54939e --- /dev/null +++ b/examples/benchmark/pacs_diva_fbopt_and_baselines.yaml @@ -0,0 +1,119 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_diva_fbopt_and_baselines_aug + +sampling_seed: 0 + +startseed: 0 +endseed: 6 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 5 + bs: 32 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + + + +Shared params: + ini_setpoint_ratio: + min: 0.9 + max: 0.99 + num: 3 + distribution: uniform + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 3 + distribution: uniform + + k_i_gain_ratio: + min: 0.1 + max: 1 + num: 3 + distribution: uniform + + + mu_init: + min: 0.000001 + max: 0.9 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + mu_clip: + distribution: categorical + datatype: float + values: + - 1 + - 10 + - 100 + - 1000 + +# Test fbopt with different hyperparameter configurations +diva_fbopt_a: + model: diva + trainer: fbopt + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + force_setpoint_change_once: True + str_diva_multiplier_type: gammad_recon + coeff_ma_output_state: 0.1 + mu_init: 1e-6 + shared: + - k_i_gain_ratio + - mu_clip + +diva_feedforward_full: + model: diva + trainer: hyperscheduler + str_diva_multiplier_type: gammad_recon + gamma_y: 1.0 + shared: + - gamma_d + +diva_default: + model: diva + trainer: hyperscheduler + str_diva_multiplier_type: default + shared: + - gamma_d + - gamma_y + +diva_fixed_penalty: + model: diva + trainer: basic + str_diva_multiplier_type: default + shared: + - gamma_d + - gamma_y + +erm: + model: erm diff --git a/examples/benchmark/pacs_diva_others.yaml b/examples/benchmark/pacs_diva_others.yaml new file mode 100644 index 000000000..55d6a7f37 --- /dev/null +++ 
b/examples/benchmark/pacs_diva_others.yaml @@ -0,0 +1,68 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_diva_others + +sampling_seed: 0 + +startseed: 0 +endseed: 6 +test_domains: - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 5 + bs: 32 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + + + +Shared params: + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + +diva_feedforward_full: + model: diva + trainer: hyperscheduler + str_diva_multiplier_type: gammad_recon + shared: + - gamma_d + - gamma_y + +diva_default: + model: diva + trainer: hyperscheduler + str_diva_multiplier_type: default + shared: + - gamma_d + - gamma_y + +diva_fixed_penalty: + model: diva + trainer: basic + str_diva_multiplier_type: default + shared: + - gamma_d + - gamma_y diff --git a/examples/benchmark/pacs_fbopt_dial_diva.yaml b/examples/benchmark/pacs_fbopt_dial_diva.yaml new file mode 100644 index 000000000..ca2cf3921 --- /dev/null +++ b/examples/benchmark/pacs_fbopt_dial_diva.yaml @@ -0,0 +1,93 @@ +mode: grid + +output_dir: zoutput/benchmarks/benchmark_fbopt_dial_diva_pacs + +sampling_seed: 0 +startseed: 0 +endseed: 2 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 5 + bs: 16 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + + + +Shared params: + ini_setpoint_ratio: + min: 0.9 + max: 0.99 + num: 2 + step: 0.05 + distribution: uniform + + k_i_gain_ratio: + min: 0.01 + max: 0.90 + num: 3 + step: 0.0001 + distribution: uniform + + dial_lr: + min: 1e-5 + max: 1e-3 + num: 2 + step: 1e-5 + distribution: uniform + + dial_epsilon: + min: 1e-5 + max: 1e-3 + num: 2 + step: 1e-5 + distribution: uniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + +# Test fbopt with different hyperparameter configurations + +dial_fbopt: + model: diva + trainer: fbopt_dial + gamma_y: 1.0 + shared: + - ini_setpoint_ratio + - k_i_gain_ratio + - dial_lr + - dial_epsilon + +# dial: +# model: diva +# trainer: dial +# shared: +# - dial_lr +# - dial_epsilon +# - gamma_y +# - gamma_d diff --git a/examples/benchmark/pacs_fbopt_fishr_erm.yaml b/examples/benchmark/pacs_fbopt_fishr_erm.yaml new file mode 100644 index 000000000..781a2518e --- /dev/null +++ b/examples/benchmark/pacs_fbopt_fishr_erm.yaml @@ -0,0 +1,66 @@ +mode: grid + +output_dir: zoutput/benchmarks/benchmark_fbopt_fishr_erm_pacs + +sampling_seed: 0 +startseed: 0 +endseed: 0 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 10 + epos_min: 2 + es: 5 + bs: 32 + san_check: False + nname: alexnet + nname_dom: alexnet + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + + + +Shared params: + ini_setpoint_ratio: + min: 0.5 + max: 0.99 + num: 2 + step: 0.05 + distribution: uniform + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + step: 0.0001 + distribution: uniform + + gamma_reg: + min: 0.01 + max: 1e4 + num: 3 + distribution: loguniform + + +# Test fbopt with different hyperparameter configurations + 
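The two arms defined next contrast the feedback-controlled trainer (`fbopt_fishr`), which adapts the penalty multiplier online, with plain `fishr`, where the sampled `gamma_reg` stays fixed. As a rough sketch of the feedback idea behind `k_i_gain`, `ini_setpoint_ratio`, `mu_clip`, and `exp_shoulder_clip` (a hypothetical distillation, not the exact update in the fbopt trainer code):

```python
import math

def integral_step(mu, reg_loss, setpoint, k_i_gain, mu_clip, shoulder=5.0):
    """One integral-controller step on a penalty multiplier (sketch).

    Raises mu while the regularization loss sits above its setpoint and
    lowers it below; the shoulder clip keeps a single step from exploding.
    """
    err = k_i_gain * (reg_loss - setpoint)
    err = max(-shoulder, min(shoulder, err))  # clip before exponentiating
    return min(mu * math.exp(err), mu_clip)   # mu_clip caps the multiplier

mu = 1e-6  # mu_init
for reg_loss in (2.0, 1.5, 1.1, 0.9):  # reg loss drifting toward setpoint 1.0
    mu = integral_step(mu, reg_loss, setpoint=1.0, k_i_gain=0.01, mu_clip=10.0)
    print(f"mu = {mu:.4e}")
```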
+fbopt_fishr_erm: + model: erm + trainer: fbopt_fishr + shared: + - ini_setpoint_ratio + - k_i_gain + - gamma_reg + +fishr_erm: + model: erm + trainer: fishr + shared: + - gamma_reg diff --git a/examples/benchmark/pacs_hduva_baselines.yaml b/examples/benchmark/pacs_hduva_baselines.yaml new file mode 100644 index 000000000..cbdb704eb --- /dev/null +++ b/examples/benchmark/pacs_hduva_baselines.yaml @@ -0,0 +1,111 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_hduva_fbopt_and_baselines + +sampling_seed: 0 + +startseed: 0 +endseed: 10 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 1 + bs: 16 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + - 0.8 + + mu_clip: + distribution: categorical + datatype: float + values: + - 10 + - 1000 + - 1 + - 100 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + k_i_gain_ratio: + min: 0.1 + max: 10 + num: 10 + distribution: loguniform + + mu_init: + min: 0.000001 + max: 0.00001 + step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +hduva_beta_warmup: + model: hduva + shared: + - gamma_y + +hduva_fbopt_full: + model: hduva + trainer: fbopt + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + mu_init: 1e-6 + shared: + - k_i_gain_ratio diff --git a/examples/benchmark/pacs_hduva_fbopt_alone_es1_autoki_aug.yaml b/examples/benchmark/pacs_hduva_fbopt_alone_es1_autoki_aug.yaml new file mode 100644 index 000000000..d773cb25b --- /dev/null +++ b/examples/benchmark/pacs_hduva_fbopt_alone_es1_autoki_aug.yaml @@ -0,0 +1,107 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_hduva_fbopt_alone_aug + +sampling_seed: 0 + +startseed: 0 +endseed: 10 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 100 + es: 10 + bs: 16 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + - 0.8 + + mu_clip: + distribution: categorical + datatype: float + values: + - 10 + - 1000 + - 1 + - 100 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + k_i_gain_ratio: + 
min: 0.1 + max: 10 + num: 10 + distribution: loguniform + + mu_init: + min: 0.000001 + max: 0.00001 + step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +diva_fbopt_full: + model: hduva + trainer: fbopt + gamma_y: 1.0 + ini_setpoint_ratio: 0.99 + mu_init: 1e-6 + mu_clip: 10 + shared: + - k_i_gain_ratio diff --git a/examples/benchmark/pacs_hduva_matchdg.yaml b/examples/benchmark/pacs_hduva_matchdg.yaml new file mode 100644 index 000000000..f8c99d6d3 --- /dev/null +++ b/examples/benchmark/pacs_hduva_matchdg.yaml @@ -0,0 +1,112 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_hduva_fbopt_alone_aug + +sampling_seed: 0 + +startseed: 0 +endseed: 10 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 100 + es: 10 + bs: 16 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + ini_setpoint_ratio: + min: 0.990 + max: 0.999 + num: 2 + distribution: uniform + + str_diva_multiplier_type: + distribution: categorical + datatype: str + values: + - gammad_recon + - gammad_recon_per_pixel + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + - 0.8 + + mu_clip: + distribution: categorical + datatype: float + values: + - 10 + - 1000 + - 1 + - 100 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: uniform + + k_i_gain_ratio: + min: 0.1 + max: 10 + num: 10 + distribution: loguniform + + mu_init: + min: 0.000001 + max: 0.00001 + step: 0.000001 + num: 3 + distribution: loguniform + + gamma_y: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_d: + min: 1.0 + max: 1e6 + step: 100 + num: 3 + distribution: loguniform + + gamma_reg: + min: 0.01 + max: 10 + distribution: loguniform + num: 3 + + + + + +# Test fbopt with different hyperparameter configurations, no noeed to tune mu_clip since this is the job of KI gain when mu_init is small + +match_duva: + model: matchhduva + epochs_ctr: 10 + shared: + - gamma_y + - gamma_reg diff --git a/examples/benchmark/pacs_jigen_baslines4fbopt.yaml b/examples/benchmark/pacs_jigen_baslines4fbopt.yaml new file mode 100644 index 000000000..8c4d99d3d --- /dev/null +++ b/examples/benchmark/pacs_jigen_baslines4fbopt.yaml @@ -0,0 +1,74 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_jigen_fbopt_baselines + +sampling_seed: 0 + +startseed: 0 +endseed: 4 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug_noflip.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 100 + es: 1 + bs: 64 + san_check: True + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + k_i_gain: + min: 0.0001 + max: 0.01 + num: 3 + distribution: loguniform + + mu_init: + min: 0.000001 + max: 0.00005 + num: 3 + distribution: 
loguniform + + pperm: + min: 0.1 + max: 0.9 + num: 3 + distribution: uniform + + gamma_reg: + min: 0.01 + max: 10 + num: 5 + distribution: loguniform + +# Test fbopt with different hyperparameter configurations + + +jigen_feedforward: + model: jigen + trainer: hyperscheduler + shared: + - gamma_reg + - pperm + +jigen_fixed_penalty: + model: jigen + trainer: basic + shared: + - gamma_reg + - pperm + +erm: + model: erm diff --git a/examples/benchmark/pacs_jigen_fbopt_alone.yaml b/examples/benchmark/pacs_jigen_fbopt_alone.yaml new file mode 100644 index 000000000..3107894ed --- /dev/null +++ b/examples/benchmark/pacs_jigen_fbopt_alone.yaml @@ -0,0 +1,85 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_jigen_fbopt_alone + +sampling_seed: 0 + +startseed: 0 +endseed: 3 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug_noflip.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 200 + es: 1 + bs: 64 + san_check: True + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + + +Shared params: + k_i_gain: + min: 0.0001 + max: 0.01 + num: 3 + distribution: loguniform + + k_i_gain_ratio: + min: 0.1 + max: 10 + num: 3 + distribution: loguniform + + + + mu_init: + min: 0.000001 + max: 0.00005 + num: 3 + distribution: loguniform + + pperm: + min: 0.1 + max: 0.7 + num: 3 + distribution: uniform + + gamma_reg: + min: 0.01 + max: 10_000 + num: 10 + distribution: loguniform + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.1 + - 0.5 + - 0.9 + +# Test fbopt with different hyperparameter configurations + +jigen_feedback: + model: jigen + trainer: fbopt + ini_setpoint_ratio: 0.99 + coeff_ma: 0.5 + mu_init: 1e-6 + shared: + - k_i_gain_ratio + - pperm + +erm: + model: erm diff --git a/examples/benchmark/pacs_jigen_fbopt_alone_autoki.yaml b/examples/benchmark/pacs_jigen_fbopt_alone_autoki.yaml new file mode 100644 index 000000000..3c70d07b6 --- /dev/null +++ b/examples/benchmark/pacs_jigen_fbopt_alone_autoki.yaml @@ -0,0 +1,92 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_jigen_fbopt_alone + +sampling_seed: 0 + +startseed: 0 +endseed: 3 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_path_list.py + dmem: False + lr: 5e-5 + epos: 500 + epos_min: 50 + force_setpoint_change_once: True + es: 1 + bs: 64 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + pperm: 0.1 + # pperm correspond to 1-bias_wholeimage in https://github.com/fmcarlucci/JigenDG + + +Shared params: + k_i_gain: + min: 0.0001 + max: 0.01 + num: 3 + distribution: loguniform + + k_i_gain_ratio: + min: 0.1 + max: 1 + num: 5 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00005 + num: 3 + distribution: loguniform + + pperm: + min: 0.1 + max: 0.9 + num: 3 + distribution: uniform + + gamma_reg: + min: 0.01 + max: 10_000 + num: 10 + distribution: loguniform + + coeff_ma_setpoint: + distribution: categorical + datatype: float + values: + - 0.0 + - 0.5 + - 0.9 + + coeff_ma_output_state: + distribution: categorical + datatype: float + values: + - 0.0 + - 0.5 + - 0.9 + +# Test fbopt with different 
hyperparameter configurations + +jigen_feedback: + model: jigen + trainer: fbopt + ini_setpoint_ratio: 0.99 + mu_init: 1e-6 + + shared: + - k_i_gain_ratio + - coeff_ma_output_state + - coeff_ma_setpoint diff --git a/examples/benchmark/pacs_jigen_fbopt_and_baselines.yaml b/examples/benchmark/pacs_jigen_fbopt_and_baselines.yaml new file mode 100644 index 000000000..1421913b3 --- /dev/null +++ b/examples/benchmark/pacs_jigen_fbopt_and_baselines.yaml @@ -0,0 +1,90 @@ +mode: grid + +output_dir: zoutput/benchmarks/benchmark_fbopt_pacs_full + +sampling_seed: 0 + +startseed: 0 +endseed: 5 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_path_list.py + dmem: False + lr: 5e-5 + epos: 200 + epos_min: 20 + es: 1 + bs: 64 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + pperm: 0.5 + + +Shared params: + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: loguniform + + mu_init: + min: 0.000001 + max: 0.00005 + num: 2 + distribution: loguniform + + pperm: + min: 0.1 + max: 0.9 + num: 3 + distribution: uniform + + gamma_reg: + min: 0.01 + max: 10_000 + num: 4 + distribution: loguniform + + mu_clip: + distribution: categorical + datatype: float + values: + - 1 + - 10 + - 100 + - 1000 + +# Test fbopt with different hyperparameter configurations + +jigen_feedback: + model: jigen + trainer: fbopt + ini_setpoint_ratio: 0.99 + mu_init: 0.000001 + shared: + - k_i_gain + - mu_clip + +jigen_feedforward: + model: jigen + trainer: hyperscheduler + shared: + - gamma_reg + +jigen_fixed_penalty: + model: jigen + trainer: basic + shared: + - gamma_reg + +erm: + model: erm diff --git a/examples/benchmark/pacs_jigen_fbopt_and_baselines_aug.yaml b/examples/benchmark/pacs_jigen_fbopt_and_baselines_aug.yaml new file mode 100644 index 000000000..3b0f8dba6 --- /dev/null +++ b/examples/benchmark/pacs_jigen_fbopt_and_baselines_aug.yaml @@ -0,0 +1,105 @@ +mode: grid + +output_dir: zoutput/benchmarks/pacs_aug_jigen + +sampling_seed: 0 + +startseed: 0 +endseed: 3 + +test_domains: + - sketch + +domainlab_args: + tpath: examples/tasks/task_pacs_aug_noflip.py + dmem: False + epos: 500 + epos_min: 200 + es: 1 + bs: 64 + san_check: False + npath: examples/nets/resnet50domainbed.py + npath_dom: examples/nets/resnet50domainbed.py + npath_encoder_x2topic_h: examples/nets/resnet50domainbed.py + npath_encoder_sandwich_x2h4zd: examples/nets/resnet50domainbed.py + zx_dim: 0 + zy_dim: 64 + zd_dim: 64 + pperm: 0.1 + + +Shared params: + lr: + distribution: categorical + values: + - 5e-5 + - 1e-3 + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + distribution: loguniform + + k_i_gain_ratio: + min: 0.1 + max: 1 + num: 4 + distribution: uniform + + mu_init: + min: 0.000001 + max: 0.00005 + num: 2 + distribution: loguniform + + pperm: + min: 0.1 + max: 0.9 + num: 3 + distribution: uniform + + gamma_reg: + min: 0.01 + max: 10 + num: 4 + distribution: loguniform + + mu_clip: + distribution: categorical + datatype: float + values: + - 0.01 + - 0.1 + - 1.0 + - 10 + +# Test fbopt with different hyperparameter configurations + +jigen_feedback: + model: jigen + trainer: fbopt + ini_setpoint_ratio: 0.99 + mu_init: 1e-6 + force_setpoint_change_once: True + shared: + - k_i_gain_ratio + - mu_clip + - lr + +jigen_feedforward: + model: jigen + trainer: hyperscheduler + shared: + - gamma_reg + - 
lr + +jigen_fixed_penalty: + model: jigen + trainer: basic + shared: + - gamma_reg + - lr + +erm: + model: erm diff --git a/examples/benchmark/test_benchmark_fbopt.yaml b/examples/benchmark/test_benchmark_fbopt.yaml new file mode 100644 index 000000000..87ce24a8d --- /dev/null +++ b/examples/benchmark/test_benchmark_fbopt.yaml @@ -0,0 +1,70 @@ +mode: grid + +output_dir: zoutput/benchmarks/benchmark_fbopt + +num_param_samples: 8 +sampling_seed: 0 +startseed: 0 +endseed: 2 + +test_domains: + - 3 + - 0 + + +domainlab_args: + task: mnistcolor10 + tr_d: [1, 2] + dmem: False + lr: 0.001 + epos: 3 + es: 5 + bs: 64 + nname: conv_bn_pool_2 + san_check: True + + +Shared params: + ini_setpoint_ratio: + min: 0.5 + max: 0.99 + num: 2 + step: 0.001 + distribution: uniform + + k_i_gain: + min: 0.0001 + max: 0.01 + num: 2 + step: 0.0001 + distribution: uniform + + exp_shoulder_clip: + min: 5 + max: 10 + num: 2 + step: 1 + distribution: uniform + + mu_clip: + min: 0.001 + max: 1e4 + num: 2 + step: 10 + distribution: loguniform + + coeff_ma: + min: 0.001 + max: 0.99 + num: 2 + step: 0.001 + distribution: uniform + +# Test fbopt with different hyperparameter configurations + +jigen_fbopt: + model: jigen + trainer: fbopt + + shared: + - ini_setpoint_ratio diff --git a/fbopt_mnist_diva_pixel.sh b/fbopt_mnist_diva_pixel.sh new file mode 100644 index 000000000..bac129db9 --- /dev/null +++ b/fbopt_mnist_diva_pixel.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occurring +# pytest -s tests/test_fbopt.py + +python main_out.py --te_d=1 --tr_d 0 3 --task=mnistcolor10 --bs=16 --model=diva --trainer=fbopt --nname=conv_bn_pool_2 --epos=2000 --es=2000 --mu_init=0.00001 --gamma_y=1.0 --mu_clip=10 --str_diva_multiplier_type=gammad_recon_per_pixel diff --git a/requirements_notorch.txt b/requirements_notorch.txt new file mode 100644 index 000000000..5aec65167 --- /dev/null +++ b/requirements_notorch.txt @@ -0,0 +1,79 @@ +appdirs==1.4.4 ; python_version >= "3.9" and python_version < "4.0" +attrs==23.1.0 ; python_version >= "3.9" and python_version < "4.0" +beautifulsoup4==4.12.2 ; python_version >= "3.9" and python_version < "4.0" +certifi==2023.7.22 ; python_version >= "3.9" and python_version < "4.0" +charset-normalizer==3.2.0 ; python_version >= "3.9" and python_version < "4.0" +colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0" and platform_system == "Windows" +configargparse==1.7 ; python_version >= "3.9" and python_version < "4.0" +connection-pool==0.0.3 ; python_version >= "3.9" and python_version < "4.0" +contourpy==1.1.0 ; python_version >= "3.9" and python_version < "4.0" +cycler==0.11.0 ; python_version >= "3.9" and python_version < "4.0" +datrie==0.8.2 ; python_version >= "3.9" and python_version < "4.0" +docutils==0.20.1 ; python_version >= "3.9" and python_version < "4.0" +dpath==2.1.6 ; python_version >= "3.9" and python_version < "4.0" +fastjsonschema==2.18.0 ; python_version >= "3.9" and python_version < "4.0" +filelock==3.12.2 ; python_version >= "3.9" and python_version < "4.0" +fonttools==4.42.0 ; python_version >= "3.9" and python_version < "4.0" +gdown==4.7.1 ; python_version >= "3.9" and python_version < "4.0" +gitdb==4.0.10 ; python_version >= "3.9" and python_version < "4.0" +gitpython==3.1.32 ; python_version >= "3.9" and python_version < "4.0"
+humanfriendly==10.0 ; python_version >= "3.9" and python_version < "4.0" +idna==3.4 ; python_version >= "3.9" and python_version < "4.0" +importlib-resources==6.0.1 ; python_version >= "3.9" and python_version < "3.10" +jinja2==3.1.2 ; python_version >= "3.9" and python_version < "4.0" +joblib==1.3.2 ; python_version >= "3.9" and python_version < "4.0" +jsonschema-specifications==2023.7.1 ; python_version >= "3.9" and python_version < "4.0" +jsonschema==4.19.0 ; python_version >= "3.9" and python_version < "4.0" +jupyter-core==5.3.1 ; python_version >= "3.9" and python_version < "4.0" +kiwisolver==1.4.4 ; python_version >= "3.9" and python_version < "4.0" +markdown-it-py==3.0.0 ; python_version >= "3.9" and python_version < "4.0" +markupsafe==2.1.3 ; python_version >= "3.9" and python_version < "4.0" +matplotlib==3.7.2 ; python_version >= "3.9" and python_version < "4.0" +mdurl==0.1.2 ; python_version >= "3.9" and python_version < "4.0" +nbformat==5.9.2 ; python_version >= "3.9" and python_version < "4.0" +numpy==1.25.2 ; python_version < "4.0" and python_version >= "3.9" +packaging==23.1 ; python_version >= "3.9" and python_version < "4.0" +pandas==1.5.3 ; python_version >= "3.9" and python_version < "4.0" +pillow==9.5.0 ; python_version >= "3.9" and python_version < "4.0" +plac==1.3.5 ; python_version >= "3.9" and python_version < "4.0" +platformdirs==3.10.0 ; python_version >= "3.9" and python_version < "4.0" +psutil==5.9.5 ; python_version >= "3.9" and python_version < "4.0" +pulp==2.7.0 ; python_version >= "3.9" and python_version < "4.0" +pygments==2.16.1 ; python_version >= "3.9" and python_version < "4.0" +pyparsing==3.0.9 ; python_version >= "3.9" and python_version < "4.0" +pyreadline3==3.4.1 ; sys_platform == "win32" and python_version >= "3.9" and python_version < "4.0" +pysocks==1.7.1 ; python_version >= "3.9" and python_version < "4.0" +python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0" +pytz==2023.3 ; python_version >= "3.9" and python_version < "4.0" +pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.9" and python_version < "4.0" +pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "4.0" +referencing==0.30.2 ; python_version >= "3.9" and python_version < "4.0" +requests==2.31.0 ; python_version >= "3.9" and python_version < "4.0" +requests[socks]==2.31.0 ; python_version >= "3.9" and python_version < "4.0" +reretry==0.11.8 ; python_version >= "3.9" and python_version < "4.0" +rich==13.5.2 ; python_version >= "3.9" and python_version < "4.0" +rpds-py==0.9.2 ; python_version >= "3.9" and python_version < "4.0" +scikit-learn==1.3.0 ; python_version >= "3.9" and python_version < "4.0" +scipy==1.9.3 ; python_version >= "3.9" and python_version < "4.0" +seaborn==0.12.2 ; python_version >= "3.9" and python_version < "4.0" +setuptools-scm==7.1.0 ; python_version >= "3.9" and python_version < "4.0" +setuptools==68.0.0 ; python_version >= "3.9" and python_version < "4.0" +six==1.16.0 ; python_version >= "3.9" and python_version < "4.0" +smart-open==6.3.0 ; python_version >= "3.9" and python_version < "4.0" +smmap==5.0.0 ; python_version >= "3.9" and python_version < "4.0" +snakemake==7.32.4 ; python_version >= "3.9" and python_version < "4.0" +soupsieve==2.4.1 ; python_version >= "3.9" and python_version < "4.0" +stopit==1.1.2 ; python_version >= "3.9" and python_version < "4.0" +tabulate==0.9.0 ; python_version >= "3.9" and python_version < "4.0" +threadpoolctl==3.2.0 ; python_version >= 
"3.9" and python_version < "4.0" +throttler==1.2.2 ; python_version >= "3.9" and python_version < "4.0" +tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11" +toposort==1.10 ; python_version >= "3.9" and python_version < "4.0" +tqdm==4.66.1 ; python_version >= "3.9" and python_version < "4.0" +tensorboard==2.14.0 ; python_version >= "3.9" and python_version < "4.0" +traitlets==5.9.0 ; python_version >= "3.9" and python_version < "4.0" +typing-extensions==4.7.1 ; python_version >= "3.9" and python_version < "4.0" +urllib3==2.0.4 ; python_version >= "3.9" and python_version < "4.0" +wrapt==1.15.0 ; python_version >= "3.9" and python_version < "4.0" +yte==1.5.1 ; python_version >= "3.9" and python_version < "4.0" +zipp==3.16.2 ; python_version >= "3.9" and python_version < "3.10" diff --git a/run_fbopt_hduva b/run_fbopt_hduva new file mode 100644 index 000000000..c1add075f --- /dev/null +++ b/run_fbopt_hduva @@ -0,0 +1 @@ +python main_out.py --te_d 0 1 2 --tr_d 3 7 --task=mnistcolor10 --bs=8 --model=hduva --trainer=fbopt --nname=conv_bn_pool_2 --gamma_y=7e5 --nname_encoder_x2topic_h=conv_bn_pool_2 --nname_encoder_sandwich_x2h4zd=conv_bn_pool_2 --gamma_y=3 --epos=2 diff --git a/script_generate_all_figures_diva.sh b/script_generate_all_figures_diva.sh new file mode 100755 index 000000000..e7f9617d2 --- /dev/null +++ b/script_generate_all_figures_diva.sh @@ -0,0 +1,76 @@ +#!/bin/bash -x -v + +STR_LOSS_ELL="loss_task/ell" +OUT_DIR="./figures_diva" +# Number of points to plot: +phase_portrait_plot_len=120 + +LOSS_GAMMA_D="$\mathbb{E}_{q_{\phi_d}(z_d|x)}[\log q_{\omega_d}(d|z_d)]$" + + +# README: +# The following scripts will check event files from the 'runs' folder of the working directory. +# To generate example tensorboard 'runs' folder, one could execute e.g. `sh run_fbopt_mnist_diva_autoki.sh` such that there will be 'runs' folder. 
+ +if [ -z "$1" ]; then + # Check if an argument is provided + runs_dir="runs/*" +else + # Use the provided argument + runs_dir=$1 +fi + + +# a command line argument can be passed to this script, in order to skip the first few large jumps on the phase plots; if no argument is provided then all points will be plotted: +if [ -z "$2" ]; then + # Check if an argument is provided + skip_n=0 +else + # Use the provided argument + skip_n=$2 +fi + + + + +# Phase portraits +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot2="lossrd/dyn_gamma_d" --plot1="loss_task/ell" --legend2="\$R_{\gamma_d}(\cdot)\$" --legend1="\$\ell(\cdot)\$" --plot_len $phase_portrait_plot_len --skip_n_steps $skip_n --output_dir=$OUT_DIR --phase_portrait + +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot2="lossrd/dyn_mu_recon" --plot1="loss_task/ell" --legend2="\$R_{\mu_{recon}}(\cdot)\$" --legend1="\$\ell(\cdot)\$" --plot_len $phase_portrait_plot_len --skip_n_steps $skip_n --output_dir=$OUT_DIR --phase_portrait + +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot2="lossrd/dyn_beta_d" --plot1="loss_task/ell" --legend2="\$R_{\beta_d}(\cdot)\$" --legend1="\$\ell(\cdot)\$" --plot_len $phase_portrait_plot_len --skip_n_steps $skip_n --output_dir=$OUT_DIR --phase_portrait + +# python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot2="lossrd/dyn_beta_x" --plot1="loss_task/ell" --legend2="KL (beta_x)" --legend1="ell" --plot_len $phase_portrait_plot_len --skip_n_steps $skip_n --output_dir=$OUT_DIR --phase_portrait + +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot2="lossrd/dyn_beta_y" --plot1="loss_task/ell" --legend2="\$R_{beta_y}(\cdot)\$" --legend1="\$\ell(\cdot)\$" --plot_len $phase_portrait_plot_len --skip_n_steps $skip_n --output_dir=$OUT_DIR --phase_portrait + + + + +# Plot R and the corresponding set point curves (both in the same figure) +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="lossrd/dyn_gamma_d" --plot2="lossrs/setpoint_gamma_d" --legend1="\$R_{\gamma_d}\$" --legend2="setpoint" --output_dir=$OUT_DIR + +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="lossrd/dyn_mu_recon" --plot2="lossrs/setpoint_mu_recon" --legend1="\$R_{\mu_{recon}}(\cdot)\$" --legend2="setpoint" --output_dir=$OUT_DIR + +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="lossrd/dyn_beta_d" --plot2="lossrs/setpoint_beta_d" --legend1="\$R_{\beta_d}(\cdot)\$" --legend2="setpoint" --output_dir=$OUT_DIR --neg + +# python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="lossrd/dyn_beta_x" --plot2="lossrs/setpoint_beta_x" --legend1="KL (beta_x)" --legend2="setpoint" --output_dir=$OUT_DIR + +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="lossrd/dyn_beta_y" --plot2="lossrs/setpoint_beta_y" --legend1="\$R_{\beta_y}(\cdot)\$" --legend2="setpoint" --output_dir=$OUT_DIR --neg + + + # One curve per figure + values=('controller_gain/beta_d' 'controller_gain/beta_y' 'controller_gain/beta_x' 'controller_gain/gamma_d' 'controller_gain/mu_recon' 'dyn_mu/beta_d' 'delta/beta_d' 'dyn_mu/beta_y' 'delta/beta_y' 'dyn_mu/beta_x' 'delta/beta_x' 'dyn_mu/gamma_d' 'delta/gamma_d' 'dyn_mu/mu_recon' 'delta/mu_recon' 'loss_task/penalized' 'loss_task/ell' 'acc/te' 'acc/val' 'acc/sel' 'acc/setpoint') + # Loop over the array + for val in 
"${values[@]}" + do + python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="$val" --legend1="$val" --output_dir=$OUT_DIR + done + + +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="dyn_mu/mu_recon" --legend1="\$\mu_{recon}\$" --output_dir=$OUT_DIR +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="dyn_mu/gamma_d" --legend1="\$\gamma_d\$" --output_dir=$OUT_DIR +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="dyn_mu/beta_y" --legend1="\$\beta_y\$" --output_dir=$OUT_DIR +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="loss_task/ell" --legend1="\$\ell(\cdot)\$" --output_dir=$OUT_DIR +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="loss_task/penalized" --legend1="\$\ell(\cdot)+\mu^TR(\cdot)\$" --output_dir=$OUT_DIR +python domainlab/utils/generate_fbopt_phase_portrait.py --runs_dir $runs_dir --plot1="controller_gain/beta_y" --legend1="controller gain for \$\beta_y\$" --output_dir=$OUT_DIR diff --git a/script_jigen_plot.sh b/script_jigen_plot.sh new file mode 100755 index 000000000..5c47a68f8 --- /dev/null +++ b/script_jigen_plot.sh @@ -0,0 +1,4 @@ +python domainlab/utils/generate_fbopt_phase_portrait.py --plot2="lossrd/dyn_alpha" --plot1="loss_task/ell" --legend2="regularization loss jigen" --legend1="classification loss" --output_dir="." --phase_portrait + + +python domainlab/utils/generate_fbopt_phase_portrait.py --plot1="lossrs/setpoint_alpha" --plot2="lossrd/dyn_alpha" --legend2="regularization loss jigen" --legend1="setpoint" --output_dir="." diff --git a/scripts_fbopt/run_erm.sh b/scripts_fbopt/run_erm.sh new file mode 100644 index 000000000..f5285811f --- /dev/null +++ b/scripts_fbopt/run_erm.sh @@ -0,0 +1 @@ +python main_out.py --te_d=1 --tr_d 0 3 --task=mnistcolor10 --bs=16 --model=erm --nname=conv_bn_pool_2 --epos=10 diff --git a/scripts_fbopt/run_fbopt_dann.sh b/scripts_fbopt/run_fbopt_dann.sh new file mode 100644 index 000000000..c75fb071c --- /dev/null +++ b/scripts_fbopt/run_fbopt_dann.sh @@ -0,0 +1 @@ +python main_out.py --te_d=caltech --task=mini_vlcs --bs=16 --model=dann --trainer=fbopt --nname=alexnet --epos=200 --es=200 --no_setpoint_update diff --git a/scripts_fbopt/run_fbopt_diva.sh b/scripts_fbopt/run_fbopt_diva.sh new file mode 100644 index 000000000..dc48bce9b --- /dev/null +++ b/scripts_fbopt/run_fbopt_diva.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py +python main_out.py --te_d=caltech --task=mini_vlcs --bs=8 --model=diva --trainer=fbopt --nname=alexnet --nname_dom=alexnet --gamma_d=3 --gamma_y=3 --epos=200 diff --git a/scripts_fbopt/run_fbopt_diva_cpu.sh b/scripts_fbopt/run_fbopt_diva_cpu.sh new file mode 100644 index 000000000..59d0c592a --- /dev/null +++ b/scripts_fbopt/run_fbopt_diva_cpu.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py +python main_out.py --te_d=caltech --task=mini_vlcs --bs=8 --model=diva 
--trainer=fbopt --nname=alexnet --nname_dom=alexnet --gamma_d=3 --gamma_y=3 --epos=200 --es=100 diff --git a/scripts_fbopt/run_fbopt_hduva_cpu.sh b/scripts_fbopt/run_fbopt_hduva_cpu.sh new file mode 100644 index 000000000..54b7d5995 --- /dev/null +++ b/scripts_fbopt/run_fbopt_hduva_cpu.sh @@ -0,0 +1,2 @@ +export CUDA_VISIBLE_DEVICES="" +python main_out.py --te_d 0 1 2 --tr_d 3 7 --task=mnistcolor10 --bs=8 --model=hduva --trainer=fbopt --nname=conv_bn_pool_2 --gamma_y=7e5 --nname_encoder_x2topic_h=conv_bn_pool_2 --nname_encoder_sandwich_x2h4zd=conv_bn_pool_2 --gamma_y=3 --epos=2 diff --git a/scripts_fbopt/run_fbopt_match_diva.sh b/scripts_fbopt/run_fbopt_match_diva.sh new file mode 100644 index 000000000..c1547567c --- /dev/null +++ b/scripts_fbopt/run_fbopt_match_diva.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py +python main_out.py --te_d=caltech --task=mini_vlcs --bs=8 --model=diva --trainer=fbopt_matchdg --nname=alexnet --nname_dom=alexnet --gamma_d=3 --gamma_y=3 --epos=200 --es=100 diff --git a/scripts_fbopt/run_fbopt_mnist.sh b/scripts_fbopt/run_fbopt_mnist.sh new file mode 100644 index 000000000..2e3edc424 --- /dev/null +++ b/scripts_fbopt/run_fbopt_mnist.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py + +python main_out.py --te_d=0 --tr_d 1 2 --task=mnistcolor10 --bs=16 --model=jigen --trainer=fbopt --nname=conv_bn_pool_2 --epos=2000 --es=0 --mu_init=0.00001 --coeff_ma_setpoint=0.5 --coeff_ma_output_state=0.99 --force_setpoint_change_once diff --git a/scripts_fbopt/run_fbopt_mnist_diva.sh b/scripts_fbopt/run_fbopt_mnist_diva.sh new file mode 100644 index 000000000..fd5c2b8cf --- /dev/null +++ b/scripts_fbopt/run_fbopt_mnist_diva.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py + +python main_out.py --te_d=0 --tr_d 1 2 --task=mnistcolor10 --bs=16 --model=diva --trainer=fbopt --nname=conv_bn_pool_2 --epos=2000 --es=500 --mu_init=0.000001 --gamma_y=1.0 diff --git a/scripts_fbopt/run_fbopt_mnist_diva_autoki.sh b/scripts_fbopt/run_fbopt_mnist_diva_autoki.sh new file mode 100644 index 000000000..64c19e102 --- /dev/null +++ b/scripts_fbopt/run_fbopt_mnist_diva_autoki.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py + +python main_out.py --te_d=0 --tr_d 1 2 --task=mnistcolor10 --bs=16 --model=diva --trainer=fbopt --nname=conv_bn_pool_2 --epos=5000 --es=5 --mu_init=1e-6 --gamma_y=1.0 --k_i_gain_ratio=0.9 --coeff_ma_output_state=0 --coeff_ma_setpoint=0 --epos_min=1000 --force_setpoint_change_once diff --git 
a/scripts_fbopt/run_fbopt_mnist_feedforward.sh b/scripts_fbopt/run_fbopt_mnist_feedforward.sh new file mode 100644 index 000000000..b04819c61 --- /dev/null +++ b/scripts_fbopt/run_fbopt_mnist_feedforward.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py + +python main_out.py --te_d=0 --tr_d 1 2 --task=mnistcolor10 --bs=16 --model=jigen --trainer=fbopt --nname=conv_bn_pool_2 --epos=2000 --epos_min=100 --es=1 --force_feedforward diff --git a/scripts_fbopt/run_fbopt_mnist_jigen_autoki.sh b/scripts_fbopt/run_fbopt_mnist_jigen_autoki.sh new file mode 100644 index 000000000..8b346e011 --- /dev/null +++ b/scripts_fbopt/run_fbopt_mnist_jigen_autoki.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py + +python main_out.py --te_d=0 --tr_d 1 2 --task=mnistcolor10 --bs=16 --model=jigen --trainer=fbopt --nname=conv_bn_pool_2 --epos=2000 --es=1 --epos_min=500 --mu_init=1e-6 --coeff_ma_output_state=0.99 --k_i_gain_ratio=0.99 diff --git a/scripts_fbopt/run_fbopt_small_pacs.sh b/scripts_fbopt/run_fbopt_small_pacs.sh new file mode 100644 index 000000000..fc3ab6bc7 --- /dev/null +++ b/scripts_fbopt/run_fbopt_small_pacs.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py +python main_out.py --te_d=sketch --tpath=examples/tasks/demo_task_path_list_small.py --bs=16 --model=jigen --trainer=fbopt --nname=alexnet --epos=200 --es=100 --init_mu=0.01 diff --git a/scripts_fbopt/run_mnist_jigen.sh b/scripts_fbopt/run_mnist_jigen.sh new file mode 100644 index 000000000..0bc854c5e --- /dev/null +++ b/scripts_fbopt/run_mnist_jigen.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py + +python main_out.py --te_d=caltech --task=mini_vlcs --bs=16 --model=jigen --trainer=fbopt --nname=alexnet --epos=200 --es=200 --mu_init=1.0 --coeff_ma_output=0 --coeff_ma_setpoint=0 --coeff_ma_output=0 diff --git a/scripts_fbopt/run_pacs_diva_fbopt.sh b/scripts_fbopt/run_pacs_diva_fbopt.sh new file mode 100644 index 000000000..74d1f0cd3 --- /dev/null +++ b/scripts_fbopt/run_pacs_diva_fbopt.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occuring +# pytest -s tests/test_fbopt.py +python main_out.py --te_d=sketch --bs=32 --model=diva --trainer=fbopt --epos=200 --es=200 --npath_dom=examples/nets/resnet50domainbed.py 
--tpath=examples/tasks/task_pacs_path_list.py --npath=examples/nets/resnet50domainbed.py --gamma_y=1.0 --mu_init=1e-6 --lr=5e-5 --zx_dim=0 diff --git a/scripts_fbopt/run_pacs_jigen_fbopt.sh b/scripts_fbopt/run_pacs_jigen_fbopt.sh new file mode 100644 index 000000000..99663ee61 --- /dev/null +++ b/scripts_fbopt/run_pacs_jigen_fbopt.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# export CUDA_VISIBLE_DEVICES="" +# although garbage collector has been explicitly called, sometimes there is still CUDA out of memory error +# so it is better not to use GPU to do the pytest to ensure every time there is no CUDA out of memory error occurring +# pytest -s tests/test_fbopt.py +python main_out.py --te_d=sketch --tpath=examples/tasks/task_pacs_path_list.py --model=jigen --trainer=fbopt --bs=64 --epos=200 --es=200 --npath=examples/nets/resnet50domainbed.py --mu_init=1e-6 --lr=5e-5 --coeff_ma_output_state=0.1 diff --git a/test_fbopt_dial.sh b/test_fbopt_dial.sh new file mode 100644 index 000000000..4bf0c669b --- /dev/null +++ b/test_fbopt_dial.sh @@ -0,0 +1,2 @@ +export CUDA_VISIBLE_DEVICES="" +python main_out.py --te_d=caltech --task=mini_vlcs --bs=16 --model=fboptdial --trainer=dial --nname=alexnet --nname_dom=alexnet --gamma_y=1e6 --gamma_d=1e6 diff --git a/test_match_duva.sh b/test_match_duva.sh new file mode 100644 index 000000000..9f3e9951e --- /dev/null +++ b/test_match_duva.sh @@ -0,0 +1,4 @@ +python main_out.py --te_d 0 1 2 --tr_d 3 7 --task=mnistcolor10 --debug --bs=2 --model=matchhduva \ + --epochs_ctr=3 --epos=6 --nname=conv_bn_pool_2 --gamma_y=7e5 \ + --nname_encoder_x2topic_h=conv_bn_pool_2 \ + --nname_encoder_sandwich_x2h4zd=conv_bn_pool_2 diff --git a/test_match_duva_vlcs.sh b/test_match_duva_vlcs.sh new file mode 100644 index 000000000..a47e76c36 --- /dev/null +++ b/test_match_duva_vlcs.sh @@ -0,0 +1,4 @@ +python main_out.py --te_d=caltech --task=mini_vlcs --debug --bs=2 --model=matchhduva \ + --epochs_ctr=3 --epos=6 --npath=examples/nets/resnet.py --gamma_y=7e5 \ + --npath_encoder_x2topic_h=examples/nets/resnet.py \ + --npath_encoder_sandwich_x2h4zd=examples/nets/resnet.py diff --git a/tests/test_fbopt.py b/tests/test_fbopt.py new file mode 100644 index 000000000..c442bf090 --- /dev/null +++ b/tests/test_fbopt.py @@ -0,0 +1,42 @@ +""" +unit and end-to-end tests for the fbopt trainer across models +""" +from tests.utils_test import utils_test_algo + + +def test_dann_fbopt(): + """ + dann + """ + args = "--te_d=caltech --task=mini_vlcs --debug --bs=2 --model=dann --trainer=fbopt --nname=alexnet --epos=3" + utils_test_algo(args) + + +def test_jigen_fbopt(): + """ + jigen + """ + args = "--te_d=caltech --task=mini_vlcs --debug --bs=2 --model=jigen --trainer=fbopt --nname=alexnet --epos=3" + utils_test_algo(args) + + +def test_diva_fbopt(): + """ + diva + """ + args = "--te_d=caltech --task=mini_vlcs --debug --bs=2 --model=diva --gamma_y=1.0 --trainer=fbopt --nname=alexnet --epos=3" + utils_test_algo(args) + +def test_erm_fbopt(): + """ + erm + """ + args = "--te_d=caltech --task=mini_vlcs --debug --bs=2 --model=erm --trainer=fbopt --nname=alexnet --epos=3" # pylint: disable=line-too-long + utils_test_algo(args) + +def test_forcesetpoint_fbopt(): + """ + jigen with forced setpoint change + """ + args = "--te_d=0 --tr_d 1 2 --task=mnistcolor10 --bs=16 --model=jigen --trainer=fbopt --nname=conv_bn_pool_2 --epos=10 --es=0 --mu_init=0.00001 --coeff_ma_setpoint=0.5 --coeff_ma_output_state=0.99 --force_setpoint_change_once" + utils_test_algo(args) diff --git a/tests/test_fbopt_setpoint_ada.py b/tests/test_fbopt_setpoint_ada.py new file mode 100644 index 
000000000..4b8029056 --- /dev/null +++ b/tests/test_fbopt_setpoint_ada.py @@ -0,0 +1,9 @@ +from domainlab.algos.trainers.fbopt_setpoint_ada import is_less_list_all + + +def test_less_than(): + a = [3, 4, -9, -8] + b = [1, 0.5, -1, -0.5] + c = [0.5, 0.25, -0.5, -0.25] + assert not is_less_list_all(a, b) + assert is_less_list_all(c, b) diff --git a/tests/test_fbopt_setpoint_rewind.py b/tests/test_fbopt_setpoint_rewind.py new file mode 100644 index 000000000..3c1011bab --- /dev/null +++ b/tests/test_fbopt_setpoint_rewind.py @@ -0,0 +1,12 @@ +""" +end-to-end test for jigen with fbopt setpoint rewinding +""" +from tests.utils_test import utils_test_algo + + +def test_jigen_fbopt(): + """ + jigen with setpoint rewind + """ + args = "--te_d=caltech --task=mini_vlcs --debug --bs=2 --model=jigen --trainer=fbopt --nname=alexnet --epos=300 --setpoint_rewind=yes" + utils_test_algo(args)
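For orientation: the comparison pinned down by `test_less_than` above is magnitude-wise, not plain element-wise `<` (note `-0.5 < -1` is false, yet the test expects `True` for `c` vs `b`). A hypothetical reconstruction consistent with the test; the shipped implementation lives in `domainlab/algos/trainers/fbopt_setpoint_ada.py`:

```python
def is_less_list_all(list1, list2):
    """True iff every entry of list1 is smaller in magnitude than the
    corresponding entry of list2 (sketch reconstructed from the test)."""
    return all(abs(ele1) < abs(ele2) for ele1, ele2 in zip(list1, list2))

# mirrors the assertions in tests/test_fbopt_setpoint_ada.py
assert not is_less_list_all([3, 4, -9, -8], [1, 0.5, -1, -0.5])
assert is_less_list_all([0.5, 0.25, -0.5, -0.25], [1, 0.5, -1, -0.5])
```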