From b4210c105a34a2b7f83f5e6a29095f8017318cda Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 5 Jan 2023 01:49:50 +0530 Subject: [PATCH 01/54] Merge py file changes from benchmark-algs --- src/imitation/algorithms/dagger.py | 62 +++ src/imitation/scripts/analyze.py | 24 +- src/imitation/scripts/config/parallel.py | 406 ++++++++++++++++-- .../scripts/config/train_adversarial.py | 175 +++++++- .../scripts/config/train_imitation.py | 26 ++ .../config/train_preference_comparisons.py | 128 +++++- src/imitation/scripts/config/train_rl.py | 203 ++++++++- src/imitation/scripts/ingredients/reward.py | 5 + src/imitation/scripts/parallel.py | 166 ++++++- src/imitation/scripts/train_adversarial.py | 1 + src/imitation/scripts/train_imitation.py | 4 +- .../scripts/train_preference_comparisons.py | 1 + src/imitation/scripts/train_rl.py | 4 +- tests/algorithms/test_dagger.py | 25 +- tests/scripts/test_scripts.py | 31 +- 15 files changed, 1173 insertions(+), 88 deletions(-) diff --git a/src/imitation/algorithms/dagger.py b/src/imitation/algorithms/dagger.py index a7194a5bf..0034fc4ba 100644 --- a/src/imitation/algorithms/dagger.py +++ b/src/imitation/algorithms/dagger.py @@ -65,6 +65,68 @@ def __call__(self, round_num: int) -> float: assert round_num >= 0 return min(1, max(0, (self.rampdown_rounds - round_num) / self.rampdown_rounds)) + def __repr__(self): + return f"{type(self).__name__}({self.rampdown_rounds!r})" + + +class IndicatorBetaSchedule(BetaSchedule): + """Beta schedule that switches off after a number of rounds.""" + + def __init__(self, rampdown_rounds: int): + """Builds IndicatorBetaSchedule. + + Args: + rampdown_rounds: number of rounds after which beta switches off. + """ + self.rampdown_rounds = rampdown_rounds + + def __call__(self, round_num: int) -> float: + """Computes beta value. + + Args: + round_num: the current round number. + + Returns: + beta as `1` until `self.rampdown_rounds` and then beta as `0`. + """ + assert round_num >= 0 + return 1 if round_num < self.rampdown_rounds else 0 + + def __repr__(self): + return f"{type(self).__name__}({self.rampdown_rounds!r})" + + +class ExponentialBetaSchedule(BetaSchedule): + """Exponentially decaying schedule for beta.""" + + def __init__(self, decay_probability: float): + """Builds ExponentialBetaSchedule. + + Args: + decay_probability: the decay factor for beta. + + Raises: + ValueError: if `decay_probability` not within (0, 1]. + """ + if not (0 < decay_probability <= 1): + raise ValueError("decay_probability lies outside the range (0, 1].") + self.decay_probability = decay_probability + + def __call__(self, round_num: int) -> float: + """Computes beta value. + + Args: + round_num: the current round number. 
+ + Returns: + beta as `self.decay_probability ^ round_num` + """ + assert round_num >= 0 + return self.decay_probability**round_num + + def __repr__(self): + return f"{type(self).__name__}({self.decay_probability!r})" + def reconstruct_trainer( scratch_dir: types.AnyPath, diff --git a/src/imitation/scripts/analyze.py b/src/imitation/scripts/analyze.py index 0586f86d6..54fed52f9 100644 --- a/src/imitation/scripts/analyze.py +++ b/src/imitation/scripts/analyze.py @@ -166,6 +166,8 @@ def _get_algo_name(sd: sacred_util.SacredDicts) -> str: def _return_summaries(sd: sacred_util.SacredDicts) -> dict: imit_stats = get(sd.run, "result.imit_stats") + if imit_stats is None: + imit_stats = get(sd.run, "result.rollout") expert_stats = get(sd.run, "result.expert_stats") expert_return_summary = None @@ -232,7 +234,7 @@ def _return_summaries(sd: sacred_util.SacredDicts) -> dict: # verbosity 2 table_verbosity_mapping.append( table_verbosity_mapping[-1] - | {"status", "imit_expert_ratio", "exp_command", "run_name"}, + | {"status", "imit_expert_ratio", "exp_command", "run_name", "seed", ""}, ) @@ -268,20 +270,26 @@ def analyze_imitation( Returns: The DataFrame generated from the Sacred logs. """ - table_entry_fns_subset = _get_table_entry_fns_subset(table_verbosity) + if table_verbosity == -1: + table_entry_fns_subset = _get_table_entry_fns_subset(0) + else: + table_entry_fns_subset = _get_table_entry_fns_subset(table_verbosity) - rows = [] + df = pd.DataFrame() for sd in _gather_sacred_dicts(): - row = {} + new_df = pd.DataFrame() + if table_verbosity == -1: + new_df = pd.json_normalize(sd.config) + for col_name, make_entry_fn in table_entry_fns_subset.items(): - row[col_name] = make_entry_fn(sd) - rows.append(row) + new_df[col_name] = make_entry_fn(sd) + + df = pd.concat([df, new_df]) - df = pd.DataFrame(rows) if len(df) > 0: df.sort_values(by=["algo", "env_name"], inplace=True) - display_options = dict(index=False) + display_options: Mapping[str, Any] = dict(index=False) if csv_output_path is not None: df.to_csv(csv_output_path, **display_options) print(f"Wrote CSV file to {csv_output_path}") diff --git a/src/imitation/scripts/config/parallel.py b/src/imitation/scripts/config/parallel.py index eb206893f..59295d3d3 100644 --- a/src/imitation/scripts/config/parallel.py +++ b/src/imitation/scripts/config/parallel.py @@ -5,13 +5,15 @@ `@parallel_ex.named_config` to define a new parallel experiment. Adding custom named configs is necessary because the CLI interface can't add -search spaces to the config like `"seed": tune.grid_search([0, 1, 2, 3])`. +search spaces to the config like `"seed": tune.choice([0, 1, 2, 3])`. 
""" import numpy as np import ray.tune as tune import sacred +from torch import nn +from imitation.algorithms.dagger import ExponentialBetaSchedule, LinearBetaSchedule from imitation.util.util import make_unique_timestamp parallel_ex = sacred.Experiment("parallel") @@ -33,12 +35,39 @@ def config(): local_dir = None # `local_dir` arg for `ray.tune.run` upload_dir = None # `upload_dir` arg for `ray.tune.run` - n_seeds = 3 # Number of seeds to search over by default + # n_seeds_start = 0 + # n_seeds = 1 # Number of seeds to search over by default + experiment_checkpoint_path = "" + eval_best_trial = False + eval_trial_seeds = 5 # Number of seeds to search over by default + num_samples = 1 # Number of samples per grid search configuration + repeat = 3 + env = "seals_half_cheetah" + wandb_name_prefix = "" + + +# @parallel_ex.config +# def seeds(n_seeds_start, n_seeds): +# search_space = { +# "config_updates": { +# "seed": tune.choice( +# list(range(n_seeds_start, n_seeds_start + n_seeds)), +# ) +# } +# } @parallel_ex.config -def seeds(n_seeds): - search_space = {"config_updates": {"seed": tune.grid_search(list(range(n_seeds)))}} +def wandb(run_name): + base_config_updates = { + "common": { + "wandb": { + "wandb_name_prefix": run_name, + "wandb_kwargs": {"project": "algorithm-benchmark"}, + }, + }, + } + # base_named_configs = ["common.wandb_logging"] @parallel_ex.named_config @@ -63,7 +92,7 @@ def generate_test_data(): "config_updates": { "rl": { "rl_kwargs": { - "learning_rate": tune.grid_search( + "learning_rate": tune.choice( [3e-4 * x for x in (1 / 3, 1 / 2)], ), }, @@ -91,8 +120,8 @@ def example_cartpole_rl(): "config_updates": { "rl": { "rl_kwargs": { - "learning_rate": tune.grid_search(np.logspace(3e-6, 1e-1, num=3)), - "nminibatches": tune.grid_search([16, 32, 64]), + "learning_rate": tune.choice(np.logspace(3e-6, 1e-1, num=3)), + "nminibatches": tune.choice([16, 32, 64]), }, }, }, @@ -105,44 +134,367 @@ def example_cartpole_rl(): @parallel_ex.named_config -def example_rl_easy(): +def example_rl(): sacred_ex_name = "train_rl" - run_name = "example-rl-easy" - n_seeds = 2 + run_name = "rl_tuning" + # n_seeds = 2 + base_named_configs = ["common.wandb_logging", "seals_half_cheetah"] + base_config_updates = { + "common": { + "wandb": {"wandb_kwargs": {"project": "algorithm-benchmark"}}, + "num_vec": 1, + }, + } search_space = { - "named_configs": tune.grid_search([[env] for env in EASY_ENVS]), + # "named_configs": tune.choice([[env] for env in EASY_ENVS]), "config_updates": { "rl": { + "batch_size": tune.choice([512, 1024, 2048, 4096, 8192]), "rl_kwargs": { - "learning_rate": tune.grid_search(np.logspace(3e-6, 1e-1, num=3)), - "nminibatches": tune.grid_search([16, 32, 64]), + "learning_rate": tune.loguniform(1e-5, 1e-2), + "batch_size": tune.choice([64, 128, 256, 512]), + "n_epochs": tune.choice([5, 10, 20]), }, }, }, } - resources_per_trial = dict(cpu=4) + num_samples = 100 + eval_best_trial = True + eval_trial_seeds = 5 + repeat = 1 + resources_per_trial = dict(cpu=1) @parallel_ex.named_config -def example_gail_easy(): +def example_bc(): + sacred_ex_name = "train_imitation" + run_name = "bc_tuning_hc" + base_named_configs = ["common.wandb_logging", "seals_half_cheetah"] + base_config_updates = { + # "common": {"wandb": {"wandb_kwargs": {"project": "algorithm-benchmark"}}}, + "common": {"num_vec": 1}, + } + search_space = { + "config_updates": { + "bc_kwargs": dict( + batch_size=tune.choice([8, 16, 32, 64]), + l2_weight=tune.loguniform(1e-6, 1e-2), # L2 regularization weight + 
optimizer_kwargs=dict( + lr=tune.loguniform(1e-5, 1e-2), + ), + ), + "bc_train_kwargs": dict( + n_epochs=tune.choice([1, 5, 10, 20]), + ), + }, + "command_name": "bc", + } + num_samples = 64 + eval_best_trial = True + eval_trial_seeds = 5 + repeat = 3 + resources_per_trial = dict(cpu=1) + + +@parallel_ex.named_config +def example_dagger(): + sacred_ex_name = "train_imitation" + run_name = "dagger_tuning_hc" + base_named_configs = ["common.wandb_logging", "seals_half_cheetah"] + base_config_updates = { + # "common": {"wandb": {"wandb_kwargs": {"project": "algorithm-benchmark"}}}, + "common": {"num_vec": 1}, + "dagger": {"total_timesteps": 1e5}, + "bc_kwargs": { + "batch_size": 16, + "l2_weight": 1e-4, + "optimizer_kwargs": {"lr": 1e-3}, + }, + } + search_space = { + "config_updates": { + "bc_train_kwargs": dict( + n_epochs=tune.choice([1, 5, 10]), + ), + "dagger": dict( + beta_schedule=tune.choice( + [LinearBetaSchedule(i) for i in [1, 5, 15]] + + [ExponentialBetaSchedule(i) for i in [0.3, 0.5, 0.7]], + ), + rollout_round_min_episodes=tune.choice([3, 5, 10]), + ), + }, + "command_name": "dagger", + } + num_samples = 50 + repeat = 3 + eval_best_trial = True + eval_trial_seeds = 5 + resources_per_trial = dict(cpu=1) + + +@parallel_ex.named_config +def example_gail(): sacred_ex_name = "train_adversarial" - run_name = "example-gail-easy" - n_seeds = 1 + run_name = "gail_tuning_hc" + base_named_configs = ["common.wandb_logging"] + base_config_updates = { + "common": {"num_vec": 1}, + "total_timesteps": 1e7, + } search_space = { - "named_configs": tune.grid_search([[env] for env in EASY_ENVS]), + # "named_configs": tune.choice([[env] for env in MY_ENVS]), "config_updates": { - "init_trainer_kwargs": { - "rl": { - "rl_kwargs": { - "learning_rate": tune.grid_search( - np.logspace(3e-6, 1e-1, num=3), - ), - "nminibatches": tune.grid_search([16, 32, 64]), - }, + "algorithm_kwargs": dict( + demo_batch_size=tune.choice([32, 128, 512, 2048, 8192]), + n_disc_updates_per_round=tune.choice([8, 16]), + # both are same as rl.batch_size + # gen_replay_buffer_capacity=tune.choice([512, 1024]), + # gen_train_timesteps=0, + ), + "rl": { + "batch_size": tune.choice([4096, 8192, 16384]), + "rl_kwargs": { + "ent_coef": tune.loguniform(1e-7, 1e-3), + "learning_rate": tune.loguniform(1e-5, 1e-2), }, }, + "algorithm_specific": {}, }, + "command_name": "gail", + } + num_samples = 100 + eval_best_trial = True + eval_trial_seeds = 5 + repeat = 3 + # experiment_checkpoint_path = f"/home/taufeeque/ray_results/{run_name}" + resources_per_trial = dict(cpu=1) + + +@parallel_ex.named_config +def example_airl(): + sacred_ex_name = "train_adversarial" + run_name = "airl_tuning_hc" + # n_seeds = 1 + base_named_configs = ["common.wandb_logging"] + base_config_updates = { + "common": {"num_vec": 1}, + "total_timesteps": 1e7, } search_space = { - "command_name": "gail", + # "named_configs": tune.choice([[env] for env in MY_ENVS]), + "config_updates": { + "algorithm_kwargs": dict( + demo_batch_size=tune.choice([32, 128, 512, 2048, 8192]), + n_disc_updates_per_round=tune.choice([8, 16]), + # both are same as rl.batch_size + # gen_replay_buffer_capacity=tune.choice([512, 1024]), + # gen_train_timesteps=0, + ), + "rl": { + "batch_size": tune.choice([4096, 8192, 16384]), + "rl_kwargs": { + "ent_coef": tune.loguniform(1e-7, 1e-3), + "learning_rate": tune.loguniform(1e-5, 1e-2), + }, + }, + "algorithm_specific": {}, + }, + "command_name": "airl", + } + num_samples = 100 + eval_best_trial = True + eval_trial_seeds = 5 + repeat = 3 + # 
experiment_checkpoint_path = f"/home/taufeeque/ray_results/{run_name}" + resources_per_trial = dict(cpu=1) + + +@parallel_ex.named_config +def example_pc(): + sacred_ex_name = "train_preference_comparisons" + run_name = "pc_tuning" + base_named_configs = ["common.wandb_logging", "seals_half_cheetah"] + base_config_updates = { + "common": {"num_vec": 1}, + "total_timesteps": 2e7, + "total_comparisons": 5000, + "query_schedule": "hyperbolic", + "gatherer_kwargs": {"sample": True}, + } + search_space = { + "named_configs": tune.choice( + [ + ["reward.normalize_output_disable"], + # ["reward.normalize_output_running"], + # ["reward.normalize_output_ema"], + ], + ), + "config_updates": { + "train": { + "policy_kwargs": { + "activation_fn": tune.choice( + [ + nn.ReLU, + # nn.Tanh, + ], + ), + }, + }, + "num_iterations": tune.choice([25, 50]), + # "initial_comparison_frac": tune.choice([0.1, 0.25]), + # "reward_trainer_kwargs": { + # "epochs": tune.choice([1, 3, 6]), + # }, + # "query_schedule": tune.choice( + # ["constant", "hyperbolic", "inverse_quadratic"], + # ), + "rl": { + "batch_size": tune.choice([512, 2048, 8192]), + "rl_kwargs": { + "learning_rate": tune.loguniform(1e-5, 1e-2), + "ent_coef": tune.loguniform(1e-7, 1e-3), + }, + }, + }, } + num_samples = 24 + eval_best_trial = True + eval_trial_seeds = 5 + repeat = 3 + resources_per_trial = dict(cpu=1) + + +@parallel_ex.named_config +def debug_eval(): + sacred_ex_name = "train_preference_comparisons" + run_name = "debug_eval" + eval_trial_seeds = 2 + eval_best_trial = True + # base_named_configs = ["seals_half_cheetah"] + base_config_updates = { + "common": {"wandb": {"wandb_kwargs": {"project": "algorithm-benchmark"}}}, + "total_timesteps": 30, + "total_comparisons": 10, + # "query_schedule": "hyperbolic", + "num_iterations": 1, + "fragment_length": 2, + } + search_space = { + # "named_configs": tune.choice([[env] for env in MY_ENVS]), + "config_updates": { + # "num_iterations": tune.choice([5, 20, 50]), + "initial_comparison_frac": tune.choice([0.1, 0.2]), + # "reward_trainer_kwargs": { + # "epochs": tune.choice([1, 2, 3]), + # }, + # "query_schedule": tune.choice( + # ["constant", "hyperbolic", "inverse_quadratic"], + # ), + }, + } + resources_per_trial = dict(cpu=1) + + +@parallel_ex.named_config +def debug_eval_adv(): + sacred_ex_name = "train_adversarial" + run_name = "airl_tuning_debug" + # n_seeds = 5 + base_named_configs = [] + eval_best_trial = True + eval_trial_seeds = 2 + base_config_updates = { + "common": { + "wandb": {"wandb_kwargs": {"project": "algorithm-benchmark"}}, + # "num_env": 1, + }, + "total_timesteps": 2048, + } + search_space = { + # "named_configs": tune.choice([[env] for env in MY_ENVS]), + "config_updates": { + "algorithm_kwargs": dict( + # demo_batch_size=tune.choice([32, 128, 512, 2048, 8192]), + n_disc_updates_per_round=tune.choice([1, 2]), + # both are same as rl.batch_size + # gen_replay_buffer_capacity=tune.choice([512, 1024]), + # gen_train_timesteps=0, + ), + "rl": { + "batch_size": 8, + # "rl_kwargs": { + # "ent_coef": tune.choice([0, 1e-3, 1e-1]), + # "learning_rate": tune.loguniform(1e-5, 5e-3), + # }, + }, + "algorithm_specific": dict(demo_batch_size=1), + }, + "command_name": "airl", + } + num_samples = 2 + repeat = 2 + resources_per_trial = dict(cpu=8) + + +@parallel_ex.named_config +def debug_airl(): + sacred_ex_name = "train_adversarial" + run_name = "airl_debug" + # n_seeds = 1 + base_named_configs = ["common.wandb_logging", "seals_walker"] + base_config_updates = { + "common": {"num_vec": 
8}, + "total_timesteps": 1e7, + } + search_space = { + # "named_configs": tune.choice([[env] for env in MY_ENVS]), + "config_updates": { + "train": { + "policy_kwargs": { + "activation_fn": tune.choice( + [ + nn.ReLU, + # nn.Tanh, + ], + ), + }, + }, + "algorithm_kwargs": dict( + demo_batch_size=tune.choice([32]), + n_disc_updates_per_round=tune.choice([10]), + # both are same as rl.batch_size + # gen_replay_buffer_capacity=tune.choice([512, 1024]), + # gen_train_timesteps=0, + ), + "rl": { + "batch_size": tune.choice([10000]), + "rl_kwargs": { + "ent_coef": tune.choice([0.1]), + "learning_rate": tune.choice([1e-4]), + }, + }, + "algorithm_specific": {}, + }, + "command_name": "airl", + } + num_samples = 1 + eval_best_trial = False + # eval_trial_seeds = 5 + repeat = 5 + # experiment_checkpoint_path = f"/home/taufeeque/ray_results/{run_name}" + resources_per_trial = dict(cpu=8) + + +# @parallel_ex.config_hook +# def config_hook(config, command_name, logger): +# """Sets env.""" +# del command_name, logger +# res = {} +# print(config) +# if config["env"]: +# res["base_named_configs"] = tuple( +# config["base_named_configs"] + [config["env"]] +# ) +# print(res) +# return res diff --git a/src/imitation/scripts/config/train_adversarial.py b/src/imitation/scripts/config/train_adversarial.py index aae3baeb0..bd9df6287 100644 --- a/src/imitation/scripts/config/train_adversarial.py +++ b/src/imitation/scripts/config/train_adversarial.py @@ -1,6 +1,7 @@ """Configuration for imitation.scripts.train_adversarial.""" import sacred +from torch import nn from imitation.rewards import reward_nets from imitation.scripts.ingredients import demonstrations, environment, expert @@ -98,9 +99,25 @@ def pendulum(): @train_adversarial_ex.named_config def seals_ant(): - locals().update(**MUJOCO_SHARED_LOCALS) - locals().update(**ANT_SHARED_LOCALS) + # locals().update(**MUJOCO_SHARED_LOCALS) + # locals().update(**ANT_SHARED_LOCALS) environment = dict(gym_id="seals/Ant-v0") + demonstrations = dict(rollout_type="ppo-huggingface") + rl = dict( + batch_size=2048, + rl_kwargs=dict( + batch_size=16, + clip_range=0.3, + ent_coef=3.1441389214159857e-06, + gae_lambda=0.8, + gamma=0.995, + learning_rate=0.00017959211641976886, + max_grad_norm=0.9, + n_epochs=10, + # policy_kwargs are same as the defaults + vf_coef=0.4351450387648799, + ), + ) CHEETAH_SHARED_LOCALS = dict( @@ -139,40 +156,145 @@ def half_cheetah(): @train_adversarial_ex.named_config def seals_half_cheetah(): - locals().update(**CHEETAH_SHARED_LOCALS) + # locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/HalfCheetah-v0") + demonstrations = dict(rollout_type="ppo-huggingface") + rl = dict( + batch_size=512, + rl_kwargs=dict( + batch_size=64, + clip_range=0.1, + ent_coef=3.794797423594763e-06, + gae_lambda=0.95, + gamma=0.95, + learning_rate=0.0003286871805949382, + max_grad_norm=0.8, + n_epochs=5, + vf_coef=0.11483689492120866, + ), + ) + # algorithm_specific = dict( + # airl=dict(total_timesteps=int(5e6)), + # gail=dict(total_timesteps=int(8e6)), + # ) + # reward = dict( + # algorithm_specific=dict( + # airl=dict( + # net_cls=reward_nets.BasicShapedRewardNet, + # net_kwargs=dict( + # reward_hid_sizes=(32,), + # potential_hid_sizes=(32,), + # ), + # ), + # ), + # ) + algorithm_kwargs = dict( + # Number of discriminator updates after each round of generator updates + n_disc_updates_per_round=16, + # Equivalent to no replay buffer if batch size is the same + gen_replay_buffer_capacity=512, + demo_batch_size=8192, + ) 
@train_adversarial_ex.named_config def seals_hopper(): - locals().update(**MUJOCO_SHARED_LOCALS) + # locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/Hopper-v0") + demonstrations = dict(rollout_type="ppo-huggingface") + train = dict( + policy_cls="MlpPolicy", + policy_kwargs=dict( + activation_fn=nn.ReLU, + net_arch=[dict(pi=[64, 64], vf=[64, 64])], + ), + ) + rl = dict( + batch_size=2048, + rl_kwargs=dict( + batch_size=512, + clip_range=0.1, + ent_coef=0.0010159833764878474, + gae_lambda=0.98, + gamma=0.995, + learning_rate=0.0003904770450788824, + max_grad_norm=0.9, + n_epochs=20, + vf_coef=0.20315938606555833, + ), + ) @train_adversarial_ex.named_config -def seals_humanoid(): - locals().update(**MUJOCO_SHARED_LOCALS) - environment = dict(gym_id="seals/Humanoid-v0") - total_timesteps = int(4e6) +def seals_swimmer(): + # locals().update(**MUJOCO_SHARED_LOCALS) + environment = dict(gym_id="seals/Swimmer-v0") + total_timesteps = int(2e6) + demonstrations = dict(rollout_type="ppo-huggingface") + train = dict( + policy_cls="MlpPolicy", + policy_kwargs=dict( + activation_fn=nn.ReLU, + net_arch=[dict(pi=[64, 64], vf=[64, 64])], + ), + ) + rl = dict( + batch_size=2048, + rl_kwargs=dict( + batch_size=64, + clip_range=0.1, + ent_coef=5.167107294612664e-08, + gae_lambda=0.95, + gamma=0.999, + learning_rate=0.000414936134792374, + max_grad_norm=2, + n_epochs=5, + # policy_kwargs are same as the defaults + vf_coef=0.6162112311062333, + ), + ) @train_adversarial_ex.named_config -def reacher(): - environment = dict(gym_id="Reacher-v2") - algorithm_kwargs = {"allow_variable_horizon": True} +def seals_walker(): + # locals().update(**MUJOCO_SHARED_LOCALS) + environment = dict(gym_id="seals/Walker2d-v0") + demonstrations = dict(rollout_type="ppo-huggingface") + train = dict( + policy_cls="MlpPolicy", + policy_kwargs=dict( + activation_fn=nn.ReLU, + net_arch=[dict(pi=[64, 64], vf=[64, 64])], + ), + ) + rl = dict( + batch_size=8192, + rl_kwargs=dict( + batch_size=128, + clip_range=0.4, + ent_coef=0.00013057334805552262, + gae_lambda=0.92, + gamma=0.98, + learning_rate=0.000138575372312869, + max_grad_norm=0.6, + n_epochs=20, + # policy_kwargs are same as the defaults + vf_coef=0.6167177795726859, + ), + ) @train_adversarial_ex.named_config -def seals_swimmer(): +def seals_humanoid(): locals().update(**MUJOCO_SHARED_LOCALS) - environment = dict(gym_id="seals/Swimmer-v0") - total_timesteps = int(2e6) + environment = dict(gym_id="seals/Humanoid-v0") + total_timesteps = int(4e6) @train_adversarial_ex.named_config -def seals_walker(): - locals().update(**MUJOCO_SHARED_LOCALS) - environment = dict(gym_id="seals/Walker2d-v0") +def reacher(): + environment = dict(gym_id="Reacher-v2") + algorithm_kwargs = {"allow_variable_horizon": True} # Debug configs @@ -189,3 +311,22 @@ def fast(): demo_batch_size=1, n_disc_updates_per_round=4, ) + + +@train_adversarial_ex.named_config +def debug_nans(): + environment = {"wandb": {"wandb_kwargs": {"project": "algorithm-benchmark"}}} + total_timesteps = 1e7 + algorithm_kwargs = dict( + demo_batch_size=128, + n_disc_updates_per_round=8, + # both are same as rl.batch_size + # gen_replay_buffer_capacity=tune.choice([512, 1024]), + # gen_train_timesteps=0, + ) + rl = { + "batch_size": 4096, + "rl_kwargs": {"ent_coef": 0.1, "learning_rate": 7.316377404994506e-05}, + } + seed = 0 + checkpoint_interval = 1 diff --git a/src/imitation/scripts/config/train_imitation.py b/src/imitation/scripts/config/train_imitation.py index 16da9c694..23e24ec0b 100644 --- 
a/src/imitation/scripts/config/train_imitation.py +++ b/src/imitation/scripts/config/train_imitation.py @@ -38,6 +38,7 @@ def config(): dagger = dict( use_offline_rollouts=False, # warm-start policy with BC from offline demos total_timesteps=1e5, + rollout_round_min_episodes=None, # use default value ) agent_path = None # Path to load agent from, optional. @@ -81,6 +82,8 @@ def ant(): @train_imitation_ex.named_config def seals_ant(): environment = dict(gym_id="seals/Ant-v0") + demonstrations = dict(rollout_type="ppo-huggingface") + expert = {"policy_type": "ppo-huggingface"} @train_imitation_ex.named_config @@ -95,6 +98,29 @@ def seals_half_cheetah(): environment = dict(gym_id="seals/HalfCheetah-v0") bc_kwargs = dict(l2_weight=0.0) dagger = dict(total_timesteps=60000) + demonstrations = dict(rollout_type="ppo-huggingface") + expert = {"policy_type": "ppo-huggingface"} + + +@train_imitation_ex.named_config +def seals_hopper(): + environment = dict(gym_id="seals/Hopper-v0") + demonstrations = dict(rollout_type="ppo-huggingface") + expert = {"policy_type": "ppo-huggingface"} + + +@train_imitation_ex.named_config +def seals_swimmer(): + environment = dict(gym_id="seals/Swimmer-v0") + demonstrations = dict(rollout_type="ppo-huggingface") + expert = {"policy_type": "ppo-huggingface"} + + +@train_imitation_ex.named_config +def seals_walker(): + environment = dict(gym_id="seals/Walker2d-v0") + demonstrations = dict(rollout_type="ppo-huggingface") + expert = {"policy_type": "ppo-huggingface"} @train_imitation_ex.named_config diff --git a/src/imitation/scripts/config/train_preference_comparisons.py b/src/imitation/scripts/config/train_preference_comparisons.py index cf25f4783..d12869bf0 100644 --- a/src/imitation/scripts/config/train_preference_comparisons.py +++ b/src/imitation/scripts/config/train_preference_comparisons.py @@ -1,6 +1,7 @@ """Configuration for imitation.scripts.train_preference_comparisons.""" import sacred +from torch import nn from imitation.algorithms import preference_comparisons from imitation.scripts.ingredients import environment @@ -72,9 +73,24 @@ def cartpole(): @train_preference_comparisons_ex.named_config def seals_ant(): - locals().update(**MUJOCO_SHARED_LOCALS) - locals().update(**ANT_SHARED_LOCALS) + # locals().update(**MUJOCO_SHARED_LOCALS) + # locals().update(**ANT_SHARED_LOCALS) environment = dict(gym_id="seals/Ant-v0") + rl = dict( + batch_size=2048, + rl_kwargs=dict( + batch_size=16, + clip_range=0.3, + ent_coef=3.1441389214159857e-06, + gae_lambda=0.8, + gamma=0.995, + learning_rate=0.00017959211641976886, + max_grad_norm=0.9, + n_epochs=10, + # policy_kwargs are same as the defaults + vf_coef=0.4351450387648799, + ), + ) @train_preference_comparisons_ex.named_config @@ -84,10 +100,116 @@ def half_cheetah(): rl = dict(batch_size=16384, rl_kwargs=dict(batch_size=1024)) +@train_preference_comparisons_ex.named_config +def seals_half_cheetah(): + # locals().update(**MUJOCO_SHARED_LOCALS) + environment = dict(gym_id="seals/HalfCheetah-v0") + rl = dict( + batch_size=512, + rl_kwargs=dict( + batch_size=64, + clip_range=0.1, + ent_coef=3.794797423594763e-06, + gae_lambda=0.95, + gamma=0.95, + learning_rate=0.0003286871805949382, + max_grad_norm=0.8, + n_epochs=5, + vf_coef=0.11483689492120866, + ), + ) + num_iterations = 50 + total_timesteps = 20000000 + # train = dict( + # policy_cls="MlpPolicy", + # policy_kwargs=dict( + # activation_fn=nn.ReLU, + # # net_arch=[dict(pi=[64, 64], vf=[64, 64])], + # ), + # ) + + @train_preference_comparisons_ex.named_config def 
seals_hopper(): - locals().update(**MUJOCO_SHARED_LOCALS) + # locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/Hopper-v0") + train = dict( + policy_cls="MlpPolicy", + policy_kwargs=dict( + activation_fn=nn.ReLU, + net_arch=[dict(pi=[64, 64], vf=[64, 64])], + ), + ) + rl = dict( + batch_size=2048, + rl_kwargs=dict( + batch_size=512, + clip_range=0.1, + ent_coef=0.0010159833764878474, + gae_lambda=0.98, + gamma=0.995, + learning_rate=0.0003904770450788824, + max_grad_norm=0.9, + n_epochs=20, + vf_coef=0.20315938606555833, + ), + ) + + +@train_preference_comparisons_ex.named_config +def seals_swimmer(): + # locals().update(**MUJOCO_SHARED_LOCALS) + environment = dict(gym_id="seals/Swimmer-v0") + train = dict( + policy_cls="MlpPolicy", + policy_kwargs=dict( + activation_fn=nn.ReLU, + net_arch=[dict(pi=[64, 64], vf=[64, 64])], + ), + ) + rl = dict( + batch_size=2048, + rl_kwargs=dict( + batch_size=64, + clip_range=0.1, + ent_coef=5.167107294612664e-08, + gae_lambda=0.95, + gamma=0.999, + learning_rate=0.000414936134792374, + max_grad_norm=2, + n_epochs=5, + # policy_kwargs are same as the defaults + vf_coef=0.6162112311062333, + ), + ) + + +@train_preference_comparisons_ex.named_config +def seals_walker(): + # locals().update(**MUJOCO_SHARED_LOCALS) + environment = dict(gym_id="seals/Walker2d-v0") + train = dict( + policy_cls="MlpPolicy", + policy_kwargs=dict( + activation_fn=nn.ReLU, + net_arch=[dict(pi=[64, 64], vf=[64, 64])], + ), + ) + rl = dict( + batch_size=8192, + rl_kwargs=dict( + batch_size=128, + clip_range=0.4, + ent_coef=0.00013057334805552262, + gae_lambda=0.92, + gamma=0.98, + learning_rate=0.000138575372312869, + max_grad_norm=0.6, + n_epochs=20, + # policy_kwargs are same as the defaults + vf_coef=0.6167177795726859, + ), + ) @train_preference_comparisons_ex.named_config diff --git a/src/imitation/scripts/config/train_rl.py b/src/imitation/scripts/config/train_rl.py index 6d48f8695..9df2581a6 100644 --- a/src/imitation/scripts/config/train_rl.py +++ b/src/imitation/scripts/config/train_rl.py @@ -1,6 +1,8 @@ """Configuration settings for train_rl, training a policy with RL.""" + import sacred +from torch import nn from imitation.scripts.ingredients import environment from imitation.scripts.ingredients import logging as logging_ingredient @@ -70,8 +72,30 @@ def cartpole(): @train_rl_ex.named_config def seals_cartpole(): - environment = dict(gym_id="seals/CartPole-v0") - total_timesteps = int(1e6) + environment = dict(gym_id="seals/CartPole-v0", num_vec=8) + total_timesteps = int(1e5) + train = dict( + policy_cls="MlpPolicy", + policy_kwargs=dict( + activation_fn=nn.ReLU, + net_arch=[dict(pi=[64, 64], vf=[64, 64])], + ), + ) + normalize_reward = False + rl = dict( + batch_size=4096, + rl_kwargs=dict( + batch_size=256, + clip_range=0.4, + ent_coef=0.008508727919228772, + gae_lambda=0.9, + gamma=0.9999, + learning_rate=0.0012403278189645594, + max_grad_norm=0.8, + n_epochs=10, + vf_coef=0.489343896591493, + ), + ) @train_rl_ex.named_config @@ -80,9 +104,69 @@ def half_cheetah(): total_timesteps = int(5e6) # does OK after 1e6, but continues improving +@train_rl_ex.named_config +def seals_half_cheetah(): + environment = dict( + gym_id="seals/HalfCheetah-v0", + num_vec=1, + ) + + train = dict( + policy_cls="MlpPolicy", + policy_kwargs=dict( + activation_fn=nn.Tanh, + net_arch=[dict(pi=[64, 64], vf=[64, 64])], + ), + ) + # total_timesteps = int(5e6) # does OK after 1e6, but continues improving + total_timesteps = 1e6 + normalize_reward = False + + rl = dict( + 
batch_size=512, + rl_kwargs=dict( + batch_size=64, + clip_range=0.1, + ent_coef=3.794797423594763e-06, + gae_lambda=0.95, + gamma=0.95, + learning_rate=0.0003286871805949382, + max_grad_norm=0.8, + n_epochs=5, + vf_coef=0.11483689492120866, + ), + ) + + @train_rl_ex.named_config def seals_hopper(): - environment = dict(gym_id="seals/Hopper-v0") + environment = dict(gym_id="seals/Hopper-v0", num_vec=1) + train = dict( + policy_cls="MlpPolicy", + policy_kwargs=dict( + activation_fn=nn.ReLU, + net_arch=[dict(pi=[64, 64], vf=[64, 64])], + ), + ) + + total_timesteps = 1e6 + normalize_reward = False + + rl = dict( + batch_size=2048, + rl_kwargs=dict( + batch_size=512, + clip_range=0.1, + ent_coef=0.0010159833764878474, + gae_lambda=0.98, + gamma=0.995, + learning_rate=0.0003904770450788824, + max_grad_norm=0.9, + n_epochs=20, + # policy_kwargs are same as the defaults + vf_coef=0.20315938606555833, + ), + ) @train_rl_ex.named_config @@ -104,15 +188,34 @@ def seals_mountain_car(): @train_rl_ex.named_config def pendulum(): - environment = dict(gym_id="Pendulum-v1") + environment = dict(gym_id="Pendulum-v1", num_vec=4) + total_timesteps = int(1e5) + + train = dict( + policy_cls="MlpPolicy", + # policy_kwargs=dict( + # activation_fn=nn.Tanh, + # net_arch=[dict(pi=[64, 64], vf=[64, 64])], + # ), + ) + normalize_reward = False + rl = dict( - batch_size=4096, + batch_size=1024 * 4, rl_kwargs=dict( + gae_lambda=0.95, gamma=0.9, + n_epochs=10, + ent_coef=0.0, learning_rate=1e-3, + clip_range=0.2, + use_sde=True, + sde_sample_freq=4, + # batch_size=64, + # max_grad_norm=0.8, + # vf_coef=0.11483689492120866, ), ) - total_timesteps = int(2e5) @train_rl_ex.named_config @@ -122,17 +225,99 @@ def reacher(): @train_rl_ex.named_config def seals_ant(): - environment = dict(gym_id="seals/Ant-v0") + environment = dict( + gym_id="seals/Ant-v0", + num_vec=1, + ) + + train = dict( + policy_cls="MlpPolicy", + policy_kwargs=dict( + activation_fn=nn.Tanh, + net_arch=[dict(pi=[64, 64], vf=[64, 64])], + ), + ) + + total_timesteps = 1e6 + normalize_reward = False + + rl = dict( + batch_size=2048, + rl_kwargs=dict( + batch_size=16, + clip_range=0.3, + ent_coef=3.1441389214159857e-06, + gae_lambda=0.8, + gamma=0.995, + learning_rate=0.00017959211641976886, + max_grad_norm=0.9, + n_epochs=10, + # policy_kwargs are same as the defaults + vf_coef=0.4351450387648799, + ), + ) @train_rl_ex.named_config def seals_swimmer(): - environment = dict(gym_id="seals/Swimmer-v0") + environment = dict(gym_id="seals/Swimmer-v0", num_vec=1) + train = dict( + policy_cls="MlpPolicy", + policy_kwargs=dict( + activation_fn=nn.ReLU, + net_arch=[dict(pi=[64, 64], vf=[64, 64])], + ), + ) + + total_timesteps = 1e6 + normalize_reward = False + + rl = dict( + batch_size=2048, + rl_kwargs=dict( + batch_size=64, + clip_range=0.1, + ent_coef=5.167107294612664e-08, + gae_lambda=0.95, + gamma=0.999, + learning_rate=0.000414936134792374, + max_grad_norm=2, + n_epochs=5, + # policy_kwargs are same as the defaults + vf_coef=0.6162112311062333, + ), + ) @train_rl_ex.named_config def seals_walker(): - environment = dict(gym_id="seals/Walker2d-v0") + environment = dict(gym_id="seals/Walker2d-v0", num_vec=1) + train = dict( + policy_cls="MlpPolicy", + policy_kwargs=dict( + activation_fn=nn.ReLU, + net_arch=[dict(pi=[64, 64], vf=[64, 64])], + ), + ) + + total_timesteps = 1e6 + normalize_reward = False + + rl = dict( + batch_size=8192, + rl_kwargs=dict( + batch_size=128, + clip_range=0.4, + ent_coef=0.00013057334805552262, + gae_lambda=0.92, + gamma=0.98, + 
learning_rate=0.000138575372312869, + max_grad_norm=0.6, + n_epochs=20, + # policy_kwargs are same as the defaults + vf_coef=0.6167177795726859, + ), + ) # Debug configs diff --git a/src/imitation/scripts/ingredients/reward.py b/src/imitation/scripts/ingredients/reward.py index c40d3751f..a4bd98d1f 100644 --- a/src/imitation/scripts/ingredients/reward.py +++ b/src/imitation/scripts/ingredients/reward.py @@ -46,6 +46,11 @@ def normalize_output_running(): normalize_output_layer = networks.RunningNorm # noqa: F841 +@reward_ingredient.named_config +def normalize_output_ema(): + normalize_output_layer = networks.EMANorm # noqa: F841 + + @reward_ingredient.named_config def reward_ensemble(): net_cls = reward_nets.RewardEnsemble diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index 6014a08b6..c196954d1 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -2,12 +2,18 @@ import collections.abc import copy +import glob import pathlib -from typing import Any, Callable, Dict, Mapping, Optional, Sequence +from typing import Any, Callable, Dict, Mapping, Optional, Sequence, Union +import numpy as np import ray import ray.tune import sacred +from pandas.api.types import is_object_dtype +from ray.tune import search +from ray.tune.registry import register_trainable +from ray.tune.search import optuna from sacred.observers import FileStorageObserver from imitation.scripts.config.parallel import parallel_ex @@ -17,6 +23,7 @@ def parallel( sacred_ex_name: str, run_name: str, + num_samples: int, search_space: Mapping[str, Any], base_named_configs: Sequence[str], base_config_updates: Mapping[str, Any], @@ -24,6 +31,12 @@ def parallel( init_kwargs: Mapping[str, Any], local_dir: Optional[str], upload_dir: Optional[str], + repeat: int = 3, + eval_best_trial: bool = False, + eval_trial_seeds: int = 5, + experiment_checkpoint_path: str = "", + syncer=None, + resume: Union[str, bool] = False, ) -> None: """Parallelize multiple runs of another Sacred Experiment using Ray Tune. @@ -40,6 +53,7 @@ def parallel( under the 'experiment.name' key. This is equivalent to using the Sacred CLI '--name' option on the inner experiment. Offline analysis jobs can use this argument to group similar data. + num_samples: Number of times to sample from the hyperparameter space. search_space: A dictionary which can contain Ray Tune search objects like `ray.tune.grid_search` and `ray.tune.sample_from`, and is passed as the `config` argument to `ray.tune.run()`. After the @@ -62,6 +76,19 @@ def parallel( init_kwargs: Arguments to pass to `ray.init`. local_dir: `local_dir` argument to `ray.tune.run()`. upload_dir: `upload_dir` argument to `ray.tune.run()`. + repeat: Number of runs to repeat each trial for. + eval_best_trial: Whether to evaluate the trial with the best mean return + at the end of tuning on a different set of seeds. + eval_trial_seeds: Number of distinct seeds to evaluate the best trial on. + experiment_checkpoint_path: Path containing the checkpoints of a previous + experiment. ran using this script. Useful for resuming cancelled trials + of the experiments (using `resume`) or evaluating the best trial of the + experiment (using `eval_best_trial`). + resume: If true and `experiment_checkpoint_path` is given, then resumes the + experiment by restarting the trials that did not finish in the experiment + checkpoint path. + syncer: `syncer` argument to `ray.tune.syncer.SyncConfig`. 
+ Raises: TypeError: Named configs not string sequences or config updates not mappings. @@ -73,8 +100,8 @@ def parallel( if not isinstance(base_config_updates, collections.abc.Mapping): raise TypeError("base_config_updates must be a Mapping") - if not isinstance(search_space["named_configs"], collections.abc.Sequence): - raise TypeError('search_space["named_configs"] must be a Sequence') + # if not isinstance(search_space["named_configs"], collections.abc.Sequence): + # raise TypeError('search_space["named_configs"] must be a Sequence') if not isinstance(search_space["config_updates"], collections.abc.Mapping): raise TypeError('search_space["config_updates"] must be a Mapping') @@ -95,15 +122,104 @@ def parallel( ) ray.init(**init_kwargs) + search_alg = optuna.OptunaSearch() + search_alg = search.Repeater(search_alg, repeat=repeat) try: - ray.tune.run( - trainable, - config=search_space, - name=run_name, - local_dir=local_dir, - resources_per_trial=resources_per_trial, - sync_config=ray.tune.syncer.SyncConfig(upload_dir=upload_dir), + if experiment_checkpoint_path: + if resume: + register_trainable("inner", trainable) + runner = ray.tune.execution.trial_runner.TrialRunner( + local_checkpoint_dir=experiment_checkpoint_path, + sync_config=ray.tune.syncer.SyncConfig( + upload_dir=upload_dir, + syncer=syncer, + ), + metric="mean_return", + resume=resume, + ) + print( + "Live trials:", len(runner._live_trials), "/", len(runner._trials) + ) + while not runner.is_finished(): + runner.step() + print("Debug:", runner.debug_string()) + + result = ray.tune.ExperimentAnalysis(experiment_checkpoint_path) + result._load_checkpoints_from_latest( + glob.glob(experiment_checkpoint_path + "/experiment_state*.json"), + ) + result.trials = None + result.fetch_trial_dataframes() + else: + result = ray.tune.run( + trainable, + config=search_space, + num_samples=num_samples * repeat, + name=run_name, + local_dir=local_dir, + resources_per_trial=resources_per_trial, + sync_config=ray.tune.syncer.SyncConfig( + upload_dir=upload_dir, + syncer=syncer, + ), + search_alg=search_alg, + metric="mean_return", + mode="max", + ) + + key = ( + "rollout/" + if sacred_ex_name == "train_preference_comparisons" + else "" + if sacred_ex_name == "train_rl" + else "imit_stats/" ) + key += "monitor_return_mean" + if eval_best_trial: + df = result.results_df + df = df[df["config/named_configs"].notna()] + for col in df.columns: + if is_object_dtype(df[col]): + df[col] = df[col].astype("str") + + grp_keys = [ + c for c in df.columns if c.startswith("config") and "seed" not in c + ] + grps = df.groupby(grp_keys) + print(grps[key]) + df["mean_return"] = grps[key].transform(lambda x: x.mean()) + best_config_df = df[df["mean_return"] == df["mean_return"].max()] + envs_processed = set() + for i, row in best_config_df.iterrows(): + tag = row["experiment_tag"] + trial = [t for t in result.trials if tag in t.experiment_tag][0] + best_config = trial.config + env = tuple(best_config["named_configs"]) + if env in envs_processed: + continue + envs_processed.add(env) + print("Named configs:", env) + print("Mean return:", row["mean_return"]) + print("All returns:", df[df["mean_return"] == row["mean_return"]][key]) + print("Total seeds:", (df["mean_return"] == row["mean_return"]).sum()) + best_config["config_updates"].update( + seed=ray.tune.grid_search(list(range(100, 100 + eval_trial_seeds))), + ) + resources_per_trial = {k: 2 * v for k, v in resources_per_trial.items()} + eval_result = ray.tune.run( + trainable, + config={ + "named_configs": 
best_config["named_configs"], + "config_updates": best_config["config_updates"], + "command_name": best_config.get("command_name", None), + }, + name=run_name + "_best_hp_eval", + resources_per_trial=resources_per_trial, + ) + returns = eval_result.results_df["mean_return"].to_numpy() + print("Returns:", returns) + print(np.mean(returns), np.std(returns)) + finally: ray.shutdown() @@ -148,7 +264,7 @@ def _ray_tune_sacred_wrapper( `ex.run`) and `reporter`. The function returns the run result. """ - def inner(config: Mapping[str, Any], reporter) -> Mapping[str, Any]: + def inner(config: Dict[str, Any], reporter) -> Mapping[str, Any]: """Trainable function with the correct signature for `ray.tune`. Args: @@ -169,11 +285,17 @@ def inner(config: Mapping[str, Any], reporter) -> Mapping[str, Any]: # Import inside function rather than in module because Sacred experiments # are not picklable, and Ray requires this function to be picklable. from imitation.scripts.train_adversarial import train_adversarial_ex + from imitation.scripts.train_imitation import train_imitation_ex + from imitation.scripts.train_preference_comparisons import ( + train_preference_comparisons_ex, + ) from imitation.scripts.train_rl import train_rl_ex experiments = { "train_rl": train_rl_ex, "train_adversarial": train_adversarial_ex, + "train_imitation": train_imitation_ex, + "train_preference_comparisons": train_preference_comparisons_ex, } ex = experiments[sacred_ex_name] @@ -181,22 +303,28 @@ def inner(config: Mapping[str, Any], reporter) -> Mapping[str, Any]: named_configs = base_named_configs + run_kwargs["named_configs"] updated_run_kwargs["named_configs"] = named_configs - config_updates = {**base_config_updates, **run_kwargs["config_updates"]} + config_updates: Mapping[str, Any] = {} + config_updates.update(base_config_updates) + config_updates.update(run_kwargs["config_updates"]) + if "__trial_index__" in run_kwargs: + config_updates.update(seed=run_kwargs.pop("__trial_index__")) updated_run_kwargs["config_updates"] = config_updates # Add other run_kwargs items to updated_run_kwargs. for k, v in run_kwargs.items(): if k not in updated_run_kwargs: updated_run_kwargs[k] = v - - run = ex.run( - **updated_run_kwargs, - options={"--run": run_name, "--file_storage": "sacred"}, - ) - + run = ex.run(**updated_run_kwargs, options={"--run": run_name}) # Ray Tune has a string formatting error if raylet completes without # any calls to `reporter`. 
- reporter(done=True) + # reporter(done=True) + # if sacred_ex_name == "train_preference_comparisons": + # #reporter(mean_return=run.result["rollout"]["monitor_return_mean"]) + # #ray.tune.report(mean_return=run.result["rollout"]["monitor_return_mean"]) + # ray.tune.report(mean_return=234) + # else: + # # reporter(mean_return=run.result["imit_stats"]["monitor_return_mean"]) + # ray.tune.report(mean_return=run.result["imit_stats"]["monitor_return_mean"]) assert run.status == "COMPLETED" return run.result diff --git a/src/imitation/scripts/train_adversarial.py b/src/imitation/scripts/train_adversarial.py index 71fc0c2c9..58f7fb4c4 100644 --- a/src/imitation/scripts/train_adversarial.py +++ b/src/imitation/scripts/train_adversarial.py @@ -162,6 +162,7 @@ def callback(round_num: int, /) -> None: return { "imit_stats": imit_stats, "expert_stats": rollout.rollout_stats(expert_trajs), + "mean_return": imit_stats["monitor_return_mean"], } diff --git a/src/imitation/scripts/train_imitation.py b/src/imitation/scripts/train_imitation.py index 2b4946668..c5673fa3e 100644 --- a/src/imitation/scripts/train_imitation.py +++ b/src/imitation/scripts/train_imitation.py @@ -125,10 +125,12 @@ def train_imitation( expert_policy=expert_policy, custom_logger=custom_logger, bc_trainer=bc_trainer, + beta_schedule=dagger["beta_schedule"], rng=_rnd, ) model.train( total_timesteps=int(dagger["total_timesteps"]), + rollout_round_min_episodes=dagger["rollout_round_min_episodes"], bc_train_kwargs=bc_train_kwargs, ) # TODO(adam): add checkpointing to DAgger? @@ -141,7 +143,7 @@ def train_imitation( imit_stats = train.eval_policy(imit_policy, venv) - stats = {"imit_stats": imit_stats} + stats = {"imit_stats": imit_stats, "mean_return": imit_stats["monitor_return_mean"]} trajectories = model._all_demos if use_dagger else expert_trajs assert trajectories is not None if all(isinstance(t, types.TrajectoryWithRew) for t in trajectories): diff --git a/src/imitation/scripts/train_preference_comparisons.py b/src/imitation/scripts/train_preference_comparisons.py index e1aab27ff..1daa306af 100644 --- a/src/imitation/scripts/train_preference_comparisons.py +++ b/src/imitation/scripts/train_preference_comparisons.py @@ -268,6 +268,7 @@ def save_callback(iteration_num): if bool(trajectory_path is None): results = dict(results) results["rollout"] = train.eval_policy(agent, venv) + results["mean_return"] = results["rollout"]["monitor_return_mean"] if save_preferences: main_trainer.dataset.save(log_dir / "preferences.pkl") diff --git a/src/imitation/scripts/train_rl.py b/src/imitation/scripts/train_rl.py index fd345ca62..a88e6096a 100644 --- a/src/imitation/scripts/train_rl.py +++ b/src/imitation/scripts/train_rl.py @@ -157,7 +157,9 @@ def train_rl( serialize.save_stable_model(output_dir, rl_algo) # Final evaluation of expert policy. 
- return train.eval_policy(rl_algo, venv) + eval_stats = train.eval_policy(rl_algo, venv) + eval_stats["mean_return"] = eval_stats["monitor_return_mean"] + return eval_stats def main_console(): diff --git a/tests/algorithms/test_dagger.py b/tests/algorithms/test_dagger.py index 549e38fd2..6cc42bc78 100644 --- a/tests/algorithms/test_dagger.py +++ b/tests/algorithms/test_dagger.py @@ -33,7 +33,7 @@ def maybe_pendulum_expert_trajectories( return None -def test_beta_schedule(): +def test_linear_beta_schedule(): one_step_sched = dagger.LinearBetaSchedule(1) three_step_sched = dagger.LinearBetaSchedule(3) for i in range(10): @@ -41,6 +41,29 @@ def test_beta_schedule(): assert np.allclose(three_step_sched(i), (3 - i) / 3 if i <= 2 else 0) +def test_indicator_beta_schedule(): + one_step_sched = dagger.IndicatorBetaSchedule(1) + three_step_sched = dagger.IndicatorBetaSchedule(3) + for i in range(10): + assert np.allclose(one_step_sched(i), 1 if i == 0 else 0) + assert np.allclose(three_step_sched(i), 1 if i <= 2 else 0) + + +def test_exponential_beta_schedule(): + constant_sched = dagger.ExponentialBetaSchedule(1) + decay = 0.5 + decaying_sched = dagger.ExponentialBetaSchedule(decay) + for i in range(10): + assert np.allclose(constant_sched(i), 1) + assert np.allclose(decaying_sched(i), decay**i) + + with pytest.raises( + ValueError, + match=r"decay_probability lies outside the range \(0, 1\]\.", + ): + decaying_sched = dagger.ExponentialBetaSchedule(1.1) + + def test_traj_collector_seed(tmpdir, pendulum_venv, rng): collector = dagger.InteractiveTrajectoryCollector( venv=pendulum_venv, diff --git a/tests/scripts/test_scripts.py b/tests/scripts/test_scripts.py index 2196b4af1..0a2766dbb 100644 --- a/tests/scripts/test_scripts.py +++ b/tests/scripts/test_scripts.py @@ -375,7 +375,10 @@ def bc_config(tmpdir, request): policy_type="ppo", loader_kwargs=dict(path=CARTPOLE_TEST_POLICY_PATH / "model.zip"), ), - expert_from_huggingface=dict(policy_type="ppo-huggingface"), + expert_from_huggingface=dict( + policy_type="ppo-huggingface", + loader_kwargs=dict(env_id="seals/CartPole-v0"), + ), random_expert=dict(policy_type="random"), zero_expert=dict(policy_type="zero"), )[request.param] @@ -403,7 +406,10 @@ def test_train_bc_warmstart(tmpdir): config_updates=dict( logging=dict(log_root=tmpdir), demonstrations=dict(rollout_path=CARTPOLE_TEST_ROLLOUT_PATH), - expert=dict(policy_type="ppo-huggingface"), + expert=dict( + policy_type="ppo-huggingface", + loader_kwargs=dict(env_id="seals/CartPole-v0"), + ), ), ) assert run.status == "COMPLETED" @@ -559,6 +565,27 @@ def test_train_adversarial(tmpdir, named_configs, command): _check_train_ex_result(run.result) +def test_train_adversarial_debug(): + """Smoke test for imitation.scripts.train_adversarial.""" + named_configs = ["seals_ant", "debug_nans"] + config_updates = { + "common": dict(log_root="/home/tf/imitation/debug", parallel=False), + "demonstrations": dict( + rollout_path="/home/tf/imitation/download/final.pkl", + ), + # TensorBoard logs to get extra coverage + # "algorithm_kwargs": dict(init_tensorboard=True), + "agent_path": "/home/tf/imitation/download/01124/gen_policy", + } + run = train_adversarial.train_adversarial_ex.run( + command_name="airl", + named_configs=named_configs, + config_updates=config_updates, + ) + assert run.status == "COMPLETED" + _check_train_ex_result(run.result) + + @pytest.mark.parametrize("command", ("airl", "gail")) def test_train_adversarial_warmstart(tmpdir, command): named_configs = ["cartpole"] + 
ALGO_FAST_CONFIGS["adversarial"] From 97bc063e72e6fc769222351d954f68be28cf761f Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Tue, 10 Jan 2023 15:56:14 +0530 Subject: [PATCH 02/54] Clean parallel script --- src/imitation/scripts/parallel.py | 54 +++++++++++++++++++------------ 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index c196954d1..da492804e 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -27,12 +27,13 @@ def parallel( search_space: Mapping[str, Any], base_named_configs: Sequence[str], base_config_updates: Mapping[str, Any], - resources_per_trial: Mapping[str, Any], + resources_per_trial: Dict[str, Any], init_kwargs: Mapping[str, Any], local_dir: Optional[str], upload_dir: Optional[str], repeat: int = 3, eval_best_trial: bool = False, + eval_best_trial_resource_multiplier: int = 2, eval_trial_seeds: int = 5, experiment_checkpoint_path: str = "", syncer=None, @@ -79,6 +80,8 @@ def parallel( repeat: Number of runs to repeat each trial for. eval_best_trial: Whether to evaluate the trial with the best mean return at the end of tuning on a different set of seeds. + eval_best_trial_resource_multiplier: factor by which to multiply the + number of cpus per trial in `resources_per_trial`. eval_trial_seeds: Number of distinct seeds to evaluate the best trial on. experiment_checkpoint_path: Path containing the checkpoints of a previous experiment. ran using this script. Useful for resuming cancelled trials @@ -122,11 +125,11 @@ def parallel( ) ray.init(**init_kwargs) - search_alg = optuna.OptunaSearch() - search_alg = search.Repeater(search_alg, repeat=repeat) + search_alg = search.Repeater(optuna.OptunaSearch(), repeat=repeat) try: if experiment_checkpoint_path: if resume: + # restart failed runs from experiment_checkpoint_path register_trainable("inner", trainable) runner = ray.tune.execution.trial_runner.TrialRunner( local_checkpoint_dir=experiment_checkpoint_path, @@ -138,16 +141,21 @@ def parallel( resume=resume, ) print( - "Live trials:", len(runner._live_trials), "/", len(runner._trials) + "Live trials:", + len(runner._live_trials), + "/", + len(runner._trials), ) while not runner.is_finished(): runner.step() print("Debug:", runner.debug_string()) + # load experiment analysis results result = ray.tune.ExperimentAnalysis(experiment_checkpoint_path) result._load_checkpoints_from_latest( glob.glob(experiment_checkpoint_path + "/experiment_state*.json"), ) + # update result.trials using all the experiment_state json files result.trials = None result.fetch_trial_dataframes() else: @@ -167,45 +175,50 @@ def parallel( mode="max", ) - key = ( + key_prefix = ( "rollout/" if sacred_ex_name == "train_preference_comparisons" else "" if sacred_ex_name == "train_rl" else "imit_stats/" ) - key += "monitor_return_mean" + key = key_prefix + "monitor_return_mean" if eval_best_trial: df = result.results_df df = df[df["config/named_configs"].notna()] + # convert object dtype to str required by df.groupby for col in df.columns: if is_object_dtype(df[col]): df[col] = df[col].astype("str") - + # group into separate HP configs grp_keys = [ c for c in df.columns if c.startswith("config") and "seed" not in c ] grps = df.groupby(grp_keys) - print(grps[key]) + # store mean return of runs across all seeds in a group df["mean_return"] = grps[key].transform(lambda x: x.mean()) best_config_df = df[df["mean_return"] == df["mean_return"].max()] - envs_processed = set() 
- for i, row in best_config_df.iterrows(): - tag = row["experiment_tag"] - trial = [t for t in result.trials if tag in t.experiment_tag][0] + row = best_config_df.loc[0] + best_config_tag = row["experiment_tag"] + if result.trials is not None: + trial = [ + t for t in result.trials if best_config_tag in t.experiment_tag + ][0] best_config = trial.config - env = tuple(best_config["named_configs"]) - if env in envs_processed: - continue - envs_processed.add(env) - print("Named configs:", env) print("Mean return:", row["mean_return"]) print("All returns:", df[df["mean_return"] == row["mean_return"]][key]) print("Total seeds:", (df["mean_return"] == row["mean_return"]).sum()) best_config["config_updates"].update( seed=ray.tune.grid_search(list(range(100, 100 + eval_trial_seeds))), ) - resources_per_trial = {k: 2 * v for k, v in resources_per_trial.items()} + # update cpus per trial only if it is provided in `resources_per_trial` + # Uses the default values (cpu=1) if it is not provided + if "cpu" in resources_per_trial: + resources_per_trial["cpu"] *= eval_best_trial_resource_multiplier + best_config["config_updates"].update( + environment=dict(num_vec=resources_per_trial["cpu"]), + ) + eval_result = ray.tune.run( trainable, config={ @@ -219,7 +232,6 @@ def parallel( returns = eval_result.results_df["mean_return"].to_numpy() print("Returns:", returns) print(np.mean(returns), np.std(returns)) - finally: ray.shutdown() @@ -229,7 +241,7 @@ def _ray_tune_sacred_wrapper( run_name: str, base_named_configs: list, base_config_updates: Mapping[str, Any], -) -> Callable[[Mapping[str, Any], Any], Mapping[str, Any]]: +) -> Callable[[Dict[str, Any], Any], Mapping[str, Any]]: """From an Experiment build a wrapped run function suitable for Ray Tune. `ray.tune.run(...)` expects a trainable function that takes a dict @@ -303,7 +315,7 @@ def inner(config: Dict[str, Any], reporter) -> Mapping[str, Any]: named_configs = base_named_configs + run_kwargs["named_configs"] updated_run_kwargs["named_configs"] = named_configs - config_updates: Mapping[str, Any] = {} + config_updates: Dict[str, Any] = {} config_updates.update(base_config_updates) config_updates.update(run_kwargs["config_updates"]) if "__trial_index__" in run_kwargs: From 92912256816e51ce6e4266ac80ed990c6416493d Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Thu, 26 Jan 2023 15:18:04 +0100 Subject: [PATCH 03/54] Undo the changes from #653 to the dagger benchmark config files. This change just made some error messages go away indicating the missing imitation.algorithms.dagger.ExponentialBetaSchedule but it did not fix the root cause. 
--- benchmarking/example_dagger_seals_ant_best_hp_eval.json | 2 +- .../example_dagger_seals_half_cheetah_best_hp_eval.json | 2 +- benchmarking/example_dagger_seals_hopper_best_hp_eval.json | 2 +- benchmarking/example_dagger_seals_swimmer_best_hp_eval.json | 2 +- benchmarking/example_dagger_seals_walker_best_hp_eval.json | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/benchmarking/example_dagger_seals_ant_best_hp_eval.json b/benchmarking/example_dagger_seals_ant_best_hp_eval.json index 035beab83..38f3f504a 100644 --- a/benchmarking/example_dagger_seals_ant_best_hp_eval.json +++ b/benchmarking/example_dagger_seals_ant_best_hp_eval.json @@ -16,7 +16,7 @@ }, "dagger": { "beta_schedule": { - "py/type": "imitation.algorithms.dagger.LinearBetaSchedule", + "py/object": "imitation.algorithms.dagger.LinearBetaSchedule", "rampdown_rounds": 15 }, "rollout_round_min_episodes": 5, diff --git a/benchmarking/example_dagger_seals_half_cheetah_best_hp_eval.json b/benchmarking/example_dagger_seals_half_cheetah_best_hp_eval.json index 8961f8c26..708c92547 100644 --- a/benchmarking/example_dagger_seals_half_cheetah_best_hp_eval.json +++ b/benchmarking/example_dagger_seals_half_cheetah_best_hp_eval.json @@ -17,7 +17,7 @@ "dagger": { "beta_schedule": { "decay_probability": 0.7, - "py/type": "imitation.algorithms.dagger.ExponentialBetaSchedule" + "py/object": "imitation.algorithms.dagger.ExponentialBetaSchedule" }, "rollout_round_min_episodes": 5, "total_timesteps": 60000, diff --git a/benchmarking/example_dagger_seals_hopper_best_hp_eval.json b/benchmarking/example_dagger_seals_hopper_best_hp_eval.json index fe47291e0..001479ec3 100644 --- a/benchmarking/example_dagger_seals_hopper_best_hp_eval.json +++ b/benchmarking/example_dagger_seals_hopper_best_hp_eval.json @@ -17,7 +17,7 @@ "dagger": { "beta_schedule": { "decay_probability": 0.7, - "py/type": "imitation.algorithms.dagger.ExponentialBetaSchedule" + "py/object": "imitation.algorithms.dagger.ExponentialBetaSchedule" }, "rollout_round_min_episodes": 10, "total_timesteps": 100000, diff --git a/benchmarking/example_dagger_seals_swimmer_best_hp_eval.json b/benchmarking/example_dagger_seals_swimmer_best_hp_eval.json index 2e6cba2c0..df1606fca 100644 --- a/benchmarking/example_dagger_seals_swimmer_best_hp_eval.json +++ b/benchmarking/example_dagger_seals_swimmer_best_hp_eval.json @@ -16,7 +16,7 @@ }, "dagger": { "beta_schedule": { - "py/type": "imitation.algorithms.dagger.LinearBetaSchedule", + "py/object": "imitation.algorithms.dagger.LinearBetaSchedule", "rampdown_rounds": 15 }, "rollout_round_min_episodes": 3, diff --git a/benchmarking/example_dagger_seals_walker_best_hp_eval.json b/benchmarking/example_dagger_seals_walker_best_hp_eval.json index e4569321f..ce6baff1c 100644 --- a/benchmarking/example_dagger_seals_walker_best_hp_eval.json +++ b/benchmarking/example_dagger_seals_walker_best_hp_eval.json @@ -17,7 +17,7 @@ "dagger": { "beta_schedule": { "decay_probability": 0.7, - "py/type": "imitation.algorithms.dagger.ExponentialBetaSchedule" + "py/object": "imitation.algorithms.dagger.ExponentialBetaSchedule" }, "rollout_round_min_episodes": 5, "total_timesteps": 100000, From 276d863f488512067c38408ecf1386e8199abf50 Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Wed, 25 Jan 2023 17:08:27 +0100 Subject: [PATCH 04/54] Improve readability and interpretability of benchmarking tests. 
---
 tests/test_benchmarking.py | 51 ++++++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/tests/test_benchmarking.py b/tests/test_benchmarking.py
index 5c42063c6..67b9eb489 100644
--- a/tests/test_benchmarking.py
+++ b/tests/test_benchmarking.py
@@ -1,6 +1,4 @@
 """Tests for config files in benchmarking/ folder."""
-import glob
-import os
 import pathlib
 
 import pytest
@@ -10,24 +8,39 @@
 THIS_DIR = pathlib.Path(__file__).absolute().parent
 BENCHMARKING_DIR = THIS_DIR.parent / "benchmarking"
 
+ALGORITHMS = ["bc", "dagger", "airl", "gail"]
+ENVIRONMENTS = [
+    "seals_walker",
+    "seals_ant",
+    "seals_half_cheetah",
+    "seals_hopper",
+    "seals_swimmer",
+]
 
-@pytest.mark.parametrize(
-    "command_name",
-    ["bc", "dagger", "airl", "gail"],
-)
-def test_benchmarking_configs(tmpdir, command_name):
+
+@pytest.mark.parametrize("environment", ENVIRONMENTS)
+@pytest.mark.parametrize("algorithm", ALGORITHMS)
+def test_benchmarks_print_config_succeeds(algorithm: str, environment: str):
     # We test the configs using the print_config command,
     # because running the configs requires MuJoCo.
     # Requiring MuJoCo to run the tests adds too much complexity.
-    if command_name in ("bc", "dagger"):
-        ex = train_imitation.train_imitation_ex
-    elif command_name in ("airl", "gail"):
-        ex = train_adversarial.train_adversarial_ex
-    cfg_pattern = os.path.join(BENCHMARKING_DIR, f"example_{command_name}_*.json")
-    cfg_files = glob.glob(cfg_pattern)
-    assert len(cfg_files) == 5, "There should be 1 config file for each of environment."
-    for i, cfg_file in enumerate(cfg_files):
-        cfg_name = f"{tmpdir.basename}_{i}"
-        ex.add_named_config(cfg_name, cfg_file)
-        run = ex.run(command_name="print_config", named_configs=[cfg_name])
-        assert run.status == "COMPLETED"
+
+    # GIVEN
+    if algorithm in ("bc", "dagger"):
+        experiment = train_imitation.train_imitation_ex
+    elif algorithm in ("airl", "gail"):
+        experiment = train_adversarial.train_adversarial_ex
+    else:
+        raise ValueError(f"Unknown algorithm: {algorithm}")
+
+    config_name = f"{algorithm}_{environment}"
+    config_file = str(
+        BENCHMARKING_DIR / f"example_{algorithm}_{environment}_best_hp_eval.json",
+    )
+
+    # WHEN
+    experiment.add_named_config(config_name, config_file)
+    run = experiment.run(command_name="print_config", named_configs=[config_name])
+
+    # THEN
+    assert run.status == "COMPLETED"

From 37eb914cba0aaa416543b763b6f2246eae8f9fa7 Mon Sep 17 00:00:00 2001
From: taufeeque9 <9taufeeque9@gmail.com>
Date: Wed, 1 Mar 2023 21:48:13 +0530
Subject: [PATCH 05/54] Add exponential beta scheduler for dagger

---
 src/imitation/algorithms/dagger.py | 29 +++++++++++++++++++
 .../scripts/config/train_imitation.py | 1 +
 src/imitation/scripts/train_imitation.py | 1 +
 3 files changed, 31 insertions(+)

diff --git a/src/imitation/algorithms/dagger.py b/src/imitation/algorithms/dagger.py
index d43ca5eec..34d8cef7e 100644
--- a/src/imitation/algorithms/dagger.py
+++ b/src/imitation/algorithms/dagger.py
@@ -66,6 +66,35 @@ def __call__(self, round_num: int) -> float:
         return min(1, max(0, (self.rampdown_rounds - round_num) / self.rampdown_rounds))
 
 
+class ExponentialBetaSchedule(BetaSchedule):
+    """Exponentially decaying schedule for beta."""
+
+    def __init__(self, decay_probability: float):
+        """Builds ExponentialBetaSchedule.
+
+        Args:
+            decay_probability: the decay factor for beta.
+
+        Raises:
+            ValueError: if `decay_probability` not within (0, 1].
+ """ + if not (0 < decay_probability <= 1): + raise ValueError("decay_probability lies outside the range (0, 1].") + self.decay_probability = decay_probability + + def __call__(self, round_num: int) -> float: + """Computes beta value. + + Args: + round_num: the current round number. + + Returns: + beta as `self.decay_probability ^ round_num` + """ + assert round_num >= 0 + return self.decay_probability**round_num + + def reconstruct_trainer( scratch_dir: types.AnyPath, venv: vec_env.VecEnv, diff --git a/src/imitation/scripts/config/train_imitation.py b/src/imitation/scripts/config/train_imitation.py index 16da9c694..2ef2eed44 100644 --- a/src/imitation/scripts/config/train_imitation.py +++ b/src/imitation/scripts/config/train_imitation.py @@ -38,6 +38,7 @@ def config(): dagger = dict( use_offline_rollouts=False, # warm-start policy with BC from offline demos total_timesteps=1e5, + beta_schedule=None, ) agent_path = None # Path to load agent from, optional. diff --git a/src/imitation/scripts/train_imitation.py b/src/imitation/scripts/train_imitation.py index 2b4946668..f8cc992fd 100644 --- a/src/imitation/scripts/train_imitation.py +++ b/src/imitation/scripts/train_imitation.py @@ -125,6 +125,7 @@ def train_imitation( expert_policy=expert_policy, custom_logger=custom_logger, bc_trainer=bc_trainer, + beta_schedule=dagger["beta_schedule"], rng=_rnd, ) model.train( From 877383b03d7d3260746997f3cab7b5272125b07b Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Thu, 2 Feb 2023 13:00:06 +0100 Subject: [PATCH 06/54] Ignore coverage for unknown algorithms. --- tests/test_benchmarking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_benchmarking.py b/tests/test_benchmarking.py index 67b9eb489..ba01b38a2 100644 --- a/tests/test_benchmarking.py +++ b/tests/test_benchmarking.py @@ -31,7 +31,7 @@ def test_benchmarks_print_config_succeeds(algorithm: str, environment: str): elif algorithm in ("airl", "gail"): experiment = train_adversarial.train_adversarial_ex else: - raise ValueError(f"Unknown algorithm: {algorithm}") + raise ValueError(f"Unknown algorithm: {algorithm}") # pragma: no cover config_name = f"{algorithm}_{environment}" config_file = str( From c8e55cb1efee3913bf306c23f6a5c361674d7380 Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Thu, 2 Feb 2023 13:04:02 +0100 Subject: [PATCH 07/54] Cleanup and extend tests for beta schedules in dagger. 
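For reference, a quick sketch of the values the two schedules produce, based on the formulas in dagger.py (linear: `(rampdown_rounds - round_num) / rampdown_rounds`, clipped to [0, 1]; exponential: `decay_probability ** round_num`). These are the kinds of sequences the extended tests below assert against:

    from imitation.algorithms import dagger

    linear = dagger.LinearBetaSchedule(3)
    print([linear(i) for i in range(6)])
    # roughly: 1, 0.667, 0.333, 0, 0, 0 -- linear rampdown, then pinned at 0

    exponential = dagger.ExponentialBetaSchedule(0.5)
    print([exponential(i) for i in range(4)])
    # [1.0, 0.5, 0.25, 0.125] -- geometric decay by decay_probability each round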
--- tests/algorithms/test_dagger.py | 39 ++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/tests/algorithms/test_dagger.py b/tests/algorithms/test_dagger.py index 525fc449a..6e5582810 100644 --- a/tests/algorithms/test_dagger.py +++ b/tests/algorithms/test_dagger.py @@ -33,12 +33,39 @@ def maybe_pendulum_expert_trajectories( return None -def test_beta_schedule(): - one_step_sched = dagger.LinearBetaSchedule(1) - three_step_sched = dagger.LinearBetaSchedule(3) - for i in range(10): - assert np.allclose(one_step_sched(i), 1 if i == 0 else 0) - assert np.allclose(three_step_sched(i), (3 - i) / 3 if i <= 2 else 0) +@pytest.mark.parametrize("num_rampdown_rounds", [1, 2, 3, 10]) +def test_linear_beta_schedule(num_rampdown_rounds): + # GIVEN + sched = dagger.LinearBetaSchedule(num_rampdown_rounds) + idx_after_rampdown = num_rampdown_rounds + 1 + + # WHEN + betas = [sched(i) for i in range(num_rampdown_rounds + 10)] + + # THEN + assert np.allclose( + betas[:idx_after_rampdown], + np.linspace(1, 0, idx_after_rampdown), + ) + assert np.allclose(betas[idx_after_rampdown:], 0) + + +@pytest.mark.parametrize("decay_probability", [0.1, 0.5, 0.9, 1]) +def test_exponential_beta_schedule(decay_probability): + # GIVEN + sched = dagger.ExponentialBetaSchedule(decay_probability) + + # WHEN + betas = [sched(i) for i in range(10)] + + # THEN + assert np.allclose(betas, decay_probability ** np.arange(10)) + + +@pytest.mark.parametrize("decay_probability", [-0.1, 0, 1.1, 2]) +def test_forbidden_decay_probability_on_exp_beta_schedule(decay_probability): + with pytest.raises(ValueError): + dagger.ExponentialBetaSchedule(decay_probability) def test_traj_collector_seed(tmpdir, pendulum_venv, rng): From d81eb68d2359ebb1927f6ebb2ba573f0c7e5745a Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 9 Feb 2023 02:02:21 +0530 Subject: [PATCH 08/54] Add optuna to dependencies --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 557015d91..867c1b775 100644 --- a/setup.py +++ b/setup.py @@ -210,6 +210,7 @@ def get_local_version(version: "ScmVersion", time_format="%Y%m%d") -> str: "chai-sacred>=0.8.3", "tensorboard>=1.14", "huggingface_sb3>=2.2.1", + "optuna>=3.0.1", ], tests_require=TESTS_REQUIRE, extras_require={ From 27467d38268a2217731f019dc0202ce3a520cf2a Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 9 Feb 2023 02:22:24 +0530 Subject: [PATCH 09/54] Fix test case --- tests/scripts/test_scripts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/test_scripts.py b/tests/scripts/test_scripts.py index 78bbca9bd..ad559d2d9 100644 --- a/tests/scripts/test_scripts.py +++ b/tests/scripts/test_scripts.py @@ -910,7 +910,7 @@ def test_parallel_train_adversarial_custom_env(tmpdir): logging=dict(log_root=tmpdir), demonstrations=dict(rollout_path=rollout_path), ), - search_space=dict(command_name="gail"), + search_space=dict(command_name=tune.choice(["gail"])), ) config_updates.update(PARALLEL_CONFIG_LOW_RESOURCE) run = parallel.parallel_ex.run(config_updates=config_updates) From 1a3b6b81f70cdfc515dc41a264ae1e81347ac588 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 9 Feb 2023 12:04:03 +0530 Subject: [PATCH 10/54] Clean up the scripts --- src/imitation/scripts/analyze.py | 12 +- src/imitation/scripts/config/parallel.py | 219 ++---------------- .../scripts/config/train_adversarial.py | 40 +--- src/imitation/scripts/parallel.py | 39 ++-- 4 files 
changed, 48 insertions(+), 262 deletions(-) diff --git a/src/imitation/scripts/analyze.py b/src/imitation/scripts/analyze.py index a7b52af36..b7b990800 100644 --- a/src/imitation/scripts/analyze.py +++ b/src/imitation/scripts/analyze.py @@ -167,6 +167,7 @@ def _get_algo_name(sd: sacred_util.SacredDicts) -> str: def _return_summaries(sd: sacred_util.SacredDicts) -> dict: imit_stats = get(sd.run, "result.imit_stats") if imit_stats is None: + # stored in rollout key for preference comparison imit_stats = get(sd.run, "result.rollout") expert_stats = get(sd.run, "result.expert_stats") @@ -234,7 +235,7 @@ def _return_summaries(sd: sacred_util.SacredDicts) -> dict: # verbosity 2 table_verbosity_mapping.append( table_verbosity_mapping[-1] - | {"status", "imit_expert_ratio", "exp_command", "run_name", "seed", ""}, + | {"status", "imit_expert_ratio", "exp_command", "run_name"}, ) @@ -264,14 +265,14 @@ def analyze_imitation( csv_output_path: If provided, then save a CSV output file to this path. tex_output_path: If provided, then save a LaTeX-format table to this path. print_table: If True, then print the dataframe to stdout. - table_verbosity: Increasing levels of verbosity, from 0 to 2, increase the - number of columns in the table. + table_verbosity: Increasing levels of verbosity, from 0 to 3, increase the + number of columns in the table. Level 3 prints all of the columns available. Returns: The DataFrame generated from the Sacred logs. """ - if table_verbosity == -1: - table_entry_fns_subset = _get_table_entry_fns_subset(0) + if table_verbosity == 3: + table_entry_fns_subset = _get_table_entry_fns_subset(2) else: table_entry_fns_subset = _get_table_entry_fns_subset(table_verbosity) @@ -279,6 +280,7 @@ def analyze_imitation( for sd in _gather_sacred_dicts(): new_df = pd.DataFrame() if table_verbosity == -1: + # gets all config columns new_df = pd.json_normalize(sd.config) else: new_df = new_df.append({}, ignore_index=True) diff --git a/src/imitation/scripts/config/parallel.py b/src/imitation/scripts/config/parallel.py index 0525641e3..697c5d862 100644 --- a/src/imitation/scripts/config/parallel.py +++ b/src/imitation/scripts/config/parallel.py @@ -6,6 +6,11 @@ Adding custom named configs is necessary because the CLI interface can't add search spaces to the config like `"seed": tune.choice([0, 1, 2, 3])`. + +For tuning hyperparameters of an algorithm on a given environment, override +the `base_named_configs` argument with the named config of the environment. 
+Ex: python -m imitation.scripts.parallel with example_gail \ + 'base_named_configs=["logging.wandb_logging", "seals_half_cheetah"]' """ import numpy as np @@ -13,7 +18,7 @@ import sacred from torch import nn -from imitation.algorithms.dagger import ExponentialBetaSchedule, LinearBetaSchedule +from imitation.algorithms import dagger from imitation.util.util import make_unique_timestamp parallel_ex = sacred.Experiment("parallel") @@ -35,44 +40,11 @@ def config(): local_dir = None # `local_dir` arg for `ray.tune.run` upload_dir = None # `upload_dir` arg for `ray.tune.run` - # n_seeds_start = 0 - # n_seeds = 1 # Number of seeds to search over by default experiment_checkpoint_path = "" eval_best_trial = False eval_trial_seeds = 5 # Number of seeds to search over by default num_samples = 1 # Number of samples per grid search configuration - repeat = 3 - env = "seals_half_cheetah" - wandb_name_prefix = "" - - -# @parallel_ex.config -# def seeds(n_seeds_start, n_seeds): -# search_space = { -# "config_updates": { -# "seed": tune.choice( -# list(range(n_seeds_start, n_seeds_start + n_seeds)), -# ) -# } -# } - - -# @parallel_ex.config -# def wandb(run_name): -# base_config_updates = { -# "logging": { -# "wandb": { -# "wandb_name_prefix": run_name, -# "wandb_kwargs": {"project": "algorithm-benchmark"}, -# }, -# }, -# } -# base_named_configs = ["logging.wandb_logging"] - - -@parallel_ex.named_config -def s3(): - upload_dir = "s3://shwang-chai/private" + repeat = 1 # Debug named configs @@ -137,11 +109,9 @@ def example_cartpole_rl(): def example_rl(): sacred_ex_name = "train_rl" run_name = "rl_tuning" - # n_seeds = 2 - base_named_configs = ["logging.wandb_logging", "seals_half_cheetah"] + base_named_configs = ["logging.wandb_logging"] base_config_updates = {"environment": {"num_vec": 1}} search_space = { - # "named_configs": tune.choice([[env] for env in EASY_ENVS]), "config_updates": { "rl": { "batch_size": tune.choice([512, 1024, 2048, 4096, 8192]), @@ -163,8 +133,8 @@ def example_rl(): @parallel_ex.named_config def example_bc(): sacred_ex_name = "train_imitation" - run_name = "bc_tuning_hc" - base_named_configs = ["logging.wandb_logging", "seals_half_cheetah"] + run_name = "bc_tuning" + base_named_configs = ["logging.wandb_logging"] base_config_updates = {"environment": {"num_vec": 1}} search_space = { "config_updates": { @@ -191,8 +161,8 @@ def example_bc(): @parallel_ex.named_config def example_dagger(): sacred_ex_name = "train_imitation" - run_name = "dagger_tuning_hc" - base_named_configs = ["logging.wandb_logging", "seals_half_cheetah"] + run_name = "dagger_tuning" + base_named_configs = ["logging.wandb_logging"] base_config_updates = { "environment": {"num_vec": 1}, "dagger": {"total_timesteps": 1e5}, @@ -209,8 +179,8 @@ def example_dagger(): ), "dagger": dict( beta_schedule=tune.choice( - [LinearBetaSchedule(i) for i in [1, 5, 15]] - + [ExponentialBetaSchedule(i) for i in [0.3, 0.5, 0.7]], + [dagger.LinearBetaSchedule(i) for i in [1, 5, 15]] + + [dagger.ExponentialBetaSchedule(i) for i in [0.3, 0.5, 0.7]], ), rollout_round_min_episodes=tune.choice([3, 5, 10]), ), @@ -234,14 +204,10 @@ def example_gail(): "total_timesteps": 1e7, } search_space = { - # "named_configs": tune.choice([[env] for env in MY_ENVS]), "config_updates": { "algorithm_kwargs": dict( demo_batch_size=tune.choice([32, 128, 512, 2048, 8192]), n_disc_updates_per_round=tune.choice([8, 16]), - # both are same as rl.batch_size - # gen_replay_buffer_capacity=tune.choice([512, 1024]), - # gen_train_timesteps=0, ), "rl": { 
"batch_size": tune.choice([4096, 8192, 16384]), @@ -258,29 +224,23 @@ def example_gail(): eval_best_trial = True eval_trial_seeds = 5 repeat = 3 - # experiment_checkpoint_path = f"/home/taufeeque/ray_results/{run_name}" resources_per_trial = dict(cpu=1) @parallel_ex.named_config def example_airl(): sacred_ex_name = "train_adversarial" - run_name = "airl_tuning_hc" - # n_seeds = 1 + run_name = "airl_tuning" base_named_configs = ["logging.wandb_logging"] base_config_updates = { "environment": {"num_vec": 1}, "total_timesteps": 1e7, } search_space = { - # "named_configs": tune.choice([[env] for env in MY_ENVS]), "config_updates": { "algorithm_kwargs": dict( demo_batch_size=tune.choice([32, 128, 512, 2048, 8192]), n_disc_updates_per_round=tune.choice([8, 16]), - # both are same as rl.batch_size - # gen_replay_buffer_capacity=tune.choice([512, 1024]), - # gen_train_timesteps=0, ), "rl": { "batch_size": tune.choice([4096, 8192, 16384]), @@ -297,7 +257,6 @@ def example_airl(): eval_best_trial = True eval_trial_seeds = 5 repeat = 3 - # experiment_checkpoint_path = f"/home/taufeeque/ray_results/{run_name}" resources_per_trial = dict(cpu=1) @@ -305,7 +264,7 @@ def example_airl(): def example_pc(): sacred_ex_name = "train_preference_comparisons" run_name = "pc_tuning" - base_named_configs = ["logging.wandb_logging", "seals_half_cheetah"] + base_named_configs = ["logging.wandb_logging"] base_config_updates = { "environment": {"num_vec": 1}, "total_timesteps": 2e7, @@ -317,8 +276,6 @@ def example_pc(): "named_configs": tune.choice( [ ["reward.normalize_output_disable"], - # ["reward.normalize_output_running"], - # ["reward.normalize_output_ema"], ], ), "config_updates": { @@ -327,19 +284,15 @@ def example_pc(): "activation_fn": tune.choice( [ nn.ReLU, - # nn.Tanh, ], ), }, }, "num_iterations": tune.choice([25, 50]), - # "initial_comparison_frac": tune.choice([0.1, 0.25]), - # "reward_trainer_kwargs": { - # "epochs": tune.choice([1, 3, 6]), - # }, - # "query_schedule": tune.choice( - # ["constant", "hyperbolic", "inverse_quadratic"], - # ), + "initial_comparison_frac": tune.choice([0.1, 0.25]), + "reward_trainer_kwargs": { + "epochs": tune.choice([1, 3, 6]), + }, "rl": { "batch_size": tune.choice([512, 2048, 8192]), "rl_kwargs": { @@ -349,138 +302,8 @@ def example_pc(): }, }, } - num_samples = 24 + num_samples = 100 eval_best_trial = True eval_trial_seeds = 5 repeat = 3 resources_per_trial = dict(cpu=1) - - -@parallel_ex.named_config -def debug_eval(): - sacred_ex_name = "train_preference_comparisons" - run_name = "debug_eval" - eval_trial_seeds = 2 - eval_best_trial = True - # base_named_configs = ["seals_half_cheetah"] - base_config_updates = { - "total_timesteps": 30, - "total_comparisons": 10, - # "query_schedule": "hyperbolic", - "num_iterations": 1, - "fragment_length": 2, - } - search_space = { - # "named_configs": tune.choice([[env] for env in MY_ENVS]), - "config_updates": { - # "num_iterations": tune.choice([5, 20, 50]), - "initial_comparison_frac": tune.choice([0.1, 0.2]), - # "reward_trainer_kwargs": { - # "epochs": tune.choice([1, 2, 3]), - # }, - # "query_schedule": tune.choice( - # ["constant", "hyperbolic", "inverse_quadratic"], - # ), - }, - } - resources_per_trial = dict(cpu=1) - - -@parallel_ex.named_config -def debug_eval_adv(): - sacred_ex_name = "train_adversarial" - run_name = "airl_tuning_debug" - # n_seeds = 5 - base_named_configs = [] - eval_best_trial = True - eval_trial_seeds = 2 - base_config_updates = { - "total_timesteps": 2048, - } - search_space = { - # "named_configs": 
tune.choice([[env] for env in MY_ENVS]), - "config_updates": { - "algorithm_kwargs": dict( - # demo_batch_size=tune.choice([32, 128, 512, 2048, 8192]), - n_disc_updates_per_round=tune.choice([1, 2]), - # both are same as rl.batch_size - # gen_replay_buffer_capacity=tune.choice([512, 1024]), - # gen_train_timesteps=0, - ), - "rl": { - "batch_size": 8, - # "rl_kwargs": { - # "ent_coef": tune.choice([0, 1e-3, 1e-1]), - # "learning_rate": tune.loguniform(1e-5, 5e-3), - # }, - }, - "algorithm_specific": dict(demo_batch_size=1), - }, - "command_name": "airl", - } - num_samples = 2 - repeat = 2 - resources_per_trial = dict(cpu=8) - - -@parallel_ex.named_config -def debug_airl(): - sacred_ex_name = "train_adversarial" - run_name = "airl_debug" - # n_seeds = 1 - base_named_configs = ["logging.wandb_logging", "seals_walker"] - base_config_updates = { - "environment": {"num_vec": 8}, - "total_timesteps": 1e7, - } - search_space = { - # "named_configs": tune.choice([[env] for env in MY_ENVS]), - "config_updates": { - "train": { - "policy_kwargs": { - "activation_fn": tune.choice( - [ - nn.ReLU, - # nn.Tanh, - ], - ), - }, - }, - "algorithm_kwargs": dict( - demo_batch_size=tune.choice([32]), - n_disc_updates_per_round=tune.choice([10]), - # both are same as rl.batch_size - # gen_replay_buffer_capacity=tune.choice([512, 1024]), - # gen_train_timesteps=0, - ), - "rl": { - "batch_size": tune.choice([10000]), - "rl_kwargs": { - "ent_coef": tune.choice([0.1]), - "learning_rate": tune.choice([1e-4]), - }, - }, - "algorithm_specific": {}, - }, - "command_name": "airl", - } - num_samples = 1 - eval_best_trial = False - # eval_trial_seeds = 5 - repeat = 5 - # experiment_checkpoint_path = f"/home/taufeeque/ray_results/{run_name}" - resources_per_trial = dict(cpu=8) - - -# @parallel_ex.config_hook -# def config_hook(config, command_name, logger): -# """Sets env.""" -# del command_name, logger -# res = {} -# print(config) -# if config["env"]: -# res["base_named_configs"] = tuple( -# config["base_named_configs"] + [config["env"]] -# ) -# print(res) -# return res diff --git a/src/imitation/scripts/config/train_adversarial.py b/src/imitation/scripts/config/train_adversarial.py index bd9df6287..fb26c99c6 100644 --- a/src/imitation/scripts/config/train_adversarial.py +++ b/src/imitation/scripts/config/train_adversarial.py @@ -99,8 +99,8 @@ def pendulum(): @train_adversarial_ex.named_config def seals_ant(): - # locals().update(**MUJOCO_SHARED_LOCALS) - # locals().update(**ANT_SHARED_LOCALS) + locals().update(**MUJOCO_SHARED_LOCALS) + locals().update(**ANT_SHARED_LOCALS) environment = dict(gym_id="seals/Ant-v0") demonstrations = dict(rollout_type="ppo-huggingface") rl = dict( @@ -173,21 +173,6 @@ def seals_half_cheetah(): vf_coef=0.11483689492120866, ), ) - # algorithm_specific = dict( - # airl=dict(total_timesteps=int(5e6)), - # gail=dict(total_timesteps=int(8e6)), - # ) - # reward = dict( - # algorithm_specific=dict( - # airl=dict( - # net_cls=reward_nets.BasicShapedRewardNet, - # net_kwargs=dict( - # reward_hid_sizes=(32,), - # potential_hid_sizes=(32,), - # ), - # ), - # ), - # ) algorithm_kwargs = dict( # Number of discriminator updates after each round of generator updates n_disc_updates_per_round=16, @@ -257,7 +242,7 @@ def seals_swimmer(): @train_adversarial_ex.named_config def seals_walker(): - # locals().update(**MUJOCO_SHARED_LOCALS) + locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/Walker2d-v0") demonstrations = dict(rollout_type="ppo-huggingface") train = dict( @@ -311,22 +296,3 @@ 
def fast(): demo_batch_size=1, n_disc_updates_per_round=4, ) - - -@train_adversarial_ex.named_config -def debug_nans(): - environment = {"wandb": {"wandb_kwargs": {"project": "algorithm-benchmark"}}} - total_timesteps = 1e7 - algorithm_kwargs = dict( - demo_batch_size=128, - n_disc_updates_per_round=8, - # both are same as rl.batch_size - # gen_replay_buffer_capacity=tune.choice([512, 1024]), - # gen_train_timesteps=0, - ) - rl = { - "batch_size": 4096, - "rl_kwargs": {"ent_coef": 0.1, "learning_rate": 7.316377404994506e-05}, - } - seed = 0 - checkpoint_interval = 1 diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index 3e713777e..9ee8e6ee9 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -33,7 +33,7 @@ def parallel( upload_dir: Optional[str], repeat: int = 1, eval_best_trial: bool = False, - eval_best_trial_resource_multiplier: int = 2, + eval_best_trial_resource_multiplier: int = 1, eval_trial_seeds: int = 5, experiment_checkpoint_path: str = "", syncer=None, @@ -54,7 +54,8 @@ def parallel( under the 'experiment.name' key. This is equivalent to using the Sacred CLI '--name' option on the inner experiment. Offline analysis jobs can use this argument to group similar data. - num_samples: Number of times to sample from the hyperparameter space. + num_samples: Number of times to sample from the hyperparameter space without + considering repetition using `repeat`. search_space: A dictionary which can contain Ray Tune search objects like `ray.tune.grid_search` and `ray.tune.sample_from`, and is passed as the `config` argument to `ray.tune.run()`. After the @@ -79,12 +80,12 @@ def parallel( upload_dir: `upload_dir` argument to `ray.tune.run()`. repeat: Number of runs to repeat each trial for. eval_best_trial: Whether to evaluate the trial with the best mean return - at the end of tuning on a different set of seeds. + at the end of tuning on a separate set of seeds. eval_best_trial_resource_multiplier: factor by which to multiply the number of cpus per trial in `resources_per_trial`. eval_trial_seeds: Number of distinct seeds to evaluate the best trial on. experiment_checkpoint_path: Path containing the checkpoints of a previous - experiment. ran using this script. Useful for resuming cancelled trials + experiment ran using this script. Useful for resuming cancelled trials of the experiments (using `resume`) or evaluating the best trial of the experiment (using `eval_best_trial`). 
resume: If true and `experiment_checkpoint_path` is given, then resumes the @@ -159,6 +160,7 @@ def parallel( result.trials = None result.fetch_trial_dataframes() else: + # run hyperparameter tuning result = ray.tune.run( trainable, config=search_space, @@ -174,15 +176,14 @@ def parallel( metric="mean_return", mode="max", ) - - key_prefix = ( - "rollout/" - if sacred_ex_name == "train_preference_comparisons" - else "" - if sacred_ex_name == "train_rl" - else "imit_stats/" - ) + if sacred_ex_name == "train_rl": + key_prefix = "" + elif sacred_ex_name == "train_preference_comparisons": + key_prefix = "rollout/" + else: + key_prefix = "imit_stats/" key = key_prefix + "monitor_return_mean" + if eval_best_trial: df = result.results_df df = df[df["config/named_configs"].notna()] @@ -230,8 +231,9 @@ def parallel( resources_per_trial=resources_per_trial, ) returns = eval_result.results_df["mean_return"].to_numpy() - print("Returns:", returns) - print(np.mean(returns), np.std(returns)) + print("All returns:", returns) + print("Mean:", np.mean(returns)) + print("Std:", np.std(returns)) finally: ray.shutdown() @@ -333,14 +335,7 @@ def inner(config: Dict[str, Any], reporter) -> Mapping[str, Any]: ) # Ray Tune has a string formatting error if raylet completes without # any calls to `reporter`. - # reporter(done=True) - # if sacred_ex_name == "train_preference_comparisons": - # #reporter(mean_return=run.result["rollout"]["monitor_return_mean"]) - # #ray.tune.report(mean_return=run.result["rollout"]["monitor_return_mean"]) - # ray.tune.report(mean_return=234) - # else: - # # reporter(mean_return=run.result["imit_stats"]["monitor_return_mean"]) - # ray.tune.report(mean_return=run.result["imit_stats"]["monitor_return_mean"]) + reporter(done=True) assert run.status == "COMPLETED" return run.result From 7a438da0f5421f0d98fdb4db9747a8af10d26297 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 9 Feb 2023 19:53:14 +0530 Subject: [PATCH 11/54] Remove reporter(done) since mean_return is reported by the runs --- src/imitation/scripts/parallel.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index 9ee8e6ee9..2dd2254bf 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -333,9 +333,6 @@ def inner(config: Dict[str, Any], reporter) -> Mapping[str, Any]: **updated_run_kwargs, options={"--run": run_name, "--file_storage": "sacred"}, ) - # Ray Tune has a string formatting error if raylet completes without - # any calls to `reporter`. 
- reporter(done=True) assert run.status == "COMPLETED" return run.result From 2e56de8eb97713b88ada09564369214f5e4fa661 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 23 Feb 2023 23:53:12 +0530 Subject: [PATCH 12/54] Add beta_schedule parameter to dagger script --- src/imitation/scripts/train_imitation.py | 1 + src/imitation/scripts/train_preference_comparisons.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imitation/scripts/train_imitation.py b/src/imitation/scripts/train_imitation.py index e607339b4..56633e33a 100644 --- a/src/imitation/scripts/train_imitation.py +++ b/src/imitation/scripts/train_imitation.py @@ -119,6 +119,7 @@ def dagger( expert_policy=expert_policy, custom_logger=custom_logger, bc_trainer=bc_trainer, + beta_schedule=dagger["beta_schedule"], rng=_rnd, ) diff --git a/src/imitation/scripts/train_preference_comparisons.py b/src/imitation/scripts/train_preference_comparisons.py index 3d4fb4e33..4030317c4 100644 --- a/src/imitation/scripts/train_preference_comparisons.py +++ b/src/imitation/scripts/train_preference_comparisons.py @@ -280,7 +280,6 @@ def save_callback(iteration_num): results = dict(results) results["rollout"] = policy_evaluation.eval_policy(agent, venv) results["mean_return"] = results["rollout"]["monitor_return_mean"] - if save_preferences: main_trainer.dataset.save(log_dir / "preferences.pkl") From 73d8576fc893868c68442b657bd25aaffb7df9bf Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Fri, 17 Mar 2023 03:37:15 +0530 Subject: [PATCH 13/54] Update config policy kwargs --- src/imitation/scripts/config/train_adversarial.py | 6 +++--- .../scripts/config/train_preference_comparisons.py | 13 +++---------- src/imitation/scripts/config/train_rl.py | 12 ++++++------ 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/src/imitation/scripts/config/train_adversarial.py b/src/imitation/scripts/config/train_adversarial.py index 08b92fe9c..7989f3eab 100644 --- a/src/imitation/scripts/config/train_adversarial.py +++ b/src/imitation/scripts/config/train_adversarial.py @@ -187,7 +187,7 @@ def seals_hopper(): # locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/Hopper-v0") demonstrations = dict(rollout_type="ppo-huggingface") - train = dict( + policy = dict( policy_cls="MlpPolicy", policy_kwargs=dict( activation_fn=nn.ReLU, @@ -216,7 +216,7 @@ def seals_swimmer(): environment = dict(gym_id="seals/Swimmer-v0") total_timesteps = int(2e6) demonstrations = dict(rollout_type="ppo-huggingface") - train = dict( + policy = dict( policy_cls="MlpPolicy", policy_kwargs=dict( activation_fn=nn.ReLU, @@ -245,7 +245,7 @@ def seals_walker(): locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/Walker2d-v0") demonstrations = dict(rollout_type="ppo-huggingface") - train = dict( + policy = dict( policy_cls="MlpPolicy", policy_kwargs=dict( activation_fn=nn.ReLU, diff --git a/src/imitation/scripts/config/train_preference_comparisons.py b/src/imitation/scripts/config/train_preference_comparisons.py index 236edad47..1a039c762 100644 --- a/src/imitation/scripts/config/train_preference_comparisons.py +++ b/src/imitation/scripts/config/train_preference_comparisons.py @@ -120,20 +120,13 @@ def seals_half_cheetah(): ) num_iterations = 50 total_timesteps = 20000000 - # train = dict( - # policy_cls="MlpPolicy", - # policy_kwargs=dict( - # activation_fn=nn.ReLU, - # # net_arch=[dict(pi=[64, 64], vf=[64, 64])], - # ), - # ) @train_preference_comparisons_ex.named_config def 
seals_hopper(): # locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/Hopper-v0") - train = dict( + policy = dict( policy_cls="MlpPolicy", policy_kwargs=dict( activation_fn=nn.ReLU, @@ -160,7 +153,7 @@ def seals_hopper(): def seals_swimmer(): # locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/Swimmer-v0") - train = dict( + policy = dict( policy_cls="MlpPolicy", policy_kwargs=dict( activation_fn=nn.ReLU, @@ -188,7 +181,7 @@ def seals_swimmer(): def seals_walker(): # locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/Walker2d-v0") - train = dict( + policy = dict( policy_cls="MlpPolicy", policy_kwargs=dict( activation_fn=nn.ReLU, diff --git a/src/imitation/scripts/config/train_rl.py b/src/imitation/scripts/config/train_rl.py index 34b45250c..a5475540d 100644 --- a/src/imitation/scripts/config/train_rl.py +++ b/src/imitation/scripts/config/train_rl.py @@ -74,7 +74,7 @@ def cartpole(): def seals_cartpole(): environment = dict(gym_id="seals/CartPole-v0", num_vec=8) total_timesteps = int(1e5) - train = dict( + policy = dict( policy_cls="MlpPolicy", policy_kwargs=dict( activation_fn=nn.ReLU, @@ -111,7 +111,7 @@ def seals_half_cheetah(): num_vec=1, ) - train = dict( + policy = dict( policy_cls="MlpPolicy", policy_kwargs=dict( activation_fn=nn.Tanh, @@ -141,7 +141,7 @@ def seals_half_cheetah(): @train_rl_ex.named_config def seals_hopper(): environment = dict(gym_id="seals/Hopper-v0", num_vec=1) - train = dict( + policy = dict( policy_cls="MlpPolicy", policy_kwargs=dict( activation_fn=nn.ReLU, @@ -211,7 +211,7 @@ def seals_ant(): num_vec=1, ) - train = dict( + policy = dict( policy_cls="MlpPolicy", policy_kwargs=dict( activation_fn=nn.Tanh, @@ -242,7 +242,7 @@ def seals_ant(): @train_rl_ex.named_config def seals_swimmer(): environment = dict(gym_id="seals/Swimmer-v0", num_vec=1) - train = dict( + policy = dict( policy_cls="MlpPolicy", policy_kwargs=dict( activation_fn=nn.ReLU, @@ -273,7 +273,7 @@ def seals_swimmer(): @train_rl_ex.named_config def seals_walker(): environment = dict(gym_id="seals/Walker2d-v0", num_vec=1) - train = dict( + policy = dict( policy_cls="MlpPolicy", policy_kwargs=dict( activation_fn=nn.ReLU, From 9fdf8786663473334f94b24a841a832b29da435f Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Tue, 16 May 2023 19:00:32 +0530 Subject: [PATCH 14/54] Changes from review --- src/imitation/scripts/config/parallel.py | 16 ++++++++-------- .../scripts/config/train_adversarial.py | 4 ---- .../config/train_preference_comparisons.py | 6 ------ src/imitation/scripts/parallel.py | 18 +++++++----------- src/imitation/scripts/train_imitation.py | 1 + 5 files changed, 16 insertions(+), 29 deletions(-) diff --git a/src/imitation/scripts/config/parallel.py b/src/imitation/scripts/config/parallel.py index ea90f11b8..b52446154 100644 --- a/src/imitation/scripts/config/parallel.py +++ b/src/imitation/scripts/config/parallel.py @@ -102,9 +102,6 @@ def example_cartpole_rl(): resources_per_trial = dict(cpu=4) -EASY_ENVS = ["cartpole", "pendulum", "mountain_car"] - - @parallel_ex.named_config def example_rl(): sacred_ex_name = "train_rl" @@ -135,18 +132,21 @@ def example_bc(): sacred_ex_name = "train_imitation" run_name = "bc_tuning" base_named_configs = ["logging.wandb_logging"] - base_config_updates = {"environment": {"num_vec": 1}} + base_config_updates = { + "environment": {"num_vec": 1}, + "demonstrations": {"rollout_type": "ppo-huggingface"}, + } search_space = { "config_updates": { - "bc_kwargs": dict( + "bc": 
dict( batch_size=tune.choice([8, 16, 32, 64]), l2_weight=tune.loguniform(1e-6, 1e-2), # L2 regularization weight optimizer_kwargs=dict( lr=tune.loguniform(1e-5, 1e-2), ), - ), - "bc_train_kwargs": dict( - n_epochs=tune.choice([1, 5, 10, 20]), + train_kwargs=dict( + n_epochs=tune.choice([1, 5, 10, 20]), + ), ), }, "command_name": "bc", diff --git a/src/imitation/scripts/config/train_adversarial.py b/src/imitation/scripts/config/train_adversarial.py index 7989f3eab..ef675eab6 100644 --- a/src/imitation/scripts/config/train_adversarial.py +++ b/src/imitation/scripts/config/train_adversarial.py @@ -156,7 +156,6 @@ def half_cheetah(): @train_adversarial_ex.named_config def seals_half_cheetah(): - # locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/HalfCheetah-v0") demonstrations = dict(rollout_type="ppo-huggingface") rl = dict( @@ -184,7 +183,6 @@ def seals_half_cheetah(): @train_adversarial_ex.named_config def seals_hopper(): - # locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/Hopper-v0") demonstrations = dict(rollout_type="ppo-huggingface") policy = dict( @@ -212,7 +210,6 @@ def seals_hopper(): @train_adversarial_ex.named_config def seals_swimmer(): - # locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/Swimmer-v0") total_timesteps = int(2e6) demonstrations = dict(rollout_type="ppo-huggingface") @@ -242,7 +239,6 @@ def seals_swimmer(): @train_adversarial_ex.named_config def seals_walker(): - locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/Walker2d-v0") demonstrations = dict(rollout_type="ppo-huggingface") policy = dict( diff --git a/src/imitation/scripts/config/train_preference_comparisons.py b/src/imitation/scripts/config/train_preference_comparisons.py index 1a039c762..4fe9c793e 100644 --- a/src/imitation/scripts/config/train_preference_comparisons.py +++ b/src/imitation/scripts/config/train_preference_comparisons.py @@ -73,8 +73,6 @@ def cartpole(): @train_preference_comparisons_ex.named_config def seals_ant(): - # locals().update(**MUJOCO_SHARED_LOCALS) - # locals().update(**ANT_SHARED_LOCALS) environment = dict(gym_id="seals/Ant-v0") rl = dict( batch_size=2048, @@ -102,7 +100,6 @@ def half_cheetah(): @train_preference_comparisons_ex.named_config def seals_half_cheetah(): - # locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/HalfCheetah-v0") rl = dict( batch_size=512, @@ -124,7 +121,6 @@ def seals_half_cheetah(): @train_preference_comparisons_ex.named_config def seals_hopper(): - # locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/Hopper-v0") policy = dict( policy_cls="MlpPolicy", @@ -151,7 +147,6 @@ def seals_hopper(): @train_preference_comparisons_ex.named_config def seals_swimmer(): - # locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/Swimmer-v0") policy = dict( policy_cls="MlpPolicy", @@ -179,7 +174,6 @@ def seals_swimmer(): @train_preference_comparisons_ex.named_config def seals_walker(): - # locals().update(**MUJOCO_SHARED_LOCALS) environment = dict(gym_id="seals/Walker2d-v0") policy = dict( policy_cls="MlpPolicy", diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index 2dd2254bf..53b4c2b32 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -139,7 +139,7 @@ def parallel( syncer=syncer, ), metric="mean_return", - resume=resume, + resume=True, ) print( "Live trials:", @@ -176,14 +176,7 @@ def parallel( metric="mean_return", mode="max", ) - if 
sacred_ex_name == "train_rl": - key_prefix = "" - elif sacred_ex_name == "train_preference_comparisons": - key_prefix = "rollout/" - else: - key_prefix = "imit_stats/" - key = key_prefix + "monitor_return_mean" - + key = "mean_return" if eval_best_trial: df = result.results_df df = df[df["config/named_configs"].notna()] @@ -199,7 +192,7 @@ def parallel( # store mean return of runs across all seeds in a group df["mean_return"] = grps[key].transform(lambda x: x.mean()) best_config_df = df[df["mean_return"] == df["mean_return"].max()] - row = best_config_df.loc[0] + row = best_config_df.iloc[0] best_config_tag = row["experiment_tag"] if result.trials is not None: trial = [ @@ -215,7 +208,10 @@ def parallel( # update cpus per trial only if it is provided in `resources_per_trial` # Uses the default values (cpu=1) if it is not provided if "cpu" in resources_per_trial: - resources_per_trial["cpu"] *= eval_best_trial_resource_multiplier + resources_per_trial_eval = copy.deepcopy(resources_per_trial) + resources_per_trial_eval[ + "cpu" + ] *= eval_best_trial_resource_multiplier best_config["config_updates"].update( environment=dict(num_vec=resources_per_trial["cpu"]), ) diff --git a/src/imitation/scripts/train_imitation.py b/src/imitation/scripts/train_imitation.py index 56633e33a..5a6925eb3 100644 --- a/src/imitation/scripts/train_imitation.py +++ b/src/imitation/scripts/train_imitation.py @@ -76,6 +76,7 @@ def bc( expert_stats = _try_computing_expert_stats(expert_trajs) if expert_stats is not None: stats["expert_stats"] = expert_stats + stats["mean_return"] = imit_stats["monitor_return_mean"] return stats From 1c1dbc44970016fd5ef6bb965cf69afbf33590a1 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Tue, 16 May 2023 21:43:43 +0530 Subject: [PATCH 15/54] Fix errors with some configs --- src/imitation/scripts/config/parallel.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/imitation/scripts/config/parallel.py b/src/imitation/scripts/config/parallel.py index b52446154..095c67107 100644 --- a/src/imitation/scripts/config/parallel.py +++ b/src/imitation/scripts/config/parallel.py @@ -165,8 +165,9 @@ def example_dagger(): base_named_configs = ["logging.wandb_logging"] base_config_updates = { "environment": {"num_vec": 1}, + "demonstrations": {"rollout_type": "ppo-huggingface"}, "dagger": {"total_timesteps": 1e5}, - "bc_kwargs": { + "bc": { "batch_size": 16, "l2_weight": 1e-4, "optimizer_kwargs": {"lr": 1e-3}, @@ -174,8 +175,10 @@ def example_dagger(): } search_space = { "config_updates": { - "bc_train_kwargs": dict( - n_epochs=tune.choice([1, 5, 10]), + "bc": dict( + train_kwargs=dict( + n_epochs=tune.choice([1, 5, 10]), + ), ), "dagger": dict( beta_schedule=tune.choice( @@ -201,6 +204,7 @@ def example_gail(): base_named_configs = ["logging.wandb_logging"] base_config_updates = { "environment": {"num_vec": 1}, + "demonstrations": {"rollout_type": "ppo-huggingface"}, "total_timesteps": 1e7, } search_space = { @@ -234,6 +238,7 @@ def example_airl(): base_named_configs = ["logging.wandb_logging"] base_config_updates = { "environment": {"num_vec": 1}, + "demonstrations": {"rollout_type": "ppo-huggingface"}, "total_timesteps": 1e7, } search_space = { @@ -273,11 +278,9 @@ def example_pc(): "gatherer_kwargs": {"sample": True}, } search_space = { - "named_configs": tune.choice( - [ - ["reward.normalize_output_disable"], - ], - ), + "named_configs": [ + ["reward.normalize_output_disable"], + ], "config_updates": { "train": { "policy_kwargs": { 
From 44c4e97d64980118b3a07f06f7c15edb273a16a1 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Wed, 14 Jun 2023 06:38:42 +0530 Subject: [PATCH 16/54] Updates based on review --- src/imitation/scripts/analyze.py | 29 ++++++++++--------- src/imitation/scripts/parallel.py | 26 ++++++++++++----- src/imitation/scripts/train_adversarial.py | 1 - src/imitation/scripts/train_imitation.py | 1 - .../scripts/train_preference_comparisons.py | 3 +- src/imitation/scripts/train_rl.py | 1 - 6 files changed, 35 insertions(+), 26 deletions(-) diff --git a/src/imitation/scripts/analyze.py b/src/imitation/scripts/analyze.py index f036efe40..8977fed47 100644 --- a/src/imitation/scripts/analyze.py +++ b/src/imitation/scripts/analyze.py @@ -272,40 +272,43 @@ def analyze_imitation( The DataFrame generated from the Sacred logs. """ if table_verbosity == 3: + # Get column names for which we have get value using make_entry_fn + # These are same across Level 2 & 3. In Level 3, we additionally add remaining + # config columns. table_entry_fns_subset = _get_table_entry_fns_subset(2) else: table_entry_fns_subset = _get_table_entry_fns_subset(table_verbosity) - df = pd.DataFrame() + output_table = pd.DataFrame() for sd in _gather_sacred_dicts(): - new_df = pd.DataFrame() - if table_verbosity == -1: + if table_verbosity == 3: # gets all config columns - new_df = pd.json_normalize(sd.config) + row = pd.json_normalize(sd.config) else: - new_df = new_df.append({}, ignore_index=True) + # create an empty dataframe with a single row + row = pd.DataFrame(index=[0]) for col_name, make_entry_fn in table_entry_fns_subset.items(): - new_df[col_name] = make_entry_fn(sd) + row[col_name] = make_entry_fn(sd) - df = pd.concat([df, new_df]) + output_table = pd.concat([output_table, row]) - if len(df) > 0: - df.sort_values(by=["algo", "env_name"], inplace=True) + if len(output_table) > 0: + output_table.sort_values(by=["algo", "env_name"], inplace=True) display_options: Mapping[str, Any] = dict(index=False) if csv_output_path is not None: - df.to_csv(csv_output_path, **display_options) + output_table.to_csv(csv_output_path, **display_options) print(f"Wrote CSV file to {csv_output_path}") if tex_output_path is not None: - s: str = df.to_latex(**display_options) + s: str = output_table.to_latex(**display_options) with open(tex_output_path, "w") as f: f.write(s) print(f"Wrote TeX file to {tex_output_path}") if print_table: - print(df.to_string(**display_options)) - return df + print(output_table.to_string(**display_options)) + return output_table def _make_return_summary(stats: dict, prefix="") -> str: diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index 53b4c2b32..2bb0129cb 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -127,6 +127,12 @@ def parallel( ray.init(**init_kwargs) search_alg = search.Repeater(optuna.OptunaSearch(), repeat=repeat) + + if sacred_ex_name == "train_rl": + return_key = "monitor_return_mean" + else: + return_key = "imit_stats/monitor_return_mean" + try: if experiment_checkpoint_path: if resume: @@ -173,10 +179,9 @@ def parallel( syncer=syncer, ), search_alg=search_alg, - metric="mean_return", + metric=return_key, mode="max", ) - key = "mean_return" if eval_best_trial: df = result.results_df df = df[df["config/named_configs"].notna()] @@ -190,7 +195,7 @@ def parallel( ] grps = df.groupby(grp_keys) # store mean return of runs across all seeds in a group - df["mean_return"] = grps[key].transform(lambda x: x.mean()) + 
df["mean_return"] = grps[return_key].transform(lambda x: x.mean()) best_config_df = df[df["mean_return"] == df["mean_return"].max()] row = best_config_df.iloc[0] best_config_tag = row["experiment_tag"] @@ -200,20 +205,25 @@ def parallel( ][0] best_config = trial.config print("Mean return:", row["mean_return"]) - print("All returns:", df[df["mean_return"] == row["mean_return"]][key]) + print( + "All returns:", + df[df["mean_return"] == row["mean_return"]][return_key], + ) print("Total seeds:", (df["mean_return"] == row["mean_return"]).sum()) best_config["config_updates"].update( seed=ray.tune.grid_search(list(range(100, 100 + eval_trial_seeds))), ) + + resources_per_trial_eval = copy.deepcopy(resources_per_trial) # update cpus per trial only if it is provided in `resources_per_trial` # Uses the default values (cpu=1) if it is not provided if "cpu" in resources_per_trial: - resources_per_trial_eval = copy.deepcopy(resources_per_trial) + resources_per_trial_eval[ "cpu" ] *= eval_best_trial_resource_multiplier best_config["config_updates"].update( - environment=dict(num_vec=resources_per_trial["cpu"]), + environment=dict(num_vec=resources_per_trial_eval["cpu"]), ) eval_result = ray.tune.run( @@ -224,9 +234,9 @@ def parallel( "command_name": best_config.get("command_name", None), }, name=run_name + "_best_hp_eval", - resources_per_trial=resources_per_trial, + resources_per_trial=resources_per_trial_eval, ) - returns = eval_result.results_df["mean_return"].to_numpy() + returns = eval_result.results_df[return_key].to_numpy() print("All returns:", returns) print("Mean:", np.mean(returns)) print("Std:", np.std(returns)) diff --git a/src/imitation/scripts/train_adversarial.py b/src/imitation/scripts/train_adversarial.py index d1f99a54b..26c8d7bcf 100644 --- a/src/imitation/scripts/train_adversarial.py +++ b/src/imitation/scripts/train_adversarial.py @@ -167,7 +167,6 @@ def callback(round_num: int, /) -> None: return { "imit_stats": imit_stats, "expert_stats": rollout.rollout_stats(expert_trajs), - "mean_return": imit_stats["monitor_return_mean"], } diff --git a/src/imitation/scripts/train_imitation.py b/src/imitation/scripts/train_imitation.py index 5a6925eb3..56633e33a 100644 --- a/src/imitation/scripts/train_imitation.py +++ b/src/imitation/scripts/train_imitation.py @@ -76,7 +76,6 @@ def bc( expert_stats = _try_computing_expert_stats(expert_trajs) if expert_stats is not None: stats["expert_stats"] = expert_stats - stats["mean_return"] = imit_stats["monitor_return_mean"] return stats diff --git a/src/imitation/scripts/train_preference_comparisons.py b/src/imitation/scripts/train_preference_comparisons.py index b054a5a6c..867a666a4 100644 --- a/src/imitation/scripts/train_preference_comparisons.py +++ b/src/imitation/scripts/train_preference_comparisons.py @@ -280,8 +280,7 @@ def save_callback(iteration_num): # Storing and evaluating policy only useful if we generated trajectory data if bool(trajectory_path is None): results = dict(results) - results["rollout"] = policy_evaluation.eval_policy(agent, venv) - results["mean_return"] = results["rollout"]["monitor_return_mean"] + results["imit_stats"] = policy_evaluation.eval_policy(agent, venv) if save_preferences: main_trainer.dataset.save(log_dir / "preferences.pkl") diff --git a/src/imitation/scripts/train_rl.py b/src/imitation/scripts/train_rl.py index 20a7b263c..6780a557b 100644 --- a/src/imitation/scripts/train_rl.py +++ b/src/imitation/scripts/train_rl.py @@ -159,7 +159,6 @@ def train_rl( # Final evaluation of expert policy. 
eval_stats = policy_evaluation.eval_policy(rl_algo, venv) - eval_stats["mean_return"] = eval_stats["monitor_return_mean"] return eval_stats From ab0126998a4f8beb44e93eb11d6c2b17e68038a8 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Wed, 14 Jun 2023 07:40:52 +0530 Subject: [PATCH 17/54] Change metric everywhere --- src/imitation/scripts/parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index 2bb0129cb..6f77330df 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -144,7 +144,7 @@ def parallel( upload_dir=upload_dir, syncer=syncer, ), - metric="mean_return", + metric=return_key, resume=True, ) print( From e896d7db127f9025d89387cc10e513409fd973b1 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Tue, 11 Jul 2023 16:03:02 +0530 Subject: [PATCH 18/54] Separate tuning code from parallel.py --- benchmarking/tuning.py | 102 ++++++++++ benchmarking/tuning_config.py | 237 +++++++++++++++++++++++ setup.cfg | 1 + src/imitation/scripts/config/parallel.py | 216 +-------------------- src/imitation/scripts/parallel.py | 101 ++-------- 5 files changed, 363 insertions(+), 294 deletions(-) create mode 100644 benchmarking/tuning.py create mode 100644 benchmarking/tuning_config.py diff --git a/benchmarking/tuning.py b/benchmarking/tuning.py new file mode 100644 index 000000000..b4e62a84a --- /dev/null +++ b/benchmarking/tuning.py @@ -0,0 +1,102 @@ +"""Tunes the hyperparameters of the algorithms.""" + +import copy +import pathlib +from typing import Any, Dict + +import numpy as np +import ray +from pandas.api import types as pd_types +from sacred.observers import FileStorageObserver +from tuning_config import parallel_ex, tuning_ex + + +@tuning_ex.main +def tune( + parallel: Dict[str, Any], + eval_best_trial: bool = False, + eval_best_trial_resource_multiplier: int = 1, + eval_trial_seeds: int = 5, +) -> None: + """Tune hyperparameters of imitation algorithms using parallel script. + + Args: + parallel: A dictionary of arguments from the parallel script. + eval_best_trial: Whether to evaluate the trial with the best mean return + at the end of tuning on a separate set of seeds. + eval_best_trial_resource_multiplier: factor by which to multiply the + number of cpus per trial in `resources_per_trial`. + eval_trial_seeds: Number of distinct seeds to evaluate the best trial on. 
+ """ + run = parallel_ex.run(config_updates=parallel) + result = run.result + + if eval_best_trial: + if parallel["sacred_ex_name"] == "train_rl": + return_key = "monitor_return_mean" + else: + return_key = "imit_stats/monitor_return_mean" + df = result.results_df + df = df[df["config/named_configs"].notna()] + # convert object dtype to str required by df.groupby + for col in df.columns: + if pd_types.is_object_dtype(df[col]): + df[col] = df[col].astype("str") + # group into separate HP configs + grp_keys = [c for c in df.columns if c.startswith("config") and "seed" not in c] + grps = df.groupby(grp_keys) + # store mean return of runs across all seeds in a group + df["mean_return"] = grps[return_key].transform(lambda x: x.mean()) + best_config_df = df[df["mean_return"] == df["mean_return"].max()] + row = best_config_df.iloc[0] + best_config_tag = row["experiment_tag"] + if result.trials is not None: + trial = [t for t in result.trials if best_config_tag in t.experiment_tag][0] + best_config = trial.config + print("Mean return:", row["mean_return"]) + print( + "All returns:", + df[df["mean_return"] == row["mean_return"]][return_key], + ) + print("Total seeds:", (df["mean_return"] == row["mean_return"]).sum()) + + best_config["config_updates"].update( + seed=ray.tune.grid_search(list(range(100, 100 + eval_trial_seeds))), + ) + + resources_per_trial_eval = copy.deepcopy(parallel["resources_per_trial"]) + # update cpus per trial only if it is provided in `resources_per_trial` + # Uses the default values (cpu=1) if it is not provided + if "cpu" in parallel["resources_per_trial"]: + resources_per_trial_eval["cpu"] *= eval_best_trial_resource_multiplier + + eval_config_updates = parallel.copy() + eval_config_updates.update( + run_name=parallel["run_name"] + "_best_hp_eval", + num_samples=1, + search_space=best_config, + base_named_configs=parallel["base_named_configs"], + base_config_updates=parallel["base_config_updates"], + resources_per_trial=resources_per_trial_eval, + search_alg=None, + repeat=1, + experiment_checkpoint_path="", + resume=False, + ) + eval_run = parallel_ex.run(config_updates=eval_config_updates) + eval_result = eval_run.result + returns = eval_result.results_df[return_key].to_numpy() + print("All returns:", returns) + print("Mean:", np.mean(returns)) + print("Std:", np.std(returns)) + + +def main_console(): + observer_path = pathlib.Path.cwd() / "output" / "sacred" / "tuning" + observer = FileStorageObserver(observer_path) + tuning_ex.observers.append(observer) + tuning_ex.run_commandline() + + +if __name__ == "__main__": # pragma: no cover + main_console() diff --git a/benchmarking/tuning_config.py b/benchmarking/tuning_config.py new file mode 100644 index 000000000..79c8d0347 --- /dev/null +++ b/benchmarking/tuning_config.py @@ -0,0 +1,237 @@ +"""Config files for tuning experiments.""" + +import ray.tune as tune +import sacred +from torch import nn + +from imitation.algorithms import dagger +from imitation.scripts.parallel import parallel_ex + +tuning_ex = sacred.Experiment("tuning", ingredients=[parallel_ex]) + + +@tuning_ex.named_config +def example_rl(): + parallel = dict( + sacred_ex_name="train_rl", + run_name="rl_tuning", + base_named_configs=["logging.wandb_logging"], + base_config_updates={"environment": {"num_vec": 1}}, + search_space={ + "config_updates": { + "rl": { + "batch_size": tune.choice([512, 1024, 2048, 4096, 8192]), + "rl_kwargs": { + "learning_rate": tune.loguniform(1e-5, 1e-2), + "batch_size": tune.choice([64, 128, 256, 512]), + "n_epochs": 
tune.choice([5, 10, 20]), + }, + }, + }, + }, + num_samples=100, + repeat=1, + resources_per_trial=dict(cpu=1), + ) + eval_best_trial = True + eval_trial_seeds = 5 + + +@tuning_ex.named_config +def example_bc(): + parallel = dict( + sacred_ex_name="train_imitation", + run_name="bc_tuning", + base_named_configs=["logging.wandb_logging"], + base_config_updates={ + "environment": {"num_vec": 1}, + "demonstrations": {"source": "huggingface"}, + }, + search_space={ + "config_updates": { + "bc": dict( + batch_size=tune.choice([8, 16, 32, 64]), + l2_weight=tune.loguniform(1e-6, 1e-2), # L2 regularization weight + optimizer_kwargs=dict( + lr=tune.loguniform(1e-5, 1e-2), + ), + train_kwargs=dict( + n_epochs=tune.choice([1, 5, 10, 20]), + ), + ), + }, + "command_name": "bc", + }, + num_samples=2, + repeat=1, + resources_per_trial=dict(cpu=1), + ) + + eval_best_trial = True + eval_trial_seeds = 5 + eval_best_trial_resource_multiplier = 1 + + +@tuning_ex.named_config +def example_dagger(): + parallel = dict( + sacred_ex_name="train_imitation", + run_name="dagger_tuning", + base_named_configs=["logging.wandb_logging"], + base_config_updates={ + "environment": {"num_vec": 1}, + "demonstrations": {"source": "huggingface"}, + "dagger": {"total_timesteps": 1e5}, + "bc": { + "batch_size": 16, + "l2_weight": 1e-4, + "optimizer_kwargs": {"lr": 1e-3}, + }, + }, + search_space={ + "config_updates": { + "bc": dict( + train_kwargs=dict( + n_epochs=tune.choice([1, 5, 10]), + ), + ), + "dagger": dict( + beta_schedule=tune.choice( + [dagger.LinearBetaSchedule(i) for i in [1, 5, 15]] + + [dagger.ExponentialBetaSchedule(i) for i in [0.3, 0.5, 0.7]], + ), + rollout_round_min_episodes=tune.choice([3, 5, 10]), + ), + }, + "command_name": "dagger", + }, + num_samples=50, + repeat=3, + resources_per_trial=dict(cpu=1), + ) + eval_best_trial = True + eval_trial_seeds = 5 + + +@tuning_ex.named_config +def example_gail(): + parallel = dict( + sacred_ex_name="train_adversarial", + run_name="gail_tuning_hc", + base_named_configs=["logging.wandb_logging"], + base_config_updates={ + "environment": {"num_vec": 1}, + "demonstrations": {"source": "huggingface"}, + "total_timesteps": 1e7, + }, + search_space={ + "config_updates": { + "algorithm_kwargs": dict( + demo_batch_size=tune.choice([32, 128, 512, 2048, 8192]), + n_disc_updates_per_round=tune.choice([8, 16]), + ), + "rl": { + "batch_size": tune.choice([4096, 8192, 16384]), + "rl_kwargs": { + "ent_coef": tune.loguniform(1e-7, 1e-3), + "learning_rate": tune.loguniform(1e-5, 1e-2), + }, + }, + "algorithm_specific": {}, + }, + "command_name": "gail", + }, + num_samples=100, + repeat=3, + resources_per_trial=dict(cpu=1), + ) + eval_best_trial = True + eval_trial_seeds = 5 + + +@tuning_ex.named_config +def example_airl(): + parallel = dict( + sacred_ex_name="train_adversarial", + run_name="airl_tuning", + base_named_configs=["logging.wandb_logging"], + base_config_updates={ + "environment": {"num_vec": 1}, + "demonstrations": {"source": "huggingface"}, + "total_timesteps": 1e7, + }, + search_space={ + "config_updates": { + "algorithm_kwargs": dict( + demo_batch_size=tune.choice([32, 128, 512, 2048, 8192]), + n_disc_updates_per_round=tune.choice([8, 16]), + ), + "rl": { + "batch_size": tune.choice([4096, 8192, 16384]), + "rl_kwargs": { + "ent_coef": tune.loguniform(1e-7, 1e-3), + "learning_rate": tune.loguniform(1e-5, 1e-2), + }, + }, + "algorithm_specific": {}, + }, + "command_name": "airl", + }, + num_samples=100, + repeat=3, + resources_per_trial=dict(cpu=1), + ) + + eval_best_trial 
= True + eval_trial_seeds = 5 + + +@tuning_ex.named_config +def example_pc(): + parallel = dict( + sacred_ex_name="train_preference_comparisons", + run_name="pc_tuning", + base_named_configs=["logging.wandb_logging"], + base_config_updates={ + "environment": {"num_vec": 1}, + "demonstrations": {"source": "huggingface"}, + "total_timesteps": 2e7, + "total_comparisons": 5000, + "query_schedule": "hyperbolic", + "gatherer_kwargs": {"sample": True}, + }, + search_space={ + "named_configs": [ + ["reward.normalize_output_disable"], + ], + "config_updates": { + "train": { + "policy_kwargs": { + "activation_fn": tune.choice( + [ + nn.ReLU, + ], + ), + }, + }, + "num_iterations": tune.choice([25, 50]), + "initial_comparison_frac": tune.choice([0.1, 0.25]), + "reward_trainer_kwargs": { + "epochs": tune.choice([1, 3, 6]), + }, + "rl": { + "batch_size": tune.choice([512, 2048, 8192]), + "rl_kwargs": { + "learning_rate": tune.loguniform(1e-5, 1e-2), + "ent_coef": tune.loguniform(1e-7, 1e-3), + }, + }, + }, + }, + num_samples=100, + repeat=3, + resources_per_trial=dict(cpu=1), + ) + + eval_best_trial = True + eval_trial_seeds = 5 diff --git a/setup.cfg b/setup.cfg index 979c3ca46..f39db322f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,6 +7,7 @@ per-file-ignores = # F841 local variable unused [for Sacred config scopes] src/imitation/scripts/config/*.py:F841 ../src/imitation/scripts/config/*.py:F841 + benchmarking/tuning_config.py:F841 src/imitation/envs/examples/airl_envs/*.py:D [darglint] diff --git a/src/imitation/scripts/config/parallel.py b/src/imitation/scripts/config/parallel.py index 095c67107..e9c5b8245 100644 --- a/src/imitation/scripts/config/parallel.py +++ b/src/imitation/scripts/config/parallel.py @@ -16,9 +16,7 @@ import numpy as np import ray.tune as tune import sacred -from torch import nn -from imitation.algorithms import dagger from imitation.util.util import make_unique_timestamp parallel_ex = sacred.Experiment("parallel") @@ -45,6 +43,10 @@ def config(): eval_trial_seeds = 5 # Number of seeds to search over by default num_samples = 1 # Number of samples per grid search configuration repeat = 1 + search_alg = "optuna" # search algorithm to use + experiment_checkpoint_path = "" # Path to checkpoint of experiment to resume + syncer = None # Sacred syncer to use + resume = False # Whether to resume experiment from checkpoint # Debug named configs @@ -100,213 +102,3 @@ def example_cartpole_rl(): } base_named_configs = ["cartpole"] resources_per_trial = dict(cpu=4) - - -@parallel_ex.named_config -def example_rl(): - sacred_ex_name = "train_rl" - run_name = "rl_tuning" - base_named_configs = ["logging.wandb_logging"] - base_config_updates = {"environment": {"num_vec": 1}} - search_space = { - "config_updates": { - "rl": { - "batch_size": tune.choice([512, 1024, 2048, 4096, 8192]), - "rl_kwargs": { - "learning_rate": tune.loguniform(1e-5, 1e-2), - "batch_size": tune.choice([64, 128, 256, 512]), - "n_epochs": tune.choice([5, 10, 20]), - }, - }, - }, - } - num_samples = 100 - eval_best_trial = True - eval_trial_seeds = 5 - repeat = 1 - resources_per_trial = dict(cpu=1) - - -@parallel_ex.named_config -def example_bc(): - sacred_ex_name = "train_imitation" - run_name = "bc_tuning" - base_named_configs = ["logging.wandb_logging"] - base_config_updates = { - "environment": {"num_vec": 1}, - "demonstrations": {"rollout_type": "ppo-huggingface"}, - } - search_space = { - "config_updates": { - "bc": dict( - batch_size=tune.choice([8, 16, 32, 64]), - l2_weight=tune.loguniform(1e-6, 1e-2), # L2 
regularization weight - optimizer_kwargs=dict( - lr=tune.loguniform(1e-5, 1e-2), - ), - train_kwargs=dict( - n_epochs=tune.choice([1, 5, 10, 20]), - ), - ), - }, - "command_name": "bc", - } - num_samples = 64 - eval_best_trial = True - eval_trial_seeds = 5 - repeat = 3 - resources_per_trial = dict(cpu=1) - - -@parallel_ex.named_config -def example_dagger(): - sacred_ex_name = "train_imitation" - run_name = "dagger_tuning" - base_named_configs = ["logging.wandb_logging"] - base_config_updates = { - "environment": {"num_vec": 1}, - "demonstrations": {"rollout_type": "ppo-huggingface"}, - "dagger": {"total_timesteps": 1e5}, - "bc": { - "batch_size": 16, - "l2_weight": 1e-4, - "optimizer_kwargs": {"lr": 1e-3}, - }, - } - search_space = { - "config_updates": { - "bc": dict( - train_kwargs=dict( - n_epochs=tune.choice([1, 5, 10]), - ), - ), - "dagger": dict( - beta_schedule=tune.choice( - [dagger.LinearBetaSchedule(i) for i in [1, 5, 15]] - + [dagger.ExponentialBetaSchedule(i) for i in [0.3, 0.5, 0.7]], - ), - rollout_round_min_episodes=tune.choice([3, 5, 10]), - ), - }, - "command_name": "dagger", - } - num_samples = 50 - repeat = 3 - eval_best_trial = True - eval_trial_seeds = 5 - resources_per_trial = dict(cpu=1) - - -@parallel_ex.named_config -def example_gail(): - sacred_ex_name = "train_adversarial" - run_name = "gail_tuning_hc" - base_named_configs = ["logging.wandb_logging"] - base_config_updates = { - "environment": {"num_vec": 1}, - "demonstrations": {"rollout_type": "ppo-huggingface"}, - "total_timesteps": 1e7, - } - search_space = { - "config_updates": { - "algorithm_kwargs": dict( - demo_batch_size=tune.choice([32, 128, 512, 2048, 8192]), - n_disc_updates_per_round=tune.choice([8, 16]), - ), - "rl": { - "batch_size": tune.choice([4096, 8192, 16384]), - "rl_kwargs": { - "ent_coef": tune.loguniform(1e-7, 1e-3), - "learning_rate": tune.loguniform(1e-5, 1e-2), - }, - }, - "algorithm_specific": {}, - }, - "command_name": "gail", - } - num_samples = 100 - eval_best_trial = True - eval_trial_seeds = 5 - repeat = 3 - resources_per_trial = dict(cpu=1) - - -@parallel_ex.named_config -def example_airl(): - sacred_ex_name = "train_adversarial" - run_name = "airl_tuning" - base_named_configs = ["logging.wandb_logging"] - base_config_updates = { - "environment": {"num_vec": 1}, - "demonstrations": {"rollout_type": "ppo-huggingface"}, - "total_timesteps": 1e7, - } - search_space = { - "config_updates": { - "algorithm_kwargs": dict( - demo_batch_size=tune.choice([32, 128, 512, 2048, 8192]), - n_disc_updates_per_round=tune.choice([8, 16]), - ), - "rl": { - "batch_size": tune.choice([4096, 8192, 16384]), - "rl_kwargs": { - "ent_coef": tune.loguniform(1e-7, 1e-3), - "learning_rate": tune.loguniform(1e-5, 1e-2), - }, - }, - "algorithm_specific": {}, - }, - "command_name": "airl", - } - num_samples = 100 - eval_best_trial = True - eval_trial_seeds = 5 - repeat = 3 - resources_per_trial = dict(cpu=1) - - -@parallel_ex.named_config -def example_pc(): - sacred_ex_name = "train_preference_comparisons" - run_name = "pc_tuning" - base_named_configs = ["logging.wandb_logging"] - base_config_updates = { - "environment": {"num_vec": 1}, - "total_timesteps": 2e7, - "total_comparisons": 5000, - "query_schedule": "hyperbolic", - "gatherer_kwargs": {"sample": True}, - } - search_space = { - "named_configs": [ - ["reward.normalize_output_disable"], - ], - "config_updates": { - "train": { - "policy_kwargs": { - "activation_fn": tune.choice( - [ - nn.ReLU, - ], - ), - }, - }, - "num_iterations": tune.choice([25, 50]), 
- "initial_comparison_frac": tune.choice([0.1, 0.25]), - "reward_trainer_kwargs": { - "epochs": tune.choice([1, 3, 6]), - }, - "rl": { - "batch_size": tune.choice([512, 2048, 8192]), - "rl_kwargs": { - "learning_rate": tune.loguniform(1e-5, 1e-2), - "ent_coef": tune.loguniform(1e-7, 1e-3), - }, - }, - }, - } - num_samples = 100 - eval_best_trial = True - eval_trial_seeds = 5 - repeat = 3 - resources_per_trial = dict(cpu=1) diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index 6f77330df..2417414cb 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -6,11 +6,9 @@ import pathlib from typing import Any, Callable, Dict, Mapping, Optional, Sequence, Union -import numpy as np import ray import ray.tune import sacred -from pandas.api.types import is_object_dtype from ray.tune import search from ray.tune.registry import register_trainable from ray.tune.search import optuna @@ -31,14 +29,12 @@ def parallel( init_kwargs: Mapping[str, Any], local_dir: Optional[str], upload_dir: Optional[str], - repeat: int = 1, - eval_best_trial: bool = False, - eval_best_trial_resource_multiplier: int = 1, - eval_trial_seeds: int = 5, - experiment_checkpoint_path: str = "", - syncer=None, - resume: Union[str, bool] = False, -) -> None: + repeat: int, + search_alg: Optional[str], + experiment_checkpoint_path: str, + syncer, + resume: Union[str, bool], +) -> ray.tune.ExperimentAnalysis: """Parallelize multiple runs of another Sacred Experiment using Ray Tune. A Sacred FileObserver is attached to the inner experiment and writes Sacred @@ -47,7 +43,7 @@ def parallel( Args: sacred_ex_name: The Sacred experiment to tune. Either "train_rl" or - "train_adversarial". + "train_imitation" or "train_adversarial" or "train_preference_comparisons". run_name: A name describing this parallelizing experiment. This argument is also passed to `ray.tune.run` as the `name` argument. It is also saved in 'sacred/run.json' of each inner Sacred experiment @@ -78,24 +74,19 @@ def parallel( init_kwargs: Arguments to pass to `ray.init`. local_dir: `local_dir` argument to `ray.tune.run()`. upload_dir: `upload_dir` argument to `ray.tune.run()`. + search_alg: can be either "optuna" or None. repeat: Number of runs to repeat each trial for. - eval_best_trial: Whether to evaluate the trial with the best mean return - at the end of tuning on a separate set of seeds. - eval_best_trial_resource_multiplier: factor by which to multiply the - number of cpus per trial in `resources_per_trial`. - eval_trial_seeds: Number of distinct seeds to evaluate the best trial on. - experiment_checkpoint_path: Path containing the checkpoints of a previous - experiment ran using this script. Useful for resuming cancelled trials - of the experiments (using `resume`) or evaluating the best trial of the - experiment (using `eval_best_trial`). + Not used if `search_alg` is None. resume: If true and `experiment_checkpoint_path` is given, then resumes the experiment by restarting the trials that did not finish in the experiment checkpoint path. syncer: `syncer` argument to `ray.tune.syncer.SyncConfig`. - Raises: TypeError: Named configs not string sequences or config updates not mappings. + + Returns: + The result of `ray.tune.run()`. """ # Basic validation for config options before we enter parallel jobs. 
if not isinstance(base_named_configs, collections.abc.Sequence): @@ -126,7 +117,11 @@ def parallel( ) ray.init(**init_kwargs) - search_alg = search.Repeater(optuna.OptunaSearch(), repeat=repeat) + if search_alg == "optuna": + algo = search.Repeater(optuna.OptunaSearch(), repeat=repeat) + else: + assert repeat == 1 # repeat should not be used if search_alg is None + algo = None if sacred_ex_name == "train_rl": return_key = "monitor_return_mean" @@ -166,7 +161,6 @@ def parallel( result.trials = None result.fetch_trial_dataframes() else: - # run hyperparameter tuning result = ray.tune.run( trainable, config=search_space, @@ -178,68 +172,11 @@ def parallel( upload_dir=upload_dir, syncer=syncer, ), - search_alg=search_alg, + search_alg=algo, metric=return_key, mode="max", ) - if eval_best_trial: - df = result.results_df - df = df[df["config/named_configs"].notna()] - # convert object dtype to str required by df.groupby - for col in df.columns: - if is_object_dtype(df[col]): - df[col] = df[col].astype("str") - # group into separate HP configs - grp_keys = [ - c for c in df.columns if c.startswith("config") and "seed" not in c - ] - grps = df.groupby(grp_keys) - # store mean return of runs across all seeds in a group - df["mean_return"] = grps[return_key].transform(lambda x: x.mean()) - best_config_df = df[df["mean_return"] == df["mean_return"].max()] - row = best_config_df.iloc[0] - best_config_tag = row["experiment_tag"] - if result.trials is not None: - trial = [ - t for t in result.trials if best_config_tag in t.experiment_tag - ][0] - best_config = trial.config - print("Mean return:", row["mean_return"]) - print( - "All returns:", - df[df["mean_return"] == row["mean_return"]][return_key], - ) - print("Total seeds:", (df["mean_return"] == row["mean_return"]).sum()) - best_config["config_updates"].update( - seed=ray.tune.grid_search(list(range(100, 100 + eval_trial_seeds))), - ) - - resources_per_trial_eval = copy.deepcopy(resources_per_trial) - # update cpus per trial only if it is provided in `resources_per_trial` - # Uses the default values (cpu=1) if it is not provided - if "cpu" in resources_per_trial: - - resources_per_trial_eval[ - "cpu" - ] *= eval_best_trial_resource_multiplier - best_config["config_updates"].update( - environment=dict(num_vec=resources_per_trial_eval["cpu"]), - ) - - eval_result = ray.tune.run( - trainable, - config={ - "named_configs": best_config["named_configs"], - "config_updates": best_config["config_updates"], - "command_name": best_config.get("command_name", None), - }, - name=run_name + "_best_hp_eval", - resources_per_trial=resources_per_trial_eval, - ) - returns = eval_result.results_df[return_key].to_numpy() - print("All returns:", returns) - print("Mean:", np.mean(returns)) - print("Std:", np.std(returns)) + return result finally: ray.shutdown() From 64c3a8d0deb8748eba2a69be20d7f9a464639523 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Tue, 11 Jul 2023 16:07:13 +0530 Subject: [PATCH 19/54] Fix docstring --- src/imitation/scripts/parallel.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index 2417414cb..10ae9f924 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -77,6 +77,10 @@ def parallel( search_alg: can be either "optuna" or None. repeat: Number of runs to repeat each trial for. Not used if `search_alg` is None. 
+ experiment_checkpoint_path: Path containing the checkpoints of a previous + experiment ran using this script. Useful for resuming cancelled trials + of the experiments (using `resume`) or evaluating the best trial of the + experiment (using `eval_best_trial`). resume: If true and `experiment_checkpoint_path` is given, then resumes the experiment by restarting the trials that did not finish in the experiment checkpoint path. From 8fba0d3ac9b690613b7526b68bd1c68b3ac6efa7 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Tue, 11 Jul 2023 17:42:08 +0530 Subject: [PATCH 20/54] Removing resume option as it is getting tricky to correctly implement --- src/imitation/scripts/config/parallel.py | 5 +--- src/imitation/scripts/parallel.py | 31 ++---------------------- tests/scripts/test_scripts.py | 1 + 3 files changed, 4 insertions(+), 33 deletions(-) diff --git a/src/imitation/scripts/config/parallel.py b/src/imitation/scripts/config/parallel.py index e9c5b8245..3416f9442 100644 --- a/src/imitation/scripts/config/parallel.py +++ b/src/imitation/scripts/config/parallel.py @@ -39,14 +39,11 @@ def config(): local_dir = None # `local_dir` arg for `ray.tune.run` upload_dir = None # `upload_dir` arg for `ray.tune.run` experiment_checkpoint_path = "" - eval_best_trial = False - eval_trial_seeds = 5 # Number of seeds to search over by default num_samples = 1 # Number of samples per grid search configuration repeat = 1 search_alg = "optuna" # search algorithm to use - experiment_checkpoint_path = "" # Path to checkpoint of experiment to resume + experiment_checkpoint_path = "" # Path to checkpoint of experiment syncer = None # Sacred syncer to use - resume = False # Whether to resume experiment from checkpoint # Debug named configs diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index 10ae9f924..bf73c1c72 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -33,7 +33,6 @@ def parallel( search_alg: Optional[str], experiment_checkpoint_path: str, syncer, - resume: Union[str, bool], ) -> ray.tune.ExperimentAnalysis: """Parallelize multiple runs of another Sacred Experiment using Ray Tune. @@ -78,12 +77,8 @@ def parallel( repeat: Number of runs to repeat each trial for. Not used if `search_alg` is None. experiment_checkpoint_path: Path containing the checkpoints of a previous - experiment ran using this script. Useful for resuming cancelled trials - of the experiments (using `resume`) or evaluating the best trial of the - experiment (using `eval_best_trial`). - resume: If true and `experiment_checkpoint_path` is given, then resumes the - experiment by restarting the trials that did not finish in the experiment - checkpoint path. + experiment ran using this script. Useful for evaluating the best trial + of the experiment. syncer: `syncer` argument to `ray.tune.syncer.SyncConfig`. 
Raises: @@ -134,28 +129,6 @@ def parallel( try: if experiment_checkpoint_path: - if resume: - # restart failed runs from experiment_checkpoint_path - register_trainable("inner", trainable) - runner = ray.tune.execution.trial_runner.TrialRunner( - local_checkpoint_dir=experiment_checkpoint_path, - sync_config=ray.tune.syncer.SyncConfig( - upload_dir=upload_dir, - syncer=syncer, - ), - metric=return_key, - resume=True, - ) - print( - "Live trials:", - len(runner._live_trials), - "/", - len(runner._trials), - ) - while not runner.is_finished(): - runner.step() - print("Debug:", runner.debug_string()) - # load experiment analysis results result = ray.tune.ExperimentAnalysis(experiment_checkpoint_path) result._load_checkpoints_from_latest( diff --git a/tests/scripts/test_scripts.py b/tests/scripts/test_scripts.py index 4435155cd..586fa91ba 100644 --- a/tests/scripts/test_scripts.py +++ b/tests/scripts/test_scripts.py @@ -802,6 +802,7 @@ def test_train_rl_cnn_policy(tmpdir: str, rng): # Need absolute path because raylet runs in different working directory. "demonstrations.path": CARTPOLE_TEST_ROLLOUT_PATH.absolute(), }, + search_alg=None, # Use default search algorithm of ray. search_space={ "command_name": "airl", "config_updates": {"total_timesteps": tune.choice([5, 10])}, From 12ab31c1641b6b99abb6823cf037a3f9340cb86c Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Wed, 12 Jul 2023 04:26:17 +0530 Subject: [PATCH 21/54] Minor fixes --- src/imitation/scripts/config/analyze.py | 2 +- src/imitation/scripts/config/parallel.py | 2 +- src/imitation/scripts/parallel.py | 5 ++--- tests/scripts/test_scripts.py | 7 ++++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/imitation/scripts/config/analyze.py b/src/imitation/scripts/config/analyze.py index 5213a875d..01cc2d035 100644 --- a/src/imitation/scripts/config/analyze.py +++ b/src/imitation/scripts/config/analyze.py @@ -18,7 +18,7 @@ def config(): tex_output_path = None # Write LaTex output to this path print_table = True # Set to True to print analysis to stdout split_str = "," # str used to split source_dir_str into multiple source dirs - table_verbosity = 1 # Choose from 0, 1, or 2 + table_verbosity = 1 # Choose from 0, 1, 2 or 3 source_dirs = None diff --git a/src/imitation/scripts/config/parallel.py b/src/imitation/scripts/config/parallel.py index 3416f9442..b09f9fc4a 100644 --- a/src/imitation/scripts/config/parallel.py +++ b/src/imitation/scripts/config/parallel.py @@ -42,7 +42,7 @@ def config(): num_samples = 1 # Number of samples per grid search configuration repeat = 1 search_alg = "optuna" # search algorithm to use - experiment_checkpoint_path = "" # Path to checkpoint of experiment + experiment_checkpoint_path = "" # Path to checkpoint of experiment syncer = None # Sacred syncer to use diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index bf73c1c72..ebda17c82 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -4,13 +4,12 @@ import copy import glob import pathlib -from typing import Any, Callable, Dict, Mapping, Optional, Sequence, Union +from typing import Any, Callable, Dict, Mapping, Optional, Sequence import ray import ray.tune import sacred from ray.tune import search -from ray.tune.registry import register_trainable from ray.tune.search import optuna from sacred.observers import FileStorageObserver @@ -78,7 +77,7 @@ def parallel( Not used if `search_alg` is None. 
experiment_checkpoint_path: Path containing the checkpoints of a previous experiment ran using this script. Useful for evaluating the best trial - of the experiment. + of the experiment. syncer: `syncer` argument to `ray.tune.syncer.SyncConfig`. Raises: diff --git a/tests/scripts/test_scripts.py b/tests/scripts/test_scripts.py index 586fa91ba..e17765471 100644 --- a/tests/scripts/test_scripts.py +++ b/tests/scripts/test_scripts.py @@ -802,7 +802,7 @@ def test_train_rl_cnn_policy(tmpdir: str, rng): # Need absolute path because raylet runs in different working directory. "demonstrations.path": CARTPOLE_TEST_ROLLOUT_PATH.absolute(), }, - search_alg=None, # Use default search algorithm of ray. + search_alg=None, # Use default search algorithm of ray. search_space={ "command_name": "airl", "config_updates": {"total_timesteps": tune.choice([5, 10])}, @@ -942,7 +942,7 @@ def test_analyze_imitation(tmpdir: str, run_names: List[str], run_sacred_fn): assert run.status == "COMPLETED" # Check that analyze script finds the correct number of logs. - def check(run_name: Optional[str], count: int) -> None: + def check(run_name: Optional[str], count: int, table_verbosity=1) -> None: run = analyze.analysis_ex.run( command_name="analyze_imitation", config_updates=dict( @@ -952,6 +952,7 @@ def check(run_name: Optional[str], count: int) -> None: csv_output_path=tmpdir_path / "analysis.csv", tex_output_path=tmpdir_path / "analysis.tex", print_table=True, + table_verbosity=table_verbosity, ), ) assert run.status == "COMPLETED" @@ -961,7 +962,7 @@ def check(run_name: Optional[str], count: int) -> None: for run_name, count in Counter(run_names).items(): check(run_name, count) - check(None, len(run_names)) # Check total number of logs. + check(None, len(run_names), table_verbosity=3) # Check total number of logs. def test_analyze_gather_tb(tmpdir: str): From 19b0f2c3ed8d7d2ef10aaabab21739d31b51261c Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Sun, 16 Jul 2023 10:39:12 +0530 Subject: [PATCH 22/54] Updates from review --- benchmarking/tuning.py | 202 +++++++++++++++-------- benchmarking/tuning_config.py | 36 ++-- src/imitation/scripts/config/parallel.py | 3 +- src/imitation/scripts/parallel.py | 9 +- tests/test_benchmarking.py | 27 +++ 5 files changed, 180 insertions(+), 97 deletions(-) diff --git a/benchmarking/tuning.py b/benchmarking/tuning.py index b4e62a84a..0c18b1256 100644 --- a/benchmarking/tuning.py +++ b/benchmarking/tuning.py @@ -13,82 +13,144 @@ @tuning_ex.main def tune( - parallel: Dict[str, Any], - eval_best_trial: bool = False, + parallel_run_config: Dict[str, Any], eval_best_trial_resource_multiplier: int = 1, - eval_trial_seeds: int = 5, + num_eval_seeds: int = 5, ) -> None: """Tune hyperparameters of imitation algorithms using parallel script. Args: - parallel: A dictionary of arguments from the parallel script. - eval_best_trial: Whether to evaluate the trial with the best mean return - at the end of tuning on a separate set of seeds. - eval_best_trial_resource_multiplier: factor by which to multiply the - number of cpus per trial in `resources_per_trial`. - eval_trial_seeds: Number of distinct seeds to evaluate the best trial on. + parallel_run_config: Dictionary of arguments to pass to the parallel script. + eval_best_trial_resource_multiplier: Factor by which to multiply the + number of cpus per trial in `resources_per_trial`. 
This is useful for + allocating more resources per trial to the evaluation trials than the + resources for hyperparameter tuning since number of evaluation trials + is usually much smaller than the number of tuning trials. + num_eval_seeds: Number of distinct seeds to evaluate the best trial on. + Set to 0 to disable evaluation. + + Raises: + ValueError: If no trials are returned by. + """ + run = parallel_ex.run(config_updates=parallel_run_config) + experiment_analysis = run.result + if not experiment_analysis.trials: + raise ValueError( + "No trials found. Please ensure that the `experiment_checkpoint_path` " + "in `parallel_run_config` is passed correctly " + "or that the tuning run finished properly.", + ) + + return_key = "imit_stats/monitor_return_mean" + if parallel_run_config["sacred_ex_name"] == "train_rl": + return_key = "monitor_return_mean" + best_trial = find_best_trial(experiment_analysis, return_key, print_return=True) + + if num_eval_seeds > 0: # evaluate the best trial + resources_per_trial_eval = copy.deepcopy( + parallel_run_config["resources_per_trial"], + ) + # update cpus per trial only if it is provided in `resources_per_trial` + # Uses the default values (cpu=1) if it is not provided + if "cpu" in parallel_run_config["resources_per_trial"]: + resources_per_trial_eval["cpu"] *= eval_best_trial_resource_multiplier + evaluate_best_trial( + best_trial, + num_eval_seeds, + parallel_run_config, + resources_per_trial_eval, + return_key, + ) + + +def find_best_trial( + experiment_analysis: ray.tune.analysis.ExperimentAnalysis, + return_key: str, + print_return: bool = False, +) -> ray.tune.experiment.Trial: + """Find the trial with the best mean return across all seeds. + + Args: + experiment_analysis: The result of a parallel/tuning experiment. + return_key: The key of the return metric in the results dataframe. + print_return: Whether to print the mean and std of the returns + of the best trial. + + Returns: + best_trial: The trial with the best mean return across all seeds. + """ + df = experiment_analysis.results_df + # convert object dtype to str required by df.groupby + for col in df.columns: + if pd_types.is_object_dtype(df[col]): + df[col] = df[col].astype("str") + # group into separate HP configs + grp_keys = [c for c in df.columns if c.startswith("config") and "seed" not in c] + grps = df.groupby(grp_keys) + # store mean return of runs across all seeds in a group + df["mean_return"] = grps[return_key].transform(lambda x: x.mean()) + best_config_df = df[df["mean_return"] == df["mean_return"].max()] + row = best_config_df.iloc[0] + best_config_tag = row["experiment_tag"] + assert experiment_analysis.trials is not None # for mypy + best_trial = [ + t for t in experiment_analysis.trials if best_config_tag in t.experiment_tag + ][0] + + if print_return: + all_returns = df[df["mean_return"] == row["mean_return"]][return_key] + all_returns = all_returns.to_numpy() + print("All returns:", all_returns) + print("Mean return:", row["mean_return"]) + print("Std return:", np.std(all_returns)) + print("Total seeds:", len(all_returns)) + return best_trial + + +def evaluate_best_trial( + best_trial: ray.tune.experiment.Trial, + num_eval_seeds: int, + parallel_run_config: Dict[str, Any], + resources_per_trial: Dict[str, int], + return_key: str, + print_return: bool = False, +): + """Evaluate the best trial of a parallel run on a separate set of seeds. + + Args: + best_trial: The trial with the best mean return across all seeds. 
+ num_eval_seeds: Number of distinct seeds to evaluate the best trial on. + parallel_run_config: Dictionary of arguments passed to the parallel + script to get best_trial. + resources_per_trial: Resources to be used for each evaluation trial. + return_key: The key of the return metric in the results dataframe. + print_return: Whether to print the mean and std of the evaluation returns. + + Returns: + eval_run: The result of the evaluation run. """ - run = parallel_ex.run(config_updates=parallel) - result = run.result - - if eval_best_trial: - if parallel["sacred_ex_name"] == "train_rl": - return_key = "monitor_return_mean" - else: - return_key = "imit_stats/monitor_return_mean" - df = result.results_df - df = df[df["config/named_configs"].notna()] - # convert object dtype to str required by df.groupby - for col in df.columns: - if pd_types.is_object_dtype(df[col]): - df[col] = df[col].astype("str") - # group into separate HP configs - grp_keys = [c for c in df.columns if c.startswith("config") and "seed" not in c] - grps = df.groupby(grp_keys) - # store mean return of runs across all seeds in a group - df["mean_return"] = grps[return_key].transform(lambda x: x.mean()) - best_config_df = df[df["mean_return"] == df["mean_return"].max()] - row = best_config_df.iloc[0] - best_config_tag = row["experiment_tag"] - if result.trials is not None: - trial = [t for t in result.trials if best_config_tag in t.experiment_tag][0] - best_config = trial.config - print("Mean return:", row["mean_return"]) - print( - "All returns:", - df[df["mean_return"] == row["mean_return"]][return_key], - ) - print("Total seeds:", (df["mean_return"] == row["mean_return"]).sum()) - - best_config["config_updates"].update( - seed=ray.tune.grid_search(list(range(100, 100 + eval_trial_seeds))), - ) - - resources_per_trial_eval = copy.deepcopy(parallel["resources_per_trial"]) - # update cpus per trial only if it is provided in `resources_per_trial` - # Uses the default values (cpu=1) if it is not provided - if "cpu" in parallel["resources_per_trial"]: - resources_per_trial_eval["cpu"] *= eval_best_trial_resource_multiplier - - eval_config_updates = parallel.copy() - eval_config_updates.update( - run_name=parallel["run_name"] + "_best_hp_eval", - num_samples=1, - search_space=best_config, - base_named_configs=parallel["base_named_configs"], - base_config_updates=parallel["base_config_updates"], - resources_per_trial=resources_per_trial_eval, - search_alg=None, - repeat=1, - experiment_checkpoint_path="", - resume=False, - ) - eval_run = parallel_ex.run(config_updates=eval_config_updates) - eval_result = eval_run.result - returns = eval_result.results_df[return_key].to_numpy() - print("All returns:", returns) - print("Mean:", np.mean(returns)) - print("Std:", np.std(returns)) + best_config = best_trial.config + best_config["config_updates"].update( + seed=ray.tune.grid_search(list(range(100, 100 + num_eval_seeds))), + ) + eval_config_updates = parallel_run_config.copy() + eval_config_updates.update( + run_name=parallel_run_config["run_name"] + "_best_hp_eval", + num_samples=1, + search_space=best_config, + resources_per_trial=resources_per_trial, + search_alg=None, + repeat=1, + experiment_checkpoint_path="", + ) + eval_run = parallel_ex.run(config_updates=eval_config_updates) + eval_result = eval_run.result + returns = eval_result.results_df[return_key].to_numpy() + if print_return: + print("All returns:", returns) + print("Mean:", np.mean(returns)) + print("Std:", np.std(returns)) + return eval_run def main_console(): diff 
--git a/benchmarking/tuning_config.py b/benchmarking/tuning_config.py index 79c8d0347..187963d02 100644 --- a/benchmarking/tuning_config.py +++ b/benchmarking/tuning_config.py @@ -12,7 +12,7 @@ @tuning_ex.named_config def example_rl(): - parallel = dict( + parallel_run_config = dict( sacred_ex_name="train_rl", run_name="rl_tuning", base_named_configs=["logging.wandb_logging"], @@ -33,13 +33,12 @@ def example_rl(): repeat=1, resources_per_trial=dict(cpu=1), ) - eval_best_trial = True - eval_trial_seeds = 5 + num_eval_seeds = 5 @tuning_ex.named_config def example_bc(): - parallel = dict( + parallel_run_config = dict( sacred_ex_name="train_imitation", run_name="bc_tuning", base_named_configs=["logging.wandb_logging"], @@ -62,19 +61,18 @@ def example_bc(): }, "command_name": "bc", }, - num_samples=2, - repeat=1, + num_samples=64, + repeat=3, resources_per_trial=dict(cpu=1), ) - eval_best_trial = True - eval_trial_seeds = 5 + num_eval_seeds = 5 eval_best_trial_resource_multiplier = 1 @tuning_ex.named_config def example_dagger(): - parallel = dict( + parallel_run_config = dict( sacred_ex_name="train_imitation", run_name="dagger_tuning", base_named_configs=["logging.wandb_logging"], @@ -109,13 +107,12 @@ def example_dagger(): repeat=3, resources_per_trial=dict(cpu=1), ) - eval_best_trial = True - eval_trial_seeds = 5 + num_eval_seeds = 5 @tuning_ex.named_config def example_gail(): - parallel = dict( + parallel_run_config = dict( sacred_ex_name="train_adversarial", run_name="gail_tuning_hc", base_named_configs=["logging.wandb_logging"], @@ -145,13 +142,12 @@ def example_gail(): repeat=3, resources_per_trial=dict(cpu=1), ) - eval_best_trial = True - eval_trial_seeds = 5 + num_eval_seeds = 5 @tuning_ex.named_config def example_airl(): - parallel = dict( + parallel_run_config = dict( sacred_ex_name="train_adversarial", run_name="airl_tuning", base_named_configs=["logging.wandb_logging"], @@ -181,14 +177,12 @@ def example_airl(): repeat=3, resources_per_trial=dict(cpu=1), ) - - eval_best_trial = True - eval_trial_seeds = 5 + num_eval_seeds = 5 @tuning_ex.named_config def example_pc(): - parallel = dict( + parallel_run_config = dict( sacred_ex_name="train_preference_comparisons", run_name="pc_tuning", base_named_configs=["logging.wandb_logging"], @@ -232,6 +226,4 @@ def example_pc(): repeat=3, resources_per_trial=dict(cpu=1), ) - - eval_best_trial = True - eval_trial_seeds = 5 + num_eval_seeds = 5 diff --git a/src/imitation/scripts/config/parallel.py b/src/imitation/scripts/config/parallel.py index b09f9fc4a..b38b6f28c 100644 --- a/src/imitation/scripts/config/parallel.py +++ b/src/imitation/scripts/config/parallel.py @@ -38,9 +38,8 @@ def config(): local_dir = None # `local_dir` arg for `ray.tune.run` upload_dir = None # `upload_dir` arg for `ray.tune.run` - experiment_checkpoint_path = "" num_samples = 1 # Number of samples per grid search configuration - repeat = 1 + repeat = 1 # Number of times to repeat a sampled configuration search_alg = "optuna" # search algorithm to use experiment_checkpoint_path = "" # Path to checkpoint of experiment syncer = None # Sacred syncer to use diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index ebda17c82..93aa932b9 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -72,11 +72,13 @@ def parallel( init_kwargs: Arguments to pass to `ray.init`. local_dir: `local_dir` argument to `ray.tune.run()`. upload_dir: `upload_dir` argument to `ray.tune.run()`. 
- search_alg: can be either "optuna" or None. + search_alg: can be either "optuna" or None. Setting `None` allows for + adding grid_search to the `search_space` hyperparameters but doesn't allow + for trials to be repeated. repeat: Number of runs to repeat each trial for. Not used if `search_alg` is None. experiment_checkpoint_path: Path containing the checkpoints of a previous - experiment ran using this script. Useful for evaluating the best trial + experiment ran using this script. Useful for evaluating the best trial of the experiment. syncer: `syncer` argument to `ray.tune.syncer.SyncConfig`. @@ -84,7 +86,8 @@ def parallel( TypeError: Named configs not string sequences or config updates not mappings. Returns: - The result of `ray.tune.run()`. + The result of running the parallel experiment with `ray.tune.run()`. + Useful for fetching the configs and results dataframe of all the trials. """ # Basic validation for config options before we enter parallel jobs. if not isinstance(base_named_configs, collections.abc.Sequence): diff --git a/tests/test_benchmarking.py b/tests/test_benchmarking.py index ba01b38a2..4a8f6ea6f 100644 --- a/tests/test_benchmarking.py +++ b/tests/test_benchmarking.py @@ -1,5 +1,7 @@ """Tests for config files in benchmarking/ folder.""" import pathlib +import subprocess +import sys import pytest @@ -44,3 +46,28 @@ def test_benchmarks_print_config_succeeds(algorithm: str, environment: str): # THEN assert run.status == "COMPLETED" + + +@pytest.mark.parametrize("algorithm", ALGORITHMS) +def test_tuning_print_config_succeeds(algorithm: str): + # We test the configs using the print_config command, + # because running the configs requires MuJoCo. + # Requiring MuJoCo to run the tests adds too much complexity. + + # We need to use sys.executable, not just "python", on Windows as + # subprocess.call ignores PATH (unless shell=True) so runs a + # system-wide Python interpreter outside of our venv. 
See: + # https://stackoverflow.com/questions/5658622/ + tuning_path = str(BENCHMARKING_DIR / "tuning.py") + env = 'parallel_run_config.base_named_configs=["seals_cartpole"]' + exit_code = subprocess.call( + [ + sys.executable, + tuning_path, + "print_config", + "with", + f"example_{algorithm}", + env, + ], + ) + assert exit_code == 0 From 046b8d9987e13a8d87f2bd52fe75be562e80db04 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Sun, 16 Jul 2023 13:04:14 +0530 Subject: [PATCH 23/54] fix lint error --- src/imitation/scripts/config/parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imitation/scripts/config/parallel.py b/src/imitation/scripts/config/parallel.py index b38b6f28c..e81a617db 100644 --- a/src/imitation/scripts/config/parallel.py +++ b/src/imitation/scripts/config/parallel.py @@ -39,7 +39,7 @@ def config(): local_dir = None # `local_dir` arg for `ray.tune.run` upload_dir = None # `upload_dir` arg for `ray.tune.run` num_samples = 1 # Number of samples per grid search configuration - repeat = 1 # Number of times to repeat a sampled configuration + repeat = 1 # Number of times to repeat a sampled configuration search_alg = "optuna" # search algorithm to use experiment_checkpoint_path = "" # Path to checkpoint of experiment syncer = None # Sacred syncer to use From 8eee0822d3fb4686d5801a6e955fdde0c9a90ce7 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Sun, 16 Jul 2023 13:52:43 +0530 Subject: [PATCH 24/54] Add documentation for using the tuning script --- benchmarking/README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/benchmarking/README.md b/benchmarking/README.md index 3f5114545..95e67f1d3 100644 --- a/benchmarking/README.md +++ b/benchmarking/README.md @@ -17,3 +17,24 @@ python -m imitation.scripts. with benchmarking/.json') ``` + +# Tuning Hyperparameters + +The hyperparameters of any algorithm in imitation can be tuned using the `tuning.py` script. +The benchmarking hyperparameter configs were generated by tuning the hyperparameters using +the search space defined in the `tuning_config.py` script. The tuning script proceeds in two +phases: 1) The hyperparameters are tuned using the search space provided, and 2) the best +hyperparameter config found in the first phase based on the maximum mean return is +re-evaluated on a separate set of seeds, and the mean and standard deviation of these trials +are reported. + +To tune the hyperparameters of an algorithm using the default search space provided: +```bash +python tuning.py with example_{algo} 'parallel_run_config.base_named_configs=["{env}"]' +``` + +In this command, `example_{algo}` provides the default search space and settings to be used for +the specific algorithm, which is defined in the `tuning_config.py` script and +`'parallel_run_config.base_named_configs=["{env}"]'` sets the environment to tune the algorithm in. +See the documentation of `tuning.py` and `parallel.py` scripts for many other arguments that can be +provided through the command line to change the tuning behavior. 
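As a concrete usage note on the README section added above: a small dry run of the two-phase tuning workflow might look like the command below. This is a sketch rather than part of the patch itself; it assumes `benchmarking/` is the working directory, that `seals_cartpole` is available as an environment named config (it is the one used by the tuning smoke test added earlier in this series), and that a tiny sweep with the final re-evaluation phase disabled is acceptable. The extra overrides are ordinary Sacred config updates into `parallel_run_config` and `num_eval_seeds`, not new command-line flags.

```bash
# Hypothetical dry run of the tuning script. `example_bc` supplies the BC
# search space (it is renamed to plain `bc` in a later patch of this series);
# `seals_cartpole` stands in for the target environment.
python tuning.py with example_bc \
    'parallel_run_config.base_named_configs=["seals_cartpole"]' \
    'parallel_run_config.num_samples=4' \
    'parallel_run_config.repeat=1' \
    num_eval_seeds=0  # skip the second (re-evaluation) phase
```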
From 5ce765859f7cd295ae607cab2709d0f626c65de7 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Mon, 17 Jul 2023 09:08:04 +0530 Subject: [PATCH 25/54] Fix lint error --- benchmarking/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarking/README.md b/benchmarking/README.md index 95e67f1d3..892908ac8 100644 --- a/benchmarking/README.md +++ b/benchmarking/README.md @@ -23,7 +23,7 @@ ex.add_config('benchmarking/.json') The hyperparameters of any algorithm in imitation can be tuned using the `tuning.py` script. The benchmarking hyperparameter configs were generated by tuning the hyperparameters using the search space defined in the `tuning_config.py` script. The tuning script proceeds in two -phases: 1) The hyperparameters are tuned using the search space provided, and 2) the best +phases: 1) The hyperparameters are tuned using the search space provided, and 2) the best hyperparameter config found in the first phase based on the maximum mean return is re-evaluated on a separate set of seeds, and the mean and standard deviation of these trials are reported. From a8be3316b653451ce8366379cf413627dd22e1ec Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Tue, 18 Jul 2023 11:09:05 +0530 Subject: [PATCH 26/54] Updates from the review --- benchmarking/README.md | 4 ++-- ....json => airl_seals_ant_best_hp_eval.json} | 0 ...airl_seals_half_cheetah_best_hp_eval.json} | 0 ...on => airl_seals_hopper_best_hp_eval.json} | 0 ...n => airl_seals_swimmer_best_hp_eval.json} | 0 ...on => airl_seals_walker_best_hp_eval.json} | 0 ...al.json => bc_seals_ant_best_hp_eval.json} | 0 ...> bc_seals_half_cheetah_best_hp_eval.json} | 0 ...json => bc_seals_hopper_best_hp_eval.json} | 0 ...son => bc_seals_swimmer_best_hp_eval.json} | 0 ...json => bc_seals_walker_best_hp_eval.json} | 0 ...son => dagger_seals_ant_best_hp_eval.json} | 0 ...gger_seals_half_cheetah_best_hp_eval.json} | 0 ... => dagger_seals_hopper_best_hp_eval.json} | 0 ...=> dagger_seals_swimmer_best_hp_eval.json} | 0 ... 
=> dagger_seals_walker_best_hp_eval.json} | 0 ....json => gail_seals_ant_best_hp_eval.json} | 0 ...gail_seals_half_cheetah_best_hp_eval.json} | 0 ...on => gail_seals_hopper_best_hp_eval.json} | 0 ...n => gail_seals_swimmer_best_hp_eval.json} | 0 ...on => gail_seals_walker_best_hp_eval.json} | 0 benchmarking/tuning.py | 23 +++++++++++-------- benchmarking/tuning_config.py | 21 +++++++++-------- benchmarking/util.py | 2 +- experiments/commands.py | 18 +++++++-------- src/imitation/scripts/config/parallel.py | 6 ++--- tests/test_benchmarking.py | 4 ++-- tests/test_experiments.py | 16 ++++++------- 28 files changed, 49 insertions(+), 45 deletions(-) rename benchmarking/{example_airl_seals_ant_best_hp_eval.json => airl_seals_ant_best_hp_eval.json} (100%) rename benchmarking/{example_airl_seals_half_cheetah_best_hp_eval.json => airl_seals_half_cheetah_best_hp_eval.json} (100%) rename benchmarking/{example_airl_seals_hopper_best_hp_eval.json => airl_seals_hopper_best_hp_eval.json} (100%) rename benchmarking/{example_airl_seals_swimmer_best_hp_eval.json => airl_seals_swimmer_best_hp_eval.json} (100%) rename benchmarking/{example_airl_seals_walker_best_hp_eval.json => airl_seals_walker_best_hp_eval.json} (100%) rename benchmarking/{example_bc_seals_ant_best_hp_eval.json => bc_seals_ant_best_hp_eval.json} (100%) rename benchmarking/{example_bc_seals_half_cheetah_best_hp_eval.json => bc_seals_half_cheetah_best_hp_eval.json} (100%) rename benchmarking/{example_bc_seals_hopper_best_hp_eval.json => bc_seals_hopper_best_hp_eval.json} (100%) rename benchmarking/{example_bc_seals_swimmer_best_hp_eval.json => bc_seals_swimmer_best_hp_eval.json} (100%) rename benchmarking/{example_bc_seals_walker_best_hp_eval.json => bc_seals_walker_best_hp_eval.json} (100%) rename benchmarking/{example_dagger_seals_ant_best_hp_eval.json => dagger_seals_ant_best_hp_eval.json} (100%) rename benchmarking/{example_dagger_seals_half_cheetah_best_hp_eval.json => dagger_seals_half_cheetah_best_hp_eval.json} (100%) rename benchmarking/{example_dagger_seals_hopper_best_hp_eval.json => dagger_seals_hopper_best_hp_eval.json} (100%) rename benchmarking/{example_dagger_seals_swimmer_best_hp_eval.json => dagger_seals_swimmer_best_hp_eval.json} (100%) rename benchmarking/{example_dagger_seals_walker_best_hp_eval.json => dagger_seals_walker_best_hp_eval.json} (100%) rename benchmarking/{example_gail_seals_ant_best_hp_eval.json => gail_seals_ant_best_hp_eval.json} (100%) rename benchmarking/{example_gail_seals_half_cheetah_best_hp_eval.json => gail_seals_half_cheetah_best_hp_eval.json} (100%) rename benchmarking/{example_gail_seals_hopper_best_hp_eval.json => gail_seals_hopper_best_hp_eval.json} (100%) rename benchmarking/{example_gail_seals_swimmer_best_hp_eval.json => gail_seals_swimmer_best_hp_eval.json} (100%) rename benchmarking/{example_gail_seals_walker_best_hp_eval.json => gail_seals_walker_best_hp_eval.json} (100%) diff --git a/benchmarking/README.md b/benchmarking/README.md index 892908ac8..3973c6181 100644 --- a/benchmarking/README.md +++ b/benchmarking/README.md @@ -30,10 +30,10 @@ are reported. 
To tune the hyperparameters of an algorithm using the default search space provided: ```bash -python tuning.py with example_{algo} 'parallel_run_config.base_named_configs=["{env}"]' +python tuning.py with {algo} 'parallel_run_config.base_named_configs=["{env}"]' ``` -In this command, `example_{algo}` provides the default search space and settings to be used for +In this command, `{algo}` provides the default search space and settings to be used for the specific algorithm, which is defined in the `tuning_config.py` script and `'parallel_run_config.base_named_configs=["{env}"]'` sets the environment to tune the algorithm in. See the documentation of `tuning.py` and `parallel.py` scripts for many other arguments that can be diff --git a/benchmarking/example_airl_seals_ant_best_hp_eval.json b/benchmarking/airl_seals_ant_best_hp_eval.json similarity index 100% rename from benchmarking/example_airl_seals_ant_best_hp_eval.json rename to benchmarking/airl_seals_ant_best_hp_eval.json diff --git a/benchmarking/example_airl_seals_half_cheetah_best_hp_eval.json b/benchmarking/airl_seals_half_cheetah_best_hp_eval.json similarity index 100% rename from benchmarking/example_airl_seals_half_cheetah_best_hp_eval.json rename to benchmarking/airl_seals_half_cheetah_best_hp_eval.json diff --git a/benchmarking/example_airl_seals_hopper_best_hp_eval.json b/benchmarking/airl_seals_hopper_best_hp_eval.json similarity index 100% rename from benchmarking/example_airl_seals_hopper_best_hp_eval.json rename to benchmarking/airl_seals_hopper_best_hp_eval.json diff --git a/benchmarking/example_airl_seals_swimmer_best_hp_eval.json b/benchmarking/airl_seals_swimmer_best_hp_eval.json similarity index 100% rename from benchmarking/example_airl_seals_swimmer_best_hp_eval.json rename to benchmarking/airl_seals_swimmer_best_hp_eval.json diff --git a/benchmarking/example_airl_seals_walker_best_hp_eval.json b/benchmarking/airl_seals_walker_best_hp_eval.json similarity index 100% rename from benchmarking/example_airl_seals_walker_best_hp_eval.json rename to benchmarking/airl_seals_walker_best_hp_eval.json diff --git a/benchmarking/example_bc_seals_ant_best_hp_eval.json b/benchmarking/bc_seals_ant_best_hp_eval.json similarity index 100% rename from benchmarking/example_bc_seals_ant_best_hp_eval.json rename to benchmarking/bc_seals_ant_best_hp_eval.json diff --git a/benchmarking/example_bc_seals_half_cheetah_best_hp_eval.json b/benchmarking/bc_seals_half_cheetah_best_hp_eval.json similarity index 100% rename from benchmarking/example_bc_seals_half_cheetah_best_hp_eval.json rename to benchmarking/bc_seals_half_cheetah_best_hp_eval.json diff --git a/benchmarking/example_bc_seals_hopper_best_hp_eval.json b/benchmarking/bc_seals_hopper_best_hp_eval.json similarity index 100% rename from benchmarking/example_bc_seals_hopper_best_hp_eval.json rename to benchmarking/bc_seals_hopper_best_hp_eval.json diff --git a/benchmarking/example_bc_seals_swimmer_best_hp_eval.json b/benchmarking/bc_seals_swimmer_best_hp_eval.json similarity index 100% rename from benchmarking/example_bc_seals_swimmer_best_hp_eval.json rename to benchmarking/bc_seals_swimmer_best_hp_eval.json diff --git a/benchmarking/example_bc_seals_walker_best_hp_eval.json b/benchmarking/bc_seals_walker_best_hp_eval.json similarity index 100% rename from benchmarking/example_bc_seals_walker_best_hp_eval.json rename to benchmarking/bc_seals_walker_best_hp_eval.json diff --git a/benchmarking/example_dagger_seals_ant_best_hp_eval.json b/benchmarking/dagger_seals_ant_best_hp_eval.json 
similarity index 100% rename from benchmarking/example_dagger_seals_ant_best_hp_eval.json rename to benchmarking/dagger_seals_ant_best_hp_eval.json diff --git a/benchmarking/example_dagger_seals_half_cheetah_best_hp_eval.json b/benchmarking/dagger_seals_half_cheetah_best_hp_eval.json similarity index 100% rename from benchmarking/example_dagger_seals_half_cheetah_best_hp_eval.json rename to benchmarking/dagger_seals_half_cheetah_best_hp_eval.json diff --git a/benchmarking/example_dagger_seals_hopper_best_hp_eval.json b/benchmarking/dagger_seals_hopper_best_hp_eval.json similarity index 100% rename from benchmarking/example_dagger_seals_hopper_best_hp_eval.json rename to benchmarking/dagger_seals_hopper_best_hp_eval.json diff --git a/benchmarking/example_dagger_seals_swimmer_best_hp_eval.json b/benchmarking/dagger_seals_swimmer_best_hp_eval.json similarity index 100% rename from benchmarking/example_dagger_seals_swimmer_best_hp_eval.json rename to benchmarking/dagger_seals_swimmer_best_hp_eval.json diff --git a/benchmarking/example_dagger_seals_walker_best_hp_eval.json b/benchmarking/dagger_seals_walker_best_hp_eval.json similarity index 100% rename from benchmarking/example_dagger_seals_walker_best_hp_eval.json rename to benchmarking/dagger_seals_walker_best_hp_eval.json diff --git a/benchmarking/example_gail_seals_ant_best_hp_eval.json b/benchmarking/gail_seals_ant_best_hp_eval.json similarity index 100% rename from benchmarking/example_gail_seals_ant_best_hp_eval.json rename to benchmarking/gail_seals_ant_best_hp_eval.json diff --git a/benchmarking/example_gail_seals_half_cheetah_best_hp_eval.json b/benchmarking/gail_seals_half_cheetah_best_hp_eval.json similarity index 100% rename from benchmarking/example_gail_seals_half_cheetah_best_hp_eval.json rename to benchmarking/gail_seals_half_cheetah_best_hp_eval.json diff --git a/benchmarking/example_gail_seals_hopper_best_hp_eval.json b/benchmarking/gail_seals_hopper_best_hp_eval.json similarity index 100% rename from benchmarking/example_gail_seals_hopper_best_hp_eval.json rename to benchmarking/gail_seals_hopper_best_hp_eval.json diff --git a/benchmarking/example_gail_seals_swimmer_best_hp_eval.json b/benchmarking/gail_seals_swimmer_best_hp_eval.json similarity index 100% rename from benchmarking/example_gail_seals_swimmer_best_hp_eval.json rename to benchmarking/gail_seals_swimmer_best_hp_eval.json diff --git a/benchmarking/example_gail_seals_walker_best_hp_eval.json b/benchmarking/gail_seals_walker_best_hp_eval.json similarity index 100% rename from benchmarking/example_gail_seals_walker_best_hp_eval.json rename to benchmarking/gail_seals_walker_best_hp_eval.json diff --git a/benchmarking/tuning.py b/benchmarking/tuning.py index 0c18b1256..324032088 100644 --- a/benchmarking/tuning.py +++ b/benchmarking/tuning.py @@ -30,7 +30,7 @@ def tune( Set to 0 to disable evaluation. Raises: - ValueError: If no trials are returned by. + ValueError: If no trials are returned by the parallel run of tuning. 
""" run = parallel_ex.run(config_updates=parallel_run_config) experiment_analysis = run.result @@ -54,9 +54,10 @@ def tune( # Uses the default values (cpu=1) if it is not provided if "cpu" in parallel_run_config["resources_per_trial"]: resources_per_trial_eval["cpu"] *= eval_best_trial_resource_multiplier - evaluate_best_trial( + evaluate_trial( best_trial, num_eval_seeds, + parallel_run_config["run_name"] + "_best_hp_eval", parallel_run_config, resources_per_trial_eval, return_key, @@ -107,19 +108,21 @@ def find_best_trial( return best_trial -def evaluate_best_trial( - best_trial: ray.tune.experiment.Trial, +def evaluate_trial( + trial: ray.tune.experiment.Trial, num_eval_seeds: int, + run_name: str, parallel_run_config: Dict[str, Any], resources_per_trial: Dict[str, int], return_key: str, print_return: bool = False, ): - """Evaluate the best trial of a parallel run on a separate set of seeds. + """Evaluate a given trial of a parallel run on a separate set of seeds. Args: - best_trial: The trial with the best mean return across all seeds. + trial: The trial to evaluate. num_eval_seeds: Number of distinct seeds to evaluate the best trial on. + run_name: The name of the evaluation run. parallel_run_config: Dictionary of arguments passed to the parallel script to get best_trial. resources_per_trial: Resources to be used for each evaluation trial. @@ -129,15 +132,15 @@ def evaluate_best_trial( Returns: eval_run: The result of the evaluation run. """ - best_config = best_trial.config - best_config["config_updates"].update( + config = trial.config + config["config_updates"].update( seed=ray.tune.grid_search(list(range(100, 100 + num_eval_seeds))), ) eval_config_updates = parallel_run_config.copy() eval_config_updates.update( - run_name=parallel_run_config["run_name"] + "_best_hp_eval", + run_name=run_name, num_samples=1, - search_space=best_config, + search_space=config, resources_per_trial=resources_per_trial, search_alg=None, repeat=1, diff --git a/benchmarking/tuning_config.py b/benchmarking/tuning_config.py index 187963d02..239537406 100644 --- a/benchmarking/tuning_config.py +++ b/benchmarking/tuning_config.py @@ -4,14 +4,14 @@ import sacred from torch import nn -from imitation.algorithms import dagger +from imitation.algorithms import dagger as dagger_alg from imitation.scripts.parallel import parallel_ex tuning_ex = sacred.Experiment("tuning", ingredients=[parallel_ex]) @tuning_ex.named_config -def example_rl(): +def rl(): parallel_run_config = dict( sacred_ex_name="train_rl", run_name="rl_tuning", @@ -37,7 +37,7 @@ def example_rl(): @tuning_ex.named_config -def example_bc(): +def bc(): parallel_run_config = dict( sacred_ex_name="train_imitation", run_name="bc_tuning", @@ -71,7 +71,7 @@ def example_bc(): @tuning_ex.named_config -def example_dagger(): +def dagger(): parallel_run_config = dict( sacred_ex_name="train_imitation", run_name="dagger_tuning", @@ -95,8 +95,11 @@ def example_dagger(): ), "dagger": dict( beta_schedule=tune.choice( - [dagger.LinearBetaSchedule(i) for i in [1, 5, 15]] - + [dagger.ExponentialBetaSchedule(i) for i in [0.3, 0.5, 0.7]], + [dagger_alg.LinearBetaSchedule(i) for i in [1, 5, 15]] + + [ + dagger_alg.ExponentialBetaSchedule(i) + for i in [0.3, 0.5, 0.7] + ], ), rollout_round_min_episodes=tune.choice([3, 5, 10]), ), @@ -111,7 +114,7 @@ def example_dagger(): @tuning_ex.named_config -def example_gail(): +def gail(): parallel_run_config = dict( sacred_ex_name="train_adversarial", run_name="gail_tuning_hc", @@ -146,7 +149,7 @@ def example_gail(): 
@tuning_ex.named_config -def example_airl(): +def airl(): parallel_run_config = dict( sacred_ex_name="train_adversarial", run_name="airl_tuning", @@ -181,7 +184,7 @@ def example_airl(): @tuning_ex.named_config -def example_pc(): +def pc(): parallel_run_config = dict( sacred_ex_name="train_preference_comparisons", run_name="pc_tuning", diff --git a/benchmarking/util.py b/benchmarking/util.py index 408f0d812..88416344d 100644 --- a/benchmarking/util.py +++ b/benchmarking/util.py @@ -79,7 +79,7 @@ def clean_config_file(file: pathlib.Path, write_path: pathlib.Path, /) -> None: remove_empty_dicts(config) # files are of the format - # /path/to/file/example___best_hp_eval//sacred/1/config.json + # /path/to/file/__best_hp_eval//sacred/1/config.json # we want to write to //_.json with open(write_path / f"{file.parents[3].name}.json", "w") as f: json.dump(config, f, indent=4) diff --git a/experiments/commands.py b/experiments/commands.py index 2ac737e06..9021d3738 100644 --- a/experiments/commands.py +++ b/experiments/commands.py @@ -22,13 +22,13 @@ python -m imitation.scripts.train_adversarial airl \ --capture=sys --name=run0 \ --file_storage=output/sacred/$USER-cmd-run0-airl-0-a3531726 \ - with ../benchmarking/example_airl_seals_walker_best_hp_eval.json \ + with ../benchmarking/airl_seals_walker_best_hp_eval.json \ seed=0 logging.log_root=output python -m imitation.scripts.train_adversarial gail \ --capture=sys --name=run0 \ --file_storage=output/sacred/$USER-cmd-run0-gail-0-a1ec171b \ - with ../benchmarking/example_gail_seals_walker_best_hp_eval.json \ + with ../benchmarking/gail_seals_walker_best_hp_eval.json \ seed=0 logging.log_root=output We can execute commands in parallel by piping them to GNU parallel: @@ -42,7 +42,7 @@ python commands.py \ --name=run0 \ - --cfg_pattern=../benchmarking/example_bc_seals_half_cheetah_best_hp_eval.json \ + --cfg_pattern=../benchmarking/bc_seals_half_cheetah_best_hp_eval.json \ --output_dir=/data/output \ --remote @@ -52,7 +52,7 @@ --command "python -m imitation.scripts.train_imitation bc \ --capture=sys --name=run0 \ --file_storage=/data/output/sacred/$USER-cmd-run0-bc-0-72cb1df3 \ - with /data/imitation/benchmarking/example_bc_seals_half_cheetah_best_hp_eval.json \ + with /data/imitation/benchmarking/bc_seals_half_cheetah_best_hp_eval.json \ seed=0 logging.log_root=/data/output" \ --container hacobe/devbox:imitation \ --login --force-pull --never-restart --gpu 0 --shared-host-dir-mount /data @@ -177,19 +177,19 @@ def parse() -> argparse.Namespace: parser.add_argument( "--cfg_pattern", type=str, - default="example_bc_seals_half_cheetah_best_hp_eval.json", + default="bc_seals_half_cheetah_best_hp_eval.json", help="""Generate a command for every file that matches this glob pattern. \ Each matching file should be a config file that has its algorithm name \ (bc, dagger, airl or gail) bookended by underscores in the filename. \ If the --remote flag is enabled, then generate a command for every file in the \ --remote_cfg_dir directory that has the same filename as a file that matches this \ glob pattern. E.g., suppose the current, local working directory is 'foo' and \ -the subdirectory 'foo/bar' contains the config files 'example_bc_best.json' and \ -'example_dagger_best.json'. If the pattern 'bar/*.json' is supplied, then globbing \ -will return ['bar/example_bc_best.json', 'bar/example_dagger_best.json']. \ +the subdirectory 'foo/bar' contains the config files 'bc_best.json' and \ +'dagger_best.json'. 
If the pattern 'bar/*.json' is supplied, then globbing \ +will return ['bar/bc_best.json', 'bar/dagger_best.json']. \ If the --remote flag is enabled, 'bar' will be replaced with `remote_cfg_dir` and \ commands will be created for the following configs: \ -[`remote_cfg_dir`/example_bc_best.json, `remote_cfg_dir`/example_dagger_best.json] \ +[`remote_cfg_dir`/bc_best.json, `remote_cfg_dir`/dagger_best.json] \ Why not just supply the pattern '`remote_cfg_dir`/*.json' directly? \ Because the `remote_cfg_dir` directory may not exist on the local machine.""", ) diff --git a/src/imitation/scripts/config/parallel.py b/src/imitation/scripts/config/parallel.py index e81a617db..a591f3d9a 100644 --- a/src/imitation/scripts/config/parallel.py +++ b/src/imitation/scripts/config/parallel.py @@ -7,10 +7,8 @@ Adding custom named configs is necessary because the CLI interface can't add search spaces to the config like `"seed": tune.choice([0, 1, 2, 3])`. -For tuning hyperparameters of an algorithm on a given environment, override -the `base_named_configs` argument with the named config of the environment. -Ex: python -m imitation.scripts.parallel with example_gail \ - 'base_named_configs=["logging.wandb_logging", "seals_half_cheetah"]' +For tuning hyperparameters of an algorithm on a given environment, +check out the benchmarking/tuning.py script. """ import numpy as np diff --git a/tests/test_benchmarking.py b/tests/test_benchmarking.py index 4a8f6ea6f..18d4f12cf 100644 --- a/tests/test_benchmarking.py +++ b/tests/test_benchmarking.py @@ -37,7 +37,7 @@ def test_benchmarks_print_config_succeeds(algorithm: str, environment: str): config_name = f"{algorithm}_{environment}" config_file = str( - BENCHMARKING_DIR / f"example_{algorithm}_{environment}_best_hp_eval.json", + BENCHMARKING_DIR / f"{algorithm}_{environment}_best_hp_eval.json", ) # WHEN @@ -66,7 +66,7 @@ def test_tuning_print_config_succeeds(algorithm: str): tuning_path, "print_config", "with", - f"example_{algorithm}", + f"{algorithm}", env, ], ) diff --git a/tests/test_experiments.py b/tests/test_experiments.py index 0f6d314fe..0d431d0e9 100644 --- a/tests/test_experiments.py +++ b/tests/test_experiments.py @@ -245,13 +245,13 @@ def test_commands_hofvarpnir_config_with_special_characters_in_flags(tmpdir): def test_commands_bc_config(): if os.name == "nt": # pragma: no cover pytest.skip("commands.py not ported to Windows.") - cfg_pattern = _get_benchmarking_path("example_bc_seals_ant_best_hp_eval.json") + cfg_pattern = _get_benchmarking_path("bc_seals_ant_best_hp_eval.json") commands = _run_commands_from_flags(cfg_pattern=cfg_pattern) assert len(commands) == 1 expected = """python -m imitation.scripts.train_imitation bc \ --capture=sys --name=run0 --file_storage=output/sacred/\ $USER-cmd-run0-bc-0-138a1475 \ -with benchmarking/example_bc_seals_ant_best_hp_eval.json \ +with benchmarking/bc_seals_ant_best_hp_eval.json \ seed=0 logging.log_root=output""" assert commands[0] == expected @@ -259,13 +259,13 @@ def test_commands_bc_config(): def test_commands_dagger_config(): if os.name == "nt": # pragma: no cover pytest.skip("commands.py not ported to Windows.") - cfg_pattern = _get_benchmarking_path("example_dagger_seals_ant_best_hp_eval.json") + cfg_pattern = _get_benchmarking_path("dagger_seals_ant_best_hp_eval.json") commands = _run_commands_from_flags(cfg_pattern=cfg_pattern) assert len(commands) == 1 expected = """python -m imitation.scripts.train_imitation dagger \ --capture=sys --name=run0 --file_storage=output/sacred/\ 
$USER-cmd-run0-dagger-0-6a49161a \ -with benchmarking/example_dagger_seals_ant_best_hp_eval.json \ +with benchmarking/dagger_seals_ant_best_hp_eval.json \ seed=0 logging.log_root=output""" assert commands[0] == expected @@ -273,13 +273,13 @@ def test_commands_dagger_config(): def test_commands_gail_config(): if os.name == "nt": # pragma: no cover pytest.skip("commands.py not ported to Windows.") - cfg_pattern = _get_benchmarking_path("example_gail_seals_ant_best_hp_eval.json") + cfg_pattern = _get_benchmarking_path("gail_seals_ant_best_hp_eval.json") commands = _run_commands_from_flags(cfg_pattern=cfg_pattern) assert len(commands) == 1 expected = """python -m imitation.scripts.train_adversarial gail \ --capture=sys --name=run0 --file_storage=output/sacred/\ $USER-cmd-run0-gail-0-3ec8154d \ -with benchmarking/example_gail_seals_ant_best_hp_eval.json \ +with benchmarking/gail_seals_ant_best_hp_eval.json \ seed=0 logging.log_root=output""" assert commands[0] == expected @@ -287,13 +287,13 @@ def test_commands_gail_config(): def test_commands_airl_config(): if os.name == "nt": # pragma: no cover pytest.skip("commands.py not ported to Windows.") - cfg_pattern = _get_benchmarking_path("example_airl_seals_ant_best_hp_eval.json") + cfg_pattern = _get_benchmarking_path("airl_seals_ant_best_hp_eval.json") commands = _run_commands_from_flags(cfg_pattern=cfg_pattern) assert len(commands) == 1 expected = """python -m imitation.scripts.train_adversarial airl \ --capture=sys --name=run0 \ --file_storage=output/sacred/$USER-cmd-run0-airl-0-400e1558 \ -with benchmarking/example_airl_seals_ant_best_hp_eval.json \ +with benchmarking/airl_seals_ant_best_hp_eval.json \ seed=0 logging.log_root=output""" assert commands[0] == expected From 4ff006d1f2162c8f5085c1f824a19090846dd23c Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Tue, 18 Jul 2023 12:06:30 +0530 Subject: [PATCH 27/54] Fix file name test errors --- experiments/commands.py | 2 +- tests/test_experiments.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/experiments/commands.py b/experiments/commands.py index 9021d3738..738a55011 100644 --- a/experiments/commands.py +++ b/experiments/commands.py @@ -85,7 +85,7 @@ def _get_algo_name(cfg_file: str) -> str: """Get the algorithm name from the given config filename.""" algo_names = set() for key in _ALGO_NAME_TO_SCRIPT_NAME: - if cfg_file.find("_" + key + "_") != -1: + if cfg_file.find(key + "_") != -1: algo_names.add(key) if len(algo_names) == 0: diff --git a/tests/test_experiments.py b/tests/test_experiments.py index 0d431d0e9..b2417a9f9 100644 --- a/tests/test_experiments.py +++ b/tests/test_experiments.py @@ -250,7 +250,7 @@ def test_commands_bc_config(): assert len(commands) == 1 expected = """python -m imitation.scripts.train_imitation bc \ --capture=sys --name=run0 --file_storage=output/sacred/\ -$USER-cmd-run0-bc-0-138a1475 \ +$USER-cmd-run0-bc-0-78e5112a \ with benchmarking/bc_seals_ant_best_hp_eval.json \ seed=0 logging.log_root=output""" assert commands[0] == expected @@ -264,7 +264,7 @@ def test_commands_dagger_config(): assert len(commands) == 1 expected = """python -m imitation.scripts.train_imitation dagger \ --capture=sys --name=run0 --file_storage=output/sacred/\ -$USER-cmd-run0-dagger-0-6a49161a \ +$USER-cmd-run0-dagger-0-c27812cf \ with benchmarking/dagger_seals_ant_best_hp_eval.json \ seed=0 logging.log_root=output""" assert commands[0] == expected @@ -278,7 +278,7 @@ def test_commands_gail_config(): assert len(commands) == 1 expected = 
"""python -m imitation.scripts.train_adversarial gail \ --capture=sys --name=run0 --file_storage=output/sacred/\ -$USER-cmd-run0-gail-0-3ec8154d \ +$USER-cmd-run0-gail-0-9d8d1202 \ with benchmarking/gail_seals_ant_best_hp_eval.json \ seed=0 logging.log_root=output""" assert commands[0] == expected @@ -292,7 +292,7 @@ def test_commands_airl_config(): assert len(commands) == 1 expected = """python -m imitation.scripts.train_adversarial airl \ --capture=sys --name=run0 \ ---file_storage=output/sacred/$USER-cmd-run0-airl-0-400e1558 \ +--file_storage=output/sacred/$USER-cmd-run0-airl-0-9ed3120d \ with benchmarking/airl_seals_ant_best_hp_eval.json \ seed=0 logging.log_root=output""" assert commands[0] == expected From 6933afacb22c555fcd70a833041bd716d2d78807 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Wed, 19 Jul 2023 14:41:39 +0530 Subject: [PATCH 28/54] Add tune_run_kwargs in parallel script --- src/imitation/scripts/config/parallel.py | 3 -- src/imitation/scripts/parallel.py | 39 +++++++++++------------- 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/src/imitation/scripts/config/parallel.py b/src/imitation/scripts/config/parallel.py index a591f3d9a..4773b713e 100644 --- a/src/imitation/scripts/config/parallel.py +++ b/src/imitation/scripts/config/parallel.py @@ -34,13 +34,10 @@ def config(): "config_updates": {}, } # `config` argument to `ray.tune.run(trainable, config)` - local_dir = None # `local_dir` arg for `ray.tune.run` - upload_dir = None # `upload_dir` arg for `ray.tune.run` num_samples = 1 # Number of samples per grid search configuration repeat = 1 # Number of times to repeat a sampled configuration search_alg = "optuna" # search algorithm to use experiment_checkpoint_path = "" # Path to checkpoint of experiment - syncer = None # Sacred syncer to use # Debug named configs diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index 93aa932b9..7bf3db16f 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -26,12 +26,9 @@ def parallel( base_config_updates: Mapping[str, Any], resources_per_trial: Dict[str, Any], init_kwargs: Mapping[str, Any], - local_dir: Optional[str], - upload_dir: Optional[str], repeat: int, - search_alg: Optional[str], experiment_checkpoint_path: str, - syncer, + tune_run_kwargs: Dict[str, Any], ) -> ray.tune.ExperimentAnalysis: """Parallelize multiple runs of another Sacred Experiment using Ray Tune. @@ -70,17 +67,13 @@ def parallel( generated Ray directory name, unlike config updates from `search_space`. resources_per_trial: Argument to `ray.tune.run()`. init_kwargs: Arguments to pass to `ray.init`. - local_dir: `local_dir` argument to `ray.tune.run()`. - upload_dir: `upload_dir` argument to `ray.tune.run()`. - search_alg: can be either "optuna" or None. Setting `None` allows for - adding grid_search to the `search_space` hyperparameters but doesn't allow - for trials to be repeated. repeat: Number of runs to repeat each trial for. - Not used if `search_alg` is None. + If `repeat` > 1, then optuna is used as the default search algorithm + unless specified otherwise in `tune_run_kwargs`. experiment_checkpoint_path: Path containing the checkpoints of a previous experiment ran using this script. Useful for evaluating the best trial of the experiment. - syncer: `syncer` argument to `ray.tune.syncer.SyncConfig`. + tune_run_kwargs: Other arguments to pass to `ray.tune.run()`. Raises: TypeError: Named configs not string sequences or config updates not mappings. 
@@ -118,11 +111,18 @@ def parallel( ) ray.init(**init_kwargs) - if search_alg == "optuna": - algo = search.Repeater(optuna.OptunaSearch(), repeat=repeat) - else: - assert repeat == 1 # repeat should not be used if search_alg is None - algo = None + if repeat > 1: + if "search_alg" not in tune_run_kwargs: + tune_run_kwargs["search_alg"] = optuna.OptunaSearch() + try: + algo = tune_run_kwargs["search_alg"] + algo = search.Repeater(algo, repeat) + tune_run_kwargs["search_alg"] = algo + except AttributeError: + raise ValueError( + "repeat > 1 but search_alg is not an instance of " + "ray.tune.search.SearchAlgorithm", + ) if sacred_ex_name == "train_rl": return_key = "monitor_return_mean" @@ -145,15 +145,10 @@ def parallel( config=search_space, num_samples=num_samples * repeat, name=run_name, - local_dir=local_dir, resources_per_trial=resources_per_trial, - sync_config=ray.tune.syncer.SyncConfig( - upload_dir=upload_dir, - syncer=syncer, - ), - search_alg=algo, metric=return_key, mode="max", + **tune_run_kwargs, ) return result finally: From 77f9d9b74ddcb42e9181f9f493ca2f144b6a443f Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Wed, 19 Jul 2023 16:10:15 +0530 Subject: [PATCH 29/54] Fix test errors --- src/imitation/scripts/config/parallel.py | 1 + src/imitation/scripts/parallel.py | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/imitation/scripts/config/parallel.py b/src/imitation/scripts/config/parallel.py index 4773b713e..bdc591422 100644 --- a/src/imitation/scripts/config/parallel.py +++ b/src/imitation/scripts/config/parallel.py @@ -38,6 +38,7 @@ def config(): repeat = 1 # Number of times to repeat a sampled configuration search_alg = "optuna" # search algorithm to use experiment_checkpoint_path = "" # Path to checkpoint of experiment + tune_run_kwargs = {} # Additional kwargs to pass to `tune.run` # Debug named configs diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index 7bf3db16f..65a72eae3 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -111,13 +111,14 @@ def parallel( ) ray.init(**init_kwargs) + updated_tune_run_kwargs = copy.deepcopy(tune_run_kwargs) if repeat > 1: - if "search_alg" not in tune_run_kwargs: - tune_run_kwargs["search_alg"] = optuna.OptunaSearch() + if "search_alg" not in updated_tune_run_kwargs: + updated_tune_run_kwargs["search_alg"] = optuna.OptunaSearch() try: - algo = tune_run_kwargs["search_alg"] + algo = updated_tune_run_kwargs["search_alg"] algo = search.Repeater(algo, repeat) - tune_run_kwargs["search_alg"] = algo + updated_tune_run_kwargs["search_alg"] = algo except AttributeError: raise ValueError( "repeat > 1 but search_alg is not an instance of " @@ -148,7 +149,7 @@ def parallel( resources_per_trial=resources_per_trial, metric=return_key, mode="max", - **tune_run_kwargs, + **updated_tune_run_kwargs, ) return result finally: From 54eb8a6f44ea599236b6165fa5de9079df7ca49a Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Wed, 19 Jul 2023 16:31:49 +0530 Subject: [PATCH 30/54] Fix test --- tests/scripts/test_scripts.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/scripts/test_scripts.py b/tests/scripts/test_scripts.py index e17765471..146048c42 100644 --- a/tests/scripts/test_scripts.py +++ b/tests/scripts/test_scripts.py @@ -969,7 +969,10 @@ def test_analyze_gather_tb(tmpdir: str): if os.name == "nt": # pragma: no cover pytest.skip("gather_tb uses symlinks: not supported by 
Windows") num_runs = 2 - config_updates: Dict[str, Any] = dict(local_dir=tmpdir, run_name="test") + config_updates: Dict[str, Any] = dict( + tune_run_kwargs=dict(local_dir=tmpdir), + run_name="test", + ) config_updates.update(PARALLEL_CONFIG_LOW_RESOURCE) config_updates.update(num_samples=num_runs) parallel_run = parallel.parallel_ex.run( From d50238f1b900b05296d081954624cac9e2bcf6ab Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Wed, 19 Jul 2023 17:02:37 +0530 Subject: [PATCH 31/54] Fix lint --- src/imitation/scripts/parallel.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index 65a72eae3..a7a08064b 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -4,7 +4,7 @@ import copy import glob import pathlib -from typing import Any, Callable, Dict, Mapping, Optional, Sequence +from typing import Any, Callable, Dict, Mapping, Sequence import ray import ray.tune @@ -77,6 +77,8 @@ def parallel( Raises: TypeError: Named configs not string sequences or config updates not mappings. + ValueError: `repeat` > 1 but `search_alg` is not an instance of + `ray.tune.search.SearchAlgorithm`. Returns: The result of running the parallel experiment with `ray.tune.run()`. From 3fe22d4e6904c60c581a69004788b08b0184c8ed Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Wed, 19 Jul 2023 21:37:18 +0530 Subject: [PATCH 32/54] Updates from review --- benchmarking/tuning.py | 21 +++++++++++++++------ src/imitation/scripts/config/parallel.py | 1 - src/imitation/scripts/parallel.py | 2 +- tests/scripts/test_scripts.py | 1 - 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/benchmarking/tuning.py b/benchmarking/tuning.py index 324032088..409d0b5af 100644 --- a/benchmarking/tuning.py +++ b/benchmarking/tuning.py @@ -7,6 +7,7 @@ import numpy as np import ray from pandas.api import types as pd_types +from ray.tune.search import optuna from sacred.observers import FileStorageObserver from tuning_config import parallel_ex, tuning_ex @@ -32,7 +33,15 @@ def tune( Raises: ValueError: If no trials are returned by the parallel run of tuning. 
""" - run = parallel_ex.run(config_updates=parallel_run_config) + search_alg = optuna.OptunaSearch() + updated_parallel_run_config = copy.deepcopy(parallel_run_config) + if "tune_run_kwargs" not in updated_parallel_run_config: + tune_run_kwargs = {} + else: + tune_run_kwargs = updated_parallel_run_config["tune_run_kwargs"] + tune_run_kwargs.update(search_alg=search_alg) + updated_parallel_run_config.update(tune_run_kwargs=tune_run_kwargs) + run = parallel_ex.run(config_updates=updated_parallel_run_config) experiment_analysis = run.result if not experiment_analysis.trials: raise ValueError( @@ -42,23 +51,23 @@ def tune( ) return_key = "imit_stats/monitor_return_mean" - if parallel_run_config["sacred_ex_name"] == "train_rl": + if updated_parallel_run_config["sacred_ex_name"] == "train_rl": return_key = "monitor_return_mean" best_trial = find_best_trial(experiment_analysis, return_key, print_return=True) if num_eval_seeds > 0: # evaluate the best trial resources_per_trial_eval = copy.deepcopy( - parallel_run_config["resources_per_trial"], + updated_parallel_run_config["resources_per_trial"], ) # update cpus per trial only if it is provided in `resources_per_trial` # Uses the default values (cpu=1) if it is not provided - if "cpu" in parallel_run_config["resources_per_trial"]: + if "cpu" in updated_parallel_run_config["resources_per_trial"]: resources_per_trial_eval["cpu"] *= eval_best_trial_resource_multiplier evaluate_trial( best_trial, num_eval_seeds, - parallel_run_config["run_name"] + "_best_hp_eval", - parallel_run_config, + updated_parallel_run_config["run_name"] + "_best_hp_eval", + updated_parallel_run_config, resources_per_trial_eval, return_key, ) diff --git a/src/imitation/scripts/config/parallel.py b/src/imitation/scripts/config/parallel.py index bdc591422..c9c898feb 100644 --- a/src/imitation/scripts/config/parallel.py +++ b/src/imitation/scripts/config/parallel.py @@ -36,7 +36,6 @@ def config(): num_samples = 1 # Number of samples per grid search configuration repeat = 1 # Number of times to repeat a sampled configuration - search_alg = "optuna" # search algorithm to use experiment_checkpoint_path = "" # Path to checkpoint of experiment tune_run_kwargs = {} # Additional kwargs to pass to `tune.run` diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index a7a08064b..57503d6e0 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -34,7 +34,7 @@ def parallel( A Sacred FileObserver is attached to the inner experiment and writes Sacred logs to "{RAY_LOCAL_DIR}/sacred/". These files are automatically copied over - to `upload_dir` if that argument is provided. + to `upload_dir` if that argument is provided in `tune_run_kwargs`. Args: sacred_ex_name: The Sacred experiment to tune. Either "train_rl" or diff --git a/tests/scripts/test_scripts.py b/tests/scripts/test_scripts.py index 146048c42..7ff241323 100644 --- a/tests/scripts/test_scripts.py +++ b/tests/scripts/test_scripts.py @@ -802,7 +802,6 @@ def test_train_rl_cnn_policy(tmpdir: str, rng): # Need absolute path because raylet runs in different working directory. "demonstrations.path": CARTPOLE_TEST_ROLLOUT_PATH.absolute(), }, - search_alg=None, # Use default search algorithm of ray. 
search_space={ "command_name": "airl", "config_updates": {"total_timesteps": tune.choice([5, 10])}, From c50aa20ddfa9f7ce5987a3fd08083d22757925a7 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 20 Jul 2023 16:19:04 +0530 Subject: [PATCH 33/54] Simplify few lines of code --- benchmarking/tuning.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/benchmarking/tuning.py b/benchmarking/tuning.py index 409d0b5af..9c3f52498 100644 --- a/benchmarking/tuning.py +++ b/benchmarking/tuning.py @@ -33,14 +33,12 @@ def tune( Raises: ValueError: If no trials are returned by the parallel run of tuning. """ - search_alg = optuna.OptunaSearch() updated_parallel_run_config = copy.deepcopy(parallel_run_config) - if "tune_run_kwargs" not in updated_parallel_run_config: - tune_run_kwargs = {} + search_alg = optuna.OptunaSearch() + if "tune_run_kwargs" in updated_parallel_run_config: + updated_parallel_run_config["tune_run_kwargs"]["search_alg"] = search_alg else: - tune_run_kwargs = updated_parallel_run_config["tune_run_kwargs"] - tune_run_kwargs.update(search_alg=search_alg) - updated_parallel_run_config.update(tune_run_kwargs=tune_run_kwargs) + updated_parallel_run_config["tune_run_kwargs"] = dict(search_alg=search_alg) run = parallel_ex.run(config_updates=updated_parallel_run_config) experiment_analysis = run.result if not experiment_analysis.trials: From 000af616fb159c165f4806df11d865ee2a6b3663 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Fri, 4 Aug 2023 21:54:48 +0530 Subject: [PATCH 34/54] Updates from review --- benchmarking/README.md | 3 ++- src/imitation/scripts/analyze.py | 3 --- .../scripts/config/train_adversarial.py | 4 ++++ .../config/train_preference_comparisons.py | 4 ++++ src/imitation/scripts/config/train_rl.py | 5 +++++ src/imitation/scripts/parallel.py | 16 +++++++--------- tests/scripts/test_scripts.py | 3 +++ 7 files changed, 25 insertions(+), 13 deletions(-) diff --git a/benchmarking/README.md b/benchmarking/README.md index 3973c6181..ba89da69d 100644 --- a/benchmarking/README.md +++ b/benchmarking/README.md @@ -15,7 +15,8 @@ python -m imitation.scripts. with benchmarking/.json') +from imitation.scripts. import +.run(command_name="", named_configs=["benchmarking/.json"]) ``` # Tuning Hyperparameters diff --git a/src/imitation/scripts/analyze.py b/src/imitation/scripts/analyze.py index 8977fed47..96b34bd6e 100644 --- a/src/imitation/scripts/analyze.py +++ b/src/imitation/scripts/analyze.py @@ -166,9 +166,6 @@ def _get_algo_name(sd: sacred_util.SacredDicts) -> str: def _return_summaries(sd: sacred_util.SacredDicts) -> dict: imit_stats = get(sd.run, "result.imit_stats") - if imit_stats is None: - # stored in rollout key for preference comparison - imit_stats = get(sd.run, "result.rollout") expert_stats = get(sd.run, "result.expert_stats") expert_return_summary = None diff --git a/src/imitation/scripts/config/train_adversarial.py b/src/imitation/scripts/config/train_adversarial.py index ef675eab6..acc842095 100644 --- a/src/imitation/scripts/config/train_adversarial.py +++ b/src/imitation/scripts/config/train_adversarial.py @@ -8,6 +8,10 @@ from imitation.scripts.ingredients import logging as logging_ingredient from imitation.scripts.ingredients import policy_evaluation, reward, rl +# Note: All the hyperparameter configs in the file are of the tuned +# hyperparameters of the RL algorithm of the respective environment. 
+# Taken from imitation/scripts/config/train_rl.py + train_adversarial_ex = sacred.Experiment( "train_adversarial", ingredients=[ diff --git a/src/imitation/scripts/config/train_preference_comparisons.py b/src/imitation/scripts/config/train_preference_comparisons.py index 4fe9c793e..4d8531732 100644 --- a/src/imitation/scripts/config/train_preference_comparisons.py +++ b/src/imitation/scripts/config/train_preference_comparisons.py @@ -8,6 +8,10 @@ from imitation.scripts.ingredients import logging as logging_ingredient from imitation.scripts.ingredients import policy_evaluation, reward, rl +# Note: All the hyperparameter configs in the file are of the tuned +# hyperparameters of the RL algorithm of the respective environment. +# Taken from imitation/scripts/config/train_rl.py + train_preference_comparisons_ex = sacred.Experiment( "train_preference_comparisons", ingredients=[ diff --git a/src/imitation/scripts/config/train_rl.py b/src/imitation/scripts/config/train_rl.py index a5475540d..e4ab71da1 100644 --- a/src/imitation/scripts/config/train_rl.py +++ b/src/imitation/scripts/config/train_rl.py @@ -8,6 +8,11 @@ from imitation.scripts.ingredients import logging as logging_ingredient from imitation.scripts.ingredients import policy_evaluation, rl +# Note: All the hyperparameter configs in the file are tuned +# for the PPO algorithm on the respective environment using the +# RL Baselines Zoo library: +# https://github.com/HumanCompatibleAI/rl-baselines3-zoo/ + train_rl_ex = sacred.Experiment( "train_rl", ingredients=[ diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index 57503d6e0..9f5478a6e 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -24,7 +24,7 @@ def parallel( search_space: Mapping[str, Any], base_named_configs: Sequence[str], base_config_updates: Mapping[str, Any], - resources_per_trial: Dict[str, Any], + resources_per_trial: Mapping[str, Any], init_kwargs: Mapping[str, Any], repeat: int, experiment_checkpoint_path: str, @@ -115,17 +115,15 @@ def parallel( ray.init(**init_kwargs) updated_tune_run_kwargs = copy.deepcopy(tune_run_kwargs) if repeat > 1: - if "search_alg" not in updated_tune_run_kwargs: - updated_tune_run_kwargs["search_alg"] = optuna.OptunaSearch() try: - algo = updated_tune_run_kwargs["search_alg"] - algo = search.Repeater(algo, repeat) - updated_tune_run_kwargs["search_alg"] = algo - except AttributeError: + # Use optuna as the default search algorithm for repeat runs. + algo = tune_run_kwargs.get("search_alg", optuna.OptunaSearch()) + updated_tune_run_kwargs["search_alg"] = search.Repeater(algo, repeat) + except AttributeError as e: raise ValueError( "repeat > 1 but search_alg is not an instance of " "ray.tune.search.SearchAlgorithm", - ) + ) from e if sacred_ex_name == "train_rl": return_key = "monitor_return_mean" @@ -198,7 +196,7 @@ def _ray_tune_sacred_wrapper( `ex.run`) and `reporter`. The function returns the run result. """ - def inner(config: Dict[str, Any], reporter) -> Mapping[str, Any]: + def inner(config: dict[str, Any], reporter) -> Mapping[str, Any]: """Trainable function with the correct signature for `ray.tune`. 
Args: diff --git a/tests/scripts/test_scripts.py b/tests/scripts/test_scripts.py index 7ff241323..b0271d83b 100644 --- a/tests/scripts/test_scripts.py +++ b/tests/scripts/test_scripts.py @@ -889,6 +889,9 @@ def test_parallel_train_adversarial_custom_env(tmpdir): logging=dict(log_root=tmpdir), demonstrations=dict(path=path), ), + # specifying repeat=2 uses the optuna search algorithm which + # requires the search space to be non-empty. So we provide + # the command name using tune.choice. search_space=dict(command_name=tune.choice(["gail"])), ) config_updates.update(PARALLEL_CONFIG_LOW_RESOURCE) From 8b551341a89a5008fd5c35e04110710ea746d52a Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Fri, 4 Aug 2023 23:11:15 +0530 Subject: [PATCH 35/54] Fix test --- .../algorithms/adversarial/common.py | 37 +++++++++++++++---- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/src/imitation/algorithms/adversarial/common.py b/src/imitation/algorithms/adversarial/common.py index 62b459a0d..545109b0d 100644 --- a/src/imitation/algorithms/adversarial/common.py +++ b/src/imitation/algorithms/adversarial/common.py @@ -2,13 +2,13 @@ import abc import dataclasses import logging -from typing import Callable, Iterable, Iterator, Mapping, Optional, Type, overload +from typing import Callable, Iterable, Iterator, Mapping, Optional, Type, overload, List import numpy as np import torch as th import torch.utils.tensorboard as thboard import tqdm -from stable_baselines3.common import base_class, on_policy_algorithm, policies, vec_env +from stable_baselines3.common import base_class, on_policy_algorithm, policies, vec_env, callbacks from stable_baselines3.sac import policies as sac_policies from torch.nn import functional as F @@ -86,6 +86,30 @@ def compute_train_stats( } +class TrainDiscriminatorCallback(callbacks.BaseCallback): + """Callback for training discriminator after collecting rollouts.""" + + def __init__(self, adversarial_trainer, *args, **kwargs): + """Builds TrainDiscriminatorCallback. + + Args: + *args: Passed through to `callbacks.BaseCallback`. + **kwargs: Passed through to `callbacks.BaseCallback`. + """ + self.adversarial_trainer = adversarial_trainer + super().__init__(*args, **kwargs) + + def _on_step(self) -> bool: + return True + + def _on_rollout_end(self) -> None: + self.adversarial_trainer.model.train_disc() + for _ in range(self.adversarial_trainer.n_disc_updates_per_round): + with networks.training(self.adversarial_trainer.reward_train): + # switch to training mode (affects dropout, normalization) + self.adversarial_trainer.train_disc() + + class AdversarialTrainer(base.DemonstrationAlgorithm[types.Transitions]): """Base class for adversarial imitation learning algorithms like GAIL and AIRL.""" @@ -222,16 +246,17 @@ def __init__( self.venv_buffering = wrappers.BufferingWrapper(self.venv) + self.disc_trainer_callback = TrainDiscriminatorCallback(self) if debug_use_ground_truth: # Would use an identity reward fn here, but RewardFns can't see rewards. 
self.venv_wrapped = self.venv_buffering - self.gen_callback = None + self.gen_callback: List[callbacks.BaseCallback] = [self.disc_trainer_callback] else: self.venv_wrapped = reward_wrapper.RewardVecEnvWrapper( self.venv_buffering, reward_fn=self.reward_train.predict_processed, ) - self.gen_callback = self.venv_wrapped.make_log_callback() + self.gen_callback = [self.venv_wrapped.make_log_callback(), self.disc_trainer_callback] self.venv_train = self.venv_wrapped self.gen_algo.set_env(self.venv_train) @@ -446,10 +471,6 @@ def train( ) for r in tqdm.tqdm(range(0, n_rounds), desc="round"): self.train_gen(self.gen_train_timesteps) - for _ in range(self.n_disc_updates_per_round): - with networks.training(self.reward_train): - # switch to training mode (affects dropout, normalization) - self.train_disc() if callback: callback(r) self.logger.dump(self._global_step) From f3ba2b5ec01331f03295856e4219c68212fc7aee Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Fri, 4 Aug 2023 23:13:59 +0530 Subject: [PATCH 36/54] Revert "Fix test" This reverts commit 8b551341a89a5008fd5c35e04110710ea746d52a. --- .../algorithms/adversarial/common.py | 37 ++++--------------- 1 file changed, 8 insertions(+), 29 deletions(-) diff --git a/src/imitation/algorithms/adversarial/common.py b/src/imitation/algorithms/adversarial/common.py index 545109b0d..62b459a0d 100644 --- a/src/imitation/algorithms/adversarial/common.py +++ b/src/imitation/algorithms/adversarial/common.py @@ -2,13 +2,13 @@ import abc import dataclasses import logging -from typing import Callable, Iterable, Iterator, Mapping, Optional, Type, overload, List +from typing import Callable, Iterable, Iterator, Mapping, Optional, Type, overload import numpy as np import torch as th import torch.utils.tensorboard as thboard import tqdm -from stable_baselines3.common import base_class, on_policy_algorithm, policies, vec_env, callbacks +from stable_baselines3.common import base_class, on_policy_algorithm, policies, vec_env from stable_baselines3.sac import policies as sac_policies from torch.nn import functional as F @@ -86,30 +86,6 @@ def compute_train_stats( } -class TrainDiscriminatorCallback(callbacks.BaseCallback): - """Callback for training discriminator after collecting rollouts.""" - - def __init__(self, adversarial_trainer, *args, **kwargs): - """Builds TrainDiscriminatorCallback. - - Args: - *args: Passed through to `callbacks.BaseCallback`. - **kwargs: Passed through to `callbacks.BaseCallback`. - """ - self.adversarial_trainer = adversarial_trainer - super().__init__(*args, **kwargs) - - def _on_step(self) -> bool: - return True - - def _on_rollout_end(self) -> None: - self.adversarial_trainer.model.train_disc() - for _ in range(self.adversarial_trainer.n_disc_updates_per_round): - with networks.training(self.adversarial_trainer.reward_train): - # switch to training mode (affects dropout, normalization) - self.adversarial_trainer.train_disc() - - class AdversarialTrainer(base.DemonstrationAlgorithm[types.Transitions]): """Base class for adversarial imitation learning algorithms like GAIL and AIRL.""" @@ -246,17 +222,16 @@ def __init__( self.venv_buffering = wrappers.BufferingWrapper(self.venv) - self.disc_trainer_callback = TrainDiscriminatorCallback(self) if debug_use_ground_truth: # Would use an identity reward fn here, but RewardFns can't see rewards. 
self.venv_wrapped = self.venv_buffering - self.gen_callback: List[callbacks.BaseCallback] = [self.disc_trainer_callback] + self.gen_callback = None else: self.venv_wrapped = reward_wrapper.RewardVecEnvWrapper( self.venv_buffering, reward_fn=self.reward_train.predict_processed, ) - self.gen_callback = [self.venv_wrapped.make_log_callback(), self.disc_trainer_callback] + self.gen_callback = self.venv_wrapped.make_log_callback() self.venv_train = self.venv_wrapped self.gen_algo.set_env(self.venv_train) @@ -471,6 +446,10 @@ def train( ) for r in tqdm.tqdm(range(0, n_rounds), desc="round"): self.train_gen(self.gen_train_timesteps) + for _ in range(self.n_disc_updates_per_round): + with networks.training(self.reward_train): + # switch to training mode (affects dropout, normalization) + self.train_disc() if callback: callback(r) self.logger.dump(self._global_step) From f8251c70e98f0ccf29e10f1b1ac35ce08e25a580 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Fri, 4 Aug 2023 23:14:49 +0530 Subject: [PATCH 37/54] Fix test --- src/imitation/scripts/parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index 9f5478a6e..bb90f6174 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -196,7 +196,7 @@ def _ray_tune_sacred_wrapper( `ex.run`) and `reporter`. The function returns the run result. """ - def inner(config: dict[str, Any], reporter) -> Mapping[str, Any]: + def inner(config: Dict[str, Any], reporter) -> Mapping[str, Any]: """Trainable function with the correct signature for `ray.tune`. Args: From 664fc37c0dfd118768186e83006fc06def21a48b Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Mon, 7 Aug 2023 22:58:00 +0530 Subject: [PATCH 38/54] Convert Dict to Mapping in input argument --- src/imitation/scripts/parallel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index bb90f6174..38881ee2b 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -196,7 +196,7 @@ def _ray_tune_sacred_wrapper( `ex.run`) and `reporter`. The function returns the run result. """ - def inner(config: Dict[str, Any], reporter) -> Mapping[str, Any]: + def inner(config: Mapping[str, Any], reporter) -> Mapping[str, Any]: """Trainable function with the correct signature for `ray.tune`. Args: @@ -212,7 +212,7 @@ def inner(config: Dict[str, Any], reporter) -> Mapping[str, Any]: # TODO(shwang): Stop modifying CAPTURE_MODE once the issue is fixed. sacred.SETTINGS.CAPTURE_MODE = "sys" - run_kwargs = config + run_kwargs = dict(**config) updated_run_kwargs: Dict[str, Any] = {} # Import inside function rather than in module because Sacred experiments # are not picklable, and Ray requires this function to be picklable. From 8690e1dcb01fc96fcfa1813c038f2b1ac26f4a3c Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Wed, 30 Aug 2023 10:47:28 +0200 Subject: [PATCH 39/54] Ignore coverage in script configurations. 
--- setup.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.cfg b/setup.cfg index f39db322f..85dedb3e3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -42,6 +42,8 @@ source = imitation include= src/* tests/* +omit = + src/imitation/scripts/config/* [coverage:report] exclude_lines = From dd9eb6a5b7e62b5cf1faf84d9111bac9bef77e9d Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Wed, 30 Aug 2023 11:12:10 +0200 Subject: [PATCH 40/54] Pin huggingface_sb3 version. --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1781a4031..6d1f2489c 100644 --- a/setup.py +++ b/setup.py @@ -207,7 +207,9 @@ def get_local_version(version: "ScmVersion", time_format="%Y%m%d") -> str: STABLE_BASELINES3, "sacred>=0.8.4", "tensorboard>=1.14", - "huggingface_sb3>=2.2.1", + # TODO: remove once https://github.com/huggingface/huggingface_sb3/issues/37 is + # fixed + "huggingface_sb3==2.2.5", "optuna>=3.0.1", "datasets>=2.8.0", ], From 40d87ef2e99dcb8a34041d27dd62327ec8faf8b4 Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Tue, 26 Sep 2023 16:46:04 +0200 Subject: [PATCH 41/54] Update to the newest seals environment versions. --- benchmarking/airl_seals_ant_best_hp_eval.json | 2 +- benchmarking/airl_seals_half_cheetah_best_hp_eval.json | 2 +- benchmarking/airl_seals_hopper_best_hp_eval.json | 2 +- benchmarking/airl_seals_swimmer_best_hp_eval.json | 4 ++-- benchmarking/airl_seals_walker_best_hp_eval.json | 4 ++-- benchmarking/bc_seals_ant_best_hp_eval.json | 2 +- benchmarking/bc_seals_half_cheetah_best_hp_eval.json | 2 +- benchmarking/bc_seals_hopper_best_hp_eval.json | 2 +- benchmarking/bc_seals_swimmer_best_hp_eval.json | 2 +- benchmarking/bc_seals_walker_best_hp_eval.json | 2 +- benchmarking/dagger_seals_ant_best_hp_eval.json | 2 +- benchmarking/dagger_seals_half_cheetah_best_hp_eval.json | 2 +- benchmarking/dagger_seals_hopper_best_hp_eval.json | 2 +- benchmarking/dagger_seals_swimmer_best_hp_eval.json | 2 +- benchmarking/dagger_seals_walker_best_hp_eval.json | 2 +- benchmarking/gail_seals_ant_best_hp_eval.json | 2 +- benchmarking/gail_seals_half_cheetah_best_hp_eval.json | 2 +- benchmarking/gail_seals_hopper_best_hp_eval.json | 2 +- benchmarking/gail_seals_swimmer_best_hp_eval.json | 4 ++-- benchmarking/gail_seals_walker_best_hp_eval.json | 4 ++-- 20 files changed, 24 insertions(+), 24 deletions(-) diff --git a/benchmarking/airl_seals_ant_best_hp_eval.json b/benchmarking/airl_seals_ant_best_hp_eval.json index 17f969ff0..d4131433e 100644 --- a/benchmarking/airl_seals_ant_best_hp_eval.json +++ b/benchmarking/airl_seals_ant_best_hp_eval.json @@ -62,6 +62,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Ant-v0" + "gym_id": "seals/Ant-v1" } } diff --git a/benchmarking/airl_seals_half_cheetah_best_hp_eval.json b/benchmarking/airl_seals_half_cheetah_best_hp_eval.json index 754ba6736..f69ba5cb5 100644 --- a/benchmarking/airl_seals_half_cheetah_best_hp_eval.json +++ b/benchmarking/airl_seals_half_cheetah_best_hp_eval.json @@ -62,6 +62,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/HalfCheetah-v0" + "gym_id": "seals/HalfCheetah-v1" } } diff --git a/benchmarking/airl_seals_hopper_best_hp_eval.json b/benchmarking/airl_seals_hopper_best_hp_eval.json index 91080d7ce..58c2475f5 100644 --- a/benchmarking/airl_seals_hopper_best_hp_eval.json +++ b/benchmarking/airl_seals_hopper_best_hp_eval.json @@ -75,6 +75,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Hopper-v0" + "gym_id": "seals/Hopper-v1" } } diff 
--git a/benchmarking/airl_seals_swimmer_best_hp_eval.json b/benchmarking/airl_seals_swimmer_best_hp_eval.json index fcca8e6b3..8529c58b5 100644 --- a/benchmarking/airl_seals_swimmer_best_hp_eval.json +++ b/benchmarking/airl_seals_swimmer_best_hp_eval.json @@ -12,7 +12,7 @@ }, "expert": { "loader_kwargs": { - "gym_id": "seals/Swimmer-v0", + "gym_id": "seals/Swimmer-v1", "organization": "HumanCompatibleAI" } }, @@ -81,6 +81,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Swimmer-v0" + "gym_id": "seals/Swimmer-v1" } } diff --git a/benchmarking/airl_seals_walker_best_hp_eval.json b/benchmarking/airl_seals_walker_best_hp_eval.json index c63070751..edd99806d 100644 --- a/benchmarking/airl_seals_walker_best_hp_eval.json +++ b/benchmarking/airl_seals_walker_best_hp_eval.json @@ -12,7 +12,7 @@ }, "expert": { "loader_kwargs": { - "gym_id": "seals/Walker2d-v0", + "gym_id": "seals/Walker2d-v1", "organization": "HumanCompatibleAI" } }, @@ -81,6 +81,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Walker2d-v0" + "gym_id": "seals/Walker2d-v1" } } diff --git a/benchmarking/bc_seals_ant_best_hp_eval.json b/benchmarking/bc_seals_ant_best_hp_eval.json index 108a93ce7..e9baa8fc1 100644 --- a/benchmarking/bc_seals_ant_best_hp_eval.json +++ b/benchmarking/bc_seals_ant_best_hp_eval.json @@ -43,6 +43,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Ant-v0" + "gym_id": "seals/Ant-v1" } } diff --git a/benchmarking/bc_seals_half_cheetah_best_hp_eval.json b/benchmarking/bc_seals_half_cheetah_best_hp_eval.json index ecaff2eb0..041f159b0 100644 --- a/benchmarking/bc_seals_half_cheetah_best_hp_eval.json +++ b/benchmarking/bc_seals_half_cheetah_best_hp_eval.json @@ -43,6 +43,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/HalfCheetah-v0" + "gym_id": "seals/HalfCheetah-v1" } } diff --git a/benchmarking/bc_seals_hopper_best_hp_eval.json b/benchmarking/bc_seals_hopper_best_hp_eval.json index e8c821841..9a7872d37 100644 --- a/benchmarking/bc_seals_hopper_best_hp_eval.json +++ b/benchmarking/bc_seals_hopper_best_hp_eval.json @@ -43,6 +43,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Hopper-v0" + "gym_id": "seals/Hopper-v1" } } diff --git a/benchmarking/bc_seals_swimmer_best_hp_eval.json b/benchmarking/bc_seals_swimmer_best_hp_eval.json index 30884c9c4..8a8f2456a 100644 --- a/benchmarking/bc_seals_swimmer_best_hp_eval.json +++ b/benchmarking/bc_seals_swimmer_best_hp_eval.json @@ -43,6 +43,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Swimmer-v0" + "gym_id": "seals/Swimmer-v1" } } diff --git a/benchmarking/bc_seals_walker_best_hp_eval.json b/benchmarking/bc_seals_walker_best_hp_eval.json index 0ca30120e..f33e6c5a2 100644 --- a/benchmarking/bc_seals_walker_best_hp_eval.json +++ b/benchmarking/bc_seals_walker_best_hp_eval.json @@ -43,6 +43,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Walker2d-v0" + "gym_id": "seals/Walker2d-v1" } } diff --git a/benchmarking/dagger_seals_ant_best_hp_eval.json b/benchmarking/dagger_seals_ant_best_hp_eval.json index de75b80f1..e02828667 100644 --- a/benchmarking/dagger_seals_ant_best_hp_eval.json +++ b/benchmarking/dagger_seals_ant_best_hp_eval.json @@ -47,6 +47,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Ant-v0" + "gym_id": "seals/Ant-v1" } } diff --git a/benchmarking/dagger_seals_half_cheetah_best_hp_eval.json b/benchmarking/dagger_seals_half_cheetah_best_hp_eval.json index 7f42bfdf9..d1c9e5923 100644 --- 
a/benchmarking/dagger_seals_half_cheetah_best_hp_eval.json +++ b/benchmarking/dagger_seals_half_cheetah_best_hp_eval.json @@ -47,6 +47,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/HalfCheetah-v0" + "gym_id": "seals/HalfCheetah-v1" } } diff --git a/benchmarking/dagger_seals_hopper_best_hp_eval.json b/benchmarking/dagger_seals_hopper_best_hp_eval.json index 1cf29a1a4..b91f66298 100644 --- a/benchmarking/dagger_seals_hopper_best_hp_eval.json +++ b/benchmarking/dagger_seals_hopper_best_hp_eval.json @@ -47,6 +47,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Hopper-v0" + "gym_id": "seals/Hopper-v1" } } diff --git a/benchmarking/dagger_seals_swimmer_best_hp_eval.json b/benchmarking/dagger_seals_swimmer_best_hp_eval.json index c112db680..545761cbc 100644 --- a/benchmarking/dagger_seals_swimmer_best_hp_eval.json +++ b/benchmarking/dagger_seals_swimmer_best_hp_eval.json @@ -47,6 +47,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Swimmer-v0" + "gym_id": "seals/Swimmer-v1" } } diff --git a/benchmarking/dagger_seals_walker_best_hp_eval.json b/benchmarking/dagger_seals_walker_best_hp_eval.json index e59bef464..7b694c8d2 100644 --- a/benchmarking/dagger_seals_walker_best_hp_eval.json +++ b/benchmarking/dagger_seals_walker_best_hp_eval.json @@ -47,6 +47,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Walker2d-v0" + "gym_id": "seals/Walker2d-v1" } } diff --git a/benchmarking/gail_seals_ant_best_hp_eval.json b/benchmarking/gail_seals_ant_best_hp_eval.json index 81399b00c..3d43b34ba 100644 --- a/benchmarking/gail_seals_ant_best_hp_eval.json +++ b/benchmarking/gail_seals_ant_best_hp_eval.json @@ -62,6 +62,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Ant-v0" + "gym_id": "seals/Ant-v1" } } diff --git a/benchmarking/gail_seals_half_cheetah_best_hp_eval.json b/benchmarking/gail_seals_half_cheetah_best_hp_eval.json index 1d2f26648..914f3712a 100644 --- a/benchmarking/gail_seals_half_cheetah_best_hp_eval.json +++ b/benchmarking/gail_seals_half_cheetah_best_hp_eval.json @@ -62,6 +62,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/HalfCheetah-v0" + "gym_id": "seals/HalfCheetah-v1" } } diff --git a/benchmarking/gail_seals_hopper_best_hp_eval.json b/benchmarking/gail_seals_hopper_best_hp_eval.json index 70787ff7e..cebdae71c 100644 --- a/benchmarking/gail_seals_hopper_best_hp_eval.json +++ b/benchmarking/gail_seals_hopper_best_hp_eval.json @@ -75,6 +75,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Hopper-v0" + "gym_id": "seals/Hopper-v1" } } diff --git a/benchmarking/gail_seals_swimmer_best_hp_eval.json b/benchmarking/gail_seals_swimmer_best_hp_eval.json index 650c5f46a..b0bd0e645 100644 --- a/benchmarking/gail_seals_swimmer_best_hp_eval.json +++ b/benchmarking/gail_seals_swimmer_best_hp_eval.json @@ -12,7 +12,7 @@ }, "expert": { "loader_kwargs": { - "gym_id": "seals/Swimmer-v0", + "gym_id": "seals/Swimmer-v1", "organization": "HumanCompatibleAI" } }, @@ -81,6 +81,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Swimmer-v0" + "gym_id": "seals/Swimmer-v1" } } diff --git a/benchmarking/gail_seals_walker_best_hp_eval.json b/benchmarking/gail_seals_walker_best_hp_eval.json index d85eb46d5..2626b4c43 100644 --- a/benchmarking/gail_seals_walker_best_hp_eval.json +++ b/benchmarking/gail_seals_walker_best_hp_eval.json @@ -12,7 +12,7 @@ }, "expert": { "loader_kwargs": { - "gym_id": "seals/Walker2d-v0", + "gym_id": "seals/Walker2d-v1", "organization": "HumanCompatibleAI" } }, @@ 
-81,6 +81,6 @@ "n_episodes_eval": 50 }, "environment": { - "gym_id": "seals/Walker2d-v0" + "gym_id": "seals/Walker2d-v1" } } From 71f6c9283a387d35ed94f832ca660711942052e3 Mon Sep 17 00:00:00 2001 From: Maximilian Ernestus Date: Wed, 27 Sep 2023 09:49:28 +0200 Subject: [PATCH 42/54] Push gymnasium dependency to 0.29 to ensure mujoco envs work. --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 7bc4051a9..0384014ee 100644 --- a/setup.py +++ b/setup.py @@ -187,7 +187,7 @@ def get_local_version(version: "ScmVersion", time_format="%Y%m%d") -> str: # encode only known incompatibilities here. This prevents nasty dependency issues # for our users. install_requires=[ - "gymnasium[classic-control]~=0.28.1", + "gymnasium[classic-control]~=0.29", "matplotlib", "numpy>=1.15", "torch>=1.4.0", @@ -220,7 +220,7 @@ def get_local_version(version: "ScmVersion", time_format="%Y%m%d") -> str: "docs": DOCS_REQUIRE, "parallel": PARALLEL_REQUIRE, "mujoco": [ - "gymnasium[classic-control,mujoco]~=0.28.1", + "gymnasium[classic-control,mujoco]~=0.29", ], "atari": ATARI_REQUIRE, }, From 747ad32787e56a6939f6064eedb0cda8a67c3b1a Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Wed, 4 Oct 2023 05:58:13 +0530 Subject: [PATCH 43/54] Incorporate review comments --- src/imitation/scripts/analyze.py | 11 +++----- .../imitation/scripts/config/tuning.py | 12 ++++----- src/imitation/scripts/parallel.py | 12 ++------- .../imitation/scripts}/tuning.py | 26 +++++++++++++------ tests/scripts/test_scripts.py | 2 +- 5 files changed, 31 insertions(+), 32 deletions(-) rename benchmarking/tuning_config.py => src/imitation/scripts/config/tuning.py (97%) rename {benchmarking => src/imitation/scripts}/tuning.py (85%) diff --git a/src/imitation/scripts/analyze.py b/src/imitation/scripts/analyze.py index 96b34bd6e..b63538f6d 100644 --- a/src/imitation/scripts/analyze.py +++ b/src/imitation/scripts/analyze.py @@ -268,13 +268,10 @@ def analyze_imitation( Returns: The DataFrame generated from the Sacred logs. """ - if table_verbosity == 3: - # Get column names for which we have get value using make_entry_fn - # These are same across Level 2 & 3. In Level 3, we additionally add remaining - # config columns. - table_entry_fns_subset = _get_table_entry_fns_subset(2) - else: - table_entry_fns_subset = _get_table_entry_fns_subset(table_verbosity) + # Get column names for which we have get value using make_entry_fn + # These are same across Level 2 & 3. In Level 3, we additionally add remaining + # config columns. 
+ table_entry_fns_subset = _get_table_entry_fns_subset(min(table_verbosity, 2)) output_table = pd.DataFrame() for sd in _gather_sacred_dicts(): diff --git a/benchmarking/tuning_config.py b/src/imitation/scripts/config/tuning.py similarity index 97% rename from benchmarking/tuning_config.py rename to src/imitation/scripts/config/tuning.py index 239537406..07161d04c 100644 --- a/benchmarking/tuning_config.py +++ b/src/imitation/scripts/config/tuning.py @@ -49,24 +49,24 @@ def bc(): search_space={ "config_updates": { "bc": dict( - batch_size=tune.choice([8, 16, 32, 64]), + batch_size=tune.choice([8]), l2_weight=tune.loguniform(1e-6, 1e-2), # L2 regularization weight optimizer_kwargs=dict( lr=tune.loguniform(1e-5, 1e-2), ), train_kwargs=dict( - n_epochs=tune.choice([1, 5, 10, 20]), + n_epochs=tune.choice([1]), ), ), }, "command_name": "bc", }, - num_samples=64, - repeat=3, + num_samples=2, + repeat=2, resources_per_trial=dict(cpu=1), ) - num_eval_seeds = 5 + num_eval_seeds = 1 eval_best_trial_resource_multiplier = 1 @@ -117,7 +117,7 @@ def dagger(): def gail(): parallel_run_config = dict( sacred_ex_name="train_adversarial", - run_name="gail_tuning_hc", + run_name="gail_tuning", base_named_configs=["logging.wandb_logging"], base_config_updates={ "environment": {"num_vec": 1}, diff --git a/src/imitation/scripts/parallel.py b/src/imitation/scripts/parallel.py index 38881ee2b..d5e5e2378 100644 --- a/src/imitation/scripts/parallel.py +++ b/src/imitation/scripts/parallel.py @@ -2,7 +2,6 @@ import collections.abc import copy -import glob import pathlib from typing import Any, Callable, Dict, Mapping, Sequence @@ -37,8 +36,8 @@ def parallel( to `upload_dir` if that argument is provided in `tune_run_kwargs`. Args: - sacred_ex_name: The Sacred experiment to tune. Either "train_rl" or - "train_imitation" or "train_adversarial" or "train_preference_comparisons". + sacred_ex_name: The Sacred experiment to tune. Either "train_rl", + "train_imitation", "train_adversarial" or "train_preference_comparisons". run_name: A name describing this parallelizing experiment. This argument is also passed to `ray.tune.run` as the `name` argument. It is also saved in 'sacred/run.json' of each inner Sacred experiment @@ -132,14 +131,7 @@ def parallel( try: if experiment_checkpoint_path: - # load experiment analysis results result = ray.tune.ExperimentAnalysis(experiment_checkpoint_path) - result._load_checkpoints_from_latest( - glob.glob(experiment_checkpoint_path + "/experiment_state*.json"), - ) - # update result.trials using all the experiment_state json files - result.trials = None - result.fetch_trial_dataframes() else: result = ray.tune.run( trainable, diff --git a/benchmarking/tuning.py b/src/imitation/scripts/tuning.py similarity index 85% rename from benchmarking/tuning.py rename to src/imitation/scripts/tuning.py index 9c3f52498..a605a206a 100644 --- a/benchmarking/tuning.py +++ b/src/imitation/scripts/tuning.py @@ -9,7 +9,9 @@ from pandas.api import types as pd_types from ray.tune.search import optuna from sacred.observers import FileStorageObserver -from tuning_config import parallel_ex, tuning_ex + +from imitation.scripts.config.parallel import parallel_ex +from imitation.scripts.config.tuning import tuning_ex @tuning_ex.main @@ -18,10 +20,15 @@ def tune( eval_best_trial_resource_multiplier: int = 1, num_eval_seeds: int = 5, ) -> None: - """Tune hyperparameters of imitation algorithms using parallel script. + """Tune hyperparameters of imitation algorithms using the parallel script. 
+ + The parallel script is called twice in this function. The first call is to + tune the hyperparameters. The second call is to evaluate the best trial on + a separate set of seeds. Args: parallel_run_config: Dictionary of arguments to pass to the parallel script. + This is used to define the search space for tuning the hyperparameters. eval_best_trial_resource_multiplier: Factor by which to multiply the number of cpus per trial in `resources_per_trial`. This is useful for allocating more resources per trial to the evaluation trials than the @@ -35,10 +42,8 @@ def tune( """ updated_parallel_run_config = copy.deepcopy(parallel_run_config) search_alg = optuna.OptunaSearch() - if "tune_run_kwargs" in updated_parallel_run_config: - updated_parallel_run_config["tune_run_kwargs"]["search_alg"] = search_alg - else: - updated_parallel_run_config["tune_run_kwargs"] = dict(search_alg=search_alg) + tune_run_kwargs = updated_parallel_run_config.setdefault("tune_run_kwargs", dict()) + tune_run_kwargs["search_alg"] = search_alg run = parallel_ex.run(config_updates=updated_parallel_run_config) experiment_analysis = run.result if not experiment_analysis.trials: @@ -93,9 +98,13 @@ def find_best_trial( if pd_types.is_object_dtype(df[col]): df[col] = df[col].astype("str") # group into separate HP configs - grp_keys = [c for c in df.columns if c.startswith("config") and "seed" not in c] + grp_keys = [c for c in df.columns if c.startswith("config")] + grp_keys = [c for c in grp_keys if "seed" not in c and "trial_index" not in c] grps = df.groupby(grp_keys) # store mean return of runs across all seeds in a group + # the transform method is applied to get the mean return for every trial + # instead of for every group. So every trial in a group will have the same + # mean return column. df["mean_return"] = grps[return_key].transform(lambda x: x.mean()) best_config_df = df[df["mean_return"] == df["mean_return"].max()] row = best_config_df.iloc[0] @@ -149,10 +158,11 @@ def evaluate_trial( num_samples=1, search_space=config, resources_per_trial=resources_per_trial, - search_alg=None, repeat=1, experiment_checkpoint_path="", ) + # required for grid search + eval_config_updates["tune_run_kwargs"].update(search_alg=None) eval_run = parallel_ex.run(config_updates=eval_config_updates) eval_result = eval_run.result returns = eval_result.results_df[return_key].to_numpy() diff --git a/tests/scripts/test_scripts.py b/tests/scripts/test_scripts.py index a44639cef..5fc2f122d 100644 --- a/tests/scripts/test_scripts.py +++ b/tests/scripts/test_scripts.py @@ -981,7 +981,7 @@ def test_analyze_imitation(tmpdir: str, run_names: List[str], run_sacred_fn): assert run.status == "COMPLETED" # Check that analyze script finds the correct number of logs. 
- def check(run_name: Optional[str], count: int, table_verbosity=1) -> None: + def check(run_name: Optional[str], count: int, table_verbosity: int = 1) -> None: run = analyze.analysis_ex.run( command_name="analyze_imitation", config_updates=dict( From 691e75945579cd8aaea3a133ffd1178bb978a450 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Wed, 4 Oct 2023 07:49:02 +0530 Subject: [PATCH 44/54] Fix test errors --- src/imitation/scripts/tuning.py | 6 +++--- tests/test_benchmarking.py | 34 +++++++++++++-------------------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/src/imitation/scripts/tuning.py b/src/imitation/scripts/tuning.py index a605a206a..24095b1de 100644 --- a/src/imitation/scripts/tuning.py +++ b/src/imitation/scripts/tuning.py @@ -2,7 +2,7 @@ import copy import pathlib -from typing import Any, Dict +from typing import Dict import numpy as np import ray @@ -16,7 +16,7 @@ @tuning_ex.main def tune( - parallel_run_config: Dict[str, Any], + parallel_run_config, eval_best_trial_resource_multiplier: int = 1, num_eval_seeds: int = 5, ) -> None: @@ -128,7 +128,7 @@ def evaluate_trial( trial: ray.tune.experiment.Trial, num_eval_seeds: int, run_name: str, - parallel_run_config: Dict[str, Any], + parallel_run_config, resources_per_trial: Dict[str, int], return_key: str, print_return: bool = False, diff --git a/tests/test_benchmarking.py b/tests/test_benchmarking.py index 18d4f12cf..0a93943ef 100644 --- a/tests/test_benchmarking.py +++ b/tests/test_benchmarking.py @@ -1,11 +1,9 @@ """Tests for config files in benchmarking/ folder.""" import pathlib -import subprocess -import sys import pytest -from imitation.scripts import train_adversarial, train_imitation +from imitation.scripts import train_adversarial, train_imitation, tuning THIS_DIR = pathlib.Path(__file__).absolute().parent BENCHMARKING_DIR = THIS_DIR.parent / "benchmarking" @@ -48,26 +46,20 @@ def test_benchmarks_print_config_succeeds(algorithm: str, environment: str): assert run.status == "COMPLETED" +@pytest.mark.parametrize("environment", ENVIRONMENTS) @pytest.mark.parametrize("algorithm", ALGORITHMS) -def test_tuning_print_config_succeeds(algorithm: str): +def test_tuning_print_config_succeeds(algorithm: str, environment: str): # We test the configs using the print_config command, # because running the configs requires MuJoCo. # Requiring MuJoCo to run the tests adds too much complexity. - - # We need to use sys.executable, not just "python", on Windows as - # subprocess.call ignores PATH (unless shell=True) so runs a - # system-wide Python interpreter outside of our venv. 
See: - # https://stackoverflow.com/questions/5658622/ - tuning_path = str(BENCHMARKING_DIR / "tuning.py") - env = 'parallel_run_config.base_named_configs=["seals_cartpole"]' - exit_code = subprocess.call( - [ - sys.executable, - tuning_path, - "print_config", - "with", - f"{algorithm}", - env, - ], + experiment = tuning.tuning_ex + run = experiment.run( + command_name="print_config", + named_configs=[algorithm], + config_updates=dict( + parallel_run_config=dict( + base_named_configs=[environment], + ), + ), ) - assert exit_code == 0 + assert run.status == "COMPLETED" From 2038e60f9935372ca91a6fad15d665e68e85e5a2 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 5 Oct 2023 05:08:23 +0530 Subject: [PATCH 45/54] Move benchmarking/ to scripts/ and add named configs for tuned hyperparams --- experiments/commands.py | 17 +- setup.cfg | 1 - .../config}/airl_seals_ant_best_hp_eval.json | 0 .../airl_seals_walker_best_hp_eval.json | 0 src/imitation/scripts/config/parallel.py | 2 +- .../scripts/config/train_adversarial.py | 157 +++--------------- .../scripts/config/train_imitation.py | 59 +++---- .../airl_seals_ant_best_hp_eval.json | 67 ++++++++ .../airl_seals_half_cheetah_best_hp_eval.json | 0 .../airl_seals_hopper_best_hp_eval.json | 0 .../airl_seals_swimmer_best_hp_eval.json | 0 .../airl_seals_walker_best_hp_eval.json | 86 ++++++++++ .../tuned_hps}/bc_seals_ant_best_hp_eval.json | 0 .../bc_seals_half_cheetah_best_hp_eval.json | 0 .../bc_seals_hopper_best_hp_eval.json | 0 .../bc_seals_swimmer_best_hp_eval.json | 0 .../bc_seals_walker_best_hp_eval.json | 0 .../dagger_seals_ant_best_hp_eval.json | 0 ...agger_seals_half_cheetah_best_hp_eval.json | 0 .../dagger_seals_hopper_best_hp_eval.json | 0 .../dagger_seals_swimmer_best_hp_eval.json | 0 .../dagger_seals_walker_best_hp_eval.json | 0 .../fast_dagger_seals_cartpole.json | 0 .../gail_seals_ant_best_hp_eval.json | 0 .../gail_seals_half_cheetah_best_hp_eval.json | 0 .../gail_seals_hopper_best_hp_eval.json | 0 .../gail_seals_swimmer_best_hp_eval.json | 0 .../gail_seals_walker_best_hp_eval.json | 0 tests/test_benchmarking.py | 17 +- tests/test_experiments.py | 58 +++---- 30 files changed, 240 insertions(+), 224 deletions(-) rename {benchmarking => src/imitation/scripts/config}/airl_seals_ant_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config}/airl_seals_walker_best_hp_eval.json (100%) create mode 100644 src/imitation/scripts/config/tuned_hps/airl_seals_ant_best_hp_eval.json rename {benchmarking => src/imitation/scripts/config/tuned_hps}/airl_seals_half_cheetah_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/airl_seals_hopper_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/airl_seals_swimmer_best_hp_eval.json (100%) create mode 100644 src/imitation/scripts/config/tuned_hps/airl_seals_walker_best_hp_eval.json rename {benchmarking => src/imitation/scripts/config/tuned_hps}/bc_seals_ant_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/bc_seals_half_cheetah_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/bc_seals_hopper_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/bc_seals_swimmer_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/bc_seals_walker_best_hp_eval.json (100%) rename {benchmarking => 
src/imitation/scripts/config/tuned_hps}/dagger_seals_ant_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/dagger_seals_half_cheetah_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/dagger_seals_hopper_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/dagger_seals_swimmer_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/dagger_seals_walker_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/fast_dagger_seals_cartpole.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/gail_seals_ant_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/gail_seals_half_cheetah_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/gail_seals_hopper_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/gail_seals_swimmer_best_hp_eval.json (100%) rename {benchmarking => src/imitation/scripts/config/tuned_hps}/gail_seals_walker_best_hp_eval.json (100%) diff --git a/experiments/commands.py b/experiments/commands.py index 738a55011..0dc0cce7c 100644 --- a/experiments/commands.py +++ b/experiments/commands.py @@ -12,9 +12,10 @@ For example, we can run: +TUNED_HPS_DIR=../src/imitation/scripts/config/tuned_hps python commands.py \ --name=run0 \ - --cfg_pattern=../benchmarking/*ai*_seals_walker_*.json \ + --cfg_pattern=$TUNED_HPS_DIR/*ai*_seals_walker_*.json \ --output_dir=output And get the following commands printed out: @@ -22,13 +23,13 @@ python -m imitation.scripts.train_adversarial airl \ --capture=sys --name=run0 \ --file_storage=output/sacred/$USER-cmd-run0-airl-0-a3531726 \ - with ../benchmarking/airl_seals_walker_best_hp_eval.json \ + with ../src/imitation/scripts/config/tuned_hps/airl_seals_walker_best_hp_eval.json \ seed=0 logging.log_root=output python -m imitation.scripts.train_adversarial gail \ --capture=sys --name=run0 \ --file_storage=output/sacred/$USER-cmd-run0-gail-0-a1ec171b \ - with ../benchmarking/gail_seals_walker_best_hp_eval.json \ + with $TUNED_HPS_DIR/gail_seals_walker_best_hp_eval.json \ seed=0 logging.log_root=output We can execute commands in parallel by piping them to GNU parallel: @@ -40,9 +41,10 @@ For example, we can run: +TUNED_HPS_DIR=../src/imitation/scripts/config/tuned_hps python commands.py \ --name=run0 \ - --cfg_pattern=../benchmarking/bc_seals_half_cheetah_best_hp_eval.json \ + --cfg_pattern=$TUNED_HPS_DIR/bc_seals_half_cheetah_best_hp_eval.json \ --output_dir=/data/output \ --remote @@ -51,8 +53,9 @@ ctl job run --name $USER-cmd-run0-bc-0-72cb1df3 \ --command "python -m imitation.scripts.train_imitation bc \ --capture=sys --name=run0 \ - --file_storage=/data/output/sacred/$USER-cmd-run0-bc-0-72cb1df3 \ - with /data/imitation/benchmarking/bc_seals_half_cheetah_best_hp_eval.json \ + --file_storage=/data/output/sacred/$USER-cmd-run0-bc-0-72cb1df3 with \ + /data/imitation/src/imitation/scripts/config/tuned_hps/ + bc_seals_half_cheetah_best_hp_eval.json \ seed=0 logging.log_root=/data/output" \ --container hacobe/devbox:imitation \ --login --force-pull --never-restart --gpu 0 --shared-host-dir-mount /data @@ -220,7 +223,7 @@ def parse() -> argparse.Namespace: parser.add_argument( "--remote_cfg_dir", type=str, - default="/data/imitation/benchmarking", + default="/data/imitation/src/imitation/scripts/config/tuned_hps", help="""Path to a directory storing config 
files \ accessible from each container. """, ) diff --git a/setup.cfg b/setup.cfg index 95f2223d9..560cac137 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,7 +7,6 @@ per-file-ignores = # F841 local variable unused [for Sacred config scopes] src/imitation/scripts/config/*.py:F841 ../src/imitation/scripts/config/*.py:F841 - benchmarking/tuning_config.py:F841 src/imitation/envs/examples/airl_envs/*.py:D [darglint] diff --git a/benchmarking/airl_seals_ant_best_hp_eval.json b/src/imitation/scripts/config/airl_seals_ant_best_hp_eval.json similarity index 100% rename from benchmarking/airl_seals_ant_best_hp_eval.json rename to src/imitation/scripts/config/airl_seals_ant_best_hp_eval.json diff --git a/benchmarking/airl_seals_walker_best_hp_eval.json b/src/imitation/scripts/config/airl_seals_walker_best_hp_eval.json similarity index 100% rename from benchmarking/airl_seals_walker_best_hp_eval.json rename to src/imitation/scripts/config/airl_seals_walker_best_hp_eval.json diff --git a/src/imitation/scripts/config/parallel.py b/src/imitation/scripts/config/parallel.py index c9c898feb..62ebbd9e3 100644 --- a/src/imitation/scripts/config/parallel.py +++ b/src/imitation/scripts/config/parallel.py @@ -8,7 +8,7 @@ search spaces to the config like `"seed": tune.choice([0, 1, 2, 3])`. For tuning hyperparameters of an algorithm on a given environment, -check out the benchmarking/tuning.py script. +check out the imitation/scripts/tuning.py script. """ import numpy as np diff --git a/src/imitation/scripts/config/train_adversarial.py b/src/imitation/scripts/config/train_adversarial.py index acc842095..ff32a551b 100644 --- a/src/imitation/scripts/config/train_adversarial.py +++ b/src/imitation/scripts/config/train_adversarial.py @@ -1,7 +1,8 @@ """Configuration for imitation.scripts.train_adversarial.""" +import pathlib + import sacred -from torch import nn from imitation.rewards import reward_nets from imitation.scripts.ingredients import demonstrations, environment, expert @@ -101,29 +102,6 @@ def pendulum(): # Standard MuJoCo Gym environment named configs -@train_adversarial_ex.named_config -def seals_ant(): - locals().update(**MUJOCO_SHARED_LOCALS) - locals().update(**ANT_SHARED_LOCALS) - environment = dict(gym_id="seals/Ant-v0") - demonstrations = dict(rollout_type="ppo-huggingface") - rl = dict( - batch_size=2048, - rl_kwargs=dict( - batch_size=16, - clip_range=0.3, - ent_coef=3.1441389214159857e-06, - gae_lambda=0.8, - gamma=0.995, - learning_rate=0.00017959211641976886, - max_grad_norm=0.9, - n_epochs=10, - # policy_kwargs are same as the defaults - vf_coef=0.4351450387648799, - ), - ) - - CHEETAH_SHARED_LOCALS = dict( MUJOCO_SHARED_LOCALS, rl=dict(batch_size=16384, rl_kwargs=dict(batch_size=1024)), @@ -158,117 +136,6 @@ def half_cheetah(): environment = dict(gym_id="HalfCheetah-v2") -@train_adversarial_ex.named_config -def seals_half_cheetah(): - environment = dict(gym_id="seals/HalfCheetah-v0") - demonstrations = dict(rollout_type="ppo-huggingface") - rl = dict( - batch_size=512, - rl_kwargs=dict( - batch_size=64, - clip_range=0.1, - ent_coef=3.794797423594763e-06, - gae_lambda=0.95, - gamma=0.95, - learning_rate=0.0003286871805949382, - max_grad_norm=0.8, - n_epochs=5, - vf_coef=0.11483689492120866, - ), - ) - algorithm_kwargs = dict( - # Number of discriminator updates after each round of generator updates - n_disc_updates_per_round=16, - # Equivalent to no replay buffer if batch size is the same - gen_replay_buffer_capacity=512, - demo_batch_size=8192, - ) - - -@train_adversarial_ex.named_config -def 
seals_hopper(): - environment = dict(gym_id="seals/Hopper-v0") - demonstrations = dict(rollout_type="ppo-huggingface") - policy = dict( - policy_cls="MlpPolicy", - policy_kwargs=dict( - activation_fn=nn.ReLU, - net_arch=[dict(pi=[64, 64], vf=[64, 64])], - ), - ) - rl = dict( - batch_size=2048, - rl_kwargs=dict( - batch_size=512, - clip_range=0.1, - ent_coef=0.0010159833764878474, - gae_lambda=0.98, - gamma=0.995, - learning_rate=0.0003904770450788824, - max_grad_norm=0.9, - n_epochs=20, - vf_coef=0.20315938606555833, - ), - ) - - -@train_adversarial_ex.named_config -def seals_swimmer(): - environment = dict(gym_id="seals/Swimmer-v0") - total_timesteps = int(2e6) - demonstrations = dict(rollout_type="ppo-huggingface") - policy = dict( - policy_cls="MlpPolicy", - policy_kwargs=dict( - activation_fn=nn.ReLU, - net_arch=[dict(pi=[64, 64], vf=[64, 64])], - ), - ) - rl = dict( - batch_size=2048, - rl_kwargs=dict( - batch_size=64, - clip_range=0.1, - ent_coef=5.167107294612664e-08, - gae_lambda=0.95, - gamma=0.999, - learning_rate=0.000414936134792374, - max_grad_norm=2, - n_epochs=5, - # policy_kwargs are same as the defaults - vf_coef=0.6162112311062333, - ), - ) - - -@train_adversarial_ex.named_config -def seals_walker(): - environment = dict(gym_id="seals/Walker2d-v0") - demonstrations = dict(rollout_type="ppo-huggingface") - policy = dict( - policy_cls="MlpPolicy", - policy_kwargs=dict( - activation_fn=nn.ReLU, - net_arch=[dict(pi=[64, 64], vf=[64, 64])], - ), - ) - rl = dict( - batch_size=8192, - rl_kwargs=dict( - batch_size=128, - clip_range=0.4, - ent_coef=0.00013057334805552262, - gae_lambda=0.92, - gamma=0.98, - learning_rate=0.000138575372312869, - max_grad_norm=0.6, - n_epochs=20, - # policy_kwargs are same as the defaults - vf_coef=0.6167177795726859, - ), - ) - - @train_adversarial_ex.named_config def seals_humanoid(): locals().update(**MUJOCO_SHARED_LOCALS) @@ -296,3 +163,23 @@ def fast(): demo_batch_size=1, n_disc_updates_per_round=4, ) + + +hyperparam_dir = pathlib.Path(__file__).absolute().parent / "tuned_hps" +tuned_alg_envs = [ + "airl_seals_ant", + "airl_seals_half_cheetah", + "airl_seals_hopper", + "airl_seals_swimmer", + "airl_seals_walker", + "gail_seals_ant", + "gail_seals_half_cheetah", + "gail_seals_hopper", + "gail_seals_swimmer", + "gail_seals_walker", +] + +for tuned_alg_env in tuned_alg_envs: + config_file = hyperparam_dir / f"{tuned_alg_env}_best_hp_eval.json" + assert config_file.is_file(), f"{config_file} does not exist" + train_adversarial_ex.add_named_config(tuned_alg_env, str(config_file)) diff --git a/src/imitation/scripts/config/train_imitation.py b/src/imitation/scripts/config/train_imitation.py index 4f3a8a415..f151e768e 100644 --- a/src/imitation/scripts/config/train_imitation.py +++ b/src/imitation/scripts/config/train_imitation.py @@ -1,5 +1,7 @@ """Configuration settings for train_dagger, training DAgger from synthetic demos.""" +import pathlib + import sacred from imitation.scripts.ingredients import bc @@ -67,13 +69,6 @@ def ant(): environment = dict(gym_id="Ant-v2") -@train_imitation_ex.named_config -def seals_ant(): - environment = dict(gym_id="seals/Ant-v0") - demonstrations = dict(rollout_type="ppo-huggingface") - expert = {"policy_type": "ppo-huggingface"} - - @train_imitation_ex.named_config def half_cheetah(): environment = dict(gym_id="HalfCheetah-v2") @@ -81,36 +76,6 @@ def half_cheetah(): dagger = dict(total_timesteps=60000) -@train_imitation_ex.named_config -def seals_half_cheetah(): - environment = dict(gym_id="seals/HalfCheetah-v0") - bc 
= dict(l2_weight=0.0) - dagger = dict(total_timesteps=60000) - demonstrations = dict(rollout_type="ppo-huggingface") - expert = {"policy_type": "ppo-huggingface"} - - -@train_imitation_ex.named_config -def seals_hopper(): - environment = dict(gym_id="seals/Hopper-v0") - demonstrations = dict(rollout_type="ppo-huggingface") - expert = {"policy_type": "ppo-huggingface"} - - -@train_imitation_ex.named_config -def seals_swimmer(): - environment = dict(gym_id="seals/Swimmer-v0") - demonstrations = dict(rollout_type="ppo-huggingface") - expert = {"policy_type": "ppo-huggingface"} - - -@train_imitation_ex.named_config -def seals_walker(): - environment = dict(gym_id="seals/Walker2d-v0") - demonstrations = dict(rollout_type="ppo-huggingface") - expert = {"policy_type": "ppo-huggingface"} - - @train_imitation_ex.named_config def humanoid(): environment = dict(gym_id="Humanoid-v2") @@ -126,3 +91,23 @@ def fast(): dagger = dict(total_timesteps=50) bc = dict(train_kwargs=dict(n_batches=50)) sqil = dict(total_timesteps=50) + + +hyperparam_dir = pathlib.Path(__file__).absolute().parent / "tuned_hps" +tuned_alg_envs = [ + "bc_seals_ant", + "bc_seals_half_cheetah", + "bc_seals_hopper", + "bc_seals_swimmer", + "bc_seals_walker", + "dagger_seals_ant", + "dagger_seals_half_cheetah", + "dagger_seals_hopper", + "dagger_seals_swimmer", + "dagger_seals_walker", +] + +for tuned_alg_env in tuned_alg_envs: + config_file = hyperparam_dir / f"{tuned_alg_env}_best_hp_eval.json" + assert config_file.is_file(), f"{config_file} does not exist" + train_imitation_ex.add_named_config(tuned_alg_env, str(config_file)) diff --git a/src/imitation/scripts/config/tuned_hps/airl_seals_ant_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/airl_seals_ant_best_hp_eval.json new file mode 100644 index 000000000..d4131433e --- /dev/null +++ b/src/imitation/scripts/config/tuned_hps/airl_seals_ant_best_hp_eval.json @@ -0,0 +1,67 @@ +{ + "algorithm_kwargs": { + "demo_batch_size": 8192, + "gen_replay_buffer_capacity": 8192, + "n_disc_updates_per_round": 16 + }, + "checkpoint_interval": 0, + "demonstrations": { + "source": "huggingface", + "algo_name": "ppo", + "n_expert_demos": null + }, + "reward": { + "add_std_alpha": null, + "ensemble_size": null, + "net_cls": { + "py/type": "imitation.rewards.reward_nets.BasicShapedRewardNet" + }, + "net_kwargs": { + "normalize_input_layer": { + "py/type": "imitation.util.networks.RunningNorm" + } + }, + "normalize_output_layer": { + "py/type": "imitation.util.networks.RunningNorm" + } + }, + "rl": { + "batch_size": 8192, + "rl_cls": { + "py/type": "stable_baselines3.ppo.ppo.PPO" + }, + "rl_kwargs": { + "batch_size": 16, + "clip_range": 0.3, + "ent_coef": 3.27750078482474e-6, + "gae_lambda": 0.8, + "gamma": 0.995, + "learning_rate": 3.249429831179079e-5, + "max_grad_norm": 0.9, + "n_epochs": 10, + "vf_coef": 0.4351450387648799 + } + }, + "total_timesteps": 10000000, + "policy": { + "policy_cls": { + "py/type": "imitation.policies.base.FeedForward32Policy" + }, + "policy_kwargs": { + "features_extractor_class": { + "py/type": "imitation.policies.base.NormalizeFeaturesExtractor" + }, + "features_extractor_kwargs": { + "normalize_class": { + "py/type": "imitation.util.networks.RunningNorm" + } + } + } + }, + "policy_evaluation": { + "n_episodes_eval": 50 + }, + "environment": { + "gym_id": "seals/Ant-v1" + } +} diff --git a/benchmarking/airl_seals_half_cheetah_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/airl_seals_half_cheetah_best_hp_eval.json similarity index 100% rename from 
benchmarking/airl_seals_half_cheetah_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/airl_seals_half_cheetah_best_hp_eval.json diff --git a/benchmarking/airl_seals_hopper_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/airl_seals_hopper_best_hp_eval.json similarity index 100% rename from benchmarking/airl_seals_hopper_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/airl_seals_hopper_best_hp_eval.json diff --git a/benchmarking/airl_seals_swimmer_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/airl_seals_swimmer_best_hp_eval.json similarity index 100% rename from benchmarking/airl_seals_swimmer_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/airl_seals_swimmer_best_hp_eval.json diff --git a/src/imitation/scripts/config/tuned_hps/airl_seals_walker_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/airl_seals_walker_best_hp_eval.json new file mode 100644 index 000000000..edd99806d --- /dev/null +++ b/src/imitation/scripts/config/tuned_hps/airl_seals_walker_best_hp_eval.json @@ -0,0 +1,86 @@ +{ + "algorithm_kwargs": { + "demo_batch_size": 512, + "gen_replay_buffer_capacity": 16384, + "n_disc_updates_per_round": 16 + }, + "checkpoint_interval": 0, + "demonstrations": { + "source": "huggingface", + "algo_name": "ppo", + "n_expert_demos": null + }, + "expert": { + "loader_kwargs": { + "gym_id": "seals/Walker2d-v1", + "organization": "HumanCompatibleAI" + } + }, + "reward": { + "add_std_alpha": null, + "ensemble_size": null, + "net_cls": { + "py/type": "imitation.rewards.reward_nets.BasicShapedRewardNet" + }, + "net_kwargs": { + "normalize_input_layer": { + "py/type": "imitation.util.networks.RunningNorm" + } + }, + "normalize_output_layer": { + "py/type": "imitation.util.networks.RunningNorm" + } + }, + "rl": { + "batch_size": 16384, + "rl_cls": { + "py/type": "stable_baselines3.ppo.ppo.PPO" + }, + "rl_kwargs": { + "batch_size": 128, + "clip_range": 0.4, + "ent_coef": 0.002003867232707145, + "gae_lambda": 0.92, + "gamma": 0.98, + "learning_rate": 3.052170958603811e-5, + "max_grad_norm": 0.6, + "n_epochs": 20, + "vf_coef": 0.6167177795726859 + } + }, + "total_timesteps": 10000000, + "policy": { + "policy_cls": "MlpPolicy", + "policy_kwargs": { + "activation_fn": { + "py/type": "torch.nn.modules.activation.ReLU" + }, + "features_extractor_class": { + "py/type": "imitation.policies.base.NormalizeFeaturesExtractor" + }, + "features_extractor_kwargs": { + "normalize_class": { + "py/type": "imitation.util.networks.RunningNorm" + } + }, + "net_arch": [ + { + "pi": [ + 64, + 64 + ], + "vf": [ + 64, + 64 + ] + } + ] + } + }, + "policy_evaluation": { + "n_episodes_eval": 50 + }, + "environment": { + "gym_id": "seals/Walker2d-v1" + } +} diff --git a/benchmarking/bc_seals_ant_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/bc_seals_ant_best_hp_eval.json similarity index 100% rename from benchmarking/bc_seals_ant_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/bc_seals_ant_best_hp_eval.json diff --git a/benchmarking/bc_seals_half_cheetah_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/bc_seals_half_cheetah_best_hp_eval.json similarity index 100% rename from benchmarking/bc_seals_half_cheetah_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/bc_seals_half_cheetah_best_hp_eval.json diff --git a/benchmarking/bc_seals_hopper_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/bc_seals_hopper_best_hp_eval.json similarity index 100% rename from 
benchmarking/bc_seals_hopper_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/bc_seals_hopper_best_hp_eval.json diff --git a/benchmarking/bc_seals_swimmer_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/bc_seals_swimmer_best_hp_eval.json similarity index 100% rename from benchmarking/bc_seals_swimmer_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/bc_seals_swimmer_best_hp_eval.json diff --git a/benchmarking/bc_seals_walker_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/bc_seals_walker_best_hp_eval.json similarity index 100% rename from benchmarking/bc_seals_walker_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/bc_seals_walker_best_hp_eval.json diff --git a/benchmarking/dagger_seals_ant_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/dagger_seals_ant_best_hp_eval.json similarity index 100% rename from benchmarking/dagger_seals_ant_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/dagger_seals_ant_best_hp_eval.json diff --git a/benchmarking/dagger_seals_half_cheetah_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/dagger_seals_half_cheetah_best_hp_eval.json similarity index 100% rename from benchmarking/dagger_seals_half_cheetah_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/dagger_seals_half_cheetah_best_hp_eval.json diff --git a/benchmarking/dagger_seals_hopper_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/dagger_seals_hopper_best_hp_eval.json similarity index 100% rename from benchmarking/dagger_seals_hopper_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/dagger_seals_hopper_best_hp_eval.json diff --git a/benchmarking/dagger_seals_swimmer_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/dagger_seals_swimmer_best_hp_eval.json similarity index 100% rename from benchmarking/dagger_seals_swimmer_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/dagger_seals_swimmer_best_hp_eval.json diff --git a/benchmarking/dagger_seals_walker_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/dagger_seals_walker_best_hp_eval.json similarity index 100% rename from benchmarking/dagger_seals_walker_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/dagger_seals_walker_best_hp_eval.json diff --git a/benchmarking/fast_dagger_seals_cartpole.json b/src/imitation/scripts/config/tuned_hps/fast_dagger_seals_cartpole.json similarity index 100% rename from benchmarking/fast_dagger_seals_cartpole.json rename to src/imitation/scripts/config/tuned_hps/fast_dagger_seals_cartpole.json diff --git a/benchmarking/gail_seals_ant_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/gail_seals_ant_best_hp_eval.json similarity index 100% rename from benchmarking/gail_seals_ant_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/gail_seals_ant_best_hp_eval.json diff --git a/benchmarking/gail_seals_half_cheetah_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/gail_seals_half_cheetah_best_hp_eval.json similarity index 100% rename from benchmarking/gail_seals_half_cheetah_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/gail_seals_half_cheetah_best_hp_eval.json diff --git a/benchmarking/gail_seals_hopper_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/gail_seals_hopper_best_hp_eval.json similarity index 100% rename from benchmarking/gail_seals_hopper_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/gail_seals_hopper_best_hp_eval.json diff 
--git a/benchmarking/gail_seals_swimmer_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/gail_seals_swimmer_best_hp_eval.json similarity index 100% rename from benchmarking/gail_seals_swimmer_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/gail_seals_swimmer_best_hp_eval.json diff --git a/benchmarking/gail_seals_walker_best_hp_eval.json b/src/imitation/scripts/config/tuned_hps/gail_seals_walker_best_hp_eval.json similarity index 100% rename from benchmarking/gail_seals_walker_best_hp_eval.json rename to src/imitation/scripts/config/tuned_hps/gail_seals_walker_best_hp_eval.json diff --git a/tests/test_benchmarking.py b/tests/test_benchmarking.py index 0a93943ef..cbae34688 100644 --- a/tests/test_benchmarking.py +++ b/tests/test_benchmarking.py @@ -1,13 +1,9 @@ -"""Tests for config files in benchmarking/ folder.""" -import pathlib +"""Tests for config files in imitation/scripts/config/tuned_hps/ folder.""" import pytest from imitation.scripts import train_adversarial, train_imitation, tuning -THIS_DIR = pathlib.Path(__file__).absolute().parent -BENCHMARKING_DIR = THIS_DIR.parent / "benchmarking" - ALGORITHMS = ["bc", "dagger", "airl", "gail"] ENVIRONMENTS = [ "seals_walker", @@ -25,7 +21,6 @@ def test_benchmarks_print_config_succeeds(algorithm: str, environment: str): # because running the configs requires MuJoCo. # Requiring MuJoCo to run the tests adds too much complexity. - # GIVEN if algorithm in ("bc", "dagger"): experiment = train_imitation.train_imitation_ex elif algorithm in ("airl", "gail"): @@ -34,15 +29,7 @@ def test_benchmarks_print_config_succeeds(algorithm: str, environment: str): raise ValueError(f"Unknown algorithm: {algorithm}") # pragma: no cover config_name = f"{algorithm}_{environment}" - config_file = str( - BENCHMARKING_DIR / f"{algorithm}_{environment}_best_hp_eval.json", - ) - - # WHEN - experiment.add_named_config(config_name, config_file) run = experiment.run(command_name="print_config", named_configs=[config_name]) - - # THEN assert run.status == "COMPLETED" @@ -58,7 +45,7 @@ def test_tuning_print_config_succeeds(algorithm: str, environment: str): named_configs=[algorithm], config_updates=dict( parallel_run_config=dict( - base_named_configs=[environment], + base_named_configs=[f"{algorithm}_{environment}"], ), ), ) diff --git a/tests/test_experiments.py b/tests/test_experiments.py index b2417a9f9..9efb1be33 100644 --- a/tests/test_experiments.py +++ b/tests/test_experiments.py @@ -3,6 +3,7 @@ import glob import os import pathlib +import re import subprocess from typing import List @@ -18,30 +19,31 @@ ) THIS_DIR = pathlib.Path(__file__).absolute().parent -BENCHMARKING_DIR = THIS_DIR.parent / "benchmarking" +BENCHMARKING_DIR = THIS_DIR.parent / "src/imitation/scripts/config/tuned_hps" EXPERIMENTS_DIR = THIS_DIR.parent / "experiments" COMMANDS_PY_PATH = EXPERIMENTS_DIR / "commands.py" -EXPECTED_LOCAL_CONFIG_TEMPLATE = """python -m imitation.scripts.train_imitation dagger \ ---capture=sys --name=run0 --file_storage={output_dir}/sacred/\ -$USER-cmd-run0-dagger-0-8bf911a8 \ -with benchmarking/fast_dagger_seals_cartpole.json \ -seed=0 logging.log_root={output_dir}""" +EXPECTED_LOCAL_CONFIG_TEMPLATE = f"""python -m imitation.scripts.train_imitation \ +dagger --capture=sys --name=run0 --file_storage={{output_dir}}/sacred/\ +$USER-cmd-run0-dagger-0-72542943 \ +with {BENCHMARKING_DIR}/fast_dagger_seals_cartpole.json \ +seed=0 logging.log_root={{output_dir}}""" -EXPECTED_HOFVARPNIR_CONFIG_TEMPLATE = """ctl job run \ ---name 
$USER-cmd-run0-dagger-0-c3ac179d \ +BENCHMARKING_DIR_SUFFIX = re.sub(r".*/src/", "", str(BENCHMARKING_DIR)) +EXPECTED_HOFVARPNIR_CONFIG_TEMPLATE = f"""ctl job run \ +--name $USER-cmd-run0-dagger-0-aab021ce \ --command "python -m imitation.scripts.train_imitation dagger \ ---capture=sys --name=run0 --file_storage={output_dir}/sacred/\ -$USER-cmd-run0-dagger-0-c3ac179d \ -with /data/imitation/benchmarking/fast_dagger_seals_cartpole.json \ -seed=0 logging.log_root={output_dir}" \ +--capture=sys --name=run0 --file_storage={{output_dir}}/sacred/\ +$USER-cmd-run0-dagger-0-aab021ce \ +with /data/imitation/src/{BENCHMARKING_DIR_SUFFIX}/fast_dagger_seals_cartpole.json \ +seed=0 logging.log_root={{output_dir}}" \ --container hacobe/devbox:imitation \ --login --force-pull --never-restart --gpu 0 \ --shared-host-dir-mount /data""" def _get_benchmarking_path(benchmarking_file): - return os.path.join(BENCHMARKING_DIR.stem, benchmarking_file) + return os.path.join(BENCHMARKING_DIR, benchmarking_file) def _run_commands_from_flags(**kwargs) -> List[str]: @@ -148,10 +150,10 @@ def test_commands_local_config_with_custom_flags(): output_dir="/foo/bar", ) assert len(commands) == 1 - expected = """python -m imitation.scripts.train_imitation dagger \ + expected = f"""python -m imitation.scripts.train_imitation dagger \ --capture=sys --name=baz --file_storage=/foo/bar/sacred/\ -$USER-cmd-baz-dagger-1-8bf911a8 \ -with benchmarking/fast_dagger_seals_cartpole.json \ +$USER-cmd-baz-dagger-1-72542943 \ +with {BENCHMARKING_DIR}/fast_dagger_seals_cartpole.json \ seed=1 logging.log_root=/foo/bar""" assert commands[0] == expected @@ -248,10 +250,10 @@ def test_commands_bc_config(): cfg_pattern = _get_benchmarking_path("bc_seals_ant_best_hp_eval.json") commands = _run_commands_from_flags(cfg_pattern=cfg_pattern) assert len(commands) == 1 - expected = """python -m imitation.scripts.train_imitation bc \ + expected = f"""python -m imitation.scripts.train_imitation bc \ --capture=sys --name=run0 --file_storage=output/sacred/\ -$USER-cmd-run0-bc-0-78e5112a \ -with benchmarking/bc_seals_ant_best_hp_eval.json \ +$USER-cmd-run0-bc-0-47a528c5 \ +with {BENCHMARKING_DIR}/bc_seals_ant_best_hp_eval.json \ seed=0 logging.log_root=output""" assert commands[0] == expected @@ -262,10 +264,10 @@ def test_commands_dagger_config(): cfg_pattern = _get_benchmarking_path("dagger_seals_ant_best_hp_eval.json") commands = _run_commands_from_flags(cfg_pattern=cfg_pattern) assert len(commands) == 1 - expected = """python -m imitation.scripts.train_imitation dagger \ + expected = f"""python -m imitation.scripts.train_imitation dagger \ --capture=sys --name=run0 --file_storage=output/sacred/\ -$USER-cmd-run0-dagger-0-c27812cf \ -with benchmarking/dagger_seals_ant_best_hp_eval.json \ +$USER-cmd-run0-dagger-0-efa42a6a \ +with {BENCHMARKING_DIR}/dagger_seals_ant_best_hp_eval.json \ seed=0 logging.log_root=output""" assert commands[0] == expected @@ -276,10 +278,10 @@ def test_commands_gail_config(): cfg_pattern = _get_benchmarking_path("gail_seals_ant_best_hp_eval.json") commands = _run_commands_from_flags(cfg_pattern=cfg_pattern) assert len(commands) == 1 - expected = """python -m imitation.scripts.train_adversarial gail \ + expected = f"""python -m imitation.scripts.train_adversarial gail \ --capture=sys --name=run0 --file_storage=output/sacred/\ -$USER-cmd-run0-gail-0-9d8d1202 \ -with benchmarking/gail_seals_ant_best_hp_eval.json \ +$USER-cmd-run0-gail-0-9b83299d \ +with {BENCHMARKING_DIR}/gail_seals_ant_best_hp_eval.json \ seed=0 
logging.log_root=output""" assert commands[0] == expected @@ -290,10 +292,10 @@ def test_commands_airl_config(): cfg_pattern = _get_benchmarking_path("airl_seals_ant_best_hp_eval.json") commands = _run_commands_from_flags(cfg_pattern=cfg_pattern) assert len(commands) == 1 - expected = """python -m imitation.scripts.train_adversarial airl \ + expected = f"""python -m imitation.scripts.train_adversarial airl \ --capture=sys --name=run0 \ ---file_storage=output/sacred/$USER-cmd-run0-airl-0-9ed3120d \ -with benchmarking/airl_seals_ant_best_hp_eval.json \ +--file_storage=output/sacred/$USER-cmd-run0-airl-0-9cc929a8 \ +with {BENCHMARKING_DIR}/airl_seals_ant_best_hp_eval.json \ seed=0 logging.log_root=output""" assert commands[0] == expected From 35c7265d836ac9e2c0f2cd0b7b3d19ccf98d0340 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 5 Oct 2023 05:42:20 +0530 Subject: [PATCH 46/54] Bump cache version & remove unnecessary files --- .circleci/config.yml | 12 +-- .../config/airl_seals_ant_best_hp_eval.json | 67 --------------- .../airl_seals_walker_best_hp_eval.json | 86 ------------------- 3 files changed, 6 insertions(+), 159 deletions(-) delete mode 100644 src/imitation/scripts/config/airl_seals_ant_best_hp_eval.json delete mode 100644 src/imitation/scripts/config/airl_seals_walker_best_hp_eval.json diff --git a/.circleci/config.yml b/.circleci/config.yml index 029bf4cd6..b8aff85cb 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -65,7 +65,7 @@ commands: # Download and cache dependencies - restore_cache: keys: - - v8linux-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} + - v9linux-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} - run: name: install dependencies @@ -75,7 +75,7 @@ commands: - save_cache: paths: - /venv - key: v8linux-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} + key: v9linux-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} - run: name: install imitation @@ -96,7 +96,7 @@ commands: - restore_cache: keys: - - v7macos-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} + - v8macos-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} - run: name: install dependencies @@ -108,7 +108,7 @@ commands: - save_cache: paths: - ~/venv - key: v7macos-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} + key: v8macos-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} - run: name: install imitation @@ -138,7 +138,7 @@ commands: # Download and cache dependencies - restore_cache: keys: - - v10win-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.ps1" }} + - v11win-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.ps1" }} - run: name: install python and binary dependencies @@ -168,7 +168,7 @@ commands: - save_cache: paths: - .\venv - key: v10win-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.ps1" }} + key: v11win-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.ps1" }} - run: name: install imitation diff --git a/src/imitation/scripts/config/airl_seals_ant_best_hp_eval.json b/src/imitation/scripts/config/airl_seals_ant_best_hp_eval.json deleted file mode 100644 index d4131433e..000000000 --- a/src/imitation/scripts/config/airl_seals_ant_best_hp_eval.json 
+++ /dev/null @@ -1,67 +0,0 @@ -{ - "algorithm_kwargs": { - "demo_batch_size": 8192, - "gen_replay_buffer_capacity": 8192, - "n_disc_updates_per_round": 16 - }, - "checkpoint_interval": 0, - "demonstrations": { - "source": "huggingface", - "algo_name": "ppo", - "n_expert_demos": null - }, - "reward": { - "add_std_alpha": null, - "ensemble_size": null, - "net_cls": { - "py/type": "imitation.rewards.reward_nets.BasicShapedRewardNet" - }, - "net_kwargs": { - "normalize_input_layer": { - "py/type": "imitation.util.networks.RunningNorm" - } - }, - "normalize_output_layer": { - "py/type": "imitation.util.networks.RunningNorm" - } - }, - "rl": { - "batch_size": 8192, - "rl_cls": { - "py/type": "stable_baselines3.ppo.ppo.PPO" - }, - "rl_kwargs": { - "batch_size": 16, - "clip_range": 0.3, - "ent_coef": 3.27750078482474e-6, - "gae_lambda": 0.8, - "gamma": 0.995, - "learning_rate": 3.249429831179079e-5, - "max_grad_norm": 0.9, - "n_epochs": 10, - "vf_coef": 0.4351450387648799 - } - }, - "total_timesteps": 10000000, - "policy": { - "policy_cls": { - "py/type": "imitation.policies.base.FeedForward32Policy" - }, - "policy_kwargs": { - "features_extractor_class": { - "py/type": "imitation.policies.base.NormalizeFeaturesExtractor" - }, - "features_extractor_kwargs": { - "normalize_class": { - "py/type": "imitation.util.networks.RunningNorm" - } - } - } - }, - "policy_evaluation": { - "n_episodes_eval": 50 - }, - "environment": { - "gym_id": "seals/Ant-v1" - } -} diff --git a/src/imitation/scripts/config/airl_seals_walker_best_hp_eval.json b/src/imitation/scripts/config/airl_seals_walker_best_hp_eval.json deleted file mode 100644 index edd99806d..000000000 --- a/src/imitation/scripts/config/airl_seals_walker_best_hp_eval.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "algorithm_kwargs": { - "demo_batch_size": 512, - "gen_replay_buffer_capacity": 16384, - "n_disc_updates_per_round": 16 - }, - "checkpoint_interval": 0, - "demonstrations": { - "source": "huggingface", - "algo_name": "ppo", - "n_expert_demos": null - }, - "expert": { - "loader_kwargs": { - "gym_id": "seals/Walker2d-v1", - "organization": "HumanCompatibleAI" - } - }, - "reward": { - "add_std_alpha": null, - "ensemble_size": null, - "net_cls": { - "py/type": "imitation.rewards.reward_nets.BasicShapedRewardNet" - }, - "net_kwargs": { - "normalize_input_layer": { - "py/type": "imitation.util.networks.RunningNorm" - } - }, - "normalize_output_layer": { - "py/type": "imitation.util.networks.RunningNorm" - } - }, - "rl": { - "batch_size": 16384, - "rl_cls": { - "py/type": "stable_baselines3.ppo.ppo.PPO" - }, - "rl_kwargs": { - "batch_size": 128, - "clip_range": 0.4, - "ent_coef": 0.002003867232707145, - "gae_lambda": 0.92, - "gamma": 0.98, - "learning_rate": 3.052170958603811e-5, - "max_grad_norm": 0.6, - "n_epochs": 20, - "vf_coef": 0.6167177795726859 - } - }, - "total_timesteps": 10000000, - "policy": { - "policy_cls": "MlpPolicy", - "policy_kwargs": { - "activation_fn": { - "py/type": "torch.nn.modules.activation.ReLU" - }, - "features_extractor_class": { - "py/type": "imitation.policies.base.NormalizeFeaturesExtractor" - }, - "features_extractor_kwargs": { - "normalize_class": { - "py/type": "imitation.util.networks.RunningNorm" - } - }, - "net_arch": [ - { - "pi": [ - 64, - 64 - ], - "vf": [ - 64, - 64 - ] - } - ] - } - }, - "policy_evaluation": { - "n_episodes_eval": 50 - }, - "environment": { - "gym_id": "seals/Walker2d-v1" - } -} From fdf4f4903fd8ff5769aaebfc5f0f501bb3e73e64 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: 
Thu, 5 Oct 2023 06:34:41 +0530 Subject: [PATCH 47/54] Include tuned hyperparam json files in package data --- .circleci/config.yml | 12 ++++++------ setup.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index b8aff85cb..029bf4cd6 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -65,7 +65,7 @@ commands: # Download and cache dependencies - restore_cache: keys: - - v9linux-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} + - v8linux-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} - run: name: install dependencies @@ -75,7 +75,7 @@ commands: - save_cache: paths: - /venv - key: v9linux-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} + key: v8linux-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} - run: name: install imitation @@ -96,7 +96,7 @@ commands: - restore_cache: keys: - - v8macos-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} + - v7macos-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} - run: name: install dependencies @@ -108,7 +108,7 @@ commands: - save_cache: paths: - ~/venv - key: v8macos-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} + key: v7macos-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.sh" }} - run: name: install imitation @@ -138,7 +138,7 @@ commands: # Download and cache dependencies - restore_cache: keys: - - v11win-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.ps1" }} + - v10win-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.ps1" }} - run: name: install python and binary dependencies @@ -168,7 +168,7 @@ commands: - save_cache: paths: - .\venv - key: v11win-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.ps1" }} + key: v10win-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_and_activate_venv.ps1" }} - run: name: install imitation diff --git a/setup.py b/setup.py index 0384014ee..1c069c463 100644 --- a/setup.py +++ b/setup.py @@ -181,7 +181,7 @@ def get_local_version(version: "ScmVersion", time_format="%Y%m%d") -> str: python_requires=">=3.8.0", packages=find_packages("src"), package_dir={"": "src"}, - package_data={"imitation": ["py.typed", "envs/examples/airl_envs/assets/*.xml"]}, + package_data={"imitation": ["py.typed", "scripts/config/tuned_hps/*.json"]}, # Note: while we are strict with our test and doc requirement versions, we try to # impose as little restrictions on the install requirements as possible. Try to # encode only known incompatibilities here. 
This prevents nasty dependency issues From 5f9a4e633988a0f8d319c1d93f41c0cf1814f01a Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 5 Oct 2023 07:44:00 +0530 Subject: [PATCH 48/54] Update storage hash --- tests/test_experiments.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_experiments.py b/tests/test_experiments.py index 9efb1be33..f6b4a8e39 100644 --- a/tests/test_experiments.py +++ b/tests/test_experiments.py @@ -25,7 +25,7 @@ EXPECTED_LOCAL_CONFIG_TEMPLATE = f"""python -m imitation.scripts.train_imitation \ dagger --capture=sys --name=run0 --file_storage={{output_dir}}/sacred/\ -$USER-cmd-run0-dagger-0-72542943 \ +$USER-cmd-run0-dagger-0-152b2005 \ with {BENCHMARKING_DIR}/fast_dagger_seals_cartpole.json \ seed=0 logging.log_root={{output_dir}}""" @@ -152,7 +152,7 @@ def test_commands_local_config_with_custom_flags(): assert len(commands) == 1 expected = f"""python -m imitation.scripts.train_imitation dagger \ --capture=sys --name=baz --file_storage=/foo/bar/sacred/\ -$USER-cmd-baz-dagger-1-72542943 \ +$USER-cmd-baz-dagger-1-152b2005 \ with {BENCHMARKING_DIR}/fast_dagger_seals_cartpole.json \ seed=1 logging.log_root=/foo/bar""" assert commands[0] == expected @@ -252,7 +252,7 @@ def test_commands_bc_config(): assert len(commands) == 1 expected = f"""python -m imitation.scripts.train_imitation bc \ --capture=sys --name=run0 --file_storage=output/sacred/\ -$USER-cmd-run0-bc-0-47a528c5 \ +$USER-cmd-run0-bc-0-f3ab1f87 \ with {BENCHMARKING_DIR}/bc_seals_ant_best_hp_eval.json \ seed=0 logging.log_root=output""" assert commands[0] == expected @@ -266,7 +266,7 @@ def test_commands_dagger_config(): assert len(commands) == 1 expected = f"""python -m imitation.scripts.train_imitation dagger \ --capture=sys --name=run0 --file_storage=output/sacred/\ -$USER-cmd-run0-dagger-0-efa42a6a \ +$USER-cmd-run0-dagger-0-76c1212c \ with {BENCHMARKING_DIR}/dagger_seals_ant_best_hp_eval.json \ seed=0 logging.log_root=output""" assert commands[0] == expected @@ -280,7 +280,7 @@ def test_commands_gail_config(): assert len(commands) == 1 expected = f"""python -m imitation.scripts.train_adversarial gail \ --capture=sys --name=run0 --file_storage=output/sacred/\ -$USER-cmd-run0-gail-0-9b83299d \ +$USER-cmd-run0-gail-0-351c205f \ with {BENCHMARKING_DIR}/gail_seals_ant_best_hp_eval.json \ seed=0 logging.log_root=output""" assert commands[0] == expected @@ -294,7 +294,7 @@ def test_commands_airl_config(): assert len(commands) == 1 expected = f"""python -m imitation.scripts.train_adversarial airl \ --capture=sys --name=run0 \ ---file_storage=output/sacred/$USER-cmd-run0-airl-0-9cc929a8 \ +--file_storage=output/sacred/$USER-cmd-run0-airl-0-3662206a \ with {BENCHMARKING_DIR}/airl_seals_ant_best_hp_eval.json \ seed=0 logging.log_root=output""" assert commands[0] == expected From 91bb785f77892c3ec936f5f008700b27fb1ff5fe Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 5 Oct 2023 21:47:57 +0530 Subject: [PATCH 49/54] Update search space of bc --- src/imitation/scripts/config/tuning.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/imitation/scripts/config/tuning.py b/src/imitation/scripts/config/tuning.py index 07161d04c..73313770a 100644 --- a/src/imitation/scripts/config/tuning.py +++ b/src/imitation/scripts/config/tuning.py @@ -49,24 +49,24 @@ def bc(): search_space={ "config_updates": { "bc": dict( - batch_size=tune.choice([8]), + batch_size=tune.choice([8, 16, 32, 64]), 
l2_weight=tune.loguniform(1e-6, 1e-2), # L2 regularization weight optimizer_kwargs=dict( lr=tune.loguniform(1e-5, 1e-2), ), train_kwargs=dict( - n_epochs=tune.choice([1]), + n_epochs=tune.choice([1, 5, 10, 20]), ), ), }, "command_name": "bc", }, - num_samples=2, - repeat=2, + num_samples=64, + repeat=3, resources_per_trial=dict(cpu=1), ) - num_eval_seeds = 1 + num_eval_seeds = 5 eval_best_trial_resource_multiplier = 1 From f59fea232d1af874a5f387407591d450444fce0c Mon Sep 17 00:00:00 2001 From: ZiyueWang25 Date: Thu, 5 Oct 2023 11:40:01 -0700 Subject: [PATCH 50/54] update benchmark and hyper parameter tuning readme --- benchmarking/README.md | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/benchmarking/README.md b/benchmarking/README.md index ba89da69d..fb21a0223 100644 --- a/benchmarking/README.md +++ b/benchmarking/README.md @@ -7,7 +7,7 @@ Configuration files can be loaded either from the CLI or from the Python API. Th ## CLI ```bash -python -m imitation.scripts. with benchmarking/.json +python -m imitation.scripts. with src/imitation/config/tuned_hps/.json ``` `train_script` can be either 1) `train_imitation` with `algo` as `bc` or `dagger` or 2) `train_adversarial` with `algo` as `gail` or `airl`. @@ -16,26 +16,27 @@ python -m imitation.scripts. with benchmarking/ import -.run(command_name="", named_configs=["benchmarking/.json"]) +.run(command_name="", named_configs=["src/imitation/config/tuned_hps/.json"]) ``` # Tuning Hyperparameters -The hyperparameters of any algorithm in imitation can be tuned using the `tuning.py` script. +The hyperparameters of any algorithm in imitation can be tuned using the `scripts/tuning.py`. The benchmarking hyperparameter configs were generated by tuning the hyperparameters using -the search space defined in the `tuning_config.py` script. The tuning script proceeds in two -phases: 1) The hyperparameters are tuned using the search space provided, and 2) the best -hyperparameter config found in the first phase based on the maximum mean return is -re-evaluated on a separate set of seeds, and the mean and standard deviation of these trials -are reported. +the search space defined in the `scripts/config/tuning.py`. -To tune the hyperparameters of an algorithm using the default search space provided: +The tuning script proceeds in two phases: +1. Tune the hyperparameters using the search space provided. +2. Re-evaluate the best hyperparameter config found in the first phase based on the maximum mean return on a separate set of seeds. Report the mean and standard deviation of these trials. + +To use it with the default search space: ```bash -python tuning.py with {algo} 'parallel_run_config.base_named_configs=["{env}"]' +python src/imitation/scripts/tuning.py with 'parallel_run_config.base_named_configs=[""]' ``` -In this command, `{algo}` provides the default search space and settings to be used for -the specific algorithm, which is defined in the `tuning_config.py` script and -`'parallel_run_config.base_named_configs=["{env}"]'` sets the environment to tune the algorithm in. -See the documentation of `tuning.py` and `parallel.py` scripts for many other arguments that can be +In this command: +- `` provides the default search space and settings for the specific algorithm, which is defined in the `scripts/config/tuning.py` +- `` sets the environment to tune the algorithm in. They are defined in the algo-specifc `scripts/config/train_[adversarial/imitation/preference_comparisons/rl].py` files. 
+
+See the documentation of `scripts/tuning.py` and `scripts/parallel.py` for many other arguments that can be
 provided through the command line to change the tuning behavior.

From 95110dc21673cbcaef04a432d0fd38b874ecb501 Mon Sep 17 00:00:00 2001
From: Mohammad Taufeeque <9taufeeque9@gmail.com>
Date: Fri, 6 Oct 2023 00:30:24 +0530
Subject: [PATCH 51/54] Update README.md

---
 benchmarking/README.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/benchmarking/README.md b/benchmarking/README.md
index fb21a0223..7e7e7c652 100644
--- a/benchmarking/README.md
+++ b/benchmarking/README.md
@@ -1,22 +1,22 @@
 # Benchmarking imitation

-This directory contains sacred configuration files for benchmarking imitation's algorithms. For v0.3.2, these correspond to the hyperparameters used in the paper [imitation: Clean Imitation Learning Implementations](https://www.rocamonde.com/publication/gleave-imitation-2022/).
+The `src/imitation/scripts/config/tuned_hps` directory provides the tuned hyperparameter configs for benchmarking imitation. For v0.4.0, these correspond to the hyperparameters used in the paper [imitation: Clean Imitation Learning Implementations](https://www.rocamonde.com/publication/gleave-imitation-2022/).

-Configuration files can be loaded either from the CLI or from the Python API. The examples below assume that your current working directory is the root of the `imitation` repository. This is not necessarily the case and you should adjust your paths accordingly.
+Configuration files can be loaded either from the CLI or from the Python API.

 ## CLI

 ```bash
-python -m imitation.scripts.<train_script> <algo> with src/imitation/config/tuned_hps/<algo>_<env>_best_hp_eval.json
+python -m imitation.scripts.<train_script> <algo> with <algo>_<env>
 ```
-`train_script` can be either 1) `train_imitation` with `algo` as `bc` or `dagger` or 2) `train_adversarial` with `algo` as `gail` or `airl`.
+`train_script` can be either 1) `train_imitation` with `algo` as `bc` or `dagger` or 2) `train_adversarial` with `algo` as `gail` or `airl`. The `env` can be any of `seals_ant`, `seals_half_cheetah`, `seals_hopper`, `seals_swimmer`, or `seals_walker`. The hyperparameters for other environments are not tuned yet. You can either use the tuned hyperparameters for any of the other environments or tune the hyperparameters using the `tuning` script.

 ## Python

 ```python
 ...
 from imitation.scripts.<train_script> import <train_script>_ex
-<train_script>_ex.run(command_name="<algo>", named_configs=["src/imitation/config/tuned_hps/<algo>_<env>_best_hp_eval.json"])
+<train_script>_ex.run(command_name="<algo>", named_configs=["<algo>_<env>"])
 ```

 # Tuning Hyperparameters

@@ -31,12 +31,12 @@ The tuning script proceeds in two phases:

 To use it with the default search space:
 ```bash
-python src/imitation/scripts/tuning.py with <algo> 'parallel_run_config.base_named_configs=["<env>"]'
+python -m imitation.scripts.tuning with <algo> 'parallel_run_config.base_named_configs=["<env>"]'
 ```

 In this command:
-- `<algo>` provides the default search space and settings for the specific algorithm, which is defined in the `scripts/config/tuning.py`
-- `<env>` sets the environment to tune the algorithm in. They are defined in the algo-specifc `scripts/config/train_[adversarial/imitation/preference_comparisons/rl].py` files.
+- `<algo>` provides the default search space and settings for the specific algorithm, which is defined in `scripts/config/tuning.py`
+- `<env>` sets the environment to tune the algorithm in. These are defined in the algo-specific `scripts/config/train_[adversarial/imitation/preference_comparisons/rl].py` files. For the already tuned environments, use the `<algo>_<env>` named configs here.

 See the documentation of `scripts/tuning.py` and `scripts/parallel.py` for many other arguments that can be
 provided through the command line to change the tuning behavior.
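For illustration, a minimal sketch of the Python API usage described in the README above, using one of the tuned named configs registered in `scripts/config/train_imitation.py`. This is only an assumed example invocation (mirroring `tests/test_benchmarking.py`), not part of the patch series; it assumes `imitation` is installed with the packaged `tuned_hps` JSON files, and it runs `print_config` because actually training requires MuJoCo:

```python
# Minimal sketch: run a Sacred experiment with a tuned-hyperparameter named config.
# Assumes the "bc_seals_half_cheetah" named config has been registered from the
# tuned_hps directory by scripts/config/train_imitation.py.
from imitation.scripts import train_imitation

run = train_imitation.train_imitation_ex.run(
    command_name="print_config",  # swap for "bc" to actually train (needs MuJoCo)
    named_configs=["bc_seals_half_cheetah"],
)
assert run.status == "COMPLETED"
```

The same pattern applies to `train_adversarial.train_adversarial_ex` with the `gail_*` and `airl_*` named configs.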
From 10ec8a2fe003f0bdc7e9440a6bd83f1fc43fed25 Mon Sep 17 00:00:00 2001 From: ZiyueWang25 Date: Thu, 5 Oct 2023 20:06:30 -0700 Subject: [PATCH 52/54] mce_irl_train --- src/imitation/scripts/train_imitation.py | 64 ++++++++++++++++++++++-- tests/scripts/test_scripts.py | 6 +++ 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/src/imitation/scripts/train_imitation.py b/src/imitation/scripts/train_imitation.py index c47ed29bd..6c03684f5 100644 --- a/src/imitation/scripts/train_imitation.py +++ b/src/imitation/scripts/train_imitation.py @@ -1,21 +1,30 @@ """Trains DAgger on synthetic demonstrations generated from an expert policy.""" +from functools import partial import logging import os.path as osp import pathlib from typing import Any, Dict, Mapping, Optional, Sequence, cast + import numpy as np +import torch as th from sacred.observers import FileStorageObserver - -from imitation.algorithms import dagger as dagger_algorithm -from imitation.algorithms import sqil as sqil_algorithm +from seals import base_envs +from seals.diagnostics.cliff_world import CliffWorldEnv +from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv + +from imitation.algorithms import ( + dagger as dagger_algorithm, + sqil as sqil_algorithm, + mce_irl as mceirl_algorithm, +) from imitation.data import rollout, types from imitation.scripts.config.train_imitation import train_imitation_ex from imitation.scripts.ingredients import bc as bc_ingredient from imitation.scripts.ingredients import demonstrations, environment, expert from imitation.scripts.ingredients import logging as logging_ingredient -from imitation.scripts.ingredients import policy_evaluation +from imitation.scripts.ingredients import policy_evaluation, reward from imitation.util import util logger = logging.getLogger(__name__) @@ -185,6 +194,53 @@ def sqil( return stats +@train_imitation_ex.command +def mceirl( + mceirl: Mapping[str, Any], + optimizer_cls: th.optim.Optimizer, # not sure + optimizer_kwargs: Mapping[str, Any], + env_kwargs: Mapping[str, Any], + num_vec: int, + parallel: bool, + _run, + _rnd: np.random.Generator, +) -> Mapping[str, Mapping[str, float]]: + custom_logger, log_dir = logging_ingredient.setup_logging() + expert_trajs = demonstrations.get_expert_trajectories() + env_creator = partial(CliffWorldEnv, **env_kwargs) + env = env_creator() + + env_fns = [lambda: base_envs.ExposePOMDPStateWrapper(env_creator())] * num_vec + # This is just a vectorized environment because `generate_trajectories` expects one + if parallel: + # See GH hill-a/stable-baselines issue #217 + state_venv = SubprocVecEnv(env_fns, start_method="forkserver") + else: + state_venv = DummyVecEnv(env_fns) + + reward_net = reward.make_reward_net(state_venv) + mceirl_trainer = mceirl_algorithm.MCEIRL( + env=env, + demonstrations=expert_trajs, + reward_net=reward_net, + rng=_rnd, + optimizer_cls=optimizer_cls, + optimizer_kwargs=optimizer_kwargs, + discount=mceirl["discount"], + linf_eps=mceirl["linf_eps"], + grad_l2_eps=mceirl["grad_l2_eps"], + log_interval=mceirl["log_interval"], + custom_logger=custom_logger, + ) + mceirl_trainer.train( + max_iter=int(mceirl["max_iter"]), + ) + util.save_policy(mceirl_trainer.policy, policy_path=osp.join(log_dir, "final.th")) + imit_stats = policy_evaluation.eval_policy(mceirl_trainer.policy, state_venv) + stats = _collect_stats(imit_stats, expert_trajs) + return stats + + def main_console(): observer_path = pathlib.Path.cwd() / "output" / "sacred" / "train_imitation" observer = 
FileStorageObserver(observer_path) diff --git a/tests/scripts/test_scripts.py b/tests/scripts/test_scripts.py index ae39116e7..8dc748480 100644 --- a/tests/scripts/test_scripts.py +++ b/tests/scripts/test_scripts.py @@ -425,6 +425,12 @@ def test_train_bc_warmstart(tmpdir): assert isinstance(run_warmstart.result, dict) +def test_train_mceirl_main(mceirl_config): + run = train_imitation.train_imitation_ex.run(**mceirl_config) + assert run.status == "COMPLETED" + assert isinstance(run.result, dict) + + def test_train_sqil_main(sqil_config): # NOTE: Having four different expert types as in bc might be overkill for sqil run = train_imitation.train_imitation_ex.run(**sqil_config) From 7436784aa62291214bb899e3b1b06f48e703e385 Mon Sep 17 00:00:00 2001 From: ZiyueWang25 Date: Fri, 6 Oct 2023 15:44:17 -0700 Subject: [PATCH 53/54] add train_mce_irl script --- src/imitation/scripts/config/train_mce_irl.py | 48 ++++++++++ src/imitation/scripts/train_imitation.py | 64 +------------- src/imitation/scripts/train_mce_irl.py | 88 +++++++++++++++++++ 3 files changed, 140 insertions(+), 60 deletions(-) create mode 100644 src/imitation/scripts/config/train_mce_irl.py create mode 100644 src/imitation/scripts/train_mce_irl.py diff --git a/src/imitation/scripts/config/train_mce_irl.py b/src/imitation/scripts/config/train_mce_irl.py new file mode 100644 index 000000000..fb3aca2b1 --- /dev/null +++ b/src/imitation/scripts/config/train_mce_irl.py @@ -0,0 +1,48 @@ +"""Configuration for imitation.scripts.train_mce_irl.""" +import sacred +from torch import nn +import torch as th + +from imitation.scripts.ingredients import environment +from imitation.scripts.ingredients import logging as logging_ingredient +from imitation.scripts.ingredients import policy_evaluation, reward, rl + +train_mce_irl_ex = sacred.Experiment( + "train_mce_irl", + ingredients=[ + logging_ingredient.logging_ingredient, + environment.environment_ingredient, + reward.reward_ingredient, + rl.rl_ingredient, + policy_evaluation.policy_evaluation_ingredient, + ], +) + + +MUJOCO_SHARED_LOCALS = dict(rl=dict(rl_kwargs=dict(ent_coef=0.1))) +ANT_SHARED_LOCALS = dict( + total_timesteps=int(3e7), + rl=dict(batch_size=16384), +) + + +@train_mce_irl_ex.config +def train_defaults(): + mceirl = { + "discount": 1, + "linf_eps": 0.001, + "grad_l2_eps": 0.0001, + "log_interval": 100, + } + optimizer_cls = th.optim.Adam + optimizer_kwargs = dict( + lr=4e-4, + ) + env_kwargs = { + "height": 4, + "horizon": 40, + "width": 7, + "use_xy_obs": True, + } + num_vec = 8 # number of environments in VecEnv + parallel = False diff --git a/src/imitation/scripts/train_imitation.py b/src/imitation/scripts/train_imitation.py index 6c03684f5..2c280cc46 100644 --- a/src/imitation/scripts/train_imitation.py +++ b/src/imitation/scripts/train_imitation.py @@ -1,24 +1,15 @@ """Trains DAgger on synthetic demonstrations generated from an expert policy.""" -from functools import partial import logging import os.path as osp import pathlib -from typing import Any, Dict, Mapping, Optional, Sequence, cast - +from typing import Any, Dict, Mapping, Optional, Sequence, Type, cast import numpy as np -import torch as th from sacred.observers import FileStorageObserver -from seals import base_envs -from seals.diagnostics.cliff_world import CliffWorldEnv -from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv - -from imitation.algorithms import ( - dagger as dagger_algorithm, - sqil as sqil_algorithm, - mce_irl as mceirl_algorithm, -) + +from imitation.algorithms import dagger as 
dagger_algorithm +from imitation.algorithms import sqil as sqil_algorithm from imitation.data import rollout, types from imitation.scripts.config.train_imitation import train_imitation_ex from imitation.scripts.ingredients import bc as bc_ingredient @@ -194,53 +185,6 @@ def sqil( return stats -@train_imitation_ex.command -def mceirl( - mceirl: Mapping[str, Any], - optimizer_cls: th.optim.Optimizer, # not sure - optimizer_kwargs: Mapping[str, Any], - env_kwargs: Mapping[str, Any], - num_vec: int, - parallel: bool, - _run, - _rnd: np.random.Generator, -) -> Mapping[str, Mapping[str, float]]: - custom_logger, log_dir = logging_ingredient.setup_logging() - expert_trajs = demonstrations.get_expert_trajectories() - env_creator = partial(CliffWorldEnv, **env_kwargs) - env = env_creator() - - env_fns = [lambda: base_envs.ExposePOMDPStateWrapper(env_creator())] * num_vec - # This is just a vectorized environment because `generate_trajectories` expects one - if parallel: - # See GH hill-a/stable-baselines issue #217 - state_venv = SubprocVecEnv(env_fns, start_method="forkserver") - else: - state_venv = DummyVecEnv(env_fns) - - reward_net = reward.make_reward_net(state_venv) - mceirl_trainer = mceirl_algorithm.MCEIRL( - env=env, - demonstrations=expert_trajs, - reward_net=reward_net, - rng=_rnd, - optimizer_cls=optimizer_cls, - optimizer_kwargs=optimizer_kwargs, - discount=mceirl["discount"], - linf_eps=mceirl["linf_eps"], - grad_l2_eps=mceirl["grad_l2_eps"], - log_interval=mceirl["log_interval"], - custom_logger=custom_logger, - ) - mceirl_trainer.train( - max_iter=int(mceirl["max_iter"]), - ) - util.save_policy(mceirl_trainer.policy, policy_path=osp.join(log_dir, "final.th")) - imit_stats = policy_evaluation.eval_policy(mceirl_trainer.policy, state_venv) - stats = _collect_stats(imit_stats, expert_trajs) - return stats - - def main_console(): observer_path = pathlib.Path.cwd() / "output" / "sacred" / "train_imitation" observer = FileStorageObserver(observer_path) diff --git a/src/imitation/scripts/train_mce_irl.py b/src/imitation/scripts/train_mce_irl.py new file mode 100644 index 000000000..41e25d862 --- /dev/null +++ b/src/imitation/scripts/train_mce_irl.py @@ -0,0 +1,88 @@ +"""Train Finite-horizon tabular Maximum Causal Entropy IRL. + +Can be used as a CLI script, or the `train_mce_irl` function +can be called directly. 
+""" + +from functools import partial +import logging +import pathlib +import os.path as osp +from typing import Any, Mapping, Type + + +import numpy as np +import torch as th +from sacred.observers import FileStorageObserver +from seals import base_envs +from seals.diagnostics.cliff_world import CliffWorldEnv +from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv + +from imitation.algorithms import mce_irl as mceirl_algorithm +from imitation.data import rollout +from imitation.scripts.config.train_mce_irl import train_mce_irl_ex +from imitation.scripts.ingredients import demonstrations +from imitation.scripts.ingredients import logging as logging_ingredient +from imitation.scripts.ingredients import policy_evaluation, reward +from imitation.util import util + +logger = logging.getLogger(__name__) + + +@train_mce_irl_ex.command +def train_mce_irl( + mceirl: Mapping[str, Any], + optimizer_cls: Type[th.optim.Optimizer], + optimizer_kwargs: Mapping[str, Any], + env_kwargs: Mapping[str, Any], + num_vec: int, + parallel: bool, + _run, + _rnd: np.random.Generator, +) -> Mapping[str, Mapping[str, float]]: + custom_logger, log_dir = logging_ingredient.setup_logging() + expert_trajs = demonstrations.get_expert_trajectories() + env_creator = partial(CliffWorldEnv, **env_kwargs) + env = env_creator() + + env_fns = [lambda: base_envs.ExposePOMDPStateWrapper(env_creator())] * num_vec + # This is just a vectorized environment because `generate_trajectories` expects one + if parallel: + # See GH hill-a/stable-baselines issue #217 + state_venv = SubprocVecEnv(env_fns, start_method="forkserver") + else: + state_venv = DummyVecEnv(env_fns) + + reward_net = reward.make_reward_net(state_venv) + mceirl_trainer = mceirl_algorithm.MCEIRL( + demonstrations=expert_trajs, + env=env, + reward_net=reward_net, + rng=_rnd, + optimizer_cls=optimizer_cls, + optimizer_kwargs=optimizer_kwargs, + discount=mceirl["discount"], + linf_eps=mceirl["linf_eps"], + grad_l2_eps=mceirl["grad_l2_eps"], + log_interval=mceirl["log_interval"], + custom_logger=custom_logger, + ) + mceirl_trainer.train(max_iter=int(mceirl["max_iter"])) + util.save_policy(mceirl_trainer.policy, policy_path=osp.join(log_dir, "final.th")) + th.save(reward_net, osp.join(log_dir, "reward_net.pt")) + imit_stats = policy_evaluation.eval_policy(mceirl_trainer.policy, state_venv) + return { + "imit_stats": imit_stats, + "expert_stats": rollout.rollout_stats(expert_trajs), + } + + +def main_console(): + observer_path = pathlib.Path.cwd() / "output" / "sacred" / "train_mce_irl" + observer = FileStorageObserver(observer_path) + train_mce_irl_ex.observers.append(observer) + train_mce_irl_ex.run_commandline() + + +if __name__ == "__main__": # pragma: no cover + main_console() From 1ac7848b71913e1f44c4966e82434055788a7ac3 Mon Sep 17 00:00:00 2001 From: ZiyueWang25 Date: Fri, 6 Oct 2023 15:50:12 -0700 Subject: [PATCH 54/54] small fix --- src/imitation/scripts/train_imitation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/imitation/scripts/train_imitation.py b/src/imitation/scripts/train_imitation.py index 2c280cc46..c47ed29bd 100644 --- a/src/imitation/scripts/train_imitation.py +++ b/src/imitation/scripts/train_imitation.py @@ -3,7 +3,7 @@ import logging import os.path as osp import pathlib -from typing import Any, Dict, Mapping, Optional, Sequence, Type, cast +from typing import Any, Dict, Mapping, Optional, Sequence, cast import numpy as np from sacred.observers import FileStorageObserver @@ -15,7 +15,7 @@ from 
imitation.scripts.ingredients import bc as bc_ingredient from imitation.scripts.ingredients import demonstrations, environment, expert from imitation.scripts.ingredients import logging as logging_ingredient -from imitation.scripts.ingredients import policy_evaluation, reward +from imitation.scripts.ingredients import policy_evaluation from imitation.util import util logger = logging.getLogger(__name__)
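
Below is a minimal sketch of the MCE IRL pipeline that the new `train_mce_irl` command wires together, runnable outside of Sacred. The CliffWorldEnv kwargs, optimizer settings, and MCEIRL arguments mirror the config defaults in the patches above; everything else is an assumption rather than part of this patch set: `reward_nets.BasicRewardNet` stands in for the `reward` ingredient's `make_reward_net`, `mce_partition_fh`/`mce_occupancy_measures` stand in for the `demonstrations` ingredient, and `max_iter` is passed explicitly because the config defaults shown above do not define a `mceirl["max_iter"]` entry.

    from functools import partial

    import numpy as np
    import torch as th
    from seals import base_envs
    from seals.diagnostics.cliff_world import CliffWorldEnv
    from stable_baselines3.common.vec_env import DummyVecEnv

    from imitation.algorithms.mce_irl import (
        MCEIRL,
        mce_occupancy_measures,
        mce_partition_fh,
    )
    from imitation.data import rollout
    from imitation.rewards import reward_nets

    rng = np.random.default_rng(0)

    # Tabular environment matching the env_kwargs defaults in config/train_mce_irl.py.
    env_creator = partial(CliffWorldEnv, height=4, horizon=40, width=7, use_xy_obs=True)
    env = env_creator()

    # Synthetic "expert" demonstrations: the optimal policy's state-occupancy measure.
    _, _, expert_pi = mce_partition_fh(env)
    _, expert_om = mce_occupancy_measures(env, pi=expert_pi)

    # Vectorized env exposing the underlying POMDP state, as the script builds for evaluation.
    state_venv = DummyVecEnv(
        [lambda: base_envs.ExposePOMDPStateWrapper(env_creator())] * 4,
    )

    # Stand-in for the reward ingredient: a state-only reward network.
    reward_net = reward_nets.BasicRewardNet(
        env.observation_space,
        env.action_space,
        use_action=False,
        use_next_state=False,
        use_done=False,
    )

    mceirl_trainer = MCEIRL(
        expert_om,
        env,
        reward_net,
        rng=rng,
        optimizer_cls=th.optim.Adam,
        optimizer_kwargs=dict(lr=4e-4),
        discount=1.0,
        linf_eps=1e-3,
        grad_l2_eps=1e-4,
        log_interval=100,
    )
    # max_iter supplied explicitly; the config defaults above do not set it.
    mceirl_trainer.train(max_iter=200)

    # Evaluate the recovered policy on the state-exposing vectorized env.
    trajs = rollout.rollout(
        mceirl_trainer.policy,
        state_venv,
        rollout.make_sample_until(min_episodes=10),
        rng=rng,
    )
    print(rollout.rollout_stats(trajs))

Evaluation uses envs wrapped in `ExposePOMDPStateWrapper`, as in the script, because the tabular policy learned by MCEIRL acts on the underlying state rather than the raw observation.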