diff --git a/README.md b/README.md
index d7552013..dc4e394e 100644
--- a/README.md
+++ b/README.md
@@ -164,8 +164,6 @@ Please refer to the [documentation](https://internrobotics.github.io/user_guide/
 | InternVLA-N1 (Dual System) with NavDP* | RGB-D | 4.70 | 59.7 | 50.6 | 69.7 |
 | InternVLA-N1 (Dual System) DualVLN | RGB | **4.58** | **61.4** | **51.8** | **70.0** |
 
----
-
 #### VLN-PE Benchmarks
 
 **📍 Flash Controller on R2R Unseen**
@@ -203,8 +201,6 @@ Please refer to the [documentation](https://internrobotics.github.io/user_guide/
 | ViPlanner | 54.3 | 52.5 |
 | NavDP | **65.7** | **60.7** |
 
----
-
 ## 🔧 Customization
 
 Please refer to the [tutorial](https://internrobotics.github.io/user_guide/internnav/tutorials/index.html) for advanced usage of InternNav, including customization of datasets, models and experimental settings.
diff --git a/docs/changelog.md b/docs/changelog.md
new file mode 100644
index 00000000..889554c1
--- /dev/null
+++ b/docs/changelog.md
@@ -0,0 +1,67 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+## Unreleased
+
+Upcoming changes will be tracked in this section.
+
+## Changelog of v0.3.0 (2026/01/05)
+### Highlights
+- Support training of InternVLA-N1 and evaluation on RxR (#184)
+- Support training and evaluation for the [VL-LN benchmark](https://arxiv.org/html/2512.22342v2) (#193, #198)
+- Add a new Flash-without-Collision controller (#189)
+
+### New Features
+- Add training code for InternVLA-N1 (#184)
+- Support evaluation on the RxR dataset (#184)
+- Add training code for the VL-LN benchmark baseline (#198)
+- Support evaluation on the VL-LN benchmark (#193)
+- Add a Flash-without-Collision controller (#189)
+
+### Improvements
+- Decouple System 2 and Dual-System evaluation functions in the Habitat evaluator for better readability (#184)
+- Update the InternVLA-N1 agent in VLN-PE to align with the updated InternVLA-N1 policy interface (#184)
+- Enhance the Habitat evaluation pipeline to handle NaN values in results (#217)
+- Update the README to include community tutorials (#217)
+
+### Bug Fixes
+- Fix the version of diffusers in the requirements (#184)
+- Fix the result JSON saving path in VLN-PE (#217)
+- Fix a bug in RxR evaluation result collection (#217)
+- Remove legacy code in scripts/demo (#217)
+
+### Contributors
+@kellyiss @DuangZhu @0309hws @kew6688
+
+Full changelog: https://github.com/InternRobotics/InternNav/compare/release/v0.2.0...release/v0.3.0
+
+## Changelog of v0.2.0 (2025/12/04)
+### Highlights
+- Support distributed evaluation for VLN-PE, reducing full benchmark runtime to ~1.6 hours using 16 GPUs (≈13× speedup over single-GPU evaluation) (#168)
+- Enhance the Habitat evaluation flow with `DistributedEvaluator` and `HabitatEnv` integrated into the InternNav framework (#168)
+- Support install flags for dependency isolation: `[habitat]`, `[isaac]`, `[model]` (#135)
+
+### New Features
+- Support distributed evaluation for VLN-PE (#168)
+- Support a unified evaluation script `eval.py`, with new Habitat evaluation configs in `scripts/eval/configs` (#168)
+- Support install flags for dependency isolation (#168)
+
+### Improvements
+- Add `HabitatEnv` with episode pool management (#168)
+- Update `InternUtopiaEnv` for distributed execution and episode pool management (#168)
+- Enhance `episode_loader` in VLN-PE for compatibility with the new distributed mode (#168)
+- Update `data_collector` to support progress checkpointing and incremental result aggregation in distributed evaluation (#168)
+
+### Bug Fixes
+- Fix the logger being disabled after Isaac Sim initialization during evaluator bootstrap (#168)
+- Fix a dataloader bug where `revise_one_data()` was incorrectly applied to all datasets (#168)
+- Fix a dimension mismatch in visualization images during InternVLA-N1 evaluation (#168)
+- Fix a distributed evaluation crash in the rdp policy (#168)
+- Fix GitHub CI tests (#168)
+
+### Contributors
+A total of 3 developers contributed to this release.
+@kew6688, @Gariscat, @yuqiang-yang
+
+Full changelog: https://github.com/InternRobotics/InternNav/compare/release/v0.1.0...release/v0.2.0
diff --git a/internnav/dataset/internvla_n1_lerobot_dataset.py b/internnav/dataset/internvla_n1_lerobot_dataset.py
index d1c9ebdb..8c2d9084 100644
--- a/internnav/dataset/internvla_n1_lerobot_dataset.py
+++ b/internnav/dataset/internvla_n1_lerobot_dataset.py
@@ -1371,7 +1371,7 @@ def __getitem__(self, i):
 def make_supervised_data_module(tokenizer: transformers.PreTrainedTokenizer, data_args) -> Dict:
     """Make dataset and collator for supervised fine-tuning."""
     train_datasets = []
-    if data_args.iion_dataset_use:
+    if data_args.iign_dataset_use:
         train_datasets.append(VLLNDataset(tokenizer=tokenizer, data_args=data_args))
     if data_args.vln_dataset_use:
         train_datasets.append(NavPixelGoalDataset(tokenizer=tokenizer, data_args=data_args))
diff --git a/internnav/dataset/vlln_lerobot_dataset.py b/internnav/dataset/vlln_lerobot_dataset.py
index 40590531..44c64d10 100644
--- a/internnav/dataset/vlln_lerobot_dataset.py
+++ b/internnav/dataset/vlln_lerobot_dataset.py
@@ -15,21 +15,21 @@
 from .rope2d import get_rope_index_2, get_rope_index_25
 
 # Define placeholders for dataset paths
-IION_split1 = {
+IIGN_split1 = {
     "data_path": "projects/VL-LN-Bench/traj_data/mp3d_split1",
     "height": 125,
     "pitch_1": 0,
     "pitch_2": 30,
 }
 
-IION_split2 = {
+IIGN_split2 = {
     "data_path": "projects/VL-LN-Bench/traj_data/mp3d_split2",
     "height": 125,
     "pitch_1": 0,
     "pitch_2": 30,
 }
 
-IION_split3 = {
+IIGN_split3 = {
     "data_path": "projects/VL-LN-Bench/traj_data/mp3d_split3",
     "height": 125,
     "pitch_1": 0,
@@ -37,9 +37,9 @@
 }
 
 data_dict = {
-    "iion_split1": IION_split1,
-    "iion_split2": IION_split2,
-    "iion_split3": IION_split3,
+    "iign_split1": IIGN_split1,
+    "iign_split2": IIGN_split2,
+    "iign_split3": IIGN_split3,
 }
 
 IGNORE_INDEX = -100
@@ -55,14 +55,14 @@
 
 class VLLNDataset(Dataset):
     """
-    Dataset for 'Vision-Language'-'Language-Navigation' (VL-LN) / IION-style training.
+    Dataset for 'Vision-Language'-'Language-Navigation' (VL-LN) / IIGN-style training.
 
     Args:
         tokenizer (transformers.PreTrainedTokenizer): Tokenizer used to
            encode the chat template and produce `input_ids` / `labels`.
         data_args: A config-like object that must provide at least:
-            - iion_dataset_use (str): comma-separated dataset names, optionally
-              with sampling rate suffix like `iion_split1%50`.
+            - iign_dataset_use (str): comma-separated dataset names, optionally
+              with sampling rate suffix like `iign_split1%50`.
            - model_type (str): decides which rope-index function to use.
            - sample_step (int): stride for sampling start frames.
            - pixel_goal_only (bool): whether to keep only pixel-goal samples.
@@ -74,7 +74,7 @@ class VLLNDataset(Dataset):
 
     def __init__(self, tokenizer: transformers.PreTrainedTokenizer, data_args):
         super(VLLNDataset, self).__init__()
-        dataset = data_args.iion_dataset_use.split(",")
+        dataset = data_args.iign_dataset_use.split(",")
         dataset_list = data_list(dataset)
         rank0_print(f"Loading datasets: {dataset_list}")
         self.video_max_total_pixels = getattr(data_args, "video_max_total_pixels", 1664 * 28 * 28)
diff --git a/internnav/trainer/internvla_n1_argument.py b/internnav/trainer/internvla_n1_argument.py
index bc8e0361..5dd6e56d 100644
--- a/internnav/trainer/internvla_n1_argument.py
+++ b/internnav/trainer/internvla_n1_argument.py
@@ -29,7 +29,7 @@ class DataArguments:
     video_min_frame_pixels: int = field(default=4 * 28 * 28)
 
     vln_dataset_use: str = field(default="")
-    iion_dataset_use: str = field(default="")
+    iign_dataset_use: str = field(default="")
     sample_step: int = field(default=4)
     num_history: Optional[int] = field(default=8)
     predict_step_num: Optional[int] = field(default=32)
diff --git a/scripts/train/qwenvl_train/train_system2_vlln.sh b/scripts/train/qwenvl_train/train_system2_vlln.sh
index 2ac79998..14bf9c20 100644
--- a/scripts/train/qwenvl_train/train_system2_vlln.sh
+++ b/scripts/train/qwenvl_train/train_system2_vlln.sh
@@ -27,7 +27,7 @@ max_pixels=313600
 min_pixels=3136
 
 # Dataset configuration (replace with public dataset names)
-iion_datasets=iion_split1,iion_split2 #,iion_split3
+iign_datasets=iign_split1,iign_split2 #,iign_split3
 
 # Output configuration
 run_name=InternVLA-N1-vlln
@@ -38,7 +38,7 @@ srun torchrun --nnodes=$SLURM_NNODES --nproc_per_node=8 \
     internnav/trainer/internvla_vlln_trainer.py \
     --deepspeed ${deepspeed} \
     --model_name_or_path "${llm}" \
-    --iion_dataset_use ${iion_datasets} \
+    --iign_dataset_use ${iign_datasets} \
     --data_flatten False \
     --tune_mm_vision True \
     --tune_mm_mlp True \
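Note on the `iign_dataset_use` format renamed in this diff: the `VLLNDataset` docstring describes it as comma-separated dataset names with an optional sampling-rate suffix (e.g. `iign_split1%50`), and `__init__` splits the string and hands the names to the repo's `data_list()` helper. The sketch below illustrates how such a spec could resolve against `data_dict`; it is a minimal assumption for readability, not the repository's actual implementation — `parse_dataset_spec`, `subsample`, and the 50%-means-keep-half semantics are hypothetical, and the configs are trimmed to `data_path` only.

```python
# Hypothetical sketch of resolving an iign_dataset_use spec such as
# "iign_split1%50,iign_split2" against data_dict. The real logic lives
# in the repo's data_list() helper; this parsing is an assumption.
import random

# Trimmed stand-in for the data_dict defined in vlln_lerobot_dataset.py.
data_dict = {
    "iign_split1": {"data_path": "projects/VL-LN-Bench/traj_data/mp3d_split1"},
    "iign_split2": {"data_path": "projects/VL-LN-Bench/traj_data/mp3d_split2"},
    "iign_split3": {"data_path": "projects/VL-LN-Bench/traj_data/mp3d_split3"},
}


def parse_dataset_spec(spec: str):
    """Turn 'name%rate,name' into a list of (config, sampling_rate) pairs."""
    resolved = []
    for item in spec.split(","):
        item = item.strip()
        if not item:
            continue
        name, _, rate = item.partition("%")  # rate is "" when no suffix is given
        config = data_dict[name]             # raises KeyError on unknown names
        sampling_rate = float(rate) / 100 if rate else 1.0
        resolved.append((config, sampling_rate))
    return resolved


def subsample(indices, sampling_rate, seed=0):
    """Keep roughly `sampling_rate` of the trajectory indices, deterministically."""
    rng = random.Random(seed)
    k = max(1, min(len(indices), int(len(indices) * sampling_rate)))
    return sorted(rng.sample(indices, k))


if __name__ == "__main__":
    for config, rate in parse_dataset_spec("iign_split1%50,iign_split2"):
        print(config["data_path"], rate)  # split1 at 0.5, split2 at 1.0
```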