This repository provides training, inference, and evaluation instructions for RAPID.
# Create conda environment
conda create -n rapid python==3.11
conda activate rapid
# Install dependencies
cd verl-main
pip install -r requirements.txt
# Install flash-attention (see: https://github.com/Dao-AILab/flash-attention/releases)
# Follow the instructions for your CUDA version
# Install verl-main
pip install -e .
# Install VLMEvalKit
cd ../VLMEvalKit
pip install -e .
pip install transformers==4.51.1

Download the ViRL39K dataset and preprocess it:
python verl-main/examples/data_preprocess/virl39k_pre.py \
--src-parquet /cache/data/datasets/ViRL39K/39Krelease.parquet \
--tgt-dir /cache/data/huggingface_datasets/virl39k_hf_no_deepscaler
python verl-main/examples/data_preprocess/virl39k.py \
--src-hf-dataset /cache/data/huggingface_datasets/virl39k_hf_no_deepscaler/ \
--tgt-parquet /cache/data/huggingface_datasets/virl39k_no_deepscaler_caption.parquet
python verl-main/examples/data_preprocess/virl39k_qa.py \
--src-hf-dataset /cache/data/huggingface_datasets/virl39k_hf_no_deepscaler/ \
--tgt-parquet /cache/data/huggingface_datasets/virl39k_no_deepscaler_qa.parquet

bash verl-main/examples/grpo_trainer/grpo_7b.sh

bash verl-main/examples/grpo_trainer/vpo_7b.sh

bash verl-main/scripts/convert2hf.sh

bash VLMEvalKit/run_inference.sh

.
├── verl-main/
│ ├── examples/
│ ├── scripts/
│ └── ...
└── VLMEvalKit/
    ├── outputs/
    └── ...