Convert raw examples into `StandardizedExample` objects and provide stable (seeded, reproducible) train/val splits.
preprocess format
from benchmax.envs.types import StandardizedExample
@classmethod
def dataset_preprocess(cls, example, **kwargs) -> StandardizedExample:
    """Map one raw dataset record onto a ``StandardizedExample``.

    Args:
        example: Raw record. Its ``"question"`` entry is read by indexing
            (so it must be present), and its ``"answer"`` entry is read
            with ``.get`` (so it may be absent).
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A ``StandardizedExample`` whose prompt is the question, whose
        ground truth is the answer, and whose rollout arguments are an
        empty dict.
    """
    question = example["question"]
    answer = example.get("answer")
    return StandardizedExample(
        prompt=question,
        ground_truth=answer,
        init_rollout_args={},
    )
train/val split
from datasets import load_dataset as hf_load_dataset
def get_train_val_split(self, train_ratio: float = 0.7, seed: int = 42, **kwargs):
    """Load the JSON dataset and split it into train and validation parts.

    The data at ``self._dataset_path`` is loaded with the ``"json"``
    builder, shuffled with ``seed`` so the split is reproducible, and cut
    at ``int(len(ds) * train_ratio)``: rows before the cut form the train
    split, the remaining rows form the validation split.

    Args:
        train_ratio: Fraction of rows assigned to the train split; must be
            within ``[0.0, 1.0]``.
        seed: Seed passed to the shuffle.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A ``(train, val)`` tuple of datasets with no overlapping rows.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0.0, 1.0]``.
    """
    # Reject bad ratios before any I/O: a ratio above 1 would index past the
    # end of the dataset, and a negative ratio would put negative indices in
    # the validation range. The negated chained comparison also rejects NaN.
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(
            f"train_ratio must be between 0.0 and 1.0, got {train_ratio!r}"
        )

    ds = hf_load_dataset("json", data_files=self._dataset_path)["train"]
    ds = ds.shuffle(seed=seed)

    total = len(ds)
    split_idx = int(total * train_ratio)
    train_split = ds.select(range(split_idx))
    val_split = ds.select(range(split_idx, total))
    return train_split, val_split
guidance
- keep eval prompts representative of production usage.
- avoid leakage between train and eval.
- validate samples before launching long runs.