Developing foundational world models is a key research direction for embodied intelligence, with the ability to adapt to non-stationary environments being a crucial criterion. In this work, we introduce a new formalism, Hidden Parameter-POMDP, designed for control with adaptive world models. We demonstrate that this approach enables learning robust behaviors across a variety of non-stationary RL benchmarks. Additionally, this formalism effectively learns task abstractions in an unsupervised manner, resulting in structured, task-aware latent spaces.
@inproceedings{gospodinov2024adaptive,
  author    = {Gospodinov, Emiliyan and Shaj, Vaisakh and Becker, Philipp and Geyer, Stefan and Neumann, Gerhard},
  title     = {Adaptive World Models: Learning Behaviors by Latent Imagination Under Non-Stationarity},
  booktitle = {Adaptive Foundation Models: Evolving {AI} for Personalized and Efficient Learning},
  year      = {2024},
  url       = {https://openreview.net/forum?id=VMIbgzRw78},
}