@inproceedings{zhang2025orso,
  author    = {Zhang, Chen Bo Calvin and Hong, Zhang-Wei and Pacchiano, Aldo and Agrawal, Pulkit},
  title     = {{ORSO}: Accelerating Reward Design via Online Reward Selection and Policy Optimization},
  booktitle = {The Thirteenth International Conference on Learning Representations},
  year      = {2025},
}
arXiv
Humanity’s Last Exam
Long Phan, Alice Gatti, Ziwen Han, Nathaniel Li, Josephina Hu, Hugh Zhang, Chen Bo Calvin Zhang, Mohamed Shaaban, John Ling, and 2 more authors
@misc{phan2025humanity,
  author        = {Phan, Long and Gatti, Alice and Han, Ziwen and Li, Nathaniel and Hu, Josephina and Zhang, Hugh and Zhang, Chen Bo Calvin and Shaaban, Mohamed and Ling, John and Shi, Sean and others},
  title         = {Humanity's {Last} {Exam}},
  year          = {2025},
  eprint        = {2501.14249},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2501.14249},
}
arXiv
SHADE-Arena: Evaluating Sabotage and Monitoring in LLM Agents
Jonathan Kutasov, Yuqi Sun, Paul Colognese, Teun van der Weij, Linda Petrini, Chen Bo Calvin Zhang, John Hughes, Xiang Deng, Henry Sleight, and 3 more authors
@misc{kutasov2025shade,
  author        = {Kutasov, Jonathan and Sun, Yuqi and Colognese, Paul and van der Weij, Teun and Petrini, Linda and Zhang, Chen Bo Calvin and Hughes, John and Deng, Xiang and Sleight, Henry and Tracy, Tyler and Shlegeris, Buck and Benton, Joe},
  title         = {{SHADE-Arena}: Evaluating Sabotage and Monitoring in {LLM} Agents},
  year          = {2025},
  eprint        = {2506.15740},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2506.15740},
}
2023
ICML
HIP-RL: Hallucinated Inputs for Preference-based Reinforcement Learning in Continuous Domains
Chen Bo Calvin Zhang and Giorgia Ramponi
In ICML 2023 Workshop: The Many Facets of Preference-Based Learning, 2023
@inproceedings{zhang2023hip,
  author    = {Zhang, Chen Bo Calvin and Ramponi, Giorgia},
  title     = {{HIP-RL}: Hallucinated Inputs for Preference-based Reinforcement Learning in Continuous Domains},
  booktitle = {{ICML} 2023 Workshop: The Many Facets of Preference-Based Learning},
  year      = {2023},
}
arXiv
Zero-Shot Transfer in Imitation Learning
Alvaro Cauderan, Gauthier Boeshertz, Florian Schwarb, and Calvin Zhang
@misc{cauderan2023zero,
  author        = {Cauderan, Alvaro and Boeshertz, Gauthier and Schwarb, Florian and Zhang, Calvin},
  title         = {Zero-Shot Transfer in Imitation Learning},
  year          = {2023},
  eprint        = {2310.06710},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2310.06710},
}