hotfix citation

2024-07-11 06:05:05 +00:00 · 2024-07-11 06:05:05 +00:00 · 115c4cc5a4
parent e7a8634636
commit 115c4cc5a4
1 changed files with 31 additions and 1 deletions
--- a/applications/ColossalChat/README.md
+++ b/applications/ColossalChat/README.md
@ -529,7 +529,7 @@ Coati is developed by ColossalAI Team:
 - [Fazzie](https://fazzie-key.cool/about/index.html) Contributing to the algorithm and development for SFT.
 - [ofey404](https://github.com/ofey404) Contributing to both front-end and back-end development.
 - [Wenhao Chen](https://github.com/CWHer) Contributing to subsequent code enhancements and performance improvements.
- [Anbang Ye](https://github.com/YeAnbang) Contributing to the refactored PPO version with updated acceleration framework. Add support for DPO, SimPO.
+- [Anbang Ye](https://github.com/YeAnbang) Contributing to the refactored PPO version with the updated acceleration framework, and adding support for DPO, SimPO, and ORPO.
 The PhD student from [(HPC-AI) Lab](https://ai.comp.nus.edu.sg/) also contributed a lot to this project.
 - [Zangwei Zheng](https://github.com/zhengzangw)
@ -579,6 +579,36 @@ We also appreciate the valuable suggestions provided by [Jian Hu](https://github
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/XueFuzhao/InstructionWild}},
 }
% SimPO preprint (arXiv:2405.14734, cs.CL). The braces around "SimPO" keep its
% capitalization under bibliography styles that sentence-case titles.
@misc{meng2024simposimplepreferenceoptimization,
  author        = {Meng, Yu and Xia, Mengzhou and Chen, Danqi},
  title         = {{SimPO}: Simple Preference Optimization with a Reference-Free Reward},
  year          = {2024},
  eprint        = {2405.14734},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2405.14734},
 }
% Direct Preference Optimization (DPO) preprint (arXiv:2305.18290, cs.LG).
@misc{rafailov2023directpreferenceoptimizationlanguage,
  author        = {Rafailov, Rafael and Sharma, Archit and Mitchell, Eric and Ermon, Stefano and Manning, Christopher D. and Finn, Chelsea},
  title         = {Direct Preference Optimization: Your Language Model is Secretly a Reward Model},
  year          = {2023},
  eprint        = {2305.18290},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2305.18290},
 }
% ORPO preprint (arXiv:2403.07691, cs.CL). The braces around "ORPO" keep its
% capitalization under bibliography styles that sentence-case titles.
@misc{hong2024orpomonolithicpreferenceoptimization,
  author        = {Hong, Jiwoo and Lee, Noah and Thorne, James},
  title         = {{ORPO}: Monolithic Preference Optimization without Reference Model},
  year          = {2024},
  eprint        = {2403.07691},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2403.07691},
 }
 ```
 ## Licenses