mirror of https://github.com/hpcaitech/ColossalAI
fix auto loading gpt2 tokenizer (#5279)
parent 5d9a0ae75b
commit 32cb74493a
@@ -136,6 +136,19 @@ class ColossalLLM(LLM):
         """Get the identifying parameters."""
         return {"n": self.n}
 
+    def get_token_ids(self, text: str) -> List[int]:
+        """Return the ordered ids of the tokens in a text.
+
+        Args:
+            text: The string input to tokenize.
+
+        Returns:
+            A list of ids corresponding to the tokens in the text, in order they occur
+                in the text.
+        """
+        # use the colossal llm's tokenizer instead of langchain's cached GPT2 tokenizer
+        return self.api.tokenizer.encode(text)
+
 
 class VllmLLM(LLM):
     """
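Why this change: LangChain's base LLM.get_token_ids falls back to a cached GPT2 tokenizer (lazily downloaded via transformers) when a model does not override it, which is both an unexpected download and the wrong vocabulary for non-GPT2 models. Overriding get_token_ids routes token-id lookup, and therefore LangChain's derived get_num_tokens, through the model's own tokenizer. Below is a minimal sketch of the resulting behaviour, assuming ColossalLLM exposes n and api as fields (as the diff implies); ToyTokenizer and ToyApi are hypothetical stand-ins for the real Colossal inference API object, not part of the commit:

    from typing import List

    # assumes ColossalLLM is importable from the module this diff patches

    class ToyTokenizer:
        # Hypothetical tokenizer exposing the encode() method the override calls.
        def encode(self, text: str) -> List[int]:
            return [ord(c) for c in text]  # toy per-character encoding, illustration only

    class ToyApi:
        # Hypothetical stand-in for the inference API wrapped by ColossalLLM.
        def __init__(self) -> None:
            self.tokenizer = ToyTokenizer()

    llm = ColossalLLM(n=1, api=ToyApi())

    # get_token_ids now encodes with api.tokenizer, and get_num_tokens
    # (which LangChain computes as len(get_token_ids(text))) inherits the
    # fix, so token counting no longer auto-loads the GPT2 tokenizer.
    ids = llm.get_token_ids("hello")
    assert llm.get_num_tokens("hello") == len(ids)

Overriding get_token_ids rather than get_num_tokens keeps both entry points consistent, since LangChain derives the token count from the id list.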