Files
LangBot/pkg/provider/requester/tokenizers/tiktoken.py
2024-02-16 14:11:22 +08:00

29 lines
901 B
Python

from __future__ import annotations
import tiktoken
from .. import tokenizer
from ... import entities as llm_entities
from .. import entities
class Tiktoken(tokenizer.LLMTokenizer):
async def count_token(
self,
messages: list[llm_entities.Message],
model: entities.LLMModelInfo
) -> int:
try:
encoding = tiktoken.encoding_for_model(model.name)
except KeyError:
# print("Warning: model not found. Using cl100k_base encoding.")
encoding = tiktoken.get_encoding("cl100k_base")
num_tokens = 0
for message in messages:
num_tokens += len(encoding.encode(message.role))
num_tokens += len(encoding.encode(message.content if message.content is not None else ''))
num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>
return num_tokens