Mirror of https://github.com/langbot-app/LangBot.git (synced 2025-11-25 19:37:36 +08:00)
feat: 不再预先计算前文token数而是在报错时提醒用户重置 (stop pre-computing the token count of prior context; instead remind the user to reset the session when the request errors out). The change drops the local tokenizer and the PreProcessor's pre-trimming, repoints imports from provider.requester to provider.modelmgr, and surfaces OpenAI's context_length_exceeded error to the user.
@@ -6,7 +6,7 @@ import traceback
from ..platform import manager as im_mgr
from ..provider.session import sessionmgr as llm_session_mgr
-from ..provider.requester import modelmgr as llm_model_mgr
+from ..provider.modelmgr import modelmgr as llm_model_mgr
from ..provider.sysprompt import sysprompt as llm_prompt_mgr
from ..provider.tools import toolmgr as llm_tool_mgr
from ..config import manager as config_mgr
@@ -9,7 +9,7 @@ import pydantic
import mirai

from ..provider import entities as llm_entities
-from ..provider.requester import entities
+from ..provider.modelmgr import entities
from ..provider.sysprompt import entities as sysprompt_entities
from ..provider.tools import entities as tools_entities
from ..platform import adapter as msadapter
@@ -10,7 +10,7 @@ from ...pipeline import pool, controller, stagemgr
from ...plugin import manager as plugin_mgr
from ...command import cmdmgr
from ...provider.session import sessionmgr as llm_session_mgr
-from ...provider.requester import modelmgr as llm_model_mgr
+from ...provider.modelmgr import modelmgr as llm_model_mgr
from ...provider.sysprompt import sysprompt as llm_prompt_mgr
from ...provider.tools import toolmgr as llm_tool_mgr
from ...platform import manager as im_mgr
@@ -51,28 +51,6 @@ class PreProcessor(stage.PipelineStage):
        query.prompt.messages = event_ctx.event.default_prompt
        query.messages = event_ctx.event.prompt

-        # 根据模型max_tokens剪裁
-        max_tokens = min(query.use_model.max_tokens, self.ap.pipeline_cfg.data['submit-messages-tokens'])
-
-        test_messages = query.prompt.messages + query.messages + [query.user_message]
-
-        while await query.use_model.tokenizer.count_token(test_messages, query.use_model) > max_tokens:
-            # 前文都pop完了,还是大于max_tokens,由于prompt和user_messages不能删减,报错
-            if len(query.prompt.messages) == 0:
-                return entities.StageProcessResult(
-                    result_type=entities.ResultType.INTERRUPT,
-                    new_query=query,
-                    user_notice='输入内容过长,请减少情景预设或者输入内容长度',
-                    console_notice='输入内容过长,请减少情景预设或者输入内容长度,或者增大配置文件中的 submit-messages-tokens 项(但不能超过所用模型最大tokens数)'
-                )
-
-            query.messages.pop(0)  # pop第一个肯定是role=user的
-            # 继续pop到第二个role=user前一个
-            while len(query.messages) > 0 and query.messages[0].role != 'user':
-                query.messages.pop(0)
-
-            test_messages = query.prompt.messages + query.messages + [query.user_message]
-

        return entities.StageProcessResult(
            result_type=entities.ResultType.CONTINUE,
            new_query=query
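This deletion is the heart of the commit: the PreProcessor no longer estimates token usage against min(model max_tokens, submit-messages-tokens) and trims old messages to fit. An over-long context is now left for the provider to reject, and the requester turns that rejection into a "reset the session" notice (see the chatcmpl change below). For reference, the removed logic boiled down to the strategy sketched here; the function and parameter names are illustrative, not part of the codebase.

# Standalone sketch of the removed trimming strategy. `count_tokens` stands in
# for the old tokenizer call; messages are plain dicts with 'role'/'content'.
def trim_history(prompt, history, user_message, count_tokens, max_tokens):
    """Drop whole user-led exchanges from the front until the request fits."""
    history = list(history)
    while count_tokens(prompt + history + [user_message]) > max_tokens:
        if not history:
            # the prompt and the current user message cannot be trimmed
            raise ValueError('input too long: shorten the prompt or the message')
        history.pop(0)                                  # first entry is role == 'user'
        while history and history[0]['role'] != 'user':
            history.pop(0)                              # drop the rest of that exchange
    return history

# Example with a crude one-token-per-character counter:
msgs = [{'role': 'user', 'content': 'hi'}, {'role': 'assistant', 'content': 'hello'}]
trimmed = trim_history([], msgs, {'role': 'user', 'content': 'x' * 50},
                       lambda ms: sum(len(m['content']) for m in ms), max_tokens=55)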
@@ -21,8 +21,6 @@ class ChatMessageHandler(handler.MessageHandler):
    ) -> typing.AsyncGenerator[entities.StageProcessResult, None]:
        """处理
        """
        # 取session
        # 取conversation
        # 调API
        # 生成器
@@ -7,9 +7,23 @@ from ...core import app
from ...core import entities as core_entities
from .. import entities as llm_entities


preregistered_requesters: list[typing.Type[LLMAPIRequester]] = []

def requester_class(name: str):

    def decorator(cls: typing.Type[LLMAPIRequester]) -> typing.Type[LLMAPIRequester]:
        cls.name = name
        preregistered_requesters.append(cls)
        return cls

    return decorator


class LLMAPIRequester(metaclass=abc.ABCMeta):
    """LLM API请求器
    """
    name: str = None

    ap: app.Application
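The lines added above give the modelmgr package a plugin-style registry: requester_class(name) stamps the class with its name and appends it to preregistered_requesters, so concrete implementations can later be discovered by name instead of being hard-wired. A minimal self-contained sketch of the same pattern follows; the find_requester helper is hypothetical and not part of this commit.

import typing

preregistered_requesters: list[type] = []

def requester_class(name: str):
    """Register the decorated class under `name` (mirrors the decorator above)."""
    def decorator(cls: type) -> type:
        cls.name = name
        preregistered_requesters.append(cls)
        return cls
    return decorator

@requester_class("dummy")
class DummyRequester:
    name: str = None

def find_requester(name: str) -> typing.Optional[type]:
    # hypothetical lookup: fetch a registered class by its stamped name
    return next((c for c in preregistered_requesters if c.name == name), None)

assert find_requester("dummy") is DummyRequester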
@@ -17,6 +17,7 @@ from ... import entities as llm_entities
from ...tools import entities as tools_entities


+@api.requester_class("openai-chat-completion")
class OpenAIChatCompletion(api.LLMAPIRequester):
    """OpenAI ChatCompletion API 请求器"""
@@ -133,7 +134,10 @@ class OpenAIChatCompletion(api.LLMAPIRequester):
        except asyncio.TimeoutError:
            raise errors.RequesterError('请求超时')
        except openai.BadRequestError as e:
-            raise errors.RequesterError(f'请求错误: {e.message}')
+            if 'context_length_exceeded' in e.message:
+                raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
+            else:
+                raise errors.RequesterError(f'请求参数错误: {e.message}')
        except openai.AuthenticationError as e:
            raise errors.RequesterError(f'无效的 api-key: {e.message}')
        except openai.NotFoundError as e:
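This is the user-facing half of the change named in the commit title: instead of wrapping every openai.BadRequestError as a generic '请求错误' (request error), the requester now checks the message for OpenAI's context_length_exceeded code and raises '上文过长,请重置会话' ('context too long, please reset the session'), so the user is told to reset rather than the bot silently trimming history. A reduced sketch of the branching, with a stand-in exception class:

class RequesterError(Exception):
    """Stand-in for errors.RequesterError."""

def translate_bad_request(message: str) -> RequesterError:
    # Mirrors the branch above: surface context-length failures distinctly.
    if 'context_length_exceeded' in message:
        return RequesterError(f'上文过长,请重置会话: {message}')
    return RequesterError(f'请求参数错误: {message}')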
@@ -5,7 +5,7 @@ import typing
import pydantic

from . import api
-from . import token, tokenizer
+from . import token


class LLMModelInfo(pydantic.BaseModel):
@@ -19,11 +19,7 @@ class LLMModelInfo(pydantic.BaseModel):

    requester: api.LLMAPIRequester

    tokenizer: 'tokenizer.LLMTokenizer'

    tool_call_supported: typing.Optional[bool] = False

    max_tokens: typing.Optional[int] = 2048

    class Config:
        arbitrary_types_allowed = True
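LLMModelInfo keeps class Config: arbitrary_types_allowed = True because fields such as requester and token_mgr are plain classes rather than pydantic models or primitives. A minimal pydantic illustration of why the flag is needed; the class names are made up for the demo, and the Config style matches the v1-style form used above.

import pydantic

class Requester:                     # a plain, non-pydantic class
    pass

class ModelInfo(pydantic.BaseModel):
    name: str
    requester: Requester             # rejected by default without the Config below

    class Config:
        arbitrary_types_allowed = True

info = ModelInfo(name="demo", requester=Requester())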
@@ -3,9 +3,8 @@ from __future__ import annotations
from . import entities
from ...core import app

from .apis import chatcmpl
from . import token
from .tokenizers import tiktoken
from .apis import chatcmpl


class ModelManager:
@@ -30,9 +29,7 @@ class ModelManager:
    async def initialize(self):
        openai_chat_completion = chatcmpl.OpenAIChatCompletion(self.ap)
        await openai_chat_completion.initialize()
-        openai_token_mgr = token.TokenManager(self.ap, list(self.ap.provider_cfg.data['openai-config']['api-keys']))
-
-        tiktoken_tokenizer = tiktoken.Tiktoken(self.ap)
+        openai_token_mgr = token.TokenManager("openai", list(self.ap.provider_cfg.data['openai-config']['api-keys']))

        model_list = [
            entities.LLMModelInfo(
@@ -40,48 +37,36 @@ class ModelManager:
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=True,
                tokenizer=tiktoken_tokenizer,
                max_tokens=4096
            ),
            entities.LLMModelInfo(
                name="gpt-3.5-turbo-1106",
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=True,
                tokenizer=tiktoken_tokenizer,
                max_tokens=16385
            ),
            entities.LLMModelInfo(
                name="gpt-3.5-turbo-16k",
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=True,
                tokenizer=tiktoken_tokenizer,
                max_tokens=16385
            ),
            entities.LLMModelInfo(
                name="gpt-3.5-turbo-0613",
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=True,
                tokenizer=tiktoken_tokenizer,
                max_tokens=4096
            ),
            entities.LLMModelInfo(
                name="gpt-3.5-turbo-16k-0613",
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=True,
                tokenizer=tiktoken_tokenizer,
                max_tokens=16385
            ),
            entities.LLMModelInfo(
                name="gpt-3.5-turbo-0301",
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=True,
                tokenizer=tiktoken_tokenizer,
                max_tokens=4096
            )
        ]
@@ -93,64 +78,48 @@ class ModelManager:
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=True,
                tokenizer=tiktoken_tokenizer,
                max_tokens=128000
            ),
            entities.LLMModelInfo(
                name="gpt-4-turbo-preview",
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=True,
                tokenizer=tiktoken_tokenizer,
                max_tokens=128000
            ),
            entities.LLMModelInfo(
                name="gpt-4-1106-preview",
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=True,
                tokenizer=tiktoken_tokenizer,
                max_tokens=128000
            ),
            entities.LLMModelInfo(
                name="gpt-4-vision-preview",
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=True,
                tokenizer=tiktoken_tokenizer,
                max_tokens=128000
            ),
            entities.LLMModelInfo(
                name="gpt-4",
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=True,
                tokenizer=tiktoken_tokenizer,
                max_tokens=8192
            ),
            entities.LLMModelInfo(
                name="gpt-4-0613",
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=True,
                tokenizer=tiktoken_tokenizer,
                max_tokens=8192
            ),
            entities.LLMModelInfo(
                name="gpt-4-32k",
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=True,
                tokenizer=tiktoken_tokenizer,
                max_tokens=32768
            ),
            entities.LLMModelInfo(
                name="gpt-4-32k-0613",
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=True,
                tokenizer=tiktoken_tokenizer,
                max_tokens=32768
            )
        ]
@@ -163,8 +132,6 @@ class ModelManager:
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=False,
                tokenizer=tiktoken_tokenizer,
                max_tokens=8192
            ),
            entities.LLMModelInfo(
                name="OneAPI/chatglm_pro",
@@ -172,8 +139,6 @@ class ModelManager:
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=False,
                tokenizer=tiktoken_tokenizer,
                max_tokens=128000
            ),
            entities.LLMModelInfo(
                name="OneAPI/chatglm_std",
@@ -181,8 +146,6 @@ class ModelManager:
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=False,
                tokenizer=tiktoken_tokenizer,
                max_tokens=128000
            ),
            entities.LLMModelInfo(
                name="OneAPI/chatglm_lite",
@@ -190,8 +153,6 @@ class ModelManager:
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=False,
                tokenizer=tiktoken_tokenizer,
                max_tokens=128000
            ),
            entities.LLMModelInfo(
                name="OneAPI/qwen-v1",
@@ -199,8 +160,6 @@ class ModelManager:
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=False,
                tokenizer=tiktoken_tokenizer,
                max_tokens=6000
            ),
            entities.LLMModelInfo(
                name="OneAPI/qwen-plus-v1",
@@ -208,8 +167,6 @@ class ModelManager:
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=False,
                tokenizer=tiktoken_tokenizer,
                max_tokens=30000
            ),
            entities.LLMModelInfo(
                name="OneAPI/ERNIE-Bot",
@@ -217,8 +174,6 @@ class ModelManager:
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=False,
                tokenizer=tiktoken_tokenizer,
                max_tokens=2000
            ),
            entities.LLMModelInfo(
                name="OneAPI/ERNIE-Bot-turbo",
@@ -226,8 +181,6 @@ class ModelManager:
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=False,
                tokenizer=tiktoken_tokenizer,
                max_tokens=7000
            ),
            entities.LLMModelInfo(
                name="OneAPI/gemini-pro",
@@ -235,8 +188,6 @@ class ModelManager:
                token_mgr=openai_token_mgr,
                requester=openai_chat_completion,
                tool_call_supported=False,
                tokenizer=tiktoken_tokenizer,
                max_tokens=30720
            ),
        ]
@@ -1,30 +0,0 @@
from __future__ import annotations

import abc
import typing

from ...core import app
from .. import entities as llm_entities
from . import entities


class LLMTokenizer(metaclass=abc.ABCMeta):
    """LLM分词器抽象类"""

    ap: app.Application

    def __init__(self, ap: app.Application):
        self.ap = ap

    async def initialize(self):
        """初始化分词器
        """
        pass

    @abc.abstractmethod
    async def count_token(
        self,
        messages: list[llm_entities.Message],
        model: entities.LLMModelInfo
    ) -> int:
        pass
@@ -1,30 +0,0 @@
from __future__ import annotations

import tiktoken

from .. import tokenizer
from ... import entities as llm_entities
from .. import entities


class Tiktoken(tokenizer.LLMTokenizer):
    """TikToken分词器
    """

    async def count_token(
        self,
        messages: list[llm_entities.Message],
        model: entities.LLMModelInfo
    ) -> int:
        try:
            encoding = tiktoken.encoding_for_model(model.name)
        except KeyError:
            # print("Warning: model not found. Using cl100k_base encoding.")
            encoding = tiktoken.get_encoding("cl100k_base")

        num_tokens = 0
        for message in messages:
            num_tokens += len(encoding.encode(message.role))
            num_tokens += len(encoding.encode(message.content if message.content is not None else ''))
            num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
        return num_tokens
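The deleted Tiktoken class was the only concrete tokenizer behind the now-removed pre-count: it resolves an encoding for the model name (falling back to cl100k_base for unknown models), then charges each message its role tokens plus content tokens plus a fixed 3-token priming overhead. For reference, the same heuristic as a standalone function over plain role/content dicts; it requires the tiktoken package, and the function name is illustrative.

import tiktoken

def count_tokens(messages: list[dict], model_name: str) -> int:
    """Approximate prompt size the way the removed Tiktoken.count_token did."""
    try:
        encoding = tiktoken.encoding_for_model(model_name)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")   # unknown model: fall back

    num_tokens = 0
    for message in messages:
        num_tokens += len(encoding.encode(message["role"]))
        num_tokens += len(encoding.encode(message.get("content") or ""))
        num_tokens += 3   # every reply is primed with <|start|>assistant<|message|>
    return num_tokens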