diff --git a/pkg/core/app.py b/pkg/core/app.py
index ed035e5d..0d726a44 100644
--- a/pkg/core/app.py
+++ b/pkg/core/app.py
@@ -6,7 +6,7 @@ import traceback
 from ..platform import manager as im_mgr
 from ..provider.session import sessionmgr as llm_session_mgr
-from ..provider.requester import modelmgr as llm_model_mgr
+from ..provider.modelmgr import modelmgr as llm_model_mgr
 from ..provider.sysprompt import sysprompt as llm_prompt_mgr
 from ..provider.tools import toolmgr as llm_tool_mgr
 from ..config import manager as config_mgr
diff --git a/pkg/core/entities.py b/pkg/core/entities.py
index 78bcf1fe..8bf1ff2e 100644
--- a/pkg/core/entities.py
+++ b/pkg/core/entities.py
@@ -9,7 +9,7 @@ import pydantic
 import mirai
 
 from ..provider import entities as llm_entities
-from ..provider.requester import entities
+from ..provider.modelmgr import entities
 from ..provider.sysprompt import entities as sysprompt_entities
 from ..provider.tools import entities as tools_entities
 from ..platform import adapter as msadapter
diff --git a/pkg/core/stages/build_app.py b/pkg/core/stages/build_app.py
index a6c0fe3c..09b4342b 100644
--- a/pkg/core/stages/build_app.py
+++ b/pkg/core/stages/build_app.py
@@ -10,7 +10,7 @@ from ...pipeline import pool, controller, stagemgr
 from ...plugin import manager as plugin_mgr
 from ...command import cmdmgr
 from ...provider.session import sessionmgr as llm_session_mgr
-from ...provider.requester import modelmgr as llm_model_mgr
+from ...provider.modelmgr import modelmgr as llm_model_mgr
 from ...provider.sysprompt import sysprompt as llm_prompt_mgr
 from ...provider.tools import toolmgr as llm_tool_mgr
 from ...platform import manager as im_mgr
diff --git a/pkg/pipeline/preproc/preproc.py b/pkg/pipeline/preproc/preproc.py
index c0eb92d6..cedc030f 100644
--- a/pkg/pipeline/preproc/preproc.py
+++ b/pkg/pipeline/preproc/preproc.py
@@ -51,28 +51,6 @@ class PreProcessor(stage.PipelineStage):
             query.prompt.messages = event_ctx.event.default_prompt
             query.messages = event_ctx.event.prompt
 
-        # 根据模型max_tokens剪裁
-        max_tokens = min(query.use_model.max_tokens, self.ap.pipeline_cfg.data['submit-messages-tokens'])
-
-        test_messages = query.prompt.messages + query.messages + [query.user_message]
-
-        while await query.use_model.tokenizer.count_token(test_messages, query.use_model) > max_tokens:
-            # 前文都pop完了,还是大于max_tokens,由于prompt和user_messages不能删减,报错
-            if len(query.prompt.messages) == 0:
-                return entities.StageProcessResult(
-                    result_type=entities.ResultType.INTERRUPT,
-                    new_query=query,
-                    user_notice='输入内容过长,请减少情景预设或者输入内容长度',
-                    console_notice='输入内容过长,请减少情景预设或者输入内容长度,或者增大配置文件中的 submit-messages-tokens 项(但不能超过所用模型最大tokens数)'
-                )
-
-            query.messages.pop(0)  # pop第一个肯定是role=user的
-            # 继续pop到第二个role=user前一个
-            while len(query.messages) > 0 and query.messages[0].role != 'user':
-                query.messages.pop(0)
-
-            test_messages = query.prompt.messages + query.messages + [query.user_message]
-
         return entities.StageProcessResult(
             result_type=entities.ResultType.CONTINUE,
             new_query=query
diff --git a/pkg/pipeline/process/handlers/chat.py b/pkg/pipeline/process/handlers/chat.py
index b3e8fa18..33dedb04 100644
--- a/pkg/pipeline/process/handlers/chat.py
+++ b/pkg/pipeline/process/handlers/chat.py
@@ -21,8 +21,6 @@ class ChatMessageHandler(handler.MessageHandler):
     ) -> typing.AsyncGenerator[entities.StageProcessResult, None]:
         """处理
         """
-        # 取session
-        # 取conversation
         # 调API
         # 生成器
 
diff --git a/pkg/provider/requester/__init__.py b/pkg/provider/modelmgr/__init__.py
similarity index 100%
rename from pkg/provider/requester/__init__.py
rename to pkg/provider/modelmgr/__init__.py
diff --git a/pkg/provider/requester/api.py b/pkg/provider/modelmgr/api.py
similarity index 65%
rename from pkg/provider/requester/api.py
rename to pkg/provider/modelmgr/api.py
index 88ba78cd..da362468 100644
--- a/pkg/provider/requester/api.py
+++ b/pkg/provider/modelmgr/api.py
@@ -7,9 +7,23 @@ from ...core import app
 from ...core import entities as core_entities
 from .. import entities as llm_entities
 
+
+preregistered_requesters: list[typing.Type[LLMAPIRequester]] = []
+
+def requester_class(name: str):
+
+    def decorator(cls: typing.Type[LLMAPIRequester]) -> typing.Type[LLMAPIRequester]:
+        cls.name = name
+        preregistered_requesters.append(cls)
+        return cls
+
+    return decorator
+
+
 class LLMAPIRequester(metaclass=abc.ABCMeta):
     """LLM API请求器
     """
+    name: str = None
 
     ap: app.Application
 
diff --git a/pkg/provider/requester/apis/__init__.py b/pkg/provider/modelmgr/apis/__init__.py
similarity index 100%
rename from pkg/provider/requester/apis/__init__.py
rename to pkg/provider/modelmgr/apis/__init__.py
diff --git a/pkg/provider/requester/apis/chatcmpl.py b/pkg/provider/modelmgr/apis/chatcmpl.py
similarity index 94%
rename from pkg/provider/requester/apis/chatcmpl.py
rename to pkg/provider/modelmgr/apis/chatcmpl.py
index 2d520017..4965acf7 100644
--- a/pkg/provider/requester/apis/chatcmpl.py
+++ b/pkg/provider/modelmgr/apis/chatcmpl.py
@@ -17,6 +17,7 @@ from ... import entities as llm_entities
 from ...tools import entities as tools_entities
 
 
+@api.requester_class("openai-chat-completion")
 class OpenAIChatCompletion(api.LLMAPIRequester):
     """OpenAI ChatCompletion API 请求器"""
 
@@ -133,7 +134,10 @@ class OpenAIChatCompletion(api.LLMAPIRequester):
         except asyncio.TimeoutError:
             raise errors.RequesterError('请求超时')
         except openai.BadRequestError as e:
-            raise errors.RequesterError(f'请求错误: {e.message}')
+            if 'context_length_exceeded' in e.message:
+                raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
+            else:
+                raise errors.RequesterError(f'请求参数错误: {e.message}')
         except openai.AuthenticationError as e:
             raise errors.RequesterError(f'无效的 api-key: {e.message}')
         except openai.NotFoundError as e:
diff --git a/pkg/provider/requester/entities.py b/pkg/provider/modelmgr/entities.py
similarity index 76%
rename from pkg/provider/requester/entities.py
rename to pkg/provider/modelmgr/entities.py
index d4c51d6f..277f125a 100644
--- a/pkg/provider/requester/entities.py
+++ b/pkg/provider/modelmgr/entities.py
@@ -5,7 +5,7 @@ import typing
 import pydantic
 
 from . import api
-from . import token, tokenizer
+from . import token
 
 
 class LLMModelInfo(pydantic.BaseModel):
@@ -19,11 +19,7 @@ class LLMModelInfo(pydantic.BaseModel):
 
     requester: api.LLMAPIRequester
 
-    tokenizer: 'tokenizer.LLMTokenizer'
-
     tool_call_supported: typing.Optional[bool] = False
 
-    max_tokens: typing.Optional[int] = 2048
-
     class Config:
         arbitrary_types_allowed = True
diff --git a/pkg/provider/requester/errors.py b/pkg/provider/modelmgr/errors.py
similarity index 100%
rename from pkg/provider/requester/errors.py
rename to pkg/provider/modelmgr/errors.py
diff --git a/pkg/provider/requester/modelmgr.py b/pkg/provider/modelmgr/modelmgr.py
similarity index 76%
rename from pkg/provider/requester/modelmgr.py
rename to pkg/provider/modelmgr/modelmgr.py
index e1a48bc2..a91c3110 100644
--- a/pkg/provider/requester/modelmgr.py
+++ b/pkg/provider/modelmgr/modelmgr.py
@@ -3,9 +3,8 @@ from __future__ import annotations
 
 from . import entities
 from ...core import app
-from .apis import chatcmpl
 from . import token
-from .tokenizers import tiktoken
+from .apis import chatcmpl
 
 
 class ModelManager:
@@ -30,9 +29,7 @@ class ModelManager:
     async def initialize(self):
         openai_chat_completion = chatcmpl.OpenAIChatCompletion(self.ap)
         await openai_chat_completion.initialize()
-        openai_token_mgr = token.TokenManager(self.ap, list(self.ap.provider_cfg.data['openai-config']['api-keys']))
-
-        tiktoken_tokenizer = tiktoken.Tiktoken(self.ap)
+        openai_token_mgr = token.TokenManager("openai", list(self.ap.provider_cfg.data['openai-config']['api-keys']))
 
         model_list = [
             entities.LLMModelInfo(
@@ -40,48 +37,36 @@ class ModelManager:
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=True,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=4096
             ),
             entities.LLMModelInfo(
                 name="gpt-3.5-turbo-1106",
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=True,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=16385
             ),
             entities.LLMModelInfo(
                 name="gpt-3.5-turbo-16k",
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=True,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=16385
             ),
             entities.LLMModelInfo(
                 name="gpt-3.5-turbo-0613",
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=True,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=4096
             ),
             entities.LLMModelInfo(
                 name="gpt-3.5-turbo-16k-0613",
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=True,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=16385
             ),
             entities.LLMModelInfo(
                 name="gpt-3.5-turbo-0301",
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=True,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=4096
             )
         ]
 
@@ -93,64 +78,48 @@ class ModelManager:
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=True,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=128000
             ),
             entities.LLMModelInfo(
                 name="gpt-4-turbo-preview",
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=True,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=128000
             ),
             entities.LLMModelInfo(
                 name="gpt-4-1106-preview",
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=True,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=128000
             ),
             entities.LLMModelInfo(
                 name="gpt-4-vision-preview",
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=True,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=128000
             ),
             entities.LLMModelInfo(
                 name="gpt-4",
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=True,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=8192
             ),
             entities.LLMModelInfo(
                 name="gpt-4-0613",
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=True,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=8192
             ),
             entities.LLMModelInfo(
                 name="gpt-4-32k",
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=True,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=32768
             ),
             entities.LLMModelInfo(
                 name="gpt-4-32k-0613",
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=True,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=32768
             )
         ]
 
@@ -163,8 +132,6 @@ class ModelManager:
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=False,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=8192
             ),
             entities.LLMModelInfo(
                 name="OneAPI/chatglm_pro",
@@ -172,8 +139,6 @@ class ModelManager:
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=False,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=128000
             ),
             entities.LLMModelInfo(
                 name="OneAPI/chatglm_std",
@@ -181,8 +146,6 @@ class ModelManager:
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=False,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=128000
             ),
             entities.LLMModelInfo(
                 name="OneAPI/chatglm_lite",
@@ -190,8 +153,6 @@ class ModelManager:
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=False,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=128000
             ),
             entities.LLMModelInfo(
                 name="OneAPI/qwen-v1",
@@ -199,8 +160,6 @@ class ModelManager:
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=False,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=6000
             ),
             entities.LLMModelInfo(
                 name="OneAPI/qwen-plus-v1",
@@ -208,8 +167,6 @@ class ModelManager:
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=False,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=30000
             ),
             entities.LLMModelInfo(
                 name="OneAPI/ERNIE-Bot",
@@ -217,8 +174,6 @@ class ModelManager:
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=False,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=2000
             ),
             entities.LLMModelInfo(
                 name="OneAPI/ERNIE-Bot-turbo",
@@ -226,8 +181,6 @@ class ModelManager:
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=False,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=7000
             ),
             entities.LLMModelInfo(
                 name="OneAPI/gemini-pro",
@@ -235,8 +188,6 @@ class ModelManager:
                 token_mgr=openai_token_mgr,
                 requester=openai_chat_completion,
                 tool_call_supported=False,
-                tokenizer=tiktoken_tokenizer,
-                max_tokens=30720
             ),
         ]
 
diff --git a/pkg/provider/requester/token.py b/pkg/provider/modelmgr/token.py
similarity index 100%
rename from pkg/provider/requester/token.py
rename to pkg/provider/modelmgr/token.py
diff --git a/pkg/provider/requester/tokenizer.py b/pkg/provider/requester/tokenizer.py
deleted file mode 100644
index cdd91470..00000000
--- a/pkg/provider/requester/tokenizer.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from __future__ import annotations
-
-import abc
-import typing
-
-from ...core import app
-from .. import entities as llm_entities
-from . import entities
-
-
-class LLMTokenizer(metaclass=abc.ABCMeta):
-    """LLM分词器抽象类"""
-
-    ap: app.Application
-
-    def __init__(self, ap: app.Application):
-        self.ap = ap
-
-    async def initialize(self):
-        """初始化分词器
-        """
-        pass
-
-    @abc.abstractmethod
-    async def count_token(
-        self,
-        messages: list[llm_entities.Message],
-        model: entities.LLMModelInfo
-    ) -> int:
-        pass
diff --git a/pkg/provider/requester/tokenizers/__init__.py b/pkg/provider/requester/tokenizers/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/pkg/provider/requester/tokenizers/tiktoken.py b/pkg/provider/requester/tokenizers/tiktoken.py
deleted file mode 100644
index 24d2d8b6..00000000
--- a/pkg/provider/requester/tokenizers/tiktoken.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from __future__ import annotations
-
-import tiktoken
-
-from .. import tokenizer
-from ... import entities as llm_entities
-from .. import entities
-
-
-class Tiktoken(tokenizer.LLMTokenizer):
-    """TikToken分词器
-    """
-
-    async def count_token(
-        self,
-        messages: list[llm_entities.Message],
-        model: entities.LLMModelInfo
-    ) -> int:
-        try:
-            encoding = tiktoken.encoding_for_model(model.name)
-        except KeyError:
-            # print("Warning: model not found. Using cl100k_base encoding.")
-            encoding = tiktoken.get_encoding("cl100k_base")
-
-        num_tokens = 0
-        for message in messages:
-            num_tokens += len(encoding.encode(message.role))
-            num_tokens += len(encoding.encode(message.content if message.content is not None else ''))
-            num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
-        return num_tokens