From 0eac9135c07bc7897cddf70859c8018f4d21b01d Mon Sep 17 00:00:00 2001 From: fdc Date: Mon, 30 Jun 2025 17:58:18 +0800 Subject: [PATCH 001/257] =?UTF-8?q?feat:=20=E5=AE=9E=E7=8E=B0=E6=B5=81?= =?UTF-8?q?=E5=BC=8F=E6=B6=88=E6=81=AF=E5=A4=84=E7=90=86=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fix.MD | 47 +++++++ pkg/core/entities.py | 2 +- pkg/pipeline/process/handlers/chat.py | 40 +++++- pkg/pipeline/respback/respback.py | 22 ++++ pkg/platform/adapter.py | 21 ++++ pkg/provider/entities.py | 83 +++++++++++++ pkg/provider/modelmgr/requester.py | 5 +- pkg/provider/modelmgr/requesters/chatcmpl.py | 90 ++++++++++++-- pkg/provider/runners/localagent.py | 123 +++++++++++++++---- 9 files changed, 387 insertions(+), 46 deletions(-) create mode 100644 fix.MD diff --git a/fix.MD b/fix.MD new file mode 100644 index 00000000..51927eb9 --- /dev/null +++ b/fix.MD @@ -0,0 +1,47 @@ +## 底层模型请求器 + +- pkg/provider/modelmgr/requesters/... + +给 invoke_llm 加个 stream: bool 参数,并允许 invoke_llm 返回两种参数:原来的 llm_entities.Message(非流式)和 返回 llm_entities.MessageChunk(流式,需要新增这个实体)的 AsyncGenerator + +## Runner + +- pkg/provider/runners/... 
+ +每个runner的run方法也允许传入stream: bool。 + +现在的run方法本身就是生成器(AsyncGenerator),因为agent是有多回合的,会生成多条Message。但现在需要支持文本消息可以分段。 + +现在run方法应该返回 AsyncGenerator[ Union[ Message, AsyncGenerator[MessageChunk] ] ]。 + +对于 local agent 的实现上,调用模型invoke_llm时,传入stream,当发现模型返回的是Message时,即按照现在的写法操作Message;当返回的是 AsyncGenerator 时,需要 yield MessageChunk 给上层,同时需要注意判断工具调用。 + +## 流水线 + +- pkg/pipeline/process/handlers/chat.py + +之前这里就已经有一个生成器写法了,用于处理 AsyncGenerator[Message],但现在需要加上一个判断,如果yield出来的是 Message 则按照现在的处理;如果yield出来的是 AsyncGenerator,那么就需要再 async for 一层; + +因为流水线是基于责任链模式设计的,这里的生成结果只需要放入 Query 对象中,供下一层处理。 + +所以需要在 Query 对象中支持存入MessageChunk,现在只支持存 Message 到 resp_messages,这里得设计一下。 + +## 回复阶段 + +最终会在 pkg/pipeline/respback/respback.py 中检出 query 中的信息并发回,这里也要改成支持 MessagChunk 的。 + +这里应该判断适配器是否支持流式,若不支持,应该等待所有 MessageChunk 生成,拼接成 Message 再转换成 MessageChain 调用 send_message(); + +若支持,则uuid生成一个message id,使用该message id调用适配器的 reply_message_chunk 方法。 + +## 机器人适配器 + +因为机器人可能会由于用户配置项不同而表现为对流式的支持性不同,比如飞书默认不支持流式,需要用户额外配置卡片。 + +所以需要新增一个方法 `is_stream_output_supported() -> bool`,这个让每个适配器来判断并返回是否支持流式; + +在发送时,得加两个方法 `send_message_chunk(target_type: str, target_id: str, message_id: , message: MessageChain)` + +message_id 确定同一条消息,由调用方生成; + +`reply_message_chunk(message_source: MessageEvent, message: MessageChain)` \ No newline at end of file diff --git a/pkg/core/entities.py b/pkg/core/entities.py index 4caf18ed..4873d9ce 100644 --- a/pkg/core/entities.py +++ b/pkg/core/entities.py @@ -87,7 +87,7 @@ class Query(pydantic.BaseModel): """使用的函数,由前置处理器阶段设置""" resp_messages: ( - typing.Optional[list[llm_entities.Message]] | typing.Optional[list[platform_message.MessageChain]] + typing.Optional[list[llm_entities.Message]] | typing.Optional[list[platform_message.MessageChain]] | typing.Optional[list[llm_entities.MessageChunk]] ) = [] """由Process阶段生成的回复消息对象列表""" diff --git a/pkg/pipeline/process/handlers/chat.py b/pkg/pipeline/process/handlers/chat.py index 35fa1611..c90d283b 100644 --- a/pkg/pipeline/process/handlers/chat.py 
+++ b/pkg/pipeline/process/handlers/chat.py @@ -1,5 +1,6 @@ from __future__ import annotations +from itertools import accumulate import typing import traceback @@ -59,6 +60,8 @@ class ChatMessageHandler(handler.MessageHandler): text_length = 0 + is_stream = query.adapter.is_stream_output_supported() + try: for r in runner_module.preregistered_runners: if r.name == query.pipeline_config['ai']['runner']['runner']: @@ -66,18 +69,43 @@ class ChatMessageHandler(handler.MessageHandler): break else: raise ValueError(f'未找到请求运行器: {query.pipeline_config["ai"]["runner"]["runner"]}') + if is_stream: + accumulated_messages = [] + async for result in runner.run(query): + accumulated_messages.append(result) + query.resp_messages.append(result) - async for result in runner.run(query): - query.resp_messages.append(result) + self.ap.logger.info(f'对话({query.query_id})流式响应: {self.cut_str(result.readable_str())}') - self.ap.logger.info(f'对话({query.query_id})响应: {self.cut_str(result.readable_str())}') + if result.content is not None: + text_length += len(result.content) - if result.content is not None: - text_length += len(result.content) + # current_chain = platform_message.MessageChain([]) + # for msg in accumulated_messages: + # if msg.content is not None: + # current_chain.append(platform_message.Plain(msg.content)) + # query.resp_message_chain = [current_chain] + + + + + - yield entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query) + yield entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query) + else: + + async for result in runner.run(query): + query.resp_messages.append(result) + + self.ap.logger.info(f'对话({query.query_id})响应: {self.cut_str(result.readable_str())}') + + if result.content is not None: + text_length += len(result.content) + + yield entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query) query.session.using_conversation.messages.append(query.user_message) + 
query.session.using_conversation.messages.extend(query.resp_messages) except Exception as e: self.ap.logger.error(f'对话({query.query_id})请求失败: {type(e).__name__} {str(e)}') diff --git a/pkg/pipeline/respback/respback.py b/pkg/pipeline/respback/respback.py index 39d3abb1..7654896b 100644 --- a/pkg/pipeline/respback/respback.py +++ b/pkg/pipeline/respback/respback.py @@ -36,6 +36,28 @@ class SendResponseBackStage(stage.PipelineStage): quote_origin = query.pipeline_config['output']['misc']['quote-origin'] + has_chunks = any(isinstance(msg, llm_entities.MessageChunk) for msg in query.resp_messages) + if has_chunks and hasattr(query.adapter,'reply_message_chunk'): + + async def message_generator(): + for msg in query.resp_messages: + if isinstance(msg, llm_entities.MessageChunk): + yield msg.content + else: + yield msg.content + await query.adapter.reply_message_chunk( + message_source=query.message_event, + message_id=query.message_event.message_id, + message_generator=message_generator(), + quote_origin=quote_origin, + ) + else: + await query.adapter.reply_message( + message_source=query.message_event, + message=query.resp_message_chain[-1], + quote_origin=quote_origin, + ) + await query.adapter.reply_message( message_source=query.message_event, message=query.resp_message_chain[-1], diff --git a/pkg/platform/adapter.py b/pkg/platform/adapter.py index f28ad3dc..c841ae98 100644 --- a/pkg/platform/adapter.py +++ b/pkg/platform/adapter.py @@ -49,11 +49,27 @@ class MessagePlatformAdapter(metaclass=abc.ABCMeta): async def reply_message( self, message_source: platform_events.MessageEvent, + message_id: int, message: platform_message.MessageChain, quote_origin: bool = False, ): """回复消息 + Args: + message_source (platform.types.MessageEvent): 消息源事件 + message_id (int): 消息ID + message (platform.types.MessageChain): 消息链 + quote_origin (bool, optional): 是否引用原消息. Defaults to False. 
+ """ + raise NotImplementedError + + async def reply_message_chunk( + self, + message_source: platform_events.MessageEvent, + message: platform_message.MessageChain, + quote_origin: bool = False, + ): + """回复消息(流式输出) Args: message_source (platform.types.MessageEvent): 消息源事件 message (platform.types.MessageChain): 消息链 @@ -94,6 +110,11 @@ class MessagePlatformAdapter(metaclass=abc.ABCMeta): async def run_async(self): """异步运行""" raise NotImplementedError + + + async def is_stream_output_supported(self) -> bool: + """是否支持流式输出""" + return False async def kill(self) -> bool: """关闭适配器 diff --git a/pkg/provider/entities.py b/pkg/provider/entities.py index 94b812d9..a149fea3 100644 --- a/pkg/provider/entities.py +++ b/pkg/provider/entities.py @@ -125,6 +125,89 @@ class Message(pydantic.BaseModel): return platform_message.MessageChain(mc) +class MessageChunk(pydantic.BaseModel): + """消息""" + + role: str # user, system, assistant, tool, command, plugin + """消息的角色""" + + name: typing.Optional[str] = None + """名称,仅函数调用返回时设置""" + + all_content: typing.Optional[str] = None + """所有内容""" + + content: typing.Optional[list[ContentElement]] | typing.Optional[str] = None + """内容""" + + # tool_calls: typing.Optional[list[ToolCall]] = None + """工具调用""" + + tool_call_id: typing.Optional[str] = None + + tool_calls: typing.Optional[list[ToolCallChunk]] = None + + is_final: bool = False + + def readable_str(self) -> str: + if self.content is not None: + return str(self.role) + ': ' + str(self.get_content_platform_message_chain()) + elif self.tool_calls is not None: + return f'调用工具: {self.tool_calls[0].id}' + else: + return '未知消息' + + def get_content_platform_message_chain(self, prefix_text: str = '') -> platform_message.MessageChain | None: + """将内容转换为平台消息 MessageChain 对象 + + Args: + prefix_text (str): 首个文字组件的前缀文本 + """ + + if self.content is None: + return None + elif isinstance(self.content, str): + return platform_message.MessageChain([platform_message.Plain(prefix_text + self.content)]) 
+ elif isinstance(self.content, list): + mc = [] + for ce in self.content: + if ce.type == 'text': + mc.append(platform_message.Plain(ce.text)) + elif ce.type == 'image_url': + if ce.image_url.url.startswith('http'): + mc.append(platform_message.Image(url=ce.image_url.url)) + else: # base64 + b64_str = ce.image_url.url + + if b64_str.startswith('data:'): + b64_str = b64_str.split(',')[1] + + mc.append(platform_message.Image(base64=b64_str)) + + # 找第一个文字组件 + if prefix_text: + for i, c in enumerate(mc): + if isinstance(c, platform_message.Plain): + mc[i] = platform_message.Plain(prefix_text + c.text) + break + else: + mc.insert(0, platform_message.Plain(prefix_text)) + + return platform_message.MessageChain(mc) + + +class ToolCallChunk(pydantic.BaseModel): + """工具调用""" + + id: str + """工具调用ID""" + + type: str + """工具调用类型""" + + function: FunctionCall + """函数调用""" + class Prompt(pydantic.BaseModel): """供AI使用的Prompt""" diff --git a/pkg/provider/modelmgr/requester.py b/pkg/provider/modelmgr/requester.py index 244f4c82..3e5e791f 100644 --- a/pkg/provider/modelmgr/requester.py +++ b/pkg/provider/modelmgr/requester.py @@ -60,8 +60,9 @@ class LLMAPIRequester(metaclass=abc.ABCMeta): model: RuntimeLLMModel, messages: typing.List[llm_entities.Message], funcs: typing.List[tools_entities.LLMFunction] = None, + stream: bool = False, extra_args: dict[str, typing.Any] = {}, - ) -> llm_entities.Message: + ) -> llm_entities.Message | typing.AsyncGenerator[llm_entities.MessageChunk, None]: """调用API Args: @@ -71,6 +72,6 @@ class LLMAPIRequester(metaclass=abc.ABCMeta): extra_args (dict[str, typing.Any], optional): 额外的参数. Defaults to {}. 
Returns: - llm_entities.Message: 返回消息对象 + llm_entities.Message | typing.AsyncGenerator[llm_entities.MessageChunk, None]: 返回消息对象 """ pass diff --git a/pkg/provider/modelmgr/requesters/chatcmpl.py b/pkg/provider/modelmgr/requesters/chatcmpl.py index 513086e5..22931611 100644 --- a/pkg/provider/modelmgr/requesters/chatcmpl.py +++ b/pkg/provider/modelmgr/requesters/chatcmpl.py @@ -57,13 +57,35 @@ class OpenAIChatCompletions(requester.LLMAPIRequester): message = llm_entities.Message(**chatcmpl_message) return message + + async def _make_msg_chunk( + self, + chat_completion: chat_completion.ChatCompletion, + ) -> llm_entities.MessageChunk: + choice = chat_completion.choices[0] + delta = choice.delta.model_dump() + # 确保 role 字段存在且不为 None + if 'role' not in delta or delta['role'] is None: + delta['role'] = 'assistant' + + reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None + + # deepseek的reasoner模型 + if reasoning_content is not None: + delta['content'] = '\n' + reasoning_content + '\n\n' + delta['content'] + + message = llm_entities.MessageChunk(**delta) + + return message + async def _closure( self, query: core_entities.Query, req_messages: list[dict], use_model: requester.RuntimeLLMModel, use_funcs: list[tools_entities.LLMFunction] = None, + stream: bool = False, extra_args: dict[str, typing.Any] = {}, ) -> llm_entities.Message: self.client.api_key = use_model.token_mgr.get_token() @@ -91,13 +113,42 @@ class OpenAIChatCompletions(requester.LLMAPIRequester): args['messages'] = messages - # 发送请求 - resp = await self._req(args, extra_body=extra_args) + if stream: + current_content = '' + async for chunk in await self._req(args, extra_body=extra_args): - # 处理请求结果 - message = await self._make_msg(resp) + # 处理流式消息 + delta_message = await self._make_msg_chunk( + chat_completion=chunk, + ) + if delta_message.content: + current_content += delta_message.content + delta_message.all_content = current_content + + # 检查是否为最后一个块 + if 
chunk.choices[0].finish_reason is not None: + delta_message.is_final = True - return message + yield delta_message + return + + else: + + # 非流式请求 + resp = await self._req(args, extra_body=extra_args) + # 处理请求结果 + # 发送请求 + resp = await self._req(args, extra_body=extra_args) + + # 处理请求结果 + message = await self._make_msg(resp) + + return message + + + + + async def invoke_llm( self, @@ -105,8 +156,9 @@ class OpenAIChatCompletions(requester.LLMAPIRequester): model: requester.RuntimeLLMModel, messages: typing.List[llm_entities.Message], funcs: typing.List[tools_entities.LLMFunction] = None, + stream: bool = False, extra_args: dict[str, typing.Any] = {}, - ) -> llm_entities.Message: + ) -> llm_entities.Message | typing.AsyncGenerator[llm_entities.MessageChunk, None]: req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行 for m in messages: msg_dict = m.dict(exclude_none=True) @@ -119,13 +171,25 @@ class OpenAIChatCompletions(requester.LLMAPIRequester): req_messages.append(msg_dict) try: - return await self._closure( - query=query, - req_messages=req_messages, - use_model=model, - use_funcs=funcs, - extra_args=extra_args, - ) + if stream: + async for item in self._closure( + query=query, + req_messages=req_messages, + use_model=model, + use_funcs=funcs, + stream=stream, + extra_args=extra_args, + ): + yield item + return + else: + return await self._closure( + query=query, + req_messages=req_messages, + use_model=model, + use_funcs=funcs, + extra_args=extra_args, + ) except asyncio.TimeoutError: raise errors.RequesterError('请求超时') except openai.BadRequestError as e: diff --git a/pkg/provider/runners/localagent.py b/pkg/provider/runners/localagent.py index 7d5e04c5..02b2db16 100644 --- a/pkg/provider/runners/localagent.py +++ b/pkg/provider/runners/localagent.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +from ssl import ALERT_DESCRIPTION_BAD_CERTIFICATE_HASH_VALUE import typing from .. import runner @@ -12,26 +13,68 @@ from .. 
import entities as llm_entities class LocalAgentRunner(runner.RequestRunner): """本地Agent请求运行器""" - async def run(self, query: core_entities.Query) -> typing.AsyncGenerator[llm_entities.Message, None]: + class ToolCallTracker: + """工具调用追踪器""" + def __init__(self): + self.active_calls: dict[str,dict] = {} + self.completed_calls: list[llm_entities.ToolCall] = [] + + async def run(self, query: core_entities.Query) -> typing.AsyncGenerator[llm_entities.Message | llm_entities.MessageChunk, None]: """运行请求""" pending_tool_calls = [] req_messages = query.prompt.messages.copy() + query.messages.copy() + [query.user_message] - # 首次请求 - msg = await query.use_llm_model.requester.invoke_llm( - query, - query.use_llm_model, - req_messages, - query.use_funcs, - extra_args=query.use_llm_model.model_entity.extra_args, - ) + is_stream = query.adapter.is_stream_output_supported() + # while True: + # pass + if not is_stream: + # 非流式输出,直接请求 + msg = await query.use_llm_model.requester.invoke_llm( + query, + query.use_llm_model, + req_messages, + query.use_funcs, + extra_args=query.use_llm_model.model_entity.extra_args, + ) + yield msg + final_msg = msg + else: + # 流式输出,需要处理工具调用 + tool_calls_map: dict[str, llm_entities.ToolCall] = {} + async for msg in await query.use_llm_model.requester.invoke_llm( + query, + query.use_llm_model, + req_messages, + query.use_funcs, + stream=is_stream, + extra_args=query.use_llm_model.model_entity.extra_args, + ): + yield msg + if msg.tool_calls: + for tool_call in msg.tool_calls: + if tool_call.id not in tool_calls_map: + tool_calls_map[tool_call.id] = llm_entities.ToolCall( + id=tool_call.id, + type=tool_call.type, + function=llm_entities.FunctionCall( + name=tool_call.function.name if tool_call.function else '', + arguments='' + ), + ) + if tool_call.function and tool_call.function.arguments: + # 流式处理中,工具调用参数可能分多个chunk返回,需要追加而不是覆盖 + tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments + final_msg = llm_entities.Message( + 
role=msg.role, + content=msg.all_content, + tool_calls=list(tool_calls_map.values()), + ) - yield msg + + pending_tool_calls = final_msg.tool_calls - pending_tool_calls = msg.tool_calls - - req_messages.append(msg) + req_messages.append(final_msg) # 持续请求,只要还有待处理的工具调用就继续处理调用 while pending_tool_calls: @@ -60,17 +103,49 @@ class LocalAgentRunner(runner.RequestRunner): req_messages.append(err_msg) - # 处理完所有调用,再次请求 - msg = await query.use_llm_model.requester.invoke_llm( - query, - query.use_llm_model, - req_messages, - query.use_funcs, - extra_args=query.use_llm_model.model_entity.extra_args, - ) + if is_stream: + tool_calls_map = {} + async for msg in await query.use_llm_model.requester.invoke_llm( + query, + query.use_llm_model, + req_messages, + query.use_funcs, + stream=is_stream, + extra_args=query.use_llm_model.model_entity.extra_args, + ): + yield msg + if msg.tool_calls: + for tool_call in msg.tool_calls: + if tool_call.id not in tool_calls_map: + tool_calls_map[tool_call.id] = llm_entities.ToolCall( + id=tool_call.id, + type=tool_call.type, + function=llm_entities.FunctionCall( + name=tool_call.function.name if tool_call.function else '', + arguments='' + ), + ) + if tool_call.function and tool_call.function.arguments: + # 流式处理中,工具调用参数可能分多个chunk返回,需要追加而不是覆盖 + tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments + final_msg = llm_entities.Message( + role=msg.role, + content=all_content, + tool_calls=list(tool_calls_map.values()), + ) + else: + # 处理完所有调用,再次请求 + msg = await query.use_llm_model.requester.invoke_llm( + query, + query.use_llm_model, + req_messages, + query.use_funcs, + extra_args=query.use_llm_model.model_entity.extra_args, + ) - yield msg + yield msg + final_msg = msg - pending_tool_calls = msg.tool_calls + pending_tool_calls = final_msg.tool_calls - req_messages.append(msg) + req_messages.append(final_msg) From 48c9d66ab8151f7bf5ccdab6b4a6981e1b7b6600 Mon Sep 17 00:00:00 2001 From: fdc Date: Tue, 1 Jul 2025 18:03:05 +0800 
Subject: [PATCH 002/257] =?UTF-8?q?chat=E4=B8=AD=E7=9A=84=E6=B5=81?= =?UTF-8?q?=E5=BC=8F=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pkg/pipeline/process/handlers/chat.py | 23 +++++++++++------------ pkg/pipeline/respback/respback.py | 23 +++++++++-------------- 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/pkg/pipeline/process/handlers/chat.py b/pkg/pipeline/process/handlers/chat.py index c90d283b..9b3e0cd5 100644 --- a/pkg/pipeline/process/handlers/chat.py +++ b/pkg/pipeline/process/handlers/chat.py @@ -70,15 +70,15 @@ class ChatMessageHandler(handler.MessageHandler): else: raise ValueError(f'未找到请求运行器: {query.pipeline_config["ai"]["runner"]["runner"]}') if is_stream: - accumulated_messages = [] - async for result in runner.run(query): - accumulated_messages.append(result) - query.resp_messages.append(result) + async for results in runner.run(query): + async for result in results: - self.ap.logger.info(f'对话({query.query_id})流式响应: {self.cut_str(result.readable_str())}') + query.resp_messages.append(result) - if result.content is not None: - text_length += len(result.content) + self.ap.logger.info(f'对话({query.query_id})流式响应: {self.cut_str(result.readable_str())}') + + if result.content is not None: + text_length += len(result.content) # current_chain = platform_message.MessageChain([]) # for msg in accumulated_messages: @@ -86,12 +86,11 @@ class ChatMessageHandler(handler.MessageHandler): # current_chain.append(platform_message.Plain(msg.content)) # query.resp_message_chain = [current_chain] - - - - + yield entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query) + # query.resp_messages.append(results) + # self.ap.logger.info(f'对话({query.query_id})响应') + # yield entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query) - yield entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query) else: 
async for result in runner.run(query): diff --git a/pkg/pipeline/respback/respback.py b/pkg/pipeline/respback/respback.py index 7654896b..4ac4e1e3 100644 --- a/pkg/pipeline/respback/respback.py +++ b/pkg/pipeline/respback/respback.py @@ -7,6 +7,8 @@ import asyncio from ...platform.types import events as platform_events from ...platform.types import message as platform_message +from ...provider import entities as llm_entities + from .. import stage, entities from ...core import entities as core_entities @@ -38,17 +40,10 @@ class SendResponseBackStage(stage.PipelineStage): has_chunks = any(isinstance(msg, llm_entities.MessageChunk) for msg in query.resp_messages) if has_chunks and hasattr(query.adapter,'reply_message_chunk'): - - async def message_generator(): - for msg in query.resp_messages: - if isinstance(msg, llm_entities.MessageChunk): - yield msg.content - else: - yield msg.content await query.adapter.reply_message_chunk( message_source=query.message_event, - message_id=query.message_event.message_id, - message_generator=message_generator(), + message_id=query.query_id, + message_generator=query.resp_message_chain[-1], quote_origin=quote_origin, ) else: @@ -58,10 +53,10 @@ class SendResponseBackStage(stage.PipelineStage): quote_origin=quote_origin, ) - await query.adapter.reply_message( - message_source=query.message_event, - message=query.resp_message_chain[-1], - quote_origin=quote_origin, - ) + # await query.adapter.reply_message( + # message_source=query.message_event, + # message=query.resp_message_chain[-1], + # quote_origin=quote_origin, + # ) return entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query) From 14411a8af60ac9e1b45cb37a5aa5beeb767f6966 Mon Sep 17 00:00:00 2001 From: Matthew_Astral <50231148+ydzat@users.noreply.github.com> Date: Wed, 2 Jul 2025 03:48:49 +0200 Subject: [PATCH 003/257] Add Discord platform adapter implementation (#1560) - Implement DiscordMessageConverter for message conversion - Support image 
handling from base64, URL, and file paths - Add DiscordEventConverter for event conversion - Implement DiscordAdapter for Discord bot integration - Support DM and TextChannel message handling --- pkg/platform/sources/discord.py | 99 ++++++++++++++++++++++++++++++--- 1 file changed, 90 insertions(+), 9 deletions(-) diff --git a/pkg/platform/sources/discord.py b/pkg/platform/sources/discord.py index f159c628..4f5cac28 100644 --- a/pkg/platform/sources/discord.py +++ b/pkg/platform/sources/discord.py @@ -8,6 +8,7 @@ import base64 import uuid import os import datetime +import io import aiohttp @@ -35,28 +36,88 @@ class DiscordMessageConverter(adapter.MessageConverter): for ele in message_chain: if isinstance(ele, platform_message.Image): image_bytes = None + filename = f'{uuid.uuid4()}.png' # 默认文件名 if ele.base64: - image_bytes = base64.b64decode(ele.base64) + # 处理base64编码的图片 + if ele.base64.startswith('data:'): + # 从data URL中提取文件类型 + data_header = ele.base64.split(',')[0] + if 'jpeg' in data_header or 'jpg' in data_header: + filename = f'{uuid.uuid4()}.jpg' + elif 'gif' in data_header: + filename = f'{uuid.uuid4()}.gif' + elif 'webp' in data_header: + filename = f'{uuid.uuid4()}.webp' + # 去掉data:image/xxx;base64,前缀 + base64_data = ele.base64.split(',')[1] + else: + base64_data = ele.base64 + image_bytes = base64.b64decode(base64_data) elif ele.url: + # 从URL下载图片 async with aiohttp.ClientSession() as session: async with session.get(ele.url) as response: image_bytes = await response.read() + # 从URL或Content-Type推断文件类型 + content_type = response.headers.get('Content-Type', '') + if 'jpeg' in content_type or 'jpg' in content_type: + filename = f'{uuid.uuid4()}.jpg' + elif 'gif' in content_type: + filename = f'{uuid.uuid4()}.gif' + elif 'webp' in content_type: + filename = f'{uuid.uuid4()}.webp' + elif ele.url.lower().endswith(('.jpg', '.jpeg')): + filename = f'{uuid.uuid4()}.jpg' + elif ele.url.lower().endswith('.gif'): + filename = f'{uuid.uuid4()}.gif' + elif 
ele.url.lower().endswith('.webp'): + filename = f'{uuid.uuid4()}.webp' elif ele.path: - with open(ele.path, 'rb') as f: - image_bytes = f.read() + # 从文件路径读取图片 + # 确保路径没有空字节 + clean_path = ele.path.replace('\x00', '') + clean_path = os.path.abspath(clean_path) + + if not os.path.exists(clean_path): + continue # 跳过不存在的文件 + + try: + with open(clean_path, 'rb') as f: + image_bytes = f.read() + # 从文件路径获取文件名,保持原始扩展名 + original_filename = os.path.basename(clean_path) + if original_filename and '.' in original_filename: + # 保持原始文件名的扩展名 + ext = original_filename.split('.')[-1].lower() + filename = f'{uuid.uuid4()}.{ext}' + else: + # 如果没有扩展名,尝试从文件内容检测 + if image_bytes.startswith(b'\xff\xd8\xff'): + filename = f'{uuid.uuid4()}.jpg' + elif image_bytes.startswith(b'GIF'): + filename = f'{uuid.uuid4()}.gif' + elif image_bytes.startswith(b'RIFF') and b'WEBP' in image_bytes[:20]: + filename = f'{uuid.uuid4()}.webp' + # 默认保持PNG + except Exception as e: + print(f"Error reading image file {clean_path}: {e}") + continue # 跳过读取失败的文件 - image_files.append(discord.File(fp=image_bytes, filename=f'{uuid.uuid4()}.png')) + if image_bytes: + # 使用BytesIO创建文件对象,避免路径问题 + import io + image_files.append(discord.File(fp=io.BytesIO(image_bytes), filename=filename)) elif isinstance(ele, platform_message.Plain): text_string += ele.text elif isinstance(ele, platform_message.Forward): for node in ele.node_list: ( - text_string, - image_files, + node_text, + node_images, ) = await DiscordMessageConverter.yiri2target(node.message_chain) - text_string += text_string - image_files.extend(image_files) + text_string += node_text + image_files.extend(node_images) return text_string, image_files @@ -199,7 +260,27 @@ class DiscordAdapter(adapter.MessagePlatformAdapter): self.bot = MyClient(intents=intents, **args) async def send_message(self, target_type: str, target_id: str, message: platform_message.MessageChain): - pass + msg_to_send, image_files = await self.message_converter.yiri2target(message) + + try: + # 
获取频道对象 + channel = self.bot.get_channel(int(target_id)) + if channel is None: + # 如果本地缓存中没有,尝试从API获取 + channel = await self.bot.fetch_channel(int(target_id)) + + args = { + 'content': msg_to_send, + } + + if len(image_files) > 0: + args['files'] = image_files + + await channel.send(**args) + + except Exception as e: + await self.logger.error(f"Discord send_message failed: {e}") + raise e async def reply_message( self, From 8670ae82a35e47e95d74b5a5d1410bd80ebd936b Mon Sep 17 00:00:00 2001 From: fdc Date: Wed, 2 Jul 2025 10:49:50 +0800 Subject: [PATCH 004/257] =?UTF-8?q?fix:=E4=BF=AE=E6=94=B9=E6=89=8B?= =?UTF-8?q?=E8=AF=AFmessage=5Fid=E5=86=99=E8=BF=9Breply=5Fmessage=E4=B8=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pkg/platform/adapter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/platform/adapter.py b/pkg/platform/adapter.py index c841ae98..18403b75 100644 --- a/pkg/platform/adapter.py +++ b/pkg/platform/adapter.py @@ -49,7 +49,6 @@ class MessagePlatformAdapter(metaclass=abc.ABCMeta): async def reply_message( self, message_source: platform_events.MessageEvent, - message_id: int, message: platform_message.MessageChain, quote_origin: bool = False, ): @@ -57,7 +56,6 @@ class MessagePlatformAdapter(metaclass=abc.ABCMeta): Args: message_source (platform.types.MessageEvent): 消息源事件 - message_id (int): 消息ID message (platform.types.MessageChain): 消息链 quote_origin (bool, optional): 是否引用原消息. Defaults to False. """ @@ -66,12 +64,14 @@ class MessagePlatformAdapter(metaclass=abc.ABCMeta): async def reply_message_chunk( self, message_source: platform_events.MessageEvent, + message_id: int, message: platform_message.MessageChain, quote_origin: bool = False, ): """回复消息(流式输出) Args: message_source (platform.types.MessageEvent): 消息源事件 + message_id (int): 消息ID message (platform.types.MessageChain): 消息链 quote_origin (bool, optional): 是否引用原消息. Defaults to False. 
""" From 3c6e858c358786e7df666a9572263aa44afbcc1a Mon Sep 17 00:00:00 2001 From: zejiewang <511217265@qq.com> Date: Sun, 18 May 2025 12:03:01 +0800 Subject: [PATCH 005/257] feat:support dify message streaming output (#1437) * fix:lark adapter listeners init problem * feat:support dify streaming mode * feat:remove some log * fix(bot form): field desc missing * fix: not compatible with chatflow --------- Co-authored-by: wangzejie Co-authored-by: Junyan Qin --- pkg/platform/botmgr.py | 4 +- pkg/platform/sources/lark.py | 108 ++++++++++++++++++++++++++++ pkg/platform/sources/lark.yaml | 17 +++++ pkg/provider/runners/difysvapi.py | 41 +++++++++-- templates/metadata/pipeline/ai.yaml | 15 ++++ 5 files changed, 180 insertions(+), 5 deletions(-) diff --git a/pkg/platform/botmgr.py b/pkg/platform/botmgr.py index 5855525f..1da5eec8 100644 --- a/pkg/platform/botmgr.py +++ b/pkg/platform/botmgr.py @@ -120,8 +120,10 @@ class RuntimeBot: if isinstance(e, asyncio.CancelledError): self.task_context.set_current_action('Exited.') return + + traceback_str = traceback.format_exc() self.task_context.set_current_action('Exited with error.') - await self.logger.error(f'平台适配器运行出错:\n{e}\n{traceback.format_exc()}') + await self.logger.error(f'平台适配器运行出错:\n{e}\n{traceback_str}') self.task_wrapper = self.ap.task_mgr.create_task( exception_wrapper(), diff --git a/pkg/platform/sources/lark.py b/pkg/platform/sources/lark.py index d1116362..49ff53be 100644 --- a/pkg/platform/sources/lark.py +++ b/pkg/platform/sources/lark.py @@ -9,6 +9,7 @@ import re import base64 import uuid import json +import time import datetime import hashlib from Crypto.Cipher import AES @@ -320,6 +321,10 @@ class LarkEventConverter(adapter.EventConverter): ) +CARD_ID_CACHE_SIZE = 500 +CARD_ID_CACHE_MAX_LIFETIME = 20 * 60 # 20分钟 + + class LarkAdapter(adapter.MessagePlatformAdapter): bot: lark_oapi.ws.Client api_client: lark_oapi.Client @@ -338,6 +343,8 @@ class LarkAdapter(adapter.MessagePlatformAdapter): config: dict 
quart_app: quart.Quart ap: app.Application + + message_id_to_card_id: typing.Dict[str, typing.Tuple[str, int]] def __init__(self, config: dict, ap: app.Application, logger: EventLogger): self.config = config @@ -345,6 +352,7 @@ class LarkAdapter(adapter.MessagePlatformAdapter): self.logger = logger self.quart_app = quart.Quart(__name__) self.listeners = {} + self.message_id_to_card_id = {} @self.quart_app.route('/lark/callback', methods=['POST']) async def lark_callback(): @@ -390,6 +398,19 @@ class LarkAdapter(adapter.MessagePlatformAdapter): return {'code': 500, 'message': 'error'} async def on_message(event: lark_oapi.im.v1.P2ImMessageReceiveV1): + if self.config['enable-card-reply'] and event.event.message.message_id not in self.message_id_to_card_id: + self.ap.logger.debug('卡片回复模式开启') + # 开启卡片回复模式. 这里可以实现飞书一发消息,马上创建卡片进行回复"思考中..." + reply_message_id = await self.create_message_card(event.event.message.message_id) + self.message_id_to_card_id[event.event.message.message_id] = (reply_message_id, time.time()) + + if len(self.message_id_to_card_id) > CARD_ID_CACHE_SIZE: + self.message_id_to_card_id = { + k: v + for k, v in self.message_id_to_card_id.items() + if v[1] > time.time() - CARD_ID_CACHE_MAX_LIFETIME + } + lb_event = await self.event_converter.target2yiri(event, self.api_client) await self.listeners[type(lb_event)](lb_event, self) @@ -409,11 +430,93 @@ class LarkAdapter(adapter.MessagePlatformAdapter): async def send_message(self, target_type: str, target_id: str, message: platform_message.MessageChain): pass + async def create_message_card(self, message_id: str) -> str: + """ + 创建卡片消息。 + 使用卡片消息是因为普通消息更新次数有限制,而大模型流式返回结果可能很多而超过限制,而飞书卡片没有这个限制 + """ + + # TODO 目前只支持卡片模板方式,且卡片变量一定是content,未来这块要做成可配置 + # 发消息马上就会回复显示初始化的content信息,即思考中 + content = { + 'type': 'template', + 'data': {'template_id': self.config['card_template_id'], 'template_variable': {'content': 'Thinking...'}}, + } + request: ReplyMessageRequest = ( + ReplyMessageRequest.builder() + 
.message_id(message_id) + .request_body( + ReplyMessageRequestBody.builder().content(json.dumps(content)).msg_type('interactive').build() + ) + .build() + ) + + # 发起请求 + response: ReplyMessageResponse = await self.api_client.im.v1.message.areply(request) + + # 处理失败返回 + if not response.success(): + raise Exception( + f'client.im.v1.message.reply failed, code: {response.code}, msg: {response.msg}, log_id: {response.get_log_id()}, resp: \n{json.dumps(json.loads(response.raw.content), indent=4, ensure_ascii=False)}' + ) + return response.data.message_id + async def reply_message( self, message_source: platform_events.MessageEvent, message: platform_message.MessageChain, quote_origin: bool = False, + ): + if self.config['enable-card-reply']: + await self.reply_card_message(message_source, message, quote_origin) + else: + await self.reply_normal_message(message_source, message, quote_origin) + + async def reply_card_message( + self, + message_source: platform_events.MessageEvent, + message: platform_message.MessageChain, + quote_origin: bool = False, + ): + """ + 回复消息变成更新卡片消息 + """ + lark_message = await self.message_converter.yiri2target(message, self.api_client) + + text_message = '' + for ele in lark_message[0]: + if ele['tag'] == 'text': + text_message += ele['text'] + elif ele['tag'] == 'md': + text_message += ele['text'] + + content = { + 'type': 'template', + 'data': {'template_id': self.config['card_template_id'], 'template_variable': {'content': text_message}}, + } + + request: PatchMessageRequest = ( + PatchMessageRequest.builder() + .message_id(self.message_id_to_card_id[message_source.message_chain.message_id][0]) + .request_body(PatchMessageRequestBody.builder().content(json.dumps(content)).build()) + .build() + ) + + # 发起请求 + response: PatchMessageResponse = self.api_client.im.v1.message.patch(request) + + # 处理失败返回 + if not response.success(): + raise Exception( + f'client.im.v1.message.patch failed, code: {response.code}, msg: {response.msg}, log_id: 
{response.get_log_id()}, resp: \n{json.dumps(json.loads(response.raw.content), indent=4, ensure_ascii=False)}' + ) + return + + async def reply_normal_message( + self, + message_source: platform_events.MessageEvent, + message: platform_message.MessageChain, + quote_origin: bool = False, ): # 不再需要了,因为message_id已经被包含到message_chain中 # lark_event = await self.event_converter.yiri2target(message_source) @@ -492,4 +595,9 @@ class LarkAdapter(adapter.MessagePlatformAdapter): ) async def kill(self) -> bool: + # 需要断开连接,不然旧的连接会继续运行,导致飞书消息来时会随机选择一个连接 + # 断开时lark.ws.Client的_receive_message_loop会打印error日志: receive message loop exit。然后进行重连, + # 所以要设置_auto_reconnect=False,让其不重连。 + self.bot._auto_reconnect = False + await self.bot._disconnect() return False diff --git a/pkg/platform/sources/lark.yaml b/pkg/platform/sources/lark.yaml index f51bab76..bafaba81 100644 --- a/pkg/platform/sources/lark.yaml +++ b/pkg/platform/sources/lark.yaml @@ -65,6 +65,23 @@ spec: type: string required: true default: "" + - name: enable-card-reply + label: + en_US: Enable Card Reply Mode + zh_Hans: 启用飞书卡片回复模式 + description: + en_US: If enabled, the bot will use the card of lark reply mode + zh_Hans: 如果启用,将使用飞书卡片方式来回复内容 + type: boolean + required: true + default: false + - name: card_template_id + label: + en_US: card template id + zh_Hans: 卡片模板ID + type: string + required: true + default: "填写你的卡片template_id" execution: python: path: ./lark.py diff --git a/pkg/provider/runners/difysvapi.py b/pkg/provider/runners/difysvapi.py index b2542491..98b50f86 100644 --- a/pkg/provider/runners/difysvapi.py +++ b/pkg/provider/runners/difysvapi.py @@ -108,7 +108,13 @@ class DifyServiceAPIRunner(runner.RequestRunner): mode = 'basic' # 标记是基础编排还是工作流编排 - basic_mode_pending_chunk = '' + stream_output_pending_chunk = '' + + batch_pending_max_size = self.pipeline_config['ai']['dify-service-api'].get( + 'output-batch-size', 0 + ) # 积累一定量的消息更新消息一次 + + batch_pending_index = 0 inputs = {} @@ -126,6 +132,13 @@ class 
DifyServiceAPIRunner(runner.RequestRunner): ): self.ap.logger.debug('dify-chat-chunk: ' + str(chunk)) + # 查询异常情况 + if chunk['event'] == 'error': + yield llm_entities.Message( + role='assistant', + content=f"查询异常: [{chunk['code']}]. {chunk['message']}.\n请重试,如果还报错,请用 **!reset** 命令重置对话再尝试。", + ) + if chunk['event'] == 'workflow_started': mode = 'workflow' @@ -136,15 +149,35 @@ class DifyServiceAPIRunner(runner.RequestRunner): role='assistant', content=self._try_convert_thinking(chunk['data']['outputs']['answer']), ) + elif chunk['event'] == 'message': + stream_output_pending_chunk += chunk['answer'] + if self.pipeline_config['ai']['dify-service-api'].get('enable-streaming', False): + # 消息数超过量就输出,从而达到streaming的效果 + batch_pending_index += 1 + if batch_pending_index >= batch_pending_max_size: + yield llm_entities.Message( + role='assistant', + content=self._try_convert_thinking(stream_output_pending_chunk), + ) + batch_pending_index = 0 elif mode == 'basic': if chunk['event'] == 'message': - basic_mode_pending_chunk += chunk['answer'] + stream_output_pending_chunk += chunk['answer'] + if self.pipeline_config['ai']['dify-service-api'].get('enable-streaming', False): + # 消息数超过量就输出,从而达到streaming的效果 + batch_pending_index += 1 + if batch_pending_index >= batch_pending_max_size: + yield llm_entities.Message( + role='assistant', + content=self._try_convert_thinking(stream_output_pending_chunk), + ) + batch_pending_index = 0 elif chunk['event'] == 'message_end': yield llm_entities.Message( role='assistant', - content=self._try_convert_thinking(basic_mode_pending_chunk), + content=self._try_convert_thinking(stream_output_pending_chunk), ) - basic_mode_pending_chunk = '' + stream_output_pending_chunk = '' if chunk is None: raise errors.DifyAPIError('Dify API 没有返回任何响应,请检查网络连接和API配置') diff --git a/templates/metadata/pipeline/ai.yaml b/templates/metadata/pipeline/ai.yaml index 90732dc8..fb2672d4 100644 --- a/templates/metadata/pipeline/ai.yaml +++ 
b/templates/metadata/pipeline/ai.yaml @@ -128,6 +128,21 @@ stages: label: en_US: Remove zh_Hans: 移除 + - name: enable-streaming + label: + en_US: enable streaming mode + zh_Hans: 开启流式输出 + type: boolean + required: true + default: false + - name: output-batch-size + label: + en_US: output batch size + zh_Hans: 输出批次大小(积累多少条消息后一起输出) + type: integer + required: true + default: 10 + - name: dashscope-app-api label: en_US: Aliyun Dashscope App API From abd02f04af798500205d2a12eb76a157c9550fc6 Mon Sep 17 00:00:00 2001 From: "Junyan Qin (Chin)" Date: Thu, 3 Jul 2025 15:04:02 +0800 Subject: [PATCH 006/257] Feat/compshare requester (#1561) * feat: add compshare requester * doc: add compshare to README --- README.md | 1 + README_EN.md | 1 + README_JP.md | 1 + .../modelmgr/requesters/compshare.png | Bin 0 -> 60829 bytes .../modelmgr/requesters/compsharechatcmpl.py | 17 +++++++++++ .../requesters/compsharechatcmpl.yaml | 28 ++++++++++++++++++ 6 files changed, 48 insertions(+) create mode 100644 pkg/provider/modelmgr/requesters/compshare.png create mode 100644 pkg/provider/modelmgr/requesters/compsharechatcmpl.py create mode 100644 pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml diff --git a/README.md b/README.md index ba6bbf90..1ca4553a 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,7 @@ docker compose up -d | [Anthropic](https://www.anthropic.com/) | ✅ | | | [xAI](https://x.ai/) | ✅ | | | [智谱AI](https://open.bigmodel.cn/) | ✅ | | +| [优云智算](https://www.compshare.cn/) | ✅ | 大模型和 GPU 资源平台 | | [PPIO](https://ppinfra.com/user/register?invited_by=QJKFYD&utm_source=github_langbot) | ✅ | 大模型和 GPU 资源平台 | | [302 AI](https://share.302.ai/SuTG99) | ✅ | 大模型聚合平台 | | [Google Gemini](https://aistudio.google.com/prompts/new_chat) | ✅ | | diff --git a/README_EN.md b/README_EN.md index 0542f16d..791ea1a3 100644 --- a/README_EN.md +++ b/README_EN.md @@ -116,6 +116,7 @@ Directly use the released version to run, see the [Manual Deployment](https://do | 
[Anthropic](https://www.anthropic.com/) | ✅ | | | [xAI](https://x.ai/) | ✅ | | | [Zhipu AI](https://open.bigmodel.cn/) | ✅ | | +| [CompShare](https://www.compshare.cn/) | ✅ | LLM and GPU resource platform | | [Dify](https://dify.ai) | ✅ | LLMOps platform | | [PPIO](https://ppinfra.com/user/register?invited_by=QJKFYD&utm_source=github_langbot) | ✅ | LLM and GPU resource platform | | [302 AI](https://share.302.ai/SuTG99) | ✅ | LLM gateway(MaaS) | diff --git a/README_JP.md b/README_JP.md index 7a3a16dd..38a47153 100644 --- a/README_JP.md +++ b/README_JP.md @@ -115,6 +115,7 @@ LangBotはBTPanelにリストされています。BTPanelをインストール | [Anthropic](https://www.anthropic.com/) | ✅ | | | [xAI](https://x.ai/) | ✅ | | | [Zhipu AI](https://open.bigmodel.cn/) | ✅ | | +| [CompShare](https://www.compshare.cn/) | ✅ | 大模型とGPUリソースプラットフォーム | | [PPIO](https://ppinfra.com/user/register?invited_by=QJKFYD&utm_source=github_langbot) | ✅ | 大模型とGPUリソースプラットフォーム | | [302 AI](https://share.302.ai/SuTG99) | ✅ | LLMゲートウェイ(MaaS) | | [Google Gemini](https://aistudio.google.com/prompts/new_chat) | ✅ | | diff --git a/pkg/provider/modelmgr/requesters/compshare.png b/pkg/provider/modelmgr/requesters/compshare.png new file mode 100644 index 0000000000000000000000000000000000000000..3ef1709cdfedc40e5f5b36591a38562613c27344 GIT binary patch literal 60829 zcmZU)1ytL?vnU+go#Ngi#i6){;zcXC7l#lm5ZtAOVx_nj3dP->pvB$Yg1bX`>Hpq$ z-#y>A=S+5gyF0TsyE`*G`Kqa|h=)yu4FCY}l$GQ@002m02oS+UN8}7fNb3Lq*hRLo zvYN`WvJ9Foj#joHO8`LWYeEvnClWpKfYqTqa{afcc_AyPd#JI*Z*f|MsTr{d-+g=i zWiFxTg&__*Q#L1@{Y#KNl0~U*MFktX`!YjhL=!g|NcS%iz8UoS;`#aRL=<`qJzj#Y z`a13mH=+Qv!dc_Cx~KqKU$-abO#Q7~-m#NNV>$uJ#{yUw&Tjd!#KpvDviZ%Pxlh2z zHdDs+)WcUNH_yd3*dhE<08N2Ed&rd11lzFRZQCk&E`V6CDaR%tH{~|>kG&5yz#j>@ z>7pZ!ilV;X_(Cd^Z*}qi+ye;NBr*j^01+?tt#>Zoj7$6={6X7|97nN@{KNPBd*lj(g^Y$^y0^=N0Dh=(R&y5=qT+YGa()UuOJx#Qxw)UdrmGuxDF1^enEp$(CO(!X^7UkMyDBsbV`F8 z2sK3wVvvvvwTb1nKuwXYizVp}^2r9W(L{uP$QEEDA(q9q;j6)B5B-z=-LW;A>u5p#ZM6Jzg9un3oept^kGO}l{ueoGDtMCF?tV%Yu5I;P}jGFYldX7hsFF&B~C@poXa8j 
zkmyhp#N!Tlzz>J1)x(gDvv29j@>>LCno*=1C9mDyMinbR2zUrI23BNY6yBd771^gYl}$xvfS z9>i+Qh)<}fl=@2LyYTDGA8PtCMGS&*BXP&^E3pF>BvlxqVVq1ntU9p-u{^O4{q(Q( zNvW~J`6GX3m*gJir2kyXY03FeNcKtEj$KRLNYNoXGe;`VPcyf0zSwyh)oyUAXi9&o ztg!VHYf)Np_4Mv<@}i_-`VZ|t|Go|WZI;tHzGllg{d+>B$XUf)H$H#wH|4LCf<|rE z@*Vl%0(}oIoKhfHpmyAhdg&YeCxH;Fcbj9kHCi=BHLf*CV$opb-HGbGUzbmp;QQFR z#JWiLpYDO~3D?dJ%qV8Myba$Bd%b#PdewRhUZa1U`xx?ZM%6ae#o>>4MxmIW*xL-^ z466*|42>q$W#BT+a#T}k6Tu~~myj2q7tWM0B(wzb=yDqSVzp)dihltFnSfJ%OdGJ;;pa%U zPLb`s?`7=8-qha&ZxWHH#G}S1une;hs4C}kPU21u=hNoPS}9l&Sw#+04IihH4D0Ra z?HCM;53d^N>c7$}sk5o|uq!klsO78y)wDRu*Php=I%-+F!KkcOKYR1=J&-b{TdpM;u1V4M9dB+~S7d<_W`> zhC0pe4Svq9(D)|7M!cr9#)3wj=GD*RdgCRvB{+1t1pYCWbbS(1UQM2VBniC!?pf|8 zY}U>m?BNblc2PE4U3#Z^T(sD?CP=Af31rdx{%GrMlUNm7RrJgELvCO8)0mq%1YJvA ztN3I17d{g`-_U4`5RViBUNMMdzseTZD48Of|--Mg#Yk;gIa??izIS1X4D+1v-E_-2by^*w4V*{(|*#) z87Pd#{AETbC!ziL;zDGG&)pehO~VBSFRe5{&;FhrY@RUdGP4nj5l@p?GYh?H)4*0@ zSNXuAp<(>pM^v6i{>MaOgoc{xa)y}18>gMb6qIU1>ldRorjUGmpfY{qEb4( z+>XaN0wDbl3oA9FEo{cLjv^Up#u-xGyQlT%SvZfVX=uT39nDfsmBXM4dcBjXCqrr} z!Fp|a+j=~5H1B`GwcM55)vh1Xf2T`|uDZm#q+MWo+IlMX=x@-R&3lGjVDDThftY)U zw|z$M40Z^VVR_A#v7ovVHFZVmJnqnh4RLvsHVr?mzu{eI|_ ztK}UhGk*T_?@3U*J*PHDBH60`QEk(X_t4GHljd2G^7*)ws9|~n{J@6j;K}~z@;wr9R zrMoup@o4w*>zD0w&sDq0XO%^p3w^U$3>j=+q8yjX8Wikt7CipN~-F|8)& zz)hJu*{5)~cD?7B639oo6CC(~7wHY$Sh^h{byclO6Oc;gOL6;uzRX)MoSZZdmI>C$ z8uk%7H9Ys(l0TzQ_5ajf&0KS_U5l^5BUMlkqNAA2W+X7JkyN)^{{ug2q@DJy|VwCS;01U(z zF#ZJ34TiSvY>SyO_M>^(Z!NMh?|$2mq`knfq_B7#llMLgPg*Dqa(f~nQR~s zCovu#C=|*KeZ%eOV$H)RDk{pu%g@8l&xNSLj-A}N7wALqZ>q$iRoWM{}cYJpOzlB|F0#m>wgamF+iSwB|LoGygdIy8-XhEFIP;{ z*25B{Cui$`m@`BhQg8V9CH_#vqqDhF< z|68vAM*iQ0|BWcY^Kaz;YbO3{n*WoFm}e<$37-F1GbwBczWN))GSb+}X?#M&2qyba zNJsp#A>dz(0B1dVTpc_B00>Z)llkOI)UZ{Vu$ z4FQsqhIA{Hu?JXC;(2N06rTHYwLW{9^}GW=8IBCW;QTFrIBA4@xyEfo6;{1_jRy9H zJt$s>#TQ(MNB$g#6-#?L-7MEjOFT{4d0ZuAtj}9R!&tL)@nR%eE>DlA^{W8Ot@jp* zKFbNjfVDDX+OWu^n{a}dV*=yN^R!_Y^h0zj#pTDi zDzf`f>t>M~56)wz#Pb*XOJ@Vn7W=KC;sZ&YF7W3N^POMHq(; 
z@tRK`y6^`6Y@tI~DS@N_uEscQ@{zW+rH`JR@VF%LPMmy$^^ga6gC!lneAjG(EUG$9 zc$K%aE`ZUed9_v8itdyeX)mkgQpbmD^2|=(h(Sm3S>)mHO=`Qh8SBo8Zp)n3sH1Zh z2#XVVGm!ZF{oaTZ32^);;c0#8jxpe<^~SYxeSg~U{qm|t>`N6u(v#?1pfg~ZBFAj_3DGOtzTO22Z`s-EnGd;^2OG`ssslTk-|Hut9( z+BCQy^)e_*iD#pA5NX6Nc1K3jnf5tpUqCr^Tk`LklZIk|#ztFXv~};a6--l>*3 zkxmb9)K2^lf~WAy+Y|^Kf2|l1=?6huP|32JH|1MiohttTBGY)U5ihCRF<&+8Y;hau z)4T9n^ibYnqWE14U%`iRF~Zrr4}%nvO~%7d*HAdPv~j8a*ohS1HSY?<^ab@)J#$AK zMVtILK__7qQ%6T>FMD!s{7SX0V1nzg+=bggGUB?(PXH>|(!$o)|-us)OK@`;uj07HP6TE;fZu~+pet3$Pj21gnSWNiV7fYpYE{R zCqi1Xbs%s2O-I9^=s27$pU)2PBnw~0r}`4=)BMq*OC9?L!t_Vp6q==N zHgEV?r8`Q-5Qf<@4e4 z!If=Um3zg3Ef5V3mSyB-kxC&3ba3xQg}Hh;9Q^1wf{(YZlNJ{uOQMX^fcp287TYrN z{-7m0Ad3OHQK>sbmpU9j%f?|tMo?gSLe$k_59fp8u)&4s^K|3a-k0MvBitD1ni2G? zT)>;NL8nTmBm`?k&ejQVZo8G5_no(X0-h|?Vp~wMc)PBtEYt|sv%Ep7a_*;c33TG( zIt=gJ$Q6&5!CVL_6EP(VxXA{V9kqL3y(k?)a0!f{+BER2$2Xl=H{f{l$I4G{9hDA- z^+|cDPXx!`)XCO+yoH{+Fi@QnYaLU4M+ZfXp9VC^O*gW92IU%hdl>BQ4eP#wcMX6- zbYm4SoV*o^s~l}$X9kY@9RaKt6@akj$Ojwd3c#`=N4@+3cTzxjA?|xov9CIHBbn0@ zhW^DlNKxl#W4DSz9H)e%JV_lRcb;k};=fjIPx(-T+T8RqyIw+>Rm5$%JW!RAGg+yP` z46!Z0S?N~~P)4FeiFqmg`66o4uRrREy+Y>If6 zYI?Ino!UORfB0s><1u;X#KYe`rQP-Q@{&ow@s4GUk`FR70F##hs&|r~IprQlpld_e zVHEAz;N14DL@mG^WloPJ;Fkmb$f<|9TLaa#1xEesx3rm`SS7ESDA7_Sdk&ITb&f)v&(cf7GSM^`$sv_P(#~Neiym5;72a;g!)RPT(vB)fKhyRX9aV&< zSlN$6dNj|u!2~pb_m#X+m}rETI< zi6u%0qIJmX$JB6fS8ffO0lHbp`+!4A(j7_uNv?Q?Dtw4C{ZR;=>130Qs~^DhG|tuw zRli}7)s|x{6x&@0E;PO7)<&jBEYvh-NR2M`-vLxHDCFNup_o8on=8PxXO;*cIbI%s_Q+%J?c24G^t7tH$ zBdj;LfWoh%nOzX)vt|Q#KywL+FOtZL$~LnP8GTvp&4=aw*|69@@cuzVadnWS1X|-T zzbz{g+XG}-9f=``X!_Sf$Wt|Cay}8uI|*bWTDR16%fpxSLP4=jGB3IN4lD=`-1zYy z9yiFLkXNMT@ueGtf!l6XoG%dAw9#5-;B&3_rj1i@VND%W7xRD`bcs?%)+T0f6@-_xtU_jO|fyq^HQ;=9#i+jDp`% zGM-ERys*E4Po9PhIFE#C5^F5N<0t7go`n&&a#|oKBu+JL22VVSEOF8Yw~vE|l#4aR z%inzELxg$GPE#^sG}c-3_c2TR=u*NtP7m>dD&{-;9l!zZ1*^BJKpu9@aQheQhQ%!DAj*g#9))^BG9d6GPXaBUv%+L3qI=0Ff)tYUqfsvY#E)V_~}lHD{R!+=d+o0&AP!MDXV>7h4zV)%AQO(lwhS3-#ZylA>5&p@hHVRZ`8 
zVzu^?vaaHGoA`15oYC-P;p1Vw*=w45|J;>yOWb#hWDNaO*RpL!JA3jH^1UyoeY0jN zSK&llW*g|0N(7m1TjIlODPYQ5d35*oau$@W3wN{v>h{+a-de@gJbSt~tZrRr)@HCX zWXOAt#K9vrBr5j4S4b``!+;Lwg_CQ#bZV#TmVQ``OgTyh_5COAt0KR!-3~0b%rLh% zrC^hr3Cr;TxFT|uLsOljzn=z@b{?Lt;LCLqf?t8 zdg_Xv`X{4O6W6Q0`sAEs*|{v7$L0wE)RcfeWR!An#w1odh`w<_iWQE^sj{^0Ich(N zESFGjw)ndS*U{Lv)MXRH!HR*rzY?G21yHGe@k7TM`^UQk@VuIoz+|&}HT~sx?rUng zn<_aBYjHV;(fa_(NaDlxO<75k`whF>^qrH;iTy*A`!AyfXLyll@SpmeNiq7!mm3t{ zGsXL?R+mmyZn0rgGyQh5t|+fsaUJ)EE$xkxGK3cX&>s z!YylpLp>R-n}=6>gQL^(-fyINm=zCv`rA$Rn4$`=z3fdmBu(Ga@6;`je3+7fN0u z8ov~E#eQowUD!!Og!@$wiQ^80Di2I}gJ{Wj_Kc?s*+2qT_%XpKbl$32wo`?DS?KkE z<%0FsD?Q%XIj6?#u!-tjL5zZ!@zaHw7f4HPfuqB~=#uN+{2q+HBkC=hdip4>xcBvf zrl?yA_jB>ZVjn3{%!g&T9Kb z$wh1QvEsc){$liLm;0 z7Ck}#%RX?R?U!RgOB?qc9r{d5g?q1)F_&D`>UX)j^isC+K9|9X<8d)GHTfjtWSNr= z)f0|nWen4kS@S1%ujn6#9Q|hPTL{@*5(r{;BM;p9i#!t5B76Ues|#9*Jk)7hn~XhF z$Jl@IPBvyW2pG|DJ!54CZd)h+8NBgPj}1lNSuex7wIje{7A7sctcXexj|B=R+7VwD<;GE0&S!YKvmitcM;u z_Nm&ZMNf(Kf~}v!eIRXajBg#2M#5(-OxJh(o#3s@1xIW#RkZSsIzHPf&0fDa+bug; zwqRq;1_9hzM=o5S6ihb^?PzjL3dRjO>_Xf6((d3Z<%Y?01f^a1oeSZ2? zm%Cc-*;}g_^MfYWCdw+Q%E(gQAAW!pFdcq}clGN1jNNTU`C)H5-RGMs#NIdd6U)+E z0b~~MHySnkj%p{r4#U|EUvG&PXwMcEyYBPsm%a9a*_cfpD$16)P$I(Eyw%rkKOG#xi+l*?`FojVtaG(iLOAW z_Z*^^bbcaPpJo7JVZU|Cjs|9}(Wc|gLlfk`Tp&wc|2Zp4@`eh0@ysoB?lSjDS5djg zg`}3gmvXaDcTqNaER?)B49}!?xR8{q8^))alYd`!AQ;q{@!PZC%`3iI?eSig%vwKW zx*A*1f_ju+QiE~q*1)1<(!s3!X{UuQ>nz^b3nJ4WZw+X?AlF_{&y#jPa+*5vH5Ytr z_HTakc&msr_Wo0(cnGs;c5_iBdw^nYj*Q8qPQ!0FtHuOuaK#AMT39Hx#W|R9PFccH7dShK-O<$h{vkFUA^mBRd9^4YK-?h zhzucJ-oUrV8VVe_k&$ii^Zlhkgb(Yv&913SOV{tACUy&2!9!n_1`P#*ORZn49WrMRPOxB8x5GvOMuYUoZi9-o~t}sSpo+S@*fF8Vc!g6-SjqPgXxn9 zGZyTcFhf=2nI(FwK_2-!+~q%Kk8}3rhX?Lt<&BMeHL1h>PmdVAAkz2C4~e2poY&i4 zbMMxyfV&T0*GqOBN9+y;_9j#NCTZxr4@E?ijgh#ir$l0eCf3Hq?1QU)V$+T(>JFnE zqR-7`@rTFhq73uSF29qF9?HF%RPqXtON9#+9ckJ(8I^a-YMqIWx~tWv?_KjvTSge! 
zC*#E)*(j z*}%Tm@c@dE^B`%-=J?b>(bA|MM!$%7JKXgv3>08q#diJ+9 zYhV?0zQxVd@cs2M`pucN3s?Y1b@$bP?Y{b!GOi8?MnL&6P^Wu3L@>=l?j?iiYU>Kl z?bkNa&Psu{dMLMwa0#VvM3G-j9JCzRF>h~pd~d}t3=)+v15IfNvNo;-@5%1DT=ub_pHtBSImpE479nZ*D-OEB9WxWn=- z#!uwVz7AVD+Q^Z=KU{yZHuI@s7NQ2Dzo^aB4#+wZe18xodB$XXs4ZqTsHo2+RWKRZ zTw=DLQjO(AGtp7~Q`oqZ>sc;GvI!rOn)|R!&n-J!X^}XV!yXue3~7_0niae{1vGx# zpU;@ABlW`4VGT%<=EZYn>Z?vC|4*74ibSEQK)Iz0qwUx*djo`Q_;79m{bl8;uT ztJhs^dyKIziF-G-cQ8v2_DhWj8!G9)o3Wqpdgm~H#I>hJ8o?pD zEou4-l%18fhBsZptHu%!);R)Wa3F2wRLmDp+{h8z)#U*anA2moh`+ESJZ)nx2)qi^ zv3=?IBdWJtCE*~FpelU8O2{%>5ZUjMX_|~ob8V+Z$vXI0u5J$RuHq9&(63%z%#7iw zpWHTirYj$VGSxpZepIshcgOzT-XyZ<)yhb{TDxDti!-%W;b4Q)NI-Gfs_@v8DP_|K zsek9EQ*%JsidZBGNKYg;6QR-X)_fRT;q;;&H)UR;wzIE*Yj~!aT)iz@Q07euOb4-i zsDbptdsk>S8kGDKv-+)h(@I$mAEB-J(ybT`xtX4E))UI^x6{VE2Ko0Pi##CnEV^_d zZxg$Rb#+^8#udBqB9T^+<{%RmHRFj@RCgrEsv+KUA|1j|(Yln_!N?IPjsyXJ(Wy1< zEIP!xIu;=cy|F2`Q?k#lN{{>|0`|+!-%G4_%sX8&?AOwaEiH>&M<&i#*(#;}oX8ZOwzZ@^Jx;x3Hg8lHTgu%R9I7odwAfKLk+wI!vqXmM z66ap2k?vwSEfC!}Yq!t$#AhZ=Bl~FwE`(&DZ~jYA_WG*>jyL??@fn09mbAJ--$mR& z3$!L&yRLq3rSfyRSA+c8iO&RLOtQ%;03}}ym?oTUx;6K>cy~aR9C|fH?e=Dz4oZhj zs6OG6xs8FD;DZWCY`6l6e-b+7NY)d2WKSEgObr>XcGQ1G{CV8_WWm{Eax-JLiR9bWPi6uqg)1cqL5=}Ls$ zW{Ha(7p|l!@gP3}n;ydntaE>89Z%`%M9SHmRn0)D*LcQIgboC{da z3uIpmh^NJK>?#O^rQZb@4ZX2#i;jOIbsG?K#_OMS;dNR}+ZA0l(g`18>TUQA5O=&(e9w(2I-;EI)Ur@w*5ffl<;>0yN2X1YqtW^xs#u}Z*<*Wj6DVPd_jZn^wf3}$YL#Fk4^Vd+z3(UzdgXK z5S&}-=O_=go#^j-%%D2;#Ou+`*~XO+Vn7iSS%lory;?mND=(Ow2o1-{h#hSR1le(T zj%_5pmyhrrxAP;cY=JRq$c;$&EMj>n#4C8X$Mgf7E;r!YnPRn6(Z#!iG&IsXyE|n; ziLmRqWwN>qY|up}E%3%DKs}NOXRNWC?0GDqLne@$iW921N#O6x4c^jYiF~;r=9M!C z%8SG248U~Ntc*9oOPd`?cCyxTe$%1q4M{0P{hEo3^pwu+_s9UvQ3zIaFV+zGlB@b< z$VRL8F?;xGQi3Uk;BW}Um)D*rPi;Ic2F(GtcSG;ifn8^koJ$w=}7Dq+1MD z`*8u}vtc20IVk3gLU=hAD-oWe*_)WoO~mDg{T1Iq0h$?G2#kml(7g;wJ~I{e4w0Sp z`9w?Rq)0fg`7W#CViQt91oK#ax%-z71FG7p?*-u_AV=qQ*<8^>E23KgfX!G-wtK3s zgI5W8ci>aG4{rjF9|p&2Nf(JFRGBOA;9kL8Q6OZOrhygHMzaC83~fLHy@XL0P1Vfn zs=CgxsCCYh!K#zvZoXEt9G%ohB8PySg*xN8p%M#YDx`UQtJ55E<2fe!pJDqrueK 
z&0#6$XMk(nOuA?;UOQD;mX2d|Jq<d{(tfjpQv1i6{1s?;cCr#s)h6xuZ67Iu()S#i?s{-sD8~?S8wXw{6aK zt9x9zg_byr6!Mq39cxNxyux6w+j%j7%Oh0J&3+D@lS)?S3N8Wh^_Vv%-fl6(*TVup zd^+y`Og}E=?5;ON&8YXGABNe}Q->Ip2#0!8&+(kZc>?2C zXzATWz1?}DT?+~@){sKt$hTl~$>7LqAJqU}7bOwvqw9V#SIue{7~SL64^*{-p?Vj?IXLsYhaLPb8hgE(91xr>y4FHPPk){Aojb+B!) z=rV}`@Uq{mP?6{f*H*&kC7!m4hu2S&w<*NQhAXYb*D4VzQ5W3WJLNhy6h^I zuaBp=-u?ZeyoW=iS-tdY-5VI?w?pw^s&cc7v#9q8{;^IvoI*vdn1deIaQ86(RsgMh zRRc^PnHFAlEI&GOvmQp}OjU_I=0su~X`QPP*jR$Z_@gw2fH9iRd`=YSKt$+pItO1VDqf*_*yLs;B<9 zW^LLEF$9F)=!{>b3&ldk;tV69`h>#$`UF|3{cZ+KvG}k>4+oW2krlh!Q;)VfwB@@w zbl@U9<%2OExLasIhbmxZ+uJnn#RrPjJe8PZ@ztrZD-HLs6CPj^fmtJ68!=|~q^EO| zK@2yQw|&p}mqz9u#^FG~+az0`L0#5ID}q!0)#5^?uRNjKW`(Q*o_qI=q!Fld) zIz9H$1d?(f-O6XjVfBy6@E4oHswVG{Ap7H72g{4M%5 z*BDYxt)1|3cKxEz@S4zW=xgpC7PX{Ib?AqxjIpf#d~kB|_-@tV`%N?tsk&h+Lm|31 zKhpc-ejg4^ruE4Vx!e+Nfg+7xx`u*C%*;aGYyd=24Bt7Ti+Q{NHI<;e@k=N*>-KKR z*2i*>J#^7x#dT7)PI;K;9U8leNutXbv^pa&!=gYOv{h_Gco?W~;3J1sE$@ zqsCy}d|hdsww@#ba4JdXHBq=H$P6UH zu3j|hfM1e#xY!$E=A51|?g}qMMTVogB-KbC*)1ySP`ig2@Tf%5gzH!aY1;6Shi22b z80#_^h8LQZQKrt>d*!1sgjV9&3eR(IdfXy^K%%0~sSd|j+ui`sAAHr?=_l!ZQ?6cp zPo$vE&o|4GHuOAhZb28l8Lz8`b{ks@MoXYVcf&4|TQFoFKEDn&L1p*-se4FzRtmIV zj&;D+is|`sQbV_V)7H$Xa7sf4#Z>74LgVN%yjVqqr$B3#Y?MQw)5JmsjHu`jUjng- z#+a%6h3F3$6$!ZIg6KQ3G$D(h8SR$85=ed%khbWP$NTYC2htHuzCEAkoql62IP|S2 zBGDFzt8)|%e#OvqdpQHI%@Ap$iY`j(Cln-;1oIMv4`cDlu#I$luoMdA7zTt=9*n|> z9EVDq1r910Vd)E98stsbQKWzszl|IOD%d;6k2@y= zWM|Rf`y>Wk;Z>-&cBB`s^v=AA+H97`}5cYAU;31;tW_S9f= zx@bB^N%4YZBzdeWCp29CF+&WZp5_wIDA61p@ts8qA-e`S7NOX}Cc&l4;z#w(14jkG za{YAf0|}Rc&x!ayk>}>0$O9NNVh1W7L@BI*yP`yWsH86&EMzgdIZ-+&ryh?mx^vcX zT|!<)eP#t>E4c0~y5A-$DwzG0>h;}9dK9J{40{Xzwuhn%+K?=$XBT-st{LfkgSe<# z+?*eE!3x*Bf@J&_eQTyBdFn3>jbPDO&k9}%!bpWyQ`cYI^@zkN6Dj!sG!W z;!@GFF<^uS*1tjjhZqio+Gk=dqF-oXFBPxDZ?5nb2x9gr zT#tIpZ47Q;Ig_2txFvnE`DaXq9k8bB-N$GVwbi2Q-Sg>aH;&T%&hoPrZ=Bo|({1nQ z+~z~!5ZZTETnV}~7X3tcp(s4_+d zz4y`IGy0ow|HlE|#~n!rwQ)TUTocxg_i@1e1Z!`tymQ(*`g7vrbII65yQ%fGy)Z+P zI{cZjV&{nWY7FV#&IuaH?nIJeNXyWq9D9yZ?<_a-4O?pMG`F-pZEFK 
zAntuv4Ka4!;?F(Tq^~ zKPVp$h(F$@@Q;n>=GIe>WcvIWa|OMSBUY;NqyR~L>QO+8p;F+vrM3vd&w_jvNV+%I zAlmG7bUAgtmTey^0@WMcF%O+y@%;UaD(3aeJ#9Z22{}cyy%#a%LXj?5(b*rrx;}(j zr-cd(M1~jSH5TroOG46+f*BzTUBc~c`T@~h4LY z>N0IuLFN*rv4@{v%O!DZOUAmbmj^+a&WiVyv9w*=zhj@A8yl`4GXf-XFiZmQ5^H*J z))RSr0HbCRI#eZ|0D@CAmDjayX4Nd`J}kDI z;L+P;rCY9PS!?q3cNDLC`gGaCZV3 zZfD897`tL5I&m(LFL#JJq8q*AvWPKbZ>_CP{Umk2zH(|1xa8;u^L)f|I(?0n6tXEX z{ZP|&c{wS*zkGyS-dabM>5uT4JiFyx@_#OXir8n1Ys9Eany?Ee9cDzeHU@(z&&-Rq z4_wOpwiR7oQDGGb|0$aPnt4WwxtypuCN91G7F$2zs~lB3U#~!FwuH^)KON}kxj%>F z%KZ2n--|DGp|U#IGtSw4%)D4hO7(iS6zP30iBLW%EJeB!fb)eq3fOqHhUDdHCBJ2- zJ-`vqD(7Qv2upIB#^GCL%k9I*(AM>2FX#6As}(0X6MyUE{o(9>iaEn^Z^IX<5k1Ig z7m!iu<_S&h_ImfQ0)ZM|U0Fk8CxxBKEnLX%RPH7Ls;nRt5b#X`g=Tx(v!Qn+j7j12 ztup^Dqk#|U7AHAB7x=zu)$5yC{V%>*TU&kMLy|aGDH``wgr%El+W)zjG2l4a3*yUx z6)`+5wpQ^(m_w&W(JuF{BEEi~%xsW!Lf=TP_fZW%-om08UQqnGh17Nlskm%Bmt@R0 z{GXv=KfblutRte72!wN6yR$AO(nOBI%fH(lp(hY5h!jVb(PEj+Z}v`uz7h;L4)Su@~v=|DA=MLZ7pFT=xiA+&V`_ zm!&Qp%wCS<6|rr&NXCNSb9ZL4*y`M~IHeVs2MqcYT+5YH$c53-0u}$ss>T7)=Pd`( zVt8|w)KRG)y7BI<=}&B*|o0i@A zB24-Zvb!{@jtcY}Wjd2DX%?zlNM{J~)a)-$H! 
zLto>e0xW3msN)b8T2gXHIdu=?@iP-_Ia?SxML0sx`5&GVSmQygc?@HusU(z+MgIi$ z0x44c=BaIA{Ve95J|S-tRb1;VGkueag(=eL{LI}AhQa3`JzFg?iw!m+PP4v(zaI2s z&Ts(4pDX);{y7;q8(yY>GvLNcSQ_h{H$ihO_Nqp{ymIDn$ZeS;?Xp`F%sm0Z+N?D# zX*jM)b}K$oTVMIdPu_dujbBbm2mqCz{}4u+p~0qJ?hhD!ih0$B++$Z%HOA4%RF49$ z__{d0e<-^@Ao;Y#+j7=Ba{7zMMmz!#Gd&Lz%iG4xeRCh|Yz&0{YNzBY`|b66-z5rJ zTkn!T-&7w-58XJn_=I-@zqnRZm7%|BEe%Z1WadGXc}Ik270x+CMz^rh7kA5yZ+;dfh*U9$b zt$oMTwg>*{BARqqR}?S+{#vGD`A1rGr5qbt;pYJhbIv7M{5At`QykJop}iCObj&}_L*bs7e>ae zc^~2bjNvv&V&t-`7JhE zNe8^CM{e?ev_+=%m%|e>tCoA`#AJRNIA`gVJ+8mBg>5HDd;6lKsRb@c+m-Vput~Dp z;_<7`$KN%AltdDWH;VohxJIwD7LNhhOWa+}g(Sz>T!a#y`gPs!>QuMlc7hz58{YS~S&`1H+EV>i=*oKn zVKhG1@M|jYUZl?S(bxNg0@yM{w@7uuC~%*Yanbeb*R8w77BZNxQZQiac@|vu(N!y- zqjF>XENXcuP9SLP?UKK_$r%$P>a*hZ)Qs@U;=#qvNk~%I|AA$y8MgCjnF{<^w7YJL zCe+=;6}s1$(q2ezZ47_WlR7f&ahj0?5c7f{H#u^-LP&>?oH<#Xct4lq7h&`ni>!dx zB(=QUpJ;doJV$jt>23hxtB76*BcsjrXIgHBtH(XD0e3?Q4$s}u7T*IT!m6|UtyzU= zq#sY&JuV^?+>g=v(wLt2+6{G1AdVhSc-u-*!Pk5|S^XT_3fXsBEd9J(vb-T@lH{{H zeX3dgq(&f8N@sqs{`P2B{|2A|@u>NVM^3@7pEZhZdG$HN3-bIatZX2{zB4*XGOtn7 z_c#r*o+gD<_?<;&QD)}+u7qD`+yQx}@IJ&jA?ZhreUooMv}lv`{nO#NWX)l&tFx~} zmN3PwD(#~Dsm4J%FFU;~rZ#3_%lp7>dQ>=P3`n3WUmBy%Q{oalky{KT_xwQIVssXw zF6a5))xG@SC<2P=f$$6?#nj1+(Ro|1lUCmM{NV5J>+#RAo5N&4;@)^i>a}8{W@5Dy zhR7e``R{@qzntJgncE2MgP?9Zh&;rKWRcpUG~N$8B#McEz}hP$nEw~r*wYj5qa3iW zj(J}mgACy(uY)Y5)d~Ux|#}EF_#fOo6IPuHUkT zx*j~o(%O}E$~$KGOV))4tX_VeL4H24CopdXzbAMGjM=K|Vt{8vo3N4R)9ezd`$Ke) z`mhES>#N_*Nmh=;gCv3!enW;B{fiCc#)>`bhu}+)|%ZC>>(^OihRpWxk1~3 z;i=)117#<+f>#Y;#Xd$~9bN&*+kb!gzKjaVv(s!Q0g~gX+a5e)LgbjFxg6!Jff!j4 zHO2Z5T{w-;(d(QKCv=k$N3CP9%}8DUt66WD$>Vk{MV;SnvnVRm(=|q%|VZQ!qy$c^C^{f(D6K;jBMs&ega13@;bq6^3=IA39$C8f!vt& z7ecY}%zI#lfx4V3^BGq_-#mHMZNdlh?j)@f@BaY%Km@;5moE`$rW3ebiwjP|d9!ai zzMXl!M<<~B)&g`co`d;RDnJR4J0@UzU$1T*KXE<2uEchSjc+xeleKycR9s1K2hh

6c78lwK?5!?s^V%J+AwtvONBndU((74DYp6$B|0cujibOD{BX?-dk!FyT>z4SLQA+G57L1FI7iCk_}wvw>T}%02cXB zJY*z0r#l{Y=6Q15c4e4h>(e?Q;1vyHIV|7}y+h0XI~FnSrsIBCg!u1&wKx2)|J(K0 zrt|&pt%mOk=s46M?pqnEzQch?@ERY0!v$X#=v*d#=fg}{+p`_hQ*kB}#zM8_sex#t zW6Sosot93-Te!+tuIduzlZfx?%)+1iWEg(_OT{&^2-hBw0RUQkYNp$-I&;~G+g=E! zE$@Ge+bYJZO^59_^$tvq+qWF%YIx2kZoYZ=j6A;fI}6a&GPz+G5Mt;31$M03WKl|hpx&O{{WXRnKNB;O2%ultY2m za00qZ0yr960-XJP46cM7xH?#6t(<^}QnY;a?*`EI^vFX6ws~CHo=lYMHkrHQO^O#^ z$gL<}{6Zf5`Iz3Hy;Jjdv3#_7&$zX%>uVOj{p!?}R4Z|;f zReE&Ibk@)*FaQXq8^9b#Ey9g}S9wI{09QfhAmNs^zW2@XiLQyHUoud`7O6D%=>CAA z>jHExqNxv1UDc_zY++zO+{sgWaR4SgJ1;i4!X@D46L6)yn?UC@X@zr;44!VyJ-Eno zG`ws~(Wq_#=-ddg!>SgqlktYdJD*z(U-|NGY)iQl@6PfE=ZOB${D=QQPi}AcwHh>k z^b}k#odyh$F*s?qUG0Rh{vt|P_4yBcn{8j+6ihZYVzvqK= ziun?p(ksJ8if*+5(0x@y*8sY2EIqnXTv^FjR5*QB@1qLj*bjF~^Kh4Rimi_^Phd+~ zln(-Q42`0a&OxO$09RoAIasa5Wdm1rqCs!a4H;ycd;oAw6Z$+)HQr@g{w43UY!kp^ zJId=i8Rc_2`|iy*6Z{^27~zlaXu$7kE6IQPuh)nF{9ieFnUCV+Hkg+*j0Wh~UeYT$ zc~-LAJ1%|ZP?OQ=$?oNt!l_&DF~Uu@(c393deABiak0g;UbE7akob6THK@VibiB2mWeaZMF2I8f6go9g&uAglT2Hd#AE;_8mPrj&uK_wxWFb%R4cb$LnNF4)eNMt(YWN9r>UK$0qDkr-tEQ{$+3Y z%fH@Nxykw8PhNkd239EH~Awi;b4qoqh&^R*z$Db{bBfL0o^aOy=1TT z=xFqIB^acLH(~`i4^t<)MY%r1ZDFU0f3qR#&1_UY_G)}v_&)uC{rXoHpo@wIz@0gh zN0^_M&l?TUXmEfA^{D=J&Un(1!Hu-r)1!;3t4jx;AlxJ#`OG{=o|LzqFY@SoRysN% zoU)PLJRSn@bA`Fe6CU%ZRvl8Fxwe5Ooddy=U;g3V>(NRdv8KWvF zz}f_E&^G~G)!i0G7YE;IIvxyo4iGMp8n#HKZ3`o7Ud=Z>y03ngfsO{nF!ih&4sw9)Xk11RfpC*Jdj3haryIr+ z7=qvO+-+mH=>=^bZ~EJS&S~!Lsm5XlbGk&?$fp;_8x_x~hsW=kXBf`rF;VpX7}j3a zbO9@>mj!b4;*K8U)Md_^<50~Uf;k=XVW8tksn5SPXb;_L_`(;d?AdA%$-F3#|GBHk zSNBbCe&|?6G`lg~q*#uad67toZ5ecA;bi$cY)7VFMmpqg$P2%?WZ3r_ z+!P6!khPrY@|&6Nce$|JdW=rvM*fX>B|V8nTA41*b&Mx%Ow&C3^yoflK=-Qxbd}kr=e5N}aGIc%A|0I23kSW0 z!xqFQTm2xmhNDPF2$Y05KU!L_t)X z%*+O9qig+^m(U5LquqHr8{ekleUJg)3_AnA{~_{JY2>zFT}}{X*~{2zsT>J4LsqrD0&R za_Gl()HM-XxUiYQ2q$qWUuBzU%S#$@zTnMed%P)D*|~g|?RjhW(Bq-e(oTccu`S2( z0lK3{6XxOCWq>2w&iXc4=qy(Kj(T*zr&ZTa 
zEzt4cnTO+pA!pAqMCCgT32^XgL!yw~*x=m==&CF_xYS9#FnGjr~KJxhT+ftM2CfbF3-|Ssb;(B+h&-D)`Xm1EoaMC-{F)WpP}g+ zeTZ^Sqxv;GmC2@C`5DeMtE|IwSp(hAem)GpDLuOE=`{NdA#MZ^*vCy|l9X53Xl6Fb zRK2+!o7pBap_g<*&v|b~s%#DC{ftcjksyH+vgd5b53WYZ1(7msYmzoU+5GQE#~a$ zpoy^60D^D@1c&X1@Jz-!#YBi%r_xQcP6(FEHh!hgb=`}rL)0qoFmhd{Wh&|KO1J6&GC3VsBH7^5MSS;H9W_>?2XbHDgSccroI~zq>np{nOB+;~}%qz)|LG6*(t>?kscqbTQcV~#H3^K_MdB-#EncX^0%*mODDe%!x!&}%+JF6{5?OuRvVsfXC3 z7q6N@4})CNE^%yzD}2+o`|HC+%VLYL<28*=bDDG6&>LZ5l%J+&y3?Bb@GWB>8HTRM z4Rr6V#>I#+NyIuhOh6(y4a6jmO66A*6MAJd8+hAfHql$v2<>VP*?HVabUQ@=aI=F9 z&@o5%;~xp=zIKQgBY4QXy}&T_{KecV!gdi}ymw{r+}ON?boFTej=J)IB}}pb4CE$gBcLOd2OrqiJGH~`2fwf0oc88%no8Uk2IV5}5r`z?Vt(+H zrd_3niU{j8OE=q1AMe1j)@iL8J^vzuH?U>7o23;%r<0d|@k@@G&mLU|gmw!xUc-&O zu@{$5HYKV~4O`#7dG?{#FbHP52D(kXx4FHDTh|7#P5H)Uw0r1*j)t(Oo*G9bozoY# zPM^6Rn}4qg;CR@*@s-NtUKs^Ng_$%+(X3;B9fn%h70t5qIJyT0Ivx!9z-wWAes1Uo z1v=|(^bLwFov_`~33=$Z1L)!&^v9vt?!Xqes#?jtm9#|8`*j|RF6KhE$wawGZdYbr zcwrd6@;iF%>+5{IENvi<{q9EqV)E@{$6@rCiKc^Mk;@+d`X7{DIQGOT(wcd z2v{bPD-rkN3aRJSK=&htu7|Bh$3x9wDlgw(AI@pJ2n!8yKAavj{7pZD6$J%<(jZNn zG`O*6wx&C4tZuy6J}A(+Q92(HZ+QEe)=;eNF zW8E)3I&?57*VBmv?kv!e=6(9J|D`Wzlkhk9hu2=s_h+4#$U@7P%ac6%UL3~9B|Q~3 z$=Gg}4K3@nNVC&rTKd-I52kseKbGZ43$cxPb>3_--z!DreA`&`c~gZp7clbMFmvHn~l<(=CV&H(TR3$PYmG6%Ac~^ut30bOM`w zhN;ZG@txFj=l9}--@_HJCaVa~3%|tgvnv7-TH$BTXv2NS+^z)A-GNwE1sT9Yn@33|7YuNm$ zpRFD_Ml&Z*?#Fi{e*6A7M~6Jlz=4LtLFZE3gjYbtBAfV|XvoK7ZZ@i^nukrg=uPPD zp;jAIsT!~HFxW?T=z#9R#l3wt`f^U3dvUG^0Sv8Dn6~auD|YZzm^3Eby8OX=bXE7p zKu3C+51tzja}&@xZwKpXt>>Nl>44(<;~pD5(sp@4mrT6}W;s&ocJI zG#9xl>$vSXo_;pJDsQ=Yx|T5?eO!%KnUOa(amzAv4WQE=I``K@ zH_;|`njv~@?Qc)0xCck1(yePB{zSKA>_)>=^ukHLtF!<;I$i-|==#;KYB54vlJ0f4 z`rz2LefGogahj|B<@>AB9qHK?f1RIyZh!dBw^!OX$H~U| zDj9h&a7U1Q>*sLwGu-~rDB}Bpj$4a2nt5}7b2{0|s;y1ExEtA;+g+F4++&O$osJk| zv#^1#4p9v%9S~&LG}6G$<4GOqmA@@)a3h_bmI6IIOQ+hgP|D_m+^9FR7x`?0oJ)A0 z{s>Y>zW%kH;rrj+)d|@7A$H%-z3Q({ow>HJUR``Q^@4`33|U>7?gr2~t?JLg58g)s zIv(c)Xhc|}ZIVH5!;gn(pz9#lZ1y}G*syCGbDHD9KRD1uIY`F<6me>>(~;lvQD{9M 
z&|1FY*(%JC^^0HF*XI`5L#MepoqEDhHX25=p;+9@W*>{mcRE36mr6^?QPz5vw=910 z#-=#Z%p1!sDVy_H^&C#ywu4@#W4?XM0d$;-&B-tCwV`VTkujGeaG_geGPQMDmDvllK8;7nNb)~#XcMfzt zJwSsziV(COZ}bVMY@prr{;-`tiQX}w3)>|frx|6T+TP=FvtyptdUP|s^}5gfA|uCK z-0QFH3+VLP*K!kFkXZ>F0&7SF1-n7WpOwK&y3HvwdXDh4p)VVRp zRrzXrv|I0G?8RAj?lW{fr8&CayqiGh`iWe@f$hM7pxgvZX7SEz2D{yPyy$hEFh0!- zBxf#7WZ~QHR$U9At}s82{Cf6j-jvaC{%`P=x>bCG_;DQzc39l%?D?zvSGA>tV&DPB zD_-r*2R`%0u|CkZgWa^!g>17$A-K|w{@7@1LmBy-fvzrMRN2T^r1LR=&W)^qB6*K{ zX*>$ZWkmXpM_!sNI=!3-H{CUw(!r(u%y-0_JYnl1!z4?OJMAVLK!-@ndkjg#z(})h z_~KEQdDZLHSN3CL@QW`BLI&i4#=ik}UAH<#bNE4@8 z`=lJ~9|pHNW1wr5j)p~7oxO7xIL}N6G{zA>20Y180mk2_1L&w(6JXQRo5Z>2Lc3+4 zlWs4wPY~#+^$+PU^z+Z}4{yDtPbfNG2*AxrOnWhhqwZ16|+IN149%)gATVR>R|u zkDuMNJokDS-0I9Z4PCXpq!&?nxa^_3q`h>PbW#bR$702#@#_w~+UZIZ{GjLUyMm4W z*gL5`I;3qIA2;d{u6l(>PbOp=;P!Z<7hBF-<6<(wbOLpJSZKo|PW7QL;F zaq=qTJv|aY2e0Xp-dc|iQs^o_ts3`u(+@jM#|nV;nsc?f$e}mht)^4jf7K(U(_2Ayan;l!>t#m zk7n=gPKT~k{C=;_plq0yxy(4>pf}%fd^_ve)aZ}RbEG*P3S^qbws_UuER*RVXIaZR z9zJ{+xEAKU?z3k`pz||Hq48?4fa0+IkWv=<-S)spo5&U$H`dME7IQsq^yW6l42(Qv z_(KQ4z4D3{8Q#dDD&HdE`@(BBB14DkvO0bCih6Y8!;fNX513nV#o*xrc=YTpYB-xO z_UFs}mO(xmZ_zVv^vA#8Au9$vT*hYrLtf&y9q2;0+36tC?!j?v)Vys2I_IPMk$wd> zY}dGiGmnife&{3DWH-he;JN1#AU4w_&$u{%Q@oo2UEF6VWL^mJmzW9V7c^W2;GTL? 
zLlDi;@z*=&h{f%)ExALT8o^>;x&6BYCo6@w7{i!_EQNM;$ zZme%(8le)GO|}x?#$GJXfX_qn$;0h0iedWh24w0S+8y0vzq`E&tUfcDL2#8 zRIzs8>w&_LJI^=(_n5$pI>8(qpVWL!iw+#umx4dPYXX4v{j*lNlzQ|9@TO=1mftEIR zvby@rQ~Sfy>dig+7z+{k#W2sy0doH399=d7+l%zuJvj_7!s+$2TQuUqZ+YtSZs}r< zwA3=lnQndib2{-kZIm5B)U<7Wx6$uu1UYmV)llExe?NiF&l6q;msuF3F~*cz-Z(71 zk&Q@$OFZQW#KpF6s%t@MSr!t1{CL(J#Rb4|8p+G*%{`%cI2Ni7^DuQRr7^TRd8W^W4&jxl7gDnl2}3v7z-r_2Qm* ze1CXCf2|)Ez&)&?DqnEQIXV5s_5wuE671MR$6swIqISDk8)2di9!=x8I;he(+l=#G3ip1uRm;4u$Za5L86Z2ksdSQ`zjxYU-;3IC}alN`2Y@E((b9I04Y9S5_ZzmhbrB zGn6VJ7ZpPlK!52N!t#zRd+%cYj*qaYVt9x}*tRo#bD%@M{z&;F109)f_DCOR+qV>N zy4+Xj*Lbn%w2-+~pmV;+V^622*LrR}?)wm3H8{#vd7`4yJ7cK&=pzC-9i+)T3-Rc| zwV`SD;52kCAQv?8u|Y#2Ke*~QQm30|ImhuGHjgH1+Q^^Py|CAMNAzvi4+M1d@NgAODgezv&Hz*8Yq;xL5Zh$nM#EDwAlL0w z-3?*nc>YhS$Hv^-8?Wugd(CgWzVoT=_>`x@B$GK51_SI&71Ugu8siGq2F#<6|FZ zxtql;7k<<2090+srs+;A(kp3ijCUJj-ciZ>8S|OxZvnan9+8ynG3)@~*s;z#h-?#h zK#L6tXaY6?8Sg53rIl~Mde~~8-k=*0X-AgiM;axboXn?X?2mfc>f#={ZU#mfTbkap zZQaZV+&0OWHc3ZGN9y5cKB}B$<2i~!oO;4%BMo$}Hyxmm%Pc^OA2wZvgmII&1e;0; z5X?Mv-b`;|BaCuMXI|(w1JLf%LeryITr zNE6BQ;FyB}yzU3+2($PQJv!*Qr3AXcj!Mt4HC_RWyUlXqHgM#(`We#|x*C-4hseBa zt><|-$E)vfO{a#u=zgpIJIKiYcs7wFOH+pZrT9q%n< z3@D1U-~>1-h}3YH?z%4F(FXh~-@{JC>D71*i_agd`}Nuu!&F||`tp}{wW_*`UjBm* ze0qnel-=B|j-RAQ*MLsk`NPjj0b^7}#!-hU;|Qx#Fq~y5%lBP58KnZ=y##cTU+(1~ zzmEWP2eKP!9{}hCXcK$N54~I*K*j06%)im2qZc;<8UQygJ{bIX)hlK8CqM_S&eQOm zoo*!7;)DUvM>Ma)-aLRaaw7%)81ON3j^O9{vpoYFxP$mLJR1E1UNQWjDFVXnp|)uD{9HUmmgiPw z`HRki#Tl(u0eT$64&YHiofb0lzwq#qp7w0>hC^57=O8^gr~8q7dBc7sJ)uPLcqZ zukp~!vuqA&+ZL>}@5HtQUK3(??1n}Uh|_TR&dAA=0N(gf^J6=!=-PKIZXEWk-%b-(6e#-5u1PGA(nRfeqmqE|Nqy5KdM<>XI#bY!GB>;iBv zys)D!D?94V-&k2x zs62Sl-Sb|nd`%~gW>(k$!27bS%Cgy*%1<~e-u5%;dOo7EjXgn`?VvWwBv?=>Bfrui z|4u*CYWF0W9!KtgzqNF&=X6NA+9r-nrCDdgIX4n4=3Y^w1Sv_VVV9f_&hhKmHg!h27z4O>94)UP7JE zOR4dS9-zjvKBJGFpU`pSAAAVl?Zy4rF1~-G_LSz40Q{H`dTlHB!vTErXBlaH1~01*k0;Q%5{!=}L4tyHMR9F#yn~zzPK>UvfaPElroms< z{2i~7F@MMJvWB$TLuoRSbI;8B`12lJ*lV(B|8M~vc^>amH{v(}?n^()<*GoADFA?u z!68SSQ+fMb8(ncFwCO 
z=f(%`W{#d5f8b7@VmP}$oX}|?9Qeus5;YC%5VxPav3XZsm2ub=FinhqK%fV$+Lls_ zM!@`e8ayXnf#ln+03C6h?;pL$;h0UuyXE8+`Kn8}3lJNjJ^<)0?M3Bk&#zZB_u5d&3P-3t!uA~T;z76CF|QvU&*dPwS73@S; zyfL225f?oG?rQepF3V?ehp)3S2j}4^Jt@M`qXXy)PAdvf?(p{8yTjv}1H$gDI_zuU9`Dc7`Dn7_nR#Y_?wEUY%3kKM8EKK0>mXKs1+X42 z0-aXPO>8CuIbPl4wXGPY>U0rSRNv71sVoffT;{f#$%Qio2VylqSCB10SYQ$$Xf5)m8b2`u>FVe%-^mp$99k&^I)qREU zIQJkLuHM%)K7us&fANyG=Uf2bpl2_RBjOU&~eF$sACaL7t7K&=9ub>Rs( zQ?BVZAf-n~V{zG&R~8#y*Cx}?3Fzoeew@a{D(n@VbqL5EJ+`N98+*ek9fK6lpFeoI zXIxF+@wt!8Shpzbj0U~g=={bD8|JmhM85GbjpMlibbQ46={5CDbylkpv|WJC6(smwmzbR68QUmJC8E-^;h4%q<&PeW=#i(EMuravbR^yMdQXcu zHVbd9_q8$2(H6JS?MfD75zkFFZQQ#HF5y%BKr?>NY6nFRBkGKf>BN= z8^FR>#gE?GyhkT`^wb(Z=k4eXw)r^t@ER~?Z*ZKW!#4TG;d4G6jp>cocL#<^@Nb8s z7tiYK86EU`{CM{APM+S2`r!1PPLyLSPiW?|2vy?_2f5rsr>j7%+v{hu+vyS}9O}C8 z9IpU$`nKz_6FEm$!Hs9sU3+GZPk^ThR@x+NCHa+3Lu2dEvpOH{b6RNN)h)J*JgsBR zAJ#nF?Rp+1`A=PNb>ygkuJ!1Aclp2tD)FR~O3(`ot`Kt{Nz-~IO7%|fe9Ou(wiEDj zbt{f@*Syp^;C*zUBar}_mM;0&1axF~4oJvC!-byX*x(1SFE4?v0wD4^%*~;1U5p@o?hUsqpnL9F zPM6hc>+?IeeS(|&b?@$&0Ppw-Em&w;gYT*Ka#g_ju@930YL6?ggd>ikQ%`cDh(J!m z)VDMj$17Y6UF%$GQD1@^N?`RqK&KvEKD>Oe$hCIeho@0_M7NJjgk33uAR2yimYw^s zO|t@e(D<=PabCk&zBS8=-8h`R5zwK#VFlM2lD`S)$V;cUn~r$P3w-hf&EXzzGVda!MtqCr2eV&{5fAgRf{6=$NN-{JvcQ9Yat3 zibQ+v zJvY=_4KUw+u;7^wL2iv-Xq$Mc)XjuGpvzx#PB=cRVNJY08>_Ro4s?;lmd^mNmLKv0 zDB(5&5`6UJ@NXB;vDuUrNQSAW&#Z=%I=PmO)LD-D3l4bc7rrXHI$iNsJwVLB21u1( z_R{te45brs{3a7|&9TK?o`Db^z!<*g=&%cr<%+8BAN>~qI`!!86wr<3tUqp=$~dF% z3?Dfn&^xjh$C$_V8ufnXLsjQ7{QAtopyXA4<~a@fT~`E$4RDsh=h*{vY<_0=O1uJ1 z4IA77v^sJHj-ilh2>>ep-7Jzw#xHP#WjqmhB=@WFP6Wdhdg4`{$XXE8|o@ zbo*E)JdgJj;kAfm>|56Ymg{r{JRg{}(HL2xM@J>Y2lOsq*6au(?KcIW^&XwXB6G-F zb`98U&%h_&_A^hTuIaYrIO#;V>7v)u31$M?IW02KU|4|Ro*1^mPH@u@vH=~9hYAni z0Y(fn9iA*kz?0lAJvzyFF(PSccWsVN@(F@%=-R;!JN(8L$~{4Q@r7NTcyVWejxw{0 z;jDUhr_Star88G(PiR|;S7%-BA+J1AhFrqWU_6h`Z;YxJ;SgvFc-Uw^Sg7#_k#u=( zCp3JW1D(MQ`3icIFwEvEy$zXy!*gO2_SEIDZpVYI9#%VH9aa8->W#QiqB96;Jd1A7ar1v zykz-n@BDYa86$3R>Hs9WxV3l#D|B#G*LH0yWU$@j%;yHMiyUu;kZ*>lr*tIu3H9O_ 
zo>SrZxd*qSzvwpuIzT5t3;+y(OCY-Mz7gmw)8*8F*qXwI;m&J-hD_QKuZ3%mj{9NA z`{MKW#n9Ee86ok(8r;WfeT zM<8`fYa$_R8|?Kvt><_8belfuXqC?pogg)OVUGaVfDKukpFLmKVm0+U-)6BvZ#8IM zj_0$0T$a=AxUPN+(4F57bR^;m(CWmbA~_RLxm;;XMAx*oHwt3`Yo;SR3%6+3?R+r+ z4POFrxRPyEkAm%gakndpD?30h10Udgl8}mKV!c>uQJksMf;E@mg=ykG^c>Ltf z@S)~)0B0I3GE{JWUUZoE=&0cE+#?Hs1H^6ubTkgHoOZyIP9M&SA2y>WSU^tn)Xg5B z8<@)g9f0mlO@@CGK<9F%Tpwf|0USon0D$gj%R^6@+(QfRaInr!2T z8J=hRvHj_^dgBHc;keR(d4tRnZ>_7C9wL< z`w4UgfoSZ-wk!KKKSW)*h)^+_zK5pPjaq|<1yVeuO|#$a*tY#s3lYNfkTdmKa;TndafKP ztH)nu3p4B@)ZCg~^e|ZY3U|`PZq9cxtYLU@NpBS}Ck8NK0|1JApwoT^LDSdb2LL+T zvd;=J!)aDhkFnoMKzBxSN!l@y;HKgTfKzsE1a$P|0HE5Vi{Yz)ZWUlu08Wi1K@Oie zIzVj%vJt=xcFfbYuw)H%qYbUTiq8AAG*0df*L!qt>}am;?hjov7wcD^G4IZLI|_>x zY)`qQ-n1(Vx8VEm^LD)ZHr?t+r?>leoCPERmHA=QR7|5&q{lb(;ws>=0sHfx+ZlfU z_taC?>s!pnE$+^hKmZ+cbOr_|#e?+2gx14;_!ewD_ukfJ$GM)SI@+>bGaRoz2Ic1I zo92AxJdVD}3}Z!|DWWgdtpi=B)x<2O@f9+kh^>tpOc!0=f=n0yS(T0FJz%H69=ufEe)vI(m+^eCg4>#LyL> zd*R*zI`YVK=T$kT3Eq8gZ}{zRd992sD&_s@xEU^EJ&NZVgdj)#^0TgT$xS`xTNyd{ zxH0oOfNcObWIA7A{{drf-0%N*SFdvI>LvWF8@9Ljx4#5dzxr)1MpU5Vf#d9FB(C{~ zQv4<&nfx2pWm_QlMXuT#Wt+`rtkXq?RO10kVPvyT$Vv%t;~NITS;Wr1i zdE9bypFCI=UIHAiF0FCu}YJv;Z_4g_>+D98gulxhHw;z28ObnpYL><37- zp=$v;%U~zLY<*s?4u5$S3?1j0d&Hsp(u?=$c&2*~=rV^qcdf{)H-DD>cOUEzKmUc! 
zz|$KR%-{MsA{*fdDBS!`Z};o_Hr+mTE9MBb`pT&?;R?7Bub}3zefX3&U%vnLw|0gf z|7bT3E~U&DcV|mr^-F+GixK=~#MOt~!6jLpXBmT>8K#GwnXS&-Qkfb#YG18@VQZ$- zZEZ<|EeU|oMQsUS8wZt>{c0>?plh&-`%@g+L4E;r0x^cE^x!znf`x{Ynxtlb+-3FR zsMPTz<8lTK;nf&!GG2tRzjG6yi{4iQH-i{{gf(=* zmLt%Scb79j_cB1I6~TM0M@K%ZqaHJjLGSJtzsNZ}zEbBuK2eq0^G472=;vQ;nPwSm zdz%S$09Qe*hPUzf1m!oszB~Nz`#K=ub2~Br8HSe29W8;?FMo4&|D1r%y`D%dYJ)04 zNje{T`zt28AASnmV0cQUvK`x<`-LB@86UROoBLh0>tw+BWeCGimZPu%yBN+2tdf6g zKEqLZip;$oIkFnw(>DZ;3-BUOE&W>sI^+QxhBSR2o#K>q^ukJy4$!Vh^iS>^zN=MM=H-|VUEG~7fz>Yn zI-U4RZwE(R2uCGw@SwxfiH?=KxiIKT1Qq)=?gUM3C2&QLt^r+uWCOSWzyLYqJhzwc+xm7W6OLti`6cz} z4hhhack&I`{X$^(lb^6)u@`^nsAKRfkwdPcSE3SLVO_l9nH=C z;UDbAreL1);_glftbYFM26Sq$Q3+bQ9+vh8-I!xMA3Wlw+3&Pgp&akgR%xL3G&?ix zt=S6UF|AN-LUY*~Pr20NuQ6{}Fr+=B;bv?{Xv0}_HvIzNP7N9g7=+FazJZbG=E#QT%0IJpl>pd~Y)q{)02*m-vJb5^EGOlpY6V$?1 z*c^e5dAfN6M3v>jp1*8Tz`{blO`pi^`Jfq|1fA|CKnfummd{;FEmh0Upfz{7{l|V<&#BL4bqJFFo zPmzxbA%0dN2x-L?rKy@6{3>fX+p2NX(qc=iMUZnZuF7rGH=W@N;Kgv3;S4|i^4Fnz zRA$Mr6aJXy;6Bts1HHJ@>c!E!>-ks0cM|9TB|wLb!kd6DdSn2VVWUso z3Ze9?3}Je8S$A?|0L|rvTF15?$Uj`Kys|U=yML1_t(-!#xVu*Z0d&kc(717as1DQz zKH*z71mL8u?|HSg=TehGhfLZ&LbP0p2-n@TCFp zi$x4AUhrNcy|~}Juf+!V0NjSW-6^2ME}#`a2T*P3(E(KB&=r77pflL5&(SqN#1^_? z7rz1>Y1VSi5AMA35-*$cg?AmiDR4bZj`w5#+h1NEe)-G%5}EVvdf?}3-+##{1=GS8 z0iPc|`T#qmvPGxpS}a&-mkfQO!U?} z2gkeS<#n%leiPVfn#xYwwAVv zj&%V#<*O$+0A2aD)F-m=wgMeTw`Oy#)!i-?Q*MntRoe)n3zTv9*xR7tnHey3{Z!1v za1xSXq-^i$9UhcZ5Etjuh!l+N?9%3XU~>Q1cD#EOQ&&b-DTpqWEi#xY2;#hHXce7N zIdTblV}W&@N>L#d`7lMc%=+e`q%1d|n{>HpR7&+M$543%WijuX>l=A-+HiOm$hCTE zucmCol>G(h@YZ5Y4jLb=kLd=0&d8$!%miHQ&;^hMUp+p1Wdfs!HwR!DwCb*F?9@j2 zP!`^zb@AwEvj@8JtLYy;yfA#EX%`32x6z|xJ$7-oY-Wq4{+o3Sbhd%DwDyB-0UHD8 z;vp%pV`czv+hFXRoIh63(z8;zRP!XGd=JP009605h~Y%002M$Nkl{ k912PFxTxqrwXFu5^ zxo5cLmZo2SU%4JwytXfiPCF@fI6Khw=gus}ZVb#$*d8R~Du@p#;!X48!MAKfGba2d zzB1J^$XcG~=VjwR`)nbi8-{=X<<@ZY#1$es{Qw}9^AR2P>&`=TL1-pAkshQ4@tNqF zy^tV;*FVCkjt%*sGts$kOmvSwCZfAvL?_H{N~ce64Zr#Ax#5-9HV5+k7%0aty3ku! 
zWb}i+q>8V(gy7De-4fven5KnT4x)Sh*^S}nKi{K27>nVC8igvU%EP7iB#E9-! zzZ!=B5YfH)7Z6=EMqeDl9M~5$q64>r=+ND4Ts*?96tV7 zEtf71i{LAOF&xYZ%&p5WHM8@&(gYWMd{`sHw40qnq{+R5R z{`;b|bMfllPq%{TJazRBwq#4$nciaSyQ1yvfbC#w_6Dd2i}RMfnprGYS8?ad+iuy~ zFg*xugXsS2zpf6^1<3@tF?D5%n#7ZY2eCjxCO-(T>6)C9*f%3aP7EBA34Xd zh>T2=)-hm!@Q@@(j`Y(X?-~B}UzoPG5nj`-zpqpeEM9s2?ACklZT1nJcNh(DKKExT zraRaTtvh|Li&zObAFg4?I-;Bb?Q2Y``vF(=#!lrj$=TkC?pJ@=)YWaFhKm`|feo^e z41$wlkBDa^zcw0UTDcw3;cv3L4$?{@>qKX}i7tsVNH6jp(b1MxPwz|Z{Od$?4?}d1 z?=GT)Y>vLQHN5olratF}4?omsOB?2W9#V=)89Q8EXFQwV-`1)hO3l{Zu}6xkkr-9G zR!faqQK70fu~+RKtF}&sX*QDa{I zN?YnFAgneHI6QwyY`PB_9=RQogGemE0D)6mZ_a*?h3GyoL-!9RS1pJ8qwUvCzm3LSdtddWLXM3Ip1iqVekY31ari zPyAn=1BbupI-Fm{^uTN8vEyO{42JWYo`aL(71rMS_Hf%asOH&5_yhDUt6!yFUhvl- z-2U;+{_|*xNT?_mRm+eMR;`FPxls6IA+Bw8J)!5E8lk1qb9xkTkt-vT*E|&oza7Bd ztd~@*SjDcM?awp7b4FQRV)vyHG0@kIl8*(Y3MD1K`TiDM;iqu|h5SmfvKWoKLFW%Q z@0*ayU9E%_R!wicel+EV8k4E^yqr{q-JG&dB9#2i9E#6m=H`m+1tr>zkU4HCSSeZV z4VPc#V%1eN_Q|RuOR4R~jFb1lieH_~={N(@}Zn5yJqa6gs`G zIS%0dbM^-|OOZQUp zJ6%pXi&ftY4dK{hKMU#^Gxx)Yjqk@FtBb2azyu(ZzPaqBT<4|S)2dSz%oG4p9LMtGBy_e&L}INB=HM~ zT92#dkO?9D^BuzZPnBjc^r>}TJ}`lKZ(~h1Shzj6WQTg10*w|g%(t$0-&-H3__!44 zDiQ>^N2eoy2ixQ~e0*&-M|&!{OSi=%8TiW$Yy~b{+|U~XU8R*(#4o(iZg54MFZ}UF z)wGv*BY+Tc!Ml^qy-4_%)}J#Q>wQOQRj6&<(0Fbg#{L>ICneKH;rpuiwM*wK86POo; zHl?Bm?RML)7nN0Z6Prfw`LCq106>(?W!E3H3HQJ7C_5i)!^3Ym8u1C!YPW;B2<_4P zV@-{3w7xV(-%2KN!1~1s>4tbq5z+_>h-H-Hs<)ukMVE8eX>?kzY_Y8GF8ZVAZYE=w z%y~I2MZ;oq%TlyltFQacgx&Twe;;!wchQ4&{oRFaU2;5yhU4^|&5h!wm__bWNl)ZJ zeq+=uP_8PRbvWSuT=?eo=2oRRy23<-_J>5JU)j3;aSS$yNJ*Kc<1_&gsntClF?QHn zA^><}moU)|b2f?b^c?xOuEz~Jqkj=Mey}ax3f*(P`1kjhy=l(&@T1)th|r|_fi*HX zWS34rry=BtQ)GhtFDs<=>+m*nBJI`2x@@@+lhD?Kw%8R$a!1l7UrP~RQ-it>s=tZI zUz-%EFX2bawTV$Nue5fx5Vi)}mAN#xkO%3tUBvf#>!&h)TG%-@tXTMxF>lm8>1;4b zl1Pc!-~=HbQLVf}hJ5GkQ+?bzWF#E32xy?QADB`xA6y%kD_EEw&dt;`6=44ly)|eT zjR1grPgYvKmpWdnCQ>r%PV=<8;SS>k3%9?&IeDW$OKb9*wLI=qWD?LkwS*-t;; z?%HmtGV#f3y&fjA33}qxs0kl;LK~zer1!cpzq5V_K-P$kHk{mRqJ%q$F8OfN>`^=G 
z`&BM?S_VxokBq6!cJ9?rTQ7nm_D@ROFA=6#=9j7OKb!v1hRyQt2nxuuC*ercP2Kl> z)h%h_pnWXZA=oX28wJ^x#%$1`(KK2o_ezRItV&t=P&J}QMWU?Q62bpQd!D{jfn5;U zYP1U{G%9Ts8Fto?j>bXtc8le|gQROaUVXn!0$=m(!eA|DW-g-#2&6| z5tD7JRlNk}Y-h1>Mn7rI5Tyw;*79Q<6d&5Q&AU!6tTKqgO2*S;n<>?au`t0L^2rQ5-w>ZlvMR; zUhr_1Pp)3XpPm|7;#WT4t=^))QSr09Q4iIG=o7m=^Ds`0!AgFfeM8hY9(Ot6_S)$o zga|D!x;!`WGxk65ioE4zH`VUErYU=Uga|-(hh7PU_MpH3P;?ze)k8|Ki7r+~W!r_x ze}6MGyB2S~SoWbZ2SRb$PBn7J6p<|q>LPl#7s8d$U3_Hle2Na7Tr7zT!E8>$c2G~S_YYjZ>(nzNQ$*|eqIgfM)~24H^aDr{ z$xdL;!QFRCQ!0e%!j@@ahgB2~KQFEqAhM^ficVwpYGpGQ=Z#NUH1pO}&cKwIDzQ?q zJ8?>?2`{>XPu?sPD_&&AM|$uEmF!ij)-vh4R= zE6wJ2qe@ViiguHN%r~-xQ+T)>G(kJ@8 z3`IX&m%L<<`5yk@rk@92#-8L|w<^OpK*MN}Aj9NB;|XQk)8~R`Y*JSJuCM0Wz9c+6KSX>AS|5tWK6M8;~`02lz^dUW-R@I#19>sU_F!Kelinta5)i5v_Tiw z=xT!f)cAM)n}hnP+cRWSC4&Iburom#6yxBlCh?O=i6o}|F!l*1tRceB>Hc;v&P<`a zePkLxdg<;C7t0(Dc=_|bYXtmjtiuGUsO@j`mq(g=zR>0JE4@swr*6(Wa(9QH_a;k& zWol%s5&I1sCzDD^7iSbT4);+JTI0o=*|XGuC_GRGtaCH|Sj`rofVJ|3*0-`>wOV;dq>WJS~5jZGM}0VqUEF7wjVbs(dUhY8y>buZa`1C9zGt+ z&qA?@r9@lKx}WG74E*_Fop{_6adBwF_!&PsOiJY(b6_Ijt2EB%3ly(my$C)kxym^1 zjGqq-j@=#p{Kz^n`u630O>_C1dZehrf~j*fj=PVWHQ1?J!2Ojsm9f~r ziiEJXHT^B$?4jH}=KNvGIET2@~UL+@5V)Je}`UGrv(hL9JI_a>SE{K8_UpxaU-M^6b~!P zF(O^^lHWWKYu)hO&e+8^+8EvZZOCz8hB(8?W(JuPuV})$+IQIw)b5Iy-v?f5x%HaP&pVHbM%w>QEPs4R^{mz)QVZV$KU!ORdMx;{3A`e*YDkXJ{Neaak zG@hgcx9|08dV$I?qP^*r9fB6`x1V)6zMMEqlPW)6nf5DiTBOT+1ni*p>)`p~!en3< zt59sDJ@fhlybo4v$e?pu{gO^aWe(ZlIq!&OWg4EAIh=VNhhhzL+8`$xyQrE;G<27BkU zN6DOD-FOkNNSj|fA#W}MN(%WkH}Q;_Gc_NSGmW3r$R@cmPo%v@Nd1|L*;3`cF;39y zG2sE+^9C(*7`z(U)Sboi&*byPzb8&kzt!)=rTtvI7d)oYQ|Il zmI~w=Ao7D1KewvD5U?LHH+EjcAZ=56bUOCiUbof5XudBL8LE{_hoi$w6g*C~=V*Y} z@Rr&~A6R6YMb-8Ym5lwSFNH*&s{$yW(80iBy*ydGb2=`4E@*k`e%z#+A-zHzA5 zWC1;F7Aap-4kMe|MUd8$v&ylno#~&2 zl%M7cr&QS16v?8s=uNc4qQ3@eXYQ$s>b|Jjix0`y_A zjX!q(CWSE^qjxY|FpbC=igK)((|Y0Hpwf=0u+VIr|5(wGb@fSoRW)+z_!%L926-Px zfl!7`KSay($~%C*3ZO476ytTMQi&h0?ke_00F~Xajn@xe+t#o(v!TqgsSqlz=zz-& zStX0jFqr>)k8P0^Ux#*Fk(0a}FU!FFipoM}X&lX+%g_`df54Giv*XK7FIKi$!Vm6# 
zxs|B5>uowLEq_*aT=3-2$B za<90^?27H1Uk$Xa@C)wjwH=NMOAFNcDCb=7+n-M=ceRD!w*Rg2CKetC09}3Gi(epa zwQIH7xkMc=1Wd)&?7kS(LzWs3#}SS#{5>l5wpX~CmLJ-rRnvgo3)MF3M+}FAgcM6g ztYyu|EORUJm^A+rrZR;j2E}S8^7!v{>CL%wC5xWMTafPz-A0vrImK`|G+6h_Pg@Y|a$4ZR7WjzdwOVjHPMfVQ)5$2>L_n63#Ujw8XpD>REiUz=E**^6Y?f zs}}Q`i;XHJm`L7GekPf_V@3Z6agg82dk-3vSC)~PRZi~*2kp}(k*c$EP3&c1N7dEC z?uY2xEp6}jL=*th!(f3ftI%HWyhKZZEz%@x&ClLe-6 zit!mN5pjy*k}GLVT$A}>e^>O1l4-i%HeW^iIP6RSa_J&N_T_Ns9@N~U%l>yr-6%XZ zU<2DSw*mwp76x1+{q~!Gj$F`=wYmCEHn&v{Au6W&7Dp^Xgr~*`7d}Mna|toDbt+Cr zXcAgd{pQc>q6y*lLckYf352|2I&BSaqB32ePjN=0K zt52lQ@R(n~9u}6pRER=mSzU`%Tr`7*S!bL6dC?z3{XLa3}86LXw||Sj^-Isx;|s>abq+ey@*b>}sgjz?x}V zODzo^Qd}>MbdW90q{o;^4G@h4ab?Wo~!d&hBcmX5K&;t3Va^oYXdRc?4y?m(|OL-aPCX3Lm!x17NO~`XvbWJ^X zYJUJ(UbH}!nq6505Qnd~=iyjThs&g%q_5`#a>c&zw#@Sl#fH4+vE(0anq@rzw~Hc+ z^^sMEo57q;I<;i!rTrQwFYJzO2mh6|eYC4V#?~+oNNJTgjsfQ~0Zk0e^*lwTM|vLG z4Q1W<>MqRy)(ml_dV8RNKj+xz*vjth>{%7h4{{;%gbS(f7w? z+6{)eW-72n_WINM@rB%Hn=Ko|ZS&fi%=33;+fOpdL!>86tE}RZBvaLE?p6PNRP38L z#a-Q+Q?0#-30--f9?esjw!C<~Oy{NM&Ou_DtukI*A@^0+Iq0H1$H1VTJ#oi?i zweiex7TcwyN|7>(RmkVQX~{qpDun6yM$IyI7GJiiXtkxgOW79~5?S8HeK<;MYU|-z zdR1s6-(M9aPmpa7yJpG^(TV?gNzBxjxz-tnoRnQ-Igzz*hyrLdBKYg zX9s3g-;1%DZX2@7EV*)}F7eh&Un}{QjPZR5p+wkP!V25^qA4+6`9>ml-bO z!@uknKn$&x^2F>!s^TXYbcoP4Rx~}u&*bKxw;N*$T=!* z7FC^_HUn1nlRic?K+U+%_)TLS!6;>?H7WO-K5^yZ&#c#3JgQ5T;Xbl=pDWwP8CtFn zt}SGS{SI^5FJRFsYr=VeitR?k$;vm#P+M7m@R^8z-$hgS{1Sj&?@Y@$3R-UUu$Hc% z_~+aUa&&9CE6gAo8|1{6!B%QcvbCNj9t!QmZHP@TX2|6)Sr=8*K*4G9! 
zg!(^wN7gRxR`3%mZ~z$Ey9yYqst<@vs@Y{6qZ=x&{Usj?RzEt4Gg3 zmAo_PS3hCG%h@W_f1lpK0!jS3;Fd)>yds8Xc~^dKo8ttJ8>pEhfG4VAv+cb)8k1Bw zQX=V2-Fvo~jFivK^nlJ3`MVn)ha7tHFwieZ%`|59Tby_Nn`b*$3OUG>qK5@B%}>Gh z?4R5iZ8N#{U@THOrNDZ1x`=Yq(NVu{Nj?ItG0siNscHKS!?o?*{J6TF-X)q9nG=eG zs!?bj@JOEQ-Q~-p`ir>xahwz~+OXp^XJlV*a!UKg;jE_o5PQPA*&yO-V`9!D>s;Pr zP^v595De9WI0WL z2@iXY62-X+pE!S#23gOg9{cZDxz!z?G+npuL{XW}=rRP4T}p)&3eJ+q?!H=YeA?AU zoH1jgip&@JXsb|s$>HYnGnC&xF#o3sP_*`{whmIUgi6+VQ+@a#egG8pQa@(($M?44 z7kJ~o6ib4ix>X7L*67x2Or+sEA6wglHh7;)kJty02$4A&e3tTe*EPCnj@TEH(X?oe zrd;%N{5F)oo{FXd?UXrWOZ1BFUU<_g`7@M3t-%D4_azRwQ5+{WFw&uY0r332wKB{g z0vj|6Ouh0Z;J!vwpqBQxY&CQ38%c~ zZ3{WMGC6pOAb7#5yeUIZy;)}l1Ko#q!L_S+*hpws9jyYFrBxe~qJsno%sf8WMw_~*+s zz>R!J`f?JV-umi_K46VS#&#`UuaUQpH<&oD{ozVp67p7iye>sr>b@*2MsFwN&a{3yQc@i2-i)<)Y4u`IDw%5 z>CR7Mm6JD3+DEaQi~mr$i_3-M`U0A6xke?B$mm5o@K;BupwCVBX|1Tf+w6;Xe!i5} zR00oU+G>)XXxqhL&1?WD4;-emW!9#a>vV{7+2?&>8X(w_2$E^c|ebj+}QN&(mtfjd-m~ng3Q^%GVjf; zR~z{eb#1CU4>MlDrivcBc5hu#0k;3~4txLOg5!q(gSp=C!!%09MLvlxP)Rtd)LVHf z)^%)-m>^*>#l~;qR*S`DKgfAP|L)~%T6G_#YgZfN3~gp+RA+9sp8MJ5+0imdDxuS6`E>X>TE ze=Ex(#>z)BZqKFs*NCq@W3ZT9Ll^E2v~LjsZ=_JvJV|k55&Fn;AR0ntMHG&k>2+;p z04u4&5s>JYdC>`3!brlE0*DU_qQ5-uhHhZv^V>9~#jAoqGh2fzf}N7q{mg;85n#pIN02UF55#gPB=vVy3_d zFraVW+=f#F((=13#{Px?}sngh-!=|4GU0%rN^>hiL>| zUxrKkcB>ZF(Nnp(EOp=Y`@5dA*Hm>bvm*O>9n*)Y|DSB|Wf#8XJQ+BsB4TAe<`AGh zAreH=BVJ>#P9HRLnY;Ubp1S5zCnIh(rt7byb7kyQzqLOLS=MQdAH9SxfCvID*Vmyp zrHd+W8zyw19Klgmajlzev#f2^bay!c7`i)s0N6pGd20MM;V6yVQQQe`HJE-YNP4{( z-|}E@*)!<(>1qL(z~O~&K?9*uDYee-#LNT*r065p@uc3?_U6{HERN$eX3{4 z25uhRdK@V6L|Z@^0(XVBi-*cBO+}RE%Ti_q2>pMh#7$Xi!p}Rl1j$XOHhcI?6-JAmH(eg06cyLfSH;bRGtE78* zf~gPx?q|o((iuQd)3f;ouvQpz4eZmS^hTwQE7E7byab_dBK}6_f|24{zR?P?lmL`% z8kQ}ejbGV56l?KKSt_C|!DL-ee5I!tcuQZC&Z!v^Lsd22F=Q|2uTL*=k#zB{9^X%Z zvq;?2+v#S;(sjw5k9D(^mA5ba>v{s`jxPFE*v)iE!{SF(Peg_PBb#}Owy((#)mz^Y z>DgxP;E!+aM{3JmU+{kkVN3C|b{W{0yfFX$bT9KV5BhCN4L7W`a_B*)#3{W+Pr6yeEKr9*DEy!OPa=`7G)x+LftWedAH(W=ozfLyXQ5deq=#O+Y 
zvD`4@BXlSz((9Cv>!#VN%Uo04$Uy@T)9V>po`SSOi~jX9%`2`w%qKw|q%h!CO@?#W zZ)xGbi;{JoU2DFMK4%~?`j#_tzbXJZqztpO)}&xKnDdQ0_Q)7!nKS|yBCTQ7<5Cqx z6esVWD-(H`#SxnE-Ne7+ujS>~aXD2(Xu>zlfyO$a$D6Tycycoakm=jX3=~U%<0ekV zw}ldX8NmO}+y7ehM;ck75RuxYq&cU3@Ym55d@&kQfwcsB@y+1TyoITrSqUQHNIT9( znk2q23nJ_fo0WdJ7fmT0ercIMn28D(6-`_d!L}HG4b8jICAZHdPp|boo6Wm*n`Z6M zmi(*FcA(a8f5zyspaP%u8iReQ~+yxLPM*c*ycD zi9b~{K{eUz@6e~#7jO~rz1*CS7#8)+j$R_VV5*HKMU?<3sW01)E?T#g0rO3T*OHF$ zLxJkQ`He5E1?T#ld!L@>YxjZ-{a!V%rfCjLoRTQOU=N8OnqOTFJ1cu`hOL1D&fCnq zJ0TaEr*QByyY(%}$3LAp`G_P={+$<9ym#&M*FGAuP7 zdnKRuJCuqUP|e2Z%f?J3XtiE&e&UKQs*TLGV?DZBfUW7bOs|kpE+D*Vc@@tmo9BNH zY#X;orp;Q{kMxh6M-N%QEVz87=y9XyGd`^na|W|(xets;NWfzT$Eiq>YJyltVbx^m z{J#^bko)7#^YKi$$Zc|o&caFN$JM^7H*6XDQt32ZhbG9nwm$t&;KZGoM?R%*L}W`m z?7r!*s4t4x5&8A3XuJ-`pXC@BZxeqFQM`lhf&T*~FGz~5Uc~PLK&CMlvkt6iG`kk; zzvI*MQ-jQ&9>*^j44Z zkw6$?2Db5(H5E}*F8#=(>xpsAXwvy3>63Vt#*43oMGQTM7K|2Dj(kmVmc_RU{${DD zJdD=*>*+F*s^wuakJ1PpTC@ckpIuB)hr)hHm-TIYZ4x5x`;_PNAs@BIP++2~SacWM z$dB&vG1MiuNhg(3=_3fubev;oyPA}*;>00r={yNH8%__LbU&l%)T&)~Td!W8qyCCH zl|D6}{AKHgJtT&#G0jc!d6}$R6tpk_ll5<|?K9=SgVWHD z#{_acwe9(%NS;;danNRZb{^nExCR$hZ3&?9D~(T&mR*Qg<=={=IvED}bi7%LBktbO z;{xr7Z{I62`z;7rwPhNHy-swLSv-ot`j?yC*#-yz4Nf(1iqh$hqPJ*nmS%<&`gW;l8*0e$sc|Jx4qef zDmHkC++3d8#$hE#rDG9qG$Bg93pDOYK+c5%SI6_Mz(~ln*lB_aTMN=M#QujikLkO? 
zeSvc&A&+)grO2=JWpCyOpLzWatzj#xLX?>K1MCf^*;i7Q0sZoRtdbO7Ewno2^4mI- zIkQH6?_0+_CEfk|G8TLhPIf7af5H3y0@V-7u7djhuK739aqDa!)8PxA=5=X(xNm+h z{uFBg2E3@Br=fUe5>Ur;Xd-WwBb*mI{&8#r2WJdY0_Ye4y7T*5C3N z4Ta1hJeYxibno`I`y*-uc_CeB$*;gQN=({xF0afF8eA0HIo6-xcAFm{ZK||UB_Lt4 zQ@)L+Svc=vKb4tzb_Qe*vodjZqb0mqZa*)-4YD&Ul1U{Vh|6hFT)QI z=*xvQ9%NWBp_02%cIOscJALeo4LxD@++T*%s~Cr#b>>)fKspyn+~%4R=RIx-$dtWsnrtnN{^q)le`>(*dd&A>yf(CdnS>{=%&qU)O5@eG%+ zNK$uMN)lrU9lD`FTmJ7D{lmZxmHP2^Qc21GB*`~zO4VY{5_p6L3LQcWg(bG*a5nU| z7{ydSD(0)e#j#=anh%guWZ};PoU3cGZ!J3-AxZ-+(sKV6+%v56k!Sb!Cxagzjc)ae zt2B~iA$~E~Gc%UF{3jH&ed;{o&mR^7N3e;DO+vCApy z=oNpN*Wsxd7bZ;3Gu+5@R?dJ3I65^`G+?X3mOp`>`2Va{buVdKormRa(8_0Z^!X&|`|T=!u(haZQ#fbw_Y zKYJgLBZ`I3oi$t>)Vs4Uk?(KwYKcuX2$kqjFrx2^vR%|}t%k_AiH!cM#tc0K_5#Ww7T0z4om$&)gDI>pP>WNe5 zx2J41ko}+TPA5T8=?#zCb=&5Ag?lK&as-Cx%-KU*IZy`YrVM$<57vVt`Jdw?ko74t z(Zcd++m(Yo?`;8EdYoNW;b7f_RC`O$?R;AXaf&CDIsMZPu)~s$a)KO(vpP4B0QhBG zz~Qv}sdFAfoGSg|uWg#ll;&9eN6@Kq->2 zMOs9E(S7ut=~haS!emdy%>)MB8lo&)c+{MiL%mCN-X8mD&sk>`m$(R3nvuuw&&34ZK9Qm&dZauTfZV zn%V!htS3gCi>1@DZW!$KVbC?~*y)W8+&4bUC52bha`-^eWwKY~T){MkDDV0(b(e`5 zQP1B)HqByv@~hNqcxew4pk=TkSyY5zH{Ok}HQ3hT{3K)eWf57ryW zXDo7_<&0YWmJCq&X%%j0@y4)v>Vb#s>dNX&I^nqe({CmQ)2TC)?UYP^*goH|s@O$Q z16o!YYmNmbE}r^)BAyKTz)2?fsDT~1D!oAHWy`J2c!<}j81^Gtl15E$N$ii==`f-H zU)Ow^ynAd<&%Y`hW3t{iy$9b!-*W9>7o=zf(uZAqC=9Is2UXJg;n$zjJ3Tii5T~5{@ol?7gR{KTF@A#VMN8x{tUN&4YddQ=-=R0I&kuX& zoDL^7Wk8w5zS0{4TSO*_EcmA;)ZA$mf#EG^qJ#Vwd+ZMpz89A%beL3gtPo&)dpm7{ z;8D2F9NmQv&fsH%4fpH+cn_u?fdg|U&0L^22Q=D~Jkq5W^y%kl8>~h#>D)C%G&my!`StVyu z3jTFJwPRVK{Ma^b`Zr>5jf1~-vG&T{4m2|&8nOQs_whGOT^=T4m8KPSd)ZMVxLyes z3-YlT^^$D;wBPI#wA|>1naUoxuc?U_ompR@5VOT*NCs~Z7Vf-7UvAC6Q98w<^ zQK`}aQB#Ih9ZXAsWHL(4D&90i_TRFiykC7UCY-s9AWMY)=oLGi2a%r#AM8BYN^H?i z0yBELFC?p;Fw$~)e!Bzrjr7FX^tHt>Q#7+>c*q-G|2T1vZ3~rtoU*}tuoXwXc|gWI z78oJ!>Au__W~4R(L$ZEtI;Pz9Ey_*>PP{PYyE0_F9p_Po z5z=-o;v2xR4x^J3&5BMYY8w_^KQr=Viwp)1>nQKOm~C~(ZOX(}FZu5LY?Pz~gpGL} zb``xV*(uyr*uf8~#2?OaocU|s&Tm6PTLpIkr)R@mj2e;<(gkHPWA7Y$DhHc2Qyenl 
zgsx#ek+TizpqKdu%A_~(y^>nfj4a=~!Erf>w>)h+W8%oT%mcu4m+%V>Sju9dV*H+a~4*REOI;D>rc=Wk{HFl<|&XH z^#ZtiGD%k)@DRGc7ah-6pX80^sO<^I$Kv5mt|KaDLUs@Fh0u8K>EEj`Z_-|J`0Vr zqOntP5$tu8&u=7u8MP*DGQq{jsZ2v)XtzRM zKz-7zQF=(ER9R8+OXaTEQv=59s8f#xxuMBffa7*YHHYl*=3zKhqEp_JB=>#_)VF0j zi76|)nm;Ax$98}vB4IW*pM^#3g8c~QVoHd!!wX$q<;tAT9gYjN1$)k=ePL>ZnPdVl zx$m^$Ils(xOFjEUq|RLUw=WyFZJ7Wl)>p9m7LoEuAnN?s&v~@=46f4X3bCZf<|Khj zjm{a+zWX3UhfftHtm((`3#)D#&W2J*wN~u&YHW6345Jm3vFbSZi7H!J38yiz&zSSJ zyv$_f%tv*p$VRt?zrP}4&pyq!w}^Yq#@!}A{J6TLKva-A z(nojlK*OMcC!bS1w2V_3My+#d+W9`~YnifBH-{eNNFKKIf%LwX8#N`Lx}Aw1SZ_v1 zc6365a~mnhc`cCd9IS>HM@f#V%3iF#B2_9@@#~l&6tqnsW1iZ+QP+S8(q3-wd0%V< z8*v|1e7wl6EcAycp5yw2P9Mw~dG7hTzPyQ*80iU(yi;%oFE-QTT;E0GvUg*u^e)HG?Y6q!}dkz{UJthO%XJqq3P9(le&y*T`-f*NXDnC4I`s^a=N8ZS}>EC(Y?rN?+6Eif&Y!&ifI!gm%eUGy2roT6BCD;}8wmow?#Gq_4;v zLfbAFFefReZn{;B88BZb9D5wJ7C{yBLBmU}CE7V;gZm+@tg3dpPbVyGbP3^ou{)0; zyY{;C&M97H`NdVh;6T;W;@%Cb;*@zIpFdHM4|?bTN++kyv`%jSRih0f7OmSngxfe2aQ z{AWxs=ZSjpNoh_mY`?|Ky;KAo|DP5j{2Csy*Rx66bv$B^sUg~8v&j-A+JWt7X<9@gvNtaz&m+vh7>N>FW@8v53A434@HvGARV&8*~ z$qBFna~Pr9JWaI!{h2if>R>mrkMHtpQ;UqVO}B4nVjA3rm_s(y?%P z*!K7w#E#r5r;lN1Ev2%P5ZInb*qLi#fD0VOP|%xeIUxVkNT)>6d z(1#kFBJ>vPt*HCSj$8mq)6QR(u-P ze*ZQ^=HlL-a8fe^P`IuGa1L`o)o@qqzrBwaeeLtq+O%H(Z0$2p#>7C-YX}ANbN;^KDCL>%dhOEl?Zn2i7jGe^$(^!b7HlZ zCF6%LPL+C(;wV@qQ&K`)bAyKN!C27JX8rOxc*KUY=-7E=$9F--<2i6A_S~SBRTIZt zi*Rr>wSQ^bMviYDv|W$*>0i=Ar7Y&2dw!6EOQ+g$dbfeN07uU z%aDZ%h6e~_TtAFq5nb!jp)4#`WeJL`pi*{uJ54#P7%U`vRU5a*P7Ov|{dy{+-xV`H zku3XRS)~0pppRU`|+aU_|=OgUFE)MgREZ2wS;KCY0vVYms71I+$ zq*O7wv|e^}jCSw`k=+$PE7ch}n=5~I72_k5TK})3v+#@RYr?pM5+W&Gi!>tA9fC-Q2#CbeNcRHL zBHbO5N=o+vOG!(!Ah|3Z3k!=#r|P?7(;ADr9R1ch?UTb!i>SWASa@=C2`i;#^CCcJw+a zz$V6zd<-%hl;!Ms&_*=IJAwNeX>Z%}=c2ojEchV!GE}B78`WS{16}(T`lr?Bgi!{b zmD6M9_jQ}=(EaVLIUU@Y2rn^?6(+OP=(F#zFQg4U`H$Q#5O9bE4p`f{L3bi*&*#9` z*P*D>t*n;!q1#Vk?IC zC{M=imHN9T-V$wFoL|&0?25z^Xy^Kmld$BN`INqd@NB&sv3?$M$@QP@qLGm~SkQ!1 z$;#f)oJhr9j%|4R{X2;?&G&Kj52i(|b6_q+|9%*bXQP&5=dL#X9b_ 
zE+&~|8w41pvpX)l*8ZlgmNCs}UQD9qUc_$&UE*|#=@+%Wd@TrP40L3G&mNjSix)Q1 zuHSK7ylxvK{Y!uIDZzewFvAmpTJ?aoV|;xRd}#m1NWF$I)ZJsYxbMZGBECs9?w7f6 z9NYi|m9G9=fBXvr)9_2{HGN!cS%7Ov#OG;-fbqKH& zn@D=ob&tY>-^~KY_yTZhbm&(bPF@aVi>v2ls*;bg*Xv8Pzeu1=Y59Jk*nV$+R82Gy8+>&eI{&D|WrnKf?oxrWRCS>vT{0}=k zpJ5SXopEq=dKOx@1W7x!{ z%XsEZbj53I+kVEU&GwM}1_UT+DU{4La3=0d$}~3p{RYoyr1fgjJ7_E!-T!51%H`@D za-Nm%Fcxz66DM|^53Jcqr+w8)bv*{Sq|@cFZ@-P(+`Ajkog1}uo@$r*j=S5*co^F_ zp6?i+cvg*4HiCo7${QRv3TKV!j{jj2W7kP)o1a&Bsq>J1^Pvs?%ZG?Oai42T_({3C zPwk%JfHt|E}Wi<%!{7!tqe-lcJhg;Y71TsNs8rw{NP+nPK=g$-0zL_G!BuZ^XYK z!`(l6HHx1}aO>V>s`XbqCZ3(QZe$Y*8d4f_uW;zJX&N{)veYxOS0|!>1*LABLZsf4 zA};`6HyJV;g_`wRN|*VGzvC>^R!olcwtjmyVcc-ck($BWj-%1noIL#7wtvx8>V~H5 zN-sB%vD)HJ$osBHcp@kP=mk=nSdBpiJulpR)*r?{4G(+e?oyDNTi?Te?XlQJ+d!sW zUz=UAgue|JD|t?!GYC z<7v~s$zryWOkk(sL_<^8cA$BI2<@wlfTd}+sDyI#5lHd{5%IIPHu9w5EHI{&UxOxG z!(OYW=xw4V{mgb3YHCUr z={dhj3BSW(g^9qM6HJ1Sx|nF96R3?^q*xaS@qeBRJLFv7Ib zm%hOvR#^AHHq6C;<*OF0_If-Gh%!i0=5GagZ$)xW3%uB!J0ptJGj5koDNlqoqF~Lg zB`Co-d~d;8LZ=Z~_++_iGX5VaBderY+=Jh$>@qZ0A8h>{4QG7*ss=QBNs|odHaP!I ztOqM+t_w8#(1)D5cW`}V;fEUHNqecJ<|2B&tW%2=8dHG1uZ<2SnAWJclpxAYD&CTX zj@@BeL`xkRW-<K+8?IRWz7CK|i{B$3Jf*sl}NIp*tG}sX7ze520FxTV>zhA9?J<0Njv6 z-v2*}STZnu^(=GnXkt;T4We}bny0;C5!VT3vbmL(n(*6nBtq;6*2OROTZ}aQ)6Bc3 z!;{iA!1&(H6j1jun~iU?V?PWj=`W*r^suC^g~P`fH6vE8Wkyd;};>$aqhpu^M6fBclZXZ`(b=1qddwj`fv5Qlph;#hrK7FBW{QX>3x0m`*#JM$Anx;d;Itxk%JA!EF> zQBZlNnnI|7%eMf^+2g7iB0xMfwo;T8IO$NG(4WuZXr+5BHS-RC!VmIva|TEFp#j)x zpAa7CqX~iAqid1#IC&^xFPVJBq4&8*&tCU69g5gHand`mmCkluji$OYm2DX8ym=dr z@WV>+til-74-hcCQG%46wU%a@8WIHy!c|v;|1FLw5h@}VKl-0AN7oFpnvJga8I-b1 zeRASv)lMdY=|KqIb5A_CGzgMSc}(3kP0+&`rBzFIc?IZmuh&vF@VOpOY347$qiXK1 zHAyp^R<*8xhC|PP-zu+T!Oz|ybT)s!Tm1WD1lp!C)%0Xh{5BLNTzGR*8IRf@hzfhC zlZ8LUE*7?EVu8FB<=a%gL9Kd`mG1$-x*k>TqMB0sELzcn!S`+vQ47QdKWiu!qcN^x z5l1!3kVS-JJ=*W5qK}H}JE{a_1$pIZYblw(yV|07U zhKW`ixg(KM33faHHCvgD0tj$>XUCSf3tMp@%2X^rPS1y;(-nx;?4Yk5FzrvG{?)7p z3H0(|I-BigdMmko+J=Z7UH!LR6@>Wnvg!IPw~c)H{I|W0|Ap@a7vnPI5)LV2qFpux 
zBf_e8Nvo!}mH9wupvf%JIYa73*m20akd7O83&ODv8*aGs|K3|5h86&cDW3hW%{2Ig z0-HZtkN(Bs_2W;8q4n#TW+M5AEH)^ps*L!@PRdmCRFez+!O1i*7ysSb^QzQq!H;n$ zMM!W92Mo1)HqUp6werPFPj?=GP(9d58P7$VcvH7JEe0%UJ6ALg(KK3=ZG%erg#$LL zZX=qEp!ALs8urS1cKt61Z#5vbsGgjcN8@C9x1bRAXLcf5PCs^LX4_o*S@Q4tI3IuY z0e{BMOzCh=6(3wIaKqx5X^?jtTptVOUocS7oA(q@*hSrAT<3J>ff=Urm;;x*jPGADe+Lgxw9_#8d=kcjm>uruM#3)7ltuNqIf_`^>kr z=D#%(tDc9$!_iHM|28XN3zs_a2P5Y61pk~HijAC)(vB+e4P{xGh!U)mQPU9G(^}E$ zq{6F&dc5{D9xs^e$nr&sL~F6a=FekLr-ck=UIV$yQ>yHcMt}Y1_E`aR2}c>v61sMd z59rxAP|Hj!=Z(0^U7q|kcS?ONL`^G$(D-BkA0xfAmei*s*V^d}t_@q?>hTxmf zs52T4mLzPw4NWX}BnJ>kXHg0HMrGi691a)`o^oN%?WJdI6}`<(cq`GtmlM(i|BBt5 zkK9Tvy7Jc|D4L?LN>cSb9DFg20ozJt6guB+n?1y82yc~Owq z*g20_Q`go1P1zSUp*v6A-N*#DX{r8ipGv^1{n8{6zwzw?rfk@Y;VaM;Xxl+?%GWmJ zc2onSd^1l*G!I?9=G`7eQXB#ySP97R%i5Lah5ms1RnMC(+C5vXN)7)cVBzW=-QAD} zDkb;Jk8Jxm`^Ed$i_gath(~TNmb5GF1PoD6~$A+TtyC?speQ)HdyS*)dlvAp% ze0N$iGxL+@PK&)JzUnt44!M`g3=|`xY>|?n=)NKMs)k&v_?%0&)|)W*IQxTs1GO4h z(4QXu1H{WhsVOChIel;(oQHetBxF~e0FffxRxI3tO9ke6=HhuIu*2Hs9>aqlw8zlJ zPKCvgd3#$J>a3pPAF&Rg&CW(ZY!Oq@kc+%;viR{<&M&AI2S zR{+w29ov;0R^hWE=*IDs^7aSkX-JRUE3RDoMY^IaSOI)3qi&BB+aGVTPKcpSB7+PW z$xPgLj}wn-75ZY0&CeYcrkCAjA=pixBk*}mDB8+q-#KwXhXx(Z&H3q_-tUG^u3KrS zMaJO!z1x0+`VahLQJC6!MP=&h8JEKl?JO9RM49J1-9jsWj<9K3mNOn7{YkO$6tfnJ zrZc^$ik9;IxIA?QJ%)k)(@Rfg1QK?#{W+$a&(u*JL>*$13j@>TrGuroc8W`&)o#2(Fsp!-S|o zPRgI>*kIQKU#ci;D2-5NUO?~H+O_g>NS1;m)&r>-FS|)$>!-UPGX3lF>PeBxwh`-V z@?hWD$cO#KYSV<5Pu0}pQ_{$^7^c|3$L~}&#RUfBt$`jc+jgb z+3o;Ydu69Vw_8lD#{4)x*o+@bb-B1JhT9#g0g3d=$E>BHG#>S{$m7nj+|`pweao90 zWVbGHO=SG`hBWV{2C;KSDAiqY%Z7j>xYX3f;(B;Xmh+ zS@Zbc6Up)%G9)cPXM)|~jY>-5KYYdUHy!lMRYPr)Q}njJj7(luc4RV|!+hb)RVpPm z3Co0|%~X4-C&e#sPMMKrBe+G#3y>X&|I}k1P!B6WJrFH`dQkR~apS&ZlMqf`C0A!l z7QY5H7%Dy0J$WAEo6BEAJw|_iXcCRmnZDSUBCNEYp99Ad_Jk7=c5I{4SP)T5$+#I{d9? 
zY>zEa-s*IZnx^&RIAN4}?h|#29KWieL1WzA*uePSZ*q5}-b3 z1_lFgfv?&wDyYHsQ&3>S1h_4uy&6vvemG7BzQtbZwSW#3PY<)DIX|aZ3KBNY@jESF z5SGjjlu{(Tk(#MTCS}piwMOaJf0Zlfydmf=pX8(iI-&50w}0zDpdefTg@UjNkL991 zDv(c0g}YMu88>?AkKa zF*f^48Ou>9NIstuMbI3v-nc5vllYt{xu{=n_WjFJWxw- zb?9nQrO`IXJV^L1^L4TH;Q=k!>uxv^xJF~_xL?YXA>9QIO@W$<7dvo9B@b1Gbh+Fk zwr6ipPHW2PSA2~*CZba3f4ya;223nYR@{~kt=G1WNY`X%qcHJe0H|RCu8A*C%(wZN zrIlgYn#Isrmf7~vo~+E^X$8-D!Xn4V3LIOfCi|V(@vRY*J3Bm_)tO0^Sk47Y2rhGR z02EQhs%Q8PLK^*$EZS5-hhCio2l3VGGuo*Kx0~9JP>M+iyAA%=txg1ER~UrQ3zZh* z?%v%m8`S@dT*qI|rpwM!5*2(ZjE%xf(bK`V2ckga_{AcDX{fiQEPoBznD?7Fi_OA* z!e{32b3Xq8h6X3-mz?R(NiW!aww_EW`#hYYg}H)5zYiiIC;S~i>n=oYNl5zmG9T0x z(dykz4ZEnr%saM}pfkYrOn^br)6I1^xhTg-yh9>aR@2u}ywYF#>tM3D-G0bSVxb;v zxg@Wmpbo41K>?bv<8GwW`ZMX8PkAx0MN!t zYo()ve(~by_2SF$!a?#>vedgL=B?q{L&A|Dmj6y3glO^ebRK?$svEQoQyR6S9a#QS z>XF!d;AXsWU*H8_gsV(FSwSSz5>1yM_3obV`Pe1d3Ee*<3ZzI^XifOeaUbv^tJh^@ zQsd{DnWReRLHP^Q{!gKnEY40s$w<@#|eT5$jN6AL^bnS)+m?PlKxM zE58Ti;+4q^x(h|F&U-NjJ8plir3!|0fmT&DuGD~+ak!K`fnK^FB^Q6lPfV5|U~1iJ z%Iq~m-j<483q%z(TIxZi<1{B@?E^~+*DJqwl9 z!qtQ=$>Wj&tFg0YjjlvYnkxylolNLq1U)nY(cM6UnQdzsVm2mPy+0rPQ>WxEhQ`MA z9xALsrp>X!kWDQ$Ay9<}CRLYua<@KjJ}IyKmgo=@jno|pt~&AQ5Fgx<_td2Eq!o45 z7Mx<(^oR%Q_v}#eEQa{{+G4< zedbR+%1hr|-cg2`S>J$Da7!;W{n;a8Ugp4X62T|=6$-^BZ$FP=I&B4{&)5oN`fvaf zVLF0J0A`iek~5Qy@;+Hv0+IcPNFL^C^1*eIZJb&~o^0z#;xTqbhiJ&Uj-?MB~`nEDydOzlW-jXCB zKoI=s2c>O>2fnXy=M>)`Av%vZY~aU@)bMx%azQ+t-Ief|np8MnMCL(HFEIk~nA zq+ly?p)Mo-?kv9Jn7fiuY35%BEYPu42iIqwvG3=@2-hIb*Nz)h0-D=|XzK8_>yNdQ z@V4i3e@{*y6fw+Y*E4LdefR9kF$(s%EzaL8#lz+QdvYzc|5;q)8DBf5qE;0B!U75^ zz8nd7Brxc~?!A*auu`+Hz5s%4I=?`AE82>N=z+46l3j~h39i_bEZM5Z5I&ZyS??AE zj|5r!ot51h&irP{wraa4cGsySNm>M?E4QDK1UU;VH^gSdD%zU#&-*F*kmWT;V3u)` za5(oR41CmL;}Z8R2SQ#O_YLbF-h!*-bn#PvnMhvq+@-i^>3yuAJf>&O@B@=AWE=FH zg~8J7Z`{d{WO-%iQFUBF?z25Rc#m>^g?B@FKMqK@FX8yD@b$+B`C+iO=; zMIRFuO*QgvHr&*u5!h0{2mhSmH8Wx6pe9?fqA{MUN@hS!u32jCN_`e*S#%IdIn+!P z?&%ZoYY0_Cs!~14)|f}+B&dtOVg0#n64y8|F0+ZGBHu^jrr5fF=qWK~9XJ&kRN@$&M_b|)pDzGI(R4I*nS(yD 
z@<}gyJcf5)+RGH)Y#eP?!oqX=<$|ya9f9GG`O6F|v)58g{(2OOGs8*|zj7c%!dDCV z*X&Lk{8FX*U`bv6kK>qB8r&^!1;rtXOwLjDABqHpf_rbfl9Cz1!nD&bQzQ*-R<@IB zDz7P`tc`bE0SkbX7h*1y!~wVL@sri*yeAsEt{um*p5IxZH_|}ggOoO2^>{%o9w^=$ zKr-8|&dkU`?lDQFjhwbj>cRL+!^ty$sHK`;E!Cau`GO@t zRrimm-E-h!70*cd4Sl;xEmIwh0j!6$-w&lKD;t59tSL$(8QxdgZx-9NTT<Q{B1^fTwZAc<>`I(64mRg_$Oe^+0OFZ$jw22jOJaolAF)+@xTX?dFbs% z_q?LdaUk^kl0oKy1^=0q#E1@3Z7?NGEWv3{@!yUcRj<0`_ZUzKrCN{Qwp8Cb^A+2tSM;_r$ zwKKDNE*-%KWO}|HMLuVRs$VW*bF+Q~-B2UY#0<$Z;8YV*D7nU)qF;^dd2d zXIlGZ1sU>%QDpaG5W!lzf0jJbglr*FUrz;kpDWv0H#&J`&iM?q>dTI__p{}Gw^D)` z=5ocp)ymuX{>UM9=^*w=SjZ<0#mbSBBUaz0QHvl|s?;8p#G5_wEV#loTz;7pZ~Z0keNg?}}MecEWut$QNL< zi>$anHi}5!okpm*X!A0*p|xH-n%hk6`h{LMay7ZV$VTyXP0lf8mg9k_m}uM-IPVO7 zE_QMxuO=zpL0-P7r57gfQk5d4^?I+6M>J~#U9_?CZ@d0n?XDk&TA}>U?1lIEd+MK| z?WBSCqm-Kgqy`)1Y|o5KyDUMi?MF--^{RoelH*LbX|Pc|TlR!@wUoWw$}wqB3AbYWl8{$4@3@<}vCv`$mZMOGk;r z5D~cMjTIQC-GJ6YhZK&(tYlc;_HeEox#9a|P1x~0UuWhz+6Z-X zxp_$IPO_K0BZg_X5bpOjWd($@UCFy2M!S226@}M(qquvdRnHKZ3l*3Y(){%v&|EzZRcx~_;VVh`~3ug zlZXCroVBeWF7pDhcUT>&IVg4=2e>^G_M%*zOU@l8EFQMw9DW%2hl~k3RMB-k(JDSg6QN)t7OI!~v> zRgF3{61ebnN|-7qGERR%eLDC(JQI(SOdkHeYf)=Rn{(Jz=nyRnWKH};iE_cXxW+ z)i3Yjc|Sfp5E9YON(T=KpVzRgv<7HTxRDwKpW@o6E@dyhI$!GJ4q1(LPt58eGe@{w z^Z1bUe<~)|{Vg&a9~i{#39NToXIlbr@ACkA z^A?H6%)7yYReOY_GxfhSF{xX_#BeXBv*Z{hxU#=8UT;u?m}@vV)t&sx9{QLP`m;5A z1dUq&muM|(G6^GFW^6ej9TVvRrX&affzRU{BYCcm#`^V(w%`o{Ad0lLDl@G= zgvTAs&HSEI&HU%D3CZkEoEI_KX8OFa0*`ezy+7HhGaJ=enQN0~V*2STqcYl`zRdB) zv2HYpO3_CNu4s(kSXr-|cw#RYv9|OM4miicP}R99?nvbNpzqjBl52ja`ODW=1|eHVaDDR= zIoa~c@6sS*v4kE&3Lk|%6*jv4h4noN98{?@IqScE>@QiSOHu=agGaa>_s=01QhYW^ zu2G%3aC2z9@<;UIA4eic+#;g|Vs6DHT~nsyWix_u?eCQRhk75b&n^R^Y#EIvk3YCr z-fHc8`+sDPT18^{G5=XL1WDU-3e|%ypKu$)L!XNmEM#qB%e%kf+2s|h{2ed z*(ryPuXE>EIH);qwB>qi!{_?e%%BqRF}}r5p|rQEd2z>29vsmqk(J=SptW!D#C0fS za?$Y@sJ(eKH9nf+UNHFK)zJiN^_frTByW2gV{+3EX({8Du^)uq)p^p-|6qayYvb!* coiJ_DANvZQ?zCUUJ^=ny-)Je;D42iwA5Ec?8vp Date: Thu, 3 Jul 2025 15:07:19 +0800 Subject: [PATCH 007/257] chore: release v4.0.8 --- 
pkg/utils/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/utils/constants.py b/pkg/utils/constants.py index 450a2e16..8c4da3cc 100644 --- a/pkg/utils/constants.py +++ b/pkg/utils/constants.py @@ -1,4 +1,4 @@ -semantic_version = 'v4.0.7' +semantic_version = 'v4.0.8' required_database_version = 3 """标记本版本所需要的数据库结构版本,用于判断数据库迁移""" From 3f0153ea4de7f55a5fa2da43725b1852d8e62c07 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Thu, 3 Jul 2025 17:26:17 +0800 Subject: [PATCH 008/257] doc: fix incorrect 302.AI name --- README.md | 2 +- README_EN.md | 2 +- README_JP.md | 2 +- pkg/provider/modelmgr/requesters/302aichatcmpl.yaml | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1ca4553a..2d4ce119 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,7 @@ docker compose up -d | [智谱AI](https://open.bigmodel.cn/) | ✅ | | | [优云智算](https://www.compshare.cn/) | ✅ | 大模型和 GPU 资源平台 | | [PPIO](https://ppinfra.com/user/register?invited_by=QJKFYD&utm_source=github_langbot) | ✅ | 大模型和 GPU 资源平台 | -| [302 AI](https://share.302.ai/SuTG99) | ✅ | 大模型聚合平台 | +| [302.AI](https://share.302.ai/SuTG99) | ✅ | 大模型聚合平台 | | [Google Gemini](https://aistudio.google.com/prompts/new_chat) | ✅ | | | [Dify](https://dify.ai) | ✅ | LLMOps 平台 | | [Ollama](https://ollama.com/) | ✅ | 本地大模型运行平台 | diff --git a/README_EN.md b/README_EN.md index 791ea1a3..56425bd5 100644 --- a/README_EN.md +++ b/README_EN.md @@ -119,7 +119,7 @@ Directly use the released version to run, see the [Manual Deployment](https://do | [CompShare](https://www.compshare.cn/) | ✅ | LLM and GPU resource platform | | [Dify](https://dify.ai) | ✅ | LLMOps platform | | [PPIO](https://ppinfra.com/user/register?invited_by=QJKFYD&utm_source=github_langbot) | ✅ | LLM and GPU resource platform | -| [302 AI](https://share.302.ai/SuTG99) | ✅ | LLM gateway(MaaS) | +| [302.AI](https://share.302.ai/SuTG99) | ✅ | LLM gateway(MaaS) | | [Google 
Gemini](https://aistudio.google.com/prompts/new_chat) | ✅ | | | [Ollama](https://ollama.com/) | ✅ | Local LLM running platform | | [LMStudio](https://lmstudio.ai/) | ✅ | Local LLM running platform | diff --git a/README_JP.md b/README_JP.md index 38a47153..f0423401 100644 --- a/README_JP.md +++ b/README_JP.md @@ -117,7 +117,7 @@ LangBotはBTPanelにリストされています。BTPanelをインストール | [Zhipu AI](https://open.bigmodel.cn/) | ✅ | | | [CompShare](https://www.compshare.cn/) | ✅ | 大模型とGPUリソースプラットフォーム | | [PPIO](https://ppinfra.com/user/register?invited_by=QJKFYD&utm_source=github_langbot) | ✅ | 大模型とGPUリソースプラットフォーム | -| [302 AI](https://share.302.ai/SuTG99) | ✅ | LLMゲートウェイ(MaaS) | +| [302.AI](https://share.302.ai/SuTG99) | ✅ | LLMゲートウェイ(MaaS) | | [Google Gemini](https://aistudio.google.com/prompts/new_chat) | ✅ | | | [Dify](https://dify.ai) | ✅ | LLMOpsプラットフォーム | | [Ollama](https://ollama.com/) | ✅ | ローカルLLM実行プラットフォーム | diff --git a/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml b/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml index 9d8ce9ea..2d9df778 100644 --- a/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml @@ -3,8 +3,8 @@ kind: LLMAPIRequester metadata: name: 302-ai-chat-completions label: - en_US: 302 AI - zh_Hans: 302 AI + en_US: 302.AI + zh_Hans: 302.AI icon: 302ai.png spec: config: From 4005a8a3e225fc99b5cadaef5d906972d3d7eb7e Mon Sep 17 00:00:00 2001 From: fdc Date: Thu, 3 Jul 2025 22:58:17 +0800 Subject: [PATCH 009/257] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86=E9=A3=9E?= =?UTF-8?q?=E4=B9=A6=E4=B8=AD=E7=9A=84=E6=B5=81=E5=BC=8F=E4=BD=86=E6=98=AF?= =?UTF-8?q?=E5=A5=BD=E5=83=8F=E8=BF=98=E6=9C=89=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pkg/platform/sources/lark.py | 142 ++++++++++++++++++++++++----------- 1 file changed, 100 insertions(+), 42 deletions(-) diff --git a/pkg/platform/sources/lark.py b/pkg/platform/sources/lark.py index 
49ff53be..9c9b5605 100644 --- a/pkg/platform/sources/lark.py +++ b/pkg/platform/sources/lark.py @@ -346,6 +346,8 @@ class LarkAdapter(adapter.MessagePlatformAdapter): message_id_to_card_id: typing.Dict[str, typing.Tuple[str, int]] + card_id_dict: dict[str, str] + def __init__(self, config: dict, ap: app.Application, logger: EventLogger): self.config = config self.ap = ap @@ -353,6 +355,7 @@ class LarkAdapter(adapter.MessagePlatformAdapter): self.quart_app = quart.Quart(__name__) self.listeners = {} self.message_id_to_card_id = {} + self.card_id_dict = {} @self.quart_app.route('/lark/callback', methods=['POST']) async def lark_callback(): @@ -397,11 +400,69 @@ class LarkAdapter(adapter.MessagePlatformAdapter): await self.logger.error(f"Error in lark callback: {traceback.format_exc()}") return {'code': 500, 'message': 'error'} + + def is_stream_output_supported() -> bool: + is_stream = False + if self.config.get("",None): + is_stream = True + + return is_stream + + async def create_card_id(): + try: + is_stream = is_stream_output_supported() + if is_stream: + self.ap.logger.debug('飞书支持stream输出,创建卡片......') + + card_id = '' + if self.card_id_dict: + card_id = [k for k,v in self.card_id_dict.items() if (v+datetime.timedelta(days=14))< datetime.datetime.now()][0] + + if self.card_id_dict is None or card_id == '': + # content = { + # "type": "card_json", + # "data": {"schema":"2.0","header":{"title":{"content":"bot","tag":"plain_text"}},"body":{"elements":[{"tag":"markdown","content":""}]}} + # } + card_data = {"schema":"2.0","header":{"title":{"content":"bot","tag":"plain_text"}}, + "body":{"elements":[{"tag":"markdown","content":""}]},"config": {"streaming_mode": True, + "streaming_config": {"print_strategy": "fast"}}} + + request: CreateCardRequest = ( + CreateCardRequest.builder() + .request_body( + CreateCardRequestBody.builder() + .type("card_json") + .data(json.dumps(card_data)) + .build() + ) + ) + # 发起请求 + response: CreateCardResponse = await 
self.api_client.im.v1.card.create(request) + + + # 处理失败返回 + if not response.success(): + raise Exception( + f"client.cardkit.v1.card.create failed, code: {response.code}, msg: {response.msg}, log_id: {response.get_log_id()}, resp: \n{json.dumps(json.loads(response.raw.content), indent=4, ensure_ascii=False)}") + + self.ap.logger.debug(f'飞书卡片创建成功,卡片ID: {response.data.card_id}') + self.card_id_dict[response.data.card_id] = datetime.datetime.now() + + card_id = response.data.card_id + return card_id + + except Exception as e: + self.ap.logger.error(f'飞书卡片创建失败,错误信息: {e}') + + + + async def on_message(event: lark_oapi.im.v1.P2ImMessageReceiveV1): - if self.config['enable-card-reply'] and event.event.message.message_id not in self.message_id_to_card_id: + if is_stream_output_supported(): self.ap.logger.debug('卡片回复模式开启') # 开启卡片回复模式. 这里可以实现飞书一发消息,马上创建卡片进行回复"思考中..." - reply_message_id = await self.create_message_card(event.event.message.message_id) + card_id = await create_card_id() + reply_message_id = await self.create_message_card(card_id, event.event.message.message_id) self.message_id_to_card_id[event.event.message.message_id] = (reply_message_id, time.time()) if len(self.message_id_to_card_id) > CARD_ID_CACHE_SIZE: @@ -430,7 +491,7 @@ class LarkAdapter(adapter.MessagePlatformAdapter): async def send_message(self, target_type: str, target_id: str, message: platform_message.MessageChain): pass - async def create_message_card(self, message_id: str) -> str: + async def create_message_card(self, card_id: str, message_id: str) -> str: """ 创建卡片消息。 使用卡片消息是因为普通消息更新次数有限制,而大模型流式返回结果可能很多而超过限制,而飞书卡片没有这个限制 @@ -440,7 +501,7 @@ class LarkAdapter(adapter.MessagePlatformAdapter): # 发消息马上就会回复显示初始化的content信息,即思考中 content = { 'type': 'template', - 'data': {'template_id': self.config['card_template_id'], 'template_variable': {'content': 'Thinking...'}}, + 'data': {'template_id': card_id, 'template_variable': {'content': 'Thinking...'}}, } request: ReplyMessageRequest = ( 
ReplyMessageRequest.builder() @@ -467,12 +528,40 @@ class LarkAdapter(adapter.MessagePlatformAdapter): message: platform_message.MessageChain, quote_origin: bool = False, ): - if self.config['enable-card-reply']: - await self.reply_card_message(message_source, message, quote_origin) - else: - await self.reply_normal_message(message_source, message, quote_origin) + # 不再需要了,因为message_id已经被包含到message_chain中 + # lark_event = await self.event_converter.yiri2target(message_source) + lark_message = await self.message_converter.yiri2target(message, self.api_client) - async def reply_card_message( + final_content = { + 'zh_Hans': { + 'title': '', + 'content': lark_message, + }, + } + + request: ReplyMessageRequest = ( + ReplyMessageRequest.builder() + .message_id(message_source.message_chain.message_id) + .request_body( + ReplyMessageRequestBody.builder() + .content(json.dumps(final_content)) + .msg_type('post') + .reply_in_thread(False) + .uuid(str(uuid.uuid4())) + .build() + ) + .build() + ) + + response: ReplyMessageResponse = await self.api_client.im.v1.message.areply(request) + + if not response.success(): + raise Exception( + f'client.im.v1.message.reply failed, code: {response.code}, msg: {response.msg}, log_id: {response.get_log_id()}, resp: \n{json.dumps(json.loads(response.raw.content), indent=4, ensure_ascii=False)}' + ) + + + async def reply_message_chunk( self, message_source: platform_events.MessageEvent, message: platform_message.MessageChain, @@ -512,43 +601,12 @@ class LarkAdapter(adapter.MessagePlatformAdapter): ) return - async def reply_normal_message( - self, - message_source: platform_events.MessageEvent, - message: platform_message.MessageChain, - quote_origin: bool = False, - ): - # 不再需要了,因为message_id已经被包含到message_chain中 - # lark_event = await self.event_converter.yiri2target(message_source) - lark_message = await self.message_converter.yiri2target(message, self.api_client) - final_content = { - 'zh_Hans': { - 'title': '', - 'content': lark_message, 
- }, - } - request: ReplyMessageRequest = ( - ReplyMessageRequest.builder() - .message_id(message_source.message_chain.message_id) - .request_body( - ReplyMessageRequestBody.builder() - .content(json.dumps(final_content)) - .msg_type('post') - .reply_in_thread(False) - .uuid(str(uuid.uuid4())) - .build() - ) - .build() - ) - response: ReplyMessageResponse = await self.api_client.im.v1.message.areply(request) - if not response.success(): - raise Exception( - f'client.im.v1.message.reply failed, code: {response.code}, msg: {response.msg}, log_id: {response.get_log_id()}, resp: \n{json.dumps(json.loads(response.raw.content), indent=4, ensure_ascii=False)}' - ) + + async def is_muted(self, group_id: int) -> bool: return False From 68cdd163d30f02e03fdade10feb6006bf7128d31 Mon Sep 17 00:00:00 2001 From: Dong_master <2213070223@qq.com> Date: Fri, 4 Jul 2025 03:26:44 +0800 Subject: [PATCH 010/257] =?UTF-8?q?=E6=B5=81=E5=BC=8F=E5=9F=BA=E6=9C=AC?= =?UTF-8?q?=E6=B5=81=E7=A8=8B=E5=B7=B2=E9=80=9A=E8=BF=87=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E4=BA=86yield=E5=92=8Creturn=E7=9A=84=E5=86=B2=E7=AA=81?= =?UTF-8?q?=E5=AF=BC=E8=87=B4=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pkg/pipeline/process/handlers/chat.py | 50 ++++-- pkg/pipeline/respback/respback.py | 9 +- pkg/platform/adapter.py | 4 + pkg/platform/sources/lark.py | 135 ++++++++++----- pkg/provider/entities.py | 4 +- pkg/provider/modelmgr/requester.py | 27 ++- pkg/provider/modelmgr/requesters/chatcmpl.py | 167 +++++++++++++++---- pkg/provider/runners/localagent.py | 44 ++--- 8 files changed, 323 insertions(+), 117 deletions(-) diff --git a/pkg/pipeline/process/handlers/chat.py b/pkg/pipeline/process/handlers/chat.py index 9b3e0cd5..3a5925cc 100644 --- a/pkg/pipeline/process/handlers/chat.py +++ b/pkg/pipeline/process/handlers/chat.py @@ -59,8 +59,11 @@ class ChatMessageHandler(handler.MessageHandler): query.user_message.content = event_ctx.event.alter 
text_length = 0 - - is_stream = query.adapter.is_stream_output_supported() + try: + is_stream = query.adapter.is_stream + except AttributeError: + is_stream = False + print(is_stream) try: for r in runner_module.preregistered_runners: @@ -70,31 +73,44 @@ class ChatMessageHandler(handler.MessageHandler): else: raise ValueError(f'未找到请求运行器: {query.pipeline_config["ai"]["runner"]["runner"]}') if is_stream: - async for results in runner.run(query): - async for result in results: + # async for results in runner.run(query): + async for result in runner.run(query): + print(result) + query.resp_messages.append(result) + print(result) - query.resp_messages.append(result) + self.ap.logger.info(f'对话({query.query_id})响应: {self.cut_str(result.readable_str())}') - self.ap.logger.info(f'对话({query.query_id})流式响应: {self.cut_str(result.readable_str())}') + if result.content is not None: + text_length += len(result.content) - if result.content is not None: - text_length += len(result.content) - - # current_chain = platform_message.MessageChain([]) - # for msg in accumulated_messages: - # if msg.content is not None: - # current_chain.append(platform_message.Plain(msg.content)) - # query.resp_message_chain = [current_chain] - - yield entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query) + yield entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query) + # for result in results: + # + # query.resp_messages.append(result) + # print(result) + # + # self.ap.logger.info(f'对话({query.query_id})流式响应: {self.cut_str(result.content)}') + # + # if result.content is not None: + # text_length += len(result.content) + # + # # current_chain = platform_message.MessageChain([]) + # # for msg in accumulated_messages: + # # if msg.content is not None: + # # current_chain.append(platform_message.Plain(msg.content)) + # # query.resp_message_chain = [current_chain] + # + # yield entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, 
new_query=query) # query.resp_messages.append(results) # self.ap.logger.info(f'对话({query.query_id})响应') # yield entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query) else: - + print("非流式") async for result in runner.run(query): query.resp_messages.append(result) + print(result) self.ap.logger.info(f'对话({query.query_id})响应: {self.cut_str(result.readable_str())}') diff --git a/pkg/pipeline/respback/respback.py b/pkg/pipeline/respback/respback.py index 4ac4e1e3..52714ce2 100644 --- a/pkg/pipeline/respback/respback.py +++ b/pkg/pipeline/respback/respback.py @@ -3,6 +3,7 @@ from __future__ import annotations import random import asyncio +from typing_inspection.typing_objects import is_final from ...platform.types import events as platform_events from ...platform.types import message as platform_message @@ -39,12 +40,16 @@ class SendResponseBackStage(stage.PipelineStage): quote_origin = query.pipeline_config['output']['misc']['quote-origin'] has_chunks = any(isinstance(msg, llm_entities.MessageChunk) for msg in query.resp_messages) + print(has_chunks) if has_chunks and hasattr(query.adapter,'reply_message_chunk'): + is_final = [msg.is_final for msg in query.resp_messages][0] + print(is_final) await query.adapter.reply_message_chunk( message_source=query.message_event, - message_id=query.query_id, - message_generator=query.resp_message_chain[-1], + message_id=query.message_event.message_chain.message_id, + message=query.resp_message_chain[-1], quote_origin=quote_origin, + is_final=is_final, ) else: await query.adapter.reply_message( diff --git a/pkg/platform/adapter.py b/pkg/platform/adapter.py index 18403b75..3951326c 100644 --- a/pkg/platform/adapter.py +++ b/pkg/platform/adapter.py @@ -25,6 +25,8 @@ class MessagePlatformAdapter(metaclass=abc.ABCMeta): logger: EventLogger + is_stream: bool + def __init__(self, config: dict, ap: app.Application, logger: EventLogger): """初始化适配器 @@ -67,6 +69,7 @@ class 
MessagePlatformAdapter(metaclass=abc.ABCMeta): message_id: int, message: platform_message.MessageChain, quote_origin: bool = False, + is_final: bool = False, ): """回复消息(流式输出) Args: @@ -114,6 +117,7 @@ class MessagePlatformAdapter(metaclass=abc.ABCMeta): async def is_stream_output_supported(self) -> bool: """是否支持流式输出""" + self.is_stream = False return False async def kill(self) -> bool: diff --git a/pkg/platform/sources/lark.py b/pkg/platform/sources/lark.py index 9c9b5605..af57d66c 100644 --- a/pkg/platform/sources/lark.py +++ b/pkg/platform/sources/lark.py @@ -18,6 +18,7 @@ import aiohttp import lark_oapi.ws.exception import quart from lark_oapi.api.im.v1 import * +from lark_oapi.api.cardkit.v1 import * from .. import adapter from ...core import app @@ -348,6 +349,8 @@ class LarkAdapter(adapter.MessagePlatformAdapter): card_id_dict: dict[str, str] + seq: int + def __init__(self, config: dict, ap: app.Application, logger: EventLogger): self.config = config self.ap = ap @@ -356,6 +359,7 @@ class LarkAdapter(adapter.MessagePlatformAdapter): self.listeners = {} self.message_id_to_card_id = {} self.card_id_dict = {} + self.seq = 0 @self.quart_app.route('/lark/callback', methods=['POST']) async def lark_callback(): @@ -401,54 +405,79 @@ class LarkAdapter(adapter.MessagePlatformAdapter): return {'code': 500, 'message': 'error'} - def is_stream_output_supported() -> bool: + async def is_stream_output_supported() -> bool: is_stream = False - if self.config.get("",None): + if self.config.get("enable-card-reply",None): is_stream = True + self.is_stream = is_stream return is_stream - async def create_card_id(): + async def create_card_id(message_id): try: - is_stream = is_stream_output_supported() + is_stream = await is_stream_output_supported() if is_stream: self.ap.logger.debug('飞书支持stream输出,创建卡片......') - card_id = '' - if self.card_id_dict: - card_id = [k for k,v in self.card_id_dict.items() if (v+datetime.timedelta(days=14))< datetime.datetime.now()][0] + # card_id = '' 
+ # # if self.card_id_dict: + # # card_id = [k for k,v in self.card_id_dict.items() if (v+datetime.timedelta(days=14))< datetime.datetime.now()][0] + # + # if self.card_id_dict is None: + # # content = { + # # "type": "card_json", + # # "data": {"schema":"2.0","header":{"title":{"content":"bot","tag":"plain_text"}},"body":{"elements":[{"tag":"markdown","content":""}]}} + # # } + # card_data = {"schema":"2.0","header":{"title":{"content":"bot","tag":"plain_text"}}, + # "body":{"elements":[{"tag":"markdown","content":""}]},"config": {"streaming_mode": True, + # "streaming_config": {"print_strategy": "fast"}}} + # + # request: CreateCardRequest = CreateCardRequest.builder() \ + # .request_body( + # CreateCardRequestBody.builder() + # .type("card_json") + # .data(json.dumps(card_data)) \ + # .build() + # ).build() + # + # # 发起请求 + # response: CreateCardResponse = self.api_client.cardkit.v1.card.create(request) + # + # + # # 处理失败返回 + # if not response.success(): + # raise Exception( + # f"client.cardkit.v1.card.create failed, code: {response.code}, msg: {response.msg}, log_id: {response.get_log_id()}, resp: \n{json.dumps(json.loads(response.raw.content), indent=4, ensure_ascii=False)}") + # + # self.ap.logger.debug(f'飞书卡片创建成功,卡片ID: {response.data.card_id}') + # self.card_id_dict[response.data.card_id] = datetime.datetime.now() + # + # card_id = response.data.card_id + card_data = {"schema": "2.0", "header": {"title": {"content": "bot", "tag": "plain_text"}}, + "body": {"elements": [{"tag": "markdown", "content": "[思考中.....]","element_id":"markdown_1"}]}, + "config": {"streaming_mode": True, + "streaming_config": {"print_strategy": "fast"}}} - if self.card_id_dict is None or card_id == '': - # content = { - # "type": "card_json", - # "data": {"schema":"2.0","header":{"title":{"content":"bot","tag":"plain_text"}},"body":{"elements":[{"tag":"markdown","content":""}]}} - # } - card_data = {"schema":"2.0","header":{"title":{"content":"bot","tag":"plain_text"}}, - 
"body":{"elements":[{"tag":"markdown","content":""}]},"config": {"streaming_mode": True, - "streaming_config": {"print_strategy": "fast"}}} + request: CreateCardRequest = CreateCardRequest.builder() \ + .request_body( + CreateCardRequestBody.builder() + .type("card_json") + .data(json.dumps(card_data)) \ + .build() + ).build() - request: CreateCardRequest = ( - CreateCardRequest.builder() - .request_body( - CreateCardRequestBody.builder() - .type("card_json") - .data(json.dumps(card_data)) - .build() - ) - ) - # 发起请求 - response: CreateCardResponse = await self.api_client.im.v1.card.create(request) + # 发起请求 + response: CreateCardResponse = self.api_client.cardkit.v1.card.create(request) + # 处理失败返回 + if not response.success(): + raise Exception( + f"client.cardkit.v1.card.create failed, code: {response.code}, msg: {response.msg}, log_id: {response.get_log_id()}, resp: \n{json.dumps(json.loads(response.raw.content), indent=4, ensure_ascii=False)}") - # 处理失败返回 - if not response.success(): - raise Exception( - f"client.cardkit.v1.card.create failed, code: {response.code}, msg: {response.msg}, log_id: {response.get_log_id()}, resp: \n{json.dumps(json.loads(response.raw.content), indent=4, ensure_ascii=False)}") + self.ap.logger.debug(f'飞书卡片创建成功,卡片ID: {response.data.card_id}') + self.card_id_dict[message_id] = response.data.card_id - self.ap.logger.debug(f'飞书卡片创建成功,卡片ID: {response.data.card_id}') - self.card_id_dict[response.data.card_id] = datetime.datetime.now() - - card_id = response.data.card_id + card_id = response.data.card_id return card_id except Exception as e: @@ -458,10 +487,10 @@ class LarkAdapter(adapter.MessagePlatformAdapter): async def on_message(event: lark_oapi.im.v1.P2ImMessageReceiveV1): - if is_stream_output_supported(): + if await is_stream_output_supported(): self.ap.logger.debug('卡片回复模式开启') # 开启卡片回复模式. 这里可以实现飞书一发消息,马上创建卡片进行回复"思考中..." 
- card_id = await create_card_id() + card_id = await create_card_id(event.event.message.message_id) reply_message_id = await self.create_message_card(card_id, event.event.message.message_id) self.message_id_to_card_id[event.event.message.message_id] = (reply_message_id, time.time()) @@ -500,8 +529,8 @@ class LarkAdapter(adapter.MessagePlatformAdapter): # TODO 目前只支持卡片模板方式,且卡片变量一定是content,未来这块要做成可配置 # 发消息马上就会回复显示初始化的content信息,即思考中 content = { - 'type': 'template', - 'data': {'template_id': card_id, 'template_variable': {'content': 'Thinking...'}}, + 'type': 'card', + 'data': {'card_id': card_id, 'template_variable': {'content': 'Thinking...'}}, } request: ReplyMessageRequest = ( ReplyMessageRequest.builder() @@ -564,35 +593,49 @@ class LarkAdapter(adapter.MessagePlatformAdapter): async def reply_message_chunk( self, message_source: platform_events.MessageEvent, + message_id: str, message: platform_message.MessageChain, quote_origin: bool = False, + is_final: bool = False, ): """ 回复消息变成更新卡片消息 """ lark_message = await self.message_converter.yiri2target(message, self.api_client) + if not is_final: + self.seq += 1 + + + text_message = '' for ele in lark_message[0]: if ele['tag'] == 'text': text_message += ele['text'] elif ele['tag'] == 'md': text_message += ele['text'] + print(text_message) content = { - 'type': 'template', - 'data': {'template_id': self.config['card_template_id'], 'template_variable': {'content': text_message}}, + 'type': 'card_json', + 'data': {'card_id': self.card_id_dict[message_id], 'elements': {'content': text_message}}, } - request: PatchMessageRequest = ( - PatchMessageRequest.builder() - .message_id(self.message_id_to_card_id[message_source.message_chain.message_id][0]) - .request_body(PatchMessageRequestBody.builder().content(json.dumps(content)).build()) + request: ContentCardElementRequest = ContentCardElementRequest.builder() \ + .card_id(self.card_id_dict[message_id]) \ + .element_id("markdown_1") \ + 
.request_body(ContentCardElementRequestBody.builder() + # .uuid("a0d69e20-1dd1-458b-k525-dfeca4015204") + .content(text_message) + .sequence(self.seq) + .build()) \ .build() - ) + if is_final: + self.seq = 0 # 发起请求 - response: PatchMessageResponse = self.api_client.im.v1.message.patch(request) + response: ContentCardElementResponse = self.api_client.cardkit.v1.card_element.content(request) + # 处理失败返回 if not response.success(): diff --git a/pkg/provider/entities.py b/pkg/provider/entities.py index a149fea3..e8037e68 100644 --- a/pkg/provider/entities.py +++ b/pkg/provider/entities.py @@ -140,12 +140,12 @@ class MessageChunk(pydantic.BaseModel): content: typing.Optional[list[ContentElement]] | typing.Optional[str] = None """内容""" - # tool_calls: typing.Optional[list[ToolCall]] = None + tool_calls: typing.Optional[list[ToolCall]] = None """工具调用""" tool_call_id: typing.Optional[str] = None - tool_calls: typing.Optional[list[ToolCallChunk]] = None + # tool_calls: typing.Optional[list[ToolCallChunk]] = None is_final: bool = False diff --git a/pkg/provider/modelmgr/requester.py b/pkg/provider/modelmgr/requester.py index 3e5e791f..49a28f56 100644 --- a/pkg/provider/modelmgr/requester.py +++ b/pkg/provider/modelmgr/requester.py @@ -62,7 +62,7 @@ class LLMAPIRequester(metaclass=abc.ABCMeta): funcs: typing.List[tools_entities.LLMFunction] = None, stream: bool = False, extra_args: dict[str, typing.Any] = {}, - ) -> llm_entities.Message | typing.AsyncGenerator[llm_entities.MessageChunk, None]: + ) -> llm_entities.Message: """调用API Args: @@ -72,6 +72,29 @@ class LLMAPIRequester(metaclass=abc.ABCMeta): extra_args (dict[str, typing.Any], optional): 额外的参数. Defaults to {}. 
Returns: - llm_entities.Message | typing.AsyncGenerator[llm_entities.MessageChunk, None]: 返回消息对象 + llm_entities.Message | typing.AsyncGenerator[llm_entities.MessageChunk]: 返回消息对象 + """ + pass + + @abc.abstractmethod + async def invoke_llm_stream( + self, + query: core_entities.Query, + model: RuntimeLLMModel, + messages: typing.List[llm_entities.Message], + funcs: typing.List[tools_entities.LLMFunction] = None, + stream: bool = False, + extra_args: dict[str, typing.Any] = {}, + ) -> llm_entities.MessageChunk: + """调用API + + Args: + model (RuntimeLLMModel): 使用的模型信息 + messages (typing.List[llm_entities.Message]): 消息对象列表 + funcs (typing.List[tools_entities.LLMFunction], optional): 使用的工具函数列表. Defaults to None. + extra_args (dict[str, typing.Any], optional): 额外的参数. Defaults to {}. + + Returns: + llm_entities.Message | typing.AsyncGenerator[llm_entities.MessageChunk]: 返回消息对象 """ pass diff --git a/pkg/provider/modelmgr/requesters/chatcmpl.py b/pkg/provider/modelmgr/requesters/chatcmpl.py index 22931611..f06041fc 100644 --- a/pkg/provider/modelmgr/requesters/chatcmpl.py +++ b/pkg/provider/modelmgr/requesters/chatcmpl.py @@ -38,6 +38,15 @@ class OpenAIChatCompletions(requester.LLMAPIRequester): ) -> chat_completion.ChatCompletion: return await self.client.chat.completions.create(**args, extra_body=extra_body) + async def _req_stream( + self, + args: dict, + extra_body: dict = {}, + ) -> chat_completion.ChatCompletion: + + async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body): + yield chunk + async def _make_msg( self, chat_completion: chat_completion.ChatCompletion, @@ -62,9 +71,19 @@ class OpenAIChatCompletions(requester.LLMAPIRequester): self, chat_completion: chat_completion.ChatCompletion, ) -> llm_entities.MessageChunk: - choice = chat_completion.choices[0] - delta = choice.delta.model_dump() + + # 处理流式chunk和完整响应的差异 + # print(chat_completion.choices[0]) + if hasattr(chat_completion, 'choices'): + # 完整响应模式 + choice = 
chat_completion.choices[0] + delta = choice.delta.model_dump() if hasattr(choice, 'delta') else choice.message.model_dump() + else: + # 流式chunk模式 + delta = chat_completion.delta.model_dump() if hasattr(chat_completion, 'delta') else {} + # 确保 role 字段存在且不为 None + # print(delta) if 'role' not in delta or delta['role'] is None: delta['role'] = 'assistant' @@ -78,8 +97,8 @@ class OpenAIChatCompletions(requester.LLMAPIRequester): message = llm_entities.MessageChunk(**delta) return message - - async def _closure( + + async def _closure_stream( self, query: core_entities.Query, req_messages: list[dict], @@ -87,7 +106,7 @@ class OpenAIChatCompletions(requester.LLMAPIRequester): use_funcs: list[tools_entities.LLMFunction] = None, stream: bool = False, extra_args: dict[str, typing.Any] = {}, - ) -> llm_entities.Message: + ) -> llm_entities.Message | typing.AsyncGenerator[llm_entities.MessageChunk, None]: self.client.api_key = use_model.token_mgr.get_token() args = {} @@ -115,36 +134,76 @@ class OpenAIChatCompletions(requester.LLMAPIRequester): if stream: current_content = '' - async for chunk in await self._req(args, extra_body=extra_args): + args["stream"] = True + async for chunk in self._req_stream(args, extra_body=extra_args): + # print(chunk) # 处理流式消息 - delta_message = await self._make_msg_chunk( - chat_completion=chunk, - ) + delta_message = await self._make_msg_chunk(chunk) if delta_message.content: current_content += delta_message.content + delta_message.content = current_content + print(current_content) delta_message.all_content = current_content - - # 检查是否为最后一个块 - if chunk.choices[0].finish_reason is not None: + + # # 检查是否为最后一个块 + # if chunk.finish_reason is not None: + # delta_message.is_final = True + # + # yield delta_message + # 检查结束标志 + chunk_choices = getattr(chunk, 'choices', None) + if chunk_choices and getattr(chunk_choices[0], 'finish_reason', None): delta_message.is_final = True - yield delta_message - return - - else: + yield delta_message + # return - 
# 非流式请求 - resp = await self._req(args, extra_body=extra_args) - # 处理请求结果 - # 发送请求 - resp = await self._req(args, extra_body=extra_args) + + async def _closure( + self, + query: core_entities.Query, + req_messages: list[dict], + use_model: requester.RuntimeLLMModel, + use_funcs: list[tools_entities.LLMFunction] = None, + stream: bool = False, + extra_args: dict[str, typing.Any] = {}, + ) -> llm_entities.Message | typing.AsyncGenerator[llm_entities.MessageChunk, None]: + self.client.api_key = use_model.token_mgr.get_token() - # 处理请求结果 - message = await self._make_msg(resp) + args = {} + args['model'] = use_model.model_entity.name - return message - + if use_funcs: + tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) + + if tools: + args['tools'] = tools + + # 设置此次请求中的messages + messages = req_messages.copy() + + # 检查vision + for msg in messages: + if 'content' in msg and isinstance(msg['content'], list): + for me in msg['content']: + if me['type'] == 'image_base64': + me['image_url'] = {'url': me['image_base64']} + me['type'] = 'image_url' + del me['image_base64'] + + args['messages'] = messages + + + + # 发送请求 + + resp = await self._req(args, extra_body=extra_args) + # 处理请求结果 + message = await self._make_msg(resp) + + + return message @@ -171,8 +230,9 @@ class OpenAIChatCompletions(requester.LLMAPIRequester): req_messages.append(msg_dict) try: + if stream: - async for item in self._closure( + async for item in self._closure_stream( query=query, req_messages=req_messages, use_model=model, @@ -180,16 +240,17 @@ class OpenAIChatCompletions(requester.LLMAPIRequester): stream=stream, extra_args=extra_args, ): - yield item - return + return item else: - return await self._closure( + print(req_messages) + msg = await self._closure( query=query, req_messages=req_messages, use_model=model, use_funcs=funcs, extra_args=extra_args, ) + return msg except asyncio.TimeoutError: raise errors.RequesterError('请求超时') except openai.BadRequestError as e: @@ -205,3 
+266,51 @@ class OpenAIChatCompletions(requester.LLMAPIRequester): raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') except openai.APIError as e: raise errors.RequesterError(f'请求错误: {e.message}') + + async def invoke_llm_stream( + self, + query: core_entities.Query, + model: requester.RuntimeLLMModel, + messages: typing.List[llm_entities.Message], + funcs: typing.List[tools_entities.LLMFunction] = None, + stream: bool = False, + extra_args: dict[str, typing.Any] = {}, + ) -> llm_entities.MessageChunk: + req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行 + for m in messages: + msg_dict = m.dict(exclude_none=True) + content = msg_dict.get('content') + if isinstance(content, list): + # 检查 content 列表中是否每个部分都是文本 + if all(isinstance(part, dict) and part.get('type') == 'text' for part in content): + # 将所有文本部分合并为一个字符串 + msg_dict['content'] = '\n'.join(part['text'] for part in content) + req_messages.append(msg_dict) + + try: + if stream: + async for item in self._closure_stream( + query=query, + req_messages=req_messages, + use_model=model, + use_funcs=funcs, + stream=stream, + extra_args=extra_args, + ): + yield item + + except asyncio.TimeoutError: + raise errors.RequesterError('请求超时') + except openai.BadRequestError as e: + if 'context_length_exceeded' in e.message: + raise errors.RequesterError(f'上文过长,请重置会话: {e.message}') + else: + raise errors.RequesterError(f'请求参数错误: {e.message}') + except openai.AuthenticationError as e: + raise errors.RequesterError(f'无效的 api-key: {e.message}') + except openai.NotFoundError as e: + raise errors.RequesterError(f'请求路径错误: {e.message}') + except openai.RateLimitError as e: + raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') + except openai.APIError as e: + raise errors.RequesterError(f'请求错误: {e.message}') \ No newline at end of file diff --git a/pkg/provider/runners/localagent.py b/pkg/provider/runners/localagent.py index 02b2db16..da97e334 100644 --- a/pkg/provider/runners/localagent.py +++ 
b/pkg/provider/runners/localagent.py @@ -24,25 +24,30 @@ class LocalAgentRunner(runner.RequestRunner): pending_tool_calls = [] req_messages = query.prompt.messages.copy() + query.messages.copy() + [query.user_message] - - is_stream = query.adapter.is_stream_output_supported() + try: + is_stream = query.adapter.is_stream + except AttributeError: + is_stream = False # while True: # pass if not is_stream: # 非流式输出,直接请求 + # print(123) msg = await query.use_llm_model.requester.invoke_llm( query, query.use_llm_model, req_messages, query.use_funcs, + is_stream, extra_args=query.use_llm_model.model_entity.extra_args, ) yield msg final_msg = msg + print(final_msg) else: # 流式输出,需要处理工具调用 tool_calls_map: dict[str, llm_entities.ToolCall] = {} - async for msg in await query.use_llm_model.requester.invoke_llm( + async for msg in query.use_llm_model.requester.invoke_llm_stream( query, query.use_llm_model, req_messages, @@ -51,20 +56,20 @@ class LocalAgentRunner(runner.RequestRunner): extra_args=query.use_llm_model.model_entity.extra_args, ): yield msg - if msg.tool_calls: - for tool_call in msg.tool_calls: - if tool_call.id not in tool_calls_map: - tool_calls_map[tool_call.id] = llm_entities.ToolCall( - id=tool_call.id, - type=tool_call.type, - function=llm_entities.FunctionCall( - name=tool_call.function.name if tool_call.function else '', - arguments='' - ), - ) - if tool_call.function and tool_call.function.arguments: - # 流式处理中,工具调用参数可能分多个chunk返回,需要追加而不是覆盖 - tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments + # if msg.tool_calls: + # for tool_call in msg.tool_calls: + # if tool_call.id not in tool_calls_map: + # tool_calls_map[tool_call.id] = llm_entities.ToolCall( + # id=tool_call.id, + # type=tool_call.type, + # function=llm_entities.FunctionCall( + # name=tool_call.function.name if tool_call.function else '', + # arguments='' + # ), + # ) + # if tool_call.function and tool_call.function.arguments: + # # 流式处理中,工具调用参数可能分多个chunk返回,需要追加而不是覆盖 + # 
tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments final_msg = llm_entities.Message( role=msg.role, content=msg.all_content, @@ -105,7 +110,7 @@ class LocalAgentRunner(runner.RequestRunner): if is_stream: tool_calls_map = {} - async for msg in await query.use_llm_model.requester.invoke_llm( + async for msg in await query.use_llm_model.requester.invoke_llm_stream( query, query.use_llm_model, req_messages, @@ -130,10 +135,11 @@ class LocalAgentRunner(runner.RequestRunner): tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments final_msg = llm_entities.Message( role=msg.role, - content=all_content, + content=msg.all_content, tool_calls=list(tool_calls_map.values()), ) else: + print("非流式") # 处理完所有调用,再次请求 msg = await query.use_llm_model.requester.invoke_llm( query, From a8d03c98dce0cdf25689fa1bb573d70105bd05b3 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 4 Jul 2025 11:37:31 +0800 Subject: [PATCH 011/257] doc: replace comshare link --- README.md | 2 +- README_EN.md | 2 +- README_JP.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2d4ce119..6e0fa350 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ docker compose up -d | [Anthropic](https://www.anthropic.com/) | ✅ | | | [xAI](https://x.ai/) | ✅ | | | [智谱AI](https://open.bigmodel.cn/) | ✅ | | -| [优云智算](https://www.compshare.cn/) | ✅ | 大模型和 GPU 资源平台 | +| [优云智算](https://www.compshare.cn/?ytag=GPU_YY-gh_langbot) | ✅ | 大模型和 GPU 资源平台 | | [PPIO](https://ppinfra.com/user/register?invited_by=QJKFYD&utm_source=github_langbot) | ✅ | 大模型和 GPU 资源平台 | | [302.AI](https://share.302.ai/SuTG99) | ✅ | 大模型聚合平台 | | [Google Gemini](https://aistudio.google.com/prompts/new_chat) | ✅ | | diff --git a/README_EN.md b/README_EN.md index 56425bd5..07667f84 100644 --- a/README_EN.md +++ b/README_EN.md @@ -116,7 +116,7 @@ Directly use the released version to run, see the [Manual Deployment](https://do | 
[Anthropic](https://www.anthropic.com/) | ✅ | | | [xAI](https://x.ai/) | ✅ | | | [Zhipu AI](https://open.bigmodel.cn/) | ✅ | | -| [CompShare](https://www.compshare.cn/) | ✅ | LLM and GPU resource platform | +| [CompShare](https://www.compshare.cn/?ytag=GPU_YY-gh_langbot) | ✅ | LLM and GPU resource platform | | [Dify](https://dify.ai) | ✅ | LLMOps platform | | [PPIO](https://ppinfra.com/user/register?invited_by=QJKFYD&utm_source=github_langbot) | ✅ | LLM and GPU resource platform | | [302.AI](https://share.302.ai/SuTG99) | ✅ | LLM gateway(MaaS) | diff --git a/README_JP.md b/README_JP.md index f0423401..c54ce51b 100644 --- a/README_JP.md +++ b/README_JP.md @@ -115,7 +115,7 @@ LangBotはBTPanelにリストされています。BTPanelをインストール | [Anthropic](https://www.anthropic.com/) | ✅ | | | [xAI](https://x.ai/) | ✅ | | | [Zhipu AI](https://open.bigmodel.cn/) | ✅ | | -| [CompShare](https://www.compshare.cn/) | ✅ | 大模型とGPUリソースプラットフォーム | +| [CompShare](https://www.compshare.cn/?ytag=GPU_YY-gh_langbot) | ✅ | 大模型とGPUリソースプラットフォーム | | [PPIO](https://ppinfra.com/user/register?invited_by=QJKFYD&utm_source=github_langbot) | ✅ | 大模型とGPUリソースプラットフォーム | | [302.AI](https://share.302.ai/SuTG99) | ✅ | LLMゲートウェイ(MaaS) | | [Google Gemini](https://aistudio.google.com/prompts/new_chat) | ✅ | | From a01706d16364a382b82e9121a97aabd66db00c00 Mon Sep 17 00:00:00 2001 From: "Junyan Qin (Chin)" Date: Sat, 5 Jul 2025 17:36:35 +0800 Subject: [PATCH 012/257] Feat/reset password (#1566) * feat: reset password with recovery key * perf: formatting and multi language --- pkg/api/http/controller/groups/user.py | 27 ++++ pkg/api/http/service/user.py | 9 ++ pkg/core/stages/genkeys.py | 7 + templates/config.yaml | 1 + web/package.json | 1 + web/src/app/infra/http/HttpClient.ts | 12 ++ web/src/app/login/page.tsx | 12 +- web/src/app/reset-password/layout.tsx | 15 ++ web/src/app/reset-password/page.tsx | 205 +++++++++++++++++++++++++ web/src/components/ui/input-otp.tsx | 77 ++++++++++ web/src/i18n/locales/en-US.ts | 20 +++ 
web/src/i18n/locales/ja-JP.ts | 20 +++ web/src/i18n/locales/zh-Hans.ts | 18 +++ 13 files changed, 423 insertions(+), 1 deletion(-) create mode 100644 web/src/app/reset-password/layout.tsx create mode 100644 web/src/app/reset-password/page.tsx create mode 100644 web/src/components/ui/input-otp.tsx diff --git a/pkg/api/http/controller/groups/user.py b/pkg/api/http/controller/groups/user.py index 498efaa4..3ad1335b 100644 --- a/pkg/api/http/controller/groups/user.py +++ b/pkg/api/http/controller/groups/user.py @@ -1,5 +1,6 @@ import quart import argon2 +import asyncio from .. import group @@ -40,3 +41,29 @@ class UserRouterGroup(group.RouterGroup): token = await self.ap.user_service.generate_jwt_token(user_email) return self.success(data={'token': token}) + + @self.route('/reset-password', methods=['POST'], auth_type=group.AuthType.NONE) + async def _() -> str: + json_data = await quart.request.json + + user_email = json_data['user'] + recovery_key = json_data['recovery_key'] + new_password = json_data['new_password'] + + # hard sleep 3s for security + await asyncio.sleep(3) + + if not await self.ap.user_service.is_initialized(): + return self.http_status(400, -1, 'system not initialized') + + user_obj = await self.ap.user_service.get_user_by_email(user_email) + + if user_obj is None: + return self.http_status(400, -1, 'user not found') + + if recovery_key != self.ap.instance_config.data['system']['recovery_key']: + return self.http_status(403, -1, 'invalid recovery key') + + await self.ap.user_service.reset_password(user_email, new_password) + + return self.success(data={'user': user_email}) diff --git a/pkg/api/http/service/user.py b/pkg/api/http/service/user.py index 782aad75..c724bfcf 100644 --- a/pkg/api/http/service/user.py +++ b/pkg/api/http/service/user.py @@ -73,3 +73,12 @@ class UserService: jwt_secret = self.ap.instance_config.data['system']['jwt']['secret'] return jwt.decode(token, jwt_secret, algorithms=['HS256'])['user'] + + async def 
reset_password(self, user_email: str, new_password: str) -> None: + ph = argon2.PasswordHasher() + + hashed_password = ph.hash(new_password) + + await self.ap.persistence_mgr.execute_async( + sqlalchemy.update(user.User).where(user.User.user == user_email).values(password=hashed_password) + ) diff --git a/pkg/core/stages/genkeys.py b/pkg/core/stages/genkeys.py index c24ebd70..50e7cf7b 100644 --- a/pkg/core/stages/genkeys.py +++ b/pkg/core/stages/genkeys.py @@ -15,3 +15,10 @@ class GenKeysStage(stage.BootingStage): if not ap.instance_config.data['system']['jwt']['secret']: ap.instance_config.data['system']['jwt']['secret'] = secrets.token_hex(16) await ap.instance_config.dump_config() + + if 'recovery_key' not in ap.instance_config.data['system']: + ap.instance_config.data['system']['recovery_key'] = '' + + if not ap.instance_config.data['system']['recovery_key']: + ap.instance_config.data['system']['recovery_key'] = secrets.token_hex(3).upper() + await ap.instance_config.dump_config() diff --git a/templates/config.yaml b/templates/config.yaml index 109cd8d7..d347af77 100644 --- a/templates/config.yaml +++ b/templates/config.yaml @@ -15,6 +15,7 @@ proxy: http: '' https: '' system: + recovery_key: '' jwt: expire: 604800 secret: '' diff --git a/web/package.json b/web/package.json index 17516ac4..458e4132 100644 --- a/web/package.json +++ b/web/package.json @@ -40,6 +40,7 @@ "clsx": "^2.1.1", "i18next": "^25.1.2", "i18next-browser-languagedetector": "^8.1.0", + "input-otp": "^1.4.2", "lodash": "^4.17.21", "lucide-react": "^0.507.0", "next": "15.2.4", diff --git a/web/src/app/infra/http/HttpClient.ts b/web/src/app/infra/http/HttpClient.ts index a86cdbe8..5193703b 100644 --- a/web/src/app/infra/http/HttpClient.ts +++ b/web/src/app/infra/http/HttpClient.ts @@ -492,6 +492,18 @@ class HttpClient { public checkUserToken(): Promise { return this.get('/api/v1/user/check-token'); } + + public resetPassword( + user: string, + recoveryKey: string, + newPassword: string, + ): 
Promise<{ user: string }> { + return this.post('/api/v1/user/reset-password', { + user, + recovery_key: recoveryKey, + new_password: newPassword, + }); + } } const getBaseURL = (): string => { diff --git a/web/src/app/login/page.tsx b/web/src/app/login/page.tsx index 9d4b3a17..d55e3fd3 100644 --- a/web/src/app/login/page.tsx +++ b/web/src/app/login/page.tsx @@ -34,6 +34,7 @@ import langbotIcon from '@/app/assets/langbot-logo.webp'; import { toast } from 'sonner'; import { useTranslation } from 'react-i18next'; import i18n from '@/i18n'; +import Link from 'next/link'; const formSchema = (t: (key: string) => string) => z.object({ @@ -209,7 +210,16 @@ export default function Login() { name="password" render={({ field }) => ( - {t('common.password')} +

+
diff --git a/web/src/app/reset-password/layout.tsx b/web/src/app/reset-password/layout.tsx new file mode 100644 index 00000000..5db7817e --- /dev/null +++ b/web/src/app/reset-password/layout.tsx @@ -0,0 +1,15 @@ +'use client'; + +import React from 'react'; + +export default function ResetPasswordLayout({ + children, +}: Readonly<{ + children: React.ReactNode; +}>) { + return ( +
+
{children}
+
+ ); +} diff --git a/web/src/app/reset-password/page.tsx b/web/src/app/reset-password/page.tsx new file mode 100644 index 00000000..30671595 --- /dev/null +++ b/web/src/app/reset-password/page.tsx @@ -0,0 +1,205 @@ +'use client'; +import { Button } from '@/components/ui/button'; +import { Input } from '@/components/ui/input'; +import { + Card, + CardContent, + CardHeader, + CardTitle, + CardDescription, +} from '@/components/ui/card'; +import { + InputOTP, + InputOTPGroup, + InputOTPSlot, + InputOTPSeparator, +} from '@/components/ui/input-otp'; +import { useForm } from 'react-hook-form'; +import { zodResolver } from '@hookform/resolvers/zod'; +import * as z from 'zod'; +import { + Form, + FormControl, + FormField, + FormItem, + FormLabel, + FormMessage, + FormDescription, +} from '@/components/ui/form'; +import { useState } from 'react'; +import { httpClient } from '@/app/infra/http/HttpClient'; +import { useRouter } from 'next/navigation'; +import { Mail, Lock, ArrowLeft } from 'lucide-react'; +import { toast } from 'sonner'; +import { useTranslation } from 'react-i18next'; +import Link from 'next/link'; + +const REGEXP_ONLY_DIGITS_AND_CHARS = /^[0-9a-zA-Z]+$/; + +const formSchema = (t: (key: string) => string) => + z.object({ + email: z.string().email(t('common.invalidEmail')), + recoveryKey: z.string().min(1, t('resetPassword.recoveryKeyRequired')), + newPassword: z.string().min(1, t('resetPassword.newPasswordRequired')), + }); + +export default function ResetPassword() { + const router = useRouter(); + const { t } = useTranslation(); + const [isResetting, setIsResetting] = useState(false); + + const form = useForm>>({ + resolver: zodResolver(formSchema(t)), + defaultValues: { + email: '', + recoveryKey: '', + newPassword: '', + }, + }); + + function onSubmit(values: z.infer>) { + handleResetPassword(values.email, values.recoveryKey, values.newPassword); + } + + function handleResetPassword( + email: string, + recoveryKey: string, + newPassword: string, + ) { + 
setIsResetting(true); + httpClient + .resetPassword(email, recoveryKey, newPassword) + .then((res) => { + console.log('reset password success: ', res); + toast.success(t('resetPassword.resetSuccess')); + router.push('/login'); + }) + .catch((err) => { + console.log('reset password error: ', err); + toast.error(t('resetPassword.resetFailed')); + }) + .finally(() => { + setIsResetting(false); + }); + } + + return ( +
+ + +
+ + + {t('resetPassword.backToLogin')} + +
+ + {t('resetPassword.title')} + + + {t('resetPassword.description')} + +
+ +
+ + ( + + {t('common.email')} + +
+ + +
+
+ +
+ )} + /> + + ( + + {t('resetPassword.recoveryKey')} + + {t('resetPassword.recoveryKeyDescription')} + + + { + // 将输入的值转换为大写 + const upperValue = value.toUpperCase(); + field.onChange(upperValue); + }} + > + + + + + + + + + + + + + + + + )} + /> + + ( + + {t('resetPassword.newPassword')} + +
+ + +
+
+ +
+ )} + /> + + + + +
+
+
+ ); +} diff --git a/web/src/components/ui/input-otp.tsx b/web/src/components/ui/input-otp.tsx new file mode 100644 index 00000000..26c5f7af --- /dev/null +++ b/web/src/components/ui/input-otp.tsx @@ -0,0 +1,77 @@ +'use client'; + +import * as React from 'react'; +import { OTPInput, OTPInputContext } from 'input-otp'; +import { MinusIcon } from 'lucide-react'; + +import { cn } from '@/lib/utils'; + +function InputOTP({ + className, + containerClassName, + ...props +}: React.ComponentProps & { + containerClassName?: string; +}) { + return ( + + ); +} + +function InputOTPGroup({ className, ...props }: React.ComponentProps<'div'>) { + return ( +
+ ); +} + +function InputOTPSlot({ + index, + className, + ...props +}: React.ComponentProps<'div'> & { + index: number; +}) { + const inputOTPContext = React.useContext(OTPInputContext); + const { char, hasFakeCaret, isActive } = inputOTPContext?.slots[index] ?? {}; + + return ( +
+ {char} + {hasFakeCaret && ( +
+
+
+ )} +
+ ); +} + +function InputOTPSeparator({ ...props }: React.ComponentProps<'div'>) { + return ( +
+ +
+ ); +} + +export { InputOTP, InputOTPGroup, InputOTPSlot, InputOTPSeparator }; diff --git a/web/src/i18n/locales/en-US.ts b/web/src/i18n/locales/en-US.ts index 0e171e4b..1975a521 100644 --- a/web/src/i18n/locales/en-US.ts +++ b/web/src/i18n/locales/en-US.ts @@ -39,6 +39,7 @@ const enUS = { addRound: 'Add Round', copySuccess: 'Copy Successfully', test: 'Test', + forgotPassword: 'Forgot Password?', }, notFound: { title: 'Page not found', @@ -239,6 +240,25 @@ const enUS = { initSuccess: 'Initialization successful, please login', initFailed: 'Initialization failed: ', }, + resetPassword: { + title: 'Reset Password 🔐', + description: + 'Enter your recovery key and new password to reset your account password', + recoveryKey: 'Recovery Key', + recoveryKeyDescription: + 'Stored in `system.recovery_key` of config file `data/config.yaml`', + newPassword: 'New Password', + enterRecoveryKey: 'Enter recovery key', + enterNewPassword: 'Enter new password', + recoveryKeyRequired: 'Recovery key cannot be empty', + newPasswordRequired: 'New password cannot be empty', + resetPassword: 'Reset Password', + resetting: 'Resetting...', + resetSuccess: 'Password reset successfully, please login', + resetFailed: + 'Password reset failed, please check your email and recovery key', + backToLogin: 'Back to Login', + }, }; export default enUS; diff --git a/web/src/i18n/locales/ja-JP.ts b/web/src/i18n/locales/ja-JP.ts index f1783a35..bac6f805 100644 --- a/web/src/i18n/locales/ja-JP.ts +++ b/web/src/i18n/locales/ja-JP.ts @@ -40,6 +40,7 @@ const jaJP = { addRound: 'ラウンドを追加', copySuccess: 'コピーに成功しました', test: 'テスト', + forgotPassword: 'パスワードを忘れた?', }, notFound: { title: 'ページが見つかりません', @@ -240,6 +241,25 @@ const jaJP = { initSuccess: '初期化に成功しました。ログインしてください', initFailed: '初期化に失敗しました:', }, + resetPassword: { + title: 'パスワードをリセット 🔐', + description: + '復旧キーと新しいパスワードを入力して、アカウントのパスワードをリセットします', + recoveryKey: '復旧キー', + recoveryKeyDescription: + '設定ファイル `data/config.yaml` の `system.recovery_key` 
に保存されています', + newPassword: '新しいパスワード', + enterRecoveryKey: '復旧キーを入力', + enterNewPassword: '新しいパスワードを入力', + recoveryKeyRequired: '復旧キーは必須です', + newPasswordRequired: '新しいパスワードは必須です', + resetPassword: 'パスワードをリセット', + resetting: 'リセット中...', + resetSuccess: 'パスワードのリセットに成功しました。ログインしてください', + resetFailed: + 'パスワードのリセットに失敗しました。メールアドレスと復旧キーを確認してください', + backToLogin: 'ログインに戻る', + }, }; export default jaJP; diff --git a/web/src/i18n/locales/zh-Hans.ts b/web/src/i18n/locales/zh-Hans.ts index 2a960131..2ded8236 100644 --- a/web/src/i18n/locales/zh-Hans.ts +++ b/web/src/i18n/locales/zh-Hans.ts @@ -39,6 +39,7 @@ const zhHans = { addRound: '添加回合', copySuccess: '复制成功', test: '测试', + forgotPassword: '忘记密码?', }, notFound: { title: '页面不存在', @@ -233,6 +234,23 @@ const zhHans = { initSuccess: '初始化成功 请登录', initFailed: '初始化失败:', }, + resetPassword: { + title: '重置密码 🔐', + description: '输入恢复密钥和新的密码来重置您的账户密码', + recoveryKey: '恢复密钥', + recoveryKeyDescription: + '存储在配置文件`data/config.yaml`的`system.recovery_key`中', + newPassword: '新密码', + enterRecoveryKey: '输入恢复密钥', + enterNewPassword: '输入新密码', + recoveryKeyRequired: '恢复密钥不能为空', + newPasswordRequired: '新密码不能为空', + resetPassword: '重置密码', + resetting: '重置中...', + resetSuccess: '密码重置成功,请登录', + resetFailed: '密码重置失败,请检查邮箱和恢复密钥是否正确', + backToLogin: '返回登录', + }, }; export default zhHans; From c81d5a1a49194e2e806801f53c32b405f12627e7 Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 21 May 2025 12:42:39 +0800 Subject: [PATCH 013/257] feat: add embeddings model management (#1461) * feat: add embeddings model management backend support Co-Authored-By: Junyan Qin * feat: add embeddings model management frontend support Co-Authored-By: Junyan Qin * chore: revert HttpClient URL to production setting Co-Authored-By: Junyan Qin * refactor: integrate embeddings models into models page with tabs Co-Authored-By: Junyan Qin * perf: move files * perf: remove `s` * feat: allow 
requester to declare supported types in manifest * feat(embedding): delete dimension and encoding format * feat: add extra_args for embedding moels * perf: i18n ref * fix: linter err * fix: lint err * fix: linter err --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: Junyan Qin --- .../http/controller/groups/provider/models.py | 55 +- .../controller/groups/provider/requesters.py | 3 +- pkg/api/http/service/model.py | 89 ++- pkg/core/app.py | 4 +- pkg/core/stages/build_app.py | 7 +- pkg/entity/persistence/model.py | 21 + pkg/provider/modelmgr/entities.py | 2 +- pkg/provider/modelmgr/modelmgr.py | 89 ++- pkg/provider/modelmgr/requester.py | 51 +- .../modelmgr/requesters/anthropicmsgs.py | 2 +- .../modelmgr/requesters/anthropicmsgs.yaml | 2 + .../modelmgr/requesters/bailianchatcmpl.yaml | 2 + pkg/provider/modelmgr/requesters/chatcmpl.py | 38 +- .../modelmgr/requesters/chatcmpl.yaml | 3 + .../modelmgr/requesters/deepseekchatcmpl.yaml | 2 + .../modelmgr/requesters/geminichatcmpl.yaml | 2 + .../modelmgr/requesters/giteeaichatcmpl.yaml | 2 + .../modelmgr/requesters/lmstudiochatcmpl.yaml | 2 + .../modelmgr/requesters/modelscopechatcmpl.py | 2 +- .../requesters/modelscopechatcmpl.yaml | 2 + .../modelmgr/requesters/moonshotchatcmpl.yaml | 2 + .../modelmgr/requesters/ollamachat.py | 2 +- .../modelmgr/requesters/ollamachat.yaml | 2 + .../requesters/openrouterchatcmpl.yaml | 2 + .../modelmgr/requesters/ppiochatcmpl.yaml | 2 + .../requesters/siliconflowchatcmpl.yaml | 2 + .../modelmgr/requesters/volcarkchatcmpl.yaml | 2 + .../modelmgr/requesters/xaichatcmpl.yaml | 2 + .../modelmgr/requesters/zhipuaichatcmpl.yaml | 2 + .../home-sidebar/sidbarConfigList.tsx | 1 + .../{llm-form => }/ChooseRequesterEntity.ts | 0 .../models/component/ICreateEmbeddingField.ts | 7 + .../models/{ => component}/ICreateLLMField.ts | 0 .../embedding-card/EmbeddingCard.module.css | 97 +++ .../embedding-card/EmbeddingCard.tsx | 53 ++ 
.../embedding-card/EmbeddingCardVO.ts | 23 + .../embedding-form/EmbeddingForm.tsx | 563 ++++++++++++++++++ .../models/component/llm-form/LLMForm.tsx | 8 +- web/src/app/home/models/page.tsx | 183 +++++- web/src/app/infra/entities/api/index.ts | 23 + web/src/app/infra/http/HttpClient.ts | 42 +- web/src/i18n/locales/en-US.ts | 18 +- web/src/i18n/locales/zh-Hans.ts | 18 +- 43 files changed, 1370 insertions(+), 64 deletions(-) rename web/src/app/home/models/component/{llm-form => }/ChooseRequesterEntity.ts (100%) create mode 100644 web/src/app/home/models/component/ICreateEmbeddingField.ts rename web/src/app/home/models/{ => component}/ICreateLLMField.ts (100%) create mode 100644 web/src/app/home/models/component/embedding-card/EmbeddingCard.module.css create mode 100644 web/src/app/home/models/component/embedding-card/EmbeddingCard.tsx create mode 100644 web/src/app/home/models/component/embedding-card/EmbeddingCardVO.ts create mode 100644 web/src/app/home/models/component/embedding-form/EmbeddingForm.tsx diff --git a/pkg/api/http/controller/groups/provider/models.py b/pkg/api/http/controller/groups/provider/models.py index bb77986c..0de0c922 100644 --- a/pkg/api/http/controller/groups/provider/models.py +++ b/pkg/api/http/controller/groups/provider/models.py @@ -9,18 +9,18 @@ class LLMModelsRouterGroup(group.RouterGroup): @self.route('', methods=['GET', 'POST']) async def _() -> str: if quart.request.method == 'GET': - return self.success(data={'models': await self.ap.model_service.get_llm_models()}) + return self.success(data={'models': await self.ap.llm_model_service.get_llm_models()}) elif quart.request.method == 'POST': json_data = await quart.request.json - model_uuid = await self.ap.model_service.create_llm_model(json_data) + model_uuid = await self.ap.llm_model_service.create_llm_model(json_data) return self.success(data={'uuid': model_uuid}) @self.route('/', methods=['GET', 'PUT', 'DELETE']) async def _(model_uuid: str) -> str: if quart.request.method == 
'GET': - model = await self.ap.model_service.get_llm_model(model_uuid) + model = await self.ap.llm_model_service.get_llm_model(model_uuid) if model is None: return self.http_status(404, -1, 'model not found') @@ -29,11 +29,11 @@ class LLMModelsRouterGroup(group.RouterGroup): elif quart.request.method == 'PUT': json_data = await quart.request.json - await self.ap.model_service.update_llm_model(model_uuid, json_data) + await self.ap.llm_model_service.update_llm_model(model_uuid, json_data) return self.success() elif quart.request.method == 'DELETE': - await self.ap.model_service.delete_llm_model(model_uuid) + await self.ap.llm_model_service.delete_llm_model(model_uuid) return self.success() @@ -41,6 +41,49 @@ class LLMModelsRouterGroup(group.RouterGroup): async def _(model_uuid: str) -> str: json_data = await quart.request.json - await self.ap.model_service.test_llm_model(model_uuid, json_data) + await self.ap.llm_model_service.test_llm_model(model_uuid, json_data) + + return self.success() + + +@group.group_class('models/embedding', '/api/v1/provider/models/embedding') +class EmbeddingModelsRouterGroup(group.RouterGroup): + async def initialize(self) -> None: + @self.route('', methods=['GET', 'POST']) + async def _() -> str: + if quart.request.method == 'GET': + return self.success(data={'models': await self.ap.embedding_models_service.get_embedding_models()}) + elif quart.request.method == 'POST': + json_data = await quart.request.json + + model_uuid = await self.ap.embedding_models_service.create_embedding_model(json_data) + + return self.success(data={'uuid': model_uuid}) + + @self.route('/', methods=['GET', 'PUT', 'DELETE']) + async def _(model_uuid: str) -> str: + if quart.request.method == 'GET': + model = await self.ap.embedding_models_service.get_embedding_model(model_uuid) + + if model is None: + return self.http_status(404, -1, 'model not found') + + return self.success(data={'model': model}) + elif quart.request.method == 'PUT': + json_data = await 
quart.request.json + + await self.ap.embedding_models_service.update_embedding_model(model_uuid, json_data) + + return self.success() + elif quart.request.method == 'DELETE': + await self.ap.embedding_models_service.delete_embedding_model(model_uuid) + + return self.success() + + @self.route('//test', methods=['POST']) + async def _(model_uuid: str) -> str: + json_data = await quart.request.json + + await self.ap.embedding_models_service.test_embedding_model(model_uuid, json_data) return self.success() diff --git a/pkg/api/http/controller/groups/provider/requesters.py b/pkg/api/http/controller/groups/provider/requesters.py index 0f999288..af9e1540 100644 --- a/pkg/api/http/controller/groups/provider/requesters.py +++ b/pkg/api/http/controller/groups/provider/requesters.py @@ -8,7 +8,8 @@ class RequestersRouterGroup(group.RouterGroup): async def initialize(self) -> None: @self.route('', methods=['GET']) async def _() -> quart.Response: - return self.success(data={'requesters': self.ap.model_mgr.get_available_requesters_info()}) + model_type = quart.request.args.get('type', '') + return self.success(data={'requesters': self.ap.model_mgr.get_available_requesters_info(model_type)}) @self.route('/', methods=['GET']) async def _(requester_name: str) -> quart.Response: diff --git a/pkg/api/http/service/model.py b/pkg/api/http/service/model.py index 74fb4e02..afeae3eb 100644 --- a/pkg/api/http/service/model.py +++ b/pkg/api/http/service/model.py @@ -10,7 +10,7 @@ from ....provider.modelmgr import requester as model_requester from ....provider import entities as llm_entities -class ModelsService: +class LLMModelsService: ap: app.Application def __init__(self, ap: app.Application) -> None: @@ -103,3 +103,90 @@ class ModelsService: funcs=[], extra_args={}, ) + + +class EmbeddingModelsService: + ap: app.Application + + def __init__(self, ap: app.Application) -> None: + self.ap = ap + + async def get_embedding_models(self) -> list[dict]: + result = await 
self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.EmbeddingModel)) + + models = result.all() + return [self.ap.persistence_mgr.serialize_model(persistence_model.EmbeddingModel, model) for model in models] + + async def create_embedding_model(self, model_data: dict) -> str: + model_data['uuid'] = str(uuid.uuid4()) + + await self.ap.persistence_mgr.execute_async( + sqlalchemy.insert(persistence_model.EmbeddingModel).values(**model_data) + ) + + embedding_model = await self.get_embedding_model(model_data['uuid']) + + await self.ap.model_mgr.load_embedding_model(embedding_model) + + return model_data['uuid'] + + async def get_embedding_model(self, model_uuid: str) -> dict | None: + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.select(persistence_model.EmbeddingModel).where( + persistence_model.EmbeddingModel.uuid == model_uuid + ) + ) + + model = result.first() + + if model is None: + return None + + return self.ap.persistence_mgr.serialize_model(persistence_model.EmbeddingModel, model) + + async def update_embedding_model(self, model_uuid: str, model_data: dict) -> None: + if 'uuid' in model_data: + del model_data['uuid'] + + await self.ap.persistence_mgr.execute_async( + sqlalchemy.update(persistence_model.EmbeddingModel) + .where(persistence_model.EmbeddingModel.uuid == model_uuid) + .values(**model_data) + ) + + await self.ap.model_mgr.remove_embedding_model(model_uuid) + + embedding_model = await self.get_embedding_model(model_uuid) + + await self.ap.model_mgr.load_embedding_model(embedding_model) + + async def delete_embedding_model(self, model_uuid: str) -> None: + await self.ap.persistence_mgr.execute_async( + sqlalchemy.delete(persistence_model.EmbeddingModel).where( + persistence_model.EmbeddingModel.uuid == model_uuid + ) + ) + + await self.ap.model_mgr.remove_embedding_model(model_uuid) + + async def test_embedding_model(self, model_uuid: str, model_data: dict) -> None: + runtime_embedding_model: 
model_requester.RuntimeEmbeddingModel | None = None + + if model_uuid != '_': + for model in self.ap.model_mgr.embedding_models: + if model.model_entity.uuid == model_uuid: + runtime_embedding_model = model + break + + if runtime_embedding_model is None: + raise Exception('model not found') + + else: + runtime_embedding_model = await self.ap.model_mgr.init_runtime_embedding_model(model_data) + + await runtime_embedding_model.requester.invoke_embedding( + query=None, + model=runtime_embedding_model, + input_text='Hello, world!', + extra_args={}, + ) diff --git a/pkg/core/app.py b/pkg/core/app.py index 911acd3d..318cddcb 100644 --- a/pkg/core/app.py +++ b/pkg/core/app.py @@ -103,7 +103,9 @@ class Application: user_service: user_service.UserService = None - model_service: model_service.ModelsService = None + llm_model_service: model_service.LLMModelsService = None + + embedding_models_service: model_service.EmbeddingModelsService = None pipeline_service: pipeline_service.PipelineService = None diff --git a/pkg/core/stages/build_app.py b/pkg/core/stages/build_app.py index 6ee35610..482a468b 100644 --- a/pkg/core/stages/build_app.py +++ b/pkg/core/stages/build_app.py @@ -95,8 +95,11 @@ class BuildAppStage(stage.BootingStage): user_service_inst = user_service.UserService(ap) ap.user_service = user_service_inst - model_service_inst = model_service.ModelsService(ap) - ap.model_service = model_service_inst + llm_model_service_inst = model_service.LLMModelsService(ap) + ap.llm_model_service = llm_model_service_inst + + embedding_models_service_inst = model_service.EmbeddingModelsService(ap) + ap.embedding_models_service = embedding_models_service_inst pipeline_service_inst = pipeline_service.PipelineService(ap) ap.pipeline_service = pipeline_service_inst diff --git a/pkg/entity/persistence/model.py b/pkg/entity/persistence/model.py index 9eb2ccef..418cab70 100644 --- a/pkg/entity/persistence/model.py +++ b/pkg/entity/persistence/model.py @@ -23,3 +23,24 @@ class 
LLMModel(Base): server_default=sqlalchemy.func.now(), onupdate=sqlalchemy.func.now(), ) + + +class EmbeddingModel(Base): + """Embedding 模型""" + + __tablename__ = 'embedding_models' + + uuid = sqlalchemy.Column(sqlalchemy.String(255), primary_key=True, unique=True) + name = sqlalchemy.Column(sqlalchemy.String(255), nullable=False) + description = sqlalchemy.Column(sqlalchemy.String(255), nullable=False) + requester = sqlalchemy.Column(sqlalchemy.String(255), nullable=False) + requester_config = sqlalchemy.Column(sqlalchemy.JSON, nullable=False, default={}) + api_keys = sqlalchemy.Column(sqlalchemy.JSON, nullable=False) + extra_args = sqlalchemy.Column(sqlalchemy.JSON, nullable=False, default={}) + created_at = sqlalchemy.Column(sqlalchemy.DateTime, nullable=False, server_default=sqlalchemy.func.now()) + updated_at = sqlalchemy.Column( + sqlalchemy.DateTime, + nullable=False, + server_default=sqlalchemy.func.now(), + onupdate=sqlalchemy.func.now(), + ) diff --git a/pkg/provider/modelmgr/entities.py b/pkg/provider/modelmgr/entities.py index cf856894..7bc02a32 100644 --- a/pkg/provider/modelmgr/entities.py +++ b/pkg/provider/modelmgr/entities.py @@ -17,7 +17,7 @@ class LLMModelInfo(pydantic.BaseModel): token_mgr: token.TokenManager - requester: requester.LLMAPIRequester + requester: requester.ProviderAPIRequester tool_call_supported: typing.Optional[bool] = False diff --git a/pkg/provider/modelmgr/modelmgr.py b/pkg/provider/modelmgr/modelmgr.py index b15e53a9..2c92eacc 100644 --- a/pkg/provider/modelmgr/modelmgr.py +++ b/pkg/provider/modelmgr/modelmgr.py @@ -18,7 +18,7 @@ class ModelManager: model_list: list[entities.LLMModelInfo] # deprecated - requesters: dict[str, requester.LLMAPIRequester] # deprecated + requesters: dict[str, requester.ProviderAPIRequester] # deprecated token_mgrs: dict[str, token.TokenManager] # deprecated @@ -28,9 +28,11 @@ class ModelManager: llm_models: list[requester.RuntimeLLMModel] + embedding_models: list[requester.RuntimeEmbeddingModel] + 
requester_components: list[engine.Component] - requester_dict: dict[str, type[requester.LLMAPIRequester]] # cache + requester_dict: dict[str, type[requester.ProviderAPIRequester]] # cache def __init__(self, ap: app.Application): self.ap = ap @@ -38,6 +40,7 @@ class ModelManager: self.requesters = {} self.token_mgrs = {} self.llm_models = [] + self.embedding_models = [] self.requester_components = [] self.requester_dict = {} @@ -45,7 +48,7 @@ class ModelManager: self.requester_components = self.ap.discover.get_components_by_kind('LLMAPIRequester') # forge requester class dict - requester_dict: dict[str, type[requester.LLMAPIRequester]] = {} + requester_dict: dict[str, type[requester.ProviderAPIRequester]] = {} for component in self.requester_components: requester_dict[component.metadata.name] = component.get_python_component_class() @@ -58,13 +61,11 @@ class ModelManager: self.ap.logger.info('Loading models from db...') self.llm_models = [] + self.embedding_models = [] # llm models result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.LLMModel)) - llm_models = result.all() - - # load models for llm_model in llm_models: try: await self.load_llm_model(llm_model) @@ -73,11 +74,17 @@ class ModelManager: except Exception as e: self.ap.logger.error(f'Failed to load model {llm_model.uuid}: {e}\n{traceback.format_exc()}') + # embedding models + result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.EmbeddingModel)) + embedding_models = result.all() + for embedding_model in embedding_models: + await self.load_embedding_model(embedding_model) + async def init_runtime_llm_model( self, model_info: persistence_model.LLMModel | sqlalchemy.Row[persistence_model.LLMModel] | dict, ): - """初始化运行时模型""" + """初始化运行时 LLM 模型""" if isinstance(model_info, sqlalchemy.Row): model_info = persistence_model.LLMModel(**model_info._mapping) elif isinstance(model_info, dict): @@ -101,14 +108,47 @@ class ModelManager: return 
runtime_llm_model + async def init_runtime_embedding_model( + self, + model_info: persistence_model.EmbeddingModel | sqlalchemy.Row[persistence_model.EmbeddingModel] | dict, + ): + """初始化运行时 Embedding 模型""" + if isinstance(model_info, sqlalchemy.Row): + model_info = persistence_model.EmbeddingModel(**model_info._mapping) + elif isinstance(model_info, dict): + model_info = persistence_model.EmbeddingModel(**model_info) + + requester_inst = self.requester_dict[model_info.requester](ap=self.ap, config=model_info.requester_config) + + await requester_inst.initialize() + + runtime_embedding_model = requester.RuntimeEmbeddingModel( + model_entity=model_info, + token_mgr=token.TokenManager( + name=model_info.uuid, + tokens=model_info.api_keys, + ), + requester=requester_inst, + ) + + return runtime_embedding_model + async def load_llm_model( self, model_info: persistence_model.LLMModel | sqlalchemy.Row[persistence_model.LLMModel] | dict, ): - """加载模型""" + """加载 LLM 模型""" runtime_llm_model = await self.init_runtime_llm_model(model_info) self.llm_models.append(runtime_llm_model) + async def load_embedding_model( + self, + model_info: persistence_model.EmbeddingModel | sqlalchemy.Row[persistence_model.EmbeddingModel] | dict, + ): + """加载 Embedding 模型""" + runtime_embedding_model = await self.init_runtime_embedding_model(model_info) + self.embedding_models.append(runtime_embedding_model) + async def get_model_by_name(self, name: str) -> entities.LLMModelInfo: # deprecated """通过名称获取模型""" for model in self.model_list: @@ -116,23 +156,44 @@ class ModelManager: return model raise ValueError(f'无法确定模型 {name} 的信息') - async def get_model_by_uuid(self, uuid: str) -> entities.LLMModelInfo: - """通过uuid获取模型""" + async def get_model_by_uuid(self, uuid: str) -> requester.RuntimeLLMModel: + """通过uuid获取 LLM 模型""" for model in self.llm_models: if model.model_entity.uuid == uuid: return model - raise ValueError(f'model {uuid} not found') + raise ValueError(f'LLM model {uuid} not found') + + 
async def get_embedding_model_by_uuid(self, uuid: str) -> requester.RuntimeEmbeddingModel: + """通过uuid获取 Embedding 模型""" + for model in self.embedding_models: + if model.model_entity.uuid == uuid: + return model + raise ValueError(f'Embedding model {uuid} not found') async def remove_llm_model(self, model_uuid: str): - """移除模型""" + """移除 LLM 模型""" for model in self.llm_models: if model.model_entity.uuid == model_uuid: self.llm_models.remove(model) return - def get_available_requesters_info(self) -> list[dict]: + async def remove_embedding_model(self, model_uuid: str): + """移除 Embedding 模型""" + for model in self.embedding_models: + if model.model_entity.uuid == model_uuid: + self.embedding_models.remove(model) + return + + def get_available_requesters_info(self, model_type: str) -> list[dict]: """获取所有可用的请求器""" - return [component.to_plain_dict() for component in self.requester_components] + if model_type != '': + return [ + component.to_plain_dict() + for component in self.requester_components + if model_type in component.spec['support_type'] + ] + else: + return [component.to_plain_dict() for component in self.requester_components] def get_available_requester_info_by_name(self, name: str) -> dict | None: """通过名称获取请求器信息""" diff --git a/pkg/provider/modelmgr/requester.py b/pkg/provider/modelmgr/requester.py index 244f4c82..9742a52c 100644 --- a/pkg/provider/modelmgr/requester.py +++ b/pkg/provider/modelmgr/requester.py @@ -20,22 +20,45 @@ class RuntimeLLMModel: token_mgr: token.TokenManager """api key管理器""" - requester: LLMAPIRequester + requester: ProviderAPIRequester """请求器实例""" def __init__( self, model_entity: persistence_model.LLMModel, token_mgr: token.TokenManager, - requester: LLMAPIRequester, + requester: ProviderAPIRequester, ): self.model_entity = model_entity self.token_mgr = token_mgr self.requester = requester -class LLMAPIRequester(metaclass=abc.ABCMeta): - """LLM API请求器""" +class RuntimeEmbeddingModel: + """运行时 Embedding 模型""" + + model_entity: 
persistence_model.EmbeddingModel + """模型数据""" + + token_mgr: token.TokenManager + """api key管理器""" + + requester: ProviderAPIRequester + """请求器实例""" + + def __init__( + self, + model_entity: persistence_model.EmbeddingModel, + token_mgr: token.TokenManager, + requester: ProviderAPIRequester, + ): + self.model_entity = model_entity + self.token_mgr = token_mgr + self.requester = requester + + +class ProviderAPIRequester(metaclass=abc.ABCMeta): + """Provider API请求器""" name: str = None @@ -74,3 +97,23 @@ class LLMAPIRequester(metaclass=abc.ABCMeta): llm_entities.Message: 返回消息对象 """ pass + + async def invoke_embedding( + self, + query: core_entities.Query, + model: RuntimeEmbeddingModel, + input_text: str, + extra_args: dict[str, typing.Any] = {}, + ) -> list[float]: + """调用 Embedding API + + Args: + query (core_entities.Query): 请求上下文 + model (RuntimeEmbeddingModel): 使用的模型信息 + input_text (str): 输入文本 + extra_args (dict[str, typing.Any], optional): 额外的参数. Defaults to {}. + + Returns: + list[float]: 返回的 embedding 向量 + """ + pass diff --git a/pkg/provider/modelmgr/requesters/anthropicmsgs.py b/pkg/provider/modelmgr/requesters/anthropicmsgs.py index 38573854..b195ae51 100644 --- a/pkg/provider/modelmgr/requesters/anthropicmsgs.py +++ b/pkg/provider/modelmgr/requesters/anthropicmsgs.py @@ -15,7 +15,7 @@ from ...tools import entities as tools_entities from ....utils import image -class AnthropicMessages(requester.LLMAPIRequester): +class AnthropicMessages(requester.ProviderAPIRequester): """Anthropic Messages API 请求器""" client: anthropic.AsyncAnthropic diff --git a/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml b/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml index c124fed9..7dbcf3ed 100644 --- a/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml +++ b/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./anthropicmsgs.py diff --git 
a/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml b/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml index 24beb915..10aae30f 100644 --- a/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./bailianchatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/chatcmpl.py b/pkg/provider/modelmgr/requesters/chatcmpl.py index 513086e5..98d1f13a 100644 --- a/pkg/provider/modelmgr/requesters/chatcmpl.py +++ b/pkg/provider/modelmgr/requesters/chatcmpl.py @@ -13,7 +13,7 @@ from ... import entities as llm_entities from ...tools import entities as tools_entities -class OpenAIChatCompletions(requester.LLMAPIRequester): +class OpenAIChatCompletions(requester.ProviderAPIRequester): """OpenAI ChatCompletion API 请求器""" client: openai.AsyncClient @@ -141,3 +141,39 @@ class OpenAIChatCompletions(requester.LLMAPIRequester): raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') except openai.APIError as e: raise errors.RequesterError(f'请求错误: {e.message}') + + async def invoke_embedding( + self, + query: core_entities.Query, + model: requester.RuntimeEmbeddingModel, + input_text: str, + extra_args: dict[str, typing.Any] = {}, + ) -> list[float]: + """调用 Embedding API""" + self.client.api_key = model.token_mgr.get_token() + + args = { + 'model': model.model_entity.name, + 'input': input_text, + } + + if model.model_entity.extra_args: + args.update(model.model_entity.extra_args) + + args.update(extra_args) + + try: + resp = await self.client.embeddings.create(**args) + return resp.data[0].embedding + except asyncio.TimeoutError: + raise errors.RequesterError('请求超时') + except openai.BadRequestError as e: + raise errors.RequesterError(f'请求参数错误: {e.message}') + except openai.AuthenticationError as e: + raise errors.RequesterError(f'无效的 api-key: {e.message}') + except openai.NotFoundError as e: + raise 
errors.RequesterError(f'请求路径错误: {e.message}') + except openai.RateLimitError as e: + raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') + except openai.APIError as e: + raise errors.RequesterError(f'请求错误: {e.message}') diff --git a/pkg/provider/modelmgr/requesters/chatcmpl.yaml b/pkg/provider/modelmgr/requesters/chatcmpl.yaml index 908b30ac..ff0de6f9 100644 --- a/pkg/provider/modelmgr/requesters/chatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/chatcmpl.yaml @@ -22,6 +22,9 @@ spec: type: integer required: true default: 120 + support_type: + - llm + - text-embedding execution: python: path: ./chatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml b/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml index ea2c7eea..6f320e66 100644 --- a/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./deepseekchatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml b/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml index 6bfc085e..73fca19c 100644 --- a/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./geminichatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml b/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml index a18675a1..3a79bb49 100644 --- a/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./giteeaichatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml b/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml index 
893235b2..fbe57dad 100644 --- a/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./lmstudiochatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py b/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py index b8868f4d..4708f671 100644 --- a/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py +++ b/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py @@ -14,7 +14,7 @@ from ... import entities as llm_entities from ...tools import entities as tools_entities -class ModelScopeChatCompletions(requester.LLMAPIRequester): +class ModelScopeChatCompletions(requester.ProviderAPIRequester): """ModelScope ChatCompletion API 请求器""" client: openai.AsyncClient diff --git a/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml b/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml index a641a672..a926d889 100644 --- a/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml @@ -29,6 +29,8 @@ spec: type: int required: true default: 120 + support_type: + - llm execution: python: path: ./modelscopechatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml b/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml index f3ae73c8..52f7bcda 100644 --- a/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./moonshotchatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/ollamachat.py b/pkg/provider/modelmgr/requesters/ollamachat.py index 2ea4bb7d..1456515f 100644 --- a/pkg/provider/modelmgr/requesters/ollamachat.py +++ b/pkg/provider/modelmgr/requesters/ollamachat.py @@ -17,7 +17,7 @@ from ....core import entities 
as core_entities REQUESTER_NAME: str = 'ollama-chat' -class OllamaChatCompletions(requester.LLMAPIRequester): +class OllamaChatCompletions(requester.ProviderAPIRequester): """Ollama平台 ChatCompletion API请求器""" client: ollama.AsyncClient diff --git a/pkg/provider/modelmgr/requesters/ollamachat.yaml b/pkg/provider/modelmgr/requesters/ollamachat.yaml index 01435775..f4c4bf5a 100644 --- a/pkg/provider/modelmgr/requesters/ollamachat.yaml +++ b/pkg/provider/modelmgr/requesters/ollamachat.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./ollamachat.py diff --git a/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml b/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml index 2ecee6cc..ea35bce6 100644 --- a/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./openrouterchatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml b/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml index 9f201aa9..a5a3421c 100644 --- a/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml @@ -29,6 +29,8 @@ spec: type: int required: true default: 120 + support_type: + - llm execution: python: path: ./ppiochatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml b/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml index 19b3dcc3..3872cb6f 100644 --- a/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./siliconflowchatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml b/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml index 
402f04e7..c711ef2d 100644 --- a/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./volcarkchatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml b/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml index 29db4eb3..2769a402 100644 --- a/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./xaichatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml b/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml index a05184ef..34539d95 100644 --- a/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./zhipuaichatcmpl.py diff --git a/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx b/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx index e21317d6..ef9c6f45 100644 --- a/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx +++ b/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx @@ -47,6 +47,7 @@ export const sidebarConfigList = [ zh_Hans: 'https://docs.langbot.app/zh/deploy/models/readme.html', }, }), + new SidebarChildVO({ id: 'pipelines', name: t('pipelines.title'), diff --git a/web/src/app/home/models/component/llm-form/ChooseRequesterEntity.ts b/web/src/app/home/models/component/ChooseRequesterEntity.ts similarity index 100% rename from web/src/app/home/models/component/llm-form/ChooseRequesterEntity.ts rename to web/src/app/home/models/component/ChooseRequesterEntity.ts diff --git a/web/src/app/home/models/component/ICreateEmbeddingField.ts 
b/web/src/app/home/models/component/ICreateEmbeddingField.ts new file mode 100644 index 00000000..ea198f3f --- /dev/null +++ b/web/src/app/home/models/component/ICreateEmbeddingField.ts @@ -0,0 +1,7 @@ +export interface ICreateEmbeddingField { + name: string; + model_provider: string; + url: string; + api_key: string; + extra_args?: string[]; +} diff --git a/web/src/app/home/models/ICreateLLMField.ts b/web/src/app/home/models/component/ICreateLLMField.ts similarity index 100% rename from web/src/app/home/models/ICreateLLMField.ts rename to web/src/app/home/models/component/ICreateLLMField.ts diff --git a/web/src/app/home/models/component/embedding-card/EmbeddingCard.module.css b/web/src/app/home/models/component/embedding-card/EmbeddingCard.module.css new file mode 100644 index 00000000..9c6c54f7 --- /dev/null +++ b/web/src/app/home/models/component/embedding-card/EmbeddingCard.module.css @@ -0,0 +1,97 @@ +.cardContainer { + width: 100%; + height: 10rem; + background-color: #fff; + border-radius: 10px; + box-shadow: 0px 2px 2px 0 rgba(0, 0, 0, 0.2); + padding: 1.2rem; + cursor: pointer; +} + +.cardContainer:hover { + box-shadow: 0px 2px 8px 0 rgba(0, 0, 0, 0.1); +} + +.iconBasicInfoContainer { + width: 100%; + height: 100%; + display: flex; + flex-direction: row; + gap: 0.8rem; + user-select: none; +} + +.iconImage { + width: 3.8rem; + height: 3.8rem; + margin: 0.2rem; + border-radius: 50%; +} + +.basicInfoContainer { + display: flex; + flex-direction: column; + gap: 0.2rem; + min-width: 0; + width: 100%; +} + +.basicInfoText { + font-size: 1.4rem; + font-weight: bold; +} + +.providerContainer { + display: flex; + flex-direction: row; + align-items: center; + justify-content: flex-start; + gap: 0.2rem; +} + +.providerIcon { + width: 1.2rem; + height: 1.2rem; + margin-top: 0.2rem; + color: #626262; +} + +.providerLabel { + font-size: 1.2rem; + font-weight: 600; + color: #626262; +} + +.baseURLContainer { + display: flex; + flex-direction: row; + align-items: center; 
+ justify-content: flex-start; + gap: 0.2rem; + width: calc(100% - 3rem); +} + +.baseURLIcon { + width: 1.2rem; + height: 1.2rem; + color: #626262; +} + +.baseURLText { + font-size: 1rem; + width: 100%; + color: #626262; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + max-width: 100%; +} + +.bigText { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + font-size: 1.4rem; + font-weight: bold; + max-width: 100%; +} diff --git a/web/src/app/home/models/component/embedding-card/EmbeddingCard.tsx b/web/src/app/home/models/component/embedding-card/EmbeddingCard.tsx new file mode 100644 index 00000000..e3dfaf80 --- /dev/null +++ b/web/src/app/home/models/component/embedding-card/EmbeddingCard.tsx @@ -0,0 +1,53 @@ +import styles from './EmbeddingCard.module.css'; +import { EmbeddingCardVO } from '@/app/home/models/component/embedding-card/EmbeddingCardVO'; + +export default function EmbeddingCard({ cardVO }: { cardVO: EmbeddingCardVO }) { + return ( +
+
+ icon + +
+ {/* 名称 */} +
+ {cardVO.name} +
+ {/* 厂商 */} +
+ + + + + {cardVO.providerLabel} + +
+ {/* baseURL */} +
+ + + + {cardVO.baseURL} +
+
+
+
+ ); +} diff --git a/web/src/app/home/models/component/embedding-card/EmbeddingCardVO.ts b/web/src/app/home/models/component/embedding-card/EmbeddingCardVO.ts new file mode 100644 index 00000000..f6d960f6 --- /dev/null +++ b/web/src/app/home/models/component/embedding-card/EmbeddingCardVO.ts @@ -0,0 +1,23 @@ +export interface IEmbeddingCardVO { + id: string; + iconURL: string; + name: string; + providerLabel: string; + baseURL: string; +} + +export class EmbeddingCardVO implements IEmbeddingCardVO { + id: string; + iconURL: string; + providerLabel: string; + name: string; + baseURL: string; + + constructor(props: IEmbeddingCardVO) { + this.id = props.id; + this.iconURL = props.iconURL; + this.providerLabel = props.providerLabel; + this.name = props.name; + this.baseURL = props.baseURL; + } +} diff --git a/web/src/app/home/models/component/embedding-form/EmbeddingForm.tsx b/web/src/app/home/models/component/embedding-form/EmbeddingForm.tsx new file mode 100644 index 00000000..4658a22f --- /dev/null +++ b/web/src/app/home/models/component/embedding-form/EmbeddingForm.tsx @@ -0,0 +1,563 @@ +import { ICreateEmbeddingField } from '@/app/home/models/component/ICreateEmbeddingField'; +import { useEffect, useState } from 'react'; +import { IChooseRequesterEntity } from '@/app/home/models/component/ChooseRequesterEntity'; +import { httpClient } from '@/app/infra/http/HttpClient'; +import { EmbeddingModel } from '@/app/infra/entities/api'; +import { UUID } from 'uuidjs'; + +import { zodResolver } from '@hookform/resolvers/zod'; +import { useForm } from 'react-hook-form'; +import { z } from 'zod'; +import { useTranslation } from 'react-i18next'; + +import { + Dialog, + DialogContent, + DialogDescription, + DialogHeader, + DialogTitle, + DialogFooter, +} from '@/components/ui/dialog'; +import { Button } from '@/components/ui/button'; +import { + Form, + FormControl, + FormDescription, + FormField, + FormItem, + FormLabel, + FormMessage, +} from '@/components/ui/form'; +import 
{ Input } from '@/components/ui/input'; +import { + Select, + SelectContent, + SelectGroup, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select'; +import { toast } from 'sonner'; +import { i18nObj } from '@/i18n/I18nProvider'; + +const getExtraArgSchema = (t: (key: string) => string) => + z + .object({ + key: z.string().min(1, { message: t('models.keyNameRequired') }), + type: z.enum(['string', 'number', 'boolean']), + value: z.string(), + }) + .superRefine((data, ctx) => { + if (data.type === 'number' && isNaN(Number(data.value))) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: t('models.mustBeValidNumber'), + path: ['value'], + }); + } + if ( + data.type === 'boolean' && + data.value !== 'true' && + data.value !== 'false' + ) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: t('models.mustBeTrueOrFalse'), + path: ['value'], + }); + } + }); + +const getFormSchema = (t: (key: string) => string) => + z.object({ + name: z.string().min(1, { message: t('models.modelNameRequired') }), + model_provider: z + .string() + .min(1, { message: t('models.modelProviderRequired') }), + url: z.string().min(1, { message: t('models.requestURLRequired') }), + api_key: z.string().min(1, { message: t('models.apiKeyRequired') }), + extra_args: z.array(getExtraArgSchema(t)).optional(), + }); + +export default function EmbeddingForm({ + editMode, + initEmbeddingId, + onFormSubmit, + onFormCancel, + onEmbeddingDeleted, +}: { + editMode: boolean; + initEmbeddingId?: string; + onFormSubmit: () => void; + onFormCancel: () => void; + onEmbeddingDeleted: () => void; +}) { + const { t } = useTranslation(); + const formSchema = getFormSchema(t); + + const form = useForm>({ + resolver: zodResolver(formSchema), + defaultValues: { + name: '', + model_provider: '', + url: '', + api_key: 'sk-xxxxx', + extra_args: [], + }, + }); + + const [extraArgs, setExtraArgs] = useState< + { key: string; type: 'string' | 'number' | 'boolean'; value: string }[] + 
>([]); + + const [showDeleteConfirmModal, setShowDeleteConfirmModal] = useState(false); + const [requesterNameList, setRequesterNameList] = useState< + IChooseRequesterEntity[] + >([]); + const [requesterDefaultURLList, setRequesterDefaultURLList] = useState< + string[] + >([]); + const [modelTesting, setModelTesting] = useState(false); + + useEffect(() => { + initEmbeddingModelFormComponent().then(() => { + if (editMode && initEmbeddingId) { + getEmbeddingConfig(initEmbeddingId).then((val) => { + form.setValue('name', val.name); + form.setValue('model_provider', val.model_provider); + // setCurrentModelProvider(val.model_provider); + form.setValue('url', val.url); + form.setValue('api_key', val.api_key); + if (val.extra_args) { + const args = val.extra_args.map((arg) => { + const [key, value] = arg.split(':'); + let type: 'string' | 'number' | 'boolean' = 'string'; + if (!isNaN(Number(value))) { + type = 'number'; + } else if (value === 'true' || value === 'false') { + type = 'boolean'; + } + return { + key, + type, + value, + }; + }); + setExtraArgs(args); + form.setValue('extra_args', args); + } + }); + } else { + form.reset(); + } + }); + }, []); + + const addExtraArg = () => { + setExtraArgs([...extraArgs, { key: '', type: 'string', value: '' }]); + }; + + const updateExtraArg = ( + index: number, + field: 'key' | 'type' | 'value', + value: string, + ) => { + const newArgs = [...extraArgs]; + newArgs[index] = { + ...newArgs[index], + [field]: value, + }; + setExtraArgs(newArgs); + form.setValue('extra_args', newArgs); + }; + + const removeExtraArg = (index: number) => { + const newArgs = extraArgs.filter((_, i) => i !== index); + setExtraArgs(newArgs); + form.setValue('extra_args', newArgs); + }; + + async function initEmbeddingModelFormComponent() { + const requesterNameList = + await httpClient.getProviderRequesters('text-embedding'); + setRequesterNameList( + requesterNameList.requesters.map((item) => { + return { + label: i18nObj(item.label), + value: 
item.name, + }; + }), + ); + setRequesterDefaultURLList( + requesterNameList.requesters.map((item) => { + const config = item.spec.config; + for (let i = 0; i < config.length; i++) { + if (config[i].name == 'base_url') { + return config[i].default?.toString() || ''; + } + } + return ''; + }), + ); + } + + async function getEmbeddingConfig( + id: string, + ): Promise { + const embeddingModel = await httpClient.getProviderEmbeddingModel(id); + + const fakeExtraArgs = []; + const extraArgs = embeddingModel.model.extra_args as Record; + for (const key in extraArgs) { + fakeExtraArgs.push(`${key}:${extraArgs[key]}`); + } + return { + name: embeddingModel.model.name, + model_provider: embeddingModel.model.requester, + url: embeddingModel.model.requester_config?.base_url, + api_key: embeddingModel.model.api_keys[0], + extra_args: fakeExtraArgs, + }; + } + + function handleFormSubmit(value: z.infer) { + const extraArgsObj: Record = {}; + value.extra_args?.forEach( + (arg: { key: string; type: string; value: string }) => { + if (arg.type === 'number') { + extraArgsObj[arg.key] = Number(arg.value); + } else if (arg.type === 'boolean') { + extraArgsObj[arg.key] = arg.value === 'true'; + } else { + extraArgsObj[arg.key] = arg.value; + } + }, + ); + + const embeddingModel: EmbeddingModel = { + uuid: editMode ? 
initEmbeddingId || '' : UUID.generate(), + name: value.name, + description: '', + requester: value.model_provider, + requester_config: { + base_url: value.url, + timeout: 120, + }, + extra_args: extraArgsObj, + api_keys: [value.api_key], + }; + + if (editMode) { + onSaveEdit(embeddingModel).then(() => { + form.reset(); + }); + } else { + onCreateEmbedding(embeddingModel).then(() => { + form.reset(); + }); + } + } + + async function onCreateEmbedding(embeddingModel: EmbeddingModel) { + try { + await httpClient.createProviderEmbeddingModel(embeddingModel); + onFormSubmit(); + toast.success(t('models.createSuccess')); + } catch (err) { + toast.error(t('models.createError') + (err as Error).message); + } + } + + async function onSaveEdit(embeddingModel: EmbeddingModel) { + try { + await httpClient.updateProviderEmbeddingModel( + initEmbeddingId || '', + embeddingModel, + ); + onFormSubmit(); + toast.success(t('models.saveSuccess')); + } catch (err) { + toast.error(t('models.saveError') + (err as Error).message); + } + } + + function deleteModel() { + if (initEmbeddingId) { + httpClient + .deleteProviderEmbeddingModel(initEmbeddingId) + .then(() => { + onEmbeddingDeleted(); + toast.success(t('models.deleteSuccess')); + }) + .catch((err) => { + toast.error(t('models.deleteError') + err.message); + }); + } + } + + function testEmbeddingModelInForm() { + setModelTesting(true); + httpClient + .testEmbeddingModel('_', { + uuid: '', + name: form.getValues('name'), + description: '', + requester: form.getValues('model_provider'), + requester_config: { + base_url: form.getValues('url'), + timeout: 120, + }, + api_keys: [form.getValues('api_key')], + }) + .then((res) => { + console.log(res); + toast.success(t('models.testSuccess')); + }) + .catch(() => { + toast.error(t('models.testError')); + }) + .finally(() => { + setModelTesting(false); + }); + } + + return ( +
+ + + + {t('common.confirmDelete')} + + + {t('models.deleteConfirmation')} + + + + + + + + +
+ +
+ ( + + + {t('models.modelName')} + * + + + + + + + {t('models.modelProviderDescription')} + + + )} + /> + + ( + + + {t('models.modelProvider')} + * + + + + + + + )} + /> + + ( + + + {t('models.requestURL')} + * + + + + + + + )} + /> + + ( + + + {t('models.apiKey')} + * + + + + + + + )} + /> + + + {t('models.extraParameters')} +
+ {extraArgs.map((arg, index) => ( +
+ + updateExtraArg(index, 'key', e.target.value) + } + /> + + + updateExtraArg(index, 'value', e.target.value) + } + /> + +
+ ))} + +
+ + {t('embedding.extraParametersDescription')} + + +
+
+ + {editMode && ( + + )} + + + + + + + +
+ +
+ ); +} diff --git a/web/src/app/home/models/component/llm-form/LLMForm.tsx b/web/src/app/home/models/component/llm-form/LLMForm.tsx index f483f183..73cc32fe 100644 --- a/web/src/app/home/models/component/llm-form/LLMForm.tsx +++ b/web/src/app/home/models/component/llm-form/LLMForm.tsx @@ -1,6 +1,6 @@ -import { ICreateLLMField } from '@/app/home/models/ICreateLLMField'; +import { ICreateLLMField } from '@/app/home/models/component/ICreateLLMField'; import { useEffect, useState } from 'react'; -import { IChooseRequesterEntity } from '@/app/home/models/component/llm-form/ChooseRequesterEntity'; +import { IChooseRequesterEntity } from '@/app/home/models/component/ChooseRequesterEntity'; import { httpClient } from '@/app/infra/http/HttpClient'; import { LLMModel } from '@/app/infra/entities/api'; import { UUID } from 'uuidjs'; @@ -197,7 +197,7 @@ export default function LLMForm({ }; async function initLLMModelFormComponent() { - const requesterNameList = await httpClient.getProviderRequesters(); + const requesterNameList = await httpClient.getProviderRequesters('llm'); setRequesterNameList( requesterNameList.requesters.map((item) => { return { @@ -596,7 +596,7 @@ export default function LLMForm({
- {t('models.extraParametersDescription')} + {t('llm.extraParametersDescription')} diff --git a/web/src/app/home/models/page.tsx b/web/src/app/home/models/page.tsx index 3ccec486..2f936753 100644 --- a/web/src/app/home/models/page.tsx +++ b/web/src/app/home/models/page.tsx @@ -8,6 +8,7 @@ import LLMForm from '@/app/home/models/component/llm-form/LLMForm'; import CreateCardComponent from '@/app/infra/basic-component/create-card-component/CreateCardComponent'; import { httpClient } from '@/app/infra/http/HttpClient'; import { LLMModel } from '@/app/infra/entities/api'; +import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs'; import { Dialog, DialogContent, @@ -17,6 +18,9 @@ import { import { toast } from 'sonner'; import { useTranslation } from 'react-i18next'; import { i18nObj } from '@/i18n/I18nProvider'; +import { EmbeddingCardVO } from '@/app/home/models/component/embedding-card/EmbeddingCardVO'; +import EmbeddingCard from '@/app/home/models/component/embedding-card/EmbeddingCard'; +import EmbeddingForm from '@/app/home/models/component/embedding-form/EmbeddingForm'; export default function LLMConfigPage() { const { t } = useTranslation(); @@ -24,13 +28,21 @@ export default function LLMConfigPage() { const [modalOpen, setModalOpen] = useState(false); const [isEditForm, setIsEditForm] = useState(false); const [nowSelectedLLM, setNowSelectedLLM] = useState(null); + const [embeddingCardList, setEmbeddingCardList] = useState( + [], + ); + const [embeddingModalOpen, setEmbeddingModalOpen] = useState(false); + const [isEditEmbeddingForm, setIsEditEmbeddingForm] = useState(false); + const [nowSelectedEmbedding, setNowSelectedEmbedding] = + useState(null); useEffect(() => { getLLMModelList(); + getEmbeddingModelList(); }, []); async function getLLMModelList() { - const requesterNameListResp = await httpClient.getProviderRequesters(); + const requesterNameListResp = await httpClient.getProviderRequesters('llm'); const requesterNameList = 
requesterNameListResp.requesters.map((item) => { return { label: i18nObj(item.label), @@ -74,6 +86,55 @@ export default function LLMConfigPage() { setNowSelectedLLM(null); setModalOpen(true); } + function selectEmbedding(cardVO: EmbeddingCardVO) { + setIsEditEmbeddingForm(true); + setNowSelectedEmbedding(cardVO); + setEmbeddingModalOpen(true); + } + + function handleCreateEmbeddingModelClick() { + setIsEditEmbeddingForm(false); + setNowSelectedEmbedding(null); + setEmbeddingModalOpen(true); + } + async function getEmbeddingModelList() { + const requesterNameListResp = + await httpClient.getProviderRequesters('text-embedding'); + const requesterNameList = requesterNameListResp.requesters.map((item) => { + return { + label: i18nObj(item.label), + value: item.name, + }; + }); + + httpClient + .getProviderEmbeddingModels() + .then((resp) => { + const embeddingModelList: EmbeddingCardVO[] = resp.models.map( + (model: { + uuid: string; + requester: string; + name: string; + requester_config?: { base_url?: string }; + }) => { + return new EmbeddingCardVO({ + id: model.uuid, + iconURL: httpClient.getProviderRequesterIconURL(model.requester), + name: model.name, + providerLabel: + requesterNameList.find((item) => item.value === model.requester) + ?.label || model.requester.substring(0, 10), + baseURL: model.requester_config?.base_url || '', + }); + }, + ); + setEmbeddingCardList(embeddingModelList); + }) + .catch((err) => { + console.error('get Embedding model list error', err); + toast.error(t('embedding.getModelListError') + err.message); + }); + } return (
@@ -101,26 +162,108 @@ export default function LLMConfigPage() { /> -
- - {cardList.map((cardVO) => { - return ( -
{ - selectLLM(cardVO); - }} - > - + + + + + {isEditEmbeddingForm + ? t('embedding.editModel') + : t('embedding.createModel')} + + + { + setEmbeddingModalOpen(false); + getEmbeddingModelList(); + }} + onFormCancel={() => { + setEmbeddingModalOpen(false); + }} + onEmbeddingDeleted={() => { + setEmbeddingModalOpen(false); + getEmbeddingModelList(); + }} + /> + + + + +
+
+ + + {t('llm.llmModels')} + + + {t('embedding.embeddingModels')} + + +
+ +
+

{t('llm.description')}

- ); - })} -
+ + +
+

+ {t('embedding.description')} +

+
+
+
+ + +
+ + {cardList.map((cardVO) => { + return ( +
{ + selectLLM(cardVO); + }} + > + +
+ ); + })} +
+
+ + +
+ + {embeddingCardList.map((cardVO) => { + return ( +
{ + selectEmbedding(cardVO); + }} + > + +
+ ); + })} +
+
+
); } diff --git a/web/src/app/infra/entities/api/index.ts b/web/src/app/infra/entities/api/index.ts index d86a8be0..53ddf1dd 100644 --- a/web/src/app/infra/entities/api/index.ts +++ b/web/src/app/infra/entities/api/index.ts @@ -55,6 +55,29 @@ export interface LLMModel { // updated_at: string; } +export interface ApiRespProviderEmbeddingModels { + models: EmbeddingModel[]; +} + +export interface ApiRespProviderEmbeddingModel { + model: EmbeddingModel; +} + +export interface EmbeddingModel { + name: string; + description: string; + uuid: string; + requester: string; + requester_config: { + base_url: string; + timeout: number; + }; + extra_args?: object; + api_keys: string[]; + // created_at: string; + // updated_at: string; +} + export interface ApiRespPipelines { pipelines: Pipeline[]; } diff --git a/web/src/app/infra/http/HttpClient.ts b/web/src/app/infra/http/HttpClient.ts index 5193703b..1fd335d9 100644 --- a/web/src/app/infra/http/HttpClient.ts +++ b/web/src/app/infra/http/HttpClient.ts @@ -10,6 +10,9 @@ import { ApiRespProviderLLMModels, ApiRespProviderLLMModel, LLMModel, + ApiRespProviderEmbeddingModels, + ApiRespProviderEmbeddingModel, + EmbeddingModel, ApiRespPipelines, Pipeline, ApiRespPlatformAdapters, @@ -226,8 +229,10 @@ class HttpClient { // real api request implementation // ============ Provider API ============ - public getProviderRequesters(): Promise { - return this.get('/api/v1/provider/requesters'); + public getProviderRequesters( + model_type: string, + ): Promise { + return this.get('/api/v1/provider/requesters', { type: model_type }); } public getProviderRequester(name: string): Promise { @@ -275,6 +280,39 @@ class HttpClient { return this.post(`/api/v1/provider/models/llm/${uuid}/test`, model); } + // ============ Provider Model Embedding ============ + public getProviderEmbeddingModels(): Promise { + return this.get('/api/v1/provider/models/embedding'); + } + + public getProviderEmbeddingModel( + uuid: string, + ): Promise { + return 
this.get(`/api/v1/provider/models/embedding/${uuid}`); + } + + public createProviderEmbeddingModel(model: EmbeddingModel): Promise { + return this.post('/api/v1/provider/models/embedding', model); + } + + public deleteProviderEmbeddingModel(uuid: string): Promise { + return this.delete(`/api/v1/provider/models/embedding/${uuid}`); + } + + public updateProviderEmbeddingModel( + uuid: string, + model: EmbeddingModel, + ): Promise { + return this.put(`/api/v1/provider/models/embedding/${uuid}`, model); + } + + public testEmbeddingModel( + uuid: string, + model: EmbeddingModel, + ): Promise { + return this.post(`/api/v1/provider/models/embedding/${uuid}/test`, model); + } + // ============ Pipeline API ============ public getGeneralPipelineMetadata(): Promise { // as designed, this method will be deprecated, and only for developer to check the prefered config schema diff --git a/web/src/i18n/locales/en-US.ts b/web/src/i18n/locales/en-US.ts index 1975a521..d0df9841 100644 --- a/web/src/i18n/locales/en-US.ts +++ b/web/src/i18n/locales/en-US.ts @@ -86,14 +86,13 @@ const enUS = { string: 'String', number: 'Number', boolean: 'Boolean', - extraParametersDescription: - 'Will be attached to the request body, such as max_tokens, temperature, top_p, etc.', selectModelProvider: 'Select Model Provider', modelProviderDescription: 'Please fill in the model name provided by the supplier', selectModel: 'Select Model', testSuccess: 'Test successful', testError: 'Test failed, please check your model configuration', + llmModels: 'LLM Models', }, bots: { title: 'Bots', @@ -259,6 +258,21 @@ const enUS = { 'Password reset failed, please check your email and recovery key', backToLogin: 'Back to Login', }, + embedding: { + description: 'Manage Embedding models for text vectorization', + createModel: 'Create Embedding Model', + editModel: 'Edit Embedding Model', + getModelListError: 'Failed to get Embedding model list: ', + embeddingModels: 'Embedding', + extraParametersDescription: + 'Will be 
attached to the request body, such as encoding_format, dimensions, etc.', + }, + llm: { + description: 'Manage LLM models for conversation generation', + llmModels: 'LLM', + extraParametersDescription: + 'Will be attached to the request body, such as max_tokens, temperature, top_p, etc.', + }, }; export default enUS; diff --git a/web/src/i18n/locales/zh-Hans.ts b/web/src/i18n/locales/zh-Hans.ts index 2ded8236..96acc0e6 100644 --- a/web/src/i18n/locales/zh-Hans.ts +++ b/web/src/i18n/locales/zh-Hans.ts @@ -87,13 +87,12 @@ const zhHans = { string: '字符串', number: '数字', boolean: '布尔值', - extraParametersDescription: - '将在请求时附加到请求体中,如 max_tokens, temperature, top_p 等', selectModelProvider: '选择模型供应商', modelProviderDescription: '请填写供应商向您提供的模型名称', selectModel: '请选择模型', testSuccess: '测试成功', testError: '测试失败,请检查模型配置', + llmModels: '对话模型', }, bots: { title: '机器人', @@ -251,6 +250,21 @@ const zhHans = { resetFailed: '密码重置失败,请检查邮箱和恢复密钥是否正确', backToLogin: '返回登录', }, + embedding: { + description: '管理嵌入模型,用于向量化文本', + createModel: '创建嵌入模型', + editModel: '编辑嵌入模型', + getModelListError: '获取嵌入模型列表失败:', + embeddingModels: '嵌入模型', + extraParametersDescription: + '将在请求时附加到请求体中,如 encoding_format, dimensions 等', + }, + llm: { + llmModels: '对话模型', + description: '管理 LLM 模型,用于对话消息生成', + extraParametersDescription: + '将在请求时附加到请求体中,如 max_tokens, temperature, top_p 等', + }, }; export default zhHans; From 157ffdc34c96d3faf7bc6b9d22d362e55834c678 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Tue, 10 Jun 2025 08:34:53 +0800 Subject: [PATCH 014/257] feat: add knowledge page --- web/src/app/home/knowledge/page.tsx | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 web/src/app/home/knowledge/page.tsx diff --git a/web/src/app/home/knowledge/page.tsx b/web/src/app/home/knowledge/page.tsx new file mode 100644 index 00000000..9707a8ee --- /dev/null +++ b/web/src/app/home/knowledge/page.tsx @@ -0,0 +1,5 @@ +'use client'; + +export default function KnowledgePage() { + return
KnowledgePage
; +} From 348f6d9eaa0759638a37e752502f361c5e848f75 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Wed, 11 Jun 2025 20:24:42 +0800 Subject: [PATCH 015/257] feat: add api for uploading files --- pkg/api/http/controller/groups/files.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pkg/api/http/controller/groups/files.py b/pkg/api/http/controller/groups/files.py index 0a8b2210..d08cbd71 100644 --- a/pkg/api/http/controller/groups/files.py +++ b/pkg/api/http/controller/groups/files.py @@ -2,6 +2,10 @@ from __future__ import annotations import quart import mimetypes +import uuid +import asyncio + +import quart.datastructures from .. import group @@ -20,3 +24,22 @@ class FilesRouterGroup(group.RouterGroup): mime_type = 'image/jpeg' return quart.Response(image_bytes, mimetype=mime_type) + + @self.route('/documents', methods=['POST'], auth_type=group.AuthType.USER_TOKEN) + async def _() -> quart.Response: + request = quart.request + # get file bytes from 'file' + file = (await request.files)['file'] + assert isinstance(file, quart.datastructures.FileStorage) + + file_bytes = await asyncio.to_thread(file.stream.read) + extension = file.filename.split('.')[-1] + + file_key = str(uuid.uuid4()) + '.' 
+ extension + # save file to storage + await self.ap.storage_mgr.storage_provider.save(file_key, file_bytes) + return self.success( + data={ + 'file_id': file_key, + } + ) From 4bcc06c9559f08f8e84b3c95a94eb4258da22688 Mon Sep 17 00:00:00 2001 From: WangCham <651122857@qq.com> Date: Wed, 25 Jun 2025 14:32:53 +0800 Subject: [PATCH 016/257] kb --- .../http/controller/groups/knowledge_base.py | 83 +++++ pkg/core/app.py | 7 + pkg/rag/knowledge/RAG_Manager.py | 283 +++++++++++++++++ pkg/rag/knowledge/services/__init__.py | 0 pkg/rag/knowledge/services/base_service.py | 26 ++ pkg/rag/knowledge/services/chroma_manager.py | 65 ++++ pkg/rag/knowledge/services/chunker.py | 63 ++++ pkg/rag/knowledge/services/database.py | 57 ++++ pkg/rag/knowledge/services/embedder.py | 93 ++++++ .../knowledge/services/embedding_models.py | 223 ++++++++++++++ pkg/rag/knowledge/services/parser.py | 288 ++++++++++++++++++ pkg/rag/knowledge/services/retriever.py | 106 +++++++ pkg/rag/knowledge/utils/crawler.py | 0 pyproject.toml | 11 + 14 files changed, 1305 insertions(+) create mode 100644 pkg/api/http/controller/groups/knowledge_base.py create mode 100644 pkg/rag/knowledge/RAG_Manager.py create mode 100644 pkg/rag/knowledge/services/__init__.py create mode 100644 pkg/rag/knowledge/services/base_service.py create mode 100644 pkg/rag/knowledge/services/chroma_manager.py create mode 100644 pkg/rag/knowledge/services/chunker.py create mode 100644 pkg/rag/knowledge/services/database.py create mode 100644 pkg/rag/knowledge/services/embedder.py create mode 100644 pkg/rag/knowledge/services/embedding_models.py create mode 100644 pkg/rag/knowledge/services/parser.py create mode 100644 pkg/rag/knowledge/services/retriever.py create mode 100644 pkg/rag/knowledge/utils/crawler.py diff --git a/pkg/api/http/controller/groups/knowledge_base.py b/pkg/api/http/controller/groups/knowledge_base.py new file mode 100644 index 00000000..c819397a --- /dev/null +++ b/pkg/api/http/controller/groups/knowledge_base.py @@ 
-0,0 +1,83 @@ +import quart +from __future__ import annotations +from .. import group + +@group.group_class('knowledge_base', '/api/v1/knowledge/bases') +class KnowledgeBaseRouterGroup(group.RouterGroup): + + # 定义成功方法 + def success(self, code=0, data=None, msg: str = 'ok') -> quart.Response: + return quart.jsonify({ + "code": code, + "data": data or {}, + "msg": msg + }) + + + + async def initialize(self) -> None: + rag = self.ap.knowledge_base_service.RAG_Manager() + + @self.route('', methods=['POST', 'GET']) + async def _() -> str: + + if quart.request.method == 'GET': + knowledge_bases = await rag.get_all_knowledge_bases() + bases_list = [ + { + "uuid": kb.id, + "name": kb.name, + "description": kb.description, + } for kb in knowledge_bases + ] + return self.success(code=0, + data={'bases': bases_list}, + msg='ok') + + json_data = await quart.request.json + knowledge_base_uuid = await rag.create_knowledge_base( + json_data.get('name'), + json_data.get('description') + ) + return self.success() + + + @self.route('/', methods=['GET']) + async def _(knowledge_base_uuid: str) -> str: + if quart.request.method == 'GET': + knowledge_base = await rag.get_knowledge_base_by_id(knowledge_base_uuid) + + if knowledge_base is None: + return self.http_status(404, -1, 'knowledge base not found') + + return self.success( + code=0, + data={ + "name": knowledge_base.name, + "description": knowledge_base.description, + "uuid": knowledge_base.id + }, + msg='ok' + ) + + @self.route('//files', methods=['GET']) + async def _(knowledge_base_uuid: str) -> str: + if quart.request.method == 'GET': + files = await rag.get_files_by_knowledge_base(knowledge_base_uuid) + return self.success(code=0,data=[{ + "id": file.id, + "file_name": file.file_name, + "status": file.status + } for file in files],msg='ok') + + # delete specific file in knowledge base + @self.route('//files/', methods=['DELETE']) + async def _(knowledge_base_uuid: str, file_id: str) -> str: + await 
rag.delete_data_by_file_id(file_id) + return self.success(code=0, msg='ok') + + # delete specific kb + @self.route('/', methods=['DELETE']) + async def _(knowledge_base_uuid: str) -> str: + await rag.delete_kb_by_id(knowledge_base_uuid) + return self.success(code=0, msg='ok') diff --git a/pkg/core/app.py b/pkg/core/app.py index 318cddcb..d8824466 100644 --- a/pkg/core/app.py +++ b/pkg/core/app.py @@ -27,6 +27,10 @@ from ..storage import mgr as storagemgr from ..utils import logcache from . import taskmgr from . import entities as core_entities +from ...pkg.rag.knowledge import RAG_Manager + + + class Application: @@ -99,6 +103,8 @@ class Application: storage_mgr: storagemgr.StorageMgr = None + knowledge_base_service: RAG_Manager = None + # ========= HTTP Services ========= user_service: user_service.UserService = None @@ -111,6 +117,7 @@ class Application: bot_service: bot_service.BotService = None + def __init__(self): pass diff --git a/pkg/rag/knowledge/RAG_Manager.py b/pkg/rag/knowledge/RAG_Manager.py new file mode 100644 index 00000000..e172c132 --- /dev/null +++ b/pkg/rag/knowledge/RAG_Manager.py @@ -0,0 +1,283 @@ +# RAG_Manager class (main class, adjust imports as needed) +import logging +import os +import asyncio +from services.parser import FileParser +from services.chunker import Chunker +from services.embedder import Embedder +from services.retriever import Retriever +from services.database import create_db_and_tables, SessionLocal, KnowledgeBase, File, Chunk # Ensure Chunk is imported if you need to manipulate it directly +from services.embedding_models import EmbeddingModelFactory +from services.chroma_manager import ChromaIndexManager +from ...core import app + +class RAG_Manager: + def __init__(self, logger: logging.Logger = None): + self.logger = logger or logging.getLogger(__name__) + self.embedding_model_type = None + self.embedding_model_name = None + self.chroma_manager = None + self.parser = None + self.chunker = None + self.embedder = None + 
self.retriever = None + self.ap = app.Application + + async def initialize_system(self): + await asyncio.to_thread(create_db_and_tables) + + async def create_model(self, embedding_model_type: str, + embedding_model_name: str): + self.embedding_model_type = embedding_model_type + self.embedding_model_name = embedding_model_name + + try: + model = EmbeddingModelFactory.create_model( + model_type=self.embedding_model_type, + model_name_key=self.embedding_model_name + ) + self.logger.info(f"Configured embedding model '{self.embedding_model_name}' has dimension: {model.embedding_dimension}") + except Exception as e: + self.logger.critical(f"Failed to get dimension for configured embedding model '{self.embedding_model_name}': {e}") + raise RuntimeError("Failed to initialize RAG_Manager due to embedding model issues.") + + self.chroma_manager = ChromaIndexManager(collection_name=f"rag_collection_{self.embedding_model_name.replace('-', '_')}") + + self.parser = FileParser() + self.chunker = Chunker() + # Pass chroma_manager to Embedder and Retriever + self.embedder = Embedder( + model_type=self.embedding_model_type, + model_name_key=self.embedding_model_name, + chroma_manager=self.chroma_manager # Inject dependency + ) + self.retriever = Retriever( + model_type=self.embedding_model_type, + model_name_key=self.embedding_model_name, + chroma_manager=self.chroma_manager # Inject dependency + ) + + + async def create_knowledge_base(self, kb_name: str, kb_description: str): + """ + Creates a new knowledge base with the given name and description. + If a knowledge base with the same name already exists, it returns that one. 
+ """ + try: + def _get_kb_sync(name): + session = SessionLocal() + try: + return session.query(KnowledgeBase).filter_by(name=name).first() + finally: + session.close() + + kb = await asyncio.to_thread(_get_kb_sync, kb_name) + + if not kb: + def _add_kb_sync(): + session = SessionLocal() + try: + new_kb = KnowledgeBase(name=kb_name, description=kb_description) + session.add(new_kb) + session.commit() + session.refresh(new_kb) + return new_kb + finally: + session.close() + kb = await asyncio.to_thread(_add_kb_sync) + except Exception as e: + self.logger.error(f"Error creating knowledge base '{kb_name}': {str(e)}", exc_info=True) + raise + except Exception as e: + self.logger.error(f"Error creating knowledge base '{kb_name}': {str(e)}", exc_info=True) + raise + + async def get_all_knowledge_bases(self): + """ + Retrieves all knowledge bases from the database. + """ + try: + def _get_all_kbs_sync(): + session = SessionLocal() + try: + return session.query(KnowledgeBase).all() + finally: + session.close() + + kbs = await asyncio.to_thread(_get_all_kbs_sync) + return kbs + except Exception as e: + self.logger.error(f"Error retrieving knowledge bases: {str(e)}", exc_info=True) + return [] + + async def get_knowledge_base_by_id(self, kb_id: int): + """ + Retrieves a knowledge base by its ID. 
+ """ + try: + def _get_kb_sync(kb_id): + session = SessionLocal() + try: + return session.query(KnowledgeBase).filter_by(id=kb_id).first() + finally: + session.close() + + kb = await asyncio.to_thread(_get_kb_sync, kb_id) + return kb + except Exception as e: + self.logger.error(f"Error retrieving knowledge base with ID {kb_id}: {str(e)}", exc_info=True) + return None + + async def get_files_by_knowledge_base(self, kb_id: int): + try: + def _get_files_sync(kb_id): + session = SessionLocal() + try: + return session.query(File).filter_by(kb_id=kb_id).all() + finally: + session.close() + + files = await asyncio.to_thread(_get_files_sync, kb_id) + return files + except Exception as e: + self.logger.error(f"Error retrieving files for knowledge base ID {kb_id}: {str(e)}", exc_info=True) + return [] + + + async def store_data(self, file_path: str, kb_name: str, file_type: str, kb_description: str = "Default knowledge base"): + self.logger.info(f"Starting data storage process for file: {file_path}") + try: + def _get_kb_sync(name): + session = SessionLocal() + try: + return session.query(KnowledgeBase).filter_by(name=name).first() + finally: + session.close() + + kb = await asyncio.to_thread(_get_kb_sync, kb_name) + + if not kb: + self.logger.info(f"Knowledge Base '{kb_name}' not found. 
Creating a new one.") + def _add_kb_sync(): + session = SessionLocal() + try: + new_kb = KnowledgeBase(name=kb_name, description=kb_description) + session.add(new_kb) + session.commit() + session.refresh(new_kb) + return new_kb + finally: + session.close() + kb = await asyncio.to_thread(_add_kb_sync) + self.logger.info(f"Created Knowledge Base: {kb.name} (ID: {kb.id})") + + def _add_file_sync(kb_id, file_name, path, file_type): + session = SessionLocal() + try: + file = File(kb_id=kb_id, file_name=file_name, path=path, file_type=file_type) + session.add(file) + session.commit() + session.refresh(file) + return file + finally: + session.close() + + file_obj = await asyncio.to_thread(_add_file_sync, kb.id, os.path.basename(file_path), file_path, file_type) + self.logger.info(f"Added file entry: {file_obj.file_name} (ID: {file_obj.id})") + + text = await self.parser.parse(file_path) + if not text: + self.logger.warning(f"File {file_path} parsed to empty content. Skipping chunking and embedding.") + # You might want to delete the file_obj from the DB here if it's empty. + session = SessionLocal() + try: + session.delete(file_obj) + session.commit() + except Exception as del_e: + self.logger.error(f"Failed to delete empty file_obj {file_obj.id}: {del_e}") + finally: + session.close() + return + + chunks_texts = await self.chunker.chunk(text) + self.logger.info(f"Chunked into {len(chunks_texts)} pieces.") + + # embed_and_store now handles both DB chunk saving and Chroma embedding + await self.embedder.embed_and_store(file_id=file_obj.id, chunks=chunks_texts) + + self.logger.info(f"Data storage process completed for file: {file_path}") + + except Exception as e: + self.logger.error(f"Error in store_data for file {file_path}: {str(e)}", exc_info=True) + # Consider cleaning up partially stored data if an error occurs. 
+ return + + async def retrieve_data(self, query: str): + self.logger.info(f"Starting data retrieval process for query: '{query}'") + try: + retrieved_chunks = await self.retriever.retrieve(query) + self.logger.info(f"Successfully retrieved {len(retrieved_chunks)} chunks for query.") + return retrieved_chunks + except Exception as e: + self.logger.error(f"Error in retrieve_data for query '{query}': {str(e)}", exc_info=True) + return [] + + async def delete_data_by_file_id(self, file_id: int): + """ + Deletes data associated with a specific file_id from both the relational DB and Chroma. + """ + self.logger.info(f"Starting data deletion process for file_id: {file_id}") + session = SessionLocal() + try: + # 1. Delete from Chroma + await asyncio.to_thread(self.chroma_manager.delete_by_file_id_sync, file_id) + + # 2. Delete chunks from relational DB + chunks_to_delete = session.query(Chunk).filter_by(file_id=file_id).all() + for chunk in chunks_to_delete: + session.delete(chunk) + self.logger.info(f"Deleted {len(chunks_to_delete)} chunks from relational DB for file_id: {file_id}.") + + # 3. Delete file entry from relational DB + file_to_delete = session.query(File).filter_by(id=file_id).first() + if file_to_delete: + session.delete(file_to_delete) + self.logger.info(f"Deleted file entry {file_id} from relational DB.") + else: + self.logger.warning(f"File entry {file_id} not found in relational DB.") + + session.commit() + self.logger.info(f"Data deletion completed for file_id: {file_id}.") + except Exception as e: + session.rollback() + self.logger.error(f"Error deleting data for file_id {file_id}: {str(e)}", exc_info=True) + finally: + session.close() + + async def delete_kb_by_id(self, kb_id: int): + """ + Deletes a knowledge base and all associated files and chunks. + """ + self.logger.info(f"Starting deletion of knowledge base with ID: {kb_id}") + session = SessionLocal() + try: + # 1. 
Get the knowledge base + kb = session.query(KnowledgeBase).filter_by(id=kb_id).first() + if not kb: + self.logger.warning(f"Knowledge Base with ID {kb_id} not found.") + return + + # 2. Delete all files associated with this knowledge base + files_to_delete = session.query(File).filter_by(kb_id=kb.id).all() + for file in files_to_delete: + await self.delete_data_by_file_id(file.id) + + # 3. Delete the knowledge base itself + session.delete(kb) + session.commit() + self.logger.info(f"Successfully deleted knowledge base with ID: {kb_id}") + except Exception as e: + session.rollback() + self.logger.error(f"Error deleting knowledge base with ID {kb_id}: {str(e)}", exc_info=True) + finally: + session.close() diff --git a/pkg/rag/knowledge/services/__init__.py b/pkg/rag/knowledge/services/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pkg/rag/knowledge/services/base_service.py b/pkg/rag/knowledge/services/base_service.py new file mode 100644 index 00000000..0298226a --- /dev/null +++ b/pkg/rag/knowledge/services/base_service.py @@ -0,0 +1,26 @@ +# 封装异步操作 +import asyncio +import logging +from services.database import SessionLocal # 导入 SessionLocal 工厂函数 + +class BaseService: + def __init__(self): + self.logger = logging.getLogger(self.__class__.__name__) + self.db_session_factory = SessionLocal # 使用 SessionLocal 工厂函数 + + async def _run_sync(self, func, *args, **kwargs): + """ + 在单独的线程中运行同步函数。 + 如果第一个参数是 session,则在 to_thread 中获取新的 session。 + """ + # 如果函数需要数据库会话作为第一个参数,我们在这里获取它 + if getattr(func, '__name__', '').startswith('_db_'): # 约定:数据库操作的同步方法以 _db_ 开头 + session = await asyncio.to_thread(self.db_session_factory) + try: + result = await asyncio.to_thread(func, session, *args, **kwargs) + return result + finally: + session.close() + else: + # 否则,直接运行同步函数 + return await asyncio.to_thread(func, *args, **kwargs) \ No newline at end of file diff --git a/pkg/rag/knowledge/services/chroma_manager.py b/pkg/rag/knowledge/services/chroma_manager.py new file 
mode 100644 index 00000000..6a469168 --- /dev/null +++ b/pkg/rag/knowledge/services/chroma_manager.py @@ -0,0 +1,65 @@ +# services/chroma_manager.py +import numpy as np +import logging +from chromadb import PersistentClient +import os + +logger = logging.getLogger(__name__) + +class ChromaIndexManager: + def __init__(self, collection_name: str = "default_collection"): + self.logger = logging.getLogger(self.__class__.__name__) + chroma_data_path = "./chroma_data" + os.makedirs(chroma_data_path, exist_ok=True) + self.client = PersistentClient(path=chroma_data_path) + self._collection_name = collection_name + self._collection = None + + self.logger.info(f"ChromaIndexManager initialized. Collection name: {self._collection_name}") + + @property + def collection(self): + if self._collection is None: + self._collection = self.client.get_or_create_collection(name=self._collection_name) + self.logger.info(f"Chroma collection '{self._collection_name}' accessed/created.") + return self._collection + + def add_embeddings_sync(self, file_ids: list[int], chunk_ids: list[int], embeddings: np.ndarray, documents: list[str]): + if embeddings.shape[0] != len(chunk_ids) or embeddings.shape[0] != len(file_ids) or embeddings.shape[0] != len(documents): + raise ValueError("Embedding, file_id, chunk_id, and document count mismatch.") + + chroma_ids = [f"{file_id}_{chunk_id}" for file_id, chunk_id in zip(file_ids, chunk_ids)] + metadatas = [{"file_id": fid, "chunk_id": cid} for fid, cid in zip(file_ids, chunk_ids)] + + self.logger.debug(f"Adding {len(embeddings)} embeddings to Chroma collection '{self._collection_name}'.") + self.collection.add( + embeddings=embeddings.tolist(), + ids=chroma_ids, + metadatas=metadatas, + documents=documents + ) + self.logger.info(f"Added {len(embeddings)} embeddings to Chroma collection '{self._collection_name}'.") + + def search_sync(self, query_embedding: np.ndarray, k: int = 5): + """ + Searches the Chroma collection for the top-k nearest neighbors. 
+ Args: + query_embedding: A numpy array of the query embedding. + k: The number of results to return. + Returns: + A dictionary containing query results from Chroma. + """ + self.logger.debug(f"Searching Chroma collection '{self._collection_name}' with k={k}.") + results = self.collection.query( + query_embeddings=query_embedding.tolist(), + n_results=k, + # REMOVE 'ids' from the include list. It's returned by default. + include=["metadatas", "distances", "documents"] + ) + self.logger.debug(f"Chroma search returned {len(results.get('ids', [[]])[0])} results.") + return results + + def delete_by_file_id_sync(self, file_id: int): + self.logger.info(f"Deleting embeddings for file_id: {file_id} from Chroma collection '{self._collection_name}'.") + self.collection.delete(where={"file_id": file_id}) + self.logger.info(f"Deleted embeddings for file_id: {file_id} from Chroma.") \ No newline at end of file diff --git a/pkg/rag/knowledge/services/chunker.py b/pkg/rag/knowledge/services/chunker.py new file mode 100644 index 00000000..f115dac4 --- /dev/null +++ b/pkg/rag/knowledge/services/chunker.py @@ -0,0 +1,63 @@ +# services/chunker.py +import logging +from typing import List +from services.base_service import BaseService # Assuming BaseService provides _run_sync + +logger = logging.getLogger(__name__) + +class Chunker(BaseService): + """ + A class for splitting long texts into smaller, overlapping chunks. + """ + def __init__(self, chunk_size: int = 500, chunk_overlap: int = 50): + super().__init__() # Initialize BaseService + self.logger = logging.getLogger(self.__class__.__name__) + self.chunk_size = chunk_size + self.chunk_overlap = chunk_overlap + if self.chunk_overlap >= self.chunk_size: + self.logger.warning("Chunk overlap is greater than or equal to chunk size. This may lead to empty or malformed chunks.") + + def _split_text_sync(self, text: str) -> List[str]: + """ + Synchronously splits a long text into chunks with specified overlap. 
+ This is a CPU-bound operation, intended to be run in a separate thread. + """ + if not text: + return [] + + # Simple whitespace-based splitting for demonstration + # For more advanced chunking, consider libraries like LangChain's text splitters + words = text.split() + chunks = [] + current_chunk = [] + + for word in words: + current_chunk.append(word) + if len(current_chunk) > self.chunk_size: + chunks.append(" ".join(current_chunk[:self.chunk_size])) + current_chunk = current_chunk[self.chunk_size - self.chunk_overlap:] + + if current_chunk: + chunks.append(" ".join(current_chunk)) + + # A more robust chunking strategy (e.g., using recursive character text splitter) + # from langchain.text_splitter import RecursiveCharacterTextSplitter + # text_splitter = RecursiveCharacterTextSplitter( + # chunk_size=self.chunk_size, + # chunk_overlap=self.chunk_overlap, + # length_function=len, + # is_separator_regex=False, + # ) + # return text_splitter.split_text(text) + + return [chunk for chunk in chunks if chunk.strip()] # Filter out empty chunks + + async def chunk(self, text: str) -> List[str]: + """ + Asynchronously chunks a given text into smaller pieces. 
+ """ + self.logger.info(f"Chunking text (length: {len(text)})...") + # Run the synchronous splitting logic in a separate thread + chunks = await self._run_sync(self._split_text_sync, text) + self.logger.info(f"Text chunked into {len(chunks)} pieces.") + return chunks \ No newline at end of file diff --git a/pkg/rag/knowledge/services/database.py b/pkg/rag/knowledge/services/database.py new file mode 100644 index 00000000..4ec21af3 --- /dev/null +++ b/pkg/rag/knowledge/services/database.py @@ -0,0 +1,57 @@ +from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, ForeignKey, LargeBinary +from sqlalchemy.orm import declarative_base, sessionmaker, relationship +from datetime import datetime +import numpy as np # 用于处理从LargeBinary转换回来的embedding + +Base = declarative_base() + +class KnowledgeBase(Base): + __tablename__ = 'kb' + id = Column(Integer, primary_key=True, index=True) + name = Column(String, index=True) + description = Column(Text) + created_at = Column(DateTime, default=datetime.utcnow) + + files = relationship("File", back_populates="knowledge_base") + +class File(Base): + __tablename__ = 'file' + id = Column(Integer, primary_key=True, index=True) + kb_id = Column(Integer, ForeignKey('kb.id')) + file_name = Column(String) + path = Column(String) + created_at = Column(DateTime, default=datetime.utcnow) + file_type = Column(String) + status = Column(Integer, default=0) # 0: 未处理, 1: 处理中, 2: 已处理, 3: 错误 + knowledge_base = relationship("KnowledgeBase", back_populates="files") + chunks = relationship("Chunk", back_populates="file") + +class Chunk(Base): + __tablename__ = 'chunks' + id = Column(Integer, primary_key=True, index=True) + file_id = Column(Integer, ForeignKey('file.id')) + text = Column(Text) + + file = relationship("File", back_populates="chunks") + vector = relationship("Vector", uselist=False, back_populates="chunk") # One-to-one + +class Vector(Base): + __tablename__ = 'vectors' + id = Column(Integer, primary_key=True, 
index=True) + chunk_id = Column(Integer, ForeignKey('chunks.id'), unique=True) + embedding = Column(LargeBinary) # Store embeddings as binary + + chunk = relationship("Chunk", back_populates="vector") + +# 数据库连接 +DATABASE_URL = "sqlite:///./knowledge_base.db" # 生产环境请更换为 PostgreSQL/MySQL +engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False} if "sqlite" in DATABASE_URL else {}) +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + +# 创建所有表 (可以在应用启动时执行一次) +def create_db_and_tables(): + Base.metadata.create_all(bind=engine) + print("Database tables created/checked.") + +# 定义嵌入维度(请根据你实际使用的模型调整) +EMBEDDING_DIM = 1024 \ No newline at end of file diff --git a/pkg/rag/knowledge/services/embedder.py b/pkg/rag/knowledge/services/embedder.py new file mode 100644 index 00000000..2b581e96 --- /dev/null +++ b/pkg/rag/knowledge/services/embedder.py @@ -0,0 +1,93 @@ +# services/embedder.py +import asyncio +import logging +import numpy as np +from typing import List +from sqlalchemy.orm import Session +from services.base_service import BaseService +from services.database import Chunk, SessionLocal +from services.embedding_models import BaseEmbeddingModel, EmbeddingModelFactory +from services.chroma_manager import ChromaIndexManager # Import the manager + +logger = logging.getLogger(__name__) + +class Embedder(BaseService): + def __init__(self, model_type: str, model_name_key: str, chroma_manager: ChromaIndexManager): + super().__init__() + self.logger = logging.getLogger(self.__class__.__name__) + self.model_type = model_type + self.model_name_key = model_name_key + self.chroma_manager = chroma_manager # Dependency Injection + + self.embedding_model: BaseEmbeddingModel = self._load_embedding_model() + + def _load_embedding_model(self) -> BaseEmbeddingModel: + self.logger.info(f"Loading embedding model: type={self.model_type}, name_key={self.model_name_key}...") + try: + model = EmbeddingModelFactory.create_model(self.model_type, 
# services/embedder.py
import asyncio
import logging
from typing import List

import numpy as np
from sqlalchemy.orm import Session

from services.base_service import BaseService
from services.database import Chunk, SessionLocal
from services.embedding_models import BaseEmbeddingModel, EmbeddingModelFactory
from services.chroma_manager import ChromaIndexManager

logger = logging.getLogger(__name__)


class Embedder(BaseService):
    """Persists text chunks to the relational DB and their embeddings to Chroma."""

    def __init__(self, model_type: str, model_name_key: str, chroma_manager: ChromaIndexManager):
        super().__init__()
        self.logger = logging.getLogger(self.__class__.__name__)
        self.model_type = model_type
        self.model_name_key = model_name_key
        self.chroma_manager = chroma_manager  # injected so the index can be shared/tested

        self.embedding_model: BaseEmbeddingModel = self._load_embedding_model()

    def _load_embedding_model(self) -> BaseEmbeddingModel:
        """Instantiate the configured embedding model; log and re-raise failures."""
        self.logger.info(f'Loading embedding model: type={self.model_type}, name_key={self.model_name_key}...')
        try:
            model = EmbeddingModelFactory.create_model(self.model_type, self.model_name_key)
            self.logger.info(
                f"Embedding model '{self.model_name_key}' loaded. Output dimension: {model.embedding_dimension}"
            )
            return model
        except Exception as e:
            self.logger.error(f"Failed to load embedding model '{self.model_name_key}': {e}")
            raise

    def _db_save_chunks_sync(self, session: Session, file_id: int, chunks_texts: List[str]):
        """Add Chunk rows for *file_id* and flush so their ids are populated.

        Commit/rollback/close are the caller's responsibility; this helper
        runs inside a worker thread via ``asyncio.to_thread``.
        """
        self.logger.debug(f'Saving {len(chunks_texts)} chunks for file_id {file_id} to DB (sync).')
        chunk_objects = []
        for text in chunks_texts:
            chunk = Chunk(file_id=file_id, text=text)
            session.add(chunk)
            chunk_objects.append(chunk)
        session.flush()  # populates .id on each new chunk without committing
        self.logger.debug(f'Successfully added {len(chunk_objects)} chunk entries to DB.')
        return chunk_objects

    async def embed_and_store(self, file_id: int, chunks: List[str]):
        """Persist chunks and their embeddings for one file.

        Chunk rows are flushed (ids assigned) but committed only AFTER the
        embeddings are safely written to Chroma, so any failure rolls the
        whole DB transaction back and leaves no orphan Chunk rows.  (The
        previous version committed first, making its rollback ineffective.)

        Returns the list of persisted Chunk objects ([] when there was
        nothing to store).  Raises on embedding or storage failure.
        """
        if not self.embedding_model:
            raise RuntimeError('Embedding model not loaded. Please check Embedder initialization.')

        self.logger.info(f'Embedding {len(chunks)} chunks for file_id: {file_id} using {self.model_name_key}...')

        session = SessionLocal()
        try:
            # 1. Insert chunk rows; flush() inside assigns ids while keeping
            #    the transaction open until step 3 has succeeded.
            chunk_objects = await asyncio.to_thread(self._db_save_chunks_sync, session, file_id, chunks)

            if not chunk_objects:
                self.logger.warning(
                    f'No chunk objects created for file_id {file_id}. Skipping embedding and Chroma storage.'
                )
                session.rollback()
                return []

            # 2. Generate embeddings and sanity-check their dimension before
            #    touching the vector store.
            embeddings: List[List[float]] = await self.embedding_model.embed_documents(chunks)
            embeddings_np = np.array(embeddings, dtype=np.float32)

            if embeddings_np.ndim != 2 or embeddings_np.shape[1] != self.embedding_model.embedding_dimension:
                self.logger.error(
                    f'Mismatch in embedding dimension: model returned shape {embeddings_np.shape}, '
                    f'expected dimension {self.embedding_model.embedding_dimension}. Aborting storage.'
                )
                raise ValueError('Embedding dimension mismatch during embedding process.')

            # 3. Write to Chroma, then commit the chunk rows.
            self.logger.info('Saving embeddings to Chroma...')
            chunk_ids = [c.id for c in chunk_objects]  # ids were assigned by flush()
            file_ids_for_chroma = [file_id] * len(chunk_ids)

            await self._run_sync(
                self.chroma_manager.add_embeddings_sync,
                file_ids_for_chroma,
                chunk_ids,
                embeddings_np,
                chunks,  # original chunk texts become the stored documents
            )
            # NOTE(review): if this commit itself fails, Chroma may hold
            # entries for rolled-back chunks; that window is far smaller than
            # the previous orphan-row window, but a cleanup pass could handle it.
            session.commit()
            self.logger.info(f'Successfully saved {len(chunk_objects)} embeddings to Chroma.')
            return chunk_objects

        except Exception as e:
            session.rollback()  # undoes the un-committed chunk inserts
            self.logger.error(f'Failed to process and store data for file_id {file_id}: {e}', exc_info=True)
            raise
        finally:
            session.close()  # ensure the session is always closed
# services/embedding_models.py
#
# Embedding model abstraction: one common async interface with two backends,
# local sentence-transformers models and third-party HTTP embedding APIs.
import os
import asyncio
import logging
from typing import Dict, Any, List, Type, Optional

# Both backends are optional dependencies: API-only deployments do not need
# sentence-transformers, and local-only deployments do not need aiohttp, so
# import failures are deferred until the corresponding backend is used.
try:
    import aiohttp
except ImportError:  # pragma: no cover - optional dependency
    aiohttp = None
try:
    from sentence_transformers import SentenceTransformer
except ImportError:  # pragma: no cover - optional dependency
    SentenceTransformer = None

logger = logging.getLogger(__name__)


class BaseEmbeddingModel:
    """Common async interface implemented by every embedding backend."""

    def __init__(self, model_name: str):
        self.model_name = model_name
        self._embedding_dimension: Optional[int] = None  # set by subclasses

    async def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Asynchronously embed a list of texts."""
        raise NotImplementedError

    async def embed_query(self, text: str) -> List[float]:
        """Asynchronously embed a single query text."""
        raise NotImplementedError

    @property
    def embedding_dimension(self) -> int:
        """Output dimension of the model; raises if the subclass never set it."""
        if self._embedding_dimension is None:
            raise NotImplementedError('Embedding dimension not set for this model.')
        return self._embedding_dimension


class EmbeddingModelFactory:
    @staticmethod
    def create_model(model_type: str, model_name_key: str) -> 'BaseEmbeddingModel':
        """Create an embedding model instance from EMBEDDING_MODEL_CONFIGS.

        Dispatches on the config entry's 'type'. Raises ValueError for an
        unknown config key, an incomplete third-party config, or a config
        type no backend implements.  (The previous version handled only
        'third_party_api' and silently returned None for everything else,
        including the 'sentence_transformer' entries present in the config.)
        """
        if model_name_key not in EMBEDDING_MODEL_CONFIGS:
            raise ValueError(
                f"Embedding model configuration '{model_name_key}' not found in EMBEDDING_MODEL_CONFIGS."
            )

        config = EMBEDDING_MODEL_CONFIGS[model_name_key]

        if config['type'] == 'third_party_api':
            required_keys = ['api_endpoint', 'headers', 'payload_template', 'embedding_dimension']
            if not all(key in config for key in required_keys):
                raise ValueError(
                    f"Missing configuration keys for third_party_api model '{model_name_key}'. "
                    f'Required: {required_keys}'
                )
            # Some APIs expect a 'model' payload value that differs from the
            # config key; prefer the explicit model_name when present.
            api_model_name = config.get('model_name', model_name_key)
            return ThirdPartyAPIEmbeddingModel(
                model_name=api_model_name,
                api_endpoint=config['api_endpoint'],
                headers=config['headers'],
                payload_template=config['payload_template'],
                embedding_dimension=config['embedding_dimension'],
            )

        if config['type'] == 'sentence_transformer':
            return SentenceTransformerEmbeddingModel(config.get('model_name', model_name_key))

        raise ValueError(f"Unsupported embedding model type '{config['type']}' for '{model_name_key}'.")


class SentenceTransformerEmbeddingModel(BaseEmbeddingModel):
    """Local sentence-transformers backend."""

    def __init__(self, model_name: str):
        super().__init__(model_name)
        if SentenceTransformer is None:
            raise ImportError(
                'sentence-transformers is not installed; cannot create a local embedding model.'
            )
        try:
            self.model = SentenceTransformer(model_name)
            self._embedding_dimension = self.model.get_sentence_embedding_dimension()
            logger.info(
                f"Initialized SentenceTransformer model '{model_name}' with dimension {self._embedding_dimension}"
            )
        except Exception as e:
            logger.error(f'Failed to load SentenceTransformer model {model_name}: {e}')
            raise

    async def embed_documents(self, texts: List[str]) -> List[List[float]]:
        # Model inference is CPU-bound; run it in a worker thread so it does
        # not block the event loop.
        return await asyncio.to_thread(lambda: self.model.encode(texts).tolist())

    async def embed_query(self, text: str) -> List[float]:
        return await asyncio.to_thread(lambda: self.model.encode(text).tolist())


class ThirdPartyAPIEmbeddingModel(BaseEmbeddingModel):
    """HTTP embedding API backend (OpenAI-style request/response shapes)."""

    def __init__(
        self,
        model_name: str,
        api_endpoint: str,
        headers: Dict[str, str],
        payload_template: Dict[str, Any],
        embedding_dimension: int,
    ):
        super().__init__(model_name)
        self.api_endpoint = api_endpoint
        self.headers = headers
        self.payload_template = payload_template
        self._embedding_dimension = embedding_dimension
        self.session = None  # aiohttp session, created lazily on first use
        logger.info(
            f"Initialized ThirdPartyAPIEmbeddingModel '{model_name}' for async calls to "
            f'{api_endpoint} with dimension {embedding_dimension}'
        )

    async def _get_session(self) -> 'aiohttp.ClientSession':
        """Lazily create or return the aiohttp client session."""
        if self.session is None or self.session.closed:
            self.session = aiohttp.ClientSession()
        return self.session

    async def close_session(self):
        """Explicitly close the aiohttp client session."""
        if self.session and not self.session.closed:
            await self.session.close()
            self.session = None
            logger.info(f'Closed aiohttp session for model {self.model_name}')

    async def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed each text via the API; requests run concurrently.

        A failed request is logged and replaced by a zero vector so one bad
        item does not abort the whole batch (best-effort by design).
        """
        session = await self._get_session()
        tasks = []
        for text in texts:
            payload = self.payload_template.copy()
            if 'input' in payload:
                payload['input'] = text
            elif 'texts' in payload:
                payload['texts'] = [text]
            else:
                raise ValueError('Payload template does not contain expected text input key.')
            tasks.append(self._make_api_request(session, payload))

        results = await asyncio.gather(*tasks, return_exceptions=True)

        embeddings: List[List[float]] = []
        for i, res in enumerate(results):
            if isinstance(res, Exception):
                logger.error(f"Error embedding text '{texts[i][:50]}...': {res}")
                embeddings.append([0.0] * self.embedding_dimension)  # placeholder for the failure
            else:
                embeddings.append(res)
        return embeddings

    async def _make_api_request(self, session: 'aiohttp.ClientSession', payload: Dict[str, Any]) -> List[float]:
        """POST one payload and extract the embedding from the response."""
        try:
            async with session.post(self.api_endpoint, headers=self.headers, json=payload) as response:
                response.raise_for_status()  # surface 4xx/5xx as exceptions
                api_response = await response.json()

            # Two known response shapes: OpenAI-style {'data': [{'embedding': ...}]}
            # and the simpler {'embeddings': [[...]]}.
            if 'data' in api_response and len(api_response['data']) > 0 and 'embedding' in api_response['data'][0]:
                embedding = api_response['data'][0]['embedding']
            elif 'embeddings' in api_response and isinstance(api_response['embeddings'], list) and api_response['embeddings']:
                embedding = api_response['embeddings'][0]
            else:
                raise ValueError(f'Unexpected API response structure: {api_response}')

            if len(embedding) != self.embedding_dimension:
                logger.warning(
                    f'API returned embedding of dimension {len(embedding)}, but expected '
                    f'{self.embedding_dimension} for model {self.model_name}. Adjusting config might be needed.'
                )
            return embedding

        except aiohttp.ClientError as e:
            raise ConnectionError(f'API request failed: {e}') from e
        except ValueError as e:
            raise ValueError(f'Error processing API response: {e}') from e

    async def embed_query(self, text: str) -> List[float]:
        """Embed a single query text."""
        results = await self.embed_documents([text])
        if results:
            return results[0]
        return []  # empty input -> empty embedding


# --- Embedding Model Configuration ---
EMBEDDING_MODEL_CONFIGS: Dict[str, Dict[str, Any]] = {
    'MiniLM': {  # local Sentence Transformer model
        'type': 'sentence_transformer',
        'model_name': 'sentence-transformers/all-MiniLM-L6-v2',
    },
    'bge-m3': {  # third-party API model
        'type': 'third_party_api',
        'model_name': 'bge-m3',
        'api_endpoint': 'https://api.qhaigc.net/v1/embeddings',
        'headers': {
            'Content-Type': 'application/json',
            'Authorization': f"Bearer {os.getenv('rag_api_key')}",
        },
        'payload_template': {
            'model': 'bge-m3',
            'input': '',
        },
        'embedding_dimension': 1024,
    },
    'OpenAI-Ada-002': {
        'type': 'third_party_api',
        'model_name': 'text-embedding-ada-002',
        'api_endpoint': 'https://api.openai.com/v1/embeddings',
        'headers': {
            'Content-Type': 'application/json',
            'Authorization': f"Bearer {os.getenv('OPENAI_API_KEY')}",  # requires OPENAI_API_KEY
        },
        'payload_template': {
            'model': 'text-embedding-ada-002',
            'input': '',  # text is injected here per request
        },
        'embedding_dimension': 1536,
    },
    'OpenAI-Embedding-3-Small': {
        'type': 'third_party_api',
        'model_name': 'text-embedding-3-small',
        'api_endpoint': 'https://api.openai.com/v1/embeddings',
        'headers': {
            'Content-Type': 'application/json',
            'Authorization': f"Bearer {os.getenv('OPENAI_API_KEY')}",
        },
        'payload_template': {
            'model': 'text-embedding-3-small',
            'input': '',
            # 'dimensions': 512  # optional: request a reduced output dimension
        },
        'embedding_dimension': 1536,  # default max dimension for text-embedding-3-small
    },
}
"text-embedding-3-small", + "input": "", + # "dimensions": 512 # Optional: uncomment if you want a specific output dimension + }, + "embedding_dimension": 1536 # Default max dimension for text-embedding-3-small + }, +} \ No newline at end of file diff --git a/pkg/rag/knowledge/services/parser.py b/pkg/rag/knowledge/services/parser.py new file mode 100644 index 00000000..5fa7d589 --- /dev/null +++ b/pkg/rag/knowledge/services/parser.py @@ -0,0 +1,288 @@ + +import PyPDF2 +from docx import Document +import pandas as pd +import csv +import chardet +from typing import Union, List, Callable, Any +import logging +import markdown +from bs4 import BeautifulSoup +import ebooklib +from ebooklib import epub +import re +import asyncio # Import asyncio for async operations +import os + +# Configure logging +logger = logging.getLogger(__name__) + +class FileParser: + """ + A robust file parser class to extract text content from various document formats. + It supports TXT, PDF, DOCX, XLSX, CSV, Markdown, HTML, and EPUB files. + All core file reading operations are designed to be run synchronously in a thread pool + to avoid blocking the asyncio event loop. + """ + def __init__(self): + + self.logger = logging.getLogger(self.__class__.__name__) + + async def _run_sync(self, sync_func: Callable, *args: Any, **kwargs: Any) -> Any: + """ + Runs a synchronous function in a separate thread to prevent blocking the event loop. + This is a general utility method for wrapping blocking I/O operations. + """ + try: + return await asyncio.to_thread(sync_func, *args, **kwargs) + except Exception as e: + self.logger.error(f"Error running synchronous function {sync_func.__name__}: {e}") + raise + + async def parse(self, file_path: str) -> Union[str, None]: + """ + Parses the file based on its extension and returns the extracted text content. + This is the main asynchronous entry point for parsing. + + Args: + file_path (str): The path to the file to be parsed. 
+ + Returns: + Union[str, None]: The extracted text content as a single string, or None if parsing fails. + """ + if not file_path or not os.path.exists(file_path): + self.logger.error(f"Invalid file path provided: {file_path}") + return None + + file_extension = file_path.split('.')[-1].lower() + parser_method = getattr(self, f'_parse_{file_extension}', None) + + if parser_method is None: + self.logger.error(f"Unsupported file format: {file_extension} for file {file_path}") + return None + + try: + # Pass file_path to the specific parser methods + return await parser_method(file_path) + except Exception as e: + self.logger.error(f"Failed to parse {file_extension} file {file_path}: {e}") + return None + + # --- Helper for reading files with encoding detection --- + async def _read_file_content(self, file_path: str, mode: str = 'r') -> Union[str, bytes]: + """ + Reads a file with automatic encoding detection, ensuring the synchronous + file read operation runs in a separate thread. + """ + def _read_sync(): + with open(file_path, 'rb') as file: + raw_data = file.read() + detected = chardet.detect(raw_data) + encoding = detected['encoding'] or 'utf-8' + + if mode == 'r': + return raw_data.decode(encoding, errors='ignore') + return raw_data # For binary mode + + return await self._run_sync(_read_sync) + + # --- Specific Parser Methods --- + + async def _parse_txt(self, file_path: str) -> str: + """Parses a TXT file and returns its content.""" + self.logger.info(f"Parsing TXT file: {file_path}") + return await self._read_file_content(file_path, mode='r') + + async def _parse_pdf(self, file_path: str) -> str: + """Parses a PDF file and returns its text content.""" + self.logger.info(f"Parsing PDF file: {file_path}") + def _parse_pdf_sync(): + text_content = [] + with open(file_path, 'rb') as file: + pdf_reader = PyPDF2.PdfReader(file) + for page in pdf_reader.pages: + text = page.extract_text() + if text: + text_content.append(text) + return '\n'.join(text_content) + 
return await self._run_sync(_parse_pdf_sync) + + async def _parse_docx(self, file_path: str) -> str: + """Parses a DOCX file and returns its text content.""" + self.logger.info(f"Parsing DOCX file: {file_path}") + def _parse_docx_sync(): + doc = Document(file_path) + text_content = [paragraph.text for paragraph in doc.paragraphs if paragraph.text.strip()] + return '\n'.join(text_content) + return await self._run_sync(_parse_docx_sync) + + async def _parse_doc(self, file_path: str) -> str: + """Handles .doc files, explicitly stating lack of direct support.""" + self.logger.warning(f"Direct .doc parsing is not supported for {file_path}. Please convert to .docx first.") + raise NotImplementedError("Direct .doc parsing not supported. Please convert to .docx first.") + + async def _parse_xlsx(self, file_path: str) -> str: + """Parses an XLSX file, returning text from all sheets.""" + self.logger.info(f"Parsing XLSX file: {file_path}") + def _parse_xlsx_sync(): + excel_file = pd.ExcelFile(file_path) + all_sheet_content = [] + for sheet_name in excel_file.sheet_names: + df = pd.read_excel(file_path, sheet_name=sheet_name) + sheet_text = f"--- Sheet: {sheet_name} ---\n{df.to_string(index=False)}\n" + all_sheet_content.append(sheet_text) + return '\n'.join(all_sheet_content) + return await self._run_sync(_parse_xlsx_sync) + + async def _parse_csv(self, file_path: str) -> str: + """Parses a CSV file and returns its content as a string.""" + self.logger.info(f"Parsing CSV file: {file_path}") + def _parse_csv_sync(): + # pd.read_csv can often detect encoding, but explicit detection is safer + raw_data = self._read_file_content(file_path, mode='rb') # Note: this will need to be await outside this sync function + # For simplicity, we'll let pandas handle encoding internally after a raw read. + # A more robust solution might pass encoding directly to pd.read_csv after detection. 
+ detected = chardet.detect(open(file_path, 'rb').read()) + encoding = detected['encoding'] or 'utf-8' + df = pd.read_csv(file_path, encoding=encoding) + return df.to_string(index=False) + return await self._run_sync(_parse_csv_sync) + + async def _parse_markdown(self, file_path: str) -> str: + """Parses a Markdown file, converting it to structured plain text.""" + self.logger.info(f"Parsing Markdown file: {file_path}") + def _parse_markdown_sync(): + md_content = self._read_file_content(file_path, mode='r') # This is a synchronous call within a sync function + html_content = markdown.markdown( + md_content, + extensions=['extra', 'codehilite', 'tables', 'toc', 'fenced_code'] + ) + soup = BeautifulSoup(html_content, 'html.parser') + text_parts = [] + for element in soup.children: + if element.name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']: + level = int(element.name[1]) + text_parts.append('#' * level + ' ' + element.get_text().strip()) + elif element.name == 'p': + text = element.get_text().strip() + if text: + text_parts.append(text) + elif element.name in ['ul', 'ol']: + for li in element.find_all('li'): + text_parts.append(f"* {li.get_text().strip()}") + elif element.name == 'pre': + code_block = element.get_text().strip() + if code_block: + text_parts.append(f"```\n{code_block}\n```") + elif element.name == 'table': + table_str = self._extract_table_to_markdown_sync(element) # Call sync helper + if table_str: + text_parts.append(table_str) + elif element.name: + text = element.get_text(separator=' ', strip=True) + if text: + text_parts.append(text) + cleaned_text = re.sub(r'\n\s*\n', '\n\n', '\n'.join(text_parts)) + return cleaned_text.strip() + return await self._run_sync(_parse_markdown_sync) + + async def _parse_html(self, file_path: str) -> str: + """Parses an HTML file, extracting structured plain text.""" + self.logger.info(f"Parsing HTML file: {file_path}") + def _parse_html_sync(): + html_content = self._read_file_content(file_path, mode='r') # Sync call 
within sync function + soup = BeautifulSoup(html_content, 'html.parser') + for script_or_style in soup(["script", "style"]): + script_or_style.decompose() + text_parts = [] + for element in soup.body.children if soup.body else soup.children: + if element.name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']: + level = int(element.name[1]) + text_parts.append('#' * level + ' ' + element.get_text().strip()) + elif element.name == 'p': + text = element.get_text().strip() + if text: + text_parts.append(text) + elif element.name in ['ul', 'ol']: + for li in element.find_all('li'): + text = li.get_text().strip() + if text: + text_parts.append(f"* {text}") + elif element.name == 'table': + table_str = self._extract_table_to_markdown_sync(element) # Call sync helper + if table_str: + text_parts.append(table_str) + elif element.name: + text = element.get_text(separator=' ', strip=True) + if text: + text_parts.append(text) + cleaned_text = re.sub(r'\n\s*\n', '\n\n', '\n'.join(text_parts)) + return cleaned_text.strip() + return await self._run_sync(_parse_html_sync) + + async def _parse_epub(self, file_path: str) -> str: + """Parses an EPUB file, extracting metadata and content.""" + self.logger.info(f"Parsing EPUB file: {file_path}") + def _parse_epub_sync(): + book = epub.read_epub(file_path) + text_content = [] + title_meta = book.get_metadata('DC', 'title') + if title_meta: + text_content.append(f"Title: {title_meta[0][0]}") + creator_meta = book.get_metadata('DC', 'creator') + if creator_meta: + text_content.append(f"Author: {creator_meta[0][0]}") + date_meta = book.get_metadata('DC', 'date') + if date_meta: + text_content.append(f"Publish Date: {date_meta[0][0]}") + toc = book.get_toc() + if toc: + text_content.append("\n--- Table of Contents ---") + self._add_toc_items_sync(toc, text_content, level=0) # Call sync helper + text_content.append("--- End of Table of Contents ---\n") + for item in book.get_items(): + if item.get_type() == ebooklib.ITEM_DOCUMENT: + html_content = 
item.get_content().decode('utf-8', errors='ignore') + soup = BeautifulSoup(html_content, 'html.parser') + for junk in soup(["script", "style", "nav", "header", "footer"]): + junk.decompose() + text = soup.get_text(separator='\n', strip=True) + text = re.sub(r'\n\s*\n', '\n\n', text) + if text: + text_content.append(text) + return re.sub(r'\n\s*\n', '\n\n', '\n'.join(text_content)).strip() + return await self._run_sync(_parse_epub_sync) + + def _add_toc_items_sync(self, toc_list: list, text_content: list, level: int): + """Recursively adds TOC items to text_content (synchronous helper).""" + indent = ' ' * level + for item in toc_list: + if isinstance(item, tuple): + chapter, subchapters = item + text_content.append(f"{indent}- {chapter.title}") + self._add_toc_items_sync(subchapters, text_content, level + 1) + else: + text_content.append(f"{indent}- {item.title}") + + def _extract_table_to_markdown_sync(self, table_element: BeautifulSoup) -> str: + """Helper to convert a BeautifulSoup table element into a Markdown table string (synchronous).""" + headers = [th.get_text().strip() for th in table_element.find_all('th')] + rows = [] + for tr in table_element.find_all('tr'): + cells = [td.get_text().strip() for td in tr.find_all('td')] + if cells: + rows.append(cells) + + if not headers and not rows: + return "" + + table_lines = [] + if headers: + table_lines.append(' | '.join(headers)) + table_lines.append(' | '.join(['---'] * len(headers))) + + for row_cells in rows: + padded_cells = row_cells + [''] * (len(headers) - len(row_cells)) if headers else row_cells + table_lines.append(' | '.join(padded_cells)) + + return '\n'.join(table_lines) \ No newline at end of file diff --git a/pkg/rag/knowledge/services/retriever.py b/pkg/rag/knowledge/services/retriever.py new file mode 100644 index 00000000..6da1c5d8 --- /dev/null +++ b/pkg/rag/knowledge/services/retriever.py @@ -0,0 +1,106 @@ +# services/retriever.py +import asyncio +import logging +import numpy as np # Make 
# services/retriever.py
import asyncio
import logging
from typing import List, Dict, Any

import numpy as np
from sqlalchemy.orm import Session

from services.base_service import BaseService
from services.database import Chunk, SessionLocal
from services.embedding_models import BaseEmbeddingModel, EmbeddingModelFactory
from services.chroma_manager import ChromaIndexManager

logger = logging.getLogger(__name__)


class Retriever(BaseService):
    """Embeds a query, searches Chroma, and joins hits with their DB chunks."""

    def __init__(self, model_type: str, model_name_key: str, chroma_manager: ChromaIndexManager):
        super().__init__()
        self.logger = logging.getLogger(self.__class__.__name__)
        self.model_type = model_type
        self.model_name_key = model_name_key
        self.chroma_manager = chroma_manager

        self.embedding_model: BaseEmbeddingModel = self._load_embedding_model()

    def _load_embedding_model(self) -> BaseEmbeddingModel:
        """Instantiate the configured embedding model; log and re-raise failures."""
        self.logger.info(
            f'Loading retriever embedding model: type={self.model_type}, name_key={self.model_name_key}...'
        )
        try:
            model = EmbeddingModelFactory.create_model(self.model_type, self.model_name_key)
            self.logger.info(
                f"Retriever embedding model '{self.model_name_key}' loaded. "
                f'Output dimension: {model.embedding_dimension}'
            )
            return model
        except Exception as e:
            self.logger.error(f"Failed to load retriever embedding model '{self.model_name_key}': {e}")
            raise

    async def retrieve(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
        """Return up to *k* matches for *query* as dicts with
        chunk_id / text / distance / file_id keys.

        Chroma is the source of ranking; the relational DB provides the
        authoritative chunk text when the row is still present.
        """
        if not self.embedding_model:
            raise RuntimeError('Retriever embedding model not loaded. Please check Retriever initialization.')

        self.logger.info(f"Retrieving for query: '{query}' with k={k} using {self.model_name_key}")

        query_embedding: List[float] = await self.embedding_model.embed_query(query)
        query_embedding_np = np.array([query_embedding], dtype=np.float32)

        chroma_results = await self._run_sync(self.chroma_manager.search_sync, query_embedding_np, k)

        # Chroma returns one inner list per query; we issue a single query.
        matched_chroma_ids = chroma_results.get('ids', [[]])[0]
        distances = chroma_results.get('distances', [[]])[0]
        chroma_metadatas = chroma_results.get('metadatas', [[]])[0]
        chroma_documents = chroma_results.get('documents', [[]])[0]

        if not matched_chroma_ids:
            self.logger.info('No relevant chunks found in Chroma.')
            return []

        db_chunk_ids = []
        for metadata in chroma_metadatas:
            if 'chunk_id' in metadata:
                db_chunk_ids.append(metadata['chunk_id'])
            else:
                self.logger.warning(f"Metadata missing 'chunk_id': {metadata}. Skipping this entry.")

        if not db_chunk_ids:
            self.logger.warning('No valid chunk_ids extracted from Chroma results metadata.')
            return []

        self.logger.info(f'Fetching {len(db_chunk_ids)} chunk details from relational database...')
        chunks_from_db = await self._run_sync(self._db_get_chunks_sync, db_chunk_ids)

        chunk_map = {chunk.id: chunk for chunk in chunks_from_db}
        results_list: List[Dict[str, Any]] = []

        for i, chroma_id in enumerate(matched_chroma_ids):
            metadata = chroma_metadatas[i]
            # Prefer the authoritative chunk_id stored in metadata (the same
            # value used for the DB fetch above); fall back to parsing the
            # Chroma id only when metadata lacks it.  The previous version
            # always string-parsed the id, which could disagree with the
            # metadata-driven DB lookup.
            chunk_key = metadata.get('chunk_id')
            if chunk_key is None:
                try:
                    chunk_key = int(chroma_id.split('_')[-1])
                except (ValueError, IndexError):
                    self.logger.warning(f'Could not parse chunk_id from Chroma ID: {chroma_id}. Skipping.')
                    continue
            original_chunk_id = int(chunk_key)

            chunk_from_db = chunk_map.get(original_chunk_id)
            results_list.append({
                'chunk_id': original_chunk_id,
                # DB text is authoritative; fall back to Chroma's stored copy.
                'text': chunk_from_db.text if chunk_from_db else chroma_documents[i],
                'distance': float(distances[i]),
                'file_id': metadata.get('file_id'),
            })

        self.logger.info(f'Retrieved {len(results_list)} chunks.')
        return results_list

    def _db_get_chunks_sync(self, chunk_ids: List[int]) -> List[Chunk]:
        """Fetch chunk rows by id; opens and ALWAYS closes its own session.

        (The previous version received an externally created session and
        closed it outside try/finally, leaking it if the query raised.)
        """
        self.logger.debug(f'Fetching {len(chunk_ids)} chunk details from database (sync).')
        session = SessionLocal()
        try:
            return session.query(Chunk).filter(Chunk.id.in_(chunk_ids)).all()
        finally:
            session.close()
initialize_system(self): await asyncio.to_thread(create_db_and_tables) From 34fe8b324d0b5e1412641ef3b6029372c2327a90 Mon Sep 17 00:00:00 2001 From: WangCham <651122857@qq.com> Date: Thu, 3 Jul 2025 23:28:47 +0800 Subject: [PATCH 018/257] feat: add functions --- .../http/controller/groups/knowledge_base.py | 27 ++++++++-------- .../controller/groups/pipelines/pipelines.py | 2 +- pkg/core/app.py | 10 +++--- pkg/core/entities.py | 2 +- pkg/core/stages/build_app.py | 7 ++++ pkg/entity/persistence/vector.py | 14 ++++++++ pkg/rag/knowledge/RAG_Manager.py | 32 +++++++++++-------- pkg/rag/knowledge/services/base_service.py | 8 ++--- pkg/rag/knowledge/services/chroma_manager.py | 4 +-- pkg/rag/knowledge/services/chunker.py | 2 +- pkg/rag/knowledge/services/embedder.py | 8 ++--- pkg/rag/knowledge/services/retriever.py | 8 ++--- 12 files changed, 75 insertions(+), 49 deletions(-) create mode 100644 pkg/entity/persistence/vector.py diff --git a/pkg/api/http/controller/groups/knowledge_base.py b/pkg/api/http/controller/groups/knowledge_base.py index c819397a..f9aa09e0 100644 --- a/pkg/api/http/controller/groups/knowledge_base.py +++ b/pkg/api/http/controller/groups/knowledge_base.py @@ -1,5 +1,4 @@ import quart -from __future__ import annotations from .. 
import group @group.group_class('knowledge_base', '/api/v1/knowledge/bases') @@ -16,13 +15,13 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): async def initialize(self) -> None: - rag = self.ap.knowledge_base_service.RAG_Manager() + @self.route('', methods=['POST', 'GET']) async def _() -> str: if quart.request.method == 'GET': - knowledge_bases = await rag.get_all_knowledge_bases() + knowledge_bases = await self.ap.knowledge_base_service.get_all_knowledge_bases() bases_list = [ { "uuid": kb.id, @@ -35,17 +34,19 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): msg='ok') json_data = await quart.request.json - knowledge_base_uuid = await rag.create_knowledge_base( + knowledge_base_uuid = await self.ap.knowledge_base_service.create_knowledge_base( json_data.get('name'), json_data.get('description') ) - return self.success() + return self.success(code=0, + data={}, + msg='ok') - @self.route('/', methods=['GET']) + @self.route('/', methods=['GET','DELETE']) async def _(knowledge_base_uuid: str) -> str: if quart.request.method == 'GET': - knowledge_base = await rag.get_knowledge_base_by_id(knowledge_base_uuid) + knowledge_base = await self.ap.knowledge_base_service.get_knowledge_base_by_id(knowledge_base_uuid) if knowledge_base is None: return self.http_status(404, -1, 'knowledge base not found') @@ -59,11 +60,14 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): }, msg='ok' ) + elif quart.request.method == 'DELETE': + await self.ap.knowledge_base_service.delete_kb_by_id(knowledge_base_uuid) + return self.success(code=0, msg='ok') @self.route('//files', methods=['GET']) async def _(knowledge_base_uuid: str) -> str: if quart.request.method == 'GET': - files = await rag.get_files_by_knowledge_base(knowledge_base_uuid) + files = await self.ap.knowledge_base_service.get_files_by_knowledge_base(knowledge_base_uuid) return self.success(code=0,data=[{ "id": file.id, "file_name": file.file_name, @@ -73,11 +77,6 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): 
# delete specific file in knowledge base @self.route('//files/', methods=['DELETE']) async def _(knowledge_base_uuid: str, file_id: str) -> str: - await rag.delete_data_by_file_id(file_id) + await self.ap.knowledge_base_service.delete_data_by_file_id(file_id) return self.success(code=0, msg='ok') - # delete specific kb - @self.route('/', methods=['DELETE']) - async def _(knowledge_base_uuid: str) -> str: - await rag.delete_kb_by_id(knowledge_base_uuid) - return self.success(code=0, msg='ok') diff --git a/pkg/api/http/controller/groups/pipelines/pipelines.py b/pkg/api/http/controller/groups/pipelines/pipelines.py index 96ca239a..1a8036cc 100644 --- a/pkg/api/http/controller/groups/pipelines/pipelines.py +++ b/pkg/api/http/controller/groups/pipelines/pipelines.py @@ -2,7 +2,7 @@ from __future__ import annotations import quart -from ... import group +from .. import group @group.group_class('pipelines', '/api/v1/pipelines') diff --git a/pkg/core/app.py b/pkg/core/app.py index d8824466..2e3c9500 100644 --- a/pkg/core/app.py +++ b/pkg/core/app.py @@ -27,10 +27,7 @@ from ..storage import mgr as storagemgr from ..utils import logcache from . import taskmgr from . 
import entities as core_entities -from ...pkg.rag.knowledge import RAG_Manager - - - +from pkg.rag.knowledge.RAG_Manager import RAG_Manager class Application: @@ -51,6 +48,7 @@ class Application: model_mgr: llm_model_mgr.ModelManager = None + # TODO 移动到 pipeline 里 tool_mgr: llm_tool_mgr.ToolManager = None @@ -103,7 +101,6 @@ class Application: storage_mgr: storagemgr.StorageMgr = None - knowledge_base_service: RAG_Manager = None # ========= HTTP Services ========= @@ -117,6 +114,8 @@ class Application: bot_service: bot_service.BotService = None + knowledge_base_service: RAG_Manager = None + def __init__(self): pass @@ -152,6 +151,7 @@ class Application: name='http-api-controller', scopes=[core_entities.LifecycleControlScope.APPLICATION], ) + self.task_mgr.create_task( never_ending(), name='never-ending-task', diff --git a/pkg/core/entities.py b/pkg/core/entities.py index 4caf18ed..8dc51e5b 100644 --- a/pkg/core/entities.py +++ b/pkg/core/entities.py @@ -19,7 +19,7 @@ class LifecycleControlScope(enum.Enum): APPLICATION = 'application' PLATFORM = 'platform' PLUGIN = 'plugin' - PROVIDER = 'provider' + PROVIDER = 'provider' class LauncherTypes(enum.Enum): diff --git a/pkg/core/stages/build_app.py b/pkg/core/stages/build_app.py index 482a468b..3ba468c8 100644 --- a/pkg/core/stages/build_app.py +++ b/pkg/core/stages/build_app.py @@ -9,6 +9,7 @@ from ...command import cmdmgr from ...provider.session import sessionmgr as llm_session_mgr from ...provider.modelmgr import modelmgr as llm_model_mgr from ...provider.tools import toolmgr as llm_tool_mgr +from ...rag.knowledge.RAG_Manager import RAG_Manager as knowledge_base_mgr from ...platform import botmgr as im_mgr from ...persistence import mgr as persistencemgr from ...api.http.controller import main as http_controller @@ -101,6 +102,12 @@ class BuildAppStage(stage.BootingStage): embedding_models_service_inst = model_service.EmbeddingModelsService(ap) ap.embedding_models_service = embedding_models_service_inst + 
knowledge_base_service_inst = knowledge_base_mgr(ap) + print("knowledge_base_service_inst1", type(knowledge_base_service_inst)) + await knowledge_base_service_inst.initialize_rag_system() + ap.knowledge_base_service = knowledge_base_service_inst + print("knowledge_base_service_inst", type(ap.knowledge_base_service)) + pipeline_service_inst = pipeline_service.PipelineService(ap) ap.pipeline_service = pipeline_service_inst diff --git a/pkg/entity/persistence/vector.py b/pkg/entity/persistence/vector.py new file mode 100644 index 00000000..84d1dfb1 --- /dev/null +++ b/pkg/entity/persistence/vector.py @@ -0,0 +1,14 @@ +from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, ForeignKey, LargeBinary +from sqlalchemy.orm import declarative_base, sessionmaker, relationship +from datetime import datetime +import numpy as np # 用于处理从LargeBinary转换回来的embedding + +Base = declarative_base() + +class Vector(Base): + __tablename__ = 'vectors' + id = Column(Integer, primary_key=True, index=True) + chunk_id = Column(Integer, ForeignKey('chunks.id'), unique=True) + embedding = Column(LargeBinary) # Store embeddings as binary + + chunk = relationship("Chunk", back_populates="vector") \ No newline at end of file diff --git a/pkg/rag/knowledge/RAG_Manager.py b/pkg/rag/knowledge/RAG_Manager.py index d85699d3..292f23ce 100644 --- a/pkg/rag/knowledge/RAG_Manager.py +++ b/pkg/rag/knowledge/RAG_Manager.py @@ -1,18 +1,24 @@ # RAG_Manager class (main class, adjust imports as needed) +from __future__ import annotations # For type hinting in Python 3.7+ import logging import os import asyncio -from services.parser import FileParser -from services.chunker import Chunker -from services.embedder import Embedder -from services.retriever import Retriever -from services.database import create_db_and_tables, SessionLocal, KnowledgeBase, File, Chunk # Ensure Chunk is imported if you need to manipulate it directly -from services.embedding_models import EmbeddingModelFactory -from 
services.chroma_manager import ChromaIndexManager -from ...core import app +from pkg.rag.knowledge.services.parser import FileParser +from pkg.rag.knowledge.services.chunker import Chunker +from pkg.rag.knowledge.services.embedder import Embedder +from pkg.rag.knowledge.services.retriever import Retriever +from pkg.rag.knowledge.services.database import create_db_and_tables, SessionLocal, KnowledgeBase, File, Chunk # Ensure Chunk is imported if you need to manipulate it directly +from pkg.rag.knowledge.services.embedding_models import EmbeddingModelFactory +from pkg.rag.knowledge.services.chroma_manager import ChromaIndexManager +from pkg.core import app # Adjust the import path as needed + class RAG_Manager: - def __init__(self, logger: logging.Logger = None): + + ap: app.Application + + def __init__(self, ap: app.Application,logger: logging.Logger = None): + self.ap = ap self.logger = logger or logging.getLogger(__name__) self.embedding_model_type = None self.embedding_model_name = None @@ -21,11 +27,11 @@ class RAG_Manager: self.chunker = None self.embedder = None self.retriever = None - - async def initialize_system(self): + + async def initialize_rag_system(self): await asyncio.to_thread(create_db_and_tables) - async def create_model(self, embedding_model_type: str, + async def create_specific_model(self, embedding_model_type: str, embedding_model_name: str): self.embedding_model_type = embedding_model_type self.embedding_model_name = embedding_model_name @@ -57,7 +63,7 @@ class RAG_Manager: ) - async def create_knowledge_base(self, kb_name: str, kb_description: str): + async def create_knowledge_base(self, kb_name: str, kb_description: str ,): """ Creates a new knowledge base with the given name and description. If a knowledge base with the same name already exists, it returns that one. 
diff --git a/pkg/rag/knowledge/services/base_service.py b/pkg/rag/knowledge/services/base_service.py index 0298226a..4ff1ce39 100644 --- a/pkg/rag/knowledge/services/base_service.py +++ b/pkg/rag/knowledge/services/base_service.py @@ -1,20 +1,20 @@ # 封装异步操作 import asyncio import logging -from services.database import SessionLocal # 导入 SessionLocal 工厂函数 +from pkg.rag.knowledge.services.database import SessionLocal class BaseService: def __init__(self): self.logger = logging.getLogger(self.__class__.__name__) - self.db_session_factory = SessionLocal # 使用 SessionLocal 工厂函数 + self.db_session_factory = SessionLocal async def _run_sync(self, func, *args, **kwargs): """ 在单独的线程中运行同步函数。 如果第一个参数是 session,则在 to_thread 中获取新的 session。 """ - # 如果函数需要数据库会话作为第一个参数,我们在这里获取它 - if getattr(func, '__name__', '').startswith('_db_'): # 约定:数据库操作的同步方法以 _db_ 开头 + + if getattr(func, '__name__', '').startswith('_db_'): session = await asyncio.to_thread(self.db_session_factory) try: result = await asyncio.to_thread(func, session, *args, **kwargs) diff --git a/pkg/rag/knowledge/services/chroma_manager.py b/pkg/rag/knowledge/services/chroma_manager.py index 6a469168..f8020cdb 100644 --- a/pkg/rag/knowledge/services/chroma_manager.py +++ b/pkg/rag/knowledge/services/chroma_manager.py @@ -1,4 +1,4 @@ -# services/chroma_manager.py + import numpy as np import logging from chromadb import PersistentClient @@ -9,7 +9,7 @@ logger = logging.getLogger(__name__) class ChromaIndexManager: def __init__(self, collection_name: str = "default_collection"): self.logger = logging.getLogger(self.__class__.__name__) - chroma_data_path = "./chroma_data" + chroma_data_path = os.path.abspath(os.path.join(__file__, "../../../../../../data/chroma")) os.makedirs(chroma_data_path, exist_ok=True) self.client = PersistentClient(path=chroma_data_path) self._collection_name = collection_name diff --git a/pkg/rag/knowledge/services/chunker.py b/pkg/rag/knowledge/services/chunker.py index f115dac4..17202a7a 100644 --- 
a/pkg/rag/knowledge/services/chunker.py +++ b/pkg/rag/knowledge/services/chunker.py @@ -1,7 +1,7 @@ # services/chunker.py import logging from typing import List -from services.base_service import BaseService # Assuming BaseService provides _run_sync +from pkg.rag.knowledge.services.base_service import BaseService # Assuming BaseService provides _run_sync logger = logging.getLogger(__name__) diff --git a/pkg/rag/knowledge/services/embedder.py b/pkg/rag/knowledge/services/embedder.py index 2b581e96..7e20b19a 100644 --- a/pkg/rag/knowledge/services/embedder.py +++ b/pkg/rag/knowledge/services/embedder.py @@ -4,10 +4,10 @@ import logging import numpy as np from typing import List from sqlalchemy.orm import Session -from services.base_service import BaseService -from services.database import Chunk, SessionLocal -from services.embedding_models import BaseEmbeddingModel, EmbeddingModelFactory -from services.chroma_manager import ChromaIndexManager # Import the manager +from pkg.rag.knowledge.services.base_service import BaseService +from pkg.rag.knowledge.services.database import Chunk, SessionLocal +from pkg.rag.knowledge.services.embedding_models import BaseEmbeddingModel, EmbeddingModelFactory +from pkg.rag.knowledge.services.chroma_manager import ChromaIndexManager # Import the manager logger = logging.getLogger(__name__) diff --git a/pkg/rag/knowledge/services/retriever.py b/pkg/rag/knowledge/services/retriever.py index 6da1c5d8..4da81eb1 100644 --- a/pkg/rag/knowledge/services/retriever.py +++ b/pkg/rag/knowledge/services/retriever.py @@ -4,10 +4,10 @@ import logging import numpy as np # Make sure numpy is imported from typing import List, Dict, Any from sqlalchemy.orm import Session -from services.base_service import BaseService -from services.database import Chunk, SessionLocal -from services.embedding_models import BaseEmbeddingModel, EmbeddingModelFactory -from services.chroma_manager import ChromaIndexManager +from pkg.rag.knowledge.services.base_service import 
BaseService +from pkg.rag.knowledge.services.database import Chunk, SessionLocal +from pkg.rag.knowledge.services.embedding_models import BaseEmbeddingModel, EmbeddingModelFactory +from pkg.rag.knowledge.services.chroma_manager import ChromaIndexManager logger = logging.getLogger(__name__) From 552fee9bacf0ade89cabd77ee12f886c7d8693a1 Mon Sep 17 00:00:00 2001 From: WangCham <651122857@qq.com> Date: Sat, 5 Jul 2025 17:53:11 +0800 Subject: [PATCH 019/257] fix: modify rag database --- pkg/rag/knowledge/RAG_Manager.py | 6 +++--- pkg/rag/knowledge/services/database.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pkg/rag/knowledge/RAG_Manager.py b/pkg/rag/knowledge/RAG_Manager.py index 292f23ce..6ded737a 100644 --- a/pkg/rag/knowledge/RAG_Manager.py +++ b/pkg/rag/knowledge/RAG_Manager.py @@ -62,8 +62,8 @@ class RAG_Manager: chroma_manager=self.chroma_manager # Inject dependency ) - - async def create_knowledge_base(self, kb_name: str, kb_description: str ,): + + async def create_knowledge_base(self, kb_name: str, kb_description: str, embedding_model: str = "", top_k: int = 5): """ Creates a new knowledge base with the given name and description. If a knowledge base with the same name already exists, it returns that one. 
@@ -82,7 +82,7 @@ class RAG_Manager: def _add_kb_sync(): session = SessionLocal() try: - new_kb = KnowledgeBase(name=kb_name, description=kb_description) + new_kb = KnowledgeBase(name=kb_name, description=kb_description, embedding_model=embedding_model, top_k=top_k) session.add(new_kb) session.commit() session.refresh(new_kb) diff --git a/pkg/rag/knowledge/services/database.py b/pkg/rag/knowledge/services/database.py index 4ec21af3..a8c35883 100644 --- a/pkg/rag/knowledge/services/database.py +++ b/pkg/rag/knowledge/services/database.py @@ -11,7 +11,8 @@ class KnowledgeBase(Base): name = Column(String, index=True) description = Column(Text) created_at = Column(DateTime, default=datetime.utcnow) - + embedding_model = Column(String, default="") # 默认嵌入模型 + top_k = Column(Integer, default=5) # 默认返回的top_k数量 files = relationship("File", back_populates="knowledge_base") class File(Base): From d2b93b3296b24028b5edc13d4a283b296b8ff12d Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 21 May 2025 12:42:39 +0800 Subject: [PATCH 020/257] feat: add embeddings model management (#1461) * feat: add embeddings model management backend support Co-Authored-By: Junyan Qin * feat: add embeddings model management frontend support Co-Authored-By: Junyan Qin * chore: revert HttpClient URL to production setting Co-Authored-By: Junyan Qin * refactor: integrate embeddings models into models page with tabs Co-Authored-By: Junyan Qin * perf: move files * perf: remove `s` * feat: allow requester to declare supported types in manifest * feat(embedding): delete dimension and encoding format * feat: add extra_args for embedding moels * perf: i18n ref * fix: linter err * fix: lint err * fix: linter err --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: Junyan Qin --- .../http/controller/groups/provider/models.py | 55 +- 
.../controller/groups/provider/requesters.py | 3 +- pkg/api/http/service/model.py | 89 ++- pkg/core/app.py | 4 +- pkg/core/stages/build_app.py | 7 +- pkg/entity/persistence/model.py | 21 + pkg/provider/modelmgr/entities.py | 2 +- pkg/provider/modelmgr/modelmgr.py | 89 ++- pkg/provider/modelmgr/requester.py | 51 +- .../modelmgr/requesters/anthropicmsgs.py | 2 +- .../modelmgr/requesters/anthropicmsgs.yaml | 2 + .../modelmgr/requesters/bailianchatcmpl.yaml | 2 + pkg/provider/modelmgr/requesters/chatcmpl.py | 38 +- .../modelmgr/requesters/chatcmpl.yaml | 3 + .../modelmgr/requesters/deepseekchatcmpl.yaml | 2 + .../modelmgr/requesters/geminichatcmpl.yaml | 2 + .../modelmgr/requesters/giteeaichatcmpl.yaml | 2 + .../modelmgr/requesters/lmstudiochatcmpl.yaml | 2 + .../modelmgr/requesters/modelscopechatcmpl.py | 2 +- .../requesters/modelscopechatcmpl.yaml | 2 + .../modelmgr/requesters/moonshotchatcmpl.yaml | 2 + .../modelmgr/requesters/ollamachat.py | 2 +- .../modelmgr/requesters/ollamachat.yaml | 2 + .../requesters/openrouterchatcmpl.yaml | 2 + .../modelmgr/requesters/ppiochatcmpl.yaml | 2 + .../requesters/siliconflowchatcmpl.yaml | 2 + .../modelmgr/requesters/volcarkchatcmpl.yaml | 2 + .../modelmgr/requesters/xaichatcmpl.yaml | 2 + .../modelmgr/requesters/zhipuaichatcmpl.yaml | 2 + .../home-sidebar/sidbarConfigList.tsx | 1 + .../{llm-form => }/ChooseRequesterEntity.ts | 0 .../models/component/ICreateEmbeddingField.ts | 7 + .../models/{ => component}/ICreateLLMField.ts | 0 .../embedding-card/EmbeddingCard.module.css | 97 +++ .../embedding-card/EmbeddingCard.tsx | 53 ++ .../embedding-card/EmbeddingCardVO.ts | 23 + .../embedding-form/EmbeddingForm.tsx | 563 ++++++++++++++++++ .../models/component/llm-form/LLMForm.tsx | 8 +- web/src/app/home/models/page.tsx | 183 +++++- web/src/app/infra/entities/api/index.ts | 23 + web/src/app/infra/http/HttpClient.ts | 42 +- web/src/i18n/locales/en-US.ts | 18 +- web/src/i18n/locales/zh-Hans.ts | 18 +- 43 files changed, 1370 insertions(+), 64 
deletions(-) rename web/src/app/home/models/component/{llm-form => }/ChooseRequesterEntity.ts (100%) create mode 100644 web/src/app/home/models/component/ICreateEmbeddingField.ts rename web/src/app/home/models/{ => component}/ICreateLLMField.ts (100%) create mode 100644 web/src/app/home/models/component/embedding-card/EmbeddingCard.module.css create mode 100644 web/src/app/home/models/component/embedding-card/EmbeddingCard.tsx create mode 100644 web/src/app/home/models/component/embedding-card/EmbeddingCardVO.ts create mode 100644 web/src/app/home/models/component/embedding-form/EmbeddingForm.tsx diff --git a/pkg/api/http/controller/groups/provider/models.py b/pkg/api/http/controller/groups/provider/models.py index bb77986c..0de0c922 100644 --- a/pkg/api/http/controller/groups/provider/models.py +++ b/pkg/api/http/controller/groups/provider/models.py @@ -9,18 +9,18 @@ class LLMModelsRouterGroup(group.RouterGroup): @self.route('', methods=['GET', 'POST']) async def _() -> str: if quart.request.method == 'GET': - return self.success(data={'models': await self.ap.model_service.get_llm_models()}) + return self.success(data={'models': await self.ap.llm_model_service.get_llm_models()}) elif quart.request.method == 'POST': json_data = await quart.request.json - model_uuid = await self.ap.model_service.create_llm_model(json_data) + model_uuid = await self.ap.llm_model_service.create_llm_model(json_data) return self.success(data={'uuid': model_uuid}) @self.route('/', methods=['GET', 'PUT', 'DELETE']) async def _(model_uuid: str) -> str: if quart.request.method == 'GET': - model = await self.ap.model_service.get_llm_model(model_uuid) + model = await self.ap.llm_model_service.get_llm_model(model_uuid) if model is None: return self.http_status(404, -1, 'model not found') @@ -29,11 +29,11 @@ class LLMModelsRouterGroup(group.RouterGroup): elif quart.request.method == 'PUT': json_data = await quart.request.json - await self.ap.model_service.update_llm_model(model_uuid, json_data) 
+ await self.ap.llm_model_service.update_llm_model(model_uuid, json_data) return self.success() elif quart.request.method == 'DELETE': - await self.ap.model_service.delete_llm_model(model_uuid) + await self.ap.llm_model_service.delete_llm_model(model_uuid) return self.success() @@ -41,6 +41,49 @@ class LLMModelsRouterGroup(group.RouterGroup): async def _(model_uuid: str) -> str: json_data = await quart.request.json - await self.ap.model_service.test_llm_model(model_uuid, json_data) + await self.ap.llm_model_service.test_llm_model(model_uuid, json_data) + + return self.success() + + +@group.group_class('models/embedding', '/api/v1/provider/models/embedding') +class EmbeddingModelsRouterGroup(group.RouterGroup): + async def initialize(self) -> None: + @self.route('', methods=['GET', 'POST']) + async def _() -> str: + if quart.request.method == 'GET': + return self.success(data={'models': await self.ap.embedding_models_service.get_embedding_models()}) + elif quart.request.method == 'POST': + json_data = await quart.request.json + + model_uuid = await self.ap.embedding_models_service.create_embedding_model(json_data) + + return self.success(data={'uuid': model_uuid}) + + @self.route('/', methods=['GET', 'PUT', 'DELETE']) + async def _(model_uuid: str) -> str: + if quart.request.method == 'GET': + model = await self.ap.embedding_models_service.get_embedding_model(model_uuid) + + if model is None: + return self.http_status(404, -1, 'model not found') + + return self.success(data={'model': model}) + elif quart.request.method == 'PUT': + json_data = await quart.request.json + + await self.ap.embedding_models_service.update_embedding_model(model_uuid, json_data) + + return self.success() + elif quart.request.method == 'DELETE': + await self.ap.embedding_models_service.delete_embedding_model(model_uuid) + + return self.success() + + @self.route('//test', methods=['POST']) + async def _(model_uuid: str) -> str: + json_data = await quart.request.json + + await 
self.ap.embedding_models_service.test_embedding_model(model_uuid, json_data) return self.success() diff --git a/pkg/api/http/controller/groups/provider/requesters.py b/pkg/api/http/controller/groups/provider/requesters.py index 0f999288..af9e1540 100644 --- a/pkg/api/http/controller/groups/provider/requesters.py +++ b/pkg/api/http/controller/groups/provider/requesters.py @@ -8,7 +8,8 @@ class RequestersRouterGroup(group.RouterGroup): async def initialize(self) -> None: @self.route('', methods=['GET']) async def _() -> quart.Response: - return self.success(data={'requesters': self.ap.model_mgr.get_available_requesters_info()}) + model_type = quart.request.args.get('type', '') + return self.success(data={'requesters': self.ap.model_mgr.get_available_requesters_info(model_type)}) @self.route('/', methods=['GET']) async def _(requester_name: str) -> quart.Response: diff --git a/pkg/api/http/service/model.py b/pkg/api/http/service/model.py index 74fb4e02..afeae3eb 100644 --- a/pkg/api/http/service/model.py +++ b/pkg/api/http/service/model.py @@ -10,7 +10,7 @@ from ....provider.modelmgr import requester as model_requester from ....provider import entities as llm_entities -class ModelsService: +class LLMModelsService: ap: app.Application def __init__(self, ap: app.Application) -> None: @@ -103,3 +103,90 @@ class ModelsService: funcs=[], extra_args={}, ) + + +class EmbeddingModelsService: + ap: app.Application + + def __init__(self, ap: app.Application) -> None: + self.ap = ap + + async def get_embedding_models(self) -> list[dict]: + result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.EmbeddingModel)) + + models = result.all() + return [self.ap.persistence_mgr.serialize_model(persistence_model.EmbeddingModel, model) for model in models] + + async def create_embedding_model(self, model_data: dict) -> str: + model_data['uuid'] = str(uuid.uuid4()) + + await self.ap.persistence_mgr.execute_async( + 
sqlalchemy.insert(persistence_model.EmbeddingModel).values(**model_data) + ) + + embedding_model = await self.get_embedding_model(model_data['uuid']) + + await self.ap.model_mgr.load_embedding_model(embedding_model) + + return model_data['uuid'] + + async def get_embedding_model(self, model_uuid: str) -> dict | None: + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.select(persistence_model.EmbeddingModel).where( + persistence_model.EmbeddingModel.uuid == model_uuid + ) + ) + + model = result.first() + + if model is None: + return None + + return self.ap.persistence_mgr.serialize_model(persistence_model.EmbeddingModel, model) + + async def update_embedding_model(self, model_uuid: str, model_data: dict) -> None: + if 'uuid' in model_data: + del model_data['uuid'] + + await self.ap.persistence_mgr.execute_async( + sqlalchemy.update(persistence_model.EmbeddingModel) + .where(persistence_model.EmbeddingModel.uuid == model_uuid) + .values(**model_data) + ) + + await self.ap.model_mgr.remove_embedding_model(model_uuid) + + embedding_model = await self.get_embedding_model(model_uuid) + + await self.ap.model_mgr.load_embedding_model(embedding_model) + + async def delete_embedding_model(self, model_uuid: str) -> None: + await self.ap.persistence_mgr.execute_async( + sqlalchemy.delete(persistence_model.EmbeddingModel).where( + persistence_model.EmbeddingModel.uuid == model_uuid + ) + ) + + await self.ap.model_mgr.remove_embedding_model(model_uuid) + + async def test_embedding_model(self, model_uuid: str, model_data: dict) -> None: + runtime_embedding_model: model_requester.RuntimeEmbeddingModel | None = None + + if model_uuid != '_': + for model in self.ap.model_mgr.embedding_models: + if model.model_entity.uuid == model_uuid: + runtime_embedding_model = model + break + + if runtime_embedding_model is None: + raise Exception('model not found') + + else: + runtime_embedding_model = await self.ap.model_mgr.init_runtime_embedding_model(model_data) + + await 
runtime_embedding_model.requester.invoke_embedding( + query=None, + model=runtime_embedding_model, + input_text='Hello, world!', + extra_args={}, + ) diff --git a/pkg/core/app.py b/pkg/core/app.py index 911acd3d..318cddcb 100644 --- a/pkg/core/app.py +++ b/pkg/core/app.py @@ -103,7 +103,9 @@ class Application: user_service: user_service.UserService = None - model_service: model_service.ModelsService = None + llm_model_service: model_service.LLMModelsService = None + + embedding_models_service: model_service.EmbeddingModelsService = None pipeline_service: pipeline_service.PipelineService = None diff --git a/pkg/core/stages/build_app.py b/pkg/core/stages/build_app.py index 6ee35610..482a468b 100644 --- a/pkg/core/stages/build_app.py +++ b/pkg/core/stages/build_app.py @@ -95,8 +95,11 @@ class BuildAppStage(stage.BootingStage): user_service_inst = user_service.UserService(ap) ap.user_service = user_service_inst - model_service_inst = model_service.ModelsService(ap) - ap.model_service = model_service_inst + llm_model_service_inst = model_service.LLMModelsService(ap) + ap.llm_model_service = llm_model_service_inst + + embedding_models_service_inst = model_service.EmbeddingModelsService(ap) + ap.embedding_models_service = embedding_models_service_inst pipeline_service_inst = pipeline_service.PipelineService(ap) ap.pipeline_service = pipeline_service_inst diff --git a/pkg/entity/persistence/model.py b/pkg/entity/persistence/model.py index 9eb2ccef..418cab70 100644 --- a/pkg/entity/persistence/model.py +++ b/pkg/entity/persistence/model.py @@ -23,3 +23,24 @@ class LLMModel(Base): server_default=sqlalchemy.func.now(), onupdate=sqlalchemy.func.now(), ) + + +class EmbeddingModel(Base): + """Embedding 模型""" + + __tablename__ = 'embedding_models' + + uuid = sqlalchemy.Column(sqlalchemy.String(255), primary_key=True, unique=True) + name = sqlalchemy.Column(sqlalchemy.String(255), nullable=False) + description = sqlalchemy.Column(sqlalchemy.String(255), nullable=False) + requester 
= sqlalchemy.Column(sqlalchemy.String(255), nullable=False) + requester_config = sqlalchemy.Column(sqlalchemy.JSON, nullable=False, default={}) + api_keys = sqlalchemy.Column(sqlalchemy.JSON, nullable=False) + extra_args = sqlalchemy.Column(sqlalchemy.JSON, nullable=False, default={}) + created_at = sqlalchemy.Column(sqlalchemy.DateTime, nullable=False, server_default=sqlalchemy.func.now()) + updated_at = sqlalchemy.Column( + sqlalchemy.DateTime, + nullable=False, + server_default=sqlalchemy.func.now(), + onupdate=sqlalchemy.func.now(), + ) diff --git a/pkg/provider/modelmgr/entities.py b/pkg/provider/modelmgr/entities.py index cf856894..7bc02a32 100644 --- a/pkg/provider/modelmgr/entities.py +++ b/pkg/provider/modelmgr/entities.py @@ -17,7 +17,7 @@ class LLMModelInfo(pydantic.BaseModel): token_mgr: token.TokenManager - requester: requester.LLMAPIRequester + requester: requester.ProviderAPIRequester tool_call_supported: typing.Optional[bool] = False diff --git a/pkg/provider/modelmgr/modelmgr.py b/pkg/provider/modelmgr/modelmgr.py index b15e53a9..2c92eacc 100644 --- a/pkg/provider/modelmgr/modelmgr.py +++ b/pkg/provider/modelmgr/modelmgr.py @@ -18,7 +18,7 @@ class ModelManager: model_list: list[entities.LLMModelInfo] # deprecated - requesters: dict[str, requester.LLMAPIRequester] # deprecated + requesters: dict[str, requester.ProviderAPIRequester] # deprecated token_mgrs: dict[str, token.TokenManager] # deprecated @@ -28,9 +28,11 @@ class ModelManager: llm_models: list[requester.RuntimeLLMModel] + embedding_models: list[requester.RuntimeEmbeddingModel] + requester_components: list[engine.Component] - requester_dict: dict[str, type[requester.LLMAPIRequester]] # cache + requester_dict: dict[str, type[requester.ProviderAPIRequester]] # cache def __init__(self, ap: app.Application): self.ap = ap @@ -38,6 +40,7 @@ class ModelManager: self.requesters = {} self.token_mgrs = {} self.llm_models = [] + self.embedding_models = [] self.requester_components = [] 
self.requester_dict = {} @@ -45,7 +48,7 @@ class ModelManager: self.requester_components = self.ap.discover.get_components_by_kind('LLMAPIRequester') # forge requester class dict - requester_dict: dict[str, type[requester.LLMAPIRequester]] = {} + requester_dict: dict[str, type[requester.ProviderAPIRequester]] = {} for component in self.requester_components: requester_dict[component.metadata.name] = component.get_python_component_class() @@ -58,13 +61,11 @@ class ModelManager: self.ap.logger.info('Loading models from db...') self.llm_models = [] + self.embedding_models = [] # llm models result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.LLMModel)) - llm_models = result.all() - - # load models for llm_model in llm_models: try: await self.load_llm_model(llm_model) @@ -73,11 +74,17 @@ class ModelManager: except Exception as e: self.ap.logger.error(f'Failed to load model {llm_model.uuid}: {e}\n{traceback.format_exc()}') + # embedding models + result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.EmbeddingModel)) + embedding_models = result.all() + for embedding_model in embedding_models: + await self.load_embedding_model(embedding_model) + async def init_runtime_llm_model( self, model_info: persistence_model.LLMModel | sqlalchemy.Row[persistence_model.LLMModel] | dict, ): - """初始化运行时模型""" + """初始化运行时 LLM 模型""" if isinstance(model_info, sqlalchemy.Row): model_info = persistence_model.LLMModel(**model_info._mapping) elif isinstance(model_info, dict): @@ -101,14 +108,47 @@ class ModelManager: return runtime_llm_model + async def init_runtime_embedding_model( + self, + model_info: persistence_model.EmbeddingModel | sqlalchemy.Row[persistence_model.EmbeddingModel] | dict, + ): + """初始化运行时 Embedding 模型""" + if isinstance(model_info, sqlalchemy.Row): + model_info = persistence_model.EmbeddingModel(**model_info._mapping) + elif isinstance(model_info, dict): + model_info = 
persistence_model.EmbeddingModel(**model_info) + + requester_inst = self.requester_dict[model_info.requester](ap=self.ap, config=model_info.requester_config) + + await requester_inst.initialize() + + runtime_embedding_model = requester.RuntimeEmbeddingModel( + model_entity=model_info, + token_mgr=token.TokenManager( + name=model_info.uuid, + tokens=model_info.api_keys, + ), + requester=requester_inst, + ) + + return runtime_embedding_model + async def load_llm_model( self, model_info: persistence_model.LLMModel | sqlalchemy.Row[persistence_model.LLMModel] | dict, ): - """加载模型""" + """加载 LLM 模型""" runtime_llm_model = await self.init_runtime_llm_model(model_info) self.llm_models.append(runtime_llm_model) + async def load_embedding_model( + self, + model_info: persistence_model.EmbeddingModel | sqlalchemy.Row[persistence_model.EmbeddingModel] | dict, + ): + """加载 Embedding 模型""" + runtime_embedding_model = await self.init_runtime_embedding_model(model_info) + self.embedding_models.append(runtime_embedding_model) + async def get_model_by_name(self, name: str) -> entities.LLMModelInfo: # deprecated """通过名称获取模型""" for model in self.model_list: @@ -116,23 +156,44 @@ class ModelManager: return model raise ValueError(f'无法确定模型 {name} 的信息') - async def get_model_by_uuid(self, uuid: str) -> entities.LLMModelInfo: - """通过uuid获取模型""" + async def get_model_by_uuid(self, uuid: str) -> requester.RuntimeLLMModel: + """通过uuid获取 LLM 模型""" for model in self.llm_models: if model.model_entity.uuid == uuid: return model - raise ValueError(f'model {uuid} not found') + raise ValueError(f'LLM model {uuid} not found') + + async def get_embedding_model_by_uuid(self, uuid: str) -> requester.RuntimeEmbeddingModel: + """通过uuid获取 Embedding 模型""" + for model in self.embedding_models: + if model.model_entity.uuid == uuid: + return model + raise ValueError(f'Embedding model {uuid} not found') async def remove_llm_model(self, model_uuid: str): - """移除模型""" + """移除 LLM 模型""" for model in 
self.llm_models: if model.model_entity.uuid == model_uuid: self.llm_models.remove(model) return - def get_available_requesters_info(self) -> list[dict]: + async def remove_embedding_model(self, model_uuid: str): + """移除 Embedding 模型""" + for model in self.embedding_models: + if model.model_entity.uuid == model_uuid: + self.embedding_models.remove(model) + return + + def get_available_requesters_info(self, model_type: str) -> list[dict]: """获取所有可用的请求器""" - return [component.to_plain_dict() for component in self.requester_components] + if model_type != '': + return [ + component.to_plain_dict() + for component in self.requester_components + if model_type in component.spec['support_type'] + ] + else: + return [component.to_plain_dict() for component in self.requester_components] def get_available_requester_info_by_name(self, name: str) -> dict | None: """通过名称获取请求器信息""" diff --git a/pkg/provider/modelmgr/requester.py b/pkg/provider/modelmgr/requester.py index 244f4c82..9742a52c 100644 --- a/pkg/provider/modelmgr/requester.py +++ b/pkg/provider/modelmgr/requester.py @@ -20,22 +20,45 @@ class RuntimeLLMModel: token_mgr: token.TokenManager """api key管理器""" - requester: LLMAPIRequester + requester: ProviderAPIRequester """请求器实例""" def __init__( self, model_entity: persistence_model.LLMModel, token_mgr: token.TokenManager, - requester: LLMAPIRequester, + requester: ProviderAPIRequester, ): self.model_entity = model_entity self.token_mgr = token_mgr self.requester = requester -class LLMAPIRequester(metaclass=abc.ABCMeta): - """LLM API请求器""" +class RuntimeEmbeddingModel: + """运行时 Embedding 模型""" + + model_entity: persistence_model.EmbeddingModel + """模型数据""" + + token_mgr: token.TokenManager + """api key管理器""" + + requester: ProviderAPIRequester + """请求器实例""" + + def __init__( + self, + model_entity: persistence_model.EmbeddingModel, + token_mgr: token.TokenManager, + requester: ProviderAPIRequester, + ): + self.model_entity = model_entity + self.token_mgr = token_mgr + 
self.requester = requester + + +class ProviderAPIRequester(metaclass=abc.ABCMeta): + """Provider API请求器""" name: str = None @@ -74,3 +97,23 @@ class LLMAPIRequester(metaclass=abc.ABCMeta): llm_entities.Message: 返回消息对象 """ pass + + async def invoke_embedding( + self, + query: core_entities.Query, + model: RuntimeEmbeddingModel, + input_text: str, + extra_args: dict[str, typing.Any] = {}, + ) -> list[float]: + """调用 Embedding API + + Args: + query (core_entities.Query): 请求上下文 + model (RuntimeEmbeddingModel): 使用的模型信息 + input_text (str): 输入文本 + extra_args (dict[str, typing.Any], optional): 额外的参数. Defaults to {}. + + Returns: + list[float]: 返回的 embedding 向量 + """ + pass diff --git a/pkg/provider/modelmgr/requesters/anthropicmsgs.py b/pkg/provider/modelmgr/requesters/anthropicmsgs.py index 38573854..b195ae51 100644 --- a/pkg/provider/modelmgr/requesters/anthropicmsgs.py +++ b/pkg/provider/modelmgr/requesters/anthropicmsgs.py @@ -15,7 +15,7 @@ from ...tools import entities as tools_entities from ....utils import image -class AnthropicMessages(requester.LLMAPIRequester): +class AnthropicMessages(requester.ProviderAPIRequester): """Anthropic Messages API 请求器""" client: anthropic.AsyncAnthropic diff --git a/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml b/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml index c124fed9..7dbcf3ed 100644 --- a/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml +++ b/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./anthropicmsgs.py diff --git a/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml b/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml index 24beb915..10aae30f 100644 --- a/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: 
./bailianchatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/chatcmpl.py b/pkg/provider/modelmgr/requesters/chatcmpl.py index 513086e5..98d1f13a 100644 --- a/pkg/provider/modelmgr/requesters/chatcmpl.py +++ b/pkg/provider/modelmgr/requesters/chatcmpl.py @@ -13,7 +13,7 @@ from ... import entities as llm_entities from ...tools import entities as tools_entities -class OpenAIChatCompletions(requester.LLMAPIRequester): +class OpenAIChatCompletions(requester.ProviderAPIRequester): """OpenAI ChatCompletion API 请求器""" client: openai.AsyncClient @@ -141,3 +141,39 @@ class OpenAIChatCompletions(requester.LLMAPIRequester): raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') except openai.APIError as e: raise errors.RequesterError(f'请求错误: {e.message}') + + async def invoke_embedding( + self, + query: core_entities.Query, + model: requester.RuntimeEmbeddingModel, + input_text: str, + extra_args: dict[str, typing.Any] = {}, + ) -> list[float]: + """调用 Embedding API""" + self.client.api_key = model.token_mgr.get_token() + + args = { + 'model': model.model_entity.name, + 'input': input_text, + } + + if model.model_entity.extra_args: + args.update(model.model_entity.extra_args) + + args.update(extra_args) + + try: + resp = await self.client.embeddings.create(**args) + return resp.data[0].embedding + except asyncio.TimeoutError: + raise errors.RequesterError('请求超时') + except openai.BadRequestError as e: + raise errors.RequesterError(f'请求参数错误: {e.message}') + except openai.AuthenticationError as e: + raise errors.RequesterError(f'无效的 api-key: {e.message}') + except openai.NotFoundError as e: + raise errors.RequesterError(f'请求路径错误: {e.message}') + except openai.RateLimitError as e: + raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') + except openai.APIError as e: + raise errors.RequesterError(f'请求错误: {e.message}') diff --git a/pkg/provider/modelmgr/requesters/chatcmpl.yaml b/pkg/provider/modelmgr/requesters/chatcmpl.yaml index 908b30ac..ff0de6f9 100644 --- 
a/pkg/provider/modelmgr/requesters/chatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/chatcmpl.yaml @@ -22,6 +22,9 @@ spec: type: integer required: true default: 120 + support_type: + - llm + - text-embedding execution: python: path: ./chatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml b/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml index ea2c7eea..6f320e66 100644 --- a/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./deepseekchatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml b/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml index 6bfc085e..73fca19c 100644 --- a/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./geminichatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml b/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml index a18675a1..3a79bb49 100644 --- a/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./giteeaichatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml b/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml index 893235b2..fbe57dad 100644 --- a/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./lmstudiochatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py 
b/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py index b8868f4d..4708f671 100644 --- a/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py +++ b/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py @@ -14,7 +14,7 @@ from ... import entities as llm_entities from ...tools import entities as tools_entities -class ModelScopeChatCompletions(requester.LLMAPIRequester): +class ModelScopeChatCompletions(requester.ProviderAPIRequester): """ModelScope ChatCompletion API 请求器""" client: openai.AsyncClient diff --git a/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml b/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml index a641a672..a926d889 100644 --- a/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml @@ -29,6 +29,8 @@ spec: type: int required: true default: 120 + support_type: + - llm execution: python: path: ./modelscopechatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml b/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml index f3ae73c8..52f7bcda 100644 --- a/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./moonshotchatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/ollamachat.py b/pkg/provider/modelmgr/requesters/ollamachat.py index 2ea4bb7d..1456515f 100644 --- a/pkg/provider/modelmgr/requesters/ollamachat.py +++ b/pkg/provider/modelmgr/requesters/ollamachat.py @@ -17,7 +17,7 @@ from ....core import entities as core_entities REQUESTER_NAME: str = 'ollama-chat' -class OllamaChatCompletions(requester.LLMAPIRequester): +class OllamaChatCompletions(requester.ProviderAPIRequester): """Ollama平台 ChatCompletion API请求器""" client: ollama.AsyncClient diff --git a/pkg/provider/modelmgr/requesters/ollamachat.yaml b/pkg/provider/modelmgr/requesters/ollamachat.yaml index 
01435775..f4c4bf5a 100644 --- a/pkg/provider/modelmgr/requesters/ollamachat.yaml +++ b/pkg/provider/modelmgr/requesters/ollamachat.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./ollamachat.py diff --git a/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml b/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml index 2ecee6cc..ea35bce6 100644 --- a/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./openrouterchatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml b/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml index 9f201aa9..a5a3421c 100644 --- a/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml @@ -29,6 +29,8 @@ spec: type: int required: true default: 120 + support_type: + - llm execution: python: path: ./ppiochatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml b/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml index 19b3dcc3..3872cb6f 100644 --- a/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./siliconflowchatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml b/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml index 402f04e7..c711ef2d 100644 --- a/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./volcarkchatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml 
b/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml index 29db4eb3..2769a402 100644 --- a/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./xaichatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml b/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml index a05184ef..34539d95 100644 --- a/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./zhipuaichatcmpl.py diff --git a/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx b/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx index e21317d6..ef9c6f45 100644 --- a/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx +++ b/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx @@ -47,6 +47,7 @@ export const sidebarConfigList = [ zh_Hans: 'https://docs.langbot.app/zh/deploy/models/readme.html', }, }), + new SidebarChildVO({ id: 'pipelines', name: t('pipelines.title'), diff --git a/web/src/app/home/models/component/llm-form/ChooseRequesterEntity.ts b/web/src/app/home/models/component/ChooseRequesterEntity.ts similarity index 100% rename from web/src/app/home/models/component/llm-form/ChooseRequesterEntity.ts rename to web/src/app/home/models/component/ChooseRequesterEntity.ts diff --git a/web/src/app/home/models/component/ICreateEmbeddingField.ts b/web/src/app/home/models/component/ICreateEmbeddingField.ts new file mode 100644 index 00000000..ea198f3f --- /dev/null +++ b/web/src/app/home/models/component/ICreateEmbeddingField.ts @@ -0,0 +1,7 @@ +export interface ICreateEmbeddingField { + name: string; + model_provider: string; + url: string; + api_key: string; + extra_args?: string[]; +} diff --git 
a/web/src/app/home/models/ICreateLLMField.ts b/web/src/app/home/models/component/ICreateLLMField.ts similarity index 100% rename from web/src/app/home/models/ICreateLLMField.ts rename to web/src/app/home/models/component/ICreateLLMField.ts diff --git a/web/src/app/home/models/component/embedding-card/EmbeddingCard.module.css b/web/src/app/home/models/component/embedding-card/EmbeddingCard.module.css new file mode 100644 index 00000000..9c6c54f7 --- /dev/null +++ b/web/src/app/home/models/component/embedding-card/EmbeddingCard.module.css @@ -0,0 +1,97 @@ +.cardContainer { + width: 100%; + height: 10rem; + background-color: #fff; + border-radius: 10px; + box-shadow: 0px 2px 2px 0 rgba(0, 0, 0, 0.2); + padding: 1.2rem; + cursor: pointer; +} + +.cardContainer:hover { + box-shadow: 0px 2px 8px 0 rgba(0, 0, 0, 0.1); +} + +.iconBasicInfoContainer { + width: 100%; + height: 100%; + display: flex; + flex-direction: row; + gap: 0.8rem; + user-select: none; +} + +.iconImage { + width: 3.8rem; + height: 3.8rem; + margin: 0.2rem; + border-radius: 50%; +} + +.basicInfoContainer { + display: flex; + flex-direction: column; + gap: 0.2rem; + min-width: 0; + width: 100%; +} + +.basicInfoText { + font-size: 1.4rem; + font-weight: bold; +} + +.providerContainer { + display: flex; + flex-direction: row; + align-items: center; + justify-content: flex-start; + gap: 0.2rem; +} + +.providerIcon { + width: 1.2rem; + height: 1.2rem; + margin-top: 0.2rem; + color: #626262; +} + +.providerLabel { + font-size: 1.2rem; + font-weight: 600; + color: #626262; +} + +.baseURLContainer { + display: flex; + flex-direction: row; + align-items: center; + justify-content: flex-start; + gap: 0.2rem; + width: calc(100% - 3rem); +} + +.baseURLIcon { + width: 1.2rem; + height: 1.2rem; + color: #626262; +} + +.baseURLText { + font-size: 1rem; + width: 100%; + color: #626262; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + max-width: 100%; +} + +.bigText { + white-space: nowrap; + 
overflow: hidden; + text-overflow: ellipsis; + font-size: 1.4rem; + font-weight: bold; + max-width: 100%; +} diff --git a/web/src/app/home/models/component/embedding-card/EmbeddingCard.tsx b/web/src/app/home/models/component/embedding-card/EmbeddingCard.tsx new file mode 100644 index 00000000..e3dfaf80 --- /dev/null +++ b/web/src/app/home/models/component/embedding-card/EmbeddingCard.tsx @@ -0,0 +1,53 @@ +import styles from './EmbeddingCard.module.css'; +import { EmbeddingCardVO } from '@/app/home/models/component/embedding-card/EmbeddingCardVO'; + +export default function EmbeddingCard({ cardVO }: { cardVO: EmbeddingCardVO }) { + return ( +
+
+ icon + +
+ {/* 名称 */} +
+ {cardVO.name} +
+ {/* 厂商 */} +
+ + + + + {cardVO.providerLabel} + +
+ {/* baseURL */} +
+ + + + {cardVO.baseURL} +
+
+
+
+ ); +} diff --git a/web/src/app/home/models/component/embedding-card/EmbeddingCardVO.ts b/web/src/app/home/models/component/embedding-card/EmbeddingCardVO.ts new file mode 100644 index 00000000..f6d960f6 --- /dev/null +++ b/web/src/app/home/models/component/embedding-card/EmbeddingCardVO.ts @@ -0,0 +1,23 @@ +export interface IEmbeddingCardVO { + id: string; + iconURL: string; + name: string; + providerLabel: string; + baseURL: string; +} + +export class EmbeddingCardVO implements IEmbeddingCardVO { + id: string; + iconURL: string; + providerLabel: string; + name: string; + baseURL: string; + + constructor(props: IEmbeddingCardVO) { + this.id = props.id; + this.iconURL = props.iconURL; + this.providerLabel = props.providerLabel; + this.name = props.name; + this.baseURL = props.baseURL; + } +} diff --git a/web/src/app/home/models/component/embedding-form/EmbeddingForm.tsx b/web/src/app/home/models/component/embedding-form/EmbeddingForm.tsx new file mode 100644 index 00000000..4658a22f --- /dev/null +++ b/web/src/app/home/models/component/embedding-form/EmbeddingForm.tsx @@ -0,0 +1,563 @@ +import { ICreateEmbeddingField } from '@/app/home/models/component/ICreateEmbeddingField'; +import { useEffect, useState } from 'react'; +import { IChooseRequesterEntity } from '@/app/home/models/component/ChooseRequesterEntity'; +import { httpClient } from '@/app/infra/http/HttpClient'; +import { EmbeddingModel } from '@/app/infra/entities/api'; +import { UUID } from 'uuidjs'; + +import { zodResolver } from '@hookform/resolvers/zod'; +import { useForm } from 'react-hook-form'; +import { z } from 'zod'; +import { useTranslation } from 'react-i18next'; + +import { + Dialog, + DialogContent, + DialogDescription, + DialogHeader, + DialogTitle, + DialogFooter, +} from '@/components/ui/dialog'; +import { Button } from '@/components/ui/button'; +import { + Form, + FormControl, + FormDescription, + FormField, + FormItem, + FormLabel, + FormMessage, +} from '@/components/ui/form'; +import 
{ Input } from '@/components/ui/input'; +import { + Select, + SelectContent, + SelectGroup, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select'; +import { toast } from 'sonner'; +import { i18nObj } from '@/i18n/I18nProvider'; + +const getExtraArgSchema = (t: (key: string) => string) => + z + .object({ + key: z.string().min(1, { message: t('models.keyNameRequired') }), + type: z.enum(['string', 'number', 'boolean']), + value: z.string(), + }) + .superRefine((data, ctx) => { + if (data.type === 'number' && isNaN(Number(data.value))) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: t('models.mustBeValidNumber'), + path: ['value'], + }); + } + if ( + data.type === 'boolean' && + data.value !== 'true' && + data.value !== 'false' + ) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: t('models.mustBeTrueOrFalse'), + path: ['value'], + }); + } + }); + +const getFormSchema = (t: (key: string) => string) => + z.object({ + name: z.string().min(1, { message: t('models.modelNameRequired') }), + model_provider: z + .string() + .min(1, { message: t('models.modelProviderRequired') }), + url: z.string().min(1, { message: t('models.requestURLRequired') }), + api_key: z.string().min(1, { message: t('models.apiKeyRequired') }), + extra_args: z.array(getExtraArgSchema(t)).optional(), + }); + +export default function EmbeddingForm({ + editMode, + initEmbeddingId, + onFormSubmit, + onFormCancel, + onEmbeddingDeleted, +}: { + editMode: boolean; + initEmbeddingId?: string; + onFormSubmit: () => void; + onFormCancel: () => void; + onEmbeddingDeleted: () => void; +}) { + const { t } = useTranslation(); + const formSchema = getFormSchema(t); + + const form = useForm>({ + resolver: zodResolver(formSchema), + defaultValues: { + name: '', + model_provider: '', + url: '', + api_key: 'sk-xxxxx', + extra_args: [], + }, + }); + + const [extraArgs, setExtraArgs] = useState< + { key: string; type: 'string' | 'number' | 'boolean'; value: string }[] + 
>([]); + + const [showDeleteConfirmModal, setShowDeleteConfirmModal] = useState(false); + const [requesterNameList, setRequesterNameList] = useState< + IChooseRequesterEntity[] + >([]); + const [requesterDefaultURLList, setRequesterDefaultURLList] = useState< + string[] + >([]); + const [modelTesting, setModelTesting] = useState(false); + + useEffect(() => { + initEmbeddingModelFormComponent().then(() => { + if (editMode && initEmbeddingId) { + getEmbeddingConfig(initEmbeddingId).then((val) => { + form.setValue('name', val.name); + form.setValue('model_provider', val.model_provider); + // setCurrentModelProvider(val.model_provider); + form.setValue('url', val.url); + form.setValue('api_key', val.api_key); + if (val.extra_args) { + const args = val.extra_args.map((arg) => { + const [key, value] = arg.split(':'); + let type: 'string' | 'number' | 'boolean' = 'string'; + if (!isNaN(Number(value))) { + type = 'number'; + } else if (value === 'true' || value === 'false') { + type = 'boolean'; + } + return { + key, + type, + value, + }; + }); + setExtraArgs(args); + form.setValue('extra_args', args); + } + }); + } else { + form.reset(); + } + }); + }, []); + + const addExtraArg = () => { + setExtraArgs([...extraArgs, { key: '', type: 'string', value: '' }]); + }; + + const updateExtraArg = ( + index: number, + field: 'key' | 'type' | 'value', + value: string, + ) => { + const newArgs = [...extraArgs]; + newArgs[index] = { + ...newArgs[index], + [field]: value, + }; + setExtraArgs(newArgs); + form.setValue('extra_args', newArgs); + }; + + const removeExtraArg = (index: number) => { + const newArgs = extraArgs.filter((_, i) => i !== index); + setExtraArgs(newArgs); + form.setValue('extra_args', newArgs); + }; + + async function initEmbeddingModelFormComponent() { + const requesterNameList = + await httpClient.getProviderRequesters('text-embedding'); + setRequesterNameList( + requesterNameList.requesters.map((item) => { + return { + label: i18nObj(item.label), + value: 
item.name, + }; + }), + ); + setRequesterDefaultURLList( + requesterNameList.requesters.map((item) => { + const config = item.spec.config; + for (let i = 0; i < config.length; i++) { + if (config[i].name == 'base_url') { + return config[i].default?.toString() || ''; + } + } + return ''; + }), + ); + } + + async function getEmbeddingConfig( + id: string, + ): Promise { + const embeddingModel = await httpClient.getProviderEmbeddingModel(id); + + const fakeExtraArgs = []; + const extraArgs = embeddingModel.model.extra_args as Record; + for (const key in extraArgs) { + fakeExtraArgs.push(`${key}:${extraArgs[key]}`); + } + return { + name: embeddingModel.model.name, + model_provider: embeddingModel.model.requester, + url: embeddingModel.model.requester_config?.base_url, + api_key: embeddingModel.model.api_keys[0], + extra_args: fakeExtraArgs, + }; + } + + function handleFormSubmit(value: z.infer) { + const extraArgsObj: Record = {}; + value.extra_args?.forEach( + (arg: { key: string; type: string; value: string }) => { + if (arg.type === 'number') { + extraArgsObj[arg.key] = Number(arg.value); + } else if (arg.type === 'boolean') { + extraArgsObj[arg.key] = arg.value === 'true'; + } else { + extraArgsObj[arg.key] = arg.value; + } + }, + ); + + const embeddingModel: EmbeddingModel = { + uuid: editMode ? 
initEmbeddingId || '' : UUID.generate(), + name: value.name, + description: '', + requester: value.model_provider, + requester_config: { + base_url: value.url, + timeout: 120, + }, + extra_args: extraArgsObj, + api_keys: [value.api_key], + }; + + if (editMode) { + onSaveEdit(embeddingModel).then(() => { + form.reset(); + }); + } else { + onCreateEmbedding(embeddingModel).then(() => { + form.reset(); + }); + } + } + + async function onCreateEmbedding(embeddingModel: EmbeddingModel) { + try { + await httpClient.createProviderEmbeddingModel(embeddingModel); + onFormSubmit(); + toast.success(t('models.createSuccess')); + } catch (err) { + toast.error(t('models.createError') + (err as Error).message); + } + } + + async function onSaveEdit(embeddingModel: EmbeddingModel) { + try { + await httpClient.updateProviderEmbeddingModel( + initEmbeddingId || '', + embeddingModel, + ); + onFormSubmit(); + toast.success(t('models.saveSuccess')); + } catch (err) { + toast.error(t('models.saveError') + (err as Error).message); + } + } + + function deleteModel() { + if (initEmbeddingId) { + httpClient + .deleteProviderEmbeddingModel(initEmbeddingId) + .then(() => { + onEmbeddingDeleted(); + toast.success(t('models.deleteSuccess')); + }) + .catch((err) => { + toast.error(t('models.deleteError') + err.message); + }); + } + } + + function testEmbeddingModelInForm() { + setModelTesting(true); + httpClient + .testEmbeddingModel('_', { + uuid: '', + name: form.getValues('name'), + description: '', + requester: form.getValues('model_provider'), + requester_config: { + base_url: form.getValues('url'), + timeout: 120, + }, + api_keys: [form.getValues('api_key')], + }) + .then((res) => { + console.log(res); + toast.success(t('models.testSuccess')); + }) + .catch(() => { + toast.error(t('models.testError')); + }) + .finally(() => { + setModelTesting(false); + }); + } + + return ( +
+ + + + {t('common.confirmDelete')} + + + {t('models.deleteConfirmation')} + + + + + + + + +
+ +
+ ( + + + {t('models.modelName')} + * + + + + + + + {t('models.modelProviderDescription')} + + + )} + /> + + ( + + + {t('models.modelProvider')} + * + + + + + + + )} + /> + + ( + + + {t('models.requestURL')} + * + + + + + + + )} + /> + + ( + + + {t('models.apiKey')} + * + + + + + + + )} + /> + + + {t('models.extraParameters')} +
+ {extraArgs.map((arg, index) => ( +
+ + updateExtraArg(index, 'key', e.target.value) + } + /> + + + updateExtraArg(index, 'value', e.target.value) + } + /> + +
+ ))} + +
+ + {t('embedding.extraParametersDescription')} + + +
+
+ + {editMode && ( + + )} + + + + + + + +
+ +
+ ); +} diff --git a/web/src/app/home/models/component/llm-form/LLMForm.tsx b/web/src/app/home/models/component/llm-form/LLMForm.tsx index f483f183..73cc32fe 100644 --- a/web/src/app/home/models/component/llm-form/LLMForm.tsx +++ b/web/src/app/home/models/component/llm-form/LLMForm.tsx @@ -1,6 +1,6 @@ -import { ICreateLLMField } from '@/app/home/models/ICreateLLMField'; +import { ICreateLLMField } from '@/app/home/models/component/ICreateLLMField'; import { useEffect, useState } from 'react'; -import { IChooseRequesterEntity } from '@/app/home/models/component/llm-form/ChooseRequesterEntity'; +import { IChooseRequesterEntity } from '@/app/home/models/component/ChooseRequesterEntity'; import { httpClient } from '@/app/infra/http/HttpClient'; import { LLMModel } from '@/app/infra/entities/api'; import { UUID } from 'uuidjs'; @@ -197,7 +197,7 @@ export default function LLMForm({ }; async function initLLMModelFormComponent() { - const requesterNameList = await httpClient.getProviderRequesters(); + const requesterNameList = await httpClient.getProviderRequesters('llm'); setRequesterNameList( requesterNameList.requesters.map((item) => { return { @@ -596,7 +596,7 @@ export default function LLMForm({ - {t('models.extraParametersDescription')} + {t('llm.extraParametersDescription')} diff --git a/web/src/app/home/models/page.tsx b/web/src/app/home/models/page.tsx index 3ccec486..2f936753 100644 --- a/web/src/app/home/models/page.tsx +++ b/web/src/app/home/models/page.tsx @@ -8,6 +8,7 @@ import LLMForm from '@/app/home/models/component/llm-form/LLMForm'; import CreateCardComponent from '@/app/infra/basic-component/create-card-component/CreateCardComponent'; import { httpClient } from '@/app/infra/http/HttpClient'; import { LLMModel } from '@/app/infra/entities/api'; +import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs'; import { Dialog, DialogContent, @@ -17,6 +18,9 @@ import { import { toast } from 'sonner'; import { useTranslation } from 
'react-i18next'; import { i18nObj } from '@/i18n/I18nProvider'; +import { EmbeddingCardVO } from '@/app/home/models/component/embedding-card/EmbeddingCardVO'; +import EmbeddingCard from '@/app/home/models/component/embedding-card/EmbeddingCard'; +import EmbeddingForm from '@/app/home/models/component/embedding-form/EmbeddingForm'; export default function LLMConfigPage() { const { t } = useTranslation(); @@ -24,13 +28,21 @@ export default function LLMConfigPage() { const [modalOpen, setModalOpen] = useState(false); const [isEditForm, setIsEditForm] = useState(false); const [nowSelectedLLM, setNowSelectedLLM] = useState(null); + const [embeddingCardList, setEmbeddingCardList] = useState( + [], + ); + const [embeddingModalOpen, setEmbeddingModalOpen] = useState(false); + const [isEditEmbeddingForm, setIsEditEmbeddingForm] = useState(false); + const [nowSelectedEmbedding, setNowSelectedEmbedding] = + useState(null); useEffect(() => { getLLMModelList(); + getEmbeddingModelList(); }, []); async function getLLMModelList() { - const requesterNameListResp = await httpClient.getProviderRequesters(); + const requesterNameListResp = await httpClient.getProviderRequesters('llm'); const requesterNameList = requesterNameListResp.requesters.map((item) => { return { label: i18nObj(item.label), @@ -74,6 +86,55 @@ export default function LLMConfigPage() { setNowSelectedLLM(null); setModalOpen(true); } + function selectEmbedding(cardVO: EmbeddingCardVO) { + setIsEditEmbeddingForm(true); + setNowSelectedEmbedding(cardVO); + setEmbeddingModalOpen(true); + } + + function handleCreateEmbeddingModelClick() { + setIsEditEmbeddingForm(false); + setNowSelectedEmbedding(null); + setEmbeddingModalOpen(true); + } + async function getEmbeddingModelList() { + const requesterNameListResp = + await httpClient.getProviderRequesters('text-embedding'); + const requesterNameList = requesterNameListResp.requesters.map((item) => { + return { + label: i18nObj(item.label), + value: item.name, + }; + }); + + 
httpClient + .getProviderEmbeddingModels() + .then((resp) => { + const embeddingModelList: EmbeddingCardVO[] = resp.models.map( + (model: { + uuid: string; + requester: string; + name: string; + requester_config?: { base_url?: string }; + }) => { + return new EmbeddingCardVO({ + id: model.uuid, + iconURL: httpClient.getProviderRequesterIconURL(model.requester), + name: model.name, + providerLabel: + requesterNameList.find((item) => item.value === model.requester) + ?.label || model.requester.substring(0, 10), + baseURL: model.requester_config?.base_url || '', + }); + }, + ); + setEmbeddingCardList(embeddingModelList); + }) + .catch((err) => { + console.error('get Embedding model list error', err); + toast.error(t('embedding.getModelListError') + err.message); + }); + } return (
@@ -101,26 +162,108 @@ export default function LLMConfigPage() { /> -
- - {cardList.map((cardVO) => { - return ( -
{ - selectLLM(cardVO); - }} - > - + + + + + {isEditEmbeddingForm + ? t('embedding.editModel') + : t('embedding.createModel')} + + + { + setEmbeddingModalOpen(false); + getEmbeddingModelList(); + }} + onFormCancel={() => { + setEmbeddingModalOpen(false); + }} + onEmbeddingDeleted={() => { + setEmbeddingModalOpen(false); + getEmbeddingModelList(); + }} + /> + + + + +
+
+ + + {t('llm.llmModels')} + + + {t('embedding.embeddingModels')} + + +
+ +
+

{t('llm.description')}

- ); - })} -
+ + +
+

+ {t('embedding.description')} +

+
+
+
+ + +
+ + {cardList.map((cardVO) => { + return ( +
{ + selectLLM(cardVO); + }} + > + +
+ ); + })} +
+
+ + +
+ + {embeddingCardList.map((cardVO) => { + return ( +
{ + selectEmbedding(cardVO); + }} + > + +
+ ); + })} +
+
+
); } diff --git a/web/src/app/infra/entities/api/index.ts b/web/src/app/infra/entities/api/index.ts index d86a8be0..53ddf1dd 100644 --- a/web/src/app/infra/entities/api/index.ts +++ b/web/src/app/infra/entities/api/index.ts @@ -55,6 +55,29 @@ export interface LLMModel { // updated_at: string; } +export interface ApiRespProviderEmbeddingModels { + models: EmbeddingModel[]; +} + +export interface ApiRespProviderEmbeddingModel { + model: EmbeddingModel; +} + +export interface EmbeddingModel { + name: string; + description: string; + uuid: string; + requester: string; + requester_config: { + base_url: string; + timeout: number; + }; + extra_args?: object; + api_keys: string[]; + // created_at: string; + // updated_at: string; +} + export interface ApiRespPipelines { pipelines: Pipeline[]; } diff --git a/web/src/app/infra/http/HttpClient.ts b/web/src/app/infra/http/HttpClient.ts index 5193703b..1fd335d9 100644 --- a/web/src/app/infra/http/HttpClient.ts +++ b/web/src/app/infra/http/HttpClient.ts @@ -10,6 +10,9 @@ import { ApiRespProviderLLMModels, ApiRespProviderLLMModel, LLMModel, + ApiRespProviderEmbeddingModels, + ApiRespProviderEmbeddingModel, + EmbeddingModel, ApiRespPipelines, Pipeline, ApiRespPlatformAdapters, @@ -226,8 +229,10 @@ class HttpClient { // real api request implementation // ============ Provider API ============ - public getProviderRequesters(): Promise { - return this.get('/api/v1/provider/requesters'); + public getProviderRequesters( + model_type: string, + ): Promise { + return this.get('/api/v1/provider/requesters', { type: model_type }); } public getProviderRequester(name: string): Promise { @@ -275,6 +280,39 @@ class HttpClient { return this.post(`/api/v1/provider/models/llm/${uuid}/test`, model); } + // ============ Provider Model Embedding ============ + public getProviderEmbeddingModels(): Promise { + return this.get('/api/v1/provider/models/embedding'); + } + + public getProviderEmbeddingModel( + uuid: string, + ): Promise { + return 
this.get(`/api/v1/provider/models/embedding/${uuid}`); + } + + public createProviderEmbeddingModel(model: EmbeddingModel): Promise { + return this.post('/api/v1/provider/models/embedding', model); + } + + public deleteProviderEmbeddingModel(uuid: string): Promise { + return this.delete(`/api/v1/provider/models/embedding/${uuid}`); + } + + public updateProviderEmbeddingModel( + uuid: string, + model: EmbeddingModel, + ): Promise { + return this.put(`/api/v1/provider/models/embedding/${uuid}`, model); + } + + public testEmbeddingModel( + uuid: string, + model: EmbeddingModel, + ): Promise { + return this.post(`/api/v1/provider/models/embedding/${uuid}/test`, model); + } + // ============ Pipeline API ============ public getGeneralPipelineMetadata(): Promise { // as designed, this method will be deprecated, and only for developer to check the prefered config schema diff --git a/web/src/i18n/locales/en-US.ts b/web/src/i18n/locales/en-US.ts index 1975a521..d0df9841 100644 --- a/web/src/i18n/locales/en-US.ts +++ b/web/src/i18n/locales/en-US.ts @@ -86,14 +86,13 @@ const enUS = { string: 'String', number: 'Number', boolean: 'Boolean', - extraParametersDescription: - 'Will be attached to the request body, such as max_tokens, temperature, top_p, etc.', selectModelProvider: 'Select Model Provider', modelProviderDescription: 'Please fill in the model name provided by the supplier', selectModel: 'Select Model', testSuccess: 'Test successful', testError: 'Test failed, please check your model configuration', + llmModels: 'LLM Models', }, bots: { title: 'Bots', @@ -259,6 +258,21 @@ const enUS = { 'Password reset failed, please check your email and recovery key', backToLogin: 'Back to Login', }, + embedding: { + description: 'Manage Embedding models for text vectorization', + createModel: 'Create Embedding Model', + editModel: 'Edit Embedding Model', + getModelListError: 'Failed to get Embedding model list: ', + embeddingModels: 'Embedding', + extraParametersDescription: + 'Will be 
attached to the request body, such as encoding_format, dimensions, etc.', + }, + llm: { + description: 'Manage LLM models for conversation generation', + llmModels: 'LLM', + extraParametersDescription: + 'Will be attached to the request body, such as max_tokens, temperature, top_p, etc.', + }, }; export default enUS; diff --git a/web/src/i18n/locales/zh-Hans.ts b/web/src/i18n/locales/zh-Hans.ts index 2ded8236..96acc0e6 100644 --- a/web/src/i18n/locales/zh-Hans.ts +++ b/web/src/i18n/locales/zh-Hans.ts @@ -87,13 +87,12 @@ const zhHans = { string: '字符串', number: '数字', boolean: '布尔值', - extraParametersDescription: - '将在请求时附加到请求体中,如 max_tokens, temperature, top_p 等', selectModelProvider: '选择模型供应商', modelProviderDescription: '请填写供应商向您提供的模型名称', selectModel: '请选择模型', testSuccess: '测试成功', testError: '测试失败,请检查模型配置', + llmModels: '对话模型', }, bots: { title: '机器人', @@ -251,6 +250,21 @@ const zhHans = { resetFailed: '密码重置失败,请检查邮箱和恢复密钥是否正确', backToLogin: '返回登录', }, + embedding: { + description: '管理嵌入模型,用于向量化文本', + createModel: '创建嵌入模型', + editModel: '编辑嵌入模型', + getModelListError: '获取嵌入模型列表失败:', + embeddingModels: '嵌入模型', + extraParametersDescription: + '将在请求时附加到请求体中,如 encoding_format, dimensions 等', + }, + llm: { + llmModels: '对话模型', + description: '管理 LLM 模型,用于对话消息生成', + extraParametersDescription: + '将在请求时附加到请求体中,如 max_tokens, temperature, top_p 等', + }, }; export default zhHans; From 6d8936bd741f3f0523ab0673a2cd6d98fa04d3c0 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Tue, 10 Jun 2025 08:34:53 +0800 Subject: [PATCH 021/257] feat: add knowledge page --- web/src/app/home/knowledge/page.tsx | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 web/src/app/home/knowledge/page.tsx diff --git a/web/src/app/home/knowledge/page.tsx b/web/src/app/home/knowledge/page.tsx new file mode 100644 index 00000000..9707a8ee --- /dev/null +++ b/web/src/app/home/knowledge/page.tsx @@ -0,0 +1,5 @@ +'use client'; + +export default function KnowledgePage() { + return
KnowledgePage
; +} From f36a61dbb20f4fbe554494aef559379c62e0943f Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Wed, 11 Jun 2025 20:24:42 +0800 Subject: [PATCH 022/257] feat: add api for uploading files --- pkg/api/http/controller/groups/files.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pkg/api/http/controller/groups/files.py b/pkg/api/http/controller/groups/files.py index 0a8b2210..d08cbd71 100644 --- a/pkg/api/http/controller/groups/files.py +++ b/pkg/api/http/controller/groups/files.py @@ -2,6 +2,10 @@ from __future__ import annotations import quart import mimetypes +import uuid +import asyncio + +import quart.datastructures from .. import group @@ -20,3 +24,22 @@ class FilesRouterGroup(group.RouterGroup): mime_type = 'image/jpeg' return quart.Response(image_bytes, mimetype=mime_type) + + @self.route('/documents', methods=['POST'], auth_type=group.AuthType.USER_TOKEN) + async def _() -> quart.Response: + request = quart.request + # get file bytes from 'file' + file = (await request.files)['file'] + assert isinstance(file, quart.datastructures.FileStorage) + + file_bytes = await asyncio.to_thread(file.stream.read) + extension = file.filename.split('.')[-1] + + file_key = str(uuid.uuid4()) + '.' 
+ extension + # save file to storage + await self.ap.storage_mgr.storage_provider.save(file_key, file_bytes) + return self.success( + data={ + 'file_id': file_key, + } + ) From 0733f8878f93a17ad281b6a71ea8685877bf78c7 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 27 Jun 2025 21:37:53 +0800 Subject: [PATCH 023/257] feat: add sidebar for rag and related i18n --- .../home-sidebar/sidbarConfigList.tsx | 19 +++++++++++++++++++ web/src/i18n/locales/en-US.ts | 4 ++++ web/src/i18n/locales/ja-JP.ts | 19 +++++++++++++++++++ web/src/i18n/locales/zh-Hans.ts | 4 ++++ 4 files changed, 46 insertions(+) diff --git a/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx b/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx index ef9c6f45..1c3fb4bb 100644 --- a/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx +++ b/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx @@ -88,4 +88,23 @@ export const sidebarConfigList = [ zh_Hans: 'https://docs.langbot.app/zh/plugin/plugin-intro.html', }, }), + new SidebarChildVO({ + id: 'knowledge', + name: t('knowledge.title'), + icon: ( + + + + ), + route: '/home/knowledge', + description: t('knowledge.description'), + helpLink: { + en_US: 'https://docs.langbot.app/en/deploy/knowledge/readme.html', + zh_Hans: 'https://docs.langbot.app/zh/deploy/knowledge/readme.html', + }, + }), ]; diff --git a/web/src/i18n/locales/en-US.ts b/web/src/i18n/locales/en-US.ts index d0df9841..5596e35f 100644 --- a/web/src/i18n/locales/en-US.ts +++ b/web/src/i18n/locales/en-US.ts @@ -230,6 +230,10 @@ const enUS = { atTips: 'Mention the bot', }, }, + knowledge: { + title: 'Knowledge', + description: 'Configuring knowledge bases for improved LLM responses', + }, register: { title: 'Initialize LangBot 👋', description: 'This is your first time starting LangBot', diff --git a/web/src/i18n/locales/ja-JP.ts b/web/src/i18n/locales/ja-JP.ts index bac6f805..21b0ff7d 100644 --- a/web/src/i18n/locales/ja-JP.ts +++ 
b/web/src/i18n/locales/ja-JP.ts @@ -232,6 +232,10 @@ const jaJP = { atTips: 'ボットをメンション', }, }, + knowledge: { + title: '知識ベース', + description: 'LLMの応答品質を向上させるための知識ベースを設定します', + }, register: { title: 'LangBot を初期化 👋', description: 'これはLangBotの初回起動です', @@ -260,6 +264,21 @@ const jaJP = { 'パスワードのリセットに失敗しました。メールアドレスと復旧キーを確認してください', backToLogin: 'ログインに戻る', }, + embedding: { + description: 'テキストのベクトル化に使用する埋め込みモデルを管理します', + createModel: '埋め込みモデルを作成', + editModel: '埋め込みモデルを編集', + getModelListError: '埋め込みモデルリストの取得に失敗しました:', + embeddingModels: '埋め込みモデル', + extraParametersDescription: + 'リクエストボディに追加されるパラメータ(encoding_format、dimensions など)', + }, + llm: { + description: 'チャットメッセージの生成に使用するLLMモデルを管理します', + llmModels: 'LLMモデル', + extraParametersDescription: + 'リクエストボディに追加されるパラメータ(max_tokens、temperature、top_p など)', + }, }; export default jaJP; diff --git a/web/src/i18n/locales/zh-Hans.ts b/web/src/i18n/locales/zh-Hans.ts index 96acc0e6..1bd04ca8 100644 --- a/web/src/i18n/locales/zh-Hans.ts +++ b/web/src/i18n/locales/zh-Hans.ts @@ -225,6 +225,10 @@ const zhHans = { atTips: '提及机器人', }, }, + knowledge: { + title: '知识库', + description: '配置可用于提升模型回复质量的知识库', + }, register: { title: '初始化 LangBot 👋', description: '这是您首次启动 LangBot', From 22ef1a399e07d676625a82b728e1661f245bc210 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 27 Jun 2025 22:18:48 +0800 Subject: [PATCH 024/257] feat: add knowledge base page --- .../home-sidebar/sidbarConfigList.tsx | 38 +++++++++---------- .../home/knowledge/knowledgeBase.module.css | 15 ++++++++ web/src/app/home/knowledge/page.tsx | 19 +++++++++- 3 files changed, 52 insertions(+), 20 deletions(-) create mode 100644 web/src/app/home/knowledge/knowledgeBase.module.css diff --git a/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx b/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx index 1c3fb4bb..b3edb98a 100644 --- a/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx +++ 
b/web/src/app/home/components/home-sidebar/sidbarConfigList.tsx @@ -68,6 +68,25 @@ export const sidebarConfigList = [ zh_Hans: 'https://docs.langbot.app/zh/deploy/pipelines/readme.html', }, }), + new SidebarChildVO({ + id: 'knowledge', + name: t('knowledge.title'), + icon: ( + + + + ), + route: '/home/knowledge', + description: t('knowledge.description'), + helpLink: { + en_US: 'https://docs.langbot.app/en/deploy/knowledge/readme.html', + zh_Hans: 'https://docs.langbot.app/zh/deploy/knowledge/readme.html', + }, + }), new SidebarChildVO({ id: 'plugins', name: t('plugins.title'), @@ -88,23 +107,4 @@ export const sidebarConfigList = [ zh_Hans: 'https://docs.langbot.app/zh/plugin/plugin-intro.html', }, }), - new SidebarChildVO({ - id: 'knowledge', - name: t('knowledge.title'), - icon: ( - - - - ), - route: '/home/knowledge', - description: t('knowledge.description'), - helpLink: { - en_US: 'https://docs.langbot.app/en/deploy/knowledge/readme.html', - zh_Hans: 'https://docs.langbot.app/zh/deploy/knowledge/readme.html', - }, - }), ]; diff --git a/web/src/app/home/knowledge/knowledgeBase.module.css b/web/src/app/home/knowledge/knowledgeBase.module.css new file mode 100644 index 00000000..e811b521 --- /dev/null +++ b/web/src/app/home/knowledge/knowledgeBase.module.css @@ -0,0 +1,15 @@ +.configPageContainer { + width: 100%; + height: 100%; +} + +.knowledgeListContainer { + width: 100%; + padding-left: 0.8rem; + padding-right: 0.8rem; + display: grid; + grid-template-columns: repeat(auto-fill, minmax(24rem, 1fr)); + gap: 2rem; + justify-items: stretch; + align-items: start; +} diff --git a/web/src/app/home/knowledge/page.tsx b/web/src/app/home/knowledge/page.tsx index 9707a8ee..7c9fd048 100644 --- a/web/src/app/home/knowledge/page.tsx +++ b/web/src/app/home/knowledge/page.tsx @@ -1,5 +1,22 @@ 'use client'; +import CreateCardComponent from '@/app/infra/basic-component/create-card-component/CreateCardComponent'; +import styles from './knowledgeBase.module.css'; + export 
default function KnowledgePage() { - return
KnowledgePage
; + return ( +
+
+ { + // setIsEditForm(false); + // setModalOpen(true); + }} + /> +
+
+ ); } From bbf583ddb5c98239ef1f1bf755a2101b656c3126 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sun, 29 Jun 2025 21:00:48 +0800 Subject: [PATCH 025/257] feat: basic entities of kb --- .../components/kb-card/KBCard.module.css | 107 ++++++++++++++++++ .../knowledge/components/kb-card/KBCard.tsx | 36 ++++++ .../knowledge/components/kb-card/KBCardVO.ts | 23 ++++ web/src/app/home/knowledge/page.tsx | 22 ++++ web/src/app/infra/entities/api/index.ts | 17 +++ web/src/app/infra/http/HttpClient.ts | 16 +++ 6 files changed, 221 insertions(+) create mode 100644 web/src/app/home/knowledge/components/kb-card/KBCard.module.css create mode 100644 web/src/app/home/knowledge/components/kb-card/KBCard.tsx create mode 100644 web/src/app/home/knowledge/components/kb-card/KBCardVO.ts diff --git a/web/src/app/home/knowledge/components/kb-card/KBCard.module.css b/web/src/app/home/knowledge/components/kb-card/KBCard.module.css new file mode 100644 index 00000000..2ecbd44a --- /dev/null +++ b/web/src/app/home/knowledge/components/kb-card/KBCard.module.css @@ -0,0 +1,107 @@ +.cardContainer { + width: 100%; + height: 10rem; + background-color: #fff; + border-radius: 10px; + box-shadow: 0px 2px 2px 0 rgba(0, 0, 0, 0.2); + padding: 1.2rem; + cursor: pointer; + display: flex; + flex-direction: row; + justify-content: space-between; + gap: 0.5rem; +} + +.cardContainer:hover { + box-shadow: 0px 2px 8px 0 rgba(0, 0, 0, 0.1); +} + +.basicInfoContainer { + width: 100%; + height: 100%; + display: flex; + flex-direction: column; + justify-content: space-between; + gap: 0.4rem; + min-width: 0; +} + +.basicInfoNameContainer { + display: flex; + flex-direction: column; + gap: 0.2rem; +} + +.basicInfoNameText { + font-size: 1.4rem; + font-weight: 500; +} + +.basicInfoDescriptionText { + font-size: 0.9rem; + font-weight: 400; + display: -webkit-box; + -webkit-line-clamp: 3; + -webkit-box-orient: vertical; + overflow: hidden; + text-overflow: ellipsis; + color: #b1b1b1; +} + 
+.basicInfoLastUpdatedTimeContainer { + display: flex; + flex-direction: row; + align-items: center; + gap: 0.5rem; +} + +.basicInfoUpdateTimeIcon { + width: 1.2rem; + height: 1.2rem; +} + +.basicInfoUpdateTimeText { + font-size: 1rem; + font-weight: 400; +} + +.operationContainer { + display: flex; + flex-direction: column; + align-items: flex-end; + justify-content: space-between; + gap: 0.5rem; + width: 8rem; +} + +.operationDefaultBadge { + display: flex; + flex-direction: row; + gap: 0.5rem; +} + +.operationDefaultBadgeIcon { + width: 1.2rem; + height: 1.2rem; + color: #ffcd27; +} + +.operationDefaultBadgeText { + font-size: 1rem; + font-weight: 400; + color: #ffcd27; +} + +.bigText { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + font-size: 1.4rem; + font-weight: bold; + max-width: 100%; +} + +.debugButtonIcon { + width: 1.2rem; + height: 1.2rem; +} diff --git a/web/src/app/home/knowledge/components/kb-card/KBCard.tsx b/web/src/app/home/knowledge/components/kb-card/KBCard.tsx new file mode 100644 index 00000000..5d49e738 --- /dev/null +++ b/web/src/app/home/knowledge/components/kb-card/KBCard.tsx @@ -0,0 +1,36 @@ +import { KnowledgeBaseVO } from '@/app/home/knowledge/components/kb-card/KBCardVO'; +import { useTranslation } from 'react-i18next'; +import styles from './KBCard.module.css'; + +export default function KBCard({ kbCardVO }: { kbCardVO: KnowledgeBaseVO }) { + const { t } = useTranslation(); + return ( +
+
+
+
+ {kbCardVO.name} +
+
+ {kbCardVO.description} +
+
+ +
+ + + +
+ {t('knowledge.bases.updateTime')} + {kbCardVO.lastUpdatedTimeAgo} +
+
+
+
+ ); +} diff --git a/web/src/app/home/knowledge/components/kb-card/KBCardVO.ts b/web/src/app/home/knowledge/components/kb-card/KBCardVO.ts new file mode 100644 index 00000000..bfbc2adb --- /dev/null +++ b/web/src/app/home/knowledge/components/kb-card/KBCardVO.ts @@ -0,0 +1,23 @@ +export interface IKnowledgeBaseVO { + id: string; + name: string; + description: string; + embeddingModelUUID: string; + lastUpdatedTimeAgo: string; +} + +export class KnowledgeBaseVO implements IKnowledgeBaseVO { + id: string; + name: string; + description: string; + embeddingModelUUID: string; + lastUpdatedTimeAgo: string; + + constructor(props: IKnowledgeBaseVO) { + this.id = props.id; + this.name = props.name; + this.description = props.description; + this.embeddingModelUUID = props.embeddingModelUUID; + this.lastUpdatedTimeAgo = props.lastUpdatedTimeAgo; + } +} diff --git a/web/src/app/home/knowledge/page.tsx b/web/src/app/home/knowledge/page.tsx index 7c9fd048..7ee25eac 100644 --- a/web/src/app/home/knowledge/page.tsx +++ b/web/src/app/home/knowledge/page.tsx @@ -2,8 +2,22 @@ import CreateCardComponent from '@/app/infra/basic-component/create-card-component/CreateCardComponent'; import styles from './knowledgeBase.module.css'; +import { useTranslation } from 'react-i18next'; +import { useState } from 'react'; +import { KnowledgeBaseVO } from '@/app/home/knowledge/components/kb-card/KBCardVO'; +import KBCard from '@/app/home/knowledge/components/kb-card/KBCard'; export default function KnowledgePage() { + const { t } = useTranslation(); + const [knowledgeBaseList, setKnowledgeBaseList] = useState( + [], + ); + + const handleKBCardClick = (kbId: string) => { + // setIsEditForm(false); + // setModalOpen(true); + }; + return (
@@ -16,6 +30,14 @@ export default function KnowledgePage() { // setModalOpen(true); }} /> + + {knowledgeBaseList.map((kb) => { + return ( +
handleKBCardClick(kb.id)}> + +
+ ); + })}
); diff --git a/web/src/app/infra/entities/api/index.ts b/web/src/app/infra/entities/api/index.ts index 53ddf1dd..a44b1991 100644 --- a/web/src/app/infra/entities/api/index.ts +++ b/web/src/app/infra/entities/api/index.ts @@ -133,6 +133,23 @@ export interface Bot { updated_at?: string; } +export interface ApiRespKnowledgeBases { + bases: KnowledgeBase[]; +} + +export interface ApiRespKnowledgeBase { + base: KnowledgeBase; +} + +export interface KnowledgeBase { + uuid?: string; + name: string; + description: string; + embedding_model_uuid: string; + created_at?: string; + updated_at?: string; +} + // plugins export interface ApiRespPlugins { plugins: Plugin[]; diff --git a/web/src/app/infra/http/HttpClient.ts b/web/src/app/infra/http/HttpClient.ts index 1fd335d9..5c6e0abd 100644 --- a/web/src/app/infra/http/HttpClient.ts +++ b/web/src/app/infra/http/HttpClient.ts @@ -34,6 +34,9 @@ import { AsyncTask, ApiRespWebChatMessage, ApiRespWebChatMessages, + ApiRespKnowledgeBases, + ApiRespKnowledgeBase, + KnowledgeBase, } from '@/app/infra/entities/api'; import { GetBotLogsRequest } from '@/app/infra/http/requestParam/bots/GetBotLogsRequest'; import { GetBotLogsResponse } from '@/app/infra/http/requestParam/bots/GetBotLogsResponse'; @@ -427,6 +430,19 @@ class HttpClient { return this.post(`/api/v1/platform/bots/${botId}/logs`, request); } + // ============ Knowledge Base API ============ + public getKnowledgeBases(): Promise { + return this.get('/api/v1/knowledge/bases'); + } + + public getKnowledgeBase(uuid: string): Promise { + return this.get(`/api/v1/knowledge/bases/${uuid}`); + } + + public createKnowledgeBase(base: KnowledgeBase): Promise<{ uuid: string }> { + return this.post('/api/v1/knowledge/bases', base); + } + // ============ Plugins API ============ public getPlugins(): Promise { return this.get('/api/v1/plugins'); From 01f2ef569484c582b0ebd8d65077c5764868076c Mon Sep 17 00:00:00 2001 From: Matthew_Astral <50231148+ydzat@users.noreply.github.com> Date: Sat, 5 
Jul 2025 14:51:04 +0200 Subject: [PATCH 026/257] feat: new discord adapter (#1563) --- pkg/platform/sources/discord.py | 767 +++++++++++++++++++++++++++++++- pyproject.toml | 3 + 2 files changed, 769 insertions(+), 1 deletion(-) diff --git a/pkg/platform/sources/discord.py b/pkg/platform/sources/discord.py index 4f5cac28..929636a5 100644 --- a/pkg/platform/sources/discord.py +++ b/pkg/platform/sources/discord.py @@ -9,15 +9,591 @@ import uuid import os import datetime import io +import asyncio +from enum import Enum import aiohttp from .. import adapter from ...core import app +from ..logger import EventLogger from ..types import message as platform_message from ..types import events as platform_events from ..types import entities as platform_entities -from ..logger import EventLogger + +# 语音功能相关异常定义 +class VoiceConnectionError(Exception): + """语音连接基础异常""" + def __init__(self, message: str, error_code: str = None, guild_id: int = None): + super().__init__(message) + self.error_code = error_code + self.guild_id = guild_id + self.timestamp = datetime.datetime.now() + + +class VoicePermissionError(VoiceConnectionError): + """语音权限异常""" + def __init__(self, message: str, missing_permissions: list = None, user_id: int = None, channel_id: int = None): + super().__init__(message, "PERMISSION_ERROR") + self.missing_permissions = missing_permissions or [] + self.user_id = user_id + self.channel_id = channel_id + + +class VoiceNetworkError(VoiceConnectionError): + """语音网络异常""" + def __init__(self, message: str, retry_count: int = 0): + super().__init__(message, "NETWORK_ERROR") + self.retry_count = retry_count + self.last_attempt = datetime.datetime.now() + + +class VoiceConnectionStatus(Enum): + """语音连接状态枚举""" + IDLE = "idle" + CONNECTING = "connecting" + CONNECTED = "connected" + PLAYING = "playing" + RECONNECTING = "reconnecting" + FAILED = "failed" + + +class VoiceConnectionInfo: + """ + 语音连接信息类 + + 用于存储和管理单个语音连接的详细信息,包括连接状态、时间戳、 + 频道信息等。提供连接信息的标准化数据结构。 + + @author: 
@ydzat + @version: 1.0 + @since: 2025-07-04 + """ + + def __init__(self, guild_id: int, channel_id: int, channel_name: str = None): + """ + 初始化语音连接信息 + + @author: @ydzat + + Args: + guild_id (int): 服务器ID + channel_id (int): 语音频道ID + channel_name (str, optional): 语音频道名称 + """ + self.guild_id = guild_id + self.channel_id = channel_id + self.channel_name = channel_name or f"Channel-{channel_id}" + self.connected = False + self.connection_time: datetime.datetime = None + self.last_activity = datetime.datetime.now() + self.status = VoiceConnectionStatus.IDLE + self.user_count = 0 + self.latency = 0.0 + self.connection_health = "unknown" + self.voice_client = None + + def update_status(self, status: VoiceConnectionStatus): + """ + 更新连接状态 + + @author: @ydzat + + Args: + status (VoiceConnectionStatus): 新的连接状态 + """ + self.status = status + self.last_activity = datetime.datetime.now() + + if status == VoiceConnectionStatus.CONNECTED: + self.connected = True + if self.connection_time is None: + self.connection_time = datetime.datetime.now() + elif status in [VoiceConnectionStatus.IDLE, VoiceConnectionStatus.FAILED]: + self.connected = False + self.connection_time = None + self.voice_client = None + + def to_dict(self) -> dict: + """ + 转换为字典格式 + + @author: @ydzat + + Returns: + dict: 连接信息的字典表示 + """ + return { + "guild_id": self.guild_id, + "channel_id": self.channel_id, + "channel_name": self.channel_name, + "connected": self.connected, + "connection_time": self.connection_time.isoformat() if self.connection_time else None, + "last_activity": self.last_activity.isoformat(), + "status": self.status.value, + "user_count": self.user_count, + "latency": self.latency, + "connection_health": self.connection_health + } + + +class VoiceConnectionManager: + """ + 语音连接管理器 + + 负责管理多个服务器的语音连接,提供连接建立、断开、状态查询等功能。 + 采用单例模式确保全局只有一个连接管理器实例。 + + @author: @ydzat + @version: 1.0 + @since: 2025-07-04 + """ + + def __init__(self, bot: discord.Client, logger: EventLogger): + """ + 初始化语音连接管理器 + + 
@author: @ydzat + + Args: + bot (discord.Client): Discord 客户端实例 + logger (EventLogger): 事件日志记录器 + """ + self.bot = bot + self.logger = logger + self.connections: typing.Dict[int, VoiceConnectionInfo] = {} + self._connection_lock = asyncio.Lock() + self._cleanup_task = None + self._monitoring_enabled = True + + async def join_voice_channel(self, guild_id: int, channel_id: int, + user_id: int = None) -> discord.VoiceClient: + """ + 加入语音频道 + + 验证用户权限和频道状态后,建立到指定语音频道的连接。 + 支持连接复用和自动重连机制。 + + @author: @ydzat + + Args: + guild_id (int): 服务器ID + channel_id (int): 语音频道ID + user_id (int, optional): 请求用户ID,用于权限验证 + + Returns: + discord.VoiceClient: 语音客户端实例 + + Raises: + VoicePermissionError: 权限不足时抛出 + VoiceNetworkError: 网络连接失败时抛出 + VoiceConnectionError: 其他连接错误时抛出 + """ + async with self._connection_lock: + try: + # 获取服务器和频道对象 + guild = self.bot.get_guild(guild_id) + if not guild: + raise VoiceConnectionError( + f"无法找到服务器 {guild_id}", + "GUILD_NOT_FOUND", + guild_id + ) + + channel = guild.get_channel(channel_id) + if not channel or not isinstance(channel, discord.VoiceChannel): + raise VoiceConnectionError( + f"无法找到语音频道 {channel_id}", + "CHANNEL_NOT_FOUND", + guild_id + ) + + # 验证用户是否在语音频道中(如果提供了用户ID) + if user_id: + await self._validate_user_in_channel(guild, channel, user_id) + + # 验证机器人权限 + await self._validate_bot_permissions(channel) + + # 检查是否已有连接 + if guild_id in self.connections: + existing_conn = self.connections[guild_id] + if existing_conn.connected and existing_conn.voice_client: + if existing_conn.channel_id == channel_id: + # 已连接到相同频道,返回现有连接 + await self.logger.info(f"复用现有语音连接: {guild.name} -> {channel.name}") + return existing_conn.voice_client + else: + # 连接到不同频道,先断开旧连接 + await self._disconnect_internal(guild_id) + + # 建立新连接 + voice_client = await channel.connect() + + # 更新连接信息 + conn_info = VoiceConnectionInfo(guild_id, channel_id, channel.name) + conn_info.voice_client = voice_client + conn_info.update_status(VoiceConnectionStatus.CONNECTED) + 
conn_info.user_count = len(channel.members) + self.connections[guild_id] = conn_info + + await self.logger.info(f"成功连接到语音频道: {guild.name} -> {channel.name}") + return voice_client + + except discord.ClientException as e: + raise VoiceNetworkError(f"Discord 客户端错误: {str(e)}") + except discord.opus.OpusNotLoaded as e: + raise VoiceConnectionError(f"Opus 编码器未加载: {str(e)}", "OPUS_NOT_LOADED", guild_id) + except Exception as e: + await self.logger.error(f"连接语音频道时发生未知错误: {str(e)}") + raise VoiceConnectionError(f"连接失败: {str(e)}", "UNKNOWN_ERROR", guild_id) + + async def leave_voice_channel(self, guild_id: int) -> bool: + """ + 离开语音频道 + + 断开指定服务器的语音连接,清理相关资源和状态信息。 + 确保音频播放停止后再断开连接。 + + @author: @ydzat + + Args: + guild_id (int): 服务器ID + + Returns: + bool: 断开是否成功 + """ + async with self._connection_lock: + return await self._disconnect_internal(guild_id) + + async def _disconnect_internal(self, guild_id: int) -> bool: + """ + 内部断开连接方法 + + @author: @ydzat + + Args: + guild_id (int): 服务器ID + + Returns: + bool: 断开是否成功 + """ + if guild_id not in self.connections: + return True + + conn_info = self.connections[guild_id] + + try: + if conn_info.voice_client and conn_info.voice_client.is_connected(): + # 停止当前播放 + if conn_info.voice_client.is_playing(): + conn_info.voice_client.stop() + + # 等待播放完全停止 + await asyncio.sleep(0.1) + + # 断开连接 + await conn_info.voice_client.disconnect() + + conn_info.update_status(VoiceConnectionStatus.IDLE) + del self.connections[guild_id] + + await self.logger.info(f"已断开语音连接: Guild {guild_id}") + return True + + except Exception as e: + await self.logger.error(f"断开语音连接时发生错误: {str(e)}") + # 即使出错也要清理连接记录 + conn_info.update_status(VoiceConnectionStatus.FAILED) + if guild_id in self.connections: + del self.connections[guild_id] + return False + + async def get_voice_client(self, guild_id: int) -> typing.Optional[discord.VoiceClient]: + """ + 获取语音客户端 + + 返回指定服务器的语音客户端实例,如果未连接则返回 None。 + 会验证连接的有效性,自动清理无效连接。 + + @author: @ydzat + + Args: + guild_id (int): 服务器ID 
+ + Returns: + Optional[discord.VoiceClient]: 语音客户端实例或 None + """ + if guild_id not in self.connections: + return None + + conn_info = self.connections[guild_id] + + # 验证连接是否仍然有效 + if conn_info.voice_client and not conn_info.voice_client.is_connected(): + # 连接已失效,清理状态 + await self._disconnect_internal(guild_id) + return None + + return conn_info.voice_client if conn_info.connected else None + + async def is_connected_to_voice(self, guild_id: int) -> bool: + """ + 检查是否连接到语音频道 + + @author: @ydzat + + Args: + guild_id (int): 服务器ID + + Returns: + bool: 是否已连接 + """ + if guild_id not in self.connections: + return False + + conn_info = self.connections[guild_id] + + # 检查实际连接状态 + if conn_info.voice_client and not conn_info.voice_client.is_connected(): + # 连接已失效,清理状态 + await self._disconnect_internal(guild_id) + return False + + return conn_info.connected + + async def get_connection_status(self, guild_id: int) -> typing.Optional[dict]: + """ + 获取连接状态信息 + + @author: @ydzat + + Args: + guild_id (int): 服务器ID + + Returns: + Optional[dict]: 连接状态信息字典或 None + """ + if guild_id not in self.connections: + return None + + conn_info = self.connections[guild_id] + + # 更新实时信息 + if conn_info.voice_client and conn_info.voice_client.is_connected(): + conn_info.latency = conn_info.voice_client.latency * 1000 # 转换为毫秒 + conn_info.connection_health = "good" if conn_info.latency < 100 else "poor" + + # 更新频道用户数 + guild = self.bot.get_guild(guild_id) + if guild: + channel = guild.get_channel(conn_info.channel_id) + if channel and isinstance(channel, discord.VoiceChannel): + conn_info.user_count = len(channel.members) + + return conn_info.to_dict() + + async def list_active_connections(self) -> typing.List[dict]: + """ + 列出所有活跃连接 + + @author: @ydzat + + Returns: + List[dict]: 活跃连接列表 + """ + active_connections = [] + + for guild_id, conn_info in self.connections.items(): + if conn_info.connected: + status = await self.get_connection_status(guild_id) + if status: + active_connections.append(status) 
+ + return active_connections + + async def get_voice_channel_info(self, guild_id: int, channel_id: int) -> typing.Optional[dict]: + """ + 获取语音频道信息 + + @author: @ydzat + + Args: + guild_id (int): 服务器ID + channel_id (int): 频道ID + + Returns: + Optional[dict]: 频道信息字典或 None + """ + guild = self.bot.get_guild(guild_id) + if not guild: + return None + + channel = guild.get_channel(channel_id) + if not channel or not isinstance(channel, discord.VoiceChannel): + return None + + # 获取用户信息 + users = [] + for member in channel.members: + users.append({ + "id": member.id, + "name": member.display_name, + "status": str(member.status), + "is_bot": member.bot + }) + + # 获取权限信息 + bot_member = guild.me + permissions = channel.permissions_for(bot_member) + + return { + "channel_id": channel_id, + "channel_name": channel.name, + "guild_id": guild_id, + "guild_name": guild.name, + "user_limit": channel.user_limit, + "current_users": users, + "user_count": len(users), + "bitrate": channel.bitrate, + "permissions": { + "connect": permissions.connect, + "speak": permissions.speak, + "use_voice_activation": permissions.use_voice_activation, + "priority_speaker": permissions.priority_speaker + } + } + + async def _validate_user_in_channel(self, guild: discord.Guild, + channel: discord.VoiceChannel, user_id: int): + """ + 验证用户是否在语音频道中 + + @author: @ydzat + + Args: + guild: Discord 服务器对象 + channel: 语音频道对象 + user_id: 用户ID + + Raises: + VoicePermissionError: 用户不在频道中时抛出 + """ + member = guild.get_member(user_id) + if not member: + raise VoicePermissionError( + f"无法找到用户 {user_id}", + ["member_not_found"], + user_id, + channel.id + ) + + if not member.voice or member.voice.channel != channel: + raise VoicePermissionError( + f"用户 {member.display_name} 不在语音频道 {channel.name} 中", + ["user_not_in_channel"], + user_id, + channel.id + ) + + async def _validate_bot_permissions(self, channel: discord.VoiceChannel): + """ + 验证机器人权限 + + @author: @ydzat + + Args: + channel: 语音频道对象 + + Raises: + 
VoicePermissionError: 权限不足时抛出 + """ + bot_member = channel.guild.me + permissions = channel.permissions_for(bot_member) + + missing_permissions = [] + + if not permissions.connect: + missing_permissions.append("connect") + if not permissions.speak: + missing_permissions.append("speak") + + if missing_permissions: + raise VoicePermissionError( + f"机器人在频道 {channel.name} 中缺少权限: {', '.join(missing_permissions)}", + missing_permissions, + channel_id=channel.id + ) + + async def cleanup_inactive_connections(self): + """ + 清理无效连接 + + 定期检查并清理已断开或无效的语音连接,释放资源。 + + @author: @ydzat + """ + cleanup_guilds = [] + + for guild_id, conn_info in self.connections.items(): + if not conn_info.voice_client or not conn_info.voice_client.is_connected(): + cleanup_guilds.append(guild_id) + + for guild_id in cleanup_guilds: + await self._disconnect_internal(guild_id) + + if cleanup_guilds: + await self.logger.info(f"清理了 {len(cleanup_guilds)} 个无效的语音连接") + + async def start_monitoring(self): + """ + 开始连接监控 + + @author: @ydzat + """ + if self._cleanup_task is None and self._monitoring_enabled: + self._cleanup_task = asyncio.create_task(self._monitoring_loop()) + + async def stop_monitoring(self): + """ + 停止连接监控 + + @author: @ydzat + """ + self._monitoring_enabled = False + if self._cleanup_task: + self._cleanup_task.cancel() + try: + await self._cleanup_task + except asyncio.CancelledError: + pass + self._cleanup_task = None + + async def _monitoring_loop(self): + """ + 监控循环 + + @author: @ydzat + """ + try: + while self._monitoring_enabled: + await asyncio.sleep(60) # 每分钟检查一次 + await self.cleanup_inactive_connections() + except asyncio.CancelledError: + pass + + async def disconnect_all(self): + """ + 断开所有连接 + + @author: @ydzat + """ + async with self._connection_lock: + guild_ids = list(self.connections.keys()) + for guild_id in guild_ids: + await self._disconnect_internal(guild_id) + + await self.stop_monitoring() class DiscordMessageConverter(adapter.MessageConverter): @@ -238,6 +814,9 @@ 
class DiscordAdapter(adapter.MessagePlatformAdapter): self.logger = logger self.bot_account_id = self.config['client_id'] + + # 初始化语音连接管理器 + self.voice_manager: VoiceConnectionManager = None adapter_self = self @@ -258,6 +837,169 @@ class DiscordAdapter(adapter.MessagePlatformAdapter): args['proxy'] = os.getenv('http_proxy') self.bot = MyClient(intents=intents, **args) + + # Voice functionality methods + async def join_voice_channel(self, guild_id: int, channel_id: int, + user_id: int = None) -> discord.VoiceClient: + """ + 加入语音频道 + + 为指定服务器的语音频道建立连接,支持用户权限验证和连接复用。 + + @author: @ydzat + @version: 1.0 + @since: 2025-07-04 + + Args: + guild_id (int): Discord 服务器ID + channel_id (int): 语音频道ID + user_id (int, optional): 请求用户ID,用于权限验证 + + Returns: + discord.VoiceClient: 语音客户端实例 + + Raises: + VoicePermissionError: 权限不足 + VoiceNetworkError: 网络连接失败 + VoiceConnectionError: 其他连接错误 + """ + if not self.voice_manager: + raise VoiceConnectionError("语音管理器未初始化", "MANAGER_NOT_READY") + + return await self.voice_manager.join_voice_channel(guild_id, channel_id, user_id) + + async def leave_voice_channel(self, guild_id: int) -> bool: + """ + 离开语音频道 + + 断开指定服务器的语音连接,清理相关资源。 + + @author: @ydzat + @version: 1.0 + @since: 2025-07-04 + + Args: + guild_id (int): Discord 服务器ID + + Returns: + bool: 是否成功断开连接 + """ + if not self.voice_manager: + return False + + return await self.voice_manager.leave_voice_channel(guild_id) + + async def get_voice_client(self, guild_id: int) -> typing.Optional[discord.VoiceClient]: + """ + 获取语音客户端 + + 返回指定服务器的语音客户端实例,用于音频播放控制。 + + @author: @ydzat + @version: 1.0 + @since: 2025-07-04 + + Args: + guild_id (int): Discord 服务器ID + + Returns: + Optional[discord.VoiceClient]: 语音客户端实例或 None + """ + if not self.voice_manager: + return None + + return await self.voice_manager.get_voice_client(guild_id) + + async def is_connected_to_voice(self, guild_id: int) -> bool: + """ + 检查语音连接状态 + + @author: @ydzat + @version: 1.0 + @since: 2025-07-04 + + Args: + guild_id (int): 
Discord 服务器ID + + Returns: + bool: 是否已连接到语音频道 + """ + if not self.voice_manager: + return False + + return await self.voice_manager.is_connected_to_voice(guild_id) + + async def get_voice_connection_status(self, guild_id: int) -> typing.Optional[dict]: + """ + 获取语音连接详细状态 + + 返回包含连接时间、延迟、用户数等详细信息的状态字典。 + + @author: @ydzat + @version: 1.0 + @since: 2025-07-04 + + Args: + guild_id (int): Discord 服务器ID + + Returns: + Optional[dict]: 连接状态信息或 None + """ + if not self.voice_manager: + return None + + return await self.voice_manager.get_connection_status(guild_id) + + async def list_active_voice_connections(self) -> typing.List[dict]: + """ + 列出所有活跃的语音连接 + + @author: @ydzat + @version: 1.0 + @since: 2025-07-04 + + Returns: + List[dict]: 活跃语音连接列表 + """ + if not self.voice_manager: + return [] + + return await self.voice_manager.list_active_connections() + + async def get_voice_channel_info(self, guild_id: int, channel_id: int) -> typing.Optional[dict]: + """ + 获取语音频道详细信息 + + 包括频道名称、用户列表、权限信息等。 + + @author: @ydzat + @version: 1.0 + @since: 2025-07-04 + + Args: + guild_id (int): Discord 服务器ID + channel_id (int): 语音频道ID + + Returns: + Optional[dict]: 频道信息字典或 None + """ + if not self.voice_manager: + return None + + return await self.voice_manager.get_voice_channel_info(guild_id, channel_id) + + async def cleanup_voice_connections(self): + """ + 清理无效的语音连接 + + 手动触发语音连接清理,移除已断开或无效的连接。 + + @author: @ydzat + @version: 1.0 + @since: 2025-07-04 + """ + if self.voice_manager: + await self.voice_manager.cleanup_inactive_connections() async def send_message(self, target_type: str, target_id: str, message: platform_message.MessageChain): msg_to_send, image_files = await self.message_converter.yiri2target(message) @@ -324,9 +1066,32 @@ class DiscordAdapter(adapter.MessagePlatformAdapter): self.listeners.pop(event_type) async def run_async(self): + """ + 启动 Discord 适配器 + + 初始化语音管理器并启动 Discord 客户端连接。 + + @author: @ydzat (修改) + """ async with self.bot: + # 初始化语音管理器 + self.voice_manager = 
VoiceConnectionManager(self.bot, self.logger) + await self.voice_manager.start_monitoring() + + await self.logger.info("Discord 适配器语音功能已启用") await self.bot.start(self.config['token'], reconnect=True) async def kill(self) -> bool: + """ + 关闭 Discord 适配器 + + 清理语音连接并关闭 Discord 客户端。 + + @author: @ydzat (修改) + """ + if self.voice_manager: + await self.voice_manager.disconnect_all() + await self.bot.close() return True + diff --git a/pyproject.toml b/pyproject.toml index 5e85bfb0..28b1a28f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ dependencies = [ "dashscope>=1.23.2", "dingtalk-stream>=0.24.0", "discord-py>=2.5.2", + "pynacl>=1.5.0", # Required for Discord voice support "gewechat-client>=0.1.5", "lark-oapi>=1.4.15", "mcp>=1.8.1", @@ -84,6 +85,8 @@ Repository = "https://github.com/RockChinQ/langbot" [dependency-groups] dev = [ "pre-commit>=4.2.0", + "pytest>=8.4.1", + "pytest-asyncio>=1.0.0", "ruff>=0.11.9", ] From 0e5c9e19e16f30cbbfb0ab922d431328e95f9cb1 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sat, 5 Jul 2025 21:03:14 +0800 Subject: [PATCH 027/257] feat: complete support_type for 302ai and compshare requester --- pkg/provider/modelmgr/requesters/302aichatcmpl.yaml | 2 ++ pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml b/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml index 2d9df778..754a9078 100644 --- a/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml @@ -22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./302aichatcmpl.py diff --git a/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml b/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml index ca57c31c..2b7f9a70 100644 --- a/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml +++ b/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml @@ 
-22,6 +22,8 @@ spec: type: integer required: true default: 120 + support_type: + - llm execution: python: path: ./compsharechatcmpl.py From 39c062f73e83181d8ec57d42600cf00caf86f9a5 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sat, 5 Jul 2025 21:56:17 +0800 Subject: [PATCH 028/257] perf: format --- .../http/controller/groups/knowledge_base.py | 65 +++++++++---------- .../controller/groups/pipelines/pipelines.py | 2 +- 2 files changed, 30 insertions(+), 37 deletions(-) diff --git a/pkg/api/http/controller/groups/knowledge_base.py b/pkg/api/http/controller/groups/knowledge_base.py index f9aa09e0..e9606a3d 100644 --- a/pkg/api/http/controller/groups/knowledge_base.py +++ b/pkg/api/http/controller/groups/knowledge_base.py @@ -1,49 +1,36 @@ import quart from .. import group + @group.group_class('knowledge_base', '/api/v1/knowledge/bases') class KnowledgeBaseRouterGroup(group.RouterGroup): - # 定义成功方法 def success(self, code=0, data=None, msg: str = 'ok') -> quart.Response: - return quart.jsonify({ - "code": code, - "data": data or {}, - "msg": msg - }) - + return quart.jsonify({'code': code, 'data': data or {}, 'msg': msg}) - async def initialize(self) -> None: - - @self.route('', methods=['POST', 'GET']) async def _() -> str: - if quart.request.method == 'GET': knowledge_bases = await self.ap.knowledge_base_service.get_all_knowledge_bases() bases_list = [ { - "uuid": kb.id, - "name": kb.name, - "description": kb.description, - } for kb in knowledge_bases + 'uuid': kb.id, + 'name': kb.name, + 'description': kb.description, + } + for kb in knowledge_bases ] - return self.success(code=0, - data={'bases': bases_list}, - msg='ok') + return self.success(code=0, data={'bases': bases_list}, msg='ok') json_data = await quart.request.json knowledge_base_uuid = await self.ap.knowledge_base_service.create_knowledge_base( - json_data.get('name'), - json_data.get('description') + json_data.get('name'), json_data.get('description') ) - return self.success(code=0, - data={}, - 
msg='ok') + _ = knowledge_base_uuid + return self.success(code=0, data={}, msg='ok') - - @self.route('/', methods=['GET','DELETE']) + @self.route('/', methods=['GET', 'DELETE']) async def _(knowledge_base_uuid: str) -> str: if quart.request.method == 'GET': knowledge_base = await self.ap.knowledge_base_service.get_knowledge_base_by_id(knowledge_base_uuid) @@ -54,11 +41,11 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): return self.success( code=0, data={ - "name": knowledge_base.name, - "description": knowledge_base.description, - "uuid": knowledge_base.id + 'name': knowledge_base.name, + 'description': knowledge_base.description, + 'uuid': knowledge_base.id, }, - msg='ok' + msg='ok', ) elif quart.request.method == 'DELETE': await self.ap.knowledge_base_service.delete_kb_by_id(knowledge_base_uuid) @@ -68,15 +55,21 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): async def _(knowledge_base_uuid: str) -> str: if quart.request.method == 'GET': files = await self.ap.knowledge_base_service.get_files_by_knowledge_base(knowledge_base_uuid) - return self.success(code=0,data=[{ - "id": file.id, - "file_name": file.file_name, - "status": file.status - } for file in files],msg='ok') - + return self.success( + code=0, + data=[ + { + 'id': file.id, + 'file_name': file.file_name, + 'status': file.status, + } + for file in files + ], + msg='ok', + ) + # delete specific file in knowledge base @self.route('//files/', methods=['DELETE']) async def _(knowledge_base_uuid: str, file_id: str) -> str: await self.ap.knowledge_base_service.delete_data_by_file_id(file_id) return self.success(code=0, msg='ok') - diff --git a/pkg/api/http/controller/groups/pipelines/pipelines.py b/pkg/api/http/controller/groups/pipelines/pipelines.py index 1a8036cc..96ca239a 100644 --- a/pkg/api/http/controller/groups/pipelines/pipelines.py +++ b/pkg/api/http/controller/groups/pipelines/pipelines.py @@ -2,7 +2,7 @@ from __future__ import annotations import quart -from .. import group +from ... 
import group @group.group_class('pipelines', '/api/v1/pipelines') From 8d28ace25276820714d33c5aedf359b48d0faf3e Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sat, 5 Jul 2025 21:56:54 +0800 Subject: [PATCH 029/257] perf: ruff check --fix --- libs/wechatpad_api/__init__.py | 2 +- libs/wechatpad_api/api/chatroom.py | 6 +- libs/wechatpad_api/api/downloadpai.py | 25 +- libs/wechatpad_api/api/friend.py | 5 - libs/wechatpad_api/api/login.py | 60 +-- libs/wechatpad_api/api/message.py | 111 ++--- libs/wechatpad_api/util/http_util.py | 48 +- pkg/entity/persistence/vector.py | 11 +- pkg/platform/sources/aiocqhttp.py | 229 ++++++---- pkg/platform/sources/discord.py | 20 +- pkg/platform/sources/lark.py | 8 +- pkg/platform/sources/nakuru.py | 5 +- pkg/platform/sources/officialaccount.py | 4 +- pkg/platform/sources/qqofficial.py | 9 +- pkg/platform/sources/slack.py | 8 +- pkg/platform/sources/telegram.py | 4 +- pkg/platform/sources/wechatpad.py | 425 +++++++----------- pkg/platform/sources/wecom.py | 6 +- pkg/platform/sources/wecomcs.py | 6 +- pkg/rag/knowledge/services/database.py | 32 +- .../knowledge/services/embedding_models.py | 165 +++---- pkg/rag/knowledge/services/parser.py | 128 +++--- pkg/rag/knowledge/services/retriever.py | 67 +-- 23 files changed, 647 insertions(+), 737 deletions(-) diff --git a/libs/wechatpad_api/__init__.py b/libs/wechatpad_api/__init__.py index 23c23fb2..9ac533f7 100644 --- a/libs/wechatpad_api/__init__.py +++ b/libs/wechatpad_api/__init__.py @@ -1 +1 @@ -from .client import WeChatPadClient \ No newline at end of file +from .client import WeChatPadClient as WeChatPadClient diff --git a/libs/wechatpad_api/api/chatroom.py b/libs/wechatpad_api/api/chatroom.py index a7af207c..2d9281a2 100644 --- a/libs/wechatpad_api/api/chatroom.py +++ b/libs/wechatpad_api/api/chatroom.py @@ -1,4 +1,4 @@ -from libs.wechatpad_api.util.http_util import async_request, post_json +from libs.wechatpad_api.util.http_util import post_json class ChatRoomApi: @@ -7,8 +7,6 @@ 
class ChatRoomApi: self.token = token def get_chatroom_member_detail(self, chatroom_name): - params = { - "ChatRoomName": chatroom_name - } + params = {'ChatRoomName': chatroom_name} url = self.base_url + '/group/GetChatroomMemberDetail' return post_json(url, token=self.token, data=params) diff --git a/libs/wechatpad_api/api/downloadpai.py b/libs/wechatpad_api/api/downloadpai.py index a82a5674..2d45fac6 100644 --- a/libs/wechatpad_api/api/downloadpai.py +++ b/libs/wechatpad_api/api/downloadpai.py @@ -1,32 +1,23 @@ -from libs.wechatpad_api.util.http_util import async_request, post_json +from libs.wechatpad_api.util.http_util import post_json import httpx import base64 + class DownloadApi: def __init__(self, base_url, token): self.base_url = base_url self.token = token def send_download(self, aeskey, file_type, file_url): - json_data = { - "AesKey": aeskey, - "FileType": file_type, - "FileURL": file_url - } - url = self.base_url + "/message/SendCdnDownload" + json_data = {'AesKey': aeskey, 'FileType': file_type, 'FileURL': file_url} + url = self.base_url + '/message/SendCdnDownload' return post_json(url, token=self.token, data=json_data) - def get_msg_voice(self,buf_id, length, new_msgid): - json_data = { - "Bufid": buf_id, - "Length": length, - "NewMsgId": new_msgid, - "ToUserName": "" - } - url = self.base_url + "/message/GetMsgVoice" + def get_msg_voice(self, buf_id, length, new_msgid): + json_data = {'Bufid': buf_id, 'Length': length, 'NewMsgId': new_msgid, 'ToUserName': ''} + url = self.base_url + '/message/GetMsgVoice' return post_json(url, token=self.token, data=json_data) - async def download_url_to_base64(self, download_url): async with httpx.AsyncClient() as client: response = await client.get(download_url) @@ -36,4 +27,4 @@ class DownloadApi: base64_str = base64.b64encode(file_bytes).decode('utf-8') # 返回字符串格式 return base64_str else: - raise Exception('获取文件失败') \ No newline at end of file + raise Exception('获取文件失败') diff --git 
a/libs/wechatpad_api/api/friend.py b/libs/wechatpad_api/api/friend.py index 00701a5d..a7a448aa 100644 --- a/libs/wechatpad_api/api/friend.py +++ b/libs/wechatpad_api/api/friend.py @@ -1,11 +1,6 @@ -from libs.wechatpad_api.util.http_util import post_json,async_request -from typing import List, Dict, Any, Optional - - class FriendApi: """联系人API类,处理所有与联系人相关的操作""" def __init__(self, base_url: str, token: str): self.base_url = base_url self.token = token - diff --git a/libs/wechatpad_api/api/login.py b/libs/wechatpad_api/api/login.py index 142a3c85..4aa4ae8d 100644 --- a/libs/wechatpad_api/api/login.py +++ b/libs/wechatpad_api/api/login.py @@ -1,37 +1,34 @@ -from libs.wechatpad_api.util.http_util import async_request,post_json,get_json +from libs.wechatpad_api.util.http_util import post_json, get_json class LoginApi: def __init__(self, base_url: str, token: str = None, admin_key: str = None): - ''' + """ Args: base_url: 原始路径 token: token admin_key: 管理员key - ''' + """ self.base_url = base_url self.token = token # self.admin_key = admin_key - def get_token(self, admin_key, day: int=365): + def get_token(self, admin_key, day: int = 365): # 获取普通token - url = f"{self.base_url}/admin/GenAuthKey1" - json_data = { - "Count": 1, - "Days": day - } + url = f'{self.base_url}/admin/GenAuthKey1' + json_data = {'Count': 1, 'Days': day} return post_json(base_url=url, token=admin_key, data=json_data) - def get_login_qr(self, Proxy: str = ""): - ''' + def get_login_qr(self, Proxy: str = ''): + """ Args: Proxy:异地使用时代理 Returns:json数据 - ''' + """ """ { @@ -49,54 +46,37 @@ class LoginApi: } """ - #获取登录二维码 - url = f"{self.base_url}/login/GetLoginQrCodeNew" + # 获取登录二维码 + url = f'{self.base_url}/login/GetLoginQrCodeNew' check = False - if Proxy != "": + if Proxy != '': check = True - json_data = { - "Check": check, - "Proxy": Proxy - } + json_data = {'Check': check, 'Proxy': Proxy} return post_json(base_url=url, token=self.token, data=json_data) - def get_login_status(self): # 获取登录状态 url = 
f'{self.base_url}/login/GetLoginStatus' return get_json(base_url=url, token=self.token) - - def logout(self): # 退出登录 url = f'{self.base_url}/login/LogOut' return post_json(base_url=url, token=self.token) - - - - def wake_up_login(self, Proxy: str = ""): + def wake_up_login(self, Proxy: str = ''): # 唤醒登录 url = f'{self.base_url}/login/WakeUpLogin' check = False - if Proxy != "": + if Proxy != '': check = True - json_data = { - "Check": check, - "Proxy": "" - } + json_data = {'Check': check, 'Proxy': ''} return post_json(base_url=url, token=self.token, data=json_data) - - - def login(self,admin_key): + def login(self, admin_key): login_status = self.get_login_status() - if login_status["Code"] == 300 and login_status["Text"] == "你已退出微信": - print("token已经失效,重新获取") + if login_status['Code'] == 300 and login_status['Text'] == '你已退出微信': + print('token已经失效,重新获取') token_data = self.get_token(admin_key) - self.token = token_data["Data"][0] - - - + self.token = token_data['Data'][0] diff --git a/libs/wechatpad_api/api/message.py b/libs/wechatpad_api/api/message.py index 2089ce96..cca76313 100644 --- a/libs/wechatpad_api/api/message.py +++ b/libs/wechatpad_api/api/message.py @@ -1,5 +1,4 @@ - -from libs.wechatpad_api.util.http_util import async_request, post_json +from libs.wechatpad_api.util.http_util import post_json class MessageApi: @@ -7,8 +6,8 @@ class MessageApi: self.base_url = base_url self.token = token - def post_text(self, to_wxid, content, ats: list= []): - ''' + def post_text(self, to_wxid, content, ats: list = []): + """ Args: app_id: 微信id @@ -18,106 +17,64 @@ class MessageApi: Returns: - ''' - url = self.base_url + "/message/SendTextMessage" + """ + url = self.base_url + '/message/SendTextMessage' """发送文字消息""" json_data = { - "MsgItem": [ - { - "AtWxIDList": ats, - "ImageContent": "", - "MsgType": 0, - "TextContent": content, - "ToUserName": to_wxid - } - ] - } - return post_json(base_url=url, token=self.token, data=json_data) + 'MsgItem': [ + {'AtWxIDList': 
ats, 'ImageContent': '', 'MsgType': 0, 'TextContent': content, 'ToUserName': to_wxid} + ] + } + return post_json(base_url=url, token=self.token, data=json_data) - - - - def post_image(self, to_wxid, img_url, ats: list= []): + def post_image(self, to_wxid, img_url, ats: list = []): """发送图片消息""" # 这里好像可以尝试发送多个暂时未测试 json_data = { - "MsgItem": [ - { - "AtWxIDList": ats, - "ImageContent": img_url, - "MsgType": 0, - "TextContent": '', - "ToUserName": to_wxid - } + 'MsgItem': [ + {'AtWxIDList': ats, 'ImageContent': img_url, 'MsgType': 0, 'TextContent': '', 'ToUserName': to_wxid} ] } - url = self.base_url + "/message/SendImageMessage" + url = self.base_url + '/message/SendImageMessage' return post_json(base_url=url, token=self.token, data=json_data) def post_voice(self, to_wxid, voice_data, voice_forma, voice_duration): """发送语音消息""" json_data = { - "ToUserName": to_wxid, - "VoiceData": voice_data, - "VoiceFormat": voice_forma, - "VoiceSecond": voice_duration + 'ToUserName': to_wxid, + 'VoiceData': voice_data, + 'VoiceFormat': voice_forma, + 'VoiceSecond': voice_duration, } - url = self.base_url + "/message/SendVoice" + url = self.base_url + '/message/SendVoice' return post_json(base_url=url, token=self.token, data=json_data) - - - - def post_name_card(self, alias, to_wxid, nick_name, name_card_wxid, flag): """发送名片消息""" param = { - "CardAlias": alias, - "CardFlag": flag, - "CardNickName": nick_name, - "CardWxId": name_card_wxid, - "ToUserName": to_wxid + 'CardAlias': alias, + 'CardFlag': flag, + 'CardNickName': nick_name, + 'CardWxId': name_card_wxid, + 'ToUserName': to_wxid, } - url = f"{self.base_url}/message/ShareCardMessage" + url = f'{self.base_url}/message/ShareCardMessage' return post_json(base_url=url, token=self.token, data=param) - def post_emoji(self, to_wxid, emoji_md5, emoji_size:int=0): + def post_emoji(self, to_wxid, emoji_md5, emoji_size: int = 0): """发送emoji消息""" - json_data = { - "EmojiList": [ - { - "EmojiMd5": emoji_md5, - "EmojiSize": emoji_size, - 
"ToUserName": to_wxid - } - ] - } - url = f"{self.base_url}/message/SendEmojiMessage" + json_data = {'EmojiList': [{'EmojiMd5': emoji_md5, 'EmojiSize': emoji_size, 'ToUserName': to_wxid}]} + url = f'{self.base_url}/message/SendEmojiMessage' return post_json(base_url=url, token=self.token, data=json_data) - def post_app_msg(self, to_wxid,xml_data, contenttype:int=0): + def post_app_msg(self, to_wxid, xml_data, contenttype: int = 0): """发送appmsg消息""" - json_data = { - "AppList": [ - { - "ContentType": contenttype, - "ContentXML": xml_data, - "ToUserName": to_wxid - } - ] - } - url = f"{self.base_url}/message/SendAppMessage" + json_data = {'AppList': [{'ContentType': contenttype, 'ContentXML': xml_data, 'ToUserName': to_wxid}]} + url = f'{self.base_url}/message/SendAppMessage' return post_json(base_url=url, token=self.token, data=json_data) - - def revoke_msg(self, to_wxid, msg_id, new_msg_id, create_time): """撤回消息""" - param = { - "ClientMsgId": msg_id, - "CreateTime": create_time, - "NewMsgId": new_msg_id, - "ToUserName": to_wxid - } - url = f"{self.base_url}/message/RevokeMsg" - return post_json(base_url=url, token=self.token, data=param) \ No newline at end of file + param = {'ClientMsgId': msg_id, 'CreateTime': create_time, 'NewMsgId': new_msg_id, 'ToUserName': to_wxid} + url = f'{self.base_url}/message/RevokeMsg' + return post_json(base_url=url, token=self.token, data=param) diff --git a/libs/wechatpad_api/util/http_util.py b/libs/wechatpad_api/util/http_util.py index 754003e9..447c29df 100644 --- a/libs/wechatpad_api/util/http_util.py +++ b/libs/wechatpad_api/util/http_util.py @@ -1,10 +1,9 @@ import requests +import aiohttp + def post_json(base_url, token, data=None): - headers = { - 'Content-Type': 'application/json' - } - + headers = {'Content-Type': 'application/json'} url = base_url + f'?key={token}' @@ -18,14 +17,12 @@ def post_json(base_url, token, data=None): else: raise RuntimeError(response.text) except Exception as e: - print(f"http请求失败, url={url}, 
exception={e}") + print(f'http请求失败, url={url}, exception={e}') raise RuntimeError(str(e)) -def get_json(base_url, token): - headers = { - 'Content-Type': 'application/json' - } +def get_json(base_url, token): + headers = {'Content-Type': 'application/json'} url = base_url + f'?key={token}' @@ -39,21 +36,18 @@ def get_json(base_url, token): else: raise RuntimeError(response.text) except Exception as e: - print(f"http请求失败, url={url}, exception={e}") + print(f'http请求失败, url={url}, exception={e}') raise RuntimeError(str(e)) -import aiohttp -import asyncio - async def async_request( - base_url: str, - token_key: str, - method: str = 'POST', - params: dict = None, - # headers: dict = None, - data: dict = None, - json: dict = None + base_url: str, + token_key: str, + method: str = 'POST', + params: dict = None, + # headers: dict = None, + data: dict = None, + json: dict = None, ): """ 通用异步请求函数 @@ -67,18 +61,11 @@ async def async_request( :param json: JSON数据 :return: 响应文本 """ - headers = { - 'Content-Type': 'application/json' - } - url = f"{base_url}?key={token_key}" + headers = {'Content-Type': 'application/json'} + url = f'{base_url}?key={token_key}' async with aiohttp.ClientSession() as session: async with session.request( - method=method, - url=url, - params=params, - headers=headers, - data=data, - json=json + method=method, url=url, params=params, headers=headers, data=data, json=json ) as response: response.raise_for_status() # 如果状态码不是200,抛出异常 result = await response.json() @@ -89,4 +76,3 @@ async def async_request( # return await result # else: # raise RuntimeError("请求失败",response.text) - diff --git a/pkg/entity/persistence/vector.py b/pkg/entity/persistence/vector.py index 84d1dfb1..465125f5 100644 --- a/pkg/entity/persistence/vector.py +++ b/pkg/entity/persistence/vector.py @@ -1,14 +1,13 @@ -from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, ForeignKey, LargeBinary -from sqlalchemy.orm import declarative_base, sessionmaker, 
relationship -from datetime import datetime -import numpy as np # 用于处理从LargeBinary转换回来的embedding +from sqlalchemy import Column, Integer, ForeignKey, LargeBinary +from sqlalchemy.orm import declarative_base, relationship Base = declarative_base() + class Vector(Base): __tablename__ = 'vectors' id = Column(Integer, primary_key=True, index=True) chunk_id = Column(Integer, ForeignKey('chunks.id'), unique=True) - embedding = Column(LargeBinary) # Store embeddings as binary + embedding = Column(LargeBinary) # Store embeddings as binary - chunk = relationship("Chunk", back_populates="vector") \ No newline at end of file + chunk = relationship('Chunk', back_populates='vector') diff --git a/pkg/platform/sources/aiocqhttp.py b/pkg/platform/sources/aiocqhttp.py index 3f3ef512..2730874f 100644 --- a/pkg/platform/sources/aiocqhttp.py +++ b/pkg/platform/sources/aiocqhttp.py @@ -16,7 +16,6 @@ from ..logger import EventLogger class AiocqhttpMessageConverter(adapter.MessageConverter): - @staticmethod async def yiri2target( message_chain: platform_message.MessageChain, @@ -62,87 +61,170 @@ class AiocqhttpMessageConverter(adapter.MessageConverter): for node in msg.node_list: msg_list.extend((await AiocqhttpMessageConverter.yiri2target(node.message_chain))[0]) elif isinstance(msg, platform_message.File): - msg_list.append({"type":"file", "data":{'file': msg.url, "name": msg.name}}) + msg_list.append({'type': 'file', 'data': {'file': msg.url, 'name': msg.name}}) elif isinstance(msg, platform_message.Face): - if msg.face_type=='face': + if msg.face_type == 'face': msg_list.append(aiocqhttp.MessageSegment.face(msg.face_id)) - elif msg.face_type=='rps': + elif msg.face_type == 'rps': msg_list.append(aiocqhttp.MessageSegment.rps()) - elif msg.face_type=='dice': + elif msg.face_type == 'dice': msg_list.append(aiocqhttp.MessageSegment.dice()) - else: msg_list.append(aiocqhttp.MessageSegment.text(str(msg))) return msg_list, msg_id, msg_time @staticmethod - async def target2yiri(message: str, 
message_id: int = -1,bot=None): + async def target2yiri(message: str, message_id: int = -1, bot=None): print(message) message = aiocqhttp.Message(message) def get_face_name(face_id): face_code_dict = { - "2": '好色', - "4": "得意", "5": "流泪", "8": "睡", "9": "大哭", "10": "尴尬", "12": "调皮", "14": "微笑", "16": "酷", - "21": "可爱", - "23": "傲慢", "24": "饥饿", "25": "困", "26": "惊恐", "27": "流汗", "28": "憨笑", "29": "悠闲", - "30": "奋斗", - "32": "疑问", "33": "嘘", "34": "晕", "38": "敲打", "39": "再见", "41": "发抖", "42": "爱情", - "43": "跳跳", - "49": "拥抱", "53": "蛋糕", "60": "咖啡", "63": "玫瑰", "66": "爱心", "74": "太阳", "75": "月亮", - "76": "赞", - "78": "握手", "79": "胜利", "85": "飞吻", "89": "西瓜", "96": "冷汗", "97": "擦汗", "98": "抠鼻", - "99": "鼓掌", - "100": "糗大了", "101": "坏笑", "102": "左哼哼", "103": "右哼哼", "104": "哈欠", "106": "委屈", - "109": "左亲亲", - "111": "可怜", "116": "示爱", "118": "抱拳", "120": "拳头", "122": "爱你", "123": "NO", "124": "OK", - "125": "转圈", - "129": "挥手", "144": "喝彩", "147": "棒棒糖", "171": "茶", "173": "泪奔", "174": "无奈", "175": "卖萌", - "176": "小纠结", "179": "doge", "180": "惊喜", "181": "骚扰", "182": "笑哭", "183": "我最美", - "201": "点赞", - "203": "托脸", "212": "托腮", "214": "啵啵", "219": "蹭一蹭", "222": "抱抱", "227": "拍手", - "232": "佛系", - "240": "喷脸", "243": "甩头", "246": "加油抱抱", "262": "脑阔疼", "264": "捂脸", "265": "辣眼睛", - "266": "哦哟", - "267": "头秃", "268": "问号脸", "269": "暗中观察", "270": "emm", "271": "吃瓜", "272": "呵呵哒", - "273": "我酸了", - "277": "汪汪", "278": "汗", "281": "无眼笑", "282": "敬礼", "284": "面无表情", "285": "摸鱼", - "287": "哦", - "289": "睁眼", "290": "敲开心", "293": "摸锦鲤", "294": "期待", "297": "拜谢", "298": "元宝", - "299": "牛啊", - "305": "右亲亲", "306": "牛气冲天", "307": "喵喵", "314": "仔细分析", "315": "加油", "318": "崇拜", - "319": "比心", - "320": "庆祝", "322": "拒绝", "324": "吃糖", "326": "生气" + '2': '好色', + '4': '得意', + '5': '流泪', + '8': '睡', + '9': '大哭', + '10': '尴尬', + '12': '调皮', + '14': '微笑', + '16': '酷', + '21': '可爱', + '23': '傲慢', + '24': '饥饿', + '25': '困', + '26': '惊恐', + '27': '流汗', + '28': '憨笑', + '29': '悠闲', + '30': 
'奋斗', + '32': '疑问', + '33': '嘘', + '34': '晕', + '38': '敲打', + '39': '再见', + '41': '发抖', + '42': '爱情', + '43': '跳跳', + '49': '拥抱', + '53': '蛋糕', + '60': '咖啡', + '63': '玫瑰', + '66': '爱心', + '74': '太阳', + '75': '月亮', + '76': '赞', + '78': '握手', + '79': '胜利', + '85': '飞吻', + '89': '西瓜', + '96': '冷汗', + '97': '擦汗', + '98': '抠鼻', + '99': '鼓掌', + '100': '糗大了', + '101': '坏笑', + '102': '左哼哼', + '103': '右哼哼', + '104': '哈欠', + '106': '委屈', + '109': '左亲亲', + '111': '可怜', + '116': '示爱', + '118': '抱拳', + '120': '拳头', + '122': '爱你', + '123': 'NO', + '124': 'OK', + '125': '转圈', + '129': '挥手', + '144': '喝彩', + '147': '棒棒糖', + '171': '茶', + '173': '泪奔', + '174': '无奈', + '175': '卖萌', + '176': '小纠结', + '179': 'doge', + '180': '惊喜', + '181': '骚扰', + '182': '笑哭', + '183': '我最美', + '201': '点赞', + '203': '托脸', + '212': '托腮', + '214': '啵啵', + '219': '蹭一蹭', + '222': '抱抱', + '227': '拍手', + '232': '佛系', + '240': '喷脸', + '243': '甩头', + '246': '加油抱抱', + '262': '脑阔疼', + '264': '捂脸', + '265': '辣眼睛', + '266': '哦哟', + '267': '头秃', + '268': '问号脸', + '269': '暗中观察', + '270': 'emm', + '271': '吃瓜', + '272': '呵呵哒', + '273': '我酸了', + '277': '汪汪', + '278': '汗', + '281': '无眼笑', + '282': '敬礼', + '284': '面无表情', + '285': '摸鱼', + '287': '哦', + '289': '睁眼', + '290': '敲开心', + '293': '摸锦鲤', + '294': '期待', + '297': '拜谢', + '298': '元宝', + '299': '牛啊', + '305': '右亲亲', + '306': '牛气冲天', + '307': '喵喵', + '314': '仔细分析', + '315': '加油', + '318': '崇拜', + '319': '比心', + '320': '庆祝', + '322': '拒绝', + '324': '吃糖', + '326': '生气', } - return face_code_dict.get(face_id,'') + return face_code_dict.get(face_id, '') async def process_message_data(msg_data, reply_list): - if msg_data["type"] == "image": - image_base64, image_format = await image.qq_image_url_to_base64(msg_data["data"]['url']) - reply_list.append( - platform_message.Image(base64=f'data:image/{image_format};base64,{image_base64}')) + if msg_data['type'] == 'image': + image_base64, image_format = await image.qq_image_url_to_base64(msg_data['data']['url']) + 
reply_list.append(platform_message.Image(base64=f'data:image/{image_format};base64,{image_base64}')) - elif msg_data["type"] == "text": - reply_list.append(platform_message.Plain(text=msg_data["data"]["text"])) + elif msg_data['type'] == 'text': + reply_list.append(platform_message.Plain(text=msg_data['data']['text'])) - elif msg_data["type"] == "forward": # 这里来应该传入转发消息组,暂时传入qoute - for forward_msg_datas in msg_data["data"]["content"]: - for forward_msg_data in forward_msg_datas["message"]: + elif msg_data['type'] == 'forward': # 这里来应该传入转发消息组,暂时传入qoute + for forward_msg_datas in msg_data['data']['content']: + for forward_msg_data in forward_msg_datas['message']: await process_message_data(forward_msg_data, reply_list) - elif msg_data["type"] == "at": - if msg_data["data"]['qq'] == 'all': + elif msg_data['type'] == 'at': + if msg_data['data']['qq'] == 'all': reply_list.append(platform_message.AtAll()) else: reply_list.append( platform_message.At( - target=msg_data["data"]['qq'], + target=msg_data['data']['qq'], ) ) - yiri_msg_list = [] yiri_msg_list.append(platform_message.Source(id=message_id, time=datetime.datetime.now())) @@ -161,10 +243,10 @@ class AiocqhttpMessageConverter(adapter.MessageConverter): elif msg.type == 'text': yiri_msg_list.append(platform_message.Plain(text=msg.data['text'])) elif msg.type == 'image': - emoji_id = msg.data.get("emoji_package_id", None) + emoji_id = msg.data.get('emoji_package_id', None) if emoji_id: face_id = emoji_id - face_name = msg.data.get("summary", '') + face_name = msg.data.get('summary', '') image_msg = platform_message.Face(face_id=face_id, face_name=face_name) else: image_base64, image_format = await image.qq_image_url_to_base64(msg.data['url']) @@ -178,14 +260,15 @@ class AiocqhttpMessageConverter(adapter.MessageConverter): # await process_message_data(msg_data, yiri_msg_list) pass - elif msg.type == 'reply': # 此处处理引用消息传入Qoute - msg_datas = await bot.get_msg(message_id=msg.data["id"]) + msg_datas = await 
bot.get_msg(message_id=msg.data['id']) - for msg_data in msg_datas["message"]: + for msg_data in msg_datas['message']: await process_message_data(msg_data, reply_list) - reply_msg = platform_message.Quote(message_id=msg.data["id"],sender_id=msg_datas["user_id"],origin=reply_list) + reply_msg = platform_message.Quote( + message_id=msg.data['id'], sender_id=msg_datas['user_id'], origin=reply_list + ) yiri_msg_list.append(reply_msg) elif msg.type == 'file': @@ -194,49 +277,36 @@ class AiocqhttpMessageConverter(adapter.MessageConverter): file_data = await bot.get_file(file_id=file_id) file_name = file_data.get('file_name') file_path = file_data.get('file') + _ = file_path file_url = file_data.get('file_url') file_size = file_data.get('file_size') - yiri_msg_list.append(platform_message.File(id=file_id, name=file_name,url=file_url,size=file_size)) + yiri_msg_list.append(platform_message.File(id=file_id, name=file_name, url=file_url, size=file_size)) elif msg.type == 'face': face_id = msg.data['id'] face_name = msg.data['raw']['faceText'] if not face_name: face_name = get_face_name(face_id) - yiri_msg_list.append(platform_message.Face(face_id=int(face_id),face_name=face_name.replace('/',''))) + yiri_msg_list.append(platform_message.Face(face_id=int(face_id), face_name=face_name.replace('/', ''))) elif msg.type == 'rps': face_id = msg.data['result'] - yiri_msg_list.append(platform_message.Face(face_type="rps",face_id=int(face_id),face_name='猜拳')) + yiri_msg_list.append(platform_message.Face(face_type='rps', face_id=int(face_id), face_name='猜拳')) elif msg.type == 'dice': face_id = msg.data['result'] - yiri_msg_list.append(platform_message.Face(face_type='dice',face_id=int(face_id),face_name='骰子')) - - - - - - - - + yiri_msg_list.append(platform_message.Face(face_type='dice', face_id=int(face_id), face_name='骰子')) chain = platform_message.MessageChain(yiri_msg_list) return chain - - - - class AiocqhttpEventConverter(adapter.EventConverter): @staticmethod async def 
yiri2target(event: platform_events.MessageEvent, bot_account_id: int): return event.source_platform_object @staticmethod - async def target2yiri(event: aiocqhttp.Event,bot=None): - yiri_chain = await AiocqhttpMessageConverter.target2yiri(event.message, event.message_id,bot) - - + async def target2yiri(event: aiocqhttp.Event, bot=None): + yiri_chain = await AiocqhttpMessageConverter.target2yiri(event.message, event.message_id, bot) if event.message_type == 'group': permission = 'MEMBER' @@ -316,7 +386,6 @@ class AiocqhttpAdapter(adapter.MessagePlatformAdapter): aiocq_msg = (await AiocqhttpMessageConverter.yiri2target(message))[0] if target_type == 'group': - await self.bot.send_group_msg(group_id=int(target_id), message=aiocq_msg) elif target_type == 'person': await self.bot.send_private_msg(user_id=int(target_id), message=aiocq_msg) @@ -345,7 +414,7 @@ class AiocqhttpAdapter(adapter.MessagePlatformAdapter): async def on_message(event: aiocqhttp.Event): self.bot_account_id = event.self_id try: - return await callback(await self.event_converter.target2yiri(event,self.bot), self) + return await callback(await self.event_converter.target2yiri(event, self.bot), self) except Exception: await self.logger.error(f'Error in on_message: {traceback.format_exc()}') traceback.print_exc() diff --git a/pkg/platform/sources/discord.py b/pkg/platform/sources/discord.py index 4f5cac28..6cc09a72 100644 --- a/pkg/platform/sources/discord.py +++ b/pkg/platform/sources/discord.py @@ -8,7 +8,6 @@ import base64 import uuid import os import datetime -import io import aiohttp @@ -78,10 +77,10 @@ class DiscordMessageConverter(adapter.MessageConverter): # 确保路径没有空字节 clean_path = ele.path.replace('\x00', '') clean_path = os.path.abspath(clean_path) - + if not os.path.exists(clean_path): continue # 跳过不存在的文件 - + try: with open(clean_path, 'rb') as f: image_bytes = f.read() @@ -101,12 +100,13 @@ class DiscordMessageConverter(adapter.MessageConverter): filename = f'{uuid.uuid4()}.webp' # 默认保持PNG 
except Exception as e: - print(f"Error reading image file {clean_path}: {e}") + print(f'Error reading image file {clean_path}: {e}') continue # 跳过读取失败的文件 if image_bytes: # 使用BytesIO创建文件对象,避免路径问题 import io + image_files.append(discord.File(fp=io.BytesIO(image_bytes), filename=filename)) elif isinstance(ele, platform_message.Plain): text_string += ele.text @@ -261,25 +261,25 @@ class DiscordAdapter(adapter.MessagePlatformAdapter): async def send_message(self, target_type: str, target_id: str, message: platform_message.MessageChain): msg_to_send, image_files = await self.message_converter.yiri2target(message) - + try: # 获取频道对象 channel = self.bot.get_channel(int(target_id)) if channel is None: # 如果本地缓存中没有,尝试从API获取 channel = await self.bot.fetch_channel(int(target_id)) - + args = { 'content': msg_to_send, } - + if len(image_files) > 0: args['files'] = image_files - + await channel.send(**args) - + except Exception as e: - await self.logger.error(f"Discord send_message failed: {e}") + await self.logger.error(f'Discord send_message failed: {e}') raise e async def reply_message( diff --git a/pkg/platform/sources/lark.py b/pkg/platform/sources/lark.py index d1116362..f8faf522 100644 --- a/pkg/platform/sources/lark.py +++ b/pkg/platform/sources/lark.py @@ -378,15 +378,15 @@ class LarkAdapter(adapter.MessagePlatformAdapter): if 'im.message.receive_v1' == type: try: event = await self.event_converter.target2yiri(p2v1, self.api_client) - except Exception as e: - await self.logger.error(f"Error in lark callback: {traceback.format_exc()}") + except Exception: + await self.logger.error(f'Error in lark callback: {traceback.format_exc()}') if event.__class__ in self.listeners: await self.listeners[event.__class__](event, self) return {'code': 200, 'message': 'ok'} - except Exception as e: - await self.logger.error(f"Error in lark callback: {traceback.format_exc()}") + except Exception: + await self.logger.error(f'Error in lark callback: {traceback.format_exc()}') return {'code': 
500, 'message': 'error'} async def on_message(event: lark_oapi.im.v1.P2ImMessageReceiveV1): diff --git a/pkg/platform/sources/nakuru.py b/pkg/platform/sources/nakuru.py index 389a2db1..16ad54db 100644 --- a/pkg/platform/sources/nakuru.py +++ b/pkg/platform/sources/nakuru.py @@ -72,8 +72,9 @@ class NakuruProjectMessageConverter(adapter_model.MessageConverter): content=content_list, ) nakuru_forward_node_list.append(nakuru_forward_node) - except Exception as e: + except Exception: import traceback + traceback.print_exc() nakuru_msg_list.append(nakuru_forward_node_list) @@ -276,7 +277,7 @@ class NakuruAdapter(adapter_model.MessagePlatformAdapter): # 注册监听器 self.bot.receiver(source_cls.__name__)(listener_wrapper) except Exception as e: - self.logger.error(f"Error in nakuru register_listener: {traceback.format_exc()}") + self.logger.error(f'Error in nakuru register_listener: {traceback.format_exc()}') raise e def unregister_listener( diff --git a/pkg/platform/sources/officialaccount.py b/pkg/platform/sources/officialaccount.py index 030db56d..3fc1e393 100644 --- a/pkg/platform/sources/officialaccount.py +++ b/pkg/platform/sources/officialaccount.py @@ -125,8 +125,8 @@ class OfficialAccountAdapter(adapter.MessagePlatformAdapter): self.bot_account_id = event.receiver_id try: return await callback(await self.event_converter.target2yiri(event), self) - except Exception as e: - await self.logger.error(f"Error in officialaccount callback: {traceback.format_exc()}") + except Exception: + await self.logger.error(f'Error in officialaccount callback: {traceback.format_exc()}') if event_type == platform_events.FriendMessage: self.bot.on_message('text')(on_message) diff --git a/pkg/platform/sources/qqofficial.py b/pkg/platform/sources/qqofficial.py index c61afea4..63ab531f 100644 --- a/pkg/platform/sources/qqofficial.py +++ b/pkg/platform/sources/qqofficial.py @@ -154,10 +154,7 @@ class QQOfficialAdapter(adapter.MessagePlatformAdapter): raise 
ParamNotEnoughError('QQ官方机器人缺少相关配置项,请查看文档或联系管理员') self.bot = QQOfficialClient( - app_id=config['appid'], - secret=config['secret'], - token=config['token'], - logger=self.logger + app_id=config['appid'], secret=config['secret'], token=config['token'], logger=self.logger ) async def reply_message( @@ -224,8 +221,8 @@ class QQOfficialAdapter(adapter.MessagePlatformAdapter): self.bot_account_id = 'justbot' try: return await callback(await self.event_converter.target2yiri(event), self) - except Exception as e: - await self.logger.error(f"Error in qqofficial callback: {traceback.format_exc()}") + except Exception: + await self.logger.error(f'Error in qqofficial callback: {traceback.format_exc()}') if event_type == platform_events.FriendMessage: self.bot.on_message('DIRECT_MESSAGE_CREATE')(on_message) diff --git a/pkg/platform/sources/slack.py b/pkg/platform/sources/slack.py index 6dfcff59..1bd5aa2d 100644 --- a/pkg/platform/sources/slack.py +++ b/pkg/platform/sources/slack.py @@ -104,7 +104,9 @@ class SlackAdapter(adapter.MessagePlatformAdapter): if missing_keys: raise ParamNotEnoughError('Slack机器人缺少相关配置项,请查看文档或联系管理员') - self.bot = SlackClient(bot_token=self.config['bot_token'], signing_secret=self.config['signing_secret'], logger=self.logger) + self.bot = SlackClient( + bot_token=self.config['bot_token'], signing_secret=self.config['signing_secret'], logger=self.logger + ) async def reply_message( self, @@ -139,8 +141,8 @@ class SlackAdapter(adapter.MessagePlatformAdapter): self.bot_account_id = 'SlackBot' try: return await callback(await self.event_converter.target2yiri(event, self.bot), self) - except Exception as e: - await self.logger.error(f"Error in slack callback: {traceback.format_exc()}") + except Exception: + await self.logger.error(f'Error in slack callback: {traceback.format_exc()}') if event_type == platform_events.FriendMessage: self.bot.on_message('im')(on_message) diff --git a/pkg/platform/sources/telegram.py b/pkg/platform/sources/telegram.py index 
266d994e..c2fcc22e 100644 --- a/pkg/platform/sources/telegram.py +++ b/pkg/platform/sources/telegram.py @@ -160,8 +160,8 @@ class TelegramAdapter(adapter.MessagePlatformAdapter): try: lb_event = await self.event_converter.target2yiri(update, self.bot, self.bot_account_id) await self.listeners[type(lb_event)](lb_event, self) - except Exception as e: - await self.logger.error(f"Error in telegram callback: {traceback.format_exc()}") + except Exception: + await self.logger.error(f'Error in telegram callback: {traceback.format_exc()}') self.application = ApplicationBuilder().token(self.config['token']).build() self.bot = self.application.bot diff --git a/pkg/platform/sources/wechatpad.py b/pkg/platform/sources/wechatpad.py index fdd4a69b..75cad727 100644 --- a/pkg/platform/sources/wechatpad.py +++ b/pkg/platform/sources/wechatpad.py @@ -1,5 +1,4 @@ import requests -import websockets import websocket import json import time @@ -10,53 +9,40 @@ from libs.wechatpad_api.client import WeChatPadClient import typing import asyncio import traceback -import time import re import base64 -import uuid -import json -import os import copy -import datetime import threading import quart -import aiohttp from .. 
import adapter -from ...pipeline.longtext.strategies import forward from ...core import app from ..types import message as platform_message from ..types import events as platform_events from ..types import entities as platform_entities -from ...utils import image from ..logger import EventLogger import xml.etree.ElementTree as ET -from typing import Optional, List, Tuple +from typing import Optional, Tuple from functools import partial import logging -class WeChatPadMessageConverter(adapter.MessageConverter): +class WeChatPadMessageConverter(adapter.MessageConverter): def __init__(self, config: dict): self.config = config - self.bot = WeChatPadClient(self.config["wechatpad_url"],self.config["token"]) - self.logger = logging.getLogger("WeChatPadMessageConverter") + self.bot = WeChatPadClient(self.config['wechatpad_url'], self.config['token']) + self.logger = logging.getLogger('WeChatPadMessageConverter') @staticmethod - async def yiri2target( - message_chain: platform_message.MessageChain - ) -> list[dict]: + async def yiri2target(message_chain: platform_message.MessageChain) -> list[dict]: content_list = [] - current_file_path = os.path.abspath(__file__) - - for component in message_chain: if isinstance(component, platform_message.At): - content_list.append({"type": "at", "target": component.target}) + content_list.append({'type': 'at', 'target': component.target}) elif isinstance(component, platform_message.Plain): - content_list.append({"type": "text", "content": component.text}) + content_list.append({'type': 'text', 'content': component.text}) elif isinstance(component, platform_message.Image): if component.url: async with httpx.AsyncClient() as client: @@ -68,15 +54,16 @@ class WeChatPadMessageConverter(adapter.MessageConverter): else: raise Exception('获取文件失败') # pass - content_list.append({"type": "image", "image": base64_str}) + content_list.append({'type': 'image', 'image': base64_str}) elif component.base64: - content_list.append({"type": "image", "image": 
component.base64}) + content_list.append({'type': 'image', 'image': component.base64}) elif isinstance(component, platform_message.WeChatEmoji): content_list.append( - {'type': 'WeChatEmoji', 'emoji_md5': component.emoji_md5, 'emoji_size': component.emoji_size}) + {'type': 'WeChatEmoji', 'emoji_md5': component.emoji_md5, 'emoji_size': component.emoji_size} + ) elif isinstance(component, platform_message.Voice): - content_list.append({"type": "voice", "data": component.url, "duration": component.length, "forma": 0}) + content_list.append({'type': 'voice', 'data': component.url, 'duration': component.length, 'forma': 0}) elif isinstance(component, platform_message.WeChatAppMsg): content_list.append({'type': 'WeChatAppMsg', 'app_msg': component.app_msg}) elif isinstance(component, platform_message.Forward): @@ -86,28 +73,23 @@ class WeChatPadMessageConverter(adapter.MessageConverter): return content_list - - async def target2yiri( - self, - message: dict, - bot_account_id: str - ) -> platform_message.MessageChain: + async def target2yiri(self, message: dict, bot_account_id: str) -> platform_message.MessageChain: """外部消息转平台消息""" # 数据预处理 message_list = [] ats_bot = False # 是否被@ - content = message["content"]["str"] + content = message['content']['str'] content_no_preifx = content # 群消息则去掉前缀 is_group_message = self._is_group_message(message) if is_group_message: ats_bot = self._ats_bot(message, bot_account_id) - if "@所有人" in content: + if '@所有人' in content: message_list.append(platform_message.AtAll()) elif ats_bot: message_list.append(platform_message.At(target=bot_account_id)) content_no_preifx, _ = self._extract_content_and_sender(content) - msg_type = message["msg_type"] + msg_type = message['msg_type'] # 映射消息类型到处理器方法 handler_map = { @@ -129,11 +111,7 @@ class WeChatPadMessageConverter(adapter.MessageConverter): return platform_message.MessageChain(message_list) - async def _handler_text( - self, - message: Optional[dict], - content_no_preifx: str - ) -> 
platform_message.MessageChain: + async def _handler_text(self, message: Optional[dict], content_no_preifx: str) -> platform_message.MessageChain: """处理文本消息 (msg_type=1)""" if message and self._is_group_message(message): pattern = r'@\S{1,20}' @@ -141,16 +119,12 @@ class WeChatPadMessageConverter(adapter.MessageConverter): return platform_message.MessageChain([platform_message.Plain(content_no_preifx)]) - async def _handler_image( - self, - message: Optional[dict], - content_no_preifx: str - ) -> platform_message.MessageChain: + async def _handler_image(self, message: Optional[dict], content_no_preifx: str) -> platform_message.MessageChain: """处理图像消息 (msg_type=3)""" try: image_xml = content_no_preifx if not image_xml: - return platform_message.MessageChain([platform_message.Unknown("[图片内容为空]")]) + return platform_message.MessageChain([platform_message.Unknown('[图片内容为空]')]) root = ET.fromstring(image_xml) # 提取img标签的属性 @@ -160,28 +134,22 @@ class WeChatPadMessageConverter(adapter.MessageConverter): cdnthumburl = img_tag.get('cdnthumburl') # cdnmidimgurl = img_tag.get('cdnmidimgurl') - image_data = self.bot.cdn_download(aeskey=aeskey, file_type=1, file_url=cdnthumburl) - if image_data["Data"]['FileData'] == '': + if image_data['Data']['FileData'] == '': image_data = self.bot.cdn_download(aeskey=aeskey, file_type=2, file_url=cdnthumburl) - base64_str = image_data["Data"]['FileData'] + base64_str = image_data['Data']['FileData'] # self.logger.info(f"data:image/png;base64,{base64_str}") - elements = [ - platform_message.Image(base64=f"data:image/png;base64,{base64_str}"), + platform_message.Image(base64=f'data:image/png;base64,{base64_str}'), # platform_message.WeChatForwardImage(xml_data=image_xml) # 微信消息转发 ] return platform_message.MessageChain(elements) except Exception as e: - self.logger.error(f"处理图片失败: {str(e)}") - return platform_message.MessageChain([platform_message.Unknown("[图片处理失败]")]) + self.logger.error(f'处理图片失败: {str(e)}') + return 
platform_message.MessageChain([platform_message.Unknown('[图片处理失败]')]) - async def _handler_voice( - self, - message: Optional[dict], - content_no_preifx: str - ) -> platform_message.MessageChain: + async def _handler_voice(self, message: Optional[dict], content_no_preifx: str) -> platform_message.MessageChain: """处理语音消息 (msg_type=34)""" message_List = [] try: @@ -197,39 +165,33 @@ class WeChatPadMessageConverter(adapter.MessageConverter): bufid = voicemsg.get('bufid') length = voicemsg.get('voicelength') voice_data = self.bot.get_msg_voice(buf_id=str(bufid), length=int(length), msgid=str(new_msg_id)) - audio_base64 = voice_data["Data"]['Base64'] + audio_base64 = voice_data['Data']['Base64'] # 验证语音数据有效性 if not audio_base64: - message_List.append(platform_message.Unknown(text="[语音内容为空]")) + message_List.append(platform_message.Unknown(text='[语音内容为空]')) return platform_message.MessageChain(message_List) # 转换为平台支持的语音格式(如 Silk 格式) - voice_element = platform_message.Voice( - base64=f"data:audio/silk;base64,{audio_base64}" - ) + voice_element = platform_message.Voice(base64=f'data:audio/silk;base64,{audio_base64}') message_List.append(voice_element) except KeyError as e: - self.logger.error(f"语音数据字段缺失: {str(e)}") - message_List.append(platform_message.Unknown(text="[语音数据解析失败]")) + self.logger.error(f'语音数据字段缺失: {str(e)}') + message_List.append(platform_message.Unknown(text='[语音数据解析失败]')) except Exception as e: - self.logger.error(f"处理语音消息异常: {str(e)}") - message_List.append(platform_message.Unknown(text="[语音处理失败]")) + self.logger.error(f'处理语音消息异常: {str(e)}') + message_List.append(platform_message.Unknown(text='[语音处理失败]')) return platform_message.MessageChain(message_List) - async def _handler_compound( - self, - message: Optional[dict], - content_no_preifx: str - ) -> platform_message.MessageChain: + async def _handler_compound(self, message: Optional[dict], content_no_preifx: str) -> platform_message.MessageChain: """处理复合消息 (msg_type=49),根据子类型分派""" try: xml_data = 
ET.fromstring(content_no_preifx) appmsg_data = xml_data.find('.//appmsg') if appmsg_data: - data_type = appmsg_data.findtext('.//type', "") + data_type = appmsg_data.findtext('.//type', '') # 二次分派处理器 sub_handler_map = { '57': self._handler_compound_quote, @@ -238,9 +200,9 @@ class WeChatPadMessageConverter(adapter.MessageConverter): '74': self._handler_compound_file, '33': self._handler_compound_mini_program, '36': self._handler_compound_mini_program, - '2000': partial(self._handler_compound_unsupported, text="[转账消息]"), - '2001': partial(self._handler_compound_unsupported, text="[红包消息]"), - '51': partial(self._handler_compound_unsupported, text="[视频号消息]"), + '2000': partial(self._handler_compound_unsupported, text='[转账消息]'), + '2001': partial(self._handler_compound_unsupported, text='[红包消息]'), + '51': partial(self._handler_compound_unsupported, text='[视频号消息]'), } handler = sub_handler_map.get(data_type, self._handler_compound_unsupported) @@ -251,56 +213,54 @@ class WeChatPadMessageConverter(adapter.MessageConverter): else: return platform_message.MessageChain([platform_message.Unknown(text=content_no_preifx)]) except Exception as e: - self.logger.error(f"解析复合消息失败: {str(e)}") + self.logger.error(f'解析复合消息失败: {str(e)}') return platform_message.MessageChain([platform_message.Unknown(text=content_no_preifx)]) async def _handler_compound_quote( - self, - message: Optional[dict], - xml_data: ET.Element + self, message: Optional[dict], xml_data: ET.Element ) -> platform_message.MessageChain: """处理引用消息 (data_type=57)""" message_list = [] -# self.logger.info("_handler_compound_quote", ET.tostring(xml_data, encoding='unicode')) + # self.logger.info("_handler_compound_quote", ET.tostring(xml_data, encoding='unicode')) appmsg_data = xml_data.find('.//appmsg') - quote_data = "" # 引用原文 + quote_data = '' # 引用原文 quote_id = None # 引用消息的原发送者 tousername = None # 接收方: 所属微信的wxid - user_data = "" # 用户消息 + user_data = '' # 用户消息 sender_id = xml_data.findtext('.//fromusername') # 
发送方:单聊用户/群member # 引用消息转发 if appmsg_data: - user_data = appmsg_data.findtext('.//title') or "" + user_data = appmsg_data.findtext('.//title') or '' quote_data = appmsg_data.find('.//refermsg').findtext('.//content') quote_id = appmsg_data.find('.//refermsg').findtext('.//chatusr') - message_list.append( - platform_message.WeChatAppMsg( - app_msg=ET.tostring(appmsg_data, encoding='unicode')) - ) + message_list.append(platform_message.WeChatAppMsg(app_msg=ET.tostring(appmsg_data, encoding='unicode'))) if message: - tousername = message['to_user_name']["str"] - + tousername = message['to_user_name']['str'] + + _ = quote_id + _ = tousername + if quote_data: quote_data_message_list = platform_message.MessageChain() # 文本消息 try: - if "" not in quote_data: + if '' not in quote_data: quote_data_message_list.append(platform_message.Plain(quote_data)) else: # 引用消息展开 quote_data_xml = ET.fromstring(quote_data) - if quote_data_xml.find("img"): + if quote_data_xml.find('img'): quote_data_message_list.extend(await self._handler_image(None, quote_data)) - elif quote_data_xml.find("voicemsg"): + elif quote_data_xml.find('voicemsg'): quote_data_message_list.extend(await self._handler_voice(None, quote_data)) - elif quote_data_xml.find("videomsg"): + elif quote_data_xml.find('videomsg'): quote_data_message_list.extend(await self._handler_default(None, quote_data)) # 先不处理 else: # appmsg quote_data_message_list.extend(await self._handler_compound(None, quote_data)) except Exception as e: - self.logger.error(f"处理引用消息异常 expcetion:{e}") + self.logger.error(f'处理引用消息异常 expcetion:{e}') quote_data_message_list.append(platform_message.Plain(quote_data)) message_list.append( platform_message.Quote( @@ -315,15 +275,11 @@ class WeChatPadMessageConverter(adapter.MessageConverter): return platform_message.MessageChain(message_list) - async def _handler_compound_file( - self, - message: dict, - xml_data: ET.Element - ) -> platform_message.MessageChain: + async def _handler_compound_file(self, 
message: dict, xml_data: ET.Element) -> platform_message.MessageChain: """处理文件消息 (data_type=6)""" file_data = xml_data.find('.//appmsg') - if file_data.findtext('.//type', "") == "74": + if file_data.findtext('.//type', '') == '74': return None else: @@ -346,22 +302,21 @@ class WeChatPadMessageConverter(adapter.MessageConverter): file_data = self.bot.cdn_download(aeskey=aeskey, file_type=5, file_url=cdnthumburl) - file_base64 = file_data["Data"]['FileData'] + file_base64 = file_data['Data']['FileData'] # print(file_data) - file_size = file_data["Data"]['TotalSize'] + file_size = file_data['Data']['TotalSize'] # print(file_base64) - return platform_message.MessageChain([ - platform_message.WeChatFile(file_id=file_id, file_name=file_name, file_size=file_size, - file_base64=file_base64), - platform_message.WeChatForwardFile(xml_data=xml_data_str) - ]) + return platform_message.MessageChain( + [ + platform_message.WeChatFile( + file_id=file_id, file_name=file_name, file_size=file_size, file_base64=file_base64 + ), + platform_message.WeChatForwardFile(xml_data=xml_data_str), + ] + ) - async def _handler_compound_link( - self, - message: dict, - xml_data: ET.Element - ) -> platform_message.MessageChain: + async def _handler_compound_link(self, message: dict, xml_data: ET.Element) -> platform_message.MessageChain: """处理链接消息(如公众号文章、外部网页)""" message_list = [] try: @@ -374,56 +329,38 @@ class WeChatPadMessageConverter(adapter.MessageConverter): link_title=appmsg.findtext('title', ''), link_desc=appmsg.findtext('des', ''), link_url=appmsg.findtext('url', ''), - link_thumb_url=appmsg.findtext("thumburl", '') # 这个字段拿不到 + link_thumb_url=appmsg.findtext('thumburl', ''), # 这个字段拿不到 ) ) # 还没有发链接的接口, 暂时还需要自己构造appmsg, 先用WeChatAppMsg。 - message_list.append( - platform_message.WeChatAppMsg( - app_msg=ET.tostring(appmsg, encoding='unicode') - ) - ) + message_list.append(platform_message.WeChatAppMsg(app_msg=ET.tostring(appmsg, encoding='unicode'))) except Exception as e: - 
self.logger.error(f"解析链接消息失败: {str(e)}") + self.logger.error(f'解析链接消息失败: {str(e)}') return platform_message.MessageChain(message_list) async def _handler_compound_mini_program( - self, - message: dict, - xml_data: ET.Element + self, message: dict, xml_data: ET.Element ) -> platform_message.MessageChain: """处理小程序消息(如小程序卡片、服务通知)""" xml_data_str = ET.tostring(xml_data, encoding='unicode') - return platform_message.MessageChain([ - platform_message.WeChatForwardMiniPrograms(xml_data=xml_data_str) - ]) + return platform_message.MessageChain([platform_message.WeChatForwardMiniPrograms(xml_data=xml_data_str)]) - async def _handler_default( - self, - message: Optional[dict], - content_no_preifx: str - ) -> platform_message.MessageChain: + async def _handler_default(self, message: Optional[dict], content_no_preifx: str) -> platform_message.MessageChain: """处理未知消息类型""" if message: - msg_type = message["msg_type"] + msg_type = message['msg_type'] else: - msg_type = "" - return platform_message.MessageChain([ - platform_message.Unknown(text=f"[未知消息类型 msg_type:{msg_type}]") - ]) + msg_type = '' + return platform_message.MessageChain([platform_message.Unknown(text=f'[未知消息类型 msg_type:{msg_type}]')]) def _handler_compound_unsupported( - self, - message: dict, - xml_data: str, - text: Optional[str] = None + self, message: dict, xml_data: str, text: Optional[str] = None ) -> platform_message.MessageChain: """处理未支持复合消息类型(msg_type=49)子类型""" if not text: - text = f"[xml_data={xml_data}]" + text = f'[xml_data={xml_data}]' content_list = [] - content_list.append( - platform_message.Unknown(text=f"[处理未支持复合消息类型[msg_type=49]|{text}")) + content_list.append(platform_message.Unknown(text=f'[处理未支持复合消息类型[msg_type=49]|{text}')) return platform_message.MessageChain(content_list) @@ -432,7 +369,7 @@ class WeChatPadMessageConverter(adapter.MessageConverter): ats_bot = False try: to_user_name = message['to_user_name']['str'] # 接收方: 所属微信的wxid - raw_content = message["content"]["str"] # 原始消息内容 + 
raw_content = message['content']['str'] # 原始消息内容 content_no_prefix, _ = self._extract_content_and_sender(raw_content) # 直接艾特机器人(这个有bug,当被引用的消息里面有@bot,会套娃 # ats_bot = ats_bot or (f"@{bot_account_id}" in content_no_prefix) @@ -443,7 +380,7 @@ class WeChatPadMessageConverter(adapter.MessageConverter): msg_source = message.get('msg_source', '') or '' if len(msg_source) > 0: msg_source_data = ET.fromstring(msg_source) - at_user_list = msg_source_data.findtext("atuserlist") or "" + at_user_list = msg_source_data.findtext('atuserlist') or '' ats_bot = ats_bot or (to_user_name in at_user_list) # 引用bot if message.get('msg_type', 0) == 49: @@ -454,7 +391,7 @@ class WeChatPadMessageConverter(adapter.MessageConverter): quote_id = appmsg_data.find('.//refermsg').findtext('.//chatusr') # 引用消息的原发送者 ats_bot = ats_bot or (quote_id == tousername) except Exception as e: - self.logger.error(f"_ats_bot got except: {e}") + self.logger.error(f'_ats_bot got except: {e}') finally: return ats_bot @@ -463,47 +400,41 @@ class WeChatPadMessageConverter(adapter.MessageConverter): try: # 检查消息开头,如果有 wxid_sbitaz0mt65n22:\n 则删掉 # add: 有些用户的wxid不是上述格式。换成user_name: - regex = re.compile(r"^[a-zA-Z0-9_\-]{5,20}:") - line_split = raw_content.split("\n") + regex = re.compile(r'^[a-zA-Z0-9_\-]{5,20}:') + line_split = raw_content.split('\n') if len(line_split) > 0 and regex.match(line_split[0]): - raw_content = "\n".join(line_split[1:]) - sender_id = line_split[0].strip(":") + raw_content = '\n'.join(line_split[1:]) + sender_id = line_split[0].strip(':') return raw_content, sender_id except Exception as e: - self.logger.error(f"_extract_content_and_sender got except: {e}") + self.logger.error(f'_extract_content_and_sender got except: {e}') finally: return raw_content, None # 是否是群消息 def _is_group_message(self, message: dict) -> bool: from_user_name = message['from_user_name']['str'] - return from_user_name.endswith("@chatroom") + return from_user_name.endswith('@chatroom') class 
WeChatPadEventConverter(adapter.EventConverter): - def __init__(self, config: dict): self.config = config self.message_converter = WeChatPadMessageConverter(config) - self.logger = logging.getLogger("WeChatPadEventConverter") - + self.logger = logging.getLogger('WeChatPadEventConverter') + @staticmethod - async def yiri2target( - event: platform_events.MessageEvent - ) -> dict: + async def yiri2target(event: platform_events.MessageEvent) -> dict: pass - async def target2yiri( - self, - event: dict, - bot_account_id: str - ) -> platform_events.MessageEvent: - + async def target2yiri(self, event: dict, bot_account_id: str) -> platform_events.MessageEvent: # 排除公众号以及微信团队消息 - if event['from_user_name']['str'].startswith('gh_') \ - or event['from_user_name']['str']=='weixin'\ - or event['from_user_name']['str'] == "newsapp"\ - or event['from_user_name']['str'] == self.config["wxid"]: + if ( + event['from_user_name']['str'].startswith('gh_') + or event['from_user_name']['str'] == 'weixin' + or event['from_user_name']['str'] == 'newsapp' + or event['from_user_name']['str'] == self.config['wxid'] + ): return None message_chain = await self.message_converter.target2yiri(copy.deepcopy(event), bot_account_id) @@ -512,7 +443,7 @@ class WeChatPadEventConverter(adapter.EventConverter): if '@chatroom' in event['from_user_name']['str']: # 找出开头的 wxid_ 字符串,以:结尾 - sender_wxid = event['content']['str'].split(":")[0] + sender_wxid = event['content']['str'].split(':')[0] return platform_events.GroupMessage( sender=platform_entities.GroupMember( @@ -524,13 +455,13 @@ class WeChatPadEventConverter(adapter.EventConverter): name=event['from_user_name']['str'], permission=platform_entities.Permission.Member, ), - special_title="", + special_title='', join_timestamp=0, last_speak_timestamp=0, mute_time_remaining=0, ), message_chain=message_chain, - time=event["create_time"], + time=event['create_time'], source_platform_object=event, ) else: @@ -541,13 +472,13 @@ class 
WeChatPadEventConverter(adapter.EventConverter): remark='', ), message_chain=message_chain, - time=event["create_time"], + time=event['create_time'], source_platform_object=event, ) class WeChatPadAdapter(adapter.MessagePlatformAdapter): - name: str = "WeChatPad" # 定义适配器名称 + name: str = 'WeChatPad' # 定义适配器名称 bot: WeChatPadClient quart_app: quart.Quart @@ -580,27 +511,21 @@ class WeChatPadAdapter(adapter.MessagePlatformAdapter): # self.ap.logger.debug(f"Gewechat callback event: {data}") # print(data) - try: event = await self.event_converter.target2yiri(data.copy(), self.bot_account_id) - except Exception as e: - await self.logger.error(f"Error in wechatpad callback: {traceback.format_exc()}") + except Exception: + await self.logger.error(f'Error in wechatpad callback: {traceback.format_exc()}') if event.__class__ in self.listeners: await self.listeners[event.__class__](event, self) return 'ok' - - async def _handle_message( - self, - message: platform_message.MessageChain, - target_id: str - ): + async def _handle_message(self, message: platform_message.MessageChain, target_id: str): """统一消息处理核心逻辑""" content_list = await self.message_converter.yiri2target(message) # print(content_list) - at_targets = [item["target"] for item in content_list if item["type"] == "at"] + at_targets = [item['target'] for item in content_list if item['type'] == 'at'] # print(at_targets) # 处理@逻辑 at_targets = at_targets or [] @@ -608,7 +533,7 @@ class WeChatPadAdapter(adapter.MessagePlatformAdapter): if at_targets: member_info = self.bot.get_chatroom_member_detail( target_id, - )["Data"]["member_data"]["chatroom_member_list"] + )['Data']['member_data']['chatroom_member_list'] # 处理消息组件 for msg in content_list: @@ -616,63 +541,51 @@ class WeChatPadAdapter(adapter.MessagePlatformAdapter): if msg['type'] == 'text' and at_targets: at_nick_name_list = [] for member in member_info: - if member["user_name"] in at_targets: + if member['user_name'] in at_targets: 
at_nick_name_list.append(f'@{member["nick_name"]}') msg['content'] = f'{" ".join(at_nick_name_list)} {msg["content"]}' # 统一消息派发 handler_map = { 'text': lambda msg: self.bot.send_text_message( - to_wxid=target_id, - message=msg['content'], - ats=at_targets + to_wxid=target_id, message=msg['content'], ats=at_targets ), 'image': lambda msg: self.bot.send_image_message( - to_wxid=target_id, - img_url=msg["image"], - ats = at_targets + to_wxid=target_id, img_url=msg['image'], ats=at_targets ), 'WeChatEmoji': lambda msg: self.bot.send_emoji_message( - to_wxid=target_id, - emoji_md5=msg['emoji_md5'], - emoji_size=msg['emoji_size'] + to_wxid=target_id, emoji_md5=msg['emoji_md5'], emoji_size=msg['emoji_size'] ), - 'voice': lambda msg: self.bot.send_voice_message( to_wxid=target_id, voice_data=msg['data'], - voice_duration=msg["duration"], - voice_forma=msg["forma"], + voice_duration=msg['duration'], + voice_forma=msg['forma'], ), 'WeChatAppMsg': lambda msg: self.bot.send_app_message( to_wxid=target_id, app_message=msg['app_msg'], type=0, ), - 'at': lambda msg: None + 'at': lambda msg: None, } if handler := handler_map.get(msg['type']): handler(msg) # self.ap.logger.warning(f"未处理的消息类型: {ret}") else: - self.ap.logger.warning(f"未处理的消息类型: {msg['type']}") + self.ap.logger.warning(f'未处理的消息类型: {msg["type"]}') continue - async def send_message( - self, - target_type: str, - target_id: str, - message: platform_message.MessageChain - ): + async def send_message(self, target_type: str, target_id: str, message: platform_message.MessageChain): """主动发送消息""" return await self._handle_message(message, target_id) async def reply_message( - self, - message_source: platform_events.MessageEvent, - message: platform_message.MessageChain, - quote_origin: bool = False + self, + message_source: platform_events.MessageEvent, + message: platform_message.MessageChain, + quote_origin: bool = False, ): """回复消息""" if message_source.source_platform_object: @@ -683,58 +596,49 @@ class 
WeChatPadAdapter(adapter.MessagePlatformAdapter): pass def register_listener( - self, - event_type: typing.Type[platform_events.Event], - callback: typing.Callable[[platform_events.Event, adapter.MessagePlatformAdapter], None] + self, + event_type: typing.Type[platform_events.Event], + callback: typing.Callable[[platform_events.Event, adapter.MessagePlatformAdapter], None], ): self.listeners[event_type] = callback def unregister_listener( - self, - event_type: typing.Type[platform_events.Event], - callback: typing.Callable[[platform_events.Event, adapter.MessagePlatformAdapter], None] + self, + event_type: typing.Type[platform_events.Event], + callback: typing.Callable[[platform_events.Event, adapter.MessagePlatformAdapter], None], ): pass async def run_async(self): - - if not self.config["admin_key"] and not self.config["token"]: - raise RuntimeError("无wechatpad管理密匙,请填入配置文件后重启") + if not self.config['admin_key'] and not self.config['token']: + raise RuntimeError('无wechatpad管理密匙,请填入配置文件后重启') else: - if self.config["token"]: - self.bot = WeChatPadClient( - self.config['wechatpad_url'], - self.config["token"] - ) + if self.config['token']: + self.bot = WeChatPadClient(self.config['wechatpad_url'], self.config['token']) data = self.bot.get_login_status() self.ap.logger.info(data) - if data["Code"] == 300 and data["Text"] == "你已退出微信": + if data['Code'] == 300 and data['Text'] == '你已退出微信': response = requests.post( - f"{self.config['wechatpad_url']}/admin/GenAuthKey1?key={self.config['admin_key']}", - json={"Count": 1, "Days": 365} + f'{self.config["wechatpad_url"]}/admin/GenAuthKey1?key={self.config["admin_key"]}', + json={'Count': 1, 'Days': 365}, ) if response.status_code != 200: - raise Exception(f"获取token失败: {response.text}") - self.config["token"] = response.json()["Data"][0] + raise Exception(f'获取token失败: {response.text}') + self.config['token'] = response.json()['Data'][0] - elif not self.config["token"]: + elif not self.config['token']: response = 
requests.post( - f"{self.config['wechatpad_url']}/admin/GenAuthKey1?key={self.config['admin_key']}", - json={"Count": 1, "Days": 365} + f'{self.config["wechatpad_url"]}/admin/GenAuthKey1?key={self.config["admin_key"]}', + json={'Count': 1, 'Days': 365}, ) if response.status_code != 200: - raise Exception(f"获取token失败: {response.text}") - self.config["token"] = response.json()["Data"][0] + raise Exception(f'获取token失败: {response.text}') + self.config['token'] = response.json()['Data'][0] - self.bot = WeChatPadClient( - self.config['wechatpad_url'], - self.config["token"], - logger=self.logger - ) - self.ap.logger.info(self.config["token"]) + self.bot = WeChatPadClient(self.config['wechatpad_url'], self.config['token'], logger=self.logger) + self.ap.logger.info(self.config['token']) thread_1 = threading.Event() - def wechat_login_process(): # 不登录,这些先注释掉,避免登陆态尝试拉qrcode。 # login_data =self.bot.get_login_qr() @@ -742,67 +646,54 @@ class WeChatPadAdapter(adapter.MessagePlatformAdapter): # url = login_data['Data']["QrCodeUrl"] # self.ap.logger.info(login_data) - - profile =self.bot.get_profile() + profile = self.bot.get_profile() self.ap.logger.info(profile) - self.bot_account_id = profile["Data"]["userInfo"]["nickName"]["str"] - self.config["wxid"] = profile["Data"]["userInfo"]["userName"]["str"] + self.bot_account_id = profile['Data']['userInfo']['nickName']['str'] + self.config['wxid'] = profile['Data']['userInfo']['userName']['str'] thread_1.set() - # asyncio.create_task(wechat_login_process) threading.Thread(target=wechat_login_process).start() def connect_websocket_sync() -> None: - thread_1.wait() - uri = f"{self.config['wechatpad_ws']}/GetSyncMsg?key={self.config['token']}" - self.ap.logger.info(f"Connecting to WebSocket: {uri}") + uri = f'{self.config["wechatpad_ws"]}/GetSyncMsg?key={self.config["token"]}' + self.ap.logger.info(f'Connecting to WebSocket: {uri}') + def on_message(ws, message): try: data = json.loads(message) - self.ap.logger.debug(f"Received 
message: {data}") + self.ap.logger.debug(f'Received message: {data}') # 这里需要确保ws_message是同步的,或者使用asyncio.run调用异步方法 asyncio.run(self.ws_message(data)) except json.JSONDecodeError: - self.ap.logger.error(f"Non-JSON message: {message[:100]}...") + self.ap.logger.error(f'Non-JSON message: {message[:100]}...') def on_error(ws, error): - self.ap.logger.error(f"WebSocket error: {str(error)[:200]}") + self.ap.logger.error(f'WebSocket error: {str(error)[:200]}') def on_close(ws, close_status_code, close_msg): - self.ap.logger.info("WebSocket closed, reconnecting...") + self.ap.logger.info('WebSocket closed, reconnecting...') time.sleep(5) connect_websocket_sync() # 自动重连 def on_open(ws): - self.ap.logger.info("WebSocket connected successfully!") + self.ap.logger.info('WebSocket connected successfully!') ws = websocket.WebSocketApp( - uri, - on_message=on_message, - on_error=on_error, - on_close=on_close, - on_open=on_open - ) - ws.run_forever( - ping_interval=60, - ping_timeout=20 + uri, on_message=on_message, on_error=on_error, on_close=on_close, on_open=on_open ) + ws.run_forever(ping_interval=60, ping_timeout=20) # 直接调用同步版本(会阻塞) # connect_websocket_sync() # 这行代码会在WebSocket连接断开后才会执行 # self.ap.logger.info("WebSocket client thread started") - thread = threading.Thread( - target=connect_websocket_sync, - name="WebSocketClientThread", - daemon=True - ) + thread = threading.Thread(target=connect_websocket_sync, name='WebSocketClientThread', daemon=True) thread.start() - self.ap.logger.info("WebSocket client thread started") + self.ap.logger.info('WebSocket client thread started') async def kill(self) -> bool: pass diff --git a/pkg/platform/sources/wecom.py b/pkg/platform/sources/wecom.py index f1cc677e..7be05a85 100644 --- a/pkg/platform/sources/wecom.py +++ b/pkg/platform/sources/wecom.py @@ -157,7 +157,7 @@ class WecomAdapter(adapter.MessagePlatformAdapter): token=config['token'], EncodingAESKey=config['EncodingAESKey'], contacts_secret=config['contacts_secret'], - 
logger=self.logger + logger=self.logger, ) async def reply_message( @@ -201,8 +201,8 @@ class WecomAdapter(adapter.MessagePlatformAdapter): self.bot_account_id = event.receiver_id try: return await callback(await self.event_converter.target2yiri(event), self) - except Exception as e: - await self.logger.error(f"Error in wecom callback: {traceback.format_exc()}") + except Exception: + await self.logger.error(f'Error in wecom callback: {traceback.format_exc()}') if event_type == platform_events.FriendMessage: self.bot.on_message('text')(on_message) diff --git a/pkg/platform/sources/wecomcs.py b/pkg/platform/sources/wecomcs.py index aab8d394..da84ac6d 100644 --- a/pkg/platform/sources/wecomcs.py +++ b/pkg/platform/sources/wecomcs.py @@ -145,7 +145,7 @@ class WecomCSAdapter(adapter.MessagePlatformAdapter): secret=config['secret'], token=config['token'], EncodingAESKey=config['EncodingAESKey'], - logger=self.logger + logger=self.logger, ) async def reply_message( @@ -178,8 +178,8 @@ class WecomCSAdapter(adapter.MessagePlatformAdapter): self.bot_account_id = event.receiver_id try: return await callback(await self.event_converter.target2yiri(event), self) - except Exception as e: - await self.logger.error(f"Error in wecomcs callback: {traceback.format_exc()}") + except Exception: + await self.logger.error(f'Error in wecomcs callback: {traceback.format_exc()}') if event_type == platform_events.FriendMessage: self.bot.on_message('text')(on_message) diff --git a/pkg/rag/knowledge/services/database.py b/pkg/rag/knowledge/services/database.py index a8c35883..35a52453 100644 --- a/pkg/rag/knowledge/services/database.py +++ b/pkg/rag/knowledge/services/database.py @@ -1,19 +1,20 @@ from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, ForeignKey, LargeBinary from sqlalchemy.orm import declarative_base, sessionmaker, relationship from datetime import datetime -import numpy as np # 用于处理从LargeBinary转换回来的embedding Base = declarative_base() + class 
KnowledgeBase(Base): __tablename__ = 'kb' id = Column(Integer, primary_key=True, index=True) name = Column(String, index=True) description = Column(Text) created_at = Column(DateTime, default=datetime.utcnow) - embedding_model = Column(String, default="") # 默认嵌入模型 + embedding_model = Column(String, default='') # 默认嵌入模型 top_k = Column(Integer, default=5) # 默认返回的top_k数量 - files = relationship("File", back_populates="knowledge_base") + files = relationship('File', back_populates='knowledge_base') + class File(Base): __tablename__ = 'file' @@ -24,8 +25,9 @@ class File(Base): created_at = Column(DateTime, default=datetime.utcnow) file_type = Column(String) status = Column(Integer, default=0) # 0: 未处理, 1: 处理中, 2: 已处理, 3: 错误 - knowledge_base = relationship("KnowledgeBase", back_populates="files") - chunks = relationship("Chunk", back_populates="file") + knowledge_base = relationship('KnowledgeBase', back_populates='files') + chunks = relationship('Chunk', back_populates='file') + class Chunk(Base): __tablename__ = 'chunks' @@ -33,26 +35,30 @@ class Chunk(Base): file_id = Column(Integer, ForeignKey('file.id')) text = Column(Text) - file = relationship("File", back_populates="chunks") - vector = relationship("Vector", uselist=False, back_populates="chunk") # One-to-one + file = relationship('File', back_populates='chunks') + vector = relationship('Vector', uselist=False, back_populates='chunk') # One-to-one + class Vector(Base): __tablename__ = 'vectors' id = Column(Integer, primary_key=True, index=True) chunk_id = Column(Integer, ForeignKey('chunks.id'), unique=True) - embedding = Column(LargeBinary) # Store embeddings as binary + embedding = Column(LargeBinary) # Store embeddings as binary + + chunk = relationship('Chunk', back_populates='vector') - chunk = relationship("Chunk", back_populates="vector") # 数据库连接 -DATABASE_URL = "sqlite:///./knowledge_base.db" # 生产环境请更换为 PostgreSQL/MySQL -engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False} if 
"sqlite" in DATABASE_URL else {}) +DATABASE_URL = 'sqlite:///./knowledge_base.db' # 生产环境请更换为 PostgreSQL/MySQL +engine = create_engine(DATABASE_URL, connect_args={'check_same_thread': False} if 'sqlite' in DATABASE_URL else {}) SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + # 创建所有表 (可以在应用启动时执行一次) def create_db_and_tables(): Base.metadata.create_all(bind=engine) - print("Database tables created/checked.") + print('Database tables created/checked.') + # 定义嵌入维度(请根据你实际使用的模型调整) -EMBEDDING_DIM = 1024 \ No newline at end of file +EMBEDDING_DIM = 1024 diff --git a/pkg/rag/knowledge/services/embedding_models.py b/pkg/rag/knowledge/services/embedding_models.py index a6ce73ae..7301d640 100644 --- a/pkg/rag/knowledge/services/embedding_models.py +++ b/pkg/rag/knowledge/services/embedding_models.py @@ -1,14 +1,15 @@ # services/embedding_models.py import os -from typing import Dict, Any, List, Type, Optional +from typing import Dict, Any, List import logging -import aiohttp # Import aiohttp for asynchronous requests +import aiohttp # Import aiohttp for asynchronous requests import asyncio from sentence_transformers import SentenceTransformer logger = logging.getLogger(__name__) + # Base class for all embedding models class BaseEmbeddingModel: def __init__(self, model_name: str): @@ -27,9 +28,10 @@ class BaseEmbeddingModel: def embedding_dimension(self) -> int: """Returns the embedding dimension of the model.""" if self._embedding_dimension is None: - raise NotImplementedError("Embedding dimension not set for this model.") + raise NotImplementedError('Embedding dimension not set for this model.') return self._embedding_dimension - + + class EmbeddingModelFactory: @staticmethod def create_model(model_type: str, model_name_key: str) -> BaseEmbeddingModel: @@ -39,26 +41,29 @@ class EmbeddingModelFactory: """ if model_name_key not in EMBEDDING_MODEL_CONFIGS: raise ValueError(f"Embedding model configuration '{model_name_key}' not found in 
EMBEDDING_MODEL_CONFIGS.") - + config = EMBEDDING_MODEL_CONFIGS[model_name_key] - - if config['type'] == "third_party_api": + + if config['type'] == 'third_party_api': required_keys = ['api_endpoint', 'headers', 'payload_template', 'embedding_dimension'] if not all(key in config for key in required_keys): - raise ValueError(f"Missing configuration keys for third_party_api model '{model_name_key}'. Required: {required_keys}") - + raise ValueError( + f"Missing configuration keys for third_party_api model '{model_name_key}'. Required: {required_keys}" + ) + # Retrieve model_name from config if it differs from model_name_key # Some APIs expect a specific 'model' value in the payload that might be different from the key - api_model_name = config.get('model_name', model_name_key) + api_model_name = config.get('model_name', model_name_key) return ThirdPartyAPIEmbeddingModel( - model_name=api_model_name, # Use the model_name from config or the key + model_name=api_model_name, # Use the model_name from config or the key api_endpoint=config['api_endpoint'], headers=config['headers'], payload_template=config['payload_template'], - embedding_dimension=config['embedding_dimension'] + embedding_dimension=config['embedding_dimension'], ) + class SentenceTransformerEmbeddingModel(BaseEmbeddingModel): def __init__(self, model_name: str): super().__init__(model_name) @@ -68,9 +73,11 @@ class SentenceTransformerEmbeddingModel(BaseEmbeddingModel): # if not run in a separate thread/process, but this keeps the API consistent. 
self.model = SentenceTransformer(model_name) self._embedding_dimension = self.model.get_sentence_embedding_dimension() - logger.info(f"Initialized SentenceTransformer model '{model_name}' with dimension {self._embedding_dimension}") + logger.info( + f"Initialized SentenceTransformer model '{model_name}' with dimension {self._embedding_dimension}" + ) except Exception as e: - logger.error(f"Failed to load SentenceTransformer model {model_name}: {e}") + logger.error(f'Failed to load SentenceTransformer model {model_name}: {e}') raise async def embed_documents(self, texts: List[str]) -> List[List[float]]: @@ -84,14 +91,23 @@ class SentenceTransformerEmbeddingModel(BaseEmbeddingModel): class ThirdPartyAPIEmbeddingModel(BaseEmbeddingModel): - def __init__(self, model_name: str, api_endpoint: str, headers: Dict[str, str], payload_template: Dict[str, Any], embedding_dimension: int): + def __init__( + self, + model_name: str, + api_endpoint: str, + headers: Dict[str, str], + payload_template: Dict[str, Any], + embedding_dimension: int, + ): super().__init__(model_name) self.api_endpoint = api_endpoint self.headers = headers self.payload_template = payload_template self._embedding_dimension = embedding_dimension - self.session = None # aiohttp client session will be initialized on first use or in a context manager - logger.info(f"Initialized ThirdPartyAPIEmbeddingModel '{model_name}' for async calls to {api_endpoint} with dimension {embedding_dimension}") + self.session = None # aiohttp client session will be initialized on first use or in a context manager + logger.info( + f"Initialized ThirdPartyAPIEmbeddingModel '{model_name}' for async calls to {api_endpoint} with dimension {embedding_dimension}" + ) async def _get_session(self): """Lazily create or return the aiohttp client session.""" @@ -104,7 +120,7 @@ class ThirdPartyAPIEmbeddingModel(BaseEmbeddingModel): if self.session and not self.session.closed: await self.session.close() self.session = None - 
logger.info(f"Closed aiohttp session for model {self.model_name}") + logger.info(f'Closed aiohttp session for model {self.model_name}') async def embed_documents(self, texts: List[str]) -> List[List[float]]: """Asynchronously embeds a list of texts using the third-party API.""" @@ -118,10 +134,10 @@ class ThirdPartyAPIEmbeddingModel(BaseEmbeddingModel): elif 'texts' in payload: payload['texts'] = [text] else: - raise ValueError("Payload template does not contain expected text input key.") + raise ValueError('Payload template does not contain expected text input key.') tasks.append(self._make_api_request(session, payload)) - + results = await asyncio.gather(*tasks, return_exceptions=True) for i, res in enumerate(results): @@ -131,93 +147,92 @@ class ThirdPartyAPIEmbeddingModel(BaseEmbeddingModel): # - Append None or an empty list # - Re-raise the exception to stop processing # - Log and skip, then continue - embeddings.append([0.0] * self.embedding_dimension) # Append dummy embedding or handle failure + embeddings.append([0.0] * self.embedding_dimension) # Append dummy embedding or handle failure else: embeddings.append(res) - + return embeddings async def _make_api_request(self, session: aiohttp.ClientSession, payload: Dict[str, Any]) -> List[float]: """Helper to make an asynchronous API request and extract embedding.""" try: async with session.post(self.api_endpoint, headers=self.headers, json=payload) as response: - response.raise_for_status() # Raise an exception for HTTP errors (4xx, 5xx) + response.raise_for_status() # Raise an exception for HTTP errors (4xx, 5xx) api_response = await response.json() - + # Adjust this based on your API's actual response structure - if "data" in api_response and len(api_response["data"]) > 0 and "embedding" in api_response["data"][0]: - embedding = api_response["data"][0]["embedding"] + if 'data' in api_response and len(api_response['data']) > 0 and 'embedding' in api_response['data'][0]: + embedding = 
api_response['data'][0]['embedding'] if len(embedding) != self.embedding_dimension: - logger.warning(f"API returned embedding of dimension {len(embedding)}, but expected {self.embedding_dimension} for model {self.model_name}. Adjusting config might be needed.") + logger.warning( + f'API returned embedding of dimension {len(embedding)}, but expected {self.embedding_dimension} for model {self.model_name}. Adjusting config might be needed.' + ) return embedding - elif "embeddings" in api_response and isinstance(api_response["embeddings"], list) and api_response["embeddings"]: - embedding = api_response["embeddings"][0] + elif ( + 'embeddings' in api_response + and isinstance(api_response['embeddings'], list) + and api_response['embeddings'] + ): + embedding = api_response['embeddings'][0] if len(embedding) != self.embedding_dimension: - logger.warning(f"API returned embedding of dimension {len(embedding)}, but expected {self.embedding_dimension} for model {self.model_name}. Adjusting config might be needed.") + logger.warning( + f'API returned embedding of dimension {len(embedding)}, but expected {self.embedding_dimension} for model {self.model_name}. Adjusting config might be needed.' 
+ ) return embedding else: - raise ValueError(f"Unexpected API response structure: {api_response}") + raise ValueError(f'Unexpected API response structure: {api_response}') except aiohttp.ClientError as e: - raise ConnectionError(f"API request failed: {e}") from e + raise ConnectionError(f'API request failed: {e}') from e except ValueError as e: - raise ValueError(f"Error processing API response: {e}") from e - + raise ValueError(f'Error processing API response: {e}') from e async def embed_query(self, text: str) -> List[float]: """Asynchronously embeds a single query text.""" results = await self.embed_documents([text]) if results: return results[0] - return [] # Or raise an error if embedding a query must always succeed + return [] # Or raise an error if embedding a query must always succeed + # --- Embedding Model Configuration --- EMBEDDING_MODEL_CONFIGS: Dict[str, Dict[str, Any]] = { - "MiniLM": { # Example for a local Sentence Transformer model - "type": "sentence_transformer", - "model_name": "sentence-transformers/all-MiniLM-L6-v2" + 'MiniLM': { # Example for a local Sentence Transformer model + 'type': 'sentence_transformer', + 'model_name': 'sentence-transformers/all-MiniLM-L6-v2', }, - "bge-m3": { # Example for a third-party API model - "type": "third_party_api", - "model_name": "bge-m3", - "api_endpoint": "https://api.qhaigc.net/v1/embeddings", - "headers": { - "Content-Type": "application/json", - "Authorization": f"Bearer {os.getenv('rag_api_key')}" - }, - "payload_template": { - "model": "bge-m3", - "input": "" - }, - "embedding_dimension": 1024 + 'bge-m3': { # Example for a third-party API model + 'type': 'third_party_api', + 'model_name': 'bge-m3', + 'api_endpoint': 'https://api.qhaigc.net/v1/embeddings', + 'headers': {'Content-Type': 'application/json', 'Authorization': f'Bearer {os.getenv("rag_api_key")}'}, + 'payload_template': {'model': 'bge-m3', 'input': ''}, + 'embedding_dimension': 1024, }, - "OpenAI-Ada-002": { - "type": "third_party_api", 
- "model_name": "text-embedding-ada-002", - "api_endpoint": "https://api.openai.com/v1/embeddings", - "headers": { - "Content-Type": "application/json", - "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}" # Ensure OPENAI_API_KEY is set + 'OpenAI-Ada-002': { + 'type': 'third_party_api', + 'model_name': 'text-embedding-ada-002', + 'api_endpoint': 'https://api.openai.com/v1/embeddings', + 'headers': { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {os.getenv("OPENAI_API_KEY")}', # Ensure OPENAI_API_KEY is set }, - "payload_template": { - "model": "text-embedding-ada-002", - "input": "" # Text will be injected here + 'payload_template': { + 'model': 'text-embedding-ada-002', + 'input': '', # Text will be injected here }, - "embedding_dimension": 1536 + 'embedding_dimension': 1536, }, - "OpenAI-Embedding-3-Small": { - "type": "third_party_api", - "model_name": "text-embedding-3-small", - "api_endpoint": "https://api.openai.com/v1/embeddings", - "headers": { - "Content-Type": "application/json", - "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}" - }, - "payload_template": { - "model": "text-embedding-3-small", - "input": "", + 'OpenAI-Embedding-3-Small': { + 'type': 'third_party_api', + 'model_name': 'text-embedding-3-small', + 'api_endpoint': 'https://api.openai.com/v1/embeddings', + 'headers': {'Content-Type': 'application/json', 'Authorization': f'Bearer {os.getenv("OPENAI_API_KEY")}'}, + 'payload_template': { + 'model': 'text-embedding-3-small', + 'input': '', # "dimensions": 512 # Optional: uncomment if you want a specific output dimension }, - "embedding_dimension": 1536 # Default max dimension for text-embedding-3-small + 'embedding_dimension': 1536, # Default max dimension for text-embedding-3-small }, -} \ No newline at end of file +} diff --git a/pkg/rag/knowledge/services/parser.py b/pkg/rag/knowledge/services/parser.py index 5fa7d589..bea49721 100644 --- a/pkg/rag/knowledge/services/parser.py +++ 
b/pkg/rag/knowledge/services/parser.py @@ -1,22 +1,21 @@ - import PyPDF2 from docx import Document import pandas as pd -import csv import chardet -from typing import Union, List, Callable, Any +from typing import Union, Callable, Any import logging import markdown from bs4 import BeautifulSoup import ebooklib from ebooklib import epub import re -import asyncio # Import asyncio for async operations +import asyncio # Import asyncio for async operations import os # Configure logging logger = logging.getLogger(__name__) + class FileParser: """ A robust file parser class to extract text content from various document formats. @@ -24,8 +23,8 @@ class FileParser: All core file reading operations are designed to be run synchronously in a thread pool to avoid blocking the asyncio event loop. """ + def __init__(self): - self.logger = logging.getLogger(self.__class__.__name__) async def _run_sync(self, sync_func: Callable, *args: Any, **kwargs: Any) -> Any: @@ -36,14 +35,14 @@ class FileParser: try: return await asyncio.to_thread(sync_func, *args, **kwargs) except Exception as e: - self.logger.error(f"Error running synchronous function {sync_func.__name__}: {e}") + self.logger.error(f'Error running synchronous function {sync_func.__name__}: {e}') raise async def parse(self, file_path: str) -> Union[str, None]: """ Parses the file based on its extension and returns the extracted text content. This is the main asynchronous entry point for parsing. - + Args: file_path (str): The path to the file to be parsed. @@ -51,21 +50,21 @@ class FileParser: Union[str, None]: The extracted text content as a single string, or None if parsing fails. 
""" if not file_path or not os.path.exists(file_path): - self.logger.error(f"Invalid file path provided: {file_path}") + self.logger.error(f'Invalid file path provided: {file_path}') return None file_extension = file_path.split('.')[-1].lower() parser_method = getattr(self, f'_parse_{file_extension}', None) - + if parser_method is None: - self.logger.error(f"Unsupported file format: {file_extension} for file {file_path}") + self.logger.error(f'Unsupported file format: {file_extension} for file {file_path}') return None - + try: # Pass file_path to the specific parser methods return await parser_method(file_path) except Exception as e: - self.logger.error(f"Failed to parse {file_extension} file {file_path}: {e}") + self.logger.error(f'Failed to parse {file_extension} file {file_path}: {e}') return None # --- Helper for reading files with encoding detection --- @@ -74,15 +73,16 @@ class FileParser: Reads a file with automatic encoding detection, ensuring the synchronous file read operation runs in a separate thread. 
""" + def _read_sync(): with open(file_path, 'rb') as file: raw_data = file.read() detected = chardet.detect(raw_data) encoding = detected['encoding'] or 'utf-8' - + if mode == 'r': return raw_data.decode(encoding, errors='ignore') - return raw_data # For binary mode + return raw_data # For binary mode return await self._run_sync(_read_sync) @@ -90,12 +90,13 @@ class FileParser: async def _parse_txt(self, file_path: str) -> str: """Parses a TXT file and returns its content.""" - self.logger.info(f"Parsing TXT file: {file_path}") + self.logger.info(f'Parsing TXT file: {file_path}') return await self._read_file_content(file_path, mode='r') async def _parse_pdf(self, file_path: str) -> str: """Parses a PDF file and returns its text content.""" - self.logger.info(f"Parsing PDF file: {file_path}") + self.logger.info(f'Parsing PDF file: {file_path}') + def _parse_pdf_sync(): text_content = [] with open(file_path, 'rb') as file: @@ -105,57 +106,69 @@ class FileParser: if text: text_content.append(text) return '\n'.join(text_content) + return await self._run_sync(_parse_pdf_sync) async def _parse_docx(self, file_path: str) -> str: """Parses a DOCX file and returns its text content.""" - self.logger.info(f"Parsing DOCX file: {file_path}") + self.logger.info(f'Parsing DOCX file: {file_path}') + def _parse_docx_sync(): doc = Document(file_path) text_content = [paragraph.text for paragraph in doc.paragraphs if paragraph.text.strip()] return '\n'.join(text_content) + return await self._run_sync(_parse_docx_sync) - + async def _parse_doc(self, file_path: str) -> str: """Handles .doc files, explicitly stating lack of direct support.""" - self.logger.warning(f"Direct .doc parsing is not supported for {file_path}. Please convert to .docx first.") - raise NotImplementedError("Direct .doc parsing not supported. Please convert to .docx first.") - + self.logger.warning(f'Direct .doc parsing is not supported for {file_path}. 
Please convert to .docx first.') + raise NotImplementedError('Direct .doc parsing not supported. Please convert to .docx first.') + async def _parse_xlsx(self, file_path: str) -> str: """Parses an XLSX file, returning text from all sheets.""" - self.logger.info(f"Parsing XLSX file: {file_path}") + self.logger.info(f'Parsing XLSX file: {file_path}') + def _parse_xlsx_sync(): excel_file = pd.ExcelFile(file_path) all_sheet_content = [] for sheet_name in excel_file.sheet_names: df = pd.read_excel(file_path, sheet_name=sheet_name) - sheet_text = f"--- Sheet: {sheet_name} ---\n{df.to_string(index=False)}\n" + sheet_text = f'--- Sheet: {sheet_name} ---\n{df.to_string(index=False)}\n' all_sheet_content.append(sheet_text) return '\n'.join(all_sheet_content) + return await self._run_sync(_parse_xlsx_sync) - + async def _parse_csv(self, file_path: str) -> str: """Parses a CSV file and returns its content as a string.""" - self.logger.info(f"Parsing CSV file: {file_path}") + self.logger.info(f'Parsing CSV file: {file_path}') + def _parse_csv_sync(): # pd.read_csv can often detect encoding, but explicit detection is safer - raw_data = self._read_file_content(file_path, mode='rb') # Note: this will need to be await outside this sync function + raw_data = self._read_file_content( + file_path, mode='rb' + ) # Note: this will need to be await outside this sync function + _ = raw_data # For simplicity, we'll let pandas handle encoding internally after a raw read. # A more robust solution might pass encoding directly to pd.read_csv after detection. 
detected = chardet.detect(open(file_path, 'rb').read()) encoding = detected['encoding'] or 'utf-8' df = pd.read_csv(file_path, encoding=encoding) return df.to_string(index=False) + return await self._run_sync(_parse_csv_sync) - + async def _parse_markdown(self, file_path: str) -> str: """Parses a Markdown file, converting it to structured plain text.""" - self.logger.info(f"Parsing Markdown file: {file_path}") + self.logger.info(f'Parsing Markdown file: {file_path}') + def _parse_markdown_sync(): - md_content = self._read_file_content(file_path, mode='r') # This is a synchronous call within a sync function + md_content = self._read_file_content( + file_path, mode='r' + ) # This is a synchronous call within a sync function html_content = markdown.markdown( - md_content, - extensions=['extra', 'codehilite', 'tables', 'toc', 'fenced_code'] + md_content, extensions=['extra', 'codehilite', 'tables', 'toc', 'fenced_code'] ) soup = BeautifulSoup(html_content, 'html.parser') text_parts = [] @@ -169,13 +182,13 @@ class FileParser: text_parts.append(text) elif element.name in ['ul', 'ol']: for li in element.find_all('li'): - text_parts.append(f"* {li.get_text().strip()}") + text_parts.append(f'* {li.get_text().strip()}') elif element.name == 'pre': code_block = element.get_text().strip() if code_block: - text_parts.append(f"```\n{code_block}\n```") + text_parts.append(f'```\n{code_block}\n```') elif element.name == 'table': - table_str = self._extract_table_to_markdown_sync(element) # Call sync helper + table_str = self._extract_table_to_markdown_sync(element) # Call sync helper if table_str: text_parts.append(table_str) elif element.name: @@ -184,15 +197,17 @@ class FileParser: text_parts.append(text) cleaned_text = re.sub(r'\n\s*\n', '\n\n', '\n'.join(text_parts)) return cleaned_text.strip() + return await self._run_sync(_parse_markdown_sync) async def _parse_html(self, file_path: str) -> str: """Parses an HTML file, extracting structured plain text.""" - 
self.logger.info(f"Parsing HTML file: {file_path}") + self.logger.info(f'Parsing HTML file: {file_path}') + def _parse_html_sync(): - html_content = self._read_file_content(file_path, mode='r') # Sync call within sync function + html_content = self._read_file_content(file_path, mode='r') # Sync call within sync function soup = BeautifulSoup(html_content, 'html.parser') - for script_or_style in soup(["script", "style"]): + for script_or_style in soup(['script', 'style']): script_or_style.decompose() text_parts = [] for element in soup.body.children if soup.body else soup.children: @@ -207,9 +222,9 @@ class FileParser: for li in element.find_all('li'): text = li.get_text().strip() if text: - text_parts.append(f"* {text}") + text_parts.append(f'* {text}') elif element.name == 'table': - table_str = self._extract_table_to_markdown_sync(element) # Call sync helper + table_str = self._extract_table_to_markdown_sync(element) # Call sync helper if table_str: text_parts.append(table_str) elif element.name: @@ -218,39 +233,42 @@ class FileParser: text_parts.append(text) cleaned_text = re.sub(r'\n\s*\n', '\n\n', '\n'.join(text_parts)) return cleaned_text.strip() + return await self._run_sync(_parse_html_sync) - + async def _parse_epub(self, file_path: str) -> str: """Parses an EPUB file, extracting metadata and content.""" - self.logger.info(f"Parsing EPUB file: {file_path}") + self.logger.info(f'Parsing EPUB file: {file_path}') + def _parse_epub_sync(): book = epub.read_epub(file_path) text_content = [] title_meta = book.get_metadata('DC', 'title') if title_meta: - text_content.append(f"Title: {title_meta[0][0]}") + text_content.append(f'Title: {title_meta[0][0]}') creator_meta = book.get_metadata('DC', 'creator') if creator_meta: - text_content.append(f"Author: {creator_meta[0][0]}") + text_content.append(f'Author: {creator_meta[0][0]}') date_meta = book.get_metadata('DC', 'date') if date_meta: - text_content.append(f"Publish Date: {date_meta[0][0]}") + 
text_content.append(f'Publish Date: {date_meta[0][0]}') toc = book.get_toc() if toc: - text_content.append("\n--- Table of Contents ---") - self._add_toc_items_sync(toc, text_content, level=0) # Call sync helper - text_content.append("--- End of Table of Contents ---\n") + text_content.append('\n--- Table of Contents ---') + self._add_toc_items_sync(toc, text_content, level=0) # Call sync helper + text_content.append('--- End of Table of Contents ---\n') for item in book.get_items(): if item.get_type() == ebooklib.ITEM_DOCUMENT: html_content = item.get_content().decode('utf-8', errors='ignore') soup = BeautifulSoup(html_content, 'html.parser') - for junk in soup(["script", "style", "nav", "header", "footer"]): + for junk in soup(['script', 'style', 'nav', 'header', 'footer']): junk.decompose() text = soup.get_text(separator='\n', strip=True) text = re.sub(r'\n\s*\n', '\n\n', text) if text: text_content.append(text) return re.sub(r'\n\s*\n', '\n\n', '\n'.join(text_content)).strip() + return await self._run_sync(_parse_epub_sync) def _add_toc_items_sync(self, toc_list: list, text_content: list, level: int): @@ -259,10 +277,10 @@ class FileParser: for item in toc_list: if isinstance(item, tuple): chapter, subchapters = item - text_content.append(f"{indent}- {chapter.title}") + text_content.append(f'{indent}- {chapter.title}') self._add_toc_items_sync(subchapters, text_content, level + 1) else: - text_content.append(f"{indent}- {item.title}") + text_content.append(f'{indent}- {item.title}') def _extract_table_to_markdown_sync(self, table_element: BeautifulSoup) -> str: """Helper to convert a BeautifulSoup table element into a Markdown table string (synchronous).""" @@ -272,17 +290,17 @@ class FileParser: cells = [td.get_text().strip() for td in tr.find_all('td')] if cells: rows.append(cells) - + if not headers and not rows: - return "" + return '' table_lines = [] if headers: table_lines.append(' | '.join(headers)) table_lines.append(' | '.join(['---'] * len(headers))) 
- + for row_cells in rows: padded_cells = row_cells + [''] * (len(headers) - len(row_cells)) if headers else row_cells table_lines.append(' | '.join(padded_cells)) - - return '\n'.join(table_lines) \ No newline at end of file + + return '\n'.join(table_lines) diff --git a/pkg/rag/knowledge/services/retriever.py b/pkg/rag/knowledge/services/retriever.py index 4da81eb1..f563f9b3 100644 --- a/pkg/rag/knowledge/services/retriever.py +++ b/pkg/rag/knowledge/services/retriever.py @@ -1,7 +1,6 @@ # services/retriever.py -import asyncio import logging -import numpy as np # Make sure numpy is imported +import numpy as np # Make sure numpy is imported from typing import List, Dict, Any from sqlalchemy.orm import Session from pkg.rag.knowledge.services.base_service import BaseService @@ -11,6 +10,7 @@ from pkg.rag.knowledge.services.chroma_manager import ChromaIndexManager logger = logging.getLogger(__name__) + class Retriever(BaseService): def __init__(self, model_type: str, model_name_key: str, chroma_manager: ChromaIndexManager): super().__init__() @@ -22,10 +22,14 @@ class Retriever(BaseService): self.embedding_model: BaseEmbeddingModel = self._load_embedding_model() def _load_embedding_model(self) -> BaseEmbeddingModel: - self.logger.info(f"Loading retriever embedding model: type={self.model_type}, name_key={self.model_name_key}...") + self.logger.info( + f'Loading retriever embedding model: type={self.model_type}, name_key={self.model_name_key}...' + ) try: model = EmbeddingModelFactory.create_model(self.model_type, self.model_name_key) - self.logger.info(f"Retriever embedding model '{self.model_name_key}' loaded. Output dimension: {model.embedding_dimension}") + self.logger.info( + f"Retriever embedding model '{self.model_name_key}' loaded. 
Output dimension: {model.embedding_dimension}" + ) return model except Exception as e: self.logger.error(f"Failed to load retriever embedding model '{self.model_name_key}': {e}") @@ -33,43 +37,42 @@ class Retriever(BaseService): async def retrieve(self, query: str, k: int = 5) -> List[Dict[str, Any]]: if not self.embedding_model: - raise RuntimeError("Retriever embedding model not loaded. Please check Retriever initialization.") + raise RuntimeError('Retriever embedding model not loaded. Please check Retriever initialization.') self.logger.info(f"Retrieving for query: '{query}' with k={k} using {self.model_name_key}") query_embedding: List[float] = await self.embedding_model.embed_query(query) query_embedding_np = np.array([query_embedding], dtype=np.float32) - chroma_results = await self._run_sync( - self.chroma_manager.search_sync, - query_embedding_np, k - ) + chroma_results = await self._run_sync(self.chroma_manager.search_sync, query_embedding_np, k) # 'ids' is always returned by ChromaDB, even if not explicitly in 'include' - matched_chroma_ids = chroma_results.get("ids", [[]])[0] - distances = chroma_results.get("distances", [[]])[0] - chroma_metadatas = chroma_results.get("metadatas", [[]])[0] - chroma_documents = chroma_results.get("documents", [[]])[0] + matched_chroma_ids = chroma_results.get('ids', [[]])[0] + distances = chroma_results.get('distances', [[]])[0] + chroma_metadatas = chroma_results.get('metadatas', [[]])[0] + chroma_documents = chroma_results.get('documents', [[]])[0] if not matched_chroma_ids: - self.logger.info("No relevant chunks found in Chroma.") + self.logger.info('No relevant chunks found in Chroma.') return [] db_chunk_ids = [] for metadata in chroma_metadatas: - if "chunk_id" in metadata: - db_chunk_ids.append(metadata["chunk_id"]) + if 'chunk_id' in metadata: + db_chunk_ids.append(metadata['chunk_id']) else: self.logger.warning(f"Metadata missing 'chunk_id': {metadata}. 
Skipping this entry.") if not db_chunk_ids: - self.logger.warning("No valid chunk_ids extracted from Chroma results metadata.") + self.logger.warning('No valid chunk_ids extracted from Chroma results metadata.') return [] - self.logger.info(f"Fetching {len(db_chunk_ids)} chunk details from relational database...") + self.logger.info(f'Fetching {len(db_chunk_ids)} chunk details from relational database...') chunks_from_db = await self._run_sync( - lambda cids: self._db_get_chunks_sync(SessionLocal(), cids), # Ensure SessionLocal is passed correctly for _db_get_chunks_sync - db_chunk_ids + lambda cids: self._db_get_chunks_sync( + SessionLocal(), cids + ), # Ensure SessionLocal is passed correctly for _db_get_chunks_sync + db_chunk_ids, ) chunk_map = {chunk.id: chunk for chunk in chunks_from_db} @@ -80,27 +83,29 @@ class Retriever(BaseService): # Ensure original_chunk_id is int for DB lookup original_chunk_id = int(chroma_id.split('_')[-1]) except (ValueError, IndexError): - self.logger.warning(f"Could not parse chunk_id from Chroma ID: {chroma_id}. Skipping.") + self.logger.warning(f'Could not parse chunk_id from Chroma ID: {chroma_id}. 
Skipping.') continue chunk_text_from_chroma = chroma_documents[i] distance = float(distances[i]) - file_id_from_chroma = chroma_metadatas[i].get("file_id") + file_id_from_chroma = chroma_metadatas[i].get('file_id') chunk_from_db = chunk_map.get(original_chunk_id) - results_list.append({ - "chunk_id": original_chunk_id, - "text": chunk_from_db.text if chunk_from_db else chunk_text_from_chroma, - "distance": distance, - "file_id": file_id_from_chroma - }) + results_list.append( + { + 'chunk_id': original_chunk_id, + 'text': chunk_from_db.text if chunk_from_db else chunk_text_from_chroma, + 'distance': distance, + 'file_id': file_id_from_chroma, + } + ) - self.logger.info(f"Retrieved {len(results_list)} chunks.") + self.logger.info(f'Retrieved {len(results_list)} chunks.') return results_list def _db_get_chunks_sync(self, session: Session, chunk_ids: List[int]) -> List[Chunk]: - self.logger.debug(f"Fetching {len(chunk_ids)} chunk details from database (sync).") + self.logger.debug(f'Fetching {len(chunk_ids)} chunk details from database (sync).') chunks = session.query(Chunk).filter(Chunk.id.in_(chunk_ids)).all() session.close() - return chunks \ No newline at end of file + return chunks From bef0d73e83703e79e31bc178f96edfbb943c8d9b Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sun, 6 Jul 2025 10:25:28 +0800 Subject: [PATCH 030/257] feat: basic definition --- web/src/app/home/knowledge/KBDetailDialog.tsx | 89 +++++++++++++++++++ web/src/app/infra/entities/api/index.ts | 10 +++ web/src/app/infra/http/HttpClient.ts | 21 +++++ web/src/i18n/locales/en-US.ts | 2 + 4 files changed, 122 insertions(+) create mode 100644 web/src/app/home/knowledge/KBDetailDialog.tsx diff --git a/web/src/app/home/knowledge/KBDetailDialog.tsx b/web/src/app/home/knowledge/KBDetailDialog.tsx new file mode 100644 index 00000000..5291dd59 --- /dev/null +++ b/web/src/app/home/knowledge/KBDetailDialog.tsx @@ -0,0 +1,89 @@ +'use client'; + +import { useEffect, useState } from 'react'; +import { + 
Dialog, + DialogContent, + DialogHeader, + DialogTitle, + DialogFooter, +} from '@/components/ui/dialog'; +import { Button } from '@/components/ui/button'; +import { useTranslation } from 'react-i18next'; +import { z } from 'zod'; +import { httpClient } from '@/app/infra/http/HttpClient'; +import { KnowledgeBase } from '@/app/infra/entities/api'; + +interface KBDetailDialogProps { + open: boolean; + onOpenChange: (open: boolean) => void; + kbId?: string; + onFormSubmit: (value: z.infer) => void; + onFormCancel: () => void; + onKbDeleted: () => void; + onNewKbCreated: (kbId: string) => void; +} + +export default function KBDetailDialog({ + open, + onOpenChange, + kbId: propKbId, + onFormSubmit, + onFormCancel, + onKbDeleted, + onNewKbCreated, +}: KBDetailDialogProps) { + const { t } = useTranslation(); + const [kbId, setKbId] = useState(propKbId); + const [activeMenu, setActiveMenu] = useState('metadata'); + const [showDeleteConfirm, setShowDeleteConfirm] = useState(false); + + useEffect(() => { + setKbId(propKbId); + setActiveMenu('metadata'); + }, [propKbId, open]); + + const menu = [ + { + key: 'metadata', + label: t('knowledge.metadata'), + icon: ( + + + + ), + }, + { + key: 'files', + label: t('knowledge.files'), + icon: ( + + + + ), + }, + ]; + + if (!kbId) { + // new kb + return ( + + +
+ + {t('knowledge.newKb')} + +
+
+
+ ); + } +} diff --git a/web/src/app/infra/entities/api/index.ts b/web/src/app/infra/entities/api/index.ts index a44b1991..b230cf9e 100644 --- a/web/src/app/infra/entities/api/index.ts +++ b/web/src/app/infra/entities/api/index.ts @@ -150,6 +150,16 @@ export interface KnowledgeBase { updated_at?: string; } +export interface ApiRespKnowledgeBaseFiles { + files: KnowledgeBaseFile[]; +} + +export interface KnowledgeBaseFile { + file_id: string; + file_name: string; + status: string; +} + // plugins export interface ApiRespPlugins { plugins: Plugin[]; diff --git a/web/src/app/infra/http/HttpClient.ts b/web/src/app/infra/http/HttpClient.ts index 5c6e0abd..8842b04d 100644 --- a/web/src/app/infra/http/HttpClient.ts +++ b/web/src/app/infra/http/HttpClient.ts @@ -37,6 +37,7 @@ import { ApiRespKnowledgeBases, ApiRespKnowledgeBase, KnowledgeBase, + ApiRespKnowledgeBaseFiles, } from '@/app/infra/entities/api'; import { GetBotLogsRequest } from '@/app/infra/http/requestParam/bots/GetBotLogsRequest'; import { GetBotLogsResponse } from '@/app/infra/http/requestParam/bots/GetBotLogsResponse'; @@ -430,6 +431,11 @@ class HttpClient { return this.post(`/api/v1/platform/bots/${botId}/logs`, request); } + // ============ File management API ============ + public uploadDocumentFile(file: File): Promise<{ file_id: string }> { + return this.post('/api/v1/files/documents', file); + } + // ============ Knowledge Base API ============ public getKnowledgeBases(): Promise { return this.get('/api/v1/knowledge/bases'); @@ -443,6 +449,21 @@ class HttpClient { return this.post('/api/v1/knowledge/bases', base); } + public uploadKnowledgeBaseFile( + uuid: string, + file_id: string, + ): Promise { + return this.post(`/api/v1/knowledge/bases/${uuid}/files`, { + file_id, + }); + } + + public getKnowledgeBaseFiles( + uuid: string, + ): Promise { + return this.get(`/api/v1/knowledge/bases/${uuid}/files`); + } + // ============ Plugins API ============ public getPlugins(): Promise { return 
this.get('/api/v1/plugins'); diff --git a/web/src/i18n/locales/en-US.ts b/web/src/i18n/locales/en-US.ts index 5596e35f..7a1f79c8 100644 --- a/web/src/i18n/locales/en-US.ts +++ b/web/src/i18n/locales/en-US.ts @@ -233,6 +233,8 @@ const enUS = { knowledge: { title: 'Knowledge', description: 'Configuring knowledge bases for improved LLM responses', + metadata: 'Metadata', + files: 'Files', }, register: { title: 'Initialize LangBot 👋', From 09884d3152e5be32a0bb7738fd1f365edae4df86 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sun, 6 Jul 2025 10:34:24 +0800 Subject: [PATCH 031/257] revert: 0203faa --- pkg/platform/sources/dingtalk.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pkg/platform/sources/dingtalk.py b/pkg/platform/sources/dingtalk.py index 3147c984..a40b0f9b 100644 --- a/pkg/platform/sources/dingtalk.py +++ b/pkg/platform/sources/dingtalk.py @@ -116,6 +116,15 @@ class DingTalkAdapter(adapter.MessagePlatformAdapter): self.bot_account_id = self.config['robot_name'] + self.bot = DingTalkClient( + client_id=config['client_id'], + client_secret=config['client_secret'], + robot_name=config['robot_name'], + robot_code=config['robot_code'], + markdown_card=config['markdown_card'], + logger=self.logger, + ) + async def reply_message( self, message_source: platform_events.MessageEvent, @@ -157,15 +166,6 @@ class DingTalkAdapter(adapter.MessagePlatformAdapter): self.bot.on_message('GroupMessage')(on_message) async def run_async(self): - config = self.config - self.bot = DingTalkClient( - client_id=config['client_id'], - client_secret=config['client_secret'], - robot_name=config['robot_name'], - robot_code=config['robot_code'], - markdown_card=config['markdown_card'], - logger=self.logger, - ) await self.bot.start() async def kill(self) -> bool: From a0b7d759ac8bfdd499dcf1f137c6df2fbcb3dd1c Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sun, 6 Jul 2025 10:46:32 +0800 Subject: [PATCH 032/257] chore: release v4.0.8.1 --- 
pkg/utils/constants.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/utils/constants.py b/pkg/utils/constants.py index 8c4da3cc..47d4036d 100644 --- a/pkg/utils/constants.py +++ b/pkg/utils/constants.py @@ -1,4 +1,4 @@ -semantic_version = 'v4.0.8' +semantic_version = 'v4.0.8.1' required_database_version = 3 """标记本版本所需要的数据库结构版本,用于判断数据库迁移""" diff --git a/pyproject.toml b/pyproject.toml index 28b1a28f..3ef9b98c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "langbot" -version = "4.0.7" +version = "4.0.8.1" description = "高稳定、支持扩展、多模态 - 大模型原生即时通信机器人平台" readme = "README.md" requires-python = ">=3.10.1" From ebd8e014c61d7389dd44b16d840abd7160d5854d Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sun, 6 Jul 2025 15:52:53 +0800 Subject: [PATCH 033/257] feat: rag fe framework --- web/src/app/home/bots/BotDetailDialog.tsx | 2 - .../home/bots/components/bot-form/BotForm.tsx | 41 ----- web/src/app/home/bots/page.tsx | 2 +- web/src/app/home/knowledge/KBDetailDialog.tsx | 134 +++++++++++++- .../knowledge/components/kb-card/KBCard.tsx | 2 +- .../knowledge/components/kb-docs/KBDoc.tsx | 0 .../components/kb-form/ChooseEntity.ts | 4 + .../knowledge/components/kb-form/KBForm.tsx | 172 ++++++++++++++++++ web/src/app/home/knowledge/page.tsx | 84 ++++++++- web/src/i18n/locales/en-US.ts | 16 +- 10 files changed, 398 insertions(+), 59 deletions(-) create mode 100644 web/src/app/home/knowledge/components/kb-docs/KBDoc.tsx create mode 100644 web/src/app/home/knowledge/components/kb-form/ChooseEntity.ts create mode 100644 web/src/app/home/knowledge/components/kb-form/KBForm.tsx diff --git a/web/src/app/home/bots/BotDetailDialog.tsx b/web/src/app/home/bots/BotDetailDialog.tsx index 1c4a2403..cad04e7b 100644 --- a/web/src/app/home/bots/BotDetailDialog.tsx +++ b/web/src/app/home/bots/BotDetailDialog.tsx @@ -130,7 +130,6 @@ export default function BotDetailDialog({ onFormCancel={handleFormCancel} 
onBotDeleted={handleBotDeleted} onNewBotCreated={handleNewBotCreated} - hideButtons={true} /> @@ -202,7 +201,6 @@ export default function BotDetailDialog({ onFormCancel={handleFormCancel} onBotDeleted={handleBotDeleted} onNewBotCreated={handleNewBotCreated} - hideButtons={true} /> )} {activeMenu === 'logs' && botId && ( diff --git a/web/src/app/home/bots/components/bot-form/BotForm.tsx b/web/src/app/home/bots/components/bot-form/BotForm.tsx index 40a902c2..fe36d33b 100644 --- a/web/src/app/home/bots/components/bot-form/BotForm.tsx +++ b/web/src/app/home/bots/components/bot-form/BotForm.tsx @@ -67,14 +67,12 @@ export default function BotForm({ onFormCancel, onBotDeleted, onNewBotCreated, - hideButtons = false, }: { initBotId?: string; onFormSubmit: (value: z.infer>) => void; onFormCancel: () => void; onBotDeleted: () => void; onNewBotCreated: (botId: string) => void; - hideButtons?: boolean; }) { const { t } = useTranslation(); const formSchema = getFormSchema(t); @@ -527,45 +525,6 @@ export default function BotForm({ )} - - {!hideButtons && ( -
-
- {!initBotId && ( - - )} - {initBotId && ( - <> - - - - )} - -
-
- )} diff --git a/web/src/app/home/bots/page.tsx b/web/src/app/home/bots/page.tsx index d4305898..ad130fae 100644 --- a/web/src/app/home/bots/page.tsx +++ b/web/src/app/home/bots/page.tsx @@ -92,7 +92,7 @@ export default function BotConfigPage() { } return ( -
+
void; kbId?: string; + // eslint-disable-next-line @typescript-eslint/no-explicit-any onFormSubmit: (value: z.infer) => void; onFormCancel: () => void; onKbDeleted: () => void; @@ -36,7 +48,7 @@ export default function KBDetailDialog({ const { t } = useTranslation(); const [kbId, setKbId] = useState(propKbId); const [activeMenu, setActiveMenu] = useState('metadata'); - const [showDeleteConfirm, setShowDeleteConfirm] = useState(false); + // const [showDeleteConfirm, setShowDeleteConfirm] = useState(false); useEffect(() => { setKbId(propKbId); @@ -58,8 +70,8 @@ export default function KBDetailDialog({ ), }, { - key: 'files', - label: t('knowledge.files'), + key: 'documents', + label: t('knowledge.documents'), icon: (
- {t('knowledge.newKb')} + {t('knowledge.createKnowledgeBase')} +
+ {activeMenu === 'metadata' && ( + + )} + {activeMenu === 'documents' &&
documents
} +
+ {activeMenu === 'metadata' && ( + +
+ + +
+
+ )} ); } + + return ( + <> + + + + + + + + + {menu.map((item) => ( + + setActiveMenu(item.key)} + > + + {item.icon} + {item.label} + + + + ))} + + + + + +
+ + + {activeMenu === 'metadata' + ? t('knowledge.createKnowledgeBase') + : t('knowledge.editDocument')} + + +
+ {activeMenu === 'metadata' && ( + + )} + {activeMenu === 'documents' &&
documents
} +
+ {activeMenu === 'metadata' && ( + +
+ + + +
+
+ )} +
+
+
+
+ + ); } diff --git a/web/src/app/home/knowledge/components/kb-card/KBCard.tsx b/web/src/app/home/knowledge/components/kb-card/KBCard.tsx index 5d49e738..560b0497 100644 --- a/web/src/app/home/knowledge/components/kb-card/KBCard.tsx +++ b/web/src/app/home/knowledge/components/kb-card/KBCard.tsx @@ -26,7 +26,7 @@ export default function KBCard({ kbCardVO }: { kbCardVO: KnowledgeBaseVO }) {
- {t('knowledge.bases.updateTime')} + {t('knowledge.updateTime')} {kbCardVO.lastUpdatedTimeAgo}
diff --git a/web/src/app/home/knowledge/components/kb-docs/KBDoc.tsx b/web/src/app/home/knowledge/components/kb-docs/KBDoc.tsx new file mode 100644 index 00000000..e69de29b diff --git a/web/src/app/home/knowledge/components/kb-form/ChooseEntity.ts b/web/src/app/home/knowledge/components/kb-form/ChooseEntity.ts new file mode 100644 index 00000000..54f983e4 --- /dev/null +++ b/web/src/app/home/knowledge/components/kb-form/ChooseEntity.ts @@ -0,0 +1,4 @@ +export interface IEmbeddingModelEntity { + label: string; + value: string; +} diff --git a/web/src/app/home/knowledge/components/kb-form/KBForm.tsx b/web/src/app/home/knowledge/components/kb-form/KBForm.tsx new file mode 100644 index 00000000..9ae51656 --- /dev/null +++ b/web/src/app/home/knowledge/components/kb-form/KBForm.tsx @@ -0,0 +1,172 @@ +import { useEffect, useState } from 'react'; +import { useForm } from 'react-hook-form'; +import { zodResolver } from '@hookform/resolvers/zod'; +import { z } from 'zod'; +import { useTranslation } from 'react-i18next'; +import { Input } from '@/components/ui/input'; +import { + Form, + FormControl, + FormField, + FormItem, + FormLabel, + FormMessage, + FormDescription, +} from '@/components/ui/form'; +import { IEmbeddingModelEntity } from './ChooseEntity'; +import { httpClient } from '@/app/infra/http/HttpClient'; +import { + Select, + SelectContent, + SelectGroup, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select'; + +const getFormSchema = (t: (key: string) => string) => + z.object({ + name: z.string().min(1, { message: t('knowledge.kbNameRequired') }), + description: z + .string() + .min(1, { message: t('knowledge.kbDescriptionRequired') }), + embeddingModelUUID: z + .string() + .min(1, { message: t('knowledge.embeddingModelUUIDRequired') }), + }); + +export default function KBForm({ + initKbId, + onFormSubmit, + onFormCancel, + onKbDeleted, + onNewKbCreated, +}: { + initKbId?: string; + // eslint-disable-next-line 
@typescript-eslint/no-explicit-any + onFormSubmit: (value: any) => void; + onFormCancel: () => void; + onKbDeleted: () => void; + onNewKbCreated: (kbId: string) => void; +}) { + const { t } = useTranslation(); + const formSchema = getFormSchema(t); + + const form = useForm>({ + resolver: zodResolver(formSchema), + defaultValues: { + name: '', + description: t('knowledge.defaultDescription'), + embeddingModelUUID: '', + }, + }); + + const [embeddingModelNameList, setEmbeddingModelNameList] = useState< + IEmbeddingModelEntity[] + >([]); + + useEffect(() => { + getEmbeddingModelNameList(); + }, []); + + const getEmbeddingModelNameList = async () => { + const resp = await httpClient.getProviderEmbeddingModels(); + setEmbeddingModelNameList( + resp.models.map((item) => { + return { + label: item.name, + value: item.uuid, + }; + }), + ); + }; + + return ( + <> +
+ +
+ ( + + + {t('knowledge.kbName')} + * + + + + + + + )} + /> + ( + + + {t('knowledge.kbDescription')} + * + + + + + + + )} + /> + ( + + + {t('knowledge.embeddingModelUUID')} + * + + +
+ +
+
+ + {t('knowledge.embeddingModelDescription')} + + +
+ )} + /> +
+
+ + + ); +} diff --git a/web/src/app/home/knowledge/page.tsx b/web/src/app/home/knowledge/page.tsx index 7ee25eac..99de73d8 100644 --- a/web/src/app/home/knowledge/page.tsx +++ b/web/src/app/home/knowledge/page.tsx @@ -3,32 +3,102 @@ import CreateCardComponent from '@/app/infra/basic-component/create-card-component/CreateCardComponent'; import styles from './knowledgeBase.module.css'; import { useTranslation } from 'react-i18next'; -import { useState } from 'react'; +import { useEffect, useState } from 'react'; import { KnowledgeBaseVO } from '@/app/home/knowledge/components/kb-card/KBCardVO'; import KBCard from '@/app/home/knowledge/components/kb-card/KBCard'; +import KBDetailDialog from '@/app/home/knowledge/KBDetailDialog'; +import { httpClient } from '@/app/infra/http/HttpClient'; +import { KnowledgeBase } from '@/app/infra/entities/api'; export default function KnowledgePage() { const { t } = useTranslation(); const [knowledgeBaseList, setKnowledgeBaseList] = useState( [], ); + const [selectedKbId, setSelectedKbId] = useState(''); + const [detailDialogOpen, setDetailDialogOpen] = useState(false); + + useEffect(() => { + getKnowledgeBaseList(); + }, []); + + async function getKnowledgeBaseList() { + const resp = await httpClient.getKnowledgeBases(); + setKnowledgeBaseList( + resp.bases.map((kb: KnowledgeBase) => { + const currentTime = new Date(); + const lastUpdatedTimeAgo = Math.floor( + (currentTime.getTime() - + new Date(kb.updated_at ?? currentTime.getTime()).getTime()) / + 1000 / + 60 / + 60 / + 24, + ); + + const lastUpdatedTimeAgoText = + lastUpdatedTimeAgo > 0 + ? 
` ${lastUpdatedTimeAgo} ${t('knowledge.daysAgo')}` + : t('knowledge.today'); + + return new KnowledgeBaseVO({ + id: kb.uuid || '', + name: kb.name, + description: kb.description, + embeddingModelUUID: kb.embedding_model_uuid, + lastUpdatedTimeAgo: lastUpdatedTimeAgoText, + }); + }), + ); + } const handleKBCardClick = (kbId: string) => { - // setIsEditForm(false); - // setModalOpen(true); + setSelectedKbId(kbId); + setDetailDialogOpen(true); + }; + + const handleCreateKBClick = () => { + setSelectedKbId(''); + setDetailDialogOpen(true); + }; + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const handleFormSubmit = (value: any) => { + console.log('handleFormSubmit', value); + }; + + const handleFormCancel = () => { + setDetailDialogOpen(false); + }; + + const handleKbDeleted = () => { + getKnowledgeBaseList(); + setDetailDialogOpen(false); + }; + + const handleNewKbCreated = () => { + getKnowledgeBaseList(); + setDetailDialogOpen(false); }; return (
+ +
{ - // setIsEditForm(false); - // setModalOpen(true); - }} + onClick={handleCreateKBClick} /> {knowledgeBaseList.map((kb) => { diff --git a/web/src/i18n/locales/en-US.ts b/web/src/i18n/locales/en-US.ts index 7a1f79c8..e50e7cc7 100644 --- a/web/src/i18n/locales/en-US.ts +++ b/web/src/i18n/locales/en-US.ts @@ -232,9 +232,23 @@ const enUS = { }, knowledge: { title: 'Knowledge', + createKnowledgeBase: 'Create Knowledge Base', description: 'Configuring knowledge bases for improved LLM responses', metadata: 'Metadata', - files: 'Files', + documents: 'Documents', + kbNameRequired: 'Knowledge base name cannot be empty', + kbDescriptionRequired: 'Knowledge base description cannot be empty', + embeddingModelUUIDRequired: 'Embedding model cannot be empty', + daysAgo: 'days ago', + today: 'Today', + kbName: 'Knowledge Base Name', + kbDescription: 'Knowledge Base Description', + defaultDescription: 'A knowledge base', + embeddingModelUUID: 'Embedding Model', + selectEmbeddingModel: 'Select Embedding Model', + embeddingModelDescription: + 'Used to vectorize the text, you can configure it in the Models page', + updateTime: 'Updated ', }, register: { title: 'Initialize LangBot 👋', From cd2534082698870662df7e0ebb14499e16b4f012 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sun, 6 Jul 2025 16:08:02 +0800 Subject: [PATCH 034/257] perf: en comments --- pkg/utils/constants.py | 2 +- web/src/app/home/knowledge/KBDetailDialog.tsx | 2 +- web/src/i18n/locales/en-US.ts | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/utils/constants.py b/pkg/utils/constants.py index 8c4da3cc..e8193839 100644 --- a/pkg/utils/constants.py +++ b/pkg/utils/constants.py @@ -1,7 +1,7 @@ semantic_version = 'v4.0.8' required_database_version = 3 -"""标记本版本所需要的数据库结构版本,用于判断数据库迁移""" +"""Tag the version of the database schema, used to check if the database needs to be migrated""" debug_mode = False diff --git a/web/src/app/home/knowledge/KBDetailDialog.tsx 
b/web/src/app/home/knowledge/KBDetailDialog.tsx index d79702bc..e3ab4f9d 100644 --- a/web/src/app/home/knowledge/KBDetailDialog.tsx +++ b/web/src/app/home/knowledge/KBDetailDialog.tsx @@ -163,7 +163,7 @@ export default function KBDetailDialog({ {activeMenu === 'metadata' - ? t('knowledge.createKnowledgeBase') + ? t('knowledge.editKnowledgeBase') : t('knowledge.editDocument')} diff --git a/web/src/i18n/locales/en-US.ts b/web/src/i18n/locales/en-US.ts index e50e7cc7..ecc43204 100644 --- a/web/src/i18n/locales/en-US.ts +++ b/web/src/i18n/locales/en-US.ts @@ -233,6 +233,8 @@ const enUS = { knowledge: { title: 'Knowledge', createKnowledgeBase: 'Create Knowledge Base', + editKnowledgeBase: 'Edit Knowledge Base', + editDocument: 'Documents', description: 'Configuring knowledge bases for improved LLM responses', metadata: 'Metadata', documents: 'Documents', From 244aaf6e2096e326e70b0f14cad9cabf7b12ec3a Mon Sep 17 00:00:00 2001 From: gaord Date: Mon, 7 Jul 2025 10:28:12 +0800 Subject: [PATCH 035/257] =?UTF-8?q?feat:=20=E8=81=8A=E5=A4=A9=E7=9A=84@?= =?UTF-8?q?=E7=94=A8=E6=88=B7id=E5=86=85=E5=AE=B9=E9=9C=80=E8=A6=81?= =?UTF-8?q?=E4=BF=9D=E7=95=99=20(#1564)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * converters could use the application logger * keep @targets in message for some plugins may need it to their functionality * fix:form wxid in config fix:传参问题,可以直接从config中拿到wxid --------- Co-authored-by: fdc310 <82008029+fdc310@users.noreply.github.com> --- pkg/platform/sources/wechatpad.py | 44 ++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/pkg/platform/sources/wechatpad.py b/pkg/platform/sources/wechatpad.py index fdd4a69b..c9fb34ab 100644 --- a/pkg/platform/sources/wechatpad.py +++ b/pkg/platform/sources/wechatpad.py @@ -38,10 +38,10 @@ import logging class WeChatPadMessageConverter(adapter.MessageConverter): - def __init__(self, config: dict): + def __init__(self, config: dict, 
logger: logging.Logger): self.config = config self.bot = WeChatPadClient(self.config["wechatpad_url"],self.config["token"]) - self.logger = logging.getLogger("WeChatPadMessageConverter") + self.logger = logger @staticmethod async def yiri2target( @@ -90,21 +90,30 @@ class WeChatPadMessageConverter(adapter.MessageConverter): async def target2yiri( self, message: dict, - bot_account_id: str + bot_account_id: str, ) -> platform_message.MessageChain: """外部消息转平台消息""" # 数据预处理 message_list = [] + bot_wxid = self.config['wxid'] ats_bot = False # 是否被@ content = message["content"]["str"] content_no_preifx = content # 群消息则去掉前缀 is_group_message = self._is_group_message(message) if is_group_message: ats_bot = self._ats_bot(message, bot_account_id) + self.logger.info(f"ats_bot: {ats_bot}; bot_account_id: {bot_account_id}; bot_wxid: {bot_wxid}") if "@所有人" in content: message_list.append(platform_message.AtAll()) elif ats_bot: message_list.append(platform_message.At(target=bot_account_id)) + + # 解析@信息并生成At组件 + at_targets = self._extract_at_targets(message) + for target_id in at_targets: + if target_id != bot_wxid: # 避免重复添加机器人的At + message_list.append(platform_message.At(target=target_id)) + content_no_preifx, _ = self._extract_content_and_sender(content) msg_type = message["msg_type"] @@ -458,6 +467,23 @@ class WeChatPadMessageConverter(adapter.MessageConverter): finally: return ats_bot + # 提取一下at的wxid列表 + def _extract_at_targets(self, message: dict) -> list[str]: + """从消息中提取被@用户的ID列表""" + at_targets = [] + try: + # 从msg_source中解析atuserlist + msg_source = message.get('msg_source', '') or '' + if len(msg_source) > 0: + msg_source_data = ET.fromstring(msg_source) + at_user_list = msg_source_data.findtext("atuserlist") or "" + if at_user_list: + # atuserlist格式通常是逗号分隔的用户ID列表 + at_targets = [user_id.strip() for user_id in at_user_list.split(',') if user_id.strip()] + except Exception as e: + self.logger.error(f"_extract_at_targets got except: {e}") + return at_targets + # 
提取一下content前面的sender_id, 和去掉前缀的内容 def _extract_content_and_sender(self, raw_content: str) -> Tuple[str, Optional[str]]: try: @@ -482,10 +508,10 @@ class WeChatPadMessageConverter(adapter.MessageConverter): class WeChatPadEventConverter(adapter.EventConverter): - def __init__(self, config: dict): + def __init__(self, config: dict, logger: logging.Logger): self.config = config - self.message_converter = WeChatPadMessageConverter(config) - self.logger = logging.getLogger("WeChatPadEventConverter") + self.message_converter = WeChatPadMessageConverter(config, logger) + self.logger = logger @staticmethod async def yiri2target( @@ -496,7 +522,7 @@ class WeChatPadEventConverter(adapter.EventConverter): async def target2yiri( self, event: dict, - bot_account_id: str + bot_account_id: str, ) -> platform_events.MessageEvent: # 排除公众号以及微信团队消息 @@ -572,8 +598,8 @@ class WeChatPadAdapter(adapter.MessagePlatformAdapter): self.logger = logger self.quart_app = quart.Quart(__name__) - self.message_converter = WeChatPadMessageConverter(config) - self.event_converter = WeChatPadEventConverter(config) + self.message_converter = WeChatPadMessageConverter(config, ap.logger) + self.event_converter = WeChatPadEventConverter(config, ap.logger) async def ws_message(self, data): """处理接收到的消息""" From 56248c350f797cff0f448117ddd53ae728645dd4 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Mon, 7 Jul 2025 19:00:55 +0800 Subject: [PATCH 036/257] chore: repo transferred --- .github/pull_request_template.md | 2 +- README.md | 16 ++++++++-------- README_EN.md | 14 +++++++------- README_JP.md | 14 +++++++------- main.py | 2 +- pkg/utils/announce.py | 2 +- pkg/utils/version.py | 2 +- pyproject.toml | 2 +- 8 files changed, 27 insertions(+), 27 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index f3389c25..71ef28fc 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -9,7 +9,7 @@ *请在方括号间写`x`以打勾 / Please tick the box with `x`* 
-- [ ] 阅读仓库[贡献指引](https://github.com/RockChinQ/LangBot/blob/master/CONTRIBUTING.md)了吗? / Have you read the [contribution guide](https://github.com/RockChinQ/LangBot/blob/master/CONTRIBUTING.md)? +- [ ] 阅读仓库[贡献指引](https://github.com/langbot-app/LangBot/blob/master/CONTRIBUTING.md)了吗? / Have you read the [contribution guide](https://github.com/langbot-app/LangBot/blob/master/CONTRIBUTING.md)? - [ ] 与项目所有者沟通过了吗? / Have you communicated with the project maintainer? - [ ] 我确定已自行测试所作的更改,确保功能符合预期。 / I have tested the changes and ensured they work as expected. diff --git a/README.md b/README.md index 6e0fa350..02bba74d 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ 项目主页部署文档插件介绍 | -提交插件 +提交插件
😎高稳定、🧩支持扩展、🦄多模态 - 大模型原生即时通信机器人平台🤖 @@ -21,10 +21,10 @@ [![Discord](https://img.shields.io/discord/1335141740050649118?logo=discord&labelColor=%20%235462eb&logoColor=%20%23f5f5f5&color=%20%235462eb)](https://discord.gg/wdNEHETs87) [![QQ Group](https://img.shields.io/badge/%E7%A4%BE%E5%8C%BAQQ%E7%BE%A4-966235608-blue)](https://qm.qq.com/q/JLi38whHum) -[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/RockChinQ/LangBot) -[![GitHub release (latest by date)](https://img.shields.io/github/v/release/RockChinQ/LangBot)](https://github.com/RockChinQ/LangBot/releases/latest) +[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/langbot-app/LangBot) +[![GitHub release (latest by date)](https://img.shields.io/github/v/release/langbot-app/LangBot)](https://github.com/langbot-app/LangBot/releases/latest) python -[![star](https://gitcode.com/RockChinQ/LangBot/star/badge.svg)](https://gitcode.com/RockChinQ/LangBot) +[![star](https://gitcode.com/langbot-app/LangBot/star/badge.svg)](https://gitcode.com/langbot-app/LangBot) 简体中文 / [English](README_EN.md) / [日本語](README_JP.md) / (PR for your language) @@ -44,7 +44,7 @@ #### Docker Compose 部署 ```bash -git clone https://github.com/RockChinQ/LangBot +git clone https://github.com/langbot-app/LangBot cd LangBot docker compose up -d ``` @@ -149,10 +149,10 @@ docker compose up -d ## 😘 社区贡献 -感谢以下[代码贡献者](https://github.com/RockChinQ/LangBot/graphs/contributors)和社区里其他成员对 LangBot 的贡献: +感谢以下[代码贡献者](https://github.com/langbot-app/LangBot/graphs/contributors)和社区里其他成员对 LangBot 的贡献: - - + + ## 😎 保持更新 diff --git a/README_EN.md b/README_EN.md index 07667f84..c6b68cfa 100644 --- a/README_EN.md +++ b/README_EN.md @@ -10,7 +10,7 @@ HomeDeploymentPlugin | -Submit Plugin +Submit Plugin
😎High Stability, 🧩Extension Supported, 🦄Multi-modal - LLM Native Instant Messaging Bot Platform🤖 @@ -20,8 +20,8 @@ [![Discord](https://img.shields.io/discord/1335141740050649118?logo=discord&labelColor=%20%235462eb&logoColor=%20%23f5f5f5&color=%20%235462eb)](https://discord.gg/wdNEHETs87) -[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/RockChinQ/LangBot) -[![GitHub release (latest by date)](https://img.shields.io/github/v/release/RockChinQ/LangBot)](https://github.com/RockChinQ/LangBot/releases/latest) +[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/langbot-app/LangBot) +[![GitHub release (latest by date)](https://img.shields.io/github/v/release/langbot-app/LangBot)](https://github.com/langbot-app/LangBot/releases/latest) python [简体中文](README.md) / English / [日本語](README_JP.md) / (PR for your language) @@ -42,7 +42,7 @@ #### Docker Compose Deployment ```bash -git clone https://github.com/RockChinQ/LangBot +git clone https://github.com/langbot-app/LangBot cd LangBot docker compose up -d ``` @@ -132,10 +132,10 @@ Directly use the released version to run, see the [Manual Deployment](https://do ## 🤝 Community Contribution -Thank you for the following [code contributors](https://github.com/RockChinQ/LangBot/graphs/contributors) and other members in the community for their contributions to LangBot: +Thank you for the following [code contributors](https://github.com/langbot-app/LangBot/graphs/contributors) and other members in the community for their contributions to LangBot: - - + + ## 😎 Stay Ahead diff --git a/README_JP.md b/README_JP.md index c54ce51b..aaee328f 100644 --- a/README_JP.md +++ b/README_JP.md @@ -10,7 +10,7 @@ ホームデプロイプラグイン | -プラグインの提出 +プラグインの提出
😎高い安定性、🧩拡張サポート、🦄マルチモーダル - LLMネイティブインスタントメッセージングボットプラットフォーム🤖 @@ -19,8 +19,8 @@
[![Discord](https://img.shields.io/discord/1335141740050649118?logo=discord&labelColor=%20%235462eb&logoColor=%20%23f5f5f5&color=%20%235462eb)](https://discord.gg/wdNEHETs87) -[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/RockChinQ/LangBot) -[![GitHub release (latest by date)](https://img.shields.io/github/v/release/RockChinQ/LangBot)](https://github.com/RockChinQ/LangBot/releases/latest) +[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/langbot-app/LangBot) +[![GitHub release (latest by date)](https://img.shields.io/github/v/release/langbot-app/LangBot)](https://github.com/langbot-app/LangBot/releases/latest) python [简体中文](README_CN.md) / [English](README.md) / [日本語](README_JP.md) / (PR for your language) @@ -41,7 +41,7 @@ #### Docker Compose デプロイ ```bash -git clone https://github.com/RockChinQ/LangBot +git clone https://github.com/langbot-app/LangBot cd LangBot docker compose up -d ``` @@ -131,10 +131,10 @@ LangBotはBTPanelにリストされています。BTPanelをインストール ## 🤝 コミュニティ貢献 -LangBot への貢献に対して、以下の [コード貢献者](https://github.com/RockChinQ/LangBot/graphs/contributors) とコミュニティの他のメンバーに感謝します。 +LangBot への貢献に対して、以下の [コード貢献者](https://github.com/langbot-app/LangBot/graphs/contributors) とコミュニティの他のメンバーに感謝します。 - - + + ## 😎 最新情報を入手 diff --git a/main.py b/main.py index 19cb32d6..1909e343 100644 --- a/main.py +++ b/main.py @@ -11,7 +11,7 @@ asciiart = r""" |____\__,_|_||_\__, |___/\___/\__| |___/ -⭐️ Open Source 开源地址: https://github.com/RockChinQ/LangBot +⭐️ Open Source 开源地址: https://github.com/langbot-app/LangBot 📖 Documentation 文档地址: https://docs.langbot.app """ diff --git a/pkg/utils/announce.py b/pkg/utils/announce.py index 7108a08c..a6b8539a 100644 --- a/pkg/utils/announce.py +++ b/pkg/utils/announce.py @@ -46,7 +46,7 @@ class AnnouncementManager: async def fetch_all(self) -> list[Announcement]: """获取所有公告""" resp = requests.get( - url='https://api.github.com/repos/RockChinQ/LangBot/contents/res/announcement.json', + 
url='https://api.github.com/repos/langbot-app/LangBot/contents/res/announcement.json', proxies=self.ap.proxy_mgr.get_forward_proxies(), timeout=5, ) diff --git a/pkg/utils/version.py b/pkg/utils/version.py index ec0683c3..b26b1e33 100644 --- a/pkg/utils/version.py +++ b/pkg/utils/version.py @@ -29,7 +29,7 @@ class VersionManager: async def get_release_list(self) -> list: """获取发行列表""" rls_list_resp = requests.get( - url='https://api.github.com/repos/RockChinQ/LangBot/releases', + url='https://api.github.com/repos/langbot-app/LangBot/releases', proxies=self.ap.proxy_mgr.get_forward_proxies(), timeout=5, ) diff --git a/pyproject.toml b/pyproject.toml index 3ef9b98c..2736ea90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,7 +80,7 @@ classifiers = [ [project.urls] Homepage = "https://langbot.app" Documentation = "https://docs.langbot.app" -Repository = "https://github.com/RockChinQ/langbot" +Repository = "https://github.com/langbot-app/LangBot" [dependency-groups] dev = [ From ac03a2dceb1bcb7da5e10571630ee85d47079a58 Mon Sep 17 00:00:00 2001 From: WangCham <651122857@qq.com> Date: Wed, 9 Jul 2025 22:09:46 +0800 Subject: [PATCH 037/257] feat: modify the rag.py --- .../http/controller/groups/knowledge_base.py | 72 ++-- pkg/entity/persistence/rag.py | 58 +++ pkg/rag/knowledge/RAG_Manager.py | 354 +++++++++++------- pkg/rag/knowledge/services/database.py | 83 ++-- 4 files changed, 338 insertions(+), 229 deletions(-) create mode 100644 pkg/entity/persistence/rag.py diff --git a/pkg/api/http/controller/groups/knowledge_base.py b/pkg/api/http/controller/groups/knowledge_base.py index e9606a3d..ce391042 100644 --- a/pkg/api/http/controller/groups/knowledge_base.py +++ b/pkg/api/http/controller/groups/knowledge_base.py @@ -1,6 +1,6 @@ import quart from .. 
import group - +import os # 导入 os 用于文件操作 @group.group_class('knowledge_base', '/api/v1/knowledge/bases') class KnowledgeBaseRouterGroup(group.RouterGroup): @@ -9,8 +9,8 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): return quart.jsonify({'code': code, 'data': data or {}, 'msg': msg}) async def initialize(self) -> None: - @self.route('', methods=['POST', 'GET']) - async def _() -> str: + @self.route('', methods=['POST', 'GET'], endpoint='handle_knowledge_bases') + async def handle_knowledge_bases() -> str: if quart.request.method == 'GET': knowledge_bases = await self.ap.knowledge_base_service.get_all_knowledge_bases() bases_list = [ @@ -23,17 +23,17 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): ] return self.success(code=0, data={'bases': bases_list}, msg='ok') + # POST: create a new knowledge base json_data = await quart.request.json knowledge_base_uuid = await self.ap.knowledge_base_service.create_knowledge_base( json_data.get('name'), json_data.get('description') ) - _ = knowledge_base_uuid - return self.success(code=0, data={}, msg='ok') + return self.success(code=0, data={'uuid': knowledge_base_uuid}, msg='ok') - @self.route('/', methods=['GET', 'DELETE']) - async def _(knowledge_base_uuid: str) -> str: + @self.route('/', methods=['GET', 'DELETE'], endpoint='handle_specific_knowledge_base') + async def handle_specific_knowledge_base(knowledge_base_uuid: str) -> str: if quart.request.method == 'GET': - knowledge_base = await self.ap.knowledge_base_service.get_knowledge_base_by_id(knowledge_base_uuid) + knowledge_base = await self.ap.knowledge_base_service.get_knowledge_base_by_id(int(knowledge_base_uuid)) if knowledge_base is None: return self.http_status(404, -1, 'knowledge base not found') @@ -48,28 +48,42 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): msg='ok', ) elif quart.request.method == 'DELETE': - await self.ap.knowledge_base_service.delete_kb_by_id(knowledge_base_uuid) + await 
self.ap.knowledge_base_service.delete_kb_by_id(int(knowledge_base_uuid)) return self.success(code=0, msg='ok') - @self.route('//files', methods=['GET']) - async def _(knowledge_base_uuid: str) -> str: - if quart.request.method == 'GET': - files = await self.ap.knowledge_base_service.get_files_by_knowledge_base(knowledge_base_uuid) - return self.success( - code=0, - data=[ - { - 'id': file.id, - 'file_name': file.file_name, - 'status': file.status, - } - for file in files - ], - msg='ok', - ) - # delete specific file in knowledge base - @self.route('//files/', methods=['DELETE']) - async def _(knowledge_base_uuid: str, file_id: str) -> str: - await self.ap.knowledge_base_service.delete_data_by_file_id(file_id) + @self.route('//files', methods=['GET'], endpoint='get_knowledge_base_files') + async def get_knowledge_base_files(knowledge_base_uuid: str) -> str: + files = await self.ap.knowledge_base_service.get_files_by_knowledge_base(int(knowledge_base_uuid)) + return self.success( + code=0, + data=[ + { + 'id': file.id, + 'file_name': file.file_name, + 'status': file.status, + } + for file in files + ], + msg='ok', + ) + + + @self.route('//files/', methods=['DELETE'], endpoint='delete_specific_file_in_kb') + async def delete_specific_file_in_kb(file_id: str) -> str: + await self.ap.knowledge_base_service.delete_data_by_file_id(int(file_id)) return self.success(code=0, msg='ok') + + @self.route('//files', methods=['POST'], endpoint='relate_file_with_kb') + async def relate_file_id_with_kb(knowledge_base_uuid:str,file_id: str) -> str: + if 'file' not in quart.request.files: + return self.http_status(400, -1, 'No file part in the request') + + json_data = await quart.request.json + file_id = json_data.get('file_id') + if not file_id: + return self.http_status(400, -1, 'File ID is required') + + # 调用服务层方法将文件与知识库关联 + await self.ap.knowledge_base_service.relate_file_id_with_kb(int(knowledge_base_uuid), int(file_id)) + return self.success(code=0, data={}, msg='ok') \ No 
newline at end of file diff --git a/pkg/entity/persistence/rag.py b/pkg/entity/persistence/rag.py new file mode 100644 index 00000000..175720f1 --- /dev/null +++ b/pkg/entity/persistence/rag.py @@ -0,0 +1,58 @@ +from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, LargeBinary +from sqlalchemy.orm import declarative_base, sessionmaker +from datetime import datetime +import os + + +Base = declarative_base() +DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./rag_knowledge.db") + + +engine = create_engine( + DATABASE_URL, + connect_args={"check_same_thread": False} +) + + +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + + +def create_db_and_tables(): + """Creates all database tables defined in the Base.""" + Base.metadata.create_all(bind=engine) + print("Database tables created or already exist.") + +class KnowledgeBase(Base): + __tablename__ = 'kb' + id = Column(Integer, primary_key=True, index=True) + name = Column(String, index=True) + description = Column(Text) + created_at = Column(DateTime, default=datetime.utcnow) + embedding_model = Column(String, default='') + top_k = Column(Integer, default=5) + + +class File(Base): + __tablename__ = 'file' + id = Column(Integer, primary_key=True, index=True) + kb_id = Column(Integer, nullable=True) + file_name = Column(String) + path = Column(String) + created_at = Column(DateTime, default=datetime.utcnow) + file_type = Column(String) + status = Column(Integer, default=0) + + +class Chunk(Base): + __tablename__ = 'chunks' + id = Column(Integer, primary_key=True, index=True) + file_id = Column(Integer, nullable=True) + + text = Column(Text) + + +class Vector(Base): + __tablename__ = 'vectors' + id = Column(Integer, primary_key=True, index=True) + chunk_id = Column(Integer, nullable=True) + embedding = Column(LargeBinary) diff --git a/pkg/rag/knowledge/RAG_Manager.py b/pkg/rag/knowledge/RAG_Manager.py index 6ded737a..9675371b 100644 --- 
a/pkg/rag/knowledge/RAG_Manager.py +++ b/pkg/rag/knowledge/RAG_Manager.py @@ -1,38 +1,42 @@ -# RAG_Manager class (main class, adjust imports as needed) -from __future__ import annotations # For type hinting in Python 3.7+ +# rag_manager.py +from __future__ import annotations import logging import os import asyncio +import json +import uuid from pkg.rag.knowledge.services.parser import FileParser from pkg.rag.knowledge.services.chunker import Chunker from pkg.rag.knowledge.services.embedder import Embedder from pkg.rag.knowledge.services.retriever import Retriever -from pkg.rag.knowledge.services.database import create_db_and_tables, SessionLocal, KnowledgeBase, File, Chunk # Ensure Chunk is imported if you need to manipulate it directly +from pkg.rag.knowledge.services.database import create_db_and_tables, SessionLocal, KnowledgeBase, File, Chunk from pkg.rag.knowledge.services.embedding_models import EmbeddingModelFactory from pkg.rag.knowledge.services.chroma_manager import ChromaIndexManager -from pkg.core import app # Adjust the import path as needed - +from pkg.core import app class RAG_Manager: - ap: app.Application - def __init__(self, ap: app.Application,logger: logging.Logger = None): + def __init__(self, ap: app.Application, logger: logging.Logger = None): self.ap = ap self.logger = logger or logging.getLogger(__name__) self.embedding_model_type = None self.embedding_model_name = None self.chroma_manager = None - self.parser = None - self.chunker = None + self.parser = FileParser() + self.chunker = Chunker() self.embedder = None self.retriever = None async def initialize_rag_system(self): + """Initializes the RAG system by creating database tables.""" await asyncio.to_thread(create_db_and_tables) - async def create_specific_model(self, embedding_model_type: str, - embedding_model_name: str): + async def create_specific_model(self, embedding_model_type: str, embedding_model_name: str): + """ + Creates and configures the specific embedding model and 
ChromaDB manager. + This must be called before performing embedding or retrieval operations. + """ self.embedding_model_type = embedding_model_type self.embedding_model_name = embedding_model_name @@ -47,52 +51,38 @@ class RAG_Manager: raise RuntimeError("Failed to initialize RAG_Manager due to embedding model issues.") self.chroma_manager = ChromaIndexManager(collection_name=f"rag_collection_{self.embedding_model_name.replace('-', '_')}") - - self.parser = FileParser() - self.chunker = Chunker() - # Pass chroma_manager to Embedder and Retriever - self.embedder = Embedder( - model_type=self.embedding_model_type, - model_name_key=self.embedding_model_name, - chroma_manager=self.chroma_manager # Inject dependency - ) - self.retriever = Retriever( - model_type=self.embedding_model_type, - model_name_key=self.embedding_model_name, - chroma_manager=self.chroma_manager # Inject dependency - ) - + self.embedder = Embedder(model_type=self.embedding_model_type, model_name_key=self.embedding_model_name, chroma_manager=self.chroma_manager) + self.retriever = Retriever(model_type=self.embedding_model_type, model_name_key=self.embedding_model_name, chroma_manager=self.chroma_manager) async def create_knowledge_base(self, kb_name: str, kb_description: str, embedding_model: str = "", top_k: int = 5): """ - Creates a new knowledge base with the given name and description. - If a knowledge base with the same name already exists, it returns that one. + Creates a new knowledge base if it doesn't already exist. 
""" try: - def _get_kb_sync(name): + if not self.embedding_model_type or not kb_name: + raise ValueError("Embedding model type and knowledge base name must be set before creating a knowledge base.") + def _create_kb_sync(): session = SessionLocal() try: - return session.query(KnowledgeBase).filter_by(name=name).first() - finally: - session.close() - - kb = await asyncio.to_thread(_get_kb_sync, kb_name) - - if not kb: - def _add_kb_sync(): - session = SessionLocal() - try: - new_kb = KnowledgeBase(name=kb_name, description=kb_description, embedding_model=embedding_model, top_k=top_k) + kb = session.query(KnowledgeBase).filter_by(name=kb_name).first() + if not kb: + id = uuid.uuid4().int + new_kb = KnowledgeBase(name=kb_name, description=kb_description, embedding_model=embedding_model, top_k=top_k,id=id) session.add(new_kb) session.commit() session.refresh(new_kb) - return new_kb - finally: - session.close() - kb = await asyncio.to_thread(_add_kb_sync) - except Exception as e: - self.logger.error(f"Error creating knowledge base '{kb_name}': {str(e)}", exc_info=True) - raise + self.logger.info(f"Knowledge Base '{kb_name}' created.") + return new_kb.id + else: + self.logger.info(f"Knowledge Base '{kb_name}' already exists.") + except Exception as e: + session.rollback() + self.logger.error(f"Error in _create_kb_sync for '{kb_name}': {str(e)}", exc_info=True) + raise + finally: + session.close() + + return await asyncio.to_thread(_create_kb_sync) except Exception as e: self.logger.error(f"Error creating knowledge base '{kb_name}': {str(e)}", exc_info=True) raise @@ -108,116 +98,124 @@ class RAG_Manager: return session.query(KnowledgeBase).all() finally: session.close() - - kbs = await asyncio.to_thread(_get_all_kbs_sync) - return kbs + return await asyncio.to_thread(_get_all_kbs_sync) except Exception as e: self.logger.error(f"Error retrieving knowledge bases: {str(e)}", exc_info=True) return [] - + async def get_knowledge_base_by_id(self, kb_id: int): """ - Retrieves a 
knowledge base by its ID. + Retrieves a specific knowledge base by its ID. """ try: - def _get_kb_sync(kb_id): + def _get_kb_sync(kb_id_param): session = SessionLocal() try: - return session.query(KnowledgeBase).filter_by(id=kb_id).first() + return session.query(KnowledgeBase).filter_by(id=kb_id_param).first() finally: session.close() - - kb = await asyncio.to_thread(_get_kb_sync, kb_id) - return kb + return await asyncio.to_thread(_get_kb_sync, kb_id) except Exception as e: self.logger.error(f"Error retrieving knowledge base with ID {kb_id}: {str(e)}", exc_info=True) return None - + async def get_files_by_knowledge_base(self, kb_id: int): + """ + Retrieves files associated with a specific knowledge base by querying the File table directly. + """ try: - def _get_files_sync(kb_id): + def _get_files_sync(kb_id_param): session = SessionLocal() try: - return session.query(File).filter_by(kb_id=kb_id).all() + return session.query(File).filter_by(kb_id=kb_id_param).all() finally: session.close() - - files = await asyncio.to_thread(_get_files_sync, kb_id) - return files + return await asyncio.to_thread(_get_files_sync, kb_id) except Exception as e: self.logger.error(f"Error retrieving files for knowledge base ID {kb_id}: {str(e)}", exc_info=True) return [] + async def get_all_files(self): + """ + Retrieves all files stored in the database, regardless of their association + with any specific knowledge base. + """ + try: + def _get_all_files_sync(): + session = SessionLocal() + try: + return session.query(File).all() + finally: + session.close() + return await asyncio.to_thread(_get_all_files_sync) + except Exception as e: + self.logger.error(f"Error retrieving all files: {str(e)}", exc_info=True) + return [] async def store_data(self, file_path: str, kb_name: str, file_type: str, kb_description: str = "Default knowledge base"): + """ + Parses, chunks, embeds, and stores data from a given file into the RAG system. 
+ Associates the file with a knowledge base using kb_id in the File table. + """ self.logger.info(f"Starting data storage process for file: {file_path}") + session = SessionLocal() + file_obj = None + try: - def _get_kb_sync(name): - session = SessionLocal() - try: - return session.query(KnowledgeBase).filter_by(name=name).first() - finally: - session.close() - - kb = await asyncio.to_thread(_get_kb_sync, kb_name) - + # 1. 确保知识库存在或创建它 + kb = session.query(KnowledgeBase).filter_by(name=kb_name).first() if not kb: - self.logger.info(f"Knowledge Base '{kb_name}' not found. Creating a new one.") - def _add_kb_sync(): - session = SessionLocal() - try: - new_kb = KnowledgeBase(name=kb_name, description=kb_description) - session.add(new_kb) - session.commit() - session.refresh(new_kb) - return new_kb - finally: - session.close() - kb = await asyncio.to_thread(_add_kb_sync) - self.logger.info(f"Created Knowledge Base: {kb.name} (ID: {kb.id})") + kb = KnowledgeBase(name=kb_name, description=kb_description) + session.add(kb) + session.commit() + session.refresh(kb) + self.logger.info(f"Knowledge Base '{kb_name}' created during store_data.") + else: + self.logger.info(f"Knowledge Base '{kb_name}' already exists.") - def _add_file_sync(kb_id, file_name, path, file_type): - session = SessionLocal() - try: - file = File(kb_id=kb_id, file_name=file_name, path=path, file_type=file_type) - session.add(file) - session.commit() - session.refresh(file) - return file - finally: - session.close() - - file_obj = await asyncio.to_thread(_add_file_sync, kb.id, os.path.basename(file_path), file_path, file_type) - self.logger.info(f"Added file entry: {file_obj.file_name} (ID: {file_obj.id})") - - text = await self.parser.parse(file_path) - if not text: - self.logger.warning(f"File {file_path} parsed to empty content. Skipping chunking and embedding.") - # You might want to delete the file_obj from the DB here if it's empty. 
- session = SessionLocal() - try: - session.delete(file_obj) - session.commit() - except Exception as del_e: - self.logger.error(f"Failed to delete empty file_obj {file_obj.id}: {del_e}") - finally: - session.close() + # 2. 添加文件记录到数据库,并直接关联 kb_id + file_name = os.path.basename(file_path) + existing_file = session.query(File).filter_by(kb_id=kb.id, file_name=file_name).first() + if existing_file: + self.logger.warning(f"File '{file_name}' already exists in knowledge base '{kb_name}'. Skipping storage.") return + file_obj = File(kb_id=kb.id, file_name=file_name, path=file_path, file_type=file_type) + session.add(file_obj) + session.commit() + session.refresh(file_obj) + self.logger.info(f"File record '{file_name}' added to database with ID: {file_obj.id}, associated with KB ID: {kb.id}") + + # 3. 解析文件内容 + text = await self.parser.parse(file_path) + if not text: + self.logger.warning(f"No text extracted from file {file_path}. Deleting file record ID: {file_obj.id}.") + session.delete(file_obj) + session.commit() # 提交删除操作 + return + + # 4. 分块并嵌入/存储块 chunks_texts = await self.chunker.chunk(text) - self.logger.info(f"Chunked into {len(chunks_texts)} pieces.") - - # embed_and_store now handles both DB chunk saving and Chroma embedding + self.logger.info(f"Chunked file '{file_name}' into {len(chunks_texts)} chunks.") await self.embedder.embed_and_store(file_id=file_obj.id, chunks=chunks_texts) - self.logger.info(f"Data storage process completed for file: {file_path}") except Exception as e: + session.rollback() self.logger.error(f"Error in store_data for file {file_path}: {str(e)}", exc_info=True) - # Consider cleaning up partially stored data if an error occurs. 
- return + if file_obj and file_obj.id: + try: + await asyncio.to_thread(self.chroma_manager.delete_by_file_id_sync, file_obj.id) + except Exception as chroma_e: + self.logger.warning(f"Could not clean up ChromaDB entries for file_id {file_obj.id} after store_data failure: {chroma_e}") + raise + finally: + session.close() async def retrieve_data(self, query: str): + """ + Retrieves relevant data chunks based on a given query using the configured retriever. + """ self.logger.info(f"Starting data retrieval process for query: '{query}'") try: retrieved_chunks = await self.retriever.retrieve(query) @@ -229,60 +227,140 @@ class RAG_Manager: async def delete_data_by_file_id(self, file_id: int): """ - Deletes data associated with a specific file_id from both the relational DB and Chroma. + Deletes all data associated with a specific file ID, including its chunks and vectors, + and the file record itself. """ self.logger.info(f"Starting data deletion process for file_id: {file_id}") session = SessionLocal() try: - # 1. Delete from Chroma + # 1. 从 ChromaDB 删除 embeddings await asyncio.to_thread(self.chroma_manager.delete_by_file_id_sync, file_id) + self.logger.info(f"Deleted embeddings from ChromaDB for file_id: {file_id}") - # 2. Delete chunks from relational DB + # 2. 删除与文件关联的 chunks 记录 chunks_to_delete = session.query(Chunk).filter_by(file_id=file_id).all() for chunk in chunks_to_delete: session.delete(chunk) - self.logger.info(f"Deleted {len(chunks_to_delete)} chunks from relational DB for file_id: {file_id}.") + self.logger.info(f"Deleted {len(chunks_to_delete)} chunk records for file_id: {file_id}") - # 3. Delete file entry from relational DB + # 3. 
删除文件记录本身 file_to_delete = session.query(File).filter_by(id=file_id).first() if file_to_delete: session.delete(file_to_delete) - self.logger.info(f"Deleted file entry {file_id} from relational DB.") + self.logger.info(f"Deleted file record for file_id: {file_id}") else: - self.logger.warning(f"File entry {file_id} not found in relational DB.") + self.logger.warning(f"File with ID {file_id} not found in database. Skipping deletion of file record.") session.commit() - self.logger.info(f"Data deletion completed for file_id: {file_id}.") + self.logger.info(f"Successfully completed data deletion for file_id: {file_id}") except Exception as e: session.rollback() self.logger.error(f"Error deleting data for file_id {file_id}: {str(e)}", exc_info=True) + raise finally: session.close() async def delete_kb_by_id(self, kb_id: int): """ - Deletes a knowledge base and all associated files and chunks. + Deletes a knowledge base and all associated files, chunks, and vectors. + This involves querying for associated files and then deleting them. """ self.logger.info(f"Starting deletion of knowledge base with ID: {kb_id}") - session = SessionLocal() + session = SessionLocal() # 使用新的会话来获取 KB 和关联文件 + try: - # 1. Get the knowledge base - kb = session.query(KnowledgeBase).filter_by(id=kb_id).first() - if not kb: + kb_to_delete = session.query(KnowledgeBase).filter_by(id=kb_id).first() + if not kb_to_delete: self.logger.warning(f"Knowledge Base with ID {kb_id} not found.") return - # 2. Delete all files associated with this knowledge base - files_to_delete = session.query(File).filter_by(kb_id=kb.id).all() - for file in files_to_delete: - await self.delete_data_by_file_id(file.id) + # 获取所有关联的文件,通过 File 表的 kb_id 字段查询 + files_to_delete = session.query(File).filter_by(kb_id=kb_id).all() + + # 关闭当前会话,因为 delete_data_by_file_id 会创建自己的会话 + session.close() - # 3. 
Delete the knowledge base itself - session.delete(kb) + # 遍历删除每个关联文件及其数据 + for file_obj in files_to_delete: + try: + await self.delete_data_by_file_id(file_obj.id) + except Exception as file_del_e: + self.logger.error(f"Failed to delete file ID {file_obj.id} during KB deletion: {file_del_e}") + # 记录错误但继续,尝试删除其他文件 + + # 所有文件删除完毕后,重新打开会话来删除 KnowledgeBase 本身 + session = SessionLocal() + try: + # 重新查询,确保对象是当前会话的一部分 + kb_final_delete = session.query(KnowledgeBase).filter_by(id=kb_id).first() + if kb_final_delete: + session.delete(kb_final_delete) + session.commit() + self.logger.info(f"Successfully deleted knowledge base with ID: {kb_id}") + else: + self.logger.warning(f"Knowledge Base with ID {kb_id} not found after file deletion, skipping KB deletion.") + except Exception as kb_del_e: + session.rollback() + self.logger.error(f"Error deleting KnowledgeBase record for ID {kb_id}: {kb_del_e}", exc_info=True) + raise + finally: + session.close() + + except Exception as e: + # 如果在最初获取 KB 或文件列表时出错 + if session.is_active: + session.rollback() + self.logger.error(f"Error during overall knowledge base deletion for ID {kb_id}: {str(e)}", exc_info=True) + raise + finally: + if session.is_active: + session.close() + + + + async def get_file_content_by_file_id(self, file_id: str) -> str: + + file_bytes = await self.ap.storage_mgr.storage_provider.load(file_id) + + _, ext = os.path.splitext(file_id.lower()) + ext = ext.lstrip('.') + + try: + text = file_bytes.decode("utf-8") + except UnicodeDecodeError: + return "[非文本文件或编码无法识别]" + + if ext in ["txt", "md", "csv", "log", "py", "html"]: + return text + else: + return f"[未知类型: .{ext}]" + + async def relate_file_id_with_kb(self, knowledge_base_uuid: str, file_id: str) -> None: + """ + Associates a file with a knowledge base by updating the kb_id in the File table. 
+ """ + self.logger.info(f"Associating file ID {file_id} with knowledge base UUID {knowledge_base_uuid}") + session = SessionLocal() + try: + # 查询知识库是否存在 + kb = session.query(KnowledgeBase).filter_by(id=knowledge_base_uuid).first() + if not kb: + self.logger.error(f"Knowledge Base with UUID {knowledge_base_uuid} not found.") + return + + # 更新文件的 kb_id + file_to_update = session.query(File).filter_by(id=file_id).first() + if not file_to_update: + self.logger.error(f"File with ID {file_id} not found.") + return + + file_to_update.kb_id = kb.id session.commit() - self.logger.info(f"Successfully deleted knowledge base with ID: {kb_id}") + self.logger.info(f"Successfully associated file ID {file_id} with knowledge base UUID {knowledge_base_uuid}") except Exception as e: session.rollback() - self.logger.error(f"Error deleting knowledge base with ID {kb_id}: {str(e)}", exc_info=True) + self.logger.error(f"Error associating file ID {file_id} with knowledge base UUID {knowledge_base_uuid}: {str(e)}", exc_info=True) finally: session.close() + + diff --git a/pkg/rag/knowledge/services/database.py b/pkg/rag/knowledge/services/database.py index 35a52453..bc5caa10 100644 --- a/pkg/rag/knowledge/services/database.py +++ b/pkg/rag/knowledge/services/database.py @@ -1,64 +1,23 @@ -from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, ForeignKey, LargeBinary -from sqlalchemy.orm import declarative_base, sessionmaker, relationship -from datetime import datetime +# 全部迁移过去 -Base = declarative_base() +from pkg.entity.persistence.rag import ( + create_db_and_tables, + SessionLocal, + Base, + engine, + KnowledgeBase, + File, + Chunk, + Vector, +) - -class KnowledgeBase(Base): - __tablename__ = 'kb' - id = Column(Integer, primary_key=True, index=True) - name = Column(String, index=True) - description = Column(Text) - created_at = Column(DateTime, default=datetime.utcnow) - embedding_model = Column(String, default='') # 默认嵌入模型 - top_k = Column(Integer, default=5) # 
默认返回的top_k数量 - files = relationship('File', back_populates='knowledge_base') - - -class File(Base): - __tablename__ = 'file' - id = Column(Integer, primary_key=True, index=True) - kb_id = Column(Integer, ForeignKey('kb.id')) - file_name = Column(String) - path = Column(String) - created_at = Column(DateTime, default=datetime.utcnow) - file_type = Column(String) - status = Column(Integer, default=0) # 0: 未处理, 1: 处理中, 2: 已处理, 3: 错误 - knowledge_base = relationship('KnowledgeBase', back_populates='files') - chunks = relationship('Chunk', back_populates='file') - - -class Chunk(Base): - __tablename__ = 'chunks' - id = Column(Integer, primary_key=True, index=True) - file_id = Column(Integer, ForeignKey('file.id')) - text = Column(Text) - - file = relationship('File', back_populates='chunks') - vector = relationship('Vector', uselist=False, back_populates='chunk') # One-to-one - - -class Vector(Base): - __tablename__ = 'vectors' - id = Column(Integer, primary_key=True, index=True) - chunk_id = Column(Integer, ForeignKey('chunks.id'), unique=True) - embedding = Column(LargeBinary) # Store embeddings as binary - - chunk = relationship('Chunk', back_populates='vector') - - -# 数据库连接 -DATABASE_URL = 'sqlite:///./knowledge_base.db' # 生产环境请更换为 PostgreSQL/MySQL -engine = create_engine(DATABASE_URL, connect_args={'check_same_thread': False} if 'sqlite' in DATABASE_URL else {}) -SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) - - -# 创建所有表 (可以在应用启动时执行一次) -def create_db_and_tables(): - Base.metadata.create_all(bind=engine) - print('Database tables created/checked.') - - -# 定义嵌入维度(请根据你实际使用的模型调整) -EMBEDDING_DIM = 1024 +__all__ = [ + "create_db_and_tables", + "SessionLocal", + "Base", + "engine", + "KnowledgeBase", + "File", + "Chunk", + "Vector", +] From 4d0a39eb6513e2b88c35adabcba5aa7dde1e73f9 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Thu, 10 Jul 2025 11:01:16 +0800 Subject: [PATCH 038/257] chore: switch comments to en --- 
pkg/api/http/controller/group.py | 24 +++++----- .../controller/groups/pipelines/webchat.py | 6 +-- pkg/api/http/controller/groups/plugins.py | 6 +-- pkg/api/http/controller/groups/user.py | 10 ++-- pkg/api/http/controller/main.py | 2 +- pkg/api/http/service/bot.py | 12 ++--- pkg/config/impls/json.py | 4 +- pkg/config/impls/pymodule.py | 12 ++--- pkg/config/impls/yaml.py | 4 +- pkg/config/manager.py | 48 +++++++++---------- pkg/config/model.py | 8 ++-- pkg/core/app.py | 18 +++---- pkg/core/boot.py | 10 ++-- pkg/core/bootutils/deps.py | 6 +-- pkg/core/bootutils/log.py | 12 ++--- pkg/core/notes/n001_classic_msgs.py | 2 +- .../notes/n002_selection_mode_on_windows.py | 7 ++- pkg/core/stages/build_app.py | 6 +-- pkg/core/stages/genkeys.py | 4 +- pkg/core/stages/load_config.py | 4 +- pkg/core/stages/migrate.py | 11 +++-- pkg/core/stages/setup_logger.py | 10 ++-- pkg/core/stages/show_notes.py | 4 +- 23 files changed, 119 insertions(+), 111 deletions(-) diff --git a/pkg/api/http/controller/group.py b/pkg/api/http/controller/group.py index ce366539..73780208 100644 --- a/pkg/api/http/controller/group.py +++ b/pkg/api/http/controller/group.py @@ -11,11 +11,11 @@ from ....core import app preregistered_groups: list[type[RouterGroup]] = [] -"""RouterGroup 的预注册列表""" +"""Pre-registered list of RouterGroup""" def group_class(name: str, path: str) -> None: - """注册一个 RouterGroup""" + """Register a RouterGroup""" def decorator(cls: typing.Type[RouterGroup]) -> typing.Type[RouterGroup]: cls.name = name @@ -27,7 +27,7 @@ def group_class(name: str, path: str) -> None: class AuthType(enum.Enum): - """认证类型""" + """Authentication type""" NONE = 'none' USER_TOKEN = 'user-token' @@ -56,7 +56,7 @@ class RouterGroup(abc.ABC): auth_type: AuthType = AuthType.USER_TOKEN, **options: typing.Any, ) -> typing.Callable[[RouteCallable], RouteCallable]: # decorator - """注册一个路由""" + """Register a route""" def decorator(f: RouteCallable) -> RouteCallable: nonlocal rule @@ -64,11 +64,11 @@ class 
RouterGroup(abc.ABC): async def handler_error(*args, **kwargs): if auth_type == AuthType.USER_TOKEN: - # 从Authorization头中获取token + # get token from Authorization header token = quart.request.headers.get('Authorization', '').replace('Bearer ', '') if not token: - return self.http_status(401, -1, '未提供有效的用户令牌') + return self.http_status(401, -1, 'No valid user token provided') try: user_email = await self.ap.user_service.verify_jwt_token(token) @@ -76,9 +76,9 @@ class RouterGroup(abc.ABC): # check if this account exists user = await self.ap.user_service.get_user_by_email(user_email) if not user: - return self.http_status(401, -1, '用户不存在') + return self.http_status(401, -1, 'User not found') - # 检查f是否接受user_email参数 + # check if f accepts user_email parameter if 'user_email' in f.__code__.co_varnames: kwargs['user_email'] = user_email except Exception as e: @@ -86,7 +86,7 @@ class RouterGroup(abc.ABC): try: return await f(*args, **kwargs) - except Exception: # 自动 500 + except Exception: # auto 500 traceback.print_exc() # return self.http_status(500, -2, str(e)) return self.http_status(500, -2, 'internal server error') @@ -101,7 +101,7 @@ class RouterGroup(abc.ABC): return decorator def success(self, data: typing.Any = None) -> quart.Response: - """返回一个 200 响应""" + """Return a 200 response""" return quart.jsonify( { 'code': 0, @@ -111,7 +111,7 @@ class RouterGroup(abc.ABC): ) def fail(self, code: int, msg: str) -> quart.Response: - """返回一个异常响应""" + """Return an error response""" return quart.jsonify( { @@ -121,5 +121,5 @@ class RouterGroup(abc.ABC): ) def http_status(self, status: int, code: int, msg: str) -> quart.Response: - """返回一个指定状态码的响应""" + """Return a response with a specified status code""" return self.fail(code, msg), status diff --git a/pkg/api/http/controller/groups/pipelines/webchat.py b/pkg/api/http/controller/groups/pipelines/webchat.py index 005738db..c8c8db54 100644 --- a/pkg/api/http/controller/groups/pipelines/webchat.py +++ 
b/pkg/api/http/controller/groups/pipelines/webchat.py @@ -8,7 +8,7 @@ class WebChatDebugRouterGroup(group.RouterGroup): async def initialize(self) -> None: @self.route('/send', methods=['POST']) async def send_message(pipeline_uuid: str) -> str: - """发送调试消息到流水线""" + """Send a message to the pipeline for debugging""" try: data = await quart.request.get_json() session_type = data.get('session_type', 'person') @@ -38,7 +38,7 @@ class WebChatDebugRouterGroup(group.RouterGroup): @self.route('/messages/', methods=['GET']) async def get_messages(pipeline_uuid: str, session_type: str) -> str: - """获取调试消息历史""" + """Get the message history of the pipeline for debugging""" try: if session_type not in ['person', 'group']: return self.http_status(400, -1, 'session_type must be person or group') @@ -57,7 +57,7 @@ class WebChatDebugRouterGroup(group.RouterGroup): @self.route('/reset/', methods=['POST']) async def reset_session(session_type: str) -> str: - """重置调试会话""" + """Reset the debug session""" try: if session_type not in ['person', 'group']: return self.http_status(400, -1, 'session_type must be person or group') diff --git a/pkg/api/http/controller/groups/plugins.py b/pkg/api/http/controller/groups/plugins.py index daf6ea7d..b7e0a5e9 100644 --- a/pkg/api/http/controller/groups/plugins.py +++ b/pkg/api/http/controller/groups/plugins.py @@ -40,7 +40,7 @@ class PluginsRouterGroup(group.RouterGroup): self.ap.plugin_mgr.update_plugin(plugin_name, task_context=ctx), kind='plugin-operation', name=f'plugin-update-{plugin_name}', - label=f'更新插件 {plugin_name}', + label=f'Updating plugin {plugin_name}', context=ctx, ) return self.success(data={'task_id': wrapper.id}) @@ -62,7 +62,7 @@ class PluginsRouterGroup(group.RouterGroup): self.ap.plugin_mgr.uninstall_plugin(plugin_name, task_context=ctx), kind='plugin-operation', name=f'plugin-remove-{plugin_name}', - label=f'删除插件 {plugin_name}', + label=f'Removing plugin {plugin_name}', context=ctx, ) @@ -102,7 +102,7 @@ class 
PluginsRouterGroup(group.RouterGroup): self.ap.plugin_mgr.install_plugin(data['source'], task_context=ctx), kind='plugin-operation', name='plugin-install-github', - label=f'安装插件 ...{short_source_str}', + label=f'Installing plugin ...{short_source_str}', context=ctx, ) diff --git a/pkg/api/http/controller/groups/user.py b/pkg/api/http/controller/groups/user.py index 3ad1335b..d8024107 100644 --- a/pkg/api/http/controller/groups/user.py +++ b/pkg/api/http/controller/groups/user.py @@ -14,7 +14,7 @@ class UserRouterGroup(group.RouterGroup): return self.success(data={'initialized': await self.ap.user_service.is_initialized()}) if await self.ap.user_service.is_initialized(): - return self.fail(1, '系统已初始化') + return self.fail(1, 'System already initialized') json_data = await quart.request.json @@ -32,7 +32,7 @@ class UserRouterGroup(group.RouterGroup): try: token = await self.ap.user_service.authenticate(json_data['user'], json_data['password']) except argon2.exceptions.VerifyMismatchError: - return self.fail(1, '用户名或密码错误') + return self.fail(1, 'Invalid username or password') return self.success(data={'token': token}) @@ -54,15 +54,15 @@ class UserRouterGroup(group.RouterGroup): await asyncio.sleep(3) if not await self.ap.user_service.is_initialized(): - return self.http_status(400, -1, 'system not initialized') + return self.http_status(400, -1, 'System not initialized') user_obj = await self.ap.user_service.get_user_by_email(user_email) if user_obj is None: - return self.http_status(400, -1, 'user not found') + return self.http_status(400, -1, 'User not found') if recovery_key != self.ap.instance_config.data['system']['recovery_key']: - return self.http_status(403, -1, 'invalid recovery key') + return self.http_status(403, -1, 'Invalid recovery key') await self.ap.user_service.reset_password(user_email, new_password) diff --git a/pkg/api/http/controller/main.py b/pkg/api/http/controller/main.py index eb434d88..0191ead5 100644 --- a/pkg/api/http/controller/main.py +++ 
b/pkg/api/http/controller/main.py @@ -45,7 +45,7 @@ class HTTPController: try: await self.quart_app.run_task(*args, **kwargs) except Exception as e: - self.ap.logger.error(f'启动 HTTP 服务失败: {e}') + self.ap.logger.error(f'Failed to start HTTP service: {e}') self.ap.task_mgr.create_task( exception_handler( diff --git a/pkg/api/http/service/bot.py b/pkg/api/http/service/bot.py index e5010007..adf19d03 100644 --- a/pkg/api/http/service/bot.py +++ b/pkg/api/http/service/bot.py @@ -10,7 +10,7 @@ from ....entity.persistence import pipeline as persistence_pipeline class BotService: - """机器人服务""" + """Bot service""" ap: app.Application @@ -18,7 +18,7 @@ class BotService: self.ap = ap async def get_bots(self) -> list[dict]: - """获取所有机器人""" + """Get all bots""" result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_bot.Bot)) bots = result.all() @@ -26,7 +26,7 @@ class BotService: return [self.ap.persistence_mgr.serialize_model(persistence_bot.Bot, bot) for bot in bots] async def get_bot(self, bot_uuid: str) -> dict | None: - """获取机器人""" + """Get bot""" result = await self.ap.persistence_mgr.execute_async( sqlalchemy.select(persistence_bot.Bot).where(persistence_bot.Bot.uuid == bot_uuid) ) @@ -39,7 +39,7 @@ class BotService: return self.ap.persistence_mgr.serialize_model(persistence_bot.Bot, bot) async def create_bot(self, bot_data: dict) -> str: - """创建机器人""" + """Create bot""" # TODO: 检查配置信息格式 bot_data['uuid'] = str(uuid.uuid4()) @@ -63,7 +63,7 @@ class BotService: return bot_data['uuid'] async def update_bot(self, bot_uuid: str, bot_data: dict) -> None: - """更新机器人""" + """Update bot""" if 'uuid' in bot_data: del bot_data['uuid'] @@ -99,7 +99,7 @@ class BotService: session.using_conversation = None async def delete_bot(self, bot_uuid: str) -> None: - """删除机器人""" + """Delete bot""" await self.ap.platform_mgr.remove_bot(bot_uuid) await self.ap.persistence_mgr.execute_async( sqlalchemy.delete(persistence_bot.Bot).where(persistence_bot.Bot.uuid == 
bot_uuid) diff --git a/pkg/config/impls/json.py b/pkg/config/impls/json.py index 07fc533c..44b4843c 100644 --- a/pkg/config/impls/json.py +++ b/pkg/config/impls/json.py @@ -6,7 +6,7 @@ from .. import model as file_model class JSONConfigFile(file_model.ConfigFile): - """JSON配置文件""" + """JSON config file""" def __init__( self, @@ -42,7 +42,7 @@ class JSONConfigFile(file_model.ConfigFile): try: cfg = json.load(f) except json.JSONDecodeError as e: - raise Exception(f'配置文件 {self.config_file_name} 语法错误: {e}') + raise Exception(f'Syntax error in config file {self.config_file_name}: {e}') if completion: for key in self.template_data: diff --git a/pkg/config/impls/pymodule.py b/pkg/config/impls/pymodule.py index 2311992e..c3d04bc8 100644 --- a/pkg/config/impls/pymodule.py +++ b/pkg/config/impls/pymodule.py @@ -7,13 +7,13 @@ from .. import model as file_model class PythonModuleConfigFile(file_model.ConfigFile): - """Python模块配置文件""" + """Python module config file""" config_file_name: str = None - """配置文件名""" + """Config file name""" template_file_name: str = None - """模板文件名""" + """Template file name""" def __init__(self, config_file_name: str, template_file_name: str) -> None: self.config_file_name = config_file_name @@ -42,7 +42,7 @@ class PythonModuleConfigFile(file_model.ConfigFile): cfg[key] = getattr(module, key) - # 从模板模块文件中进行补全 + # complete from template module file if completion: module_name = os.path.splitext(os.path.basename(self.template_file_name))[0] module = importlib.import_module(module_name) @@ -60,7 +60,7 @@ class PythonModuleConfigFile(file_model.ConfigFile): return cfg async def save(self, data: dict): - logging.warning('Python模块配置文件不支持保存') + logging.warning('Python module config file does not support saving') def save_sync(self, data: dict): - logging.warning('Python模块配置文件不支持保存') + logging.warning('Python module config file does not support saving') diff --git a/pkg/config/impls/yaml.py b/pkg/config/impls/yaml.py index 55045186..0d69ef9e 100644 --- 
a/pkg/config/impls/yaml.py +++ b/pkg/config/impls/yaml.py @@ -6,7 +6,7 @@ from .. import model as file_model class YAMLConfigFile(file_model.ConfigFile): - """YAML配置文件""" + """YAML config file""" def __init__( self, @@ -42,7 +42,7 @@ class YAMLConfigFile(file_model.ConfigFile): try: cfg = yaml.load(f, Loader=yaml.FullLoader) except yaml.YAMLError as e: - raise Exception(f'配置文件 {self.config_file_name} 语法错误: {e}') + raise Exception(f'Syntax error in config file {self.config_file_name}: {e}') if completion: for key in self.template_data: diff --git a/pkg/config/manager.py b/pkg/config/manager.py index c2e6bdf4..d552b038 100644 --- a/pkg/config/manager.py +++ b/pkg/config/manager.py @@ -5,27 +5,27 @@ from .impls import pymodule, json as json_file, yaml as yaml_file class ConfigManager: - """配置文件管理器""" + """Config file manager""" name: str = None - """配置管理器名""" + """Config manager name""" description: str = None - """配置管理器描述""" + """Config manager description""" schema: dict = None - """配置文件 schema - 需要符合 JSON Schema Draft 7 规范 + """Config file schema + Must conform to JSON Schema Draft 7 specification """ file: file_model.ConfigFile = None - """配置文件实例""" + """Config file instance""" data: dict = None - """配置数据""" + """Config data""" doc_link: str = None - """配置文件文档链接""" + """Config file documentation link""" def __init__(self, cfg_file: file_model.ConfigFile) -> None: self.file = cfg_file @@ -42,15 +42,15 @@ class ConfigManager: async def load_python_module_config(config_name: str, template_name: str, completion: bool = True) -> ConfigManager: - """加载Python模块配置文件 + """Load Python module config file Args: - config_name (str): 配置文件名 - template_name (str): 模板文件名 - completion (bool): 是否自动补全内存中的配置文件 + config_name (str): Config file name + template_name (str): Template file name + completion (bool): Whether to automatically complete the config file in memory Returns: - ConfigManager: 配置文件管理器 + ConfigManager: Config file manager """ cfg_inst = 
pymodule.PythonModuleConfigFile(config_name, template_name) @@ -66,13 +66,13 @@ async def load_json_config( template_data: dict = None, completion: bool = True, ) -> ConfigManager: - """加载JSON配置文件 + """Load JSON config file Args: - config_name (str): 配置文件名 - template_name (str): 模板文件名 - template_data (dict): 模板数据 - completion (bool): 是否自动补全内存中的配置文件 + config_name (str): Config file name + template_name (str): Template file name + template_data (dict): Template data + completion (bool): Whether to automatically complete the config file in memory """ cfg_inst = json_file.JSONConfigFile(config_name, template_name, template_data) @@ -88,16 +88,16 @@ async def load_yaml_config( template_data: dict = None, completion: bool = True, ) -> ConfigManager: - """加载YAML配置文件 + """Load YAML config file Args: - config_name (str): 配置文件名 - template_name (str): 模板文件名 - template_data (dict): 模板数据 - completion (bool): 是否自动补全内存中的配置文件 + config_name (str): Config file name + template_name (str): Template file name + template_data (dict): Template data + completion (bool): Whether to automatically complete the config file in memory Returns: - ConfigManager: 配置文件管理器 + ConfigManager: Config file manager """ cfg_inst = yaml_file.YAMLConfigFile(config_name, template_name, template_data) diff --git a/pkg/config/model.py b/pkg/config/model.py index f3536804..8b040f05 100644 --- a/pkg/config/model.py +++ b/pkg/config/model.py @@ -2,16 +2,16 @@ import abc class ConfigFile(metaclass=abc.ABCMeta): - """配置文件抽象类""" + """Config file abstract class""" config_file_name: str = None - """配置文件名""" + """Config file name""" template_file_name: str = None - """模板文件名""" + """Template file name""" template_data: dict = None - """模板数据""" + """Template data""" @abc.abstractmethod def exists(self) -> bool: diff --git a/pkg/core/app.py b/pkg/core/app.py index 911acd3d..23ce2759 100644 --- a/pkg/core/app.py +++ b/pkg/core/app.py @@ -30,7 +30,7 @@ from . 
import entities as core_entities class Application: - """运行时应用对象和上下文""" + """Runtime application object and context""" event_loop: asyncio.AbstractEventLoop = None @@ -47,10 +47,10 @@ class Application: model_mgr: llm_model_mgr.ModelManager = None - # TODO 移动到 pipeline 里 + # TODO move to pipeline tool_mgr: llm_tool_mgr.ToolManager = None - # ======= 配置管理器 ======= + # ======= Config manager ======= command_cfg: config_mgr.ConfigManager = None # deprecated @@ -64,7 +64,7 @@ class Application: instance_config: config_mgr.ConfigManager = None - # ======= 元数据配置管理器 ======= + # ======= Metadata config manager ======= sensitive_meta: config_mgr.ConfigManager = None @@ -154,11 +154,11 @@ class Application: except asyncio.CancelledError: pass except Exception as e: - self.logger.error(f'应用运行致命异常: {e}') + self.logger.error(f'Application runtime fatal exception: {e}') self.logger.debug(f'Traceback: {traceback.format_exc()}') async def print_web_access_info(self): - """打印访问 webui 的提示""" + """Print access webui tips""" if not os.path.exists(os.path.join('.', 'web/out')): self.logger.warning('WebUI 文件缺失,请根据文档部署:https://docs.langbot.app/zh') @@ -190,7 +190,7 @@ class Application: ): match scope: case core_entities.LifecycleControlScope.PLATFORM.value: - self.logger.info('执行热重载 scope=' + scope) + self.logger.info('Hot reload scope=' + scope) await self.platform_mgr.shutdown() self.platform_mgr = im_mgr.PlatformManager(self) @@ -206,7 +206,7 @@ class Application: ], ) case core_entities.LifecycleControlScope.PLUGIN.value: - self.logger.info('执行热重载 scope=' + scope) + self.logger.info('Hot reload scope=' + scope) await self.plugin_mgr.destroy_plugins() # 删除 sys.module 中所有的 plugins/* 下的模块 @@ -222,7 +222,7 @@ class Application: await self.plugin_mgr.load_plugins() await self.plugin_mgr.initialize_plugins() case core_entities.LifecycleControlScope.PROVIDER.value: - self.logger.info('执行热重载 scope=' + scope) + self.logger.info('Hot reload scope=' + scope) await self.tool_mgr.shutdown() diff 
--git a/pkg/core/boot.py b/pkg/core/boot.py index aff117e6..b8243d4a 100644 --- a/pkg/core/boot.py +++ b/pkg/core/boot.py @@ -8,7 +8,7 @@ from . import app from . import stage from ..utils import constants, importutil -# 引入启动阶段实现以便注册 +# Import startup stage implementation to register from . import stages importutil.import_modules_in_pkg(stages) @@ -25,7 +25,7 @@ stage_order = [ async def make_app(loop: asyncio.AbstractEventLoop) -> app.Application: - # 确定是否为调试模式 + # Determine if it is debug mode if 'DEBUG' in os.environ and os.environ['DEBUG'] in ['true', '1']: constants.debug_mode = True @@ -33,7 +33,7 @@ async def make_app(loop: asyncio.AbstractEventLoop) -> app.Application: ap.event_loop = loop - # 执行启动阶段 + # Execute startup stage for stage_name in stage_order: stage_cls = stage.preregistered_stages[stage_name] stage_inst = stage_cls() @@ -47,11 +47,11 @@ async def make_app(loop: asyncio.AbstractEventLoop) -> app.Application: async def main(loop: asyncio.AbstractEventLoop): try: - # 挂系统信号处理 + # Hang system signal processing import signal def signal_handler(sig, frame): - print('[Signal] 程序退出.') + print('[Signal] Program exit.') # ap.shutdown() os._exit(0) diff --git a/pkg/core/bootutils/deps.py b/pkg/core/bootutils/deps.py index b403bf8d..1a439af8 100644 --- a/pkg/core/bootutils/deps.py +++ b/pkg/core/bootutils/deps.py @@ -2,8 +2,8 @@ import pip import os from ...utils import pkgmgr -# 检查依赖,防止用户未安装 -# 左边为引入名称,右边为依赖名称 +# Check dependencies to prevent users from not installing +# Left is the import name, right is the dependency name required_deps = { 'requests': 'requests', 'openai': 'openai', @@ -65,7 +65,7 @@ async def install_deps(deps: list[str]): async def precheck_plugin_deps(): print('[Startup] Prechecking plugin dependencies...') - # 只有在plugins目录存在时才执行插件依赖安装 + # Only execute plugin dependency installation when the plugins directory exists if os.path.exists('plugins'): for dir in os.listdir('plugins'): subdir = os.path.join('plugins', dir) diff --git 
a/pkg/core/bootutils/log.py b/pkg/core/bootutils/log.py index eb6806fa..631b05e2 100644 --- a/pkg/core/bootutils/log.py +++ b/pkg/core/bootutils/log.py @@ -17,7 +17,7 @@ log_colors_config = { async def init_logging(extra_handlers: list[logging.Handler] = None) -> logging.Logger: - # 删除所有现有的logger + # Remove all existing loggers for handler in logging.root.handlers[:]: logging.root.removeHandler(handler) @@ -54,13 +54,13 @@ async def init_logging(extra_handlers: list[logging.Handler] = None) -> logging. handler.setFormatter(color_formatter) qcg_logger.addHandler(handler) - qcg_logger.debug('日志初始化完成,日志级别:%s' % level) + qcg_logger.debug('Logging initialized, log level: %s' % level) logging.basicConfig( - level=logging.CRITICAL, # 设置日志输出格式 + level=logging.CRITICAL, # Set log output format format='[DEPR][%(asctime)s.%(msecs)03d] %(pathname)s (%(lineno)d) - [%(levelname)s] :\n%(message)s', - # 日志输出的格式 - # -8表示占位符,让输出左对齐,输出长度都为8位 - datefmt='%Y-%m-%d %H:%M:%S', # 时间输出的格式 + # Log output format + # -8 is a placeholder, left-align the output, and output length is 8 + datefmt='%Y-%m-%d %H:%M:%S', # Time output format handlers=[logging.NullHandler()], ) diff --git a/pkg/core/notes/n001_classic_msgs.py b/pkg/core/notes/n001_classic_msgs.py index 3f3bd8e0..265ddbe9 100644 --- a/pkg/core/notes/n001_classic_msgs.py +++ b/pkg/core/notes/n001_classic_msgs.py @@ -7,7 +7,7 @@ from .. import note @note.note_class('ClassicNotes', 1) class ClassicNotes(note.LaunchNote): - """经典启动信息""" + """Classic launch information""" async def need_show(self) -> bool: return True diff --git a/pkg/core/notes/n002_selection_mode_on_windows.py b/pkg/core/notes/n002_selection_mode_on_windows.py index 23bff24a..16028de1 100644 --- a/pkg/core/notes/n002_selection_mode_on_windows.py +++ b/pkg/core/notes/n002_selection_mode_on_windows.py @@ -9,7 +9,7 @@ from .. 
import note @note.note_class('SelectionModeOnWindows', 2) class SelectionModeOnWindows(note.LaunchNote): - """Windows 上的选择模式提示信息""" + """Selection mode prompt information on Windows""" async def need_show(self) -> bool: return os.name == 'nt' @@ -19,3 +19,8 @@ class SelectionModeOnWindows(note.LaunchNote): """您正在使用 Windows 系统,若窗口左上角显示处于”选择“模式,程序将被暂停运行,此时请右键窗口中空白区域退出选择模式。""", logging.INFO, ) + + yield ( + """You are using Windows system, if the top left corner of the window displays "Selection" mode, the program will be paused running, please right-click on the blank area in the window to exit the selection mode.""", + logging.INFO, + ) diff --git a/pkg/core/stages/build_app.py b/pkg/core/stages/build_app.py index 6ee35610..d4b443cf 100644 --- a/pkg/core/stages/build_app.py +++ b/pkg/core/stages/build_app.py @@ -24,10 +24,10 @@ from .. import taskmgr @stage.stage_class('BuildAppStage') class BuildAppStage(stage.BootingStage): - """构建应用阶段""" + """Build LangBot application""" async def run(self, ap: app.Application): - """构建app对象的各个组件对象并初始化""" + """Build LangBot application""" ap.task_mgr = taskmgr.AsyncTaskManager(ap) discover = discover_engine.ComponentDiscoveryEngine(ap) @@ -42,7 +42,7 @@ class BuildAppStage(stage.BootingStage): await ver_mgr.initialize() ap.ver_mgr = ver_mgr - # 发送公告 + # Send announcement ann_mgr = announce.AnnouncementManager(ap) ap.ann_mgr = ann_mgr diff --git a/pkg/core/stages/genkeys.py b/pkg/core/stages/genkeys.py index 50e7cf7b..f0412b9d 100644 --- a/pkg/core/stages/genkeys.py +++ b/pkg/core/stages/genkeys.py @@ -7,10 +7,10 @@ from .. 
import stage, app @stage.stage_class('GenKeysStage') class GenKeysStage(stage.BootingStage): - """生成密钥阶段""" + """Generate keys stage""" async def run(self, ap: app.Application): - """启动""" + """Generate keys""" if not ap.instance_config.data['system']['jwt']['secret']: ap.instance_config.data['system']['jwt']['secret'] = secrets.token_hex(16) diff --git a/pkg/core/stages/load_config.py b/pkg/core/stages/load_config.py index ef5f611b..0474b33a 100644 --- a/pkg/core/stages/load_config.py +++ b/pkg/core/stages/load_config.py @@ -8,10 +8,10 @@ from ..bootutils import config @stage.stage_class('LoadConfigStage') class LoadConfigStage(stage.BootingStage): - """加载配置文件阶段""" + """Load config file stage""" async def run(self, ap: app.Application): - """启动""" + """Load config file""" # ======= deprecated ======= if os.path.exists('data/config/command.json'): diff --git a/pkg/core/stages/migrate.py b/pkg/core/stages/migrate.py index 02b03256..229e0060 100644 --- a/pkg/core/stages/migrate.py +++ b/pkg/core/stages/migrate.py @@ -11,10 +11,13 @@ importutil.import_modules_in_pkg(migrations) @stage.stage_class('MigrationStage') class MigrationStage(stage.BootingStage): - """迁移阶段""" + """Migration stage + + These migrations are legacy, only performed in version 3.x + """ async def run(self, ap: app.Application): - """启动""" + """Run migration""" if any( [ @@ -29,7 +32,7 @@ class MigrationStage(stage.BootingStage): migrations = migration.preregistered_migrations - # 按照迁移号排序 + # Sort by migration number migrations.sort(key=lambda x: x.number) for migration_cls in migrations: @@ -37,4 +40,4 @@ class MigrationStage(stage.BootingStage): if await migration_instance.need_migrate(): await migration_instance.run() - print(f'已执行迁移 {migration_instance.name}') + print(f'Migration {migration_instance.name} executed') diff --git a/pkg/core/stages/setup_logger.py b/pkg/core/stages/setup_logger.py index 0c630175..1f7c81ac 100644 --- a/pkg/core/stages/setup_logger.py +++ 
b/pkg/core/stages/setup_logger.py @@ -8,7 +8,7 @@ from ..bootutils import log class PersistenceHandler(logging.Handler, object): """ - 保存日志到数据库 + Save logs to database """ ap: app.Application @@ -19,9 +19,9 @@ class PersistenceHandler(logging.Handler, object): def emit(self, record): """ - emit函数为自定义handler类时必重写的函数,这里可以根据需要对日志消息做一些处理,比如发送日志到服务器 + emit function is a required function for custom handler classes, here you can process the log messages as needed, such as sending logs to the server - 发出记录(Emit a record) + Emit a record """ try: msg = self.format(record) @@ -34,10 +34,10 @@ class PersistenceHandler(logging.Handler, object): @stage.stage_class('SetupLoggerStage') class SetupLoggerStage(stage.BootingStage): - """设置日志器阶段""" + """Setup logger stage""" async def run(self, ap: app.Application): - """启动""" + """Setup logger""" persistence_handler = PersistenceHandler('LoggerHandler', ap) extra_handlers = [] diff --git a/pkg/core/stages/show_notes.py b/pkg/core/stages/show_notes.py index 5fa7ff08..d0f861ba 100644 --- a/pkg/core/stages/show_notes.py +++ b/pkg/core/stages/show_notes.py @@ -12,10 +12,10 @@ importutil.import_modules_in_pkg(notes) @stage.stage_class('ShowNotesStage') class ShowNotesStage(stage.BootingStage): - """显示启动信息阶段""" + """Show notes stage""" async def run(self, ap: app.Application): - # 排序 + # Sort note.preregistered_notes.sort(key=lambda x: x.number) for note_cls in note.preregistered_notes: From c6e77e42be5090faedb57640bcfeccf567385dc5 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Thu, 10 Jul 2025 11:09:33 +0800 Subject: [PATCH 039/257] chore: switch some comments to en --- pkg/core/migration.py | 10 ++--- pkg/core/note.py | 8 ++-- pkg/core/stage.py | 8 ++-- pkg/core/taskmgr.py | 40 +++++++++---------- pkg/entity/persistence/bot.py | 2 +- pkg/entity/persistence/metadata.py | 2 +- pkg/entity/persistence/model.py | 2 +- pkg/entity/persistence/pipeline.py | 4 +- pkg/entity/persistence/plugin.py | 2 +- pkg/persistence/database.py | 4 +- 
pkg/persistence/databases/sqlite.py | 2 +- pkg/persistence/mgr.py | 6 +-- pkg/persistence/migration.py | 10 ++--- .../migrations/dbm001_migrate_v3_config.py | 28 ++++++------- .../dbm002_combine_quote_msg_config.py | 6 +-- .../migrations/dbm003_n8n_config.py | 6 +-- pkg/pipeline/bansess/bansess.py | 8 ++-- pkg/pipeline/cntfilter/filter.py | 30 +++++++------- pkg/pipeline/cntfilter/filters/banwords.py | 2 +- pkg/pipeline/cntfilter/filters/cntignore.py | 6 +-- pkg/pipeline/longtext/longtext.py | 18 ++++----- pkg/pipeline/longtext/strategies/forward.py | 12 +++--- pkg/pipeline/longtext/strategy.py | 18 ++++----- pkg/pipeline/msgtrun/msgtrun.py | 8 ++-- pkg/pipeline/msgtrun/truncators/round.py | 6 +-- pkg/pipeline/preproc/preproc.py | 14 +++---- pkg/pipeline/process/handler.py | 2 +- pkg/pipeline/process/handlers/chat.py | 16 ++++---- pkg/pipeline/process/handlers/command.py | 6 +-- pkg/pipeline/process/process.py | 4 +- 30 files changed, 146 insertions(+), 144 deletions(-) diff --git a/pkg/core/migration.py b/pkg/core/migration.py index e97c0cf3..a921e6c7 100644 --- a/pkg/core/migration.py +++ b/pkg/core/migration.py @@ -7,11 +7,11 @@ from . 
import app preregistered_migrations: list[typing.Type[Migration]] = [] -"""当前阶段暂不支持扩展""" +"""Currently not supported for extension""" def migration_class(name: str, number: int): - """注册一个迁移""" + """Register a migration""" def decorator(cls: typing.Type[Migration]) -> typing.Type[Migration]: cls.name = name @@ -23,7 +23,7 @@ def migration_class(name: str, number: int): class Migration(abc.ABC): - """一个版本的迁移""" + """A version migration""" name: str @@ -36,10 +36,10 @@ class Migration(abc.ABC): @abc.abstractmethod async def need_migrate(self) -> bool: - """判断当前环境是否需要运行此迁移""" + """Determine if the current environment needs to run this migration""" pass @abc.abstractmethod async def run(self): - """执行迁移""" + """Run migration""" pass diff --git a/pkg/core/note.py b/pkg/core/note.py index 07171581..b4c37ce1 100644 --- a/pkg/core/note.py +++ b/pkg/core/note.py @@ -9,7 +9,7 @@ preregistered_notes: list[typing.Type[LaunchNote]] = [] def note_class(name: str, number: int): - """注册一个启动信息""" + """Register a launch information""" def decorator(cls: typing.Type[LaunchNote]) -> typing.Type[LaunchNote]: cls.name = name @@ -21,7 +21,7 @@ def note_class(name: str, number: int): class LaunchNote(abc.ABC): - """启动信息""" + """Launch information""" name: str @@ -34,10 +34,10 @@ class LaunchNote(abc.ABC): @abc.abstractmethod async def need_show(self) -> bool: - """判断当前环境是否需要显示此启动信息""" + """Determine if the current environment needs to display this launch information""" pass @abc.abstractmethod async def yield_note(self) -> typing.AsyncGenerator[typing.Tuple[str, int], None]: - """生成启动信息""" + """Generate launch information""" pass diff --git a/pkg/core/stage.py b/pkg/core/stage.py index 220c474d..1483e23a 100644 --- a/pkg/core/stage.py +++ b/pkg/core/stage.py @@ -7,9 +7,9 @@ from . import app preregistered_stages: dict[str, typing.Type[BootingStage]] = {} -"""预注册的请求处理阶段。在初始化时,所有请求处理阶段类会被注册到此字典中。 +"""Pre-registered request processing stages. 
All request processing stage classes are registered in this dictionary during initialization. -当前阶段暂不支持扩展 +Currently not supported for extension """ @@ -22,11 +22,11 @@ def stage_class(name: str): class BootingStage(abc.ABC): - """启动阶段""" + """Booting stage""" name: str = None @abc.abstractmethod async def run(self, ap: app.Application): - """启动""" + """Run""" pass diff --git a/pkg/core/taskmgr.py b/pkg/core/taskmgr.py index 0f756118..ca6eb029 100644 --- a/pkg/core/taskmgr.py +++ b/pkg/core/taskmgr.py @@ -9,13 +9,13 @@ from . import entities as core_entities class TaskContext: - """任务跟踪上下文""" + """Task tracking context""" current_action: str - """当前正在执行的动作""" + """Current action being executed""" log: str - """记录日志""" + """Log""" def __init__(self): self.current_action = 'default' @@ -58,40 +58,40 @@ placeholder_context: TaskContext | None = None class TaskWrapper: - """任务包装器""" + """Task wrapper""" _id_index: int = 0 - """任务ID索引""" + """Task ID index""" id: int - """任务ID""" + """Task ID""" - task_type: str = 'system' # 任务类型: system 或 user - """任务类型""" + task_type: str = 'system' # Task type: system or user + """Task type""" - kind: str = 'system_task' # 由发起者确定任务种类,通常同质化的任务种类相同 - """任务种类""" + kind: str = 'system_task' # Task type determined by the initiator, usually the same task type + """Task type""" name: str = '' - """任务唯一名称""" + """Task unique name""" label: str = '' - """任务显示名称""" + """Task display name""" task_context: TaskContext - """任务上下文""" + """Task context""" task: asyncio.Task - """任务""" + """Task""" task_stack: list = None - """任务堆栈""" + """Task stack""" ap: app.Application - """应用实例""" + """Application instance""" scopes: list[core_entities.LifecycleControlScope] - """任务所属生命周期控制范围""" + """Task scope""" def __init__( self, @@ -165,13 +165,13 @@ class TaskWrapper: class AsyncTaskManager: - """保存app中的所有异步任务 - 包含系统级的和用户级(插件安装、更新等由用户直接发起的)的""" + """Save all asynchronous tasks in the app + Include system-level and user-level (plugin installation, update, 
etc. initiated by users directly)""" ap: app.Application tasks: list[TaskWrapper] - """所有任务""" + """All tasks""" def __init__(self, ap: app.Application): self.ap = ap diff --git a/pkg/entity/persistence/bot.py b/pkg/entity/persistence/bot.py index 3c08f4ec..08eda478 100644 --- a/pkg/entity/persistence/bot.py +++ b/pkg/entity/persistence/bot.py @@ -4,7 +4,7 @@ from .base import Base class Bot(Base): - """机器人""" + """Bot""" __tablename__ = 'bots' diff --git a/pkg/entity/persistence/metadata.py b/pkg/entity/persistence/metadata.py index d9e03663..4db732b9 100644 --- a/pkg/entity/persistence/metadata.py +++ b/pkg/entity/persistence/metadata.py @@ -12,7 +12,7 @@ initial_metadata = [ class Metadata(Base): - """数据库元数据""" + """Database metadata""" __tablename__ = 'metadata' diff --git a/pkg/entity/persistence/model.py b/pkg/entity/persistence/model.py index 9eb2ccef..6cf93ec7 100644 --- a/pkg/entity/persistence/model.py +++ b/pkg/entity/persistence/model.py @@ -4,7 +4,7 @@ from .base import Base class LLMModel(Base): - """LLM 模型""" + """LLM model""" __tablename__ = 'llm_models' diff --git a/pkg/entity/persistence/pipeline.py b/pkg/entity/persistence/pipeline.py index 56e2cae9..8f28b242 100644 --- a/pkg/entity/persistence/pipeline.py +++ b/pkg/entity/persistence/pipeline.py @@ -4,7 +4,7 @@ from .base import Base class LegacyPipeline(Base): - """旧版流水线""" + """Legacy pipeline""" __tablename__ = 'legacy_pipelines' @@ -26,7 +26,7 @@ class LegacyPipeline(Base): class PipelineRunRecord(Base): - """流水线运行记录""" + """Pipeline run record""" __tablename__ = 'pipeline_run_records' diff --git a/pkg/entity/persistence/plugin.py b/pkg/entity/persistence/plugin.py index 30db6bd6..e777441f 100644 --- a/pkg/entity/persistence/plugin.py +++ b/pkg/entity/persistence/plugin.py @@ -4,7 +4,7 @@ from .base import Base class PluginSetting(Base): - """插件配置""" + """Plugin setting""" __tablename__ = 'plugin_settings' diff --git a/pkg/persistence/database.py b/pkg/persistence/database.py index 
528c6a34..4debb03d 100644 --- a/pkg/persistence/database.py +++ b/pkg/persistence/database.py @@ -11,7 +11,7 @@ preregistered_managers: list[type[BaseDatabaseManager]] = [] def manager_class(name: str) -> None: - """注册一个数据库管理类""" + """Register a database manager class""" def decorator(cls: type[BaseDatabaseManager]) -> type[BaseDatabaseManager]: cls.name = name @@ -22,7 +22,7 @@ def manager_class(name: str) -> None: class BaseDatabaseManager(abc.ABC): - """基础数据库管理类""" + """Base database manager class""" name: str diff --git a/pkg/persistence/databases/sqlite.py b/pkg/persistence/databases/sqlite.py index 7b095e61..c1337459 100644 --- a/pkg/persistence/databases/sqlite.py +++ b/pkg/persistence/databases/sqlite.py @@ -7,7 +7,7 @@ from .. import database @database.manager_class('sqlite') class SQLiteDatabaseManager(database.BaseDatabaseManager): - """SQLite 数据库管理类""" + """SQLite database manager""" async def initialize(self) -> None: sqlite_path = 'data/langbot.db' diff --git a/pkg/persistence/mgr.py b/pkg/persistence/mgr.py index 606aa9fd..9d2bab7b 100644 --- a/pkg/persistence/mgr.py +++ b/pkg/persistence/mgr.py @@ -22,12 +22,12 @@ importutil.import_modules_in_pkg(persistence) class PersistenceManager: - """持久化模块管理器""" + """Persistence module manager""" ap: app.Application db: database.BaseDatabaseManager - """数据库管理器""" + """Database manager""" meta: sqlalchemy.MetaData @@ -79,7 +79,7 @@ class PersistenceManager: 'stages': pipeline_service.default_stage_order, 'is_default': True, 'name': 'ChatPipeline', - 'description': '默认提供的流水线,您配置的机器人、第一个模型将自动绑定到此流水线', + 'description': 'Default pipeline provided, your new bots will be automatically bound to this pipeline | 默认提供的流水线,您配置的机器人将自动绑定到此流水线', 'config': pipeline_config, } diff --git a/pkg/persistence/migration.py b/pkg/persistence/migration.py index c191b686..294e30ca 100644 --- a/pkg/persistence/migration.py +++ b/pkg/persistence/migration.py @@ -10,7 +10,7 @@ preregistered_db_migrations: list[typing.Type[DBMigration]] = 
[] def migration_class(number: int): - """迁移类装饰器""" + """Migration class decorator""" def wrapper(cls: typing.Type[DBMigration]) -> typing.Type[DBMigration]: cls.number = number @@ -21,20 +21,20 @@ def migration_class(number: int): class DBMigration(abc.ABC): - """数据库迁移""" + """Database migration""" number: int - """迁移号""" + """Migration number""" def __init__(self, ap: app.Application): self.ap = ap @abc.abstractmethod async def upgrade(self): - """升级""" + """Upgrade""" pass @abc.abstractmethod async def downgrade(self): - """降级""" + """Downgrade""" pass diff --git a/pkg/persistence/migrations/dbm001_migrate_v3_config.py b/pkg/persistence/migrations/dbm001_migrate_v3_config.py index a1145527..58f05e04 100644 --- a/pkg/persistence/migrations/dbm001_migrate_v3_config.py +++ b/pkg/persistence/migrations/dbm001_migrate_v3_config.py @@ -15,21 +15,21 @@ from ...entity.persistence import ( @migration.migration_class(1) class DBMigrateV3Config(migration.DBMigration): - """从 v3 的配置迁移到 v4 的数据库""" + """Migrate v3 config to v4 database""" async def upgrade(self): - """升级""" + """Upgrade""" """ - 将 data/config 下的所有配置文件进行迁移。 - 迁移后,之前的配置文件都保存到 data/legacy/config 下。 - 迁移后,data/metadata/ 下的所有配置文件都保存到 data/legacy/metadata 下。 + Migrate all config files under data/config. + After migration, all previous config files are saved under data/legacy/config. + After migration, all config files under data/metadata/ are saved under data/legacy/metadata. 
""" if self.ap.provider_cfg is None: return - # ======= 迁移模型 ======= - # 只迁移当前选中的模型 + # ======= Migrate model ======= + # Only migrate the currently selected model model_name = self.ap.provider_cfg.data.get('model', 'gpt-4o') model_requester = 'openai-chat-completions' @@ -91,8 +91,8 @@ class DBMigrateV3Config(migration.DBMigration): sqlalchemy.insert(persistence_model.LLMModel).values(**llm_model_data) ) - # ======= 迁移流水线配置 ======= - # 修改到默认流水线 + # ======= Migrate pipeline config ======= + # Modify to default pipeline default_pipeline = [ self.ap.persistence_mgr.serialize_model(persistence_pipeline.LegacyPipeline, pipeline) for pipeline in ( @@ -184,8 +184,8 @@ class DBMigrateV3Config(migration.DBMigration): .where(persistence_pipeline.LegacyPipeline.uuid == default_pipeline['uuid']) ) - # ======= 迁移机器人 ======= - # 只迁移启用的机器人 + # ======= Migrate bot ======= + # Only migrate enabled bots for adapter in self.ap.platform_cfg.data.get('platform-adapters', []): if not adapter.get('enable'): continue @@ -207,7 +207,7 @@ class DBMigrateV3Config(migration.DBMigration): await self.ap.persistence_mgr.execute_async(sqlalchemy.insert(persistence_bot.Bot).values(**bot_data)) - # ======= 迁移系统设置 ======= + # ======= Migrate system settings ======= self.ap.instance_config.data['admins'] = self.ap.system_cfg.data['admin-sessions'] self.ap.instance_config.data['api']['port'] = self.ap.system_cfg.data['http-api']['port'] self.ap.instance_config.data['command'] = { @@ -223,7 +223,7 @@ class DBMigrateV3Config(migration.DBMigration): await self.ap.instance_config.dump_config() # ======= move files ======= - # 迁移 data/config 下的所有配置文件 + # Migrate all config files under data/config all_legacy_dir_name = [ 'config', # 'metadata', @@ -246,4 +246,4 @@ class DBMigrateV3Config(migration.DBMigration): move_legacy_files(dir_name) async def downgrade(self): - """降级""" + """Downgrade""" diff --git a/pkg/persistence/migrations/dbm002_combine_quote_msg_config.py 
b/pkg/persistence/migrations/dbm002_combine_quote_msg_config.py index cebf403b..349bb0c2 100644 --- a/pkg/persistence/migrations/dbm002_combine_quote_msg_config.py +++ b/pkg/persistence/migrations/dbm002_combine_quote_msg_config.py @@ -7,10 +7,10 @@ from ...entity.persistence import pipeline as persistence_pipeline @migration.migration_class(2) class DBMigrateCombineQuoteMsgConfig(migration.DBMigration): - """引用消息合并配置""" + """Combine quote message config""" async def upgrade(self): - """升级""" + """Upgrade""" # read all pipelines pipelines = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_pipeline.LegacyPipeline)) @@ -37,5 +37,5 @@ class DBMigrateCombineQuoteMsgConfig(migration.DBMigration): ) async def downgrade(self): - """降级""" + """Downgrade""" pass diff --git a/pkg/persistence/migrations/dbm003_n8n_config.py b/pkg/persistence/migrations/dbm003_n8n_config.py index 8705040b..15484f22 100644 --- a/pkg/persistence/migrations/dbm003_n8n_config.py +++ b/pkg/persistence/migrations/dbm003_n8n_config.py @@ -7,10 +7,10 @@ from ...entity.persistence import pipeline as persistence_pipeline @migration.migration_class(3) class DBMigrateN8nConfig(migration.DBMigration): - """N8n配置""" + """N8n config""" async def upgrade(self): - """升级""" + """Upgrade""" # read all pipelines pipelines = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_pipeline.LegacyPipeline)) @@ -45,5 +45,5 @@ class DBMigrateN8nConfig(migration.DBMigration): ) async def downgrade(self): - """降级""" + """Downgrade""" pass diff --git a/pkg/pipeline/bansess/bansess.py b/pkg/pipeline/bansess/bansess.py index 3b927a55..c88a1aa2 100644 --- a/pkg/pipeline/bansess/bansess.py +++ b/pkg/pipeline/bansess/bansess.py @@ -6,9 +6,9 @@ from ...core import entities as core_entities @stage.stage_class('BanSessionCheckStage') class BanSessionCheckStage(stage.PipelineStage): - """访问控制处理阶段 + """Access control processing stage - 仅检查query中群号或个人号是否在访问控制列表中。 + Only check if the group 
or personal number in the query is in the access control list. """ async def initialize(self, pipeline_config: dict): @@ -41,5 +41,7 @@ class BanSessionCheckStage(stage.PipelineStage): return entities.StageProcessResult( result_type=entities.ResultType.CONTINUE if ctn else entities.ResultType.INTERRUPT, new_query=query, - console_notice=f'根据访问控制忽略消息: {query.launcher_type.value}_{query.launcher_id}' if not ctn else '', + console_notice=f'Ignore message according to access control: {query.launcher_type.value}_{query.launcher_id}' + if not ctn + else '', ) diff --git a/pkg/pipeline/cntfilter/filter.py b/pkg/pipeline/cntfilter/filter.py index 0a3ceaae..36d8a7f4 100644 --- a/pkg/pipeline/cntfilter/filter.py +++ b/pkg/pipeline/cntfilter/filter.py @@ -13,13 +13,13 @@ preregistered_filters: list[typing.Type[ContentFilter]] = [] def filter_class( name: str, ) -> typing.Callable[[typing.Type[ContentFilter]], typing.Type[ContentFilter]]: - """内容过滤器类装饰器 + """Content filter class decorator Args: - name (str): 过滤器名称 + name (str): Filter name Returns: - typing.Callable[[typing.Type[ContentFilter]], typing.Type[ContentFilter]]: 装饰器 + typing.Callable[[typing.Type[ContentFilter]], typing.Type[ContentFilter]]: Decorator """ def decorator(cls: typing.Type[ContentFilter]) -> typing.Type[ContentFilter]: @@ -35,7 +35,7 @@ def filter_class( class ContentFilter(metaclass=abc.ABCMeta): - """内容过滤器抽象类""" + """Content filter abstract class""" name: str @@ -46,31 +46,31 @@ class ContentFilter(metaclass=abc.ABCMeta): @property def enable_stages(self): - """启用的阶段 + """Enabled stages - 默认为消息请求AI前后的两个阶段。 + Default is the two stages before and after the message request to AI. - entity.EnableStage.PRE: 消息请求AI前,此时需要检查的内容是用户的输入消息。 - entity.EnableStage.POST: 消息请求AI后,此时需要检查的内容是AI的回复消息。 + entity.EnableStage.PRE: Before message request to AI, the content to check is the user's input message. + entity.EnableStage.POST: After message request to AI, the content to check is the AI's reply message. 
""" return [entities.EnableStage.PRE, entities.EnableStage.POST] async def initialize(self): - """初始化过滤器""" + """Initialize filter""" pass @abc.abstractmethod async def process(self, query: core_entities.Query, message: str = None, image_url=None) -> entities.FilterResult: - """处理消息 + """Process message - 分为前后阶段,具体取决于 enable_stages 的值。 - 对于内容过滤器来说,不需要考虑消息所处的阶段,只需要检查消息内容即可。 + It is divided into two stages, depending on the value of enable_stages. + For content filters, you do not need to consider the stage of the message, you only need to check the message content. Args: - message (str): 需要检查的内容 - image_url (str): 要检查的图片的 URL + message (str): Content to check + image_url (str): URL of the image to check Returns: - entities.FilterResult: 过滤结果,具体内容请查看 entities.FilterResult 类的文档 + entities.FilterResult: Filter result, please refer to the documentation of entities.FilterResult class """ raise NotImplementedError diff --git a/pkg/pipeline/cntfilter/filters/banwords.py b/pkg/pipeline/cntfilter/filters/banwords.py index 916a1bc1..b03e79a9 100644 --- a/pkg/pipeline/cntfilter/filters/banwords.py +++ b/pkg/pipeline/cntfilter/filters/banwords.py @@ -8,7 +8,7 @@ from ....core import entities as core_entities @filter_model.filter_class('ban-word-filter') class BanWordFilter(filter_model.ContentFilter): - """根据内容过滤""" + """Filter content""" async def initialize(self): pass diff --git a/pkg/pipeline/cntfilter/filters/cntignore.py b/pkg/pipeline/cntfilter/filters/cntignore.py index 5e410e31..b80d90eb 100644 --- a/pkg/pipeline/cntfilter/filters/cntignore.py +++ b/pkg/pipeline/cntfilter/filters/cntignore.py @@ -8,7 +8,7 @@ from ....core import entities as core_entities @filter_model.filter_class('content-ignore') class ContentIgnore(filter_model.ContentFilter): - """根据内容忽略消息""" + """Ignore message according to content""" @property def enable_stages(self): @@ -24,7 +24,7 @@ class ContentIgnore(filter_model.ContentFilter): level=entities.ResultLevel.BLOCK, replacement='', 
user_notice='', - console_notice='根据 ignore_rules 中的 prefix 规则,忽略消息', + console_notice='Ignore message according to prefix rule in ignore_rules', ) if 'regexp' in query.pipeline_config['trigger']['ignore-rules']: @@ -34,7 +34,7 @@ class ContentIgnore(filter_model.ContentFilter): level=entities.ResultLevel.BLOCK, replacement='', user_notice='', - console_notice='根据 ignore_rules 中的 regexp 规则,忽略消息', + console_notice='Ignore message according to regexp rule in ignore_rules', ) return entities.FilterResult( diff --git a/pkg/pipeline/longtext/longtext.py b/pkg/pipeline/longtext/longtext.py index 5be20650..03457212 100644 --- a/pkg/pipeline/longtext/longtext.py +++ b/pkg/pipeline/longtext/longtext.py @@ -16,9 +16,9 @@ importutil.import_modules_in_pkg(strategies) @stage.stage_class('LongTextProcessStage') class LongTextProcessStage(stage.PipelineStage): - """长消息处理阶段 + """Long message processing stage - 改写: + Rewrite: - resp_message_chain """ @@ -36,22 +36,22 @@ class LongTextProcessStage(stage.PipelineStage): use_font = 'C:/Windows/Fonts/msyh.ttc' if not os.path.exists(use_font): self.ap.logger.warn( - '未找到字体文件,且无法使用Windows自带字体,更换为转发消息组件以发送长消息,您可以在配置文件中调整相关设置。' + 'Font file not found, and Windows system font cannot be used, switch to forward message component to send long messages, you can adjust the related settings in the configuration file.' ) config['blob_message_strategy'] = 'forward' else: - self.ap.logger.info('使用Windows自带字体:' + use_font) + self.ap.logger.info('Using Windows system font: ' + use_font) config['font-path'] = use_font else: self.ap.logger.warn( - '未找到字体文件,且无法使用系统自带字体,更换为转发消息组件以发送长消息,您可以在配置文件中调整相关设置。' + 'Font file not found, and system font cannot be used, switch to forward message component to send long messages, you can adjust the related settings in the configuration file.' 
) pipeline_config['output']['long-text-processing']['strategy'] = 'forward' except Exception: traceback.print_exc() self.ap.logger.error( - '加载字体文件失败({}),更换为转发消息组件以发送长消息,您可以在配置文件中调整相关设置。'.format( + 'Failed to load font file ({}), switch to forward message component to send long messages, you can adjust the related settings in the configuration file.'.format( use_font ) ) @@ -63,12 +63,12 @@ class LongTextProcessStage(stage.PipelineStage): self.strategy_impl = strategy_cls(self.ap) break else: - raise ValueError(f'未找到名为 {config["strategy"]} 的长消息处理策略') + raise ValueError(f'Long message processing strategy not found: {config["strategy"]}') await self.strategy_impl.initialize() async def process(self, query: core_entities.Query, stage_inst_name: str) -> entities.StageProcessResult: - # 检查是否包含非 Plain 组件 + # Check if it contains non-Plain components contains_non_plain = False for msg in query.resp_message_chain[-1]: @@ -77,7 +77,7 @@ class LongTextProcessStage(stage.PipelineStage): break if contains_non_plain: - self.ap.logger.debug('消息中包含非 Plain 组件,跳过长消息处理。') + self.ap.logger.debug('Message contains non-Plain components, skip long message processing.') elif ( len(str(query.resp_message_chain[-1])) > query.pipeline_config['output']['long-text-processing']['threshold'] diff --git a/pkg/pipeline/longtext/strategies/forward.py b/pkg/pipeline/longtext/strategies/forward.py index 6228d580..cb772339 100644 --- a/pkg/pipeline/longtext/strategies/forward.py +++ b/pkg/pipeline/longtext/strategies/forward.py @@ -15,17 +15,17 @@ Forward = platform_message.Forward class ForwardComponentStrategy(strategy_model.LongTextStrategy): async def process(self, message: str, query: core_entities.Query) -> list[platform_message.MessageComponent]: display = ForwardMessageDiaplay( - title='群聊的聊天记录', - brief='[聊天记录]', - source='聊天记录', - preview=['QQ用户: ' + message], - summary='查看1条转发消息', + title='Group chat history', + brief='[Chat history]', + source='Chat history', + preview=['User: ' + 
message], + summary='View 1 forwarded message', ) node_list = [ platform_message.ForwardMessageNode( sender_id=query.adapter.bot_account_id, - sender_name='QQ用户', + sender_name='User', message_chain=platform_message.MessageChain([message]), ) ] diff --git a/pkg/pipeline/longtext/strategy.py b/pkg/pipeline/longtext/strategy.py index 0ddec0c6..5b521067 100644 --- a/pkg/pipeline/longtext/strategy.py +++ b/pkg/pipeline/longtext/strategy.py @@ -14,13 +14,13 @@ preregistered_strategies: list[typing.Type[LongTextStrategy]] = [] def strategy_class( name: str, ) -> typing.Callable[[typing.Type[LongTextStrategy]], typing.Type[LongTextStrategy]]: - """长文本处理策略类装饰器 + """Long text processing strategy class decorator Args: - name (str): 策略名称 + name (str): Strategy name Returns: - typing.Callable[[typing.Type[LongTextStrategy]], typing.Type[LongTextStrategy]]: 装饰器 + typing.Callable[[typing.Type[LongTextStrategy]], typing.Type[LongTextStrategy]]: Decorator """ def decorator(cls: typing.Type[LongTextStrategy]) -> typing.Type[LongTextStrategy]: @@ -36,7 +36,7 @@ def strategy_class( class LongTextStrategy(metaclass=abc.ABCMeta): - """长文本处理策略抽象类""" + """Long text processing strategy abstract class""" name: str @@ -50,15 +50,15 @@ class LongTextStrategy(metaclass=abc.ABCMeta): @abc.abstractmethod async def process(self, message: str, query: core_entities.Query) -> list[platform_message.MessageComponent]: - """处理长文本 + """Process long text - 在 platform.json 中配置 long-text-process 字段,只要 文本长度超过了 threshold 就会调用此方法 + If the text length exceeds the threshold, this method will be called. 
Args: - message (str): 消息 - query (core_entities.Query): 此次请求的上下文对象 + message (str): Message + query (core_entities.Query): Query object Returns: - list[platform_message.MessageComponent]: 转换后的 平台 消息组件列表 + list[platform_message.MessageComponent]: Converted platform message components """ return [] diff --git a/pkg/pipeline/msgtrun/msgtrun.py b/pkg/pipeline/msgtrun/msgtrun.py index c64f67fc..1c5ee17d 100644 --- a/pkg/pipeline/msgtrun/msgtrun.py +++ b/pkg/pipeline/msgtrun/msgtrun.py @@ -12,9 +12,9 @@ importutil.import_modules_in_pkg(truncators) @stage.stage_class('ConversationMessageTruncator') class ConversationMessageTruncator(stage.PipelineStage): - """会话消息截断器 + """Conversation message truncator - 用于截断会话消息链,以适应平台消息长度限制。 + Used to truncate the conversation message chain to adapt to the LLM message length limit. """ trun: truncator.Truncator @@ -27,10 +27,10 @@ class ConversationMessageTruncator(stage.PipelineStage): self.trun = trun(self.ap) break else: - raise ValueError(f'未知的截断器: {use_method}') + raise ValueError(f'Unknown truncator: {use_method}') async def process(self, query: core_entities.Query, stage_inst_name: str) -> entities.StageProcessResult: - """处理""" + """Process""" query = await self.trun.truncate(query) return entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query) diff --git a/pkg/pipeline/msgtrun/truncators/round.py b/pkg/pipeline/msgtrun/truncators/round.py index fa72a0e1..2acb1d8c 100644 --- a/pkg/pipeline/msgtrun/truncators/round.py +++ b/pkg/pipeline/msgtrun/truncators/round.py @@ -6,17 +6,17 @@ from ....core import entities as core_entities @truncator.truncator_class('round') class RoundTruncator(truncator.Truncator): - """前文回合数阶段器""" + """Truncate the conversation message chain to adapt to the LLM message length limit.""" async def truncate(self, query: core_entities.Query) -> core_entities.Query: - """截断""" + """Truncate""" max_round = query.pipeline_config['ai']['local-agent']['max-round'] temp_messages = [] 
current_round = 0 - # 从后往前遍历 + # Traverse from back to front for msg in query.messages[::-1]: if current_round < max_round: temp_messages.append(msg) diff --git a/pkg/pipeline/preproc/preproc.py b/pkg/pipeline/preproc/preproc.py index 19478200..bfa0924d 100644 --- a/pkg/pipeline/preproc/preproc.py +++ b/pkg/pipeline/preproc/preproc.py @@ -11,11 +11,11 @@ from ...platform.types import message as platform_message @stage.stage_class('PreProcessor') class PreProcessor(stage.PipelineStage): - """请求预处理阶段 + """Request pre-processing stage - 签出会话、prompt、上文、模型、内容函数。 + Check out session, prompt, context, model, and content functions. - 改写: + Rewrite: - session - prompt - messages @@ -29,12 +29,12 @@ class PreProcessor(stage.PipelineStage): query: core_entities.Query, stage_inst_name: str, ) -> entities.StageProcessResult: - """处理""" + """Process""" selected_runner = query.pipeline_config['ai']['runner']['runner'] session = await self.ap.sess_mgr.get_session(query) - # 非 local-agent 时,llm_model 为 None + # When not local-agent, llm_model is None llm_model = ( await self.ap.model_mgr.get_model_by_uuid(query.pipeline_config['ai']['local-agent']['model']) if selected_runner == 'local-agent' @@ -51,7 +51,7 @@ class PreProcessor(stage.PipelineStage): conversation.use_llm_model = llm_model - # 设置query + # Set query query.session = session query.prompt = conversation.prompt.copy() query.messages = conversation.messages.copy() @@ -109,7 +109,7 @@ class PreProcessor(stage.PipelineStage): query.variables['user_message_text'] = plain_text query.user_message = llm_entities.Message(role='user', content=content_list) - # =========== 触发事件 PromptPreProcessing + # =========== Trigger event PromptPreProcessing event_ctx = await self.ap.plugin_mgr.emit_event( event=events.PromptPreProcessing( diff --git a/pkg/pipeline/process/handler.py b/pkg/pipeline/process/handler.py index 8a32bcfb..837b72e2 100644 --- a/pkg/pipeline/process/handler.py +++ b/pkg/pipeline/process/handler.py @@ -25,7 +25,7 @@ 
class MessageHandler(metaclass=abc.ABCMeta): def cut_str(self, s: str) -> str: """ - 取字符串第一行,最多20个字符,若有多行,或超过20个字符,则加省略号 + Take the first line of the string, up to 20 characters, if there are multiple lines, or more than 20 characters, add an ellipsis """ s0 = s.split('\n')[0] if len(s0) > 20 or '\n' in s: diff --git a/pkg/pipeline/process/handlers/chat.py b/pkg/pipeline/process/handlers/chat.py index 35fa1611..2aa08e17 100644 --- a/pkg/pipeline/process/handlers/chat.py +++ b/pkg/pipeline/process/handlers/chat.py @@ -22,11 +22,11 @@ class ChatMessageHandler(handler.MessageHandler): self, query: core_entities.Query, ) -> typing.AsyncGenerator[entities.StageProcessResult, None]: - """处理""" - # 调API - # 生成器 + """Process""" + # Call API + # generator - # 触发插件事件 + # Trigger plugin event event_class = ( events.PersonNormalMessageReceived if query.launcher_type == core_entities.LauncherTypes.PERSON @@ -54,7 +54,7 @@ class ChatMessageHandler(handler.MessageHandler): yield entities.StageProcessResult(result_type=entities.ResultType.INTERRUPT, new_query=query) else: if event_ctx.event.alter is not None: - # if isinstance(event_ctx.event, str): # 现在暂时不考虑多模态alter + # if isinstance(event_ctx.event, str): # Currently not considering multi-modal alter query.user_message.content = event_ctx.event.alter text_length = 0 @@ -65,12 +65,12 @@ class ChatMessageHandler(handler.MessageHandler): runner = r(self.ap, query.pipeline_config) break else: - raise ValueError(f'未找到请求运行器: {query.pipeline_config["ai"]["runner"]["runner"]}') + raise ValueError(f'Request runner not found: {query.pipeline_config["ai"]["runner"]["runner"]}') async for result in runner.run(query): query.resp_messages.append(result) - self.ap.logger.info(f'对话({query.query_id})响应: {self.cut_str(result.readable_str())}') + self.ap.logger.info(f'Response({query.query_id}): {self.cut_str(result.readable_str())}') if result.content is not None: text_length += len(result.content) @@ -80,7 +80,7 @@ class 
ChatMessageHandler(handler.MessageHandler): query.session.using_conversation.messages.append(query.user_message) query.session.using_conversation.messages.extend(query.resp_messages) except Exception as e: - self.ap.logger.error(f'对话({query.query_id})请求失败: {type(e).__name__} {str(e)}') + self.ap.logger.error(f'Request failed({query.query_id}): {type(e).__name__} {str(e)}') hide_exception_info = query.pipeline_config['output']['misc']['hide-exception'] diff --git a/pkg/pipeline/process/handlers/command.py b/pkg/pipeline/process/handlers/command.py index cc0e9314..7348d6b8 100644 --- a/pkg/pipeline/process/handlers/command.py +++ b/pkg/pipeline/process/handlers/command.py @@ -15,7 +15,7 @@ class CommandHandler(handler.MessageHandler): self, query: core_entities.Query, ) -> typing.AsyncGenerator[entities.StageProcessResult, None]: - """处理""" + """Process""" command_text = str(query.message_chain).strip()[1:] @@ -70,7 +70,7 @@ class CommandHandler(handler.MessageHandler): ) ) - self.ap.logger.info(f'命令({query.query_id})报错: {self.cut_str(str(ret.error))}') + self.ap.logger.info(f'Command({query.query_id}) error: {self.cut_str(str(ret.error))}') yield entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query) elif ret.text is not None or ret.image_url is not None: @@ -89,7 +89,7 @@ class CommandHandler(handler.MessageHandler): ) ) - self.ap.logger.info(f'命令返回: {self.cut_str(str(content[0]))}') + self.ap.logger.info(f'Command returned: {self.cut_str(str(content[0]))}') yield entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query) else: diff --git a/pkg/pipeline/process/process.py b/pkg/pipeline/process/process.py index 64903552..db66135c 100644 --- a/pkg/pipeline/process/process.py +++ b/pkg/pipeline/process/process.py @@ -33,11 +33,11 @@ class Processor(stage.PipelineStage): query: core_entities.Query, stage_inst_name: str, ) -> entities.StageProcessResult: - """处理""" + """Process""" message_text = 
str(query.message_chain).strip() self.ap.logger.info( - f'处理 {query.launcher_type.value}_{query.launcher_id} 的请求({query.query_id}): {message_text}' + f'Processing request from {query.launcher_type.value}_{query.launcher_id} ({query.query_id}): {message_text}' ) async def generator(): From 75c3ddde19b54418af5c1687bfe71810c3773c6e Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Thu, 10 Jul 2025 16:45:59 +0800 Subject: [PATCH 040/257] perf: definitions --- .../controller/groups/knowledge/__init__.py | 0 .../{knowledge_base.py => knowledge/base.py} | 18 +- pkg/api/http/controller/main.py | 2 + pkg/core/app.py | 7 +- pkg/core/stages/build_app.py | 4 +- pkg/rag/knowledge/{RAG_Manager.py => mgr.py} | 160 +++++++++++------- 6 files changed, 112 insertions(+), 79 deletions(-) create mode 100644 pkg/api/http/controller/groups/knowledge/__init__.py rename pkg/api/http/controller/groups/{knowledge_base.py => knowledge/base.py} (90%) rename pkg/rag/knowledge/{RAG_Manager.py => mgr.py} (67%) diff --git a/pkg/api/http/controller/groups/knowledge/__init__.py b/pkg/api/http/controller/groups/knowledge/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pkg/api/http/controller/groups/knowledge_base.py b/pkg/api/http/controller/groups/knowledge/base.py similarity index 90% rename from pkg/api/http/controller/groups/knowledge_base.py rename to pkg/api/http/controller/groups/knowledge/base.py index ce391042..cf5bb44e 100644 --- a/pkg/api/http/controller/groups/knowledge_base.py +++ b/pkg/api/http/controller/groups/knowledge/base.py @@ -1,13 +1,9 @@ import quart -from .. import group -import os # 导入 os 用于文件操作 +from ... 
import group + @group.group_class('knowledge_base', '/api/v1/knowledge/bases') class KnowledgeBaseRouterGroup(group.RouterGroup): - # 定义成功方法 - def success(self, code=0, data=None, msg: str = 'ok') -> quart.Response: - return quart.jsonify({'code': code, 'data': data or {}, 'msg': msg}) - async def initialize(self) -> None: @self.route('', methods=['POST', 'GET'], endpoint='handle_knowledge_bases') async def handle_knowledge_bases() -> str: @@ -51,7 +47,6 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): await self.ap.knowledge_base_service.delete_kb_by_id(int(knowledge_base_uuid)) return self.success(code=0, msg='ok') - @self.route('//files', methods=['GET'], endpoint='get_knowledge_base_files') async def get_knowledge_base_files(knowledge_base_uuid: str) -> str: files = await self.ap.knowledge_base_service.get_files_by_knowledge_base(int(knowledge_base_uuid)) @@ -68,14 +63,13 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): msg='ok', ) - @self.route('//files/', methods=['DELETE'], endpoint='delete_specific_file_in_kb') async def delete_specific_file_in_kb(file_id: str) -> str: await self.ap.knowledge_base_service.delete_data_by_file_id(int(file_id)) return self.success(code=0, msg='ok') - + @self.route('//files', methods=['POST'], endpoint='relate_file_with_kb') - async def relate_file_id_with_kb(knowledge_base_uuid:str,file_id: str) -> str: + async def relate_file_id_with_kb(knowledge_base_uuid: str, file_id: str) -> str: if 'file' not in quart.request.files: return self.http_status(400, -1, 'No file part in the request') @@ -83,7 +77,7 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): file_id = json_data.get('file_id') if not file_id: return self.http_status(400, -1, 'File ID is required') - + # 调用服务层方法将文件与知识库关联 await self.ap.knowledge_base_service.relate_file_id_with_kb(int(knowledge_base_uuid), int(file_id)) - return self.success(code=0, data={}, msg='ok') \ No newline at end of file + return self.success(code=0, data={}, msg='ok') diff --git 
a/pkg/api/http/controller/main.py b/pkg/api/http/controller/main.py index eb434d88..4eec4e1d 100644 --- a/pkg/api/http/controller/main.py +++ b/pkg/api/http/controller/main.py @@ -14,11 +14,13 @@ from . import group from .groups import provider as groups_provider from .groups import platform as groups_platform from .groups import pipelines as groups_pipelines +from .groups import knowledge as groups_knowledge importutil.import_modules_in_pkg(groups) importutil.import_modules_in_pkg(groups_provider) importutil.import_modules_in_pkg(groups_platform) importutil.import_modules_in_pkg(groups_pipelines) +importutil.import_modules_in_pkg(groups_knowledge) class HTTPController: diff --git a/pkg/core/app.py b/pkg/core/app.py index 2e3c9500..11d25826 100644 --- a/pkg/core/app.py +++ b/pkg/core/app.py @@ -27,7 +27,7 @@ from ..storage import mgr as storagemgr from ..utils import logcache from . import taskmgr from . import entities as core_entities -from pkg.rag.knowledge.RAG_Manager import RAG_Manager +from ..rag.knowledge import mgr as rag_mgr class Application: @@ -48,7 +48,6 @@ class Application: model_mgr: llm_model_mgr.ModelManager = None - # TODO 移动到 pipeline 里 tool_mgr: llm_tool_mgr.ToolManager = None @@ -101,7 +100,6 @@ class Application: storage_mgr: storagemgr.StorageMgr = None - # ========= HTTP Services ========= user_service: user_service.UserService = None @@ -114,8 +112,7 @@ class Application: bot_service: bot_service.BotService = None - knowledge_base_service: RAG_Manager = None - + knowledge_base_service: rag_mgr.RAGManager = None def __init__(self): pass diff --git a/pkg/core/stages/build_app.py b/pkg/core/stages/build_app.py index bb86a6d3..ac76c331 100644 --- a/pkg/core/stages/build_app.py +++ b/pkg/core/stages/build_app.py @@ -9,7 +9,7 @@ from ...command import cmdmgr from ...provider.session import sessionmgr as llm_session_mgr from ...provider.modelmgr import modelmgr as llm_model_mgr from ...provider.tools import toolmgr as llm_tool_mgr -from 
...rag.knowledge.RAG_Manager import RAG_Manager as knowledge_base_mgr +from ...rag.knowledge import mgr as rag_mgr from ...platform import botmgr as im_mgr from ...persistence import mgr as persistencemgr from ...api.http.controller import main as http_controller @@ -102,7 +102,7 @@ class BuildAppStage(stage.BootingStage): embedding_models_service_inst = model_service.EmbeddingModelsService(ap) ap.embedding_models_service = embedding_models_service_inst - knowledge_base_service_inst = knowledge_base_mgr(ap) + knowledge_base_service_inst = rag_mgr.RAGManager(ap) await knowledge_base_service_inst.initialize_rag_system() ap.knowledge_base_service = knowledge_base_service_inst diff --git a/pkg/rag/knowledge/RAG_Manager.py b/pkg/rag/knowledge/mgr.py similarity index 67% rename from pkg/rag/knowledge/RAG_Manager.py rename to pkg/rag/knowledge/mgr.py index 9675371b..7d1787e0 100644 --- a/pkg/rag/knowledge/RAG_Manager.py +++ b/pkg/rag/knowledge/mgr.py @@ -3,7 +3,6 @@ from __future__ import annotations import logging import os import asyncio -import json import uuid from pkg.rag.knowledge.services.parser import FileParser from pkg.rag.knowledge.services.chunker import Chunker @@ -14,7 +13,8 @@ from pkg.rag.knowledge.services.embedding_models import EmbeddingModelFactory from pkg.rag.knowledge.services.chroma_manager import ChromaIndexManager from pkg.core import app -class RAG_Manager: + +class RAGManager: ap: app.Application def __init__(self, ap: app.Application, logger: logging.Logger = None): @@ -42,32 +42,54 @@ class RAG_Manager: try: model = EmbeddingModelFactory.create_model( - model_type=self.embedding_model_type, - model_name_key=self.embedding_model_name + model_type=self.embedding_model_type, model_name_key=self.embedding_model_name + ) + self.logger.info( + f"Configured embedding model '{self.embedding_model_name}' has dimension: {model.embedding_dimension}" ) - self.logger.info(f"Configured embedding model '{self.embedding_model_name}' has dimension: 
{model.embedding_dimension}") except Exception as e: - self.logger.critical(f"Failed to get dimension for configured embedding model '{self.embedding_model_name}': {e}") - raise RuntimeError("Failed to initialize RAG_Manager due to embedding model issues.") + self.logger.critical( + f"Failed to get dimension for configured embedding model '{self.embedding_model_name}': {e}" + ) + raise RuntimeError('Failed to initialize RAG_Manager due to embedding model issues.') - self.chroma_manager = ChromaIndexManager(collection_name=f"rag_collection_{self.embedding_model_name.replace('-', '_')}") - self.embedder = Embedder(model_type=self.embedding_model_type, model_name_key=self.embedding_model_name, chroma_manager=self.chroma_manager) - self.retriever = Retriever(model_type=self.embedding_model_type, model_name_key=self.embedding_model_name, chroma_manager=self.chroma_manager) + self.chroma_manager = ChromaIndexManager( + collection_name=f'rag_collection_{self.embedding_model_name.replace("-", "_")}' + ) + self.embedder = Embedder( + model_type=self.embedding_model_type, + model_name_key=self.embedding_model_name, + chroma_manager=self.chroma_manager, + ) + self.retriever = Retriever( + model_type=self.embedding_model_type, + model_name_key=self.embedding_model_name, + chroma_manager=self.chroma_manager, + ) - async def create_knowledge_base(self, kb_name: str, kb_description: str, embedding_model: str = "", top_k: int = 5): + async def create_knowledge_base(self, kb_name: str, kb_description: str, embedding_model: str = '', top_k: int = 5): """ Creates a new knowledge base if it doesn't already exist. """ try: if not self.embedding_model_type or not kb_name: - raise ValueError("Embedding model type and knowledge base name must be set before creating a knowledge base.") + raise ValueError( + 'Embedding model type and knowledge base name must be set before creating a knowledge base.' 
+ ) + def _create_kb_sync(): session = SessionLocal() try: kb = session.query(KnowledgeBase).filter_by(name=kb_name).first() if not kb: id = uuid.uuid4().int - new_kb = KnowledgeBase(name=kb_name, description=kb_description, embedding_model=embedding_model, top_k=top_k,id=id) + new_kb = KnowledgeBase( + name=kb_name, + description=kb_description, + embedding_model=embedding_model, + top_k=top_k, + id=id, + ) session.add(new_kb) session.commit() session.refresh(new_kb) @@ -80,7 +102,7 @@ class RAG_Manager: self.logger.error(f"Error in _create_kb_sync for '{kb_name}': {str(e)}", exc_info=True) raise finally: - session.close() + session.close() return await asyncio.to_thread(_create_kb_sync) except Exception as e: @@ -92,15 +114,17 @@ class RAG_Manager: Retrieves all knowledge bases from the database. """ try: + def _get_all_kbs_sync(): session = SessionLocal() try: return session.query(KnowledgeBase).all() finally: session.close() + return await asyncio.to_thread(_get_all_kbs_sync) except Exception as e: - self.logger.error(f"Error retrieving knowledge bases: {str(e)}", exc_info=True) + self.logger.error(f'Error retrieving knowledge bases: {str(e)}', exc_info=True) return [] async def get_knowledge_base_by_id(self, kb_id: int): @@ -108,15 +132,17 @@ class RAG_Manager: Retrieves a specific knowledge base by its ID. """ try: + def _get_kb_sync(kb_id_param): session = SessionLocal() try: return session.query(KnowledgeBase).filter_by(id=kb_id_param).first() finally: session.close() + return await asyncio.to_thread(_get_kb_sync, kb_id) except Exception as e: - self.logger.error(f"Error retrieving knowledge base with ID {kb_id}: {str(e)}", exc_info=True) + self.logger.error(f'Error retrieving knowledge base with ID {kb_id}: {str(e)}', exc_info=True) return None async def get_files_by_knowledge_base(self, kb_id: int): @@ -124,15 +150,17 @@ class RAG_Manager: Retrieves files associated with a specific knowledge base by querying the File table directly. 
""" try: + def _get_files_sync(kb_id_param): session = SessionLocal() try: return session.query(File).filter_by(kb_id=kb_id_param).all() finally: session.close() + return await asyncio.to_thread(_get_files_sync, kb_id) except Exception as e: - self.logger.error(f"Error retrieving files for knowledge base ID {kb_id}: {str(e)}", exc_info=True) + self.logger.error(f'Error retrieving files for knowledge base ID {kb_id}: {str(e)}', exc_info=True) return [] async def get_all_files(self): @@ -141,23 +169,27 @@ class RAG_Manager: with any specific knowledge base. """ try: + def _get_all_files_sync(): session = SessionLocal() try: return session.query(File).all() finally: session.close() + return await asyncio.to_thread(_get_all_files_sync) except Exception as e: - self.logger.error(f"Error retrieving all files: {str(e)}", exc_info=True) + self.logger.error(f'Error retrieving all files: {str(e)}', exc_info=True) return [] - async def store_data(self, file_path: str, kb_name: str, file_type: str, kb_description: str = "Default knowledge base"): + async def store_data( + self, file_path: str, kb_name: str, file_type: str, kb_description: str = 'Default knowledge base' + ): """ Parses, chunks, embeds, and stores data from a given file into the RAG system. Associates the file with a knowledge base using kb_id in the File table. """ - self.logger.info(f"Starting data storage process for file: {file_path}") + self.logger.info(f'Starting data storage process for file: {file_path}') session = SessionLocal() file_obj = None @@ -177,37 +209,43 @@ class RAG_Manager: file_name = os.path.basename(file_path) existing_file = session.query(File).filter_by(kb_id=kb.id, file_name=file_name).first() if existing_file: - self.logger.warning(f"File '{file_name}' already exists in knowledge base '{kb_name}'. Skipping storage.") + self.logger.warning( + f"File '{file_name}' already exists in knowledge base '{kb_name}'. Skipping storage." 
+ ) return file_obj = File(kb_id=kb.id, file_name=file_name, path=file_path, file_type=file_type) session.add(file_obj) session.commit() session.refresh(file_obj) - self.logger.info(f"File record '{file_name}' added to database with ID: {file_obj.id}, associated with KB ID: {kb.id}") + self.logger.info( + f"File record '{file_name}' added to database with ID: {file_obj.id}, associated with KB ID: {kb.id}" + ) # 3. 解析文件内容 text = await self.parser.parse(file_path) if not text: - self.logger.warning(f"No text extracted from file {file_path}. Deleting file record ID: {file_obj.id}.") + self.logger.warning(f'No text extracted from file {file_path}. Deleting file record ID: {file_obj.id}.') session.delete(file_obj) - session.commit() # 提交删除操作 + session.commit() # 提交删除操作 return # 4. 分块并嵌入/存储块 chunks_texts = await self.chunker.chunk(text) self.logger.info(f"Chunked file '{file_name}' into {len(chunks_texts)} chunks.") await self.embedder.embed_and_store(file_id=file_obj.id, chunks=chunks_texts) - self.logger.info(f"Data storage process completed for file: {file_path}") + self.logger.info(f'Data storage process completed for file: {file_path}') except Exception as e: session.rollback() - self.logger.error(f"Error in store_data for file {file_path}: {str(e)}", exc_info=True) + self.logger.error(f'Error in store_data for file {file_path}: {str(e)}', exc_info=True) if file_obj and file_obj.id: try: await asyncio.to_thread(self.chroma_manager.delete_by_file_id_sync, file_obj.id) except Exception as chroma_e: - self.logger.warning(f"Could not clean up ChromaDB entries for file_id {file_obj.id} after store_data failure: {chroma_e}") + self.logger.warning( + f'Could not clean up ChromaDB entries for file_id {file_obj.id} after store_data failure: {chroma_e}' + ) raise finally: session.close() @@ -219,7 +257,7 @@ class RAG_Manager: self.logger.info(f"Starting data retrieval process for query: '{query}'") try: retrieved_chunks = await self.retriever.retrieve(query) - 
self.logger.info(f"Successfully retrieved {len(retrieved_chunks)} chunks for query.") + self.logger.info(f'Successfully retrieved {len(retrieved_chunks)} chunks for query.') return retrieved_chunks except Exception as e: self.logger.error(f"Error in retrieve_data for query '{query}': {str(e)}", exc_info=True) @@ -230,32 +268,32 @@ class RAG_Manager: Deletes all data associated with a specific file ID, including its chunks and vectors, and the file record itself. """ - self.logger.info(f"Starting data deletion process for file_id: {file_id}") + self.logger.info(f'Starting data deletion process for file_id: {file_id}') session = SessionLocal() try: # 1. 从 ChromaDB 删除 embeddings await asyncio.to_thread(self.chroma_manager.delete_by_file_id_sync, file_id) - self.logger.info(f"Deleted embeddings from ChromaDB for file_id: {file_id}") + self.logger.info(f'Deleted embeddings from ChromaDB for file_id: {file_id}') # 2. 删除与文件关联的 chunks 记录 chunks_to_delete = session.query(Chunk).filter_by(file_id=file_id).all() for chunk in chunks_to_delete: session.delete(chunk) - self.logger.info(f"Deleted {len(chunks_to_delete)} chunk records for file_id: {file_id}") + self.logger.info(f'Deleted {len(chunks_to_delete)} chunk records for file_id: {file_id}') # 3. 删除文件记录本身 file_to_delete = session.query(File).filter_by(id=file_id).first() if file_to_delete: session.delete(file_to_delete) - self.logger.info(f"Deleted file record for file_id: {file_id}") + self.logger.info(f'Deleted file record for file_id: {file_id}') else: - self.logger.warning(f"File with ID {file_id} not found in database. Skipping deletion of file record.") + self.logger.warning(f'File with ID {file_id} not found in database. 
Skipping deletion of file record.') session.commit() - self.logger.info(f"Successfully completed data deletion for file_id: {file_id}") + self.logger.info(f'Successfully completed data deletion for file_id: {file_id}') except Exception as e: session.rollback() - self.logger.error(f"Error deleting data for file_id {file_id}: {str(e)}", exc_info=True) + self.logger.error(f'Error deleting data for file_id {file_id}: {str(e)}', exc_info=True) raise finally: session.close() @@ -265,27 +303,27 @@ class RAG_Manager: Deletes a knowledge base and all associated files, chunks, and vectors. This involves querying for associated files and then deleting them. """ - self.logger.info(f"Starting deletion of knowledge base with ID: {kb_id}") - session = SessionLocal() # 使用新的会话来获取 KB 和关联文件 + self.logger.info(f'Starting deletion of knowledge base with ID: {kb_id}') + session = SessionLocal() # 使用新的会话来获取 KB 和关联文件 try: kb_to_delete = session.query(KnowledgeBase).filter_by(id=kb_id).first() if not kb_to_delete: - self.logger.warning(f"Knowledge Base with ID {kb_id} not found.") + self.logger.warning(f'Knowledge Base with ID {kb_id} not found.') return # 获取所有关联的文件,通过 File 表的 kb_id 字段查询 files_to_delete = session.query(File).filter_by(kb_id=kb_id).all() - + # 关闭当前会话,因为 delete_data_by_file_id 会创建自己的会话 - session.close() + session.close() # 遍历删除每个关联文件及其数据 for file_obj in files_to_delete: try: await self.delete_data_by_file_id(file_obj.id) except Exception as file_del_e: - self.logger.error(f"Failed to delete file ID {file_obj.id} during KB deletion: {file_del_e}") + self.logger.error(f'Failed to delete file ID {file_obj.id} during KB deletion: {file_del_e}') # 记录错误但继续,尝试删除其他文件 # 所有文件删除完毕后,重新打开会话来删除 KnowledgeBase 本身 @@ -296,12 +334,14 @@ class RAG_Manager: if kb_final_delete: session.delete(kb_final_delete) session.commit() - self.logger.info(f"Successfully deleted knowledge base with ID: {kb_id}") + self.logger.info(f'Successfully deleted knowledge base with ID: {kb_id}') else: - 
self.logger.warning(f"Knowledge Base with ID {kb_id} not found after file deletion, skipping KB deletion.") + self.logger.warning( + f'Knowledge Base with ID {kb_id} not found after file deletion, skipping KB deletion.' + ) except Exception as kb_del_e: session.rollback() - self.logger.error(f"Error deleting KnowledgeBase record for ID {kb_id}: {kb_del_e}", exc_info=True) + self.logger.error(f'Error deleting KnowledgeBase record for ID {kb_id}: {kb_del_e}', exc_info=True) raise finally: session.close() @@ -310,57 +350,57 @@ class RAG_Manager: # 如果在最初获取 KB 或文件列表时出错 if session.is_active: session.rollback() - self.logger.error(f"Error during overall knowledge base deletion for ID {kb_id}: {str(e)}", exc_info=True) + self.logger.error(f'Error during overall knowledge base deletion for ID {kb_id}: {str(e)}', exc_info=True) raise finally: if session.is_active: session.close() - - async def get_file_content_by_file_id(self, file_id: str) -> str: - file_bytes = await self.ap.storage_mgr.storage_provider.load(file_id) _, ext = os.path.splitext(file_id.lower()) ext = ext.lstrip('.') try: - text = file_bytes.decode("utf-8") + text = file_bytes.decode('utf-8') except UnicodeDecodeError: - return "[非文本文件或编码无法识别]" + return '[非文本文件或编码无法识别]' - if ext in ["txt", "md", "csv", "log", "py", "html"]: + if ext in ['txt', 'md', 'csv', 'log', 'py', 'html']: return text else: - return f"[未知类型: .{ext}]" - + return f'[未知类型: .{ext}]' + async def relate_file_id_with_kb(self, knowledge_base_uuid: str, file_id: str) -> None: """ Associates a file with a knowledge base by updating the kb_id in the File table. 
""" - self.logger.info(f"Associating file ID {file_id} with knowledge base UUID {knowledge_base_uuid}") + self.logger.info(f'Associating file ID {file_id} with knowledge base UUID {knowledge_base_uuid}') session = SessionLocal() try: # 查询知识库是否存在 kb = session.query(KnowledgeBase).filter_by(id=knowledge_base_uuid).first() if not kb: - self.logger.error(f"Knowledge Base with UUID {knowledge_base_uuid} not found.") + self.logger.error(f'Knowledge Base with UUID {knowledge_base_uuid} not found.') return # 更新文件的 kb_id file_to_update = session.query(File).filter_by(id=file_id).first() if not file_to_update: - self.logger.error(f"File with ID {file_id} not found.") + self.logger.error(f'File with ID {file_id} not found.') return file_to_update.kb_id = kb.id session.commit() - self.logger.info(f"Successfully associated file ID {file_id} with knowledge base UUID {knowledge_base_uuid}") + self.logger.info( + f'Successfully associated file ID {file_id} with knowledge base UUID {knowledge_base_uuid}' + ) except Exception as e: session.rollback() - self.logger.error(f"Error associating file ID {file_id} with knowledge base UUID {knowledge_base_uuid}: {str(e)}", exc_info=True) + self.logger.error( + f'Error associating file ID {file_id} with knowledge base UUID {knowledge_base_uuid}: {str(e)}', + exc_info=True, + ) finally: session.close() - - From 367d04d0f073f1f2b052976e3719944fd528178c Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 11 Jul 2025 11:28:43 +0800 Subject: [PATCH 041/257] fix: success method bad params --- .../http/controller/groups/knowledge/base.py | 56 ++++++++++++------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/pkg/api/http/controller/groups/knowledge/base.py b/pkg/api/http/controller/groups/knowledge/base.py index cf5bb44e..bfbbbe10 100644 --- a/pkg/api/http/controller/groups/knowledge/base.py +++ b/pkg/api/http/controller/groups/knowledge/base.py @@ -17,16 +17,20 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): } for kb in 
knowledge_bases ] - return self.success(code=0, data={'bases': bases_list}, msg='ok') + return self.success(data={'bases': bases_list}) # POST: create a new knowledge base json_data = await quart.request.json knowledge_base_uuid = await self.ap.knowledge_base_service.create_knowledge_base( json_data.get('name'), json_data.get('description') ) - return self.success(code=0, data={'uuid': knowledge_base_uuid}, msg='ok') + return self.success(data={'uuid': knowledge_base_uuid}) - @self.route('/', methods=['GET', 'DELETE'], endpoint='handle_specific_knowledge_base') + @self.route( + '/', + methods=['GET', 'DELETE'], + endpoint='handle_specific_knowledge_base', + ) async def handle_specific_knowledge_base(knowledge_base_uuid: str) -> str: if quart.request.method == 'GET': knowledge_base = await self.ap.knowledge_base_service.get_knowledge_base_by_id(int(knowledge_base_uuid)) @@ -35,40 +39,50 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): return self.http_status(404, -1, 'knowledge base not found') return self.success( - code=0, data={ 'name': knowledge_base.name, 'description': knowledge_base.description, 'uuid': knowledge_base.id, }, - msg='ok', ) elif quart.request.method == 'DELETE': await self.ap.knowledge_base_service.delete_kb_by_id(int(knowledge_base_uuid)) - return self.success(code=0, msg='ok') + return self.success({}) - @self.route('//files', methods=['GET'], endpoint='get_knowledge_base_files') + @self.route( + '//files', + methods=['GET'], + endpoint='get_knowledge_base_files', + ) async def get_knowledge_base_files(knowledge_base_uuid: str) -> str: files = await self.ap.knowledge_base_service.get_files_by_knowledge_base(int(knowledge_base_uuid)) return self.success( - code=0, - data=[ - { - 'id': file.id, - 'file_name': file.file_name, - 'status': file.status, - } - for file in files - ], - msg='ok', + data={ + 'files': [ + { + 'id': file.id, + 'file_name': file.file_name, + 'status': file.status, + } + for file in files + ], + } ) - 
@self.route('//files/', methods=['DELETE'], endpoint='delete_specific_file_in_kb') + @self.route( + '//files/', + methods=['DELETE'], + endpoint='delete_specific_file_in_kb', + ) async def delete_specific_file_in_kb(file_id: str) -> str: await self.ap.knowledge_base_service.delete_data_by_file_id(int(file_id)) - return self.success(code=0, msg='ok') + return self.success({}) - @self.route('//files', methods=['POST'], endpoint='relate_file_with_kb') + @self.route( + '//files', + methods=['POST'], + endpoint='relate_file_with_kb', + ) async def relate_file_id_with_kb(knowledge_base_uuid: str, file_id: str) -> str: if 'file' not in quart.request.files: return self.http_status(400, -1, 'No file part in the request') @@ -80,4 +94,4 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): # 调用服务层方法将文件与知识库关联 await self.ap.knowledge_base_service.relate_file_id_with_kb(int(knowledge_base_uuid), int(file_id)) - return self.success(code=0, data={}, msg='ok') + return self.success({}) From 9ba1ad5bd38e48a3315f165ce88e73c3399b56fc Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 11 Jul 2025 16:38:08 +0800 Subject: [PATCH 042/257] fix: bugs --- .../http/controller/groups/knowledge/base.py | 14 +++--- pkg/entity/persistence/rag.py | 12 ++--- pkg/rag/knowledge/mgr.py | 47 +------------------ 3 files changed, 15 insertions(+), 58 deletions(-) diff --git a/pkg/api/http/controller/groups/knowledge/base.py b/pkg/api/http/controller/groups/knowledge/base.py index bfbbbe10..b5a48d29 100644 --- a/pkg/api/http/controller/groups/knowledge/base.py +++ b/pkg/api/http/controller/groups/knowledge/base.py @@ -14,17 +14,19 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): 'uuid': kb.id, 'name': kb.name, 'description': kb.description, + 'embedding_model_uuid': kb.embedding_model_uuid, + 'top_k': kb.top_k, } for kb in knowledge_bases ] return self.success(data={'bases': bases_list}) - # POST: create a new knowledge base - json_data = await quart.request.json - knowledge_base_uuid = await 
self.ap.knowledge_base_service.create_knowledge_base( - json_data.get('name'), json_data.get('description') - ) - return self.success(data={'uuid': knowledge_base_uuid}) + elif quart.request.method == 'POST': + json_data = await quart.request.json + knowledge_base_uuid = await self.ap.knowledge_base_service.create_knowledge_base( + json_data.get('name'), json_data.get('description'), json_data.get('embedding_model_uuid') + ) + return self.success(data={'uuid': knowledge_base_uuid}) @self.route( '/', diff --git a/pkg/entity/persistence/rag.py b/pkg/entity/persistence/rag.py index 175720f1..1657196a 100644 --- a/pkg/entity/persistence/rag.py +++ b/pkg/entity/persistence/rag.py @@ -5,13 +5,10 @@ import os Base = declarative_base() -DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./rag_knowledge.db") +DATABASE_URL = os.getenv('DATABASE_URL', 'sqlite:///./rag_knowledge.db') -engine = create_engine( - DATABASE_URL, - connect_args={"check_same_thread": False} -) +engine = create_engine(DATABASE_URL, connect_args={'check_same_thread': False}) SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) @@ -20,7 +17,8 @@ SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) def create_db_and_tables(): """Creates all database tables defined in the Base.""" Base.metadata.create_all(bind=engine) - print("Database tables created or already exist.") + print('Database tables created or already exist.') + class KnowledgeBase(Base): __tablename__ = 'kb' @@ -28,7 +26,7 @@ class KnowledgeBase(Base): name = Column(String, index=True) description = Column(Text) created_at = Column(DateTime, default=datetime.utcnow) - embedding_model = Column(String, default='') + embedding_model_uuid = Column(String, default='') top_k = Column(Integer, default=5) diff --git a/pkg/rag/knowledge/mgr.py b/pkg/rag/knowledge/mgr.py index 7d1787e0..5d4eece9 100644 --- a/pkg/rag/knowledge/mgr.py +++ b/pkg/rag/knowledge/mgr.py @@ -6,11 +6,7 @@ import asyncio import uuid 
from pkg.rag.knowledge.services.parser import FileParser from pkg.rag.knowledge.services.chunker import Chunker -from pkg.rag.knowledge.services.embedder import Embedder -from pkg.rag.knowledge.services.retriever import Retriever from pkg.rag.knowledge.services.database import create_db_and_tables, SessionLocal, KnowledgeBase, File, Chunk -from pkg.rag.knowledge.services.embedding_models import EmbeddingModelFactory -from pkg.rag.knowledge.services.chroma_manager import ChromaIndexManager from pkg.core import app @@ -20,8 +16,6 @@ class RAGManager: def __init__(self, ap: app.Application, logger: logging.Logger = None): self.ap = ap self.logger = logger or logging.getLogger(__name__) - self.embedding_model_type = None - self.embedding_model_name = None self.chroma_manager = None self.parser = FileParser() self.chunker = Chunker() @@ -32,50 +26,13 @@ class RAGManager: """Initializes the RAG system by creating database tables.""" await asyncio.to_thread(create_db_and_tables) - async def create_specific_model(self, embedding_model_type: str, embedding_model_name: str): - """ - Creates and configures the specific embedding model and ChromaDB manager. - This must be called before performing embedding or retrieval operations. 
- """ - self.embedding_model_type = embedding_model_type - self.embedding_model_name = embedding_model_name - - try: - model = EmbeddingModelFactory.create_model( - model_type=self.embedding_model_type, model_name_key=self.embedding_model_name - ) - self.logger.info( - f"Configured embedding model '{self.embedding_model_name}' has dimension: {model.embedding_dimension}" - ) - except Exception as e: - self.logger.critical( - f"Failed to get dimension for configured embedding model '{self.embedding_model_name}': {e}" - ) - raise RuntimeError('Failed to initialize RAG_Manager due to embedding model issues.') - - self.chroma_manager = ChromaIndexManager( - collection_name=f'rag_collection_{self.embedding_model_name.replace("-", "_")}' - ) - self.embedder = Embedder( - model_type=self.embedding_model_type, - model_name_key=self.embedding_model_name, - chroma_manager=self.chroma_manager, - ) - self.retriever = Retriever( - model_type=self.embedding_model_type, - model_name_key=self.embedding_model_name, - chroma_manager=self.chroma_manager, - ) - async def create_knowledge_base(self, kb_name: str, kb_description: str, embedding_model: str = '', top_k: int = 5): """ Creates a new knowledge base if it doesn't already exist. """ try: - if not self.embedding_model_type or not kb_name: - raise ValueError( - 'Embedding model type and knowledge base name must be set before creating a knowledge base.' 
- ) + if not kb_name: + raise ValueError('Knowledge base name must be set while creating.') def _create_kb_sync(): session = SessionLocal() From 7d5503dab201a44112b120e2bde6b47e3117f610 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 11 Jul 2025 16:49:55 +0800 Subject: [PATCH 043/257] fix: bug --- pkg/rag/knowledge/mgr.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/rag/knowledge/mgr.py b/pkg/rag/knowledge/mgr.py index 5d4eece9..4da10a09 100644 --- a/pkg/rag/knowledge/mgr.py +++ b/pkg/rag/knowledge/mgr.py @@ -26,7 +26,9 @@ class RAGManager: """Initializes the RAG system by creating database tables.""" await asyncio.to_thread(create_db_and_tables) - async def create_knowledge_base(self, kb_name: str, kb_description: str, embedding_model: str = '', top_k: int = 5): + async def create_knowledge_base( + self, kb_name: str, kb_description: str, embedding_model_uuid: str = '', top_k: int = 5 + ): """ Creates a new knowledge base if it doesn't already exist. 
""" @@ -43,7 +45,7 @@ class RAGManager: new_kb = KnowledgeBase( name=kb_name, description=kb_description, - embedding_model=embedding_model, + embedding_model_uuid=embedding_model_uuid, top_k=top_k, id=id, ) From 815cdf8b4acb25e429cf2b15acb90b408340f4a0 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 11 Jul 2025 17:22:43 +0800 Subject: [PATCH 044/257] feat: kb dialog action --- .../knowledge/components/kb-form/KBForm.tsx | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/web/src/app/home/knowledge/components/kb-form/KBForm.tsx b/web/src/app/home/knowledge/components/kb-form/KBForm.tsx index 9ae51656..0d4f0909 100644 --- a/web/src/app/home/knowledge/components/kb-form/KBForm.tsx +++ b/web/src/app/home/knowledge/components/kb-form/KBForm.tsx @@ -23,6 +23,7 @@ import { SelectTrigger, SelectValue, } from '@/components/ui/select'; +import { KnowledgeBase } from '@/app/infra/entities/api'; const getFormSchema = (t: (key: string) => string) => z.object({ @@ -81,11 +82,40 @@ export default function KBForm({ ); }; + const onSubmit = (data: z.infer) => { + console.log('data', data); + + if (initKbId) { + // update knowledge base + const updateKb: KnowledgeBase = { + name: data.name, + description: data.description, + embedding_model_uuid: data.embeddingModelUUID, + }; + } else { + // create knowledge base + const newKb: KnowledgeBase = { + name: data.name, + description: data.description, + embedding_model_uuid: data.embeddingModelUUID, + }; + httpClient + .createKnowledgeBase(newKb) + .then((res) => { + console.log('create knowledge base success', res); + onNewKbCreated(res.uuid); + }) + .catch((err) => { + console.error('create knowledge base failed', err); + }); + } + }; + return ( <>
From 14c161b73316e268e91b30f6705af9bec0652e6a Mon Sep 17 00:00:00 2001 From: WangCham <651122857@qq.com> Date: Fri, 11 Jul 2025 18:14:03 +0800 Subject: [PATCH 045/257] fix: create knwoledge base issue --- pkg/entity/persistence/rag.py | 26 ++++++++++---------------- pkg/rag/knowledge/mgr.py | 34 +++++++++++++++++----------------- 2 files changed, 27 insertions(+), 33 deletions(-) diff --git a/pkg/entity/persistence/rag.py b/pkg/entity/persistence/rag.py index 1657196a..95a78712 100644 --- a/pkg/entity/persistence/rag.py +++ b/pkg/entity/persistence/rag.py @@ -1,19 +1,17 @@ -from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, LargeBinary +from sqlalchemy import create_engine, Column, String, Text, DateTime, LargeBinary, Integer from sqlalchemy.orm import declarative_base, sessionmaker from datetime import datetime import os - Base = declarative_base() DATABASE_URL = os.getenv('DATABASE_URL', 'sqlite:///./rag_knowledge.db') +print("Using database URL:", DATABASE_URL) engine = create_engine(DATABASE_URL, connect_args={'check_same_thread': False}) - SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) - def create_db_and_tables(): """Creates all database tables defined in the Base.""" Base.metadata.create_all(bind=engine) @@ -22,35 +20,31 @@ def create_db_and_tables(): class KnowledgeBase(Base): __tablename__ = 'kb' - id = Column(Integer, primary_key=True, index=True) + id = Column(String, primary_key=True, index=True) name = Column(String, index=True) description = Column(Text) created_at = Column(DateTime, default=datetime.utcnow) embedding_model_uuid = Column(String, default='') top_k = Column(Integer, default=5) - class File(Base): __tablename__ = 'file' - id = Column(Integer, primary_key=True, index=True) - kb_id = Column(Integer, nullable=True) + id = Column(String, primary_key=True, index=True) + kb_id = Column(String, nullable=True) file_name = Column(String) path = Column(String) created_at = Column(DateTime, 
default=datetime.utcnow) file_type = Column(String) - status = Column(Integer, default=0) - + status = Column(String, default='0') class Chunk(Base): __tablename__ = 'chunks' - id = Column(Integer, primary_key=True, index=True) - file_id = Column(Integer, nullable=True) - + id = Column(String, primary_key=True, index=True) + file_id = Column(String, nullable=True) text = Column(Text) - class Vector(Base): __tablename__ = 'vectors' - id = Column(Integer, primary_key=True, index=True) - chunk_id = Column(Integer, nullable=True) + id = Column(String, primary_key=True, index=True) + chunk_id = Column(String, nullable=True) embedding = Column(LargeBinary) diff --git a/pkg/rag/knowledge/mgr.py b/pkg/rag/knowledge/mgr.py index 4da10a09..585a5075 100644 --- a/pkg/rag/knowledge/mgr.py +++ b/pkg/rag/knowledge/mgr.py @@ -41,7 +41,7 @@ class RAGManager: try: kb = session.query(KnowledgeBase).filter_by(name=kb_name).first() if not kb: - id = uuid.uuid4().int + id = str(uuid.uuid4()) new_kb = KnowledgeBase( name=kb_name, description=kb_description, @@ -86,7 +86,7 @@ class RAGManager: self.logger.error(f'Error retrieving knowledge bases: {str(e)}', exc_info=True) return [] - async def get_knowledge_base_by_id(self, kb_id: int): + async def get_knowledge_base_by_id(self, kb_id: str): """ Retrieves a specific knowledge base by its ID. """ @@ -104,7 +104,7 @@ class RAGManager: self.logger.error(f'Error retrieving knowledge base with ID {kb_id}: {str(e)}', exc_info=True) return None - async def get_files_by_knowledge_base(self, kb_id: int): + async def get_files_by_knowledge_base(self, kb_id: str): """ Retrieves files associated with a specific knowledge base by querying the File table directly. """ @@ -153,7 +153,7 @@ class RAGManager: file_obj = None try: - # 1. 
确保知识库存在或创建它 + kb = session.query(KnowledgeBase).filter_by(name=kb_name).first() if not kb: kb = KnowledgeBase(name=kb_name, description=kb_description) @@ -164,7 +164,7 @@ class RAGManager: else: self.logger.info(f"Knowledge Base '{kb_name}' already exists.") - # 2. 添加文件记录到数据库,并直接关联 kb_id + file_name = os.path.basename(file_path) existing_file = session.query(File).filter_by(kb_id=kb.id, file_name=file_name).first() if existing_file: @@ -181,15 +181,15 @@ class RAGManager: f"File record '{file_name}' added to database with ID: {file_obj.id}, associated with KB ID: {kb.id}" ) - # 3. 解析文件内容 + text = await self.parser.parse(file_path) if not text: self.logger.warning(f'No text extracted from file {file_path}. Deleting file record ID: {file_obj.id}.') session.delete(file_obj) - session.commit() # 提交删除操作 + session.commit() return - # 4. 分块并嵌入/存储块 + chunks_texts = await self.chunker.chunk(text) self.logger.info(f"Chunked file '{file_name}' into {len(chunks_texts)} chunks.") await self.embedder.embed_and_store(file_id=file_obj.id, chunks=chunks_texts) @@ -222,7 +222,7 @@ class RAGManager: self.logger.error(f"Error in retrieve_data for query '{query}': {str(e)}", exc_info=True) return [] - async def delete_data_by_file_id(self, file_id: int): + async def delete_data_by_file_id(self, file_id: str): """ Deletes all data associated with a specific file ID, including its chunks and vectors, and the file record itself. @@ -257,13 +257,13 @@ class RAGManager: finally: session.close() - async def delete_kb_by_id(self, kb_id: int): + async def delete_kb_by_id(self, kb_id: str): """ Deletes a knowledge base and all associated files, chunks, and vectors. This involves querying for associated files and then deleting them. 
""" self.logger.info(f'Starting deletion of knowledge base with ID: {kb_id}') - session = SessionLocal() # 使用新的会话来获取 KB 和关联文件 + session = SessionLocal() try: kb_to_delete = session.query(KnowledgeBase).filter_by(id=kb_id).first() @@ -271,24 +271,24 @@ class RAGManager: self.logger.warning(f'Knowledge Base with ID {kb_id} not found.') return - # 获取所有关联的文件,通过 File 表的 kb_id 字段查询 + files_to_delete = session.query(File).filter_by(kb_id=kb_id).all() - # 关闭当前会话,因为 delete_data_by_file_id 会创建自己的会话 + session.close() - # 遍历删除每个关联文件及其数据 + for file_obj in files_to_delete: try: await self.delete_data_by_file_id(file_obj.id) except Exception as file_del_e: self.logger.error(f'Failed to delete file ID {file_obj.id} during KB deletion: {file_del_e}') - # 记录错误但继续,尝试删除其他文件 + - # 所有文件删除完毕后,重新打开会话来删除 KnowledgeBase 本身 + session = SessionLocal() try: - # 重新查询,确保对象是当前会话的一部分 + kb_final_delete = session.query(KnowledgeBase).filter_by(id=kb_id).first() if kb_final_delete: session.delete(kb_final_delete) From bd9331ce62f8aea92dbe63757b8c38696464ff87 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 11 Jul 2025 20:57:09 +0800 Subject: [PATCH 046/257] fix: kb get api format --- .../http/controller/groups/knowledge/base.py | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/pkg/api/http/controller/groups/knowledge/base.py b/pkg/api/http/controller/groups/knowledge/base.py index b5a48d29..70cf2b0c 100644 --- a/pkg/api/http/controller/groups/knowledge/base.py +++ b/pkg/api/http/controller/groups/knowledge/base.py @@ -24,7 +24,9 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): elif quart.request.method == 'POST': json_data = await quart.request.json knowledge_base_uuid = await self.ap.knowledge_base_service.create_knowledge_base( - json_data.get('name'), json_data.get('description'), json_data.get('embedding_model_uuid') + json_data.get('name'), + json_data.get('description'), + json_data.get('embedding_model_uuid'), ) return self.success(data={'uuid': 
knowledge_base_uuid}) @@ -35,20 +37,22 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): ) async def handle_specific_knowledge_base(knowledge_base_uuid: str) -> str: if quart.request.method == 'GET': - knowledge_base = await self.ap.knowledge_base_service.get_knowledge_base_by_id(int(knowledge_base_uuid)) + knowledge_base = await self.ap.knowledge_base_service.get_knowledge_base_by_id(knowledge_base_uuid) if knowledge_base is None: return self.http_status(404, -1, 'knowledge base not found') return self.success( data={ - 'name': knowledge_base.name, - 'description': knowledge_base.description, - 'uuid': knowledge_base.id, - }, + 'base': { + 'name': knowledge_base.name, + 'description': knowledge_base.description, + 'uuid': knowledge_base.id, + }, + } ) elif quart.request.method == 'DELETE': - await self.ap.knowledge_base_service.delete_kb_by_id(int(knowledge_base_uuid)) + await self.ap.knowledge_base_service.delete_kb_by_id(knowledge_base_uuid) return self.success({}) @self.route( @@ -57,7 +61,7 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): endpoint='get_knowledge_base_files', ) async def get_knowledge_base_files(knowledge_base_uuid: str) -> str: - files = await self.ap.knowledge_base_service.get_files_by_knowledge_base(int(knowledge_base_uuid)) + files = await self.ap.knowledge_base_service.get_files_by_knowledge_base(knowledge_base_uuid) return self.success( data={ 'files': [ @@ -77,7 +81,7 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): endpoint='delete_specific_file_in_kb', ) async def delete_specific_file_in_kb(file_id: str) -> str: - await self.ap.knowledge_base_service.delete_data_by_file_id(int(file_id)) + await self.ap.knowledge_base_service.delete_data_by_file_id(file_id) return self.success({}) @self.route( @@ -95,5 +99,5 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): return self.http_status(400, -1, 'File ID is required') # 调用服务层方法将文件与知识库关联 - await self.ap.knowledge_base_service.relate_file_id_with_kb(int(knowledge_base_uuid), 
int(file_id)) + await self.ap.knowledge_base_service.relate_file_id_with_kb(knowledge_base_uuid, file_id) return self.success({}) From 2ed3b687904feaa48ee27faf6793a91486ca3b31 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 11 Jul 2025 20:58:51 +0800 Subject: [PATCH 047/257] fix: kb get api not contains model uuid --- pkg/api/http/controller/groups/knowledge/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/api/http/controller/groups/knowledge/base.py b/pkg/api/http/controller/groups/knowledge/base.py index 70cf2b0c..594fe7bf 100644 --- a/pkg/api/http/controller/groups/knowledge/base.py +++ b/pkg/api/http/controller/groups/knowledge/base.py @@ -48,6 +48,8 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): 'name': knowledge_base.name, 'description': knowledge_base.description, 'uuid': knowledge_base.id, + 'embedding_model_uuid': knowledge_base.embedding_model_uuid, + 'top_k': knowledge_base.top_k, }, } ) From a79a22a74d0ea19b04c23f0bc5f9a25657012f9d Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 11 Jul 2025 21:30:47 +0800 Subject: [PATCH 048/257] fix: api bug --- .../http/controller/groups/knowledge/base.py | 57 ++++++++----------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/pkg/api/http/controller/groups/knowledge/base.py b/pkg/api/http/controller/groups/knowledge/base.py index 594fe7bf..b3fd50ea 100644 --- a/pkg/api/http/controller/groups/knowledge/base.py +++ b/pkg/api/http/controller/groups/knowledge/base.py @@ -59,23 +59,34 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): @self.route( '//files', - methods=['GET'], + methods=['GET', 'POST'], endpoint='get_knowledge_base_files', ) async def get_knowledge_base_files(knowledge_base_uuid: str) -> str: - files = await self.ap.knowledge_base_service.get_files_by_knowledge_base(knowledge_base_uuid) - return self.success( - data={ - 'files': [ - { - 'id': file.id, - 'file_name': file.file_name, - 'status': file.status, - } - for file in files - ], - } - ) + if 
quart.request.method == 'GET': + files = await self.ap.knowledge_base_service.get_files_by_knowledge_base(knowledge_base_uuid) + return self.success( + data={ + 'files': [ + { + 'id': file.id, + 'file_name': file.file_name, + 'status': file.status, + } + for file in files + ], + } + ) + + elif quart.request.method == 'POST': + json_data = await quart.request.json + file_id = json_data.get('file_id') + if not file_id: + return self.http_status(400, -1, 'File ID is required') + + # 调用服务层方法将文件与知识库关联 + await self.ap.knowledge_base_service.relate_file_id_with_kb(knowledge_base_uuid, file_id) + return self.success({}) @self.route( '//files/', @@ -85,21 +96,3 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): async def delete_specific_file_in_kb(file_id: str) -> str: await self.ap.knowledge_base_service.delete_data_by_file_id(file_id) return self.success({}) - - @self.route( - '//files', - methods=['POST'], - endpoint='relate_file_with_kb', - ) - async def relate_file_id_with_kb(knowledge_base_uuid: str, file_id: str) -> str: - if 'file' not in quart.request.files: - return self.http_status(400, -1, 'No file part in the request') - - json_data = await quart.request.json - file_id = json_data.get('file_id') - if not file_id: - return self.http_status(400, -1, 'File ID is required') - - # 调用服务层方法将文件与知识库关联 - await self.ap.knowledge_base_service.relate_file_id_with_kb(knowledge_base_uuid, file_id) - return self.success({}) From 6d788cadbc24348355810e7c7ae2d0b58f2e6124 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 11 Jul 2025 21:37:31 +0800 Subject: [PATCH 049/257] fix: the fucking logger --- pkg/rag/knowledge/mgr.py | 102 ++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 54 deletions(-) diff --git a/pkg/rag/knowledge/mgr.py b/pkg/rag/knowledge/mgr.py index 585a5075..89e5b393 100644 --- a/pkg/rag/knowledge/mgr.py +++ b/pkg/rag/knowledge/mgr.py @@ -1,6 +1,5 @@ # rag_manager.py from __future__ import annotations -import logging import os import 
asyncio import uuid @@ -13,9 +12,8 @@ from pkg.core import app class RAGManager: ap: app.Application - def __init__(self, ap: app.Application, logger: logging.Logger = None): + def __init__(self, ap: app.Application): self.ap = ap - self.logger = logger or logging.getLogger(__name__) self.chroma_manager = None self.parser = FileParser() self.chunker = Chunker() @@ -52,20 +50,20 @@ class RAGManager: session.add(new_kb) session.commit() session.refresh(new_kb) - self.logger.info(f"Knowledge Base '{kb_name}' created.") + self.ap.logger.info(f"Knowledge Base '{kb_name}' created.") return new_kb.id else: - self.logger.info(f"Knowledge Base '{kb_name}' already exists.") + self.ap.logger.info(f"Knowledge Base '{kb_name}' already exists.") except Exception as e: session.rollback() - self.logger.error(f"Error in _create_kb_sync for '{kb_name}': {str(e)}", exc_info=True) + self.ap.logger.error(f"Error in _create_kb_sync for '{kb_name}': {str(e)}", exc_info=True) raise finally: session.close() return await asyncio.to_thread(_create_kb_sync) except Exception as e: - self.logger.error(f"Error creating knowledge base '{kb_name}': {str(e)}", exc_info=True) + self.ap.logger.error(f"Error creating knowledge base '{kb_name}': {str(e)}", exc_info=True) raise async def get_all_knowledge_bases(self): @@ -83,7 +81,7 @@ class RAGManager: return await asyncio.to_thread(_get_all_kbs_sync) except Exception as e: - self.logger.error(f'Error retrieving knowledge bases: {str(e)}', exc_info=True) + self.ap.logger.error(f'Error retrieving knowledge bases: {str(e)}', exc_info=True) return [] async def get_knowledge_base_by_id(self, kb_id: str): @@ -101,7 +99,7 @@ class RAGManager: return await asyncio.to_thread(_get_kb_sync, kb_id) except Exception as e: - self.logger.error(f'Error retrieving knowledge base with ID {kb_id}: {str(e)}', exc_info=True) + self.ap.logger.error(f'Error retrieving knowledge base with ID {kb_id}: {str(e)}', exc_info=True) return None async def 
get_files_by_knowledge_base(self, kb_id: str): @@ -119,7 +117,7 @@ class RAGManager: return await asyncio.to_thread(_get_files_sync, kb_id) except Exception as e: - self.logger.error(f'Error retrieving files for knowledge base ID {kb_id}: {str(e)}', exc_info=True) + self.ap.logger.error(f'Error retrieving files for knowledge base ID {kb_id}: {str(e)}', exc_info=True) return [] async def get_all_files(self): @@ -138,7 +136,7 @@ class RAGManager: return await asyncio.to_thread(_get_all_files_sync) except Exception as e: - self.logger.error(f'Error retrieving all files: {str(e)}', exc_info=True) + self.ap.logger.error(f'Error retrieving all files: {str(e)}', exc_info=True) return [] async def store_data( @@ -148,27 +146,25 @@ class RAGManager: Parses, chunks, embeds, and stores data from a given file into the RAG system. Associates the file with a knowledge base using kb_id in the File table. """ - self.logger.info(f'Starting data storage process for file: {file_path}') + self.ap.logger.info(f'Starting data storage process for file: {file_path}') session = SessionLocal() file_obj = None try: - kb = session.query(KnowledgeBase).filter_by(name=kb_name).first() if not kb: kb = KnowledgeBase(name=kb_name, description=kb_description) session.add(kb) session.commit() session.refresh(kb) - self.logger.info(f"Knowledge Base '{kb_name}' created during store_data.") + self.ap.logger.info(f"Knowledge Base '{kb_name}' created during store_data.") else: - self.logger.info(f"Knowledge Base '{kb_name}' already exists.") + self.ap.logger.info(f"Knowledge Base '{kb_name}' already exists.") - file_name = os.path.basename(file_path) existing_file = session.query(File).filter_by(kb_id=kb.id, file_name=file_name).first() if existing_file: - self.logger.warning( + self.ap.logger.warning( f"File '{file_name}' already exists in knowledge base '{kb_name}'. Skipping storage." 
) return @@ -177,32 +173,32 @@ class RAGManager: session.add(file_obj) session.commit() session.refresh(file_obj) - self.logger.info( + self.ap.logger.info( f"File record '{file_name}' added to database with ID: {file_obj.id}, associated with KB ID: {kb.id}" ) - text = await self.parser.parse(file_path) if not text: - self.logger.warning(f'No text extracted from file {file_path}. Deleting file record ID: {file_obj.id}.') + self.ap.logger.warning( + f'No text extracted from file {file_path}. Deleting file record ID: {file_obj.id}.' + ) session.delete(file_obj) session.commit() return - chunks_texts = await self.chunker.chunk(text) - self.logger.info(f"Chunked file '{file_name}' into {len(chunks_texts)} chunks.") + self.ap.logger.info(f"Chunked file '{file_name}' into {len(chunks_texts)} chunks.") await self.embedder.embed_and_store(file_id=file_obj.id, chunks=chunks_texts) - self.logger.info(f'Data storage process completed for file: {file_path}') + self.ap.logger.info(f'Data storage process completed for file: {file_path}') except Exception as e: session.rollback() - self.logger.error(f'Error in store_data for file {file_path}: {str(e)}', exc_info=True) + self.ap.logger.error(f'Error in store_data for file {file_path}: {str(e)}', exc_info=True) if file_obj and file_obj.id: try: await asyncio.to_thread(self.chroma_manager.delete_by_file_id_sync, file_obj.id) except Exception as chroma_e: - self.logger.warning( + self.ap.logger.warning( f'Could not clean up ChromaDB entries for file_id {file_obj.id} after store_data failure: {chroma_e}' ) raise @@ -213,13 +209,13 @@ class RAGManager: """ Retrieves relevant data chunks based on a given query using the configured retriever. 
""" - self.logger.info(f"Starting data retrieval process for query: '{query}'") + self.ap.logger.info(f"Starting data retrieval process for query: '{query}'") try: retrieved_chunks = await self.retriever.retrieve(query) - self.logger.info(f'Successfully retrieved {len(retrieved_chunks)} chunks for query.') + self.ap.logger.info(f'Successfully retrieved {len(retrieved_chunks)} chunks for query.') return retrieved_chunks except Exception as e: - self.logger.error(f"Error in retrieve_data for query '{query}': {str(e)}", exc_info=True) + self.ap.logger.error(f"Error in retrieve_data for query '{query}': {str(e)}", exc_info=True) return [] async def delete_data_by_file_id(self, file_id: str): @@ -227,32 +223,34 @@ class RAGManager: Deletes all data associated with a specific file ID, including its chunks and vectors, and the file record itself. """ - self.logger.info(f'Starting data deletion process for file_id: {file_id}') + self.ap.logger.info(f'Starting data deletion process for file_id: {file_id}') session = SessionLocal() try: # 1. 从 ChromaDB 删除 embeddings await asyncio.to_thread(self.chroma_manager.delete_by_file_id_sync, file_id) - self.logger.info(f'Deleted embeddings from ChromaDB for file_id: {file_id}') + self.ap.logger.info(f'Deleted embeddings from ChromaDB for file_id: {file_id}') # 2. 删除与文件关联的 chunks 记录 chunks_to_delete = session.query(Chunk).filter_by(file_id=file_id).all() for chunk in chunks_to_delete: session.delete(chunk) - self.logger.info(f'Deleted {len(chunks_to_delete)} chunk records for file_id: {file_id}') + self.ap.logger.info(f'Deleted {len(chunks_to_delete)} chunk records for file_id: {file_id}') # 3. 删除文件记录本身 file_to_delete = session.query(File).filter_by(id=file_id).first() if file_to_delete: session.delete(file_to_delete) - self.logger.info(f'Deleted file record for file_id: {file_id}') + self.ap.logger.info(f'Deleted file record for file_id: {file_id}') else: - self.logger.warning(f'File with ID {file_id} not found in database. 
Skipping deletion of file record.') + self.ap.logger.warning( + f'File with ID {file_id} not found in database. Skipping deletion of file record.' + ) session.commit() - self.logger.info(f'Successfully completed data deletion for file_id: {file_id}') + self.ap.logger.info(f'Successfully completed data deletion for file_id: {file_id}') except Exception as e: session.rollback() - self.logger.error(f'Error deleting data for file_id {file_id}: {str(e)}', exc_info=True) + self.ap.logger.error(f'Error deleting data for file_id {file_id}: {str(e)}', exc_info=True) raise finally: session.close() @@ -262,45 +260,39 @@ class RAGManager: Deletes a knowledge base and all associated files, chunks, and vectors. This involves querying for associated files and then deleting them. """ - self.logger.info(f'Starting deletion of knowledge base with ID: {kb_id}') - session = SessionLocal() + self.ap.logger.info(f'Starting deletion of knowledge base with ID: {kb_id}') + session = SessionLocal() try: kb_to_delete = session.query(KnowledgeBase).filter_by(id=kb_id).first() if not kb_to_delete: - self.logger.warning(f'Knowledge Base with ID {kb_id} not found.') + self.ap.logger.warning(f'Knowledge Base with ID {kb_id} not found.') return - files_to_delete = session.query(File).filter_by(kb_id=kb_id).all() - session.close() - for file_obj in files_to_delete: try: await self.delete_data_by_file_id(file_obj.id) except Exception as file_del_e: - self.logger.error(f'Failed to delete file ID {file_obj.id} during KB deletion: {file_del_e}') - + self.ap.logger.error(f'Failed to delete file ID {file_obj.id} during KB deletion: {file_del_e}') - session = SessionLocal() try: - kb_final_delete = session.query(KnowledgeBase).filter_by(id=kb_id).first() if kb_final_delete: session.delete(kb_final_delete) session.commit() - self.logger.info(f'Successfully deleted knowledge base with ID: {kb_id}') + self.ap.logger.info(f'Successfully deleted knowledge base with ID: {kb_id}') else: - self.logger.warning( + 
self.ap.logger.warning( f'Knowledge Base with ID {kb_id} not found after file deletion, skipping KB deletion.' ) except Exception as kb_del_e: session.rollback() - self.logger.error(f'Error deleting KnowledgeBase record for ID {kb_id}: {kb_del_e}', exc_info=True) + self.ap.logger.error(f'Error deleting KnowledgeBase record for ID {kb_id}: {kb_del_e}', exc_info=True) raise finally: session.close() @@ -309,7 +301,9 @@ class RAGManager: # 如果在最初获取 KB 或文件列表时出错 if session.is_active: session.rollback() - self.logger.error(f'Error during overall knowledge base deletion for ID {kb_id}: {str(e)}', exc_info=True) + self.ap.logger.error( + f'Error during overall knowledge base deletion for ID {kb_id}: {str(e)}', exc_info=True + ) raise finally: if session.is_active: @@ -335,29 +329,29 @@ class RAGManager: """ Associates a file with a knowledge base by updating the kb_id in the File table. """ - self.logger.info(f'Associating file ID {file_id} with knowledge base UUID {knowledge_base_uuid}') + self.ap.logger.info(f'Associating file ID {file_id} with knowledge base UUID {knowledge_base_uuid}') session = SessionLocal() try: # 查询知识库是否存在 kb = session.query(KnowledgeBase).filter_by(id=knowledge_base_uuid).first() if not kb: - self.logger.error(f'Knowledge Base with UUID {knowledge_base_uuid} not found.') + self.ap.logger.error(f'Knowledge Base with UUID {knowledge_base_uuid} not found.') return # 更新文件的 kb_id file_to_update = session.query(File).filter_by(id=file_id).first() if not file_to_update: - self.logger.error(f'File with ID {file_id} not found.') + self.ap.logger.error(f'File with ID {file_id} not found.') return file_to_update.kb_id = kb.id session.commit() - self.logger.info( + self.ap.logger.info( f'Successfully associated file ID {file_id} with knowledge base UUID {knowledge_base_uuid}' ) except Exception as e: session.rollback() - self.logger.error( + self.ap.logger.error( f'Error associating file ID {file_id} with knowledge base UUID {knowledge_base_uuid}: {str(e)}', 
exc_info=True, ) From fe122281fdaa056386098daeffe999eb1cf0325a Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Fri, 11 Jul 2025 21:40:42 +0800 Subject: [PATCH 050/257] feat(fe): component for available apis --- .../home/bots/components/bot-form/BotForm.tsx | 1 + web/src/app/home/knowledge/KBDetailDialog.tsx | 4 +++- .../knowledge/components/kb-docs/KBDoc.tsx | 5 ++++ .../kb-docs/doc-card/DocumentCard.tsx | 9 +++++++ .../knowledge/components/kb-form/KBForm.tsx | 24 ++++++++++++++++++- 5 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 web/src/app/home/knowledge/components/kb-docs/doc-card/DocumentCard.tsx diff --git a/web/src/app/home/bots/components/bot-form/BotForm.tsx b/web/src/app/home/bots/components/bot-form/BotForm.tsx index fe36d33b..e4b6d40e 100644 --- a/web/src/app/home/bots/components/bot-form/BotForm.tsx +++ b/web/src/app/home/bots/components/bot-form/BotForm.tsx @@ -212,6 +212,7 @@ export default function BotForm({ }); setAdapterNameToDynamicConfigMap(adapterNameToDynamicConfigMap); } + async function getBotConfig( botId: string, ): Promise> { diff --git a/web/src/app/home/knowledge/KBDetailDialog.tsx b/web/src/app/home/knowledge/KBDetailDialog.tsx index e3ab4f9d..d31306bd 100644 --- a/web/src/app/home/knowledge/KBDetailDialog.tsx +++ b/web/src/app/home/knowledge/KBDetailDialog.tsx @@ -24,6 +24,7 @@ import { z } from 'zod'; // import { httpClient } from '@/app/infra/http/HttpClient'; // import { KnowledgeBase } from '@/app/infra/entities/api'; import KBForm from '@/app/home/knowledge/components/kb-form/KBForm'; +import KBDoc from '@/app/home/knowledge/components/kb-docs/KBDoc'; interface KBDetailDialogProps { open: boolean; @@ -48,6 +49,7 @@ export default function KBDetailDialog({ const { t } = useTranslation(); const [kbId, setKbId] = useState(propKbId); const [activeMenu, setActiveMenu] = useState('metadata'); + const [fileId, setFileId] = useState(undefined); // const [showDeleteConfirm, setShowDeleteConfirm] = useState(false); 
useEffect(() => { @@ -177,7 +179,7 @@ export default function KBDetailDialog({ onNewKbCreated={onNewKbCreated} /> )} - {activeMenu === 'documents' &&
documents
} + {activeMenu === 'documents' && }
{activeMenu === 'metadata' && ( diff --git a/web/src/app/home/knowledge/components/kb-docs/KBDoc.tsx b/web/src/app/home/knowledge/components/kb-docs/KBDoc.tsx index e69de29b..5cc9a850 100644 --- a/web/src/app/home/knowledge/components/kb-docs/KBDoc.tsx +++ b/web/src/app/home/knowledge/components/kb-docs/KBDoc.tsx @@ -0,0 +1,5 @@ +import { useEffect, useState } from 'react'; + +export default function KBDoc({ kbId }: { kbId: string }) { + return
Documents
; +} diff --git a/web/src/app/home/knowledge/components/kb-docs/doc-card/DocumentCard.tsx b/web/src/app/home/knowledge/components/kb-docs/doc-card/DocumentCard.tsx new file mode 100644 index 00000000..23a884ba --- /dev/null +++ b/web/src/app/home/knowledge/components/kb-docs/doc-card/DocumentCard.tsx @@ -0,0 +1,9 @@ +export default function DocumentCard({ + kbId, + fileId, +}: { + kbId: string; + fileId: string; +}) { + return
; +} diff --git a/web/src/app/home/knowledge/components/kb-form/KBForm.tsx b/web/src/app/home/knowledge/components/kb-form/KBForm.tsx index 0d4f0909..b56c327b 100644 --- a/web/src/app/home/knowledge/components/kb-form/KBForm.tsx +++ b/web/src/app/home/knowledge/components/kb-form/KBForm.tsx @@ -67,9 +67,31 @@ export default function KBForm({ >([]); useEffect(() => { - getEmbeddingModelNameList(); + getEmbeddingModelNameList().then(() => { + if (initKbId) { + getKbConfig(initKbId).then((val) => { + form.setValue('name', val.name); + form.setValue('description', val.description); + form.setValue('embeddingModelUUID', val.embeddingModelUUID); + }); + } + }); }, []); + const getKbConfig = async ( + kbId: string, + ): Promise> => { + return new Promise((resolve, reject) => { + httpClient.getKnowledgeBase(kbId).then((res) => { + resolve({ + name: res.base.name, + description: res.base.description, + embeddingModelUUID: res.base.embedding_model_uuid, + }); + }); + }); + }; + const getEmbeddingModelNameList = async () => { const resp = await httpClient.getProviderEmbeddingModels(); setEmbeddingModelNameList( From f395cac893a3e416b93f25f1a9af1c877514b0ea Mon Sep 17 00:00:00 2001 From: WangCham <651122857@qq.com> Date: Sat, 12 Jul 2025 01:07:49 +0800 Subject: [PATCH 051/257] fix: embbeding and chunking --- .gitignore | 3 +- pkg/entity/persistence/rag.py | 5 +- pkg/rag/knowledge/mgr.py | 84 ++++++++++++-------------- pkg/rag/knowledge/services/chunker.py | 41 ++++++------- pkg/rag/knowledge/services/embedder.py | 2 +- 5 files changed, 64 insertions(+), 71 deletions(-) diff --git a/.gitignore b/.gitignore index 2869b7cc..db62bdca 100644 --- a/.gitignore +++ b/.gitignore @@ -42,4 +42,5 @@ botpy.log* test.py /web_ui .venv/ -uv.lock \ No newline at end of file +uv.lock +/test \ No newline at end of file diff --git a/pkg/entity/persistence/rag.py b/pkg/entity/persistence/rag.py index 95a78712..9ca84741 100644 --- a/pkg/entity/persistence/rag.py +++ b/pkg/entity/persistence/rag.py 
@@ -2,6 +2,7 @@ from sqlalchemy import create_engine, Column, String, Text, DateTime, LargeBinar from sqlalchemy.orm import declarative_base, sessionmaker from datetime import datetime import os +import uuid Base = declarative_base() DATABASE_URL = os.getenv('DATABASE_URL', 'sqlite:///./rag_knowledge.db') @@ -35,11 +36,11 @@ class File(Base): path = Column(String) created_at = Column(DateTime, default=datetime.utcnow) file_type = Column(String) - status = Column(String, default='0') + status = Column(Integer, default=0) # 0: uploaded and processing, 1: completed, 2: failed class Chunk(Base): __tablename__ = 'chunks' - id = Column(String, primary_key=True, index=True) + id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) file_id = Column(String, nullable=True) text = Column(Text) diff --git a/pkg/rag/knowledge/mgr.py b/pkg/rag/knowledge/mgr.py index 89e5b393..09023d03 100644 --- a/pkg/rag/knowledge/mgr.py +++ b/pkg/rag/knowledge/mgr.py @@ -7,6 +7,9 @@ from pkg.rag.knowledge.services.parser import FileParser from pkg.rag.knowledge.services.chunker import Chunker from pkg.rag.knowledge.services.database import create_db_and_tables, SessionLocal, KnowledgeBase, File, Chunk from pkg.core import app +from pkg.rag.knowledge.services.embedder import Embedder +from pkg.rag.knowledge.services.retriever import Retriever +from pkg.rag.knowledge.services.chroma_manager import ChromaIndexManager class RAGManager: @@ -14,11 +17,12 @@ class RAGManager: def __init__(self, ap: app.Application): self.ap = ap - self.chroma_manager = None + self.chroma_manager = ChromaIndexManager() self.parser = FileParser() self.chunker = Chunker() - self.embedder = None - self.retriever = None + # Initialize Embedder with targeted model type and name + self.embedder = Embedder(model_type='third_party_api', model_name_key='bge-m3', chroma_manager=self.chroma_manager) + self.retriever = Retriever(model_type='third_party_api', model_name_key='bge-m3', 
chroma_manager=self.chroma_manager) async def initialize_rag_system(self): """Initializes the RAG system by creating database tables.""" @@ -140,7 +144,7 @@ class RAGManager: return [] async def store_data( - self, file_path: str, kb_name: str, file_type: str, kb_description: str = 'Default knowledge base' + self, file_path: str, kb_id: str, file_type: str, file_id: str = None ): """ Parses, chunks, embeds, and stores data from a given file into the RAG system. @@ -151,58 +155,35 @@ class RAGManager: file_obj = None try: - kb = session.query(KnowledgeBase).filter_by(name=kb_name).first() + kb = session.query(KnowledgeBase).filter_by(id=kb_id).first() if not kb: - kb = KnowledgeBase(name=kb_name, description=kb_description) - session.add(kb) - session.commit() - session.refresh(kb) - self.ap.logger.info(f"Knowledge Base '{kb_name}' created during store_data.") + self.ap.logger.info(f'Knowledge Base "{kb_id}" does not exist. ') + self.ap.logger.info(f'Created Knowledge Base with ID: {kb_id}') else: - self.ap.logger.info(f"Knowledge Base '{kb_name}' already exists.") + self.ap.logger.info(f"Knowledge Base '{kb_id}' already exists.") file_name = os.path.basename(file_path) - existing_file = session.query(File).filter_by(kb_id=kb.id, file_name=file_name).first() - if existing_file: - self.ap.logger.warning( - f"File '{file_name}' already exists in knowledge base '{kb_name}'. Skipping storage." - ) - return - - file_obj = File(kb_id=kb.id, file_name=file_name, path=file_path, file_type=file_type) - session.add(file_obj) - session.commit() - session.refresh(file_obj) - self.ap.logger.info( - f"File record '{file_name}' added to database with ID: {file_obj.id}, associated with KB ID: {kb.id}" - ) - text = await self.parser.parse(file_path) if not text: self.ap.logger.warning( - f'No text extracted from file {file_path}. Deleting file record ID: {file_obj.id}.' + f'No text extracted from file {file_path}. 
' ) - session.delete(file_obj) - session.commit() return chunks_texts = await self.chunker.chunk(text) self.ap.logger.info(f"Chunked file '{file_name}' into {len(chunks_texts)} chunks.") - await self.embedder.embed_and_store(file_id=file_obj.id, chunks=chunks_texts) + await self.embedder.embed_and_store(file_id=file_id, chunks=chunks_texts) self.ap.logger.info(f'Data storage process completed for file: {file_path}') except Exception as e: session.rollback() self.ap.logger.error(f'Error in store_data for file {file_path}: {str(e)}', exc_info=True) - if file_obj and file_obj.id: - try: - await asyncio.to_thread(self.chroma_manager.delete_by_file_id_sync, file_obj.id) - except Exception as chroma_e: - self.ap.logger.warning( - f'Could not clean up ChromaDB entries for file_id {file_obj.id} after store_data failure: {chroma_e}' - ) raise finally: + if file_id: + file_obj = session.query(File).filter_by(id=file_id).first() + if file_obj: + file_obj.status = 1 session.close() async def retrieve_data(self, query: str): @@ -245,7 +226,6 @@ class RAGManager: self.ap.logger.warning( f'File with ID {file_id} not found in database. Skipping deletion of file record.' 
) - session.commit() self.ap.logger.info(f'Successfully completed data deletion for file_id: {file_id}') except Exception as e: @@ -338,13 +318,13 @@ class RAGManager: self.ap.logger.error(f'Knowledge Base with UUID {knowledge_base_uuid} not found.') return - # 更新文件的 kb_id - file_to_update = session.query(File).filter_by(id=file_id).first() - if not file_to_update: - self.ap.logger.error(f'File with ID {file_id} not found.') + if not self.ap.storage_mgr.storage_provider.exists(file_id): + self.ap.logger.error(f'File with ID {file_id} does not exist.') return - - file_to_update.kb_id = kb.id + self.ap.logger.info(f'File with ID {file_id} exists, proceeding with association.') + # add new file record + file_to_update = File(id=file_id, kb_id=kb.id) + session.add(file_to_update) session.commit() self.ap.logger.info( f'Successfully associated file ID {file_id} with knowledge base UUID {knowledge_base_uuid}' @@ -356,4 +336,20 @@ class RAGManager: exc_info=True, ) finally: + # 进行文件解析 + try: + await self.store_data( + file_path = os.path.join('data', 'storage', file_id), + kb_id=knowledge_base_uuid, + file_type=os.path.splitext(file_id)[1].lstrip('.'), + file_id=file_id + ) + except Exception as store_e: + # 如果存储数据时出错,更新文件状态为失败 + file_obj = session.query(File).filter_by(id=file_id).first() + if file_obj: + file_obj.status = 2 + session.commit() + self.ap.logger.error(f'Error storing data for file ID {file_id}', exc_info=True) + session.close() diff --git a/pkg/rag/knowledge/services/chunker.py b/pkg/rag/knowledge/services/chunker.py index 17202a7a..2db7c104 100644 --- a/pkg/rag/knowledge/services/chunker.py +++ b/pkg/rag/knowledge/services/chunker.py @@ -24,33 +24,28 @@ class Chunker(BaseService): """ if not text: return [] - - # Simple whitespace-based splitting for demonstration - # For more advanced chunking, consider libraries like LangChain's text splitters - words = text.split() - chunks = [] - current_chunk = [] + # words = text.split() + # chunks = [] + # 
current_chunk = [] - for word in words: - current_chunk.append(word) - if len(current_chunk) > self.chunk_size: - chunks.append(" ".join(current_chunk[:self.chunk_size])) - current_chunk = current_chunk[self.chunk_size - self.chunk_overlap:] + # for word in words: + # current_chunk.append(word) + # if len(current_chunk) > self.chunk_size: + # chunks.append(" ".join(current_chunk[:self.chunk_size])) + # current_chunk = current_chunk[self.chunk_size - self.chunk_overlap:] - if current_chunk: - chunks.append(" ".join(current_chunk)) + # if current_chunk: + # chunks.append(" ".join(current_chunk)) # A more robust chunking strategy (e.g., using recursive character text splitter) - # from langchain.text_splitter import RecursiveCharacterTextSplitter - # text_splitter = RecursiveCharacterTextSplitter( - # chunk_size=self.chunk_size, - # chunk_overlap=self.chunk_overlap, - # length_function=len, - # is_separator_regex=False, - # ) - # return text_splitter.split_text(text) - - return [chunk for chunk in chunks if chunk.strip()] # Filter out empty chunks + from langchain.text_splitter import RecursiveCharacterTextSplitter + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=self.chunk_size, + chunk_overlap=self.chunk_overlap, + length_function=len, + is_separator_regex=False, + ) + return text_splitter.split_text(text) async def chunk(self, text: str) -> List[str]: """ diff --git a/pkg/rag/knowledge/services/embedder.py b/pkg/rag/knowledge/services/embedder.py index 7e20b19a..063ae79e 100644 --- a/pkg/rag/knowledge/services/embedder.py +++ b/pkg/rag/knowledge/services/embedder.py @@ -12,7 +12,7 @@ from pkg.rag.knowledge.services.chroma_manager import ChromaIndexManager # Impor logger = logging.getLogger(__name__) class Embedder(BaseService): - def __init__(self, model_type: str, model_name_key: str, chroma_manager: ChromaIndexManager): + def __init__(self, model_type: str, model_name_key: str, chroma_manager: ChromaIndexManager = None): super().__init__() 
self.logger = logging.getLogger(self.__class__.__name__) self.model_type = model_type From 9f43097361c33eb1ca8c60a2268d657fa0dbbe86 Mon Sep 17 00:00:00 2001 From: WangCham <651122857@qq.com> Date: Sat, 12 Jul 2025 01:21:02 +0800 Subject: [PATCH 052/257] fix: ensure File.status is set correctly after storing data to avoid null values --- pkg/rag/knowledge/mgr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/rag/knowledge/mgr.py b/pkg/rag/knowledge/mgr.py index 09023d03..a5d5f513 100644 --- a/pkg/rag/knowledge/mgr.py +++ b/pkg/rag/knowledge/mgr.py @@ -318,12 +318,12 @@ class RAGManager: self.ap.logger.error(f'Knowledge Base with UUID {knowledge_base_uuid} not found.') return - if not self.ap.storage_mgr.storage_provider.exists(file_id): + if not await self.ap.storage_mgr.storage_provider.exists(file_id): self.ap.logger.error(f'File with ID {file_id} does not exist.') return self.ap.logger.info(f'File with ID {file_id} exists, proceeding with association.') # add new file record - file_to_update = File(id=file_id, kb_id=kb.id) + file_to_update = File(id=file_id, kb_id=kb.id, file_name=file_id, path=os.path.join('data', 'storage', file_id), file_type=os.path.splitext(file_id)[1].lstrip('.'), status=0) session.add(file_to_update) session.commit() self.ap.logger.info( From 234b61e2f8c0629b7c9de0eb575d8c28269d2196 Mon Sep 17 00:00:00 2001 From: WangCham <651122857@qq.com> Date: Sat, 12 Jul 2025 01:37:44 +0800 Subject: [PATCH 053/257] fix: add functions for deleting files --- pkg/api/http/controller/groups/knowledge/base.py | 2 +- pkg/rag/knowledge/mgr.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pkg/api/http/controller/groups/knowledge/base.py b/pkg/api/http/controller/groups/knowledge/base.py index b3fd50ea..50183f0f 100644 --- a/pkg/api/http/controller/groups/knowledge/base.py +++ b/pkg/api/http/controller/groups/knowledge/base.py @@ -93,6 +93,6 @@ class KnowledgeBaseRouterGroup(group.RouterGroup): 
methods=['DELETE'], endpoint='delete_specific_file_in_kb', ) - async def delete_specific_file_in_kb(file_id: str) -> str: + async def delete_specific_file_in_kb(file_id: str,knowledge_base_uuid: str) -> str: await self.ap.knowledge_base_service.delete_data_by_file_id(file_id) return self.success({}) diff --git a/pkg/rag/knowledge/mgr.py b/pkg/rag/knowledge/mgr.py index a5d5f513..6ebc85a7 100644 --- a/pkg/rag/knowledge/mgr.py +++ b/pkg/rag/knowledge/mgr.py @@ -207,20 +207,23 @@ class RAGManager: self.ap.logger.info(f'Starting data deletion process for file_id: {file_id}') session = SessionLocal() try: - # 1. 从 ChromaDB 删除 embeddings + # delete vectors await asyncio.to_thread(self.chroma_manager.delete_by_file_id_sync, file_id) self.ap.logger.info(f'Deleted embeddings from ChromaDB for file_id: {file_id}') - # 2. 删除与文件关联的 chunks 记录 chunks_to_delete = session.query(Chunk).filter_by(file_id=file_id).all() for chunk in chunks_to_delete: session.delete(chunk) self.ap.logger.info(f'Deleted {len(chunks_to_delete)} chunk records for file_id: {file_id}') - # 3. 删除文件记录本身 file_to_delete = session.query(File).filter_by(id=file_id).first() if file_to_delete: session.delete(file_to_delete) + try: + await self.ap.storage_mgr.storage_provider.delete(file_id) + except Exception as e: + self.ap.logger.error(f'Error deleting file from storage for file_id {file_id}: {str(e)}', exc_info=True) + await self.ap.storage_mgr.storage_provider.delete(file_id) self.ap.logger.info(f'Deleted file record for file_id: {file_id}') else: self.ap.logger.warning( From bfdf238db516f8b6c3f56a127cacd3f1f1897eea Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sat, 12 Jul 2025 11:44:08 +0800 Subject: [PATCH 054/257] chore: use new social image --- README.md | 2 +- README_EN.md | 2 +- README_JP.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 02bba74d..409e7b59 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@

-LangBot +LangBot

diff --git a/README_EN.md b/README_EN.md index c6b68cfa..d207bb6e 100644 --- a/README_EN.md +++ b/README_EN.md @@ -1,6 +1,6 @@

-LangBot +LangBot

diff --git a/README_JP.md b/README_JP.md index aaee328f..a1a560ad 100644 --- a/README_JP.md +++ b/README_JP.md @@ -1,6 +1,6 @@

-LangBot +LangBot

From d78a329aa9bce20c525ca442f55d12120d303e23 Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sat, 12 Jul 2025 17:15:07 +0800 Subject: [PATCH 055/257] feat(fe): file uploading --- web/package-lock.json | 490 ++++++++++++++++-- web/package.json | 1 + .../components/kb-docs/FileUploadZone.tsx | 145 ++++++ .../knowledge/components/kb-docs/KBDoc.tsx | 45 +- .../kb-docs/doc-card/DocumentCard.tsx | 9 - .../components/kb-docs/documents/columns.tsx | 24 + .../kb-docs/documents/data-table.tsx | 81 +++ web/src/app/infra/http/HttpClient.ts | 12 +- web/src/components/ui/table.tsx | 116 +++++ web/src/i18n/locales/en-US.ts | 12 + web/src/i18n/locales/ja-JP.ts | 33 +- web/src/i18n/locales/zh-Hans.ts | 28 + 12 files changed, 937 insertions(+), 59 deletions(-) create mode 100644 web/src/app/home/knowledge/components/kb-docs/FileUploadZone.tsx delete mode 100644 web/src/app/home/knowledge/components/kb-docs/doc-card/DocumentCard.tsx create mode 100644 web/src/app/home/knowledge/components/kb-docs/documents/columns.tsx create mode 100644 web/src/app/home/knowledge/components/kb-docs/documents/data-table.tsx create mode 100644 web/src/components/ui/table.tsx diff --git a/web/package-lock.json b/web/package-lock.json index ee9b5767..fcc17852 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -12,23 +12,27 @@ "@dnd-kit/sortable": "^10.0.0", "@hookform/resolvers": "^5.0.1", "@radix-ui/react-checkbox": "^1.3.1", - "@radix-ui/react-dialog": "^1.1.13", + "@radix-ui/react-dialog": "^1.1.14", "@radix-ui/react-hover-card": "^1.1.13", "@radix-ui/react-label": "^2.1.6", "@radix-ui/react-popover": "^1.1.14", "@radix-ui/react-scroll-area": "^1.2.9", "@radix-ui/react-select": "^2.2.4", - "@radix-ui/react-slot": "^1.2.2", + "@radix-ui/react-separator": "^1.1.7", + "@radix-ui/react-slot": "^1.2.3", "@radix-ui/react-switch": "^1.2.4", "@radix-ui/react-tabs": "^1.1.11", "@radix-ui/react-toggle": "^1.1.8", "@radix-ui/react-toggle-group": "^1.1.9", + "@radix-ui/react-tooltip": "^1.2.7", 
"@tailwindcss/postcss": "^4.1.5", + "@tanstack/react-table": "^8.21.3", "axios": "^1.8.4", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "i18next": "^25.1.2", "i18next-browser-languagedetector": "^8.1.0", + "input-otp": "^1.4.2", "lodash": "^4.17.21", "lucide-react": "^0.507.0", "next": "15.2.4", @@ -1037,6 +1041,24 @@ } } }, + "node_modules/@radix-ui/react-collection/node_modules/@radix-ui/react-slot": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.2.tgz", + "integrity": "sha512-y7TBO4xN4Y94FvcWIOIh18fM4R1A8S4q1jhoz4PNzOoHsFcN8pogcFmZrTYAm4F9VRUrWP/Mw7xSKybIeRI+CQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-compose-refs": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.2.tgz", @@ -1068,22 +1090,22 @@ } }, "node_modules/@radix-ui/react-dialog": { - "version": "1.1.13", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dialog/-/react-dialog-1.1.13.tgz", - "integrity": "sha512-ARFmqUyhIVS3+riWzwGTe7JLjqwqgnODBUZdqpWar/z1WFs9z76fuOs/2BOWCR+YboRn4/WN9aoaGVwqNRr8VA==", + "version": "1.1.14", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dialog/-/react-dialog-1.1.14.tgz", + "integrity": "sha512-+CpweKjqpzTmwRwcYECQcNYbI8V9VSQt0SNFKeEBLgfucbsLssU6Ppq7wUdNXEGb573bMjFhVjKVll8rmV6zMw==", "license": "MIT", "dependencies": { "@radix-ui/primitive": "1.1.2", "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.9", + "@radix-ui/react-dismissable-layer": "1.1.10", "@radix-ui/react-focus-guards": "1.1.2", - "@radix-ui/react-focus-scope": "1.1.6", + "@radix-ui/react-focus-scope": "1.1.7", 
"@radix-ui/react-id": "1.1.1", - "@radix-ui/react-portal": "1.1.8", + "@radix-ui/react-portal": "1.1.9", "@radix-ui/react-presence": "1.1.4", - "@radix-ui/react-primitive": "2.1.2", - "@radix-ui/react-slot": "1.2.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-slot": "1.2.3", "@radix-ui/react-use-controllable-state": "1.2.2", "aria-hidden": "^1.2.4", "react-remove-scroll": "^2.6.3" @@ -1103,6 +1125,105 @@ } } }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-dismissable-layer": { + "version": "1.1.10", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.10.tgz", + "integrity": "sha512-IM1zzRV4W3HtVgftdQiiOmA0AdJlCtMLe00FXaHwgt3rAnNsIyDqshvkIW3hj/iu5hu8ERP7KIYki6NkqDxAwQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.2", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-escape-keydown": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-focus-scope": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz", + "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + 
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-portal": { + "version": "1.1.9", + "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz", + "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-primitive": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", + "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-slot": "1.2.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-direction": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/@radix-ui/react-direction/-/react-direction-1.1.1.tgz", @@ -1448,24 +1569,6 @@ } } }, - "node_modules/@radix-ui/react-popover/node_modules/@radix-ui/react-slot": { - "version": "1.2.3", - "resolved": 
"https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", - "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, "node_modules/@radix-ui/react-popper": { "version": "1.2.6", "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.6.tgz", @@ -1569,6 +1672,24 @@ } } }, + "node_modules/@radix-ui/react-primitive/node_modules/@radix-ui/react-slot": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.2.tgz", + "integrity": "sha512-y7TBO4xN4Y94FvcWIOIh18fM4R1A8S4q1jhoz4PNzOoHsFcN8pogcFmZrTYAm4F9VRUrWP/Mw7xSKybIeRI+CQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-roving-focus": { "version": "1.1.9", "resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.9.tgz", @@ -1654,24 +1775,6 @@ } } }, - "node_modules/@radix-ui/react-scroll-area/node_modules/@radix-ui/react-slot": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", - "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": 
true - } - } - }, "node_modules/@radix-ui/react-select": { "version": "2.2.4", "resolved": "https://registry.npmjs.org/@radix-ui/react-select/-/react-select-2.2.4.tgz", @@ -1715,7 +1818,7 @@ } } }, - "node_modules/@radix-ui/react-slot": { + "node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-slot": { "version": "1.2.2", "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.2.tgz", "integrity": "sha512-y7TBO4xN4Y94FvcWIOIh18fM4R1A8S4q1jhoz4PNzOoHsFcN8pogcFmZrTYAm4F9VRUrWP/Mw7xSKybIeRI+CQ==", @@ -1733,6 +1836,70 @@ } } }, + "node_modules/@radix-ui/react-separator": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-separator/-/react-separator-1.1.7.tgz", + "integrity": "sha512-0HEb8R9E8A+jZjvmFCy/J4xhbXy3TV+9XSnGJ3KvTtjlIUy/YQ/p6UYZvi7YbeoeXdyU9+Y3scizK6hkY37baA==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-primitive": "2.1.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-separator/node_modules/@radix-ui/react-primitive": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", + "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-slot": "1.2.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + 
"node_modules/@radix-ui/react-slot": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", + "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-switch": { "version": "1.2.4", "resolved": "https://registry.npmjs.org/@radix-ui/react-switch/-/react-switch-1.2.4.tgz", @@ -1846,6 +2013,192 @@ } } }, + "node_modules/@radix-ui/react-tooltip": { + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.7.tgz", + "integrity": "sha512-Ap+fNYwKTYJ9pzqW+Xe2HtMRbQ/EeWkj2qykZ6SuEV4iS/o1bZI5ssJbk4D2r8XuDuOBVz/tIx2JObtuqU+5Zw==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.2", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-dismissable-layer": "1.1.10", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-popper": "1.2.7", + "@radix-ui/react-portal": "1.1.9", + "@radix-ui/react-presence": "1.1.4", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-use-controllable-state": "1.2.2", + "@radix-ui/react-visually-hidden": "1.2.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-arrow": { + "version": "1.1.7", + "resolved": 
"https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz", + "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-primitive": "2.1.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-dismissable-layer": { + "version": "1.1.10", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.10.tgz", + "integrity": "sha512-IM1zzRV4W3HtVgftdQiiOmA0AdJlCtMLe00FXaHwgt3rAnNsIyDqshvkIW3hj/iu5hu8ERP7KIYki6NkqDxAwQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.2", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-escape-keydown": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-popper": { + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.7.tgz", + "integrity": "sha512-IUFAccz1JyKcf/RjB552PlWwxjeCJB8/4KxT7EhBHOJM+mN7LdW+B3kacJXILm32xawcMMjb2i0cIZpo+f9kiQ==", + "license": "MIT", + "dependencies": { + "@floating-ui/react-dom": "^2.0.0", + "@radix-ui/react-arrow": "1.1.7", + "@radix-ui/react-compose-refs": 
"1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-layout-effect": "1.1.1", + "@radix-ui/react-use-rect": "1.1.1", + "@radix-ui/react-use-size": "1.1.1", + "@radix-ui/rect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-portal": { + "version": "1.1.9", + "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz", + "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-primitive": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz", + "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-slot": "1.2.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + 
"peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-visually-hidden": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.3.tgz", + "integrity": "sha512-pzJq12tEaaIhqjbzpCuv/OypJY/BPavOofm+dbab+MHLajy277+1lLm6JFcGgF5eskJ6mquGirhXY2GD/8u8Ug==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-primitive": "2.1.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-use-callback-ref": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.1.1.tgz", @@ -2295,6 +2648,39 @@ "tailwindcss": "4.1.5" } }, + "node_modules/@tanstack/react-table": { + "version": "8.21.3", + "resolved": "https://registry.npmjs.org/@tanstack/react-table/-/react-table-8.21.3.tgz", + "integrity": "sha512-5nNMTSETP4ykGegmVkhjcS8tTLW6Vl4axfEGQN3v0zdHYbK4UfoqfPChclTrJ4EoK9QynqAu9oUf8VEmrpZ5Ww==", + "license": "MIT", + "dependencies": { + "@tanstack/table-core": "8.21.3" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/tannerlinsley" + }, + "peerDependencies": { + "react": ">=16.8", + "react-dom": ">=16.8" + } + }, + "node_modules/@tanstack/table-core": { + "version": "8.21.3", + "resolved": "https://registry.npmjs.org/@tanstack/table-core/-/table-core-8.21.3.tgz", + "integrity": "sha512-ldZXEhOBb8Is7xLs01fR3YEc3DERiz5silj8tnGkFZytt1abEvl/GhUmCE0PMLaMPTa3Jk4HbKmRlHmu+gCftg==", + "license": "MIT", + "engines": { + "node": ">=12" + }, + 
"funding": { + "type": "github", + "url": "https://github.com/sponsors/tannerlinsley" + } + }, "node_modules/@tybys/wasm-util": { "version": "0.9.0", "resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.9.0.tgz", @@ -4763,6 +5149,16 @@ "node": ">=0.8.19" } }, + "node_modules/input-otp": { + "version": "1.4.2", + "resolved": "https://registry.npmjs.org/input-otp/-/input-otp-1.4.2.tgz", + "integrity": "sha512-l3jWwYNvrEa6NTCt7BECfCm48GvwuZzkoeG3gBL2w4CHeOXW3eKFmf9UNYkNfYc3mxMrthMnxjIE07MT0zLBQA==", + "license": "MIT", + "peerDependencies": { + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0.0 || ^19.0.0-rc" + } + }, "node_modules/internal-slot": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/internal-slot/-/internal-slot-1.1.0.tgz", diff --git a/web/package.json b/web/package.json index 458e4132..d5e8542c 100644 --- a/web/package.json +++ b/web/package.json @@ -35,6 +35,7 @@ "@radix-ui/react-toggle-group": "^1.1.9", "@radix-ui/react-tooltip": "^1.2.7", "@tailwindcss/postcss": "^4.1.5", + "@tanstack/react-table": "^8.21.3", "axios": "^1.8.4", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", diff --git a/web/src/app/home/knowledge/components/kb-docs/FileUploadZone.tsx b/web/src/app/home/knowledge/components/kb-docs/FileUploadZone.tsx new file mode 100644 index 00000000..8c072bdf --- /dev/null +++ b/web/src/app/home/knowledge/components/kb-docs/FileUploadZone.tsx @@ -0,0 +1,145 @@ +import React, { useCallback, useState } from 'react'; +import { Card, CardContent } from '@/components/ui/card'; +import { httpClient } from '@/app/infra/http/HttpClient'; +import { toast } from 'sonner'; +import { useTranslation } from 'react-i18next'; + +interface FileUploadZoneProps { + kbId: string; + onUploadSuccess: () => void; + onUploadError: (error: string) => void; +} + +export default function FileUploadZone({ + kbId, + onUploadSuccess, + onUploadError, +}: FileUploadZoneProps) { + 
const { t } = useTranslation(); + const [isDragOver, setIsDragOver] = useState(false); + const [isUploading, setIsUploading] = useState(false); + + const handleUpload = useCallback( + async (file: File) => { + if (isUploading) return; + + setIsUploading(true); + const toastId = toast.loading(t('knowledge.documentsTab.uploadingFile')); + + try { + // Step 1: Upload file to server + const uploadResult = await httpClient.uploadDocumentFile(file); + + // Step 2: Associate file with knowledge base + await httpClient.uploadKnowledgeBaseFile(kbId, uploadResult.file_id); + + toast.success(t('knowledge.documentsTab.uploadSuccess'), { + id: toastId, + }); + onUploadSuccess(); + } catch (error) { + console.error('File upload failed:', error); + const errorMessage = t('knowledge.documentsTab.uploadError'); + toast.error(errorMessage, { id: toastId }); + onUploadError(errorMessage); + } finally { + setIsUploading(false); + } + }, + [kbId, isUploading, onUploadSuccess, onUploadError], + ); + + const handleDragOver = useCallback((e: React.DragEvent) => { + e.preventDefault(); + setIsDragOver(true); + }, []); + + const handleDragLeave = useCallback((e: React.DragEvent) => { + e.preventDefault(); + setIsDragOver(false); + }, []); + + const handleDrop = useCallback( + (e: React.DragEvent) => { + e.preventDefault(); + setIsDragOver(false); + + const files = Array.from(e.dataTransfer.files); + if (files.length > 0) { + handleUpload(files[0]); + } + }, + [handleUpload], + ); + + const handleFileSelect = useCallback( + (e: React.ChangeEvent) => { + const files = e.target.files; + if (files && files.length > 0) { + handleUpload(files[0]); + } + }, + [handleUpload], + ); + + return ( + + +
+ + + +
+
+
+ ); +} diff --git a/web/src/app/home/knowledge/components/kb-docs/KBDoc.tsx b/web/src/app/home/knowledge/components/kb-docs/KBDoc.tsx index 5cc9a850..b1730602 100644 --- a/web/src/app/home/knowledge/components/kb-docs/KBDoc.tsx +++ b/web/src/app/home/knowledge/components/kb-docs/KBDoc.tsx @@ -1,5 +1,48 @@ import { useEffect, useState } from 'react'; +import { httpClient } from '@/app/infra/http/HttpClient'; +import { KnowledgeBaseFile } from '@/app/infra/entities/api'; +import { columns, DocumentFile } from './documents/columns'; +import { DataTable } from './documents/data-table'; +import FileUploadZone from './FileUploadZone'; export default function KBDoc({ kbId }: { kbId: string }) { - return
Documents
; + const [documentsList, setDocumentsList] = useState([]); + + useEffect(() => { + getDocumentsList(); + }, []); + + async function getDocumentsList() { + const resp = await httpClient.getKnowledgeBaseFiles(kbId); + setDocumentsList( + resp.files.map((file: KnowledgeBaseFile) => { + return { + id: file.file_id, + name: file.file_name, + status: file.status, + }; + }), + ); + } + + const handleUploadSuccess = () => { + // Refresh document list after successful upload + getDocumentsList(); + }; + + const handleUploadError = (error: string) => { + // Error messages are already handled by toast in FileUploadZone component + console.error('Upload failed:', error); + }; + + return ( +
+ + +
+ ); } diff --git a/web/src/app/home/knowledge/components/kb-docs/doc-card/DocumentCard.tsx b/web/src/app/home/knowledge/components/kb-docs/doc-card/DocumentCard.tsx deleted file mode 100644 index 23a884ba..00000000 --- a/web/src/app/home/knowledge/components/kb-docs/doc-card/DocumentCard.tsx +++ /dev/null @@ -1,9 +0,0 @@ -export default function DocumentCard({ - kbId, - fileId, -}: { - kbId: string; - fileId: string; -}) { - return
; -} diff --git a/web/src/app/home/knowledge/components/kb-docs/documents/columns.tsx b/web/src/app/home/knowledge/components/kb-docs/documents/columns.tsx new file mode 100644 index 00000000..d43afd68 --- /dev/null +++ b/web/src/app/home/knowledge/components/kb-docs/documents/columns.tsx @@ -0,0 +1,24 @@ +'use client'; + +import { ColumnDef } from '@tanstack/react-table'; +import { useTranslation } from 'react-i18next'; + +export type DocumentFile = { + id: string; + name: string; + status: string; +}; + +export const columns = (): ColumnDef[] => { + const { t } = useTranslation(); + return [ + { + accessorKey: 'name', + header: t('knowledge.documentsTab.name'), + }, + { + accessorKey: 'status', + header: t('knowledge.documentsTab.status'), + }, + ]; +}; diff --git a/web/src/app/home/knowledge/components/kb-docs/documents/data-table.tsx b/web/src/app/home/knowledge/components/kb-docs/documents/data-table.tsx new file mode 100644 index 00000000..178ccad9 --- /dev/null +++ b/web/src/app/home/knowledge/components/kb-docs/documents/data-table.tsx @@ -0,0 +1,81 @@ +'use client'; + +import { + ColumnDef, + flexRender, + getCoreRowModel, + useReactTable, +} from '@tanstack/react-table'; +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from '@/components/ui/table'; +import { useTranslation } from 'react-i18next'; + +interface DataTableProps { + columns: ColumnDef[]; + data: TData[]; +} + +export function DataTable({ + columns, + data, +}: DataTableProps) { + const { t } = useTranslation(); + const table = useReactTable({ + data, + columns, + getCoreRowModel: getCoreRowModel(), + }); + + return ( +
+ + + {table.getHeaderGroups().map((headerGroup) => ( + + {headerGroup.headers.map((header) => { + return ( + + {header.isPlaceholder + ? null + : flexRender( + header.column.columnDef.header, + header.getContext(), + )} + + ); + })} + + ))} + + + {table.getRowModel().rows?.length ? ( + table.getRowModel().rows.map((row) => ( + + {row.getVisibleCells().map((cell) => ( + + {flexRender(cell.column.columnDef.cell, cell.getContext())} + + ))} + + )) + ) : ( + + + {t('knowledge.documentsTab.noResults')} + + + )} + +
+
+ ); +} diff --git a/web/src/app/infra/http/HttpClient.ts b/web/src/app/infra/http/HttpClient.ts index 8842b04d..3a0b5f35 100644 --- a/web/src/app/infra/http/HttpClient.ts +++ b/web/src/app/infra/http/HttpClient.ts @@ -433,7 +433,17 @@ class HttpClient { // ============ File management API ============ public uploadDocumentFile(file: File): Promise<{ file_id: string }> { - return this.post('/api/v1/files/documents', file); + const formData = new FormData(); + formData.append('file', file); + + return this.request<{ file_id: string }>({ + method: 'post', + url: '/api/v1/files/documents', + data: formData, + headers: { + 'Content-Type': 'multipart/form-data', + }, + }); } // ============ Knowledge Base API ============ diff --git a/web/src/components/ui/table.tsx b/web/src/components/ui/table.tsx new file mode 100644 index 00000000..ebded8ed --- /dev/null +++ b/web/src/components/ui/table.tsx @@ -0,0 +1,116 @@ +'use client'; + +import * as React from 'react'; + +import { cn } from '@/lib/utils'; + +function Table({ className, ...props }: React.ComponentProps<'table'>) { + return ( +
+ + + ); +} + +function TableHeader({ className, ...props }: React.ComponentProps<'thead'>) { + return ( + + ); +} + +function TableBody({ className, ...props }: React.ComponentProps<'tbody'>) { + return ( + + ); +} + +function TableFooter({ className, ...props }: React.ComponentProps<'tfoot'>) { + return ( + tr]:last:border-b-0', + className, + )} + {...props} + /> + ); +} + +function TableRow({ className, ...props }: React.ComponentProps<'tr'>) { + return ( + + ); +} + +function TableHead({ className, ...props }: React.ComponentProps<'th'>) { + return ( +
[role=checkbox]]:translate-y-[2px]', + className, + )} + {...props} + /> + ); +} + +function TableCell({ className, ...props }: React.ComponentProps<'td'>) { + return ( + [role=checkbox]]:translate-y-[2px]', + className, + )} + {...props} + /> + ); +} + +function TableCaption({ + className, + ...props +}: React.ComponentProps<'caption'>) { + return ( +
+ ); +} + +export { + Table, + TableHeader, + TableBody, + TableFooter, + TableHead, + TableRow, + TableCell, + TableCaption, +}; diff --git a/web/src/i18n/locales/en-US.ts b/web/src/i18n/locales/en-US.ts index ecc43204..cfb50966 100644 --- a/web/src/i18n/locales/en-US.ts +++ b/web/src/i18n/locales/en-US.ts @@ -251,6 +251,18 @@ const enUS = { embeddingModelDescription: 'Used to vectorize the text, you can configure it in the Models page', updateTime: 'Updated ', + documentsTab: { + name: 'Name', + status: 'Status', + noResults: 'No results', + dragAndDrop: 'Drag and drop files here or click to upload', + uploading: 'Uploading...', + supportedFormats: + 'Supports PDF, Word, TXT, Markdown and other document formats', + uploadSuccess: 'File uploaded successfully!', + uploadError: 'File upload failed, please try again', + uploadingFile: 'Uploading file...', + }, }, register: { title: 'Initialize LangBot 👋', diff --git a/web/src/i18n/locales/ja-JP.ts b/web/src/i18n/locales/ja-JP.ts index 21b0ff7d..639549b1 100644 --- a/web/src/i18n/locales/ja-JP.ts +++ b/web/src/i18n/locales/ja-JP.ts @@ -234,7 +234,38 @@ const jaJP = { }, knowledge: { title: '知識ベース', - description: 'LLMの応答品質を向上させるための知識ベースを設定します', + createKnowledgeBase: '知識ベースを作成', + editKnowledgeBase: '知識ベースを編集', + editDocument: 'ドキュメント', + description: 'LLMの回答品質向上のための知識ベースを設定します', + metadata: 'メタデータ', + documents: 'ドキュメント', + kbNameRequired: '知識ベース名は必須です', + kbDescriptionRequired: '知識ベースの説明は必須です', + embeddingModelUUIDRequired: '埋め込みモデルは必須です', + daysAgo: '日前', + today: '今日', + kbName: '知識ベース名', + kbDescription: '知識ベースの説明', + defaultDescription: '知識ベース', + embeddingModelUUID: '埋め込みモデル', + selectEmbeddingModel: '埋め込みモデルを選択', + embeddingModelDescription: + 'テキストのベクトル化に使用する埋め込みモデルを管理します', + updateTime: '更新日時', + documentsTab: { + name: '名前', + status: 'ステータス', + noResults: '結果がありません', + dragAndDrop: + 'ファイルをここにドラッグ&ドロップするか、クリックしてアップロードしてください', + uploading: 'アップロード中...', + supportedFormats: + 
'PDF、Word、TXT、Markdownなどのドキュメントファイルをサポートしています', + uploadSuccess: 'ファイルのアップロードに成功しました!', + uploadError: 'ファイルのアップロードに失敗しました。再度お試しください', + uploadingFile: 'ファイルをアップロード中...', + }, }, register: { title: 'LangBot を初期化 👋', diff --git a/web/src/i18n/locales/zh-Hans.ts b/web/src/i18n/locales/zh-Hans.ts index 1bd04ca8..71089fa2 100644 --- a/web/src/i18n/locales/zh-Hans.ts +++ b/web/src/i18n/locales/zh-Hans.ts @@ -227,7 +227,35 @@ const zhHans = { }, knowledge: { title: '知识库', + createKnowledgeBase: '创建知识库', + editKnowledgeBase: '编辑知识库', + editDocument: '文档', description: '配置可用于提升模型回复质量的知识库', + metadata: '元数据', + documents: '文档', + kbNameRequired: '知识库名称不能为空', + kbDescriptionRequired: '知识库描述不能为空', + embeddingModelUUIDRequired: '嵌入模型不能为空', + daysAgo: '天前', + today: '今天', + kbName: '知识库名称', + kbDescription: '知识库描述', + defaultDescription: '一个知识库', + embeddingModelUUID: '嵌入模型', + selectEmbeddingModel: '选择嵌入模型', + embeddingModelDescription: '用于向量化文本,可在模型配置页面配置', + updateTime: '更新于', + documentsTab: { + name: '名称', + status: '状态', + noResults: '暂无结果', + dragAndDrop: '拖拽文件到此处或点击上传', + uploading: '上传中...', + supportedFormats: '支持 PDF、Word、TXT、Markdown 等文档格式', + uploadSuccess: '文件上传成功!', + uploadError: '文件上传失败,请重试', + uploadingFile: '上传文件中...', + }, }, register: { title: '初始化 LangBot 👋', From 1e85d02ae4a328b3c8f96baf198070cf99945cce Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Sat, 12 Jul 2025 17:29:39 +0800 Subject: [PATCH 056/257] perf: adjust ui --- .../components/kb-docs/FileUploadZone.tsx | 14 +++++++------- .../home/knowledge/components/kb-docs/KBDoc.tsx | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/web/src/app/home/knowledge/components/kb-docs/FileUploadZone.tsx b/web/src/app/home/knowledge/components/kb-docs/FileUploadZone.tsx index 8c072bdf..aa8adede 100644 --- a/web/src/app/home/knowledge/components/kb-docs/FileUploadZone.tsx +++ b/web/src/app/home/knowledge/components/kb-docs/FileUploadZone.tsx @@ -84,10 +84,10 @@ export default function 
FileUploadZone({ return ( - +