fix: during iteration data was only pushed into resq_messages and resq_message_chain, which made the data cached in memory and written to the log grow huge; also fixed handling of think output from reasoning models

feat: add an option to strip the think output of deep-thinking (reasoning) models
feat: streaming support for the chatbot and chatflow apps in Dify
This commit is contained in:
Dong_master
2025-07-13 22:41:39 +08:00
committed by Junyan Qin
parent 5ce32d2f04
commit f9a5507029
4 changed files with 113 additions and 41 deletions
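
For context, the remove_think toggle added below controls whether a reasoning model's reasoning_content is merged into the reply as a <think>...</think> block or dropped entirely. A minimal sketch of that decision, assuming a hypothetical helper name that is not part of this commit:

def merge_reasoning(content: str, reasoning_content: str | None, remove_think: bool) -> str:
    # Hypothetical helper mirroring the non-streaming path of this commit:
    # with remove_think enabled the reasoning is discarded, otherwise it is
    # prepended to the reply inside a <think> block.
    if reasoning_content is None or remove_think:
        return content
    return '<think>\n' + reasoning_content + '\n</think>\n' + content

The streaming path has to apply the same rule incrementally, which is why _make_msg and _make_msg_chunk now receive pipeline_config as well.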

View File

@@ -359,7 +359,7 @@ class LarkAdapter(adapter.MessagePlatformAdapter):
self.listeners = {}
self.message_id_to_card_id = {}
self.card_id_dict = {}
self.seq = 0
self.seq = 1
@self.quart_app.route('/lark/callback', methods=['POST'])
async def lark_callback():
@@ -456,7 +456,7 @@ class LarkAdapter(adapter.MessagePlatformAdapter):
card_data = {"schema": "2.0", "header": {"title": {"content": "bot", "tag": "plain_text"}},
"body": {"elements": [{"tag": "markdown", "content": "[思考中.....]","element_id":"markdown_1"}]},
"config": {"streaming_mode": True,
"streaming_config": {"print_strategy": "fast"}}}
"streaming_config": {"print_strategy": "delay"}}} # delay / fast
request: CreateCardRequest = CreateCardRequest.builder() \
.request_body(
@@ -620,7 +620,7 @@ class LarkAdapter(adapter.MessagePlatformAdapter):
'type': 'card_json',
'data': {'card_id': self.card_id_dict[message_id], 'elements': {'content': text_message}},
}
print(self.seq)
request: ContentCardElementRequest = ContentCardElementRequest.builder() \
.card_id(self.card_id_dict[message_id]) \
.element_id("markdown_1") \
@@ -632,7 +632,7 @@ class LarkAdapter(adapter.MessagePlatformAdapter):
.build()
if is_final:
self.seq = 0
self.seq = 1
# send the request
response: ContentCardElementResponse = self.api_client.cardkit.v1.card_element.content(request)

View File

@@ -8,7 +8,7 @@ import openai.types.chat.chat_completion as chat_completion
import httpx
from .. import errors, requester
from ....core import entities as core_entities
from ....core import entities as core_entities, app
from ... import entities as llm_entities
from ...tools import entities as tools_entities
@@ -25,7 +25,6 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
}
async def initialize(self):
self.client = openai.AsyncClient(
api_key='',
@@ -53,6 +52,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
async def _make_msg(
self,
pipeline_config: dict[str, typing.Any],
chat_completion: chat_completion.ChatCompletion,
) -> llm_entities.Message:
chatcmpl_message = chat_completion.choices[0].message.model_dump()
@@ -64,8 +64,12 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
# deepseek reasoner model
if reasoning_content is not None:
chatcmpl_message['content'] = '<think>\n' + reasoning_content + '\n</think>\n' + chatcmpl_message['content']
print(pipeline_config['trigger'].get('misc', '').get('remove_think'))
if pipeline_config['trigger'].get('misc', '').get('remove_think'):
pass
else:
if reasoning_content is not None :
chatcmpl_message['content'] = '<think>\n' + reasoning_content + '\n</think>\n' + chatcmpl_message['content']
message = llm_entities.Message(**chatcmpl_message)
@@ -73,7 +77,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
async def _make_msg_chunk(
self,
index:int,
pipeline_config: dict[str, typing.Any],
chat_completion: chat_completion.ChatCompletion,
) -> llm_entities.MessageChunk:
@@ -96,16 +100,22 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None
# deepseek reasoner model
if reasoning_content is not None and index == 0:
delta['content'] += f'<think>\n{reasoning_content}'
elif reasoning_content is None:
if self.is_content:
delta['content'] = delta['content']
if pipeline_config['trigger'].get('misc', '').get('remove_think'):
if reasoning_content is not None :
pass
else:
delta['content'] = f'\n<think>\n\n{delta["content"]}'
self.is_content = True
delta['content'] = delta['content']
else:
delta['content'] += reasoning_content
if reasoning_content is not None:
delta['content'] += f'<think>\n{reasoning_content}'
elif reasoning_content is None:
if self.is_content:
delta['content'] = delta['content']
else:
delta['content'] = f'\n<think>\n\n{delta["content"]}'
self.is_content = True
else:
delta['content'] += reasoning_content
message = llm_entities.MessageChunk(**delta)
@@ -151,20 +161,41 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
args["stream"] = True
chunk_idx = 0
self.is_content = False
tool_calls_map: dict[str, llm_entities.ToolCall] = {}
pipeline_config = query.pipeline_config
async for chunk in self._req_stream(args, extra_body=extra_args):
# handle the streamed message
delta_message = await self._make_msg_chunk(chunk_idx,chunk)
# print(delta_message)
delta_message = await self._make_msg_chunk(pipeline_config,chunk)
if delta_message.content:
current_content += delta_message.content
delta_message.content = current_content
print(current_content)
# delta_message.all_content = current_content
if delta_message.tool_calls:
for tool_call in delta_message.tool_calls:
if tool_call.id not in tool_calls_map:
tool_calls_map[tool_call.id] = llm_entities.ToolCall(
id=tool_call.id,
type=tool_call.type,
function=llm_entities.FunctionCall(
name=tool_call.function.name if tool_call.function else '',
arguments=''
),
)
if tool_call.function and tool_call.function.arguments:
# in streaming, a tool call's arguments may arrive split across multiple chunks, so append instead of overwriting
tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments
chunk_idx += 1
chunk_choices = getattr(chunk, 'choices', None)
if chunk_choices and getattr(chunk_choices[0], 'finish_reason', None):
delta_message.is_final = True
delta_message.content = current_content
yield delta_message
if chunk_idx % 64 == 0 or delta_message.is_final:
yield delta_message
# return
@@ -208,7 +239,8 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
resp = await self._req(args, extra_body=extra_args)
# handle the response
message = await self._make_msg(resp)
pipeline_config = query.pipeline_config
message = await self._make_msg(pipeline_config,resp)
return message
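
The streaming hunk above also starts merging tool calls by id, since OpenAI-compatible APIs may split one call's JSON arguments across several chunks. A standalone sketch of that accumulation pattern, using simplified stand-in types rather than the project's llm_entities classes:

from dataclasses import dataclass

@dataclass
class ToolCallDraft:
    # Simplified stand-in for the project's ToolCall / FunctionCall entities.
    id: str
    name: str = ''
    arguments: str = ''

def merge_tool_call_deltas(deltas: list[dict]) -> dict[str, ToolCallDraft]:
    # Merge per-chunk tool_call fragments: the first fragment for an id usually
    # carries the function name, later fragments append pieces of the arguments.
    calls: dict[str, ToolCallDraft] = {}
    for delta in deltas:
        for tc in delta.get('tool_calls') or []:
            draft = calls.setdefault(tc['id'], ToolCallDraft(id=tc['id']))
            fn = tc.get('function') or {}
            if fn.get('name'):
                draft.name = fn['name']
            if fn.get('arguments'):
                draft.arguments += fn['arguments']  # append, never overwrite
    return calls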

View File

@@ -95,6 +95,11 @@ class DifyServiceAPIRunner(runner.RequestRunner):
cov_id = query.session.using_conversation.uuid or ''
query.variables['conversation_id'] = cov_id
try:
is_stream = query.adapter.is_stream
except AttributeError:
is_stream = False
plain_text, image_ids = await self._preprocess_user_message(query)
files = [
@@ -144,40 +149,54 @@ class DifyServiceAPIRunner(runner.RequestRunner):
if mode == 'workflow':
if chunk['event'] == 'node_finished':
if chunk['data']['node_type'] == 'answer':
yield llm_entities.Message(
role='assistant',
content=self._try_convert_thinking(chunk['data']['outputs']['answer']),
)
if not is_stream:
if chunk['data']['node_type'] == 'answer':
yield llm_entities.Message(
role='assistant',
content=self._try_convert_thinking(chunk['data']['outputs']['answer']),
)
else:
if chunk['data']['node_type'] == 'answer':
yield llm_entities.MessageChunk(
role='assistant',
content=self._try_convert_thinking(chunk['data']['outputs']['answer']),
is_final=True,
)
elif chunk['event'] == 'message':
stream_output_pending_chunk += chunk['answer']
if self.pipeline_config['ai']['dify-service-api'].get('enable-streaming', False):
if is_stream:
# flush once enough messages have accumulated, to achieve a streaming effect
batch_pending_index += 1
if batch_pending_index >= batch_pending_max_size:
yield llm_entities.Message(
yield llm_entities.MessageChunk(
role='assistant',
content=self._try_convert_thinking(stream_output_pending_chunk),
)
batch_pending_index = 0
elif mode == 'basic':
if chunk['event'] == 'message':
stream_output_pending_chunk += chunk['answer']
if self.pipeline_config['ai']['dify-service-api'].get('enable-streaming', False):
# flush once enough messages have accumulated, to achieve a streaming effect
batch_pending_index += 1
if batch_pending_index >= batch_pending_max_size:
if chunk['event'] == 'message' or chunk['event'] == 'message_end':
if chunk['event'] == 'message_end':
is_final = True
if is_stream and batch_pending_index % batch_pending_max_size == 0:
# flush once enough messages have accumulated, to achieve a streaming effect
batch_pending_index += 1
# if batch_pending_index >= batch_pending_max_size:
yield llm_entities.MessageChunk(
role='assistant',
content=self._try_convert_thinking(stream_output_pending_chunk),
is_final=is_final,
)
# batch_pending_index = 0
elif not is_stream:
yield llm_entities.Message(
role='assistant',
content=self._try_convert_thinking(stream_output_pending_chunk),
)
batch_pending_index = 0
elif chunk['event'] == 'message_end':
yield llm_entities.Message(
role='assistant',
content=self._try_convert_thinking(stream_output_pending_chunk),
)
stream_output_pending_chunk = ''
stream_output_pending_chunk = ''
else:
stream_output_pending_chunk += chunk['answer']
is_final = False
if chunk is None:
raise errors.DifyAPIError('Dify API 没有返回任何响应请检查网络连接和API配置')
@@ -191,6 +210,13 @@ class DifyServiceAPIRunner(runner.RequestRunner):
cov_id = query.session.using_conversation.uuid or ''
query.variables['conversation_id'] = cov_id
try:
is_stream = query.adapter.is_stream
except AttributeError:
is_stream = False
batch_pending_index = 0
plain_text, image_ids = await self._preprocess_user_message(query)
files = [
@@ -285,6 +311,13 @@ class DifyServiceAPIRunner(runner.RequestRunner):
query.variables['conversation_id'] = query.session.using_conversation.uuid
try:
is_stream = query.adapter.is_stream
except AttributeError:
is_stream = False
batch_pending_index = 0
plain_text, image_ids = await self._preprocess_user_message(query)
files = [
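
In the Dify runner above, the adapter's is_stream flag (read defensively, since not every adapter defines it) switches between yielding whole Message objects and periodically flushed MessageChunk objects. A minimal sketch of the batching idea, with an illustrative batch size:

async def batched_stream(pieces, batch_size: int = 8):
    # Accumulate streamed 'message' events and flush every batch_size events;
    # the final yield carries the full accumulated text, mirroring the
    # pending-chunk logic above. `pieces` is any async iterable of strings.
    pending, count = '', 0
    async for piece in pieces:
        pending += piece
        count += 1
        if count % batch_size == 0:
            yield pending, False  # intermediate flush, not final
    yield pending, True  # final flush with everything accumulated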

View File

@@ -132,3 +132,10 @@ stages:
type: boolean
required: true
default: true
- name: remove_think
label:
en_US: remove think
zh_Hans: 删除深度思考消息
type: boolean
required: true
default: true