fix:Fixed the issue where the rich text processing in the DingTalk AP… (#1759)

* fix: Fixed rich text processing in the DingTalk API, which did not account for multiple texts and images or for the default line breaks. Also resolved the Dify error caused by sending only images, which resulted in an empty query. * fix: Handle the remaining scenarios: plain_text may be empty when there is file content, and the content may be empty when there is no file (the message could not be parsed). * fix: Add a default, user-modifiable prompt input for Dify in ai.yaml so that an empty query no longer causes an error when receiving data. * add: Config migration for Dify * fix: Migration issue * perf: minor fix * chore: minor fix --------- Co-authored-by: Junyan Qin <rockchinq@gmail.com>
2025-11-25 03:15:06 +08:00 · 2025-11-10 21:42:09 +08:00
parent dd1c7ffc39
commit 32215e9a3f
8 changed files with 187 additions and 16 deletions
--- a/libs/dify_service_api/v1/client.py
+++ b/libs/dify_service_api/v1/client.py
@@ -5,6 +5,8 @@ import typing
 import json

 from .errors import DifyAPIError
+from pathlib import Path
+import os


 class AsyncDifyServiceClient:
@@ -109,7 +111,23 @@ class AsyncDifyServiceClient:
        user: str,
        timeout: float = 30.0,
    ) -> str:
-        """上传文件"""
+        # 处理 Path 对象
+        if isinstance(file, Path):
+            if not file.exists():
+                raise ValueError(f'File not found: {file}')
+            with open(file, 'rb') as f:
+                file = f.read()
+
+        # 处理文件路径字符串
+        elif isinstance(file, str):
+            if not os.path.isfile(file):
+                raise ValueError(f'File not found: {file}')
+            with open(file, 'rb') as f:
+                file = f.read()
+
+        # 处理文件对象
+        elif hasattr(file, 'read'):
+            file = file.read()
        async with httpx.AsyncClient(
            base_url=self.base_url,
            trust_env=True,
@@ -121,6 +139,8 @@ class AsyncDifyServiceClient:
                headers={'Authorization': f'Bearer {self.api_key}'},
                files={
                    'file': file,
+                },
+                data={
                    'user': (None, user),
                },
            )
--- a/libs/dingtalk_api/api.py
+++ b/libs/dingtalk_api/api.py
@@ -188,12 +188,80 @@ class DingTalkClient:

            if incoming_message.message_type == 'richText':
                data = incoming_message.rich_text_content.to_dict()
+
+                # 使用统一的结构化数据格式，保持顺序
+                rich_content = {
+                    'Type': 'richText',
+                    'Elements': [],  # 按顺序存储所有元素
+                    'SimpleContent': '',  # 兼容字段：纯文本内容
+                    'SimplePicture': ''  # 兼容字段：第一张图片
+                }
+
+                # 先收集所有文本和图片占位符
+                text_elements = []
+                image_placeholders = []
+
+                # 解析富文本内容，保持原始顺序
                for item in data['richText']:
-                    if 'text' in item:
-                        message_data['Content'] = item['text']
-                    if incoming_message.get_image_list()[0]:
-                        message_data['Picture'] = await self.download_image(incoming_message.get_image_list()[0])
-                message_data['Type'] = 'text'
+
+                    # 处理文本内容
+                    if 'text' in item and item['text'] != "\n":
+                        element = {
+                            'Type': 'text',
+                            'Content': item['text']
+                        }
+                        rich_content['Elements'].append(element)
+                        text_elements.append(item['text'])
+
+                    # 检查是否是图片元素 - 根据钉钉API的实际结构调整
+                    # 钉钉富文本中的图片通常有特定标识，可能需要根据实际返回调整
+                    elif item.get("type") == "picture":
+                        # 创建图片占位符
+                        element = {
+                            'Type': 'image_placeholder',
+                        }
+                        rich_content['Elements'].append(element)
+
+                # 获取并下载所有图片
+                image_list = incoming_message.get_image_list()
+                if image_list:
+                    new_elements = []
+                    image_index = 0
+
+                    for element in rich_content['Elements']:
+                        if element['Type'] == 'image_placeholder':
+                            if image_index < len(image_list) and image_list[image_index]:
+                                image_url = await self.download_image(image_list[image_index])
+                                new_elements.append({
+                                    'Type': 'image',
+                                    'Picture': image_url
+                                })
+                                image_index += 1
+                            else:
+                                # 如果没有对应的图片，保留占位符或跳过
+                                continue
+                        else:
+                            new_elements.append(element)
+
+                    rich_content['Elements'] = new_elements
+
+
+                # 设置兼容字段
+                all_texts = [elem['Content'] for elem in rich_content['Elements'] if elem.get('Type') == 'text']
+                rich_content['SimpleContent'] = '\n'.join(all_texts) if all_texts else ''
+
+                all_images = [elem['Picture'] for elem in rich_content['Elements'] if elem.get('Type') == 'image']
+                if all_images:
+                    rich_content['SimplePicture'] = all_images[0]
+                    rich_content['AllImages'] = all_images  # 所有图片的列表
+
+                # 设置原始的 content 和 picture 字段以保持兼容
+                message_data['Content'] = rich_content['SimpleContent']
+                message_data['Rich_Content'] = rich_content
+                if all_images:
+                    message_data['Picture'] = all_images[0]
+
+

            elif incoming_message.message_type == 'text':
                message_data['Content'] = incoming_message.get_text_list()[0]
--- a/libs/dingtalk_api/dingtalkevent.py
+++ b/libs/dingtalk_api/dingtalkevent.py
@@ -15,6 +15,10 @@ class DingTalkEvent(dict):
    def content(self):
        return self.get('Content', '')

+    @property
+    def rich_content(self):
+        return self.get('Rich_Content', '')
+
    @property
    def incoming_message(self) -> Optional['dingtalk_stream.chatbot.ChatbotMessage']:
        return self.get('IncomingMessage')
--- a/pkg/persistence/migrations/dbm011_dify_base_prompt_config.py
+++ b/pkg/persistence/migrations/dbm011_dify_base_prompt_config.py
@@ -0,0 +1,40 @@
+from .. import migration
+
+import sqlalchemy
+
+from ...entity.persistence import pipeline as persistence_pipeline
+
+
+@migration.migration_class(11)
+class DBMigrateDifyApiConfig(migration.DBMigration):
+    """Backfill the default `base-prompt` into each pipeline's Dify service API config."""
+
+    async def upgrade(self):
+        """Add `base-prompt` to every stored pipeline that lacks it, then persist the config."""
+        # read all pipelines
+        pipelines = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_pipeline.LegacyPipeline))
+
+        for pipeline in pipelines:
+            serialized_pipeline = self.ap.persistence_mgr.serialize_model(persistence_pipeline.LegacyPipeline, pipeline)
+
+            config = serialized_pipeline['config']
+            # Backfill only when missing; the value must be a plain str (a trailing comma would make it a tuple).
+            if 'base-prompt' not in config['ai']['dify-service-api']:
+                config['ai']['dify-service-api']['base-prompt'] = (
+                    'When the file content is readable, please read the content of this file. When the file is an image, describe the content of this image.'
+                )
+            # Written back for every pipeline so 'for_version' is stamped with the current version.
+            await self.ap.persistence_mgr.execute_async(
+                sqlalchemy.update(persistence_pipeline.LegacyPipeline)
+                .where(persistence_pipeline.LegacyPipeline.uuid == serialized_pipeline['uuid'])
+                .values(
+                    {
+                        'config': config,
+                        'for_version': self.ap.ver_mgr.get_current_version(),
+                    }
+                )
+            )
+
+    async def downgrade(self):
+        """Downgrade (no-op: the added key is left in place)"""
+        pass
--- a/pkg/platform/sources/dingtalk.py
+++ b/pkg/platform/sources/dingtalk.py
@@ -1,3 +1,4 @@
+
 import traceback
 import typing
 from libs.dingtalk_api.dingtalkevent import DingTalkEvent
@@ -36,16 +37,31 @@ class DingTalkMessageConverter(abstract_platform_adapter.AbstractMessageConverte
            if atUser.dingtalk_id == event.incoming_message.chatbot_user_id:
                yiri_msg_list.append(platform_message.At(target=bot_name))

-        if event.content:
-            text_content = event.content.replace('@' + bot_name, '')
-            yiri_msg_list.append(platform_message.Plain(text=text_content))
-        if event.picture:
-            yiri_msg_list.append(platform_message.Image(base64=event.picture))
+        if event.rich_content:
+            elements = event.rich_content.get("Elements")
+            for element in elements:
+                if element.get('Type') == 'text':
+                    text = element.get('Content', '').replace('@' + bot_name, '')
+                    if text.strip():
+                        yiri_msg_list.append(platform_message.Plain(text=text))
+                elif element.get('Type') == 'image' and element.get('Picture'):
+                    yiri_msg_list.append(platform_message.Image(base64=element['Picture']))
+        else:
+            # 回退到原有简单逻辑
+            if event.content:
+                text_content = event.content.replace('@' + bot_name, '')
+                yiri_msg_list.append(platform_message.Plain(text=text_content))
+            if event.picture:
+                yiri_msg_list.append(platform_message.Image(base64=event.picture))
+
+            # 处理其他类型消息（文件、音频等）
        if event.file:
            yiri_msg_list.append(platform_message.File(url=event.file, name=event.name))
        if event.audio:
            yiri_msg_list.append(platform_message.Voice(base64=event.audio))

+
+
        chain = platform_message.MessageChain(yiri_msg_list)

        return chain
--- a/pkg/provider/runners/difysvapi.py
+++ b/pkg/provider/runners/difysvapi.py
@@ -14,6 +14,7 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 from libs.dify_service_api.v1 import client, errors


+
@runner.runner_class('dify-service-api')
 class DifyServiceAPIRunner(runner.RequestRunner):
    """Dify Service API 对话请求器"""
@@ -77,7 +78,7 @@ class DifyServiceAPIRunner(runner.RequestRunner):
            tuple[str, list[str]]: 纯文本和图片的 Dify 服务图片 ID
        """
        plain_text = ''
-        image_ids = []
+        file_ids = []

        if isinstance(query.user_message.content, list):
            for ce in query.user_message.content:
@@ -92,11 +93,24 @@ class DifyServiceAPIRunner(runner.RequestRunner):
                        f'{query.session.launcher_type.value}_{query.session.launcher_id}',
                    )
                    image_id = file_upload_resp['id']
-                    image_ids.append(image_id)
+                    file_ids.append(image_id)
+                # elif ce.type == "file_url":
+                #     file_bytes = base64.b64decode(ce.file_url)
+                #     file_upload_resp = await self.dify_client.upload_file(
+                #         file_bytes,
+                #         f'{query.session.launcher_type.value}_{query.session.launcher_id}',
+                #     )
+                #     file_id = file_upload_resp['id']
+                #     file_ids.append(file_id)
        elif isinstance(query.user_message.content, str):
            plain_text = query.user_message.content
+        # plain_text = "When the file content is readable, please read the content of this file. When the file is an image, describe the content of this image." if file_ids and not plain_text else plain_text
+        # plain_text = "The user message type cannot be parsed." if not file_ids and not plain_text else plain_text
+        # plain_text = plain_text if plain_text else "When the file content is readable, please read the content of this file. When the file is an image, describe the content of this image."
+        # print(self.pipeline_config['ai'])
+        plain_text = plain_text if plain_text else self.pipeline_config['ai']['dify-service-api']['base-prompt']

-        return plain_text, image_ids
+        return plain_text, file_ids

    async def _chat_messages(
        self, query: pipeline_query.Query
@@ -110,7 +124,6 @@ class DifyServiceAPIRunner(runner.RequestRunner):
        files = [
            {
                'type': 'image',
-                'transfer_method': 'local_file',
                'upload_file_id': image_id,
            }
            for image_id in image_ids
--- a/pkg/utils/constants.py
+++ b/pkg/utils/constants.py
@@ -1,6 +1,6 @@
 semantic_version = 'v4.4.1'

-required_database_version = 10
+required_database_version = 11
 """Tag the version of the database schema, used to check if the database needs to be migrated"""

 debug_mode = False
--- a/templates/metadata/pipeline/ai.yaml
+++ b/templates/metadata/pipeline/ai.yaml
@@ -124,6 +124,16 @@ stages:
          zh_Hans: 基础 URL
        type: string
        required: true
+      - name: base-prompt
+        label:
+          en_US: Base PROMPT
+          zh_Hans: 基础提示词
+        description:
+          en_US: When a message sent to Dify has no text input (e.g. images only), this default prompt is used as the query.
+          zh_Hans: 当 Dify 接收到输入文字为空（仅图片）的消息时，传入该默认提示词
+        type: string
+        required: true
+        default: "When the file content is readable, please read the content of this file. When the file is an image, describe the content of this image."
      - name: app-type
        label:
          en_US: App Type