18 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| wizardchen | 6f6ca84dae | feat(docreader): add health check | 2025-09-10 20:22:14 +08:00 |
| wizardchen | 8a2b38da6f | fix: rewrite query is empty | 2025-09-10 19:35:26 +08:00 |
| begoniezhao | 79b9315758 | feat: Added knowledge base column value update method and optimized logging | 2025-09-10 19:04:37 +08:00 |
| Liwx | b3c43b2180 | Update WeKnora.md | 2025-09-10 17:25:07 +08:00 |
| Liwx1014 | bfea6775ee | doc: add process parsing md | 2025-09-10 17:25:07 +08:00 |
| Liwx1014 | 2241127a41 | doc: add process parsing document | 2025-09-10 17:25:07 +08:00 |
| wizardchen | 7cfae7e0d3 | fix: pre fetch ocr models in docker container | 2025-09-10 17:24:26 +08:00 |
| wizardchen | 19d2493afc | fix: make file docker build not work | 2025-09-10 15:13:12 +08:00 |
| wizardchen | 0e1d7edca3 | fix: image parser concurrency error | 2025-09-10 13:19:39 +08:00 |
| wizardchen | fd6c50059e | feat: support modify listening port | 2025-09-10 10:14:43 +08:00 |
| wizardchen | 7775559a9b | feat: use paddle ocr v4 instead | 2025-09-10 01:22:25 +08:00 |
| wizardchen | 2b6cbee1b6 | feat: add aliyun rerank | 2025-09-10 01:22:25 +08:00 |
| wizardchen | 4214e6782b | feat: add aliyun rerank model | 2025-09-10 01:22:25 +08:00 |
| begoniezhao | 3f8a1d20c1 | fix(docreader): update paddle version | 2025-09-09 19:25:02 +08:00 |
| begoniezhao | 7efa173812 | chore: Added VERSION file to optimize docker image build configuration | 2025-09-09 14:57:09 +08:00 |
| wizardchen | 44e0e9ecb8 | docs: update readme | 2025-09-09 11:33:32 +08:00 |
| KaiFan Yu | 820aeacbba | fix: fix app start fail when config STORAGE_TYPE=cos (https://github.com/Tencent/WeKnora/issues/223) | 2025-09-08 23:26:37 +08:00 |
| wizardchen | daa5e8853a | docs: add CHANGELOG for 0.1.0 and version badges | 2025-09-08 23:14:31 +08:00 |
30 changed files with 1490 additions and 834 deletions

.env.example

@@ -27,6 +27,15 @@ STREAM_MANAGER_TYPE=redis
# 数据库端口默认为5432
DB_PORT=5432
# 应用服务端口默认为8080
APP_PORT=8080
# 前端服务端口默认为80
FRONTEND_PORT=80
# 文档解析模块端口默认为50051
DOCREADER_PORT=50051
# 数据库用户名
DB_USER=postgres

GitHub Actions workflow (Docker image build)

@@ -42,6 +42,9 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
- name: Read VERSION file
run: echo "VERSION=$(cat VERSION)" >> $GITHUB_ENV
- name: Build ${{ matrix.service_name }} Image
uses: docker/build-push-action@v3
with:
@@ -49,4 +52,8 @@ jobs:
platforms: ${{ matrix.platform }}
file: ${{ matrix.file }}
context: ${{ matrix.context }}
tags: ${{ secrets.DOCKERHUB_USERNAME }}/weknora-${{ matrix.service_name }}:latest
tags: |
${{ secrets.DOCKERHUB_USERNAME }}/weknora-${{ matrix.service_name }}:latest
${{ secrets.DOCKERHUB_USERNAME }}/weknora-${{ matrix.service_name }}:${{ env.VERSION }}
cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/weknora-${{ matrix.service_name }}:cache
cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/weknora-${{ matrix.service_name }}:cache,mode=max

CHANGELOG.md (new file, 17 lines)

@@ -0,0 +1,17 @@
# Changelog
All notable changes to this project will be documented in this file.
## [0.1.0] - 2025-09-08
- Initial public release of WeKnora.
- Web UI for knowledge upload, chat, configuration, and settings.
- RAG pipeline with chunking, embedding, retrieval, reranking, and generation.
- Initialization wizard for configuring models (LLM, embedding, rerank, retriever).
- Support for local Ollama and remote API models.
- Vector backends: PostgreSQL (pgvector), Elasticsearch; GraphRAG support.
- End-to-end evaluation utilities and metrics.
- Docker Compose for quick startup and service orchestration.
- MCP server support for integrating with MCP-compatible clients.
[0.1.0]: https://github.com/Tencent/WeKnora/tree/v0.1.0

Makefile

@@ -1,4 +1,4 @@
.PHONY: help build run test clean docker-build docker-run migrate-up migrate-down docker-restart docker-stop start-all stop-all start-ollama stop-ollama build-images build-images-app build-images-docreader build-images-frontend clean-images
.PHONY: help build run test clean docker-build-app docker-build-docreader docker-build-frontend docker-build-all docker-run migrate-up migrate-down docker-restart docker-stop start-all stop-all start-ollama stop-ollama build-images build-images-app build-images-docreader build-images-frontend clean-images check-env list-containers pull-images show-platform
# Show help
help:
@@ -11,10 +11,13 @@ help:
@echo " clean 清理构建文件"
@echo ""
@echo "Docker 命令:"
@echo " docker-build 构建 Docker 镜像"
@echo " docker-run 运行 Docker 容器"
@echo " docker-stop 停止 Docker 容器"
@echo " docker-restart 重启 Docker 容器"
@echo " docker-build-app 构建应用 Docker 镜像 (wechatopenai/weknora-app)"
@echo " docker-build-docreader 构建文档读取器镜像 (wechatopenai/weknora-docreader)"
@echo " docker-build-frontend 构建前端镜像 (wechatopenai/weknora-ui)"
@echo " docker-build-all 构建所有 Docker 镜像"
@echo " docker-run 运行 Docker 容器"
@echo " docker-stop 停止 Docker 容器"
@echo " docker-restart 重启 Docker 容器"
@echo ""
@echo "服务管理:"
@echo " start-all 启动所有服务"
@@ -37,15 +40,32 @@ help:
@echo " lint 代码检查"
@echo " deps 安装依赖"
@echo " docs 生成 API 文档"
@echo ""
@echo "环境检查:"
@echo " check-env 检查环境配置"
@echo " list-containers 列出运行中的容器"
@echo " pull-images 拉取最新镜像"
@echo " show-platform 显示当前构建平台"
# Go related variables
BINARY_NAME=WeKnora
MAIN_PATH=./cmd/server
# Docker related variables
DOCKER_IMAGE=WeKnora
DOCKER_IMAGE=wechatopenai/weknora-app
DOCKER_TAG=latest
# Platform detection
ifeq ($(shell uname -m),x86_64)
PLATFORM=linux/amd64
else ifeq ($(shell uname -m),aarch64)
PLATFORM=linux/arm64
else ifeq ($(shell uname -m),arm64)
PLATFORM=linux/arm64
else
PLATFORM=linux/amd64
endif
# Build the application
build:
go build -o $(BINARY_NAME) $(MAIN_PATH)
@@ -64,8 +84,19 @@ clean:
rm -f $(BINARY_NAME)
# Build Docker image
docker-build:
docker build -t $(DOCKER_IMAGE):$(DOCKER_TAG) .
docker-build-app:
docker build --platform $(PLATFORM) -f docker/Dockerfile.app -t $(DOCKER_IMAGE):$(DOCKER_TAG) .
# Build docreader Docker image
docker-build-docreader:
docker build --platform $(PLATFORM) -f docker/Dockerfile.docreader -t wechatopenai/weknora-docreader:latest .
# Build frontend Docker image
docker-build-frontend:
docker build --platform $(PLATFORM) -f frontend/Dockerfile -t wechatopenai/weknora-ui:latest frontend/
# Build all Docker images
docker-build-all: docker-build-app docker-build-docreader docker-build-frontend
# Run Docker container (传统方式)
docker-run:
@@ -107,10 +138,10 @@ build-images-frontend:
clean-images:
./scripts/build_images.sh --clean
# Restart Docker container (stop, rebuild, start)
# Restart Docker container (stop, start)
docker-restart:
docker-compose stop -t 60
docker-compose up --build
docker-compose up
# Database migrations
migrate-up:
@@ -151,4 +182,21 @@ clean-db:
docker volume rm weknora_redis_data; \
fi
# Environment check
check-env:
./scripts/start_all.sh --check
# List containers
list-containers:
./scripts/start_all.sh --list
# Pull latest images
pull-images:
./scripts/start_all.sh --pull
# Show current platform
show-platform:
@echo "当前系统架构: $(shell uname -m)"
@echo "Docker构建平台: $(PLATFORM)"

README.md

@@ -14,148 +14,154 @@
<a href="https://github.com/Tencent/WeKnora/blob/main/LICENSE">
<img src="https://img.shields.io/badge/License-MIT-ffffff?labelColor=d4eaf7&color=2e6cc4" alt="License">
</a>
<a href="./CHANGELOG.md">
<img alt="Version" src="https://img.shields.io/badge/version-0.1.0-2e6cc4?labelColor=d4eaf7">
</a>
</p>
<p align="center">
| <a href="./README_EN.md"><b>English</b></a> | <b>简体中文</b> | <a href="./README_JA.md"><b>日本語</b></a> |
| <b>English</b> | <a href="./README_CN.md"><b>简体中文</b></a> | <a href="./README_JA.md"><b>日本語</b></a> |
</p>
<p align="center">
<h4 align="center">
[项目介绍](#-项目介绍) • [架构设计](#-架构设计) • [核心特性](#-核心特性) • [快速开始](#-快速开始) • [文档](#-文档) • [开发指南](#-开发指南)
[Overview](#-overview) • [Architecture](#-architecture) • [Key Features](#-key-features) • [Getting Started](#-getting-started) • [API Reference](#-api-reference) • [Developer Guide](#-developer-guide)
</h4>
</p>
# 💡 WeKnora - 基于大模型的文档理解检索框架
# 💡 WeKnora - LLM-Powered Document Understanding & Retrieval Framework
## 📌 项目介绍
## 📌 Overview
[**WeKnora(维娜拉)**](https://weknora.weixin.qq.com) 是一款基于大语言模型LLM的文档理解与语义检索框架专为结构复杂、内容异构的文档场景而打造。
[**WeKnora**](https://weknora.weixin.qq.com) is an LLM-powered framework designed for deep document understanding and semantic retrieval, especially for handling complex, heterogeneous documents.
框架采用模块化架构,融合多模态预处理、语义向量索引、智能召回与大模型生成推理,构建起高效、可控的文档问答流程。核心检索流程基于 **RAGRetrieval-Augmented Generation** 机制,将上下文相关片段与语言模型结合,实现更高质量的语义回答。
It adopts a modular architecture that combines multimodal preprocessing, semantic vector indexing, intelligent retrieval, and large language model inference. At its core, WeKnora follows the **RAG (Retrieval-Augmented Generation)** paradigm, enabling high-quality, context-aware answers by combining relevant document chunks with model reasoning.
**官网:** https://weknora.weixin.qq.com
**Website:** https://weknora.weixin.qq.com
## 🏗️ 架构设计
## 🏗️ Architecture
![weknora-pipelone.png](./docs/images/pipeline.jpg)
![weknora-pipeline.png](./docs/images/pipeline.jpg)
WeKnora 采用现代化模块化设计,构建了一条完整的文档理解与检索流水线。系统主要包括文档解析、向量化处理、检索引擎和大模型推理等核心模块,每个组件均可灵活配置与扩展。
WeKnora employs a modern modular design to build a complete document understanding and retrieval pipeline. The system primarily includes document parsing, vector processing, retrieval engine, and large model inference as core modules, with each component being flexibly configurable and extendable.
## 🎯 核心特性
## 🎯 Key Features
- **🔍 精准理解**:支持 PDFWord、图片等文档的结构化内容提取,统一构建语义视图
- **🧠 智能推理**:借助大语言模型理解文档上下文与用户意图,支持精准问答与多轮对话
- **🔧 灵活扩展**:从解析、嵌入、召回到生成全流程解耦,便于灵活集成与定制扩展
- **⚡ 高效检索**:混合多种检索策略:关键词、向量、知识图谱
- **🎯 简单易用**直观的Web界面与标准API零技术门槛快速上手
- **🔒 安全可控**:支持本地化与私有云部署,数据完全自主可控
- **🔍 Precise Understanding**: Structured content extraction from PDFs, Word documents, images and more into unified semantic views
- **🧠 Intelligent Reasoning**: Leverages LLMs to understand document context and user intent for accurate Q&A and multi-turn conversations
- **🔧 Flexible Extension**: All components from parsing and embedding to retrieval and generation are decoupled for easy customization
- **⚡ Efficient Retrieval**: Hybrid retrieval strategies combining keywords, vectors, and knowledge graphs
- **🎯 User-Friendly**: Intuitive web interface and standardized APIs for zero technical barriers
- **🔒 Secure & Controlled**: Support for local deployment and private cloud, ensuring complete data sovereignty
## 📊 适用场景
## 📊 Application Scenarios
| 应用场景 | 具体应用 | 核心价值 |
| Scenario | Applications | Core Value |
|---------|----------|----------|
| **企业知识管理** | 内部文档检索、规章制度问答、操作手册查询 | 提升知识查找效率,降低培训成本 |
| **科研文献分析** | 论文检索、研究报告分析、学术资料整理 | 加速文献调研,辅助研究决策 |
| **产品技术支持** | 产品手册问答、技术文档检索、故障排查 | 提升客户服务质量,减少技术支持负担 |
| **法律合规审查** | 合同条款检索、法规政策查询、案例分析 | 提高合规效率,降低法律风险 |
| **医疗知识辅助** | 医学文献检索、诊疗指南查询、病例分析 | 辅助临床决策,提升诊疗质量 |
| **Enterprise Knowledge Management** | Internal document retrieval, policy Q&A, operation manual search | Improve knowledge discovery efficiency, reduce training costs |
| **Academic Research Analysis** | Paper retrieval, research report analysis, scholarly material organization | Accelerate literature review, assist research decisions |
| **Product Technical Support** | Product manual Q&A, technical documentation search, troubleshooting | Enhance customer service quality, reduce support burden |
| **Legal & Compliance Review** | Contract clause retrieval, regulatory policy search, case analysis | Improve compliance efficiency, reduce legal risks |
| **Medical Knowledge Assistance** | Medical literature retrieval, treatment guideline search, case analysis | Support clinical decisions, improve diagnosis quality |
## 🧩 功能模块能力
## 🧩 Feature Matrix
| 功能模块 | 支持情况 | 说明 |
| Module | Support | Description |
|---------|---------|------|
| 文档格式支持 | ✅ PDF / Word / Txt / Markdown / 图片(含 OCR / Caption | 支持多种结构化与非结构化文档内容解析,支持图文混排与图像文字提取 |
| 嵌入模型支持 | ✅ 本地模型、BGE / GTE API 等 | 支持自定义 embedding 模型,兼容本地部署与云端向量生成接口 |
| 向量数据库接入 | ✅ PostgreSQLpgvector)、Elasticsearch | 支持主流向量索引后端,可灵活切换与扩展,适配不同检索场景 |
| 检索机制 | ✅ BM25 / Dense Retrieve / GraphRAG | 支持稠密/稀疏召回、知识图谱增强检索等多种策略,可自由组合召回-重排-生成流程 |
| 大模型集成 | ✅ 支持 QwenDeepSeek 等,思考/非思考模式切换 | 可接入本地大模型(如 Ollama 启动)或调用外部 API 服务,支持推理模式灵活配置 |
| 问答能力 | ✅ 上下文感知、多轮对话、提示词模板 | 支持复杂语义建模、指令控制与链式问答,可配置提示词与上下文窗口 |
| 端到端测试支持 | ✅ 检索+生成过程可视化与指标评估 | 提供一体化链路测试工具,支持评估召回命中率、回答覆盖度、BLEU / ROUGE 等主流指标 |
| 部署模式 | ✅ 支持本地部署 / Docker 镜像 | 满足私有化、离线部署与灵活运维的需求 |
| 用户界面 | ✅ Web UI + RESTful API | 提供交互式界面与标准 API 接口,适配开发者与业务用户使用习惯 |
| Document Formats | ✅ PDF / Word / Txt / Markdown / Images (with OCR / Caption) | Support for structured and unstructured documents with text extraction from images |
| Embedding Models | ✅ Local models, BGE / GTE APIs, etc. | Customizable embedding models, compatible with local deployment and cloud vector generation APIs |
| Vector DB Integration | ✅ PostgreSQL (pgvector), Elasticsearch | Support for mainstream vector index backends, flexible switching for different retrieval scenarios |
| Retrieval Strategies | ✅ BM25 / Dense Retrieval / GraphRAG | Support for sparse/dense recall and knowledge graph-enhanced retrieval with customizable retrieve-rerank-generate pipelines |
| LLM Integration | ✅ Support for Qwen, DeepSeek, etc., with thinking/non-thinking mode switching | Compatible with local models (e.g., via Ollama) or external API services with flexible inference configuration |
| QA Capabilities | ✅ Context-aware, multi-turn dialogue, prompt templates | Support for complex semantic modeling, instruction control and chain-of-thought Q&A with configurable prompts and context windows |
| E2E Testing | ✅ Retrieval+generation process visualization and metric evaluation | End-to-end testing tools for evaluating recall hit rates, answer coverage, BLEU/ROUGE and other metrics |
| Deployment Modes | ✅ Support for local deployment / Docker images | Meets private, offline deployment and flexible operation requirements |
| User Interfaces | ✅ Web UI + RESTful API | Interactive interface and standard API endpoints, suitable for both developers and business users |
## 🚀 快速开始
## 🚀 Getting Started
### 🛠 环境要求
### 🛠 Prerequisites
确保本地已安装以下工具:
Make sure the following tools are installed on your system:
* [Docker](https://www.docker.com/)
* [Docker Compose](https://docs.docker.com/compose/)
* [Git](https://git-scm.com/)
### 📦 安装步骤
### 📦 Installation
#### ① 克隆代码仓库
#### ① Clone the repository
```bash
# 克隆主仓库
# Clone the main repository
git clone https://github.com/Tencent/WeKnora.git
cd WeKnora
```
#### ② 配置环境变量
#### ② Configure environment variables
```bash
# 复制示例配置文件
# Copy example env file
cp .env.example .env
# 编辑 .env,填入对应配置信息
# 所有变量说明详见 .env.example 注释
# Edit .env and set required values
# All variables are documented in the .env.example comments
```
#### ③ 启动服务
#### ③ Start the services
```bash
# 启动全部服务(含 Ollama 与后端容器)
# Start all services (Ollama + backend containers)
./scripts/start_all.sh
#
# Or
make start-all
```
#### ③ 启动服务备选
#### ③ Start the services (backup)
```bash
# 启动 ollama 服务 (可选)
# Start ollama services (Optional)
ollama serve > /dev/null 2>&1 &
# 启动服务
# Start the service
docker compose up -d
```
#### ④ 停止服务
#### ④ Stop the services
```bash
./scripts/start_all.sh --stop
#
# Or
make stop-all
```
### 🌐 服务访问地址
### 🌐 Access Services
启动成功后,可访问以下地址:
Once started, services will be available at:
* Web UI`http://localhost`
* 后端 API`http://localhost:8080`
* 链路追踪(Jaeger`http://localhost:16686`
* Web UI: `http://localhost`
* Backend API: `http://localhost:8080`
* Jaeger Tracing: `http://localhost:16686`
### 🔌 使用微信对话开放平台
### 🔌 Using WeChat Dialog Open Platform
WeKnora 作为[微信对话开放平台](https://chatbot.weixin.qq.com)的核心技术框架,提供更简便的使用方式:
WeKnora serves as the core technology framework for the [WeChat Dialog Open Platform](https://chatbot.weixin.qq.com), providing a more convenient usage approach:
- **零代码部署**:只需上传知识,即可在微信生态中快速部署智能问答服务,实现"即问即答"的体验
- **高效问题管理**:支持高频问题的独立分类管理,提供丰富的数据工具,确保回答精准可靠且易于维护
- **微信生态覆盖**通过微信对话开放平台WeKnora 的智能问答能力可无缝集成到公众号、小程序等微信场景中,提升用户交互体验
### 🔗MCP服务器访问已经部署好的WEKnora
#### 1⃣克隆储存库
- **Zero-code Deployment**: Simply upload knowledge to quickly deploy intelligent Q&A services within the WeChat ecosystem, achieving an "ask and answer" experience
- **Efficient Question Management**: Support for categorized management of high-frequency questions, with rich data tools to ensure accurate, reliable, and easily maintainable answers
- **WeChat Ecosystem Integration**: Through the WeChat Dialog Open Platform, WeKnora's intelligent Q&A capabilities can be seamlessly integrated into WeChat Official Accounts, Mini Programs, and other WeChat scenarios, enhancing user interaction experiences
### 🔗 Access WeKnora via MCP Server
#### 1⃣ Clone the repository
```
git clone https://github.com/Tencent/WeKnora
```
#### 2⃣配置MCP服务器
mcp客户端配置服务器
#### 2⃣ Configure MCP Server
Configure the MCP client to connect to the server:
```json
{
"mcpServers": {
@@ -165,150 +171,150 @@ mcp客户端配置服务器
],
"command": "python",
"env":{
"WEKNORA_API_KEY":"进入你的weknora实例打开开发者工具查看请求头x-api-key以sk开头",
"WEKNORA_BASE_URL":"http(s)://你的weknora地址/api/v1"
"WEKNORA_API_KEY":"Enter your WeKnora instance, open developer tools, check the request header x-api-key starting with sk",
"WEKNORA_BASE_URL":"http(s)://your-weknora-address/api/v1"
}
}
}
}
```
使用stdio命令直接运行
Run directly using stdio command:
```
pip install weknora-mcp-server
python -m weknora-mcp-server
```
## 🔧 初始化配置引导
## 🔧 Initialization Configuration Guide
为了方便用户快速配置各类模型降低试错成本我们改进了原来的配置文件初始化方式增加了Web UI界面进行各种模型的配置。在使用之前请确保代码更新到最新版本。具体使用步骤如下
如果是第一次使用本项目,可跳过①②步骤,直接进入③④步骤。
To help users quickly configure various models and reduce trial-and-error costs, we've improved the original configuration file initialization method by adding a Web UI interface for model configuration. Before using, please ensure the code is updated to the latest version. The specific steps are as follows:
If this is your first time using this project, you can skip steps ①② and go directly to steps ③④.
### ① 关闭服务
### ① Stop the services
```bash
./scripts/start_all.sh --stop
```
### ② 清空原有数据表(建议在没有重要数据的情况下使用)
### ② Clear existing data tables (recommended when no important data exists)
```bash
make clean-db
```
### ③ 编译并启动服务
### ③ Compile and start services
```bash
./scripts/start_all.sh
```
### ④ 访问Web UI
### ④ Access Web UI
http://localhost
首次访问会自动跳转到初始化配置页面,配置完成后会自动跳转到知识库页面。请按照页面提示信息完成模型的配置。
On first access, it will automatically redirect to the initialization configuration page. After configuration is complete, it will automatically redirect to the knowledge base page. Please follow the page instructions to complete model configuration.
![配置页面](./docs/images/config.png)
![Configuration Page](./docs/images/config.png)
## 📱 Interface Showcase
## 📱 功能展示
### Web UI 界面
### Web UI Interface
<table>
<tr>
<td><b>知识上传</b><br/><img src="./docs/images/knowledges.png" alt="知识上传界面"></td>
<td><b>知识问答入口</b><br/><img src="./docs/images/qa.png" alt="知识问答入口"></td>
<td><b>Knowledge Upload</b><br/><img src="./docs/images/knowledges.png" alt="Knowledge Upload Interface"></td>
<td><b>Q&A Entry</b><br/><img src="./docs/images/qa.png" alt="Q&A Entry Interface"></td>
</tr>
<tr>
<td colspan="2"><b>图文结果回答</b><br/><img src="./docs/images/answer.png" alt="图文结果回答"></td>
<td colspan="2"><b>Rich Text & Image Responses</b><br/><img src="./docs/images/answer.png" alt="Rich Answer Interface"></td>
</tr>
</table>
**知识库管理:** 支持拖拽上传各类文档,自动识别文档结构并提取核心知识,建立索引。系统清晰展示处理进度和文档状态,实现高效的知识库管理。
**Knowledge Base Management:** Support for dragging and dropping various documents, automatically identifying document structures and extracting core knowledge to establish indexes. The system clearly displays processing progress and document status, achieving efficient knowledge base management.
### 文档知识图谱
### Document Knowledge Graph
<table>
<tr>
<td><img src="./docs/images/graph2.png" alt="知识图谱展示1"></td>
<td><img src="./docs/images/graph1.png" alt="知识图谱展示2"></td>
<td><img src="./docs/images/graph2.png" alt="Knowledge Graph View 1"></td>
<td><img src="./docs/images/graph1.png" alt="Knowledge Graph View 2"></td>
</tr>
</table>
WeKnora 支持将文档转化为知识图谱,展示文档中不同段落之间的关联关系。开启知识图谱功能后,系统会分析并构建文档内部的语义关联网络,不仅帮助用户理解文档内容,还为索引和检索提供结构化支撑,提升检索结果的相关性和广度。
### 配套MCP服务器调用效果
<img width="950" height="2063" alt="118d078426f42f3d4983c13386085d7f" src="https://github.com/user-attachments/assets/09111ec8-0489-415c-969d-aa3835778e14" />
WeKnora supports transforming documents into knowledge graphs, displaying the relationships between different sections of the documents. Once the knowledge graph feature is enabled, the system analyzes and constructs an internal semantic association network that not only helps users understand document content but also provides structured support for indexing and retrieval, enhancing the relevance and breadth of search results.
### MCP Server Integration Effects
<img width="950" height="2063" alt="MCP Server Integration Demo" src="https://github.com/user-attachments/assets/09111ec8-0489-415c-969d-aa3835778e14" />
## 📘 文档
## 📘 API Reference
常见问题排查:[常见问题排查](./docs/QA.md)
Troubleshooting FAQ: [Troubleshooting FAQ](./docs/QA.md)
详细接口说明请参考:[API 文档](./docs/API.md)
Detailed API documentation is available at: [API Docs](./docs/API.md)
## 🧭 开发指南
## 🧭 Developer Guide
### 📁 项目目录结构
### 📁 Directory Structure
```
WeKnora/
├── cmd/ # 应用入口
├── internal/ # 核心业务逻辑
├── config/ # 配置文件
├── migrations/ # 数据库迁移脚本
├── scripts/ # 启动与工具脚本
├── services/ # 各子服务实现
├── frontend/ # 前端项目
└── docs/ # 项目文档
├── cmd/ # Main entry point
├── internal/ # Core business logic
├── config/ # Configuration files
├── migrations/ # DB migration scripts
├── scripts/ # Shell scripts
├── services/ # Microservice logic
├── frontend/ # Frontend app
└── docs/ # Project documentation
```
### 🔧 常用命令
### 🔧 Common Commands
```bash
# 清空数据库(慎用!)
# Wipe all data from DB (use with caution)
make clean-db
```
## 🤝 贡献指南
## 🤝 Contributing
我们欢迎社区用户参与贡献如有建议、Bug 或新功能需求,请通过 [Issue](https://github.com/Tencent/WeKnora/issues) 提出,或直接提交 Pull Request
We welcome community contributions! For suggestions, bugs, or feature requests, please submit an [Issue](https://github.com/Tencent/WeKnora/issues) or directly create a Pull Request.
### 🎯 贡献方式
### 🎯 How to Contribute
- 🐛 **Bug修复**: 发现并修复系统缺陷
-**新功能**: 提出并实现新特性
- 📚 **文档改进**: 完善项目文档
- 🧪 **测试用例**: 编写单元测试和集成测试
- 🎨 **UI/UX优化**: 改进用户界面和体验
- 🐛 **Bug Fixes**: Discover and fix system defects
-**New Features**: Propose and implement new capabilities
- 📚 **Documentation**: Improve project documentation
- 🧪 **Test Cases**: Write unit and integration tests
- 🎨 **UI/UX Enhancements**: Improve user interface and experience
### 📋 贡献流程
### 📋 Contribution Process
1. **Fork项目** 到你的GitHub账户
2. **创建特性分支** `git checkout -b feature/amazing-feature`
3. **提交更改** `git commit -m 'Add amazing feature'`
4. **推送分支** `git push origin feature/amazing-feature`
5. **创建Pull Request** 并详细描述变更内容
1. **Fork the project** to your GitHub account
2. **Create a feature branch** `git checkout -b feature/amazing-feature`
3. **Commit changes** `git commit -m 'Add amazing feature'`
4. **Push branch** `git push origin feature/amazing-feature`
5. **Create a Pull Request** with detailed description of changes
### 🎨 代码规范
### 🎨 Code Standards
- 遵循 [Go Code Review Comments](https://github.com/golang/go/wiki/CodeReviewComments)
- 使用 `gofmt` 格式化代码
- 添加必要的单元测试
- 更新相关文档
- Follow [Go Code Review Comments](https://github.com/golang/go/wiki/CodeReviewComments)
- Format code using `gofmt`
- Add necessary unit tests
- Update relevant documentation
### 📝 提交规范
### 📝 Commit Guidelines
使用 [Conventional Commits](https://www.conventionalcommits.org/) 规范:
Use [Conventional Commits](https://www.conventionalcommits.org/) standard:
```
feat: 添加文档批量上传功能
fix: 修复向量检索精度问题
docs: 更新API文档
test: 添加检索引擎测试用例
refactor: 重构文档解析模块
feat: Add document batch upload functionality
fix: Resolve vector retrieval precision issue
docs: Update API documentation
test: Add retrieval engine test cases
refactor: Restructure document parsing module
```
## 📄 许可证
## 📄 License
本项目基于 [MIT](./LICENSE) 协议发布。
你可以自由使用、修改和分发本项目代码,但需保留原始版权声明。
This project is licensed under the [MIT License](./LICENSE).
You are free to use, modify, and distribute the code with proper attribution.

README_CN.md (new file, 317 lines)

@@ -0,0 +1,317 @@
<p align="center">
<picture>
<img src="./docs/images/logo.png" alt="WeKnora Logo" height="120"/>
</picture>
</p>
<p align="center">
<a href="https://weknora.weixin.qq.com" target="_blank">
<img alt="官方网站" src="https://img.shields.io/badge/官方网站-WeKnora-4e6b99">
</a>
<a href="https://chatbot.weixin.qq.com" target="_blank">
<img alt="微信对话开放平台" src="https://img.shields.io/badge/微信对话开放平台-5ac725">
</a>
<a href="https://github.com/Tencent/WeKnora/blob/main/LICENSE">
<img src="https://img.shields.io/badge/License-MIT-ffffff?labelColor=d4eaf7&color=2e6cc4" alt="License">
</a>
<a href="./CHANGELOG.md">
<img alt="版本" src="https://img.shields.io/badge/version-0.1.0-2e6cc4?labelColor=d4eaf7">
</a>
</p>
<p align="center">
| <a href="./README.md"><b>English</b></a> | <b>简体中文</b> | <a href="./README_JA.md"><b>日本語</b></a> |
</p>
<p align="center">
<h4 align="center">
[项目介绍](#-项目介绍) • [架构设计](#-架构设计) • [核心特性](#-核心特性) • [快速开始](#-快速开始) • [文档](#-文档) • [开发指南](#-开发指南)
</h4>
</p>
# 💡 WeKnora - 基于大模型的文档理解检索框架
## 📌 项目介绍
[**WeKnora维娜拉**](https://weknora.weixin.qq.com) 是一款基于大语言模型LLM的文档理解与语义检索框架专为结构复杂、内容异构的文档场景而打造。
框架采用模块化架构,融合多模态预处理、语义向量索引、智能召回与大模型生成推理,构建起高效、可控的文档问答流程。核心检索流程基于 **RAGRetrieval-Augmented Generation** 机制,将上下文相关片段与语言模型结合,实现更高质量的语义回答。
**官网:** https://weknora.weixin.qq.com
## 🏗️ 架构设计
![weknora-pipelone.png](./docs/images/pipeline.jpg)
WeKnora 采用现代化模块化设计,构建了一条完整的文档理解与检索流水线。系统主要包括文档解析、向量化处理、检索引擎和大模型推理等核心模块,每个组件均可灵活配置与扩展。
## 🎯 核心特性
- **🔍 精准理解**:支持 PDF、Word、图片等文档的结构化内容提取统一构建语义视图
- **🧠 智能推理**:借助大语言模型理解文档上下文与用户意图,支持精准问答与多轮对话
- **🔧 灵活扩展**:从解析、嵌入、召回到生成全流程解耦,便于灵活集成与定制扩展
- **⚡ 高效检索**:混合多种检索策略:关键词、向量、知识图谱
- **🎯 简单易用**直观的Web界面与标准API零技术门槛快速上手
- **🔒 安全可控**:支持本地化与私有云部署,数据完全自主可控
## 📊 适用场景
| 应用场景 | 具体应用 | 核心价值 |
|---------|----------|----------|
| **企业知识管理** | 内部文档检索、规章制度问答、操作手册查询 | 提升知识查找效率,降低培训成本 |
| **科研文献分析** | 论文检索、研究报告分析、学术资料整理 | 加速文献调研,辅助研究决策 |
| **产品技术支持** | 产品手册问答、技术文档检索、故障排查 | 提升客户服务质量,减少技术支持负担 |
| **法律合规审查** | 合同条款检索、法规政策查询、案例分析 | 提高合规效率,降低法律风险 |
| **医疗知识辅助** | 医学文献检索、诊疗指南查询、病例分析 | 辅助临床决策,提升诊疗质量 |
## 🧩 功能模块能力
| 功能模块 | 支持情况 | 说明 |
|---------|---------|------|
| 文档格式支持 | ✅ PDF / Word / Txt / Markdown / 图片(含 OCR / Caption | 支持多种结构化与非结构化文档内容解析,支持图文混排与图像文字提取 |
| 嵌入模型支持 | ✅ 本地模型、BGE / GTE API 等 | 支持自定义 embedding 模型,兼容本地部署与云端向量生成接口 |
| 向量数据库接入 | ✅ PostgreSQLpgvector、Elasticsearch | 支持主流向量索引后端,可灵活切换与扩展,适配不同检索场景 |
| 检索机制 | ✅ BM25 / Dense Retrieve / GraphRAG | 支持稠密/稀疏召回、知识图谱增强检索等多种策略,可自由组合召回-重排-生成流程 |
| 大模型集成 | ✅ 支持 Qwen、DeepSeek 等,思考/非思考模式切换 | 可接入本地大模型(如 Ollama 启动)或调用外部 API 服务,支持推理模式灵活配置 |
| 问答能力 | ✅ 上下文感知、多轮对话、提示词模板 | 支持复杂语义建模、指令控制与链式问答,可配置提示词与上下文窗口 |
| 端到端测试支持 | ✅ 检索+生成过程可视化与指标评估 | 提供一体化链路测试工具支持评估召回命中率、回答覆盖度、BLEU / ROUGE 等主流指标 |
| 部署模式 | ✅ 支持本地部署 / Docker 镜像 | 满足私有化、离线部署与灵活运维的需求 |
| 用户界面 | ✅ Web UI + RESTful API | 提供交互式界面与标准 API 接口,适配开发者与业务用户使用习惯 |
## 🚀 快速开始
### 🛠 环境要求
确保本地已安装以下工具:
* [Docker](https://www.docker.com/)
* [Docker Compose](https://docs.docker.com/compose/)
* [Git](https://git-scm.com/)
### 📦 安装步骤
#### ① 克隆代码仓库
```bash
# 克隆主仓库
git clone https://github.com/Tencent/WeKnora.git
cd WeKnora
```
#### ② 配置环境变量
```bash
# 复制示例配置文件
cp .env.example .env
# 编辑 .env填入对应配置信息
# 所有变量说明详见 .env.example 注释
```
#### ③ 启动服务
```bash
# 启动全部服务(含 Ollama 与后端容器)
./scripts/start_all.sh
# 或
make start-all
```
#### ③ 启动服务备选
```bash
# 启动 ollama 服务 (可选)
ollama serve > /dev/null 2>&1 &
# 启动服务
docker compose up -d
```
#### ④ 停止服务
```bash
./scripts/start_all.sh --stop
# 或
make stop-all
```
### 🌐 服务访问地址
启动成功后,可访问以下地址:
* Web UI`http://localhost`
* 后端 API`http://localhost:8080`
* 链路追踪Jaeger`http://localhost:16686`
### 🔌 使用微信对话开放平台
WeKnora 作为[微信对话开放平台](https://chatbot.weixin.qq.com)的核心技术框架,提供更简便的使用方式:
- **零代码部署**:只需上传知识,即可在微信生态中快速部署智能问答服务,实现"即问即答"的体验
- **高效问题管理**:支持高频问题的独立分类管理,提供丰富的数据工具,确保回答精准可靠且易于维护
- **微信生态覆盖**通过微信对话开放平台WeKnora 的智能问答能力可无缝集成到公众号、小程序等微信场景中,提升用户交互体验
### 🔗MCP服务器访问已经部署好的WEKnora
#### 1⃣克隆储存库
```
git clone https://github.com/Tencent/WeKnora
```
#### 2⃣配置MCP服务器
mcp客户端配置服务器
```json
{
"mcpServers": {
"weknora": {
"args": [
"path/to/WeKnora/mcp-server/run_server.py"
],
"command": "python",
"env":{
"WEKNORA_API_KEY":"进入你的weknora实例打开开发者工具查看请求头x-api-key以sk开头",
"WEKNORA_BASE_URL":"http(s)://你的weknora地址/api/v1"
}
}
}
}
```
使用stdio命令直接运行
```
pip install weknora-mcp-server
python -m weknora-mcp-server
```
## 🔧 初始化配置引导
为了方便用户快速配置各类模型降低试错成本我们改进了原来的配置文件初始化方式增加了Web UI界面进行各种模型的配置。在使用之前请确保代码更新到最新版本。具体使用步骤如下
如果是第一次使用本项目,可跳过①②步骤,直接进入③④步骤。
### ① 关闭服务
```bash
./scripts/start_all.sh --stop
```
### ② 清空原有数据表(建议在没有重要数据的情况下使用)
```bash
make clean-db
```
### ③ 编译并启动服务
```bash
./scripts/start_all.sh
```
### ④ 访问Web UI
http://localhost
首次访问会自动跳转到初始化配置页面,配置完成后会自动跳转到知识库页面。请按照页面提示信息完成模型的配置。
![配置页面](./docs/images/config.png)
## 📱 功能展示
### Web UI 界面
<table>
<tr>
<td><b>知识上传</b><br/><img src="./docs/images/knowledges.png" alt="知识上传界面"></td>
<td><b>知识问答入口</b><br/><img src="./docs/images/qa.png" alt="知识问答入口"></td>
</tr>
<tr>
<td colspan="2"><b>图文结果回答</b><br/><img src="./docs/images/answer.png" alt="图文结果回答"></td>
</tr>
</table>
**知识库管理:** 支持拖拽上传各类文档,自动识别文档结构并提取核心知识,建立索引。系统清晰展示处理进度和文档状态,实现高效的知识库管理。
### 文档知识图谱
<table>
<tr>
<td><img src="./docs/images/graph2.png" alt="知识图谱展示1"></td>
<td><img src="./docs/images/graph1.png" alt="知识图谱展示2"></td>
</tr>
</table>
WeKnora 支持将文档转化为知识图谱,展示文档中不同段落之间的关联关系。开启知识图谱功能后,系统会分析并构建文档内部的语义关联网络,不仅帮助用户理解文档内容,还为索引和检索提供结构化支撑,提升检索结果的相关性和广度。
### 配套MCP服务器调用效果
<img width="950" height="2063" alt="118d078426f42f3d4983c13386085d7f" src="https://github.com/user-attachments/assets/09111ec8-0489-415c-969d-aa3835778e14" />
## 📘 文档
常见问题排查:[常见问题排查](./docs/QA.md)
详细接口说明请参考:[API 文档](./docs/API.md)
## 🧭 开发指南
### 📁 项目目录结构
```
WeKnora/
├── cmd/ # 应用入口
├── internal/ # 核心业务逻辑
├── config/ # 配置文件
├── migrations/ # 数据库迁移脚本
├── scripts/ # 启动与工具脚本
├── services/ # 各子服务实现
├── frontend/ # 前端项目
└── docs/ # 项目文档
```
### 🔧 常用命令
```bash
# 清空数据库(慎用!)
make clean-db
```
## 🤝 贡献指南
我们欢迎社区用户参与贡献如有建议、Bug 或新功能需求,请通过 [Issue](https://github.com/Tencent/WeKnora/issues) 提出,或直接提交 Pull Request。
### 🎯 贡献方式
- 🐛 **Bug修复**: 发现并修复系统缺陷
-**新功能**: 提出并实现新特性
- 📚 **文档改进**: 完善项目文档
- 🧪 **测试用例**: 编写单元测试和集成测试
- 🎨 **UI/UX优化**: 改进用户界面和体验
### 📋 贡献流程
1. **Fork项目** 到你的GitHub账户
2. **创建特性分支** `git checkout -b feature/amazing-feature`
3. **提交更改** `git commit -m 'Add amazing feature'`
4. **推送分支** `git push origin feature/amazing-feature`
5. **创建Pull Request** 并详细描述变更内容
### 🎨 代码规范
- 遵循 [Go Code Review Comments](https://github.com/golang/go/wiki/CodeReviewComments)
- 使用 `gofmt` 格式化代码
- 添加必要的单元测试
- 更新相关文档
### 📝 提交规范
使用 [Conventional Commits](https://www.conventionalcommits.org/) 规范:
```
feat: 添加文档批量上传功能
fix: 修复向量检索精度问题
docs: 更新API文档
test: 添加检索引擎测试用例
refactor: 重构文档解析模块
```
## 📄 许可证
本项目基于 [MIT](./LICENSE) 协议发布。
你可以自由使用、修改和分发本项目代码,但需保留原始版权声明。

README_EN.md (deleted, 249 lines)

@@ -1,249 +0,0 @@
<p align="center">
<picture>
<img src="./docs/images/logo.png" alt="WeKnora Logo" height="120"/>
</picture>
</p>
<p align="center">
<a href="https://weknora.weixin.qq.com" target="_blank">
<img alt="官方网站" src="https://img.shields.io/badge/官方网站-WeKnora-4e6b99">
</a>
<a href="https://chatbot.weixin.qq.com" target="_blank">
<img alt="微信对话开放平台" src="https://img.shields.io/badge/微信对话开放平台-5ac725">
</a>
<a href="https://github.com/Tencent/WeKnora/blob/main/LICENSE">
<img src="https://img.shields.io/badge/License-MIT-ffffff?labelColor=d4eaf7&color=2e6cc4" alt="License">
</a>
</p>
<p align="center">
| <b>English</b> | <a href="./README.md"><b>简体中文</b></a> | <a href="./README_JA.md"><b>日本語</b></a> |
</p>
<p align="center">
<h4 align="center">
[Overview](#-overview) • [Architecture](#-architecture) • [Key Features](#-key-features) • [Getting Started](#-getting-started) • [API Reference](#-api-reference) • [Developer Guide](#-developer-guide)
</h4>
</p>
# 💡 WeKnora - LLM-Powered Document Understanding & Retrieval Framework
## 📌 Overview
[**WeKnora**](https://weknora.weixin.qq.com) is an LLM-powered framework designed for deep document understanding and semantic retrieval, especially for handling complex, heterogeneous documents.
It adopts a modular architecture that combines multimodal preprocessing, semantic vector indexing, intelligent retrieval, and large language model inference. At its core, WeKnora follows the **RAG (Retrieval-Augmented Generation)** paradigm, enabling high-quality, context-aware answers by combining relevant document chunks with model reasoning.
**Website:** https://weknora.weixin.qq.com
## 🏗️ Architecture
![weknora-pipeline.png](./docs/images/pipeline.jpg)
WeKnora employs a modern modular design to build a complete document understanding and retrieval pipeline. The system primarily includes document parsing, vector processing, retrieval engine, and large model inference as core modules, with each component being flexibly configurable and extendable.
## 🎯 Key Features
- **🔍 Precise Understanding**: Structured content extraction from PDFs, Word documents, images and more into unified semantic views
- **🧠 Intelligent Reasoning**: Leverages LLMs to understand document context and user intent for accurate Q&A and multi-turn conversations
- **🔧 Flexible Extension**: All components from parsing and embedding to retrieval and generation are decoupled for easy customization
- **⚡ Efficient Retrieval**: Hybrid retrieval strategies combining keywords, vectors, and knowledge graphs
- **🎯 User-Friendly**: Intuitive web interface and standardized APIs for zero technical barriers
- **🔒 Secure & Controlled**: Support for local deployment and private cloud, ensuring complete data sovereignty
## 📊 Application Scenarios
| Scenario | Applications | Core Value |
|---------|----------|----------|
| **Enterprise Knowledge Management** | Internal document retrieval, policy Q&A, operation manual search | Improve knowledge discovery efficiency, reduce training costs |
| **Academic Research Analysis** | Paper retrieval, research report analysis, scholarly material organization | Accelerate literature review, assist research decisions |
| **Product Technical Support** | Product manual Q&A, technical documentation search, troubleshooting | Enhance customer service quality, reduce support burden |
| **Legal & Compliance Review** | Contract clause retrieval, regulatory policy search, case analysis | Improve compliance efficiency, reduce legal risks |
| **Medical Knowledge Assistance** | Medical literature retrieval, treatment guideline search, case analysis | Support clinical decisions, improve diagnosis quality |
## 🧩 Feature Matrix
| Module | Support | Description |
|---------|---------|------|
| Document Formats | ✅ PDF / Word / Txt / Markdown / Images (with OCR / Caption) | Support for structured and unstructured documents with text extraction from images |
| Embedding Models | ✅ Local models, BGE / GTE APIs, etc. | Customizable embedding models, compatible with local deployment and cloud vector generation APIs |
| Vector DB Integration | ✅ PostgreSQL (pgvector), Elasticsearch | Support for mainstream vector index backends, flexible switching for different retrieval scenarios |
| Retrieval Strategies | ✅ BM25 / Dense Retrieval / GraphRAG | Support for sparse/dense recall and knowledge graph-enhanced retrieval with customizable retrieve-rerank-generate pipelines |
| LLM Integration | ✅ Support for Qwen, DeepSeek, etc., with thinking/non-thinking mode switching | Compatible with local models (e.g., via Ollama) or external API services with flexible inference configuration |
| QA Capabilities | ✅ Context-aware, multi-turn dialogue, prompt templates | Support for complex semantic modeling, instruction control and chain-of-thought Q&A with configurable prompts and context windows |
| E2E Testing | ✅ Retrieval+generation process visualization and metric evaluation | End-to-end testing tools for evaluating recall hit rates, answer coverage, BLEU/ROUGE and other metrics |
| Deployment Modes | ✅ Support for local deployment / Docker images | Meets private, offline deployment and flexible operation requirements |
| User Interfaces | ✅ Web UI + RESTful API | Interactive interface and standard API endpoints, suitable for both developers and business users |
## 🚀 Getting Started
### 🛠 Prerequisites
Make sure the following tools are installed on your system:
* [Docker](https://www.docker.com/)
* [Docker Compose](https://docs.docker.com/compose/)
* [Git](https://git-scm.com/)
### 📦 Installation
#### ① Clone the repository
```bash
# Clone the main repository
git clone https://github.com/Tencent/WeKnora.git
cd WeKnora
```
#### ② Configure environment variables
```bash
# Copy example env file
cp .env.example .env
# Edit .env and set required values
# All variables are documented in the .env.example comments
```
#### ③ Start the services
```bash
# Start all services (Ollama + backend containers)
./scripts/start_all.sh
# Or
make start-all
```
#### ③ Start the services (backup)
```bash
# Start ollama services (Optional)
ollama serve > /dev/null 2>&1 &
# Start the service
docker compose up -d
```
#### ④ Stop the services
```bash
./scripts/start_all.sh --stop
# Or
make stop-all
```
### 🌐 Access Services
Once started, services will be available at:
* Web UI: `http://localhost`
* Backend API: `http://localhost:8080`
* Jaeger Tracing: `http://localhost:16686`
### 🔌 Using WeChat Dialog Open Platform
WeKnora serves as the core technology framework for the [WeChat Dialog Open Platform](https://chatbot.weixin.qq.com), providing a more convenient usage approach:
- **Zero-code Deployment**: Simply upload knowledge to quickly deploy intelligent Q&A services within the WeChat ecosystem, achieving an "ask and answer" experience
- **Efficient Question Management**: Support for categorized management of high-frequency questions, with rich data tools to ensure accurate, reliable, and easily maintainable answers
- **WeChat Ecosystem Integration**: Through the WeChat Dialog Open Platform, WeKnora's intelligent Q&A capabilities can be seamlessly integrated into WeChat Official Accounts, Mini Programs, and other WeChat scenarios, enhancing user interaction experiences
## 📱 Interface Showcase
### Web UI Interface
<table>
<tr>
<td><b>Knowledge Upload</b><br/><img src="./docs/images/knowledges.png" alt="Knowledge Upload Interface"></td>
<td><b>Q&A Entry</b><br/><img src="./docs/images/qa.png" alt="Q&A Entry Interface"></td>
</tr>
<tr>
<td colspan="2"><b>Rich Text & Image Responses</b><br/><img src="./docs/images/answer.png" alt="Rich Answer Interface"></td>
</tr>
</table>
**Knowledge Base Management:** Support for dragging and dropping various documents, automatically identifying document structures and extracting core knowledge to establish indexes. The system clearly displays processing progress and document status, achieving efficient knowledge base management.
### Document Knowledge Graph
<table>
<tr>
<td><img src="./docs/images/graph2.png" alt="Knowledge Graph View 1"></td>
<td><img src="./docs/images/graph1.png" alt="Knowledge Graph View 2"></td>
</tr>
</table>
WeKnora supports transforming documents into knowledge graphs, displaying the relationships between different sections of the documents. Once the knowledge graph feature is enabled, the system analyzes and constructs an internal semantic association network that not only helps users understand document content but also provides structured support for indexing and retrieval, enhancing the relevance and breadth of search results.
## 📘 API Reference
Detailed API documentation is available at: [API Docs](./docs/API.md)
## 🧭 Developer Guide
### 📁 Directory Structure
```
WeKnora/
├── cmd/ # Main entry point
├── internal/ # Core business logic
├── config/ # Configuration files
├── migrations/ # DB migration scripts
├── scripts/ # Shell scripts
├── services/ # Microservice logic
├── frontend/ # Frontend app
└── docs/ # Project documentation
```
### 🔧 Common Commands
```bash
# Wipe all data from DB (use with caution)
make clean-db
```
## 🤝 Contributing
We welcome community contributions! For suggestions, bugs, or feature requests, please submit an [Issue](https://github.com/Tencent/WeKnora/issues) or directly create a Pull Request.
### 🎯 How to Contribute
- 🐛 **Bug Fixes**: Discover and fix system defects
-**New Features**: Propose and implement new capabilities
- 📚 **Documentation**: Improve project documentation
- 🧪 **Test Cases**: Write unit and integration tests
- 🎨 **UI/UX Enhancements**: Improve user interface and experience
### 📋 Contribution Process
1. **Fork the project** to your GitHub account
2. **Create a feature branch** `git checkout -b feature/amazing-feature`
3. **Commit changes** `git commit -m 'Add amazing feature'`
4. **Push branch** `git push origin feature/amazing-feature`
5. **Create a Pull Request** with detailed description of changes
### 🎨 Code Standards
- Follow [Go Code Review Comments](https://github.com/golang/go/wiki/CodeReviewComments)
- Format code using `gofmt`
- Add necessary unit tests
- Update relevant documentation
### 📝 Commit Guidelines
Use [Conventional Commits](https://www.conventionalcommits.org/) standard:
```
feat: Add document batch upload functionality
fix: Resolve vector retrieval precision issue
docs: Update API documentation
test: Add retrieval engine test cases
refactor: Restructure document parsing module
```
## 📄 License
This project is licensed under the [MIT License](./LICENSE).
You are free to use, modify, and distribute the code with proper attribution.

README_JA.md

@@ -14,10 +14,13 @@
<a href="https://github.com/Tencent/WeKnora/blob/main/LICENSE">
<img src="https://img.shields.io/badge/License-MIT-ffffff?labelColor=d4eaf7&color=2e6cc4" alt="License">
</a>
<a href="./CHANGELOG.md">
<img alt="バージョン" src="https://img.shields.io/badge/version-0.1.0-2e6cc4?labelColor=d4eaf7">
</a>
</p>
<p align="center">
| <a href="./README_EN.md"><b>English</b></a> | <a href="./README.md"><b>简体中文</b></a> | <b>日本語</b> |
| <a href="./README.md"><b>English</b></a> | <a href="./README_CN.md"><b>简体中文</b></a> | <b>日本語</b> |
</p>
<p align="center">

VERSION (new file, 1 line)

@@ -0,0 +1 @@
0.1.0

docker-compose.yml

@@ -3,11 +3,18 @@ services:
image: wechatopenai/weknora-app:latest
container_name: WeKnora-app
ports:
- "8080:8080"
- "${APP_PORT:-8080}:8080"
volumes:
- data-files:/data/files
- ./config/config.yaml:/app/config/config.yaml
environment:
- COS_SECRET_ID=${COS_SECRET_ID}
- COS_SECRET_KEY=${COS_SECRET_KEY}
- COS_REGION=${COS_REGION}
- COS_BUCKET_NAME=${COS_BUCKET_NAME}
- COS_APP_ID=${COS_APP_ID}
- COS_PATH_PREFIX=${COS_PATH_PREFIX}
- COS_ENABLE_OLD_DOMAIN=${COS_ENABLE_OLD_DOMAIN}
- GIN_MODE=${GIN_MODE}
- DB_DRIVER=postgres
- DB_HOST=postgres
@@ -61,6 +68,8 @@ services:
condition: service_healthy
minio:
condition: service_started
docreader:
condition: service_healthy
networks:
- WeKnora-network
restart: unless-stopped
@@ -91,7 +100,7 @@ services:
image: wechatopenai/weknora-ui:latest
container_name: WeKnora-frontend
ports:
- "80:80"
- "${FRONTEND_PORT:-80}:80"
depends_on:
- app
networks:
@@ -102,7 +111,7 @@ services:
image: wechatopenai/weknora-docreader:latest
container_name: WeKnora-docreader
ports:
- "50051:50051"
- "${DOCREADER_PORT:-50051}:50051"
environment:
- COS_SECRET_ID=${COS_SECRET_ID}
- COS_SECRET_KEY=${COS_SECRET_KEY}
@@ -122,6 +131,12 @@ services:
- MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME}
- MINIO_USE_SSL=${MINIO_USE_SSL}
- WEB_PROXY=${WEB_PROXY}
healthcheck:
test: ["CMD", "grpc_health_probe", "-addr=:50051"]
interval: 30s
timeout: 10s
retries: 3
start_period: 60s
networks:
- WeKnora-network
restart: unless-stopped
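
The healthcheck above runs `grpc_health_probe`, which queries the standard gRPC health-checking protocol, so the docreader server must register a health service. A minimal Python sketch of that registration follows; the actual wiring in `services/docreader/src/server.py` may differ.

```python
# Minimal sketch: register the standard gRPC health service so that
# "grpc_health_probe -addr=:50051" (the compose healthcheck) succeeds.
# The DocReader servicer registration itself is elided.
from concurrent import futures

import grpc
from grpc_health.v1 import health, health_pb2, health_pb2_grpc


def serve() -> None:
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=8))
    # ... register the DocReader servicer here ...
    health_servicer = health.HealthServicer()
    health_pb2_grpc.add_HealthServicer_to_server(health_servicer, server)
    # The empty service name reports overall server health, which is
    # what grpc_health_probe checks by default.
    health_servicer.set("", health_pb2.HealthCheckResponse.SERVING)
    server.add_insecure_port("[::]:50051")
    server.start()
    server.wait_for_termination()


if __name__ == "__main__":
    serve()
```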

docker/Dockerfile.docreader

@@ -50,22 +50,33 @@ COPY services/docreader/requirements.txt .
# 安装依赖
RUN pip cache purge && pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
# 预下载 PP-OCRv5 模型
RUN mkdir -p /root/.paddlex/official_models && \
wget https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_server_det_infer.tar \
-O /root/.paddlex/official_models/PP-OCRv5_server_det_infer.tar && \
wget https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-OCRv5_server_rec_infer.tar \
-O /root/.paddlex/official_models/PP-OCRv5_server_rec_infer.tar && \
tar -xf /root/.paddlex/official_models/PP-OCRv5_server_det_infer.tar -C /root/.paddlex/official_models/ && \
tar -xf /root/.paddlex/official_models/PP-OCRv5_server_rec_infer.tar -C /root/.paddlex/official_models/ && \
rm -rf /root/.paddlex/official_models/PP-OCRv5_server_det_infer.tar /root/.paddlex/official_models/PP-OCRv5_server_rec_infer.tar
# 预下载 PP-OCRv4 模型
RUN mkdir -p /root/.paddleocr/whl/det/ch && \
mkdir -p /root/.paddleocr/whl/rec/ch && \
mkdir -p /root/.paddleocr/whl/cls/ch && \
# 下载检测模型
wget https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tar \
-O /root/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer.tar && \
tar -xf /root/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer.tar -C /root/.paddleocr/whl/det/ch/ && \
# 下载识别模型
wget https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_infer.tar \
-O /root/.paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer.tar && \
tar -xf /root/.paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer.tar -C /root/.paddleocr/whl/rec/ch/ && \
# 下载文本方向分类模型(用于判断文本是否需要旋转)
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar \
-O /root/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer.tar && \
tar -xf /root/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer.tar -C /root/.paddleocr/whl/cls/ && \
# 清理压缩包
rm -f /root/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer.tar && \
rm -f /root/.paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer.tar && \
rm -f /root/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer.tar
# 复制源代码和生成脚本
COPY services/docreader/src/ /app/src/
COPY services/docreader/scripts/ /app/scripts/
# 确保模型目录存在
RUN ls -la /root/.paddlex/official_models
RUN ls -la /root/.paddleocr/whl/
# 生成 protobuf 代码
RUN chmod +x /app/scripts/generate_proto.sh && bash /app/scripts/generate_proto.sh
@@ -103,30 +114,18 @@ RUN apt-get update && apt-get install -y \
libglu1-mesa \
libsm6 \
libreoffice \
curl \
&& rm -rf /var/lib/apt/lists/*
# # 下载并安装 LibreOffice区分架构
# RUN mkdir -p /tmp/libreoffice && cd /tmp/libreoffice && \
# if [ "$(uname -m)" = "x86_64" ]; then \
# wget https://mirrors.tuna.tsinghua.edu.cn/libreoffice/libreoffice/stable/25.2.5/deb/x86_64/LibreOffice_25.2.5_Linux_x86-64_deb.tar.gz && \
# tar -xzf LibreOffice_25.2.5_Linux_x86-64_deb.tar.gz && \
# cd LibreOffice_25.2.5.2_Linux_x86-64_deb/DEBS && dpkg -i *.deb; \
# elif [ "$(uname -m)" = "aarch64" ] || [ "$(uname -m)" = "arm64" ]; then \
# wget https://mirrors.aliyun.com/libreoffice/testing/25.8.0/deb/aarch64/LibreOffice_25.8.0.3_Linux_aarch64_deb.tar.gz && \
# tar -xzf LibreOffice_25.8.0.3_Linux_aarch64_deb.tar.gz && \
# cd LibreOffice_25.8.0.3_Linux_aarch64_deb/DEBS && dpkg -i *.deb; \
# else \
# echo "Unsupported architecture: $(uname -m)" && exit 1; \
# fi && \
# cd / && rm -rf /tmp/libreoffice
# 设置 LibreOffice 环境变量
# RUN echo 'export LIBREOFFICE_PATH=/opt/libreoffice25.2/program/soffice' >> /etc/environment;
# 安装 grpc_health_probe
RUN GRPC_HEALTH_PROBE_VERSION=v0.4.24 && \
wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-amd64 && \
chmod +x /bin/grpc_health_probe
# 从构建阶段复制已安装的依赖和生成的代码
COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin
COPY --from=builder /root/.paddlex/official_models /root/.paddlex/official_models
COPY --from=builder /root/.paddleocr /root/.paddleocr
COPY --from=builder /app/src /app/src
# 安装 Playwright 浏览器

docs/WeKnora.md (new file, 319 lines)

@@ -0,0 +1,319 @@
## Introduction
WeKnora is a production-ready, enterprise-grade RAG framework for intelligent document understanding and retrieval. The system uses a modular design that separates document understanding, vector storage, and inference into independent components.
![arc](./images/arc.png)
---
## Pipeline
WeKnora processes a document in several stages: ingestion → knowledge extraction → indexing → retrieval → generation. The whole flow supports multiple retrieval methods.
![](./images/pipeline2.jpeg)
Take a hotel-stay statement PDF uploaded by a user as an example; its data flow in detail:
### 1. Request Reception and Initialization
+ **Request identification**: The system receives a request and assigns it a unique `request_id=Lkq0OGLYu2fV` to trace the entire processing flow.
+ **Tenant and session validation**:
    - The system first validates the tenant information (ID: 1, Name: Default Tenant).
    - It then begins handling a knowledge QA request belonging to session `1f241340-ae75-40a5-8731-9a3a82e34fdd`.
+ **User question**: The user's original question is: "**What room type did I stay in?**" (入住的房型是什么).
+ **Message creation**: The system creates message records for the question and the forthcoming answer (IDs `703ddf09-...` and `6f057649-...`).
### 2. Knowledge QA Pipeline Launch
The system invokes the knowledge QA service and defines the complete pipeline of nine events to execute in order:
`[rewrite_query, preprocess_query, chunk_search, chunk_rerank, chunk_merge, filter_top_k, into_chat_message, chat_completion_stream, stream_filter]`
---
### 3. Event Execution Details
#### Event 1: `rewrite_query` - Query Rewriting
+ **Purpose**: To make retrieval more precise, the system interprets the user's real intent from the conversation context.
+ **Steps**:
    1. The system retrieves up to the 20 most recent messages of the current session (8 were actually found) as context.
    2. It calls a local large language model named `deepseek-r1:7b`.
    3. From the chat history the model infers that the asker is "Liwx" and rewrites the original question to be more specific.
+ **Result**: The question is successfully rewritten as: "**What room type did Liwx book for this stay?**" (Liwx本次入住的房型是什么).
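
The log does not show the rewrite prompt itself. A hypothetical sketch of a history-aware rewrite call, with all wording assumed:

```python
# Hypothetical sketch of a history-aware query rewrite; WeKnora's actual
# prompt template is not shown in the log, so the wording here is assumed.
def build_rewrite_prompt(history: list[str], question: str) -> str:
    recent = "\n".join(history[-20:])  # up to the 20 most recent messages
    return (
        "Rewrite the user's question so it is fully self-contained, using "
        "the conversation below to resolve pronouns and omitted details.\n\n"
        f"Conversation:\n{recent}\n\n"
        f"Question: {question}\nRewritten question:"
    )

# rewritten = llm.complete(build_rewrite_prompt(history, "入住的房型是什么"))
```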
#### Event 2: `preprocess_query` - Query Preprocessing
+ **Purpose**: Tokenize the rewritten question into a keyword sequence suited to the search engine.
+ **Steps**: The rewritten question is segmented into words.
+ **Result**: A keyword string is produced: "`需要 改写 用户 问题 入住 房型 根据 提供 信息 入住 人 Liwx 选择 房型 双床 房 因此 改写 后 完整 问题 为 Liwx 本次 入住 房型`".
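
The keyword string above is ordinary Chinese word segmentation. A rough Python equivalent using `jieba`, assumed purely for illustration (the Go backend uses its own tokenizer, so the exact token list will differ):

```python
# Rough illustration of the preprocessing step with jieba; the Go backend
# uses its own tokenizer, so the exact token list will differ.
import jieba

rewritten = "Liwx本次入住的房型是什么"
keywords = " ".join(tok for tok in jieba.cut_for_search(rewritten) if tok.strip())
print(keywords)  # e.g. "Liwx 本次 入住 的 房型 是 什么"
```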
#### Event 3: `chunk_search` - Chunk Retrieval
This is the core **retrieval** step. The system runs two hybrid searches:
+ **First search (using the full rewritten question)**:
    - **Vector retrieval**:
        1. The embedding model `bge-m3:latest` is loaded to convert the question into a 1024-dimensional vector.
        2. A vector similarity search in PostgreSQL finds 2 relevant chunks (IDs `e3bf6599-...` and `3989c6ce-...`).
    - **Keyword retrieval**:
        1. In parallel, the system also runs a keyword search.
        2. It finds the same 2 chunks.
    - **Result merging**: The 4 hits from the two methods (actually 2 duplicates) are deduplicated, leaving 2 unique chunks.
+ **Second search (using the preprocessed keyword sequence)**:
    - The system repeats the **vector retrieval** and **keyword retrieval** above with the tokenized keywords.
    - It again yields the same 2 chunks.
+ **Final result**: After two searches and result merging, the system locks in the 2 most relevant chunks and extracts their content for answer generation.
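
The backend is Go, but the dedup-and-merge logic is language-neutral. A sketch in Python, where `SearchHit` and the retriever callables are hypothetical stand-ins for the real implementation:

```python
# Language-neutral sketch of hybrid-search merging; SearchHit and the
# retriever callables are hypothetical stand-ins for the Go implementation.
from dataclasses import dataclass
from typing import Callable, Iterable


@dataclass
class SearchHit:
    chunk_id: str
    content: str
    score: float


Retriever = Callable[[str], list[SearchHit]]


def hybrid_search(query: str, retrievers: Iterable[Retriever]) -> list[SearchHit]:
    """Run each retriever on the same query and union the hits by chunk ID."""
    best: dict[str, SearchHit] = {}
    for retrieve in retrievers:
        for hit in retrieve(query):
            # Keep the highest-scoring occurrence of each chunk.
            if hit.chunk_id not in best or hit.score > best[hit.chunk_id].score:
                best[hit.chunk_id] = hit
    return sorted(best.values(), key=lambda h: h.score, reverse=True)

# Two passes, as in the log: once with the rewritten sentence, once with
# the keyword sequence; the two result sets are then merged the same way.
```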
#### Event 4: `chunk_rerank` - Result Reranking
+ **Purpose**: Use a stronger model to re-order the initial hits more precisely and raise the quality of the final answer.
+ **Steps**: The log shows `Rerank model ID is empty, skipping reranking`: a rerank step is configured, but no rerank model is specified, so **this step is skipped**.
#### Event 5: `chunk_merge` - Chunk Merging
+ **Purpose**: Merge adjacent or related chunks into a more complete context.
+ **Steps**: The system analyzes the 2 retrieved chunks and attempts to merge them. Per the log, they remain 2 independent chunks after processing, now sorted by relevance score.
#### Event 6: `filter_top_k` - Top-K Filtering
+ **Purpose**: Keep only the K most relevant results so excess irrelevant content does not distract the language model.
+ **Steps**: The system is configured to keep the 5 most relevant chunks (Top-K = 5). Since only 2 chunks remain, both pass the filter.
#### Events 7 & 8: `into_chat_message` & `chat_completion_stream` - Answer Generation
This is the **generation** step.
+ **Purpose**: Generate a natural, fluent answer based on the retrieved information.
+ **Steps**:
    1. The system assembles the contents of the 2 retrieved chunks, the user's original question, and the chat history into one complete prompt.
    2. It calls the `deepseek-r1:7b` model again and requests the answer as a **stream**. Streaming enables a typewriter effect and improves the user experience.
#### Event 9: `stream_filter` - Stream Output Filtering
+ **Purpose**: Post-process the model's real-time token stream, filtering out unwanted special markers.
+ **Steps**:
    - The system installs a filter that removes internal markers the model may emit while thinking, such as `<think>` and `</think>`.
    - The log shows the model's first token chunk was `<think> 根据`; the filter intercepted and removed the `<think>` marker, passing only "根据" ("Based on") and everything after it downstream.
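
A minimal sketch of such a marker filter, written in Python for illustration (the real filter lives in the Go backend). The tricky part is a marker that arrives split across two stream chunks:

```python
# Minimal sketch of a stream filter that removes the literal <think> and
# </think> markers from an incremental token stream, handling markers that
# arrive split across chunks. The real filter lives in the Go backend.
from typing import Iterator

MARKERS = ("<think>", "</think>")


def strip_markers(tokens: Iterator[str]) -> Iterator[str]:
    tail = ""  # held-back text that may still grow into a marker
    for tok in tokens:
        buf = tail + tok
        for m in MARKERS:
            buf = buf.replace(m, "")
        # Hold back the longest suffix that could be a marker prefix.
        hold = 0
        for m in MARKERS:
            for k in range(1, len(m)):
                if buf.endswith(m[:k]):
                    hold = max(hold, k)
        tail = buf[len(buf) - hold:] if hold else ""
        if buf[: len(buf) - hold]:
            yield buf[: len(buf) - hold]
    if tail:
        yield tail

# Example from the log: the marker is dropped even when split across chunks.
# "".join(strip_markers(iter(["<think> 根", "据文档</th", "ink>……"])))
# -> " 根据文档……"
```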
### 4. Completion and Response
+ **Sending references**: While generating the answer, the system sends the 2 supporting chunks to the frontend as "reference content" so the user can verify the sources.
+ **Updating the message**: Once generation finishes, the complete answer is written into the previously created message record (ID: `6f057649-...`).
+ **Request completion**: The server returns a `200` status code, marking the end of the full question-to-answer flow.
### Summary
This log records a typical RAG flow end to end: the system uses **query rewriting** and **preprocessing** to pin down user intent, then **hybrid vector and keyword retrieval** to find relevant information in the knowledge base; although **reranking** is skipped, **merging** and **filtering** still run; finally, the retrieved knowledge serves as context for the large language model to **generate** a fluent, accurate answer, with **stream filtering** keeping the output clean.
## Document Parsing and Chunking
The code implements a standalone microservice, communicating over gRPC, dedicated to deep document parsing, chunking, and multimodal information extraction. It is the core executor of the "asynchronous processing" stage.
### **Overall Architecture**
This is a Python-based gRPC service whose core duty is to accept a file or URL and parse it into structured text chunks ready for downstream processing (such as vectorization):
+ `server.py`: The service entry point and network layer. It starts a multi-process, multi-threaded gRPC server, receives requests from the Go backend, and returns parse results.
+ `parser.py`: An application of the **Facade pattern**. It exposes a unified `Parser` class that hides the complexity of the concrete parsers (PDF, DOCX, Markdown, etc.); external callers (`server.py`) interact only with this `Parser` class.
+ `base_parser.py`: The parser base class, defining the core logic and abstract methods shared by all concrete parsers. It is the "brain" of the parsing flow, containing the most complex functionality: text chunking, image handling, OCR, and image caption generation.
---
### **Detailed Workflow**
When the Go backend launches an asynchronous task, it makes a gRPC call to this Python service carrying the file content and configuration. The complete processing flow:
#### **Step 1: Request Reception and Dispatch (`server.py` & `parser.py`)**
1. **gRPC service entry (`server.py: serve`)**:
    - The service starts via the `serve()` function. Based on environment variables (`GRPC_WORKER_PROCESSES`, `GRPC_MAX_WORKERS`) it launches a **multi-process, multi-threaded** server to fully utilize CPU resources and raise concurrency.
    - Each worker process listens on the configured port (e.g., 50051), ready to accept requests.
2. **Request handling (`server.py: ReadFromFile`)**:
    - When the Go backend issues a `ReadFromFile` request, one of the worker processes receives it.
    - The method first parses the request parameters, including:
        * `file_name`, `file_type`, `file_content`: the file's basic information and binary content.
        * `read_config`: a complex object holding all parsing configuration, such as `chunk_size`, `chunk_overlap`, `enable_multimodal`, `storage_config` (object storage), `vlm_config` (vision-language model), and more.
    - It consolidates these settings into a `ChunkingConfig` data object.
    - The crucial step is calling `self.parser.parse_file(...)`, handing the parse task to the `Parser` facade.
3. **Parser selection (`parser.py: Parser.parse_file`)**:
    - On receiving the task, the `Parser` class first calls `get_parser(file_type)`.
    - That method looks up the concrete parser class (e.g., `PDFParser`) for the file type (e.g., `'pdf'`) in the `self.parsers` dictionary.
    - Once found, it **instantiates** that `PDFParser` class, passing `ChunkingConfig` and all other configuration to the constructor.
#### **Step 2: Core Parsing and Chunking (`base_parser.py`)**
This touches the heart of the whole flow: **how contextual completeness and original ordering are preserved**.
Per the `base_parser.py` code, **the text, tables, and images in the final chunks are stored in the order they appear in the original document**.
This ordering is guaranteed by several carefully designed methods in `BaseParser` working together. Let's trace the flow in detail.
The ordering guarantee unfolds in stages (a combined sketch of stages 2 and 3 follows this list):
1. **Stage 1: Unified text stream creation (`pdf_parser.py`)**:
    - In the `parse_into_text` method, the code processes the PDF **page by page**.
    - Within each page, it concatenates all content **into one long string** (`page_content_parts`), following a fixed logic: extract non-table text first, then append tables, and finally append image placeholders.
    - **Key point**: Although the interleaving of text, tables, and image placeholders at this stage may not be 100% character-accurate, it guarantees that **content from the same page stays together**, roughly in top-to-bottom reading order.
    - Finally, all pages are joined with `"\n\n--- Page Break ---\n\n"` into a **single, ordered text stream (`final_text`) containing all information (text, Markdown tables, image placeholders)**.
2. **Stage 2: Atomization and protection (`_split_into_units`)**:
    - This single `final_text` is passed to the `_split_into_units` method.
    - This method is **key to structural integrity**. Using regular expressions, it recognizes **entire Markdown tables** and **entire Markdown image placeholders** as **indivisible atomic units**.
    - It splits these atomic units (tables, images) and the ordinary text between them into a list (`units`), in the **original order** they appear in `final_text`.
    - **Result**: We now have a list such as `['some text', '![...](...)', 'more text', '|...|...|\n|---|---|\n...', 'further text']`, whose element order is **exactly the order in the original document**.
3. **Stage 3: Sequential chunking (`chunk_text`)**:
    - The `chunk_text` method receives this **ordered `units` list**.
    - Its mechanism is simple and direct: it iterates over the units **in order**.
    - It appends units **one by one** to a temporary `current_chunk` list until the chunk's length approaches the `chunk_size` limit.
    - When a chunk fills up, it is saved and a new chunk begins (possibly carrying an overlap from the previous chunk).
    - **Key point**: Because `chunk_text` **processes the `units` list strictly in order**, it can never shuffle the relative order of tables, text, and images. A table that appears earlier in the document necessarily lands in an earlier-numbered chunk.
4. **Stage 4: Image information attachment (`process_chunks_images`)**:
    - After the text chunks are cut, the `process_chunks_images` method is invoked.
    - It processes **every** generated chunk.
    - Inside each chunk, it locates the image placeholders and runs AI processing on them.
    - Finally, it attaches the processed image information (permanent URL, OCR text, image caption, etc.) to **that chunk's own** `.images` attribute.
    - **Key point**: This process **changes neither the chunk order nor the chunk's `.content`**. It only attaches extra information to chunks that already exist in the correct order.
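
A combined sketch of stages 2 and 3, with simplified regexes assumed for illustration. The real patterns in `base_parser.py` may differ, and chunk overlap is omitted for brevity:

```python
# Combined sketch of stages 2 and 3: atomic splitting, then in-order packing.
# The regexes are illustrative approximations, not the exact implementation.
import re

# An entire Markdown image or table is one indivisible "atomic unit".
ATOMIC = re.compile(
    r"(!\[[^\]]*\]\([^)]*\)"        # ![alt](url) image placeholder
    r"|(?:^\|.*\|[ \t]*\n?)+)",     # consecutive |...| table rows
    re.MULTILINE,
)


def split_into_units(text: str) -> list[str]:
    """Split text into ordered units; tables and images are never cut apart."""
    units, last = [], 0
    for m in ATOMIC.finditer(text):
        if m.start() > last:
            units.append(text[last:m.start()])
        units.append(m.group(0))
        last = m.end()
    if last < len(text):
        units.append(text[last:])
    return units


def chunk_text(units: list[str], chunk_size: int) -> list[str]:
    """Pack units into chunks strictly in order; document order is preserved.
    An oversize unit (e.g., a huge table) is kept whole in its own chunk."""
    chunks, current, length = [], [], 0
    for unit in units:
        if current and length + len(unit) > chunk_size:
            chunks.append("".join(current))
            current, length = [], 0
        current.append(unit)
        length += len(unit)
    if current:
        chunks.append("".join(current))
    return chunks
```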
#### **Step 3: Multimodal Processing, if Enabled (`base_parser.py`)**
If `enable_multimodal` is `True`, the most complex stage, multimodal processing, begins after text chunking completes.
1. **Concurrent task launch (`BaseParser.process_chunks_images`)**:
    - The method uses `asyncio` (Python's async I/O framework) to **process the images of all chunks concurrently**, greatly improving throughput.
    - It creates one async task (`process_chunk_images_async`) per `Chunk`.
2. **Processing the images of one chunk (`BaseParser.process_chunk_images_async`)**:
    - **Extract image references**: First, `extract_images_from_chunk` uses a regular expression to find all image references in the chunk's text (e.g., `![alt text](image.png)`).
    - **Image persistence**: For each image found, it concurrently calls `download_and_upload_image`, which:
        * Fetches the image data from its original location (inside the PDF, a local path, or a remote URL).
        * **Uploads the image to the configured object store (COS/MinIO)**. This step is crucial: it turns a temporary, unstable image reference into a persistent URL that is publicly accessible.
        * Returns the persistent URL and the image object (PIL Image).
    - **Concurrent AI processing**: All successfully uploaded images are collected and passed to `process_multiple_images`:
        * Internally it uses an `asyncio.Semaphore` to cap concurrency (e.g., at most 5 images at once), preventing memory spikes and model API rate-limit violations; see the sketch after this list.
        * For each image it calls `process_image_async`.
3. **Processing a single image (`BaseParser.process_image_async`)**:
    - **OCR**: Calls `perform_ocr`, which uses an OCR engine (`PaddleOCR`) to recognize all text in the image.
    - **Image caption**: Calls `get_image_caption`, which Base64-encodes the image and sends it to the configured vision-language model (VLM) to generate a natural-language description of the image content.
    - Returns `(ocr_text, caption, persistent URL)`.
4. **Result aggregation**:
    - Once all images are processed, the structured information (persistent URL, OCR text, and caption) is attached to the corresponding `Chunk` object's `.images` field.
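
A sketch of the bounded-concurrency pattern from step 2.3, with stub coroutines standing in for the real `perform_ocr` / `get_image_caption` calls:

```python
# Bounded-concurrency sketch: process all images with asyncio, but keep at
# most 5 in flight. The two stubs stand in for the real OCR / VLM calls.
import asyncio


async def perform_ocr(url: str) -> str:          # stub for the PaddleOCR call
    await asyncio.sleep(0.1)
    return f"ocr({url})"


async def get_image_caption(url: str) -> str:    # stub for the VLM call
    await asyncio.sleep(0.1)
    return f"caption({url})"


async def process_image_async(url: str, sem: asyncio.Semaphore):
    async with sem:                              # at most 5 concurrent images
        return await perform_ocr(url), await get_image_caption(url), url


async def process_multiple_images(urls: list[str]):
    sem = asyncio.Semaphore(5)
    return await asyncio.gather(*(process_image_async(u, sem) for u in urls))

# results = asyncio.run(process_multiple_images(uploaded_urls))
```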
#### **Step 4: Returning the Result (`server.py`)**
1. **Data conversion (`server.py: _convert_chunk_to_proto`)**:
    - When `parser.parse_file` finishes, it returns a `ParseResult` containing all processed `Chunk` objects.
    - `ReadFromFile` receives this result and calls `_convert_chunk_to_proto` to convert the Python `Chunk` objects (including their image information) into the gRPC-defined Protobuf message format.
2. **Response return**:
    - Finally, the gRPC server sends this `ReadResponse` message, carrying all chunks and multimodal information, back to the caller: the Go backend service.
At this point the Go backend holds structured, information-rich document data and can proceed to vectorization and index storage.
## Deployment
Supports local deployment via Docker images, with the API served over an exposed port.
## Performance and Monitoring
WeKnora ships with rich monitoring and testing components:
+ Distributed tracing: Jaeger is integrated to trace a request's complete execution path across the service architecture. In essence, Jaeger is a technology that lets you "see" a request's full lifecycle through a distributed system.
+ Health monitoring: checks that services stay in a healthy state.
+ Scalability: with containerized deployment, multiple service instances can absorb large-scale concurrent requests.
## Q&A
### Question 1: Why does retrieval run hybrid search twice, and how do the two searches differ?
This is a sharp observation. Running hybrid search twice **maximizes retrieval accuracy and recall**; it is essentially a combination of **query expansion** and **multi-strategy retrieval**.
#### Purpose
By searching with two different forms of the query (the rewritten sentence vs. the tokenized keyword sequence), the system combines the strengths of both:
+ **Depth of semantic retrieval**: Searching with the complete sentence better exploits the vector model's (e.g., `bge-m3`) understanding of whole-sentence meaning, finding the semantically closest chunks.
+ **Breadth of keyword retrieval**: Searching with tokenized keywords ensures that even a chunk phrased differently from the question can still be hit as long as it contains the core keywords. This is especially effective for traditional keyword-matching algorithms such as BM25.
Put simply, the system **asks the same question in two different "phrasings"**, then unions the results, ensuring the most relevant knowledge is not missed.
#### How the Two Searches Differ
The core difference is the **input query text**:
1. **First hybrid search**
    - **Input**: the **grammatically complete natural-language question** produced by the `rewrite_query` event.
    - **Log evidence**:
```plain
INFO [2025-08-29 09:46:36.896] [request_id=Lkq0OGLYu2fV] knowledgebase.go:266[HybridSearch] | Hybrid search parameters, knowledge base ID: kb-00000001, query text: 需要改写的用户问题是“入住的房型是什么”。根据提供的信息入住人Liwx选择的房型是双床房。因此改写后的完整问题为 “Liwx本次入住的房型是什么”
```
2. **Second hybrid search**
    - **Input**: the **space-separated keyword sequence** produced by the `preprocess_query` event.
    - **Log evidence**:
```plain
INFO [2025-08-29 09:46:37.257] [request_id=Lkq0OGLYu2fV] knowledgebase.go:266[HybridSearch] | Hybrid search parameters, knowledge base ID: kb-00000001, query text: 需要 改写 用户 问题 入住 房型 根据 提供 信息 入住 人 Liwx 选择 房型 双床 房 因此 改写 后 完整 问题 为 Liwx 本次 入住 房型
```
最终系统将这两次搜索的结果进行去重和合并日志中显示每次都找到2个结果去重后总共还是2个从而得到一个更可靠的知识集合用于后续的答案生成。
### 问题2重排序模型分析
Reranker重排器是目前RAG领域中非常先进的技术它们在工作原理和适用场景上有着显著的区别。
简单来说,它们代表了从“**专门的判别模型**”到“**利用大语言模型LLM进行判别**”再到“**深度挖掘LLM内部信息进行判别**”的演进。
以下是它们的详细区别:
#### 1. Normal Reranker (conventional reranker / cross-encoder)
This is the classic and still most mainstream reranking approach.
+ **Model type**: a **sequence classification model**, in essence a **cross-encoder**, usually built on a bidirectional encoder architecture such as BERT or RoBERTa. `BAAI/bge-reranker-base/large/v2-m3` all belong to this family.
+ **How it works**:
1. It concatenates the **query** and the **candidate passage** into a single input sequence, e.g.: `[CLS] what is panda? [SEP] The giant panda is a bear species endemic to China. [SEP]`
2. This concatenated sequence is fed through the model as a whole. The model's internal self-attention can analyze every token of the query and the passage at the same time and compute their **fine-grained interactions**.
3. The model finally outputs a **single score (logit)** that directly represents the query-passage relevance: the higher the score, the stronger the relevance.
+ **Key characteristics**:
- **Pros**: because the query and passage interact fully and deeply inside the model, its **accuracy is typically very high**; it is the gold standard for reranker quality (a usage sketch follows below).
- **Cons**: **relatively slow**, because it must run one complete, expensive forward pass for **every query-passage pair**: if first-stage retrieval returns 100 documents, it has to run 100 times.
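A minimal usage sketch of such a cross-encoder via the `sentence-transformers` `CrossEncoder` wrapper; the model name follows the document, while the sample data is made up.

```python
from sentence_transformers import CrossEncoder

model = CrossEncoder("BAAI/bge-reranker-v2-m3", max_length=512)

query = "what is panda?"
passages = [
    "The giant panda is a bear species endemic to China.",
    "pandas is a Python library for data analysis.",
]
# One forward pass per (query, passage) pair -- the cost noted above.
scores = model.predict([(query, p) for p in passages])
ranked = sorted(zip(passages, scores), key=lambda x: x[1], reverse=True)
print(ranked[0])  # the most relevant passage and its score
```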
#### 2. LLM-based Reranker
This approach creatively exploits the capabilities of a general-purpose large language model (LLM) for reranking.
+ **Model type**: a **causal language model**, i.e. the GPT/Llama/Gemma family of text-generation LLMs. `BAAI/bge-reranker-v2-gemma` is a typical example.
+ **How it works**:
1. Instead of **outputting a score directly**, it **recasts the reranking task as a question-answering or text-generation task**.
2. It organizes the input with a carefully designed **prompt**, for example: `"Given a query A and a passage B, determine whether the passage contains an answer to the query by providing a prediction of either 'Yes' or 'No'. A: {query} B: {passage}"`
3. It feeds this complete prompt to the LLM and then **observes the probability that the LLM generates the word "Yes" at the end**.
4. That **probability of generating "Yes" (or its logit) is taken as the relevance score**. If the model is highly confident the answer is "Yes", it believes passage B contains the answer to query A, i.e. the relevance is high.
+ **Key characteristics**:
- **Pros**: it can harness the LLM's strong **semantic understanding, reasoning, and world knowledge**, so for complex queries whose relevance requires deep understanding and reasoning it may work better.
- **Cons**: the compute cost can be very large (it depends on the LLM's size), and performance is **highly sensitive to prompt design** (a scoring sketch follows below).
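A hedged sketch of "Yes"-logit scoring with Hugging Face `transformers`; the prompt mirrors the document, the model name is illustrative, and the real `bge-reranker-v2-gemma` usage (tokenization of "Yes", chat template) differs in detail.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("BAAI/bge-reranker-v2-gemma")
model = AutoModelForCausalLM.from_pretrained("BAAI/bge-reranker-v2-gemma")
yes_id = tok.convert_tokens_to_ids("Yes")

def relevance(query: str, passage: str) -> float:
    prompt = (
        "Given a query A and a passage B, determine whether the passage "
        "contains an answer to the query by providing a prediction of "
        f"either 'Yes' or 'No'. A: {query} B: {passage}"
    )
    inputs = tok(prompt, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits[0, -1]  # next-token distribution
    return logits[yes_id].item()                # "Yes" logit as the score
```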
#### 3. LLM-based Layerwise Reranker
This is the souped-up version of the second approach: a more cutting-edge, more complex, exploratory technique.
+ **Model type**: likewise a **causal language model**, for example `BAAI/bge-reranker-v2-minicpm-layerwise`.
+ **How it works**:
1. The input is exactly the same as in the second approach, using the "Yes/No" prompt.
2. The core difference lies in **how the score is extracted**. It no longer relies solely on the output of the LLM's **final layer** (the final prediction).
3. The premise is that as the LLM processes information layer by layer, network layers at different depths may capture relevance signals at different levels. So it extracts the prediction logit for the word "Yes" from **multiple intermediate layers** of the model.
4. The `cutoff_layers=[28]` parameter in the code tells the model: "give me the output of layer 28." In the end you obtain one or more scores from different network layers, which can be averaged or combined in other ways into a more robust final relevance judgment.
+ **Key characteristics**:
- **Pros**: in theory it captures a **richer, more complete relevance signal** and may reach higher accuracy than looking at the last layer alone; it is one current way of probing the performance ceiling (a sketch follows below).
- **Cons**: **highest complexity**: extracting intermediate-layer information requires model-specific modifications (the `trust_remote_code=True` in the code is one telltale sign), and the compute cost is also large.
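A hedged sketch of the layerwise idea: apply the LM head to an intermediate hidden state and read off the "Yes" logit. The real `bge-reranker-v2-minicpm-layerwise` API (`cutoff_layers=[...]`) wraps this idea internally; the layer index, `lm_head` attribute, and model name here are assumptions for illustration.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

name = "BAAI/bge-reranker-v2-minicpm-layerwise"
tok = AutoTokenizer.from_pretrained(name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(name, trust_remote_code=True)
yes_id = tok.convert_tokens_to_ids("Yes")

def layerwise_score(prompt: str, layer: int = 28) -> float:
    inputs = tok(prompt, return_tensors="pt")
    with torch.no_grad():
        out = model(**inputs, output_hidden_states=True)
    hidden = out.hidden_states[layer][0, -1]   # last-token state at that layer
    logits = model.lm_head(hidden)             # project to the vocabulary
    return logits[yes_id].item()
```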
#### Summary comparison
| Aspect | 1. Normal Reranker (conventional) | 2. LLM-based Reranker | 3. LLM-based Layerwise Reranker |
| :--- | :--- | :--- | :--- |
| **Underlying model** | Cross-encoder (e.g. BERT) | Causal LM (e.g. Gemma) | Causal LM (e.g. MiniCPM) |
| **How it works** | Computes deep query-passage interaction, directly outputs a relevance score | Recasts ranking as a "Yes/No" prediction, uses the probability of "Yes" as the score | Like 2, but extracts the "Yes" probability from multiple intermediate layers |
| **Output** | A single relevance score | A single relevance score (from the last layer) | Multiple relevance scores (from different layers) |
| **Pros** | **Best balance of speed and accuracy**, mature and stable | Leverages LLM reasoning for complex queries | Theoretically highest accuracy, richer signal |
| **Cons** | Slower than vector retrieval | Heavy compute, sensitive to prompt design | **Highest complexity**, heaviest compute |
| **Recommended scenario** | **First choice for most production systems**: effective, easy to deploy | When answer quality is paramount and compute is abundant | Academic research, or chasing SOTA (state-of-the-art) performance |
#### Usage recommendations
1. **Getting started**: we strongly recommend **starting with a Normal Reranker**, e.g. `BAAI/bge-reranker-v2-m3`. It is one of the best all-round models available, delivers a significant boost to RAG quality, and is comparatively easy to integrate and deploy.
2. **Going further**: if you find the conventional reranker underperforms on very subtle queries that demand complex reasoning, and you have ample GPU resources, try an `LLM-based Reranker`.
3. **Frontier research**: the `Layerwise Reranker` better suits researchers or experts trying to squeeze out the last bit of performance on a specific task.
### Question 3: after coarse/fine filtering and reranking, how is the knowledge assembled and sent to the LLM?
This part is mostly prompt design. The instructions follow a typical pattern whose core task is answering the user's question from the supplied context. When assembling the context, specify (a sketch follows this list):
+ Key constraint: answer strictly from the supplied documents; answering from your own knowledge is forbidden.
+ Unknown-case handling: if the documents lack enough information to answer, reply with "Based on the available material, this question cannot be answered."
+ Citation requirement: when the answer draws on a document, append the document number at the end of the sentence.
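A hedged sketch of that context assembly; the wording of the instructions follows the document, while the template layout itself is illustrative rather than the actual prompt used by WeKnora.

```python
def build_prompt(question: str, chunks: list[str]) -> str:
    # Number each retrieved chunk so the model can cite it as [1], [2], ...
    context = "\n".join(f"[{i + 1}] {c}" for i, c in enumerate(chunks))
    return (
        "Answer the user's question strictly based on the documents below; "
        "do not use your own knowledge.\n"
        "If the documents are insufficient, reply: 'Based on the available "
        "material, this question cannot be answered.'\n"
        "When a sentence draws on a document, append its number, e.g. [1], "
        "to the end of the sentence.\n\n"
        f"Documents:\n{context}\n\nQuestion: {question}\nAnswer:"
    )
```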

BIN
docs/WeKnora.pdf Normal file

Binary file not shown.

BIN
docs/images/arc.png Normal file

Binary file not shown.


BIN
docs/images/pipeline2.jpeg Normal file

Binary file not shown.


View File

@@ -197,3 +197,8 @@ func (r *knowledgeRepository) AminusB(
}
return knowledgeIDs, err
}
func (r *knowledgeRepository) UpdateKnowledgeColumn(ctx context.Context, id string, column string, value interface{}) error {
err := r.db.WithContext(ctx).Model(&types.Knowledge{}).Where("id = ?", id).Update(column, value).Error
return err
}

View File

@@ -166,8 +166,10 @@ func (p *PluginRewrite) OnEvent(ctx context.Context,
return next()
}
// Update rewritten query
chatManage.RewriteQuery = response.Content
if response.Content != "" {
// Update rewritten query
chatManage.RewriteQuery = response.Content
}
logger.GetLogger(ctx).Infof("Rewritten query, session_id: %s, rewrite_query: %s",
chatManage.SessionID, chatManage.RewriteQuery)
return next()

View File

@@ -166,9 +166,7 @@ func (s *knowledgeService) CreateKnowledgeFromFile(ctx context.Context,
if exists {
logger.Infof(ctx, "File already exists: %s", file.Filename)
// Update creation time for existing knowledge
existingKnowledge.CreatedAt = time.Now()
existingKnowledge.UpdatedAt = time.Now()
if err := s.repo.UpdateKnowledge(ctx, existingKnowledge); err != nil {
if err := s.repo.UpdateKnowledgeColumn(ctx, existingKnowledge.ID, "created_at", time.Now()); err != nil {
logger.Errorf(ctx, "Failed to update existing knowledge: %v", err)
return nil, err
}
@@ -627,6 +625,8 @@ func (s *knowledgeService) processDocument(ctx context.Context,
attribute.String("embedding_model", knowledge.EmbeddingModelID),
attribute.Bool("enable_multimodal", enableMultimodel),
)
logger.GetLogger(ctx).Infof("processDocument trace id: %s", span.SpanContext().TraceID().String())
if !enableMultimodel && IsImageType(knowledge.FileType) {
logger.GetLogger(ctx).WithField("knowledge_id", knowledge.ID).
WithField("error", ErrImageNotParse).Errorf("processDocument image without enable multimodel")
@@ -1137,6 +1137,7 @@ func (s *knowledgeService) processChunks(ctx context.Context,
span.RecordError(err)
return
}
logger.GetLogger(ctx).Infof("processChunks batch index successfully, with %d index", len(indexInfoList))
// Update knowledge status to completed
knowledge.ParseStatus = "completed"
@@ -1154,6 +1155,7 @@ func (s *knowledgeService) processChunks(ctx context.Context,
if err := s.tenantRepo.AdjustStorageUsed(ctx, tenantInfo.ID, totalStorageSize); err != nil {
logger.GetLogger(ctx).WithField("error", err).Errorf("processChunks update tenant storage used failed")
}
logger.GetLogger(ctx).Infof("processChunks successfully")
}
// GetSummary generates a summary for knowledge content using an AI model

View File

@@ -18,6 +18,7 @@ import (
"github.com/Tencent/WeKnora/internal/errors"
"github.com/Tencent/WeKnora/internal/logger"
"github.com/Tencent/WeKnora/internal/models/embedding"
"github.com/Tencent/WeKnora/internal/models/rerank"
"github.com/Tencent/WeKnora/internal/models/utils/ollama"
"github.com/Tencent/WeKnora/internal/types"
"github.com/Tencent/WeKnora/internal/types/interfaces"
@@ -1410,93 +1411,38 @@ func (h *InitializationHandler) checkRemoteModelConnection(ctx context.Context,
// checkRerankModelConnection 检查Rerank模型连接和功能的内部方法
func (h *InitializationHandler) checkRerankModelConnection(ctx context.Context,
modelName, baseURL, apiKey string) (bool, string) {
client := &http.Client{
Timeout: 15 * time.Second,
// 创建Reranker配置
config := &rerank.RerankerConfig{
APIKey: apiKey,
BaseURL: baseURL,
ModelName: modelName,
Source: types.ModelSourceRemote, // 默认值实际会根据URL判断
}
// 构造重排API端点
rerankEndpoint := baseURL + "/rerank"
// Mock测试数据
testQuery := "什么是人工智能?"
testPassages := []string{
"机器学习是人工智能的一个子领域,专注于算法和统计模型,使计算机系统能够通过经验自动改进。",
"深度学习是机器学习的一个子集,使用人工神经网络来模拟人脑的工作方式。",
}
// 构造重排请求
rerankRequest := map[string]interface{}{
"model": modelName,
"query": testQuery,
"documents": testPassages,
"truncate_prompt_tokens": 512,
}
jsonData, err := json.Marshal(rerankRequest)
// 创建Reranker实例
reranker, err := rerank.NewReranker(config)
if err != nil {
return false, fmt.Sprintf("构造请求失败: %v", err)
return false, fmt.Sprintf("创建Reranker失败: %v", err)
}
logger.Infof(ctx, "Rerank request: %s, modelName=%s, baseURL=%s, apiKey=%s",
string(jsonData), modelName, baseURL, apiKey)
// 简化的测试数据
testQuery := "ping"
testDocuments := []string{
"pong",
}
req, err := http.NewRequestWithContext(
ctx, "POST", rerankEndpoint, strings.NewReader(string(jsonData)),
)
// 使用Reranker进行测试
results, err := reranker.Rerank(ctx, testQuery, testDocuments)
if err != nil {
return false, fmt.Sprintf("创建请求失败: %v", err)
return false, fmt.Sprintf("重排测试失败: %v", err)
}
// 添加认证头
if apiKey != "" {
req.Header.Set("Authorization", "Bearer "+apiKey)
}
req.Header.Set("Content-Type", "application/json")
resp, err := client.Do(req)
if err != nil {
return false, fmt.Sprintf("连接失败: %v", err)
}
defer resp.Body.Close()
// 读取响应
body, err := io.ReadAll(resp.Body)
if err != nil {
return false, fmt.Sprintf("读取响应失败: %v", err)
}
// 检查响应状态
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
// 尝试解析重排响应
var rerankResp struct {
Results []struct {
Index int `json:"index"`
Document string `json:"document"`
RelevanceScore float64 `json:"relevance_score"`
} `json:"results"`
}
if err := json.Unmarshal(body, &rerankResp); err != nil {
// 如果无法解析标准重排响应,检查是否有其他格式
return true, "连接正常,但响应格式非标准"
}
// 检查是否返回了重排结果
if len(rerankResp.Results) > 0 {
return true, fmt.Sprintf("重排功能正常,返回%d个结果", len(rerankResp.Results))
} else {
return false, "重排接口连接成功,但未返回重排结果"
}
} else if resp.StatusCode == 401 {
return false, "认证失败请检查API Key"
} else if resp.StatusCode == 403 {
return false, "权限不足请检查API Key权限"
} else if resp.StatusCode == 404 {
return false, "重排API端点不存在请检查Base URL"
} else if resp.StatusCode == 422 {
return false, fmt.Sprintf("请求参数错误: %s", string(body))
// 检查结果
if len(results) > 0 {
return true, fmt.Sprintf("重排功能正常,返回%d个结果", len(results))
} else {
return false, fmt.Sprintf("API返回错误状态: %d, 响应: %s", resp.StatusCode, string(body))
return false, "重排接口连接成功,但未返回重排结果"
}
}

View File

@@ -0,0 +1,164 @@
package rerank
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"github.com/Tencent/WeKnora/internal/logger"
)
// AliyunReranker implements a reranking system based on Aliyun DashScope models
type AliyunReranker struct {
modelName string // Name of the model used for reranking
modelID string // Unique identifier of the model
apiKey string // API key for authentication
baseURL string // Base URL for API requests
client *http.Client // HTTP client for making API requests
}
// AliyunRerankRequest represents a request to rerank documents using Aliyun DashScope API
type AliyunRerankRequest struct {
Model string `json:"model"` // Model to use for reranking
Input AliyunRerankInput `json:"input"` // Input containing query and documents
Parameters AliyunRerankParameters `json:"parameters"` // Parameters for the reranking
}
// AliyunRerankInput contains the query and documents for reranking
type AliyunRerankInput struct {
Query string `json:"query"` // Query text to compare documents against
Documents []string `json:"documents"` // List of document texts to rerank
}
// AliyunRerankParameters contains parameters for the reranking request
type AliyunRerankParameters struct {
ReturnDocuments bool `json:"return_documents"` // Whether to return documents in response
TopN int `json:"top_n"` // Number of top results to return
}
// AliyunRerankResponse represents the response from Aliyun DashScope reranking request
type AliyunRerankResponse struct {
Output AliyunOutput `json:"output"` // Output containing results
Usage AliyunUsage `json:"usage"` // Token usage information
}
// AliyunOutput contains the reranking results
type AliyunOutput struct {
Results []AliyunRankResult `json:"results"` // Ranked results with relevance scores
}
// AliyunRankResult represents a single reranking result from Aliyun
type AliyunRankResult struct {
Document AliyunDocument `json:"document"` // Document information
Index int `json:"index"` // Original index of the document
RelevanceScore float64 `json:"relevance_score"` // Relevance score
}
// AliyunDocument represents document information in Aliyun response
type AliyunDocument struct {
Text string `json:"text"` // Document text
}
// AliyunUsage contains information about token usage in the Aliyun API request
type AliyunUsage struct {
TotalTokens int `json:"total_tokens"` // Total tokens consumed
}
// NewAliyunReranker creates a new instance of Aliyun reranker with the provided configuration
func NewAliyunReranker(config *RerankerConfig) (*AliyunReranker, error) {
apiKey := config.APIKey
baseURL := "https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank"
if url := config.BaseURL; url != "" {
baseURL = url
}
return &AliyunReranker{
modelName: config.ModelName,
modelID: config.ModelID,
apiKey: apiKey,
baseURL: baseURL,
client: &http.Client{},
}, nil
}
// Rerank performs document reranking based on relevance to the query using Aliyun DashScope API
func (r *AliyunReranker) Rerank(ctx context.Context, query string, documents []string) ([]RankResult, error) {
// Build the request body
requestBody := &AliyunRerankRequest{
Model: r.modelName,
Input: AliyunRerankInput{
Query: query,
Documents: documents,
},
Parameters: AliyunRerankParameters{
ReturnDocuments: true,
TopN: len(documents), // Return all documents
},
}
jsonData, err := json.Marshal(requestBody)
if err != nil {
return nil, fmt.Errorf("marshal request body: %w", err)
}
// Send the request
req, err := http.NewRequestWithContext(ctx, "POST", r.baseURL, bytes.NewBuffer(jsonData))
if err != nil {
return nil, fmt.Errorf("create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", r.apiKey))
// Log the curl equivalent for debugging
logger.Debugf(ctx, "curl -X POST %s -H \"Content-Type: application/json\" -H \"Authorization: Bearer %s\" -d '%s'",
r.baseURL, r.apiKey, string(jsonData),
)
resp, err := r.client.Do(req)
if err != nil {
return nil, fmt.Errorf("do request: %w", err)
}
defer resp.Body.Close()
// Read the response
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("read response body: %w", err)
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("aliyun rerank API error: Http Status: %s, Body: %s", resp.Status, string(body))
}
var response AliyunRerankResponse
if err := json.Unmarshal(body, &response); err != nil {
return nil, fmt.Errorf("unmarshal response: %w", err)
}
// Convert Aliyun results to standard RankResult format
results := make([]RankResult, len(response.Output.Results))
for i, aliyunResult := range response.Output.Results {
results[i] = RankResult{
Index: aliyunResult.Index,
Document: DocumentInfo{
Text: aliyunResult.Document.Text,
},
RelevanceScore: aliyunResult.RelevanceScore,
}
}
return results, nil
}
// GetModelName returns the name of the reranking model
func (r *AliyunReranker) GetModelName() string {
return r.modelName
}
// GetModelID returns the unique identifier of the reranking model
func (r *AliyunReranker) GetModelID() string {
return r.modelID
}

View File

@@ -88,10 +88,10 @@ type RerankerConfig struct {
// NewReranker creates a reranker
func NewReranker(config *RerankerConfig) (Reranker, error) {
switch strings.ToLower(string(config.Source)) {
case string(types.ModelSourceRemote):
// 根据URL判断模型来源而不是依赖Source字段
if strings.Contains(config.BaseURL, "https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank") {
return NewAliyunReranker(config)
} else {
return NewOpenAIReranker(config)
default:
return nil, fmt.Errorf("unsupported rerank model source: %s", config.Source)
}
}

View File

@@ -70,4 +70,5 @@ type KnowledgeRepository interface {
) (bool, *types.Knowledge, error)
// AminusB returns the difference set of A and B.
AminusB(ctx context.Context, Atenant uint, A string, Btenant uint, B string) ([]string, error)
UpdateKnowledgeColumn(ctx context.Context, id string, column string, value interface{}) error
}

View File

@@ -33,6 +33,7 @@ type ModelSource string
const (
ModelSourceLocal ModelSource = "local" // Local model
ModelSourceRemote ModelSource = "remote" // Remote model
ModelSourceAliyun ModelSource = "aliyun" // Aliyun DashScope model
)
type EmbeddingParameters struct {

View File

@@ -703,8 +703,8 @@ else
if [ "$START_OLLAMA" = true ] && [ "$START_DOCKER" = true ]; then
if [ $OLLAMA_RESULT -eq 0 ] && [ $DOCKER_RESULT -eq 0 ]; then
log_success "所有服务启动完成,可通过以下地址访问:"
echo -e "${GREEN} - 前端界面: http://localhost${NC}"
echo -e "${GREEN} - API接口: http://localhost:8080${NC}"
echo -e "${GREEN} - 前端界面: http://localhost:${FRONTEND_PORT:-80}${NC}"
echo -e "${GREEN} - API接口: http://localhost:${APP_PORT:-8080}${NC}"
echo -e "${GREEN} - Jaeger链路追踪: http://localhost:16686${NC}"
else
log_error "部分服务启动失败,请检查日志并修复问题"
@@ -714,8 +714,8 @@ else
echo -e "${GREEN} - Ollama API: http://localhost:$OLLAMA_PORT${NC}"
elif [ "$START_DOCKER" = true ] && [ $DOCKER_RESULT -eq 0 ]; then
log_success "Docker容器启动完成可通过以下地址访问:"
echo -e "${GREEN} - 前端界面: http://localhost${NC}"
echo -e "${GREEN} - API接口: http://localhost:8080${NC}"
echo -e "${GREEN} - 前端界面: http://localhost:${FRONTEND_PORT:-80}${NC}"
echo -e "${GREEN} - API接口: http://localhost:${APP_PORT:-8080}${NC}"
echo -e "${GREEN} - Jaeger链路追踪: http://localhost:16686${NC}"
fi
fi

View File

@@ -1,5 +1,6 @@
grpcio
grpcio-tools
grpcio-health-checking
protobuf
python-docx
PyPDF2
@@ -13,7 +14,7 @@ urllib3
markdownify
mistletoe
goose3[all]
paddleocr==3.0.0
paddleocr>=2.10.0,<3.0.0
markdown
pypdf
cos-python-sdk-v5
@@ -25,7 +26,7 @@ ollama
pdfplumber
--extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cpu/
paddlepaddle==3.0.0
paddlepaddle>=3.0.0,<4.0.0
# --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
# paddlepaddle-gpu==3.0.0

View File

@@ -24,4 +24,47 @@ logger = logging.getLogger(__name__)
def init_ocr_model():
PaddleOCR()
"""Initialize PaddleOCR model to pre-download and cache models"""
try:
logger.info("Initializing PaddleOCR model for pre-download...")
# 使用与代码中相同的配置
ocr_config = {
"use_gpu": False,
"text_det_limit_type": "max",
"text_det_limit_side_len": 960,
"use_doc_orientation_classify": True, # 启用文档方向分类
"use_doc_unwarping": False,
"use_textline_orientation": True, # 启用文本行方向检测
"text_recognition_model_name": "PP-OCRv4_server_rec",
"text_detection_model_name": "PP-OCRv4_server_det",
"text_det_thresh": 0.3,
"text_det_box_thresh": 0.6,
"text_det_unclip_ratio": 1.5,
"text_rec_score_thresh": 0.0,
"ocr_version": "PP-OCRv4",
"lang": "ch",
"show_log": False,
"use_dilation": True,
"det_db_score_mode": "slow",
}
# 初始化PaddleOCR这会触发模型下载和缓存
ocr = PaddleOCR(**ocr_config)
logger.info("PaddleOCR model initialization completed successfully")
# 测试OCR功能以确保模型正常工作
import numpy as np
from PIL import Image
# 创建一个简单的测试图像
test_image = np.ones((100, 300, 3), dtype=np.uint8) * 255
test_pil = Image.fromarray(test_image)
# 执行一次OCR测试
result = ocr.ocr(np.array(test_pil), cls=False)
logger.info("PaddleOCR test completed successfully")
except Exception as e:
logger.error(f"Failed to initialize PaddleOCR model: {str(e)}")
raise

View File

@@ -33,7 +33,9 @@ except ImportError:
except ImportError:
# If both imports fail, set to None
Caption = None
logging.warning("Failed to import Caption, image captioning will be unavailable")
logging.warning(
"Failed to import Caption, image captioning will be unavailable"
)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
@@ -78,7 +80,9 @@ class BaseParser(ABC):
"""
if cls._ocr_engine is None and not cls._ocr_engine_failed:
try:
cls._ocr_engine = OCREngine.get_instance(backend_type=backend_type, **kwargs)
cls._ocr_engine = OCREngine.get_instance(
backend_type=backend_type, **kwargs
)
if cls._ocr_engine is None:
cls._ocr_engine_failed = True
logger.error(f"Failed to initialize OCR engine ({backend_type})")
@@ -89,7 +93,6 @@ class BaseParser(ABC):
logger.error(f"Failed to initialize OCR engine: {str(e)}")
return None
return cls._ocr_engine
def __init__(
self,
@@ -135,7 +138,7 @@ class BaseParser(ABC):
self.max_concurrent_tasks = max_concurrent_tasks
self.max_chunks = max_chunks
self.chunking_config = chunking_config
logger.info(
f"Initializing {self.__class__.__name__} for file: {file_name}, type: {self.file_type}"
)
@@ -174,10 +177,14 @@ class BaseParser(ABC):
resized_image = self._resize_image_if_needed(image)
# Get OCR engine
ocr_engine = self.get_ocr_engine(backend_type=self.ocr_backend, **self.ocr_config)
ocr_engine = self.get_ocr_engine(
backend_type=self.ocr_backend, **self.ocr_config
)
if ocr_engine is None:
logger.error(f"OCR engine ({self.ocr_backend}) initialization failed or unavailable, "
"skipping OCR recognition")
logger.error(
f"OCR engine ({self.ocr_backend}) initialization failed or unavailable, "
"skipping OCR recognition"
)
return ""
# Execute OCR prediction
@@ -199,11 +206,13 @@ class BaseParser(ABC):
return ocr_result
except Exception as e:
process_time = time.time() - start_time
logger.error(f"OCR recognition error: {str(e)}, time: {process_time:.2f} seconds")
logger.error(
f"OCR recognition error: {str(e)}, time: {process_time:.2f} seconds"
)
return ""
finally:
# Release image resources
if resized_image is not image and hasattr(resized_image, 'close'):
if resized_image is not image and hasattr(resized_image, "close"):
# Only close the new image we created, not the original image
resized_image.close()
@@ -218,25 +227,33 @@ class BaseParser(ABC):
"""
try:
# If it's a PIL Image
if hasattr(image, 'size'):
if hasattr(image, "size"):
width, height = image.size
if width > self.max_image_size or height > self.max_image_size:
logger.info(f"Resizing PIL image, original size: {width}x{height}")
scale = min(self.max_image_size / width, self.max_image_size / height)
scale = min(
self.max_image_size / width, self.max_image_size / height
)
new_width = int(width * scale)
new_height = int(height * scale)
resized_image = image.resize((new_width, new_height))
logger.info(f"Resized to: {new_width}x{new_height}")
return resized_image
else:
logger.info(f"PIL image size {width}x{height} is within limits, no resizing needed")
logger.info(
f"PIL image size {width}x{height} is within limits, no resizing needed"
)
return image
# If it's a numpy array
elif hasattr(image, 'shape'):
elif hasattr(image, "shape"):
height, width = image.shape[:2]
if width > self.max_image_size or height > self.max_image_size:
logger.info(f"Resizing numpy image, original size: {width}x{height}")
scale = min(self.max_image_size / width, self.max_image_size / height)
logger.info(
f"Resizing numpy image, original size: {width}x{height}"
)
scale = min(
self.max_image_size / width, self.max_image_size / height
)
new_width = int(width * scale)
new_height = int(height * scale)
# Use PIL for resizing numpy arrays
@@ -246,7 +263,9 @@ class BaseParser(ABC):
logger.info(f"Resized to: {new_width}x{new_height}")
return resized_image
else:
logger.info(f"Numpy image size {width}x{height} is within limits, no resizing needed")
logger.info(
f"Numpy image size {width}x{height} is within limits, no resizing needed"
)
return image
else:
logger.warning(f"Unknown image type: {type(image)}, cannot resize")
@@ -278,7 +297,9 @@ class BaseParser(ABC):
caption = ""
if self.caption_parser:
logger.info(f"OCR successfully extracted {len(ocr_text)} characters, continuing to get caption")
logger.info(
f"OCR successfully extracted {len(ocr_text)} characters, continuing to get caption"
)
# Convert image to base64 for caption generation
img_base64 = image_to_base64(image)
if img_base64:
@@ -295,7 +316,7 @@ class BaseParser(ABC):
# Release image resources
del image
return ocr_text, caption, image_url
async def process_image_async(self, image, image_url=None):
@@ -325,13 +346,17 @@ class BaseParser(ABC):
ocr_task = loop.run_in_executor(None, self.perform_ocr, resized_image)
ocr_text = await asyncio.wait_for(ocr_task, timeout=30.0)
except asyncio.TimeoutError:
logger.error("OCR processing timed out (30 seconds), skipping this image")
logger.error(
"OCR processing timed out (30 seconds), skipping this image"
)
ocr_text = ""
except Exception as e:
logger.error(f"OCR processing error: {str(e)}")
ocr_text = ""
logger.info(f"OCR successfully extracted {len(ocr_text)} characters, continuing to get caption")
logger.info(
f"OCR successfully extracted {len(ocr_text)} characters, continuing to get caption"
)
caption = ""
if self.caption_parser:
try:
@@ -340,9 +365,13 @@ class BaseParser(ABC):
if img_base64:
# Add timeout to avoid blocking caption retrieval (30 seconds timeout)
caption_task = self.get_image_caption_async(img_base64)
image_data, caption = await asyncio.wait_for(caption_task, timeout=30.0)
image_data, caption = await asyncio.wait_for(
caption_task, timeout=30.0
)
if caption:
logger.info(f"Successfully obtained image caption: {caption}")
logger.info(
f"Successfully obtained image caption: {caption}"
)
else:
logger.warning("Failed to get caption")
else:
@@ -353,27 +382,20 @@ class BaseParser(ABC):
except Exception as e:
logger.error(f"Failed to get caption: {str(e)}")
else:
logger.info("Caption service not initialized, skipping caption retrieval")
logger.info(
"Caption service not initialized, skipping caption retrieval"
)
return ocr_text, caption, image_url
finally:
# Release image resources
if resized_image is not image and hasattr(resized_image, 'close'):
if resized_image is not image and hasattr(resized_image, "close"):
# Only close the new image we created, not the original image
resized_image.close()
async def process_with_limit(self, idx, image, url, semaphore, current_request_id=None):
async def process_with_limit(self, idx, image, url, semaphore):
"""Function to process a single image using a semaphore"""
try:
# Set request ID in the asynchronous task
if current_request_id:
try:
from utils.request import set_request_id
set_request_id(current_request_id)
logger.info(f"Asynchronous task {idx+1} setting request ID: {current_request_id}")
except Exception as e:
logger.warning(f"Failed to set request ID in asynchronous task: {str(e)}")
logger.info(f"Waiting to process image {idx+1}")
async with semaphore: # Use semaphore to control concurrency
logger.info(f"Starting to process image {idx+1}")
@@ -385,7 +407,7 @@ class BaseParser(ABC):
return ("", "", url) # Return empty result to avoid overall failure
finally:
# Manually release image resources
if hasattr(image, 'close'):
if hasattr(image, "close"):
image.close()
async def process_multiple_images(self, images_data):
@@ -404,26 +426,19 @@ class BaseParser(ABC):
return []
# Set max concurrency, reduce concurrency to avoid resource contention
max_concurrency = min(self.max_concurrent_tasks, 5) # Reduce concurrency to prevent excessive memory usage
max_concurrency = min(
self.max_concurrent_tasks, 1
) # Reduce concurrency to prevent excessive memory usage
# Use semaphore to limit concurrency
semaphore = asyncio.Semaphore(max_concurrency)
# Store results to avoid overall failure due to task failure
results = []
# Get current request ID to set in each asynchronous task
current_request_id = None
try:
from utils.request import get_request_id
current_request_id = get_request_id()
logger.info(f"Capturing current request ID before async processing: {current_request_id}")
except Exception as e:
logger.warning(f"Failed to get current request ID: {str(e)}")
# Create all tasks, but use semaphore to limit actual concurrency
tasks = [
self.process_with_limit(i, img, url, semaphore, current_request_id)
self.process_with_limit(i, img, url, semaphore)
for i, (img, url) in enumerate(images_data)
]
@@ -434,7 +449,9 @@ class BaseParser(ABC):
# Handle possible exception results
for i, result in enumerate(completed_results):
if isinstance(result, Exception):
logger.error(f"Image {i+1} processing returned an exception: {str(result)}")
logger.error(
f"Image {i+1} processing returned an exception: {str(result)}"
)
# For exceptions, add empty results
if i < len(images_data):
results.append(("", "", images_data[i][1]))
@@ -449,7 +466,9 @@ class BaseParser(ABC):
images_data.clear()
logger.info("Image processing resource cleanup complete")
logger.info(f"Completed concurrent processing of {len(results)}/{len(images_data)} images")
logger.info(
f"Completed concurrent processing of {len(results)}/{len(images_data)} images"
)
return results
def decode_bytes(self, content: bytes) -> str:
@@ -529,9 +548,13 @@ class BaseParser(ABC):
def __init_storage(self):
"""Initialize storage client based on configuration"""
if self._storage is None:
storage_config = self.chunking_config.storage_config if self.chunking_config else None
storage_config = (
self.chunking_config.storage_config if self.chunking_config else None
)
self._storage = create_storage(storage_config)
logger.info(f"Initialized storage client: {self._storage.__class__.__name__}")
logger.info(
f"Initialized storage client: {self._storage.__class__.__name__}"
)
return self._storage
def upload_file(self, file_path: str) -> str:
@@ -605,40 +628,50 @@ class BaseParser(ABC):
logger.info(f"Beginning chunking process for text")
chunks = self.chunk_text(text)
logger.info(f"Created {len(chunks)} chunks from document")
# Limit the number of returned chunks
if len(chunks) > self.max_chunks:
logger.warning(f"Limiting chunks from {len(chunks)} to maximum {self.max_chunks}")
chunks = chunks[:self.max_chunks]
logger.warning(
f"Limiting chunks from {len(chunks)} to maximum {self.max_chunks}"
)
chunks = chunks[: self.max_chunks]
# If multimodal is enabled and file type is supported, process images in each chunk
if self.enable_multimodal:
# Get file extension and convert to lowercase
file_ext = (
os.path.splitext(self.file_name)[1].lower()
if self.file_name
else (
self.file_type.lower()
if self.file_type
else ""
)
else (self.file_type.lower() if self.file_type else "")
)
# Define allowed file types for image processing
allowed_types = [
'.pdf', # PDF files
'.md', '.markdown', # Markdown files
'.doc', '.docx', # Word documents
".pdf", # PDF files
".md",
".markdown", # Markdown files
".doc",
".docx", # Word documents
# Image files
'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'
".jpg",
".jpeg",
".png",
".gif",
".bmp",
".tiff",
".webp",
]
if file_ext in allowed_types:
logger.info(f"Processing images in each chunk for file type: {file_ext}")
logger.info(
f"Processing images in each chunk for file type: {file_ext}"
)
chunks = self.process_chunks_images(chunks, image_map)
else:
logger.info(f"Skipping image processing for unsupported file type: {file_ext}")
logger.info(
f"Skipping image processing for unsupported file type: {file_ext}"
)
return ParseResult(text=text, chunks=chunks)
def _split_into_units(self, text: str) -> List[str]:
@@ -649,11 +682,13 @@ class BaseParser(ABC):
Returns:
基本单元的列表
"""
logger.info(f"Splitting text into basic units with robust structure protection, text length: {len(text)}")
logger.info(
f"Splitting text into basic units with robust structure protection, text length: {len(text)}"
)
# 定义所有需要作为整体保护的结构模式 ---
table_pattern = r"(?m)(^\|.*\|[ \t]*\r?\n(?:[ \t]*\r?\n)?^\|\s*:?--+.*\r?\n(?:^\|.*\|\r?\n?)*)"
# 其他需要保护的结构(代码块、公式块、行内元素)
code_block_pattern = r"```[\s\S]*?```"
math_block_pattern = r"\$\$[\s\S]*?\$\$"
@@ -661,7 +696,12 @@ class BaseParser(ABC):
# 查找所有受保护结构的位置 ---
protected_ranges = []
for pattern in [table_pattern, code_block_pattern, math_block_pattern, inline_pattern]:
for pattern in [
table_pattern,
code_block_pattern,
math_block_pattern,
inline_pattern,
]:
for match in re.finditer(pattern, text):
# 确保匹配到的不是空字符串,避免无效范围
if match.group(0).strip():
@@ -669,8 +709,10 @@ class BaseParser(ABC):
# 按起始位置排序
protected_ranges.sort(key=lambda x: x[0])
logger.info(f"Found {len(protected_ranges)} protected structures (tables, code, formulas, images, links).")
logger.info(
f"Found {len(protected_ranges)} protected structures (tables, code, formulas, images, links)."
)
# 合并可能重叠的保护范围 ---
# 确保我们有一组不相交的、需要保护的文本块
if protected_ranges:
@@ -685,15 +727,17 @@ class BaseParser(ABC):
# 如果不重叠,则完成当前范围并开始一个新的范围
merged_ranges.append((current_start, current_end))
current_start, current_end = next_start, next_end
merged_ranges.append((current_start, current_end))
protected_ranges = merged_ranges
logger.info(f"After merging overlaps, {len(protected_ranges)} protected ranges remain.")
logger.info(
f"After merging overlaps, {len(protected_ranges)} protected ranges remain."
)
# 根据保护范围和分隔符来分割文本 ---
units = []
last_end = 0
# 定义分隔符的正则表达式,通过加括号来保留分隔符本身
separator_pattern = f"({'|'.join(re.escape(s) for s in self.separators)})"
@@ -703,7 +747,7 @@ class BaseParser(ABC):
pre_text = text[last_end:start]
# 对这部分非保护文本进行分割,并保留分隔符
segments = re.split(separator_pattern, pre_text)
units.extend([s for s in segments if s]) # 添加所有非空部分
units.extend([s for s in segments if s]) # 添加所有非空部分
# b. 将整个受保护的块(例如,一个完整的表格)作为一个单独的、不可分割的单元添加
protected_text = text[start:end]
@@ -715,10 +759,11 @@ class BaseParser(ABC):
if last_end < len(text):
post_text = text[last_end:]
segments = re.split(separator_pattern, post_text)
units.extend([s for s in segments if s]) # 添加所有非空部分
units.extend([s for s in segments if s]) # 添加所有非空部分
logger.info(f"Text splitting complete, created {len(units)} final basic units.")
return units
def _find_complete_units(self, units: List[str], target_size: int) -> List[str]:
"""Find a list of complete units that do not exceed the target size
@@ -884,71 +929,58 @@ class BaseParser(ABC):
"""
logger.info(f"Extracting image information from Chunk #{chunk.seq}")
text = chunk.content
# Regex to extract image information from text, supporting Markdown images and HTML images
img_pattern = r'!\[([^\]]*)\]\(([^)]+)\)|<img [^>]*src="([^"]+)" [^>]*>'
# Extract image information
img_matches = list(re.finditer(img_pattern, text))
logger.info(f"Chunk #{chunk.seq} found {len(img_matches)} images")
images_info = []
for match_idx, match in enumerate(img_matches):
# Process image URL
img_url = match.group(2) if match.group(2) else match.group(3)
alt_text = match.group(1) if match.group(1) else ""
# Record image information
image_info = {
"original_url": img_url,
"start": match.start(),
"end": match.end(),
"alt_text": alt_text,
"match_text": text[match.start():match.end()]
"match_text": text[match.start() : match.end()],
}
images_info.append(image_info)
logger.info(
f"Image in Chunk #{chunk.seq} {match_idx+1}: "
f"URL={img_url[:50]}..."
f"Image in Chunk #{chunk.seq} {match_idx+1}: " f"URL={img_url[:50]}..."
if len(img_url) > 50
else f"Image in Chunk #{chunk.seq} {match_idx+1}: URL={img_url}"
)
return images_info
async def download_and_upload_image(self, img_url: str, current_request_id=None, image_map=None):
async def download_and_upload_image(self, img_url: str):
"""Download image and upload to object storage, if it's already an object storage path or local path, use directly
Args:
img_url: Image URL or local path
current_request_id: Current request ID
image_map: Optional dictionary mapping image URLs to Image objects
Returns:
tuple: (original URL, storage URL, image object), if failed returns (original URL, None, None)
"""
# Set request ID context in the asynchronous task
try:
if current_request_id:
from utils.request import set_request_id
set_request_id(current_request_id)
logger.info(f"Asynchronous task setting request ID: {current_request_id}")
except Exception as e:
logger.warning(f"Failed to set request ID in asynchronous task: {str(e)}")
try:
import requests
from PIL import Image
import io
# Check if image is already in the image_map
if image_map and img_url in image_map:
logger.info(f"Image already in image_map: {img_url}, using cached object")
return img_url, img_url, image_map[img_url]
# Check if it's already a storage URL (COS or MinIO)
is_storage_url = any(pattern in img_url for pattern in ["cos", "myqcloud.com", "minio", ".s3."])
is_storage_url = any(
pattern in img_url
for pattern in ["cos", "myqcloud.com", "minio", ".s3."]
)
if is_storage_url:
logger.info(f"Image already on COS: {img_url}, no need to re-upload")
try:
@@ -961,7 +993,7 @@ class BaseParser(ABC):
proxies["http"] = http_proxy
if https_proxy:
proxies["https"] = https_proxy
response = requests.get(img_url, timeout=5, proxies=proxies)
if response.status_code == 200:
image = Image.open(io.BytesIO(response.content))
@@ -972,12 +1004,14 @@ class BaseParser(ABC):
# Image will be closed by the caller
pass
else:
logger.warning(f"Failed to get storage image: {response.status_code}")
logger.warning(
f"Failed to get storage image: {response.status_code}"
)
return img_url, img_url, None
except Exception as e:
logger.error(f"Error getting storage image: {str(e)}")
return img_url, img_url, None
# Check if it's a local file path
elif os.path.exists(img_url) and os.path.isfile(img_url):
logger.info(f"Using local image file: {img_url}")
@@ -986,17 +1020,19 @@ class BaseParser(ABC):
# Read local image
image = Image.open(img_url)
# Upload to storage
with open(img_url, 'rb') as f:
with open(img_url, "rb") as f:
content = f.read()
storage_url = self.upload_bytes(content)
logger.info(f"Successfully uploaded local image to storage: {storage_url}")
logger.info(
f"Successfully uploaded local image to storage: {storage_url}"
)
return img_url, storage_url, image
except Exception as e:
logger.error(f"Error processing local image: {str(e)}")
if image and hasattr(image, 'close'):
if image and hasattr(image, "close"):
image.close()
return img_url, None, None
# Normal remote URL download handling
else:
# Get proxy settings from environment variables
@@ -1007,17 +1043,21 @@ class BaseParser(ABC):
proxies["http"] = http_proxy
if https_proxy:
proxies["https"] = https_proxy
logger.info(f"Downloading image {img_url}, using proxy: {proxies if proxies else 'None'}")
logger.info(
f"Downloading image {img_url}, using proxy: {proxies if proxies else 'None'}"
)
response = requests.get(img_url, timeout=5, proxies=proxies)
if response.status_code == 200:
# Download successful, create image object
image = Image.open(io.BytesIO(response.content))
try:
# Upload to storage using the method in BaseParser
storage_url = self.upload_bytes(response.content)
logger.info(f"Successfully uploaded image to storage: {storage_url}")
logger.info(
f"Successfully uploaded image to storage: {storage_url}"
)
return img_url, storage_url, image
finally:
# Image will be closed by the caller
@@ -1025,74 +1065,79 @@ class BaseParser(ABC):
else:
logger.warning(f"Failed to download image: {response.status_code}")
return img_url, None, None
except Exception as e:
logger.error(f"Error downloading or processing image: {str(e)}")
return img_url, None, None
async def process_chunk_images_async(self, chunk, chunk_idx, total_chunks, current_request_id=None, image_map=None):
async def process_chunk_images_async(
self, chunk, chunk_idx, total_chunks, image_map=None
):
"""Asynchronously process images in a single Chunk
Args:
chunk: Chunk object to process
chunk_idx: Chunk index
total_chunks: Total number of chunks
current_request_id: Current request ID
image_map: Optional dictionary mapping image URLs to Image objects
Returns:
Processed Chunk object
"""
# Set request ID context in the asynchronous task
try:
if current_request_id:
from utils.request import set_request_id
set_request_id(current_request_id)
logger.info(f"Chunk processing task #{chunk_idx+1} setting request ID: {current_request_id}")
except Exception as e:
logger.warning(f"Failed to set request ID in Chunk processing task: {str(e)}")
logger.info(f"Starting to process images in Chunk #{chunk_idx+1}/{total_chunks}")
logger.info(
f"Starting to process images in Chunk #{chunk_idx+1}/{total_chunks}"
)
# Extract image information from the Chunk
images_info = self.extract_images_from_chunk(chunk)
if not images_info:
logger.info(f"Chunk #{chunk_idx+1} found no images")
return chunk
# Prepare images that need to be downloaded and processed
images_to_process = []
url_to_info_map = {} # Map URL to image information
# Record all image URLs that need to be processed
for img_info in images_info:
url = img_info["original_url"]
url_to_info_map[url] = img_info
# Create an asynchronous event loop (current loop)
loop = asyncio.get_event_loop()
# Concurrent download and upload of images
tasks = [self.download_and_upload_image(url, current_request_id, image_map) for url in url_to_info_map.keys()]
results = await asyncio.gather(*tasks)
results = []
download_tasks = []
for img_url in url_to_info_map.keys(): # Check if image is already in the image_map
if image_map and img_url in image_map:
logger.info(f"Image already in image_map: {img_url}, using cached object")
results.append((img_url, img_url, image_map[img_url]))
else:
download_task = self.download_and_upload_image(img_url)
download_tasks.append(download_task)
# Concurrent download and upload of images, ignore images that are already in the image_map
results.extend(await asyncio.gather(*download_tasks))
# Process download results, prepare for OCR processing
for orig_url, cos_url, image in results:
if cos_url and image:
img_info = url_to_info_map[orig_url]
img_info["cos_url"] = cos_url
images_to_process.append((image, cos_url))
# If no images were successfully downloaded and uploaded, return the original Chunk
if not images_to_process:
logger.info(f"Chunk #{chunk_idx+1} found no successfully downloaded and uploaded images")
logger.info(
f"Chunk #{chunk_idx+1} found no successfully downloaded and uploaded images"
)
return chunk
# Concurrent processing of all images (OCR + caption)
logger.info(f"Processing {len(images_to_process)} images in Chunk #{chunk_idx+1}")
logger.info(
f"Processing {len(images_to_process)} images in Chunk #{chunk_idx+1}"
)
# Concurrent processing of all images
processed_results = await self.process_multiple_images(images_to_process)
# Process OCR and Caption results
for ocr_text, caption, img_url in processed_results:
# Find the corresponding original URL
@@ -1100,22 +1145,24 @@ class BaseParser(ABC):
if info.get("cos_url") == img_url:
info["ocr_text"] = ocr_text if ocr_text else ""
info["caption"] = caption if caption else ""
if ocr_text:
logger.info(f"Image OCR extracted {len(ocr_text)} characters: {img_url}")
logger.info(
f"Image OCR extracted {len(ocr_text)} characters: {img_url}"
)
if caption:
logger.info(f"Obtained image description: '{caption}'")
break
# Add processed image information to the Chunk
processed_images = []
for img_info in images_info:
if "cos_url" in img_info:
processed_images.append(img_info)
# Update image information in the Chunk
chunk.images = processed_images
logger.info(f"Completed image processing in Chunk #{chunk_idx+1}")
return chunk
@@ -1128,42 +1175,37 @@ class BaseParser(ABC):
Returns:
List of processed document chunks
"""
logger.info(f"Starting concurrent processing of images in all {len(chunks)} chunks")
logger.info(
f"Starting concurrent processing of images in all {len(chunks)} chunks"
)
if not chunks:
logger.warning("No chunks to process")
return chunks
# Get current request ID to pass to asynchronous tasks
current_request_id = None
try:
from utils.request import get_request_id
current_request_id = get_request_id()
logger.info(f"Capturing current request ID before async processing: {current_request_id}")
except Exception as e:
logger.warning(f"Failed to get current request ID: {str(e)}")
# Create and run all Chunk concurrent processing tasks
async def process_all_chunks():
# Set max concurrency, reduce concurrency to avoid resource contention
max_concurrency = min(self.max_concurrent_tasks, 5) # Reduce concurrency
max_concurrency = min(self.max_concurrent_tasks, 1) # Reduce concurrency
# Use semaphore to limit concurrency
semaphore = asyncio.Semaphore(max_concurrency)
async def process_with_limit(chunk, idx, total):
"""Use semaphore to control concurrent processing of Chunks"""
async with semaphore:
return await self.process_chunk_images_async(chunk, idx, total, current_request_id, image_map)
return await self.process_chunk_images_async(
chunk, idx, total, image_map
)
# Create tasks for all Chunks
tasks = [
process_with_limit(chunk, idx, len(chunks))
for idx, chunk in enumerate(chunks)
]
# Execute all tasks concurrently
results = await asyncio.gather(*tasks, return_exceptions=True)
# Handle possible exceptions
processed_chunks = []
for i, result in enumerate(results):
@@ -1174,9 +1216,9 @@ class BaseParser(ABC):
processed_chunks.append(chunks[i])
else:
processed_chunks.append(result)
return processed_chunks
# Create event loop and run all tasks
try:
# Check if event loop already exists
@@ -1189,11 +1231,13 @@ class BaseParser(ABC):
# If no event loop, create a new one
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# Execute processing for all Chunks
processed_chunks = loop.run_until_complete(process_all_chunks())
logger.info(f"Successfully completed concurrent processing of {len(processed_chunks)}/{len(chunks)} chunks")
logger.info(
f"Successfully completed concurrent processing of {len(processed_chunks)}/{len(chunks)} chunks"
)
return processed_chunks
except Exception as e:
logger.error(f"Error during concurrent chunk processing: {str(e)}")

View File

@@ -54,6 +54,7 @@ class DocParser(BaseParser):
enable_multimodal=self.enable_multimodal,
chunk_size=self.chunk_size,
chunk_overlap=self.chunk_overlap,
chunking_config=self.chunking_config,
separators=self.separators,
)
text = docx_parser.parse_into_text(docx_content)

View File

@@ -33,30 +33,40 @@ class PaddleOCRBackend(OCRBackend):
"""Initialize PaddleOCR backend"""
self.ocr = None
try:
import os
import paddle
# Set PaddlePaddle to use CPU and disable GPU
os.environ['CUDA_VISIBLE_DEVICES'] = ''
paddle.set_device('cpu')
from paddleocr import PaddleOCR
# Default OCR configuration
# OCR configuration with text orientation classification enabled
ocr_config = {
"text_det_limit_type": "max", # Change from 'min' to 'max'
"text_det_limit_side_len": 960, # A standard and safe limit for the longest side
"use_doc_orientation_classify": False, # Do not use document image orientation classification
"use_doc_unwarping": False, # Do not use document unwarping
"use_textline_orientation": False, # Do not use textline orientation classification
"text_recognition_model_name": "PP-OCRv5_server_rec",
"text_detection_model_name": "PP-OCRv5_server_det",
"text_recognition_model_dir": "/root/.paddlex/official_models/PP-OCRv5_server_rec_infer",
"text_detection_model_dir": "/root/.paddlex/official_models/PP-OCRv5_server_det_infer",
"text_det_thresh": 0.3, # Text detection pixel threshold
"text_det_box_thresh": 0.6, # Text detection box threshold
"text_det_unclip_ratio": 1.5, # Text detection expansion ratio
"text_rec_score_thresh": 0.0, # Text recognition confidence threshold
"ocr_version": "PP-OCRv5", # Switch to PP-OCRv4 here to compare
"use_gpu": False,
"text_det_limit_type": "max",
"text_det_limit_side_len": 960,
"use_doc_orientation_classify": True, # 启用文档方向分类
"use_doc_unwarping": False,
"use_textline_orientation": True, # 启用文本行方向检测
"text_recognition_model_name": "PP-OCRv4_server_rec",
"text_detection_model_name": "PP-OCRv4_server_det",
"text_det_thresh": 0.3,
"text_det_box_thresh": 0.6,
"text_det_unclip_ratio": 1.5,
"text_rec_score_thresh": 0.0,
"ocr_version": "PP-OCRv4",
"lang": "ch",
"show_log": False,
"use_dilation": True, # improves accuracy
"det_db_score_mode": "slow", # improves accuracy
}
self.ocr = PaddleOCR(**ocr_config)
logger.info("PaddleOCR engine initialized successfully")
except ImportError:
logger.error("Failed to import paddleocr. Please install it with 'pip install paddleocr'")
except ImportError as e:
logger.error(f"Failed to import paddleocr: {str(e)}. Please install it with 'pip install paddleocr'")
except Exception as e:
logger.error(f"Failed to initialize PaddleOCR: {str(e)}")
@@ -71,50 +81,39 @@ class PaddleOCRBackend(OCRBackend):
"""
try:
# Ensure image is in RGB format
if hasattr(image, "convert") and image.mode != "RGB":
image = image.convert("RGB")
# Convert to numpy array if needed
if hasattr(image, "convert"):
if image.mode == "RGBA":
img_for_ocr = image.convert("RGB") # 尝试转换为 RGB
logger.info(f"Converted image from RGBA to RGB format for OCR.")
elif image.mode != "RGB": # 如果不是 RGBA 也不是 RGB也尝试转 RGB
img_for_ocr = image.convert("RGB")
logger.info(f"Converted image from {image.mode} to RGB format for OCR.")
else:
img_for_ocr = image
logger.info(f"Image already in RGB format.")
image_array = np.array(image)
else:
img_for_ocr = image
logger.info(f"Image is not a PIL.Image object, assuming it's already suitable for OCR.")
image_array = image
# Convert to numpy array if not already
if hasattr(img_for_ocr, "convert"):
image_array = np.array(img_for_ocr)
else:
image_array = img_for_ocr
ocr_result = self.ocr.predict(image_array)
# Perform OCR
ocr_result = self.ocr.ocr(image_array, cls=False)
# Extract text
if ocr_result and any(ocr_result):
ocr_text = ""
for image_result in ocr_result:
ocr_text = ocr_text + " ".join(image_result["rec_texts"])
text_length = len(ocr_text)
if text_length > 0:
logger.info(f"OCR extracted {text_length} characters")
logger.info(
f"OCR text sample: {ocr_text[:100]}..."
if text_length > 100
else f"OCR text: {ocr_text}"
)
return ocr_text
else:
logger.warning("OCR returned empty result")
ocr_text = ""
if ocr_result and ocr_result[0]:
for line in ocr_result[0]:
if line and len(line) >= 2:
text = line[1][0] if line[1] else ""
if text:
ocr_text += text + " "
text_length = len(ocr_text.strip())
if text_length > 0:
logger.info(f"OCR extracted {text_length} characters")
return ocr_text.strip()
else:
logger.warning("OCR did not return any result")
return ""
logger.warning("OCR returned empty result")
return ""
except Exception as e:
logger.error(f"OCR recognition error: {str(e)}")
return ""
class NanonetsOCRBackend(OCRBackend):
"""Nanonets OCR backend implementation using OpenAI API format"""

View File

@@ -5,6 +5,9 @@ from concurrent import futures
import traceback
import grpc
import uuid
import atexit
from grpc_health.v1 import health_pb2_grpc
from grpc_health.v1.health import HealthServicer
# Add parent directory to Python path
current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -324,99 +327,51 @@ def init_ocr_engine(ocr_backend, ocr_config):
logger.error(f"Error initializing OCR engine: {str(e)}")
return False
def serve():
init_ocr_engine(os.getenv("OCR_BACKEND", "paddle"), {
"OCR_API_BASE_URL": os.getenv("OCR_API_BASE_URL", ""),
})
# Set max number of worker threads and processes
# Set max number of worker threads
max_workers = int(os.environ.get("GRPC_MAX_WORKERS", "4"))
worker_processes = int(os.environ.get("GRPC_WORKER_PROCESSES", str(os.cpu_count() or 1)))
logger.info(f"Starting DocReader service, max worker threads per process: {max_workers}, "
f"processes: {worker_processes}")
logger.info(f"Starting DocReader service with {max_workers} worker threads")
# Get port number
port = os.environ.get("GRPC_PORT", "50051")
# Multi-process mode
if worker_processes > 1:
import multiprocessing
processes = []
def run_server():
# Create server
server = grpc.server(
futures.ThreadPoolExecutor(max_workers=max_workers),
options=[
('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
],
)
# Register service
docreader_pb2_grpc.add_DocReaderServicer_to_server(DocReaderServicer(), server)
# Set listen address
server.add_insecure_port(f"[::]:{port}")
# Start service
server.start()
logger.info(f"Worker process {os.getpid()} started on port {port}")
try:
# Wait for service termination
server.wait_for_termination()
except KeyboardInterrupt:
logger.info(f"Worker process {os.getpid()} received termination signal")
server.stop(0)
# Start specified number of worker processes
for i in range(worker_processes):
process = multiprocessing.Process(target=run_server)
processes.append(process)
process.start()
logger.info(f"Started worker process {process.pid} ({i+1}/{worker_processes})")
# Wait for all processes to complete
try:
for process in processes:
process.join()
except KeyboardInterrupt:
logger.info("Master process received termination signal")
for process in processes:
if process.is_alive():
logger.info(f"Terminating worker process {process.pid}")
process.terminate()
# Create server
server = grpc.server(
futures.ThreadPoolExecutor(max_workers=max_workers),
options=[
('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
],
)
# Single-process mode
else:
# Create server
server = grpc.server(
futures.ThreadPoolExecutor(max_workers=max_workers),
options=[
('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
],
)
# Register service
docreader_pb2_grpc.add_DocReaderServicer_to_server(DocReaderServicer(), server)
# Set listen address
server.add_insecure_port(f"[::]:{port}")
# Start service
server.start()
logger.info(f"Server started on port {port} (single process mode)")
logger.info("Server is ready to accept connections")
try:
# Wait for service termination
server.wait_for_termination()
except KeyboardInterrupt:
logger.info("Received termination signal, shutting down server")
server.stop(0)
# Register services
docreader_pb2_grpc.add_DocReaderServicer_to_server(DocReaderServicer(), server)
# Register health check service
health_servicer = HealthServicer()
health_pb2_grpc.add_HealthServicer_to_server(health_servicer, server)
# Set listen address
server.add_insecure_port(f"[::]:{port}")
# Start service
server.start()
logger.info(f"Server started on port {port}")
logger.info("Server is ready to accept connections")
try:
# Wait for service termination
server.wait_for_termination()
except KeyboardInterrupt:
logger.info("Received termination signal, shutting down server")
server.stop(0)
if __name__ == "__main__":
serve()