Merge pull request #1735 from langbot-app/fix/text_splitter

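fix: langchain import error (import RecursiveCharacterTextSplitter from langchain_text_splitters instead of langchain.text_splitter)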
This commit is contained in:
Guanchao Wang
2025-10-30 12:55:10 +08:00
committed by GitHub
2 changed files with 2 additions and 15 deletions

View File

@@ -4,6 +4,7 @@ import json
from typing import List
from pkg.rag.knowledge.services import base_service
from pkg.core import app
from langchain_text_splitters import RecursiveCharacterTextSplitter
class Chunker(base_service.BaseService):
@@ -27,21 +28,6 @@ class Chunker(base_service.BaseService):
"""
if not text:
return []
# words = text.split()
# chunks = []
# current_chunk = []
# for word in words:
# current_chunk.append(word)
# if len(current_chunk) > self.chunk_size:
# chunks.append(" ".join(current_chunk[:self.chunk_size]))
# current_chunk = current_chunk[self.chunk_size - self.chunk_overlap:]
# if current_chunk:
# chunks.append(" ".join(current_chunk))
# A more robust chunking strategy (e.g., using recursive character text splitter)
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=self.chunk_size,

View File

@@ -60,6 +60,7 @@ dependencies = [
"ebooklib>=0.18",
"html2text>=2024.2.26",
"langchain>=0.2.0",
"langchain-text-splitters>=0.0.1",
"chromadb>=0.4.24",
"qdrant-client (>=1.15.1,<2.0.0)",
"langbot-plugin==0.1.4",