mirror of
https://github.com/langbot-app/LangBot.git
synced 2025-11-25 03:15:06 +08:00
Merge pull request #1735 from langbot-app/fix/text_splitter
fix: langchain error
This commit is contained in:
@@ -4,6 +4,7 @@ import json
|
||||
from typing import List
|
||||
from pkg.rag.knowledge.services import base_service
|
||||
from pkg.core import app
|
||||
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
||||
|
||||
|
||||
class Chunker(base_service.BaseService):
|
||||
@@ -27,21 +28,6 @@ class Chunker(base_service.BaseService):
|
||||
"""
|
||||
if not text:
|
||||
return []
|
||||
# words = text.split()
|
||||
# chunks = []
|
||||
# current_chunk = []
|
||||
|
||||
# for word in words:
|
||||
# current_chunk.append(word)
|
||||
# if len(current_chunk) > self.chunk_size:
|
||||
# chunks.append(" ".join(current_chunk[:self.chunk_size]))
|
||||
# current_chunk = current_chunk[self.chunk_size - self.chunk_overlap:]
|
||||
|
||||
# if current_chunk:
|
||||
# chunks.append(" ".join(current_chunk))
|
||||
|
||||
# A more robust chunking strategy (e.g., using recursive character text splitter)
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
|
||||
text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=self.chunk_size,
|
||||
|
||||
@@ -60,6 +60,7 @@ dependencies = [
|
||||
"ebooklib>=0.18",
|
||||
"html2text>=2024.2.26",
|
||||
"langchain>=0.2.0",
|
||||
"langchain-text-splitters>=0.0.1",
|
||||
"chromadb>=0.4.24",
|
||||
"qdrant-client (>=1.15.1,<2.0.0)",
|
||||
"langbot-plugin==0.1.4",
|
||||
|
||||
Reference in New Issue
Block a user