mirror of
https://github.com/langbot-app/LangBot.git
synced 2025-11-25 11:29:39 +08:00
fix: file being deleted twice
This commit is contained in:
@@ -21,8 +21,12 @@ class RAGManager:
|
||||
self.parser = FileParser()
|
||||
self.chunker = Chunker()
|
||||
# Initialize Embedder with targeted model type and name
|
||||
self.embedder = Embedder(model_type='third_party_api', model_name_key='bge-m3', chroma_manager=self.chroma_manager)
|
||||
self.retriever = Retriever(model_type='third_party_api', model_name_key='bge-m3', chroma_manager=self.chroma_manager)
|
||||
self.embedder = Embedder(
|
||||
model_type='third_party_api', model_name_key='bge-m3', chroma_manager=self.chroma_manager
|
||||
)
|
||||
self.retriever = Retriever(
|
||||
model_type='third_party_api', model_name_key='bge-m3', chroma_manager=self.chroma_manager
|
||||
)
|
||||
|
||||
async def initialize_rag_system(self):
|
||||
"""Initializes the RAG system by creating database tables."""
|
||||
@@ -143,9 +147,7 @@ class RAGManager:
|
||||
self.ap.logger.error(f'Error retrieving all files: {str(e)}', exc_info=True)
|
||||
return []
|
||||
|
||||
async def store_data(
|
||||
self, file_path: str, kb_id: str, file_type: str, file_id: str = None
|
||||
):
|
||||
async def store_data(self, file_path: str, kb_id: str, file_type: str, file_id: str = None):
|
||||
"""
|
||||
Parses, chunks, embeds, and stores data from a given file into the RAG system.
|
||||
Associates the file with a knowledge base using kb_id in the File table.
|
||||
@@ -165,9 +167,7 @@ class RAGManager:
|
||||
file_name = os.path.basename(file_path)
|
||||
text = await self.parser.parse(file_path)
|
||||
if not text:
|
||||
self.ap.logger.warning(
|
||||
f'No text extracted from file {file_path}. '
|
||||
)
|
||||
self.ap.logger.warning(f'No text extracted from file {file_path}. ')
|
||||
return
|
||||
|
||||
chunks_texts = await self.chunker.chunk(text)
|
||||
@@ -222,8 +222,9 @@ class RAGManager:
|
||||
try:
|
||||
await self.ap.storage_mgr.storage_provider.delete(file_id)
|
||||
except Exception as e:
|
||||
self.ap.logger.error(f'Error deleting file from storage for file_id {file_id}: {str(e)}', exc_info=True)
|
||||
await self.ap.storage_mgr.storage_provider.delete(file_id)
|
||||
self.ap.logger.error(
|
||||
f'Error deleting file from storage for file_id {file_id}: {str(e)}', exc_info=True
|
||||
)
|
||||
self.ap.logger.info(f'Deleted file record for file_id: {file_id}')
|
||||
else:
|
||||
self.ap.logger.warning(
|
||||
@@ -326,7 +327,14 @@ class RAGManager:
|
||||
return
|
||||
self.ap.logger.info(f'File with ID {file_id} exists, proceeding with association.')
|
||||
# add new file record
|
||||
file_to_update = File(id=file_id, kb_id=kb.id, file_name=file_id, path=os.path.join('data', 'storage', file_id), file_type=os.path.splitext(file_id)[1].lstrip('.'), status=0)
|
||||
file_to_update = File(
|
||||
id=file_id,
|
||||
kb_id=kb.id,
|
||||
file_name=file_id,
|
||||
path=os.path.join('data', 'storage', file_id),
|
||||
file_type=os.path.splitext(file_id)[1].lstrip('.'),
|
||||
status=0,
|
||||
)
|
||||
session.add(file_to_update)
|
||||
session.commit()
|
||||
self.ap.logger.info(
|
||||
@@ -342,12 +350,12 @@ class RAGManager:
|
||||
# 进行文件解析
|
||||
try:
|
||||
await self.store_data(
|
||||
file_path = os.path.join('data', 'storage', file_id),
|
||||
file_path=os.path.join('data', 'storage', file_id),
|
||||
kb_id=knowledge_base_uuid,
|
||||
file_type=os.path.splitext(file_id)[1].lstrip('.'),
|
||||
file_id=file_id
|
||||
file_id=file_id,
|
||||
)
|
||||
except Exception as store_e:
|
||||
except Exception:
|
||||
# 如果存储数据时出错,更新文件状态为失败
|
||||
file_obj = session.query(File).filter_by(id=file_id).first()
|
||||
if file_obj:
|
||||
|
||||
Reference in New Issue
Block a user