fix: file being deleted twice

This commit is contained in:
Junyan Qin
2025-07-12 17:47:53 +08:00
parent 1e85d02ae4
commit 1ef0193028

View File

@@ -21,8 +21,12 @@ class RAGManager:
self.parser = FileParser()
self.chunker = Chunker()
# Initialize Embedder with targeted model type and name
self.embedder = Embedder(model_type='third_party_api', model_name_key='bge-m3', chroma_manager=self.chroma_manager)
self.retriever = Retriever(model_type='third_party_api', model_name_key='bge-m3', chroma_manager=self.chroma_manager)
self.embedder = Embedder(
model_type='third_party_api', model_name_key='bge-m3', chroma_manager=self.chroma_manager
)
self.retriever = Retriever(
model_type='third_party_api', model_name_key='bge-m3', chroma_manager=self.chroma_manager
)
async def initialize_rag_system(self):
"""Initializes the RAG system by creating database tables."""
@@ -143,9 +147,7 @@ class RAGManager:
self.ap.logger.error(f'Error retrieving all files: {str(e)}', exc_info=True)
return []
async def store_data(
self, file_path: str, kb_id: str, file_type: str, file_id: str = None
):
async def store_data(self, file_path: str, kb_id: str, file_type: str, file_id: str = None):
"""
Parses, chunks, embeds, and stores data from a given file into the RAG system.
Associates the file with a knowledge base using kb_id in the File table.
@@ -165,9 +167,7 @@ class RAGManager:
file_name = os.path.basename(file_path)
text = await self.parser.parse(file_path)
if not text:
self.ap.logger.warning(
f'No text extracted from file {file_path}. '
)
self.ap.logger.warning(f'No text extracted from file {file_path}. ')
return
chunks_texts = await self.chunker.chunk(text)
@@ -222,8 +222,9 @@ class RAGManager:
try:
await self.ap.storage_mgr.storage_provider.delete(file_id)
except Exception as e:
self.ap.logger.error(f'Error deleting file from storage for file_id {file_id}: {str(e)}', exc_info=True)
await self.ap.storage_mgr.storage_provider.delete(file_id)
self.ap.logger.error(
f'Error deleting file from storage for file_id {file_id}: {str(e)}', exc_info=True
)
self.ap.logger.info(f'Deleted file record for file_id: {file_id}')
else:
self.ap.logger.warning(
@@ -326,7 +327,14 @@ class RAGManager:
return
self.ap.logger.info(f'File with ID {file_id} exists, proceeding with association.')
# add new file record
file_to_update = File(id=file_id, kb_id=kb.id, file_name=file_id, path=os.path.join('data', 'storage', file_id), file_type=os.path.splitext(file_id)[1].lstrip('.'), status=0)
file_to_update = File(
id=file_id,
kb_id=kb.id,
file_name=file_id,
path=os.path.join('data', 'storage', file_id),
file_type=os.path.splitext(file_id)[1].lstrip('.'),
status=0,
)
session.add(file_to_update)
session.commit()
self.ap.logger.info(
@@ -345,9 +353,9 @@ class RAGManager:
file_path=os.path.join('data', 'storage', file_id),
kb_id=knowledge_base_uuid,
file_type=os.path.splitext(file_id)[1].lstrip('.'),
file_id=file_id
file_id=file_id,
)
except Exception as store_e:
except Exception:
# 如果存储数据时出错,更新文件状态为失败
file_obj = session.query(File).filter_by(id=file_id).first()
if file_obj: