WeKnora/internal/application/service/chat_pipline/search_entity.go
package chatpipline

import (
	"context"

	"github.com/Tencent/WeKnora/internal/logger"
	"github.com/Tencent/WeKnora/internal/types"
	"github.com/Tencent/WeKnora/internal/types/interfaces"
)

// PluginSearchEntity implements entity-based graph search for the chat pipeline
type PluginSearchEntity struct {
	graphRepo     interfaces.RetrieveGraphRepository
	chunkRepo     interfaces.ChunkRepository
	knowledgeRepo interfaces.KnowledgeRepository
}

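// NewPluginSearchEntity creates the plugin and registers it with the event manager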
func NewPluginSearchEntity(
	eventManager *EventManager,
	graphRepository interfaces.RetrieveGraphRepository,
	chunkRepository interfaces.ChunkRepository,
	knowledgeRepository interfaces.KnowledgeRepository,
) *PluginSearchEntity {
	res := &PluginSearchEntity{
		graphRepo:     graphRepository,
		chunkRepo:     chunkRepository,
		knowledgeRepo: knowledgeRepository,
	}
	eventManager.Register(res)
	return res
}

// ActivationEvents returns the event types this plugin handles
func (p *PluginSearchEntity) ActivationEvents() []types.EventType {
	return []types.EventType{types.ENTITY_SEARCH}
}

// OnEvent handles entity search events in the chat pipeline
func (p *PluginSearchEntity) OnEvent(ctx context.Context,
	eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError,
) *PluginError {
	entity := chatManage.Entity
	if len(entity) == 0 {
		logger.Infof(ctx, "No entity found")
		return next()
	}

	// Look up graph nodes matching the extracted entities within the knowledge base namespace
	graph, err := p.graphRepo.SearchNode(ctx, types.NameSpace{KnowledgeBase: chatManage.KnowledgeBaseID}, entity)
	if err != nil {
		logger.Errorf(ctx, "Failed to search node, session_id: %s, error: %v", chatManage.SessionID, err)
		return next()
	}
	chatManage.GraphResult = graph
	logger.Infof(ctx, "search entity result count: %d", len(graph.Node))
	// graphStr, _ := json.Marshal(graph)
	// logger.Debugf(ctx, "search entity result: %s", string(graphStr))

	// Collect chunk IDs referenced by the graph nodes that are not already in the search results
	chunkIDs := filterSeenChunk(ctx, graph, chatManage.SearchResult)
	if len(chunkIDs) == 0 {
		logger.Infof(ctx, "No new chunk found")
		return next()
	}
	chunks, err := p.chunkRepo.ListChunksByID(ctx, ctx.Value(types.TenantIDContextKey).(uint), chunkIDs)
	if err != nil {
		logger.Errorf(ctx, "Failed to list chunks, session_id: %s, error: %v", chatManage.SessionID, err)
		return next()
	}

	// Load the knowledge records the chunks belong to, so results can carry title, filename, and metadata
	knowledgeIDs := []string{}
	for _, chunk := range chunks {
		knowledgeIDs = append(knowledgeIDs, chunk.KnowledgeID)
	}
	knowledges, err := p.knowledgeRepo.GetKnowledgeBatch(ctx, ctx.Value(types.TenantIDContextKey).(uint), knowledgeIDs)
	if err != nil {
		logger.Errorf(ctx, "Failed to list knowledge, session_id: %s, error: %v", chatManage.SessionID, err)
		return next()
	}
	knowledgeMap := map[string]*types.Knowledge{}
	for _, knowledge := range knowledges {
		knowledgeMap[knowledge.ID] = knowledge
	}

	// Convert the new chunks into search results and merge them into the existing result set
	for _, chunk := range chunks {
		searchResult := chunk2SearchResult(chunk, knowledgeMap[chunk.KnowledgeID])
		chatManage.SearchResult = append(chatManage.SearchResult, searchResult)
	}
	// remove duplicate results
	chatManage.SearchResult = removeDuplicateResults(chatManage.SearchResult)
	if len(chatManage.SearchResult) == 0 {
		logger.Infof(ctx, "No new search result, session_id: %s", chatManage.SessionID)
		return ErrSearchNothing
	}
	logger.Infof(ctx, "search entity result count: %d, session_id: %s", len(chatManage.SearchResult), chatManage.SessionID)
	return next()
}

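// filterSeenChunk returns the chunk IDs referenced by graph nodes that are not
// already present in the current search results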
func filterSeenChunk(ctx context.Context, graph *types.GraphData, searchResult []*types.SearchResult) []string {
	seen := map[string]bool{}
	for _, chunk := range searchResult {
		seen[chunk.ID] = true
	}
	logger.Infof(ctx, "filterSeenChunk: seen count: %d", len(seen))
	chunkIDs := []string{}
	for _, node := range graph.Node {
		for _, chunkID := range node.Chunks {
			if seen[chunkID] {
				continue
			}
			seen[chunkID] = true
			chunkIDs = append(chunkIDs, chunkID)
		}
	}
	logger.Infof(ctx, "filterSeenChunk: new chunkIDs count: %d", len(chunkIDs))
	return chunkIDs
}

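// chunk2SearchResult converts a chunk and its parent knowledge record into a
// graph-matched search result with a fixed score of 1.0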
func chunk2SearchResult(chunk *types.Chunk, knowledge *types.Knowledge) *types.SearchResult {
	return &types.SearchResult{
		ID:                chunk.ID,
		Content:           chunk.Content,
		KnowledgeID:       chunk.KnowledgeID,
		ChunkIndex:        chunk.ChunkIndex,
		KnowledgeTitle:    knowledge.Title,
		StartAt:           chunk.StartAt,
		EndAt:             chunk.EndAt,
		Seq:               chunk.ChunkIndex,
		Score:             1.0,
		MatchType:         types.MatchTypeGraph,
		Metadata:          knowledge.GetMetadata(),
		ChunkType:         string(chunk.ChunkType),
		ParentChunkID:     chunk.ParentChunkID,
		ImageInfo:         chunk.ImageInfo,
		KnowledgeFilename: knowledge.FileName,
		KnowledgeSource:   knowledge.Source,
	}
}