package chatpipline import ( "context" "github.com/Tencent/WeKnora/internal/logger" "github.com/Tencent/WeKnora/internal/types" "github.com/Tencent/WeKnora/internal/types/interfaces" ) // PluginSearch implements search functionality for chat pipeline type PluginSearchEntity struct { graphRepo interfaces.RetrieveGraphRepository chunkRepo interfaces.ChunkRepository knowledgeRepo interfaces.KnowledgeRepository } func NewPluginSearchEntity( eventManager *EventManager, graphRepository interfaces.RetrieveGraphRepository, chunkRepository interfaces.ChunkRepository, knowledgeRepository interfaces.KnowledgeRepository, ) *PluginSearchEntity { res := &PluginSearchEntity{ graphRepo: graphRepository, chunkRepo: chunkRepository, knowledgeRepo: knowledgeRepository, } eventManager.Register(res) return res } // ActivationEvents returns the event types this plugin handles func (p *PluginSearchEntity) ActivationEvents() []types.EventType { return []types.EventType{types.ENTITY_SEARCH} } // OnEvent handles search events in the chat pipeline func (p *PluginSearchEntity) OnEvent(ctx context.Context, eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError, ) *PluginError { entity := chatManage.Entity if len(entity) == 0 { logger.Infof(ctx, "No entity found") return next() } graph, err := p.graphRepo.SearchNode(ctx, types.NameSpace{KnowledgeBase: chatManage.KnowledgeBaseID}, entity) if err != nil { logger.Errorf(ctx, "Failed to search node, session_id: %s, error: %v", chatManage.SessionID, err) return next() } chatManage.GraphResult = graph logger.Infof(ctx, "search entity result count: %d", len(graph.Node)) // graphStr, _ := json.Marshal(graph) // logger.Debugf(ctx, "search entity result: %s", string(graphStr)) chunkIDs := filterSeenChunk(ctx, graph, chatManage.SearchResult) if len(chunkIDs) == 0 { logger.Infof(ctx, "No new chunk found") return next() } chunks, err := p.chunkRepo.ListChunksByID(ctx, ctx.Value(types.TenantIDContextKey).(uint), chunkIDs) if err != nil { logger.Errorf(ctx, "Failed to list chunks, session_id: %s, error: %v", chatManage.SessionID, err) return next() } knowledgeIDs := []string{} for _, chunk := range chunks { knowledgeIDs = append(knowledgeIDs, chunk.KnowledgeID) } knowledges, err := p.knowledgeRepo.GetKnowledgeBatch(ctx, ctx.Value(types.TenantIDContextKey).(uint), knowledgeIDs) if err != nil { logger.Errorf(ctx, "Failed to list knowledge, session_id: %s, error: %v", chatManage.SessionID, err) return next() } knowledgeMap := map[string]*types.Knowledge{} for _, knowledge := range knowledges { knowledgeMap[knowledge.ID] = knowledge } for _, chunk := range chunks { searchResult := chunk2SearchResult(chunk, knowledgeMap[chunk.KnowledgeID]) chatManage.SearchResult = append(chatManage.SearchResult, searchResult) } // remove duplicate results chatManage.SearchResult = removeDuplicateResults(chatManage.SearchResult) if len(chatManage.SearchResult) == 0 { logger.Infof(ctx, "No new search result, session_id: %s", chatManage.SessionID) return ErrSearchNothing } logger.Infof(ctx, "search entity result count: %d, session_id: %s", len(chatManage.SearchResult), chatManage.SessionID) return next() } func filterSeenChunk(ctx context.Context, graph *types.GraphData, searchResult []*types.SearchResult) []string { seen := map[string]bool{} for _, chunk := range searchResult { seen[chunk.ID] = true } logger.Infof(ctx, "filterSeenChunk: seen count: %d", len(seen)) chunkIDs := []string{} for _, node := range graph.Node { for _, chunkID := range node.Chunks { if seen[chunkID] { continue } seen[chunkID] = true chunkIDs = append(chunkIDs, chunkID) } } logger.Infof(ctx, "filterSeenChunk: new chunkIDs count: %d", len(chunkIDs)) return chunkIDs } func chunk2SearchResult(chunk *types.Chunk, knowledge *types.Knowledge) *types.SearchResult { return &types.SearchResult{ ID: chunk.ID, Content: chunk.Content, KnowledgeID: chunk.KnowledgeID, ChunkIndex: chunk.ChunkIndex, KnowledgeTitle: knowledge.Title, StartAt: chunk.StartAt, EndAt: chunk.EndAt, Seq: chunk.ChunkIndex, Score: 1.0, MatchType: types.MatchTypeGraph, Metadata: knowledge.GetMetadata(), ChunkType: string(chunk.ChunkType), ParentChunkID: chunk.ParentChunkID, ImageInfo: chunk.ImageInfo, KnowledgeFilename: knowledge.FileName, KnowledgeSource: knowledge.Source, } }