mirror of
https://github.com/Tencent/WeKnora.git
synced 2025-11-25 03:15:00 +08:00
137 lines
4.5 KiB
Go
137 lines
4.5 KiB
Go
|
|
package chatpipline
|
||
|
|
|
||
|
|
import (
|
||
|
|
"context"
|
||
|
|
|
||
|
|
"github.com/Tencent/WeKnora/internal/logger"
|
||
|
|
"github.com/Tencent/WeKnora/internal/types"
|
||
|
|
"github.com/Tencent/WeKnora/internal/types/interfaces"
|
||
|
|
)
|
||
|
|
|
||
|
|
// PluginSearch implements search functionality for chat pipeline
|
||
|
|
type PluginSearchEntity struct {
|
||
|
|
graphRepo interfaces.RetrieveGraphRepository
|
||
|
|
chunkRepo interfaces.ChunkRepository
|
||
|
|
knowledgeRepo interfaces.KnowledgeRepository
|
||
|
|
}
|
||
|
|
|
||
|
|
func NewPluginSearchEntity(
|
||
|
|
eventManager *EventManager,
|
||
|
|
graphRepository interfaces.RetrieveGraphRepository,
|
||
|
|
chunkRepository interfaces.ChunkRepository,
|
||
|
|
knowledgeRepository interfaces.KnowledgeRepository,
|
||
|
|
) *PluginSearchEntity {
|
||
|
|
res := &PluginSearchEntity{
|
||
|
|
graphRepo: graphRepository,
|
||
|
|
chunkRepo: chunkRepository,
|
||
|
|
knowledgeRepo: knowledgeRepository,
|
||
|
|
}
|
||
|
|
eventManager.Register(res)
|
||
|
|
return res
|
||
|
|
}
|
||
|
|
|
||
|
|
// ActivationEvents returns the event types this plugin handles
|
||
|
|
func (p *PluginSearchEntity) ActivationEvents() []types.EventType {
|
||
|
|
return []types.EventType{types.ENTITY_SEARCH}
|
||
|
|
}
|
||
|
|
|
||
|
|
// OnEvent handles search events in the chat pipeline
|
||
|
|
func (p *PluginSearchEntity) OnEvent(ctx context.Context,
|
||
|
|
eventType types.EventType, chatManage *types.ChatManage, next func() *PluginError,
|
||
|
|
) *PluginError {
|
||
|
|
entity := chatManage.Entity
|
||
|
|
if len(entity) == 0 {
|
||
|
|
logger.Infof(ctx, "No entity found")
|
||
|
|
return next()
|
||
|
|
}
|
||
|
|
|
||
|
|
graph, err := p.graphRepo.SearchNode(ctx, types.NameSpace{KnowledgeBase: chatManage.KnowledgeBaseID}, entity)
|
||
|
|
if err != nil {
|
||
|
|
logger.Errorf(ctx, "Failed to search node, session_id: %s, error: %v", chatManage.SessionID, err)
|
||
|
|
return next()
|
||
|
|
}
|
||
|
|
chatManage.GraphResult = graph
|
||
|
|
logger.Infof(ctx, "search entity result count: %d", len(graph.Node))
|
||
|
|
// graphStr, _ := json.Marshal(graph)
|
||
|
|
// logger.Debugf(ctx, "search entity result: %s", string(graphStr))
|
||
|
|
|
||
|
|
chunkIDs := filterSeenChunk(ctx, graph, chatManage.SearchResult)
|
||
|
|
if len(chunkIDs) == 0 {
|
||
|
|
logger.Infof(ctx, "No new chunk found")
|
||
|
|
return next()
|
||
|
|
}
|
||
|
|
chunks, err := p.chunkRepo.ListChunksByID(ctx, ctx.Value(types.TenantIDContextKey).(uint), chunkIDs)
|
||
|
|
if err != nil {
|
||
|
|
logger.Errorf(ctx, "Failed to list chunks, session_id: %s, error: %v", chatManage.SessionID, err)
|
||
|
|
return next()
|
||
|
|
}
|
||
|
|
knowledgeIDs := []string{}
|
||
|
|
for _, chunk := range chunks {
|
||
|
|
knowledgeIDs = append(knowledgeIDs, chunk.KnowledgeID)
|
||
|
|
}
|
||
|
|
knowledges, err := p.knowledgeRepo.GetKnowledgeBatch(ctx, ctx.Value(types.TenantIDContextKey).(uint), knowledgeIDs)
|
||
|
|
if err != nil {
|
||
|
|
logger.Errorf(ctx, "Failed to list knowledge, session_id: %s, error: %v", chatManage.SessionID, err)
|
||
|
|
return next()
|
||
|
|
}
|
||
|
|
|
||
|
|
knowledgeMap := map[string]*types.Knowledge{}
|
||
|
|
for _, knowledge := range knowledges {
|
||
|
|
knowledgeMap[knowledge.ID] = knowledge
|
||
|
|
}
|
||
|
|
for _, chunk := range chunks {
|
||
|
|
searchResult := chunk2SearchResult(chunk, knowledgeMap[chunk.KnowledgeID])
|
||
|
|
chatManage.SearchResult = append(chatManage.SearchResult, searchResult)
|
||
|
|
}
|
||
|
|
// remove duplicate results
|
||
|
|
chatManage.SearchResult = removeDuplicateResults(chatManage.SearchResult)
|
||
|
|
if len(chatManage.SearchResult) == 0 {
|
||
|
|
logger.Infof(ctx, "No new search result, session_id: %s", chatManage.SessionID)
|
||
|
|
return ErrSearchNothing
|
||
|
|
}
|
||
|
|
logger.Infof(ctx, "search entity result count: %d, session_id: %s", len(chatManage.SearchResult), chatManage.SessionID)
|
||
|
|
return next()
|
||
|
|
}
|
||
|
|
|
||
|
|
func filterSeenChunk(ctx context.Context, graph *types.GraphData, searchResult []*types.SearchResult) []string {
|
||
|
|
seen := map[string]bool{}
|
||
|
|
for _, chunk := range searchResult {
|
||
|
|
seen[chunk.ID] = true
|
||
|
|
}
|
||
|
|
logger.Infof(ctx, "filterSeenChunk: seen count: %d", len(seen))
|
||
|
|
|
||
|
|
chunkIDs := []string{}
|
||
|
|
for _, node := range graph.Node {
|
||
|
|
for _, chunkID := range node.Chunks {
|
||
|
|
if seen[chunkID] {
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
seen[chunkID] = true
|
||
|
|
chunkIDs = append(chunkIDs, chunkID)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
logger.Infof(ctx, "filterSeenChunk: new chunkIDs count: %d", len(chunkIDs))
|
||
|
|
return chunkIDs
|
||
|
|
}
|
||
|
|
|
||
|
|
func chunk2SearchResult(chunk *types.Chunk, knowledge *types.Knowledge) *types.SearchResult {
|
||
|
|
return &types.SearchResult{
|
||
|
|
ID: chunk.ID,
|
||
|
|
Content: chunk.Content,
|
||
|
|
KnowledgeID: chunk.KnowledgeID,
|
||
|
|
ChunkIndex: chunk.ChunkIndex,
|
||
|
|
KnowledgeTitle: knowledge.Title,
|
||
|
|
StartAt: chunk.StartAt,
|
||
|
|
EndAt: chunk.EndAt,
|
||
|
|
Seq: chunk.ChunkIndex,
|
||
|
|
Score: 1.0,
|
||
|
|
MatchType: types.MatchTypeGraph,
|
||
|
|
Metadata: knowledge.GetMetadata(),
|
||
|
|
ChunkType: string(chunk.ChunkType),
|
||
|
|
ParentChunkID: chunk.ParentChunkID,
|
||
|
|
ImageInfo: chunk.ImageInfo,
|
||
|
|
KnowledgeFilename: knowledge.FileName,
|
||
|
|
KnowledgeSource: knowledge.Source,
|
||
|
|
}
|
||
|
|
}
|