mirror of
https://github.com/fish2018/pansou.git
synced 2025-11-25 03:14:59 +08:00
优化搜索tg频道标题匹配
This commit is contained in:
@@ -1063,7 +1063,8 @@ func mergeResultsByType(results []model.SearchResult, keyword string, cloudTypes
|
||||
source = "unknown"
|
||||
}
|
||||
|
||||
// 创建合并后的链接
|
||||
// 赋值给Note前,支持多个关键词裁剪
|
||||
title = util.CutTitleByKeywords(title, []string{"简介", "描述"})
|
||||
mergedLink := model.MergedLink{
|
||||
URL: link.URL,
|
||||
Password: link.Password,
|
||||
|
||||
@@ -540,6 +540,20 @@ func ParseSearchResults(html string, channel string) ([]model.SearchResult, stri
|
||||
return results, nextPageParam, nil
|
||||
}
|
||||
|
||||
// CutTitleByKeywords truncates title at the earliest occurrence of any of the
// given keywords and returns the whitespace-trimmed text that precedes it.
//
// Empty keywords are ignored. If no keyword occurs in title, or if nothing
// but whitespace precedes the earliest match, the whole title is returned
// with surrounding whitespace trimmed, so a non-blank title is never reduced
// to an empty string.
func CutTitleByKeywords(title string, keywords []string) string {
	cut := -1
	for _, kw := range keywords {
		if kw == "" {
			// strings.Index reports 0 for an empty needle, which would
			// otherwise mask every real keyword and disable cutting.
			continue
		}
		if idx := strings.Index(title, kw); idx >= 0 && (cut == -1 || idx < cut) {
			cut = idx
		}
	}
	if cut > 0 {
		// Only accept the prefix when it carries visible text; a blank
		// prefix is treated like a keyword at position 0.
		if head := strings.TrimSpace(title[:cut]); head != "" {
			return head
		}
	}
	return strings.TrimSpace(title)
}
|
||||
|
||||
// extractImageURLFromStyle 从CSS样式字符串中提取background-image的URL
|
||||
func extractImageURLFromStyle(style string) string {
|
||||
// 查找background-image:url('...') 或 background-image:url("...")
|
||||
@@ -624,5 +638,8 @@ func extractTitle(htmlContent string, textContent string) string {
|
||||
}
|
||||
|
||||
// 否则直接使用第一行作为标题
|
||||
return firstLine
|
||||
result := firstLine
|
||||
// 统一裁剪:遇到简介/描述等关键字时,只保留前半部分
|
||||
result = CutTitleByKeywords(result, []string{"简介", "描述"})
|
||||
return result
|
||||
}
|
||||
Reference in New Issue
Block a user