优化tg搜索

This commit is contained in:
www.xueximeng.com
2025-11-08 15:04:40 +08:00
parent b7ace90dec
commit e565502e83
8 changed files with 342 additions and 65 deletions

View File

@@ -61,8 +61,8 @@ ENV CACHE_PATH=/app/cache \
ASYNC_MAX_BACKGROUND_WORKERS=20 \
ASYNC_MAX_BACKGROUND_TASKS=100 \
ASYNC_CACHE_TTL_HOURS=1 \
CHANNELS=tgsearchers3,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju,TG654TG,WFYSFX02,QukanMovie,yeqingjie_GJG666,movielover8888_film3,Baidu_netdisk,D_wusun,FLMdongtianfudi,KaiPanshare,QQZYDAPP,rjyxfx,PikPak_Share_Channel,btzhi,newproductsourcing,cctv1211,duan_ju,QuarkFree,yunpanNB,kkdj001,xxzlzn,pxyunpanxunlei,jxwpzy,kuakedongman,liangxingzhinan,xiangnikanj,solidsexydoll,guoman4K,zdqxm,kduanju,cilidianying,CBduanju,SharePanFilms,dzsgx,BooksRealm,Oscar_4Kmovies \
ENABLED_PLUGINS=labi,zhizhen,shandian,duoduo,muou,wanou,hunhepan,jikepan,panwiki,pansearch,panta,qupansou,hdr4k,pan666,susu,thepiratebay,xuexizhinan,panyq,ouge,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,discourse,yunsou \
CHANNELS=tgsearchers4,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,MCPH02,MCPH03,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju,TG654TG,WFYSFX02,QukanMovie,yeqingjie_GJG666,movielover8888_film3,Baidu_netdisk,D_wusun,FLMdongtianfudi,KaiPanshare,QQZYDAPP,rjyxfx,PikPak_Share_Channel,btzhi,newproductsourcing,cctv1211,duan_ju,QuarkFree,yunpanNB,kkdj001,xxzlzn,pxyunpanxunlei,jxwpzy,kuakedongman,liangxingzhinan,xiangnikanj,solidsexydoll,guoman4K,zdqxm,kduanju,cilidianying,CBduanju,SharePanFilms,dzsgx,BooksRealm,Oscar_4Kmovies,douerpan,baidu_yppan,Q_jilupian,Netdisk_Movies,yunpanquark,ammmziyuan,ciliziyuanku,cili8888,jzmm_123pan \
ENABLED_PLUGINS=labi,zhizhen,shandian,duoduo,muou,wanou,hunhepan,jikepan,panwiki,pansearch,panta,qupansou,hdr4k,pan666,susu,thepiratebay,xuexizhinan,panyq,ouge,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,discourse,yunsou,qqpd,ahhhhfs,nsgame,gying,quark4k,quarksoo,sousou,ash \
AUTH_ENABLED=false \
AUTH_TOKEN_EXPIRY=24

View File

@@ -27,21 +27,14 @@ PanSou 还提供了一个基于 [Model Context Protocol (MCP)](https://modelcont
本项目,并点上 Star !!!
### 使用Docker部署
[qqpd搜索插件文档](plugin/qqpd/README.md)
[gying搜索插件文档](plugin/gying/README.md)
[常见问题总结](https://github.com/fish2018/pansou/issues/46)
[TG资源频道列表](https://github.com/fish2018/pansou/issues/4)
[gying插件文档](https://github.com/fish2018/pansou/blob/main/plugin/gying/README.md)
<details>
<summary>插件列表(请务必按需加载)</summary>
<pre>
export ENABLED_PLUGINS=hunhepan,jikepan,panwiki,pansearch,panta,qupansou,
susu,thepiratebay,wanou,xuexizhinan,panyq,zhizhen,labi,muou,ouge,shandian,
duoduo,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,
libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,
sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,
discourse,yunsou,ahhhhfs,nsgame,gying,quark4k,quarksoo,sousou,ash
export ENABLED_PLUGINS=hunhepan,jikepan,panwiki,pansearch,panta,qupansou,hdr4k,pan666,susu,thepiratebay,wanou,xuexizhinan,panyq,zhizhen,labi,muou,ouge,shandian,duoduo,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,discourse,yunsou,qqpd,gying,quark4k,quarksoo,sousou
</pre>
</details>
@@ -52,7 +45,7 @@ discourse,yunsou,ahhhhfs,nsgame,gying,quark4k,quarksoo,sousou,ash
一键启动,开箱即用
```
docker run -d --name pansou -p 80:80 -e ENABLED_PLUGINS="labi,zhizhen,shandian,duoduo,muou,wanou" ghcr.io/fish2018/pansou-web
docker run -d --name pansou -p 80:80 ghcr.io/fish2018/pansou-web
```
##### 使用Docker Compose推荐
@@ -72,7 +65,7 @@ docker-compose logs -f
##### 直接使用Docker命令
```bash
docker run -d --name pansou -p 8888:8888 -v pansou-cache:/app/cache -e CHANNELS="tgsearchers3,xxx" -e ENABLED_PLUGINS="labi,zhizhen,shandian,duoduo,muou,wanou" ghcr.io/fish2018/pansou:latest
docker run -d --name pansou -p 8888:8888 ghcr.io/fish2018/pansou:latest
```
##### 使用Docker Compose推荐
@@ -213,7 +206,7 @@ CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags="-s -w -extldflags '-sta
```
[program:pansou]
environment=PORT=8888,CHANNELS="tgsearchers3,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,Oscar_4Kmovies,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju",ENABLED_PLUGINS="labi,zhizhen,shandian,duoduo,muou"
environment=PORT=8888,CHANNELS="tgsearchers4,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,Oscar_4Kmovies,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju",ENABLED_PLUGINS="labi,zhizhen,shandian,duoduo,muou"
command=/home/work/pansou/pansou
directory=/home/work/pansou
autostart=true
@@ -485,7 +478,9 @@ curl "http://localhost:8888/api/search?kw=速度与激情&res=merge" \
{
"type": "baidu",
"url": "https://pan.baidu.com/s/1abcdef",
"password": "1234"
"password": "1234",
"datetime": "2023-06-10T14:23:45Z",
"work_title": "速度与激情全集1-10"
}
],
"tags": ["电影", "合集"],
@@ -529,11 +524,37 @@ curl "http://localhost:8888/api/search?kw=速度与激情&res=merge" \
**字段说明**
**SearchResult对象**
- `message_id`: 消息ID
- `unique_id`: 全局唯一标识符
- `channel`: 来源频道名称
- `datetime`: 消息发布时间
- `title`: 消息标题
- `content`: 消息内容
- `links`: 网盘链接数组
- `tags`: 标签数组(可选)
- `images`: TG消息中的图片链接数组可选
**Link对象**
- `type`: 网盘类型baidu、quark、aliyun等
- `url`: 网盘链接地址
- `password`: 提取码/密码
- `datetime`: 链接更新时间(可选)
- `work_title`: 作品标题(可选)
- 用于区分同一消息中多个作品的链接
- 当一条消息包含≤4个链接时所有链接使用相同的work_title
- 当一条消息包含>4个链接时系统会智能识别每个链接对应的作品标题
**MergedLink对象**
- `url`: 网盘链接地址
- `password`: 提取码/密码
- `note`: 资源说明/标题
- `datetime`: 链接更新时间
- `source`: 数据来源标识
- `tg:频道名称`: 来自Telegram频道
- `plugin:插件名`: 来自指定插件
- `unknown`: 未知来源
- `images`: TG消息中的图片链接数组可选字段
- `images`: TG消息中的图片链接数组可选
- 仅在来源为Telegram频道且消息包含图片时出现

View File

@@ -112,7 +112,7 @@ func Init() {
func getDefaultChannels() []string {
channelsEnv := os.Getenv("CHANNELS")
if channelsEnv == "" {
return []string{"tgsearchers3"}
return []string{"tgsearchers4"}
}
return strings.Split(channelsEnv, ",")
}

View File

@@ -9,9 +9,9 @@ services:
- "8888:8888"
environment:
- PORT=8888
- CHANNELS=tgsearchers3,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju,TG654TG,WFYSFX02,QukanMovie,yeqingjie_GJG666,movielover8888_film3,Baidu_netdisk,D_wusun,FLMdongtianfudi,KaiPanshare,QQZYDAPP,rjyxfx,PikPak_Share_Channel,btzhi,newproductsourcing,cctv1211,duan_ju,QuarkFree,yunpanNB,kkdj001,xxzlzn,pxyunpanxunlei,jxwpzy,kuakedongman,liangxingzhinan,xiangnikanj,solidsexydoll,guoman4K,zdqxm,kduanju,cilidianying,CBduanju,SharePanFilms,dzsgx,BooksRealm
- CHANNELS=tgsearchers4,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,MCPH02,MCPH03,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju,TG654TG,WFYSFX02,QukanMovie,yeqingjie_GJG666,movielover8888_film3,Baidu_netdisk,D_wusun,FLMdongtianfudi,KaiPanshare,QQZYDAPP,rjyxfx,PikPak_Share_Channel,btzhi,newproductsourcing,cctv1211,duan_ju,QuarkFree,yunpanNB,kkdj001,xxzlzn,pxyunpanxunlei,jxwpzy,kuakedongman,liangxingzhinan,xiangnikanj,solidsexydoll,guoman4K,zdqxm,kduanju,cilidianying,CBduanju,SharePanFilms,dzsgx,BooksRealm,Oscar_4Kmovies,douerpan,baidu_yppan,Q_jilupian,Netdisk_Movies,yunpanquark,ammmziyuan,ciliziyuanku,cili8888,jzmm_123pan
# 必须指定启用的插件,多个插件用逗号分隔
- ENABLED_PLUGINS=labi,zhizhen,shandian,duoduo,muou,wanou,hunhepan,jikepan,panwiki,pansearch,panta,qupansou,hdr4k,pan666,susu,thepiratebay,xuexizhinan,panyq,ouge,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,discourse,yunsou
- ENABLED_PLUGINS=labi,zhizhen,shandian,duoduo,muou,wanou,hunhepan,jikepan,panwiki,pansearch,panta,qupansou,hdr4k,pan666,susu,thepiratebay,xuexizhinan,panyq,ouge,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,discourse,yunsou,qqpd,ahhhhfs,nsgame,gying,quark4k,quarksoo,sousou,ash
- CACHE_ENABLED=true
- CACHE_PATH=/app/cache
- CACHE_MAX_SIZE=100

View File

@@ -4,10 +4,11 @@ import "time"
// Link 网盘链接
type Link struct {
Type string `json:"type" sonic:"type"`
URL string `json:"url" sonic:"url"`
Password string `json:"password" sonic:"password"`
Datetime time.Time `json:"datetime,omitempty" sonic:"datetime,omitempty"` // 链接更新时间(可选)
Type string `json:"type" sonic:"type"`
URL string `json:"url" sonic:"url"`
Password string `json:"password" sonic:"password"`
Datetime time.Time `json:"datetime,omitempty" sonic:"datetime,omitempty"` // 链接更新时间(可选)
WorkTitle string `json:"work_title,omitempty" sonic:"work_title,omitempty"` // 作品标题(用于区分同一消息中多个作品的链接)
}
// SearchResult 搜索结果

View File

@@ -906,14 +906,59 @@ func extractTitleFromLinkLine(line string) string {
return ""
}
// 判断是否为链接前缀词
// 判断是否为链接前缀词(包括网盘名称)
func isLinkPrefix(text string) bool {
text = strings.ToLower(strings.TrimSpace(text))
return text == "链接" ||
text == "地址" ||
text == "资源地址" ||
text == "网盘" ||
text == "网盘地址"
// 标准链接前缀词
if text == "链接" ||
text == "地址" ||
text == "资源地址" ||
text == "网盘" ||
text == "网盘地址" {
return true
}
// 网盘名称(防止误将网盘名称当作标题)
cloudDiskNames := []string{
// 夸克网盘
"夸克", "夸克网盘", "quark", "夸克云盘",
// 百度网盘
"百度", "百度网盘", "baidu", "百度云", "bdwp", "bdpan",
// 迅雷网盘
"迅雷", "迅雷网盘", "xunlei", "迅雷云盘",
// 115网盘
"115", "115网盘", "115云盘",
// 123网盘
"123", "123pan", "123网盘", "123云盘",
// 阿里云盘
"阿里", "阿里云", "阿里云盘", "aliyun", "alipan", "阿里网盘",
// 天翼云盘
"天翼", "天翼云", "天翼云盘", "tianyi", "天翼网盘",
// UC网盘
"uc", "uc网盘", "uc云盘",
// 移动云盘
"移动", "移动云", "移动云盘", "caiyun", "彩云",
// PikPak
"pikpak", "pikpak网盘",
}
for _, name := range cloudDiskNames {
if text == name {
return true
}
}
return false
}
// 清理标题文本
@@ -1010,18 +1055,24 @@ func mergeResultsByType(results []model.SearchResult, keyword string, cloudTypes
}
for _, link := range result.Links {
// 尝试从映射中获取该链接对应的标题
// 优先使用链接的WorkTitle字段如果为空则回退到传统方式
title := result.Title // 默认使用消息标题
// 查找完全匹配的链接
if specificTitle, found := linkTitleMap[link.URL]; found && specificTitle != "" {
title = specificTitle // 如果找到特定标题,则使用它
if link.WorkTitle != "" {
// 如果链接有WorkTitle字段优先使用
title = link.WorkTitle
} else {
// 如果没有找到完全匹配的链接,尝试查找前缀匹配的链接
for mappedLink, mappedTitle := range linkTitleMap {
if strings.HasPrefix(mappedLink, link.URL) {
title = mappedTitle
break
// 如果没有WorkTitle使用传统方式从映射中获取该链接对应的标题
// 查找完全匹配的链接
if specificTitle, found := linkTitleMap[link.URL]; found && specificTitle != "" {
title = specificTitle // 如果找到特定标题,则使用它
} else {
// 如果没有找到完全匹配的链接,尝试查找前缀匹配的链接
for mappedLink, mappedTitle := range linkTitleMap {
if strings.HasPrefix(mappedLink, link.URL) {
title = mappedTitle
break
}
}
}
}
@@ -1507,4 +1558,3 @@ func calculateTimeScore(datetime time.Time) float64 {
}

View File

@@ -523,6 +523,9 @@ func ParseSearchResults(html string, channel string) ([]model.SearchResult, stri
// 只有包含链接的消息才添加到结果中
if len(links) > 0 {
// 为每个链接提取作品标题
links = extractWorkTitlesForLinks(links, messageText, title)
results = append(results, model.SearchResult{
MessageID: messageID,
UniqueID: uniqueID,
@@ -619,7 +622,12 @@ func extractTitle(htmlContent string, textContent string) string {
return strings.TrimSpace(firstLine[len("名称:"):])
}
return firstLine
// 如果第一行只是标签(以#开头),尝试从第二行提取
if strings.HasPrefix(firstLine, "#") && !strings.Contains(firstLine, "名称") {
// 继续从文本内容提取
} else {
return firstLine
}
}
}
@@ -632,6 +640,31 @@ func extractTitle(htmlContent string, textContent string) string {
// 第一行通常是标题
firstLine := strings.TrimSpace(lines[0])
// 如果第一行只是标签(以#开头且不包含实际内容),尝试从第二行或"名称:"字段提取
if strings.HasPrefix(firstLine, "#") {
// 检查是否有"名称:"字段
for _, line := range lines {
line = strings.TrimSpace(line)
if strings.HasPrefix(line, "名称:") {
return strings.TrimSpace(line[len("名称:"):])
}
}
// 如果没有"名称:"字段,尝试使用第二行
if len(lines) > 1 {
secondLine := strings.TrimSpace(lines[1])
if strings.HasPrefix(secondLine, "名称:") {
return strings.TrimSpace(secondLine[len("名称:"):])
}
// 如果第二行不是空的且不是标签,使用第二行
if secondLine != "" && !strings.HasPrefix(secondLine, "#") {
result := secondLine
result = CutTitleByKeywords(result, []string{"简介", "描述"})
return result
}
}
}
// 如果第一行以"名称:"开头,则提取冒号后面的内容作为标题
if strings.HasPrefix(firstLine, "名称:") {
return strings.TrimSpace(firstLine[len("名称:"):])
@@ -643,3 +676,169 @@ func extractTitle(htmlContent string, textContent string) string {
result = CutTitleByKeywords(result, []string{"简介", "描述"})
return result
}
// extractWorkTitlesForLinks assigns a WorkTitle to every link in the slice.
// A message with at most four links is treated as one work mirrored across
// several net-disks (all links share defaultTitle); larger messages get
// per-link title extraction based on the message layout.
func extractWorkTitlesForLinks(links []model.Link, messageText string, defaultTitle string) []model.Link {
	if len(links) == 0 {
		return links
	}
	// <= 4 links: assume they are alternative net-disk links for one work.
	if len(links) <= 4 {
		for idx := range links {
			links[idx].WorkTitle = defaultTitle
		}
		return links
	}
	// > 4 links: choose the extraction strategy from the message layout.
	messageLines := strings.Split(messageText, "\n")
	if isSingleLineFormat(messageLines) {
		// Lines look like "作品名丨网盘:链接" — parse a title per line.
		return extractWorkTitlesFromSingleLineFormat(links, messageLines, defaultTitle)
	}
	// Fallback: context-based matching (currently uses defaultTitle).
	return extractWorkTitlesFromContext(links, messageText, defaultTitle)
}
// isSingleLineFormat reports whether the message uses the single-line layout
// where each line looks like "作品名丨网盘:链接".
// Only non-empty lines are counted; the message qualifies when more than one
// third of the non-empty lines match that layout.
func isSingleLineFormat(lines []string) bool {
	singleLineCount := 0
	nonEmptyCount := 0
	for _, line := range lines {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		nonEmptyCount++
		// A qualifying line carries the "丨" separator, a full-width colon
		// and an http(s) link, e.g. "作品名丨夸克:https://...".
		if strings.Contains(line, "丨") && strings.Contains(line, ":") &&
			(strings.Contains(line, "http://") || strings.Contains(line, "https://")) {
			singleLineCount++
		}
	}
	// More than a third of the non-empty lines match — blank lines no longer
	// dilute the ratio as they did when the total line count was used.
	return singleLineCount > nonEmptyCount/3
}
// extractWorkTitlesFromSingleLineFormat extracts a per-link work title from
// messages where each line looks like "作品名丨网盘:链接" or "作品名 网盘:链接".
// Links whose URL cannot be matched to any line fall back to defaultTitle.
func extractWorkTitlesFromSingleLineFormat(links []model.Link, lines []string, defaultTitle string) []model.Link {
	// Normalized URL -> work title parsed from the line containing that URL.
	urlToWorkTitle := make(map[string]string, len(lines))

	for _, line := range lines {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}

		var workTitle string
		var linkURL string

		if before, after, found := strings.Cut(line, "丨"); found {
			// "作品名丨网盘:链接": the title is everything before the separator.
			workTitle = strings.TrimSpace(before)
			if idx := strings.Index(after, "http"); idx >= 0 {
				linkURL = extractFirstURL(after[idx:])
			}
		} else if before, after, ok := strings.Cut(line, ":"); ok && before != "" {
			// "作品名 网盘:链接": split on the full-width colon (restored here —
			// the previous empty-string literal made this branch always match
			// at index 0 and never produce a title), then strip the net-disk
			// name from the part before the colon.
			workTitle = extractWorkTitleBeforeColon(before)
			if idx := strings.Index(after, "http"); idx >= 0 {
				linkURL = extractFirstURL(after[idx:])
			}
		}

		// Record the mapping only when both pieces were found.
		if workTitle != "" && linkURL != "" {
			urlToWorkTitle[normalizeUrl(linkURL)] = workTitle
		}
	}

	// Assign each link its parsed title, or the message-level default.
	for i := range links {
		if workTitle, found := urlToWorkTitle[normalizeUrl(links[i].URL)]; found {
			links[i].WorkTitle = workTitle
		} else {
			links[i].WorkTitle = defaultTitle
		}
	}
	return links
}
// extractFirstURL returns the leading URL in text: the prefix running up to
// the first whitespace character (space, tab, CR or LF) after trimming
// surrounding whitespace. Trimming first fixes the old `idx > 0` guard, which
// skipped truncation entirely whenever the text began with a space.
func extractFirstURL(text string) string {
	text = strings.TrimSpace(text)
	// Cut at the first remaining whitespace; -1 means the whole text is the URL.
	if idx := strings.IndexAny(text, " \t\r\n"); idx >= 0 {
		text = text[:idx]
	}
	return text
}
// extractWorkTitleBeforeColon strips a trailing net-disk name (e.g. "夸克网盘",
// "百度云") from the text preceding the colon, leaving only the work title.
// At most one name is removed.
func extractWorkTitleBeforeColon(text string) string {
	text = strings.TrimSpace(text)
	// Longer, more specific names come first so "夸克网盘" is removed before
	// the generic "网盘" suffix.
	suffixes := []string{
		"夸克网盘", "夸克云盘", "夸克",
		"百度网盘", "百度云盘", "百度云", "百度",
		"迅雷网盘", "迅雷云盘", "迅雷",
		"阿里云盘", "阿里网盘", "阿里云", "阿里",
		"天翼云盘", "天翼网盘", "天翼云", "天翼",
		"UC网盘", "UC云盘", "UC",
		"移动云盘", "移动云", "移动",
		"115网盘", "115云盘", "115",
		"123网盘", "123云盘", "123",
		"PikPak网盘", "PikPak",
		"网盘", "云盘",
	}
	// Strip the first matching suffix and stop, mirroring right-to-left removal.
	for _, name := range suffixes {
		if rest := strings.TrimSuffix(text, name); rest != text {
			return strings.TrimSpace(rest)
		}
	}
	return text
}
// extractWorkTitlesFromContext is the fallback strategy when no per-link title
// can be derived from the message layout: every link simply receives the
// message-level default title. (messageText is accepted for interface
// symmetry with the other extractors and is currently unused.)
func extractWorkTitlesFromContext(links []model.Link, messageText string, defaultTitle string) []model.Link {
	for idx := range links {
		links[idx].WorkTitle = defaultTitle
	}
	return links
}

View File

@@ -13,24 +13,24 @@ var AllPanLinksPattern = regexp.MustCompile(`(?i)(?:(?:magnet:\?xt=urn:btih:[a-z
// 修改百度网盘链接正则表达式,确保只匹配到链接本身,不包含后面的文本
var BaiduPanPattern = regexp.MustCompile(`https?://pan\.baidu\.com/s/[a-zA-Z0-9_-]+(?:\?pwd=[a-zA-Z0-9]{4})?`)
var QuarkPanPattern = regexp.MustCompile(`https?://pan\.quark\.cn/s/[a-zA-Z0-9]+`)
var XunleiPanPattern = regexp.MustCompile(`https?://pan\.xunlei\.com/s/[a-zA-Z0-9]+(?:\?pwd=[a-zA-Z0-9]+)?(?:#)?`)
var XunleiPanPattern = regexp.MustCompile(`https?://pan\.xunlei\.com/s/[a-zA-Z0-9]+(?:\?pwd=[a-zA-Z0-9]{4})?(?:#)?`)
// 添加天翼云盘链接正则表达式 - 精确匹配支持URL编码的访问码
var TianyiPanPattern = regexp.MustCompile(`https?://cloud\.189\.cn/t/[a-zA-Z0-9]+(?:%[0-9A-Fa-f]{2})*(?:[^]*)?`)
// 添加UC网盘链接正则表达式
var UCPanPattern = regexp.MustCompile(`https?://drive\.uc\.cn/s/[a-zA-Z0-9]+(?:\?public=\d)?`)
// 添加123网盘链接正则表达式
var Pan123Pattern = regexp.MustCompile(`https?://(?:www\.)?123(?:684|685|912|pan|592)\.(?:com|cn)/s/[a-zA-Z0-9_-]+(?:\?(?:%E6%8F%90%E5%8F%96%E7%A0%81|提取码)[:][a-zA-Z0-9]+)?`)
var Pan123Pattern = regexp.MustCompile(`https?://(?:www\.)?123(?:684|865|685|912|pan|592)\.(?:com|cn)/s/[a-zA-Z0-9_-]+(?:\?(?:%E6%8F%90%E5%8F%96%E7%A0%81|提取码)[:][a-zA-Z0-9]+)?`)
// 添加115网盘链接正则表达式
var Pan115Pattern = regexp.MustCompile(`https?://(?:115\.com|115cdn\.com|anxia\.com)/s/[a-zA-Z0-9]+(?:\?password=[a-zA-Z0-9]{4})?(?:#)?`)
// 添加阿里云盘链接正则表达式
var AliyunPanPattern = regexp.MustCompile(`https?://(?:www\.)?(?:alipan|aliyundrive)\.com/s/[a-zA-Z0-9]+`)
// 提取码匹配正则表达式 - 增强提取密码的能力
var PasswordPattern = regexp.MustCompile(`(?i)(?:(?:提取|访问|提取密|密)码|pwd)[:]\s*([a-zA-Z0-9]{4})`)
var UrlPasswordPattern = regexp.MustCompile(`(?i)[?&]pwd=([a-zA-Z0-9]{4})`)
var PasswordPattern = regexp.MustCompile(`(?i)(?:(?:提取|访问|提取密|密)码|pwd)[:]\s*([a-zA-Z0-9]{4})(?:[^a-zA-Z0-9]|$)`)
var UrlPasswordPattern = regexp.MustCompile(`(?i)[?&]pwd=([a-zA-Z0-9]{4})(?:[^a-zA-Z0-9]|$)`)
// 百度网盘密码专用正则表达式 - 确保只提取4位密码
var BaiduPasswordPattern = regexp.MustCompile(`(?i)(?:链接:.*?提取码:|密码:|提取码:|pwd=|pwd:|pwd)([a-zA-Z0-9]{4})`)
var BaiduPasswordPattern = regexp.MustCompile(`(?i)(?:链接:.*?提取码:|密码:|提取码:|pwd=|pwd:|pwd)([a-zA-Z0-9]{4})(?:[^a-zA-Z0-9]|$)`)
// GetLinkType 获取链接类型
func GetLinkType(url string) string {
@@ -83,7 +83,7 @@ func GetLinkType(url string) string {
}
// 123网盘有多个域名
if strings.Contains(url, "123684.com") || strings.Contains(url, "123685.com") ||
if strings.Contains(url, "123684.com") || strings.Contains(url, "123685.com") || strings.Contains(url, "123865.com") ||
strings.Contains(url, "123912.com") || strings.Contains(url, "123pan.com") ||
strings.Contains(url, "123pan.cn") || strings.Contains(url, "123592.com") {
return "123"
@@ -115,22 +115,20 @@ func CleanBaiduPanURL(url string) string {
// 如果找到了结束标记,截取到结束标记位置
if minEndIdx < len(url) {
// 特殊处理pwd参数
if strings.Contains(url[:minEndIdx], "?pwd=") {
pwdIdx := strings.Index(url, "?pwd=")
pwdEndIdx := pwdIdx + 10 // ?pwd=xxxx 总共9个字符加上问号前的位置
if pwdEndIdx < len(url) {
return url[:pwdEndIdx]
}
}
return url[:minEndIdx]
url = url[:minEndIdx]
}
// 如果没有找到结束标记但URL包含?pwd=确保只保留4位密码
// 特殊处理pwd参数确保只保留4位密码
if strings.Contains(url, "?pwd=") {
pwdIdx := strings.Index(url, "?pwd=")
if pwdIdx > 0 && pwdIdx+9 <= len(url) { // ?pwd=xxxx 总共9个字符
return url[:pwdIdx+9]
if pwdIdx >= 0 && len(url) > pwdIdx+5 { // ?pwd= 有5个字符
// 只保留?pwd=后面的4位密码
pwdEndIdx := pwdIdx + 9 // ?pwd=xxxx 总共9个字符
if pwdEndIdx <= len(url) {
return url[:pwdEndIdx]
}
// 如果剩余字符不足4位返回所有可用字符
return url
}
}
}
@@ -218,7 +216,7 @@ func CleanUCPanURL(url string) string {
// Clean123PanURL 清理123网盘URL确保链接格式正确
func Clean123PanURL(url string) string {
// 检查是否为123网盘链接
domains := []string{"123684.com", "123685.com", "123912.com", "123pan.com", "123pan.cn", "123592.com"}
domains := []string{"123684.com", "123685.com","123865.com", "123912.com", "123pan.com", "123pan.cn", "123592.com"}
isDomain123 := false
for _, domain := range domains {
@@ -416,13 +414,18 @@ func ExtractPassword(content, url string) string {
}
}
// 特殊处理迅雷网盘URL中的pwd参数
if strings.Contains(url, "pan.xunlei.com") && strings.Contains(url, "?pwd=") {
pwdPattern := regexp.MustCompile(`\?pwd=([a-zA-Z0-9]{4})`)
pwdMatches := pwdPattern.FindStringSubmatch(url)
if len(pwdMatches) > 1 {
return pwdMatches[1]
}
}
// 先从URL中提取密码
matches := UrlPasswordPattern.FindStringSubmatch(url)
if len(matches) > 1 {
// 确保百度网盘密码只有4位
if strings.Contains(strings.ToLower(url), "pan.baidu.com") && len(matches[1]) > 4 {
return matches[1][:4]
}
return matches[1]
}
@@ -443,6 +446,7 @@ func ExtractPassword(content, url string) string {
// 特殊处理123网盘URL中的提取码
if (strings.Contains(url, "123684.com") ||
strings.Contains(url, "123685.com") ||
strings.Contains(url, "123865.com") ||
strings.Contains(url, "123912.com") ||
strings.Contains(url, "123pan.com") ||
strings.Contains(url, "123pan.cn") ||
@@ -460,6 +464,7 @@ func ExtractPassword(content, url string) string {
// 检查123网盘URL中的提取码参数
if (strings.Contains(url, "123684.com") ||
strings.Contains(url, "123685.com") ||
strings.Contains(url, "123865.com") ||
strings.Contains(url, "123912.com") ||
strings.Contains(url, "123pan.com") ||
strings.Contains(url, "123pan.cn") ||
@@ -766,6 +771,7 @@ func ExtractNetDiskLinks(text string) []string {
strings.Contains(cleanURL, "drive.uc.cn") ||
strings.Contains(cleanURL, "123684.com") ||
strings.Contains(cleanURL, "123685.com") ||
strings.Contains(cleanURL, "123865.com") ||
strings.Contains(cleanURL, "123912.com") ||
strings.Contains(cleanURL, "123pan.com") ||
strings.Contains(cleanURL, "123pan.cn") ||