mirror of
https://github.com/fish2018/pansou.git
synced 2025-11-24 19:12:50 +08:00
优化tg搜索
This commit is contained in:
@@ -61,8 +61,8 @@ ENV CACHE_PATH=/app/cache \
|
||||
ASYNC_MAX_BACKGROUND_WORKERS=20 \
|
||||
ASYNC_MAX_BACKGROUND_TASKS=100 \
|
||||
ASYNC_CACHE_TTL_HOURS=1 \
|
||||
CHANNELS=tgsearchers3,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju,TG654TG,WFYSFX02,QukanMovie,yeqingjie_GJG666,movielover8888_film3,Baidu_netdisk,D_wusun,FLMdongtianfudi,KaiPanshare,QQZYDAPP,rjyxfx,PikPak_Share_Channel,btzhi,newproductsourcing,cctv1211,duan_ju,QuarkFree,yunpanNB,kkdj001,xxzlzn,pxyunpanxunlei,jxwpzy,kuakedongman,liangxingzhinan,xiangnikanj,solidsexydoll,guoman4K,zdqxm,kduanju,cilidianying,CBduanju,SharePanFilms,dzsgx,BooksRealm,Oscar_4Kmovies \
|
||||
ENABLED_PLUGINS=labi,zhizhen,shandian,duoduo,muou,wanou,hunhepan,jikepan,panwiki,pansearch,panta,qupansou,hdr4k,pan666,susu,thepiratebay,xuexizhinan,panyq,ouge,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,discourse,yunsou \
|
||||
CHANNELS=tgsearchers4,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,MCPH02,MCPH03,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju,TG654TG,WFYSFX02,QukanMovie,yeqingjie_GJG666,movielover8888_film3,Baidu_netdisk,D_wusun,FLMdongtianfudi,KaiPanshare,QQZYDAPP,rjyxfx,PikPak_Share_Channel,btzhi,newproductsourcing,cctv1211,duan_ju,QuarkFree,yunpanNB,kkdj001,xxzlzn,pxyunpanxunlei,jxwpzy,kuakedongman,liangxingzhinan,xiangnikanj,solidsexydoll,guoman4K,zdqxm,kduanju,cilidianying,CBduanju,SharePanFilms,dzsgx,BooksRealm,Oscar_4Kmovies,douerpan,baidu_yppan,Q_jilupian,Netdisk_Movies,yunpanquark,ammmziyuan,ciliziyuanku,cili8888,jzmm_123pan \
|
||||
ENABLED_PLUGINS=labi,zhizhen,shandian,duoduo,muou,wanou,hunhepan,jikepan,panwiki,pansearch,panta,qupansou,hdr4k,pan666,susu,thepiratebay,xuexizhinan,panyq,ouge,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,discourse,yunsou,qqpd,ahhhhfs,nsgame,gying,quark4k,quarksoo,sousou,ash \
|
||||
AUTH_ENABLED=false \
|
||||
AUTH_TOKEN_EXPIRY=24
|
||||
|
||||
|
||||
51
README.md
51
README.md
@@ -27,21 +27,14 @@ PanSou 还提供了一个基于 [Model Context Protocol (MCP)](https://modelcont
|
||||
本项目,并点上 Star !!!
|
||||
|
||||
### 使用Docker部署
|
||||
[qqpd搜索插件文档](plugin/qqpd/README.md)
|
||||
[gying搜索插件文档](plugin/gying/README.md)
|
||||
[常见问题总结](https://github.com/fish2018/pansou/issues/46)
|
||||
|
||||
[TG资源频道列表](https://github.com/fish2018/pansou/issues/4)
|
||||
|
||||
[gying插件文档](https://github.com/fish2018/pansou/blob/main/plugin/gying/README.md)
|
||||
|
||||
<details>
|
||||
<summary>插件列表(请务必按需加载)</summary>
|
||||
<pre>
|
||||
export ENABLED_PLUGINS=hunhepan,jikepan,panwiki,pansearch,panta,qupansou,
|
||||
susu,thepiratebay,wanou,xuexizhinan,panyq,zhizhen,labi,muou,ouge,shandian,
|
||||
duoduo,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,
|
||||
libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,
|
||||
sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,
|
||||
discourse,yunsou,ahhhhfs,nsgame,gying,quark4k,quarksoo,sousou,ash
|
||||
export ENABLED_PLUGINS=hunhepan,jikepan,panwiki,pansearch,panta,qupansou,hdr4k,pan666,susu,thepiratebay,wanou,xuexizhinan,panyq,zhizhen,labi,muou,ouge,shandian,duoduo,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,discourse,yunsou,qqpd,gying,quark4k,quarksoo,sousou
|
||||
</pre>
|
||||
</details>
|
||||
|
||||
@@ -52,7 +45,7 @@ discourse,yunsou,ahhhhfs,nsgame,gying,quark4k,quarksoo,sousou,ash
|
||||
一键启动,开箱即用
|
||||
|
||||
```
|
||||
docker run -d --name pansou -p 80:80 -e ENABLED_PLUGINS="labi,zhizhen,shandian,duoduo,muou,wanou" ghcr.io/fish2018/pansou-web
|
||||
docker run -d --name pansou -p 80:80 ghcr.io/fish2018/pansou-web
|
||||
```
|
||||
|
||||
##### 使用Docker Compose(推荐)
|
||||
@@ -72,7 +65,7 @@ docker-compose logs -f
|
||||
##### 直接使用Docker命令
|
||||
|
||||
```bash
|
||||
docker run -d --name pansou -p 8888:8888 -v pansou-cache:/app/cache -e CHANNELS="tgsearchers3,xxx" -e ENABLED_PLUGINS="labi,zhizhen,shandian,duoduo,muou,wanou" ghcr.io/fish2018/pansou:latest
|
||||
docker run -d --name pansou -p 8888:8888 ghcr.io/fish2018/pansou:latest
|
||||
```
|
||||
|
||||
##### 使用Docker Compose(推荐)
|
||||
@@ -213,7 +206,7 @@ CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags="-s -w -extldflags '-sta
|
||||
|
||||
```
|
||||
[program:pansou]
|
||||
environment=PORT=8888,CHANNELS="tgsearchers3,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,Oscar_4Kmovies,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju",ENABLED_PLUGINS="labi,zhizhen,shandian,duoduo,muou"
|
||||
environment=PORT=8888,CHANNELS="tgsearchers4,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,Oscar_4Kmovies,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju",ENABLED_PLUGINS="labi,zhizhen,shandian,duoduo,muou"
|
||||
command=/home/work/pansou/pansou
|
||||
directory=/home/work/pansou
|
||||
autostart=true
|
||||
@@ -485,7 +478,9 @@ curl "http://localhost:8888/api/search?kw=速度与激情&res=merge" \
|
||||
{
|
||||
"type": "baidu",
|
||||
"url": "https://pan.baidu.com/s/1abcdef",
|
||||
"password": "1234",
|
||||
"datetime": "2023-06-10T14:23:45Z",
|
||||
"work_title": "速度与激情全集1-10"
|
||||
}
|
||||
],
|
||||
"tags": ["电影", "合集"],
|
||||
@@ -529,11 +524,37 @@ curl "http://localhost:8888/api/search?kw=速度与激情&res=merge" \
|
||||
|
||||
**字段说明**:
|
||||
|
||||
**SearchResult对象**:
|
||||
- `message_id`: 消息ID
|
||||
- `unique_id`: 全局唯一标识符
|
||||
- `channel`: 来源频道名称
|
||||
- `datetime`: 消息发布时间
|
||||
- `title`: 消息标题
|
||||
- `content`: 消息内容
|
||||
- `links`: 网盘链接数组
|
||||
- `tags`: 标签数组(可选)
|
||||
- `images`: TG消息中的图片链接数组(可选)
|
||||
|
||||
**Link对象**:
|
||||
- `type`: 网盘类型(baidu、quark、aliyun等)
|
||||
- `url`: 网盘链接地址
|
||||
- `password`: 提取码/密码
|
||||
- `datetime`: 链接更新时间(可选)
|
||||
- `work_title`: 作品标题(可选)
|
||||
- 用于区分同一消息中多个作品的链接
|
||||
- 当一条消息包含≤4个链接时,所有链接使用相同的work_title
|
||||
- 当一条消息包含>4个链接时,系统会智能识别每个链接对应的作品标题
|
||||
|
||||
**MergedLink对象**:
|
||||
- `url`: 网盘链接地址
|
||||
- `password`: 提取码/密码
|
||||
- `note`: 资源说明/标题
|
||||
- `datetime`: 链接更新时间
|
||||
- `source`: 数据来源标识
|
||||
- `tg:频道名称`: 来自Telegram频道
|
||||
- `plugin:插件名`: 来自指定插件
|
||||
- `unknown`: 未知来源
|
||||
- `images`: TG消息中的图片链接数组(可选)
|
||||
- 仅在来源为Telegram频道且消息包含图片时出现
|
||||
|
||||
|
||||
|
||||
@@ -112,7 +112,7 @@ func Init() {
|
||||
// getDefaultChannels returns the Telegram channels that are searched by default.
//
// The list is read from the CHANNELS environment variable as a comma-separated
// string. Each entry is trimmed and empty entries are skipped. When the
// variable is unset, empty, or contains no usable entry, the single built-in
// channel "tgsearchers4" is returned.
func getDefaultChannels() []string {
	channelsEnv := os.Getenv("CHANNELS")

	parts := strings.Split(channelsEnv, ",")
	channels := make([]string, 0, len(parts))
	for _, part := range parts {
		// Tolerate "a, b" and trailing commas instead of producing " b" or "".
		if name := strings.TrimSpace(part); name != "" {
			channels = append(channels, name)
		}
	}

	if len(channels) == 0 {
		return []string{"tgsearchers4"}
	}
	return channels
}
|
||||
|
||||
@@ -9,9 +9,9 @@ services:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- PORT=8888
|
||||
- CHANNELS=tgsearchers3,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju,TG654TG,WFYSFX02,QukanMovie,yeqingjie_GJG666,movielover8888_film3,Baidu_netdisk,D_wusun,FLMdongtianfudi,KaiPanshare,QQZYDAPP,rjyxfx,PikPak_Share_Channel,btzhi,newproductsourcing,cctv1211,duan_ju,QuarkFree,yunpanNB,kkdj001,xxzlzn,pxyunpanxunlei,jxwpzy,kuakedongman,liangxingzhinan,xiangnikanj,solidsexydoll,guoman4K,zdqxm,kduanju,cilidianying,CBduanju,SharePanFilms,dzsgx,BooksRealm
|
||||
- CHANNELS=tgsearchers4,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,MCPH02,MCPH03,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju,TG654TG,WFYSFX02,QukanMovie,yeqingjie_GJG666,movielover8888_film3,Baidu_netdisk,D_wusun,FLMdongtianfudi,KaiPanshare,QQZYDAPP,rjyxfx,PikPak_Share_Channel,btzhi,newproductsourcing,cctv1211,duan_ju,QuarkFree,yunpanNB,kkdj001,xxzlzn,pxyunpanxunlei,jxwpzy,kuakedongman,liangxingzhinan,xiangnikanj,solidsexydoll,guoman4K,zdqxm,kduanju,cilidianying,CBduanju,SharePanFilms,dzsgx,BooksRealm,Oscar_4Kmovies,douerpan,baidu_yppan,Q_jilupian,Netdisk_Movies,yunpanquark,ammmziyuan,ciliziyuanku,cili8888,jzmm_123pan
|
||||
# 必须指定启用的插件,多个插件用逗号分隔
|
||||
- ENABLED_PLUGINS=labi,zhizhen,shandian,duoduo,muou,wanou,hunhepan,jikepan,panwiki,pansearch,panta,qupansou,hdr4k,pan666,susu,thepiratebay,xuexizhinan,panyq,ouge,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,discourse,yunsou
|
||||
- ENABLED_PLUGINS=labi,zhizhen,shandian,duoduo,muou,wanou,hunhepan,jikepan,panwiki,pansearch,panta,qupansou,hdr4k,pan666,susu,thepiratebay,xuexizhinan,panyq,ouge,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,discourse,yunsou,qqpd,ahhhhfs,nsgame,gying,quark4k,quarksoo,sousou,ash
|
||||
- CACHE_ENABLED=true
|
||||
- CACHE_PATH=/app/cache
|
||||
- CACHE_MAX_SIZE=100
|
||||
|
||||
@@ -4,10 +4,11 @@ import "time"
|
||||
|
||||
// Link describes a single cloud-drive (net-disk) share link.
type Link struct {
	Type     string `json:"type" sonic:"type"`
	URL      string `json:"url" sonic:"url"`
	Password string `json:"password" sonic:"password"`
	// Datetime is the time the link was last updated (optional).
	// NOTE(review): encoding/json does not apply omitempty to struct values
	// such as time.Time, so a zero Datetime is still serialized — confirm
	// whether that is intended.
	Datetime time.Time `json:"datetime,omitempty" sonic:"datetime,omitempty"`
	// WorkTitle is the title of the work this link belongs to; it is used to
	// tell apart links of different works contained in the same message.
	WorkTitle string `json:"work_title,omitempty" sonic:"work_title,omitempty"`
}
|
||||
|
||||
// SearchResult 搜索结果
|
||||
@@ -65,4 +66,4 @@ func NewErrorResponse(code int, message string) Response {
|
||||
Code: code,
|
||||
Message: message,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -906,14 +906,59 @@ func extractTitleFromLinkLine(line string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// isLinkPrefix reports whether text is merely a link label rather than a real
// title: either a generic prefix word (for example "链接" or "地址") or the
// name of a cloud-drive provider. Provider names are included so that they are
// not mistaken for a work title.
//
// The comparison is an exact match after trimming surrounding whitespace and
// lower-casing the input.
func isLinkPrefix(text string) bool {
	text = strings.ToLower(strings.TrimSpace(text))

	switch text {
	// Generic link prefix words.
	case "链接", "地址", "资源地址", "网盘", "网盘地址":
		return true
	// Quark
	case "夸克", "夸克网盘", "quark", "夸克云盘":
		return true
	// Baidu
	case "百度", "百度网盘", "baidu", "百度云", "bdwp", "bdpan":
		return true
	// Xunlei
	case "迅雷", "迅雷网盘", "xunlei", "迅雷云盘":
		return true
	// 115
	case "115", "115网盘", "115云盘":
		return true
	// 123
	case "123", "123pan", "123网盘", "123云盘":
		return true
	// Aliyun
	case "阿里", "阿里云", "阿里云盘", "aliyun", "alipan", "阿里网盘":
		return true
	// Tianyi
	case "天翼", "天翼云", "天翼云盘", "tianyi", "天翼网盘":
		return true
	// UC
	case "uc", "uc网盘", "uc云盘":
		return true
	// China Mobile cloud (Caiyun)
	case "移动", "移动云", "移动云盘", "caiyun", "彩云":
		return true
	// PikPak
	case "pikpak", "pikpak网盘":
		return true
	}

	return false
}
|
||||
|
||||
// 清理标题文本
|
||||
@@ -1010,18 +1055,24 @@ func mergeResultsByType(results []model.SearchResult, keyword string, cloudTypes
|
||||
}
|
||||
|
||||
for _, link := range result.Links {
|
||||
// 尝试从映射中获取该链接对应的标题
|
||||
// 优先使用链接的WorkTitle字段,如果为空则回退到传统方式
|
||||
title := result.Title // 默认使用消息标题
|
||||
|
||||
// 查找完全匹配的链接
|
||||
if specificTitle, found := linkTitleMap[link.URL]; found && specificTitle != "" {
|
||||
title = specificTitle // 如果找到特定标题,则使用它
|
||||
if link.WorkTitle != "" {
|
||||
// 如果链接有WorkTitle字段,优先使用
|
||||
title = link.WorkTitle
|
||||
} else {
|
||||
// 如果没有找到完全匹配的链接,尝试查找前缀匹配的链接
|
||||
for mappedLink, mappedTitle := range linkTitleMap {
|
||||
if strings.HasPrefix(mappedLink, link.URL) {
|
||||
title = mappedTitle
|
||||
break
|
||||
// 如果没有WorkTitle,使用传统方式从映射中获取该链接对应的标题
|
||||
// 查找完全匹配的链接
|
||||
if specificTitle, found := linkTitleMap[link.URL]; found && specificTitle != "" {
|
||||
title = specificTitle // 如果找到特定标题,则使用它
|
||||
} else {
|
||||
// 如果没有找到完全匹配的链接,尝试查找前缀匹配的链接
|
||||
for mappedLink, mappedTitle := range linkTitleMap {
|
||||
if strings.HasPrefix(mappedLink, link.URL) {
|
||||
title = mappedTitle
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1507,4 +1558,3 @@ func calculateTimeScore(datetime time.Time) float64 {
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -523,6 +523,9 @@ func ParseSearchResults(html string, channel string) ([]model.SearchResult, stri
|
||||
|
||||
// 只有包含链接的消息才添加到结果中
|
||||
if len(links) > 0 {
|
||||
// 为每个链接提取作品标题
|
||||
links = extractWorkTitlesForLinks(links, messageText, title)
|
||||
|
||||
results = append(results, model.SearchResult{
|
||||
MessageID: messageID,
|
||||
UniqueID: uniqueID,
|
||||
@@ -619,7 +622,12 @@ func extractTitle(htmlContent string, textContent string) string {
|
||||
return strings.TrimSpace(firstLine[len("名称:"):])
|
||||
}
|
||||
|
||||
return firstLine
|
||||
// 如果第一行只是标签(以#开头),尝试从第二行提取
|
||||
if strings.HasPrefix(firstLine, "#") && !strings.Contains(firstLine, "名称") {
|
||||
// 继续从文本内容提取
|
||||
} else {
|
||||
return firstLine
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -632,6 +640,31 @@ func extractTitle(htmlContent string, textContent string) string {
|
||||
// 第一行通常是标题
|
||||
firstLine := strings.TrimSpace(lines[0])
|
||||
|
||||
// 如果第一行只是标签(以#开头且不包含实际内容),尝试从第二行或"名称:"字段提取
|
||||
if strings.HasPrefix(firstLine, "#") {
|
||||
// 检查是否有"名称:"字段
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if strings.HasPrefix(line, "名称:") {
|
||||
return strings.TrimSpace(line[len("名称:"):])
|
||||
}
|
||||
}
|
||||
|
||||
// 如果没有"名称:"字段,尝试使用第二行
|
||||
if len(lines) > 1 {
|
||||
secondLine := strings.TrimSpace(lines[1])
|
||||
if strings.HasPrefix(secondLine, "名称:") {
|
||||
return strings.TrimSpace(secondLine[len("名称:"):])
|
||||
}
|
||||
// 如果第二行不是空的且不是标签,使用第二行
|
||||
if secondLine != "" && !strings.HasPrefix(secondLine, "#") {
|
||||
result := secondLine
|
||||
result = CutTitleByKeywords(result, []string{"简介", "描述"})
|
||||
return result
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 如果第一行以"名称:"开头,则提取冒号后面的内容作为标题
|
||||
if strings.HasPrefix(firstLine, "名称:") {
|
||||
return strings.TrimSpace(firstLine[len("名称:"):])
|
||||
@@ -642,4 +675,170 @@ func extractTitle(htmlContent string, textContent string) string {
|
||||
// 统一裁剪:遇到简介/描述等关键字时,只保留前半部分
|
||||
result = CutTitleByKeywords(result, []string{"简介", "描述"})
|
||||
return result
|
||||
}
|
||||
|
||||
// extractWorkTitlesForLinks 为每个链接提取作品标题
|
||||
func extractWorkTitlesForLinks(links []model.Link, messageText string, defaultTitle string) []model.Link {
|
||||
if len(links) == 0 {
|
||||
return links
|
||||
}
|
||||
|
||||
// 如果链接数量 <= 4,认为是同一个作品的不同网盘链接
|
||||
if len(links) <= 4 {
|
||||
for i := range links {
|
||||
links[i].WorkTitle = defaultTitle
|
||||
}
|
||||
return links
|
||||
}
|
||||
|
||||
// 如果链接数量 > 4,尝试为每个链接匹配具体的作品标题
|
||||
lines := strings.Split(messageText, "\n")
|
||||
|
||||
// 检测是否是单行格式:"作品名丨网盘:链接" 或 "作品名 网盘:链接"
|
||||
if isSingleLineFormat(lines) {
|
||||
return extractWorkTitlesFromSingleLineFormat(links, lines, defaultTitle)
|
||||
}
|
||||
|
||||
// 其他格式:尝试通过上下文匹配
|
||||
return extractWorkTitlesFromContext(links, messageText, defaultTitle)
|
||||
}
|
||||
|
||||
// isSingleLineFormat reports whether the message lines look like a listing of
// one-line entries of the form "title丨disk:url".
//
// A line counts as such an entry when it contains "丨", a full-width colon
// ":" and an "http://" or "https://" substring. The result is true when the
// number of such lines is greater than len(lines)/3 (integer division; blank
// lines are included in len(lines)).
func isSingleLineFormat(lines []string) bool {
	matched := 0
	for _, raw := range lines {
		entry := strings.TrimSpace(raw)
		if entry == "" {
			continue
		}

		hasURL := strings.Contains(entry, "http://") || strings.Contains(entry, "https://")
		if hasURL && strings.Contains(entry, "丨") && strings.Contains(entry, ":") {
			matched++
		}
	}

	return matched > len(lines)/3
}
|
||||
|
||||
// extractWorkTitlesFromSingleLineFormat 从单行格式中提取作品标题
|
||||
func extractWorkTitlesFromSingleLineFormat(links []model.Link, lines []string, defaultTitle string) []model.Link {
|
||||
// 为每个链接构建URL到作品标题的映射
|
||||
urlToWorkTitle := make(map[string]string)
|
||||
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// 匹配格式: "作品名丨网盘名:链接" 或 "作品名 网盘名:链接"
|
||||
// 提取作品名和链接
|
||||
var workTitle string
|
||||
var linkURL string
|
||||
|
||||
// 优先匹配 "作品名丨网盘:链接" 格式
|
||||
if strings.Contains(line, "丨") {
|
||||
parts := strings.Split(line, "丨")
|
||||
if len(parts) >= 2 {
|
||||
workTitle = strings.TrimSpace(parts[0])
|
||||
// 从第二部分提取链接
|
||||
restPart := parts[1]
|
||||
if idx := strings.Index(restPart, "http"); idx >= 0 {
|
||||
linkURL = extractFirstURL(restPart[idx:])
|
||||
}
|
||||
}
|
||||
} else if strings.Contains(line, ":") {
|
||||
// 匹配 "作品名 网盘:链接" 格式
|
||||
colonIdx := strings.Index(line, ":")
|
||||
if colonIdx > 0 {
|
||||
beforeColon := line[:colonIdx]
|
||||
afterColon := line[colonIdx+len(":"):]
|
||||
|
||||
// 尝试从冒号前提取作品名(去除网盘名)
|
||||
workTitle = extractWorkTitleBeforeColon(beforeColon)
|
||||
|
||||
// 从冒号后提取链接
|
||||
if idx := strings.Index(afterColon, "http"); idx >= 0 {
|
||||
linkURL = extractFirstURL(afterColon[idx:])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 如果成功提取了作品名和链接,添加到映射
|
||||
if workTitle != "" && linkURL != "" {
|
||||
// 标准化URL用于匹配
|
||||
normalizedURL := normalizeUrl(linkURL)
|
||||
urlToWorkTitle[normalizedURL] = workTitle
|
||||
}
|
||||
}
|
||||
|
||||
// 为每个链接设置作品标题
|
||||
for i := range links {
|
||||
normalizedURL := normalizeUrl(links[i].URL)
|
||||
if workTitle, found := urlToWorkTitle[normalizedURL]; found {
|
||||
links[i].WorkTitle = workTitle
|
||||
} else {
|
||||
links[i].WorkTitle = defaultTitle
|
||||
}
|
||||
}
|
||||
|
||||
return links
|
||||
}
|
||||
|
||||
// extractFirstURL returns the first whitespace-delimited token of text, or ""
// when text is empty or all whitespace.
//
// Any Unicode whitespace ends the token (space, tab, CR, LF, full-width space,
// ...), and leading whitespace is skipped rather than causing the remainder of
// the string to be returned.
func extractFirstURL(text string) string {
	fields := strings.Fields(text)
	if len(fields) == 0 {
		return ""
	}
	return fields[0]
}
|
||||
|
||||
// extractWorkTitleBeforeColon derives a work title from the text that precedes
// the colon on a "title disk:url" line.
//
// The text is trimmed, then at most one trailing cloud-drive name is removed
// (case-sensitive suffix match, first hit in list order wins) and the result
// is trimmed again. Text without such a suffix is returned trimmed.
func extractWorkTitleBeforeColon(text string) string {
	title := strings.TrimSpace(text)

	// Within each provider the longer names come first so that the most
	// specific suffix is the one removed; the generic words are tried last.
	suffixes := [...]string{
		"夸克网盘", "夸克云盘", "夸克",
		"百度网盘", "百度云盘", "百度云", "百度",
		"迅雷网盘", "迅雷云盘", "迅雷",
		"阿里云盘", "阿里网盘", "阿里云", "阿里",
		"天翼云盘", "天翼网盘", "天翼云", "天翼",
		"UC网盘", "UC云盘", "UC",
		"移动云盘", "移动云", "移动",
		"115网盘", "115云盘", "115",
		"123网盘", "123云盘", "123",
		"PikPak网盘", "PikPak",
		"网盘", "云盘",
	}

	for i := 0; i < len(suffixes); i++ {
		if stripped := strings.TrimSuffix(title, suffixes[i]); stripped != title {
			return strings.TrimSpace(stripped)
		}
	}

	return title
}
|
||||
|
||||
// extractWorkTitlesFromContext 通过上下文为链接提取作品标题
|
||||
func extractWorkTitlesFromContext(links []model.Link, messageText string, defaultTitle string) []model.Link {
|
||||
// 简单实现:如果无法精确匹配,则都使用默认标题
|
||||
for i := range links {
|
||||
links[i].WorkTitle = defaultTitle
|
||||
}
|
||||
return links
|
||||
}
|
||||
@@ -13,24 +13,24 @@ var AllPanLinksPattern = regexp.MustCompile(`(?i)(?:(?:magnet:\?xt=urn:btih:[a-z
|
||||
// BaiduPanPattern matches a Baidu Pan share link only (optionally with a
// 4-character ?pwd= parameter), without any text that follows it.
var BaiduPanPattern = regexp.MustCompile(`https?://pan\.baidu\.com/s/[a-zA-Z0-9_-]+(?:\?pwd=[a-zA-Z0-9]{4})?`)

// QuarkPanPattern matches a Quark Pan share link.
var QuarkPanPattern = regexp.MustCompile(`https?://pan\.quark\.cn/s/[a-zA-Z0-9]+`)

// XunleiPanPattern matches a Xunlei Pan share link, optionally with a
// 4-character ?pwd= parameter and a trailing '#'.
var XunleiPanPattern = regexp.MustCompile(`https?://pan\.xunlei\.com/s/[a-zA-Z0-9]+(?:\?pwd=[a-zA-Z0-9]{4})?(?:#)?`)

// TianyiPanPattern matches a Tianyi cloud share link, including a URL-encoded
// access code and an optional parenthesised suffix.
var TianyiPanPattern = regexp.MustCompile(`https?://cloud\.189\.cn/t/[a-zA-Z0-9]+(?:%[0-9A-Fa-f]{2})*(?:([^)]*))?`)

// UCPanPattern matches a UC drive share link.
var UCPanPattern = regexp.MustCompile(`https?://drive\.uc\.cn/s/[a-zA-Z0-9]+(?:\?public=\d)?`)

// Pan123Pattern matches a 123 Pan share link on any of its known domains,
// optionally followed by an extraction-code query.
var Pan123Pattern = regexp.MustCompile(`https?://(?:www\.)?123(?:684|865|685|912|pan|592)\.(?:com|cn)/s/[a-zA-Z0-9_-]+(?:\?(?:%E6%8F%90%E5%8F%96%E7%A0%81|提取码)[::][a-zA-Z0-9]+)?`)

// Pan115Pattern matches a 115 share link (115.com, 115cdn.com, anxia.com).
var Pan115Pattern = regexp.MustCompile(`https?://(?:115\.com|115cdn\.com|anxia\.com)/s/[a-zA-Z0-9]+(?:\?password=[a-zA-Z0-9]{4})?(?:#)?`)

// AliyunPanPattern matches an Aliyun drive share link.
var AliyunPanPattern = regexp.MustCompile(`https?://(?:www\.)?(?:alipan|aliyundrive)\.com/s/[a-zA-Z0-9]+`)

// PasswordPattern captures a 4-character extraction code announced in text.
// The code must be followed by a non-alphanumeric character or the end of the
// input, so a longer token is rejected instead of being truncated to 4
// characters.
var PasswordPattern = regexp.MustCompile(`(?i)(?:(?:提取|访问|提取密|密)码|pwd)[::]\s*([a-zA-Z0-9]{4})(?:[^a-zA-Z0-9]|$)`)

// UrlPasswordPattern captures a 4-character pwd query parameter from a URL,
// with the same "exactly four characters" boundary rule as PasswordPattern.
var UrlPasswordPattern = regexp.MustCompile(`(?i)[?&]pwd=([a-zA-Z0-9]{4})(?:[^a-zA-Z0-9]|$)`)

// BaiduPasswordPattern captures a Baidu Pan extraction code; only codes of
// exactly four characters are accepted.
var BaiduPasswordPattern = regexp.MustCompile(`(?i)(?:链接:.*?提取码:|密码:|提取码:|pwd=|pwd:|pwd:)([a-zA-Z0-9]{4})(?:[^a-zA-Z0-9]|$)`)
|
||||
|
||||
// GetLinkType 获取链接类型
|
||||
func GetLinkType(url string) string {
|
||||
@@ -83,7 +83,7 @@ func GetLinkType(url string) string {
|
||||
}
|
||||
|
||||
// 123网盘有多个域名
|
||||
if strings.Contains(url, "123684.com") || strings.Contains(url, "123685.com") ||
|
||||
if strings.Contains(url, "123684.com") || strings.Contains(url, "123685.com") || strings.Contains(url, "123865.com") ||
|
||||
strings.Contains(url, "123912.com") || strings.Contains(url, "123pan.com") ||
|
||||
strings.Contains(url, "123pan.cn") || strings.Contains(url, "123592.com") {
|
||||
return "123"
|
||||
@@ -115,22 +115,20 @@ func CleanBaiduPanURL(url string) string {
|
||||
|
||||
// 如果找到了结束标记,截取到结束标记位置
|
||||
if minEndIdx < len(url) {
|
||||
// 特殊处理pwd参数
|
||||
if strings.Contains(url[:minEndIdx], "?pwd=") {
|
||||
pwdIdx := strings.Index(url, "?pwd=")
|
||||
pwdEndIdx := pwdIdx + 10 // ?pwd=xxxx 总共9个字符,加上问号前的位置
|
||||
if pwdEndIdx < len(url) {
|
||||
return url[:pwdEndIdx]
|
||||
}
|
||||
}
|
||||
return url[:minEndIdx]
|
||||
url = url[:minEndIdx]
|
||||
}
|
||||
|
||||
// 如果没有找到结束标记,但URL包含?pwd=,确保只保留4位密码
|
||||
// 特殊处理pwd参数,确保只保留4位密码
|
||||
if strings.Contains(url, "?pwd=") {
|
||||
pwdIdx := strings.Index(url, "?pwd=")
|
||||
if pwdIdx > 0 && pwdIdx+9 <= len(url) { // ?pwd=xxxx 总共9个字符
|
||||
return url[:pwdIdx+9]
|
||||
if pwdIdx >= 0 && len(url) > pwdIdx+5 { // ?pwd= 有5个字符
|
||||
// 只保留?pwd=后面的4位密码
|
||||
pwdEndIdx := pwdIdx + 9 // ?pwd=xxxx 总共9个字符
|
||||
if pwdEndIdx <= len(url) {
|
||||
return url[:pwdEndIdx]
|
||||
}
|
||||
// 如果剩余字符不足4位,返回所有可用字符
|
||||
return url
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -218,7 +216,7 @@ func CleanUCPanURL(url string) string {
|
||||
// Clean123PanURL 清理123网盘URL,确保链接格式正确
|
||||
func Clean123PanURL(url string) string {
|
||||
// 检查是否为123网盘链接
|
||||
domains := []string{"123684.com", "123685.com", "123912.com", "123pan.com", "123pan.cn", "123592.com"}
|
||||
domains := []string{"123684.com", "123685.com","123865.com", "123912.com", "123pan.com", "123pan.cn", "123592.com"}
|
||||
isDomain123 := false
|
||||
|
||||
for _, domain := range domains {
|
||||
@@ -416,13 +414,18 @@ func ExtractPassword(content, url string) string {
|
||||
}
|
||||
}
|
||||
|
||||
// 特殊处理迅雷网盘URL中的pwd参数
|
||||
if strings.Contains(url, "pan.xunlei.com") && strings.Contains(url, "?pwd=") {
|
||||
pwdPattern := regexp.MustCompile(`\?pwd=([a-zA-Z0-9]{4})`)
|
||||
pwdMatches := pwdPattern.FindStringSubmatch(url)
|
||||
if len(pwdMatches) > 1 {
|
||||
return pwdMatches[1]
|
||||
}
|
||||
}
|
||||
|
||||
// 先从URL中提取密码
|
||||
matches := UrlPasswordPattern.FindStringSubmatch(url)
|
||||
if len(matches) > 1 {
|
||||
// 确保百度网盘密码只有4位
|
||||
if strings.Contains(strings.ToLower(url), "pan.baidu.com") && len(matches[1]) > 4 {
|
||||
return matches[1][:4]
|
||||
}
|
||||
return matches[1]
|
||||
}
|
||||
|
||||
@@ -443,6 +446,7 @@ func ExtractPassword(content, url string) string {
|
||||
// 特殊处理123网盘URL中的提取码
|
||||
if (strings.Contains(url, "123684.com") ||
|
||||
strings.Contains(url, "123685.com") ||
|
||||
strings.Contains(url, "123865.com") ||
|
||||
strings.Contains(url, "123912.com") ||
|
||||
strings.Contains(url, "123pan.com") ||
|
||||
strings.Contains(url, "123pan.cn") ||
|
||||
@@ -460,6 +464,7 @@ func ExtractPassword(content, url string) string {
|
||||
// 检查123网盘URL中的提取码参数
|
||||
if (strings.Contains(url, "123684.com") ||
|
||||
strings.Contains(url, "123685.com") ||
|
||||
strings.Contains(url, "123865.com") ||
|
||||
strings.Contains(url, "123912.com") ||
|
||||
strings.Contains(url, "123pan.com") ||
|
||||
strings.Contains(url, "123pan.cn") ||
|
||||
@@ -766,6 +771,7 @@ func ExtractNetDiskLinks(text string) []string {
|
||||
strings.Contains(cleanURL, "drive.uc.cn") ||
|
||||
strings.Contains(cleanURL, "123684.com") ||
|
||||
strings.Contains(cleanURL, "123685.com") ||
|
||||
strings.Contains(cleanURL, "123865.com") ||
|
||||
strings.Contains(cleanURL, "123912.com") ||
|
||||
strings.Contains(cleanURL, "123pan.com") ||
|
||||
strings.Contains(cleanURL, "123pan.cn") ||
|
||||
|
||||
Reference in New Issue
Block a user