From e565502e832cc4953b78ca5a4fd220e15378fe9d Mon Sep 17 00:00:00 2001
From: "www.xueximeng.com"
Date: Sat, 8 Nov 2025 15:04:40 +0800
Subject: [PATCH] 优化tg搜索
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile                |   4 +-
 README.md                 |  51 +++++++---
 config/config.go          |   2 +-
 docker-compose.yml        |   4 +-
 model/response.go         |  11 ++-
 service/search_service.go |  82 +++++++++++++---
 util/parser_util.go       | 201 +++++++++++++++++++++++++++++++++++++-
 util/regex_util.go        |  52 +++++-----
 8 files changed, 342 insertions(+), 65 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 7841335..99f4131 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -61,8 +61,8 @@ ENV CACHE_PATH=/app/cache \
     ASYNC_MAX_BACKGROUND_WORKERS=20 \
     ASYNC_MAX_BACKGROUND_TASKS=100 \
     ASYNC_CACHE_TTL_HOURS=1 \
-    CHANNELS=tgsearchers3,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju,TG654TG,WFYSFX02,QukanMovie,yeqingjie_GJG666,movielover8888_film3,Baidu_netdisk,D_wusun,FLMdongtianfudi,KaiPanshare,QQZYDAPP,rjyxfx,PikPak_Share_Channel,btzhi,newproductsourcing,cctv1211,duan_ju,QuarkFree,yunpanNB,kkdj001,xxzlzn,pxyunpanxunlei,jxwpzy,kuakedongman,liangxingzhinan,xiangnikanj,solidsexydoll,guoman4K,zdqxm,kduanju,cilidianying,CBduanju,SharePanFilms,dzsgx,BooksRealm,Oscar_4Kmovies \
-    ENABLED_PLUGINS=labi,zhizhen,shandian,duoduo,muou,wanou,hunhepan,jikepan,panwiki,pansearch,panta,qupansou,hdr4k,pan666,susu,thepiratebay,xuexizhinan,panyq,ouge,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,discourse,yunsou \
+    CHANNELS=tgsearchers4,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,MCPH02,MCPH03,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju,TG654TG,WFYSFX02,QukanMovie,yeqingjie_GJG666,movielover8888_film3,Baidu_netdisk,D_wusun,FLMdongtianfudi,KaiPanshare,QQZYDAPP,rjyxfx,PikPak_Share_Channel,btzhi,newproductsourcing,cctv1211,duan_ju,QuarkFree,yunpanNB,kkdj001,xxzlzn,pxyunpanxunlei,jxwpzy,kuakedongman,liangxingzhinan,xiangnikanj,solidsexydoll,guoman4K,zdqxm,kduanju,cilidianying,CBduanju,SharePanFilms,dzsgx,BooksRealm,Oscar_4Kmovies,douerpan,baidu_yppan,Q_jilupian,Netdisk_Movies,yunpanquark,ammmziyuan,ciliziyuanku,cili8888,jzmm_123pan \
+    ENABLED_PLUGINS=labi,zhizhen,shandian,duoduo,muou,wanou,hunhepan,jikepan,panwiki,pansearch,panta,qupansou,hdr4k,pan666,susu,thepiratebay,xuexizhinan,panyq,ouge,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,discourse,yunsou,qqpd,ahhhhfs,nsgame,gying,quark4k,quarksoo,sousou,ash \
    AUTH_ENABLED=false \
    AUTH_TOKEN_EXPIRY=24

diff --git a/README.md b/README.md
index 4b3266a..6b3e1cf 100644
--- a/README.md
+++ b/README.md
@@ -27,21 +27,14 @@ PanSou 还提供了一个基于 [Model Context Protocol (MCP)](https://modelcont
 本项目,并点上 Star !!!
 ### 使用Docker部署
+[qqpd搜索插件文档](plugin/qqpd/README.md)
+[gying搜索插件文档](plugin/gying/README.md)
 [常见问题总结](https://github.com/fish2018/pansou/issues/46)
-
 [TG资源频道列表](https://github.com/fish2018/pansou/issues/4)
-
-[gying插件文档](https://github.com/fish2018/pansou/blob/main/plugin/gying/README.md)
-
 插件列表(请务必按需加载)
-export ENABLED_PLUGINS=hunhepan,jikepan,panwiki,pansearch,panta,qupansou,
-susu,thepiratebay,wanou,xuexizhinan,panyq,zhizhen,labi,muou,ouge,shandian,
-duoduo,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,
-libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,
-sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,
-discourse,yunsou,ahhhhfs,nsgame,gying,quark4k,quarksoo,sousou,ash
+export ENABLED_PLUGINS=hunhepan,jikepan,panwiki,pansearch,panta,qupansou,hdr4k,pan666,susu,thepiratebay,wanou,xuexizhinan,panyq,zhizhen,labi,muou,ouge,shandian,duoduo,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,discourse,yunsou,qqpd,gying,quark4k,quarksoo,sousou
 
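For reference, here is a minimal sketch of how a comma-separated `ENABLED_PLUGINS` value can be consumed on the Go side. It assumes plugin parsing mirrors the `CHANNELS` handling visible in the config/config.go hunk further down; `getEnabledPlugins` is a hypothetical name used only for illustration, not code from this patch:

```go
package main

import (
	"fmt"
	"os"
	"strings"
)

// getEnabledPlugins splits the ENABLED_PLUGINS env var on commas.
// Hypothetical sketch: mirrors getDefaultChannels in config/config.go,
// except there is no implicit default, since plugins must be enabled
// explicitly (按需加载).
func getEnabledPlugins() []string {
	pluginsEnv := os.Getenv("ENABLED_PLUGINS")
	if pluginsEnv == "" {
		return nil
	}
	return strings.Split(pluginsEnv, ",")
}

func main() {
	_ = os.Setenv("ENABLED_PLUGINS", "labi,zhizhen,shandian")
	fmt.Println(getEnabledPlugins()) // [labi zhizhen shandian]
}
```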
@@ -52,7 +45,7 @@ discourse,yunsou,ahhhhfs,nsgame,gying,quark4k,quarksoo,sousou,ash
 一键启动,开箱即用
 ```
-docker run -d --name pansou -p 80:80 -e ENABLED_PLUGINS="labi,zhizhen,shandian,duoduo,muou,wanou" ghcr.io/fish2018/pansou-web
+docker run -d --name pansou -p 80:80 ghcr.io/fish2018/pansou-web
 ```

 ##### 使用Docker Compose(推荐)
@@ -72,7 +65,7 @@ docker-compose logs -f
 ##### 直接使用Docker命令
 ```bash
-docker run -d --name pansou -p 8888:8888 -v pansou-cache:/app/cache -e CHANNELS="tgsearchers3,xxx" -e ENABLED_PLUGINS="labi,zhizhen,shandian,duoduo,muou,wanou" ghcr.io/fish2018/pansou:latest
+docker run -d --name pansou -p 8888:8888 ghcr.io/fish2018/pansou:latest
 ```

 ##### 使用Docker Compose(推荐)
@@ -213,7 +206,7 @@ CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags="-s -w -extldflags '-sta
 ```
 [program:pansou]
-environment=PORT=8888,CHANNELS="tgsearchers3,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,Oscar_4Kmovies,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju",ENABLED_PLUGINS="labi,zhizhen,shandian,duoduo,muou"
+environment=PORT=8888,CHANNELS="tgsearchers4,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,Oscar_4Kmovies,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju",ENABLED_PLUGINS="labi,zhizhen,shandian,duoduo,muou"
 command=/home/work/pansou/pansou
 directory=/home/work/pansou
 autostart=true
@@ -485,7 +478,9 @@ curl "http://localhost:8888/api/search?kw=速度与激情&res=merge" \
         {
           "type": "baidu",
           "url": "https://pan.baidu.com/s/1abcdef",
-          "password": "1234"
+          "password": "1234",
+          "datetime": "2023-06-10T14:23:45Z",
+          "work_title": "速度与激情全集1-10"
         }
       ],
       "tags": ["电影", "合集"],
@@ -529,11 +524,37 @@ curl "http://localhost:8888/api/search?kw=速度与激情&res=merge" \
 **字段说明**:
+**SearchResult对象**:
+- `message_id`: 消息ID
+- `unique_id`: 全局唯一标识符
+- `channel`: 来源频道名称
+- `datetime`: 消息发布时间
+- `title`: 消息标题
+- `content`: 消息内容
+- `links`: 网盘链接数组
+- `tags`: 标签数组(可选)
+- `images`: TG消息中的图片链接数组(可选)
+
+**Link对象**:
+- `type`: 网盘类型(baidu、quark、aliyun等)
+- `url`: 网盘链接地址
+- `password`: 提取码/密码
+- `datetime`: 链接更新时间(可选)
+- `work_title`: 作品标题(可选)
+  - 用于区分同一消息中多个作品的链接
+  - 当一条消息包含≤4个链接时,所有链接使用相同的work_title
+  - 当一条消息包含>4个链接时,系统会智能识别每个链接对应的作品标题
+
+**MergedLink对象**:
+- `url`: 网盘链接地址
+- `password`: 提取码/密码
+- `note`: 资源说明/标题
+- `datetime`: 链接更新时间
 - `source`: 数据来源标识
   - `tg:频道名称`: 来自Telegram频道
   - `plugin:插件名`: 来自指定插件
   - `unknown`: 未知来源
-- `images`: TG消息中的图片链接数组(可选字段)
+- `images`: TG消息中的图片链接数组(可选)
   - 仅在来源为Telegram频道且消息包含图片时出现

diff --git a/config/config.go b/config/config.go
index d5b529a..9a5cb33 100644
--- a/config/config.go
+++ b/config/config.go
@@ -112,7 +112,7 @@ func Init() {
 func getDefaultChannels() []string {
     channelsEnv := os.Getenv("CHANNELS")
     if channelsEnv == "" {
-        return []string{"tgsearchers3"}
+        return []string{"tgsearchers4"}
     }
     return strings.Split(channelsEnv, ",")
 }

diff --git a/docker-compose.yml b/docker-compose.yml
index 9d18e71..27e5037 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -9,9 +9,9 @@ services:
       - "8888:8888"
     environment:
       - PORT=8888
-      - CHANNELS=tgsearchers3,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju,TG654TG,WFYSFX02,QukanMovie,yeqingjie_GJG666,movielover8888_film3,Baidu_netdisk,D_wusun,FLMdongtianfudi,KaiPanshare,QQZYDAPP,rjyxfx,PikPak_Share_Channel,btzhi,newproductsourcing,cctv1211,duan_ju,QuarkFree,yunpanNB,kkdj001,xxzlzn,pxyunpanxunlei,jxwpzy,kuakedongman,liangxingzhinan,xiangnikanj,solidsexydoll,guoman4K,zdqxm,kduanju,cilidianying,CBduanju,SharePanFilms,dzsgx,BooksRealm
+      - CHANNELS=tgsearchers4,Aliyun_4K_Movies,bdbdndn11,yunpanx,bsbdbfjfjff,yp123pan,sbsbsnsqq,yunpanxunlei,tianyifc,BaiduCloudDisk,txtyzy,peccxinpd,gotopan,PanjClub,kkxlzy,baicaoZY,MCPH01,MCPH02,MCPH03,bdwpzhpd,ysxb48,jdjdn1111,yggpan,MCPH086,zaihuayun,Q66Share,ucwpzy,shareAliyun,alyp_1,dianyingshare,Quark_Movies,XiangxiuNBB,ydypzyfx,ucquark,xx123pan,yingshifenxiang123,zyfb123,tyypzhpd,tianyirigeng,cloudtianyi,hdhhd21,Lsp115,oneonefivewpfx,qixingzhenren,taoxgzy,Channel_Shares_115,tyysypzypd,vip115hot,wp123zy,yunpan139,yunpan189,yunpanuc,yydf_hzl,leoziyuan,pikpakpan,Q_dongman,yoyokuakeduanju,TG654TG,WFYSFX02,QukanMovie,yeqingjie_GJG666,movielover8888_film3,Baidu_netdisk,D_wusun,FLMdongtianfudi,KaiPanshare,QQZYDAPP,rjyxfx,PikPak_Share_Channel,btzhi,newproductsourcing,cctv1211,duan_ju,QuarkFree,yunpanNB,kkdj001,xxzlzn,pxyunpanxunlei,jxwpzy,kuakedongman,liangxingzhinan,xiangnikanj,solidsexydoll,guoman4K,zdqxm,kduanju,cilidianying,CBduanju,SharePanFilms,dzsgx,BooksRealm,Oscar_4Kmovies,douerpan,baidu_yppan,Q_jilupian,Netdisk_Movies,yunpanquark,ammmziyuan,ciliziyuanku,cili8888,jzmm_123pan
       # 必须指定启用的插件,多个插件用逗号分隔
-      - ENABLED_PLUGINS=labi,zhizhen,shandian,duoduo,muou,wanou,hunhepan,jikepan,panwiki,pansearch,panta,qupansou,hdr4k,pan666,susu,thepiratebay,xuexizhinan,panyq,ouge,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,discourse,yunsou
+      - ENABLED_PLUGINS=labi,zhizhen,shandian,duoduo,muou,wanou,hunhepan,jikepan,panwiki,pansearch,panta,qupansou,hdr4k,pan666,susu,thepiratebay,xuexizhinan,panyq,ouge,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,discourse,yunsou,qqpd,ahhhhfs,nsgame,gying,quark4k,quarksoo,sousou,ash
       - CACHE_ENABLED=true
       - CACHE_PATH=/app/cache
       - CACHE_MAX_SIZE=100

diff --git a/model/response.go b/model/response.go
index 6b56693..3f7c2c5 100644
--- a/model/response.go
+++ b/model/response.go
@@ -4,10 +4,11 @@ import "time"

 // Link 网盘链接
 type Link struct {
-    Type     string    `json:"type" sonic:"type"`
-    URL      string    `json:"url" sonic:"url"`
-    Password string    `json:"password" sonic:"password"`
-    Datetime time.Time `json:"datetime,omitempty" sonic:"datetime,omitempty"` // 链接更新时间(可选)
+    Type      string    `json:"type" sonic:"type"`
+    URL       string    `json:"url" sonic:"url"`
+    Password  string    `json:"password" sonic:"password"`
+    Datetime  time.Time `json:"datetime,omitempty" sonic:"datetime,omitempty"`       // 链接更新时间(可选)
+    WorkTitle string    `json:"work_title,omitempty" sonic:"work_title,omitempty"` // 作品标题(用于区分同一消息中多个作品的链接)
 }

 // SearchResult 搜索结果
@@ -65,4 +66,4 @@ func NewErrorResponse(code int, message string) Response {
         Code:    code,
         Message: message,
     }
-}
+}
\ No newline at end of file

diff --git a/service/search_service.go b/service/search_service.go
index 467c106..e81e31d 100644
--- a/service/search_service.go
+++ b/service/search_service.go
@@ -906,14 +906,59 @@ func extractTitleFromLinkLine(line string) string {
     return ""
 }

-// 判断是否为链接前缀词
+// 判断是否为链接前缀词(包括网盘名称)
 func isLinkPrefix(text string) bool {
     text = strings.ToLower(strings.TrimSpace(text))
-    return text == "链接" ||
-        text == "地址" ||
-        text == "资源地址" ||
-        text == "网盘" ||
-        text == "网盘地址"
+
+    // 标准链接前缀词
+    if text == "链接" ||
+        text == "地址" ||
+        text == "资源地址" ||
+        text == "网盘" ||
+        text == "网盘地址" {
+        return true
+    }
+
+    // 网盘名称(防止误将网盘名称当作标题)
+    cloudDiskNames := []string{
+        // 夸克网盘
+        "夸克", "夸克网盘", "quark", "夸克云盘",
+
+        // 百度网盘
+        "百度", "百度网盘", "baidu", "百度云", "bdwp", "bdpan",
+
+        // 迅雷网盘
+        "迅雷", "迅雷网盘", "xunlei", "迅雷云盘",
+
+        // 115网盘
+        "115", "115网盘", "115云盘",
+
+        // 123网盘
+        "123", "123pan", "123网盘", "123云盘",
+
+        // 阿里云盘
+        "阿里", "阿里云", "阿里云盘", "aliyun", "alipan", "阿里网盘",
+
+        // 天翼云盘
+        "天翼", "天翼云", "天翼云盘", "tianyi", "天翼网盘",
+
+        // UC网盘
+        "uc", "uc网盘", "uc云盘",
+
+        // 移动云盘
+        "移动", "移动云", "移动云盘", "caiyun", "彩云",
+
+        // PikPak
+        "pikpak", "pikpak网盘",
+    }
+
+    for _, name := range cloudDiskNames {
+        if text == name {
+            return true
+        }
+    }
+
+    return false
 }

 // 清理标题文本
@@ -1010,18 +1055,24 @@ func mergeResultsByType(results []model.SearchResult, keyword string, cloudTypes
         }

         for _, link := range result.Links {
-            // 尝试从映射中获取该链接对应的标题
+            // 优先使用链接的WorkTitle字段,如果为空则回退到传统方式
             title := result.Title // 默认使用消息标题

-            // 查找完全匹配的链接
-            if specificTitle, found := linkTitleMap[link.URL]; found && specificTitle != "" {
-                title = specificTitle // 如果找到特定标题,则使用它
+            if link.WorkTitle != "" {
+                // 如果链接有WorkTitle字段,优先使用
+                title = link.WorkTitle
             } else {
-                // 如果没有找到完全匹配的链接,尝试查找前缀匹配的链接
-                for mappedLink, mappedTitle := range linkTitleMap {
-                    if strings.HasPrefix(mappedLink, link.URL) {
-                        title = mappedTitle
-                        break
+                // 如果没有WorkTitle,使用传统方式从映射中获取该链接对应的标题
+                // 查找完全匹配的链接
+                if specificTitle, found := linkTitleMap[link.URL]; found && specificTitle != "" {
+                    title = specificTitle // 如果找到特定标题,则使用它
+                } else {
+                    // 如果没有找到完全匹配的链接,尝试查找前缀匹配的链接
+                    for mappedLink, mappedTitle := range linkTitleMap {
+                        if strings.HasPrefix(mappedLink, link.URL) {
+                            title = mappedTitle
+                            break
+                        }
                     }
                 }
             }
@@ -1507,4 +1558,3 @@ func calculateTimeScore(datetime time.Time) float64 {
 }
 
-

diff --git a/util/parser_util.go b/util/parser_util.go
index a43e7ac..3b64569 100644
--- a/util/parser_util.go
+++ b/util/parser_util.go
@@ -523,6 +523,9 @@ func ParseSearchResults(html string, channel string) ([]model.SearchResult, stri
     // 只有包含链接的消息才添加到结果中
     if len(links) > 0 {
+        // 为每个链接提取作品标题
+        links = extractWorkTitlesForLinks(links, messageText, title)
+
         results = append(results, model.SearchResult{
             MessageID: messageID,
             UniqueID:  uniqueID,
@@ -619,7 +622,12 @@ func extractTitle(htmlContent string, textContent string) string {
             return strings.TrimSpace(firstLine[len("名称:"):])
         }

-        return firstLine
+        // 如果第一行只是标签(以#开头),尝试从第二行提取
+        if strings.HasPrefix(firstLine, "#") && !strings.Contains(firstLine, "名称") {
+            // 继续从文本内容提取
+        } else {
+            return firstLine
+        }
     }
 }
@@ -632,6 +640,31 @@ func extractTitle(htmlContent string, textContent string) string {
     // 第一行通常是标题
     firstLine := strings.TrimSpace(lines[0])

+    // 如果第一行只是标签(以#开头且不包含实际内容),尝试从第二行或"名称:"字段提取
+    if strings.HasPrefix(firstLine, "#") {
+        // 检查是否有"名称:"字段
+        for _, line := range lines {
+            line = strings.TrimSpace(line)
+            if strings.HasPrefix(line, "名称:") {
+                return strings.TrimSpace(line[len("名称:"):])
+            }
+        }
+
+        // 如果没有"名称:"字段,尝试使用第二行
+        if len(lines) > 1 {
+            secondLine := strings.TrimSpace(lines[1])
+            if strings.HasPrefix(secondLine, "名称:") {
+                return strings.TrimSpace(secondLine[len("名称:"):])
+            }
+            // 如果第二行不是空的且不是标签,使用第二行
+            if secondLine != "" && !strings.HasPrefix(secondLine, "#") {
+                result := secondLine
+                result = CutTitleByKeywords(result, []string{"简介", "描述"})
+                return result
+            }
+        }
+    }
+
     // 如果第一行以"名称:"开头,则提取冒号后面的内容作为标题
     if strings.HasPrefix(firstLine, "名称:") {
         return strings.TrimSpace(firstLine[len("名称:"):])
@@ -642,4 +675,170 @@ func extractTitle(htmlContent string, textContent string) string {
     // 统一裁剪:遇到简介/描述等关键字时,只保留前半部分
     result = CutTitleByKeywords(result, []string{"简介", "描述"})
     return result
-}
+}
+
+// extractWorkTitlesForLinks 为每个链接提取作品标题
+func extractWorkTitlesForLinks(links []model.Link, messageText string, defaultTitle string) []model.Link {
+    if len(links) == 0 {
+        return links
+    }
+
+    // 如果链接数量 <= 4,认为是同一个作品的不同网盘链接
+    if len(links) <= 4 {
+        for i := range links {
+            links[i].WorkTitle = defaultTitle
+        }
+        return links
+    }
+
+    // 如果链接数量 > 4,尝试为每个链接匹配具体的作品标题
+    lines := strings.Split(messageText, "\n")
+
+    // 检测是否是单行格式:"作品名丨网盘:链接" 或 "作品名 网盘:链接"
+    if isSingleLineFormat(lines) {
+        return extractWorkTitlesFromSingleLineFormat(links, lines, defaultTitle)
+    }
+
+    // 其他格式:尝试通过上下文匹配
+    return extractWorkTitlesFromContext(links, messageText, defaultTitle)
+}
+
+// isSingleLineFormat 检测是否是单行格式
+func isSingleLineFormat(lines []string) bool {
+    singleLineCount := 0
+    for _, line := range lines {
+        line = strings.TrimSpace(line)
+        if line == "" {
+            continue
+        }
+
+        // 检测是否包含:"作品名丨网盘:链接" 或类似格式
+        if strings.Contains(line, "丨") && strings.Contains(line, ":") && (strings.Contains(line, "http://") || strings.Contains(line, "https://")) {
+            singleLineCount++
+        }
+    }
+
+    // 如果超过三分之一的行符合单行格式,则认为是单行格式
+    return singleLineCount > len(lines)/3
+}
+
+// extractWorkTitlesFromSingleLineFormat 从单行格式中提取作品标题
+func extractWorkTitlesFromSingleLineFormat(links []model.Link, lines []string, defaultTitle string) []model.Link {
+    // 为每个链接构建URL到作品标题的映射
+    urlToWorkTitle := make(map[string]string)
+
+    for _, line := range lines {
+        line = strings.TrimSpace(line)
+        if line == "" {
+            continue
+        }
+
+        // 匹配格式: "作品名丨网盘名:链接" 或 "作品名 网盘名:链接"
+        // 提取作品名和链接
+        var workTitle string
+        var linkURL string
+
+        // 优先匹配 "作品名丨网盘:链接" 格式
+        if strings.Contains(line, "丨") {
+            parts := strings.Split(line, "丨")
+            if len(parts) >= 2 {
+                workTitle = strings.TrimSpace(parts[0])
+                // 从第二部分提取链接
+                restPart := parts[1]
+                if idx := strings.Index(restPart, "http"); idx >= 0 {
+                    linkURL = extractFirstURL(restPart[idx:])
+                }
+            }
+        } else if strings.Contains(line, ":") {
+            // 匹配 "作品名 网盘:链接" 格式
+            colonIdx := strings.Index(line, ":")
+            if colonIdx > 0 {
+                beforeColon := line[:colonIdx]
+                afterColon := line[colonIdx+len(":"):]
+
+                // 尝试从冒号前提取作品名(去除网盘名)
+                workTitle = extractWorkTitleBeforeColon(beforeColon)
+
+                // 从冒号后提取链接
+                if idx := strings.Index(afterColon, "http"); idx >= 0 {
+                    linkURL = extractFirstURL(afterColon[idx:])
+                }
+            }
+        }
+
+        // 如果成功提取了作品名和链接,添加到映射
+        if workTitle != "" && linkURL != "" {
+            // 标准化URL用于匹配
+            normalizedURL := normalizeUrl(linkURL)
+            urlToWorkTitle[normalizedURL] = workTitle
+        }
+    }
+
+    // 为每个链接设置作品标题
+    for i := range links {
+        normalizedURL := normalizeUrl(links[i].URL)
+        if workTitle, found := urlToWorkTitle[normalizedURL]; found {
+            links[i].WorkTitle = workTitle
+        } else {
+            links[i].WorkTitle = defaultTitle
+        }
+    }
+
+    return links
+}
+
+// extractFirstURL 从文本中提取第一个URL
+func extractFirstURL(text string) string {
+    // 提取到空格或换行符为止
+    endIdx := len(text)
+    if idx := strings.Index(text, " "); idx > 0 && idx < endIdx {
+        endIdx = idx
+    }
+    if idx := strings.Index(text, "\n"); idx > 0 && idx < endIdx {
+        endIdx = idx
+    }
+    if idx := strings.Index(text, "\r"); idx > 0 && idx < endIdx {
+        endIdx = idx
+    }
+
+    return strings.TrimSpace(text[:endIdx])
+}
+
+// extractWorkTitleBeforeColon 从冒号前的文本中提取作品名
+func extractWorkTitleBeforeColon(text string) string {
+    text = strings.TrimSpace(text)
+
+    // 移除常见的网盘名称
+    netdiskNames := []string{
+        "夸克网盘", "夸克云盘", "夸克",
+        "百度网盘", "百度云盘", "百度云", "百度",
+        "迅雷网盘", "迅雷云盘", "迅雷",
+        "阿里云盘", "阿里网盘", "阿里云", "阿里",
+        "天翼云盘", "天翼网盘", "天翼云", "天翼",
+        "UC网盘", "UC云盘", "UC",
+        "移动云盘", "移动云", "移动",
+        "115网盘", "115云盘", "115",
+        "123网盘", "123云盘", "123",
+        "PikPak网盘", "PikPak",
+        "网盘", "云盘",
+    }
+
+    // 从右向左移除网盘名称
+    for _, name := range netdiskNames {
+        if strings.HasSuffix(text, name) {
+            text = strings.TrimSpace(text[:len(text)-len(name)])
+            break
+        }
+    }
+
+    return text
+}
+
+// extractWorkTitlesFromContext 通过上下文为链接提取作品标题
+func extractWorkTitlesFromContext(links []model.Link, messageText string, defaultTitle string) []model.Link {
+    // 简单实现:如果无法精确匹配,则都使用默认标题
+    for i := range links {
+        links[i].WorkTitle = defaultTitle
+    }
+    return links
+}
\ No newline at end of file

diff --git a/util/regex_util.go b/util/regex_util.go
index 36b975e..b41f18a 100644
--- a/util/regex_util.go
+++ b/util/regex_util.go
@@ -13,24 +13,24 @@ var AllPanLinksPattern = regexp.MustCompile(`(?i)(?:(?:magnet:\?xt=urn:btih:[a-z
 // 修改百度网盘链接正则表达式,确保只匹配到链接本身,不包含后面的文本
 var BaiduPanPattern = regexp.MustCompile(`https?://pan\.baidu\.com/s/[a-zA-Z0-9_-]+(?:\?pwd=[a-zA-Z0-9]{4})?`)
 var QuarkPanPattern = regexp.MustCompile(`https?://pan\.quark\.cn/s/[a-zA-Z0-9]+`)
-var XunleiPanPattern = regexp.MustCompile(`https?://pan\.xunlei\.com/s/[a-zA-Z0-9]+(?:\?pwd=[a-zA-Z0-9]+)?(?:#)?`)
+var XunleiPanPattern = regexp.MustCompile(`https?://pan\.xunlei\.com/s/[a-zA-Z0-9]+(?:\?pwd=[a-zA-Z0-9]{4})?(?:#)?`)
 // 添加天翼云盘链接正则表达式 - 精确匹配,支持URL编码的访问码
 var TianyiPanPattern = regexp.MustCompile(`https?://cloud\.189\.cn/t/[a-zA-Z0-9]+(?:%[0-9A-Fa-f]{2})*(?:([^)]*))?`)
 // 添加UC网盘链接正则表达式
 var UCPanPattern = regexp.MustCompile(`https?://drive\.uc\.cn/s/[a-zA-Z0-9]+(?:\?public=\d)?`)
 // 添加123网盘链接正则表达式
-var Pan123Pattern = regexp.MustCompile(`https?://(?:www\.)?123(?:684|685|912|pan|592)\.(?:com|cn)/s/[a-zA-Z0-9_-]+(?:\?(?:%E6%8F%90%E5%8F%96%E7%A0%81|提取码)[::][a-zA-Z0-9]+)?`)
+var Pan123Pattern = regexp.MustCompile(`https?://(?:www\.)?123(?:684|865|685|912|pan|592)\.(?:com|cn)/s/[a-zA-Z0-9_-]+(?:\?(?:%E6%8F%90%E5%8F%96%E7%A0%81|提取码)[::][a-zA-Z0-9]+)?`)
 // 添加115网盘链接正则表达式
 var Pan115Pattern = regexp.MustCompile(`https?://(?:115\.com|115cdn\.com|anxia\.com)/s/[a-zA-Z0-9]+(?:\?password=[a-zA-Z0-9]{4})?(?:#)?`)
 // 添加阿里云盘链接正则表达式
 var AliyunPanPattern = regexp.MustCompile(`https?://(?:www\.)?(?:alipan|aliyundrive)\.com/s/[a-zA-Z0-9]+`)

 // 提取码匹配正则表达式 - 增强提取密码的能力
-var PasswordPattern = regexp.MustCompile(`(?i)(?:(?:提取|访问|提取密|密)码|pwd)[::]\s*([a-zA-Z0-9]{4})`)
-var UrlPasswordPattern = regexp.MustCompile(`(?i)[?&]pwd=([a-zA-Z0-9]{4})`)
+var PasswordPattern = regexp.MustCompile(`(?i)(?:(?:提取|访问|提取密|密)码|pwd)[::]\s*([a-zA-Z0-9]{4})(?:[^a-zA-Z0-9]|$)`)
+var UrlPasswordPattern = regexp.MustCompile(`(?i)[?&]pwd=([a-zA-Z0-9]{4})(?:[^a-zA-Z0-9]|$)`)
 // 百度网盘密码专用正则表达式 - 确保只提取4位密码
-var BaiduPasswordPattern = regexp.MustCompile(`(?i)(?:链接:.*?提取码:|密码:|提取码:|pwd=|pwd:|pwd:)([a-zA-Z0-9]{4})`)
+var BaiduPasswordPattern = regexp.MustCompile(`(?i)(?:链接:.*?提取码:|密码:|提取码:|pwd=|pwd:|pwd:)([a-zA-Z0-9]{4})(?:[^a-zA-Z0-9]|$)`)

 // GetLinkType 获取链接类型
 func GetLinkType(url string) string {
@@ -83,7 +83,7 @@ func GetLinkType(url string) string {
     }

     // 123网盘有多个域名
-    if strings.Contains(url, "123684.com") || strings.Contains(url, "123685.com") ||
+    if strings.Contains(url, "123684.com") || strings.Contains(url, "123685.com") || strings.Contains(url, "123865.com") ||
         strings.Contains(url, "123912.com") || strings.Contains(url, "123pan.com") ||
         strings.Contains(url, "123pan.cn") || strings.Contains(url, "123592.com") {
         return "123"
@@ -115,22 +115,20 @@ func CleanBaiduPanURL(url string) string {
     // 如果找到了结束标记,截取到结束标记位置
     if minEndIdx < len(url) {
-        // 特殊处理pwd参数
-        if strings.Contains(url[:minEndIdx], "?pwd=") {
-            pwdIdx := strings.Index(url, "?pwd=")
-            pwdEndIdx := pwdIdx + 10 // ?pwd=xxxx 总共9个字符,加上问号前的位置
-            if pwdEndIdx < len(url) {
-                return url[:pwdEndIdx]
-            }
-        }
-        return url[:minEndIdx]
+        url = url[:minEndIdx]
     }

-    // 如果没有找到结束标记,但URL包含?pwd=,确保只保留4位密码
+    // 特殊处理pwd参数,确保只保留4位密码
     if strings.Contains(url, "?pwd=") {
         pwdIdx := strings.Index(url, "?pwd=")
-        if pwdIdx > 0 && pwdIdx+9 <= len(url) { // ?pwd=xxxx 总共9个字符
-            return url[:pwdIdx+9]
+        if pwdIdx >= 0 && len(url) > pwdIdx+5 { // ?pwd= 有5个字符
+            // 只保留?pwd=后面的4位密码
+            pwdEndIdx := pwdIdx + 9 // ?pwd=xxxx 总共9个字符
+            if pwdEndIdx <= len(url) {
+                return url[:pwdEndIdx]
+            }
+            // 如果剩余字符不足4位,返回所有可用字符
+            return url
         }
     }
@@ -218,7 +216,7 @@ func CleanUCPanURL(url string) string {
 // Clean123PanURL 清理123网盘URL,确保链接格式正确
 func Clean123PanURL(url string) string {
     // 检查是否为123网盘链接
-    domains := []string{"123684.com", "123685.com", "123912.com", "123pan.com", "123pan.cn", "123592.com"}
+    domains := []string{"123684.com", "123685.com", "123865.com", "123912.com", "123pan.com", "123pan.cn", "123592.com"}
     isDomain123 := false

     for _, domain := range domains {
@@ -416,13 +414,18 @@ func ExtractPassword(content, url string) string {
         }
     }

+    // 特殊处理迅雷网盘URL中的pwd参数
+    if strings.Contains(url, "pan.xunlei.com") && strings.Contains(url, "?pwd=") {
+        pwdPattern := regexp.MustCompile(`\?pwd=([a-zA-Z0-9]{4})`)
+        pwdMatches := pwdPattern.FindStringSubmatch(url)
+        if len(pwdMatches) > 1 {
+            return pwdMatches[1]
+        }
+    }
+
     // 先从URL中提取密码
     matches := UrlPasswordPattern.FindStringSubmatch(url)
     if len(matches) > 1 {
-        // 确保百度网盘密码只有4位
-        if strings.Contains(strings.ToLower(url), "pan.baidu.com") && len(matches[1]) > 4 {
-            return matches[1][:4]
-        }
         return matches[1]
     }
@@ -443,6 +446,7 @@ func ExtractPassword(content, url string) string {
     // 特殊处理123网盘URL中的提取码
     if (strings.Contains(url, "123684.com") ||
         strings.Contains(url, "123685.com") ||
+        strings.Contains(url, "123865.com") ||
         strings.Contains(url, "123912.com") ||
         strings.Contains(url, "123pan.com") ||
         strings.Contains(url, "123pan.cn") ||
@@ -460,6 +464,7 @@ func ExtractPassword(content, url string) string {
     // 检查123网盘URL中的提取码参数
     if (strings.Contains(url, "123684.com") ||
         strings.Contains(url, "123685.com") ||
+        strings.Contains(url, "123865.com") ||
         strings.Contains(url, "123912.com") ||
         strings.Contains(url, "123pan.com") ||
         strings.Contains(url, "123pan.cn") ||
@@ -766,6 +771,7 @@ func ExtractNetDiskLinks(text string) []string {
         strings.Contains(cleanURL, "drive.uc.cn") ||
         strings.Contains(cleanURL, "123684.com") ||
         strings.Contains(cleanURL, "123685.com") ||
+        strings.Contains(cleanURL, "123865.com") ||
         strings.Contains(cleanURL, "123912.com") ||
         strings.Contains(cleanURL, "123pan.com") ||
        strings.Contains(cleanURL, "123pan.cn") ||
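The work-title logic in util/parser_util.go keys off a common channel posting style, one work per line ("作品名丨网盘:链接"), and only kicks in when a message carries more than 4 links. The following self-contained sketch illustrates the 丨-splitting idea; `splitWorkLine` is a hypothetical stand-in for the unexported helpers above, not code from this patch:

```go
package main

import (
	"fmt"
	"strings"
)

// splitWorkLine pulls a work title and the first URL out of a line of the
// form "作品名丨夸克:https://pan.quark.cn/s/xxxx", roughly the way
// extractWorkTitlesFromSingleLineFormat handles the "丨" branch.
func splitWorkLine(line string) (title, url string) {
	parts := strings.SplitN(line, "丨", 2)
	if len(parts) != 2 {
		return "", ""
	}
	title = strings.TrimSpace(parts[0])
	rest := parts[1]
	if idx := strings.Index(rest, "http"); idx >= 0 {
		url = rest[idx:]
		// Cut at the first whitespace, as extractFirstURL does.
		if sp := strings.IndexAny(url, " \r\n"); sp > 0 {
			url = url[:sp]
		}
	}
	return title, strings.TrimSpace(url)
}

func main() {
	title, url := splitWorkLine("速度与激情丨夸克:https://pan.quark.cn/s/abcd1234")
	fmt.Println(title) // 速度与激情
	fmt.Println(url)   // https://pan.quark.cn/s/abcd1234
}
```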
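The `(?:[^a-zA-Z0-9]|$)` guard appended to the password patterns in util/regex_util.go changes the failure mode: a 4-character candidate followed by a fifth alphanumeric character is now rejected outright instead of being truncated to its first four characters. A small demonstration using the same pattern literal as the patched `PasswordPattern`:

```go
package main

import (
	"fmt"
	"regexp"
)

// Same pattern literal as the patched PasswordPattern in util/regex_util.go.
var passwordPattern = regexp.MustCompile(`(?i)(?:(?:提取|访问|提取密|密)码|pwd)[::]\s*([a-zA-Z0-9]{4})(?:[^a-zA-Z0-9]|$)`)

func main() {
	// Exactly 4 characters at end of string: matches, captures "ab12".
	fmt.Println(passwordPattern.FindStringSubmatch("提取码:ab12"))
	// 5 alphanumeric characters: the boundary guard rejects the match
	// entirely (prints []) rather than truncating to "ab12".
	fmt.Println(passwordPattern.FindStringSubmatch("提取码:ab123"))
}
```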