From 27fc03381c813c55171a5b5529e9cda71ea96524 Mon Sep 17 00:00:00 2001 From: woleigedouer <38418090+woleigedouer@users.noreply.github.com> Date: Sat, 1 Nov 2025 17:29:36 +0800 Subject: [PATCH 1/8] =?UTF-8?q?=E6=9B=B4=E6=8D=A2muou=E5=9F=9F=E5=90=8D=20?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=B0=81=E9=9D=A2=E8=8E=B7=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugin/muou/muou.go | 84 +++++++++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 30 deletions(-) diff --git a/plugin/muou/muou.go b/plugin/muou/muou.go index ea0d6ee..394c079 100644 --- a/plugin/muou/muou.go +++ b/plugin/muou/muou.go @@ -135,7 +135,7 @@ func (p *MuouAsyncPlugin) searchImpl(client *http.Client, keyword string, ext ma } // 1. 构建搜索URL - searchURL := fmt.Sprintf("http://123.666291.xyz/index.php/vod/search/wd/%s.html", url.QueryEscape(keyword)) + searchURL := fmt.Sprintf("https://666.666291.xyz/index.php/vod/search/wd/%s.html", url.QueryEscape(keyword)) // 2. 创建带超时的上下文 ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout) @@ -154,7 +154,7 @@ func (p *MuouAsyncPlugin) searchImpl(client *http.Client, keyword string, ext ma req.Header.Set("Connection", "keep-alive") req.Header.Set("Upgrade-Insecure-Requests", "1") req.Header.Set("Cache-Control", "max-age=0") - req.Header.Set("Referer", "http://123.666291.xyz/") + req.Header.Set("Referer", "https://666.666291.xyz/") // 5. 
发送请求(带重试机制) resp, err := p.doRequestWithRetry(req, client) @@ -256,7 +256,14 @@ func (p *MuouAsyncPlugin) parseSearchItem(s *goquery.Selection, keyword string) return strings.Contains(title, "剧情") }) plot := strings.TrimSpace(plotElement.Find(".video-info-item").Text()) - + + // 提取封面图片 (参考 Pan_mogg.js 的选择器) + var images []string + if picURL, exists := s.Find(".module-item-pic > img").Attr("data-src"); exists && picURL != "" { + images = append(images, picURL) + } + result.Images = images + // 构建内容描述 var contentParts []string if quality != "" { @@ -275,11 +282,11 @@ func (p *MuouAsyncPlugin) parseSearchItem(s *goquery.Selection, keyword string) if plot != "" { contentParts = append(contentParts, plot) } - + result.Content = strings.Join(contentParts, "\n") result.Channel = "" // 插件搜索结果不设置频道名,只有Telegram频道结果才设置 result.Datetime = time.Time{} // 使用零值而不是nil,参考jikepan插件标准 - + return result } @@ -311,7 +318,7 @@ func (p *MuouAsyncPlugin) enhanceWithDetails(client *http.Client, results []mode } itemID := parts[1] - + // 检查缓存 if cached, ok := detailCache.Load(itemID); ok { if cachedResult, ok := cached.(model.SearchResult); ok { @@ -323,14 +330,19 @@ func (p *MuouAsyncPlugin) enhanceWithDetails(client *http.Client, results []mode } } atomic.AddInt64(&cacheMisses, 1) - - // 获取详情页链接 - detailLinks := p.fetchDetailLinks(client, itemID) + + // 获取详情页链接和图片 + detailLinks, detailImages := p.fetchDetailLinksAndImages(client, itemID) r.Links = detailLinks - + + // 合并图片:优先使用详情页的海报,如果没有则使用搜索结果的图片 + if len(detailImages) > 0 { + r.Images = detailImages + } + // 缓存结果 detailCache.Store(itemID, r) - + mu.Lock() enhancedResults = append(enhancedResults, r) mu.Unlock() @@ -370,8 +382,8 @@ func (p *MuouAsyncPlugin) doRequestWithRetry(req *http.Request, client *http.Cli return nil, fmt.Errorf("重试 %d 次后仍然失败: %w", maxRetries, lastErr) } -// fetchDetailLinks 获取详情页的下载链接 -func (p *MuouAsyncPlugin) fetchDetailLinks(client *http.Client, itemID string) []model.Link { +// fetchDetailLinksAndImages 
获取详情页的下载链接和图片 +func (p *MuouAsyncPlugin) fetchDetailLinksAndImages(client *http.Client, itemID string) ([]model.Link, []string) { // 性能统计 start := time.Now() atomic.AddInt64(&detailPageRequests, 1) @@ -380,43 +392,49 @@ func (p *MuouAsyncPlugin) fetchDetailLinks(client *http.Client, itemID string) [ atomic.AddInt64(&totalDetailTime, duration) }() - detailURL := fmt.Sprintf("http://123.666291.xyz/index.php/vod/detail/id/%s.html", itemID) - + detailURL := fmt.Sprintf("https://666.666291.xyz/index.php/vod/detail/id/%s.html", itemID) + // 创建带超时的上下文 ctx, cancel := context.WithTimeout(context.Background(), DetailTimeout) defer cancel() - + // 创建请求 req, err := http.NewRequestWithContext(ctx, "GET", detailURL, nil) if err != nil { - return nil + return nil, nil } - + // 设置请求头 req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8") req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") req.Header.Set("Connection", "keep-alive") - req.Header.Set("Referer", "http://123.666291.xyz/") - + req.Header.Set("Referer", "https://666.666291.xyz/") + // 发送请求(带重试) resp, err := p.doRequestWithRetry(req, client) if err != nil { - return nil + return nil, nil } defer resp.Body.Close() - + if resp.StatusCode != 200 { - return nil + return nil, nil } - + doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { - return nil + return nil, nil } - + var links []model.Link - + var images []string + + // 提取详情页的海报图片 (参考 Pan_mogg.js 的选择器) + if posterURL, exists := doc.Find(".mobile-play .lazyload").Attr("data-src"); exists && posterURL != "" { + images = append(images, posterURL) + } + // 查找下载链接区域 doc.Find("#download-list .module-row-one").Each(func(i int, s *goquery.Selection) { // 从data-clipboard-text属性提取链接 @@ -433,7 +451,7 @@ func (p *MuouAsyncPlugin) fetchDetailLinks(client 
*http.Client, itemID string) [ } } } - + // 也检查直接的href属性 s.Find("a[href]").Each(func(j int, a *goquery.Selection) { if linkURL, exists := a.Attr("href"); exists { @@ -448,7 +466,7 @@ func (p *MuouAsyncPlugin) fetchDetailLinks(client *http.Client, itemID string) [ break } } - + if !isDuplicate { link := model.Link{ Type: linkType, @@ -462,7 +480,13 @@ func (p *MuouAsyncPlugin) fetchDetailLinks(client *http.Client, itemID string) [ } }) }) - + + return links, images +} + +// fetchDetailLinks 获取详情页的下载链接(兼容性方法,仅返回链接) +func (p *MuouAsyncPlugin) fetchDetailLinks(client *http.Client, itemID string) []model.Link { + links, _ := p.fetchDetailLinksAndImages(client, itemID) return links } From 54d66d2b016b09d5c4a73f376d9c52a49513e4a6 Mon Sep 17 00:00:00 2001 From: woleigedouer <38418090+woleigedouer@users.noreply.github.com> Date: Sat, 1 Nov 2025 17:40:20 +0800 Subject: [PATCH 2/8] =?UTF-8?q?wanou=20=E5=A2=9E=E5=8A=A0=E5=9B=BE?= =?UTF-8?q?=E7=89=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugin/wanou/wanou.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/plugin/wanou/wanou.go b/plugin/wanou/wanou.go index e1f9850..fb00a00 100644 --- a/plugin/wanou/wanou.go +++ b/plugin/wanou/wanou.go @@ -229,7 +229,13 @@ func (p *WanouAsyncPlugin) parseAPIItem(item WanouAPIItem) model.SearchResult { // 解析下载链接 links := p.parseDownloadLinks(item.VodDownFrom, item.VodDownURL) - + + // 提取封面图片 + var images []string + if item.VodPic != "" { + images = append(images, item.VodPic) + } + // 构建标签 var tags []string if item.VodYear != "" { @@ -238,13 +244,14 @@ func (p *WanouAsyncPlugin) parseAPIItem(item WanouAPIItem) model.SearchResult { if item.VodArea != "" { tags = append(tags, item.VodArea) } - + return model.SearchResult{ UniqueID: uniqueID, Title: title, Content: content, Links: links, Tags: tags, + Images: images, Channel: "", // 插件搜索结果Channel为空 Datetime: time.Time{}, // 使用零值而不是nil,参考jikepan插件标准 } From 
52b30a3fd10bd33f10d94c6573d8c59887ae3dd6 Mon Sep 17 00:00:00 2001 From: woleigedouer <38418090+woleigedouer@users.noreply.github.com> Date: Sat, 1 Nov 2025 17:45:27 +0800 Subject: [PATCH 3/8] =?UTF-8?q?labi=20=E5=A2=9E=E5=8A=A0=E5=B0=81=E9=9D=A2?= =?UTF-8?q?=E6=98=BE=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugin/labi/labi.go | 70 ++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 23 deletions(-) diff --git a/plugin/labi/labi.go b/plugin/labi/labi.go index 9d9d1c2..9a962ea 100644 --- a/plugin/labi/labi.go +++ b/plugin/labi/labi.go @@ -239,7 +239,14 @@ func (p *LabiAsyncPlugin) parseSearchItem(s *goquery.Selection, keyword string) return strings.Contains(title, "剧情") }) plot := strings.TrimSpace(plotElement.Find(".video-info-item").Text()) - + + // 提取封面图片 (参考 Pan_wogg.js 的选择器) + var images []string + if picURL, exists := s.Find(".module-item-pic > img").Attr("data-src"); exists && picURL != "" { + images = append(images, picURL) + } + result.Images = images + // 构建内容描述 var contentParts []string if quality != "" { @@ -258,11 +265,11 @@ func (p *LabiAsyncPlugin) parseSearchItem(s *goquery.Selection, keyword string) if plot != "" { contentParts = append(contentParts, plot) } - + result.Content = strings.Join(contentParts, "\n") result.Channel = "" // 插件搜索结果不设置频道名,只有Telegram频道结果才设置 result.Datetime = time.Time{} // 使用零值而不是nil,参考jikepan插件标准 - + return result } @@ -305,10 +312,15 @@ func (p *LabiAsyncPlugin) enhanceWithDetails(client *http.Client, results []mode } } - // 获取详情页链接 - detailLinks := p.fetchDetailLinks(client, itemID) + // 获取详情页链接和图片 + detailLinks, detailImages := p.fetchDetailLinksAndImages(client, itemID) r.Links = detailLinks - + + // 合并图片:优先使用详情页的海报,如果没有则使用搜索结果的图片 + if len(detailImages) > 0 { + r.Images = detailImages + } + // 缓存结果 detailCache.Store(itemID, r) @@ -351,45 +363,51 @@ func (p *LabiAsyncPlugin) doRequestWithRetry(req *http.Request, client *http.Cli 
return nil, fmt.Errorf("重试 %d 次后仍然失败: %w", maxRetries, lastErr) } -// fetchDetailLinks 获取详情页的下载链接 -func (p *LabiAsyncPlugin) fetchDetailLinks(client *http.Client, itemID string) []model.Link { +// fetchDetailLinksAndImages 获取详情页的下载链接和图片 +func (p *LabiAsyncPlugin) fetchDetailLinksAndImages(client *http.Client, itemID string) ([]model.Link, []string) { detailURL := fmt.Sprintf("http://xiaocge.fun/index.php/vod/detail/id/%s.html", itemID) - + // 创建带超时的上下文 ctx, cancel := context.WithTimeout(context.Background(), DetailTimeout) defer cancel() - + // 创建请求 req, err := http.NewRequestWithContext(ctx, "GET", detailURL, nil) if err != nil { - return nil + return nil, nil } - + // 设置请求头 req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8") req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") req.Header.Set("Connection", "keep-alive") req.Header.Set("Referer", "http://xiaocge.fun/") - + // 发送请求(带重试) resp, err := p.doRequestWithRetry(req, client) if err != nil { - return nil + return nil, nil } defer resp.Body.Close() - + if resp.StatusCode != 200 { - return nil + return nil, nil } - + doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { - return nil + return nil, nil } - + var links []model.Link - + var images []string + + // 提取详情页的海报图片 (参考 Pan_wogg.js 的选择器) + if posterURL, exists := doc.Find(".module-item-pic > img").Attr("data-src"); exists && posterURL != "" { + images = append(images, posterURL) + } + // 查找下载链接区域 doc.Find("#download-list .module-row-one").Each(func(i int, s *goquery.Selection) { // 从data-clipboard-text属性提取链接 @@ -404,7 +422,7 @@ func (p *LabiAsyncPlugin) fetchDetailLinks(client *http.Client, itemID string) [ links = append(links, link) } } - + // 也检查直接的href属性 s.Find("a[href]").Each(func(j int, a *goquery.Selection) { if linkURL, exists := 
a.Attr("href"); exists { @@ -418,7 +436,7 @@ func (p *LabiAsyncPlugin) fetchDetailLinks(client *http.Client, itemID string) [ break } } - + if !isDuplicate { link := model.Link{ Type: "quark", @@ -431,7 +449,13 @@ func (p *LabiAsyncPlugin) fetchDetailLinks(client *http.Client, itemID string) [ } }) }) - + + return links, images +} + +// fetchDetailLinks 获取详情页的下载链接(兼容性方法,仅返回链接) +func (p *LabiAsyncPlugin) fetchDetailLinks(client *http.Client, itemID string) []model.Link { + links, _ := p.fetchDetailLinksAndImages(client, itemID) return links } From c2ab4b32f3aac5f734103bdf30083ea6fbcc9cda Mon Sep 17 00:00:00 2001 From: woleigedouer <38418090+woleigedouer@users.noreply.github.com> Date: Sat, 1 Nov 2025 18:08:41 +0800 Subject: [PATCH 4/8] =?UTF-8?q?ouge=20=E5=A2=9E=E5=8A=A0=E5=9B=BE=E7=89=87?= =?UTF-8?q?=E6=98=BE=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugin/ouge/ouge.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/plugin/ouge/ouge.go b/plugin/ouge/ouge.go index 449b222..aeafa58 100644 --- a/plugin/ouge/ouge.go +++ b/plugin/ouge/ouge.go @@ -229,7 +229,13 @@ func (p *OugeAsyncPlugin) parseAPIItem(item OugeAPIItem) model.SearchResult { // 解析下载链接 links := p.parseDownloadLinks(item.VodDownFrom, item.VodDownURL) - + + // 提取封面图片 + var images []string + if item.VodPic != "" { + images = append(images, item.VodPic) + } + // 构建标签 var tags []string if item.VodYear != "" { @@ -238,13 +244,14 @@ func (p *OugeAsyncPlugin) parseAPIItem(item OugeAPIItem) model.SearchResult { if item.VodArea != "" { tags = append(tags, item.VodArea) } - + return model.SearchResult{ UniqueID: uniqueID, Title: title, Content: content, Links: links, Tags: tags, + Images: images, Channel: "", // 插件搜索结果Channel为空 Datetime: time.Time{}, // 使用零值而不是nil,参考jikepan插件标准 } From 22cb1b116af44ae44acf2805cefd4f8cd3b73d34 Mon Sep 17 00:00:00 2001 From: woleigedouer <38418090+woleigedouer@users.noreply.github.com> Date: 
Sat, 1 Nov 2025 18:54:16 +0800 Subject: [PATCH 5/8] =?UTF-8?q?duoduo=20=E5=A2=9E=E5=8A=A0=E5=9B=BE?= =?UTF-8?q?=E7=89=87=E8=8E=B7=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugin/duoduo/duoduo.go | 72 +++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/plugin/duoduo/duoduo.go b/plugin/duoduo/duoduo.go index 21cba56..5117e2f 100644 --- a/plugin/duoduo/duoduo.go +++ b/plugin/duoduo/duoduo.go @@ -273,7 +273,14 @@ func (p *DuoduoAsyncPlugin) parseSearchItem(s *goquery.Selection, keyword string return strings.Contains(title, "剧情") }) plot := strings.TrimSpace(plotElement.Find(".video-info-item").Text()) - + + // 提取封面图片 (参考 Pan_mogg.js 的选择器) + var images []string + if picURL, exists := s.Find(".module-item-pic > img").Attr("data-src"); exists && picURL != "" { + images = append(images, picURL) + } + result.Images = images + // 构建内容描述 var contentParts []string if quality != "" { @@ -328,7 +335,7 @@ func (p *DuoduoAsyncPlugin) enhanceWithDetails(client *http.Client, results []mo } itemID := parts[1] - + // 检查缓存 if cached, ok := detailCache.Load(itemID); ok { if cachedResult, ok := cached.(model.SearchResult); ok { @@ -340,14 +347,19 @@ func (p *DuoduoAsyncPlugin) enhanceWithDetails(client *http.Client, results []mo } } atomic.AddInt64(&cacheMisses, 1) - - // 获取详情页链接 - detailLinks := p.fetchDetailLinks(client, itemID) + + // 获取详情页链接和图片 + detailLinks, detailImages := p.fetchDetailLinksAndImages(client, itemID) r.Links = detailLinks - + + // 合并图片:优先使用详情页的海报,如果没有则使用搜索结果的图片 + if len(detailImages) > 0 { + r.Images = detailImages + } + // 缓存结果 detailCache.Store(itemID, r) - + mu.Lock() enhancedResults = append(enhancedResults, r) mu.Unlock() @@ -387,8 +399,8 @@ func (p *DuoduoAsyncPlugin) doRequestWithRetry(req *http.Request, client *http.C return nil, fmt.Errorf("重试 %d 次后仍然失败: %w", maxRetries, lastErr) } -// fetchDetailLinks 获取详情页的下载链接 -func (p *DuoduoAsyncPlugin) 
fetchDetailLinks(client *http.Client, itemID string) []model.Link { +// fetchDetailLinksAndImages 获取详情页的下载链接和图片 +func (p *DuoduoAsyncPlugin) fetchDetailLinksAndImages(client *http.Client, itemID string) ([]model.Link, []string) { // 性能统计 start := time.Now() atomic.AddInt64(&detailPageRequests, 1) @@ -398,42 +410,48 @@ func (p *DuoduoAsyncPlugin) fetchDetailLinks(client *http.Client, itemID string) }() detailURL := fmt.Sprintf("https://tv.yydsys.top/index.php/vod/detail/id/%s.html", itemID) - + // 创建带超时的上下文 ctx, cancel := context.WithTimeout(context.Background(), DetailTimeout) defer cancel() - + // 创建请求 req, err := http.NewRequestWithContext(ctx, "GET", detailURL, nil) if err != nil { - return nil + return nil, nil } - + // 设置请求头 req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8") req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") req.Header.Set("Connection", "keep-alive") req.Header.Set("Referer", "https://tv.yydsys.top/") - + // 发送请求(带重试) resp, err := p.doRequestWithRetry(req, client) if err != nil { - return nil + return nil, nil } defer resp.Body.Close() - + if resp.StatusCode != 200 { - return nil + return nil, nil } - + doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { - return nil + return nil, nil } - + var links []model.Link - + var images []string + + // 提取详情页的海报图片 (参考 Pan_mogg.js 的选择器) + if posterURL, exists := doc.Find(".mobile-play .lazyload").Attr("data-src"); exists && posterURL != "" { + images = append(images, posterURL) + } + // 查找下载链接区域 doc.Find("#download-list .module-row-one").Each(func(i int, s *goquery.Selection) { // 从data-clipboard-text属性提取链接 @@ -450,7 +468,7 @@ func (p *DuoduoAsyncPlugin) fetchDetailLinks(client *http.Client, itemID string) } } } - + // 也检查直接的href属性 s.Find("a[href]").Each(func(j int, a 
*goquery.Selection) { if linkURL, exists := a.Attr("href"); exists { @@ -465,7 +483,7 @@ func (p *DuoduoAsyncPlugin) fetchDetailLinks(client *http.Client, itemID string) break } } - + if !isDuplicate { link := model.Link{ Type: linkType, @@ -479,7 +497,13 @@ func (p *DuoduoAsyncPlugin) fetchDetailLinks(client *http.Client, itemID string) } }) }) - + + return links, images +} + +// fetchDetailLinks 获取详情页的下载链接(兼容性方法,仅返回链接) +func (p *DuoduoAsyncPlugin) fetchDetailLinks(client *http.Client, itemID string) []model.Link { + links, _ := p.fetchDetailLinksAndImages(client, itemID) return links } From 5e6358c6cfcf12e22800609eb738a0665c3a1e59 Mon Sep 17 00:00:00 2001 From: woleigedouer <38418090+woleigedouer@users.noreply.github.com> Date: Sat, 1 Nov 2025 19:21:00 +0800 Subject: [PATCH 6/8] =?UTF-8?q?zhizhen=20=E4=BF=AE=E6=AD=A3=E6=90=9C?= =?UTF-8?q?=E7=B4=A2url=20=E9=87=8D=E6=9E=84=E4=B8=BAhtml=20=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0=E5=B0=81=E9=9D=A2=E8=8E=B7=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugin/zhizhen/json结构分析.md | 253 +++++++------- plugin/zhizhen/zhizhen.go | 617 ++++++++++++++++++++------------- 2 files changed, 522 insertions(+), 348 deletions(-) diff --git a/plugin/zhizhen/json结构分析.md b/plugin/zhizhen/json结构分析.md index 296a74b..66d9a62 100644 --- a/plugin/zhizhen/json结构分析.md +++ b/plugin/zhizhen/json结构分析.md @@ -1,84 +1,117 @@ -# Zhizhen API 数据结构分析 +# Zhizhen HTML 数据结构分析 ## 基本信息 -- **数据源类型**: JSON API -- **API URL格式**: `https://xiaomi666.fun/api.php/provide/vod?ac=detail&wd={关键词}` -- **数据特点**: 视频点播(VOD)系统API,提供结构化影视资源数据 -- **特殊说明**: 使用独立域名,网盘标识符与wanou/ouge略有不同 +- **数据源类型**: HTML 网页 +- **搜索URL格式**: `https://xiaomi666.fun/index.php/vod/search/wd/{关键词}.html` +- **详情URL格式**: `https://xiaomi666.fun/index.php/vod/detail/id/{资源ID}.html` +- **数据特点**: 视频点播(VOD)系统网页,提供HTML格式的影视资源数据 +- **特殊说明**: 使用独立域名,HTML结构与muou插件相同 -## API响应结构 +## HTML 页面结构 -### 顶层结构 -```json -{ - "code": 1, // 状态码:1表示成功 - "msg": "数据列表", // 
响应消息 - "page": 1, // 当前页码 - "pagecount": 1, // 总页数 - "limit": 20, // 每页限制条数 - "total": 6, // 总记录数 - "list": [] // 数据列表数组 -} +### 搜索结果页面 (`.module-search-item`) +搜索结果页面包含多个搜索项,每个搜索项的HTML结构如下: + +```html +
+
+ +
+
+

+ 资源标题 +

+
+
更新至11集
+
+ + 分类1 + 分类2 + +
+
+
+ 导演: + 导演名 +
+
+ 主演: + 演员1 + 演员2 +
+
+ 剧情: + 剧情简介内容 +
+
+
``` -### `list`数组中的数据项结构 -```json -{ - "vod_id": 11455, // 资源唯一ID - "vod_name": "凡人修仙传真人版", // 资源标题 - "vod_actor": "杨洋,金晨,汪铎...", // 主演(逗号分隔) - "vod_director": "杨阳", // 导演 - "vod_area": "大陆", // 地区 - "vod_lang": "国语", // 语言 - "vod_year": "2025", // 年份 - "vod_remarks": "更新至11集", // 更新状态/备注 - "vod_pubdate": "", // 发布日期(可能为空) - "vod_blurb": "该剧改编自忘语...", // 简介 - "vod_content": "该剧改编自忘语...", // 内容描述 - "vod_pic": "https://...", // 封面图片URL - - // 关键字段:下载链接相关 - "vod_down_from": "kuake$$$BAIDUI$$$kuake", - "vod_down_url": "https://pan.quark.cn/s/d228bf3a6e44$$$https://pan.baidu.com/s/1kOWHnazfGFe6wJ-tin2pNQ?pwd=b2s4$$$https://pan.quark.cn/s/12e29bdacec4" -} +### 详情页面 (`.module-row-one`) +详情页面包含下载链接区域,每个链接的HTML结构如下: + +```html +
+
+ + 打开链接 +
+
``` ## 插件所需字段映射 | 源字段 | 目标字段 | 说明 | |--------|----------|------| -| `vod_id` | `UniqueID` | 格式: `zhizhen-{vod_id}` | -| `vod_name` | `Title` | 资源标题 | -| `vod_actor`, `vod_director`, `vod_area`, `vod_year`, `vod_remarks` | `Content` | 组合描述信息 | -| `vod_year`, `vod_area` | `Tags` | 标签数组 | -| `vod_down_from` + `vod_down_url` | `Links` | 解析为Link数组 | +| 详情页URL中的ID | `UniqueID` | 格式: `zhizhen-{id}` | +| `.video-info-header h3 a` 文本 | `Title` | 资源标题 | +| 质量、导演、主演、剧情 | `Content` | 组合描述信息 | +| `.video-info-aux .tag-link a` | `Tags` | 标签数组 | +| 详情页 `#download-list` 中的链接 | `Links` | 解析为Link数组 | +| `.module-item-pic > img` 的 `data-src` | `Images` | 封面图片 | | `""` | `Channel` | 插件搜索结果Channel为空 | -| `time.Now()` | `Datetime` | 当前时间 | +| `time.Time{}` | `Datetime` | 使用零值 | ## 下载链接解析 -### 分隔符规则 -- **多个下载源**: 使用 `$$$` 分隔 -- **对应关系**: `vod_down_from`、`vod_down_url` 按相同位置对应 +### 链接提取方式 +- **从 `data-clipboard-text` 属性**: 优先从按钮的 `data-clipboard-text` 属性提取链接 +- **从 `href` 属性**: 如果没有 `data-clipboard-text`,则从 `<a>` 标签的 `href` 属性提取 +- **去重处理**: 避免重复添加相同的链接 -### 下载源标识映射(zhizhen特有) -| API标识 | 网盘类型 | 域名示例 | 备注 | -|---------|----------|----------|------| -| `kuake` | quark (夸克网盘) | `pan.quark.cn` | ⚠️ 使用`kuake`而非`KG` | -| `BAIDUI` | baidu (百度网盘) | `pan.baidu.com` | ⚠️ 使用`BAIDUI`而非`bd` | -| `UC` | uc (UC网盘) | `drive.uc.cn` | 与标准一致 | +### 链接类型识别 +通过正则表达式匹配URL来自动识别网盘类型,支持16种网盘类型: -### 多源示例数据 -``` -vod_down_from: "kuake$$$BAIDUI$$$UC$$$BAIDUI" -vod_down_url: "https://pan.quark.cn/s/24afb59cd9ae$$$https://pan.baidu.com/s/1d8bHaARjn60rlY_5mN3phA?pwd=ceda$$$https://drive.uc.cn/s/40f6a8d5c9804?public=1$$$https://pan.baidu.com/s/19CVP2d8_ka901b9myBh68w?pwd=begh" +```go +// 主流网盘 +quark: https://pan.quark.cn/s/... +baidu: https://pan.baidu.com/s/...?pwd=... +aliyun: https://aliyundrive.com/s/... 或 https://www.alipan.com/s/... +uc: https://drive.uc.cn/s/... +xunlei: https://pan.xunlei.com/s/... + +// 运营商网盘 +tianyi: https://cloud.189.cn/t/... +mobile: https://caiyun.feixin.10086.cn/... 
+ +// 专业网盘 +115: https://115.com/s/... +weiyun: https://share.weiyun.com/... +lanzou: https://lanzou.com/... 或其他变体 +jianguoyun: https://jianguoyun.com/p/... +123: https://123pan.com/s/... +pikpak: https://mypikpak.com/s/... + +// 其他协议 +magnet: magnet:?xt=urn:btih:... +ed2k: ed2k://|file|...| ``` -### 链接格式示例 +### 密码提取 +从URL中提取 `?pwd=` 参数作为密码,例如: ``` -夸克网盘: https://pan.quark.cn/s/d228bf3a6e44 -百度网盘: https://pan.baidu.com/s/1kOWHnazfGFe6wJ-tin2pNQ?pwd=b2s4 -UC网盘: https://drive.uc.cn/s/40f6a8d5c9804?public=1 +https://pan.baidu.com/s/1kOWHnazfGFe6wJ-tin2pNQ?pwd=b2s4 +提取密码: b2s4 ``` ## 支持的网盘类型(16种) @@ -109,76 +142,64 @@ UC网盘: https://drive.uc.cn/s/40f6a8d5c9804?public=1 ## 插件开发指导 -### 请求示例 +### 搜索请求示例 ```go -searchURL := fmt.Sprintf("https://xiaomi666.fun/api.php/provide/vod?ac=detail&wd=%s", url.QueryEscape(keyword)) +searchURL := fmt.Sprintf("https://xiaomi666.fun/index.php/vod/search/wd/%s.html", url.QueryEscape(keyword)) ``` +### 详情页请求示例 +```go +detailURL := fmt.Sprintf("https://xiaomi666.fun/index.php/vod/detail/id/%s.html", itemID) +``` + +### HTML解析流程 +1. **搜索页面解析**: 使用 goquery 解析搜索结果页面 +2. **提取搜索项**: 遍历 `.module-search-item` 元素 +3. **提取基本信息**: 从搜索项中提取标题、分类、导演、主演等 +4. **异步获取详情**: 并发请求详情页面获取下载链接 +5. 
**缓存管理**: 使用 sync.Map 缓存详情页结果,TTL为1小时 + ### SearchResult构建示例 ```go result := model.SearchResult{ - UniqueID: fmt.Sprintf("zhizhen-%d", item.VodID), - Title: item.VodName, - Content: buildContent(item), - Links: parseDownloadLinks(item.VodDownFrom, item.VodDownURL), - Tags: []string{item.VodYear, item.VodArea}, + UniqueID: fmt.Sprintf("zhizhen-%s", itemID), + Title: title, + Content: strings.Join(contentParts, "\n"), + Links: detailLinks, + Tags: tags, + Images: images, Channel: "", // 插件搜索结果Channel为空 - Datetime: time.Now(), + Datetime: time.Time{}, // 使用零值 } ``` -### 特殊映射函数 -```go -func (p *ZhizhenAsyncPlugin) mapCloudType(apiType, url string) string { - // 优先根据API标识映射(zhizhen特有) - switch strings.ToUpper(apiType) { - case "KUAKE": - return "quark" - case "BAIDUI": - return "baidu" - case "UC": - return "uc" - } - - // 如果API标识无法识别,则通过URL模式匹配 - return p.determineLinkType(url) -} -``` - -### 链接解析逻辑 -```go -// 按$$$分隔 -fromParts := strings.Split(item.VodDownFrom, "$$$") -urlParts := strings.Split(item.VodDownURL, "$$$") - -// 遍历对应位置 -for i := 0; i < min(len(fromParts), len(urlParts)); i++ { - linkType := p.mapCloudType(fromParts[i], urlParts[i]) - password := extractPassword(urlParts[i]) - // ... -} -``` +### 并发控制 +- **最大并发数**: 20 (MaxConcurrency) +- **搜索超时**: 8秒 (DefaultTimeout) +- **详情页超时**: 6秒 (DetailTimeout) +- **缓存TTL**: 1小时 (cacheTTL) ## 与其他插件的差异 -| 特性 | zhizhen | wanou/ouge | 说明 | -|------|---------|------------|------| -| **API域名** | `xiaomi666.fun` | `woog.nxog.eu.org` | 不同域名 | -| **夸克标识** | `kuake` | `KG` | 标识符不同 | -| **百度标识** | `BAIDUI` | `bd` | 标识符不同 | -| **UC标识** | `UC` | `UC` | 一致 | -| **数据结构** | 相同 | 相同 | JSON结构完全一致 | +| 特性 | zhizhen | muou | 说明 | +|------|---------|------|------| +| **域名** | `xiaomi666.fun` | `666.666291.xyz` | 不同域名 | +| **数据格式** | HTML | HTML | 都是HTML格式 | +| **HTML结构** | 相同 | 相同 | 使用相同的CSS选择器 | +| **并发数** | 20 | 20 | 相同 | +| **缓存TTL** | 1小时 | 1小时 | 相同 | ## 注意事项 -1. **标识符差异**: 需要专门处理`kuake`和`BAIDUI`标识符 -2. 
**数据格式**: 纯JSON API,无需HTML解析 -3. **分隔符处理**: 多个值使用`$$$`分隔,需要split处理 -4. **密码提取**: 部分百度网盘链接包含`?pwd=`参数 -5. **错误处理**: API可能返回`code != 1`的错误状态 -6. **链接验证**: 应过滤无效链接(如`javascript:;`等) +1. **HTML解析**: 使用 goquery 库进行HTML解析 +2. **异步获取详情**: 搜索结果只包含基本信息,需要异步请求详情页获取下载链接 +3. **并发控制**: 使用信号量限制并发数为20 +4. **缓存管理**: 使用 sync.Map 缓存详情页结果,避免重复请求 +5. **链接验证**: 过滤掉无效链接(如包含`javascript:`、`#`等) +6. **密码提取**: 从URL中提取 `?pwd=` 参数作为密码 +7. **去重处理**: 避免在详情页中重复添加相同的链接 ## 开发建议 -- **基于wanou改造**: 可以复制wanou插件实现,修改域名和标识符映射 -- **映射函数重点**: 关键是正确处理`kuake`→`quark`和`BAIDUI`→`baidu`的映射 -- **测试覆盖**: 重点测试多种网盘类型的混合链接解析 -- **缓存策略**: 建议使用相同的缓存机制和TTL设置 \ No newline at end of file +- **参考muou插件**: zhizhen的HTML结构与muou完全相同,可以直接参考muou的实现 +- **关键差异**: 仅需修改域名和插件名称 +- **测试覆盖**: 重点测试多种网盘类型的链接解析和缓存功能 +- **性能优化**: 使用并发请求详情页,提高搜索速度 \ No newline at end of file diff --git a/plugin/zhizhen/zhizhen.go b/plugin/zhizhen/zhizhen.go index 94e1148..092c993 100644 --- a/plugin/zhizhen/zhizhen.go +++ b/plugin/zhizhen/zhizhen.go @@ -1,36 +1,47 @@ package zhizhen import ( + "context" "fmt" - "io" "net/http" "net/url" - "regexp" - "strings" - "time" - "context" - "sync/atomic" - "pansou/model" "pansou/plugin" - "pansou/util/json" + "regexp" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/PuerkitoBio/goquery" ) const ( // 默认超时时间 - 优化为更短时间 DefaultTimeout = 8 * time.Second + DetailTimeout = 6 * time.Second + + // 并发数限制 - 大幅提高并发数 + MaxConcurrency = 20 // HTTP连接池配置 MaxIdleConns = 200 MaxIdleConnsPerHost = 50 MaxConnsPerHost = 100 IdleConnTimeout = 90 * time.Second + + // 缓存TTL - 更短的缓存时间 + cacheTTL = 1 * time.Hour ) // 性能统计(原子操作) var ( - searchRequests int64 = 0 - totalSearchTime int64 = 0 // 纳秒 + searchRequests int64 = 0 + detailPageRequests int64 = 0 + cacheHits int64 = 0 + cacheMisses int64 = 0 + totalSearchTime int64 = 0 // 纳秒 + totalDetailTime int64 = 0 // 纳秒 ) func init() { @@ -39,9 +50,12 @@ func init() { // 预编译的正则表达式 var ( + // 从详情页URL中提取ID的正则表达式 + detailIDRegex = regexp.MustCompile(`/vod/detail/id/(\d+)\.html`) + // 
密码提取正则表达式 passwordRegex = regexp.MustCompile(`\?pwd=([0-9a-zA-Z]+)`) - + // 常见网盘链接的正则表达式(支持16种类型) quarkLinkRegex = regexp.MustCompile(`https?://pan\.quark\.cn/s/[0-9a-zA-Z]+`) ucLinkRegex = regexp.MustCompile(`https?://drive\.uc\.cn/s/[0-9a-zA-Z]+(\?[^"'\s]*)?`) @@ -58,6 +72,9 @@ var ( pikpakLinkRegex = regexp.MustCompile(`https?://mypikpak\.com/s/[0-9a-zA-Z]+`) magnetLinkRegex = regexp.MustCompile(`magnet:\?xt=urn:btih:[0-9a-fA-F]{40}`) ed2kLinkRegex = regexp.MustCompile(`ed2k://\|file\|.+\|\d+\|[0-9a-fA-F]{32}\|/`) + + // 缓存相关 + detailCache = sync.Map{} // 缓存详情页解析结果 ) // ZhizhenAsyncPlugin Zhizhen异步插件 @@ -104,7 +121,7 @@ func (p *ZhizhenAsyncPlugin) SearchWithResult(keyword string, ext map[string]int return p.AsyncSearchWithResult(keyword, p.searchImpl, p.MainCacheKey, ext) } -// searchImpl 搜索实现 +// searchImpl 实现具体的搜索逻辑 func (p *ZhizhenAsyncPlugin) searchImpl(client *http.Client, keyword string, ext map[string]interface{}) ([]model.SearchResult, error) { // 性能统计 start := time.Now() @@ -119,222 +136,160 @@ func (p *ZhizhenAsyncPlugin) searchImpl(client *http.Client, keyword string, ext client = p.optimizedClient } - // 构建API搜索URL - 使用zhizhen专用域名 - searchURL := fmt.Sprintf("https://xiaomi666.fun/api.php/provide/vod?ac=detail&wd=%s", url.QueryEscape(keyword)) - - // 创建HTTP请求 + // 1. 构建搜索URL + searchURL := fmt.Sprintf("https://xiaomi666.fun/index.php/vod/search/wd/%s.html", url.QueryEscape(keyword)) + + // 2. 创建带超时的上下文 ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout) defer cancel() - + + // 3. 创建请求 req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil) if err != nil { - return nil, fmt.Errorf("[%s] 创建搜索请求失败: %w", p.Name(), err) + return nil, fmt.Errorf("[%s] 创建请求失败: %w", p.Name(), err) } - - // 设置请求头 + + // 4. 
设置完整的请求头(避免反爬虫) req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") - req.Header.Set("Accept", "application/json, text/plain, */*") + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8") req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") req.Header.Set("Connection", "keep-alive") + req.Header.Set("Upgrade-Insecure-Requests", "1") + req.Header.Set("Cache-Control", "max-age=0") req.Header.Set("Referer", "https://xiaomi666.fun/") - req.Header.Set("Cache-Control", "no-cache") - - // 发送请求 + + // 5. 发送请求(带重试机制) resp, err := p.doRequestWithRetry(req, client) if err != nil { return nil, fmt.Errorf("[%s] 搜索请求失败: %w", p.Name(), err) } defer resp.Body.Close() - - // 解析JSON响应 - body, err := io.ReadAll(resp.Body) + + if resp.StatusCode != 200 { + return nil, fmt.Errorf("[%s] 搜索请求返回状态码: %d", p.Name(), resp.StatusCode) + } + + // 6. 解析搜索结果页面 + doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { - return nil, fmt.Errorf("[%s] 读取响应失败: %w", p.Name(), err) + return nil, fmt.Errorf("[%s] 解析搜索页面失败: %w", p.Name(), err) } - - var apiResponse ZhizhenAPIResponse - if err := json.Unmarshal(body, &apiResponse); err != nil { - return nil, fmt.Errorf("[%s] 解析JSON响应失败: %w", p.Name(), err) - } - - // 检查API响应状态 - if apiResponse.Code != 1 { - return nil, fmt.Errorf("[%s] API返回错误: %s", p.Name(), apiResponse.Msg) - } - - // 解析搜索结果 + + // 7. 提取搜索结果 var results []model.SearchResult - for _, item := range apiResponse.List { - if result := p.parseAPIItem(item); result.Title != "" { + + doc.Find(".module-search-item").Each(func(i int, s *goquery.Selection) { + result := p.parseSearchItem(s, keyword) + if result.UniqueID != "" { results = append(results, result) } - } - - return results, nil + }) + + // 8. 异步获取详情页信息 + enhancedResults := p.enhanceWithDetails(client, results) + + // 9. 
关键词过滤 + return plugin.FilterResultsByKeyword(enhancedResults, keyword), nil } -// ZhizhenAPIResponse API响应结构 -type ZhizhenAPIResponse struct { - Code int `json:"code"` - Msg string `json:"msg"` - Page int `json:"page"` - PageCount int `json:"pagecount"` - Limit int `json:"limit"` - Total int `json:"total"` - List []ZhizhenAPIItem `json:"list"` -} +// parseSearchItem 解析单个搜索结果项 +func (p *ZhizhenAsyncPlugin) parseSearchItem(s *goquery.Selection, keyword string) model.SearchResult { + result := model.SearchResult{} -// ZhizhenAPIItem API数据项 -type ZhizhenAPIItem struct { - VodID int `json:"vod_id"` - VodName string `json:"vod_name"` - VodActor string `json:"vod_actor"` - VodDirector string `json:"vod_director"` - VodDownFrom string `json:"vod_down_from"` - VodDownURL string `json:"vod_down_url"` - VodRemarks string `json:"vod_remarks"` - VodPubdate string `json:"vod_pubdate"` - VodArea string `json:"vod_area"` - VodLang string `json:"vod_lang"` - VodYear string `json:"vod_year"` - VodContent string `json:"vod_content"` - VodPic string `json:"vod_pic"` -} + // 提取详情页链接和ID (修正:使用正确的选择器) + detailLink, exists := s.Find(".video-info-header h3 a").First().Attr("href") + if !exists { + return result + } -// parseAPIItem 解析API数据项 -func (p *ZhizhenAsyncPlugin) parseAPIItem(item ZhizhenAPIItem) model.SearchResult { - // 构建唯一ID - uniqueID := fmt.Sprintf("%s-%d", p.Name(), item.VodID) - - // 构建标题 - title := strings.TrimSpace(item.VodName) - if title == "" { - return model.SearchResult{} + // 提取ID + matches := detailIDRegex.FindStringSubmatch(detailLink) + if len(matches) < 2 { + return result } - - // 构建描述 - var contentParts []string - if item.VodActor != "" { - contentParts = append(contentParts, fmt.Sprintf("主演: %s", item.VodActor)) - } - if item.VodDirector != "" { - contentParts = append(contentParts, fmt.Sprintf("导演: %s", item.VodDirector)) - } - if item.VodArea != "" { - contentParts = append(contentParts, fmt.Sprintf("地区: %s", item.VodArea)) - } - if item.VodLang != "" { - 
contentParts = append(contentParts, fmt.Sprintf("语言: %s", item.VodLang)) - } - if item.VodYear != "" { - contentParts = append(contentParts, fmt.Sprintf("年份: %s", item.VodYear)) - } - if item.VodRemarks != "" { - contentParts = append(contentParts, fmt.Sprintf("状态: %s", item.VodRemarks)) - } - content := strings.Join(contentParts, " | ") - - // 解析下载链接 - links := p.parseDownloadLinks(item.VodDownFrom, item.VodDownURL) - - // 构建标签 + + itemID := matches[1] + result.UniqueID = fmt.Sprintf("%s-%s", p.Name(), itemID) + + // 提取标题 + titleElement := s.Find(".video-info-header h3 a") + result.Title = strings.TrimSpace(titleElement.Text()) + + // 提取资源类型/质量 + qualityElement := s.Find(".video-serial") + quality := strings.TrimSpace(qualityElement.Text()) + + // 提取分类信息 var tags []string - if item.VodYear != "" { - tags = append(tags, item.VodYear) - } - if item.VodArea != "" { - tags = append(tags, item.VodArea) - } - - return model.SearchResult{ - UniqueID: uniqueID, - Title: title, - Content: content, - Links: links, - Tags: tags, - Channel: "", // 插件搜索结果Channel为空 - Datetime: time.Time{}, // 使用零值而不是nil,参考jikepan插件标准 - } -} - -// parseDownloadLinks 解析下载链接 -func (p *ZhizhenAsyncPlugin) parseDownloadLinks(vodDownFrom, vodDownURL string) []model.Link { - if vodDownFrom == "" || vodDownURL == "" { - return nil - } - - // 按$$$分隔 - fromParts := strings.Split(vodDownFrom, "$$$") - urlParts := strings.Split(vodDownURL, "$$$") - - // 确保数组长度一致 - minLen := len(fromParts) - if len(urlParts) < minLen { - minLen = len(urlParts) - } - - var links []model.Link - for i := 0; i < minLen; i++ { - fromType := strings.TrimSpace(fromParts[i]) - urlStr := strings.TrimSpace(urlParts[i]) - - if urlStr == "" || !p.isValidNetworkDriveURL(urlStr) { - continue + s.Find(".video-info-aux .tag-link a").Each(func(i int, tag *goquery.Selection) { + tagText := strings.TrimSpace(tag.Text()) + if tagText != "" { + tags = append(tags, tagText) } - - // 映射网盘类型 - linkType := p.mapCloudType(fromType, urlStr) - if 
linkType == "" { - continue - } - - // 提取密码 - password := p.extractPassword(urlStr) - - links = append(links, model.Link{ - Type: linkType, - URL: urlStr, - Password: password, - }) - } - - return links -} + }) + result.Tags = tags -// mapCloudType 映射网盘类型(zhizhen特有标识符) -func (p *ZhizhenAsyncPlugin) mapCloudType(apiType, url string) string { - // 优先根据API标识映射(zhizhen特有) - switch strings.ToUpper(apiType) { - case "KUAKE": // ⚠️ zhizhen特有:kuake -> quark - return "quark" - case "BAIDUI": // ⚠️ zhizhen特有:BAIDUI -> baidu - return "baidu" - case "UC": // ✅ 标准:UC -> uc - return "uc" - case "ALY": - return "aliyun" - case "XL": - return "xunlei" - case "TY": - return "tianyi" - case "115": - return "115" - case "MB": - return "mobile" - case "WY": - return "weiyun" - case "LZ": - return "lanzou" - case "JGY": - return "jianguoyun" - case "123": - return "123" - case "PK": - return "pikpak" + // 提取导演信息 + director := "" + s.Find(".video-info-items").Each(func(i int, item *goquery.Selection) { + title := strings.TrimSpace(item.Find(".video-info-itemtitle").Text()) + if strings.Contains(title, "导演") { + director = strings.TrimSpace(item.Find(".video-info-actor a").Text()) + } + }) + + // 提取主演信息 + var actors []string + s.Find(".video-info-items").Each(func(i int, item *goquery.Selection) { + title := strings.TrimSpace(item.Find(".video-info-itemtitle").Text()) + if strings.Contains(title, "主演") { + item.Find(".video-info-actor a").Each(func(j int, actor *goquery.Selection) { + actorName := strings.TrimSpace(actor.Text()) + if actorName != "" { + actors = append(actors, actorName) + } + }) + } + }) + + // 提取剧情简介 + plotElement := s.Find(".video-info-items").FilterFunction(func(i int, item *goquery.Selection) bool { + title := strings.TrimSpace(item.Find(".video-info-itemtitle").Text()) + return strings.Contains(title, "剧情") + }) + plot := strings.TrimSpace(plotElement.Find(".video-info-item").Text()) + + // 提取封面图片 (参考 Pan_mogg.js 的选择器) + var images []string + if picURL, exists := 
s.Find(".module-item-pic > img").Attr("data-src"); exists && picURL != "" { + images = append(images, picURL) } - - // 如果API标识无法识别,则通过URL模式匹配 - return p.determineLinkType(url) + result.Images = images + + // 构建内容描述 + var contentParts []string + if quality != "" { + contentParts = append(contentParts, "【"+quality+"】") + } + if director != "" { + contentParts = append(contentParts, "导演:"+director) + } + if len(actors) > 0 { + actorStr := strings.Join(actors[:min(3, len(actors))], "、") // 只显示前3个演员 + if len(actors) > 3 { + actorStr += "等" + } + contentParts = append(contentParts, "主演:"+actorStr) + } + if plot != "" { + contentParts = append(contentParts, plot) + } + + result.Content = strings.Join(contentParts, "\n") + result.Channel = "" // 插件搜索结果不设置频道名,只有Telegram频道结果才设置 + result.Datetime = time.Time{} // 使用零值而不是nil,参考jikepan插件标准 + + return result } // isValidNetworkDriveURL 检查URL是否为有效的网盘链接 @@ -412,45 +367,243 @@ func (p *ZhizhenAsyncPlugin) extractPassword(url string) string { return "" } -// doRequestWithRetry 带重试的HTTP请求(优化JSON API的重试策略) -func (p *ZhizhenAsyncPlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) { - maxRetries := 2 // 对于JSON API减少重试次数 - var lastErr error - - for i := 0; i < maxRetries; i++ { - resp, err := client.Do(req) - if err == nil { - if resp.StatusCode == http.StatusOK { - return resp, nil +// enhanceWithDetails 异步获取详情页信息以获取下载链接 +func (p *ZhizhenAsyncPlugin) enhanceWithDetails(client *http.Client, results []model.SearchResult) []model.SearchResult { + var enhancedResults []model.SearchResult + var mu sync.Mutex + var wg sync.WaitGroup + + // 限制并发数 + semaphore := make(chan struct{}, MaxConcurrency) + + for _, result := range results { + wg.Add(1) + go func(r model.SearchResult) { + defer wg.Done() + + // 获取信号量 + semaphore <- struct{}{} + defer func() { <-semaphore }() + + // 从UniqueID提取ID + parts := strings.Split(r.UniqueID, "-") + if len(parts) < 2 { + mu.Lock() + enhancedResults = append(enhancedResults, 
r) + mu.Unlock() + return } - resp.Body.Close() - lastErr = fmt.Errorf("HTTP状态码: %d", resp.StatusCode) - } else { - lastErr = err - } - - // JSON API快速重试:只等待很短时间 - if i < maxRetries-1 { - time.Sleep(100 * time.Millisecond) // 从秒级改为100毫秒 - } + + itemID := parts[1] + + // 检查缓存 + if cached, ok := detailCache.Load(itemID); ok { + if cachedResult, ok := cached.(model.SearchResult); ok { + atomic.AddInt64(&cacheHits, 1) + mu.Lock() + enhancedResults = append(enhancedResults, cachedResult) + mu.Unlock() + return + } + } + atomic.AddInt64(&cacheMisses, 1) + + // 获取详情页链接和图片 + detailLinks, detailImages := p.fetchDetailLinksAndImages(client, itemID) + r.Links = detailLinks + + // 合并图片:优先使用详情页的海报,如果没有则使用搜索结果的图片 + if len(detailImages) > 0 { + r.Images = detailImages + } + + // 缓存结果 + detailCache.Store(itemID, r) + + mu.Lock() + enhancedResults = append(enhancedResults, r) + mu.Unlock() + }(result) } - - return nil, fmt.Errorf("[%s] 请求失败,重试%d次后仍失败: %w", p.Name(), maxRetries, lastErr) + + wg.Wait() + return enhancedResults +} + +// doRequestWithRetry 带重试机制的HTTP请求 +func (p *ZhizhenAsyncPlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) { + maxRetries := 3 + var lastErr error + + for i := 0; i < maxRetries; i++ { + if i > 0 { + // 指数退避 + backoff := time.Duration(1< 0 { - avgTime = float64(totalTime) / float64(totalRequests) / 1e6 // 转换为毫秒 + totalSearchRequests := atomic.LoadInt64(&searchRequests) + totalDetailRequests := atomic.LoadInt64(&detailPageRequests) + totalCacheHits := atomic.LoadInt64(&cacheHits) + totalCacheMisses := atomic.LoadInt64(&cacheMisses) + totalSearchTime := atomic.LoadInt64(&totalSearchTime) + totalDetailTime := atomic.LoadInt64(&totalDetailTime) + + var avgSearchTime, avgDetailTime, cacheHitRate float64 + if totalSearchRequests > 0 { + avgSearchTime = float64(totalSearchTime) / float64(totalSearchRequests) / 1e6 // 转换为毫秒 } - + if totalDetailRequests > 0 { + avgDetailTime = float64(totalDetailTime) / 
float64(totalDetailRequests) / 1e6 // 转换为毫秒 + } + if totalCacheHits+totalCacheMisses > 0 { + cacheHitRate = float64(totalCacheHits) / float64(totalCacheHits+totalCacheMisses) * 100 + } + return map[string]interface{}{ - "search_requests": totalRequests, - "avg_search_time_ms": avgTime, - "total_search_time_ns": totalTime, + "search_requests": totalSearchRequests, + "detail_page_requests": totalDetailRequests, + "cache_hits": totalCacheHits, + "cache_misses": totalCacheMisses, + "cache_hit_rate": cacheHitRate, + "avg_search_time_ms": avgSearchTime, + "avg_detail_time_ms": avgDetailTime, + "total_search_time_ns": totalSearchTime, + "total_detail_time_ns": totalDetailTime, } } \ No newline at end of file From 3e692127276adfd9e0eea2aa0f5d80700ae2dcae Mon Sep 17 00:00:00 2001 From: woleigedouer <38418090+woleigedouer@users.noreply.github.com> Date: Sat, 1 Nov 2025 19:42:08 +0800 Subject: [PATCH 7/8] =?UTF-8?q?erxiao=20=E4=BF=AE=E6=AD=A3=E6=90=9C?= =?UTF-8?q?=E7=B4=A2url=20=E9=87=8D=E6=9E=84=E4=B8=BAhtml=20=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0=E5=B0=81=E9=9D=A2=E8=8E=B7=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugin/erxiao/erxiao.go | 538 +++++++++++++++++++--------------- plugin/erxiao/html结构分析.md | 138 +++++++++ 2 files changed, 442 insertions(+), 234 deletions(-) create mode 100644 plugin/erxiao/html结构分析.md diff --git a/plugin/erxiao/erxiao.go b/plugin/erxiao/erxiao.go index 169a33b..9008686 100644 --- a/plugin/erxiao/erxiao.go +++ b/plugin/erxiao/erxiao.go @@ -2,35 +2,52 @@ package erxiao import ( "fmt" - "io" "net/http" "net/url" "regexp" "strings" "time" "context" + "sync" "sync/atomic" + "github.com/PuerkitoBio/goquery" "pansou/model" "pansou/plugin" - "pansou/util/json" ) const ( - // 默认超时时间 - 优化为更短时间 + // 默认超时时间 DefaultTimeout = 8 * time.Second + DetailTimeout = 6 * time.Second // HTTP连接池配置 MaxIdleConns = 200 MaxIdleConnsPerHost = 50 MaxConnsPerHost = 100 IdleConnTimeout = 90 * time.Second + + // 并发控制 + 
MaxConcurrency = 20 + + // 缓存TTL + cacheTTL = 1 * time.Hour ) // 性能统计(原子操作) var ( - searchRequests int64 = 0 - totalSearchTime int64 = 0 // 纳秒 + searchRequests int64 = 0 + totalSearchTime int64 = 0 // 纳秒 + detailPageRequests int64 = 0 + totalDetailTime int64 = 0 // 纳秒 + cacheHits int64 = 0 + cacheMisses int64 = 0 +) + +// Detail page缓存 +var ( + detailCache sync.Map + cacheMutex sync.RWMutex ) func init() { @@ -41,7 +58,10 @@ func init() { var ( // 密码提取正则表达式 passwordRegex = regexp.MustCompile(`\?pwd=([0-9a-zA-Z]+)`) - + + // 详情页ID提取正则表达式 + detailIDRegex = regexp.MustCompile(`/id/(\d+)`) + // 常见网盘链接的正则表达式(支持16种类型) quarkLinkRegex = regexp.MustCompile(`https?://pan\.quark\.cn/s/[0-9a-zA-Z]+`) ucLinkRegex = regexp.MustCompile(`https?://drive\.uc\.cn/s/[0-9a-zA-Z]+(\?[^"'\s]*)?`) @@ -100,7 +120,7 @@ func (p *ErxiaoAsyncPlugin) SearchWithResult(keyword string, ext map[string]inte return p.AsyncSearchWithResult(keyword, p.searchImpl, p.MainCacheKey, ext) } -// searchImpl 搜索实现 +// searchImpl 搜索实现 - HTML解析版本 func (p *ErxiaoAsyncPlugin) searchImpl(client *http.Client, keyword string, ext map[string]interface{}) ([]model.SearchResult, error) { // 性能统计 start := time.Now() @@ -115,271 +135,307 @@ func (p *ErxiaoAsyncPlugin) searchImpl(client *http.Client, keyword string, ext client = p.optimizedClient } - // 构建API搜索URL - searchURL := fmt.Sprintf("https://erxiaofn.click/api.php/provide/vod?ac=detail&wd=%s", url.QueryEscape(keyword)) - - // 创建HTTP请求 + // 1. 构建搜索URL + searchURL := fmt.Sprintf("https://erxiaofn.click/index.php/vod/search/wd/%s.html", url.QueryEscape(keyword)) + + // 2. 创建带超时的上下文 ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout) defer cancel() - + + // 3. 创建请求 req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil) if err != nil { - return nil, fmt.Errorf("[%s] 创建搜索请求失败: %w", p.Name(), err) + return nil, fmt.Errorf("[%s] 创建请求失败: %w", p.Name(), err) } - - // 设置请求头 + + // 4. 
设置请求头 req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") - req.Header.Set("Accept", "application/json, text/plain, */*") + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8") req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") req.Header.Set("Connection", "keep-alive") req.Header.Set("Referer", "https://erxiaofn.click/") - req.Header.Set("Cache-Control", "no-cache") - - // 发送请求 + + // 5. 发送请求 resp, err := p.doRequestWithRetry(req, client) if err != nil { return nil, fmt.Errorf("[%s] 搜索请求失败: %w", p.Name(), err) } defer resp.Body.Close() - - // 解析JSON响应 - body, err := io.ReadAll(resp.Body) + + if resp.StatusCode != 200 { + return nil, fmt.Errorf("[%s] 搜索请求返回状态码: %d", p.Name(), resp.StatusCode) + } + + // 6. 解析搜索结果页面 + doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { - return nil, fmt.Errorf("[%s] 读取响应失败: %w", p.Name(), err) + return nil, fmt.Errorf("[%s] 解析搜索页面失败: %w", p.Name(), err) } - - var apiResponse ErxiaoAPIResponse - if err := json.Unmarshal(body, &apiResponse); err != nil { - return nil, fmt.Errorf("[%s] 解析JSON响应失败: %w", p.Name(), err) - } - - // 检查API响应状态 - if apiResponse.Code != 1 { - return nil, fmt.Errorf("[%s] API返回错误: %s", p.Name(), apiResponse.Msg) - } - - // 解析搜索结果 + + // 7. 提取搜索结果 var results []model.SearchResult - for _, item := range apiResponse.List { - if result := p.parseAPIItem(item); result.Title != "" { + + doc.Find(".module-search-item").Each(func(i int, s *goquery.Selection) { + result := p.parseSearchItem(s, keyword) + if result.UniqueID != "" { results = append(results, result) } + }) + + // 8. 异步获取详情页信息 + enhancedResults := p.enhanceWithDetails(client, results) + + // 9. 
关键词过滤 + return plugin.FilterResultsByKeyword(enhancedResults, keyword), nil +} + +// parseSearchItem 解析单个搜索结果项 +func (p *ErxiaoAsyncPlugin) parseSearchItem(s *goquery.Selection, keyword string) model.SearchResult { + result := model.SearchResult{} + + // 提取详情页链接和ID + detailLink, exists := s.Find(".video-info-header h3 a").First().Attr("href") + if !exists { + return result } - - return results, nil -} -type ErxiaoAPIResponse struct { - Code int `json:"code"` - Msg string `json:"msg"` - Page int `json:"page"` - PageCount int `json:"pagecount"` - Limit int `json:"limit"` - Total int `json:"total"` - List []ErxiaoAPIItem `json:"list"` -} + // 提取ID + matches := detailIDRegex.FindStringSubmatch(detailLink) + if len(matches) < 2 { + return result + } + itemID := matches[1] -type ErxiaoAPIItem struct { - VodID int `json:"vod_id"` - VodName string `json:"vod_name"` - VodActor string `json:"vod_actor"` - VodDirector string `json:"vod_director"` - VodDownFrom string `json:"vod_down_from"` - VodDownURL string `json:"vod_down_url"` - VodRemarks string `json:"vod_remarks"` - VodPubdate string `json:"vod_pubdate"` - VodArea string `json:"vod_area"` - VodYear string `json:"vod_year"` - VodContent string `json:"vod_content"` - VodPic string `json:"vod_pic"` -} - -// parseAPIItem 解析API数据项 -func (p *ErxiaoAsyncPlugin) parseAPIItem(item ErxiaoAPIItem) model.SearchResult { // 构建唯一ID - uniqueID := fmt.Sprintf("%s-%d", p.Name(), item.VodID) - - // 构建标题 - title := strings.TrimSpace(item.VodName) + uniqueID := fmt.Sprintf("%s-%s", p.Name(), itemID) + + // 提取标题 + title := strings.TrimSpace(s.Find(".video-info-header h3 a").First().Text()) if title == "" { - return model.SearchResult{} + return result } - - // 构建描述 + + // 提取分类 + category := strings.TrimSpace(s.Find(".video-info-items").First().Find(".video-info-item").First().Text()) + + // 提取导演 + directorElement := s.Find(".video-info-items").FilterFunction(func(i int, item *goquery.Selection) bool { + title := 
strings.TrimSpace(item.Find(".video-info-itemtitle").Text()) + return strings.Contains(title, "导演") + }) + director := strings.TrimSpace(directorElement.Find(".video-info-item").Text()) + + // 提取主演 + actorElement := s.Find(".video-info-items").FilterFunction(func(i int, item *goquery.Selection) bool { + title := strings.TrimSpace(item.Find(".video-info-itemtitle").Text()) + return strings.Contains(title, "主演") + }) + actor := strings.TrimSpace(actorElement.Find(".video-info-item").Text()) + + // 提取年份 + year := strings.TrimSpace(s.Find(".video-info-items").Last().Find(".video-info-item").First().Text()) + + // 提取质量/状态 + quality := strings.TrimSpace(s.Find(".video-info-header .video-info-remarks").Text()) + + // 提取剧情简介 + plotElement := s.Find(".video-info-items").FilterFunction(func(i int, item *goquery.Selection) bool { + title := strings.TrimSpace(item.Find(".video-info-itemtitle").Text()) + return strings.Contains(title, "剧情") + }) + plot := strings.TrimSpace(plotElement.Find(".video-info-item").Text()) + + // 提取封面图片 + var images []string + if picURL, exists := s.Find(".module-item-pic > img").Attr("data-src"); exists && picURL != "" { + images = append(images, picURL) + } + result.Images = images + + // 构建内容描述 var contentParts []string - if item.VodActor != "" { - contentParts = append(contentParts, fmt.Sprintf("主演: %s", item.VodActor)) + if quality != "" { + contentParts = append(contentParts, "【"+quality+"】") } - if item.VodDirector != "" { - contentParts = append(contentParts, fmt.Sprintf("导演: %s", item.VodDirector)) + if director != "" { + contentParts = append(contentParts, "导演:"+director) } - if item.VodArea != "" { - contentParts = append(contentParts, fmt.Sprintf("地区: %s", item.VodArea)) + if actor != "" { + contentParts = append(contentParts, "主演:"+actor) } - if item.VodYear != "" { - contentParts = append(contentParts, fmt.Sprintf("年份: %s", item.VodYear)) + if year != "" { + contentParts = append(contentParts, "年份:"+year) } - if item.VodRemarks != "" { 
- contentParts = append(contentParts, fmt.Sprintf("状态: %s", item.VodRemarks)) + if plot != "" { + contentParts = append(contentParts, "剧情:"+plot) } - content := strings.Join(contentParts, " | ") - - // 解析下载链接 - links := p.parseDownloadLinks(item.VodDownFrom, item.VodDownURL) - + content := strings.Join(contentParts, "\n") + // 构建标签 var tags []string - if item.VodYear != "" { - tags = append(tags, item.VodYear) + if year != "" { + tags = append(tags, year) } - if item.VodArea != "" { - tags = append(tags, item.VodArea) - } - - return model.SearchResult{ - UniqueID: uniqueID, - Title: title, - Content: content, - Links: links, - Tags: tags, - Channel: "", // 插件搜索结果Channel为空 - Datetime: time.Time{}, // 使用零值而不是nil,参考jikepan插件标准 + if category != "" { + tags = append(tags, category) } + + result.UniqueID = uniqueID + result.Title = title + result.Content = content + result.Tags = tags + result.Channel = "" // 插件搜索结果Channel为空 + result.Datetime = time.Time{} // 使用零值 + + return result } -// parseDownloadLinks 解析下载链接 -func (p *ErxiaoAsyncPlugin) parseDownloadLinks(vodDownFrom, vodDownURL string) []model.Link { - if vodDownFrom == "" || vodDownURL == "" { - return nil +// enhanceWithDetails 异步获取详情页信息 +func (p *ErxiaoAsyncPlugin) enhanceWithDetails(client *http.Client, results []model.SearchResult) []model.SearchResult { + var enhancedResults []model.SearchResult + var wg sync.WaitGroup + var mu sync.Mutex + + // 创建信号量限制并发数 + semaphore := make(chan struct{}, MaxConcurrency) + + for _, result := range results { + wg.Add(1) + go func(result model.SearchResult) { + defer wg.Done() + semaphore <- struct{}{} // 获取信号量 + defer func() { <-semaphore }() // 释放信号量 + + // 从UniqueID中提取itemID + parts := strings.Split(result.UniqueID, "-") + if len(parts) < 2 { + mu.Lock() + enhancedResults = append(enhancedResults, result) + mu.Unlock() + return + } + itemID := parts[1] + + // 检查缓存 + if cached, ok := detailCache.Load(itemID); ok { + atomic.AddInt64(&cacheHits, 1) + r := 
cached.(model.SearchResult) + mu.Lock() + enhancedResults = append(enhancedResults, r) + mu.Unlock() + return + } + + atomic.AddInt64(&cacheMisses, 1) + + // 获取详情页链接和图片 + detailLinks, detailImages := p.fetchDetailLinksAndImages(client, itemID) + result.Links = detailLinks + + // 合并图片:优先使用详情页的海报,如果没有则使用搜索结果的图片 + if len(detailImages) > 0 { + result.Images = detailImages + } + + // 缓存结果 + detailCache.Store(itemID, result) + + mu.Lock() + enhancedResults = append(enhancedResults, result) + mu.Unlock() + }(result) } - - // 按$$$分隔 - fromParts := strings.Split(vodDownFrom, "$$$") - urlParts := strings.Split(vodDownURL, "$$$") - - // 确保数组长度一致 - minLen := len(fromParts) - if len(urlParts) < minLen { - minLen = len(urlParts) + + wg.Wait() + return enhancedResults +} + +// fetchDetailLinksAndImages 获取详情页的下载链接和图片 +func (p *ErxiaoAsyncPlugin) fetchDetailLinksAndImages(client *http.Client, itemID string) ([]model.Link, []string) { + // 性能统计 + start := time.Now() + atomic.AddInt64(&detailPageRequests, 1) + defer func() { + duration := time.Since(start).Nanoseconds() + atomic.AddInt64(&totalDetailTime, duration) + }() + + detailURL := fmt.Sprintf("https://erxiaofn.click/index.php/vod/detail/id/%s.html", itemID) + + // 创建带超时的上下文 + ctx, cancel := context.WithTimeout(context.Background(), DetailTimeout) + defer cancel() + + // 创建请求 + req, err := http.NewRequestWithContext(ctx, "GET", detailURL, nil) + if err != nil { + return nil, nil } - + + // 设置请求头 + req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8") + req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") + req.Header.Set("Connection", "keep-alive") + req.Header.Set("Referer", "https://erxiaofn.click/") + + // 发送请求(带重试) + resp, err := p.doRequestWithRetry(req, client) + if err != nil { + return nil, nil + } + defer 
resp.Body.Close() + + if resp.StatusCode != 200 { + return nil, nil + } + + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, nil + } + var links []model.Link - for i := 0; i < minLen; i++ { - fromType := strings.TrimSpace(fromParts[i]) - urlStr := strings.TrimSpace(urlParts[i]) - - if urlStr == "" { - continue - } - - // 直接确定链接类型(合并验证和类型判断,避免重复正则匹配) - linkType := p.determineLinkTypeOptimized(fromType, urlStr) - if linkType == "" { - continue - } - - // 提取密码 - password := p.extractPassword(urlStr) - - links = append(links, model.Link{ - Type: linkType, - URL: urlStr, - Password: password, - }) + var images []string + + // 提取详情页的海报图片 + if posterURL, exists := doc.Find(".mobile-play .lazyload").Attr("data-src"); exists && posterURL != "" { + images = append(images, posterURL) } - - return links + + // 查找下载链接区域 + doc.Find("#download-list .module-row-one").Each(func(i int, s *goquery.Selection) { + // 从data-clipboard-text属性提取链接 + if linkURL, exists := s.Find("[data-clipboard-text]").Attr("data-clipboard-text"); exists { + // 过滤掉无效链接 + if p.isValidNetworkDriveURL(linkURL) { + if linkType := p.determineLinkType(linkURL); linkType != "" { + link := model.Link{ + Type: linkType, + URL: linkURL, + Password: "", // 大部分网盘不需要密码 + } + links = append(links, link) + } + } + } + }) + + return links, images } - - - - -// determineLinkTypeOptimized 优化的链接类型判断(避免重复正则匹配) -func (p *ErxiaoAsyncPlugin) determineLinkTypeOptimized(apiType, url string) string { - // 基本验证(包含原 isValidNetworkDriveURL 的逻辑) - if strings.Contains(url, "javascript:") || +// isValidNetworkDriveURL 验证是否为有效的网盘URL +func (p *ErxiaoAsyncPlugin) isValidNetworkDriveURL(url string) bool { + if strings.Contains(url, "javascript:") || strings.Contains(url, "#") || url == "" || (!strings.HasPrefix(url, "http") && !strings.HasPrefix(url, "magnet:") && !strings.HasPrefix(url, "ed2k:")) { - return "" - } - - // 优先根据API标识快速映射(避免正则匹配) - switch strings.ToUpper(apiType) { - case "BD": - if 
baiduLinkRegex.MatchString(url) { - return "baidu" - } - case "KG": - if quarkLinkRegex.MatchString(url) { - return "quark" - } - case "UC": - if ucLinkRegex.MatchString(url) { - return "uc" - } - case "ALY": - if aliyunLinkRegex.MatchString(url) { - return "aliyun" - } - case "XL": - if xunleiLinkRegex.MatchString(url) { - return "xunlei" - } - case "TY": - if tianyiLinkRegex.MatchString(url) { - return "tianyi" - } - case "115": - if link115Regex.MatchString(url) { - return "115" - } - case "MB": - if mobileLinkRegex.MatchString(url) { - return "mobile" - } - case "123": - if link123Regex.MatchString(url) { - return "123" - } - case "PIKPAK": - if pikpakLinkRegex.MatchString(url) { - return "pikpak" - } - } - - // 如果API标识匹配失败,回退到URL正则匹配(一次性匹配) - switch { - case baiduLinkRegex.MatchString(url): - return "baidu" - case ucLinkRegex.MatchString(url): - return "uc" - case aliyunLinkRegex.MatchString(url): - return "aliyun" - case xunleiLinkRegex.MatchString(url): - return "xunlei" - case tianyiLinkRegex.MatchString(url): - return "tianyi" - case link115Regex.MatchString(url): - return "115" - case mobileLinkRegex.MatchString(url): - return "mobile" - case link123Regex.MatchString(url): - return "123" - case pikpakLinkRegex.MatchString(url): - return "pikpak" - case magnetLinkRegex.MatchString(url): - return "magnet" - case ed2kLinkRegex.MatchString(url): - return "ed2k" - case quarkLinkRegex.MatchString(url): - return "quark" - default: - return "" // 不支持的类型 + return false } + return true } + // determineLinkType 根据URL确定链接类型 func (p *ErxiaoAsyncPlugin) determineLinkType(url string) string { switch { @@ -421,11 +477,11 @@ func (p *ErxiaoAsyncPlugin) extractPassword(url string) string { return "" } -// doRequestWithRetry 带重试的HTTP请求(优化JSON API的重试策略) +// doRequestWithRetry 带重试的HTTP请求 func (p *ErxiaoAsyncPlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) { - maxRetries := 2 // 对于JSON API减少重试次数 + maxRetries := 2 var lastErr error - + 
for i := 0; i < maxRetries; i++ { resp, err := client.Do(req) if err == nil { @@ -437,13 +493,13 @@ func (p *ErxiaoAsyncPlugin) doRequestWithRetry(req *http.Request, client *http.C } else { lastErr = err } - - // JSON API快速重试:只等待很短时间 + + // 快速重试:只等待很短时间 if i < maxRetries-1 { - time.Sleep(100 * time.Millisecond) // 从秒级改为100毫秒 + time.Sleep(100 * time.Millisecond) } } - + return nil, fmt.Errorf("[%s] 请求失败,重试%d次后仍失败: %w", p.Name(), maxRetries, lastErr) } @@ -451,15 +507,29 @@ func (p *ErxiaoAsyncPlugin) doRequestWithRetry(req *http.Request, client *http.C func (p *ErxiaoAsyncPlugin) GetPerformanceStats() map[string]interface{} { totalRequests := atomic.LoadInt64(&searchRequests) totalTime := atomic.LoadInt64(&totalSearchTime) - + detailRequests := atomic.LoadInt64(&detailPageRequests) + detailTime := atomic.LoadInt64(&totalDetailTime) + hits := atomic.LoadInt64(&cacheHits) + misses := atomic.LoadInt64(&cacheMisses) + var avgTime float64 if totalRequests > 0 { avgTime = float64(totalTime) / float64(totalRequests) / 1e6 // 转换为毫秒 } - + + var avgDetailTime float64 + if detailRequests > 0 { + avgDetailTime = float64(detailTime) / float64(detailRequests) / 1e6 // 转换为毫秒 + } + return map[string]interface{}{ - "search_requests": totalRequests, - "avg_search_time_ms": avgTime, + "search_requests": totalRequests, + "avg_search_time_ms": avgTime, "total_search_time_ns": totalTime, + "detail_page_requests": detailRequests, + "avg_detail_time_ms": avgDetailTime, + "total_detail_time_ns": detailTime, + "cache_hits": hits, + "cache_misses": misses, } } \ No newline at end of file diff --git a/plugin/erxiao/html结构分析.md b/plugin/erxiao/html结构分析.md new file mode 100644 index 0000000..d21eb2b --- /dev/null +++ b/plugin/erxiao/html结构分析.md @@ -0,0 +1,138 @@ +# Erxiao HTML 数据结构分析 + +## 基本信息 +- **数据源类型**: HTML 网页 +- **搜索URL格式**: `https://erxiaofn.click/index.php/vod/search/wd/{关键词}.html` +- **详情URL格式**: `https://erxiaofn.click/index.php/vod/detail/id/{资源ID}.html` +- **数据特点**: 
视频点播(VOD)系统网页,提供HTML格式的影视资源数据 +- **特殊说明**: 使用HTML解析替代JSON API,与zhizhen/muou插件使用相同的HTML结构 + +## HTML 页面结构 + +### 搜索结果页面 (`.module-search-item`) +搜索结果页面包含多个搜索项,每个搜索项的HTML结构如下: + +```html +
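+<div class="module-search-item">
+  <div class="module-item-pic">
+    <img data-src="{封面图片URL}" />
+  </div>
+  <div class="video-info">
+    <div class="video-info-header">
+      <h3><a href="/index.php/vod/detail/id/{资源ID}.html">电影标题</a></h3>
+      <span class="video-info-remarks">HD</span>
+    </div>
+    <div class="video-info-items">
+      <span class="video-info-itemtitle">分类:</span>
+      <span class="video-info-item">动作</span>
+    </div>
+    <div class="video-info-items">
+      <span class="video-info-itemtitle">导演:</span>
+      <span class="video-info-item">导演名字</span>
+    </div>
+    <div class="video-info-items">
+      <span class="video-info-itemtitle">主演:</span>
+      <span class="video-info-item">演员1,演员2</span>
+    </div>
+    <div class="video-info-items">
+      <span class="video-info-itemtitle">年份:</span>
+      <span class="video-info-item">2024</span>
+    </div>
+    <div class="video-info-items">
+      <span class="video-info-itemtitle">剧情:</span>
+      <span class="video-info-item">这是一部精彩的电影...</span>
+    </div>
+  </div>
+</div>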
+``` + +### 详情页面 (`.mobile-play` 和 `#download-list`) +详情页面包含海报图片和下载链接: + +```html +
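+<div class="mobile-play">
+  <img class="lazyload" data-src="{海报图片URL}" />
+</div>
+<div id="download-list">
+  <div class="module-row-one">
+    <a data-clipboard-text="https://pan.quark.cn/s/{分享码}">夸克网盘</a>
+  </div>
+  <div class="module-row-one">
+    <a data-clipboard-text="https://pan.baidu.com/s/{分享码}?pwd={密码}">百度网盘</a>
+  </div>
+</div>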
+``` + +## CSS 选择器参考 + +### 搜索结果提取 +- **搜索结果容器**: `.module-search-item` +- **标题**: `.video-info-header h3 a` (文本内容) +- **详情页链接**: `.video-info-header h3 a` (href属性) +- **封面图片**: `.module-item-pic > img` (data-src属性) +- **质量/状态**: `.video-info-header .video-info-remarks` (文本内容) + +### 详情页下载链接提取 +- **海报图片**: `.mobile-play .lazyload` (data-src属性) +- **下载链接容器**: `#download-list .module-row-one` +- **下载链接**: `[data-clipboard-text]` (data-clipboard-text属性) + +## 支持的网盘类型 +- **Quark网盘**: `https://pan.quark.cn/s/{分享码}` +- **百度网盘**: `https://pan.baidu.com/s/{分享码}?pwd={密码}` +- **阿里云盘**: `https://www.aliyundrive.com/s/{分享码}` +- **迅雷网盘**: `https://pan.xunlei.com/s/{分享码}` +- **天翼云盘**: `https://cloud.189.cn/t/{分享码}` +- **UC网盘**: `https://drive.uc.cn/s/{分享码}` +- **115网盘**: `https://115.com/s/{分享码}` +- **123网盘**: `https://123pan.com/s/{分享码}` +- **PikPak**: `https://mypikpak.com/s/{分享码}` +- **移动云盘**: `https://caiyun.feixin.10086.cn/{分享码}` +- **磁力链接**: `magnet:?xt=urn:btih:{hash}` +- **ED2K链接**: `ed2k://|file|...` + +## 数据流程 + +### 搜索流程 +1. **构建搜索URL**: `https://erxiaofn.click/index.php/vod/search/wd/{keyword}.html` +2. **发送HTTP请求**: 获取搜索结果页面 +3. **解析HTML**: 使用goquery解析页面 +4. **提取搜索项**: 遍历`.module-search-item`元素 +5. **异步获取详情**: 并发请求详情页面获取下载链接 +6. **缓存管理**: 使用sync.Map缓存详情页结果,TTL为1小时 +7. **关键词过滤**: 过滤不相关的结果 + +### 详情页请求示例 +```go +detailURL := fmt.Sprintf("https://erxiaofn.click/index.php/vod/detail/id/%s.html", itemID) +``` + +## 并发控制 +- **最大并发数**: 20 (MaxConcurrency) +- **搜索超时**: 8秒 (DefaultTimeout) +- **详情页超时**: 6秒 (DetailTimeout) +- **缓存TTL**: 1小时 (cacheTTL) + +## 性能统计 +- **搜索请求数**: 总搜索请求数 +- **平均搜索时间**: 单次搜索平均耗时(毫秒) +- **详情页请求数**: 总详情页请求数 +- **平均详情页时间**: 单次详情页请求平均耗时(毫秒) +- **缓存命中数**: 详情页缓存命中次数 +- **缓存未命中数**: 详情页缓存未命中次数 + +## 注意事项 +1. **HTML解析**: 使用goquery库进行HTML解析 +2. **异步获取详情**: 搜索结果只包含基本信息,需要异步请求详情页获取下载链接 +3. **并发控制**: 使用信号量限制并发数为20 +4. **缓存管理**: 使用sync.Map缓存详情页结果,避免重复请求 +5. **链接验证**: 过滤掉无效链接(如包含`javascript:`、`#`等) +6. 
**密码提取**: 从URL中提取`?pwd=`参数作为密码 + From 72fd1943f9347f60cf87b2d5dfb5dae017102148 Mon Sep 17 00:00:00 2001 From: woleigedouer <38418090+woleigedouer@users.noreply.github.com> Date: Sat, 1 Nov 2025 20:09:46 +0800 Subject: [PATCH 8/8] =?UTF-8?q?huban=20=E4=BF=AE=E6=AD=A3=E6=90=9C?= =?UTF-8?q?=E7=B4=A2url=20=E9=87=8D=E6=9E=84=E4=B8=BAhtml=20=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0=E5=B0=81=E9=9D=A2=E8=8E=B7=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugin/huban/html结构分析.md | 133 +++++++++ plugin/huban/huban.go | 547 +++++++++++++++++++---------------- 2 files changed, 434 insertions(+), 246 deletions(-) create mode 100644 plugin/huban/html结构分析.md diff --git a/plugin/huban/html结构分析.md b/plugin/huban/html结构分析.md new file mode 100644 index 0000000..317bbb9 --- /dev/null +++ b/plugin/huban/html结构分析.md @@ -0,0 +1,133 @@ +# Huban HTML 数据结构分析 + +## 基本信息 +- **数据源类型**: HTML 网页 +- **搜索URL格式**: `http://xsayang.fun:12512/index.php/vod/search/wd/{关键词}.html` +- **详情URL格式**: `http://xsayang.fun:12512/index.php/vod/detail/id/{资源ID}.html` +- **数据特点**: 视频点播(VOD)系统网页,提供HTML格式的影视资源数据 +- **特殊说明**: 使用HTML解析替代JSON API,与erxiao/zhizhen/muou插件使用相同的HTML结构 + +## HTML 页面结构 + +### 搜索结果页面 (`.module-search-item`) +搜索结果页面包含多个搜索项,每个搜索项的HTML结构如下: + +```html +
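+<div class="module-search-item">
+  <div class="module-item-pic">
+    <img data-src="{封面图片URL}" />
+  </div>
+  <div class="video-info">
+    <div class="video-info-header">
+      <h3><a href="/index.php/vod/detail/id/{资源ID}.html">电影标题</a></h3>
+      <span class="video-info-remarks">HD</span>
+    </div>
+    <div class="video-info-items">
+      <span class="video-info-itemtitle">分类:</span>
+      <span class="video-info-item">动作</span>
+    </div>
+    <div class="video-info-items">
+      <span class="video-info-itemtitle">导演:</span>
+      <span class="video-info-item">导演名字</span>
+    </div>
+    <div class="video-info-items">
+      <span class="video-info-itemtitle">主演:</span>
+      <span class="video-info-item">演员1,演员2</span>
+    </div>
+    <div class="video-info-items">
+      <span class="video-info-itemtitle">年份:</span>
+      <span class="video-info-item">2024</span>
+    </div>
+    <div class="video-info-items">
+      <span class="video-info-itemtitle">剧情:</span>
+      <span class="video-info-item">这是一部精彩的电影...</span>
+    </div>
+  </div>
+</div>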
+``` + +### 详情页面 (`.mobile-play` 和 `#download-list`) +详情页面包含海报图片和下载链接: + +```html +
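+<div class="mobile-play">
+  <img class="lazyload" data-src="{海报图片URL}" />
+</div>
+<div id="download-list">
+  <div class="module-row-one">
+    <a data-clipboard-text="https://pan.quark.cn/s/{分享码}">夸克网盘</a>
+  </div>
+  <div class="module-row-one">
+    <a data-clipboard-text="https://pan.baidu.com/s/{分享码}?pwd={密码}">百度网盘</a>
+  </div>
+</div>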
+``` + +## CSS 选择器参考 + +### 搜索结果提取 +- **搜索结果容器**: `.module-search-item` +- **标题**: `.video-info-header h3 a` (文本内容) +- **详情页链接**: `.video-info-header h3 a` (href属性) +- **封面图片**: `.module-item-pic > img` (data-src属性) +- **质量/状态**: `.video-info-header .video-info-remarks` (文本内容) + +### 详情页下载链接提取 +- **海报图片**: `.mobile-play .lazyload` (data-src属性) +- **下载链接容器**: `#download-list .module-row-one` +- **下载链接**: `[data-clipboard-text]` (data-clipboard-text属性) + +## 支持的网盘类型 +- **Quark网盘**: `https://pan.quark.cn/s/{分享码}` +- **百度网盘**: `https://pan.baidu.com/s/{分享码}?pwd={密码}` +- **阿里云盘**: `https://www.aliyundrive.com/s/{分享码}` +- **迅雷网盘**: `https://pan.xunlei.com/s/{分享码}` +- **天翼云盘**: `https://cloud.189.cn/t/{分享码}` +- **UC网盘**: `https://drive.uc.cn/s/{分享码}` +- **115网盘**: `https://115.com/s/{分享码}` +- **123网盘**: `https://123pan.com/s/{分享码}` +- **PikPak**: `https://mypikpak.com/s/{分享码}` +- **移动云盘**: `https://caiyun.feixin.10086.cn/{分享码}` +- **磁力链接**: `magnet:?xt=urn:btih:{hash}` +- **ED2K链接**: `ed2k://|file|...` + +## 数据流程 + +### 搜索流程 +1. **构建搜索URL**: `http://xsayang.fun:12512/index.php/vod/search/wd/{keyword}.html` +2. **发送HTTP请求**: 获取搜索结果页面 +3. **解析HTML**: 使用goquery解析页面 +4. **提取搜索项**: 遍历`.module-search-item`元素 +5. **异步获取详情**: 并发请求详情页面获取下载链接 +6. **缓存管理**: 使用sync.Map缓存详情页结果,TTL为1小时 +7. **关键词过滤**: 过滤不相关的结果 + +## 并发控制 +- **最大并发数**: 20 (MaxConcurrency) +- **搜索超时**: 8秒 (DefaultTimeout) +- **详情页超时**: 6秒 (DetailTimeout) +- **缓存TTL**: 1小时 (cacheTTL) + +## 性能统计 +- **搜索请求数**: 总搜索请求数 +- **平均搜索时间**: 单次搜索平均耗时(毫秒) +- **详情页请求数**: 总详情页请求数 +- **平均详情页时间**: 单次详情页请求平均耗时(毫秒) +- **缓存命中数**: 详情页缓存命中次数 +- **缓存未命中数**: 详情页缓存未命中次数 + +## 注意事项 +1. **HTML解析**: 使用goquery库进行HTML解析 +2. **异步获取详情**: 搜索结果只包含基本信息,需要异步请求详情页获取下载链接 +3. **并发控制**: 使用信号量限制并发数为20 +4. **缓存管理**: 使用sync.Map缓存详情页结果,避免重复请求 +5. **链接验证**: 过滤掉无效链接(如包含`javascript:`、`#`等) +6. 
**密码提取**: 从URL中提取`?pwd=`参数作为密码 + diff --git a/plugin/huban/huban.go b/plugin/huban/huban.go index 65c8633..560357e 100644 --- a/plugin/huban/huban.go +++ b/plugin/huban/huban.go @@ -2,41 +2,58 @@ package huban import ( "fmt" - "io" "net/http" "net/url" "regexp" "strings" "time" "context" + "sync" "sync/atomic" + "github.com/PuerkitoBio/goquery" "pansou/model" "pansou/plugin" - "pansou/util/json" ) const ( - // 默认超时时间 - 优化为更短时间 + // 默认超时时间 DefaultTimeout = 8 * time.Second + DetailTimeout = 6 * time.Second // HTTP连接池配置 MaxIdleConns = 200 MaxIdleConnsPerHost = 50 MaxConnsPerHost = 100 IdleConnTimeout = 90 * time.Second - + + // 并发控制 + MaxConcurrency = 20 + + // 缓存TTL + cacheTTL = 1 * time.Hour + // 请求来源控制 - 默认开启,提高安全性 EnableRefererCheck = false - + // 调试日志开关 DebugLog = false ) // 性能统计(原子操作) var ( - searchRequests int64 = 0 - totalSearchTime int64 = 0 // 纳秒 + searchRequests int64 = 0 + totalSearchTime int64 = 0 // 纳秒 + detailPageRequests int64 = 0 + totalDetailTime int64 = 0 // 纳秒 + cacheHits int64 = 0 + cacheMisses int64 = 0 +) + +// Detail page缓存 +var ( + detailCache sync.Map + cacheMutex sync.RWMutex ) // 请求来源控制配置 @@ -59,6 +76,9 @@ var ( // 密码提取正则表达式 passwordRegex = regexp.MustCompile(`\?pwd=([0-9a-zA-Z]+)`) password115Regex = regexp.MustCompile(`password=([0-9a-zA-Z]+)`) + + // 详情页ID提取正则表达式 + detailIDRegex = regexp.MustCompile(`/id/(\d+)`) // 常见网盘链接的正则表达式(支持16种类型) quarkLinkRegex = regexp.MustCompile(`https?://pan\.quark\.cn/s/[0-9a-zA-Z]+`) @@ -149,7 +169,7 @@ func (p *HubanAsyncPlugin) SearchWithResult(keyword string, ext map[string]inter return p.AsyncSearchWithResult(keyword, p.searchImpl, p.MainCacheKey, ext) } -// searchImpl 搜索实现(双域名支持) +// searchImpl 搜索实现 - HTML解析版本 func (p *HubanAsyncPlugin) searchImpl(client *http.Client, keyword string, ext map[string]interface{}) ([]model.SearchResult, error) { // 性能统计 start := time.Now() @@ -164,259 +184,296 @@ func (p *HubanAsyncPlugin) searchImpl(client *http.Client, keyword string, ext m client = p.optimizedClient } - 
// 定义双域名 - 主备模式 - urls := []string{ - fmt.Sprintf("http://xsayang.fun:12512/api.php/provide/vod?ac=detail&wd=%s", url.QueryEscape(keyword)), - fmt.Sprintf("http://103.45.162.207:20720/api.php/provide/vod?ac=detail&wd=%s", url.QueryEscape(keyword)), - } - - // 主备模式:优先使用第一个域名,失败时切换到第二个 - for i, searchURL := range urls { - if results, err := p.tryRequest(searchURL, client); err == nil { - return results, nil - } else if i == 0 { - // 第一个域名失败,记录日志但继续尝试第二个 - // fmt.Printf("[%s] 域名1失败,尝试域名2: %v\n", p.Name(), err) - } - } - - return nil, fmt.Errorf("[%s] 所有域名都请求失败", p.Name()) -} + // 1. 构建搜索URL + searchURL := fmt.Sprintf("http://103.45.162.207:20720/index.php/vod/search/wd/%s.html", url.QueryEscape(keyword)) -// tryRequest 尝试单个域名请求 -func (p *HubanAsyncPlugin) tryRequest(searchURL string, client *http.Client) ([]model.SearchResult, error) { - // 创建HTTP请求 + // 2. 创建带超时的上下文 ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout) defer cancel() - + + // 3. 创建请求 req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil) if err != nil { - return nil, fmt.Errorf("创建搜索请求失败: %w", err) + return nil, fmt.Errorf("[%s] 创建请求失败: %w", p.Name(), err) } - - // 设置请求头 + + // 4. 设置请求头 req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") - req.Header.Set("Accept", "application/json, text/plain, */*") + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8") req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") req.Header.Set("Connection", "keep-alive") - req.Header.Set("Cache-Control", "no-cache") - - // 发送请求 + req.Header.Set("Referer", "http://103.45.162.207:20720/") + + // 5. 
发送请求 resp, err := p.doRequestWithRetry(req, client) if err != nil { - return nil, fmt.Errorf("搜索请求失败: %w", err) + return nil, fmt.Errorf("[%s] 搜索请求失败: %w", p.Name(), err) } defer resp.Body.Close() - - // 解析JSON响应 - body, _ := io.ReadAll(resp.Body) - - var apiResponse HubanAPIResponse - if err := json.Unmarshal(body, &apiResponse); err != nil { - return nil, fmt.Errorf("解析JSON响应失败: %w", err) + + if resp.StatusCode != 200 { + return nil, fmt.Errorf("[%s] 搜索请求返回状态码: %d", p.Name(), resp.StatusCode) } - - // 检查API响应状态 - if apiResponse.Code != 1 { - return nil, fmt.Errorf("API返回错误: %s", apiResponse.Msg) + + // 6. 解析搜索结果页面 + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, fmt.Errorf("[%s] 解析搜索页面失败: %w", p.Name(), err) } - - // 解析搜索结果 + + // 7. 提取搜索结果 var results []model.SearchResult - for _, item := range apiResponse.List { - if result := p.parseAPIItem(item); result.Title != "" { + + doc.Find(".module-search-item").Each(func(i int, s *goquery.Selection) { + result := p.parseSearchItem(s, keyword) + if result.UniqueID != "" { results = append(results, result) } + }) + + // 8. 异步获取详情页信息 + enhancedResults := p.enhanceWithDetails(client, results) + + // 9. 
关键词过滤 + return plugin.FilterResultsByKeyword(enhancedResults, keyword), nil +} + +// parseSearchItem 解析单个搜索结果项 +func (p *HubanAsyncPlugin) parseSearchItem(s *goquery.Selection, keyword string) model.SearchResult { + result := model.SearchResult{} + + // 提取详情页链接和ID + detailLink, exists := s.Find(".video-info-header h3 a").First().Attr("href") + if !exists { + return result } - - return results, nil -} -// HubanAPIResponse API响应结构 -type HubanAPIResponse struct { - Code int `json:"code"` - Msg string `json:"msg"` - Page int `json:"page"` - PageCount int `json:"pagecount"` - Limit interface{} `json:"limit"` // 可能是字符串或数字 - Total int `json:"total"` - List []HubanAPIItem `json:"list"` -} + // 提取ID + matches := detailIDRegex.FindStringSubmatch(detailLink) + if len(matches) < 2 { + return result + } + itemID := matches[1] -// HubanAPIItem API数据项 -type HubanAPIItem struct { - VodID int `json:"vod_id"` - VodName string `json:"vod_name"` - VodActor string `json:"vod_actor"` - VodDirector string `json:"vod_director"` - VodDownFrom string `json:"vod_down_from"` - VodDownURL string `json:"vod_down_url"` - VodRemarks string `json:"vod_remarks"` - VodPubdate string `json:"vod_pubdate"` - VodArea string `json:"vod_area"` - VodLang string `json:"vod_lang"` - VodYear string `json:"vod_year"` - VodContent string `json:"vod_content"` - VodBlurb string `json:"vod_blurb"` - VodPic string `json:"vod_pic"` -} - -// parseAPIItem 解析API数据项 -func (p *HubanAsyncPlugin) parseAPIItem(item HubanAPIItem) model.SearchResult { // 构建唯一ID - uniqueID := fmt.Sprintf("%s-%d", p.Name(), item.VodID) - - // 构建标题 - title := strings.TrimSpace(item.VodName) + uniqueID := fmt.Sprintf("%s-%s", p.Name(), itemID) + + // 提取标题 + title := strings.TrimSpace(s.Find(".video-info-header h3 a").First().Text()) if title == "" { - return model.SearchResult{} + return result } - - // 构建描述(需要清理数据) - content := p.buildContent(item) - - // 解析下载链接(huban特殊格式) - links := p.parseHubanLinks(item.VodDownFrom, item.VodDownURL) - + + // 
提取分类 + category := strings.TrimSpace(s.Find(".video-info-items").First().Find(".video-info-item").First().Text()) + + // 提取导演 + directorElement := s.Find(".video-info-items").FilterFunction(func(i int, item *goquery.Selection) bool { + title := strings.TrimSpace(item.Find(".video-info-itemtitle").Text()) + return strings.Contains(title, "导演") + }) + director := strings.TrimSpace(directorElement.Find(".video-info-item").Text()) + + // 提取主演 + actorElement := s.Find(".video-info-items").FilterFunction(func(i int, item *goquery.Selection) bool { + title := strings.TrimSpace(item.Find(".video-info-itemtitle").Text()) + return strings.Contains(title, "主演") + }) + actor := strings.TrimSpace(actorElement.Find(".video-info-item").Text()) + + // 提取年份 + year := strings.TrimSpace(s.Find(".video-info-items").Last().Find(".video-info-item").First().Text()) + + // 提取质量/状态 + quality := strings.TrimSpace(s.Find(".video-info-header .video-info-remarks").Text()) + + // 提取剧情简介 + plotElement := s.Find(".video-info-items").FilterFunction(func(i int, item *goquery.Selection) bool { + title := strings.TrimSpace(item.Find(".video-info-itemtitle").Text()) + return strings.Contains(title, "剧情") + }) + plot := strings.TrimSpace(plotElement.Find(".video-info-item").Text()) + + // 提取封面图片 + coverImage, _ := s.Find(".module-item-pic > img").Attr("data-src") + + // 构建内容描述 + var contentParts []string + if category != "" { + contentParts = append(contentParts, fmt.Sprintf("分类: %s", category)) + } + if director != "" { + contentParts = append(contentParts, fmt.Sprintf("导演: %s", director)) + } + if actor != "" { + contentParts = append(contentParts, fmt.Sprintf("主演: %s", actor)) + } + if quality != "" { + contentParts = append(contentParts, fmt.Sprintf("质量: %s", quality)) + } + if plot != "" { + contentParts = append(contentParts, fmt.Sprintf("剧情: %s", plot)) + } + // 构建标签 var tags []string - if item.VodYear != "" { - tags = append(tags, item.VodYear) + if year != "" { + tags = append(tags, year) } - 
// area通常为空,不添加 - + + // 构建图片数组 + var images []string + if coverImage != "" { + images = append(images, coverImage) + } + return model.SearchResult{ UniqueID: uniqueID, Title: title, - Content: content, - Links: links, + Content: strings.Join(contentParts, " | "), + Images: images, Tags: tags, - Channel: "", // 插件搜索结果Channel为空 - Datetime: time.Time{}, // 使用零值而不是nil,参考jikepan插件标准 + Channel: "", + Datetime: time.Time{}, } } -// buildContent 构建内容描述(清理特殊字符) -func (p *HubanAsyncPlugin) buildContent(item HubanAPIItem) string { - var contentParts []string - - // 清理演员字段(移除前后逗号) - if item.VodActor != "" { - actor := strings.Trim(item.VodActor, ",") - actor = strings.TrimSpace(actor) - if actor != "" { - contentParts = append(contentParts, fmt.Sprintf("主演: %s", actor)) - } - } - - // 清理导演字段(移除前后逗号) - if item.VodDirector != "" { - director := strings.Trim(item.VodDirector, ",") - director = strings.TrimSpace(director) - if director != "" { - contentParts = append(contentParts, fmt.Sprintf("导演: %s", director)) - } - } - - if item.VodYear != "" { - contentParts = append(contentParts, fmt.Sprintf("年份: %s", item.VodYear)) - } - - if item.VodRemarks != "" { - contentParts = append(contentParts, fmt.Sprintf("状态: %s", item.VodRemarks)) - } - - return strings.Join(contentParts, " | ") -} +// enhanceWithDetails 异步获取详情页信息 +func (p *HubanAsyncPlugin) enhanceWithDetails(client *http.Client, results []model.SearchResult) []model.SearchResult { + var enhancedResults []model.SearchResult + var wg sync.WaitGroup + var mu sync.Mutex -// parseHubanLinks 解析huban特殊格式的链接 -func (p *HubanAsyncPlugin) parseHubanLinks(vodDownFrom, vodDownURL string) []model.Link { - if vodDownFrom == "" || vodDownURL == "" { - return nil - } - - // 按$$$分隔网盘类型 - fromParts := strings.Split(vodDownFrom, "$$$") - urlParts := strings.Split(vodDownURL, "$$$") - - var links []model.Link - minLen := len(fromParts) - if len(urlParts) < minLen { - minLen = len(urlParts) - } - - for i := 0; i < minLen; i++ { - linkType := 
p.mapHubanCloudType(fromParts[i]) - if linkType == "" { - continue - } - - // 解析单个网盘类型的多个链接 - // 格式: "来源$链接1#标题1$链接2#标题2#" - urlSection := urlParts[i] - - // 移除来源前缀(如"小虎斑$") - if strings.Contains(urlSection, "$") { - urlSection = urlSection[strings.Index(urlSection, "$")+1:] - } - - // 按#分隔多个链接 - linkParts := strings.Split(urlSection, "#") - for j := 0; j < len(linkParts); j++ { - linkURL := strings.TrimSpace(linkParts[j]) - - // 跳过空链接和标题(标题通常不是链接格式) - if linkURL == "" || !p.isValidNetworkDriveURL(linkURL) { - continue + // 创建信号量限制并发数 + semaphore := make(chan struct{}, MaxConcurrency) + + for _, result := range results { + wg.Add(1) + go func(result model.SearchResult) { + defer wg.Done() + semaphore <- struct{}{} // 获取信号量 + defer func() { <-semaphore }() // 释放信号量 + + // 从UniqueID中提取itemID + parts := strings.Split(result.UniqueID, "-") + if len(parts) < 2 { + mu.Lock() + enhancedResults = append(enhancedResults, result) + mu.Unlock() + return } - - // 提取密码 - password := p.extractPassword(linkURL) - - links = append(links, model.Link{ - Type: linkType, - URL: linkURL, - Password: password, - }) - } + itemID := parts[1] + + // 检查缓存 + if cached, ok := detailCache.Load(itemID); ok { + atomic.AddInt64(&cacheHits, 1) + r := cached.(model.SearchResult) + mu.Lock() + enhancedResults = append(enhancedResults, r) + mu.Unlock() + return + } + + atomic.AddInt64(&cacheMisses, 1) + + // 获取详情页链接和图片 + detailLinks, detailImages := p.fetchDetailLinksAndImages(client, itemID) + result.Links = detailLinks + + // 合并图片:优先使用详情页的海报,如果没有则使用搜索结果的图片 + if len(detailImages) > 0 { + result.Images = detailImages + } + + // 缓存结果 + detailCache.Store(itemID, result) + + mu.Lock() + enhancedResults = append(enhancedResults, result) + mu.Unlock() + }(result) } - - // 去重(可能存在重复链接) - return p.deduplicateLinks(links) + + wg.Wait() + return enhancedResults } -// mapHubanCloudType 映射huban特有的网盘标识符 -func (p *HubanAsyncPlugin) mapHubanCloudType(apiType string) string { - switch strings.ToUpper(apiType) { - 
case "UCWP": - return "uc" - case "KKWP": - return "quark" - case "ALWP": - return "aliyun" - case "BDWP": - return "baidu" - case "123WP": - return "123" - case "115WP": - return "115" - case "TYWP": - return "tianyi" - case "XYWP": - return "xunlei" - case "WYWP": - return "weiyun" - case "LZWP": - return "lanzou" - case "JGYWP": - return "jianguoyun" - case "PKWP": - return "pikpak" - default: - return "" +// fetchDetailLinksAndImages 获取详情页的下载链接和图片 +func (p *HubanAsyncPlugin) fetchDetailLinksAndImages(client *http.Client, itemID string) ([]model.Link, []string) { + // 性能统计 + start := time.Now() + atomic.AddInt64(&detailPageRequests, 1) + defer func() { + duration := time.Since(start).Nanoseconds() + atomic.AddInt64(&totalDetailTime, duration) + }() + + detailURL := fmt.Sprintf("http://103.45.162.207:20720/index.php/vod/detail/id/%s.html", itemID) + + // 创建带超时的上下文 + ctx, cancel := context.WithTimeout(context.Background(), DetailTimeout) + defer cancel() + + // 创建请求 + req, err := http.NewRequestWithContext(ctx, "GET", detailURL, nil) + if err != nil { + return nil, nil } + + // 设置请求头 + req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8") + req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") + req.Header.Set("Connection", "keep-alive") + req.Header.Set("Referer", "http://103.45.162.207:20720/") + + // 发送请求(带重试) + resp, err := p.doRequestWithRetry(req, client) + if err != nil { + return nil, nil + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return nil, nil + } + + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, nil + } + + var links []model.Link + var images []string + + // 提取详情页的海报图片 + if posterURL, exists := doc.Find(".mobile-play .lazyload").Attr("data-src"); exists && posterURL != "" { + images = 
append(images, posterURL) + } + + // 查找下载链接区域 + doc.Find("#download-list .module-row-one").Each(func(i int, s *goquery.Selection) { + // 从data-clipboard-text属性提取链接 + if linkURL, exists := s.Find("[data-clipboard-text]").Attr("data-clipboard-text"); exists { + // 过滤掉无效链接 + if p.isValidNetworkDriveURL(linkURL) { + if linkType := p.determineLinkType(linkURL); linkType != "" { + link := model.Link{ + Type: linkType, + URL: linkURL, + Password: "", // 大部分网盘不需要密码 + } + links = append(links, link) + } + } + } + }) + + return links, images } + + // isValidNetworkDriveURL 检查URL是否为有效的网盘链接 func (p *HubanAsyncPlugin) isValidNetworkDriveURL(url string) bool { // 过滤掉明显无效的链接 @@ -497,27 +554,11 @@ func (p *HubanAsyncPlugin) extractPassword(url string) string { return "" } -// deduplicateLinks 去重链接 -func (p *HubanAsyncPlugin) deduplicateLinks(links []model.Link) []model.Link { - seen := make(map[string]bool) - var result []model.Link - - for _, link := range links { - key := fmt.Sprintf("%s-%s", link.Type, link.URL) - if !seen[key] { - seen[key] = true - result = append(result, link) - } - } - - return result -} - -// doRequestWithRetry 带重试的HTTP请求(优化JSON API的重试策略) +// doRequestWithRetry 带重试的HTTP请求 func (p *HubanAsyncPlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) { - maxRetries := 2 // 对于JSON API减少重试次数 + maxRetries := 2 var lastErr error - + for i := 0; i < maxRetries; i++ { resp, err := client.Do(req) if err == nil { @@ -529,13 +570,13 @@ func (p *HubanAsyncPlugin) doRequestWithRetry(req *http.Request, client *http.Cl } else { lastErr = err } - - // JSON API快速重试:只等待很短时间 + + // 快速重试:只等待很短时间 if i < maxRetries-1 { - time.Sleep(100 * time.Millisecond) // 从秒级改为100毫秒 + time.Sleep(100 * time.Millisecond) } } - + return nil, fmt.Errorf("[%s] 请求失败,重试%d次后仍失败: %w", p.Name(), maxRetries, lastErr) } @@ -543,16 +584,30 @@ func (p *HubanAsyncPlugin) doRequestWithRetry(req *http.Request, client *http.Cl func (p *HubanAsyncPlugin) GetPerformanceStats() 
map[string]interface{} { totalRequests := atomic.LoadInt64(&searchRequests) totalTime := atomic.LoadInt64(&totalSearchTime) - + detailRequests := atomic.LoadInt64(&detailPageRequests) + detailTime := atomic.LoadInt64(&totalDetailTime) + hits := atomic.LoadInt64(&cacheHits) + misses := atomic.LoadInt64(&cacheMisses) + var avgTime float64 if totalRequests > 0 { avgTime = float64(totalTime) / float64(totalRequests) / 1e6 // 转换为毫秒 } - + + var avgDetailTime float64 + if detailRequests > 0 { + avgDetailTime = float64(detailTime) / float64(detailRequests) / 1e6 // 转换为毫秒 + } + return map[string]interface{}{ - "search_requests": totalRequests, - "avg_search_time_ms": avgTime, + "search_requests": totalRequests, + "avg_search_time_ms": avgTime, "total_search_time_ns": totalTime, + "detail_page_requests": detailRequests, + "avg_detail_time_ms": avgDetailTime, + "total_detail_time_ns": detailTime, + "cache_hits": hits, + "cache_misses": misses, } }