Merge branch 'fish2018:main' into main

This commit is contained in:
YAYOI27
2025-08-26 17:07:46 +08:00
committed by GitHub
18 changed files with 4951 additions and 11 deletions

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 fish2018
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -390,6 +390,10 @@ GET /api/search?kw=速度与激情&channels=tgsearchers3,xxx&conc=2&refresh=true
}
```
## 📄 许可证
本项目采用 MIT 许可证。详情请见 [LICENSE](LICENSE) 文件。
## ⭐ Star 历史
[![Star History Chart](https://api.star-history.com/svg?repos=fish2018/pansou&type=Date)](https://star-history.com/#fish2018/pansou&Date)

View File

@@ -56,6 +56,13 @@ import (
_ "pansou/plugin/libvio"
_ "pansou/plugin/leijing"
_ "pansou/plugin/xb6v"
_ "pansou/plugin/xys"
_ "pansou/plugin/ddys"
_ "pansou/plugin/hdmoli"
_ "pansou/plugin/javdb"
_ "pansou/plugin/yuhuage"
_ "pansou/plugin/u3c3"
_ "pansou/plugin/clxiong"
)
// 全局缓存写入管理器

645
plugin/clxiong/clxiong.go Normal file
View File

@@ -0,0 +1,645 @@
package clxiong
import (
"fmt"
"io"
"log"
"net/http"
"net/url"
"regexp"
"strings"
"sync"
"time"
"github.com/PuerkitoBio/goquery"
"pansou/model"
"pansou/plugin"
)
// Site endpoints and request tuning for the cilixiong.org scraper.
const (
	BaseURL    = "https://www.cilixiong.org"
	SearchURL  = "https://www.cilixiong.org/e/search/index.php"
	UserAgent  = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
	MaxRetries = 3               // attempts per HTTP request
	RetryDelay = 2 * time.Second // pause between attempts
	MaxResults = 30              // cap on parsed search-result cards
)
// DetailPageInfo holds everything scraped from a single detail page.
type DetailPageInfo struct {
	MagnetLinks []model.Link // magnet links found in the download section
	UpdateTime  time.Time    // "last updated" date parsed from the page; zero when absent
	Title       string       // movie/series title passed in by the caller
	FileNames   []string     // per-link file names, index-aligned with MagnetLinks
}
// ClxiongPlugin scrapes cilixiong.org ("磁力熊") for magnet links.
type ClxiongPlugin struct {
	*plugin.BaseAsyncPlugin
	debugMode bool // when true, log every scraping/extraction step
}
// init registers the plugin with the global plugin registry at startup.
func init() {
	p := &ClxiongPlugin{
		BaseAsyncPlugin: plugin.NewBaseAsyncPluginWithFilter("clxiong", 2, true),
		debugMode:       false, // flip to true to debug magnet-link extraction
	}
	plugin.RegisterGlobalPlugin(p)
}
// Search implements the plugin search interface. It delegates to
// SearchWithResult and unwraps just the result list.
func (p *ClxiongPlugin) Search(keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
	searchResult, searchErr := p.SearchWithResult(keyword, ext)
	if searchErr != nil {
		return nil, searchErr
	}
	return searchResult.Results, nil
}
// SearchWithResult runs the full three-step scrape for keyword:
//  1. POST the keyword to obtain a searchid (getSearchID),
//  2. GET and parse the result page (getSearchResults),
//  3. visit each detail page synchronously to collect magnet links
//     (fetchDetailLinksSync).
// Results are keyword-filtered before being returned; IsFinal is true
// because all link resolution happens within this call.
func (p *ClxiongPlugin) SearchWithResult(keyword string, ext map[string]interface{}) (*model.PluginSearchResult, error) {
	if p.debugMode {
		log.Printf("[CLXIONG] 开始搜索: %s", keyword)
	}
	// Step 1: POST search to obtain the searchid.
	searchID, err := p.getSearchID(keyword)
	if err != nil {
		if p.debugMode {
			log.Printf("[CLXIONG] 获取searchid失败: %v", err)
		}
		return nil, fmt.Errorf("获取searchid失败: %v", err)
	}
	// Step 2: GET the search-result page.
	results, err := p.getSearchResults(searchID, keyword)
	if err != nil {
		if p.debugMode {
			log.Printf("[CLXIONG] 获取搜索结果失败: %v", err)
		}
		return nil, err
	}
	// Step 3: synchronously resolve magnet links from each detail page.
	results = p.fetchDetailLinksSync(results)
	if p.debugMode {
		log.Printf("[CLXIONG] 搜索完成,获得 %d 个结果", len(results))
	}
	// Apply keyword filtering.
	filteredResults := plugin.FilterResultsByKeyword(results, keyword)
	return &model.PluginSearchResult{
		Results:   filteredResults,
		IsFinal:   true,
		Timestamp: time.Now(),
		Source:    p.Name(),
		Message:   fmt.Sprintf("找到 %d 个结果", len(filteredResults)),
	}, nil
}
// getSearchID performs step one of the search: it POSTs the keyword to the
// site's search endpoint and extracts the searchid from the 301/302 redirect
// Location header. Redirects are deliberately not followed because the
// Location value itself carries the searchid.
//
// Fix: the original reused one *http.Request across retries, but its
// strings.Reader body is consumed by the first send, so every retry POSTed
// an empty form. A fresh request is now built per attempt. The restructure
// also removes a double Close of the response body on the final attempt.
func (p *ClxiongPlugin) getSearchID(keyword string) (string, error) {
	if p.debugMode {
		log.Printf("[CLXIONG] 正在获取searchid...")
	}
	client := &http.Client{
		Timeout: 30 * time.Second,
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			// Do not follow the redirect; we need to read its Location header.
			return http.ErrUseLastResponse
		},
	}
	// Form fields expected by the site's search script.
	formData := url.Values{}
	formData.Set("classid", "1,2")    // 1 = movies, 2 = series
	formData.Set("show", "title")     // field to search
	formData.Set("tempid", "1")       // template id
	formData.Set("keyboard", keyword) // search keyword
	encodedForm := formData.Encode()

	var lastErr error
	for i := 0; i < MaxRetries; i++ {
		if i > 0 {
			time.Sleep(RetryDelay)
		}
		// Fresh request per attempt: the body reader is drained by each
		// send, so a reused request would retry with an empty body.
		req, err := http.NewRequest("POST", SearchURL, strings.NewReader(encodedForm))
		if err != nil {
			return "", err
		}
		req.Header.Set("User-Agent", UserAgent)
		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
		req.Header.Set("Referer", BaseURL+"/")
		req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")

		resp, err := client.Do(req)
		if err != nil {
			lastErr = err
			continue
		}
		if resp.StatusCode != 302 && resp.StatusCode != 301 {
			resp.Body.Close()
			lastErr = fmt.Errorf("期望302重定向但得到状态码: %d", resp.StatusCode)
			continue
		}
		// Success: pull the searchid out of the Location header.
		location := resp.Header.Get("Location")
		resp.Body.Close()
		if location == "" {
			return "", fmt.Errorf("重定向响应中没有Location头部")
		}
		searchID := p.extractSearchIDFromLocation(location)
		if searchID == "" {
			return "", fmt.Errorf("无法从Location中提取searchid: %s", location)
		}
		if p.debugMode {
			log.Printf("[CLXIONG] 获取到searchid: %s", searchID)
		}
		return searchID, nil
	}
	return "", lastErr
}
// searchIDRe matches the numeric searchid in a redirect Location such as
// "result/?searchid=7549". Compiled once at package scope instead of on
// every call.
var searchIDRe = regexp.MustCompile(`searchid=(\d+)`)

// extractSearchIDFromLocation returns the searchid embedded in the redirect
// Location header, or "" when none is present.
func (p *ClxiongPlugin) extractSearchIDFromLocation(location string) string {
	if m := searchIDRe.FindStringSubmatch(location); len(m) > 1 {
		return m[1]
	}
	return ""
}
// getSearchResults performs step two of the search: it GETs the result page
// for searchID and parses it into SearchResult values. The keyword parameter
// is currently unused but kept for interface symmetry with the caller.
//
// Fix: the original closed the response body inside the retry loop and then
// closed it again via defer, and read fields from that already-closed
// response after the loop. The loop now fully handles each attempt and
// carries a real error (including the bad-status case) to the end.
func (p *ClxiongPlugin) getSearchResults(searchID, keyword string) ([]model.SearchResult, error) {
	if p.debugMode {
		log.Printf("[CLXIONG] 正在获取搜索结果searchid: %s", searchID)
	}
	// Build the result-page URL.
	resultURL := fmt.Sprintf("%s/e/search/result/?searchid=%s", BaseURL, searchID)
	client := &http.Client{Timeout: 30 * time.Second}
	req, err := http.NewRequest("GET", resultURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Referer", BaseURL+"/")
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")

	var lastErr error
	for i := 0; i < MaxRetries; i++ {
		if i > 0 {
			time.Sleep(RetryDelay)
		}
		resp, err := client.Do(req) // GET has no body, so the request is reusable
		if err != nil {
			lastErr = err
			continue
		}
		if resp.StatusCode != 200 {
			resp.Body.Close()
			lastErr = fmt.Errorf("搜索结果请求失败,状态码: %d", resp.StatusCode)
			continue
		}
		body, err := io.ReadAll(resp.Body)
		resp.Body.Close()
		if err != nil {
			return nil, err
		}
		return p.parseSearchResults(string(body))
	}
	return nil, lastErr
}
// parseSearchResults parses the search-result HTML into SearchResult values.
// Each result card yields a title, optional rating/year/poster, and a
// detail-page URL which is stashed inside Content as "详情页: <url>" so
// fetchDetailLinksSync can resolve magnet links later. At most MaxResults
// cards are processed; Links is left empty at this stage.
func (p *ClxiongPlugin) parseSearchResults(html string) ([]model.SearchResult, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, err
	}
	var results []model.SearchResult
	// Result cards live in the Bootstrap grid ".row.row-cols-2.row-cols-lg-4 .col".
	doc.Find(".row.row-cols-2.row-cols-lg-4 .col").Each(func(i int, s *goquery.Selection) {
		if i >= MaxResults {
			return // cap the number of results
		}
		// Detail-page link: either /drama/ID.html or /movie/ID.html.
		linkEl := s.Find("a[href*='/drama/'], a[href*='/movie/']")
		if linkEl.Length() == 0 {
			return // skip cards without a link
		}
		detailPath, exists := linkEl.Attr("href")
		if !exists || detailPath == "" {
			return
		}
		// Build the absolute detail-page URL.
		detailURL := BaseURL + detailPath
		// Title text.
		title := strings.TrimSpace(linkEl.Find("h2.h4").Text())
		if title == "" {
			return // skip cards without a title
		}
		// Rating badge (e.g. "8.9").
		rating := strings.TrimSpace(s.Find(".rank").Text())
		// Release year: the last ".small" element on the card.
		year := strings.TrimSpace(s.Find(".small").Last().Text())
		// Poster image, embedded as a CSS background-image in the style attribute.
		poster := ""
		cardImg := s.Find(".card-img")
		if cardImg.Length() > 0 {
			if style, exists := cardImg.Attr("style"); exists {
				poster = p.extractImageFromStyle(style)
			}
		}
		// Assemble Content from whichever fields are present.
		var contentParts []string
		if rating != "" {
			contentParts = append(contentParts, "评分: "+rating)
		}
		if year != "" {
			contentParts = append(contentParts, "年份: "+year)
		}
		if poster != "" {
			contentParts = append(contentParts, "海报: "+poster)
		}
		// Stash the detail-page URL in Content for the magnet-link pass.
		contentParts = append(contentParts, "详情页: "+detailURL)
		content := strings.Join(contentParts, " | ")
		// Stable unique ID derived from the detail path.
		uniqueID := p.generateUniqueID(detailPath)
		result := model.SearchResult{
			Title:    title,
			Content:  content,
			Channel:  "", // plugin results must leave Channel empty
			Tags:     []string{"磁力链接", "影视"},
			Datetime: time.Now(),     // time of the search
			Links:    []model.Link{}, // filled in later by fetchDetailLinksSync
			UniqueID: uniqueID,
		}
		results = append(results, result)
	})
	if p.debugMode {
		log.Printf("[CLXIONG] 解析到 %d 个搜索结果", len(results))
	}
	return results, nil
}
// styleURLRe pulls the URL out of an inline CSS background-image declaration
// such as "background-image: url('https://i.nacloud.cc/2024/12154.webp');".
// Compiled once at package scope instead of on every call.
var styleURLRe = regexp.MustCompile(`url\(['"]?([^'"]+)['"]?\)`)

// extractImageFromStyle returns the background-image URL embedded in a style
// attribute, or "" when the style contains no url(...) clause.
func (p *ClxiongPlugin) extractImageFromStyle(style string) string {
	if m := styleURLRe.FindStringSubmatch(style); len(m) > 1 {
		return m[1]
	}
	return ""
}
// fetchDetailLinksSync visits every result's detail page (bounded to five
// concurrent fetches) and attaches the magnet links found there. The first
// magnet link updates the original result in place; each additional link is
// expanded into its own SearchResult so every link carries its own title and
// timestamp. Results whose detail page yields no links keep an empty Links
// slice. Blocks until all fetches complete.
func (p *ClxiongPlugin) fetchDetailLinksSync(results []model.SearchResult) []model.SearchResult {
	if len(results) == 0 {
		return results
	}
	if p.debugMode {
		log.Printf("[CLXIONG] 开始同步获取 %d 个详情页的磁力链接", len(results))
	}
	// WaitGroup so we only return after every request has finished.
	var wg sync.WaitGroup
	var mu sync.Mutex                          // guards additionalResults
	var additionalResults []model.SearchResult // extra results for links beyond the first
	// Bound concurrency to avoid hammering the site.
	semaphore := make(chan struct{}, 5) // at most 5 in-flight requests
	for i := range results {
		wg.Add(1)
		go func(index int) {
			defer wg.Done()
			// Acquire a semaphore slot.
			semaphore <- struct{}{}
			defer func() { <-semaphore }()
			detailURL := p.extractDetailURLFromContent(results[index].Content)
			if detailURL != "" {
				detailInfo := p.fetchDetailPageInfo(detailURL, results[index].Title)
				if detailInfo != nil && len(detailInfo.MagnetLinks) > 0 {
					// One SearchResult per magnet link so each keeps its own file name.
					baseResult := results[index]
					// First link: update the original result in place. Each
					// goroutine writes a distinct index, so no lock is needed here.
					if len(detailInfo.FileNames) > 0 {
						results[index].Title = fmt.Sprintf("%s-%s", baseResult.Title, detailInfo.FileNames[0])
					}
					results[index].Links = []model.Link{detailInfo.MagnetLinks[0]}
					if !detailInfo.UpdateTime.IsZero() {
						results[index].Datetime = detailInfo.UpdateTime
					}
					// Remaining links: create one new result each.
					var newResults []model.SearchResult
					for i := 1; i < len(detailInfo.MagnetLinks); i++ {
						newResult := model.SearchResult{
							// NOTE(review): baseResult.MessageID is never populated
							// upstream, so these IDs begin with "-" — confirm intended.
							MessageID: fmt.Sprintf("%s-%d", baseResult.MessageID, i+1),
							UniqueID:  fmt.Sprintf("%s-%d", baseResult.UniqueID, i+1),
							Channel:   baseResult.Channel,
							Content:   baseResult.Content,
							Tags:      baseResult.Tags,
							Images:    baseResult.Images,
							Links:     []model.Link{detailInfo.MagnetLinks[i]},
						}
						// Per-link title and timestamp.
						if i < len(detailInfo.FileNames) {
							newResult.Title = fmt.Sprintf("%s-%s", baseResult.Title, detailInfo.FileNames[i])
						} else {
							newResult.Title = baseResult.Title
						}
						if !detailInfo.UpdateTime.IsZero() {
							newResult.Datetime = detailInfo.UpdateTime
						} else {
							newResult.Datetime = baseResult.Datetime
						}
						newResults = append(newResults, newResult)
					}
					// Guard the shared slice while appending.
					if len(newResults) > 0 {
						mu.Lock()
						additionalResults = append(additionalResults, newResults...)
						mu.Unlock()
					}
					if p.debugMode {
						log.Printf("[CLXIONG] 为结果 %d 获取到 %d 个磁力链接,创建了 %d 个搜索结果", index+1, len(detailInfo.MagnetLinks), len(detailInfo.MagnetLinks))
					}
				}
			}
		}(i)
	}
	// Wait for all goroutines.
	wg.Wait()
	// Append the extra per-link results.
	results = append(results, additionalResults...)
	if p.debugMode {
		totalLinks := 0
		for _, result := range results {
			totalLinks += len(result.Links)
		}
		log.Printf("[CLXIONG] 所有磁力链接获取完成,共获得 %d 个磁力链接,总搜索结果 %d 个", totalLinks, len(results))
	}
	return results
}
// detailURLRe finds the "详情页: <url>" marker that parseSearchResults
// embeds into a result's Content field. Compiled once at package scope
// instead of on every call.
var detailURLRe = regexp.MustCompile(`详情页: (https?://[^\s|]+)`)

// extractDetailURLFromContent recovers the detail-page URL previously
// stashed in a result's Content, or "" when the marker is missing.
func (p *ClxiongPlugin) extractDetailURLFromContent(content string) string {
	if m := detailURLRe.FindStringSubmatch(content); len(m) > 1 {
		return m[1]
	}
	return ""
}
// fetchDetailPageInfo downloads a detail page and parses its magnet links
// plus the last-updated date. It returns nil on any request, status, or
// read error (errors are only logged in debug mode) so that a single bad
// page never fails the whole search.
func (p *ClxiongPlugin) fetchDetailPageInfo(detailURL string, movieTitle string) *DetailPageInfo {
	if p.debugMode {
		log.Printf("[CLXIONG] 正在获取详情页信息: %s", detailURL)
	}
	client := &http.Client{Timeout: 20 * time.Second}
	req, err := http.NewRequest("GET", detailURL, nil)
	if err != nil {
		if p.debugMode {
			log.Printf("[CLXIONG] 创建详情页请求失败: %v", err)
		}
		return nil
	}
	// Browser-like headers; bare clients may be rejected.
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Referer", BaseURL+"/")
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
	resp, err := client.Do(req)
	if err != nil {
		if p.debugMode {
			log.Printf("[CLXIONG] 详情页请求失败: %v", err)
		}
		return nil
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		if p.debugMode {
			log.Printf("[CLXIONG] 详情页HTTP状态错误: %d", resp.StatusCode)
		}
		return nil
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		if p.debugMode {
			log.Printf("[CLXIONG] 读取详情页响应失败: %v", err)
		}
		return nil
	}
	return p.parseDetailPageInfo(string(body), movieTitle)
}
// parseDetailPageInfo extracts the update time and magnet links from a
// detail page's HTML. movieTitle is carried through unchanged as the
// DetailPageInfo title. Returns nil only when the HTML cannot be parsed.
func (p *ClxiongPlugin) parseDetailPageInfo(html string, movieTitle string) *DetailPageInfo {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		if p.debugMode {
			log.Printf("[CLXIONG] 解析详情页HTML失败: %v", err)
		}
		return nil
	}
	detailInfo := &DetailPageInfo{
		Title: movieTitle,
	}
	// Last-updated date, if the page shows one.
	detailInfo.UpdateTime = p.parseUpdateTimeFromDetail(doc)
	// Magnet links plus their display file names.
	magnetLinks, fileNames := p.parseMagnetLinksFromDetailDoc(doc, movieTitle)
	detailInfo.MagnetLinks = magnetLinks
	detailInfo.FileNames = fileNames
	if p.debugMode {
		log.Printf("[CLXIONG] 详情页解析完成: 磁力链接 %d 个,更新时间: %v",
			len(detailInfo.MagnetLinks), detailInfo.UpdateTime)
	}
	return detailInfo
}
// parseUpdateTimeFromDetail scans the detail page's info paragraphs for a
// "最后更新于:<date>" line and parses the date portion against several
// common layouts. Returns the zero time.Time when nothing matches. Note:
// the early return inside the closure only exits that paragraph's callback,
// so remaining paragraphs are still visited and a later match would
// overwrite an earlier one.
func (p *ClxiongPlugin) parseUpdateTimeFromDetail(doc *goquery.Document) time.Time {
	// Looks for text like "最后更新于2025-08-16".
	var updateTime time.Time
	doc.Find(".mv_detail p").Each(func(i int, s *goquery.Selection) {
		text := strings.TrimSpace(s.Text())
		if strings.Contains(text, "最后更新于:") {
			// Strip the label; keep only the date portion.
			dateStr := strings.Replace(text, "最后更新于:", "", 1)
			dateStr = strings.TrimSpace(dateStr)
			// Try the date layouts the site has been seen to use.
			layouts := []string{
				"2006-01-02",
				"2006-1-2",
				"2006/01/02",
				"2006/1/2",
			}
			for _, layout := range layouts {
				if t, err := time.Parse(layout, dateStr); err == nil {
					updateTime = t
					if p.debugMode {
						log.Printf("[CLXIONG] 解析到更新时间: %s -> %v", dateStr, updateTime)
					}
					return
				}
			}
			if p.debugMode {
				log.Printf("[CLXIONG] 无法解析更新时间: %s", dateStr)
			}
		}
	})
	return updateTime
}
// parseMagnetLinksFromDetailDoc collects all magnet anchors inside the
// page's ".mv_down" download section, returning the links and a parallel
// slice of file names (the anchor texts). movieTitle is currently unused in
// the body. Magnet links deliberately carry an empty password.
func (p *ClxiongPlugin) parseMagnetLinksFromDetailDoc(doc *goquery.Document, movieTitle string) ([]model.Link, []string) {
	var links []model.Link
	var fileNames []string
	if p.debugMode {
		// Debug: does the download section exist at all?
		mvDown := doc.Find(".mv_down")
		log.Printf("[CLXIONG] 找到 .mv_down 区域数量: %d", mvDown.Length())
		// Debug: how many magnet anchors inside it?
		magnetLinks := doc.Find(".mv_down a[href^='magnet:']")
		log.Printf("[CLXIONG] 找到磁力链接数量: %d", magnetLinks.Length())
		// If none, count magnet anchors anywhere on the page to aid diagnosis.
		if magnetLinks.Length() == 0 {
			allMagnetLinks := doc.Find("a[href^='magnet:']")
			log.Printf("[CLXIONG] 页面总磁力链接数量: %d", allMagnetLinks.Length())
		}
	}
	// Collect the magnet anchors from the download section.
	doc.Find(".mv_down a[href^='magnet:']").Each(func(i int, s *goquery.Selection) {
		href, exists := s.Attr("href")
		if exists && href != "" {
			// File name is the anchor text (typically "name.mkv[size]").
			fileName := strings.TrimSpace(s.Text())
			link := model.Link{
				URL:  href,
				Type: "magnet",
			}
			// Magnet links carry no password by design.
			link.Password = ""
			links = append(links, link)
			fileNames = append(fileNames, fileName)
			if p.debugMode {
				log.Printf("[CLXIONG] 找到磁力链接: %s", fileName)
			}
		}
	})
	if p.debugMode {
		log.Printf("[CLXIONG] 详情页共找到 %d 个磁力链接", len(links))
	}
	return links, fileNames
}
// generateUniqueID derives a stable result ID from a detail-page path. It
// prefers the numeric ID in paths like "/drama/4466.html" or
// "/movie/123.html"; when neither pattern matches it falls back to a
// 31-based polynomial hash of the whole path.
func (p *ClxiongPlugin) generateUniqueID(detailPath string) string {
	re := regexp.MustCompile(`/(?:drama|movie)/(\d+)\.html`)
	if m := re.FindStringSubmatch(detailPath); len(m) > 1 {
		return fmt.Sprintf("clxiong-%s", m[1])
	}
	// Fallback: hash the path's runes.
	h := 0
	for _, r := range detailPath {
		h = h*31 + int(r)
	}
	if h < 0 {
		h = -h
	}
	return fmt.Sprintf("clxiong-%d", h)
}

View File

@@ -0,0 +1,168 @@
# 磁力熊(CiLiXiong) HTML结构分析文档
## 网站信息
- **域名**: `www.cilixiong.org`
- **名称**: 磁力熊
- **类型**: 影视磁力链接搜索网站
- **特点**: 两步式搜索流程需要先POST获取searchid再GET搜索结果
## 搜索流程分析
### 第一步:提交搜索请求
#### 请求信息
- **URL**: `https://www.cilixiong.org/e/search/index.php`
- **方法**: POST
- **Content-Type**: `application/x-www-form-urlencoded`
- **Referer**: `https://www.cilixiong.org/`
#### POST参数
```
classid=1%2C2&show=title&tempid=1&keyboard={URL编码的关键词}
```
参数说明:
- `classid=1,2` - 搜索分类1=电影2=剧集)
- `show=title` - 搜索字段
- `tempid=1` - 模板ID
- `keyboard` - 搜索关键词需URL编码
#### 响应处理
- **状态码**: 302重定向
- **关键信息**: 从响应头`Location`字段获取searchid
- **格式**: `result/?searchid=7549`
### 第二步:获取搜索结果
#### 请求信息
- **URL**: `https://www.cilixiong.org/e/search/result/?searchid={searchid}`
- **方法**: GET
- **Referer**: `https://www.cilixiong.org/`
## 搜索结果页面结构
### 页面布局
- **容器**: `.container`
- **结果提示**: `.text-white.py-3` - 显示"找到 X 条符合搜索条件"
- **结果网格**: `.row.row-cols-2.row-cols-lg-4.align-items-stretch.g-4.py-2`
### 单个结果项结构
```html
<div class="col">
<div class="card card-cover h-100 overflow-hidden text-bg-dark rounded-4 shadow-lg position-relative">
<a href="/drama/4466.html">
<div class="card-img" style="background-image: url('海报图片URL');"><span></span></div>
<div class="card-body position-absolute d-flex w-100 flex-column text-white">
<h2 class="pt-5 lh-1 pb-2 h4">影片标题</h2>
<ul class="d-flex list-unstyled mb-0">
<li class="me-auto"><span class="rank bg-success p-1">8.9</span></li>
<li class="d-flex align-items-center small">2025</li>
</ul>
</div>
</a>
</div>
</div>
```
### 数据提取选择器
#### 结果列表
- **选择器**: `.row.row-cols-2.row-cols-lg-4 .col`
- **排除**: 空白或无效的卡片
#### 单项数据提取
1. **详情链接**: `.col a[href*="/drama/"]` 或 `.col a[href*="/movie/"]`
2. **标题**: `.col h2.h4`
3. **评分**: `.col .rank`
4. **年份**: `.col .small`最后一个li元素
5. **海报**: `.col .card-img[style*="background-image"]` - 从style属性提取url
#### 链接格式
- 电影:`/movie/ID.html`
- 剧集:`/drama/ID.html`
- 需补全为绝对URL`https://www.cilixiong.org/drama/ID.html`
## 详情页面结构
### 基本信息区域
```html
<div class="mv_detail lh-2 px-3">
<p class="mb-2"><h1>影片标题</h1></p>
<p class="mb-2">豆瓣评分: <span class="db_rank">8.9</span></p>
<p class="mb-2">又名:英文名称</p>
<p class="mb-2">上映日期2025-05-25(美国)</p>
<p class="mb-2">类型:|喜剧|冒险|科幻|动画|</p>
<p class="mb-2">单集片长22分钟</p>
<p class="mb-2">上映地区:美国</p>
<p class="mb-2">主演:演员列表</p>
</div>
```
### 磁力链接区域
```html
<div class="mv_down p-5 pb-3 rounded-4 text-center">
<h2 class="h6 pb-3">影片名磁力下载地址</h2>
<div class="container">
<div class="border-bottom pt-2 pb-4 mb-3">
<a href="magnet:?xt=urn:btih:HASH">文件名.mkv[文件大小]</a>
<a class="ms-3 text-muted small" href="/magnet.php?url=..." target="_blank">详情</a>
</div>
</div>
</div>
```
### 磁力链接提取
- **容器**: `.mv_down .container`
- **链接项**: `.border-bottom`
- **磁力链接**: `a[href^="magnet:"]`
- **文件名**: 链接的文本内容
- **大小信息**: 通常包含在文件名的方括号中
## 错误处理
### 常见问题
1. **搜索无结果**: 页面会显示"找到 0 条符合搜索条件"
2. **searchid失效**: 可能需要重新发起搜索请求
3. **详情页无磁力链接**: 某些内容可能暂时无下载资源
### 限流检测
- **状态码**: 检测429或403状态码
- **页面内容**: 检测是否包含"访问频繁"等提示
## 实现要点
### 请求头设置
```http
User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36
Content-Type: application/x-www-form-urlencoded (POST)
Referer: https://www.cilixiong.org/
```
### Cookie处理
- 网站可能需要维持会话状态
- 建议在客户端中启用Cookie存储
### 搜索策略
1. **首次搜索**: POST提交 → 解析Location → GET结果页
2. **结果解析**: 提取基本信息,构建搜索结果
3. **详情获取**: 可选,异步获取磁力链接
### 数据字段映射
- **Title**: 影片中文标题
- **Content**: 评分、年份、类型等信息组合
- **UniqueID**: 使用详情页URL的ID部分
- **Links**: 磁力链接数组
- **Tags**: 影片类型标签
## 技术注意事项
### URL编码
- 搜索关键词必须进行URL编码
- 中文字符使用UTF-8编码
### 重定向处理
- POST请求会返回302重定向
- 需要从响应头提取Location信息
- 不要自动跟随重定向,需要手动解析
### 异步处理
- 搜索结果可以先返回基本信息
- 磁力链接通过异步请求详情页获取
- 设置合理的并发限制和超时时间

View File

@@ -27,7 +27,7 @@ var (
xunleiLinkRegex = regexp.MustCompile(`https?://pan\.xunlei\.com/s/[0-9a-zA-Z_\-]+`)
tianyiLinkRegex = regexp.MustCompile(`https?://cloud\.189\.cn/t/[0-9a-zA-Z]+`)
link115Regex = regexp.MustCompile(`https?://115\.com/s/[0-9a-zA-Z]+`)
mobileLinkRegex = regexp.MustCompile(`https?://caiyun\.feixin\.10086\.cn/[0-9a-zA-Z]+`)
mobileLinkRegex = regexp.MustCompile(`https?://(caiyun\.feixin\.10086\.cn|caiyun\.139\.com|yun\.139\.com|cloud\.139\.com|pan\.139\.com)/.*`)
weiyunLinkRegex = regexp.MustCompile(`https?://share\.weiyun\.com/[0-9a-zA-Z]+`)
lanzouLinkRegex = regexp.MustCompile(`https?://(www\.)?(lanzou[uixys]*|lan[zs]o[ux])\.(com|net|org)/[0-9a-zA-Z]+`)
jianguoyunLinkRegex = regexp.MustCompile(`https?://(www\.)?jianguoyun\.com/p/[0-9a-zA-Z]+`)
@@ -350,7 +350,7 @@ func (p *CygPlugin) determineCloudType(name string) string {
return "tianyi"
case "115", "115网盘":
return "115"
case "移动云盘", "移动", "mobile", "和彩云":
case "移动云盘", "移动", "mobile", "和彩云", "139云盘", "139", "中国移动云盘":
return "mobile"
case "微云", "腾讯微云", "weiyun":
return "weiyun"

600
plugin/ddys/ddys.go Normal file
View File

@@ -0,0 +1,600 @@
package ddys
import (
"context"
"fmt"
"io"
"log"
"net/http"
"net/url"
"regexp"
"strings"
"sync"
"time"
"github.com/PuerkitoBio/goquery"
"pansou/model"
"pansou/plugin"
)
// Plugin identity and scraping parameters for ddys.pro ("低端影视").
const (
	PluginName     = "ddys"
	DisplayName    = "低端影视"
	Description    = "低端影视 - 影视资源网盘链接搜索"
	BaseURL        = "https://ddys.pro"
	SearchPath     = "/?s=%s&post_type=post" // fmt pattern; %s is the URL-escaped keyword
	UserAgent      = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
	MaxResults     = 50 // cap on parsed listing items
	MaxConcurrency = 20 // max in-flight detail-page fetches
)
// DdysPlugin scrapes ddys.pro for cloud-disk links.
type DdysPlugin struct {
	*plugin.BaseAsyncPlugin
	debugMode   bool          // verbose logging switch
	detailCache sync.Map      // detail URL -> []model.Link memoization
	cacheTTL    time.Duration // intended cache lifetime; NOTE(review): never checked on read in this file — entries appear to live forever
}
// init registers the plugin with the global registry at startup.
func init() {
	plugin.RegisterGlobalPlugin(NewDdysPlugin())
}
// NewDdysPlugin builds a DdysPlugin with production defaults:
// debug logging off and a 30-minute detail-cache TTL.
func NewDdysPlugin() *DdysPlugin {
	debugMode := false // keep disabled in production
	p := &DdysPlugin{
		BaseAsyncPlugin: plugin.NewBaseAsyncPlugin(PluginName, 1), // standard cloud-disk plugin; Service-layer filtering enabled
		debugMode:       debugMode,
		cacheTTL:        30 * time.Minute, // intended detail-page cache lifetime
	}
	return p
}
// Name returns the plugin's registry name.
func (p *DdysPlugin) Name() string {
	return PluginName
}
// DisplayName returns the human-readable plugin name.
func (p *DdysPlugin) DisplayName() string {
	return DisplayName
}
// Description returns a short description of the plugin.
func (p *DdysPlugin) Description() string {
	return Description
}
// Search implements the plugin search interface using a fresh HTTP client
// with a 30-second timeout per call.
func (p *DdysPlugin) Search(keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
	return p.searchImpl(&http.Client{Timeout: 30 * time.Second}, keyword, ext)
}
// searchImpl is the three-stage pipeline: run the keyword search, resolve
// cloud-disk links from each hit's detail page concurrently, then apply
// keyword filtering. ext is currently unused.
func (p *DdysPlugin) searchImpl(client *http.Client, keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
	if p.debugMode {
		log.Printf("[DDYS] 开始搜索: %s", keyword)
	}
	// Stage 1: fetch and parse the search listing.
	searchResults, err := p.executeSearch(client, keyword)
	if err != nil {
		return nil, fmt.Errorf("[%s] 执行搜索失败: %w", p.Name(), err)
	}
	if p.debugMode {
		log.Printf("[DDYS] 搜索获取到 %d 个结果", len(searchResults))
	}
	// Stage 2: concurrently fetch detail pages for cloud-disk links.
	finalResults := p.fetchDetailLinks(client, searchResults, keyword)
	if p.debugMode {
		log.Printf("[DDYS] 最终获取到 %d 个有效结果", len(finalResults))
	}
	// Stage 3: keyword filtering (standard cloud-disk plugins must filter).
	filteredResults := plugin.FilterResultsByKeyword(finalResults, keyword)
	if p.debugMode {
		log.Printf("[DDYS] 关键词过滤后剩余 %d 个结果", len(filteredResults))
	}
	return filteredResults, nil
}
// executeSearch GETs the site's search page for keyword and parses the
// listing into raw results (links not yet resolved). The request carries
// browser-like headers and is retried via doRequestWithRetry.
func (p *DdysPlugin) executeSearch(client *http.Client, keyword string) ([]model.SearchResult, error) {
	// Build the search URL with the escaped keyword.
	searchURL := fmt.Sprintf("%s%s", BaseURL, fmt.Sprintf(SearchPath, url.QueryEscape(keyword)))
	// Bound the whole request with a 30-second context.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
	if err != nil {
		return nil, fmt.Errorf("[%s] 创建搜索请求失败: %w", p.Name(), err)
	}
	// Browser-like request headers.
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
	req.Header.Set("Connection", "keep-alive")
	req.Header.Set("Upgrade-Insecure-Requests", "1")
	req.Header.Set("Cache-Control", "max-age=0")
	req.Header.Set("Referer", BaseURL+"/")
	resp, err := p.doRequestWithRetry(req, client)
	if err != nil {
		return nil, fmt.Errorf("[%s] 搜索请求失败: %w", p.Name(), err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("[%s] 搜索请求HTTP状态错误: %d", p.Name(), resp.StatusCode)
	}
	// Parse the listing HTML.
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("[%s] 解析搜索结果HTML失败: %w", p.Name(), err)
	}
	return p.parseSearchResults(doc)
}
// doRequestWithRetry issues req up to three times with exponential backoff
// (200ms, 400ms), cloning the request per attempt so retries share no
// mutable state, and returns the first 200 response.
//
// Fix: the original assigned lastErr = err even when err was nil (a non-200
// status), so the final fmt.Errorf could wrap a nil error and print
// "%!w(<nil>)". A non-200 status now produces a real error.
func (p *DdysPlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) {
	maxRetries := 3
	var lastErr error
	for i := 0; i < maxRetries; i++ {
		if i > 0 {
			// Exponential backoff before each retry.
			backoff := time.Duration(1<<uint(i-1)) * 200 * time.Millisecond
			time.Sleep(backoff)
		}
		// Clone so no attempt reuses a request already handed to the client.
		reqClone := req.Clone(req.Context())
		resp, err := client.Do(reqClone)
		if err == nil && resp.StatusCode == 200 {
			return resp, nil
		}
		if err != nil {
			lastErr = err
		} else {
			// err is nil but the status is wrong: record a real error so the
			// final message never wraps nil.
			lastErr = fmt.Errorf("HTTP状态码: %d", resp.StatusCode)
		}
		if resp != nil {
			resp.Body.Close()
		}
	}
	return nil, fmt.Errorf("[%s] 重试 %d 次后仍然失败: %w", p.Name(), maxRetries, lastErr)
}
// parseSearchResults walks every listing article element and converts it
// into a SearchResult, stopping at MaxResults. Items that cannot be parsed
// are silently skipped.
func (p *DdysPlugin) parseSearchResults(doc *goquery.Document) ([]model.SearchResult, error) {
	var results []model.SearchResult
	// Result items are article elements whose class contains "post-".
	doc.Find("article[class*='post-']").Each(func(i int, s *goquery.Selection) {
		if len(results) >= MaxResults {
			return
		}
		result := p.parseResultItem(s, i+1)
		if result != nil {
			results = append(results, *result)
		}
	})
	if p.debugMode {
		log.Printf("[DDYS] 解析到 %d 个原始结果", len(results))
	}
	return results, nil
}
// parseResultItem converts one listing article into a SearchResult,
// returning nil when the item lacks a title or a link. The detail-page URL
// is appended to Content as a "详情页: <url>" marker line for the later
// link-resolution pass; Links starts empty.
func (p *DdysPlugin) parseResultItem(s *goquery.Selection, index int) *model.SearchResult {
	// Numeric post ID from the article's class ("post-NNNN").
	articleClass, _ := s.Attr("class")
	postID := p.extractPostID(articleClass)
	if postID == "" {
		postID = fmt.Sprintf("unknown-%d", index)
	}
	// Title anchor.
	linkEl := s.Find(".post-title a")
	if linkEl.Length() == 0 {
		if p.debugMode {
			log.Printf("[DDYS] 跳过无标题链接的结果")
		}
		return nil
	}
	// Title text.
	title := strings.TrimSpace(linkEl.Text())
	if title == "" {
		return nil
	}
	// Detail-page URL.
	detailURL, _ := linkEl.Attr("href")
	if detailURL == "" {
		if p.debugMode {
			log.Printf("[DDYS] 跳过无链接的结果: %s", title)
		}
		return nil
	}
	// Publish time, category, and summary.
	publishTime := p.extractPublishTime(s)
	category := p.extractCategory(s)
	content := p.extractContent(s)
	// Initial result object; links are resolved later from the detail page.
	result := model.SearchResult{
		Title:     title,
		Content:   fmt.Sprintf("分类:%s\n%s", category, content),
		Channel:   "", // plugin results must leave Channel empty (per plugin guide)
		MessageID: fmt.Sprintf("%s-%s-%d", p.Name(), postID, index),
		UniqueID:  fmt.Sprintf("%s-%s-%d", p.Name(), postID, index),
		Datetime:  publishTime,
		Links:     []model.Link{}, // filled after the detail-page fetch
		Tags:      []string{category},
	}
	// Stash the detail URL for the resolution pass.
	result.Content += fmt.Sprintf("\n详情页: %s", detailURL)
	if p.debugMode {
		log.Printf("[DDYS] 解析结果: %s (%s)", title, category)
	}
	return &result
}
// postIDRe matches WordPress-style article classes of the form "post-1234".
// Compiled once at package scope instead of on every call.
var postIDRe = regexp.MustCompile(`post-(\d+)`)

// extractPostID returns the numeric post ID from an article element's class
// attribute, or "" when the class carries no post-N marker.
func (p *DdysPlugin) extractPostID(articleClass string) string {
	if m := postIDRe.FindStringSubmatch(articleClass); len(m) > 1 {
		return m[1]
	}
	return ""
}
// extractPublishTime reads the entry's <time datetime="..."> value and
// parses it as RFC 3339, falling back to the current time when the element
// or attribute is missing or the value is malformed.
func (p *DdysPlugin) extractPublishTime(s *goquery.Selection) time.Time {
	el := s.Find(".meta_date time.entry-date")
	if el.Length() > 0 {
		if raw, ok := el.Attr("datetime"); ok {
			// ISO 8601 / RFC 3339 timestamp.
			if parsed, err := time.Parse(time.RFC3339, raw); err == nil {
				return parsed
			}
		}
	}
	return time.Now()
}
// extractCategory returns the text of the entry's first category link, or
// "未分类" (uncategorized) when no category metadata is present.
func (p *DdysPlugin) extractCategory(s *goquery.Selection) string {
	if el := s.Find(".meta_categories .cat-links a"); el.Length() > 0 {
		return strings.TrimSpace(el.Text())
	}
	return "未分类"
}
// extractContent returns the entry's summary text, truncated to at most 200
// characters with a "..." suffix.
//
// Fix: the original truncated with content[:200], which counts bytes and can
// cut a multi-byte UTF-8 character (the summaries are mostly Chinese) in
// half, producing mojibake. Truncation now counts runes.
func (p *DdysPlugin) extractContent(s *goquery.Selection) string {
	contentEl := s.Find(".entry-content")
	if contentEl.Length() == 0 {
		return ""
	}
	content := strings.TrimSpace(contentEl.Text())
	// Limit the length, respecting rune boundaries.
	if runes := []rune(content); len(runes) > 200 {
		content = string(runes[:200]) + "..."
	}
	return content
}
// fetchDetailLinks resolves cloud-disk links for each search hit by fetching
// its detail page, with at most MaxConcurrency requests in flight. Results
// whose detail page yields no links are dropped; on the survivors the
// internal detail-URL marker is stripped from Content. Output order is
// nondeterministic because results are collected as goroutines finish.
// keyword is currently unused here.
func (p *DdysPlugin) fetchDetailLinks(client *http.Client, searchResults []model.SearchResult, keyword string) []model.SearchResult {
	if len(searchResults) == 0 {
		return []model.SearchResult{}
	}
	// Semaphore channel bounds concurrency.
	semaphore := make(chan struct{}, MaxConcurrency)
	var wg sync.WaitGroup
	resultsChan := make(chan model.SearchResult, len(searchResults))
	for _, result := range searchResults {
		wg.Add(1)
		go func(r model.SearchResult) {
			defer wg.Done()
			semaphore <- struct{}{}        // acquire a slot
			defer func() { <-semaphore }() // release it
			// Recover the detail URL stashed in Content.
			detailURL := p.extractDetailURLFromContent(r.Content)
			if detailURL == "" {
				if p.debugMode {
					log.Printf("[DDYS] 跳过无详情页URL的结果: %s", r.Title)
				}
				return
			}
			// Fetch and parse the detail page for links.
			links := p.fetchDetailPageLinks(client, detailURL)
			if len(links) > 0 {
				r.Links = links
				// Remove the internal marker line before emitting.
				r.Content = p.cleanContent(r.Content)
				resultsChan <- r
			} else if p.debugMode {
				log.Printf("[DDYS] 详情页无有效链接: %s", r.Title)
			}
		}(result)
	}
	// Close the channel once every worker has finished.
	go func() {
		wg.Wait()
		close(resultsChan)
	}()
	// Drain the channel into the final slice.
	var finalResults []model.SearchResult
	for result := range resultsChan {
		finalResults = append(finalResults, result)
	}
	return finalResults
}
// extractDetailURLFromContent scans Content line by line for the
// "详情页: " marker and returns the URL that follows it, or "" when the
// marker is absent.
func (p *DdysPlugin) extractDetailURLFromContent(content string) string {
	for _, line := range strings.Split(content, "\n") {
		if strings.HasPrefix(line, "详情页: ") {
			return strings.TrimPrefix(line, "详情页: ")
		}
	}
	return ""
}
// cleanContent strips the internal "详情页: <url>" marker line from a
// result's Content before it is handed back to callers.
func (p *DdysPlugin) cleanContent(content string) string {
	var kept []string
	for _, line := range strings.Split(content, "\n") {
		if strings.HasPrefix(line, "详情页: ") {
			continue // drop the marker line
		}
		kept = append(kept, line)
	}
	return strings.Join(kept, "\n")
}
// fetchDetailPageLinks downloads a detail page and extracts its cloud-disk
// links, memoizing successful extractions in detailCache keyed by URL.
// NOTE(review): cached entries are never expired even though cacheTTL
// exists — confirm whether expiry was intended. Returns an empty slice on
// any error; errors are only logged in debug mode.
func (p *DdysPlugin) fetchDetailPageLinks(client *http.Client, detailURL string) []model.Link {
	// Cache hit?
	if cached, found := p.detailCache.Load(detailURL); found {
		if links, ok := cached.([]model.Link); ok {
			if p.debugMode {
				log.Printf("[DDYS] 使用缓存的详情页链接: %s", detailURL)
			}
			return links
		}
	}
	// Bound the request with a 30-second context.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, "GET", detailURL, nil)
	if err != nil {
		if p.debugMode {
			log.Printf("[DDYS] 创建详情页请求失败: %v", err)
		}
		return []model.Link{}
	}
	// Browser-like request headers.
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
	req.Header.Set("Referer", BaseURL+"/")
	resp, err := client.Do(req)
	if err != nil {
		if p.debugMode {
			log.Printf("[DDYS] 详情页请求失败: %v", err)
		}
		return []model.Link{}
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		if p.debugMode {
			log.Printf("[DDYS] 详情页HTTP状态错误: %d", resp.StatusCode)
		}
		return []model.Link{}
	}
	// Slurp the body; detail pages are small.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		if p.debugMode {
			log.Printf("[DDYS] 读取详情页响应失败: %v", err)
		}
		return []model.Link{}
	}
	// Extract cloud-disk links from the raw HTML.
	links := p.parseNetworkDiskLinks(string(body))
	// Cache only non-empty extractions.
	if len(links) > 0 {
		p.detailCache.Store(detailURL, links)
	}
	if p.debugMode {
		log.Printf("[DDYS] 从详情页提取到 %d 个链接: %s", len(links), detailURL)
	}
	return links
}
// parseNetworkDiskLinks scans raw detail-page HTML for cloud-disk anchors
// using per-provider regex patterns (an anchor preceded by a "(夸克)"-style
// label) plus a generic pan/drive/cloud fallback. Links are deduplicated by
// URL; the link type comes from the URL itself when recognizable, otherwise
// from the pattern that matched. A nearby extraction code is attached when
// one is found. NOTE(review): the patterns are recompiled on every call;
// hoisting the compiled regexps to package scope would avoid that.
func (p *DdysPlugin) parseNetworkDiskLinks(htmlContent string) []model.Link {
	var links []model.Link
	// Provider-labelled anchor patterns.
	patterns := []struct {
		name    string
		pattern string
		urlType string
	}{
		{"夸克网盘", `\(夸克[^)]*\)[:]\s*<a[^>]*href\s*=\s*["']([^"']+)["'][^>]*>([^<]+)</a>`, "quark"},
		{"百度网盘", `\(百度[^)]*\)[:]\s*<a[^>]*href\s*=\s*["']([^"']+)["'][^>]*>([^<]+)</a>`, "baidu"},
		{"阿里云盘", `\(阿里[^)]*\)[:]\s*<a[^>]*href\s*=\s*["']([^"']+)["'][^>]*>([^<]+)</a>`, "aliyun"},
		{"天翼云盘", `\(天翼[^)]*\)[:]\s*<a[^>]*href\s*=\s*["']([^"']+)["'][^>]*>([^<]+)</a>`, "tianyi"},
		{"迅雷网盘", `\(迅雷[^)]*\)[:]\s*<a[^>]*href\s*=\s*["']([^"']+)["'][^>]*>([^<]+)</a>`, "xunlei"},
		// Generic fallback for any pan/drive/cloud URL.
		{"通用网盘", `<a[^>]*href\s*=\s*["'](https?://[^"']*(?:pan|drive|cloud)[^"']*)["'][^>]*>([^<]+)</a>`, "others"},
	}
	// Dedup by URL across all patterns.
	seen := make(map[string]bool)
	for _, pattern := range patterns {
		re := regexp.MustCompile(pattern.pattern)
		matches := re.FindAllStringSubmatch(htmlContent, -1)
		for _, match := range matches {
			if len(match) >= 3 {
				url := match[1]
				// Skip URLs we've already recorded.
				if seen[url] {
					continue
				}
				seen[url] = true
				// Prefer the type implied by the URL itself.
				urlType := p.determineCloudType(url)
				if urlType == "others" {
					urlType = pattern.urlType
				}
				// Look for an extraction code near the link.
				password := p.extractPassword(htmlContent, url)
				link := model.Link{
					Type:     urlType,
					URL:      url,
					Password: password,
				}
				links = append(links, link)
				if p.debugMode {
					log.Printf("[DDYS] 找到链接: %s (%s)", url, urlType)
				}
			}
		}
	}
	return links
}
// extractPassword scans the text surrounding panURL inside content for a
// share password (extraction code) and returns the first match, or "" when
// the URL is absent or no code is found nearby.
func (p *DdysPlugin) extractPassword(content string, panURL string) string {
	idx := strings.Index(content, panURL)
	if idx == -1 {
		return ""
	}
	// Search window: 200 bytes on either side of the link.
	lo := idx - 200
	if lo < 0 {
		lo = 0
	}
	hi := idx + len(panURL) + 200
	if hi > len(content) {
		hi = len(content)
	}
	window := content[lo:hi]
	// Common phrasings of an extraction code; first match wins.
	for _, expr := range []string{
		`提取[码密][:]?\s*([A-Za-z0-9]{4,8})`,
		`密码[:]?\s*([A-Za-z0-9]{4,8})`,
		`[码密][:]?\s*([A-Za-z0-9]{4,8})`,
		`([A-Za-z0-9]{4,8})\s*[是为]?提取[码密]`,
	} {
		if m := regexp.MustCompile(expr).FindStringSubmatch(window); len(m) > 1 {
			return m[1]
		}
	}
	return ""
}
// determineCloudType classifies a share URL by its domain and returns the
// provider tag used throughout the project ("quark", "baidu", ...). Unknown
// domains yield "others".
func (p *DdysPlugin) determineCloudType(url string) string {
	rules := []struct {
		substr string
		kind   string
	}{
		{"pan.quark.cn", "quark"},
		{"drive.uc.cn", "uc"},
		{"pan.baidu.com", "baidu"},
		{"aliyundrive.com", "aliyun"},
		{"alipan.com", "aliyun"},
		{"pan.xunlei.com", "xunlei"},
		{"cloud.189.cn", "tianyi"},
		{"caiyun.139.com", "mobile"},
		{"115.com", "115"},
		{"123pan.com", "123"},
		{"mypikpak.com", "pikpak"},
		{"lanzou", "lanzou"},
	}
	for _, r := range rules {
		if strings.Contains(url, r.substr) {
			return r.kind
		}
	}
	return "others"
}

View File

@@ -0,0 +1,182 @@
# DDYS低端影视插件HTML结构分析
## 网站概述
- **网站名称**: 低端影视
- **域名**: https://ddys.pro/
- **类型**: 影视资源网站,提供在线播放和网盘下载链接
## API流程概述
### 搜索页面
- **请求URL**: `https://ddys.pro/?s={keyword}&post_type=post`
- **方法**: GET
- **Headers**: 标准浏览器请求头
- **特点**: WordPress网站使用标准搜索功能
## 搜索结果结构
### 搜索结果页面HTML结构
```html
<main id="main" class="site-main col-md-8" role="main">
<article id="post-1404" class="post-1404 post type-post status-publish ...">
<div class="row">
<div class="post-content col-md-12">
<header class="entry-header">
<h2 class="post-title">
<a href="https://ddys.pro/deadpool/" rel="bookmark">死侍 1-3</a>
</h2>
</header>
<div class="entry-content">
<p>注:本片不适合公共场合观看</p>
</div>
<footer class="entry-footer">
<div class="metadata">
<ul>
<li class="meta_date">
<time class="entry-date published" datetime="2018-08-08T01:41:40+08:00">
2018年8月8日
</time>
</li>
<li class="meta_categories">
<span class="cat-links">
<a href="..." rel="category tag">欧美电影</a>
</span>
</li>
</ul>
</div>
</footer>
</div>
</div>
</article>
</main>
```
### 详情页面HTML结构
```html
<main id="main" class="site-main" role="main">
<article id="post-19840" class="...">
<div class="post-content">
<h1 class="post-title">变形金刚 超能勇士崛起</h1>
<div class="metadata">
<ul>
<li class="meta_date">
<time class="entry-date published updated"
datetime="2023-07-13T14:27:08+08:00">
2023年7月13日
</time>
</li>
<li class="meta_categories">
<span class="cat-links">
<a href="..." rel="category tag">欧美电影</a>
</span>
</li>
<li class="meta_tags">
<span class="tags-links">
标签:<a href="..." rel="tag">动作</a>
</span>
</li>
</ul>
</div>
<div class="entry">
<!-- 播放器相关内容 -->
<!-- 网盘下载链接 -->
<p>视频下载 (夸克网盘)
<a href="https://pan.quark.cn/s/a372a91a0296"
rel="noopener nofollow" target="_blank">
https://pan.quark.cn/s/a372a91a0296
</a>
</p>
<!-- 豆瓣信息区块 -->
<div class="doulist-item">
<div class="mod">
<div class="v-overflowHidden doulist-subject">
<div class="post">
<img src="douban_cache/xxx.jpg">
</div>
<div class="title">
<a href="https://movie.douban.com/subject/..."
class="cute" target="_blank">
影片名称 英文名
</a>
</div>
<div class="rating">
<span class="rating_nums">5.8</span>
</div>
<div class="abstract">
<!-- 详细信息:又名、导演、演员、类型等 -->
</div>
</div>
</div>
</div>
</div>
</div>
</article>
</main>
```
## 数据提取要点
### 搜索结果页面
1. **结果列表**: `article[class^="post-"]` - 每个搜索结果
2. **文章ID**: 从article的class或id属性提取`post-1404`
3. **标题**: `.post-title > a` - 获取文本和href属性
4. **链接**: `.post-title > a[href]` - 详情页链接
5. **发布时间**: `.meta_date > time.entry-date[datetime]` - ISO格式时间
6. **分类**: `.meta_categories > .cat-links > a` - 分类信息
7. **简介**: `.entry-content` - 内容简介(可能为空)
### 详情页面
1. **标题**: `h1.post-title` - 影片标题
2. **发布时间**: `.meta_date > time.entry-date[datetime]` - 发布时间
3. **分类标签**: `.meta_categories``.meta_tags`中的链接
4. **网盘链接提取**:
- 模式1: `(网盘名)<a href="链接">链接文本</a>`
- 模式2: `(网盘名) <a href="链接">链接文本</a>`
- 常见网盘: 夸克网盘、百度网盘、阿里云盘、天翼云盘等
5. **豆瓣信息**: `.doulist-item`区块(可选)
## 网盘链接识别规则
### 支持的网盘类型
- **夸克网盘**: `pan.quark.cn`
- **百度网盘**: `pan.baidu.com`
- **阿里云盘**: `aliyundrive.com` / `alipan.com`
- **天翼云盘**: `cloud.189.cn`
- **迅雷网盘**: `pan.xunlei.com`
- **115网盘**: `115.com`
- **蓝奏云**: `lanzou`相关域名
### 链接提取策略
1. 在详情页的`.entry`内容区域搜索
2. 使用正则表达式匹配网盘链接模式
3. 提取网盘类型、链接和可能的提取码
4. 链接去重和验证
## 特殊处理
### 时间解析
- 格式: ISO 8601格式 `2023-07-13T14:27:08+08:00`
- 显示: `2023年7月13日`
### 内容清理
- 移除HTML标签
- 处理特殊字符和编码
- 清理多余空格和换行
### 错误处理
- 网络超时重试
- 解析失败的降级处理
- 空结果的处理
## 注意事项
1. **反爬虫**: 网站可能有基础的反爬虫措施,需要设置合理的请求头
2. **限频**: 避免请求过于频繁
3. **编码**: 处理中文关键词的URL编码
4. **更新**: 网站结构可能会变化,需要定期维护选择器

712
plugin/hdmoli/hdmoli.go Normal file
View File

@@ -0,0 +1,712 @@
package hdmoli
import (
"context"
"fmt"
"io"
"log"
"net/http"
"net/url"
"regexp"
"strings"
"sync"
"time"
"github.com/PuerkitoBio/goquery"
"pansou/model"
"pansou/plugin"
)
const (
	// PluginName is the unique registration name of this plugin.
	PluginName = "hdmoli"
	// DisplayName is the human-readable plugin name.
	DisplayName = "HDmoli"
	// Description summarises what the plugin searches.
	Description = "HDmoli - 影视资源网盘下载链接搜索"
	// BaseURL is the site root; SearchPath is the search endpoint template
	// (the %s receives the percent-encoded keyword).
	BaseURL    = "https://www.hdmoli.pro"
	SearchPath = "/search.php?searchkey=%s&submit="
	// UserAgent mimics a desktop Chrome browser on every request.
	UserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
	// MaxResults caps parsed search hits; MaxConcurrency bounds the number
	// of concurrent detail-page fetches.
	MaxResults     = 50
	MaxConcurrency = 20
)
// HdmoliPlugin searches HDmoli for net-disk download links.
type HdmoliPlugin struct {
	*plugin.BaseAsyncPlugin
	debugMode   bool          // verbose logging switch (disabled in production)
	detailCache sync.Map      // detail-page URL -> []model.Link cache
	cacheTTL    time.Duration // intended cache lifetime; NOTE(review): never consulted when reading detailCache — confirm entries are meant to live forever
}
// init registers the plugin with the global registry at import time.
func init() {
	plugin.RegisterGlobalPlugin(NewHdmoliPlugin())
}

// NewHdmoliPlugin creates a new HDmoli plugin instance with debug logging
// disabled and a 30-minute detail-page cache TTL.
func NewHdmoliPlugin() *HdmoliPlugin {
	debugMode := false // keep disabled in production
	p := &HdmoliPlugin{
		// Priority 2: standard net-disk plugin with service-layer filtering.
		BaseAsyncPlugin: plugin.NewBaseAsyncPlugin(PluginName, 2),
		debugMode:       debugMode,
		cacheTTL:        30 * time.Minute, // detail pages cached for 30 minutes
	}
	return p
}
// Name returns the plugin's registration name.
func (p *HdmoliPlugin) Name() string {
	return PluginName
}

// DisplayName returns the human-readable plugin name.
func (p *HdmoliPlugin) DisplayName() string {
	return DisplayName
}

// Description returns a short description of the plugin.
func (p *HdmoliPlugin) Description() string {
	return Description
}

// Search runs a keyword search using a fresh HTTP client with a 30s timeout.
func (p *HdmoliPlugin) Search(keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
	return p.searchImpl(&http.Client{Timeout: 30 * time.Second}, keyword, ext)
}
// searchImpl performs the three-stage search: fetch the result list, resolve
// each result's detail page for net-disk links, then filter by keyword.
func (p *HdmoliPlugin) searchImpl(client *http.Client, keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
	if p.debugMode {
		log.Printf("[HDMOLI] 开始搜索: %s", keyword)
	}
	// Stage 1: run the search and parse the result list.
	searchResults, err := p.executeSearch(client, keyword)
	if err != nil {
		return nil, fmt.Errorf("[%s] 执行搜索失败: %w", p.Name(), err)
	}
	if p.debugMode {
		log.Printf("[HDMOLI] 搜索获取到 %d 个结果", len(searchResults))
	}
	// Stage 2: concurrently fetch detail pages to collect net-disk links;
	// results without any links are dropped.
	finalResults := p.fetchDetailLinks(client, searchResults, keyword)
	if p.debugMode {
		log.Printf("[HDMOLI] 最终获取到 %d 个有效结果", len(finalResults))
	}
	// Stage 3: keyword filtering (required for standard net-disk plugins).
	filteredResults := plugin.FilterResultsByKeyword(finalResults, keyword)
	if p.debugMode {
		log.Printf("[HDMOLI] 关键词过滤后剩余 %d 个结果", len(filteredResults))
	}
	return filteredResults, nil
}
// executeSearch issues the search request (with retry) and parses the
// resulting HTML page into raw search results.
func (p *HdmoliPlugin) executeSearch(client *http.Client, keyword string) ([]model.SearchResult, error) {
	// Build the search URL with the keyword percent-encoded.
	searchURL := fmt.Sprintf("%s%s", BaseURL, fmt.Sprintf(SearchPath, url.QueryEscape(keyword)))
	// Bound the whole request with a 30-second context.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
	if err != nil {
		return nil, fmt.Errorf("[%s] 创建搜索请求失败: %w", p.Name(), err)
	}
	// Browser-like headers; the site rejects requests without a Referer.
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
	req.Header.Set("Connection", "keep-alive")
	req.Header.Set("Upgrade-Insecure-Requests", "1")
	req.Header.Set("Cache-Control", "max-age=0")
	req.Header.Set("Referer", BaseURL+"/") // HDmoli requires a referer
	resp, err := p.doRequestWithRetry(req, client)
	if err != nil {
		return nil, fmt.Errorf("[%s] 搜索请求失败: %w", p.Name(), err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("[%s] 搜索请求HTTP状态错误: %d", p.Name(), resp.StatusCode)
	}
	// Parse the HTML and extract the result list.
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("[%s] 解析搜索结果HTML失败: %w", p.Name(), err)
	}
	return p.parseSearchResults(doc)
}
// doRequestWithRetry performs the request with up to three attempts and
// exponential backoff (200ms, 400ms). A non-200 status is treated as a
// retriable failure. Returns the first 200 response, or an error describing
// the last failure.
func (p *HdmoliPlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) {
	maxRetries := 3
	var lastErr error
	for i := 0; i < maxRetries; i++ {
		if i > 0 {
			// Exponential backoff before each retry.
			backoff := time.Duration(1<<uint(i-1)) * 200 * time.Millisecond
			time.Sleep(backoff)
		}
		// Clone the request so each attempt gets an independent instance.
		reqClone := req.Clone(req.Context())
		resp, err := client.Do(reqClone)
		if err == nil && resp.StatusCode == 200 {
			return resp, nil
		}
		if err != nil {
			lastErr = err
		} else {
			// Bug fix: a non-200 response previously left lastErr nil, so the
			// final error wrapped a nil error. Record the status explicitly.
			lastErr = fmt.Errorf("HTTP状态码错误: %d", resp.StatusCode)
		}
		if resp != nil {
			resp.Body.Close()
		}
	}
	return nil, fmt.Errorf("[%s] 重试 %d 次后仍然失败: %w", p.Name(), maxRetries, lastErr)
}
// parseSearchResults extracts result items from the search page DOM.
func (p *HdmoliPlugin) parseSearchResults(doc *goquery.Document) ([]model.SearchResult, error) {
	var results []model.SearchResult
	// Each hit lives in: #searchList > li.active.clearfix
	doc.Find("#searchList > li.active.clearfix").Each(func(i int, s *goquery.Selection) {
		// Cap the number of parsed results. Returning here only skips the
		// current item — goquery's Each cannot be aborted early.
		if len(results) >= MaxResults {
			return
		}
		result := p.parseResultItem(s, i+1)
		if result != nil {
			results = append(results, *result)
		}
	})
	if p.debugMode {
		log.Printf("[HDMOLI] 解析到 %d 个原始结果", len(results))
	}
	return results, nil
}
// parseResultItem converts one search-result <li> into a SearchResult.
// The detail-page URL is appended to Content as a marker line; it is
// resolved (and stripped again) later by fetchDetailLinks/cleanContent.
// Returns nil when the item carries no usable title or link.
func (p *HdmoliPlugin) parseResultItem(s *goquery.Selection, index int) *model.SearchResult {
	// Title anchor: .detail h4.title a
	titleEl := s.Find(".detail h4.title a")
	if titleEl.Length() == 0 {
		if p.debugMode {
			log.Printf("[HDMOLI] 跳过无标题链接的结果")
		}
		return nil
	}
	title := strings.TrimSpace(titleEl.Text())
	if title == "" {
		return nil
	}
	// Detail-page link: prefer the title anchor, fall back to the thumbnail.
	detailURL, _ := titleEl.Attr("href")
	if detailURL == "" {
		thumbEl := s.Find(".thumb a")
		if thumbEl.Length() > 0 {
			detailURL, _ = thumbEl.Attr("href")
		}
	}
	if detailURL == "" {
		if p.debugMode {
			log.Printf("[HDMOLI] 跳过无链接的结果: %s", title)
		}
		return nil
	}
	// Make relative paths absolute.
	if strings.HasPrefix(detailURL, "/") {
		detailURL = BaseURL + detailURL
	}
	// Metadata extracted via helpers.
	rating := p.extractRating(s)
	updateStatus := p.extractUpdateStatus(s)
	director := p.extractDirector(s)
	actors := p.extractActors(s)
	category, region, year := p.extractCategoryInfo(s)
	description := p.extractDescription(s)
	// Assemble the Content body, one "label:value" line per present field.
	var contentParts []string
	if rating != "" {
		contentParts = append(contentParts, fmt.Sprintf("评分:%s", rating))
	}
	if updateStatus != "" {
		contentParts = append(contentParts, fmt.Sprintf("状态:%s", updateStatus))
	}
	if director != "" {
		contentParts = append(contentParts, fmt.Sprintf("导演:%s", director))
	}
	if len(actors) > 0 {
		actorStr := strings.Join(actors, " ")
		// Bug fix: truncate on rune boundaries; the previous byte slice
		// (actorStr[:100]) could cut a multi-byte character in half and
		// emit invalid UTF-8.
		if runes := []rune(actorStr); len(runes) > 100 {
			actorStr = string(runes[:100]) + "..."
		}
		contentParts = append(contentParts, fmt.Sprintf("主演:%s", actorStr))
	}
	if category != "" {
		contentParts = append(contentParts, fmt.Sprintf("分类:%s", category))
	}
	if region != "" {
		contentParts = append(contentParts, fmt.Sprintf("地区:%s", region))
	}
	if year != "" {
		contentParts = append(contentParts, fmt.Sprintf("年份:%s", year))
	}
	if description != "" {
		contentParts = append(contentParts, fmt.Sprintf("简介:%s", description))
	}
	content := strings.Join(contentParts, "\n")
	// Tags mirror the category/region/year fields.
	var tags []string
	if category != "" {
		tags = append(tags, category)
	}
	if region != "" {
		tags = append(tags, region)
	}
	if year != "" {
		tags = append(tags, year)
	}
	// Build the result; Links stays empty until the detail page is fetched.
	result := model.SearchResult{
		Title:     title,
		Content:   content,
		Channel:   "", // plugin results must use an empty channel (dev-guide rule)
		MessageID: fmt.Sprintf("%s-%d-%d", p.Name(), index, time.Now().Unix()),
		UniqueID:  fmt.Sprintf("%s-%d-%d", p.Name(), index, time.Now().Unix()),
		Datetime:  time.Now(), // the list page carries no timestamp
		Links:     []model.Link{},
		Tags:      tags,
	}
	// Smuggle the detail URL through Content for the next pipeline stage.
	result.Content += fmt.Sprintf("\n详情页URL: %s", detailURL)
	if p.debugMode {
		log.Printf("[HDMOLI] 解析结果: %s (%s)", title, category)
	}
	return &result
}
// extractRating returns the score badge text (e.g. "7.6分"), or "" if absent.
// An empty goquery selection yields an empty Text(), so no length check is
// needed before trimming.
func (p *HdmoliPlugin) extractRating(s *goquery.Selection) string {
	return strings.TrimSpace(s.Find(".pic-tag").Text())
}

// extractUpdateStatus returns the update badge text (e.g. "更新至06集"),
// or "" if absent.
func (p *HdmoliPlugin) extractUpdateStatus(s *goquery.Selection) string {
	return strings.TrimSpace(s.Find(".pic-text").Text())
}
// extractDirector pulls the director name from the first <p> whose text
// contains "导演:"; returns "" when no such paragraph exists.
func (p *HdmoliPlugin) extractDirector(s *goquery.Selection) string {
	director := ""
	s.Find("p").Each(func(_ int, para *goquery.Selection) {
		// Keep the first match only.
		if director != "" {
			return
		}
		text := para.Text()
		if !strings.Contains(text, "导演:") {
			return
		}
		if segs := strings.Split(text, "导演:"); len(segs) > 1 {
			director = strings.TrimSpace(segs[1])
		}
	})
	return director
}
// extractActors collects actor names from every <p> whose text contains
// "主演:", reading the anchor texts inside it.
func (p *HdmoliPlugin) extractActors(s *goquery.Selection) []string {
	var actors []string
	s.Find("p").Each(func(_ int, para *goquery.Selection) {
		if !strings.Contains(para.Text(), "主演:") {
			return
		}
		para.Find("a").Each(func(_ int, anchor *goquery.Selection) {
			if name := strings.TrimSpace(anchor.Text()); name != "" {
				actors = append(actors, name)
			}
		})
	})
	return actors
}
// extractCategoryInfo parses the "分类:... 地区:... 年份:..." line of a
// search hit and returns the first token of each field.
//
// Bug fixes vs the original:
//   - the text is now split on the full-width colon ":" (matching the
//     "分类:" marker checked below); the previous separator was empty/garbled,
//     which split the text per rune and made the suffix checks unreachable;
//   - the trimmed label is actually used in the HasSuffix checks (the trimmed
//     value was previously computed and discarded — staticcheck SA4006);
//   - the token-splitting regexp is compiled once per call instead of once
//     per label.
func (p *HdmoliPlugin) extractCategoryInfo(s *goquery.Selection) (category, region, year string) {
	// Field values may contain several comma/whitespace-separated tokens;
	// only the first one is kept.
	sepRe := regexp.MustCompile(`[,\s]+`)
	s.Find("p").Each(func(_ int, pEl *goquery.Selection) {
		text := pEl.Text()
		if !strings.Contains(text, "分类:") {
			return
		}
		// "label:value" pairs alternate around full-width colons.
		parts := strings.Split(text, ":")
		for i := 0; i+1 < len(parts); i++ {
			label := strings.TrimSpace(parts[i])
			value := strings.TrimSpace(parts[i+1])
			fields := sepRe.Split(value, -1)
			if len(fields) == 0 || fields[0] == "" {
				continue
			}
			switch {
			case strings.HasSuffix(label, "分类"):
				category = fields[0]
			case strings.HasSuffix(label, "地区"):
				region = fields[0]
			case strings.HasSuffix(label, "年份"):
				year = fields[0]
			}
		}
	})
	return category, region, year
}
// extractDescription returns the synopsis from the first p.hidden-xs whose
// text contains "简介:", truncated to 200 characters.
func (p *HdmoliPlugin) extractDescription(s *goquery.Selection) string {
	var description string
	descEl := s.Find("p.hidden-xs")
	descEl.Each(func(i int, p *goquery.Selection) {
		// Keep the first match only.
		if description != "" {
			return
		}
		text := p.Text()
		if strings.Contains(text, "简介:") {
			parts := strings.Split(text, "简介:")
			if len(parts) > 1 {
				desc := strings.TrimSpace(parts[1])
				// Bug fix: truncate on rune boundaries. The previous byte
				// slice (desc[:200]) could cut a multi-byte Chinese character
				// in half and produce invalid UTF-8.
				if runes := []rune(desc); len(runes) > 200 {
					desc = string(runes[:200]) + "..."
				}
				description = desc
			}
		}
	})
	return description
}
// fetchDetailLinks resolves each search hit's detail page concurrently and
// keeps only the results that yielded at least one net-disk link.
func (p *HdmoliPlugin) fetchDetailLinks(client *http.Client, searchResults []model.SearchResult, keyword string) []model.SearchResult {
	if len(searchResults) == 0 {
		return []model.SearchResult{}
	}
	// Bound concurrency with a semaphore channel; the results channel is
	// buffered so no worker can block on send.
	semaphore := make(chan struct{}, MaxConcurrency)
	var wg sync.WaitGroup
	resultsChan := make(chan model.SearchResult, len(searchResults))
	for _, result := range searchResults {
		wg.Add(1)
		go func(r model.SearchResult) {
			defer wg.Done()
			semaphore <- struct{}{}        // acquire slot
			defer func() { <-semaphore }() // release slot
			// The detail URL was smuggled through the Content field by
			// parseResultItem.
			detailURL := p.extractDetailURLFromContent(r.Content)
			if detailURL == "" {
				if p.debugMode {
					log.Printf("[HDMOLI] 跳过无详情页URL的结果: %s", r.Title)
				}
				return
			}
			// Fetch and parse the detail page for net-disk links.
			links := p.fetchDetailPageLinks(client, detailURL)
			if len(links) > 0 {
				r.Links = links
				// Strip the marker line before handing the result back.
				r.Content = p.cleanContent(r.Content)
				resultsChan <- r
			} else if p.debugMode {
				log.Printf("[HDMOLI] 详情页无有效链接: %s", r.Title)
			}
		}(result)
	}
	// Close the channel once every worker has finished.
	go func() {
		wg.Wait()
		close(resultsChan)
	}()
	// Drain the channel into the final slice.
	var finalResults []model.SearchResult
	for result := range resultsChan {
		finalResults = append(finalResults, result)
	}
	return finalResults
}
// extractDetailURLFromContent returns the detail-page URL embedded in the
// content block by parseResultItem, or "" if no marker line is present.
func (p *HdmoliPlugin) extractDetailURLFromContent(content string) string {
	const marker = "详情页URL: "
	for _, line := range strings.Split(content, "\n") {
		if strings.HasPrefix(line, marker) {
			return strings.TrimPrefix(line, marker)
		}
	}
	return ""
}

// cleanContent removes the embedded detail-page URL marker line from content.
func (p *HdmoliPlugin) cleanContent(content string) string {
	const marker = "详情页URL: "
	var kept []string
	for _, line := range strings.Split(content, "\n") {
		if strings.HasPrefix(line, marker) {
			continue
		}
		kept = append(kept, line)
	}
	return strings.Join(kept, "\n")
}
// fetchDetailPageLinks downloads a detail page and extracts its net-disk
// links, memoising non-empty results in detailCache.
//
// NOTE(review): cacheTTL is never checked here, so cached entries live for
// the process lifetime — confirm that is intended.
func (p *HdmoliPlugin) fetchDetailPageLinks(client *http.Client, detailURL string) []model.Link {
	// Serve from cache when possible.
	if cached, found := p.detailCache.Load(detailURL); found {
		if links, ok := cached.([]model.Link); ok {
			if p.debugMode {
				log.Printf("[HDMOLI] 使用缓存的详情页链接: %s", detailURL)
			}
			return links
		}
	}
	// Bound the request with a 30-second context.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, "GET", detailURL, nil)
	if err != nil {
		if p.debugMode {
			log.Printf("[HDMOLI] 创建详情页请求失败: %v", err)
		}
		return []model.Link{}
	}
	// Browser-like headers.
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
	req.Header.Set("Referer", BaseURL+"/")
	resp, err := client.Do(req)
	if err != nil {
		if p.debugMode {
			log.Printf("[HDMOLI] 详情页请求失败: %v", err)
		}
		return []model.Link{}
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		if p.debugMode {
			log.Printf("[HDMOLI] 详情页HTTP状态错误: %d", resp.StatusCode)
		}
		return []model.Link{}
	}
	// Read the whole body; detail pages are small.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		if p.debugMode {
			log.Printf("[HDMOLI] 读取详情页响应失败: %v", err)
		}
		return []model.Link{}
	}
	// Extract the net-disk links from the page HTML.
	links := p.parseNetworkDiskLinks(string(body))
	// Cache only non-empty results so transient failures can be retried.
	if len(links) > 0 {
		p.detailCache.Store(detailURL, links)
	}
	if p.debugMode {
		log.Printf("[HDMOLI] 从详情页提取到 %d 个链接: %s", len(links), detailURL)
	}
	return links
}
// parseNetworkDiskLinks extracts Quark and Baidu net-disk links from the
// "视频下载" (.downlist) section of a detail page. Falls back to regex
// parsing when the HTML cannot be parsed into a DOM.
func (p *HdmoliPlugin) parseNetworkDiskLinks(htmlContent string) []model.Link {
	var links []model.Link
	// Parse the HTML so links can be located precisely.
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent))
	if err != nil {
		if p.debugMode {
			log.Printf("[HDMOLI] 解析详情页HTML失败: %v", err)
		}
		// Degrade to regex extraction if DOM parsing fails.
		return p.parseNetworkDiskLinksWithRegex(htmlContent)
	}
	// Walk every paragraph inside the download section.
	doc.Find(".downlist").Each(func(i int, s *goquery.Selection) {
		s.Find("p").Each(func(j int, pEl *goquery.Selection) {
			text := pEl.Text()
			// Quark links follow a "夸 克:" / "夸克:" label.
			if strings.Contains(text, "夸 克:") || strings.Contains(text, "夸克:") {
				pEl.Find("a").Each(func(k int, a *goquery.Selection) {
					href, exists := a.Attr("href")
					if exists && strings.Contains(href, "pan.quark.cn") {
						link := model.Link{
							Type:     "quark",
							URL:      href,
							Password: p.extractPasswordFromQuarkURL(href),
						}
						links = append(links, link)
						if p.debugMode {
							log.Printf("[HDMOLI] 找到夸克链接: %s", href)
						}
					}
				})
			}
			// Baidu links follow a "百 度:" / "百度:" label.
			if strings.Contains(text, "百 度:") || strings.Contains(text, "百度:") {
				pEl.Find("a").Each(func(k int, a *goquery.Selection) {
					href, exists := a.Attr("href")
					if exists && strings.Contains(href, "pan.baidu.com") {
						password := p.extractPasswordFromBaiduURL(href)
						link := model.Link{
							Type:     "baidu",
							URL:      href,
							Password: password,
						}
						links = append(links, link)
						if p.debugMode {
							log.Printf("[HDMOLI] 找到百度链接: %s (密码: %s)", href, password)
						}
					}
				})
			}
		})
	})
	return links
}
// parseNetworkDiskLinksWithRegex is the regex fallback used when the detail
// page cannot be parsed as a DOM. It matches the "<b>夸 克:</b>" and
// "<b>百 度:</b>" label markup directly.
func (p *HdmoliPlugin) parseNetworkDiskLinksWithRegex(htmlContent string) []model.Link {
	var links []model.Link
	// Quark: anchor immediately after the "<b>夸 克:</b>" label.
	quarkPattern := regexp.MustCompile(`<b>夸\s*克:</b><a[^>]*href\s*=\s*["']([^"']*pan\.quark\.cn[^"']*)["'][^>]*>`)
	quarkMatches := quarkPattern.FindAllStringSubmatch(htmlContent, -1)
	for _, match := range quarkMatches {
		if len(match) > 1 {
			link := model.Link{
				Type:     "quark",
				URL:      match[1],
				Password: "",
			}
			links = append(links, link)
		}
	}
	// Baidu: anchor immediately after the "<b>百 度:</b>" label; the share
	// password is usually embedded in the URL's pwd parameter.
	baiduPattern := regexp.MustCompile(`<b>百\s*度:</b><a[^>]*href\s*=\s*["']([^"']*pan\.baidu\.com[^"']*)["'][^>]*>`)
	baiduMatches := baiduPattern.FindAllStringSubmatch(htmlContent, -1)
	for _, match := range baiduMatches {
		if len(match) > 1 {
			password := p.extractPasswordFromBaiduURL(match[1])
			link := model.Link{
				Type:     "baidu",
				URL:      match[1],
				Password: password,
			}
			links = append(links, link)
		}
	}
	return links
}
// extractPasswordFromQuarkURL returns the share password for a Quark URL.
// Quark shares generally require no extraction code, so this returns "".
func (p *HdmoliPlugin) extractPasswordFromQuarkURL(panURL string) string {
	return ""
}
// extractPasswordFromBaiduURL extracts the share password from a Baidu pan
// URL's "pwd" query parameter, e.g. ".../s/xxx?pwd=moil" -> "moil".
// Returns "" when no password is present.
//
// Bug fix: the previous string-splitting returned everything after "pwd="
// verbatim, so URLs with trailing parameters or a fragment yielded passwords
// like "moil&foo=1". The URL is now parsed properly, with a string fallback
// that stops at the next '&' or '#'.
func (p *HdmoliPlugin) extractPasswordFromBaiduURL(panURL string) string {
	// Preferred: standard-library query parsing.
	if u, err := url.Parse(panURL); err == nil {
		if pwd := u.Query().Get("pwd"); pwd != "" {
			return pwd
		}
	}
	// Fallback for malformed URLs: manual extraction of the pwd value.
	for _, sep := range []string{"?pwd=", "&pwd="} {
		if idx := strings.Index(panURL, sep); idx != -1 {
			pwd := panURL[idx+len(sep):]
			if stop := strings.IndexAny(pwd, "&#"); stop != -1 {
				pwd = pwd[:stop]
			}
			return pwd
		}
	}
	return ""
}

View File

@@ -0,0 +1,167 @@
# HDMOLIHDmoli插件HTML结构分析
## 网站概述
- **网站名称**: HDmoli
- **域名**: https://www.hdmoli.pro/
- **类型**: 影视资源网站,主要提供网盘下载链接(夸克网盘、百度网盘)
## API流程概述
### 搜索页面
- **请求URL**: `https://www.hdmoli.pro/search.php?searchkey={keyword}&submit=`
- **方法**: GET
- **Headers**: 需要设置 `Referer: https://www.hdmoli.pro/`
- **特点**: 简单的GET请求搜索
## 搜索结果结构
### 搜索结果页面HTML结构
```html
<ul class="myui-vodlist__media clearfix" id="searchList">
<li class="active clearfix">
<div class="thumb">
<a class="myui-vodlist__thumb" href="/movie/index2976.html" title="怪兽8号 第二季">
<span class="pic-tag pic-tag-top" style="background-color: #5bb7fe;">
7.6分
</span>
<span class="pic-text text-right">
更新至06集
</span>
</a>
</div>
<div class="detail">
<h4 class="title">
<a href="/movie/index2976.html">怪兽8号 第二季</a>
</h4>
<p><span class="text-muted">导演:</span>宫繁之</p>
<p><span class="text-muted">主演:</span>
<a href="...">福西胜也</a>&nbsp;
<a href="...">濑户麻沙美</a>&nbsp;
</p>
<p><span class="text-muted">分类:</span>日本
<span class="split-line"></span>
<span class="text-muted hidden-xs">地区:</span>日本
<span class="split-line"></span>
<span class="text-muted hidden-xs">年份:</span>2025
</p>
<p class="hidden-xs"><span class="text-muted">简介:</span>...</p>
<p class="margin-0">
<a class="btn btn-lg btn-warm" href="/movie/index2976.html">立即播放</a>
</p>
</div>
</li>
</ul>
```
### 详情页面HTML结构
```html
<div class="myui-content__detail">
<h1 class="title text-fff">怪兽8号 第二季</h1>
<!-- 评分 -->
<div id="rating" class="score" data-id="2976">
<span class="branch">7.6</span>
</div>
<!-- 基本信息 -->
<p class="data">
<span class="text-muted">分类:</span>动作,科幻
<span class="text-muted hidden-xs">地区:</span>日本
<span class="text-muted hidden-xs">年份:</span>2025
</p>
<p class="data"><span class="text-muted">演员:</span>...</p>
<p class="data"><span class="text-muted">导演:</span>...</p>
<p class="data hidden-sm"><span class="text-muted hidden-xs">更新:</span>2025-08-24 02:21</p>
</div>
<!-- 视频下载区域 -->
<div class="myui-panel myui-panel-bg clearfix">
<div class="myui-panel_hd">
<h3 class="title">视频下载</h3>
</div>
<ul class="stui-vodlist__text downlist col-pd clearfix">
<div class="row">
<p class="text-muted col-pd">
<b>夸 克:</b>
<a title="夸克链接" href="https://pan.quark.cn/s/a061332a75e9" target="_blank">
https://pan.quark.cn/s/a061332a75e9
</a>
</p>
<p class="text-muted col-pd">
<b>百 度:</b>
<a title="百度网盘" href="https://pan.baidu.com/s/xxx?pwd=moil" target="_blank">
https://pan.baidu.com/s/...
</a>
</p>
</div>
</ul>
</div>
```
## 数据提取要点
### 搜索结果页面
1. **结果列表**: `#searchList > li.active.clearfix` - 每个搜索结果
2. **标题**: `.detail h4.title a` - 获取文本和href属性
3. **详情页链接**: `.detail h4.title a[href]``.thumb a[href]`
4. **评分**: `.pic-tag` - 数字+分
5. **更新状态**: `.pic-text` - 如"更新至06集"、"12集全"
6. **导演**: 包含"导演:"的`<p>`标签内容
7. **主演**: 包含"主演:"的`<p>`标签内的链接
8. **分类信息**: 包含"分类:"的`<p>`标签 - 分类/地区/年份
9. **简介**: 包含"简介:"的`<p>`标签(可能为空或很短)
### 详情页面
1. **标题**: `h1.title` - 影片完整标题
2. **豆瓣评分**: `.score .branch` - 数字评分
3. **基本信息**: `.data`标签中的各种信息
- 分类: "分类:" 后的内容
- 地区: "地区:" 后的内容
- 年份: "年份:" 后的内容
- 又名: "又名:" 后的内容(如有)
4. **演员**: 包含"演员:"的`.data`标签内的链接
5. **导演**: 包含"导演:"的`.data`标签内的链接
6. **更新时间**: 包含"更新:"的`.data`标签
7. **网盘链接提取**:
- 夸克网盘: `<b>夸 克:</b>` 后的 `<a>` 标签
- 百度网盘: `<b>百 度:</b>` 后的 `<a>` 标签
- 其他可能的网盘类型
## 网盘链接识别规则
### 支持的网盘类型
- **夸克网盘**: `pan.quark.cn`
- **百度网盘**: `pan.baidu.com`
- **阿里云盘**: `aliyundrive.com` / `alipan.com`(可能出现)
- **天翼云盘**: `cloud.189.cn`(可能出现)
### 链接提取策略
1. 在详情页的"视频下载"区域搜索
2. 按网盘类型标识符匹配(夸 克:、百 度:等)
3. 提取对应的`<a>`标签的`href`属性
4. 从URL或周围文本提取可能的提取码`?pwd=xxx`
## 特殊处理
### 时间解析
- 搜索结果页无明确时间信息
- 详情页有更新时间:格式 `2025-08-24 02:21`
- 可使用更新时间作为发布时间
### 内容处理
- 评分处理:提取数字部分
- 更新状态:如"更新至06集"、"完结"等
- 简介可能很短或为空
- 标题清理:移除多余空格
### 分页处理
- 搜索结果有分页:`.myui-page` 区域
- 分页链接格式:`?page=2&searchkey=xxx&searchtype=`
## 注意事项
1. **网盘为主**: 此网站主要提供网盘下载链接,而非在线播放
2. **referer必需**: 请求时需要设置正确的referer头
3. **编码处理**: 关键词需要URL编码
4. **链接验证**: 网盘链接可能失效,需要验证有效性
5. **提取码**: 百度网盘链接通常有提取码在URL参数或文本中

View File

@@ -473,9 +473,9 @@ if err != nil {
```
**处理策略**
- 📝 详细错误日志
- 🔄 降级处理机制
- 📊 监控告警
- 详细错误日志
- 降级处理机制
- 监控告警
#### 3. 数据错误
@@ -880,7 +880,7 @@ log.Debug("缓存命中", "key", cacheKey, "type", "detail_page")
---
## 📝 开发指南
## 开发指南
### 代码规范

1008
plugin/javdb/javdb.go Normal file

File diff suppressed because it is too large Load Diff

422
plugin/u3c3/u3c3.go Normal file
View File

@@ -0,0 +1,422 @@
package u3c3
import (
"fmt"
"io"
"log"
"net/http"
"net/url"
"regexp"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"pansou/model"
"pansou/plugin"
)
const (
	// BaseURL is the site mirror currently used for all requests.
	BaseURL = "https://u3c3u3c3.u3c3u3c3u3c3.com"
	// UserAgent mimics a desktop Chrome browser.
	UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
	// MaxRetries and RetryDelay control the fixed-delay HTTP retry loop.
	MaxRetries = 3
	RetryDelay = 2 * time.Second
)
// U3c3Plugin searches the U3C3 torrent index for magnet/torrent links.
type U3c3Plugin struct {
	*plugin.BaseAsyncPlugin
	debugMode bool      // verbose logging switch
	search2   string    // cached anti-bot "search2" token scraped from the home page
	lastSync  time.Time // when search2 was last refreshed (treated as valid for 1 hour)
	// NOTE(review): search2/lastSync are read and written without a lock in
	// getSearch2Parameter — confirm searches are never run concurrently.
}
// init registers the plugin with the global registry at import time.
func init() {
	p := &U3c3Plugin{
		// Priority 5, with result filtering enabled.
		BaseAsyncPlugin: plugin.NewBaseAsyncPluginWithFilter("u3c3", 5, true),
		debugMode:       false,
	}
	plugin.RegisterGlobalPlugin(p)
}
// Search runs a keyword search and returns just the result slice,
// delegating to SearchWithResult.
func (p *U3c3Plugin) Search(keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
	result, err := p.SearchWithResult(keyword, ext)
	if err != nil {
		return nil, err
	}
	return result.Results, nil
}
// SearchWithResult performs the full search flow: obtain the site's anti-bot
// "search2" token, run the search, then keyword-filter the parsed results.
func (p *U3c3Plugin) SearchWithResult(keyword string, ext map[string]interface{}) (*model.PluginSearchResult, error) {
	if p.debugMode {
		log.Printf("[U3C3] 开始搜索: %s", keyword)
	}
	// Step 1: fetch (or reuse the cached) search2 token.
	search2, err := p.getSearch2Parameter()
	if err != nil {
		if p.debugMode {
			log.Printf("[U3C3] 获取search2参数失败: %v", err)
		}
		return nil, fmt.Errorf("获取search2参数失败: %v", err)
	}
	// Step 2: run the actual search.
	results, err := p.doSearch(keyword, search2)
	if err != nil {
		if p.debugMode {
			log.Printf("[U3C3] 搜索失败: %v", err)
		}
		return nil, err
	}
	if p.debugMode {
		log.Printf("[U3C3] 搜索完成,获得 %d 个结果", len(results))
	}
	// Step 3: keyword filtering.
	filteredResults := plugin.FilterResultsByKeyword(results, keyword)
	return &model.PluginSearchResult{
		Results:   filteredResults,
		IsFinal:   true,
		Timestamp: time.Now(),
		Source:    p.Name(),
		Message:   fmt.Sprintf("找到 %d 个结果", len(filteredResults)),
	}, nil
}
// getSearch2Parameter returns the anti-bot "search2" token scraped from the
// home page's inline JavaScript, caching it on the plugin for one hour.
//
// NOTE(review): p.search2/p.lastSync are plain fields mutated here without
// synchronization; concurrent searches would race — confirm callers serialize.
func (p *U3c3Plugin) getSearch2Parameter() (string, error) {
	// Serve from cache while it is fresh (less than one hour old).
	if p.search2 != "" && time.Since(p.lastSync) < time.Hour {
		return p.search2, nil
	}
	if p.debugMode {
		log.Printf("[U3C3] 正在获取search2参数...")
	}
	client := &http.Client{
		Timeout: 30 * time.Second,
	}
	req, err := http.NewRequest("GET", BaseURL, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
	var resp *http.Response
	var lastErr error
	// Fixed-delay retry loop; break on the first 200 response. On failure the
	// body is closed before the next attempt.
	for i := 0; i < MaxRetries; i++ {
		resp, lastErr = client.Do(req)
		if lastErr == nil && resp.StatusCode == 200 {
			break
		}
		if resp != nil {
			resp.Body.Close()
		}
		if i < MaxRetries-1 {
			time.Sleep(RetryDelay)
		}
	}
	if lastErr != nil {
		return "", lastErr
	}
	// NOTE(review): if the final attempt returned non-200 its body was already
	// closed inside the loop, so this defer double-closes — harmless but untidy.
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		return "", fmt.Errorf("HTTP状态码错误: %d", resp.StatusCode)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	// Scrape the token out of the home page's inline JavaScript.
	search2 := p.extractSearch2FromHTML(string(body))
	if search2 == "" {
		return "", fmt.Errorf("无法从首页提取search2参数")
	}
	// Refresh the cache.
	p.search2 = search2
	p.lastSync = time.Now()
	if p.debugMode {
		log.Printf("[U3C3] 获取到search2参数: %s", search2)
	}
	return search2, nil
}
// extractSearch2FromHTML scrapes the "search2" token from inline JavaScript,
// looking for a non-comment line of the form: var nmefafej = "...".
// Returns "" when no candidate longer than 5 bytes is found.
func (p *U3c3Plugin) extractSearch2FromHTML(html string) string {
	// Scan line by line so commented-out assignments can be skipped.
	lines := strings.Split(html, "\n")
	for _, line := range lines {
		line = strings.TrimSpace(line)
		// Skip single-line JS comments.
		if strings.HasPrefix(line, "//") {
			continue
		}
		// Only consider lines mentioning the variable and containing quotes.
		if strings.Contains(line, "nmefafej") && strings.Contains(line, `"`) {
			// Primary: capture the quoted value of the assignment.
			re := regexp.MustCompile(`var\s+nmefafej\s*=\s*"([^"]+)"`)
			matches := re.FindStringSubmatch(line)
			if len(matches) > 1 && len(matches[1]) > 5 {
				if p.debugMode {
					log.Printf("[U3C3] 提取到search2参数: %s (来自行: %s)", matches[1], line)
				}
				return matches[1]
			}
			// Fallback: take the first quoted substring longer than 5 bytes.
			start := strings.Index(line, `"`)
			if start != -1 {
				end := strings.Index(line[start+1:], `"`)
				if end != -1 && end > 5 {
					candidate := line[start+1 : start+1+end]
					if len(candidate) > 5 {
						if p.debugMode {
							log.Printf("[U3C3] 备用方案提取search2: %s (来自行: %s)", candidate, line)
						}
						return candidate
					}
				}
			}
		}
	}
	if p.debugMode {
		log.Printf("[U3C3] 未能找到search2参数")
	}
	return ""
}
// doSearch issues the search request (with the anti-bot search2 token) and
// parses the resulting HTML into search results.
func (p *U3c3Plugin) doSearch(keyword, search2 string) ([]model.SearchResult, error) {
	// Build the search URL with the keyword percent-encoded.
	encodedKeyword := url.QueryEscape(keyword)
	searchURL := fmt.Sprintf("%s/?search2=%s&search=%s", BaseURL, search2, encodedKeyword)
	if p.debugMode {
		log.Printf("[U3C3] 搜索URL: %s", searchURL)
	}
	client := &http.Client{
		Timeout: 30 * time.Second,
	}
	req, err := http.NewRequest("GET", searchURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Referer", BaseURL+"/")
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
	var resp *http.Response
	var lastErr error
	// Fixed-delay retry loop; break on the first 200 response.
	for i := 0; i < MaxRetries; i++ {
		resp, lastErr = client.Do(req)
		if lastErr == nil && resp.StatusCode == 200 {
			break
		}
		if resp != nil {
			resp.Body.Close()
		}
		if i < MaxRetries-1 {
			time.Sleep(RetryDelay)
		}
	}
	if lastErr != nil {
		return nil, lastErr
	}
	// NOTE(review): if the final attempt returned non-200 its body was already
	// closed inside the loop, so this defer double-closes — harmless but untidy.
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("搜索请求失败,状态码: %d", resp.StatusCode)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	return p.parseSearchResults(string(body))
}
// parseSearchResults parses the search result page HTML into SearchResult
// items. Each non-advertisement table row yields one result carrying magnet
// and/or torrent links plus size/date/category metadata in Content.
func (p *U3c3Plugin) parseSearchResults(html string) ([]model.SearchResult, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, err
	}
	var results []model.SearchResult
	// Each search hit is a <tr class="default"> row inside the results table.
	doc.Find("tbody tr.default").Each(func(i int, s *goquery.Selection) {
		// Skip pinned advertisement rows (marked "[置顶]" in the title cell).
		titleCell := s.Find("td:nth-child(2)")
		titleText := titleCell.Text()
		if strings.Contains(titleText, "[置顶]") {
			return // pinned ad row
		}
		// Title and detail-page link.
		titleLink := titleCell.Find("a")
		title := strings.TrimSpace(titleLink.Text())
		if title == "" {
			return // skip rows without a title
		}
		// Strip HTML tags and excess whitespace from the title.
		title = p.cleanTitle(title)
		// Detail-page URL (currently unused downstream; kept for future use).
		detailURL, _ := titleLink.Attr("href")
		if detailURL != "" && !strings.HasPrefix(detailURL, "http") {
			detailURL = BaseURL + detailURL
		}
		// The third cell holds the download links.
		linkCell := s.Find("td:nth-child(3)")
		var links []model.Link
		// Magnet links.
		linkCell.Find("a[href^='magnet:']").Each(func(j int, link *goquery.Selection) {
			href, exists := link.Attr("href")
			if exists && href != "" {
				links = append(links, model.Link{
					URL:  href,
					Type: "magnet",
				})
			}
		})
		// .torrent file links; relative paths are made absolute.
		linkCell.Find("a[href$='.torrent']").Each(func(j int, link *goquery.Selection) {
			href, exists := link.Attr("href")
			if exists && href != "" {
				if !strings.HasPrefix(href, "http") {
					href = BaseURL + href
				}
				links = append(links, model.Link{
					URL:  href,
					Type: "torrent",
				})
			}
		})
		// File size (4th cell).
		sizeText := strings.TrimSpace(s.Find("td:nth-child(4)").Text())
		// Upload date (5th cell).
		dateText := strings.TrimSpace(s.Find("td:nth-child(5)").Text())
		// Category name from the first cell's link title attribute.
		categoryText := s.Find("td:nth-child(1) a").AttrOr("title", "")
		// Assemble the human-readable content summary.
		var contentParts []string
		if categoryText != "" {
			contentParts = append(contentParts, "分类: "+categoryText)
		}
		if sizeText != "" {
			contentParts = append(contentParts, "大小: "+sizeText)
		}
		if dateText != "" {
			contentParts = append(contentParts, "时间: "+dateText)
		}
		content := strings.Join(contentParts, " | ")
		// Stable per-result ID derived from plugin name, title and size.
		uniqueID := p.generateUniqueID(title, sizeText)
		result := model.SearchResult{
			Title:    title,
			Content:  content,
			Channel:  "", // plugin results must leave Channel empty
			Tags:     []string{"种子", "磁力链接"},
			Datetime: p.parseDateTime(dateText),
			Links:    links,
			UniqueID: uniqueID,
		}
		results = append(results, result)
	})
	if p.debugMode {
		log.Printf("[U3C3] 解析到 %d 个搜索结果", len(results))
	}
	return results, nil
}
// Patterns used by cleanTitle, compiled once at package level instead of
// on every call (regexp.MustCompile in a hot loop is wasted work).
var (
	u3c3HTMLTagRe    = regexp.MustCompile(`<[^>]*>`)
	u3c3WhitespaceRe = regexp.MustCompile(`\s+`)
)

// cleanTitle normalizes a result title: strips HTML tags, collapses runs
// of whitespace to a single space, and trims leading/trailing whitespace.
func (p *U3c3Plugin) cleanTitle(title string) string {
	title = u3c3HTMLTagRe.ReplaceAllString(title, "")
	title = u3c3WhitespaceRe.ReplaceAllString(title, " ")
	return strings.TrimSpace(title)
}
// parseDateTime converts a date string scraped from the results table into
// a time.Time. An empty or unrecognized string yields the zero time.
func (p *U3c3Plugin) parseDateTime(dateStr string) time.Time {
	if dateStr == "" {
		return time.Time{}
	}
	// Try the site's known date layouts in order of specificity.
	for _, layout := range []string{
		"2006-01-02 15:04:05",
		"2006-01-02",
		"01-02 15:04",
	} {
		if parsed, err := time.Parse(layout, dateStr); err == nil {
			return parsed
		}
	}
	// None of the layouts matched: fall back to the zero time.
	return time.Time{}
}
// generateUniqueID derives a stable per-result ID from the plugin name,
// title and size.
//
// The hash is accumulated in a uint64: the previous signed-int version
// could wrap negative, and its `hash = -hash` fix-up itself failed for
// math.MinInt (whose negation is still negative), so IDs were not
// guaranteed non-negative. Unsigned accumulation removes both issues.
func (p *U3c3Plugin) generateUniqueID(title, size string) string {
	source := fmt.Sprintf("%s-%s-%s", p.Name(), title, size)
	// Simple 31-multiplier polynomial hash over the runes of source.
	var hash uint64
	for _, char := range source {
		hash = hash*31 + uint64(char)
	}
	return fmt.Sprintf("u3c3-%d", hash)
}

View File

@@ -0,0 +1,114 @@
# XYS小云搜索插件HTML结构分析
## API流程概述
### 第一步获取Token
- **请求URL**: `https://www.yunso.net/index/user/s?wd={keyword}&mode=undefined&stype=undefined`
- **方法**: GET
- **Headers**:
- `Referer: https://www.yunso.net/`
- `User-Agent: Mozilla/5.0...`
- **Token提取**: 从返回HTML中匹配 `const DToken = "42b63a003f80bd5ff0a731fcd2a49fd40aefb5e96a46d546abbf92094da54763";`
### 第二步:执行搜索
- **请求URL**: `https://www.yunso.net/api/validate/searchX2`
- **方法**: POST
- **URL参数**:
- `DToken2={token}`
- `requestID=undefined`
- `mode=90002`
- `stype=undefined`
- `scope_content=0`
- `wd={keyword}` (URL编码)
- `uk=`
- `page=1`
- `limit=20`
- `screen_filetype=`
- **Headers**:
- `Referer: https://www.yunso.net/`
- `Content-Type: application/x-www-form-urlencoded`
## 搜索结果结构
### JSON响应格式
```json
{
"code": 0,
"msg": "",
"time": "1755998625",
"data": "HTML内容"
}
```
### HTML结构 (在data字段中)
#### 搜索结果项
```html
<div class="layui-card" style="..." id="{qid}-{timestamp}-{hash}" data-qid="{qid}">
<div class="layui-card-header" style="...">
<div style="...">
序号、 <span class="layui-badge">24小时内</span>
<img src="/assets/xyso/icon/filetype_folder.png" style="...">
<a onclick="open_sid(this)" id="{qid}-{timestamp}-{hash}"
url="{base64_url}" href="{real_url}" pa="{password}" target="_blank">
标题内容
</a>
</div>
<div class="responsive-container">
<div><i class="layui-icon layui-icon-time"></i> 2025-08-24 22:56:32</div>
<div>按钮组</div>
</div>
</div>
<div class="layui-card-body">
<p>
<span>所有文件共计: 合计 :N/A</span>
<img src="/assets/xyso/{type}.png" alt="{platform}">
</p>
</div>
</div>
```
## 数据提取要点
### 1. 标题提取
- **选择器**: `.layui-card-header a[onclick="open_sid(this)"]`
- **内容**: 链接文本内容,可能包含 `@` 等特殊符号需要清理
### 2. 链接提取
- **属性**: `href` - 真实链接URL
- **属性**: `url` - Base64编码的URL (备用)
- **属性**: `pa` - 提取码/密码
### 3. 时间提取
- **选择器**: `.layui-icon-time` 的父元素或下一个兄弟元素
- **格式**: `2025-08-24 22:56:32`
### 4. 网盘类型提取
- **选择器**: `.layui-card-body img[alt]`
- **类型映射**:
- `夸克` → quark
- `百度` → baidu
- `阿里` → aliyun
- 等等
### 5. 结果统计
- **总数**: 从顶部 `找到相关结果约 <strong>5919</strong> 个` 提取
## 特殊处理
### 1. 标题清理
- 移除 `@` 符号: `凡@人@修@仙@传``凡人修仙传`
- 移除HTML标签: `<font color='red'>凡人修仙传</font>``凡人修仙传`
### 2. 链接处理
- 优先使用 `href` 属性
- 如果没有则解码 `url` 属性 (Base64)
- 提取密码从 `pa` 属性
### 3. 时间解析
- 格式: `2025-08-24 22:56:32`
- 转换为标准时间格式
### 4. 网盘识别
- 根据图片alt属性确定网盘类型
- 根据URL域名辅助识别

475
plugin/xys/xys.go Normal file
View File

@@ -0,0 +1,475 @@
package xys
import (
"context"
"encoding/base64"
"fmt"
"io"
"log"
"net/http"
"net/url"
"regexp"
"strings"
"sync"
"time"
"github.com/PuerkitoBio/goquery"
"pansou/model"
"pansou/plugin"
"pansou/util/json"
)
// Plugin identity, upstream endpoints, and limits for the xys plugin.
const (
	PluginName = "xys"
	DisplayName = "小云搜索"
	Description = "小云搜索 - 阿里云盘、夸克网盘、百度网盘等多网盘搜索引擎"
	BaseURL = "https://www.yunso.net"
	TokenPath = "/index/user/s" // page that embeds the DToken script constant
	SearchPath = "/api/validate/searchX2" // search API endpoint
	UserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
	MaxResults = 50 // cap on parsed results per search
)
// XysPlugin is the 小云搜索 (xys) multi-pan search plugin.
type XysPlugin struct {
	*plugin.BaseAsyncPlugin
	debugMode bool
	tokenCache sync.Map // caches the DToken to avoid re-fetching it per search
	cacheTTL time.Duration
}

// TokenCache is the value stored in tokenCache: a token plus the time it
// was fetched, used for TTL-based expiry.
type TokenCache struct {
	Token string
	Timestamp time.Time
}

// SearchResponse is the JSON envelope returned by the searchX2 API;
// Data carries the result list as an HTML fragment.
type SearchResponse struct {
	Code int `json:"code"`
	Msg string `json:"msg"`
	Time string `json:"time"`
	Data string `json:"data"`
}
// init registers the plugin with the global plugin registry at load time.
func init() {
	plugin.RegisterGlobalPlugin(NewXysPlugin())
}

// NewXysPlugin creates a new xys plugin instance.
func NewXysPlugin() *XysPlugin {
	// Debug logging is disabled for production builds.
	debugMode := false
	p := &XysPlugin{
		BaseAsyncPlugin: plugin.NewBaseAsyncPlugin(PluginName, 3), // priority 3; standard pan plugin with Service-layer filtering
		debugMode: debugMode,
		cacheTTL: 30 * time.Minute, // token cached for 30 minutes
	}
	return p
}
// Name returns the plugin's registry name.
func (p *XysPlugin) Name() string {
	return PluginName
}

// DisplayName returns the human-readable plugin name.
func (p *XysPlugin) DisplayName() string {
	return DisplayName
}

// Description returns a short description of the plugin.
func (p *XysPlugin) Description() string {
	return Description
}

// Search runs a search with a fresh 30s-timeout client and returns the
// plain result list.
func (p *XysPlugin) Search(keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
	return p.searchImpl(&http.Client{Timeout: 30 * time.Second}, keyword, ext)
}
// searchImpl performs the two-step search: fetch a DToken from the search
// page, then call the searchX2 API with it.
func (p *XysPlugin) searchImpl(client *http.Client, keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
	if p.debugMode {
		log.Printf("[XYS] 开始搜索: %s", keyword)
	}
	// Step 1: obtain the token (cached for cacheTTL).
	token, err := p.getToken(client, keyword)
	if err != nil {
		return nil, fmt.Errorf("获取token失败: %w", err)
	}
	if p.debugMode {
		// NOTE(review): token[:10] panics if the token is shorter than 10
		// chars; only reachable with debugMode on — confirm token length.
		log.Printf("[XYS] 获取到token: %s", token[:10]+"...")
	}
	// Step 2: run the search with the token.
	results, err := p.executeSearch(client, token, keyword)
	if err != nil {
		return nil, fmt.Errorf("执行搜索失败: %w", err)
	}
	if p.debugMode {
		log.Printf("[XYS] 搜索完成,获取到 %d 个结果", len(results))
	}
	return results, nil
}
// getToken fetches the DToken required by the search API. The token is
// scraped from an inline <script> on the search page and cached in
// tokenCache for cacheTTL to avoid a page fetch per search.
func (p *XysPlugin) getToken(client *http.Client, keyword string) (string, error) {
	// Serve from cache when a non-expired token exists.
	cacheKey := "token"
	if cached, found := p.tokenCache.Load(cacheKey); found {
		if tokenCache, ok := cached.(TokenCache); ok {
			if time.Since(tokenCache.Timestamp) < p.cacheTTL {
				if p.debugMode {
					log.Printf("[XYS] 使用缓存的token")
				}
				return tokenCache.Token, nil
			}
		}
	}
	// Build the token-page URL (the keyword is part of the page request).
	tokenURL := fmt.Sprintf("%s%s?wd=%s&mode=undefined&stype=undefined",
		BaseURL, TokenPath, url.QueryEscape(keyword))
	// Bound the whole request with a 30s context.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, "GET", tokenURL, nil)
	if err != nil {
		return "", fmt.Errorf("[%s] 创建token请求失败: %w", p.Name(), err)
	}
	// Browser-like headers; the site appears to expect a full header set.
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
	req.Header.Set("Connection", "keep-alive")
	req.Header.Set("Upgrade-Insecure-Requests", "1")
	req.Header.Set("Cache-Control", "max-age=0")
	req.Header.Set("Referer", BaseURL+"/")
	resp, err := p.doRequestWithRetry(req, client)
	if err != nil {
		return "", fmt.Errorf("[%s] token请求失败: %w", p.Name(), err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		return "", fmt.Errorf("[%s] token请求HTTP状态错误: %d", p.Name(), resp.StatusCode)
	}
	// Parse the page and look for the DToken constant in script tags.
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return "", fmt.Errorf("[%s] 解析token页面HTML失败: %w", p.Name(), err)
	}
	var token string
	doc.Find("script").Each(func(i int, s *goquery.Selection) {
		scriptContent := s.Text()
		if strings.Contains(scriptContent, "DToken") {
			// Extract the quoted token value: const DToken = "...";
			re := regexp.MustCompile(`const\s+DToken\s*=\s*"([^"]+)"`)
			matches := re.FindStringSubmatch(scriptContent)
			if len(matches) > 1 {
				token = matches[1]
				if p.debugMode {
					// NOTE(review): token[:10] assumes >=10 chars — confirm.
					log.Printf("[XYS] 从script中提取到token: %s", token[:10]+"...")
				}
			}
		}
	})
	if token == "" {
		return "", fmt.Errorf("未找到DToken")
	}
	// Cache the fresh token with its fetch time for TTL expiry.
	p.tokenCache.Store(cacheKey, TokenCache{
		Token: token,
		Timestamp: time.Now(),
	})
	return token, nil
}
// doRequestWithRetry executes req up to 3 times with exponential backoff,
// returning the first 200 response. Each attempt uses a clone of req so a
// consumed request is never resent.
//
// Non-200 responses now produce a status-code error, so the final wrapped
// error is never a nil %w (the previous version dropped the status and
// could wrap nil when every attempt returned non-200 without a transport
// error). Failed response bodies are drained so connections can be reused.
func (p *XysPlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) {
	maxRetries := 3
	var lastErr error

	for i := 0; i < maxRetries; i++ {
		if i > 0 {
			// Exponential backoff: 200ms, 400ms, ...
			backoff := time.Duration(1<<uint(i-1)) * 200 * time.Millisecond
			time.Sleep(backoff)
		}

		// Clone the request to avoid reusing consumed state across attempts.
		reqClone := req.Clone(req.Context())

		resp, err := client.Do(reqClone)
		if err == nil && resp.StatusCode == 200 {
			return resp, nil
		}

		if resp != nil {
			// Drain and close so the transport can reuse the connection.
			io.Copy(io.Discard, resp.Body)
			resp.Body.Close()
			if err == nil {
				// Record the HTTP status so the final error is informative.
				err = fmt.Errorf("HTTP状态码: %d", resp.StatusCode)
			}
		}
		lastErr = err
	}

	return nil, fmt.Errorf("[%s] 重试 %d 次后仍然失败: %w", p.Name(), maxRetries, lastErr)
}
// executeSearch calls the searchX2 API with the given token and keyword,
// decodes the JSON envelope, and hands the embedded HTML fragment to
// parseSearchResults.
func (p *XysPlugin) executeSearch(client *http.Client, token, keyword string) ([]model.SearchResult, error) {
	// All parameters are passed in the query string; the POST body is empty.
	searchURL := fmt.Sprintf("%s%s?DToken2=%s&requestID=undefined&mode=90002&stype=undefined&scope_content=0&wd=%s&uk=&page=1&limit=20&screen_filetype=",
		BaseURL, SearchPath, token, url.QueryEscape(keyword))
	// Bound the whole request with a 30s context.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, "POST", searchURL, nil)
	if err != nil {
		return nil, fmt.Errorf("[%s] 创建搜索请求失败: %w", p.Name(), err)
	}
	// Browser/AJAX-like headers expected by the endpoint.
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Accept", "application/json, text/plain, */*")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
	req.Header.Set("Connection", "keep-alive")
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("Referer", BaseURL+"/")
	req.Header.Set("Origin", BaseURL)
	req.Header.Set("X-Requested-With", "XMLHttpRequest")
	resp, err := p.doRequestWithRetry(req, client)
	if err != nil {
		return nil, fmt.Errorf("[%s] 搜索请求失败: %w", p.Name(), err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("[%s] 搜索请求HTTP状态错误: %d", p.Name(), resp.StatusCode)
	}
	// Read and decode the JSON envelope.
	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("[%s] 读取响应体失败: %w", p.Name(), err)
	}
	var searchResp SearchResponse
	if err := json.Unmarshal(respBody, &searchResp); err != nil {
		return nil, fmt.Errorf("[%s] JSON解析失败: %w", p.Name(), err)
	}
	// code != 0 signals an API-level error.
	if searchResp.Code != 0 {
		return nil, fmt.Errorf("[%s] 搜索API返回错误: %s", p.Name(), searchResp.Msg)
	}
	if p.debugMode {
		log.Printf("[XYS] 搜索API响应成功data长度: %d", len(searchResp.Data))
	}
	// The actual result list is an HTML fragment in Data.
	return p.parseSearchResults(searchResp.Data, keyword)
}
// parseSearchResults parses the HTML fragment returned by the search API
// into SearchResults, capped at MaxResults and filtered by keyword.
func (p *XysPlugin) parseSearchResults(htmlData, keyword string) ([]model.SearchResult, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlData))
	if err != nil {
		return nil, fmt.Errorf("[%s] 解析搜索结果HTML失败: %w", p.Name(), err)
	}
	var results []model.SearchResult
	// Each hit is a .layui-card element carrying a data-qid attribute.
	doc.Find(".layui-card[data-qid]").Each(func(i int, s *goquery.Selection) {
		if len(results) >= MaxResults {
			return // hard cap on parsed results
		}
		result := p.parseResultItem(s, i+1)
		if result != nil {
			results = append(results, *result)
		}
	})
	if p.debugMode {
		log.Printf("[XYS] 解析到 %d 个原始结果", len(results))
	}
	// Standard pan plugins filter results by keyword relevance.
	filteredResults := plugin.FilterResultsByKeyword(results, keyword)
	if p.debugMode {
		log.Printf("[XYS] 关键词过滤后剩余 %d 个结果", len(filteredResults))
	}
	return filteredResults, nil
}
// parseResultItem extracts one SearchResult from a .layui-card selection.
// Returns nil when the card lacks a qid, title, or usable link.
func (p *XysPlugin) parseResultItem(s *goquery.Selection, index int) *model.SearchResult {
	// The qid uniquely identifies the card and feeds the result IDs.
	qid, _ := s.Attr("data-qid")
	if qid == "" {
		return nil
	}
	// The result anchor carries title text plus href/url/pa attributes.
	linkEl := s.Find(`a[onclick="open_sid(this)"]`)
	if linkEl.Length() == 0 {
		return nil
	}
	// Title: strip HTML tags, '@' separators, and excess whitespace.
	title := p.cleanTitle(linkEl.Text())
	if title == "" {
		return nil
	}
	// Prefer the plain href; fall back to the base64-encoded url attribute.
	href, _ := linkEl.Attr("href")
	if href == "" {
		urlAttr, _ := linkEl.Attr("url")
		if urlAttr != "" {
			if decoded, err := base64.StdEncoding.DecodeString(urlAttr); err == nil {
				href = string(decoded)
			}
		}
	}
	if href == "" {
		if p.debugMode {
			log.Printf("[XYS] 跳过无链接的结果: %s", title)
		}
		return nil
	}
	// Share password/extraction code, if any, is in the "pa" attribute.
	password, _ := linkEl.Attr("pa")
	// Publish time sits next to the clock icon.
	timeStr := strings.TrimSpace(s.Find(".layui-icon-time").Parent().Text())
	publishTime := p.parseTime(timeStr)
	// Pan type is derived from the link's host.
	platform := p.extractPlatform(s, href)
	link := model.Link{
		Type:     platform,
		URL:      href,
		Password: password,
	}
	result := model.SearchResult{
		Title:     title,
		Content:   fmt.Sprintf("来源:%s", platform),
		Channel:   "", // plugin results must leave Channel empty
		MessageID: fmt.Sprintf("%s-%s-%d", p.Name(), qid, index),
		UniqueID:  fmt.Sprintf("%s-%s-%d", p.Name(), qid, index),
		Datetime:  publishTime,
		Links:     []model.Link{link},
		Tags:      []string{platform},
	}
	if p.debugMode {
		log.Printf("[XYS] 解析结果: %s (%s)", title, platform)
	}
	return &result
}
// Patterns used by cleanTitle, compiled once at package level instead of
// on every call.
var (
	xysHTMLTagRe    = regexp.MustCompile(`<[^>]*>`)
	xysWhitespaceRe = regexp.MustCompile(`\s+`)
)

// cleanTitle normalizes a result title: strips HTML tags, removes the
// site's '@' obfuscation separators (e.g. 凡@人@修@仙@传), trims, and
// collapses runs of whitespace to a single space.
func (p *XysPlugin) cleanTitle(title string) string {
	if title == "" {
		return ""
	}
	cleaned := xysHTMLTagRe.ReplaceAllString(title, "")
	cleaned = strings.ReplaceAll(cleaned, "@", "")
	cleaned = strings.TrimSpace(cleaned)
	cleaned = xysWhitespaceRe.ReplaceAllString(cleaned, " ")
	return cleaned
}
// xysTimeRe matches "YYYY-MM-DD HH:MM:SS" timestamps embedded in the time
// cell text; compiled once at package level instead of on every call.
var xysTimeRe = regexp.MustCompile(`(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})`)

// parseTime extracts a "2006-01-02 15:04:05" timestamp from the (possibly
// icon-polluted) time string. Falls back to time.Now() when no timestamp
// can be parsed, matching the previous behavior.
func (p *XysPlugin) parseTime(timeStr string) time.Time {
	timeStr = strings.TrimSpace(timeStr)
	matches := xysTimeRe.FindStringSubmatch(timeStr)
	if len(matches) > 1 {
		if t, err := time.Parse("2006-01-02 15:04:05", matches[1]); err == nil {
			return t
		}
	}
	return time.Now()
}
// extractPlatform determines the pan platform for a result. The goquery
// selection is currently unused; classification is done purely on the URL.
func (p *XysPlugin) extractPlatform(s *goquery.Selection, href string) string {
	return determineCloudType(href)
}
// determineCloudType maps a share link to the pan-type identifier used
// across the project, falling back to "others" for unrecognized hosts.
func determineCloudType(rawURL string) string {
	// Ordered marker table; first match wins, mirroring the dev-guide list.
	markers := []struct {
		needle string
		kind   string
	}{
		{"pan.quark.cn", "quark"},
		{"drive.uc.cn", "uc"},
		{"pan.baidu.com", "baidu"},
		{"aliyundrive.com", "aliyun"},
		{"alipan.com", "aliyun"},
		{"pan.xunlei.com", "xunlei"},
		{"cloud.189.cn", "tianyi"},
		{"caiyun.139.com", "mobile"},
		{"magnet:", "magnet"},
		{"ed2k://", "ed2k"},
	}
	for _, m := range markers {
		if strings.Contains(rawURL, m.needle) {
			return m.kind
		}
	}
	return "others"
}

415
plugin/yuhuage/yuhuage.go Normal file
View File

@@ -0,0 +1,415 @@
package yuhuage
import (
"context"
"fmt"
"io"
"log"
"net/http"
"net/url"
"regexp"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/PuerkitoBio/goquery"
"pansou/model"
"pansou/plugin"
)
// Upstream endpoint and tuning constants for the yuhuage plugin.
const (
	BaseURL = "https://www.iyuhuage.fun"
	SearchPath = "/search/"
	UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
	MaxConcurrency = 5 // max concurrent detail-page fetches
	MaxRetryCount = 2 // max retries per detail-page request
)
// YuhuagePlugin is the 雨花阁 (yuhuage) magnet/torrent search plugin.
type YuhuagePlugin struct {
	*plugin.BaseAsyncPlugin
	debugMode bool
	detailCache sync.Map // caches detail-page link results keyed by URL
	cacheTTL time.Duration
	rateLimited int32 // set to 1 while backing off after an HTTP 429
}

// init registers the plugin with the global registry at load time.
func init() {
	p := &YuhuagePlugin{
		BaseAsyncPlugin: plugin.NewBaseAsyncPluginWithFilter("yuhuage", 3, true),
		debugMode: false,
		cacheTTL: 30 * time.Minute,
	}
	plugin.RegisterGlobalPlugin(p)
}
// Search runs a search and returns the plain result list (compatibility
// wrapper around SearchWithResult).
func (p *YuhuagePlugin) Search(keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
	result, err := p.SearchWithResult(keyword, ext)
	if err != nil {
		return nil, err
	}
	return result.Results, nil
}

// SearchWithResult runs a search and returns results wrapped with the
// IsFinal flag, delegating to the async plugin machinery.
func (p *YuhuagePlugin) SearchWithResult(keyword string, ext map[string]interface{}) (model.PluginSearchResult, error) {
	return p.AsyncSearchWithResult(keyword, p.searchImpl, p.MainCacheKey, ext)
}
// searchImpl fetches the first search results page, honors the plugin's
// 429 backoff flag, and returns keyword-filtered results.
func (p *YuhuagePlugin) searchImpl(client *http.Client, keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
	if p.debugMode {
		log.Printf("[YUHUAGE] 开始搜索: %s", keyword)
	}
	// Bail out immediately while the 429 backoff window is active.
	if atomic.LoadInt32(&p.rateLimited) == 1 {
		if p.debugMode {
			log.Printf("[YUHUAGE] 当前处于限流状态,跳过搜索")
		}
		return nil, fmt.Errorf("rate limited")
	}
	// Build the page-1, time-sorted search URL.
	encodedQuery := url.QueryEscape(keyword)
	searchURL := fmt.Sprintf("%s%s%s-%d-time.html", BaseURL, SearchPath, encodedQuery, 1)
	// Bound the whole request with a 30s context.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
	if err != nil {
		return nil, fmt.Errorf("[%s] 创建请求失败: %w", p.Name(), err)
	}
	// Browser-like headers.
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
	req.Header.Set("Connection", "keep-alive")
	req.Header.Set("Referer", BaseURL+"/")
	resp, err := p.doRequestWithRetry(req, client)
	if err != nil {
		return nil, fmt.Errorf("[%s] 搜索请求失败: %w", p.Name(), err)
	}
	defer resp.Body.Close()
	// On 429, raise the backoff flag for 60 seconds, then clear it.
	// NOTE(review): doRequestWithRetry only returns 200 responses, so this
	// branch appears unreachable — confirm intent.
	if resp.StatusCode == 429 {
		atomic.StoreInt32(&p.rateLimited, 1)
		go func() {
			time.Sleep(60 * time.Second)
			atomic.StoreInt32(&p.rateLimited, 0)
		}()
		return nil, fmt.Errorf("[%s] 请求被限流", p.Name())
	}
	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("[%s] HTTP错误: %d", p.Name(), resp.StatusCode)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("[%s] 读取响应失败: %w", p.Name(), err)
	}
	// Parse the result rows (this also fetches detail pages for links).
	results, err := p.parseSearchResults(string(body))
	if err != nil {
		return nil, err
	}
	if p.debugMode {
		log.Printf("[YUHUAGE] 搜索完成,获得 %d 个结果", len(results))
	}
	// Keyword relevance filter.
	return plugin.FilterResultsByKeyword(results, keyword), nil
}
// parseSearchResults parses the search page HTML into results, then fills
// in each result's Links by fetching its detail page concurrently.
func (p *YuhuagePlugin) parseSearchResults(html string) ([]model.SearchResult, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, err
	}
	var results []model.SearchResult
	var detailURLs []string
	// Each hit is a .search-item.detail-width card.
	doc.Find(".search-item.detail-width").Each(func(i int, s *goquery.Selection) {
		title := strings.TrimSpace(p.cleanTitle(s.Find(".item-title h3 a").Text()))
		detailHref, exists := s.Find(".item-title h3 a").Attr("href")
		if !exists || title == "" {
			return // skip cards without a usable title/link
		}
		// detailURLs[i] pairs with results[i] for the later link fetch.
		detailURL := BaseURL + detailHref
		detailURLs = append(detailURLs, detailURL)
		// Metadata from the info bar: creation time, size, file count, heat.
		createTime := strings.TrimSpace(s.Find(".item-bar span:contains('创建时间') b").Text())
		size := strings.TrimSpace(s.Find(".item-bar .cpill.blue-pill").Text())
		fileCount := strings.TrimSpace(s.Find(".item-bar .cpill.yellow-pill").Text())
		hot := strings.TrimSpace(s.Find(".item-bar span:contains('热度') b").Text())
		lastDownload := strings.TrimSpace(s.Find(".item-bar span:contains('最近下载') b").Text())
		// Human-readable summary.
		content := fmt.Sprintf("创建时间: %s | 大小: %s | 文件数: %s | 热度: %s",
			createTime, size, fileCount, hot)
		if lastDownload != "" {
			content += fmt.Sprintf(" | 最近下载: %s", lastDownload)
		}
		result := model.SearchResult{
			Title:    title,
			Content:  content,
			Channel:  "", // plugin results must leave Channel empty
			Tags:     []string{"磁力链接"},
			Datetime: p.parseDateTime(createTime),
			UniqueID: fmt.Sprintf("%s-%s", p.Name(), p.extractHashFromURL(detailURL)),
		}
		results = append(results, result)
	})
	if p.debugMode {
		log.Printf("[YUHUAGE] 解析到 %d 个搜索结果,准备获取详情", len(results))
	}
	// Fetch detail pages (bounded concurrency) to populate Links in place.
	p.fetchDetailsSync(detailURLs, results)
	return results, nil
}
// fetchDetailsSync fetches detail pages for all results concurrently
// (bounded by MaxConcurrency) and writes the extracted links into the
// corresponding results slice entries. Blocks until all fetches finish.
// detailURLs[i] must correspond to results[i].
func (p *YuhuagePlugin) fetchDetailsSync(detailURLs []string, results []model.SearchResult) {
	if len(detailURLs) == 0 {
		return
	}
	// Semaphore bounds the number of in-flight detail fetches.
	semaphore := make(chan struct{}, MaxConcurrency)
	var wg sync.WaitGroup
	for i, detailURL := range detailURLs {
		if i >= len(results) {
			break // defensive: never index past the results slice
		}
		wg.Add(1)
		go func(url string, result *model.SearchResult) {
			defer wg.Done()
			semaphore <- struct{}{}
			defer func() { <-semaphore }()
			links := p.fetchDetailLinks(url)
			if len(links) > 0 {
				// Safe concurrent write: each goroutine owns one element.
				result.Links = links
				if p.debugMode {
					log.Printf("[YUHUAGE] 为结果设置了 %d 个链接", len(links))
				}
			} else if p.debugMode {
				log.Printf("[YUHUAGE] 详情页没有找到有效链接: %s", url)
			}
		}(detailURL, &results[i])
	}
	wg.Wait()
	if p.debugMode {
		log.Printf("[YUHUAGE] 详情页获取完成")
	}
}
// fetchDetailLinks 获取详情页链接
func (p *YuhuagePlugin) fetchDetailLinks(detailURL string) []model.Link {
// 检查缓存
if cached, exists := p.detailCache.Load(detailURL); exists {
if links, ok := cached.([]model.Link); ok {
return links
}
}
client := &http.Client{Timeout: 15 * time.Second}
for retry := 0; retry <= MaxRetryCount; retry++ {
req, err := http.NewRequest("GET", detailURL, nil)
if err != nil {
continue
}
req.Header.Set("User-Agent", UserAgent)
req.Header.Set("Referer", BaseURL+"/")
resp, err := client.Do(req)
if err != nil {
if retry < MaxRetryCount {
time.Sleep(time.Duration(retry+1) * time.Second)
continue
}
break
}
if resp.StatusCode != 200 {
resp.Body.Close()
if retry < MaxRetryCount {
time.Sleep(time.Duration(retry+1) * time.Second)
continue
}
break
}
body, err := io.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
if retry < MaxRetryCount {
time.Sleep(time.Duration(retry+1) * time.Second)
continue
}
break
}
links := p.parseDetailLinks(string(body))
// 缓存结果
if len(links) > 0 {
p.detailCache.Store(detailURL, links)
// 设置缓存过期
go func() {
time.Sleep(p.cacheTTL)
p.detailCache.Delete(detailURL)
}()
}
return links
}
return nil
}
// parseDetailLinks extracts magnet and thunder download links from a
// detail page's HTML. Returns an empty slice on parse failure.
func (p *YuhuagePlugin) parseDetailLinks(html string) []model.Link {
	var links []model.Link
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return links
	}
	// Magnet links rendered as <a class="download" href="magnet:...">.
	doc.Find("a.download[href^='magnet:']").Each(func(i int, s *goquery.Selection) {
		href, exists := s.Attr("href")
		if exists && href != "" {
			if p.debugMode {
				log.Printf("[YUHUAGE] 找到磁力链接: %s", href)
			}
			links = append(links, model.Link{
				URL:  href,
				Type: "magnet",
			})
		}
	})
	// Thunder (迅雷) links rendered as <a class="download" href="thunder:...">.
	doc.Find("a.download[href^='thunder:']").Each(func(i int, s *goquery.Selection) {
		href, exists := s.Attr("href")
		if exists && href != "" {
			if p.debugMode {
				log.Printf("[YUHUAGE] 找到迅雷链接: %s", href)
			}
			links = append(links, model.Link{
				URL:  href,
				Type: "thunder",
			})
		}
	})
	if p.debugMode && len(links) > 0 {
		log.Printf("[YUHUAGE] 从详情页解析到 %d 个链接", len(links))
	}
	return links
}
// yuhuageHashRe matches the numeric hash id in detail URLs of the form
// ".../hash/12345.html"; compiled once at package level instead of on
// every call.
var yuhuageHashRe = regexp.MustCompile(`/hash/(\d+)\.html`)

// extractHashFromURL returns the numeric hash id embedded in a detail-page
// URL, or "" when the URL does not match the expected pattern.
func (p *YuhuagePlugin) extractHashFromURL(detailURL string) string {
	matches := yuhuageHashRe.FindStringSubmatch(detailURL)
	if len(matches) > 1 {
		return matches[1]
	}
	return ""
}
// Patterns used by cleanTitle, compiled once at package level instead of
// on every call.
var (
	yuhuageHTMLTagRe    = regexp.MustCompile(`<[^>]*>`)
	yuhuageWhitespaceRe = regexp.MustCompile(`\s+`)
)

// cleanTitle normalizes a result title: strips HTML tags (e.g. the <b>
// highlight markers), collapses whitespace runs to a single space, and
// trims leading/trailing whitespace.
func (p *YuhuagePlugin) cleanTitle(title string) string {
	title = strings.TrimSpace(title)
	title = yuhuageHTMLTagRe.ReplaceAllString(title, "")
	title = yuhuageWhitespaceRe.ReplaceAllString(title, " ")
	return strings.TrimSpace(title)
}
// parseDateTime converts a scraped time string into a time.Time. An empty
// or unrecognized string yields the zero time.
func (p *YuhuagePlugin) parseDateTime(timeStr string) time.Time {
	if timeStr == "" {
		return time.Time{}
	}
	// Try the site's known layouts in order.
	for _, layout := range []string{
		"2006-01-02 15:04:05",
		"2006-01-02",
		"2006/01/02 15:04:05",
		"2006/01/02",
	} {
		if parsed, err := time.Parse(layout, timeStr); err == nil {
			return parsed
		}
	}
	// No layout matched: fall back to the zero time.
	return time.Time{}
}
// doRequestWithRetry executes req up to 3 times with exponential backoff,
// returning the first 200 response. Each attempt uses a clone of req.
//
// Fixes over the previous version: (1) an HTTP 429 is returned to the
// caller instead of being retried and swallowed — searchImpl checks
// resp.StatusCode == 429 to engage its backoff flag, a branch that was
// unreachable before because only 200 responses were ever returned;
// (2) other non-200 statuses now produce a status-code error, so the final
// wrapped error is never a nil %w; (3) failed bodies are drained so the
// transport can reuse connections.
func (p *YuhuagePlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) {
	maxRetries := 3
	var lastErr error

	for i := 0; i < maxRetries; i++ {
		if i > 0 {
			// Exponential backoff: 200ms, 400ms, ...
			backoff := time.Duration(1<<uint(i-1)) * 200 * time.Millisecond
			time.Sleep(backoff)
		}

		// Clone the request to avoid reusing consumed state across attempts.
		reqClone := req.Clone(req.Context())

		resp, err := client.Do(reqClone)
		if err == nil && (resp.StatusCode == 200 || resp.StatusCode == 429) {
			// 429 is handed back so the caller's rate-limit handling runs;
			// retrying a rate-limited request would only make things worse.
			return resp, nil
		}

		if resp != nil {
			// Drain and close so the transport can reuse the connection.
			io.Copy(io.Discard, resp.Body)
			resp.Body.Close()
			if err == nil {
				// Keep the HTTP status so the final error is informative.
				err = fmt.Errorf("HTTP状态码: %d", resp.StatusCode)
			}
		}
		lastErr = err
	}

	return nil, fmt.Errorf("重试 %d 次后仍然失败: %w", maxRetries, lastErr)
}

View File

@@ -268,9 +268,9 @@ func injectMainCacheToAsyncPlugins(pluginManager *plugin.PluginManager, mainCach
if config.AppConfig != nil && config.AppConfig.AsyncLogEnabled {
displayKey := key[:8] + "..."
if keyword != "" {
fmt.Printf("📝 [异步插件 %s] 初始缓存创建: %s(关键词:%s) | 结果数: %d\n", pluginName, displayKey, keyword, len(newResults))
fmt.Printf("[异步插件 %s] 初始缓存创建: %s(关键词:%s) | 结果数: %d\n", pluginName, displayKey, keyword, len(newResults))
} else {
fmt.Printf("📝 [异步插件 %s] 初始缓存创建: %s | 结果数: %d\n", pluginName, key, len(newResults))
fmt.Printf("[异步插件 %s] 初始缓存创建: %s | 结果数: %d\n", pluginName, key, len(newResults))
}
}
}
@@ -1293,7 +1293,7 @@ func (s *SearchService) searchPlugins(keyword string, plugins []string, forceRef
// 🔥 修复:使用同步方式确保数据写入磁盘
enhancedTwoLevelCache.SetBothLevels(key, data, ttl)
if config.AppConfig != nil && config.AppConfig.AsyncLogEnabled {
fmt.Printf("📝 [主程序] 缓存更新完成: %s | 结果数: %d",
fmt.Printf("[主程序] 缓存更新完成: %s | 结果数: %d",
key, len(res))
}
}

View File

@@ -248,7 +248,7 @@ func NewBufferStatusMonitor() *BufferStatusMonitor {
alertThresholds: &AlertThresholds{
MemoryUsageWarning: 50 * 1024 * 1024, // 50MB
MemoryUsageCritical: 100 * 1024 * 1024, // 100MB
BufferCountWarning: 30,
BufferCountWarning: 40,
BufferCountCritical: 50,
OperationQueueWarning: 500,
OperationQueueCritical: 1000,
@@ -559,7 +559,7 @@ func (b *BufferStatusMonitor) triggerAlert(component, level, message string) {
b.alertManager.mutex.Unlock()
// 输出报警日志
fmt.Printf("🚨 [报警] %s - %s: %s\n", level, component, message)
// fmt.Printf("🚨 [报警] %s - %s: %s\n", level, component, message)
}
// updatePredictions 更新预测