mirror of
https://github.com/fish2018/pansou.git
synced 2025-11-25 03:14:59 +08:00
新增插件hdmoli,ddys
This commit is contained in:
2
main.go
2
main.go
@@ -57,6 +57,8 @@ import (
|
||||
_ "pansou/plugin/leijing"
|
||||
_ "pansou/plugin/xb6v"
|
||||
_ "pansou/plugin/xys"
|
||||
_ "pansou/plugin/ddys"
|
||||
_ "pansou/plugin/hdmoli"
|
||||
)
|
||||
|
||||
// 全局缓存写入管理器
|
||||
|
||||
600
plugin/ddys/ddys.go
Normal file
600
plugin/ddys/ddys.go
Normal file
@@ -0,0 +1,600 @@
|
||||
package ddys
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"pansou/model"
|
||||
"pansou/plugin"
|
||||
)
|
||||
|
||||
// Plugin identity and tuning constants for the ddys (低端影视) source plugin.
const (
	PluginName  = "ddys"                                  // registry key for this plugin
	DisplayName = "低端影视"                                  // human-readable plugin name
	Description = "低端影视 - 影视资源网盘链接搜索"                     // short plugin description
	BaseURL     = "https://ddys.pro"                      // site root
	SearchPath  = "/?s=%s&post_type=post"                 // WordPress search template; %s is the escaped keyword
	// UserAgent mimics a desktop Chrome browser to pass basic bot filtering.
	UserAgent      = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
	MaxResults     = 50 // cap on parsed search hits
	MaxConcurrency = 20 // bound on concurrent detail-page fetches
)
|
||||
|
||||
// DdysPlugin searches 低端影视 (ddys.pro) for film/TV posts and extracts
// network-disk download links from each post's detail page.
type DdysPlugin struct {
	*plugin.BaseAsyncPlugin
	debugMode   bool          // when true, emits verbose log.Printf tracing
	detailCache sync.Map      // detail-page URL -> []model.Link cache
	cacheTTL    time.Duration // intended cache lifetime — NOTE(review): no eviction is visible in this file; confirm TTL is enforced elsewhere
}
|
||||
|
||||
// init registers the plugin with the global plugin registry at program start.
func init() {
	plugin.RegisterGlobalPlugin(NewDdysPlugin())
}
|
||||
|
||||
// NewDdysPlugin 创建新的低端影视插件实例
|
||||
func NewDdysPlugin() *DdysPlugin {
|
||||
debugMode := false // 生产环境关闭调试
|
||||
|
||||
p := &DdysPlugin{
|
||||
BaseAsyncPlugin: plugin.NewBaseAsyncPlugin(PluginName, 1), // 标准网盘插件,启用Service层过滤
|
||||
debugMode: debugMode,
|
||||
cacheTTL: 30 * time.Minute, // 详情页缓存30分钟
|
||||
}
|
||||
|
||||
return p
|
||||
}
|
||||
|
||||
// Name returns the plugin's registry name.
func (p *DdysPlugin) Name() string {
	return PluginName
}
|
||||
|
||||
// DisplayName returns the plugin's human-readable name.
func (p *DdysPlugin) DisplayName() string {
	return DisplayName
}
|
||||
|
||||
// Description returns the plugin's short description.
func (p *DdysPlugin) Description() string {
	return Description
}
|
||||
|
||||
// Search 搜索接口
|
||||
func (p *DdysPlugin) Search(keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
|
||||
return p.searchImpl(&http.Client{Timeout: 30 * time.Second}, keyword, ext)
|
||||
}
|
||||
|
||||
// searchImpl 搜索实现
|
||||
func (p *DdysPlugin) searchImpl(client *http.Client, keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
|
||||
if p.debugMode {
|
||||
log.Printf("[DDYS] 开始搜索: %s", keyword)
|
||||
}
|
||||
|
||||
// 第一步:执行搜索获取结果列表
|
||||
searchResults, err := p.executeSearch(client, keyword)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("[%s] 执行搜索失败: %w", p.Name(), err)
|
||||
}
|
||||
|
||||
if p.debugMode {
|
||||
log.Printf("[DDYS] 搜索获取到 %d 个结果", len(searchResults))
|
||||
}
|
||||
|
||||
// 第二步:并发获取详情页链接
|
||||
finalResults := p.fetchDetailLinks(client, searchResults, keyword)
|
||||
|
||||
if p.debugMode {
|
||||
log.Printf("[DDYS] 最终获取到 %d 个有效结果", len(finalResults))
|
||||
}
|
||||
|
||||
// 第三步:关键词过滤(标准网盘插件需要过滤)
|
||||
filteredResults := plugin.FilterResultsByKeyword(finalResults, keyword)
|
||||
|
||||
if p.debugMode {
|
||||
log.Printf("[DDYS] 关键词过滤后剩余 %d 个结果", len(filteredResults))
|
||||
}
|
||||
|
||||
return filteredResults, nil
|
||||
}
|
||||
|
||||
// executeSearch 执行搜索请求
|
||||
func (p *DdysPlugin) executeSearch(client *http.Client, keyword string) ([]model.SearchResult, error) {
|
||||
// 构建搜索URL
|
||||
searchURL := fmt.Sprintf("%s%s", BaseURL, fmt.Sprintf(SearchPath, url.QueryEscape(keyword)))
|
||||
|
||||
// 创建带超时的上下文
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("[%s] 创建搜索请求失败: %w", p.Name(), err)
|
||||
}
|
||||
|
||||
// 设置完整的请求头
|
||||
req.Header.Set("User-Agent", UserAgent)
|
||||
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
|
||||
req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
|
||||
req.Header.Set("Connection", "keep-alive")
|
||||
req.Header.Set("Upgrade-Insecure-Requests", "1")
|
||||
req.Header.Set("Cache-Control", "max-age=0")
|
||||
req.Header.Set("Referer", BaseURL+"/")
|
||||
|
||||
resp, err := p.doRequestWithRetry(req, client)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("[%s] 搜索请求失败: %w", p.Name(), err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("[%s] 搜索请求HTTP状态错误: %d", p.Name(), resp.StatusCode)
|
||||
}
|
||||
|
||||
// 解析HTML提取搜索结果
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("[%s] 解析搜索结果HTML失败: %w", p.Name(), err)
|
||||
}
|
||||
|
||||
return p.parseSearchResults(doc)
|
||||
}
|
||||
|
||||
// doRequestWithRetry 带重试机制的HTTP请求
|
||||
func (p *DdysPlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) {
|
||||
maxRetries := 3
|
||||
var lastErr error
|
||||
|
||||
for i := 0; i < maxRetries; i++ {
|
||||
if i > 0 {
|
||||
// 指数退避重试
|
||||
backoff := time.Duration(1<<uint(i-1)) * 200 * time.Millisecond
|
||||
time.Sleep(backoff)
|
||||
}
|
||||
|
||||
// 克隆请求避免并发问题
|
||||
reqClone := req.Clone(req.Context())
|
||||
|
||||
resp, err := client.Do(reqClone)
|
||||
if err == nil && resp.StatusCode == 200 {
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
if resp != nil {
|
||||
resp.Body.Close()
|
||||
}
|
||||
lastErr = err
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("[%s] 重试 %d 次后仍然失败: %w", p.Name(), maxRetries, lastErr)
|
||||
}
|
||||
|
||||
// parseSearchResults 解析搜索结果HTML
|
||||
func (p *DdysPlugin) parseSearchResults(doc *goquery.Document) ([]model.SearchResult, error) {
|
||||
var results []model.SearchResult
|
||||
|
||||
// 查找搜索结果项: article[class^="post-"]
|
||||
doc.Find("article[class*='post-']").Each(func(i int, s *goquery.Selection) {
|
||||
if len(results) >= MaxResults {
|
||||
return
|
||||
}
|
||||
|
||||
result := p.parseResultItem(s, i+1)
|
||||
if result != nil {
|
||||
results = append(results, *result)
|
||||
}
|
||||
})
|
||||
|
||||
if p.debugMode {
|
||||
log.Printf("[DDYS] 解析到 %d 个原始结果", len(results))
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// parseResultItem converts one search-result <article> into a SearchResult.
// It returns nil for items missing a title or link. The detail-page URL is
// appended to Content as a "详情页: <url>" line so the later detail-fetch
// stage (fetchDetailLinks) can recover it; cleanContent strips it again.
func (p *DdysPlugin) parseResultItem(s *goquery.Selection, index int) *model.SearchResult {
	// Derive a stable post ID from the article's class list (post-<digits>),
	// falling back to a positional placeholder.
	articleClass, _ := s.Attr("class")
	postID := p.extractPostID(articleClass)
	if postID == "" {
		postID = fmt.Sprintf("unknown-%d", index)
	}

	// Title anchor carries both the display title and the detail-page href.
	linkEl := s.Find(".post-title a")
	if linkEl.Length() == 0 {
		if p.debugMode {
			log.Printf("[DDYS] 跳过无标题链接的结果")
		}
		return nil
	}

	title := strings.TrimSpace(linkEl.Text())
	if title == "" {
		return nil
	}

	detailURL, _ := linkEl.Attr("href")
	if detailURL == "" {
		if p.debugMode {
			log.Printf("[DDYS] 跳过无链接的结果: %s", title)
		}
		return nil
	}

	// Secondary metadata; each extractor degrades gracefully when absent.
	publishTime := p.extractPublishTime(s)
	category := p.extractCategory(s)
	content := p.extractContent(s)

	// Preliminary result; Links is filled after the detail page is fetched.
	result := model.SearchResult{
		Title:     title,
		Content:   fmt.Sprintf("分类:%s\n%s", category, content),
		Channel:   "", // must be empty for plugin results (per dev guide)
		MessageID: fmt.Sprintf("%s-%s-%d", p.Name(), postID, index),
		UniqueID:  fmt.Sprintf("%s-%s-%d", p.Name(), postID, index),
		Datetime:  publishTime,
		Links:     []model.Link{},
		Tags:      []string{category},
	}

	// Smuggle the detail URL through Content for the next pipeline stage.
	result.Content += fmt.Sprintf("\n详情页: %s", detailURL)

	if p.debugMode {
		log.Printf("[DDYS] 解析结果: %s (%s)", title, category)
	}

	return &result
}
|
||||
|
||||
// extractPostID 从文章class中提取文章ID
|
||||
func (p *DdysPlugin) extractPostID(articleClass string) string {
|
||||
// 匹配 post-{数字} 格式
|
||||
re := regexp.MustCompile(`post-(\d+)`)
|
||||
matches := re.FindStringSubmatch(articleClass)
|
||||
if len(matches) > 1 {
|
||||
return matches[1]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// extractPublishTime 提取发布时间
|
||||
func (p *DdysPlugin) extractPublishTime(s *goquery.Selection) time.Time {
|
||||
timeEl := s.Find(".meta_date time.entry-date")
|
||||
if timeEl.Length() == 0 {
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
datetime, exists := timeEl.Attr("datetime")
|
||||
if !exists {
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
// 解析ISO 8601格式时间
|
||||
if t, err := time.Parse(time.RFC3339, datetime); err == nil {
|
||||
return t
|
||||
}
|
||||
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
// extractCategory 提取分类
|
||||
func (p *DdysPlugin) extractCategory(s *goquery.Selection) string {
|
||||
categoryEl := s.Find(".meta_categories .cat-links a")
|
||||
if categoryEl.Length() > 0 {
|
||||
return strings.TrimSpace(categoryEl.Text())
|
||||
}
|
||||
return "未分类"
|
||||
}
|
||||
|
||||
// extractContent 提取内容简介
|
||||
func (p *DdysPlugin) extractContent(s *goquery.Selection) string {
|
||||
contentEl := s.Find(".entry-content")
|
||||
if contentEl.Length() > 0 {
|
||||
content := strings.TrimSpace(contentEl.Text())
|
||||
// 限制长度
|
||||
if len(content) > 200 {
|
||||
content = content[:200] + "..."
|
||||
}
|
||||
return content
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// fetchDetailLinks visits each preliminary result's detail page concurrently
// (bounded by MaxConcurrency) and keeps only results that yield at least one
// pan link. keyword is unused here; keyword filtering happens later in
// searchImpl. Result order is nondeterministic due to the fan-out.
func (p *DdysPlugin) fetchDetailLinks(client *http.Client, searchResults []model.SearchResult, keyword string) []model.SearchResult {
	if len(searchResults) == 0 {
		return []model.SearchResult{}
	}

	// Semaphore channel bounds concurrent fetches; resultsChan is sized so
	// no worker ever blocks on send.
	semaphore := make(chan struct{}, MaxConcurrency)
	var wg sync.WaitGroup
	resultsChan := make(chan model.SearchResult, len(searchResults))

	for _, result := range searchResults {
		wg.Add(1)
		go func(r model.SearchResult) {
			defer wg.Done()
			semaphore <- struct{}{}        // acquire slot
			defer func() { <-semaphore }() // release slot

			// The detail URL was smuggled through Content by parseResultItem.
			detailURL := p.extractDetailURLFromContent(r.Content)
			if detailURL == "" {
				if p.debugMode {
					log.Printf("[DDYS] 跳过无详情页URL的结果: %s", r.Title)
				}
				return
			}

			// Fetch pan links; results with no links are silently dropped.
			links := p.fetchDetailPageLinks(client, detailURL)
			if len(links) > 0 {
				r.Links = links
				// Strip the internal detail-URL line before emitting.
				r.Content = p.cleanContent(r.Content)
				resultsChan <- r
			} else if p.debugMode {
				log.Printf("[DDYS] 详情页无有效链接: %s", r.Title)
			}
		}(result)
	}

	// Close the channel once every worker has finished.
	go func() {
		wg.Wait()
		close(resultsChan)
	}()

	// Drain the channel; ranges until close above.
	var finalResults []model.SearchResult
	for result := range resultsChan {
		finalResults = append(finalResults, result)
	}

	return finalResults
}
|
||||
|
||||
// extractDetailURLFromContent 从Content中提取详情页URL
|
||||
func (p *DdysPlugin) extractDetailURLFromContent(content string) string {
|
||||
lines := strings.Split(content, "\n")
|
||||
for _, line := range lines {
|
||||
if strings.HasPrefix(line, "详情页: ") {
|
||||
return strings.TrimPrefix(line, "详情页: ")
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// cleanContent 清理Content,移除详情页URL行
|
||||
func (p *DdysPlugin) cleanContent(content string) string {
|
||||
lines := strings.Split(content, "\n")
|
||||
var cleanedLines []string
|
||||
for _, line := range lines {
|
||||
if !strings.HasPrefix(line, "详情页: ") {
|
||||
cleanedLines = append(cleanedLines, line)
|
||||
}
|
||||
}
|
||||
return strings.Join(cleanedLines, "\n")
|
||||
}
|
||||
|
||||
// fetchDetailPageLinks downloads a detail page and extracts its pan links,
// consulting/populating detailCache. All failures are logged (debug mode
// only) and degrade to an empty slice so one bad page never fails a search.
func (p *DdysPlugin) fetchDetailPageLinks(client *http.Client, detailURL string) []model.Link {
	// Cache hit: reuse previously parsed links for this URL.
	// NOTE(review): entries are never evicted and cacheTTL is not consulted
	// here — confirm whether expiry is intended.
	if cached, found := p.detailCache.Load(detailURL); found {
		if links, ok := cached.([]model.Link); ok {
			if p.debugMode {
				log.Printf("[DDYS] 使用缓存的详情页链接: %s", detailURL)
			}
			return links
		}
	}

	// Per-request 30s timeout.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, "GET", detailURL, nil)
	if err != nil {
		if p.debugMode {
			log.Printf("[DDYS] 创建详情页请求失败: %v", err)
		}
		return []model.Link{}
	}

	// Browser-like headers, matching the search request.
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
	req.Header.Set("Referer", BaseURL+"/")

	resp, err := client.Do(req)
	if err != nil {
		if p.debugMode {
			log.Printf("[DDYS] 详情页请求失败: %v", err)
		}
		return []model.Link{}
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		if p.debugMode {
			log.Printf("[DDYS] 详情页HTTP状态错误: %d", resp.StatusCode)
		}
		return []model.Link{}
	}

	// Read the whole body; link extraction is regex-based over raw HTML.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		if p.debugMode {
			log.Printf("[DDYS] 读取详情页响应失败: %v", err)
		}
		return []model.Link{}
	}

	links := p.parseNetworkDiskLinks(string(body))

	// Cache only non-empty results so transient failures can be retried.
	if len(links) > 0 {
		p.detailCache.Store(detailURL, links)
	}

	if p.debugMode {
		log.Printf("[DDYS] 从详情页提取到 %d 个链接: %s", len(links), detailURL)
	}

	return links
}
|
||||
|
||||
// parseNetworkDiskLinks 解析网盘链接
|
||||
func (p *DdysPlugin) parseNetworkDiskLinks(htmlContent string) []model.Link {
|
||||
var links []model.Link
|
||||
|
||||
// 定义网盘链接匹配模式
|
||||
patterns := []struct {
|
||||
name string
|
||||
pattern string
|
||||
urlType string
|
||||
}{
|
||||
{"夸克网盘", `\(夸克[^)]*\)[::]\s*<a[^>]*href\s*=\s*["']([^"']+)["'][^>]*>([^<]+)</a>`, "quark"},
|
||||
{"百度网盘", `\(百度[^)]*\)[::]\s*<a[^>]*href\s*=\s*["']([^"']+)["'][^>]*>([^<]+)</a>`, "baidu"},
|
||||
{"阿里云盘", `\(阿里[^)]*\)[::]\s*<a[^>]*href\s*=\s*["']([^"']+)["'][^>]*>([^<]+)</a>`, "aliyun"},
|
||||
{"天翼云盘", `\(天翼[^)]*\)[::]\s*<a[^>]*href\s*=\s*["']([^"']+)["'][^>]*>([^<]+)</a>`, "tianyi"},
|
||||
{"迅雷网盘", `\(迅雷[^)]*\)[::]\s*<a[^>]*href\s*=\s*["']([^"']+)["'][^>]*>([^<]+)</a>`, "xunlei"},
|
||||
// 通用模式
|
||||
{"通用网盘", `<a[^>]*href\s*=\s*["'](https?://[^"']*(?:pan|drive|cloud)[^"']*)["'][^>]*>([^<]+)</a>`, "others"},
|
||||
}
|
||||
|
||||
// 去重用的map
|
||||
seen := make(map[string]bool)
|
||||
|
||||
for _, pattern := range patterns {
|
||||
re := regexp.MustCompile(pattern.pattern)
|
||||
matches := re.FindAllStringSubmatch(htmlContent, -1)
|
||||
|
||||
for _, match := range matches {
|
||||
if len(match) >= 3 {
|
||||
url := match[1]
|
||||
|
||||
// 去重
|
||||
if seen[url] {
|
||||
continue
|
||||
}
|
||||
seen[url] = true
|
||||
|
||||
// 确定网盘类型
|
||||
urlType := p.determineCloudType(url)
|
||||
if urlType == "others" {
|
||||
urlType = pattern.urlType
|
||||
}
|
||||
|
||||
// 提取可能的提取码
|
||||
password := p.extractPassword(htmlContent, url)
|
||||
|
||||
link := model.Link{
|
||||
Type: urlType,
|
||||
URL: url,
|
||||
Password: password,
|
||||
}
|
||||
|
||||
links = append(links, link)
|
||||
|
||||
if p.debugMode {
|
||||
log.Printf("[DDYS] 找到链接: %s (%s)", url, urlType)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return links
|
||||
}
|
||||
|
||||
// extractPassword 提取网盘提取码
|
||||
func (p *DdysPlugin) extractPassword(content string, panURL string) string {
|
||||
// 常见提取码模式
|
||||
patterns := []string{
|
||||
`提取[码密][::]?\s*([A-Za-z0-9]{4,8})`,
|
||||
`密码[::]?\s*([A-Za-z0-9]{4,8})`,
|
||||
`[码密][::]?\s*([A-Za-z0-9]{4,8})`,
|
||||
`([A-Za-z0-9]{4,8})\s*[是为]?提取[码密]`,
|
||||
}
|
||||
|
||||
// 在网盘链接附近搜索提取码
|
||||
urlIndex := strings.Index(content, panURL)
|
||||
if urlIndex == -1 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// 搜索范围:链接前后200个字符
|
||||
start := urlIndex - 200
|
||||
if start < 0 {
|
||||
start = 0
|
||||
}
|
||||
end := urlIndex + len(panURL) + 200
|
||||
if end > len(content) {
|
||||
end = len(content)
|
||||
}
|
||||
|
||||
searchArea := content[start:end]
|
||||
|
||||
for _, pattern := range patterns {
|
||||
re := regexp.MustCompile(pattern)
|
||||
matches := re.FindStringSubmatch(searchArea)
|
||||
if len(matches) > 1 {
|
||||
return matches[1]
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// determineCloudType 根据URL自动识别网盘类型(按开发指南完整列表)
|
||||
func (p *DdysPlugin) determineCloudType(url string) string {
|
||||
switch {
|
||||
case strings.Contains(url, "pan.quark.cn"):
|
||||
return "quark"
|
||||
case strings.Contains(url, "drive.uc.cn"):
|
||||
return "uc"
|
||||
case strings.Contains(url, "pan.baidu.com"):
|
||||
return "baidu"
|
||||
case strings.Contains(url, "aliyundrive.com") || strings.Contains(url, "alipan.com"):
|
||||
return "aliyun"
|
||||
case strings.Contains(url, "pan.xunlei.com"):
|
||||
return "xunlei"
|
||||
case strings.Contains(url, "cloud.189.cn"):
|
||||
return "tianyi"
|
||||
case strings.Contains(url, "caiyun.139.com"):
|
||||
return "mobile"
|
||||
case strings.Contains(url, "115.com"):
|
||||
return "115"
|
||||
case strings.Contains(url, "123pan.com"):
|
||||
return "123"
|
||||
case strings.Contains(url, "mypikpak.com"):
|
||||
return "pikpak"
|
||||
case strings.Contains(url, "lanzou"):
|
||||
return "lanzou"
|
||||
default:
|
||||
return "others"
|
||||
}
|
||||
}
|
||||
182
plugin/ddys/html结构分析.md
Normal file
182
plugin/ddys/html结构分析.md
Normal file
@@ -0,0 +1,182 @@
|
||||
# DDYS(低端影视)插件HTML结构分析
|
||||
|
||||
## 网站概述
|
||||
- **网站名称**: 低端影视
|
||||
- **域名**: https://ddys.pro/
|
||||
- **类型**: 影视资源网站,提供在线播放和网盘下载链接
|
||||
|
||||
## API流程概述
|
||||
|
||||
### 搜索页面
|
||||
- **请求URL**: `https://ddys.pro/?s={keyword}&post_type=post`
|
||||
- **方法**: GET
|
||||
- **Headers**: 标准浏览器请求头
|
||||
- **特点**: WordPress网站,使用标准搜索功能
|
||||
|
||||
## 搜索结果结构
|
||||
|
||||
### 搜索结果页面HTML结构
|
||||
```html
|
||||
<main id="main" class="site-main col-md-8" role="main">
|
||||
<article id="post-1404" class="post-1404 post type-post status-publish ...">
|
||||
<div class="row">
|
||||
<div class="post-content col-md-12">
|
||||
<header class="entry-header">
|
||||
<h2 class="post-title">
|
||||
<a href="https://ddys.pro/deadpool/" rel="bookmark">死侍 1-3</a>
|
||||
</h2>
|
||||
</header>
|
||||
|
||||
<div class="entry-content">
|
||||
<p>注:本片不适合公共场合观看</p>
|
||||
</div>
|
||||
|
||||
<footer class="entry-footer">
|
||||
<div class="metadata">
|
||||
<ul>
|
||||
<li class="meta_date">
|
||||
<time class="entry-date published" datetime="2018-08-08T01:41:40+08:00">
|
||||
2018年8月8日
|
||||
</time>
|
||||
</li>
|
||||
<li class="meta_categories">
|
||||
<span class="cat-links">
|
||||
<a href="..." rel="category tag">欧美电影</a>
|
||||
</span>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</article>
|
||||
</main>
|
||||
```
|
||||
|
||||
### 详情页面HTML结构
|
||||
```html
|
||||
<main id="main" class="site-main" role="main">
|
||||
<article id="post-19840" class="...">
|
||||
<div class="post-content">
|
||||
<h1 class="post-title">变形金刚 超能勇士崛起</h1>
|
||||
|
||||
<div class="metadata">
|
||||
<ul>
|
||||
<li class="meta_date">
|
||||
<time class="entry-date published updated"
|
||||
datetime="2023-07-13T14:27:08+08:00">
|
||||
2023年7月13日
|
||||
</time>
|
||||
</li>
|
||||
<li class="meta_categories">
|
||||
<span class="cat-links">
|
||||
<a href="..." rel="category tag">欧美电影</a>
|
||||
</span>
|
||||
</li>
|
||||
<li class="meta_tags">
|
||||
<span class="tags-links">
|
||||
标签:<a href="..." rel="tag">动作</a>
|
||||
</span>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="entry">
|
||||
<!-- 播放器相关内容 -->
|
||||
|
||||
<!-- 网盘下载链接 -->
|
||||
<p>视频下载 (夸克网盘):
|
||||
<a href="https://pan.quark.cn/s/a372a91a0296"
|
||||
rel="noopener nofollow" target="_blank">
|
||||
https://pan.quark.cn/s/a372a91a0296
|
||||
</a>
|
||||
</p>
|
||||
|
||||
<!-- 豆瓣信息区块 -->
|
||||
<div class="doulist-item">
|
||||
<div class="mod">
|
||||
<div class="v-overflowHidden doulist-subject">
|
||||
<div class="post">
|
||||
<img src="douban_cache/xxx.jpg">
|
||||
</div>
|
||||
<div class="title">
|
||||
<a href="https://movie.douban.com/subject/..."
|
||||
class="cute" target="_blank">
|
||||
影片名称 英文名
|
||||
</a>
|
||||
</div>
|
||||
<div class="rating">
|
||||
<span class="rating_nums">5.8</span>
|
||||
</div>
|
||||
<div class="abstract">
|
||||
<!-- 详细信息:又名、导演、演员、类型等 -->
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</article>
|
||||
</main>
|
||||
```
|
||||
|
||||
## 数据提取要点
|
||||
|
||||
### 搜索结果页面
|
||||
1. **结果列表**: `article[class*='post-']` - 每个搜索结果(与插件实际使用的选择器一致)
|
||||
2. **文章ID**: 从article的class或id属性提取,如 `post-1404`
|
||||
3. **标题**: `.post-title > a` - 获取文本和href属性
|
||||
4. **链接**: `.post-title > a[href]` - 详情页链接
|
||||
5. **发布时间**: `.meta_date > time.entry-date[datetime]` - ISO格式时间
|
||||
6. **分类**: `.meta_categories > .cat-links > a` - 分类信息
|
||||
7. **简介**: `.entry-content` - 内容简介(可能为空)
|
||||
|
||||
### 详情页面
|
||||
1. **标题**: `h1.post-title` - 影片标题
|
||||
2. **发布时间**: `.meta_date > time.entry-date[datetime]` - 发布时间
|
||||
3. **分类标签**: `.meta_categories`和`.meta_tags`中的链接
|
||||
4. **网盘链接提取**:
|
||||
- 模式1: `(网盘名):<a href="链接">链接文本</a>`
|
||||
- 模式2: `(网盘名) <a href="链接">链接文本</a>`
|
||||
- 常见网盘: 夸克网盘、百度网盘、阿里云盘、天翼云盘等
|
||||
5. **豆瓣信息**: `.doulist-item`区块(可选)
|
||||
|
||||
## 网盘链接识别规则
|
||||
|
||||
### 支持的网盘类型
|
||||
- **夸克网盘**: `pan.quark.cn`
|
||||
- **百度网盘**: `pan.baidu.com`
|
||||
- **阿里云盘**: `aliyundrive.com` / `alipan.com`
|
||||
- **天翼云盘**: `cloud.189.cn`
|
||||
- **迅雷网盘**: `pan.xunlei.com`
|
||||
- **115网盘**: `115.com`
|
||||
- **蓝奏云**: `lanzou`相关域名
|
||||
|
||||
### 链接提取策略
|
||||
1. 在详情页的`.entry`内容区域搜索
|
||||
2. 使用正则表达式匹配网盘链接模式
|
||||
3. 提取网盘类型、链接和可能的提取码
|
||||
4. 链接去重和验证
|
||||
|
||||
## 特殊处理
|
||||
|
||||
### 时间解析
|
||||
- 格式: ISO 8601格式 `2023-07-13T14:27:08+08:00`
|
||||
- 显示: `2023年7月13日`
|
||||
|
||||
### 内容清理
|
||||
- 移除HTML标签
|
||||
- 处理特殊字符和编码
|
||||
- 清理多余空格和换行
|
||||
|
||||
### 错误处理
|
||||
- 网络超时重试
|
||||
- 解析失败的降级处理
|
||||
- 空结果的处理
|
||||
|
||||
## 注意事项
|
||||
|
||||
1. **反爬虫**: 网站可能有基础的反爬虫措施,需要设置合理的请求头
|
||||
2. **限频**: 避免请求过于频繁
|
||||
3. **编码**: 处理中文关键词的URL编码
|
||||
4. **更新**: 网站结构可能会变化,需要定期维护选择器
|
||||
712
plugin/hdmoli/hdmoli.go
Normal file
712
plugin/hdmoli/hdmoli.go
Normal file
@@ -0,0 +1,712 @@
|
||||
package hdmoli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"pansou/model"
|
||||
"pansou/plugin"
|
||||
)
|
||||
|
||||
// Plugin identity and tuning constants for the HDmoli source plugin.
const (
	PluginName  = "hdmoli"                        // registry key for this plugin
	DisplayName = "HDmoli"                        // human-readable plugin name
	Description = "HDmoli - 影视资源网盘下载链接搜索"         // short plugin description
	BaseURL     = "https://www.hdmoli.pro"        // site root
	SearchPath  = "/search.php?searchkey=%s&submit=" // search endpoint; %s is the escaped keyword
	// UserAgent mimics a desktop Chrome browser to pass basic bot filtering.
	UserAgent      = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
	MaxResults     = 50 // cap on parsed search hits
	MaxConcurrency = 20 // bound on concurrent detail-page fetches
)
|
||||
|
||||
// HdmoliPlugin searches HDmoli (hdmoli.pro) for film/TV entries and
// collects network-disk download links from each entry's detail page.
type HdmoliPlugin struct {
	*plugin.BaseAsyncPlugin
	debugMode   bool          // when true, emits verbose log.Printf tracing
	detailCache sync.Map      // detail-page URL -> cached link results
	cacheTTL    time.Duration // intended cache lifetime — NOTE(review): no eviction is visible in this file; confirm TTL is enforced elsewhere
}
|
||||
|
||||
// init registers the plugin with the global plugin registry at program start.
func init() {
	plugin.RegisterGlobalPlugin(NewHdmoliPlugin())
}
|
||||
|
||||
// NewHdmoliPlugin 创建新的HDmoli插件实例
|
||||
func NewHdmoliPlugin() *HdmoliPlugin {
|
||||
debugMode := false // 生产环境关闭调试
|
||||
|
||||
p := &HdmoliPlugin{
|
||||
BaseAsyncPlugin: plugin.NewBaseAsyncPlugin(PluginName, 2), // 标准网盘插件,启用Service层过滤
|
||||
debugMode: debugMode,
|
||||
cacheTTL: 30 * time.Minute, // 详情页缓存30分钟
|
||||
}
|
||||
|
||||
return p
|
||||
}
|
||||
|
||||
// Name returns the plugin's registry name.
func (p *HdmoliPlugin) Name() string {
	return PluginName
}
|
||||
|
||||
// DisplayName returns the plugin's human-readable name.
func (p *HdmoliPlugin) DisplayName() string {
	return DisplayName
}
|
||||
|
||||
// Description returns the plugin's short description.
func (p *HdmoliPlugin) Description() string {
	return Description
}
|
||||
|
||||
// Search 搜索接口
|
||||
func (p *HdmoliPlugin) Search(keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
|
||||
return p.searchImpl(&http.Client{Timeout: 30 * time.Second}, keyword, ext)
|
||||
}
|
||||
|
||||
// searchImpl runs the three-stage pipeline: site search, concurrent
// detail-page link resolution, then keyword filtering.
func (p *HdmoliPlugin) searchImpl(client *http.Client, keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
	if p.debugMode {
		log.Printf("[HDMOLI] 开始搜索: %s", keyword)
	}

	// Stage 1: fetch and parse the search-results page.
	searchResults, err := p.executeSearch(client, keyword)
	if err != nil {
		return nil, fmt.Errorf("[%s] 执行搜索失败: %w", p.Name(), err)
	}

	if p.debugMode {
		log.Printf("[HDMOLI] 搜索获取到 %d 个结果", len(searchResults))
	}

	// Stage 2: concurrently resolve detail pages into pan links.
	finalResults := p.fetchDetailLinks(client, searchResults, keyword)

	if p.debugMode {
		log.Printf("[HDMOLI] 最终获取到 %d 个有效结果", len(finalResults))
	}

	// Stage 3: keyword filtering (required for standard pan plugins).
	filteredResults := plugin.FilterResultsByKeyword(finalResults, keyword)

	if p.debugMode {
		log.Printf("[HDMOLI] 关键词过滤后剩余 %d 个结果", len(filteredResults))
	}

	return filteredResults, nil
}
|
||||
|
||||
// executeSearch issues the search request with browser-like headers and
// parses the response HTML into preliminary results (no pan links yet).
func (p *HdmoliPlugin) executeSearch(client *http.Client, keyword string) ([]model.SearchResult, error) {
	// Build the search URL from the escaped keyword.
	searchURL := fmt.Sprintf("%s%s", BaseURL, fmt.Sprintf(SearchPath, url.QueryEscape(keyword)))

	// Per-request 30s timeout.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
	if err != nil {
		return nil, fmt.Errorf("[%s] 创建搜索请求失败: %w", p.Name(), err)
	}

	// Full browser-like header set; the site may reject bare clients.
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
	req.Header.Set("Connection", "keep-alive")
	req.Header.Set("Upgrade-Insecure-Requests", "1")
	req.Header.Set("Cache-Control", "max-age=0")
	req.Header.Set("Referer", BaseURL+"/") // HDmoli requires a referer

	resp, err := p.doRequestWithRetry(req, client)
	if err != nil {
		return nil, fmt.Errorf("[%s] 搜索请求失败: %w", p.Name(), err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("[%s] 搜索请求HTTP状态错误: %d", p.Name(), resp.StatusCode)
	}

	// Parse the HTML and extract result items.
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("[%s] 解析搜索结果HTML失败: %w", p.Name(), err)
	}

	return p.parseSearchResults(doc)
}
|
||||
|
||||
// doRequestWithRetry 带重试机制的HTTP请求
|
||||
func (p *HdmoliPlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) {
|
||||
maxRetries := 3
|
||||
var lastErr error
|
||||
|
||||
for i := 0; i < maxRetries; i++ {
|
||||
if i > 0 {
|
||||
// 指数退避重试
|
||||
backoff := time.Duration(1<<uint(i-1)) * 200 * time.Millisecond
|
||||
time.Sleep(backoff)
|
||||
}
|
||||
|
||||
// 克隆请求避免并发问题
|
||||
reqClone := req.Clone(req.Context())
|
||||
|
||||
resp, err := client.Do(reqClone)
|
||||
if err == nil && resp.StatusCode == 200 {
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
if resp != nil {
|
||||
resp.Body.Close()
|
||||
}
|
||||
lastErr = err
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("[%s] 重试 %d 次后仍然失败: %w", p.Name(), maxRetries, lastErr)
|
||||
}
|
||||
|
||||
// parseSearchResults walks the search-result list items and converts each
// into a preliminary SearchResult, capped at MaxResults.
func (p *HdmoliPlugin) parseSearchResults(doc *goquery.Document) ([]model.SearchResult, error) {
	var results []model.SearchResult

	// Each hit is a <li class="active clearfix"> under #searchList.
	doc.Find("#searchList > li.active.clearfix").Each(func(i int, s *goquery.Selection) {
		if len(results) >= MaxResults {
			return
		}

		result := p.parseResultItem(s, i+1)
		if result != nil {
			results = append(results, *result)
		}
	})

	if p.debugMode {
		log.Printf("[HDMOLI] 解析到 %d 个原始结果", len(results))
	}

	return results, nil
}
|
||||
|
||||
// parseResultItem 解析单个搜索结果项
|
||||
func (p *HdmoliPlugin) parseResultItem(s *goquery.Selection, index int) *model.SearchResult {
|
||||
// 提取标题和链接
|
||||
titleEl := s.Find(".detail h4.title a")
|
||||
if titleEl.Length() == 0 {
|
||||
if p.debugMode {
|
||||
log.Printf("[HDMOLI] 跳过无标题链接的结果")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// 提取标题
|
||||
title := strings.TrimSpace(titleEl.Text())
|
||||
if title == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// 提取详情页链接
|
||||
detailURL, _ := titleEl.Attr("href")
|
||||
if detailURL == "" {
|
||||
// 尝试从缩略图获取链接
|
||||
thumbEl := s.Find(".thumb a")
|
||||
if thumbEl.Length() > 0 {
|
||||
detailURL, _ = thumbEl.Attr("href")
|
||||
}
|
||||
}
|
||||
|
||||
if detailURL == "" {
|
||||
if p.debugMode {
|
||||
log.Printf("[HDMOLI] 跳过无链接的结果: %s", title)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// 处理相对路径
|
||||
if strings.HasPrefix(detailURL, "/") {
|
||||
detailURL = BaseURL + detailURL
|
||||
}
|
||||
|
||||
// 提取评分
|
||||
rating := p.extractRating(s)
|
||||
|
||||
// 提取更新状态
|
||||
updateStatus := p.extractUpdateStatus(s)
|
||||
|
||||
// 提取导演
|
||||
director := p.extractDirector(s)
|
||||
|
||||
// 提取主演
|
||||
actors := p.extractActors(s)
|
||||
|
||||
// 提取分类信息
|
||||
category, region, year := p.extractCategoryInfo(s)
|
||||
|
||||
// 提取简介
|
||||
description := p.extractDescription(s)
|
||||
|
||||
// 构建内容
|
||||
var contentParts []string
|
||||
if rating != "" {
|
||||
contentParts = append(contentParts, fmt.Sprintf("评分:%s", rating))
|
||||
}
|
||||
if updateStatus != "" {
|
||||
contentParts = append(contentParts, fmt.Sprintf("状态:%s", updateStatus))
|
||||
}
|
||||
if director != "" {
|
||||
contentParts = append(contentParts, fmt.Sprintf("导演:%s", director))
|
||||
}
|
||||
if len(actors) > 0 {
|
||||
actorStr := strings.Join(actors, " ")
|
||||
if len(actorStr) > 100 {
|
||||
actorStr = actorStr[:100] + "..."
|
||||
}
|
||||
contentParts = append(contentParts, fmt.Sprintf("主演:%s", actorStr))
|
||||
}
|
||||
if category != "" {
|
||||
contentParts = append(contentParts, fmt.Sprintf("分类:%s", category))
|
||||
}
|
||||
if region != "" {
|
||||
contentParts = append(contentParts, fmt.Sprintf("地区:%s", region))
|
||||
}
|
||||
if year != "" {
|
||||
contentParts = append(contentParts, fmt.Sprintf("年份:%s", year))
|
||||
}
|
||||
if description != "" {
|
||||
contentParts = append(contentParts, fmt.Sprintf("简介:%s", description))
|
||||
}
|
||||
|
||||
content := strings.Join(contentParts, "\n")
|
||||
|
||||
// 构建标签
|
||||
var tags []string
|
||||
if category != "" {
|
||||
tags = append(tags, category)
|
||||
}
|
||||
if region != "" {
|
||||
tags = append(tags, region)
|
||||
}
|
||||
if year != "" {
|
||||
tags = append(tags, year)
|
||||
}
|
||||
|
||||
// 构建初始结果对象(详情页链接稍后获取)
|
||||
result := model.SearchResult{
|
||||
Title: title,
|
||||
Content: content,
|
||||
Channel: "", // 插件搜索结果必须为空字符串(按开发指南要求)
|
||||
MessageID: fmt.Sprintf("%s-%d-%d", p.Name(), index, time.Now().Unix()),
|
||||
UniqueID: fmt.Sprintf("%s-%d-%d", p.Name(), index, time.Now().Unix()),
|
||||
Datetime: time.Now(), // 搜索结果页没有明确时间,使用当前时间
|
||||
Links: []model.Link{}, // 先为空,详情页处理后添加
|
||||
Tags: tags,
|
||||
}
|
||||
|
||||
// 添加详情页URL到临时字段(用于后续处理)
|
||||
result.Content += fmt.Sprintf("\n详情页URL: %s", detailURL)
|
||||
|
||||
if p.debugMode {
|
||||
log.Printf("[HDMOLI] 解析结果: %s (%s)", title, category)
|
||||
}
|
||||
|
||||
return &result
|
||||
}
|
||||
|
||||
// extractRating 提取评分
|
||||
func (p *HdmoliPlugin) extractRating(s *goquery.Selection) string {
|
||||
ratingEl := s.Find(".pic-tag")
|
||||
if ratingEl.Length() > 0 {
|
||||
rating := strings.TrimSpace(ratingEl.Text())
|
||||
return rating
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// extractUpdateStatus 提取更新状态
|
||||
func (p *HdmoliPlugin) extractUpdateStatus(s *goquery.Selection) string {
|
||||
statusEl := s.Find(".pic-text")
|
||||
if statusEl.Length() > 0 {
|
||||
status := strings.TrimSpace(statusEl.Text())
|
||||
return status
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// extractDirector 提取导演
|
||||
func (p *HdmoliPlugin) extractDirector(s *goquery.Selection) string {
|
||||
var director string
|
||||
s.Find("p").Each(func(i int, p *goquery.Selection) {
|
||||
if director != "" {
|
||||
return // 已找到,跳过
|
||||
}
|
||||
text := p.Text()
|
||||
if strings.Contains(text, "导演:") {
|
||||
// 提取导演名称
|
||||
parts := strings.Split(text, "导演:")
|
||||
if len(parts) > 1 {
|
||||
director = strings.TrimSpace(parts[1])
|
||||
}
|
||||
}
|
||||
})
|
||||
return director
|
||||
}
|
||||
|
||||
// extractActors 提取主演
|
||||
func (p *HdmoliPlugin) extractActors(s *goquery.Selection) []string {
|
||||
var actors []string
|
||||
s.Find("p").Each(func(i int, p *goquery.Selection) {
|
||||
text := p.Text()
|
||||
if strings.Contains(text, "主演:") {
|
||||
// 在这个p标签中查找所有链接
|
||||
p.Find("a").Each(func(j int, a *goquery.Selection) {
|
||||
actor := strings.TrimSpace(a.Text())
|
||||
if actor != "" {
|
||||
actors = append(actors, actor)
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
return actors
|
||||
}
|
||||
|
||||
// extractCategoryInfo 提取分类信息(分类、地区、年份)
|
||||
func (p *HdmoliPlugin) extractCategoryInfo(s *goquery.Selection) (category, region, year string) {
|
||||
s.Find("p").Each(func(i int, p *goquery.Selection) {
|
||||
text := p.Text()
|
||||
if strings.Contains(text, "分类:") {
|
||||
// 解析分类信息行
|
||||
parts := strings.Split(text, ":")
|
||||
for i, part := range parts {
|
||||
part = strings.TrimSpace(part)
|
||||
if strings.HasSuffix(parts[i], "分类") && i+1 < len(parts) {
|
||||
// 提取分类,可能包含地区和年份信息
|
||||
info := strings.TrimSpace(parts[i+1])
|
||||
// 按分隔符分割
|
||||
infoParts := regexp.MustCompile(`[,,\s]+`).Split(info, -1)
|
||||
if len(infoParts) > 0 && infoParts[0] != "" {
|
||||
category = infoParts[0]
|
||||
}
|
||||
} else if strings.HasSuffix(parts[i], "地区") && i+1 < len(parts) {
|
||||
regionPart := strings.TrimSpace(parts[i+1])
|
||||
regionParts := regexp.MustCompile(`[,,\s]+`).Split(regionPart, -1)
|
||||
if len(regionParts) > 0 && regionParts[0] != "" {
|
||||
region = regionParts[0]
|
||||
}
|
||||
} else if strings.HasSuffix(parts[i], "年份") && i+1 < len(parts) {
|
||||
yearPart := strings.TrimSpace(parts[i+1])
|
||||
yearParts := regexp.MustCompile(`[,,\s]+`).Split(yearPart, -1)
|
||||
if len(yearParts) > 0 && yearParts[0] != "" {
|
||||
year = yearParts[0]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
return category, region, year
|
||||
}
|
||||
|
||||
// extractDescription 提取简介
|
||||
func (p *HdmoliPlugin) extractDescription(s *goquery.Selection) string {
|
||||
var description string
|
||||
descEl := s.Find("p.hidden-xs")
|
||||
descEl.Each(func(i int, p *goquery.Selection) {
|
||||
if description != "" {
|
||||
return // 已找到,跳过
|
||||
}
|
||||
text := p.Text()
|
||||
if strings.Contains(text, "简介:") {
|
||||
parts := strings.Split(text, "简介:")
|
||||
if len(parts) > 1 {
|
||||
desc := strings.TrimSpace(parts[1])
|
||||
// 限制长度
|
||||
if len(desc) > 200 {
|
||||
desc = desc[:200] + "..."
|
||||
}
|
||||
description = desc
|
||||
}
|
||||
}
|
||||
})
|
||||
return description
|
||||
}
|
||||
|
||||
// fetchDetailLinks 并发获取详情页链接
|
||||
func (p *HdmoliPlugin) fetchDetailLinks(client *http.Client, searchResults []model.SearchResult, keyword string) []model.SearchResult {
|
||||
if len(searchResults) == 0 {
|
||||
return []model.SearchResult{}
|
||||
}
|
||||
|
||||
// 使用通道控制并发数
|
||||
semaphore := make(chan struct{}, MaxConcurrency)
|
||||
var wg sync.WaitGroup
|
||||
resultsChan := make(chan model.SearchResult, len(searchResults))
|
||||
|
||||
for _, result := range searchResults {
|
||||
wg.Add(1)
|
||||
go func(r model.SearchResult) {
|
||||
defer wg.Done()
|
||||
semaphore <- struct{}{} // 获取信号量
|
||||
defer func() { <-semaphore }() // 释放信号量
|
||||
|
||||
// 从Content中提取详情页URL
|
||||
detailURL := p.extractDetailURLFromContent(r.Content)
|
||||
if detailURL == "" {
|
||||
if p.debugMode {
|
||||
log.Printf("[HDMOLI] 跳过无详情页URL的结果: %s", r.Title)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// 获取详情页链接
|
||||
links := p.fetchDetailPageLinks(client, detailURL)
|
||||
if len(links) > 0 {
|
||||
r.Links = links
|
||||
// 清理Content中的详情页URL
|
||||
r.Content = p.cleanContent(r.Content)
|
||||
resultsChan <- r
|
||||
} else if p.debugMode {
|
||||
log.Printf("[HDMOLI] 详情页无有效链接: %s", r.Title)
|
||||
}
|
||||
}(result)
|
||||
}
|
||||
|
||||
// 等待所有goroutine完成
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(resultsChan)
|
||||
}()
|
||||
|
||||
// 收集结果
|
||||
var finalResults []model.SearchResult
|
||||
for result := range resultsChan {
|
||||
finalResults = append(finalResults, result)
|
||||
}
|
||||
|
||||
return finalResults
|
||||
}
|
||||
|
||||
// extractDetailURLFromContent 从Content中提取详情页URL
|
||||
func (p *HdmoliPlugin) extractDetailURLFromContent(content string) string {
|
||||
lines := strings.Split(content, "\n")
|
||||
for _, line := range lines {
|
||||
if strings.HasPrefix(line, "详情页URL: ") {
|
||||
return strings.TrimPrefix(line, "详情页URL: ")
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// cleanContent 清理Content,移除详情页URL行
|
||||
func (p *HdmoliPlugin) cleanContent(content string) string {
|
||||
lines := strings.Split(content, "\n")
|
||||
var cleanedLines []string
|
||||
for _, line := range lines {
|
||||
if !strings.HasPrefix(line, "详情页URL: ") {
|
||||
cleanedLines = append(cleanedLines, line)
|
||||
}
|
||||
}
|
||||
return strings.Join(cleanedLines, "\n")
|
||||
}
|
||||
|
||||
// fetchDetailPageLinks 获取详情页的网盘链接
|
||||
func (p *HdmoliPlugin) fetchDetailPageLinks(client *http.Client, detailURL string) []model.Link {
|
||||
// 检查缓存
|
||||
if cached, found := p.detailCache.Load(detailURL); found {
|
||||
if links, ok := cached.([]model.Link); ok {
|
||||
if p.debugMode {
|
||||
log.Printf("[HDMOLI] 使用缓存的详情页链接: %s", detailURL)
|
||||
}
|
||||
return links
|
||||
}
|
||||
}
|
||||
|
||||
// 创建带超时的上下文
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", detailURL, nil)
|
||||
if err != nil {
|
||||
if p.debugMode {
|
||||
log.Printf("[HDMOLI] 创建详情页请求失败: %v", err)
|
||||
}
|
||||
return []model.Link{}
|
||||
}
|
||||
|
||||
// 设置请求头
|
||||
req.Header.Set("User-Agent", UserAgent)
|
||||
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
|
||||
req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
|
||||
req.Header.Set("Referer", BaseURL+"/")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
if p.debugMode {
|
||||
log.Printf("[HDMOLI] 详情页请求失败: %v", err)
|
||||
}
|
||||
return []model.Link{}
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
if p.debugMode {
|
||||
log.Printf("[HDMOLI] 详情页HTTP状态错误: %d", resp.StatusCode)
|
||||
}
|
||||
return []model.Link{}
|
||||
}
|
||||
|
||||
// 读取响应体
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
if p.debugMode {
|
||||
log.Printf("[HDMOLI] 读取详情页响应失败: %v", err)
|
||||
}
|
||||
return []model.Link{}
|
||||
}
|
||||
|
||||
// 解析网盘链接
|
||||
links := p.parseNetworkDiskLinks(string(body))
|
||||
|
||||
// 缓存结果
|
||||
if len(links) > 0 {
|
||||
p.detailCache.Store(detailURL, links)
|
||||
}
|
||||
|
||||
if p.debugMode {
|
||||
log.Printf("[HDMOLI] 从详情页提取到 %d 个链接: %s", len(links), detailURL)
|
||||
}
|
||||
|
||||
return links
|
||||
}
|
||||
|
||||
// parseNetworkDiskLinks 解析网盘链接
|
||||
func (p *HdmoliPlugin) parseNetworkDiskLinks(htmlContent string) []model.Link {
|
||||
var links []model.Link
|
||||
|
||||
// 解析HTML文档以便更精确的提取
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent))
|
||||
if err != nil {
|
||||
if p.debugMode {
|
||||
log.Printf("[HDMOLI] 解析详情页HTML失败: %v", err)
|
||||
}
|
||||
// 如果解析失败,使用正则表达式作为备选
|
||||
return p.parseNetworkDiskLinksWithRegex(htmlContent)
|
||||
}
|
||||
|
||||
// 在"视频下载"区域查找网盘链接
|
||||
doc.Find(".downlist").Each(func(i int, s *goquery.Selection) {
|
||||
s.Find("p").Each(func(j int, pEl *goquery.Selection) {
|
||||
text := pEl.Text()
|
||||
|
||||
// 查找夸克网盘
|
||||
if strings.Contains(text, "夸 克:") || strings.Contains(text, "夸克:") {
|
||||
pEl.Find("a").Each(func(k int, a *goquery.Selection) {
|
||||
href, exists := a.Attr("href")
|
||||
if exists && strings.Contains(href, "pan.quark.cn") {
|
||||
link := model.Link{
|
||||
Type: "quark",
|
||||
URL: href,
|
||||
Password: p.extractPasswordFromQuarkURL(href),
|
||||
}
|
||||
links = append(links, link)
|
||||
if p.debugMode {
|
||||
log.Printf("[HDMOLI] 找到夸克链接: %s", href)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// 查找百度网盘
|
||||
if strings.Contains(text, "百 度:") || strings.Contains(text, "百度:") {
|
||||
pEl.Find("a").Each(func(k int, a *goquery.Selection) {
|
||||
href, exists := a.Attr("href")
|
||||
if exists && strings.Contains(href, "pan.baidu.com") {
|
||||
password := p.extractPasswordFromBaiduURL(href)
|
||||
link := model.Link{
|
||||
Type: "baidu",
|
||||
URL: href,
|
||||
Password: password,
|
||||
}
|
||||
links = append(links, link)
|
||||
if p.debugMode {
|
||||
log.Printf("[HDMOLI] 找到百度链接: %s (密码: %s)", href, password)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
return links
|
||||
}
|
||||
|
||||
// parseNetworkDiskLinksWithRegex 使用正则表达式解析网盘链接(备选方案)
|
||||
func (p *HdmoliPlugin) parseNetworkDiskLinksWithRegex(htmlContent string) []model.Link {
|
||||
var links []model.Link
|
||||
|
||||
// 夸克网盘链接模式
|
||||
quarkPattern := regexp.MustCompile(`<b>夸\s*克:</b><a[^>]*href\s*=\s*["']([^"']*pan\.quark\.cn[^"']*)["'][^>]*>`)
|
||||
quarkMatches := quarkPattern.FindAllStringSubmatch(htmlContent, -1)
|
||||
for _, match := range quarkMatches {
|
||||
if len(match) > 1 {
|
||||
link := model.Link{
|
||||
Type: "quark",
|
||||
URL: match[1],
|
||||
Password: "",
|
||||
}
|
||||
links = append(links, link)
|
||||
}
|
||||
}
|
||||
|
||||
// 百度网盘链接模式
|
||||
baiduPattern := regexp.MustCompile(`<b>百\s*度:</b><a[^>]*href\s*=\s*["']([^"']*pan\.baidu\.com[^"']*)["'][^>]*>`)
|
||||
baiduMatches := baiduPattern.FindAllStringSubmatch(htmlContent, -1)
|
||||
for _, match := range baiduMatches {
|
||||
if len(match) > 1 {
|
||||
password := p.extractPasswordFromBaiduURL(match[1])
|
||||
link := model.Link{
|
||||
Type: "baidu",
|
||||
URL: match[1],
|
||||
Password: password,
|
||||
}
|
||||
links = append(links, link)
|
||||
}
|
||||
}
|
||||
|
||||
return links
|
||||
}
|
||||
|
||||
// extractPasswordFromQuarkURL 从夸克网盘URL提取提取码
|
||||
func (p *HdmoliPlugin) extractPasswordFromQuarkURL(panURL string) string {
|
||||
// 夸克网盘一般不需要提取码,直接返回空
|
||||
return ""
|
||||
}
|
||||
|
||||
// extractPasswordFromBaiduURL 从百度网盘URL提取提取码
|
||||
func (p *HdmoliPlugin) extractPasswordFromBaiduURL(panURL string) string {
|
||||
// 检查URL中是否包含pwd参数
|
||||
if strings.Contains(panURL, "?pwd=") {
|
||||
parts := strings.Split(panURL, "?pwd=")
|
||||
if len(parts) > 1 {
|
||||
return parts[1]
|
||||
}
|
||||
}
|
||||
if strings.Contains(panURL, "&pwd=") {
|
||||
parts := strings.Split(panURL, "&pwd=")
|
||||
if len(parts) > 1 {
|
||||
return parts[1]
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
167
plugin/hdmoli/html结构分析.md
Normal file
167
plugin/hdmoli/html结构分析.md
Normal file
@@ -0,0 +1,167 @@
|
||||
# HDMOLI(HDmoli)插件HTML结构分析
|
||||
|
||||
## 网站概述
|
||||
- **网站名称**: HDmoli
|
||||
- **域名**: https://www.hdmoli.pro/
|
||||
- **类型**: 影视资源网站,主要提供网盘下载链接(夸克网盘、百度网盘)
|
||||
|
||||
## API流程概述
|
||||
|
||||
### 搜索页面
|
||||
- **请求URL**: `https://www.hdmoli.pro/search.php?searchkey={keyword}&submit=`
|
||||
- **方法**: GET
|
||||
- **Headers**: 需要设置 `Referer: https://www.hdmoli.pro/`
|
||||
- **特点**: 简单的GET请求搜索
|
||||
|
||||
## 搜索结果结构
|
||||
|
||||
### 搜索结果页面HTML结构
|
||||
```html
|
||||
<ul class="myui-vodlist__media clearfix" id="searchList">
|
||||
<li class="active clearfix">
|
||||
<div class="thumb">
|
||||
<a class="myui-vodlist__thumb" href="/movie/index2976.html" title="怪兽8号 第二季">
|
||||
<span class="pic-tag pic-tag-top" style="background-color: #5bb7fe;">
|
||||
7.6分
|
||||
</span>
|
||||
<span class="pic-text text-right">
|
||||
更新至06集
|
||||
</span>
|
||||
</a>
|
||||
</div>
|
||||
<div class="detail">
|
||||
<h4 class="title">
|
||||
<a href="/movie/index2976.html">怪兽8号 第二季</a>
|
||||
</h4>
|
||||
<p><span class="text-muted">导演:</span>宫繁之</p>
|
||||
<p><span class="text-muted">主演:</span>
|
||||
<a href="...">福西胜也</a>
|
||||
<a href="...">濑户麻沙美</a>
|
||||
</p>
|
||||
<p><span class="text-muted">分类:</span>日本
|
||||
<span class="split-line"></span>
|
||||
<span class="text-muted hidden-xs">地区:</span>日本
|
||||
<span class="split-line"></span>
|
||||
<span class="text-muted hidden-xs">年份:</span>2025
|
||||
</p>
|
||||
<p class="hidden-xs"><span class="text-muted">简介:</span>...</p>
|
||||
<p class="margin-0">
|
||||
<a class="btn btn-lg btn-warm" href="/movie/index2976.html">立即播放</a>
|
||||
</p>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
```
|
||||
|
||||
### 详情页面HTML结构
|
||||
```html
|
||||
<div class="myui-content__detail">
|
||||
<h1 class="title text-fff">怪兽8号 第二季</h1>
|
||||
|
||||
<!-- 评分 -->
|
||||
<div id="rating" class="score" data-id="2976">
|
||||
<span class="branch">7.6</span>
|
||||
</div>
|
||||
|
||||
<!-- 基本信息 -->
|
||||
<p class="data">
|
||||
<span class="text-muted">分类:</span>动作,科幻
|
||||
<span class="text-muted hidden-xs">地区:</span>日本
|
||||
<span class="text-muted hidden-xs">年份:</span>2025
|
||||
</p>
|
||||
<p class="data"><span class="text-muted">演员:</span>...</p>
|
||||
<p class="data"><span class="text-muted">导演:</span>...</p>
|
||||
<p class="data hidden-sm"><span class="text-muted hidden-xs">更新:</span>2025-08-24 02:21</p>
|
||||
</div>
|
||||
|
||||
<!-- 视频下载区域 -->
|
||||
<div class="myui-panel myui-panel-bg clearfix">
|
||||
<div class="myui-panel_hd">
|
||||
<h3 class="title">视频下载</h3>
|
||||
</div>
|
||||
<ul class="stui-vodlist__text downlist col-pd clearfix">
|
||||
<div class="row">
|
||||
<p class="text-muted col-pd">
|
||||
<b>夸 克:</b>
|
||||
<a title="夸克链接" href="https://pan.quark.cn/s/a061332a75e9" target="_blank">
|
||||
https://pan.quark.cn/s/a061332a75e9
|
||||
</a>
|
||||
</p>
|
||||
<p class="text-muted col-pd">
|
||||
<b>百 度:</b>
|
||||
<a title="百度网盘" href="https://pan.baidu.com/s/xxx?pwd=moil" target="_blank">
|
||||
https://pan.baidu.com/s/...
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
</ul>
|
||||
</div>
|
||||
```
|
||||
|
||||
## 数据提取要点
|
||||
|
||||
### 搜索结果页面
|
||||
1. **结果列表**: `#searchList > li.active.clearfix` - 每个搜索结果
|
||||
2. **标题**: `.detail h4.title a` - 获取文本和href属性
|
||||
3. **详情页链接**: `.detail h4.title a[href]` 或 `.thumb a[href]`
|
||||
4. **评分**: `.pic-tag` - 数字+分
|
||||
5. **更新状态**: `.pic-text` - 如"更新至06集"、"12集全"
|
||||
6. **导演**: 包含"导演:"的`<p>`标签内容
|
||||
7. **主演**: 包含"主演:"的`<p>`标签内的链接
|
||||
8. **分类信息**: 包含"分类:"的`<p>`标签 - 分类/地区/年份
|
||||
9. **简介**: 包含"简介:"的`<p>`标签(可能为空或很短)
|
||||
|
||||
### 详情页面
|
||||
1. **标题**: `h1.title` - 影片完整标题
|
||||
2. **豆瓣评分**: `.score .branch` - 数字评分
|
||||
3. **基本信息**: `.data`标签中的各种信息
|
||||
- 分类: "分类:" 后的内容
|
||||
- 地区: "地区:" 后的内容
|
||||
- 年份: "年份:" 后的内容
|
||||
- 又名: "又名:" 后的内容(如有)
|
||||
4. **演员**: 包含"演员:"的`.data`标签内的链接
|
||||
5. **导演**: 包含"导演:"的`.data`标签内的链接
|
||||
6. **更新时间**: 包含"更新:"的`.data`标签
|
||||
7. **网盘链接提取**:
|
||||
- 夸克网盘: `<b>夸 克:</b>` 后的 `<a>` 标签
|
||||
- 百度网盘: `<b>百 度:</b>` 后的 `<a>` 标签
|
||||
- 其他可能的网盘类型
|
||||
|
||||
## 网盘链接识别规则
|
||||
|
||||
### 支持的网盘类型
|
||||
- **夸克网盘**: `pan.quark.cn`
|
||||
- **百度网盘**: `pan.baidu.com`
|
||||
- **阿里云盘**: `aliyundrive.com` / `alipan.com`(可能出现)
|
||||
- **天翼云盘**: `cloud.189.cn`(可能出现)
|
||||
|
||||
### 链接提取策略
|
||||
1. 在详情页的"视频下载"区域搜索
|
||||
2. 按网盘类型标识符匹配(夸 克:、百 度:等)
|
||||
3. 提取对应的`<a>`标签的`href`属性
|
||||
4. 从URL或周围文本提取可能的提取码(如`?pwd=xxx`)
|
||||
|
||||
## 特殊处理
|
||||
|
||||
### 时间解析
|
||||
- 搜索结果页无明确时间信息
|
||||
- 详情页有更新时间:格式 `2025-08-24 02:21`
|
||||
- 可使用更新时间作为发布时间
|
||||
|
||||
### 内容处理
|
||||
- 评分处理:提取数字部分
|
||||
- 更新状态:如"更新至06集"、"完结"等
|
||||
- 简介可能很短或为空
|
||||
- 标题清理:移除多余空格
|
||||
|
||||
### 分页处理
|
||||
- 搜索结果有分页:`.myui-page` 区域
|
||||
- 分页链接格式:`?page=2&searchkey=xxx&searchtype=`
|
||||
|
||||
## 注意事项
|
||||
|
||||
1. **网盘为主**: 此网站主要提供网盘下载链接,而非在线播放
|
||||
2. **referer必需**: 请求时需要设置正确的referer头
|
||||
3. **编码处理**: 关键词需要URL编码
|
||||
4. **链接验证**: 网盘链接可能失效,需要验证有效性
|
||||
5. **提取码**: 百度网盘链接通常有提取码,在URL参数或文本中
|
||||
Reference in New Issue
Block a user