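// Package huban implements a pansou search plugin for a Huban video site.
// It queries the site's search page and then reads each detail page to
// collect network-drive, magnet and ed2k links.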
package huban
import (
	"context"
	"fmt"
	"net/http"
	"net/url"
	"regexp"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/PuerkitoBio/goquery"
	"pansou/model"
	"pansou/plugin"
)
const (
	// DefaultTimeout bounds a search-page request; it is also used as the
	// overall timeout of the plugin's HTTP client.
	DefaultTimeout = 8 * time.Second
	// DetailTimeout bounds a single detail-page request.
	DetailTimeout = 6 * time.Second

	// HTTP connection-pool settings applied to the plugin's transport.
	MaxIdleConns        = 200
	MaxIdleConnsPerHost = 50
	MaxConnsPerHost     = 100
	IdleConnTimeout     = 90 * time.Second

	// MaxConcurrency caps how many detail pages are fetched at the same time.
	MaxConcurrency = 20

	// cacheTTL is the cache time-to-live.
	// NOTE(review): no use of cacheTTL is visible in this part of the file —
	// confirm that it is applied somewhere.
	cacheTTL = 1 * time.Hour

	// EnableRefererCheck toggles request-origin filtering in Search.
	// The check is currently disabled; set it to true to enforce
	// AllowedReferers.
	EnableRefererCheck = false

	// DebugLog toggles debug output on stdout.
	DebugLog = false
)
// Performance counters. They are only read and written through the
// sync/atomic helpers; durations are accumulated in nanoseconds.
var (
	searchRequests     int64 // number of search requests
	totalSearchTime    int64 // cumulative search time, ns
	detailPageRequests int64 // number of detail-page requests
	totalDetailTime    int64 // cumulative detail-page time, ns
	cacheHits          int64 // detail cache hits
	cacheMisses        int64 // detail cache misses
)
// Detail-page cache.
var (
	// detailCache maps a detail-page item ID (string) to the enhanced
	// model.SearchResult built from that page.
	detailCache sync.Map
	// cacheMutex is declared alongside the cache.
	// NOTE(review): no use of cacheMutex is visible in this part of the
	// file — confirm whether it is still needed.
	cacheMutex sync.RWMutex
)
// AllowedReferers lists the request origins accepted when the referer check
// is enabled. Entries are matched as prefixes, so "https://example.com" also
// accepts "https://example.com/path". Add further origins here as needed.
var AllowedReferers = []string{
	"https://dm.xueximeng.com",
	"http://localhost:8888",
}
func init() {
plugin.RegisterGlobalPlugin(NewHubanPlugin())
}
// Precompiled regular expressions, built once at package load.

// Extraction helpers.
var (
	// passwordRegex captures the value of a "?pwd=" query parameter.
	passwordRegex = regexp.MustCompile(`\?pwd=([0-9a-zA-Z]+)`)
	// password115Regex captures the value of a "password=" parameter.
	password115Regex = regexp.MustCompile(`password=([0-9a-zA-Z]+)`)
	// detailIDRegex captures the numeric ID from a "/id/<n>" detail path.
	detailIDRegex = regexp.MustCompile(`/id/(\d+)`)
)

// Link matchers, one per supported share-link kind.
var (
	quarkLinkRegex      = regexp.MustCompile(`https?://pan\.quark\.cn/s/[0-9a-zA-Z]+`)
	ucLinkRegex         = regexp.MustCompile(`https?://drive\.uc\.cn/s/[0-9a-zA-Z]+(\?[^"'\s]*)?`)
	baiduLinkRegex      = regexp.MustCompile(`https?://pan\.baidu\.com/s/[0-9a-zA-Z_\-]+(\?pwd=[0-9a-zA-Z]+)?`)
	aliyunLinkRegex     = regexp.MustCompile(`https?://(www\.)?(aliyundrive\.com|alipan\.com)/s/[0-9a-zA-Z]+`)
	xunleiLinkRegex     = regexp.MustCompile(`https?://pan\.xunlei\.com/s/[0-9a-zA-Z_\-]+(\?pwd=[0-9a-zA-Z]+)?`)
	tianyiLinkRegex     = regexp.MustCompile(`https?://cloud\.189\.cn/t/[0-9a-zA-Z]+`)
	link115Regex        = regexp.MustCompile(`https?://(115\.com|115cdn\.com)/s/[0-9a-zA-Z]+`)
	mobileLinkRegex     = regexp.MustCompile(`https?://caiyun\.feixin\.10086\.cn/[0-9a-zA-Z]+`)
	weiyunLinkRegex     = regexp.MustCompile(`https?://share\.weiyun\.com/[0-9a-zA-Z]+`)
	lanzouLinkRegex     = regexp.MustCompile(`https?://(www\.)?(lanzou[uixys]*|lan[zs]o[ux])\.(com|net|org)/[0-9a-zA-Z]+`)
	jianguoyunLinkRegex = regexp.MustCompile(`https?://(www\.)?jianguoyun\.com/p/[0-9a-zA-Z]+`)
	link123Regex        = regexp.MustCompile(`https?://(123pan\.com|www\.123912\.com|www\.123865\.com|www\.123684\.com)/s/[0-9a-zA-Z]+`)
	pikpakLinkRegex     = regexp.MustCompile(`https?://mypikpak\.com/s/[0-9a-zA-Z]+`)
	magnetLinkRegex     = regexp.MustCompile(`magnet:\?xt=urn:btih:[0-9a-fA-F]{40}`)
	ed2kLinkRegex       = regexp.MustCompile(`ed2k://\|file\|.+\|\d+\|[0-9a-fA-F]{32}\|/`)
)
// HubanAsyncPlugin Huban异步插件
type HubanAsyncPlugin struct {
*plugin.BaseAsyncPlugin
2025-08-01 14:57:44 +08:00
optimizedClient *http.Client
}
// createOptimizedHTTPClient returns an HTTP client whose transport uses the
// package's connection-pool limits and whose overall timeout is
// DefaultTimeout. Keep-alives stay enabled.
func createOptimizedHTTPClient() *http.Client {
	pooled := new(http.Transport)
	pooled.MaxIdleConns = MaxIdleConns
	pooled.MaxIdleConnsPerHost = MaxIdleConnsPerHost
	pooled.MaxConnsPerHost = MaxConnsPerHost
	pooled.IdleConnTimeout = IdleConnTimeout
	pooled.DisableKeepAlives = false

	httpClient := new(http.Client)
	httpClient.Transport = pooled
	httpClient.Timeout = DefaultTimeout
	return httpClient
}
// NewHubanPlugin 创建新的Huban异步插件
func NewHubanPlugin() *HubanAsyncPlugin {
return &HubanAsyncPlugin{
2025-08-01 21:21:42 +08:00
BaseAsyncPlugin: plugin.NewBaseAsyncPlugin("huban", 2),
2025-08-01 14:57:44 +08:00
optimizedClient: createOptimizedHTTPClient(),
}
}
// Search 同步搜索接口
func (p *HubanAsyncPlugin) Search(keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
2025-08-04 16:33:12 +08:00
// 请求来源检查 - 参考panyq插件实现
if EnableRefererCheck && ext != nil {
referer := ""
if refererVal, ok := ext["referer"].(string); ok {
referer = refererVal
}
// 检查referer是否在允许列表中
allowed := false
for _, allowedReferer := range AllowedReferers {
if strings.HasPrefix(referer, allowedReferer) {
if DebugLog {
fmt.Printf("[%s] 允许来自 %s 的请求\n", p.Name(), referer)
}
allowed = true
break
}
}
if !allowed {
if DebugLog {
fmt.Printf("[%s] 拒绝来自 %s 的请求\n", p.Name(), referer)
}
return nil, fmt.Errorf("[%s] 请求来源不被允许", p.Name())
}
}
result, err := p.SearchWithResult(keyword, ext)
if err != nil {
return nil, err
}
return result.Results, nil
}
// SearchWithResult runs the search through the base plugin's
// AsyncSearchWithResult, passing searchImpl as the worker and MainCacheKey as
// the cache key, and returns the full plugin result.
func (p *HubanAsyncPlugin) SearchWithResult(keyword string, ext map[string]interface{}) (model.PluginSearchResult, error) {
	pluginResult, err := p.AsyncSearchWithResult(keyword, p.searchImpl, p.MainCacheKey, ext)
	return pluginResult, err
}
// searchImpl 搜索实现 - HTML解析版本
func (p *HubanAsyncPlugin) searchImpl(client *http.Client, keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
2025-08-01 14:57:44 +08:00
// 性能统计
start := time.Now()
atomic.AddInt64(&searchRequests, 1)
defer func() {
duration := time.Since(start).Nanoseconds()
atomic.AddInt64(&totalSearchTime, duration)
}()
// 使用优化的客户端
if p.optimizedClient != nil {
client = p.optimizedClient
}
// 1. 构建搜索URL
searchURL := fmt.Sprintf("http://103.45.162.207:20720/index.php/vod/search/wd/%s.html", url.QueryEscape(keyword))
// 2. 创建带超时的上下文
2025-08-01 14:57:44 +08:00
ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout)
defer cancel()
// 3. 创建请求
req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
if err != nil {
return nil, fmt.Errorf("[%s] 创建请求失败: %w", p.Name(), err)
}
// 4. 设置请求头
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
req.Header.Set("Connection", "keep-alive")
req.Header.Set("Referer", "http://103.45.162.207:20720/")
// 5. 发送请求
resp, err := p.doRequestWithRetry(req, client)
if err != nil {
return nil, fmt.Errorf("[%s] 搜索请求失败: %w", p.Name(), err)
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
return nil, fmt.Errorf("[%s] 搜索请求返回状态码: %d", p.Name(), resp.StatusCode)
}
// 6. 解析搜索结果页面
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, fmt.Errorf("[%s] 解析搜索页面失败: %w", p.Name(), err)
}
// 7. 提取搜索结果
var results []model.SearchResult
doc.Find(".module-search-item").Each(func(i int, s *goquery.Selection) {
result := p.parseSearchItem(s, keyword)
if result.UniqueID != "" {
results = append(results, result)
}
})
// 8. 异步获取详情页信息
enhancedResults := p.enhanceWithDetails(client, results)
// 9. 关键词过滤
return plugin.FilterResultsByKeyword(enhancedResults, keyword), nil
}
// parseSearchItem converts one search-result element into a SearchResult.
//
// It returns the zero SearchResult (empty UniqueID) when the title link has
// no href, the href carries no "/id/<n>" segment, or the title text is empty.
// The keyword argument is not used.
func (p *HubanAsyncPlugin) parseSearchItem(s *goquery.Selection, keyword string) model.SearchResult {
	var empty model.SearchResult

	// The title link provides both the detail-page ID and the title text.
	titleLink := s.Find(".video-info-header h3 a").First()
	href, hasHref := titleLink.Attr("href")
	if !hasHref {
		return empty
	}
	idMatch := detailIDRegex.FindStringSubmatch(href)
	if len(idMatch) < 2 {
		return empty
	}
	name := strings.TrimSpace(titleLink.Text())
	if name == "" {
		return empty
	}

	infoRows := s.Find(".video-info-items")

	// labelled returns the value text of the info rows whose caption contains label.
	labelled := func(label string) string {
		rows := infoRows.FilterFunction(func(_ int, row *goquery.Selection) bool {
			caption := strings.TrimSpace(row.Find(".video-info-itemtitle").Text())
			return strings.Contains(caption, label)
		})
		return strings.TrimSpace(rows.Find(".video-info-item").Text())
	}

	// Category is read from the first info row, year from the last one.
	category := strings.TrimSpace(infoRows.First().Find(".video-info-item").First().Text())
	year := strings.TrimSpace(infoRows.Last().Find(".video-info-item").First().Text())
	quality := strings.TrimSpace(s.Find(".video-info-header .video-info-remarks").Text())
	cover, _ := s.Find(".module-item-pic > img").Attr("data-src")

	// Assemble the description from the non-empty fields, in a fixed order.
	described := []struct {
		format string
		value  string
	}{
		{"分类: %s", category},
		{"导演: %s", labelled("导演")},
		{"主演: %s", labelled("主演")},
		{"质量: %s", quality},
		{"剧情: %s", labelled("剧情")},
	}
	var summary []string
	for _, field := range described {
		if field.value == "" {
			continue
		}
		summary = append(summary, fmt.Sprintf(field.format, field.value))
	}

	var tags []string
	if year != "" {
		tags = append(tags, year)
	}

	var images []string
	if cover != "" {
		images = append(images, cover)
	}

	return model.SearchResult{
		UniqueID: fmt.Sprintf("%s-%s", p.Name(), idMatch[1]),
		Title:    name,
		Content:  strings.Join(summary, " | "),
		Images:   images,
		Tags:     tags,
		Channel:  "",
		Datetime: time.Time{},
	}
}
// enhanceWithDetails fetches the detail page of every result concurrently
// (at most MaxConcurrency at a time) and attaches the links and poster image
// found there.
//
// The returned slice has the same length and order as results. An empty
// input yields nil. Detail lookups are served from detailCache when possible;
// a cache hit returns the cached SearchResult as a whole.
func (p *HubanAsyncPlugin) enhanceWithDetails(client *http.Client, results []model.SearchResult) []model.SearchResult {
	if len(results) == 0 {
		return nil
	}

	// enhanceOne resolves a single result, consulting the cache first.
	enhanceOne := func(result model.SearchResult) model.SearchResult {
		// UniqueID has the form "<plugin name>-<item id>"; take the part
		// after the last dash so a dash in the name cannot shift the ID.
		sep := strings.LastIndex(result.UniqueID, "-")
		if sep < 0 || sep == len(result.UniqueID)-1 {
			return result
		}
		itemID := result.UniqueID[sep+1:]

		if cached, ok := detailCache.Load(itemID); ok {
			if hit, ok := cached.(model.SearchResult); ok {
				atomic.AddInt64(&cacheHits, 1)
				return hit
			}
		}
		atomic.AddInt64(&cacheMisses, 1)

		links, images := p.fetchDetailLinksAndImages(client, itemID)
		result.Links = links
		// Prefer the detail-page poster; otherwise keep the search-page image.
		if len(images) > 0 {
			result.Images = images
		}

		// A failed fetch is indistinguishable from a page without links, so
		// only results that actually carry links are cached. Otherwise one
		// transient error would hide the item's links on later lookups.
		// NOTE(review): entries are stored without an expiry here.
		if len(links) > 0 {
			detailCache.Store(itemID, result)
		}
		return result
	}

	// Each goroutine writes only to its own index, so no mutex is needed and
	// the input order is preserved.
	enhanced := make([]model.SearchResult, len(results))
	semaphore := make(chan struct{}, MaxConcurrency)

	var wg sync.WaitGroup
	for i, result := range results {
		wg.Add(1)
		go func(idx int, result model.SearchResult) {
			defer wg.Done()
			semaphore <- struct{}{}        // acquire a slot
			defer func() { <-semaphore }() // release the slot
			enhanced[idx] = enhanceOne(result)
		}(i, result)
	}
	wg.Wait()

	return enhanced
}
// fetchDetailLinksAndImages loads the detail page of itemID and returns the
// supported download links together with the poster image URL (if any).
//
// Any failure (request creation, transport error, non-200 status, HTML parse
// error) yields (nil, nil); errors are not reported to the caller.
func (p *HubanAsyncPlugin) fetchDetailLinksAndImages(client *http.Client, itemID string) ([]model.Link, []string) {
	// Performance accounting: count the request and record its duration.
	start := time.Now()
	atomic.AddInt64(&detailPageRequests, 1)
	defer func() {
		atomic.AddInt64(&totalDetailTime, time.Since(start).Nanoseconds())
	}()

	detailURL := fmt.Sprintf("http://103.45.162.207:20720/index.php/vod/detail/id/%s.html", itemID)

	// Bound the whole request with a timeout.
	ctx, cancel := context.WithTimeout(context.Background(), DetailTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, detailURL, nil)
	if err != nil {
		return nil, nil
	}

	// Send browser-like headers.
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
	req.Header.Set("Connection", "keep-alive")
	req.Header.Set("Referer", "http://103.45.162.207:20720/")

	// Send the request (with retry).
	resp, err := p.doRequestWithRetry(req, client)
	if err != nil {
		return nil, nil
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, nil
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, nil
	}

	var links []model.Link
	var images []string

	// Poster image of the detail page.
	if posterURL, exists := doc.Find(".mobile-play .lazyload").Attr("data-src"); exists && posterURL != "" {
		images = append(images, posterURL)
	}

	// Each download row carries its link in a data-clipboard-text attribute.
	doc.Find("#download-list .module-row-one").Each(func(i int, s *goquery.Selection) {
		rawURL, exists := s.Find("[data-clipboard-text]").Attr("data-clipboard-text")
		if !exists {
			return
		}
		// Attribute values may carry surrounding whitespace.
		linkURL := strings.TrimSpace(rawURL)
		if !p.isValidNetworkDriveURL(linkURL) {
			return
		}
		linkType := p.determineLinkType(linkURL)
		if linkType == "" {
			return
		}
		links = append(links, model.Link{
			Type: linkType,
			URL:  linkURL,
			// Empty unless the URL embeds an access code ("?pwd=" or "password=").
			Password: p.extractPassword(linkURL),
		})
	})

	return links, images
}
// isValidNetworkDriveURL reports whether url is one of the supported share
// links. Empty strings, anything containing "javascript:", and strings that
// start with neither "http", "magnet:" nor "ed2k:" are rejected before the
// link patterns are tried.
func (p *HubanAsyncPlugin) isValidNetworkDriveURL(url string) bool {
	if url == "" || strings.Contains(url, "javascript:") {
		return false
	}

	switch {
	case strings.HasPrefix(url, "http"),
		strings.HasPrefix(url, "magnet:"),
		strings.HasPrefix(url, "ed2k:"):
		// Acceptable scheme; continue with the pattern check.
	default:
		return false
	}

	// The cases are evaluated in order and stop at the first match.
	switch {
	case quarkLinkRegex.MatchString(url),
		ucLinkRegex.MatchString(url),
		baiduLinkRegex.MatchString(url),
		aliyunLinkRegex.MatchString(url),
		xunleiLinkRegex.MatchString(url),
		tianyiLinkRegex.MatchString(url),
		link115Regex.MatchString(url),
		mobileLinkRegex.MatchString(url),
		weiyunLinkRegex.MatchString(url),
		lanzouLinkRegex.MatchString(url),
		jianguoyunLinkRegex.MatchString(url),
		link123Regex.MatchString(url),
		pikpakLinkRegex.MatchString(url),
		magnetLinkRegex.MatchString(url),
		ed2kLinkRegex.MatchString(url):
		return true
	}
	return false
}
// determineLinkType maps url to the name of the first share-link pattern it
// matches, or returns "" when no pattern matches. Patterns are tried in the
// order listed below.
func (p *HubanAsyncPlugin) determineLinkType(url string) string {
	kinds := []struct {
		matches func(string) bool
		name    string
	}{
		{quarkLinkRegex.MatchString, "quark"},
		{ucLinkRegex.MatchString, "uc"},
		{baiduLinkRegex.MatchString, "baidu"},
		{aliyunLinkRegex.MatchString, "aliyun"},
		{xunleiLinkRegex.MatchString, "xunlei"},
		{tianyiLinkRegex.MatchString, "tianyi"},
		{link115Regex.MatchString, "115"},
		{mobileLinkRegex.MatchString, "mobile"},
		{weiyunLinkRegex.MatchString, "weiyun"},
		{lanzouLinkRegex.MatchString, "lanzou"},
		{jianguoyunLinkRegex.MatchString, "jianguoyun"},
		{link123Regex.MatchString, "123"},
		{pikpakLinkRegex.MatchString, "pikpak"},
		{magnetLinkRegex.MatchString, "magnet"},
		{ed2kLinkRegex.MatchString, "ed2k"},
	}
	for _, kind := range kinds {
		if kind.matches(url) {
			return kind.name
		}
	}
	// Unsupported link kind.
	return ""
}
// extractPassword returns the access code embedded in url: the value of a
// "?pwd=" parameter if present, otherwise the value of a "password="
// parameter, otherwise "".
func (p *HubanAsyncPlugin) extractPassword(url string) string {
	found := passwordRegex.FindStringSubmatch(url)
	if len(found) <= 1 {
		// No "?pwd=" code; fall back to the "password=" form.
		found = password115Regex.FindStringSubmatch(url)
	}
	if len(found) > 1 {
		return found[1]
	}
	return ""
}
// doRequestWithRetry 带重试的HTTP请求
func (p *HubanAsyncPlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) {
maxRetries := 2
var lastErr error
for i := 0; i < maxRetries; i++ {
resp, err := client.Do(req)
if err == nil {
if resp.StatusCode == http.StatusOK {
return resp, nil
}
resp.Body.Close()
lastErr = fmt.Errorf("HTTP状态码: %d", resp.StatusCode)
} else {
lastErr = err
}
// 快速重试:只等待很短时间
if i < maxRetries-1 {
time.Sleep(100 * time.Millisecond)
}
}
return nil, fmt.Errorf("[%s] 请求失败,重试%d次后仍失败: %w", p.Name(), maxRetries, lastErr)
2025-08-01 14:57:44 +08:00
}
// GetPerformanceStats 获取性能统计信息
func (p *HubanAsyncPlugin) GetPerformanceStats() map[string]interface{} {
totalRequests := atomic.LoadInt64(&searchRequests)
totalTime := atomic.LoadInt64(&totalSearchTime)
detailRequests := atomic.LoadInt64(&detailPageRequests)
detailTime := atomic.LoadInt64(&totalDetailTime)
hits := atomic.LoadInt64(&cacheHits)
misses := atomic.LoadInt64(&cacheMisses)
2025-08-01 14:57:44 +08:00
var avgTime float64
if totalRequests > 0 {
avgTime = float64(totalTime) / float64(totalRequests) / 1e6 // 转换为毫秒
}
var avgDetailTime float64
if detailRequests > 0 {
avgDetailTime = float64(detailTime) / float64(detailRequests) / 1e6 // 转换为毫秒
}
2025-08-01 14:57:44 +08:00
return map[string]interface{}{
"search_requests": totalRequests,
"avg_search_time_ms": avgTime,
2025-08-01 14:57:44 +08:00
"total_search_time_ns": totalTime,
"detail_page_requests": detailRequests,
"avg_detail_time_ms": avgDetailTime,
"total_detail_time_ns": detailTime,
"cache_hits": hits,
"cache_misses": misses,
2025-08-01 14:57:44 +08:00
}
2025-08-04 16:33:12 +08:00
}
// AddAllowedReferer appends referer to AllowedReferers unless an identical
// entry is already present.
func AddAllowedReferer(referer string) {
	present := false
	for idx := 0; idx < len(AllowedReferers) && !present; idx++ {
		present = AllowedReferers[idx] == referer
	}
	if !present {
		AllowedReferers = append(AllowedReferers, referer)
	}
}
// RemoveAllowedReferer deletes the first entry of AllowedReferers equal to
// referer; it does nothing when no entry matches.
func RemoveAllowedReferer(referer string) {
	pos := -1
	for idx, candidate := range AllowedReferers {
		if candidate == referer {
			pos = idx
			break
		}
	}
	if pos < 0 {
		return
	}
	AllowedReferers = append(AllowedReferers[:pos], AllowedReferers[pos+1:]...)
}
// GetAllowedReferers returns a copy of AllowedReferers, so callers can modify
// the returned slice without affecting the package-level list.
func GetAllowedReferers() []string {
	snapshot := make([]string, 0, len(AllowedReferers))
	snapshot = append(snapshot, AllowedReferers...)
	return snapshot
}
// IsRefererAllowed reports whether referer starts with any entry of
// AllowedReferers.
func IsRefererAllowed(referer string) bool {
	for idx := range AllowedReferers {
		if !strings.HasPrefix(referer, AllowedReferers[idx]) {
			continue
		}
		return true
	}
	return false
}