mirror of
https://github.com/fish2018/pansou.git
synced 2025-11-25 03:14:59 +08:00
411 lines
11 KiB
Go
411 lines
11 KiB
Go
package wuji
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"io"
|
||
"net/http"
|
||
"net/url"
|
||
"regexp"
|
||
"strings"
|
||
"sync"
|
||
"time"
|
||
|
||
"github.com/PuerkitoBio/goquery"
|
||
"pansou/model"
|
||
"pansou/plugin"
|
||
)
|
||
|
||
// Site endpoints.
const (
	// BaseURL is the root of the site every request is sent to.
	BaseURL = "https://xcili.net"

	// SearchURL is the fmt template of the search page (/search?q={keyword});
	// its single %s verb receives the URL-escaped keyword.
	SearchURL = BaseURL + "/search?q=%s"
)

// Request tuning parameters.
const (
	// MaxRetries is the number of attempts doRequestWithRetry makes.
	MaxRetries = 3

	// TimeoutSeconds is the per-request context timeout, in seconds.
	TimeoutSeconds = 30

	// MaxConcurrency caps how many detail-page fetches run at the same time.
	MaxConcurrency = 20
)
|
||
|
||
// magnetLinkRegex matches a magnet URI whose BitTorrent info-hash is 40 hex
// digits, together with any trailing parameters up to the next quote or
// whitespace character.
var magnetLinkRegex = regexp.MustCompile(`magnet:\?xt=urn:btih:[0-9a-fA-F]{40}[^"'\s]*`)

// magnetCache memoizes resolved magnet links. Keys are detail-page URLs and
// values are magnetCacheEntry records.
var magnetCache sync.Map

// cacheTTL is how long a magnetCache entry stays valid (one hour).
var cacheTTL = time.Hour
|
||
|
||
// magnetCacheEntry is the value type stored in magnetCache: one resolved
// magnet link plus the moment it was cached.
type magnetCacheEntry struct {
	// MagnetLink is the magnet URI read from a detail page.
	MagnetLink string
	// Timestamp is when the entry was stored; it is compared against cacheTTL
	// to decide whether the entry is still fresh.
	Timestamp time.Time
}
|
||
|
||
// userAgents lists browser User-Agent strings available to outgoing requests.
// setRequestHeaders currently always sends the first entry.
var userAgents = []string{
	// Chrome 138 on macOS
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
	// Chrome 91 on Windows 10
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
	// Firefox 89 on Windows 10
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
}
|
||
|
||
// WujiPlugin 无极磁链搜索插件
|
||
type WujiPlugin struct {
|
||
*plugin.BaseAsyncPlugin
|
||
}
|
||
|
||
// NewWujiPlugin 创建新的无极磁链插件实例
|
||
func NewWujiPlugin() *WujiPlugin {
|
||
return &WujiPlugin{
|
||
BaseAsyncPlugin: plugin.NewBaseAsyncPlugin("wuji", 3),
|
||
}
|
||
}
|
||
|
||
// Name 返回插件名称
|
||
func (p *WujiPlugin) Name() string {
|
||
return "wuji"
|
||
}
|
||
|
||
// DisplayName 返回插件显示名称
|
||
func (p *WujiPlugin) DisplayName() string {
|
||
return "无极磁链"
|
||
}
|
||
|
||
// Description 返回插件描述
|
||
func (p *WujiPlugin) Description() string {
|
||
return "ØMagnet 无极磁链 - 磁力链接搜索引擎"
|
||
}
|
||
|
||
// Search 执行搜索并返回结果(兼容性方法)
|
||
func (p *WujiPlugin) Search(keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
|
||
result, err := p.SearchWithResult(keyword, ext)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
return result.Results, nil
|
||
}
|
||
|
||
// SearchWithResult 执行搜索并返回包含IsFinal标记的结果
|
||
func (p *WujiPlugin) SearchWithResult(keyword string, ext map[string]interface{}) (model.PluginSearchResult, error) {
|
||
return p.AsyncSearchWithResult(keyword, p.searchImpl, p.MainCacheKey, ext)
|
||
}
|
||
|
||
// searchImpl 实际的搜索实现
|
||
func (p *WujiPlugin) searchImpl(client *http.Client, keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
|
||
// URL编码关键词
|
||
encodedKeyword := url.QueryEscape(keyword)
|
||
searchURL := fmt.Sprintf(SearchURL, encodedKeyword)
|
||
|
||
// 创建带超时的上下文
|
||
ctx, cancel := context.WithTimeout(context.Background(), TimeoutSeconds*time.Second)
|
||
defer cancel()
|
||
|
||
// 创建请求
|
||
req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("[%s] 创建请求失败: %w", p.Name(), err)
|
||
}
|
||
|
||
// 设置请求头
|
||
p.setRequestHeaders(req)
|
||
|
||
// 发送HTTP请求
|
||
resp, err := p.doRequestWithRetry(req, client)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("[%s] 搜索请求失败: %w", p.Name(), err)
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
// 检查状态码
|
||
if resp.StatusCode != 200 {
|
||
return nil, fmt.Errorf("[%s] 请求返回状态码: %d", p.Name(), resp.StatusCode)
|
||
}
|
||
|
||
// 读取响应体内容
|
||
body, err := io.ReadAll(resp.Body)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("[%s] 读取响应失败: %w", p.Name(), err)
|
||
}
|
||
|
||
// 解析HTML
|
||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(body)))
|
||
if err != nil {
|
||
return nil, fmt.Errorf("[%s] HTML解析失败: %w", p.Name(), err)
|
||
}
|
||
|
||
// 提取搜索结果
|
||
searchResults := p.extractSearchResults(doc)
|
||
|
||
// 并发获取每个结果的详情页磁力链接
|
||
finalResults := p.enrichWithMagnetLinks(searchResults, client)
|
||
|
||
// 关键词过滤
|
||
searchKeyword := keyword
|
||
if searchParam, ok := ext["search"]; ok {
|
||
if searchStr, ok := searchParam.(string); ok && searchStr != "" {
|
||
searchKeyword = searchStr
|
||
}
|
||
}
|
||
|
||
return plugin.FilterResultsByKeyword(finalResults, searchKeyword), nil
|
||
}
|
||
|
||
// extractSearchResults 提取搜索结果
|
||
func (p *WujiPlugin) extractSearchResults(doc *goquery.Document) []model.SearchResult {
|
||
var results []model.SearchResult
|
||
|
||
// 查找所有搜索结果
|
||
doc.Find("table.file-list tbody tr").Each(func(i int, s *goquery.Selection) {
|
||
result := p.parseSearchResult(s)
|
||
if result.Title != "" {
|
||
results = append(results, result)
|
||
}
|
||
})
|
||
|
||
return results
|
||
}
|
||
|
||
// parseSearchResult 解析单个搜索结果
|
||
func (p *WujiPlugin) parseSearchResult(s *goquery.Selection) model.SearchResult {
|
||
result := model.SearchResult{
|
||
Channel: p.Name(),
|
||
Datetime: time.Now(),
|
||
}
|
||
|
||
// 提取标题和详情页链接
|
||
titleCell := s.Find("td").First()
|
||
titleLink := titleCell.Find("a")
|
||
|
||
// 详情页链接
|
||
detailPath, exists := titleLink.Attr("href")
|
||
if !exists || detailPath == "" {
|
||
return result
|
||
}
|
||
|
||
// 构造完整的详情页URL
|
||
detailURL := BaseURL + detailPath
|
||
|
||
// 提取标题(排除 p.sample 的内容)
|
||
titleText := titleLink.Clone()
|
||
titleText.Find("p.sample").Remove()
|
||
title := strings.TrimSpace(titleText.Text())
|
||
result.Title = p.cleanTitle(title)
|
||
|
||
// 提取文件名预览
|
||
sampleText := strings.TrimSpace(titleLink.Find("p.sample").Text())
|
||
|
||
// 提取文件大小
|
||
sizeText := strings.TrimSpace(s.Find("td.td-size").Text())
|
||
|
||
// 构造内容
|
||
var contentParts []string
|
||
if sampleText != "" {
|
||
contentParts = append(contentParts, "文件: "+sampleText)
|
||
}
|
||
if sizeText != "" {
|
||
contentParts = append(contentParts, "大小: "+sizeText)
|
||
}
|
||
result.Content = strings.Join(contentParts, "\n")
|
||
|
||
// 暂时将详情页链接作为占位符(后续会被磁力链接替换)
|
||
result.Links = []model.Link{{
|
||
Type: "detail",
|
||
URL: detailURL,
|
||
}}
|
||
|
||
// 生成唯一ID
|
||
result.UniqueID = fmt.Sprintf("%s_%d", p.Name(), time.Now().UnixNano())
|
||
|
||
// 添加标签
|
||
result.Tags = []string{"magnet"}
|
||
|
||
return result
|
||
}
|
||
|
||
// fetchMagnetLink 获取详情页的磁力链接(带缓存)
|
||
func (p *WujiPlugin) fetchMagnetLink(client *http.Client, detailURL string) (string, error) {
|
||
// 检查缓存
|
||
if cached, ok := magnetCache.Load(detailURL); ok {
|
||
if entry, ok := cached.(magnetCacheEntry); ok {
|
||
if time.Since(entry.Timestamp) < cacheTTL {
|
||
// 缓存命中
|
||
return entry.MagnetLink, nil
|
||
}
|
||
// 缓存过期,删除
|
||
magnetCache.Delete(detailURL)
|
||
}
|
||
}
|
||
// 创建带超时的上下文
|
||
ctx, cancel := context.WithTimeout(context.Background(), TimeoutSeconds*time.Second)
|
||
defer cancel()
|
||
|
||
// 创建请求
|
||
req, err := http.NewRequestWithContext(ctx, "GET", detailURL, nil)
|
||
if err != nil {
|
||
return "", fmt.Errorf("创建详情页请求失败: %w", err)
|
||
}
|
||
|
||
// 设置请求头
|
||
p.setRequestHeaders(req)
|
||
|
||
// 发送HTTP请求
|
||
resp, err := p.doRequestWithRetry(req, client)
|
||
if err != nil {
|
||
return "", fmt.Errorf("详情页请求失败: %w", err)
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
// 检查状态码
|
||
if resp.StatusCode != 200 {
|
||
return "", fmt.Errorf("详情页返回状态码: %d", resp.StatusCode)
|
||
}
|
||
|
||
// 读取响应体内容
|
||
body, err := io.ReadAll(resp.Body)
|
||
if err != nil {
|
||
return "", fmt.Errorf("读取详情页响应失败: %w", err)
|
||
}
|
||
|
||
// 解析HTML
|
||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(body)))
|
||
if err != nil {
|
||
return "", fmt.Errorf("详情页HTML解析失败: %w", err)
|
||
}
|
||
|
||
// 提取磁力链接
|
||
magnetInput := doc.Find("input#input-magnet")
|
||
if magnetInput.Length() == 0 {
|
||
return "", fmt.Errorf("未找到磁力链接输入框")
|
||
}
|
||
|
||
magnetLink, exists := magnetInput.Attr("value")
|
||
if !exists || magnetLink == "" {
|
||
return "", fmt.Errorf("磁力链接为空")
|
||
}
|
||
|
||
// 存入缓存
|
||
magnetCache.Store(detailURL, magnetCacheEntry{
|
||
MagnetLink: magnetLink,
|
||
Timestamp: time.Now(),
|
||
})
|
||
|
||
return magnetLink, nil
|
||
}
|
||
|
||
// cleanTitle 清理标题中的广告内容
|
||
func (p *WujiPlugin) cleanTitle(title string) string {
|
||
// 移除【】之间的广告内容
|
||
title = regexp.MustCompile(`【[^】]*】`).ReplaceAllString(title, "")
|
||
// 移除数字+【】格式的广告
|
||
title = regexp.MustCompile(`^\d+【[^】]*】`).ReplaceAllString(title, "")
|
||
// 移除[]之间的内容(如有需要)
|
||
title = regexp.MustCompile(`\[[^\]]*\]`).ReplaceAllString(title, "")
|
||
// 移除多余的空格
|
||
title = regexp.MustCompile(`\s+`).ReplaceAllString(title, " ")
|
||
return strings.TrimSpace(title)
|
||
}
|
||
|
||
// setRequestHeaders 设置请求头
|
||
func (p *WujiPlugin) setRequestHeaders(req *http.Request) {
|
||
// 使用第一个稳定的UA
|
||
ua := userAgents[0]
|
||
req.Header.Set("User-Agent", ua)
|
||
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
|
||
req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
|
||
req.Header.Set("Connection", "keep-alive")
|
||
req.Header.Set("Upgrade-Insecure-Requests", "1")
|
||
req.Header.Set("Cache-Control", "no-cache")
|
||
req.Header.Set("Pragma", "no-cache")
|
||
}
|
||
|
||
// doRequestWithRetry 带重试的HTTP请求
|
||
func (p *WujiPlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) {
|
||
var lastErr error
|
||
|
||
for i := 0; i < MaxRetries; i++ {
|
||
resp, err := client.Do(req)
|
||
if err == nil {
|
||
return resp, nil
|
||
}
|
||
|
||
lastErr = err
|
||
if i < MaxRetries-1 {
|
||
time.Sleep(time.Duration(i+1) * time.Second)
|
||
}
|
||
}
|
||
|
||
return nil, fmt.Errorf("请求失败,已重试%d次: %w", MaxRetries, lastErr)
|
||
}
|
||
|
||
// enrichWithMagnetLinks 并发获取磁力链接并丰富搜索结果
|
||
func (p *WujiPlugin) enrichWithMagnetLinks(results []model.SearchResult, client *http.Client) []model.SearchResult {
|
||
if len(results) == 0 {
|
||
return results
|
||
}
|
||
|
||
// 使用信号量控制并发数
|
||
semaphore := make(chan struct{}, MaxConcurrency)
|
||
var wg sync.WaitGroup
|
||
var mutex sync.Mutex
|
||
|
||
enrichedResults := make([]model.SearchResult, len(results))
|
||
copy(enrichedResults, results)
|
||
|
||
for i := range enrichedResults {
|
||
// 检查是否有详情页链接
|
||
if len(enrichedResults[i].Links) == 0 {
|
||
continue
|
||
}
|
||
|
||
wg.Add(1)
|
||
go func(index int) {
|
||
defer wg.Done()
|
||
|
||
// 获取信号量
|
||
semaphore <- struct{}{}
|
||
defer func() { <-semaphore }()
|
||
|
||
// 获取详情页URL
|
||
detailURL := enrichedResults[index].Links[0].URL
|
||
|
||
// 添加适当的间隔避免请求过于频繁
|
||
time.Sleep(time.Duration(index%5) * 100 * time.Millisecond)
|
||
|
||
// 请求详情页并解析磁力链接
|
||
magnetLink, err := p.fetchMagnetLink(client, detailURL)
|
||
if err == nil && magnetLink != "" {
|
||
mutex.Lock()
|
||
enrichedResults[index].Links = []model.Link{{
|
||
Type: "magnet",
|
||
URL: magnetLink,
|
||
}}
|
||
mutex.Unlock()
|
||
} else if err != nil {
|
||
fmt.Printf("[%s] 获取磁力链接失败 [%d]: %v\n", p.Name(), index, err)
|
||
}
|
||
}(i)
|
||
}
|
||
|
||
wg.Wait()
|
||
|
||
// 过滤掉没有有效磁力链接的结果
|
||
var validResults []model.SearchResult
|
||
for _, result := range enrichedResults {
|
||
if len(result.Links) > 0 && result.Links[0].Type == "magnet" {
|
||
validResults = append(validResults, result)
|
||
}
|
||
}
|
||
|
||
return validResults
|
||
}
|
||
|
||
// init 注册插件
|
||
func init() {
|
||
plugin.RegisterGlobalPlugin(NewWujiPlugin())
|
||
} |