Files
pansou/plugin/ahhhhfs/ahhhhfs.go
www.xueximeng.com 569931a8e8 新增插件ahhhhfs
2025-10-27 17:59:05 +08:00

538 lines
14 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package ahhhhfs
import (
"context"
"fmt"
"net/http"
"net/url"
"pansou/model"
"pansou/plugin"
"regexp"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/PuerkitoBio/goquery"
)
// 预编译的正则表达式
var (
// 从详情页URL中提取文章ID的正则表达式
articleIDRegex = regexp.MustCompile(`/(\d+)/?$`)
// 常见网盘链接的正则表达式
quarkLinkRegex = regexp.MustCompile(`https?://pan\.quark\.cn/s/[0-9a-zA-Z]+`)
baiduLinkRegex = regexp.MustCompile(`https?://pan\.baidu\.com/s/[0-9a-zA-Z_\-]+`)
aliyunLinkRegex = regexp.MustCompile(`https?://(www\.)?(aliyundrive\.com|alipan\.com)/s/[0-9a-zA-Z]+`)
ucLinkRegex = regexp.MustCompile(`https?://drive\.uc\.cn/s/[0-9a-zA-Z]+`)
xunleiLinkRegex = regexp.MustCompile(`https?://pan\.xunlei\.com/s/[0-9a-zA-Z_\-]+`)
tianyiLinkRegex = regexp.MustCompile(`https?://cloud\.189\.cn/(t|web)/[0-9a-zA-Z]+`)
link115Regex = regexp.MustCompile(`https?://115\.com/s/[0-9a-zA-Z]+`)
link123Regex = regexp.MustCompile(`https?://123pan\.com/s/[0-9a-zA-Z]+`)
pikpakLinkRegex = regexp.MustCompile(`https?://mypikpak\.com/s/[0-9a-zA-Z]+`)
// 提取码匹配模式
pwdPatterns = []*regexp.Regexp{
regexp.MustCompile(`提取码[:]\s*([0-9a-zA-Z]+)`),
regexp.MustCompile(`密码[:]\s*([0-9a-zA-Z]+)`),
regexp.MustCompile(`pwd[=:]\s*([0-9a-zA-Z]+)`),
regexp.MustCompile(`code[=:]\s*([0-9a-zA-Z]+)`),
}
// 缓存相关
detailCache = sync.Map{} // 缓存详情页解析结果
lastCleanupTime = time.Now()
cacheTTL = 1 * time.Hour
)
const (
// 插件名称
pluginName = "ahhhhfs"
// 优先级
defaultPriority = 2
// 超时时间
DefaultTimeout = 10 * time.Second
DetailTimeout = 8 * time.Second
// 并发数限制
MaxConcurrency = 15
// HTTP连接池配置
MaxIdleConns = 100
MaxIdleConnsPerHost = 30
MaxConnsPerHost = 50
IdleConnTimeout = 90 * time.Second
)
// 性能统计
var (
searchRequests int64 = 0
detailPageRequests int64 = 0
cacheHits int64 = 0
cacheMisses int64 = 0
)
// AhhhhfsAsyncPlugin ahhhhfs异步插件
type AhhhhfsAsyncPlugin struct {
*plugin.BaseAsyncPlugin
optimizedClient *http.Client
}
// 在init函数中注册插件
func init() {
plugin.RegisterGlobalPlugin(NewAhhhhfsPlugin())
// 启动缓存清理goroutine
go startCacheCleaner()
}
// startCacheCleaner 启动一个定期清理缓存的goroutine
func startCacheCleaner() {
ticker := time.NewTicker(30 * time.Minute)
defer ticker.Stop()
for range ticker.C {
// 清空所有缓存
detailCache = sync.Map{}
lastCleanupTime = time.Now()
}
}
// createOptimizedHTTPClient 创建优化的HTTP客户端
func createOptimizedHTTPClient() *http.Client {
transport := &http.Transport{
MaxIdleConns: MaxIdleConns,
MaxIdleConnsPerHost: MaxIdleConnsPerHost,
MaxConnsPerHost: MaxConnsPerHost,
IdleConnTimeout: IdleConnTimeout,
DisableKeepAlives: false,
}
return &http.Client{
Transport: transport,
Timeout: DefaultTimeout,
}
}
// NewAhhhhfsPlugin 创建新的ahhhhfs异步插件
func NewAhhhhfsPlugin() *AhhhhfsAsyncPlugin {
return &AhhhhfsAsyncPlugin{
BaseAsyncPlugin: plugin.NewBaseAsyncPlugin(pluginName, defaultPriority),
optimizedClient: createOptimizedHTTPClient(),
}
}
// Search 执行搜索并返回结果(兼容性方法)
func (p *AhhhhfsAsyncPlugin) Search(keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
result, err := p.SearchWithResult(keyword, ext)
if err != nil {
return nil, err
}
return result.Results, nil
}
// SearchWithResult 执行搜索并返回包含IsFinal标记的结果
func (p *AhhhhfsAsyncPlugin) SearchWithResult(keyword string, ext map[string]interface{}) (model.PluginSearchResult, error) {
return p.AsyncSearchWithResult(keyword, p.searchImpl, p.MainCacheKey, ext)
}
// searchImpl 实现具体的搜索逻辑
func (p *AhhhhfsAsyncPlugin) searchImpl(client *http.Client, keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
// 性能统计
start := time.Now()
atomic.AddInt64(&searchRequests, 1)
defer func() {
fmt.Printf("[%s] 搜索耗时: %v\n", p.Name(), time.Since(start))
}()
// 使用优化的客户端
if p.optimizedClient != nil {
client = p.optimizedClient
}
// 1. 构建搜索URL
searchURL := fmt.Sprintf("https://www.ahhhhfs.com/search/%s", url.QueryEscape(keyword))
// 2. 创建带超时的上下文
ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout)
defer cancel()
// 3. 创建请求
req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
if err != nil {
return nil, fmt.Errorf("[%s] 创建请求失败: %w", p.Name(), err)
}
// 4. 设置完整的请求头(避免反爬虫)
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
req.Header.Set("Connection", "keep-alive")
req.Header.Set("Upgrade-Insecure-Requests", "1")
req.Header.Set("Cache-Control", "max-age=0")
req.Header.Set("Referer", "https://www.ahhhhfs.com/")
// 5. 发送请求(带重试机制)
resp, err := p.doRequestWithRetry(req, client)
if err != nil {
return nil, fmt.Errorf("[%s] 搜索请求失败: %w", p.Name(), err)
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
return nil, fmt.Errorf("[%s] 搜索请求返回状态码: %d", p.Name(), resp.StatusCode)
}
// 6. 解析搜索结果页面
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, fmt.Errorf("[%s] 解析搜索页面失败: %w", p.Name(), err)
}
// 7. 提取搜索结果
var results []model.SearchResult
var wg sync.WaitGroup
var mu sync.Mutex
semaphore := make(chan struct{}, MaxConcurrency)
doc.Find("article.post-item.item-list").Each(func(i int, s *goquery.Selection) {
// 解析基本信息
titleElem := s.Find(".entry-title a")
title := strings.TrimSpace(titleElem.Text())
if title == "" {
title = strings.TrimSpace(titleElem.AttrOr("title", ""))
}
detailURL, exists := titleElem.Attr("href")
if !exists || detailURL == "" || title == "" {
return
}
// 提取文章ID
articleID := p.extractArticleID(detailURL)
if articleID == "" {
return
}
// 提取分类标签
var tags []string
s.Find(".entry-cat-dot a").Each(func(j int, tag *goquery.Selection) {
tagText := strings.TrimSpace(tag.Text())
if tagText != "" {
tags = append(tags, tagText)
}
})
// 提取描述
content := strings.TrimSpace(s.Find(".entry-desc").Text())
// 提取时间
datetime := ""
timeElem := s.Find(".entry-meta .meta-date time")
if dt, exists := timeElem.Attr("datetime"); exists {
datetime = dt
} else {
datetime = strings.TrimSpace(timeElem.Text())
}
// 解析时间
publishTime := p.parseDateTime(datetime)
// 异步获取详情页的网盘链接
wg.Add(1)
semaphore <- struct{}{} // 获取信号量
go func(title, detailURL, articleID, content string, tags []string, publishTime time.Time) {
defer wg.Done()
defer func() { <-semaphore }() // 释放信号量
// 获取网盘链接
links := p.fetchDetailLinks(client, detailURL, articleID)
if len(links) > 0 {
result := model.SearchResult{
UniqueID: fmt.Sprintf("%s-%s", p.Name(), articleID),
Title: title,
Content: content,
Links: links,
Tags: tags,
Channel: "", // 插件搜索结果 Channel 必须为空
Datetime: publishTime,
}
mu.Lock()
results = append(results, result)
mu.Unlock()
}
}(title, detailURL, articleID, content, tags, publishTime)
})
// 等待所有详情页请求完成
wg.Wait()
fmt.Printf("[%s] 搜索结果: %d 条\n", p.Name(), len(results))
// 关键词过滤
return plugin.FilterResultsByKeyword(results, keyword), nil
}
// extractArticleID 从URL中提取文章ID
func (p *AhhhhfsAsyncPlugin) extractArticleID(detailURL string) string {
matches := articleIDRegex.FindStringSubmatch(detailURL)
if len(matches) >= 2 {
return matches[1]
}
return ""
}
// parseDateTime 解析时间字符串
func (p *AhhhhfsAsyncPlugin) parseDateTime(datetime string) time.Time {
datetime = strings.TrimSpace(datetime)
// 尝试解析 ISO 格式
if t, err := time.Parse(time.RFC3339, datetime); err == nil {
return t
}
// 尝试解析标准日期格式
layouts := []string{
"2006-01-02",
"2006-01-02 15:04:05",
"2006-01-02T15:04:05",
"2006-01-02T15:04:05Z07:00",
}
for _, layout := range layouts {
if t, err := time.Parse(layout, datetime); err == nil {
return t
}
}
// 处理相对时间(如"1 周前"、"2 天前"
now := time.Now()
if strings.Contains(datetime, "小时前") || strings.Contains(datetime, "hours ago") {
// 简单处理,返回当天
return now
}
if strings.Contains(datetime, "天前") || strings.Contains(datetime, "days ago") {
// 简单处理,返回近期
return now.AddDate(0, 0, -7)
}
if strings.Contains(datetime, "周前") || strings.Contains(datetime, "weeks ago") {
// 简单处理,返回一个月前
return now.AddDate(0, -1, 0)
}
// 默认返回当前时间
return now
}
// fetchDetailLinks 获取详情页的网盘链接
func (p *AhhhhfsAsyncPlugin) fetchDetailLinks(client *http.Client, detailURL, articleID string) []model.Link {
atomic.AddInt64(&detailPageRequests, 1)
// 检查缓存
if cached, ok := detailCache.Load(articleID); ok {
atomic.AddInt64(&cacheHits, 1)
return cached.([]model.Link)
}
atomic.AddInt64(&cacheMisses, 1)
// 创建带超时的上下文
ctx, cancel := context.WithTimeout(context.Background(), DetailTimeout)
defer cancel()
// 创建请求
req, err := http.NewRequestWithContext(ctx, "GET", detailURL, nil)
if err != nil {
fmt.Printf("[%s] 创建详情页请求失败: %v\n", p.Name(), err)
return nil
}
// 设置请求头
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
req.Header.Set("Referer", "https://www.ahhhhfs.com/")
// 发送请求
resp, err := client.Do(req)
if err != nil {
fmt.Printf("[%s] 详情页请求失败: %v\n", p.Name(), err)
return nil
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
fmt.Printf("[%s] 详情页返回状态码: %d\n", p.Name(), resp.StatusCode)
return nil
}
// 解析详情页
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
fmt.Printf("[%s] 解析详情页失败: %v\n", p.Name(), err)
return nil
}
// 提取网盘链接
links := p.extractNetDiskLinks(doc)
// 缓存结果
if len(links) > 0 {
detailCache.Store(articleID, links)
}
return links
}
// extractNetDiskLinks 从详情页提取网盘链接
func (p *AhhhhfsAsyncPlugin) extractNetDiskLinks(doc *goquery.Document) []model.Link {
var links []model.Link
linkMap := make(map[string]model.Link) // 用于去重
// 在文章内容中查找所有链接
doc.Find(".post-content a").Each(func(i int, s *goquery.Selection) {
href, exists := s.Attr("href")
if !exists || href == "" {
return
}
// 判断是否为网盘链接
cloudType := p.determineCloudType(href)
if cloudType == "others" {
return
}
// 提取提取码
password := p.extractPassword(s, href)
// 添加到结果(去重)
if _, exists := linkMap[href]; !exists {
link := model.Link{
Type: cloudType,
URL: href,
Password: password,
}
linkMap[href] = link
links = append(links, link)
}
})
return links
}
// determineCloudType 判断链接类型
func (p *AhhhhfsAsyncPlugin) determineCloudType(url string) string {
switch {
case strings.Contains(url, "pan.quark.cn"):
return "quark"
case strings.Contains(url, "drive.uc.cn"):
return "uc"
case strings.Contains(url, "pan.baidu.com"):
return "baidu"
case strings.Contains(url, "aliyundrive.com") || strings.Contains(url, "alipan.com"):
return "aliyun"
case strings.Contains(url, "pan.xunlei.com"):
return "xunlei"
case strings.Contains(url, "cloud.189.cn"):
return "tianyi"
case strings.Contains(url, "115.com"):
return "115"
case strings.Contains(url, "123pan.com"):
return "123"
case strings.Contains(url, "mypikpak.com"):
return "pikpak"
default:
return "others"
}
}
// extractPassword 提取提取码
func (p *AhhhhfsAsyncPlugin) extractPassword(linkElem *goquery.Selection, url string) string {
// 1. 从链接的 title 属性中提取
if title, exists := linkElem.Attr("title"); exists {
for _, pattern := range pwdPatterns {
if matches := pattern.FindStringSubmatch(title); len(matches) >= 2 {
return matches[1]
}
}
}
// 2. 从链接文本中提取
linkText := linkElem.Text()
for _, pattern := range pwdPatterns {
if matches := pattern.FindStringSubmatch(linkText); len(matches) >= 2 {
return matches[1]
}
}
// 3. 从链接后面的兄弟节点或父节点的文本中提取
parent := linkElem.Parent()
parentText := parent.Text()
// 获取链接在父元素文本中的位置
linkIndex := strings.Index(parentText, linkText)
if linkIndex >= 0 {
// 获取链接后面的文本
afterText := parentText[linkIndex+len(linkText):]
for _, pattern := range pwdPatterns {
if matches := pattern.FindStringSubmatch(afterText); len(matches) >= 2 {
return matches[1]
}
}
}
// 4. 从 URL 参数中提取
if strings.Contains(url, "pwd=") {
parts := strings.Split(url, "pwd=")
if len(parts) >= 2 {
pwd := parts[1]
// 只取密码部分(去除其他参数)
if idx := strings.IndexAny(pwd, "&?#"); idx >= 0 {
pwd = pwd[:idx]
}
return pwd
}
}
return ""
}
// doRequestWithRetry 带重试机制的HTTP请求
func (p *AhhhhfsAsyncPlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) {
maxRetries := 3
var lastErr error
for i := 0; i < maxRetries; i++ {
if i > 0 {
// 指数退避重试
backoff := time.Duration(1<<uint(i-1)) * 200 * time.Millisecond
time.Sleep(backoff)
}
// 克隆请求避免并发问题
reqClone := req.Clone(req.Context())
resp, err := client.Do(reqClone)
if err == nil && resp.StatusCode == 200 {
return resp, nil
}
if resp != nil {
resp.Body.Close()
}
lastErr = err
}
return nil, fmt.Errorf("重试 %d 次后仍然失败: %w", maxRetries, lastErr)
}