Mirror of https://github.com/fish2018/pansou.git (synced 2025-11-25 03:14:59 +08:00)
Add plugin: ahhhhfs (新增插件ahhhhfs)
@@ -38,7 +38,7 @@ susu,thepiratebay,wanou,xuexizhinan,panyq,zhizhen,labi,muou,ouge,shandian,
 duoduo,huban,cyg,erxiao,miaoso,fox4k,pianku,clmao,wuji,cldi,xiaozhang,
 libvio,leijing,xb6v,xys,ddys,hdmoli,yuhuage,u3c3,javdb,clxiong,jutoushe,
 sdso,xiaoji,xdyh,haisou,bixin,djgou,nyaa,xinjuc,aikanzy,qupanshe,xdpan,
-discourse,yunsou
+discourse,yunsou,ahhhhfs
 </pre>
 </details>
BIN  cache/shard_13/b84edcde43dc50163b0bdca4d4877599  vendored  Normal file
Binary file not shown.

1  cache/shard_13/b84edcde43dc50163b0bdca4d4877599.meta  vendored  Normal file
@@ -0,0 +1 @@
+{"key":"86dfe1d575e2529c0fcc682e5ce6ac18","expiry":"2025-10-27T18:57:36.308013158+08:00","last_used":"2025-10-27T17:58:00.252958+08:00","size":271,"last_modified":"2025-10-27T17:58:00.252958+08:00"}
BIN  cache/shard_15/9f8f814bd1d2c580f5926791ee1daaaf  vendored  Normal file
Binary file not shown.

1  cache/shard_15/9f8f814bd1d2c580f5926791ee1daaaf.meta  vendored  Normal file
@@ -0,0 +1 @@
+{"key":"8a1ceb1a686b9eb4fb9a83b93166ab45","expiry":"2025-10-27T18:57:38.623512363+08:00","last_used":"2025-10-27T17:58:00.254162+08:00","size":3535,"last_modified":"2025-10-27T17:58:00.254162+08:00"}
BIN  cache/shard_4/3efad7600545961197b7d17e7a39eb92  vendored  Normal file
Binary file not shown.

1  cache/shard_4/3efad7600545961197b7d17e7a39eb92.meta  vendored  Normal file
@@ -0,0 +1 @@
+{"key":"b1bc7f0835474af246e9f96c1b8befff","expiry":"2025-10-27T18:57:15.116751458+08:00","last_used":"2025-10-27T17:58:00.249897+08:00","size":271,"last_modified":"2025-10-27T17:58:00.249897+08:00"}
BIN  cache/shard_9/468298d39de2c822a2c4f2b97ebe2305  vendored  Normal file
Binary file not shown.

1  cache/shard_9/468298d39de2c822a2c4f2b97ebe2305.meta  vendored  Normal file
@@ -0,0 +1 @@
+{"key":"b19c2520964407945e4e9e3576b97577","expiry":"2025-10-27T18:57:15.118728017+08:00","last_used":"2025-10-27T17:58:00.251944+08:00","size":4310,"last_modified":"2025-10-27T17:58:00.251944+08:00"}
1  main.go
@@ -78,6 +78,7 @@ import (
 	_ "pansou/plugin/xdpan"
 	_ "pansou/plugin/discourse"
 	_ "pansou/plugin/yunsou"
+	_ "pansou/plugin/ahhhhfs"
 )

 // Global cache write manager
537  plugin/ahhhhfs/ahhhhfs.go  Normal file
@@ -0,0 +1,537 @@
package ahhhhfs

import (
	"context"
	"fmt"
	"net/http"
	"net/url"
	"pansou/model"
	"pansou/plugin"
	"regexp"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/PuerkitoBio/goquery"
)

// Precompiled regular expressions
var (
	// Extracts the article ID from a detail-page URL
	articleIDRegex = regexp.MustCompile(`/(\d+)/?$`)

	// Patterns for common net-disk share links
	quarkLinkRegex  = regexp.MustCompile(`https?://pan\.quark\.cn/s/[0-9a-zA-Z]+`)
	baiduLinkRegex  = regexp.MustCompile(`https?://pan\.baidu\.com/s/[0-9a-zA-Z_\-]+`)
	aliyunLinkRegex = regexp.MustCompile(`https?://(www\.)?(aliyundrive\.com|alipan\.com)/s/[0-9a-zA-Z]+`)
	ucLinkRegex     = regexp.MustCompile(`https?://drive\.uc\.cn/s/[0-9a-zA-Z]+`)
	xunleiLinkRegex = regexp.MustCompile(`https?://pan\.xunlei\.com/s/[0-9a-zA-Z_\-]+`)
	tianyiLinkRegex = regexp.MustCompile(`https?://cloud\.189\.cn/(t|web)/[0-9a-zA-Z]+`)
	link115Regex    = regexp.MustCompile(`https?://115\.com/s/[0-9a-zA-Z]+`)
	link123Regex    = regexp.MustCompile(`https?://123pan\.com/s/[0-9a-zA-Z]+`)
	pikpakLinkRegex = regexp.MustCompile(`https?://mypikpak\.com/s/[0-9a-zA-Z]+`)

	// Patterns for extraction codes (share passwords)
	pwdPatterns = []*regexp.Regexp{
		regexp.MustCompile(`提取码[::]\s*([0-9a-zA-Z]+)`),
		regexp.MustCompile(`密码[::]\s*([0-9a-zA-Z]+)`),
		regexp.MustCompile(`pwd[=::]\s*([0-9a-zA-Z]+)`),
		regexp.MustCompile(`code[=::]\s*([0-9a-zA-Z]+)`),
	}

	// Cache state
	detailCache     sync.Map // caches parsed detail-page results
	lastCleanupTime = time.Now()
	cacheTTL        = 1 * time.Hour
)

const (
	// Plugin name
	pluginName = "ahhhhfs"

	// Priority
	defaultPriority = 2

	// Timeouts
	DefaultTimeout = 10 * time.Second
	DetailTimeout  = 8 * time.Second

	// Concurrency limit
	MaxConcurrency = 15

	// HTTP connection-pool configuration
	MaxIdleConns        = 100
	MaxIdleConnsPerHost = 30
	MaxConnsPerHost     = 50
	IdleConnTimeout     = 90 * time.Second
)

// Performance counters
var (
	searchRequests     int64 = 0
	detailPageRequests int64 = 0
	cacheHits          int64 = 0
	cacheMisses        int64 = 0
)

// AhhhhfsAsyncPlugin is the asynchronous plugin for ahhhhfs
type AhhhhfsAsyncPlugin struct {
	*plugin.BaseAsyncPlugin
	optimizedClient *http.Client
}

// Register the plugin in init
func init() {
	plugin.RegisterGlobalPlugin(NewAhhhhfsPlugin())

	// Start the cache-cleanup goroutine
	go startCacheCleaner()
}

// startCacheCleaner starts a goroutine that periodically clears the cache
func startCacheCleaner() {
	ticker := time.NewTicker(30 * time.Minute)
	defer ticker.Stop()

	for range ticker.C {
		// Drop every cached entry. Deleting keys through Range is safe for
		// concurrent readers, unlike reassigning the package-level sync.Map.
		detailCache.Range(func(key, _ interface{}) bool {
			detailCache.Delete(key)
			return true
		})
		lastCleanupTime = time.Now()
	}
}

// createOptimizedHTTPClient builds an HTTP client tuned for this plugin
func createOptimizedHTTPClient() *http.Client {
	transport := &http.Transport{
		MaxIdleConns:        MaxIdleConns,
		MaxIdleConnsPerHost: MaxIdleConnsPerHost,
		MaxConnsPerHost:     MaxConnsPerHost,
		IdleConnTimeout:     IdleConnTimeout,
		DisableKeepAlives:   false,
	}

	return &http.Client{
		Transport: transport,
		Timeout:   DefaultTimeout,
	}
}

// NewAhhhhfsPlugin creates a new ahhhhfs async plugin
func NewAhhhhfsPlugin() *AhhhhfsAsyncPlugin {
	return &AhhhhfsAsyncPlugin{
		BaseAsyncPlugin: plugin.NewBaseAsyncPlugin(pluginName, defaultPriority),
		optimizedClient: createOptimizedHTTPClient(),
	}
}

// Search runs a search and returns the results (compatibility method)
func (p *AhhhhfsAsyncPlugin) Search(keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
	result, err := p.SearchWithResult(keyword, ext)
	if err != nil {
		return nil, err
	}
	return result.Results, nil
}

// SearchWithResult runs a search and returns results carrying the IsFinal flag
func (p *AhhhhfsAsyncPlugin) SearchWithResult(keyword string, ext map[string]interface{}) (model.PluginSearchResult, error) {
	return p.AsyncSearchWithResult(keyword, p.searchImpl, p.MainCacheKey, ext)
}

// searchImpl implements the actual search logic
func (p *AhhhhfsAsyncPlugin) searchImpl(client *http.Client, keyword string, ext map[string]interface{}) ([]model.SearchResult, error) {
	// Performance accounting
	start := time.Now()
	atomic.AddInt64(&searchRequests, 1)
	defer func() {
		fmt.Printf("[%s] search took: %v\n", p.Name(), time.Since(start))
	}()

	// Prefer the optimized client
	if p.optimizedClient != nil {
		client = p.optimizedClient
	}

	// 1. Build the search URL
	searchURL := fmt.Sprintf("https://www.ahhhhfs.com/search/%s", url.QueryEscape(keyword))

	// 2. Create a context with timeout
	ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout)
	defer cancel()

	// 3. Create the request
	req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
	if err != nil {
		return nil, fmt.Errorf("[%s] failed to create request: %w", p.Name(), err)
	}

	// 4. Set full request headers (to sidestep anti-scraping checks)
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
	req.Header.Set("Connection", "keep-alive")
	req.Header.Set("Upgrade-Insecure-Requests", "1")
	req.Header.Set("Cache-Control", "max-age=0")
	req.Header.Set("Referer", "https://www.ahhhhfs.com/")

	// 5. Send the request (with retries)
	resp, err := p.doRequestWithRetry(req, client)
	if err != nil {
		return nil, fmt.Errorf("[%s] search request failed: %w", p.Name(), err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("[%s] search request returned status code: %d", p.Name(), resp.StatusCode)
	}

	// 6. Parse the search results page
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("[%s] failed to parse search page: %w", p.Name(), err)
	}

	// 7. Extract the search results
	var results []model.SearchResult
	var wg sync.WaitGroup
	var mu sync.Mutex
	semaphore := make(chan struct{}, MaxConcurrency)

	doc.Find("article.post-item.item-list").Each(func(i int, s *goquery.Selection) {
		// Parse the basic fields
		titleElem := s.Find(".entry-title a")
		title := strings.TrimSpace(titleElem.Text())
		if title == "" {
			title = strings.TrimSpace(titleElem.AttrOr("title", ""))
		}

		detailURL, exists := titleElem.Attr("href")
		if !exists || detailURL == "" || title == "" {
			return
		}

		// Extract the article ID
		articleID := p.extractArticleID(detailURL)
		if articleID == "" {
			return
		}

		// Extract the category tags
		var tags []string
		s.Find(".entry-cat-dot a").Each(func(j int, tag *goquery.Selection) {
			tagText := strings.TrimSpace(tag.Text())
			if tagText != "" {
				tags = append(tags, tagText)
			}
		})

		// Extract the description
		content := strings.TrimSpace(s.Find(".entry-desc").Text())

		// Extract the timestamp
		datetime := ""
		timeElem := s.Find(".entry-meta .meta-date time")
		if dt, exists := timeElem.Attr("datetime"); exists {
			datetime = dt
		} else {
			datetime = strings.TrimSpace(timeElem.Text())
		}

		// Parse the timestamp
		publishTime := p.parseDateTime(datetime)

		// Fetch the detail page's net-disk links asynchronously
		wg.Add(1)
		semaphore <- struct{}{} // acquire the semaphore

		go func(title, detailURL, articleID, content string, tags []string, publishTime time.Time) {
			defer wg.Done()
			defer func() { <-semaphore }() // release the semaphore

			// Fetch the net-disk links
			links := p.fetchDetailLinks(client, detailURL, articleID)

			if len(links) > 0 {
				result := model.SearchResult{
					UniqueID: fmt.Sprintf("%s-%s", p.Name(), articleID),
					Title:    title,
					Content:  content,
					Links:    links,
					Tags:     tags,
					Channel:  "", // plugin results must leave Channel empty
					Datetime: publishTime,
				}

				mu.Lock()
				results = append(results, result)
				mu.Unlock()
			}
		}(title, detailURL, articleID, content, tags, publishTime)
	})

	// Wait for all detail-page requests to finish
	wg.Wait()

	fmt.Printf("[%s] search results: %d items\n", p.Name(), len(results))

	// Keyword filtering
	return plugin.FilterResultsByKeyword(results, keyword), nil
}

// extractArticleID extracts the article ID from a URL
func (p *AhhhhfsAsyncPlugin) extractArticleID(detailURL string) string {
	matches := articleIDRegex.FindStringSubmatch(detailURL)
	if len(matches) >= 2 {
		return matches[1]
	}
	return ""
}

// parseDateTime parses a timestamp string
func (p *AhhhhfsAsyncPlugin) parseDateTime(datetime string) time.Time {
	datetime = strings.TrimSpace(datetime)

	// Try ISO format first
	if t, err := time.Parse(time.RFC3339, datetime); err == nil {
		return t
	}

	// Try common date layouts
	layouts := []string{
		"2006-01-02",
		"2006-01-02 15:04:05",
		"2006-01-02T15:04:05",
		"2006-01-02T15:04:05Z07:00",
	}

	for _, layout := range layouts {
		if t, err := time.Parse(layout, datetime); err == nil {
			return t
		}
	}

	// Handle relative times such as "1 周前" (1 week ago) or "2 天前" (2 days ago)
	now := time.Now()

	if strings.Contains(datetime, "小时前") || strings.Contains(datetime, "hours ago") {
		// Rough approximation: treat as today
		return now
	}

	if strings.Contains(datetime, "天前") || strings.Contains(datetime, "days ago") {
		// Rough approximation: treat as within the last week
		return now.AddDate(0, 0, -7)
	}

	if strings.Contains(datetime, "周前") || strings.Contains(datetime, "weeks ago") {
		// Rough approximation: treat as a month ago
		return now.AddDate(0, -1, 0)
	}

	// Fall back to the current time
	return now
}

// fetchDetailLinks fetches the net-disk links from a detail page
func (p *AhhhhfsAsyncPlugin) fetchDetailLinks(client *http.Client, detailURL, articleID string) []model.Link {
	atomic.AddInt64(&detailPageRequests, 1)

	// Check the cache
	if cached, ok := detailCache.Load(articleID); ok {
		atomic.AddInt64(&cacheHits, 1)
		return cached.([]model.Link)
	}

	atomic.AddInt64(&cacheMisses, 1)

	// Create a context with timeout
	ctx, cancel := context.WithTimeout(context.Background(), DetailTimeout)
	defer cancel()

	// Create the request
	req, err := http.NewRequestWithContext(ctx, "GET", detailURL, nil)
	if err != nil {
		fmt.Printf("[%s] failed to create detail-page request: %v\n", p.Name(), err)
		return nil
	}

	// Set request headers
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
	req.Header.Set("Referer", "https://www.ahhhhfs.com/")

	// Send the request
	resp, err := client.Do(req)
	if err != nil {
		fmt.Printf("[%s] detail-page request failed: %v\n", p.Name(), err)
		return nil
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		fmt.Printf("[%s] detail page returned status code: %d\n", p.Name(), resp.StatusCode)
		return nil
	}

	// Parse the detail page
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		fmt.Printf("[%s] failed to parse detail page: %v\n", p.Name(), err)
		return nil
	}

	// Extract the net-disk links
	links := p.extractNetDiskLinks(doc)

	// Cache the result
	if len(links) > 0 {
		detailCache.Store(articleID, links)
	}

	return links
}

// extractNetDiskLinks extracts net-disk links from a detail page
func (p *AhhhhfsAsyncPlugin) extractNetDiskLinks(doc *goquery.Document) []model.Link {
	var links []model.Link
	linkMap := make(map[string]model.Link) // for de-duplication

	// Scan every link inside the article body
	doc.Find(".post-content a").Each(func(i int, s *goquery.Selection) {
		href, exists := s.Attr("href")
		if !exists || href == "" {
			return
		}

		// Check whether it is a net-disk link
		cloudType := p.determineCloudType(href)
		if cloudType == "others" {
			return
		}

		// Extract the extraction code
		password := p.extractPassword(s, href)

		// Add to the results (deduplicated)
		if _, exists := linkMap[href]; !exists {
			link := model.Link{
				Type:     cloudType,
				URL:      href,
				Password: password,
			}
			linkMap[href] = link
			links = append(links, link)
		}
	})

	return links
}

// determineCloudType classifies a link by net-disk provider
func (p *AhhhhfsAsyncPlugin) determineCloudType(url string) string {
	switch {
	case strings.Contains(url, "pan.quark.cn"):
		return "quark"
	case strings.Contains(url, "drive.uc.cn"):
		return "uc"
	case strings.Contains(url, "pan.baidu.com"):
		return "baidu"
	case strings.Contains(url, "aliyundrive.com") || strings.Contains(url, "alipan.com"):
		return "aliyun"
	case strings.Contains(url, "pan.xunlei.com"):
		return "xunlei"
	case strings.Contains(url, "cloud.189.cn"):
		return "tianyi"
	case strings.Contains(url, "115.com"):
		return "115"
	case strings.Contains(url, "123pan.com"):
		return "123"
	case strings.Contains(url, "mypikpak.com"):
		return "pikpak"
	default:
		return "others"
	}
}

// extractPassword extracts the extraction code for a link
func (p *AhhhhfsAsyncPlugin) extractPassword(linkElem *goquery.Selection, url string) string {
	// 1. From the link's title attribute
	if title, exists := linkElem.Attr("title"); exists {
		for _, pattern := range pwdPatterns {
			if matches := pattern.FindStringSubmatch(title); len(matches) >= 2 {
				return matches[1]
			}
		}
	}

	// 2. From the link text
	linkText := linkElem.Text()
	for _, pattern := range pwdPatterns {
		if matches := pattern.FindStringSubmatch(linkText); len(matches) >= 2 {
			return matches[1]
		}
	}

	// 3. From the sibling or parent text that follows the link
	parent := linkElem.Parent()
	parentText := parent.Text()

	// Locate the link text within the parent's text
	linkIndex := strings.Index(parentText, linkText)
	if linkIndex >= 0 {
		// Take the text that follows the link
		afterText := parentText[linkIndex+len(linkText):]
		for _, pattern := range pwdPatterns {
			if matches := pattern.FindStringSubmatch(afterText); len(matches) >= 2 {
				return matches[1]
			}
		}
	}

	// 4. From the URL parameters
	if strings.Contains(url, "pwd=") {
		parts := strings.Split(url, "pwd=")
		if len(parts) >= 2 {
			pwd := parts[1]
			// Keep only the password itself (strip trailing parameters)
			if idx := strings.IndexAny(pwd, "&?#"); idx >= 0 {
				pwd = pwd[:idx]
			}
			return pwd
		}
	}

	return ""
}

// doRequestWithRetry performs an HTTP request with retries
func (p *AhhhhfsAsyncPlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) {
	maxRetries := 3
	var lastErr error

	for i := 0; i < maxRetries; i++ {
		if i > 0 {
			// Exponential backoff between retries
			backoff := time.Duration(1<<uint(i-1)) * 200 * time.Millisecond
			time.Sleep(backoff)
		}

		// Clone the request to avoid concurrency issues
		reqClone := req.Clone(req.Context())

		resp, err := client.Do(reqClone)
		if err == nil && resp.StatusCode == 200 {
			return resp, nil
		}

		if resp != nil {
			resp.Body.Close()
		}
		// Record why this attempt failed; a nil err with a non-200 status
		// still counts as a failure and must not be lost.
		if err != nil {
			lastErr = err
		} else {
			lastErr = fmt.Errorf("unexpected status code: %d", resp.StatusCode)
		}
	}

	return nil, fmt.Errorf("still failing after %d retries: %w", maxRetries, lastErr)
}

214  plugin/ahhhhfs/html结构分析.md  Normal file
@@ -0,0 +1,214 @@
# ahhhhfs (A姐分享) HTML Structure Analysis

## Site Information
- **Site name**: ahhhhfs (A姐分享)
- **Domain**: www.ahhhhfs.com
- **Type**: resource-sharing site (built on WordPress)
- **Focus**: shares learning resources, software, tutorials, and similar material

## Search Page Structure

### 1. Search URL Pattern
```
https://www.ahhhhfs.com/search/{keyword}
or
https://www.ahhhhfs.com/?s={keyword}

Examples:
https://www.ahhhhfs.com/search/小红书
https://www.ahhhhfs.com/?s=小红书

Parameter notes:
- keyword: plain Chinese text and URL-encoded text both work
```
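
For illustration, a minimal Go sketch of issuing such a search request; the browser-style User-Agent is an assumption carried over from common anti-scraping practice, not something the site documents:

```go
package main

import (
	"fmt"
	"net/http"
	"net/url"
)

func main() {
	keyword := "小红书"
	// QueryEscape handles both plain Chinese and pre-encoded input.
	searchURL := fmt.Sprintf("https://www.ahhhhfs.com/search/%s", url.QueryEscape(keyword))

	req, err := http.NewRequest("GET", searchURL, nil)
	if err != nil {
		panic(err)
	}
	// A browser-like User-Agent helps avoid trivial anti-scraping checks (assumption).
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64)")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.StatusCode)
}
```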

### 2. Search Results Container
- **Parent container**: `.row` (the result-list container)
- **Result item**: `<article class="post-item item-list">` (one per search result)

### 3. Structure of a Single Search Result

#### Title area (.entry-title)
```html
<h2 class="entry-title">
  <a target="_blank" href="https://www.ahhhhfs.com/76567/"
     title="AI小红书虚拟电商全链路实战课:从选品到变现的AI爆款打法">
    AI小红书虚拟电商全链路实战课:从选品到变现的AI爆款打法
  </a>
</h2>

Fields to extract:
- Title: the a tag's text content, or its title attribute
- Detail-page link: the a tag's href attribute
```

#### Category tags (.entry-cat-dot)
```html
<div class="entry-cat-dot">
  <a href="https://www.ahhhhfs.com/recourse/%e7%9f%ad%e8%a7%86%e9%a2%91/">短视频</a>
  <a href="https://www.ahhhhfs.com/recourse/">资源</a>
</div>

Fields to extract:
- Categories: the text content of every a tag
```

#### Description area (.entry-desc)
```html
<div class="entry-desc">
  AI小红书虚拟电商全链路实战课程概览 《AI小红书虚拟电商5.0实战课》是一门聚焦AI与小红书生态融合的系统课程,围绕AI赋能选品、创作、运营与变现四大环节展开...
</div>

Fields to extract:
- Description: the div's text content
```

#### Metadata bar (.entry-meta)
```html
<div class="entry-meta">
  <span class="meta-date">
    <i class="far fa-clock me-1"></i>
    <time class="pub-date" datetime="2025-10-18T13:43:10+08:00">1 周前</time>
  </span>
  <span class="meta-likes d-none d-md-inline-block"><i class="far fa-heart me-1"></i>0</span>
  <span class="meta-fav d-none d-md-inline-block"><i class="far fa-star me-1"></i>1</span>
</div>

Fields to extract:
- Publish time: the time tag's datetime attribute, or its text content
```

## Detail Page Structure

### 1. Detail URL Pattern
```
https://www.ahhhhfs.com/{articleID}/

Example:
https://www.ahhhhfs.com/76567/
```

### 2. Download Link Location
Download links sit inside the article body, `.post-content`, usually near the end of the post.

#### Download link format examples
```html
<p>
  学习地址:
  <a title="..."
     href="https://pan.quark.cn/s/c16a5ae18ea0"
     target="_blank"
     rel="nofollow noopener noreferrer">夸克</a>
</p>

or

<p>
  下载地址:
  <a href="https://pan.baidu.com/s/xxxxx"
     target="_blank"
     rel="nofollow noopener noreferrer">百度网盘</a>
  提取码: xxxx
</p>

or several net-disk links together:
<p>
  阿里云盘:<a href="...">链接</a><br>
  夸克网盘:<a href="...">链接</a><br>
  百度网盘:<a href="...">链接</a> 提取码: xxxx
</p>

Fields to extract:
- Net-disk links: the href of any a tag in .post-content whose URL matches a net-disk domain
- Extraction code/password: text near the link, flagged by keywords such as "提取码" (extraction code), "密码" (password), or "pwd"
```

## CSS Selector Summary

| Data item | CSS selector | Extraction |
|-----------|--------------|------------|
| Result list | `article.post-item.item-list` | iterate over all result items |
| Title | `.entry-title a` | text content or title attribute |
| Detail-page link | `.entry-title a` | href attribute |
| Category tags | `.entry-cat-dot a` | text content of every a tag |
| Description | `.entry-desc` | text content |
| Publish time | `.entry-meta .meta-date time` | datetime attribute or text content |
| Article body | `.post-content` | HTML content |
| Net-disk links | `.post-content a[href*="pan"]`, or match net-disk domains | href attribute |
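
A minimal goquery sketch wiring a few of these selectors together; the inline HTML is a trimmed stand-in for a fetched search page:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/PuerkitoBio/goquery"
)

func main() {
	// In the real plugin this HTML comes from the search-page response body.
	html := `<article class="post-item item-list">
	  <h2 class="entry-title"><a href="https://www.ahhhhfs.com/76567/" title="demo">demo</a></h2>
	  <div class="entry-cat-dot"><a href="#">短视频</a></div>
	  <div class="entry-desc">description text</div>
	</article>`

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		panic(err)
	}

	// Iterate over result items and pull out the summarized fields.
	doc.Find("article.post-item.item-list").Each(func(i int, s *goquery.Selection) {
		title := strings.TrimSpace(s.Find(".entry-title a").Text())
		href, _ := s.Find(".entry-title a").Attr("href")
		desc := strings.TrimSpace(s.Find(".entry-desc").Text())
		fmt.Printf("title=%q href=%q desc=%q\n", title, href, desc)
	})
}
```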

## Implementation Notes

### 1. Supported Net-Disk Types
- Quark: `pan.quark.cn`
- Aliyun Drive: `aliyundrive.com`, `alipan.com`
- Baidu Netdisk: `pan.baidu.com`
- UC Drive: `drive.uc.cn`
- Xunlei: `pan.xunlei.com`
- Tianyi Cloud: `cloud.189.cn`
- 115: `115.com`
- 123pan: `123pan.com`

### 2. Extraction-Code Recognition
The extraction code can appear in several places:
- In the text after the link: `提取码: xxxx` or `密码: xxxx`
- In the link's title attribute
- On the next line, separated by a `<br>` tag
- Inside parentheses: `(提取码: xxxx)`

Common keywords (a matching sketch follows this list):
- 提取码
- 密码
- pwd
- code
- 取码
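
A small Go sketch of matching these keywords with regular expressions, mirroring the `pwdPatterns` idea used by the plugin; the `extractCode` helper is illustrative:

```go
package main

import (
	"fmt"
	"regexp"
)

// One pattern per keyword family; fullwidth and halfwidth colons both accepted.
var pwdPatterns = []*regexp.Regexp{
	regexp.MustCompile(`提取码[::]\s*([0-9a-zA-Z]+)`),
	regexp.MustCompile(`密码[::]\s*([0-9a-zA-Z]+)`),
	regexp.MustCompile(`pwd[=::]\s*([0-9a-zA-Z]+)`),
	regexp.MustCompile(`code[=::]\s*([0-9a-zA-Z]+)`),
}

// extractCode returns the first captured code, or "" when nothing matches.
func extractCode(text string) string {
	for _, p := range pwdPatterns {
		if m := p.FindStringSubmatch(text); len(m) >= 2 {
			return m[1]
		}
	}
	return ""
}

func main() {
	fmt.Println(extractCode("百度网盘链接 提取码: ab12")) // prints "ab12"
	fmt.Println(extractCode("no code here"))            // prints ""
}
```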

### 3. Link Extraction Strategy
1. Fetch the article list from the search results page
2. Visit each article's detail page
3. Scan the detail page's `.post-content` for links matching net-disk domains
4. Extract each link together with its extraction code
5. Skip articles that carry no net-disk links

### 4. Time Format Handling
- Relative times: "1 周前" (1 week ago), "2 天前" (2 days ago) must be mapped to concrete dates
- Absolute times: "2025-10-18" can be used as-is
- datetime attribute: "2025-10-18T13:43:10+08:00", standard ISO format
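
A sketch of the corresponding parsing order (ISO first, then plain dates, then rough approximations for relative phrases); `parsePublishTime` is an illustrative helper, not the plugin's exact function:

```go
package main

import (
	"fmt"
	"strings"
	"time"
)

func parsePublishTime(s string) time.Time {
	s = strings.TrimSpace(s)

	// 1. The datetime attribute is standard RFC 3339 / ISO 8601.
	if t, err := time.Parse(time.RFC3339, s); err == nil {
		return t
	}
	// 2. Plain dates such as "2025-10-18".
	if t, err := time.Parse("2006-01-02", s); err == nil {
		return t
	}
	// 3. Relative phrases: only a rough approximation is possible.
	now := time.Now()
	switch {
	case strings.Contains(s, "天前"): // "N days ago"
		return now.AddDate(0, 0, -7)
	case strings.Contains(s, "周前"): // "N weeks ago"
		return now.AddDate(0, -1, 0)
	default:
		return now
	}
}

func main() {
	fmt.Println(parsePublishTime("2025-10-18T13:43:10+08:00"))
	fmt.Println(parsePublishTime("1 周前"))
}
```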

### 5. De-duplication Key
- Use the article ID, extracted from the detail URL (e.g. `/76567/`), as the unique identifier

## Caveats

1. **Results may be empty**: when no posts match the keyword, the page shows a "no matching content" notice
2. **Pagination**: results can span multiple pages, but scraping the first page is usually enough
3. **Link position is not fixed**: links may sit at the start, middle, or end of a post, so the whole `.post-content` must be scanned
4. **Ad clutter**: pages carry ads, so selectors must target the actual content area precisely
5. **Detail pages are required**: the search results page carries no download links; they appear only on detail pages
6. **Request rate**: every result needs a detail-page visit, so throttle requests to avoid being blocked (see the sketch below)
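
A minimal sketch of bounding detail-page concurrency with a buffered-channel semaphore, as the plugin does; the cap of 15 mirrors its `MaxConcurrency` constant:

```go
package main

import (
	"fmt"
	"sync"
)

const maxConcurrency = 15 // cap on simultaneous detail-page fetches

func main() {
	urls := []string{"https://www.ahhhhfs.com/76567/" /* , ... */}

	var wg sync.WaitGroup
	semaphore := make(chan struct{}, maxConcurrency)

	for _, u := range urls {
		wg.Add(1)
		semaphore <- struct{}{} // acquire a slot; blocks once the cap is reached
		go func(u string) {
			defer wg.Done()
			defer func() { <-semaphore }() // release the slot
			fmt.Println("fetching", u)     // a real detail-page fetch would go here
		}(u)
	}
	wg.Wait()
}
```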

## Example Data Flow

```
1. Search request: https://www.ahhhhfs.com/search/小红书
   ↓
2. Parse the search results page and extract the article list
   - Title: "AI小红书虚拟电商全链路实战课:从选品到变现的AI爆款打法"
   - Detail URL: https://www.ahhhhfs.com/76567/
   - Categories: ["短视频", "资源"]
   - Published: 2025-10-18
   ↓
3. Visit the detail page: https://www.ahhhhfs.com/76567/
   ↓
4. Parse the detail page's .post-content and extract net-disk links
   - Quark: https://pan.quark.cn/s/c16a5ae18ea0
   - Extraction code: (if any)
   ↓
5. Build the final result
   - UniqueID: ahhhhfs-76567
   - Title: "AI小红书虚拟电商全链路实战课:从选品到变现的AI爆款打法"
   - Content: the article description
   - Links: [{Type: "quark", URL: "...", Password: ""}]
   - Tags: ["短视频", "资源"]
   - Datetime: 2025-10-18T13:43:10+08:00
```