diff --git a/main.go b/main.go index 617171b..d68d78f 100644 --- a/main.go +++ b/main.go @@ -27,49 +27,51 @@ import ( // 添加新插件时,只需在此处添加对应的导入语句即可 // _ "pansou/plugin/hdr4k" // _ "pansou/plugin/pan666" - _ "pansou/plugin/hunhepan" - _ "pansou/plugin/jikepan" - _ "pansou/plugin/panwiki" - _ "pansou/plugin/pansearch" - _ "pansou/plugin/panta" - _ "pansou/plugin/qupansou" - _ "pansou/plugin/susu" - _ "pansou/plugin/thepiratebay" - _ "pansou/plugin/wanou" - _ "pansou/plugin/xuexizhinan" - _ "pansou/plugin/panyq" - _ "pansou/plugin/zhizhen" - _ "pansou/plugin/labi" - _ "pansou/plugin/muou" - _ "pansou/plugin/ouge" - _ "pansou/plugin/shandian" - _ "pansou/plugin/duoduo" - _ "pansou/plugin/huban" - _ "pansou/plugin/cyg" - _ "pansou/plugin/erxiao" - _ "pansou/plugin/miaoso" - _ "pansou/plugin/fox4k" - _ "pansou/plugin/pianku" - _ "pansou/plugin/clmao" - _ "pansou/plugin/wuji" - _ "pansou/plugin/cldi" - _ "pansou/plugin/xiaozhang" - _ "pansou/plugin/libvio" - _ "pansou/plugin/leijing" - _ "pansou/plugin/xb6v" - _ "pansou/plugin/xys" - _ "pansou/plugin/ddys" - _ "pansou/plugin/hdmoli" - _ "pansou/plugin/yuhuage" - _ "pansou/plugin/u3c3" - _ "pansou/plugin/javdb" - _ "pansou/plugin/clxiong" - _ "pansou/plugin/jutoushe" - _ "pansou/plugin/sdso" - _ "pansou/plugin/xiaoji" - _ "pansou/plugin/xdyh" - _ "pansou/plugin/haisou" - _ "pansou/plugin/bixin" + // _ "pansou/plugin/hunhepan" + // _ "pansou/plugin/jikepan" + // _ "pansou/plugin/panwiki" + // _ "pansou/plugin/pansearch" + // _ "pansou/plugin/panta" + // _ "pansou/plugin/qupansou" + // _ "pansou/plugin/susu" + // _ "pansou/plugin/thepiratebay" + // _ "pansou/plugin/wanou" + // _ "pansou/plugin/xuexizhinan" + // _ "pansou/plugin/panyq" + // _ "pansou/plugin/zhizhen" + // _ "pansou/plugin/labi" + // _ "pansou/plugin/muou" + // _ "pansou/plugin/ouge" + // _ "pansou/plugin/shandian" + // _ "pansou/plugin/duoduo" + // _ "pansou/plugin/huban" + // _ "pansou/plugin/cyg" + // _ "pansou/plugin/erxiao" + // _ "pansou/plugin/miaoso" + // 
_ "pansou/plugin/fox4k" + // _ "pansou/plugin/pianku" + // _ "pansou/plugin/clmao" + // _ "pansou/plugin/wuji" + // _ "pansou/plugin/cldi" + // _ "pansou/plugin/xiaozhang" + // _ "pansou/plugin/libvio" + // _ "pansou/plugin/leijing" + // _ "pansou/plugin/xb6v" + // _ "pansou/plugin/xys" + // _ "pansou/plugin/ddys" + // _ "pansou/plugin/hdmoli" + // _ "pansou/plugin/yuhuage" + // _ "pansou/plugin/u3c3" + // _ "pansou/plugin/javdb" + // _ "pansou/plugin/clxiong" + // _ "pansou/plugin/jutoushe" + // _ "pansou/plugin/sdso" + // _ "pansou/plugin/xiaoji" + // _ "pansou/plugin/xdyh" + // _ "pansou/plugin/haisou" + // _ "pansou/plugin/bixin" + _ "pansou/plugin/nyaa" + _ "pansou/plugin/djgou" ) // 全局缓存写入管理器 diff --git a/plugin/djgou/djgou.go b/plugin/djgou/djgou.go new file mode 100644 index 0000000..aa783d2 --- /dev/null +++ b/plugin/djgou/djgou.go @@ -0,0 +1,479 @@ +package djgou + +import ( + "context" + "fmt" + "net/http" + "net/url" + "pansou/model" + "pansou/plugin" + "regexp" + "strings" + "sync" + "time" + + "github.com/PuerkitoBio/goquery" +) + +// 预编译的正则表达式 +var ( + // 夸克网盘链接正则表达式(网站只有夸克网盘) + // 注意:夸克链接可能包含字母、数字、下划线、连字符等字符 + quarkLinkRegex = regexp.MustCompile(`https?://pan\.quark\.cn/s/[0-9a-zA-Z_\-]+`) + + // 提取码正则表达式 + pwdRegex = regexp.MustCompile(`提取码[::]\s*([a-zA-Z0-9]{4})`) + + // 缓存相关 + detailCache = sync.Map{} // 缓存详情页解析结果 + lastCleanupTime = time.Now() + cacheTTL = 1 * time.Hour +) + +const ( + // 超时时间 + DefaultTimeout = 8 * time.Second + DetailTimeout = 6 * time.Second + + // 并发数(精简后的代码使用较低的并发即可) + MaxConcurrency = 15 + + // HTTP连接池配置 + MaxIdleConns = 50 + MaxIdleConnsPerHost = 20 + MaxConnsPerHost = 30 + IdleConnTimeout = 90 * time.Second + + // 网站URL + SiteURL = "https://duanjugou.top" +) + +// 在init函数中注册插件 +func init() { + plugin.RegisterGlobalPlugin(NewDjgouPlugin()) + + // 启动缓存清理goroutine + go startCacheCleaner() +} + +// startCacheCleaner 启动一个定期清理缓存的goroutine +func startCacheCleaner() { + ticker := time.NewTicker(30 * time.Minute) + defer 
ticker.Stop() + + for range ticker.C { + // 清空所有缓存 + detailCache = sync.Map{} + lastCleanupTime = time.Now() + } +} + +// DjgouPlugin 短剧狗插件 +type DjgouPlugin struct { + *plugin.BaseAsyncPlugin + optimizedClient *http.Client +} + +// createOptimizedHTTPClient 创建优化的HTTP客户端 +func createOptimizedHTTPClient() *http.Client { + transport := &http.Transport{ + MaxIdleConns: MaxIdleConns, + MaxIdleConnsPerHost: MaxIdleConnsPerHost, + MaxConnsPerHost: MaxConnsPerHost, + IdleConnTimeout: IdleConnTimeout, + DisableKeepAlives: false, + } + return &http.Client{Transport: transport, Timeout: DefaultTimeout} +} + +// NewDjgouPlugin 创建新的短剧狗插件 +func NewDjgouPlugin() *DjgouPlugin { + return &DjgouPlugin{ + BaseAsyncPlugin: plugin.NewBaseAsyncPlugin("djgou", 2), // 优先级2:质量良好的数据源 + optimizedClient: createOptimizedHTTPClient(), + } +} + +// Search 执行搜索并返回结果(兼容性方法) +func (p *DjgouPlugin) Search(keyword string, ext map[string]interface{}) ([]model.SearchResult, error) { + result, err := p.SearchWithResult(keyword, ext) + if err != nil { + return nil, err + } + return result.Results, nil +} + +// SearchWithResult 执行搜索并返回包含IsFinal标记的结果 +func (p *DjgouPlugin) SearchWithResult(keyword string, ext map[string]interface{}) (model.PluginSearchResult, error) { + return p.AsyncSearchWithResult(keyword, p.searchImpl, p.MainCacheKey, ext) +} + +// searchImpl 实现具体的搜索逻辑 +func (p *DjgouPlugin) searchImpl(client *http.Client, keyword string, ext map[string]interface{}) ([]model.SearchResult, error) { + // 1. 构建搜索URL + searchURL := fmt.Sprintf("%s/search.php?q=%s&page=1", SiteURL, url.QueryEscape(keyword)) + + // 2. 创建带超时的上下文 + ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout) + defer cancel() + + // 3. 创建请求 + req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil) + if err != nil { + return nil, fmt.Errorf("[%s] 创建请求失败: %w", p.Name(), err) + } + + // 4. 
设置完整的请求头(避免反爬虫) + req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36") + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8") + req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") + req.Header.Set("Connection", "keep-alive") + req.Header.Set("Upgrade-Insecure-Requests", "1") + req.Header.Set("Cache-Control", "max-age=0") + req.Header.Set("Referer", SiteURL) + + // 5. 发送请求(带重试机制) + resp, err := p.doRequestWithRetry(req, client) + if err != nil { + return nil, fmt.Errorf("[%s] 搜索请求失败: %w", p.Name(), err) + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return nil, fmt.Errorf("[%s] 搜索请求返回状态码: %d", p.Name(), resp.StatusCode) + } + + // 6. 解析搜索结果页面 + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, fmt.Errorf("[%s] 解析搜索页面失败: %w", p.Name(), err) + } + + // 7. 提取搜索结果 + var results []model.SearchResult + + // 查找主列表容器 + mainListSection := doc.Find("div.erx-list-box") + if mainListSection.Length() == 0 { + return nil, fmt.Errorf("[%s] 未找到erx-list-box容器", p.Name()) + } + + // 查找列表项 + items := mainListSection.Find("ul.erx-list li.item") + if items.Length() == 0 { + return []model.SearchResult{}, nil // 没有搜索结果 + } + + // 8. 解析每个搜索结果项 + items.Each(func(i int, s *goquery.Selection) { + result := p.parseSearchItem(s, keyword) + if result.UniqueID != "" { + results = append(results, result) + } + }) + + // 9. 异步获取详情页信息 + enhancedResults := p.enhanceWithDetails(client, results) + + // 10. 
关键词过滤 + return plugin.FilterResultsByKeyword(enhancedResults, keyword), nil +} + +// parseSearchItem 解析单个搜索结果项 +func (p *DjgouPlugin) parseSearchItem(s *goquery.Selection, keyword string) model.SearchResult { + result := model.SearchResult{} + + // 提取标题区域 + aDiv := s.Find("div.a") + if aDiv.Length() == 0 { + return result + } + + // 提取链接和标题 + linkElem := aDiv.Find("a.main") + if linkElem.Length() == 0 { + return result + } + + title := strings.TrimSpace(linkElem.Text()) + link, exists := linkElem.Attr("href") + if !exists || link == "" { + return result + } + + // 处理相对路径 + if !strings.HasPrefix(link, "http") { + if strings.HasPrefix(link, "/") { + link = SiteURL + link + } else { + link = SiteURL + "/" + link + } + } + + // 提取时间 + timeText := "" + iDiv := s.Find("div.i") + if iDiv.Length() > 0 { + timeSpan := iDiv.Find("span.time") + if timeSpan.Length() > 0 { + timeText = strings.TrimSpace(timeSpan.Text()) + } + } + + // 生成唯一ID(使用链接的路径部分) + itemID := strings.TrimPrefix(link, SiteURL) + itemID = strings.Trim(itemID, "/") + result.UniqueID = fmt.Sprintf("%s-%s", p.Name(), url.QueryEscape(itemID)) + + result.Title = title + result.Datetime = p.parseTime(timeText) + result.Tags = []string{"短剧"} + result.Channel = "" // 插件搜索结果必须为空字符串 + + // 将详情页链接存储在Content中,后续获取详情 + result.Content = link + + return result +} + +// parseTime 解析时间字符串 +func (p *DjgouPlugin) parseTime(timeStr string) time.Time { + if timeStr == "" { + return time.Now() + } + + // 尝试多种时间格式 + formats := []string{ + "2006-01-02 15:04:05", + "2006-01-02 15:04", + "2006-01-02", + "2006/01/02 15:04:05", + "2006/01/02 15:04", + "2006/01/02", + } + + for _, format := range formats { + if t, err := time.Parse(format, timeStr); err == nil { + return t + } + } + + return time.Now() +} + +// enhanceWithDetails 异步获取详情页信息 +func (p *DjgouPlugin) enhanceWithDetails(client *http.Client, results []model.SearchResult) []model.SearchResult { + var wg sync.WaitGroup + var mu sync.Mutex + + // 使用信号量控制并发数 + semaphore := 
make(chan struct{}, MaxConcurrency) + + enhancedResults := make([]model.SearchResult, 0, len(results)) + + for _, result := range results { + wg.Add(1) + go func(r model.SearchResult) { + defer wg.Done() + + // 获取信号量 + semaphore <- struct{}{} + defer func() { <-semaphore }() + + // 从缓存或详情页获取链接 + links, content := p.getDetailInfo(client, r.Content) + + // 更新结果 + r.Links = links + r.Content = content + + // 只添加有链接的结果 + if len(links) > 0 { + mu.Lock() + enhancedResults = append(enhancedResults, r) + mu.Unlock() + } + }(result) + } + + wg.Wait() + return enhancedResults +} + +// getDetailInfo 获取详情页信息(带缓存) +func (p *DjgouPlugin) getDetailInfo(client *http.Client, detailURL string) ([]model.Link, string) { + // 检查缓存 + if cached, ok := detailCache.Load(detailURL); ok { + cachedData := cached.(DetailCacheData) + if time.Since(cachedData.Timestamp) < cacheTTL { + return cachedData.Links, cachedData.Content + } + } + + // 获取详情页 + links, content := p.fetchDetailPage(client, detailURL) + + // 存入缓存 + if len(links) > 0 { + detailCache.Store(detailURL, DetailCacheData{ + Links: links, + Content: content, + Timestamp: time.Now(), + }) + } + + return links, content +} + +// DetailCacheData 详情页缓存数据 +type DetailCacheData struct { + Links []model.Link + Content string + Timestamp time.Time +} + +// fetchDetailPage 获取详情页信息 +func (p *DjgouPlugin) fetchDetailPage(client *http.Client, detailURL string) ([]model.Link, string) { + // 创建带超时的上下文 + ctx, cancel := context.WithTimeout(context.Background(), DetailTimeout) + defer cancel() + + // 创建请求 + req, err := http.NewRequestWithContext(ctx, "GET", detailURL, nil) + if err != nil { + return nil, "" + } + + // 设置请求头 + req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36") + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8") + req.Header.Set("Accept-Language", 
"zh-CN,zh;q=0.9,en;q=0.8") + req.Header.Set("Referer", SiteURL) + + // 发送请求 + resp, err := client.Do(req) + if err != nil { + return nil, "" + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return nil, "" + } + + // 解析页面 + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, "" + } + + // 查找主内容区域(用于提取简介) + mainContent := doc.Find("div.erx-wrap") + if mainContent.Length() == 0 { + return nil, "" + } + + // 提取网盘链接(从整个页面HTML中提取,不仅仅是mainContent) + links := p.extractLinksFromDoc(doc) + + // 提取简介(从mainContent提取) + content := p.extractContent(mainContent) + + return links, content +} + +// extractLinksFromDoc 从整个文档中提取夸克网盘链接(重要:从整个页面HTML中提取,不限于某个div) +func (p *DjgouPlugin) extractLinksFromDoc(doc *goquery.Document) []model.Link { + var links []model.Link + linkMap := make(map[string]bool) // 去重 + + // 获取整个页面的HTML内容(这是关键!) + htmlContent, _ := doc.Html() + + // 提取提取码 + password := "" + if match := pwdRegex.FindStringSubmatch(htmlContent); len(match) > 1 { + password = match[1] + } + + // 方法1:使用专用正则表达式提取夸克网盘链接 + quarkLinks := quarkLinkRegex.FindAllString(htmlContent, -1) + for _, quarkURL := range quarkLinks { + // 去重 + if !linkMap[quarkURL] { + linkMap[quarkURL] = true + links = append(links, model.Link{ + Type: "quark", + URL: quarkURL, + Password: password, + }) + } + } + + // 方法2:从所有标签中查找夸克链接(作为补充) + doc.Find("a").Each(func(i int, s *goquery.Selection) { + href, exists := s.Attr("href") + if !exists || href == "" { + return + } + + // 检查是否是夸克网盘链接 + if strings.Contains(href, "pan.quark.cn") { + // 去重 + if !linkMap[href] { + linkMap[href] = true + links = append(links, model.Link{ + Type: "quark", + URL: href, + Password: password, + }) + } + } + }) + + return links +} + +// extractContent 提取简介 +func (p *DjgouPlugin) extractContent(mainContent *goquery.Selection) string { + content := strings.TrimSpace(mainContent.Text()) + + // 清理空白字符 + content = regexp.MustCompile(`\s+`).ReplaceAllString(content, " ") + + // 限制长度 + if 
len(content) > 300 { + content = content[:300] + "..." + } + + return content +} + +// doRequestWithRetry 带重试机制的HTTP请求 +func (p *DjgouPlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) { + maxRetries := 3 + var lastErr error + + for i := 0; i < maxRetries; i++ { + if i > 0 { + // 指数退避重试 + backoff := time.Duration(1< + + + Category + Name + Comments + Link + Size + Date + Seeders + Leechers + Downloads + + + + + + +``` + +### 单个搜索结果行结构 + +每个搜索结果是一个 `` 元素,包含以下字段: + +```html + + + + + Anime - Non-English-translated + + + + + + + [GM-Team][国漫][神墓 第3季][Tomb of Fallen Gods Ⅲ][2025][09][GB][4K HEVC 10Bit] + + + + + + + + + + + + + 1.1 GiB + + + 2025-09-27 02:46 + + + 60 + + + 13 + + + 286 + +``` + +## 字段提取规则 + +### 1. 分类信息 +- **选择器**: `td:nth-child(1) a` +- **提取**: `title` 属性 +- **示例**: "Anime - Non-English-translated" + +### 2. 标题和详情链接 +- **选择器**: `td[colspan="2"] a` +- **标题**: `text()` 或 `title` 属性 +- **详情链接**: `href` 属性 (如 `/view/2024388`) +- **唯一ID**: 从href提取数字部分 + +### 3. 下载链接 +- **种子文件**: `td.text-center a[href^="/download/"]` + - 格式: `/download/{ID}.torrent` + - 完整URL: `https://nyaa.si/download/{ID}.torrent` + +- **磁力链接**: `td.text-center a[href^="magnet:"]` + - 格式: `magnet:?xt=urn:btih:{HASH}&dn={文件名}&tr={tracker列表}` + - 提取: 直接获取 `href` 属性 + +### 4. 文件大小 +- **选择器**: `td.text-center` (第4个td) +- **格式**: "1.1 GiB", "500.0 MiB", "3.2 TiB" +- **提取**: 直接文本内容 + +### 5. 发布时间 +- **选择器**: `td.text-center[data-timestamp]` +- **时间戳**: `data-timestamp` 属性 (Unix timestamp) +- **显示时间**: 文本内容 "2025-09-27 02:46" + +### 6. 
种子统计信息 +- **做种数 (Seeders)**: 第6个 `td`(class 为 `text-center`) +- **下载数 (Leechers)**: 第7个 `td`(class 为 `text-center`) +- **完成数 (Downloads)**: 第8个 `td`(class 为 `text-center`) + +## 搜索结果类型标识 + +通过 `<tr>` 的 class 属性区分资源质量: + +| Class | 含义 | 说明 | +|-------|------|------| +| `default` | 普通资源 | 灰色背景 | +| `success` | 可信任/已验证资源 | 绿色背景 | +| `danger` | 重制版 | 红色背景 | +| `warning` | 警告/可疑 | 黄色背景 | + +## 磁力链接格式 + +``` +magnet:?xt=urn:btih:{INFO_HASH} +&dn={URL编码的文件名} +&tr={tracker1} +&tr={tracker2} +&tr={tracker3} +... +``` + +### 常见Tracker列表 + +``` +http://nyaa.tracker.wf:7777/announce +udp://open.stealth.si:80/announce +udp://tracker.opentrackr.org:1337/announce +udp://exodus.desync.com:6969/announce +udp://tracker.torrent.eu.org:451/announce +``` + +## 反爬虫策略 + +### 请求头设置 + +```go +req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36...") +req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") +req.Header.Set("Accept-Language", "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7") +req.Header.Set("Referer", "https://nyaa.si/") +``` + +### 访问频率控制 +- 建议请求间隔:100-200ms +- 超时时间:10秒 +- 重试次数:3次 + +## 插件设计 + +### 基本信息 +- **插件名称**: nyaa +- **优先级**: 3 (普通质量数据源) +- **Service层过滤**: 跳过 (磁力搜索插件,标题格式特殊) +- **缓存TTL**: 30分钟 + +### 搜索流程 + +``` +1. 构建搜索URL + ↓ +2. 发送HTTP请求(带重试) + ↓ +3. 解析HTML页面 (goquery) + ↓ +4. 查找表格 table.torrent-list + ↓ +5. 遍历 tbody > tr 提取信息 + ↓ +6. 提取磁力链接 + ↓ +7. 关键词过滤(插件层) + ↓ +8. 返回结果 +``` + +### 数据转换 + +#### SearchResult 字段映射 + +| Nyaa字段 | SearchResult字段 | 说明 | +|---------|-----------------|------| +| 标题 | Title | 资源标题 | +| 分类+大小+统计 | Content | 拼接描述信息 | +| 磁力链接 | Links[0].URL | magnet链接 | +| 发布时间 | Datetime | Unix timestamp转换 | +| 分类 | Tags[0] | 资源分类 | +| 做种/下载/完成 | Tags[1-3] | 统计信息 | +| 唯一ID | UniqueID | nyaa-{ID} | +| 频道 | Channel | 空字符串 | + +#### Link 字段设置 + +```go +Link{ + Type: "magnet", // 固定为magnet + URL: magnetURL, // 完整的磁力链接 + Password: "", // 磁力链接无密码 +} +``` + +## 性能优化 + +### 1. 
HTTP连接池 +```go +MaxIdleConns: 50 +MaxIdleConnsPerHost: 20 +MaxConnsPerHost: 30 +IdleConnTimeout: 90 * time.Second +``` + +### 2. 超时控制 +- 搜索请求超时:10秒 +- 重试间隔:指数退避(200ms, 400ms, 800ms) + +### 3. 缓存策略 +- 搜索结果缓存:30分钟 +- 定期清理:每小时清理一次过期缓存 + +## 使用示例 + +### API请求 +```bash +curl "http://localhost:8888/api/search?kw=神墓&plugins=nyaa" +``` + +### 预期响应 +```json +{ + "code": 0, + "message": "success", + "data": { + "results": [ + { + "unique_id": "nyaa-2024388", + "title": "[GM-Team][国漫][神墓 第3季][Tomb of Fallen Gods Ⅲ][2025][09][GB][4K HEVC 10Bit]", + "content": "分类: Anime - Non-English-translated | 大小: 1.1 GiB | 做种: 60 | 下载: 13 | 完成: 286", + "datetime": "2025-09-27T02:46:00Z", + "links": [ + { + "type": "magnet", + "url": "magnet:?xt=urn:btih:e47fcca0f3f1e24b1cc871a07881350faca92636&dn=...", + "password": "" + } + ], + "tags": ["Anime - Non-English-translated", "做种:60", "下载:13", "完成:286"], + "channel": "" + } + ] + } +} +``` + +## 注意事项 + +### 优点 +- ✅ **专业的ACG资源站**: 动漫资源质量高 +- ✅ **磁力链接直接可用**: 无需下载种子文件 +- ✅ **完整的统计信息**: 做种数、下载数、完成数 +- ✅ **分类清晰**: 多种分类便于筛选 +- ✅ **更新及时**: 最新动漫资源快速更新 + +### 注意事项 +- ⚠️ **仅提供磁力链接**: 不是网盘资源 +- ⚠️ **标题格式特殊**: 使用方括号、点号等特殊格式 +- ⚠️ **需要跳过Service层过滤**: 避免误删有效结果 +- ⚠️ **英文为主**: 部分资源标题为英文 +- ⚠️ **BT下载**: 需要BT客户端支持 + +## 维护建议 + +1. **定期检查网站结构**: 网站可能更新HTML结构 +2. **监控成功率**: 检查请求成功率和解析准确率 +3. **优化关键词匹配**: 针对特殊标题格式优化过滤逻辑 +4. 
**tracker更新**: 定期更新tracker列表以提高连接成功率 diff --git a/plugin/nyaa/nyaa.go b/plugin/nyaa/nyaa.go new file mode 100644 index 0000000..0b05dd2 --- /dev/null +++ b/plugin/nyaa/nyaa.go @@ -0,0 +1,308 @@ +package nyaa + +import ( + "context" + "fmt" + "net/http" + "net/url" + "pansou/model" + "pansou/plugin" + "regexp" + "strconv" + "strings" + "time" + + "github.com/PuerkitoBio/goquery" +) + +// 预编译的正则表达式 +var ( + // 从详情链接提取ID的正则表达式 + viewIDRegex = regexp.MustCompile(`/view/(\d+)`) + + // 磁力链接正则表达式 + magnetRegex = regexp.MustCompile(`magnet:\?xt=urn:btih:[a-zA-Z0-9]+[^\s'"<>]*`) +) + +const ( + // 超时时间 + DefaultTimeout = 10 * time.Second + + // HTTP连接池配置 + MaxIdleConns = 50 + MaxIdleConnsPerHost = 20 + MaxConnsPerHost = 30 + IdleConnTimeout = 90 * time.Second + + // 网站URL + SiteURL = "https://nyaa.si" +) + +// 在init函数中注册插件 +func init() { + plugin.RegisterGlobalPlugin(NewNyaaPlugin()) +} + +// NyaaPlugin Nyaa BT搜索插件 +type NyaaPlugin struct { + *plugin.BaseAsyncPlugin + optimizedClient *http.Client +} + +// createOptimizedHTTPClient 创建优化的HTTP客户端 +func createOptimizedHTTPClient() *http.Client { + transport := &http.Transport{ + MaxIdleConns: MaxIdleConns, + MaxIdleConnsPerHost: MaxIdleConnsPerHost, + MaxConnsPerHost: MaxConnsPerHost, + IdleConnTimeout: IdleConnTimeout, + DisableKeepAlives: false, + } + return &http.Client{Transport: transport, Timeout: DefaultTimeout} +} + +// NewNyaaPlugin 创建新的Nyaa插件 +func NewNyaaPlugin() *NyaaPlugin { + return &NyaaPlugin{ + // 优先级3:普通质量数据源,跳过Service层过滤(磁力搜索插件) + BaseAsyncPlugin: plugin.NewBaseAsyncPluginWithFilter("nyaa", 3, true), + optimizedClient: createOptimizedHTTPClient(), + } +} + +// Search 执行搜索并返回结果(兼容性方法) +func (p *NyaaPlugin) Search(keyword string, ext map[string]interface{}) ([]model.SearchResult, error) { + result, err := p.SearchWithResult(keyword, ext) + if err != nil { + return nil, err + } + return result.Results, nil +} + +// SearchWithResult 执行搜索并返回包含IsFinal标记的结果 +func (p *NyaaPlugin) SearchWithResult(keyword string, ext 
map[string]interface{}) (model.PluginSearchResult, error) { + return p.AsyncSearchWithResult(keyword, p.searchImpl, p.MainCacheKey, ext) +} + +// searchImpl 实现具体的搜索逻辑 +func (p *NyaaPlugin) searchImpl(client *http.Client, keyword string, ext map[string]interface{}) ([]model.SearchResult, error) { + // 支持英文搜索优化 + searchKeyword := keyword + if ext != nil { + if titleEn, exists := ext["title_en"]; exists { + if titleEnStr, ok := titleEn.(string); ok && titleEnStr != "" { + searchKeyword = titleEnStr + } + } + } + + // 1. 构建搜索URL + searchURL := fmt.Sprintf("%s/?f=0&c=0_0&q=%s", SiteURL, url.QueryEscape(searchKeyword)) + + // 2. 创建带超时的上下文 + ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout) + defer cancel() + + // 3. 创建请求 + req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil) + if err != nil { + return nil, fmt.Errorf("[%s] 创建请求失败: %w", p.Name(), err) + } + + // 4. 设置完整的请求头(避免反爬虫) + req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") + req.Header.Set("Accept-Language", "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7") + req.Header.Set("Connection", "keep-alive") + req.Header.Set("Referer", SiteURL) + + // 5. 发送请求(带重试机制) + resp, err := p.doRequestWithRetry(req, client) + if err != nil { + return nil, fmt.Errorf("[%s] 搜索请求失败: %w", p.Name(), err) + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return nil, fmt.Errorf("[%s] 搜索请求返回状态码: %d", p.Name(), resp.StatusCode) + } + + // 6. 解析搜索结果页面 + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, fmt.Errorf("[%s] 解析搜索页面失败: %w", p.Name(), err) + } + + // 7. 提取搜索结果 + var results []model.SearchResult + + // 查找种子列表表格 + table := doc.Find("table.torrent-list tbody") + if table.Length() == 0 { + return []model.SearchResult{}, nil // 没有搜索结果 + } + + // 8. 
解析每个搜索结果行 + table.Find("tr").Each(func(i int, s *goquery.Selection) { + result := p.parseSearchRow(s) + if result.UniqueID != "" { + results = append(results, result) + } + }) + + // 9. 关键词过滤(插件层过滤,使用实际搜索的关键词) + return plugin.FilterResultsByKeyword(results, searchKeyword), nil +} + +// parseSearchRow 解析单个搜索结果行 +func (p *NyaaPlugin) parseSearchRow(s *goquery.Selection) model.SearchResult { + result := model.SearchResult{} + + // 1. 提取分类信息 + categoryLink := s.Find("td:nth-child(1) a") + category := "" + if categoryLink.Length() > 0 { + category, _ = categoryLink.Attr("title") + } + + // 2. 提取标题和详情链接 + titleLink := s.Find("td[colspan='2'] a") + if titleLink.Length() == 0 { + return result + } + + title := strings.TrimSpace(titleLink.Text()) + if title == "" { + // 如果text为空,尝试从title属性获取 + title, _ = titleLink.Attr("title") + } + + detailHref, exists := titleLink.Attr("href") + if !exists || detailHref == "" { + return result + } + + // 3. 从详情链接提取ID + matches := viewIDRegex.FindStringSubmatch(detailHref) + if len(matches) < 2 { + return result + } + itemID := matches[1] + result.UniqueID = fmt.Sprintf("%s-%s", p.Name(), itemID) + result.Title = title + + // 4. 提取磁力链接 + magnetLink := s.Find("td.text-center a[href^='magnet:']") + if magnetLink.Length() > 0 { + magnetURL, exists := magnetLink.Attr("href") + if exists && magnetURL != "" { + result.Links = []model.Link{ + { + Type: "magnet", + URL: magnetURL, + Password: "", + }, + } + } + } + + // 如果没有找到磁力链接,返回空结果 + if len(result.Links) == 0 { + result.UniqueID = "" + return result + } + + // 5. 提取文件大小 + sizeTd := s.Find("td.text-center").Eq(1) // 第4个td,索引从1开始(跳过链接td) + size := strings.TrimSpace(sizeTd.Text()) + + // 6. 
提取发布时间 + dateTd := s.Find("td.text-center[data-timestamp]") + timestamp := int64(0) + if dateTd.Length() > 0 { + if timestampStr, exists := dateTd.Attr("data-timestamp"); exists { + if ts, err := strconv.ParseInt(timestampStr, 10, 64); err == nil { + timestamp = ts + } + } + } + + if timestamp > 0 { + result.Datetime = time.Unix(timestamp, 0) + } else { + result.Datetime = time.Now() + } + + // 7. 提取种子统计信息 + tds := s.Find("td.text-center") + seeders := "0" + leechers := "0" + downloads := "0" + + if tds.Length() >= 6 { + // 倒数第3个是做种数 + seeders = strings.TrimSpace(tds.Eq(tds.Length() - 3).Text()) + // 倒数第2个是下载数 + leechers = strings.TrimSpace(tds.Eq(tds.Length() - 2).Text()) + // 倒数第1个是完成数 + downloads = strings.TrimSpace(tds.Eq(tds.Length() - 1).Text()) + } + + // 8. 构建内容描述 + var contentParts []string + if category != "" { + contentParts = append(contentParts, fmt.Sprintf("分类: %s", category)) + } + if size != "" { + contentParts = append(contentParts, fmt.Sprintf("大小: %s", size)) + } + contentParts = append(contentParts, fmt.Sprintf("做种: %s", seeders)) + contentParts = append(contentParts, fmt.Sprintf("下载: %s", leechers)) + contentParts = append(contentParts, fmt.Sprintf("完成: %s", downloads)) + + result.Content = strings.Join(contentParts, " | ") + + // 9. 设置标签 + var tags []string + if category != "" { + tags = append(tags, category) + } + tags = append(tags, fmt.Sprintf("做种:%s", seeders)) + tags = append(tags, fmt.Sprintf("下载:%s", leechers)) + tags = append(tags, fmt.Sprintf("完成:%s", downloads)) + result.Tags = tags + + // 10. Channel必须为空字符串(插件搜索结果) + result.Channel = "" + + return result +} + +// doRequestWithRetry 带重试机制的HTTP请求 +func (p *NyaaPlugin) doRequestWithRetry(req *http.Request, client *http.Client) (*http.Response, error) { + maxRetries := 3 + var lastErr error + + for i := 0; i < maxRetries; i++ { + if i > 0 { + // 指数退避重试 + backoff := time.Duration(1<