// Mirror of https://github.com/fish2018/pansou.git
// (synced 2025-11-25 03:14:59 +08:00; 785 lines, 21 KiB, Go).

package pan666
|
||
|
||
import (
	"encoding/json"
	"fmt"
	"io"
	"math/rand"
	"net/http"
	"net/url"
	"sort"
	"strings"
	"sync"
	"time"

	"pansou/model"
	"pansou/plugin"
)

// 在init函数中注册插件
|
||
func init() {
|
||
plugin.RegisterGlobalPlugin(NewPan666Plugin())
|
||
}
|
||
|
||
const (
	// BaseURL is the base URL of the API; fetchPage appends the search
	// query parameters to it.
	BaseURL = "https://pan666.net/api/discussions"

	// Default parameters.
	DefaultTimeout = 6 * time.Second
	PageSize       = 50 // restored to 50, matching the number of items the API actually returns
	MaxRetries     = 2
)

// userAgents is a list of common browser User-Agent strings; getRandomUA
// picks one of them at random for each request.
var userAgents = []string{
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
	"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36",
}

// Pan666Plugin is the search plugin for the pan666 net-disk site.
type Pan666Plugin struct {
	client  *http.Client  // HTTP client used by fetchPage to call the API
	timeout time.Duration // request timeout; NewPan666Plugin also sets it as client.Timeout
	retries int           // set from MaxRetries; not read by any code visible in this file
}

// NewPan666Plugin 创建新的pan666插件
|
||
func NewPan666Plugin() *Pan666Plugin {
|
||
timeout := DefaultTimeout
|
||
|
||
return &Pan666Plugin{
|
||
client: &http.Client{
|
||
Timeout: timeout,
|
||
},
|
||
timeout: timeout,
|
||
retries: MaxRetries,
|
||
}
|
||
}
|
||
|
||
// Name 返回插件名称
|
||
func (p *Pan666Plugin) Name() string {
|
||
return "pan666"
|
||
}
|
||
|
||
// Priority 返回插件优先级
|
||
func (p *Pan666Plugin) Priority() int {
|
||
return 3 // 中等优先级
|
||
}
|
||
|
||
// generateRandomIP returns a random dotted-quad IPv4 address string.
func generateRandomIP() string {
	// First octet is 1..223, which avoids 0 and 255.
	first := 1 + rand.Intn(223)
	// Middle octets are 0..254.
	second := rand.Intn(255)
	third := rand.Intn(255)
	// Last octet is 1..254, which avoids 0.
	fourth := 1 + rand.Intn(254)

	return fmt.Sprintf("%d.%d.%d.%d", first, second, third, fourth)
}

// 获取随机UA
|
||
func getRandomUA() string {
|
||
return userAgents[rand.Intn(len(userAgents))]
|
||
}
|
||
|
||
// Search 执行搜索并返回结果
|
||
func (p *Pan666Plugin) Search(keyword string) ([]model.SearchResult, error) {
|
||
|
||
// 初始化随机数种子
|
||
rand.Seed(time.Now().UnixNano())
|
||
|
||
// 只并发请求2个页面(0-1页)
|
||
allResults, _, err := p.fetchBatch(keyword, 0, 2)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
// 去重
|
||
uniqueResults := p.deduplicateResults(allResults)
|
||
|
||
return uniqueResults, nil
|
||
}
|
||
|
||
// fetchBatch 获取一批页面的数据
|
||
func (p *Pan666Plugin) fetchBatch(keyword string, startOffset, pageCount int) ([]model.SearchResult, bool, error) {
|
||
var wg sync.WaitGroup
|
||
resultChan := make(chan struct{
|
||
offset int
|
||
results []model.SearchResult
|
||
hasMore bool
|
||
err error
|
||
}, pageCount)
|
||
|
||
// 并发请求多个页面,但每个请求之间添加随机延迟
|
||
for i := 0; i < pageCount; i++ {
|
||
offset := (startOffset + i) * PageSize
|
||
wg.Add(1)
|
||
|
||
go func(offset int, index int) {
|
||
defer wg.Done()
|
||
|
||
// 第一个请求立即执行,后续请求添加随机延迟
|
||
if index > 0 {
|
||
// 随机等待0-1秒
|
||
randomDelay := time.Duration(100 + rand.Intn(900)) * time.Millisecond
|
||
time.Sleep(randomDelay)
|
||
}
|
||
|
||
// 请求特定页面
|
||
results, hasMore, err := p.fetchPage(keyword, offset)
|
||
|
||
resultChan <- struct{
|
||
offset int
|
||
results []model.SearchResult
|
||
hasMore bool
|
||
err error
|
||
}{
|
||
offset: offset,
|
||
results: results,
|
||
hasMore: hasMore,
|
||
err: err,
|
||
}
|
||
}(offset, i)
|
||
}
|
||
|
||
// 等待所有请求完成
|
||
go func() {
|
||
wg.Wait()
|
||
close(resultChan)
|
||
}()
|
||
|
||
// 收集结果
|
||
var allResults []model.SearchResult
|
||
resultsByOffset := make(map[int][]model.SearchResult)
|
||
errorsByOffset := make(map[int]error)
|
||
hasMoreByOffset := make(map[int]bool)
|
||
|
||
// 处理返回的结果
|
||
for res := range resultChan {
|
||
if res.err != nil {
|
||
errorsByOffset[res.offset] = res.err
|
||
continue
|
||
}
|
||
|
||
resultsByOffset[res.offset] = res.results
|
||
hasMoreByOffset[res.offset] = res.hasMore
|
||
}
|
||
|
||
// 按偏移量顺序整理结果
|
||
emptyPageCount := 0
|
||
for i := 0; i < pageCount; i++ {
|
||
offset := (startOffset + i) * PageSize
|
||
results, ok := resultsByOffset[offset]
|
||
|
||
if !ok {
|
||
// 这个偏移量的请求失败了
|
||
continue
|
||
}
|
||
|
||
if len(results) == 0 {
|
||
emptyPageCount++
|
||
// 如果连续两页没有结果,可能已经到达末尾,可以提前终止
|
||
if emptyPageCount >= 2 {
|
||
break
|
||
}
|
||
} else {
|
||
emptyPageCount = 0 // 重置空页计数
|
||
allResults = append(allResults, results...)
|
||
}
|
||
}
|
||
|
||
// 检查是否所有请求都失败
|
||
if len(errorsByOffset) == pageCount {
|
||
for _, err := range errorsByOffset {
|
||
return nil, false, fmt.Errorf("所有请求都失败: %w", err)
|
||
}
|
||
}
|
||
|
||
// 检查是否需要继续请求
|
||
needMoreRequests := false
|
||
for _, hasMore := range hasMoreByOffset {
|
||
if hasMore {
|
||
needMoreRequests = true
|
||
break
|
||
}
|
||
}
|
||
|
||
return allResults, needMoreRequests, nil
|
||
}
|
||
|
||
// deduplicateResults 去除重复的搜索结果
|
||
func (p *Pan666Plugin) deduplicateResults(results []model.SearchResult) []model.SearchResult {
|
||
seen := make(map[string]bool)
|
||
var uniqueResults []model.SearchResult
|
||
|
||
for _, result := range results {
|
||
if !seen[result.UniqueID] {
|
||
seen[result.UniqueID] = true
|
||
uniqueResults = append(uniqueResults, result)
|
||
}
|
||
}
|
||
|
||
return uniqueResults
|
||
}
|
||
|
||
// fetchPage 获取指定偏移量的页面数据
|
||
func (p *Pan666Plugin) fetchPage(keyword string, offset int) ([]model.SearchResult, bool, error) {
|
||
// 构建请求URL,包含查询参数
|
||
reqURL := fmt.Sprintf("%s?filter%%5Bq%%5D=%s&page%%5Blimit%%5D=%d",
|
||
BaseURL, url.QueryEscape(keyword), PageSize)
|
||
|
||
// 添加偏移量参数
|
||
if offset > 0 {
|
||
reqURL += fmt.Sprintf("&page%%5Boffset%%5D=%d", offset)
|
||
}
|
||
|
||
// 添加包含mostRelevantPost参数
|
||
reqURL += "&include=mostRelevantPost"
|
||
|
||
// 发送请求
|
||
req, err := http.NewRequest("GET", reqURL, nil)
|
||
if err != nil {
|
||
return nil, false, fmt.Errorf("创建请求失败: %w", err)
|
||
}
|
||
|
||
// 使用随机UA和IP
|
||
randomUA := getRandomUA()
|
||
randomIP := generateRandomIP()
|
||
|
||
req.Header.Set("User-Agent", randomUA)
|
||
req.Header.Set("Referer", "https://pan666.net/")
|
||
req.Header.Set("X-Forwarded-For", randomIP)
|
||
req.Header.Set("X-Real-IP", randomIP)
|
||
|
||
// 添加一些常见请求头,使请求更真实
|
||
req.Header.Set("Accept", "application/json, text/plain, */*")
|
||
req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
|
||
req.Header.Set("Connection", "keep-alive")
|
||
|
||
// 发送请求
|
||
resp, err := p.client.Do(req)
|
||
if err != nil {
|
||
return nil, false, fmt.Errorf("请求失败: %w", err)
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
// 读取响应体
|
||
respBody, err := io.ReadAll(resp.Body)
|
||
if err != nil {
|
||
return nil, false, fmt.Errorf("读取响应失败: %w", err)
|
||
}
|
||
|
||
// 解析响应
|
||
var apiResp Pan666Response
|
||
if err := json.Unmarshal(respBody, &apiResp); err != nil {
|
||
return nil, false, fmt.Errorf("解析响应失败: %w", err)
|
||
}
|
||
|
||
// 如果没有数据,返回空结果
|
||
if len(apiResp.Data) == 0 {
|
||
return []model.SearchResult{}, false, nil
|
||
}
|
||
|
||
// 判断是否有更多页面
|
||
hasMore := len(apiResp.Data) >= PageSize && apiResp.Links.Next != ""
|
||
|
||
// 构建ID到included post的映射
|
||
postMap := make(map[string]Pan666Post)
|
||
for _, post := range apiResp.Included {
|
||
if post.Type == "posts" {
|
||
postMap[post.ID] = post
|
||
}
|
||
}
|
||
|
||
// 处理搜索结果
|
||
results := make([]model.SearchResult, 0, len(apiResp.Data))
|
||
|
||
for _, item := range apiResp.Data {
|
||
// 获取关联的post内容
|
||
postID := item.Relationships.MostRelevantPost.Data.ID
|
||
post, exists := postMap[postID]
|
||
|
||
if !exists {
|
||
continue // 跳过没有关联内容的结果
|
||
}
|
||
|
||
// 解析时间
|
||
createdAt, _ := time.Parse(time.RFC3339, item.Attributes.CreatedAt)
|
||
|
||
// 先清理HTML,保留纯文本内容
|
||
cleanContent := cleanHTML(post.Attributes.ContentHTML)
|
||
|
||
// 提取网盘链接
|
||
links := extractLinksFromText(cleanContent)
|
||
|
||
// 只有当links数组不为空时,才添加结果
|
||
if len(links) > 0 {
|
||
// 创建搜索结果
|
||
result := model.SearchResult{
|
||
MessageID: item.ID,
|
||
UniqueID: fmt.Sprintf("pan666_%s", item.ID),
|
||
Channel: "", // 设置为空字符串,因为不是TG频道
|
||
Datetime: createdAt,
|
||
Title: item.Attributes.Title,
|
||
Content: cleanContent,
|
||
Links: links,
|
||
}
|
||
|
||
results = append(results, result)
|
||
}
|
||
}
|
||
|
||
return results, hasMore, nil
|
||
}
|
||
|
||
// extractLinks 从HTML内容中提取网盘链接
|
||
func extractLinks(content string) []model.Link {
|
||
links := make([]model.Link, 0)
|
||
|
||
// 定义网盘类型及其对应的链接关键词
|
||
categories := map[string][]string{
|
||
"magnet": {"magnet"}, // 磁力链接
|
||
"ed2k": {"ed2k"}, // 电驴链接
|
||
"uc": {"drive.uc.cn"}, // UC网盘
|
||
"mobile": {"caiyun.139.com"}, // 移动云盘
|
||
"tianyi": {"cloud.189.cn"}, // 天翼云盘
|
||
"quark": {"pan.quark.cn"}, // 夸克网盘
|
||
"115": {"115cdn.com", "115.com", "anxia.com"}, // 115网盘
|
||
"aliyun": {"alipan.com", "aliyundrive.com"}, // 阿里云盘
|
||
"pikpak": {"mypikpak.com"}, // PikPak网盘
|
||
"baidu": {"pan.baidu.com"}, // 百度网盘
|
||
"123": {"123684.com", "123685.com", "123912.com", "123pan.com", "123pan.cn", "123592.com"}, // 123网盘
|
||
"lanzou": {"lanzou", "lanzoux"}, // 蓝奏云
|
||
"xunlei": {"pan.xunlei.com"}, // 迅雷网盘
|
||
"weiyun": {"weiyun.com"}, // 微云
|
||
"jianguoyun": {"jianguoyun.com"}, // 坚果云
|
||
}
|
||
|
||
// 遍历所有分类,提取对应的链接
|
||
for category, patterns := range categories {
|
||
for _, pattern := range patterns {
|
||
categoryLinks := extractLinksByPattern(content, pattern, "", category)
|
||
links = append(links, categoryLinks...)
|
||
}
|
||
}
|
||
|
||
return links
|
||
}
|
||
|
||
// extractLinksByPattern 根据特定模式提取链接
|
||
func extractLinksByPattern(content, pattern, altPattern, linkType string) []model.Link {
|
||
links := make([]model.Link, 0)
|
||
|
||
// 查找所有包含pattern的行
|
||
lines := strings.Split(content, "\n")
|
||
for _, line := range lines {
|
||
// 提取主要pattern的链接
|
||
if idx := strings.Index(line, pattern); idx != -1 {
|
||
link := extractLinkFromLine(line[idx:], pattern)
|
||
if link.URL != "" {
|
||
link.Type = linkType
|
||
links = append(links, link)
|
||
}
|
||
}
|
||
|
||
// 如果有替代pattern,也提取
|
||
if altPattern != "" {
|
||
if idx := strings.Index(line, altPattern); idx != -1 {
|
||
link := extractLinkFromLine(line[idx:], altPattern)
|
||
if link.URL != "" {
|
||
link.Type = linkType
|
||
links = append(links, link)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
return links
|
||
}
|
||
|
||
// extractLinkFromLine 从行中提取链接和密码
|
||
func extractLinkFromLine(line, prefix string) model.Link {
|
||
link := model.Link{}
|
||
|
||
// 提取URL
|
||
endIdx := strings.Index(line, "\"")
|
||
if endIdx == -1 {
|
||
endIdx = strings.Index(line, "'")
|
||
}
|
||
if endIdx == -1 {
|
||
endIdx = strings.Index(line, " ")
|
||
}
|
||
if endIdx == -1 {
|
||
endIdx = strings.Index(line, "<")
|
||
}
|
||
if endIdx == -1 {
|
||
endIdx = len(line)
|
||
}
|
||
|
||
url := line[:endIdx]
|
||
link.URL = url
|
||
|
||
// 查找密码
|
||
pwdKeywords := []string{"提取码", "密码", "提取密码", "pwd", "password", "提取"}
|
||
for _, keyword := range pwdKeywords {
|
||
if pwdIdx := strings.Index(strings.ToLower(line), strings.ToLower(keyword)); pwdIdx != -1 {
|
||
// 密码通常在关键词后面
|
||
restOfLine := line[pwdIdx+len(keyword):]
|
||
|
||
// 跳过可能的分隔符
|
||
restOfLine = strings.TrimLeft(restOfLine, " ::=")
|
||
|
||
// 提取密码(通常是4个字符)
|
||
if len(restOfLine) >= 4 {
|
||
// 获取前4个字符作为密码
|
||
password := strings.TrimSpace(restOfLine[:4])
|
||
// 确保密码不包含HTML标签或其他非法字符
|
||
if !strings.ContainsAny(password, "<>\"'") {
|
||
link.Password = password
|
||
break
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
return link
|
||
}
|
||
|
||
// htmlEntityReplacer decodes the HTML entities cleanHTML supports. A Replacer
// rewrites its input in a single pass, so text produced by one replacement is
// never decoded a second time ("&amp;lt;" becomes "&lt;", not "<").
var htmlEntityReplacer = strings.NewReplacer(
	"&nbsp;", " ",
	"&lt;", "<",
	"&gt;", ">",
	"&amp;", "&",
	"&quot;", "\"",
)

// cleanHTML strips the markup from html and returns the plain text.
//
// <script> and <style> elements are removed together with their content,
// every remaining tag is replaced by a space, the entities &nbsp;, &lt;, &gt;,
// &amp; and &quot; are decoded, and runs of whitespace are collapsed to single
// spaces with leading and trailing whitespace removed. An unterminated element
// or a '<' without a following '>' is left in the text.
func cleanHTML(html string) string {
	// removeElements deletes every openTag...closeTag span from s and stops
	// at the first span that is not terminated.
	removeElements := func(s, openTag, closeTag string) string {
		for {
			start := strings.Index(s, openTag)
			if start == -1 {
				return s
			}
			end := strings.Index(s[start:], closeTag)
			if end == -1 {
				return s
			}
			s = s[:start] + s[start+end+len(closeTag):]
		}
	}

	text := removeElements(html, "<script", "</script>")
	text = removeElements(text, "<style", "</style>")

	// Replace each remaining tag with a space in one pass over the text.
	var stripped strings.Builder
	stripped.Grow(len(text))
	rest := text
	for {
		open := strings.IndexByte(rest, '<')
		if open == -1 {
			break
		}
		closing := strings.IndexByte(rest[open:], '>')
		if closing == -1 {
			break
		}
		stripped.WriteString(rest[:open])
		stripped.WriteByte(' ')
		rest = rest[open+closing+1:]
	}
	stripped.WriteString(rest)

	// Decode entities only after the tags are gone, so a decoded '<' is kept
	// as text instead of being mistaken for markup.
	text = htmlEntityReplacer.Replace(stripped.String())

	// Collapse redundant whitespace.
	return strings.Join(strings.Fields(text), " ")
}

// min returns the smaller of the two integers a and b.
func min(a, b int) int {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}

// Pan666Response is the structure of the API response decoded by fetchPage.
// Data holds the discussions, Included the posts they refer to, and a
// non-empty Links.Next is one of the conditions fetchPage uses to decide
// that another page exists.
type Pan666Response struct {
	Links struct {
		First string `json:"first"`
		Next  string `json:"next,omitempty"`
	} `json:"links"`
	Data     []Pan666Discussion `json:"data"`
	Included []Pan666Post       `json:"included"`
}

// Pan666Discussion is the data structure of one discussion in the API
// response. fetchPage reads its ID, Attributes.Title and
// Attributes.CreatedAt (parsed with time.RFC3339) and uses
// Relationships.MostRelevantPost.Data.ID to find the matching included post.
type Pan666Discussion struct {
	Type       string `json:"type"`
	ID         string `json:"id"`
	Attributes struct {
		Title          string `json:"title"`
		Slug           string `json:"slug"`
		CommentCount   int    `json:"commentCount"`
		CreatedAt      string `json:"createdAt"`
		LastPostedAt   string `json:"lastPostedAt"`
		LastPostNumber int    `json:"lastPostNumber"`
		IsApproved     bool   `json:"isApproved"`
	} `json:"attributes"`
	Relationships struct {
		MostRelevantPost struct {
			Data struct {
				Type string `json:"type"`
				ID   string `json:"id"`
			} `json:"data"`
		} `json:"mostRelevantPost"`
	} `json:"relationships"`
}

// Pan666Post is the structure of a post entry in the "included" array of the
// API response. fetchPage only considers entries whose Type is "posts" and
// extracts links from their Attributes.ContentHTML.
type Pan666Post struct {
	Type       string `json:"type"`
	ID         string `json:"id"`
	Attributes struct {
		Number      int    `json:"number"`
		CreatedAt   string `json:"createdAt"`
		ContentType string `json:"contentType"`
		ContentHTML string `json:"contentHtml"`
	} `json:"attributes"`
}

// extractLinksFromText 从清理后的文本中提取网盘链接
|
||
func extractLinksFromText(content string) []model.Link {
|
||
// 定义网盘类型及其对应的链接关键词
|
||
categories := map[string][]string{
|
||
"magnet": {"magnet"}, // 磁力链接
|
||
"ed2k": {"ed2k"}, // 电驴链接
|
||
"uc": {"drive.uc.cn"}, // UC网盘
|
||
"mobile": {"caiyun.139.com"}, // 移动云盘
|
||
"tianyi": {"cloud.189.cn"}, // 天翼云盘
|
||
"quark": {"pan.quark.cn"}, // 夸克网盘
|
||
"115": {"115cdn.com", "115.com", "anxia.com"}, // 115网盘
|
||
"aliyun": {"alipan.com", "aliyundrive.com"}, // 阿里云盘
|
||
"pikpak": {"mypikpak.com"}, // PikPak网盘
|
||
"baidu": {"pan.baidu.com"}, // 百度网盘
|
||
"123": {"123684.com", "123685.com", "123912.com", "123pan.com", "123pan.cn", "123592.com"}, // 123网盘
|
||
"lanzou": {"lanzou", "lanzoux"}, // 蓝奏云
|
||
"xunlei": {"pan.xunlei.com"}, // 迅雷网盘
|
||
"weiyun": {"weiyun.com"}, // 微云
|
||
"jianguoyun": {"jianguoyun.com"}, // 坚果云
|
||
}
|
||
|
||
// 存储所有找到的链接及其在文本中的位置
|
||
type linkInfo struct {
|
||
link model.Link
|
||
position int
|
||
category string
|
||
}
|
||
var allLinks []linkInfo
|
||
|
||
// 第一步:提取所有链接及其位置
|
||
for category, patterns := range categories {
|
||
for _, pattern := range patterns {
|
||
pos := 0
|
||
for {
|
||
idx := strings.Index(content[pos:], pattern)
|
||
if idx == -1 {
|
||
break
|
||
}
|
||
|
||
// 计算实际位置
|
||
actualPos := pos + idx
|
||
|
||
// 提取URL
|
||
url := extractURLFromText(content[actualPos:])
|
||
if url != "" {
|
||
// 检查URL是否已包含密码参数
|
||
password := extractPasswordFromURL(url)
|
||
|
||
// 创建链接
|
||
link := model.Link{
|
||
Type: category,
|
||
URL: url,
|
||
Password: password,
|
||
}
|
||
|
||
// 存储链接及其位置
|
||
allLinks = append(allLinks, linkInfo{
|
||
link: link,
|
||
position: actualPos,
|
||
category: category,
|
||
})
|
||
}
|
||
|
||
// 移动位置继续查找
|
||
pos = actualPos + len(pattern)
|
||
}
|
||
}
|
||
}
|
||
|
||
// 按位置排序链接
|
||
sort.Slice(allLinks, func(i, j int) bool {
|
||
return allLinks[i].position < allLinks[j].position
|
||
})
|
||
|
||
// 第二步:提取所有密码关键词及其位置
|
||
type passwordInfo struct {
|
||
keyword string
|
||
position int
|
||
password string
|
||
}
|
||
var allPasswords []passwordInfo
|
||
|
||
// 密码关键词
|
||
pwdKeywords := []string{"提取码", "密码", "提取密码", "pwd", "password", "提取码:", "密码:", "提取密码:", "pwd:", "password:", "提取:"}
|
||
|
||
for _, keyword := range pwdKeywords {
|
||
pos := 0
|
||
for {
|
||
idx := strings.Index(strings.ToLower(content[pos:]), strings.ToLower(keyword))
|
||
if idx == -1 {
|
||
break
|
||
}
|
||
|
||
// 计算实际位置
|
||
actualPos := pos + idx
|
||
|
||
// 提取密码
|
||
restContent := content[actualPos+len(keyword):]
|
||
restContent = strings.TrimLeft(restContent, " ::=")
|
||
|
||
var password string
|
||
if len(restContent) >= 4 {
|
||
possiblePwd := strings.TrimSpace(restContent[:4])
|
||
if !strings.ContainsAny(possiblePwd, "<>\"'\t\n\r") {
|
||
password = possiblePwd
|
||
}
|
||
}
|
||
|
||
if password != "" {
|
||
allPasswords = append(allPasswords, passwordInfo{
|
||
keyword: keyword,
|
||
position: actualPos,
|
||
password: password,
|
||
})
|
||
}
|
||
|
||
// 移动位置继续查找
|
||
pos = actualPos + len(keyword)
|
||
}
|
||
}
|
||
|
||
// 按位置排序密码
|
||
sort.Slice(allPasswords, func(i, j int) bool {
|
||
return allPasswords[i].position < allPasswords[j].position
|
||
})
|
||
|
||
// 第三步:为每个密码找到它前面最近的链接
|
||
// 创建链接的副本,用于最终结果
|
||
finalLinks := make([]model.Link, len(allLinks))
|
||
for i, linkInfo := range allLinks {
|
||
finalLinks[i] = linkInfo.link
|
||
}
|
||
|
||
// 对于每个密码,找到它前面最近的链接
|
||
for _, pwdInfo := range allPasswords {
|
||
// 找到密码前面最近的链接
|
||
var closestLinkIndex int = -1
|
||
minDistance := 1000000
|
||
|
||
for i, linkInfo := range allLinks {
|
||
// 只考虑密码前面的链接
|
||
if linkInfo.position < pwdInfo.position {
|
||
distance := pwdInfo.position - linkInfo.position
|
||
|
||
// 密码必须在链接后的200个字符内
|
||
if distance < 200 && distance < minDistance {
|
||
minDistance = distance
|
||
closestLinkIndex = i
|
||
}
|
||
}
|
||
}
|
||
|
||
// 如果找到了链接,并且该链接没有从URL中提取的密码
|
||
if closestLinkIndex != -1 && finalLinks[closestLinkIndex].Password == "" {
|
||
// 检查这个链接后面是否有其他链接
|
||
hasNextLink := false
|
||
for _, linkInfo := range allLinks {
|
||
// 如果有链接在当前链接和密码之间,说明当前链接不需要密码
|
||
if linkInfo.position > allLinks[closestLinkIndex].position &&
|
||
linkInfo.position < pwdInfo.position {
|
||
hasNextLink = true
|
||
break
|
||
}
|
||
}
|
||
|
||
// 只有当没有其他链接在当前链接和密码之间时,才将密码关联到链接
|
||
if !hasNextLink {
|
||
finalLinks[closestLinkIndex].Password = pwdInfo.password
|
||
}
|
||
}
|
||
}
|
||
|
||
return finalLinks
|
||
}
|
||
|
||
// extractURLFromText returns the URL at the start of text: everything up to
// the first whitespace, quote or angle-bracket character, with a leading
// "http://", "https://" and "www." removed, in that order.
func extractURLFromText(text string) string {
	// isDelimiter reports whether r ends a URL.
	isDelimiter := func(r rune) bool {
		switch r {
		case ' ', '\t', '\n', '\r', '"', '\'', '<', '>':
			return true
		}
		return false
	}

	// Cut the text at the first delimiter, if there is one.
	candidate := text
	if cut := strings.IndexFunc(text, isDelimiter); cut >= 0 {
		candidate = text[:cut]
	}

	// Clean up the URL by dropping the scheme and "www." prefixes.
	for _, prefix := range []string{"http://", "https://", "www."} {
		candidate = strings.TrimPrefix(candidate, prefix)
	}

	return candidate
}

// extractPasswordFromURL returns the value of the "pwd" query parameter of
// url, or "" when the URL carries none.
//
// The parameter is recognised both as the first query parameter ("?pwd=") and
// as a later one ("&pwd="). The value ends at the next '&', '?' or '#', so a
// following parameter or a fragment is not part of the password.
func extractPasswordFromURL(url string) string {
	for _, marker := range []string{"?pwd=", "&pwd="} {
		at := strings.Index(url, marker)
		if at == -1 {
			continue
		}

		pwd := url[at+len(marker):]
		// Keep only the password when more parameters or a fragment follow.
		if end := strings.IndexAny(pwd, "&?#"); end != -1 {
			pwd = pwd[:end]
		}
		return pwd
	}
	return ""
}

// abs returns the absolute value of the integer n.
func abs(n int) int {
	if n >= 0 {
		return n
	}
	return -n
}