fix: 修复违禁词正常显示的问题

fix: 修复违禁词正常显示的问题
This commit is contained in:
ctwj
2025-08-22 09:32:12 +08:00
committed by GitHub
12 changed files with 460 additions and 52 deletions

View File

@@ -37,6 +37,9 @@ type ResourceResponse struct {
DescriptionHighlight string `json:"description_highlight,omitempty"`
CategoryHighlight string `json:"category_highlight,omitempty"`
TagsHighlight []string `json:"tags_highlight,omitempty"`
// 违禁词相关字段
HasForbiddenWords bool `json:"has_forbidden_words"`
ForbiddenWords []string `json:"forbidden_words"`
}
// CategoryResponse 分类响应

View File

@@ -41,3 +41,23 @@ type Resource struct {
// TableName returns the fixed table name "resources" for Resource.
func (Resource) TableName() string {
return "resources"
}
// GetTitle returns the resource's Title field (part of the utils.Resource interface implementation).
func (r *Resource) GetTitle() string {
return r.Title
}
// GetDescription returns the resource's Description field (part of the utils.Resource interface implementation).
func (r *Resource) GetDescription() string {
return r.Description
}
// SetTitle overwrites the resource's Title field with title (part of the utils.Resource interface implementation).
func (r *Resource) SetTitle(title string) {
r.Title = title
}
// SetDescription overwrites the resource's Description field with description (part of the utils.Resource interface implementation).
func (r *Resource) SetDescription(description string) {
r.Description = description
}

View File

@@ -281,42 +281,45 @@ func (h *PublicAPIHandler) SearchResources(c *gin.Context) {
}
}
// 过滤违禁词
filteredResources, foundForbiddenWords := h.filterForbiddenWords(resources)
// 获取违禁词配置(只获取一次)
cleanWords, err := utils.GetForbiddenWordsFromConfig(func() (string, error) {
return repoManager.SystemConfigRepository.GetConfigValue(entity.ConfigKeyForbiddenWords)
})
if err != nil {
utils.Error("获取违禁词配置失败: %v", err)
cleanWords = []string{} // 如果获取失败,使用空列表
}
// 计算过滤后的总数
filteredTotal := len(filteredResources)
// 转换为响应格式
// 转换为响应格式并添加违禁词标记
var resourceResponses []gin.H
for _, resource := range filteredResources {
resourceResponses = append(resourceResponses, gin.H{
"id": resource.ID,
"title": resource.Title,
"url": resource.URL,
"description": resource.Description,
"view_count": resource.ViewCount,
"created_at": resource.CreatedAt.Format("2006-01-02 15:04:05"),
"updated_at": resource.UpdatedAt.Format("2006-01-02 15:04:05"),
})
for i, processedResource := range resources {
originalResource := resources[i]
forbiddenInfo := utils.CheckResourceForbiddenWords(originalResource.Title, originalResource.Description, cleanWords)
resourceResponse := gin.H{
"id": processedResource.ID,
"title": forbiddenInfo.ProcessedTitle, // 使用处理后的标题
"url": processedResource.URL,
"description": forbiddenInfo.ProcessedDesc, // 使用处理后的描述
"view_count": processedResource.ViewCount,
"created_at": processedResource.CreatedAt.Format("2006-01-02 15:04:05"),
"updated_at": processedResource.UpdatedAt.Format("2006-01-02 15:04:05"),
}
// 添加违禁词标记
resourceResponse["has_forbidden_words"] = forbiddenInfo.HasForbiddenWords
resourceResponse["forbidden_words"] = forbiddenInfo.ForbiddenWords
resourceResponses = append(resourceResponses, resourceResponse)
}
// 构建响应数据
responseData := gin.H{
"data": resourceResponses,
"total": filteredTotal,
"total": total,
"page": page,
"page_size": pageSize,
}
// 如果存在违禁词过滤,添加提醒字段
if len(foundForbiddenWords) > 0 {
responseData["forbidden_words_filtered"] = true
responseData["filtered_forbidden_words"] = foundForbiddenWords
responseData["original_total"] = total
responseData["filtered_count"] = total - int64(filteredTotal)
}
SuccessResponse(c, responseData)
}

View File

@@ -64,9 +64,17 @@ func GetResources(c *gin.Context) {
params["pan_name"] = panName
}
// 获取违禁词配置(只获取一次)
cleanWords, err := utils.GetForbiddenWordsFromConfig(func() (string, error) {
return repoManager.SystemConfigRepository.GetConfigValue(entity.ConfigKeyForbiddenWords)
})
if err != nil {
utils.Error("获取违禁词配置失败: %v", err)
cleanWords = []string{} // 如果获取失败,使用空列表
}
var resources []entity.Resource
var total int64
var err error
// 如果有搜索关键词且启用了Meilisearch优先使用Meilisearch搜索
if search := c.Query("search"); search != "" && meilisearchManager != nil && meilisearchManager.IsEnabled() {
@@ -84,10 +92,25 @@ func GetResources(c *gin.Context) {
if service != nil {
docs, docTotal, err := service.Search(search, filters, page, pageSize)
if err == nil {
// 将Meilisearch文档转换为ResourceResponse包含高亮信息
// 将Meilisearch文档转换为ResourceResponse包含高亮信息并处理违禁词
var resourceResponses []dto.ResourceResponse
for _, doc := range docs {
resourceResponse := converter.ToResourceResponseFromMeilisearch(doc)
// 处理违禁词Meilisearch场景需要处理高亮标记
if len(cleanWords) > 0 {
forbiddenInfo := utils.CheckResourceForbiddenWords(resourceResponse.Title, resourceResponse.Description, cleanWords)
if forbiddenInfo.HasForbiddenWords {
resourceResponse.Title = forbiddenInfo.ProcessedTitle
resourceResponse.Description = forbiddenInfo.ProcessedDesc
resourceResponse.TitleHighlight = forbiddenInfo.ProcessedTitle
resourceResponse.DescriptionHighlight = forbiddenInfo.ProcessedDesc
}
resourceResponse.HasForbiddenWords = forbiddenInfo.HasForbiddenWords
resourceResponse.ForbiddenWords = forbiddenInfo.ForbiddenWords
}
resourceResponses = append(resourceResponses, resourceResponse)
}
@@ -116,12 +139,48 @@ func GetResources(c *gin.Context) {
return
}
SuccessResponse(c, gin.H{
"data": converter.ToResourceResponseList(resources),
// 处理违禁词替换和标记
var processedResources []entity.Resource
if len(cleanWords) > 0 {
processedResources = utils.ProcessResourcesForbiddenWords(resources, cleanWords)
} else {
processedResources = resources
}
// 转换为响应格式并添加违禁词标记
var resourceResponses []gin.H
for i, processedResource := range processedResources {
// 使用原始资源进行检查违禁词(数据库搜索场景,使用普通处理)
originalResource := resources[i]
forbiddenInfo := utils.CheckResourceForbiddenWords(originalResource.Title, originalResource.Description, cleanWords)
resourceResponse := gin.H{
"id": processedResource.ID,
"title": forbiddenInfo.ProcessedTitle, // 使用处理后的标题
"url": processedResource.URL,
"description": forbiddenInfo.ProcessedDesc, // 使用处理后的描述
"pan_id": processedResource.PanID,
"view_count": processedResource.ViewCount,
"created_at": processedResource.CreatedAt.Format("2006-01-02 15:04:05"),
"updated_at": processedResource.UpdatedAt.Format("2006-01-02 15:04:05"),
}
// 添加违禁词标记
resourceResponse["has_forbidden_words"] = forbiddenInfo.HasForbiddenWords
resourceResponse["forbidden_words"] = forbiddenInfo.ForbiddenWords
resourceResponses = append(resourceResponses, resourceResponse)
}
// 构建响应数据
responseData := gin.H{
"data": resourceResponses,
"total": total,
"page": page,
"page_size": pageSize,
})
}
SuccessResponse(c, responseData)
}
// GetResourceByID 根据ID获取资源

View File

@@ -187,28 +187,28 @@ func (r *ReadyResourceScheduler) convertReadyResourceToResource(readyResource en
}
// 检查违禁词
forbiddenWords, err := r.systemConfigRepo.GetConfigValue(entity.ConfigKeyForbiddenWords)
if err == nil && forbiddenWords != "" {
words := strings.Split(forbiddenWords, ",")
var matchedWords []string
title := strings.ToLower(resource.Title)
description := strings.ToLower(resource.Description)
// forbiddenWords, err := r.systemConfigRepo.GetConfigValue(entity.ConfigKeyForbiddenWords)
// if err == nil && forbiddenWords != "" {
// words := strings.Split(forbiddenWords, ",")
// var matchedWords []string
// title := strings.ToLower(resource.Title)
// description := strings.ToLower(resource.Description)
for _, word := range words {
word = strings.TrimSpace(word)
if word != "" {
wordLower := strings.ToLower(word)
if strings.Contains(title, wordLower) || strings.Contains(description, wordLower) {
matchedWords = append(matchedWords, word)
}
}
}
// for _, word := range words {
// word = strings.TrimSpace(word)
// if word != "" {
// wordLower := strings.ToLower(word)
// if strings.Contains(title, wordLower) || strings.Contains(description, wordLower) {
// matchedWords = append(matchedWords, word)
// }
// }
// }
if len(matchedWords) > 0 {
utils.Warn(fmt.Sprintf("资源包含违禁词: %s, 违禁词: %s", resource.Title, strings.Join(matchedWords, ", ")))
return fmt.Errorf("存在违禁词: %s", strings.Join(matchedWords, ", "))
}
}
// if len(matchedWords) > 0 {
// utils.Warn(fmt.Sprintf("资源包含违禁词: %s, 违禁词: %s", resource.Title, strings.Join(matchedWords, ", ")))
// return fmt.Errorf("存在违禁词: %s", strings.Join(matchedWords, ", "))
// }
// }
// 不是夸克,直接保存
if serviceType != panutils.Quark {

287
utils/forbidden_words.go Normal file
View File

@@ -0,0 +1,287 @@
package utils
import (
"regexp"
"strings"
"github.com/ctwj/urldb/db/entity"
)
// ForbiddenWordsProcessor groups the forbidden-word checking, masking and
// config-parsing operations. It has no fields.
type ForbiddenWordsProcessor struct{}

// NewForbiddenWordsProcessor returns a pointer to a new ForbiddenWordsProcessor.
func NewForbiddenWordsProcessor() *ForbiddenWordsProcessor {
	return new(ForbiddenWordsProcessor)
}
// CheckContainsForbiddenWords reports whether text contains any of the given
// forbidden words. The comparison is case-insensitive (both sides are passed
// through strings.ToLower) and uses plain substring matching.
//
// Parameters:
//   - text: the text to inspect
//   - forbiddenWords: the forbidden-word list
//
// Returns:
//   - bool: true when at least one word was found
//   - []string: the words that were found, in list order and in their original
//     spelling; nil when nothing matched
func (p *ForbiddenWordsProcessor) CheckContainsForbiddenWords(text string, forbiddenWords []string) (bool, []string) {
	if len(forbiddenWords) == 0 {
		return false, nil
	}

	var matchedWords []string
	textLower := strings.ToLower(text)
	for _, word := range forbiddenWords {
		// strings.Contains is true for an empty substring, so a blank entry
		// would flag every text as forbidden; ignore such entries.
		if word == "" {
			continue
		}
		if strings.Contains(textLower, strings.ToLower(word)) {
			matchedWords = append(matchedWords, word)
		}
	}

	return len(matchedWords) > 0, matchedWords
}
// ReplaceForbiddenWords masks every occurrence of each forbidden word in text
// with asterisks, matching case-insensitively.
//
// Parameters:
//   - text: the text to process
//   - forbiddenWords: the forbidden-word list (left unmodified)
//
// Returns:
//   - string: the text with each match replaced by one '*' per character
//     (rune, not byte) of the forbidden word
func (p *ForbiddenWordsProcessor) ReplaceForbiddenWords(text string, forbiddenWords []string) string {
	total := len(forbiddenWords)
	if total == 0 {
		return text
	}

	// Order a private copy so the caller's slice keeps its order.
	ordered := append(make([]string, 0, total), forbiddenWords...)

	// Exchange sort, longest (by byte length) first, so that a short word
	// cannot break up a longer word that contains it before the longer word
	// has been masked.
	for a := 0; a+1 < total; a++ {
		for b := a + 1; b < total; b++ {
			if len(ordered[b]) > len(ordered[a]) {
				ordered[a], ordered[b] = ordered[b], ordered[a]
			}
		}
	}

	masked := text
	for _, w := range ordered {
		// One star per rune keeps the visible length stable for multi-byte text.
		stars := strings.Repeat("*", len([]rune(w)))
		// The word is quoted so regex metacharacters match literally; no word
		// boundaries are used, so CJK text is matched as a plain substring.
		pattern := regexp.MustCompile("(?i)" + regexp.QuoteMeta(w))
		masked = pattern.ReplaceAllString(masked, stars)
	}
	return masked
}
// highlightMarkPattern matches one <mark>…</mark> highlight pair and captures
// the highlighted text. It is compiled once instead of on every call.
var highlightMarkPattern = regexp.MustCompile(`<mark>(.*?)</mark>`)

// ReplaceForbiddenWordsWithHighlight masks forbidden words with '*' in text
// that may carry <mark>…</mark> highlight markup.
//
// Matching is done on the text with the highlight tags removed, so a forbidden
// word that is split by a tag (e.g. "ab<mark>cd</mark>") is still masked, and
// the tags themselves are never treated as content. The tags are then put back
// at their original positions around the masked characters.
//
// Parameters:
//   - text: the text to process (may contain highlight markup)
//   - forbiddenWords: the forbidden-word list
//
// Returns:
//   - string: the masked text; text itself when nothing had to be masked
func (p *ForbiddenWordsProcessor) ReplaceForbiddenWordsWithHighlight(text string, forbiddenWords []string) string {
	if len(forbiddenWords) == 0 {
		return text
	}

	pairs := highlightMarkPattern.FindAllStringSubmatchIndex(text, -1)
	if len(pairs) == 0 {
		// No highlight markup present: plain masking is sufficient.
		return p.ReplaceForbiddenWords(text, forbiddenWords)
	}

	// Split text into alternating content and tag pieces. For each pair,
	// loc[0]:loc[1] is the whole "<mark>…</mark>" and loc[2]:loc[3] the inner text.
	type piece struct {
		raw    string
		markup bool
	}
	pieces := make([]piece, 0, 4*len(pairs)+1)
	pos := 0
	for _, loc := range pairs {
		pieces = append(pieces,
			piece{raw: text[pos:loc[0]]},
			piece{raw: text[loc[0]:loc[2]], markup: true},
			piece{raw: text[loc[2]:loc[3]]},
			piece{raw: text[loc[3]:loc[1]], markup: true},
		)
		pos = loc[1]
	}
	pieces = append(pieces, piece{raw: text[pos:]})

	// Rebuild the tag-free text and remember how many runes of content there are.
	var plain strings.Builder
	plainRunes := 0
	for _, pc := range pieces {
		if !pc.markup {
			plain.WriteString(pc.raw)
			plainRunes += len([]rune(pc.raw))
		}
	}
	cleanText := plain.String()

	maskedText := p.ReplaceForbiddenWords(cleanText, forbiddenWords)
	if maskedText == cleanText {
		// Nothing was masked; keep the original text and its markup untouched.
		return text
	}

	// Masking replaces each matched word with one '*' per rune, so masked and
	// clean text normally line up rune for rune. If they do not (for example
	// with malformed UTF-8 around a tag), return the masked text without
	// markup rather than risk exposing a forbidden word.
	maskedRunes := []rune(maskedText)
	if len(maskedRunes) != plainRunes {
		return maskedText
	}

	var out strings.Builder
	out.Grow(len(text))
	next := 0
	for _, pc := range pieces {
		if pc.markup {
			out.WriteString(pc.raw)
			continue
		}
		n := len([]rune(pc.raw))
		out.WriteString(string(maskedRunes[next : next+n]))
		next += n
	}
	return out.String()
}
// ProcessForbiddenWords checks text for forbidden words and masks them in one call.
// It combines CheckContainsForbiddenWords and ReplaceForbiddenWords on the same inputs.
// Parameters:
// - text: the text to process
// - forbiddenWords: the forbidden-word list
//
// Returns:
// - bool: whether text contains any forbidden word
// - []string: the forbidden words that were matched
// - string: the text after replacement
func (p *ForbiddenWordsProcessor) ProcessForbiddenWords(text string, forbiddenWords []string) (bool, []string, string) {
contains, matchedWords := p.CheckContainsForbiddenWords(text, forbiddenWords)
replacedText := p.ReplaceForbiddenWords(text, forbiddenWords)
return contains, matchedWords, replacedText
}
// ParseForbiddenWordsConfig turns a forbidden-word configuration string into a
// word list.
//
// Parameters:
//   - config: the configured words, separated by ASCII commas
//
// Returns:
//   - []string: the words with surrounding whitespace trimmed and blank
//     entries dropped; nil when config holds no usable word
func (p *ForbiddenWordsProcessor) ParseForbiddenWordsConfig(config string) []string {
	var parsed []string
	// Splitting an empty string yields a single blank entry, which the filter
	// below discards, so an empty config still produces nil.
	for _, raw := range strings.Split(config, ",") {
		trimmed := strings.TrimSpace(raw)
		if trimmed == "" {
			continue
		}
		parsed = append(parsed, trimmed)
	}
	return parsed
}
// DefaultForbiddenWordsProcessor is the package-level processor instance, provided so callers can use it directly.
var DefaultForbiddenWordsProcessor = NewForbiddenWordsProcessor()
// Convenience functions: each one calls the package-level instance directly.

// CheckContainsForbiddenWords reports whether text contains any forbidden word (convenience function).
// It delegates to DefaultForbiddenWordsProcessor.CheckContainsForbiddenWords.
func CheckContainsForbiddenWords(text string, forbiddenWords []string) (bool, []string) {
return DefaultForbiddenWordsProcessor.CheckContainsForbiddenWords(text, forbiddenWords)
}
// ReplaceForbiddenWords replaces the forbidden words in text with * (convenience function).
// It delegates to DefaultForbiddenWordsProcessor.ReplaceForbiddenWords.
func ReplaceForbiddenWords(text string, forbiddenWords []string) string {
return DefaultForbiddenWordsProcessor.ReplaceForbiddenWords(text, forbiddenWords)
}
// ReplaceForbiddenWordsWithHighlight replaces the forbidden words in text with *,
// for text that may contain highlight markup (convenience function).
// It delegates to DefaultForbiddenWordsProcessor.ReplaceForbiddenWordsWithHighlight.
func ReplaceForbiddenWordsWithHighlight(text string, forbiddenWords []string) string {
return DefaultForbiddenWordsProcessor.ReplaceForbiddenWordsWithHighlight(text, forbiddenWords)
}
// ProcessForbiddenWords checks text for forbidden words and replaces them (convenience function).
// It delegates to DefaultForbiddenWordsProcessor.ProcessForbiddenWords.
func ProcessForbiddenWords(text string, forbiddenWords []string) (bool, []string, string) {
return DefaultForbiddenWordsProcessor.ProcessForbiddenWords(text, forbiddenWords)
}
// ParseForbiddenWordsConfig parses a forbidden-word configuration string (convenience function).
// It delegates to DefaultForbiddenWordsProcessor.ParseForbiddenWordsConfig.
func ParseForbiddenWordsConfig(config string) []string {
return DefaultForbiddenWordsProcessor.ParseForbiddenWordsConfig(config)
}
// RemoveDuplicates returns the distinct strings of slice, keeping the first
// occurrence of each and preserving their relative order. The result is nil
// when slice is empty.
func RemoveDuplicates(slice []string) []string {
	seen := make(map[string]struct{}, len(slice))
	var unique []string
	for _, s := range slice {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		unique = append(unique, s)
	}
	return unique
}
// ResourceForbiddenInfo holds the forbidden-word information for one resource:
// the detection result plus the masked title and description.
type ResourceForbiddenInfo struct {
HasForbiddenWords bool `json:"has_forbidden_words"`
ForbiddenWords []string `json:"forbidden_words"`
ProcessedTitle string `json:"-"` // not serialized; internal use only
ProcessedDesc string `json:"-"` // not serialized; internal use only
}
// CheckResourceForbiddenWords checks a resource's title and description for
// forbidden words and produces masked versions of both.
//
// Parameters:
//   - title: the resource title
//   - description: the resource description
//   - forbiddenWords: the forbidden-word list
//
// Returns:
//   - ResourceForbiddenInfo: whether anything matched, the distinct matched
//     words (title matches first), and the masked title and description.
//     ForbiddenWords is always a non-nil slice, so it serializes to a JSON
//     array ([] when nothing matched) rather than null.
func CheckResourceForbiddenWords(title, description string, forbiddenWords []string) ResourceForbiddenInfo {
	info := ResourceForbiddenInfo{
		HasForbiddenWords: false,
		ForbiddenWords:    []string{},
		ProcessedTitle:    title,
		ProcessedDesc:     description,
	}
	if len(forbiddenWords) == 0 {
		return info
	}

	// Check title and description separately, then merge the findings.
	titleHit, titleMatches := CheckContainsForbiddenWords(title, forbiddenWords)
	descHit, descMatches := CheckContainsForbiddenWords(description, forbiddenWords)
	info.HasForbiddenWords = titleHit || descHit

	merged := make([]string, 0, len(titleMatches)+len(descMatches))
	if titleHit {
		merged = append(merged, titleMatches...)
	}
	if descHit {
		merged = append(merged, descMatches...)
	}
	// Only overwrite the empty default when there is something to report;
	// deduplicating an empty list would yield nil.
	if len(merged) > 0 {
		info.ForbiddenWords = RemoveDuplicates(merged)
	}

	// Mask the forbidden words in both fields.
	info.ProcessedTitle = ReplaceForbiddenWords(title, forbiddenWords)
	info.ProcessedDesc = ReplaceForbiddenWords(description, forbiddenWords)
	return info
}
// GetForbiddenWordsFromConfig obtains the forbidden-word list from system configuration.
// It calls getConfigFunc to read the raw configuration string and parses it with
// ParseForbiddenWordsConfig.
// Parameters:
// - getConfigFunc: function that fetches the configuration value
//
// Returns:
// - []string: the parsed forbidden-word list (nil when getConfigFunc fails)
// - error: the error returned by getConfigFunc, if any
func GetForbiddenWordsFromConfig(getConfigFunc func() (string, error)) ([]string, error) {
forbiddenWords, err := getConfigFunc()
if err != nil {
return nil, err
}
return ParseForbiddenWordsConfig(forbiddenWords), nil
}
// ProcessResourcesForbiddenWords masks the forbidden words in the title and
// description of every resource.
//
// The input slice is not modified: masking is applied to a copy. Callers such
// as the resource list handler still read the original titles/descriptions
// after this call to detect which forbidden words a resource contains, which
// only works while the originals stay unmasked.
//
// Parameters:
//   - resources: the resources to process
//   - forbiddenWords: the forbidden-word list
//
// Returns:
//   - the processed resources; resources itself when there is nothing to do
//     (no forbidden words or no resources)
func ProcessResourcesForbiddenWords(resources []entity.Resource, forbiddenWords []string) []entity.Resource {
	if len(forbiddenWords) == 0 || len(resources) == 0 {
		return resources
	}

	// Element-wise copy; only the string fields Title and Description are
	// reassigned below, so the originals are left untouched.
	processed := make([]entity.Resource, len(resources))
	copy(processed, resources)

	for i := range processed {
		// Mask forbidden words in the title.
		processed[i].Title = ReplaceForbiddenWords(processed[i].Title, forbiddenWords)
		// Mask forbidden words in the description.
		processed[i].Description = ReplaceForbiddenWords(processed[i].Description, forbiddenWords)
	}
	return processed
}

View File

@@ -9,6 +9,21 @@
</div>
</div>
<!-- 违禁词禁止访问状态 -->
<div v-else-if="forbidden" class="space-y-4">
<div class="flex flex-col items-center justify-center py-4">
<!-- 使用SVG图标 -->
<div class="mb-6">
<img src="/assets/svg/forbidden.svg" alt="禁止访问" class="w-48 h-48" />
</div>
<h3 class="text-xl font-bold text-red-600 dark:text-red-400 mb-2">禁止访问</h3>
<p class="text-gray-600 dark:text-gray-400 mb-4">该资源包含违禁内容无法访问</p>
<n-button @click="closeModal" class="bg-red-500 hover:bg-red-600 text-white">
我知道了
</n-button>
</div>
</div>
<!-- 错误状态 -->
<div v-else-if="error" class="space-y-4">
<n-alert type="error" :show-icon="false">
@@ -150,6 +165,8 @@ interface Props {
platform?: string
message?: string
error?: string
forbidden?: boolean
forbidden_words?: string[]
}
interface Emits {

View File

@@ -201,6 +201,8 @@
:platform="selectedResource?.platform"
:message="selectedResource?.message"
:error="selectedResource?.error"
:forbidden="selectedResource?.forbidden"
:forbidden_words="selectedResource?.forbidden_words"
@close="showLinkModal = false"
/>
@@ -419,6 +421,18 @@ const getPlatformIcon = (panId: string | number) => {
// 切换链接显示
const toggleLink = async (resource: any) => {
// 如果包含违禁词,直接显示禁止访问,不发送请求
if (resource.has_forbidden_words) {
selectedResource.value = {
...resource,
forbidden: true,
error: '该资源包含违禁内容,无法访问',
forbidden_words: resource.forbidden_words || []
}
showLinkModal.value = true
return
}
// 显示加载状态
selectedResource.value = { ...resource, loading: true }
showLinkModal.value = true
@@ -438,9 +452,10 @@ const toggleLink = async (resource: any) => {
platform: linkData.platform,
message: linkData.message
}
} catch (error) {
} catch (error: any) {
console.error('获取资源链接失败:', error)
// 出错时使用原始资源信息
// 其他错误
selectedResource.value = {
...resource,
loading: false,

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 31 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 18 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 34 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 22 KiB