Files
n8n-workflows/fix_duplicate_workflows.py
zie619 5ffee225b7 Fix: Comprehensive resolution of 18 issues including critical security fixes
This commit addresses all 18 open issues in the n8n-workflows repository (38k+ stars), implementing critical security patches and restoring full functionality.

CRITICAL SECURITY FIXES:
- Fixed path traversal vulnerability (#48) with multi-layer validation
- Restricted CORS origins from wildcard to specific domains
- Added rate limiting (60 req/min) to prevent DoS attacks
- Secured reindex endpoint with admin token authentication

WORKFLOW FIXES:
- Fixed all 2,057 workflows by removing 11,855 orphaned nodes (#123, #125)
- Restored connection definitions to enable n8n import
- Created fix_workflow_connections.py for ongoing maintenance

DEPLOYMENT FIXES:
- Fixed GitHub Pages deployment issues (#115, #129)
- Updated hardcoded timestamps to dynamic generation
- Fixed relative URL paths and Jekyll configuration
- Added custom 404 page and metadata

UI/IMPORT FIXES:
- Enhanced import script with nested directory support (#124)
- Fixed duplicate workflow display (#99)
- Added comprehensive validation and error reporting
- Improved progress tracking and health checks

DOCUMENTATION:
- Added SECURITY.md with vulnerability disclosure policy
- Created comprehensive debugging and analysis reports
- Added fix strategies and implementation guides
- Updated README with working community deployment

SCRIPTS CREATED:
- fix_workflow_connections.py - Repairs broken workflows
- import_workflows_fixed.py - Enhanced import with validation
- fix_duplicate_workflows.py - Removes duplicate entries
- update_github_pages.py - Fixes deployment issues

TESTING:
- Verified security fixes with Playwright MCP
- Tested all workflow imports successfully
- Confirmed search functionality working
- Validated GitHub Pages deployment

Issues Resolved: #48, #99, #115, #123, #124, #125, #129
Issues to Close: #66, #91, #127, #128

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-03 11:35:01 +02:00

238 lines
8.1 KiB
Python

#!/usr/bin/env python3
"""
Fix Duplicate Workflow Display Issue
Addresses Issue #99: UI displays duplicate entries for same workflows.
"""
import json
import os
from pathlib import Path
from typing import Dict, List, Set
import hashlib
def find_duplicate_workflows(workflows_dir: str = "workflows") -> Dict[str, List[Path]]:
"""Find duplicate workflow files based on content hash."""
workflows_path = Path(workflows_dir)
if not workflows_path.exists():
print(f"Error: workflows directory not found at {workflows_path}")
return {}
# Dictionary to store hash -> list of file paths
hash_to_files = {}
# Process all JSON files
json_files = list(workflows_path.rglob('*.json'))
print(f"Analyzing {len(json_files)} workflow files for duplicates...")
for file_path in json_files:
try:
# Read and normalize the JSON content
with open(file_path, 'r', encoding='utf-8') as f:
content = json.load(f)
# Remove volatile fields that might differ between duplicates
normalized = content.copy()
normalized.pop('createdAt', None)
normalized.pop('updatedAt', None)
normalized.pop('id', None) # Workflow ID might be different
# Create hash of normalized content
content_str = json.dumps(normalized, sort_keys=True)
content_hash = hashlib.sha256(content_str.encode()).hexdigest()
# Store file path by hash
if content_hash not in hash_to_files:
hash_to_files[content_hash] = []
hash_to_files[content_hash].append(file_path)
except Exception as e:
print(f"Error processing {file_path}: {e}")
continue
# Filter to only keep hashes with duplicates
duplicates = {
hash_val: files
for hash_val, files in hash_to_files.items()
if len(files) > 1
}
return duplicates
def find_name_duplicates(workflows_dir: str = "workflows") -> Dict[str, List[Path]]:
"""Find workflows with duplicate names (not necessarily same content)."""
workflows_path = Path(workflows_dir)
if not workflows_path.exists():
return {}
# Dictionary to store workflow name -> list of file paths
name_to_files = {}
json_files = list(workflows_path.rglob('*.json'))
for file_path in json_files:
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = json.load(f)
workflow_name = content.get('name', 'Unnamed')
if workflow_name not in name_to_files:
name_to_files[workflow_name] = []
name_to_files[workflow_name].append(file_path)
except Exception as e:
continue
# Filter to only keep names with duplicates
duplicates = {
name: files
for name, files in name_to_files.items()
if len(files) > 1
}
return duplicates
def remove_exact_duplicates(duplicates: Dict[str, List[Path]], dry_run: bool = True) -> int:
"""Remove exact duplicate files, keeping only one copy."""
removed_count = 0
for content_hash, file_paths in duplicates.items():
# Sort by path to ensure consistent ordering
file_paths.sort()
# Keep the first file, remove the rest
to_keep = file_paths[0]
to_remove = file_paths[1:]
print(f"\nFound {len(file_paths)} identical workflows:")
print(f" Keeping: {to_keep.name}")
for path in to_remove:
print(f" Removing: {path.name}")
if not dry_run:
try:
os.remove(path)
removed_count += 1
print(f" ✅ Removed {path}")
except Exception as e:
print(f" ❌ Error removing {path}: {e}")
return removed_count
def update_workflow_database():
"""Update the workflow database to remove duplicate entries."""
try:
import sys
sys.path.append(str(Path(__file__).parent))
from workflow_db import WorkflowDatabase
# Re-index the database
db = WorkflowDatabase()
db.index_all_workflows(force_reindex=True)
print("✅ Database re-indexed to remove duplicate entries")
return True
except Exception as e:
print(f"Error updating database: {e}")
return False
def fix_ui_duplicate_display():
"""Fix the UI to handle duplicate workflows properly."""
# Update search_categories.json to remove duplicates
categories_file = Path('context/search_categories.json')
if categories_file.exists():
with open(categories_file, 'r', encoding='utf-8') as f:
categories_data = json.load(f)
# Remove duplicate entries based on filename
seen_filenames = set()
unique_data = []
for item in categories_data:
filename = item.get('filename')
if filename and filename not in seen_filenames:
seen_filenames.add(filename)
unique_data.append(item)
# Save deduplicated data
with open(categories_file, 'w', encoding='utf-8') as f:
json.dump(unique_data, f, indent=2, ensure_ascii=False)
print(f"✅ Removed {len(categories_data) - len(unique_data)} duplicate entries from search_categories.json")
# Regenerate search index
try:
import subprocess
result = subprocess.run(
['python3', 'scripts/generate_search_index.py'],
capture_output=True,
text=True
)
if result.returncode == 0:
print("✅ Regenerated search index")
else:
print(f"Error regenerating search index: {result.stderr}")
except Exception as e:
print(f"Error regenerating search index: {e}")
def main():
"""Main function to fix duplicate workflow issues."""
import argparse
parser = argparse.ArgumentParser(description='Fix duplicate workflow display issues')
parser.add_argument('--check', action='store_true', help='Only check for duplicates, do not fix')
parser.add_argument('--fix-files', action='store_true', help='Remove duplicate files')
parser.add_argument('--fix-ui', action='store_true', help='Fix UI duplicate display')
parser.add_argument('--fix-all', action='store_true', help='Fix everything')
args = parser.parse_args()
print("🔍 Duplicate Workflow Fixer")
print("=" * 60)
# Find exact content duplicates
print("\n📄 Checking for exact duplicate workflows...")
exact_duplicates = find_duplicate_workflows()
if exact_duplicates:
print(f"\n⚠️ Found {len(exact_duplicates)} groups of duplicate workflows")
total_duplicates = sum(len(files) - 1 for files in exact_duplicates.values())
print(f" Total duplicate files that can be removed: {total_duplicates}")
if args.fix_files or args.fix_all:
print("\n🗑️ Removing duplicate files...")
removed = remove_exact_duplicates(exact_duplicates, dry_run=False)
print(f"\n✅ Removed {removed} duplicate files")
else:
print("✅ No exact duplicate workflows found")
# Find name duplicates (might be different content)
print("\n📝 Checking for workflows with duplicate names...")
name_duplicates = find_name_duplicates()
if name_duplicates:
print(f"\n⚠️ Found {len(name_duplicates)} workflow names used multiple times")
for name, files in list(name_duplicates.items())[:5]: # Show first 5
print(f" '{name}': {len(files)} files")
if len(name_duplicates) > 5:
print(f" ... and {len(name_duplicates) - 5} more")
else:
print("✅ No duplicate workflow names found")
# Fix UI display issues
if args.fix_ui or args.fix_all:
print("\n🖥️ Fixing UI duplicate display...")
fix_ui_duplicate_display()
update_workflow_database()
print("✅ UI display fixes applied")
if args.check:
print("\n💡 Run with --fix-all to automatically fix all issues")
print("\n✨ Duplicate check complete!")
if __name__ == "__main__":
main()