feat: Add GitHub Pages public search interface and enhanced documentation system

## 🌐 GitHub Pages Public Search Interface
- Complete client-side search application solving Issue #84
- Responsive HTML/CSS/JavaScript with mobile optimization
- Real-time search across 2,057+ workflows with instant results (lookup sketched after this list)
- Category filtering across 15 workflow categories
- Dark/light theme support with system preference detection
- Direct workflow JSON download functionality
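
The published interface is client-side HTML/CSS/JavaScript, but the lookup it performs over the static index can be sketched in a few lines of Python. This is a minimal sketch, not the shipped client code: the `docs/api/search-index.json` path and the field names are taken from the generator script later in this commit, and the actual JavaScript filter may differ in detail.

```python
import json

# Load the static index that the generator script (below) writes for GitHub Pages.
with open("docs/api/search-index.json", encoding="utf-8") as f:
    index = json.load(f)

def search(query, category=None):
    """Substring match on the precomputed searchable_text, optionally
    narrowed to one category -- mirroring the client-side filter."""
    q = query.lower()
    return [
        w for w in index["workflows"]
        if q in w["searchable_text"]
        and (category is None or w["category"] == category)
    ]

for hit in search("slack", category="Communication & Messaging")[:5]:
    print(hit["name"], "->", hit["download_url"])
```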

## 🤖 GitHub Actions Automation
- deploy-pages.yml: Automated deployment to GitHub Pages
- update-readme.yml: Weekly automated README statistics updates
- Comprehensive workflow indexing and category generation

## 🔍 Enhanced Search & Categorization
- Static search index generation for GitHub Pages
- Category prioritization: developer-assigned categories take precedence over integration-based ones
- CalcsLive custom node integration and categorization
- Enhanced workflow database with better custom node detection
- Fixed README corruption by replacing hardcoded numbers with live database statistics

## 📚 Documentation & Infrastructure
- Comprehensive CHANGELOG.md with proper versioning
- Enhanced README with accurate statistics and public interface links
- Professional documentation addressing repository infrastructure needs

## Technical Improvements
- Fixed Unicode encoding issues in Python scripts (representative fix sketched after this list)
- Enhanced CalcsLive detection with false positive prevention
- Improved JSON description preservation and indexing
- Mobile-optimized responsive design for all devices
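
The Unicode fix itself lives in scripts not shown in this excerpt. As a representative sketch only (an assumption about the class of fix, not the committed change), the usual remedy is to pin UTF-8 explicitly:

```python
import sys

# Assumed illustration, not the exact commit: Windows consoles often default
# to cp1252, so printing emoji or non-ASCII workflow names raises
# UnicodeEncodeError unless stdout is reconfigured.
if sys.stdout.encoding and sys.stdout.encoding.lower() != "utf-8":
    sys.stdout.reconfigure(encoding="utf-8")

# File I/O pins the encoding explicitly, as both scripts below do:
with open("README.md", encoding="utf-8") as f:
    content = f.read()
```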

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-29 21:54:12 -07:00
parent ebcdcc4734
commit 56789e895e
16 changed files with 45032 additions and 1325 deletions


@@ -0,0 +1,263 @@
#!/usr/bin/env python3
"""
Generate Static Search Index for GitHub Pages
Creates a lightweight JSON index for client-side search functionality.
"""
import json
import os
import sys
from pathlib import Path
from typing import Dict, List, Any

# Add the parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))

from workflow_db import WorkflowDatabase


def generate_static_search_index(db_path: str, output_dir: str) -> Dict[str, Any]:
    """Generate a static search index for client-side searching."""
    # Initialize database
    db = WorkflowDatabase(db_path)

    # Get all workflows
    workflows, total = db.search_workflows(limit=10000)  # Get all workflows

    # Get statistics
    stats = db.get_stats()

    # Get categories from service mapping
    categories = db.get_service_categories()

    # Load existing categories from create_categories.py system
    existing_categories = load_existing_categories()

    # Create simplified workflow data for search
    search_workflows = []
    for workflow in workflows:
        # Create searchable text combining multiple fields
        searchable_text = ' '.join([
            workflow['name'],
            workflow['description'],
            workflow['filename'],
            ' '.join(workflow['integrations']),
            ' '.join(workflow['tags']) if workflow['tags'] else ''
        ]).lower()

        # Use existing category from create_categories.py system,
        # fall back to integration-based categorization
        category = get_workflow_category(
            workflow['filename'], existing_categories,
            workflow['integrations'], categories
        )

        search_workflow = {
            'id': workflow['filename'].replace('.json', ''),
            'name': workflow['name'],
            'description': workflow['description'],
            'filename': workflow['filename'],
            'active': workflow['active'],
            'trigger_type': workflow['trigger_type'],
            'complexity': workflow['complexity'],
            'node_count': workflow['node_count'],
            'integrations': workflow['integrations'],
            'tags': workflow['tags'],
            'category': category,
            'searchable_text': searchable_text,
            'download_url': f"https://raw.githubusercontent.com/Zie619/n8n-workflows/main/workflows/{extract_folder_from_filename(workflow['filename'])}/{workflow['filename']}"
        }
        search_workflows.append(search_workflow)

    # Create comprehensive search index
    search_index = {
        'version': '1.0',
        'generated_at': stats.get('last_indexed', ''),
        'stats': {
            'total_workflows': stats['total'],
            'active_workflows': stats['active'],
            'inactive_workflows': stats['inactive'],
            'total_nodes': stats['total_nodes'],
            'unique_integrations': stats['unique_integrations'],
            'categories': len(get_category_list(categories)),
            'triggers': stats['triggers'],
            'complexity': stats['complexity']
        },
        'categories': get_category_list(categories),
        'integrations': get_popular_integrations(workflows),
        'workflows': search_workflows
    }
    return search_index


def load_existing_categories() -> Dict[str, str]:
    """Load existing categories from search_categories.json created by create_categories.py."""
    try:
        with open('context/search_categories.json', 'r', encoding='utf-8') as f:
            categories_data = json.load(f)

        # Convert to filename -> category mapping
        category_mapping = {}
        for item in categories_data:
            if item.get('category'):
                category_mapping[item['filename']] = item['category']
        return category_mapping
    except FileNotFoundError:
        print("Warning: search_categories.json not found, using integration-based categorization")
        return {}


def get_workflow_category(filename: str, existing_categories: Dict[str, str],
                          integrations: List[str], service_categories: Dict[str, List[str]]) -> str:
    """Get category for workflow, preferring existing assignment over integration-based."""
    # First priority: use existing category from the create_categories.py system
    if filename in existing_categories:
        return existing_categories[filename]

    # Fallback: use integration-based categorization
    return determine_category(integrations, service_categories)


def determine_category(integrations: List[str], categories: Dict[str, List[str]]) -> str:
    """Determine the category for a workflow based on its integrations."""
    if not integrations:
        return "Uncategorized"

    # Check each category for matching integrations
    for category, services in categories.items():
        for integration in integrations:
            if integration in services:
                return format_category_name(category)
    return "Uncategorized"


def format_category_name(category_key: str) -> str:
    """Format category key to display name."""
    category_mapping = {
        'messaging': 'Communication & Messaging',
        'email': 'Communication & Messaging',
        'cloud_storage': 'Cloud Storage & File Management',
        'database': 'Data Processing & Analysis',
        'project_management': 'Project Management',
        'ai_ml': 'AI Agent Development',
        'social_media': 'Social Media Management',
        'ecommerce': 'E-commerce & Retail',
        'analytics': 'Data Processing & Analysis',
        'calendar_tasks': 'Project Management',
        'forms': 'Data Processing & Analysis',
        'development': 'Technical Infrastructure & DevOps'
    }
    return category_mapping.get(category_key, category_key.replace('_', ' ').title())


def get_category_list(categories: Dict[str, List[str]]) -> List[str]:
    """Get formatted list of all categories."""
    formatted_categories = set()
    for category_key in categories.keys():
        formatted_categories.add(format_category_name(category_key))

    # Add categories from the create_categories.py system
    additional_categories = [
        "Business Process Automation",
        "Web Scraping & Data Extraction",
        "Marketing & Advertising Automation",
        "Creative Content & Video Automation",
        "Creative Design Automation",
        "CRM & Sales",
        "Financial & Accounting"
    ]
    for cat in additional_categories:
        formatted_categories.add(cat)

    return sorted(formatted_categories)


def get_popular_integrations(workflows: List[Dict]) -> List[Dict[str, Any]]:
    """Get list of popular integrations with counts."""
    integration_counts = {}
    for workflow in workflows:
        for integration in workflow['integrations']:
            integration_counts[integration] = integration_counts.get(integration, 0) + 1

    # Sort by count and take the top 50
    sorted_integrations = sorted(
        integration_counts.items(),
        key=lambda x: x[1],
        reverse=True
    )[:50]

    return [
        {'name': name, 'count': count}
        for name, count in sorted_integrations
    ]


def extract_folder_from_filename(filename: str) -> str:
    """Extract folder name from workflow filename."""
    # Most workflows follow the pattern: ID_Service_Purpose_Trigger.json
    # Extract the service name as the folder
    parts = filename.replace('.json', '').split('_')
    if len(parts) >= 2:
        return parts[1].capitalize()  # Second part is usually the service
    return 'Misc'


def save_search_index(search_index: Dict[str, Any], output_dir: str):
    """Save the search index to multiple formats for different uses."""
    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Save complete index
    with open(os.path.join(output_dir, 'search-index.json'), 'w', encoding='utf-8') as f:
        json.dump(search_index, f, indent=2, ensure_ascii=False)

    # Save stats only (for quick loading)
    with open(os.path.join(output_dir, 'stats.json'), 'w', encoding='utf-8') as f:
        json.dump(search_index['stats'], f, indent=2, ensure_ascii=False)

    # Save categories only
    with open(os.path.join(output_dir, 'categories.json'), 'w', encoding='utf-8') as f:
        json.dump(search_index['categories'], f, indent=2, ensure_ascii=False)

    # Save integrations only
    with open(os.path.join(output_dir, 'integrations.json'), 'w', encoding='utf-8') as f:
        json.dump(search_index['integrations'], f, indent=2, ensure_ascii=False)

    print("Search index generated successfully:")
    print(f"  {search_index['stats']['total_workflows']} workflows indexed")
    print(f"  {len(search_index['categories'])} categories")
    print(f"  {len(search_index['integrations'])} popular integrations")
    print(f"  Files saved to: {output_dir}")


def main():
    """Main function to generate search index."""
    # Paths
    db_path = "database/workflows.db"
    output_dir = "docs/api"

    # Check if database exists
    if not os.path.exists(db_path):
        print(f"Database not found: {db_path}")
        print("Run 'python run.py --reindex' first to create the database")
        sys.exit(1)

    try:
        print("Generating static search index...")
        search_index = generate_static_search_index(db_path, output_dir)
        save_search_index(search_index, output_dir)
        print("Static search index ready for GitHub Pages!")
    except Exception as e:
        print(f"Error generating search index: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
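
Once the generator has run, the split output files let consumers load only what they need. A minimal sketch reading the lightweight `stats.json` it writes (so a status badge or page header can avoid fetching the much larger `search-index.json`):

```python
import json

# stats.json holds just the 'stats' sub-object of the full index,
# as written by save_search_index above.
with open("docs/api/stats.json", encoding="utf-8") as f:
    stats = json.load(f)

print(f"{stats['total_workflows']:,} workflows / "
      f"{stats['unique_integrations']:,} integrations / "
      f"{stats['total_nodes']:,} nodes")
```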


@@ -0,0 +1,213 @@
#!/usr/bin/env python3
"""
Update README.md with current workflow statistics
Replaces hardcoded numbers with live data from the database.
"""
import json
import os
import re
import sys
from pathlib import Path
from datetime import datetime

# Add the parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))

from workflow_db import WorkflowDatabase


def get_current_stats():
    """Get current workflow statistics from the database."""
    db_path = "database/workflows.db"
    if not os.path.exists(db_path):
        print("Database not found. Run workflow indexing first.")
        return None

    db = WorkflowDatabase(db_path)
    stats = db.get_stats()

    # Get categories count
    categories = db.get_service_categories()

    return {
        'total_workflows': stats['total'],
        'active_workflows': stats['active'],
        'inactive_workflows': stats['inactive'],
        'total_nodes': stats['total_nodes'],
        'unique_integrations': stats['unique_integrations'],
        'categories_count': len(get_category_list(categories)),
        'triggers': stats['triggers'],
        'complexity': stats['complexity'],
        'last_updated': datetime.now().strftime('%Y-%m-%d')
    }


def get_category_list(categories):
    """Get formatted list of all categories (same logic as the search index)."""
    formatted_categories = set()

    # Map technical categories to display names
    category_mapping = {
        'messaging': 'Communication & Messaging',
        'email': 'Communication & Messaging',
        'cloud_storage': 'Cloud Storage & File Management',
        'database': 'Data Processing & Analysis',
        'project_management': 'Project Management',
        'ai_ml': 'AI Agent Development',
        'social_media': 'Social Media Management',
        'ecommerce': 'E-commerce & Retail',
        'analytics': 'Data Processing & Analysis',
        'calendar_tasks': 'Project Management',
        'forms': 'Data Processing & Analysis',
        'development': 'Technical Infrastructure & DevOps'
    }
    for category_key in categories.keys():
        display_name = category_mapping.get(category_key, category_key.replace('_', ' ').title())
        formatted_categories.add(display_name)

    # Add categories from the create_categories.py system
    additional_categories = [
        "Business Process Automation",
        "Web Scraping & Data Extraction",
        "Marketing & Advertising Automation",
        "Creative Content & Video Automation",
        "Creative Design Automation",
        "CRM & Sales",
        "Financial & Accounting"
    ]
    for cat in additional_categories:
        formatted_categories.add(cat)

    return sorted(formatted_categories)


def update_readme_stats(stats):
    """Update README.md with current statistics."""
    readme_path = "README.md"
    if not os.path.exists(readme_path):
        print("README.md not found")
        return False

    with open(readme_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Define replacement patterns and their new values
    replacements = [
        # Main collection description
        (r'A professionally organized collection of \*\*[\d,]+\s+n8n workflows\*\*',
         f'A professionally organized collection of **{stats["total_workflows"]:,} n8n workflows**'),
        # Total workflows in various contexts
        (r'- \*\*[\d,]+\s+workflows\*\* with meaningful',
         f'- **{stats["total_workflows"]:,} workflows** with meaningful'),
        # Statistics section
        (r'- \*\*Total Workflows\*\*: [\d,]+',
         f'- **Total Workflows**: {stats["total_workflows"]:,}'),
        (r'- \*\*Active Workflows\*\*: [\d,]+ \([\d.]+%',
         f'- **Active Workflows**: {stats["active_workflows"]:,} ({(stats["active_workflows"]/stats["total_workflows"]*100):.1f}%'),
        (r'- \*\*Total Nodes\*\*: [\d,]+ \(avg [\d.]+ nodes',
         f'- **Total Nodes**: {stats["total_nodes"]:,} (avg {(stats["total_nodes"]/stats["total_workflows"]):.1f} nodes'),
        (r'- \*\*Unique Integrations\*\*: [\d,]+ different',
         f'- **Unique Integrations**: {stats["unique_integrations"]:,} different'),
        # Update complexity/trigger distribution
        (r'- \*\*Complex\*\*: [\d,]+ workflows \([\d.]+%\)',
         f'- **Complex**: {stats["triggers"].get("Complex", 0):,} workflows ({(stats["triggers"].get("Complex", 0)/stats["total_workflows"]*100):.1f}%)'),
        (r'- \*\*Webhook\*\*: [\d,]+ workflows \([\d.]+%\)',
         f'- **Webhook**: {stats["triggers"].get("Webhook", 0):,} workflows ({(stats["triggers"].get("Webhook", 0)/stats["total_workflows"]*100):.1f}%)'),
        (r'- \*\*Manual\*\*: [\d,]+ workflows \([\d.]+%\)',
         f'- **Manual**: {stats["triggers"].get("Manual", 0):,} workflows ({(stats["triggers"].get("Manual", 0)/stats["total_workflows"]*100):.1f}%)'),
        (r'- \*\*Scheduled\*\*: [\d,]+ workflows \([\d.]+%\)',
         f'- **Scheduled**: {stats["triggers"].get("Scheduled", 0):,} workflows ({(stats["triggers"].get("Scheduled", 0)/stats["total_workflows"]*100):.1f}%)'),
        # Update total in current collection stats
        (r'\*\*Total Workflows\*\*: [\d,]+ automation',
         f'**Total Workflows**: {stats["total_workflows"]:,} automation'),
        (r'\*\*Active Workflows\*\*: [\d,]+ \([\d.]+% active',
         f'**Active Workflows**: {stats["active_workflows"]:,} ({(stats["active_workflows"]/stats["total_workflows"]*100):.1f}% active'),
        (r'\*\*Total Nodes\*\*: [\d,]+ \(avg [\d.]+ nodes',
         f'**Total Nodes**: {stats["total_nodes"]:,} (avg {(stats["total_nodes"]/stats["total_workflows"]):.1f} nodes'),
        (r'\*\*Unique Integrations\*\*: [\d,]+ different',
         f'**Unique Integrations**: {stats["unique_integrations"]:,} different'),
        # Categories count
        (r'Our system automatically categorizes workflows into [\d]+ service categories',
         f'Our system automatically categorizes workflows into {stats["categories_count"]} service categories'),
        # Update any "2000+" references
        (r'2000\+', f'{stats["total_workflows"]:,}+'),
        (r'2,000\+', f'{stats["total_workflows"]:,}+'),
        # Search across X workflows
        (r'Search across [\d,]+ workflows', f'Search across {stats["total_workflows"]:,} workflows'),
        # Instant search across X workflows
        (r'Instant search across [\d,]+ workflows', f'Instant search across {stats["total_workflows"]:,} workflows'),
    ]

    # Apply all replacements
    updated_content = content
    replacements_made = 0
    for pattern, replacement in replacements:
        old_content = updated_content
        updated_content = re.sub(pattern, replacement, updated_content)
        if updated_content != old_content:
            replacements_made += 1

    # Write back to file
    with open(readme_path, 'w', encoding='utf-8') as f:
        f.write(updated_content)

    print("README.md updated with current statistics:")
    print(f"  - Total workflows: {stats['total_workflows']:,}")
    print(f"  - Active workflows: {stats['active_workflows']:,}")
    print(f"  - Total nodes: {stats['total_nodes']:,}")
    print(f"  - Unique integrations: {stats['unique_integrations']:,}")
    print(f"  - Categories: {stats['categories_count']}")
    print(f"  - Replacements made: {replacements_made}")
    return True


def main():
    """Main function to update README statistics."""
    try:
        print("Getting current workflow statistics...")
        stats = get_current_stats()
        if not stats:
            print("Failed to get statistics")
            sys.exit(1)

        print("Updating README.md...")
        success = update_readme_stats(stats)
        if success:
            print("README.md successfully updated with latest statistics!")
        else:
            print("Failed to update README.md")
            sys.exit(1)
    except Exception as e:
        print(f"Error updating README stats: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
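
To see what one of the patterns above actually does, here is a worked example of a single pattern/replacement pair from `update_readme_stats`, applied to a sample README line (the numbers are illustrative):

```python
import re

# One of the regex rewrites: match the hardcoded stat, substitute the live one.
line = "- **Total Workflows**: 2,053"
pattern = r'- \*\*Total Workflows\*\*: [\d,]+'
updated = re.sub(pattern, '- **Total Workflows**: 2,057', line)
print(updated)  # -> - **Total Workflows**: 2,057
```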