feat: Add GitHub Pages public search interface and enhanced documentation system

## 🌐 GitHub Pages Public Search Interface
- Complete client-side search application solving Issue #84
- Responsive HTML/CSS/JavaScript with mobile optimization
- Real-time search across 2,057+ workflows with instant results (lookup sketched after this list)
- Category filtering across 15 workflow categories
- Dark/light theme support with system preference detection
- Direct workflow JSON download functionality
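
The published interface is client-side HTML/CSS/JavaScript, but the lookup it performs over the static index can be sketched in a few lines of Python. This is a minimal sketch, not the shipped client code: the `docs/api/search-index.json` path and the field names are taken from the generator script later in this commit, and the actual JavaScript filter may differ in detail.

```python
import json

# Load the static index that the generator script (below) writes for GitHub Pages.
with open("docs/api/search-index.json", encoding="utf-8") as f:
    index = json.load(f)

def search(query, category=None):
    """Substring match on the precomputed searchable_text, optionally
    narrowed to one category -- mirroring the client-side filter."""
    q = query.lower()
    return [
        w for w in index["workflows"]
        if q in w["searchable_text"]
        and (category is None or w["category"] == category)
    ]

for hit in search("slack", category="Communication & Messaging")[:5]:
    print(hit["name"], "->", hit["download_url"])
```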

## 🤖 GitHub Actions Automation
- deploy-pages.yml: Automated deployment to GitHub Pages
- update-readme.yml: Weekly automated README statistics updates
- Comprehensive workflow indexing and category generation

## 🔍 Enhanced Search & Categorization
- Static search index generation for GitHub Pages
- Category prioritization: developer-assigned categories take precedence over integration-based ones
- CalcsLive custom node integration and categorization
- Enhanced workflow database with better custom node detection
- Fixed README corruption by replacing hardcoded numbers with live database statistics

## 📚 Documentation & Infrastructure
- Comprehensive CHANGELOG.md with proper versioning
- Enhanced README with accurate statistics and public interface links
- Professional documentation addressing repository infrastructure needs

## Technical Improvements
- Fixed Unicode encoding issues in Python scripts (representative fix sketched after this list)
- Enhanced CalcsLive detection with false positive prevention
- Improved JSON description preservation and indexing
- Mobile-optimized responsive design for all devices
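
The Unicode fix itself lives in scripts not shown in this excerpt. As a representative sketch only (an assumption about the class of fix, not the committed change), the usual remedy is to pin UTF-8 explicitly:

```python
import sys

# Assumed illustration, not the exact commit: Windows consoles often default
# to cp1252, so printing emoji or non-ASCII workflow names raises
# UnicodeEncodeError unless stdout is reconfigured.
if sys.stdout.encoding and sys.stdout.encoding.lower() != "utf-8":
    sys.stdout.reconfigure(encoding="utf-8")

# File I/O pins the encoding explicitly, as both scripts below do:
with open("README.md", encoding="utf-8") as f:
    content = f.read()
```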

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-29 21:54:12 -07:00
parent ebcdcc4734
commit 56789e895e
16 changed files with 45032 additions and 1325 deletions


@@ -0,0 +1,263 @@
#!/usr/bin/env python3
"""
Generate Static Search Index for GitHub Pages
Creates a lightweight JSON index for client-side search functionality.
"""
import json
import os
import sys
from pathlib import Path
from typing import Dict, List, Any

# Add the parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))

from workflow_db import WorkflowDatabase


def generate_static_search_index(db_path: str, output_dir: str) -> Dict[str, Any]:
    """Generate a static search index for client-side searching."""
    # Initialize database
    db = WorkflowDatabase(db_path)

    # Get all workflows
    workflows, total = db.search_workflows(limit=10000)  # Get all workflows

    # Get statistics
    stats = db.get_stats()

    # Get categories from service mapping
    categories = db.get_service_categories()

    # Load existing categories from create_categories.py system
    existing_categories = load_existing_categories()

    # Create simplified workflow data for search
    search_workflows = []
    for workflow in workflows:
        # Create searchable text combining multiple fields
        searchable_text = ' '.join([
            workflow['name'],
            workflow['description'],
            workflow['filename'],
            ' '.join(workflow['integrations']),
            ' '.join(workflow['tags']) if workflow['tags'] else ''
        ]).lower()

        # Use existing category from create_categories.py system,
        # fall back to integration-based categorization
        category = get_workflow_category(
            workflow['filename'], existing_categories,
            workflow['integrations'], categories
        )

        search_workflow = {
            'id': workflow['filename'].replace('.json', ''),
            'name': workflow['name'],
            'description': workflow['description'],
            'filename': workflow['filename'],
            'active': workflow['active'],
            'trigger_type': workflow['trigger_type'],
            'complexity': workflow['complexity'],
            'node_count': workflow['node_count'],
            'integrations': workflow['integrations'],
            'tags': workflow['tags'],
            'category': category,
            'searchable_text': searchable_text,
            'download_url': f"https://raw.githubusercontent.com/Zie619/n8n-workflows/main/workflows/{extract_folder_from_filename(workflow['filename'])}/{workflow['filename']}"
        }
        search_workflows.append(search_workflow)

    # Create comprehensive search index
    search_index = {
        'version': '1.0',
        'generated_at': stats.get('last_indexed', ''),
        'stats': {
            'total_workflows': stats['total'],
            'active_workflows': stats['active'],
            'inactive_workflows': stats['inactive'],
            'total_nodes': stats['total_nodes'],
            'unique_integrations': stats['unique_integrations'],
            'categories': len(get_category_list(categories)),
            'triggers': stats['triggers'],
            'complexity': stats['complexity']
        },
        'categories': get_category_list(categories),
        'integrations': get_popular_integrations(workflows),
        'workflows': search_workflows
    }
    return search_index


def load_existing_categories() -> Dict[str, str]:
    """Load existing categories from search_categories.json created by create_categories.py."""
    try:
        with open('context/search_categories.json', 'r', encoding='utf-8') as f:
            categories_data = json.load(f)

        # Convert to filename -> category mapping
        category_mapping = {}
        for item in categories_data:
            if item.get('category'):
                category_mapping[item['filename']] = item['category']
        return category_mapping
    except FileNotFoundError:
        print("Warning: search_categories.json not found, using integration-based categorization")
        return {}


def get_workflow_category(filename: str, existing_categories: Dict[str, str],
                          integrations: List[str], service_categories: Dict[str, List[str]]) -> str:
    """Get category for workflow, preferring existing assignment over integration-based."""
    # First priority: use existing category from the create_categories.py system
    if filename in existing_categories:
        return existing_categories[filename]

    # Fallback: use integration-based categorization
    return determine_category(integrations, service_categories)


def determine_category(integrations: List[str], categories: Dict[str, List[str]]) -> str:
    """Determine the category for a workflow based on its integrations."""
    if not integrations:
        return "Uncategorized"

    # Check each category for matching integrations
    for category, services in categories.items():
        for integration in integrations:
            if integration in services:
                return format_category_name(category)
    return "Uncategorized"


def format_category_name(category_key: str) -> str:
    """Format category key to display name."""
    category_mapping = {
        'messaging': 'Communication & Messaging',
        'email': 'Communication & Messaging',
        'cloud_storage': 'Cloud Storage & File Management',
        'database': 'Data Processing & Analysis',
        'project_management': 'Project Management',
        'ai_ml': 'AI Agent Development',
        'social_media': 'Social Media Management',
        'ecommerce': 'E-commerce & Retail',
        'analytics': 'Data Processing & Analysis',
        'calendar_tasks': 'Project Management',
        'forms': 'Data Processing & Analysis',
        'development': 'Technical Infrastructure & DevOps'
    }
    return category_mapping.get(category_key, category_key.replace('_', ' ').title())


def get_category_list(categories: Dict[str, List[str]]) -> List[str]:
    """Get formatted list of all categories."""
    formatted_categories = set()
    for category_key in categories.keys():
        formatted_categories.add(format_category_name(category_key))

    # Add categories from the create_categories.py system
    additional_categories = [
        "Business Process Automation",
        "Web Scraping & Data Extraction",
        "Marketing & Advertising Automation",
        "Creative Content & Video Automation",
        "Creative Design Automation",
        "CRM & Sales",
        "Financial & Accounting"
    ]
    for cat in additional_categories:
        formatted_categories.add(cat)

    return sorted(formatted_categories)


def get_popular_integrations(workflows: List[Dict]) -> List[Dict[str, Any]]:
    """Get list of popular integrations with counts."""
    integration_counts = {}
    for workflow in workflows:
        for integration in workflow['integrations']:
            integration_counts[integration] = integration_counts.get(integration, 0) + 1

    # Sort by count and take the top 50
    sorted_integrations = sorted(
        integration_counts.items(),
        key=lambda x: x[1],
        reverse=True
    )[:50]

    return [
        {'name': name, 'count': count}
        for name, count in sorted_integrations
    ]


def extract_folder_from_filename(filename: str) -> str:
    """Extract folder name from workflow filename."""
    # Most workflows follow the pattern: ID_Service_Purpose_Trigger.json
    # Extract the service name as the folder
    parts = filename.replace('.json', '').split('_')
    if len(parts) >= 2:
        return parts[1].capitalize()  # Second part is usually the service
    return 'Misc'


def save_search_index(search_index: Dict[str, Any], output_dir: str):
    """Save the search index to multiple formats for different uses."""
    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Save complete index
    with open(os.path.join(output_dir, 'search-index.json'), 'w', encoding='utf-8') as f:
        json.dump(search_index, f, indent=2, ensure_ascii=False)

    # Save stats only (for quick loading)
    with open(os.path.join(output_dir, 'stats.json'), 'w', encoding='utf-8') as f:
        json.dump(search_index['stats'], f, indent=2, ensure_ascii=False)

    # Save categories only
    with open(os.path.join(output_dir, 'categories.json'), 'w', encoding='utf-8') as f:
        json.dump(search_index['categories'], f, indent=2, ensure_ascii=False)

    # Save integrations only
    with open(os.path.join(output_dir, 'integrations.json'), 'w', encoding='utf-8') as f:
        json.dump(search_index['integrations'], f, indent=2, ensure_ascii=False)

    print("Search index generated successfully:")
    print(f"  {search_index['stats']['total_workflows']} workflows indexed")
    print(f"  {len(search_index['categories'])} categories")
    print(f"  {len(search_index['integrations'])} popular integrations")
    print(f"  Files saved to: {output_dir}")


def main():
    """Main function to generate search index."""
    # Paths
    db_path = "database/workflows.db"
    output_dir = "docs/api"

    # Check if database exists
    if not os.path.exists(db_path):
        print(f"Database not found: {db_path}")
        print("Run 'python run.py --reindex' first to create the database")
        sys.exit(1)

    try:
        print("Generating static search index...")
        search_index = generate_static_search_index(db_path, output_dir)
        save_search_index(search_index, output_dir)
        print("Static search index ready for GitHub Pages!")
    except Exception as e:
        print(f"Error generating search index: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
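
Once the generator has run, the split output files let consumers load only what they need. A minimal sketch reading the lightweight `stats.json` it writes (so a status badge or page header can avoid fetching the much larger `search-index.json`):

```python
import json

# stats.json holds just the 'stats' sub-object of the full index,
# as written by save_search_index above.
with open("docs/api/stats.json", encoding="utf-8") as f:
    stats = json.load(f)

print(f"{stats['total_workflows']:,} workflows / "
      f"{stats['unique_integrations']:,} integrations / "
      f"{stats['total_nodes']:,} nodes")
```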


@@ -0,0 +1,213 @@
#!/usr/bin/env python3
"""
Update README.md with current workflow statistics
Replaces hardcoded numbers with live data from the database.
"""
import json
import os
import re
import sys
from pathlib import Path
from datetime import datetime

# Add the parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))

from workflow_db import WorkflowDatabase


def get_current_stats():
    """Get current workflow statistics from the database."""
    db_path = "database/workflows.db"
    if not os.path.exists(db_path):
        print("Database not found. Run workflow indexing first.")
        return None

    db = WorkflowDatabase(db_path)
    stats = db.get_stats()

    # Get categories count
    categories = db.get_service_categories()

    return {
        'total_workflows': stats['total'],
        'active_workflows': stats['active'],
        'inactive_workflows': stats['inactive'],
        'total_nodes': stats['total_nodes'],
        'unique_integrations': stats['unique_integrations'],
        'categories_count': len(get_category_list(categories)),
        'triggers': stats['triggers'],
        'complexity': stats['complexity'],
        'last_updated': datetime.now().strftime('%Y-%m-%d')
    }


def get_category_list(categories):
    """Get formatted list of all categories (same logic as the search index)."""
    formatted_categories = set()

    # Map technical categories to display names
    category_mapping = {
        'messaging': 'Communication & Messaging',
        'email': 'Communication & Messaging',
        'cloud_storage': 'Cloud Storage & File Management',
        'database': 'Data Processing & Analysis',
        'project_management': 'Project Management',
        'ai_ml': 'AI Agent Development',
        'social_media': 'Social Media Management',
        'ecommerce': 'E-commerce & Retail',
        'analytics': 'Data Processing & Analysis',
        'calendar_tasks': 'Project Management',
        'forms': 'Data Processing & Analysis',
        'development': 'Technical Infrastructure & DevOps'
    }
    for category_key in categories.keys():
        display_name = category_mapping.get(category_key, category_key.replace('_', ' ').title())
        formatted_categories.add(display_name)

    # Add categories from the create_categories.py system
    additional_categories = [
        "Business Process Automation",
        "Web Scraping & Data Extraction",
        "Marketing & Advertising Automation",
        "Creative Content & Video Automation",
        "Creative Design Automation",
        "CRM & Sales",
        "Financial & Accounting"
    ]
    for cat in additional_categories:
        formatted_categories.add(cat)

    return sorted(formatted_categories)


def update_readme_stats(stats):
    """Update README.md with current statistics."""
    readme_path = "README.md"
    if not os.path.exists(readme_path):
        print("README.md not found")
        return False

    with open(readme_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Define replacement patterns and their new values
    replacements = [
        # Main collection description
        (r'A professionally organized collection of \*\*[\d,]+\s+n8n workflows\*\*',
         f'A professionally organized collection of **{stats["total_workflows"]:,} n8n workflows**'),
        # Total workflows in various contexts
        (r'- \*\*[\d,]+\s+workflows\*\* with meaningful',
         f'- **{stats["total_workflows"]:,} workflows** with meaningful'),
        # Statistics section
        (r'- \*\*Total Workflows\*\*: [\d,]+',
         f'- **Total Workflows**: {stats["total_workflows"]:,}'),
        (r'- \*\*Active Workflows\*\*: [\d,]+ \([\d.]+%',
         f'- **Active Workflows**: {stats["active_workflows"]:,} ({(stats["active_workflows"]/stats["total_workflows"]*100):.1f}%'),
        (r'- \*\*Total Nodes\*\*: [\d,]+ \(avg [\d.]+ nodes',
         f'- **Total Nodes**: {stats["total_nodes"]:,} (avg {(stats["total_nodes"]/stats["total_workflows"]):.1f} nodes'),
        (r'- \*\*Unique Integrations\*\*: [\d,]+ different',
         f'- **Unique Integrations**: {stats["unique_integrations"]:,} different'),
        # Update complexity/trigger distribution
        (r'- \*\*Complex\*\*: [\d,]+ workflows \([\d.]+%\)',
         f'- **Complex**: {stats["triggers"].get("Complex", 0):,} workflows ({(stats["triggers"].get("Complex", 0)/stats["total_workflows"]*100):.1f}%)'),
        (r'- \*\*Webhook\*\*: [\d,]+ workflows \([\d.]+%\)',
         f'- **Webhook**: {stats["triggers"].get("Webhook", 0):,} workflows ({(stats["triggers"].get("Webhook", 0)/stats["total_workflows"]*100):.1f}%)'),
        (r'- \*\*Manual\*\*: [\d,]+ workflows \([\d.]+%\)',
         f'- **Manual**: {stats["triggers"].get("Manual", 0):,} workflows ({(stats["triggers"].get("Manual", 0)/stats["total_workflows"]*100):.1f}%)'),
        (r'- \*\*Scheduled\*\*: [\d,]+ workflows \([\d.]+%\)',
         f'- **Scheduled**: {stats["triggers"].get("Scheduled", 0):,} workflows ({(stats["triggers"].get("Scheduled", 0)/stats["total_workflows"]*100):.1f}%)'),
        # Update total in current collection stats
        (r'\*\*Total Workflows\*\*: [\d,]+ automation',
         f'**Total Workflows**: {stats["total_workflows"]:,} automation'),
        (r'\*\*Active Workflows\*\*: [\d,]+ \([\d.]+% active',
         f'**Active Workflows**: {stats["active_workflows"]:,} ({(stats["active_workflows"]/stats["total_workflows"]*100):.1f}% active'),
        (r'\*\*Total Nodes\*\*: [\d,]+ \(avg [\d.]+ nodes',
         f'**Total Nodes**: {stats["total_nodes"]:,} (avg {(stats["total_nodes"]/stats["total_workflows"]):.1f} nodes'),
        (r'\*\*Unique Integrations\*\*: [\d,]+ different',
         f'**Unique Integrations**: {stats["unique_integrations"]:,} different'),
        # Categories count
        (r'Our system automatically categorizes workflows into [\d]+ service categories',
         f'Our system automatically categorizes workflows into {stats["categories_count"]} service categories'),
        # Update any "2000+" references
        (r'2000\+', f'{stats["total_workflows"]:,}+'),
        (r'2,000\+', f'{stats["total_workflows"]:,}+'),
        # Search across X workflows
        (r'Search across [\d,]+ workflows', f'Search across {stats["total_workflows"]:,} workflows'),
        # Instant search across X workflows
        (r'Instant search across [\d,]+ workflows', f'Instant search across {stats["total_workflows"]:,} workflows'),
    ]

    # Apply all replacements
    updated_content = content
    replacements_made = 0
    for pattern, replacement in replacements:
        old_content = updated_content
        updated_content = re.sub(pattern, replacement, updated_content)
        if updated_content != old_content:
            replacements_made += 1

    # Write back to file
    with open(readme_path, 'w', encoding='utf-8') as f:
        f.write(updated_content)

    print("README.md updated with current statistics:")
    print(f"  - Total workflows: {stats['total_workflows']:,}")
    print(f"  - Active workflows: {stats['active_workflows']:,}")
    print(f"  - Total nodes: {stats['total_nodes']:,}")
    print(f"  - Unique integrations: {stats['unique_integrations']:,}")
    print(f"  - Categories: {stats['categories_count']}")
    print(f"  - Replacements made: {replacements_made}")
    return True


def main():
    """Main function to update README statistics."""
    try:
        print("Getting current workflow statistics...")
        stats = get_current_stats()
        if not stats:
            print("Failed to get statistics")
            sys.exit(1)

        print("Updating README.md...")
        success = update_readme_stats(stats)
        if success:
            print("README.md successfully updated with latest statistics!")
        else:
            print("Failed to update README.md")
            sys.exit(1)
    except Exception as e:
        print(f"Error updating README stats: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
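
To see what one of the patterns above actually does, here is a worked example of a single pattern/replacement pair from `update_readme_stats`, applied to a sample README line (the numbers are illustrative):

```python
import re

# One of the regex rewrites: match the hardcoded stat, substitute the live one.
line = "- **Total Workflows**: 2,053"
pattern = r'- \*\*Total Workflows\*\*: [\d,]+'
updated = re.sub(pattern, '- **Total Workflows**: 2,057', line)
print(updated)  # -> - **Total Workflows**: 2,057
```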