#!/usr/bin/env python3
# scripts/analyze_navigation.py
"""
title: "Analyze Navigation"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "Analyze documentation navigation and cross-references."
keywords: ['analysis', 'analyze', 'navigation']
tokens: ~500
created: 2025-12-22
updated: 2025-12-22
script_name: "analyze_navigation.py"
language: python
executable: true
usage: "python3 scripts/analyze_navigation.py [options]"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false

Analyze documentation navigation and cross-references.
"""
import argparse
import os
import re
import sys
from collections import Counter, defaultdict
from pathlib import Path
def parse_args():
    """Parse command line arguments.

    Returns:
        argparse.Namespace with attributes: docs_dir (str), orphans_only
        (bool), broken_only (bool), json (bool), verbose (bool).
    """
    parser = argparse.ArgumentParser(
        description='Analyze CODITECT documentation navigation and cross-references.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
  %(prog)s                      # Full analysis of docs/ directory
  %(prog)s --docs-dir ./docs    # Specify custom docs directory
  %(prog)s --orphans-only       # Show only orphaned documents
  %(prog)s --broken-only        # Show only broken links
  %(prog)s --json               # Output results as JSON

Analysis includes:
- Total document count
- Orphaned documents (not linked from anywhere)
- Broken links (pointing to non-existent files)
- Navigation entry points (README.md files)
- Most-referenced documents
- Documents with no outbound links
''',
    )
    parser.add_argument('--docs-dir', type=str, default='docs',
                        help='Directory to analyze (default: docs)')
    parser.add_argument('--orphans-only', action='store_true',
                        help='Show only orphaned documents')
    parser.add_argument('--broken-only', action='store_true',
                        help='Show only broken links')
    parser.add_argument('--json', action='store_true',
                        help='Output results as JSON')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Verbose output')
    return parser.parse_args()
def main(args=None):
    """Run the documentation navigation analysis and print a report.

    Args:
        args: Optional pre-parsed argparse.Namespace (attributes: docs_dir,
            orphans_only, broken_only, json, verbose). When None (the
            default, as when invoked from the command line), arguments are
            read from sys.argv via parse_args().

    Exits with status 1 if the docs directory does not exist.
    """
    if args is None:
        args = parse_args()
    docs_dir = Path(args.docs_dir)

    if not docs_dir.exists():
        print(f"Error: Directory not found: {docs_dir}", file=sys.stderr)
        sys.exit(1)

    docs_root = docs_dir.resolve()
    all_docs = list(docs_dir.rglob("*.md"))
    total = len(all_docs)

    # Matches [text](path.md) or [text](path.md#anchor). External http(s)
    # links are filtered out below; only relative links count as navigation.
    link_pattern = re.compile(r'\[([^\]]+)\]\(([^)]+\.md(?:#[^)]*)?)\)')

    # Single pass over every document collecting, at once:
    #   doc_links        outbound relative links per document
    #   all_linked_files resolved link targets (for orphan detection)
    #   broken_links     (source-relative-path, link) for missing targets
    # (The original scanned every file twice; once is enough.)
    doc_links = defaultdict(list)
    all_linked_files = set()
    broken_links = []
    for doc in all_docs:
        try:
            content = doc.read_text()
        except Exception as e:
            # Best-effort: unreadable files are skipped, but surfaced with
            # --verbose instead of being silently ignored.
            if args.verbose:
                print(f"Warning: could not read {doc}: {e}", file=sys.stderr)
            continue
        for _text, link in link_pattern.findall(content):
            if link.startswith('http'):
                continue  # external link, not internal navigation
            doc_links[str(doc)].append(link)
            # Drop any #anchor before resolving against the source's dir.
            target = (doc.parent / link.split('#')[0]).resolve()
            all_linked_files.add(str(target))
            if not target.exists():
                # resolve() first: rglob yields paths relative to docs_dir,
                # which relative_to(docs_root) would otherwise reject.
                broken_links.append(
                    (str(doc.resolve().relative_to(docs_root)), link))

    # Orphans: documents no other document links to. READMEs and CLAUDE.md
    # are navigation entry points, so they are excluded — matched on the
    # exact file name (endswith would also exclude e.g. "MY-README.md").
    all_doc_paths = {str(d.resolve()) for d in all_docs}
    orphaned = all_doc_paths - all_linked_files
    entry_names = {'README.md', 'CLAUDE.md'}
    orphaned_filtered = {p for p in orphaned if Path(p).name not in entry_names}

    readme_files = sorted(d for d in all_docs if d.name == 'README.md')
    no_outbound = sorted(
        str(d.resolve().relative_to(docs_root))
        for d in all_docs
        if str(d) not in doc_links
    )

    # --json: emit a machine-readable summary and nothing else.
    if args.json:
        import json  # local import: only this output mode needs it
        print(json.dumps({
            'total_documents': total,
            'documents_with_outbound_links': len(doc_links),
            'unique_files_referenced': len(all_linked_files),
            'orphaned_documents': sorted(
                str(Path(p).relative_to(docs_root))
                for p in orphaned_filtered),
            'broken_links': [{'source': src, 'link': lnk}
                             for src, lnk in broken_links],
            'readme_files': len(readme_files),
            'documents_with_no_outbound_links': no_outbound,
        }, indent=2))
        return

    print("=" * 80)
    print("CODITECT CORE DOCUMENTATION NAVIGATION ANALYSIS")
    print("=" * 80)
    print(f"\nTotal markdown files: {total}")
    print(f"Documents with outbound links: {len(doc_links)}")
    print(f"Unique files referenced: {len(all_linked_files)}")

    # --orphans-only: list orphans (absolute paths) and stop.
    if args.orphans_only:
        print("\nOrphaned Documents:")
        for p in sorted(orphaned_filtered):
            print(f"  {p}")
        return

    # --broken-only: list broken links and stop (the original printed the
    # whole orphan report first, contradicting the flag's help text).
    if args.broken_only:
        print(f"\nBroken links: {len(broken_links)}")
        for src, lnk in broken_links:
            print(f"  {src} -> {lnk}")
        return

    print("\n" + "=" * 80)
    print("ORPHANED DOCUMENTS (not linked from anywhere)")
    print("=" * 80)
    print(f"\nTotal orphaned: {len(orphaned_filtered)}")
    if orphaned_filtered:
        print("\nOrphaned documents by category:")
        by_category = defaultdict(list)
        for p in sorted(orphaned_filtered):
            rel_path = str(Path(p).relative_to(docs_root))
            category = rel_path.split('/')[0] if '/' in rel_path else 'root'
            by_category[category].append(rel_path)
        for category in sorted(by_category):
            files = by_category[category]
            print(f"\n{category}/ ({len(files)} files):")
            for rel in sorted(files)[:10]:
                print(f"  - {rel}")
            if len(files) > 10:
                print(f"  ... and {len(files) - 10} more")

    print("\n" + "=" * 80)
    print("BROKEN LINKS")
    print("=" * 80)
    print(f"\nBroken links found: {len(broken_links)}")
    if broken_links:
        print("\nBroken link examples (first 20):")
        for src, lnk in broken_links[:20]:
            print(f"  {src}")
            print(f"    -> {lnk}")
        if len(broken_links) > 20:
            print(f"\n  ... and {len(broken_links) - 20} more")
    else:
        print("\nNo broken links detected!")

    print("\n" + "=" * 80)
    print("NAVIGATION ENTRY POINTS")
    print("=" * 80)
    print(f"\nREADME.md files: {len(readme_files)}")
    for readme in readme_files:
        try:
            resolved = readme.resolve()
            # Show the path relative to the docs tree when it lives inside
            # it; fall back to the raw path otherwise (e.g. symlinked out).
            label = (resolved.relative_to(docs_root)
                     if resolved.is_relative_to(docs_root) else readme)
            links = link_pattern.findall(readme.read_text())
            print(f"  - {label} ({len(links)} internal links)")
        except Exception as e:
            print(f"  - {readme} (error: {e})")

    print("\n" + "=" * 80)
    print("MOST-REFERENCED DOCUMENTS (Top 15)")
    print("=" * 80)
    # Count by link target with anchors stripped, so foo.md and foo.md#x
    # aggregate into one entry.
    link_counts = Counter(
        lnk.split('#')[0]
        for links in doc_links.values()
        for lnk in links
    )
    print("\nTop referenced documents:")
    for lnk, count in link_counts.most_common(15):
        print(f"  {count:3d} refs: {lnk}")

    print("\n" + "=" * 80)
    print("DOCUMENTS WITH NO OUTBOUND LINKS")
    print("=" * 80)
    print(f"\nTotal documents with no outbound links: {len(no_outbound)}")
    if len(no_outbound) <= 20:
        for rel in no_outbound:
            print(f"  - {rel}")
    else:
        print("\nBy category:")
        by_cat = defaultdict(list)
        for rel in no_outbound:
            cat = rel.split('/')[0] if '/' in rel else 'root'
            by_cat[cat].append(rel)
        for cat in sorted(by_cat):
            print(f"  {cat}/ ({len(by_cat[cat])} files)")

    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)
    denom = total or 1  # avoid ZeroDivisionError on an empty docs tree
    print(f"""
Total documents: {total}
Documents with internal links: {len(doc_links)} ({100 * len(doc_links) / denom:.1f}%)
Documents with no outbound links: {len(no_outbound)} ({100 * len(no_outbound) / denom:.1f}%)
Orphaned documents: {len(orphaned_filtered)} ({100 * len(orphaned_filtered) / denom:.1f}%)
Broken links: {len(broken_links)}
README.md navigation files: {len(readme_files)}
""")
# Script entry point. The original compared the undefined name `name` to
# 'main', so the script never ran from the command line; the dunder form
# below is the standard guard.
if __name__ == '__main__':
    main()