#!/usr/bin/env python3
# scripts/analyze_navigation.py
"""
title: "Analyze Navigation"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "Analyze documentation navigation and cross-references."
keywords: ['analysis', 'analyze', 'navigation']
tokens: ~500
created: 2025-12-22
updated: 2025-12-22
script_name: "analyze_navigation.py"
language: python
executable: true
usage: "python3 scripts/analyze_navigation.py [options]"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false

Analyze documentation navigation and cross-references.
"""
import argparse
import os
import re
import sys
from collections import Counter, defaultdict
from pathlib import Path
def parse_args():
    """Parse command line arguments.

    Returns:
        argparse.Namespace with attributes: docs_dir (str), orphans_only
        (bool), broken_only (bool), json (bool), verbose (bool).
    """
    parser = argparse.ArgumentParser(
        description='Analyze CODITECT documentation navigation and cross-references.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
  %(prog)s                      # Full analysis of docs/ directory
  %(prog)s --docs-dir ./docs    # Specify custom docs directory
  %(prog)s --orphans-only       # Show only orphaned documents
  %(prog)s --broken-only        # Show only broken links
  %(prog)s --json               # Output results as JSON

Analysis includes:
- Total document count
- Orphaned documents (not linked from anywhere)
- Broken links (pointing to non-existent files)
- Navigation entry points (README.md files)
- Most-referenced documents
- Documents with no outbound links
''',
    )
    parser.add_argument('--docs-dir', type=str, default='docs',
                        help='Directory to analyze (default: docs)')
    parser.add_argument('--orphans-only', action='store_true',
                        help='Show only orphaned documents')
    parser.add_argument('--broken-only', action='store_true',
                        help='Show only broken links')
    parser.add_argument('--json', action='store_true',
                        help='Output results as JSON')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Verbose output')
    return parser.parse_args()
def main(args=None):
    """Run the documentation navigation analysis and print a report.

    Args:
        args: Optional pre-parsed argparse.Namespace (attributes: docs_dir,
            orphans_only, broken_only, json, verbose). When None (the
            default, as when invoked from the command line), arguments are
            read from sys.argv via parse_args().

    Exits with status 1 if the docs directory does not exist.
    """
    if args is None:
        args = parse_args()
    docs_dir = Path(args.docs_dir)

    if not docs_dir.exists():
        print(f"Error: Directory not found: {docs_dir}", file=sys.stderr)
        sys.exit(1)

    docs_root = docs_dir.resolve()
    all_docs = list(docs_dir.rglob("*.md"))
    total = len(all_docs)

    # Matches [text](path.md) or [text](path.md#anchor). External http(s)
    # links are filtered out below; only relative links count as navigation.
    link_pattern = re.compile(r'\[([^\]]+)\]\(([^)]+\.md(?:#[^)]*)?)\)')

    # Single pass over every document collecting, at once:
    #   doc_links        outbound relative links per document
    #   all_linked_files resolved link targets (for orphan detection)
    #   broken_links     (source-relative-path, link) for missing targets
    # (The original scanned every file twice; once is enough.)
    doc_links = defaultdict(list)
    all_linked_files = set()
    broken_links = []
    for doc in all_docs:
        try:
            content = doc.read_text()
        except Exception as e:
            # Best-effort: unreadable files are skipped, but surfaced with
            # --verbose instead of being silently ignored.
            if args.verbose:
                print(f"Warning: could not read {doc}: {e}", file=sys.stderr)
            continue
        for _text, link in link_pattern.findall(content):
            if link.startswith('http'):
                continue  # external link, not internal navigation
            doc_links[str(doc)].append(link)
            # Drop any #anchor before resolving against the source's dir.
            target = (doc.parent / link.split('#')[0]).resolve()
            all_linked_files.add(str(target))
            if not target.exists():
                # resolve() first: rglob yields paths relative to docs_dir,
                # which relative_to(docs_root) would otherwise reject.
                broken_links.append(
                    (str(doc.resolve().relative_to(docs_root)), link))

    # Orphans: documents no other document links to. READMEs and CLAUDE.md
    # are navigation entry points, so they are excluded — matched on the
    # exact file name (endswith would also exclude e.g. "MY-README.md").
    all_doc_paths = {str(d.resolve()) for d in all_docs}
    orphaned = all_doc_paths - all_linked_files
    entry_names = {'README.md', 'CLAUDE.md'}
    orphaned_filtered = {p for p in orphaned if Path(p).name not in entry_names}

    readme_files = sorted(d for d in all_docs if d.name == 'README.md')
    no_outbound = sorted(
        str(d.resolve().relative_to(docs_root))
        for d in all_docs
        if str(d) not in doc_links
    )

    # --json: emit a machine-readable summary and nothing else.
    if args.json:
        import json  # local import: only this output mode needs it
        print(json.dumps({
            'total_documents': total,
            'documents_with_outbound_links': len(doc_links),
            'unique_files_referenced': len(all_linked_files),
            'orphaned_documents': sorted(
                str(Path(p).relative_to(docs_root))
                for p in orphaned_filtered),
            'broken_links': [{'source': src, 'link': lnk}
                             for src, lnk in broken_links],
            'readme_files': len(readme_files),
            'documents_with_no_outbound_links': no_outbound,
        }, indent=2))
        return

    print("=" * 80)
    print("CODITECT CORE DOCUMENTATION NAVIGATION ANALYSIS")
    print("=" * 80)
    print(f"\nTotal markdown files: {total}")
    print(f"Documents with outbound links: {len(doc_links)}")
    print(f"Unique files referenced: {len(all_linked_files)}")

    # --orphans-only: list orphans (absolute paths) and stop.
    if args.orphans_only:
        print("\nOrphaned Documents:")
        for p in sorted(orphaned_filtered):
            print(f"  {p}")
        return

    # --broken-only: list broken links and stop (the original printed the
    # whole orphan report first, contradicting the flag's help text).
    if args.broken_only:
        print(f"\nBroken links: {len(broken_links)}")
        for src, lnk in broken_links:
            print(f"  {src} -> {lnk}")
        return

    print("\n" + "=" * 80)
    print("ORPHANED DOCUMENTS (not linked from anywhere)")
    print("=" * 80)
    print(f"\nTotal orphaned: {len(orphaned_filtered)}")
    if orphaned_filtered:
        print("\nOrphaned documents by category:")
        by_category = defaultdict(list)
        for p in sorted(orphaned_filtered):
            rel_path = str(Path(p).relative_to(docs_root))
            category = rel_path.split('/')[0] if '/' in rel_path else 'root'
            by_category[category].append(rel_path)
        for category in sorted(by_category):
            files = by_category[category]
            print(f"\n{category}/ ({len(files)} files):")
            for rel in sorted(files)[:10]:
                print(f"  - {rel}")
            if len(files) > 10:
                print(f"  ... and {len(files) - 10} more")

    print("\n" + "=" * 80)
    print("BROKEN LINKS")
    print("=" * 80)
    print(f"\nBroken links found: {len(broken_links)}")
    if broken_links:
        print("\nBroken link examples (first 20):")
        for src, lnk in broken_links[:20]:
            print(f"  {src}")
            print(f"    -> {lnk}")
        if len(broken_links) > 20:
            print(f"\n  ... and {len(broken_links) - 20} more")
    else:
        print("\nNo broken links detected!")

    print("\n" + "=" * 80)
    print("NAVIGATION ENTRY POINTS")
    print("=" * 80)
    print(f"\nREADME.md files: {len(readme_files)}")
    for readme in readme_files:
        try:
            resolved = readme.resolve()
            # Show the path relative to the docs tree when it lives inside
            # it; fall back to the raw path otherwise (e.g. symlinked out).
            label = (resolved.relative_to(docs_root)
                     if resolved.is_relative_to(docs_root) else readme)
            links = link_pattern.findall(readme.read_text())
            print(f"  - {label} ({len(links)} internal links)")
        except Exception as e:
            print(f"  - {readme} (error: {e})")

    print("\n" + "=" * 80)
    print("MOST-REFERENCED DOCUMENTS (Top 15)")
    print("=" * 80)
    # Count by link target with anchors stripped, so foo.md and foo.md#x
    # aggregate into one entry.
    link_counts = Counter(
        lnk.split('#')[0]
        for links in doc_links.values()
        for lnk in links
    )
    print("\nTop referenced documents:")
    for lnk, count in link_counts.most_common(15):
        print(f"  {count:3d} refs: {lnk}")

    print("\n" + "=" * 80)
    print("DOCUMENTS WITH NO OUTBOUND LINKS")
    print("=" * 80)
    print(f"\nTotal documents with no outbound links: {len(no_outbound)}")
    if len(no_outbound) <= 20:
        for rel in no_outbound:
            print(f"  - {rel}")
    else:
        print("\nBy category:")
        by_cat = defaultdict(list)
        for rel in no_outbound:
            cat = rel.split('/')[0] if '/' in rel else 'root'
            by_cat[cat].append(rel)
        for cat in sorted(by_cat):
            print(f"  {cat}/ ({len(by_cat[cat])} files)")

    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)
    denom = total or 1  # avoid ZeroDivisionError on an empty docs tree
    print(f"""
Total documents: {total}
Documents with internal links: {len(doc_links)} ({100 * len(doc_links) / denom:.1f}%)
Documents with no outbound links: {len(no_outbound)} ({100 * len(no_outbound) / denom:.1f}%)
Orphaned documents: {len(orphaned_filtered)} ({100 * len(orphaned_filtered) / denom:.1f}%)
Broken links: {len(broken_links)}
README.md navigation files: {len(readme_files)}
""")
# Script entry point. The original compared the undefined name `name` to
# 'main', so the script never ran from the command line; the dunder form
# below is the standard guard.
if __name__ == '__main__':
    main()