#!/usr/bin/env python3
"""Analyze documentation navigation and cross-references.

---
title: "Analyze Navigation"
component_type: script
version: "1.0.0"
audience: contributor
status: stable
summary: "Analyze documentation navigation and cross-references."
keywords: ['analysis', 'analyze', 'navigation']
tokens: ~500
created: 2025-12-22
updated: 2025-12-22
script_name: "analyze_navigation.py"
language: python
executable: true
usage: "python3 scripts/analyze_navigation.py [options]"
python_version: "3.10+"
dependencies: []
modifies_files: false
network_access: false
requires_auth: false
---
"""

import argparse
import os
import re
import sys
from collections import Counter, defaultdict
from pathlib import Path

def parse_args():
    """Parse command-line arguments.

    Returns:
        argparse.Namespace with attributes ``docs_dir`` (str),
        ``orphans_only``, ``broken_only``, ``json`` and ``verbose``
        (all booleans).
    """
    parser = argparse.ArgumentParser(
        description='Analyze CODITECT documentation navigation and cross-references.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
  %(prog)s                    # Full analysis of docs/ directory
  %(prog)s --docs-dir ./docs  # Specify custom docs directory
  %(prog)s --orphans-only     # Show only orphaned documents
  %(prog)s --broken-only      # Show only broken links
  %(prog)s --json             # Output results as JSON

Analysis includes:

  • Total document count
  • Orphaned documents (not linked from anywhere)
  • Broken links (pointing to non-existent files)
  • Navigation entry points (README.md files)
  • Most-referenced documents
  • Documents with no outbound links
'''
    )
    parser.add_argument('--docs-dir', type=str, default='docs',
                        help='Directory to analyze (default: docs)')
    parser.add_argument('--orphans-only', action='store_true',
                        help='Show only orphaned documents')
    parser.add_argument('--broken-only', action='store_true',
                        help='Show only broken links')
    parser.add_argument('--json', action='store_true',
                        help='Output results as JSON')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Verbose output')
    return parser.parse_args()

def main():
    """Run the documentation navigation analysis and print a report.

    Scans ``--docs-dir`` recursively for ``*.md`` files, extracts relative
    markdown links, and reports: orphaned documents, broken links,
    navigation entry points (README.md files), most-referenced documents,
    documents with no outbound links, and a summary.

    Exits with status 1 if the docs directory does not exist.
    """
    args = parse_args()
    docs_dir = Path(args.docs_dir)

    if not docs_dir.exists():
        print(f"Error: Directory not found: {docs_dir}", file=sys.stderr)
        sys.exit(1)

    # Resolve once; used for all relative_to() calls below. The original
    # code mixed unresolved rglob() paths with a resolved base, which
    # raises ValueError whenever --docs-dir is a relative path.
    docs_resolved = docs_dir.resolve()
    all_docs = list(docs_dir.rglob("*.md"))

    # NOTE(review): --json is accepted by the CLI but no JSON output is
    # implemented anywhere in this function — confirm intent with author.

    print("=" * 80)
    print("CODITECT CORE DOCUMENTATION NAVIGATION ANALYSIS")
    print("=" * 80)

    print(f"\nTotal markdown files: {len(all_docs)}")

    # Relative markdown links: [text](path.md) with an optional #anchor.
    link_pattern = re.compile(r'\[([^\]]+)\]\(([^)]+\.md(?:#[^)]*)?)\)')
    doc_links = defaultdict(list)
    all_linked_files = set()
    broken_links = []

    # Single pass over every file: collect outbound links, resolved link
    # targets, and broken links. (The original re-read every file in a
    # second loop just to find broken links.)
    for doc in all_docs:
        try:
            content = doc.read_text()
        except (OSError, UnicodeDecodeError):
            continue  # unreadable file: skip it, analysis is best-effort
        for _text, link in link_pattern.findall(content):
            if link.startswith('http'):
                continue  # only relative (intra-docs) links matter here
            doc_links[str(doc)].append(link)
            # Normalize the link target (drop any #anchor).
            target = (doc.parent / link.split('#')[0]).resolve()
            all_linked_files.add(str(target))
            if not target.exists():
                rel_doc = doc.resolve().relative_to(docs_resolved)
                broken_links.append((str(rel_doc), link))

    print(f"Documents with outbound links: {len(doc_links)}")
    print(f"Unique files referenced: {len(all_linked_files)}")

    # Orphaned documents: present on disk but never linked from any doc.
    all_doc_paths = {str(doc.resolve()) for doc in all_docs}
    orphaned = all_doc_paths - all_linked_files

    # READMEs and CLAUDE.md are navigation entry points, not orphans.
    orphaned_filtered = {
        p for p in orphaned
        if not (p.endswith('README.md') or p.endswith('CLAUDE.md'))
    }

    if args.orphans_only:
        print("\nOrphaned Documents:")
        for doc in sorted(orphaned_filtered):
            print(f"  {doc}")
        return

    print("\n" + "=" * 80)
    print("ORPHANED DOCUMENTS (not linked from anywhere)")
    print("=" * 80)
    print(f"\nTotal orphaned: {len(orphaned_filtered)}")

    if orphaned_filtered:
        print("\nOrphaned documents by category:")
        by_category = defaultdict(list)
        for doc in sorted(orphaned_filtered):
            rel_path = Path(doc).relative_to(docs_resolved)
            # First path component is the category; files directly in
            # docs_dir fall into 'root'.
            category = rel_path.parts[0] if len(rel_path.parts) > 1 else 'root'
            by_category[category].append(str(rel_path))

        for category in sorted(by_category):
            files = by_category[category]
            print(f"\n{category}/ ({len(files)} files):")
            for doc in sorted(files)[:10]:
                print(f"  - {doc}")
            if len(files) > 10:
                print(f"  ... and {len(files) - 10} more")

    print("\n" + "=" * 80)
    print("BROKEN LINKS")
    print("=" * 80)

    if args.broken_only:
        print(f"\nBroken links: {len(broken_links)}")
        for doc, link in broken_links:
            print(f"  {doc} -> {link}")
        return

    print(f"\nBroken links found: {len(broken_links)}")
    if broken_links:
        print("\nBroken link examples (first 20):")
        for doc, link in broken_links[:20]:
            print(f"  {doc}")
            print(f"    -> {link}")
        if len(broken_links) > 20:
            print(f"\n  ... and {len(broken_links) - 20} more")
    else:
        print("\nNo broken links detected!")

    # Navigation entry points: every README.md under the docs tree.
    print("\n" + "=" * 80)
    print("NAVIGATION ENTRY POINTS")
    print("=" * 80)

    readme_files = [d for d in all_docs if d.name == 'README.md']
    print(f"\nREADME.md files: {len(readme_files)}")
    for readme in sorted(readme_files):
        try:
            readme_resolved = readme.resolve()
            if readme_resolved.is_relative_to(docs_resolved):
                rel_path = readme_resolved.relative_to(docs_resolved)
            else:
                rel_path = readme  # e.g. symlinked outside the docs tree
            links = link_pattern.findall(readme.read_text())
            print(f"  - {rel_path} ({len(links)} internal links)")
        except Exception as e:
            print(f"  - {readme} (error: {e})")

    # Most-referenced documents (by raw link path, anchors stripped).
    print("\n" + "=" * 80)
    print("MOST-REFERENCED DOCUMENTS (Top 15)")
    print("=" * 80)

    link_counts = Counter(
        link.split('#')[0]
        for links in doc_links.values()
        for link in links
    )

    print("\nTop referenced documents:")
    for link, count in link_counts.most_common(15):
        print(f"  {count:3d} refs: {link}")

    # Documents with no outbound links at all.
    print("\n" + "=" * 80)
    print("DOCUMENTS WITH NO OUTBOUND LINKS")
    print("=" * 80)

    no_outbound = [
        str(doc.resolve().relative_to(docs_resolved))
        for doc in all_docs
        if str(doc) not in doc_links
    ]
    print(f"\nTotal documents with no outbound links: {len(no_outbound)}")
    if len(no_outbound) <= 20:
        for doc in sorted(no_outbound):
            print(f"  - {doc}")
    else:
        print("\nBy category:")
        by_cat = defaultdict(list)
        for doc in no_outbound:
            cat = doc.split('/')[0] if '/' in doc else 'root'
            by_cat[cat].append(doc)
        for cat in sorted(by_cat):
            print(f"  {cat}/ ({len(by_cat[cat])} files)")

    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)

    total = len(all_docs)

    def pct(count):
        # Guard against an empty docs tree (the original divided by zero;
        # its f-string also dropped the '*' in 100*len(...) — a SyntaxError).
        return 100 * count / total if total else 0.0

    print(f"""
Total documents: {total}
Documents with internal links: {len(doc_links)} ({pct(len(doc_links)):.1f}%)
Documents with no outbound links: {len(no_outbound)} ({pct(len(no_outbound)):.1f}%)
Orphaned documents: {len(orphaned_filtered)} ({pct(len(orphaned_filtered)):.1f}%)
Broken links: {len(broken_links)}
README.md navigation files: {len(readme_files)}
""")

# Script entry point; extraction stripped the dunder underscores from the
# original ("if name == 'main'"), which would raise NameError at import.
if __name__ == '__main__':
    main()