#!/usr/bin/env python3
"""
CODITECT Skill Generator CLI

Generate Claude Code skills from documentation websites and GitHub repositories.

Usage:
    # From documentation URL
    python main.py docs https://docs.example.com --name my-skill

    # From GitHub repository
    python main.py repo owner/repo --depth c3x

    # Full analysis with AI enhancement
    python main.py repo facebook/react --three-stream --enhance
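
    # Analyze a local codebase without generating a skill
    python main.py analyze ./my-project --patterns-only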

Commands:
    docs      Generate skill from documentation website
    repo      Generate skill from GitHub repository
    analyze   Analyze codebase without generating skill

Author: CODITECT
Version: 1.0.0
"""

import argparse
import asyncio
import json
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional

# Local imports
from core import DocumentationScraper, ScrapeConfig, SmartCategorizer, QualityGate
from analyzers import PatternDetector, CodebaseAnalyzer
from builders import SkillBuilder, SkillMetadata
from extractors import CodeExtractor


def get_default_output_dir() -> Path:
    """Get default output directory.

    ADR-114: Generated skills go to framework installation (~/.coditect/skills).
    """
    coditect_dir = Path.home() / ".coditect" / "skills"
    coditect_dir.mkdir(parents=True, exist_ok=True)
    return coditect_dir
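
# Usage note: callers append the skill name, e.g.
#   get_default_output_dir() / "react-docs"  ->  ~/.coditect/skills/react-docs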


def create_docs_parser(subparsers):
    """Create parser for docs command."""
    parser = subparsers.add_parser(
        'docs',
        help='Generate skill from documentation website'
    )
    parser.add_argument('url', help='Documentation website URL')
    parser.add_argument('--name', required=True, help='Skill name (e.g., react-docs)')
    parser.add_argument('--output', '-o', type=Path, help='Output directory')
    parser.add_argument('--max-pages', type=int, default=100,
                        help='Maximum pages to scrape (default: 100)')
    parser.add_argument('--max-depth', type=int, default=3,
                        help='Maximum crawl depth (default: 3)')
    parser.add_argument('--llms-txt-only', action='store_true',
                        help='Only use llms.txt (skip crawling)')
    parser.add_argument('--enhance', action='store_true',
                        help='Enhance with AI (requires API key)')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    return parser


def create_repo_parser(subparsers):
    """Create parser for repo command."""
    parser = subparsers.add_parser(
        'repo',
        help='Generate skill from GitHub repository'
    )
    parser.add_argument('repo', help='GitHub repository (owner/repo or full URL)')
    parser.add_argument('--name', help='Skill name (defaults to repo name)')
    parser.add_argument('--output', '-o', type=Path, help='Output directory')
    parser.add_argument('--depth', choices=['basic', 'c3x'], default='c3x',
                        help='Analysis depth (default: c3x)')
    parser.add_argument('--three-stream', action='store_true',
                        help='Enable Code + Docs + Insights streams')
    parser.add_argument('--skip',
                        help='Skip features: patterns,tests,guides,configs,architecture')
    parser.add_argument('--languages', help='Filter languages (comma-separated)')
    parser.add_argument('--branch', default='main',
                        help='Git branch to analyze (default: main)')
    parser.add_argument('--max-issues', type=int, default=100,
                        help='Maximum issues to analyze (default: 100)')
    parser.add_argument('--enhance', action='store_true', help='Enhance with AI')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    return parser


def create_analyze_parser(subparsers):
    """Create parser for analyze command."""
    parser = subparsers.add_parser(
        'analyze',
        help='Analyze codebase without generating skill'
    )
    parser.add_argument('path', type=Path, help='Path to codebase')
    parser.add_argument('--output', '-o', type=Path, help='Output JSON file')
    parser.add_argument('--patterns-only', action='store_true',
                        help='Only detect design patterns')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    return parser


async def cmd_docs(args):
    """Handle docs command."""
    print(f"🕷️ Generating skill from: {args.url}")

    # Configure scraper
    config = ScrapeConfig(
        max_pages=args.max_pages,
        max_depth=args.max_depth,
        rate_limit=1.0,
        llms_txt_only=args.llms_txt_only,
        verbose=args.verbose
    )
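    # Note: rate_limit is assumed here to be the delay between requests in
    # seconds (1.0 = one request per second); ScrapeConfig is project-local,
    # so verify against its definition before tuning.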
    scraper = DocumentationScraper(args.url, config)

    # Scrape pages
    print("🌐 Scraping documentation...")
    pages = await scraper.scrape()

    if not pages:
        print("❌ No pages scraped. Check the URL and try again.")
        return 1
    print(f"✅ Scraped {len(pages)} pages")

    # Categorize
    print("📂 Categorizing content...")
    categorizer = SmartCategorizer()
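    # Only the first 1000 characters of each page's content are sampled below;
    # presumably the URL, title, and headings carry most of the signal.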
    for page in pages:
        result = categorizer.categorize(
            page.get("url", ""),
            page.get("title", ""),
            page.get("headings", []),
            page.get("content", "")[:1000]
        )
        page["category"] = result.category

    # Extract code blocks
    print("💻 Extracting code examples...")
    extractor = CodeExtractor()
    for page in pages:
        html = page.get("raw_html", page.get("content", ""))
        blocks = extractor.extract_from_html(html, page.get("url", ""))
        page["code_blocks"] = [b.to_dict() for b in blocks]

    # Detect patterns in code blocks
    print("🔍 Detecting design patterns...")
    detector = PatternDetector()
    patterns = []
    # Note: pattern detection on scraped snippets is unreliable without full
    # source files, so an empty pattern list is passed through to the builder.

    # Build skill
    output_dir = args.output or get_default_output_dir() / args.name
    output_dir.mkdir(parents=True, exist_ok=True)

    metadata = SkillMetadata(
        name=args.name,
        description=f"Skill generated from {args.url}",
        source_url=args.url,
        tags=["documentation", "generated"]
    )

    print("🔨 Building skill...")
    builder = SkillBuilder(output_dir, metadata)
    skill_path = builder.build_from_pages(pages, patterns, enhance_with_ai=args.enhance)
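    # --enhance hands the pages to the builder's AI pass; per the CLI help
    # text, this requires an API key to be configured.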

    # Quality check
    print("✔️ Running quality checks...")
    gate = QualityGate(output_dir)
    report = gate.validate()

    # Output summary
    print("\n" + "=" * 60)
    print("✅ SKILL GENERATION COMPLETE")
    print("=" * 60)
    print(f"📁 Output: {output_dir}")
    print(f"📄 SKILL.md: {skill_path}")
    print(f"🌐 Pages scraped: {len(pages)}")
    print(f"💻 Code blocks: {sum(len(p.get('code_blocks', [])) for p in pages)}")
    print(f"⭐ Quality score: {report.score:.1f}/10")

    if report.blocking_failures:
        print(f"⚠️ Blocking issues: {len(report.blocking_failures)}")
        for failure in report.blocking_failures[:3]:
            print(f" - {failure}")

    return 0


async def cmd_repo(args):
    """Handle repo command."""
    import subprocess
    import tempfile
    import shutil

    # Parse repo
    repo = args.repo
    if repo.startswith("https://"):
        # Extract owner/repo from a full URL, tolerating a trailing ".git"
        parts = repo.rstrip("/").split("/")
        repo = f"{parts[-2]}/{parts[-1].removesuffix('.git')}"
    owner, repo_name = repo.split("/")
    name = args.name or repo_name

    print(f"🔬 Generating skill from: {repo}")

    # Clone repository
    with tempfile.TemporaryDirectory() as tmpdir:
        repo_path = Path(tmpdir) / repo_name
        print(f"📥 Cloning repository (branch: {args.branch})...")
        try:
            subprocess.run(
                ["git", "clone", "--depth", "1", "--branch", args.branch,
                 f"https://github.com/{repo}.git", str(repo_path)],
                check=True,
                capture_output=True
            )
        except subprocess.CalledProcessError as e:
            print(f"❌ Failed to clone: {e.stderr.decode()}")
            return 1

        # Parse skip options
        skip = set()
        if args.skip:
            skip = set(args.skip.split(","))
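        # e.g. --skip tests,configs leaves skip == {"tests", "configs"}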

        # C3.x Analysis
        print("🔍 Running C3.x codebase analysis...")
        analyzer = CodebaseAnalyzer(repo_path)
        report = analyzer.analyze(
            skip_tests="tests" in skip,
            skip_guides="guides" in skip,
            skip_configs="configs" in skip,
            skip_architecture="architecture" in skip
        )

        print(f" Languages: {', '.join(report.languages.keys())}")
        print(f" Files: {report.total_files}")
        print(f" Lines: {report.total_lines:,}")
        print(f" Patterns: {len([p for r in report.pattern_reports for p in r.patterns])}")
        print(f" Tests: {len(report.test_examples)}")
        print(f" Guides: {len(report.how_to_guides)}")
        print(f" Configs: {len(report.config_patterns)}")
        if report.architecture:
            print(f" Architecture: {report.architecture.primary_pattern.value}")

        # Build pages from analysis
        pages = []

        # Add architecture overview as a page
        if report.architecture:
            arch = report.architecture
            pages.append({
                "title": "Architecture Overview",
                "url": f"repo://{repo}/architecture",
                "category": "concepts",
                "content": f"""
Architecture Overview

Primary Pattern: {arch.primary_pattern.value}
Confidence: {arch.confidence:.0%}

## Layers

{chr(10).join(f'- {layer}' for layer in arch.layers)}

## Entry Points

{chr(10).join(f'- {ep}' for ep in arch.entry_points)}
""",
                "headings": ["Layers", "Entry Points"],
                "code_blocks": []
            })
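        # (chr(10) is "\n"; f-string expressions could not contain backslashes
        # before Python 3.12, which is likely why chr(10).join() is used here.)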

        # Add test examples as pages
        for i, test in enumerate(report.test_examples[:20]):
            pages.append({
                "title": test.name,
                "url": f"repo://{repo}/tests/{i}",
                "category": "tutorials",
                "content": test.description,
                "headings": [],
                "code_blocks": [{
                    "code": test.code,
                    "language": "python"  # TODO: detect
                }]
            })

        # Add guides
        for guide in report.how_to_guides:
            pages.append({
                "title": guide.title,
                "url": f"repo://{repo}/guides/{guide.title}",
                "category": "tutorials",
                "content": guide.description + "\n\n" + "\n".join(f"- {s}" for s in guide.steps),
                "headings": guide.steps[:3],
                "code_blocks": [{"code": ex, "language": "python"} for ex in guide.code_examples]
            })

        # Add config patterns
        for config in report.config_patterns:
            pages.append({
                "title": f"Configuration: {config.name}",
                "url": f"repo://{repo}/configs/{config.name}",
                "category": "configuration",
                "content": f"Configuration file: {config.file_path}\nKeys: {', '.join(config.keys[:10])}",
                "headings": [],
                "code_blocks": []
            })

        # Convert pattern reports to serializable format
        patterns = []
        for pr in report.pattern_reports:
            for p in pr.patterns:
                patterns.append(p.to_dict())

        # Build skill
        output_dir = args.output or get_default_output_dir() / name
        output_dir.mkdir(parents=True, exist_ok=True)

        metadata = SkillMetadata(
            name=name,
            description=f"Skill generated from GitHub repository {repo}",
            source_url=f"https://github.com/{repo}",
            tags=["github", "codebase", "c3x"] + list(report.languages.keys())[:5]
        )

        print("🔨 Building skill...")
        builder = SkillBuilder(output_dir, metadata)
        skill_path = builder.build_from_pages(pages, patterns, enhance_with_ai=args.enhance)

        # Quality check
        gate = QualityGate(output_dir)
        quality_report = gate.validate()

        # Output summary
        print("\n" + "=" * 60)
        print("✅ SKILL GENERATION COMPLETE")
        print("=" * 60)
        print(f"📁 Output: {output_dir}")
        print(f"📄 SKILL.md: {skill_path}")
        print()
        print("C3.x Results:")
        print(f" [{'x' if 'patterns' not in skip else ' '}] C3.1 Patterns: {len(patterns)} detected")
        print(f" [{'x' if 'tests' not in skip else ' '}] C3.2 Tests: {len(report.test_examples)} examples")
        print(f" [{'x' if 'guides' not in skip else ' '}] C3.3 Guides: {len(report.how_to_guides)} generated")
        print(f" [{'x' if 'configs' not in skip else ' '}] C3.4 Config: {len(report.config_patterns)} patterns")
        print(f" [{'x' if 'architecture' not in skip else ' '}] C3.5 Architecture: {report.architecture.primary_pattern.value if report.architecture else 'N/A'}")
        print()
        print(f"⭐ Quality score: {quality_report.score:.1f}/10")

        return 0


def cmd_analyze(args):
    """Handle analyze command."""
    path = args.path.resolve()
    if not path.exists():
        print(f"❌ Path does not exist: {path}")
        return 1
print(f"š Analyzing codebase: {path}")
if args.patterns_only:
detector = PatternDetector()
reports = detector.detect_in_directory(path)
summary = detector.get_summary(reports)
if args.verbose:
for report in reports:
for pattern in report.patterns:
print(f" {pattern.pattern_type}: {pattern.class_name} ({pattern.confidence:.0%})")
print(f"\nš Pattern Summary:")
print(f" Files analyzed: {summary['total_files']}")
print(f" Patterns found: {summary['total_patterns']}")
print(f" By type: {summary['by_type']}")
print(f" By category: {summary['by_category']}")
print(f" Avg confidence: {summary['avg_confidence']:.0%}")
if args.output:
with open(args.output, 'w') as f:
json.dump({
"summary": summary,
"patterns": [p.to_dict() for r in reports for p in r.patterns]
}, f, indent=2)
print(f"\nš¾ Results saved to: {args.output}")
    else:
        analyzer = CodebaseAnalyzer(path)
        report = analyzer.analyze()

        print("\n📊 Codebase Report:")
        print(f" Languages: {report.languages}")
        print(f" Total files: {report.total_files}")
        print(f" Total lines: {report.total_lines:,}")
        print(f" Design patterns: {sum(len(r.patterns) for r in report.pattern_reports)}")
        print(f" Test examples: {len(report.test_examples)}")
        print(f" How-to guides: {len(report.how_to_guides)}")
        print(f" Config patterns: {len(report.config_patterns)}")

        if report.architecture:
            print("\n🏗️ Architecture:")
            print(f" Primary: {report.architecture.primary_pattern.value}")
            print(f" Secondary: {[p.value for p in report.architecture.secondary_patterns]}")
            print(f" Layers: {report.architecture.layers}")
            print(f" Confidence: {report.architecture.confidence:.0%}")

        if args.output:
            with open(args.output, 'w') as f:
                json.dump(report.to_dict(), f, indent=2)
            print(f"\n💾 Results saved to: {args.output}")

    return 0


def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description='CODITECT Skill Generator - Generate skills from docs and repos',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s docs https://docs.python.org --name python-docs
  %(prog)s repo facebook/react --depth c3x
  %(prog)s analyze ./my-project --patterns-only

For more information, see: ~/.coditect/commands/skill-from-docs.md
"""
    )

    subparsers = parser.add_subparsers(dest='command', required=True)
    create_docs_parser(subparsers)
    create_repo_parser(subparsers)
    create_analyze_parser(subparsers)

    args = parser.parse_args()

    if args.command == 'docs':
        return asyncio.run(cmd_docs(args))
    elif args.command == 'repo':
        return asyncio.run(cmd_repo(args))
    elif args.command == 'analyze':
        return cmd_analyze(args)
    return 0


if __name__ == '__main__':
    sys.exit(main())