#!/usr/bin/env python3
# scripts-skill-builder
"""
CODITECT Skill Builder

Assembles scraped content and analysis into CODITECT-standard skills.
Generates SKILL.md with AI enhancement support.

Author: CODITECT
Version: 1.0.0
"""

import json
import re
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, Any
@dataclass
class SkillMetadata:
    """Skill metadata for frontmatter.

    Only ``name`` and ``description`` are required; every other field has a
    default. ``generated_at`` defaults to the creation time of the instance
    as a timezone-aware UTC ISO-8601 string (``datetime.utcnow()`` is
    deprecated and returns a naive value whose zone is ambiguous).
    """

    # Required identification.
    name: str
    description: str

    # Descriptive fields with defaults.
    version: str = "1.0.0"
    author: str = "CODITECT"
    skill_type: str = "reference"
    model: str = "claude-sonnet-4"
    source_url: Optional[str] = None

    # Evaluated per instance, not once at import time.
    generated_at: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )

    # default_factory gives each instance its own list.
    tags: list[str] = field(default_factory=list)
class SkillBuilder:
    """
    CODITECT Skill Builder

    Builds skills from:
    - Scraped documentation pages
    - Pattern detection results
    - Code analysis output

    Output structure:
    - SKILL.md (main file with frontmatter)
    - references/ (categorized documentation)
    - examples/ (code examples)
    - metadata.json
    """

    # Each frontmatter key sits on its own line and the block is closed with
    # '---'; otherwise the header is not parseable as frontmatter.
    SKILL_MD_TEMPLATE = '''---
type: skill
name: {name}
description: {description}
version: {version}
author: {author}
model: {model}
skill_type: {skill_type}
source_url: {source_url}
generated_at: {generated_at}
tags: {tags}
---

# {title}

## Overview

{overview}

## When to Use

Use this skill when:
{when_to_use}

## When NOT to Use

Do not use when:
{when_not_to_use}

## Quick Reference

{quick_reference}

## Key Concepts

{key_concepts}

## Examples

{examples}

## Common Patterns

{patterns}

## References

{references}

## Troubleshooting

{troubleshooting}

---

**Generated by:** CODITECT Skill Generator
**Source:** {source_url}
**Generated:** {generated_at}
'''

    # Code-fence language name -> file extension for standalone examples.
    _EXTENSIONS = {
        "python": ".py",
        "py": ".py",
        "javascript": ".js",
        "js": ".js",
        "typescript": ".ts",
        "ts": ".ts",
        "java": ".java",
        "go": ".go",
        "golang": ".go",
        "rust": ".rs",
        "cpp": ".cpp",
        "c++": ".cpp",
        "c": ".c",
        "csharp": ".cs",
        "c#": ".cs",
        "shell": ".sh",
        "bash": ".sh",
        "sh": ".sh",
        "yaml": ".yaml",
        "yml": ".yaml",
        "json": ".json",
    }

    # Line-comment prefix per extension. Extensions not listed use '#'.
    # None means the format has no comment syntax, so no header is written.
    _COMMENT_PREFIXES = {
        ".js": "//",
        ".ts": "//",
        ".java": "//",
        ".go": "//",
        ".rs": "//",
        ".cpp": "//",
        ".c": "//",
        ".cs": "//",
        ".json": None,
    }

    def __init__(
        self,
        output_dir: Path,
        metadata: "SkillMetadata"
    ):
        """
        Store the configuration and create the output directory tree.

        Args:
            output_dir: Directory that receives SKILL.md, references/,
                examples/ and metadata.json. Created if missing.
            metadata: Skill metadata used for the frontmatter and
                metadata.json.
        """
        self.output_dir = Path(output_dir)
        self.metadata = metadata

        # Ensure directories exist
        self.output_dir.mkdir(parents=True, exist_ok=True)
        (self.output_dir / "references").mkdir(exist_ok=True)
        (self.output_dir / "examples").mkdir(exist_ok=True)

    def build_from_pages(
        self,
        pages: list[dict],
        patterns: Optional[list[dict]] = None,
        enhance_with_ai: bool = False
    ) -> Path:
        """
        Build skill from scraped pages.

        Args:
            pages: List of scraped pages with content, code_blocks, category
            patterns: Optional detected design patterns
            enhance_with_ai: Whether to enhance with Claude. This method does
                not read the flag; it is kept so existing callers keep working.

        Returns:
            Path to generated SKILL.md
        """
        # Group pages by category, preserving page order within each group.
        categorized: dict[str, list[dict]] = {}
        for page in pages:
            categorized.setdefault(self._category_of(page), []).append(page)

        # Generate SKILL.md
        skill_content = self.SKILL_MD_TEMPLATE.format(
            name=self.metadata.name,
            title=self.metadata.name.replace("-", " ").title(),
            description=self.metadata.description,
            version=self.metadata.version,
            author=self.metadata.author,
            model=self.metadata.model,
            skill_type=self.metadata.skill_type,
            source_url=self.metadata.source_url or "N/A",
            generated_at=self.metadata.generated_at,
            tags=json.dumps(self.metadata.tags),
            overview=self._build_overview(pages),
            when_to_use=self._build_when_to_use(pages),
            when_not_to_use=self._build_when_not_to_use(),
            quick_reference=self._build_quick_reference(pages),
            key_concepts=self._build_key_concepts(pages),
            examples=self._build_examples(pages),
            patterns=self._build_patterns(patterns or []),
            references=self._build_references(categorized),
            troubleshooting=self._build_troubleshooting(pages),
        )

        # Explicit UTF-8: scraped documentation routinely contains characters
        # that a platform-default encoding cannot represent.
        skill_path = self.output_dir / "SKILL.md"
        skill_path.write_text(skill_content, encoding="utf-8")

        self._write_reference_files(categorized)
        self._write_example_files(pages)
        self._write_metadata(pages, patterns)

        return skill_path

    @staticmethod
    def _category_of(page: dict) -> str:
        """Return the page's category, using "other" when missing or empty."""
        return str(page.get("category") or "other")

    def _build_overview(self, pages: list[dict]) -> str:
        """Build overview section from pages.

        Uses the first three non-empty paragraphs of the first page whose
        title mentions "introduction", "overview" or "about" and that has
        content; otherwise falls back to the metadata description.
        """
        for page in pages:
            title = (page.get("title") or "").lower()
            if any(kw in title for kw in ("introduction", "overview", "about")):
                content = page.get("content") or ""
                paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()]
                if paragraphs:
                    return '\n\n'.join(paragraphs[:3])

        # Fallback: use description
        return self.metadata.description or "Documentation skill for comprehensive reference."

    def _build_when_to_use(self, pages: list[dict]) -> str:
        """Build when-to-use section as a bullet list.

        Four generic bullets are always emitted; extra bullets are added when
        the pages include the "tutorials", "api_reference" or
        "configuration" categories.
        """
        uses = [
            f"- Working with {self.metadata.name} framework or library",
            "- Need quick reference for API methods and functions",
            "- Looking for code examples and common patterns",
            "- Troubleshooting issues or errors"
        ]

        # Add category-specific uses
        categories = {p.get("category") for p in pages}
        if "tutorials" in categories:
            uses.append("- Following step-by-step tutorials")
        if "api_reference" in categories:
            uses.append("- Understanding API signatures and parameters")
        if "configuration" in categories:
            uses.append("- Configuring settings and options")

        return '\n'.join(uses)

    def _build_when_not_to_use(self) -> str:
        """Build when-not-to-use section (a fixed bullet list)."""
        return '\n'.join((
            "- Looking for general programming concepts not specific to this framework",
            "- Need version-specific information for an older/newer version",
            "- Seeking community discussions or Stack Overflow answers",
        ))

    def _build_quick_reference(self, pages: list[dict]) -> str:
        """Build quick reference from code blocks.

        Considers the first 20 code blocks whose code is between 11 and 499
        characters long and returns up to five of them as fenced blocks,
        skipping blocks whose first 50 characters were already seen.
        """
        code_blocks = [
            block
            for page in pages
            for block in page.get("code_blocks", [])
            if 10 < len(block.get("code", "")) < 500
        ]

        if not code_blocks:
            return "See Examples section below."

        examples = []
        seen_prefixes = set()
        for block in code_blocks[:20]:
            code = block.get("code", "")
            # Simple deduplication on the leading characters
            prefix = code[:50]
            if prefix in seen_prefixes:
                continue
            seen_prefixes.add(prefix)
            lang = block.get("language") or ""
            examples.append(f"```{lang}\n{code}\n```")
            if len(examples) >= 5:
                break

        return '\n\n'.join(examples)

    def _build_key_concepts(self, pages: list[dict]) -> str:
        """Extract key concepts from headings.

        Titles of pages in the "concepts" category are emitted in bold; the
        first three headings of every page are added when shorter than 50
        characters and not starting with '#'. At most ten distinct entries
        are returned, in first-seen order so the output is reproducible.
        """
        # dict keys keep insertion order; a set would make both the order and
        # the choice of the ten survivors vary between runs.
        concepts: dict[str, None] = {}

        for page in pages:
            if page.get("category") == "concepts":
                concepts[f"- **{page.get('title', 'Concept')}**"] = None

            for heading in page.get("headings", [])[:3]:
                if len(heading) < 50 and not heading.startswith('#'):
                    concepts[f"- {heading}"] = None

        if concepts:
            return '\n'.join(list(concepts)[:10])

        return "Refer to the References section for detailed concepts."

    def _build_examples(self, pages: list[dict]) -> str:
        """Build examples section with best code blocks.

        Looks at the first ten pages (tutorial pages first), at most two code
        blocks per page, and keeps blocks of 51-999 characters until five
        examples are collected.
        """
        examples = []

        # Prioritize tutorial pages
        tutorial_pages = [p for p in pages if p.get("category") == "tutorials"]
        other_pages = [p for p in pages if p.get("category") != "tutorials"]

        for page in (tutorial_pages + other_pages)[:10]:
            for block in page.get("code_blocks", [])[:2]:
                code = block.get("code", "")
                lang = block.get("language") or ""

                if 50 < len(code) < 1000:
                    title = page.get("title", "Example")
                    examples.append(f"### {title}\n\n```{lang}\n{code}\n```")

                if len(examples) >= 5:
                    break

            if len(examples) >= 5:
                break

        if examples:
            return '\n\n'.join(examples)

        return "See the references directory for code examples."

    def _build_patterns(self, patterns: list[dict]) -> str:
        """Build patterns section from detected patterns (first ten only)."""
        if not patterns:
            return "No design patterns detected. See references for implementation patterns."

        pattern_text = []
        for pattern in patterns[:10]:
            p_type = pattern.get("pattern_type", "Unknown")
            # A null confidence is treated as 0 so the format spec cannot fail.
            confidence = pattern.get("confidence") or 0
            location = pattern.get("location", "")
            pattern_text.append(
                f"- **{p_type}** (confidence: {confidence:.0%}) - Found in `{location}`"
            )

        return '\n'.join(pattern_text)

    def _build_references(self, categorized: dict[str, list]) -> str:
        """Build references section with links to category files."""
        refs = ["The following reference files are available:\n"]

        for category, pages in sorted(categorized.items()):
            if pages:
                refs.append(f"- **{category.replace('_', ' ').title()}**: {len(pages)} pages")
                # Two-space indent nests this under the category bullet.
                refs.append(f"  - See `references/{category}/`")

        return '\n'.join(refs)

    def _build_troubleshooting(self, pages: list[dict]) -> str:
        """Build troubleshooting section.

        Returns up to 1500 characters of the first non-empty page whose title
        mentions "faq", "troubleshoot" or "common issues"; otherwise a
        generic checklist.
        """
        for page in pages:
            title = (page.get("title") or "").lower()
            if any(kw in title for kw in ("faq", "troubleshoot", "common issues")):
                content = page.get("content") or ""
                if not content.strip():
                    # An empty page would leave the section blank; keep looking.
                    continue
                return content[:1500] + "..." if len(content) > 1500 else content

        return '\n'.join((
            "If you encounter issues:",
            "",
            "- Check the API reference for correct method signatures",
            "- Verify your configuration settings",
            "- Look for error messages in the examples",
            "- Consult the original documentation for updates",
        ))

    def _write_reference_files(self, categorized: dict[str, list]) -> None:
        """Write categorized reference markdown files.

        Writes references/index.md (up to 20 links per category) and one
        markdown file per page (up to 50 per category) under
        references/<category>/.
        """
        refs_dir = self.output_dir / "references"

        # Resolve filenames once so index links always match the files written.
        named = {
            category: self._assign_filenames(pages[:50])  # Limit per category
            for category, pages in categorized.items()
            if pages
        }

        # Write index
        index_parts = [f"# {self.metadata.name} Reference Index\n\n"]
        for category in sorted(named):
            index_parts.append(f"\n## {category.replace('_', ' ').title()}\n\n")
            for page, filename in named[category][:20]:
                title = page.get("title") or "Untitled"
                index_parts.append(f"- [{title}]({category}/{filename})\n")
        (refs_dir / "index.md").write_text("".join(index_parts), encoding="utf-8")

        # Write category directories
        for category, entries in named.items():
            cat_dir = refs_dir / category
            cat_dir.mkdir(exist_ok=True)
            for page, filename in entries:
                (cat_dir / filename).write_text(
                    self._render_reference_page(page), encoding="utf-8"
                )

    def _assign_filenames(self, pages: list[dict]) -> list[tuple[dict, str]]:
        """Pair each page with a unique ``<slug>.md`` filename.

        Titles that slugify to the same value get ``-2``, ``-3``, ...
        suffixes instead of overwriting each other.
        """
        used: set[str] = set()
        assigned = []
        for page in pages:
            base = self._slugify(page.get("title") or "Untitled")
            candidate = base
            suffix = 2
            while candidate in used:
                candidate = f"{base}-{suffix}"
                suffix += 1
            used.add(candidate)
            assigned.append((page, f"{candidate}.md"))
        return assigned

    def _render_reference_page(self, page: dict) -> str:
        """Render one page as markdown: title, source URL, content, then up to five code blocks."""
        title = page.get("title") or "Untitled"
        parts = [
            f"# {title}\n\n",
            f"**Source:** {page.get('url', 'N/A')}\n\n",
            page.get("content") or "",
        ]

        code_blocks = page.get("code_blocks", [])
        if code_blocks:
            parts.append("\n\n## Code Examples\n\n")
            for block in code_blocks[:5]:
                lang = block.get("language") or ""
                code = block.get("code", "")
                parts.append(f"```{lang}\n{code}\n```\n\n")

        return "".join(parts)

    def _write_example_files(self, pages: list[dict]) -> None:
        """Write standalone example files.

        Code blocks with a language and 101-1999 characters of code are
        written to examples/example_NNN<ext>; writing stops after 20 files.
        """
        examples_dir = self.output_dir / "examples"
        example_count = 0

        for page in pages:
            for block in page.get("code_blocks", []):
                code = block.get("code", "")
                lang = block.get("language", "")

                if not lang or not 100 < len(code) < 2000:
                    continue

                ext = self._get_extension(lang)
                filename = f"example_{example_count:03d}{ext}"
                header = self._example_header(page, ext)

                (examples_dir / filename).write_text(header + code, encoding="utf-8")
                example_count += 1

                if example_count >= 20:
                    return

    def _example_header(self, page: dict, ext: str) -> str:
        """Return the provenance header for an example file with extension *ext*.

        The header uses the target language's line-comment prefix; formats
        without comments (JSON) get no header so the file stays valid.
        """
        prefix = self._COMMENT_PREFIXES.get(ext, "#")
        if prefix is None:
            return ""
        return (
            f"{prefix} Example from: {page.get('title', 'Unknown')}\n"
            f"{prefix} Source: {page.get('url', 'N/A')}\n\n"
        )

    def _write_metadata(self, pages: list[dict], patterns: Optional[list[dict]]) -> None:
        """Write metadata.json.

        Categories and languages are sorted so the file is identical across
        runs, and categories use the same "other" fallback as the
        references/ directory names.
        """
        languages = {
            b.get("language")
            for p in pages
            for b in p.get("code_blocks", [])
            if b.get("language")
        }
        metadata = {
            "name": self.metadata.name,
            "description": self.metadata.description,
            "version": self.metadata.version,
            "source_url": self.metadata.source_url,
            "generated_at": self.metadata.generated_at,
            "pages_count": len(pages),
            "patterns_count": len(patterns) if patterns else 0,
            "categories": sorted({self._category_of(p) for p in pages}),
            "languages": sorted(languages),
            "tags": self.metadata.tags
        }

        metadata_path = self.output_dir / "metadata.json"
        with open(metadata_path, 'w', encoding="utf-8") as f:
            json.dump(metadata, f, indent=2)

    def _slugify(self, text: str) -> str:
        """Convert text to URL-safe slug.

        Lowercases, drops characters other than word characters, whitespace
        and hyphens, collapses whitespace/hyphen runs into one hyphen, and
        truncates to 50 characters. Returns "untitled" when nothing usable
        remains, so callers never get an empty filename stem.
        """
        slug = re.sub(r'[^\w\s-]', '', text.lower())
        slug = re.sub(r'[-\s]+', '-', slug)
        slug = slug[:50].strip('-')
        return slug or "untitled"

    def _get_extension(self, language: str) -> str:
        """Get file extension for language (case-insensitive); ".txt" when unknown."""
        return self._EXTENSIONS.get(language.strip().lower(), ".txt")