# Documentation Quality Skill
When to Use This Skill
Use this skill when implementing documentation quality patterns in your codebase.
How to Use This Skill
- Review the patterns and examples below
- Apply the relevant patterns to your implementation
- Follow the best practices outlined in this skill
Documentation standards validation, quality metrics, completeness checks, and consistency analysis for comprehensive documentation quality.
Core Capabilities
- Completeness Validation - All public APIs documented, examples present, parameters described
- Style Guide Enforcement - Consistent formatting, tone, structure
- Link Validation - External links valid, internal references correct
- Quality Metrics - Readability scores, coverage percentage, staleness detection
- Consistency Analysis - Terminology consistency, naming conventions
Documentation Quality Validator
scripts/doc_quality_validator.py
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional
@dataclass
class DocIssue:
    """A single documentation quality issue found by the validator."""
    file: str                         # path relative to the docs root
    line: int                         # 1-based line number; 0 = whole-file issue
    severity: str                     # 'error', 'warning', 'info'
    category: str                     # e.g. 'completeness', 'links', 'readability'
    message: str                      # human-readable description of the problem
    suggestion: Optional[str] = None  # optional remediation hint
class DocumentationQualityValidator:
    """Validate documentation quality and completeness.

    Scans the markdown files under ``docs_path`` and records
    ``DocIssue`` entries for missing files/sections, broken links,
    unlabeled code fences, inconsistent terminology, and readability
    problems.  Call :meth:`validate` to run every check, then
    :meth:`generate_report` for a markdown summary.
    """

    # Files that must exist, mapped to the section headings each must contain.
    REQUIRED_SECTIONS = {
        'README.md': ['Installation', 'Usage', 'Examples', 'License'],
        'CONTRIBUTING.md': ['Development Setup', 'Testing', 'Submitting Changes'],
        'API.md': ['Authentication', 'Endpoints', 'Error Handling']
    }

    # Matches "[text](url)" markdown links; compiled once, reused per line.
    LINK_PATTERN = re.compile(r'\[([^\]]+)\]\(([^\)]+)\)')

    def __init__(self, docs_path: str):
        """
        Args:
            docs_path: Root directory containing the documentation to check.
        """
        self.docs_path = Path(docs_path)
        self.issues: List[DocIssue] = []

    def validate(self) -> List[DocIssue]:
        """Run all validation checks and return the accumulated issues."""
        self._check_completeness()
        self._check_links()
        self._check_code_examples()
        self._check_consistency()
        self._check_readability()
        return self.issues

    def _read(self, path: Path) -> str:
        """Read a documentation file as UTF-8 text.

        BUGFIX: files were previously opened without an explicit encoding,
        making results depend on the platform default.
        """
        return path.read_text(encoding='utf-8')

    def _rel(self, path: Path) -> str:
        """Return *path* relative to the docs root, as a string."""
        return str(path.relative_to(self.docs_path))

    def _check_completeness(self):
        """Check that required files exist and contain their required sections."""
        for file_name, required_sections in self.REQUIRED_SECTIONS.items():
            file_path = self.docs_path / file_name
            if not file_path.exists():
                self.issues.append(DocIssue(
                    file=file_name,
                    line=0,  # whole-file issue
                    severity='error',
                    category='completeness',
                    message=f'Required file {file_name} is missing',
                    suggestion=f'Create {file_name} with sections: {", ".join(required_sections)}'
                ))
                continue
            content = self._read(file_path)
            for section in required_sections:
                # Plain substring match: a section heading anywhere counts.
                if section not in content:
                    self.issues.append(DocIssue(
                        file=file_name,
                        line=0,
                        severity='warning',
                        category='completeness',
                        message=f'Required section "{section}" not found',
                        suggestion=f'Add ## {section} section'
                    ))

    def _check_links(self):
        """Validate markdown links in every doc file.

        External http(s) links are skipped (checking them needs network
        access).  Anchor links are matched against the document's own
        headings; relative links are checked against the filesystem.
        """
        for md_file in self.docs_path.rglob('*.md'):
            content = self._read(md_file)
            rel_name = self._rel(md_file)
            for i, line in enumerate(content.splitlines(), 1):
                for match in self.LINK_PATTERN.finditer(line):
                    _link_text, link_url = match.groups()
                    if link_url.startswith('http'):
                        # External link - validating would require an HTTP
                        # request; deliberately skipped here.
                        continue
                    if link_url.startswith('#'):
                        # Internal anchor - check a matching heading exists.
                        anchor = link_url[1:]
                        if not self._anchor_exists(content, anchor):
                            self.issues.append(DocIssue(
                                file=rel_name,
                                line=i,
                                severity='warning',
                                category='links',
                                message=f'Broken internal link: {link_url}',
                                suggestion=f'Check anchor #{anchor} exists in document'
                            ))
                    else:
                        # Relative file link. BUGFIX: strip any "#fragment"
                        # suffix so "guide.md#setup" checks guide.md instead
                        # of a literal path containing "#".
                        target_path = md_file.parent / link_url.split('#', 1)[0]
                        if not target_path.exists():
                            self.issues.append(DocIssue(
                                file=rel_name,
                                line=i,
                                severity='error',
                                category='links',
                                message=f'Broken file link: {link_url}',
                                suggestion='Check file path is correct'
                            ))

    def _check_code_examples(self):
        """Flag fenced code blocks that lack a language identifier."""
        for md_file in self.docs_path.rglob('*.md'):
            rel_name = self._rel(md_file)
            in_code_block = False
            code_block_start = 0
            code_language = ''
            for i, line in enumerate(self._read(md_file).splitlines(), 1):
                if not line.strip().startswith('```'):
                    continue
                if not in_code_block:
                    # Start of code block: remember where, and the language
                    # tag (text after the backticks, e.g. "```python").
                    in_code_block = True
                    code_block_start = i
                    code_language = line.strip()[3:].strip()
                else:
                    # End of code block.
                    in_code_block = False
                    if not code_language:
                        self.issues.append(DocIssue(
                            file=rel_name,
                            line=code_block_start,
                            severity='warning',
                            category='code_examples',
                            message='Code block missing language identifier',
                            suggestion='Add language: ```python or ```javascript'
                        ))
            if in_code_block:
                # New check: a fence was opened but never closed, which
                # silently swallows the rest of the document when rendered.
                self.issues.append(DocIssue(
                    file=rel_name,
                    line=code_block_start,
                    severity='warning',
                    category='code_examples',
                    message='Unclosed code block',
                    suggestion='Add a closing ``` fence'
                ))

    def _check_consistency(self):
        """Check that capitalized terms use one consistent casing everywhere.

        BUGFIX: the original grouped occurrences by the exact term, so
        variants such as "GitHub" vs "GitHUB" were never compared; it then
        computed ``set(line.count(term))`` - a count of occurrences per
        line, not a set of casings.  Occurrences are now grouped
        case-insensitively and an issue is raised when more than one
        casing of the same word appears across the docs.
        """
        # lowercased term -> {casing variant -> first (file, line) seen}
        variants_by_term: Dict[str, Dict[str, tuple]] = {}
        for md_file in self.docs_path.rglob('*.md'):
            rel_name = self._rel(md_file)
            for i, line in enumerate(self._read(md_file).splitlines(), 1):
                # Candidate terms: capitalized alphanumeric words.
                for term in re.findall(r'\b[A-Z][a-zA-Z0-9]+\b', line):
                    variants = variants_by_term.setdefault(term.lower(), {})
                    variants.setdefault(term, (rel_name, i))
        for variants in variants_by_term.values():
            if len(variants) > 1:
                term, (file, line) = next(iter(variants.items()))
                self.issues.append(DocIssue(
                    file=file,
                    line=line,
                    severity='info',
                    category='consistency',
                    message=(f'Term "{term}" used with inconsistent casing '
                             f'({", ".join(sorted(variants))})'),
                    suggestion='Use consistent capitalization throughout docs'
                ))

    def _check_readability(self):
        """Flag files whose average sentence or word length is excessive."""
        for md_file in self.docs_path.rglob('*.md'):
            content = self._read(md_file)
            rel_name = self._rel(md_file)
            # BUGFIX: re.split leaves empty/whitespace fragments around the
            # terminators (and a trailing ''), which inflated the sentence
            # count and underestimated the average length; drop them.
            sentences = [s for s in re.split(r'[.!?]+', content) if s.strip()]
            words = content.split()
            avg_sentence_length = len(words) / len(sentences) if sentences else 0
            avg_word_length = sum(len(w) for w in words) / len(words) if words else 0
            # Flag overly complex writing.
            if avg_sentence_length > 25:
                self.issues.append(DocIssue(
                    file=rel_name,
                    line=0,
                    severity='info',
                    category='readability',
                    message=f'Average sentence length is {avg_sentence_length:.1f} words (recommended <25)',
                    suggestion='Break up long sentences for better readability'
                ))
            if avg_word_length > 6:
                self.issues.append(DocIssue(
                    file=rel_name,
                    line=0,
                    severity='info',
                    category='readability',
                    message=f'Average word length is {avg_word_length:.1f} characters (recommended <6)',
                    suggestion='Use simpler words where possible'
                ))

    def _anchor_exists(self, content: str, anchor: str) -> bool:
        """Return True if *anchor* matches the slug of any heading in *content*.

        Slugs follow the common GitHub convention: lowercase, spaces become
        dashes, punctuation dropped.  BUGFIX: punctuation in headings used
        to leak into the slug, so e.g. "#error-handling" never matched a
        "## Error Handling!" heading.
        """
        headings = re.findall(r'^#+\s+(.+)$', content, re.MULTILINE)
        anchors = {
            re.sub(r'[^\w\- ]', '', h).strip().lower().replace(' ', '-')
            for h in headings
        }
        return anchor in anchors

    def generate_report(self) -> str:
        """Generate a markdown quality report grouped by severity.

        BUGFIX: warnings and info issues were counted in the summary but
        never listed; all three severities now get their own section.
        """
        report = "# Documentation Quality Report\n\n"
        by_severity = {
            'Errors': [i for i in self.issues if i.severity == 'error'],
            'Warnings': [i for i in self.issues if i.severity == 'warning'],
            'Info': [i for i in self.issues if i.severity == 'info'],
        }
        report += "## Summary\n\n"
        for title, issues in by_severity.items():
            report += f"- {title}: {len(issues)}\n"
        report += "\n"
        for title, issues in by_severity.items():
            if not issues:
                continue
            report += f"## {title}\n\n"
            for issue in issues:
                report += f"### {issue.file}:{issue.line}\n"
                report += f"**Category:** {issue.category}\n\n"
                report += f"{issue.message}\n\n"
                if issue.suggestion:
                    report += f"*Suggestion:* {issue.suggestion}\n\n"
        return report
Usage
# Run every check against the docs/ tree and print the markdown report.
validator = DocumentationQualityValidator('docs/')
issues = validator.validate()
report = validator.generate_report()
print(report)
Quality Scoring System
// tools/doc-quality-scorer.ts
// Per-dimension documentation quality scores; every value is on a 0-100 scale.
interface QualityMetrics {
  completeness: number; // 0-100
  accuracy: number; // 0-100
  clarity: number; // 0-100
  consistency: number; // 0-100
  overall: number; // 0-100 (weighted combination of the four dimensions)
}
class DocumentationQualityScorer {
  /**
   * Score the documentation at `docPath` on four dimensions and fold them
   * into a weighted overall value: completeness and accuracy weigh 30%
   * each, clarity and consistency 20% each.
   */
  calculateScore(docPath: string): QualityMetrics {
    const dimensions = {
      completeness: this.scoreCompleteness(docPath),
      accuracy: this.scoreAccuracy(docPath),
      clarity: this.scoreClarity(docPath),
      consistency: this.scoreConsistency(docPath),
    };
    const overall =
      dimensions.completeness * 0.3 +
      dimensions.accuracy * 0.3 +
      dimensions.clarity * 0.2 +
      dimensions.consistency * 0.2;
    return { ...dimensions, overall };
  }

  /** Completeness: required sections, API coverage, examples present. */
  private scoreCompleteness(docPath: string): number {
    const score = 100;
    // Deduct points for missing sections
    // Deduct points for undocumented APIs
    // Deduct points for missing examples
    return score < 0 ? 0 : score; // clamp at zero
  }

  /** Accuracy: link validity, example freshness, API signature correctness. */
  private scoreAccuracy(docPath: string): number {
    const score = 100;
    // Deduct for broken links
    // Deduct for outdated code examples
    // Deduct for incorrect API signatures
    return score < 0 ? 0 : score; // clamp at zero
  }

  /** Clarity: readability, jargon, logical flow. Fixed placeholder for now. */
  private scoreClarity(docPath: string): number {
    // Calculate readability scores
    // Check for jargon without explanation
    // Verify logical flow
    return 85; // Placeholder
  }

  /** Consistency: terminology, formatting, naming. Fixed placeholder for now. */
  private scoreConsistency(docPath: string): number {
    // Check terminology consistency
    // Check formatting consistency
    // Check naming conventions
    return 90; // Placeholder
  }
}
## Usage Examples
### Validate Documentation Quality
Apply documentation-quality skill to validate all markdown files for completeness, links, and readability
### Generate Quality Report
Apply documentation-quality skill to generate comprehensive quality report with scores and recommendations
### Check Style Guide Compliance
Apply documentation-quality skill to enforce style guide rules and check consistency across all docs
Integration Points
- code-documentation-patterns - Documentation generation and standards
- qa-review-methodology - Quality assessment methodology
- comprehensive-review-patterns - Multi-dimensional quality analysis
Success Output
When successful, this skill MUST output:
✅ SKILL COMPLETE: documentation-quality
Completed:
- [x] Completeness validation performed (all required sections present)
- [x] Link validation completed (internal and external links checked)
- [x] Code examples validated (language identifiers present)
- [x] Consistency analysis finished (terminology and naming conventions)
- [x] Readability metrics calculated (sentence/word length analyzed)
- [x] Quality report generated with actionable recommendations
Outputs:
- doc-quality-report.md (comprehensive quality assessment)
- doc-issues.json (categorized issues with severity and suggestions)
- quality-metrics.json (completeness, accuracy, clarity, consistency scores)
Quality Scores:
- Completeness: XX/100
- Accuracy: XX/100
- Clarity: XX/100
- Consistency: XX/100
- Overall: XX/100
Issues Summary:
- Errors: X (blocking issues)
- Warnings: X (should fix)
- Info: X (nice to have)
Completion Checklist
Before marking this skill as complete, verify:
- All required documentation files exist (README.md, CONTRIBUTING.md, API.md, etc.)
- Required sections present in each file (Installation, Usage, Examples, etc.)
- All internal links validated (anchors and file paths exist)
- All external links validated (HTTP status checked)
- Code blocks have language identifiers (```python, ```javascript, etc.)
- Terminology used consistently across all documentation
- Readability metrics calculated (avg sentence length, avg word length)
- Quality report generated with severity-categorized issues
- Suggestions provided for each identified issue
- Overall quality score calculated (weighted average)
Failure Indicators
This skill has FAILED if:
- ❌ Required documentation files missing (README.md, API.md)
- ❌ Critical sections missing from documentation (Installation, Usage)
- ❌ Broken internal links found (> 5 broken anchors/file paths)
- ❌ Code blocks missing language identifiers (> 25% unlabeled)
- ❌ Validation script crashes or throws exceptions
- ❌ No quality report generated (output file missing)
- ❌ Overall quality score below 60/100 (failing grade)
- ❌ Documentation paths incorrect (cannot find docs/ directory)
When NOT to Use
Do NOT use this skill when:
- Documentation is in draft stage (quality validation premature)
- Creating new documentation from scratch (use code-documentation-patterns first)
- Documentation is API-generated and auto-maintained (OpenAPI/Swagger)
- Project uses external documentation platform (GitBook, Docusaurus)
- Use code-documentation-patterns for initial documentation creation
- Use api-documentation-generator for automated API docs
Alternative skills for different documentation needs:
- code-documentation-patterns - Creating documentation from scratch
- api-documentation-generator - Auto-generating API documentation
- comprehensive-review-patterns - Broader code + documentation review
Anti-Patterns (Avoid)
| Anti-Pattern | Problem | Solution |
|---|---|---|
| Validating without fixing | Issues identified but not addressed | Immediately fix errors, schedule warnings/info for next sprint |
| Checking only README.md | Misses issues in other critical docs | Validate all markdown files in docs/ recursively |
| Ignoring external link failures | Broken links hurt user experience | Either fix links or mark as deprecated with alternatives |
| Using generic quality scores | No actionable insights | Provide category-specific scores (completeness, accuracy, clarity) |
| Skipping readability metrics | Complex writing goes unnoticed | Calculate sentence/word length, flag overly complex passages |
| Not providing suggestions | Issues identified without solutions | Include "suggestion" field for every issue detected |
| Manual validation only | Inconsistent, time-consuming | Automate validation with scripts, run in CI/CD pipeline |
| One-time validation | Documentation quality degrades over time | Integrate into pre-commit hooks or CI/CD for continuous validation |
Principles
This skill embodies CODITECT foundational principles:
#5 Eliminate Ambiguity
- Clear quality metrics (completeness, accuracy, clarity, consistency)
- Severity classifications (error/warning/info) with explicit thresholds
- Specific suggestions for each issue (not just "fix this")
#6 Clear, Understandable, Explainable
- Readability metrics ensure documentation is accessible
- Consistency checks maintain terminology uniformity
- Quality report organized by severity for prioritized remediation
#7 Measurable Outcomes
- Quantitative scores (0-100 scale) for each quality dimension
- Issue counts by severity (errors, warnings, info)
- Weighted overall score combining all quality dimensions
#8 No Assumptions
- Validate all links (don't assume they're correct)
- Check all code blocks for language identifiers (don't assume present)
- Verify required sections exist (don't assume standard structure followed)
#10 Automation First
- Automated validation scripts (not manual checks)
- CI/CD integration for continuous quality monitoring
- Pre-commit hooks for early issue detection
Full Principles: CODITECT-STANDARD-AUTOMATION.md
Version: 1.1.0 | Updated: 2026-01-04 | Author: CODITECT Team