Skip to main content

scripts-quality-gate

#!/usr/bin/env python3
"""
CODITECT Quality Gate

Validates skill generation output against CODITECT standards.
Ensures minimum quality thresholds are met before a skill is usable.

Author: CODITECT
Version: 1.0.0
"""

from dataclasses import dataclass, field from pathlib import Path from typing import Optional import json import re

@dataclass
class QualityCheck:
    """Outcome of one individual quality check."""

    # Identifier of the check (e.g. "required_sections").
    name: str
    # Whether the check succeeded.
    passed: bool
    # Score in the range 0-1.
    score: float
    # Human-readable description of the outcome.
    message: str
    # Blocking checks carry full weight and fail the gate when they fail.
    blocking: bool = True

@dataclass
class QualityReport:
    """Aggregated result of running the quality gate."""

    # Overall verdict of the gate.
    passed: bool
    # Weighted score across all checks.
    overall_score: float
    # Individual check results, in the order they were run.
    checks: list[QualityCheck] = field(default_factory=list)
    # Suggested follow-up actions.
    recommendations: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the report as a plain dictionary.

        Each check is flattened to a dict with the keys ``name``,
        ``passed``, ``score``, ``message`` and ``blocking``.
        """
        serialized_checks = []
        for check in self.checks:
            serialized_checks.append(
                {
                    "name": check.name,
                    "passed": check.passed,
                    "score": check.score,
                    "message": check.message,
                    "blocking": check.blocking,
                }
            )

        result = {
            "passed": self.passed,
            "overall_score": self.overall_score,
            "checks": serialized_checks,
            "recommendations": self.recommendations,
        }
        return result

class QualityGate:
    """
    CODITECT Quality Gate

    Validates skill output against these criteria:
    1. SKILL.md completeness (required sections)
    2. Code examples validity
    3. Cross-reference integrity
    4. Content coverage
    5. Category balance

    Problems reading SKILL.md or the metadata file are reported as failed
    checks instead of being raised, so one bad file does not abort the
    whole validation run.
    """

    REQUIRED_SKILL_SECTIONS = [
        "## Overview",
        "## When to Use",
        "## Quick Reference",
        "## Examples",
    ]

    # Not consulted by any check in this class.
    RECOMMENDED_SKILL_SECTIONS = [
        "## Key Concepts",
        "## Common Patterns",
        "## Troubleshooting",
        "## Related",
    ]

    MIN_SKILL_MD_CHARS = 100
    MIN_SKILL_MD_LINES = 100
    MIN_EXAMPLES = 3
    MIN_CATEGORIES = 2
    MIN_PAGES_PER_CATEGORY = 5  # not consulted by any check in this class
    MIN_REFERENCE_FILES = 3
    MIN_AVG_REFERENCE_BYTES = 500
    TARGET_REFERENCE_FILES = 10
    MAX_CATEGORY_SHARE = 0.5

    # Fenced code block; group 1 is the info string after the opening fence.
    # The info string may hold any non-backtick text so that tags such as
    # "c++" or "objective-c" are recognised as opening fences.
    _CODE_FENCE_RE = re.compile(r"```([^\n`]*)\n.*?```", re.DOTALL)

    def __init__(
        self,
        skill_dir: Path,
        min_score: float = 0.7
    ):
        """Create a gate for ``skill_dir``.

        Args:
            skill_dir: Directory holding SKILL.md, ``references/`` and the
                metadata file.
            min_score: Minimum overall score required for the gate to pass.
        """
        self.skill_dir = Path(skill_dir)
        self.min_score = min_score

    def validate(self) -> QualityReport:
        """Run all quality checks and return report.

        The overall score is a weighted mean of the check scores: blocking
        checks weigh 1, non-blocking checks 0.5.  The gate passes when no
        blocking check failed and the overall score reaches ``min_score``.
        """
        checks = [
            self._check_skill_md_exists(),
            self._check_required_sections(),
            self._check_skill_md_length(),
            self._check_code_examples(),
            self._check_references(),
            self._check_category_balance(),
            self._check_metadata(),
        ]

        total_weight = sum(self._weight(c) for c in checks)
        weighted_score = sum(c.score * self._weight(c) for c in checks)
        overall_score = weighted_score / total_weight if total_weight > 0 else 0.0

        blocking_failed = any(c.blocking and not c.passed for c in checks)
        passed = not blocking_failed and overall_score >= self.min_score

        return QualityReport(
            passed=passed,
            overall_score=overall_score,
            checks=checks,
            recommendations=self._generate_recommendations(checks)
        )

    @staticmethod
    def _weight(check: QualityCheck) -> float:
        """Return the scoring weight of a check (1 blocking, 0.5 otherwise)."""
        return 1 if check.blocking else 0.5

    @staticmethod
    def _failed(
        name: str,
        message: str,
        blocking: bool,
        score: float = 0.0
    ) -> QualityCheck:
        """Build a failed ``QualityCheck``."""
        return QualityCheck(
            name=name,
            passed=False,
            score=score,
            message=message,
            blocking=blocking
        )

    def _read_skill_md(self) -> tuple[Optional[str], Optional[str]]:
        """Read SKILL.md as UTF-8.

        Returns:
            ``(content, None)`` on success, or ``(None, problem)`` where
            ``problem`` is a message describing why the file is unavailable.
        """
        skill_md = self.skill_dir / "SKILL.md"
        if not skill_md.exists():
            return None, "SKILL.md not found"
        try:
            return skill_md.read_text(encoding="utf-8"), None
        except (OSError, UnicodeError) as exc:
            # Covers unreadable files, directories named SKILL.md and
            # content that is not valid UTF-8.
            return None, f"Error reading SKILL.md: {exc}"

    @staticmethod
    def _has_heading(content: str, section: str) -> bool:
        """Return True if ``content`` has a Markdown heading titled like ``section``.

        ``section`` is given as e.g. ``"## Overview"``.  The title must start
        a heading line; any heading depth is accepted, and text may follow
        the title on the same line.
        """
        title = section.lstrip("#").strip()
        pattern = rf"^[ \t]{{0,3}}#{{1,6}}[ \t]+{re.escape(title)}(?!\w)"
        return re.search(pattern, content, re.MULTILINE) is not None

    @staticmethod
    def _markdown_sizes(paths) -> list[int]:
        """Return the byte sizes of the regular files among ``paths``."""
        sizes = []
        for path in paths:
            try:
                if path.is_file():
                    sizes.append(path.stat().st_size)
            except OSError:
                # Entry vanished or is inaccessible; leave it out of the count.
                continue
        return sizes

    def _check_skill_md_exists(self) -> QualityCheck:
        """Check SKILL.md exists and is readable."""
        content, problem = self._read_skill_md()
        if problem is not None:
            return self._failed("skill_md_exists", problem, blocking=True)

        if len(content) < self.MIN_SKILL_MD_CHARS:
            return self._failed(
                "skill_md_exists",
                "SKILL.md exists but is too short",
                blocking=True,
                score=0.2
            )

        return QualityCheck(
            name="skill_md_exists",
            passed=True,
            score=1.0,
            message="SKILL.md exists and has content",
            blocking=True
        )

    def _check_required_sections(self) -> QualityCheck:
        """Check required sections are present in SKILL.md."""
        content, problem = self._read_skill_md()
        if problem is not None:
            return self._failed("required_sections", problem, blocking=True)

        required = self.REQUIRED_SKILL_SECTIONS
        missing = [s for s in required if not self._has_heading(content, s)]

        # With nothing required there is nothing to miss.
        score = (len(required) - len(missing)) / len(required) if required else 1.0
        passed = not missing

        if passed:
            message = f"All {len(required)} required sections present"
        else:
            message = f"Missing sections: {', '.join(missing)}"

        return QualityCheck(
            name="required_sections",
            passed=passed,
            score=score,
            message=message,
            blocking=True
        )

    def _check_skill_md_length(self) -> QualityCheck:
        """Check SKILL.md meets minimum length (counted in non-blank lines)."""
        content, problem = self._read_skill_md()
        if problem is not None:
            return self._failed("skill_md_length", problem, blocking=False)

        line_count = sum(1 for line in content.split('\n') if line.strip())

        return QualityCheck(
            name="skill_md_length",
            passed=line_count >= self.MIN_SKILL_MD_LINES,
            score=min(line_count / self.MIN_SKILL_MD_LINES, 1.0),
            message=f"SKILL.md has {line_count} lines (min: {self.MIN_SKILL_MD_LINES})",
            blocking=False
        )

    def _check_code_examples(self) -> QualityCheck:
        """Check code examples are present and well-formed."""
        content, problem = self._read_skill_md()
        if problem is not None:
            return self._failed("code_examples", problem, blocking=True)

        # One info string per fenced block; empty when the fence is untagged.
        info_strings = self._CODE_FENCE_RE.findall(content)
        count = len(info_strings)
        tagged = sum(1 for info in info_strings if info.strip())

        passed = count >= self.MIN_EXAMPLES
        if passed:
            message = f"{count} code examples found ({tagged} with language tags)"
        else:
            message = f"Only {count} code examples (min: {self.MIN_EXAMPLES})"

        return QualityCheck(
            name="code_examples",
            passed=passed,
            score=min(count / self.MIN_EXAMPLES, 1.0),
            message=message,
            blocking=True
        )

    def _check_references(self) -> QualityCheck:
        """Check references directory has content."""
        refs_dir = self.skill_dir / "references"

        if not refs_dir.is_dir():
            return self._failed(
                "references", "references/ directory not found", blocking=False
            )

        sizes = self._markdown_sizes(refs_dir.glob("**/*.md"))
        count = len(sizes)

        if count == 0:
            return self._failed(
                "references", "No markdown files in references/", blocking=False
            )

        avg_size = sum(sizes) / count

        return QualityCheck(
            name="references",
            passed=(
                count >= self.MIN_REFERENCE_FILES
                and avg_size > self.MIN_AVG_REFERENCE_BYTES
            ),
            score=min(count / self.TARGET_REFERENCE_FILES, 1.0),
            message=f"{count} reference files, avg {avg_size:.0f} bytes",
            blocking=False
        )

    def _check_category_balance(self) -> QualityCheck:
        """Check categories are balanced.

        A category is a direct subdirectory of ``references/`` that contains
        markdown files; markdown files directly in ``references/`` form one
        additional category.
        """
        refs_dir = self.skill_dir / "references"

        if not refs_dir.is_dir():
            return self._failed(
                "category_balance", "references/ directory not found", blocking=False
            )

        # Per-category file counts.  Kept as a list rather than a name-keyed
        # dict so a subdirectory called "root" cannot collide with the
        # root-level files.
        counts = []
        for entry in refs_dir.iterdir():
            if entry.is_dir():
                in_subdir = len(self._markdown_sizes(entry.glob("*.md")))
                if in_subdir:
                    counts.append(in_subdir)

        at_root = len(self._markdown_sizes(refs_dir.glob("*.md")))
        if at_root:
            counts.append(at_root)

        cat_count = len(counts)

        if cat_count < self.MIN_CATEGORIES:
            return self._failed(
                "category_balance",
                f"Only {cat_count} categories (min: {self.MIN_CATEGORIES})",
                blocking=False,
                score=cat_count / self.MIN_CATEGORIES
            )

        # No category should hold more than MAX_CATEGORY_SHARE of the files.
        total = sum(counts)
        max_pct = max(counts) / total if total > 0 else 0
        share = self.MAX_CATEGORY_SHARE

        return QualityCheck(
            name="category_balance",
            passed=max_pct <= share,
            score=1.0 - (max_pct - share) if max_pct > share else 1.0,
            message=f"{cat_count} categories, largest has {max_pct*100:.0f}% of content",
            blocking=False
        )

    def _check_metadata(self) -> QualityCheck:
        """Check the metadata file is complete.

        Looks for ``data/summary.json`` first and falls back to
        ``metadata.json``.  The file must hold a JSON object with the keys
        ``name``, ``base_url`` and ``pages_scraped``.
        """
        candidates = (
            self.skill_dir / "data" / "summary.json",
            self.skill_dir / "metadata.json",
        )
        metadata_path = next((p for p in candidates if p.exists()), None)

        if metadata_path is None:
            return self._failed("metadata", "No metadata file found", blocking=False)

        try:
            with open(metadata_path, encoding="utf-8") as handle:
                metadata = json.load(handle)
        except (OSError, ValueError) as exc:
            # ValueError covers invalid JSON and invalid UTF-8.
            return self._failed(
                "metadata", f"Error reading metadata: {exc}", blocking=False
            )

        if not isinstance(metadata, dict):
            # Membership tests on a list or string would give misleading results.
            return self._failed(
                "metadata", "Metadata file must contain a JSON object", blocking=False
            )

        required_fields = ["name", "base_url", "pages_scraped"]
        missing = [key for key in required_fields if key not in metadata]

        if missing:
            return self._failed(
                "metadata",
                f"Missing metadata fields: {missing}",
                blocking=False,
                score=1 - len(missing) / len(required_fields)
            )

        return QualityCheck(
            name="metadata",
            passed=True,
            score=1.0,
            message="Metadata complete",
            blocking=False
        )

    def _generate_recommendations(self, checks: list[QualityCheck]) -> list[str]:
        """Generate improvement recommendations based on check results.

        Returns one recommendation per failed check, in check order.  The
        "all passed" message is returned only when every check passed.
        """
        failed = [check for check in checks if not check.passed]
        if not failed:
            return ["All quality checks passed - skill is ready for use"]

        advice = {
            "skill_md_exists":
                "Create SKILL.md with substantive content in the skill directory",
            "required_sections":
                "Add missing sections to SKILL.md: Overview, When to Use, Quick Reference, Examples",
            "skill_md_length":
                "Expand SKILL.md with more detailed explanations and examples",
            "code_examples":
                f"Add more code examples (currently have fewer than {self.MIN_EXAMPLES})",
            "references":
                "Add more reference documentation files",
            "category_balance":
                "Balance content across categories - avoid putting everything in one category",
            "metadata":
                "Add a metadata file (data/summary.json or metadata.json) "
                "with name, base_url and pages_scraped",
        }

        recommendations = []
        for check in failed:
            tip = advice.get(check.name)
            if tip is None:
                # Unknown check name: fall back to the check's own message.
                tip = f"Resolve failed check '{check.name}': {check.message}"
            recommendations.append(tip)

        return recommendations

def validate_skill(skill_dir: str, min_score: float = 0.7) -> QualityReport:
    """Validate the skill directory at ``skill_dir`` and return its report.

    Shorthand for building a ``QualityGate`` with the given ``min_score``
    and calling ``validate()`` on it.
    """
    return QualityGate(Path(skill_dir), min_score).validate()