# scripts-readme-expert
""" README Expert - Type Expert for README Documents
Identifies README files - project introduction and orientation documents that provide overview, setup instructions, and quick navigation.
Key signals:
- README in filename
- Project overview/description sections
- Installation/setup instructions
- Quick start sections
- Badge/shield images
- License and contributing references """
import re
from typing import Dict, List
from pathlib import Path

import sys

# Put the directory two levels above this file at the front of sys.path,
# presumably so the absolute `core.models` import below resolves when the
# package is not installed -- confirm against the project layout.
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.models import Document, AnalystVote
from .base import TypeExpert, TypeAnalysis, ContentEnhancement
class ReadmeExpert(TypeExpert):
    """Expert for identifying README documents.

    Scores a document on README-specific signals (filename, well-known
    section headings, install commands, badges) and, for signals that are
    missing, proposes template content that would supply them.
    """

    EXPERT_TYPE = 'readme'

    # Heading names searched for after one or more '#' characters.
    README_SECTIONS = [
        'overview',
        'introduction',
        'getting started',
        'installation',
        'quick start',
        'usage',
        'features',
        'requirements',
        'prerequisites',
        'contributing',
        'license',
        'acknowledgments',
        'credits',
        'about',
        'what is',
        'why use',
    ]

    # Regexes (matched against lower-cased content) that suggest a README.
    README_PATTERNS = [
        r'!\[.*\]\(.*badge.*\)',  # Badges
        r'!\[.*\]\(.*shield.*\)',  # Shields
        r'npm\s+install',
        r'pip\s+install',
        r'cargo\s+add',
        r'git\s+clone',
        r'docker\s+pull',
        r'## license',
        r'## contributing',
        r'mit\s+license|apache\s+license|gpl',
    ]

    def analyze(self, document: Document, analyst_votes: List[AnalystVote]) -> TypeAnalysis:
        """Analyze if document is a README.

        Args:
            document: Document to classify; its ``path`` and ``content``
                attributes are read.
            analyst_votes: Prior analyst votes; each vote's ``classification``
                and ``agent`` attributes are read.

        Returns:
            A TypeAnalysis carrying the verdict, the confidence score, the
            evidence collected for and against, the missing signals, and a
            mapping of analysts whose vote disagrees with a README verdict.
        """
        content = document.content.lower()
        evidence_for: List[str] = []
        evidence_against: List[str] = []
        missing_signals: List[str] = []

        # Check filename - strongest signal
        doc_path = Path(document.path)
        is_readme_file = 'README' in doc_path.stem.upper()
        if is_readme_file:
            evidence_for.append(f"Filename is README: {doc_path.name}")

        # Check for README sections. Only the first four hits are recorded as
        # evidence; the full count is still passed to the confidence score.
        section_count = 0
        for section in self.README_SECTIONS:
            # re.escape keeps a section name containing regex metacharacters
            # from being interpreted as a pattern.
            if re.search(rf'#+\s*{re.escape(section)}', content, re.IGNORECASE):
                section_count += 1
                if section_count <= 4:
                    evidence_for.append(f"Has README section: '{section}'")

        # Check for README patterns (first three hits recorded as evidence).
        pattern_count = 0
        for pattern in self.README_PATTERNS:
            if re.search(pattern, content, re.IGNORECASE):
                pattern_count += 1
                if pattern_count <= 3:
                    evidence_for.append(f"Contains README pattern: '{pattern[:30]}...'")

        # Check for badges (common in READMEs)
        badge_count = len(re.findall(
            r'!\[.*?\]\(.*?(badge|shield|img\.shields).*?\)', content, re.IGNORECASE
        ))
        if badge_count > 0:
            evidence_for.append(f"Has {badge_count} badge(s)")

        # Check for installation commands inside a shell code fence. The
        # negative lookahead stops the scan at the closing fence; without it
        # DOTALL lets the match run on to any later "install"/"clone"/"pull"
        # in the document.
        if re.search(
            r'```(?:bash|sh|shell)(?:(?!```).)*?(?:install|clone|pull)',
            content,
            re.IGNORECASE | re.DOTALL,
        ):
            evidence_for.append("Has installation code blocks")

        # Check for project description at start: an H1 title, a blank line,
        # then a line with at least 20 characters of text. (Requiring a single
        # 20-character *word* here would almost never match.)
        if re.search(r'^#\s+\w+.*\n\n.{20,}', content[:500]):
            evidence_for.append("Has project title and description")

        # Evidence against
        if re.search(r'component_type:\s*(agent|command|skill|workflow|hook)', content[:500]):
            evidence_against.append("Frontmatter indicates specific component type")
        if re.search(r'you\s+are\s+(a|an|the)\s+\w+\s+agent', content, re.IGNORECASE):
            evidence_against.append("Has agent persona definition - might be agent doc")
        if len(re.findall(r'^\|\s*\w+\s*\|', content, re.MULTILINE)) > 10:
            evidence_against.append("Heavy table usage - might be reference doc")

        # Missing signals
        # NOTE(review): the original nesting of these checks could not be
        # recovered; the setup-section check is treated as independent of the
        # filename check -- confirm against the intended behaviour.
        if not is_readme_file and section_count < 3:
            missing_signals.append('readme_sections')
        if not re.search(r'#+\s*(install|getting\s+started|quick\s+start)', content, re.IGNORECASE):
            missing_signals.append('setup_section')
        if badge_count == 0 and is_readme_file:
            missing_signals.append('badges')

        # Calculate confidence
        confidence = self._calculate_confidence(
            is_readme_file, evidence_for, evidence_against, section_count, pattern_count
        )
        # A README filename lowers the bar needed for a positive verdict.
        is_readme = confidence > 0.6 or (is_readme_file and confidence > 0.4)

        # Determine which analysts to sway: only relevant for a positive
        # verdict, and only for votes outside the README-compatible classes.
        analysts_to_sway: Dict[str, str] = {}
        if is_readme:
            for vote in analyst_votes:
                if vote.classification not in ('readme', 'guide', 'reference'):
                    analysts_to_sway[vote.agent] = f"Document is README, not {vote.classification}"

        return TypeAnalysis(
            is_this_type=is_readme,
            confidence=confidence,
            evidence_for=evidence_for,
            evidence_against=evidence_against,
            semantic_purpose="Introduce and orient users to project" if is_readme else "Unknown",
            missing_signals=missing_signals,
            recommended_changes=[],
            analysts_to_sway=analysts_to_sway,
            expert_type=self.EXPERT_TYPE
        )

    def _calculate_confidence(
        self,
        is_readme_file: bool,
        evidence_for: List[str],
        evidence_against: List[str],
        section_count: int,
        pattern_count: int
    ) -> float:
        """Calculate confidence score.

        Starts from 0.7 for a README filename (0.2 otherwise), adds up to 0.2
        for evidence items and up to 0.1 for matched sections, subtracts 0.15
        per counter-evidence item, and clamps the result to [0.0, 0.98].

        ``pattern_count`` is accepted but not used in the score; pattern hits
        only contribute through the entries they add to ``evidence_for``.
        """
        # Filename is strongest signal
        base = 0.7 if is_readme_file else 0.2

        # Add for evidence
        base += min(0.2, len(evidence_for) * 0.04)
        base += min(0.1, section_count * 0.02)

        # Subtract for counter-evidence
        base -= len(evidence_against) * 0.15

        return max(0.0, min(0.98, base))

    def generate_enhancements(
        self,
        document: Document,
        analysis: TypeAnalysis
    ) -> List[ContentEnhancement]:
        """Generate enhancements for README documents.

        Produces one ContentEnhancement per recognised entry in
        ``analysis.missing_signals``; unrecognised signals are ignored.
        """
        enhancements = []
        for signal in analysis.missing_signals:
            if signal == 'readme_sections':
                enhancements.append(ContentEnhancement(
                    signal_type='readme_sections',
                    content=self._generate_readme_structure(),
                    insertion_point='after_title',
                    reason='READMEs need standard sections like Overview, Installation, Usage',
                    expected_analyst_boost={'structural': 0.2, 'content': 0.15},
                    priority=1
                ))
            elif signal == 'setup_section':
                enhancements.append(ContentEnhancement(
                    signal_type='setup_section',
                    content=self._generate_setup_section(),
                    insertion_point='after_overview',
                    reason='READMEs should have installation/getting started section',
                    expected_analyst_boost={'content': 0.15, 'semantic': 0.1},
                    priority=1
                ))
            elif signal == 'badges':
                enhancements.append(ContentEnhancement(
                    signal_type='badges',
                    content=self._generate_badges(document),
                    insertion_point='after_title',
                    reason='READMEs typically include status badges',
                    expected_analyst_boost={'pattern': 0.1},
                    priority=3
                ))
        return enhancements

    def _generate_readme_structure(self) -> str:
        """Generate standard README structure.

        Section names are emitted as Markdown headings and command
        placeholders sit inside code fences, so the headings are the ones
        ``analyze`` searches for and shell comments are not rendered as
        top-level headings.
        """
        return '''## Overview

[Brief description of what this project/component does]

### Features

- [Feature 1]
- [Feature 2]
- [Feature 3]

## Getting Started

### Prerequisites

- [Prerequisite 1]
- [Prerequisite 2]

### Installation

```bash
# Installation commands
```

## Usage

```bash
# Usage example
```

## Contributing

See CONTRIBUTING.md for guidelines.

## License

[License type]
'''

    def _generate_setup_section(self) -> str:
        """Generate setup/installation section.

        Commands are placed inside ``bash`` code fences so their ``#``
        comments are not rendered as Markdown headings.
        """
        return '''## Getting Started

### Prerequisites

- [Required tool/dependency 1]
- [Required tool/dependency 2]

### Installation

```bash
# Clone the repository
git clone [repository-url]

# Install dependencies
[package-manager] install
```

### Quick Start

```bash
# Run the project
[start command]
```
'''

    def _generate_badges(self, document: Document) -> str:
        """Generate badge placeholders.

        Returns a placeholder badge image wrapped in an empty link. The image
        URL contains "badge" / "img.shields", which is what the badge check
        in ``analyze`` looks for. ``document`` is currently unused.
        """
        return '''[![Status](https://img.shields.io/badge/status-active-success.svg)]()
'''