Skip to main content

scripts-changelog-expert

"""
Changelog Expert - Type Expert for Changelog Documents

Identifies changelog files - documents that track version history, changes,
releases, and updates over time.

Key signals:
- CHANGELOG in filename
- Version headers (## [1.0.0], ## v2.0)
- Date entries
- Added/Changed/Fixed/Removed sections
- Semantic versioning patterns
"""

import re from typing import Dict, List from pathlib import Path

import sys sys.path.insert(0, str(Path(file).parent.parent)) from core.models import Document, AnalystVote

from .base import TypeExpert, TypeAnalysis, ContentEnhancement

class ChangelogExpert(TypeExpert):
    """Expert for identifying changelog documents.

    The expert scores a document on a handful of textual signals (filename,
    version headers, change sections, dates, bullet lists, references to
    Keep a Changelog / semantic versioning) and, for signals that are
    missing, proposes Markdown snippets that would add them.
    """

    EXPERT_TYPE = 'changelog'

    # Changelog-specific sections (per Keep a Changelog format).
    # Entries are lowercase because they are matched against lowercased content.
    CHANGELOG_SECTIONS = [
        'added',
        'changed',
        'deprecated',
        'removed',
        'fixed',
        'security',
        'unreleased',
        'breaking changes',
        'new features',
        'bug fixes',
        'improvements',
        'updates',
    ]

    # Version patterns (matched case-insensitively against the raw content).
    VERSION_PATTERNS = [
        r'##\s*\[?\d+\.\d+\.\d+\]?',  # ## [1.0.0] or ## 1.0.0
        r'##\s*v\d+\.\d+',  # ## v1.0
        r'version\s+\d+\.\d+',
        r'\(\d{4}-\d{2}-\d{2}\)',  # (2024-01-15)
        r'\[\d+\.\d+\.\d+\]\s*-\s*\d{4}-\d{2}-\d{2}',  # [1.0.0] - 2024-01-15
    ]

    def analyze(self, document: Document, analyst_votes: List[AnalystVote]) -> TypeAnalysis:
        """Analyze if document is a changelog.

        Args:
            document: Document to inspect; its ``content`` and ``path``
                attributes are read.
            analyst_votes: Votes from other analysts; each vote's
                ``classification`` and ``agent`` attributes are read.

        Returns:
            A ``TypeAnalysis`` carrying the verdict, the confidence score,
            the collected evidence, the missing signals, and a mapping of
            analysts that voted for a different type to a short rationale.
        """
        content = document.content
        content_lower = content.lower()
        evidence_for: List[str] = []
        evidence_against: List[str] = []
        missing_signals: List[str] = []

        # Check filename - very strong signal. Only the stem is inspected,
        # upper-cased so the comparison is case-insensitive.
        filename = Path(document.path).stem.upper()
        is_changelog_file = (
            'CHANGELOG' in filename
            or 'HISTORY' in filename
            or 'RELEASES' in filename
        )
        if is_changelog_file:
            evidence_for.append(
                f"Filename indicates changelog: {Path(document.path).name}"
            )

        # Check for version patterns. Every match of every pattern is counted;
        # a sample match is only recorded while the evidence list is short.
        version_count = 0
        for pattern in self.VERSION_PATTERNS:
            matches = re.findall(pattern, content, re.IGNORECASE)
            version_count += len(matches)
            if matches and len(evidence_for) < 5:
                evidence_for.append(f"Has version pattern: '{matches[0]}'")

        if version_count > 3:
            evidence_for.append(f"Multiple version entries: {version_count}")

        # Check for changelog sections, written either as a Markdown heading
        # ("### Added") or as a bold line ("**Added**").
        section_count = 0
        for section in self.CHANGELOG_SECTIONS:
            # Escape the name so it is always matched literally.
            name = re.escape(section)
            if re.search(rf'#+\s*{name}|^\*\*{name}\*\*', content_lower, re.MULTILINE):
                section_count += 1
                # Only the first four sections found are listed as evidence.
                if section_count <= 4:
                    evidence_for.append(f"Has changelog section: '{section}'")

        # Check for date patterns (YYYY-MM-DD)
        date_count = len(re.findall(r'\d{4}-\d{2}-\d{2}', content))
        if date_count > 2:
            evidence_for.append(f"Has {date_count} date entries")

        # Check for bullet lists with changes ("-" or "*" list items)
        change_bullets = len(re.findall(r'^\s*[-*]\s+\w+', content, re.MULTILINE))
        if change_bullets > 10:
            evidence_for.append(f"Has {change_bullets} change entries")

        # Check for Keep a Changelog format
        if re.search(r'all\s+notable\s+changes', content_lower):
            evidence_for.append("Follows Keep a Changelog format")

        # Check for semantic versioning reference
        if re.search(r'semantic\s+versioning|semver', content_lower):
            evidence_for.append("References semantic versioning")

        # Evidence against
        if re.search(r'#+\s*(step\s+\d|how\s+to|tutorial)', content_lower):
            evidence_against.append("Has tutorial sections - might be guide")
        if re.search(r'api\s*reference|parameters?:|returns?:', content_lower):
            evidence_against.append("Has API documentation - might be reference")
        if version_count < 2 and not is_changelog_file:
            evidence_against.append("Low version count for a changelog")

        # Missing signals (consumed by generate_enhancements)
        if version_count < 2:
            missing_signals.append('version_headers')
        if section_count < 2:
            missing_signals.append('change_sections')
        if date_count < 2:
            missing_signals.append('date_entries')

        # Calculate confidence
        confidence = self._calculate_confidence(
            is_changelog_file, evidence_for, evidence_against,
            version_count, section_count
        )

        # A changelog-like filename lowers the acceptance threshold.
        is_changelog = confidence > 0.6 or (is_changelog_file and confidence > 0.4)

        # Determine which analysts to sway: only when the verdict is positive,
        # and only those that voted for some other classification.
        analysts_to_sway: Dict[str, str] = {}
        if is_changelog:
            for vote in analyst_votes:
                if vote.classification != 'changelog':
                    analysts_to_sway[vote.agent] = (
                        f"Document is changelog, not {vote.classification}"
                    )

        return TypeAnalysis(
            is_this_type=is_changelog,
            confidence=confidence,
            evidence_for=evidence_for,
            evidence_against=evidence_against,
            semantic_purpose=(
                "Track version history and changes over time"
                if is_changelog else "Unknown"
            ),
            missing_signals=missing_signals,
            recommended_changes=[],
            analysts_to_sway=analysts_to_sway,
            expert_type=self.EXPERT_TYPE
        )

    def _calculate_confidence(
        self,
        is_changelog_file: bool,
        evidence_for: List[str],
        evidence_against: List[str],
        version_count: int,
        section_count: int
    ) -> float:
        """Calculate confidence score.

        Args:
            is_changelog_file: Whether the filename looked like a changelog.
            evidence_for: Supporting evidence; only its length is used.
            evidence_against: Counter-evidence; only its length is used.
            version_count: Number of version-pattern matches.
            section_count: Number of changelog sections found.

        Returns:
            A score clamped to the range ``[0.0, 0.98]``.
        """
        # Filename is very strong signal for changelogs
        base = 0.75 if is_changelog_file else 0.15

        # Version entries are key (capped contribution)
        base += min(0.15, version_count * 0.03)

        # Sections help (capped contribution)
        base += min(0.1, section_count * 0.03)

        # Other evidence (capped contribution)
        base += min(0.08, len(evidence_for) * 0.02)

        # Subtract for counter-evidence (uncapped)
        base -= len(evidence_against) * 0.12

        return max(0.0, min(0.98, base))

    def generate_enhancements(
        self,
        document: Document,
        analysis: TypeAnalysis
    ) -> List[ContentEnhancement]:
        """Generate enhancements for changelog documents.

        Args:
            document: The analyzed document (not read by this method).
            analysis: Result of ``analyze``; its ``missing_signals`` list
                drives which enhancements are produced.

        Returns:
            One ``ContentEnhancement`` per recognised missing signal, in the
            order the signals appear in ``analysis.missing_signals``.
            Unrecognised signal names are ignored.
        """
        enhancements = []

        for signal in analysis.missing_signals:
            if signal == 'version_headers':
                enhancements.append(ContentEnhancement(
                    signal_type='version_headers',
                    content=self._generate_version_headers(),
                    insertion_point='after_title',
                    reason='Changelogs need version headers with dates',
                    expected_analyst_boost={'pattern': 0.25, 'structural': 0.15},
                    priority=1
                ))
            elif signal == 'change_sections':
                enhancements.append(ContentEnhancement(
                    signal_type='change_sections',
                    content=self._generate_change_sections(),
                    insertion_point='under_version',
                    reason='Changelogs need Added/Changed/Fixed sections',
                    expected_analyst_boost={'structural': 0.15, 'content': 0.1},
                    priority=1
                ))
            elif signal == 'date_entries':
                enhancements.append(ContentEnhancement(
                    signal_type='date_entries',
                    content=self._generate_date_format(),
                    insertion_point='version_header',
                    reason='Changelogs should include release dates',
                    expected_analyst_boost={'pattern': 0.1},
                    priority=2
                ))

        return enhancements

    def _generate_version_headers(self) -> str:
        """Generate version headers.

        The snippet uses ``##`` version headings, ``###`` section headings
        and ``-`` bullets so that it is matched by VERSION_PATTERNS and by
        the section and bullet checks in ``analyze``.
        """
        # Built from per-line literals so the text does not depend on the
        # indentation of this source file.
        return (
            '## [Unreleased]\n'
            '\n'
            '## [1.0.0] - 2024-01-15\n'
            '\n'
            '### Added\n'
            '- Initial release\n'
            '\n'
            '## [0.1.0] - 2024-01-01\n'
            '\n'
            '### Added\n'
            '- Beta release\n'
        )

    def _generate_change_sections(self) -> str:
        """Generate change sections.

        Each section is a ``###`` heading followed by a ``-`` bullet with a
        bracketed placeholder, so the headings are matched by the section
        check in ``analyze``.
        """
        return (
            '### Added\n'
            '- [New feature or capability]\n'
            '\n'
            '### Changed\n'
            '- [Modified behavior or improvement]\n'
            '\n'
            '### Fixed\n'
            '- [Bug fix]\n'
            '\n'
            '### Removed\n'
            '- [Deprecated feature removed]\n'
        )

    def _generate_date_format(self) -> str:
        """Generate date format example (an HTML comment naming the format)."""
        return '<!-- Date format: YYYY-MM-DD (ISO 8601) -->\n\n'