# scripts-pattern

"""Pattern Analyst Agent

Analyzes document classification based on:

- CODITECT naming conventions
- Component template patterns
- Framework-specific structures
- Cross-reference patterns
"""

import re
import sys
import time
from pathlib import Path
from typing import Dict, List, Set

# Put the directory two levels above this file at the front of sys.path.
# NOTE(review): presumably this is what lets the `core` and `analysts`
# packages imported below resolve when the file is run directly -- confirm.
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.models import Document, AnalystVote
from analysts.base import BaseAnalyst

class PatternAnalyst(BaseAnalyst):
    """Analyst that classifies based on CODITECT-specific patterns.

    Evidence is collected from four sources, in order: the document path
    (naming conventions), the document body (component templates),
    cross-references to other components, and CODITECT-specific markers.
    The document type with the highest accumulated score wins.
    """

    name = "pattern"

    # Score given to a type when the document path matches its naming rules.
    _NAMING_SCORE = 0.85
    # Floor applied to a type's score before a boost is added to it.
    _BOOST_BASELINE = 0.5

    # CODITECT component templates - required sections
    # 'required' / 'optional' hold literal section headings; 'patterns'
    # holds regular expressions searched in the document body.
    COMPONENT_TEMPLATES: Dict[str, Dict[str, List[str]]] = {
        'agent': {
            'required': ['## Role', '## Capabilities'],
            'optional': ['## Responsibilities', '## Tools', '## Invocation', '## Example'],
            'patterns': [r'subagent_type\s*[=:]\s*["\']?\w+', r'Task\s*\('],
        },
        'command': {
            'required': ['## Invocation', '## Arguments'],
            'optional': ['## Examples', '## Usage', '## Options'],
            'patterns': [r'^/\w+', r'--\w+'],
        },
        'skill': {
            'required': ['## When to Use'],
            'optional': ['## Capabilities', '## Pattern', '## Examples'],
            'patterns': [r'SKILL\.md', r'skill\s*[=:]\s*["\']'],
        },
        'adr': {
            'required': ['## Status', '## Context', '## Decision'],
            'optional': ['## Consequences', '## References'],
            'patterns': [r'ADR-\d+', r'\*\*Accepted\*\*|\*\*Rejected\*\*|\*\*Superseded\*\*'],
        },
        'workflow': {
            'required': [],
            'optional': ['## Steps', '## Phases', '## Pipeline'],
            'patterns': [r'```mermaid', r'flowchart|sequenceDiagram|graph'],
        },
        'guide': {
            'required': [],
            'optional': ['## Prerequisites', '## Getting Started', '## Quick Start'],
            'patterns': [r'Step \d+:', r'^\d+\.\s+\*\*'],
        },
        'hook': {
            'required': ['## Trigger'],
            'optional': ['## Events', '## Configuration'],
            'patterns': [r'pre-commit|post-commit|pre-push', r'hook\s*[=:]\s*["\']'],
        },
        'config': {
            'required': [],
            'optional': ['## Configuration', '## Options', '## Settings'],
            'patterns': [r'\.json$|\.ya?ml$', r'"[^"]+"\s*:\s*'],
        },
        'reference': {
            'required': [],
            'optional': ['## Overview', '## API', '## Schema'],
            'patterns': [r'## Reference', r'## Specification'],
        },
        'script': {
            'required': [],
            'optional': ['## Usage', '## Arguments'],
            'patterns': [r'if __name__\s*==\s*["\']__main__["\']', r'#!/usr/bin/env'],
        },
    }

    # CODITECT naming conventions (regexes searched in the document path)
    NAMING_PATTERNS: Dict[str, List[str]] = {
        'agent': [
            r'-agent\.md$',
            r'agent-\w+\.md$',
            r'/agents/\w+\.md$',
        ],
        'command': [
            r'-command\.md$',
            r'/commands/\w+\.md$',
        ],
        'skill': [
            r'SKILL\.md$',
            r'/skills/\w+/SKILL\.md$',
        ],
        'adr': [
            r'ADR-\d+.*\.md$',
            r'/adrs?/\w+\.md$',
        ],
        'workflow': [
            r'-workflow\.md$',
            r'/workflows?/\w+\.md$',
            r'\.workflow\.ya?ml$',
        ],
        'guide': [
            r'-guide\.md$',
            r'-GUIDE\.md$',
            r'/guides?/\w+\.md$',
        ],
        'reference': [
            r'REFERENCE\.md$',
            r'README\.md$',
            r'CLAUDE\.md$',
        ],
        'hook': [
            r'-hook\.md$',
            r'/hooks?/\w+\.md$',
        ],
    }

    # Cross-reference patterns (regexes searched in the document body)
    CROSS_REF_PATTERNS: Dict[str, List[str]] = {
        'agent': [r'\[.*\]\(.*agents/.*\)', r'agent:\s*\w+'],
        'command': [r'\[/\w+\]', r'command:\s*/\w+'],
        'skill': [r'\[.*\]\(.*skills/.*\)', r'skill:\s*\w+'],
    }

    def analyze(self, document: Document) -> AnalystVote:
        """Analyze document based on CODITECT patterns.

        Args:
            document: Object whose ``content`` (text body) and ``path`` are
                inspected.

        Returns:
            The vote built by ``self._create_vote`` for the highest-scoring
            type, with confidence capped at 0.98. When no pattern produces a
            score, the vote is ``'reference'`` with confidence 0.50. All raw
            scores (rounded to 3 decimals) are attached as
            ``metadata['all_scores']``.
        """
        # perf_counter is monotonic, so the measured duration cannot go
        # negative if the system clock is adjusted mid-analysis.
        started = time.perf_counter()

        content = document.content
        path_str = str(document.path)
        scores: Dict[str, float] = {}
        reasons: List[str] = []

        # Check naming conventions: one hit per type is enough.
        for doc_type, patterns in self.NAMING_PATTERNS.items():
            if any(re.search(p, path_str, re.IGNORECASE) for p in patterns):
                scores[doc_type] = self._NAMING_SCORE
                reasons.append(f"Filename matches {doc_type} convention")

        # Check component templates: keep the stronger of naming/template.
        for doc_type, template in self.COMPONENT_TEMPLATES.items():
            template_score = self._check_template_compliance(content, template)
            if template_score > 0:
                scores[doc_type] = max(scores.get(doc_type, 0), template_score)
                if template_score > 0.70:
                    reasons.append(f"Matches {doc_type} template ({template_score:.0%})")

        # Check cross-reference patterns.
        # Documents referencing a component type might be guides/references
        # about it, so every referenced type boosts 'reference'.
        for patterns in self.CROSS_REF_PATTERNS.values():
            ref_count = sum(len(re.findall(p, content)) for p in patterns)
            if ref_count > 0:
                self._apply_boost(scores, 'reference', min(0.15, ref_count * 0.03))

        # Check for CODITECT-specific markers.
        for doc_type, boost in self._check_coditect_markers(content).items():
            self._apply_boost(scores, doc_type, boost)

        # Determine best classification.
        if scores:
            best_type = max(scores, key=scores.get)
            confidence = min(0.98, scores[best_type])
            reasoning = "; ".join(reasons[:3]) if reasons else "CODITECT pattern analysis"
        else:
            best_type = 'reference'
            confidence = 0.50
            reasoning = "No strong CODITECT patterns, defaulting to reference"

        duration_ms = int((time.perf_counter() - started) * 1000)

        return self._create_vote(
            classification=best_type,
            confidence=confidence,
            reasoning=reasoning,
            duration_ms=duration_ms,
            metadata={
                'all_scores': {k: round(v, 3) for k, v in scores.items()}
            },
        )

    @classmethod
    def _apply_boost(cls, scores: Dict[str, float], doc_type: str, boost: float) -> None:
        """Add ``boost`` to ``scores[doc_type]``, starting from at least the baseline.

        The baseline is a floor rather than a missing-key default so that a
        weak existing score (e.g. a single optional heading) never leaves a
        type worse off than having no prior evidence at all.
        """
        current = max(scores.get(doc_type, 0.0), cls._BOOST_BASELINE)
        scores[doc_type] = current + boost

    def _check_template_compliance(self, content: str, template: Dict) -> float:
        """Check how well content matches a component template.

        Args:
            content: Document text to inspect.
            template: Mapping with optional ``'required'``, ``'optional'``
                and ``'patterns'`` lists (see ``COMPONENT_TEMPLATES``).

        Returns:
            Matched weight divided by total possible weight, capped at 0.95;
            0.0 when the template defines no checks. Required headings weigh
            0.25, optional headings 0.10 and regex patterns 0.15 each.
        """
        earned = 0.0
        possible = 0.0

        # Section headings are literal text, so escape them before searching;
        # a heading containing regex metacharacters must still match itself.
        heading_groups = (
            (0.25, template.get('required', [])),   # required: higher weight
            (0.10, template.get('optional', [])),   # optional: lower weight
        )
        for weight, headings in heading_groups:
            for heading in headings:
                possible += weight
                if re.search(re.escape(heading), content, re.IGNORECASE):
                    earned += weight

        # 'patterns' entries are real regexes; MULTILINE lets ^/$ anchor on
        # individual lines of the document.
        for pattern in template.get('patterns', []):
            possible += 0.15
            if re.search(pattern, content, re.IGNORECASE | re.MULTILINE):
                earned += 0.15

        if possible == 0:
            return 0.0

        # Normalize to 0-1 range, never reporting full certainty.
        return min(0.95, earned / possible)

    def _check_coditect_markers(self, content: str) -> Dict[str, float]:
        """Check for CODITECT-specific markers and keywords.

        Args:
            content: Document text to inspect.

        Returns:
            Mapping of document type to the total boost earned. Boosts from
            different markers that target the same type are summed.
        """
        boosts: Dict[str, float] = {}

        def add(doc_type: str, amount: float) -> None:
            # Accumulate instead of assigning so a later marker cannot
            # replace a larger boost granted by an earlier one.
            boosts[doc_type] = boosts.get(doc_type, 0.0) + amount

        # Framework version markers
        if re.search(r'CODITECT\s*v\d+', content, re.IGNORECASE):
            add('reference', 0.10)

        # Component registry references
        if 'component-activation-status' in content:
            add('config', 0.10)

        # Memory system references ('/cx' also covers '/cxq')
        if '/cx' in content:
            add('command', 0.05)
            add('reference', 0.05)

        # Agent invocation patterns
        if 'Task(subagent_type' in content or '/agent ' in content:
            add('agent', 0.10)
            add('reference', 0.05)

        # Workflow library references
        if 'workflow-library' in content.lower():
            add('workflow', 0.10)

        return boosts