# scripts-pattern

""" Pattern Analyst Agent

Analyzes document classification based on:
- CODITECT naming conventions
- Component template patterns
- Framework-specific structures
- Cross-reference patterns
"""
import re
import sys
import time
from pathlib import Path
from typing import Dict, List, Set

# Put the parent of this file's directory on sys.path before the project
# imports below; presumably this is what lets `core` and `analysts` resolve
# when the module is run from inside the package tree - confirm.
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.models import Document, AnalystVote
from analysts.base import BaseAnalyst
class PatternAnalyst(BaseAnalyst): """Analyst that classifies based on CODITECT-specific patterns."""
# Identifier string for this analyst. NOTE(review): presumably read by
# BaseAnalyst when it builds votes - confirm against the base class.
name = "pattern"

# CODITECT component templates, keyed by document type.
#
# Each template carries three lists that _check_template_compliance scores:
#   'required' - section headings, weight 0.25 each
#   'optional' - section headings, weight 0.10 each
#   'patterns' - content regexes, weight 0.15 each
#
# Every entry (headings included) is handed to re.search, so headings are
# matched case-insensitively anywhere in the text, not as exact lines.
# 'patterns' are additionally searched with re.MULTILINE, which is what makes
# the '^' / '$' anchors below apply per line.
COMPONENT_TEMPLATES: Dict[str, Dict[str, List[str]]] = {
    'agent': {
        'required': ['## Role', '## Capabilities'],
        'optional': ['## Responsibilities', '## Tools', '## Invocation', '## Example'],
        # subagent_type assignments and Task( calls
        'patterns': [r'subagent_type\s*[=:]\s*["\']?\w+', r'Task\s*\(']
    },
    'command': {
        'required': ['## Invocation', '## Arguments'],
        'optional': ['## Examples', '## Usage', '## Options'],
        # a line starting with /word, or any --flag
        'patterns': [r'^/\w+', r'--\w+']
    },
    'skill': {
        'required': ['## When to Use'],
        'optional': ['## Capabilities', '## Pattern', '## Examples'],
        'patterns': [r'SKILL\.md', r'skill\s*[=:]\s*["\']']
    },
    'adr': {
        'required': ['## Status', '## Context', '## Decision'],
        'optional': ['## Consequences', '## References'],
        # ADR-<number> identifiers and bolded decision states
        'patterns': [r'ADR-\d+', r'\*\*Accepted\*\*|\*\*Rejected\*\*|\*\*Superseded\*\*']
    },
    'workflow': {
        'required': [],
        'optional': ['## Steps', '## Phases', '## Pipeline'],
        # NOTE(review): the bare 'graph' alternative is an unanchored,
        # case-insensitive substring, so it also hits words such as
        # "paragraph" or "GraphQL" - confirm that is acceptable.
        'patterns': [r'```mermaid', r'flowchart|sequenceDiagram|graph']
    },
    'guide': {
        'required': [],
        'optional': ['## Prerequisites', '## Getting Started', '## Quick Start'],
        # "Step N:" markers, or numbered list items that open with bold text
        'patterns': [r'Step \d+:', r'^\d+\.\s+\*\*']
    },
    'hook': {
        'required': ['## Trigger'],
        'optional': ['## Events', '## Configuration'],
        'patterns': [r'pre-commit|post-commit|pre-push', r'hook\s*[=:]\s*["\']']
    },
    'config': {
        'required': [],
        'optional': ['## Configuration', '## Options', '## Settings'],
        # a line ending in .json/.yaml/.yml, or a double-quoted key followed by ':'
        'patterns': [r'\.json$|\.ya?ml$', r'"[^"]+"\s*:\s*']
    },
    'reference': {
        'required': [],
        'optional': ['## Overview', '## API', '## Schema'],
        'patterns': [r'## Reference', r'## Specification']
    },
    'script': {
        'required': [],
        'optional': ['## Usage', '## Arguments'],
        # Python main guard or an env shebang
        'patterns': [r'if __name__\s*==\s*["\']__main__["\']', r'#!/usr/bin/env']
    },
}

# CODITECT naming conventions, keyed by document type.
#
# Each regex is searched (case-insensitively, by analyze) against the document
# path. File and directory name segments use [\w-]+ rather than \w+ so that
# kebab-case names such as "commands/git-sync.md" or
# "adrs/0001-use-postgres.md" are recognised; \w alone does not match '-'.
NAMING_PATTERNS: Dict[str, List[str]] = {
    'agent': [
        r'-agent\.md$',
        r'agent-[\w-]+\.md$',
        r'/agents/[\w-]+\.md$',
    ],
    'command': [
        r'-command\.md$',
        r'/commands/[\w-]+\.md$',
    ],
    'skill': [
        r'SKILL\.md$',
        r'/skills/[\w-]+/SKILL\.md$',
    ],
    'adr': [
        r'ADR-\d+.*\.md$',
        r'/adrs?/[\w-]+\.md$',
    ],
    'workflow': [
        r'-workflow\.md$',
        r'/workflows?/[\w-]+\.md$',
        r'\.workflow\.ya?ml$',
    ],
    'guide': [
        r'-guide\.md$',
        r'-GUIDE\.md$',
        r'/guides?/[\w-]+\.md$',
    ],
    'reference': [
        r'REFERENCE\.md$',
        r'README\.md$',
        r'CLAUDE\.md$',
    ],
    'hook': [
        r'-hook\.md$',
        r'/hooks?/[\w-]+\.md$',
    ],
}

# Cross-reference patterns, keyed by the document type being referenced.
#
# analyze() counts every match of these regexes in the document content
# (re.findall, no flags) and uses the per-key total only to boost the
# 'reference' score; the key itself does not receive a score.
CROSS_REF_PATTERNS: Dict[str, List[str]] = {
    # markdown links whose target contains "agents/", or "agent: <name>"
    'agent': [r'\[.*\]\(.*agents/.*\)', r'agent:\s*\w+'],
    # bracketed slash commands like [/name], or "command: /name"
    'command': [r'\[/\w+\]', r'command:\s*/\w+'],
    # markdown links whose target contains "skills/", or "skill: <name>"
    'skill': [r'\[.*\]\(.*skills/.*\)', r'skill:\s*\w+'],
}

def analyze(self, document: Document) -> AnalystVote:
    """Classify a document by scoring it against CODITECT patterns.

    Four signals are folded into one per-type score table:

    1. Naming conventions (``NAMING_PATTERNS``): a path match sets the
       type's score to 0.85.
    2. Template compliance (``COMPONENT_TEMPLATES``): a non-zero template
       score replaces the type's score when it is higher.
    3. Cross-references (``CROSS_REF_PATTERNS``): every referenced type
       with at least one match adds up to 0.15 to ``'reference'``.
    4. CODITECT markers: additive boosts per type.

    For signals 3 and 4 a type that has no score yet starts from a 0.5
    baseline before the boost is added.

    Args:
        document: Object exposing ``content`` (the text to scan) and
            ``path`` (converted with ``str``).

    Returns:
        Whatever ``self._create_vote`` (defined outside this block) builds
        from the winning type, its confidence capped at 0.98, up to three
        reasons, the elapsed milliseconds and the rounded score table
        under ``metadata['all_scores']``. With no signal at all the vote
        defaults to ``'reference'`` at 0.50.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # or go negative when the wall clock is adjusted mid-analysis.
    start = time.perf_counter()

    content = document.content
    # The naming patterns are written with forward slashes; normalise
    # Windows separators so directory-based conventions match there too.
    path_str = str(document.path).replace('\\', '/')
    scores: Dict[str, float] = {}
    reasons: List[str] = []

    # Signal 1: naming conventions. Each type is visited once and the table
    # is still empty, so a match can simply be assigned.
    naming_score = 0.85
    for doc_type, patterns in self.NAMING_PATTERNS.items():
        if any(re.search(pattern, path_str, re.IGNORECASE) for pattern in patterns):
            scores[doc_type] = naming_score
            reasons.append(f"Filename matches {doc_type} convention")

    # Signal 2: component templates. Only strong matches are worth quoting
    # in the reasoning, but any non-zero score participates in the ranking.
    for doc_type, template in self.COMPONENT_TEMPLATES.items():
        template_score = self._check_template_compliance(content, template)
        if template_score > 0:
            scores[doc_type] = max(scores.get(doc_type, 0), template_score)
            if template_score > 0.70:
                reasons.append(f"Matches {doc_type} template ({template_score:.0%})")

    # Signal 3: cross-references. Documents referencing a component type
    # might be guides/references about it, so the boost goes to 'reference'.
    for patterns in self.CROSS_REF_PATTERNS.values():
        ref_count = sum(len(re.findall(pattern, content)) for pattern in patterns)
        if ref_count > 0:
            ref_boost = min(0.15, ref_count * 0.03)
            scores['reference'] = scores.get('reference', 0.5) + ref_boost

    # Signal 4: CODITECT-specific markers.
    coditect_markers = self._check_coditect_markers(content)
    for doc_type, boost in coditect_markers.items():
        scores[doc_type] = scores.get(doc_type, 0.5) + boost

    # Pick the winner; ties resolve to the type that entered the table first.
    if scores:
        best_type = max(scores, key=scores.get)
        confidence = min(0.98, scores[best_type])
        reasoning = "; ".join(reasons[:3]) if reasons else "CODITECT pattern analysis"
    else:
        best_type = 'reference'
        confidence = 0.50
        reasoning = "No strong CODITECT patterns, defaulting to reference"

    duration_ms = int((time.perf_counter() - start) * 1000)

    return self._create_vote(
        classification=best_type,
        confidence=confidence,
        reasoning=reasoning,
        duration_ms=duration_ms,
        metadata={
            'all_scores': {k: round(v, 3) for k, v in scores.items()}
        }
    )

def _check_template_compliance(self, content: str, template: Dict) -> float:
    """Check how well content matches a component template."""
    score = 0.0
    total_weight = 0.0

    # Check required sections (higher weight)
    required = template.get('required', [])
    for section in required:
        total_weight += 0.25
        if re.search(section, content, re.IGNORECASE):
            score += 0.25

    # Check optional sections (lower weight)
    optional = template.get('optional', [])
    for section in optional:
        total_weight += 0.10
        if re.search(section, content, re.IGNORECASE):
            score += 0.10

    # Check patterns
    patterns = template.get('patterns', [])
    for pattern in patterns:
        total_weight += 0.15
        if re.search(pattern, content, re.IGNORECASE | re.MULTILINE):
            score += 0.15

    if total_weight == 0:
        return 0.0

    # Normalize to 0-1 range
    normalized = score / total_weight if total_weight > 0 else 0
    return min(0.95, normalized)

def _check_coditect_markers(self, content: str) -> Dict[str, float]:
    """Check for CODITECT-specific markers and keywords."""
    boosts: Dict[str, float] = {}

    # Framework version markers
    if re.search(r'CODITECT\s*v\d+', content, re.IGNORECASE):
        boosts['reference'] = 0.10

    # Component registry references
    if 'component-activation-status' in content:
        boosts['config'] = 0.10

    # Memory system references
    if '/cxq' in content or '/cx' in content:
        boosts['command'] = 0.05
        boosts['reference'] = 0.05

    # Agent invocation patterns
    if 'Task(subagent_type' in content or '/agent ' in content:
        boosts['agent'] = 0.10
        boosts['reference'] = 0.05

    # Workflow library references
    if 'workflow-library' in content.lower():
        boosts['workflow'] = 0.10

    return boosts