# scripts-structural

""" Structural Analyst Agent

Analyzes document classification based on:

- File path patterns
- Directory location
- File extension
- File size heuristics
"""

import re
import sys
import time
from pathlib import Path

# Put this file's grandparent directory first on sys.path so the
# project imports below resolve.
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.models import Document, AnalystVote
from analysts.base import BaseAnalyst

class StructuralAnalyst(BaseAnalyst):
    """Analyst that classifies based on file structure and location.

    Four structural signals are combined, in this order: regex matches
    against the path, the file extension, the number of path components,
    and the file size.  The document type with the highest resulting score
    becomes the vote.
    """

    name = "structural"

    # Path patterns with confidence weights.
    # Each key is a regex searched case-insensitively against the
    # '/'-separated path; each value is a (document type, confidence) pair.
    PATH_PATTERNS = {
        # High confidence patterns (directory names)
        r'/agents?/': ('agent', 0.95),
        r'/commands?/': ('command', 0.95),
        r'/skills?/': ('skill', 0.95),
        r'/scripts?/': ('script', 0.90),
        r'/hooks?/': ('hook', 0.95),
        r'/workflows?/': ('workflow', 0.90),
        r'/adrs?/': ('adr', 0.90),
        r'/guides?/': ('guide', 0.85),
        r'/reference/': ('reference', 0.85),
        r'/config/': ('config', 0.90),

        # Medium confidence patterns
        r'/docs?/': ('reference', 0.70),
        r'/internal/': ('reference', 0.65),
        r'/architecture/': ('adr', 0.75),

        # Filename patterns
        r'ADR-\d+': ('adr', 0.92),
        r'SKILL\.md$': ('skill', 0.95),
        r'CLAUDE\.md$': ('reference', 0.85),
        r'README\.md$': ('reference', 0.80),
        r'CHANGELOG\.md$': ('reference', 0.85),
        r'CONTRIBUTING\.md$': ('guide', 0.80),
    }

    # Extension patterns.
    # Maps a lower-case extension (with leading dot) to per-type weights.
    EXTENSION_PATTERNS = {
        '.md': {'reference': 0.5, 'guide': 0.3, 'adr': 0.2},
        '.json': {'config': 0.8, 'reference': 0.2},
        '.yaml': {'config': 0.8, 'workflow': 0.2},
        '.yml': {'config': 0.8, 'workflow': 0.2},
        '.py': {'script': 0.9, 'reference': 0.1},
        '.sh': {'script': 0.95},
    }

    def analyze(self, document: Document) -> AnalystVote:
        """Analyze document based on structural patterns.

        Args:
            document: The document to classify.  This method reads its
                ``path`` (a pathlib-style object: ``as_posix()`` and
                ``parts`` are used), ``extension`` and ``size_bytes``.

        Returns:
            Whatever ``self._create_vote`` builds from the winning type,
            its confidence (capped at 0.98), a short reasoning string, the
            elapsed time in milliseconds, and a metadata dict holding the
            path depth, extension, size in KB and all rounded scores.
        """
        # perf_counter is monotonic, so the measured duration cannot go
        # negative or jump if the wall clock is adjusted mid-call.
        start = time.perf_counter()

        # Match against a '/'-separated form so the '/dir/' patterns also
        # work for backslash-separated paths.  The leading '/' lets a
        # relative path whose first component is the directory itself
        # (e.g. 'agents/x.md') match '/agents?/'.
        path_str = document.path.as_posix()
        if not path_str.startswith('/'):
            path_str = '/' + path_str

        scores = {}
        reasons = []  # (doc_type, message) pairs in pattern order

        # Check path patterns; keep the strongest confidence per type.
        for pattern, (doc_type, confidence) in self.PATH_PATTERNS.items():
            if not re.search(pattern, path_str, re.IGNORECASE):
                continue
            if doc_type not in scores or scores[doc_type] < confidence:
                scores[doc_type] = confidence
                reasons.append(
                    (doc_type, f"Path matches '{pattern}' → {doc_type}")
                )

        # Check extension patterns.  The lookup is case-insensitive to
        # agree with the case-insensitive path matching above; the
        # extension is still reported unchanged in the metadata.
        ext = document.extension
        ext_key = ext.lower() if isinstance(ext, str) else ext
        for doc_type, weight in self.EXTENSION_PATTERNS.get(ext_key, {}).items():
            # 0.5 acts as a neutral prior for types with no path evidence.
            base_conf = scores.get(doc_type, 0.5)
            scores[doc_type] = max(
                scores.get(doc_type, 0),
                base_conf * weight + weight * 0.3,
            )

        # Check directory depth (deeper = more likely to be specific type)
        depth = len(document.path.parts)
        if depth > 5:
            # Deep files are often more specialized; only existing
            # candidates are boosted.
            for t in ['agent', 'command', 'skill']:
                if t in scores:
                    scores[t] *= 1.05

        # Check file size heuristics.
        # NOTE(review): unlike the depth boost, these branches also seed a
        # 0.55 score for types with no other evidence, so e.g. a small
        # '.md' file with no path match is voted 'config' (0.55 beats the
        # 0.40 'reference' score from the extension table), and the
        # default fallback below is only reachable for 5-50 KB files.
        # Behaviour is kept as-is -- confirm this is intended.
        size_kb = document.size_bytes / 1024
        if size_kb > 50:
            # Large files are often references or guides
            scores['reference'] = scores.get('reference', 0.5) * 1.1
            scores['guide'] = scores.get('guide', 0.5) * 1.1
        elif size_kb < 5:
            # Small files might be configs or simple scripts
            scores['config'] = scores.get('config', 0.5) * 1.1

        # Determine best classification
        if scores:
            best_type = max(scores, key=scores.get)
            confidence = min(0.98, scores[best_type])
            if reasons:
                # Stable sort: reasons for the winning type come first so
                # the three-reason cut-off cannot drop them in favour of
                # matches for other types.
                ordered = sorted(reasons, key=lambda item: item[0] != best_type)
                reasoning = "; ".join(message for _, message in ordered[:3])
            else:
                reasoning = f"Extension {ext}, depth {depth}"
        else:
            # Default fallback
            best_type = 'reference'
            confidence = 0.50
            reasoning = "No strong structural indicators, defaulting to reference"

        duration_ms = int((time.perf_counter() - start) * 1000)

        return self._create_vote(
            classification=best_type,
            confidence=confidence,
            reasoning=reasoning,
            duration_ms=duration_ms,
            metadata={
                'path_depth': depth,
                'extension': ext,
                'size_kb': round(size_kb, 2),
                'all_scores': {k: round(v, 3) for k, v in scores.items()},
            },
        )