Skip to main content

scripts-structural

""" Structural Analyst Agent

Analyzes document classification based on:

  • File path patterns
  • Directory location
  • File extension
  • File size heuristics """

import re
import sys
import time
from pathlib import Path

# Put the directory two levels above this file on the import path before the
# project imports below run (presumably the root that holds the `core` and
# `analysts` packages -- confirm against the repository layout).
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.models import Document, AnalystVote
from analysts.base import BaseAnalyst

class StructuralAnalyst(BaseAnalyst):
    """Analyst that classifies based on file structure and location.

    Candidate document types are scored from four structural signals --
    path patterns, file extension, directory depth and file size -- and the
    highest-scoring type becomes the vote.
    """

    name = "structural"

    # Path patterns with confidence weights.
    # Each key is a regex searched case-insensitively against the normalised
    # (forward-slash, lower-cased) path; each value is a
    # (document_type, confidence) pair.
    PATH_PATTERNS = {
        # High confidence patterns (directory names)
        r'/agents?/': ('agent', 0.95),
        r'/commands?/': ('command', 0.95),
        r'/skills?/': ('skill', 0.95),
        r'/scripts?/': ('script', 0.90),
        r'/hooks?/': ('hook', 0.95),
        r'/workflows?/': ('workflow', 0.90),
        r'/adrs?/': ('adr', 0.90),
        r'/guides?/': ('guide', 0.85),
        r'/reference/': ('reference', 0.85),
        r'/config/': ('config', 0.90),

        # Medium confidence patterns
        r'/docs?/': ('reference', 0.70),
        r'/internal/': ('reference', 0.65),
        r'/architecture/': ('adr', 0.75),

        # Filename patterns
        r'ADR-\d+': ('adr', 0.92),
        r'SKILL\.md$': ('skill', 0.95),
        r'CLAUDE\.md$': ('reference', 0.85),
        r'README\.md$': ('reference', 0.80),
        r'CHANGELOG\.md$': ('reference', 0.85),
        r'CONTRIBUTING\.md$': ('guide', 0.80),
    }

    # Extension patterns.
    # Each key is a file extension including the leading dot; each value maps
    # a document type to the weight that extension contributes to it.
    EXTENSION_PATTERNS = {
        '.md': {'reference': 0.5, 'guide': 0.3, 'adr': 0.2},
        '.json': {'config': 0.8, 'reference': 0.2},
        '.yaml': {'config': 0.8, 'workflow': 0.2},
        '.yml': {'config': 0.8, 'workflow': 0.2},
        '.py': {'script': 0.9, 'reference': 0.1},
        '.sh': {'script': 0.95},
    }

    def analyze(self, document: Document) -> AnalystVote:
        """Analyze document based on structural patterns.

        Args:
            document: The document to classify. This method reads its
                ``path`` (a path object exposing ``parts``), ``extension``
                and ``size_bytes`` attributes.

        Returns:
            The value produced by ``self._create_vote`` for the
            highest-scoring type. The confidence is capped at 0.98. When no
            signal produces a score, the vote is ``'reference'`` with a
            confidence of 0.50.
        """
        # perf_counter() is monotonic, so the measured duration cannot be
        # negative or skewed by wall-clock adjustments.
        start = time.perf_counter()

        # Normalise to forward slashes so the '/dir/' patterns also match
        # paths that use Windows separators.
        path_str = document.path.as_posix().lower()
        if not path_str.startswith('/'):
            # Anchor relative paths so a leading directory component
            # (e.g. 'agents/foo.md') can match the '/agents?/' patterns.
            path_str = '/' + path_str

        scores: dict = {}
        reasons: list = []

        # Check path patterns: keep the highest confidence seen per type.
        for pattern, (doc_type, confidence) in self.PATH_PATTERNS.items():
            if not re.search(pattern, path_str, re.IGNORECASE):
                continue
            if doc_type not in scores or scores[doc_type] < confidence:
                scores[doc_type] = confidence
                reasons.append(f"Path matches '{pattern}' → {doc_type}")

        # Check extension patterns. Try the extension exactly as given first,
        # then fall back to its lower-cased form so '.MD' is treated like
        # '.md', consistent with the case-insensitive path matching above.
        ext = document.extension
        ext_weights = self.EXTENSION_PATTERNS.get(ext)
        if ext_weights is None and isinstance(ext, str):
            ext_weights = self.EXTENSION_PATTERNS.get(ext.lower())
        for doc_type, weight in (ext_weights or {}).items():
            # A type already scored by a path pattern uses that score as the
            # base; otherwise a neutral 0.5 is assumed. The extension can
            # only raise a score, never lower it.
            base_conf = scores.get(doc_type, 0.5)
            scores[doc_type] = max(
                scores.get(doc_type, 0),
                base_conf * weight + weight * 0.3,
            )

        # Check directory depth (deeper = more likely to be specific type)
        depth = len(document.path.parts)
        if depth > 5:
            # Deep files are often more specialized
            for t in ('agent', 'command', 'skill'):
                if t in scores:
                    scores[t] *= 1.05

        # Check file size heuristics
        size_kb = document.size_bytes / 1024
        if size_kb > 50:
            # Large files are often references or guides
            scores['reference'] = scores.get('reference', 0.5) * 1.1
            scores['guide'] = scores.get('guide', 0.5) * 1.1
        elif size_kb < 5:
            # Small files might be configs or simple scripts
            # NOTE(review): a file under 5 KB with no other 'config' evidence
            # still receives config = 0.55 here, which outranks the 0.40 that
            # a bare '.md' extension gives 'reference' -- confirm that this
            # is the intended outcome.
            scores['config'] = scores.get('config', 0.5) * 1.1

        # Determine best classification
        if scores:
            best_type = max(scores, key=scores.get)
            # Boost multipliers can push a raw score past 1.0; cap the
            # reported confidence.
            confidence = min(0.98, scores[best_type])
            reasoning = (
                "; ".join(reasons[:3])
                if reasons
                else f"Extension {ext}, depth {depth}"
            )
        else:
            # Default fallback
            best_type = 'reference'
            confidence = 0.50
            reasoning = "No strong structural indicators, defaulting to reference"

        duration_ms = int((time.perf_counter() - start) * 1000)

        return self._create_vote(
            classification=best_type,
            confidence=confidence,
            reasoning=reasoning,
            duration_ms=duration_ms,
            metadata={
                'path_depth': depth,
                'extension': ext,
                'size_kb': round(size_kb, 2),
                'all_scores': {k: round(v, 3) for k, v in scores.items()},
            },
        )