#!/usr/bin/env python3
"""
Research Triage Automation (ADR-208)

Automates research outcome triage:

- Updates manifest YAML with outcome fields
- Updates master registry README.md counts
- Registers product outcomes in projects.db
- Suggests TRACK entries for product/component outcomes

Task ID: H.15.3.1-H.15.3.4
Created: 2026-02-16
ADR: ADR-208 Research-to-Product Promotion Lifecycle

Usage:
    python3 scripts/research_triage.py --list        # List un-triaged manifests
    python3 scripts/research_triage.py --manifest <file> --outcome <outcome>
                                                     # Triage a specific manifest
    python3 scripts/research_triage.py --manifest <file> \
        --outcome product --repository submodules/products/X
                                                     # Non-interactive triage
    python3 scripts/research_triage.py --stats       # Show triage statistics
    python3 scripts/research_triage.py --validate    # Validate all manifests
"""

import argparse
import json
import os
import re
import sqlite3
import sys
import uuid
from contextlib import closing
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

# Resolve paths relative to coditect-core root.
# resolve() makes SCRIPT_DIR absolute, so the derived paths do not depend on
# the working directory the script was launched from.
SCRIPT_DIR = Path(__file__).resolve().parent
CORE_ROOT = SCRIPT_DIR.parent
MANIFESTS_DIR = CORE_ROOT / "internal" / "research" / "manifests"
REGISTRY_PATH = MANIFESTS_DIR / "README.md"
SCHEMA_PATH = CORE_ROOT / "config" / "schemas" / "research-manifest-v1.schema.json"
TRACKS_DIR = CORE_ROOT / "internal" / "project" / "plans" / "tracks"

# Add the repository root to sys.path so `scripts.core.*` can be imported.
sys.path.insert(0, str(CORE_ROOT))

try:
    from scripts.core.paths import get_projects_db_path
except ImportError:
    def get_projects_db_path() -> Path:
        """Locate projects.db when the shared path helper cannot be imported.

        Returns the first candidate location that exists on disk; if none
        exists, the first candidate is returned.
        """
        relative_db = Path(".coditect-data") / "context-storage" / "projects.db"
        user_home = Path.home()
        locations = (
            user_home / "PROJECTS" / relative_db,
            user_home / relative_db,
        )
        return next((loc for loc in locations if loc.exists()), locations[0])

# Outcomes a manifest can be triaged into ("monitor" is the un-triaged state
# and is handled separately by the code that reads manifests).
VALID_OUTCOMES = [
    "product",
    "prototype",
    "component",
    "archive",
]

# Categories accepted for metadata.category during validation.
VALID_CATEGORIES = [
    "technology-evaluation",
    "academic",
    "competitive-intelligence",
    "business-market",
    "domain",
    "process-internal",
]

def load_yaml(path: Path) -> Dict[str, Any]:
    """Load a YAML manifest from *path*.

    PyYAML is used when it is installed; otherwise the built-in minimal
    parser is used so the script has no hard third-party dependency.

    Returns:
        The parsed document; an empty document yields ``{}``.
    """
    try:
        import yaml
    except ImportError:
        # Fallback: basic YAML parsing for simple manifests
        return _parse_yaml_simple(path)

    # Read as UTF-8 explicitly: manifests are written with allow_unicode=True,
    # and the platform default encoding is not UTF-8 everywhere.
    with open(path, encoding="utf-8") as f:
        return yaml.safe_load(f) or {}

def _parse_yaml_simple(path: Path) -> Dict[str, Any]:
    """Minimal YAML parser for flat/nested manifest fields.

    Understands only a small subset of YAML: top-level ``key: value`` pairs,
    one level of nested ``key: value`` pairs under a top-level key, and lists
    of scalars under a nested key. Every scalar is returned as a string with
    surrounding quotes removed.

    Args:
        path: File to read (decoded as UTF-8).

    Returns:
        Dict of the parsed fields.
    """
    result: Dict[str, Any] = {}
    current_section = None  # top-level key whose children are being read
    current_list = None     # nested key currently collecting "- item" lines

    def _unquote(raw: str) -> str:
        return raw.strip().strip('"').strip("'")

    def _is_empty_literal(raw: str) -> bool:
        # `key: ""` is an explicit empty string, not the start of a section.
        return raw.strip() in ('""', "''")

    with open(path, encoding="utf-8") as f:
        for line in f:
            stripped = line.rstrip()
            body = stripped.lstrip()
            # Skip blank lines and comments, including indented comments
            # (which could otherwise be mistaken for "key: value" pairs).
            if not body or body.startswith('#'):
                continue

            # List items belong to the most recently opened list key,
            # whatever their indentation depth.
            if body.startswith('- ') and current_list is not None:
                val = _unquote(body[2:])
                if stripped != body or current_section:
                    result.setdefault(current_section, {}).setdefault(current_list, []).append(val)
                else:
                    result.setdefault(current_list, []).append(val)
                continue

            # Top-level key
            if ':' in stripped and not stripped.startswith(' '):
                key, _, raw = stripped.partition(':')
                key = key.strip()
                val = _unquote(raw)
                if val or _is_empty_literal(raw):
                    result[key] = val
                else:
                    # No value: the key opens a section of nested keys.
                    current_section = key
                    current_list = None
                    if key not in result:
                        result[key] = {}
                continue

            # Nested key
            if ':' in stripped and stripped.startswith('  '):
                key, _, raw = stripped.partition(':')
                key = key.strip()
                val = _unquote(raw)
                if current_section:
                    if val or _is_empty_literal(raw):
                        result.setdefault(current_section, {})[key] = val
                        current_list = None
                    else:
                        # No value: the key opens a list of scalars.
                        current_list = key
                        result.setdefault(current_section, {})[key] = []
                continue

    return result

def dump_yaml(data: Dict[str, Any], path: Path):
    """Write *data* to *path* as a YAML manifest with a two-line header.

    PyYAML is used when it is installed; otherwise the built-in minimal
    writer is used. Key order of *data* is preserved.
    """
    try:
        import yaml
    except ImportError:
        _dump_yaml_simple(data, path)
        return

    stamp = datetime.now(timezone.utc).strftime('%Y-%m-%d')
    # allow_unicode=True emits non-ASCII characters verbatim, so the file must
    # be opened as UTF-8 rather than with the platform default encoding.
    with open(path, 'w', encoding='utf-8') as f:
        f.write("# Research Manifest (ADR-207 + ADR-208)\n")
        f.write(f"# Updated: {stamp} (research-triage automation)\n\n")
        yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)

def _dump_yaml_simple(data: Dict[str, Any], path: Path):
    """Minimal YAML writer for manifest data.

    Writes a two-line comment header followed by the entries of *data* in
    their existing order. Dicts become nested blocks, lists become ``- item``
    sequences indented under their key, and scalars are passed through
    ``_quote``. Values inside a list-of-dicts item are written as scalars.

    Args:
        data: Manifest mapping to serialize.
        path: Destination file; overwritten, encoded as UTF-8.
    """
    lines = [
        "# Research Manifest (ADR-207 + ADR-208)",
        f"# Updated: {datetime.now(timezone.utc).strftime('%Y-%m-%d')} (research-triage automation)",
        "",
    ]

    def emit(key, val, indent=0):
        pad = "  " * indent
        if isinstance(val, dict):
            lines.append(f"{pad}{key}:")
            for child_key, child_val in val.items():
                emit(child_key, child_val, indent + 1)
            return
        if not isinstance(val, list):
            lines.append(f"{pad}{key}: {_quote(val)}")
            return

        lines.append(f"{pad}{key}:")
        for item in val:
            if not isinstance(item, dict):
                lines.append(f"{pad}  - {_quote(item)}")
                continue
            # First pair of a mapping item carries the dash; the remaining
            # pairs are aligned underneath it.
            for position, (k, v) in enumerate(item.items()):
                lead = "  - " if position == 0 else "    "
                lines.append(f"{pad}{lead}{k}: {_quote(v)}")

    for key, val in data.items():
        emit(key, val)

    # Explicit UTF-8 so non-ASCII text does not depend on the locale encoding.
    with open(path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines) + '\n')

def _quote(val) -> str:
    """Render *val* as a YAML scalar, double-quoting it when required.

    ``None`` becomes an empty quoted string. Any other value is converted
    with ``str()`` and wrapped in double quotes if it contains a character
    from the special set below or is one of the reserved lowercase words
    (or empty); otherwise it is returned unchanged.
    """
    if val is None:
        return '""'

    text = str(val)
    has_special = not set(':#{}[],&*?|-<>=!%@`').isdisjoint(text)
    is_reserved = text in {'true', 'false', 'yes', 'no', 'null', ''}
    return f'"{text}"' if has_special or is_reserved else text

# =============================================================================
# Core Operations
# =============================================================================

def list_manifests(untriaged_only: bool = True) -> List[Dict[str, Any]]:
    """List all manifests, optionally filtering to un-triaged only.

    A manifest counts as un-triaged when its ``outcome`` is ``"monitor"``,
    missing, or empty.

    Args:
        untriaged_only: When True, manifests with any other outcome are skipped.

    Returns:
        One summary dict per manifest, ordered by file name, with keys
        ``filename``, ``path``, ``research_id``, ``topic``, ``category``,
        ``recommendation``, ``confidence`` and ``outcome``.
    """
    manifests = []
    for path in sorted(MANIFESTS_DIR.glob("*.yaml")):
        data = load_yaml(path)

        # `metadata:` with no children parses as None (or as a scalar when
        # malformed); treat anything that is not a mapping as empty.
        meta = data.get("metadata")
        if not isinstance(meta, dict):
            meta = {}

        # An empty `outcome:` means no decision was recorded yet.
        outcome = data.get("outcome") or "monitor"
        if untriaged_only and outcome != "monitor":
            continue

        manifests.append({
            "filename": path.name,
            "path": path,
            "research_id": meta.get("research_id", path.stem),
            "topic": meta.get("topic", ""),
            # `or` (not a .get default) so null values also fall back; the
            # CLI table applies string format specs to these fields.
            "category": meta.get("category") or "unknown",
            "recommendation": meta.get("recommendation") or "MONITOR",
            "confidence": meta.get("confidence") or "LOW",
            "outcome": outcome,
        })

    return manifests

def _default_outcome_details(outcome: str, rationale: str, today: str) -> Dict[str, Any]:
    """Build the blank outcome-specific section used when no details are supplied."""
    if outcome == "product":
        return {
            "repository": "",
            "project_id": "",
            "track_tasks": [],
            "launch_target": "",
        }
    if outcome == "prototype":
        return {
            "repository": "",
            "graduation_criteria": [],
            "graduation_target": "",
        }
    if outcome == "component":
        return {
            "type": "skill",
            "name": "",
            "description": "",
            "path": "",
            "track_tasks": [],
        }
    if outcome == "archive":
        return {
            "reason": rationale,
            "lessons_learned": [],
            "reusable_artifacts": [],
            "archived_date": today,
        }
    raise ValueError(f"Invalid outcome '{outcome}'. Must be one of: {VALID_OUTCOMES}")


def triage_manifest(
    manifest_path: Path,
    outcome: str,
    rationale: str,
    outcome_details: Optional[Dict[str, Any]] = None,
    dry_run: bool = False,
) -> Dict[str, Any]:
    """
    Triage a single manifest with the given outcome.

    Steps: set the outcome fields on the manifest, update the registry
    README, register product outcomes in projects.db, and collect TRACK
    suggestions for product/component outcomes.

    Args:
        manifest_path: Manifest file to load and (unless dry_run) rewrite.
        outcome: One of VALID_OUTCOMES.
        rationale: Text stored as ``outcome_rationale``.
        outcome_details: Section stored under the ``outcome`` key; when empty
            or None a blank template for that outcome is used.
        dry_run: When True the manifest is not written; the flag is also
            passed on to the registry and project registration steps.

    Returns:
        A result dict with ``manifest``, ``outcome``, ``actions`` and, for
        product/component outcomes, ``track_suggestions``.

    Raises:
        ValueError: If *outcome* is not in VALID_OUTCOMES.
    """
    if outcome not in VALID_OUTCOMES:
        raise ValueError(f"Invalid outcome '{outcome}'. Must be one of: {VALID_OUTCOMES}")

    data = load_yaml(manifest_path)
    # `metadata:` with no children loads as None; item assignment below needs
    # a real mapping.
    meta = data.get("metadata")
    if not isinstance(meta, dict):
        meta = {}
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    result = {"manifest": manifest_path.name, "outcome": outcome, "actions": []}

    # 1. Update manifest with outcome fields
    data["outcome"] = outcome
    data["outcome_date"] = today
    data["outcome_rationale"] = rationale

    # Update recommendation based on outcome; a LOW confidence is raised,
    # any other confidence value is left as it is.
    if outcome in ("product", "component"):
        meta["recommendation"] = "ADOPT"
        if meta.get("confidence") == "LOW":
            meta["confidence"] = "HIGH"
    elif outcome == "prototype":
        meta["recommendation"] = "ADOPT"
        if meta.get("confidence") == "LOW":
            meta["confidence"] = "MEDIUM"

    data["metadata"] = meta

    # Add outcome-specific details
    data[outcome] = outcome_details or _default_outcome_details(outcome, rationale, today)

    result["actions"].append(f"Updated manifest with outcome={outcome}")

    if not dry_run:
        dump_yaml(data, manifest_path)
        result["actions"].append(f"Wrote {manifest_path.name}")

    # 2. Update registry README.md
    registry_updates = update_registry(manifest_path.name, data, dry_run=dry_run)
    result["actions"].extend(registry_updates)

    # 3. Register in projects.db for product outcomes
    if outcome == "product":
        project_result = register_project(data, dry_run=dry_run)
        result["actions"].extend(project_result)

    # 4. Suggest TRACK entries
    if outcome in ("product", "component"):
        track_suggestions = suggest_track_entries(data, outcome)
        result["track_suggestions"] = track_suggestions

    return result

def _count_promoted_artifacts(manifest: Dict[str, Any]) -> int:
    """Count artifacts in *manifest* whose status is "promoted" or "integrated"."""
    artifacts = manifest.get("artifacts")
    if not isinstance(artifacts, list):
        return 0
    return sum(
        1
        for artifact in artifacts
        if isinstance(artifact, dict) and artifact.get("status") in ("promoted", "integrated")
    )


def update_registry(manifest_name: str, data: Dict[str, Any], dry_run: bool = False) -> List[str]:
    """Update the master registry README.md with updated counts.

    Rewrites the table row for *manifest_name* from *data*, then recomputes
    the summary metrics from the manifest files currently on disk and
    refreshes the "Last Updated" date.

    Args:
        manifest_name: File name of the manifest as it appears in the registry.
        data: Manifest content supplying recommendation, confidence and
            artifacts for the row.
        dry_run: When True the README is not written.

    Returns:
        Human-readable descriptions of the actions taken (or warnings).
    """
    actions: List[str] = []

    if not REGISTRY_PATH.exists():
        actions.append("WARN: Registry README.md not found — skipping")
        return actions

    # Explicit UTF-8: the registry is Markdown and must not depend on the
    # platform default encoding.
    content = REGISTRY_PATH.read_text(encoding="utf-8")
    meta = data.get("metadata")
    if not isinstance(meta, dict):
        meta = {}
    rec = meta.get("recommendation", "MONITOR")
    conf = meta.get("confidence", "LOW")

    # Count promoted artifacts
    promoted = _count_promoted_artifacts(data)

    # Update the individual manifest row in registry
    # Pattern: | `manifest_name` | OLD_REC | OLD_CONF | OLD_PROMOTED |
    row_pattern = re.compile(
        r'\| `' + re.escape(manifest_name) + r'` \| \w+ \| \w+ \| \d+ \|'
    )
    new_row = f"| `{manifest_name}` | {rec} | {conf} | {promoted} |"

    if row_pattern.search(content):
        # A callable replacement inserts new_row literally; as a string it
        # would be treated as a template and backslashes would be interpreted.
        content = row_pattern.sub(lambda _match: new_row, content)
        actions.append(f"Updated registry row for {manifest_name}")
    else:
        actions.append(f"WARN: Could not find registry row for {manifest_name}")

    # Recount totals by parsing every manifest on disk
    all_manifests = list(MANIFESTS_DIR.glob("*.yaml"))
    outcome_counts = {"product": 0, "prototype": 0, "component": 0, "archive": 0, "monitor": 0}
    total_promoted = 0
    with_promotions = 0

    for mpath in all_manifests:
        mdata = load_yaml(mpath)
        o = mdata.get("outcome", "monitor")
        # Anything that is not a known outcome string counts as "monitor";
        # the isinstance check keeps unhashable values out of the dict lookup.
        if isinstance(o, str) and o in outcome_counts:
            outcome_counts[o] += 1
        else:
            outcome_counts["monitor"] += 1
        m_promoted = _count_promoted_artifacts(mdata)
        total_promoted += m_promoted
        if m_promoted > 0:
            with_promotions += 1

    total_manifests = len(all_manifests)
    triaged = sum(v for k, v in outcome_counts.items() if k != "monitor")

    # Update summary table metrics
    content = re.sub(
        r'\| Total Manifests \| \d+ \|',
        f'| Total Manifests | {total_manifests} |',
        content
    )
    content = re.sub(
        r'\| Manifests with Promotions \| \d+ \|',
        f'| Manifests with Promotions | {with_promotions} |',
        content
    )
    content = re.sub(
        r'\| Total Promoted Artifacts \| \d+ \|',
        f'| Total Promoted Artifacts | {total_promoted} |',
        content
    )
    content = re.sub(
        r'\| Triaged Outcomes \(ADR-208\) \| \d+ \|',
        f'| Triaged Outcomes (ADR-208) | {triaged} |',
        content
    )

    # Update monitor count in outcomes table
    content = re.sub(
        r'\| \*\*Monitor\*\* \| \d+ \|',
        f'| **Monitor** | {outcome_counts["monitor"]} |',
        content
    )

    # Update Last Updated date
    content = re.sub(
        r'\*\*Last Updated:\*\* \d{4}-\d{2}-\d{2}',
        f'**Last Updated:** {datetime.now(timezone.utc).strftime("%Y-%m-%d")}',
        content
    )

    actions.append(f"Updated registry totals: {total_manifests} manifests, {triaged} triaged, {outcome_counts['monitor']} monitor")

    if not dry_run:
        REGISTRY_PATH.write_text(content, encoding="utf-8")
        actions.append("Wrote updated README.md")

    return actions

def register_project(data: Dict[str, Any], dry_run: bool = False) -> List[str]:
    """Register a product outcome in projects.db (H.15.3.2).

    Registration is best-effort: a missing repository, a missing database,
    or a database error is reported in the returned actions rather than
    raised.

    Args:
        data: Manifest content; reads ``product.repository``,
            ``product.project_id``, ``metadata.research_id``,
            ``metadata.topic`` and ``outcome_rationale``.
        dry_run: When True nothing is written; the intended action is
            reported instead.

    Returns:
        Human-readable descriptions of what was done, skipped, or failed.
    """
    actions: List[str] = []
    # Sections present but empty in the YAML load as None.
    meta = data.get("metadata") or {}
    product = data.get("product") or {}

    repo = product.get("repository")
    if not repo:
        actions.append("SKIP: No repository specified for product — fill in manifest first")
        return actions

    project_id = product.get("project_id") or meta.get("research_id", "unknown")

    db_path = get_projects_db_path()
    if not db_path.exists():
        actions.append(f"WARN: projects.db not found at {db_path} — skipping registration")
        return actions

    if dry_run:
        actions.append(f"DRY-RUN: Would register project '{project_id}' at {repo} in projects.db")
        return actions

    try:
        # closing() guarantees the connection is released on every path,
        # including when a statement raises.
        with closing(sqlite3.connect(str(db_path))) as conn:
            cursor = conn.cursor()

            # Check if project already exists
            cursor.execute("SELECT project_id FROM projects WHERE project_id = ?", (project_id,))
            if cursor.fetchone():
                actions.append(f"Project '{project_id}' already registered in projects.db")
                return actions

            # Insert new project
            now = datetime.now(timezone.utc).isoformat()
            cursor.execute("""
                INSERT INTO projects (project_id, name, description, path, status, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (
                project_id,
                meta.get("topic", project_id),
                data.get("outcome_rationale", ""),
                repo,
                "active",
                now,
                now,
            ))
            conn.commit()
        actions.append(f"Registered project '{project_id}' in projects.db")
    except sqlite3.Error as e:
        # sqlite3.Error also covers IntegrityError (constraint violations), so
        # any database failure becomes a warning instead of aborting triage.
        actions.append(f"WARN: projects.db registration failed: {e}")

    return actions

def suggest_track_entries(data: Dict[str, Any], outcome: str) -> List[str]:
    """Suggest TRACK entries for product/component outcomes (H.15.3.3).

    Args:
        data: Manifest content; reads ``metadata.topic`` (falling back to
            ``metadata.research_id``) and the ``product`` or ``component``
            section.
        outcome: Triage outcome; only "product" and "component" produce
            suggestions.

    Returns:
        Lines of suggestion text; empty for any other outcome.
    """
    suggestions: List[str] = []
    meta = data.get("metadata") or {}
    # `or` instead of .get defaults: the CLI stores "" for values the user
    # did not supply, and an empty value should still fall back.
    topic = meta.get("topic") or meta.get("research_id") or "unknown"

    if outcome == "product":
        product = data.get("product") or {}
        repo = product.get("repository") or "TBD"
        suggestions.append(f"Suggested TRACK entries for product '{topic}':")
        suggestions.append(f"  Track N (GTM): Add launch task for {repo}")
        suggestions.append(f"  Example: N.X.1: Set up {topic} project structure")
        suggestions.append(f"  Example: N.X.2: Configure CI/CD for {topic}")
        suggestions.append(f"  Example: N.X.3: Write user documentation for {topic}")
    elif outcome == "component":
        component = data.get("component") or {}
        ctype = component.get("type") or "skill"
        cname = component.get("name") or topic
        suggestions.append(f"Suggested TRACK entries for {ctype} '{cname}':")
        # Name the actual component type rather than always saying "skill".
        suggestions.append(f"  Track H (Framework): Add {ctype} implementation tasks")
        suggestions.append(f"  Example: H.X.1: Create {cname} {ctype} SKILL.md")
        suggestions.append(f"  Example: H.X.2: Implement {cname} core functionality")
        suggestions.append(f"  Example: H.X.3: Write tests for {cname}")

    return suggestions

def get_stats() -> Dict[str, Any]:
    """Get triage statistics across all manifests.

    A manifest whose ``outcome`` is missing, empty, or not a string is
    counted as "monitor" (un-triaged). Any other outcome string, including
    ones outside the known set, gets its own bucket and counts as triaged.

    Returns:
        Dict with ``total_manifests``, ``triaged``, ``untriaged``,
        ``outcomes`` (count per outcome), ``categories`` (total/triaged per
        category) and ``triage_rate`` (percentage string).
    """
    outcomes = {"product": 0, "prototype": 0, "component": 0, "archive": 0, "monitor": 0}
    categories: Dict[str, Dict[str, int]] = {}
    total = 0

    for path in sorted(MANIFESTS_DIR.glob("*.yaml")):
        data = load_yaml(path)
        total += 1

        outcome = data.get("outcome")
        # An empty `outcome:` means no decision was recorded; the isinstance
        # check also keeps unhashable values out of the dict key.
        if not outcome or not isinstance(outcome, str):
            outcome = "monitor"
        outcomes[outcome] = outcomes.get(outcome, 0) + 1

        meta = data.get("metadata")
        if not isinstance(meta, dict):
            meta = {}
        # Null categories fall back too; the CLI formats this key as text.
        cat = meta.get("category") or "unknown"
        bucket = categories.setdefault(cat, {"total": 0, "triaged": 0})
        bucket["total"] += 1
        if outcome != "monitor":
            bucket["triaged"] += 1

    triaged = sum(v for k, v in outcomes.items() if k != "monitor")

    return {
        "total_manifests": total,
        "triaged": triaged,
        "untriaged": outcomes["monitor"],
        "outcomes": outcomes,
        "categories": categories,
        "triage_rate": f"{(triaged / total * 100):.1f}%" if total > 0 else "0%",
    }

def validate_manifests() -> List[Dict[str, str]]:
    """Validate all manifests against a fixed set of structural checks.

    Checks required top-level fields, required metadata fields, the category
    and outcome values, and that a triaged outcome has its matching section.
    Manifests that cannot be loaded, or whose root or ``metadata`` is not a
    mapping, are reported as issues instead of aborting the run.

    Returns:
        A list of ``{"file": ..., "issue": ...}`` dicts; empty when every
        manifest passes.
    """
    issues: List[Dict[str, str]] = []
    required_fields = ["metadata", "input_sources", "artifacts"]
    required_metadata = ["research_id", "topic", "category", "date_conducted", "recommendation", "confidence"]

    for path in sorted(MANIFESTS_DIR.glob("*.yaml")):
        try:
            data = load_yaml(path)
        except Exception as exc:
            # Broad on purpose: the parser's error type belongs to an optional
            # dependency, and a validator should report a broken file and
            # carry on with the rest.
            issues.append({"file": path.name, "issue": f"Could not load manifest: {exc}"})
            continue

        if not isinstance(data, dict):
            issues.append({"file": path.name, "issue": "Manifest root must be a mapping"})
            continue

        for field in required_fields:
            if field not in data:
                issues.append({"file": path.name, "issue": f"Missing required field: {field}"})

        meta = data.get("metadata", {})
        if not isinstance(meta, dict):
            issues.append({"file": path.name, "issue": "metadata must be a mapping"})
            meta = {}
        for mf in required_metadata:
            if mf not in meta:
                issues.append({"file": path.name, "issue": f"Missing metadata.{mf}"})

        cat = meta.get("category", "")
        if cat and cat not in VALID_CATEGORIES:
            issues.append({"file": path.name, "issue": f"Invalid category: {cat}"})

        outcome = data.get("outcome")
        if outcome and outcome not in VALID_OUTCOMES + ["monitor"]:
            issues.append({"file": path.name, "issue": f"Invalid outcome: {outcome}"})

        # Check outcome-specific section exists
        if outcome in VALID_OUTCOMES:
            if outcome not in data:
                issues.append({"file": path.name, "issue": f"Outcome is '{outcome}' but no {outcome} section defined"})

    return issues

# =============================================================================
# CLI
# =============================================================================

def main():
    """Parse command-line arguments and run the selected action.

    Actions are checked in this order: list (--list / --list-all), --stats,
    --validate, then triage (--manifest with --outcome). The first one that
    applies runs and the function returns. With no action the help text is
    printed. A missing manifest file or a missing --outcome prints an error
    and exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Research Triage Automation (ADR-208)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python3 scripts/research_triage.py --list
  python3 scripts/research_triage.py --stats
  python3 scripts/research_triage.py --validate
  python3 scripts/research_triage.py --manifest 2026-01-15-example.yaml \
      --outcome product --rationale "Became standalone product" \
      --repository submodules/products/my-product
  python3 scripts/research_triage.py --manifest 2026-01-15-example.yaml \
      --outcome archive --rationale "No viable path forward"
        """,
    )

    # Declared as a table so the option set reads at a glance; registration
    # order (and therefore --help order) follows the table.
    option_table = [
        (("--list",), {"action": "store_true", "help": "List un-triaged manifests"}),
        (("--list-all",), {"action": "store_true", "help": "List all manifests"}),
        (("--stats",), {"action": "store_true", "help": "Show triage statistics"}),
        (("--validate",), {"action": "store_true", "help": "Validate all manifests"}),
        (("--manifest", "-m"), {"help": "Manifest filename to triage (e.g., 2026-01-15-example.yaml)"}),
        (("--outcome", "-o"), {"choices": VALID_OUTCOMES, "help": "Outcome: product, prototype, component, archive"}),
        (("--rationale", "-r"), {"help": "Rationale for the outcome decision"}),
        # Product-specific
        (("--repository",), {"help": "Repository path for product/prototype outcome"}),
        (("--project-id",), {"help": "Project ID for product outcome"}),
        (("--launch-target",), {"help": "Launch target for product outcome (e.g., 2026-Q2)"}),
        # Component-specific
        (("--component-type",), {
            "choices": ["agent", "skill", "command", "workflow"],
            "default": "skill",
            "help": "Component type (default: skill)",
        }),
        (("--component-name",), {"help": "Component name"}),
        # Prototype-specific
        (("--graduation-target",), {"help": "Graduation target for prototype (e.g., 2026-Q3)"}),
        (("--dry-run",), {"action": "store_true", "help": "Show what would be done without writing"}),
        (("--json",), {"action": "store_true", "help": "Output as JSON"}),
    ]
    for flags, options in option_table:
        parser.add_argument(*flags, **options)

    args = parser.parse_args()
    as_json = args.json

    # ── List ──
    if args.list or args.list_all:
        manifests = list_manifests(untriaged_only=not args.list_all)
        if as_json:
            # Path objects are not JSON-serializable; emit them as strings.
            rows = [
                {key: (str(value) if isinstance(value, Path) else value) for key, value in entry.items()}
                for entry in manifests
            ]
            print(json.dumps(rows, indent=2))
            return

        label = "All" if args.list_all else "Un-triaged"
        row_format = "{:<60} {:<25} {:<8} {:<6} {}"
        print(f"\n{label} Research Manifests ({len(manifests)}):\n")
        print(row_format.format("Manifest", "Category", "Rec", "Conf", "Outcome"))
        print("-" * 120)
        for entry in manifests:
            print(row_format.format(
                entry["filename"],
                entry["category"],
                entry["recommendation"],
                entry["confidence"],
                entry["outcome"],
            ))
        return

    # ── Stats ──
    if args.stats:
        stats = get_stats()
        if as_json:
            print(json.dumps(stats, indent=2))
            return

        print("\nResearch Triage Statistics")
        print("=" * 40)
        print("Total Manifests:  {}".format(stats["total_manifests"]))
        print("Triaged:          {} ({})".format(stats["triaged"], stats["triage_rate"]))
        print("Un-triaged:       {}".format(stats["untriaged"]))
        print("\nOutcomes:")
        for name, count in stats["outcomes"].items():
            print("  {:<12} {}".format(name, count))
        print("\nBy Category:")
        for category, info in stats["categories"].items():
            print("  {:<30} {}/{} triaged".format(category, info["triaged"], info["total"]))
        return

    # ── Validate ──
    if args.validate:
        issues = validate_manifests()
        if as_json:
            print(json.dumps(issues, indent=2))
        elif issues:
            print(f"\nValidation Issues ({len(issues)}):\n")
            for problem in issues:
                print("  {}: {}".format(problem["file"], problem["issue"]))
        else:
            print("All manifests valid.")
        return

    # No action specified
    if not args.manifest:
        parser.print_help()
        return

    # ── Triage ──
    manifest_path = MANIFESTS_DIR / args.manifest
    if not manifest_path.exists():
        print(f"ERROR: Manifest not found: {manifest_path}")
        sys.exit(1)

    if not args.outcome:
        print("ERROR: --outcome is required for triage")
        sys.exit(1)

    chosen = args.outcome
    rationale = args.rationale or f"Triaged as {chosen} via research-triage automation"

    # Build outcome details; each builder runs only for the chosen outcome.
    detail_builders = {
        "product": lambda: {
            "repository": args.repository or "",
            "project_id": args.project_id or "",
            "track_tasks": [],
            "launch_target": args.launch_target or "",
        },
        "prototype": lambda: {
            "repository": args.repository or "",
            "graduation_criteria": [],
            "graduation_target": args.graduation_target or "",
        },
        "component": lambda: {
            "type": args.component_type,
            "name": args.component_name or "",
            "description": "",
            "path": "",
            "track_tasks": [],
        },
        "archive": lambda: {
            "reason": rationale,
            "lessons_learned": [],
            "reusable_artifacts": [],
            "archived_date": datetime.now(timezone.utc).strftime("%Y-%m-%d"),
        },
    }
    build_details = detail_builders.get(chosen)
    outcome_details = build_details() if build_details else None

    result = triage_manifest(
        manifest_path,
        chosen,
        rationale,
        outcome_details=outcome_details,
        dry_run=args.dry_run,
    )

    if as_json:
        print(json.dumps(result, indent=2))
        return

    prefix = "[DRY-RUN] " if args.dry_run else ""
    print(f"\n{prefix}Triage Result for {result['manifest']}:")
    print("  Outcome: {}".format(result["outcome"]))
    for action in result["actions"]:
        print(f"  - {action}")
    if "track_suggestions" in result:
        print("\n  Track Suggestions:")
        for suggestion in result["track_suggestions"]:
            print(f"    {suggestion}")

# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()