Skip to main content

scripts-error-solution-extractor

#!/usr/bin/env python3
"""
CP-14: ErrorSolution Node Extractor (ADR-151)

Migrates error_solution entities from org.db error_solutions table:

  - node_type: 'error_solution'
  - Properties: error_pattern, solution, language, occurrences

Source: org.db error_solutions table
Target: org.db kg_nodes table

Created: 2026-02-03
Track: J (Memory Intelligence)
Task: J.3.4.4
"""

import logging
import re
from pathlib import Path
from typing import Any, Dict, Generator, Optional, Tuple

from .base_extractor import SQLiteSourceExtractor

# Module-level logger named after this module. The dunder name is required:
# a bare `name` is undefined here and would raise NameError on import.
logger = logging.getLogger(__name__)

class ErrorSolutionExtractor(SQLiteSourceExtractor):
    """
    Migrate error_solutions from org.db into kg_nodes.

    Each row of the error_solutions table is turned into one
    (node_id, name, subtype, properties, source_table, source_id) tuple
    whose node type is 'error_solution'.
    """

@property
def node_type(self) -> str:
    """Return the node type label used for every node this extractor emits."""
    return "error_solution"

def extract_entities(self) -> Generator[Tuple[str, str, Optional[str], Dict[str, Any], Optional[str], Optional[str]], None, None]:
    """
    Stream the error_solutions table as knowledge-graph node tuples.

    Rows are read through ``self.connect_source()`` in query order
    (``success_count`` descending, then ``created_at``). Columns are looked
    up by name, so the connection is assumed to use a mapping-style row
    factory -- TODO confirm against connect_source().

    Yields:
        Tuple of (node_id, name, subtype, properties, source_table, source_id)
    """
    source = self.connect_source()

    records = source.execute("""
SELECT
id,
error_hash,
error_type,
error_signature,
error_context,
solution,
solution_code,
language,
success_count,
failure_count,
last_used,
created_at,
tenant_id,
project_id
FROM error_solutions
ORDER BY success_count DESC, created_at
""")

    # Columns copied into the properties unchanged, after the identity fields.
    copied_columns = (
        "solution",
        "solution_code",
        "language",
        "success_count",
        "failure_count",
        "last_used",
        "created_at",
    )
    # Scoping columns that are only recorded when they hold a truthy value.
    scope_columns = ("tenant_id", "project_id")

    for record in records:
        row_id = record["id"]
        # Fall back to the primary key when no hash was stored.
        digest = record["error_hash"] or str(row_id)
        kind = record["error_type"] or "unknown"
        signature = record["error_signature"] or ""

        # The node id is derived from the hash so repeated runs are deterministic.
        node_id = self.generate_node_id(digest)
        name = self._generate_name(kind, signature)
        # Subtype is the normalized error category (e.g. type_error).
        subtype = self._normalize_error_type(kind)

        props: Dict[str, Any] = {
            "error_type": kind,
            "error_signature": signature,
            "error_context": record["error_context"],
            "error_hash": digest,
        }
        for column in copied_columns:
            props[column] = record[column]

        # Usage statistics are only recorded for solutions applied at least once.
        successes = record["success_count"] or 0
        failures = record["failure_count"] or 0
        uses = successes + failures
        if uses > 0:
            props["effectiveness"] = round(successes / uses, 2)
            props["total_uses"] = uses

        for column in scope_columns:
            scope_value = record[column]
            if scope_value:
                props[column] = scope_value

        # Drop keys whose value is None so they are not stored on the node.
        props = {key: value for key, value in props.items() if value is not None}

        yield (node_id, name, subtype, props, "error_solutions", str(row_id))

def _generate_name(self, error_type: str, error_signature: str) -> str:
"""
Generate display name from error info.

Examples:
"TypeError: 'NoneType' object is not subscriptable"
"ModuleNotFoundError: No module named 'foo'"
"""
# Clean up signature
sig = error_signature.strip()

# Truncate if too long
max_sig_len = 60
if len(sig) > max_sig_len:
sig = sig[:max_sig_len-3] + "..."

if sig:
return f"{error_type}: {sig}"
return error_type

def _normalize_error_type(self, error_type: str) -> str:
"""
Normalize error type for consistent subtype values.

Returns standardized error category.
"""
if not error_type:
return "unknown"

type_lower = error_type.lower()

# Map to standard Python error categories
if "type" in type_lower:
return "type_error"
if "value" in type_lower:
return "value_error"
if "key" in type_lower:
return "key_error"
if "index" in type_lower:
return "index_error"
if "attribute" in type_lower:
return "attribute_error"
if "import" in type_lower or "module" in type_lower:
return "import_error"
if "file" in type_lower or "io" in type_lower:
return "io_error"
if "syntax" in type_lower:
return "syntax_error"
if "runtime" in type_lower:
return "runtime_error"
if "permission" in type_lower:
return "permission_error"
if "timeout" in type_lower:
return "timeout_error"
if "connection" in type_lower or "network" in type_lower:
return "connection_error"

# Return normalized version of the original
return error_type.lower().replace(" ", "_").replace("error", "_error").replace("__", "_")