# scripts-error-solution-extractor
#!/usr/bin/env python3
"""
CP-14: ErrorSolution Node Extractor (ADR-151)

Migrates error_solution entities from the org.db error_solutions table:
- node_type: 'error_solution'
- Properties: error_pattern, solution, language, occurrences

Source: org.db error_solutions table
Target: org.db kg_nodes table

Created: 2026-02-03
Track: J (Memory Intelligence)
Task: J.3.4.4
"""
import logging
import re
from pathlib import Path
from typing import Any, Dict, Generator, Optional, Tuple

from .base_extractor import SQLiteSourceExtractor
# Module-level logger named after this module (``__name__``), so log records
# can be filtered per module.
logger = logging.getLogger(__name__)
class ErrorSolutionExtractor(SQLiteSourceExtractor):
    """
    Migrate error_solutions from org.db into kg_nodes.

    Reads every row of the ``error_solutions`` table and yields one entity
    tuple per row with node type ``error_solution``.

    NOTE(review): persisting the yielded entities into ``kg_nodes`` is
    presumably done by ``SQLiteSourceExtractor`` -- confirm against the base
    class, which is not visible from this module.
    """

    # Longest signature (including the "..." suffix) used in a display name.
    _MAX_SIGNATURE_LENGTH = 60

    # Ordered (category, pattern) rules matched against the lower-cased error
    # type; the first rule that matches wins. Most rules are plain substring
    # searches. "io" is the exception: it is only accepted as a standalone
    # token or directly in front of "error"/"exception", because a bare
    # substring test also fires on "permission", "connection", "assertion",
    # "exception", etc. and would shadow the rules that follow it.
    _CATEGORY_RULES = tuple(
        (category, re.compile(pattern))
        for category, pattern in (
            ("type_error", r"type"),
            ("value_error", r"value"),
            ("key_error", r"key"),
            ("index_error", r"index"),
            ("attribute_error", r"attribute"),
            ("import_error", r"import|module"),
            (
                "io_error",
                r"file|(?<![a-z])io(?![a-z])|io[ _-]?(?:error|exception)",
            ),
            ("syntax_error", r"syntax"),
            ("runtime_error", r"runtime"),
            ("permission_error", r"permission"),
            ("timeout_error", r"timeout"),
            ("connection_error", r"connection|network"),
        )
    )

    @property
    def node_type(self) -> str:
        """Node type assigned to every entity produced by this extractor."""
        return "error_solution"

    def extract_entities(self) -> Generator[Tuple[str, str, Optional[str], Dict[str, Any], Optional[str], Optional[str]], None, None]:
        """
        Extract error_solutions from error_solutions table.

        Rows are read ordered by ``success_count`` (descending) and then
        ``created_at``.

        Yields:
            Tuple of (node_id, name, subtype, properties, source_table, source_id)
        """
        # NOTE(review): the connection is not closed here; presumably the
        # base class owns its lifecycle -- confirm.
        conn = self.connect_source()
        cursor = conn.execute("""
            SELECT
                id,
                error_hash,
                error_type,
                error_signature,
                error_context,
                solution,
                solution_code,
                language,
                success_count,
                failure_count,
                last_used,
                created_at,
                tenant_id,
                project_id
            FROM error_solutions
            ORDER BY success_count DESC, created_at
        """)

        for row in cursor:
            error_id = row['id']
            # Fall back to the row id so every row still gets a stable key.
            error_hash = row['error_hash'] or str(error_id)
            error_type = row['error_type'] or "unknown"
            error_signature = row['error_signature'] or ""

            # Generate node_id from error_hash for determinism
            node_id = self.generate_node_id(error_hash)

            # Display name
            name = self._generate_name(error_type, error_signature)

            # Subtype is the normalized error category (e.g. type_error)
            subtype = self._normalize_error_type(error_type)

            # Build properties
            properties = {
                "error_type": error_type,
                "error_signature": error_signature,
                "error_context": row['error_context'],
                "error_hash": error_hash,
                "solution": row['solution'],
                "solution_code": row['solution_code'],
                "language": row['language'],
                "success_count": row['success_count'],
                "failure_count": row['failure_count'],
                "last_used": row['last_used'],
                "created_at": row['created_at'],
            }

            # Calculate effectiveness (share of successful uses); NULL counts
            # are treated as zero and rows never used get no effectiveness.
            successes = row['success_count'] or 0
            failures = row['failure_count'] or 0
            total_uses = successes + failures
            if total_uses > 0:
                properties['effectiveness'] = round(successes / total_uses, 2)
                properties['total_uses'] = total_uses

            # Include tenant info if present
            if row['tenant_id']:
                properties['tenant_id'] = row['tenant_id']
            if row['project_id']:
                properties['project_id'] = row['project_id']

            # Clean None values
            properties = {k: v for k, v in properties.items() if v is not None}

            yield (
                node_id,
                name,
                subtype,
                properties,
                "error_solutions",
                str(error_id),
            )

    def _generate_name(self, error_type: str, error_signature: str) -> str:
        """
        Generate display name from error info.

        The signature is stripped of surrounding whitespace and truncated to
        at most 60 characters (ending in "..." when shortened). When the
        signature is empty, the error type alone is returned.

        Examples:
            "TypeError: 'NoneType' object is not subscriptable"
            "ModuleNotFoundError: No module named 'foo'"
        """
        sig = error_signature.strip()

        limit = self._MAX_SIGNATURE_LENGTH
        if len(sig) > limit:
            # Reserve three characters for the ellipsis.
            sig = sig[:limit - 3] + "..."

        if sig:
            return f"{error_type}: {sig}"
        return error_type

    def _normalize_error_type(self, error_type: str) -> str:
        """
        Normalize error type for consistent subtype values.

        The lower-cased type is matched against the ordered category rules
        (type, value, key, index, attribute, import, io, syntax, runtime,
        permission, timeout, connection). If none match, a snake_case slug of
        the original type is returned, e.g. "ZeroDivisionError" ->
        "zerodivision_error".

        Returns:
            Standardized error category, or "unknown" for an empty/blank type.
        """
        type_lower = (error_type or "").strip().lower()
        if not type_lower:
            return "unknown"

        # Map to standard Python error categories; first matching rule wins.
        for category, pattern in self._CATEGORY_RULES:
            if pattern.search(type_lower):
                return category

        # Return normalized version of the original: spaces become
        # underscores, "error" is split off as its own word, and any run of
        # underscores is collapsed so the slug has no doubled or edge "_".
        slug = type_lower.replace(" ", "_").replace("error", "_error")
        slug = re.sub(r"_+", "_", slug).strip("_")
        return slug or "unknown"