scripts-function-extractor
#!/usr/bin/env python3
"""
CP-09: Function Node Extractor (ADR-151)

Extracts function entities from call_graph_functions in sessions.db:
- node_type: 'function'
- Properties: name, file_path, line_start, line_end, signature, class_name

Source: sessions.db call_graph_functions table
Target: org.db kg_nodes table

Created: 2026-02-03
Track: J (Memory Intelligence)
Task: J.3.4.7
"""
import logging from pathlib import Path from typing import Any, Dict, Generator, Optional, Tuple
from .base_extractor import SQLiteSourceExtractor
# Module-level logger keyed on this module's import name, so log records can
# be filtered per module through the standard logging hierarchy.
logger = logging.getLogger(__name__)
class FunctionExtractor(SQLiteSourceExtractor):
    """Turn rows of ``call_graph_functions`` into function entities for ``kg_nodes``."""

    @property
    def node_type(self) -> str:
        """Node type label used for every entity this extractor yields."""
        return "function"

    def extract_entities(self) -> Generator[Tuple[str, str, Optional[str], Dict[str, Any], Optional[str], Optional[str]], None, None]:
        """
        Stream one entity tuple per row of ``call_graph_functions``.

        Rows are read through the connection returned by ``connect_source()``,
        ordered by file path and then start line.

        Yields:
            Tuple of (node_id, name, subtype, properties, source_table, source_id):
            - node_id: result of ``generate_node_id("{file_path}:{name}")``
            - name: ``Class.func`` when the row has a class name, else ``func``
            - subtype: lower-cased language, or "unknown" when it is empty
            - properties: row details with ``None`` values left out
            - source_table: always "call_graph_functions"
            - source_id: the row's own ``node_id``
        """
        # Adjacent literals are joined into a single SQL statement.
        query = (
            "\n"
            "SELECT\n"
            "node_id,\n"
            "name,\n"
            "file_path,\n"
            "start_line,\n"
            "end_line,\n"
            "language,\n"
            "signature,\n"
            "docstring,\n"
            "class_name,\n"
            "created_at,\n"
            "updated_at\n"
            "FROM call_graph_functions\n"
            "ORDER BY file_path, start_line\n"
        )
        connection = self.connect_source()

        for record in connection.execute(query):
            origin_id = record['node_id']
            fn_name = record['name']
            path = record['file_path']
            owner_class = record['class_name']
            lang = record['language']
            doc_text = record['docstring']

            # KG key is built from file path and bare function name, e.g.
            # scripts/core/paths.py:get_org_db_path (generate_node_id is
            # expected to add the "function:" prefix -- TODO confirm).
            # NOTE(review): the class name is not part of the key, so two
            # same-named methods in one file map to the same key -- confirm
            # this is handled downstream.
            kg_id = self.generate_node_id(f"{path}:{fn_name}")

            # Qualify the display name with the owning class when there is one.
            label = f"{owner_class}.{fn_name}" if owner_class else fn_name

            # The language doubles as the node subtype.
            if lang:
                kind = lang.lower()
            else:
                kind = "unknown"

            # Long docstrings are capped at 500 characters; empty ones are dropped.
            doc_excerpt = doc_text[:500] if doc_text else None

            candidates = (
                ("file_path", path),
                ("line_start", record['start_line']),
                ("line_end", record['end_line']),
                ("signature", record['signature']),
                ("class_name", owner_class),
                ("docstring", doc_excerpt),
                ("language", lang),
                # Original call-graph id, kept so edges can be linked later.
                ("call_graph_node_id", origin_id),
            )
            # Only None is filtered out; other falsy values (e.g. "") are kept.
            properties = {key: value for key, value in candidates if value is not None}

            yield (
                kg_id,
                label,
                kind,
                properties,
                "call_graph_functions",
                origin_id,
            )