Skip to main content

scripts-function-extractor

#!/usr/bin/env python3
"""
CP-09: Function Node Extractor (ADR-151)

Extracts function entities from call_graph_functions in sessions.db:

  - node_type: 'function'
  - Properties: name, file_path, line_start, line_end, signature, class_name

Source: sessions.db call_graph_functions table
Target: org.db kg_nodes table

Created: 2026-02-03
Track: J (Memory Intelligence)
Task: J.3.4.7
"""

import logging
from pathlib import Path
from typing import Any, Dict, Generator, Optional, Tuple

from .base_extractor import SQLiteSourceExtractor

# Module-level logger named after this module so records can be filtered
# per extractor.
logger = logging.getLogger(__name__)

class FunctionExtractor(SQLiteSourceExtractor):
    """
    Extract function entities from call_graph_functions into kg_nodes.

    Each row of the ``call_graph_functions`` source table is turned into one
    entity tuple with node type ``"function"``; the row's language (lowercased)
    is used as the subtype.
    """

    # Maximum number of docstring characters copied into a node's properties.
    DOCSTRING_MAX_LENGTH = 500

    @property
    def node_type(self) -> str:
        """Return the node type label for entities produced by this extractor."""
        return "function"

    def extract_entities(
        self,
    ) -> Generator[
        Tuple[str, str, Optional[str], Dict[str, Any], Optional[str], Optional[str]],
        None,
        None,
    ]:
        """
        Extract functions from call_graph_functions.

        Rows are read ordered by ``file_path`` and ``start_line``.

        Yields:
            Tuple of (node_id, name, subtype, properties, source_table, source_id)
            where ``name`` is ``ClassName.func`` when the row has a class name,
            ``subtype`` is the lowercased language (or ``"unknown"``), and
            ``properties`` contains only the non-None row attributes.
        """
        # NOTE(review): rows are indexed by column name below, so the connection
        # returned by connect_source() presumably uses a name-addressable row
        # factory (e.g. sqlite3.Row) - confirm in the base class.
        conn = self.connect_source()

        cursor = conn.execute("""
            SELECT
                node_id,
                name,
                file_path,
                start_line,
                end_line,
                language,
                signature,
                docstring,
                class_name,
                created_at,
                updated_at
            FROM call_graph_functions
            ORDER BY file_path, start_line
        """)

        for row in cursor:
            source_node_id = row['node_id']
            func_name = row['name']
            file_path = row['file_path']
            class_name = row['class_name']
            language = row['language']
            docstring = row['docstring']

            # Generate KG node_id
            # Format: function:{file_path}:{func_name}
            # e.g., function:scripts/core/paths.py:get_org_db_path
            # NOTE(review): the key omits class_name, so two same-named methods
            # of different classes in one file produce the same key - confirm
            # whether that is intended / handled downstream.
            kg_node_id = self.generate_node_id(f"{file_path}:{func_name}")

            # Display name includes class if present
            display_name = f"{class_name}.{func_name}" if class_name else func_name

            # Subtype is the language
            subtype = language.lower() if language else "unknown"

            # Build properties
            properties = {
                "file_path": file_path,
                "line_start": row['start_line'],
                "line_end": row['end_line'],
                "signature": row['signature'],
                "class_name": class_name,
                # Truncate long docstrings to keep node properties small.
                "docstring": docstring[:self.DOCSTRING_MAX_LENGTH] if docstring else None,
                "language": language,
                # Original node_id for edge linking
                "call_graph_node_id": source_node_id,
            }

            # Clean None values
            properties = {k: v for k, v in properties.items() if v is not None}

            yield (
                kg_node_id,
                display_name,
                subtype,
                properties,
                "call_graph_functions",
                source_node_id,
            )