Skip to main content

scripts-test-roundtrip

#!/usr/bin/env python3
"""
title: "Session Export/Import Roundtrip Tests"
component_type: test
version: "1.0.0"
audience: contributor
status: stable
summary: "Integration tests for export → import roundtrip"
keywords: ['test', 'roundtrip', 'export', 'import', 'cusf', 'integration']
tokens: ~400
created: 2026-01-28
updated: 2026-01-28

Integration tests for export → import roundtrip (J.13.4.3).

Tests that session data can be exported and re-imported without data loss.

Track: J.13 (Memory - Generic Session Export)
Task: J.13.4.3
"""

import json
import os
import sys
import tempfile
import unittest
from datetime import datetime, timezone
from pathlib import Path

# Add parent paths for imports.
# The directory layout is derived from this file's own location, so the
# project-local imports resolve regardless of the current working directory.
_test_dir = Path(__file__).resolve().parent
_scripts_dir = _test_dir.parent
_coditect_root = _scripts_dir.parent

# Each insert goes to the front of sys.path, so after both run the scripts
# directory is searched before the repository root. The membership checks
# keep repeated imports of this module from adding duplicate entries.
if str(_coditect_root) not in sys.path:
    sys.path.insert(0, str(_coditect_root))
if str(_scripts_dir) not in sys.path:
    sys.path.insert(0, str(_scripts_dir))

# Project-local imports: these must stay below the sys.path setup above,
# which is what makes the `core` package importable from this test module.
from core.session_extractor import SessionMetadata, ExtractedEntry, ExtractionResult
from core.cusf_formatter import CUSFFormatter
from core.output_writer import OutputWriter

class TestCUSFFormatter(unittest.TestCase):
    """Tests for CUSF formatter.

    Exercises ``CUSFFormatter.format`` (entry stream) and
    ``CUSFFormatter.to_jsonl`` (serialized text) on small in-memory
    extraction results; nothing is written to disk here.
    """

    def test_format_creates_entries(self):
        """Test formatter creates CUSF entries.

        Feeds a two-message session through the formatter and checks that
        the meta entry comes first, that exactly one ``session_start`` is
        emitted, and that both messages are present.
        """
        metadata = SessionMetadata(
            session_id="test-roundtrip-1",
            llm_source="claude",
            llm_model="claude-opus-4-5",
            total_messages=2,
        )
        entries = [
            ExtractedEntry(
                type="message",
                timestamp=datetime.now(timezone.utc),
                data={
                    "message_id": "msg-1",
                    "role": "user",
                    "content": "Hello world",
                },
            ),
            ExtractedEntry(
                type="message",
                timestamp=datetime.now(timezone.utc),
                data={
                    "message_id": "msg-2",
                    "role": "assistant",
                    "content": "Hi there!",
                },
            ),
        ]
        result = ExtractionResult(
            success=True,
            metadata=metadata,
            entries=entries,
        )

        formatter = CUSFFormatter()
        cusf_entries = list(formatter.format(result))

        # Should have: meta, session_start, 2 messages, session_end
        self.assertGreaterEqual(len(cusf_entries), 4)

        # First should be meta
        self.assertIn("_meta", cusf_entries[0])

        # Check for session_start
        session_starts = [e for e in cusf_entries if e.get("type") == "session_start"]
        self.assertEqual(len(session_starts), 1)

        # Check messages
        messages = [e for e in cusf_entries if e.get("type") == "message"]
        self.assertEqual(len(messages), 2)

    def test_format_to_jsonl(self):
        """Test formatter produces valid JSONL.

        Uses a session without entries and verifies that every non-blank
        line of the serialized output parses as a JSON object.
        """
        metadata = SessionMetadata(
            session_id="test-jsonl",
            llm_source="codex",
        )
        result = ExtractionResult(
            success=True,
            metadata=metadata,
            entries=[],
        )

        formatter = CUSFFormatter()
        jsonl = formatter.to_jsonl(result)

        # Should be valid JSONL (each line is valid JSON)
        for line_no, line in enumerate(jsonl.strip().splitlines(), start=1):
            if not line.strip():
                continue
            # subTest pinpoints the offending line instead of stopping at
            # the first bad one.
            with self.subTest(line=line_no):
                parsed = json.loads(line)
                self.assertIsInstance(parsed, dict)

class TestOutputWriter(unittest.TestCase):
    """Tests for output writer.

    Each test writes a handful of CUSF-style dict entries through
    ``OutputWriter`` into a temporary file, reads the file back with the
    standard library, and removes the file afterwards.
    """

    def test_write_jsonl(self):
        """Test writing JSONL format."""
        # delete=False: only the path is needed; the handle is closed before
        # OutputWriter opens the file itself.
        with tempfile.NamedTemporaryFile(suffix=".jsonl", delete=False) as f:
            temp_path = Path(f.name)

        try:
            entries = [
                {"_meta": {"format": "cusf", "version": "1.0.0"}},
                {"type": "session_start", "session_id": "test-1"},
                {"type": "message", "role": "user", "content": "Hello"},
            ]

            with OutputWriter(temp_path, format="jsonl") as writer:
                count = writer.write_all(entries)

            self.assertEqual(count, 3)

            # Verify file content
            with open(temp_path, "r", encoding="utf-8") as f:
                lines = f.readlines()
            self.assertEqual(len(lines), 3)

            # Verify each line is valid JSON
            for line in lines:
                parsed = json.loads(line)
                self.assertIsInstance(parsed, dict)
        finally:
            if temp_path.exists():
                os.unlink(temp_path)

    def test_write_json(self):
        """Test writing JSON format."""
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
            temp_path = Path(f.name)

        try:
            entries = [
                {"type": "session_start", "session_id": "test-2"},
                {"type": "message", "role": "user", "content": "Test"},
            ]

            with OutputWriter(temp_path, format="json") as writer:
                count = writer.write_all(entries)

            self.assertEqual(count, 2)

            # Verify file content is valid JSON array
            with open(temp_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            self.assertIsInstance(data, list)
            self.assertEqual(len(data), 2)
        finally:
            if temp_path.exists():
                os.unlink(temp_path)

    def test_write_sqlite(self):
        """Test writing SQLite format.

        Verifies the writer reports all four entries and that the resulting
        database holds one row in ``sessions`` and one row in ``messages``.
        """
        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
            temp_path = Path(f.name)

        try:
            entries = [
                {"_meta": {"format": "cusf", "version": "1.0.0", "exported_at": "2026-01-28T12:00:00Z"}},
                {"type": "session_start", "session_id": "test-3", "llm_source": "gemini"},
                {"type": "message", "role": "user", "content": "Hello", "timestamp": "2026-01-28T12:00:00Z", "message_id": "m1"},
                {"type": "session_end", "session_id": "test-3"},
            ]

            with OutputWriter(temp_path, format="sqlite") as writer:
                count = writer.write_all(entries)

            self.assertEqual(count, 4)

            # Verify SQLite file exists and has data
            import sqlite3
            conn = sqlite3.connect(temp_path)
            try:
                cursor = conn.cursor()

                cursor.execute("SELECT COUNT(*) FROM sessions")
                session_count = cursor.fetchone()[0]
                self.assertEqual(session_count, 1)

                cursor.execute("SELECT COUNT(*) FROM messages")
                message_count = cursor.fetchone()[0]
                self.assertEqual(message_count, 1)
            finally:
                # Close even when an assertion fails; an open handle would
                # otherwise prevent the unlink below on Windows.
                conn.close()
        finally:
            if temp_path.exists():
                os.unlink(temp_path)

class TestRoundtrip(unittest.TestCase):
    """Integration tests for full export → import roundtrip.

    Every test formats an ``ExtractionResult`` with ``CUSFFormatter``,
    writes it as JSONL through ``OutputWriter``, then parses the file back
    with ``json`` and checks that the fields of interest are unchanged.
    """

    def _export_and_reload(self, result):
        """Write *result* as CUSF JSONL to a temp file and return the parsed entries.

        The temporary file is always removed, whether or not the export or
        the parse succeeds.
        """
        # delete=False: only the path is needed; OutputWriter reopens it.
        with tempfile.NamedTemporaryFile(suffix=".jsonl", delete=False) as f:
            temp_path = Path(f.name)

        try:
            formatter = CUSFFormatter()
            with OutputWriter(temp_path, format="jsonl") as writer:
                writer.write_all(formatter.format(result))

            # Read back, skipping blank lines
            with open(temp_path, "r", encoding="utf-8") as f:
                return [json.loads(line) for line in f if line.strip()]
        finally:
            if temp_path.exists():
                os.unlink(temp_path)

    def _single_by_role(self, messages, role):
        """Return the first message with the given role, failing the test if absent."""
        match = next((m for m in messages if m["role"] == role), None)
        self.assertIsNotNone(match, f"no {role} message found after roundtrip")
        return match

    def test_roundtrip_jsonl(self):
        """Test roundtrip through JSONL format."""
        # Create test data
        metadata = SessionMetadata(
            session_id="roundtrip-test-1",
            llm_source="claude",
            llm_model="claude-opus-4-5",
            started_at=datetime.now(timezone.utc),
            total_messages=2,
            total_tokens_input=100,
            total_tokens_output=50,
        )
        entries = [
            ExtractedEntry(
                type="message",
                timestamp=datetime.now(timezone.utc),
                data={
                    "message_id": "rt-msg-1",
                    "role": "user",
                    "content": "Test message 1",
                },
            ),
            ExtractedEntry(
                type="message",
                timestamp=datetime.now(timezone.utc),
                data={
                    "message_id": "rt-msg-2",
                    "role": "assistant",
                    "content": "Test response 1",
                },
            ),
        ]
        result = ExtractionResult(
            success=True,
            metadata=metadata,
            entries=entries,
        )

        # Export to JSONL, then read back and verify
        parsed = self._export_and_reload(result)

        # Find session_start
        session_starts = [e for e in parsed if e.get("type") == "session_start"]
        self.assertEqual(len(session_starts), 1)
        self.assertEqual(session_starts[0]["session_id"], "roundtrip-test-1")
        self.assertEqual(session_starts[0]["llm_source"], "claude")

        # Find messages
        messages = [e for e in parsed if e.get("type") == "message"]
        self.assertEqual(len(messages), 2)

        # Verify message content preserved
        user_msg = self._single_by_role(messages, "user")
        self.assertEqual(user_msg["content"], "Test message 1")

        asst_msg = self._single_by_role(messages, "assistant")
        self.assertEqual(asst_msg["content"], "Test response 1")

    def test_roundtrip_preserves_metadata(self):
        """Test that roundtrip preserves session metadata.

        Checks that ``session_id`` and ``llm_model`` on the ``session_start``
        entry match the values the session was created with.
        """
        original_session_id = "metadata-test-123"
        original_model = "gpt-4-turbo"

        metadata = SessionMetadata(
            session_id=original_session_id,
            llm_source="codex",
            llm_model=original_model,
            started_at=datetime.now(timezone.utc),
            total_messages=1,
            total_tokens_input=500,
            total_tokens_output=200,
        )
        result = ExtractionResult(
            success=True,
            metadata=metadata,
            entries=[],
        )

        parsed = self._export_and_reload(result)

        # A default keeps a missing entry from surfacing as StopIteration
        # (reported as an error) rather than as a regular test failure.
        session_start = next(
            (e for e in parsed if e.get("type") == "session_start"), None
        )
        self.assertIsNotNone(session_start, "no session_start entry after roundtrip")
        self.assertEqual(session_start["session_id"], original_session_id)
        self.assertEqual(session_start["llm_model"], original_model)

    def test_roundtrip_with_tool_calls(self):
        """Test roundtrip with tool use entries."""
        metadata = SessionMetadata(
            session_id="tool-test-1",
            llm_source="claude",
        )
        entries = [
            ExtractedEntry(
                type="message",
                timestamp=datetime.now(timezone.utc),
                data={
                    "message_id": "tm-1",
                    "role": "user",
                    "content": "Read a file",
                },
            ),
            ExtractedEntry(
                type="tool_use",
                timestamp=datetime.now(timezone.utc),
                data={
                    "tool_id": "tool-read-1",
                    "tool_name": "Read",
                    "tool_input": {"file_path": "/test.txt"},
                },
            ),
            ExtractedEntry(
                type="tool_result",
                timestamp=datetime.now(timezone.utc),
                data={
                    "tool_id": "tool-read-1",
                    "result": "File contents here",
                },
            ),
        ]
        result = ExtractionResult(
            success=True,
            metadata=metadata,
            entries=entries,
        )

        parsed = self._export_and_reload(result)

        # Verify tool entries
        tool_uses = [e for e in parsed if e.get("type") == "tool_use"]
        self.assertEqual(len(tool_uses), 1)
        self.assertEqual(tool_uses[0]["tool_name"], "Read")

        tool_results = [e for e in parsed if e.get("type") == "tool_result"]
        self.assertEqual(len(tool_results), 1)

class TestCUSFVersioning(unittest.TestCase):
    """Tests for CUSF format versioning.

    Both tests inspect the first entry the formatter yields for a session
    with no entries, which is expected to be the ``_meta`` record.
    """

    def _first_entry(self, session_id, llm_source):
        """Format an entry-less session and return the first CUSF entry produced."""
        metadata = SessionMetadata(session_id=session_id, llm_source=llm_source)
        result = ExtractionResult(success=True, metadata=metadata, entries=[])

        formatter = CUSFFormatter()
        cusf_entries = list(formatter.format(result))
        return cusf_entries[0]

    def test_meta_includes_version(self):
        """Test that meta entry includes CUSF version."""
        meta_entry = self._first_entry("v-test", "claude")

        self.assertIn("_meta", meta_entry)
        self.assertEqual(meta_entry["_meta"]["version"], "1.0.0")
        self.assertEqual(meta_entry["_meta"]["format"], "cusf")

    def test_meta_includes_exporter(self):
        """Test that meta entry includes exporter info."""
        meta_entry = self._first_entry("exp-test", "gemini")

        self.assertIn("exporter", meta_entry["_meta"])

# Run the suite only when this file is executed directly, not when it is
# imported (for example by a test runner doing discovery).
if __name__ == "__main__":
    unittest.main()