Concrete Postgres Schema Aligned to CODITECT API
Map the existing /api/v1/documents schema into relational tables and extend with compliance/ABAC fields.
Base Document Table (Maps to DocumentResponse)
-- Raw file + high-level doc info (maps to DocumentResponse)
CREATE TABLE documents (
id UUID PRIMARY KEY,
filename TEXT NOT NULL,
filepath TEXT NOT NULL,
mime_type TEXT NOT NULL,
file_size BIGINT NOT NULL,
file_hash TEXT NOT NULL,
status TEXT NOT NULL, -- pending, processed, error, deleted
document_type TEXT NOT NULL, -- reference, policy, record, etc.
title TEXT NOT NULL,
version TEXT NOT NULL,
summary TEXT,
chunk_count INT NOT NULL DEFAULT 0,
processing_error TEXT,
created_at TIMESTAMPTZ NOT NULL,
updated_at TIMESTAMPTZ NOT NULL
);
Document Metadata Tables
-- Keywords (from DocumentResponse.keywords)
CREATE TABLE document_keywords (
document_id UUID NOT NULL REFERENCES documents(id),
keyword TEXT NOT NULL,
PRIMARY KEY (document_id, keyword)
);
-- Tags (from DocumentResponse.tags)
CREATE TABLE document_tags (
document_id UUID NOT NULL REFERENCES documents(id),
tag TEXT NOT NULL,
PRIMARY KEY (document_id, tag)
);
Compliance Metadata Extension
-- Compliance-focused metadata for HIPAA + FINRA
CREATE TABLE document_metadata (
document_id UUID PRIMARY KEY REFERENCES documents(id),
domain TEXT NOT NULL, -- clinical, security-privacy, finance, etc.
jurisdiction TEXT[] NOT NULL, -- ["US"], ["US","EU"], etc.
regulations TEXT[] NOT NULL, -- ["HIPAA-164.316","FINRA-4511"]
security_class TEXT NOT NULL, -- public/internal/confidential/restricted
contains_phi BOOLEAN NOT NULL DEFAULT FALSE,
contains_pii BOOLEAN NOT NULL DEFAULT FALSE,
contains_financial BOOLEAN NOT NULL DEFAULT FALSE,
status TEXT NOT NULL, -- draft/in_review/effective/obsolete
effective_date DATE,
review_due_date DATE,
expiry_date DATE,
retention_category TEXT NOT NULL, -- HIPAA-6Y, FINRA-6Y, etc.
retention_period_y INT NOT NULL,
retain_until DATE NOT NULL,
legal_hold BOOLEAN NOT NULL DEFAULT FALSE,
legal_hold_reason TEXT,
business_unit TEXT,
desk TEXT,
facility TEXT,
owner_user_id TEXT NOT NULL,
owner_role TEXT NOT NULL,
created_at TIMESTAMPTZ NOT NULL,
last_modified_at TIMESTAMPTZ NOT NULL
);
Chunk Table (Aligned with /documents/{id}/chunks)
-- Optional: chunk metadata (aligned with /documents/{id}/chunks)
CREATE TABLE document_chunks (
id UUID PRIMARY KEY,
document_id UUID NOT NULL REFERENCES documents(id),
index INT NOT NULL,
content TEXT NOT NULL,
embedding VECTOR(1536), -- pgvector for semantic search
metadata JSONB,
created_at TIMESTAMPTZ NOT NULL
);
CREATE INDEX idx_chunks_document ON document_chunks(document_id);
CREATE INDEX idx_chunks_embedding ON document_chunks USING ivfflat (embedding vector_cosine_ops);
Mapping Table: API Response ↔ Database
| API Field (DocumentResponse) | Database Column | Table |
|---|---|---|
id | id | documents |
filename | filename | documents |
filepath | filepath | documents |
mime_type | mime_type | documents |
file_size | file_size | documents |
file_hash | file_hash | documents |
status | status | documents |
document_type | document_type | documents |
title | title | documents |
version | version | documents |
summary | summary | documents |
keywords | keyword (array) | document_keywords |
tags | tag (array) | document_tags |
chunk_count | chunk_count | documents |
processing_error | processing_error | documents |
created_at | created_at | documents |
updated_at | updated_at | documents |
Extended Fields for Compliance
| Field | Table | Purpose |
|---|---|---|
domain | document_metadata | Business domain classification |
jurisdiction | document_metadata | Legal jurisdiction(s) |
regulations | document_metadata | Applicable regulations |
security_class | document_metadata | Security classification |
contains_phi | document_metadata | PHI flag for HIPAA |
contains_pii | document_metadata | PII flag |
contains_financial | document_metadata | Financial data flag |
effective_date | document_metadata | Document effective date |
retain_until | document_metadata | Computed retention expiry |
legal_hold | document_metadata | Legal hold status |
API Extension Recommendations
New Compliance Endpoints
GET /api/v1/documents/{id}/compliance
PUT /api/v1/documents/{id}/compliance
POST /api/v1/documents/{id}/legal-hold
DELETE /api/v1/documents/{id}/legal-hold
GET /api/v1/compliance/retention-summary
GET /api/v1/compliance/audit-log
Enhanced Search Filters
{
"q": "privacy policy",
"filter": {
"regulations": ["HIPAA-164.316"],
"contains_phi": true,
"status": "effective",
"retain_until_after": "2025-01-01"
}
}