Saltar al contenido principal

Schema del CDP - Base de Datos

🗄️ Arquitectura de Base de Datos

Tecnología

  • Engine: PostgreSQL 14+
  • Deployment: Railway PostgreSQL
  • Connections: Pool de conexiones con pgbouncer
  • Backup: Automático diario + WAL-E

Diseño Multi-tenant

-- Todas las tablas principales incluyen tenant_id
-- Row-Level Security (RLS) automático por tenant
-- Índices optimizados para consultas tenant-specific

📊 Schema Overview

Core Tables (10)

-- Gestión de tenants y usuarios
tenants -- Organizaciones/marcas
users -- Usuarios del sistema
tenant_users -- Relación many-to-many

-- Datos de clientes
customers -- Perfiles de clientes
customer_orders -- Historial de órdenes
customer_order_items -- Detalles de productos por orden
products -- Catálogo de productos

Analytics Tables (15)

-- Análisis RFM
customer_rfm_analysis -- Scores y segmentos RFM
rfm_segment_definitions -- Definiciones de segmentos

-- Customer Lifetime Value
customer_clv_analysis -- Predicciones CLV
clv_calculation_log -- Log de cálculos

-- Churn Analysis
customer_churn_analysis -- Risk scores y probabilidades
churn_prediction_log -- Historial de predicciones

-- Business Intelligence
business_metrics -- KPIs agregados
customer_segments -- Segmentación dinámica
cohort_analysis -- Análisis de cohortes

Integration Tables (8)

-- Integraciones con servicios externos
tenant_integrations -- Configuración de integraciones
integration_sync_log -- Log de sincronizaciones
vtex_orders -- Raw data de VTEX
mercadolibre_orders -- Raw data de MercadoLibre
google_ads_data -- Datos de Google Ads
facebook_ads_data -- Datos de Facebook Ads

Processing Tables (7)

-- Gestión de procesamiento
processing_jobs -- Cola de trabajos
processing_log -- Log de ejecuciones
data_quality_checks -- Validaciones de calidad
etl_metadata -- Metadata de ETL processes

🏗️ Core Tables Detail

tenants

CREATE TABLE tenants (
id SERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL,
slug VARCHAR(100) UNIQUE NOT NULL,
type VARCHAR(50) DEFAULT 'business', -- trial, business, enterprise
status VARCHAR(50) DEFAULT 'active',
settings JSONB DEFAULT '{}',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Ejemplo de settings JSONB
{
"timezone": "America/Argentina/Buenos_Aires",
"currency": "ARS",
"language": "es",
"data_retention_days": 2555,
"max_customers": 1000000,
"features": {
"rfm_analysis": true,
"clv_prediction": true,
"churn_detection": true
}
}

customers

CREATE TABLE customers (
id SERIAL PRIMARY KEY,
tenant_id INTEGER NOT NULL REFERENCES tenants(id),
external_id VARCHAR(255), -- ID del sistema origen (VTEX, ML)
email VARCHAR(255),
phone VARCHAR(50),
first_name VARCHAR(255),
last_name VARCHAR(255),
document_number VARCHAR(50),
document_type VARCHAR(20),
birth_date DATE,
gender VARCHAR(20),
city VARCHAR(255),
state VARCHAR(255),
country VARCHAR(3) DEFAULT 'ARG',
registration_date TIMESTAMP,
first_purchase_date TIMESTAMP,
last_purchase_date TIMESTAMP,
total_orders INTEGER DEFAULT 0,
total_spent DECIMAL(15,2) DEFAULT 0,
avg_order_value DECIMAL(15,2) DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

-- Índices multi-tenant
CONSTRAINT uk_customer_tenant_email UNIQUE(tenant_id, email),
CONSTRAINT uk_customer_tenant_document UNIQUE(tenant_id, document_number)
);

-- Índices optimizados
CREATE INDEX idx_customers_tenant_id ON customers(tenant_id);
CREATE INDEX idx_customers_tenant_email ON customers(tenant_id, email);
CREATE INDEX idx_customers_tenant_document ON customers(tenant_id, document_number);
CREATE INDEX idx_customers_last_purchase ON customers(tenant_id, last_purchase_date);

customer_orders

CREATE TABLE customer_orders (
id SERIAL PRIMARY KEY,
tenant_id INTEGER NOT NULL REFERENCES tenants(id),
customer_id INTEGER NOT NULL REFERENCES customers(id),
external_order_id VARCHAR(255), -- ID del sistema origen
order_number VARCHAR(255),
order_date TIMESTAMP NOT NULL,
order_status VARCHAR(100),
payment_status VARCHAR(100),
payment_method VARCHAR(100),
shipping_method VARCHAR(100),

-- Montos
subtotal DECIMAL(15,2) DEFAULT 0,
tax_amount DECIMAL(15,2) DEFAULT 0,
shipping_cost DECIMAL(15,2) DEFAULT 0,
discount_amount DECIMAL(15,2) DEFAULT 0,
total_amount DECIMAL(15,2) NOT NULL,

-- Dirección de envío
shipping_address JSONB,

-- Metadata
source_system VARCHAR(50), -- vtex, mercadolibre, etc.
raw_data JSONB, -- Datos originales del sistema

created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

CONSTRAINT uk_order_tenant_external UNIQUE(tenant_id, external_order_id, source_system)
);

-- Índices para performance
CREATE INDEX idx_orders_tenant_customer ON customer_orders(tenant_id, customer_id);
CREATE INDEX idx_orders_tenant_date ON customer_orders(tenant_id, order_date);
CREATE INDEX idx_orders_tenant_status ON customer_orders(tenant_id, order_status);

📈 Analytics Tables Detail

customer_rfm_analysis

CREATE TABLE customer_rfm_analysis (
id SERIAL PRIMARY KEY,
tenant_id INTEGER NOT NULL REFERENCES tenants(id),
customer_id INTEGER NOT NULL REFERENCES customers(id),

-- Métricas RFM
recency_days INTEGER NOT NULL,
frequency INTEGER NOT NULL,
monetary DECIMAL(15,2) NOT NULL,

-- Scores (1-5)
recency_score INTEGER CHECK (recency_score BETWEEN 1 AND 5),
frequency_score INTEGER CHECK (frequency_score BETWEEN 1 AND 5),
monetary_score INTEGER CHECK (monetary_score BETWEEN 1 AND 5),

-- Segmento final
rfm_score VARCHAR(3), -- "555", "444", etc.
rfm_segment VARCHAR(50), -- "Champions", "Loyal Customers", etc.

-- Metadata
calculation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
is_current BOOLEAN DEFAULT TRUE,

CONSTRAINT uk_rfm_tenant_customer_current
UNIQUE(tenant_id, customer_id) WHERE is_current = TRUE
);

-- Índices para análisis
CREATE INDEX idx_rfm_tenant_segment ON customer_rfm_analysis(tenant_id, rfm_segment);
CREATE INDEX idx_rfm_tenant_score ON customer_rfm_analysis(tenant_id, rfm_score);
CREATE INDEX idx_rfm_calculation_date ON customer_rfm_analysis(calculation_date);

customer_clv_analysis

CREATE TABLE customer_clv_analysis (
id SERIAL PRIMARY KEY,
tenant_id INTEGER NOT NULL REFERENCES tenants(id),
customer_id INTEGER NOT NULL REFERENCES customers(id),

-- Valores CLV
historical_value DECIMAL(15,2) NOT NULL DEFAULT 0,
predicted_value DECIMAL(15,2) NOT NULL DEFAULT 0,
total_clv DECIMAL(15,2) GENERATED ALWAYS AS (historical_value + predicted_value) STORED,

-- Clasificación
clv_tier VARCHAR(20), -- high, medium, low
percentile_rank INTEGER CHECK (percentile_rank BETWEEN 1 AND 100),

-- Factores de cálculo
rfm_multiplier DECIMAL(5,2) DEFAULT 1.0,
churn_probability DECIMAL(5,4) DEFAULT 0.0,
confidence_score DECIMAL(5,4) DEFAULT 0.0,

-- Metadata
calculation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
model_version VARCHAR(20) DEFAULT 'v1.0',
is_current BOOLEAN DEFAULT TRUE,

CONSTRAINT uk_clv_tenant_customer_current
UNIQUE(tenant_id, customer_id) WHERE is_current = TRUE
);

-- Índices para queries comunes
CREATE INDEX idx_clv_tenant_tier ON customer_clv_analysis(tenant_id, clv_tier);
CREATE INDEX idx_clv_tenant_value ON customer_clv_analysis(tenant_id, total_clv DESC);
CREATE INDEX idx_clv_confidence ON customer_clv_analysis(tenant_id, confidence_score DESC);

customer_churn_analysis

CREATE TABLE customer_churn_analysis (
id SERIAL PRIMARY KEY,
tenant_id INTEGER NOT NULL REFERENCES tenants(id),
customer_id INTEGER NOT NULL REFERENCES customers(id),

-- Análisis de churn
churn_probability DECIMAL(5,4) NOT NULL CHECK (churn_probability BETWEEN 0 AND 1),
churn_risk VARCHAR(20) NOT NULL, -- active, at_risk, churning, churned
days_since_last_order INTEGER,
predicted_churn_date DATE,

-- Factores de riesgo
recency_factor DECIMAL(5,4) DEFAULT 0,
frequency_factor DECIMAL(5,4) DEFAULT 0,
engagement_factor DECIMAL(5,4) DEFAULT 0,

-- Metadata del modelo
model_confidence DECIMAL(5,4) DEFAULT 0,
calculation_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
model_version VARCHAR(20) DEFAULT 'v1.0',
is_current BOOLEAN DEFAULT TRUE,

CONSTRAINT uk_churn_tenant_customer_current
UNIQUE(tenant_id, customer_id) WHERE is_current = TRUE
);

-- Índices para alertas y dashboards
CREATE INDEX idx_churn_tenant_risk ON customer_churn_analysis(tenant_id, churn_risk);
CREATE INDEX idx_churn_tenant_probability ON customer_churn_analysis(tenant_id, churn_probability DESC);
CREATE INDEX idx_churn_at_risk ON customer_churn_analysis(tenant_id, calculation_date)
WHERE churn_risk = 'at_risk';

🔌 Integration Tables

tenant_integrations

CREATE TABLE tenant_integrations (
id SERIAL PRIMARY KEY,
tenant_id INTEGER NOT NULL REFERENCES tenants(id),
integration_type VARCHAR(50) NOT NULL, -- vtex, mercadolibre, google_ads, etc.

-- Configuración específica por tipo
account_name VARCHAR(255),
environment VARCHAR(100),

-- Credenciales (encriptadas)
credentials JSONB NOT NULL,

-- Estado
is_active BOOLEAN DEFAULT TRUE,
last_sync TIMESTAMP,
last_sync_status VARCHAR(50), -- success, error, in_progress
sync_frequency VARCHAR(50) DEFAULT 'daily', -- hourly, daily, weekly, manual

-- Metadata
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

CONSTRAINT uk_tenant_integration_type UNIQUE(tenant_id, integration_type, account_name)
);

-- Ejemplo de credentials JSONB para VTEX
{
"app_key": "encrypted_value",
"app_token": "encrypted_value",
"api_url": "https://chelseaio.vtexcommercestable.com.br"
}

🔒 Row Level Security (RLS)

Implementación Automática

-- Habilitar RLS en todas las tablas principales
ALTER TABLE customers ENABLE ROW LEVEL SECURITY;
ALTER TABLE customer_orders ENABLE ROW LEVEL SECURITY;
ALTER TABLE customer_rfm_analysis ENABLE ROW LEVEL SECURITY;
ALTER TABLE customer_clv_analysis ENABLE ROW LEVEL SECURITY;
ALTER TABLE customer_churn_analysis ENABLE ROW LEVEL SECURITY;

-- Política de aislamiento por tenant
CREATE POLICY tenant_isolation_customers ON customers
FOR ALL TO app_role
USING (tenant_id = current_setting('app.current_tenant_id')::INTEGER);

CREATE POLICY tenant_isolation_orders ON customer_orders
FOR ALL TO app_role
USING (tenant_id = current_setting('app.current_tenant_id')::INTEGER);

-- Repetir para todas las tablas con tenant_id...

Uso en Aplicación

# Establecer contexto de tenant antes de queries
def set_tenant_context(tenant_id):
cursor.execute(
"SET app.current_tenant_id = %s",
(tenant_id,)
)

# Ejemplo de uso
set_tenant_context(56)
customers = session.query(Customer).all() # Solo verá customers del tenant 56

📊 Views y Funciones

Vista Consolidada de Clientes

CREATE VIEW customer_360_view AS
SELECT
c.id,
c.tenant_id,
c.email,
c.first_name,
c.last_name,
c.total_orders,
c.total_spent,
c.last_purchase_date,

-- RFM Data
rfm.recency_days,
rfm.frequency,
rfm.monetary,
rfm.rfm_segment,

-- CLV Data
clv.total_clv,
clv.clv_tier,

-- Churn Data
churn.churn_probability,
churn.churn_risk

FROM customers c
LEFT JOIN customer_rfm_analysis rfm ON c.id = rfm.customer_id AND rfm.is_current = true
LEFT JOIN customer_clv_analysis clv ON c.id = clv.customer_id AND clv.is_current = true
LEFT JOIN customer_churn_analysis churn ON c.id = churn.customer_id AND churn.is_current = true;

🔧 Maintenance y Performance

Particionado por Fecha

-- Particionado mensual para tablas de log grandes
CREATE TABLE processing_log (
id SERIAL,
tenant_id INTEGER,
process_type VARCHAR(50),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
...
) PARTITION BY RANGE (created_at);

-- Crear particiones automáticamente
CREATE TABLE processing_log_2024_09 PARTITION OF processing_log
FOR VALUES FROM ('2024-09-01') TO ('2024-10-01');

Índices Especializados

-- Índice para queries de dashboard
CREATE INDEX idx_customer_dashboard ON customers
(tenant_id, last_purchase_date DESC, total_spent DESC)
WHERE last_purchase_date IS NOT NULL;

-- Índice para análisis temporal
CREATE INDEX idx_orders_time_series ON customer_orders
(tenant_id, order_date, total_amount)
WHERE order_status = 'completed';

Estadísticas y Vacuuming

-- Auto-vacuum optimizado
ALTER TABLE customers SET (
autovacuum_vacuum_scale_factor = 0.1,
autovacuum_analyze_scale_factor = 0.05
);

-- Estadísticas extendidas para queries complejas
CREATE STATISTICS customer_rfm_stats ON tenant_id, rfm_segment, total_spent
FROM customers;

💡 Tip: El diseño multi-tenant con RLS garantiza aislamiento de datos sin duplicar la estructura. Cada query automáticamente filtra por tenant_id actual.