Saltar al contenido principal

Monitoring & Troubleshooting

Sistema completo de monitoreo y troubleshooting para la integración con Facebook Ads.

Dashboard de Monitoreo

Estado General del Sistema

-- Vista general de todas las configuraciones
SELECT
system_name,
ad_account_id,
array_length(tenant_ids, 1) as num_tenants,
is_active,
schedule_hours,
last_sync,
next_sync,
total_customers,
sync_status,
CASE
WHEN error_log IS NULL THEN '✅ OK'
ELSE '❌ ERROR'
END as health_status
FROM cdp.facebook_configurations
ORDER BY last_sync DESC NULLS LAST;

Últimas Sincronizaciones

-- Historial de sincronizaciones (últimas 24 horas)
SELECT
system_name,
last_sync,
total_customers,
sync_status,
custom_audience_id,
EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - last_sync))/3600 as hours_since_last_sync
FROM cdp.facebook_configurations
WHERE last_sync > CURRENT_TIMESTAMP - INTERVAL '24 hours'
ORDER BY last_sync DESC;

Próximas Sincronizaciones

-- Schedule de próximas sincronizaciones
-- Upcoming sync schedule, one row per configuration.
-- hours_until_sync is negative when the sync is overdue and NULL when
-- next_sync is NULL.
SELECT
    system_name,
    next_sync,
    EXTRACT(EPOCH FROM (next_sync - CURRENT_TIMESTAMP))/3600 as hours_until_sync,
    schedule_hours,
    is_active,
    CASE
        WHEN is_active AND next_sync < CURRENT_TIMESTAMP THEN '⚠️ OVERDUE'
        -- An active configuration without next_sync has nothing scheduled;
        -- do not report it as healthy.
        WHEN is_active AND next_sync IS NULL THEN '⚠️ NOT SCHEDULED'
        WHEN is_active THEN '✅ SCHEDULED'
        ELSE '⏸️ PAUSED'
    END as status
FROM cdp.facebook_configurations
ORDER BY next_sync ASC;

Scripts de Monitoreo

1. Check Facebook Status

Script: check_facebook_status.py

#!/usr/bin/env python3
"""
Check Facebook Ads Sync Status
Ubicación: nerdistan-worker/scripts/monitoring/
"""

import psycopg2
import os
from datetime import datetime, timedelta
from tabulate import tabulate

def check_status():
    """Print the sync status of every Facebook Ads configuration as a table.

    Connects using the ``DATABASE_URL`` environment variable, reads every row
    of ``cdp.facebook_configurations`` and prints one grid row per
    configuration, followed by the time of the check.

    Raises:
        KeyError: If ``DATABASE_URL`` is not set.
    """
    conn = psycopg2.connect(os.environ['DATABASE_URL'])
    try:
        cursor = conn.cursor()
        try:
            # Main query
            cursor.execute("""
                SELECT
                    system_name,
                    ad_account_id,
                    array_length(tenant_ids, 1) as num_tenants,
                    is_active,
                    last_sync,
                    next_sync,
                    total_customers,
                    sync_status,
                    error_log
                FROM cdp.facebook_configurations
                ORDER BY system_name
            """)
            configs = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Release the connection even when the query fails
        conn.close()

    # Format output
    headers = ['System', 'Ad Account', 'Tenants', 'Active', 'Last Sync',
               'Next Sync', 'Customers', 'Status', 'Error']

    status_icons = {
        'success': '✅',
        'failed': '❌',
        'pending': '⏳',
        None: '⚪',
    }

    table_data = []
    for system, account, tenants, active, last, next_s, customers, status, error in configs:
        # Time since the last sync. datetime.now() is given the timestamp's
        # own tzinfo so the subtraction works for naive and tz-aware values.
        if last:
            hours_ago = (datetime.now(last.tzinfo) - last).total_seconds() / 3600
            last_str = f"{int(hours_ago)}h ago"
        else:
            last_str = "Never"

        # Time until the next sync (negative means overdue)
        if next_s:
            hours_until = (next_s - datetime.now(next_s.tzinfo)).total_seconds() / 3600
            if hours_until < 0:
                next_str = f"⚠️ OVERDUE ({abs(int(hours_until))}h)"
            else:
                next_str = f"{int(hours_until)}h"
        else:
            next_str = "Not scheduled"

        # Unknown status values fall back to a question mark
        status_icon = status_icons.get(status, '❓')

        table_data.append([
            system,
            # Last 10 characters; a NULL ad account must not crash the report
            account[-10:] if account else '-',
            tenants or 0,
            '✅' if active else '❌',
            last_str,
            next_str,
            f"{customers:,}" if customers else 0,
            f"{status_icon} {status or 'N/A'}",
            # Long error messages are truncated to keep the table readable
            error[:50] + '...' if error and len(error) > 50 else (error or '-')
        ])

    print("\n" + "="*120)
    print("FACEBOOK ADS SYNC STATUS".center(120))
    print("="*120 + "\n")
    print(tabulate(table_data, headers=headers, tablefmt='grid'))
    print(f"\nLast checked: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

if __name__ == "__main__":
check_status()

Uso:

cd /app/scripts/monitoring
python check_facebook_status.py

2. Monitor Facebook Sync

Script: monitor_facebook_sync.py

#!/usr/bin/env python3
"""
Monitor Facebook Sync in Real-time
"""

import time
import psycopg2
import os
from datetime import datetime

def monitor_sync(interval=60):
    """Print the status of every active configuration, refreshing forever.

    Each iteration clears the terminal, opens a fresh connection using the
    ``DATABASE_URL`` environment variable, prints one section per active row
    of ``cdp.facebook_configurations`` and then sleeps.

    Args:
        interval: Seconds to wait between refreshes.

    The loop never returns on its own; stop it with Ctrl+C
    (``KeyboardInterrupt``).
    """
    # 'clear' does not exist on Windows, where the equivalent is 'cls'
    clear_command = 'cls' if os.name == 'nt' else 'clear'

    while True:
        os.system(clear_command)

        conn = psycopg2.connect(os.environ['DATABASE_URL'])
        try:
            cursor = conn.cursor()
            try:
                # Current status
                cursor.execute("""
                    SELECT
                        system_name,
                        last_sync,
                        next_sync,
                        total_customers,
                        sync_status
                    FROM cdp.facebook_configurations
                    WHERE is_active = true
                    ORDER BY system_name
                """)
                rows = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # Close on every iteration, including when the query raises or
            # the user interrupts, so connections are never leaked
            conn.close()

        print(f"\n{'='*80}")
        print(f"FACEBOOK ADS SYNC MONITOR - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"{'='*80}\n")

        for system, last, next_s, customers, status in rows:
            print(f"System: {system}")
            print(f" Last Sync: {last or 'Never'}")
            print(f" Next Sync: {next_s or 'Not scheduled'}")
            print(f" Customers: {customers:,}" if customers else " Customers: 0")
            print(f" Status: {status or 'N/A'}")

            # Alerts. datetime.now() is given next_s's own tzinfo so the
            # comparison works for naive and tz-aware timestamps.
            if next_s and next_s < datetime.now(next_s.tzinfo):
                print(" ⚠️ ALERT: Sync overdue!")

            if status == 'failed':
                print(" ❌ ERROR: Last sync failed!")

            print()

        print(f"Refreshing in {interval} seconds... (Ctrl+C to stop)")
        time.sleep(interval)

if __name__ == "__main__":
try:
monitor_sync(interval=60)
except KeyboardInterrupt:
print("\n\nMonitoring stopped.")

Uso:

cd /app/scripts/monitoring
python monitor_facebook_sync.py # Refresh cada 60 segundos

3. Force Sync Now

Script: force_facebook_sync_now.py

#!/usr/bin/env python3
"""
Force Facebook Sync Immediately
"""

import sys
import psycopg2
import os
from start_facebook_automation import sync_all_systems

def force_sync(system_name=None):
    """Force an immediate sync of one system or of every active system.

    Args:
        system_name: Name of the configuration to sync. When omitted (or
            empty), ``sync_all_systems()`` is called instead.

    Returns:
        None. When ``system_name`` does not match an active configuration an
        error message is printed and nothing else is done.
    """
    conn = psycopg2.connect(os.environ['DATABASE_URL'])
    try:
        cursor = conn.cursor()
        try:
            if system_name:
                # Sync one specific system
                cursor.execute("""
                    SELECT system_name, ad_account_id, tenant_ids
                    FROM cdp.facebook_configurations
                    WHERE system_name = %s AND is_active = true
                """, (system_name,))

                config = cursor.fetchone()
                if not config:
                    print(f"❌ Error: System '{system_name}' not found or not active")
                    # The finally blocks below still close cursor and connection
                    return

                print(f"🚀 Forcing sync for: {system_name}")
                # TODO: run the sync for `config`; this example leaves the
                # per-system call unimplemented.
            else:
                # Sync every active system
                print("🚀 Forcing sync for ALL active systems...")
                sync_all_systems()
        finally:
            cursor.close()
    finally:
        # Runs on the early return and on exceptions too, so the connection
        # is never leaked
        conn.close()

if __name__ == "__main__":
system = sys.argv[1] if len(sys.argv) > 1 else None
force_sync(system)

Uso:

# Sync específico
python force_facebook_sync_now.py kangoo_system

# Sync todos
python force_facebook_sync_now.py

Railway Monitoring

Ver Logs en Tiempo Real

# Logs completos
railway logs --service facebook-automation

# Solo errores
railway logs --service facebook-automation | grep ERROR

# Últimas 100 líneas
railway logs --service facebook-automation --tail 100

# Seguir logs (como tail -f)
railway logs --service facebook-automation --follow

Verificar Variables de Entorno

# Listar todas las variables
railway variables --service facebook-automation

# Verificar variable específica
railway variables --service facebook-automation | grep FACEBOOK_ACCESS_TOKEN

Restart Service

# Restart del servicio
railway service restart facebook-automation

# Redeploy completo
cd /path/to/nerdistan-worker
git push origin main # Trigger automatic redeploy

Alertas y Notificaciones

Crear Alertas por Email (Futuro)

# Ejemplo de sistema de alertas
def check_and_alert():
    """Look for sync problems and send one alert email if any are found.

    A problem is an active row of ``cdp.facebook_configurations`` whose
    ``sync_status`` is ``'failed'`` or whose ``next_sync`` is more than two
    hours in the past. The rows found are passed to ``format_issues`` and the
    result is sent with ``send_alert_email``.
    """
    conn = psycopg2.connect(os.environ['DATABASE_URL'])
    try:
        cursor = conn.cursor()
        try:
            # Find problems
            cursor.execute("""
                SELECT
                    system_name,
                    sync_status,
                    error_log,
                    last_sync
                FROM cdp.facebook_configurations
                WHERE is_active = true
                AND (
                    sync_status = 'failed'
                    OR next_sync < CURRENT_TIMESTAMP - INTERVAL '2 hours'
                )
            """)
            issues = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Close before sending the email so a failed query or a slow/failed
        # send never leaves the connection open
        conn.close()

    if issues:
        # Send the alert email
        send_alert_email(
            subject=f"Facebook Ads Sync Alert - {len(issues)} issues found",
            body=format_issues(issues)
        )

Troubleshooting Guide

Problema: Sync no se ejecuta

Diagnóstico:

-- Verificar configuración
SELECT
system_name,
is_active,
next_sync,
sync_status
FROM cdp.facebook_configurations;

Soluciones:

  1. Verificar que is_active = true
  2. Verificar que next_sync está en el pasado
  3. Restart del worker: railway service restart facebook-automation

Problema: No se encuentran clientes

Diagnóstico:

-- Contar clientes elegibles
SELECT
c.tenant_id,
COUNT(*) as total,
COUNT(CASE WHEN c.email IS NOT NULL AND c.email != '' THEN 1 END) as with_email,
COUNT(CASE WHEN r.rfm_segment NOT IN ('Lost', 'Hibernating') THEN 1 END) as active
FROM customers_master c
LEFT JOIN cdp_rfm_analysis r ON c.id = r.customer_id
WHERE c.tenant_id = ANY(ARRAY[20, 25]) -- Tus tenant IDs
GROUP BY c.tenant_id;

Soluciones:

  1. Verificar que los tenants tienen customers
  2. Verificar que los customers tienen email
  3. Verificar que el análisis RFM está actualizado

Problema: Facebook API Error

Errores comunes:

"Invalid OAuth access token"

# Verificar token
# -G sends the data pairs as query-string parameters; with plain "-X GET -d"
# curl would put them in a request body instead.
# --data-urlencode escapes characters such as "|" that appear in app tokens.
curl -G "https://graph.facebook.com/v18.0/debug_token" \
  --data-urlencode "input_token={ACCESS_TOKEN}" \
  --data-urlencode "access_token={APP_ACCESS_TOKEN}"

# Solución: Regenerar y actualizar token
railway variables --set "FACEBOOK_ACCESS_TOKEN=new_token"

"Ad account does not exist"

-- Verificar formato del ad account
SELECT
system_name,
ad_account_id,
CASE
WHEN ad_account_id LIKE 'act_%' THEN 'OK'
ELSE 'INVALID - must start with act_'
END as validation
FROM cdp.facebook_configurations;

"Rate limit exceeded"

  • Solución: El sistema tiene retry automático, esperar 1 hora

Problema: Audiencia no actualiza en Facebook

Verificar:

  1. Ir a Facebook Ads Manager > Audiences
  2. Buscar la audiencia
  3. Verificar "Last Updated" timestamp
  4. Verificar "Approximate Audience Size"

Nota: Facebook puede tardar entre 24 y 48 horas en procesar completamente una audiencia.

Métricas de Salud

KPIs a Monitorear

-- Dashboard de KPIs
-- Aggregate every configuration into a single row of counters.
WITH stats AS (
    SELECT
        COUNT(*)                                        AS total_systems,
        COUNT(*) FILTER (WHERE is_active)               AS active_systems,
        COUNT(*) FILTER (WHERE sync_status = 'success') AS successful_syncs,
        COUNT(*) FILTER (WHERE sync_status = 'failed')  AS failed_syncs,
        SUM(total_customers)                            AS total_customers_synced,
        MAX(last_sync)                                  AS last_sync_time
    FROM cdp.facebook_configurations
)
SELECT
    total_systems          AS "Total Systems",
    active_systems         AS "Active Systems",
    successful_syncs       AS "Successful Syncs",
    failed_syncs           AS "Failed Syncs",
    total_customers_synced AS "Total Customers",
    last_sync_time         AS "Last Sync Time",
    -- Healthy with no failures, Warning below 30% failed, Critical otherwise
    CASE
        WHEN failed_syncs = 0 THEN '✅ Healthy'
        WHEN failed_syncs < total_systems * 0.3 THEN '⚠️ Warning'
        ELSE '❌ Critical'
    END AS "System Health"
FROM stats;

Performance Metrics

-- Tiempo promedio de sincronización (si se registra)
SELECT
system_name,
AVG(EXTRACT(EPOCH FROM (updated_at - last_sync))) / 60 as avg_sync_minutes,
total_customers,
total_customers::float / NULLIF(AVG(EXTRACT(EPOCH FROM (updated_at - last_sync))) / 60, 0) as customers_per_minute
FROM cdp.facebook_configurations
WHERE last_sync IS NOT NULL
GROUP BY system_name, total_customers
ORDER BY total_customers DESC;

Checklist de Monitoreo Diario

  • ✅ Verificar que todos los sistemas activos sincronizaron en las últimas 48 horas
  • ✅ Revisar logs de Railway para errores
  • ✅ Verificar que no hay sync_status = 'failed'
  • ✅ Confirmar que audiences en Facebook tienen size correcto
  • ✅ Verificar que access token no expira pronto (>7 días restantes)
  • ✅ Revisar métricas de performance
  • ✅ Verificar que next_sync está programado correctamente

Próximos Pasos