Monitoring & Troubleshooting
Sistema completo de monitoreo y troubleshooting para la integración con Facebook Ads.
Dashboard de Monitoreo
Estado General del Sistema
-- Overview of every Facebook sync configuration, most recently synced first.
-- Rows whose last_sync is NULL are listed last.
SELECT
    system_name,
    ad_account_id,
    -- array_length() returns NULL for an empty (or NULL) array; report 0 instead.
    COALESCE(array_length(tenant_ids, 1), 0) AS num_tenants,
    is_active,
    schedule_hours,
    last_sync,
    next_sync,
    total_customers,
    sync_status,
    -- Health is derived only from error_log: any non-NULL value is flagged as an error.
    CASE
        WHEN error_log IS NULL THEN '✅ OK'
        ELSE '❌ ERROR'
    END AS health_status
FROM cdp.facebook_configurations
ORDER BY last_sync DESC NULLS LAST;
Últimas Sincronizaciones
-- Sync history: configurations whose last_sync falls within the past 24 hours,
-- newest first, with the elapsed time since that sync expressed in hours.
SELECT
    system_name,
    last_sync,
    total_customers,
    sync_status,
    custom_audience_id,
    EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - last_sync)) / 3600 AS hours_since_last_sync
FROM cdp.facebook_configurations
WHERE last_sync > CURRENT_TIMESTAMP - INTERVAL '24 hours'
ORDER BY last_sync DESC;
Próximas Sincronizaciones
-- Schedule of upcoming syncs, soonest first.
-- hours_until_sync is negative when the scheduled time has already passed.
SELECT
    system_name,
    next_sync,
    EXTRACT(EPOCH FROM (next_sync - CURRENT_TIMESTAMP)) / 3600 AS hours_until_sync,
    schedule_hours,
    is_active,
    CASE
        WHEN is_active AND next_sync < CURRENT_TIMESTAMP THEN '⚠️ OVERDUE'
        -- An active configuration without a next_sync has nothing scheduled;
        -- do not report it as scheduled.
        WHEN is_active AND next_sync IS NULL THEN '❓ NOT SCHEDULED'
        WHEN is_active THEN '✅ SCHEDULED'
        ELSE '⏸️ PAUSED'
    END AS status
FROM cdp.facebook_configurations
ORDER BY next_sync ASC;
Scripts de Monitoreo
1. Check Facebook Status
Script: check_facebook_status.py
#!/usr/bin/env python3
"""
Check Facebook Ads Sync Status
Ubicación: nerdistan-worker/scripts/monitoring/
"""
import psycopg2
import os
from datetime import datetime, timedelta
from tabulate import tabulate
def check_status():
    """Print a status table for every Facebook Ads sync configuration.

    Connects to the database named by the ``DATABASE_URL`` environment
    variable, reads all rows of ``cdp.facebook_configurations`` and prints
    one table row per configuration (rendered with ``tabulate``), followed
    by the time of the check.

    Raises:
        KeyError: if ``DATABASE_URL`` is not set.
    """
    conn = psycopg2.connect(os.environ['DATABASE_URL'])
    try:
        cursor = conn.cursor()
        try:
            # Main query
            cursor.execute("""
                SELECT
                    system_name,
                    ad_account_id,
                    array_length(tenant_ids, 1) as num_tenants,
                    is_active,
                    last_sync,
                    next_sync,
                    total_customers,
                    sync_status,
                    error_log
                FROM cdp.facebook_configurations
                ORDER BY system_name
            """)
            configs = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Format the output
    headers = ['System', 'Ad Account', 'Tenants', 'Active', 'Last Sync',
               'Next Sync', 'Customers', 'Status', 'Error']
    status_icons = {
        'success': '✅',
        'failed': '❌',
        'pending': '⏳',
        None: '⚪'
    }
    table_data = []
    for system, account, tenants, active, last, next_s, customers, status, error in configs:
        # Time since the last sync. datetime.now(tz) mirrors the awareness of
        # the stored value, so both naive and timezone-aware timestamps work
        # (subtracting a naive "now" from an aware value raises TypeError).
        if last:
            hours_ago = (datetime.now(last.tzinfo) - last).total_seconds() / 3600
            last_str = f"{int(hours_ago)}h ago"
        else:
            last_str = "Never"

        # Time until the next sync; a negative value means it is overdue.
        if next_s:
            hours_until = (next_s - datetime.now(next_s.tzinfo)).total_seconds() / 3600
            if hours_until < 0:
                next_str = f"⚠️ OVERDUE ({abs(int(hours_until))}h)"
            else:
                next_str = f"{int(hours_until)}h"
        else:
            next_str = "Not scheduled"

        # Unknown status values fall back to a question mark.
        status_icon = status_icons.get(status, '❓')

        # Keep the error column narrow: show at most 50 characters.
        if error and len(error) > 50:
            error_str = error[:50] + '...'
        else:
            error_str = error or '-'

        table_data.append([
            system,
            # Last 10 characters only; guard against a missing ad account id.
            account[-10:] if account else '-',
            tenants or 0,
            '✅' if active else '❌',
            last_str,
            next_str,
            f"{customers:,}" if customers else 0,
            f"{status_icon} {status or 'N/A'}",
            error_str
        ])

    print("\n" + "="*120)
    print("FACEBOOK ADS SYNC STATUS".center(120))
    print("="*120 + "\n")
    print(tabulate(table_data, headers=headers, tablefmt='grid'))
    print(f"\nLast checked: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")


if __name__ == "__main__":
    check_status()
Uso:
cd /app/scripts/monitoring
python check_facebook_status.py
2. Monitor Facebook Sync
Script: monitor_facebook_sync.py
#!/usr/bin/env python3
"""
Monitor Facebook Sync in Real-time
"""
import time
import psycopg2
import os
from datetime import datetime
def monitor_sync(interval=60):
    """Continuously print the sync state of every active configuration.

    On each cycle the terminal is cleared, the active rows of
    ``cdp.facebook_configurations`` are read over a fresh connection
    (``DATABASE_URL`` environment variable) and printed, and the function
    sleeps for ``interval`` seconds. It loops until interrupted.

    Args:
        interval: seconds to wait between refreshes.
    """
    while True:
        os.system('clear')
        conn = psycopg2.connect(os.environ['DATABASE_URL'])
        try:
            cursor = conn.cursor()
            try:
                # Current status
                cursor.execute("""
                    SELECT
                        system_name,
                        last_sync,
                        next_sync,
                        total_customers,
                        sync_status
                    FROM cdp.facebook_configurations
                    WHERE is_active = true
                    ORDER BY system_name
                """)
                rows = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # Release the connection even when the query fails, so a failing
            # cycle does not leave a connection open.
            conn.close()

        print(f"\n{'='*80}")
        print(f"FACEBOOK ADS SYNC MONITOR - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"{'='*80}\n")
        for system, last, next_s, customers, status in rows:
            print(f"System: {system}")
            print(f" Last Sync: {last or 'Never'}")
            print(f" Next Sync: {next_s or 'Not scheduled'}")
            print(f" Customers: {customers:,}" if customers else " Customers: 0")
            print(f" Status: {status or 'N/A'}")
            # Alerts. datetime.now(tz) mirrors the awareness of next_s, so the
            # comparison works for naive and timezone-aware timestamps alike.
            if next_s and next_s < datetime.now(next_s.tzinfo):
                print(f" ⚠️ ALERT: Sync overdue!")
            if status == 'failed':
                print(f" ❌ ERROR: Last sync failed!")
            print()

        print(f"Refreshing in {interval} seconds... (Ctrl+C to stop)")
        time.sleep(interval)


if __name__ == "__main__":
    try:
        monitor_sync(interval=60)
    except KeyboardInterrupt:
        print("\n\nMonitoring stopped.")
Uso:
cd /app/scripts/monitoring
python monitor_facebook_sync.py # Refresh cada 60 segundos
3. Force Sync Now
Script: force_facebook_sync_now.py
#!/usr/bin/env python3
"""
Force Facebook Sync Immediately
"""
import sys
import psycopg2
import os
from start_facebook_automation import sync_all_systems
def force_sync(system_name=None):
    """Force an immediate Facebook sync.

    Args:
        system_name: name of a single configuration to sync. When omitted
            (or empty), ``sync_all_systems()`` is called instead.

    Raises:
        KeyError: if the ``DATABASE_URL`` environment variable is not set.
    """
    conn = psycopg2.connect(os.environ['DATABASE_URL'])
    try:
        cursor = conn.cursor()
        try:
            if system_name:
                # Sync one specific system: it must exist and be active.
                cursor.execute("""
                    SELECT system_name, ad_account_id, tenant_ids
                    FROM cdp.facebook_configurations
                    WHERE system_name = %s AND is_active = true
                """, (system_name,))
                config = cursor.fetchone()
                if not config:
                    print(f"❌ Error: System '{system_name}' not found or not active")
                    # The finally blocks below still close cursor and connection.
                    return
                print(f"🚀 Forcing sync for: {system_name}")
                # Run the sync...
                # NOTE(review): the single-system sync is still a placeholder;
                # nothing beyond the message above is executed here.
            else:
                # Sync all active systems
                print("🚀 Forcing sync for ALL active systems...")
                sync_all_systems()
        finally:
            cursor.close()
    finally:
        # Always release the connection, including on the early return above.
        conn.close()


if __name__ == "__main__":
    system = sys.argv[1] if len(sys.argv) > 1 else None
    force_sync(system)
Uso:
# Sync específico
python force_facebook_sync_now.py kangoo_system
# Sync todos
python force_facebook_sync_now.py
Railway Monitoring
Ver Logs en Tiempo Real
# Logs completos
railway logs --service facebook-automation
# Solo errores
railway logs --service facebook-automation | grep ERROR
# Últimas 100 líneas
railway logs --service facebook-automation --tail 100
# Seguir logs (como tail -f)
railway logs --service facebook-automation --follow
Verificar Variables de Entorno
# Listar todas las variables
railway variables --service facebook-automation
# Verificar variable específica
railway variables --service facebook-automation | grep FACEBOOK_ACCESS_TOKEN
Restart Service
# Restart del servicio
railway service restart facebook-automation
# Redeploy completo
cd /path/to/nerdistan-worker
git push origin main # Trigger automatic redeploy
Alertas y Notificaciones
Crear Alertas por Email (Futuro)
# Ejemplo de sistema de alertas
def check_and_alert():
    """Check for sync problems and send an alert email when any are found.

    A configuration is reported when it is active and either its
    ``sync_status`` is ``'failed'`` or its ``next_sync`` is more than two
    hours in the past.

    NOTE(review): ``send_alert_email`` and ``format_issues`` are not defined
    in this snippet; they are assumed to be provided elsewhere.
    """
    conn = psycopg2.connect(os.environ['DATABASE_URL'])
    try:
        cursor = conn.cursor()
        try:
            # Look for problems
            cursor.execute("""
                SELECT
                    system_name,
                    sync_status,
                    error_log,
                    last_sync
                FROM cdp.facebook_configurations
                WHERE is_active = true
                AND (
                    sync_status = 'failed'
                    OR next_sync < CURRENT_TIMESTAMP - INTERVAL '2 hours'
                )
            """)
            issues = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Release the connection before sending mail, and even when the
        # query fails.
        conn.close()

    if issues:
        # Send the alert email
        send_alert_email(
            subject=f"Facebook Ads Sync Alert - {len(issues)} issues found",
            body=format_issues(issues)
        )
Troubleshooting Guide
Problema: Sync no se ejecuta
Diagnóstico:
-- Diagnostic: list the activation flag, next scheduled sync and sync status
-- of every configuration.
SELECT
    system_name,
    is_active,
    next_sync,
    sync_status
FROM cdp.facebook_configurations;
Soluciones:
- Verificar que is_active = true
- Verificar que next_sync está en el pasado
- Restart del worker:
railway service restart facebook-automation
Problema: No se encuentran clientes
Diagnóstico:
-- Count customers per tenant: all joined rows, those with a non-empty email,
-- and those whose RFM segment is neither 'Lost' nor 'Hibernating'.
-- Customers without an RFM row (rfm_segment IS NULL) are not counted as active.
-- NOTE(review): the counts assume at most one cdp_rfm_analysis row per customer — confirm.
SELECT
    c.tenant_id,
    COUNT(*) AS total,
    COUNT(*) FILTER (WHERE c.email IS NOT NULL AND c.email != '') AS with_email,
    COUNT(*) FILTER (WHERE r.rfm_segment NOT IN ('Lost', 'Hibernating')) AS active
FROM customers_master AS c
LEFT JOIN cdp_rfm_analysis AS r
    ON c.id = r.customer_id
WHERE c.tenant_id = ANY(ARRAY[20, 25]) -- Replace with your tenant IDs
GROUP BY c.tenant_id;
Soluciones:
- Verificar que tenants tienen customers
- Verificar que customers tienen email
- Verificar análisis RFM está actualizado
Problema: Facebook API Error
Errores comunes:
"Invalid OAuth access token"
# Inspect the token. -G makes curl append the data below to the URL as query
# parameters of a GET request; --data-urlencode escapes characters such as "|".
curl -G "https://graph.facebook.com/v18.0/debug_token" \
    --data-urlencode "input_token={ACCESS_TOKEN}" \
    --data-urlencode "access_token={APP_ACCESS_TOKEN}"
# Fix: regenerate the token and update it in Railway
railway variables --set "FACEBOOK_ACCESS_TOKEN=new_token"
"Ad account does not exist"
-- Validate the ad account format: the id must start with the literal prefix "act_".
SELECT
    system_name,
    ad_account_id,
    CASE
        -- "_" is a single-character wildcard in LIKE, so it is escaped here to
        -- match a literal underscore (otherwise e.g. 'actX123' would pass).
        WHEN ad_account_id LIKE 'act\_%' ESCAPE '\' THEN 'OK'
        ELSE 'INVALID - must start with act_'
    END AS validation
FROM cdp.facebook_configurations;
"Rate limit exceeded"
- Solución: El sistema tiene retry automático, esperar 1 hora
Problema: Audiencia no actualiza en Facebook
Verificar:
- Ir a Facebook Ads Manager > Audiences
- Buscar la audiencia
- Verificar "Last Updated" timestamp
- Verificar "Approximate Audience Size"
Nota: Facebook puede tardar hasta 24-48 horas en procesar completamente una audiencia.
Métricas de Salud
KPIs a Monitorear
-- KPI dashboard: a single summary row across all Facebook sync configurations.
WITH stats AS (
    SELECT
        COUNT(*) AS total_systems,
        COUNT(*) FILTER (WHERE is_active) AS active_systems,
        COUNT(*) FILTER (WHERE sync_status = 'success') AS successful_syncs,
        COUNT(*) FILTER (WHERE sync_status = 'failed') AS failed_syncs,
        -- SUM() yields NULL when the table is empty or every value is NULL;
        -- report 0 instead.
        COALESCE(SUM(total_customers), 0) AS total_customers_synced,
        MAX(last_sync) AS last_sync_time
    FROM cdp.facebook_configurations
)
SELECT
    total_systems AS "Total Systems",
    active_systems AS "Active Systems",
    successful_syncs AS "Successful Syncs",
    failed_syncs AS "Failed Syncs",
    total_customers_synced AS "Total Customers",
    last_sync_time AS "Last Sync Time",
    -- Healthy: no failures; Warning: fewer than 30% of all systems failed;
    -- Critical: otherwise.
    CASE
        WHEN failed_syncs = 0 THEN '✅ Healthy'
        WHEN failed_syncs < total_systems * 0.3 THEN '⚠️ Warning'
        ELSE '❌ Critical'
    END AS "System Health"
FROM stats;
Performance Metrics
-- Average sync time per system (only meaningful if it is actually recorded).
-- NOTE(review): uses (updated_at - last_sync) as the sync duration — confirm
-- that these two columns really bracket a sync run.
WITH sync_durations AS (
    SELECT
        system_name,
        total_customers,
        AVG(EXTRACT(EPOCH FROM (updated_at - last_sync))) / 60 AS avg_sync_minutes
    FROM cdp.facebook_configurations
    WHERE last_sync IS NOT NULL
    GROUP BY system_name, total_customers
)
SELECT
    system_name,
    avg_sync_minutes,
    total_customers,
    -- NULLIF avoids a division by zero when the average duration is 0.
    total_customers::float / NULLIF(avg_sync_minutes, 0) AS customers_per_minute
FROM sync_durations
ORDER BY total_customers DESC;
Checklist de Monitoreo Diario
- ✅ Verificar que todos los sistemas activos sincronizaron en las últimas 48 horas
- ✅ Revisar logs de Railway para errores
- ✅ Verificar que no hay sync_status = 'failed'
- ✅ Confirmar que audiences en Facebook tienen size correcto
- ✅ Verificar que access token no expira pronto (>7 días restantes)
- ✅ Revisar métricas de performance
- ✅ Verificar que next_sync está programado correctamente