sync: 2026-03-14 17:28 — 45 files from Alexandria
Some checks failed
Lint & Format / detect (push) Has been cancelled
Lint & Format / js-lint (push) Has been cancelled
Lint & Format / py-lint (push) Has been cancelled
Lint & Format / sh-lint (push) Has been cancelled
Lint & Format / go-lint (push) Has been cancelled

RoadChain-SHA2048: abc9be08a46f52c2
RoadChain-Identity: alexa@sovereign
RoadChain-Full: abc9be08a46f52c20e45ae95233112ee88407ca3a606ec0ef568784041b755a56343cf4d7b817f2f4f7a11ec3f34ce826f607b2150a77248ade06b449e5b7e281b4be665ab148c46e3b71c9c029ee4d77f120e5919a7b87b0b7b6ed45f12c87f420fdda633f3bae4c5f7b851979bb52c725913fa63300772174263d1e64a02aa3356f73819e1110ad94d16836fa9f24b40e60e2da2f252506fbf02f82acc5fb8e03fd6ec08691ea60dea318ce5099a93d8ead7f9ef45b13a1ab533f592b60c702a0ba854b243e94be7eece0bfab14f822a928f8681c8777dc6a881da7e2ec324d6ace471f6c3f77ad83a22bfea01760be75f191128aa0a100d497dd0f0801ea8
This commit is contained in:
2026-03-14 17:28:48 -05:00
parent 84864e7733
commit 0c1d3bacfc
45 changed files with 1911 additions and 2529 deletions

View File

@@ -1,225 +0,0 @@
#!/bin/bash
# Real-time Slack alerts for fleet/service issues
# Posts to SLACK_ALERTS_WEBHOOK_URL (or falls back to SLACK_WEBHOOK_URL)
#
# Usage: slack-alert.sh — auto-detect issues from latest KPI data
# slack-alert.sh "message" — post a custom alert
# slack-alert.sh git-patrol — post git-agent patrol results
source "$(dirname "$0")/../lib/common.sh"
source "$(dirname "$0")/../lib/slack.sh"
slack_load
if ! slack_ready; then
err "Slack not configured. Run: bash scripts/setup-slack.sh"
exit 1
fi
# ─── Custom message mode ─────────────────────────────────────────────
if [ -n "${1:-}" ] && [ "$1" != "git-patrol" ]; then
payload=$(python3 -c "
import json, sys
msg = ' '.join(sys.argv[1:])
blocks = [
{'type': 'section', 'text': {'type': 'mrkdwn', 'text': f':rotating_light: *BlackRoad Alert*\n{msg}'}},
{'type': 'context', 'elements': [{'type': 'mrkdwn', 'text': '$(date -u +%Y-%m-%dT%H:%M:%SZ) | slack-alert.sh'}]}
]
print(json.dumps({'blocks': blocks}))
" "$@")
slack_alert "$payload"
ok "Alert posted: $*"
exit 0
fi
# ─── Git patrol mode ─────────────────────────────────────────────────
if [ "${1:-}" = "git-patrol" ]; then
AGENT_SCRIPT="$(dirname "$0")/../agents/git-agent.sh"
if [ ! -x "$AGENT_SCRIPT" ]; then
err "git-agent.sh not found"
exit 1
fi
patrol_output=$(bash "$AGENT_SCRIPT" health 2>&1)
fleet_output=$(bash "$AGENT_SCRIPT" fleet status 2>&1)
payload=$(python3 -c "
import json, sys, re
patrol = '''$patrol_output'''
fleet = '''$fleet_output'''
# Parse health output
issues = []
for line in patrol.split('\n'):
if '✗' in line:
# Strip ANSI codes
clean = re.sub(r'\033\[[0-9;]*m', '', line).strip()
if clean:
issues.append(clean.lstrip('✗ '))
fleet_lines = []
for line in fleet.split('\n'):
clean = re.sub(r'\033\[[0-9;]*m', '', line).strip()
if 'repos=' in clean:
fleet_lines.append(clean.lstrip('✓ '))
health_text = '\n'.join(f'• {i}' for i in issues) if issues else ':white_check_mark: All repos clean'
fleet_text = '\n'.join(f'• {l}' for l in fleet_lines) if fleet_lines else 'No fleet data'
blocks = [
{'type': 'header', 'text': {'type': 'plain_text', 'text': 'Git Agent Patrol Report'}},
{'type': 'section', 'fields': [
{'type': 'mrkdwn', 'text': f':mag: *Local Repos*\n{health_text}'},
{'type': 'mrkdwn', 'text': f':satellite: *Fleet Repos*\n{fleet_text}'},
]},
{'type': 'context', 'elements': [
{'type': 'mrkdwn', 'text': '$(date -u +%Y-%m-%dT%H:%M:%SZ) | git-agent patrol'}
]}
]
print(json.dumps({'blocks': blocks}))
")
slack_alert "$payload"
ok "Git patrol posted to Slack"
exit 0
fi
# ─── Auto-detect mode — scan latest KPIs for alertable issues ────────
DAILY=$(today_file)
[ ! -f "$DAILY" ] && { err "No daily data"; exit 1; }
export DAILY
alerts=$(python3 << 'PYEOF'
import json, os
with open(os.environ['DAILY']) as f:
s = json.load(f).get('summary', {})
alerts = []
# Fleet nodes down
offline = s.get('fleet_offline', [])
if offline:
alerts.append({
'severity': 'critical',
'emoji': ':rotating_light:',
'text': f"*Nodes offline*: {', '.join(offline)}"
})
# Fleet degraded
online = s.get('fleet_online', 0)
total = s.get('fleet_total', 4)
if online < total and not offline:
alerts.append({
'severity': 'warning',
'emoji': ':large_yellow_circle:',
'text': f"*Fleet degraded*: {online}/{total} online"
})
# Failed systemd units
failed = s.get('failed_units', 0)
if failed > 0:
alerts.append({
'severity': 'warning',
'emoji': ':warning:',
'text': f"*{failed} failed systemd units*"
})
# Throttled nodes (undervoltage/thermal)
throttled = s.get('throttled_nodes', [])
if throttled:
alerts.append({
'severity': 'warning',
'emoji': ':zap:',
'text': f"*Throttled nodes*: {', '.join(throttled)}"
})
# High temperature
temp = s.get('avg_temp_c', 0)
if temp > 70:
alerts.append({
'severity': 'critical' if temp > 80 else 'warning',
'emoji': ':fire:',
'text': f"*High fleet temp*: {temp:.1f}C avg"
})
# Disk pressure (fleet)
disk_used = s.get('fleet_disk_used_gb', 0)
disk_total = s.get('fleet_disk_total_gb', 1)
if disk_total > 0 and (disk_used / disk_total) > 0.85:
pct = round(disk_used / disk_total * 100)
alerts.append({
'severity': 'warning',
'emoji': ':floppy_disk:',
'text': f"*Fleet disk {pct}%*: {disk_used}/{disk_total} GB"
})
# Mac disk pressure
mac_pct = s.get('mac_disk_pct', 0)
if mac_pct > 85:
alerts.append({
'severity': 'warning',
'emoji': ':computer:',
'text': f"*Mac disk at {mac_pct}%*"
})
# Low autonomy score
score = s.get('autonomy_score', 0)
if score < 30:
alerts.append({
'severity': 'warning',
'emoji': ':robot_face:',
'text': f"*Low autonomy score*: {score}/100"
})
# Too many service restarts (possible crash loop)
restarts = s.get('service_restarts_today', 0)
if restarts > 100:
alerts.append({
'severity': 'warning',
'emoji': ':repeat:',
'text': f"*{restarts} service restarts today* — possible crash loop"
})
print(json.dumps(alerts))
PYEOF
)
if [ "$alerts" = "[]" ]; then
log "No alerts to send"
exit 0
fi
# Build and send alert payload
payload=$(python3 -c "
import json
alerts = json.loads('''$alerts''')
text_lines = []
for a in alerts:
key = a['text']
text_lines.append(f\"{a['emoji']} {a['text']}\")
severity = 'critical' if any(a['severity'] == 'critical' for a in alerts) else 'warning'
header_emoji = ':rotating_light:' if severity == 'critical' else ':warning:'
blocks = [
{'type': 'header', 'text': {'type': 'plain_text', 'text': f'{header_emoji} BlackRoad Fleet Alert'}},
{'type': 'section', 'text': {'type': 'mrkdwn', 'text': chr(10).join(text_lines)}},
{'type': 'context', 'elements': [
{'type': 'mrkdwn', 'text': '$(date -u +%Y-%m-%dT%H:%M:%SZ) | slack-alert.sh auto-detect'}
]}
]
print(json.dumps({'blocks': blocks}))
")
# Check dedup for the overall alert set
alert_key=$(echo "$alerts" | md5 2>/dev/null || echo "$alerts" | md5sum | cut -d' ' -f1)
if slack_dedup "$alert_key"; then
slack_alert "$payload"
ok "Alert posted to Slack ($(echo "$alerts" | python3 -c 'import json,sys; print(len(json.load(sys.stdin)))') issues)"
else
log "Alert suppressed (already sent within 1 hour)"
fi

1
bin/slack-alert Symbolic link
View File

@@ -0,0 +1 @@
/Users/alexa/blackroad-os-kpis/reports/slack-alert.sh