bin/ 230 CLI tools (ask-*, br-*, agent-*, roadid, carpool) scripts/ 99 automation scripts fleet/ Node configs and deployment workers/ Cloudflare Worker sources (roadpay, road-search, squad webhooks) roadc/ RoadC programming language roadnet/ Mesh network (5 APs, WireGuard) operator/ Memory system scripts config/ System configs dotfiles/ Shell configs docs/ Documentation BlackRoad OS — Pave Tomorrow. RoadChain-SHA2048: d1a24f55318d338b RoadChain-Identity: alexa@sovereign RoadChain-Full: d1a24f55318d338b24b60bad7be39286379c76ae5470817482100cb0ddbbcb97e147d07ac7243da0a9f0363e4e5c833d612b9c0df3a3cd20802465420278ef74875a5b77f55af6fe42a931b8b635b3d0d0b6bde9abf33dc42eea52bc03c951406d8cbe49f1a3d29b26a94dade05e9477f34a7d4d4c6ec4005c3c2ac54e73a68440c512c8e83fd9b1fe234750b898ef8f4032c23db173961fe225e67a0432b5293a9714f76c5c57ed5fdf35b9fb40fd73c03ebf88b7253c6a0575f5afb6a6b49b3bda310602fb1ef676859962dad2aebbb2875814b30eee0a8ba195e482d4cbc91d8819e7f38f6db53e8063401649c77bb994371473cabfb917fb53e8cbe73d60
339 lines
10 KiB
Bash
Executable File
339 lines
10 KiB
Bash
Executable File
#!/bin/bash
# fleet-ai-bootstrap.sh — Pre-warm Ollama fleet, pin models in VRAM, zero cold starts
# Usage: ./fleet-ai-bootstrap.sh [--warm|--status|--fix|--optimize|--all]

# Strict mode: exit on unhandled errors (-e), error on unset variables (-u),
# and fail a pipeline when any stage fails (pipefail).  Per-node helpers are
# invoked as `fn "$node" || true`, so a single bad node never aborts the run.
set -euo pipefail
|
|
|
|
# ── ANSI color palette (constants; rendered later via `echo -e`) ──
readonly PINK='\033[38;5;205m'
readonly AMBER='\033[38;5;214m'
readonly BLUE='\033[38;5;69m'
readonly VIOLET='\033[38;5;135m'
readonly GREEN='\033[38;5;82m'
readonly RED='\033[38;5;196m'
readonly DIM='\033[2m'
readonly RESET='\033[0m'
|
|
|
|
# ── Fleet nodes: logical name → ssh user@host ──
declare -A NODES
NODES[cecilia]="blackroad@192.168.4.96"
NODES[lucidia]="octavia@192.168.4.38"
NODES[alice]="pi@192.168.4.49"
NODES[octavia]="pi@192.168.4.100"
readonly -A NODES
|
|
|
|
# ── Models to pre-warm per node, fastest first ──
# keep_alive=-1 pins each model in VRAM until the ollama service restarts.
# Nodes absent from this map (e.g. octavia) have no warm targets.
declare -A WARM_MODELS
WARM_MODELS[cecilia]="llama3.2:3b tinyllama:latest qwen2.5-coder:3b deepseek-coder:1.3b cece:latest"
WARM_MODELS[lucidia]="tinyllama:latest qwen2.5:3b llama3.2:1b"
WARM_MODELS[alice]="tinyllama:latest llama3.2:1b"
|
|
|
|
# Shared ssh flags.  Kept as one string and deliberately expanded UNQUOTED
# ($SSH_OPTS) at every call site so it word-splits into separate flags;
# none of the options contain whitespace, so this is safe here.
SSH_OPTS="-o ConnectTimeout=5 -o StrictHostKeyChecking=no -o BatchMode=yes"
|
|
|
|
# Print the banner shown at the top of every run.
header() {
  local rule="${PINK}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}"
  echo -e "\n${rule}"
  echo -e "${PINK} ◆ BlackRoad Fleet AI Bootstrap${RESET}"
  echo -e "${rule}\n"
}
|
|
|
|
# Status-line printers: a colored glyph followed by the message.
# echo -e is used so the ANSI escapes in the palette constants render.
_line() { echo -e " $1 $2"; }
log()  { _line "${BLUE}→${RESET}"  "$1"; }
ok()   { _line "${GREEN}✓${RESET}" "$1"; }
warn() { _line "${AMBER}⚠${RESET}" "$1"; }
err()  { _line "${RED}✗${RESET}"   "$1"; }
|
|
|
|
# ── True iff we can ssh to the named node ──
# $1 = logical node name (key into NODES).
node_alive() {
  local target="${NODES[$1]}"
  ssh $SSH_OPTS "$target" "echo ok" > /dev/null 2>&1
}
|
|
|
|
# ── Get Ollama status on a node ──
# $1 = logical node name (key into NODES).
# Reports: reachability, systemd service state, models resident in VRAM,
# available RAM, and installed-model count.  Returns non-zero on the first
# hard failure (unreachable / service down); callers invoke with `|| true`.
node_ollama_status() {
  local name="$1"
  local host="${NODES[$name]}"

  echo -e "\n ${VIOLET}[$name]${RESET} ${DIM}${host}${RESET}"

  if ! node_alive "$name"; then
    err "$name — unreachable"
    return 1
  fi
  ok "$name — online"

  # Check Ollama service; `|| echo dead` gives a readable status even when
  # ssh or systemctl itself fails.
  local status
  status=$(ssh $SSH_OPTS "$host" "systemctl is-active ollama 2>/dev/null || echo dead")
  if [ "$status" != "active" ]; then
    err "ollama service: $status"
    return 1
  fi
  ok "ollama service: active"

  # Check loaded models: /api/ps lists models currently resident in memory.
  # Fall back to '{}' so the JSON parser below always gets valid input.
  local loaded
  loaded=$(ssh $SSH_OPTS "$host" "curl -s localhost:11434/api/ps 2>/dev/null" || echo '{}')
  local loaded_names
  # Best-effort parse: any malformed payload yields an empty list (bare
  # except is intentional — status reporting must never crash the script).
  loaded_names=$(echo "$loaded" | python3 -c "
import sys,json
try:
    d=json.load(sys.stdin)
    for m in d.get('models',[]):
        print(m['name'])
except: pass
" 2>/dev/null)

  if [ -n "$loaded_names" ]; then
    ok "loaded in VRAM:"
    echo "$loaded_names" | while read -r m; do
      echo -e " ${GREEN}●${RESET} $m"
    done
  else
    warn "no models in VRAM"
  fi

  # Check available memory (column 4 of `free -h` Mem: row; \$4 is escaped
  # so awk, not the local shell, expands it).
  local mem
  mem=$(ssh $SSH_OPTS "$host" "free -h | awk '/Mem:/{print \$4}'" 2>/dev/null)
  log "available RAM: ${mem:-unknown}"

  # Model count from /api/tags; "?" when the endpoint is unreachable.
  local count
  count=$(ssh $SSH_OPTS "$host" "curl -s localhost:11434/api/tags 2>/dev/null | python3 -c 'import sys,json;print(len(json.load(sys.stdin).get(\"models\",[])))' 2>/dev/null" || echo "?")
  log "models installed: $count"
}
|
|
|
|
# ── Pre-load every WARM_MODELS entry on one node, pinned in VRAM ──
# $1 = logical node name (key into NODES / WARM_MODELS).
# Each model gets a minimal one-token generate request with keep_alive=-1,
# which keeps it resident until the ollama service restarts.
node_warm() {
  local node="$1"
  local target="${NODES[$node]}"
  local warm_list="${WARM_MODELS[$node]}"

  # No configured warm targets for this node — not an error.
  if [ -z "$warm_list" ]; then
    log "no warm targets for $node"
    return
  fi

  echo -e "\n ${VIOLET}[$node]${RESET} warming models..."

  if ! node_alive "$node"; then
    err "$node — unreachable, skipping"
    return
  fi

  # Bail out unless systemd reports the service as active.
  local svc
  svc=$(ssh $SSH_OPTS "$target" "systemctl is-active ollama 2>/dev/null || echo dead")
  if [ "$svc" != "active" ]; then
    err "$node ollama not running ($svc)"
    return
  fi

  # Intentional word-splitting: warm_list is a space-separated model list.
  local mdl resp
  for mdl in $warm_list; do
    log "loading $mdl (keep_alive=-1)..."
    # Minimal generate request; keep_alive=-1 pins the model in VRAM.
    resp=$(ssh $SSH_OPTS "$target" "curl -s -X POST localhost:11434/api/generate \
      -d '{\"model\":\"$mdl\",\"prompt\":\"hi\",\"keep_alive\":-1,\"options\":{\"num_predict\":1}}' \
      --max-time 120 2>&1 | tail -1" 2>/dev/null)

    if echo "$resp" | grep -q '"done":true\|"done_reason"'; then
      ok "$mdl — loaded and pinned"
    else
      warn "$mdl — may have failed: $(echo "$resp" | head -c 80)"
    fi
  done
}
|
|
|
|
# ── Fix Ollama on one node: install the binary if missing, then (re)start ──
# $1 = logical node name (key into NODES).
# Best-effort: logs and returns on failure instead of aborting the fleet run.
node_fix() {
  local name="$1"
  local host="${NODES[$name]}"

  echo -e "\n ${VIOLET}[$name]${RESET} fixing Ollama..."

  if ! node_alive "$name"; then
    err "$name — unreachable"
    return
  fi

  # Check if the ollama binary exists at its standard install path.
  local has_bin
  has_bin=$(ssh $SSH_OPTS "$host" "test -x /usr/local/bin/ollama && echo yes || echo no")

  if [ "$has_bin" = "no" ]; then
    log "ollama binary missing — installing..."
    # Capture the installer output so we can both show its tail AND check
    # the exit status — previously `| tail -3` masked the ssh/install exit
    # code and "ollama installed" was reported unconditionally.
    local install_out
    if install_out=$(ssh $SSH_OPTS "$host" "curl -fsSL https://ollama.com/install.sh | sudo sh" 2>&1); then
      printf '%s\n' "$install_out" | tail -3
      ok "ollama installed"
    else
      printf '%s\n' "$install_out" | tail -3
      err "ollama install failed on $name"
      return
    fi
  fi

  # Ensure the service is enabled and running; failure here is surfaced by
  # the is-active check below, so don't let it abort under `set -e`.
  ssh $SSH_OPTS "$host" "sudo systemctl enable ollama && sudo systemctl restart ollama" 2>/dev/null || true
  sleep 2

  local status
  status=$(ssh $SSH_OPTS "$host" "systemctl is-active ollama 2>/dev/null || echo dead")
  if [ "$status" = "active" ]; then
    ok "ollama running on $name"
  else
    err "ollama still not running on $name"
  fi
}
|
|
|
|
# ── Configure Ollama for max speed ──
# $1 = logical node name (key into NODES).
# Pushes a systemd drop-in (ollama.service.d/speed.conf) with speed-oriented
# environment variables, reloads systemd, and restarts the service.
node_optimize() {
  local name="$1"
  local host="${NODES[$name]}"

  echo -e "\n ${VIOLET}[$name]${RESET} optimizing Ollama config..."

  if ! node_alive "$name"; then
    err "$name — unreachable"
    return
  fi

  # Set environment variables for speed
  # OLLAMA_NUM_PARALLEL=2 — handle 2 concurrent requests
  # OLLAMA_KEEP_ALIVE=-1 — default keep forever
  # OLLAMA_MAX_LOADED_MODELS=3 — keep up to 3 models hot
  # OLLAMA_FLASH_ATTENTION=1 — faster attention
  # NOTE: the heredoc delimiter is quoted ('UNIT') so the unit-file content
  # is written literally on the remote host; the \" escapes survive the
  # outer double-quoted ssh command string.
  ssh $SSH_OPTS "$host" "sudo mkdir -p /etc/systemd/system/ollama.service.d && \
sudo tee /etc/systemd/system/ollama.service.d/speed.conf > /dev/null << 'UNIT'
[Service]
Environment=\"OLLAMA_KEEP_ALIVE=-1\"
Environment=\"OLLAMA_NUM_PARALLEL=2\"
Environment=\"OLLAMA_MAX_LOADED_MODELS=3\"
Environment=\"OLLAMA_FLASH_ATTENTION=1\"
UNIT
sudo systemctl daemon-reload && sudo systemctl restart ollama" 2>/dev/null

  # Give the restarted service a moment before probing its state.
  sleep 2
  local status
  status=$(ssh $SSH_OPTS "$host" "systemctl is-active ollama 2>/dev/null || echo dead")
  if [ "$status" = "active" ]; then
    ok "$name — optimized (keep_alive=-1, flash_attention, parallel=2, max_loaded=3)"
  else
    err "$name — ollama failed to restart after config"
  fi
}
|
|
|
|
# ── Deploy Ollama bridge config for chat.blackroad.io routing ──
# Writes the model→node routing map the chat worker consumes.
deploy_chat_config() {
  echo -e "\n ${PINK}Deploying chat fleet config...${RESET}"

  # Quoted heredoc delimiter: the JSON is written verbatim, no expansion.
  local map=/tmp/fleet-models.json
  tee "$map" > /dev/null << 'JSON'
{
  "endpoints": {
    "cecilia": "https://ollama.blackroad.io",
    "lucidia": "https://ai.blackroad.io"
  },
  "routing": {
    "qwen3:8b": "cecilia",
    "qwen2.5-coder:3b": "cecilia",
    "codellama:7b": "cecilia",
    "deepseek-coder:1.3b": "cecilia",
    "llama3:8b-instruct-q4_K_M": "cecilia",
    "llama3.2:3b": "cecilia",
    "deepseek-r1:1.5b": "cecilia",
    "cece:latest": "cecilia",
    "cece2:latest": "cecilia",
    "tinyllama:latest": "cecilia",
    "lucidia:latest": "lucidia",
    "qwen2.5:3b": "lucidia",
    "qwen2.5:1.5b": "lucidia"
  },
  "defaults": {
    "fast": "llama3.2:3b",
    "code": "qwen2.5-coder:3b",
    "instant": "tinyllama:latest",
    "reasoning": "qwen3:8b",
    "custom": "cece:latest"
  }
}
JSON
  ok "fleet model routing map: $map"
}
|
|
|
|
# ── Main ──

# Run one per-node action across a list of nodes.  `|| true` ensures a
# single node's failure never aborts the whole run (this loop was
# previously copy-pasted six times across the case arms).
run_on_nodes() {
  local action="$1"; shift
  local n
  for n in "$@"; do
    "$action" "$n" || true
  done
}

# Green closing banner shown by the multi-step modes.
finish_banner() {
  echo -e "\n${GREEN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}"
  echo -e "${GREEN} $1${RESET}"
  echo -e "${GREEN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${RESET}\n"
}

header

# Default mode is --warm when no argument is given.
case "${1:---warm}" in
  --status)
    echo -e "${AMBER}Fleet AI Status${RESET}"
    run_on_nodes node_ollama_status cecilia lucidia alice octavia
    ;;

  --fix)
    echo -e "${AMBER}Fixing Ollama on all nodes${RESET}"
    run_on_nodes node_fix cecilia lucidia alice
    ;;

  --optimize)
    echo -e "${AMBER}Optimizing Ollama speed config${RESET}"
    run_on_nodes node_optimize cecilia lucidia
    ;;

  --warm)
    echo -e "${AMBER}Optimizing + Warming Fleet${RESET}"

    # Step 1: Optimize Ollama config on working nodes
    run_on_nodes node_optimize cecilia lucidia

    # Step 2: Pre-warm models
    run_on_nodes node_warm cecilia lucidia

    # Step 3: Deploy routing config
    deploy_chat_config

    finish_banner "Fleet warmed. Models pinned. Zero cold starts."
    ;;

  --all)
    echo -e "${AMBER}Full bootstrap: fix → optimize → warm${RESET}"

    # Fix broken nodes first, then optimize, then warm.
    run_on_nodes node_fix cecilia lucidia alice
    run_on_nodes node_optimize cecilia lucidia alice
    run_on_nodes node_warm cecilia lucidia alice

    deploy_chat_config

    finish_banner "Full fleet bootstrap complete."
    ;;

  *)
    echo "Usage: $0 [--status|--warm|--fix|--optimize|--all]"
    echo ""
    echo "  --status    Check Ollama status on all nodes"
    echo "  --warm      Optimize config + pre-load models into VRAM (default)"
    echo "  --fix       Reinstall Ollama where broken"
    echo "  --optimize  Push speed config (keep_alive, flash_attention)"
    echo "  --all       Fix + optimize + warm everything"
    ;;
esac
|