Files
blackroad/bin/br-infer
Alexa Amundson 78fbe80f2a Initial monorepo — everything BlackRoad in one place
bin/       230 CLI tools (ask-*, br-*, agent-*, roadid, carpool)
scripts/   99 automation scripts
fleet/     Node configs and deployment
workers/   Cloudflare Worker sources (roadpay, road-search, squad webhooks)
roadc/     RoadC programming language
roadnet/   Mesh network (5 APs, WireGuard)
operator/  Memory system scripts
config/    System configs
dotfiles/  Shell configs
docs/      Documentation

BlackRoad OS — Pave Tomorrow.

RoadChain-SHA2048: d1a24f55318d338b
RoadChain-Identity: alexa@sovereign
RoadChain-Full: d1a24f55318d338b24b60bad7be39286379c76ae5470817482100cb0ddbbcb97e147d07ac7243da0a9f0363e4e5c833d612b9c0df3a3cd20802465420278ef74875a5b77f55af6fe42a931b8b635b3d0d0b6bde9abf33dc42eea52bc03c951406d8cbe49f1a3d29b26a94dade05e9477f34a7d4d4c6ec4005c3c2ac54e73a68440c512c8e83fd9b1fe234750b898ef8f4032c23db173961fe225e67a0432b5293a9714f76c5c57ed5fdf35b9fb40fd73c03ebf88b7253c6a0575f5afb6a6b49b3bda310602fb1ef676859962dad2aebbb2875814b30eee0a8ba195e482d4cbc91d8819e7f38f6db53e8063401649c77bb994371473cabfb917fb53e8cbe73d60
2026-03-14 17:08:41 -05:00

398 lines
12 KiB
Bash
Executable File

#!/usr/bin/env bash
# ============================================================================
# BLACKROAD OS, INC. - PROPRIETARY AND CONFIDENTIAL
# Copyright (c) 2025-2026 BlackRoad OS, Inc. All Rights Reserved.
# ============================================================================
# br-infer - Production AI inference across the fleet
# Smart routing with health checks, retries, and metrics
# Usage: br-infer [options] "prompt"
# Abort on the first unhandled failure; a failing stage fails its whole pipeline.
set -e -o pipefail

# Fleet-wide settings. A missing or unreadable file is tolerated.
# NOTE(review): presumably this file provides NODE_IP and the colour variables
# (PINK, AMBER, GREEN, RED, BLUE, RESET) used below — confirm.
. "${HOME}/.blackroad/config/nodes.sh" 2>/dev/null || :

# On-disk state: the SQLite metrics database and a cache directory.
INFER_DIR="${HOME}/.blackroad/inference"
INFER_DB="${INFER_DIR}/inference.db"
CACHE_DIR="${INFER_DIR}/cache"
mkdir -p "${INFER_DIR}" "${CACHE_DIR}"
# Run sqlite3 against the inference database.
# Globals:   INFER_DB (read)
# Arguments: passed straight through to sqlite3 (normally one SQL string);
#            with no arguments the SQL is read from stdin.
# Outputs:   query results on stdout; sqlite3's stderr is discarded.
# Returns:   sqlite3's exit status.
# The busy timeout is a per-connection setting, so it is applied on every
# invocation (via -cmd) instead of once at schema creation; concurrent
# br-infer processes then wait for a lock instead of failing immediately.
_sql() { sqlite3 -cmd ".timeout 5000" "$INFER_DB" "$@" 2>/dev/null; }
# Escape a value for embedding inside a single-quoted SQL string literal
# by doubling every single quote.
# Arguments: $1 - raw value
# Outputs:   escaped value followed by a newline
# printf is used instead of echo so values such as "-n" or "-e" are emitted
# verbatim rather than being taken as echo options.
_sql_escape() {
  local sq="'"
  printf '%s\n' "${1//$sq/$sq$sq}"
}
# Fleet nodes probed for an Ollama endpoint, in iteration order.
OLLAMA_NODES=("cecilia" "lucidia" "alice")
# Model used when -m/--model is not given; BR_DEFAULT_MODEL overrides it.
DEFAULT_MODEL=${BR_DEFAULT_MODEL:-llama3.2}
# The query loop runs while its attempt counter is <= this value.
MAX_RETRIES=2
# Seconds handed to curl --max-time for a single generate call.
TIMEOUT_S=120
# Create the metrics schema (inferences + node_health tables and indexes)
# if it does not exist yet, and switch the database to WAL journaling.
# Globals:   relies on _sql to reach the database.
# Arguments: none
# Outputs:   nothing on stdout.
# Returns:   exit status of the underlying sqlite3 run.
init_db() {
  # PRAGMA statements print their resulting value ("wal", "5000"); stdout is
  # discarded so first-run output of the tool is not polluted by them.
  _sql >/dev/null <<'SQL'
CREATE TABLE IF NOT EXISTS inferences (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
  node TEXT,
  model TEXT,
  prompt_len INTEGER,
  response_len INTEGER,
  latency_ms INTEGER,
  status TEXT,
  cached INTEGER DEFAULT 0
);
CREATE TABLE IF NOT EXISTS node_health (
  node TEXT PRIMARY KEY,
  last_check DATETIME,
  status TEXT DEFAULT 'unknown',
  avg_latency_ms REAL DEFAULT 0,
  success_count INTEGER DEFAULT 0,
  fail_count INTEGER DEFAULT 0,
  models TEXT
);
CREATE INDEX IF NOT EXISTS idx_infer_ts ON inferences(timestamp);
CREATE INDEX IF NOT EXISTS idx_infer_node ON inferences(node);
PRAGMA journal_mode=WAL;
PRAGMA busy_timeout=5000;
SQL
}
# Millisecond wall-clock timestamp (defined only if not already available).
# A date implementation without %N support may exit 0 yet print non-numeric
# text, so the output is validated before it is trusted; python3 and finally
# whole seconds are the fallbacks.
declare -F _now_ms >/dev/null || _now_ms() {
  local ms
  ms=$(date +%s%3N 2>/dev/null) || ms=""
  if [[ ! "$ms" =~ ^[0-9]+$ ]]; then
    ms=$(python3 -c 'import time; print(int(time.time()*1000))' 2>/dev/null) \
      || ms=$(( $(date +%s) * 1000 ))
  fi
  printf '%s\n' "$ms"
}

# Health check all nodes, update DB.
# Probes /api/tags on every node in OLLAMA_NODES that has an address in
# NODE_IP, prints one table row per node, and records the result in the
# node_health table. Finally reports whether a local Ollama answers.
# Globals:   OLLAMA_NODES, NODE_IP, colour variables (read); node_health (written)
# Arguments: none
# Outputs:   status table on stdout
# Returns:   0
cmd_health() {
  local node ip endpoint node_sql tags start_ms end_ms lat count model_list

  printf '%bChecking fleet inference health...%b\n\n' "$AMBER" "$RESET"
  printf ' %-12s %-8s %-10s %-8s %s\n' "NODE" "STATUS" "LATENCY" "MODELS" "ENDPOINT"
  printf ' %-12s %-8s %-10s %-8s %s\n' "────" "──────" "───────" "──────" "────────"

  for node in "${OLLAMA_NODES[@]}"; do
    ip="${NODE_IP[$node]:-}"
    if [[ -z "$ip" ]]; then
      continue
    fi
    endpoint="http://${ip}:11434"
    node_sql=$(_sql_escape "$node")
    printf ' %-12s ' "$node"

    start_ms=$(_now_ms)
    # An unreachable node is the expected "down" case. Without the fallback
    # the failed assignment would terminate the script under `set -e`.
    tags=$(curl -sf --connect-timeout 3 --max-time 5 "${endpoint}/api/tags" 2>/dev/null) || tags=""
    end_ms=$(_now_ms)
    lat=$((end_ms - start_ms))

    # Ensure the row exists, then update only the probe columns so that the
    # success/fail counters accumulated by inference runs are preserved.
    # Metrics are best-effort: a database error must not abort the report.
    _sql "INSERT OR IGNORE INTO node_health (node) VALUES ('${node_sql}')" || true

    if [[ -n "$tags" ]]; then
      count=$(jq '.models | length' <<<"$tags" 2>/dev/null) || count="0"
      model_list=$(jq -r '[.models[].name] | join(",")' <<<"$tags" 2>/dev/null) || model_list=""
      printf '%b%-8s%b %-10s %-8s %s\n' "$GREEN" "UP" "$RESET" "${lat}ms" "$count" "$endpoint"
      # The probe latency only seeds avg_latency_ms; once real inferences
      # have been recorded their running average is left untouched.
      _sql "UPDATE node_health
SET last_check = datetime('now'), status = 'up',
    avg_latency_ms = CASE WHEN success_count = 0 THEN ${lat} ELSE avg_latency_ms END,
    models = '$(_sql_escape "$model_list")'
WHERE node = '${node_sql}'" || true
    else
      printf '%b%-8s%b %-10s %-8s %s\n' "$RED" "DOWN" "$RESET" "—" "—" "$endpoint"
      _sql "UPDATE node_health
SET last_check = datetime('now'), status = 'down'
WHERE node = '${node_sql}'" || true
    fi
  done

  # Also check localhost
  printf ' %-12s ' "localhost"
  if curl -sf --connect-timeout 1 "http://localhost:11434/api/tags" &>/dev/null; then
    printf '%b%-8s%b\n' "$GREEN" "UP" "$RESET"
  else
    printf '%b%-8s%b\n' "$AMBER" "N/A" "$RESET"
  fi
  echo
}
# Find best available node (least latency, healthy).
# A responsive local Ollama always wins; otherwise every reachable fleet node
# is ranked by the avg_latency_ms stored in node_health (500 when no usable
# value is recorded) and the lowest one is chosen.
# Globals:   OLLAMA_NODES, NODE_IP (read); node_health (read via _sql)
# Arguments: $1    - requested model (accepted for interface stability; the
#                    selection does not currently consult it)
#            $2... - optional node names to leave out, e.g. nodes that have
#                    already failed, so a caller can fail over to another one
# Outputs:   "<node>:<address>" on stdout
# Returns:   0 when a node was found, 1 otherwise
find_best_node() {
  # shellcheck disable=SC2034  # kept so the positional contract stays visible
  local model="$1"
  local skip_list="" name
  for name in "${@:2}"; do
    skip_list+=" ${name} "
  done

  # Check local first
  if [[ "$skip_list" != *" localhost "* ]] \
    && curl -sf --connect-timeout 1 "http://localhost:11434/api/tags" &>/dev/null; then
    echo "localhost:localhost"
    return 0
  fi

  local node ip cached_lat lat_int
  local best_node="" best_ip="" best_lat=999999
  for node in "${OLLAMA_NODES[@]}"; do
    ip="${NODE_IP[$node]:-}"
    if [[ -z "$ip" || "$skip_list" == *" ${node} "* ]]; then
      continue
    fi
    # Quick liveness probe; unreachable nodes are not candidates.
    if ! curl -sf --connect-timeout 2 "http://${ip}:11434/api/tags" &>/dev/null; then
      continue
    fi
    cached_lat=$(_sql "SELECT avg_latency_ms FROM node_health WHERE node='$node' AND status='up'" 2>/dev/null) || cached_lat=""
    # The column is REAL; compare on the integer part and fall back to the
    # default when nothing numeric came back.
    lat_int="${cached_lat%.*}"
    if [[ ! "$lat_int" =~ ^[0-9]+$ ]]; then
      lat_int=500
    fi
    if (( 10#$lat_int < best_lat )); then
      best_lat=$(( 10#$lat_int ))
      best_node="$node"
      best_ip="$ip"
    fi
  done

  if [[ -n "$best_node" ]]; then
    echo "${best_node}:${best_ip}"
    return 0
  fi
  return 1
}
# Millisecond wall-clock timestamp (defined only if not already available).
# A date implementation without %N support may exit 0 yet print non-numeric
# text, so the output is validated before it is trusted; python3 and finally
# whole seconds are the fallbacks.
declare -F _now_ms >/dev/null || _now_ms() {
  local ms
  ms=$(date +%s%3N 2>/dev/null) || ms=""
  if [[ ! "$ms" =~ ^[0-9]+$ ]]; then
    ms=$(python3 -c 'import time; print(int(time.time()*1000))' 2>/dev/null) \
      || ms=$(( $(date +%s) * 1000 ))
  fi
  printf '%s\n' "$ms"
}

# Core inference.
# Sends one non-streaming /api/generate request to the given node and records
# the outcome in the inferences and node_health tables.
# Globals:   TIMEOUT_S (read); database (written via _sql)
# Arguments: $1 - model name
#            $2 - prompt text
#            $3 - "<node>:<address>" as produced by find_best_node
# Outputs:   the model's response text on stdout (success only)
# Returns:   0 when a non-empty response was received, 1 otherwise
do_infer() {
  local model="$1"
  local prompt="$2"
  local node_info="$3"
  local node="${node_info%%:*}"
  local ip="${node_info#*:}"
  local host="http://${ip}:11434"
  local payload response text start_ms end_ms lat node_sql model_sql

  payload=$(jq -n --arg m "$model" --arg p "$prompt" \
    '{model: $m, prompt: $p, stream: false}') || payload=""

  start_ms=$(_now_ms)
  response=$(curl -sf --max-time "$TIMEOUT_S" "${host}/api/generate" \
    -d "$payload" 2>/dev/null) || response=""
  end_ms=$(_now_ms)
  lat=$((end_ms - start_ms))

  text=$(jq -r '.response // empty' <<<"$response" 2>/dev/null) || text=""

  node_sql=$(_sql_escape "$node")
  model_sql=$(_sql_escape "$model")
  # Nodes that were never health-checked (notably localhost) have no
  # node_health row, and an UPDATE would then silently touch nothing.
  _sql "INSERT OR IGNORE INTO node_health (node) VALUES ('${node_sql}')" || true

  if [[ -n "$text" ]]; then
    # Record success
    _sql "INSERT INTO inferences (node, model, prompt_len, response_len, latency_ms, status)
VALUES ('${node_sql}', '${model_sql}', ${#prompt}, ${#text}, ${lat}, 'success')" || true
    # All right-hand sides see the pre-update row, so this is a running mean.
    _sql "UPDATE node_health SET success_count = success_count + 1,
avg_latency_ms = (avg_latency_ms * success_count + ${lat}) / (success_count + 1)
WHERE node='${node_sql}'" || true
    printf '%s\n' "$text"
    return 0
  fi

  _sql "INSERT INTO inferences (node, model, prompt_len, response_len, latency_ms, status)
VALUES ('${node_sql}', '${model_sql}', ${#prompt}, 0, ${lat}, 'fail')" || true
  _sql "UPDATE node_health SET fail_count = fail_count + 1 WHERE node='${node_sql}'" || true
  return 1
}
# Main query with retries and fallback.
# Parses the query options, builds the prompt (arguments, or stdin when no
# prompt argument is given and stdin is not a terminal) and runs it on the
# best node, moving on to another node after a failure.
# Globals:   DEFAULT_MODEL, MAX_RETRIES, colour variables (read)
# Arguments: [-m|--model MODEL] [-s|--system TEXT] [-j|--json] [prompt words...]
# Outputs:   response text (or a JSON object with -j) on stdout;
#            progress and errors on stderr
# Returns:   0 on success, 1 on usage error or when no attempt succeeded
cmd_query() {
  local model="$DEFAULT_MODEL"
  local system_prompt=""
  local json_output=false

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -m|--model)
        # Without this check `shift 2` fails and `set -e` exits silently.
        if [[ $# -lt 2 ]]; then
          echo "br-infer: option $1 requires a value" >&2
          return 1
        fi
        model="$2"
        shift 2
        ;;
      -s|--system)
        if [[ $# -lt 2 ]]; then
          echo "br-infer: option $1 requires a value" >&2
          return 1
        fi
        system_prompt="$2"
        shift 2
        ;;
      -j|--json)
        json_output=true
        shift
        ;;
      *)
        break
        ;;
    esac
  done

  local prompt="$*"
  # Read from stdin if no prompt
  if [[ -z "$prompt" && ! -t 0 ]]; then
    prompt=$(cat)
  fi
  if [[ -z "$prompt" ]]; then
    echo "Usage: br-infer \"prompt\"" >&2
    return 1
  fi

  # Prepend system prompt if given. $'\n\n' yields real newlines; "\n\n"
  # inside double quotes would send a literal backslash-n to the model.
  if [[ -n "$system_prompt" ]]; then
    prompt="${system_prompt}"$'\n\n'"${prompt}"
  fi

  local attempt=0 node_info node result tried already
  local -a tried_nodes=()
  while (( attempt <= MAX_RETRIES )); do
    # Nodes that already failed are handed over as exclusions so a different
    # node can be selected; the duplicate check below covers a selector that
    # ignores them.
    if ! node_info=$(find_best_node "$model" "${tried_nodes[@]}"); then
      if (( ${#tried_nodes[@]} > 0 )); then
        break   # every reachable node has been tried
      fi
      printf '%bNo inference nodes available%b\n' "$RED" "$RESET" >&2
      return 1
    fi
    node="${node_info%%:*}"

    # Skip already-tried nodes
    already=false
    for tried in "${tried_nodes[@]}"; do
      if [[ "$tried" == "$node" ]]; then
        already=true
        break
      fi
    done
    if $already; then
      # Plain assignment: ((attempt++)) returns 1 when the old value is 0,
      # which `set -e` treats as a fatal error.
      attempt=$((attempt + 1))
      continue
    fi
    tried_nodes+=("$node")

    if (( attempt > 0 )); then
      printf '%bRetrying on %s...%b\n' "$AMBER" "$node" "$RESET" >&2
    fi
    printf '%b→ %s%b (model: %s)\n' "$PINK" "$node" "$RESET" "$model" >&2

    if result=$(do_infer "$model" "$prompt" "$node_info"); then
      if $json_output; then
        jq -n --arg node "$node" --arg model "$model" --arg response "$result" \
          '{node: $node, model: $model, response: $response}'
      else
        printf '%s\n' "$result"
      fi
      return 0
    fi
    attempt=$((attempt + 1))
  done

  printf '%bAll inference attempts failed%b\n' "$RED" "$RESET" >&2
  return 1
}
# Batch inference.
# Runs every prompt of a file (one per line; blank lines and lines starting
# with '#' are skipped) through cmd_query and prints a PROMPT/RESPONSE block
# for each prompt that succeeded.
# Globals:   DEFAULT_MODEL, colour variables (read)
# Arguments: [-m|--model MODEL] <file>
# Outputs:   result blocks on stdout; per-prompt progress and the final
#            summary on stderr
# Returns:   1 on usage error, otherwise 0
cmd_batch() {
  local model="$DEFAULT_MODEL"
  local input_file=""

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -m|--model)
        if [[ $# -lt 2 ]]; then
          echo "Usage: br-infer batch [-m model] <file>" >&2
          return 1
        fi
        model="$2"
        shift 2
        ;;
      *)
        input_file="$1"
        shift
        ;;
    esac
  done

  if [[ -z "$input_file" || ! -f "$input_file" ]]; then
    echo "Usage: br-infer batch [-m model] <file>" >&2
    echo " File should contain one prompt per line" >&2
    return 1
  fi

  local total=0 success=0 line result
  # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ -z "$line" || "$line" == "#"* ]]; then
      continue
    fi
    # Plain assignments instead of ((x++)): the latter returns 1 when the
    # old value is 0 and would stop the whole batch under `set -e`.
    total=$((total + 1))
    printf '%b[%d] %b' "$AMBER" "$total" "$RESET" >&2
    # stdin is detached so the query can never consume the rest of the
    # prompt file (e.g. when a line is parsed as an option and leaves the
    # prompt empty).
    if result=$(cmd_query -m "$model" "$line" 2>/dev/null </dev/null); then
      success=$((success + 1))
      printf '%bOK%b\n' "$GREEN" "$RESET" >&2
      printf '%s\n' "---" "PROMPT: $line" "RESPONSE: $result" ""
    else
      printf '%bFAIL%b\n' "$RED" "$RESET" >&2
    fi
  done < "$input_file"

  printf '\n%bBatch complete: %d/%d succeeded%b\n' "$PINK" "$success" "$total" "$RESET" >&2
}
# Stats.
# Prints aggregate figures from the inferences table: overall totals, then a
# breakdown per node, per model (successful calls only) and the ten most
# recent requests.
# Globals:   colour variables (read); database (read via _sql)
# Arguments: none
# Outputs:   report on stdout
cmd_stats() {
  local n_total n_ok mean_ms chars_out pct

  printf '%b╔══════════════════════════════════════════╗%b\n' "$PINK" "$RESET"
  printf '%b║ Inference Statistics ║%b\n' "$PINK" "$RESET"
  printf '%b╚══════════════════════════════════════════╝%b\n\n' "$PINK" "$RESET"

  # A failing query leaves the value empty; the defaults below cover that.
  n_total=$(_sql "SELECT COUNT(*) FROM inferences") || true
  n_ok=$(_sql "SELECT COUNT(*) FROM inferences WHERE status='success'") || true
  mean_ms=$(_sql "SELECT CAST(AVG(latency_ms) AS INTEGER) FROM inferences WHERE status='success'") || true
  chars_out=$(_sql "SELECT SUM(response_len) FROM inferences WHERE status='success'") || true

  printf ' Total requests: %s\n' "${n_total:-0}"
  printf ' Success rate: '
  if ! [[ "${n_total:-0}" -gt 0 ]]; then
    printf 'N/A\n'
  else
    # One decimal place via bc; "?" when bc is unavailable.
    pct=$(echo "scale=1; ${n_ok:-0} * 100 / $n_total" | bc 2>/dev/null || echo "?")
    printf '%s%%\n' "$pct"
  fi
  printf ' Avg latency: %sms\n' "${mean_ms:-0}"
  printf ' Total chars out: %s\n\n' "${chars_out:-0}"

  printf ' %bBy node:%b\n' "$BLUE" "$RESET"
  _sql "SELECT node, COUNT(*), CAST(AVG(latency_ms) AS INTEGER), SUM(CASE WHEN status='success' THEN 1 ELSE 0 END) FROM inferences GROUP BY node ORDER BY COUNT(*) DESC" \
    | while IFS='|' read -r row_node row_reqs row_avg row_ok; do
        printf ' %-12s %4d reqs avg:%4dms ok:%d\n' "$row_node" "$row_reqs" "$row_avg" "$row_ok"
      done
  echo ""

  printf ' %bBy model:%b\n' "$BLUE" "$RESET"
  _sql "SELECT model, COUNT(*), CAST(AVG(latency_ms) AS INTEGER) FROM inferences WHERE status='success' GROUP BY model ORDER BY COUNT(*) DESC" \
    | while IFS='|' read -r row_model row_reqs row_avg; do
        printf ' %-25s %4d reqs avg:%4dms\n' "$row_model" "$row_reqs" "$row_avg"
      done
  echo ""

  printf ' %bLast 10:%b\n' "$BLUE" "$RESET"
  _sql "SELECT node, model, latency_ms, status, timestamp FROM inferences ORDER BY id DESC LIMIT 10" \
    | while IFS='|' read -r row_node row_model row_lat row_status row_ts; do
        local mark
        if [[ "$row_status" == "fail" ]]; then
          mark="x"
        else
          mark="+"
        fi
        printf ' %s %-10s %-20s %5dms %s\n' "$mark" "$row_node" "$row_model" "$row_lat" "$row_ts"
      done
  echo
}
# List available models across fleet.
# Asks every node in OLLAMA_NODES that has an address in NODE_IP for its
# /api/tags listing and prints one sorted "model  node" row per pair.
# Globals:   OLLAMA_NODES, NODE_IP, colour variables (read)
# Arguments: none
# Outputs:   table on stdout
cmd_models() {
  local node ip tags names model_name

  printf '%bFleet Models:%b\n\n' "$PINK" "$RESET"
  printf ' %-30s %s\n' "MODEL" "AVAILABLE ON"
  printf ' %-30s %s\n' "─────" "────────────"

  for node in "${OLLAMA_NODES[@]}"; do
    ip="${NODE_IP[$node]:-}"
    if [[ -z "$ip" ]]; then
      continue
    fi
    tags=$(curl -sf --connect-timeout 3 "http://${ip}:11434/api/tags" 2>/dev/null) || continue
    # A node answering with something jq cannot parse is skipped; it must
    # not end the listing for the nodes that follow it.
    names=$(jq -r '.models[].name' <<<"$tags" 2>/dev/null) || continue
    while IFS= read -r model_name; do
      if [[ -n "$model_name" ]]; then
        printf '%s|%s\n' "$model_name" "$node"
      fi
    done <<<"$names"
  done | sort | while IFS='|' read -r model_name node; do
    printf ' %-30s %s\n' "$model_name" "$node"
  done
  echo
}
# Print the command-line help text.
# Globals:   PINK, BLUE, GREEN, RESET, DEFAULT_MODEL (read)
# Arguments: none
# Outputs:   help text on stdout
usage() {
  # The rest of the script renders colours through printf '%b', which
  # expands backslash escapes. A heredoc does not, so the colour values are
  # expanded once here; values that already hold real escape characters
  # pass through unchanged.
  local c_pink c_blue c_green c_reset
  printf -v c_pink '%b' "${PINK:-}"
  printf -v c_blue '%b' "${BLUE:-}"
  printf -v c_green '%b' "${GREEN:-}"
  printf -v c_reset '%b' "${RESET:-}"
  cat <<EOF
${c_pink}br-infer${c_reset} - Production AI inference across the fleet
${c_blue}USAGE:${c_reset}
br-infer [options] "prompt" Run inference
br-infer batch <file> Batch inference (one prompt per line)
br-infer health Check fleet health
br-infer models List available models
br-infer stats Inference statistics
${c_blue}OPTIONS:${c_reset}
-m, --model MODEL Model to use (default: ${DEFAULT_MODEL})
-s, --system TEXT System prompt
-j, --json JSON output
${c_green}EXAMPLES:${c_reset}
br-infer "explain TCP in one sentence"
br-infer -m mistral "write a haiku about code"
br-infer -j "what is 2+2"
echo "hello" | br-infer
br-infer batch prompts.txt
br-infer health
br-infer stats
EOF
}
# Create the metrics database the first time the tool runs.
if [[ ! -f "$INFER_DB" ]]; then
  init_db
fi

# Route on the first argument: no argument or a help flag prints the usage
# text, a known sub-command runs its handler, and anything else is treated
# as a query (options and/or prompt words).
case "${1:-}" in
  ""|-h|--help|help)
    usage
    ;;
  h|health)
    cmd_health
    ;;
  s|stats)
    cmd_stats
    ;;
  m|models)
    cmd_models
    ;;
  b|batch)
    shift
    cmd_batch "$@"
    ;;
  *)
    cmd_query "$@"
    ;;
esac