Initial monorepo — everything BlackRoad in one place
bin/ 230 CLI tools (ask-*, br-*, agent-*, roadid, carpool) scripts/ 99 automation scripts fleet/ Node configs and deployment workers/ Cloudflare Worker sources (roadpay, road-search, squad webhooks) roadc/ RoadC programming language roadnet/ Mesh network (5 APs, WireGuard) operator/ Memory system scripts config/ System configs dotfiles/ Shell configs docs/ Documentation BlackRoad OS — Pave Tomorrow. RoadChain-SHA2048: d1a24f55318d338b RoadChain-Identity: alexa@sovereign RoadChain-Full: d1a24f55318d338b24b60bad7be39286379c76ae5470817482100cb0ddbbcb97e147d07ac7243da0a9f0363e4e5c833d612b9c0df3a3cd20802465420278ef74875a5b77f55af6fe42a931b8b635b3d0d0b6bde9abf33dc42eea52bc03c951406d8cbe49f1a3d29b26a94dade05e9477f34a7d4d4c6ec4005c3c2ac54e73a68440c512c8e83fd9b1fe234750b898ef8f4032c23db173961fe225e67a0432b5293a9714f76c5c57ed5fdf35b9fb40fd73c03ebf88b7253c6a0575f5afb6a6b49b3bda310602fb1ef676859962dad2aebbb2875814b30eee0a8ba195e482d4cbc91d8819e7f38f6db53e8063401649c77bb994371473cabfb917fb53e8cbe73d60
This commit is contained in:
397
bin/br-infer
Executable file
397
bin/br-infer
Executable file
@@ -0,0 +1,397 @@
#!/usr/bin/env bash
# ============================================================================
# BLACKROAD OS, INC. - PROPRIETARY AND CONFIDENTIAL
# Copyright (c) 2025-2026 BlackRoad OS, Inc. All Rights Reserved.
# ============================================================================
# br-infer - Production AI inference across the fleet
# Smart routing with health checks, retries, and metrics
# Usage: br-infer [options] "prompt"

set -eo pipefail

# Shared fleet configuration. A missing or failing file is tolerated on purpose.
# NOTE(review): presumably this is where NODE_IP and the colour variables
# (PINK, AMBER, GREEN, RED, BLUE, RESET) used by this script come from -
# confirm against nodes.sh.
# shellcheck source=/dev/null
source "$HOME/.blackroad/config/nodes.sh" 2>/dev/null || true

# If the config did not define NODE_IP, "${NODE_IP[$node]}" would treat the
# node name as an arithmetic subscript. An empty associative array keeps every
# lookup a plain string key that simply resolves to "".
declare -p NODE_IP >/dev/null 2>&1 || declare -A NODE_IP=()

# On-disk state: metrics database and response cache directory.
INFER_DIR="$HOME/.blackroad/inference"
INFER_DB="$INFER_DIR/inference.db"
CACHE_DIR="$INFER_DIR/cache"
mkdir -p "$INFER_DIR" "$CACHE_DIR"

# Run SQL against the metrics database.
# Arguments: passed straight to sqlite3 after the database path; with no SQL
#            argument sqlite3 reads statements from stdin.
# Outputs:   query results on stdout; stderr is discarded, so failures are
#            visible to callers only through the exit status.
# The busy timeout is applied on every call because SQLite scopes it to one
# connection and each call here opens a fresh one.
_sql() {
  sqlite3 -cmd ".timeout 5000" "$INFER_DB" "$@" 2>/dev/null
}
# Escape a value for use inside a single-quoted SQL string literal by doubling
# every single quote.
# Arguments: $1 - raw value
# Outputs:   escaped value followed by a newline
# printf is used instead of echo so values such as "-n" or "-e" are emitted
# verbatim rather than being taken as echo options.
_sql_escape() {
  local quote="'"
  printf '%s\n' "${1//$quote/$quote$quote}"
}

# Fleet nodes that are probed for an Ollama endpoint on port 11434.
OLLAMA_NODES=(cecilia lucidia alice)
# Model used when -m/--model is not given; override with BR_DEFAULT_MODEL.
DEFAULT_MODEL="${BR_DEFAULT_MODEL:-llama3.2}"
# Retries after the first attempt (the query loop runs while attempt <= this).
MAX_RETRIES=2
# Upper bound in seconds for one generate request (curl --max-time).
TIMEOUT_S=120

# Create the metrics schema if it does not exist yet.
#   inferences  - one row per generate request (node, model, sizes, latency)
#   node_health - one row per node with last probe result and counters
# Every statement is idempotent (IF NOT EXISTS), so re-running is harmless.
# Outputs: nothing. Both PRAGMAs echo their resulting value ("wal", "5000");
#          stdout is discarded so the first run of the CLI does not print
#          them in front of the actual answer.
init_db() {
  _sql >/dev/null <<'SQL'
CREATE TABLE IF NOT EXISTS inferences (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
  node TEXT,
  model TEXT,
  prompt_len INTEGER,
  response_len INTEGER,
  latency_ms INTEGER,
  status TEXT,
  cached INTEGER DEFAULT 0
);
CREATE TABLE IF NOT EXISTS node_health (
  node TEXT PRIMARY KEY,
  last_check DATETIME,
  status TEXT DEFAULT 'unknown',
  avg_latency_ms REAL DEFAULT 0,
  success_count INTEGER DEFAULT 0,
  fail_count INTEGER DEFAULT 0,
  models TEXT
);
CREATE INDEX IF NOT EXISTS idx_infer_ts ON inferences(timestamp);
CREATE INDEX IF NOT EXISTS idx_infer_node ON inferences(node);
PRAGMA journal_mode=WAL;
PRAGMA busy_timeout=5000;
SQL
}

# Health check all nodes, update DB
#
# Probes /api/tags on every node in OLLAMA_NODES that has a NODE_IP entry,
# prints one table row per node and records the outcome in node_health.
# localhost is probed last for display only and is not written to the DB.
# Globals: OLLAMA_NODES, NODE_IP, AMBER, GREEN, RED, RESET (read)
# Outputs: the health table on stdout
cmd_health() {
  local node ip endpoint tags count model_list
  local start_ms end_ms lat

  printf '%bChecking fleet inference health...%b\n\n' "$AMBER" "$RESET"
  printf ' %-12s %-8s %-10s %-8s %s\n' "NODE" "STATUS" "LATENCY" "MODELS" "ENDPOINT"
  printf ' %-12s %-8s %-10s %-8s %s\n' "────" "──────" "───────" "──────" "────────"

  for node in "${OLLAMA_NODES[@]}"; do
    ip="${NODE_IP[$node]:-}"
    if [[ -z "$ip" ]]; then
      continue
    fi
    endpoint="http://${ip}:11434"

    printf ' %-12s ' "$node"

    start_ms=$(date +%s%3N 2>/dev/null || python3 -c 'import time; print(int(time.time()*1000))') || start_ms=0
    # An unreachable node makes curl fail. Without the explicit fallback the
    # failed assignment would trip `set -e` and end the report at the first
    # DOWN node.
    tags=$(curl -sf --connect-timeout 3 --max-time 5 "${endpoint}/api/tags" 2>/dev/null) || tags=""
    end_ms=$(date +%s%3N 2>/dev/null || python3 -c 'import time; print(int(time.time()*1000))') || end_ms=0
    lat=$((end_ms - start_ms))

    if [[ -n "$tags" ]]; then
      count=$(printf '%s\n' "$tags" | jq '.models | length' 2>/dev/null) || count="0"
      model_list=$(printf '%s\n' "$tags" | jq -r '[.models[].name] | join(",")' 2>/dev/null) || model_list=""

      printf '%b%-8s%b %-10s %-8s %s\n' "$GREEN" "UP" "$RESET" "${lat}ms" "$count" "$endpoint"

      # Ensure the row exists, then update in place: replacing the whole row
      # would reset success_count / fail_count to their defaults. Recording is
      # best-effort so a DB problem cannot hide the report itself.
      _sql "INSERT OR IGNORE INTO node_health (node) VALUES ('$node');
            UPDATE node_health
               SET last_check = datetime('now'), status = 'up',
                   avg_latency_ms = $lat, models = '$(_sql_escape "$model_list")'
             WHERE node = '$node';" || true
    else
      printf '%b%-8s%b %-10s %-8s %s\n' "$RED" "DOWN" "$RESET" "—" "—" "$endpoint"

      # Last known latency, models and counters are kept for a down node.
      _sql "INSERT OR IGNORE INTO node_health (node) VALUES ('$node');
            UPDATE node_health
               SET last_check = datetime('now'), status = 'down'
             WHERE node = '$node';" || true
    fi
  done

  # Also check localhost
  printf ' %-12s ' "localhost"
  if curl -sf --connect-timeout 1 "http://localhost:11434/api/tags" &>/dev/null; then
    printf '%b%-8s%b\n' "$GREEN" "UP" "$RESET"
  else
    printf '%b%-8s%b\n' "$AMBER" "N/A" "$RESET"
  fi
  echo
}

# Return 0 when $1 equals any of the remaining arguments.
_node_in_list() {
  local needle="$1" item
  shift
  for item in "$@"; do
    if [[ "$item" == "$needle" ]]; then
      return 0
    fi
  done
  return 1
}

# Find best available node (least latency, healthy)
#
# A reachable localhost endpoint always wins. Otherwise every configured node
# is probed and the reachable one with the lowest recorded avg_latency_ms
# (500 when nothing is recorded) is chosen; the first node wins ties.
# Arguments: $1    - model name (accepted for interface stability; not used
#                    for selection)
#            $2... - optional node names to exclude, e.g. nodes that already
#                    failed for this request
# Outputs:   "node:ip" on stdout ("localhost:localhost" for the local one)
# Returns:   0 when a node was found, 1 otherwise
find_best_node() {
  # shellcheck disable=SC2034  # kept so the positional interface is documented
  local model="$1"
  local -a excluded=("${@:2}")
  local node ip lat
  local best_node="" best_ip="" best_lat=999999

  # Check local first
  if ! _node_in_list "localhost" "${excluded[@]}" &&
    curl -sf --connect-timeout 1 "http://localhost:11434/api/tags" &>/dev/null; then
    echo "localhost:localhost"
    return 0
  fi

  # Try nodes by health record (best latency first)
  for node in "${OLLAMA_NODES[@]}"; do
    ip="${NODE_IP[$node]:-}"
    if [[ -z "$ip" ]]; then
      continue
    fi
    if _node_in_list "$node" "${excluded[@]}"; then
      continue
    fi

    # Quick health check
    if ! curl -sf --connect-timeout 2 "http://${ip}:11434/api/tags" &>/dev/null; then
      continue
    fi

    lat=$(_sql "SELECT avg_latency_ms FROM node_health WHERE node='$node' AND status='up'") || lat=""
    lat="${lat:-500}"
    lat="${lat%.*}" # avg_latency_ms is REAL; compare on the integer part
    if [[ ! "$lat" =~ ^[0-9]+$ ]]; then
      lat=500
    fi

    if ((lat < best_lat)); then
      best_lat="$lat"
      best_node="$node"
      best_ip="$ip"
    fi
  done

  if [[ -n "$best_node" ]]; then
    echo "${best_node}:${best_ip}"
    return 0
  fi

  return 1
}

# Core inference
#
# Sends one non-streaming /api/generate request to a node and records the
# outcome in the metrics database.
# Arguments: $1 - model name
#            $2 - prompt text
#            $3 - "node:ip" pair as printed by find_best_node
# Globals:   TIMEOUT_S (read)
# Outputs:   the model's response text on stdout (success only)
# Returns:   0 when a non-empty response was received, 1 otherwise
do_infer() {
  local model="$1"
  local prompt="$2"
  local node_info="$3"

  local node="${node_info%%:*}"
  local ip="${node_info#*:}"
  # For the local endpoint ip is the literal "localhost", so one form covers both.
  local host="http://${ip}:11434"

  local start_ms end_ms lat payload response text sql_node sql_model

  start_ms=$(date +%s%3N 2>/dev/null || python3 -c 'import time; print(int(time.time()*1000))') || start_ms=0

  # jq builds the body so quotes and newlines in the prompt are JSON-escaped.
  payload=$(jq -n --arg m "$model" --arg p "$prompt" '{model: $m, prompt: $p, stream: false}')
  response=$(curl -sf --max-time "$TIMEOUT_S" "${host}/api/generate" -d "$payload" 2>/dev/null) || response=""

  end_ms=$(date +%s%3N 2>/dev/null || python3 -c 'import time; print(int(time.time()*1000))') || end_ms=0
  lat=$((end_ms - start_ms))

  text=$(printf '%s\n' "$response" | jq -r '.response // empty' 2>/dev/null) || text=""

  sql_node=$(_sql_escape "$node")
  sql_model=$(_sql_escape "$model")

  # The node_health row is created on demand: a node that was never health
  # checked (localhost in particular) has no row, and a bare UPDATE would
  # silently count nothing. Metrics are best-effort and never change the result.
  if [[ -n "$text" ]]; then
    # Record success. SET expressions see the row's previous values, so the
    # running average uses the old success_count as its weight.
    _sql "INSERT INTO inferences (node, model, prompt_len, response_len, latency_ms, status)
          VALUES ('$sql_node', '$sql_model', ${#prompt}, ${#text}, $lat, 'success');
          INSERT OR IGNORE INTO node_health (node) VALUES ('$sql_node');
          UPDATE node_health SET success_count = success_count + 1,
                 avg_latency_ms = (avg_latency_ms * success_count + $lat) / (success_count + 1)
           WHERE node='$sql_node';" || true
    printf '%s\n' "$text"
    return 0
  fi

  _sql "INSERT INTO inferences (node, model, prompt_len, response_len, latency_ms, status)
        VALUES ('$sql_node', '$sql_model', ${#prompt}, 0, $lat, 'fail');
        INSERT OR IGNORE INTO node_health (node) VALUES ('$sql_node');
        UPDATE node_health SET fail_count = fail_count + 1 WHERE node='$sql_node';" || true
  return 1
}

# Main query with retries and fallback
#
# Parses options, builds the prompt (arguments, or stdin when no argument is
# given and stdin is not a terminal) and runs it on the node chosen by
# find_best_node, retrying up to MAX_RETRIES more times after a failure.
# Arguments: [-m|--model MODEL] [-s|--system TEXT] [-j|--json] prompt words...
# Globals:   DEFAULT_MODEL, MAX_RETRIES, RED, AMBER, PINK, RESET (read)
# Outputs:   response text (or a JSON object with -j) on stdout; progress and
#            errors on stderr
# Returns:   0 on success, 1 on usage error or when every attempt failed
cmd_query() {
  local model="$DEFAULT_MODEL"
  local system_prompt=""
  local json_output=false

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -m | --model)
        # Without this check `shift 2` fails on a trailing option, which
        # either kills the script or, where errexit is off, never terminates.
        if [[ $# -lt 2 ]]; then
          echo "br-infer: option $1 requires a value" >&2
          return 1
        fi
        model="$2"
        shift 2
        ;;
      -s | --system)
        if [[ $# -lt 2 ]]; then
          echo "br-infer: option $1 requires a value" >&2
          return 1
        fi
        system_prompt="$2"
        shift 2
        ;;
      -j | --json)
        json_output=true
        shift
        ;;
      *) break ;;
    esac
  done

  local prompt="$*"

  # Read from stdin if no prompt
  if [[ -z "$prompt" && ! -t 0 ]]; then
    prompt=$(cat)
  fi

  if [[ -z "$prompt" ]]; then
    echo "Usage: br-infer \"prompt\"" >&2
    return 1
  fi

  # Prepend system prompt if given. $'\n\n' yields real newlines; "\n\n" in
  # double quotes would put the four literal characters into the prompt.
  if [[ -n "$system_prompt" ]]; then
    prompt="${system_prompt}"$'\n\n'"${prompt}"
  fi

  local attempt=0
  local -a tried_nodes=()
  local node_info node tried skip result

  while ((attempt <= MAX_RETRIES)); do
    # Nodes already tried are handed over as extra arguments so a selector
    # that honours them can offer a different node. One that ignores them
    # returns its usual pick and the skip check below still applies.
    if ! node_info=$(find_best_node "$model" "${tried_nodes[@]}"); then
      if ((attempt > 0)); then
        break # nothing left to retry on; report the failed attempts
      fi
      printf '%bNo inference nodes available%b\n' "$RED" "$RESET" >&2
      return 1
    fi

    node="${node_info%%:*}"

    # Skip already-tried nodes
    skip=false
    for tried in "${tried_nodes[@]}"; do
      if [[ "$tried" == "$node" ]]; then
        skip=true
      fi
    done
    if [[ "$skip" == true ]]; then
      # Plain assignment: ((attempt++)) returns status 1 when attempt is 0,
      # which under `set -e` ends the script instead of retrying.
      attempt=$((attempt + 1))
      continue
    fi

    tried_nodes+=("$node")

    if ((attempt > 0)); then
      printf '%bRetrying on %s...%b\n' "$AMBER" "$node" "$RESET" >&2
    fi
    printf '%b→ %s%b (model: %s)\n' "$PINK" "$node" "$RESET" "$model" >&2

    if result=$(do_infer "$model" "$prompt" "$node_info"); then
      if [[ "$json_output" == true ]]; then
        jq -n --arg node "$node" --arg model "$model" --arg response "$result" \
          '{node: $node, model: $model, response: $response}'
      else
        printf '%s\n' "$result"
      fi
      return 0
    fi

    attempt=$((attempt + 1))
  done

  printf '%bAll inference attempts failed%b\n' "$RED" "$RESET" >&2
  return 1
}

# Batch inference
#
# Runs every prompt of a file through cmd_query. Blank lines and lines
# starting with "#" are skipped.
# Arguments: [-m|--model MODEL] <file>   (one prompt per line)
# Globals:   DEFAULT_MODEL, AMBER, GREEN, RED, PINK, RESET (read)
# Outputs:   "---" / PROMPT / RESPONSE records on stdout for successful
#            prompts; per-prompt progress and the summary on stderr
# Returns:   1 on usage error, otherwise 0 (individual failures are only
#            reflected in the summary)
cmd_batch() {
  local model="$DEFAULT_MODEL"
  local input_file=""

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -m | --model)
        if [[ $# -lt 2 ]]; then
          echo "br-infer: option $1 requires a value" >&2
          return 1
        fi
        model="$2"
        shift 2
        ;;
      *)
        input_file="$1"
        shift
        ;;
    esac
  done

  if [[ -z "$input_file" || ! -f "$input_file" ]]; then
    echo "Usage: br-infer batch [-m model] <file>" >&2
    echo " File should contain one prompt per line" >&2
    return 1
  fi

  local total=0 success=0
  local line result

  # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ -z "$line" || "$line" == "#"* ]]; then
      continue
    fi
    # Plain assignments: ((total++)) returns status 1 when the old value is 0,
    # which under `set -e` ends the batch on its very first prompt.
    total=$((total + 1))

    printf '%b[%d] %b' "$AMBER" "$total" "$RESET" >&2
    # stdin is detached so nothing in the query path can eat the input file.
    if result=$(cmd_query -m "$model" "$line" 2>/dev/null </dev/null); then
      success=$((success + 1))
      printf '%bOK%b\n' "$GREEN" "$RESET" >&2
      echo "---"
      printf '%s\n' "PROMPT: $line"
      printf '%s\n' "RESPONSE: $result"
      echo ""
    else
      printf '%bFAIL%b\n' "$RED" "$RESET" >&2
    fi
  done < "$input_file"

  printf '\n%bBatch complete: %d/%d succeeded%b\n' "$PINK" "$success" "$total" "$RESET" >&2
}

# Stats
#
# Prints totals, success rate, per-node and per-model breakdowns and the last
# ten requests from the inferences table.
# Globals: PINK, BLUE, RESET (read)
# Outputs: the report on stdout
cmd_stats() {
  local total success avg_lat total_chars pct_tenths
  local node model cnt avg_l succ lat status ts icon

  printf '%b╔══════════════════════════════════════════╗%b\n' "$PINK" "$RESET"
  printf '%b║ Inference Statistics ║%b\n' "$PINK" "$RESET"
  printf '%b╚══════════════════════════════════════════╝%b\n\n' "$PINK" "$RESET"

  # A failing query degrades to an empty value instead of ending the report.
  total=$(_sql "SELECT COUNT(*) FROM inferences") || total=""
  success=$(_sql "SELECT COUNT(*) FROM inferences WHERE status='success'") || success=""
  avg_lat=$(_sql "SELECT CAST(AVG(latency_ms) AS INTEGER) FROM inferences WHERE status='success'") || avg_lat=""
  total_chars=$(_sql "SELECT SUM(response_len) FROM inferences WHERE status='success'") || total_chars=""

  if [[ ! "$total" =~ ^[0-9]+$ ]]; then
    total=0
  fi
  if [[ ! "$success" =~ ^[0-9]+$ ]]; then
    success=0
  fi

  printf ' Total requests: %s\n' "$total"
  printf ' Success rate: '
  if ((total > 0)); then
    # One decimal place, truncated, using integer arithmetic only so the
    # report does not depend on bc being installed.
    pct_tenths=$((success * 1000 / total))
    printf '%d.%d%%\n' "$((pct_tenths / 10))" "$((pct_tenths % 10))"
  else
    printf 'N/A\n'
  fi
  printf ' Avg latency: %sms\n' "${avg_lat:-0}"
  # response_len stores the character count of the reply, not tokens.
  printf ' Total chars out: %s\n\n' "${total_chars:-0}"

  # The loops read from process substitutions so a failing query yields an
  # empty section rather than tripping pipefail + errexit.
  printf ' %bBy node:%b\n' "$BLUE" "$RESET"
  while IFS='|' read -r node cnt avg_l succ; do
    printf ' %-12s %4d reqs avg:%4dms ok:%d\n' "$node" "$cnt" "$avg_l" "$succ"
  done < <(_sql "SELECT node, COUNT(*), CAST(AVG(latency_ms) AS INTEGER), SUM(CASE WHEN status='success' THEN 1 ELSE 0 END) FROM inferences GROUP BY node ORDER BY COUNT(*) DESC")

  echo ""
  printf ' %bBy model:%b\n' "$BLUE" "$RESET"
  while IFS='|' read -r model cnt avg_l; do
    printf ' %-25s %4d reqs avg:%4dms\n' "$model" "$cnt" "$avg_l"
  done < <(_sql "SELECT model, COUNT(*), CAST(AVG(latency_ms) AS INTEGER) FROM inferences WHERE status='success' GROUP BY model ORDER BY COUNT(*) DESC")

  echo ""
  printf ' %bLast 10:%b\n' "$BLUE" "$RESET"
  while IFS='|' read -r node model lat status ts; do
    icon="+"
    if [[ "$status" == "fail" ]]; then
      icon="x"
    fi
    printf ' %s %-10s %-20s %5dms %s\n' "$icon" "$node" "$model" "$lat" "$ts"
  done < <(_sql "SELECT node, model, latency_ms, status, timestamp FROM inferences ORDER BY id DESC LIMIT 10")
  echo
}

# List available models across fleet
#
# Asks every node in OLLAMA_NODES that has a NODE_IP entry for its model tags
# and prints one "model  node" row per pair, sorted by model name. Nodes that
# do not answer, or answer with something jq cannot read, are left out.
# Globals: OLLAMA_NODES, NODE_IP, PINK, RESET (read)
# Outputs: the table on stdout
cmd_models() {
  local node ip tags model where

  printf '%bFleet Models:%b\n\n' "$PINK" "$RESET"
  printf ' %-30s %s\n' "MODEL" "AVAILABLE ON"
  printf ' %-30s %s\n' "─────" "────────────"

  for node in "${OLLAMA_NODES[@]}"; do
    ip="${NODE_IP[$node]:-}"
    if [[ -z "$ip" ]]; then
      continue
    fi

    tags=$(curl -sf --connect-timeout 3 "http://${ip}:11434/api/tags" 2>/dev/null) || continue

    # jq emits "model|node" directly. `|| true` keeps one unreadable reply
    # from failing the pipeline (pipefail + errexit) and hiding the rest.
    printf '%s\n' "$tags" |
      jq -r --arg n "$node" '.models[].name + "|" + $n' 2>/dev/null || true
  done | sort | while IFS='|' read -r model where; do
    printf ' %-30s %s\n' "$model" "$where"
  done
  echo
}

# Print the help text.
# Globals: PINK, BLUE, GREEN, RESET, DEFAULT_MODEL (read)
# Outputs: help text on stdout
# Coloured headings go through printf '%b', matching how the rest of the
# script renders the colour variables; a heredoc would print backslash-escaped
# colour values literally.
usage() {
  printf '%b%s%b - Production AI inference across the fleet\n\n' "$PINK" "br-infer" "$RESET"

  printf '%b%s%b\n' "$BLUE" "USAGE:" "$RESET"
  cat <<'EOF'
  br-infer [options] "prompt"    Run inference
  br-infer batch <file>          Batch inference (one prompt per line)
  br-infer health                Check fleet health
  br-infer models                List available models
  br-infer stats                 Inference statistics

EOF

  printf '%b%s%b\n' "$BLUE" "OPTIONS:" "$RESET"
  printf '  -m, --model MODEL    Model to use (default: %s)\n' "$DEFAULT_MODEL"
  cat <<'EOF'
  -s, --system TEXT    System prompt
  -j, --json           JSON output

EOF

  printf '%b%s%b\n' "$GREEN" "EXAMPLES:" "$RESET"
  cat <<'EOF'
  br-infer "explain TCP in one sentence"
  br-infer -m mistral "write a haiku about code"
  br-infer -j "what is 2+2"
  echo "hello" | br-infer
  br-infer batch prompts.txt
  br-infer health
  br-infer stats
EOF
}

# Create the schema the first time the database file is missing.
[[ -f "$INFER_DB" ]] || init_db

# Sub-command dispatch. Anything that is not a known sub-command (including
# leading options such as -m) is treated as an inference request. Note that the
# one-letter aliases mean a bare prompt of "h", "s", "m" or "b" selects a
# sub-command rather than being sent to a model.
case "${1:-}" in
  health | h) cmd_health ;;
  stats | s) cmd_stats ;;
  models | m) cmd_models ;;
  batch | b)
    shift
    cmd_batch "$@"
    ;;
  -h | --help | help) usage ;;
  "") usage ;;
  *) cmd_query "$@" ;;
esac
Reference in New Issue
Block a user