Some checks failed
Lint & Format / detect (push) Failing after 32s
Monorepo Lint / lint-shell (push) Failing after 31s
Monorepo Lint / lint-js (push) Failing after 30s
Lint & Format / js-lint (push) Has been skipped
Lint & Format / py-lint (push) Has been skipped
Lint & Format / sh-lint (push) Has been skipped
Lint & Format / go-lint (push) Has been skipped
RoadChain-SHA2048: 692327ce2e990f37 RoadChain-Identity: alexa@sovereign RoadChain-Full: 692327ce2e990f37649b83e948241ac858c0d07146c6b42043e4770d638c44d5bada5639ad82c7aa8911d7042912c1d75b6bbce9a453637621b3903dc912a3a9537696cedf7a0870e3bf962ca44677793082aaae5c5433615885ad20fab1e80417202d11e93284483551ba9558f06809d2f3fa53c00a657277d7c183abe3ba187c1af6856a455071771757cca67ff2b74c5f855f23dd8cc8f5b3596c966b2344361fcbb74843e9d9d9ad66c5321ef64ce787f9d255d11e0d4e0ee571af4e09697964e22f6f629a11279b315c9a4563860b169ad93fa500b485297516ef2ba2039f76348c0d547cfa182e9b0bccee73f5e8b7db7e33d61e8199bb4464c2c30d03
208 lines
7.2 KiB
Python
208 lines
7.2 KiB
Python
"""
BlackRoad Agent Orchestrator — Recurring Jobs & Worker Integrations

Schedule agents to do real work: reindex search, collect analytics, and monitor the fleet.
"""
|
|
import asyncio
|
|
import time
|
|
import logging
|
|
import aiohttp
|
|
from dataclasses import dataclass
|
|
from .nats_protocol import NATSBus, TaskMessage
|
|
|
|
# Module-level logger; JobScheduler uses it to report scheduling activity.
log = logging.getLogger("orchestrator.jobs")
|
|
|
|
|
|
@dataclass
class RecurringJob:
    """A prompt that is submitted to an agent archetype on a fixed interval.

    The first four fields define the job and are required. The remaining
    fields have defaults; ``last_run`` and ``run_count`` are bookkeeping
    values that ``JobScheduler`` updates as it schedules and submits the job.
    """

    # --- Job definition (required) ---
    name: str  # Key under which JobScheduler stores the job
    archetype: str  # Copied onto the submitted task as its archetype
    prompt: str  # Prompt text copied onto the submitted task
    interval_seconds: int  # Minimum number of seconds between submissions

    # --- Optional settings ---
    intent: str = "scheduled"  # Copied onto the submitted task as its intent
    enabled: bool = True  # The scheduler loop skips jobs where this is False

    # --- Runtime bookkeeping ---
    last_run: float = 0.0  # time.time()-based reference for the next due check
    run_count: int = 0  # Number of times the job has been submitted
|
|
|
|
|
|
# Built-in recurring jobs. JobScheduler loads every entry at construction
# time and keys it by name. Intervals are written as products so the unit
# (seconds) and the intended period are both visible.
RECURRING_JOBS = [
    RecurringJob(
        name="fleet-health-check",
        archetype="security",  # Uses deepseek-r1:1.5b (fast)
        prompt=(
            "Check BlackRoad infrastructure health. "
            "List which services are up or down. "
            "Be brief — one line per service."
        ),
        interval_seconds=10 * 60,  # Every 10 minutes
    ),
    RecurringJob(
        name="security-scan",
        archetype="security",
        prompt=(
            "Brief security check: any concerns with the BlackRoad fleet? "
            "Check auth, network, and access patterns. "
            "3 bullet points max."
        ),
        interval_seconds=60 * 60,  # Every hour
    ),
    RecurringJob(
        name="code-index-refresh",
        archetype="coder",  # Uses qwen2.5-coder:3b (medium speed)
        prompt=(
            "What are the most important recent changes across BlackRoad repos? "
            "Summarize in 5 bullet points."
        ),
        interval_seconds=30 * 60,  # Every 30 minutes
    ),
    RecurringJob(
        name="analytics-digest",
        archetype="security",  # Fast model for simple analysis
        prompt=(
            "Summarize BlackRoad OS usage: estimated active users, top services, any anomalies. "
            "Keep it to 3 lines."
        ),
        interval_seconds=60 * 60,  # Every hour
    ),
    RecurringJob(
        name="creative-brief",
        archetype="security",  # Use fast model
        prompt=(
            "Write one motivational sentence about building sovereign infrastructure. "
            "Keep it under 20 words."
        ),
        interval_seconds=2 * 60 * 60,  # Every 2 hours
        enabled=False,  # Disabled by default — nice-to-have
    ),
]
|
|
|
|
|
|
class JobScheduler:
    """Runs recurring agent jobs on schedule.

    ``start()`` staggers the jobs and launches a background polling task;
    ``stop()`` shuts that task down. Every enabled job whose interval has
    elapsed is published to the bus as a ``TaskMessage``.
    """

    # Seconds to sleep between passes over the job table.
    _POLL_SECONDS = 30
    # Seconds added between consecutive jobs' initial reference times.
    _STAGGER_SECONDS = 60

    def __init__(self, bus: NATSBus):
        """Create a scheduler that publishes its tasks on ``bus``.

        NOTE: the job table references the module-level ``RECURRING_JOBS``
        instances directly (no copies), so ``enabled``, ``last_run`` and
        ``run_count`` changes made by this scheduler are visible through
        that list as well.
        """
        self.bus = bus
        self.jobs = {j.name: j for j in RECURRING_JOBS}
        self._running = False
        # Background task created by start(); None while not started.
        self._task: asyncio.Task | None = None
        # True while some invocation of _run_loop() is polling.
        self._loop_active = False

    async def start(self):
        """Start the job scheduler loop. Staggers initial runs to avoid thundering herd.

        Each job's ``last_run`` is set to "now" plus a per-job offset, so a
        job first becomes due one full interval plus its offset after start.
        Calling ``start()`` while the background task is still alive is a
        no-op.
        """
        if self._task is not None and not self._task.done():
            return

        self._running = True
        base = time.time()
        for position, job in enumerate(self.jobs.values()):
            # A reference time in the future delays the job's initial run.
            job.last_run = base + position * self._STAGGER_SECONDS
        log.info("Job scheduler started with %d jobs (staggered)", len(self.jobs))

        # Keep a reference to the task so it is not garbage-collected
        # mid-flight and so stop() can cancel it.
        self._task = asyncio.create_task(self._run_loop())

    async def stop(self):
        """Stop scheduling and wait for the background loop to exit."""
        self._running = False
        task, self._task = self._task, None
        if task is None or task.done() or task is asyncio.current_task():
            return
        # Cancel rather than wait out the remainder of the poll sleep.
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass

    async def _run_loop(self):
        """Poll the job table until stopped, submitting each enabled job that is due."""
        if self._loop_active:
            # A second concurrent loop would submit every job twice.
            return
        self._loop_active = True
        try:
            while self._running:
                now = time.time()
                # Snapshot the jobs: the loop awaits between iterations, and
                # the table must not change size underneath the iterator.
                for job in tuple(self.jobs.values()):
                    if not job.enabled:
                        continue
                    if now - job.last_run < job.interval_seconds:
                        continue
                    try:
                        await self._execute_job(job)
                    except Exception:
                        # One failed submission must not end scheduling for
                        # every job. last_run is untouched on failure, so the
                        # job is retried on the next pass.
                        log.exception("Job %s failed to submit", job.name)
                await asyncio.sleep(self._POLL_SECONDS)  # Check every 30s
        finally:
            self._loop_active = False

    async def _execute_job(self, job: RecurringJob):
        """Submit a job as a task to the orchestrator.

        Publishes a ``TaskMessage`` built from the job, then records the
        submission time and increments the job's run counter.
        """
        task = TaskMessage(
            task_id=f"job-{job.name}-{int(time.time())}",
            archetype=job.archetype,
            intent=job.intent,
            prompt=job.prompt,
            priority=7,  # Lower priority than user tasks
        )
        await self.bus.publish_task(task)
        job.last_run = time.time()
        job.run_count += 1
        log.info("Job %s submitted (run #%d)", job.name, job.run_count)

    def list_jobs(self) -> list[dict]:
        """Return one plain-dict summary per registered job."""
        return [
            {
                "name": j.name,
                "archetype": j.archetype,
                "interval": j.interval_seconds,
                "enabled": j.enabled,
                "last_run": j.last_run,
                "run_count": j.run_count,
            }
            for j in self.jobs.values()
        ]

    def toggle_job(self, name: str) -> bool:
        """Flip the ``enabled`` flag of the named job.

        Returns:
            True if the job exists and was toggled, False if no job has
            that name.
        """
        job = self.jobs.get(name)
        if job is None:
            return False
        job.enabled = not job.enabled
        log.info("Job %s %s", name, "enabled" if job.enabled else "disabled")
        return True
|
|
|
|
|
|
class WorkerIntegration:
    """Connect agent tasks to real Cloudflare Workers.

    All requests go through one shared ``aiohttp.ClientSession``. ``init()``
    creates it up front; if ``init()`` was never called, or the session has
    been closed, a session is created on first use instead of failing on a
    ``None`` session.
    """

    # Health endpoints probed by check_all_workers(), keyed by worker name.
    _HEALTH_ENDPOINTS = {
        "auth": "https://auth.blackroad.io/api/health",
        "pay": "https://pay.blackroad.io/health",
        "search": "https://search.blackroad.io/api/health",
        "portal": "https://portal.blackroad.io/api/health",
        "chat": "https://chat.blackroad.io/api/health",
        "images": "https://images.blackroad.io/api/health",
        "index": "https://index.blackroad.io/api/health",
        "analytics": "https://analytics.blackroad.io/api/health",
        "stats": "https://stats.blackroad.io/health",
        "agents": "https://agents.blackroad.io/health",
        "fleet": "https://fleet.blackroad.io/health",
    }

    def __init__(self):
        self._session: aiohttp.ClientSession | None = None

    async def init(self):
        """Create the shared HTTP session (no-op if one is already open)."""
        self._ensure_session()

    async def close(self):
        """Close the shared HTTP session, if there is one."""
        session, self._session = self._session, None
        if session is not None:
            await session.close()

    def _ensure_session(self) -> "aiohttp.ClientSession":
        """Return the open shared session, creating one if needed.

        Synchronous on purpose: with no await inside, concurrent callers
        cannot interleave here and create more than one session.
        """
        if self._session is None or self._session.closed:
            self._session = aiohttp.ClientSession()
        return self._session

    async def _get_json(self, url: str, timeout_seconds: float) -> dict:
        """GET ``url`` with a total timeout and return the decoded JSON body."""
        session = self._ensure_session()
        async with session.get(
            url,
            timeout=aiohttp.ClientTimeout(total=timeout_seconds),
        ) as resp:
            return await resp.json()

    async def trigger_search_reindex(self, source: str = "github") -> dict:
        """Trigger search index rebuild via the index Worker.

        Args:
            source: Value sent as the ``source`` query parameter.

        Returns:
            The decoded JSON body of the Worker's response.
        """
        session = self._ensure_session()
        async with session.post(
            "https://index.blackroad.io/api/index",
            # Passed as params so the value is URL-encoded rather than
            # spliced into the query string verbatim.
            params={"source": source},
            timeout=aiohttp.ClientTimeout(total=30),
        ) as resp:
            return await resp.json()

    async def get_fleet_status(self) -> dict:
        """Pull fleet status from the fleet API."""
        return await self._get_json(
            "https://fleet-api.amundsonalexa.workers.dev/fleet", 10
        )

    async def get_search_stats(self) -> dict:
        """Pull search stats."""
        return await self._get_json("https://search.blackroad.io/api/stats", 5)

    async def get_analytics(self) -> dict:
        """Pull analytics summary."""
        return await self._get_json("https://analytics.blackroad.io/api/stats", 5)

    async def check_all_workers(self) -> dict:
        """Health check all Workers in parallel.

        Returns:
            A dict mapping each worker name to ``"up"`` or ``"down"``, in
            the order the endpoints are declared.
        """
        # _check_one() converts ordinary failures into "down" itself, so
        # one unreachable worker cannot fail the whole gather.
        outcomes = await asyncio.gather(
            *(
                self._check_one(name, url)
                for name, url in self._HEALTH_ENDPOINTS.items()
            )
        )
        return dict(outcomes)

    async def _check_one(self, name: str, url: str) -> tuple[str, str]:
        """Probe one health endpoint.

        Returns:
            ``(name, "up")`` when the endpoint answers with HTTP 200,
            otherwise ``(name, "down")`` — including when the request fails.
        """
        try:
            session = self._ensure_session()
            async with session.get(
                url, timeout=aiohttp.ClientTimeout(total=5)
            ) as resp:
                return (name, "up" if resp.status == 200 else "down")
        except Exception:
            # Best-effort probe: any failure counts as "down", but keep the
            # cause available for debugging instead of discarding it.
            log.debug("Health check for %s failed", name, exc_info=True)
            return (name, "down")
|