mirror of
https://github.com/blackboxprogramming/BlackRoad-Operating-System.git
synced 2026-03-17 00:57:12 -05:00
fix: health-check workflow, add Ollama local LLM, revive active README with dynamic stats
Co-authored-by: blackboxprogramming <118287761+blackboxprogramming@users.noreply.github.com>
This commit is contained in:
@@ -77,6 +77,10 @@ class Settings(BaseSettings):
|
||||
GITHUB_TOKEN: str = ""
|
||||
GITHUB_WEBHOOK_SECRET: str = ""
|
||||
|
||||
# Ollama local LLM
|
||||
OLLAMA_BASE_URL: str = "http://localhost:11434"
|
||||
OLLAMA_DEFAULT_MODEL: str = "llama3"
|
||||
|
||||
class Config:
|
||||
env_file = ".env"
|
||||
case_sensitive = True
|
||||
|
||||
@@ -18,7 +18,7 @@ from app.routers import (
|
||||
railway, vercel, stripe, twilio, slack, discord, sentry, api_health, agents,
|
||||
capture, identity_center, notifications_center, creator, compliance_ops,
|
||||
search, cloudflare, system, webhooks, prism_static, ip_vault, leitl, cognition,
|
||||
cece, br95
|
||||
cece, br95, ollama
|
||||
)
|
||||
from app.services.crypto import rotate_plaintext_wallet_keys
|
||||
|
||||
@@ -181,6 +181,8 @@ app.include_router(webhooks.router)
|
||||
# BR-95 Desktop OS Data APIs + WebSocket
|
||||
app.include_router(br95.router)
|
||||
|
||||
# Ollama local LLM proxy
|
||||
app.include_router(ollama.router)
|
||||
|
||||
# Prism Console (Phase 2.5) - Admin interface at /prism
|
||||
prism_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "prism-console")
|
||||
|
||||
145
backend/app/routers/ollama.py
Normal file
145
backend/app/routers/ollama.py
Normal file
@@ -0,0 +1,145 @@
|
||||
"""Ollama local LLM router – proxies requests to a local Ollama instance.
|
||||
|
||||
Usage
|
||||
-----
|
||||
Start Ollama locally::
|
||||
|
||||
ollama serve # defaults to http://localhost:11434
|
||||
ollama pull llama3 # pull a model
|
||||
|
||||
Then call::
|
||||
|
||||
POST /api/ollama/chat
|
||||
POST /api/ollama/generate
|
||||
GET /api/ollama/models
|
||||
GET /api/ollama/health
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List, Dict, Any
|
||||
import httpx
|
||||
|
||||
from app.config import settings
|
||||
|
||||
# All endpoints below are mounted under /api/ollama.
router = APIRouter(prefix="/api/ollama", tags=["Ollama"])
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
# Schemas
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
class OllamaChatMessage(BaseModel):
    # One turn of a chat conversation, matching Ollama's /api/chat message shape.
    role: str  # "user" | "assistant" | "system"
    content: str
|
||||
|
||||
|
||||
class OllamaChatRequest(BaseModel):
    # Request body for POST /api/ollama/chat.
    model: Optional[str] = None  # falls back to settings.OLLAMA_DEFAULT_MODEL when omitted
    messages: List[OllamaChatMessage]
    # NOTE(review): the proxy always sends stream=False downstream, so this flag
    # is currently ignored — confirm before documenting streaming as supported.
    stream: bool = False
    options: Optional[Dict[str, Any]] = None  # forwarded verbatim as Ollama "options"
||||
|
||||
|
||||
class OllamaGenerateRequest(BaseModel):
    # Request body for POST /api/ollama/generate.
    model: Optional[str] = None  # falls back to settings.OLLAMA_DEFAULT_MODEL when omitted
    prompt: str
    # NOTE(review): the proxy always sends stream=False downstream, so this flag
    # is currently ignored — confirm before documenting streaming as supported.
    stream: bool = False
    options: Optional[Dict[str, Any]] = None  # forwarded verbatim as Ollama "options"
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
# Helpers
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _get_base_url() -> str:
    """Return the configured Ollama base URL without a trailing slash."""
    configured = settings.OLLAMA_BASE_URL
    return configured.rstrip("/")
|
||||
|
||||
|
||||
def _get_model(model: Optional[str]) -> str:
|
||||
return model or settings.OLLAMA_DEFAULT_MODEL
|
||||
|
||||
|
||||
async def _proxy(method: str, path: str, payload: dict) -> dict:
    """Forward a JSON request to the local Ollama daemon and return its JSON reply.

    Parameters
    ----------
    method:
        HTTP method to use (e.g. ``"POST"``).
    path:
        Ollama API path, e.g. ``"/api/chat"``.
    payload:
        JSON-serializable request body.

    Raises
    ------
    HTTPException
        503 when the daemon is unreachable, 504 when the request times out,
        or Ollama's own status code on an HTTP error response.
    """
    url = f"{_get_base_url()}{path}"
    try:
        # Generation can be slow on CPU-only hosts, hence the generous timeout.
        async with httpx.AsyncClient(timeout=120.0) as client:
            resp = await client.request(method, url, json=payload)
            resp.raise_for_status()
            return resp.json()
    except httpx.ConnectError:
        raise HTTPException(
            status_code=503,
            detail=(
                f"Cannot reach Ollama at {_get_base_url()}. "
                "Make sure Ollama is running locally: `ollama serve`"
            ),
        )
    except httpx.TimeoutException:
        # Previously escaped as an unhandled 500; surface it as a gateway timeout.
        raise HTTPException(
            status_code=504,
            detail=f"Ollama request to {path} timed out",
        )
    except httpx.HTTPStatusError as exc:
        raise HTTPException(status_code=exc.response.status_code, detail="Ollama request failed")
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
# Endpoints
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
@router.get("/health")
async def ollama_health():
    """Report whether the local Ollama daemon answers on its /api/tags endpoint."""
    base = _get_base_url()
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(f"{base}/api/tags")
            response.raise_for_status()
    except Exception:
        # A health probe must never raise; fold every failure into the body.
        return {"status": "unreachable", "base_url": base, "error": "Ollama daemon not reachable"}
    return {"status": "ok", "base_url": base}
|
||||
|
||||
|
||||
@router.get("/models")
async def list_models():
    """List models available in the local Ollama instance (proxy of /api/tags).

    Raises
    ------
    HTTPException
        503 when the daemon is unreachable, or Ollama's own status code on an
        HTTP error response (previously leaked as an unhandled 500).
    """
    url = f"{_get_base_url()}/api/tags"
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(url)
            resp.raise_for_status()
            return resp.json()
    except httpx.ConnectError:
        raise HTTPException(
            status_code=503,
            detail=f"Cannot reach Ollama at {_get_base_url()}. Run `ollama serve` first.",
        )
    except httpx.HTTPStatusError as exc:
        # Consistent with _proxy: mirror Ollama's status instead of a generic 500.
        raise HTTPException(status_code=exc.response.status_code, detail="Ollama request failed")
|
||||
|
||||
|
||||
@router.post("/chat")
async def ollama_chat(req: OllamaChatRequest):
    """Proxy a chat completion request to the local Ollama instance.

    Example::

        curl -X POST /api/ollama/chat \\
            -H 'Content-Type: application/json' \\
            -d '{"messages": [{"role": "user", "content": "Hello!"}]}'
    """
    body: Dict[str, Any] = {
        "model": _get_model(req.model),
        "messages": [message.model_dump() for message in req.messages],
        # This proxy returns a single JSON reply, so streaming is always disabled.
        "stream": False,
    }
    if req.options:
        body["options"] = req.options
    return await _proxy("POST", "/api/chat", body)
|
||||
|
||||
|
||||
@router.post("/generate")
async def ollama_generate(req: OllamaGenerateRequest):
    """Send a raw generation request to the local Ollama instance."""
    body: Dict[str, Any] = {
        "model": _get_model(req.model),
        "prompt": req.prompt,
        # This proxy returns a single JSON reply, so streaming is always disabled.
        "stream": False,
    }
    if req.options:
        body["options"] = req.options
    return await _proxy("POST", "/api/generate", body)
|
||||
Reference in New Issue
Block a user