# Mirror of https://github.com/blackboxprogramming/BlackRoad-Operating-System.git
# Synced 2026-03-17 06:57:17 -05:00
# Co-authored-by: blackboxprogramming <118287761+blackboxprogramming@users.noreply.github.com>
"""Ollama local LLM router – proxies requests to a local Ollama instance.

Usage
-----
Start Ollama locally::

    ollama serve          # defaults to http://localhost:11434
    ollama pull llama3    # pull a model

Then call::

    POST /api/ollama/chat
    POST /api/ollama/generate
    GET  /api/ollama/models
    GET  /api/ollama/health
"""

from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import Optional, List, Dict, Any
import httpx

from app.config import settings

router = APIRouter(prefix="/api/ollama", tags=["Ollama"])


# ────────────────────────────────────────────────────────────────────────────
# Schemas
# ────────────────────────────────────────────────────────────────────────────
class OllamaChatMessage(BaseModel):
    """A single turn in an Ollama chat conversation."""

    # One of "user", "assistant" or "system" per the comment in SOURCE.
    role: str
    content: str


class OllamaChatRequest(BaseModel):
    """Body of ``POST /api/ollama/chat``."""

    # When omitted, the endpoint falls back to settings.OLLAMA_DEFAULT_MODEL.
    model: Optional[str] = None
    messages: List[OllamaChatMessage]
    stream: bool = False
    # Passed through verbatim to Ollama's "options" field when provided.
    options: Optional[Dict[str, Any]] = None


class OllamaGenerateRequest(BaseModel):
    """Body of ``POST /api/ollama/generate``."""

    # When omitted, the endpoint falls back to settings.OLLAMA_DEFAULT_MODEL.
    model: Optional[str] = None
    prompt: str
    stream: bool = False
    # Passed through verbatim to Ollama's "options" field when provided.
    options: Optional[Dict[str, Any]] = None


# ────────────────────────────────────────────────────────────────────────────
# Helpers
# ────────────────────────────────────────────────────────────────────────────
def _get_base_url() -> str:
    """Return the configured Ollama base URL without a trailing slash."""
    base = settings.OLLAMA_BASE_URL
    return base.rstrip("/")


def _get_model(model: Optional[str]) -> str:
|
||
return model or settings.OLLAMA_DEFAULT_MODEL
|
||
|
||
|
||
async def _proxy(method: str, path: str, payload: dict) -> dict:
    """Forward *payload* to the local Ollama HTTP API and return its JSON reply.

    Args:
        method: HTTP method, e.g. ``"POST"``.
        path: API path starting with ``/``, e.g. ``"/api/chat"``.
        payload: JSON-serializable request body.

    Raises:
        HTTPException: 503 when the Ollama daemon is unreachable, 504 when
            the request times out, otherwise the upstream status code when
            Ollama returns an HTTP error.
    """
    url = f"{_get_base_url()}{path}"
    try:
        # Generous timeout: local model inference can take a long time.
        async with httpx.AsyncClient(timeout=120.0) as client:
            resp = await client.request(method, url, json=payload)
            resp.raise_for_status()
            return resp.json()
    except httpx.ConnectError:
        raise HTTPException(
            status_code=503,
            detail=(
                f"Cannot reach Ollama at {_get_base_url()}. "
                "Make sure Ollama is running locally: `ollama serve`"
            ),
        )
    except httpx.TimeoutException:
        # Fix: previously unhandled, so a slow model surfaced as a bare 500.
        raise HTTPException(
            status_code=504,
            detail=f"Ollama request to {path} timed out.",
        )
    except httpx.HTTPStatusError as exc:
        raise HTTPException(status_code=exc.response.status_code, detail="Ollama request failed")


# ────────────────────────────────────────────────────────────────────────────
# Endpoints
# ────────────────────────────────────────────────────────────────────────────
@router.get("/health")
async def ollama_health():
    """Report whether the local Ollama daemon answers ``/api/tags``."""
    base = _get_base_url()
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(f"{base}/api/tags")
            response.raise_for_status()
    except Exception:
        # Deliberate best-effort probe: any failure is reported as
        # "unreachable" rather than bubbling up as a 500.
        return {
            "status": "unreachable",
            "base_url": base,
            "error": "Ollama daemon not reachable",
        }
    return {"status": "ok", "base_url": base}


@router.get("/models")
async def list_models():
    """List models available in the local Ollama instance.

    Proxies Ollama's ``GET /api/tags`` and returns its JSON verbatim.

    Raises:
        HTTPException: 503 when the daemon is unreachable, otherwise the
            upstream status code when Ollama returns an HTTP error.
    """
    url = f"{_get_base_url()}/api/tags"
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(url)
            resp.raise_for_status()
            return resp.json()
    except httpx.ConnectError:
        raise HTTPException(
            status_code=503,
            detail=f"Cannot reach Ollama at {_get_base_url()}. Run `ollama serve` first.",
        )
    except httpx.HTTPStatusError as exc:
        # Fix: previously unhandled (inconsistent with _proxy), so an
        # upstream error surfaced as a bare 500 with a traceback.
        raise HTTPException(status_code=exc.response.status_code, detail="Ollama request failed")


@router.post("/chat")
async def ollama_chat(req: OllamaChatRequest):
    """Send a chat completion request to the local Ollama instance.

    Streaming is not supported by this proxy (``_proxy`` reads a single
    JSON body); requests with ``stream=true`` are rejected with 400.

    Example::

        curl -X POST /api/ollama/chat \\
            -H 'Content-Type: application/json' \\
            -d '{"messages": [{"role": "user", "content": "Hello!"}]}'
    """
    if req.stream:
        # Fix: stream=True used to be silently ignored while the payload
        # hardcoded "stream": False — fail loudly instead.
        raise HTTPException(
            status_code=400,
            detail="Streaming responses are not supported by this endpoint.",
        )
    payload: Dict[str, Any] = {
        "model": _get_model(req.model),
        "messages": [m.model_dump() for m in req.messages],
        "stream": False,  # _proxy expects one complete JSON response
    }
    if req.options:
        payload["options"] = req.options
    return await _proxy("POST", "/api/chat", payload)


@router.post("/generate")
async def ollama_generate(req: OllamaGenerateRequest):
    """Send a raw generation request to the local Ollama instance.

    Streaming is not supported by this proxy (``_proxy`` reads a single
    JSON body); requests with ``stream=true`` are rejected with 400.
    """
    if req.stream:
        # Fix: stream=True used to be silently ignored while the payload
        # hardcoded "stream": False — fail loudly instead.
        raise HTTPException(
            status_code=400,
            detail="Streaming responses are not supported by this endpoint.",
        )
    payload: Dict[str, Any] = {
        "model": _get_model(req.model),
        "prompt": req.prompt,
        "stream": False,  # _proxy expects one complete JSON response
    }
    if req.options:
        payload["options"] = req.options
    return await _proxy("POST", "/api/generate", payload)