# Mirror of https://github.com/blackboxprogramming/BlackRoad-Operating-System.git
# Synced 2026-03-17 06:57:17 -05:00
# Co-authored-by: blackboxprogramming <118287761+blackboxprogramming@users.noreply.github.com>
"""Ollama local LLM router – proxies requests to a local Ollama instance.

Usage
-----
Start Ollama locally::

    ollama serve          # defaults to http://localhost:11434
    ollama pull llama3    # pull a model

Then call::

    POST /api/ollama/chat
    POST /api/ollama/generate
    GET  /api/ollama/models
    GET  /api/ollama/health
"""

from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import Optional, List, Dict, Any
import httpx

from app.config import settings

router = APIRouter(prefix="/api/ollama", tags=["Ollama"])


# ────────────────────────────────────────────────────────────────────────────
# Schemas
# ────────────────────────────────────────────────────────────────────────────
class OllamaChatMessage(BaseModel):
    """A single turn in an Ollama chat conversation."""

    # One of "user", "assistant" or "system" per the comment in SOURCE.
    role: str
    content: str


class OllamaChatRequest(BaseModel):
    """Body of ``POST /api/ollama/chat``."""

    # When omitted, the endpoint falls back to settings.OLLAMA_DEFAULT_MODEL.
    model: Optional[str] = None
    messages: List[OllamaChatMessage]
    stream: bool = False
    # Passed through verbatim to Ollama's "options" field when provided.
    options: Optional[Dict[str, Any]] = None


class OllamaGenerateRequest(BaseModel):
    """Body of ``POST /api/ollama/generate``."""

    # When omitted, the endpoint falls back to settings.OLLAMA_DEFAULT_MODEL.
    model: Optional[str] = None
    prompt: str
    stream: bool = False
    # Passed through verbatim to Ollama's "options" field when provided.
    options: Optional[Dict[str, Any]] = None


# ────────────────────────────────────────────────────────────────────────────
# Helpers
# ────────────────────────────────────────────────────────────────────────────
def _get_base_url() -> str:
    """Return the configured Ollama base URL without a trailing slash."""
    base = settings.OLLAMA_BASE_URL
    return base.rstrip("/")


def _get_model(model: Optional[str]) -> str:
|
||
return model or settings.OLLAMA_DEFAULT_MODEL
|
||
|
||
|
||
async def _proxy(method: str, path: str, payload: dict) -> dict:
    """Forward *payload* to the local Ollama HTTP API and return its JSON reply.

    Args:
        method: HTTP method, e.g. ``"POST"``.
        path: API path starting with ``/``, e.g. ``"/api/chat"``.
        payload: JSON-serializable request body.

    Raises:
        HTTPException: 503 when the Ollama daemon is unreachable, 504 when
            the request times out, otherwise the upstream status code when
            Ollama returns an HTTP error.
    """
    url = f"{_get_base_url()}{path}"
    try:
        # Generous timeout: local model inference can take a long time.
        async with httpx.AsyncClient(timeout=120.0) as client:
            resp = await client.request(method, url, json=payload)
            resp.raise_for_status()
            return resp.json()
    except httpx.ConnectError:
        raise HTTPException(
            status_code=503,
            detail=(
                f"Cannot reach Ollama at {_get_base_url()}. "
                "Make sure Ollama is running locally: `ollama serve`"
            ),
        )
    except httpx.TimeoutException:
        # Fix: previously unhandled, so a slow model surfaced as a bare 500.
        raise HTTPException(
            status_code=504,
            detail=f"Ollama request to {path} timed out.",
        )
    except httpx.HTTPStatusError as exc:
        raise HTTPException(status_code=exc.response.status_code, detail="Ollama request failed")


# ────────────────────────────────────────────────────────────────────────────
# Endpoints
# ────────────────────────────────────────────────────────────────────────────
@router.get("/health")
async def ollama_health():
    """Report whether the local Ollama daemon answers ``/api/tags``."""
    base = _get_base_url()
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(f"{base}/api/tags")
            response.raise_for_status()
    except Exception:
        # Deliberate best-effort probe: any failure is reported as
        # "unreachable" rather than bubbling up as a 500.
        return {
            "status": "unreachable",
            "base_url": base,
            "error": "Ollama daemon not reachable",
        }
    return {"status": "ok", "base_url": base}


@router.get("/models")
async def list_models():
    """List models available in the local Ollama instance.

    Proxies Ollama's ``GET /api/tags`` and returns its JSON verbatim.

    Raises:
        HTTPException: 503 when the daemon is unreachable, otherwise the
            upstream status code when Ollama returns an HTTP error.
    """
    url = f"{_get_base_url()}/api/tags"
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(url)
            resp.raise_for_status()
            return resp.json()
    except httpx.ConnectError:
        raise HTTPException(
            status_code=503,
            detail=f"Cannot reach Ollama at {_get_base_url()}. Run `ollama serve` first.",
        )
    except httpx.HTTPStatusError as exc:
        # Fix: previously unhandled (inconsistent with _proxy), so an
        # upstream error surfaced as a bare 500 with a traceback.
        raise HTTPException(status_code=exc.response.status_code, detail="Ollama request failed")


@router.post("/chat")
async def ollama_chat(req: OllamaChatRequest):
    """Send a chat completion request to the local Ollama instance.

    Streaming is not supported by this proxy (``_proxy`` reads a single
    JSON body); requests with ``stream=true`` are rejected with 400.

    Example::

        curl -X POST /api/ollama/chat \\
            -H 'Content-Type: application/json' \\
            -d '{"messages": [{"role": "user", "content": "Hello!"}]}'
    """
    if req.stream:
        # Fix: stream=True used to be silently ignored while the payload
        # hardcoded "stream": False — fail loudly instead.
        raise HTTPException(
            status_code=400,
            detail="Streaming responses are not supported by this endpoint.",
        )
    payload: Dict[str, Any] = {
        "model": _get_model(req.model),
        "messages": [m.model_dump() for m in req.messages],
        "stream": False,  # _proxy expects one complete JSON response
    }
    if req.options:
        payload["options"] = req.options
    return await _proxy("POST", "/api/chat", payload)


@router.post("/generate")
async def ollama_generate(req: OllamaGenerateRequest):
    """Send a raw generation request to the local Ollama instance.

    Streaming is not supported by this proxy (``_proxy`` reads a single
    JSON body); requests with ``stream=true`` are rejected with 400.
    """
    if req.stream:
        # Fix: stream=True used to be silently ignored while the payload
        # hardcoded "stream": False — fail loudly instead.
        raise HTTPException(
            status_code=400,
            detail="Streaming responses are not supported by this endpoint.",
        )
    payload: Dict[str, Any] = {
        "model": _get_model(req.model),
        "prompt": req.prompt,
        "stream": False,  # _proxy expects one complete JSON response
    }
    if req.options:
        payload["options"] = req.options
    return await _proxy("POST", "/api/generate", payload)