Remove all external AI provider integrations, route to self-hosted Ollama on Pi cluster

- Remove OpenAI and Anthropic API keys, SDKs, and config from entire codebase - Replace OPENAI_API_KEY/ANTHROPIC_API_KEY with OLLAMA_BASE_URL everywhere - Update backend routers (ai_chat, dashboard, api_health, system) to use Ollama - Update all 8 GitHub workflows: remove claude/** and codex/** branch triggers, keep copilot/** - Update label-pr.yml: replace claude-auto/atlas-auto/codex-auto with copilot-auto/lucidia-auto/owner-auto - Update GITHUB_AUTOMATION_RULES.md for self-hosted routing (@copilot, @lucidia, @blackboxprogramming) - Update Lucidia implementation plan: all models now self-hosted (Llama 3, CodeLlama, Mistral, Phi-3) - Update all env templates, infra manifests, and deployment docs - Update validation script to remove OpenAI/Anthropic from required/sensitive key sets - Zero external AI provider dependency — fully self-hosted on Pi cluster https://claude.ai/code/session_01HwpePGmEVSmn8N3xBtn66E
2026-03-17 23:34:00 -05:00 · 2026-03-03 09:56:57 +00:00
parent 402c7272e7
commit 136443c87f
30 changed files with 133 additions and 171 deletions
--- a/.github/workflows/backend-tests.yml
+++ b/.github/workflows/backend-tests.yml
@@ -2,7 +2,7 @@ name: Backend Tests & API Connectivity
 on:
  push:
-    branches: ["main", "claude/**"]
+    branches: ["main", "copilot/**"]
  pull_request:
    branches: ["main"]
  workflow_dispatch:
@@ -126,7 +126,6 @@ jobs:
                  "Slack API": "https://slack.com/api",
                  "Discord API": "https://discord.com/api/v10",
                  "Sentry API": "https://sentry.io/api/0",
                  "OpenAI API": "https://api.openai.com",
                  "HuggingFace API": "https://huggingface.co/api",
                  "DigitalOcean API": "https://api.digitalocean.com",
              }
--- a/.github/workflows/cece-audit.yml
+++ b/.github/workflows/cece-audit.yml
@@ -2,7 +2,7 @@ name: Cece System Audit
 on:
  push:
-    branches: [main, claude/**]
+    branches: [main, copilot/**]
  pull_request:
    branches: [main]
  workflow_dispatch:  # Allow manual trigger
--- a/.github/workflows/core-os-tests.yml
+++ b/.github/workflows/core-os-tests.yml
@@ -2,7 +2,7 @@ name: Core OS Tests
 on:
  push:
-    branches: [ main, claude/** ]
+    branches: [ main, copilot/** ]
    paths:
      - 'core_os/**'
      - '.github/workflows/core-os-tests.yml'
--- a/.github/workflows/docs-build.yml
+++ b/.github/workflows/docs-build.yml
@@ -2,7 +2,7 @@ name: Documentation Build
 on:
  push:
-    branches: [ main, claude/** ]
+    branches: [ main, copilot/** ]
    paths:
      - 'codex-docs/**'
      - '.github/workflows/docs-build.yml'
--- a/.github/workflows/label-pr.yml
+++ b/.github/workflows/label-pr.yml
@@ -37,17 +37,17 @@ jobs:
        id: accumulate-labels
        run: |
          LABELS=""
-          # Claude PR
+          # Copilot PR
-          if [[ "${GITHUB_HEAD_REF}" == claude/* || "${GITHUB_ACTOR}" == "claude-code[bot]" ]]; then
+          if [[ "${GITHUB_HEAD_REF}" == copilot/* ]]; then
-            LABELS="${LABELS} claude-auto"
+            LABELS="${LABELS} copilot-auto"
          fi
-          # Atlas PR
+          # Lucidia PR
-          if [[ "${GITHUB_HEAD_REF}" == atlas/* || "${GITHUB_ACTOR}" == "atlas[bot]" ]]; then
+          if [[ "${GITHUB_HEAD_REF}" == lucidia/* ]]; then
-            LABELS="${LABELS} atlas-auto"
+            LABELS="${LABELS} lucidia-auto"
          fi
-          # Codex PR
+          # Owner PR (@blackboxprogramming)
-          if [[ "${GITHUB_HEAD_REF}" == codex/* || "${GITHUB_ACTOR}" == "codex[bot]" ]]; then
+          if [[ "${GITHUB_ACTOR}" == "blackboxprogramming" ]]; then
-            LABELS="${LABELS} codex-auto"
+            LABELS="${LABELS} owner-auto"
          fi
          # Docs-only
          FILES=$(gh pr view ${{ github.event.pull_request.number }} --json files --jq '.files[].path')
--- a/.github/workflows/operator-tests.yml
+++ b/.github/workflows/operator-tests.yml
@@ -2,7 +2,7 @@ name: Operator Engine Tests
 on:
  push:
-    branches: [ main, claude/** ]
+    branches: [ main, copilot/** ]
    paths:
      - 'operator_engine/**'
      - '.github/workflows/operator-tests.yml'
--- a/.github/workflows/railway-automation.yml
+++ b/.github/workflows/railway-automation.yml
@@ -2,7 +2,7 @@ name: Railway Secrets & Automation Audit
 on:
  push:
-    branches: ["main", "claude/**"]
+    branches: ["main", "copilot/**"]
  pull_request:
    branches: ["main"]
  schedule:
--- a/.github/workflows/test-orchestrator.yml
+++ b/.github/workflows/test-orchestrator.yml
@@ -2,7 +2,7 @@ name: Test Orchestrator - All Suites
 on:
  push:
-    branches: ["main", "claude/**", "copilot/**", "codex/**"]
+    branches: ["main", "copilot/**"]
  pull_request:
    branches: ["main"]
  workflow_dispatch:
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -573,12 +573,10 @@ document.body.appendChild(card);
 **Branch Strategy**:
 - `main` - Production branch (protected)
- `claude/*` - AI assistant branches (auto-created)
+- `copilot/*` - Copilot AI branches (auto-created)
- `codex/*`, `copilot/*` - Other AI branches
+- `lucidia/*` - Lucidia orchestration branches
 - `feature/*`, `fix/*`, `docs/*` - Human developer branches
 **Current Branch**: `claude/claude-md-mi3wes6waq9fnfmt-012kKpHTPTV5D9YBFB7ufuwy`
 **Commit Guidelines**:
 ```bash
 # Good commit messages
@@ -646,8 +644,8 @@ ALLOWED_ORIGINS=http://localhost:8000,https://yourdomain.com
 # Blockchain
 WALLET_MASTER_KEY=your-wallet-master-key
-# AI
+# Self-hosted AI (Ollama on Pi cluster)
-OPENAI_API_KEY=sk-...
+OLLAMA_BASE_URL=http://localhost:11434
 # Cloud (optional)
 AWS_ACCESS_KEY_ID=...
--- a/CORE_ROUTERS.md
+++ b/CORE_ROUTERS.md
@@ -228,7 +228,7 @@ agents, api_health, browser, capture, cloudflare, compliance_ops, creator, dashb
 - `SECRET_KEY` (auth)
 - `DATABASE_URL` (all routers)
 - `REDIS_URL` (sessions, caching)
- `OPENAI_API_KEY` (ai_chat)
+- `OLLAMA_BASE_URL` (ai_chat — self-hosted)
 - `SMTP_*` variables (email - optional if email is stub)
 - `AWS_*` variables (files - optional if files is stub)
--- a/DEPLOYMENT_NOTES.md
+++ b/DEPLOYMENT_NOTES.md
@@ -59,9 +59,8 @@ WALLET_MASTER_KEY=<generate-with-openssl-rand-hex-32>
 ### Optional Variables (Add as Needed)
 ```bash
-# AI Integration
+# Self-hosted AI (Ollama on Pi cluster)
-OPENAI_API_KEY=sk-...
+OLLAMA_BASE_URL=http://localhost:11434
 ANTHROPIC_API_KEY=sk-ant-...
 # External Services
 GITHUB_TOKEN=ghp_...
--- a/ENV_VARS.md
+++ b/ENV_VARS.md
@@ -278,32 +278,19 @@ WALLET_MASTER_KEY=your-generated-master-key-here
 ## API Integrations
-### OPENAI_API_KEY
+### OLLAMA_BASE_URL
 **Required:** For AI features (Lucidia, agents)
-**Description:** OpenAI API key for GPT models
+**Description:** URL of self-hosted Ollama instance running on Pi cluster
 **Default:** `http://localhost:11434`
-**How to get:**
+**Setup:**
-1. Go to https://platform.openai.com/api-keys
+1. Install Ollama on your Pi cluster or local machine
-2. Create new secret key
+2. Pull desired models (e.g., `ollama pull llama3`)
-3. Copy key (starts with `sk-`)
+3. Set the base URL to your Ollama instance
 **Example:**
 ```bash
-OPENAI_API_KEY=sk-proj-1234567890abcdef...
+OLLAMA_BASE_URL=http://192.168.1.100:11434
 ```
 ### ANTHROPIC_API_KEY
 **Required:** For Claude integration
 **Description:** Anthropic API key for Claude models
 **How to get:**
 1. Go to https://console.anthropic.com/settings/keys
 2. Create new API key
 3. Copy key
 **Example:**
 ```bash
 ANTHROPIC_API_KEY=sk-ant-1234567890abcdef...
 ```
 ### GITHUB_TOKEN
@@ -648,9 +635,8 @@ ACCESS_TOKEN_EXPIRE_MINUTES=30
 REFRESH_TOKEN_EXPIRE_DAYS=7
 WALLET_MASTER_KEY=[generate-unique-32-char-string]
-# AI (add when ready)
+# Self-hosted AI (Ollama on Pi cluster)
-# OPENAI_API_KEY=sk-proj-...
+# OLLAMA_BASE_URL=http://localhost:11434
 # ANTHROPIC_API_KEY=sk-ant-...
 # Observability (add when ready)
 # SENTRY_DSN=https://...
@@ -677,9 +663,8 @@ ACCESS_TOKEN_EXPIRE_MINUTES=120
 REFRESH_TOKEN_EXPIRE_DAYS=30
 WALLET_MASTER_KEY=local-dev-wallet-key
-# AI (optional - use your own keys)
+# Self-hosted AI (Ollama on Pi cluster)
-# OPENAI_API_KEY=sk-...
+# OLLAMA_BASE_URL=http://localhost:11434
 # ANTHROPIC_API_KEY=sk-ant-...
 ```
 ### GitHub Actions Secrets
--- a/GITHUB_AUTOMATION_RULES.md
+++ b/GITHUB_AUTOMATION_RULES.md
@@ -85,17 +85,17 @@ Applied by `.github/labeler.yml` action on PR open/update.
 | Label | Applied When | Purpose |
 |-------|--------------|---------|
-| `claude-auto` | Author is `claude-code[bot]` or branch starts with `claude/` | Claude-generated PR |
+| `copilot-auto` | Branch starts with `copilot/` | Copilot-generated PR |
-| `atlas-auto` | Author is `atlas[bot]` or branch starts with `atlas/` | Atlas-generated PR |
+| `lucidia-auto` | Branch starts with `lucidia/` | Lucidia-generated PR |
-| `codex-auto` | Author is `codex[bot]` or branch starts with `codex/` | Codex-generated PR |
+| `owner-auto` | Author is `blackboxprogramming` | Owner-generated PR |
 | `dependabot` | Author is `dependabot[bot]` | Dependency update PR |
 **Implementation**:
 ```yaml
 # .github/workflows/label-author.yml
- name: Label Claude PRs
+- name: Label Copilot PRs
-  if: startsWith(github.head_ref, 'claude/') || github.actor == 'claude-code[bot]'
+  if: startsWith(github.head_ref, 'copilot/')
-  run: gh pr edit ${{ github.event.pull_request.number }} --add-label "claude-auto"
+  run: gh pr edit ${{ github.event.pull_request.number }} --add-label "copilot-auto"
 ```
 ### Manual Labels
@@ -174,10 +174,10 @@ jobs:
 ✅ **Action**: Auto-approve with human notification
 ✅ **Approver**: `scaffold-bot`
-#### Tier 4: AI-Generated (Claude/Atlas)
+#### Tier 4: Self-Hosted AI / Copilot Generated
-✅ **Condition**: PR from AI agent
+✅ **Condition**: PR from Copilot, Lucidia, or owner
- Labels: `claude-auto`, `atlas-auto`, or `codex-auto`
+- Labels: `copilot-auto`, `lucidia-auto`, or `owner-auto`
 - Required checks: **All** CI checks pass
 - Max size: 500 lines (larger needs human review)
 - No `breaking-change` or `security` labels
@@ -195,7 +195,7 @@ on:
 jobs:
  approve:
    if: |
-      contains(github.event.pull_request.labels.*.name, 'claude-auto') &&
+      contains(github.event.pull_request.labels.*.name, 'copilot-auto') &&
      github.event.state == 'success' &&
      !contains(github.event.pull_request.labels.*.name, 'breaking-change')
    runs-on: ubuntu-latest
@@ -254,7 +254,7 @@ A PR is **automatically merged** if it meets **ALL** of these criteria:
 2. ✅ **Checks Passing**: All required status checks pass
 3. ✅ **Up to Date**: Branch is current (or in merge queue)
 4. ✅ **No Conflicts**: No merge conflicts
-5. ✅ **Labeled**: Has one of: `auto-merge`, `claude-auto`, `docs-only`, `merge-ready`
+5. ✅ **Labeled**: Has one of: `auto-merge`, `copilot-auto`, `lucidia-auto`, `owner-auto`, `docs-only`, `merge-ready`
 6. ✅ **Not Blocked**: No `do-not-merge`, `wip`, `needs-review` labels
 #### Tier-Specific Conditions
@@ -269,7 +269,7 @@ A PR is **automatically merged** if it meets **ALL** of these criteria:
 - ✅ All tests pass (including new tests)
 - ⏱️ Merge immediately
-**AI-Generated PRs** (`claude-auto`, `atlas-auto`):
+**Self-Hosted AI / Copilot PRs** (`copilot-auto`, `lucidia-auto`, `owner-auto`):
 - ✅ Auto-approve + auto-merge enabled
 - ✅ **All** CI checks pass (backend, frontend, security)
 - ✅ No `breaking-change` label
@@ -319,7 +319,7 @@ env:
 ```yaml
 # .github/workflows/auto-merge.yml
 - name: Wait soak time
-  if: contains(github.event.pull_request.labels.*.name, 'claude-auto')
+  if: contains(github.event.pull_request.labels.*.name, 'copilot-auto')
  run: sleep 300  # 5 minutes
 ```
@@ -349,7 +349,7 @@ env:
 ### Feature Branch Protection
-**Branches**: `feature/*`, `claude/*`, `atlas/*`
+**Branches**: `feature/*`, `copilot/*`, `lucidia/*`
 **Rules**:
 - ⚠️ No protection (development branches)
--- a/IMPLEMENTATION.md
+++ b/IMPLEMENTATION.md
@@ -316,10 +316,9 @@ FRONTEND_URL=https://os.blackroad.systems
 WALLET_MASTER_KEY=<generated securely>
 ```
-**AI/ML** (Phase 1):
+**AI/ML** (Self-hosted via Ollama on Pi cluster):
 ```bash
-OPENAI_API_KEY=sk-...
+OLLAMA_BASE_URL=http://localhost:11434
 ANTHROPIC_API_KEY=sk-ant-...  # For Lucidia in Phase 2
 ```
 **Cloud** (optional, Phase 2):
--- a/RAILWAY_DEPLOY_FIX.md
+++ b/RAILWAY_DEPLOY_FIX.md
@@ -144,7 +144,7 @@ Based on `backend/.env.example`, the following variables **MUST** be set in Rail
 | Variable | Purpose | Required When |
 |----------|---------|---------------|
-| `OPENAI_API_KEY` | AI chat features | Using AI router |
+| `OLLAMA_BASE_URL` | AI chat features (self-hosted) | Using AI router |
 | `GITHUB_TOKEN` | GitHub integration | Using GitHub router |
 | `STRIPE_SECRET_KEY` | Payment processing | Using Stripe router |
 | `SMTP_HOST`, `SMTP_USER`, `SMTP_PASSWORD` | Email sending | Using email router |
--- a/ROUTER_PLAN.md
+++ b/ROUTER_PLAN.md
@@ -466,7 +466,7 @@ app.include_router(system.router)
     - SECRET_KEY
     - DATABASE_URL
     - REDIS_URL
-     - OPENAI_API_KEY (optional)
+     - OLLAMA_BASE_URL (self-hosted AI)
     - SMTP_* (optional)
     - AWS_* (optional)
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -47,9 +47,8 @@ SMTP_USER=road@example.com
 SMTP_PASSWORD=changeme-smtp-password
 EMAIL_FROM=blackroad@example.com
-# AI integrations
+# Self-hosted AI (Ollama on Pi cluster)
-OPENAI_API_KEY=sk-your-openai-key
+OLLAMA_BASE_URL=http://localhost:11434
 ANTHROPIC_API_KEY=anthropic-key-placeholder
 # Blockchain tuning
 BLOCKCHAIN_DIFFICULTY=4
--- a/backend/README.md
+++ b/backend/README.md
@@ -117,8 +117,8 @@ SECRET_KEY=your-very-secret-key-change-this-in-production
 # CORS (Add your frontend URLs)
 ALLOWED_ORIGINS=http://localhost:3000,https://yourdomain.com
-# OpenAI (for AI Chat)
+# Self-hosted AI (Ollama on Pi cluster)
-OPENAI_API_KEY=your-openai-api-key
+OLLAMA_BASE_URL=http://localhost:11434
 ```
 ## API Documentation
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -66,8 +66,8 @@ class Settings(BaseSettings):
    SMTP_PASSWORD: str = ""
    EMAIL_FROM: str = "noreply@blackroad.com"
-    # OpenAI
+    # Self-hosted AI (Ollama on Pi cluster)
-    OPENAI_API_KEY: str = ""
+    OLLAMA_BASE_URL: str = "http://localhost:11434"
    # Blockchain
    BLOCKCHAIN_DIFFICULTY: int = 4
--- a/backend/app/routers/ai_chat.py
+++ b/backend/app/routers/ai_chat.py
@@ -177,8 +177,8 @@ async def send_message(
    )
    db.add(user_message)
-    # Generate AI response (simplified - in production, call OpenAI API)
+    # Generate AI response (simplified - in production, routes to self-hosted Ollama on Pi cluster)
-    ai_response_content = f"This is a simulated AI response to: '{message_data.content}'. In production, this would call the OpenAI API configured in settings.OPENAI_API_KEY."
+    ai_response_content = f"This is a simulated AI response to: '{message_data.content}'. In production, this routes to the self-hosted Ollama instance at OLLAMA_BASE_URL."
    ai_message = Message(
        conversation_id=conversation_id,
--- a/backend/app/routers/api_health.py
+++ b/backend/app/routers/api_health.py
@@ -82,7 +82,7 @@ async def check_all_apis():
    - Slack API
    - Discord API
    - Sentry API
-    - OpenAI API
+    - Ollama (self-hosted AI)
    - Hugging Face API
    - DigitalOcean API
    - AWS S3
@@ -111,7 +111,7 @@ async def check_all_apis():
    # Add checks for existing APIs
    api_checks.update({
        "github": lambda: check_github_status(),
-        "openai": lambda: check_openai_status(),
+        "ollama": lambda: check_ollama_status(),
        "huggingface": lambda: check_huggingface_status(),
        "digitalocean": lambda: check_digitalocean_status(),
        "aws": lambda: check_aws_status(),
@@ -206,7 +206,7 @@ async def check_specific_api(api_name: str):
        "discord": lambda: __import__("app.routers.discord", fromlist=["get_discord_status"]).get_discord_status(),
        "sentry": lambda: __import__("app.routers.sentry", fromlist=["get_sentry_status"]).get_sentry_status(),
        "github": check_github_status,
-        "openai": check_openai_status,
+        "ollama": check_ollama_status,
        "huggingface": check_huggingface_status,
        "digitalocean": check_digitalocean_status,
        "aws": check_aws_status,
@@ -258,35 +258,28 @@ async def check_github_status():
        }
-async def check_openai_status():
+async def check_ollama_status():
-    """Check OpenAI API status"""
+    """Check self-hosted Ollama status"""
-    openai_key = os.getenv("OPENAI_API_KEY")
+    ollama_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
    if not openai_key:
        return {
            "connected": False,
            "message": "OpenAI API key not configured",
            "key_configured": False
        }
    import httpx
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
-                "https://api.openai.com/v1/models",
+                f"{ollama_url}/api/tags",
                headers={"Authorization": f"Bearer {openai_key}"},
                timeout=10.0
            )
            response.raise_for_status()
            return {
                "connected": True,
-                "message": "OpenAI API connected successfully",
+                "message": "Ollama (self-hosted) connected successfully",
-                "key_configured": True
+                "url_configured": True
            }
    except Exception as e:
        return {
            "connected": False,
-            "message": f"OpenAI API connection failed: {str(e)}",
+            "message": f"Ollama connection failed: {str(e)}",
-            "key_configured": True
+            "url_configured": bool(ollama_url)
        }
--- a/backend/app/routers/dashboard.py
+++ b/backend/app/routers/dashboard.py
@@ -49,7 +49,7 @@ async def get_dashboard_overview(
        "digitalocean": bool(digital_ocean_token),
        "github": bool(os.getenv("GITHUB_TOKEN")),
        "huggingface": bool(os.getenv("HUGGINGFACE_TOKEN")),
-        "openai": bool(os.getenv("OPENAI_API_KEY")),
+        "ollama": bool(os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")),
        "aws_s3": bool(os.getenv("AWS_ACCESS_KEY_ID")),
        "smtp": bool(os.getenv("SMTP_HOST")),
    }
@@ -98,9 +98,9 @@ async def get_dashboard_overview(
        {
            "name": "AI Assistant",
            "icon": "🤖",
-            "status": "online" if services_config["openai"] else "offline",
+            "status": "online" if services_config["ollama"] else "offline",
-            "enabled": services_config["openai"],
+            "enabled": services_config["ollama"],
-            "connected": services_config["openai"],
+            "connected": services_config["ollama"],
            "stats": {"conversations": stats["ai"]["conversations"], "messages": stats["ai"]["messages"]},
            "endpoint": "/api/ai-chat"
        },
@@ -250,10 +250,10 @@ async def list_all_services(
        {
            "id": "ai_chat",
            "name": "AI Assistant",
-            "description": "Conversational AI powered by OpenAI",
+            "description": "Conversational AI powered by self-hosted Ollama",
            "category": "productivity",
            "icon": "🤖",
-            "configured": bool(os.getenv("OPENAI_API_KEY"))
+            "configured": bool(os.getenv("OLLAMA_BASE_URL", "http://localhost:11434"))
        },
        {
            "id": "files",
--- a/backend/app/routers/system.py
+++ b/backend/app/routers/system.py
@@ -56,7 +56,7 @@ async def get_public_config():
        "external_services": {
            "github_integration": bool(os.environ.get("GITHUB_TOKEN")),
            "stripe_enabled": bool(os.environ.get("STRIPE_SECRET_KEY")),
-            "openai_enabled": bool(os.environ.get("OPENAI_API_KEY")),
+            "ollama_enabled": bool(os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")),
        },
    }
--- a/implementation-plans/IMPLEMENTATION_lucidia.md
+++ b/implementation-plans/IMPLEMENTATION_lucidia.md
@@ -10,12 +10,12 @@
 ## PURPOSE
 **Lucidia** is the **AI intelligence layer** that:
- Routes requests to multiple AI models (Claude, GPT-4, Llama, Gemini)
+- Routes requests to self-hosted models via Ollama on Pi cluster (Llama, Mistral, Phi, Gemma)
 - Orchestrates multi-agent conversations
 - Manages long-term memory and context
 - Provides personas (Cece, Amundson, etc.)
 - Tool calling and function execution
- Cost optimization (use cheaper models when appropriate)
+- Zero external provider dependency (fully self-hosted)
 **Role in Architecture**: **Layer 4** (Orchestration & Intelligence)
@@ -29,46 +29,39 @@
 ```python
 # lucidia/router.py
-from anthropic import Anthropic
+import httpx
-from openai import OpenAI
+import os
 import requests
 class ModelRouter:
    def __init__(self):
-        self.claude = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
+        self.ollama_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
        self.openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    async def route(self, prompt: str, preferences: dict):
-        """Route to best model based on task, cost, latency."""
+        """Route to best self-hosted model based on task."""
        # Task classification
        task_type = self.classify_task(prompt)
-        # Routing logic
+        # Routing logic — all models self-hosted via Ollama on Pi cluster
        if task_type == "code":
-            return await self.call_claude(prompt, model="claude-sonnet-4")
+            return await self.call_ollama(prompt, model="codellama:13b")
        elif task_type == "creative":
-            return await self.call_openai(prompt, model="gpt-4")
+            return await self.call_ollama(prompt, model="llama3:8b")
        elif task_type == "fast":
-            return await self.call_openai(prompt, model="gpt-3.5-turbo")
+            return await self.call_ollama(prompt, model="phi3:mini")
        else:
-            # Default to Claude
+            # Default to Llama 3
-            return await self.call_claude(prompt)
+            return await self.call_ollama(prompt, model="llama3:8b")
-    async def call_claude(self, prompt: str, model: str = "claude-3-5-sonnet-20241022"):
+    async def call_ollama(self, prompt: str, model: str = "llama3:8b"):
-        response = self.claude.messages.create(
+        async with httpx.AsyncClient() as client:
-            model=model,
+            response = await client.post(
-            max_tokens=4096,
+                f"{self.ollama_url}/api/generate",
-            messages=[{"role": "user", "content": prompt}]
+                json={"model": model, "prompt": prompt, "stream": False},
                timeout=120.0
            )
-        return response.content[0].text
+            response.raise_for_status()
-
+            return response.json()["response"]
    async def call_openai(self, prompt: str, model: str = "gpt-4"):
        response = self.openai.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content
 ```
 ### 2. Multi-Agent Orchestration
@@ -78,9 +71,9 @@ class ModelRouter:
 class AgentOrchestrator:
    def __init__(self):
        self.agents = {
-            "cece": Agent(name="Cece", role="OS Architect", model="claude-sonnet-4"),
+            "cece": Agent(name="Cece", role="OS Architect", model="llama3:8b"),
-            "amundson": Agent(name="Amundson", role="Quantum Physicist", model="claude-opus-4"),
+            "amundson": Agent(name="Amundson", role="Quantum Physicist", model="llama3:8b"),
-            "designer": Agent(name="Designer", role="UI/UX", model="gpt-4"),
+            "designer": Agent(name="Designer", role="UI/UX", model="mistral:7b"),
        }
    async def orchestrate(self, task: str):
@@ -233,14 +226,15 @@ lucidia/
 **Core**:
 - Python 3.11+
 - FastAPI (API service)
- Anthropic SDK (Claude)
+- Ollama (self-hosted LLM runtime on Pi cluster)
- OpenAI SDK (GPT-4)
+- httpx (async HTTP client for Ollama API)
 - ChromaDB (vector memory)
-**Optional**:
+**Self-Hosted Models** (via Ollama):
- LangChain (agent framework)
+- Llama 3 8B (general purpose)
- LlamaIndex (context management)
+- CodeLlama 13B (code generation)
- Replicate (open-source models)
+- Mistral 7B (creative/reasoning)
 - Phi-3 Mini (fast/lightweight tasks)
 ---
@@ -252,10 +246,11 @@ lucidia/
 **Month 12**: Production deployment, OS integration
 **Success Criteria**:
- ✅ 3+ models supported (Claude, GPT-4, Llama)
+- ✅ 3+ self-hosted models running on Pi cluster (Llama 3, CodeLlama, Mistral, Phi-3)
- ✅ <2s average response time
+- ✅ <5s average response time (self-hosted hardware)
 - ✅ 95% user satisfaction
 - ✅ 10+ personas available
 - ✅ Zero external API dependencies
 ---
@@ -287,19 +282,20 @@ lucidia/
 ## COST OPTIMIZATION
-**Model Cost Comparison**:
+**Self-Hosted Model Cost**:
- Claude Sonnet 4: $3 / 1M input tokens
+- All models run on Pi cluster — zero per-token API costs
- GPT-4: $10 / 1M input tokens
+- One-time hardware cost: Raspberry Pi 5 cluster
- GPT-3.5 Turbo: $0.50 / 1M input tokens
+- Electricity only ongoing cost
- Llama 3 (open-source): Free (hosting cost only)
+- No vendor lock-in, no rate limits, no API keys
 **Strategy**:
- Use GPT-3.5 for simple queries (classification, summarization)
+- Use Phi-3 Mini for simple queries (classification, summarization)
- Use Claude Sonnet for complex reasoning (code, analysis)
+- Use CodeLlama 13B for code generation and analysis
- Use GPT-4 for creative tasks (copywriting, brainstorming)
+- Use Llama 3 8B for general reasoning and conversation
 - Use Mistral 7B for creative tasks
 - Cache common queries in Redis
-**Projected Savings**: 60% vs. using GPT-4 for everything
+**Projected Savings**: 100% vs. using external AI providers
 ---
--- a/infra/blackroad-manifest.yml
+++ b/infra/blackroad-manifest.yml
@@ -154,9 +154,9 @@ projects:
              example: "https://blackroad.systems"
          optional:
-            - name: "OPENAI_API_KEY"
+            - name: "OLLAMA_BASE_URL"
-              description: "OpenAI API access"
+              description: "Self-hosted Ollama instance URL"
-              secret: true
+              secret: false
            - name: "GITHUB_TOKEN"
              description: "GitHub API access for agents"
              secret: true
@@ -454,9 +454,7 @@ planned_projects:
        env:
          required:
-            - "OPENAI_API_KEY"
+            - "OLLAMA_BASE_URL"
            - "ANTHROPIC_API_KEY"
            - "GROQ_API_KEY"
            - "DATABASE_URL"
            - "REDIS_URL"
--- a/infra/env/ENVIRONMENT_MAP.md
+++ b/infra/env/ENVIRONMENT_MAP.md
@@ -18,7 +18,7 @@ Use this file as the single source of truth for environment variables across Rai
 | `CF_ZONE_ID` | Cloudflare zone identifier. | - | `CF_ZONE_ID` secret for cache purge. | Config variable in Workers/Pages. | Export in terminal when running scripts. |
 | `CF_API_TOKEN` | Token for DNS/cache automation. | - | `CF_API_TOKEN` secret. | Secret binding in Workers automation. | Export in terminal; do not store. |
 | `RAILWAY_TOKEN` | Token for CLI/CI deployments. | N/A | `RAILWAY_TOKEN` secret. | - | Export locally when using Railway CLI. |
-| `OPENAI_API_KEY` | Agent/LLM access key. | Backend variable if used server-side. | `OPENAI_API_KEY` secret for agent jobs. | Worker secret if routing requests. | `.env` entry for local agent dev. |
+| `OLLAMA_BASE_URL` | Self-hosted Ollama URL for LLM. | Backend variable for AI routing. | Config variable for agent jobs. | Config variable if routing requests. | `.env` entry for local agent dev. |
 | `GITHUB_TOKEN` | GitHub API access for agents/prism. | - | Automatic Actions token or PAT secret. | Worker secret if used in edge functions. | Export locally when testing agent integrations. |
 ## Usage Notes
--- a/infra/railway/.env.railway.example
+++ b/infra/railway/.env.railway.example
@@ -82,9 +82,8 @@ JWT_SECRET=your-jwt-secret-here
 # External API Keys (Service Specific)
 # ------------------------------------------------------------------------------
-# AI Services
+# Self-hosted AI (Ollama on Pi cluster)
-# OPENAI_API_KEY=sk-...
+# OLLAMA_BASE_URL=http://localhost:11434
 # ANTHROPIC_API_KEY=sk-ant-...
 # Cloud Services
 # AWS_ACCESS_KEY_ID=
--- a/infra/templates/.env.example.template
+++ b/infra/templates/.env.example.template
@@ -62,8 +62,8 @@ WALLET_MASTER_KEY=  # Generate with: openssl rand -hex 32
 # EXTERNAL APIs (Optional)
 # =============================================================================
-# OpenAI
+# Self-hosted AI (Ollama on Pi cluster)
-OPENAI_API_KEY=
+OLLAMA_BASE_URL=http://localhost:11434
 # GitHub
 GITHUB_TOKEN=
--- a/infra/templates/railway.toml.template
+++ b/infra/templates/railway.toml.template
@@ -26,6 +26,6 @@ patterns = ["**/*.py", "requirements.txt", "Dockerfile"]
 # - ALLOWED_ORIGINS
 # Optional:
-# - OPENAI_API_KEY
+# - OLLAMA_BASE_URL
 # - GITHUB_TOKEN
 # - SENTRY_DSN
--- a/scripts/railway/validate_env_template.py
+++ b/scripts/railway/validate_env_template.py
@@ -47,7 +47,6 @@ EXTRA_REQUIRED_KEYS: Set[str] = {
    "MQTT_USERNAME",
    "MQTT_PASSWORD",
    "DEVICE_HEARTBEAT_TIMEOUT_SECONDS",
    "ANTHROPIC_API_KEY",
    "POSTGRES_URL",
    "JWT_SECRET",
    "SESSION_SECRET",
@@ -69,7 +68,6 @@ SENSITIVE_KEYS: Set[str] = {
    "AWS_ACCESS_KEY_ID",
    "AWS_SECRET_ACCESS_KEY",
    "SMTP_PASSWORD",
    "OPENAI_API_KEY",
    "RAILWAY_TOKEN",
    "RAILWAY_PROJECT_ID",
    "RAILWAY_ENVIRONMENT_ID",
@@ -95,7 +93,6 @@ SENSITIVE_KEYS: Set[str] = {
    "ROADCOIN_WALLET_ADDRESS",
    "MQTT_BROKER_URL",
    "MQTT_PASSWORD",
    "ANTHROPIC_API_KEY",
    "POSTGRES_URL",
    "JWT_SECRET",
    "SESSION_SECRET",