Files
lucidia-core/analyst.py
Alexa Louise 6afdb4b148 Initial extraction from blackroad-prism-console
Lucidia Core - AI reasoning engines for specialized domains:
- Physicist (867 lines) - energy modeling, force calculations
- Mathematician (760 lines) - symbolic computation, proofs
- Geologist (654 lines) - terrain modeling, stratigraphy
- Engineer (599 lines) - structural analysis, optimization
- Painter (583 lines) - visual generation, graphics
- Chemist (569 lines) - molecular analysis, reactions
- Analyst (505 lines) - pattern recognition, insights
- Plus: architect, researcher, mediator, speaker, poet, navigator

Features:
- FastAPI wrapper with REST endpoints for each agent
- CLI with `lucidia list`, `lucidia run`, `lucidia api`
- Codex YAML configurations for agent personalities
- Quantum engine extensions

12,512 lines of Python across 91 files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-30 08:00:53 -06:00

506 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""Lucidia Codex-12 Analyst agent implementation.
The Analyst watches telemetry, metrics, dialogue summaries, and memory notes to
surface gentle, well-explained insight. It favours transparency: every
statistic that leaves the loop carries enough context for the other Codex
agents (and humans) to understand how the number was derived. The behavioural
loop follows the charter for Codex-12 "Analyst":
gather → analyze → model → interpret → teach → rest
The implementation keeps lightweight running statistics for each numeric field
encountered in the configured input streams. It also tracks qualitative
labels, recent notes, and risk signals. Each cycle emits an "insight card"
with teacherly explanations so downstream consumers can reuse the results
without guessing at the methodology.
"""
from __future__ import annotations
import argparse
import json
import math
import time
from collections import Counter
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Iterable, List, Mapping, Optional
import yaml
DEFAULT_SOURCE_ROOT = Path("/srv/lucidia/state")
DEFAULT_STATE_ROOT = Path("/srv/lucidia/analyst")
DEFAULT_EMIT_DIR = Path("/codex/prompts/next")
STATE_FILE_NAME = "state.json"
INSIGHT_LOG_NAME = "insights.jsonl"
HISTORY_LIMIT = 24
LABEL_FIELDS = {"label", "status", "state", "mood", "risk", "topic", "channel", "severity", "phase"}
NOTE_FIELDS = {"message", "summary", "note", "insight", "observation"}
@dataclass
class AnalystState:
    """Persisted runtime state between behavioural loop iterations."""

    # Byte offsets into each stream file, keyed by stream name.
    cursors: Dict[str, int] = field(default_factory=dict)
    # stream -> metric name -> running-statistics snapshot.
    metrics: Dict[str, Dict[str, Dict[str, Any]]] = field(default_factory=dict)
    # stream -> label tag -> occurrence count.
    labels: Dict[str, Dict[str, int]] = field(default_factory=dict)
    # stream -> most recent free-text notes.
    notes: Dict[str, List[str]] = field(default_factory=dict)

    @classmethod
    def load(cls, path: Path) -> "AnalystState":
        """Rehydrate state from *path*; any missing or corrupt file yields a blank state."""
        if not path.exists():
            return cls()
        try:
            raw = json.loads(path.read_text(encoding="utf-8"))
        except json.JSONDecodeError:
            return cls()
        state = cls()
        state.cursors = {str(name): int(pos) for name, pos in raw.get("cursors", {}).items()}
        for stream, metric_map in raw.get("metrics", {}).items():
            state.metrics[stream] = {}
            if isinstance(metric_map, Mapping):
                state.metrics[stream] = {
                    str(name): dict(snap)
                    for name, snap in metric_map.items()
                    if isinstance(snap, Mapping)
                }
        for stream, counter in raw.get("labels", {}).items():
            if isinstance(counter, Mapping):
                state.labels[stream] = {str(tag): int(n) for tag, n in counter.items()}
        for stream, entries in raw.get("notes", {}).items():
            # Guard against a stray string: iterating it would explode into characters.
            if isinstance(entries, Iterable) and not isinstance(entries, (str, bytes)):
                state.notes[stream] = [str(entry) for entry in entries]
        return state

    def save(self, path: Path) -> None:
        """Serialise the full state to *path* as pretty-printed, sorted JSON."""
        path.parent.mkdir(parents=True, exist_ok=True)
        document = json.dumps(
            {
                "cursors": self.cursors,
                "metrics": self.metrics,
                "labels": self.labels,
                "notes": self.notes,
            },
            indent=2,
            sort_keys=True,
            ensure_ascii=False,
        )
        path.write_text(document + "\n", encoding="utf-8")
def utc_now() -> str:
    """Return the current UTC moment as an ISO-8601 timestamp string."""
    return datetime.now(tz=timezone.utc).isoformat()
def load_seed(path: Path) -> Dict[str, Any]:
    """Load the Codex seed YAML at *path*, insisting on a top-level mapping.

    Raises:
        FileNotFoundError: if *path* does not exist.
        ValueError: if the YAML document is not a mapping.
    """
    if not path.exists():
        raise FileNotFoundError(f"Seed file not found: {path}")
    with path.open("r", encoding="utf-8") as stream:
        parsed = yaml.safe_load(stream)
    if isinstance(parsed, dict):
        return parsed
    raise ValueError("Seed file must contain a top-level mapping")
def _tail_file(path: Path, offset: int) -> tuple[list[str], int]:
if not path.exists():
return [], offset
with path.open("r", encoding="utf-8") as handle:
handle.seek(offset)
lines = handle.readlines()
new_offset = handle.tell()
return [line.rstrip("\n") for line in lines if line.strip()], new_offset
def _ensure_metric_snapshot(snapshot: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
if snapshot is None:
snapshot = {}
snapshot.setdefault("count", 0)
snapshot.setdefault("mean", 0.0)
snapshot.setdefault("m2", 0.0)
snapshot.setdefault("min", None)
snapshot.setdefault("max", None)
history = snapshot.get("history") or []
if not isinstance(history, list):
history = []
snapshot["history"] = history
return snapshot
def _update_snapshot(snapshot: Dict[str, Any], value: float) -> None:
    """Fold *value* into the running statistics (Welford's online algorithm).

    Updates count, mean, m2 (sum of squared deviations), min, max, and the
    bounded recent-history window in place.
    """
    count = int(snapshot.get("count", 0)) + 1
    mean = float(snapshot.get("mean", 0.0))
    m2 = float(snapshot.get("m2", 0.0))
    delta = value - mean
    mean = mean + delta / count
    m2 = m2 + delta * (value - mean)
    previous_min = snapshot.get("min")
    previous_max = snapshot.get("max")
    snapshot["count"] = count
    snapshot["mean"] = mean
    snapshot["m2"] = m2
    snapshot["min"] = value if previous_min is None else min(float(previous_min), value)
    snapshot["max"] = value if previous_max is None else max(float(previous_max), value)
    window = list(snapshot.get("history", []))
    window.append(value)
    # Keep only the most recent HISTORY_LIMIT observations.
    snapshot["history"] = window[-HISTORY_LIMIT:]
def _extract_numeric_metrics(record: Any, prefix: str = "") -> Iterable[tuple[str, float]]:
if isinstance(record, Mapping):
for key, value in record.items():
name = f"{prefix}{key}" if not prefix else f"{prefix}.{key}"
if isinstance(value, (int, float)):
yield name, float(value)
elif isinstance(value, Mapping):
yield from _extract_numeric_metrics(value, name)
return
def _collect_labels(record: Any) -> Iterable[str]:
    """Yield ``field:value`` tags for recognised label fields anywhere in *record*."""
    if not isinstance(record, Mapping):
        return
    for key, value in record.items():
        if isinstance(value, str):
            lowered = key.lower()
            if lowered in LABEL_FIELDS:
                yield f"{lowered}:{value.strip()}"
        elif isinstance(value, Mapping):
            yield from _collect_labels(value)
        elif isinstance(value, list):
            # Re-wrap each element under the same key so nested dicts recurse.
            for element in value:
                yield from _collect_labels({key: element})
def _collect_notes(record: Any) -> Iterable[str]:
    """Yield every non-empty free-text note found in *record*, recursively."""
    if isinstance(record, str):
        stripped = record.strip()
        if stripped:
            yield stripped
        return
    if not isinstance(record, Mapping):
        return
    for key, value in record.items():
        if isinstance(value, str):
            if key.lower() in NOTE_FIELDS:
                cleaned = value.strip()
                if cleaned:
                    yield cleaned
        elif isinstance(value, Mapping):
            yield from _collect_notes(value)
        elif isinstance(value, list):
            # Re-wrap list elements under the same key to reuse the field check.
            for element in value:
                yield from _collect_notes({key: element})
def _stddev(snapshot: Mapping[str, Any]) -> float:
count = int(snapshot.get("count", 0))
if count < 2:
return 0.0
m2 = float(snapshot.get("m2", 0.0))
variance = max(m2 / (count - 1), 0.0)
return math.sqrt(variance)
def _trend_label(history: List[float]) -> str:
if len(history) < 2:
return "steady"
recent = history[-5:]
if len(recent) < 2:
recent = history
increasing = all(a < b for a, b in zip(recent, recent[1:]))
decreasing = all(a > b for a, b in zip(recent, recent[1:]))
delta = recent[-1] - recent[0]
if increasing:
return "ascending"
if decreasing:
return "descending"
if abs(delta) < 1e-9:
return "steady"
return "rising" if delta > 0 else "falling"
def _format_anomaly(stream: str, metric: str, snapshot: Mapping[str, Any]) -> Optional[str]:
    """Return a readable anomaly sentence when the latest reading drifts past 2.5σ.

    Returns None when there is no history, fewer than five observations
    (too little data to trust the deviation), zero spread, or the latest
    value is within 2.5 standard deviations of the running mean.
    """
    history = snapshot.get("history") or []
    if not history:
        return None
    if int(snapshot.get("count", 0)) < 5:
        return None
    sigma = _stddev(snapshot)
    if sigma <= 0:
        return None
    mean = float(snapshot.get("mean", 0.0))
    latest = history[-1]
    deviation = abs(latest - mean)
    if deviation < 2.5 * sigma:
        return None
    z_score = deviation / sigma
    return (
        f"{stream}:{metric} latest value {latest:.3f} sits {z_score:.2f}σ away from the running mean "
        f"({mean:.3f})."
    )
def _monotonic_run(history: List[float]) -> Optional[str]:
if len(history) < 6:
return None
recent = history[-6:]
if all(a < b for a, b in zip(recent, recent[1:])):
return "ascending"
if all(a > b for a, b in zip(recent, recent[1:])):
return "descending"
return None
class Analyst:
    """Implements the Codex-12 Analyst behavioural loop."""

    def __init__(
        self,
        *,
        seed_path: Path,
        stream_paths: Mapping[str, Path],
        state_root: Path = DEFAULT_STATE_ROOT,
        emit_dir: Optional[Path] = None,
        poll_interval: float = 5.0,
        once: bool = False,
    ) -> None:
        """Configure the agent from its YAML seed and wire up state/emit paths.

        Args:
            seed_path: YAML seed file read via ``load_seed``; supplies the
                charter, directives, seed language, and behavioural loop.
            stream_paths: Mapping of stream name -> JSONL file to tail.
            state_root: Directory for persisted state and the insight log.
            emit_dir: Optional directory for per-cycle insight artifacts
                (created if given).
            poll_interval: Seconds to sleep between cycles when looping.
            once: When True, ``run`` performs a single cycle and returns.
        """
        self.seed = load_seed(seed_path)
        charter = self.seed.get("system_charter", {})
        self.identity = charter.get("agent_name", "Codex-12 Analyst")
        self.directives = self.seed.get("directives", [])
        self.seed_language = str(self.seed.get("seed_language", "")).strip()
        # NOTE: attribute uses British spelling, seed key uses American spelling.
        self.behavioural_loop = self.seed.get("behavioral_loop", [])
        self.stream_paths = {name: Path(path) for name, path in stream_paths.items()}
        self.state_root = state_root
        self.state_path = state_root / STATE_FILE_NAME
        self.insight_log_path = state_root / INSIGHT_LOG_NAME
        self.state_root.mkdir(parents=True, exist_ok=True)
        self.state = AnalystState.load(self.state_path)
        self.emit_dir = emit_dir
        if self.emit_dir is not None:
            self.emit_dir.mkdir(parents=True, exist_ok=True)
        self.poll_interval = poll_interval
        self.once = once

    def run(self) -> None:
        """Drive the behavioural loop: cycle, emit any insight, sleep, repeat.

        Exits after one cycle when ``self.once`` is set; otherwise loops
        forever, sleeping ``poll_interval`` seconds between cycles.
        """
        while True:
            insight = self._cycle()
            if insight is not None:
                self._write_insight(insight)
            if self.once:
                break
            time.sleep(self.poll_interval)

    def _cycle(self) -> Optional[Dict[str, Any]]:
        """Run one gather/analyze/interpret pass over every configured stream.

        Returns:
            An insight-card dict, or None when no stream produced new
            records (so the caller can skip emitting).  Persisted state —
            cursors and running statistics — is saved only when at least
            one new record was processed.
        """
        cycle_summary: Dict[str, Any] = {}
        anomalies: List[str] = []
        qualitative_highlights: Dict[str, Any] = {}
        total_new_records = 0
        for stream, path in self.stream_paths.items():
            records, processed_count = self._process_stream(stream, path)
            total_new_records += processed_count
            if processed_count == 0:
                continue
            summary, stream_anomalies, highlights = self._summarise_stream(stream, records)
            cycle_summary[stream] = summary
            anomalies.extend(stream_anomalies)
            qualitative_highlights[stream] = highlights
        if total_new_records == 0:
            return None
        insight = {
            "agent": self.identity,
            "generated_at": utc_now(),
            "loop": self.behavioural_loop,
            "directives": self.directives,
            "summary": cycle_summary,
            "qualitative": qualitative_highlights,
            "anomalies": anomalies,
            # Teacherly methodology note carried on every card, per the charter.
            "explanation": (
                "Metrics are tracked with running mean (μ), standard deviation (σ), and range. "
                "An anomaly is flagged when the latest reading drifts more than 2.5σ from μ. "
                "Trend labels use the most recent window of observations to stay honest about direction."
            ),
        }
        if self.seed_language:
            insight["seed_language"] = self.seed_language
        self.state.save(self.state_path)
        return insight

    def _process_stream(self, name: str, path: Path) -> tuple[List[Any], int]:
        """Tail new lines from one stream, ingest each record, advance the cursor.

        Lines that are not valid JSON are wrapped as ``{"message": line}`` so
        free-text log lines still flow into the note collector.

        Returns:
            The parsed records and their count.
        """
        cursor = self.state.cursors.get(name, 0)
        lines, new_cursor = _tail_file(path, cursor)
        self.state.cursors[name] = new_cursor
        processed_records: List[Any] = []
        for line in lines:
            record: Any
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                record = {"message": line}
            processed_records.append(record)
            self._ingest_record(name, record)
        return processed_records, len(processed_records)

    def _ingest_record(self, stream: str, record: Any) -> None:
        """Fold one record into the stream's metrics, label counts, and note bank."""
        metrics_for_stream = self.state.metrics.setdefault(stream, {})
        for metric_name, value in _extract_numeric_metrics(record):
            snapshot = _ensure_metric_snapshot(metrics_for_stream.get(metric_name))
            _update_snapshot(snapshot, value)
            metrics_for_stream[metric_name] = snapshot
        if stream not in self.state.labels:
            self.state.labels[stream] = {}
        # Counter gives clean increment semantics; stored back as a plain dict
        # so the state stays JSON-serialisable.
        label_counter = Counter(self.state.labels[stream])
        for label in _collect_labels(record):
            label_counter[label] += 1
        self.state.labels[stream] = dict(label_counter)
        note_bank = self.state.notes.setdefault(stream, [])
        for note in _collect_notes(record):
            note_bank.append(note)
        # Keep only the most recent HISTORY_LIMIT notes per stream.
        if len(note_bank) > HISTORY_LIMIT:
            self.state.notes[stream] = note_bank[-HISTORY_LIMIT:]

    def _summarise_stream(self, stream: str, records: List[Any]) -> tuple[Dict[str, Any], List[str], Dict[str, Any]]:
        """Build the per-stream summary, anomaly sentences, and qualitative highlights.

        Note: summaries cover ALL metrics accumulated for the stream so far,
        not only those touched by *records* — the running statistics are the
        point of the card.

        Returns:
            ``(summary_dict, anomaly_strings, qualitative_dict)``.
        """
        metrics = self.state.metrics.get(stream, {})
        metric_summaries: List[Dict[str, Any]] = []
        anomalies: List[str] = []
        qualitative: Dict[str, Any] = {}
        for metric_name, snapshot in sorted(metrics.items()):
            history = list(snapshot.get("history", []))
            latest = history[-1] if history else None
            summary = {
                "metric": metric_name,
                "count": int(snapshot.get("count", 0)),
                "mean": float(snapshot.get("mean", 0.0)),
                "stddev": _stddev(snapshot),
                "min": snapshot.get("min"),
                "max": snapshot.get("max"),
                "latest": latest,
                "trend": _trend_label(history),
            }
            metric_summaries.append(summary)
            anomaly = _format_anomaly(stream, metric_name, snapshot)
            if anomaly:
                anomalies.append(anomaly)
            monotonic = _monotonic_run(history)
            if monotonic:
                anomalies.append(
                    f"{stream}:{metric_name} has a sustained {monotonic} run across the last six readings."
                )
        labels = Counter(self.state.labels.get(stream, {}))
        qualitative["top_labels"] = labels.most_common(5)
        notes = self.state.notes.get(stream, [])
        qualitative["recent_notes"] = notes[-5:]
        qualitative["records_processed"] = len(records)
        return {
            "records_processed": len(records),
            "metrics": metric_summaries,
        }, anomalies, qualitative

    def _write_insight(self, insight: Dict[str, Any]) -> None:
        """Append the insight to the JSONL log and optionally emit a standalone artifact.

        The artifact filename embeds the ``generated_at`` timestamp with
        colons replaced (filesystem-safe on all platforms).
        """
        serialized = json.dumps(insight, ensure_ascii=False, indent=2)
        self.insight_log_path.parent.mkdir(parents=True, exist_ok=True)
        with self.insight_log_path.open("a", encoding="utf-8") as handle:
            handle.write(serialized)
            handle.write("\n")
        if self.emit_dir is not None:
            timestamp = insight["generated_at"].replace(":", "-")
            file_path = self.emit_dir / f"codex12_insight_{timestamp}.json"
            file_path.write_text(serialized + "\n", encoding="utf-8")
        print(f"[{self.identity}] emitted insight with {len(insight.get('summary', {}))} stream summaries.")
def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse command-line options for the Codex-12 Analyst loop.

    Args:
        argv: Optional explicit argument list. Defaults to ``None``, which
            makes argparse read ``sys.argv[1:]`` exactly as before; passing
            a list makes the parser testable without patching ``sys.argv``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(description="Run the Codex-12 Analyst behavioural loop.")
    parser.add_argument("--seed", type=Path, required=True, help="Path to the Codex-12 seed file (YAML).")
    parser.add_argument(
        "--state-root",
        type=Path,
        default=DEFAULT_STATE_ROOT,
        help="Directory for Analyst state and insight logs (default: /srv/lucidia/analyst).",
    )
    parser.add_argument(
        "--emit",
        type=Path,
        default=None,
        help="Directory for emitting Codex insight artifacts (default: /codex/prompts/next).",
    )
    parser.add_argument(
        "--poll-interval",
        type=float,
        default=5.0,
        help="Seconds to wait between cycles when running continuously.",
    )
    parser.add_argument("--once", action="store_true", help="Process inputs a single time then exit.")
    parser.add_argument(
        "--telemetry-path",
        type=Path,
        default=None,
        help="Override path for telemetry stream (default: /srv/lucidia/state/telemetry.jsonl).",
    )
    parser.add_argument(
        "--metrics-path",
        type=Path,
        default=None,
        help="Override path for metrics stream (default: /srv/lucidia/state/metrics.jsonl).",
    )
    parser.add_argument(
        "--dialogue-path",
        type=Path,
        default=None,
        help="Override path for dialogue stream (default: /srv/lucidia/state/dialogue.jsonl).",
    )
    parser.add_argument(
        "--memory-path",
        type=Path,
        default=None,
        help="Override path for memory summaries stream (default: /srv/lucidia/state/memory_summaries.jsonl).",
    )
    return parser.parse_args(argv)
def build_stream_paths(args: argparse.Namespace) -> Dict[str, Path]:
    """Resolve the four input stream paths, honouring any CLI overrides."""
    paths: Dict[str, Path] = {
        "telemetry": DEFAULT_SOURCE_ROOT / "telemetry.jsonl",
        "metrics": DEFAULT_SOURCE_ROOT / "metrics.jsonl",
        "dialogue": DEFAULT_SOURCE_ROOT / "dialogue.jsonl",
        "memory": DEFAULT_SOURCE_ROOT / "memory_summaries.jsonl",
    }
    overrides = {
        "telemetry": args.telemetry_path,
        "metrics": args.metrics_path,
        "dialogue": args.dialogue_path,
        "memory": args.memory_path,
    }
    for stream, override in overrides.items():
        if override is not None:
            paths[stream] = override
    return paths
def main() -> None:
    """CLI entry point: assemble an Analyst from parsed arguments and run it."""
    args = parse_args()
    chosen_emit = DEFAULT_EMIT_DIR if args.emit is None else args.emit
    analyst = Analyst(
        seed_path=args.seed,
        stream_paths=build_stream_paths(args),
        state_root=args.state_root,
        emit_dir=chosen_emit,
        poll_interval=args.poll_interval,
        once=args.once,
    )
    analyst.run()


if __name__ == "__main__":
    main()