sync: update from blackroad-operator 2026-03-14

Synced from BlackRoad-OS-Inc/blackroad-operator/orgs/personal/lucidia BlackRoad OS — Pave Tomorrow. RoadChain-SHA2048: fe729062952871e7 RoadChain-Identity: alexa@sovereign RoadChain-Full: fe729062952871e77147cf6d938b799096e87d9024d7005a14c9e209e12e8ad0c825b624c7bc649fc7eeb4c284fdcab8231af77980065cc04d9f36fca479ffc2346ed3c1b73de6f240d8f9485f47c995ad5b81142f7179b84932c67914dff1c08db039349ba28fca36cb57688093bf0199268dd1c2f3448c9383000bc77cc9663066ff57b834370afc8838b18466ea9029908018b961555cccaabf2ce21649cf3cabc7f64bdcc4abdf2da259b210c342835a2cecf92bdd3b4e109b4d6e622f6934e13b2b123607bd61ce3d0f20454c9ab594f9284cffe18716619c52db57ce5f4ee2856cb96e1fa3748fe1fe65435bec297c5ab3ab58d570ec1064aea29931dd
2026-03-17 07:57:19 -05:00 · 2026-03-14 15:09:52 -05:00
parent f25d5c2836
commit 855585cb0e
1207 changed files with 10061 additions and 349689 deletions
--- a/human_machine/reinforcement.py
+++ b/human_machine/reinforcement.py
@@ -0,0 +1,69 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Dict, List, Tuple
+import random
+
+@dataclass
+class ReinforcementAgent:
+    """
+    A simple reinforcement learning agent using tabular Q-learning.
+
+    Attributes
+    ----------
+    q_values : Dict[Tuple[str, str], float]
+        Q-value table mapping (state, action) pairs to their value estimates.
+    """
+    q_values: Dict[Tuple[str, str], float] = field(default_factory=dict)
+
+    def update(self, state: str, action: str, reward: float, alpha: float = 0.1) -> None:
+        """
+        Update the Q-value for a state-action pair.
+
+        Parameters
+        ----------
+        state : str
+            Current state identifier.
+        action : str
+            Action taken in the state.
+        reward : float
+            Reward received for this state-action.
+        alpha : float, default 0.1
+            Learning rate.
+        """
+        key = (state, action)
+        old = self.q_values.get(key, 0.0)
+        self.q_values[key] = old + alpha * (reward - old)
+
+    def choose_action(self, state: str, actions: List[str], epsilon: float = 0.2) -> str:
+        """
+        Choose an action using an epsilon-greedy policy.
+
+        Parameters
+        ----------
+        state : str
+            Current state identifier.
+        actions : List[str]
+            Available actions.
+        epsilon : float
+            Exploration rate.
+
+        Returns
+        -------
+        str
+            Selected action.
+        """
+        if not actions:
+            raise ValueError("actions list cannot be empty")
+        if random.random() < epsilon:
+            return random.choice(actions)
+        # choose action with highest Q-value
+        return max(actions, key=lambda a: self.q_values.get((state, a), 0.0))
+
+if __name__ == "__main__":
+    agent = ReinforcementAgent()
+    state = "home"
+    actions = ["explore", "rest"]
+    chosen = agent.choose_action(state, actions)
+    agent.update(state, chosen, reward=1.0)
+    print(agent.q_values)