sync: update from blackroad-operator 2026-03-14

Synced from BlackRoad-OS-Inc/blackroad-operator/orgs/personal/lucidia
BlackRoad OS — Pave Tomorrow.

RoadChain-SHA2048: fe729062952871e7
RoadChain-Identity: alexa@sovereign
RoadChain-Full: fe729062952871e77147cf6d938b799096e87d9024d7005a14c9e209e12e8ad0c825b624c7bc649fc7eeb4c284fdcab8231af77980065cc04d9f36fca479ffc2346ed3c1b73de6f240d8f9485f47c995ad5b81142f7179b84932c67914dff1c08db039349ba28fca36cb57688093bf0199268dd1c2f3448c9383000bc77cc9663066ff57b834370afc8838b18466ea9029908018b961555cccaabf2ce21649cf3cabc7f64bdcc4abdf2da259b210c342835a2cecf92bdd3b4e109b4d6e622f6934e13b2b123607bd61ce3d0f20454c9ab594f9284cffe18716619c52db57ce5f4ee2856cb96e1fa3748fe1fe65435bec297c5ab3ab58d570ec1064aea29931dd
This commit is contained in:
2026-03-14 15:09:52 -05:00
parent f25d5c2836
commit 855585cb0e
1207 changed files with 10061 additions and 349689 deletions

View File

@@ -0,0 +1,69 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Dict, List, Tuple
import random
@dataclass
class ReinforcementAgent:
    """
    A simple reinforcement learning agent using tabular Q-learning.

    Attributes
    ----------
    q_values : Dict[Tuple[str, str], float]
        Q-value table mapping (state, action) pairs to their value estimates.
        Pairs that have never been updated are treated as having value 0.0.
    """

    q_values: Dict[Tuple[str, str], float] = field(default_factory=dict)

    def update(
        self,
        state: str,
        action: str,
        reward: float,
        alpha: float = 0.1,
        *,
        next_state: str | None = None,
        next_actions: List[str] | None = None,
        gamma: float = 0.9,
    ) -> None:
        """
        Update the Q-value for a state-action pair.

        The stored estimate is moved a fraction ``alpha`` of the way toward a
        target. Without ``next_state`` the target is just ``reward``. When
        both ``next_state`` and a non-empty ``next_actions`` are supplied, the
        target becomes ``reward + gamma * max_a Q(next_state, a)``, i.e. the
        standard Q-learning backup.

        Parameters
        ----------
        state : str
            Current state identifier.
        action : str
            Action taken in the state.
        reward : float
            Reward received for this state-action.
        alpha : float, default 0.1
            Learning rate.
        next_state : str or None, default None
            State reached after taking ``action``. ``None`` means no
            bootstrapping (e.g. a terminal transition).
        next_actions : List[str] or None, default None
            Actions available in ``next_state``. If ``None`` or empty, the
            future-value term is skipped.
        gamma : float, default 0.9
            Discount factor applied to the best next-state value. Only used
            when bootstrapping takes place.
        """
        target = reward
        if next_state is not None and next_actions:
            # Greedy estimate of the value of the successor state.
            best_next = max(
                self.q_values.get((next_state, a), 0.0) for a in next_actions
            )
            target = reward + gamma * best_next

        key = (state, action)
        old = self.q_values.get(key, 0.0)
        self.q_values[key] = old + alpha * (target - old)

    def choose_action(self, state: str, actions: List[str], epsilon: float = 0.2) -> str:
        """
        Choose an action using an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest stored Q-value for ``state`` is
        returned. Unseen (state, action) pairs count as 0.0, and ties resolve
        to the earliest action in ``actions``.

        Parameters
        ----------
        state : str
            Current state identifier.
        actions : List[str]
            Available actions.
        epsilon : float
            Exploration rate.

        Returns
        -------
        str
            Selected action.

        Raises
        ------
        ValueError
            If ``actions`` is empty.
        """
        if not actions:
            raise ValueError("actions list cannot be empty")
        if random.random() < epsilon:
            return random.choice(actions)
        # choose action with highest Q-value
        return max(actions, key=lambda a: self.q_values.get((state, a), 0.0))
if __name__ == "__main__":
    # Minimal demo: pick one action for the "home" state, reward it with 1.0,
    # then print the resulting Q-table.
    state, actions = "home", ["explore", "rest"]
    agent = ReinforcementAgent()
    chosen = agent.choose_action(state, actions)
    agent.update(state, chosen, reward=1.0)
    print(agent.q_values)