mirror of
https://github.com/blackboxprogramming/lucidia.git
synced 2026-03-17 01:57:10 -05:00
Synced from BlackRoad-OS-Inc/blackroad-operator/orgs/personal/lucidia BlackRoad OS — Pave Tomorrow. RoadChain-SHA2048: fe729062952871e7 RoadChain-Identity: alexa@sovereign RoadChain-Full: fe729062952871e77147cf6d938b799096e87d9024d7005a14c9e209e12e8ad0c825b624c7bc649fc7eeb4c284fdcab8231af77980065cc04d9f36fca479ffc2346ed3c1b73de6f240d8f9485f47c995ad5b81142f7179b84932c67914dff1c08db039349ba28fca36cb57688093bf0199268dd1c2f3448c9383000bc77cc9663066ff57b834370afc8838b18466ea9029908018b961555cccaabf2ce21649cf3cabc7f64bdcc4abdf2da259b210c342835a2cecf92bdd3b4e109b4d6e622f6934e13b2b123607bd61ce3d0f20454c9ab594f9284cffe18716619c52db57ce5f4ee2856cb96e1fa3748fe1fe65435bec297c5ab3ab58d570ec1064aea29931dd
70 lines
2.0 KiB
Python
70 lines
2.0 KiB
Python
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import Dict, List, Tuple
|
|
import random
|
|
|
|
@dataclass
class ReinforcementAgent:
    """
    A simple reinforcement learning agent using tabular Q-learning.

    Estimates are kept in a plain dictionary and refined incrementally by
    :meth:`update`; :meth:`choose_action` reads the same table to pick
    actions with an epsilon-greedy policy.

    Attributes
    ----------
    q_values : Dict[Tuple[str, str], float]
        Q-value table mapping (state, action) pairs to their value estimates.
        Pairs that have never been updated are read as ``0.0``.
    """

    q_values: Dict[Tuple[str, str], float] = field(default_factory=dict)

    def update(
        self,
        state: str,
        action: str,
        reward: float,
        alpha: float = 0.1,
        *,
        gamma: float = 0.0,
        next_state: str | None = None,
        next_actions: List[str] | None = None,
    ) -> None:
        """
        Update the Q-value for a state-action pair.

        The stored estimate is moved a fraction ``alpha`` of the way toward
        a target value. With the default arguments the target is just
        ``reward``. When both ``next_state`` and a non-empty
        ``next_actions`` are supplied, the target becomes
        ``reward + gamma * max_a Q(next_state, a)``, i.e. the one-step
        Q-learning target.

        Parameters
        ----------
        state : str
            Current state identifier.
        action : str
            Action taken in the state.
        reward : float
            Reward received for this state-action.
        alpha : float, default 0.1
            Learning rate.
        gamma : float, default 0.0
            Discount applied to the best successor estimate. Only used when
            a successor state and its actions are provided.
        next_state : str or None, default None
            State reached after taking ``action``. ``None`` means no
            successor value is added to the target.
        next_actions : List[str] or None, default None
            Actions available in ``next_state``. ``None`` or an empty list
            means no successor value is added to the target.
        """
        target = reward
        # Only bootstrap when the caller described a successor; otherwise the
        # target stays exactly ``reward`` so the default arithmetic is untouched.
        if next_state is not None and next_actions:
            best_next = max(
                self.q_values.get((next_state, candidate), 0.0)
                for candidate in next_actions
            )
            target = reward + gamma * best_next

        key = (state, action)
        old = self.q_values.get(key, 0.0)
        self.q_values[key] = old + alpha * (target - old)

    def choose_action(self, state: str, actions: List[str], epsilon: float = 0.2) -> str:
        """
        Choose an action using an epsilon-greedy policy.

        Parameters
        ----------
        state : str
            Current state identifier.
        actions : List[str]
            Available actions.
        epsilon : float
            Exploration rate. It is compared against ``random.random()``,
            which lies in ``[0.0, 1.0)``, so ``0.0`` never explores and
            ``1.0`` always explores.

        Returns
        -------
        str
            Selected action. On the greedy branch, ties are resolved in
            favour of the action that appears first in ``actions``.

        Raises
        ------
        ValueError
            If ``actions`` is empty.
        """
        if not actions:
            raise ValueError("actions list cannot be empty")
        if random.random() < epsilon:
            return random.choice(actions)
        # choose action with highest Q-value; unseen pairs count as 0.0
        return max(actions, key=lambda a: self.q_values.get((state, a), 0.0))
|
|
|
|
if __name__ == "__main__":
    # Minimal demo: from one fixed state, pick an action with the default
    # exploration rate, reward that choice, and print the resulting table.
    state, actions = "home", ["explore", "rest"]
    agent = ReinforcementAgent()
    chosen = agent.choose_action(state, actions)
    agent.update(state, chosen, reward=1.0)
    print(agent.q_values)
|