# lucidia-main/human_machine/reinforcement.py

from __future__ import annotations

import random
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple

@dataclass
class ReinforcementAgent:
    """
    A simple reinforcement learning agent using tabular Q-learning.

    Any (state, action) pair that has never been updated is treated as
    having a value estimate of 0.0.

    Attributes
    ----------
    q_values : Dict[Tuple[str, str], float]
        Q-value table mapping (state, action) pairs to their value estimates.
    """
    q_values: Dict[Tuple[str, str], float] = field(default_factory=dict)

    def _value(self, state: str, action: str) -> float:
        """Return the stored estimate for (state, action), or 0.0 if unseen."""
        return self.q_values.get((state, action), 0.0)

    def update(
        self,
        state: str,
        action: str,
        reward: float,
        alpha: float = 0.1,
        *,
        next_state: Optional[str] = None,
        next_actions: Optional[List[str]] = None,
        gamma: float = 0.9,
    ) -> None:
        """
        Update the Q-value for a state-action pair.

        The estimate is moved a fraction ``alpha`` of the way toward a
        target. Without successor information the target is just
        ``reward``. When both ``next_state`` and a non-empty
        ``next_actions`` are supplied, the target is the one-step
        Q-learning target
        ``reward + gamma * max(Q(next_state, a) for a in next_actions)``.

        Parameters
        ----------
        state : str
            Current state identifier.
        action : str
            Action taken in the state.
        reward : float
            Reward received for this state-action.
        alpha : float, default 0.1
            Learning rate.
        next_state : str, optional
            State reached after taking ``action``. Leave as ``None`` (or
            pass empty ``next_actions``) for a terminal transition.
        next_actions : List[str], optional
            Actions available in ``next_state``.
        gamma : float, default 0.9
            Discount applied to the best successor value. Only used when
            successor information is supplied.
        """
        target = reward
        # Bootstrap only when a successor state with at least one action is
        # known; otherwise the transition is treated as terminal.
        if next_state is not None and next_actions:
            target += gamma * max(self._value(next_state, a) for a in next_actions)

        old = self._value(state, action)
        self.q_values[(state, action)] = old + alpha * (target - old)

    def choose_action(self, state: str, actions: List[str], epsilon: float = 0.2) -> str:
        """
        Choose an action using an epsilon-greedy policy.

        A uniform random action is returned when ``random.random()`` falls
        below ``epsilon``; otherwise the action with the highest stored
        Q-value is returned. Ties go to the action that appears first in
        ``actions``.

        Parameters
        ----------
        state : str
            Current state identifier.
        actions : List[str]
            Available actions.
        epsilon : float
            Exploration rate.

        Returns
        -------
        str
            Selected action.

        Raises
        ------
        ValueError
            If ``actions`` is empty.
        """
        if not actions:
            raise ValueError("actions list cannot be empty")
        if random.random() < epsilon:
            # Explore: ignore the value table entirely.
            return random.choice(actions)
        # Exploit: choose action with highest Q-value.
        return max(actions, key=lambda a: self._value(state, a))

if __name__ == "__main__":
    # Smoke demo: pick one action for a single state, reward it, and show
    # the resulting Q-table.
    demo_state = "home"
    demo_actions = ["explore", "rest"]
    demo_agent = ReinforcementAgent()

    picked = demo_agent.choose_action(demo_state, demo_actions)
    demo_agent.update(demo_state, picked, reward=1.0)

    print(demo_agent.q_values)