ai-econ/ai_economist/foundation/base/base_agent.py

# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause

import random

import numpy as np

from ai_economist.foundation.base.registrar import Registry


class BaseAgent:
    """Base class for Agent classes.

    Instances of Agent classes are created for each agent in the environment. Agent
    instances are stateful, capturing location, inventory, endogenous variables,
    and any additional state fields created by environment components during
    construction (see BaseComponent.get_additional_state_fields in base_component.py).

    They also provide a simple API for getting/setting actions for each of their
    registered action subspaces (which depend on the components used to build
    the environment).

    Subclasses must override the class attribute ``name`` with a non-empty string;
    the constructor asserts this.

    Args:
        idx (int or str): Index that uniquely identifies the agent object amongst the
            other agent objects registered in its environment. Defaults to 0.
        multi_action_mode (bool): Whether to allow the agent to take one action for
            each of its registered action subspaces each timestep (if True),
            or to limit the agent to take only one action each timestep (if False).
            Defaults to False.
    """

    name = ""

    def __init__(self, idx=None, multi_action_mode=None):
        # The base class itself (empty name) is not meant to be instantiated.
        assert self.name

        if idx is None:
            idx = 0

        if multi_action_mode is None:
            multi_action_mode = False

        # String indices are kept verbatim; anything else is coerced to int.
        if isinstance(idx, str):
            self._idx = idx
        else:
            self._idx = int(idx)

        self.multi_action_mode = bool(multi_action_mode)

        # Used to convert single-action-mode actions to the general format:
        # flat action index -> [action_name, sub-action index].
        self.single_action_map = {}

        self.action = dict()  # action_name -> currently selected action index
        self.action_dim = dict()  # action_name -> size of that action subspace
        self._action_names = []  # registration order of the action subspaces
        self._multi_action_dict = {}
        self._unique_actions = 0  # number of registered action subspaces
        self._total_actions = 0

        self.state = dict(loc=[0, 0], inventory={}, escrow={}, endogenous={})

        # Each register_* method is a one-shot; these flags make repeats no-ops.
        self._registered_inventory = False
        self._registered_endogenous = False
        self._registered_components = False
        self._noop_action_dict = dict()

        # Special flag to allow logic for multi-action-mode agents
        # that are not given any actions.
        self._passive_multi_action_agent = False

        # If this gets set to true, we can make masks faster
        self._one_component_single_action = False
        self._premask = None

    @property
    def idx(self):
        """Index used to identify this agent. Must be unique within the environment."""
        return self._idx

    def register_inventory(self, resources):
        """Used during environment construction to populate inventory/escrow fields.

        Every name in ``resources`` gets a zero entry in both the inventory and
        the escrow. Calls after the first one are ignored.
        """
        if self._registered_inventory:
            return
        for entity_name in resources:
            self.inventory[entity_name] = 0
            self.escrow[entity_name] = 0
        self._registered_inventory = True

    def register_endogenous(self, endogenous):
        """Used during environment construction to populate endogenous state fields.

        Every name in ``endogenous`` gets a zero entry. Calls after the first one
        are ignored.
        """
        if self._registered_endogenous:
            return
        for entity_name in endogenous:
            self.endogenous[entity_name] = 0
        self._registered_endogenous = True

    def _incorporate_component(self, action_name, n):
        """Register one action subspace called ``action_name`` with ``n`` actions.

        In multi-action mode the subspace gets one extra slot for its own NO-OP.
        In single-action mode there is one shared NO-OP (flat index 0) and each of
        the ``n`` actions is given the next free flat index in single_action_map.
        """
        # Each sub-action has a NO-OP in multi action mode
        extra_n = 1 if self.multi_action_mode else 0
        self.action[action_name] = 0
        self.action_dim[action_name] = n + extra_n
        self._action_names.append(action_name)
        self._multi_action_dict[action_name] = False
        self._unique_actions += 1
        if self.multi_action_mode:
            self._total_actions += n + extra_n
        else:
            for action_n in range(1, n + 1):
                self._total_actions += 1
                self.single_action_map[int(self._total_actions)] = [
                    action_name,
                    action_n,
                ]

    def _max_action_index(self, action_name):
        """Return the largest valid action index for ``action_name``.

        action_dim includes the NO-OP slot only in multi-action mode, so the
        highest index is ``action_dim - 1`` there and ``action_dim`` otherwise.
        """
        dim = self.action_dim[action_name]
        return dim - 1 if self.multi_action_mode else dim

    def register_components(self, components):
        """Used during environment construction to set up state/action spaces.

        For each component, ``component.get_n_actions(self.name)`` decides what is
        registered: None or 0 skips the component, an int registers one subspace
        named after the component, and a tuple/list of (sub_name, n) pairs
        registers one subspace per pair as "<component>.<sub_name>".

        Calls after the first one are ignored.

        Raises:
            NameError: If a sub-action name contains ".".
            TypeError: If get_n_actions returns something other than None, an int,
                a tuple or a list.
        """
        if self._registered_components:
            return
        for component in components:
            n = component.get_n_actions(self.name)
            # NOTE: skipping here also skips the additional-state-field merge at
            # the bottom of the loop for this component.
            if n is None:
                continue

            # Most components will have a single action-per-agent, so n is an int
            if isinstance(n, int):
                if n == 0:
                    continue
                self._incorporate_component(component.name, n)

            # They can also internally handle multiple actions-per-agent,
            # so n is an tuple or list
            elif isinstance(n, (tuple, list)):
                for action_sub_name, n_ in n:
                    if n_ == 0:
                        continue
                    # "." is reserved as the component/sub-action separator.
                    if "." in action_sub_name:
                        raise NameError(
                            "Sub-action {} of component {} "
                            "is illegally named.".format(
                                action_sub_name, component.name
                            )
                        )
                    self._incorporate_component(
                        "{}.{}".format(component.name, action_sub_name), n_
                    )

            # If that's not what we got something is funky.
            else:
                raise TypeError(
                    "Received unexpected type ({}) from {}.get_n_actions('{}')".format(
                        type(n), component.name, self.name
                    )
                )

            for k, v in component.get_additional_state_fields(self.name).items():
                self.state[k] = v

        # Currently no actions are available to this agent. Give it a placeholder.
        if len(self.action) == 0 and self.multi_action_mode:
            self._incorporate_component("PassiveAgentPlaceholder", 0)
            self._passive_multi_action_agent = True

        elif len(self.action) == 1 and not self.multi_action_mode:
            # Pre-allocate the flat mask (NO-OP slot + every action) once;
            # flatten_masks reuses this buffer.
            self._one_component_single_action = True
            self._premask = np.ones(1 + self._total_actions, dtype=np.float32)

        self._registered_components = True

        self._noop_action_dict = {k: v * 0 for k, v in self.action.items()}

        # Flip to True locally to dump the constructed action map while debugging.
        verbose = False
        if verbose:
            print(self.name, self.idx, "constructed action map:")
            for k, v in self.single_action_map.items():
                print("single action map:", k, v)
            for k, v in self.action.items():
                print("action:", k, v)
            for k, v in self.action_dim.items():
                print("action_dim:", k, v)

    @property
    def action_spaces(self):
        """
        if self.multi_action_mode == True:
            Returns an integer array with length equal to the number of action
            subspaces that the agent registered. The i'th element of the array
            indicates the number of actions associated with the i'th action subspace.
            In multi_action_mode, each subspace includes a NO-OP.
            Note: self._action_names describes which action subspace each element of
            the array refers to.

            Example:
                >> self.multi_action_mode
                True
                >> self.action_spaces
                [2, 5]
                >> self._action_names
                ["Build", "Gather"]
                # [1 Build action + Build NO-OP, 4 Gather actions + Gather NO-OP]

        if self.multi_action_mode == False:
            Returns a single integer equal to the total number of actions that the
            agent can take.

            Example:
                >> self.multi_action_mode
                False
                >> self.action_spaces
                6
                >> self._action_names
                ["Build", "Gather"]
                # 1 NO-OP + 1 Build action + 4 Gather actions.
        """
        if self.multi_action_mode:
            action_dims = []
            for m in self._action_names:
                action_dims.append(np.array(self.action_dim[m]).reshape(-1))
            return np.concatenate(action_dims).astype(np.int32)
        n_actions = 1  # (NO-OP)
        for m in self._action_names:
            n_actions += self.action_dim[m]
        return n_actions

    @property
    def loc(self):
        """2D list of [row, col] representing agent's location in the environment."""
        return self.state["loc"]

    @property
    def endogenous(self):
        """Dictionary representing endogenous quantities (i.e. "Labor").

        Example:
            >> self.endogenous
            {"Labor": 30.25}
        """
        return self.state["endogenous"]

    @property
    def inventory(self):
        """Dictionary representing quantities of resources in agent's inventory.

        Example:
            >> self.inventory
            {"Wood": 3, "Stone": 20, "Coin": 1002.83}
        """
        return self.state["inventory"]

    @property
    def escrow(self):
        """Dictionary representing quantities of resources in agent's escrow.

        https://en.wikipedia.org/wiki/Escrow
        Escrow is used to manage any portion of the agent's inventory that is
        reserved for a particular purpose. Typically, something enters escrow as part
        of a contractual arrangement to disburse that something when another
        condition is met. An example is found in the ContinuousDoubleAuction
        Component class (see ../components/continuous_double_auction.py). When an
        agent creates an order to sell a unit of Wood, for example, the component
        moves one unit of Wood from the agent's inventory to its escrow. If another
        agent buys the Wood, it is moved from escrow to the other agent's inventory. By
        placing the Wood in escrow, it prevents the first agent from using it for
        something else (i.e. building a house).

        Notes:
            The inventory and escrow share the same keys. An agent's endowment refers
            to the total quantity it has in its inventory and escrow.

            Escrow is provided to simplify inventory management but its intended
            semantics are not enforced directly. It is up to Component classes to
            enforce these semantics.

        Example:
            >> self.escrow
            {"Wood": 0, "Stone": 1, "Coin": 3}
        """
        return self.state["escrow"]

    def inventory_to_escrow(self, resource, amount):
        """Move some amount of a resource from agent inventory to agent escrow.

        Amount transferred is capped to the amount of resource in agent inventory.

        Args:
            resource (str): The name of the resource to move (i.e. "Wood", "Coin").
            amount (float): The amount to be moved from inventory to escrow. Must be
                non-negative (checked with an assert).

        Returns:
            Amount of resource actually transferred. Will be less than amount argument
                if amount argument exceeded the amount of resource in the inventory.
                Calculated as:
                    transferred = np.minimum(self.state["inventory"][resource], amount)
        """
        assert amount >= 0
        transferred = float(np.minimum(self.state["inventory"][resource], amount))
        self.state["inventory"][resource] -= transferred
        self.state["escrow"][resource] += transferred
        return transferred

    def escrow_to_inventory(self, resource, amount):
        """Move some amount of a resource from agent escrow to agent inventory.

        Amount transferred is capped to the amount of resource in agent escrow.

        Args:
            resource (str): The name of the resource to move (i.e. "Wood", "Coin").
            amount (float): The amount to be moved from escrow to inventory. Must be
                non-negative (checked with an assert).

        Returns:
            Amount of resource actually transferred. Will be less than amount argument
                if amount argument exceeded the amount of resource in escrow.
                Calculated as:
                    transferred = np.minimum(self.state["escrow"][resource], amount)
        """
        assert amount >= 0
        transferred = float(np.minimum(self.state["escrow"][resource], amount))
        self.state["escrow"][resource] -= transferred
        self.state["inventory"][resource] += transferred
        return transferred

    def total_endowment(self, resource):
        """Get the combined inventory+escrow endowment of resource.

        Args:
            resource (str): Name of the resource

        Returns:
            The amount of resource in the agents inventory and escrow.

        """
        return self.inventory[resource] + self.escrow[resource]

    def reset_actions(self, component=None):
        """Reset all actions to the NO-OP action (the 0'th action index).

        If component is specified, only reset action(s) for that component.
        Matching is case-insensitive. A name containing "." must match a full
        "<component>.<sub_action>" key; a name without "." matches every key
        whose part before the first "." equals it.
        """
        if not component:
            self.action.update(self._noop_action_dict)
        else:
            # Only values are reassigned here, never keys, so mutating the dict
            # while iterating over it is safe.
            for k, v in self.action.items():
                if "." in component:
                    if k.lower() == component.lower():
                        self.action[k] = v * 0
                else:
                    base_component = k.split(".")[0]
                    if base_component.lower() == component.lower():
                        self.action[k] = v * 0

    def has_component(self, component_name):
        """Returns True if the agent has component_name as a registered subaction.

        The check is an exact key lookup, so sub-actions must be given by their
        full "<component>.<sub_action>" name.
        """
        return component_name in self.action

    def get_random_action(self):
        """
        Select a component at random and randomly choose one of its actions (other
        than NO-OP).

        Returns:
            A single-entry dict {action_name: action_index}, or an empty dict when
            the agent has no action other than NO-OP (e.g. nothing registered, or
            only the passive multi-action placeholder).
        """
        # Only subspaces that actually offer a non-NO-OP action are eligible.
        candidates = [m for m in self._action_names if self._max_action_index(m) >= 1]
        if not candidates:
            return {}
        random_component = random.choice(candidates)
        # Valid non-NO-OP indices are 1..max inclusive. action_dim alone is not the
        # right bound in single-action mode, where it excludes the NO-OP slot.
        component_action = random.choice(
            list(range(1, self._max_action_index(random_component) + 1))
        )
        return {random_component: component_action}

    def get_component_action(self, component_name, sub_action_name=None):
        """
        Return the action(s) taken for component_name component, or None if the
        agent does not use that component.

        With sub_action_name given, the single "<component>.<sub_action>" entry is
        looked up. Otherwise every registered subspace belonging to the component
        is collected: one match returns its action directly, several matches
        return a list in registration order.
        """
        if sub_action_name is not None:
            return self.action.get(component_name + "." + sub_action_name, None)
        matching_names = [
            m for m in self._action_names if m.split(".")[0] == component_name
        ]
        if len(matching_names) == 0:
            return None
        if len(matching_names) == 1:
            return self.action.get(matching_names[0], None)
        return [self.action.get(m, None) for m in matching_names]

    def set_component_action(self, component_name, action):
        """Set the action(s) taken for component_name component.

        Raises:
            KeyError: If component_name is not a registered action subspace.
        """
        if component_name not in self.action:
            raise KeyError(
                "Agent {} of type {} does not have {} registered as a subaction".format(
                    self.idx, self.name, component_name
                )
            )
        if self._multi_action_dict[component_name]:
            self.action[component_name] = np.array(action, dtype=np.int32)
        else:
            self.action[component_name] = int(action)

    def populate_random_actions(self):
        """Fill the action buffer with random actions. This is for testing.

        Every registered subspace receives an independently drawn index, NO-OP (0)
        included.
        """
        for component, d in self.action_dim.items():
            if isinstance(d, int):
                # randint's upper bound is exclusive; go one past the largest
                # valid index so the last action can be drawn in both modes.
                self.set_component_action(
                    component,
                    np.random.randint(0, self._max_action_index(component) + 1),
                )
            else:
                d_array = np.array(d)
                self.set_component_action(
                    component, np.floor(np.random.rand(*d_array.shape) * d_array)
                )

    def parse_actions(self, actions):
        """Parse the actions array to fill each component's action buffers.

        Multi-action mode: ``actions`` is a sequence with one entry per registered
        subspace, in registration order.

        Single-action mode: ``actions`` is either a dict with at most one
        {action_name: index} entry, or a flat index into the combined action
        space where 0 is the universal NO-OP. NO-OP inputs leave the buffers
        untouched.
        """
        if self.multi_action_mode:
            assert len(actions) == self._unique_actions
            if len(actions) == 1:
                self.set_component_action(self._action_names[0], actions[0])
            else:
                for action_name, action in zip(self._action_names, actions):
                    self.set_component_action(action_name, int(action))

        # Single action mode
        else:
            # Action was supplied as an index of a specific subaction.
            # No need to do any lookup.
            if isinstance(actions, dict):
                if len(actions) == 0:
                    return
                assert len(actions) == 1
                action_name = list(actions.keys())[0]
                action = list(actions.values())[0]
                if action == 0:
                    return
                self.set_component_action(action_name, action)

            # Action was supplied as an index into the full set of combined actions
            else:
                action = int(actions)
                # Universal NO-OP
                if action == 0:
                    return
                # NOTE: an index missing from single_action_map makes .get return
                # None, so the unpacking below raises TypeError.
                action_name, action = self.single_action_map.get(action)
                self.set_component_action(action_name, action)

    def flatten_masks(self, mask_dict):
        """Convert a dictionary of component action masks into a single mask vector.

        NO-OP slots are always unmasked (1): one leading slot in single-action
        mode, one slot in front of each subspace in multi-action mode.

        Raises:
            KeyError: If mask_dict lacks an entry for a registered subspace.
        """
        if self._one_component_single_action:
            # Fast path: write into the pre-allocated buffer. The same array
            # object is returned on every call.
            self._premask[1:] = mask_dict[self._action_names[0]]
            return self._premask

        no_op_mask = [1]

        if self._passive_multi_action_agent:
            return np.array(no_op_mask).astype(np.float32)

        list_of_masks = []
        if not self.multi_action_mode:
            list_of_masks.append(no_op_mask)
        for m in self._action_names:
            if m not in mask_dict:
                raise KeyError("No mask provided for {} (agent {})".format(m, self.idx))
            if self.multi_action_mode:
                list_of_masks.append(no_op_mask)
            list_of_masks.append(mask_dict[m])
        return np.concatenate(list_of_masks).astype(np.float32)


# Module-level registry object, constructed from the BaseAgent class. The string
# literal that follows is descriptive text for this object (usage and notes).
agent_registry = Registry(BaseAgent)
"""The registry for Agent classes.

This creates a registry object for Agent classes. This registry requires that all
added classes are subclasses of BaseAgent. To make an Agent class available through
the registry, decorate the class definition with @agent_registry.add.

Example:
    from ai_economist.foundation.base.base_agent import BaseAgent, agent_registry

    @agent_registry.add
    class ExampleAgent(BaseAgent):
        name = "Example"
        pass

    assert agent_registry.has("Example")

    AgentClass = agent_registry.get("Example")
    agent = AgentClass(...)
    assert isinstance(agent, ExampleAgent)

Notes:
    The foundation package exposes the agent registry as: foundation.agents

    An Agent class that is defined and registered following the above example will
    only be visible in foundation.agents if defined/registered in a file that is
    imported in ../agents/__init__.py.
"""