# Copyright (c) 2020, salesforce.com, inc. # All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # For full license text, see the LICENSE file in the repo root # or https://opensource.org/licenses/BSD-3-Clause import random import numpy as np from ai_economist.foundation.base.registrar import Registry class BaseAgent: """Base class for Agent classes. Instances of Agent classes are created for each agent in the environment. Agent instances are stateful, capturing location, inventory, endogenous variables, and any additional state fields created by environment components during construction (see BaseComponent.get_additional_state_fields in base_component.py). They also provide a simple API for getting/setting actions for each of their registered action subspaces (which depend on the components used to build the environment). Args: idx (int or str): Index that uniquely identifies the agent object amongst the other agent objects registered in its environment. multi_action_mode (bool): Whether to allow the agent to take one action for each of its registered action subspaces each timestep (if True), or to limit the agent to take only one action each timestep (if False). """ name = "" def __init__(self, idx=None, multi_action_mode=None): assert self.name if idx is None: idx = 0 if multi_action_mode is None: multi_action_mode = False if isinstance(idx, str): self._idx = idx else: self._idx = int(idx) self.multi_action_mode = bool(multi_action_mode) self.single_action_map = ( {} ) # Used to convert single-action-mode actions to the general format self.action = dict() self.action_dim = dict() self._action_names = [] self._multi_action_dict = {} self._unique_actions = 0 self._total_actions = 0 self.state = dict(loc=[0, 0], inventory={}, escrow={}, endogenous={}) self._registered_inventory = False self._registered_endogenous = False self._registered_components = False self._noop_action_dict = dict() # Special flag to allow logic for multi-action-mode agents # that are not given any actions. self._passive_multi_action_agent = False # If this gets set to true, we can make masks faster self._one_component_single_action = False self._premask = None @property def idx(self): """Index used to identify this agent. Must be unique within the environment.""" return self._idx def register_inventory(self, resources): """Used during environment construction to populate inventory/escrow fields.""" if self._registered_inventory: return for entity_name in resources: self.inventory[entity_name] = 0 self.escrow[entity_name] = 0 self._registered_inventory = True def register_endogenous(self, endogenous): """Used during environment construction to populate endogenous state fields.""" if self._registered_endogenous: return for entity_name in endogenous: self.endogenous[entity_name] = 0 self._registered_endogenous = True def _incorporate_component(self, action_name, n): extra_n = ( 1 if self.multi_action_mode else 0 ) # Each sub-action has a NO-OP in multi action mode) self.action[action_name] = 0 self.action_dim[action_name] = n + extra_n self._action_names.append(action_name) self._multi_action_dict[action_name] = False self._unique_actions += 1 if self.multi_action_mode: self._total_actions += n + extra_n else: for action_n in range(1, n + 1): self._total_actions += 1 self.single_action_map[int(self._total_actions)] = [ action_name, action_n, ] def register_components(self, components): """Used during environment construction to set up state/action spaces.""" if self._registered_components: return for component in components: n = component.get_n_actions(self.name) if n is None: continue # Most components will have a single action-per-agent, so n is an int if isinstance(n, int): if n == 0: continue self._incorporate_component(component.name, n) # They can also internally handle multiple actions-per-agent, # so n is an tuple or list elif isinstance(n, (tuple, list)): for action_sub_name, n_ in n: if n_ == 0: continue if "." in action_sub_name: raise NameError( "Sub-action {} of component {} " "is illegally named.".format( action_sub_name, component.name ) ) self._incorporate_component( "{}.{}".format(component.name, action_sub_name), n_ ) # If that's not what we got something is funky. else: raise TypeError( "Received unexpected type ({}) from {}.get_n_actions('{}')".format( type(n), component.name, self.name ) ) for k, v in component.get_additional_state_fields(self.name).items(): self.state[k] = v # Currently no actions are available to this agent. Give it a placeholder. if len(self.action) == 0 and self.multi_action_mode: self._incorporate_component("PassiveAgentPlaceholder", 0) self._passive_multi_action_agent = True elif len(self.action) == 1 and not self.multi_action_mode: self._one_component_single_action = True self._premask = np.ones(1 + self._total_actions, dtype=np.float32) self._registered_components = True self._noop_action_dict = {k: v * 0 for k, v in self.action.items()} verbose = False if verbose: print(self.name, self.idx, "constructed action map:") for k, v in self.single_action_map.items(): print("single action map:", k, v) for k, v in self.action.items(): print("action:", k, v) for k, v in self.action_dim.items(): print("action_dim:", k, v) @property def action_spaces(self): """ if self.multi_action_mode == True: Returns an integer array with length equal to the number of action subspaces that the agent registered. The i'th element of the array indicates the number of actions associated with the i'th action subspace. In multi_action_mode, each subspace includes a NO-OP. Note: self._action_names describes which action subspace each element of the array refers to. Example: >> self.multi_action_mode True >> self.action_spaces [2, 5] >> self._action_names ["Build", "Gather"] # [1 Build action + Build NO-OP, 4 Gather actions + Gather NO-OP] if self.multi_action_mode == False: Returns a single integer equal to the total number of actions that the agent can take. Example: >> self.multi_action_mode False >> self.action_spaces 6 >> self._action_names ["Build", "Gather"] # 1 NO-OP + 1 Build action + 4 Gather actions. """ if self.multi_action_mode: action_dims = [] for m in self._action_names: action_dims.append(np.array(self.action_dim[m]).reshape(-1)) return np.concatenate(action_dims).astype(np.int32) n_actions = 1 # (NO-OP) for m in self._action_names: n_actions += self.action_dim[m] return n_actions @property def loc(self): """2D list of [row, col] representing agent's location in the environment.""" return self.state["loc"] @property def endogenous(self): """Dictionary representing endogenous quantities (i.e. "Labor"). Example: >> self.endogenous {"Labor": 30.25} """ return self.state["endogenous"] @property def inventory(self): """Dictionary representing quantities of resources in agent's inventory. Example: >> self.inventory {"Wood": 3, "Stone": 20, "Coin": 1002.83} """ return self.state["inventory"] @property def escrow(self): """Dictionary representing quantities of resources in agent's escrow. https://en.wikipedia.org/wiki/Escrow Escrow is used to manage any portion of the agent's inventory that is reserved for a particular purpose. Typically, something enters escrow as part of a contractual arrangement to disburse that something when another condition is met. An example is found in the ContinuousDoubleAuction Component class (see ../components/continuous_double_auction.py). When an agent creates an order to sell a unit of Wood, for example, the component moves one unit of Wood from the agent's inventory to its escrow. If another agent buys the Wood, it is moved from escrow to the other agent's inventory. By placing the Wood in escrow, it prevents the first agent from using it for something else (i.e. building a house). Notes: The inventory and escrow share the same keys. An agent's endowment refers to the total quantity it has in its inventory and escrow. Escrow is provided to simplify inventory management but its intended semantics are not enforced directly. It is up to Component classes to enforce these semantics. Example: >> self.inventory {"Wood": 0, "Stone": 1, "Coin": 3} """ return self.state["escrow"] def inventory_to_escrow(self, resource, amount): """Move some amount of a resource from agent inventory to agent escrow. Amount transferred is capped to the amount of resource in agent inventory. Args: resource (str): The name of the resource to move (i.e. "Wood", "Coin"). amount (float): The amount to be moved from inventory to escrow. Must be positive. Returns: Amount of resource actually transferred. Will be less than amount argument if amount argument exceeded the amount of resource in the inventory. Calculated as: transferred = np.minimum(self.state["inventory"][resource], amount) """ assert amount >= 0 transferred = float(np.minimum(self.state["inventory"][resource], amount)) self.state["inventory"][resource] -= transferred self.state["escrow"][resource] += transferred return float(transferred) def escrow_to_inventory(self, resource, amount): """Move some amount of a resource from agent escrow to agent inventory. Amount transferred is capped to the amount of resource in agent escrow. Args: resource (str): The name of the resource to move (i.e. "Wood", "Coin"). amount (float): The amount to be moved from escrow to inventory. Must be positive. Returns: Amount of resource actually transferred. Will be less than amount argument if amount argument exceeded the amount of resource in escrow. Calculated as: transferred = np.minimum(self.state["escrow"][resource], amount) """ assert amount >= 0 transferred = float(np.minimum(self.state["escrow"][resource], amount)) self.state["escrow"][resource] -= transferred self.state["inventory"][resource] += transferred return float(transferred) def total_endowment(self, resource): """Get the combined inventory+escrow endowment of resource. Args: resource (str): Name of the resource Returns: The amount of resource in the agents inventory and escrow. """ return self.inventory[resource] + self.escrow[resource] def reset_actions(self, component=None): """Reset all actions to the NO-OP action (the 0'th action index). If component is specified, only reset action(s) for that component. """ if not component: self.action.update(self._noop_action_dict) else: for k, v in self.action.items(): if "." in component: if k.lower() == component.lower(): self.action[k] = v * 0 else: base_component = k.split(".")[0] if base_component.lower() == component.lower(): self.action[k] = v * 0 def has_component(self, component_name): """Returns True if the agent has component_name as a registered subaction.""" return bool(component_name in self.action) def get_random_action(self): """ Select a component at random and randomly choose one of its actions (other than NO-OP). """ random_component = random.choice(self._action_names) component_action = random.choice( list(range(1, self.action_dim[random_component])) ) return {random_component: component_action} def get_component_action(self, component_name, sub_action_name=None): """ Return the action(s) taken for component_name component, or None if the agent does not use that component. """ if sub_action_name is not None: return self.action.get(component_name + "." + sub_action_name, None) matching_names = [ m for m in self._action_names if m.split(".")[0] == component_name ] if len(matching_names) == 0: return None if len(matching_names) == 1: return self.action.get(matching_names[0], None) return [self.action.get(m, None) for m in matching_names] def set_component_action(self, component_name, action): """Set the action(s) taken for component_name component.""" if component_name not in self.action: raise KeyError( "Agent {} of type {} does not have {} registered as a subaction".format( self.idx, self.name, component_name ) ) if self._multi_action_dict[component_name]: self.action[component_name] = np.array(action, dtype=np.int32) else: self.action[component_name] = int(action) def populate_random_actions(self): """Fill the action buffer with random actions. This is for testing.""" for component, d in self.action_dim.items(): if isinstance(d, int): self.set_component_action(component, np.random.randint(0, d)) else: d_array = np.array(d) self.set_component_action( component, np.floor(np.random.rand(*d_array.shape) * d_array) ) def parse_actions(self, actions): """Parse the actions array to fill each component's action buffers.""" if self.multi_action_mode: assert len(actions) == self._unique_actions if len(actions) == 1: self.set_component_action(self._action_names[0], actions[0]) else: for action_name, action in zip(self._action_names, actions): self.set_component_action(action_name, int(action)) # Single action mode else: # Action was supplied as an index of a specific subaction. # No need to do any lookup. if isinstance(actions, dict): if len(actions) == 0: return assert len(actions) == 1 action_name = list(actions.keys())[0] action = list(actions.values())[0] if action == 0: return self.set_component_action(action_name, action) # Action was supplied as an index into the full set of combined actions else: action = int(actions) # Universal NO-OP if action == 0: return action_name, action = self.single_action_map.get(action) self.set_component_action(action_name, action) def flatten_masks(self, mask_dict): """Convert a dictionary of component action masks into a single mask vector.""" if self._one_component_single_action: self._premask[1:] = mask_dict[self._action_names[0]] return self._premask no_op_mask = [1] if self._passive_multi_action_agent: return np.array(no_op_mask).astype(np.float32) list_of_masks = [] if not self.multi_action_mode: list_of_masks.append(no_op_mask) for m in self._action_names: if m not in mask_dict: raise KeyError("No mask provided for {} (agent {})".format(m, self.idx)) if self.multi_action_mode: list_of_masks.append(no_op_mask) list_of_masks.append(mask_dict[m]) return np.concatenate(list_of_masks).astype(np.float32) agent_registry = Registry(BaseAgent) """The registry for Agent classes. This creates a registry object for Agent classes. This registry requires that all added classes are subclasses of BaseAgent. To make an Agent class available through the registry, decorate the class definition with @agent_registry.add. Example: from ai_economist.foundation.base.base_agent import BaseAgent, agent_registry @agent_registry.add class ExampleAgent(BaseAgent): name = "Example" pass assert agent_registry.has("Example") AgentClass = agent_registry.get("Example") agent = AgentClass(...) assert isinstance(agent, ExampleAgent) Notes: The foundation package exposes the agent registry as: foundation.agents An Agent class that is defined and registered following the above example will only be visible in foundation.agents if defined/registered in a file that is imported in ../agents/__init__.py. """