# File: ai-econ/ai_economist/foundation/base/base_agent.py
# (Listing metadata: 494 lines, 19 KiB, Python)

# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
import random
import numpy as np
from ai_economist.foundation.base.registrar import Registry
class BaseAgent:
    """Base class for Agent classes.

    Instances of Agent classes are created for each agent in the environment. Agent
    instances are stateful, capturing location, inventory, endogenous variables,
    and any additional state fields created by environment components during
    construction (see BaseComponent.get_additional_state_fields in
    base_component.py).

    They also provide a simple API for getting/setting actions for each of their
    registered action subspaces (which depend on the components used to build
    the environment).

    Subclasses must set a non-empty class attribute ``name``; the constructor
    asserts on it.

    Args:
        idx (int or str): Index that uniquely identifies the agent object amongst
            the other agent objects registered in its environment. Defaults to 0.
            Strings are stored as-is; anything else is converted with int().
        multi_action_mode (bool): Whether to allow the agent to take one action for
            each of its registered action subspaces each timestep (if True),
            or to limit the agent to take only one action each timestep (if False).
            Defaults to False.
    """

    name = ""

    def __init__(self, idx=None, multi_action_mode=None):
        assert self.name

        if idx is None:
            idx = 0

        if multi_action_mode is None:
            multi_action_mode = False

        if isinstance(idx, str):
            self._idx = idx
        else:
            self._idx = int(idx)

        self.multi_action_mode = bool(multi_action_mode)

        # Used to convert single-action-mode actions to the general format:
        # {flat action index: [action subspace name, index within subspace]}
        self.single_action_map = {}

        self.action = dict()
        self.action_dim = dict()
        self._action_names = []
        self._multi_action_dict = {}
        self._unique_actions = 0
        self._total_actions = 0

        self.state = dict(loc=[0, 0], inventory={}, escrow={}, endogenous={})

        self._registered_inventory = False
        self._registered_endogenous = False
        self._registered_components = False
        self._noop_action_dict = dict()

        # Special flag to allow logic for multi-action-mode agents
        # that are not given any actions.
        self._passive_multi_action_agent = False

        # If this gets set to true, we can make masks faster
        self._one_component_single_action = False
        self._premask = None

    @property
    def idx(self):
        """Index used to identify this agent. Must be unique within the environment."""
        return self._idx

    def register_inventory(self, resources):
        """Used during environment construction to populate inventory/escrow fields.

        Every name in resources gets a zero entry in both inventory and escrow.
        Only the first call has any effect.
        """
        if self._registered_inventory:
            return
        for entity_name in resources:
            self.inventory[entity_name] = 0
            self.escrow[entity_name] = 0
        self._registered_inventory = True

    def register_endogenous(self, endogenous):
        """Used during environment construction to populate endogenous state fields.

        Every name in endogenous gets a zero entry. Only the first call has any
        effect.
        """
        if self._registered_endogenous:
            return
        for entity_name in endogenous:
            self.endogenous[entity_name] = 0
        self._registered_endogenous = True

    def _incorporate_component(self, action_name, n):
        """Register one action subspace that offers n (non-NO-OP) actions.

        In multi-action mode the subspace gets one extra slot for its own NO-OP.
        In single-action mode each of the n actions is instead assigned the next
        flat index in self.single_action_map (flat index 0 is never assigned here;
        parse_actions treats it as the universal NO-OP).
        """
        extra_n = (
            1 if self.multi_action_mode else 0
        )  # Each sub-action has a NO-OP in multi action mode

        self.action[action_name] = 0
        self.action_dim[action_name] = n + extra_n
        self._action_names.append(action_name)
        self._multi_action_dict[action_name] = False
        self._unique_actions += 1

        if self.multi_action_mode:
            self._total_actions += n + extra_n
        else:
            for action_n in range(1, n + 1):
                self._total_actions += 1
                self.single_action_map[int(self._total_actions)] = [
                    action_name,
                    action_n,
                ]

    def register_components(self, components):
        """Used during environment construction to set up state/action spaces.

        For each component, component.get_n_actions(self.name) decides what is
        registered:
            - None or 0: the component is skipped entirely (its additional state
              fields are not added either).
            - int: one action subspace named after the component.
            - tuple/list of (sub_name, n) pairs: one subspace per pair with a
              non-zero n, named "<component name>.<sub_name>".

        Only the first call has any effect.

        Raises:
            NameError: If a sub-action name contains ".".
            TypeError: If get_n_actions returns anything other than None, an int,
                a tuple or a list.
        """
        if self._registered_components:
            return

        for component in components:
            n = component.get_n_actions(self.name)
            if n is None:
                continue

            # Most components will have a single action-per-agent, so n is an int
            if isinstance(n, int):
                if n == 0:
                    continue
                self._incorporate_component(component.name, n)

            # They can also internally handle multiple actions-per-agent,
            # so n is a tuple or list
            elif isinstance(n, (tuple, list)):
                for action_sub_name, n_ in n:
                    if n_ == 0:
                        continue
                    # "." is reserved as the component/sub-action separator.
                    if "." in action_sub_name:
                        raise NameError(
                            "Sub-action {} of component {} "
                            "is illegally named.".format(
                                action_sub_name, component.name
                            )
                        )
                    self._incorporate_component(
                        "{}.{}".format(component.name, action_sub_name), n_
                    )

            # If that's not what we got something is funky.
            else:
                raise TypeError(
                    "Received unexpected type ({}) from {}.get_n_actions('{}')".format(
                        type(n), component.name, self.name
                    )
                )

            for k, v in component.get_additional_state_fields(self.name).items():
                self.state[k] = v

        # Currently no actions are available to this agent. Give it a placeholder.
        if len(self.action) == 0 and self.multi_action_mode:
            self._incorporate_component("PassiveAgentPlaceholder", 0)
            self._passive_multi_action_agent = True

        elif len(self.action) == 1 and not self.multi_action_mode:
            # Pre-allocate the flat mask (NO-OP slot + one slot per action) so
            # flatten_masks can fill it in place.
            self._one_component_single_action = True
            self._premask = np.ones(1 + self._total_actions, dtype=np.float32)

        self._registered_components = True

        self._noop_action_dict = {k: v * 0 for k, v in self.action.items()}

        # Flip to True locally to dump the constructed action map when debugging.
        verbose = False
        if verbose:
            print(self.name, self.idx, "constructed action map:")
            for k, v in self.single_action_map.items():
                print("single action map:", k, v)
            for k, v in self.action.items():
                print("action:", k, v)
            for k, v in self.action_dim.items():
                print("action_dim:", k, v)

    @property
    def action_spaces(self):
        """
        if self.multi_action_mode == True:
            Returns an integer array with length equal to the number of action
            subspaces that the agent registered. The i'th element of the array
            indicates the number of actions associated with the i'th action
            subspace. In multi_action_mode, each subspace includes a NO-OP.
            Note: self._action_names describes which action subspace each element
            of the array refers to.

            Example:
                >> self.multi_action_mode
                True
                >> self.action_spaces
                [2, 5]
                >> self._action_names
                ["Build", "Gather"]
                # [1 Build action + Build NO-OP, 4 Gather actions + Gather NO-OP]

        if self.multi_action_mode == False:
            Returns a single integer equal to the total number of actions that the
            agent can take.

            Example:
                >> self.multi_action_mode
                False
                >> self.action_spaces
                6
                >> self._action_names
                ["Build", "Gather"]
                # 1 NO-OP + 1 Build action + 4 Gather actions.
        """
        if self.multi_action_mode:
            action_dims = []
            for m in self._action_names:
                action_dims.append(np.array(self.action_dim[m]).reshape(-1))
            return np.concatenate(action_dims).astype(np.int32)

        n_actions = 1  # (NO-OP)
        for m in self._action_names:
            n_actions += self.action_dim[m]
        return n_actions

    @property
    def loc(self):
        """2D list of [row, col] representing agent's location in the environment."""
        return self.state["loc"]

    @property
    def endogenous(self):
        """Dictionary representing endogenous quantities (i.e. "Labor").

        Example:
            >> self.endogenous
            {"Labor": 30.25}
        """
        return self.state["endogenous"]

    @property
    def inventory(self):
        """Dictionary representing quantities of resources in agent's inventory.

        Example:
            >> self.inventory
            {"Wood": 3, "Stone": 20, "Coin": 1002.83}
        """
        return self.state["inventory"]

    @property
    def escrow(self):
        """Dictionary representing quantities of resources in agent's escrow.

        https://en.wikipedia.org/wiki/Escrow
        Escrow is used to manage any portion of the agent's inventory that is
        reserved for a particular purpose. Typically, something enters escrow as
        part of a contractual arrangement to disburse that something when another
        condition is met. An example is found in the ContinuousDoubleAuction
        Component class (see ../components/continuous_double_auction.py). When an
        agent creates an order to sell a unit of Wood, for example, the component
        moves one unit of Wood from the agent's inventory to its escrow. If another
        agent buys the Wood, it is moved from escrow to the other agent's
        inventory. By placing the Wood in escrow, it prevents the first agent from
        using it for something else (i.e. building a house).

        Notes:
            The inventory and escrow share the same keys. An agent's endowment
            refers to the total quantity it has in its inventory and escrow.

            Escrow is provided to simplify inventory management but its intended
            semantics are not enforced directly. It is up to Component classes to
            enforce these semantics.

        Example:
            >> self.escrow
            {"Wood": 0, "Stone": 1, "Coin": 3}
        """
        return self.state["escrow"]

    def inventory_to_escrow(self, resource, amount):
        """Move some amount of a resource from agent inventory to agent escrow.

        Amount transferred is capped to the amount of resource in agent inventory.

        Args:
            resource (str): The name of the resource to move (i.e. "Wood", "Coin").
            amount (float): The amount to be moved from inventory to escrow. Must
                be non-negative (asserted).

        Returns:
            Amount of resource actually transferred, as a float. Will be less than
            amount argument if amount argument exceeded the amount of resource in
            the inventory. Calculated as:
                transferred = np.minimum(self.state["inventory"][resource], amount)
        """
        assert amount >= 0
        transferred = float(np.minimum(self.state["inventory"][resource], amount))
        self.state["inventory"][resource] -= transferred
        self.state["escrow"][resource] += transferred
        return float(transferred)

    def escrow_to_inventory(self, resource, amount):
        """Move some amount of a resource from agent escrow to agent inventory.

        Amount transferred is capped to the amount of resource in agent escrow.

        Args:
            resource (str): The name of the resource to move (i.e. "Wood", "Coin").
            amount (float): The amount to be moved from escrow to inventory. Must
                be non-negative (asserted).

        Returns:
            Amount of resource actually transferred, as a float. Will be less than
            amount argument if amount argument exceeded the amount of resource in
            escrow. Calculated as:
                transferred = np.minimum(self.state["escrow"][resource], amount)
        """
        assert amount >= 0
        transferred = float(np.minimum(self.state["escrow"][resource], amount))
        self.state["escrow"][resource] -= transferred
        self.state["inventory"][resource] += transferred
        return float(transferred)

    def total_endowment(self, resource):
        """Get the combined inventory+escrow endowment of resource.

        Args:
            resource (str): Name of the resource

        Returns:
            The amount of resource in the agents inventory and escrow.
        """
        return self.inventory[resource] + self.escrow[resource]

    def reset_actions(self, component=None):
        """Reset all actions to the NO-OP action (the 0'th action index).

        If component is specified, only reset action(s) for that component.
        Matching is case-insensitive. A component containing "." is compared
        against the full "<component>.<sub-action>" name; otherwise it is compared
        against the part of each registered name before the first ".".
        """
        if not component:
            self.action.update(self._noop_action_dict)
            return

        target = component.lower()
        match_full_name = "." in component
        # Only values of existing keys are reassigned, so iterating over
        # self.action while writing to it is safe.
        for k, v in self.action.items():
            candidate = k if match_full_name else k.split(".")[0]
            if candidate.lower() == target:
                self.action[k] = v * 0

    def has_component(self, component_name):
        """Returns True if the agent has component_name as a registered subaction."""
        return bool(component_name in self.action)

    def get_random_action(self):
        """
        Select a component at random and randomly choose one of its actions (other
        than NO-OP).

        Returns:
            A single-entry dict {action subspace name: action index}, where the
            index is drawn uniformly from 1..n and n is the number of non-NO-OP
            actions registered for that subspace.

        Raises:
            IndexError: If the agent has no registered action subspaces, or the
                chosen subspace has no non-NO-OP action (e.g. the placeholder
                given to passive multi-action agents).
        """
        random_component = random.choice(self._action_names)

        # action_dim counts the NO-OP slot only in multi-action mode (see
        # _incorporate_component). In single-action mode the valid indices are
        # still 1..n, so the exclusive upper bound needs one extra slot;
        # without it the last action is unreachable and a one-action component
        # yields an empty range.
        upper = self.action_dim[random_component]
        if not self.multi_action_mode:
            upper += 1

        component_action = random.choice(range(1, upper))
        return {random_component: component_action}

    def get_component_action(self, component_name, sub_action_name=None):
        """
        Return the action(s) taken for component_name component, or None if the
        agent does not use that component.

        If sub_action_name is given, only "<component_name>.<sub_action_name>" is
        looked up. Otherwise every registered subspace whose name (up to the first
        ".") equals component_name is collected: a single match returns its
        action, several matches return a list of actions in registration order.
        """
        if sub_action_name is not None:
            return self.action.get(component_name + "." + sub_action_name, None)

        matching_names = [
            m for m in self._action_names if m.split(".")[0] == component_name
        ]
        if len(matching_names) == 0:
            return None
        if len(matching_names) == 1:
            return self.action.get(matching_names[0], None)
        return [self.action.get(m, None) for m in matching_names]

    def set_component_action(self, component_name, action):
        """Set the action(s) taken for component_name component.

        Raises:
            KeyError: If component_name is not a registered action subspace.
        """
        if component_name not in self.action:
            raise KeyError(
                "Agent {} of type {} does not have {} registered as a subaction".format(
                    self.idx, self.name, component_name
                )
            )
        if self._multi_action_dict[component_name]:
            self.action[component_name] = np.array(action, dtype=np.int32)
        else:
            self.action[component_name] = int(action)

    def populate_random_actions(self):
        """Fill the action buffer with random actions. This is for testing.

        NOTE(review): indices are drawn from [0, action_dim). In single-action
        mode action_dim excludes the NO-OP slot, so the highest action index of
        each subspace is never drawn here. Left as-is because this helper sets
        every subspace at once, which is a testing convenience rather than a
        valid single-action-mode step.
        """
        for component, d in self.action_dim.items():
            if isinstance(d, int):
                self.set_component_action(component, np.random.randint(0, d))
            else:
                d_array = np.array(d)
                self.set_component_action(
                    component, np.floor(np.random.rand(*d_array.shape) * d_array)
                )

    def parse_actions(self, actions):
        """Parse the actions array to fill each component's action buffers.

        In multi-action mode, actions must hold one entry per registered action
        subspace, ordered like self._action_names.

        In single-action mode, actions is either:
            - a dict with at most one {subspace name: action index} entry, or
            - a value convertible to int that indexes the flat action space
              (0 is the universal NO-OP; other values are resolved through
              self.single_action_map).
        A NO-OP (or empty dict) leaves the action buffers untouched.
        """
        if self.multi_action_mode:
            assert len(actions) == self._unique_actions
            if len(actions) == 1:
                self.set_component_action(self._action_names[0], actions[0])
            else:
                for action_name, action in zip(self._action_names, actions):
                    self.set_component_action(action_name, int(action))
            return

        # Single action mode
        if isinstance(actions, dict):
            # Action was supplied as an index of a specific subaction.
            # No need to do any lookup.
            if len(actions) == 0:
                return
            assert len(actions) == 1
            action_name, action = next(iter(actions.items()))
            if action == 0:
                return
            self.set_component_action(action_name, action)
        else:
            # Action was supplied as an index into the full set of combined actions
            action = int(actions)
            # Universal NO-OP
            if action == 0:
                return
            action_name, action = self.single_action_map.get(action)
            self.set_component_action(action_name, action)

    def flatten_masks(self, mask_dict):
        """Convert a dictionary of component action masks into a single mask vector.

        Layout of the returned float32 vector:
            - single-action mode: [NO-OP, <mask of subspace 0>, <mask of 1>, ...]
            - multi-action mode: [NO-OP, <mask of subspace 0>, NO-OP, <mask 1>, ...]
            - passive multi-action agent: [NO-OP]
        NO-OP entries are always 1.

        Note: for a single-action-mode agent with exactly one subspace, the
        returned array is an internal buffer that is overwritten by the next
        call; copy it if it needs to outlive that call.

        Raises:
            KeyError: If mask_dict lacks an entry for a registered subspace.
        """
        if self._one_component_single_action:
            self._premask[1:] = mask_dict[self._action_names[0]]
            return self._premask

        no_op_mask = [1]

        if self._passive_multi_action_agent:
            return np.array(no_op_mask).astype(np.float32)

        list_of_masks = []
        if not self.multi_action_mode:
            list_of_masks.append(no_op_mask)
        for m in self._action_names:
            if m not in mask_dict:
                raise KeyError("No mask provided for {} (agent {})".format(m, self.idx))
            if self.multi_action_mode:
                list_of_masks.append(no_op_mask)
            list_of_masks.append(mask_dict[m])

        return np.concatenate(list_of_masks).astype(np.float32)
# Module-level registry for Agent classes, constructed with BaseAgent as its
# argument. The string that follows the assignment documents how to use it.
# NOTE(review): the example inside that string appears to have lost its
# indentation; `name = "Example"` and `pass` presumably belong to the
# ExampleAgent class body -- confirm against the upstream file.
agent_registry = Registry(BaseAgent)
"""The registry for Agent classes.
This creates a registry object for Agent classes. This registry requires that all
added classes are subclasses of BaseAgent. To make an Agent class available through
the registry, decorate the class definition with @agent_registry.add.
Example:
from ai_economist.foundation.base.base_agent import BaseAgent, agent_registry
@agent_registry.add
class ExampleAgent(BaseAgent):
name = "Example"
pass
assert agent_registry.has("Example")
AgentClass = agent_registry.get("Example")
agent = AgentClass(...)
assert isinstance(agent, ExampleAgent)
Notes:
The foundation package exposes the agent registry as: foundation.agents
An Agent class that is defined and registered following the above example will
only be visible in foundation.agents if defined/registered in a file that is
imported in ../agents/__init__.py.
"""