crafting done ?
This commit is contained in:
@@ -38,7 +38,7 @@ class BaseAgent:
|
||||
|
||||
if idx is None:
|
||||
idx = 0
|
||||
|
||||
|
||||
if multi_action_mode is None:
|
||||
multi_action_mode = False
|
||||
|
||||
@@ -64,6 +64,7 @@ class BaseAgent:
|
||||
self._registered_inventory = False
|
||||
self._registered_endogenous = False
|
||||
self._registered_components = False
|
||||
self._setup = False # agent setup not completed
|
||||
self._noop_action_dict = dict()
|
||||
|
||||
# Special flag to allow logic for multi-action-mode agents
|
||||
@@ -78,7 +79,13 @@ class BaseAgent:
|
||||
def idx(self):
|
||||
"""Index used to identify this agent. Must be unique within the environment."""
|
||||
return self._idx
|
||||
|
||||
@property
|
||||
def is_setup(self):
|
||||
return self._setup
|
||||
|
||||
def set_setup(self, set):
|
||||
self._setup=set
|
||||
|
||||
def register_inventory(self, resources):
|
||||
"""Used during environment construction to populate inventory/escrow fields."""
|
||||
if self._registered_inventory:
|
||||
|
||||
@@ -342,16 +342,11 @@ class BaseEnvironment(ABC):
|
||||
self._components_dict[component_object.name] = component_object
|
||||
self._shorthand_lookup[component_object.shorthand] = component_object
|
||||
|
||||
# Register the components with the agents
|
||||
# to finish setting up their state/action spaces.
|
||||
for agent in self.world.agents:
|
||||
agent.register_inventory(self.resources)
|
||||
agent.register_endogenous(self.endogenous)
|
||||
agent.register_components(self._components)
|
||||
|
||||
self.world.planner.register_inventory(self.resources)
|
||||
self.world.planner.register_components(self._components)
|
||||
|
||||
self._agent_lookup = {str(agent.idx): agent for agent in self.all_agents}
|
||||
self.apply_scenario_config_to_agents()
|
||||
|
||||
|
||||
self._completions = 0
|
||||
|
||||
@@ -370,6 +365,16 @@ class BaseEnvironment(ABC):
|
||||
# To collate all the agents ('0', '1', ...) data during reset and step
|
||||
# into a single agent with index 'a'
|
||||
self.collate_agent_step_and_reset_data = collate_agent_step_and_reset_data
|
||||
|
||||
def apply_scenario_config_to_agents(self):
|
||||
# Register the components with the agents
|
||||
# to finish setting up their state/action spaces.
|
||||
for agent in self.world.agents:
|
||||
agent.register_inventory(self.resources)
|
||||
agent.register_endogenous(self.endogenous)
|
||||
agent.register_components(self._components)
|
||||
self._agent_lookup = {str(agent.idx): agent for agent in self.all_agents}
|
||||
self.world.apply_agent_db_to_world()
|
||||
|
||||
def _register_entities(self, entities):
|
||||
for entity in entities:
|
||||
@@ -920,6 +925,7 @@ class BaseEnvironment(ABC):
|
||||
# Reset actions to that default.
|
||||
for agent in self.all_agents:
|
||||
agent.reset_actions()
|
||||
agent.set_setup(True)
|
||||
|
||||
# Produce observations
|
||||
obs = self._generate_observations(
|
||||
|
||||
@@ -382,7 +382,7 @@ class World:
|
||||
self._agent_class_idx_map={}
|
||||
#create agents
|
||||
self.create_agents(agent_composition)
|
||||
|
||||
|
||||
self.maps = Maps(world_size, self.n_agents, world_resources, world_landmarks)
|
||||
|
||||
planner_class = agent_registry.get("BasicPlanner")
|
||||
@@ -410,7 +410,7 @@ class World:
|
||||
self._agent_class_idx_map[k].append(str(self.n_agents))
|
||||
self.n_agents+=1
|
||||
|
||||
def apply_agent_db(self):
|
||||
def apply_agent_db_to_world(self):
|
||||
"""Applys current agent db into lookup maps inside world and map itself. Enables insertion of new agents into existing env."""
|
||||
self.n_agents=len(self._agents)
|
||||
self._agent_class_idx_map={}
|
||||
|
||||
@@ -10,7 +10,7 @@ from ai_economist.foundation.base.base_component import (
|
||||
BaseComponent,
|
||||
component_registry,
|
||||
)
|
||||
from ai_economist.foundation.entities.resources import resource_registry
|
||||
from ai_economist.foundation.entities.resources import Resource, resource_registry
|
||||
|
||||
|
||||
@component_registry.add
|
||||
@@ -47,7 +47,7 @@ class Craft(BaseComponent):
|
||||
skill_dist="none",
|
||||
**base_component_kwargs
|
||||
):
|
||||
#append commodities
|
||||
#setup commodities
|
||||
for v in commodities:
|
||||
res_class=resource_registry.get(v)
|
||||
res=res_class()
|
||||
@@ -74,10 +74,10 @@ class Craft(BaseComponent):
|
||||
self.builds = []
|
||||
super().__init__(*base_component_args, **base_component_kwargs)
|
||||
|
||||
def agent_can_build(self, agent):
|
||||
def agent_can_build(self, agent, recipe):
|
||||
"""Return True if agent can actually build in its current location."""
|
||||
# See if the agent has the resources necessary to complete the action
|
||||
for resource, cost in self.resource_cost.items():
|
||||
for resource, cost in recipe.items():
|
||||
if agent.state["inventory"][resource] < cost:
|
||||
return False
|
||||
return True
|
||||
@@ -93,7 +93,7 @@ class Craft(BaseComponent):
|
||||
"""
|
||||
# This component adds 1 action that mobile agents can take: build a house
|
||||
if agent_cls_name in self.agent_subclasses:
|
||||
return 1
|
||||
return len(self.commodities)
|
||||
|
||||
return None
|
||||
|
||||
@@ -106,7 +106,7 @@ class Craft(BaseComponent):
|
||||
if agent_cls_name not in self.agent_subclasses:
|
||||
return {}
|
||||
if agent_cls_name == "BasicMobileAgent":
|
||||
return {"build_payment": float(self.payment), "build_skill": 1}
|
||||
return {}
|
||||
raise NotImplementedError
|
||||
|
||||
def component_step(self):
|
||||
@@ -131,29 +131,32 @@ class Craft(BaseComponent):
|
||||
pass
|
||||
|
||||
# Build! (If you can.)
|
||||
elif action == 1:
|
||||
if self.agent_can_build(agent):
|
||||
else:
|
||||
comm=self.commodities[action]
|
||||
|
||||
if self.agent_can_build(agent,comm.craft_recp):
|
||||
# Remove the resources
|
||||
for resource, cost in self.resource_cost.items():
|
||||
for resource, cost in comm.craft_recp.items():
|
||||
agent.state["inventory"][resource] -= cost
|
||||
|
||||
# Receive payment for the house
|
||||
agent.state["inventory"]["Coin"] += agent.state["build_payment"]
|
||||
# Receive crafted commodity
|
||||
agent.state["inventory"][comm.name] += agent.state["craft_amount"][comm.name]
|
||||
|
||||
# Incur the labor cost for building
|
||||
agent.state["endogenous"]["Labor"] += self.build_labor
|
||||
agent.state["endogenous"]["Labor"] += agent.state["craft_labour"][comm.name]
|
||||
|
||||
build.append(
|
||||
{
|
||||
"builder": agent.idx,
|
||||
"build_skill": self.sampled_skills[agent.idx],
|
||||
"income": float(agent.state["build_payment"]),
|
||||
"crafter": agent.idx,
|
||||
"craft_commodity": comm.name,
|
||||
"craft_skill": agent.state["craft_skill"][comm.name],
|
||||
"craft_amount": agent.state["craft_amount"][comm.name],
|
||||
"craft_labour": agent.state["craft_labour"][comm.name]
|
||||
}
|
||||
)
|
||||
else:
|
||||
agent.bad_action=True
|
||||
else:
|
||||
raise ValueError
|
||||
|
||||
|
||||
self.builds.append(build)
|
||||
|
||||
@@ -168,10 +171,10 @@ class Craft(BaseComponent):
|
||||
obs_dict = dict()
|
||||
for agent in self.world.agents:
|
||||
if agent.name in self.agent_subclasses:
|
||||
obs_dict[agent.idx] = {
|
||||
"build_payment": agent.state["build_payment"] / self.payment,
|
||||
"build_skill": self.sampled_skills[agent.idx],
|
||||
}
|
||||
obs_dict[agent.idx]["craft_skill"]={}
|
||||
for k in self.commodities:
|
||||
obs_dict[agent.idx]["craft_skill"][k.name] = agent.state["craft_skill"][k.name]
|
||||
|
||||
|
||||
return obs_dict
|
||||
|
||||
@@ -186,7 +189,8 @@ class Craft(BaseComponent):
|
||||
# Mobile agents' build action is masked if they cannot build with their
|
||||
# current location and/or endowment
|
||||
for agent in self.world.agents:
|
||||
masks[agent.idx] = np.array([self.agent_can_build(agent)])
|
||||
if agent.name in self.agent_subclasses:
|
||||
masks[agent.idx] = np.array([self.agent_can_build(agent,k.name) for k in self.commodities])
|
||||
|
||||
return masks
|
||||
|
||||
@@ -227,27 +231,35 @@ class Craft(BaseComponent):
|
||||
"""
|
||||
world = self.world
|
||||
|
||||
self.sampled_skills = {agent.idx: 1 for agent in world.agents}
|
||||
|
||||
PMSM = self.payment_max_skill_multiplier
|
||||
MSAB= self.max_skill_amount_benefit
|
||||
MSLB= self.max_skill_labour_benefit
|
||||
|
||||
|
||||
|
||||
for agent in world.agents:
|
||||
if self.skill_dist == "none":
|
||||
sampled_skill = 1
|
||||
pay_rate = 1
|
||||
elif self.skill_dist == "pareto":
|
||||
sampled_skill = np.random.pareto(4)
|
||||
pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1)
|
||||
elif self.skill_dist == "lognormal":
|
||||
sampled_skill = np.random.lognormal(-1, 0.5)
|
||||
pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
if agent.name not in self.agent_subclasses | agent.is_setup():
|
||||
continue
|
||||
agent.state["craft_skill"]={}
|
||||
agent.state["craft_labour"]={}
|
||||
agent.state["craft_amount"]={}
|
||||
|
||||
agent.state["build_payment"] = float(pay_rate * self.payment)
|
||||
agent.state["build_skill"] = float(sampled_skill)
|
||||
for comm in self.commodities:
|
||||
if self.skill_dist == "none":
|
||||
sampled_skill = 1
|
||||
amount= 1
|
||||
labour = 1
|
||||
elif self.skill_dist == "pareto":
|
||||
labour = 1
|
||||
sampled_skill = np.random.pareto(2)
|
||||
amount = np.minimum(MSAB, MSAB * sampled_skill)
|
||||
labour_modifier = 1 - np.minimum(1 - MSLB, (1 - MSLB) * sampled_skill)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
agent.state["craft_skill"][comm.name]=sampled_skill
|
||||
agent.state["craft_labour"][comm.name]=comm.craft_labour_base*labour_modifier
|
||||
agent.state["craft_amount"][comm.name]=amount
|
||||
|
||||
self.sampled_skills[agent.idx] = sampled_skill
|
||||
|
||||
self.builds = []
|
||||
|
||||
|
||||
474
envs/econ.py
Normal file
474
envs/econ.py
Normal file
@@ -0,0 +1,474 @@
|
||||
# Copyright (c) 2020, salesforce.com, inc.
|
||||
# All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
# For full license text, see the LICENSE file in the repo root
|
||||
# or https://opensource.org/licenses/BSD-3-Clause
|
||||
|
||||
from copy import deepcopy
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from scipy import signal
|
||||
|
||||
from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry
|
||||
from ai_economist.foundation.scenarios.utils import rewards, social_metrics
|
||||
import yaml
|
||||
|
||||
|
||||
@scenario_registry.add
|
||||
class Econ(BaseEnvironment):
|
||||
"""
|
||||
World containing stone and wood with stochastic regeneration. Refers to a fixed
|
||||
layout file (see ./map_txt/ for examples) to determine the spatial arrangement of
|
||||
stone, wood, and water tiles.
|
||||
|
||||
Args:
|
||||
planner_gets_spatial_obs (bool): Whether the planner agent receives spatial
|
||||
observations from the world.
|
||||
full_observability (bool): Whether the mobile agents' spatial observation
|
||||
includes the full world view or is instead an egocentric view.
|
||||
mobile_agent_observation_range (int): If not using full_observability,
|
||||
the spatial range (on each side of the agent) that is visible in the
|
||||
spatial observations.
|
||||
env_layout_file (str): Name of the layout file in ./map_txt/ to use.
|
||||
Note: The world dimensions of that layout must match the world dimensions
|
||||
argument used to construct the environment.
|
||||
resource_regen_prob (float): Probability that an empty source tile will
|
||||
regenerate a new resource unit.
|
||||
fixed_four_skill_and_loc (bool): Whether to use a fixed set of build skills and
|
||||
starting locations, with agents grouped into starting locations based on
|
||||
which skill quartile they are in. False, by default.
|
||||
True, for experiments in https://arxiv.org/abs/2004.13332.
|
||||
Note: Requires that the environment uses the "Build" component with
|
||||
skill_dist="pareto".
|
||||
starting_agent_coin (int, float): Amount of coin agents have at t=0. Defaults
|
||||
to zero coin.
|
||||
isoelastic_eta (float): Parameter controlling the shape of agent utility
|
||||
wrt coin endowment.
|
||||
energy_cost (float): Coefficient for converting labor to negative utility.
|
||||
energy_warmup_constant (float): Decay constant that controls the rate at which
|
||||
the effective energy cost is annealed from 0 to energy_cost. Set to 0
|
||||
(default) to disable annealing, meaning that the effective energy cost is
|
||||
always energy_cost. The units of the decay constant depend on the choice of
|
||||
energy_warmup_method.
|
||||
energy_warmup_method (str): How to schedule energy annealing (warmup). If
|
||||
"decay" (default), use the number of completed episodes. If "auto",
|
||||
use the number of timesteps where the average agent reward was positive.
|
||||
planner_reward_type (str): The type of reward used for the planner. Options
|
||||
are "coin_eq_times_productivity" (default),
|
||||
"inv_income_weighted_coin_endowment", and "inv_income_weighted_utility".
|
||||
mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/
|
||||
"coin_eq_times_productivity". Default is 0, which weights equality and
|
||||
productivity equally. If set to 1, only productivity is rewarded.
|
||||
"""
|
||||
|
||||
name = "econ"
|
||||
agent_subclasses = ["BasicMobileAgent"]
|
||||
required_entities = ["Wood", "Stone", "Water"]
|
||||
|
||||
def __init__(
    self,
    *base_env_args,
    resource_regen_prob=0.01,
    fixed_four_skill_and_loc=False,
    starting_agent_coin=0,
    isoelastic_eta=0.23,
    energy_cost=0.21,
    energy_warmup_constant=0,
    energy_warmup_method="decay",
    planner_reward_type="coin_eq_times_productivity",
    mixing_weight_gini_vs_coin=0.0,
    **base_env_kwargs,
):
    """Construct the Econ scenario; see the class docstring for argument details."""
    super().__init__(*base_env_args, **base_env_kwargs)

    # Regeneration/health specs for the two harvestable resources.
    # NOTE(review): fixed_four_skill_and_loc is accepted but never read in this
    # constructor -- confirm whether it is still needed or should be stored.
    self.layout_specs = dict(
        Wood={
            "regen_weight": float(resource_regen_prob),
            "regen_halfwidth": 0,
            "max_health": 1,
        },
        Stone={
            "regen_weight": float(resource_regen_prob),
            "regen_halfwidth": 0,
            "max_health": 1,
        },
    )
    assert 0 <= self.layout_specs["Wood"]["regen_weight"] <= 1
    assert 0 <= self.layout_specs["Stone"]["regen_weight"] <= 1

    # How much coin do agents begin with at upon reset
    self.starting_agent_coin = float(starting_agent_coin)
    assert self.starting_agent_coin >= 0.0

    # Controls the diminishing marginal utility of coin.
    # isoelastic_eta=0 means no diminishing utility.
    self.isoelastic_eta = float(isoelastic_eta)
    assert 0.0 <= self.isoelastic_eta <= 1.0

    # The amount that labor is weighted in utility computation
    # (once annealing is finished)
    self.energy_cost = float(energy_cost)
    assert self.energy_cost >= 0

    # Which method to use for calculating the progress of energy annealing
    # If method = 'decay': #completed episodes
    # If method = 'auto' : #timesteps where avg. agent reward > 0
    self.energy_warmup_method = energy_warmup_method.lower()
    assert self.energy_warmup_method in ["decay", "auto"]
    # Decay constant for annealing to full energy cost
    # (if energy_warmup_constant == 0, there is no annealing)
    self.energy_warmup_constant = float(energy_warmup_constant)
    assert self.energy_warmup_constant >= 0
    # Counter used by the "auto" warmup method; incremented in compute_reward.
    self._auto_warmup_integrator = 0

    # Which social welfare function to use
    self.planner_reward_type = str(planner_reward_type).lower()

    # How much to weight equality if using SWF=eq*prod:
    #   0 -> SWF=eq * prod
    #   1 -> SWF=prod
    self.mixing_weight_gini_vs_coin = float(mixing_weight_gini_vs_coin)
    assert 0 <= self.mixing_weight_gini_vs_coin <= 1.0

    # Use this to calculate marginal changes and deliver that as reward
    self.init_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.prev_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.curr_optimization_metric = {agent.idx: 0 for agent in self.all_agents}

    """
    Fixed Four Skill and Loc
    ------------------------
    """
    # Starting positions keyed by mobile-agent idx (empty until assigned).
    self.agent_starting_pos = {agent.idx: [] for agent in self.world.agents}

    # Cache of the most recently logged values.
    # NOTE(review): attribute name "last_log_loged" looks like a typo
    # ("logged"), but renaming would break any external readers -- confirm.
    self.last_log_loged = {}
|
||||
@property
def energy_weight(self):
    """
    Energy annealing progress. Multiply with self.energy_cost to get the
    effective energy coefficient.
    """
    constant = self.energy_warmup_constant
    # No annealing configured: energy cost applies at full strength.
    if constant <= 0.0:
        return 1.0

    # Pick the progress counter that drives the annealing schedule.
    if self.energy_warmup_method == "decay":
        progress = self._completions
    elif self.energy_warmup_method == "auto":
        progress = self._auto_warmup_integrator
    else:
        raise NotImplementedError

    # Exponential approach from 0 toward 1 as progress grows.
    return float(1.0 - np.exp(-progress / constant))
|
||||
def is_bad_action(self, agent):
    """Return the agent's bad-action flag, clearing it as a side effect."""
    was_bad = agent.bad_action
    # Reset so the flag only reports actions taken since the last check.
    agent.bad_action = False
    return was_bad
||||
def get_current_optimization_metrics(self):
    """
    Compute optimization metrics based on the current state. Used to compute reward.

    Returns:
        curr_optimization_metric (dict): A dictionary of {agent.idx: metric}
            with an entry for each agent (including the planner) in the env.
    """
    curr_optimization_metric = {}
    # (for agents)
    for agent in self.world.agents:
        # Utility = isoelastic utility of coin endowment minus weighted labor.
        rew = rewards.isoelastic_coin_minus_labor(
            coin_endowment=agent.total_endowment("Coin"),
            total_labor=agent.state["endogenous"]["Labor"],
            isoelastic_eta=self.isoelastic_eta,
            labor_coefficient=self.energy_weight * self.energy_cost,
        )

        # rew -= agent.state["endogenous"]["noops"]
        curr_optimization_metric[agent.idx] = rew

    # (for the planner) -- choose the configured social welfare function.
    # NOTE(review): the constructor default is "coin_eq_times_productivity",
    # but "inv_income_weighted_coin_endowments" here does not match the
    # "inv_income_weighted_coin_endowment" spelling in the class docstring --
    # confirm which string callers actually pass.
    if self.planner_reward_type == "coin_eq_times_productivity":
        curr_optimization_metric[
            self.world.planner.idx
        ] = rewards.coin_eq_times_productivity(
            coin_endowments=np.array(
                [agent.total_endowment("Coin") for agent in self.world.agents]
            ),
            equality_weight=1 - self.mixing_weight_gini_vs_coin,
        )
    elif self.planner_reward_type == "inv_income_weighted_coin_endowments":
        curr_optimization_metric[
            self.world.planner.idx
        ] = rewards.inv_income_weighted_coin_endowments(
            coin_endowments=np.array(
                [agent.total_endowment("Coin") for agent in self.world.agents]
            )
        )
    elif self.planner_reward_type == "inv_income_weighted_utility":
        curr_optimization_metric[
            self.world.planner.idx
        ] = rewards.inv_income_weighted_utility(
            coin_endowments=np.array(
                [agent.total_endowment("Coin") for agent in self.world.agents]
            ),
            # Uses the per-agent utilities computed in the loop above.
            utilities=np.array(
                [curr_optimization_metric[agent.idx] for agent in self.world.agents]
            ),
        )
    else:
        print("No valid planner reward selected!")
        raise NotImplementedError
    return curr_optimization_metric
|
||||
# The following methods must be implemented for each scenario
|
||||
# -----------------------------------------------------------
|
||||
|
||||
def reset_starting_layout(self):
    """
    Part 1/2 of scenario reset. This method handles resetting the state of the
    environment managed by the scenario (i.e. resource & landmark layout).

    Here, reset to the layout in the fixed layout file
    """
    maps = self.world.maps
    maps.clear()

    # Seed a single unit of each source resource at the origin tile.
    for resource_name in ("Wood", "Stone"):
        maps.set_point_add(resource_name, 0, 0, 1)
|
||||
def reset_agent_states(self):
    """
    Part 2/2 of scenario reset. This method handles resetting the state of the
    agents themselves (i.e. inventory, locations, etc.).

    Here, empty inventories and place mobile agents in random, accessible
    locations to start. Note: If using fixed_four_skill_and_loc, the starting
    locations will be overridden in self.additional_reset_steps.
    """
    self.world.clear_agent_locs()
    for agent in self.world.agents:
        # Only (re)initialize agents that have not completed setup yet.
        # BUGFIX: BaseAgent.is_setup is a @property returning a bool, so the
        # original `agent.is_setup()` raised "TypeError: 'bool' object is not
        # callable"; access the property instead of calling it.
        if not agent.is_setup:
            agent.state["inventory"] = {k: 0 for k in agent.inventory.keys()}
            # NOTE(review): escrow is keyed off inventory.keys() here, while
            # the planner below uses escrow.keys() -- confirm the two key sets
            # are identical for mobile agents.
            agent.state["escrow"] = {k: 0 for k in agent.inventory.keys()}
            agent.state["endogenous"] = {k: 0 for k in agent.endogenous.keys()}
            # Add starting coin
            agent.state["inventory"]["Coin"] = float(self.starting_agent_coin)
            agent.bad_action = False

    # The planner is always re-zeroed, regardless of setup state.
    self.world.planner.state["inventory"] = {
        k: 0 for k in self.world.planner.inventory.keys()
    }
    self.world.planner.state["escrow"] = {
        k: 0 for k in self.world.planner.escrow.keys()
    }
|
||||
|
||||
def scenario_step(self):
    """
    Update the state of the world according to whatever rules this scenario
    implements.

    This gets called in the 'step' method (of base_env) after going through each
    component step and before generating observations, rewards, etc.

    In this class of scenarios, the scenario step handles stochastic resource
    regeneration.
    """
    # Deposit 20 units of each source resource on the origin tile every step.
    maps = self.world.maps
    for resource_name in ("Wood", "Stone"):
        maps.set_point_add(resource_name, 0, 0, 20)
|
||||
|
||||
def generate_observations(self):
    """
    Generate observations associated with this scenario.

    A scenario does not need to produce observations and can provide observations
    for only some agent types; however, for a given agent type, it should either
    always or never yield an observation. If it does yield an observation,
    that observation should always have the same structure/sizes!

    Returns:
        obs (dict): A dictionary of {agent.idx: agent_obs_dict}. In words,
            return a dictionary with an entry for each agent (which can including
            the planner) for which this scenario provides an observation. For each
            entry, the key specifies the index of the agent and the value contains
            its associated observation dictionary.

    Here, each agent (and the planner) observes its own scaled inventory.
    """
    obs = {}

    # Scaled inventory for every mobile agent, keyed by the *string* idx.
    # NOTE(review): self.inv_scale is not defined in this file -- presumably
    # set by BaseEnvironment; confirm it exists before reset/step.
    agent_invs = {
        str(agent.idx): {
            "inventory-" + k: v * self.inv_scale for k, v in agent.inventory.items()
        }
        for agent in self.world.agents
    }

    # The planner observes its own inventory only.
    obs[self.world.planner.idx] = {
        "inventory-" + k: v * self.inv_scale
        for k, v in self.world.planner.inventory.items()
    }

    for agent in self.world.agents:
        sidx = str(agent.idx)
        obs[sidx] = agent_invs[sidx]

    return obs
|
||||
def compute_reward(self):
    """
    Apply the reward function(s) associated with this scenario to get the rewards
    from this step.

    Returns:
        rew (dict): A dictionary of {agent.idx: agent_obs_dict}. In words,
            return a dictionary with an entry for each agent in the environment
            (including the planner). For each entry, the key specifies the index of
            the agent and the value contains the scalar reward earned this timestep.

    Rewards are computed as the marginal utility (agents) or marginal social
    welfare (planner) experienced on this timestep. Ignoring discounting,
    this means that agents' (planner's) objective is to maximize the utility
    (social welfare) associated with the terminal state of the episode.
    """

    # "curr_optimization_metric" hasn't been updated yet, so it gives us the
    # utility from the last step.
    utility_at_end_of_last_time_step = deepcopy(self.curr_optimization_metric)

    # compute current objectives and store the values
    self.curr_optimization_metric = self.get_current_optimization_metrics()

    # reward = curr - prev objectives
    rew = {}
    for k, v in self.curr_optimization_metric.items():
        rew[k] = float(v - utility_at_end_of_last_time_step[k])
        # Penalize mobile agents (everyone except planner "p") that flagged a
        # bad action since the last reward computation.
        if k != "p":
            # NOTE(review): self.world.agents is indexed with the metric key k
            # (an agent idx) -- this only works if mobile-agent idx values
            # coincide with list positions; confirm against World.create_agents.
            if self.is_bad_action(self.world.agents[k]):
                rew[k] -= 1

    # store the previous objective values
    self.prev_optimization_metric.update(utility_at_end_of_last_time_step)

    # Automatic Energy Cost Annealing
    # -------------------------------
    avg_agent_rew = np.mean([rew[a.idx] for a in self.world.agents])
    # Count the number of timesteps where the avg agent reward was > 0
    if avg_agent_rew > 0:
        self._auto_warmup_integrator += 1

    return rew
|
||||
# Optional methods for customization
|
||||
# ----------------------------------
|
||||
|
||||
def additional_reset_steps(self):
    """
    Extra scenario-specific steps that should be performed at the end of the reset
    cycle.

    For each reset cycle...
        First, reset_starting_layout() and reset_agent_states() will be called.

        Second, <component>.reset() will be called for each registered component.

        Lastly, this method will be called to allow for any final customization of
        the reset cycle.

    For this scenario, this method resets optimization metric trackers. If using
    fixed_four_skill_and_loc, this is where each agent gets assigned to one of
    the four fixed skill/loc combinations. The agent-->skill/loc assignment is
    permuted so that all four skill/loc combinations are used.
    """
    # Snapshot the current objectives and seed all three trackers with
    # independent copies so later in-place updates cannot alias each other.
    baseline = self.get_current_optimization_metrics()
    self.curr_optimization_metric = deepcopy(baseline)
    self.init_optimization_metric = deepcopy(baseline)
    self.prev_optimization_metric = deepcopy(baseline)
|
||||
|
||||
|
||||
def scenario_metrics(self):
    """
    Allows the scenario to generate metrics (collected along with component metrics
    in the 'metrics' property).

    To have the scenario add metrics, this function needs to return a dictionary of
    {metric_key: value} where 'value' is a scalar (no nesting or lists!)

    Here, summarize social metrics, endowments, utilities, and labor cost annealing.
    """
    metrics = dict()

    coin_endowments = np.array(
        [agent.total_endowment("Coin") for agent in self.world.agents]
    )
    metrics["social/productivity"] = social_metrics.get_productivity(
        coin_endowments
    )
    metrics["social/equality"] = social_metrics.get_equality(coin_endowments)

    utilities = np.array(
        [self.curr_optimization_metric[agent.idx] for agent in self.world.agents]
    )
    metrics[
        "social_welfare/coin_eq_times_productivity"
    ] = rewards.coin_eq_times_productivity(
        coin_endowments=coin_endowments, equality_weight=1.0
    )
    metrics[
        "social_welfare/inv_income_weighted_coin_endow"
    ] = rewards.inv_income_weighted_coin_endowments(coin_endowments=coin_endowments)
    metrics[
        "social_welfare/inv_income_weighted_utility"
    ] = rewards.inv_income_weighted_utility(
        coin_endowments=coin_endowments, utilities=utilities
    )

    for agent in self.all_agents:
        # Only the resource names are needed here (total_endowment covers
        # inventory + escrow), so iterate keys directly instead of unpacking
        # an unused quantity from .items().
        for resource in agent.inventory:
            metrics[
                "endow/{}/{}".format(agent.idx, resource)
            ] = agent.total_endowment(resource)

        if agent.endogenous is not None:
            for resource, quantity in agent.endogenous.items():
                metrics["endogenous/{}/{}".format(agent.idx, resource)] = quantity

        metrics["util/{}".format(agent.idx)] = self.curr_optimization_metric[
            agent.idx
        ]

    # Labor weight
    metrics["labor/weighted_cost"] = self.energy_cost * self.energy_weight
    metrics["labor/warmup_integrator"] = int(self._auto_warmup_integrator)

    return metrics
|
||||
4
main.py
4
main.py
@@ -1,5 +1,7 @@
|
||||
from ai_economist import foundation
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ai_economist import foundation
|
||||
from stable_baselines3.common.vec_env import vec_frame_stack
|
||||
from stable_baselines3.common.evaluation import evaluate_policy
|
||||
import envs
|
||||
|
||||
@@ -23,7 +23,7 @@ class RecieverEconWrapper(gym.Env):
|
||||
self.idx_to_index={}
|
||||
#create idx to index map
|
||||
for i in range(len(self.agnet_idx)):
|
||||
self.idx_to_index[self.agnet_idx[i]]=i
|
||||
self.idx_to_index[str(self.agnet_idx[i])]=i
|
||||
first_idx=self.agnet_idx[0]
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ class RecieverEconWrapper(gym.Env):
|
||||
def _dict_idx_to_index(self, data):
|
||||
data_out={}
|
||||
for k,v in data.items():
|
||||
|
||||
if k in self.idx_to_index:
|
||||
index=self.idx_to_index[k]
|
||||
data_out[index]=v
|
||||
|
||||
Reference in New Issue
Block a user