Crafting done?

This commit is contained in:
2023-01-13 20:07:21 +01:00
parent 7539863ace
commit 4f1044b87e
7 changed files with 554 additions and 52 deletions

View File

@@ -38,7 +38,7 @@ class BaseAgent:
if idx is None:
idx = 0
if multi_action_mode is None:
multi_action_mode = False
@@ -64,6 +64,7 @@ class BaseAgent:
self._registered_inventory = False
self._registered_endogenous = False
self._registered_components = False
self._setup = False # agent setup not completed
self._noop_action_dict = dict()
# Special flag to allow logic for multi-action-mode agents
@@ -78,7 +79,13 @@ class BaseAgent:
def idx(self):
"""Index used to identify this agent. Must be unique within the environment."""
return self._idx
@property
def is_setup(self):
    """Whether this agent's setup has been completed (toggled via set_setup)."""
    return self._setup
def set_setup(self, value):
    """Mark this agent's setup as completed (True) or not (False).

    Args:
        value (bool): new value for the setup-completed flag exposed by
            the `is_setup` property.
    """
    # Parameter renamed from `set`, which shadowed the builtin; all visible
    # callers pass it positionally (e.g. `agent.set_setup(True)` at reset).
    self._setup = value
def register_inventory(self, resources):
"""Used during environment construction to populate inventory/escrow fields."""
if self._registered_inventory:

View File

@@ -342,16 +342,11 @@ class BaseEnvironment(ABC):
self._components_dict[component_object.name] = component_object
self._shorthand_lookup[component_object.shorthand] = component_object
# Register the components with the agents
# to finish setting up their state/action spaces.
for agent in self.world.agents:
agent.register_inventory(self.resources)
agent.register_endogenous(self.endogenous)
agent.register_components(self._components)
self.world.planner.register_inventory(self.resources)
self.world.planner.register_components(self._components)
self._agent_lookup = {str(agent.idx): agent for agent in self.all_agents}
self.apply_scenario_config_to_agents()
self._completions = 0
@@ -370,6 +365,16 @@ class BaseEnvironment(ABC):
# To collate all the agents ('0', '1', ...) data during reset and step
# into a single agent with index 'a'
self.collate_agent_step_and_reset_data = collate_agent_step_and_reset_data
def apply_scenario_config_to_agents(self):
    """Finish agent setup for the current scenario config.

    Registers resources, endogenous variables, and components with every
    mobile agent (completing their state/action spaces), rebuilds the
    idx -> agent lookup over all agents, and syncs the world's agent db.
    Re-callable, presumably to support agents inserted into an existing
    env (see World.apply_agent_db_to_world) — TODO confirm.
    """
    # Register the components with the agents
    # to finish setting up their state/action spaces.
    for agent in self.world.agents:
        agent.register_inventory(self.resources)
        agent.register_endogenous(self.endogenous)
        agent.register_components(self._components)
    # Lookup spans *all* agents (mobile + planner), keyed by string idx.
    self._agent_lookup = {str(agent.idx): agent for agent in self.all_agents}
    self.world.apply_agent_db_to_world()
def _register_entities(self, entities):
for entity in entities:
@@ -920,6 +925,7 @@ class BaseEnvironment(ABC):
# Reset actions to that default.
for agent in self.all_agents:
agent.reset_actions()
agent.set_setup(True)
# Produce observations
obs = self._generate_observations(

View File

@@ -382,7 +382,7 @@ class World:
self._agent_class_idx_map={}
#create agents
self.create_agents(agent_composition)
self.maps = Maps(world_size, self.n_agents, world_resources, world_landmarks)
planner_class = agent_registry.get("BasicPlanner")
@@ -410,7 +410,7 @@ class World:
self._agent_class_idx_map[k].append(str(self.n_agents))
self.n_agents+=1
def apply_agent_db(self):
def apply_agent_db_to_world(self):
"""Applys current agent db into lookup maps inside world and map itself. Enables insertion of new agents into existing env."""
self.n_agents=len(self._agents)
self._agent_class_idx_map={}

View File

@@ -10,7 +10,7 @@ from ai_economist.foundation.base.base_component import (
BaseComponent,
component_registry,
)
from ai_economist.foundation.entities.resources import resource_registry
from ai_economist.foundation.entities.resources import Resource, resource_registry
@component_registry.add
@@ -47,7 +47,7 @@ class Craft(BaseComponent):
skill_dist="none",
**base_component_kwargs
):
#append commodities
#setup commodities
for v in commodities:
res_class=resource_registry.get(v)
res=res_class()
@@ -74,10 +74,10 @@ class Craft(BaseComponent):
self.builds = []
super().__init__(*base_component_args, **base_component_kwargs)
def agent_can_build(self, agent, recipe):
    """Return True if *agent* currently holds every resource the recipe needs.

    Args:
        agent: agent whose `state["inventory"]` is checked.
        recipe (dict): mapping of resource name -> required amount
            (e.g. a commodity's `craft_recp`).

    Returns:
        bool: True when the inventory covers every cost in the recipe
        (vacuously True for an empty recipe).
    """
    # NOTE: this span contained interleaved old/new diff lines; this is the
    # new recipe-parameterized version. The old docstring mentioned the
    # agent's "current location", but no location is checked here.
    return all(
        agent.state["inventory"][resource] >= cost
        for resource, cost in recipe.items()
    )
@@ -93,7 +93,7 @@ class Craft(BaseComponent):
"""
# This component adds 1 action that mobile agents can take: build a house
if agent_cls_name in self.agent_subclasses:
return 1
return len(self.commodities)
return None
@@ -106,7 +106,7 @@ class Craft(BaseComponent):
if agent_cls_name not in self.agent_subclasses:
return {}
if agent_cls_name == "BasicMobileAgent":
return {"build_payment": float(self.payment), "build_skill": 1}
return {}
raise NotImplementedError
def component_step(self):
@@ -131,29 +131,32 @@ class Craft(BaseComponent):
pass
# Build! (If you can.)
elif action == 1:
if self.agent_can_build(agent):
else:
comm=self.commodities[action]
if self.agent_can_build(agent,comm.craft_recp):
# Remove the resources
for resource, cost in self.resource_cost.items():
for resource, cost in comm.craft_recp.items():
agent.state["inventory"][resource] -= cost
# Receive payment for the house
agent.state["inventory"]["Coin"] += agent.state["build_payment"]
# Receive crafted commodity
agent.state["inventory"][comm.name] += agent.state["craft_amount"][comm.name]
# Incur the labor cost for building
agent.state["endogenous"]["Labor"] += self.build_labor
agent.state["endogenous"]["Labor"] += agent.state["craft_labour"][comm.name]
build.append(
{
"builder": agent.idx,
"build_skill": self.sampled_skills[agent.idx],
"income": float(agent.state["build_payment"]),
"crafter": agent.idx,
"craft_commodity": comm.name,
"craft_skill": agent.state["craft_skill"][comm.name],
"craft_amount": agent.state["craft_amount"][comm.name],
"craft_labour": agent.state["craft_labour"][comm.name]
}
)
else:
agent.bad_action=True
else:
raise ValueError
self.builds.append(build)
@@ -168,10 +171,10 @@ class Craft(BaseComponent):
obs_dict = dict()
for agent in self.world.agents:
if agent.name in self.agent_subclasses:
obs_dict[agent.idx] = {
"build_payment": agent.state["build_payment"] / self.payment,
"build_skill": self.sampled_skills[agent.idx],
}
obs_dict[agent.idx]["craft_skill"]={}
for k in self.commodities:
obs_dict[agent.idx]["craft_skill"][k.name] = agent.state["craft_skill"][k.name]
return obs_dict
@@ -186,7 +189,8 @@ class Craft(BaseComponent):
# Mobile agents' build action is masked if they cannot build with their
# current location and/or endowment
for agent in self.world.agents:
masks[agent.idx] = np.array([self.agent_can_build(agent)])
if agent.name in self.agent_subclasses:
masks[agent.idx] = np.array([self.agent_can_build(agent,k.name) for k in self.commodities])
return masks
@@ -227,27 +231,35 @@ class Craft(BaseComponent):
"""
world = self.world
self.sampled_skills = {agent.idx: 1 for agent in world.agents}
PMSM = self.payment_max_skill_multiplier
MSAB= self.max_skill_amount_benefit
MSLB= self.max_skill_labour_benefit
for agent in world.agents:
if self.skill_dist == "none":
sampled_skill = 1
pay_rate = 1
elif self.skill_dist == "pareto":
sampled_skill = np.random.pareto(4)
pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1)
elif self.skill_dist == "lognormal":
sampled_skill = np.random.lognormal(-1, 0.5)
pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1)
else:
raise NotImplementedError
if agent.name not in self.agent_subclasses | agent.is_setup():
continue
agent.state["craft_skill"]={}
agent.state["craft_labour"]={}
agent.state["craft_amount"]={}
agent.state["build_payment"] = float(pay_rate * self.payment)
agent.state["build_skill"] = float(sampled_skill)
for comm in self.commodities:
if self.skill_dist == "none":
sampled_skill = 1
amount= 1
labour = 1
elif self.skill_dist == "pareto":
labour = 1
sampled_skill = np.random.pareto(2)
amount = np.minimum(MSAB, MSAB * sampled_skill)
labour_modifier = 1 - np.minimum(1 - MSLB, (1 - MSLB) * sampled_skill)
else:
raise NotImplementedError
agent.state["craft_skill"][comm.name]=sampled_skill
agent.state["craft_labour"][comm.name]=comm.craft_labour_base*labour_modifier
agent.state["craft_amount"][comm.name]=amount
self.sampled_skills[agent.idx] = sampled_skill
self.builds = []

474
envs/econ.py Normal file
View File

@@ -0,0 +1,474 @@
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
from copy import deepcopy
from pathlib import Path
import numpy as np
from scipy import signal
from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry
from ai_economist.foundation.scenarios.utils import rewards, social_metrics
import yaml
@scenario_registry.add
class Econ(BaseEnvironment):
"""
World containing stone and wood with stochastic regeneration. Refers to a fixed
layout file (see ./map_txt/ for examples) to determine the spatial arrangement of
stone, wood, and water tiles.
Args:
planner_gets_spatial_obs (bool): Whether the planner agent receives spatial
observations from the world.
full_observability (bool): Whether the mobile agents' spatial observation
includes the full world view or is instead an egocentric view.
mobile_agent_observation_range (int): If not using full_observability,
the spatial range (on each side of the agent) that is visible in the
spatial observations.
env_layout_file (str): Name of the layout file in ./map_txt/ to use.
Note: The world dimensions of that layout must match the world dimensions
argument used to construct the environment.
resource_regen_prob (float): Probability that an empty source tile will
regenerate a new resource unit.
fixed_four_skill_and_loc (bool): Whether to use a fixed set of build skills and
starting locations, with agents grouped into starting locations based on
which skill quartile they are in. False, by default.
True, for experiments in https://arxiv.org/abs/2004.13332.
Note: Requires that the environment uses the "Build" component with
skill_dist="pareto".
starting_agent_coin (int, float): Amount of coin agents have at t=0. Defaults
to zero coin.
isoelastic_eta (float): Parameter controlling the shape of agent utility
wrt coin endowment.
energy_cost (float): Coefficient for converting labor to negative utility.
energy_warmup_constant (float): Decay constant that controls the rate at which
the effective energy cost is annealed from 0 to energy_cost. Set to 0
(default) to disable annealing, meaning that the effective energy cost is
always energy_cost. The units of the decay constant depend on the choice of
energy_warmup_method.
energy_warmup_method (str): How to schedule energy annealing (warmup). If
"decay" (default), use the number of completed episodes. If "auto",
use the number of timesteps where the average agent reward was positive.
planner_reward_type (str): The type of reward used for the planner. Options
are "coin_eq_times_productivity" (default),
"inv_income_weighted_coin_endowment", and "inv_income_weighted_utility".
mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/
"coin_eq_times_productivity". Default is 0, which weights equality and
productivity equally. If set to 1, only productivity is rewarded.
"""
name = "econ"
agent_subclasses = ["BasicMobileAgent"]
required_entities = ["Wood", "Stone", "Water"]
def __init__(
    self,
    *base_env_args,
    resource_regen_prob=0.01,
    fixed_four_skill_and_loc=False,
    starting_agent_coin=0,
    isoelastic_eta=0.23,
    energy_cost=0.21,
    energy_warmup_constant=0,
    energy_warmup_method="decay",
    planner_reward_type="coin_eq_times_productivity",
    mixing_weight_gini_vs_coin=0.0,
    **base_env_kwargs,
):
    """Validate scenario kwargs and initialize reward/annealing trackers.

    See the class docstring for parameter semantics.
    """
    super().__init__(*base_env_args, **base_env_kwargs)
    # Stochastic regeneration specs for the two map resources.
    # NOTE(review): fixed_four_skill_and_loc is accepted but never read in
    # this method — confirm whether it is still needed.
    self.layout_specs = dict(
        Wood={
            "regen_weight": float(resource_regen_prob),
            "regen_halfwidth": 0,
            "max_health": 1,
        },
        Stone={
            "regen_weight": float(resource_regen_prob),
            "regen_halfwidth": 0,
            "max_health": 1,
        },
    )
    assert 0 <= self.layout_specs["Wood"]["regen_weight"] <= 1
    assert 0 <= self.layout_specs["Stone"]["regen_weight"] <= 1
    # How much coin do agents begin with at upon reset
    self.starting_agent_coin = float(starting_agent_coin)
    assert self.starting_agent_coin >= 0.0
    # Controls the diminishing marginal utility of coin.
    # isoelastic_eta=0 means no diminishing utility.
    self.isoelastic_eta = float(isoelastic_eta)
    assert 0.0 <= self.isoelastic_eta <= 1.0
    # The amount that labor is weighted in utility computation
    # (once annealing is finished)
    self.energy_cost = float(energy_cost)
    assert self.energy_cost >= 0
    # Which method to use for calculating the progress of energy annealing
    # If method = 'decay': #completed episodes
    # If method = 'auto' : #timesteps where avg. agent reward > 0
    self.energy_warmup_method = energy_warmup_method.lower()
    assert self.energy_warmup_method in ["decay", "auto"]
    # Decay constant for annealing to full energy cost
    # (if energy_warmup_constant == 0, there is no annealing)
    self.energy_warmup_constant = float(energy_warmup_constant)
    assert self.energy_warmup_constant >= 0
    self._auto_warmup_integrator = 0
    # Which social welfare function to use
    self.planner_reward_type = str(planner_reward_type).lower()
    # How much to weight equality if using SWF=eq*prod:
    # 0 -> SWF=eq * prod
    # 1 -> SWF=prod
    self.mixing_weight_gini_vs_coin = float(mixing_weight_gini_vs_coin)
    assert 0 <= self.mixing_weight_gini_vs_coin <= 1.0
    # Use this to calculate marginal changes and deliver that as reward
    self.init_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.prev_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.curr_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    # Residual section header left over from removed fixed-four-skill logic.
    """
    Fixed Four Skill and Loc
    ------------------------
    """
    self.agent_starting_pos = {agent.idx: [] for agent in self.world.agents}
    self.last_log_loged = {}
@property
def energy_weight(self):
    """
    Energy annealing progress. Multiply with self.energy_cost to get the
    effective energy coefficient.

    Returns 1.0 (no annealing) when energy_warmup_constant <= 0; otherwise
    an exponential ramp from 0 toward 1 driven either by the number of
    completed episodes ("decay") or by the auto-warmup integrator ("auto").
    """
    if self.energy_warmup_constant <= 0.0:
        return 1.0
    if self.energy_warmup_method == "decay":
        return float(1.0 - np.exp(-self._completions / self.energy_warmup_constant))
    if self.energy_warmup_method == "auto":
        return float(
            1.0
            - np.exp(-self._auto_warmup_integrator / self.energy_warmup_constant)
        )
    # Unreachable in practice: __init__ asserts method is "decay" or "auto".
    raise NotImplementedError
def is_bad_action(self, agent):
    """Consume and return the agent's bad-action flag.

    Returns the current value of `agent.bad_action` and resets the flag to
    False, so each flagged action is reported exactly once.
    """
    was_bad = agent.bad_action
    agent.bad_action = False
    return was_bad
def get_current_optimization_metrics(self):
    """
    Compute optimization metrics based on the current state. Used to compute reward.

    Returns:
        curr_optimization_metric (dict): A dictionary of {agent.idx: metric}
        with an entry for each agent (including the planner) in the env.
    """
    curr_optimization_metric = {}
    # (for agents): isoelastic utility of coin minus the (annealed) labor cost.
    for agent in self.world.agents:
        rew = rewards.isoelastic_coin_minus_labor(
            coin_endowment=agent.total_endowment("Coin"),
            total_labor=agent.state["endogenous"]["Labor"],
            isoelastic_eta=self.isoelastic_eta,
            labor_coefficient=self.energy_weight * self.energy_cost,
        )
        # rew -= agent.state["endogenous"]["noops"]
        curr_optimization_metric[agent.idx] = rew
    # (for the planner): social welfare per the configured reward type.
    if self.planner_reward_type == "coin_eq_times_productivity":
        curr_optimization_metric[
            self.world.planner.idx
        ] = rewards.coin_eq_times_productivity(
            coin_endowments=np.array(
                [agent.total_endowment("Coin") for agent in self.world.agents]
            ),
            equality_weight=1 - self.mixing_weight_gini_vs_coin,
        )
    elif self.planner_reward_type == "inv_income_weighted_coin_endowments":
        curr_optimization_metric[
            self.world.planner.idx
        ] = rewards.inv_income_weighted_coin_endowments(
            coin_endowments=np.array(
                [agent.total_endowment("Coin") for agent in self.world.agents]
            )
        )
    elif self.planner_reward_type == "inv_income_weighted_utility":
        curr_optimization_metric[
            self.world.planner.idx
        ] = rewards.inv_income_weighted_utility(
            coin_endowments=np.array(
                [agent.total_endowment("Coin") for agent in self.world.agents]
            ),
            # Uses the per-agent utilities computed in the loop above.
            utilities=np.array(
                [curr_optimization_metric[agent.idx] for agent in self.world.agents]
            ),
        )
    else:
        print("No valid planner reward selected!")
        raise NotImplementedError
    return curr_optimization_metric
# The following methods must be implemented for each scenario
# -----------------------------------------------------------
def reset_starting_layout(self):
    """
    Part 1/2 of scenario reset: reset the resource/landmark layout.

    Clears the world map, then seeds a single unit each of Wood and Stone
    at the origin tile (0, 0).
    """
    self.world.maps.clear()
    for res in ("Wood", "Stone"):
        self.world.maps.set_point_add(res, 0, 0, 1)
def reset_agent_states(self):
    """
    Part 2/2 of scenario reset: reset the agents themselves.

    Mobile agents that have NOT completed setup get fresh zeroed
    inventory/escrow/endogenous state plus their starting coin; agents that
    are already set up keep their state. The planner's inventory and escrow
    are always zeroed, and every agent's bad-action flag is cleared.
    """
    self.world.clear_agent_locs()
    for agent in self.world.agents:
        # BUG FIX: `is_setup` is a property; the original called it as a
        # method (`agent.is_setup()`), raising TypeError on a bool.
        if not agent.is_setup:
            agent.state["inventory"] = {k: 0 for k in agent.inventory.keys()}
            # NOTE(review): escrow is keyed off inventory here, while the
            # planner below uses escrow.keys() — confirm which is intended.
            agent.state["escrow"] = {k: 0 for k in agent.inventory.keys()}
            agent.state["endogenous"] = {k: 0 for k in agent.endogenous.keys()}
            # Add starting coin
            agent.state["inventory"]["Coin"] = float(self.starting_agent_coin)
        # NOTE(review): the flattened paste is ambiguous about whether this
        # sat inside the `if`; clearing for every agent each reset is assumed
        # (compute_reward consumes the flag) — confirm.
        agent.bad_action = False
    self.world.planner.state["inventory"] = {
        k: 0 for k in self.world.planner.inventory.keys()
    }
    self.world.planner.state["escrow"] = {
        k: 0 for k in self.world.planner.escrow.keys()
    }
def scenario_step(self):
    """
    Advance scenario-owned world state by one timestep.

    Called by base_env's `step` after every component step and before
    observations/rewards are generated. Here it handles resource
    regeneration: the Wood and Stone piles at the origin tile (0, 0) are
    each topped up by 20 units.
    """
    for res in ("Wood", "Stone"):
        self.world.maps.set_point_add(res, 0, 0, 20)
def generate_observations(self):
    """
    Produce this scenario's observation dict.

    Returns:
        obs (dict): {agent.idx: obs_dict}. The planner observes its own
        (scaled) inventory; each mobile agent (keyed by its string idx)
        observes its own (scaled) inventory. Inventory entries use keys of
        the form "inventory-<resource>".
    """
    scale = self.inv_scale
    obs = {
        self.world.planner.idx: {
            "inventory-" + k: v * scale
            for k, v in self.world.planner.inventory.items()
        }
    }
    for agent in self.world.agents:
        obs[str(agent.idx)] = {
            "inventory-" + k: v * scale for k, v in agent.inventory.items()
        }
    return obs
def compute_reward(self):
    """
    Apply the reward function(s) associated with this scenario to get the rewards
    from this step.

    Returns:
        rew (dict): A dictionary of {agent.idx: reward}. In words,
        return a dictionary with an entry for each agent in the environment
        (including the planner). For each entry, the key specifies the index of
        the agent and the value contains the scalar reward earned this timestep.

    Rewards are computed as the marginal utility (agents) or marginal social
    welfare (planner) experienced on this timestep. Mobile agents additionally
    receive a flat -1 penalty on steps where they attempted an invalid action
    (flag consumed via is_bad_action).
    """
    # "curr_optimization_metric" hasn't been updated yet, so it gives us the
    # utility from the last step.
    utility_at_end_of_last_time_step = deepcopy(self.curr_optimization_metric)
    # compute current objectives and store the values
    self.curr_optimization_metric = self.get_current_optimization_metrics()
    # reward = curr - prev objectives
    rew = {}
    for k, v in self.curr_optimization_metric.items():
        rew[k] = float(v - utility_at_end_of_last_time_step[k])
        if k != "p":  # "p" is the planner; no bad-action penalty for it
            # NOTE(review): `self.world.agents[k]` indexes the agent list
            # with the metric key k — confirm mobile-agent idx values are
            # valid list indices (an idx->agent lookup may be intended).
            if self.is_bad_action(self.world.agents[k]):
                rew[k] -= 1
    # store the previous objective values
    self.prev_optimization_metric.update(utility_at_end_of_last_time_step)
    # Automatic Energy Cost Annealing
    # -------------------------------
    avg_agent_rew = np.mean([rew[a.idx] for a in self.world.agents])
    # Count the number of timesteps where the avg agent reward was > 0
    if avg_agent_rew > 0:
        self._auto_warmup_integrator += 1
    return rew
# Optional methods for customization
# ----------------------------------
def additional_reset_steps(self):
    """
    Final scenario-specific step of the reset cycle.

    Runs after reset_starting_layout(), reset_agent_states(), and each
    component's reset(). Here it re-baselines the optimization-metric
    trackers (init/prev/curr) from the post-reset state, so the first
    step's marginal reward is measured against a fresh baseline.
    """
    baseline = self.get_current_optimization_metrics()
    self.curr_optimization_metric = deepcopy(baseline)
    self.init_optimization_metric = deepcopy(baseline)
    self.prev_optimization_metric = deepcopy(baseline)
def scenario_metrics(self):
    """
    Allows the scenario to generate metrics (collected along with component metrics
    in the 'metrics' property).

    To have the scenario add metrics, this function needs to return a dictionary of
    {metric_key: value} where 'value' is a scalar (no nesting or lists!)

    Here, summarize social metrics, endowments, utilities, and labor cost annealing.
    """
    metrics = dict()
    # Population-level coin statistics.
    coin_endowments = np.array(
        [agent.total_endowment("Coin") for agent in self.world.agents]
    )
    metrics["social/productivity"] = social_metrics.get_productivity(
        coin_endowments
    )
    metrics["social/equality"] = social_metrics.get_equality(coin_endowments)
    utilities = np.array(
        [self.curr_optimization_metric[agent.idx] for agent in self.world.agents]
    )
    # Candidate social-welfare functions, reported regardless of which one
    # is configured as the planner reward.
    metrics[
        "social_welfare/coin_eq_times_productivity"
    ] = rewards.coin_eq_times_productivity(
        coin_endowments=coin_endowments, equality_weight=1.0
    )
    metrics[
        "social_welfare/inv_income_weighted_coin_endow"
    ] = rewards.inv_income_weighted_coin_endowments(coin_endowments=coin_endowments)
    metrics[
        "social_welfare/inv_income_weighted_utility"
    ] = rewards.inv_income_weighted_utility(
        coin_endowments=coin_endowments, utilities=utilities
    )
    # Per-agent endowments, endogenous quantities, and current utility.
    for agent in self.all_agents:
        for resource, quantity in agent.inventory.items():
            metrics[
                "endow/{}/{}".format(agent.idx, resource)
            ] = agent.total_endowment(resource)
        if agent.endogenous is not None:
            for resource, quantity in agent.endogenous.items():
                metrics["endogenous/{}/{}".format(agent.idx, resource)] = quantity
        metrics["util/{}".format(agent.idx)] = self.curr_optimization_metric[
            agent.idx
        ]
    # Labor weight
    metrics["labor/weighted_cost"] = self.energy_cost * self.energy_weight
    metrics["labor/warmup_integrator"] = int(self._auto_warmup_integrator)
    return metrics

View File

@@ -1,5 +1,7 @@
from ai_economist import foundation
import numpy as np
from ai_economist import foundation
from stable_baselines3.common.vec_env import vec_frame_stack
from stable_baselines3.common.evaluation import evaluate_policy
import envs

View File

@@ -23,7 +23,7 @@ class RecieverEconWrapper(gym.Env):
self.idx_to_index={}
#create idx to index map
for i in range(len(self.agnet_idx)):
self.idx_to_index[self.agnet_idx[i]]=i
self.idx_to_index[str(self.agnet_idx[i])]=i
first_idx=self.agnet_idx[0]
@@ -35,6 +35,7 @@ class RecieverEconWrapper(gym.Env):
def _dict_idx_to_index(self, data):
data_out={}
for k,v in data.items():
if k in self.idx_to_index:
index=self.idx_to_index[k]
data_out[index]=v