# Copyright (c) 2020, salesforce.com, inc. # All rights reserved. # SPDX-License-Identifier: BSD-3-Clause # For full license text, see the LICENSE file in the repo root # or https://opensource.org/licenses/BSD-3-Clause from copy import deepcopy from pathlib import Path import numpy as np from scipy import signal from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry from ai_economist.foundation.scenarios.utils import rewards, social_metrics import yaml @scenario_registry.add class SimpleMarket(BaseEnvironment): """ World containing stone and wood with stochastic regeneration. Refers to a fixed layout file (see ./map_txt/ for examples) to determine the spatial arrangement of stone, wood, and water tiles. Args: planner_gets_spatial_obs (bool): Whether the planner agent receives spatial observations from the world. full_observability (bool): Whether the mobile agents' spatial observation includes the full world view or is instead an egocentric view. mobile_agent_observation_range (int): If not using full_observability, the spatial range (on each side of the agent) that is visible in the spatial observations. env_layout_file (str): Name of the layout file in ./map_txt/ to use. Note: The world dimensions of that layout must match the world dimensions argument used to construct the environment. resource_regen_prob (float): Probability that an empty source tile will regenerate a new resource unit. fixed_four_skill_and_loc (bool): Whether to use a fixed set of build skills and starting locations, with agents grouped into starting locations based on which skill quartile they are in. False, by default. True, for experiments in https://arxiv.org/abs/2004.13332. Note: Requires that the environment uses the "Build" component with skill_dist="pareto". starting_agent_coin (int, float): Amount of coin agents have at t=0. Defaults to zero coin. isoelastic_eta (float): Parameter controlling the shape of agent utility wrt coin endowment. energy_cost (float): Coefficient for converting labor to negative utility. energy_warmup_constant (float): Decay constant that controls the rate at which the effective energy cost is annealed from 0 to energy_cost. Set to 0 (default) to disable annealing, meaning that the effective energy cost is always energy_cost. The units of the decay constant depend on the choice of energy_warmup_method. energy_warmup_method (str): How to schedule energy annealing (warmup). If "decay" (default), use the number of completed episodes. If "auto", use the number of timesteps where the average agent reward was positive. planner_reward_type (str): The type of reward used for the planner. Options are "coin_eq_times_productivity" (default), "inv_income_weighted_coin_endowment", and "inv_income_weighted_utility". mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/ "coin_eq_times_productivity". Default is 0, which weights equality and productivity equally. If set to 1, only productivity is rewarded. """ name = "simple_market" agent_subclasses = ["BasicMobileAgent"] required_entities = ["Wood", "Stone", "Water"] def __init__( self, *base_env_args, resource_regen_prob=0.01, fixed_four_skill_and_loc=False, starting_agent_coin=0, isoelastic_eta=0.23, energy_cost=0.21, energy_warmup_constant=0, energy_warmup_method="decay", planner_reward_type="coin_eq_times_productivity", mixing_weight_gini_vs_coin=0.0, **base_env_kwargs, ): super().__init__(*base_env_args, **base_env_kwargs) self.layout_specs = dict( Wood={ "regen_weight": float(resource_regen_prob), "regen_halfwidth": 0, "max_health": 1, }, Stone={ "regen_weight": float(resource_regen_prob), "regen_halfwidth": 0, "max_health": 1, }, ) assert 0 <= self.layout_specs["Wood"]["regen_weight"] <= 1 assert 0 <= self.layout_specs["Stone"]["regen_weight"] <= 1 # How much coin do agents begin with at upon reset self.starting_agent_coin = float(starting_agent_coin) assert self.starting_agent_coin >= 0.0 # Controls the diminishing marginal utility of coin. # isoelastic_eta=0 means no diminishing utility. self.isoelastic_eta = float(isoelastic_eta) assert 0.0 <= self.isoelastic_eta <= 1.0 # The amount that labor is weighted in utility computation # (once annealing is finished) self.energy_cost = float(energy_cost) assert self.energy_cost >= 0 # Which method to use for calculating the progress of energy annealing # If method = 'decay': #completed episodes # If method = 'auto' : #timesteps where avg. agent reward > 0 self.energy_warmup_method = energy_warmup_method.lower() assert self.energy_warmup_method in ["decay", "auto"] # Decay constant for annealing to full energy cost # (if energy_warmup_constant == 0, there is no annealing) self.energy_warmup_constant = float(energy_warmup_constant) assert self.energy_warmup_constant >= 0 self._auto_warmup_integrator = 0 # Which social welfare function to use self.planner_reward_type = str(planner_reward_type).lower() # How much to weight equality if using SWF=eq*prod: # 0 -> SWF=eq * prod # 1 -> SWF=prod self.mixing_weight_gini_vs_coin = float(mixing_weight_gini_vs_coin) assert 0 <= self.mixing_weight_gini_vs_coin <= 1.0 # Use this to calculate marginal changes and deliver that as reward self.init_optimization_metric = {agent.idx: 0 for agent in self.all_agents} self.prev_optimization_metric = {agent.idx: 0 for agent in self.all_agents} self.curr_optimization_metric = {agent.idx: 0 for agent in self.all_agents} """ Fixed Four Skill and Loc ------------------------ """ self.agent_starting_pos = {agent.idx: [] for agent in self.world.agents} self.last_log_loged={} @property def energy_weight(self): """ Energy annealing progress. Multiply with self.energy_cost to get the effective energy coefficient. """ if self.energy_warmup_constant <= 0.0: return 1.0 if self.energy_warmup_method == "decay": return float(1.0 - np.exp(-self._completions / self.energy_warmup_constant)) if self.energy_warmup_method == "auto": return float( 1.0 - np.exp(-self._auto_warmup_integrator / self.energy_warmup_constant) ) raise NotImplementedError def is_bad_action(self,agent): bad=agent.bad_action agent.bad_action=False return bad def get_current_optimization_metrics(self): """ Compute optimization metrics based on the current state. Used to compute reward. Returns: curr_optimization_metric (dict): A dictionary of {agent.idx: metric} with an entry for each agent (including the planner) in the env. """ curr_optimization_metric = {} # (for agents) for agent in self.world.agents: rew= rewards.isoelastic_coin_minus_labor( coin_endowment=agent.total_endowment("Coin"), total_labor=agent.state["endogenous"]["Labor"], isoelastic_eta=self.isoelastic_eta, labor_coefficient=self.energy_weight * self.energy_cost, ) #rew-=agent.state["endogenous"]["noops"] curr_optimization_metric[agent.idx] = rew # (for the planner) if self.planner_reward_type == "coin_eq_times_productivity": curr_optimization_metric[ self.world.planner.idx ] = rewards.coin_eq_times_productivity( coin_endowments=np.array( [agent.total_endowment("Coin") for agent in self.world.agents] ), equality_weight=1 - self.mixing_weight_gini_vs_coin, ) elif self.planner_reward_type == "inv_income_weighted_coin_endowments": curr_optimization_metric[ self.world.planner.idx ] = rewards.inv_income_weighted_coin_endowments( coin_endowments=np.array( [agent.total_endowment("Coin") for agent in self.world.agents] ) ) elif self.planner_reward_type == "inv_income_weighted_utility": curr_optimization_metric[ self.world.planner.idx ] = rewards.inv_income_weighted_utility( coin_endowments=np.array( [agent.total_endowment("Coin") for agent in self.world.agents] ), utilities=np.array( [curr_optimization_metric[agent.idx] for agent in self.world.agents] ), ) else: print("No valid planner reward selected!") raise NotImplementedError return curr_optimization_metric # The following methods must be implemented for each scenario # ----------------------------------------------------------- def reset_starting_layout(self): """ Part 1/2 of scenario reset. This method handles resetting the state of the environment managed by the scenario (i.e. resource & landmark layout). Here, reset to the layout in the fixed layout file """ self.world.maps.clear() resources = ["Wood", "Stone"] for resource in resources: self.world.maps.set_point_add(resource,0,0,1) def reset_agent_states(self): """ Part 2/2 of scenario reset. This method handles resetting the state of the agents themselves (i.e. inventory, locations, etc.). Here, empty inventories and place mobile agents in random, accessible locations to start. Note: If using fixed_four_skill_and_loc, the starting locations will be overridden in self.additional_reset_steps. """ self.world.clear_agent_locs() for agent in self.world.agents: agent.state["inventory"] = {k: 0 for k in agent.inventory.keys()} agent.state["escrow"] = {k: 0 for k in agent.inventory.keys()} agent.state["endogenous"] = {k: 0 for k in agent.endogenous.keys()} # Add starting coin agent.state["inventory"]["Coin"] = float(self.starting_agent_coin) agent.bad_action=False self.world.planner.state["inventory"] = { k: 0 for k in self.world.planner.inventory.keys() } self.world.planner.state["escrow"] = { k: 0 for k in self.world.planner.escrow.keys() } def scenario_step(self): """ Update the state of the world according to whatever rules this scenario implements. This gets called in the 'step' method (of base_env) after going through each component step and before generating observations, rewards, etc. In this class of scenarios, the scenario step handles stochastic resource regeneration. """ resources = ["Wood", "Stone"] for resource in resources: self.world.maps.set_point_add(resource,0,0,20) def generate_observations(self): """ Generate observations associated with this scenario. A scenario does not need to produce observations and can provide observations for only some agent types; however, for a given agent type, it should either always or never yield an observation. If it does yield an observation, that observation should always have the same structure/sizes! Returns: obs (dict): A dictionary of {agent.idx: agent_obs_dict}. In words, return a dictionary with an entry for each agent (which can including the planner) for which this scenario provides an observation. For each entry, the key specifies the index of the agent and the value contains its associated observation dictionary. Here, non-planner agents receive spatial observations (depending on the env config) as well as the contents of their inventory and endogenous quantities. The planner also receives spatial observations (again, depending on the env config) as well as the inventory of each of the mobile agents. """ obs = {} agent_invs = { str(agent.idx): { "inventory-" + k: v * self.inv_scale for k, v in agent.inventory.items() } for agent in self.world.agents } obs[self.world.planner.idx] = { "inventory-" + k: v * self.inv_scale for k, v in self.world.planner.inventory.items() } for agent in self.world.agents: sidx = str(agent.idx) obs[sidx]=agent_invs[sidx] return obs def compute_reward(self): """ Apply the reward function(s) associated with this scenario to get the rewards from this step. Returns: rew (dict): A dictionary of {agent.idx: agent_obs_dict}. In words, return a dictionary with an entry for each agent in the environment (including the planner). For each entry, the key specifies the index of the agent and the value contains the scalar reward earned this timestep. Rewards are computed as the marginal utility (agents) or marginal social welfare (planner) experienced on this timestep. Ignoring discounting, this means that agents' (planner's) objective is to maximize the utility (social welfare) associated with the terminal state of the episode. """ # "curr_optimization_metric" hasn't been updated yet, so it gives us the # utility from the last step. utility_at_end_of_last_time_step = deepcopy(self.curr_optimization_metric) # compute current objectives and store the values self.curr_optimization_metric = self.get_current_optimization_metrics() # reward = curr - prev objectives rew={} for k, v in self.curr_optimization_metric.items(): rew[k] = float(v - utility_at_end_of_last_time_step[k]) if k!="p": if self.is_bad_action(self.world.agents[k]): rew[k]-=1 # store the previous objective values self.prev_optimization_metric.update(utility_at_end_of_last_time_step) # Automatic Energy Cost Annealing # ------------------------------- avg_agent_rew = np.mean([rew[a.idx] for a in self.world.agents]) # Count the number of timesteps where the avg agent reward was > 0 if avg_agent_rew > 0: self._auto_warmup_integrator += 1 return rew # Optional methods for customization # ---------------------------------- def additional_reset_steps(self): """ Extra scenario-specific steps that should be performed at the end of the reset cycle. For each reset cycle... First, reset_starting_layout() and reset_agent_states() will be called. Second, .reset() will be called for each registered component. Lastly, this method will be called to allow for any final customization of the reset cycle. For this scenario, this method resets optimization metric trackers. If using fixed_four_skill_and_loc, this is where each agent gets assigned to one of the four fixed skill/loc combinations. The agent-->skill/loc assignment is permuted so that all four skill/loc combinations are used. """ # compute current objectives curr_optimization_metric = self.get_current_optimization_metrics() self.curr_optimization_metric = deepcopy(curr_optimization_metric) self.init_optimization_metric = deepcopy(curr_optimization_metric) self.prev_optimization_metric = deepcopy(curr_optimization_metric) def scenario_metrics(self): """ Allows the scenario to generate metrics (collected along with component metrics in the 'metrics' property). To have the scenario add metrics, this function needs to return a dictionary of {metric_key: value} where 'value' is a scalar (no nesting or lists!) Here, summarize social metrics, endowments, utilities, and labor cost annealing. """ metrics = dict() coin_endowments = np.array( [agent.total_endowment("Coin") for agent in self.world.agents] ) metrics["social/productivity"] = social_metrics.get_productivity( coin_endowments ) metrics["social/equality"] = social_metrics.get_equality(coin_endowments) utilities = np.array( [self.curr_optimization_metric[agent.idx] for agent in self.world.agents] ) metrics[ "social_welfare/coin_eq_times_productivity" ] = rewards.coin_eq_times_productivity( coin_endowments=coin_endowments, equality_weight=1.0 ) metrics[ "social_welfare/inv_income_weighted_coin_endow" ] = rewards.inv_income_weighted_coin_endowments(coin_endowments=coin_endowments) metrics[ "social_welfare/inv_income_weighted_utility" ] = rewards.inv_income_weighted_utility( coin_endowments=coin_endowments, utilities=utilities ) for agent in self.all_agents: for resource, quantity in agent.inventory.items(): metrics[ "endow/{}/{}".format(agent.idx, resource) ] = agent.total_endowment(resource) if agent.endogenous is not None: for resource, quantity in agent.endogenous.items(): metrics["endogenous/{}/{}".format(agent.idx, resource)] = quantity metrics["util/{}".format(agent.idx)] = self.curr_optimization_metric[ agent.idx ] # Labor weight metrics["labor/weighted_cost"] = self.energy_cost * self.energy_weight metrics["labor/warmup_integrator"] = int(self._auto_warmup_integrator) return metrics