it is working

This commit is contained in:
2023-01-11 19:04:20 +01:00
commit 86874dcfd3
13 changed files with 1768 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
logs/*

16
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,16 @@
{
// Uses IntelliSense to discover possible attributes.
// Hover over existing attributes to view their descriptions.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Aktuelle Datei",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": false
}
]
}

4
components/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
from . import(
simple_gather,
simple_build
)

9
components/noops.py Normal file
View File

@@ -0,0 +1,9 @@
from ai_economist.foundation.base.registrar import Registry
from ai_economist.foundation.entities.endogenous import Endogenous, endogenous_registry
@endogenous_registry.add
class Noop(Endogenous):
    """Endogenous variable tracking consecutive no-op actions performed by an agent."""

    name = "Noop"

256
components/simple_build.py Normal file
View File

@@ -0,0 +1,256 @@
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
import numpy as np
from ai_economist.foundation.base.base_component import (
BaseComponent,
component_registry,
)
@component_registry.add
class SimpleCraft(BaseComponent):
    """
    Allows mobile agents to convert stone and wood into coin ("building"),
    earning income.

    Can be configured to include heterogeneous building skill where agents earn
    different levels of income when building.

    NOTE(review): unlike the upstream ai-economist Build component, this
    simplified variant does not place a House landmark on the world map; it
    only consumes resources, adds coin, and charges labor.

    Args:
        payment (int): Default amount of coin agents earn from building.
            Must be >= 0. Default is 10.
        payment_max_skill_multiplier (int): Maximum skill multiplier that an agent
            can sample. Must be >= 1. Default is 1.
        skill_dist (str): Distribution type for sampling skills. Default ("none")
            gives all agents identical skill equal to a multiplier of 1. "pareto" and
            "lognormal" sample skills from the associated distributions.
        build_labor (float): Labor cost associated with building a house.
            Must be >= 0. Default is 10.
    """

    name = "SimpleCraft"
    component_type = "Build"
    required_entities = ["Wood", "Stone", "Coin", "House", "Labor"]
    agent_subclasses = ["BasicMobileAgent"]

    def __init__(
        self,
        *base_component_args,
        payment=10,
        payment_max_skill_multiplier=1,
        skill_dist="none",
        build_labor=10.0,
        **base_component_kwargs
    ):
        super().__init__(*base_component_args, **base_component_kwargs)

        self.payment = int(payment)
        assert self.payment >= 0

        self.payment_max_skill_multiplier = int(payment_max_skill_multiplier)
        assert self.payment_max_skill_multiplier >= 1

        # Fixed recipe: one unit each of wood and stone per build action.
        self.resource_cost = {"Wood": 1, "Stone": 1}

        self.build_labor = float(build_labor)
        assert self.build_labor >= 0

        self.skill_dist = skill_dist.lower()
        assert self.skill_dist in ["none", "pareto", "lognormal"]

        # {agent.idx: sampled skill}; populated in additional_reset_steps().
        self.sampled_skills = {}

        # Dense log: one list of build-event dicts per component step.
        self.builds = []

    def agent_can_build(self, agent):
        """Return True if agent can actually build in its current location."""
        # See if the agent has the resources necessary to complete the action
        for resource, cost in self.resource_cost.items():
            if agent.state["inventory"][resource] < cost:
                return False

        return True

    # Required methods for implementing components
    # --------------------------------------------

    def get_n_actions(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        Add a single action (build) for mobile agents.
        """
        # This component adds 1 action that mobile agents can take: build a house
        if agent_cls_name == "BasicMobileAgent":
            return 1

        return None

    def get_additional_state_fields(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        For mobile agents, add state fields for building skill.
        """
        if agent_cls_name not in self.agent_subclasses:
            return {}
        if agent_cls_name == "BasicMobileAgent":
            return {"build_payment": float(self.payment), "build_skill": 1}
        raise NotImplementedError

    def component_step(self):
        """
        See base_component.py for detailed description.

        Convert stone+wood to coin for agents that choose to build and can.
        Agents that choose to build but lack resources are flagged via
        agent.bad_action (consumed by the scenario's reward function).
        """
        world = self.world
        build = []
        # Apply any building actions taken by the mobile agents
        for agent in world.get_random_order_agents():

            action = agent.get_component_action(self.name)

            # This component doesn't apply to this agent!
            if action is None:
                continue

            # NO-OP!
            if action == 0:
                pass

            # Build! (If you can.)
            elif action == 1:
                if self.agent_can_build(agent):
                    # Remove the resources
                    for resource, cost in self.resource_cost.items():
                        agent.state["inventory"][resource] -= cost

                    # Receive payment for the house
                    agent.state["inventory"]["Coin"] += agent.state["build_payment"]

                    # Incur the labor cost for building
                    agent.state["endogenous"]["Labor"] += self.build_labor

                    build.append(
                        {
                            "builder": agent.idx,
                            "build_skill": self.sampled_skills[agent.idx],
                            "income": float(agent.state["build_payment"]),
                        }
                    )
                else:
                    # Chose to build without the required resources.
                    agent.bad_action=True

            else:
                raise ValueError

        self.builds.append(build)

    def generate_observations(self):
        """
        See base_component.py for detailed description.

        Here, agents observe their build payment (normalized by the base
        payment) and build skill. The planner does not observe anything
        from this component.
        """

        obs_dict = dict()
        for agent in self.world.agents:
            obs_dict[agent.idx] = {
                "build_payment": agent.state["build_payment"] / self.payment,
                "build_skill": self.sampled_skills[agent.idx],
            }

        return obs_dict

    def generate_masks(self, completions=0):
        """
        See base_component.py for detailed description.

        Mask the build action whenever the agent lacks the required resources
        (see agent_can_build); location plays no role in this variant.
        """

        masks = {}
        # Mobile agents' build action is masked if they cannot build with their
        # current endowment
        for agent in self.world.agents:
            masks[agent.idx] = np.array([self.agent_can_build(agent)])

        return masks

    # For non-required customization
    # ------------------------------

    def get_metrics(self):
        """
        Metrics that capture what happened through this component.

        Returns:
            metrics (dict): A dictionary of {"metric_name": metric_value},
                where metric_value is a scalar.
        """
        world = self.world

        build_stats = {a.idx: {"n_builds": 0} for a in world.agents}
        for builds in self.builds:
            for build in builds:
                idx = build["builder"]
                build_stats[idx]["n_builds"] += 1

        out_dict = {}
        for a in world.agents:
            for k, v in build_stats[a.idx].items():
                out_dict["{}/{}".format(a.idx, k)] = v

        # NOTE(review): this component never places House landmarks, so this
        # count reflects houses placed by other components (if any) — verify.
        num_houses = np.sum(world.maps.get("House") > 0)
        out_dict["total_builds"] = num_houses

        return out_dict

    def additional_reset_steps(self):
        """
        See base_component.py for detailed description.

        Re-sample agents' building skills.
        """
        world = self.world

        self.sampled_skills = {agent.idx: 1 for agent in world.agents}

        PMSM = self.payment_max_skill_multiplier

        for agent in world.agents:
            if self.skill_dist == "none":
                sampled_skill = 1
                pay_rate = 1
            elif self.skill_dist == "pareto":
                sampled_skill = np.random.pareto(4)
                # Pay rate is clipped to the configured maximum multiplier.
                pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1)
            elif self.skill_dist == "lognormal":
                sampled_skill = np.random.lognormal(-1, 0.5)
                pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1)
            else:
                raise NotImplementedError

            agent.state["build_payment"] = float(pay_rate * self.payment)
            agent.state["build_skill"] = float(sampled_skill)

            self.sampled_skills[agent.idx] = sampled_skill

        self.builds = []

    def get_dense_log(self):
        """
        Log builds.

        Returns:
            builds (list): A list of build events. Each entry corresponds to a single
                timestep and contains a description of any builds that occurred on
                that timestep.
        """
        return self.builds

214
components/simple_gather.py Normal file
View File

@@ -0,0 +1,214 @@
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
import numpy as np
from numpy.random import rand
from ai_economist.foundation.base.base_component import (
BaseComponent,
component_registry,
)
from ai_economist.foundation.entities import resource_registry, resources
@component_registry.add
class SimpleGather(BaseComponent):
    """
    Allows mobile agents to collect resources from a shared pool.

    Can be configured to include collection skill, where agents have heterogeneous
    probabilities of collecting bonus resources without additional labor cost.

    NOTE(review): unlike the upstream Gather component there is no movement
    here; all resources appear to live at map point (0, 0), which acts as a
    global stockpile — confirm against the maps implementation.

    Args:
        collect_labor (float): Labor cost associated with collecting resources.
            Must be >= 0. Default is 1.0.
        skill_dist (str): Distribution type for sampling skills. Default ("none")
            gives all agents identical skill equal to a bonus prob of 0. "pareto" and
            "lognormal" sample skills from the associated distributions.
    """

    name = "SimpleGather"
    required_entities = ["Coin", "House", "Labor"]
    agent_subclasses = ["BasicMobileAgent"]

    def __init__(
        self,
        *base_component_args,
        collect_labor=1.0,
        skill_dist="none",
        **base_component_kwargs
    ):
        super().__init__(*base_component_args, **base_component_kwargs)

        self.collect_labor = float(collect_labor)
        assert self.collect_labor >= 0

        self.skill_dist = skill_dist.lower()
        assert self.skill_dist in ["none", "pareto", "lognormal"]

        # Dense log: one list of gather-event dicts per component step.
        self.gathers = []

        # Names (str) of the collectible resources in this world.
        self.commodities = [
            r for r in self.world.resources if resource_registry.get(r).collectible
        ]

    # Required methods for implementing components
    # --------------------------------------------

    def get_n_actions(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        Adds 1 action per commodity that can be picked up.
        """
        if agent_cls_name == "BasicMobileAgent":
            return len(self.commodities)
        return None

    def get_additional_state_fields(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        For mobile agents, add state field for collection skill.
        """
        if agent_cls_name not in self.agent_subclasses:
            return {}
        if agent_cls_name == "BasicMobileAgent":
            return {"bonus_gather_prob": 0.0}
        raise NotImplementedError

    def component_step(self):
        """
        See base_component.py for detailed description.

        Pickup resources if available from env. Agents that try to pick up an
        exhausted resource are flagged via agent.bad_action (consumed by the
        scenario's reward function).
        """
        world = self.world

        gathers = []
        for agent in world.get_random_order_agents():

            if self.name not in agent.action:
                continue

            resource_action = agent.get_component_action(
                self.name
            )
            if resource_action == 0:  # NO-OP
                continue
            # Actions 1..len(commodities) map onto commodity indices 0..n-1.
            resource_action -=1  # Starting at 1

            r=self.commodities[resource_action]
            if self.get_num_resources(r)>0:
                gather= self.pickup(r,agent)
                gathers.append(gather)
            else:
                agent.bad_action=True
                continue

        self.gathers.append(gathers)

    def generate_observations(self):
        """
        See base_component.py for detailed description.

        Here, agents observe their collection skill plus, for each commodity,
        the available stock divided by the number of agents (capped at 1).
        The planner does not observe anything from this component.
        """
        num_agent=len(self.world.agents)
        obs_avai={}
        for r in self.commodities:
            key="pickup_perc_{}".format(r)
            pickProb=float(self.get_num_resources(r)/num_agent)
            if pickProb>1:
                pickProb=1
            obs_avai[key]=pickProb

        obs={}
        for agent in self.world.agents:
            obs[agent.idx]={}
            obs[agent.idx]["bonus_gather_prob"]= agent.state["bonus_gather_prob"]
            obs[agent.idx].update(obs_avai)

        return obs

    def generate_masks(self, completions=0):
        """
        See base_component.py for detailed description.

        Mask each commodity's pickup action whenever that resource is
        currently exhausted. The same mask applies to every agent.
        """
        world = self.world
        mask=[]
        for r in self.commodities:
            avail=0
            if self.get_num_resources(r)>0:
                avail=1
            mask.append(avail)

        masks = {}
        for agent in world.agents:
            masks[agent.idx]=mask

        return masks

    # For non-required customization
    # ------------------------------

    def additional_reset_steps(self):
        """
        See base_component.py for detailed description.

        Re-sample agents' collection skills.
        """
        for agent in self.world.agents:
            if self.skill_dist == "none":
                bonus_rate = 0.0
            elif self.skill_dist == "pareto":
                bonus_rate = np.minimum(2, np.random.pareto(3)) / 2
            elif self.skill_dist == "lognormal":
                bonus_rate = np.minimum(2, np.random.lognormal(-2.022, 0.938)) / 2
            else:
                raise NotImplementedError

            agent.state["bonus_gather_prob"] = float(bonus_rate)

        self.gathers = []

    def get_dense_log(self):
        """
        Log resource collections.

        Returns:
            gathers (list): A list of gather events. Each entry corresponds to a single
                timestep and contains a description of any resource gathers that
                occurred on that timestep.
        """
        return self.gathers

    # For Components

    def get_num_resources(self, res: str):
        """Return the stock of resource *res* (a registry name string) at the
        shared map point (0, 0)."""
        return self.world.maps.get_point(res,0,0)

    def pickup(self, res: str, agent):
        """Transfer one unit of *res* (plus a skill-based bonus unit) to
        *agent*, charge collection labor, and return a gather-event dict."""
        # Bool comparison adds 1 with probability bonus_gather_prob.
        n_gathered = 1 + (rand() < agent.state["bonus_gather_prob"])
        agent.state["inventory"][res] += n_gathered
        agent.state["endogenous"]["Labor"] += self.collect_labor
        # NOTE(review): only one unit is consumed from the map even when two
        # units were gathered — confirm whether the bonus is meant to be free.
        self.world.consume_resource(res,0,0)
        # Log the gather
        return (
            dict(
                agent=agent.idx,
                resource=res,
                n=n_gathered,
            )
        )

227
envs/econ_wrapper.py Normal file
View File

@@ -0,0 +1,227 @@
from collections import OrderedDict
from copy import deepcopy
from typing import Any, Callable, List, Optional, Sequence, Type, Union
from ai_economist.foundation.base import base_env
import gym
import gym.spaces
import numpy as np
from stable_baselines3.common.vec_env.base_vec_env import VecEnv, VecEnvIndices, VecEnvObs, VecEnvStepReturn
from stable_baselines3.common.vec_env.util import copy_obs_dict, dict_to_obs, obs_space_info
from ai_economist import foundation
class EconVecEnv(VecEnv, gym.Env):
    """
    Vectorized wrapper exposing a single multi-agent ai-economist environment
    as a stable-baselines3 ``VecEnv``: each mobile agent of the underlying
    environment occupies one "env" slot, so SB3 trains one shared policy
    across all agents. Planner entries (key ``"p"``) are stripped from
    observations, rewards, and infos.

    :param env_config: keyword configuration forwarded to
        ``foundation.make_env_instance``.
    """

    def __init__(self, env_config):
        self.config=env_config
        env=foundation.make_env_instance(**env_config)
        self.env = env
        # Several inherited/overridden helpers (seed, get_images, render,
        # _get_target_envs -> get_attr/set_attr/env_method/env_is_wrapped)
        # iterate over ``self.envs``; previously this attribute was never
        # assigned, so all of those raised AttributeError. There is exactly
        # one underlying environment here.
        self.envs = [env]
        # Build the gym spaces from a sample reset observation of agent "0";
        # bookkeeping entries are removed so they don't count toward the
        # flat observation size.
        obs=env.reset()
        actions=env.world.agents[0].action_spaces
        obs1=obs["0"]
        del obs1["action_mask"]
        del obs1["time"]
        self.observation_space=gym.spaces.Box(low=0,high=np.inf,shape=(len(obs1),),dtype=np.float32)
        self.action_space=gym.spaces.Discrete(actions)
        # One VecEnv slot per mobile agent.
        self.num_envs=env.world.n_agents
        VecEnv.__init__(self, self.num_envs, self.observation_space, action_space=self.action_space)
        self.keys, shapes, dtypes = obs_space_info(self.observation_space)
        self.buf_obs = OrderedDict([(k, np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k])) for k in self.keys])
        self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
        self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        self.actions = None
        # Initialize metrics here (env.reset() already ran above) so that
        # step_wait() does not crash if the caller steps before calling
        # reset() explicitly; previously self.metrics was first assigned
        # only inside reset().
        self.metrics = env.scenario_metrics()

    def step_async(self, actions: np.ndarray) -> None:
        self.actions = actions

    def step_wait(self) -> VecEnvStepReturn:
        """Apply the buffered actions and return (obs, rews, dones, infos)."""
        # Convert the flat SB3 action array into the {agent_idx: action}
        # dict the ai-economist env expects.
        r_action={}
        for ai_idx in range(len(self.actions)):
            r_action[str(ai_idx)]=self.actions[ai_idx]
        obs,rew,done,info = self.env.step(r_action)
        obs_g=self._convert_econ_obs_to_gym(obs)
        rew_g=self._convert_econ_to_gym(rew)
        info_g=self._convert_econ_to_gym(info)
        # Collect scenario metrics and expose productivity level + trend in
        # every agent's info dict.
        prev_metrics=self.metrics
        self.metrics=self.env.scenario_metrics()
        curr_prod=self.metrics["social/productivity"]
        trend_pord=curr_prod-prev_metrics["social/productivity"]
        for k in info_g:
            k["social/productivity"]=curr_prod
            k["trend/productivity"]=trend_pord
        done_g=[False]*self.num_envs
        done=(done["__all__"])
        if done:
            # Episode finished: mark all slots done, stash terminal
            # observations, and auto-reset (standard SB3 VecEnv behavior).
            for i in range(self.num_envs):
                done_g[i]=done
                info_g[i]["terminal_observation"]=obs_g[i]
            obs_g=self.reset()
        return (np.copy(obs_g), np.copy(rew_g), np.copy(done_g), deepcopy(info_g))

    # fix with malformed action tensor from sb3 predict method
    def step_predict(self,actions):
        """Step using the (extra-nested) action tensor SB3's predict returns."""
        return self.step(actions[0])

    def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
        # NOTE(review): assumes the underlying foundation env exposes a
        # gym-style seed() method — confirm.
        if seed is None:
            seed = np.random.randint(0, 2**32 - 1)
        seeds = []
        for idx, env in enumerate(self.envs):
            seeds.append(env.seed(seed + idx))
        return seeds

    def reset(self) -> VecEnvObs:
        """Reset the underlying env and return the stacked agent observations."""
        obs = self.env.reset()
        self.metrics=self.env.scenario_metrics()
        obs_g=self._convert_econ_obs_to_gym(obs)
        return obs_g

    def close(self) -> None:
        self.env.close()

    def get_images(self) -> Sequence[np.ndarray]:
        return [env.render(mode="rgb_array") for env in self.envs]

    def render(self, mode: str = "human") -> Optional[np.ndarray]:
        """
        Gym environment rendering. If there are multiple environments then
        they are tiled together in one image via ``BaseVecEnv.render()``.
        Otherwise (if ``self.num_envs == 1``), we pass the render call directly to the
        underlying environment.

        Therefore, some arguments such as ``mode`` will have values that are valid
        only when ``num_envs == 1``.

        :param mode: The rendering type.
        """
        if self.num_envs == 1:
            return self.envs[0].render(mode=mode)
        else:
            return super().render(mode=mode)

    def _save_obs(self, env_idx: int, obs: VecEnvObs) -> None:
        for key in self.keys:
            if key is None:
                self.buf_obs[key][env_idx] = obs
            else:
                self.buf_obs[key][env_idx] = obs[key]

    def _obs_from_buf(self) -> VecEnvObs:
        return dict_to_obs(self.observation_space, copy_obs_dict(self.buf_obs))

    def get_attr(self, attr_name: str, indices: VecEnvIndices = None) -> List[Any]:
        """Return attribute from vectorized environment (see base class)."""
        target_envs = self._get_target_envs(indices)
        return [getattr(env_i, attr_name) for env_i in target_envs]

    def set_attr(self, attr_name: str, value: Any, indices: VecEnvIndices = None) -> None:
        """Set attribute inside vectorized environments (see base class)."""
        target_envs = self._get_target_envs(indices)
        for env_i in target_envs:
            setattr(env_i, attr_name, value)

    def env_method(self, method_name: str, *method_args, indices: VecEnvIndices = None, **method_kwargs) -> List[Any]:
        """Call instance methods of vectorized environments."""
        target_envs = self._get_target_envs(indices)
        return [getattr(env_i, method_name)(*method_args, **method_kwargs) for env_i in target_envs]

    def env_is_wrapped(self, wrapper_class: Type[gym.Wrapper], indices: VecEnvIndices = None) -> List[bool]:
        """Check if worker environments are wrapped with a given wrapper"""
        target_envs = self._get_target_envs(indices)
        # Import here to avoid a circular import
        from stable_baselines3.common import env_util
        return [env_util.is_wrapped(env_i, wrapper_class) for env_i in target_envs]

    def _get_target_envs(self, indices: VecEnvIndices) -> List[gym.Env]:
        indices = self._get_indices(indices)
        return [self.envs[i] for i in indices]

    # Convert econ to gym
    def _convert_econ_to_gym(self, econ):
        """Drop the planner entry and return the remaining values as a list.

        NOTE(review): mutates *econ* in place and assumes its remaining keys
        iterate in agent-index order — confirm against the env's dict layout.
        """
        gy=[]
        del econ["p"]
        gy=[v for k,v in econ.items()]
        return gy

    def _convert_gym_to_acon(self, gy):
        """Inverse helper: rebuild a dict from (key, value) pairs.

        NOTE(review): name is a typo for "econ"; kept for compatibility.
        """
        econ={}
        for k,v in gy:
            econ[k]=v
        return econ

    def _convert_econ_obs_to_gym(self, econ):
        """Flatten per-agent observation dicts into a (num_envs, obs_dim) array."""
        gy=[None] * self.num_envs
        del econ["p"]
        for k,v in econ.items():
            # Strip bookkeeping entries that are not part of the obs space.
            del v["time"]
            del v["action_mask"]
            out=self.extract_dict(v)
            agent_obs=np.array(out)
            gy[int(k)]=agent_obs
        return np.stack(gy)

    def extract_dict(self,obj):
        """Recursively flatten nested dict/list values into a list of leaves."""
        output=[]
        use_key=isinstance(obj,dict)
        for v in obj:
            if use_key:
                v=obj[v]
            if isinstance(v,dict):
                temp=self.extract_dict(v)
                output.append(temp)
            else:
                output.append(v)
        return output

472
envs/simple_market.py Normal file
View File

@@ -0,0 +1,472 @@
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
from copy import deepcopy
from pathlib import Path
import numpy as np
from scipy import signal
from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry
from ai_economist.foundation.scenarios.utils import rewards, social_metrics
import yaml
@scenario_registry.add
class SimpleMarket(BaseEnvironment):
"""
World containing stone and wood with stochastic regeneration. Refers to a fixed
layout file (see ./map_txt/ for examples) to determine the spatial arrangement of
stone, wood, and water tiles.
Args:
planner_gets_spatial_obs (bool): Whether the planner agent receives spatial
observations from the world.
full_observability (bool): Whether the mobile agents' spatial observation
includes the full world view or is instead an egocentric view.
mobile_agent_observation_range (int): If not using full_observability,
the spatial range (on each side of the agent) that is visible in the
spatial observations.
env_layout_file (str): Name of the layout file in ./map_txt/ to use.
Note: The world dimensions of that layout must match the world dimensions
argument used to construct the environment.
resource_regen_prob (float): Probability that an empty source tile will
regenerate a new resource unit.
fixed_four_skill_and_loc (bool): Whether to use a fixed set of build skills and
starting locations, with agents grouped into starting locations based on
which skill quartile they are in. False, by default.
True, for experiments in https://arxiv.org/abs/2004.13332.
Note: Requires that the environment uses the "Build" component with
skill_dist="pareto".
starting_agent_coin (int, float): Amount of coin agents have at t=0. Defaults
to zero coin.
isoelastic_eta (float): Parameter controlling the shape of agent utility
wrt coin endowment.
energy_cost (float): Coefficient for converting labor to negative utility.
energy_warmup_constant (float): Decay constant that controls the rate at which
the effective energy cost is annealed from 0 to energy_cost. Set to 0
(default) to disable annealing, meaning that the effective energy cost is
always energy_cost. The units of the decay constant depend on the choice of
energy_warmup_method.
energy_warmup_method (str): How to schedule energy annealing (warmup). If
"decay" (default), use the number of completed episodes. If "auto",
use the number of timesteps where the average agent reward was positive.
planner_reward_type (str): The type of reward used for the planner. Options
are "coin_eq_times_productivity" (default),
"inv_income_weighted_coin_endowment", and "inv_income_weighted_utility".
mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/
"coin_eq_times_productivity". Default is 0, which weights equality and
productivity equally. If set to 1, only productivity is rewarded.
"""
name = "simple_market"
agent_subclasses = ["BasicMobileAgent"]
required_entities = ["Wood", "Stone", "Water"]
def __init__(
self,
*base_env_args,
resource_regen_prob=0.01,
fixed_four_skill_and_loc=False,
starting_agent_coin=0,
isoelastic_eta=0.23,
energy_cost=0.21,
energy_warmup_constant=0,
energy_warmup_method="decay",
planner_reward_type="coin_eq_times_productivity",
mixing_weight_gini_vs_coin=0.0,
**base_env_kwargs,
):
super().__init__(*base_env_args, **base_env_kwargs)
self.layout_specs = dict(
Wood={
"regen_weight": float(resource_regen_prob),
"regen_halfwidth": 0,
"max_health": 1,
},
Stone={
"regen_weight": float(resource_regen_prob),
"regen_halfwidth": 0,
"max_health": 1,
},
)
assert 0 <= self.layout_specs["Wood"]["regen_weight"] <= 1
assert 0 <= self.layout_specs["Stone"]["regen_weight"] <= 1
# How much coin do agents begin with at upon reset
self.starting_agent_coin = float(starting_agent_coin)
assert self.starting_agent_coin >= 0.0
# Controls the diminishing marginal utility of coin.
# isoelastic_eta=0 means no diminishing utility.
self.isoelastic_eta = float(isoelastic_eta)
assert 0.0 <= self.isoelastic_eta <= 1.0
# The amount that labor is weighted in utility computation
# (once annealing is finished)
self.energy_cost = float(energy_cost)
assert self.energy_cost >= 0
# Which method to use for calculating the progress of energy annealing
# If method = 'decay': #completed episodes
# If method = 'auto' : #timesteps where avg. agent reward > 0
self.energy_warmup_method = energy_warmup_method.lower()
assert self.energy_warmup_method in ["decay", "auto"]
# Decay constant for annealing to full energy cost
# (if energy_warmup_constant == 0, there is no annealing)
self.energy_warmup_constant = float(energy_warmup_constant)
assert self.energy_warmup_constant >= 0
self._auto_warmup_integrator = 0
# Which social welfare function to use
self.planner_reward_type = str(planner_reward_type).lower()
# How much to weight equality if using SWF=eq*prod:
# 0 -> SWF=eq * prod
# 1 -> SWF=prod
self.mixing_weight_gini_vs_coin = float(mixing_weight_gini_vs_coin)
assert 0 <= self.mixing_weight_gini_vs_coin <= 1.0
# Use this to calculate marginal changes and deliver that as reward
self.init_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
self.prev_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
self.curr_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
"""
Fixed Four Skill and Loc
------------------------
"""
self.agent_starting_pos = {agent.idx: [] for agent in self.world.agents}
self.last_log_loged={}
@property
def energy_weight(self):
"""
Energy annealing progress. Multiply with self.energy_cost to get the
effective energy coefficient.
"""
if self.energy_warmup_constant <= 0.0:
return 1.0
if self.energy_warmup_method == "decay":
return float(1.0 - np.exp(-self._completions / self.energy_warmup_constant))
if self.energy_warmup_method == "auto":
return float(
1.0
- np.exp(-self._auto_warmup_integrator / self.energy_warmup_constant)
)
raise NotImplementedError
def is_bad_action(self,agent):
bad=agent.bad_action
agent.bad_action=False
return bad
def get_current_optimization_metrics(self):
"""
Compute optimization metrics based on the current state. Used to compute reward.
Returns:
curr_optimization_metric (dict): A dictionary of {agent.idx: metric}
with an entry for each agent (including the planner) in the env.
"""
curr_optimization_metric = {}
# (for agents)
for agent in self.world.agents:
rew= rewards.isoelastic_coin_minus_labor(
coin_endowment=agent.total_endowment("Coin"),
total_labor=agent.state["endogenous"]["Labor"],
isoelastic_eta=self.isoelastic_eta,
labor_coefficient=self.energy_weight * self.energy_cost,
)
#rew-=agent.state["endogenous"]["noops"]
curr_optimization_metric[agent.idx] = rew
# (for the planner)
if self.planner_reward_type == "coin_eq_times_productivity":
curr_optimization_metric[
self.world.planner.idx
] = rewards.coin_eq_times_productivity(
coin_endowments=np.array(
[agent.total_endowment("Coin") for agent in self.world.agents]
),
equality_weight=1 - self.mixing_weight_gini_vs_coin,
)
elif self.planner_reward_type == "inv_income_weighted_coin_endowments":
curr_optimization_metric[
self.world.planner.idx
] = rewards.inv_income_weighted_coin_endowments(
coin_endowments=np.array(
[agent.total_endowment("Coin") for agent in self.world.agents]
)
)
elif self.planner_reward_type == "inv_income_weighted_utility":
curr_optimization_metric[
self.world.planner.idx
] = rewards.inv_income_weighted_utility(
coin_endowments=np.array(
[agent.total_endowment("Coin") for agent in self.world.agents]
),
utilities=np.array(
[curr_optimization_metric[agent.idx] for agent in self.world.agents]
),
)
else:
print("No valid planner reward selected!")
raise NotImplementedError
return curr_optimization_metric
# The following methods must be implemented for each scenario
# -----------------------------------------------------------
def reset_starting_layout(self):
"""
Part 1/2 of scenario reset. This method handles resetting the state of the
environment managed by the scenario (i.e. resource & landmark layout).
Here, reset to the layout in the fixed layout file
"""
self.world.maps.clear()
resources = ["Wood", "Stone"]
for resource in resources:
self.world.maps.set_point_add(resource,0,0,1)
def reset_agent_states(self):
"""
Part 2/2 of scenario reset. This method handles resetting the state of the
agents themselves (i.e. inventory, locations, etc.).
Here, empty inventories and place mobile agents in random, accessible
locations to start. Note: If using fixed_four_skill_and_loc, the starting
locations will be overridden in self.additional_reset_steps.
"""
self.world.clear_agent_locs()
for agent in self.world.agents:
agent.state["inventory"] = {k: 0 for k in agent.inventory.keys()}
agent.state["escrow"] = {k: 0 for k in agent.inventory.keys()}
agent.state["endogenous"] = {k: 0 for k in agent.endogenous.keys()}
# Add starting coin
agent.state["inventory"]["Coin"] = float(self.starting_agent_coin)
agent.bad_action=False
self.world.planner.state["inventory"] = {
k: 0 for k in self.world.planner.inventory.keys()
}
self.world.planner.state["escrow"] = {
k: 0 for k in self.world.planner.escrow.keys()
}
def scenario_step(self):
"""
Update the state of the world according to whatever rules this scenario
implements.
This gets called in the 'step' method (of base_env) after going through each
component step and before generating observations, rewards, etc.
In this class of scenarios, the scenario step handles stochastic resource
regeneration.
"""
resources = ["Wood", "Stone"]
for resource in resources:
self.world.maps.set_point_add(resource,0,0,20)
def generate_observations(self):
"""
Generate observations associated with this scenario.
A scenario does not need to produce observations and can provide observations
for only some agent types; however, for a given agent type, it should either
always or never yield an observation. If it does yield an observation,
that observation should always have the same structure/sizes!
Returns:
obs (dict): A dictionary of {agent.idx: agent_obs_dict}. In words,
return a dictionary with an entry for each agent (which can including
the planner) for which this scenario provides an observation. For each
entry, the key specifies the index of the agent and the value contains
its associated observation dictionary.
Here, non-planner agents receive spatial observations (depending on the env
config) as well as the contents of their inventory and endogenous quantities.
The planner also receives spatial observations (again, depending on the env
config) as well as the inventory of each of the mobile agents.
"""
obs = {}
agent_invs = {
str(agent.idx): {
"inventory-" + k: v * self.inv_scale for k, v in agent.inventory.items()
}
for agent in self.world.agents
}
obs[self.world.planner.idx] = {
"inventory-" + k: v * self.inv_scale
for k, v in self.world.planner.inventory.items()
}
for agent in self.world.agents:
sidx = str(agent.idx)
obs[sidx]=agent_invs[sidx]
return obs
def compute_reward(self):
"""
Apply the reward function(s) associated with this scenario to get the rewards
from this step.
Returns:
rew (dict): A dictionary of {agent.idx: agent_obs_dict}. In words,
return a dictionary with an entry for each agent in the environment
(including the planner). For each entry, the key specifies the index of
the agent and the value contains the scalar reward earned this timestep.
Rewards are computed as the marginal utility (agents) or marginal social
welfare (planner) experienced on this timestep. Ignoring discounting,
this means that agents' (planner's) objective is to maximize the utility
(social welfare) associated with the terminal state of the episode.
"""
# "curr_optimization_metric" hasn't been updated yet, so it gives us the
# utility from the last step.
utility_at_end_of_last_time_step = deepcopy(self.curr_optimization_metric)
# compute current objectives and store the values
self.curr_optimization_metric = self.get_current_optimization_metrics()
# reward = curr - prev objectives
rew={}
for k, v in self.curr_optimization_metric.items():
rew[k] = float(v - utility_at_end_of_last_time_step[k])
if k!="p":
if self.is_bad_action(self.world.agents[k]):
rew[k]-=1
# store the previous objective values
self.prev_optimization_metric.update(utility_at_end_of_last_time_step)
# Automatic Energy Cost Annealing
# -------------------------------
avg_agent_rew = np.mean([rew[a.idx] for a in self.world.agents])
# Count the number of timesteps where the avg agent reward was > 0
if avg_agent_rew > 0:
self._auto_warmup_integrator += 1
return rew
# Optional methods for customization
# ----------------------------------
def additional_reset_steps(self):
    """Finalize the reset cycle by (re)initializing optimization-metric trackers.

    For each reset cycle, reset_starting_layout() and reset_agent_states()
    run first, then every registered component's reset(); this hook runs last.
    It snapshots the current optimization metrics into the current/initial/
    previous trackers so the first compute_reward() call has a valid baseline.
    """
    metrics_snapshot = self.get_current_optimization_metrics()
    # Independent deep copies: compute_reward() mutates these separately.
    self.curr_optimization_metric = deepcopy(metrics_snapshot)
    self.init_optimization_metric = deepcopy(metrics_snapshot)
    self.prev_optimization_metric = deepcopy(metrics_snapshot)
def scenario_metrics(self):
    """Summarize social metrics, endowments, utilities, and labor-cost annealing.

    Returns a flat {metric_key: scalar} dict (no nesting or lists), collected
    along with component metrics in the environment's 'metrics' property.
    """
    metrics = {}

    coin_endowments = np.array(
        [agent.total_endowment("Coin") for agent in self.world.agents]
    )
    utilities = np.array(
        [self.curr_optimization_metric[agent.idx] for agent in self.world.agents]
    )

    # Aggregate social statistics over mobile agents.
    metrics["social/productivity"] = social_metrics.get_productivity(coin_endowments)
    metrics["social/equality"] = social_metrics.get_equality(coin_endowments)

    # Candidate social-welfare objectives.
    metrics["social_welfare/coin_eq_times_productivity"] = (
        rewards.coin_eq_times_productivity(
            coin_endowments=coin_endowments, equality_weight=1.0
        )
    )
    metrics["social_welfare/inv_income_weighted_coin_endow"] = (
        rewards.inv_income_weighted_coin_endowments(coin_endowments=coin_endowments)
    )
    metrics["social_welfare/inv_income_weighted_utility"] = (
        rewards.inv_income_weighted_utility(
            coin_endowments=coin_endowments, utilities=utilities
        )
    )

    # Per-agent endowments, endogenous quantities, and utility.
    for agent in self.all_agents:
        for resource in agent.inventory:
            metrics["endow/{}/{}".format(agent.idx, resource)] = (
                agent.total_endowment(resource)
            )
        if agent.endogenous is not None:
            for resource, quantity in agent.endogenous.items():
                metrics["endogenous/{}/{}".format(agent.idx, resource)] = quantity
        metrics["util/{}".format(agent.idx)] = (
            self.curr_optimization_metric[agent.idx]
        )

    # Labor-cost annealing state.
    metrics["labor/weighted_cost"] = self.energy_cost * self.energy_weight
    metrics["labor/warmup_integrator"] = int(self._auto_warmup_integrator)
    return metrics

View File

@@ -0,0 +1,283 @@
from ai_economist import foundation
import numpy as np
from stable_baselines3.common.vec_env import vec_frame_stack
from stable_baselines3.common.evaluation import evaluate_policy
import envs
from tqdm import tqdm
import components
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
from sb3_contrib import RecurrentPPO
from envs.econ_wrapper import EconVecEnv
from stable_baselines3.common.callbacks import BaseCallback
import yaml
import time
def make_env_config(episode_length, dense_log_frequency):
    """Build a foundation environment config dict.

    The training and evaluation configs below are identical except for the
    episode length and dense-log frequency, so both are generated from this
    single template instead of maintaining two near-duplicate literals.

    Args:
        episode_length (int): Number of timesteps per episode.
        dense_log_frequency (int): How often (in episodes) to keep dense logs.

    Returns:
        dict: A config suitable for foundation.make_env_instance / EconVecEnv.
    """
    return {
        # ===== SCENARIO CLASS =====
        # Which Scenario class to use: the class's name in the Scenario Registry
        # (foundation.scenarios). The environment object will be an instance of it.
        'scenario_name': 'simple_market',
        # ===== COMPONENTS =====
        # ("component_name", {component_kwargs}) tuples; "component_name" is the
        # Component class's name in the Component Registry (foundation.components).
        # Components reset, step, and generate obs in their listed order.
        'components': [
            # (1) Building houses
            ('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
            # (2) Trading collectible resources (currently disabled)
            # ('ContinuousDoubleAuction', {'max_num_orders': 10}),
            # (3) Movement and resource collection
            ('SimpleGather', {}),
        ],
        # ===== SCENARIO CLASS ARGUMENTS =====
        # (optional) kwargs consumed by the Scenario class itself.
        'starting_agent_coin': 0,
        'fixed_four_skill_and_loc': True,
        # ===== STANDARD ARGUMENTS =====
        # kwargs used by every Scenario class (defined in BaseEnvironment).
        'n_agents': 20,          # Number of non-planner agents (must be > 1)
        'world_size': [1, 1],    # [Height, Width] of the env world
        'episode_length': episode_length,
        'allow_observation_scaling': True,
        'dense_log_frequency': dense_log_frequency,
        'world_dense_log_frequency': 1,
        'energy_cost': 0,
        'energy_warmup_method': "auto",
        'energy_warmup_constant': 0,
        # In multi-action-mode, the policy selects an action for each action
        # subspace; otherwise it selects only 1 action.
        'multi_action_mode_agents': False,
        'multi_action_mode_planner': False,
        # When flattening observations, concatenate scalar & vector observations
        # before output; otherwise return observations with minimal processing.
        'flatten_observations': False,
        # flatten_masks = True would be required for masking action logits.
        'flatten_masks': False,
    }


# Training config: long episodes, infrequent dense logging.
env_config = make_env_config(episode_length=256, dense_log_frequency=100)
# Evaluation config: short episodes, frequent dense logging.
eval_env_config = make_env_config(episode_length=100, dense_log_frequency=10)

num_frames = 2
class TensorboardCallback(BaseCallback):
    """Log economy-level productivity metrics to tensorboard.

    Records the latest social productivity and its change since the previous
    step's snapshot, pulling metrics from the wrapped economy instance.
    """

    def __init__(self, econ, verbose=0):
        super().__init__(verbose)
        self.econ = econ
        # Seed the snapshot so the first delta has a baseline.
        self.metrics = econ.scenario_metrics()

    def _on_step(self) -> bool:
        previous = self.metrics
        episode_metrics = self.econ.previous_episode_metrics
        # Prefer the last finished episode's metrics; before the first episode
        # completes, fall back to the live scenario metrics.
        if episode_metrics is None:
            self.metrics = self.econ.scenario_metrics()
        else:
            self.metrics = episode_metrics
        productivity = self.metrics["social/productivity"]
        self.logger.record("social/total_productivity", productivity)
        self.logger.record(
            "social/delta_productivity",
            productivity - previous["social/productivity"],
        )
        return True
def sample_random_action(agent, mask):
    """Sample random UNMASKED action(s) for agent.

    Returns a list of actions (one per action subspace) in multi-action mode,
    otherwise a single action index. Masked-out actions have zero probability.
    """
    if not agent.multi_action_mode:
        # Single subspace: one masked categorical draw.
        return np.random.choice(np.arange(agent.action_spaces), p=mask / mask.sum())
    # Multi-action mode: split the flat mask at each subspace boundary and
    # draw one action per subspace.
    boundaries = agent.action_spaces.cumsum()[:-1]
    samples = []
    for sub_mask in np.split(mask, boundaries):
        samples.append(
            np.random.choice(np.arange(len(sub_mask)), p=sub_mask / sub_mask.sum())
        )
    return samples
def sample_random_actions(env, obs):
    """Return a no-op action (0) for every agent key in obs.

    NOTE: despite the name, no random sampling happens here; every agent is
    assigned action 0. The env argument is accepted for interface parity but
    unused.
    """
    return {agent_index: 0 for agent_index in range(len(obs))}
def printMarket(market):
    """Print every trade from a dense market log, grouped by timestep.

    market is a per-step list of transaction dicts with keys commodity,
    seller, buyer, ask, bid, and price. Steps with no trades are skipped.
    Always returns the empty string.
    """
    for step_idx, transactions in enumerate(market):
        if not transactions:
            continue
        print("=== Step {} ===".format(step_idx))
        for t in transactions:
            line = "({}) {} -> {} | [{}/{}] {} Coins\n".format(
                t["commodity"], t["seller"], t["buyer"], t["ask"], t["bid"], t["price"]
            )
            print(line)
    return ""
def printBuilds(builds):
    """Print every house build recorded in a SimpleCraft dense log.

    builds is a per-step list of build dicts with keys builder, build_skill,
    and income. Steps with no builds are skipped. Always returns the empty
    string.
    """
    for step_idx, step_builds in enumerate(builds):
        if not step_builds:
            continue
        for entry in step_builds:
            print(
                "({}) Builder: {}, Skill: {}, Income {} ".format(
                    step_idx, entry["builder"], entry["build_skill"], entry["income"]
                )
            )
    return ""
def printReplay(econ,agentid):
    """Replay one agent's episode from the previous dense log, step by step.

    For each timestep, prints the world's Stone/Wood cell values, the agent's
    logged state (as YAML), its action(s), and its reward.

    Args:
        econ: foundation environment; previous_episode_dense_log must be
            populated (i.e. at least one episode has finished).
        agentid (int): index of the agent to replay.
    """
    # Resource layers to read out of each per-step world snapshot.
    worldmaps=["Stone","Wood"]
    log=econ.previous_episode_dense_log
    # NOTE(review): agent is assigned but never used below — consider removing.
    agent=econ.world.agents[agentid]
    # Dense-log per-step dicts are keyed by stringified agent index.
    agentid=str(agentid)
    # Skip the last "states" entry — presumably the terminal state, which has
    # no matching action/reward entry. TODO confirm against the dense-log format.
    maxsetp=len(log["states"])-1
    for step in range(maxsetp):
        print()
        print("=== Step {} ===".format(step))
        # state
        print("--- World ---")
        world=log['world'][step]
        for res in worldmaps:
            # [0][0] reads the single cell; assumes a 1x1 world as configured
            # in this script — TODO confirm for other world sizes.
            print("{}: {}".format(res,world[res][0][0]))
        print("--- State ---")
        state=log['states'][step][agentid]
        print(yaml.dump(state))
        print("--- Action ---")
        action=log["actions"][step][agentid]
        if action=={}:
            # An empty action dict in the dense log means the agent idled.
            print("Action: 0 -> NOOP")
        else:
            for k in action:
                formats="Action: {}({})".format(k,action[k])
                print(formats)
        print("--- Reward ---")
        reward=log["rewards"][step][agentid]
        print("Reward: {}".format(reward))
# Setup Env Objects
# -----------------
# Wrap the foundation economy in an SB3-compatible vectorized env, then add
# monitoring, observation normalization, and frame stacking layers.
vecenv=EconVecEnv(env_config=env_config)
econ=vecenv.env
monenv=VecMonitor(venv=vecenv,info_keywords=["social/productivity","trend/productivity"])
# NOTE(review): normenv and stackenv both wrap monenv, but the model below is
# trained on monenv directly — normalization and frame stacking are effectively
# unused during training. Confirm this is intended.
normenv=VecNormalize(monenv,norm_reward=False,clip_obs=1)
stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
obs=stackenv.reset()
# Random two-digit run id; used as the tensorboard log name.
runname="run_{}".format(int(np.random.rand()*100))
# PPO on the monitored (un-normalized, un-stacked) env; rollout buffer spans
# two episodes (n_steps = 2 * episode_length).
model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")
# One full episode across all agents = n_agents * episode_length env steps.
total_required_for_episode=env_config['n_agents']*env_config['episode_length']
print("this is run {}".format(runname))
# Alternate forever between a training phase and an evaluation rollout.
while True:
    # Create Eval ENV (rebuilt each iteration so eval logs start fresh).
    vec_env_eval=EconVecEnv(env_config=eval_env_config)
    vec_mon_eval=VecMonitor(venv=vec_env_eval)
    norm_env_eval=VecNormalize(vec_mon_eval,norm_reward=False,training=False)
    eval_econ = vec_env_eval.env
    # Train for ~50 episodes' worth of steps, keeping the global step counter.
    model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
    # Save the training-side normalizer statistics for the eval env to load.
    normenv.save("temp-normalizer.ai")
    ## Run Eval
    print("### EVAL ###")
    # NOTE(review): VecNormalize.load returns a new wrapped env; the return
    # value is discarded here, so norm_env_eval itself may be unchanged and
    # the eval rollout below steps vec_mon_eval anyway — confirm intent.
    norm_env_eval.load("temp-normalizer.ai",vec_mon_eval)
    obs=vec_mon_eval.reset()
    done=False
    # Roll out one eval episode with the trained policy.
    for i in tqdm(range(eval_env_config['episode_length'])):
        # model.predict returns (action, state); step on the action only.
        action=model.predict(obs)
        obs,rew,done_e,info=vec_mon_eval.step(action[0])
        done=done_e[0]
    #market=eval_econ.get_component("ContinuousDoubleAuction")
    craft=eval_econ.get_component("SimpleCraft")
    # trades=market.get_dense_log()
    build=craft.get_dense_log()
    # NOTE(review): metrics come from the TRAINING economy (econ), while the
    # replay/builds below come from the eval economy — confirm intent.
    met=econ.previous_episode_metrics
    printReplay(eval_econ,0)
    # printMarket(trades)
    printBuilds(builds=build)
    print("social/productivity: {}".format(met["social/productivity"]))
    print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
    print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))
    time.sleep(1)

283
main.py Normal file
View File

@@ -0,0 +1,283 @@
from ai_economist import foundation
import numpy as np
from stable_baselines3.common.vec_env import vec_frame_stack
from stable_baselines3.common.evaluation import evaluate_policy
import envs
from tqdm import tqdm
import components
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
from sb3_contrib import RecurrentPPO
from envs.econ_wrapper import EconVecEnv
from stable_baselines3.common.callbacks import BaseCallback
import yaml
import time
def make_env_config(episode_length, dense_log_frequency):
    """Build a foundation environment config dict.

    The training and evaluation configs below are identical except for the
    episode length and dense-log frequency, so both are generated from this
    single template instead of maintaining two near-duplicate literals.

    Args:
        episode_length (int): Number of timesteps per episode.
        dense_log_frequency (int): How often (in episodes) to keep dense logs.

    Returns:
        dict: A config suitable for foundation.make_env_instance / EconVecEnv.
    """
    return {
        # ===== SCENARIO CLASS =====
        # Which Scenario class to use: the class's name in the Scenario Registry
        # (foundation.scenarios). The environment object will be an instance of it.
        'scenario_name': 'simple_market',
        # ===== COMPONENTS =====
        # ("component_name", {component_kwargs}) tuples; "component_name" is the
        # Component class's name in the Component Registry (foundation.components).
        # Components reset, step, and generate obs in their listed order.
        'components': [
            # (1) Building houses
            ('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
            # (2) Trading collectible resources (currently disabled)
            # ('ContinuousDoubleAuction', {'max_num_orders': 10}),
            # (3) Movement and resource collection
            ('SimpleGather', {}),
        ],
        # ===== SCENARIO CLASS ARGUMENTS =====
        # (optional) kwargs consumed by the Scenario class itself.
        'starting_agent_coin': 0,
        'fixed_four_skill_and_loc': True,
        # ===== STANDARD ARGUMENTS =====
        # kwargs used by every Scenario class (defined in BaseEnvironment).
        'n_agents': 20,          # Number of non-planner agents (must be > 1)
        'world_size': [1, 1],    # [Height, Width] of the env world
        'episode_length': episode_length,
        'allow_observation_scaling': True,
        'dense_log_frequency': dense_log_frequency,
        'world_dense_log_frequency': 1,
        'energy_cost': 0,
        'energy_warmup_method': "auto",
        'energy_warmup_constant': 0,
        # In multi-action-mode, the policy selects an action for each action
        # subspace; otherwise it selects only 1 action.
        'multi_action_mode_agents': False,
        'multi_action_mode_planner': False,
        # When flattening observations, concatenate scalar & vector observations
        # before output; otherwise return observations with minimal processing.
        'flatten_observations': False,
        # flatten_masks = True would be required for masking action logits.
        'flatten_masks': False,
    }


# Training config: long episodes, infrequent dense logging.
env_config = make_env_config(episode_length=256, dense_log_frequency=100)
# Evaluation config: short episodes, frequent dense logging.
eval_env_config = make_env_config(episode_length=100, dense_log_frequency=10)

num_frames = 2
class TensorboardCallback(BaseCallback):
    """Log economy-level productivity metrics to tensorboard.

    Records the latest social productivity and its change since the previous
    step's snapshot, pulling metrics from the wrapped economy instance.
    """

    def __init__(self, econ, verbose=0):
        super().__init__(verbose)
        self.econ = econ
        # Seed the snapshot so the first delta has a baseline.
        self.metrics = econ.scenario_metrics()

    def _on_step(self) -> bool:
        previous = self.metrics
        episode_metrics = self.econ.previous_episode_metrics
        # Prefer the last finished episode's metrics; before the first episode
        # completes, fall back to the live scenario metrics.
        if episode_metrics is None:
            self.metrics = self.econ.scenario_metrics()
        else:
            self.metrics = episode_metrics
        productivity = self.metrics["social/productivity"]
        self.logger.record("social/total_productivity", productivity)
        self.logger.record(
            "social/delta_productivity",
            productivity - previous["social/productivity"],
        )
        return True
def sample_random_action(agent, mask):
    """Sample random UNMASKED action(s) for agent.

    Returns a list of actions (one per action subspace) in multi-action mode,
    otherwise a single action index. Masked-out actions have zero probability.
    """
    if not agent.multi_action_mode:
        # Single subspace: one masked categorical draw.
        return np.random.choice(np.arange(agent.action_spaces), p=mask / mask.sum())
    # Multi-action mode: split the flat mask at each subspace boundary and
    # draw one action per subspace.
    boundaries = agent.action_spaces.cumsum()[:-1]
    samples = []
    for sub_mask in np.split(mask, boundaries):
        samples.append(
            np.random.choice(np.arange(len(sub_mask)), p=sub_mask / sub_mask.sum())
        )
    return samples
def sample_random_actions(env, obs):
    """Return a no-op action (0) for every agent key in obs.

    NOTE: despite the name, no random sampling happens here; every agent is
    assigned action 0. The env argument is accepted for interface parity but
    unused.
    """
    return {agent_index: 0 for agent_index in range(len(obs))}
def printMarket(market):
    """Print every trade from a dense market log, grouped by timestep.

    market is a per-step list of transaction dicts with keys commodity,
    seller, buyer, ask, bid, and price. Steps with no trades are skipped.
    Always returns the empty string.
    """
    for step_idx, transactions in enumerate(market):
        if not transactions:
            continue
        print("=== Step {} ===".format(step_idx))
        for t in transactions:
            line = "({}) {} -> {} | [{}/{}] {} Coins\n".format(
                t["commodity"], t["seller"], t["buyer"], t["ask"], t["bid"], t["price"]
            )
            print(line)
    return ""
def printBuilds(builds):
    """Print every house build recorded in a SimpleCraft dense log.

    builds is a per-step list of build dicts with keys builder, build_skill,
    and income. Steps with no builds are skipped. Always returns the empty
    string.
    """
    for step_idx, step_builds in enumerate(builds):
        if not step_builds:
            continue
        for entry in step_builds:
            print(
                "({}) Builder: {}, Skill: {}, Income {} ".format(
                    step_idx, entry["builder"], entry["build_skill"], entry["income"]
                )
            )
    return ""
def printReplay(econ,agentid):
    """Replay one agent's episode from the previous dense log, step by step.

    For each timestep, prints the world's Stone/Wood cell values, the agent's
    logged state (as YAML), its action(s), and its reward.

    Args:
        econ: foundation environment; previous_episode_dense_log must be
            populated (i.e. at least one episode has finished).
        agentid (int): index of the agent to replay.
    """
    # Resource layers to read out of each per-step world snapshot.
    worldmaps=["Stone","Wood"]
    log=econ.previous_episode_dense_log
    # NOTE(review): agent is assigned but never used below — consider removing.
    agent=econ.world.agents[agentid]
    # Dense-log per-step dicts are keyed by stringified agent index.
    agentid=str(agentid)
    # Skip the last "states" entry — presumably the terminal state, which has
    # no matching action/reward entry. TODO confirm against the dense-log format.
    maxsetp=len(log["states"])-1
    for step in range(maxsetp):
        print()
        print("=== Step {} ===".format(step))
        # state
        print("--- World ---")
        world=log['world'][step]
        for res in worldmaps:
            # [0][0] reads the single cell; assumes a 1x1 world as configured
            # in this script — TODO confirm for other world sizes.
            print("{}: {}".format(res,world[res][0][0]))
        print("--- State ---")
        state=log['states'][step][agentid]
        print(yaml.dump(state))
        print("--- Action ---")
        action=log["actions"][step][agentid]
        if action=={}:
            # An empty action dict in the dense log means the agent idled.
            print("Action: 0 -> NOOP")
        else:
            for k in action:
                formats="Action: {}({})".format(k,action[k])
                print(formats)
        print("--- Reward ---")
        reward=log["rewards"][step][agentid]
        print("Reward: {}".format(reward))
# Setup Env Objects
# -----------------
# Wrap the foundation economy in an SB3-compatible vectorized env, then add
# monitoring, observation normalization, and frame stacking layers.
vecenv=EconVecEnv(env_config=env_config)
econ=vecenv.env
monenv=VecMonitor(venv=vecenv,info_keywords=["social/productivity","trend/productivity"])
# NOTE(review): normenv and stackenv both wrap monenv, but the model below is
# trained on monenv directly — normalization and frame stacking are effectively
# unused during training. Confirm this is intended.
normenv=VecNormalize(monenv,norm_reward=False,clip_obs=1)
stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
obs=stackenv.reset()
# Random two-digit run id; used as the tensorboard log name.
runname="run_{}".format(int(np.random.rand()*100))
# PPO on the monitored (un-normalized, un-stacked) env; rollout buffer spans
# two episodes (n_steps = 2 * episode_length).
model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")
# One full episode across all agents = n_agents * episode_length env steps.
total_required_for_episode=env_config['n_agents']*env_config['episode_length']
print("this is run {}".format(runname))
# Alternate forever between a training phase and an evaluation rollout.
while True:
    # Create Eval ENV (rebuilt each iteration so eval logs start fresh).
    vec_env_eval=EconVecEnv(env_config=eval_env_config)
    vec_mon_eval=VecMonitor(venv=vec_env_eval)
    norm_env_eval=VecNormalize(vec_mon_eval,norm_reward=False,training=False)
    eval_econ = vec_env_eval.env
    # Train for ~50 episodes' worth of steps, keeping the global step counter.
    model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
    # Save the training-side normalizer statistics for the eval env to load.
    normenv.save("temp-normalizer.ai")
    ## Run Eval
    print("### EVAL ###")
    # NOTE(review): VecNormalize.load returns a new wrapped env; the return
    # value is discarded here, so norm_env_eval itself may be unchanged and
    # the eval rollout below steps vec_mon_eval anyway — confirm intent.
    norm_env_eval.load("temp-normalizer.ai",vec_mon_eval)
    obs=vec_mon_eval.reset()
    done=False
    # Roll out one eval episode with the trained policy.
    for i in tqdm(range(eval_env_config['episode_length'])):
        # model.predict returns (action, state); step on the action only.
        action=model.predict(obs)
        obs,rew,done_e,info=vec_mon_eval.step(action[0])
        done=done_e[0]
    #market=eval_econ.get_component("ContinuousDoubleAuction")
    craft=eval_econ.get_component("SimpleCraft")
    # trades=market.get_dense_log()
    build=craft.get_dense_log()
    # NOTE(review): metrics come from the TRAINING economy (econ), while the
    # replay/builds below come from the eval economy — confirm intent.
    met=econ.previous_episode_metrics
    printReplay(eval_econ,0)
    # printMarket(trades)
    printBuilds(builds=build)
    print("social/productivity: {}".format(met["social/productivity"]))
    print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
    print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))
    time.sleep(1)

Binary file not shown.

3
reqirements.txt Normal file
View File

@@ -0,0 +1,3 @@
ai-economist
gym
ray[rllib]

BIN
temp-normalizer.ai Normal file

Binary file not shown.