Merge pull request 'crafting' (#2) from crafting into master

Reviewed-on: #2
This commit was merged in pull request #2.
This commit is contained in:
2023-01-17 08:34:11 +00:00
26 changed files with 1972 additions and 136 deletions

View File

@@ -8,4 +8,6 @@ class TradingAgent(BaseAgent):
"Mobile" refers to agents of this type being able to move around in the 2D world.
"""
name = "TradingAgent"
name = "TradingAgent"

View File

@@ -5,6 +5,7 @@
# or https://opensource.org/licenses/BSD-3-Clause
import random
import uuid
import numpy as np
@@ -38,7 +39,7 @@ class BaseAgent:
if idx is None:
idx = 0
self.uuid=uuid.uuid4()
if multi_action_mode is None:
multi_action_mode = False
@@ -64,6 +65,7 @@ class BaseAgent:
self._registered_inventory = False
self._registered_endogenous = False
self._registered_components = False
self._setup = False # agent setup not completed
self._noop_action_dict = dict()
# Special flag to allow logic for multi-action-mode agents
@@ -78,10 +80,17 @@ class BaseAgent:
def idx(self):
"""Index used to identify this agent. Must be unique within the environment."""
return self._idx
@property
def is_setup(self):
    """bool: Whether scenario-level setup has completed for this agent (set via set_setup)."""
    return self._setup
def set_setup(self, value):
    """Mark whether scenario-level setup has completed for this agent.

    Args:
        value (bool): New value of the setup flag (read back via is_setup).
    """
    # Parameter renamed from `set` to `value`: `set` shadows the builtin, and
    # all visible callers invoke this positionally (e.g. agent.set_setup(True)),
    # so the rename is backward-compatible.
    self._setup = value
def register_inventory(self, resources):
"""Used during environment construction to populate inventory/escrow fields."""
assert not self._registered_inventory
if self._registered_inventory:
return
for entity_name in resources:
self.inventory[entity_name] = 0
self.escrow[entity_name] = 0
@@ -89,7 +98,8 @@ class BaseAgent:
def register_endogenous(self, endogenous):
"""Used during environment construction to populate endogenous state fields."""
assert not self._registered_endogenous
if self._registered_endogenous:
return
for entity_name in endogenous:
self.endogenous[entity_name] = 0
self._registered_endogenous = True
@@ -115,7 +125,8 @@ class BaseAgent:
def register_components(self, components):
"""Used during environment construction to set up state/action spaces."""
assert not self._registered_components
if self._registered_components:
return
for component in components:
n = component.get_n_actions(self.name)
if n is None:

View File

@@ -134,6 +134,7 @@ class BaseComponent(ABC):
def reset(self):
"""Reset any portion of the state managed by this component."""
world = self.world
self.n_agents = world.n_agents
all_agents = world.agents + [world.planner]
for agent in all_agents:
agent.state.update(self.get_additional_state_fields(agent.name))

View File

@@ -234,7 +234,7 @@ class BaseEnvironment(ABC):
self.num_agents = (
n_agents + n_planners
) # used in the warp_drive env wrapper (+ 1 for the planner)
# Components must be a tuple/list where each element is either a...
# tuple: ('Component Name', {Component kwargs})
# dict : {'Component Name': {Component kwargs}}
@@ -342,19 +342,14 @@ class BaseEnvironment(ABC):
self._components_dict[component_object.name] = component_object
self._shorthand_lookup[component_object.shorthand] = component_object
# Register the components with the agents
# to finish setting up their state/action spaces.
for agent in self.world.agents:
agent.register_inventory(self.resources)
agent.register_endogenous(self.endogenous)
agent.register_components(self._components)
self.world.planner.register_inventory(self.resources)
self.world.planner.register_components(self._components)
self._agent_lookup = {str(agent.idx): agent for agent in self.all_agents}
self.reapply_scenario_config_to_agents()
self._completions = 0
self._finish_episode=False
self._last_ep_metrics = None
# For dense logging
@@ -370,6 +365,16 @@ class BaseEnvironment(ABC):
# To collate all the agents ('0', '1', ...) data during reset and step
# into a single agent with index 'a'
self.collate_agent_step_and_reset_data = collate_agent_step_and_reset_data
def reapply_scenario_config_to_agents(self):
    """Re-register resources/endogenous/components with every agent and refresh lookups.

    Finishes (re)building agent state/action spaces and the world's agent
    lookup maps, e.g. after new agents are inserted into an existing env.
    """
    # Register the components with the agents
    # to finish setting up their state/action spaces.
    for agent in self.world.agents:
        agent.register_inventory(self.resources)
        agent.register_endogenous(self.endogenous)
        agent.register_components(self._components)
    # Lookup is keyed by the string form of the agent index.
    self._agent_lookup = {str(agent.idx): agent for agent in self.all_agents}
    self.world.apply_agent_db_to_world()
def _register_entities(self, entities):
for entity in entities:
@@ -501,6 +506,8 @@ class BaseEnvironment(ABC):
# Getters & Setters
# -----------------
def set_finish_episode(self,done):
self._finish_episode=done
def get_component(self, component_name):
"""
@@ -904,6 +911,9 @@ class BaseEnvironment(ABC):
# Reset the timestep counter
self.world.timestep = 0
# Reset done flag
self._finish_episode=False
# Perform the scenario reset,
# which includes resetting the world and agent states
self.reset_starting_layout()
@@ -920,6 +930,7 @@ class BaseEnvironment(ABC):
# Reset actions to that default.
for agent in self.all_agents:
agent.reset_actions()
agent.set_setup(True)
# Produce observations
obs = self._generate_observations(
@@ -1015,7 +1026,7 @@ class BaseEnvironment(ABC):
flatten_masks=self._flatten_masks,
)
rew = self._generate_rewards()
done = {"__all__": self.world.timestep >= self._episode_length}
done = {"__all__": self.world.timestep >= self._episode_length | self._finish_episode}
info = {k: {} for k in obs.keys()}
if self._dense_log_this_episode:

View File

@@ -76,8 +76,7 @@ class Registry:
See Registry class docstring for example.
"""
if cls_name.lower() not in self._lookup:
raise KeyError('"{}" is not a name of a registered class'.format(cls_name))
if cls_name.lower() not in self._lookup: raise KeyError('"{}" is not a name of a registered class'.format(cls_name))
return self._lookup[cls_name.lower()]
def has(self, cls_name):

View File

@@ -91,7 +91,10 @@ class Maps:
else:
raise NotImplementedError
self.reset_agent_maps(n_agents)
def reset_agent_maps(self,n_agents):
self.n_agents=n_agents
self._idx_map = np.stack(
[i * np.ones(shape=self.size) for i in range(self.n_agents)]
)
@@ -378,17 +381,8 @@ class World:
self.multi_action_mode_planner = bool(multi_action_mode_planner)
self._agent_class_idx_map={}
#create agents
self.agent_composition=agent_composition
self.n_agents=0
self._agents = []
for k,v in agent_composition.items():
self._agent_class_idx_map[k]=[]
for offset in range(v):
agent_class=agent_registry.get(k)
agent=agent_class(self.n_agents,self.multi_action_mode_agents)
self._agents.append(agent)
self._agent_class_idx_map[k].append(str(self.n_agents))
self.n_agents+=1
self.create_agents(agent_composition)
self.maps = Maps(world_size, self.n_agents, world_resources, world_landmarks)
planner_class = agent_registry.get("BasicPlanner")
@@ -402,6 +396,37 @@ class World:
self.cuda_function_manager = None
self.cuda_data_manager = None
def create_agents(self, agent_composition):
    """Create the world agent db with the given composition.

    Args:
        agent_composition (dict): Maps registered agent class name -> number
            of agents of that class to instantiate.
    """
    self.agent_composition=agent_composition
    self.n_agents=0
    self._agents = []
    for k,v in agent_composition.items():
        self._agent_class_idx_map[k]=[]
        for offset in range(v):
            agent_class=agent_registry.get(k)
            # Agent index is assigned sequentially across all classes.
            agent=agent_class(self.n_agents,self.multi_action_mode_agents)
            self._agents.append(agent)
            # NOTE: indices are recorded as strings here.
            self._agent_class_idx_map[k].append(str(self.n_agents))
            self.n_agents+=1
def apply_agent_db_to_world(self):
    """Apply the current agent db to the world's lookup and spatial maps.

    Enables insertion of new agents into an existing env: recomputes
    n_agents, rebuilds the class -> agent-index lookup, resets the
    per-agent map layers, and re-applies stored agent locations.
    """
    self.n_agents=len(self._agents)
    self._agent_class_idx_map={}
    self.maps.reset_agent_maps(self.n_agents) # reset map lookups
    # Rebuild the class -> indices mapping from the agent db.
    for idx in range(self.n_agents):
        cls=self.get_agent_class(idx)
        agent=self._agents[idx]
        # Bug fix: record str(idx), not the int, for consistency with
        # create_agents — mixing int and str entries would break consumers
        # of _agent_class_idx_map.
        self._agent_class_idx_map.setdefault(cls, []).append(str(idx))
        # apply agent locs db to maps
        if "loc" in agent.state:
            self.maps.set_agent_loc(agent,*agent.loc)
@property
def agents(self):
"""Return a list of the agent objects in the world (sorted by index)."""

View File

@@ -37,7 +37,7 @@ class ContinuousDoubleAuction(BaseComponent):
name = "ContinuousDoubleAuction"
component_type = "Trade"
required_entities = ["Coin", "Labor"]
agent_subclasses = ["BasicMobileAgent"]
agent_subclasses = ["BasicMobileAgent","TradingAgent"]
def __init__(
self,
@@ -159,7 +159,7 @@ class ContinuousDoubleAuction(BaseComponent):
"""If agent can submit an ask for resource."""
return (
self.n_orders[resource][agent.idx] < self.max_num_orders
and agent.state["inventory"][resource] > 0
and agent.state["inventory"][resource] >= 1
)
# Core components for this market
@@ -417,7 +417,7 @@ class ContinuousDoubleAuction(BaseComponent):
"""
# This component adds 2*(1+max_bid_ask)*n_resources possible actions:
# buy/sell x each-price x each-resource
if agent_cls_name == "BasicMobileAgent":
if agent_cls_name in self.agent_subclasses:
trades = []
for c in self.commodities:
trades.append(
@@ -526,14 +526,14 @@ class ContinuousDoubleAuction(BaseComponent):
for _, agent in enumerate(world.agents):
# Private to the agent
available_ask_agent=full_asks - self.ask_hists[c][agent.idx]
available_bid_agent=full_bids- self.bid_hists[c][agent.idx]
obs[agent.idx].update(
{
"market_rate-{}".format(c): market_rate,
"market_rate-{}".format(c): market_rate*self.inv_scale,
"price_history-{}".format(c): scaled_price_history,
"available_asks-{}".format(c): full_asks
- self.ask_hists[c][agent.idx],
"available_bids-{}".format(c): full_bids
- self.bid_hists[c][agent.idx],
"available_asks-{}".format(c): np.clip(available_ask_agent,0,self.max_num_orders),
"available_bids-{}".format(c): np.clip(available_bid_agent,0,self.max_num_orders),
"my_asks-{}".format(c): self.ask_hists[c][agent.idx],
"my_bids-{}".format(c): self.bid_hists[c][agent.idx],
}

View File

@@ -66,10 +66,10 @@ class Coin(Resource):
collectible = False
@resource_registry.add
class RawGem(Resource):
class GemRaw(Resource):
"""Raw Gem that can be processed further"""
name = "Raw_Gem"
name = "Gem_Raw"
color = np.array([241, 233, 219]) / 255.0
collectible = True
@@ -79,6 +79,6 @@ class Gem(Resource):
name = "Gem"
color = np.array([241, 233, 219]) / 255.0
collectible = False
craft_recp= {"Raw_Gem": 1}
collectible = True
craft_recp= {"Gem_Raw": 1}
craft_labour_base= 1

BIN
basic 4000.ai Normal file

Binary file not shown.

BIN
basic.ai Normal file

Binary file not shown.

View File

@@ -1,4 +1,6 @@
from . import(
simple_gather,
simple_build
simple_build,
crafting,
external_market
)

287
components/crafting.py Normal file
View File

@@ -0,0 +1,287 @@
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
import numpy as np
from ai_economist.foundation.base.base_component import (
BaseComponent,
component_registry,
)
from ai_economist.foundation.entities.resources import Resource, resource_registry
@component_registry.add
class Craft(BaseComponent):
"""
Allows mobile agents to build house landmarks in the world using stone and wood,
earning income.
Can be configured to include heterogeneous building skill where agents earn
different levels of income when building.
Args:
commodities (list(str)): list of commodities that can be crafted in the local world
payment_max_skill_multiplier (int): Maximum skill multiplier that an agent
can sample. Must be >= 1. Default is 1.
skill_dist (str): Distribution type for sampling skills. Default ("none")
gives all agents identical skill equal to a multiplier of 1. "pareto" and
"lognormal" sample skills from the associated distributions.
build_labor (float): Labor cost associated with building a house.
Must be >= 0. Default is 10.
"""
name = "Craft"
component_type = "Build"
required_entities = ["Coin", "Labor"]
agent_subclasses = ["BasicMobileAgent"]
commodities=[]
def __init__(
    self,
    *base_component_args,
    commodities=[],
    max_skill_amount_benefit=1,
    max_skill_labour_benefit=1,
    skill_dist="none",
    **base_component_kwargs
):
    """Configure the craftable commodities and skill sampling.

    Args:
        commodities (list(str)): Registered resource names to make craftable.
            Must be non-empty; each craftable entry must define a non-empty
            ``craft_recp`` and a non-negative ``craft_labour_base``.
        max_skill_amount_benefit (float): Upper bound on the skill-based
            output-amount multiplier. Must be >= 1.
        max_skill_labour_benefit (float): Lower bound on the skill-based
            labour discount factor. Must be <= 1.
        skill_dist (str): "none" (identical skills) or "pareto".
    """
    # NOTE: `commodities=[]` default kept for interface compatibility; it is
    # only read, never mutated, so the shared default is harmless.
    assert len(commodities)>0
    # Bug fix: copy the class-level list before appending. Appending to the
    # shared class attribute would leak required entities across Craft
    # instances.
    self.required_entities = list(self.required_entities)
    #setup commodities
    self.recip_map={}
    self.commodities=[]
    for v in commodities:
        res_class=resource_registry.get(v)
        res=res_class()
        if res.craft_recp!=None:
            # is craftable
            assert res.craft_recp!={}
            assert res.craft_labour_base >= 0
            self.required_entities.append(v)
            self.recip_map[res.name]=res.craft_recp
            self.commodities.append(res)
    self.max_skill_amount_benefit=max_skill_amount_benefit
    self.max_skill_labour_benefit=max_skill_labour_benefit
    assert self.max_skill_amount_benefit >= 1
    assert self.max_skill_labour_benefit <= 1
    self.skill_dist = skill_dist.lower()
    assert self.skill_dist in ["none", "pareto"]
    self.sampled_skills = {}
    self.builds = []
    super().__init__(*base_component_args, **base_component_kwargs)
def agent_can_build(self, agent, res):
    """Return True if the agent holds every input resource required to craft ``res``."""
    recipe = self.recip_map.get(res)
    if recipe is None:
        # Not a craftable commodity.
        return False
    return all(
        agent.state["inventory"][ingredient] >= needed
        for ingredient, needed in recipe.items()
    )
# Required methods for implementing components
# --------------------------------------------
def get_n_actions(self, agent_cls_name):
    """
    See base_component.py for detailed description.

    Supported agent classes get one craft action per craftable commodity;
    other classes get None.
    """
    if agent_cls_name not in self.agent_subclasses:
        return None
    return len(self.commodities)
def get_additional_state_fields(self, agent_cls_name):
    """
    See base_component.py for detailed description.

    No static state fields are declared here; craft skills are populated
    later during additional_reset_steps.
    """
    if agent_cls_name not in self.agent_subclasses or agent_cls_name == "BasicMobileAgent":
        return {}
    raise NotImplementedError
def component_step(self):
    """
    See base_component.py for detailed description.

    Consume recipe inputs and produce the chosen commodity for each agent
    that selected a craft action and can afford it; log each craft event.
    """
    world = self.world
    build = []  # craft events that occurred this timestep
    # Visit agents in random order so no agent index is systematically favoured.
    for agent in world.get_random_order_agents():
        action = agent.get_component_action(self.name)
        # This component doesn't apply to this agent!
        if action is None:
            continue
        # NO-OP!
        if action == 0:
            pass
        # Craft! (If you can.)
        else:
            # Actions 1..N select the (action-1)-th craftable commodity.
            action-=1
            comm=self.commodities[action]
            if self.agent_can_build(agent,comm.name):
                # Remove the recipe's input resources
                for resource, cost in comm.craft_recp.items():
                    agent.state["inventory"][resource] -= cost
                # Receive the (skill-scaled) amount of the crafted commodity
                agent.state["inventory"][comm.name] += agent.state["craft_amount"][comm.name]
                # Incur the (skill-scaled) labor cost for crafting
                agent.state["endogenous"]["Labor"] += agent.state["craft_labour"][comm.name]
                build.append(
                    {
                        "crafter": agent.idx,
                        "craft_commodity": comm.name,
                        "craft_skill": agent.state["craft_skill"][comm.name],
                        "craft_amount": agent.state["craft_amount"][comm.name],
                        "craft_labour": agent.state["craft_labour"][comm.name]
                    }
                )
            else:
                # Chose a craft it cannot afford (mask violation): flag it so
                # the scenario can penalise the action.
                agent.bad_action=True
    self.builds.append(build)
def generate_observations(self):
    """
    See base_component.py for detailed description.

    Each supported agent observes its own per-commodity craft skill. The
    planner observes nothing from this component.
    """
    obs = dict()
    for agent in self.world.agents:
        if agent.name not in self.agent_subclasses:
            continue
        obs[agent.idx] = {
            "craft_skill_{}".format(comm.name): agent.state["craft_skill"][comm.name]
            for comm in self.commodities
        }
    return obs
def generate_masks(self, completions=0):
    """
    See base_component.py for detailed description.

    A craft action is masked unless the agent holds the full recipe for
    that commodity.
    """
    masks = {}
    for agent in self.world.agents:
        if agent.name in self.agent_subclasses:
            can_craft = [
                self.agent_can_build(agent, comm.name) for comm in self.commodities
            ]
            masks[agent.idx] = np.array(can_craft)
    return masks
# For non-required customization
# ------------------------------
def get_metrics(self):
    """
    Metrics that capture what happened through this component.

    Returns:
        metrics (dict): A dictionary of {"metric_name": metric_value},
            where metric_value is a scalar.

    Currently no metrics are reported. The previous commented-out draft
    (removed here as dead code) was copied from the Build component and
    keyed events on "builder", while component_step logs crafts under
    "crafter"; a correct implementation should count "crafter" entries
    per agent from self.builds.
    """
    return {}
def additional_reset_steps(self):
    """
    See base_component.py for detailed description.

    Re-sample crafting skills for agents that have not yet been set up.
    For each commodity, a skill sample determines the crafted output amount
    ("craft_amount") and a labour discount applied to the commodity's base
    labour cost ("craft_labour").
    """
    world = self.world
    MSAB= self.max_skill_amount_benefit
    MSLB= self.max_skill_labour_benefit
    for agent in world.agents:
        # Skip unsupported classes and agents whose setup persists.
        if (agent.name not in self.agent_subclasses) or agent.is_setup:
            continue
        agent.state["craft_skill"]={}
        agent.state["craft_labour"]={}
        agent.state["craft_amount"]={}
        for comm in self.commodities:
            if self.skill_dist == "none":
                sampled_skill = 1
                amount= 1
                # Bug fix: the original set an unused `labour = 1` here and
                # left `labour_modifier` undefined, raising NameError below.
                labour_modifier = 1
            elif self.skill_dist == "pareto":
                sampled_skill = np.random.pareto(2)
                # Output amount grows with skill, capped by MSAB.
                amount = 1+np.minimum(MSAB,(MSAB-1) * (sampled_skill) )
                # Labour cost shrinks with skill, floored by MSLB.
                labour_modifier = 1 - np.minimum(1 - MSLB, (1 - MSLB) * sampled_skill)
            else:
                raise NotImplementedError
            agent.state["craft_skill"][comm.name]=sampled_skill
            agent.state["craft_labour"][comm.name]=comm.craft_labour_base*labour_modifier
            agent.state["craft_amount"][comm.name]=amount
    self.builds = []
def get_dense_log(self):
    """
    Log craft events.

    Returns:
        builds (list): One entry per timestep, each describing any craft
            events that occurred on that timestep.
    """
    return self.builds

View File

@@ -0,0 +1,221 @@
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
import numpy as np
from ai_economist.foundation.base.base_component import (
BaseComponent,
component_registry,
)
@component_registry.add
class ExternalMarket(BaseComponent):
"""
Allows mobile agents to build house landmarks in the world using stone and wood,
earning income.
Can be configured to include heterogeneous building skill where agents earn
different levels of income when building.
Args:
payment (int): Default amount of coin agents earn from building.
Must be >= 0. Default is 10.
market_demand (dict): Resource name -> amout of money
skill_dist (str): Distribution type for sampling skills. Default ("none")
gives all agents identical skill equal to a multiplier of 1. "pareto" and
"lognormal" sample skills from the associated distributions.
build_labor (float): Labor cost associated with building a house.
Must be >= 0. Default is 10.
"""
name = "ExternalMarket"
component_type = "Trade"
required_entities = ["Coin", "Labor"]
agent_subclasses = ["TradingAgent"]
def __init__(
    self,
    *base_component_args,
    market_demand={},
    trade_labor=1.0,
    **base_component_kwargs
):
    """Configure the external market.

    Args:
        market_demand (dict): Maps resource name -> coin paid per unit sold.
        trade_labor (float): Labor cost incurred per sale. Must be >= 0.
    """
    super().__init__(*base_component_args, **base_component_kwargs)
    self.market_demand = market_demand
    # Actions are 1-based: action i sells the i-th resource in market_demand.
    self.action_res_map = {
        slot: res for slot, res in enumerate(market_demand.keys(), start=1)
    }
    self.trade_labor = float(trade_labor)
    assert self.trade_labor >= 0
    self.builds = []
def agent_can_sell(self, agent, res):
    """Return True if the agent holds at least one unit of ``res`` to sell."""
    return agent.state["inventory"][res] >= 1
# Required methods for implementing components
# --------------------------------------------
def get_n_actions(self, agent_cls_name):
    """
    See base_component.py for detailed description.

    Supported agent classes get one sell action per demanded resource;
    other classes get None.
    """
    if agent_cls_name not in self.agent_subclasses:
        return None
    return len(self.action_res_map)
def get_additional_state_fields(self, agent_cls_name):
    """
    See base_component.py for detailed description.

    This component adds no agent state fields.
    """
    return {}
def component_step(self):
    """
    See base_component.py for detailed description.

    Sell one unit of the chosen resource to the external market: remove the
    unit, credit the market price in Coin, and add the trade labor cost.
    """
    world = self.world
    build = []  # sale events that occurred this timestep
    # Visit agents in random order so no agent index is systematically favoured.
    for agent in world.get_random_order_agents():
        action = agent.get_component_action(self.name)
        # This component doesn't apply to this agent!
        if action is None:
            continue
        # NO-OP!
        if action == 0:
            continue
        # Actions 1..N map to resources via action_res_map.
        res_name=self.action_res_map[action]
        # Sell! (If you can.)
        if self.agent_can_sell(agent,res_name):
            # Remove one unit of the sold resource
            agent.state["inventory"][res_name] -= 1
            # Receive the external market price
            agent.state["inventory"]["Coin"] += self.market_demand[res_name]
            # Incur the labor cost of trading
            agent.state["endogenous"]["Labor"] += self.trade_labor
            build.append(
                {
                    "seller": agent.idx,
                    "commodity": res_name,
                    "income": self.market_demand[res_name],
                }
            )
        else:
            # NOTE(review): hard failure on an unaffordable sale assumes the
            # action masks always prevent this case — confirm that is
            # intended (Craft flags agent.bad_action instead of raising).
            raise ValueError
    self.builds.append(build)
def generate_observations(self):
    """
    See base_component.py for detailed description.

    Each trading agent observes the (scaled) external price of every
    resource the market demands. The planner observes nothing.
    """
    obs_dict = dict()
    for agent in self.world.agents:
        if agent.name in self.agent_subclasses:
            obs_dict[agent.idx] = {}
            for res_name,coin in self.market_demand.items():
                # Bug fix: the original used ':' (an annotation statement)
                # instead of '=', so the observation was never stored.
                # NOTE(review): self.inv_scale is not set in this component's
                # __init__; presumably provided by BaseComponent — confirm.
                obs_dict[agent.idx]["external_{}_price".format(res_name)] = self.inv_scale*coin
    return obs_dict
def generate_masks(self, completions=0):
    """
    See base_component.py for detailed description.

    A sell action is masked unless the agent holds at least one unit of the
    corresponding resource.
    """
    masks = {}
    for agent in self.world.agents:
        if agent.name in self.agent_subclasses:
            # np.array for consistency with the mask format of the other
            # components (e.g. Craft.generate_masks); the original returned a
            # plain Python list here.
            masks[agent.idx] = np.array(
                [self.agent_can_sell(agent, res) for res in self.market_demand]
            )
    return masks
# For non-required customization
# ------------------------------
def get_metrics(self):
    """
    Metrics that capture what happened through this component.

    Returns:
        metrics (dict): A dictionary of {"metric_name": metric_value},
            where metric_value is a scalar.

    Currently no metrics are reported. The previous commented-out draft
    (removed here as dead code) was copied from the Build component and
    keyed events on "builder", while component_step logs sales under
    "seller"; a correct implementation should count "seller" entries per
    agent from self.builds.
    """
    return {}
def additional_reset_steps(self):
    """
    See base_component.py for detailed description.

    Clear the sale log at the start of each episode.
    """
    self.builds = []
def get_dense_log(self):
    """
    Log sales to the external market.

    Returns:
        builds (list): One entry per timestep, each describing any sales
            that occurred on that timestep.
    """
    return self.builds

View File

@@ -44,7 +44,7 @@ class SimpleCraft(BaseComponent):
payment=10,
payment_max_skill_multiplier=1,
skill_dist="none",
build_labor=10.0,
build_labor=1.0,
**base_component_kwargs
):
super().__init__(*base_component_args, **base_component_kwargs)

View File

@@ -1,5 +1,6 @@
from . import (
simple_market,
econ_wrapper
econ_wrapper,
econ
)

482
envs/econ.py Normal file
View File

@@ -0,0 +1,482 @@
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
from copy import deepcopy
from pathlib import Path
import numpy as np
from scipy import signal
from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry
from ai_economist.foundation.scenarios.utils import rewards, social_metrics
import yaml
@scenario_registry.add
class Econ(BaseEnvironment):
"""
World containing stone and wood with stochastic regeneration. Refers to a fixed
layout file (see ./map_txt/ for examples) to determine the spatial arrangement of
stone, wood, and water tiles.
Args:
action_against_mask_penelty=-1 (int): Reward penelty for performing action against mask
full_observability (bool): Whether the mobile agents' spatial observation
includes the full world view or is instead an egocentric view.
mobile_agent_observation_range (int): If not using full_observability,
the spatial range (on each side of the agent) that is visible in the
spatial observations.
env_layout_file (str): Name of the layout file in ./map_txt/ to use.
Note: The world dimensions of that layout must match the world dimensions
argument used to construct the environment.
resource_regen_prob (float): Probability that an empty source tile will
regenerate a new resource unit.
fixed_four_skill_and_loc (bool): Whether to use a fixed set of build skills and
starting locations, with agents grouped into starting locations based on
which skill quartile they are in. False, by default.
True, for experiments in https://arxiv.org/abs/2004.13332.
Note: Requires that the environment uses the "Build" component with
skill_dist="pareto".
starting_agent_coin (int, float): Amount of coin agents have at t=0. Defaults
to zero coin.
isoelastic_eta (float): Parameter controlling the shape of agent utility
wrt coin endowment.
energy_cost (float): Coefficient for converting labor to negative utility.
energy_warmup_constant (float): Decay constant that controls the rate at which
the effective energy cost is annealed from 0 to energy_cost. Set to 0
(default) to disable annealing, meaning that the effective energy cost is
always energy_cost. The units of the decay constant depend on the choice of
energy_warmup_method.
energy_warmup_method (str): How to schedule energy annealing (warmup). If
"decay" (default), use the number of completed episodes. If "auto",
use the number of timesteps where the average agent reward was positive.
planner_reward_type (str): The type of reward used for the planner. Options
are "coin_eq_times_productivity" (default),
"inv_income_weighted_coin_endowment", and "inv_income_weighted_utility".
mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/
"coin_eq_times_productivity". Default is 0, which weights equality and
productivity equally. If set to 1, only productivity is rewarded.
"""
name = "econ"
agent_subclasses = ["BasicMobileAgent"]
required_entities = ["Wood", "Stone", "Water","Gem_Raw","Gem"]
def __init__(
    self,
    *base_env_args,
    resource_regen_prob=0.01,
    fixed_four_skill_and_loc=False,
    starting_agent_coin=0,
    isoelastic_eta=0.23,
    energy_cost=0.21,
    energy_warmup_constant=0,
    energy_warmup_method="decay",
    planner_reward_type="coin_eq_times_productivity",
    mixing_weight_gini_vs_coin=0.0,
    **base_env_kwargs,
):
    """Configure the Econ scenario; see the class docstring for argument details.

    NOTE(review): fixed_four_skill_and_loc is accepted but never read in this
    constructor — confirm whether it should be stored/used.
    """
    super().__init__(*base_env_args, **base_env_kwargs)
    # Regeneration settings for the source resources.
    self.layout_specs = dict(
        Wood={
            "regen_weight": float(resource_regen_prob),
            "regen_halfwidth": 0,
            "max_health": 1,
        },
        Stone={
            "regen_weight": float(resource_regen_prob),
            "regen_halfwidth": 0,
            "max_health": 1,
        },
    )
    assert 0 <= self.layout_specs["Wood"]["regen_weight"] <= 1
    assert 0 <= self.layout_specs["Stone"]["regen_weight"] <= 1
    # How much coin do agents begin with at upon reset
    self.starting_agent_coin = float(starting_agent_coin)
    assert self.starting_agent_coin >= 0.0
    # Controls the diminishing marginal utility of coin.
    # isoelastic_eta=0 means no diminishing utility.
    self.isoelastic_eta = float(isoelastic_eta)
    assert 0.0 <= self.isoelastic_eta <= 1.0
    # The amount that labor is weighted in utility computation
    # (once annealing is finished)
    self.energy_cost = float(energy_cost)
    assert self.energy_cost >= 0
    # Which method to use for calculating the progress of energy annealing
    # If method = 'decay': #completed episodes
    # If method = 'auto' : #timesteps where avg. agent reward > 0
    self.energy_warmup_method = energy_warmup_method.lower()
    assert self.energy_warmup_method in ["decay", "auto"]
    # Decay constant for annealing to full energy cost
    # (if energy_warmup_constant == 0, there is no annealing)
    self.energy_warmup_constant = float(energy_warmup_constant)
    assert self.energy_warmup_constant >= 0
    self._auto_warmup_integrator = 0
    # Which social welfare function to use
    self.planner_reward_type = str(planner_reward_type).lower()
    # How much to weight equality if using SWF=eq*prod:
    # 0 -> SWF=eq * prod
    # 1 -> SWF=prod
    self.mixing_weight_gini_vs_coin = float(mixing_weight_gini_vs_coin)
    assert 0 <= self.mixing_weight_gini_vs_coin <= 1.0
    # Use this to calculate marginal changes and deliver that as reward
    self.init_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.prev_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.curr_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    """
    Fixed Four Skill and Loc
    ------------------------
    """
    self.agent_starting_pos = {agent.idx: [] for agent in self.world.agents}
    # When True, resets skip clearing layout/agent state (see reset methods).
    self._persist_between_resets=False
    self.last_log_loged={}
@property
def energy_weight(self):
    """
    Energy annealing progress in [0, 1]. Multiply with self.energy_cost to
    get the effective energy coefficient.
    """
    constant = self.energy_warmup_constant
    if constant <= 0.0:
        return 1.0
    if self.energy_warmup_method == "decay":
        progress = self._completions
    elif self.energy_warmup_method == "auto":
        progress = self._auto_warmup_integrator
    else:
        raise NotImplementedError
    return float(1.0 - np.exp(-progress / constant))
def is_bad_action(self, agent):
    """Read-and-clear the agent's bad-action flag, returning its prior value."""
    was_bad, agent.bad_action = agent.bad_action, False
    return was_bad
def get_current_optimization_metrics(self):
    """
    Compute optimization metrics based on the current state. Used to compute reward.

    Returns:
        curr_optimization_metric (dict): A dictionary of {agent.idx: metric}
            with an entry for each agent (including the planner) in the env.
    """
    curr_optimization_metric = {}
    # (for agents): isoelastic utility of coin minus annealed labor cost.
    for agent in self.world.agents:
        rew= rewards.isoelastic_coin_minus_labor(
            coin_endowment=agent.total_endowment("Coin"),
            total_labor=agent.state["endogenous"]["Labor"],
            isoelastic_eta=self.isoelastic_eta,
            labor_coefficient=self.energy_weight * self.energy_cost,
        )
        #rew-=agent.state["endogenous"]["noops"]
        curr_optimization_metric[agent.idx] = rew
    # (for the planner): social welfare per the configured reward type.
    # NOTE(review): this matches "inv_income_weighted_coin_endowments"
    # (plural); the class docstring lists the singular spelling — confirm
    # which form callers actually pass.
    if self.planner_reward_type == "coin_eq_times_productivity":
        curr_optimization_metric[
            self.world.planner.idx
        ] = rewards.coin_eq_times_productivity(
            coin_endowments=np.array(
                [agent.total_endowment("Coin") for agent in self.world.agents]
            ),
            equality_weight=1 - self.mixing_weight_gini_vs_coin,
        )
    elif self.planner_reward_type == "inv_income_weighted_coin_endowments":
        curr_optimization_metric[
            self.world.planner.idx
        ] = rewards.inv_income_weighted_coin_endowments(
            coin_endowments=np.array(
                [agent.total_endowment("Coin") for agent in self.world.agents]
            )
        )
    elif self.planner_reward_type == "inv_income_weighted_utility":
        curr_optimization_metric[
            self.world.planner.idx
        ] = rewards.inv_income_weighted_utility(
            coin_endowments=np.array(
                [agent.total_endowment("Coin") for agent in self.world.agents]
            ),
            utilities=np.array(
                [curr_optimization_metric[agent.idx] for agent in self.world.agents]
            ),
        )
    else:
        print("No valid planner reward selected!")
        raise NotImplementedError
    return curr_optimization_metric
# The following methods must be implemented for each scenario
# -----------------------------------------------------------
def reset_starting_layout(self):
    """
    Part 1/2 of scenario reset. This method handles resetting the state of the
    environment managed by the scenario (i.e. resource & landmark layout).

    Here, clear the maps and seed one unit of each source resource at (0, 0).
    """
    if self._persist_between_resets: # skip when we only want to modify some values, not actually reset
        return
    self.world.maps.clear()
    resources = ["Wood", "Stone","Gem_Raw"]
    # Seed a single unit of each resource at the map origin.
    for resource in resources:
        self.world.maps.set_point_add(resource,0,0,1)
def reset_agent_states(self):
    """
    Part 2/2 of scenario reset. This method handles resetting the state of the
    agents themselves (i.e. inventory, locations, etc.).

    Here, empty inventories and place mobile agents in random, accessible
    locations to start. Note: If using fixed_four_skill_and_loc, the starting
    locations will be overridden in self.additional_reset_steps.

    When ``self._persist_between_resets`` is set, agent locations and any
    already-set-up agent state are carried over instead of being cleared;
    only agents not yet flagged as set up get fresh inventories.
    """
    if not self._persist_between_resets:
        self.world.clear_agent_locs()
    for agent in self.world.agents:
        if not self._persist_between_resets:
            agent.set_setup(False) # resets agent states
        if not agent.is_setup: # agent has not been setup for scenario
            # Zero every inventory/escrow/endogenous entry.
            # NOTE(review): escrow is keyed off inventory.keys() — this assumes
            # the two registries always hold the same entity names; confirm.
            agent.state["inventory"] = {k: 0 for k in agent.inventory.keys()}
            agent.state["escrow"] = {k: 0 for k in agent.inventory.keys()}
            agent.state["endogenous"] = {k: 0 for k in agent.endogenous.keys()}
            # Add starting coin
            agent.state["inventory"]["Coin"] = float(self.starting_agent_coin)
        agent.bad_action=False  # cleared every reset, even when state persists
    # The planner's holdings are always cleared, regardless of persistence.
    self.world.planner.state["inventory"] = {
        k: 0 for k in self.world.planner.inventory.keys()
    }
    self.world.planner.state["escrow"] = {
        k: 0 for k in self.world.planner.escrow.keys()
    }
def scenario_step(self):
    """
    Advance scenario-owned world state by one timestep.

    Called from the environment's ``step`` (after all component steps and
    before observations/rewards are generated). Here it handles resource
    regeneration: 20 units of each base resource are added at map cell
    (0, 0) on every step.
    """
    for resource in ("Wood", "Stone", "Gem_Raw"):
        self.world.maps.set_point_add(resource, 0, 0, 20)
def generate_observations(self):
    """
    Generate observations associated with this scenario.

    Returns:
        obs (dict): {agent.idx: obs_dict}. The planner observes its own
            inventory; each mobile agent (keyed by the string form of its
            index) observes its own inventory. All quantities are scaled
            by ``self.inv_scale``.
    """
    scale = self.inv_scale
    obs = {
        self.world.planner.idx: {
            "inventory-" + name: amount * scale
            for name, amount in self.world.planner.inventory.items()
        }
    }
    for agent in self.world.agents:
        obs[str(agent.idx)] = {
            "inventory-" + name: amount * scale
            for name, amount in agent.inventory.items()
        }
    return obs
def compute_reward(self):
    """
    Apply the reward function(s) associated with this scenario to get the rewards
    from this step.

    Returns:
        rew (dict): A dictionary of {agent.idx: scalar reward}, with an entry
            for each agent in the environment (including the planner).

    Rewards are computed as the marginal utility (agents) or marginal social
    welfare (planner) experienced on this timestep. Ignoring discounting,
    this means that agents' (planner's) objective is to maximize the utility
    (social welfare) associated with the terminal state of the episode.
    """
    # "curr_optimization_metric" hasn't been updated yet, so it gives us the
    # utility from the last step.
    utility_at_end_of_last_time_step = deepcopy(self.curr_optimization_metric)
    # compute current objectives and store the values
    self.curr_optimization_metric = self.get_current_optimization_metrics()
    # reward = curr - prev objectives
    rew={}
    for k, v in self.curr_optimization_metric.items():
        rew[k] = float(v - utility_at_end_of_last_time_step[k])
        if k!="p":
            # "p" is the planner's key; only mobile agents get the penalty.
            # NOTE(review): indexing self.world.agents[k] assumes agent.idx
            # equals the agent's position in the agents list — confirm.
            if self.is_bad_action(self.world.agents[k]):
                rew[k]-=1  # flat -1 penalty for an invalid action this step
    # store the previous objective values
    self.prev_optimization_metric.update(utility_at_end_of_last_time_step)

    # Automatic Energy Cost Annealing
    # -------------------------------
    avg_agent_rew = np.mean([rew[a.idx] for a in self.world.agents])
    # Count the number of timesteps where the avg agent reward was > 0
    if avg_agent_rew > 0:
        self._auto_warmup_integrator += 1
    return rew
# Optional methods for customization
# ----------------------------------
def additional_reset_steps(self):
    """
    Final scenario-specific hook of the reset cycle.

    Runs after reset_starting_layout()/reset_agent_states() and after every
    registered component's reset(). Re-seeds all three optimization-metric
    trackers from the freshly reset state, so the first step's reward is a
    marginal change measured against this baseline. (If using
    fixed_four_skill_and_loc, skill/location assignment also happens in
    this phase of the reset cycle.)
    """
    metrics_now = self.get_current_optimization_metrics()
    self.curr_optimization_metric = deepcopy(metrics_now)
    self.init_optimization_metric = deepcopy(metrics_now)
    self.prev_optimization_metric = deepcopy(metrics_now)
def scenario_metrics(self):
    """
    Produce scenario-level metrics (merged with component metrics in the
    environment's 'metrics' property).

    Returns a flat {metric_key: scalar} dict summarizing social welfare,
    per-agent endowments, endogenous quantities, utilities, and the
    labor-cost annealing state. No nested values or lists.
    """
    metrics = dict()

    coin_endowments = np.array(
        [agent.total_endowment("Coin") for agent in self.world.agents]
    )

    # Aggregate social statistics over the mobile agents' coin holdings.
    metrics["social/productivity"] = social_metrics.get_productivity(
        coin_endowments
    )
    metrics["social/equality"] = social_metrics.get_equality(coin_endowments)

    utilities = np.array(
        [self.curr_optimization_metric[agent.idx] for agent in self.world.agents]
    )

    # Report every candidate planner objective, regardless of which one the
    # planner is actually optimizing.
    metrics["social_welfare/coin_eq_times_productivity"] = (
        rewards.coin_eq_times_productivity(
            coin_endowments=coin_endowments, equality_weight=1.0
        )
    )
    metrics["social_welfare/inv_income_weighted_coin_endow"] = (
        rewards.inv_income_weighted_coin_endowments(
            coin_endowments=coin_endowments
        )
    )
    metrics["social_welfare/inv_income_weighted_utility"] = (
        rewards.inv_income_weighted_utility(
            coin_endowments=coin_endowments, utilities=utilities
        )
    )

    # Per-agent breakdowns (all_agents includes the planner).
    for agent in self.all_agents:
        for resource in agent.inventory:
            metrics["endow/{}/{}".format(agent.idx, resource)] = (
                agent.total_endowment(resource)
            )
        if agent.endogenous is not None:
            for resource, quantity in agent.endogenous.items():
                metrics["endogenous/{}/{}".format(agent.idx, resource)] = quantity
        metrics["util/{}".format(agent.idx)] = self.curr_optimization_metric[
            agent.idx
        ]

    # Labor weight / energy-cost annealing state.
    metrics["labor/weighted_cost"] = self.energy_cost * self.energy_weight
    metrics["labor/warmup_integrator"] = int(self._auto_warmup_integrator)
    return metrics

View File

@@ -0,0 +1,341 @@
import numpy as np
from ai_economist import foundation
from stable_baselines3.common.vec_env import vec_frame_stack
from stable_baselines3.common.evaluation import evaluate_policy
from sb3_contrib.ppo_mask import MaskablePPO
import envs
import wrapper
import resources
from agents import trading_agent
from wrapper.base_econ_wrapper import BaseEconWrapper
from wrapper.reciever_econ_wrapper import RecieverEconWrapper
from wrapper.sb3_econ_converter import SB3EconConverter
from tqdm import tqdm
import components
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
from sb3_contrib import RecurrentPPO
from envs.econ_wrapper import EconVecEnv
from stable_baselines3.common.callbacks import BaseCallback
import yaml
import time
from threading import Thread
# Training-environment configuration, passed verbatim to
# foundation.make_env_instance(**env_config).
env_config = {
    # ===== SCENARIO CLASS =====
    # Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
    # The environment object will be an instance of the Scenario class.
    'scenario_name': 'econ',

    # ===== COMPONENTS =====
    # Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
    # "component_name" refers to the Component class's name in the Component Registry (foundation.components)
    # {component_kwargs} is a dictionary of kwargs passed to the Component class
    # The order in which components reset, step, and generate obs follows their listed order below.
    'components': [
        # (1) Crafting Gems (skill drawn from a Pareto distribution)
        ('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
        # (2) Trading collectible resources
        ('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
        # (4) Exogenous demand for crafted Gems
        ('ExternalMarket',{'market_demand':{
            'Gem': 15
        }}),
    ],

    # ===== SCENARIO CLASS ARGUMENTS =====
    # (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
    'starting_agent_coin': 10,
    'fixed_four_skill_and_loc': True,

    # ===== STANDARD ARGUMENTS ======
    # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
    'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
    'world_size': [5, 5], # [Height, Width] of the env world
    'episode_length': 256, # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 100,
    'world_dense_log_frequency':1,
    'energy_cost':0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 4000,

    # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
    # Otherwise, the policy selects only 1 action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,

    # When flattening observations, concatenate scalar & vector observations before output.
    # Otherwise, return observations with minimal processing.
    'flatten_observations': False,
    # When Flattening masks, concatenate each action subspace mask into a single array.
    # Note: flatten_masks = True is required for masking action logits in the code below.
    'flatten_masks': True,
}

# Evaluation-environment configuration. Mirrors env_config except for a 1x1
# world and per-episode dense logging, so eval replays can be printed.
eval_env_config = {
    # ===== SCENARIO CLASS =====
    # Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
    # The environment object will be an instance of the Scenario class.
    'scenario_name': 'econ',

    # ===== COMPONENTS =====
    # Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
    # "component_name" refers to the Component class's name in the Component Registry (foundation.components)
    # {component_kwargs} is a dictionary of kwargs passed to the Component class
    # The order in which components reset, step, and generate obs follows their listed order below.
    'components': [
        # (1) Crafting Gems (skill drawn from a Pareto distribution)
        ('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
        # (2) Trading collectible resources
        ('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
        # (4) Exogenous demand for crafted Gems
        ('ExternalMarket',{'market_demand':{
            'Gem': 15
        }}),
    ],

    # ===== SCENARIO CLASS ARGUMENTS =====
    # (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
    'starting_agent_coin': 10,
    'fixed_four_skill_and_loc': True,

    # ===== STANDARD ARGUMENTS ======
    # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
    'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
    'world_size': [1, 1], # [Height, Width] of the env world
    'episode_length': 256, # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 1,
    'world_dense_log_frequency':1,
    'energy_cost':0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 4000,

    # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
    # Otherwise, the policy selects only 1 action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,

    # When flattening observations, concatenate scalar & vector observations before output.
    # Otherwise, return observations with minimal processing.
    'flatten_observations': False,
    # When Flattening masks, concatenate each action subspace mask into a single array.
    # Note: flatten_masks = True is required for masking action logits in the code below.
    'flatten_masks': True,
}

# Number of stacked observation frames fed to each policy (VecFrameStack).
num_frames=5
class TensorboardCallback(BaseCallback):
    """
    SB3 callback that logs economy-level productivity values to tensorboard.

    On each training step it records total productivity from the most
    recently completed episode (falling back to live scenario metrics when
    no episode has finished yet) plus its change versus the previously
    cached snapshot.
    """

    def __init__(self, econ, verbose=0):
        super().__init__(verbose)
        self.econ = econ
        # Seed the cache so the first delta has a baseline to compare against.
        self.metrics = econ.scenario_metrics()

    def _on_step(self) -> bool:
        previous = self.metrics
        latest = self.econ.previous_episode_metrics
        if latest is None:
            # No episode has completed yet: use the live scenario metrics.
            latest = self.econ.scenario_metrics()
        self.metrics = latest
        productivity = latest["social/productivity"]
        delta = productivity - previous["social/productivity"]
        self.logger.record("social/total_productivity", productivity)
        self.logger.record("social/delta_productivity", delta)
        return True
def printMarket(market):
    """Print every executed trade in a per-step transaction log.

    Steps with no transactions are skipped; each active step prints a
    "=== Step i ===" banner followed by one line per trade.

    Args:
        market: sequence of per-step lists of trade dicts with keys
            "commodity", "seller", "buyer", "ask", "bid", "price".

    Returns:
        "" (kept so existing callers that use the return value still work).
    """
    # enumerate() instead of range(len(...)) — same order, idiomatic.
    for step_idx, transactions in enumerate(market):
        if len(transactions) > 0:
            print("=== Step {} ===".format(step_idx))
            for t in transactions:
                transstring = "({}) {} -> {} | [{}/{}] {} Coins\n".format(
                    t["commodity"], t["seller"], t["buyer"], t["ask"], t["bid"], t["price"]
                )
                print(transstring)
    return ""
def printBuilds(builds):
    """Print every build/craft event in a per-step log.

    Args:
        builds: sequence of per-step lists of build dicts with keys
            "builder", "build_skill", "income".

    Returns:
        "" (kept so existing callers that use the return value still work).
    """
    # enumerate() instead of range(len(...)) — same order, idiomatic.
    for step_idx, step_builds in enumerate(builds):
        for t in step_builds:
            transstring = "({}) Builder: {}, Skill: {}, Income {} ".format(
                step_idx, t["builder"], t["build_skill"], t["income"]
            )
            print(transstring)
    return ""
def printReplay(econ, agentid):
    """Pretty-print one agent's trajectory from the last dense episode log.

    For each logged step, prints the world resource cells at (0, 0), the
    agent's state (as YAML), its action(s), and its reward.

    Args:
        econ: environment exposing ``previous_episode_dense_log``.
        agentid: integer index of the agent to replay.
    """
    worldmaps = ["Stone", "Wood"]
    log = econ.previous_episode_dense_log
    agent_key = str(agentid)
    # NOTE(review): the last logged state is skipped here (len - 1) —
    # presumably because actions/rewards have one fewer entry; confirm.
    max_step = len(log["states"]) - 1
    for step in range(max_step):
        print()
        print("=== Step {} ===".format(step))
        print("--- World ---")
        world = log['world'][step]
        for res in worldmaps:
            # Only cell (0, 0) is shown; eval worlds here are 1x1.
            print("{}: {}".format(res, world[res][0][0]))
        print("--- State ---")
        print(yaml.safe_dump(log['states'][step][agent_key]))
        print("--- Action ---")
        action = log["actions"][step][agent_key]
        if action == {}:
            print("Action: 0 -> NOOP")
        else:
            for k in action:
                print("Action: {}({})".format(k, action[k]))
        print("--- Reward ---")
        print("Reward: {}".format(log["rewards"][step][agent_key]))
#Setup Env Objects
# Build the training environment and split it into two agent-class-specific
# vectorized views (one per policy) via the project's wrapper stack.
econ=foundation.make_env_instance(**env_config)
market=econ.get_component("ContinuousDoubleAuction")
action=market.get_n_actions("TradingAgent")
baseEconWrapper=BaseEconWrapper(econ)
baseEconWrapper.run()
# NOTE(review): sleep presumably lets the wrapper's background worker come
# up before receivers attach — replace with an explicit ready signal.
time.sleep(0.5)
mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent")
tradeRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="TradingAgent")
sb3_traderConverter=SB3EconConverter(tradeRecieverEconWrapper,econ,"TradingAgent",True)
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent",True)
# attach sb3 wrappers (monitoring + frame stacking for each policy)
monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"])
montraidingenv=VecMonitor(venv=sb3_traderConverter)
stackenv_basic=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=num_frames)
stackenv_traid=vec_frame_stack.VecFrameStack(venv=montraidingenv,n_stack=num_frames)
# Model setup complete
# Setup Eval Env — same wrapper stack over the eval configuration.
econ_eval=foundation.make_env_instance(**eval_env_config)
baseEconWrapper_eval=BaseEconWrapper(econ_eval)
baseEconWrapper_eval.run()
time.sleep(0.5)
mobileRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="BasicMobileAgent")
tradeRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="TradingAgent")
sb3_traderConverter_eval=SB3EconConverter(tradeRecieverEconWrapper_eval,econ_eval,"TradingAgent",False)
sb3Converter_eval=SB3EconConverter(mobileRecieverEconWrapper_eval,econ_eval,"BasicMobileAgent",False)
# attach sb3 wrappers
monenv_eval=VecMonitor(venv=sb3Converter_eval,info_keywords=["social/productivity","trend/productivity"])
montraidingenv_eval=VecMonitor(venv=sb3_traderConverter_eval)
stackenv_basic_eval=vec_frame_stack.VecFrameStack(venv=monenv_eval,n_stack=num_frames)
stackenv_traid_eval=vec_frame_stack.VecFrameStack(venv=montraidingenv_eval,n_stack=num_frames)
obs=monenv.reset()
# define training functions
def train(model, timesteps, econ_call, process_bar, name, db, index):
    """Thread target: run model.learn and hand the result back via ``db``.

    Thread targets cannot return values, so the trained model is stored at
    ``db[index]`` for the main thread to pick up after join().
    """
    trained = model.learn(
        total_timesteps=timesteps,
        progress_bar=process_bar,
        reset_num_timesteps=False,
        tb_log_name=name,
        callback=TensorboardCallback(econ_call),
    )
    db[index] = trained
# prepare training
run_number=int(np.random.rand()*100)
runname="run_{}".format(run_number)
model_db=[None,None] # object for storing model
# One MaskablePPO policy per agent class; identical hyperparameters.
model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_basic, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
model_trade=MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_traid, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
n_agents=econ.n_agents
# Timesteps for one full episode across all agents of each class.
# NOTE(review): "agnet_idx" is the wrapper's attribute spelling (typo lives
# in the wrapper class) — renaming it here would break the lookup.
total_required_for_episode_basic=len(mobileRecieverEconWrapper.agnet_idx)*env_config['episode_length']
total_required_for_episode_traid=len(tradeRecieverEconWrapper.agnet_idx)*env_config['episode_length']
print("this is run {}".format(runname))
while True:
    #Train — both policies learn concurrently on the shared environment.
    runname="run_{}_{}".format(run_number,"basic")
    thread_model=Thread(target=train,args=(model,total_required_for_episode_basic*100,econ,True,runname,model_db,0))
    runname="run_{}_{}".format(run_number,"trader")
    thread_model_traid=Thread(target=train,args=(model_trade,total_required_for_episode_traid*100,econ,False,runname,model_db,1))
    thread_model.start()
    thread_model_traid.start()
    thread_model.join()
    thread_model_traid.join()
    #normenv.save("temp-normalizer.ai")
    ## Run Eval
    print("### EVAL ###")
    obs_basic=stackenv_basic_eval.reset()
    obs_trade=stackenv_traid_eval.reset()
    # Pick up the trained models handed back by the worker threads.
    model=model_db[0]
    model_trade=model_db[1]
    done=False
    for i in tqdm(range(eval_env_config['episode_length'])):
        #create masks
        masks_basic=stackenv_basic_eval.action_masks()
        masks_trade=stackenv_traid_eval.action_masks()
        # get actions
        action_basic=model.predict(obs_basic,action_masks=masks_basic)
        action_trade=model_trade.predict(obs_trade,action_masks=masks_trade)
        #submit async directly for non blocking operation
        sb3Converter_eval.step_async(action_basic[0])
        sb3_traderConverter_eval.step_async(action_trade[0])
        # retrieve full results
        obs_basic,rew_basic,done_e,info=stackenv_basic_eval.step(action_basic[0])
        obs_trade,rew_trade,done_e,info=stackenv_traid_eval.step(action_trade[0])
        done=done_e[0]
    market=econ_eval.get_component("ContinuousDoubleAuction")
    craft=econ_eval.get_component("Craft")
    # trades=market.get_dense_log()
    build=craft.get_dense_log()
    # NOTE(review): metrics are read from the TRAINING env (econ), while the
    # replay below comes from the eval env — confirm this is intentional.
    met=econ.previous_episode_metrics
    printReplay(econ_eval,0)
    # printMarket(trades)
    # printBuilds(builds=build)
    print("social/productivity: {}".format(met["social/productivity"]))
    print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
    print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))
    time.sleep(1)

208
main.py
View File

@@ -1,9 +1,15 @@
from ai_economist import foundation
import numpy as np
from ai_economist import foundation
from stable_baselines3.common.vec_env import vec_frame_stack
from stable_baselines3.common.evaluation import evaluate_policy
from sb3_contrib.ppo_mask import MaskablePPO
import envs
import wrapper
import resources
import pprint
from agents import trading_agent
from wrapper.base_econ_wrapper import BaseEconWrapper
from wrapper.reciever_econ_wrapper import RecieverEconWrapper
from wrapper.sb3_econ_converter import SB3EconConverter
@@ -18,12 +24,13 @@ from envs.econ_wrapper import EconVecEnv
from stable_baselines3.common.callbacks import BaseCallback
import yaml
import time
from threading import Thread
env_config = {
# ===== SCENARIO CLASS =====
# Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
# The environment object will be an instance of the Scenario class.
'scenario_name': 'simple_market',
'scenario_name': 'econ',
# ===== COMPONENTS =====
# Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
@@ -32,30 +39,34 @@ env_config = {
# The order in which components reset, step, and generate obs follows their listed order below.
'components': [
# (1) Building houses
('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
# (2) Trading collectible resources
#('ContinuousDoubleAuction', {'max_num_orders': 10}),
('ContinuousDoubleAuction', {'max_num_orders': 10}),
# (3) Movement and resource collection
('SimpleGather', {}),
('ExternalMarket',{'market_demand':{
'Gem': 15
}}),
],
# ===== SCENARIO CLASS ARGUMENTS =====
# (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
'starting_agent_coin': 0,
'starting_agent_coin': 50,
'fixed_four_skill_and_loc': True,
# ===== STANDARD ARGUMENTS ======
# kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
'agent_composition': {"BasicMobileAgent": 20}, # Number of non-planner agents (must be > 1)
'world_size': [1, 1], # [Height, Width] of the env world
'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
'world_size': [5, 5], # [Height, Width] of the env world
'episode_length': 256, # Number of timesteps per episode
'isoelastic_eta':0.001,
'allow_observation_scaling': True,
'dense_log_frequency': 100,
'world_dense_log_frequency':1,
'energy_cost':0,
'energy_cost':0,
'energy_warmup_method': "auto",
'energy_warmup_constant': 0,
'energy_warmup_constant': 4000,
# In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
# Otherwise, the policy selects only 1 action.
@@ -67,7 +78,7 @@ env_config = {
'flatten_observations': False,
# When Flattening masks, concatenate each action subspace mask into a single array.
# Note: flatten_masks = True is required for masking action logits in the code below.
'flatten_masks': False,
'flatten_masks': True,
}
@@ -75,7 +86,7 @@ eval_env_config = {
# ===== SCENARIO CLASS =====
# Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
# The environment object will be an instance of the Scenario class.
'scenario_name': 'simple_market',
'scenario_name': 'econ',
# ===== COMPONENTS =====
# Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
@@ -84,30 +95,34 @@ eval_env_config = {
# The order in which components reset, step, and generate obs follows their listed order below.
'components': [
# (1) Building houses
('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
# (2) Trading collectible resources
#('ContinuousDoubleAuction', {'max_num_orders': 10}),
('ContinuousDoubleAuction', {'max_num_orders': 10}),
# (3) Movement and resource collection
('SimpleGather', {}),
('ExternalMarket',{'market_demand':{
'Gem': 15
}}),
],
# ===== SCENARIO CLASS ARGUMENTS =====
# (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
'starting_agent_coin': 0,
'starting_agent_coin': 50,
'fixed_four_skill_and_loc': True,
# ===== STANDARD ARGUMENTS ======
# kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
'agent_composition': {"BasicMobileAgent": 20}, # Number of non-planner agents (must be > 1)
'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
'world_size': [1, 1], # [Height, Width] of the env world
'episode_length': 100, # Number of timesteps per episode
'episode_length': 256, # Number of timesteps per episode
'allow_observation_scaling': True,
'dense_log_frequency': 10,
'isoelastic_eta':0.001,
'dense_log_frequency': 1,
'world_dense_log_frequency':1,
'energy_cost':0,
'energy_warmup_method': "auto",
'energy_warmup_constant': 0,
'energy_warmup_constant': 4000,
# In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
# Otherwise, the policy selects only 1 action.
@@ -119,10 +134,10 @@ eval_env_config = {
'flatten_observations': False,
# When Flattening masks, concatenate each action subspace mask into a single array.
# Note: flatten_masks = True is required for masking action logits in the code below.
'flatten_masks': False,
'flatten_masks': True,
}
num_frames=2
num_frames=1
class TensorboardCallback(BaseCallback):
"""
@@ -135,18 +150,36 @@ class TensorboardCallback(BaseCallback):
self.metrics=econ.scenario_metrics()
def _on_step(self) -> bool:
# Log scalar value (here a random variable)
prev_metrics=self.metrics
if self.econ.previous_episode_metrics is None:
self.metrics=self.econ.scenario_metrics()
else:
self.metrics=self.econ.previous_episode_metrics
curr_prod=self.metrics["social/productivity"]
trend_pord=curr_prod-prev_metrics["social/productivity"]
self.logger.record("social/total_productivity", curr_prod)
self.logger.record("social/delta_productivity", trend_pord)
if econ.world.timestep==0:
prev_metrics=self.metrics
if self.econ.previous_episode_metrics is None:
self.metrics=self.econ.scenario_metrics()
else:
self.metrics=self.econ.previous_episode_metrics
curr_prod=self.metrics["social/productivity"]
trend_pord=curr_prod-prev_metrics["social/productivity"]
self.logger.record("social/total_productivity", curr_prod)
self.logger.record("social/delta_productivity", trend_pord)
return True
min_at_target_basic=0.5
min_lr_basic=5e-6
start_lr_basic=9e-4
min_at_target_trade=0.5
min_lr_trade=5e-6
start_lr_trade=9e-4
def learning_rate_adj_basic(x) -> float:
diff=start_lr_basic-min_lr_basic
lr=min_lr_basic+x*diff
return lr
def learning_rate_adj_trade(x) -> float:
diff=start_lr_trade-min_lr_trade
lr=min_lr_basic+x*diff
return lr
def printMarket(market):
for i in range(len(market)):
@@ -188,7 +221,7 @@ def printReplay(econ,agentid):
print("--- State ---")
state=log['states'][step][agentid]
print(yaml.dump(state))
pprint.pprint(state)
print("--- Action ---")
action=log["actions"][step][agentid]
@@ -205,63 +238,120 @@ def printReplay(econ,agentid):
#Setup Env Objects
econ=foundation.make_env_instance(**env_config)
market=econ.get_component("ContinuousDoubleAuction")
action=market.get_n_actions("TradingAgent")
baseEconWrapper=BaseEconWrapper(econ)
baseEconWrapper.run()
time.sleep(0.5)
mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent")
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent")
#obs=sb3Converter.reset()
#vecenv=EconVecEnv(env_config=env_config)
tradeRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="TradingAgent")
sb3_traderConverter=SB3EconConverter(tradeRecieverEconWrapper,econ,"TradingAgent",True)
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent",True)
# attach sb3 wrappers
monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"])
montraidingenv=VecMonitor(venv=sb3_traderConverter)
stackenv_basic=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=num_frames)
stackenv_traid=vec_frame_stack.VecFrameStack(venv=montraidingenv,n_stack=num_frames)
# Model setup complete
# Setup Eval Env
econ_eval=foundation.make_env_instance(**eval_env_config)
baseEconWrapper_eval=BaseEconWrapper(econ_eval)
baseEconWrapper_eval.run()
time.sleep(0.5)
mobileRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="BasicMobileAgent")
tradeRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="TradingAgent")
sb3_traderConverter_eval=SB3EconConverter(tradeRecieverEconWrapper_eval,econ_eval,"TradingAgent",False)
sb3Converter_eval=SB3EconConverter(mobileRecieverEconWrapper_eval,econ_eval,"BasicMobileAgent",False)
# attach sb3 wrappers
monenv_eval=VecMonitor(venv=sb3Converter_eval,info_keywords=["social/productivity","trend/productivity"])
montraidingenv_eval=VecMonitor(venv=sb3_traderConverter_eval)
stackenv_basic_eval=vec_frame_stack.VecFrameStack(venv=monenv_eval,n_stack=num_frames)
stackenv_traid_eval=vec_frame_stack.VecFrameStack(venv=montraidingenv_eval,n_stack=num_frames)
#normenv=VecNormalize(sb3Converter,norm_reward=False,clip_obs=1)
#stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
obs=monenv.reset()
# define training functions
def train(model,timesteps, econ_call,process_bar,name,db,index):
db[index]=model.learn(total_timesteps=timesteps,progress_bar=process_bar,reset_num_timesteps=False,tb_log_name=name,callback=TensorboardCallback(econ_call))
runname="run_{}".format(int(np.random.rand()*100))
# prepare training
run_number=int(np.random.rand()*100)
runname="run_{}".format(run_number)
model_db=[None,None] # object for storing model
model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=learning_rate_adj_basic,env=stackenv_basic, seed=445,verbose=1,device="cuda",tensorboard_log="./log")
model_trade=MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=learning_rate_adj_trade,env=stackenv_traid, seed=445,verbose=1,device="cuda",tensorboard_log="./log")
model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")
n_agents=econ.n_agents
total_required_for_episode=n_agents*env_config['episode_length']
print("this is run {}".format(runname))
while True:
# Create Eval ENV
vec_env_eval=EconVecEnv(env_config=eval_env_config)
vec_mon_eval=VecMonitor(venv=vec_env_eval)
norm_env_eval=VecNormalize(vec_mon_eval,norm_reward=False,training=False)
eval_econ = vec_env_eval.env
#Train
model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
#normenv.save("temp-normalizer.ai")
total_required_for_episode_basic=len(mobileRecieverEconWrapper.agnet_idx)*env_config['episode_length']
total_required_for_episode_traid=len(tradeRecieverEconWrapper.agnet_idx)*env_config['episode_length']
print("this is run {}".format(runname))
while True:
#Train
runname="run_{}_{}".format(run_number,"basic")
thread_model=Thread(target=train,args=(model,total_required_for_episode_basic*150,econ,True,runname,model_db,0))
runname="run_{}_{}".format(run_number,"trader")
thread_model_traid=Thread(target=train,args=(model_trade,total_required_for_episode_traid*150,econ,False,runname,model_db,1))
thread_model.start()
thread_model_traid.start()
thread_model.join()
thread_model_traid.join()
#normenv.save("temp-normalizer.ai")
model=model_db[0]
model_trade=model_db[1]
model.save("basic.ai")
model_trade.save("trade.ai")
## Run Eval
print("### EVAL ###")
norm_env_eval.load("temp-normalizer.ai",vec_mon_eval)
obs=vec_mon_eval.reset()
obs_basic=stackenv_basic_eval.reset()
obs_trade=stackenv_traid_eval.reset()
done=False
for i in tqdm(range(eval_env_config['episode_length'])):
action=model.predict(obs)
obs,rew,done_e,info=vec_mon_eval.step(action[0])
#create masks
masks_basic=stackenv_basic_eval.action_masks()
masks_trade=stackenv_traid_eval.action_masks()
# get actions
action_basic=model.predict(obs_basic,action_masks=masks_basic)
action_trade=model_trade.predict(obs_trade,action_masks=masks_trade)
#submit async directly for non blocking operation
sb3Converter_eval.step_async(action_basic[0])
sb3_traderConverter_eval.step_async(action_trade[0])
# retieve full results
obs_basic,rew_basic,done_e,info=stackenv_basic_eval.step(action_basic[0])
obs_trade,rew_trade,done_e,info=stackenv_traid_eval.step(action_trade[0])
done=done_e[0]
#market=eval_econ.get_component("ContinuousDoubleAuction")
craft=eval_econ.get_component("SimpleCraft")
market=econ_eval.get_component("ContinuousDoubleAuction")
craft=econ_eval.get_component("Craft")
# trades=market.get_dense_log()
build=craft.get_dense_log()
met=econ.previous_episode_metrics
printReplay(eval_econ,0)
printReplay(econ_eval,0)
# printMarket(trades)
printBuilds(builds=build)
# printBuilds(builds=build)
print("social/productivity: {}".format(met["social/productivity"]))
print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))

3
resources/__init_.py Normal file
View File

@@ -0,0 +1,3 @@
from . import (
resources
)

4
resources/resources.py Normal file
View File

@@ -0,0 +1,4 @@
import numpy as np
from ai_economist.foundation.entities.resources import Resource, resource_registry

343
test.py Normal file
View File

@@ -0,0 +1,343 @@
import numpy as np
from ai_economist import foundation
from stable_baselines3.common.vec_env import vec_frame_stack
from stable_baselines3.common.evaluation import evaluate_policy
from sb3_contrib.ppo_mask import MaskablePPO
import envs
import wrapper
import resources
import pprint
from agents import trading_agent
from wrapper.base_econ_wrapper import BaseEconWrapper
from wrapper.reciever_econ_wrapper import RecieverEconWrapper
from wrapper.sb3_econ_converter import SB3EconConverter
from tqdm import tqdm
import components
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
from sb3_contrib import RecurrentPPO
from envs.econ_wrapper import EconVecEnv
from stable_baselines3.common.callbacks import BaseCallback
import yaml
import time
from threading import Thread
# Training environment configuration (ai-economist Foundation scenario kwargs).
env_config = {
    # ===== SCENARIO CLASS =====
    # Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
    # The environment object will be an instance of the Scenario class.
    'scenario_name': 'econ',
    # ===== COMPONENTS =====
    # Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
    # "component_name" refers to the Component class's name in the Component Registry (foundation.components)
    # {component_kwargs} is a dictionary of kwargs passed to the Component class
    # The order in which components reset, step, and generate obs follows their listed order below.
    'components': [
        # (1) Crafting Gems; crafting skill is drawn from a pareto distribution
        ('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
        # (2) Trading collectible resources
        ('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
        # (4) Fixed external market demand for Gems (units per period — TODO confirm)
        ('ExternalMarket',{'market_demand':{
            'Gem': 15
        }}),
    ],
    # ===== SCENARIO CLASS ARGUMENTS =====
    # (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
    'starting_agent_coin': 10,
    'fixed_four_skill_and_loc': True,
    # ===== STANDARD ARGUMENTS ======
    # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
    'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
    'world_size': [5, 5], # [Height, Width] of the env world
    'episode_length': 256, # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 100,
    'world_dense_log_frequency':1,
    'energy_cost':0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 4000,
    # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
    # Otherwise, the policy selects only 1 action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,
    # When flattening observations, concatenate scalar & vector observations before output.
    # Otherwise, return observations with minimal processing.
    'flatten_observations': False,
    # When Flattening masks, concatenate each action subspace mask into a single array.
    # Note: flatten_masks = True is required for masking action logits in the code below.
    'flatten_masks': True,
}
# Evaluation environment configuration. Mirrors env_config except:
# world_size is [1, 1] and dense_log_frequency is 1 (log every episode so
# printReplay below always has a fresh dense log).
eval_env_config = {
    # ===== SCENARIO CLASS =====
    # Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
    # The environment object will be an instance of the Scenario class.
    'scenario_name': 'econ',
    # ===== COMPONENTS =====
    # Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
    # "component_name" refers to the Component class's name in the Component Registry (foundation.components)
    # {component_kwargs} is a dictionary of kwargs passed to the Component class
    # The order in which components reset, step, and generate obs follows their listed order below.
    'components': [
        # (1) Crafting Gems; crafting skill is drawn from a pareto distribution
        ('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
        # (2) Trading collectible resources
        ('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
        # (4) Fixed external market demand for Gems (units per period — TODO confirm)
        ('ExternalMarket',{'market_demand':{
            'Gem': 15
        }}),
    ],
    # ===== SCENARIO CLASS ARGUMENTS =====
    # (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
    'starting_agent_coin': 10,
    'fixed_four_skill_and_loc': True,
    # ===== STANDARD ARGUMENTS ======
    # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
    'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
    'world_size': [1, 1], # [Height, Width] of the env world
    'episode_length': 256, # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 1,
    'world_dense_log_frequency':1,
    'energy_cost':0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 4000,
    # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
    # Otherwise, the policy selects only 1 action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,
    # When flattening observations, concatenate scalar & vector observations before output.
    # Otherwise, return observations with minimal processing.
    'flatten_observations': False,
    # When Flattening masks, concatenate each action subspace mask into a single array.
    # Note: flatten_masks = True is required for masking action logits in the code below.
    'flatten_masks': True,
}
# Number of stacked observation frames fed to the policies (VecFrameStack below).
num_frames=5
class TensorboardCallback(BaseCallback):
    """Log economy-level scenario metrics (productivity and its trend) to tensorboard.

    The metric cache is refreshed only at the first timestep of a new episode
    (``timestep == 0``), preferring the environment's
    ``previous_episode_metrics`` when one exists.
    """

    def __init__(self, econ, verbose=0):
        super().__init__(verbose)
        self.econ = econ
        # Seed the cache so the first episode-boundary delta has a baseline.
        self.metrics = econ.scenario_metrics()

    def _on_step(self) -> bool:
        # BUGFIX: the original read the module-global `econ` here instead of
        # the environment passed to this callback (`self.econ`), so a callback
        # constructed for any other env would still track the training env.
        if self.econ.world.timestep == 0:
            prev_metrics = self.metrics
            if self.econ.previous_episode_metrics is None:
                self.metrics = self.econ.scenario_metrics()
            else:
                self.metrics = self.econ.previous_episode_metrics
            curr_prod = self.metrics["social/productivity"]
            trend_prod = curr_prod - prev_metrics["social/productivity"]
            self.logger.record("social/total_productivity", curr_prod)
            self.logger.record("social/delta_productivity", trend_prod)
        return True
def printMarket(market):
    """Pretty-print a dense market log.

    Prints a header for each step that contained trades, followed by one
    formatted line per transaction. Always returns an empty string.
    """
    for step_idx, transactions in enumerate(market):
        if not transactions:
            continue
        print("=== Step {} ===".format(step_idx))
        for t in transactions:
            line = "({}) {} -> {} | [{}/{}] {} Coins\n".format(
                t["commodity"], t["seller"], t["buyer"],
                t["ask"], t["bid"], t["price"],
            )
            print(line)
    return ""
def printBuilds(builds):
    """Pretty-print a dense craft/build log.

    Prints one line per build event, tagged with the step index at which it
    occurred. Always returns an empty string.
    """
    for step_idx, events in enumerate(builds):
        # Iterating an empty step prints nothing, so no emptiness check needed.
        for event in events:
            print("({}) Builder: {}, Skill: {}, Income {} ".format(
                step_idx, event["builder"], event["build_skill"], event["income"]))
    return ""
def printReplay(econ, agentid, worldmaps=("Stone", "Wood")):
    """Replay one agent's episode from the env's previous dense log.

    For every step except the final one, prints the selected world resource
    maps, the agent's state, its action (NOOP when the action dict is empty),
    and its reward.

    Args:
        econ: environment exposing ``previous_episode_dense_log``.
        agentid: integer index of the agent to replay.
        worldmaps: names of world resource maps to print
            (generalized from the previously hard-coded ``["Stone", "Wood"]``).
    """
    log = econ.previous_episode_dense_log
    # Removed unused local `agent = econ.world.agents[agentid]` from the
    # original; the dense log alone is sufficient for the replay.
    agentid = str(agentid)  # dense-log dicts are keyed by the agent's string idx
    max_step = len(log["states"]) - 1
    for step in range(max_step):
        print()
        print("=== Step {} ===".format(step))
        # World resource maps (only the top-left cell is shown).
        print("--- World ---")
        world = log['world'][step]
        for res in worldmaps:
            print("{}: {}".format(res, world[res][0][0]))
        print("--- State ---")
        state = log['states'][step][agentid]
        pprint.pprint(state)
        print("--- Action ---")
        action = log["actions"][step][agentid]
        if action == {}:
            print("Action: 0 -> NOOP")
        else:
            for k in action:
                formats = "Action: {}({})".format(k, action[k])
                print(formats)
        print("--- Reward ---")
        reward = log["rewards"][step][agentid]
        print("Reward: {}".format(reward))
#Setup Env Objects
# Build the training economy and wrap it for multi-threaded SB3 training:
# BaseEconWrapper drives the shared env on its own thread, one
# RecieverEconWrapper per agent class exposes a per-policy view of that env,
# and SB3EconConverter adapts each view to SB3's VecEnv interface.
econ=foundation.make_env_instance(**env_config)
market=econ.get_component("ContinuousDoubleAuction")
# NOTE(review): `action` is never used below — presumably a leftover probe of
# the trading action space; confirm before deleting.
action=market.get_n_actions("TradingAgent")
baseEconWrapper=BaseEconWrapper(econ)
baseEconWrapper.run()
# Give the env thread a moment to come up — TODO: replace with a ready signal.
time.sleep(0.5)
mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent")
tradeRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="TradingAgent")
# Last argument is SB3EconConverter's auto_reset flag: True for the training
# stack (envs reset themselves), False for the eval stack further below.
sb3_traderConverter=SB3EconConverter(tradeRecieverEconWrapper,econ,"TradingAgent",True)
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent",True)
# attach sb3 wrappers
monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"])
montraidingenv=VecMonitor(venv=sb3_traderConverter)
stackenv_basic=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=num_frames)
stackenv_traid=vec_frame_stack.VecFrameStack(venv=montraidingenv,n_stack=num_frames)
# Model setup complete
# Setup Eval Env
# Mirror of the training stack, built from eval_env_config, with auto_reset
# disabled so the eval loop controls resets itself.
econ_eval=foundation.make_env_instance(**eval_env_config)
baseEconWrapper_eval=BaseEconWrapper(econ_eval)
baseEconWrapper_eval.run()
time.sleep(0.5)
mobileRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="BasicMobileAgent")
tradeRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="TradingAgent")
sb3_traderConverter_eval=SB3EconConverter(tradeRecieverEconWrapper_eval,econ_eval,"TradingAgent",False)
sb3Converter_eval=SB3EconConverter(mobileRecieverEconWrapper_eval,econ_eval,"BasicMobileAgent",False)
# attach sb3 wrappers
monenv_eval=VecMonitor(venv=sb3Converter_eval,info_keywords=["social/productivity","trend/productivity"])
montraidingenv_eval=VecMonitor(venv=sb3_traderConverter_eval)
stackenv_basic_eval=vec_frame_stack.VecFrameStack(venv=monenv_eval,n_stack=num_frames)
stackenv_traid_eval=vec_frame_stack.VecFrameStack(venv=montraidingenv_eval,n_stack=num_frames)
obs=monenv.reset()
# define training functions
def train(model,timesteps, econ_call,process_bar,name,db,index):
    """Run `model.learn` for `timesteps` steps and store the trained model in
    `db[index]`. Each worker thread writes a distinct slot, so no lock is
    needed around the assignment."""
    db[index]=model.learn(total_timesteps=timesteps,progress_bar=process_bar,reset_num_timesteps=False,tb_log_name=name,callback=TensorboardCallback(econ_call))
# prepare training
run_number=int(np.random.rand()*100)  # random run id used in tensorboard log names
runname="run_{}".format(run_number)
model_db=[None,None] # object for storing model: slot 0 = basic agents, slot 1 = traders
# One PPO policy per agent class; both are trained against the shared economy.
model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_basic, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
model_trade=MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_traid, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
n_agents=econ.n_agents
# Samples per policy per env episode = (#agents of that class) * episode_length.
total_required_for_episode_basic=len(mobileRecieverEconWrapper.agnet_idx)*env_config['episode_length']
total_required_for_episode_traid=len(tradeRecieverEconWrapper.agnet_idx)*env_config['episode_length']
print("this is run {}".format(runname))
# --- Main loop: one round of threaded training, then one eval episode ---
while True:
    #Train
    # Train both policies concurrently. Each thread steps the SAME shared
    # economy through its own wrapper stack, so the two threads must run
    # together for either env view to make progress.
    runname="run_{}_{}".format(run_number,"basic")
    thread_model=Thread(target=train,args=(model,total_required_for_episode_basic*50,econ,True,runname,model_db,0))
    runname="run_{}_{}".format(run_number,"trader")
    thread_model_traid=Thread(target=train,args=(model_trade,total_required_for_episode_traid*50,econ,False,runname,model_db,1))
    thread_model.start()
    thread_model_traid.start()
    thread_model.join()
    thread_model_traid.join()
    #normenv.save("temp-normalizer.ai")
    # Pick up the freshly trained models written by the worker threads.
    model=model_db[0]
    model_trade=model_db[1]
    model.save("basic.ai")
    model_trade.save("trade.ai")
    ## Run Eval
    print("### EVAL ###")
    obs_basic=stackenv_basic_eval.reset()
    obs_trade=stackenv_traid_eval.reset()
    done=False
    for i in tqdm(range(eval_env_config['episode_length'])):
        #create masks
        masks_basic=stackenv_basic_eval.action_masks()
        masks_trade=stackenv_traid_eval.action_masks()
        # get actions
        action_basic=model.predict(obs_basic,action_masks=masks_basic)
        action_trade=model_trade.predict(obs_trade,action_masks=masks_trade)
        #submit async directly for non blocking operation
        sb3Converter_eval.step_async(action_basic[0])
        sb3_traderConverter_eval.step_async(action_trade[0])
        # retrieve full results
        obs_basic,rew_basic,done_e,info=stackenv_basic_eval.step(action_basic[0])
        obs_trade,rew_trade,done_e,info=stackenv_traid_eval.step(action_trade[0])
        done=done_e[0]
    market=econ_eval.get_component("ContinuousDoubleAuction")
    craft=econ_eval.get_component("Craft")
    # trades=market.get_dense_log()
    build=craft.get_dense_log()
    # NOTE(review): metrics come from the TRAINING env (`econ`) while the
    # replay below uses the EVAL env (`econ_eval`) — confirm this mix is
    # intended.
    met=econ.previous_episode_metrics
    printReplay(econ_eval,0)
    # printMarket(trades)
    # printBuilds(builds=build)
    print("social/productivity: {}".format(met["social/productivity"]))
    print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
    print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))
    time.sleep(1)

BIN
trade 4000.ai Normal file

Binary file not shown.

BIN
trade.ai Normal file

Binary file not shown.

View File

@@ -3,25 +3,14 @@ from threading import Event, Lock, Thread
from queue import Queue
class BaseEconWrapper():
"""Base class for connecting reciever wrapper to a multi threaded econ simulation and training session"""
base_notification=Event() #Notification for Base
reset_notification=Event() #Notification for recievers
step_notifications=[] #Notification for recievers
action_edit_lock=Lock()
actor_actions={}
stop_edit_lock=Lock()
stop=False
vote_lock=Lock()
n_voters=0
n_votes_reset=0
# States of Env
env_data_lock=Lock()
obs=None
rew=None
done=None
@@ -30,6 +19,13 @@ class BaseEconWrapper():
def __init__(self, econ: base_env.BaseEnvironment):
self.env=econ
self.vote_lock=Lock()
self.base_notification=Event() #Notification for Base
self.reset_notification=Event() #Notification for recievers
self.action_edit_lock=Lock()
self.stop_edit_lock=Lock()
self.env_data_lock=Lock()
def register_vote(self):
"""Register reciever on base. Returns ID of Voter to pass on during blocking"""
@@ -149,7 +145,8 @@ class BaseEconWrapper():
self.action_edit_lock.acquire() # Start to submit action dict
for k,v in actions.items():
if k in self.actor_actions.keys():
raise Exception("Actor action has already been submitted. {}".format(k))
print("Actor action has already been submitted. {}".format(k))
continue
self.actor_actions[k]=v
self.step_notifications[voter_id].clear()
self.base_notification.set() #Alert base for action changes
@@ -168,9 +165,9 @@ class BaseEconWrapper():
def reciever_request_reset(self):
"""Adds to vote count to reset. If limit is reached reset will occure"""
self.vote_lock.acquire()
#self.vote_lock.acquire()
self.n_votes_reset+=1
self.vote_lock.release()
# self.vote_lock.release()
self.base_notification.set() #Alert base for action changes
def reciever_block_reset(self):

View File

@@ -23,7 +23,7 @@ class RecieverEconWrapper(gym.Env):
self.idx_to_index={}
#create idx to index map
for i in range(len(self.agnet_idx)):
self.idx_to_index[self.agnet_idx[i]]=i
self.idx_to_index[str(self.agnet_idx[i])]=i
first_idx=self.agnet_idx[0]
@@ -35,6 +35,7 @@ class RecieverEconWrapper(gym.Env):
def _dict_idx_to_index(self, data):
data_out={}
for k,v in data.items():
if k in self.idx_to_index:
index=self.idx_to_index[k]
data_out[index]=v

View File

@@ -8,7 +8,7 @@ from typing import Any, Callable, List, Optional, Sequence, Type, Union
class SB3EconConverter(VecEnv, gym.Env):
def __init__(self, env: gym.Env, econ: base_env.BaseEnvironment,agentclass: str):
def __init__(self, env: gym.Env, econ: base_env.BaseEnvironment,agentclass: str,auto_reset: bool):
self.env=env
self.econ=econ
#get observation sample
@@ -20,7 +20,9 @@ class SB3EconConverter(VecEnv, gym.Env):
#flatten obervation of first agent
obs0=utils.package(obs[0],*self.packager)
obs0["flat"]
self.observation_space=gym.spaces.Box(low=-np.inf,high=np.inf,shape=(len(obs0["flat"]),),dtype=np.float32)
self.step_request_send=False
self.auto_reset=auto_reset
self.observation_space=gym.spaces.Box(low=0,high=10,shape=(len(obs0["flat"]),),dtype=np.float32)
super().__init__(self.num_envs, self.observation_space, self.action_space)
@@ -30,12 +32,15 @@ class SB3EconConverter(VecEnv, gym.Env):
agent=self.econ.world.agents[idx]
return gym.spaces.Discrete(agent.action_spaces)
def step_async(self, actions: np.ndarray) -> None:
d_actions=utils.convert_gym_to_econ(actions)
return self.env.step_async(d_actions)
def step_async(self, actions: np.ndarray):
if self.step_request_send==False:
self.step_request_send=True
d_actions=utils.convert_gym_to_econ(actions)
return self.env.step_async(d_actions)
def step_wait(self) -> VecEnvStepReturn:
obs,rew,done,info=self.env.step_wait()
self.curr_obs=obs
#flatten obs
f_obs={}
for k,v in obs.items():
@@ -61,12 +66,16 @@ class SB3EconConverter(VecEnv, gym.Env):
for i in range(self.num_envs):
done_g[i]=done
c_info[i]["terminal_observation"]=c_obs[i]
c_obs=self.reset()
if self.auto_reset:
c_obs=self.reset()
self.step_request_send=False
return np.copy(c_obs),np.copy(c_rew),np.copy(done_g),np.copy(c_info)
def reset(self) -> VecEnvObs:
obs=self.env.reset()
self.step_request_send=False
f_obs={}
self.curr_obs=obs
for k,v in obs.items():
f_obs[k]=utils.package(v,*self.packager)
g_obs={}
@@ -79,20 +88,27 @@ class SB3EconConverter(VecEnv, gym.Env):
def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
if seed is None:
seed = np.random.randint(0, 2**32 - 1)
seeds = []
for idx, env in enumerate(self.envs):
seeds.append(env.seed(seed + idx))
self.econ.seed(seed)
seeds=[seed]
return seeds
def action_masks(self):
"""Returns action masks for agents and current obs"""
masks=[]
for obs in self.curr_obs:
mask=[]
for num in self.curr_obs[obs]["action_mask"]:
mask.append(num==1.0)
masks.append(mask)
return masks
def close(self) -> None:
return
def get_attr(self, attr_name: str, indices: VecEnvIndices = None) -> List[Any]:
"""Return attribute from vectorized environment (see base class)."""
target_envs = self._get_target_envs(indices)
return [getattr(env_i, attr_name) for env_i in target_envs]
return getattr(self, attr_name)
@@ -106,8 +122,7 @@ class SB3EconConverter(VecEnv, gym.Env):
def env_method(self, method_name: str, *method_args, indices: VecEnvIndices = None, **method_kwargs) -> List[Any]:
"""Call instance methods of vectorized environments."""
target_envs = self._get_target_envs(indices)
return [getattr(env_i, method_name)(*method_args, **method_kwargs) for env_i in target_envs]
return getattr(self, method_name)(*method_args, **method_kwargs)