Merge pull request 'crafting' (#2) from crafting into master
Reviewed-on: #2
This commit was merged in pull request #2.
This commit is contained in:
@@ -8,4 +8,6 @@ class TradingAgent(BaseAgent):
|
||||
"Mobile" refers to agents of this type being able to move around in the 2D world.
|
||||
"""
|
||||
|
||||
name = "TradingAgent"
|
||||
name = "TradingAgent"
|
||||
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
# or https://opensource.org/licenses/BSD-3-Clause
|
||||
|
||||
import random
|
||||
import uuid
|
||||
|
||||
import numpy as np
|
||||
|
||||
@@ -38,7 +39,7 @@ class BaseAgent:
|
||||
|
||||
if idx is None:
|
||||
idx = 0
|
||||
|
||||
self.uuid=uuid.uuid4()
|
||||
if multi_action_mode is None:
|
||||
multi_action_mode = False
|
||||
|
||||
@@ -64,6 +65,7 @@ class BaseAgent:
|
||||
self._registered_inventory = False
|
||||
self._registered_endogenous = False
|
||||
self._registered_components = False
|
||||
self._setup = False # agent setup not completed
|
||||
self._noop_action_dict = dict()
|
||||
|
||||
# Special flag to allow logic for multi-action-mode agents
|
||||
@@ -78,10 +80,17 @@ class BaseAgent:
|
||||
def idx(self):
|
||||
"""Index used to identify this agent. Must be unique within the environment."""
|
||||
return self._idx
|
||||
|
||||
@property
def is_setup(self):
    """Current value of the agent's `_setup` flag (True once setup was marked done)."""
    setup_flag = self._setup
    return setup_flag
|
||||
|
||||
def set_setup(self, set):
    """Store `set` as the agent's `_setup` flag.

    Args:
        set: New flag value. The parameter name shadows the builtin `set`;
            it is kept unchanged so existing callers keep working.
    """
    new_flag = set
    self._setup = new_flag
|
||||
|
||||
def register_inventory(self, resources):
|
||||
"""Used during environment construction to populate inventory/escrow fields."""
|
||||
assert not self._registered_inventory
|
||||
if self._registered_inventory:
|
||||
return
|
||||
for entity_name in resources:
|
||||
self.inventory[entity_name] = 0
|
||||
self.escrow[entity_name] = 0
|
||||
@@ -89,7 +98,8 @@ class BaseAgent:
|
||||
|
||||
def register_endogenous(self, endogenous):
|
||||
"""Used during environment construction to populate endogenous state fields."""
|
||||
assert not self._registered_endogenous
|
||||
if self._registered_endogenous:
|
||||
return
|
||||
for entity_name in endogenous:
|
||||
self.endogenous[entity_name] = 0
|
||||
self._registered_endogenous = True
|
||||
@@ -115,7 +125,8 @@ class BaseAgent:
|
||||
|
||||
def register_components(self, components):
|
||||
"""Used during environment construction to set up state/action spaces."""
|
||||
assert not self._registered_components
|
||||
if self._registered_components:
|
||||
return
|
||||
for component in components:
|
||||
n = component.get_n_actions(self.name)
|
||||
if n is None:
|
||||
|
||||
@@ -134,6 +134,7 @@ class BaseComponent(ABC):
|
||||
def reset(self):
|
||||
"""Reset any portion of the state managed by this component."""
|
||||
world = self.world
|
||||
self.n_agents = world.n_agents
|
||||
all_agents = world.agents + [world.planner]
|
||||
for agent in all_agents:
|
||||
agent.state.update(self.get_additional_state_fields(agent.name))
|
||||
|
||||
@@ -234,7 +234,7 @@ class BaseEnvironment(ABC):
|
||||
self.num_agents = (
|
||||
n_agents + n_planners
|
||||
) # used in the warp_drive env wrapper (+ 1 for the planner)
|
||||
|
||||
|
||||
# Components must be a tuple/list where each element is either a...
|
||||
# tuple: ('Component Name', {Component kwargs})
|
||||
# dict : {'Component Name': {Component kwargs}}
|
||||
@@ -342,19 +342,14 @@ class BaseEnvironment(ABC):
|
||||
self._components_dict[component_object.name] = component_object
|
||||
self._shorthand_lookup[component_object.shorthand] = component_object
|
||||
|
||||
# Register the components with the agents
|
||||
# to finish setting up their state/action spaces.
|
||||
for agent in self.world.agents:
|
||||
agent.register_inventory(self.resources)
|
||||
agent.register_endogenous(self.endogenous)
|
||||
agent.register_components(self._components)
|
||||
|
||||
self.world.planner.register_inventory(self.resources)
|
||||
self.world.planner.register_components(self._components)
|
||||
|
||||
self._agent_lookup = {str(agent.idx): agent for agent in self.all_agents}
|
||||
self.reapply_scenario_config_to_agents()
|
||||
|
||||
|
||||
self._completions = 0
|
||||
|
||||
self._finish_episode=False
|
||||
self._last_ep_metrics = None
|
||||
|
||||
# For dense logging
|
||||
@@ -370,6 +365,16 @@ class BaseEnvironment(ABC):
|
||||
# To collate all the agents ('0', '1', ...) data during reset and step
|
||||
# into a single agent with index 'a'
|
||||
self.collate_agent_step_and_reset_data = collate_agent_step_and_reset_data
|
||||
|
||||
def reapply_scenario_config_to_agents(self):
    """Register inventory, endogenous fields and components with every world agent.

    Afterwards the `str(agent.idx) -> agent` lookup is rebuilt from
    `self.all_agents` and the world is asked to re-apply its agent list
    (`world.apply_agent_db_to_world()`).
    """
    # Finish setting up the state/action spaces of each agent in the world.
    for world_agent in self.world.agents:
        world_agent.register_inventory(self.resources)
        world_agent.register_endogenous(self.endogenous)
        world_agent.register_components(self._components)

    lookup = {}
    for member in self.all_agents:
        lookup[str(member.idx)] = member
    self._agent_lookup = lookup

    self.world.apply_agent_db_to_world()
|
||||
|
||||
def _register_entities(self, entities):
|
||||
for entity in entities:
|
||||
@@ -501,6 +506,8 @@ class BaseEnvironment(ABC):
|
||||
|
||||
# Getters & Setters
|
||||
# -----------------
|
||||
def set_finish_episode(self, done):
    """Store `done` in the environment's `_finish_episode` flag."""
    finish_requested = done
    self._finish_episode = finish_requested
|
||||
|
||||
def get_component(self, component_name):
|
||||
"""
|
||||
@@ -904,6 +911,9 @@ class BaseEnvironment(ABC):
|
||||
# Reset the timestep counter
|
||||
self.world.timestep = 0
|
||||
|
||||
# Reset done flag
|
||||
self._finish_episode=False
|
||||
|
||||
# Perform the scenario reset,
|
||||
# which includes resetting the world and agent states
|
||||
self.reset_starting_layout()
|
||||
@@ -920,6 +930,7 @@ class BaseEnvironment(ABC):
|
||||
# Reset actions to that default.
|
||||
for agent in self.all_agents:
|
||||
agent.reset_actions()
|
||||
agent.set_setup(True)
|
||||
|
||||
# Produce observations
|
||||
obs = self._generate_observations(
|
||||
@@ -1015,7 +1026,7 @@ class BaseEnvironment(ABC):
|
||||
flatten_masks=self._flatten_masks,
|
||||
)
|
||||
rew = self._generate_rewards()
|
||||
done = {"__all__": self.world.timestep >= self._episode_length}
|
||||
# The episode is over once the time limit is reached OR an early finish was
# requested. A logical `or` is required here: the bitwise `|` binds tighter
# than `>=`, which turned the test into
# `timestep >= (episode_length | finish_flag)` and ignored the finish flag.
done = {
    "__all__": bool(
        self.world.timestep >= self._episode_length or self._finish_episode
    )
}
|
||||
info = {k: {} for k in obs.keys()}
|
||||
|
||||
if self._dense_log_this_episode:
|
||||
|
||||
@@ -76,8 +76,7 @@ class Registry:
|
||||
|
||||
See Registry class docstring for example.
|
||||
"""
|
||||
if cls_name.lower() not in self._lookup:
|
||||
raise KeyError('"{}" is not a name of a registered class'.format(cls_name))
|
||||
if cls_name.lower() not in self._lookup: raise KeyError('"{}" is not a name of a registered class'.format(cls_name))
|
||||
return self._lookup[cls_name.lower()]
|
||||
|
||||
def has(self, cls_name):
|
||||
|
||||
@@ -91,7 +91,10 @@ class Maps:
|
||||
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
self.reset_agent_maps(n_agents)
|
||||
|
||||
def reset_agent_maps(self,n_agents):
|
||||
self.n_agents=n_agents
|
||||
self._idx_map = np.stack(
|
||||
[i * np.ones(shape=self.size) for i in range(self.n_agents)]
|
||||
)
|
||||
@@ -378,17 +381,8 @@ class World:
|
||||
self.multi_action_mode_planner = bool(multi_action_mode_planner)
|
||||
self._agent_class_idx_map={}
|
||||
#create agents
|
||||
self.agent_composition=agent_composition
|
||||
self.n_agents=0
|
||||
self._agents = []
|
||||
for k,v in agent_composition.items():
|
||||
self._agent_class_idx_map[k]=[]
|
||||
for offset in range(v):
|
||||
agent_class=agent_registry.get(k)
|
||||
agent=agent_class(self.n_agents,self.multi_action_mode_agents)
|
||||
self._agents.append(agent)
|
||||
self._agent_class_idx_map[k].append(str(self.n_agents))
|
||||
self.n_agents+=1
|
||||
self.create_agents(agent_composition)
|
||||
|
||||
self.maps = Maps(world_size, self.n_agents, world_resources, world_landmarks)
|
||||
|
||||
planner_class = agent_registry.get("BasicPlanner")
|
||||
@@ -402,6 +396,37 @@ class World:
|
||||
self.cuda_function_manager = None
|
||||
self.cuda_data_manager = None
|
||||
|
||||
def create_agents(self, agent_composition):
    """Create the world's agent list from the given composition.

    Args:
        agent_composition (dict): Maps an agent class name known to
            `agent_registry` to the number of agents of that class to create.

    Agents receive consecutive indices starting at 0, in the iteration order
    of `agent_composition`. For every class name, the string form of each
    created index is recorded in `self._agent_class_idx_map`.
    """
    self.agent_composition = agent_composition
    self.n_agents = 0
    self._agents = []
    for class_name, count in agent_composition.items():
        # NOTE(review): entries for class names missing from the new
        # composition are not removed from the map here -- confirm intended.
        self._agent_class_idx_map[class_name] = []
        for _ in range(count):
            agent_class = agent_registry.get(class_name)
            agent = agent_class(self.n_agents, self.multi_action_mode_agents)
            self._agents.append(agent)
            self._agent_class_idx_map[class_name].append(str(self.n_agents))
            self.n_agents += 1
|
||||
|
||||
def apply_agent_db_to_world(self):
    """Apply the current agent list to the world's lookup maps and to the map itself.

    Recomputes `n_agents` from `self._agents`, resets the per-agent maps,
    rebuilds the class -> agent-index mapping and re-places every agent that
    has a "loc" entry in its state. This is what allows new agents to be
    inserted into an existing environment.
    """
    self.n_agents = len(self._agents)
    self._agent_class_idx_map = {}
    self.maps.reset_agent_maps(self.n_agents)  # reset map lookups

    for idx, agent in enumerate(self._agents):
        cls = self.get_agent_class(idx)
        # NOTE(review): indices are stored as ints here, while create_agents
        # stores them as strings -- confirm which form consumers expect.
        self._agent_class_idx_map.setdefault(cls, []).append(idx)

        # Agents that already have a location are put back onto the map.
        if "loc" in agent.state:
            self.maps.set_agent_loc(agent, *agent.loc)
|
||||
|
||||
@property
|
||||
def agents(self):
|
||||
"""Return a list of the agent objects in the world (sorted by index)."""
|
||||
|
||||
@@ -37,7 +37,7 @@ class ContinuousDoubleAuction(BaseComponent):
|
||||
name = "ContinuousDoubleAuction"
|
||||
component_type = "Trade"
|
||||
required_entities = ["Coin", "Labor"]
|
||||
agent_subclasses = ["BasicMobileAgent"]
|
||||
agent_subclasses = ["BasicMobileAgent","TradingAgent"]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -159,7 +159,7 @@ class ContinuousDoubleAuction(BaseComponent):
|
||||
"""If agent can submit an ask for resource."""
|
||||
return (
|
||||
self.n_orders[resource][agent.idx] < self.max_num_orders
|
||||
and agent.state["inventory"][resource] > 0
|
||||
and agent.state["inventory"][resource] >= 1
|
||||
)
|
||||
|
||||
# Core components for this market
|
||||
@@ -417,7 +417,7 @@ class ContinuousDoubleAuction(BaseComponent):
|
||||
"""
|
||||
# This component adds 2*(1+max_bid_ask)*n_resources possible actions:
|
||||
# buy/sell x each-price x each-resource
|
||||
if agent_cls_name == "BasicMobileAgent":
|
||||
if agent_cls_name in self.agent_subclasses:
|
||||
trades = []
|
||||
for c in self.commodities:
|
||||
trades.append(
|
||||
@@ -526,14 +526,14 @@ class ContinuousDoubleAuction(BaseComponent):
|
||||
|
||||
for _, agent in enumerate(world.agents):
|
||||
# Private to the agent
|
||||
available_ask_agent=full_asks - self.ask_hists[c][agent.idx]
|
||||
available_bid_agent=full_bids- self.bid_hists[c][agent.idx]
|
||||
obs[agent.idx].update(
|
||||
{
|
||||
"market_rate-{}".format(c): market_rate,
|
||||
"market_rate-{}".format(c): market_rate*self.inv_scale,
|
||||
"price_history-{}".format(c): scaled_price_history,
|
||||
"available_asks-{}".format(c): full_asks
|
||||
- self.ask_hists[c][agent.idx],
|
||||
"available_bids-{}".format(c): full_bids
|
||||
- self.bid_hists[c][agent.idx],
|
||||
"available_asks-{}".format(c): np.clip(available_ask_agent,0,self.max_num_orders),
|
||||
"available_bids-{}".format(c): np.clip(available_bid_agent,0,self.max_num_orders),
|
||||
"my_asks-{}".format(c): self.ask_hists[c][agent.idx],
|
||||
"my_bids-{}".format(c): self.bid_hists[c][agent.idx],
|
||||
}
|
||||
|
||||
@@ -66,10 +66,10 @@ class Coin(Resource):
|
||||
collectible = False
|
||||
|
||||
@resource_registry.add
|
||||
class RawGem(Resource):
|
||||
class GemRaw(Resource):
|
||||
"""Raw Gem that can be processed further"""
|
||||
|
||||
name = "Raw_Gem"
|
||||
name = "Gem_Raw"
|
||||
color = np.array([241, 233, 219]) / 255.0
|
||||
collectible = True
|
||||
|
||||
@@ -79,6 +79,6 @@ class Gem(Resource):
|
||||
|
||||
name = "Gem"
|
||||
color = np.array([241, 233, 219]) / 255.0
|
||||
collectible = False
|
||||
craft_recp= {"Raw_Gem": 1}
|
||||
collectible = True
|
||||
craft_recp= {"Gem_Raw": 1}
|
||||
craft_labour_base= 1
|
||||
BIN
basic 4000.ai
Normal file
BIN
basic 4000.ai
Normal file
Binary file not shown.
@@ -1,4 +1,6 @@
|
||||
from . import(
|
||||
simple_gather,
|
||||
simple_build
|
||||
simple_build,
|
||||
crafting,
|
||||
external_market
|
||||
)
|
||||
287
components/crafting.py
Normal file
287
components/crafting.py
Normal file
@@ -0,0 +1,287 @@
|
||||
# Copyright (c) 2020, salesforce.com, inc.
|
||||
# All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
# For full license text, see the LICENSE file in the repo root
|
||||
# or https://opensource.org/licenses/BSD-3-Clause
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ai_economist.foundation.base.base_component import (
|
||||
BaseComponent,
|
||||
component_registry,
|
||||
)
|
||||
from ai_economist.foundation.entities.resources import Resource, resource_registry
|
||||
|
||||
|
||||
@component_registry.add
class Craft(BaseComponent):
    """
    Allows agents to craft commodities out of resources held in their inventory.

    A craftable commodity is a registered resource whose `craft_recp` is not
    None: a dict mapping ingredient name -> amount consumed per craft. Its
    `craft_labour_base` is the base labor cost of one craft. Crafting removes
    the ingredients, adds the crafted commodity to the inventory and increases
    the agent's Labor.

    Can be configured to include heterogeneous crafting skill, where agents
    receive different amounts and pay different labor costs per craft.

    Args:
        commodities (list(str)): Registered resource names that can be crafted
            in the local world. Must not be empty. Resources without a recipe
            are ignored.
        max_skill_amount_benefit (int): Cap used for the skill-based bonus on
            the crafted amount. Must be >= 1. Default is 1.
        max_skill_labour_benefit (float): Lowest labor multiplier a skilled
            agent can reach. Must be <= 1. Default is 1 (no reduction).
        skill_dist (str): Distribution type for sampling skills. Default
            ("none") gives all agents identical skill equal to 1. "pareto"
            samples skills from a Pareto distribution.
    """

    name = "Craft"
    component_type = "Build"
    required_entities = ["Coin", "Labor"]
    agent_subclasses = ["BasicMobileAgent"]
    commodities = []

    def __init__(
        self,
        *base_component_args,
        commodities=(),
        max_skill_amount_benefit=1,
        max_skill_labour_benefit=1,
        skill_dist="none",
        **base_component_kwargs
    ):
        assert len(commodities) > 0

        # Setup commodities.
        self.recip_map = {}  # commodity name -> recipe dict
        self.commodities = []  # resource instances of the craftable commodities
        # Work on a per-instance copy: appending to the class-level list would
        # leak entities into every other instance and grow on each construction.
        self.required_entities = list(self.required_entities)
        for resource_name in commodities:
            res_class = resource_registry.get(resource_name)
            res = res_class()
            if res.craft_recp is None:
                continue  # not craftable
            assert res.craft_recp != {}
            assert res.craft_labour_base >= 0
            if resource_name not in self.required_entities:
                self.required_entities.append(resource_name)
            self.recip_map[res.name] = res.craft_recp
            self.commodities.append(res)

        self.max_skill_amount_benefit = max_skill_amount_benefit
        self.max_skill_labour_benefit = max_skill_labour_benefit

        assert self.max_skill_amount_benefit >= 1
        assert self.max_skill_labour_benefit <= 1

        self.skill_dist = skill_dist.lower()
        assert self.skill_dist in ["none", "pareto"]

        self.sampled_skills = {}

        self.builds = []
        super().__init__(*base_component_args, **base_component_kwargs)

    def agent_can_build(self, agent, res):
        """Return True if agent holds every ingredient needed to craft `res`.

        Args:
            agent: Agent whose inventory is checked.
            res (str): Name of the commodity to craft.

        Returns:
            bool: False if `res` has no recipe in this component or any
                ingredient is short; True otherwise.
        """
        if res not in self.recip_map:
            return False
        inventory = agent.state["inventory"]
        return not any(
            inventory[resource] < cost
            for resource, cost in self.recip_map[res].items()
        )

    # Required methods for implementing components
    # --------------------------------------------

    def get_n_actions(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        Add one action per craftable commodity for the supported agent classes.
        """
        if agent_cls_name in self.agent_subclasses:
            return len(self.commodities)

        return None

    def get_additional_state_fields(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        No state fields are added here; the craft skill/amount/labour entries
        are written directly into the agent state in additional_reset_steps.
        """
        if agent_cls_name not in self.agent_subclasses:
            return {}
        if agent_cls_name == "BasicMobileAgent":
            return {}
        raise NotImplementedError

    def component_step(self):
        """
        See base_component.py for detailed description.

        Convert recipe ingredients into the crafted commodity for agents that
        choose to craft and can. Agents that choose to craft but lack the
        ingredients get `bad_action` set to True.
        """
        build = []
        # Apply any crafting actions taken by the agents.
        for agent in self.world.get_random_order_agents():
            action = agent.get_component_action(self.name)

            # This component doesn't apply to this agent!
            if action is None:
                continue

            # NO-OP!
            if action == 0:
                continue

            # Craft! (If you can.) Action k selects the (k-1)-th commodity.
            comm = self.commodities[action - 1]

            if not self.agent_can_build(agent, comm.name):
                agent.bad_action = True
                continue

            inventory = agent.state["inventory"]

            # Remove the ingredients.
            for resource, cost in comm.craft_recp.items():
                inventory[resource] -= cost

            # Receive the crafted commodity.
            inventory[comm.name] += agent.state["craft_amount"][comm.name]

            # Incur the labor cost for crafting.
            agent.state["endogenous"]["Labor"] += agent.state["craft_labour"][
                comm.name
            ]

            build.append(
                {
                    "crafter": agent.idx,
                    "craft_commodity": comm.name,
                    "craft_skill": agent.state["craft_skill"][comm.name],
                    "craft_amount": agent.state["craft_amount"][comm.name],
                    "craft_labour": agent.state["craft_labour"][comm.name],
                }
            )

        self.builds.append(build)

    def generate_observations(self):
        """
        See base_component.py for detailed description.

        Here, agents of the supported classes observe their craft skill for each
        commodity. No observation is produced for other agents.
        """
        obs_dict = dict()
        for agent in self.world.agents:
            if agent.name in self.agent_subclasses:
                obs_dict[agent.idx] = {
                    "craft_skill_{}".format(comm.name): agent.state["craft_skill"][
                        comm.name
                    ]
                    for comm in self.commodities
                }

        return obs_dict

    def generate_masks(self, completions=0):
        """
        See base_component.py for detailed description.

        A craft action is masked when the agent lacks the ingredients for it.
        """
        masks = {}
        for agent in self.world.agents:
            if agent.name in self.agent_subclasses:
                masks[agent.idx] = np.array(
                    [self.agent_can_build(agent, comm.name) for comm in self.commodities]
                )

        return masks

    # For non-required customization
    # ------------------------------

    def get_metrics(self):
        """
        Metrics that capture what happened through this component.

        Returns:
            metrics (dict): A dictionary of {"metric_name": metric_value},
                where metric_value is a scalar. Currently always empty.
        """
        return {}

    def additional_reset_steps(self):
        """
        See base_component.py for detailed description.

        Sample craft skill, crafted amount and labor cost for every supported
        agent that has not been marked as set up yet, and clear the craft log.
        """
        MSAB = self.max_skill_amount_benefit
        MSLB = self.max_skill_labour_benefit

        for agent in self.world.agents:
            # Agents that are already set up keep their sampled skills.
            if agent.name not in self.agent_subclasses or agent.is_setup:
                continue
            agent.state["craft_skill"] = {}
            agent.state["craft_labour"] = {}
            agent.state["craft_amount"] = {}

            for comm in self.commodities:
                if self.skill_dist == "none":
                    sampled_skill = 1
                    amount = 1
                    # Previously left undefined in this branch, which raised a
                    # NameError below for the default skill_dist.
                    labour_modifier = 1
                elif self.skill_dist == "pareto":
                    sampled_skill = np.random.pareto(2)
                    # NOTE(review): the bonus is capped at MSAB, so amount can
                    # reach 1 + MSAB -- confirm this is the intended ceiling.
                    amount = 1 + np.minimum(MSAB, (MSAB - 1) * sampled_skill)
                    labour_modifier = 1 - np.minimum(
                        1 - MSLB, (1 - MSLB) * sampled_skill
                    )
                else:
                    raise NotImplementedError
                agent.state["craft_skill"][comm.name] = sampled_skill
                agent.state["craft_labour"][comm.name] = (
                    comm.craft_labour_base * labour_modifier
                )
                agent.state["craft_amount"][comm.name] = amount

        self.builds = []

    def get_dense_log(self):
        """
        Log crafts.

        Returns:
            builds (list): A list of craft events. Each entry corresponds to a
                single timestep and contains a description of any crafts that
                occurred on that timestep.
        """
        return self.builds
|
||||
221
components/external_market.py
Normal file
221
components/external_market.py
Normal file
@@ -0,0 +1,221 @@
|
||||
# Copyright (c) 2020, salesforce.com, inc.
|
||||
# All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
# For full license text, see the LICENSE file in the repo root
|
||||
# or https://opensource.org/licenses/BSD-3-Clause
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ai_economist.foundation.base.base_component import (
|
||||
BaseComponent,
|
||||
component_registry,
|
||||
)
|
||||
|
||||
|
||||
@component_registry.add
class ExternalMarket(BaseComponent):
    """
    Allows trading agents to sell resources to an external market at fixed prices.

    Selling removes one unit of the resource from the agent's inventory, pays
    the configured amount of Coin and adds `trade_labor` to the agent's Labor.

    Args:
        market_demand (dict): Resource name -> amount of coin paid per unit
            sold. Default is an empty dict (nothing can be sold).
        trade_labor (float): Labor cost associated with one sale.
            Must be >= 0. Default is 1.0.
    """

    name = "ExternalMarket"
    component_type = "Trade"
    required_entities = ["Coin", "Labor"]
    agent_subclasses = ["TradingAgent"]

    def __init__(
        self,
        *base_component_args,
        market_demand=None,
        trade_labor=1.0,
        **base_component_kwargs
    ):
        super().__init__(*base_component_args, **base_component_kwargs)

        # None replaces the former shared mutable `{}` default.
        self.market_demand = {} if market_demand is None else market_demand
        # Action 0 is the NO-OP, so resources are numbered starting at 1.
        self.action_res_map = {
            action: res_name
            for action, res_name in enumerate(self.market_demand, start=1)
        }

        self.trade_labor = float(trade_labor)
        assert self.trade_labor >= 0

        self.builds = []

    def agent_can_sell(self, agent, res):
        """Return True if agent holds at least one unit of `res`."""
        return bool(agent.state["inventory"][res] >= 1)

    # Required methods for implementing components
    # --------------------------------------------

    def get_n_actions(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        Add one sell action per resource in market_demand for trading agents.
        """
        if agent_cls_name in self.agent_subclasses:
            return len(self.action_res_map)

        return None

    def get_additional_state_fields(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        This component adds no state fields.
        """
        return {}

    def component_step(self):
        """
        See base_component.py for detailed description.

        Convert one unit of the chosen resource into coin for agents that
        choose to sell.

        Raises:
            ValueError: If an agent chooses to sell a resource it does not hold.
        """
        build = []
        # Apply any sell actions taken by the agents.
        for agent in self.world.get_random_order_agents():
            action = agent.get_component_action(self.name)

            # This component doesn't apply to this agent!
            if action is None:
                continue

            # NO-OP!
            if action == 0:
                continue

            res_name = self.action_res_map[action]

            if not self.agent_can_sell(agent, res_name):
                raise ValueError(
                    "Agent {} cannot sell {}: none in inventory".format(
                        agent.idx, res_name
                    )
                )

            income = self.market_demand[res_name]

            # Remove the sold unit.
            agent.state["inventory"][res_name] -= 1

            # Receive payment for it.
            agent.state["inventory"]["Coin"] += income

            # Incur the labor cost for trading.
            agent.state["endogenous"]["Labor"] += self.trade_labor

            build.append(
                {
                    "seller": agent.idx,
                    "commodity": res_name,
                    "income": income,
                }
            )

        self.builds.append(build)

    def generate_observations(self):
        """
        See base_component.py for detailed description.

        Here, trading agents observe the external price of every resource in
        market_demand, scaled by `inv_scale`. No observation is produced for
        other agents.
        """
        obs_dict = dict()
        for agent in self.world.agents:
            if agent.name in self.agent_subclasses:
                # The original used `target: value` (an annotation) here, so
                # nothing was ever stored; this must be an assignment.
                obs_dict[agent.idx] = {
                    "external_{}_price".format(res_name): self.inv_scale * coin
                    for res_name, coin in self.market_demand.items()
                }

        return obs_dict

    def generate_masks(self, completions=0):
        """
        See base_component.py for detailed description.

        A sell action is masked when the agent holds less than one unit of the
        corresponding resource.
        """
        masks = {}
        for agent in self.world.agents:
            if agent.name in self.agent_subclasses:
                # Returned as a numpy array, like the masks of the Craft component.
                masks[agent.idx] = np.array(
                    [self.agent_can_sell(agent, res) for res in self.market_demand]
                )

        return masks

    # For non-required customization
    # ------------------------------

    def get_metrics(self):
        """
        Metrics that capture what happened through this component.

        Returns:
            metrics (dict): A dictionary of {"metric_name": metric_value},
                where metric_value is a scalar. Currently always empty.
        """
        return {}

    def additional_reset_steps(self):
        """
        See base_component.py for detailed description.

        Clear the log of sales.
        """
        self.builds = []

    def get_dense_log(self):
        """
        Log sales.

        Returns:
            builds (list): A list of sale events. Each entry corresponds to a
                single timestep and contains a description of any sales that
                occurred on that timestep.
        """
        return self.builds
|
||||
@@ -44,7 +44,7 @@ class SimpleCraft(BaseComponent):
|
||||
payment=10,
|
||||
payment_max_skill_multiplier=1,
|
||||
skill_dist="none",
|
||||
build_labor=10.0,
|
||||
build_labor=1.0,
|
||||
**base_component_kwargs
|
||||
):
|
||||
super().__init__(*base_component_args, **base_component_kwargs)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
|
||||
from . import (
|
||||
simple_market,
|
||||
econ_wrapper
|
||||
econ_wrapper,
|
||||
econ
|
||||
)
|
||||
|
||||
482
envs/econ.py
Normal file
482
envs/econ.py
Normal file
@@ -0,0 +1,482 @@
|
||||
# Copyright (c) 2020, salesforce.com, inc.
|
||||
# All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
# For full license text, see the LICENSE file in the repo root
|
||||
# or https://opensource.org/licenses/BSD-3-Clause
|
||||
|
||||
from copy import deepcopy
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from scipy import signal
|
||||
|
||||
from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry
|
||||
from ai_economist.foundation.scenarios.utils import rewards, social_metrics
|
||||
import yaml
|
||||
|
||||
|
||||
@scenario_registry.add
|
||||
class Econ(BaseEnvironment):
|
||||
"""
|
||||
World containing stone and wood with stochastic regeneration. Refers to a fixed
|
||||
layout file (see ./map_txt/ for examples) to determine the spatial arrangement of
|
||||
stone, wood, and water tiles.
|
||||
|
||||
Args:
|
||||
action_against_mask_penelty=-1 (int): Reward penelty for performing action against mask
|
||||
full_observability (bool): Whether the mobile agents' spatial observation
|
||||
includes the full world view or is instead an egocentric view.
|
||||
mobile_agent_observation_range (int): If not using full_observability,
|
||||
the spatial range (on each side of the agent) that is visible in the
|
||||
spatial observations.
|
||||
env_layout_file (str): Name of the layout file in ./map_txt/ to use.
|
||||
Note: The world dimensions of that layout must match the world dimensions
|
||||
argument used to construct the environment.
|
||||
resource_regen_prob (float): Probability that an empty source tile will
|
||||
regenerate a new resource unit.
|
||||
fixed_four_skill_and_loc (bool): Whether to use a fixed set of build skills and
|
||||
starting locations, with agents grouped into starting locations based on
|
||||
which skill quartile they are in. False, by default.
|
||||
True, for experiments in https://arxiv.org/abs/2004.13332.
|
||||
Note: Requires that the environment uses the "Build" component with
|
||||
skill_dist="pareto".
|
||||
starting_agent_coin (int, float): Amount of coin agents have at t=0. Defaults
|
||||
to zero coin.
|
||||
isoelastic_eta (float): Parameter controlling the shape of agent utility
|
||||
wrt coin endowment.
|
||||
energy_cost (float): Coefficient for converting labor to negative utility.
|
||||
energy_warmup_constant (float): Decay constant that controls the rate at which
|
||||
the effective energy cost is annealed from 0 to energy_cost. Set to 0
|
||||
(default) to disable annealing, meaning that the effective energy cost is
|
||||
always energy_cost. The units of the decay constant depend on the choice of
|
||||
energy_warmup_method.
|
||||
energy_warmup_method (str): How to schedule energy annealing (warmup). If
|
||||
"decay" (default), use the number of completed episodes. If "auto",
|
||||
use the number of timesteps where the average agent reward was positive.
|
||||
planner_reward_type (str): The type of reward used for the planner. Options
|
||||
are "coin_eq_times_productivity" (default),
|
||||
"inv_income_weighted_coin_endowments", and "inv_income_weighted_utility".
|
||||
mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/
|
||||
"coin_eq_times_productivity". Default is 0, which weights equality and
|
||||
productivity equally. If set to 1, only productivity is rewarded.
|
||||
"""
|
||||
|
||||
# Scenario identifier; the run scripts select it via 'scenario_name': 'econ'.
name = "econ"
# Agent class names listed for this scenario.
agent_subclasses = ["BasicMobileAgent"]
# Entity names listed as required by this scenario.
required_entities = ["Wood", "Stone", "Water", "Gem_Raw", "Gem"]
|
||||
|
||||
def __init__(
    self,
    *base_env_args,
    resource_regen_prob=0.01,
    fixed_four_skill_and_loc=False,
    starting_agent_coin=0,
    isoelastic_eta=0.23,
    energy_cost=0.21,
    energy_warmup_constant=0,
    energy_warmup_method="decay",
    planner_reward_type="coin_eq_times_productivity",
    mixing_weight_gini_vs_coin=0.0,
    **base_env_kwargs,
):
    """
    Store the scenario-specific settings after initializing the base environment.

    All positional arguments and any keyword arguments not named below are
    forwarded unchanged to the parent constructor.

    Args:
        resource_regen_prob (float): Stored as the "regen_weight" of the Wood
            and Stone layout specs. Must lie in [0, 1].
        fixed_four_skill_and_loc (bool): Stored on the instance as a bool.
            Nothing in this constructor acts on the flag.
        starting_agent_coin (int, float): Converted to float; must be >= 0.
        isoelastic_eta (float): Converted to float; must lie in [0, 1].
        energy_cost (float): Converted to float; must be >= 0.
        energy_warmup_constant (float): Converted to float; must be >= 0.
        energy_warmup_method (str): Lower-cased; must be "decay" or "auto".
        planner_reward_type (str): Lower-cased and stored. Not validated here.
        mixing_weight_gini_vs_coin (float): Converted to float; must lie in
            [0, 1].

    Raises:
        AssertionError: If any of the range/membership checks above fails.
    """
    super().__init__(*base_env_args, **base_env_kwargs)

    # Wood and Stone use identical layout settings; each gets its own dict.
    regen_weight = float(resource_regen_prob)
    self.layout_specs = {
        resource: {
            "regen_weight": regen_weight,
            "regen_halfwidth": 0,
            "max_health": 1,
        }
        for resource in ("Wood", "Stone")
    }
    assert 0 <= self.layout_specs["Wood"]["regen_weight"] <= 1
    assert 0 <= self.layout_specs["Stone"]["regen_weight"] <= 1

    # Keep the flag instead of discarding it, so the configured value can be
    # inspected on the instance.
    self.fixed_four_skill_and_loc = bool(fixed_four_skill_and_loc)

    # How much coin agents begin with upon reset
    self.starting_agent_coin = float(starting_agent_coin)
    assert self.starting_agent_coin >= 0.0

    # Controls the diminishing marginal utility of coin.
    # isoelastic_eta=0 means no diminishing utility.
    self.isoelastic_eta = float(isoelastic_eta)
    assert 0.0 <= self.isoelastic_eta <= 1.0

    # The amount that labor is weighted in utility computation
    # (once annealing is finished)
    self.energy_cost = float(energy_cost)
    assert self.energy_cost >= 0

    # Which counter drives the energy annealing progress:
    #   'decay': number of completed episodes
    #   'auto' : number of timesteps where avg. agent reward > 0
    self.energy_warmup_method = energy_warmup_method.lower()
    assert self.energy_warmup_method in ["decay", "auto"]

    # Decay constant for annealing to full energy cost
    # (if energy_warmup_constant == 0, there is no annealing)
    self.energy_warmup_constant = float(energy_warmup_constant)
    assert self.energy_warmup_constant >= 0
    self._auto_warmup_integrator = 0

    # Which social welfare function to use for the planner reward
    self.planner_reward_type = str(planner_reward_type).lower()

    # How much to weight equality if using SWF=eq*prod:
    #   0 -> SWF=eq * prod
    #   1 -> SWF=prod
    self.mixing_weight_gini_vs_coin = float(mixing_weight_gini_vs_coin)
    assert 0 <= self.mixing_weight_gini_vs_coin <= 1.0

    # Metric trackers used to compute marginal changes delivered as reward
    self.init_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.prev_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.curr_optimization_metric = {agent.idx: 0 for agent in self.all_agents}

    # Per-agent list of starting positions (empty at construction)
    self.agent_starting_pos = {agent.idx: [] for agent in self.world.agents}

    # When True, the reset methods keep the existing layout and skip agents
    # that are already marked as set up.
    self._persist_between_resets = False

    self.last_log_loged = {}
|
||||
|
||||
|
||||
@property
def energy_weight(self):
    """
    Progress of the energy-cost annealing.

    Multiply by self.energy_cost to obtain the effective labor coefficient.

    Returns:
        float: 1.0 when energy_warmup_constant <= 0 (annealing disabled);
            otherwise 1 - exp(-progress / energy_warmup_constant), where
            progress is self._completions for the "decay" method and
            self._auto_warmup_integrator for the "auto" method.

    Raises:
        NotImplementedError: If energy_warmup_method is neither "decay" nor
            "auto".
    """
    decay_constant = self.energy_warmup_constant
    if decay_constant <= 0.0:
        return 1.0

    method = self.energy_warmup_method
    if method == "decay":
        progress = self._completions
    elif method == "auto":
        progress = self._auto_warmup_integrator
    else:
        raise NotImplementedError

    return float(1.0 - np.exp(-progress / decay_constant))
|
||||
|
||||
def is_bad_action(self, agent):
    """
    Read and clear an agent's bad_action flag.

    Args:
        agent: Object carrying a bad_action attribute.

    Returns:
        The value bad_action held before this call. The attribute is set to
        False as a side effect.
    """
    was_bad, agent.bad_action = agent.bad_action, False
    return was_bad
|
||||
|
||||
def get_current_optimization_metrics(self):
    """
    Compute optimization metrics based on the current state. Used to compute reward.

    Each agent's metric is rewards.isoelastic_coin_minus_labor evaluated on its
    total coin endowment and its "Labor" endogenous value. The planner's metric
    is chosen by self.planner_reward_type.

    Returns:
        curr_optimization_metric (dict): A dictionary of {agent.idx: metric}
            with an entry for each agent (including the planner) in the env.

    Raises:
        NotImplementedError: If self.planner_reward_type is not one of
            "coin_eq_times_productivity",
            "inv_income_weighted_coin_endowments" or
            "inv_income_weighted_utility".
    """
    agents = self.world.agents
    labor_coefficient = self.energy_weight * self.energy_cost

    # (for agents)
    curr_optimization_metric = {
        agent.idx: rewards.isoelastic_coin_minus_labor(
            coin_endowment=agent.total_endowment("Coin"),
            total_labor=agent.state["endogenous"]["Labor"],
            isoelastic_eta=self.isoelastic_eta,
            labor_coefficient=labor_coefficient,
        )
        for agent in agents
    }

    # (for the planner) every supported reward type consumes the same
    # endowment vector, so it is built once.
    coin_endowments = np.array([agent.total_endowment("Coin") for agent in agents])
    reward_type = self.planner_reward_type

    if reward_type == "coin_eq_times_productivity":
        planner_metric = rewards.coin_eq_times_productivity(
            coin_endowments=coin_endowments,
            equality_weight=1 - self.mixing_weight_gini_vs_coin,
        )
    elif reward_type == "inv_income_weighted_coin_endowments":
        planner_metric = rewards.inv_income_weighted_coin_endowments(
            coin_endowments=coin_endowments
        )
    elif reward_type == "inv_income_weighted_utility":
        planner_metric = rewards.inv_income_weighted_utility(
            coin_endowments=coin_endowments,
            utilities=np.array(
                [curr_optimization_metric[agent.idx] for agent in agents]
            ),
        )
    else:
        # Carry the diagnostic on the exception so it is not lost when stdout
        # is not being watched.
        raise NotImplementedError(
            "No valid planner reward selected! Got planner_reward_type={!r}".format(
                reward_type
            )
        )

    curr_optimization_metric[self.world.planner.idx] = planner_metric
    return curr_optimization_metric
|
||||
|
||||
# The following methods must be implemented for each scenario
|
||||
# -----------------------------------------------------------
|
||||
|
||||
def reset_starting_layout(self):
    """
    Part 1/2 of scenario reset. This method handles resetting the state of the
    environment managed by the scenario (i.e. resource & landmark layout).

    Here, the world maps are cleared and set_point_add is called with
    (resource, 0, 0, 1) for each of Wood, Stone and Gem_Raw. When
    self._persist_between_resets is set, nothing is touched.
    """
    # Persisting between resets: keep the current maps as they are.
    if self._persist_between_resets:
        return

    maps = self.world.maps
    maps.clear()

    for resource_name in ("Wood", "Stone", "Gem_Raw"):
        maps.set_point_add(resource_name, 0, 0, 1)
|
||||
|
||||
def reset_agent_states(self):
    """
    Part 2/2 of scenario reset. This method handles resetting the state of the
    agents themselves (i.e. inventory, locations, etc.).

    Unless self._persist_between_resets is set, agent locations are cleared and
    every agent is marked as not set up. Each agent that is not set up gets
    zeroed inventory, escrow and endogenous state, its "Coin" inventory set to
    self.starting_agent_coin, and its bad_action flag cleared. The planner's
    inventory and escrow are always zeroed.
    """
    persist = self._persist_between_resets
    world = self.world

    if not persist:
        world.clear_agent_locs()

    for agent in world.agents:
        if not persist:
            agent.set_setup(False)  # force a full state reset below

        if agent.is_setup:
            # Already set up for the scenario: keep its state as is.
            continue

        # Escrow is keyed by the inventory's entity names.
        entity_names = list(agent.inventory.keys())
        agent.state["inventory"] = dict.fromkeys(entity_names, 0)
        agent.state["escrow"] = dict.fromkeys(entity_names, 0)
        agent.state["endogenous"] = dict.fromkeys(agent.endogenous.keys(), 0)
        # Add starting coin
        agent.state["inventory"]["Coin"] = float(self.starting_agent_coin)
        # NOTE(review): nesting inferred from a listing without indentation;
        # confirm the flag reset belongs inside the "not set up" branch.
        agent.bad_action = False

    planner = world.planner
    planner.state["inventory"] = dict.fromkeys(planner.inventory.keys(), 0)
    planner.state["escrow"] = dict.fromkeys(planner.escrow.keys(), 0)
|
||||
|
||||
|
||||
def scenario_step(self):
    """
    Update the state of the world according to whatever rules this scenario
    implements.

    This gets called in the 'step' method (of base_env) after going through each
    component step and before generating observations, rewards, etc.

    Here, every step calls set_point_add with (resource, 0, 0, 20) on the world
    maps for each of Wood, Stone and Gem_Raw.
    # NOTE(review): presumably this tops up the resource at tile (0, 0) by 20
    # units - confirm against the maps implementation.
    """
    maps = self.world.maps

    for resource_name in ("Wood", "Stone", "Gem_Raw"):
        maps.set_point_add(resource_name, 0, 0, 20)
|
||||
|
||||
|
||||
def generate_observations(self):
    """
    Generate observations associated with this scenario.

    Returns:
        obs (dict): One entry for the planner (keyed by its idx) and one entry
            per agent in self.world.agents (keyed by str(agent.idx)). Each
            value maps "inventory-<entity>" to that entity's inventory amount
            multiplied by self.inv_scale.

    This scenario provides no spatial observations: the observation of every
    agent, and of the planner, consists only of its own scaled inventory.
    """
    scale = self.inv_scale

    def scaled_inventory(entity):
        # Prefix every inventory key and apply the observation scaling.
        return {
            "inventory-" + item: amount * scale
            for item, amount in entity.inventory.items()
        }

    planner = self.world.planner
    obs = {planner.idx: scaled_inventory(planner)}

    for agent in self.world.agents:
        obs[str(agent.idx)] = scaled_inventory(agent)

    return obs
|
||||
|
||||
def compute_reward(self):
    """
    Apply the reward function(s) associated with this scenario to get the rewards
    from this step.

    Returns:
        rew (dict): A dictionary of {agent.idx: reward} with an entry for each
            agent in the environment (including the planner). Each value is the
            scalar reward earned this timestep.

    Rewards are the change in optimization metric since the previous call.
    For every key other than "p", the reward is additionally reduced by 1 when
    self.is_bad_action reports the agent's flag as set (which also clears it).

    Side effects: updates self.curr_optimization_metric and
    self.prev_optimization_metric, and increments
    self._auto_warmup_integrator when the mean agent reward is positive.
    """
    # Not refreshed yet, so this is still the utility from the last step.
    previous = deepcopy(self.curr_optimization_metric)

    # Compute current objectives and store the values.
    current = self.get_current_optimization_metrics()
    self.curr_optimization_metric = current

    # reward = curr - prev objectives (minus the bad-action penalty)
    rew = {}
    for idx, value in current.items():
        delta = float(value - previous[idx])
        if idx != "p" and self.is_bad_action(self.world.agents[idx]):
            delta -= 1
        rew[idx] = delta

    # Store the previous objective values.
    self.prev_optimization_metric.update(previous)

    # Automatic Energy Cost Annealing
    # -------------------------------
    # Count the number of timesteps where the avg agent reward was > 0.
    agent_rewards = [rew[agent.idx] for agent in self.world.agents]
    if np.mean(agent_rewards) > 0:
        self._auto_warmup_integrator += 1

    return rew
|
||||
|
||||
# Optional methods for customization
|
||||
# ----------------------------------
|
||||
|
||||
def additional_reset_steps(self):
    """
    Extra scenario-specific steps that should be performed at the end of the reset
    cycle.

    For this scenario, this method re-seeds the optimization metric trackers:
    the current metrics are computed once and an independent deep copy is
    stored in each of curr_optimization_metric, init_optimization_metric and
    prev_optimization_metric.
    """
    baseline = self.get_current_optimization_metrics()

    for tracker_name in (
        "curr_optimization_metric",
        "init_optimization_metric",
        "prev_optimization_metric",
    ):
        setattr(self, tracker_name, deepcopy(baseline))
|
||||
|
||||
|
||||
|
||||
def scenario_metrics(self):
    """
    Allows the scenario to generate metrics (collected along with component metrics
    in the 'metrics' property).

    Returns:
        metrics (dict): Flat {metric_key: value} dictionary containing
            - "social/productivity" and "social/equality",
            - three "social_welfare/..." values,
            - "endow/<idx>/<resource>", "endogenous/<idx>/<resource>" and
              "util/<idx>" for every agent in self.all_agents,
            - "labor/weighted_cost" and "labor/warmup_integrator".
    """
    mobile_agents = self.world.agents
    metrics = {}

    coin_endowments = np.array(
        [agent.total_endowment("Coin") for agent in mobile_agents]
    )
    metrics["social/productivity"] = social_metrics.get_productivity(coin_endowments)
    metrics["social/equality"] = social_metrics.get_equality(coin_endowments)

    utilities = np.array(
        [self.curr_optimization_metric[agent.idx] for agent in mobile_agents]
    )
    metrics["social_welfare/coin_eq_times_productivity"] = (
        rewards.coin_eq_times_productivity(
            coin_endowments=coin_endowments, equality_weight=1.0
        )
    )
    metrics["social_welfare/inv_income_weighted_coin_endow"] = (
        rewards.inv_income_weighted_coin_endowments(coin_endowments=coin_endowments)
    )
    metrics["social_welfare/inv_income_weighted_utility"] = (
        rewards.inv_income_weighted_utility(
            coin_endowments=coin_endowments, utilities=utilities
        )
    )

    for agent in self.all_agents:
        idx = agent.idx

        # Endowment is reported via total_endowment, not the raw inventory value.
        for resource in agent.inventory.keys():
            endow_key = "endow/{}/{}".format(idx, resource)
            metrics[endow_key] = agent.total_endowment(resource)

        if agent.endogenous is not None:
            for resource, quantity in agent.endogenous.items():
                metrics["endogenous/{}/{}".format(idx, resource)] = quantity

        metrics["util/{}".format(idx)] = self.curr_optimization_metric[idx]

    # Labor weight
    metrics["labor/weighted_cost"] = self.energy_cost * self.energy_weight
    metrics["labor/warmup_integrator"] = int(self._auto_warmup_integrator)

    return metrics
|
||||
|
||||
341
main working good econ trader univer.pys
Normal file
341
main working good econ trader univer.pys
Normal file
@@ -0,0 +1,341 @@
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ai_economist import foundation
|
||||
from stable_baselines3.common.vec_env import vec_frame_stack
|
||||
from stable_baselines3.common.evaluation import evaluate_policy
|
||||
from sb3_contrib.ppo_mask import MaskablePPO
|
||||
import envs
|
||||
import wrapper
|
||||
import resources
|
||||
from agents import trading_agent
|
||||
from wrapper.base_econ_wrapper import BaseEconWrapper
|
||||
from wrapper.reciever_econ_wrapper import RecieverEconWrapper
|
||||
from wrapper.sb3_econ_converter import SB3EconConverter
|
||||
from tqdm import tqdm
|
||||
import components
|
||||
from stable_baselines3.common.env_checker import check_env
|
||||
from stable_baselines3 import PPO
|
||||
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
|
||||
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
|
||||
from sb3_contrib import RecurrentPPO
|
||||
from envs.econ_wrapper import EconVecEnv
|
||||
from stable_baselines3.common.callbacks import BaseCallback
|
||||
import yaml
|
||||
import time
|
||||
from threading import Thread
|
||||
|
||||
# Configuration of the training environment, unpacked into
# foundation.make_env_instance(**env_config).
env_config = dict(
    # ===== SCENARIO CLASS =====
    # Name of the Scenario class in the Scenario Registry (foundation.scenarios).
    # The environment object will be an instance of that class.
    scenario_name='econ',

    # ===== COMPONENTS =====
    # List of ("component_name", {component_kwargs}) tuples.
    # "component_name" is the Component class's name in the Component Registry
    # (foundation.components); {component_kwargs} is passed to that class.
    # Components reset, step, and generate obs in the order listed here.
    components=[
        # (1) Crafting
        ('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"], 'max_skill_amount_benefit': 1.5}),
        # (2) Trading collectible resources
        ('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Resource collection
        ('SimpleGather', {}),
        # (4) External market
        ('ExternalMarket', {'market_demand': {'Gem': 15}}),
    ],

    # ===== SCENARIO CLASS ARGUMENTS =====
    # (optional) kwargs that are added by the Scenario class
    # (i.e. not defined in BaseEnvironment)
    starting_agent_coin=10,
    fixed_four_skill_and_loc=True,

    # ===== STANDARD ARGUMENTS ======
    # Agent class name -> number of agents of that class
    agent_composition={"BasicMobileAgent": 20, "TradingAgent": 5},
    world_size=[5, 5],  # [Height, Width] of the env world
    episode_length=256,  # Number of timesteps per episode
    allow_observation_scaling=True,
    dense_log_frequency=100,
    world_dense_log_frequency=1,
    energy_cost=0,
    energy_warmup_method="auto",
    energy_warmup_constant=4000,

    # In multi-action-mode, the policy selects an action for each action
    # subspace (defined in component code). Otherwise, it selects only 1 action.
    multi_action_mode_agents=False,
    multi_action_mode_planner=False,

    # When flattening observations, concatenate scalar & vector observations
    # before output. Otherwise, return observations with minimal processing.
    flatten_observations=False,
    # When flattening masks, concatenate each action subspace mask into a single
    # array. Note: flatten_masks = True is required for masking action logits.
    flatten_masks=True,
)
|
||||
|
||||
|
||||
# Configuration of the evaluation environment, unpacked into
# foundation.make_env_instance(**eval_env_config). It differs from the training
# configuration in 'world_size' and 'dense_log_frequency'.
eval_env_config = dict(
    # ===== SCENARIO CLASS =====
    # Name of the Scenario class in the Scenario Registry (foundation.scenarios).
    # The environment object will be an instance of that class.
    scenario_name='econ',

    # ===== COMPONENTS =====
    # List of ("component_name", {component_kwargs}) tuples.
    # "component_name" is the Component class's name in the Component Registry
    # (foundation.components); {component_kwargs} is passed to that class.
    # Components reset, step, and generate obs in the order listed here.
    components=[
        # (1) Crafting
        ('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"], 'max_skill_amount_benefit': 1.5}),
        # (2) Trading collectible resources
        ('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Resource collection
        ('SimpleGather', {}),
        # (4) External market
        ('ExternalMarket', {'market_demand': {'Gem': 15}}),
    ],

    # ===== SCENARIO CLASS ARGUMENTS =====
    # (optional) kwargs that are added by the Scenario class
    # (i.e. not defined in BaseEnvironment)
    starting_agent_coin=10,
    fixed_four_skill_and_loc=True,

    # ===== STANDARD ARGUMENTS ======
    # Agent class name -> number of agents of that class
    agent_composition={"BasicMobileAgent": 20, "TradingAgent": 5},
    world_size=[1, 1],  # [Height, Width] of the env world
    episode_length=256,  # Number of timesteps per episode
    allow_observation_scaling=True,
    dense_log_frequency=1,
    world_dense_log_frequency=1,
    energy_cost=0,
    energy_warmup_method="auto",
    energy_warmup_constant=4000,

    # In multi-action-mode, the policy selects an action for each action
    # subspace (defined in component code). Otherwise, it selects only 1 action.
    multi_action_mode_agents=False,
    multi_action_mode_planner=False,

    # When flattening observations, concatenate scalar & vector observations
    # before output. Otherwise, return observations with minimal processing.
    flatten_observations=False,
    # When flattening masks, concatenate each action subspace mask into a single
    # array. Note: flatten_masks = True is required for masking action logits.
    flatten_masks=True,
)

# Number of frames stacked by the VecFrameStack wrappers.
num_frames = 5
|
||||
|
||||
class TensorboardCallback(BaseCallback):
    """
    Callback that records productivity metrics of an economy environment
    through the training logger.
    """

    def __init__(self, econ, verbose=0):
        """
        Args:
            econ: Environment exposing scenario_metrics() and
                previous_episode_metrics.
            verbose (int): Forwarded to the parent callback.
        """
        super().__init__(verbose)
        self.econ = econ
        # Baseline used for the first delta computation.
        self.metrics = econ.scenario_metrics()

    def _on_step(self) -> bool:
        """
        Record the current productivity and its change since the last call.

        The metrics of the previous episode are used when available;
        otherwise the scenario metrics are computed on the spot.

        Returns:
            bool: Always True.
        """
        last_metrics = self.metrics

        episode_metrics = self.econ.previous_episode_metrics
        if episode_metrics is None:
            episode_metrics = self.econ.scenario_metrics()
        self.metrics = episode_metrics

        productivity = self.metrics["social/productivity"]
        productivity_delta = productivity - last_metrics["social/productivity"]
        self.logger.record("social/total_productivity", productivity)
        self.logger.record("social/delta_productivity", productivity_delta)

        return True
|
||||
|
||||
|
||||
def printMarket(market):
    """
    Print the transactions of a market log, grouped by step.

    Args:
        market: Iterable with one entry per step; each entry is a sized
            collection of transaction mappings with the keys "commodity",
            "seller", "buyer", "ask", "bid" and "price". Steps without
            transactions are skipped.

    Returns:
        str: Always the empty string.
    """
    for step_idx, transactions in enumerate(market):
        if len(transactions) == 0:
            continue

        print("=== Step {} ===".format(step_idx))
        for t in transactions:
            # The trailing newline in the format leaves a blank line after
            # every transaction.
            print(
                "({}) {} -> {} | [{}/{}] {} Coins\n".format(
                    t["commodity"], t["seller"], t["buyer"], t["ask"], t["bid"], t["price"]
                )
            )
    return ""
|
||||
|
||||
def printBuilds(builds):
    """
    Print every build event of a build log, one line per event.

    Args:
        builds: Iterable with one entry per step; each entry is a sized
            collection of build mappings with the keys "builder",
            "build_skill" and "income". The step index is printed in front of
            every event.

    Returns:
        str: Always the empty string.
    """
    for step_idx, step_builds in enumerate(builds):
        if len(step_builds) == 0:
            continue

        for b in step_builds:
            print(
                "({}) Builder: {}, Skill: {}, Income {} ".format(
                    step_idx, b["builder"], b["build_skill"], b["income"]
                )
            )
    return ""
|
||||
def printReplay(econ, agentid):
    """
    Print a step-by-step replay of one agent from the previous episode's dense log.

    For every step the Stone and Wood map values at [0][0], the agent's logged
    state (as YAML), its logged actions and its reward are printed. The number
    of steps printed is len(log["states"]) - 1.

    Args:
        econ: Environment exposing previous_episode_dense_log with the keys
            "states", "world", "actions" and "rewards".
        agentid: Agent index; its string form is used as the key into the
            per-step log entries.
    """
    world_maps = ("Stone", "Wood")
    log = econ.previous_episode_dense_log
    agent_key = str(agentid)
    n_steps = len(log["states"]) - 1

    for step in range(n_steps):
        print()
        print("=== Step {} ===".format(step))

        print("--- World ---")
        world = log['world'][step]
        for res in world_maps:
            print("{}: {}".format(res, world[res][0][0]))

        print("--- State ---")
        print(yaml.safe_dump(log['states'][step][agent_key]))

        print("--- Action ---")
        action = log["actions"][step][agent_key]
        if action == {}:
            # An empty action dict is reported as a no-op.
            print("Action: 0 -> NOOP")
        else:
            for action_name, action_value in action.items():
                print("Action: {}({})".format(action_name, action_value))

        print("--- Reward ---")
        print("Reward: {}".format(log["rewards"][step][agent_key]))
|
||||
|
||||
#Setup Env Objects
|
||||
# ----- Training environment -----
econ = foundation.make_env_instance(**env_config)

market = econ.get_component("ContinuousDoubleAuction")
action = market.get_n_actions("TradingAgent")

baseEconWrapper = BaseEconWrapper(econ)
baseEconWrapper.run()
# NOTE(review): presumably gives the wrapper started by run() time to come
# up before receivers attach - confirm.
time.sleep(0.5)

# One receiver per agent class, both attached to the same base wrapper.
mobileRecieverEconWrapper = RecieverEconWrapper(
    base_econ=baseEconWrapper,
    agent_classname="BasicMobileAgent",
)
tradeRecieverEconWrapper = RecieverEconWrapper(
    base_econ=baseEconWrapper,
    agent_classname="TradingAgent",
)
sb3_traderConverter = SB3EconConverter(tradeRecieverEconWrapper, econ, "TradingAgent", True)
sb3Converter = SB3EconConverter(mobileRecieverEconWrapper, econ, "BasicMobileAgent", True)

# Attach sb3 wrappers: monitor first, then frame stacking.
monenv = VecMonitor(
    venv=sb3Converter,
    info_keywords=["social/productivity", "trend/productivity"],
)
montraidingenv = VecMonitor(venv=sb3_traderConverter)

stackenv_basic = vec_frame_stack.VecFrameStack(venv=monenv, n_stack=num_frames)
stackenv_traid = vec_frame_stack.VecFrameStack(venv=montraidingenv, n_stack=num_frames)

# ----- Evaluation environment -----
# Same wiring as above, built from eval_env_config; the converters receive
# False as their last argument instead of True.
econ_eval = foundation.make_env_instance(**eval_env_config)

baseEconWrapper_eval = BaseEconWrapper(econ_eval)
baseEconWrapper_eval.run()
time.sleep(0.5)

mobileRecieverEconWrapper_eval = RecieverEconWrapper(
    base_econ=baseEconWrapper_eval,
    agent_classname="BasicMobileAgent",
)
tradeRecieverEconWrapper_eval = RecieverEconWrapper(
    base_econ=baseEconWrapper_eval,
    agent_classname="TradingAgent",
)
sb3_traderConverter_eval = SB3EconConverter(
    tradeRecieverEconWrapper_eval, econ_eval, "TradingAgent", False
)
sb3Converter_eval = SB3EconConverter(
    mobileRecieverEconWrapper_eval, econ_eval, "BasicMobileAgent", False
)

# Attach sb3 wrappers.
monenv_eval = VecMonitor(
    venv=sb3Converter_eval,
    info_keywords=["social/productivity", "trend/productivity"],
)
montraidingenv_eval = VecMonitor(venv=sb3_traderConverter_eval)

stackenv_basic_eval = vec_frame_stack.VecFrameStack(venv=monenv_eval, n_stack=num_frames)
stackenv_traid_eval = vec_frame_stack.VecFrameStack(venv=montraidingenv_eval, n_stack=num_frames)

obs = monenv.reset()
|
||||
|
||||
|
||||
# define training functions
|
||||
def train(model, timesteps, econ_call, process_bar, name, db, index):
    """
    Run model.learn and store its return value in db[index].

    Written to be usable as a thread target: the result is handed back through
    the shared container instead of a return value.

    Args:
        model: Object exposing learn(...).
        timesteps: Passed as total_timesteps.
        econ_call: Environment handed to the TensorboardCallback.
        process_bar: Passed as progress_bar.
        name: Passed as tb_log_name.
        db: Mutable sequence/mapping receiving the result.
        index: Position in db to write to.
    """
    learn_kwargs = dict(
        total_timesteps=timesteps,
        progress_bar=process_bar,
        reset_num_timesteps=False,
        tb_log_name=name,
        callback=TensorboardCallback(econ_call),
    )
    db[index] = model.learn(**learn_kwargs)
|
||||
|
||||
|
||||
|
||||
# prepare training
|
||||
# Random run id in [0, 99], used to label the tensorboard logs.
run_number = int(np.random.rand() * 100)
runname = "run_{}".format(run_number)
model_db = [None, None]  # slots receiving the results of model.learn

# Both policies share every hyper-parameter; only the environment differs.
ppo_kwargs = dict(
    n_steps=int(env_config['episode_length'] * 2),
    ent_coef=0.1,
    vf_coef=0.5,
    gamma=0.99,
    learning_rate=1e-5,
    seed=300,
    verbose=1,
    device="cuda",
    tensorboard_log="./log",
)
model = MaskablePPO("MlpPolicy", env=stackenv_basic, **ppo_kwargs)
model_trade = MaskablePPO("MlpPolicy", env=stackenv_traid, **ppo_kwargs)

n_agents = econ.n_agents

# Environment steps needed for one episode, per agent class.
total_required_for_episode_basic = len(mobileRecieverEconWrapper.agnet_idx) * env_config['episode_length']
total_required_for_episode_traid = len(tradeRecieverEconWrapper.agnet_idx) * env_config['episode_length']

print("this is run {}".format(runname))

while True:
    # ----- Train both policies concurrently, 100 episodes' worth each -----
    runname = "run_{}_{}".format(run_number, "basic")
    thread_model = Thread(
        target=train,
        args=(model, total_required_for_episode_basic * 100, econ, True, runname, model_db, 0),
    )
    runname = "run_{}_{}".format(run_number, "trader")
    thread_model_traid = Thread(
        target=train,
        args=(model_trade, total_required_for_episode_traid * 100, econ, False, runname, model_db, 1),
    )

    thread_model.start()
    thread_model_traid.start()
    thread_model.join()
    thread_model_traid.join()

    # ----- Run Eval -----
    print("### EVAL ###")
    obs_basic = stackenv_basic_eval.reset()
    obs_trade = stackenv_traid_eval.reset()
    model = model_db[0]
    model_trade = model_db[1]
    done = False
    for i in tqdm(range(eval_env_config['episode_length'])):
        # create masks
        masks_basic = stackenv_basic_eval.action_masks()
        masks_trade = stackenv_traid_eval.action_masks()
        # get actions
        action_basic = model.predict(obs_basic, action_masks=masks_basic)
        action_trade = model_trade.predict(obs_trade, action_masks=masks_trade)
        # submit async directly for non blocking operation
        # NOTE(review): the step() calls below are given the same actions
        # again - confirm the converters tolerate the double submission.
        sb3Converter_eval.step_async(action_basic[0])
        sb3_traderConverter_eval.step_async(action_trade[0])
        # retrieve full results
        obs_basic, rew_basic, done_e, info = stackenv_basic_eval.step(action_basic[0])
        obs_trade, rew_trade, done_e, info = stackenv_traid_eval.step(action_trade[0])
        done = done_e[0]

    market = econ_eval.get_component("ContinuousDoubleAuction")
    craft = econ_eval.get_component("Craft")
    build = craft.get_dense_log()
    # NOTE(review): metrics are read from the training env (econ) although the
    # replay below uses econ_eval - confirm this is intended.
    met = econ.previous_episode_metrics
    printReplay(econ_eval, 0)
    print("social/productivity: {}".format(met["social/productivity"]))
    print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
    print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))

    time.sleep(1)
|
||||
|
||||
|
||||
|
||||
208
main.py
208
main.py
@@ -1,9 +1,15 @@
|
||||
from ai_economist import foundation
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ai_economist import foundation
|
||||
from stable_baselines3.common.vec_env import vec_frame_stack
|
||||
from stable_baselines3.common.evaluation import evaluate_policy
|
||||
from sb3_contrib.ppo_mask import MaskablePPO
|
||||
import envs
|
||||
import wrapper
|
||||
import resources
|
||||
import pprint
|
||||
from agents import trading_agent
|
||||
from wrapper.base_econ_wrapper import BaseEconWrapper
|
||||
from wrapper.reciever_econ_wrapper import RecieverEconWrapper
|
||||
from wrapper.sb3_econ_converter import SB3EconConverter
|
||||
@@ -18,12 +24,13 @@ from envs.econ_wrapper import EconVecEnv
|
||||
from stable_baselines3.common.callbacks import BaseCallback
|
||||
import yaml
|
||||
import time
|
||||
from threading import Thread
|
||||
|
||||
env_config = {
|
||||
# ===== SCENARIO CLASS =====
|
||||
# Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
|
||||
# The environment object will be an instance of the Scenario class.
|
||||
'scenario_name': 'simple_market',
|
||||
'scenario_name': 'econ',
|
||||
|
||||
# ===== COMPONENTS =====
|
||||
# Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
|
||||
@@ -32,30 +39,34 @@ env_config = {
|
||||
# The order in which components reset, step, and generate obs follows their listed order below.
|
||||
'components': [
|
||||
# (1) Building houses
|
||||
('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
|
||||
('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
|
||||
# (2) Trading collectible resources
|
||||
#('ContinuousDoubleAuction', {'max_num_orders': 10}),
|
||||
('ContinuousDoubleAuction', {'max_num_orders': 10}),
|
||||
# (3) Movement and resource collection
|
||||
('SimpleGather', {}),
|
||||
('ExternalMarket',{'market_demand':{
|
||||
'Gem': 15
|
||||
}}),
|
||||
],
|
||||
|
||||
# ===== SCENARIO CLASS ARGUMENTS =====
|
||||
# (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
|
||||
|
||||
'starting_agent_coin': 0,
|
||||
'starting_agent_coin': 50,
|
||||
'fixed_four_skill_and_loc': True,
|
||||
|
||||
# ===== STANDARD ARGUMENTS ======
|
||||
# kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
|
||||
'agent_composition': {"BasicMobileAgent": 20}, # Number of non-planner agents (must be > 1)
|
||||
'world_size': [1, 1], # [Height, Width] of the env world
|
||||
'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
|
||||
'world_size': [5, 5], # [Height, Width] of the env world
|
||||
'episode_length': 256, # Number of timesteps per episode
|
||||
'isoelastic_eta':0.001,
|
||||
'allow_observation_scaling': True,
|
||||
'dense_log_frequency': 100,
|
||||
'world_dense_log_frequency':1,
|
||||
'energy_cost':0,
|
||||
'energy_cost':0,
|
||||
'energy_warmup_method': "auto",
|
||||
'energy_warmup_constant': 0,
|
||||
'energy_warmup_constant': 4000,
|
||||
|
||||
# In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
|
||||
# Otherwise, the policy selects only 1 action.
|
||||
@@ -67,7 +78,7 @@ env_config = {
|
||||
'flatten_observations': False,
|
||||
# When Flattening masks, concatenate each action subspace mask into a single array.
|
||||
# Note: flatten_masks = True is required for masking action logits in the code below.
|
||||
'flatten_masks': False,
|
||||
'flatten_masks': True,
|
||||
}
|
||||
|
||||
|
||||
@@ -75,7 +86,7 @@ eval_env_config = {
|
||||
# ===== SCENARIO CLASS =====
|
||||
# Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
|
||||
# The environment object will be an instance of the Scenario class.
|
||||
'scenario_name': 'simple_market',
|
||||
'scenario_name': 'econ',
|
||||
|
||||
# ===== COMPONENTS =====
|
||||
# Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
|
||||
@@ -84,30 +95,34 @@ eval_env_config = {
|
||||
# The order in which components reset, step, and generate obs follows their listed order below.
|
||||
'components': [
|
||||
# (1) Building houses
|
||||
('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
|
||||
('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
|
||||
# (2) Trading collectible resources
|
||||
#('ContinuousDoubleAuction', {'max_num_orders': 10}),
|
||||
('ContinuousDoubleAuction', {'max_num_orders': 10}),
|
||||
# (3) Movement and resource collection
|
||||
('SimpleGather', {}),
|
||||
('ExternalMarket',{'market_demand':{
|
||||
'Gem': 15
|
||||
}}),
|
||||
],
|
||||
|
||||
# ===== SCENARIO CLASS ARGUMENTS =====
|
||||
# (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
|
||||
|
||||
'starting_agent_coin': 0,
|
||||
'starting_agent_coin': 50,
|
||||
'fixed_four_skill_and_loc': True,
|
||||
|
||||
# ===== STANDARD ARGUMENTS ======
|
||||
# kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
|
||||
'agent_composition': {"BasicMobileAgent": 20}, # Number of non-planner agents (must be > 1)
|
||||
'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
|
||||
'world_size': [1, 1], # [Height, Width] of the env world
|
||||
'episode_length': 100, # Number of timesteps per episode
|
||||
'episode_length': 256, # Number of timesteps per episode
|
||||
'allow_observation_scaling': True,
|
||||
'dense_log_frequency': 10,
|
||||
'isoelastic_eta':0.001,
|
||||
'dense_log_frequency': 1,
|
||||
'world_dense_log_frequency':1,
|
||||
'energy_cost':0,
|
||||
'energy_warmup_method': "auto",
|
||||
'energy_warmup_constant': 0,
|
||||
'energy_warmup_constant': 4000,
|
||||
|
||||
# In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
|
||||
# Otherwise, the policy selects only 1 action.
|
||||
@@ -119,10 +134,10 @@ eval_env_config = {
|
||||
'flatten_observations': False,
|
||||
# When Flattening masks, concatenate each action subspace mask into a single array.
|
||||
# Note: flatten_masks = True is required for masking action logits in the code below.
|
||||
'flatten_masks': False,
|
||||
'flatten_masks': True,
|
||||
}
|
||||
|
||||
num_frames=2
|
||||
num_frames=1
|
||||
|
||||
class TensorboardCallback(BaseCallback):
|
||||
"""
|
||||
@@ -135,18 +150,36 @@ class TensorboardCallback(BaseCallback):
|
||||
self.metrics=econ.scenario_metrics()
|
||||
def _on_step(self) -> bool:
|
||||
# Log scalar value (here a random variable)
|
||||
prev_metrics=self.metrics
|
||||
if self.econ.previous_episode_metrics is None:
|
||||
self.metrics=self.econ.scenario_metrics()
|
||||
else:
|
||||
self.metrics=self.econ.previous_episode_metrics
|
||||
curr_prod=self.metrics["social/productivity"]
|
||||
trend_pord=curr_prod-prev_metrics["social/productivity"]
|
||||
self.logger.record("social/total_productivity", curr_prod)
|
||||
self.logger.record("social/delta_productivity", trend_pord)
|
||||
|
||||
if econ.world.timestep==0:
|
||||
prev_metrics=self.metrics
|
||||
if self.econ.previous_episode_metrics is None:
|
||||
self.metrics=self.econ.scenario_metrics()
|
||||
else:
|
||||
self.metrics=self.econ.previous_episode_metrics
|
||||
curr_prod=self.metrics["social/productivity"]
|
||||
trend_pord=curr_prod-prev_metrics["social/productivity"]
|
||||
self.logger.record("social/total_productivity", curr_prod)
|
||||
self.logger.record("social/delta_productivity", trend_pord)
|
||||
|
||||
return True
|
||||
|
||||
# Learning-rate schedule bounds for the BasicMobileAgent policy.
# NOTE(review): min_at_target_basic is not read by the schedule below.
min_at_target_basic = 0.5
min_lr_basic = 5e-6
start_lr_basic = 9e-4

# Learning-rate schedule bounds for the TradingAgent policy.
# NOTE(review): min_at_target_trade is not read by the schedule below.
min_at_target_trade = 0.5
min_lr_trade = 5e-6
start_lr_trade = 9e-4


def learning_rate_adj_basic(x) -> float:
    """Linear learning-rate schedule for the basic-agent model.

    Passed as ``learning_rate`` to the basic-agent ``MaskablePPO`` model.

    Args:
        x: Interpolation factor. Stable-Baselines3 calls schedules with the
            remaining training progress, which runs from 1 down to 0.

    Returns:
        ``min_lr_basic`` when ``x`` is 0, ``start_lr_basic`` when ``x`` is 1,
        and the linear interpolation in between.
    """
    diff = start_lr_basic - min_lr_basic
    return min_lr_basic + x * diff


def learning_rate_adj_trade(x) -> float:
    """Linear learning-rate schedule for the trading-agent model.

    Passed as ``learning_rate`` to the trading-agent ``MaskablePPO`` model.

    Args:
        x: Interpolation factor. Stable-Baselines3 calls schedules with the
            remaining training progress, which runs from 1 down to 0.

    Returns:
        ``min_lr_trade`` when ``x`` is 0, ``start_lr_trade`` when ``x`` is 1,
        and the linear interpolation in between.
    """
    diff = start_lr_trade - min_lr_trade
    # Use the trade-specific floor; the basic floor was used here by mistake,
    # which would skew the schedule as soon as the two floors differ.
    return min_lr_trade + x * diff
|
||||
|
||||
def printMarket(market):
|
||||
for i in range(len(market)):
|
||||
@@ -188,7 +221,7 @@ def printReplay(econ,agentid):
|
||||
print("--- State ---")
|
||||
state=log['states'][step][agentid]
|
||||
|
||||
print(yaml.dump(state))
|
||||
pprint.pprint(state)
|
||||
print("--- Action ---")
|
||||
action=log["actions"][step][agentid]
|
||||
|
||||
@@ -205,63 +238,120 @@ def printReplay(econ,agentid):
|
||||
|
||||
#Setup Env Objects
|
||||
econ=foundation.make_env_instance(**env_config)
|
||||
|
||||
market=econ.get_component("ContinuousDoubleAuction")
|
||||
action=market.get_n_actions("TradingAgent")
|
||||
baseEconWrapper=BaseEconWrapper(econ)
|
||||
baseEconWrapper.run()
|
||||
time.sleep(0.5)
|
||||
mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent")
|
||||
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent")
|
||||
#obs=sb3Converter.reset()
|
||||
#vecenv=EconVecEnv(env_config=env_config)
|
||||
tradeRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="TradingAgent")
|
||||
sb3_traderConverter=SB3EconConverter(tradeRecieverEconWrapper,econ,"TradingAgent",True)
|
||||
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent",True)
|
||||
# attach sb3 wrappers
|
||||
|
||||
monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"])
|
||||
montraidingenv=VecMonitor(venv=sb3_traderConverter)
|
||||
|
||||
stackenv_basic=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=num_frames)
|
||||
stackenv_traid=vec_frame_stack.VecFrameStack(venv=montraidingenv,n_stack=num_frames)
|
||||
# Model setup complete
|
||||
|
||||
# Setup Eval Env
|
||||
econ_eval=foundation.make_env_instance(**eval_env_config)
|
||||
|
||||
|
||||
baseEconWrapper_eval=BaseEconWrapper(econ_eval)
|
||||
baseEconWrapper_eval.run()
|
||||
time.sleep(0.5)
|
||||
mobileRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="BasicMobileAgent")
|
||||
tradeRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="TradingAgent")
|
||||
sb3_traderConverter_eval=SB3EconConverter(tradeRecieverEconWrapper_eval,econ_eval,"TradingAgent",False)
|
||||
sb3Converter_eval=SB3EconConverter(mobileRecieverEconWrapper_eval,econ_eval,"BasicMobileAgent",False)
|
||||
# attach sb3 wrappers
|
||||
|
||||
monenv_eval=VecMonitor(venv=sb3Converter_eval,info_keywords=["social/productivity","trend/productivity"])
|
||||
montraidingenv_eval=VecMonitor(venv=sb3_traderConverter_eval)
|
||||
|
||||
stackenv_basic_eval=vec_frame_stack.VecFrameStack(venv=monenv_eval,n_stack=num_frames)
|
||||
stackenv_traid_eval=vec_frame_stack.VecFrameStack(venv=montraidingenv_eval,n_stack=num_frames)
|
||||
|
||||
|
||||
#normenv=VecNormalize(sb3Converter,norm_reward=False,clip_obs=1)
|
||||
#stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
|
||||
obs=monenv.reset()
|
||||
|
||||
|
||||
# define training functions
|
||||
def train(model, timesteps, econ_call, process_bar, name, db, index):
    """Run one learning round on ``model`` and store the result in ``db``.

    Used as a ``threading.Thread`` target, so the trained model is handed
    back through ``db[index]`` rather than through a return value.

    Args:
        model: Object exposing ``learn(...)`` (the MaskablePPO models here).
        timesteps: Value forwarded as ``total_timesteps``.
        econ_call: Environment handed to ``TensorboardCallback``.
        process_bar: Value forwarded as ``progress_bar``.
        name: TensorBoard run name, forwarded as ``tb_log_name``.
        db: Mutable sequence that receives the result.
        index: Slot in ``db`` to write to.
    """
    # reset_num_timesteps=False keeps the timestep counter running across
    # repeated calls instead of restarting it on every round.
    db[index] = model.learn(
        total_timesteps=timesteps,
        progress_bar=process_bar,
        reset_num_timesteps=False,
        tb_log_name=name,
        callback=TensorboardCallback(econ_call),
    )
|
||||
|
||||
|
||||
|
||||
runname="run_{}".format(int(np.random.rand()*100))
|
||||
# prepare training
|
||||
run_number=int(np.random.rand()*100)
|
||||
runname="run_{}".format(run_number)
|
||||
model_db=[None,None] # object for storing model
|
||||
|
||||
|
||||
model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=learning_rate_adj_basic,env=stackenv_basic, seed=445,verbose=1,device="cuda",tensorboard_log="./log")
|
||||
model_trade=MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=learning_rate_adj_trade,env=stackenv_traid, seed=445,verbose=1,device="cuda",tensorboard_log="./log")
|
||||
|
||||
model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")
|
||||
n_agents=econ.n_agents
|
||||
total_required_for_episode=n_agents*env_config['episode_length']
|
||||
print("this is run {}".format(runname))
|
||||
while True:
|
||||
# Create Eval ENV
|
||||
|
||||
vec_env_eval=EconVecEnv(env_config=eval_env_config)
|
||||
vec_mon_eval=VecMonitor(venv=vec_env_eval)
|
||||
norm_env_eval=VecNormalize(vec_mon_eval,norm_reward=False,training=False)
|
||||
eval_econ = vec_env_eval.env
|
||||
|
||||
#Train
|
||||
model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
|
||||
#normenv.save("temp-normalizer.ai")
|
||||
|
||||
total_required_for_episode_basic=len(mobileRecieverEconWrapper.agnet_idx)*env_config['episode_length']
|
||||
total_required_for_episode_traid=len(tradeRecieverEconWrapper.agnet_idx)*env_config['episode_length']
|
||||
|
||||
print("this is run {}".format(runname))
|
||||
|
||||
while True:
|
||||
|
||||
|
||||
#Train
|
||||
runname="run_{}_{}".format(run_number,"basic")
|
||||
|
||||
thread_model=Thread(target=train,args=(model,total_required_for_episode_basic*150,econ,True,runname,model_db,0))
|
||||
runname="run_{}_{}".format(run_number,"trader")
|
||||
thread_model_traid=Thread(target=train,args=(model_trade,total_required_for_episode_traid*150,econ,False,runname,model_db,1))
|
||||
|
||||
thread_model.start()
|
||||
thread_model_traid.start()
|
||||
thread_model.join()
|
||||
thread_model_traid.join()
|
||||
#normenv.save("temp-normalizer.ai")
|
||||
model=model_db[0]
|
||||
model_trade=model_db[1]
|
||||
model.save("basic.ai")
|
||||
model_trade.save("trade.ai")
|
||||
|
||||
## Run Eval
|
||||
print("### EVAL ###")
|
||||
norm_env_eval.load("temp-normalizer.ai",vec_mon_eval)
|
||||
obs=vec_mon_eval.reset()
|
||||
obs_basic=stackenv_basic_eval.reset()
|
||||
obs_trade=stackenv_traid_eval.reset()
|
||||
done=False
|
||||
for i in tqdm(range(eval_env_config['episode_length'])):
|
||||
action=model.predict(obs)
|
||||
obs,rew,done_e,info=vec_mon_eval.step(action[0])
|
||||
#create masks
|
||||
masks_basic=stackenv_basic_eval.action_masks()
|
||||
masks_trade=stackenv_traid_eval.action_masks()
|
||||
# get actions
|
||||
action_basic=model.predict(obs_basic,action_masks=masks_basic)
|
||||
action_trade=model_trade.predict(obs_trade,action_masks=masks_trade)
|
||||
#submit async directly for non blocking operation
|
||||
sb3Converter_eval.step_async(action_basic[0])
|
||||
sb3_traderConverter_eval.step_async(action_trade[0])
|
||||
# retrieve full results
|
||||
obs_basic,rew_basic,done_e,info=stackenv_basic_eval.step(action_basic[0])
|
||||
obs_trade,rew_trade,done_e,info=stackenv_traid_eval.step(action_trade[0])
|
||||
done=done_e[0]
|
||||
|
||||
|
||||
|
||||
#market=eval_econ.get_component("ContinuousDoubleAuction")
|
||||
craft=eval_econ.get_component("SimpleCraft")
|
||||
market=econ_eval.get_component("ContinuousDoubleAuction")
|
||||
craft=econ_eval.get_component("Craft")
|
||||
# trades=market.get_dense_log()
|
||||
build=craft.get_dense_log()
|
||||
met=econ.previous_episode_metrics
|
||||
printReplay(eval_econ,0)
|
||||
printReplay(econ_eval,0)
|
||||
# printMarket(trades)
|
||||
printBuilds(builds=build)
|
||||
# printBuilds(builds=build)
|
||||
print("social/productivity: {}".format(met["social/productivity"]))
|
||||
print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
|
||||
print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))
|
||||
|
||||
3
resources/__init_.py
Normal file
3
resources/__init_.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from . import (
|
||||
resources
|
||||
)
|
||||
4
resources/resources.py
Normal file
4
resources/resources.py
Normal file
@@ -0,0 +1,4 @@
|
||||
|
||||
import numpy as np
|
||||
from ai_economist.foundation.entities.resources import Resource, resource_registry
|
||||
|
||||
343
test.py
Normal file
343
test.py
Normal file
@@ -0,0 +1,343 @@
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ai_economist import foundation
|
||||
from stable_baselines3.common.vec_env import vec_frame_stack
|
||||
from stable_baselines3.common.evaluation import evaluate_policy
|
||||
from sb3_contrib.ppo_mask import MaskablePPO
|
||||
import envs
|
||||
import wrapper
|
||||
import resources
|
||||
import pprint
|
||||
from agents import trading_agent
|
||||
from wrapper.base_econ_wrapper import BaseEconWrapper
|
||||
from wrapper.reciever_econ_wrapper import RecieverEconWrapper
|
||||
from wrapper.sb3_econ_converter import SB3EconConverter
|
||||
from tqdm import tqdm
|
||||
import components
|
||||
from stable_baselines3.common.env_checker import check_env
|
||||
from stable_baselines3 import PPO
|
||||
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
|
||||
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
|
||||
from sb3_contrib import RecurrentPPO
|
||||
from envs.econ_wrapper import EconVecEnv
|
||||
from stable_baselines3.common.callbacks import BaseCallback
|
||||
import yaml
|
||||
import time
|
||||
from threading import Thread
|
||||
|
||||
env_config = {
|
||||
# ===== SCENARIO CLASS =====
|
||||
# Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
|
||||
# The environment object will be an instance of the Scenario class.
|
||||
'scenario_name': 'econ',
|
||||
|
||||
# ===== COMPONENTS =====
|
||||
# Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
|
||||
# "component_name" refers to the Component class's name in the Component Registry (foundation.components)
|
||||
# {component_kwargs} is a dictionary of kwargs passed to the Component class
|
||||
# The order in which components reset, step, and generate obs follows their listed order below.
|
||||
'components': [
|
||||
# (1) Building houses
|
||||
('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
|
||||
# (2) Trading collectible resources
|
||||
('ContinuousDoubleAuction', {'max_num_orders': 10}),
|
||||
# (3) Movement and resource collection
|
||||
('SimpleGather', {}),
|
||||
('ExternalMarket',{'market_demand':{
|
||||
'Gem': 15
|
||||
}}),
|
||||
],
|
||||
|
||||
# ===== SCENARIO CLASS ARGUMENTS =====
|
||||
# (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
|
||||
|
||||
'starting_agent_coin': 10,
|
||||
'fixed_four_skill_and_loc': True,
|
||||
|
||||
# ===== STANDARD ARGUMENTS ======
|
||||
# kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
|
||||
'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
|
||||
'world_size': [5, 5], # [Height, Width] of the env world
|
||||
'episode_length': 256, # Number of timesteps per episode
|
||||
'allow_observation_scaling': True,
|
||||
'dense_log_frequency': 100,
|
||||
'world_dense_log_frequency':1,
|
||||
'energy_cost':0,
|
||||
'energy_warmup_method': "auto",
|
||||
'energy_warmup_constant': 4000,
|
||||
|
||||
# In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
|
||||
# Otherwise, the policy selects only 1 action.
|
||||
'multi_action_mode_agents': False,
|
||||
'multi_action_mode_planner': False,
|
||||
|
||||
# When flattening observations, concatenate scalar & vector observations before output.
|
||||
# Otherwise, return observations with minimal processing.
|
||||
'flatten_observations': False,
|
||||
# When Flattening masks, concatenate each action subspace mask into a single array.
|
||||
# Note: flatten_masks = True is required for masking action logits in the code below.
|
||||
'flatten_masks': True,
|
||||
}
|
||||
|
||||
|
||||
eval_env_config = {
|
||||
# ===== SCENARIO CLASS =====
|
||||
# Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
|
||||
# The environment object will be an instance of the Scenario class.
|
||||
'scenario_name': 'econ',
|
||||
|
||||
# ===== COMPONENTS =====
|
||||
# Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
|
||||
# "component_name" refers to the Component class's name in the Component Registry (foundation.components)
|
||||
# {component_kwargs} is a dictionary of kwargs passed to the Component class
|
||||
# The order in which components reset, step, and generate obs follows their listed order below.
|
||||
'components': [
|
||||
# (1) Building houses
|
||||
('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
|
||||
# (2) Trading collectible resources
|
||||
('ContinuousDoubleAuction', {'max_num_orders': 10}),
|
||||
# (3) Movement and resource collection
|
||||
('SimpleGather', {}),
|
||||
('ExternalMarket',{'market_demand':{
|
||||
'Gem': 15
|
||||
}}),
|
||||
],
|
||||
|
||||
# ===== SCENARIO CLASS ARGUMENTS =====
|
||||
# (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
|
||||
|
||||
'starting_agent_coin': 10,
|
||||
'fixed_four_skill_and_loc': True,
|
||||
|
||||
# ===== STANDARD ARGUMENTS ======
|
||||
# kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
|
||||
'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
|
||||
'world_size': [1, 1], # [Height, Width] of the env world
|
||||
'episode_length': 256, # Number of timesteps per episode
|
||||
'allow_observation_scaling': True,
|
||||
'dense_log_frequency': 1,
|
||||
'world_dense_log_frequency':1,
|
||||
'energy_cost':0,
|
||||
'energy_warmup_method': "auto",
|
||||
'energy_warmup_constant': 4000,
|
||||
|
||||
# In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
|
||||
# Otherwise, the policy selects only 1 action.
|
||||
'multi_action_mode_agents': False,
|
||||
'multi_action_mode_planner': False,
|
||||
|
||||
# When flattening observations, concatenate scalar & vector observations before output.
|
||||
# Otherwise, return observations with minimal processing.
|
||||
'flatten_observations': False,
|
||||
# When Flattening masks, concatenate each action subspace mask into a single array.
|
||||
# Note: flatten_masks = True is required for masking action logits in the code below.
|
||||
'flatten_masks': True,
|
||||
}
|
||||
|
||||
num_frames=5
|
||||
|
||||
class TensorboardCallback(BaseCallback):
    """Custom callback for plotting additional values in tensorboard.

    Records the scenario's ``social/productivity`` metric and its change
    since the previously recorded value.
    """

    def __init__(self, econ, verbose=0):
        """Store the environment and take an initial metrics snapshot.

        Args:
            econ: Environment exposing ``scenario_metrics()``,
                ``previous_episode_metrics`` and ``world.timestep``.
            verbose: Verbosity level forwarded to ``BaseCallback``.
        """
        super().__init__(verbose)
        self.econ = econ
        self.metrics = econ.scenario_metrics()

    def _on_step(self) -> bool:
        """Record productivity metrics whenever the world timestep is 0.

        Returns:
            Always ``True``.
        """
        # Read the environment this callback was built with, not the
        # module-level ``econ`` global, so the callback also works when it
        # is attached to a different environment instance.
        # NOTE(review): timestep == 0 presumably marks a fresh episode - confirm.
        if self.econ.world.timestep == 0:
            prev_metrics = self.metrics
            # Prefer the finished episode's metrics; fall back to the live
            # scenario metrics while no episode has completed yet.
            if self.econ.previous_episode_metrics is None:
                self.metrics = self.econ.scenario_metrics()
            else:
                self.metrics = self.econ.previous_episode_metrics
            curr_prod = self.metrics["social/productivity"]
            trend_prod = curr_prod - prev_metrics["social/productivity"]
            self.logger.record("social/total_productivity", curr_prod)
            self.logger.record("social/delta_productivity", trend_prod)

        return True
|
||||
|
||||
|
||||
def printMarket(market):
    """Print every transaction in a per-step market log.

    Args:
        market: Sequence indexed by step; each entry is a sequence of
            transaction mappings with the keys ``commodity``, ``seller``,
            ``buyer``, ``ask``, ``bid`` and ``price``.

    Returns:
        An empty string.
    """
    for i, step in enumerate(market):
        # Steps without transactions produce no output at all.
        if len(step) > 0:
            print("=== Step {} ===".format(i))
            for t in step:
                transstring = "({}) {} -> {} | [{}/{}] {} Coins\n".format(
                    t["commodity"], t["seller"], t["buyer"],
                    t["ask"], t["bid"], t["price"],
                )
                print(transstring)
    return ""
|
||||
|
||||
def printBuilds(builds):
    """Print every build event in a per-step build log.

    Args:
        builds: Sequence indexed by step; each entry is a sequence of
            build mappings with the keys ``builder``, ``build_skill`` and
            ``income``.

    Returns:
        An empty string.
    """
    for i, step in enumerate(builds):
        if len(step) > 0:
            for t in step:
                transstring = "({}) Builder: {}, Skill: {}, Income {} ".format(
                    i, t["builder"], t["build_skill"], t["income"]
                )
                print(transstring)
    return ""
|
||||
def printReplay(econ, agentid):
    """Print a step-by-step replay of one agent from the last dense log.

    For each logged step except the last state entry, prints the world maps,
    the agent's state, its action(s) and its reward.

    Args:
        econ: Environment exposing ``previous_episode_dense_log`` (a mapping
            with ``states``, ``world``, ``actions`` and ``rewards``) and
            ``world.agents``.
        agentid: Index of the agent in ``econ.world.agents``; its string
            form is used as the key into the log entries.
    """
    worldmaps = ["Stone", "Wood"]

    log = econ.previous_episode_dense_log
    # Kept from the original: an invalid agent index fails here, before
    # anything is printed.
    econ.world.agents[agentid]

    agentid = str(agentid)
    # The last entry of log["states"] is not replayed.
    # NOTE(review): presumably it has no matching action/reward - confirm.
    max_step = len(log["states"]) - 1

    for step in range(max_step):
        print()
        print("=== Step {} ===".format(step))
        # state
        print("--- World ---")
        world = log['world'][step]
        for res in worldmaps:
            # Only the [0][0] cell of each resource map is shown.
            print("{}: {}".format(res, world[res][0][0]))
        print("--- State ---")
        state = log['states'][step][agentid]

        pprint.pprint(state)
        print("--- Action ---")
        action = log["actions"][step][agentid]

        # An empty action mapping is printed as a no-op.
        if action == {}:
            print("Action: 0 -> NOOP")
        else:
            for k in action:
                formats = "Action: {}({})".format(k, action[k])
                print(formats)
        print("--- Reward ---")
        reward = log["rewards"][step][agentid]
        print("Reward: {}".format(reward))
|
||||
|
||||
#Setup Env Objects
|
||||
econ=foundation.make_env_instance(**env_config)
|
||||
|
||||
market=econ.get_component("ContinuousDoubleAuction")
|
||||
action=market.get_n_actions("TradingAgent")
|
||||
baseEconWrapper=BaseEconWrapper(econ)
|
||||
baseEconWrapper.run()
|
||||
time.sleep(0.5)
|
||||
mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent")
|
||||
tradeRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="TradingAgent")
|
||||
sb3_traderConverter=SB3EconConverter(tradeRecieverEconWrapper,econ,"TradingAgent",True)
|
||||
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent",True)
|
||||
# attach sb3 wrappers
|
||||
|
||||
monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"])
|
||||
montraidingenv=VecMonitor(venv=sb3_traderConverter)
|
||||
|
||||
stackenv_basic=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=num_frames)
|
||||
stackenv_traid=vec_frame_stack.VecFrameStack(venv=montraidingenv,n_stack=num_frames)
|
||||
# Model setup complete
|
||||
|
||||
# Setup Eval Env
|
||||
econ_eval=foundation.make_env_instance(**eval_env_config)
|
||||
|
||||
|
||||
baseEconWrapper_eval=BaseEconWrapper(econ_eval)
|
||||
baseEconWrapper_eval.run()
|
||||
time.sleep(0.5)
|
||||
mobileRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="BasicMobileAgent")
|
||||
tradeRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="TradingAgent")
|
||||
sb3_traderConverter_eval=SB3EconConverter(tradeRecieverEconWrapper_eval,econ_eval,"TradingAgent",False)
|
||||
sb3Converter_eval=SB3EconConverter(mobileRecieverEconWrapper_eval,econ_eval,"BasicMobileAgent",False)
|
||||
# attach sb3 wrappers
|
||||
|
||||
monenv_eval=VecMonitor(venv=sb3Converter_eval,info_keywords=["social/productivity","trend/productivity"])
|
||||
montraidingenv_eval=VecMonitor(venv=sb3_traderConverter_eval)
|
||||
|
||||
stackenv_basic_eval=vec_frame_stack.VecFrameStack(venv=monenv_eval,n_stack=num_frames)
|
||||
stackenv_traid_eval=vec_frame_stack.VecFrameStack(venv=montraidingenv_eval,n_stack=num_frames)
|
||||
|
||||
|
||||
obs=monenv.reset()
|
||||
|
||||
|
||||
# define training functions
|
||||
def train(model, timesteps, econ_call, process_bar, name, db, index):
    """Run one learning round on ``model`` and store the result in ``db``.

    Used as a ``threading.Thread`` target, so the trained model is handed
    back through ``db[index]`` rather than through a return value.

    Args:
        model: Object exposing ``learn(...)`` (the MaskablePPO models here).
        timesteps: Value forwarded as ``total_timesteps``.
        econ_call: Environment handed to ``TensorboardCallback``.
        process_bar: Value forwarded as ``progress_bar``.
        name: TensorBoard run name, forwarded as ``tb_log_name``.
        db: Mutable sequence that receives the result.
        index: Slot in ``db`` to write to.
    """
    # reset_num_timesteps=False keeps the timestep counter running across
    # repeated calls instead of restarting it on every round.
    db[index] = model.learn(
        total_timesteps=timesteps,
        progress_bar=process_bar,
        reset_num_timesteps=False,
        tb_log_name=name,
        callback=TensorboardCallback(econ_call),
    )
|
||||
|
||||
|
||||
|
||||
# prepare training
|
||||
run_number=int(np.random.rand()*100)
|
||||
runname="run_{}".format(run_number)
|
||||
model_db=[None,None] # object for storing model
|
||||
|
||||
|
||||
model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_basic, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
|
||||
model_trade=MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_traid, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
|
||||
|
||||
n_agents=econ.n_agents
|
||||
|
||||
total_required_for_episode_basic=len(mobileRecieverEconWrapper.agnet_idx)*env_config['episode_length']
|
||||
total_required_for_episode_traid=len(tradeRecieverEconWrapper.agnet_idx)*env_config['episode_length']
|
||||
|
||||
print("this is run {}".format(runname))
|
||||
|
||||
while True:
|
||||
|
||||
|
||||
#Train
|
||||
runname="run_{}_{}".format(run_number,"basic")
|
||||
|
||||
thread_model=Thread(target=train,args=(model,total_required_for_episode_basic*50,econ,True,runname,model_db,0))
|
||||
runname="run_{}_{}".format(run_number,"trader")
|
||||
thread_model_traid=Thread(target=train,args=(model_trade,total_required_for_episode_traid*50,econ,False,runname,model_db,1))
|
||||
|
||||
thread_model.start()
|
||||
thread_model_traid.start()
|
||||
thread_model.join()
|
||||
thread_model_traid.join()
|
||||
#normenv.save("temp-normalizer.ai")
|
||||
model=model_db[0]
|
||||
model_trade=model_db[1]
|
||||
model.save("basic.ai")
|
||||
model_trade.save("trade.ai")
|
||||
|
||||
## Run Eval
|
||||
print("### EVAL ###")
|
||||
obs_basic=stackenv_basic_eval.reset()
|
||||
obs_trade=stackenv_traid_eval.reset()
|
||||
done=False
|
||||
for i in tqdm(range(eval_env_config['episode_length'])):
|
||||
#create masks
|
||||
masks_basic=stackenv_basic_eval.action_masks()
|
||||
masks_trade=stackenv_traid_eval.action_masks()
|
||||
# get actions
|
||||
action_basic=model.predict(obs_basic,action_masks=masks_basic)
|
||||
action_trade=model_trade.predict(obs_trade,action_masks=masks_trade)
|
||||
#submit async directly for non blocking operation
|
||||
sb3Converter_eval.step_async(action_basic[0])
|
||||
sb3_traderConverter_eval.step_async(action_trade[0])
|
||||
# retrieve full results
|
||||
obs_basic,rew_basic,done_e,info=stackenv_basic_eval.step(action_basic[0])
|
||||
obs_trade,rew_trade,done_e,info=stackenv_traid_eval.step(action_trade[0])
|
||||
done=done_e[0]
|
||||
|
||||
|
||||
|
||||
market=econ_eval.get_component("ContinuousDoubleAuction")
|
||||
craft=econ_eval.get_component("Craft")
|
||||
# trades=market.get_dense_log()
|
||||
build=craft.get_dense_log()
|
||||
met=econ.previous_episode_metrics
|
||||
printReplay(econ_eval,0)
|
||||
# printMarket(trades)
|
||||
# printBuilds(builds=build)
|
||||
print("social/productivity: {}".format(met["social/productivity"]))
|
||||
print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
|
||||
print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
|
||||
BIN
trade 4000.ai
Normal file
BIN
trade 4000.ai
Normal file
Binary file not shown.
@@ -3,25 +3,14 @@ from threading import Event, Lock, Thread
|
||||
from queue import Queue
|
||||
class BaseEconWrapper():
|
||||
"""Base class for connecting reciever wrapper to a multi threaded econ simulation and training session"""
|
||||
|
||||
base_notification=Event() #Notification for Base
|
||||
reset_notification=Event() #Notification for recievers
|
||||
|
||||
step_notifications=[] #Notification for recievers
|
||||
|
||||
action_edit_lock=Lock()
|
||||
actor_actions={}
|
||||
|
||||
stop_edit_lock=Lock()
|
||||
stop=False
|
||||
|
||||
vote_lock=Lock()
|
||||
n_voters=0
|
||||
n_votes_reset=0
|
||||
|
||||
|
||||
|
||||
# States of Env
|
||||
env_data_lock=Lock()
|
||||
|
||||
obs=None
|
||||
rew=None
|
||||
done=None
|
||||
@@ -30,6 +19,13 @@ class BaseEconWrapper():
|
||||
|
||||
def __init__(self, econ: base_env.BaseEnvironment):
|
||||
self.env=econ
|
||||
self.vote_lock=Lock()
|
||||
|
||||
self.base_notification=Event() #Notification for Base
|
||||
self.reset_notification=Event() #Notification for recievers
|
||||
self.action_edit_lock=Lock()
|
||||
self.stop_edit_lock=Lock()
|
||||
self.env_data_lock=Lock()
|
||||
|
||||
def register_vote(self):
|
||||
"""Register reciever on base. Returns ID of Voter to pass on during blocking"""
|
||||
@@ -149,7 +145,8 @@ class BaseEconWrapper():
|
||||
self.action_edit_lock.acquire() # Start to submit action dict
|
||||
for k,v in actions.items():
|
||||
if k in self.actor_actions.keys():
|
||||
raise Exception("Actor action has already been submitted. {}".format(k))
|
||||
print("Actor action has already been submitted. {}".format(k))
|
||||
continue
|
||||
self.actor_actions[k]=v
|
||||
self.step_notifications[voter_id].clear()
|
||||
self.base_notification.set() #Alert base for action changes
|
||||
@@ -168,9 +165,9 @@ class BaseEconWrapper():
|
||||
|
||||
def reciever_request_reset(self):
    """Add one vote to the reset counter and wake the base.

    The increment runs while holding ``self.vote_lock`` so concurrent
    callers cannot lose updates to ``n_votes_reset``; the ``with`` block
    releases the lock even if the increment raises. Acting on the vote
    count (the actual reset) is not done here.

    NOTE(review): the lock calls were commented out in the merged change;
    confirm no caller already holds ``vote_lock`` when invoking this method,
    since ``threading.Lock`` is not re-entrant.
    """
    with self.vote_lock:
        self.n_votes_reset += 1
    self.base_notification.set()  # Alert base for action changes
|
||||
|
||||
def reciever_block_reset(self):
|
||||
|
||||
@@ -23,7 +23,7 @@ class RecieverEconWrapper(gym.Env):
|
||||
self.idx_to_index={}
|
||||
#create idx to index map
|
||||
for i in range(len(self.agnet_idx)):
|
||||
self.idx_to_index[self.agnet_idx[i]]=i
|
||||
self.idx_to_index[str(self.agnet_idx[i])]=i
|
||||
first_idx=self.agnet_idx[0]
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ class RecieverEconWrapper(gym.Env):
|
||||
def _dict_idx_to_index(self, data):
|
||||
data_out={}
|
||||
for k,v in data.items():
|
||||
|
||||
if k in self.idx_to_index:
|
||||
index=self.idx_to_index[k]
|
||||
data_out[index]=v
|
||||
|
||||
@@ -8,7 +8,7 @@ from typing import Any, Callable, List, Optional, Sequence, Type, Union
|
||||
|
||||
class SB3EconConverter(VecEnv, gym.Env):
|
||||
|
||||
def __init__(self, env: gym.Env, econ: base_env.BaseEnvironment,agentclass: str):
|
||||
def __init__(self, env: gym.Env, econ: base_env.BaseEnvironment,agentclass: str,auto_reset: bool):
|
||||
self.env=env
|
||||
self.econ=econ
|
||||
#get observation sample
|
||||
@@ -20,7 +20,9 @@ class SB3EconConverter(VecEnv, gym.Env):
|
||||
#flatten obervation of first agent
|
||||
obs0=utils.package(obs[0],*self.packager)
|
||||
obs0["flat"]
|
||||
self.observation_space=gym.spaces.Box(low=-np.inf,high=np.inf,shape=(len(obs0["flat"]),),dtype=np.float32)
|
||||
self.step_request_send=False
|
||||
self.auto_reset=auto_reset
|
||||
self.observation_space=gym.spaces.Box(low=0,high=10,shape=(len(obs0["flat"]),),dtype=np.float32)
|
||||
super().__init__(self.num_envs, self.observation_space, self.action_space)
|
||||
|
||||
|
||||
@@ -30,12 +32,15 @@ class SB3EconConverter(VecEnv, gym.Env):
|
||||
agent=self.econ.world.agents[idx]
|
||||
return gym.spaces.Discrete(agent.action_spaces)
|
||||
|
||||
def step_async(self, actions: np.ndarray):
    """Forward a batch of actions to the wrapped env, at most once per step.

    Args:
        actions: Actions as received from the caller; converted with
            ``utils.convert_gym_to_econ`` before being forwarded.

    Returns:
        The result of ``self.env.step_async`` when the request is forwarded,
        or ``None`` when a request has already been sent and the
        ``step_request_send`` flag has not been cleared yet.
    """
    if self.step_request_send:
        # A request is already outstanding; repeated submissions are ignored
        # until the flag is reset.
        return None

    self.step_request_send = True
    d_actions = utils.convert_gym_to_econ(actions)
    return self.env.step_async(d_actions)
|
||||
|
||||
def step_wait(self) -> VecEnvStepReturn:
|
||||
obs,rew,done,info=self.env.step_wait()
|
||||
self.curr_obs=obs
|
||||
#flatten obs
|
||||
f_obs={}
|
||||
for k,v in obs.items():
|
||||
@@ -61,12 +66,16 @@ class SB3EconConverter(VecEnv, gym.Env):
|
||||
for i in range(self.num_envs):
|
||||
done_g[i]=done
|
||||
c_info[i]["terminal_observation"]=c_obs[i]
|
||||
c_obs=self.reset()
|
||||
if self.auto_reset:
|
||||
c_obs=self.reset()
|
||||
self.step_request_send=False
|
||||
return np.copy(c_obs),np.copy(c_rew),np.copy(done_g),np.copy(c_info)
|
||||
|
||||
def reset(self) -> VecEnvObs:
|
||||
obs=self.env.reset()
|
||||
self.step_request_send=False
|
||||
f_obs={}
|
||||
self.curr_obs=obs
|
||||
for k,v in obs.items():
|
||||
f_obs[k]=utils.package(v,*self.packager)
|
||||
g_obs={}
|
||||
@@ -79,20 +88,27 @@ class SB3EconConverter(VecEnv, gym.Env):
|
||||
def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
    """Seed the wrapped economy and report the seed that was applied.

    Args:
        seed: Seed to apply. When ``None``, one is drawn with
            ``np.random.randint(0, 2**32 - 1)``.

    Returns:
        A one-element list holding the seed passed to ``self.econ.seed``.
    """
    if seed is None:
        seed = np.random.randint(0, 2**32 - 1)
    self.econ.seed(seed)
    return [seed]
|
||||
|
||||
|
||||
def action_masks(self):
    """Return boolean action masks built from the current observations.

    For every entry of ``self.curr_obs`` (in iteration order) the values
    under its ``"action_mask"`` key are compared against ``1.0``.

    Returns:
        A list with one inner list per entry of ``self.curr_obs``; each
        inner element is the result of ``value == 1.0``.
    """
    return [
        [flag == 1.0 for flag in agent_obs["action_mask"]]
        for agent_obs in self.curr_obs.values()
    ]
|
||||
|
||||
def close(self) -> None:
    """Do nothing; this converter holds no resources that it releases here."""
    return None
|
||||
|
||||
def get_attr(self, attr_name: str, indices: VecEnvIndices = None) -> List[Any]:
    """Return attribute ``attr_name`` looked up on this converter itself.

    Args:
        attr_name: Name of the attribute to read from ``self``.
        indices: Accepted for interface compatibility; not used.

    Returns:
        The attribute value exactly as stored.

    NOTE(review): the value is returned as-is and is not wrapped in a
    per-environment list, although the annotation says ``List[Any]`` —
    confirm callers expect this.
    """
    return getattr(self, attr_name)
|
||||
|
||||
|
||||
|
||||
@@ -106,8 +122,7 @@ class SB3EconConverter(VecEnv, gym.Env):
|
||||
|
||||
def env_method(self, method_name: str, *method_args, indices: VecEnvIndices = None, **method_kwargs) -> List[Any]:
    """Call method ``method_name`` on this converter itself and return its result.

    Args:
        method_name: Name of the method to look up on ``self``.
        *method_args: Positional arguments forwarded to the method.
        indices: Accepted for interface compatibility; not used.
        **method_kwargs: Keyword arguments forwarded to the method.

    Returns:
        Whatever the called method returns.

    NOTE(review): the result is not wrapped in a per-environment list,
    although the annotation says ``List[Any]`` — confirm callers expect this.
    """
    method = getattr(self, method_name)
    return method(*method_args, **method_kwargs)
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user