it is working
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
logs/*
|
||||||
16
.vscode/launch.json
vendored
Normal file
16
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Python: Aktuelle Datei",
|
||||||
|
"type": "python",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${file}",
|
||||||
|
"console": "integratedTerminal",
|
||||||
|
"justMyCode": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
4
components/__init__.py
Normal file
4
components/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
from . import(
|
||||||
|
simple_gather,
|
||||||
|
simple_build
|
||||||
|
)
|
||||||
9
components/noops.py
Normal file
9
components/noops.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
from ai_economist.foundation.base.registrar import Registry
|
||||||
|
from ai_economist.foundation.entities.endogenous import Endogenous, endogenous_registry
|
||||||
|
|
||||||
|
|
||||||
|
@endogenous_registry.add
class Noop(Endogenous):
    """Endogenous quantity tracking consecutive no-op actions taken by an agent."""

    # Registry key under which this endogenous quantity is looked up.
    name = "Noop"
256
components/simple_build.py
Normal file
256
components/simple_build.py
Normal file
@@ -0,0 +1,256 @@
|
|||||||
|
# Copyright (c) 2020, salesforce.com, inc.
|
||||||
|
# All rights reserved.
|
||||||
|
# SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
# For full license text, see the LICENSE file in the repo root
|
||||||
|
# or https://opensource.org/licenses/BSD-3-Clause
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from ai_economist.foundation.base.base_component import (
|
||||||
|
BaseComponent,
|
||||||
|
component_registry,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@component_registry.add
class SimpleCraft(BaseComponent):
    """
    Allows mobile agents to build house landmarks in the world using stone and
    wood, earning income.

    Can be configured to include heterogeneous building skill, in which case
    agents earn different levels of income when building.

    Args:
        payment (int): Base amount of coin agents earn from building.
            Must be >= 0. Default is 10.
        payment_max_skill_multiplier (int): Maximum skill multiplier that an
            agent can sample. Must be >= 1. Default is 1.
        skill_dist (str): Distribution type for sampling skills. Default
            ("none") gives all agents identical skill equal to a multiplier of
            1. "pareto" and "lognormal" sample skills from the associated
            distributions.
        build_labor (float): Labor cost associated with building a house.
            Must be >= 0. Default is 10.
    """

    name = "SimpleCraft"
    component_type = "Build"
    required_entities = ["Wood", "Stone", "Coin", "House", "Labor"]
    agent_subclasses = ["BasicMobileAgent"]

    def __init__(
        self,
        *base_component_args,
        payment=10,
        payment_max_skill_multiplier=1,
        skill_dist="none",
        build_labor=10.0,
        **base_component_kwargs
    ):
        super().__init__(*base_component_args, **base_component_kwargs)

        # Base coin earned per build (scaled by the sampled pay rate on reset).
        self.payment = int(payment)
        assert self.payment >= 0

        self.payment_max_skill_multiplier = int(payment_max_skill_multiplier)
        assert self.payment_max_skill_multiplier >= 1

        # Fixed recipe: one unit each of wood and stone per house.
        self.resource_cost = {"Wood": 1, "Stone": 1}

        self.build_labor = float(build_labor)
        assert self.build_labor >= 0

        self.skill_dist = skill_dist.lower()
        assert self.skill_dist in ["none", "pareto", "lognormal"]

        # Per-agent skill samples keyed by agent index; filled on reset.
        self.sampled_skills = {}

        # One entry per timestep; each entry lists that step's build events.
        self.builds = []

    def agent_can_build(self, agent):
        """Return True if the agent's inventory covers the full resource cost."""
        return all(
            agent.state["inventory"][resource] >= cost
            for resource, cost in self.resource_cost.items()
        )

    # Required methods for implementing components
    # --------------------------------------------

    def get_n_actions(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        Adds a single action (build) for mobile agents.
        """
        if agent_cls_name == "BasicMobileAgent":
            return 1
        return None

    def get_additional_state_fields(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        For mobile agents, adds state fields for build payment and skill.
        """
        if agent_cls_name not in self.agent_subclasses:
            return {}
        if agent_cls_name == "BasicMobileAgent":
            return {"build_payment": float(self.payment), "build_skill": 1}
        raise NotImplementedError

    def component_step(self):
        """
        See base_component.py for detailed description.

        Converts stone+wood to house+coin for agents that choose to build and
        are able to.
        """
        world = self.world
        round_builds = []

        # Resolve build actions in a randomized agent order.
        for agent in world.get_random_order_agents():

            action = agent.get_component_action(self.name)

            # This component doesn't apply to this agent.
            if action is None:
                continue

            if action == 0:
                # NO-OP: nothing to do.
                pass

            elif action == 1:
                # Build, if possible.
                if self.agent_can_build(agent):
                    # Pay the resource cost.
                    for resource, cost in self.resource_cost.items():
                        agent.state["inventory"][resource] -= cost

                    # Receive payment for the house.
                    agent.state["inventory"]["Coin"] += agent.state["build_payment"]

                    # Incur the labor cost for building.
                    agent.state["endogenous"]["Labor"] += self.build_labor

                    round_builds.append(
                        {
                            "builder": agent.idx,
                            "build_skill": self.sampled_skills[agent.idx],
                            "income": float(agent.state["build_payment"]),
                        }
                    )
                else:
                    # Chose to build without the required resources.
                    agent.bad_action=True

            else:
                # Only action indices {0, 1} are defined for this component.
                raise ValueError

        self.builds.append(round_builds)

    def generate_observations(self):
        """
        See base_component.py for detailed description.

        Agents observe their (payment-normalized) build payment and their
        build skill. The planner does not observe anything from this component.
        """
        return {
            agent.idx: {
                "build_payment": agent.state["build_payment"] / self.payment,
                "build_skill": self.sampled_skills[agent.idx],
            }
            for agent in self.world.agents
        }

    def generate_masks(self, completions=0):
        """
        See base_component.py for detailed description.

        The build action is masked for any agent that currently lacks the
        required resources (see agent_can_build).
        """
        return {
            agent.idx: np.array([self.agent_can_build(agent)])
            for agent in self.world.agents
        }

    # For non-required customization
    # ------------------------------

    def get_metrics(self):
        """
        Metrics that capture what happened through this component.

        Returns:
            metrics (dict): A dictionary of {"metric_name": metric_value},
                where metric_value is a scalar.
        """
        world = self.world

        # Count completed builds per agent across all logged timesteps.
        build_stats = {a.idx: {"n_builds": 0} for a in world.agents}
        for step_builds in self.builds:
            for build in step_builds:
                build_stats[build["builder"]]["n_builds"] += 1

        out_dict = {}
        for a in world.agents:
            for k, v in build_stats[a.idx].items():
                out_dict["{}/{}".format(a.idx, k)] = v

        # Total number of houses currently on the map.
        out_dict["total_builds"] = np.sum(world.maps.get("House") > 0)

        return out_dict

    def additional_reset_steps(self):
        """
        See base_component.py for detailed description.

        Re-samples agents' building skills.
        """
        world = self.world

        self.sampled_skills = {agent.idx: 1 for agent in world.agents}

        PMSM = self.payment_max_skill_multiplier

        for agent in world.agents:
            if self.skill_dist == "none":
                sampled_skill = 1
                pay_rate = 1
            elif self.skill_dist == "pareto":
                sampled_skill = np.random.pareto(4)
                # Pay rate grows with skill but is capped at PMSM.
                pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1)
            elif self.skill_dist == "lognormal":
                sampled_skill = np.random.lognormal(-1, 0.5)
                pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1)
            else:
                raise NotImplementedError

            agent.state["build_payment"] = float(pay_rate * self.payment)
            agent.state["build_skill"] = float(sampled_skill)

            self.sampled_skills[agent.idx] = sampled_skill

        self.builds = []

    def get_dense_log(self):
        """
        Log builds.

        Returns:
            builds (list): A list of build events. Each entry corresponds to a
                single timestep and contains a description of any builds that
                occurred on that timestep.
        """
        return self.builds
214
components/simple_gather.py
Normal file
214
components/simple_gather.py
Normal file
@@ -0,0 +1,214 @@
|
|||||||
|
# Copyright (c) 2020, salesforce.com, inc.
|
||||||
|
# All rights reserved.
|
||||||
|
# SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
# For full license text, see the LICENSE file in the repo root
|
||||||
|
# or https://opensource.org/licenses/BSD-3-Clause
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from numpy.random import rand
|
||||||
|
|
||||||
|
from ai_economist.foundation.base.base_component import (
|
||||||
|
BaseComponent,
|
||||||
|
component_registry,
|
||||||
|
)
|
||||||
|
from ai_economist.foundation.entities import resource_registry, resources
|
||||||
|
|
||||||
|
@component_registry.add
class SimpleGather(BaseComponent):
    """
    Allows mobile agents to collect resources available in the world.

    Can be configured to include collection skill, where agents have
    heterogeneous probabilities of collecting bonus resources without
    additional labor cost.

    Args:
        collect_labor (float): Labor cost added each time an agent collects
            resources. Must be >= 0. Default is 1.0.
        skill_dist (str): Distribution type for sampling skills. Default
            ("none") gives all agents identical skill equal to a bonus prob of
            0. "pareto" and "lognormal" sample skills from the associated
            distributions.
    """

    name = "SimpleGather"
    required_entities = ["Coin", "House", "Labor"]
    agent_subclasses = ["BasicMobileAgent"]

    def __init__(
        self,
        *base_component_args,
        collect_labor=1.0,
        skill_dist="none",
        **base_component_kwargs
    ):
        super().__init__(*base_component_args, **base_component_kwargs)

        self.collect_labor = float(collect_labor)
        assert self.collect_labor >= 0

        self.skill_dist = skill_dist.lower()
        assert self.skill_dist in ["none", "pareto", "lognormal"]

        # One entry per timestep; each entry lists that step's gather events.
        self.gathers = []

        # The collectible resources registered in this world.
        self.commodities = [
            r for r in self.world.resources if resource_registry.get(r).collectible
        ]

    # Required methods for implementing components
    # --------------------------------------------

    def get_n_actions(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        Adds 1 action per commodity that can be picked up.
        """
        if agent_cls_name == "BasicMobileAgent":
            return len(self.commodities)
        return None

    def get_additional_state_fields(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        For mobile agents, adds a state field for collection skill.
        """
        if agent_cls_name not in self.agent_subclasses:
            return {}
        if agent_cls_name == "BasicMobileAgent":
            return {"bonus_gather_prob": 0.0}
        raise NotImplementedError

    def component_step(self):
        """
        See base_component.py for detailed description.

        Agents pick up resources from the environment when available.
        """
        world = self.world

        step_gathers = []
        for agent in world.get_random_order_agents():

            if self.name not in agent.action:
                continue
            resource_action = agent.get_component_action(self.name)

            if resource_action == 0:  # NO-OP
                continue

            # Action indices start at 1; shift down to index self.commodities.
            resource_action -= 1

            commodity = self.commodities[resource_action]

            if self.get_num_resources(commodity) > 0:
                step_gathers.append(self.pickup(commodity, agent))
            else:
                # Chose to gather a resource that is not available.
                agent.bad_action=True
                continue

        self.gathers.append(step_gathers)

    def generate_observations(self):
        """
        See base_component.py for detailed description.

        Agents observe their bonus-gather probability plus, for each
        commodity, its per-agent availability (capped at 1). The planner does
        not observe anything from this component.
        """
        num_agent = len(self.world.agents)

        # Shared observation: per-commodity availability per agent.
        shared_obs = {}
        for commodity in self.commodities:
            key = "pickup_perc_{}".format(commodity)
            availability = float(self.get_num_resources(commodity) / num_agent)
            if availability > 1:
                availability = 1
            shared_obs[key] = availability

        obs = {}
        for agent in self.world.agents:
            obs[agent.idx] = {"bonus_gather_prob": agent.state["bonus_gather_prob"]}
            obs[agent.idx].update(shared_obs)
        return obs

    def generate_masks(self, completions=0):
        """
        See base_component.py for detailed description.

        Each commodity's gather action is masked whenever that resource is
        unavailable; the same mask applies to every agent.
        """
        world = self.world

        shared_mask = []
        for commodity in self.commodities:
            shared_mask.append(1 if self.get_num_resources(commodity) > 0 else 0)

        masks = {}
        for agent in world.agents:
            masks[agent.idx] = shared_mask

        return masks

    # For non-required customization
    # ------------------------------

    def additional_reset_steps(self):
        """
        See base_component.py for detailed description.

        Re-samples agents' collection skills.
        """
        for agent in self.world.agents:
            if self.skill_dist == "none":
                bonus_rate = 0.0
            elif self.skill_dist == "pareto":
                bonus_rate = np.minimum(2, np.random.pareto(3)) / 2
            elif self.skill_dist == "lognormal":
                bonus_rate = np.minimum(2, np.random.lognormal(-2.022, 0.938)) / 2
            else:
                raise NotImplementedError
            agent.state["bonus_gather_prob"] = float(bonus_rate)

        self.gathers = []

    def get_dense_log(self):
        """
        Log resource collections.

        Returns:
            gathers (list): A list of gather events. Each entry corresponds to
                a single timestep and contains a description of any resource
                gathers that occurred on that timestep.
        """
        return self.gathers

    # For Components

    def get_num_resources(self, res: resources.Resource):
        """Return how many units of *res* are available for pickup.

        NOTE(review): reads the map at the fixed point (0, 0) — presumably the
        whole stock of each resource lives on that tile in this scenario;
        confirm against the scenario's map layout.
        """
        return self.world.maps.get_point(res, 0, 0)

    def pickup(self, res: resources.Resource, agent):
        """Transfer one unit of *res* (plus a possible skill bonus) to *agent*
        and return a dict describing the gather event."""
        # A skilled agent collects one bonus unit with prob. bonus_gather_prob.
        n_gathered = 1 + (rand() < agent.state["bonus_gather_prob"])
        agent.state["inventory"][res] += n_gathered
        agent.state["endogenous"]["Labor"] += self.collect_labor
        self.world.consume_resource(res, 0, 0)
        # Log the gather
        return dict(
            agent=agent.idx,
            resource=res,
            n=n_gathered,
        )
227
envs/econ_wrapper.py
Normal file
227
envs/econ_wrapper.py
Normal file
@@ -0,0 +1,227 @@
|
|||||||
|
from collections import OrderedDict
|
||||||
|
from copy import deepcopy
|
||||||
|
from typing import Any, Callable, List, Optional, Sequence, Type, Union
|
||||||
|
from ai_economist.foundation.base import base_env
|
||||||
|
|
||||||
|
import gym
|
||||||
|
import gym.spaces
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from stable_baselines3.common.vec_env.base_vec_env import VecEnv, VecEnvIndices, VecEnvObs, VecEnvStepReturn
|
||||||
|
from stable_baselines3.common.vec_env.util import copy_obs_dict, dict_to_obs, obs_space_info
|
||||||
|
|
||||||
|
from ai_economist import foundation
|
||||||
|
|
||||||
|
class EconVecEnv(VecEnv, gym.Env):
    """
    A single-process VecEnv wrapper around one ai-economist environment.

    Each mobile agent of the multi-agent economic simulation is exposed as one
    "env" of the vectorized interface, so single-agent SB3 algorithms can be
    trained against the simulation without multiprocessing overhead.

    :param env_config: keyword arguments forwarded to
        ``foundation.make_env_instance``.
    """

    def __init__(self, env_config):
        self.config = env_config
        env = foundation.make_env_instance(**env_config)
        self.env = env

        # Build the flat observation/action spaces from a sample reset.
        obs = env.reset()
        actions = env.world.agents[0].action_spaces
        obs1 = obs["0"]
        # The action mask and clock are not part of the learned observation.
        del obs1["action_mask"]
        del obs1["time"]
        self.observation_space = gym.spaces.Box(
            low=0, high=np.inf, shape=(len(obs1),), dtype=np.float32
        )
        self.action_space = gym.spaces.Discrete(actions)

        # One vectorized "env" per mobile agent.
        self.num_envs = env.world.n_agents

        VecEnv.__init__(
            self, self.num_envs, self.observation_space, action_space=self.action_space
        )
        self.keys, shapes, dtypes = obs_space_info(self.observation_space)

        self.buf_obs = OrderedDict(
            [
                (k, np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k]))
                for k in self.keys
            ]
        )
        self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
        self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        self.actions = None

    def step_async(self, actions: np.ndarray) -> None:
        """Store the actions; they are applied in ``step_wait``."""
        self.actions = actions

    def step_wait(self) -> VecEnvStepReturn:
        """Apply the stored actions; return (obs, rewards, dones, infos)."""
        # Convert the flat action vector into the per-agent dict the econ
        # environment expects (keys are stringified agent indices).
        r_action = {}
        for ai_idx in range(len(self.actions)):
            r_action[str(ai_idx)] = self.actions[ai_idx]

        obs, rew, done, info = self.env.step(r_action)
        obs_g = self._convert_econ_obs_to_gym(obs)
        rew_g = self._convert_econ_to_gym(rew)
        info_g = self._convert_econ_to_gym(info)

        # Attach scenario-level productivity metrics to every agent's info.
        prev_metrics = self.metrics
        self.metrics = self.env.scenario_metrics()
        curr_prod = self.metrics["social/productivity"]
        trend_pord = curr_prod - prev_metrics["social/productivity"]

        for agent_info in info_g:
            agent_info["social/productivity"] = curr_prod
            agent_info["trend/productivity"] = trend_pord

        done_g = [False] * self.num_envs
        done = done["__all__"]
        if done:
            # VecEnv contract: on episode end, expose the terminal observation
            # via info and return the fresh reset observation instead.
            for i in range(self.num_envs):
                done_g[i] = done
                info_g[i]["terminal_observation"] = obs_g[i]
            obs_g = self.reset()

        return (np.copy(obs_g), np.copy(rew_g), np.copy(done_g), deepcopy(info_g))

    def step_predict(self, actions):
        """Work around the malformed action tensor from SB3's predict method."""
        return self.step(actions[0])

    def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
        # NOTE(review): iterates self.envs, which is never assigned on this
        # class (only self.env exists) — calling this will raise
        # AttributeError; confirm intent before relying on it.
        if seed is None:
            seed = np.random.randint(0, 2**32 - 1)
        seeds = []
        for idx, env in enumerate(self.envs):
            seeds.append(env.seed(seed + idx))
        return seeds

    def reset(self) -> VecEnvObs:
        """Reset the wrapped environment; return the stacked observations."""
        obs = self.env.reset()
        self.metrics = self.env.scenario_metrics()
        return self._convert_econ_obs_to_gym(obs)

    def close(self) -> None:
        self.env.close()

    def get_images(self) -> Sequence[np.ndarray]:
        # NOTE(review): self.envs is never assigned on this class; see seed().
        return [env.render(mode="rgb_array") for env in self.envs]

    def render(self, mode: str = "human") -> Optional[np.ndarray]:
        """
        Gym environment rendering. If there are multiple environments then
        they are tiled together in one image via ``BaseVecEnv.render()``.
        Otherwise (if ``self.num_envs == 1``), we pass the render call
        directly to the underlying environment.

        Therefore, some arguments such as ``mode`` will have values that are
        valid only when ``num_envs == 1``.

        :param mode: The rendering type.
        """
        # NOTE(review): self.envs is never assigned on this class; see seed().
        if self.num_envs == 1:
            return self.envs[0].render(mode=mode)
        else:
            return super().render(mode=mode)

    def _save_obs(self, env_idx: int, obs: VecEnvObs) -> None:
        """Copy one env's observation into the shared observation buffer."""
        for key in self.keys:
            if key is None:
                self.buf_obs[key][env_idx] = obs
            else:
                self.buf_obs[key][env_idx] = obs[key]

    def _obs_from_buf(self) -> VecEnvObs:
        return dict_to_obs(self.observation_space, copy_obs_dict(self.buf_obs))

    def get_attr(self, attr_name: str, indices: VecEnvIndices = None) -> List[Any]:
        """Return attribute from vectorized environment (see base class)."""
        target_envs = self._get_target_envs(indices)
        return [getattr(env_i, attr_name) for env_i in target_envs]

    def set_attr(self, attr_name: str, value: Any, indices: VecEnvIndices = None) -> None:
        """Set attribute inside vectorized environments (see base class)."""
        target_envs = self._get_target_envs(indices)
        for env_i in target_envs:
            setattr(env_i, attr_name, value)

    def env_method(self, method_name: str, *method_args, indices: VecEnvIndices = None, **method_kwargs) -> List[Any]:
        """Call instance methods of vectorized environments."""
        target_envs = self._get_target_envs(indices)
        return [
            getattr(env_i, method_name)(*method_args, **method_kwargs)
            for env_i in target_envs
        ]

    def env_is_wrapped(self, wrapper_class: Type[gym.Wrapper], indices: VecEnvIndices = None) -> List[bool]:
        """Check if worker environments are wrapped with a given wrapper"""
        target_envs = self._get_target_envs(indices)
        # Import here to avoid a circular import
        from stable_baselines3.common import env_util

        return [env_util.is_wrapped(env_i, wrapper_class) for env_i in target_envs]

    def _get_target_envs(self, indices: VecEnvIndices) -> List[gym.Env]:
        # NOTE(review): self.envs is never assigned on this class; see seed().
        indices = self._get_indices(indices)
        return [self.envs[i] for i in indices]

    # Conversion helpers between econ-env dicts and gym-style lists/arrays
    # --------------------------------------------------------------------

    def _convert_econ_to_gym(self, econ):
        """Drop the planner ("p") entry and return the agent values as a list.

        NOTE: mutates *econ* in place (deletes the "p" key).
        """
        del econ["p"]
        return [v for k, v in econ.items()]

    def _convert_gym_to_acon(self, gy):
        """Rebuild a dict from an iterable of (key, value) pairs."""
        econ = {}
        for k, v in gy:
            econ[k] = v
        return econ

    def _convert_econ_obs_to_gym(self, econ):
        """Convert the per-agent observation dicts into one stacked array.

        NOTE: mutates *econ* in place (deletes the planner entry and each
        agent's "time" / "action_mask" fields).
        """
        gy = [None] * self.num_envs
        del econ["p"]
        for k, v in econ.items():
            # The action mask and clock are not part of the learned observation.
            del v["time"]
            del v["action_mask"]
            gy[int(k)] = np.array(self.extract_dict(v))
        return np.stack(gy)

    def extract_dict(self, obj):
        """Recursively convert a dict (or iterable) into a nested list of values."""
        output = []
        use_key = isinstance(obj, dict)
        for v in obj:
            if use_key:
                v = obj[v]
            if isinstance(v, dict):
                output.append(self.extract_dict(v))
            else:
                output.append(v)
        return output
472
envs/simple_market.py
Normal file
472
envs/simple_market.py
Normal file
@@ -0,0 +1,472 @@
|
|||||||
|
# Copyright (c) 2020, salesforce.com, inc.
|
||||||
|
# All rights reserved.
|
||||||
|
# SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
# For full license text, see the LICENSE file in the repo root
|
||||||
|
# or https://opensource.org/licenses/BSD-3-Clause
|
||||||
|
|
||||||
|
from copy import deepcopy
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import signal
|
||||||
|
|
||||||
|
from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry
|
||||||
|
from ai_economist.foundation.scenarios.utils import rewards, social_metrics
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
|
@scenario_registry.add
|
||||||
|
class SimpleMarket(BaseEnvironment):
|
||||||
|
"""
|
||||||
|
World containing stone and wood with stochastic regeneration. Refers to a fixed
|
||||||
|
layout file (see ./map_txt/ for examples) to determine the spatial arrangement of
|
||||||
|
stone, wood, and water tiles.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
planner_gets_spatial_obs (bool): Whether the planner agent receives spatial
|
||||||
|
observations from the world.
|
||||||
|
full_observability (bool): Whether the mobile agents' spatial observation
|
||||||
|
includes the full world view or is instead an egocentric view.
|
||||||
|
mobile_agent_observation_range (int): If not using full_observability,
|
||||||
|
the spatial range (on each side of the agent) that is visible in the
|
||||||
|
spatial observations.
|
||||||
|
env_layout_file (str): Name of the layout file in ./map_txt/ to use.
|
||||||
|
Note: The world dimensions of that layout must match the world dimensions
|
||||||
|
argument used to construct the environment.
|
||||||
|
resource_regen_prob (float): Probability that an empty source tile will
|
||||||
|
regenerate a new resource unit.
|
||||||
|
fixed_four_skill_and_loc (bool): Whether to use a fixed set of build skills and
|
||||||
|
starting locations, with agents grouped into starting locations based on
|
||||||
|
which skill quartile they are in. False, by default.
|
||||||
|
True, for experiments in https://arxiv.org/abs/2004.13332.
|
||||||
|
Note: Requires that the environment uses the "Build" component with
|
||||||
|
skill_dist="pareto".
|
||||||
|
starting_agent_coin (int, float): Amount of coin agents have at t=0. Defaults
|
||||||
|
to zero coin.
|
||||||
|
isoelastic_eta (float): Parameter controlling the shape of agent utility
|
||||||
|
wrt coin endowment.
|
||||||
|
energy_cost (float): Coefficient for converting labor to negative utility.
|
||||||
|
energy_warmup_constant (float): Decay constant that controls the rate at which
|
||||||
|
the effective energy cost is annealed from 0 to energy_cost. Set to 0
|
||||||
|
(default) to disable annealing, meaning that the effective energy cost is
|
||||||
|
always energy_cost. The units of the decay constant depend on the choice of
|
||||||
|
energy_warmup_method.
|
||||||
|
energy_warmup_method (str): How to schedule energy annealing (warmup). If
|
||||||
|
"decay" (default), use the number of completed episodes. If "auto",
|
||||||
|
use the number of timesteps where the average agent reward was positive.
|
||||||
|
planner_reward_type (str): The type of reward used for the planner. Options
|
||||||
|
are "coin_eq_times_productivity" (default),
|
||||||
|
"inv_income_weighted_coin_endowment", and "inv_income_weighted_utility".
|
||||||
|
mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/
|
||||||
|
"coin_eq_times_productivity". Default is 0, which weights equality and
|
||||||
|
productivity equally. If set to 1, only productivity is rewarded.
|
||||||
|
"""
|
||||||
|
|
||||||
|
name = "simple_market"
|
||||||
|
agent_subclasses = ["BasicMobileAgent"]
|
||||||
|
required_entities = ["Wood", "Stone", "Water"]
|
||||||
|
|
||||||
|
def __init__(
    self,
    *base_env_args,
    resource_regen_prob=0.01,
    fixed_four_skill_and_loc=False,
    starting_agent_coin=0,
    isoelastic_eta=0.23,
    energy_cost=0.21,
    energy_warmup_constant=0,
    energy_warmup_method="decay",
    planner_reward_type="coin_eq_times_productivity",
    mixing_weight_gini_vs_coin=0.0,
    **base_env_kwargs,
):
    """Configure the scenario on top of the base environment.

    See the class docstring for parameter descriptions. Base-env args and
    kwargs are forwarded unchanged to the parent constructor.
    """
    super().__init__(*base_env_args, **base_env_kwargs)

    # NOTE(review): ``fixed_four_skill_and_loc`` is accepted but never
    # stored or used in this constructor — confirm whether it is needed.

    # Per-resource regeneration settings consumed by the world maps.
    self.layout_specs = dict(
        Wood={
            "regen_weight": float(resource_regen_prob),
            "regen_halfwidth": 0,
            "max_health": 1,
        },
        Stone={
            "regen_weight": float(resource_regen_prob),
            "regen_halfwidth": 0,
            "max_health": 1,
        },
    )
    assert 0 <= self.layout_specs["Wood"]["regen_weight"] <= 1
    assert 0 <= self.layout_specs["Stone"]["regen_weight"] <= 1

    # How much coin agents begin with upon reset.
    self.starting_agent_coin = float(starting_agent_coin)
    assert self.starting_agent_coin >= 0.0

    # Controls the diminishing marginal utility of coin.
    # isoelastic_eta=0 means no diminishing utility.
    self.isoelastic_eta = float(isoelastic_eta)
    assert 0.0 <= self.isoelastic_eta <= 1.0

    # The amount that labor is weighted in utility computation
    # (once annealing is finished).
    self.energy_cost = float(energy_cost)
    assert self.energy_cost >= 0

    # Which method to use for calculating the progress of energy annealing:
    #   'decay': # of completed episodes
    #   'auto' : # of timesteps where avg. agent reward > 0
    self.energy_warmup_method = energy_warmup_method.lower()
    assert self.energy_warmup_method in ["decay", "auto"]
    # Decay constant for annealing to full energy cost
    # (if energy_warmup_constant == 0, there is no annealing).
    self.energy_warmup_constant = float(energy_warmup_constant)
    assert self.energy_warmup_constant >= 0
    # Counter driving the "auto" warmup schedule (bumped in compute_reward).
    self._auto_warmup_integrator = 0

    # Which social welfare function to use for the planner's reward.
    self.planner_reward_type = str(planner_reward_type).lower()

    # How much to weight equality if using SWF=eq*prod:
    #   0 -> SWF = eq * prod
    #   1 -> SWF = prod
    self.mixing_weight_gini_vs_coin = float(mixing_weight_gini_vs_coin)
    assert 0 <= self.mixing_weight_gini_vs_coin <= 1.0

    # Trackers used to deliver marginal metric changes as reward.
    self.init_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.prev_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.curr_optimization_metric = {agent.idx: 0 for agent in self.all_agents}

    # NOTE(review): the string below is a no-op statement (mid-function
    # bare string), kept byte-identical; it acts only as a section marker.
    """
    Fixed Four Skill and Loc
    ------------------------
    """
    # Starting positions are initialized empty here; presumably filled in
    # elsewhere when fixed starting locations are used — TODO confirm.
    self.agent_starting_pos = {agent.idx: [] for agent in self.world.agents}

    # NOTE(review): attribute name looks like a typo ("loged"); kept as-is
    # because external code may reference it.
    self.last_log_loged={}
|
||||||
|
|
||||||
|
|
||||||
|
@property
def energy_weight(self):
    """Fraction of the full labor cost currently applied (annealing progress).

    Multiply by ``self.energy_cost`` to obtain the effective labor
    coefficient. If the warmup constant is zero or negative, annealing is
    disabled and the weight is always 1.
    """
    tau = self.energy_warmup_constant
    if tau <= 0.0:
        return 1.0

    if self.energy_warmup_method == "decay":
        # Progress measured in completed episodes.
        progress = self._completions
    elif self.energy_warmup_method == "auto":
        # Progress measured in timesteps with positive avg. agent reward.
        progress = self._auto_warmup_integrator
    else:
        raise NotImplementedError

    return float(1.0 - np.exp(-progress / tau))
|
||||||
|
|
||||||
|
def is_bad_action(self, agent):
    """Return whether *agent* flagged a bad action this step, clearing the flag."""
    was_bad = agent.bad_action
    agent.bad_action = False
    return was_bad
|
||||||
|
def get_current_optimization_metrics(self):
    """Compute the per-actor optimization metric from the current state.

    Mobile agents get isoelastic coin utility minus the (annealed) labor
    cost; the planner gets the configured social welfare function.

    Returns:
        dict: {agent.idx: metric} with an entry for each agent (including
        the planner) in the env.

    Raises:
        NotImplementedError: if ``self.planner_reward_type`` is not one of
        the supported options.
    """
    metrics = {}

    # --- Mobile agents -------------------------------------------------
    for agent in self.world.agents:
        metrics[agent.idx] = rewards.isoelastic_coin_minus_labor(
            coin_endowment=agent.total_endowment("Coin"),
            total_labor=agent.state["endogenous"]["Labor"],
            isoelastic_eta=self.isoelastic_eta,
            labor_coefficient=self.energy_weight * self.energy_cost,
        )

    # --- Planner -------------------------------------------------------
    coin_endowments = np.array(
        [agent.total_endowment("Coin") for agent in self.world.agents]
    )
    planner_idx = self.world.planner.idx
    if self.planner_reward_type == "coin_eq_times_productivity":
        metrics[planner_idx] = rewards.coin_eq_times_productivity(
            coin_endowments=coin_endowments,
            equality_weight=1 - self.mixing_weight_gini_vs_coin,
        )
    elif self.planner_reward_type == "inv_income_weighted_coin_endowments":
        metrics[planner_idx] = rewards.inv_income_weighted_coin_endowments(
            coin_endowments=coin_endowments
        )
    elif self.planner_reward_type == "inv_income_weighted_utility":
        metrics[planner_idx] = rewards.inv_income_weighted_utility(
            coin_endowments=coin_endowments,
            utilities=np.array(
                [metrics[agent.idx] for agent in self.world.agents]
            ),
        )
    else:
        print("No valid planner reward selected!")
        raise NotImplementedError
    return metrics
|
||||||
|
|
||||||
|
# The following methods must be implemented for each scenario
|
||||||
|
# -----------------------------------------------------------
|
||||||
|
|
||||||
|
def reset_starting_layout(self):
    """Part 1/2 of scenario reset: rebuild the resource/landmark layout.

    Clears all world maps, then seeds one unit each of Wood and Stone at
    the single world cell (0, 0).
    """
    self.world.maps.clear()
    for resource_name in ("Wood", "Stone"):
        self.world.maps.set_point_add(resource_name, 0, 0, 1)
|
||||||
|
|
||||||
|
def reset_agent_states(self):
    """Part 2/2 of scenario reset: reset agent and planner state.

    Empties every mobile agent's inventory, escrow, and endogenous
    quantities, grants the configured starting coin, and clears the
    bad-action flag; then zeroes the planner's inventory and escrow.
    Agent map locations are cleared first. Note: if using
    fixed_four_skill_and_loc, starting locations are overridden later in
    self.additional_reset_steps.
    """
    self.world.clear_agent_locs()

    for agent in self.world.agents:
        agent.state["inventory"] = {k: 0 for k in agent.inventory.keys()}
        # Fix: key the escrow dict off the agent's escrow (the original
        # used inventory keys), consistent with the planner reset below.
        agent.state["escrow"] = {k: 0 for k in agent.escrow.keys()}
        agent.state["endogenous"] = {k: 0 for k in agent.endogenous.keys()}
        # Add starting coin.
        agent.state["inventory"]["Coin"] = float(self.starting_agent_coin)
        agent.bad_action = False

    self.world.planner.state["inventory"] = {
        k: 0 for k in self.world.planner.inventory.keys()
    }
    self.world.planner.state["escrow"] = {
        k: 0 for k in self.world.planner.escrow.keys()
    }
|
||||||
|
|
||||||
|
|
||||||
|
def scenario_step(self):
    """Advance scenario-managed world state for one timestep.

    Called by the base environment's ``step`` after each component step
    and before observations/rewards are generated. Here it handles
    resource regeneration: 20 units each of Wood and Stone are added at
    the single world cell (0, 0).
    """
    for resource_name in ("Wood", "Stone"):
        self.world.maps.set_point_add(resource_name, 0, 0, 20)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_observations(self):
    """Build this scenario's observations.

    Returns:
        dict: {agent.idx: obs_dict}. The planner (keyed by its idx)
        observes its own inventory; each mobile agent (keyed by its
        stringified index) observes its own inventory. All inventory
        values are scaled by ``self.inv_scale``.
    """
    obs = {}

    # Planner sees its own scaled inventory.
    obs[self.world.planner.idx] = {
        "inventory-" + name: amount * self.inv_scale
        for name, amount in self.world.planner.inventory.items()
    }

    # Each mobile agent sees its own scaled inventory.
    for agent in self.world.agents:
        obs[str(agent.idx)] = {
            "inventory-" + name: amount * self.inv_scale
            for name, amount in agent.inventory.items()
        }

    return obs
|
||||||
|
|
||||||
|
def compute_reward(self):
    """Compute per-actor rewards for this timestep.

    Reward is the marginal change in each actor's optimization metric
    (utility for agents, social welfare for the planner) since the last
    step, so maximizing cumulative reward maximizes the terminal-state
    metric. Mobile agents additionally receive a -1 penalty when their
    bad-action flag was set this step.

    Returns:
        dict: {agent.idx: scalar reward} for every actor incl. planner.
    """
    # Metrics have not been refreshed yet, so they reflect the last step.
    prior_metrics = deepcopy(self.curr_optimization_metric)

    # Refresh metrics to the current state.
    self.curr_optimization_metric = self.get_current_optimization_metrics()

    # Marginal improvement, with a bad-action penalty for mobile agents.
    rew = {
        idx: float(value - prior_metrics[idx])
        for idx, value in self.curr_optimization_metric.items()
    }
    for idx in rew:
        if idx != "p" and self.is_bad_action(self.world.agents[idx]):
            rew[idx] -= 1

    # Store the previous objective values.
    self.prev_optimization_metric.update(prior_metrics)

    # Automatic Energy Cost Annealing: count timesteps where the average
    # mobile-agent reward was positive (drives the "auto" warmup method).
    if np.mean([rew[a.idx] for a in self.world.agents]) > 0:
        self._auto_warmup_integrator += 1

    return rew
|
||||||
|
|
||||||
|
# Optional methods for customization
|
||||||
|
# ----------------------------------
|
||||||
|
|
||||||
|
def additional_reset_steps(self):
    """Final step of the reset cycle: re-seed the metric trackers.

    Runs after reset_starting_layout/reset_agent_states and after every
    component's reset. The current, initial, and previous optimization
    metric trackers are all set to the metrics of the freshly reset
    state, so the first step's reward is a true marginal change.
    """
    baseline = self.get_current_optimization_metrics()
    self.curr_optimization_metric = deepcopy(baseline)
    self.init_optimization_metric = deepcopy(baseline)
    self.prev_optimization_metric = deepcopy(baseline)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def scenario_metrics(self):
    """Summarize social metrics, endowments, utilities, and labor annealing.

    Returns:
        dict: flat {metric_key: scalar} mapping (no nesting or lists),
        collected alongside component metrics in the 'metrics' property.
    """
    metrics = {}

    # Coin-based social metrics.
    endowments = np.array(
        [agent.total_endowment("Coin") for agent in self.world.agents]
    )
    metrics["social/productivity"] = social_metrics.get_productivity(endowments)
    metrics["social/equality"] = social_metrics.get_equality(endowments)

    # Social welfare under each supported SWF.
    utilities = np.array(
        [self.curr_optimization_metric[agent.idx] for agent in self.world.agents]
    )
    metrics["social_welfare/coin_eq_times_productivity"] = (
        rewards.coin_eq_times_productivity(
            coin_endowments=endowments, equality_weight=1.0
        )
    )
    metrics["social_welfare/inv_income_weighted_coin_endow"] = (
        rewards.inv_income_weighted_coin_endowments(coin_endowments=endowments)
    )
    metrics["social_welfare/inv_income_weighted_utility"] = (
        rewards.inv_income_weighted_utility(
            coin_endowments=endowments, utilities=utilities
        )
    )

    # Per-actor endowments, endogenous quantities, and utilities.
    for agent in self.all_agents:
        for resource in agent.inventory:
            metrics["endow/{}/{}".format(agent.idx, resource)] = (
                agent.total_endowment(resource)
            )
        if agent.endogenous is not None:
            for resource, quantity in agent.endogenous.items():
                metrics["endogenous/{}/{}".format(agent.idx, resource)] = quantity
        metrics["util/{}".format(agent.idx)] = (
            self.curr_optimization_metric[agent.idx]
        )

    # Labor weight / annealing progress.
    metrics["labor/weighted_cost"] = self.energy_cost * self.energy_weight
    metrics["labor/warmup_integrator"] = int(self._auto_warmup_integrator)

    return metrics
|
||||||
|
|
||||||
283
main working way to good.pys
Normal file
283
main working way to good.pys
Normal file
@@ -0,0 +1,283 @@
|
|||||||
|
from ai_economist import foundation
|
||||||
|
import numpy as np
|
||||||
|
from stable_baselines3.common.vec_env import vec_frame_stack
|
||||||
|
from stable_baselines3.common.evaluation import evaluate_policy
|
||||||
|
import envs
|
||||||
|
from tqdm import tqdm
|
||||||
|
import components
|
||||||
|
from stable_baselines3.common.env_checker import check_env
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
|
||||||
|
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
|
||||||
|
from sb3_contrib import RecurrentPPO
|
||||||
|
from envs.econ_wrapper import EconVecEnv
|
||||||
|
from stable_baselines3.common.callbacks import BaseCallback
|
||||||
|
import yaml
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Training environment configuration (consumed by EconVecEnv / foundation).
env_config = {
    # ===== SCENARIO CLASS =====
    # Name of the Scenario class in the Scenario Registry (foundation.scenarios).
    # The environment object will be an instance of that class.
    'scenario_name': 'simple_market',

    # ===== COMPONENTS =====
    # ("component_name", {component_kwargs}) tuples; "component_name" refers to
    # the class's name in the Component Registry (foundation.components).
    # Components reset, step, and generate obs in the listed order.
    'components': [
        # (1) Building houses
        ('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
        # (2) Trading collectible resources
        #('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
    ],

    # ===== SCENARIO CLASS ARGUMENTS =====
    # (optional) kwargs handled by the Scenario class itself.
    'starting_agent_coin': 0,
    'fixed_four_skill_and_loc': True,

    # ===== STANDARD ARGUMENTS ======
    # kwargs used by every Scenario class (defined in BaseEnvironment).
    'n_agents': 20,  # Number of non-planner agents (must be > 1)
    'world_size': [1, 1],  # [Height, Width] of the env world
    'episode_length': 256,  # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 100,
    'world_dense_log_frequency': 1,
    # Labor-cost annealing is effectively disabled (cost 0, constant 0).
    'energy_cost': 0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 0,

    # In multi-action-mode, the policy selects an action for each action
    # subspace; otherwise it selects only one action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,

    # When flattening observations, concatenate scalar & vector observations
    # before output; otherwise return observations with minimal processing.
    'flatten_observations': False,
    # When flattening masks, concatenate each action subspace mask into a
    # single array (required for masking action logits).
    'flatten_masks': False,
}
|
||||||
|
|
||||||
|
|
||||||
|
# Evaluation environment configuration: identical to ``env_config`` except
# for a shorter episode (100 steps) and denser logging (frequency 10).
eval_env_config = {
    # ===== SCENARIO CLASS =====
    'scenario_name': 'simple_market',

    # ===== COMPONENTS =====
    # Same component stack as training (see env_config above for details).
    'components': [
        # (1) Building houses
        ('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
        # (2) Trading collectible resources
        #('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
    ],

    # ===== SCENARIO CLASS ARGUMENTS =====
    'starting_agent_coin': 0,
    'fixed_four_skill_and_loc': True,

    # ===== STANDARD ARGUMENTS ======
    'n_agents': 20,  # Number of non-planner agents (must be > 1)
    'world_size': [1, 1],  # [Height, Width] of the env world
    'episode_length': 100,  # Shorter episodes for evaluation
    'allow_observation_scaling': True,
    'dense_log_frequency': 10,
    'world_dense_log_frequency': 1,
    'energy_cost': 0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 0,

    # Single-action mode for both agents and planner.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,

    # Minimal observation processing; no mask flattening.
    'flatten_observations': False,
    'flatten_masks': False,
}
|
||||||
|
|
||||||
|
# NOTE(review): appears unused in this script — confirm before removing.
num_frames=2
|
||||||
|
|
||||||
|
class TensorboardCallback(BaseCallback):
    """Custom callback that logs scenario productivity to TensorBoard."""

    def __init__(self, econ, verbose=0):
        super().__init__(verbose)
        self.econ = econ
        # Seed the previous-metrics snapshot from the current env state.
        self.metrics = econ.scenario_metrics()

    def _on_step(self) -> bool:
        previous = self.metrics
        # Prefer the last finished episode's metrics once one is available;
        # otherwise fall back to live scenario metrics.
        if self.econ.previous_episode_metrics is None:
            self.metrics = self.econ.scenario_metrics()
        else:
            self.metrics = self.econ.previous_episode_metrics
        productivity = self.metrics["social/productivity"]
        self.logger.record("social/total_productivity", productivity)
        self.logger.record(
            "social/delta_productivity",
            productivity - previous["social/productivity"],
        )
        return True
|
||||||
|
|
||||||
|
|
||||||
|
def sample_random_action(agent, mask):
    """Sample random UNMASKED action(s) for agent."""
    if agent.multi_action_mode:
        # One action per subspace: split the flat mask at subspace bounds
        # and draw from each piece, weighted by the (normalized) mask.
        sub_masks = np.split(mask, agent.action_spaces.cumsum()[:-1])
        return [
            np.random.choice(np.arange(len(m)), p=m / m.sum())
            for m in sub_masks
        ]

    # Single action subspace: one weighted draw.
    return np.random.choice(np.arange(agent.action_spaces), p=mask / mask.sum())
|
||||||
|
|
||||||
|
def sample_random_actions(env, obs):
    """Return a NOOP (action 0) for every agent present in *obs*.

    NOTE(review): despite the original docstring ("Samples random UNMASKED
    actions"), this returns action 0 for all agents and ignores *env*
    entirely. Keys are positional indices over obs, not agent idx strings
    -- confirm callers expect that.
    """

    actions = {
        a_idx: 0
        for a_idx in range( len(obs))
    }

    return actions
|
||||||
|
|
||||||
|
def printMarket(market):
    """Print every trade in a dense market log, grouped by step.

    *market* is a per-step list of transaction dicts. Returns "" so the
    call can be embedded in a format/print expression.
    """
    for step_idx, transactions in enumerate(market):
        if not transactions:
            continue
        print("=== Step {} ===".format(step_idx))
        for t in transactions:
            print(
                "({}) {} -> {} | [{}/{}] {} Coins\n".format(
                    t["commodity"], t["seller"], t["buyer"],
                    t["ask"], t["bid"], t["price"],
                )
            )
    return ""
|
||||||
|
|
||||||
|
def printBuilds(builds):
    """Print every build event in a dense build log.

    *builds* is a per-step list of build dicts. Returns "" so the call
    can be embedded in a format/print expression.
    """
    for step_idx, step_builds in enumerate(builds):
        if not step_builds:
            continue
        for t in step_builds:
            print(
                "({}) Builder: {}, Skill: {}, Income {} ".format(
                    step_idx, t["builder"], t["build_skill"], t["income"]
                )
            )
    return ""
|
||||||
|
def printReplay(econ, agentid):
    """Pretty-print one agent's episode from the previous dense log.

    For each step it prints: the world resource levels at cell (0, 0),
    the agent's logged state (as YAML), its action(s), and its reward.

    Args:
        econ: environment exposing ``previous_episode_dense_log``.
        agentid (int): index of the agent whose episode to replay.
    """
    worldmaps = ["Stone", "Wood"]

    log = econ.previous_episode_dense_log
    # Fix: removed the unused ``agent = econ.world.agents[agentid]`` lookup.
    agentid = str(agentid)
    # The final states entry is the terminal state; iterate the steps
    # that have matching action/reward entries.
    max_step = len(log["states"]) - 1

    for step in range(max_step):
        print()
        print("=== Step {} ===".format(step))
        print("--- World ---")
        world = log['world'][step]
        for res in worldmaps:
            print("{}: {}".format(res, world[res][0][0]))
        print("--- State ---")
        state = log['states'][step][agentid]
        print(yaml.dump(state))
        print("--- Action ---")
        action = log["actions"][step][agentid]
        if action == {}:
            print("Action: 0 -> NOOP")
        else:
            for k in action:
                print("Action: {}({})".format(k, action[k]))
        print("--- Reward ---")
        print("Reward: {}".format(log["rewards"][step][agentid]))
|
||||||
|
|
||||||
|
# Setup Env Objects
# -----------------
vecenv=EconVecEnv(env_config=env_config)
econ=vecenv.env
monenv=VecMonitor(venv=vecenv,info_keywords=["social/productivity","trend/productivity"])
normenv=VecNormalize(monenv,norm_reward=False,clip_obs=1)
# NOTE(review): the PPO model below trains on ``monenv``; ``normenv`` and
# ``stackenv`` are constructed but never passed to the model, yet
# ``normenv.save`` is used to produce the eval normalizer — confirm intended.
stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
obs=stackenv.reset()

# Random run tag used as the TensorBoard log name.
runname="run_{}".format(int(np.random.rand()*100))

model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")

# Timesteps for one full episode summed across all agents.
total_required_for_episode=env_config['n_agents']*env_config['episode_length']
print("this is run {}".format(runname))
# Endless train/eval loop: ~50 episodes of training, then one eval replay.
while True:
    # Create Eval ENV
    vec_env_eval=EconVecEnv(env_config=eval_env_config)
    vec_mon_eval=VecMonitor(venv=vec_env_eval)
    norm_env_eval=VecNormalize(vec_mon_eval,norm_reward=False,training=False)
    eval_econ = vec_env_eval.env

    #Train
    model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
    normenv.save("temp-normalizer.ai")

    ## Run Eval
    print("### EVAL ###")
    # NOTE(review): VecNormalize.load returns a new wrapped env; here the
    # return value is discarded and stepping uses vec_mon_eval directly,
    # so the loaded normalization stats may not be applied — confirm.
    norm_env_eval.load("temp-normalizer.ai",vec_mon_eval)
    obs=vec_mon_eval.reset()
    done=False
    for i in tqdm(range(eval_env_config['episode_length'])):
        action=model.predict(obs)
        obs,rew,done_e,info=vec_mon_eval.step(action[0])
        done=done_e[0]

    #market=eval_econ.get_component("ContinuousDoubleAuction")
    craft=eval_econ.get_component("SimpleCraft")
    # trades=market.get_dense_log()
    build=craft.get_dense_log()
    # NOTE(review): metrics are read from the TRAINING env (econ) while the
    # replay/build log come from the eval env — confirm this mix is intended.
    met=econ.previous_episode_metrics
    printReplay(eval_econ,0)
    # printMarket(trades)
    printBuilds(builds=build)
    print("social/productivity: {}".format(met["social/productivity"]))
    print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
    print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))

    time.sleep(1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
283
main.py
Normal file
283
main.py
Normal file
@@ -0,0 +1,283 @@
|
|||||||
|
from ai_economist import foundation
|
||||||
|
import numpy as np
|
||||||
|
from stable_baselines3.common.vec_env import vec_frame_stack
|
||||||
|
from stable_baselines3.common.evaluation import evaluate_policy
|
||||||
|
import envs
|
||||||
|
from tqdm import tqdm
|
||||||
|
import components
|
||||||
|
from stable_baselines3.common.env_checker import check_env
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
|
||||||
|
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
|
||||||
|
from sb3_contrib import RecurrentPPO
|
||||||
|
from envs.econ_wrapper import EconVecEnv
|
||||||
|
from stable_baselines3.common.callbacks import BaseCallback
|
||||||
|
import yaml
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Environment configuration used for training.
env_config = {
    # ----- Scenario -----
    # Name of the Scenario class in the Scenario Registry (foundation.scenarios);
    # the environment object will be an instance of that class.
    'scenario_name': 'simple_market',

    # ----- Components -----
    # List of ("component_name", {component_kwargs}) tuples. "component_name"
    # refers to the Component class's name in the Component Registry
    # (foundation.components). Components reset, step, and generate
    # observations in the order listed here.
    'components': [
        # (1) Building houses
        ('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
        # (2) Trading collectible resources (currently disabled)
        #('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
    ],

    # ----- Scenario-specific arguments -----
    # kwargs added by the Scenario class (i.e. not defined in BaseEnvironment).
    'starting_agent_coin': 0,
    'fixed_four_skill_and_loc': True,

    # ----- Standard arguments (defined in BaseEnvironment) -----
    'n_agents': 20,             # Number of non-planner agents (must be > 1)
    'world_size': [1, 1],       # [Height, Width] of the env world
    'episode_length': 256,      # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 100,
    'world_dense_log_frequency': 1,
    'energy_cost': 0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 0,

    # In multi-action-mode the policy selects one action per action subspace
    # (defined in component code); otherwise it selects a single action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,

    # flatten_observations: concatenate scalar & vector observations on output;
    # otherwise return observations with minimal processing.
    'flatten_observations': False,
    # flatten_masks: concatenate each action-subspace mask into a single array.
    # Note: flatten_masks = True is required for masking action logits.
    'flatten_masks': False,
}
|
||||||
|
|
||||||
|
|
||||||
|
# Environment configuration used for evaluation episodes (shorter episodes,
# more frequent dense logging than the training config).
eval_env_config = {
    # ----- Scenario -----
    # Name of the Scenario class in the Scenario Registry (foundation.scenarios);
    # the environment object will be an instance of that class.
    'scenario_name': 'simple_market',

    # ----- Components -----
    # List of ("component_name", {component_kwargs}) tuples. "component_name"
    # refers to the Component class's name in the Component Registry
    # (foundation.components). Components reset, step, and generate
    # observations in the order listed here.
    'components': [
        # (1) Building houses
        ('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
        # (2) Trading collectible resources (currently disabled)
        #('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
    ],

    # ----- Scenario-specific arguments -----
    # kwargs added by the Scenario class (i.e. not defined in BaseEnvironment).
    'starting_agent_coin': 0,
    'fixed_four_skill_and_loc': True,

    # ----- Standard arguments (defined in BaseEnvironment) -----
    'n_agents': 20,             # Number of non-planner agents (must be > 1)
    'world_size': [1, 1],       # [Height, Width] of the env world
    'episode_length': 100,      # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 10,
    'world_dense_log_frequency': 1,
    'energy_cost': 0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 0,

    # In multi-action-mode the policy selects one action per action subspace
    # (defined in component code); otherwise it selects a single action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,

    # flatten_observations: concatenate scalar & vector observations on output;
    # otherwise return observations with minimal processing.
    'flatten_observations': False,
    # flatten_masks: concatenate each action-subspace mask into a single array.
    # Note: flatten_masks = True is required for masking action logits.
    'flatten_masks': False,
}

# Number of stacked frames (currently only referenced here).
num_frames = 2
|
||||||
|
|
||||||
|
class TensorboardCallback(BaseCallback):
    """Log extra scenario metrics (productivity and its per-step trend) to
    tensorboard via the stable-baselines3 logger."""

    def __init__(self, econ, verbose=0):
        super().__init__(verbose)
        self.econ = econ
        # Seed the snapshot so the first delta has something to compare against.
        self.metrics = econ.scenario_metrics()

    def _on_step(self) -> bool:
        # Keep the previous snapshot so we can report the change since last step.
        previous = self.metrics
        finished = self.econ.previous_episode_metrics
        # Prefer metrics from the last completed episode; fall back to the
        # live scenario metrics before any episode has finished.
        self.metrics = self.econ.scenario_metrics() if finished is None else finished
        productivity = self.metrics["social/productivity"]
        delta = productivity - previous["social/productivity"]
        self.logger.record("social/total_productivity", productivity)
        self.logger.record("social/delta_productivity", delta)
        return True
|
||||||
|
|
||||||
|
|
||||||
|
def sample_random_action(agent, mask):
    """Sample random UNMASKED action(s) for agent."""
    if agent.multi_action_mode:
        # One action per action subspace: split the flat mask at the subspace
        # boundaries and draw from each piece, weighting by the mask.
        sub_masks = np.split(mask, agent.action_spaces.cumsum()[:-1])
        return [np.random.choice(len(m), p=m / m.sum()) for m in sub_masks]

    # Single-action mode: one masked draw over the whole action space.
    return np.random.choice(agent.action_spaces, p=mask / mask.sum())
|
||||||
|
|
||||||
|
def sample_random_actions(env, obs):
    """Return a no-op action (0) for every agent present in ``obs``.

    NOTE(review): despite its name and the original docstring ("Samples random
    UNMASKED actions"), this function performs no sampling at all -- every
    agent index in ``obs`` is mapped to action 0 (the no-op). ``env`` is
    unused but kept for interface compatibility with callers.

    Args:
        env: unused (kept for signature compatibility).
        obs: per-agent observation collection; only its length is used.

    Returns:
        dict mapping agent index (int) -> 0.
    """
    return {a_idx: 0 for a_idx in range(len(obs))}
|
||||||
|
|
||||||
|
def printMarket(market):
    """Pretty-print a market component's dense trade log, one section per
    step that actually contains transactions. Always returns ''."""
    for step_idx, transactions in enumerate(market):
        if not transactions:
            continue
        print("=== Step {} ===".format(step_idx))
        for t in transactions:
            line = "({}) {} -> {} | [{}/{}] {} Coins\n".format(
                t["commodity"], t["seller"], t["buyer"],
                t["ask"], t["bid"], t["price"],
            )
            print(line)
    return ""
|
||||||
|
|
||||||
|
def printBuilds(builds):
    """Pretty-print a craft component's dense build log (one line per build,
    prefixed with the step index). Always returns ''."""
    for step_idx, step_builds in enumerate(builds):
        if not step_builds:
            continue
        for b in step_builds:
            line = "({}) Builder: {}, Skill: {}, Income {} ".format(
                step_idx, b["builder"], b["build_skill"], b["income"],
            )
            print(line)
    return ""
|
||||||
|
def printReplay(econ, agentid):
    """Replay one agent's episode from the env's dense log, printing world
    resources, agent state, action, and reward for every step.

    NOTE(review): relies on the module-level ``yaml`` import for dumping the
    agent state; the dense log is assumed to key per-agent entries by the
    string form of the agent id.
    """
    worldmaps = ["Stone", "Wood"]
    log = econ.previous_episode_dense_log
    agent = econ.world.agents[agentid]  # kept for parity with original (unused)

    key = str(agentid)
    last_step = len(log["states"]) - 1

    for step in range(last_step):
        print()
        print("=== Step {} ===".format(step))
        # World resource maps (only the single [0][0] cell of the 1x1 world).
        print("--- World ---")
        world_state = log['world'][step]
        for res in worldmaps:
            print("{}: {}".format(res, world_state[res][0][0]))
        # Full agent state as YAML.
        print("--- State ---")
        print(yaml.dump(log['states'][step][key]))
        # Action taken this step; an empty dict means no-op.
        print("--- Action ---")
        action = log["actions"][step][key]
        if action == {}:
            print("Action: 0 -> NOOP")
        else:
            for k in action:
                print("Action: {}({})".format(k, action[k]))
        # Reward received this step.
        print("--- Reward ---")
        print("Reward: {}".format(log["rewards"][step][key]))
|
||||||
|
|
||||||
|
# ---- Setup env objects -------------------------------------------------
vecenv=EconVecEnv(env_config=env_config)
econ=vecenv.env
monenv=VecMonitor(venv=vecenv,info_keywords=["social/productivity","trend/productivity"])
# NOTE(review): normenv wraps monenv but the model below trains on monenv
# directly, so observations are NOT normalized during training; normenv is
# only used to persist normalizer statistics for the eval env. Confirm this
# is intended.
normenv=VecNormalize(monenv,norm_reward=False,clip_obs=1)
stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
obs=stackenv.reset()

# Random run tag for tensorboard grouping.
runname="run_{}".format(int(np.random.rand()*100))

model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")

# One full episode across all agents = n_agents * episode_length env steps.
total_required_for_episode=env_config['n_agents']*env_config['episode_length']
print("this is run {}".format(runname))
while True:
    # ---- Create a fresh eval env each iteration ----
    vec_env_eval=EconVecEnv(env_config=eval_env_config)
    vec_mon_eval=VecMonitor(venv=vec_env_eval)
    norm_env_eval=VecNormalize(vec_mon_eval,norm_reward=False,training=False)
    eval_econ = vec_env_eval.env

    # ---- Train for ~50 episodes, then snapshot normalizer stats ----
    model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
    normenv.save("temp-normalizer.ai")

    # ---- Run eval: load normalizer stats and roll out one episode ----
    print("### EVAL ###")
    norm_env_eval.load("temp-normalizer.ai",vec_mon_eval)
    obs=vec_mon_eval.reset()
    done=False
    for i in tqdm(range(eval_env_config['episode_length'])):
        action=model.predict(obs)
        obs,rew,done_e,info=vec_mon_eval.step(action[0])
        done=done_e[0]

    # ---- Report eval results ----
    #market=eval_econ.get_component("ContinuousDoubleAuction")
    craft=eval_econ.get_component("SimpleCraft")
    # trades=market.get_dense_log()
    build=craft.get_dense_log()
    # BUGFIX: previously read econ.previous_episode_metrics (the TRAINING env),
    # while every other value reported here comes from the eval env; use the
    # eval env's completed-episode metrics instead.
    met=eval_econ.previous_episode_metrics
    printReplay(eval_econ,0)
    # printMarket(trades)
    printBuilds(builds=build)
    print("social/productivity: {}".format(met["social/productivity"]))
    print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
    print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))

    time.sleep(1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
BIN
ray-3.0.0.dev0-cp310-cp310-win_amd64.whl
Normal file
BIN
ray-3.0.0.dev0-cp310-cp310-win_amd64.whl
Normal file
Binary file not shown.
3
reqirements.txt
Normal file
3
reqirements.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
ai-economist
|
||||||
|
gym
|
||||||
|
ray[rllib]
|
||||||
BIN
temp-normalizer.ai
Normal file
BIN
temp-normalizer.ai
Normal file
Binary file not shown.
Reference in New Issue
Block a user