it is working
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
logs/*
|
||||||
16
.vscode/launch.json
vendored
Normal file
16
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Python: Aktuelle Datei",
|
||||||
|
"type": "python",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${file}",
|
||||||
|
"console": "integratedTerminal",
|
||||||
|
"justMyCode": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
4
components/__init__.py
Normal file
4
components/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
from . import(
|
||||||
|
simple_gather,
|
||||||
|
simple_build
|
||||||
|
)
|
||||||
9
components/noops.py
Normal file
9
components/noops.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
from ai_economist.foundation.base.registrar import Registry
|
||||||
|
from ai_economist.foundation.entities.endogenous import Endogenous, endogenous_registry
|
||||||
|
|
||||||
|
|
||||||
|
@endogenous_registry.add
class Noop(Endogenous):
    """Endogenous quantity tracking consecutive no-op actions taken by an agent."""

    # Registry key under which this endogenous quantity is looked up.
    name = "Noop"
256
components/simple_build.py
Normal file
256
components/simple_build.py
Normal file
@@ -0,0 +1,256 @@
|
|||||||
|
# Copyright (c) 2020, salesforce.com, inc.
|
||||||
|
# All rights reserved.
|
||||||
|
# SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
# For full license text, see the LICENSE file in the repo root
|
||||||
|
# or https://opensource.org/licenses/BSD-3-Clause
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from ai_economist.foundation.base.base_component import (
|
||||||
|
BaseComponent,
|
||||||
|
component_registry,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@component_registry.add
class SimpleCraft(BaseComponent):
    """
    Allows mobile agents to build house landmarks in the world using stone and
    wood, earning income.

    Can be configured to include heterogeneous building skill, in which case
    agents earn different levels of income when building.

    Args:
        payment (int): Base amount of coin agents earn from building.
            Must be >= 0. Default is 10.
        payment_max_skill_multiplier (int): Maximum skill multiplier that an
            agent can sample. Must be >= 1. Default is 1.
        skill_dist (str): Distribution type for sampling skills. Default
            ("none") gives all agents identical skill equal to a multiplier of
            1. "pareto" and "lognormal" sample skills from the associated
            distributions.
        build_labor (float): Labor cost associated with building a house.
            Must be >= 0. Default is 10.
    """

    name = "SimpleCraft"
    component_type = "Build"
    required_entities = ["Wood", "Stone", "Coin", "House", "Labor"]
    agent_subclasses = ["BasicMobileAgent"]

    def __init__(
        self,
        *base_component_args,
        payment=10,
        payment_max_skill_multiplier=1,
        skill_dist="none",
        build_labor=10.0,
        **base_component_kwargs
    ):
        super().__init__(*base_component_args, **base_component_kwargs)

        # Base coin earned per build (scaled by the sampled pay rate on reset).
        self.payment = int(payment)
        assert self.payment >= 0

        self.payment_max_skill_multiplier = int(payment_max_skill_multiplier)
        assert self.payment_max_skill_multiplier >= 1

        # Fixed recipe: one unit each of wood and stone per house.
        self.resource_cost = {"Wood": 1, "Stone": 1}

        self.build_labor = float(build_labor)
        assert self.build_labor >= 0

        self.skill_dist = skill_dist.lower()
        assert self.skill_dist in ["none", "pareto", "lognormal"]

        # Per-agent skill samples keyed by agent index; filled on reset.
        self.sampled_skills = {}

        # One entry per timestep; each entry lists that step's build events.
        self.builds = []

    def agent_can_build(self, agent):
        """Return True if the agent's inventory covers the full resource cost."""
        return all(
            agent.state["inventory"][resource] >= cost
            for resource, cost in self.resource_cost.items()
        )

    # Required methods for implementing components
    # --------------------------------------------

    def get_n_actions(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        Adds a single action (build) for mobile agents.
        """
        if agent_cls_name == "BasicMobileAgent":
            return 1
        return None

    def get_additional_state_fields(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        For mobile agents, adds state fields for build payment and skill.
        """
        if agent_cls_name not in self.agent_subclasses:
            return {}
        if agent_cls_name == "BasicMobileAgent":
            return {"build_payment": float(self.payment), "build_skill": 1}
        raise NotImplementedError

    def component_step(self):
        """
        See base_component.py for detailed description.

        Converts stone+wood to house+coin for agents that choose to build and
        are able to.
        """
        world = self.world
        round_builds = []

        # Resolve build actions in a randomized agent order.
        for agent in world.get_random_order_agents():

            action = agent.get_component_action(self.name)

            # This component doesn't apply to this agent.
            if action is None:
                continue

            if action == 0:
                # NO-OP: nothing to do.
                pass

            elif action == 1:
                # Build, if possible.
                if self.agent_can_build(agent):
                    # Pay the resource cost.
                    for resource, cost in self.resource_cost.items():
                        agent.state["inventory"][resource] -= cost

                    # Receive payment for the house.
                    agent.state["inventory"]["Coin"] += agent.state["build_payment"]

                    # Incur the labor cost for building.
                    agent.state["endogenous"]["Labor"] += self.build_labor

                    round_builds.append(
                        {
                            "builder": agent.idx,
                            "build_skill": self.sampled_skills[agent.idx],
                            "income": float(agent.state["build_payment"]),
                        }
                    )
                else:
                    # Chose to build without the required resources.
                    agent.bad_action=True

            else:
                # Only action indices {0, 1} are defined for this component.
                raise ValueError

        self.builds.append(round_builds)

    def generate_observations(self):
        """
        See base_component.py for detailed description.

        Agents observe their (payment-normalized) build payment and their
        build skill. The planner does not observe anything from this component.
        """
        return {
            agent.idx: {
                "build_payment": agent.state["build_payment"] / self.payment,
                "build_skill": self.sampled_skills[agent.idx],
            }
            for agent in self.world.agents
        }

    def generate_masks(self, completions=0):
        """
        See base_component.py for detailed description.

        The build action is masked for any agent that currently lacks the
        required resources (see agent_can_build).
        """
        return {
            agent.idx: np.array([self.agent_can_build(agent)])
            for agent in self.world.agents
        }

    # For non-required customization
    # ------------------------------

    def get_metrics(self):
        """
        Metrics that capture what happened through this component.

        Returns:
            metrics (dict): A dictionary of {"metric_name": metric_value},
                where metric_value is a scalar.
        """
        world = self.world

        # Count completed builds per agent across all logged timesteps.
        build_stats = {a.idx: {"n_builds": 0} for a in world.agents}
        for step_builds in self.builds:
            for build in step_builds:
                build_stats[build["builder"]]["n_builds"] += 1

        out_dict = {}
        for a in world.agents:
            for k, v in build_stats[a.idx].items():
                out_dict["{}/{}".format(a.idx, k)] = v

        # Total number of houses currently on the map.
        out_dict["total_builds"] = np.sum(world.maps.get("House") > 0)

        return out_dict

    def additional_reset_steps(self):
        """
        See base_component.py for detailed description.

        Re-samples agents' building skills.
        """
        world = self.world

        self.sampled_skills = {agent.idx: 1 for agent in world.agents}

        PMSM = self.payment_max_skill_multiplier

        for agent in world.agents:
            if self.skill_dist == "none":
                sampled_skill = 1
                pay_rate = 1
            elif self.skill_dist == "pareto":
                sampled_skill = np.random.pareto(4)
                # Pay rate grows with skill but is capped at PMSM.
                pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1)
            elif self.skill_dist == "lognormal":
                sampled_skill = np.random.lognormal(-1, 0.5)
                pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1)
            else:
                raise NotImplementedError

            agent.state["build_payment"] = float(pay_rate * self.payment)
            agent.state["build_skill"] = float(sampled_skill)

            self.sampled_skills[agent.idx] = sampled_skill

        self.builds = []

    def get_dense_log(self):
        """
        Log builds.

        Returns:
            builds (list): A list of build events. Each entry corresponds to a
                single timestep and contains a description of any builds that
                occurred on that timestep.
        """
        return self.builds
214
components/simple_gather.py
Normal file
214
components/simple_gather.py
Normal file
@@ -0,0 +1,214 @@
|
|||||||
|
# Copyright (c) 2020, salesforce.com, inc.
|
||||||
|
# All rights reserved.
|
||||||
|
# SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
# For full license text, see the LICENSE file in the repo root
|
||||||
|
# or https://opensource.org/licenses/BSD-3-Clause
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from numpy.random import rand
|
||||||
|
|
||||||
|
from ai_economist.foundation.base.base_component import (
|
||||||
|
BaseComponent,
|
||||||
|
component_registry,
|
||||||
|
)
|
||||||
|
from ai_economist.foundation.entities import resource_registry, resources
|
||||||
|
|
||||||
|
@component_registry.add
class SimpleGather(BaseComponent):
    """
    Allows mobile agents to collect resources available in the world.

    Can be configured to include collection skill, where agents have
    heterogeneous probabilities of collecting bonus resources without
    additional labor cost.

    Args:
        collect_labor (float): Labor cost added each time an agent collects
            resources. Must be >= 0. Default is 1.0.
        skill_dist (str): Distribution type for sampling skills. Default
            ("none") gives all agents identical skill equal to a bonus prob of
            0. "pareto" and "lognormal" sample skills from the associated
            distributions.
    """

    name = "SimpleGather"
    required_entities = ["Coin", "House", "Labor"]
    agent_subclasses = ["BasicMobileAgent"]

    def __init__(
        self,
        *base_component_args,
        collect_labor=1.0,
        skill_dist="none",
        **base_component_kwargs
    ):
        super().__init__(*base_component_args, **base_component_kwargs)

        self.collect_labor = float(collect_labor)
        assert self.collect_labor >= 0

        self.skill_dist = skill_dist.lower()
        assert self.skill_dist in ["none", "pareto", "lognormal"]

        # One entry per timestep; each entry lists that step's gather events.
        self.gathers = []

        # The collectible resources registered in this world.
        self.commodities = [
            r for r in self.world.resources if resource_registry.get(r).collectible
        ]

    # Required methods for implementing components
    # --------------------------------------------

    def get_n_actions(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        Adds 1 action per commodity that can be picked up.
        """
        if agent_cls_name == "BasicMobileAgent":
            return len(self.commodities)
        return None

    def get_additional_state_fields(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        For mobile agents, adds a state field for collection skill.
        """
        if agent_cls_name not in self.agent_subclasses:
            return {}
        if agent_cls_name == "BasicMobileAgent":
            return {"bonus_gather_prob": 0.0}
        raise NotImplementedError

    def component_step(self):
        """
        See base_component.py for detailed description.

        Agents pick up resources from the environment when available.
        """
        world = self.world

        step_gathers = []
        for agent in world.get_random_order_agents():

            if self.name not in agent.action:
                continue
            resource_action = agent.get_component_action(self.name)

            if resource_action == 0:  # NO-OP
                continue

            # Action indices start at 1; shift down to index self.commodities.
            resource_action -= 1

            commodity = self.commodities[resource_action]

            if self.get_num_resources(commodity) > 0:
                step_gathers.append(self.pickup(commodity, agent))
            else:
                # Chose to gather a resource that is not available.
                agent.bad_action=True
                continue

        self.gathers.append(step_gathers)

    def generate_observations(self):
        """
        See base_component.py for detailed description.

        Agents observe their bonus-gather probability plus, for each
        commodity, its per-agent availability (capped at 1). The planner does
        not observe anything from this component.
        """
        num_agent = len(self.world.agents)

        # Shared observation: per-commodity availability per agent.
        shared_obs = {}
        for commodity in self.commodities:
            key = "pickup_perc_{}".format(commodity)
            availability = float(self.get_num_resources(commodity) / num_agent)
            if availability > 1:
                availability = 1
            shared_obs[key] = availability

        obs = {}
        for agent in self.world.agents:
            obs[agent.idx] = {"bonus_gather_prob": agent.state["bonus_gather_prob"]}
            obs[agent.idx].update(shared_obs)
        return obs

    def generate_masks(self, completions=0):
        """
        See base_component.py for detailed description.

        Each commodity's gather action is masked whenever that resource is
        unavailable; the same mask applies to every agent.
        """
        world = self.world

        shared_mask = []
        for commodity in self.commodities:
            shared_mask.append(1 if self.get_num_resources(commodity) > 0 else 0)

        masks = {}
        for agent in world.agents:
            masks[agent.idx] = shared_mask

        return masks

    # For non-required customization
    # ------------------------------

    def additional_reset_steps(self):
        """
        See base_component.py for detailed description.

        Re-samples agents' collection skills.
        """
        for agent in self.world.agents:
            if self.skill_dist == "none":
                bonus_rate = 0.0
            elif self.skill_dist == "pareto":
                bonus_rate = np.minimum(2, np.random.pareto(3)) / 2
            elif self.skill_dist == "lognormal":
                bonus_rate = np.minimum(2, np.random.lognormal(-2.022, 0.938)) / 2
            else:
                raise NotImplementedError
            agent.state["bonus_gather_prob"] = float(bonus_rate)

        self.gathers = []

    def get_dense_log(self):
        """
        Log resource collections.

        Returns:
            gathers (list): A list of gather events. Each entry corresponds to
                a single timestep and contains a description of any resource
                gathers that occurred on that timestep.
        """
        return self.gathers

    # For Components

    def get_num_resources(self, res: resources.Resource):
        """Return how many units of *res* are available for pickup.

        NOTE(review): reads the map at the fixed point (0, 0) — presumably the
        whole stock of each resource lives on that tile in this scenario;
        confirm against the scenario's map layout.
        """
        return self.world.maps.get_point(res, 0, 0)

    def pickup(self, res: resources.Resource, agent):
        """Transfer one unit of *res* (plus a possible skill bonus) to *agent*
        and return a dict describing the gather event."""
        # A skilled agent collects one bonus unit with prob. bonus_gather_prob.
        n_gathered = 1 + (rand() < agent.state["bonus_gather_prob"])
        agent.state["inventory"][res] += n_gathered
        agent.state["endogenous"]["Labor"] += self.collect_labor
        self.world.consume_resource(res, 0, 0)
        # Log the gather
        return dict(
            agent=agent.idx,
            resource=res,
            n=n_gathered,
        )
227
envs/econ_wrapper.py
Normal file
227
envs/econ_wrapper.py
Normal file
@@ -0,0 +1,227 @@
|
|||||||
|
from collections import OrderedDict
|
||||||
|
from copy import deepcopy
|
||||||
|
from typing import Any, Callable, List, Optional, Sequence, Type, Union
|
||||||
|
from ai_economist.foundation.base import base_env
|
||||||
|
|
||||||
|
import gym
|
||||||
|
import gym.spaces
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from stable_baselines3.common.vec_env.base_vec_env import VecEnv, VecEnvIndices, VecEnvObs, VecEnvStepReturn
|
||||||
|
from stable_baselines3.common.vec_env.util import copy_obs_dict, dict_to_obs, obs_space_info
|
||||||
|
|
||||||
|
from ai_economist import foundation
|
||||||
|
|
||||||
|
class EconVecEnv(VecEnv, gym.Env):
    """
    A single-process VecEnv wrapper around one ai-economist environment.

    Each mobile agent of the multi-agent economic simulation is exposed as one
    "env" of the vectorized interface, so single-agent SB3 algorithms can be
    trained against the simulation without multiprocessing overhead.

    :param env_config: keyword arguments forwarded to
        ``foundation.make_env_instance``.
    """

    def __init__(self, env_config):
        self.config = env_config
        env = foundation.make_env_instance(**env_config)
        self.env = env

        # Build the flat observation/action spaces from a sample reset.
        obs = env.reset()
        actions = env.world.agents[0].action_spaces
        obs1 = obs["0"]
        # The action mask and clock are not part of the learned observation.
        del obs1["action_mask"]
        del obs1["time"]
        self.observation_space = gym.spaces.Box(
            low=0, high=np.inf, shape=(len(obs1),), dtype=np.float32
        )
        self.action_space = gym.spaces.Discrete(actions)

        # One vectorized "env" per mobile agent.
        self.num_envs = env.world.n_agents

        VecEnv.__init__(
            self, self.num_envs, self.observation_space, action_space=self.action_space
        )
        self.keys, shapes, dtypes = obs_space_info(self.observation_space)

        self.buf_obs = OrderedDict(
            [
                (k, np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k]))
                for k in self.keys
            ]
        )
        self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
        self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        self.actions = None

    def step_async(self, actions: np.ndarray) -> None:
        """Store the actions; they are applied in ``step_wait``."""
        self.actions = actions

    def step_wait(self) -> VecEnvStepReturn:
        """Apply the stored actions; return (obs, rewards, dones, infos)."""
        # Convert the flat action vector into the per-agent dict the econ
        # environment expects (keys are stringified agent indices).
        r_action = {}
        for ai_idx in range(len(self.actions)):
            r_action[str(ai_idx)] = self.actions[ai_idx]

        obs, rew, done, info = self.env.step(r_action)
        obs_g = self._convert_econ_obs_to_gym(obs)
        rew_g = self._convert_econ_to_gym(rew)
        info_g = self._convert_econ_to_gym(info)

        # Attach scenario-level productivity metrics to every agent's info.
        prev_metrics = self.metrics
        self.metrics = self.env.scenario_metrics()
        curr_prod = self.metrics["social/productivity"]
        trend_pord = curr_prod - prev_metrics["social/productivity"]

        for agent_info in info_g:
            agent_info["social/productivity"] = curr_prod
            agent_info["trend/productivity"] = trend_pord

        done_g = [False] * self.num_envs
        done = done["__all__"]
        if done:
            # VecEnv contract: on episode end, expose the terminal observation
            # via info and return the fresh reset observation instead.
            for i in range(self.num_envs):
                done_g[i] = done
                info_g[i]["terminal_observation"] = obs_g[i]
            obs_g = self.reset()

        return (np.copy(obs_g), np.copy(rew_g), np.copy(done_g), deepcopy(info_g))

    def step_predict(self, actions):
        """Work around the malformed action tensor from SB3's predict method."""
        return self.step(actions[0])

    def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
        # NOTE(review): iterates self.envs, which is never assigned on this
        # class (only self.env exists) — calling this will raise
        # AttributeError; confirm intent before relying on it.
        if seed is None:
            seed = np.random.randint(0, 2**32 - 1)
        seeds = []
        for idx, env in enumerate(self.envs):
            seeds.append(env.seed(seed + idx))
        return seeds

    def reset(self) -> VecEnvObs:
        """Reset the wrapped environment; return the stacked observations."""
        obs = self.env.reset()
        self.metrics = self.env.scenario_metrics()
        return self._convert_econ_obs_to_gym(obs)

    def close(self) -> None:
        self.env.close()

    def get_images(self) -> Sequence[np.ndarray]:
        # NOTE(review): self.envs is never assigned on this class; see seed().
        return [env.render(mode="rgb_array") for env in self.envs]

    def render(self, mode: str = "human") -> Optional[np.ndarray]:
        """
        Gym environment rendering. If there are multiple environments then
        they are tiled together in one image via ``BaseVecEnv.render()``.
        Otherwise (if ``self.num_envs == 1``), we pass the render call
        directly to the underlying environment.

        Therefore, some arguments such as ``mode`` will have values that are
        valid only when ``num_envs == 1``.

        :param mode: The rendering type.
        """
        # NOTE(review): self.envs is never assigned on this class; see seed().
        if self.num_envs == 1:
            return self.envs[0].render(mode=mode)
        else:
            return super().render(mode=mode)

    def _save_obs(self, env_idx: int, obs: VecEnvObs) -> None:
        """Copy one env's observation into the shared observation buffer."""
        for key in self.keys:
            if key is None:
                self.buf_obs[key][env_idx] = obs
            else:
                self.buf_obs[key][env_idx] = obs[key]

    def _obs_from_buf(self) -> VecEnvObs:
        return dict_to_obs(self.observation_space, copy_obs_dict(self.buf_obs))

    def get_attr(self, attr_name: str, indices: VecEnvIndices = None) -> List[Any]:
        """Return attribute from vectorized environment (see base class)."""
        target_envs = self._get_target_envs(indices)
        return [getattr(env_i, attr_name) for env_i in target_envs]

    def set_attr(self, attr_name: str, value: Any, indices: VecEnvIndices = None) -> None:
        """Set attribute inside vectorized environments (see base class)."""
        target_envs = self._get_target_envs(indices)
        for env_i in target_envs:
            setattr(env_i, attr_name, value)

    def env_method(self, method_name: str, *method_args, indices: VecEnvIndices = None, **method_kwargs) -> List[Any]:
        """Call instance methods of vectorized environments."""
        target_envs = self._get_target_envs(indices)
        return [
            getattr(env_i, method_name)(*method_args, **method_kwargs)
            for env_i in target_envs
        ]

    def env_is_wrapped(self, wrapper_class: Type[gym.Wrapper], indices: VecEnvIndices = None) -> List[bool]:
        """Check if worker environments are wrapped with a given wrapper"""
        target_envs = self._get_target_envs(indices)
        # Import here to avoid a circular import
        from stable_baselines3.common import env_util

        return [env_util.is_wrapped(env_i, wrapper_class) for env_i in target_envs]

    def _get_target_envs(self, indices: VecEnvIndices) -> List[gym.Env]:
        # NOTE(review): self.envs is never assigned on this class; see seed().
        indices = self._get_indices(indices)
        return [self.envs[i] for i in indices]

    # Conversion helpers between econ-env dicts and gym-style lists/arrays
    # --------------------------------------------------------------------

    def _convert_econ_to_gym(self, econ):
        """Drop the planner ("p") entry and return the agent values as a list.

        NOTE: mutates *econ* in place (deletes the "p" key).
        """
        del econ["p"]
        return [v for k, v in econ.items()]

    def _convert_gym_to_acon(self, gy):
        """Rebuild a dict from an iterable of (key, value) pairs."""
        econ = {}
        for k, v in gy:
            econ[k] = v
        return econ

    def _convert_econ_obs_to_gym(self, econ):
        """Convert the per-agent observation dicts into one stacked array.

        NOTE: mutates *econ* in place (deletes the planner entry and each
        agent's "time" / "action_mask" fields).
        """
        gy = [None] * self.num_envs
        del econ["p"]
        for k, v in econ.items():
            # The action mask and clock are not part of the learned observation.
            del v["time"]
            del v["action_mask"]
            gy[int(k)] = np.array(self.extract_dict(v))
        return np.stack(gy)

    def extract_dict(self, obj):
        """Recursively convert a dict (or iterable) into a nested list of values."""
        output = []
        use_key = isinstance(obj, dict)
        for v in obj:
            if use_key:
                v = obj[v]
            if isinstance(v, dict):
                output.append(self.extract_dict(v))
            else:
                output.append(v)
        return output
472
envs/simple_market.py
Normal file
472
envs/simple_market.py
Normal file
@@ -0,0 +1,472 @@
|
|||||||
|
# Copyright (c) 2020, salesforce.com, inc.
|
||||||
|
# All rights reserved.
|
||||||
|
# SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
# For full license text, see the LICENSE file in the repo root
|
||||||
|
# or https://opensource.org/licenses/BSD-3-Clause
|
||||||
|
|
||||||
|
from copy import deepcopy
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import signal
|
||||||
|
|
||||||
|
from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry
|
||||||
|
from ai_economist.foundation.scenarios.utils import rewards, social_metrics
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
|
@scenario_registry.add
|
||||||
|
class SimpleMarket(BaseEnvironment):
|
||||||
|
"""
|
||||||
|
World containing stone and wood with stochastic regeneration. Refers to a fixed
|
||||||
|
layout file (see ./map_txt/ for examples) to determine the spatial arrangement of
|
||||||
|
stone, wood, and water tiles.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
planner_gets_spatial_obs (bool): Whether the planner agent receives spatial
|
||||||
|
observations from the world.
|
||||||
|
full_observability (bool): Whether the mobile agents' spatial observation
|
||||||
|
includes the full world view or is instead an egocentric view.
|
||||||
|
mobile_agent_observation_range (int): If not using full_observability,
|
||||||
|
the spatial range (on each side of the agent) that is visible in the
|
||||||
|
spatial observations.
|
||||||
|
env_layout_file (str): Name of the layout file in ./map_txt/ to use.
|
||||||
|
Note: The world dimensions of that layout must match the world dimensions
|
||||||
|
argument used to construct the environment.
|
||||||
|
resource_regen_prob (float): Probability that an empty source tile will
|
||||||
|
regenerate a new resource unit.
|
||||||
|
fixed_four_skill_and_loc (bool): Whether to use a fixed set of build skills and
|
||||||
|
starting locations, with agents grouped into starting locations based on
|
||||||
|
which skill quartile they are in. False, by default.
|
||||||
|
True, for experiments in https://arxiv.org/abs/2004.13332.
|
||||||
|
Note: Requires that the environment uses the "Build" component with
|
||||||
|
skill_dist="pareto".
|
||||||
|
starting_agent_coin (int, float): Amount of coin agents have at t=0. Defaults
|
||||||
|
to zero coin.
|
||||||
|
isoelastic_eta (float): Parameter controlling the shape of agent utility
|
||||||
|
wrt coin endowment.
|
||||||
|
energy_cost (float): Coefficient for converting labor to negative utility.
|
||||||
|
energy_warmup_constant (float): Decay constant that controls the rate at which
|
||||||
|
the effective energy cost is annealed from 0 to energy_cost. Set to 0
|
||||||
|
(default) to disable annealing, meaning that the effective energy cost is
|
||||||
|
always energy_cost. The units of the decay constant depend on the choice of
|
||||||
|
energy_warmup_method.
|
||||||
|
energy_warmup_method (str): How to schedule energy annealing (warmup). If
|
||||||
|
"decay" (default), use the number of completed episodes. If "auto",
|
||||||
|
use the number of timesteps where the average agent reward was positive.
|
||||||
|
planner_reward_type (str): The type of reward used for the planner. Options
|
||||||
|
are "coin_eq_times_productivity" (default),
|
||||||
|
"inv_income_weighted_coin_endowment", and "inv_income_weighted_utility".
|
||||||
|
mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/
|
||||||
|
"coin_eq_times_productivity". Default is 0, which weights equality and
|
||||||
|
productivity equally. If set to 1, only productivity is rewarded.
|
||||||
|
"""
|
||||||
|
|
||||||
|
name = "simple_market"
|
||||||
|
agent_subclasses = ["BasicMobileAgent"]
|
||||||
|
required_entities = ["Wood", "Stone", "Water"]
|
||||||
|
|
||||||
|
def __init__(
    self,
    *base_env_args,
    resource_regen_prob=0.01,
    fixed_four_skill_and_loc=False,
    starting_agent_coin=0,
    isoelastic_eta=0.23,
    energy_cost=0.21,
    energy_warmup_constant=0,
    energy_warmup_method="decay",
    planner_reward_type="coin_eq_times_productivity",
    mixing_weight_gini_vs_coin=0.0,
    **base_env_kwargs,
):
    """Configure the scenario on top of the base environment.

    See the class docstring for parameter descriptions. Base-env args and
    kwargs are forwarded unchanged to the parent constructor.
    """
    super().__init__(*base_env_args, **base_env_kwargs)

    # NOTE(review): ``fixed_four_skill_and_loc`` is accepted but never
    # stored or used in this constructor — confirm whether it is needed.

    # Per-resource regeneration settings consumed by the world maps.
    self.layout_specs = dict(
        Wood={
            "regen_weight": float(resource_regen_prob),
            "regen_halfwidth": 0,
            "max_health": 1,
        },
        Stone={
            "regen_weight": float(resource_regen_prob),
            "regen_halfwidth": 0,
            "max_health": 1,
        },
    )
    assert 0 <= self.layout_specs["Wood"]["regen_weight"] <= 1
    assert 0 <= self.layout_specs["Stone"]["regen_weight"] <= 1

    # How much coin agents begin with upon reset.
    self.starting_agent_coin = float(starting_agent_coin)
    assert self.starting_agent_coin >= 0.0

    # Controls the diminishing marginal utility of coin.
    # isoelastic_eta=0 means no diminishing utility.
    self.isoelastic_eta = float(isoelastic_eta)
    assert 0.0 <= self.isoelastic_eta <= 1.0

    # The amount that labor is weighted in utility computation
    # (once annealing is finished).
    self.energy_cost = float(energy_cost)
    assert self.energy_cost >= 0

    # Which method to use for calculating the progress of energy annealing:
    #   'decay': # of completed episodes
    #   'auto' : # of timesteps where avg. agent reward > 0
    self.energy_warmup_method = energy_warmup_method.lower()
    assert self.energy_warmup_method in ["decay", "auto"]
    # Decay constant for annealing to full energy cost
    # (if energy_warmup_constant == 0, there is no annealing).
    self.energy_warmup_constant = float(energy_warmup_constant)
    assert self.energy_warmup_constant >= 0
    # Counter driving the "auto" warmup schedule (bumped in compute_reward).
    self._auto_warmup_integrator = 0

    # Which social welfare function to use for the planner's reward.
    self.planner_reward_type = str(planner_reward_type).lower()

    # How much to weight equality if using SWF=eq*prod:
    #   0 -> SWF = eq * prod
    #   1 -> SWF = prod
    self.mixing_weight_gini_vs_coin = float(mixing_weight_gini_vs_coin)
    assert 0 <= self.mixing_weight_gini_vs_coin <= 1.0

    # Trackers used to deliver marginal metric changes as reward.
    self.init_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.prev_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.curr_optimization_metric = {agent.idx: 0 for agent in self.all_agents}

    # NOTE(review): the string below is a no-op statement (mid-function
    # bare string), kept byte-identical; it acts only as a section marker.
    """
    Fixed Four Skill and Loc
    ------------------------
    """
    # Starting positions are initialized empty here; presumably filled in
    # elsewhere when fixed starting locations are used — TODO confirm.
    self.agent_starting_pos = {agent.idx: [] for agent in self.world.agents}

    # NOTE(review): attribute name looks like a typo ("loged"); kept as-is
    # because external code may reference it.
    self.last_log_loged={}
|
||||||
|
|
||||||
|
|
||||||
|
@property
def energy_weight(self):
    """Fraction of the full labor cost currently applied (annealing progress).

    Multiply by ``self.energy_cost`` to obtain the effective labor
    coefficient. If the warmup constant is zero or negative, annealing is
    disabled and the weight is always 1.
    """
    tau = self.energy_warmup_constant
    if tau <= 0.0:
        return 1.0

    if self.energy_warmup_method == "decay":
        # Progress measured in completed episodes.
        progress = self._completions
    elif self.energy_warmup_method == "auto":
        # Progress measured in timesteps with positive avg. agent reward.
        progress = self._auto_warmup_integrator
    else:
        raise NotImplementedError

    return float(1.0 - np.exp(-progress / tau))
|
||||||
|
|
||||||
|
def is_bad_action(self, agent):
    """Return whether *agent* flagged a bad action this step, clearing the flag."""
    was_bad = agent.bad_action
    agent.bad_action = False
    return was_bad
|
||||||
|
def get_current_optimization_metrics(self):
    """Compute the per-actor optimization metric from the current state.

    Mobile agents get isoelastic coin utility minus the (annealed) labor
    cost; the planner gets the configured social welfare function.

    Returns:
        dict: {agent.idx: metric} with an entry for each agent (including
        the planner) in the env.

    Raises:
        NotImplementedError: if ``self.planner_reward_type`` is not one of
        the supported options.
    """
    metrics = {}

    # --- Mobile agents -------------------------------------------------
    for agent in self.world.agents:
        metrics[agent.idx] = rewards.isoelastic_coin_minus_labor(
            coin_endowment=agent.total_endowment("Coin"),
            total_labor=agent.state["endogenous"]["Labor"],
            isoelastic_eta=self.isoelastic_eta,
            labor_coefficient=self.energy_weight * self.energy_cost,
        )

    # --- Planner -------------------------------------------------------
    coin_endowments = np.array(
        [agent.total_endowment("Coin") for agent in self.world.agents]
    )
    planner_idx = self.world.planner.idx
    if self.planner_reward_type == "coin_eq_times_productivity":
        metrics[planner_idx] = rewards.coin_eq_times_productivity(
            coin_endowments=coin_endowments,
            equality_weight=1 - self.mixing_weight_gini_vs_coin,
        )
    elif self.planner_reward_type == "inv_income_weighted_coin_endowments":
        metrics[planner_idx] = rewards.inv_income_weighted_coin_endowments(
            coin_endowments=coin_endowments
        )
    elif self.planner_reward_type == "inv_income_weighted_utility":
        metrics[planner_idx] = rewards.inv_income_weighted_utility(
            coin_endowments=coin_endowments,
            utilities=np.array(
                [metrics[agent.idx] for agent in self.world.agents]
            ),
        )
    else:
        print("No valid planner reward selected!")
        raise NotImplementedError
    return metrics
|
||||||
|
|
||||||
|
# The following methods must be implemented for each scenario
|
||||||
|
# -----------------------------------------------------------
|
||||||
|
|
||||||
|
def reset_starting_layout(self):
    """Part 1/2 of scenario reset: rebuild the resource/landmark layout.

    Clears all world maps, then seeds one unit each of Wood and Stone at
    the single world cell (0, 0).
    """
    self.world.maps.clear()
    for resource_name in ("Wood", "Stone"):
        self.world.maps.set_point_add(resource_name, 0, 0, 1)
|
||||||
|
|
||||||
|
def reset_agent_states(self):
    """Part 2/2 of scenario reset: reset agent and planner state.

    Empties every mobile agent's inventory, escrow, and endogenous
    quantities, grants the configured starting coin, and clears the
    bad-action flag; then zeroes the planner's inventory and escrow.
    Agent map locations are cleared first. Note: if using
    fixed_four_skill_and_loc, starting locations are overridden later in
    self.additional_reset_steps.
    """
    self.world.clear_agent_locs()

    for agent in self.world.agents:
        agent.state["inventory"] = {k: 0 for k in agent.inventory.keys()}
        # Fix: key the escrow dict off the agent's escrow (the original
        # used inventory keys), consistent with the planner reset below.
        agent.state["escrow"] = {k: 0 for k in agent.escrow.keys()}
        agent.state["endogenous"] = {k: 0 for k in agent.endogenous.keys()}
        # Add starting coin.
        agent.state["inventory"]["Coin"] = float(self.starting_agent_coin)
        agent.bad_action = False

    self.world.planner.state["inventory"] = {
        k: 0 for k in self.world.planner.inventory.keys()
    }
    self.world.planner.state["escrow"] = {
        k: 0 for k in self.world.planner.escrow.keys()
    }
|
||||||
|
|
||||||
|
|
||||||
|
def scenario_step(self):
    """Advance scenario-managed world state for one timestep.

    Called by the base environment's ``step`` after each component step
    and before observations/rewards are generated. Here it handles
    resource regeneration: 20 units each of Wood and Stone are added at
    the single world cell (0, 0).
    """
    for resource_name in ("Wood", "Stone"):
        self.world.maps.set_point_add(resource_name, 0, 0, 20)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_observations(self):
    """Build this scenario's observations.

    Returns:
        dict: {agent.idx: obs_dict}. The planner (keyed by its idx)
        observes its own inventory; each mobile agent (keyed by its
        stringified index) observes its own inventory. All inventory
        values are scaled by ``self.inv_scale``.
    """
    obs = {}

    # Planner sees its own scaled inventory.
    obs[self.world.planner.idx] = {
        "inventory-" + name: amount * self.inv_scale
        for name, amount in self.world.planner.inventory.items()
    }

    # Each mobile agent sees its own scaled inventory.
    for agent in self.world.agents:
        obs[str(agent.idx)] = {
            "inventory-" + name: amount * self.inv_scale
            for name, amount in agent.inventory.items()
        }

    return obs
|
||||||
|
|
||||||
|
def compute_reward(self):
    """Compute per-actor rewards for this timestep.

    Reward is the marginal change in each actor's optimization metric
    (utility for agents, social welfare for the planner) since the last
    step, so maximizing cumulative reward maximizes the terminal-state
    metric. Mobile agents additionally receive a -1 penalty when their
    bad-action flag was set this step.

    Returns:
        dict: {agent.idx: scalar reward} for every actor incl. planner.
    """
    # Metrics have not been refreshed yet, so they reflect the last step.
    prior_metrics = deepcopy(self.curr_optimization_metric)

    # Refresh metrics to the current state.
    self.curr_optimization_metric = self.get_current_optimization_metrics()

    # Marginal improvement, with a bad-action penalty for mobile agents.
    rew = {
        idx: float(value - prior_metrics[idx])
        for idx, value in self.curr_optimization_metric.items()
    }
    for idx in rew:
        if idx != "p" and self.is_bad_action(self.world.agents[idx]):
            rew[idx] -= 1

    # Store the previous objective values.
    self.prev_optimization_metric.update(prior_metrics)

    # Automatic Energy Cost Annealing: count timesteps where the average
    # mobile-agent reward was positive (drives the "auto" warmup method).
    if np.mean([rew[a.idx] for a in self.world.agents]) > 0:
        self._auto_warmup_integrator += 1

    return rew
|
||||||
|
|
||||||
|
# Optional methods for customization
|
||||||
|
# ----------------------------------
|
||||||
|
|
||||||
|
def additional_reset_steps(self):
    """Final step of the reset cycle: re-seed the metric trackers.

    Runs after reset_starting_layout/reset_agent_states and after every
    component's reset. The current, initial, and previous optimization
    metric trackers are all set to the metrics of the freshly reset
    state, so the first step's reward is a true marginal change.
    """
    baseline = self.get_current_optimization_metrics()
    self.curr_optimization_metric = deepcopy(baseline)
    self.init_optimization_metric = deepcopy(baseline)
    self.prev_optimization_metric = deepcopy(baseline)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def scenario_metrics(self):
    """Summarize social metrics, endowments, utilities, and labor annealing.

    Returns:
        dict: flat {metric_key: scalar} mapping (no nesting or lists),
        collected alongside component metrics in the 'metrics' property.
    """
    metrics = {}

    # Coin-based social metrics.
    endowments = np.array(
        [agent.total_endowment("Coin") for agent in self.world.agents]
    )
    metrics["social/productivity"] = social_metrics.get_productivity(endowments)
    metrics["social/equality"] = social_metrics.get_equality(endowments)

    # Social welfare under each supported SWF.
    utilities = np.array(
        [self.curr_optimization_metric[agent.idx] for agent in self.world.agents]
    )
    metrics["social_welfare/coin_eq_times_productivity"] = (
        rewards.coin_eq_times_productivity(
            coin_endowments=endowments, equality_weight=1.0
        )
    )
    metrics["social_welfare/inv_income_weighted_coin_endow"] = (
        rewards.inv_income_weighted_coin_endowments(coin_endowments=endowments)
    )
    metrics["social_welfare/inv_income_weighted_utility"] = (
        rewards.inv_income_weighted_utility(
            coin_endowments=endowments, utilities=utilities
        )
    )

    # Per-actor endowments, endogenous quantities, and utilities.
    for agent in self.all_agents:
        for resource in agent.inventory:
            metrics["endow/{}/{}".format(agent.idx, resource)] = (
                agent.total_endowment(resource)
            )
        if agent.endogenous is not None:
            for resource, quantity in agent.endogenous.items():
                metrics["endogenous/{}/{}".format(agent.idx, resource)] = quantity
        metrics["util/{}".format(agent.idx)] = (
            self.curr_optimization_metric[agent.idx]
        )

    # Labor weight / annealing progress.
    metrics["labor/weighted_cost"] = self.energy_cost * self.energy_weight
    metrics["labor/warmup_integrator"] = int(self._auto_warmup_integrator)

    return metrics
|
||||||
|
|
||||||
283
main working way to good.pys
Normal file
283
main working way to good.pys
Normal file
@@ -0,0 +1,283 @@
|
|||||||
|
from ai_economist import foundation
|
||||||
|
import numpy as np
|
||||||
|
from stable_baselines3.common.vec_env import vec_frame_stack
|
||||||
|
from stable_baselines3.common.evaluation import evaluate_policy
|
||||||
|
import envs
|
||||||
|
from tqdm import tqdm
|
||||||
|
import components
|
||||||
|
from stable_baselines3.common.env_checker import check_env
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
|
||||||
|
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
|
||||||
|
from sb3_contrib import RecurrentPPO
|
||||||
|
from envs.econ_wrapper import EconVecEnv
|
||||||
|
from stable_baselines3.common.callbacks import BaseCallback
|
||||||
|
import yaml
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Training environment configuration (consumed by EconVecEnv / foundation).
env_config = {
    # ===== SCENARIO CLASS =====
    # Name of the Scenario class in the Scenario Registry (foundation.scenarios).
    # The environment object will be an instance of that class.
    'scenario_name': 'simple_market',

    # ===== COMPONENTS =====
    # ("component_name", {component_kwargs}) tuples; "component_name" refers to
    # the class's name in the Component Registry (foundation.components).
    # Components reset, step, and generate obs in the listed order.
    'components': [
        # (1) Building houses
        ('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
        # (2) Trading collectible resources
        #('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
    ],

    # ===== SCENARIO CLASS ARGUMENTS =====
    # (optional) kwargs handled by the Scenario class itself.
    'starting_agent_coin': 0,
    'fixed_four_skill_and_loc': True,

    # ===== STANDARD ARGUMENTS ======
    # kwargs used by every Scenario class (defined in BaseEnvironment).
    'n_agents': 20,  # Number of non-planner agents (must be > 1)
    'world_size': [1, 1],  # [Height, Width] of the env world
    'episode_length': 256,  # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 100,
    'world_dense_log_frequency': 1,
    # Labor-cost annealing is effectively disabled (cost 0, constant 0).
    'energy_cost': 0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 0,

    # In multi-action-mode, the policy selects an action for each action
    # subspace; otherwise it selects only one action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,

    # When flattening observations, concatenate scalar & vector observations
    # before output; otherwise return observations with minimal processing.
    'flatten_observations': False,
    # When flattening masks, concatenate each action subspace mask into a
    # single array (required for masking action logits).
    'flatten_masks': False,
}
|
||||||
|
|
||||||
|
|
||||||
|
# Evaluation environment configuration: identical to ``env_config`` except
# for a shorter episode (100 steps) and denser logging (frequency 10).
eval_env_config = {
    # ===== SCENARIO CLASS =====
    'scenario_name': 'simple_market',

    # ===== COMPONENTS =====
    # Same component stack as training (see env_config above for details).
    'components': [
        # (1) Building houses
        ('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
        # (2) Trading collectible resources
        #('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
    ],

    # ===== SCENARIO CLASS ARGUMENTS =====
    'starting_agent_coin': 0,
    'fixed_four_skill_and_loc': True,

    # ===== STANDARD ARGUMENTS ======
    'n_agents': 20,  # Number of non-planner agents (must be > 1)
    'world_size': [1, 1],  # [Height, Width] of the env world
    'episode_length': 100,  # Shorter episodes for evaluation
    'allow_observation_scaling': True,
    'dense_log_frequency': 10,
    'world_dense_log_frequency': 1,
    'energy_cost': 0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 0,

    # Single-action mode for both agents and planner.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,

    # Minimal observation processing; no mask flattening.
    'flatten_observations': False,
    'flatten_masks': False,
}
|
||||||
|
|
||||||
|
# NOTE(review): appears unused in this script — confirm before removing.
num_frames=2
|
||||||
|
|
||||||
|
class TensorboardCallback(BaseCallback):
    """Custom callback that logs scenario productivity to TensorBoard."""

    def __init__(self, econ, verbose=0):
        super().__init__(verbose)
        self.econ = econ
        # Seed the previous-metrics snapshot from the current env state.
        self.metrics = econ.scenario_metrics()

    def _on_step(self) -> bool:
        previous = self.metrics
        # Prefer the last finished episode's metrics once one is available;
        # otherwise fall back to live scenario metrics.
        if self.econ.previous_episode_metrics is None:
            self.metrics = self.econ.scenario_metrics()
        else:
            self.metrics = self.econ.previous_episode_metrics
        productivity = self.metrics["social/productivity"]
        self.logger.record("social/total_productivity", productivity)
        self.logger.record(
            "social/delta_productivity",
            productivity - previous["social/productivity"],
        )
        return True
|
||||||
|
|
||||||
|
|
||||||
|
def sample_random_action(agent, mask):
    """Sample random UNMASKED action(s) for agent."""
    if agent.multi_action_mode:
        # One action per subspace: split the flat mask at subspace bounds
        # and draw from each piece, weighted by the (normalized) mask.
        sub_masks = np.split(mask, agent.action_spaces.cumsum()[:-1])
        return [
            np.random.choice(np.arange(len(m)), p=m / m.sum())
            for m in sub_masks
        ]

    # Single action subspace: one weighted draw.
    return np.random.choice(np.arange(agent.action_spaces), p=mask / mask.sum())
|
||||||
|
|
||||||
|
def sample_random_actions(env, obs):
    """Return a NOOP (action 0) for every agent present in *obs*.

    NOTE(review): despite the original docstring ("Samples random UNMASKED
    actions"), this returns action 0 for all agents and ignores *env*
    entirely. Keys are positional indices over obs, not agent idx strings
    -- confirm callers expect that.
    """

    actions = {
        a_idx: 0
        for a_idx in range( len(obs))
    }

    return actions
|
||||||
|
|
||||||
|
def printMarket(market):
    """Print every trade in a dense market log, grouped by step.

    *market* is a per-step list of transaction dicts. Returns "" so the
    call can be embedded in a format/print expression.
    """
    for step_idx, transactions in enumerate(market):
        if not transactions:
            continue
        print("=== Step {} ===".format(step_idx))
        for t in transactions:
            print(
                "({}) {} -> {} | [{}/{}] {} Coins\n".format(
                    t["commodity"], t["seller"], t["buyer"],
                    t["ask"], t["bid"], t["price"],
                )
            )
    return ""
|
||||||
|
|
||||||
|
def printBuilds(builds):
    """Print every build event in a dense build log.

    *builds* is a per-step list of build dicts. Returns "" so the call
    can be embedded in a format/print expression.
    """
    for step_idx, step_builds in enumerate(builds):
        if not step_builds:
            continue
        for t in step_builds:
            print(
                "({}) Builder: {}, Skill: {}, Income {} ".format(
                    step_idx, t["builder"], t["build_skill"], t["income"]
                )
            )
    return ""
|
||||||
|
def printReplay(econ, agentid):
    """Pretty-print one agent's episode from the previous dense log.

    For each step it prints: the world resource levels at cell (0, 0),
    the agent's logged state (as YAML), its action(s), and its reward.

    Args:
        econ: environment exposing ``previous_episode_dense_log``.
        agentid (int): index of the agent whose episode to replay.
    """
    worldmaps = ["Stone", "Wood"]

    log = econ.previous_episode_dense_log
    # Fix: removed the unused ``agent = econ.world.agents[agentid]`` lookup.
    agentid = str(agentid)
    # The final states entry is the terminal state; iterate the steps
    # that have matching action/reward entries.
    max_step = len(log["states"]) - 1

    for step in range(max_step):
        print()
        print("=== Step {} ===".format(step))
        print("--- World ---")
        world = log['world'][step]
        for res in worldmaps:
            print("{}: {}".format(res, world[res][0][0]))
        print("--- State ---")
        state = log['states'][step][agentid]
        print(yaml.dump(state))
        print("--- Action ---")
        action = log["actions"][step][agentid]
        if action == {}:
            print("Action: 0 -> NOOP")
        else:
            for k in action:
                print("Action: {}({})".format(k, action[k]))
        print("--- Reward ---")
        print("Reward: {}".format(log["rewards"][step][agentid]))
|
||||||
|
|
||||||
|
# Setup Env Objects
# -----------------
vecenv=EconVecEnv(env_config=env_config)
econ=vecenv.env
monenv=VecMonitor(venv=vecenv,info_keywords=["social/productivity","trend/productivity"])
normenv=VecNormalize(monenv,norm_reward=False,clip_obs=1)
# NOTE(review): the PPO model below trains on ``monenv``; ``normenv`` and
# ``stackenv`` are constructed but never passed to the model, yet
# ``normenv.save`` is used to produce the eval normalizer — confirm intended.
stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
obs=stackenv.reset()

# Random run tag used as the TensorBoard log name.
runname="run_{}".format(int(np.random.rand()*100))

model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")

# Timesteps for one full episode summed across all agents.
total_required_for_episode=env_config['n_agents']*env_config['episode_length']
print("this is run {}".format(runname))
# Endless train/eval loop: ~50 episodes of training, then one eval replay.
while True:
    # Create Eval ENV
    vec_env_eval=EconVecEnv(env_config=eval_env_config)
    vec_mon_eval=VecMonitor(venv=vec_env_eval)
    norm_env_eval=VecNormalize(vec_mon_eval,norm_reward=False,training=False)
    eval_econ = vec_env_eval.env

    #Train
    model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
    normenv.save("temp-normalizer.ai")

    ## Run Eval
    print("### EVAL ###")
    # NOTE(review): VecNormalize.load returns a new wrapped env; here the
    # return value is discarded and stepping uses vec_mon_eval directly,
    # so the loaded normalization stats may not be applied — confirm.
    norm_env_eval.load("temp-normalizer.ai",vec_mon_eval)
    obs=vec_mon_eval.reset()
    done=False
    for i in tqdm(range(eval_env_config['episode_length'])):
        action=model.predict(obs)
        obs,rew,done_e,info=vec_mon_eval.step(action[0])
        done=done_e[0]

    #market=eval_econ.get_component("ContinuousDoubleAuction")
    craft=eval_econ.get_component("SimpleCraft")
    # trades=market.get_dense_log()
    build=craft.get_dense_log()
    # NOTE(review): metrics are read from the TRAINING env (econ) while the
    # replay/build log come from the eval env — confirm this mix is intended.
    met=econ.previous_episode_metrics
    printReplay(eval_econ,0)
    # printMarket(trades)
    printBuilds(builds=build)
    print("social/productivity: {}".format(met["social/productivity"]))
    print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
    print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))

    time.sleep(1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
283
main.py
Normal file
283
main.py
Normal file
@@ -0,0 +1,283 @@
|
|||||||
|
from ai_economist import foundation
|
||||||
|
import numpy as np
|
||||||
|
from stable_baselines3.common.vec_env import vec_frame_stack
|
||||||
|
from stable_baselines3.common.evaluation import evaluate_policy
|
||||||
|
import envs
|
||||||
|
from tqdm import tqdm
|
||||||
|
import components
|
||||||
|
from stable_baselines3.common.env_checker import check_env
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
|
||||||
|
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
|
||||||
|
from sb3_contrib import RecurrentPPO
|
||||||
|
from envs.econ_wrapper import EconVecEnv
|
||||||
|
from stable_baselines3.common.callbacks import BaseCallback
|
||||||
|
import yaml
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Environment configuration used for training.
env_config = {
    # ----- Scenario -----
    # Name of the Scenario class in the Scenario Registry (foundation.scenarios);
    # the environment object will be an instance of that class.
    'scenario_name': 'simple_market',

    # ----- Components -----
    # List of ("component_name", {component_kwargs}) tuples. "component_name"
    # refers to the Component class's name in the Component Registry
    # (foundation.components). Components reset, step, and generate
    # observations in the order listed here.
    'components': [
        # (1) Building houses
        ('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
        # (2) Trading collectible resources (currently disabled)
        #('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
    ],

    # ----- Scenario-specific arguments -----
    # kwargs added by the Scenario class (i.e. not defined in BaseEnvironment).
    'starting_agent_coin': 0,
    'fixed_four_skill_and_loc': True,

    # ----- Standard arguments (defined in BaseEnvironment) -----
    'n_agents': 20,             # Number of non-planner agents (must be > 1)
    'world_size': [1, 1],       # [Height, Width] of the env world
    'episode_length': 256,      # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 100,
    'world_dense_log_frequency': 1,
    'energy_cost': 0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 0,

    # In multi-action-mode the policy selects one action per action subspace
    # (defined in component code); otherwise it selects a single action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,

    # flatten_observations: concatenate scalar & vector observations on output;
    # otherwise return observations with minimal processing.
    'flatten_observations': False,
    # flatten_masks: concatenate each action-subspace mask into a single array.
    # Note: flatten_masks = True is required for masking action logits.
    'flatten_masks': False,
}
|
||||||
|
|
||||||
|
|
||||||
|
# Environment configuration used for evaluation episodes (shorter episodes,
# more frequent dense logging than the training config).
eval_env_config = {
    # ----- Scenario -----
    # Name of the Scenario class in the Scenario Registry (foundation.scenarios);
    # the environment object will be an instance of that class.
    'scenario_name': 'simple_market',

    # ----- Components -----
    # List of ("component_name", {component_kwargs}) tuples. "component_name"
    # refers to the Component class's name in the Component Registry
    # (foundation.components). Components reset, step, and generate
    # observations in the order listed here.
    'components': [
        # (1) Building houses
        ('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
        # (2) Trading collectible resources (currently disabled)
        #('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
    ],

    # ----- Scenario-specific arguments -----
    # kwargs added by the Scenario class (i.e. not defined in BaseEnvironment).
    'starting_agent_coin': 0,
    'fixed_four_skill_and_loc': True,

    # ----- Standard arguments (defined in BaseEnvironment) -----
    'n_agents': 20,             # Number of non-planner agents (must be > 1)
    'world_size': [1, 1],       # [Height, Width] of the env world
    'episode_length': 100,      # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 10,
    'world_dense_log_frequency': 1,
    'energy_cost': 0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 0,

    # In multi-action-mode the policy selects one action per action subspace
    # (defined in component code); otherwise it selects a single action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,

    # flatten_observations: concatenate scalar & vector observations on output;
    # otherwise return observations with minimal processing.
    'flatten_observations': False,
    # flatten_masks: concatenate each action-subspace mask into a single array.
    # Note: flatten_masks = True is required for masking action logits.
    'flatten_masks': False,
}

# Number of stacked frames (currently only referenced here).
num_frames = 2
|
||||||
|
|
||||||
|
class TensorboardCallback(BaseCallback):
    """Log extra scenario metrics (productivity and its per-step trend) to
    tensorboard via the stable-baselines3 logger."""

    def __init__(self, econ, verbose=0):
        super().__init__(verbose)
        self.econ = econ
        # Seed the snapshot so the first delta has something to compare against.
        self.metrics = econ.scenario_metrics()

    def _on_step(self) -> bool:
        # Keep the previous snapshot so we can report the change since last step.
        previous = self.metrics
        finished = self.econ.previous_episode_metrics
        # Prefer metrics from the last completed episode; fall back to the
        # live scenario metrics before any episode has finished.
        self.metrics = self.econ.scenario_metrics() if finished is None else finished
        productivity = self.metrics["social/productivity"]
        delta = productivity - previous["social/productivity"]
        self.logger.record("social/total_productivity", productivity)
        self.logger.record("social/delta_productivity", delta)
        return True
|
||||||
|
|
||||||
|
|
||||||
|
def sample_random_action(agent, mask):
    """Sample random UNMASKED action(s) for agent."""
    if agent.multi_action_mode:
        # One action per action subspace: split the flat mask at the subspace
        # boundaries and draw from each piece, weighting by the mask.
        sub_masks = np.split(mask, agent.action_spaces.cumsum()[:-1])
        return [np.random.choice(len(m), p=m / m.sum()) for m in sub_masks]

    # Single-action mode: one masked draw over the whole action space.
    return np.random.choice(agent.action_spaces, p=mask / mask.sum())
|
||||||
|
|
||||||
|
def sample_random_actions(env, obs):
    """Return a no-op action (0) for every agent present in ``obs``.

    NOTE(review): despite its name and the original docstring ("Samples random
    UNMASKED actions"), this function performs no sampling at all -- every
    agent index in ``obs`` is mapped to action 0 (the no-op). ``env`` is
    unused but kept for interface compatibility with callers.

    Args:
        env: unused (kept for signature compatibility).
        obs: per-agent observation collection; only its length is used.

    Returns:
        dict mapping agent index (int) -> 0.
    """
    return {a_idx: 0 for a_idx in range(len(obs))}
|
||||||
|
|
||||||
|
def printMarket(market):
    """Pretty-print a market component's dense trade log, one section per
    step that actually contains transactions. Always returns ''."""
    for step_idx, transactions in enumerate(market):
        if not transactions:
            continue
        print("=== Step {} ===".format(step_idx))
        for t in transactions:
            line = "({}) {} -> {} | [{}/{}] {} Coins\n".format(
                t["commodity"], t["seller"], t["buyer"],
                t["ask"], t["bid"], t["price"],
            )
            print(line)
    return ""
|
||||||
|
|
||||||
|
def printBuilds(builds):
    """Pretty-print a craft component's dense build log (one line per build,
    prefixed with the step index). Always returns ''."""
    for step_idx, step_builds in enumerate(builds):
        if not step_builds:
            continue
        for b in step_builds:
            line = "({}) Builder: {}, Skill: {}, Income {} ".format(
                step_idx, b["builder"], b["build_skill"], b["income"],
            )
            print(line)
    return ""
|
||||||
|
def printReplay(econ, agentid):
    """Replay one agent's episode from the env's dense log, printing world
    resources, agent state, action, and reward for every step.

    NOTE(review): relies on the module-level ``yaml`` import for dumping the
    agent state; the dense log is assumed to key per-agent entries by the
    string form of the agent id.
    """
    worldmaps = ["Stone", "Wood"]
    log = econ.previous_episode_dense_log
    agent = econ.world.agents[agentid]  # kept for parity with original (unused)

    key = str(agentid)
    last_step = len(log["states"]) - 1

    for step in range(last_step):
        print()
        print("=== Step {} ===".format(step))
        # World resource maps (only the single [0][0] cell of the 1x1 world).
        print("--- World ---")
        world_state = log['world'][step]
        for res in worldmaps:
            print("{}: {}".format(res, world_state[res][0][0]))
        # Full agent state as YAML.
        print("--- State ---")
        print(yaml.dump(log['states'][step][key]))
        # Action taken this step; an empty dict means no-op.
        print("--- Action ---")
        action = log["actions"][step][key]
        if action == {}:
            print("Action: 0 -> NOOP")
        else:
            for k in action:
                print("Action: {}({})".format(k, action[k]))
        # Reward received this step.
        print("--- Reward ---")
        print("Reward: {}".format(log["rewards"][step][key]))
|
||||||
|
|
||||||
|
# ---- Setup env objects -------------------------------------------------
vecenv=EconVecEnv(env_config=env_config)
econ=vecenv.env
monenv=VecMonitor(venv=vecenv,info_keywords=["social/productivity","trend/productivity"])
# NOTE(review): normenv wraps monenv but the model below trains on monenv
# directly, so observations are NOT normalized during training; normenv is
# only used to persist normalizer statistics for the eval env. Confirm this
# is intended.
normenv=VecNormalize(monenv,norm_reward=False,clip_obs=1)
stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
obs=stackenv.reset()

# Random run tag for tensorboard grouping.
runname="run_{}".format(int(np.random.rand()*100))

model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")

# One full episode across all agents = n_agents * episode_length env steps.
total_required_for_episode=env_config['n_agents']*env_config['episode_length']
print("this is run {}".format(runname))
while True:
    # ---- Create a fresh eval env each iteration ----
    vec_env_eval=EconVecEnv(env_config=eval_env_config)
    vec_mon_eval=VecMonitor(venv=vec_env_eval)
    norm_env_eval=VecNormalize(vec_mon_eval,norm_reward=False,training=False)
    eval_econ = vec_env_eval.env

    # ---- Train for ~50 episodes, then snapshot normalizer stats ----
    model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
    normenv.save("temp-normalizer.ai")

    # ---- Run eval: load normalizer stats and roll out one episode ----
    print("### EVAL ###")
    norm_env_eval.load("temp-normalizer.ai",vec_mon_eval)
    obs=vec_mon_eval.reset()
    done=False
    for i in tqdm(range(eval_env_config['episode_length'])):
        action=model.predict(obs)
        obs,rew,done_e,info=vec_mon_eval.step(action[0])
        done=done_e[0]

    # ---- Report eval results ----
    #market=eval_econ.get_component("ContinuousDoubleAuction")
    craft=eval_econ.get_component("SimpleCraft")
    # trades=market.get_dense_log()
    build=craft.get_dense_log()
    # BUGFIX: previously read econ.previous_episode_metrics (the TRAINING env),
    # while every other value reported here comes from the eval env; use the
    # eval env's completed-episode metrics instead.
    met=eval_econ.previous_episode_metrics
    printReplay(eval_econ,0)
    # printMarket(trades)
    printBuilds(builds=build)
    print("social/productivity: {}".format(met["social/productivity"]))
    print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
    print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))

    time.sleep(1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
BIN
ray-3.0.0.dev0-cp310-cp310-win_amd64.whl
Normal file
BIN
ray-3.0.0.dev0-cp310-cp310-win_amd64.whl
Normal file
Binary file not shown.
3
reqirements.txt
Normal file
3
reqirements.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
ai-economist
|
||||||
|
gym
|
||||||
|
ray[rllib]
|
||||||
BIN
temp-normalizer.ai
Normal file
BIN
temp-normalizer.ai
Normal file
Binary file not shown.
Reference in New Issue
Block a user