it is working

This commit is contained in:
2023-01-11 19:04:20 +01:00
commit 86874dcfd3
13 changed files with 1768 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
logs/*

16
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,16 @@
{
// Uses IntelliSense to discover possible attributes.
// Hover over existing attributes to view their descriptions.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Aktuelle Datei",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": false
}
]
}

4
components/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
from . import(
simple_gather,
simple_build
)

9
components/noops.py Normal file
View File

@@ -0,0 +1,9 @@
from ai_economist.foundation.base.registrar import Registry
from ai_economist.foundation.entities.endogenous import Endogenous, endogenous_registry
@endogenous_registry.add
class Noop(Endogenous):
    """Endogenous variable tracking consecutive no-op actions performed by an agent."""

    name = "Noop"

256
components/simple_build.py Normal file
View File

@@ -0,0 +1,256 @@
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
import numpy as np
from ai_economist.foundation.base.base_component import (
BaseComponent,
component_registry,
)
@component_registry.add
class SimpleCraft(BaseComponent):
    """
    Allows mobile agents to convert stone and wood into coin ("building"),
    earning income.

    Can be configured to include heterogeneous building skill where agents earn
    different levels of income when building.

    NOTE(review): unlike the upstream ai-economist Build component, this
    simplified variant does not place a House landmark on the world map; it
    only consumes resources, adds coin, and charges labor.

    Args:
        payment (int): Default amount of coin agents earn from building.
            Must be >= 0. Default is 10.
        payment_max_skill_multiplier (int): Maximum skill multiplier that an agent
            can sample. Must be >= 1. Default is 1.
        skill_dist (str): Distribution type for sampling skills. Default ("none")
            gives all agents identical skill equal to a multiplier of 1. "pareto" and
            "lognormal" sample skills from the associated distributions.
        build_labor (float): Labor cost associated with building a house.
            Must be >= 0. Default is 10.
    """

    name = "SimpleCraft"
    component_type = "Build"
    required_entities = ["Wood", "Stone", "Coin", "House", "Labor"]
    agent_subclasses = ["BasicMobileAgent"]

    def __init__(
        self,
        *base_component_args,
        payment=10,
        payment_max_skill_multiplier=1,
        skill_dist="none",
        build_labor=10.0,
        **base_component_kwargs
    ):
        super().__init__(*base_component_args, **base_component_kwargs)

        self.payment = int(payment)
        assert self.payment >= 0

        self.payment_max_skill_multiplier = int(payment_max_skill_multiplier)
        assert self.payment_max_skill_multiplier >= 1

        # Fixed recipe: one unit each of wood and stone per build action.
        self.resource_cost = {"Wood": 1, "Stone": 1}

        self.build_labor = float(build_labor)
        assert self.build_labor >= 0

        self.skill_dist = skill_dist.lower()
        assert self.skill_dist in ["none", "pareto", "lognormal"]

        # {agent.idx: sampled skill}; populated in additional_reset_steps().
        self.sampled_skills = {}

        # Dense log: one list of build-event dicts per component step.
        self.builds = []

    def agent_can_build(self, agent):
        """Return True if agent can actually build in its current location."""
        # See if the agent has the resources necessary to complete the action
        for resource, cost in self.resource_cost.items():
            if agent.state["inventory"][resource] < cost:
                return False

        return True

    # Required methods for implementing components
    # --------------------------------------------

    def get_n_actions(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        Add a single action (build) for mobile agents.
        """
        # This component adds 1 action that mobile agents can take: build a house
        if agent_cls_name == "BasicMobileAgent":
            return 1

        return None

    def get_additional_state_fields(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        For mobile agents, add state fields for building skill.
        """
        if agent_cls_name not in self.agent_subclasses:
            return {}
        if agent_cls_name == "BasicMobileAgent":
            return {"build_payment": float(self.payment), "build_skill": 1}
        raise NotImplementedError

    def component_step(self):
        """
        See base_component.py for detailed description.

        Convert stone+wood to coin for agents that choose to build and can.
        Agents that choose to build but lack resources are flagged via
        agent.bad_action (consumed by the scenario's reward function).
        """
        world = self.world
        build = []
        # Apply any building actions taken by the mobile agents
        for agent in world.get_random_order_agents():

            action = agent.get_component_action(self.name)

            # This component doesn't apply to this agent!
            if action is None:
                continue

            # NO-OP!
            if action == 0:
                pass

            # Build! (If you can.)
            elif action == 1:
                if self.agent_can_build(agent):
                    # Remove the resources
                    for resource, cost in self.resource_cost.items():
                        agent.state["inventory"][resource] -= cost

                    # Receive payment for the house
                    agent.state["inventory"]["Coin"] += agent.state["build_payment"]

                    # Incur the labor cost for building
                    agent.state["endogenous"]["Labor"] += self.build_labor

                    build.append(
                        {
                            "builder": agent.idx,
                            "build_skill": self.sampled_skills[agent.idx],
                            "income": float(agent.state["build_payment"]),
                        }
                    )
                else:
                    # Chose to build without the required resources.
                    agent.bad_action=True

            else:
                raise ValueError

        self.builds.append(build)

    def generate_observations(self):
        """
        See base_component.py for detailed description.

        Here, agents observe their build payment (normalized by the base
        payment) and build skill. The planner does not observe anything
        from this component.
        """

        obs_dict = dict()
        for agent in self.world.agents:
            obs_dict[agent.idx] = {
                "build_payment": agent.state["build_payment"] / self.payment,
                "build_skill": self.sampled_skills[agent.idx],
            }

        return obs_dict

    def generate_masks(self, completions=0):
        """
        See base_component.py for detailed description.

        Mask the build action whenever the agent lacks the required resources
        (see agent_can_build); location plays no role in this variant.
        """

        masks = {}
        # Mobile agents' build action is masked if they cannot build with their
        # current endowment
        for agent in self.world.agents:
            masks[agent.idx] = np.array([self.agent_can_build(agent)])

        return masks

    # For non-required customization
    # ------------------------------

    def get_metrics(self):
        """
        Metrics that capture what happened through this component.

        Returns:
            metrics (dict): A dictionary of {"metric_name": metric_value},
                where metric_value is a scalar.
        """
        world = self.world

        build_stats = {a.idx: {"n_builds": 0} for a in world.agents}
        for builds in self.builds:
            for build in builds:
                idx = build["builder"]
                build_stats[idx]["n_builds"] += 1

        out_dict = {}
        for a in world.agents:
            for k, v in build_stats[a.idx].items():
                out_dict["{}/{}".format(a.idx, k)] = v

        # NOTE(review): this component never places House landmarks, so this
        # count reflects houses placed by other components (if any) — verify.
        num_houses = np.sum(world.maps.get("House") > 0)
        out_dict["total_builds"] = num_houses

        return out_dict

    def additional_reset_steps(self):
        """
        See base_component.py for detailed description.

        Re-sample agents' building skills.
        """
        world = self.world

        self.sampled_skills = {agent.idx: 1 for agent in world.agents}

        PMSM = self.payment_max_skill_multiplier

        for agent in world.agents:
            if self.skill_dist == "none":
                sampled_skill = 1
                pay_rate = 1
            elif self.skill_dist == "pareto":
                sampled_skill = np.random.pareto(4)
                # Pay rate is clipped to the configured maximum multiplier.
                pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1)
            elif self.skill_dist == "lognormal":
                sampled_skill = np.random.lognormal(-1, 0.5)
                pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1)
            else:
                raise NotImplementedError

            agent.state["build_payment"] = float(pay_rate * self.payment)
            agent.state["build_skill"] = float(sampled_skill)

            self.sampled_skills[agent.idx] = sampled_skill

        self.builds = []

    def get_dense_log(self):
        """
        Log builds.

        Returns:
            builds (list): A list of build events. Each entry corresponds to a single
                timestep and contains a description of any builds that occurred on
                that timestep.
        """
        return self.builds

214
components/simple_gather.py Normal file
View File

@@ -0,0 +1,214 @@
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
import numpy as np
from numpy.random import rand
from ai_economist.foundation.base.base_component import (
BaseComponent,
component_registry,
)
from ai_economist.foundation.entities import resource_registry, resources
@component_registry.add
class SimpleGather(BaseComponent):
    """
    Allows mobile agents to collect resources from a shared pool.

    Can be configured to include collection skill, where agents have heterogeneous
    probabilities of collecting bonus resources without additional labor cost.

    NOTE(review): unlike the upstream Gather component there is no movement
    here; all resources appear to live at map point (0, 0), which acts as a
    global stockpile — confirm against the maps implementation.

    Args:
        collect_labor (float): Labor cost associated with collecting resources.
            Must be >= 0. Default is 1.0.
        skill_dist (str): Distribution type for sampling skills. Default ("none")
            gives all agents identical skill equal to a bonus prob of 0. "pareto" and
            "lognormal" sample skills from the associated distributions.
    """

    name = "SimpleGather"
    required_entities = ["Coin", "House", "Labor"]
    agent_subclasses = ["BasicMobileAgent"]

    def __init__(
        self,
        *base_component_args,
        collect_labor=1.0,
        skill_dist="none",
        **base_component_kwargs
    ):
        super().__init__(*base_component_args, **base_component_kwargs)

        self.collect_labor = float(collect_labor)
        assert self.collect_labor >= 0

        self.skill_dist = skill_dist.lower()
        assert self.skill_dist in ["none", "pareto", "lognormal"]

        # Dense log: one list of gather-event dicts per component step.
        self.gathers = []

        # Names (str) of the collectible resources in this world.
        self.commodities = [
            r for r in self.world.resources if resource_registry.get(r).collectible
        ]

    # Required methods for implementing components
    # --------------------------------------------

    def get_n_actions(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        Adds 1 action per commodity that can be picked up.
        """
        if agent_cls_name == "BasicMobileAgent":
            return len(self.commodities)
        return None

    def get_additional_state_fields(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        For mobile agents, add state field for collection skill.
        """
        if agent_cls_name not in self.agent_subclasses:
            return {}
        if agent_cls_name == "BasicMobileAgent":
            return {"bonus_gather_prob": 0.0}
        raise NotImplementedError

    def component_step(self):
        """
        See base_component.py for detailed description.

        Pickup resources if available from env. Agents that try to pick up an
        exhausted resource are flagged via agent.bad_action (consumed by the
        scenario's reward function).
        """
        world = self.world

        gathers = []
        for agent in world.get_random_order_agents():

            if self.name not in agent.action:
                continue

            resource_action = agent.get_component_action(
                self.name
            )
            if resource_action == 0:  # NO-OP
                continue
            # Actions 1..len(commodities) map onto commodity indices 0..n-1.
            resource_action -=1  # Starting at 1

            r=self.commodities[resource_action]
            if self.get_num_resources(r)>0:
                gather= self.pickup(r,agent)
                gathers.append(gather)
            else:
                agent.bad_action=True
                continue

        self.gathers.append(gathers)

    def generate_observations(self):
        """
        See base_component.py for detailed description.

        Here, agents observe their collection skill plus, for each commodity,
        the available stock divided by the number of agents (capped at 1).
        The planner does not observe anything from this component.
        """
        num_agent=len(self.world.agents)
        obs_avai={}
        for r in self.commodities:
            key="pickup_perc_{}".format(r)
            pickProb=float(self.get_num_resources(r)/num_agent)
            if pickProb>1:
                pickProb=1
            obs_avai[key]=pickProb

        obs={}
        for agent in self.world.agents:
            obs[agent.idx]={}
            obs[agent.idx]["bonus_gather_prob"]= agent.state["bonus_gather_prob"]
            obs[agent.idx].update(obs_avai)

        return obs

    def generate_masks(self, completions=0):
        """
        See base_component.py for detailed description.

        Mask each commodity's pickup action whenever that resource is
        currently exhausted. The same mask applies to every agent.
        """
        world = self.world
        mask=[]
        for r in self.commodities:
            avail=0
            if self.get_num_resources(r)>0:
                avail=1
            mask.append(avail)

        masks = {}
        for agent in world.agents:
            masks[agent.idx]=mask

        return masks

    # For non-required customization
    # ------------------------------

    def additional_reset_steps(self):
        """
        See base_component.py for detailed description.

        Re-sample agents' collection skills.
        """
        for agent in self.world.agents:
            if self.skill_dist == "none":
                bonus_rate = 0.0
            elif self.skill_dist == "pareto":
                bonus_rate = np.minimum(2, np.random.pareto(3)) / 2
            elif self.skill_dist == "lognormal":
                bonus_rate = np.minimum(2, np.random.lognormal(-2.022, 0.938)) / 2
            else:
                raise NotImplementedError

            agent.state["bonus_gather_prob"] = float(bonus_rate)

        self.gathers = []

    def get_dense_log(self):
        """
        Log resource collections.

        Returns:
            gathers (list): A list of gather events. Each entry corresponds to a single
                timestep and contains a description of any resource gathers that
                occurred on that timestep.
        """
        return self.gathers

    # For Components

    def get_num_resources(self, res: str):
        """Return the stock of resource *res* (a registry name string) at the
        shared map point (0, 0)."""
        return self.world.maps.get_point(res,0,0)

    def pickup(self, res: str, agent):
        """Transfer one unit of *res* (plus a skill-based bonus unit) to
        *agent*, charge collection labor, and return a gather-event dict."""
        # Bool comparison adds 1 with probability bonus_gather_prob.
        n_gathered = 1 + (rand() < agent.state["bonus_gather_prob"])
        agent.state["inventory"][res] += n_gathered
        agent.state["endogenous"]["Labor"] += self.collect_labor
        # NOTE(review): only one unit is consumed from the map even when two
        # units were gathered — confirm whether the bonus is meant to be free.
        self.world.consume_resource(res,0,0)
        # Log the gather
        return (
            dict(
                agent=agent.idx,
                resource=res,
                n=n_gathered,
            )
        )

227
envs/econ_wrapper.py Normal file
View File

@@ -0,0 +1,227 @@
from collections import OrderedDict
from copy import deepcopy
from typing import Any, Callable, List, Optional, Sequence, Type, Union
from ai_economist.foundation.base import base_env
import gym
import gym.spaces
import numpy as np
from stable_baselines3.common.vec_env.base_vec_env import VecEnv, VecEnvIndices, VecEnvObs, VecEnvStepReturn
from stable_baselines3.common.vec_env.util import copy_obs_dict, dict_to_obs, obs_space_info
from ai_economist import foundation
class EconVecEnv(VecEnv, gym.Env):
    """
    Vectorized wrapper exposing a single multi-agent ai-economist environment
    as a stable-baselines3 ``VecEnv``: each mobile agent of the underlying
    environment occupies one "env" slot, so SB3 trains one shared policy
    across all agents. Planner entries (key ``"p"``) are stripped from
    observations, rewards, and infos.

    :param env_config: keyword configuration forwarded to
        ``foundation.make_env_instance``.
    """

    def __init__(self, env_config):
        self.config=env_config
        env=foundation.make_env_instance(**env_config)
        self.env = env
        # Several inherited/overridden helpers (seed, get_images, render,
        # _get_target_envs -> get_attr/set_attr/env_method/env_is_wrapped)
        # iterate over ``self.envs``; previously this attribute was never
        # assigned, so all of those raised AttributeError. There is exactly
        # one underlying environment here.
        self.envs = [env]
        # Build the gym spaces from a sample reset observation of agent "0";
        # bookkeeping entries are removed so they don't count toward the
        # flat observation size.
        obs=env.reset()
        actions=env.world.agents[0].action_spaces
        obs1=obs["0"]
        del obs1["action_mask"]
        del obs1["time"]
        self.observation_space=gym.spaces.Box(low=0,high=np.inf,shape=(len(obs1),),dtype=np.float32)
        self.action_space=gym.spaces.Discrete(actions)
        # One VecEnv slot per mobile agent.
        self.num_envs=env.world.n_agents
        VecEnv.__init__(self, self.num_envs, self.observation_space, action_space=self.action_space)
        self.keys, shapes, dtypes = obs_space_info(self.observation_space)
        self.buf_obs = OrderedDict([(k, np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k])) for k in self.keys])
        self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
        self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        self.actions = None
        # Initialize metrics here (env.reset() already ran above) so that
        # step_wait() does not crash if the caller steps before calling
        # reset() explicitly; previously self.metrics was first assigned
        # only inside reset().
        self.metrics = env.scenario_metrics()

    def step_async(self, actions: np.ndarray) -> None:
        self.actions = actions

    def step_wait(self) -> VecEnvStepReturn:
        """Apply the buffered actions and return (obs, rews, dones, infos)."""
        # Convert the flat SB3 action array into the {agent_idx: action}
        # dict the ai-economist env expects.
        r_action={}
        for ai_idx in range(len(self.actions)):
            r_action[str(ai_idx)]=self.actions[ai_idx]
        obs,rew,done,info = self.env.step(r_action)
        obs_g=self._convert_econ_obs_to_gym(obs)
        rew_g=self._convert_econ_to_gym(rew)
        info_g=self._convert_econ_to_gym(info)
        # Collect scenario metrics and expose productivity level + trend in
        # every agent's info dict.
        prev_metrics=self.metrics
        self.metrics=self.env.scenario_metrics()
        curr_prod=self.metrics["social/productivity"]
        trend_pord=curr_prod-prev_metrics["social/productivity"]
        for k in info_g:
            k["social/productivity"]=curr_prod
            k["trend/productivity"]=trend_pord
        done_g=[False]*self.num_envs
        done=(done["__all__"])
        if done:
            # Episode finished: mark all slots done, stash terminal
            # observations, and auto-reset (standard SB3 VecEnv behavior).
            for i in range(self.num_envs):
                done_g[i]=done
                info_g[i]["terminal_observation"]=obs_g[i]
            obs_g=self.reset()
        return (np.copy(obs_g), np.copy(rew_g), np.copy(done_g), deepcopy(info_g))

    # fix with malformed action tensor from sb3 predict method
    def step_predict(self,actions):
        """Step using the (extra-nested) action tensor SB3's predict returns."""
        return self.step(actions[0])

    def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
        # NOTE(review): assumes the underlying foundation env exposes a
        # gym-style seed() method — confirm.
        if seed is None:
            seed = np.random.randint(0, 2**32 - 1)
        seeds = []
        for idx, env in enumerate(self.envs):
            seeds.append(env.seed(seed + idx))
        return seeds

    def reset(self) -> VecEnvObs:
        """Reset the underlying env and return the stacked agent observations."""
        obs = self.env.reset()
        self.metrics=self.env.scenario_metrics()
        obs_g=self._convert_econ_obs_to_gym(obs)
        return obs_g

    def close(self) -> None:
        self.env.close()

    def get_images(self) -> Sequence[np.ndarray]:
        return [env.render(mode="rgb_array") for env in self.envs]

    def render(self, mode: str = "human") -> Optional[np.ndarray]:
        """
        Gym environment rendering. If there are multiple environments then
        they are tiled together in one image via ``BaseVecEnv.render()``.
        Otherwise (if ``self.num_envs == 1``), we pass the render call directly to the
        underlying environment.

        Therefore, some arguments such as ``mode`` will have values that are valid
        only when ``num_envs == 1``.

        :param mode: The rendering type.
        """
        if self.num_envs == 1:
            return self.envs[0].render(mode=mode)
        else:
            return super().render(mode=mode)

    def _save_obs(self, env_idx: int, obs: VecEnvObs) -> None:
        for key in self.keys:
            if key is None:
                self.buf_obs[key][env_idx] = obs
            else:
                self.buf_obs[key][env_idx] = obs[key]

    def _obs_from_buf(self) -> VecEnvObs:
        return dict_to_obs(self.observation_space, copy_obs_dict(self.buf_obs))

    def get_attr(self, attr_name: str, indices: VecEnvIndices = None) -> List[Any]:
        """Return attribute from vectorized environment (see base class)."""
        target_envs = self._get_target_envs(indices)
        return [getattr(env_i, attr_name) for env_i in target_envs]

    def set_attr(self, attr_name: str, value: Any, indices: VecEnvIndices = None) -> None:
        """Set attribute inside vectorized environments (see base class)."""
        target_envs = self._get_target_envs(indices)
        for env_i in target_envs:
            setattr(env_i, attr_name, value)

    def env_method(self, method_name: str, *method_args, indices: VecEnvIndices = None, **method_kwargs) -> List[Any]:
        """Call instance methods of vectorized environments."""
        target_envs = self._get_target_envs(indices)
        return [getattr(env_i, method_name)(*method_args, **method_kwargs) for env_i in target_envs]

    def env_is_wrapped(self, wrapper_class: Type[gym.Wrapper], indices: VecEnvIndices = None) -> List[bool]:
        """Check if worker environments are wrapped with a given wrapper"""
        target_envs = self._get_target_envs(indices)
        # Import here to avoid a circular import
        from stable_baselines3.common import env_util
        return [env_util.is_wrapped(env_i, wrapper_class) for env_i in target_envs]

    def _get_target_envs(self, indices: VecEnvIndices) -> List[gym.Env]:
        indices = self._get_indices(indices)
        return [self.envs[i] for i in indices]

    # Convert econ to gym
    def _convert_econ_to_gym(self, econ):
        """Drop the planner entry and return the remaining values as a list.

        NOTE(review): mutates *econ* in place and assumes its remaining keys
        iterate in agent-index order — confirm against the env's dict layout.
        """
        gy=[]
        del econ["p"]
        gy=[v for k,v in econ.items()]
        return gy

    def _convert_gym_to_acon(self, gy):
        """Inverse helper: rebuild a dict from (key, value) pairs.

        NOTE(review): name is a typo for "econ"; kept for compatibility.
        """
        econ={}
        for k,v in gy:
            econ[k]=v
        return econ

    def _convert_econ_obs_to_gym(self, econ):
        """Flatten per-agent observation dicts into a (num_envs, obs_dim) array."""
        gy=[None] * self.num_envs
        del econ["p"]
        for k,v in econ.items():
            # Strip bookkeeping entries that are not part of the obs space.
            del v["time"]
            del v["action_mask"]
            out=self.extract_dict(v)
            agent_obs=np.array(out)
            gy[int(k)]=agent_obs
        return np.stack(gy)

    def extract_dict(self,obj):
        """Recursively flatten nested dict/list values into a list of leaves."""
        output=[]
        use_key=isinstance(obj,dict)
        for v in obj:
            if use_key:
                v=obj[v]
            if isinstance(v,dict):
                temp=self.extract_dict(v)
                output.append(temp)
            else:
                output.append(v)
        return output

472
envs/simple_market.py Normal file
View File

@@ -0,0 +1,472 @@
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
from copy import deepcopy
from pathlib import Path
import numpy as np
from scipy import signal
from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry
from ai_economist.foundation.scenarios.utils import rewards, social_metrics
import yaml
@scenario_registry.add
class SimpleMarket(BaseEnvironment):
"""
World containing stone and wood with stochastic regeneration. Refers to a fixed
layout file (see ./map_txt/ for examples) to determine the spatial arrangement of
stone, wood, and water tiles.
Args:
planner_gets_spatial_obs (bool): Whether the planner agent receives spatial
observations from the world.
full_observability (bool): Whether the mobile agents' spatial observation
includes the full world view or is instead an egocentric view.
mobile_agent_observation_range (int): If not using full_observability,
the spatial range (on each side of the agent) that is visible in the
spatial observations.
env_layout_file (str): Name of the layout file in ./map_txt/ to use.
Note: The world dimensions of that layout must match the world dimensions
argument used to construct the environment.
resource_regen_prob (float): Probability that an empty source tile will
regenerate a new resource unit.
fixed_four_skill_and_loc (bool): Whether to use a fixed set of build skills and
starting locations, with agents grouped into starting locations based on
which skill quartile they are in. False, by default.
True, for experiments in https://arxiv.org/abs/2004.13332.
Note: Requires that the environment uses the "Build" component with
skill_dist="pareto".
starting_agent_coin (int, float): Amount of coin agents have at t=0. Defaults
to zero coin.
isoelastic_eta (float): Parameter controlling the shape of agent utility
wrt coin endowment.
energy_cost (float): Coefficient for converting labor to negative utility.
energy_warmup_constant (float): Decay constant that controls the rate at which
the effective energy cost is annealed from 0 to energy_cost. Set to 0
(default) to disable annealing, meaning that the effective energy cost is
always energy_cost. The units of the decay constant depend on the choice of
energy_warmup_method.
energy_warmup_method (str): How to schedule energy annealing (warmup). If
"decay" (default), use the number of completed episodes. If "auto",
use the number of timesteps where the average agent reward was positive.
planner_reward_type (str): The type of reward used for the planner. Options
are "coin_eq_times_productivity" (default),
"inv_income_weighted_coin_endowment", and "inv_income_weighted_utility".
mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/
"coin_eq_times_productivity". Default is 0, which weights equality and
productivity equally. If set to 1, only productivity is rewarded.
"""
name = "simple_market"
agent_subclasses = ["BasicMobileAgent"]
required_entities = ["Wood", "Stone", "Water"]
def __init__(
self,
*base_env_args,
resource_regen_prob=0.01,
fixed_four_skill_and_loc=False,
starting_agent_coin=0,
isoelastic_eta=0.23,
energy_cost=0.21,
energy_warmup_constant=0,
energy_warmup_method="decay",
planner_reward_type="coin_eq_times_productivity",
mixing_weight_gini_vs_coin=0.0,
**base_env_kwargs,
):
super().__init__(*base_env_args, **base_env_kwargs)
self.layout_specs = dict(
Wood={
"regen_weight": float(resource_regen_prob),
"regen_halfwidth": 0,
"max_health": 1,
},
Stone={
"regen_weight": float(resource_regen_prob),
"regen_halfwidth": 0,
"max_health": 1,
},
)
assert 0 <= self.layout_specs["Wood"]["regen_weight"] <= 1
assert 0 <= self.layout_specs["Stone"]["regen_weight"] <= 1
# How much coin do agents begin with at upon reset
self.starting_agent_coin = float(starting_agent_coin)
assert self.starting_agent_coin >= 0.0
# Controls the diminishing marginal utility of coin.
# isoelastic_eta=0 means no diminishing utility.
self.isoelastic_eta = float(isoelastic_eta)
assert 0.0 <= self.isoelastic_eta <= 1.0
# The amount that labor is weighted in utility computation
# (once annealing is finished)
self.energy_cost = float(energy_cost)
assert self.energy_cost >= 0
# Which method to use for calculating the progress of energy annealing
# If method = 'decay': #completed episodes
# If method = 'auto' : #timesteps where avg. agent reward > 0
self.energy_warmup_method = energy_warmup_method.lower()
assert self.energy_warmup_method in ["decay", "auto"]
# Decay constant for annealing to full energy cost
# (if energy_warmup_constant == 0, there is no annealing)
self.energy_warmup_constant = float(energy_warmup_constant)
assert self.energy_warmup_constant >= 0
self._auto_warmup_integrator = 0
# Which social welfare function to use
self.planner_reward_type = str(planner_reward_type).lower()
# How much to weight equality if using SWF=eq*prod:
# 0 -> SWF=eq * prod
# 1 -> SWF=prod
self.mixing_weight_gini_vs_coin = float(mixing_weight_gini_vs_coin)
assert 0 <= self.mixing_weight_gini_vs_coin <= 1.0
# Use this to calculate marginal changes and deliver that as reward
self.init_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
self.prev_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
self.curr_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
"""
Fixed Four Skill and Loc
------------------------
"""
self.agent_starting_pos = {agent.idx: [] for agent in self.world.agents}
self.last_log_loged={}
@property
def energy_weight(self):
"""
Energy annealing progress. Multiply with self.energy_cost to get the
effective energy coefficient.
"""
if self.energy_warmup_constant <= 0.0:
return 1.0
if self.energy_warmup_method == "decay":
return float(1.0 - np.exp(-self._completions / self.energy_warmup_constant))
if self.energy_warmup_method == "auto":
return float(
1.0
- np.exp(-self._auto_warmup_integrator / self.energy_warmup_constant)
)
raise NotImplementedError
def is_bad_action(self,agent):
bad=agent.bad_action
agent.bad_action=False
return bad
def get_current_optimization_metrics(self):
"""
Compute optimization metrics based on the current state. Used to compute reward.
Returns:
curr_optimization_metric (dict): A dictionary of {agent.idx: metric}
with an entry for each agent (including the planner) in the env.
"""
curr_optimization_metric = {}
# (for agents)
for agent in self.world.agents:
rew= rewards.isoelastic_coin_minus_labor(
coin_endowment=agent.total_endowment("Coin"),
total_labor=agent.state["endogenous"]["Labor"],
isoelastic_eta=self.isoelastic_eta,
labor_coefficient=self.energy_weight * self.energy_cost,
)
#rew-=agent.state["endogenous"]["noops"]
curr_optimization_metric[agent.idx] = rew
# (for the planner)
if self.planner_reward_type == "coin_eq_times_productivity":
curr_optimization_metric[
self.world.planner.idx
] = rewards.coin_eq_times_productivity(
coin_endowments=np.array(
[agent.total_endowment("Coin") for agent in self.world.agents]
),
equality_weight=1 - self.mixing_weight_gini_vs_coin,
)
elif self.planner_reward_type == "inv_income_weighted_coin_endowments":
curr_optimization_metric[
self.world.planner.idx
] = rewards.inv_income_weighted_coin_endowments(
coin_endowments=np.array(
[agent.total_endowment("Coin") for agent in self.world.agents]
)
)
elif self.planner_reward_type == "inv_income_weighted_utility":
curr_optimization_metric[
self.world.planner.idx
] = rewards.inv_income_weighted_utility(
coin_endowments=np.array(
[agent.total_endowment("Coin") for agent in self.world.agents]
),
utilities=np.array(
[curr_optimization_metric[agent.idx] for agent in self.world.agents]
),
)
else:
print("No valid planner reward selected!")
raise NotImplementedError
return curr_optimization_metric
# The following methods must be implemented for each scenario
# -----------------------------------------------------------
def reset_starting_layout(self):
"""
Part 1/2 of scenario reset. This method handles resetting the state of the
environment managed by the scenario (i.e. resource & landmark layout).
Here, reset to the layout in the fixed layout file
"""
self.world.maps.clear()
resources = ["Wood", "Stone"]
for resource in resources:
self.world.maps.set_point_add(resource,0,0,1)
def reset_agent_states(self):
"""
Part 2/2 of scenario reset. This method handles resetting the state of the
agents themselves (i.e. inventory, locations, etc.).
Here, empty inventories and place mobile agents in random, accessible
locations to start. Note: If using fixed_four_skill_and_loc, the starting
locations will be overridden in self.additional_reset_steps.
"""
self.world.clear_agent_locs()
for agent in self.world.agents:
agent.state["inventory"] = {k: 0 for k in agent.inventory.keys()}
agent.state["escrow"] = {k: 0 for k in agent.inventory.keys()}
agent.state["endogenous"] = {k: 0 for k in agent.endogenous.keys()}
# Add starting coin
agent.state["inventory"]["Coin"] = float(self.starting_agent_coin)
agent.bad_action=False
self.world.planner.state["inventory"] = {
k: 0 for k in self.world.planner.inventory.keys()
}
self.world.planner.state["escrow"] = {
k: 0 for k in self.world.planner.escrow.keys()
}
def scenario_step(self):
"""
Update the state of the world according to whatever rules this scenario
implements.
This gets called in the 'step' method (of base_env) after going through each
component step and before generating observations, rewards, etc.
In this class of scenarios, the scenario step handles stochastic resource
regeneration.
"""
resources = ["Wood", "Stone"]
for resource in resources:
self.world.maps.set_point_add(resource,0,0,20)
def generate_observations(self):
"""
Generate observations associated with this scenario.
A scenario does not need to produce observations and can provide observations
for only some agent types; however, for a given agent type, it should either
always or never yield an observation. If it does yield an observation,
that observation should always have the same structure/sizes!
Returns:
obs (dict): A dictionary of {agent.idx: agent_obs_dict}. In words,
return a dictionary with an entry for each agent (which can including
the planner) for which this scenario provides an observation. For each
entry, the key specifies the index of the agent and the value contains
its associated observation dictionary.
Here, non-planner agents receive spatial observations (depending on the env
config) as well as the contents of their inventory and endogenous quantities.
The planner also receives spatial observations (again, depending on the env
config) as well as the inventory of each of the mobile agents.
"""
obs = {}
agent_invs = {
str(agent.idx): {
"inventory-" + k: v * self.inv_scale for k, v in agent.inventory.items()
}
for agent in self.world.agents
}
obs[self.world.planner.idx] = {
"inventory-" + k: v * self.inv_scale
for k, v in self.world.planner.inventory.items()
}
for agent in self.world.agents:
sidx = str(agent.idx)
obs[sidx]=agent_invs[sidx]
return obs
def compute_reward(self):
"""
Apply the reward function(s) associated with this scenario to get the rewards
from this step.
Returns:
rew (dict): A dictionary of {agent.idx: agent_obs_dict}. In words,
return a dictionary with an entry for each agent in the environment
(including the planner). For each entry, the key specifies the index of
the agent and the value contains the scalar reward earned this timestep.
Rewards are computed as the marginal utility (agents) or marginal social
welfare (planner) experienced on this timestep. Ignoring discounting,
this means that agents' (planner's) objective is to maximize the utility
(social welfare) associated with the terminal state of the episode.
"""
# "curr_optimization_metric" hasn't been updated yet, so it gives us the
# utility from the last step.
utility_at_end_of_last_time_step = deepcopy(self.curr_optimization_metric)
# compute current objectives and store the values
self.curr_optimization_metric = self.get_current_optimization_metrics()
# reward = curr - prev objectives
rew={}
for k, v in self.curr_optimization_metric.items():
rew[k] = float(v - utility_at_end_of_last_time_step[k])
if k!="p":
if self.is_bad_action(self.world.agents[k]):
rew[k]-=1
# store the previous objective values
self.prev_optimization_metric.update(utility_at_end_of_last_time_step)
# Automatic Energy Cost Annealing
# -------------------------------
avg_agent_rew = np.mean([rew[a.idx] for a in self.world.agents])
# Count the number of timesteps where the avg agent reward was > 0
if avg_agent_rew > 0:
self._auto_warmup_integrator += 1
return rew
# Optional methods for customization
# ----------------------------------
def additional_reset_steps(self):
    """Finalize the reset cycle by (re)initializing optimization-metric trackers.

    For each reset cycle, reset_starting_layout() and reset_agent_states()
    run first, then every registered component's reset(); this hook runs last.
    It snapshots the current optimization metrics into the current/initial/
    previous trackers so the first compute_reward() call has a valid baseline.
    """
    metrics_snapshot = self.get_current_optimization_metrics()
    # Independent deep copies: compute_reward() mutates these separately.
    self.curr_optimization_metric = deepcopy(metrics_snapshot)
    self.init_optimization_metric = deepcopy(metrics_snapshot)
    self.prev_optimization_metric = deepcopy(metrics_snapshot)
def scenario_metrics(self):
    """Summarize social metrics, endowments, utilities, and labor-cost annealing.

    Returns a flat {metric_key: scalar} dict (no nesting or lists), collected
    along with component metrics in the environment's 'metrics' property.
    """
    metrics = {}

    coin_endowments = np.array(
        [agent.total_endowment("Coin") for agent in self.world.agents]
    )
    utilities = np.array(
        [self.curr_optimization_metric[agent.idx] for agent in self.world.agents]
    )

    # Aggregate social statistics over mobile agents.
    metrics["social/productivity"] = social_metrics.get_productivity(coin_endowments)
    metrics["social/equality"] = social_metrics.get_equality(coin_endowments)

    # Candidate social-welfare objectives.
    metrics["social_welfare/coin_eq_times_productivity"] = (
        rewards.coin_eq_times_productivity(
            coin_endowments=coin_endowments, equality_weight=1.0
        )
    )
    metrics["social_welfare/inv_income_weighted_coin_endow"] = (
        rewards.inv_income_weighted_coin_endowments(coin_endowments=coin_endowments)
    )
    metrics["social_welfare/inv_income_weighted_utility"] = (
        rewards.inv_income_weighted_utility(
            coin_endowments=coin_endowments, utilities=utilities
        )
    )

    # Per-agent endowments, endogenous quantities, and utility.
    for agent in self.all_agents:
        for resource in agent.inventory:
            metrics["endow/{}/{}".format(agent.idx, resource)] = (
                agent.total_endowment(resource)
            )
        if agent.endogenous is not None:
            for resource, quantity in agent.endogenous.items():
                metrics["endogenous/{}/{}".format(agent.idx, resource)] = quantity
        metrics["util/{}".format(agent.idx)] = (
            self.curr_optimization_metric[agent.idx]
        )

    # Labor-cost annealing state.
    metrics["labor/weighted_cost"] = self.energy_cost * self.energy_weight
    metrics["labor/warmup_integrator"] = int(self._auto_warmup_integrator)
    return metrics

View File

@@ -0,0 +1,283 @@
from ai_economist import foundation
import numpy as np
from stable_baselines3.common.vec_env import vec_frame_stack
from stable_baselines3.common.evaluation import evaluate_policy
import envs
from tqdm import tqdm
import components
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
from sb3_contrib import RecurrentPPO
from envs.econ_wrapper import EconVecEnv
from stable_baselines3.common.callbacks import BaseCallback
import yaml
import time
def make_env_config(episode_length, dense_log_frequency):
    """Build a foundation environment config dict.

    The training and evaluation configs below are identical except for the
    episode length and dense-log frequency, so both are generated from this
    single template instead of maintaining two near-duplicate literals.

    Args:
        episode_length (int): Number of timesteps per episode.
        dense_log_frequency (int): How often (in episodes) to keep dense logs.

    Returns:
        dict: A config suitable for foundation.make_env_instance / EconVecEnv.
    """
    return {
        # ===== SCENARIO CLASS =====
        # Which Scenario class to use: the class's name in the Scenario Registry
        # (foundation.scenarios). The environment object will be an instance of it.
        'scenario_name': 'simple_market',
        # ===== COMPONENTS =====
        # ("component_name", {component_kwargs}) tuples; "component_name" is the
        # Component class's name in the Component Registry (foundation.components).
        # Components reset, step, and generate obs in their listed order.
        'components': [
            # (1) Building houses
            ('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
            # (2) Trading collectible resources (currently disabled)
            # ('ContinuousDoubleAuction', {'max_num_orders': 10}),
            # (3) Movement and resource collection
            ('SimpleGather', {}),
        ],
        # ===== SCENARIO CLASS ARGUMENTS =====
        # (optional) kwargs consumed by the Scenario class itself.
        'starting_agent_coin': 0,
        'fixed_four_skill_and_loc': True,
        # ===== STANDARD ARGUMENTS =====
        # kwargs used by every Scenario class (defined in BaseEnvironment).
        'n_agents': 20,          # Number of non-planner agents (must be > 1)
        'world_size': [1, 1],    # [Height, Width] of the env world
        'episode_length': episode_length,
        'allow_observation_scaling': True,
        'dense_log_frequency': dense_log_frequency,
        'world_dense_log_frequency': 1,
        'energy_cost': 0,
        'energy_warmup_method': "auto",
        'energy_warmup_constant': 0,
        # In multi-action-mode, the policy selects an action for each action
        # subspace; otherwise it selects only 1 action.
        'multi_action_mode_agents': False,
        'multi_action_mode_planner': False,
        # When flattening observations, concatenate scalar & vector observations
        # before output; otherwise return observations with minimal processing.
        'flatten_observations': False,
        # flatten_masks = True would be required for masking action logits.
        'flatten_masks': False,
    }


# Training config: long episodes, infrequent dense logging.
env_config = make_env_config(episode_length=256, dense_log_frequency=100)
# Evaluation config: short episodes, frequent dense logging.
eval_env_config = make_env_config(episode_length=100, dense_log_frequency=10)

num_frames = 2
class TensorboardCallback(BaseCallback):
    """Log economy-level productivity metrics to tensorboard.

    Records the latest social productivity and its change since the previous
    step's snapshot, pulling metrics from the wrapped economy instance.
    """

    def __init__(self, econ, verbose=0):
        super().__init__(verbose)
        self.econ = econ
        # Seed the snapshot so the first delta has a baseline.
        self.metrics = econ.scenario_metrics()

    def _on_step(self) -> bool:
        previous = self.metrics
        episode_metrics = self.econ.previous_episode_metrics
        # Prefer the last finished episode's metrics; before the first episode
        # completes, fall back to the live scenario metrics.
        if episode_metrics is None:
            self.metrics = self.econ.scenario_metrics()
        else:
            self.metrics = episode_metrics
        productivity = self.metrics["social/productivity"]
        self.logger.record("social/total_productivity", productivity)
        self.logger.record(
            "social/delta_productivity",
            productivity - previous["social/productivity"],
        )
        return True
def sample_random_action(agent, mask):
    """Sample random UNMASKED action(s) for agent.

    Returns a list of actions (one per action subspace) in multi-action mode,
    otherwise a single action index. Masked-out actions have zero probability.
    """
    if not agent.multi_action_mode:
        # Single subspace: one masked categorical draw.
        return np.random.choice(np.arange(agent.action_spaces), p=mask / mask.sum())
    # Multi-action mode: split the flat mask at each subspace boundary and
    # draw one action per subspace.
    boundaries = agent.action_spaces.cumsum()[:-1]
    samples = []
    for sub_mask in np.split(mask, boundaries):
        samples.append(
            np.random.choice(np.arange(len(sub_mask)), p=sub_mask / sub_mask.sum())
        )
    return samples
def sample_random_actions(env, obs):
    """Return a no-op action (0) for every agent key in obs.

    NOTE: despite the name, no random sampling happens here; every agent is
    assigned action 0. The env argument is accepted for interface parity but
    unused.
    """
    return {agent_index: 0 for agent_index in range(len(obs))}
def printMarket(market):
    """Print every trade from a dense market log, grouped by timestep.

    market is a per-step list of transaction dicts with keys commodity,
    seller, buyer, ask, bid, and price. Steps with no trades are skipped.
    Always returns the empty string.
    """
    for step_idx, transactions in enumerate(market):
        if not transactions:
            continue
        print("=== Step {} ===".format(step_idx))
        for t in transactions:
            line = "({}) {} -> {} | [{}/{}] {} Coins\n".format(
                t["commodity"], t["seller"], t["buyer"], t["ask"], t["bid"], t["price"]
            )
            print(line)
    return ""
def printBuilds(builds):
    """Print every house build recorded in a SimpleCraft dense log.

    builds is a per-step list of build dicts with keys builder, build_skill,
    and income. Steps with no builds are skipped. Always returns the empty
    string.
    """
    for step_idx, step_builds in enumerate(builds):
        if not step_builds:
            continue
        for entry in step_builds:
            print(
                "({}) Builder: {}, Skill: {}, Income {} ".format(
                    step_idx, entry["builder"], entry["build_skill"], entry["income"]
                )
            )
    return ""
def printReplay(econ,agentid):
    """Replay one agent's episode from the previous dense log, step by step.

    For each timestep, prints the world's Stone/Wood cell values, the agent's
    logged state (as YAML), its action(s), and its reward.

    Args:
        econ: foundation environment; previous_episode_dense_log must be
            populated (i.e. at least one episode has finished).
        agentid (int): index of the agent to replay.
    """
    # Resource layers to read out of each per-step world snapshot.
    worldmaps=["Stone","Wood"]
    log=econ.previous_episode_dense_log
    # NOTE(review): agent is assigned but never used below — consider removing.
    agent=econ.world.agents[agentid]
    # Dense-log per-step dicts are keyed by stringified agent index.
    agentid=str(agentid)
    # Skip the last "states" entry — presumably the terminal state, which has
    # no matching action/reward entry. TODO confirm against the dense-log format.
    maxsetp=len(log["states"])-1
    for step in range(maxsetp):
        print()
        print("=== Step {} ===".format(step))
        # state
        print("--- World ---")
        world=log['world'][step]
        for res in worldmaps:
            # [0][0] reads the single cell; assumes a 1x1 world as configured
            # in this script — TODO confirm for other world sizes.
            print("{}: {}".format(res,world[res][0][0]))
        print("--- State ---")
        state=log['states'][step][agentid]
        print(yaml.dump(state))
        print("--- Action ---")
        action=log["actions"][step][agentid]
        if action=={}:
            # An empty action dict in the dense log means the agent idled.
            print("Action: 0 -> NOOP")
        else:
            for k in action:
                formats="Action: {}({})".format(k,action[k])
                print(formats)
        print("--- Reward ---")
        reward=log["rewards"][step][agentid]
        print("Reward: {}".format(reward))
# Setup Env Objects
# -----------------
# Wrap the foundation economy in an SB3-compatible vectorized env, then add
# monitoring, observation normalization, and frame stacking layers.
vecenv=EconVecEnv(env_config=env_config)
econ=vecenv.env
monenv=VecMonitor(venv=vecenv,info_keywords=["social/productivity","trend/productivity"])
# NOTE(review): normenv and stackenv both wrap monenv, but the model below is
# trained on monenv directly — normalization and frame stacking are effectively
# unused during training. Confirm this is intended.
normenv=VecNormalize(monenv,norm_reward=False,clip_obs=1)
stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
obs=stackenv.reset()
# Random two-digit run id; used as the tensorboard log name.
runname="run_{}".format(int(np.random.rand()*100))
# PPO on the monitored (un-normalized, un-stacked) env; rollout buffer spans
# two episodes (n_steps = 2 * episode_length).
model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")
# One full episode across all agents = n_agents * episode_length env steps.
total_required_for_episode=env_config['n_agents']*env_config['episode_length']
print("this is run {}".format(runname))
# Alternate forever between a training phase and an evaluation rollout.
while True:
    # Create Eval ENV (rebuilt each iteration so eval logs start fresh).
    vec_env_eval=EconVecEnv(env_config=eval_env_config)
    vec_mon_eval=VecMonitor(venv=vec_env_eval)
    norm_env_eval=VecNormalize(vec_mon_eval,norm_reward=False,training=False)
    eval_econ = vec_env_eval.env
    # Train for ~50 episodes' worth of steps, keeping the global step counter.
    model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
    # Save the training-side normalizer statistics for the eval env to load.
    normenv.save("temp-normalizer.ai")
    ## Run Eval
    print("### EVAL ###")
    # NOTE(review): VecNormalize.load returns a new wrapped env; the return
    # value is discarded here, so norm_env_eval itself may be unchanged and
    # the eval rollout below steps vec_mon_eval anyway — confirm intent.
    norm_env_eval.load("temp-normalizer.ai",vec_mon_eval)
    obs=vec_mon_eval.reset()
    done=False
    # Roll out one eval episode with the trained policy.
    for i in tqdm(range(eval_env_config['episode_length'])):
        # model.predict returns (action, state); step on the action only.
        action=model.predict(obs)
        obs,rew,done_e,info=vec_mon_eval.step(action[0])
        done=done_e[0]
    #market=eval_econ.get_component("ContinuousDoubleAuction")
    craft=eval_econ.get_component("SimpleCraft")
    # trades=market.get_dense_log()
    build=craft.get_dense_log()
    # NOTE(review): metrics come from the TRAINING economy (econ), while the
    # replay/builds below come from the eval economy — confirm intent.
    met=econ.previous_episode_metrics
    printReplay(eval_econ,0)
    # printMarket(trades)
    printBuilds(builds=build)
    print("social/productivity: {}".format(met["social/productivity"]))
    print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
    print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))
    time.sleep(1)

283
main.py Normal file
View File

@@ -0,0 +1,283 @@
from ai_economist import foundation
import numpy as np
from stable_baselines3.common.vec_env import vec_frame_stack
from stable_baselines3.common.evaluation import evaluate_policy
import envs
from tqdm import tqdm
import components
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
from sb3_contrib import RecurrentPPO
from envs.econ_wrapper import EconVecEnv
from stable_baselines3.common.callbacks import BaseCallback
import yaml
import time
def make_env_config(episode_length, dense_log_frequency):
    """Build a foundation environment config dict.

    The training and evaluation configs below are identical except for the
    episode length and dense-log frequency, so both are generated from this
    single template instead of maintaining two near-duplicate literals.

    Args:
        episode_length (int): Number of timesteps per episode.
        dense_log_frequency (int): How often (in episodes) to keep dense logs.

    Returns:
        dict: A config suitable for foundation.make_env_instance / EconVecEnv.
    """
    return {
        # ===== SCENARIO CLASS =====
        # Which Scenario class to use: the class's name in the Scenario Registry
        # (foundation.scenarios). The environment object will be an instance of it.
        'scenario_name': 'simple_market',
        # ===== COMPONENTS =====
        # ("component_name", {component_kwargs}) tuples; "component_name" is the
        # Component class's name in the Component Registry (foundation.components).
        # Components reset, step, and generate obs in their listed order.
        'components': [
            # (1) Building houses
            ('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
            # (2) Trading collectible resources (currently disabled)
            # ('ContinuousDoubleAuction', {'max_num_orders': 10}),
            # (3) Movement and resource collection
            ('SimpleGather', {}),
        ],
        # ===== SCENARIO CLASS ARGUMENTS =====
        # (optional) kwargs consumed by the Scenario class itself.
        'starting_agent_coin': 0,
        'fixed_four_skill_and_loc': True,
        # ===== STANDARD ARGUMENTS =====
        # kwargs used by every Scenario class (defined in BaseEnvironment).
        'n_agents': 20,          # Number of non-planner agents (must be > 1)
        'world_size': [1, 1],    # [Height, Width] of the env world
        'episode_length': episode_length,
        'allow_observation_scaling': True,
        'dense_log_frequency': dense_log_frequency,
        'world_dense_log_frequency': 1,
        'energy_cost': 0,
        'energy_warmup_method': "auto",
        'energy_warmup_constant': 0,
        # In multi-action-mode, the policy selects an action for each action
        # subspace; otherwise it selects only 1 action.
        'multi_action_mode_agents': False,
        'multi_action_mode_planner': False,
        # When flattening observations, concatenate scalar & vector observations
        # before output; otherwise return observations with minimal processing.
        'flatten_observations': False,
        # flatten_masks = True would be required for masking action logits.
        'flatten_masks': False,
    }


# Training config: long episodes, infrequent dense logging.
env_config = make_env_config(episode_length=256, dense_log_frequency=100)
# Evaluation config: short episodes, frequent dense logging.
eval_env_config = make_env_config(episode_length=100, dense_log_frequency=10)

num_frames = 2
class TensorboardCallback(BaseCallback):
    """Log economy-level productivity metrics to tensorboard.

    Records the latest social productivity and its change since the previous
    step's snapshot, pulling metrics from the wrapped economy instance.
    """

    def __init__(self, econ, verbose=0):
        super().__init__(verbose)
        self.econ = econ
        # Seed the snapshot so the first delta has a baseline.
        self.metrics = econ.scenario_metrics()

    def _on_step(self) -> bool:
        previous = self.metrics
        episode_metrics = self.econ.previous_episode_metrics
        # Prefer the last finished episode's metrics; before the first episode
        # completes, fall back to the live scenario metrics.
        if episode_metrics is None:
            self.metrics = self.econ.scenario_metrics()
        else:
            self.metrics = episode_metrics
        productivity = self.metrics["social/productivity"]
        self.logger.record("social/total_productivity", productivity)
        self.logger.record(
            "social/delta_productivity",
            productivity - previous["social/productivity"],
        )
        return True
def sample_random_action(agent, mask):
    """Sample random UNMASKED action(s) for agent.

    Returns a list of actions (one per action subspace) in multi-action mode,
    otherwise a single action index. Masked-out actions have zero probability.
    """
    if not agent.multi_action_mode:
        # Single subspace: one masked categorical draw.
        return np.random.choice(np.arange(agent.action_spaces), p=mask / mask.sum())
    # Multi-action mode: split the flat mask at each subspace boundary and
    # draw one action per subspace.
    boundaries = agent.action_spaces.cumsum()[:-1]
    samples = []
    for sub_mask in np.split(mask, boundaries):
        samples.append(
            np.random.choice(np.arange(len(sub_mask)), p=sub_mask / sub_mask.sum())
        )
    return samples
def sample_random_actions(env, obs):
    """Return a no-op action (0) for every agent key in obs.

    NOTE: despite the name, no random sampling happens here; every agent is
    assigned action 0. The env argument is accepted for interface parity but
    unused.
    """
    return {agent_index: 0 for agent_index in range(len(obs))}
def printMarket(market):
    """Print every trade from a dense market log, grouped by timestep.

    market is a per-step list of transaction dicts with keys commodity,
    seller, buyer, ask, bid, and price. Steps with no trades are skipped.
    Always returns the empty string.
    """
    for step_idx, transactions in enumerate(market):
        if not transactions:
            continue
        print("=== Step {} ===".format(step_idx))
        for t in transactions:
            line = "({}) {} -> {} | [{}/{}] {} Coins\n".format(
                t["commodity"], t["seller"], t["buyer"], t["ask"], t["bid"], t["price"]
            )
            print(line)
    return ""
def printBuilds(builds):
    """Print every house build recorded in a SimpleCraft dense log.

    builds is a per-step list of build dicts with keys builder, build_skill,
    and income. Steps with no builds are skipped. Always returns the empty
    string.
    """
    for step_idx, step_builds in enumerate(builds):
        if not step_builds:
            continue
        for entry in step_builds:
            print(
                "({}) Builder: {}, Skill: {}, Income {} ".format(
                    step_idx, entry["builder"], entry["build_skill"], entry["income"]
                )
            )
    return ""
def printReplay(econ,agentid):
    """Replay one agent's episode from the previous dense log, step by step.

    For each timestep, prints the world's Stone/Wood cell values, the agent's
    logged state (as YAML), its action(s), and its reward.

    Args:
        econ: foundation environment; previous_episode_dense_log must be
            populated (i.e. at least one episode has finished).
        agentid (int): index of the agent to replay.
    """
    # Resource layers to read out of each per-step world snapshot.
    worldmaps=["Stone","Wood"]
    log=econ.previous_episode_dense_log
    # NOTE(review): agent is assigned but never used below — consider removing.
    agent=econ.world.agents[agentid]
    # Dense-log per-step dicts are keyed by stringified agent index.
    agentid=str(agentid)
    # Skip the last "states" entry — presumably the terminal state, which has
    # no matching action/reward entry. TODO confirm against the dense-log format.
    maxsetp=len(log["states"])-1
    for step in range(maxsetp):
        print()
        print("=== Step {} ===".format(step))
        # state
        print("--- World ---")
        world=log['world'][step]
        for res in worldmaps:
            # [0][0] reads the single cell; assumes a 1x1 world as configured
            # in this script — TODO confirm for other world sizes.
            print("{}: {}".format(res,world[res][0][0]))
        print("--- State ---")
        state=log['states'][step][agentid]
        print(yaml.dump(state))
        print("--- Action ---")
        action=log["actions"][step][agentid]
        if action=={}:
            # An empty action dict in the dense log means the agent idled.
            print("Action: 0 -> NOOP")
        else:
            for k in action:
                formats="Action: {}({})".format(k,action[k])
                print(formats)
        print("--- Reward ---")
        reward=log["rewards"][step][agentid]
        print("Reward: {}".format(reward))
# Setup Env Objects
# -----------------
# Wrap the foundation economy in an SB3-compatible vectorized env, then add
# monitoring, observation normalization, and frame stacking layers.
vecenv=EconVecEnv(env_config=env_config)
econ=vecenv.env
monenv=VecMonitor(venv=vecenv,info_keywords=["social/productivity","trend/productivity"])
# NOTE(review): normenv and stackenv both wrap monenv, but the model below is
# trained on monenv directly — normalization and frame stacking are effectively
# unused during training. Confirm this is intended.
normenv=VecNormalize(monenv,norm_reward=False,clip_obs=1)
stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
obs=stackenv.reset()
# Random two-digit run id; used as the tensorboard log name.
runname="run_{}".format(int(np.random.rand()*100))
# PPO on the monitored (un-normalized, un-stacked) env; rollout buffer spans
# two episodes (n_steps = 2 * episode_length).
model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")
# One full episode across all agents = n_agents * episode_length env steps.
total_required_for_episode=env_config['n_agents']*env_config['episode_length']
print("this is run {}".format(runname))
# Alternate forever between a training phase and an evaluation rollout.
while True:
    # Create Eval ENV (rebuilt each iteration so eval logs start fresh).
    vec_env_eval=EconVecEnv(env_config=eval_env_config)
    vec_mon_eval=VecMonitor(venv=vec_env_eval)
    norm_env_eval=VecNormalize(vec_mon_eval,norm_reward=False,training=False)
    eval_econ = vec_env_eval.env
    # Train for ~50 episodes' worth of steps, keeping the global step counter.
    model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
    # Save the training-side normalizer statistics for the eval env to load.
    normenv.save("temp-normalizer.ai")
    ## Run Eval
    print("### EVAL ###")
    # NOTE(review): VecNormalize.load returns a new wrapped env; the return
    # value is discarded here, so norm_env_eval itself may be unchanged and
    # the eval rollout below steps vec_mon_eval anyway — confirm intent.
    norm_env_eval.load("temp-normalizer.ai",vec_mon_eval)
    obs=vec_mon_eval.reset()
    done=False
    # Roll out one eval episode with the trained policy.
    for i in tqdm(range(eval_env_config['episode_length'])):
        # model.predict returns (action, state); step on the action only.
        action=model.predict(obs)
        obs,rew,done_e,info=vec_mon_eval.step(action[0])
        done=done_e[0]
    #market=eval_econ.get_component("ContinuousDoubleAuction")
    craft=eval_econ.get_component("SimpleCraft")
    # trades=market.get_dense_log()
    build=craft.get_dense_log()
    # NOTE(review): metrics come from the TRAINING economy (econ), while the
    # replay/builds below come from the eval economy — confirm intent.
    met=econ.previous_episode_metrics
    printReplay(eval_econ,0)
    # printMarket(trades)
    printBuilds(builds=build)
    print("social/productivity: {}".format(met["social/productivity"]))
    print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
    print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))
    time.sleep(1)

Binary file not shown.

3
reqirements.txt Normal file
View File

@@ -0,0 +1,3 @@
ai-economist
gym
ray[rllib]

BIN
temp-normalizer.ai Normal file

Binary file not shown.