Merge pull request 'crafting' (#2) from crafting into master

Reviewed-on: #2
This commit was merged in pull request #2.
This commit is contained in:
2023-01-17 08:34:11 +00:00
26 changed files with 1972 additions and 136 deletions

View File

@@ -8,4 +8,6 @@ class TradingAgent(BaseAgent):
"Mobile" refers to agents of this type being able to move around in the 2D world.
"""
name = "TradingAgent"
name = "TradingAgent"

View File

@@ -5,6 +5,7 @@
# or https://opensource.org/licenses/BSD-3-Clause
import random
import uuid
import numpy as np
@@ -38,7 +39,7 @@ class BaseAgent:
if idx is None:
idx = 0
self.uuid=uuid.uuid4()
if multi_action_mode is None:
multi_action_mode = False
@@ -64,6 +65,7 @@ class BaseAgent:
self._registered_inventory = False
self._registered_endogenous = False
self._registered_components = False
self._setup = False # agent setup not completed
self._noop_action_dict = dict()
# Special flag to allow logic for multi-action-mode agents
@@ -78,10 +80,17 @@ class BaseAgent:
def idx(self):
"""Index used to identify this agent. Must be unique within the environment."""
return self._idx
@property
def is_setup(self):
    """bool: Whether scenario-level setup has completed for this agent (set via set_setup)."""
    return self._setup
def set_setup(self, value):
    """Mark whether scenario-level setup has completed for this agent.

    Args:
        value (bool): New value of the setup flag (read back via is_setup).
    """
    # Parameter renamed from `set` to `value`: `set` shadows the builtin, and
    # all visible callers invoke this positionally (e.g. agent.set_setup(True)),
    # so the rename is backward-compatible.
    self._setup = value
def register_inventory(self, resources):
"""Used during environment construction to populate inventory/escrow fields."""
assert not self._registered_inventory
if self._registered_inventory:
return
for entity_name in resources:
self.inventory[entity_name] = 0
self.escrow[entity_name] = 0
@@ -89,7 +98,8 @@ class BaseAgent:
def register_endogenous(self, endogenous):
"""Used during environment construction to populate endogenous state fields."""
assert not self._registered_endogenous
if self._registered_endogenous:
return
for entity_name in endogenous:
self.endogenous[entity_name] = 0
self._registered_endogenous = True
@@ -115,7 +125,8 @@ class BaseAgent:
def register_components(self, components):
"""Used during environment construction to set up state/action spaces."""
assert not self._registered_components
if self._registered_components:
return
for component in components:
n = component.get_n_actions(self.name)
if n is None:

View File

@@ -134,6 +134,7 @@ class BaseComponent(ABC):
def reset(self):
"""Reset any portion of the state managed by this component."""
world = self.world
self.n_agents = world.n_agents
all_agents = world.agents + [world.planner]
for agent in all_agents:
agent.state.update(self.get_additional_state_fields(agent.name))

View File

@@ -234,7 +234,7 @@ class BaseEnvironment(ABC):
self.num_agents = (
n_agents + n_planners
) # used in the warp_drive env wrapper (+ 1 for the planner)
# Components must be a tuple/list where each element is either a...
# tuple: ('Component Name', {Component kwargs})
# dict : {'Component Name': {Component kwargs}}
@@ -342,19 +342,14 @@ class BaseEnvironment(ABC):
self._components_dict[component_object.name] = component_object
self._shorthand_lookup[component_object.shorthand] = component_object
# Register the components with the agents
# to finish setting up their state/action spaces.
for agent in self.world.agents:
agent.register_inventory(self.resources)
agent.register_endogenous(self.endogenous)
agent.register_components(self._components)
self.world.planner.register_inventory(self.resources)
self.world.planner.register_components(self._components)
self._agent_lookup = {str(agent.idx): agent for agent in self.all_agents}
self.reapply_scenario_config_to_agents()
self._completions = 0
self._finish_episode=False
self._last_ep_metrics = None
# For dense logging
@@ -370,6 +365,16 @@ class BaseEnvironment(ABC):
# To collate all the agents ('0', '1', ...) data during reset and step
# into a single agent with index 'a'
self.collate_agent_step_and_reset_data = collate_agent_step_and_reset_data
def reapply_scenario_config_to_agents(self):
    """Re-register resources/endogenous/components with every agent and refresh lookups.

    Finishes (re)building agent state/action spaces and the world's agent
    lookup maps, e.g. after new agents are inserted into an existing env.
    """
    # Register the components with the agents
    # to finish setting up their state/action spaces.
    for agent in self.world.agents:
        agent.register_inventory(self.resources)
        agent.register_endogenous(self.endogenous)
        agent.register_components(self._components)
    # Lookup is keyed by the string form of the agent index.
    self._agent_lookup = {str(agent.idx): agent for agent in self.all_agents}
    self.world.apply_agent_db_to_world()
def _register_entities(self, entities):
for entity in entities:
@@ -501,6 +506,8 @@ class BaseEnvironment(ABC):
# Getters & Setters
# -----------------
def set_finish_episode(self,done):
self._finish_episode=done
def get_component(self, component_name):
"""
@@ -904,6 +911,9 @@ class BaseEnvironment(ABC):
# Reset the timestep counter
self.world.timestep = 0
# Reset done flag
self._finish_episode=False
# Perform the scenario reset,
# which includes resetting the world and agent states
self.reset_starting_layout()
@@ -920,6 +930,7 @@ class BaseEnvironment(ABC):
# Reset actions to that default.
for agent in self.all_agents:
agent.reset_actions()
agent.set_setup(True)
# Produce observations
obs = self._generate_observations(
@@ -1015,7 +1026,7 @@ class BaseEnvironment(ABC):
flatten_masks=self._flatten_masks,
)
rew = self._generate_rewards()
done = {"__all__": self.world.timestep >= self._episode_length}
done = {"__all__": self.world.timestep >= self._episode_length | self._finish_episode}
info = {k: {} for k in obs.keys()}
if self._dense_log_this_episode:

View File

@@ -76,8 +76,7 @@ class Registry:
See Registry class docstring for example.
"""
if cls_name.lower() not in self._lookup:
raise KeyError('"{}" is not a name of a registered class'.format(cls_name))
if cls_name.lower() not in self._lookup: raise KeyError('"{}" is not a name of a registered class'.format(cls_name))
return self._lookup[cls_name.lower()]
def has(self, cls_name):

View File

@@ -91,7 +91,10 @@ class Maps:
else:
raise NotImplementedError
self.reset_agent_maps(n_agents)
def reset_agent_maps(self,n_agents):
self.n_agents=n_agents
self._idx_map = np.stack(
[i * np.ones(shape=self.size) for i in range(self.n_agents)]
)
@@ -378,17 +381,8 @@ class World:
self.multi_action_mode_planner = bool(multi_action_mode_planner)
self._agent_class_idx_map={}
#create agents
self.agent_composition=agent_composition
self.n_agents=0
self._agents = []
for k,v in agent_composition.items():
self._agent_class_idx_map[k]=[]
for offset in range(v):
agent_class=agent_registry.get(k)
agent=agent_class(self.n_agents,self.multi_action_mode_agents)
self._agents.append(agent)
self._agent_class_idx_map[k].append(str(self.n_agents))
self.n_agents+=1
self.create_agents(agent_composition)
self.maps = Maps(world_size, self.n_agents, world_resources, world_landmarks)
planner_class = agent_registry.get("BasicPlanner")
@@ -402,6 +396,37 @@ class World:
self.cuda_function_manager = None
self.cuda_data_manager = None
def create_agents(self, agent_composition):
    """Create the world agent db with the given composition.

    Args:
        agent_composition (dict): Maps registered agent class name -> number
            of agents of that class to instantiate.
    """
    self.agent_composition=agent_composition
    self.n_agents=0
    self._agents = []
    for k,v in agent_composition.items():
        self._agent_class_idx_map[k]=[]
        for offset in range(v):
            agent_class=agent_registry.get(k)
            # Agent index is assigned sequentially across all classes.
            agent=agent_class(self.n_agents,self.multi_action_mode_agents)
            self._agents.append(agent)
            # NOTE: indices are recorded as strings here.
            self._agent_class_idx_map[k].append(str(self.n_agents))
            self.n_agents+=1
def apply_agent_db_to_world(self):
    """Apply the current agent db to the world's lookup and spatial maps.

    Enables insertion of new agents into an existing env: recomputes
    n_agents, rebuilds the class -> agent-index lookup, resets the
    per-agent map layers, and re-applies stored agent locations.
    """
    self.n_agents=len(self._agents)
    self._agent_class_idx_map={}
    self.maps.reset_agent_maps(self.n_agents) # reset map lookups
    # Rebuild the class -> indices mapping from the agent db.
    for idx in range(self.n_agents):
        cls=self.get_agent_class(idx)
        agent=self._agents[idx]
        # Bug fix: record str(idx), not the int, for consistency with
        # create_agents — mixing int and str entries would break consumers
        # of _agent_class_idx_map.
        self._agent_class_idx_map.setdefault(cls, []).append(str(idx))
        # apply agent locs db to maps
        if "loc" in agent.state:
            self.maps.set_agent_loc(agent,*agent.loc)
@property
def agents(self):
"""Return a list of the agent objects in the world (sorted by index)."""

View File

@@ -37,7 +37,7 @@ class ContinuousDoubleAuction(BaseComponent):
name = "ContinuousDoubleAuction"
component_type = "Trade"
required_entities = ["Coin", "Labor"]
agent_subclasses = ["BasicMobileAgent"]
agent_subclasses = ["BasicMobileAgent","TradingAgent"]
def __init__(
self,
@@ -159,7 +159,7 @@ class ContinuousDoubleAuction(BaseComponent):
"""If agent can submit an ask for resource."""
return (
self.n_orders[resource][agent.idx] < self.max_num_orders
and agent.state["inventory"][resource] > 0
and agent.state["inventory"][resource] >= 1
)
# Core components for this market
@@ -417,7 +417,7 @@ class ContinuousDoubleAuction(BaseComponent):
"""
# This component adds 2*(1+max_bid_ask)*n_resources possible actions:
# buy/sell x each-price x each-resource
if agent_cls_name == "BasicMobileAgent":
if agent_cls_name in self.agent_subclasses:
trades = []
for c in self.commodities:
trades.append(
@@ -526,14 +526,14 @@ class ContinuousDoubleAuction(BaseComponent):
for _, agent in enumerate(world.agents):
# Private to the agent
available_ask_agent=full_asks - self.ask_hists[c][agent.idx]
available_bid_agent=full_bids- self.bid_hists[c][agent.idx]
obs[agent.idx].update(
{
"market_rate-{}".format(c): market_rate,
"market_rate-{}".format(c): market_rate*self.inv_scale,
"price_history-{}".format(c): scaled_price_history,
"available_asks-{}".format(c): full_asks
- self.ask_hists[c][agent.idx],
"available_bids-{}".format(c): full_bids
- self.bid_hists[c][agent.idx],
"available_asks-{}".format(c): np.clip(available_ask_agent,0,self.max_num_orders),
"available_bids-{}".format(c): np.clip(available_bid_agent,0,self.max_num_orders),
"my_asks-{}".format(c): self.ask_hists[c][agent.idx],
"my_bids-{}".format(c): self.bid_hists[c][agent.idx],
}

View File

@@ -66,10 +66,10 @@ class Coin(Resource):
collectible = False
@resource_registry.add
class RawGem(Resource):
class GemRaw(Resource):
"""Raw Gem that can be processed further"""
name = "Raw_Gem"
name = "Gem_Raw"
color = np.array([241, 233, 219]) / 255.0
collectible = True
@@ -79,6 +79,6 @@ class Gem(Resource):
name = "Gem"
color = np.array([241, 233, 219]) / 255.0
collectible = False
craft_recp= {"Raw_Gem": 1}
collectible = True
craft_recp= {"Gem_Raw": 1}
craft_labour_base= 1

BIN
basic 4000.ai Normal file

Binary file not shown.

BIN
basic.ai Normal file

Binary file not shown.

View File

@@ -1,4 +1,6 @@
from . import(
simple_gather,
simple_build
simple_build,
crafting,
external_market
)

287
components/crafting.py Normal file
View File

@@ -0,0 +1,287 @@
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
import numpy as np
from ai_economist.foundation.base.base_component import (
BaseComponent,
component_registry,
)
from ai_economist.foundation.entities.resources import Resource, resource_registry
@component_registry.add
class Craft(BaseComponent):
"""
Allows mobile agents to build house landmarks in the world using stone and wood,
earning income.
Can be configured to include heterogeneous building skill where agents earn
different levels of income when building.
Args:
commodities (list(str)): list of commodities that can be crafted in the local world
payment_max_skill_multiplier (int): Maximum skill multiplier that an agent
can sample. Must be >= 1. Default is 1.
skill_dist (str): Distribution type for sampling skills. Default ("none")
gives all agents identical skill equal to a multiplier of 1. "pareto" and
"lognormal" sample skills from the associated distributions.
build_labor (float): Labor cost associated with building a house.
Must be >= 0. Default is 10.
"""
name = "Craft"
component_type = "Build"
required_entities = ["Coin", "Labor"]
agent_subclasses = ["BasicMobileAgent"]
commodities=[]
def __init__(
    self,
    *base_component_args,
    commodities=[],
    max_skill_amount_benefit=1,
    max_skill_labour_benefit=1,
    skill_dist="none",
    **base_component_kwargs
):
    """Configure the craftable commodities and skill sampling.

    Args:
        commodities (list(str)): Registered resource names to make craftable.
            Must be non-empty; each craftable entry must define a non-empty
            ``craft_recp`` and a non-negative ``craft_labour_base``.
        max_skill_amount_benefit (float): Upper bound on the skill-based
            output-amount multiplier. Must be >= 1.
        max_skill_labour_benefit (float): Lower bound on the skill-based
            labour discount factor. Must be <= 1.
        skill_dist (str): "none" (identical skills) or "pareto".
    """
    # NOTE: `commodities=[]` default kept for interface compatibility; it is
    # only read, never mutated, so the shared default is harmless.
    assert len(commodities)>0
    # Bug fix: copy the class-level list before appending. Appending to the
    # shared class attribute would leak required entities across Craft
    # instances.
    self.required_entities = list(self.required_entities)
    #setup commodities
    self.recip_map={}
    self.commodities=[]
    for v in commodities:
        res_class=resource_registry.get(v)
        res=res_class()
        if res.craft_recp!=None:
            # is craftable
            assert res.craft_recp!={}
            assert res.craft_labour_base >= 0
            self.required_entities.append(v)
            self.recip_map[res.name]=res.craft_recp
            self.commodities.append(res)
    self.max_skill_amount_benefit=max_skill_amount_benefit
    self.max_skill_labour_benefit=max_skill_labour_benefit
    assert self.max_skill_amount_benefit >= 1
    assert self.max_skill_labour_benefit <= 1
    self.skill_dist = skill_dist.lower()
    assert self.skill_dist in ["none", "pareto"]
    self.sampled_skills = {}
    self.builds = []
    super().__init__(*base_component_args, **base_component_kwargs)
def agent_can_build(self, agent, res):
    """Return True if the agent holds every input resource required to craft ``res``."""
    recipe = self.recip_map.get(res)
    if recipe is None:
        # Not a craftable commodity.
        return False
    return all(
        agent.state["inventory"][ingredient] >= needed
        for ingredient, needed in recipe.items()
    )
# Required methods for implementing components
# --------------------------------------------
def get_n_actions(self, agent_cls_name):
    """
    See base_component.py for detailed description.

    Supported agent classes get one craft action per craftable commodity;
    other classes get None.
    """
    if agent_cls_name not in self.agent_subclasses:
        return None
    return len(self.commodities)
def get_additional_state_fields(self, agent_cls_name):
    """
    See base_component.py for detailed description.

    No static state fields are declared here; craft skills are populated
    later during additional_reset_steps.
    """
    if agent_cls_name not in self.agent_subclasses or agent_cls_name == "BasicMobileAgent":
        return {}
    raise NotImplementedError
def component_step(self):
    """
    See base_component.py for detailed description.

    Consume recipe inputs and produce the chosen commodity for each agent
    that selected a craft action and can afford it; log each craft event.
    """
    world = self.world
    build = []  # craft events that occurred this timestep
    # Visit agents in random order so no agent index is systematically favoured.
    for agent in world.get_random_order_agents():
        action = agent.get_component_action(self.name)
        # This component doesn't apply to this agent!
        if action is None:
            continue
        # NO-OP!
        if action == 0:
            pass
        # Craft! (If you can.)
        else:
            # Actions 1..N select the (action-1)-th craftable commodity.
            action-=1
            comm=self.commodities[action]
            if self.agent_can_build(agent,comm.name):
                # Remove the recipe's input resources
                for resource, cost in comm.craft_recp.items():
                    agent.state["inventory"][resource] -= cost
                # Receive the (skill-scaled) amount of the crafted commodity
                agent.state["inventory"][comm.name] += agent.state["craft_amount"][comm.name]
                # Incur the (skill-scaled) labor cost for crafting
                agent.state["endogenous"]["Labor"] += agent.state["craft_labour"][comm.name]
                build.append(
                    {
                        "crafter": agent.idx,
                        "craft_commodity": comm.name,
                        "craft_skill": agent.state["craft_skill"][comm.name],
                        "craft_amount": agent.state["craft_amount"][comm.name],
                        "craft_labour": agent.state["craft_labour"][comm.name]
                    }
                )
            else:
                # Chose a craft it cannot afford (mask violation): flag it so
                # the scenario can penalise the action.
                agent.bad_action=True
    self.builds.append(build)
def generate_observations(self):
    """
    See base_component.py for detailed description.

    Each supported agent observes its own per-commodity craft skill. The
    planner observes nothing from this component.
    """
    obs = dict()
    for agent in self.world.agents:
        if agent.name not in self.agent_subclasses:
            continue
        obs[agent.idx] = {
            "craft_skill_{}".format(comm.name): agent.state["craft_skill"][comm.name]
            for comm in self.commodities
        }
    return obs
def generate_masks(self, completions=0):
    """
    See base_component.py for detailed description.

    A craft action is masked unless the agent holds the full recipe for
    that commodity.
    """
    masks = {}
    for agent in self.world.agents:
        if agent.name in self.agent_subclasses:
            can_craft = [
                self.agent_can_build(agent, comm.name) for comm in self.commodities
            ]
            masks[agent.idx] = np.array(can_craft)
    return masks
# For non-required customization
# ------------------------------
def get_metrics(self):
    """
    Metrics that capture what happened through this component.

    Returns:
        metrics (dict): A dictionary of {"metric_name": metric_value},
            where metric_value is a scalar.

    Currently no metrics are reported. The previous commented-out draft
    (removed here as dead code) was copied from the Build component and
    keyed events on "builder", while component_step logs crafts under
    "crafter"; a correct implementation should count "crafter" entries
    per agent from self.builds.
    """
    return {}
def additional_reset_steps(self):
    """
    See base_component.py for detailed description.

    Re-sample crafting skills for agents that have not yet been set up.
    For each commodity, a skill sample determines the crafted output amount
    ("craft_amount") and a labour discount applied to the commodity's base
    labour cost ("craft_labour").
    """
    world = self.world
    MSAB= self.max_skill_amount_benefit
    MSLB= self.max_skill_labour_benefit
    for agent in world.agents:
        # Skip unsupported classes and agents whose setup persists.
        if (agent.name not in self.agent_subclasses) or agent.is_setup:
            continue
        agent.state["craft_skill"]={}
        agent.state["craft_labour"]={}
        agent.state["craft_amount"]={}
        for comm in self.commodities:
            if self.skill_dist == "none":
                sampled_skill = 1
                amount= 1
                # Bug fix: the original set an unused `labour = 1` here and
                # left `labour_modifier` undefined, raising NameError below.
                labour_modifier = 1
            elif self.skill_dist == "pareto":
                sampled_skill = np.random.pareto(2)
                # Output amount grows with skill, capped by MSAB.
                amount = 1+np.minimum(MSAB,(MSAB-1) * (sampled_skill) )
                # Labour cost shrinks with skill, floored by MSLB.
                labour_modifier = 1 - np.minimum(1 - MSLB, (1 - MSLB) * sampled_skill)
            else:
                raise NotImplementedError
            agent.state["craft_skill"][comm.name]=sampled_skill
            agent.state["craft_labour"][comm.name]=comm.craft_labour_base*labour_modifier
            agent.state["craft_amount"][comm.name]=amount
    self.builds = []
def get_dense_log(self):
    """
    Log craft events.

    Returns:
        builds (list): One entry per timestep, each describing any craft
            events that occurred on that timestep.
    """
    return self.builds

View File

@@ -0,0 +1,221 @@
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
import numpy as np
from ai_economist.foundation.base.base_component import (
BaseComponent,
component_registry,
)
@component_registry.add
class ExternalMarket(BaseComponent):
"""
Allows mobile agents to build house landmarks in the world using stone and wood,
earning income.
Can be configured to include heterogeneous building skill where agents earn
different levels of income when building.
Args:
payment (int): Default amount of coin agents earn from building.
Must be >= 0. Default is 10.
market_demand (dict): Resource name -> amout of money
skill_dist (str): Distribution type for sampling skills. Default ("none")
gives all agents identical skill equal to a multiplier of 1. "pareto" and
"lognormal" sample skills from the associated distributions.
build_labor (float): Labor cost associated with building a house.
Must be >= 0. Default is 10.
"""
name = "ExternalMarket"
component_type = "Trade"
required_entities = ["Coin", "Labor"]
agent_subclasses = ["TradingAgent"]
def __init__(
    self,
    *base_component_args,
    market_demand={},
    trade_labor=1.0,
    **base_component_kwargs
):
    """Configure the external market.

    Args:
        market_demand (dict): Maps resource name -> coin paid per unit sold.
        trade_labor (float): Labor cost incurred per sale. Must be >= 0.
    """
    super().__init__(*base_component_args, **base_component_kwargs)
    self.market_demand = market_demand
    # Actions are 1-based: action i sells the i-th resource in market_demand.
    self.action_res_map = {
        slot: res for slot, res in enumerate(market_demand.keys(), start=1)
    }
    self.trade_labor = float(trade_labor)
    assert self.trade_labor >= 0
    self.builds = []
def agent_can_sell(self, agent, res):
    """Return True if the agent holds at least one unit of ``res`` to sell."""
    return agent.state["inventory"][res] >= 1
# Required methods for implementing components
# --------------------------------------------
def get_n_actions(self, agent_cls_name):
    """
    See base_component.py for detailed description.

    Supported agent classes get one sell action per demanded resource;
    other classes get None.
    """
    if agent_cls_name not in self.agent_subclasses:
        return None
    return len(self.action_res_map)
def get_additional_state_fields(self, agent_cls_name):
    """
    See base_component.py for detailed description.

    This component adds no agent state fields.
    """
    return {}
def component_step(self):
    """
    See base_component.py for detailed description.

    Sell one unit of the chosen resource to the external market: remove the
    unit, credit the market price in Coin, and add the trade labor cost.
    """
    world = self.world
    build = []  # sale events that occurred this timestep
    # Visit agents in random order so no agent index is systematically favoured.
    for agent in world.get_random_order_agents():
        action = agent.get_component_action(self.name)
        # This component doesn't apply to this agent!
        if action is None:
            continue
        # NO-OP!
        if action == 0:
            continue
        # Actions 1..N map to resources via action_res_map.
        res_name=self.action_res_map[action]
        # Sell! (If you can.)
        if self.agent_can_sell(agent,res_name):
            # Remove one unit of the sold resource
            agent.state["inventory"][res_name] -= 1
            # Receive the external market price
            agent.state["inventory"]["Coin"] += self.market_demand[res_name]
            # Incur the labor cost of trading
            agent.state["endogenous"]["Labor"] += self.trade_labor
            build.append(
                {
                    "seller": agent.idx,
                    "commodity": res_name,
                    "income": self.market_demand[res_name],
                }
            )
        else:
            # NOTE(review): hard failure on an unaffordable sale assumes the
            # action masks always prevent this case — confirm that is
            # intended (Craft flags agent.bad_action instead of raising).
            raise ValueError
    self.builds.append(build)
def generate_observations(self):
    """
    See base_component.py for detailed description.

    Each trading agent observes the (scaled) external price of every
    resource the market demands. The planner observes nothing.
    """
    obs_dict = dict()
    for agent in self.world.agents:
        if agent.name in self.agent_subclasses:
            obs_dict[agent.idx] = {}
            for res_name,coin in self.market_demand.items():
                # Bug fix: the original used ':' (an annotation statement)
                # instead of '=', so the observation was never stored.
                # NOTE(review): self.inv_scale is not set in this component's
                # __init__; presumably provided by BaseComponent — confirm.
                obs_dict[agent.idx]["external_{}_price".format(res_name)] = self.inv_scale*coin
    return obs_dict
def generate_masks(self, completions=0):
    """
    See base_component.py for detailed description.

    A sell action is masked unless the agent holds at least one unit of the
    corresponding resource.
    """
    masks = {}
    for agent in self.world.agents:
        if agent.name in self.agent_subclasses:
            # np.array for consistency with the mask format of the other
            # components (e.g. Craft.generate_masks); the original returned a
            # plain Python list here.
            masks[agent.idx] = np.array(
                [self.agent_can_sell(agent, res) for res in self.market_demand]
            )
    return masks
# For non-required customization
# ------------------------------
def get_metrics(self):
    """
    Metrics that capture what happened through this component.

    Returns:
        metrics (dict): A dictionary of {"metric_name": metric_value},
            where metric_value is a scalar.

    Currently no metrics are reported. The previous commented-out draft
    (removed here as dead code) was copied from the Build component and
    keyed events on "builder", while component_step logs sales under
    "seller"; a correct implementation should count "seller" entries per
    agent from self.builds.
    """
    return {}
def additional_reset_steps(self):
    """
    See base_component.py for detailed description.

    Clear the sale log at the start of each episode.
    """
    self.builds = []
def get_dense_log(self):
    """
    Log sales to the external market.

    Returns:
        builds (list): One entry per timestep, each describing any sales
            that occurred on that timestep.
    """
    return self.builds

View File

@@ -44,7 +44,7 @@ class SimpleCraft(BaseComponent):
payment=10,
payment_max_skill_multiplier=1,
skill_dist="none",
build_labor=10.0,
build_labor=1.0,
**base_component_kwargs
):
super().__init__(*base_component_args, **base_component_kwargs)

View File

@@ -1,5 +1,6 @@
from . import (
simple_market,
econ_wrapper
econ_wrapper,
econ
)

482
envs/econ.py Normal file
View File

@@ -0,0 +1,482 @@
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
from copy import deepcopy
from pathlib import Path
import numpy as np
from scipy import signal
from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry
from ai_economist.foundation.scenarios.utils import rewards, social_metrics
import yaml
@scenario_registry.add
class Econ(BaseEnvironment):
"""
World containing stone and wood with stochastic regeneration. Refers to a fixed
layout file (see ./map_txt/ for examples) to determine the spatial arrangement of
stone, wood, and water tiles.
Args:
action_against_mask_penelty=-1 (int): Reward penelty for performing action against mask
full_observability (bool): Whether the mobile agents' spatial observation
includes the full world view or is instead an egocentric view.
mobile_agent_observation_range (int): If not using full_observability,
the spatial range (on each side of the agent) that is visible in the
spatial observations.
env_layout_file (str): Name of the layout file in ./map_txt/ to use.
Note: The world dimensions of that layout must match the world dimensions
argument used to construct the environment.
resource_regen_prob (float): Probability that an empty source tile will
regenerate a new resource unit.
fixed_four_skill_and_loc (bool): Whether to use a fixed set of build skills and
starting locations, with agents grouped into starting locations based on
which skill quartile they are in. False, by default.
True, for experiments in https://arxiv.org/abs/2004.13332.
Note: Requires that the environment uses the "Build" component with
skill_dist="pareto".
starting_agent_coin (int, float): Amount of coin agents have at t=0. Defaults
to zero coin.
isoelastic_eta (float): Parameter controlling the shape of agent utility
wrt coin endowment.
energy_cost (float): Coefficient for converting labor to negative utility.
energy_warmup_constant (float): Decay constant that controls the rate at which
the effective energy cost is annealed from 0 to energy_cost. Set to 0
(default) to disable annealing, meaning that the effective energy cost is
always energy_cost. The units of the decay constant depend on the choice of
energy_warmup_method.
energy_warmup_method (str): How to schedule energy annealing (warmup). If
"decay" (default), use the number of completed episodes. If "auto",
use the number of timesteps where the average agent reward was positive.
planner_reward_type (str): The type of reward used for the planner. Options
are "coin_eq_times_productivity" (default),
"inv_income_weighted_coin_endowment", and "inv_income_weighted_utility".
mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/
"coin_eq_times_productivity". Default is 0, which weights equality and
productivity equally. If set to 1, only productivity is rewarded.
"""
name = "econ"
agent_subclasses = ["BasicMobileAgent"]
required_entities = ["Wood", "Stone", "Water","Gem_Raw","Gem"]
def __init__(
    self,
    *base_env_args,
    resource_regen_prob=0.01,
    fixed_four_skill_and_loc=False,
    starting_agent_coin=0,
    isoelastic_eta=0.23,
    energy_cost=0.21,
    energy_warmup_constant=0,
    energy_warmup_method="decay",
    planner_reward_type="coin_eq_times_productivity",
    mixing_weight_gini_vs_coin=0.0,
    **base_env_kwargs,
):
    """Configure the Econ scenario; see the class docstring for argument details.

    NOTE(review): fixed_four_skill_and_loc is accepted but never read in this
    constructor — confirm whether it should be stored/used.
    """
    super().__init__(*base_env_args, **base_env_kwargs)
    # Regeneration settings for the source resources.
    self.layout_specs = dict(
        Wood={
            "regen_weight": float(resource_regen_prob),
            "regen_halfwidth": 0,
            "max_health": 1,
        },
        Stone={
            "regen_weight": float(resource_regen_prob),
            "regen_halfwidth": 0,
            "max_health": 1,
        },
    )
    assert 0 <= self.layout_specs["Wood"]["regen_weight"] <= 1
    assert 0 <= self.layout_specs["Stone"]["regen_weight"] <= 1
    # How much coin do agents begin with at upon reset
    self.starting_agent_coin = float(starting_agent_coin)
    assert self.starting_agent_coin >= 0.0
    # Controls the diminishing marginal utility of coin.
    # isoelastic_eta=0 means no diminishing utility.
    self.isoelastic_eta = float(isoelastic_eta)
    assert 0.0 <= self.isoelastic_eta <= 1.0
    # The amount that labor is weighted in utility computation
    # (once annealing is finished)
    self.energy_cost = float(energy_cost)
    assert self.energy_cost >= 0
    # Which method to use for calculating the progress of energy annealing
    # If method = 'decay': #completed episodes
    # If method = 'auto' : #timesteps where avg. agent reward > 0
    self.energy_warmup_method = energy_warmup_method.lower()
    assert self.energy_warmup_method in ["decay", "auto"]
    # Decay constant for annealing to full energy cost
    # (if energy_warmup_constant == 0, there is no annealing)
    self.energy_warmup_constant = float(energy_warmup_constant)
    assert self.energy_warmup_constant >= 0
    self._auto_warmup_integrator = 0
    # Which social welfare function to use
    self.planner_reward_type = str(planner_reward_type).lower()
    # How much to weight equality if using SWF=eq*prod:
    # 0 -> SWF=eq * prod
    # 1 -> SWF=prod
    self.mixing_weight_gini_vs_coin = float(mixing_weight_gini_vs_coin)
    assert 0 <= self.mixing_weight_gini_vs_coin <= 1.0
    # Use this to calculate marginal changes and deliver that as reward
    self.init_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.prev_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.curr_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    """
    Fixed Four Skill and Loc
    ------------------------
    """
    self.agent_starting_pos = {agent.idx: [] for agent in self.world.agents}
    # When True, resets skip clearing layout/agent state (see reset methods).
    self._persist_between_resets=False
    self.last_log_loged={}
@property
def energy_weight(self):
    """
    Energy annealing progress in [0, 1]. Multiply with self.energy_cost to
    get the effective energy coefficient.
    """
    constant = self.energy_warmup_constant
    if constant <= 0.0:
        return 1.0
    if self.energy_warmup_method == "decay":
        progress = self._completions
    elif self.energy_warmup_method == "auto":
        progress = self._auto_warmup_integrator
    else:
        raise NotImplementedError
    return float(1.0 - np.exp(-progress / constant))
def is_bad_action(self, agent):
    """Read-and-clear the agent's bad-action flag, returning its prior value."""
    was_bad, agent.bad_action = agent.bad_action, False
    return was_bad
def get_current_optimization_metrics(self):
    """
    Compute optimization metrics based on the current state. Used to compute reward.

    Returns:
        curr_optimization_metric (dict): A dictionary of {agent.idx: metric}
            with an entry for each agent (including the planner) in the env.
    """
    curr_optimization_metric = {}
    # (for agents): isoelastic utility of coin minus annealed labor cost.
    for agent in self.world.agents:
        rew= rewards.isoelastic_coin_minus_labor(
            coin_endowment=agent.total_endowment("Coin"),
            total_labor=agent.state["endogenous"]["Labor"],
            isoelastic_eta=self.isoelastic_eta,
            labor_coefficient=self.energy_weight * self.energy_cost,
        )
        #rew-=agent.state["endogenous"]["noops"]
        curr_optimization_metric[agent.idx] = rew
    # (for the planner): social welfare per the configured reward type.
    # NOTE(review): this matches "inv_income_weighted_coin_endowments"
    # (plural); the class docstring lists the singular spelling — confirm
    # which form callers actually pass.
    if self.planner_reward_type == "coin_eq_times_productivity":
        curr_optimization_metric[
            self.world.planner.idx
        ] = rewards.coin_eq_times_productivity(
            coin_endowments=np.array(
                [agent.total_endowment("Coin") for agent in self.world.agents]
            ),
            equality_weight=1 - self.mixing_weight_gini_vs_coin,
        )
    elif self.planner_reward_type == "inv_income_weighted_coin_endowments":
        curr_optimization_metric[
            self.world.planner.idx
        ] = rewards.inv_income_weighted_coin_endowments(
            coin_endowments=np.array(
                [agent.total_endowment("Coin") for agent in self.world.agents]
            )
        )
    elif self.planner_reward_type == "inv_income_weighted_utility":
        curr_optimization_metric[
            self.world.planner.idx
        ] = rewards.inv_income_weighted_utility(
            coin_endowments=np.array(
                [agent.total_endowment("Coin") for agent in self.world.agents]
            ),
            utilities=np.array(
                [curr_optimization_metric[agent.idx] for agent in self.world.agents]
            ),
        )
    else:
        print("No valid planner reward selected!")
        raise NotImplementedError
    return curr_optimization_metric
# The following methods must be implemented for each scenario
# -----------------------------------------------------------
def reset_starting_layout(self):
    """
    Part 1/2 of scenario reset. This method handles resetting the state of the
    environment managed by the scenario (i.e. resource & landmark layout).

    Here, clear the maps and seed one unit of each source resource at (0, 0).
    """
    if self._persist_between_resets: # skip when we only want to modify some values, not actually reset
        return
    self.world.maps.clear()
    resources = ["Wood", "Stone","Gem_Raw"]
    # Seed a single unit of each resource at the map origin.
    for resource in resources:
        self.world.maps.set_point_add(resource,0,0,1)
def reset_agent_states(self):
    """
    Part 2/2 of scenario reset. This method handles resetting the state of the
    agents themselves (i.e. inventory, locations, etc.).

    Here, empty inventories and place mobile agents in random, accessible
    locations to start. Note: If using fixed_four_skill_and_loc, the starting
    locations will be overridden in self.additional_reset_steps.

    When ``self._persist_between_resets`` is set, agent locations and any
    already-set-up agent state are carried over instead of being cleared;
    only agents not yet flagged as set up get fresh inventories.
    """
    if not self._persist_between_resets:
        self.world.clear_agent_locs()
    for agent in self.world.agents:
        if not self._persist_between_resets:
            agent.set_setup(False) # resets agent states
        if not agent.is_setup: # agent has not been setup for scenario
            # Zero every inventory/escrow/endogenous entry.
            # NOTE(review): escrow is keyed off inventory.keys() — this assumes
            # the two registries always hold the same entity names; confirm.
            agent.state["inventory"] = {k: 0 for k in agent.inventory.keys()}
            agent.state["escrow"] = {k: 0 for k in agent.inventory.keys()}
            agent.state["endogenous"] = {k: 0 for k in agent.endogenous.keys()}
            # Add starting coin
            agent.state["inventory"]["Coin"] = float(self.starting_agent_coin)
        agent.bad_action=False  # cleared every reset, even when state persists
    # The planner's holdings are always cleared, regardless of persistence.
    self.world.planner.state["inventory"] = {
        k: 0 for k in self.world.planner.inventory.keys()
    }
    self.world.planner.state["escrow"] = {
        k: 0 for k in self.world.planner.escrow.keys()
    }
def scenario_step(self):
    """
    Advance scenario-owned world state by one timestep.

    Called from the environment's ``step`` (after all component steps and
    before observations/rewards are generated). Here it handles resource
    regeneration: 20 units of each base resource are added at map cell
    (0, 0) on every step.
    """
    for resource in ("Wood", "Stone", "Gem_Raw"):
        self.world.maps.set_point_add(resource, 0, 0, 20)
def generate_observations(self):
    """
    Generate observations associated with this scenario.

    Returns:
        obs (dict): {agent.idx: obs_dict}. The planner observes its own
            inventory; each mobile agent (keyed by the string form of its
            index) observes its own inventory. All quantities are scaled
            by ``self.inv_scale``.
    """
    scale = self.inv_scale
    obs = {
        self.world.planner.idx: {
            "inventory-" + name: amount * scale
            for name, amount in self.world.planner.inventory.items()
        }
    }
    for agent in self.world.agents:
        obs[str(agent.idx)] = {
            "inventory-" + name: amount * scale
            for name, amount in agent.inventory.items()
        }
    return obs
def compute_reward(self):
    """
    Apply the reward function(s) associated with this scenario to get the rewards
    from this step.

    Returns:
        rew (dict): A dictionary of {agent.idx: scalar reward}, with an entry
            for each agent in the environment (including the planner).

    Rewards are computed as the marginal utility (agents) or marginal social
    welfare (planner) experienced on this timestep. Ignoring discounting,
    this means that agents' (planner's) objective is to maximize the utility
    (social welfare) associated with the terminal state of the episode.
    """
    # "curr_optimization_metric" hasn't been updated yet, so it gives us the
    # utility from the last step.
    utility_at_end_of_last_time_step = deepcopy(self.curr_optimization_metric)
    # compute current objectives and store the values
    self.curr_optimization_metric = self.get_current_optimization_metrics()
    # reward = curr - prev objectives
    rew={}
    for k, v in self.curr_optimization_metric.items():
        rew[k] = float(v - utility_at_end_of_last_time_step[k])
        if k!="p":
            # "p" is the planner's key; only mobile agents get the penalty.
            # NOTE(review): indexing self.world.agents[k] assumes agent.idx
            # equals the agent's position in the agents list — confirm.
            if self.is_bad_action(self.world.agents[k]):
                rew[k]-=1  # flat -1 penalty for an invalid action this step
    # store the previous objective values
    self.prev_optimization_metric.update(utility_at_end_of_last_time_step)

    # Automatic Energy Cost Annealing
    # -------------------------------
    avg_agent_rew = np.mean([rew[a.idx] for a in self.world.agents])
    # Count the number of timesteps where the avg agent reward was > 0
    if avg_agent_rew > 0:
        self._auto_warmup_integrator += 1
    return rew
# Optional methods for customization
# ----------------------------------
def additional_reset_steps(self):
    """
    Final scenario-specific hook of the reset cycle.

    Runs after reset_starting_layout()/reset_agent_states() and after every
    registered component's reset(). Re-seeds all three optimization-metric
    trackers from the freshly reset state, so the first step's reward is a
    marginal change measured against this baseline. (If using
    fixed_four_skill_and_loc, skill/location assignment also happens in
    this phase of the reset cycle.)
    """
    metrics_now = self.get_current_optimization_metrics()
    self.curr_optimization_metric = deepcopy(metrics_now)
    self.init_optimization_metric = deepcopy(metrics_now)
    self.prev_optimization_metric = deepcopy(metrics_now)
def scenario_metrics(self):
    """
    Produce scenario-level metrics (merged with component metrics in the
    environment's 'metrics' property).

    Returns a flat {metric_key: scalar} dict summarizing social welfare,
    per-agent endowments, endogenous quantities, utilities, and the
    labor-cost annealing state. No nested values or lists.
    """
    metrics = dict()

    coin_endowments = np.array(
        [agent.total_endowment("Coin") for agent in self.world.agents]
    )

    # Aggregate social statistics over the mobile agents' coin holdings.
    metrics["social/productivity"] = social_metrics.get_productivity(
        coin_endowments
    )
    metrics["social/equality"] = social_metrics.get_equality(coin_endowments)

    utilities = np.array(
        [self.curr_optimization_metric[agent.idx] for agent in self.world.agents]
    )

    # Report every candidate planner objective, regardless of which one the
    # planner is actually optimizing.
    metrics["social_welfare/coin_eq_times_productivity"] = (
        rewards.coin_eq_times_productivity(
            coin_endowments=coin_endowments, equality_weight=1.0
        )
    )
    metrics["social_welfare/inv_income_weighted_coin_endow"] = (
        rewards.inv_income_weighted_coin_endowments(
            coin_endowments=coin_endowments
        )
    )
    metrics["social_welfare/inv_income_weighted_utility"] = (
        rewards.inv_income_weighted_utility(
            coin_endowments=coin_endowments, utilities=utilities
        )
    )

    # Per-agent breakdowns (all_agents includes the planner).
    for agent in self.all_agents:
        for resource in agent.inventory:
            metrics["endow/{}/{}".format(agent.idx, resource)] = (
                agent.total_endowment(resource)
            )
        if agent.endogenous is not None:
            for resource, quantity in agent.endogenous.items():
                metrics["endogenous/{}/{}".format(agent.idx, resource)] = quantity
        metrics["util/{}".format(agent.idx)] = self.curr_optimization_metric[
            agent.idx
        ]

    # Labor weight / energy-cost annealing state.
    metrics["labor/weighted_cost"] = self.energy_cost * self.energy_weight
    metrics["labor/warmup_integrator"] = int(self._auto_warmup_integrator)
    return metrics

View File

@@ -0,0 +1,341 @@
import numpy as np
from ai_economist import foundation
from stable_baselines3.common.vec_env import vec_frame_stack
from stable_baselines3.common.evaluation import evaluate_policy
from sb3_contrib.ppo_mask import MaskablePPO
import envs
import wrapper
import resources
from agents import trading_agent
from wrapper.base_econ_wrapper import BaseEconWrapper
from wrapper.reciever_econ_wrapper import RecieverEconWrapper
from wrapper.sb3_econ_converter import SB3EconConverter
from tqdm import tqdm
import components
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
from sb3_contrib import RecurrentPPO
from envs.econ_wrapper import EconVecEnv
from stable_baselines3.common.callbacks import BaseCallback
import yaml
import time
from threading import Thread
# Training-environment configuration, passed verbatim to
# foundation.make_env_instance(**env_config).
env_config = {
    # ===== SCENARIO CLASS =====
    # Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
    # The environment object will be an instance of the Scenario class.
    'scenario_name': 'econ',

    # ===== COMPONENTS =====
    # Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
    # "component_name" refers to the Component class's name in the Component Registry (foundation.components)
    # {component_kwargs} is a dictionary of kwargs passed to the Component class
    # The order in which components reset, step, and generate obs follows their listed order below.
    'components': [
        # (1) Crafting Gems (skill drawn from a Pareto distribution)
        ('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
        # (2) Trading collectible resources
        ('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
        # (4) Exogenous demand for crafted Gems
        ('ExternalMarket',{'market_demand':{
            'Gem': 15
        }}),
    ],

    # ===== SCENARIO CLASS ARGUMENTS =====
    # (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
    'starting_agent_coin': 10,
    'fixed_four_skill_and_loc': True,

    # ===== STANDARD ARGUMENTS ======
    # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
    'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
    'world_size': [5, 5], # [Height, Width] of the env world
    'episode_length': 256, # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 100,
    'world_dense_log_frequency':1,
    'energy_cost':0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 4000,

    # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
    # Otherwise, the policy selects only 1 action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,

    # When flattening observations, concatenate scalar & vector observations before output.
    # Otherwise, return observations with minimal processing.
    'flatten_observations': False,
    # When Flattening masks, concatenate each action subspace mask into a single array.
    # Note: flatten_masks = True is required for masking action logits in the code below.
    'flatten_masks': True,
}

# Evaluation-environment configuration. Mirrors env_config except for a 1x1
# world and per-episode dense logging, so eval replays can be printed.
eval_env_config = {
    # ===== SCENARIO CLASS =====
    # Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
    # The environment object will be an instance of the Scenario class.
    'scenario_name': 'econ',

    # ===== COMPONENTS =====
    # Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
    # "component_name" refers to the Component class's name in the Component Registry (foundation.components)
    # {component_kwargs} is a dictionary of kwargs passed to the Component class
    # The order in which components reset, step, and generate obs follows their listed order below.
    'components': [
        # (1) Crafting Gems (skill drawn from a Pareto distribution)
        ('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
        # (2) Trading collectible resources
        ('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
        # (4) Exogenous demand for crafted Gems
        ('ExternalMarket',{'market_demand':{
            'Gem': 15
        }}),
    ],

    # ===== SCENARIO CLASS ARGUMENTS =====
    # (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
    'starting_agent_coin': 10,
    'fixed_four_skill_and_loc': True,

    # ===== STANDARD ARGUMENTS ======
    # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
    'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
    'world_size': [1, 1], # [Height, Width] of the env world
    'episode_length': 256, # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 1,
    'world_dense_log_frequency':1,
    'energy_cost':0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 4000,

    # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
    # Otherwise, the policy selects only 1 action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,

    # When flattening observations, concatenate scalar & vector observations before output.
    # Otherwise, return observations with minimal processing.
    'flatten_observations': False,
    # When Flattening masks, concatenate each action subspace mask into a single array.
    # Note: flatten_masks = True is required for masking action logits in the code below.
    'flatten_masks': True,
}

# Number of stacked observation frames fed to each policy (VecFrameStack).
num_frames=5
class TensorboardCallback(BaseCallback):
    """
    SB3 callback that logs economy-level productivity values to tensorboard.

    On each training step it records total productivity from the most
    recently completed episode (falling back to live scenario metrics when
    no episode has finished yet) plus its change versus the previously
    cached snapshot.
    """

    def __init__(self, econ, verbose=0):
        super().__init__(verbose)
        self.econ = econ
        # Seed the cache so the first delta has a baseline to compare against.
        self.metrics = econ.scenario_metrics()

    def _on_step(self) -> bool:
        previous = self.metrics
        latest = self.econ.previous_episode_metrics
        if latest is None:
            # No episode has completed yet: use the live scenario metrics.
            latest = self.econ.scenario_metrics()
        self.metrics = latest
        productivity = latest["social/productivity"]
        delta = productivity - previous["social/productivity"]
        self.logger.record("social/total_productivity", productivity)
        self.logger.record("social/delta_productivity", delta)
        return True
def printMarket(market):
    """Print every executed trade in a per-step transaction log.

    Steps with no transactions are skipped; each active step prints a
    "=== Step i ===" banner followed by one line per trade.

    Args:
        market: sequence of per-step lists of trade dicts with keys
            "commodity", "seller", "buyer", "ask", "bid", "price".

    Returns:
        "" (kept so existing callers that use the return value still work).
    """
    # enumerate() instead of range(len(...)) — same order, idiomatic.
    for step_idx, transactions in enumerate(market):
        if len(transactions) > 0:
            print("=== Step {} ===".format(step_idx))
            for t in transactions:
                transstring = "({}) {} -> {} | [{}/{}] {} Coins\n".format(
                    t["commodity"], t["seller"], t["buyer"], t["ask"], t["bid"], t["price"]
                )
                print(transstring)
    return ""
def printBuilds(builds):
    """Print every build/craft event in a per-step log.

    Args:
        builds: sequence of per-step lists of build dicts with keys
            "builder", "build_skill", "income".

    Returns:
        "" (kept so existing callers that use the return value still work).
    """
    # enumerate() instead of range(len(...)) — same order, idiomatic.
    for step_idx, step_builds in enumerate(builds):
        for t in step_builds:
            transstring = "({}) Builder: {}, Skill: {}, Income {} ".format(
                step_idx, t["builder"], t["build_skill"], t["income"]
            )
            print(transstring)
    return ""
def printReplay(econ, agentid):
    """Pretty-print one agent's trajectory from the last dense episode log.

    For each logged step, prints the world resource cells at (0, 0), the
    agent's state (as YAML), its action(s), and its reward.

    Args:
        econ: environment exposing ``previous_episode_dense_log``.
        agentid: integer index of the agent to replay.
    """
    worldmaps = ["Stone", "Wood"]
    log = econ.previous_episode_dense_log
    agent_key = str(agentid)
    # NOTE(review): the last logged state is skipped here (len - 1) —
    # presumably because actions/rewards have one fewer entry; confirm.
    max_step = len(log["states"]) - 1
    for step in range(max_step):
        print()
        print("=== Step {} ===".format(step))
        print("--- World ---")
        world = log['world'][step]
        for res in worldmaps:
            # Only cell (0, 0) is shown; eval worlds here are 1x1.
            print("{}: {}".format(res, world[res][0][0]))
        print("--- State ---")
        print(yaml.safe_dump(log['states'][step][agent_key]))
        print("--- Action ---")
        action = log["actions"][step][agent_key]
        if action == {}:
            print("Action: 0 -> NOOP")
        else:
            for k in action:
                print("Action: {}({})".format(k, action[k]))
        print("--- Reward ---")
        print("Reward: {}".format(log["rewards"][step][agent_key]))
#Setup Env Objects
# Build the training environment and split it into two agent-class-specific
# vectorized views (one per policy) via the project's wrapper stack.
econ=foundation.make_env_instance(**env_config)
market=econ.get_component("ContinuousDoubleAuction")
action=market.get_n_actions("TradingAgent")
baseEconWrapper=BaseEconWrapper(econ)
baseEconWrapper.run()
# NOTE(review): sleep presumably lets the wrapper's background worker come
# up before receivers attach — replace with an explicit ready signal.
time.sleep(0.5)
mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent")
tradeRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="TradingAgent")
sb3_traderConverter=SB3EconConverter(tradeRecieverEconWrapper,econ,"TradingAgent",True)
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent",True)
# attach sb3 wrappers (monitoring + frame stacking for each policy)
monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"])
montraidingenv=VecMonitor(venv=sb3_traderConverter)
stackenv_basic=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=num_frames)
stackenv_traid=vec_frame_stack.VecFrameStack(venv=montraidingenv,n_stack=num_frames)
# Model setup complete
# Setup Eval Env — same wrapper stack over the eval configuration.
econ_eval=foundation.make_env_instance(**eval_env_config)
baseEconWrapper_eval=BaseEconWrapper(econ_eval)
baseEconWrapper_eval.run()
time.sleep(0.5)
mobileRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="BasicMobileAgent")
tradeRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="TradingAgent")
sb3_traderConverter_eval=SB3EconConverter(tradeRecieverEconWrapper_eval,econ_eval,"TradingAgent",False)
sb3Converter_eval=SB3EconConverter(mobileRecieverEconWrapper_eval,econ_eval,"BasicMobileAgent",False)
# attach sb3 wrappers
monenv_eval=VecMonitor(venv=sb3Converter_eval,info_keywords=["social/productivity","trend/productivity"])
montraidingenv_eval=VecMonitor(venv=sb3_traderConverter_eval)
stackenv_basic_eval=vec_frame_stack.VecFrameStack(venv=monenv_eval,n_stack=num_frames)
stackenv_traid_eval=vec_frame_stack.VecFrameStack(venv=montraidingenv_eval,n_stack=num_frames)
obs=monenv.reset()
# define training functions
def train(model, timesteps, econ_call, process_bar, name, db, index):
    """Thread target: run model.learn and hand the result back via ``db``.

    Thread targets cannot return values, so the trained model is stored at
    ``db[index]`` for the main thread to pick up after join().
    """
    trained = model.learn(
        total_timesteps=timesteps,
        progress_bar=process_bar,
        reset_num_timesteps=False,
        tb_log_name=name,
        callback=TensorboardCallback(econ_call),
    )
    db[index] = trained
# prepare training
run_number=int(np.random.rand()*100)
runname="run_{}".format(run_number)
model_db=[None,None] # object for storing model
# One MaskablePPO policy per agent class; identical hyperparameters.
model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_basic, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
model_trade=MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_traid, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
n_agents=econ.n_agents
# Timesteps for one full episode across all agents of each class.
# NOTE(review): "agnet_idx" is the wrapper's attribute spelling (typo lives
# in the wrapper class) — renaming it here would break the lookup.
total_required_for_episode_basic=len(mobileRecieverEconWrapper.agnet_idx)*env_config['episode_length']
total_required_for_episode_traid=len(tradeRecieverEconWrapper.agnet_idx)*env_config['episode_length']
print("this is run {}".format(runname))
while True:
    #Train — both policies learn concurrently on the shared environment.
    runname="run_{}_{}".format(run_number,"basic")
    thread_model=Thread(target=train,args=(model,total_required_for_episode_basic*100,econ,True,runname,model_db,0))
    runname="run_{}_{}".format(run_number,"trader")
    thread_model_traid=Thread(target=train,args=(model_trade,total_required_for_episode_traid*100,econ,False,runname,model_db,1))
    thread_model.start()
    thread_model_traid.start()
    thread_model.join()
    thread_model_traid.join()
    #normenv.save("temp-normalizer.ai")
    ## Run Eval
    print("### EVAL ###")
    obs_basic=stackenv_basic_eval.reset()
    obs_trade=stackenv_traid_eval.reset()
    # Pick up the trained models handed back by the worker threads.
    model=model_db[0]
    model_trade=model_db[1]
    done=False
    for i in tqdm(range(eval_env_config['episode_length'])):
        #create masks
        masks_basic=stackenv_basic_eval.action_masks()
        masks_trade=stackenv_traid_eval.action_masks()
        # get actions
        action_basic=model.predict(obs_basic,action_masks=masks_basic)
        action_trade=model_trade.predict(obs_trade,action_masks=masks_trade)
        #submit async directly for non blocking operation
        sb3Converter_eval.step_async(action_basic[0])
        sb3_traderConverter_eval.step_async(action_trade[0])
        # retrieve full results
        obs_basic,rew_basic,done_e,info=stackenv_basic_eval.step(action_basic[0])
        obs_trade,rew_trade,done_e,info=stackenv_traid_eval.step(action_trade[0])
        done=done_e[0]
    market=econ_eval.get_component("ContinuousDoubleAuction")
    craft=econ_eval.get_component("Craft")
    # trades=market.get_dense_log()
    build=craft.get_dense_log()
    # NOTE(review): metrics are read from the TRAINING env (econ), while the
    # replay below comes from the eval env — confirm this is intentional.
    met=econ.previous_episode_metrics
    printReplay(econ_eval,0)
    # printMarket(trades)
    # printBuilds(builds=build)
    print("social/productivity: {}".format(met["social/productivity"]))
    print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
    print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))
    time.sleep(1)

208
main.py
View File

@@ -1,9 +1,15 @@
from ai_economist import foundation
import numpy as np
from ai_economist import foundation
from stable_baselines3.common.vec_env import vec_frame_stack
from stable_baselines3.common.evaluation import evaluate_policy
from sb3_contrib.ppo_mask import MaskablePPO
import envs
import wrapper
import resources
import pprint
from agents import trading_agent
from wrapper.base_econ_wrapper import BaseEconWrapper
from wrapper.reciever_econ_wrapper import RecieverEconWrapper
from wrapper.sb3_econ_converter import SB3EconConverter
@@ -18,12 +24,13 @@ from envs.econ_wrapper import EconVecEnv
from stable_baselines3.common.callbacks import BaseCallback
import yaml
import time
from threading import Thread
env_config = {
# ===== SCENARIO CLASS =====
# Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
# The environment object will be an instance of the Scenario class.
'scenario_name': 'simple_market',
'scenario_name': 'econ',
# ===== COMPONENTS =====
# Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
@@ -32,30 +39,34 @@ env_config = {
# The order in which components reset, step, and generate obs follows their listed order below.
'components': [
# (1) Building houses
('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
# (2) Trading collectible resources
#('ContinuousDoubleAuction', {'max_num_orders': 10}),
('ContinuousDoubleAuction', {'max_num_orders': 10}),
# (3) Movement and resource collection
('SimpleGather', {}),
('ExternalMarket',{'market_demand':{
'Gem': 15
}}),
],
# ===== SCENARIO CLASS ARGUMENTS =====
# (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
'starting_agent_coin': 0,
'starting_agent_coin': 50,
'fixed_four_skill_and_loc': True,
# ===== STANDARD ARGUMENTS ======
# kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
'agent_composition': {"BasicMobileAgent": 20}, # Number of non-planner agents (must be > 1)
'world_size': [1, 1], # [Height, Width] of the env world
'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
'world_size': [5, 5], # [Height, Width] of the env world
'episode_length': 256, # Number of timesteps per episode
'isoelastic_eta':0.001,
'allow_observation_scaling': True,
'dense_log_frequency': 100,
'world_dense_log_frequency':1,
'energy_cost':0,
'energy_cost':0,
'energy_warmup_method': "auto",
'energy_warmup_constant': 0,
'energy_warmup_constant': 4000,
# In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
# Otherwise, the policy selects only 1 action.
@@ -67,7 +78,7 @@ env_config = {
'flatten_observations': False,
# When Flattening masks, concatenate each action subspace mask into a single array.
# Note: flatten_masks = True is required for masking action logits in the code below.
'flatten_masks': False,
'flatten_masks': True,
}
@@ -75,7 +86,7 @@ eval_env_config = {
# ===== SCENARIO CLASS =====
# Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
# The environment object will be an instance of the Scenario class.
'scenario_name': 'simple_market',
'scenario_name': 'econ',
# ===== COMPONENTS =====
# Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
@@ -84,30 +95,34 @@ eval_env_config = {
# The order in which components reset, step, and generate obs follows their listed order below.
'components': [
# (1) Building houses
('SimpleCraft', {'skill_dist': "none", 'payment_max_skill_multiplier': 3}),
('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
# (2) Trading collectible resources
#('ContinuousDoubleAuction', {'max_num_orders': 10}),
('ContinuousDoubleAuction', {'max_num_orders': 10}),
# (3) Movement and resource collection
('SimpleGather', {}),
('ExternalMarket',{'market_demand':{
'Gem': 15
}}),
],
# ===== SCENARIO CLASS ARGUMENTS =====
# (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
'starting_agent_coin': 0,
'starting_agent_coin': 50,
'fixed_four_skill_and_loc': True,
# ===== STANDARD ARGUMENTS ======
# kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
'agent_composition': {"BasicMobileAgent": 20}, # Number of non-planner agents (must be > 1)
'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
'world_size': [1, 1], # [Height, Width] of the env world
'episode_length': 100, # Number of timesteps per episode
'episode_length': 256, # Number of timesteps per episode
'allow_observation_scaling': True,
'dense_log_frequency': 10,
'isoelastic_eta':0.001,
'dense_log_frequency': 1,
'world_dense_log_frequency':1,
'energy_cost':0,
'energy_warmup_method': "auto",
'energy_warmup_constant': 0,
'energy_warmup_constant': 4000,
# In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
# Otherwise, the policy selects only 1 action.
@@ -119,10 +134,10 @@ eval_env_config = {
'flatten_observations': False,
# When Flattening masks, concatenate each action subspace mask into a single array.
# Note: flatten_masks = True is required for masking action logits in the code below.
'flatten_masks': False,
'flatten_masks': True,
}
num_frames=2
num_frames=1
class TensorboardCallback(BaseCallback):
"""
@@ -135,18 +150,36 @@ class TensorboardCallback(BaseCallback):
self.metrics=econ.scenario_metrics()
def _on_step(self) -> bool:
# Log scalar value (here a random variable)
prev_metrics=self.metrics
if self.econ.previous_episode_metrics is None:
self.metrics=self.econ.scenario_metrics()
else:
self.metrics=self.econ.previous_episode_metrics
curr_prod=self.metrics["social/productivity"]
trend_pord=curr_prod-prev_metrics["social/productivity"]
self.logger.record("social/total_productivity", curr_prod)
self.logger.record("social/delta_productivity", trend_pord)
if econ.world.timestep==0:
prev_metrics=self.metrics
if self.econ.previous_episode_metrics is None:
self.metrics=self.econ.scenario_metrics()
else:
self.metrics=self.econ.previous_episode_metrics
curr_prod=self.metrics["social/productivity"]
trend_pord=curr_prod-prev_metrics["social/productivity"]
self.logger.record("social/total_productivity", curr_prod)
self.logger.record("social/delta_productivity", trend_pord)
return True
min_at_target_basic=0.5
min_lr_basic=5e-6
start_lr_basic=9e-4
min_at_target_trade=0.5
min_lr_trade=5e-6
start_lr_trade=9e-4
def learning_rate_adj_basic(x) -> float:
diff=start_lr_basic-min_lr_basic
lr=min_lr_basic+x*diff
return lr
def learning_rate_adj_trade(x) -> float:
diff=start_lr_trade-min_lr_trade
lr=min_lr_basic+x*diff
return lr
def printMarket(market):
for i in range(len(market)):
@@ -188,7 +221,7 @@ def printReplay(econ,agentid):
print("--- State ---")
state=log['states'][step][agentid]
print(yaml.dump(state))
pprint.pprint(state)
print("--- Action ---")
action=log["actions"][step][agentid]
@@ -205,63 +238,120 @@ def printReplay(econ,agentid):
#Setup Env Objects
econ=foundation.make_env_instance(**env_config)
market=econ.get_component("ContinuousDoubleAuction")
action=market.get_n_actions("TradingAgent")
baseEconWrapper=BaseEconWrapper(econ)
baseEconWrapper.run()
time.sleep(0.5)
mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent")
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent")
#obs=sb3Converter.reset()
#vecenv=EconVecEnv(env_config=env_config)
tradeRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="TradingAgent")
sb3_traderConverter=SB3EconConverter(tradeRecieverEconWrapper,econ,"TradingAgent",True)
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent",True)
# attach sb3 wrappers
monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"])
montraidingenv=VecMonitor(venv=sb3_traderConverter)
stackenv_basic=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=num_frames)
stackenv_traid=vec_frame_stack.VecFrameStack(venv=montraidingenv,n_stack=num_frames)
# Model setup complete
# Setup Eval Env
econ_eval=foundation.make_env_instance(**eval_env_config)
baseEconWrapper_eval=BaseEconWrapper(econ_eval)
baseEconWrapper_eval.run()
time.sleep(0.5)
mobileRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="BasicMobileAgent")
tradeRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="TradingAgent")
sb3_traderConverter_eval=SB3EconConverter(tradeRecieverEconWrapper_eval,econ_eval,"TradingAgent",False)
sb3Converter_eval=SB3EconConverter(mobileRecieverEconWrapper_eval,econ_eval,"BasicMobileAgent",False)
# attach sb3 wrappers
monenv_eval=VecMonitor(venv=sb3Converter_eval,info_keywords=["social/productivity","trend/productivity"])
montraidingenv_eval=VecMonitor(venv=sb3_traderConverter_eval)
stackenv_basic_eval=vec_frame_stack.VecFrameStack(venv=monenv_eval,n_stack=num_frames)
stackenv_traid_eval=vec_frame_stack.VecFrameStack(venv=montraidingenv_eval,n_stack=num_frames)
#normenv=VecNormalize(sb3Converter,norm_reward=False,clip_obs=1)
#stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
obs=monenv.reset()
# define training functions
def train(model,timesteps, econ_call,process_bar,name,db,index):
db[index]=model.learn(total_timesteps=timesteps,progress_bar=process_bar,reset_num_timesteps=False,tb_log_name=name,callback=TensorboardCallback(econ_call))
runname="run_{}".format(int(np.random.rand()*100))
# prepare training
run_number=int(np.random.rand()*100)
runname="run_{}".format(run_number)
model_db=[None,None] # object for storing model
model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=learning_rate_adj_basic,env=stackenv_basic, seed=445,verbose=1,device="cuda",tensorboard_log="./log")
model_trade=MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=learning_rate_adj_trade,env=stackenv_traid, seed=445,verbose=1,device="cuda",tensorboard_log="./log")
model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")
n_agents=econ.n_agents
total_required_for_episode=n_agents*env_config['episode_length']
print("this is run {}".format(runname))
while True:
# Create Eval ENV
vec_env_eval=EconVecEnv(env_config=eval_env_config)
vec_mon_eval=VecMonitor(venv=vec_env_eval)
norm_env_eval=VecNormalize(vec_mon_eval,norm_reward=False,training=False)
eval_econ = vec_env_eval.env
#Train
model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
#normenv.save("temp-normalizer.ai")
total_required_for_episode_basic=len(mobileRecieverEconWrapper.agnet_idx)*env_config['episode_length']
total_required_for_episode_traid=len(tradeRecieverEconWrapper.agnet_idx)*env_config['episode_length']
print("this is run {}".format(runname))
while True:
#Train
runname="run_{}_{}".format(run_number,"basic")
thread_model=Thread(target=train,args=(model,total_required_for_episode_basic*150,econ,True,runname,model_db,0))
runname="run_{}_{}".format(run_number,"trader")
thread_model_traid=Thread(target=train,args=(model_trade,total_required_for_episode_traid*150,econ,False,runname,model_db,1))
thread_model.start()
thread_model_traid.start()
thread_model.join()
thread_model_traid.join()
#normenv.save("temp-normalizer.ai")
model=model_db[0]
model_trade=model_db[1]
model.save("basic.ai")
model_trade.save("trade.ai")
## Run Eval
print("### EVAL ###")
norm_env_eval.load("temp-normalizer.ai",vec_mon_eval)
obs=vec_mon_eval.reset()
obs_basic=stackenv_basic_eval.reset()
obs_trade=stackenv_traid_eval.reset()
done=False
for i in tqdm(range(eval_env_config['episode_length'])):
action=model.predict(obs)
obs,rew,done_e,info=vec_mon_eval.step(action[0])
#create masks
masks_basic=stackenv_basic_eval.action_masks()
masks_trade=stackenv_traid_eval.action_masks()
# get actions
action_basic=model.predict(obs_basic,action_masks=masks_basic)
action_trade=model_trade.predict(obs_trade,action_masks=masks_trade)
#submit async directly for non blocking operation
sb3Converter_eval.step_async(action_basic[0])
sb3_traderConverter_eval.step_async(action_trade[0])
# retieve full results
obs_basic,rew_basic,done_e,info=stackenv_basic_eval.step(action_basic[0])
obs_trade,rew_trade,done_e,info=stackenv_traid_eval.step(action_trade[0])
done=done_e[0]
#market=eval_econ.get_component("ContinuousDoubleAuction")
craft=eval_econ.get_component("SimpleCraft")
market=econ_eval.get_component("ContinuousDoubleAuction")
craft=econ_eval.get_component("Craft")
# trades=market.get_dense_log()
build=craft.get_dense_log()
met=econ.previous_episode_metrics
printReplay(eval_econ,0)
printReplay(econ_eval,0)
# printMarket(trades)
printBuilds(builds=build)
# printBuilds(builds=build)
print("social/productivity: {}".format(met["social/productivity"]))
print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))

3
resources/__init_.py Normal file
View File

@@ -0,0 +1,3 @@
from . import (
resources
)

4
resources/resources.py Normal file
View File

@@ -0,0 +1,4 @@
import numpy as np
from ai_economist.foundation.entities.resources import Resource, resource_registry

343
test.py Normal file
View File

@@ -0,0 +1,343 @@
import numpy as np
from ai_economist import foundation
from stable_baselines3.common.vec_env import vec_frame_stack
from stable_baselines3.common.evaluation import evaluate_policy
from sb3_contrib.ppo_mask import MaskablePPO
import envs
import wrapper
import resources
import pprint
from agents import trading_agent
from wrapper.base_econ_wrapper import BaseEconWrapper
from wrapper.reciever_econ_wrapper import RecieverEconWrapper
from wrapper.sb3_econ_converter import SB3EconConverter
from tqdm import tqdm
import components
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
from sb3_contrib import RecurrentPPO
from envs.econ_wrapper import EconVecEnv
from stable_baselines3.common.callbacks import BaseCallback
import yaml
import time
from threading import Thread
# Training environment configuration (ai-economist Foundation scenario kwargs).
env_config = {
    # ===== SCENARIO CLASS =====
    # Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
    # The environment object will be an instance of the Scenario class.
    'scenario_name': 'econ',
    # ===== COMPONENTS =====
    # Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
    # "component_name" refers to the Component class's name in the Component Registry (foundation.components)
    # {component_kwargs} is a dictionary of kwargs passed to the Component class
    # The order in which components reset, step, and generate obs follows their listed order below.
    'components': [
        # (1) Crafting Gems; crafting skill is drawn from a pareto distribution
        ('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
        # (2) Trading collectible resources
        ('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
        # (4) Fixed external market demand for Gems (units per period — TODO confirm)
        ('ExternalMarket',{'market_demand':{
            'Gem': 15
        }}),
    ],
    # ===== SCENARIO CLASS ARGUMENTS =====
    # (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
    'starting_agent_coin': 10,
    'fixed_four_skill_and_loc': True,
    # ===== STANDARD ARGUMENTS ======
    # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
    'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
    'world_size': [5, 5], # [Height, Width] of the env world
    'episode_length': 256, # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 100,
    'world_dense_log_frequency':1,
    'energy_cost':0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 4000,
    # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
    # Otherwise, the policy selects only 1 action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,
    # When flattening observations, concatenate scalar & vector observations before output.
    # Otherwise, return observations with minimal processing.
    'flatten_observations': False,
    # When Flattening masks, concatenate each action subspace mask into a single array.
    # Note: flatten_masks = True is required for masking action logits in the code below.
    'flatten_masks': True,
}
# Evaluation environment configuration. Mirrors env_config except:
# world_size is [1, 1] and dense_log_frequency is 1 (log every episode so
# printReplay below always has a fresh dense log).
eval_env_config = {
    # ===== SCENARIO CLASS =====
    # Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
    # The environment object will be an instance of the Scenario class.
    'scenario_name': 'econ',
    # ===== COMPONENTS =====
    # Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
    # "component_name" refers to the Component class's name in the Component Registry (foundation.components)
    # {component_kwargs} is a dictionary of kwargs passed to the Component class
    # The order in which components reset, step, and generate obs follows their listed order below.
    'components': [
        # (1) Crafting Gems; crafting skill is drawn from a pareto distribution
        ('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
        # (2) Trading collectible resources
        ('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
        # (4) Fixed external market demand for Gems (units per period — TODO confirm)
        ('ExternalMarket',{'market_demand':{
            'Gem': 15
        }}),
    ],
    # ===== SCENARIO CLASS ARGUMENTS =====
    # (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
    'starting_agent_coin': 10,
    'fixed_four_skill_and_loc': True,
    # ===== STANDARD ARGUMENTS ======
    # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
    'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
    'world_size': [1, 1], # [Height, Width] of the env world
    'episode_length': 256, # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 1,
    'world_dense_log_frequency':1,
    'energy_cost':0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 4000,
    # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
    # Otherwise, the policy selects only 1 action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,
    # When flattening observations, concatenate scalar & vector observations before output.
    # Otherwise, return observations with minimal processing.
    'flatten_observations': False,
    # When Flattening masks, concatenate each action subspace mask into a single array.
    # Note: flatten_masks = True is required for masking action logits in the code below.
    'flatten_masks': True,
}
# Number of stacked observation frames fed to the policies (VecFrameStack below).
num_frames=5
class TensorboardCallback(BaseCallback):
    """Log economy-level scenario metrics (productivity and its trend) to tensorboard.

    The metric cache is refreshed only at the first timestep of a new episode
    (``timestep == 0``), preferring the environment's
    ``previous_episode_metrics`` when one exists.
    """

    def __init__(self, econ, verbose=0):
        super().__init__(verbose)
        self.econ = econ
        # Seed the cache so the first episode-boundary delta has a baseline.
        self.metrics = econ.scenario_metrics()

    def _on_step(self) -> bool:
        # BUGFIX: the original read the module-global `econ` here instead of
        # the environment passed to this callback (`self.econ`), so a callback
        # constructed for any other env would still track the training env.
        if self.econ.world.timestep == 0:
            prev_metrics = self.metrics
            if self.econ.previous_episode_metrics is None:
                self.metrics = self.econ.scenario_metrics()
            else:
                self.metrics = self.econ.previous_episode_metrics
            curr_prod = self.metrics["social/productivity"]
            trend_prod = curr_prod - prev_metrics["social/productivity"]
            self.logger.record("social/total_productivity", curr_prod)
            self.logger.record("social/delta_productivity", trend_prod)
        return True
def printMarket(market):
    """Pretty-print a dense market log.

    Prints a header for each step that contained trades, followed by one
    formatted line per transaction. Always returns an empty string.
    """
    for step_idx, transactions in enumerate(market):
        if not transactions:
            continue
        print("=== Step {} ===".format(step_idx))
        for t in transactions:
            line = "({}) {} -> {} | [{}/{}] {} Coins\n".format(
                t["commodity"], t["seller"], t["buyer"],
                t["ask"], t["bid"], t["price"],
            )
            print(line)
    return ""
def printBuilds(builds):
    """Pretty-print a dense craft/build log.

    Prints one line per build event, tagged with the step index at which it
    occurred. Always returns an empty string.
    """
    for step_idx, events in enumerate(builds):
        # Iterating an empty step prints nothing, so no emptiness check needed.
        for event in events:
            print("({}) Builder: {}, Skill: {}, Income {} ".format(
                step_idx, event["builder"], event["build_skill"], event["income"]))
    return ""
def printReplay(econ, agentid, worldmaps=("Stone", "Wood")):
    """Replay one agent's episode from the env's previous dense log.

    For every step except the final one, prints the selected world resource
    maps, the agent's state, its action (NOOP when the action dict is empty),
    and its reward.

    Args:
        econ: environment exposing ``previous_episode_dense_log``.
        agentid: integer index of the agent to replay.
        worldmaps: names of world resource maps to print
            (generalized from the previously hard-coded ``["Stone", "Wood"]``).
    """
    log = econ.previous_episode_dense_log
    # Removed unused local `agent = econ.world.agents[agentid]` from the
    # original; the dense log alone is sufficient for the replay.
    agentid = str(agentid)  # dense-log dicts are keyed by the agent's string idx
    max_step = len(log["states"]) - 1
    for step in range(max_step):
        print()
        print("=== Step {} ===".format(step))
        # World resource maps (only the top-left cell is shown).
        print("--- World ---")
        world = log['world'][step]
        for res in worldmaps:
            print("{}: {}".format(res, world[res][0][0]))
        print("--- State ---")
        state = log['states'][step][agentid]
        pprint.pprint(state)
        print("--- Action ---")
        action = log["actions"][step][agentid]
        if action == {}:
            print("Action: 0 -> NOOP")
        else:
            for k in action:
                formats = "Action: {}({})".format(k, action[k])
                print(formats)
        print("--- Reward ---")
        reward = log["rewards"][step][agentid]
        print("Reward: {}".format(reward))
#Setup Env Objects
# Build the training economy and wrap it for multi-threaded SB3 training:
# BaseEconWrapper drives the shared env on its own thread, one
# RecieverEconWrapper per agent class exposes a per-policy view of that env,
# and SB3EconConverter adapts each view to SB3's VecEnv interface.
econ=foundation.make_env_instance(**env_config)
market=econ.get_component("ContinuousDoubleAuction")
# NOTE(review): `action` is never used below — presumably a leftover probe of
# the trading action space; confirm before deleting.
action=market.get_n_actions("TradingAgent")
baseEconWrapper=BaseEconWrapper(econ)
baseEconWrapper.run()
# Give the env thread a moment to come up — TODO: replace with a ready signal.
time.sleep(0.5)
mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent")
tradeRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="TradingAgent")
# Last argument is SB3EconConverter's auto_reset flag: True for the training
# stack (envs reset themselves), False for the eval stack further below.
sb3_traderConverter=SB3EconConverter(tradeRecieverEconWrapper,econ,"TradingAgent",True)
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent",True)
# attach sb3 wrappers
monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"])
montraidingenv=VecMonitor(venv=sb3_traderConverter)
stackenv_basic=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=num_frames)
stackenv_traid=vec_frame_stack.VecFrameStack(venv=montraidingenv,n_stack=num_frames)
# Model setup complete
# Setup Eval Env
# Mirror of the training stack, built from eval_env_config, with auto_reset
# disabled so the eval loop controls resets itself.
econ_eval=foundation.make_env_instance(**eval_env_config)
baseEconWrapper_eval=BaseEconWrapper(econ_eval)
baseEconWrapper_eval.run()
time.sleep(0.5)
mobileRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="BasicMobileAgent")
tradeRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="TradingAgent")
sb3_traderConverter_eval=SB3EconConverter(tradeRecieverEconWrapper_eval,econ_eval,"TradingAgent",False)
sb3Converter_eval=SB3EconConverter(mobileRecieverEconWrapper_eval,econ_eval,"BasicMobileAgent",False)
# attach sb3 wrappers
monenv_eval=VecMonitor(venv=sb3Converter_eval,info_keywords=["social/productivity","trend/productivity"])
montraidingenv_eval=VecMonitor(venv=sb3_traderConverter_eval)
stackenv_basic_eval=vec_frame_stack.VecFrameStack(venv=monenv_eval,n_stack=num_frames)
stackenv_traid_eval=vec_frame_stack.VecFrameStack(venv=montraidingenv_eval,n_stack=num_frames)
obs=monenv.reset()
# define training functions
def train(model,timesteps, econ_call,process_bar,name,db,index):
    """Run `model.learn` for `timesteps` steps and store the trained model in
    `db[index]`. Each worker thread writes a distinct slot, so no lock is
    needed around the assignment."""
    db[index]=model.learn(total_timesteps=timesteps,progress_bar=process_bar,reset_num_timesteps=False,tb_log_name=name,callback=TensorboardCallback(econ_call))
# prepare training
run_number=int(np.random.rand()*100)  # random run id used in tensorboard log names
runname="run_{}".format(run_number)
model_db=[None,None] # object for storing model: slot 0 = basic agents, slot 1 = traders
# One PPO policy per agent class; both are trained against the shared economy.
model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_basic, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
model_trade=MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_traid, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
n_agents=econ.n_agents
# Samples per policy per env episode = (#agents of that class) * episode_length.
total_required_for_episode_basic=len(mobileRecieverEconWrapper.agnet_idx)*env_config['episode_length']
total_required_for_episode_traid=len(tradeRecieverEconWrapper.agnet_idx)*env_config['episode_length']
print("this is run {}".format(runname))
# --- Main loop: one round of threaded training, then one eval episode ---
while True:
    #Train
    # Train both policies concurrently. Each thread steps the SAME shared
    # economy through its own wrapper stack, so the two threads must run
    # together for either env view to make progress.
    runname="run_{}_{}".format(run_number,"basic")
    thread_model=Thread(target=train,args=(model,total_required_for_episode_basic*50,econ,True,runname,model_db,0))
    runname="run_{}_{}".format(run_number,"trader")
    thread_model_traid=Thread(target=train,args=(model_trade,total_required_for_episode_traid*50,econ,False,runname,model_db,1))
    thread_model.start()
    thread_model_traid.start()
    thread_model.join()
    thread_model_traid.join()
    #normenv.save("temp-normalizer.ai")
    # Pick up the freshly trained models written by the worker threads.
    model=model_db[0]
    model_trade=model_db[1]
    model.save("basic.ai")
    model_trade.save("trade.ai")
    ## Run Eval
    print("### EVAL ###")
    obs_basic=stackenv_basic_eval.reset()
    obs_trade=stackenv_traid_eval.reset()
    done=False
    for i in tqdm(range(eval_env_config['episode_length'])):
        #create masks
        masks_basic=stackenv_basic_eval.action_masks()
        masks_trade=stackenv_traid_eval.action_masks()
        # get actions
        action_basic=model.predict(obs_basic,action_masks=masks_basic)
        action_trade=model_trade.predict(obs_trade,action_masks=masks_trade)
        #submit async directly for non blocking operation
        sb3Converter_eval.step_async(action_basic[0])
        sb3_traderConverter_eval.step_async(action_trade[0])
        # retrieve full results
        obs_basic,rew_basic,done_e,info=stackenv_basic_eval.step(action_basic[0])
        obs_trade,rew_trade,done_e,info=stackenv_traid_eval.step(action_trade[0])
        done=done_e[0]
    market=econ_eval.get_component("ContinuousDoubleAuction")
    craft=econ_eval.get_component("Craft")
    # trades=market.get_dense_log()
    build=craft.get_dense_log()
    # NOTE(review): metrics come from the TRAINING env (`econ`) while the
    # replay below uses the EVAL env (`econ_eval`) — confirm this mix is
    # intended.
    met=econ.previous_episode_metrics
    printReplay(econ_eval,0)
    # printMarket(trades)
    # printBuilds(builds=build)
    print("social/productivity: {}".format(met["social/productivity"]))
    print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
    print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))
    time.sleep(1)

BIN
trade 4000.ai Normal file

Binary file not shown.

BIN
trade.ai Normal file

Binary file not shown.

View File

@@ -3,25 +3,14 @@ from threading import Event, Lock, Thread
from queue import Queue
class BaseEconWrapper():
"""Base class for connecting reciever wrapper to a multi threaded econ simulation and training session"""
base_notification=Event() #Notification for Base
reset_notification=Event() #Notification for recievers
step_notifications=[] #Notification for recievers
action_edit_lock=Lock()
actor_actions={}
stop_edit_lock=Lock()
stop=False
vote_lock=Lock()
n_voters=0
n_votes_reset=0
# States of Env
env_data_lock=Lock()
obs=None
rew=None
done=None
@@ -30,6 +19,13 @@ class BaseEconWrapper():
def __init__(self, econ: base_env.BaseEnvironment):
self.env=econ
self.vote_lock=Lock()
self.base_notification=Event() #Notification for Base
self.reset_notification=Event() #Notification for recievers
self.action_edit_lock=Lock()
self.stop_edit_lock=Lock()
self.env_data_lock=Lock()
def register_vote(self):
"""Register reciever on base. Returns ID of Voter to pass on during blocking"""
@@ -149,7 +145,8 @@ class BaseEconWrapper():
self.action_edit_lock.acquire() # Start to submit action dict
for k,v in actions.items():
if k in self.actor_actions.keys():
raise Exception("Actor action has already been submitted. {}".format(k))
print("Actor action has already been submitted. {}".format(k))
continue
self.actor_actions[k]=v
self.step_notifications[voter_id].clear()
self.base_notification.set() #Alert base for action changes
@@ -168,9 +165,9 @@ class BaseEconWrapper():
def reciever_request_reset(self):
"""Adds to vote count to reset. If limit is reached reset will occure"""
self.vote_lock.acquire()
#self.vote_lock.acquire()
self.n_votes_reset+=1
self.vote_lock.release()
# self.vote_lock.release()
self.base_notification.set() #Alert base for action changes
def reciever_block_reset(self):

View File

@@ -23,7 +23,7 @@ class RecieverEconWrapper(gym.Env):
self.idx_to_index={}
#create idx to index map
for i in range(len(self.agnet_idx)):
self.idx_to_index[self.agnet_idx[i]]=i
self.idx_to_index[str(self.agnet_idx[i])]=i
first_idx=self.agnet_idx[0]
@@ -35,6 +35,7 @@ class RecieverEconWrapper(gym.Env):
def _dict_idx_to_index(self, data):
data_out={}
for k,v in data.items():
if k in self.idx_to_index:
index=self.idx_to_index[k]
data_out[index]=v

View File

@@ -8,7 +8,7 @@ from typing import Any, Callable, List, Optional, Sequence, Type, Union
class SB3EconConverter(VecEnv, gym.Env):
def __init__(self, env: gym.Env, econ: base_env.BaseEnvironment,agentclass: str):
def __init__(self, env: gym.Env, econ: base_env.BaseEnvironment,agentclass: str,auto_reset: bool):
self.env=env
self.econ=econ
#get observation sample
@@ -20,7 +20,9 @@ class SB3EconConverter(VecEnv, gym.Env):
#flatten obervation of first agent
obs0=utils.package(obs[0],*self.packager)
obs0["flat"]
self.observation_space=gym.spaces.Box(low=-np.inf,high=np.inf,shape=(len(obs0["flat"]),),dtype=np.float32)
self.step_request_send=False
self.auto_reset=auto_reset
self.observation_space=gym.spaces.Box(low=0,high=10,shape=(len(obs0["flat"]),),dtype=np.float32)
super().__init__(self.num_envs, self.observation_space, self.action_space)
@@ -30,12 +32,15 @@ class SB3EconConverter(VecEnv, gym.Env):
agent=self.econ.world.agents[idx]
return gym.spaces.Discrete(agent.action_spaces)
def step_async(self, actions: np.ndarray) -> None:
d_actions=utils.convert_gym_to_econ(actions)
return self.env.step_async(d_actions)
def step_async(self, actions: np.ndarray):
if self.step_request_send==False:
self.step_request_send=True
d_actions=utils.convert_gym_to_econ(actions)
return self.env.step_async(d_actions)
def step_wait(self) -> VecEnvStepReturn:
obs,rew,done,info=self.env.step_wait()
self.curr_obs=obs
#flatten obs
f_obs={}
for k,v in obs.items():
@@ -61,12 +66,16 @@ class SB3EconConverter(VecEnv, gym.Env):
for i in range(self.num_envs):
done_g[i]=done
c_info[i]["terminal_observation"]=c_obs[i]
c_obs=self.reset()
if self.auto_reset:
c_obs=self.reset()
self.step_request_send=False
return np.copy(c_obs),np.copy(c_rew),np.copy(done_g),np.copy(c_info)
def reset(self) -> VecEnvObs:
obs=self.env.reset()
self.step_request_send=False
f_obs={}
self.curr_obs=obs
for k,v in obs.items():
f_obs[k]=utils.package(v,*self.packager)
g_obs={}
@@ -79,20 +88,27 @@ class SB3EconConverter(VecEnv, gym.Env):
def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
if seed is None:
seed = np.random.randint(0, 2**32 - 1)
seeds = []
for idx, env in enumerate(self.envs):
seeds.append(env.seed(seed + idx))
self.econ.seed(seed)
seeds=[seed]
return seeds
def action_masks(self):
"""Returns action masks for agents and current obs"""
masks=[]
for obs in self.curr_obs:
mask=[]
for num in self.curr_obs[obs]["action_mask"]:
mask.append(num==1.0)
masks.append(mask)
return masks
def close(self) -> None:
return
def get_attr(self, attr_name: str, indices: VecEnvIndices = None) -> List[Any]:
"""Return attribute from vectorized environment (see base class)."""
target_envs = self._get_target_envs(indices)
return [getattr(env_i, attr_name) for env_i in target_envs]
return getattr(self, attr_name)
@@ -106,8 +122,7 @@ class SB3EconConverter(VecEnv, gym.Env):
def env_method(self, method_name: str, *method_args, indices: VecEnvIndices = None, **method_kwargs) -> List[Any]:
"""Call instance methods of vectorized environments."""
target_envs = self._get_target_envs(indices)
return [getattr(env_i, method_name)(*method_args, **method_kwargs) for env_i in target_envs]
return getattr(self, method_name)(*method_args, **method_kwargs)