crafting done ?
This commit is contained in:
@@ -38,7 +38,7 @@ class BaseAgent:
|
||||
|
||||
if idx is None:
|
||||
idx = 0
|
||||
|
||||
|
||||
if multi_action_mode is None:
|
||||
multi_action_mode = False
|
||||
|
||||
@@ -64,6 +64,7 @@ class BaseAgent:
|
||||
self._registered_inventory = False
|
||||
self._registered_endogenous = False
|
||||
self._registered_components = False
|
||||
self._setup = False # agent setup not completed
|
||||
self._noop_action_dict = dict()
|
||||
|
||||
# Special flag to allow logic for multi-action-mode agents
|
||||
@@ -78,7 +79,13 @@ class BaseAgent:
|
||||
def idx(self):
|
||||
"""Index used to identify this agent. Must be unique within the environment."""
|
||||
return self._idx
|
||||
|
||||
@property
|
||||
def is_setup(self):
|
||||
return self._setup
|
||||
|
||||
def set_setup(self, set):
|
||||
self._setup=set
|
||||
|
||||
def register_inventory(self, resources):
|
||||
"""Used during environment construction to populate inventory/escrow fields."""
|
||||
if self._registered_inventory:
|
||||
|
||||
@@ -342,16 +342,11 @@ class BaseEnvironment(ABC):
|
||||
self._components_dict[component_object.name] = component_object
|
||||
self._shorthand_lookup[component_object.shorthand] = component_object
|
||||
|
||||
# Register the components with the agents
|
||||
# to finish setting up their state/action spaces.
|
||||
for agent in self.world.agents:
|
||||
agent.register_inventory(self.resources)
|
||||
agent.register_endogenous(self.endogenous)
|
||||
agent.register_components(self._components)
|
||||
|
||||
self.world.planner.register_inventory(self.resources)
|
||||
self.world.planner.register_components(self._components)
|
||||
|
||||
self._agent_lookup = {str(agent.idx): agent for agent in self.all_agents}
|
||||
self.apply_scenario_config_to_agents()
|
||||
|
||||
|
||||
self._completions = 0
|
||||
|
||||
@@ -370,6 +365,16 @@ class BaseEnvironment(ABC):
|
||||
# To collate all the agents ('0', '1', ...) data during reset and step
|
||||
# into a single agent with index 'a'
|
||||
self.collate_agent_step_and_reset_data = collate_agent_step_and_reset_data
|
||||
|
||||
def apply_scenario_config_to_agents(self):
|
||||
# Register the components with the agents
|
||||
# to finish setting up their state/action spaces.
|
||||
for agent in self.world.agents:
|
||||
agent.register_inventory(self.resources)
|
||||
agent.register_endogenous(self.endogenous)
|
||||
agent.register_components(self._components)
|
||||
self._agent_lookup = {str(agent.idx): agent for agent in self.all_agents}
|
||||
self.world.apply_agent_db_to_world()
|
||||
|
||||
def _register_entities(self, entities):
|
||||
for entity in entities:
|
||||
@@ -920,6 +925,7 @@ class BaseEnvironment(ABC):
|
||||
# Reset actions to that default.
|
||||
for agent in self.all_agents:
|
||||
agent.reset_actions()
|
||||
agent.set_setup(True)
|
||||
|
||||
# Produce observations
|
||||
obs = self._generate_observations(
|
||||
|
||||
@@ -382,7 +382,7 @@ class World:
|
||||
self._agent_class_idx_map={}
|
||||
#create agents
|
||||
self.create_agents(agent_composition)
|
||||
|
||||
|
||||
self.maps = Maps(world_size, self.n_agents, world_resources, world_landmarks)
|
||||
|
||||
planner_class = agent_registry.get("BasicPlanner")
|
||||
@@ -410,7 +410,7 @@ class World:
|
||||
self._agent_class_idx_map[k].append(str(self.n_agents))
|
||||
self.n_agents+=1
|
||||
|
||||
def apply_agent_db(self):
|
||||
def apply_agent_db_to_world(self):
|
||||
"""Applys current agent db into lookup maps inside world and map itself. Enables insertion of new agents into existing env."""
|
||||
self.n_agents=len(self._agents)
|
||||
self._agent_class_idx_map={}
|
||||
|
||||
@@ -10,7 +10,7 @@ from ai_economist.foundation.base.base_component import (
|
||||
BaseComponent,
|
||||
component_registry,
|
||||
)
|
||||
from ai_economist.foundation.entities.resources import resource_registry
|
||||
from ai_economist.foundation.entities.resources import Resource, resource_registry
|
||||
|
||||
|
||||
@component_registry.add
|
||||
@@ -47,7 +47,7 @@ class Craft(BaseComponent):
|
||||
skill_dist="none",
|
||||
**base_component_kwargs
|
||||
):
|
||||
#append commodities
|
||||
#setup commodities
|
||||
for v in commodities:
|
||||
res_class=resource_registry.get(v)
|
||||
res=res_class()
|
||||
@@ -74,10 +74,10 @@ class Craft(BaseComponent):
|
||||
self.builds = []
|
||||
super().__init__(*base_component_args, **base_component_kwargs)
|
||||
|
||||
def agent_can_build(self, agent):
|
||||
def agent_can_build(self, agent, recipe):
|
||||
"""Return True if agent can actually build in its current location."""
|
||||
# See if the agent has the resources necessary to complete the action
|
||||
for resource, cost in self.resource_cost.items():
|
||||
for resource, cost in recipe.items():
|
||||
if agent.state["inventory"][resource] < cost:
|
||||
return False
|
||||
return True
|
||||
@@ -93,7 +93,7 @@ class Craft(BaseComponent):
|
||||
"""
|
||||
# This component adds 1 action that mobile agents can take: build a house
|
||||
if agent_cls_name in self.agent_subclasses:
|
||||
return 1
|
||||
return len(self.commodities)
|
||||
|
||||
return None
|
||||
|
||||
@@ -106,7 +106,7 @@ class Craft(BaseComponent):
|
||||
if agent_cls_name not in self.agent_subclasses:
|
||||
return {}
|
||||
if agent_cls_name == "BasicMobileAgent":
|
||||
return {"build_payment": float(self.payment), "build_skill": 1}
|
||||
return {}
|
||||
raise NotImplementedError
|
||||
|
||||
def component_step(self):
|
||||
@@ -131,29 +131,32 @@ class Craft(BaseComponent):
|
||||
pass
|
||||
|
||||
# Build! (If you can.)
|
||||
elif action == 1:
|
||||
if self.agent_can_build(agent):
|
||||
else:
|
||||
comm=self.commodities[action]
|
||||
|
||||
if self.agent_can_build(agent,comm.craft_recp):
|
||||
# Remove the resources
|
||||
for resource, cost in self.resource_cost.items():
|
||||
for resource, cost in comm.craft_recp.items():
|
||||
agent.state["inventory"][resource] -= cost
|
||||
|
||||
# Receive payment for the house
|
||||
agent.state["inventory"]["Coin"] += agent.state["build_payment"]
|
||||
# Receive crafted commodity
|
||||
agent.state["inventory"][comm.name] += agent.state["craft_amount"][comm.name]
|
||||
|
||||
# Incur the labor cost for building
|
||||
agent.state["endogenous"]["Labor"] += self.build_labor
|
||||
agent.state["endogenous"]["Labor"] += agent.state["craft_labour"][comm.name]
|
||||
|
||||
build.append(
|
||||
{
|
||||
"builder": agent.idx,
|
||||
"build_skill": self.sampled_skills[agent.idx],
|
||||
"income": float(agent.state["build_payment"]),
|
||||
"crafter": agent.idx,
|
||||
"craft_commodity": comm.name,
|
||||
"craft_skill": agent.state["craft_skill"][comm.name],
|
||||
"craft_amount": agent.state["craft_amount"][comm.name],
|
||||
"craft_labour": agent.state["craft_labour"][comm.name]
|
||||
}
|
||||
)
|
||||
else:
|
||||
agent.bad_action=True
|
||||
else:
|
||||
raise ValueError
|
||||
|
||||
|
||||
self.builds.append(build)
|
||||
|
||||
@@ -168,10 +171,10 @@ class Craft(BaseComponent):
|
||||
obs_dict = dict()
|
||||
for agent in self.world.agents:
|
||||
if agent.name in self.agent_subclasses:
|
||||
obs_dict[agent.idx] = {
|
||||
"build_payment": agent.state["build_payment"] / self.payment,
|
||||
"build_skill": self.sampled_skills[agent.idx],
|
||||
}
|
||||
obs_dict[agent.idx]["craft_skill"]={}
|
||||
for k in self.commodities:
|
||||
obs_dict[agent.idx]["craft_skill"][k.name] = agent.state["craft_skill"][k.name]
|
||||
|
||||
|
||||
return obs_dict
|
||||
|
||||
@@ -186,7 +189,8 @@ class Craft(BaseComponent):
|
||||
# Mobile agents' build action is masked if they cannot build with their
|
||||
# current location and/or endowment
|
||||
for agent in self.world.agents:
|
||||
masks[agent.idx] = np.array([self.agent_can_build(agent)])
|
||||
if agent.name in self.agent_subclasses:
|
||||
masks[agent.idx] = np.array([self.agent_can_build(agent,k.name) for k in self.commodities])
|
||||
|
||||
return masks
|
||||
|
||||
@@ -227,27 +231,35 @@ class Craft(BaseComponent):
|
||||
"""
|
||||
world = self.world
|
||||
|
||||
self.sampled_skills = {agent.idx: 1 for agent in world.agents}
|
||||
|
||||
PMSM = self.payment_max_skill_multiplier
|
||||
MSAB= self.max_skill_amount_benefit
|
||||
MSLB= self.max_skill_labour_benefit
|
||||
|
||||
|
||||
|
||||
for agent in world.agents:
|
||||
if self.skill_dist == "none":
|
||||
sampled_skill = 1
|
||||
pay_rate = 1
|
||||
elif self.skill_dist == "pareto":
|
||||
sampled_skill = np.random.pareto(4)
|
||||
pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1)
|
||||
elif self.skill_dist == "lognormal":
|
||||
sampled_skill = np.random.lognormal(-1, 0.5)
|
||||
pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
if agent.name not in self.agent_subclasses | agent.is_setup():
|
||||
continue
|
||||
agent.state["craft_skill"]={}
|
||||
agent.state["craft_labour"]={}
|
||||
agent.state["craft_amount"]={}
|
||||
|
||||
agent.state["build_payment"] = float(pay_rate * self.payment)
|
||||
agent.state["build_skill"] = float(sampled_skill)
|
||||
for comm in self.commodities:
|
||||
if self.skill_dist == "none":
|
||||
sampled_skill = 1
|
||||
amount= 1
|
||||
labour = 1
|
||||
elif self.skill_dist == "pareto":
|
||||
labour = 1
|
||||
sampled_skill = np.random.pareto(2)
|
||||
amount = np.minimum(MSAB, MSAB * sampled_skill)
|
||||
labour_modifier = 1 - np.minimum(1 - MSLB, (1 - MSLB) * sampled_skill)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
agent.state["craft_skill"][comm.name]=sampled_skill
|
||||
agent.state["craft_labour"][comm.name]=comm.craft_labour_base*labour_modifier
|
||||
agent.state["craft_amount"][comm.name]=amount
|
||||
|
||||
self.sampled_skills[agent.idx] = sampled_skill
|
||||
|
||||
self.builds = []
|
||||
|
||||
|
||||
474
envs/econ.py
Normal file
474
envs/econ.py
Normal file
@@ -0,0 +1,474 @@
|
||||
# Copyright (c) 2020, salesforce.com, inc.
|
||||
# All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
# For full license text, see the LICENSE file in the repo root
|
||||
# or https://opensource.org/licenses/BSD-3-Clause
|
||||
|
||||
from copy import deepcopy
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from scipy import signal
|
||||
|
||||
from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry
|
||||
from ai_economist.foundation.scenarios.utils import rewards, social_metrics
|
||||
import yaml
|
||||
|
||||
|
||||
@scenario_registry.add
|
||||
class Econ(BaseEnvironment):
|
||||
"""
|
||||
World containing stone and wood with stochastic regeneration. Refers to a fixed
|
||||
layout file (see ./map_txt/ for examples) to determine the spatial arrangement of
|
||||
stone, wood, and water tiles.
|
||||
|
||||
Args:
|
||||
planner_gets_spatial_obs (bool): Whether the planner agent receives spatial
|
||||
observations from the world.
|
||||
full_observability (bool): Whether the mobile agents' spatial observation
|
||||
includes the full world view or is instead an egocentric view.
|
||||
mobile_agent_observation_range (int): If not using full_observability,
|
||||
the spatial range (on each side of the agent) that is visible in the
|
||||
spatial observations.
|
||||
env_layout_file (str): Name of the layout file in ./map_txt/ to use.
|
||||
Note: The world dimensions of that layout must match the world dimensions
|
||||
argument used to construct the environment.
|
||||
resource_regen_prob (float): Probability that an empty source tile will
|
||||
regenerate a new resource unit.
|
||||
fixed_four_skill_and_loc (bool): Whether to use a fixed set of build skills and
|
||||
starting locations, with agents grouped into starting locations based on
|
||||
which skill quartile they are in. False, by default.
|
||||
True, for experiments in https://arxiv.org/abs/2004.13332.
|
||||
Note: Requires that the environment uses the "Build" component with
|
||||
skill_dist="pareto".
|
||||
starting_agent_coin (int, float): Amount of coin agents have at t=0. Defaults
|
||||
to zero coin.
|
||||
isoelastic_eta (float): Parameter controlling the shape of agent utility
|
||||
wrt coin endowment.
|
||||
energy_cost (float): Coefficient for converting labor to negative utility.
|
||||
energy_warmup_constant (float): Decay constant that controls the rate at which
|
||||
the effective energy cost is annealed from 0 to energy_cost. Set to 0
|
||||
(default) to disable annealing, meaning that the effective energy cost is
|
||||
always energy_cost. The units of the decay constant depend on the choice of
|
||||
energy_warmup_method.
|
||||
energy_warmup_method (str): How to schedule energy annealing (warmup). If
|
||||
"decay" (default), use the number of completed episodes. If "auto",
|
||||
use the number of timesteps where the average agent reward was positive.
|
||||
planner_reward_type (str): The type of reward used for the planner. Options
|
||||
are "coin_eq_times_productivity" (default),
|
||||
"inv_income_weighted_coin_endowment", and "inv_income_weighted_utility".
|
||||
mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/
|
||||
"coin_eq_times_productivity". Default is 0, which weights equality and
|
||||
productivity equally. If set to 1, only productivity is rewarded.
|
||||
"""
|
||||
|
||||
name = "econ"
|
||||
agent_subclasses = ["BasicMobileAgent"]
|
||||
required_entities = ["Wood", "Stone", "Water"]
|
||||
|
||||
def __init__(
    self,
    *base_env_args,
    resource_regen_prob=0.01,
    fixed_four_skill_and_loc=False,
    starting_agent_coin=0,
    isoelastic_eta=0.23,
    energy_cost=0.21,
    energy_warmup_constant=0,
    energy_warmup_method="decay",
    planner_reward_type="coin_eq_times_productivity",
    mixing_weight_gini_vs_coin=0.0,
    **base_env_kwargs,
):
    """Construct the Econ scenario; see the class docstring for argument details."""
    super().__init__(*base_env_args, **base_env_kwargs)

    # Regeneration/health specs for the two harvestable resources.
    # NOTE(review): fixed_four_skill_and_loc is accepted but never read in this
    # constructor -- confirm whether it is still needed or should be stored.
    self.layout_specs = dict(
        Wood={
            "regen_weight": float(resource_regen_prob),
            "regen_halfwidth": 0,
            "max_health": 1,
        },
        Stone={
            "regen_weight": float(resource_regen_prob),
            "regen_halfwidth": 0,
            "max_health": 1,
        },
    )
    assert 0 <= self.layout_specs["Wood"]["regen_weight"] <= 1
    assert 0 <= self.layout_specs["Stone"]["regen_weight"] <= 1

    # How much coin do agents begin with at upon reset
    self.starting_agent_coin = float(starting_agent_coin)
    assert self.starting_agent_coin >= 0.0

    # Controls the diminishing marginal utility of coin.
    # isoelastic_eta=0 means no diminishing utility.
    self.isoelastic_eta = float(isoelastic_eta)
    assert 0.0 <= self.isoelastic_eta <= 1.0

    # The amount that labor is weighted in utility computation
    # (once annealing is finished)
    self.energy_cost = float(energy_cost)
    assert self.energy_cost >= 0

    # Which method to use for calculating the progress of energy annealing
    # If method = 'decay': #completed episodes
    # If method = 'auto' : #timesteps where avg. agent reward > 0
    self.energy_warmup_method = energy_warmup_method.lower()
    assert self.energy_warmup_method in ["decay", "auto"]
    # Decay constant for annealing to full energy cost
    # (if energy_warmup_constant == 0, there is no annealing)
    self.energy_warmup_constant = float(energy_warmup_constant)
    assert self.energy_warmup_constant >= 0
    # Counter used by the "auto" warmup method; incremented in compute_reward.
    self._auto_warmup_integrator = 0

    # Which social welfare function to use
    self.planner_reward_type = str(planner_reward_type).lower()

    # How much to weight equality if using SWF=eq*prod:
    #   0 -> SWF=eq * prod
    #   1 -> SWF=prod
    self.mixing_weight_gini_vs_coin = float(mixing_weight_gini_vs_coin)
    assert 0 <= self.mixing_weight_gini_vs_coin <= 1.0

    # Use this to calculate marginal changes and deliver that as reward
    self.init_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.prev_optimization_metric = {agent.idx: 0 for agent in self.all_agents}
    self.curr_optimization_metric = {agent.idx: 0 for agent in self.all_agents}

    """
    Fixed Four Skill and Loc
    ------------------------
    """
    # Starting positions keyed by mobile-agent idx (empty until assigned).
    self.agent_starting_pos = {agent.idx: [] for agent in self.world.agents}

    # Cache of the most recently logged values.
    # NOTE(review): attribute name "last_log_loged" looks like a typo
    # ("logged"), but renaming would break any external readers -- confirm.
    self.last_log_loged = {}
|
||||
@property
def energy_weight(self):
    """
    Energy annealing progress. Multiply with self.energy_cost to get the
    effective energy coefficient.
    """
    constant = self.energy_warmup_constant
    # No annealing configured: energy cost applies at full strength.
    if constant <= 0.0:
        return 1.0

    # Pick the progress counter that drives the annealing schedule.
    if self.energy_warmup_method == "decay":
        progress = self._completions
    elif self.energy_warmup_method == "auto":
        progress = self._auto_warmup_integrator
    else:
        raise NotImplementedError

    # Exponential approach from 0 toward 1 as progress grows.
    return float(1.0 - np.exp(-progress / constant))
|
||||
def is_bad_action(self, agent):
    """Return the agent's bad-action flag, clearing it as a side effect."""
    was_bad = agent.bad_action
    # Reset so the flag only reports actions taken since the last check.
    agent.bad_action = False
    return was_bad
||||
def get_current_optimization_metrics(self):
    """
    Compute optimization metrics based on the current state. Used to compute reward.

    Returns:
        curr_optimization_metric (dict): A dictionary of {agent.idx: metric}
            with an entry for each agent (including the planner) in the env.
    """
    curr_optimization_metric = {}
    # (for agents)
    for agent in self.world.agents:
        # Utility = isoelastic utility of coin endowment minus weighted labor.
        rew = rewards.isoelastic_coin_minus_labor(
            coin_endowment=agent.total_endowment("Coin"),
            total_labor=agent.state["endogenous"]["Labor"],
            isoelastic_eta=self.isoelastic_eta,
            labor_coefficient=self.energy_weight * self.energy_cost,
        )

        # rew -= agent.state["endogenous"]["noops"]
        curr_optimization_metric[agent.idx] = rew

    # (for the planner) -- choose the configured social welfare function.
    # NOTE(review): the constructor default is "coin_eq_times_productivity",
    # but "inv_income_weighted_coin_endowments" here does not match the
    # "inv_income_weighted_coin_endowment" spelling in the class docstring --
    # confirm which string callers actually pass.
    if self.planner_reward_type == "coin_eq_times_productivity":
        curr_optimization_metric[
            self.world.planner.idx
        ] = rewards.coin_eq_times_productivity(
            coin_endowments=np.array(
                [agent.total_endowment("Coin") for agent in self.world.agents]
            ),
            equality_weight=1 - self.mixing_weight_gini_vs_coin,
        )
    elif self.planner_reward_type == "inv_income_weighted_coin_endowments":
        curr_optimization_metric[
            self.world.planner.idx
        ] = rewards.inv_income_weighted_coin_endowments(
            coin_endowments=np.array(
                [agent.total_endowment("Coin") for agent in self.world.agents]
            )
        )
    elif self.planner_reward_type == "inv_income_weighted_utility":
        curr_optimization_metric[
            self.world.planner.idx
        ] = rewards.inv_income_weighted_utility(
            coin_endowments=np.array(
                [agent.total_endowment("Coin") for agent in self.world.agents]
            ),
            # Uses the per-agent utilities computed in the loop above.
            utilities=np.array(
                [curr_optimization_metric[agent.idx] for agent in self.world.agents]
            ),
        )
    else:
        print("No valid planner reward selected!")
        raise NotImplementedError
    return curr_optimization_metric
|
||||
# The following methods must be implemented for each scenario
|
||||
# -----------------------------------------------------------
|
||||
|
||||
def reset_starting_layout(self):
    """
    Part 1/2 of scenario reset. This method handles resetting the state of the
    environment managed by the scenario (i.e. resource & landmark layout).

    Here, reset to the layout in the fixed layout file
    """
    maps = self.world.maps
    maps.clear()

    # Seed a single unit of each source resource at the origin tile.
    for resource_name in ("Wood", "Stone"):
        maps.set_point_add(resource_name, 0, 0, 1)
|
||||
def reset_agent_states(self):
    """
    Part 2/2 of scenario reset. This method handles resetting the state of the
    agents themselves (i.e. inventory, locations, etc.).

    Here, empty inventories and place mobile agents in random, accessible
    locations to start. Note: If using fixed_four_skill_and_loc, the starting
    locations will be overridden in self.additional_reset_steps.
    """
    self.world.clear_agent_locs()
    for agent in self.world.agents:
        # Only (re)initialize agents that have not completed setup yet.
        # BUGFIX: BaseAgent.is_setup is a @property returning a bool, so the
        # original `agent.is_setup()` raised "TypeError: 'bool' object is not
        # callable"; access the property instead of calling it.
        if not agent.is_setup:
            agent.state["inventory"] = {k: 0 for k in agent.inventory.keys()}
            # NOTE(review): escrow is keyed off inventory.keys() here, while
            # the planner below uses escrow.keys() -- confirm the two key sets
            # are identical for mobile agents.
            agent.state["escrow"] = {k: 0 for k in agent.inventory.keys()}
            agent.state["endogenous"] = {k: 0 for k in agent.endogenous.keys()}
            # Add starting coin
            agent.state["inventory"]["Coin"] = float(self.starting_agent_coin)
            agent.bad_action = False

    # The planner is always re-zeroed, regardless of setup state.
    self.world.planner.state["inventory"] = {
        k: 0 for k in self.world.planner.inventory.keys()
    }
    self.world.planner.state["escrow"] = {
        k: 0 for k in self.world.planner.escrow.keys()
    }
|
||||
|
||||
def scenario_step(self):
    """
    Update the state of the world according to whatever rules this scenario
    implements.

    This gets called in the 'step' method (of base_env) after going through each
    component step and before generating observations, rewards, etc.

    In this class of scenarios, the scenario step handles stochastic resource
    regeneration.
    """
    # Deposit 20 units of each source resource on the origin tile every step.
    maps = self.world.maps
    for resource_name in ("Wood", "Stone"):
        maps.set_point_add(resource_name, 0, 0, 20)
|
||||
|
||||
def generate_observations(self):
    """
    Generate observations associated with this scenario.

    A scenario does not need to produce observations and can provide observations
    for only some agent types; however, for a given agent type, it should either
    always or never yield an observation. If it does yield an observation,
    that observation should always have the same structure/sizes!

    Returns:
        obs (dict): A dictionary of {agent.idx: agent_obs_dict}. In words,
            return a dictionary with an entry for each agent (which can including
            the planner) for which this scenario provides an observation. For each
            entry, the key specifies the index of the agent and the value contains
            its associated observation dictionary.

    Here, each agent (and the planner) observes its own scaled inventory.
    """
    obs = {}

    # Scaled inventory for every mobile agent, keyed by the *string* idx.
    # NOTE(review): self.inv_scale is not defined in this file -- presumably
    # set by BaseEnvironment; confirm it exists before reset/step.
    agent_invs = {
        str(agent.idx): {
            "inventory-" + k: v * self.inv_scale for k, v in agent.inventory.items()
        }
        for agent in self.world.agents
    }

    # The planner observes its own inventory only.
    obs[self.world.planner.idx] = {
        "inventory-" + k: v * self.inv_scale
        for k, v in self.world.planner.inventory.items()
    }

    for agent in self.world.agents:
        sidx = str(agent.idx)
        obs[sidx] = agent_invs[sidx]

    return obs
|
||||
def compute_reward(self):
    """
    Apply the reward function(s) associated with this scenario to get the rewards
    from this step.

    Returns:
        rew (dict): A dictionary of {agent.idx: agent_obs_dict}. In words,
            return a dictionary with an entry for each agent in the environment
            (including the planner). For each entry, the key specifies the index of
            the agent and the value contains the scalar reward earned this timestep.

    Rewards are computed as the marginal utility (agents) or marginal social
    welfare (planner) experienced on this timestep. Ignoring discounting,
    this means that agents' (planner's) objective is to maximize the utility
    (social welfare) associated with the terminal state of the episode.
    """

    # "curr_optimization_metric" hasn't been updated yet, so it gives us the
    # utility from the last step.
    utility_at_end_of_last_time_step = deepcopy(self.curr_optimization_metric)

    # compute current objectives and store the values
    self.curr_optimization_metric = self.get_current_optimization_metrics()

    # reward = curr - prev objectives
    rew = {}
    for k, v in self.curr_optimization_metric.items():
        rew[k] = float(v - utility_at_end_of_last_time_step[k])
        # Penalize mobile agents (everyone except planner "p") that flagged a
        # bad action since the last reward computation.
        if k != "p":
            # NOTE(review): self.world.agents is indexed with the metric key k
            # (an agent idx) -- this only works if mobile-agent idx values
            # coincide with list positions; confirm against World.create_agents.
            if self.is_bad_action(self.world.agents[k]):
                rew[k] -= 1

    # store the previous objective values
    self.prev_optimization_metric.update(utility_at_end_of_last_time_step)

    # Automatic Energy Cost Annealing
    # -------------------------------
    avg_agent_rew = np.mean([rew[a.idx] for a in self.world.agents])
    # Count the number of timesteps where the avg agent reward was > 0
    if avg_agent_rew > 0:
        self._auto_warmup_integrator += 1

    return rew
|
||||
# Optional methods for customization
|
||||
# ----------------------------------
|
||||
|
||||
def additional_reset_steps(self):
    """
    Extra scenario-specific steps that should be performed at the end of the reset
    cycle.

    For each reset cycle...
        First, reset_starting_layout() and reset_agent_states() will be called.

        Second, <component>.reset() will be called for each registered component.

        Lastly, this method will be called to allow for any final customization of
        the reset cycle.

    For this scenario, this method resets optimization metric trackers. If using
    fixed_four_skill_and_loc, this is where each agent gets assigned to one of
    the four fixed skill/loc combinations. The agent-->skill/loc assignment is
    permuted so that all four skill/loc combinations are used.
    """
    # Snapshot the current objectives and seed all three trackers with
    # independent copies so later in-place updates cannot alias each other.
    baseline = self.get_current_optimization_metrics()
    self.curr_optimization_metric = deepcopy(baseline)
    self.init_optimization_metric = deepcopy(baseline)
    self.prev_optimization_metric = deepcopy(baseline)
|
||||
|
||||
|
||||
def scenario_metrics(self):
    """
    Allows the scenario to generate metrics (collected along with component metrics
    in the 'metrics' property).

    To have the scenario add metrics, this function needs to return a dictionary of
    {metric_key: value} where 'value' is a scalar (no nesting or lists!)

    Here, summarize social metrics, endowments, utilities, and labor cost annealing.
    """
    metrics = dict()

    coin_endowments = np.array(
        [agent.total_endowment("Coin") for agent in self.world.agents]
    )
    metrics["social/productivity"] = social_metrics.get_productivity(
        coin_endowments
    )
    metrics["social/equality"] = social_metrics.get_equality(coin_endowments)

    utilities = np.array(
        [self.curr_optimization_metric[agent.idx] for agent in self.world.agents]
    )
    metrics[
        "social_welfare/coin_eq_times_productivity"
    ] = rewards.coin_eq_times_productivity(
        coin_endowments=coin_endowments, equality_weight=1.0
    )
    metrics[
        "social_welfare/inv_income_weighted_coin_endow"
    ] = rewards.inv_income_weighted_coin_endowments(coin_endowments=coin_endowments)
    metrics[
        "social_welfare/inv_income_weighted_utility"
    ] = rewards.inv_income_weighted_utility(
        coin_endowments=coin_endowments, utilities=utilities
    )

    for agent in self.all_agents:
        # Only the resource names are needed here (total_endowment covers
        # inventory + escrow), so iterate keys directly instead of unpacking
        # an unused quantity from .items().
        for resource in agent.inventory:
            metrics[
                "endow/{}/{}".format(agent.idx, resource)
            ] = agent.total_endowment(resource)

        if agent.endogenous is not None:
            for resource, quantity in agent.endogenous.items():
                metrics["endogenous/{}/{}".format(agent.idx, resource)] = quantity

        metrics["util/{}".format(agent.idx)] = self.curr_optimization_metric[
            agent.idx
        ]

    # Labor weight
    metrics["labor/weighted_cost"] = self.energy_cost * self.energy_weight
    metrics["labor/warmup_integrator"] = int(self._auto_warmup_integrator)

    return metrics
|
||||
4
main.py
4
main.py
@@ -1,5 +1,7 @@
|
||||
from ai_economist import foundation
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ai_economist import foundation
|
||||
from stable_baselines3.common.vec_env import vec_frame_stack
|
||||
from stable_baselines3.common.evaluation import evaluate_policy
|
||||
import envs
|
||||
|
||||
@@ -23,7 +23,7 @@ class RecieverEconWrapper(gym.Env):
|
||||
self.idx_to_index={}
|
||||
#create idx to index map
|
||||
for i in range(len(self.agnet_idx)):
|
||||
self.idx_to_index[self.agnet_idx[i]]=i
|
||||
self.idx_to_index[str(self.agnet_idx[i])]=i
|
||||
first_idx=self.agnet_idx[0]
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ class RecieverEconWrapper(gym.Env):
|
||||
def _dict_idx_to_index(self, data):
|
||||
data_out={}
|
||||
for k,v in data.items():
|
||||
|
||||
if k in self.idx_to_index:
|
||||
index=self.idx_to_index[k]
|
||||
data_out[index]=v
|
||||
|
||||
Reference in New Issue
Block a user