diff --git a/ai_economist/foundation/base/base_agent.py b/ai_economist/foundation/base/base_agent.py
index eb2090a..bce4fcc 100644
--- a/ai_economist/foundation/base/base_agent.py
+++ b/ai_economist/foundation/base/base_agent.py
@@ -5,6 +5,7 @@
 # or https://opensource.org/licenses/BSD-3-Clause
 
 import random
+import uuid
 
 import numpy as np
 
@@ -38,7 +39,7 @@ class BaseAgent:
 
         if idx is None:
             idx = 0
-
+        self.uuid = uuid.uuid4()  # stable unique identifier for this agent instance
         if multi_action_mode is None:
             multi_action_mode = False
diff --git a/ai_economist/foundation/base/base_component.py b/ai_economist/foundation/base/base_component.py
index aea66b8..d826da0 100644
--- a/ai_economist/foundation/base/base_component.py
+++ b/ai_economist/foundation/base/base_component.py
@@ -134,6 +134,7 @@ class BaseComponent(ABC):
     def reset(self):
         """Reset any portion of the state managed by this component."""
         world = self.world
+        self.n_agents = world.n_agents  # refresh the cached agent count on reset
         all_agents = world.agents + [world.planner]
         for agent in all_agents:
             agent.state.update(self.get_additional_state_fields(agent.name))
diff --git a/ai_economist/foundation/base/base_env.py b/ai_economist/foundation/base/base_env.py
index f390dbc..8eea22b 100644
--- a/ai_economist/foundation/base/base_env.py
+++ b/ai_economist/foundation/base/base_env.py
@@ -234,7 +234,7 @@ class BaseEnvironment(ABC):
         self.num_agents = (
             n_agents + n_planners
         )  # used in the warp_drive env wrapper (+ 1 for the planner)
-
+
         # Components must be a tuple/list where each element is either a...
         #   tuple: ('Component Name', {Component kwargs})
         #   dict : {'Component Name': {Component kwargs}}
@@ -345,11 +345,11 @@ class BaseEnvironment(ABC):
 
         self.world.planner.register_inventory(self.resources)
         self.world.planner.register_components(self._components)
 
-        self.apply_scenario_config_to_agents()
+        self.reapply_scenario_config_to_agents()
 
         self._completions = 0
-
+        self._finish_episode = False  # set via set_finish_episode() to end an episode early
         self._last_ep_metrics = None
 
         # For dense logging
@@ -366,7 +366,7 @@ class BaseEnvironment(ABC):
         # into a single agent with index 'a'
         self.collate_agent_step_and_reset_data = collate_agent_step_and_reset_data
 
-    def apply_scenario_config_to_agents(self):
+    def reapply_scenario_config_to_agents(self):
         # Register the components with the agents
         # to finish setting up their state/action spaces.
         for agent in self.world.agents:
@@ -506,6 +506,8 @@ class BaseEnvironment(ABC):
 
     # Getters & Setters
     # -----------------
+    def set_finish_episode(self, done):
+        self._finish_episode = done
 
     def get_component(self, component_name):
         """
@@ -909,6 +911,9 @@ class BaseEnvironment(ABC):
         # Reset the timestep counter
         self.world.timestep = 0
 
+        # Reset the early-termination flag
+        self._finish_episode = False
+
         # Perform the scenario reset,
         # which includes resetting the world and agent states
         self.reset_starting_layout()
@@ -1021,7 +1026,7 @@ class BaseEnvironment(ABC):
             flatten_masks=self._flatten_masks,
         )
         rew = self._generate_rewards()
-        done = {"__all__": self.world.timestep >= self._episode_length}
+        done = {"__all__": (self.world.timestep >= self._episode_length) or self._finish_episode}
         info = {k: {} for k in obs.keys()}
 
         if self._dense_log_this_episode:
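A note for reviewers on the `done` line above: as submitted, the expression read `self.world.timestep >= self._episode_length | self._finish_episode`. Python's bitwise `|` binds more tightly than `>=`, so the flag was OR'd into the episode length rather than into the comparison result; the hunk above parenthesizes the comparison and uses boolean `or` instead. A minimal sketch of the difference:

```python
timestep, episode_length, finish_episode = 5, 1000, True

# Buggy parse: `|` binds tighter than `>=`, so this compares 5 >= (1000 | True) == 1001.
buggy = timestep >= episode_length | finish_episode     # False, despite the flag being set

# Fixed parse: compare first, then combine with boolean `or`.
fixed = (timestep >= episode_length) or finish_episode  # True

print(buggy, fixed)  # False True
```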
diff --git a/envs/econ.py b/envs/econ.py
index 963c447..b7a8b97 100644
--- a/envs/econ.py
+++ b/envs/econ.py
@@ -23,8 +23,7 @@ class Econ(BaseEnvironment):
     stone, wood, and water tiles.
 
     Args:
-        planner_gets_spatial_obs (bool): Whether the planner agent receives spatial
-            observations from the world.
+        action_against_mask_penelty (int): Reward penalty for performing an action the mask disallows. Defaults to -1.
         full_observability (bool): Whether the mobile agents' spatial observation
             includes the full world view or is instead an egocentric view.
         mobile_agent_observation_range (int): If not using full_observability,
@@ -64,7 +63,7 @@ class Econ(BaseEnvironment):
 
     name = "econ"
     agent_subclasses = ["BasicMobileAgent"]
-    required_entities = ["Wood", "Stone", "Water"]
+    required_entities = ["Wood", "Stone", "Water", "Gem_Raw", "Gem"]
 
     def __init__(
         self,
@@ -143,6 +142,7 @@ class Econ(BaseEnvironment):
         """
 
         self.agent_starting_pos = {agent.idx: [] for agent in self.world.agents}
+        self._persist_between_resets = False  # when True, reset() keeps maps and agent state
 
         self.last_log_loged={}
@@ -172,6 +172,7 @@ class Econ(BaseEnvironment):
         bad=agent.bad_action
         agent.bad_action=False
         return bad
+
     def get_current_optimization_metrics(self):
         """
         Compute optimization metrics based on the current state. Used to compute reward.
@@ -239,9 +240,13 @@ class Econ(BaseEnvironment):
         Here, reset to the layout in the fixed layout file
         """
 
+
+        if self._persist_between_resets:  # if we only want to modify some values and not actually reset
+            return
+
         self.world.maps.clear()
 
-        resources = ["Wood", "Stone"]
+        resources = ["Wood", "Stone", "Gem_Raw"]
 
         for resource in resources:
             self.world.maps.set_point_add(resource,0,0,1)
@@ -255,15 +260,18 @@ class Econ(BaseEnvironment):
         locations to start. Note: If using fixed_four_skill_and_loc, the starting
         locations will be overridden in self.additional_reset_steps.
         """
-        self.world.clear_agent_locs()
+        if not self._persist_between_resets:
+            self.world.clear_agent_locs()
+
         for agent in self.world.agents:
-            if not agent.is_setup():
-
-                agent.state["inventory"] = {k: 0 for k in agent.inventory.keys()}
-                agent.state["escrow"] = {k: 0 for k in agent.inventory.keys()}
-                agent.state["endogenous"] = {k: 0 for k in agent.endogenous.keys()}
-                # Add starting coin
-                agent.state["inventory"]["Coin"] = float(self.starting_agent_coin)
+            if not self._persist_between_resets:
+                agent.set_setup(False)  # resets the agent's scenario state
+            if not agent.is_setup():  # the agent has not been set up for this scenario
+                agent.state["inventory"] = {k: 0 for k in agent.inventory.keys()}
+                agent.state["escrow"] = {k: 0 for k in agent.inventory.keys()}
+                agent.state["endogenous"] = {k: 0 for k in agent.endogenous.keys()}
+                # Add starting coin
+                agent.state["inventory"]["Coin"] = float(self.starting_agent_coin)
             agent.bad_action=False
 
         self.world.planner.state["inventory"] = {
@@ -286,7 +294,7 @@ class Econ(BaseEnvironment):
             regeneration.
         """
 
-        resources = ["Wood", "Stone"]
+        resources = ["Wood", "Stone", "Gem_Raw"]
 
         for resource in resources:
             self.world.maps.set_point_add(resource,0,0,20)
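How the two new hooks are meant to be driven is easiest to see from a caller's side. The sketch below is hypothetical (the `make_env` helper and the reset cadence are assumptions, not part of this patch); it only uses names introduced above:

```python
# Hypothetical driver; make_env() stands in for however the Econ env is constructed.
env = make_env()
obs = env.reset()

# End the current episode early: BaseEnvironment folds this flag into done["__all__"].
env.set_finish_episode(True)

# Keep the map layout and agent state across the next reset instead of rebuilding them.
env._persist_between_resets = True
obs = env.reset()  # reset_starting_layout() returns early; agent locs/states are kept
```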
diff --git a/main.py b/main.py
index 9cab991..251d9dc 100644
--- a/main.py
+++ b/main.py
@@ -4,6 +4,7 @@ import numpy as np
 from ai_economist import foundation
 from stable_baselines3.common.vec_env import vec_frame_stack
 from stable_baselines3.common.evaluation import evaluate_policy
+from sb3_contrib.ppo_mask import MaskablePPO
 import envs
 import wrapper
 from wrapper.base_econ_wrapper import BaseEconWrapper
@@ -69,7 +70,7 @@
     'flatten_observations': False,
     # When Flattening masks, concatenate each action subspace mask into a single array.
     # Note: flatten_masks = True is required for masking action logits in the code below.
-    'flatten_masks': False,
+    'flatten_masks': True,
 }
 
 
@@ -121,7 +122,7 @@
     'flatten_observations': False,
     # When Flattening masks, concatenate each action subspace mask into a single array.
     # Note: flatten_masks = True is required for masking action logits in the code below.
-    'flatten_masks': False,
+    'flatten_masks': True,
 }
 
 num_frames=2
@@ -226,7 +227,7 @@
 obs=monenv.reset()
 
 runname="run_{}".format(int(np.random.rand()*100))
 
-model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")
+model = MaskablePPO("MlpPolicy", n_steps=int(env_config['episode_length'] * 2), ent_coef=0.1, vf_coef=0.8, gamma=0.95, learning_rate=5e-3, env=monenv, verbose=1, device="cuda", tensorboard_log="./log")
 n_agents=econ.n_agents
 total_required_for_episode=n_agents*env_config['episode_length']
 print("this is run {}".format(runname))
diff --git a/wrapper/sb3_econ_converter.py b/wrapper/sb3_econ_converter.py
index de3345e..dbd3bd7 100644
--- a/wrapper/sb3_econ_converter.py
+++ b/wrapper/sb3_econ_converter.py
@@ -36,6 +36,7 @@ class SB3EconConverter(VecEnv, gym.Env):
 
     def step_wait(self) -> VecEnvStepReturn:
         obs,rew,done,info=self.env.step_wait()
+        self.curr_obs = obs  # cache the raw obs so action_masks() can read the current masks
         #flatten obs
         f_obs={}
         for k,v in obs.items():
@@ -62,11 +63,13 @@ class SB3EconConverter(VecEnv, gym.Env):
             done_g[i]=done
             c_info[i]["terminal_observation"]=c_obs[i]
             c_obs=self.reset()
+
         return np.copy(c_obs),np.copy(c_rew),np.copy(done_g),np.copy(c_info)
 
     def reset(self) -> VecEnvObs:
         obs=self.env.reset()
         f_obs={}
+        self.curr_obs = obs  # also cache the raw obs (and masks) produced by reset
         for k,v in obs.items():
             f_obs[k]=utils.package(v,*self.packager)
         g_obs={}
@@ -84,15 +87,20 @@ class SB3EconConverter(VecEnv, gym.Env):
             seeds.append(env.seed(seed + idx))
         return seeds
 
-
+    def action_masks(self):
+        """Return each agent's action mask, read from the most recent raw observation."""
+        masks = []
+        for agent_id in self.curr_obs:
+            masks.append(self.curr_obs[agent_id]["action_mask"])
+        return masks
+
     def close(self) -> None:
         return
 
     def get_attr(self, attr_name: str, indices: VecEnvIndices = None) -> List[Any]:
         """Return attribute from vectorized environment (see base class)."""
-        target_envs = self._get_target_envs(indices)
-        return [getattr(env_i, attr_name) for env_i in target_envs]
+
+        return getattr(self, attr_name)  # single underlying env: read the attribute directly
 
@@ -106,8 +114,7 @@ class SB3EconConverter(VecEnv, gym.Env):
 
     def env_method(self, method_name: str, *method_args, indices: VecEnvIndices = None, **method_kwargs) -> List[Any]:
         """Call instance methods of vectorized environments."""
-        target_envs = self._get_target_envs(indices)
-        return [getattr(env_i, method_name)(*method_args, **method_kwargs) for env_i in target_envs]
+        return getattr(self, method_name)(*method_args, **method_kwargs)
 
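For reviewers unfamiliar with sb3_contrib's masking contract: `MaskablePPO` discovers masks by calling `env_method("action_masks")` on the vectorized env, which is why the converter now caches `curr_obs`, exposes `action_masks()`, and routes `env_method` calls to itself. A rough inference-time sketch, assuming `monenv` and `model` are built as in main.py:

```python
from sb3_contrib.common.maskable.utils import get_action_masks

obs = monenv.reset()
for _ in range(100):
    # Resolves to SB3EconConverter.action_masks() through the env_method() override.
    masks = get_action_masks(monenv)
    action, _states = model.predict(obs, action_masks=masks)
    obs, rewards, dones, infos = monenv.step(action)
```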