diff --git a/main.py b/main.py index ef18272..7908c37 100644 --- a/main.py +++ b/main.py @@ -209,13 +209,14 @@ baseEconWrapper=BaseEconWrapper(econ) baseEconWrapper.run() mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent") sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent") -obs=sb3Converter.reset() -vecenv=EconVecEnv(env_config=env_config) +#obs=sb3Converter.reset() +#vecenv=EconVecEnv(env_config=env_config) -monenv=VecMonitor(venv=vecenv,info_keywords=["social/productivity","trend/productivity"]) -normenv=VecNormalize(monenv,norm_reward=False,clip_obs=1) -stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10) -obs=stackenv.reset() +monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"]) + +#normenv=VecNormalize(sb3Converter,norm_reward=False,clip_obs=1) +#stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10) +obs=monenv.reset() @@ -224,8 +225,8 @@ obs=stackenv.reset() runname="run_{}".format(int(np.random.rand()*100)) model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log") - -total_required_for_episode=env_config['n_agents']*env_config['episode_length'] +n_agents=econ.n_agents +total_required_for_episode=n_agents*env_config['episode_length'] print("this is run {}".format(runname)) while True: # Create Eval ENV @@ -237,7 +238,7 @@ while True: #Train model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ)) - normenv.save("temp-normalizer.ai") + #normenv.save("temp-normalizer.ai") diff --git a/wrapper/base_econ_wrapper.py b/wrapper/base_econ_wrapper.py index 0621f60..851fea1 100644 --- a/wrapper/base_econ_wrapper.py +++ b/wrapper/base_econ_wrapper.py @@ -88,10 +88,13 @@ class BaseEconWrapper(): #check for actions self.action_edit_lock.acquire() - if self.env.n_agents==len(self.actor_actions.keys) & self.step_notification.is_set()==False: + enough_votes_for_step=self.env.n_agents==len(self.actor_actions.keys()) + currently_not_stepping=self.step_notification.is_set()==False + self.action_edit_lock.release() + if enough_votes_for_step & currently_not_stepping: # we have all the actions -> STEP self._step() - self.action_edit_lock.release() # release actions + # release actions # we are done def stop_env(self): @@ -137,8 +140,8 @@ class BaseEconWrapper(): """Submits actions to base processing queue. Actions as dict pairing of idx and action id""" self._prepare_step() # New actions are bening submitted. Prepare base for new step self.action_edit_lock.acquire() # Start to submit action dict - for k,v in actions: - if self.actor_actions[k]!=None: + for k,v in actions.items(): + if k in self.actor_actions.keys(): raise Exception("Actor action has already been submitted. {}".format(k)) self.actor_actions[k]=v self.base_notification.set() #Alert base for action changes diff --git a/wrapper/reciever_econ_wrapper.py b/wrapper/reciever_econ_wrapper.py index e604036..2f4c81f 100644 --- a/wrapper/reciever_econ_wrapper.py +++ b/wrapper/reciever_econ_wrapper.py @@ -52,7 +52,7 @@ class RecieverEconWrapper(gym.Env): obs,rew,done,info=self.base_econ.reciever_block_step() c_obs=self._dict_idx_to_index(obs) c_rew=self._dict_idx_to_index(rew) - c_done=self._dict_idx_to_index(done) + c_done=done c_info=self._dict_idx_to_index(info) return c_obs,c_rew,c_done,c_info diff --git a/wrapper/sb3_econ_converter.py b/wrapper/sb3_econ_converter.py index a7bb312..fd753f3 100644 --- a/wrapper/sb3_econ_converter.py +++ b/wrapper/sb3_econ_converter.py @@ -19,7 +19,8 @@ class SB3EconConverter(VecEnv, gym.Env): self.packager=utils.build_packager(obs[0]) #flatten obervation of first agent obs0=utils.package(obs[0],*self.packager) - self.observation_space=gym.spaces.Box(low=-np.inf,high=np.inf,shape=(len(obs0),1),dtype=np.float32) + obs0["flat"] + self.observation_space=gym.spaces.Box(low=-np.inf,high=np.inf,shape=(len(obs0["flat"]),),dtype=np.float32) super().__init__(self.num_envs, self.observation_space, self.action_space) @@ -43,16 +44,17 @@ class SB3EconConverter(VecEnv, gym.Env): c_obs=utils.convert_econ_to_gym(f_obs) c_rew=utils.convert_econ_to_gym(rew) - c_done=utils.convert_econ_to_gym(done) + c_done={} c_info=utils.convert_econ_to_gym(info) done_g=[False]*self.num_envs done=(done["__all__"]) if done: for i in range(self.num_envs): - c_done[i]=done + done_g[i]=done c_info[i]["terminal_observation"]=c_obs[i] c_obs=self.reset() - return c_obs,c_rew,c_done,c_info + return np.copy(c_obs),np.copy(c_rew),np.copy(done_g),np.copy(c_info) + def reset(self) -> VecEnvObs: obs=self.env.reset() f_obs={} @@ -62,7 +64,7 @@ class SB3EconConverter(VecEnv, gym.Env): for k,v in f_obs.items(): g_obs[k]=v["flat"] c_obs=utils.convert_econ_to_gym(g_obs) - return c_obs + return np.copy(c_obs) def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]: if seed is None: diff --git a/wrapper/utils.py b/wrapper/utils.py index 6eb63a1..d89e266 100644 --- a/wrapper/utils.py +++ b/wrapper/utils.py @@ -7,8 +7,8 @@ def convert_econ_to_gym(econ): def convert_gym_to_econ(gy): econ={} - for k,v in gy: - econ[k]=v + for k in range(len(gy)): + econ[k]=gy[k] return econ