I think I have repaired the current issues

This commit is contained in:
2023-01-13 09:21:23 +01:00
parent ee444cb56c
commit 200bb5da79
5 changed files with 27 additions and 21 deletions

19
main.py
View File

@@ -209,13 +209,14 @@ baseEconWrapper=BaseEconWrapper(econ)
baseEconWrapper.run() baseEconWrapper.run()
mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent") mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent")
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent") sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent")
obs=sb3Converter.reset() #obs=sb3Converter.reset()
vecenv=EconVecEnv(env_config=env_config) #vecenv=EconVecEnv(env_config=env_config)
monenv=VecMonitor(venv=vecenv,info_keywords=["social/productivity","trend/productivity"]) monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"])
normenv=VecNormalize(monenv,norm_reward=False,clip_obs=1)
stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10) #normenv=VecNormalize(sb3Converter,norm_reward=False,clip_obs=1)
obs=stackenv.reset() #stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
obs=monenv.reset()
@@ -224,8 +225,8 @@ obs=stackenv.reset()
runname="run_{}".format(int(np.random.rand()*100)) runname="run_{}".format(int(np.random.rand()*100))
model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log") model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")
n_agents=econ.n_agents
total_required_for_episode=env_config['n_agents']*env_config['episode_length'] total_required_for_episode=n_agents*env_config['episode_length']
print("this is run {}".format(runname)) print("this is run {}".format(runname))
while True: while True:
# Create Eval ENV # Create Eval ENV
@@ -237,7 +238,7 @@ while True:
#Train #Train
model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ)) model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
normenv.save("temp-normalizer.ai") #normenv.save("temp-normalizer.ai")

View File

@@ -88,10 +88,13 @@ class BaseEconWrapper():
#check for actions #check for actions
self.action_edit_lock.acquire() self.action_edit_lock.acquire()
if self.env.n_agents==len(self.actor_actions.keys) & self.step_notification.is_set()==False: enough_votes_for_step=self.env.n_agents==len(self.actor_actions.keys())
currently_not_stepping=self.step_notification.is_set()==False
self.action_edit_lock.release()
if enough_votes_for_step & currently_not_stepping:
# we have all the actions -> STEP # we have all the actions -> STEP
self._step() self._step()
self.action_edit_lock.release() # release actions # release actions
# we are done # we are done
def stop_env(self): def stop_env(self):
@@ -137,8 +140,8 @@ class BaseEconWrapper():
"""Submits actions to base processing queue. Actions as dict pairing of idx and action id""" """Submits actions to base processing queue. Actions as dict pairing of idx and action id"""
self._prepare_step() # New actions are bening submitted. Prepare base for new step self._prepare_step() # New actions are bening submitted. Prepare base for new step
self.action_edit_lock.acquire() # Start to submit action dict self.action_edit_lock.acquire() # Start to submit action dict
for k,v in actions: for k,v in actions.items():
if self.actor_actions[k]!=None: if k in self.actor_actions.keys():
raise Exception("Actor action has already been submitted. {}".format(k)) raise Exception("Actor action has already been submitted. {}".format(k))
self.actor_actions[k]=v self.actor_actions[k]=v
self.base_notification.set() #Alert base for action changes self.base_notification.set() #Alert base for action changes

View File

@@ -52,7 +52,7 @@ class RecieverEconWrapper(gym.Env):
obs,rew,done,info=self.base_econ.reciever_block_step() obs,rew,done,info=self.base_econ.reciever_block_step()
c_obs=self._dict_idx_to_index(obs) c_obs=self._dict_idx_to_index(obs)
c_rew=self._dict_idx_to_index(rew) c_rew=self._dict_idx_to_index(rew)
c_done=self._dict_idx_to_index(done) c_done=done
c_info=self._dict_idx_to_index(info) c_info=self._dict_idx_to_index(info)
return c_obs,c_rew,c_done,c_info return c_obs,c_rew,c_done,c_info

View File

@@ -19,7 +19,8 @@ class SB3EconConverter(VecEnv, gym.Env):
self.packager=utils.build_packager(obs[0]) self.packager=utils.build_packager(obs[0])
#flatten obervation of first agent #flatten obervation of first agent
obs0=utils.package(obs[0],*self.packager) obs0=utils.package(obs[0],*self.packager)
self.observation_space=gym.spaces.Box(low=-np.inf,high=np.inf,shape=(len(obs0),1),dtype=np.float32) obs0["flat"]
self.observation_space=gym.spaces.Box(low=-np.inf,high=np.inf,shape=(len(obs0["flat"]),),dtype=np.float32)
super().__init__(self.num_envs, self.observation_space, self.action_space) super().__init__(self.num_envs, self.observation_space, self.action_space)
@@ -43,16 +44,17 @@ class SB3EconConverter(VecEnv, gym.Env):
c_obs=utils.convert_econ_to_gym(f_obs) c_obs=utils.convert_econ_to_gym(f_obs)
c_rew=utils.convert_econ_to_gym(rew) c_rew=utils.convert_econ_to_gym(rew)
c_done=utils.convert_econ_to_gym(done) c_done={}
c_info=utils.convert_econ_to_gym(info) c_info=utils.convert_econ_to_gym(info)
done_g=[False]*self.num_envs done_g=[False]*self.num_envs
done=(done["__all__"]) done=(done["__all__"])
if done: if done:
for i in range(self.num_envs): for i in range(self.num_envs):
c_done[i]=done done_g[i]=done
c_info[i]["terminal_observation"]=c_obs[i] c_info[i]["terminal_observation"]=c_obs[i]
c_obs=self.reset() c_obs=self.reset()
return c_obs,c_rew,c_done,c_info return np.copy(c_obs),np.copy(c_rew),np.copy(done_g),np.copy(c_info)
def reset(self) -> VecEnvObs: def reset(self) -> VecEnvObs:
obs=self.env.reset() obs=self.env.reset()
f_obs={} f_obs={}
@@ -62,7 +64,7 @@ class SB3EconConverter(VecEnv, gym.Env):
for k,v in f_obs.items(): for k,v in f_obs.items():
g_obs[k]=v["flat"] g_obs[k]=v["flat"]
c_obs=utils.convert_econ_to_gym(g_obs) c_obs=utils.convert_econ_to_gym(g_obs)
return c_obs return np.copy(c_obs)
def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]: def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
if seed is None: if seed is None:

View File

@@ -7,8 +7,8 @@ def convert_econ_to_gym(econ):
def convert_gym_to_econ(gy): def convert_gym_to_econ(gy):
econ={} econ={}
for k,v in gy: for k in range(len(gy)):
econ[k]=v econ[k]=gy[k]
return econ return econ