I think I have repaired the current issues

This commit is contained in:
2023-01-13 09:21:23 +01:00
parent ee444cb56c
commit 200bb5da79
5 changed files with 27 additions and 21 deletions

19
main.py
View File

@@ -209,13 +209,14 @@ baseEconWrapper=BaseEconWrapper(econ)
baseEconWrapper.run()
mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent")
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent")
obs=sb3Converter.reset()
vecenv=EconVecEnv(env_config=env_config)
#obs=sb3Converter.reset()
#vecenv=EconVecEnv(env_config=env_config)
monenv=VecMonitor(venv=vecenv,info_keywords=["social/productivity","trend/productivity"])
normenv=VecNormalize(monenv,norm_reward=False,clip_obs=1)
stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
obs=stackenv.reset()
monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"])
#normenv=VecNormalize(sb3Converter,norm_reward=False,clip_obs=1)
#stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
obs=monenv.reset()
@@ -224,8 +225,8 @@ obs=stackenv.reset()
runname="run_{}".format(int(np.random.rand()*100))
model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")
total_required_for_episode=env_config['n_agents']*env_config['episode_length']
n_agents=econ.n_agents
total_required_for_episode=n_agents*env_config['episode_length']
print("this is run {}".format(runname))
while True:
# Create Eval ENV
@@ -237,7 +238,7 @@ while True:
#Train
model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
normenv.save("temp-normalizer.ai")
#normenv.save("temp-normalizer.ai")

View File

@@ -88,10 +88,13 @@ class BaseEconWrapper():
#check for actions
self.action_edit_lock.acquire()
if self.env.n_agents==len(self.actor_actions.keys) & self.step_notification.is_set()==False:
enough_votes_for_step=self.env.n_agents==len(self.actor_actions.keys())
currently_not_stepping=self.step_notification.is_set()==False
self.action_edit_lock.release()
if enough_votes_for_step & currently_not_stepping:
# we have all the actions -> STEP
self._step()
self.action_edit_lock.release() # release actions
# release actions
# we are done
def stop_env(self):
@@ -137,8 +140,8 @@ class BaseEconWrapper():
"""Submits actions to base processing queue. Actions as dict pairing of idx and action id"""
self._prepare_step() # New actions are bening submitted. Prepare base for new step
self.action_edit_lock.acquire() # Start to submit action dict
for k,v in actions:
if self.actor_actions[k]!=None:
for k,v in actions.items():
if k in self.actor_actions.keys():
raise Exception("Actor action has already been submitted. {}".format(k))
self.actor_actions[k]=v
self.base_notification.set() #Alert base for action changes

View File

@@ -52,7 +52,7 @@ class RecieverEconWrapper(gym.Env):
obs,rew,done,info=self.base_econ.reciever_block_step()
c_obs=self._dict_idx_to_index(obs)
c_rew=self._dict_idx_to_index(rew)
c_done=self._dict_idx_to_index(done)
c_done=done
c_info=self._dict_idx_to_index(info)
return c_obs,c_rew,c_done,c_info

View File

@@ -19,7 +19,8 @@ class SB3EconConverter(VecEnv, gym.Env):
self.packager=utils.build_packager(obs[0])
#flatten obervation of first agent
obs0=utils.package(obs[0],*self.packager)
self.observation_space=gym.spaces.Box(low=-np.inf,high=np.inf,shape=(len(obs0),1),dtype=np.float32)
obs0["flat"]
self.observation_space=gym.spaces.Box(low=-np.inf,high=np.inf,shape=(len(obs0["flat"]),),dtype=np.float32)
super().__init__(self.num_envs, self.observation_space, self.action_space)
@@ -43,16 +44,17 @@ class SB3EconConverter(VecEnv, gym.Env):
c_obs=utils.convert_econ_to_gym(f_obs)
c_rew=utils.convert_econ_to_gym(rew)
c_done=utils.convert_econ_to_gym(done)
c_done={}
c_info=utils.convert_econ_to_gym(info)
done_g=[False]*self.num_envs
done=(done["__all__"])
if done:
for i in range(self.num_envs):
c_done[i]=done
done_g[i]=done
c_info[i]["terminal_observation"]=c_obs[i]
c_obs=self.reset()
return c_obs,c_rew,c_done,c_info
return np.copy(c_obs),np.copy(c_rew),np.copy(done_g),np.copy(c_info)
def reset(self) -> VecEnvObs:
obs=self.env.reset()
f_obs={}
@@ -62,7 +64,7 @@ class SB3EconConverter(VecEnv, gym.Env):
for k,v in f_obs.items():
g_obs[k]=v["flat"]
c_obs=utils.convert_econ_to_gym(g_obs)
return c_obs
return np.copy(c_obs)
def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
if seed is None:

View File

@@ -7,8 +7,8 @@ def convert_econ_to_gym(econ):
def convert_gym_to_econ(gy):
econ={}
for k,v in gy:
econ[k]=v
for k in range(len(gy)):
econ[k]=gy[k]
return econ