I think I have repaired the current stuff
This commit is contained in:
19
main.py
19
main.py
@@ -209,13 +209,14 @@ baseEconWrapper=BaseEconWrapper(econ)
|
||||
baseEconWrapper.run()
|
||||
mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent")
|
||||
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent")
|
||||
obs=sb3Converter.reset()
|
||||
vecenv=EconVecEnv(env_config=env_config)
|
||||
#obs=sb3Converter.reset()
|
||||
#vecenv=EconVecEnv(env_config=env_config)
|
||||
|
||||
monenv=VecMonitor(venv=vecenv,info_keywords=["social/productivity","trend/productivity"])
|
||||
normenv=VecNormalize(monenv,norm_reward=False,clip_obs=1)
|
||||
stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
|
||||
obs=stackenv.reset()
|
||||
monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"])
|
||||
|
||||
#normenv=VecNormalize(sb3Converter,norm_reward=False,clip_obs=1)
|
||||
#stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
|
||||
obs=monenv.reset()
|
||||
|
||||
|
||||
|
||||
@@ -224,8 +225,8 @@ obs=stackenv.reset()
|
||||
runname="run_{}".format(int(np.random.rand()*100))
|
||||
|
||||
model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")
|
||||
|
||||
total_required_for_episode=env_config['n_agents']*env_config['episode_length']
|
||||
n_agents=econ.n_agents
|
||||
total_required_for_episode=n_agents*env_config['episode_length']
|
||||
print("this is run {}".format(runname))
|
||||
while True:
|
||||
# Create Eval ENV
|
||||
@@ -237,7 +238,7 @@ while True:
|
||||
|
||||
#Train
|
||||
model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
|
||||
normenv.save("temp-normalizer.ai")
|
||||
#normenv.save("temp-normalizer.ai")
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -88,10 +88,13 @@ class BaseEconWrapper():
|
||||
|
||||
#check for actions
|
||||
self.action_edit_lock.acquire()
|
||||
if self.env.n_agents==len(self.actor_actions.keys) & self.step_notification.is_set()==False:
|
||||
enough_votes_for_step=self.env.n_agents==len(self.actor_actions.keys())
|
||||
currently_not_stepping=self.step_notification.is_set()==False
|
||||
self.action_edit_lock.release()
|
||||
if enough_votes_for_step & currently_not_stepping:
|
||||
# we have all the actions -> STEP
|
||||
self._step()
|
||||
self.action_edit_lock.release() # release actions
|
||||
# release actions
|
||||
# we are done
|
||||
|
||||
def stop_env(self):
|
||||
@@ -137,8 +140,8 @@ class BaseEconWrapper():
|
||||
"""Submits actions to base processing queue. Actions as dict pairing of idx and action id"""
|
||||
self._prepare_step() # New actions are bening submitted. Prepare base for new step
|
||||
self.action_edit_lock.acquire() # Start to submit action dict
|
||||
for k,v in actions:
|
||||
if self.actor_actions[k]!=None:
|
||||
for k,v in actions.items():
|
||||
if k in self.actor_actions.keys():
|
||||
raise Exception("Actor action has already been submitted. {}".format(k))
|
||||
self.actor_actions[k]=v
|
||||
self.base_notification.set() #Alert base for action changes
|
||||
|
||||
@@ -52,7 +52,7 @@ class RecieverEconWrapper(gym.Env):
|
||||
obs,rew,done,info=self.base_econ.reciever_block_step()
|
||||
c_obs=self._dict_idx_to_index(obs)
|
||||
c_rew=self._dict_idx_to_index(rew)
|
||||
c_done=self._dict_idx_to_index(done)
|
||||
c_done=done
|
||||
c_info=self._dict_idx_to_index(info)
|
||||
return c_obs,c_rew,c_done,c_info
|
||||
|
||||
|
||||
@@ -19,7 +19,8 @@ class SB3EconConverter(VecEnv, gym.Env):
|
||||
self.packager=utils.build_packager(obs[0])
|
||||
#flatten obervation of first agent
|
||||
obs0=utils.package(obs[0],*self.packager)
|
||||
self.observation_space=gym.spaces.Box(low=-np.inf,high=np.inf,shape=(len(obs0),1),dtype=np.float32)
|
||||
obs0["flat"]
|
||||
self.observation_space=gym.spaces.Box(low=-np.inf,high=np.inf,shape=(len(obs0["flat"]),),dtype=np.float32)
|
||||
super().__init__(self.num_envs, self.observation_space, self.action_space)
|
||||
|
||||
|
||||
@@ -43,16 +44,17 @@ class SB3EconConverter(VecEnv, gym.Env):
|
||||
|
||||
c_obs=utils.convert_econ_to_gym(f_obs)
|
||||
c_rew=utils.convert_econ_to_gym(rew)
|
||||
c_done=utils.convert_econ_to_gym(done)
|
||||
c_done={}
|
||||
c_info=utils.convert_econ_to_gym(info)
|
||||
done_g=[False]*self.num_envs
|
||||
done=(done["__all__"])
|
||||
if done:
|
||||
for i in range(self.num_envs):
|
||||
c_done[i]=done
|
||||
done_g[i]=done
|
||||
c_info[i]["terminal_observation"]=c_obs[i]
|
||||
c_obs=self.reset()
|
||||
return c_obs,c_rew,c_done,c_info
|
||||
return np.copy(c_obs),np.copy(c_rew),np.copy(done_g),np.copy(c_info)
|
||||
|
||||
def reset(self) -> VecEnvObs:
|
||||
obs=self.env.reset()
|
||||
f_obs={}
|
||||
@@ -62,7 +64,7 @@ class SB3EconConverter(VecEnv, gym.Env):
|
||||
for k,v in f_obs.items():
|
||||
g_obs[k]=v["flat"]
|
||||
c_obs=utils.convert_econ_to_gym(g_obs)
|
||||
return c_obs
|
||||
return np.copy(c_obs)
|
||||
|
||||
def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
|
||||
if seed is None:
|
||||
|
||||
@@ -7,8 +7,8 @@ def convert_econ_to_gym(econ):
|
||||
|
||||
def convert_gym_to_econ(gy):
|
||||
econ={}
|
||||
for k,v in gy:
|
||||
econ[k]=v
|
||||
for k in range(len(gy)):
|
||||
econ[k]=gy[k]
|
||||
return econ
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user