I think I have repaired it for the current stuff
This commit is contained in:
19
main.py
19
main.py
@@ -209,13 +209,14 @@ baseEconWrapper=BaseEconWrapper(econ)
|
|||||||
baseEconWrapper.run()
|
baseEconWrapper.run()
|
||||||
mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent")
|
mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent")
|
||||||
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent")
|
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent")
|
||||||
obs=sb3Converter.reset()
|
#obs=sb3Converter.reset()
|
||||||
vecenv=EconVecEnv(env_config=env_config)
|
#vecenv=EconVecEnv(env_config=env_config)
|
||||||
|
|
||||||
monenv=VecMonitor(venv=vecenv,info_keywords=["social/productivity","trend/productivity"])
|
monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"])
|
||||||
normenv=VecNormalize(monenv,norm_reward=False,clip_obs=1)
|
|
||||||
stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
|
#normenv=VecNormalize(sb3Converter,norm_reward=False,clip_obs=1)
|
||||||
obs=stackenv.reset()
|
#stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
|
||||||
|
obs=monenv.reset()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -224,8 +225,8 @@ obs=stackenv.reset()
|
|||||||
runname="run_{}".format(int(np.random.rand()*100))
|
runname="run_{}".format(int(np.random.rand()*100))
|
||||||
|
|
||||||
model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")
|
model = PPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")
|
||||||
|
n_agents=econ.n_agents
|
||||||
total_required_for_episode=env_config['n_agents']*env_config['episode_length']
|
total_required_for_episode=n_agents*env_config['episode_length']
|
||||||
print("this is run {}".format(runname))
|
print("this is run {}".format(runname))
|
||||||
while True:
|
while True:
|
||||||
# Create Eval ENV
|
# Create Eval ENV
|
||||||
@@ -237,7 +238,7 @@ while True:
|
|||||||
|
|
||||||
#Train
|
#Train
|
||||||
model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
|
model=model.learn(total_timesteps=total_required_for_episode*50,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
|
||||||
normenv.save("temp-normalizer.ai")
|
#normenv.save("temp-normalizer.ai")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -88,10 +88,13 @@ class BaseEconWrapper():
|
|||||||
|
|
||||||
#check for actions
|
#check for actions
|
||||||
self.action_edit_lock.acquire()
|
self.action_edit_lock.acquire()
|
||||||
if self.env.n_agents==len(self.actor_actions.keys) & self.step_notification.is_set()==False:
|
enough_votes_for_step=self.env.n_agents==len(self.actor_actions.keys())
|
||||||
|
currently_not_stepping=self.step_notification.is_set()==False
|
||||||
|
self.action_edit_lock.release()
|
||||||
|
if enough_votes_for_step & currently_not_stepping:
|
||||||
# we have all the actions -> STEP
|
# we have all the actions -> STEP
|
||||||
self._step()
|
self._step()
|
||||||
self.action_edit_lock.release() # release actions
|
# release actions
|
||||||
# we are done
|
# we are done
|
||||||
|
|
||||||
def stop_env(self):
|
def stop_env(self):
|
||||||
@@ -137,8 +140,8 @@ class BaseEconWrapper():
|
|||||||
"""Submits actions to base processing queue. Actions as dict pairing of idx and action id"""
|
"""Submits actions to base processing queue. Actions as dict pairing of idx and action id"""
|
||||||
self._prepare_step() # New actions are bening submitted. Prepare base for new step
|
self._prepare_step() # New actions are bening submitted. Prepare base for new step
|
||||||
self.action_edit_lock.acquire() # Start to submit action dict
|
self.action_edit_lock.acquire() # Start to submit action dict
|
||||||
for k,v in actions:
|
for k,v in actions.items():
|
||||||
if self.actor_actions[k]!=None:
|
if k in self.actor_actions.keys():
|
||||||
raise Exception("Actor action has already been submitted. {}".format(k))
|
raise Exception("Actor action has already been submitted. {}".format(k))
|
||||||
self.actor_actions[k]=v
|
self.actor_actions[k]=v
|
||||||
self.base_notification.set() #Alert base for action changes
|
self.base_notification.set() #Alert base for action changes
|
||||||
|
|||||||
@@ -52,7 +52,7 @@ class RecieverEconWrapper(gym.Env):
|
|||||||
obs,rew,done,info=self.base_econ.reciever_block_step()
|
obs,rew,done,info=self.base_econ.reciever_block_step()
|
||||||
c_obs=self._dict_idx_to_index(obs)
|
c_obs=self._dict_idx_to_index(obs)
|
||||||
c_rew=self._dict_idx_to_index(rew)
|
c_rew=self._dict_idx_to_index(rew)
|
||||||
c_done=self._dict_idx_to_index(done)
|
c_done=done
|
||||||
c_info=self._dict_idx_to_index(info)
|
c_info=self._dict_idx_to_index(info)
|
||||||
return c_obs,c_rew,c_done,c_info
|
return c_obs,c_rew,c_done,c_info
|
||||||
|
|
||||||
|
|||||||
@@ -19,7 +19,8 @@ class SB3EconConverter(VecEnv, gym.Env):
|
|||||||
self.packager=utils.build_packager(obs[0])
|
self.packager=utils.build_packager(obs[0])
|
||||||
#flatten obervation of first agent
|
#flatten obervation of first agent
|
||||||
obs0=utils.package(obs[0],*self.packager)
|
obs0=utils.package(obs[0],*self.packager)
|
||||||
self.observation_space=gym.spaces.Box(low=-np.inf,high=np.inf,shape=(len(obs0),1),dtype=np.float32)
|
obs0["flat"]
|
||||||
|
self.observation_space=gym.spaces.Box(low=-np.inf,high=np.inf,shape=(len(obs0["flat"]),),dtype=np.float32)
|
||||||
super().__init__(self.num_envs, self.observation_space, self.action_space)
|
super().__init__(self.num_envs, self.observation_space, self.action_space)
|
||||||
|
|
||||||
|
|
||||||
@@ -43,16 +44,17 @@ class SB3EconConverter(VecEnv, gym.Env):
|
|||||||
|
|
||||||
c_obs=utils.convert_econ_to_gym(f_obs)
|
c_obs=utils.convert_econ_to_gym(f_obs)
|
||||||
c_rew=utils.convert_econ_to_gym(rew)
|
c_rew=utils.convert_econ_to_gym(rew)
|
||||||
c_done=utils.convert_econ_to_gym(done)
|
c_done={}
|
||||||
c_info=utils.convert_econ_to_gym(info)
|
c_info=utils.convert_econ_to_gym(info)
|
||||||
done_g=[False]*self.num_envs
|
done_g=[False]*self.num_envs
|
||||||
done=(done["__all__"])
|
done=(done["__all__"])
|
||||||
if done:
|
if done:
|
||||||
for i in range(self.num_envs):
|
for i in range(self.num_envs):
|
||||||
c_done[i]=done
|
done_g[i]=done
|
||||||
c_info[i]["terminal_observation"]=c_obs[i]
|
c_info[i]["terminal_observation"]=c_obs[i]
|
||||||
c_obs=self.reset()
|
c_obs=self.reset()
|
||||||
return c_obs,c_rew,c_done,c_info
|
return np.copy(c_obs),np.copy(c_rew),np.copy(done_g),np.copy(c_info)
|
||||||
|
|
||||||
def reset(self) -> VecEnvObs:
|
def reset(self) -> VecEnvObs:
|
||||||
obs=self.env.reset()
|
obs=self.env.reset()
|
||||||
f_obs={}
|
f_obs={}
|
||||||
@@ -62,7 +64,7 @@ class SB3EconConverter(VecEnv, gym.Env):
|
|||||||
for k,v in f_obs.items():
|
for k,v in f_obs.items():
|
||||||
g_obs[k]=v["flat"]
|
g_obs[k]=v["flat"]
|
||||||
c_obs=utils.convert_econ_to_gym(g_obs)
|
c_obs=utils.convert_econ_to_gym(g_obs)
|
||||||
return c_obs
|
return np.copy(c_obs)
|
||||||
|
|
||||||
def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
|
def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
|
||||||
if seed is None:
|
if seed is None:
|
||||||
|
|||||||
@@ -7,8 +7,8 @@ def convert_econ_to_gym(econ):
|
|||||||
|
|
||||||
def convert_gym_to_econ(gy):
|
def convert_gym_to_econ(gy):
|
||||||
econ={}
|
econ={}
|
||||||
for k,v in gy:
|
for k in range(len(gy)):
|
||||||
econ[k]=v
|
econ[k]=gy[k]
|
||||||
return econ
|
return econ
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user