lets create trading
@@ -9,3 +9,9 @@ class TradingAgent(BaseAgent):
     """

     name = "TradingAgent"
+
+
+class TradingAgentLogic():
+    """Logic for trading agent"""
+    def __init__(self,) -> None:
+        pass
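Note: for TradingAgent to be selectable from env_config, Foundation agent
classes are normally registered with the framework's agent registry. A minimal
sketch, assuming the stock ai-economist layout (BaseAgent and agent_registry
live in foundation.base.base_agent):

    from ai_economist.foundation.base.base_agent import BaseAgent, agent_registry

    @agent_registry.add
    class TradingAgent(BaseAgent):
        """A mobile agent that can take part in trade components."""

        name = "TradingAgent"  # key used when composing agents in env_config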
@@ -44,7 +44,7 @@ class SimpleCraft(BaseComponent):
         payment=10,
         payment_max_skill_multiplier=1,
         skill_dist="none",
-        build_labor=10.0,
+        build_labor=1.0,
         **base_component_kwargs
     ):
         super().__init__(*base_component_args, **base_component_kwargs)
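The default crafting labor drops from 10.0 to 1.0, making each craft ten times
cheaper in labor. The same effect can usually be had without editing the class,
by overriding the kwarg where the component is listed in the scenario config; a
sketch assuming Foundation's (component_name, kwargs) convention:

    env_config = {
        # ...other scenario kwargs...
        'components': [
            # kwargs here override the defaults in SimpleCraft.__init__
            ('SimpleCraft', {'payment': 10, 'build_labor': 1.0}),
        ],
    }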
main.py (18 lines changed)
@@ -57,9 +57,9 @@ env_config = {
     'allow_observation_scaling': True,
     'dense_log_frequency': 100,
     'world_dense_log_frequency':1,
-    'energy_cost':0,
+    'energy_cost':0.21,
     'energy_warmup_method': "auto",
-    'energy_warmup_constant': 0,
+    'energy_warmup_constant': 4000,

     # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
     # Otherwise, the policy selects only 1 action.
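Turning energy_cost on (0.21) together with the "auto" warmup and a constant of
4000 phases the labor penalty in gradually rather than applying it from step
one. If this fork keeps the stock ai-economist behaviour, the effective weight
anneals as 1 - exp(-x / constant), where x is the scenario's warmup integrator;
a quick illustration:

    import numpy as np

    def energy_weight(x, warmup_constant=4000):
        # annealing schedule the stock ai-economist scenarios use
        return float(1.0 - np.exp(-x / warmup_constant))

    print(energy_weight(0))       # 0.0  -> labor cost disabled at the start
    print(energy_weight(4000))    # ~0.63
    print(energy_weight(20000))   # ~0.99 -> effectively the full energy_cost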
@@ -105,13 +105,13 @@ eval_env_config = {
     # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
     'agent_composition': {"BasicMobileAgent": 20}, # Number of non-planner agents (must be > 1)
     'world_size': [1, 1], # [Height, Width] of the env world
-    'episode_length': 100, # Number of timesteps per episode
+    'episode_length': 256, # Number of timesteps per episode
     'allow_observation_scaling': True,
     'dense_log_frequency': 10,
     'world_dense_log_frequency':1,
-    'energy_cost':0,
+    'energy_cost':0.21,
     'energy_warmup_method': "auto",
-    'energy_warmup_constant': 0,
+    'energy_warmup_constant': 4000,

     # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
     # Otherwise, the policy selects only 1 action.
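The eval episode length moves to 256 while this diff leaves the training value
alone, and n_steps below is derived from env_config, not eval_env_config. A
hypothetical guard (not in the code) that would flag unintended train/eval
drift on shared keys:

    # hypothetical sanity check; env_config/eval_env_config are the dicts above
    for key in ('episode_length', 'energy_cost', 'energy_warmup_constant'):
        if env_config.get(key) != eval_env_config.get(key):
            print("train/eval mismatch on {}: {} vs {}".format(
                key, env_config.get(key), eval_env_config.get(key)))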
@@ -228,7 +228,7 @@ obs=monenv.reset()

 runname="run_{}".format(int(np.random.rand()*100))

-model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")
+model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, seed=225,verbose=1,device="cuda",tensorboard_log="./log")
 n_agents=econ.n_agents
 total_required_for_episode=n_agents*env_config['episode_length']
 print("this is run {}".format(runname))
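seed=225 pins the RNGs in sb3_contrib's MaskablePPO so reruns of the same
runname are comparable. The same constructor, spread out for readability
(monenv and env_config come from earlier in main.py):

    from sb3_contrib import MaskablePPO

    model = MaskablePPO(
        "MlpPolicy",
        env=monenv,
        n_steps=int(env_config['episode_length'] * 2),  # rollout buffer size
        ent_coef=0.1,
        vf_coef=0.8,
        gamma=0.95,
        learning_rate=5e-3,
        seed=225,        # pins the RNGs for reproducible reruns
        verbose=1,
        device="cuda",
        tensorboard_log="./log",
    )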
@@ -242,7 +242,7 @@ while True:
     # Create Eval ENV
     vec_mon_eval=VecMonitor(venv=eval_sb3_converter)
     #Train
-    model=model.learn(total_timesteps=total_required_for_episode*20,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
+    model=model.learn(total_timesteps=total_required_for_episode*10,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
     #normenv.save("temp-normalizer.ai")


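This halves the per-iteration training budget from 20 to 10 episodes' worth of
agent steps; with reset_num_timesteps=False the TensorBoard step counter keeps
running across iterations of the while loop, so the curves stay continuous.
The loop shape, sketched:

    while True:
        model = model.learn(
            total_timesteps=total_required_for_episode * 10,
            reset_num_timesteps=False,  # continue the global step counter
            progress_bar=True,
            tb_log_name=runname,
            callback=TensorboardCallback(econ=econ),  # defined in main.py
        )
        # ...evaluation pass follows (next hunk)...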
@@ -253,7 +253,9 @@ while True:
     obs=vec_mon_eval.reset()
     done=False
     for i in tqdm(range(eval_env_config['episode_length'])):
-        action=model.predict(obs)
+        masks=eval_sb3_converter.action_masks()
+        action=model.predict(obs,action_masks=masks)
+
         obs,rew,done_e,info=vec_mon_eval.step(action[0])
         done=done_e[0]

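Before this change the eval loop ignored the masks, so predict could choose
actions the environment would reject. An equivalent step with masking, plus
deterministic=True, which is common for evaluation (sb3_contrib also ships a
get_action_masks helper in sb3_contrib.common.maskable.utils for envs that
expose action_masks()):

    masks = eval_sb3_converter.action_masks()
    action, _state = model.predict(obs, action_masks=masks, deterministic=True)
    obs, rew, done_e, info = vec_mon_eval.step(action)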
@@ -82,16 +82,18 @@ class SB3EconConverter(VecEnv, gym.Env):
     def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
         if seed is None:
             seed = np.random.randint(0, 2**32 - 1)
-        seeds = []
-        for idx, env in enumerate(self.envs):
-            seeds.append(env.seed(seed + idx))
+        self.econ.seed(seed)
+        seeds=[seed]
         return seeds

     def action_masks(self):
         """Returns action masks for agents and current obs"""
         masks=[]
         for obs in self.curr_obs:
-            masks.append(self.curr_obs[obs]["action_mask"])
+            mask=[]
+            for num in self.curr_obs[obs]["action_mask"]:
+                mask.append(num==1.0)
+            masks.append(mask)
         return masks

     def close(self) -> None:
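The rewritten action_masks() converts each agent's 0/1 float mask to booleans
with an inner Python loop; numpy can do the conversion in one vectorized
comparison. A standalone sketch of the equivalent logic:

    import numpy as np

    def action_masks(curr_obs):
        # one boolean array per agent: True where the action is allowed
        return [np.asarray(o["action_mask"]) == 1.0 for o in curr_obs.values()]

    curr_obs = {"0": {"action_mask": [1.0, 0.0, 1.0]}}
    print(action_masks(curr_obs))  # [array([ True, False,  True])]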