Let's create trading

2023-01-14 11:54:14 +01:00
parent 058eea02b2
commit 8801ebaf58
4 changed files with 24 additions and 14 deletions

View File

@@ -9,3 +9,9 @@ class TradingAgent(BaseAgent):
     """
     name = "TradingAgent"
+class TradingAgentLogic():
+    """Logic for trading agent"""
+    def __init__(self,) -> None:
+        pass
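
Note that Foundation scenarios look agents up by their name attribute, so a new agent class normally has to be registered before a config can reference it. A minimal sketch, assuming the agent_registry decorator that ai-economist uses for its built-in agents:

    from ai_economist.foundation.agents import agent_registry
    from ai_economist.foundation.base.base_agent import BaseAgent

    @agent_registry.add
    class TradingAgent(BaseAgent):
        """An agent that can participate in trades."""  # placeholder docstring

        name = "TradingAgent"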

View File

@@ -44,7 +44,7 @@ class SimpleCraft(BaseComponent):
         payment=10,
         payment_max_skill_multiplier=1,
         skill_dist="none",
-        build_labor=10.0,
+        build_labor=1.0,
         **base_component_kwargs
     ):
         super().__init__(*base_component_args, **base_component_kwargs)
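
For context, build_labor is the labor an agent pays per craft action, so this change makes crafting ten times cheaper in labor terms. A sketch of how a SimpleCraft-style component is typically wired into an ai-economist scenario config (the components-list shape follows the Foundation convention; the kwargs mirror the signature above and are illustrative):

    components = [
        ("SimpleCraft", {
            "payment": 10,
            "payment_max_skill_multiplier": 1,
            "skill_dist": "none",
            "build_labor": 1.0,
        }),
    ]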

main.py
View File

@@ -57,9 +57,9 @@ env_config = {
     'allow_observation_scaling': True,
     'dense_log_frequency': 100,
     'world_dense_log_frequency': 1,
-    'energy_cost': 0,
+    'energy_cost': 0.21,
     'energy_warmup_method': "auto",
-    'energy_warmup_constant': 0,
+    'energy_warmup_constant': 4000,
     # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
     # Otherwise, the policy selects only 1 action.
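
The switch to a nonzero energy cost matters because energy_warmup_method="auto" anneals the effective cost in gradually. A runnable sketch of the schedule as I read it from the ai-economist Foundation source (the exponential form is an assumption, not quoted from this repo):

    import numpy as np

    energy_cost = 0.21
    energy_warmup_constant = 4000

    def effective_energy_cost(t):
        # weight ramps from 0 toward 1 as the warmup integrator t grows
        weight = 1.0 - np.exp(-t / energy_warmup_constant)
        return energy_cost * weight

    print(effective_energy_cost(0))     # 0.0: no energy penalty at the start
    print(effective_energy_cost(4000))  # ~0.13: about 63% of the full cost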
@@ -105,13 +105,13 @@ eval_env_config = {
     # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
     'agent_composition': {"BasicMobileAgent": 20},  # Number of non-planner agents (must be > 1)
     'world_size': [1, 1],  # [Height, Width] of the env world
-    'episode_length': 100,  # Number of timesteps per episode
+    'episode_length': 256,  # Number of timesteps per episode
     'allow_observation_scaling': True,
     'dense_log_frequency': 10,
     'world_dense_log_frequency': 1,
-    'energy_cost': 0,
+    'energy_cost': 0.21,
     'energy_warmup_method': "auto",
-    'energy_warmup_constant': 0,
+    'energy_warmup_constant': 4000,
     # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
     # Otherwise, the policy selects only 1 action.
@@ -228,7 +228,7 @@ obs=monenv.reset()
 runname="run_{}".format(int(np.random.rand()*100))
-model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8, gamma=0.95, learning_rate=5e-3, env=monenv, verbose=1, device="cuda", tensorboard_log="./log")
+model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8, gamma=0.95, learning_rate=5e-3, env=monenv, seed=225, verbose=1, device="cuda", tensorboard_log="./log")
 n_agents=econ.n_agents
 total_required_for_episode=n_agents*env_config['episode_length']
 print("this is run {}".format(runname))
@@ -242,7 +242,7 @@ while True:
     # Create Eval ENV
     vec_mon_eval=VecMonitor(venv=eval_sb3_converter)
     # Train
-    model=model.learn(total_timesteps=total_required_for_episode*20,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
+    model=model.learn(total_timesteps=total_required_for_episode*10,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
     #normenv.save("temp-normalizer.ai")
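
Halving the per-iteration budget matters because learn() is called repeatedly inside a while True loop; with reset_num_timesteps=False the global step counter carries over between calls, so the TensorBoard curves stay continuous. Continuing the toy model from the previous sketch (run name is hypothetical):

    # model: the MaskablePPO instance from the previous sketch
    for _ in range(5):
        model.learn(total_timesteps=2_000,
                    reset_num_timesteps=False,  # keep the cumulative step count
                    tb_log_name="run_demo",     # hypothetical TensorBoard run name
                    progress_bar=True)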
@@ -253,7 +253,9 @@ while True:
     obs=vec_mon_eval.reset()
     done=False
     for i in tqdm(range(eval_env_config['episode_length'])):
-        action=model.predict(obs)
+        masks=eval_sb3_converter.action_masks()
+        action=model.predict(obs,action_masks=masks)
         obs,rew,done_e,info=vec_mon_eval.step(action[0])
         done=done_e[0]
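
This eval fix passes the current masks explicitly: MaskablePPO.predict accepts an action_masks argument, and without it the policy can sample actions the environment would reject. A sketch of the loop shape, continuing the toy env and model from the earlier sketch (old gym-style 4-tuple step API, as in the script above):

    from sb3_contrib.common.maskable.utils import get_action_masks

    # env, model: the toy environment and MaskablePPO instance from above
    obs = env.reset()
    for _ in range(50):
        masks = get_action_masks(env)  # queries env.action_masks() under the hood
        action, _states = model.predict(obs, action_masks=masks, deterministic=True)
        obs, reward, done, info = env.step(action)
        if done:
            obs = env.reset()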

View File

@@ -82,16 +82,18 @@ class SB3EconConverter(VecEnv, gym.Env):
     def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
         if seed is None:
             seed = np.random.randint(0, 2**32 - 1)
-        seeds = []
-        for idx, env in enumerate(self.envs):
-            seeds.append(env.seed(seed + idx))
+        self.econ.seed(seed)
+        seeds = [seed]
         return seeds
     def action_masks(self):
         """Returns action masks for agents and current obs"""
         masks = []
         for obs in self.curr_obs:
-            masks.append(self.curr_obs[obs]["action_mask"])
+            mask = []
+            for num in self.curr_obs[obs]["action_mask"]:
+                mask.append(num == 1.0)
+            masks.append(mask)
         return masks
     def close(self) -> None:
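
The element-wise num == 1.0 comparison converts the float 0/1 mask coming from the observations into booleans, which is what MaskablePPO's masked distributions expect (True meaning the action is allowed). The same conversion in vectorized NumPy form, for illustration:

    import numpy as np

    action_mask = np.array([1.0, 0.0, 1.0, 1.0])  # float mask from the env obs
    bool_mask = action_mask == 1.0                # array([ True, False,  True,  True])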