Let's create the trading agent

This commit is contained in:
2023-01-14 11:54:14 +01:00
parent 058eea02b2
commit 8801ebaf58
4 changed files with 24 additions and 14 deletions

View File

@@ -9,3 +9,9 @@ class TradingAgent(BaseAgent):
"""
name = "TradingAgent"
class TradingAgentLogic:
    """Decision logic for the trading agent.

    Currently a stateless placeholder: the constructor takes no arguments
    and stores nothing. Behavior is expected to be added in later commits.
    """

    def __init__(self) -> None:
        """Initialize the (currently empty) trading-agent logic."""
        # No state yet — placeholder so the class can be instantiated
        # and wired into TradingAgent before the real logic lands.
        pass

View File

@@ -44,7 +44,7 @@ class SimpleCraft(BaseComponent):
payment=10,
payment_max_skill_multiplier=1,
skill_dist="none",
build_labor=10.0,
build_labor=1.0,
**base_component_kwargs
):
super().__init__(*base_component_args, **base_component_kwargs)

18
main.py
View File

@@ -57,9 +57,9 @@ env_config = {
'allow_observation_scaling': True,
'dense_log_frequency': 100,
'world_dense_log_frequency':1,
'energy_cost':0,
'energy_cost':0.21,
'energy_warmup_method': "auto",
'energy_warmup_constant': 0,
'energy_warmup_constant': 4000,
# In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
# Otherwise, the policy selects only 1 action.
@@ -105,13 +105,13 @@ eval_env_config = {
# kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
'agent_composition': {"BasicMobileAgent": 20}, # Number of non-planner agents (must be > 1)
'world_size': [1, 1], # [Height, Width] of the env world
'episode_length': 100, # Number of timesteps per episode
'episode_length': 256, # Number of timesteps per episode
'allow_observation_scaling': True,
'dense_log_frequency': 10,
'world_dense_log_frequency':1,
'energy_cost':0,
'energy_cost':0.21,
'energy_warmup_method': "auto",
'energy_warmup_constant': 0,
'energy_warmup_constant': 4000,
# In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
# Otherwise, the policy selects only 1 action.
@@ -228,7 +228,7 @@ obs=monenv.reset()
runname="run_{}".format(int(np.random.rand()*100))
model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, verbose=1,device="cuda",tensorboard_log="./log")
model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.8 ,gamma=0.95, learning_rate=5e-3,env=monenv, seed=225,verbose=1,device="cuda",tensorboard_log="./log")
n_agents=econ.n_agents
total_required_for_episode=n_agents*env_config['episode_length']
print("this is run {}".format(runname))
@@ -242,7 +242,7 @@ while True:
# Create Eval ENV
vec_mon_eval=VecMonitor(venv=eval_sb3_converter)
#Train
model=model.learn(total_timesteps=total_required_for_episode*20,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
model=model.learn(total_timesteps=total_required_for_episode*10,progress_bar=True,reset_num_timesteps=False,tb_log_name=runname,callback=TensorboardCallback(econ=econ))
#normenv.save("temp-normalizer.ai")
@@ -253,7 +253,9 @@ while True:
obs=vec_mon_eval.reset()
done=False
for i in tqdm(range(eval_env_config['episode_length'])):
action=model.predict(obs)
masks=eval_sb3_converter.action_masks()
action=model.predict(obs,action_masks=masks)
obs,rew,done_e,info=vec_mon_eval.step(action[0])
done=done_e[0]

View File

@@ -82,16 +82,18 @@ class SB3EconConverter(VecEnv, gym.Env):
def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
if seed is None:
seed = np.random.randint(0, 2**32 - 1)
seeds = []
for idx, env in enumerate(self.envs):
seeds.append(env.seed(seed + idx))
self.econ.seed(seed)
seeds=[seed]
return seeds
def action_masks(self):
"""Returns action masks for agents and current obs"""
masks=[]
for obs in self.curr_obs:
masks.append(self.curr_obs[obs]["action_mask"])
mask=[]
for num in self.curr_obs[obs]["action_mask"]:
mask.append(num==1.0)
masks.append(mask)
return masks
def close(self) -> None: