Let's create trading
@@ -9,3 +9,9 @@ class TradingAgent(BaseAgent):
     """
 
     name = "TradingAgent"
+
+
+class TradingAgentLogic:
+    """Logic for trading agent"""
+    def __init__(self) -> None:
+        pass
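The new agent class only sets `name` here; wiring it into the simulation presumably also means registering it with the framework. A minimal sketch, assuming this repo follows the ai-economist foundation pattern (the import path and the `agent_registry.add` decorator are assumptions, not shown in this diff):

    # Sketch only: assumes the ai-economist foundation registry pattern.
    from ai_economist.foundation.base.base_agent import BaseAgent, agent_registry

    @agent_registry.add
    class TradingAgent(BaseAgent):
        """An agent type that can participate in trades."""

        name = "TradingAgent"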
@@ -44,7 +44,7 @@ class SimpleCraft(BaseComponent):
         payment=10,
         payment_max_skill_multiplier=1,
         skill_dist="none",
-        build_labor=10.0,
+        build_labor=1.0,
         **base_component_kwargs
     ):
         super().__init__(*base_component_args, **base_component_kwargs)
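For context on the `build_labor` change (10.0 to 1.0): in ai-economist's stock Build component, `build_labor` is the endogenous labor charged per craft action, so this makes crafting ten times cheaper in labor terms. A one-line sketch, under the assumption that SimpleCraft mirrors that behavior:

    # Assumption: SimpleCraft charges labor the way foundation's Build does.
    agent.state["endogenous"]["Labor"] += self.build_labor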
main.py (18 changes)
@@ -57,9 +57,9 @@ env_config = {
     'allow_observation_scaling': True,
     'dense_log_frequency': 100,
     'world_dense_log_frequency': 1,
-    'energy_cost': 0,
+    'energy_cost': 0.21,
     'energy_warmup_method': "auto",
-    'energy_warmup_constant': 0,
+    'energy_warmup_constant': 4000,
 
     # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
     # Otherwise, the policy selects only 1 action.
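`energy_cost` weights the labor penalty in agent utility, and the warmup anneals that penalty in gradually; with `energy_warmup_constant` raised from 0 to 4000, the cost now phases in over roughly 4000 warmup units instead of applying at full strength from step one. A sketch of the schedule, assuming the ai-economist "auto" warmup formula:

    import numpy as np

    # Assumed annealing schedule: the weight rises from 0 toward 1, so the
    # effective labor penalty at any point is energy_cost * weight.
    def energy_weight(warmup_integrator, energy_warmup_constant=4000):
        if energy_warmup_constant <= 0:
            return 1.0  # no warmup: full cost from the start
        return float(1.0 - np.exp(-warmup_integrator / energy_warmup_constant))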
@@ -105,13 +105,13 @@ eval_env_config = {
     # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
     'agent_composition': {"BasicMobileAgent": 20},  # Number of non-planner agents (must be > 1)
     'world_size': [1, 1],  # [Height, Width] of the env world
-    'episode_length': 100,  # Number of timesteps per episode
+    'episode_length': 256,  # Number of timesteps per episode
     'allow_observation_scaling': True,
     'dense_log_frequency': 10,
     'world_dense_log_frequency': 1,
-    'energy_cost': 0,
+    'energy_cost': 0.21,
     'energy_warmup_method': "auto",
-    'energy_warmup_constant': 0,
+    'energy_warmup_constant': 4000,
 
     # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
     # Otherwise, the policy selects only 1 action.
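These settings feed the training arithmetic further down: with 20 agents and an episode length of 256, one episode consumes 20 * 256 = 5120 agent-steps, and each `model.learn` call below asks for ten episodes' worth. A sketch of that bookkeeping (names from main.py; it assumes the training `env_config` uses the same 256-step episode length shown here):

    n_agents = 20                                            # 'agent_composition' above
    episode_length = 256
    total_required_for_episode = n_agents * episode_length   # 5120
    total_timesteps = total_required_for_episode * 10        # 51,200 per learn() call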
@@ -228,7 +228,7 @@ obs=monenv.reset()
 
 runname = "run_{}".format(int(np.random.rand()*100))
 
-model = MaskablePPO("MlpPolicy", n_steps=int(env_config['episode_length']*2), ent_coef=0.1, vf_coef=0.8, gamma=0.95, learning_rate=5e-3, env=monenv, verbose=1, device="cuda", tensorboard_log="./log")
+model = MaskablePPO("MlpPolicy", n_steps=int(env_config['episode_length']*2), ent_coef=0.1, vf_coef=0.8, gamma=0.95, learning_rate=5e-3, env=monenv, seed=225, verbose=1, device="cuda", tensorboard_log="./log")
 n_agents = econ.n_agents
 total_required_for_episode = n_agents * env_config['episode_length']
 print("this is run {}".format(runname))
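Adding `seed=225` pins the run: Stable-Baselines3 seeds torch and numpy with it and also forwards it to the vectorized env's `seed()` method, which is exactly the method reworked in the `SB3EconConverter` hunk below. The same call with the kwargs spread out for readability (values are from this diff; only the layout is mine):

    from sb3_contrib import MaskablePPO

    model = MaskablePPO(
        "MlpPolicy",
        env=monenv,                                     # VecMonitor-wrapped converter env
        n_steps=int(env_config['episode_length'] * 2),  # two full episodes per rollout
        ent_coef=0.1,
        vf_coef=0.8,
        gamma=0.95,
        learning_rate=5e-3,
        seed=225,                                       # also reaches SB3EconConverter.seed()
        verbose=1,
        device="cuda",
        tensorboard_log="./log",
    )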
@@ -242,7 +242,7 @@ while True:
     # Create Eval ENV
     vec_mon_eval = VecMonitor(venv=eval_sb3_converter)
     # Train
-    model = model.learn(total_timesteps=total_required_for_episode*20, progress_bar=True, reset_num_timesteps=False, tb_log_name=runname, callback=TensorboardCallback(econ=econ))
+    model = model.learn(total_timesteps=total_required_for_episode*10, progress_bar=True, reset_num_timesteps=False, tb_log_name=runname, callback=TensorboardCallback(econ=econ))
     # normenv.save("temp-normalizer.ai")
 
 
@@ -253,7 +253,9 @@ while True:
     obs = vec_mon_eval.reset()
     done = False
     for i in tqdm(range(eval_env_config['episode_length'])):
-        action = model.predict(obs)
+        masks = eval_sb3_converter.action_masks()
+        action = model.predict(obs, action_masks=masks)
+
 
         obs, rew, done_e, info = vec_mon_eval.step(action[0])
         done = done_e[0]
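Passing `action_masks=masks` at predict time matters because `MaskablePPO.predict` does not query the env on its own; without it, the eval rollout could sample invalid actions that were always masked out during training. sb3-contrib also ships a helper that pulls masks from any env exposing `action_masks()`, which the converter does; a sketch of the same loop using it:

    from sb3_contrib.common.maskable.utils import get_action_masks

    obs = vec_mon_eval.reset()
    for _ in range(eval_env_config['episode_length']):
        masks = get_action_masks(eval_sb3_converter)
        action, _states = model.predict(obs, action_masks=masks)
        obs, rew, done_e, info = vec_mon_eval.step(action)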
@@ -82,16 +82,18 @@ class SB3EconConverter(VecEnv, gym.Env):
     def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
         if seed is None:
             seed = np.random.randint(0, 2**32 - 1)
-        seeds = []
-        for idx, env in enumerate(self.envs):
-            seeds.append(env.seed(seed + idx))
+        self.econ.seed(seed)
+        seeds = [seed]
         return seeds
 
     def action_masks(self):
         """Returns action masks for agents and current obs"""
         masks = []
         for obs in self.curr_obs:
-            masks.append(self.curr_obs[obs]["action_mask"])
+            mask = []
+            for num in self.curr_obs[obs]["action_mask"]:
+                mask.append(num == 1.0)
+            masks.append(mask)
         return masks
 
     def close(self) -> None:
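The reworked `action_masks` converts the float 0/1 mask to booleans element by element. A behavior-preserving, vectorized alternative (a sketch; it assumes `curr_obs` maps agent ids to obs dicts, as the loop above implies):

    import numpy as np

    def action_masks(self):
        """Boolean action masks for all agents' current observations."""
        return [
            np.asarray(obs_dict["action_mask"]) == 1.0
            for obs_dict in self.curr_obs.values()
        ]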