its training good
This commit is contained in:
341
main working good econ trader univer.pys
Normal file
341
main working good econ trader univer.pys
Normal file
@@ -0,0 +1,341 @@
|
|||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from ai_economist import foundation
|
||||||
|
from stable_baselines3.common.vec_env import vec_frame_stack
|
||||||
|
from stable_baselines3.common.evaluation import evaluate_policy
|
||||||
|
from sb3_contrib.ppo_mask import MaskablePPO
|
||||||
|
import envs
|
||||||
|
import wrapper
|
||||||
|
import resources
|
||||||
|
from agents import trading_agent
|
||||||
|
from wrapper.base_econ_wrapper import BaseEconWrapper
|
||||||
|
from wrapper.reciever_econ_wrapper import RecieverEconWrapper
|
||||||
|
from wrapper.sb3_econ_converter import SB3EconConverter
|
||||||
|
from tqdm import tqdm
|
||||||
|
import components
|
||||||
|
from stable_baselines3.common.env_checker import check_env
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
|
||||||
|
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
|
||||||
|
from sb3_contrib import RecurrentPPO
|
||||||
|
from envs.econ_wrapper import EconVecEnv
|
||||||
|
from stable_baselines3.common.callbacks import BaseCallback
|
||||||
|
import yaml
|
||||||
|
import time
|
||||||
|
from threading import Thread
|
||||||
|
|
||||||
|
# Keyword arguments for foundation.make_env_instance() used to build the
# training environment.
env_config = {
    # ===== SCENARIO CLASS =====
    # Which Scenario class to use: the class's name in the Scenario Registry
    # (foundation.scenarios). The environment object will be an instance of
    # the Scenario class.
    'scenario_name': 'econ',

    # ===== COMPONENTS =====
    # Which components to use (specified as list of
    # ("component_name", {component_kwargs}) tuples).
    #   "component_name" refers to the Component class's name in the
    #   Component Registry (foundation.components).
    #   {component_kwargs} is a dictionary of kwargs passed to the Component
    #   class.
    # The order in which components reset, step, and generate obs follows
    # their listed order below.
    'components': [
        # (1) Crafting commodities (here: "Gem")
        ('Craft', {
            'skill_dist': "pareto",
            'commodities': ["Gem"],
            'max_skill_amount_benefit': 1.5,
        }),
        # (2) Trading collectible resources
        ('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
        # (4) External market; 'market_demand' maps a commodity name to its
        #     demand value
        ('ExternalMarket', {
            'market_demand': {
                'Gem': 15,
            },
        }),
    ],

    # ===== SCENARIO CLASS ARGUMENTS =====
    # (optional) kwargs that are added by the Scenario class
    # (i.e. not defined in BaseEnvironment)
    'starting_agent_coin': 10,
    'fixed_four_skill_and_loc': True,

    # ===== STANDARD ARGUMENTS ======
    # kwargs that are used by every Scenario class
    # (i.e. defined in BaseEnvironment)
    # Number of non-planner agents (must be > 1)
    'agent_composition': {"BasicMobileAgent": 20, "TradingAgent": 5},
    'world_size': [5, 5],  # [Height, Width] of the env world
    'episode_length': 256,  # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 100,
    'world_dense_log_frequency': 1,
    'energy_cost': 0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 4000,

    # In multi-action-mode, the policy selects an action for each action
    # subspace (defined in component code).
    # Otherwise, the policy selects only 1 action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,

    # When flattening observations, concatenate scalar & vector observations
    # before output.
    # Otherwise, return observations with minimal processing.
    'flatten_observations': False,
    # When flattening masks, concatenate each action subspace mask into a
    # single array.
    # Note: flatten_masks = True is required for masking action logits in the
    # code below.
    'flatten_masks': True,
}
|
||||||
|
|
||||||
|
|
||||||
|
# Keyword arguments for foundation.make_env_instance() used to build the
# evaluation environment.
eval_env_config = {
    # ===== SCENARIO CLASS =====
    # Which Scenario class to use: the class's name in the Scenario Registry
    # (foundation.scenarios). The environment object will be an instance of
    # the Scenario class.
    'scenario_name': 'econ',

    # ===== COMPONENTS =====
    # Which components to use (specified as list of
    # ("component_name", {component_kwargs}) tuples).
    #   "component_name" refers to the Component class's name in the
    #   Component Registry (foundation.components).
    #   {component_kwargs} is a dictionary of kwargs passed to the Component
    #   class.
    # The order in which components reset, step, and generate obs follows
    # their listed order below.
    'components': [
        # (1) Crafting commodities (here: "Gem")
        ('Craft', {
            'skill_dist': "pareto",
            'commodities': ["Gem"],
            'max_skill_amount_benefit': 1.5,
        }),
        # (2) Trading collectible resources
        ('ContinuousDoubleAuction', {'max_num_orders': 10}),
        # (3) Movement and resource collection
        ('SimpleGather', {}),
        # (4) External market; 'market_demand' maps a commodity name to its
        #     demand value
        ('ExternalMarket', {
            'market_demand': {
                'Gem': 15,
            },
        }),
    ],

    # ===== SCENARIO CLASS ARGUMENTS =====
    # (optional) kwargs that are added by the Scenario class
    # (i.e. not defined in BaseEnvironment)
    'starting_agent_coin': 10,
    'fixed_four_skill_and_loc': True,

    # ===== STANDARD ARGUMENTS ======
    # kwargs that are used by every Scenario class
    # (i.e. defined in BaseEnvironment)
    # Number of non-planner agents (must be > 1)
    'agent_composition': {"BasicMobileAgent": 20, "TradingAgent": 5},
    'world_size': [1, 1],  # [Height, Width] of the env world
    'episode_length': 256,  # Number of timesteps per episode
    'allow_observation_scaling': True,
    'dense_log_frequency': 1,
    'world_dense_log_frequency': 1,
    'energy_cost': 0,
    'energy_warmup_method': "auto",
    'energy_warmup_constant': 4000,

    # In multi-action-mode, the policy selects an action for each action
    # subspace (defined in component code).
    # Otherwise, the policy selects only 1 action.
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': False,

    # When flattening observations, concatenate scalar & vector observations
    # before output.
    # Otherwise, return observations with minimal processing.
    'flatten_observations': False,
    # When flattening masks, concatenate each action subspace mask into a
    # single array.
    # Note: flatten_masks = True is required for masking action logits in the
    # code below.
    'flatten_masks': True,
}
|
||||||
|
|
||||||
|
# Frame-stack depth: passed as n_stack to every VecFrameStack built in this script.
num_frames=5
|
||||||
|
|
||||||
|
class TensorboardCallback(BaseCallback):
    """Custom callback that logs productivity metrics to the training logger.

    On every callback step it records the current ``social/productivity``
    value and its change relative to the metrics held from the previous call.
    """

    def __init__(self, econ, verbose=0):
        """Store the environment and take an initial metrics snapshot.

        Args:
            econ: Environment object exposing ``scenario_metrics()`` and
                ``previous_episode_metrics``.
            verbose: Verbosity level forwarded to ``BaseCallback``.
        """
        super().__init__(verbose)
        self.econ = econ
        self.metrics = econ.scenario_metrics()

    def _on_step(self) -> bool:
        """Record total and delta productivity.

        Returns:
            Always ``True``.
        """
        prev_metrics = self.metrics
        # Use the metrics of the previous episode when they exist; otherwise
        # fall back to the live scenario metrics.
        if self.econ.previous_episode_metrics is None:
            self.metrics = self.econ.scenario_metrics()
        else:
            self.metrics = self.econ.previous_episode_metrics
        curr_prod = self.metrics["social/productivity"]
        delta_prod = curr_prod - prev_metrics["social/productivity"]
        self.logger.record("social/total_productivity", curr_prod)
        self.logger.record("social/delta_productivity", delta_prod)

        return True
|
||||||
|
|
||||||
|
|
||||||
|
def printMarket(market):
    """Print every transaction of a per-step market log to stdout.

    Steps without transactions are skipped. For each remaining step a
    ``=== Step i ===`` header is printed, followed by one line per
    transaction (each followed by a blank line, because the formatted string
    itself ends in a newline).

    Args:
        market: Sequence indexed by step number; each entry is a sequence of
            transaction mappings providing the keys ``commodity``,
            ``seller``, ``buyer``, ``ask``, ``bid`` and ``price``.

    Returns:
        An empty string.
    """
    for step_idx, transactions in enumerate(market):
        if len(transactions) > 0:
            print("=== Step {} ===".format(step_idx))
            for t in transactions:
                transstring = "({}) {} -> {} | [{}/{}] {} Coins\n".format(
                    t["commodity"], t["seller"], t["buyer"],
                    t["ask"], t["bid"], t["price"],
                )
                print(transstring)
    return ""
|
||||||
|
|
||||||
|
def printBuilds(builds):
    """Print every build event of a per-step build log to stdout.

    Steps without build events are skipped; each event is printed on its own
    line, prefixed with the step number.

    Args:
        builds: Sequence indexed by step number; each entry is a sequence of
            build mappings providing the keys ``builder``, ``build_skill``
            and ``income``.

    Returns:
        An empty string.
    """
    for step_idx, step_builds in enumerate(builds):
        if len(step_builds) > 0:
            for b in step_builds:
                transstring = "({}) Builder: {}, Skill: {}, Income {} ".format(
                    step_idx, b["builder"], b["build_skill"], b["income"],
                )
                print(transstring)
    return ""
|
||||||
|
def printReplay(econ, agentid):
    """Print a step-by-step replay of one agent from the previous episode.

    Reads ``econ.previous_episode_dense_log`` and, for each step, prints the
    world resource values, the agent's state (as YAML), its action(s) and its
    reward.

    Args:
        econ: Environment object exposing ``previous_episode_dense_log`` and
            ``world.agents``.
        agentid: Index of the agent in ``econ.world.agents``; its string form
            is used as the key into the per-step log entries.
    """
    worldmaps = ["Stone", "Wood"]

    log = econ.previous_episode_dense_log
    # Result unused; the lookup is kept so an invalid index still raises
    # before anything is printed.
    econ.world.agents[agentid]

    agentid = str(agentid)
    # The final entry of log["states"] is not replayed.
    # NOTE(review): presumably the states log holds one more entry than the
    # actions/rewards logs — confirm.
    max_step = len(log["states"]) - 1

    for step in range(max_step):
        print()
        print("=== Step {} ===".format(step))

        print("--- World ---")
        world = log['world'][step]
        for res in worldmaps:
            # Only the [0][0] entry of each resource map is shown.
            print("{}: {}".format(res, world[res][0][0]))

        print("--- State ---")
        state = log['states'][step][agentid]
        print(yaml.safe_dump(state))

        print("--- Action ---")
        action = log["actions"][step][agentid]
        if action == {}:
            # An empty action dict is reported as a no-op.
            print("Action: 0 -> NOOP")
        else:
            for k in action:
                formats = "Action: {}({})".format(k, action[k])
                print(formats)

        print("--- Reward ---")
        reward = log["rewards"][step][agentid]
        print("Reward: {}".format(reward))
|
||||||
|
|
||||||
|
# Setup Env Objects (training)
econ = foundation.make_env_instance(**env_config)

market = econ.get_component("ContinuousDoubleAuction")
action = market.get_n_actions("TradingAgent")
baseEconWrapper = BaseEconWrapper(econ)
baseEconWrapper.run()
# NOTE(review): presumably gives the wrapper started by run() time to come up
# before receivers are attached — confirm.
time.sleep(0.5)
# One receiver per agent class, both attached to the same base wrapper.
mobileRecieverEconWrapper = RecieverEconWrapper(
    base_econ=baseEconWrapper, agent_classname="BasicMobileAgent")
tradeRecieverEconWrapper = RecieverEconWrapper(
    base_econ=baseEconWrapper, agent_classname="TradingAgent")
# Last positional argument is the converter's auto_reset flag (True for
# training).
sb3_traderConverter = SB3EconConverter(
    tradeRecieverEconWrapper, econ, "TradingAgent", True)
sb3Converter = SB3EconConverter(
    mobileRecieverEconWrapper, econ, "BasicMobileAgent", True)

# attach sb3 wrappers
monenv = VecMonitor(
    venv=sb3Converter,
    info_keywords=["social/productivity", "trend/productivity"])
montraidingenv = VecMonitor(venv=sb3_traderConverter)

stackenv_basic = vec_frame_stack.VecFrameStack(
    venv=monenv, n_stack=num_frames)
stackenv_traid = vec_frame_stack.VecFrameStack(
    venv=montraidingenv, n_stack=num_frames)
# Model setup complete

# Setup Eval Env: same wrapper chain as above, built on eval_env_config and
# with auto_reset disabled.
econ_eval = foundation.make_env_instance(**eval_env_config)

baseEconWrapper_eval = BaseEconWrapper(econ_eval)
baseEconWrapper_eval.run()
time.sleep(0.5)
mobileRecieverEconWrapper_eval = RecieverEconWrapper(
    base_econ=baseEconWrapper_eval, agent_classname="BasicMobileAgent")
tradeRecieverEconWrapper_eval = RecieverEconWrapper(
    base_econ=baseEconWrapper_eval, agent_classname="TradingAgent")
sb3_traderConverter_eval = SB3EconConverter(
    tradeRecieverEconWrapper_eval, econ_eval, "TradingAgent", False)
sb3Converter_eval = SB3EconConverter(
    mobileRecieverEconWrapper_eval, econ_eval, "BasicMobileAgent", False)

# attach sb3 wrappers
monenv_eval = VecMonitor(
    venv=sb3Converter_eval,
    info_keywords=["social/productivity", "trend/productivity"])
montraidingenv_eval = VecMonitor(venv=sb3_traderConverter_eval)

stackenv_basic_eval = vec_frame_stack.VecFrameStack(
    venv=monenv_eval, n_stack=num_frames)
stackenv_traid_eval = vec_frame_stack.VecFrameStack(
    venv=montraidingenv_eval, n_stack=num_frames)

obs = monenv.reset()
|
||||||
|
|
||||||
|
|
||||||
|
# define training functions
|
||||||
|
def train(model, timesteps, econ_call, process_bar, name, db, index):
    """Run ``model.learn`` and store its return value in ``db[index]``.

    Intended to be used as a thread target: the result is handed back through
    the shared ``db`` container instead of a return value.

    Args:
        model: Object exposing ``learn(...)``.
        timesteps: Passed to ``learn`` as ``total_timesteps``.
        econ_call: Environment handed to ``TensorboardCallback``.
        process_bar: Passed to ``learn`` as ``progress_bar``.
        name: Passed to ``learn`` as ``tb_log_name``.
        db: Mutable, index-assignable container receiving the result.
        index: Position in ``db`` to write to.
    """
    db[index] = model.learn(
        total_timesteps=timesteps,
        progress_bar=process_bar,
        reset_num_timesteps=False,
        tb_log_name=name,
        callback=TensorboardCallback(econ_call),
    )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# prepare training
run_number = int(np.random.rand() * 100)
runname = "run_{}".format(run_number)
# Filled by train(): slot 0 is the basic-agent model, slot 1 the trader model.
model_db = [None, None]

model = MaskablePPO(
    "MlpPolicy",
    n_steps=int(env_config['episode_length'] * 2),
    ent_coef=0.1,
    vf_coef=0.5,
    gamma=0.99,
    learning_rate=1e-5,
    env=stackenv_basic,
    seed=300,
    verbose=1,
    device="cuda",
    tensorboard_log="./log",
)
model_trade = MaskablePPO(
    "MlpPolicy",
    n_steps=int(env_config['episode_length'] * 2),
    ent_coef=0.1,
    vf_coef=0.5,
    gamma=0.99,
    learning_rate=1e-5,
    env=stackenv_traid,
    seed=300,
    verbose=1,
    device="cuda",
    tensorboard_log="./log",
)

n_agents = econ.n_agents

# Timesteps needed for one full episode across all agents of each class.
total_required_for_episode_basic = (
    len(mobileRecieverEconWrapper.agnet_idx) * env_config['episode_length'])
total_required_for_episode_traid = (
    len(tradeRecieverEconWrapper.agnet_idx) * env_config['episode_length'])

print("this is run {}".format(runname))

# Alternate forever between a training phase and one evaluation episode.
while True:
    # Train: both models learn concurrently, each in its own thread, against
    # the same underlying training environment.
    runname = "run_{}_{}".format(run_number, "basic")
    thread_model = Thread(
        target=train,
        args=(model, total_required_for_episode_basic * 100, econ, True,
              runname, model_db, 0))
    runname = "run_{}_{}".format(run_number, "trader")
    thread_model_traid = Thread(
        target=train,
        args=(model_trade, total_required_for_episode_traid * 100, econ,
              False, runname, model_db, 1))

    thread_model.start()
    thread_model_traid.start()
    thread_model.join()
    thread_model_traid.join()
    #normenv.save("temp-normalizer.ai")

    ## Run Eval
    print("### EVAL ###")
    obs_basic = stackenv_basic_eval.reset()
    obs_trade = stackenv_traid_eval.reset()
    model = model_db[0]
    model_trade = model_db[1]
    done = False
    for i in tqdm(range(eval_env_config['episode_length'])):
        # create masks
        masks_basic = stackenv_basic_eval.action_masks()
        masks_trade = stackenv_traid_eval.action_masks()
        # get actions; predict() results are indexed with [0] for the actions
        action_basic = model.predict(obs_basic, action_masks=masks_basic)
        action_trade = model_trade.predict(obs_trade, action_masks=masks_trade)
        # submit async directly for non blocking operation: both converters
        # receive their actions before either step() below is called
        sb3Converter_eval.step_async(action_basic[0])
        sb3_traderConverter_eval.step_async(action_trade[0])
        # retrieve full results
        obs_basic, rew_basic, done_e, info = stackenv_basic_eval.step(
            action_basic[0])
        obs_trade, rew_trade, done_e, info = stackenv_traid_eval.step(
            action_trade[0])
        done = done_e[0]

    market = econ_eval.get_component("ContinuousDoubleAuction")
    craft = econ_eval.get_component("Craft")
    # trades=market.get_dense_log()
    build = craft.get_dense_log()
    # NOTE(review): metrics are read from the training env `econ`, while the
    # replay below uses `econ_eval` — confirm which one is intended.
    met = econ.previous_episode_metrics
    printReplay(econ_eval, 0)
    # printMarket(trades)
    # printBuilds(builds=build)
    print("social/productivity: {}".format(met["social/productivity"]))
    print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
    print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))

    time.sleep(1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
119
main.py
119
main.py
@@ -8,6 +8,7 @@ from sb3_contrib.ppo_mask import MaskablePPO
|
|||||||
import envs
|
import envs
|
||||||
import wrapper
|
import wrapper
|
||||||
import resources
|
import resources
|
||||||
|
import pprint
|
||||||
from agents import trading_agent
|
from agents import trading_agent
|
||||||
from wrapper.base_econ_wrapper import BaseEconWrapper
|
from wrapper.base_econ_wrapper import BaseEconWrapper
|
||||||
from wrapper.reciever_econ_wrapper import RecieverEconWrapper
|
from wrapper.reciever_econ_wrapper import RecieverEconWrapper
|
||||||
@@ -44,7 +45,7 @@ env_config = {
|
|||||||
# (3) Movement and resource collection
|
# (3) Movement and resource collection
|
||||||
('SimpleGather', {}),
|
('SimpleGather', {}),
|
||||||
('ExternalMarket',{'market_demand':{
|
('ExternalMarket',{'market_demand':{
|
||||||
'Gem': 10
|
'Gem': 15
|
||||||
}}),
|
}}),
|
||||||
],
|
],
|
||||||
|
|
||||||
@@ -99,7 +100,7 @@ eval_env_config = {
|
|||||||
# (3) Movement and resource collection
|
# (3) Movement and resource collection
|
||||||
('SimpleGather', {}),
|
('SimpleGather', {}),
|
||||||
('ExternalMarket',{'market_demand':{
|
('ExternalMarket',{'market_demand':{
|
||||||
'Gem': 10
|
'Gem': 15
|
||||||
}}),
|
}}),
|
||||||
],
|
],
|
||||||
|
|
||||||
@@ -115,7 +116,7 @@ eval_env_config = {
|
|||||||
'world_size': [1, 1], # [Height, Width] of the env world
|
'world_size': [1, 1], # [Height, Width] of the env world
|
||||||
'episode_length': 256, # Number of timesteps per episode
|
'episode_length': 256, # Number of timesteps per episode
|
||||||
'allow_observation_scaling': True,
|
'allow_observation_scaling': True,
|
||||||
'dense_log_frequency': 10,
|
'dense_log_frequency': 1,
|
||||||
'world_dense_log_frequency':1,
|
'world_dense_log_frequency':1,
|
||||||
'energy_cost':0,
|
'energy_cost':0,
|
||||||
'energy_warmup_method': "auto",
|
'energy_warmup_method': "auto",
|
||||||
@@ -134,7 +135,7 @@ eval_env_config = {
|
|||||||
'flatten_masks': True,
|
'flatten_masks': True,
|
||||||
}
|
}
|
||||||
|
|
||||||
num_frames=2
|
num_frames=5
|
||||||
|
|
||||||
class TensorboardCallback(BaseCallback):
|
class TensorboardCallback(BaseCallback):
|
||||||
"""
|
"""
|
||||||
@@ -147,15 +148,16 @@ class TensorboardCallback(BaseCallback):
|
|||||||
self.metrics=econ.scenario_metrics()
|
self.metrics=econ.scenario_metrics()
|
||||||
def _on_step(self) -> bool:
|
def _on_step(self) -> bool:
|
||||||
# Log scalar value (here a random variable)
|
# Log scalar value (here a random variable)
|
||||||
prev_metrics=self.metrics
|
if econ.world.timestep==0:
|
||||||
if self.econ.previous_episode_metrics is None:
|
prev_metrics=self.metrics
|
||||||
self.metrics=self.econ.scenario_metrics()
|
if self.econ.previous_episode_metrics is None:
|
||||||
else:
|
self.metrics=self.econ.scenario_metrics()
|
||||||
self.metrics=self.econ.previous_episode_metrics
|
else:
|
||||||
curr_prod=self.metrics["social/productivity"]
|
self.metrics=self.econ.previous_episode_metrics
|
||||||
trend_pord=curr_prod-prev_metrics["social/productivity"]
|
curr_prod=self.metrics["social/productivity"]
|
||||||
self.logger.record("social/total_productivity", curr_prod)
|
trend_pord=curr_prod-prev_metrics["social/productivity"]
|
||||||
self.logger.record("social/delta_productivity", trend_pord)
|
self.logger.record("social/total_productivity", curr_prod)
|
||||||
|
self.logger.record("social/delta_productivity", trend_pord)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -200,7 +202,7 @@ def printReplay(econ,agentid):
|
|||||||
print("--- State ---")
|
print("--- State ---")
|
||||||
state=log['states'][step][agentid]
|
state=log['states'][step][agentid]
|
||||||
|
|
||||||
print(yaml.dump(state))
|
pprint.pprint(state)
|
||||||
print("--- Action ---")
|
print("--- Action ---")
|
||||||
action=log["actions"][step][agentid]
|
action=log["actions"][step][agentid]
|
||||||
|
|
||||||
@@ -222,17 +224,40 @@ market=econ.get_component("ContinuousDoubleAuction")
|
|||||||
action=market.get_n_actions("TradingAgent")
|
action=market.get_n_actions("TradingAgent")
|
||||||
baseEconWrapper=BaseEconWrapper(econ)
|
baseEconWrapper=BaseEconWrapper(econ)
|
||||||
baseEconWrapper.run()
|
baseEconWrapper.run()
|
||||||
|
time.sleep(0.5)
|
||||||
mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent")
|
mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent")
|
||||||
tradeRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="TradingAgent")
|
tradeRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="TradingAgent")
|
||||||
sb3_traderConverter=SB3EconConverter(tradeRecieverEconWrapper,econ,"TradingAgent")
|
sb3_traderConverter=SB3EconConverter(tradeRecieverEconWrapper,econ,"TradingAgent",True)
|
||||||
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent")
|
sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent",True)
|
||||||
#obs=sb3Converter.reset()
|
# attach sb3 wrappers
|
||||||
#vecenv=EconVecEnv(env_config=env_config)
|
|
||||||
|
|
||||||
monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"])
|
monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"])
|
||||||
montraidingenv=VecMonitor(venv=sb3_traderConverter)
|
montraidingenv=VecMonitor(venv=sb3_traderConverter)
|
||||||
#normenv=VecNormalize(sb3Converter,norm_reward=False,clip_obs=1)
|
|
||||||
#stackenv=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=10)
|
stackenv_basic=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=num_frames)
|
||||||
|
stackenv_traid=vec_frame_stack.VecFrameStack(venv=montraidingenv,n_stack=num_frames)
|
||||||
|
# Model setup complete
|
||||||
|
|
||||||
|
# Setup Eval Env
|
||||||
|
econ_eval=foundation.make_env_instance(**eval_env_config)
|
||||||
|
|
||||||
|
|
||||||
|
baseEconWrapper_eval=BaseEconWrapper(econ_eval)
|
||||||
|
baseEconWrapper_eval.run()
|
||||||
|
time.sleep(0.5)
|
||||||
|
mobileRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="BasicMobileAgent")
|
||||||
|
tradeRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="TradingAgent")
|
||||||
|
sb3_traderConverter_eval=SB3EconConverter(tradeRecieverEconWrapper_eval,econ_eval,"TradingAgent",False)
|
||||||
|
sb3Converter_eval=SB3EconConverter(mobileRecieverEconWrapper_eval,econ_eval,"BasicMobileAgent",False)
|
||||||
|
# attach sb3 wrappers
|
||||||
|
|
||||||
|
monenv_eval=VecMonitor(venv=sb3Converter_eval,info_keywords=["social/productivity","trend/productivity"])
|
||||||
|
montraidingenv_eval=VecMonitor(venv=sb3_traderConverter_eval)
|
||||||
|
|
||||||
|
stackenv_basic_eval=vec_frame_stack.VecFrameStack(venv=monenv_eval,n_stack=num_frames)
|
||||||
|
stackenv_traid_eval=vec_frame_stack.VecFrameStack(venv=montraidingenv_eval,n_stack=num_frames)
|
||||||
|
|
||||||
|
|
||||||
obs=monenv.reset()
|
obs=monenv.reset()
|
||||||
|
|
||||||
|
|
||||||
@@ -245,11 +270,11 @@ def train(model,timesteps, econ_call,process_bar,name,db,index):
|
|||||||
# prepare training
|
# prepare training
|
||||||
run_number=int(np.random.rand()*100)
|
run_number=int(np.random.rand()*100)
|
||||||
runname="run_{}".format(run_number)
|
runname="run_{}".format(run_number)
|
||||||
model_db=[] # object for storing model
|
model_db=[None,None] # object for storing model
|
||||||
|
|
||||||
|
|
||||||
model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.98, learning_rate=5e-3,env=monenv, seed=225,verbose=1,device="cuda",tensorboard_log="./log")
|
model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_basic, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
|
||||||
model_trade=MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.98, learning_rate=5e-3,env=montraidingenv, seed=225,verbose=1,device="cuda",tensorboard_log="./log")
|
model_trade=MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_traid, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
|
||||||
|
|
||||||
n_agents=econ.n_agents
|
n_agents=econ.n_agents
|
||||||
|
|
||||||
@@ -257,53 +282,57 @@ total_required_for_episode_basic=len(mobileRecieverEconWrapper.agnet_idx)*env_co
|
|||||||
total_required_for_episode_traid=len(tradeRecieverEconWrapper.agnet_idx)*env_config['episode_length']
|
total_required_for_episode_traid=len(tradeRecieverEconWrapper.agnet_idx)*env_config['episode_length']
|
||||||
|
|
||||||
print("this is run {}".format(runname))
|
print("this is run {}".format(runname))
|
||||||
eval_econ=foundation.make_env_instance(**eval_env_config)
|
|
||||||
eval_base_econ=BaseEconWrapper(eval_econ)
|
|
||||||
eval_base_econ.run()
|
|
||||||
eval_mobileRecieverEconWrapper=RecieverEconWrapper(eval_base_econ,"BasicMobileAgent")
|
|
||||||
time.sleep(0.5)
|
|
||||||
eval_sb3_converter=SB3EconConverter(eval_mobileRecieverEconWrapper,eval_econ,"BasicMobileAgent")
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
# Create Eval ENV
|
|
||||||
vec_mon_eval=VecMonitor(venv=eval_sb3_converter)
|
|
||||||
#Train
|
#Train
|
||||||
runname="run_{}_{}".format(run_number,"basic")
|
runname="run_{}_{}".format(run_number,"basic")
|
||||||
|
|
||||||
thread_model=Thread(target=train,args=(model,total_required_for_episode_basic*300,econ,True,runname,model_db,0))
|
thread_model=Thread(target=train,args=(model,total_required_for_episode_basic*50,econ,True,runname,model_db,0))
|
||||||
runname="run_{}_{}".format(run_number,"trader")
|
runname="run_{}_{}".format(run_number,"trader")
|
||||||
thread_model_traid=Thread(target=train,args=(model_trade,total_required_for_episode_traid*300,econ,False,runname,model_db,1))
|
thread_model_traid=Thread(target=train,args=(model_trade,total_required_for_episode_traid*50,econ,False,runname,model_db,1))
|
||||||
|
|
||||||
thread_model.start()
|
thread_model.start()
|
||||||
thread_model_traid.start()
|
thread_model_traid.start()
|
||||||
thread_model.join()
|
thread_model.join()
|
||||||
thread_model_traid.join()
|
thread_model_traid.join()
|
||||||
#normenv.save("temp-normalizer.ai")
|
#normenv.save("temp-normalizer.ai")
|
||||||
|
model=model_db[0]
|
||||||
|
model_trade=model_db[1]
|
||||||
|
model.save("basic.ai")
|
||||||
|
model_trade.save("trade.ai")
|
||||||
|
|
||||||
## Run Eval
|
## Run Eval
|
||||||
print("### EVAL ###")
|
print("### EVAL ###")
|
||||||
|
obs_basic=stackenv_basic_eval.reset()
|
||||||
obs=vec_mon_eval.reset()
|
obs_trade=stackenv_traid_eval.reset()
|
||||||
done=False
|
done=False
|
||||||
for i in tqdm(range(eval_env_config['episode_length'])):
|
for i in tqdm(range(eval_env_config['episode_length'])):
|
||||||
masks=eval_sb3_converter.action_masks()
|
#create masks
|
||||||
action=model.predict(obs,action_masks=masks)
|
masks_basic=stackenv_basic_eval.action_masks()
|
||||||
|
masks_trade=stackenv_traid_eval.action_masks()
|
||||||
obs,rew,done_e,info=vec_mon_eval.step(action[0])
|
# get actions
|
||||||
|
action_basic=model.predict(obs_basic,action_masks=masks_basic)
|
||||||
|
action_trade=model_trade.predict(obs_trade,action_masks=masks_trade)
|
||||||
|
#submit async directly for non blocking operation
|
||||||
|
sb3Converter_eval.step_async(action_basic[0])
|
||||||
|
sb3_traderConverter_eval.step_async(action_trade[0])
|
||||||
|
# retieve full results
|
||||||
|
obs_basic,rew_basic,done_e,info=stackenv_basic_eval.step(action_basic[0])
|
||||||
|
obs_trade,rew_trade,done_e,info=stackenv_traid_eval.step(action_trade[0])
|
||||||
done=done_e[0]
|
done=done_e[0]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
market=eval_econ.get_component("ContinuousDoubleAuction")
|
market=econ_eval.get_component("ContinuousDoubleAuction")
|
||||||
craft=eval_econ.get_component("SimpleCraft")
|
craft=econ_eval.get_component("Craft")
|
||||||
# trades=market.get_dense_log()
|
# trades=market.get_dense_log()
|
||||||
build=craft.get_dense_log()
|
build=craft.get_dense_log()
|
||||||
met=econ.previous_episode_metrics
|
met=econ.previous_episode_metrics
|
||||||
printReplay(eval_econ,0)
|
printReplay(econ_eval,0)
|
||||||
# printMarket(trades)
|
# printMarket(trades)
|
||||||
printBuilds(builds=build)
|
# printBuilds(builds=build)
|
||||||
print("social/productivity: {}".format(met["social/productivity"]))
|
print("social/productivity: {}".format(met["social/productivity"]))
|
||||||
print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
|
print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
|
||||||
print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))
|
print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ from typing import Any, Callable, List, Optional, Sequence, Type, Union
|
|||||||
|
|
||||||
class SB3EconConverter(VecEnv, gym.Env):
|
class SB3EconConverter(VecEnv, gym.Env):
|
||||||
|
|
||||||
def __init__(self, env: gym.Env, econ: base_env.BaseEnvironment,agentclass: str):
|
def __init__(self, env: gym.Env, econ: base_env.BaseEnvironment,agentclass: str,auto_reset: bool):
|
||||||
self.env=env
|
self.env=env
|
||||||
self.econ=econ
|
self.econ=econ
|
||||||
#get observation sample
|
#get observation sample
|
||||||
@@ -20,7 +20,9 @@ class SB3EconConverter(VecEnv, gym.Env):
|
|||||||
#flatten obervation of first agent
|
#flatten obervation of first agent
|
||||||
obs0=utils.package(obs[0],*self.packager)
|
obs0=utils.package(obs[0],*self.packager)
|
||||||
obs0["flat"]
|
obs0["flat"]
|
||||||
self.observation_space=gym.spaces.Box(low=-np.inf,high=np.inf,shape=(len(obs0["flat"]),),dtype=np.float32)
|
self.step_request_send=False
|
||||||
|
self.auto_reset=auto_reset
|
||||||
|
self.observation_space=gym.spaces.Box(low=0,high=np.inf,shape=(len(obs0["flat"]),),dtype=np.float32)
|
||||||
super().__init__(self.num_envs, self.observation_space, self.action_space)
|
super().__init__(self.num_envs, self.observation_space, self.action_space)
|
||||||
|
|
||||||
|
|
||||||
@@ -30,9 +32,11 @@ class SB3EconConverter(VecEnv, gym.Env):
|
|||||||
agent=self.econ.world.agents[idx]
|
agent=self.econ.world.agents[idx]
|
||||||
return gym.spaces.Discrete(agent.action_spaces)
|
return gym.spaces.Discrete(agent.action_spaces)
|
||||||
|
|
||||||
def step_async(self, actions: np.ndarray) -> None:
|
def step_async(self, actions: np.ndarray):
|
||||||
d_actions=utils.convert_gym_to_econ(actions)
|
if self.step_request_send==False:
|
||||||
return self.env.step_async(d_actions)
|
self.step_request_send=True
|
||||||
|
d_actions=utils.convert_gym_to_econ(actions)
|
||||||
|
return self.env.step_async(d_actions)
|
||||||
|
|
||||||
def step_wait(self) -> VecEnvStepReturn:
|
def step_wait(self) -> VecEnvStepReturn:
|
||||||
obs,rew,done,info=self.env.step_wait()
|
obs,rew,done,info=self.env.step_wait()
|
||||||
@@ -62,12 +66,14 @@ class SB3EconConverter(VecEnv, gym.Env):
|
|||||||
for i in range(self.num_envs):
|
for i in range(self.num_envs):
|
||||||
done_g[i]=done
|
done_g[i]=done
|
||||||
c_info[i]["terminal_observation"]=c_obs[i]
|
c_info[i]["terminal_observation"]=c_obs[i]
|
||||||
c_obs=self.reset()
|
if self.auto_reset:
|
||||||
|
c_obs=self.reset()
|
||||||
|
self.step_request_send=False
|
||||||
return np.copy(c_obs),np.copy(c_rew),np.copy(done_g),np.copy(c_info)
|
return np.copy(c_obs),np.copy(c_rew),np.copy(done_g),np.copy(c_info)
|
||||||
|
|
||||||
def reset(self) -> VecEnvObs:
|
def reset(self) -> VecEnvObs:
|
||||||
obs=self.env.reset()
|
obs=self.env.reset()
|
||||||
|
self.step_request_send=False
|
||||||
f_obs={}
|
f_obs={}
|
||||||
self.curr_obs=obs
|
self.curr_obs=obs
|
||||||
for k,v in obs.items():
|
for k,v in obs.items():
|
||||||
|
|||||||
Reference in New Issue
Block a user