diff --git a/ai_economist/foundation/components/continuous_double_auction.py b/ai_economist/foundation/components/continuous_double_auction.py
index 1c50b78..4e4f047 100644
--- a/ai_economist/foundation/components/continuous_double_auction.py
+++ b/ai_economist/foundation/components/continuous_double_auction.py
@@ -526,14 +526,14 @@ class ContinuousDoubleAuction(BaseComponent):
 
         for _, agent in enumerate(world.agents):
             # Private to the agent
+            available_ask_agent = full_asks - self.ask_hists[c][agent.idx]
+            available_bid_agent = full_bids - self.bid_hists[c][agent.idx]
             obs[agent.idx].update(
                 {
-                    "market_rate-{}".format(c): market_rate,
+                    "market_rate-{}".format(c): market_rate * self.inv_scale,
                     "price_history-{}".format(c): scaled_price_history,
-                    "available_asks-{}".format(c): full_asks
-                    - self.ask_hists[c][agent.idx],
-                    "available_bids-{}".format(c): full_bids
-                    - self.bid_hists[c][agent.idx],
+                    "available_asks-{}".format(c): np.clip(available_ask_agent, 0, self.max_num_orders),
+                    "available_bids-{}".format(c): np.clip(available_bid_agent, 0, self.max_num_orders),
                     "my_asks-{}".format(c): self.ask_hists[c][agent.idx],
                     "my_bids-{}".format(c): self.bid_hists[c][agent.idx],
                 }
diff --git a/basic.ai b/basic.ai
index 99572cb..96396e6 100644
Binary files a/basic.ai and b/basic.ai differ
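Note on the ContinuousDoubleAuction hunk: the new np.clip bounds each agent's "available_asks"/"available_bids" observation to [0, max_num_orders], which pairs with the tighter Box(low=0, high=10) observation space declared in wrapper/sb3_econ_converter.py at the end of this diff (max_num_orders is 10 in both configs). A minimal sketch of the effect, using illustrative values rather than the component's real order books:

    import numpy as np

    max_num_orders = 10
    full_asks = np.array([4, 23, 0])   # total open asks per slot (illustrative)
    my_asks = np.array([1, 3, 0])      # this agent's own open asks (illustrative)

    available = np.clip(full_asks - my_asks, 0, max_num_orders)
    print(available)                   # [ 3 10  0] -- the raw 20 would overflow a Box with high=10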
diff --git a/main.py b/main.py
index 45d7b73..743a4ea 100644
--- a/main.py
+++ b/main.py
@@ -52,7 +52,7 @@ env_config = {
     # ===== SCENARIO CLASS ARGUMENTS =====
     # (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
 
-    'starting_agent_coin': 10,
+    'starting_agent_coin': 50,
     'fixed_four_skill_and_loc': True,
 
     # ===== STANDARD ARGUMENTS ======
@@ -60,6 +60,7 @@ env_config = {
     'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
     'world_size': [5, 5], # [Height, Width] of the env world
     'episode_length': 256, # Number of timesteps per episode
+    'isoelastic_eta': 0.001,
     'allow_observation_scaling': True,
     'dense_log_frequency': 100,
     'world_dense_log_frequency':1,
@@ -107,7 +108,7 @@ eval_env_config = {
     # ===== SCENARIO CLASS ARGUMENTS =====
     # (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
 
-    'starting_agent_coin': 10,
+    'starting_agent_coin': 50,
     'fixed_four_skill_and_loc': True,
 
     # ===== STANDARD ARGUMENTS ======
@@ -116,6 +117,7 @@ eval_env_config = {
     'world_size': [1, 1], # [Height, Width] of the env world
     'episode_length': 256, # Number of timesteps per episode
     'allow_observation_scaling': True,
+    'isoelastic_eta': 0.001,
     'dense_log_frequency': 1,
     'world_dense_log_frequency':1,
     'energy_cost':0,
@@ -135,7 +137,7 @@ eval_env_config = {
     'flatten_masks': True,
 }
 
-num_frames=5
+num_frames=1
 
 class TensorboardCallback(BaseCallback):
     """
@@ -161,6 +163,23 @@ class TensorboardCallback(BaseCallback):
 
         return True
 
+min_at_target_basic=0.5
+min_lr_basic=5e-6
+start_lr_basic=9e-4
+
+min_at_target_trade=0.5
+min_lr_trade=5e-6
+start_lr_trade=9e-4
+
+def learning_rate_adj_basic(x) -> float:
+    diff=start_lr_basic-min_lr_basic
+    lr=min_lr_basic+x*diff
+    return lr
+
+def learning_rate_adj_trade(x) -> float:
+    diff=start_lr_trade-min_lr_trade
+    lr=min_lr_trade+x*diff  # use the trade floor; min_lr_basic here was a copy-paste bug
+    return lr
 
 def printMarket(market):
     for i in range(len(market)):
@@ -273,8 +292,8 @@ runname="run_{}".format(run_number)
 model_db=[None,None] # object for storing model
 
 
-model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_basic, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
-model_trade=MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_traid, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
+model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=learning_rate_adj_basic,env=stackenv_basic, seed=445,verbose=1,device="cuda",tensorboard_log="./log")
+model_trade=MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=learning_rate_adj_trade,env=stackenv_traid, seed=445,verbose=1,device="cuda",tensorboard_log="./log")
 
 n_agents=econ.n_agents
@@ -289,9 +308,9 @@ while True:
 
     #Train
     runname="run_{}_{}".format(run_number,"basic")
 
-    thread_model=Thread(target=train,args=(model,total_required_for_episode_basic*50,econ,True,runname,model_db,0))
+    thread_model=Thread(target=train,args=(model,total_required_for_episode_basic*150,econ,True,runname,model_db,0))
     runname="run_{}_{}".format(run_number,"trader")
-    thread_model_traid=Thread(target=train,args=(model_trade,total_required_for_episode_traid*50,econ,False,runname,model_db,1))
+    thread_model_traid=Thread(target=train,args=(model_trade,total_required_for_episode_traid*150,econ,False,runname,model_db,1))
 
     thread_model.start()
     thread_model_traid.start()
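On the main.py hunks: stable-baselines3 accepts a callable for learning_rate and invokes it with progress_remaining, which anneals linearly from 1.0 at the start of training to 0.0 at the end, so learning_rate_adj_basic and learning_rate_adj_trade implement a linear decay from start_lr_* down to min_lr_*. The min_at_target_* constants are defined but never used above; if the intent was to reach the floor partway through training, a schedule factory along these lines would cover it (linear_schedule and min_at are illustrative names, not part of this diff):

    def linear_schedule(start_lr: float, min_lr: float, min_at: float = 0.0):
        """Return an SB3-style schedule; SB3 calls it with progress_remaining in [1.0, 0.0]."""
        def schedule(progress_remaining: float) -> float:
            # hit min_lr once progress_remaining falls to min_at (0.5 = halfway through training)
            p = max(progress_remaining - min_at, 0.0) / (1.0 - min_at)
            return min_lr + p * (start_lr - min_lr)
        return schedule

With min_at=0.0 this reproduces the two functions in the diff exactly; passing min_at_target_basic (0.5) would be one way to put those unused constants to work.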
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..45d7b73
--- /dev/null
+++ b/test.py
@@ -0,0 +1,343 @@
+
+import numpy as np
+
+from ai_economist import foundation
+from stable_baselines3.common.vec_env import vec_frame_stack
+from stable_baselines3.common.evaluation import evaluate_policy
+from sb3_contrib.ppo_mask import MaskablePPO
+import envs
+import wrapper
+import resources
+import pprint
+from agents import trading_agent
+from wrapper.base_econ_wrapper import BaseEconWrapper
+from wrapper.reciever_econ_wrapper import RecieverEconWrapper
+from wrapper.sb3_econ_converter import SB3EconConverter
+from tqdm import tqdm
+import components
+from stable_baselines3.common.env_checker import check_env
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
+from stable_baselines3.common.vec_env.vec_normalize import VecNormalize
+from sb3_contrib import RecurrentPPO
+from envs.econ_wrapper import EconVecEnv
+from stable_baselines3.common.callbacks import BaseCallback
+import yaml
+import time
+from threading import Thread
+
+env_config = {
+    # ===== SCENARIO CLASS =====
+    # Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
+    # The environment object will be an instance of the Scenario class.
+    'scenario_name': 'econ',
+
+    # ===== COMPONENTS =====
+    # Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
+    # "component_name" refers to the Component class's name in the Component Registry (foundation.components)
+    # {component_kwargs} is a dictionary of kwargs passed to the Component class
+    # The order in which components reset, step, and generate obs follows their listed order below.
+    'components': [
+        # (1) Building houses
+        ('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
+        # (2) Trading collectible resources
+        ('ContinuousDoubleAuction', {'max_num_orders': 10}),
+        # (3) Movement and resource collection
+        ('SimpleGather', {}),
+        ('ExternalMarket',{'market_demand':{
+            'Gem': 15
+        }}),
+    ],
+
+    # ===== SCENARIO CLASS ARGUMENTS =====
+    # (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
+
+    'starting_agent_coin': 10,
+    'fixed_four_skill_and_loc': True,
+
+    # ===== STANDARD ARGUMENTS ======
+    # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
+    'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
+    'world_size': [5, 5], # [Height, Width] of the env world
+    'episode_length': 256, # Number of timesteps per episode
+    'allow_observation_scaling': True,
+    'dense_log_frequency': 100,
+    'world_dense_log_frequency':1,
+    'energy_cost':0,
+    'energy_warmup_method': "auto",
+    'energy_warmup_constant': 4000,
+
+    # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
+    # Otherwise, the policy selects only 1 action.
+    'multi_action_mode_agents': False,
+    'multi_action_mode_planner': False,
+
+    # When flattening observations, concatenate scalar & vector observations before output.
+    # Otherwise, return observations with minimal processing.
+    'flatten_observations': False,
+    # When Flattening masks, concatenate each action subspace mask into a single array.
+    # Note: flatten_masks = True is required for masking action logits in the code below.
+    'flatten_masks': True,
+}
+
+
+eval_env_config = {
+    # ===== SCENARIO CLASS =====
+    # Which Scenario class to use: the class's name in the Scenario Registry (foundation.scenarios).
+    # The environment object will be an instance of the Scenario class.
+    'scenario_name': 'econ',
+
+    # ===== COMPONENTS =====
+    # Which components to use (specified as list of ("component_name", {component_kwargs}) tuples).
+    # "component_name" refers to the Component class's name in the Component Registry (foundation.components)
+    # {component_kwargs} is a dictionary of kwargs passed to the Component class
+    # The order in which components reset, step, and generate obs follows their listed order below.
+    'components': [
+        # (1) Building houses
+        ('Craft', {'skill_dist': "pareto", 'commodities': ["Gem"],'max_skill_amount_benefit':1.5}),
+        # (2) Trading collectible resources
+        ('ContinuousDoubleAuction', {'max_num_orders': 10}),
+        # (3) Movement and resource collection
+        ('SimpleGather', {}),
+        ('ExternalMarket',{'market_demand':{
+            'Gem': 15
+        }}),
+    ],
+
+    # ===== SCENARIO CLASS ARGUMENTS =====
+    # (optional) kwargs that are added by the Scenario class (i.e. not defined in BaseEnvironment)
+
+    'starting_agent_coin': 10,
+    'fixed_four_skill_and_loc': True,
+
+    # ===== STANDARD ARGUMENTS ======
+    # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment)
+    'agent_composition': {"BasicMobileAgent": 20,"TradingAgent":5}, # Number of non-planner agents (must be > 1)
+    'world_size': [1, 1], # [Height, Width] of the env world
+    'episode_length': 256, # Number of timesteps per episode
+    'allow_observation_scaling': True,
+    'dense_log_frequency': 1,
+    'world_dense_log_frequency':1,
+    'energy_cost':0,
+    'energy_warmup_method': "auto",
+    'energy_warmup_constant': 4000,
+
+    # In multi-action-mode, the policy selects an action for each action subspace (defined in component code).
+    # Otherwise, the policy selects only 1 action.
+    'multi_action_mode_agents': False,
+    'multi_action_mode_planner': False,
+
+    # When flattening observations, concatenate scalar & vector observations before output.
+    # Otherwise, return observations with minimal processing.
+    'flatten_observations': False,
+    # When Flattening masks, concatenate each action subspace mask into a single array.
+    # Note: flatten_masks = True is required for masking action logits in the code below.
+    'flatten_masks': True,
+}
+
+num_frames=5
+
+class TensorboardCallback(BaseCallback):
+    """
+    Custom callback for plotting additional values in tensorboard.
+ """ + + def __init__(self,econ, verbose=0): + super().__init__(verbose) + self.econ=econ + self.metrics=econ.scenario_metrics() + def _on_step(self) -> bool: + # Log scalar value (here a random variable) + if econ.world.timestep==0: + prev_metrics=self.metrics + if self.econ.previous_episode_metrics is None: + self.metrics=self.econ.scenario_metrics() + else: + self.metrics=self.econ.previous_episode_metrics + curr_prod=self.metrics["social/productivity"] + trend_pord=curr_prod-prev_metrics["social/productivity"] + self.logger.record("social/total_productivity", curr_prod) + self.logger.record("social/delta_productivity", trend_pord) + + return True + + +def printMarket(market): + for i in range(len(market)): + step=market[i] + if len(step)>0: + print("=== Step {} ===".format(i)) + for transaction in step: + t=transaction + transstring = "({}) {} -> {} | [{}/{}] {} Coins\n".format(t["commodity"],t["seller"],t["buyer"],t["ask"],t["bid"],t["price"]) + print(transstring) + return "" + +def printBuilds(builds): + for i in range(len(builds)): + step=builds[i] + if len(step)>0: + for build in step: + t=build + transstring = "({}) Builder: {}, Skill: {}, Income {} ".format(i,t["builder"],t["build_skill"],t["income"]) + print(transstring) + return "" +def printReplay(econ,agentid): + worldmaps=["Stone","Wood"] + + log=econ.previous_episode_dense_log + agent=econ.world.agents[agentid] + + agentid=str(agentid) + maxsetp=len(log["states"])-1 + + for step in range(maxsetp): + print() + print("=== Step {} ===".format(step)) + # state + print("--- World ---") + world=log['world'][step] + for res in worldmaps: + print("{}: {}".format(res,world[res][0][0])) + print("--- State ---") + state=log['states'][step][agentid] + + pprint.pprint(state) + print("--- Action ---") + action=log["actions"][step][agentid] + + + if action=={}: + print("Action: 0 -> NOOP") + else: + for k in action: + formats="Action: {}({})".format(k,action[k]) + print(formats) + print("--- Reward ---") + reward=log["rewards"][step][agentid] + print("Reward: {}".format(reward)) + +#Setup Env Objects +econ=foundation.make_env_instance(**env_config) + +market=econ.get_component("ContinuousDoubleAuction") +action=market.get_n_actions("TradingAgent") +baseEconWrapper=BaseEconWrapper(econ) +baseEconWrapper.run() +time.sleep(0.5) +mobileRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="BasicMobileAgent") +tradeRecieverEconWrapper=RecieverEconWrapper(base_econ=baseEconWrapper,agent_classname="TradingAgent") +sb3_traderConverter=SB3EconConverter(tradeRecieverEconWrapper,econ,"TradingAgent",True) +sb3Converter=SB3EconConverter(mobileRecieverEconWrapper,econ,"BasicMobileAgent",True) +# attach sb3 wrappers + +monenv=VecMonitor(venv=sb3Converter,info_keywords=["social/productivity","trend/productivity"]) +montraidingenv=VecMonitor(venv=sb3_traderConverter) + +stackenv_basic=vec_frame_stack.VecFrameStack(venv=monenv,n_stack=num_frames) +stackenv_traid=vec_frame_stack.VecFrameStack(venv=montraidingenv,n_stack=num_frames) +# Model setup complete + +# Setup Eval Env +econ_eval=foundation.make_env_instance(**eval_env_config) + + +baseEconWrapper_eval=BaseEconWrapper(econ_eval) +baseEconWrapper_eval.run() +time.sleep(0.5) +mobileRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="BasicMobileAgent") +tradeRecieverEconWrapper_eval=RecieverEconWrapper(base_econ=baseEconWrapper_eval,agent_classname="TradingAgent") 
+sb3_traderConverter_eval=SB3EconConverter(tradeRecieverEconWrapper_eval,econ_eval,"TradingAgent",False)
+sb3Converter_eval=SB3EconConverter(mobileRecieverEconWrapper_eval,econ_eval,"BasicMobileAgent",False)
+# attach sb3 wrappers
+
+monenv_eval=VecMonitor(venv=sb3Converter_eval,info_keywords=["social/productivity","trend/productivity"])
+montraidingenv_eval=VecMonitor(venv=sb3_traderConverter_eval)
+
+stackenv_basic_eval=vec_frame_stack.VecFrameStack(venv=monenv_eval,n_stack=num_frames)
+stackenv_traid_eval=vec_frame_stack.VecFrameStack(venv=montraidingenv_eval,n_stack=num_frames)
+
+
+obs=monenv.reset()
+
+
+# define training functions
+def train(model,timesteps, econ_call,process_bar,name,db,index):
+    db[index]=model.learn(total_timesteps=timesteps,progress_bar=process_bar,reset_num_timesteps=False,tb_log_name=name,callback=TensorboardCallback(econ_call))
+
+
+
+# prepare training
+run_number=int(np.random.rand()*100)
+runname="run_{}".format(run_number)
+model_db=[None,None] # object for storing model
+
+
+model = MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_basic, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
+model_trade=MaskablePPO("MlpPolicy",n_steps=int(env_config['episode_length']*2),ent_coef=0.1, vf_coef=0.5 ,gamma=0.99, learning_rate=1e-5,env=stackenv_traid, seed=300,verbose=1,device="cuda",tensorboard_log="./log")
+
+n_agents=econ.n_agents
+
+total_required_for_episode_basic=len(mobileRecieverEconWrapper.agnet_idx)*env_config['episode_length']
+total_required_for_episode_traid=len(tradeRecieverEconWrapper.agnet_idx)*env_config['episode_length']
+
+print("this is run {}".format(runname))
+
+while True:
+
+
+    #Train
+    runname="run_{}_{}".format(run_number,"basic")
+
+    thread_model=Thread(target=train,args=(model,total_required_for_episode_basic*50,econ,True,runname,model_db,0))
+    runname="run_{}_{}".format(run_number,"trader")
+    thread_model_traid=Thread(target=train,args=(model_trade,total_required_for_episode_traid*50,econ,False,runname,model_db,1))
+
+    thread_model.start()
+    thread_model_traid.start()
+    thread_model.join()
+    thread_model_traid.join()
+    #normenv.save("temp-normalizer.ai")
+    model=model_db[0]
+    model_trade=model_db[1]
+    model.save("basic.ai")
+    model_trade.save("trade.ai")
+
+    ## Run Eval
+    print("### EVAL ###")
+    obs_basic=stackenv_basic_eval.reset()
+    obs_trade=stackenv_traid_eval.reset()
+    done=False
+    for i in tqdm(range(eval_env_config['episode_length'])):
+        #create masks
+        masks_basic=stackenv_basic_eval.action_masks()
+        masks_trade=stackenv_traid_eval.action_masks()
+        # get actions
+        action_basic=model.predict(obs_basic,action_masks=masks_basic)
+        action_trade=model_trade.predict(obs_trade,action_masks=masks_trade)
+        #submit async directly for non blocking operation
+        sb3Converter_eval.step_async(action_basic[0])
+        sb3_traderConverter_eval.step_async(action_trade[0])
+        # retrieve full results
+        obs_basic,rew_basic,done_e,info=stackenv_basic_eval.step(action_basic[0])
+        obs_trade,rew_trade,done_e,info=stackenv_traid_eval.step(action_trade[0])
+        done=done_e[0]
+
+
+
+    market=econ_eval.get_component("ContinuousDoubleAuction")
+    craft=econ_eval.get_component("Craft")
+    # trades=market.get_dense_log()
+    build=craft.get_dense_log()
+    met=econ.previous_episode_metrics
+    printReplay(econ_eval,0)
+    # printMarket(trades)
+    # printBuilds(builds=build)
+    print("social/productivity: {}".format(met["social/productivity"]))
+    print("labor/weighted_cost: {}".format(met["labor/weighted_cost"]))
+    print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"]))
+
+    time.sleep(1)
+
+
+
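For reference, the eval loop in test.py (and main.py) hand-rolls mask retrieval via the wrapper's action_masks(). sb3_contrib also ships a small helper that does the same lookup; a minimal sketch for a single Gym-style environment, where model stands in for any MaskablePPO instance and env for any mask-exposing environment:

    from sb3_contrib.common.maskable.utils import get_action_masks

    obs = env.reset()
    done = False
    while not done:
        masks = get_action_masks(env)  # queries env.action_masks() under the hood
        action, _states = model.predict(obs, action_masks=masks, deterministic=True)
        obs, reward, done, info = env.step(action)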
{}".format(met["labor/weighted_cost"])) + print("labor/warmup_integrator: {}".format(met["labor/warmup_integrator"])) + + time.sleep(1) + + + diff --git a/trade.ai b/trade.ai index 8dafb2a..f65e0f0 100644 Binary files a/trade.ai and b/trade.ai differ diff --git a/wrapper/base_econ_wrapper.py b/wrapper/base_econ_wrapper.py index ad02739..cc2af5c 100644 --- a/wrapper/base_econ_wrapper.py +++ b/wrapper/base_econ_wrapper.py @@ -3,25 +3,14 @@ from threading import Event, Lock, Thread from queue import Queue class BaseEconWrapper(): """Base class for connecting reciever wrapper to a multi threaded econ simulation and training session""" - - base_notification=Event() #Notification for Base - reset_notification=Event() #Notification for recievers + step_notifications=[] #Notification for recievers - - action_edit_lock=Lock() actor_actions={} - - stop_edit_lock=Lock() stop=False - - vote_lock=Lock() n_voters=0 n_votes_reset=0 - - - # States of Env - env_data_lock=Lock() + obs=None rew=None done=None @@ -30,6 +19,13 @@ class BaseEconWrapper(): def __init__(self, econ: base_env.BaseEnvironment): self.env=econ + self.vote_lock=Lock() + + self.base_notification=Event() #Notification for Base + self.reset_notification=Event() #Notification for recievers + self.action_edit_lock=Lock() + self.stop_edit_lock=Lock() + self.env_data_lock=Lock() def register_vote(self): """Register reciever on base. Returns ID of Voter to pass on during blocking""" @@ -169,9 +165,9 @@ class BaseEconWrapper(): def reciever_request_reset(self): """Adds to vote count to reset. If limit is reached reset will occure""" - self.vote_lock.acquire() + #self.vote_lock.acquire() self.n_votes_reset+=1 - self.vote_lock.release() + # self.vote_lock.release() self.base_notification.set() #Alert base for action changes def reciever_block_reset(self): diff --git a/wrapper/sb3_econ_converter.py b/wrapper/sb3_econ_converter.py index dfcade2..5c4b71f 100644 --- a/wrapper/sb3_econ_converter.py +++ b/wrapper/sb3_econ_converter.py @@ -22,7 +22,7 @@ class SB3EconConverter(VecEnv, gym.Env): obs0["flat"] self.step_request_send=False self.auto_reset=auto_reset - self.observation_space=gym.spaces.Box(low=0,high=np.inf,shape=(len(obs0["flat"]),),dtype=np.float32) + self.observation_space=gym.spaces.Box(low=0,high=10,shape=(len(obs0["flat"]),),dtype=np.float32) super().__init__(self.num_envs, self.observation_space, self.action_space)