diff --git a/ai_economist/foundation/base/base_agent.py b/ai_economist/foundation/base/base_agent.py index b5a627c..2a6cb31 100644 --- a/ai_economist/foundation/base/base_agent.py +++ b/ai_economist/foundation/base/base_agent.py @@ -81,7 +81,8 @@ class BaseAgent: def register_inventory(self, resources): """Used during environment construction to populate inventory/escrow fields.""" - assert not self._registered_inventory + if self._registered_inventory: + return for entity_name in resources: self.inventory[entity_name] = 0 self.escrow[entity_name] = 0 @@ -89,7 +90,8 @@ class BaseAgent: def register_endogenous(self, endogenous): """Used during environment construction to populate endogenous state fields.""" - assert not self._registered_endogenous + if self._registered_endogenous: + return for entity_name in endogenous: self.endogenous[entity_name] = 0 self._registered_endogenous = True @@ -115,7 +117,8 @@ class BaseAgent: def register_components(self, components): """Used during environment construction to set up state/action spaces.""" - assert not self._registered_components + if self._registered_components: + return for component in components: n = component.get_n_actions(self.name) if n is None: diff --git a/ai_economist/foundation/base/world.py b/ai_economist/foundation/base/world.py index a4d5029..cf0b61f 100644 --- a/ai_economist/foundation/base/world.py +++ b/ai_economist/foundation/base/world.py @@ -91,7 +91,10 @@ class Maps: else: raise NotImplementedError - + self.reset_agent_maps(n_agents) + + def reset_agent_maps(self,n_agents): + self.n_agents=n_agents self._idx_map = np.stack( [i * np.ones(shape=self.size) for i in range(self.n_agents)] ) @@ -378,17 +381,8 @@ class World: self.multi_action_mode_planner = bool(multi_action_mode_planner) self._agent_class_idx_map={} #create agents - self.agent_composition=agent_composition - self.n_agents=0 - self._agents = [] - for k,v in agent_composition.items(): - self._agent_class_idx_map[k]=[] - for offset in 
range(v): - agent_class=agent_registry.get(k) - agent=agent_class(self.n_agents,self.multi_action_mode_agents) - self._agents.append(agent) - self._agent_class_idx_map[k].append(str(self.n_agents)) - self.n_agents+=1 + self.create_agents(agent_composition) + + self.maps = Maps(world_size, self.n_agents, world_resources, world_landmarks) planner_class = agent_registry.get("BasicPlanner") @@ -402,6 +396,37 @@ class World: self.cuda_function_manager = None self.cuda_data_manager = None + def create_agents(self, agent_composition): + """create_agents creates the world agent db with the given composition.""" + self.agent_composition=agent_composition + self.n_agents=0 + self._agents = [] + for k,v in agent_composition.items(): + self._agent_class_idx_map[k]=[] + for offset in range(v): + agent_class=agent_registry.get(k) + agent=agent_class(self.n_agents,self.multi_action_mode_agents) + self._agents.append(agent) + self._agent_class_idx_map[k].append(str(self.n_agents)) + self.n_agents+=1 + + def apply_agent_db(self): + """Applies current agent db into lookup maps inside world and map itself. 
Enables insertion of new agents into existing env.""" + self.n_agents=len(self._agents) + self._agent_class_idx_map={} + self.maps.reset_agent_maps(self.n_agents) # reset map lookups + #create mapping dict + for idx in range(self.n_agents): + cls=self.get_agent_class(idx) + agent=self._agents[idx] + if cls in self._agent_class_idx_map: + self._agent_class_idx_map[cls].append(idx) + else: + self._agent_class_idx_map[cls]=[idx] + # apply agent locs db to maps + if "loc" in agent.state: + self.maps.set_agent_loc(agent,*agent.loc) + @property def agents(self): """Return a list of the agent objects in the world (sorted by index).""" diff --git a/ai_economist/foundation/entities/resources.py b/ai_economist/foundation/entities/resources.py index fe6693c..cf15a44 100644 --- a/ai_economist/foundation/entities/resources.py +++ b/ai_economist/foundation/entities/resources.py @@ -65,20 +65,3 @@ class Coin(Resource): color = np.array([229, 211, 82]) / 255.0 collectible = False -@resource_registry.add -class RawGem(Resource): - """Raw Gem that can be processed further""" - - name = "Raw_Gem" - color = np.array([241, 233, 219]) / 255.0 - collectible = True - -@resource_registry.add -class Gem(Resource): - """Proccesed Gem. Craftable.""" - - name = "Gem" - color = np.array([241, 233, 219]) / 255.0 - collectible = False - craft_recp= {"Raw_Gem": 1} - craft_labour_base= 1 \ No newline at end of file diff --git a/components/crafting.py b/components/crafting.py new file mode 100644 index 0000000..c48544e --- /dev/null +++ b/components/crafting.py @@ -0,0 +1,264 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import numpy as np + +from ai_economist.foundation.base.base_component import ( + BaseComponent, + component_registry, +) +from ai_economist.foundation.entities.resources import resource_registry + + +@component_registry.add +class Craft(BaseComponent): + """ + Allows mobile agents to build house landmarks in the world using stone and wood, + earning income. + + Can be configured to include heterogeneous building skill where agents earn + different levels of income when building. + + Args: + commodities (list(str)): list of commodities that can be crafted in the local world + payment_max_skill_multiplier (int): Maximum skill multiplier that an agent + can sample. Must be >= 1. Default is 1. + skill_dist (str): Distribution type for sampling skills. Default ("none") + gives all agents identical skill equal to a multiplier of 1. "pareto" and + "lognormal" sample skills from the associated distributions. + build_labor (float): Labor cost associated with building a house. + Must be >= 0. Default is 10. 
+ """ + + name = "Craft" + component_type = "Build" + required_entities = ["Coin", "Labor"] + agent_subclasses = ["BasicMobileAgent"] + commodities=[] + def __init__( + self, + *base_component_args, + commodities=[], + max_skill_amount_benefit=1, + max_skill_labour_benefit=1, + skill_dist="none", + **base_component_kwargs + ): + #append commodities + for v in commodities: + res_class=resource_registry.get(v) + res=res_class() + if res.craft_recp!=None: + # is craftable + assert res.craft_recp!={} + assert res.craft_labour_base >= 0 + self.required_entities.append(v) + self.commodities.append(res) + + + self.max_skill_amount_benefit=max_skill_amount_benefit + self.max_skill_labour_benefit=max_skill_labour_benefit + + + assert self.max_skill_amount_benefit >= 1 + assert self.max_skill_labour_benefit <= 1 + + self.skill_dist = skill_dist.lower() + assert self.skill_dist in ["none", "pareto", "lognormal"] + + self.sampled_skills = {} + + self.builds = [] + super().__init__(*base_component_args, **base_component_kwargs) + + def agent_can_build(self, agent): + """Return True if agent can actually build in its current location.""" + # See if the agent has the resources necessary to complete the action + for resource, cost in self.resource_cost.items(): + if agent.state["inventory"][resource] < cost: + return False + return True + + # Required methods for implementing components + # -------------------------------------------- + + def get_n_actions(self, agent_cls_name): + """ + See base_component.py for detailed description. + + Add a single action (build) for mobile agents. + """ + # This component adds 1 action that mobile agents can take: build a house + if agent_cls_name in self.agent_subclasses: + return 1 + + return None + + def get_additional_state_fields(self, agent_cls_name): + """ + See base_component.py for detailed description. + + For mobile agents, add state fields for building skill. 
+ """ + if agent_cls_name not in self.agent_subclasses: + return {} + if agent_cls_name == "BasicMobileAgent": + return {"build_payment": float(self.payment), "build_skill": 1} + raise NotImplementedError + + def component_step(self): + """ + See base_component.py for detailed description. + + Convert stone+wood to house+coin for agents that choose to build and can. + """ + world = self.world + build = [] + # Apply any building actions taken by the mobile agents + for agent in world.get_random_order_agents(): + + action = agent.get_component_action(self.name) + + # This component doesn't apply to this agent! + if action is None: + continue + + # NO-OP! + if action == 0: + pass + + # Build! (If you can.) + elif action == 1: + if self.agent_can_build(agent): + # Remove the resources + for resource, cost in self.resource_cost.items(): + agent.state["inventory"][resource] -= cost + + # Receive payment for the house + agent.state["inventory"]["Coin"] += agent.state["build_payment"] + + # Incur the labor cost for building + agent.state["endogenous"]["Labor"] += self.build_labor + + build.append( + { + "builder": agent.idx, + "build_skill": self.sampled_skills[agent.idx], + "income": float(agent.state["build_payment"]), + } + ) + else: + agent.bad_action=True + else: + raise ValueError + + self.builds.append(build) + + def generate_observations(self): + """ + See base_component.py for detailed description. + + Here, agents observe their build skill. The planner does not observe anything + from this component. + """ + + obs_dict = dict() + for agent in self.world.agents: + if agent.name in self.agent_subclasses: + obs_dict[agent.idx] = { + "build_payment": agent.state["build_payment"] / self.payment, + "build_skill": self.sampled_skills[agent.idx], + } + + return obs_dict + + def generate_masks(self, completions=0): + """ + See base_component.py for detailed description. + + Prevent building only if a landmark already occupies the agent's location. 
+ """ + + masks = {} + # Mobile agents' build action is masked if they cannot build with their + # current location and/or endowment + for agent in self.world.agents: + masks[agent.idx] = np.array([self.agent_can_build(agent)]) + + return masks + + # For non-required customization + # ------------------------------ + + def get_metrics(self): + """ + Metrics that capture what happened through this component. + + Returns: + metrics (dict): A dictionary of {"metric_name": metric_value}, + where metric_value is a scalar. + """ + world = self.world + + build_stats = {a.idx: {"n_builds": 0} for a in world.agents} + for builds in self.builds: + for build in builds: + idx = build["builder"] + build_stats[idx]["n_builds"] += 1 + + out_dict = {} + for a in world.agents: + for k, v in build_stats[a.idx].items(): + out_dict["{}/{}".format(a.idx, k)] = v + + num_houses = np.sum(world.maps.get("House") > 0) + out_dict["total_builds"] = num_houses + + return out_dict + + def additional_reset_steps(self): + """ + See base_component.py for detailed description. + + Re-sample agents' building skills. + """ + world = self.world + + self.sampled_skills = {agent.idx: 1 for agent in world.agents} + + PMSM = self.payment_max_skill_multiplier + + for agent in world.agents: + if self.skill_dist == "none": + sampled_skill = 1 + pay_rate = 1 + elif self.skill_dist == "pareto": + sampled_skill = np.random.pareto(4) + pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1) + elif self.skill_dist == "lognormal": + sampled_skill = np.random.lognormal(-1, 0.5) + pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1) + else: + raise NotImplementedError + + agent.state["build_payment"] = float(pay_rate * self.payment) + agent.state["build_skill"] = float(sampled_skill) + + self.sampled_skills[agent.idx] = sampled_skill + + self.builds = [] + + def get_dense_log(self): + """ + Log builds. + + Returns: + builds (list): A list of build events. 
Each entry corresponds to a single + timestep and contains a description of any builds that occurred on + that timestep. + + """ + return self.builds diff --git a/main.py b/main.py index 7908c37..2268cc2 100644 --- a/main.py +++ b/main.py @@ -48,7 +48,7 @@ env_config = { # ===== STANDARD ARGUMENTS ====== # kwargs that are used by every Scenario class (i.e. defined in BaseEnvironment) 'agent_composition': {"BasicMobileAgent": 20}, # Number of non-planner agents (must be > 1) - 'world_size': [1, 1], # [Height, Width] of the env world + 'world_size': [5, 5], # [Height, Width] of the env world 'episode_length': 256, # Number of timesteps per episode 'allow_observation_scaling': True, 'dense_log_frequency': 100, diff --git a/resources/__init_.py b/resources/__init_.py new file mode 100644 index 0000000..701a593 --- /dev/null +++ b/resources/__init_.py @@ -0,0 +1,3 @@ +from . import ( + resources +) \ No newline at end of file diff --git a/resources/resources.py b/resources/resources.py new file mode 100644 index 0000000..c45e021 --- /dev/null +++ b/resources/resources.py @@ -0,0 +1,23 @@ + +import numpy as np + + +from ai_economist.foundation.entities.resources import Resource, resource_registry + +@resource_registry.add +class RawGem(Resource): + """Raw Gem that can be processed further""" + + name = "Raw_Gem" + color = np.array([241, 233, 219]) / 255.0 + collectible = True + +@resource_registry.add +class Gem(Resource): + """Processed Gem. Craftable.""" + + name = "Gem" + color = np.array([241, 233, 219]) / 255.0 + collectible = False + craft_recp= {"Raw_Gem": 1} + craft_labour_base= 1 \ No newline at end of file