adding ai_economist for modding

This commit is contained in:
2023-01-12 16:41:38 +01:00
parent 0479a4f6a4
commit f177f8f0ba
85 changed files with 19373 additions and 2 deletions

View File

@@ -0,0 +1,663 @@
# Copyright (c) 2021, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
from datetime import datetime
import GPUtil
import numpy as np
from ai_economist.foundation.base.base_component import (
BaseComponent,
component_registry,
)
# Probe for GPUs at import time. The WarpDrive helpers (and the _OBSERVATIONS /
# _ACTIONS name prefixes used by the CUDA code paths below) are only bound when
# at least one GPU is reported as available.
try:
    num_gpus_available = len(GPUtil.getAvailable())
    print(f"Inside covid19_components.py: {num_gpus_available} GPUs are available.")
    if num_gpus_available > 0:
        from warp_drive.utils.constants import Constants
        from warp_drive.utils.data_feed import DataFeed

        _OBSERVATIONS = Constants.OBSERVATIONS
        _ACTIONS = Constants.ACTIONS
    else:
        print("No GPUs found! Running the simulation on a CPU.")
except ModuleNotFoundError:
    # WarpDrive is optional: warn and continue without it.
    print(
        "Warning: The 'WarpDrive' package is not found and cannot be used! "
        "If you wish to use WarpDrive, please run "
        "'pip install rl-warp-drive' first."
    )
except ValueError:
    # A ValueError raised while probing is reported the same way as "no GPUs".
    print("No GPUs found! Running the simulation on a CPU.")
@component_registry.add
class ControlUSStateOpenCloseStatus(BaseComponent):
    """
    Lets each state choose its open/close stringency level.

    Args:
        n_stringency_levels (int): number of stringency levels the states can
            choose from. (Must match the number in the model constants dictionary
            referenced by the parent scenario.)
        action_cooldown_period (int): action cooldown period in days.
            Once a stringency level is set, the state(s) cannot switch to another
            level for a certain number of days (referred to as the
            "action_cooldown_period")
    """

    name = "ControlUSStateOpenCloseStatus"
    required_entities = []
    agent_subclasses = ["BasicMobileAgent"]

    def __init__(
        self,
        *base_component_args,
        n_stringency_levels=10,
        action_cooldown_period=28,
        **base_component_kwargs,
    ):
        self.action_cooldown_period = action_cooldown_period
        super().__init__(*base_component_args, **base_component_kwargs)

        self.np_int_dtype = np.int32

        self.n_stringency_levels = int(n_stringency_levels)
        assert self.n_stringency_levels >= 2
        # The consistency check against the world is deferred to the first
        # (non-CUDA) component_step call.
        self._checked_n_stringency_levels = False

        # One mask column per agent, one row per stringency level; every agent
        # starts out fully masked.
        self.masks = {}
        self.default_agent_action_mask = [1] * self.n_stringency_levels
        self.no_op_agent_action_mask = [0] * self.n_stringency_levels
        no_op_column = np.array(self.no_op_agent_action_mask)[:, np.newaxis]
        self.masks["a"] = np.repeat(no_op_column, self.n_agents, axis=-1)

        # Populated in additional_reset_steps().
        self.action_in_cooldown_until = None

    def get_additional_state_fields(self, agent_cls_name):
        """This component adds no state fields to any agent class."""
        return {}

    def additional_reset_steps(self):
        """Initialize every agent's cooldown deadline to the current timestep."""
        self.action_in_cooldown_until = np.array(
            [self.world.timestep] * self.n_agents
        )

    def get_n_actions(self, agent_cls_name):
        """Return the number of stringency levels for mobile agents, else None."""
        is_mobile_agent = agent_cls_name == "BasicMobileAgent"
        return self.n_stringency_levels if is_mobile_agent else None

    def generate_masks(self, completions=0):
        """
        Refresh and return the per-agent action masks.

        An agent's actions are all masked out while the current timestep is
        before its cooldown deadline; otherwise (and always when real-world
        policies are replayed) every stringency level is allowed.
        """
        world = self.world
        for agent in world.agents:
            still_cooling_down = (
                not world.use_real_world_policies
                and world.timestep < self.action_in_cooldown_until[agent.idx]
            )
            if still_cooling_down:
                self.masks["a"][:, agent.idx] = self.no_op_agent_action_mask
            else:
                self.masks["a"][:, agent.idx] = self.default_agent_action_mask
        return self.masks

    def get_data_dictionary(self):
        """
        Create a dictionary of data to push to the GPU (device).
        """
        data_dict = DataFeed()
        data_dict.add_data(
            name="action_cooldown_period", data=self.action_cooldown_period
        )
        data_dict.add_data(
            name="action_in_cooldown_until",
            data=self.action_in_cooldown_until,
            save_copy_and_apply_at_reset=True,
        )
        data_dict.add_data(
            name="num_stringency_levels", data=self.n_stringency_levels
        )
        # Both masks are pushed with an extra leading 1 in front of the
        # per-level entries.
        data_dict.add_data(
            name="default_agent_action_mask",
            data=[1] + self.default_agent_action_mask,
        )
        data_dict.add_data(
            name="no_op_agent_action_mask",
            data=[1] + self.no_op_agent_action_mask,
        )
        return data_dict

    def get_tensor_dictionary(self):
        """
        Create a dictionary of (Pytorch-accessible) data to push to the GPU (device).
        """
        return DataFeed()

    def component_step(self):
        """
        Apply the agents' stringency actions for the current timestep.

        With CUDA enabled the registered device function is invoked; otherwise
        the stringency levels, agent states and cooldown deadlines are updated
        on the host.

        Raises:
            ValueError: if this component's number of stringency levels differs
                from the world's.
        """
        world = self.world

        if world.use_cuda:
            manager = world.cuda_data_manager
            world.cuda_component_step[self.name](
                manager.device_data("stringency_level"),
                manager.device_data("action_cooldown_period"),
                manager.device_data("action_in_cooldown_until"),
                manager.device_data("default_agent_action_mask"),
                manager.device_data("no_op_agent_action_mask"),
                manager.device_data("num_stringency_levels"),
                manager.device_data(f"{_ACTIONS}_a"),
                manager.device_data(
                    f"{_OBSERVATIONS}_a_{self.name}-agent_policy_indicators"
                ),
                manager.device_data(f"{_OBSERVATIONS}_a_action_mask"),
                manager.device_data(
                    f"{_OBSERVATIONS}_p_{self.name}-agent_policy_indicators"
                ),
                manager.device_data("_timestep_"),
                manager.meta_info("n_agents"),
                manager.meta_info("episode_length"),
                block=world.cuda_function_manager.block,
                grid=world.cuda_function_manager.grid,
            )
            return

        # One-time sanity check against the world's configuration.
        if not self._checked_n_stringency_levels:
            if self.n_stringency_levels != world.n_stringency_levels:
                raise ValueError(
                    "The environment was not configured correctly. For the given "
                    "model fit, you need to set the number of stringency levels to "
                    "be {}".format(world.n_stringency_levels)
                )
            self._checked_n_stringency_levels = True

        now = world.timestep
        stringency = world.global_state["Stringency Level"]

        for agent in world.agents:
            idx = agent.idx

            if world.use_real_world_policies:
                # Replay the action recorded for the previous timestep.
                action = world.real_world_stringency_policy[now - 1, idx]
            else:
                action = agent.get_component_action(self.name)
            assert 0 <= action <= self.n_stringency_levels

            # Action 0 is the NO-OP: carry the previous level forward.
            # Any other action becomes the new level.
            stringency[now, idx] = stringency[now - 1, idx] * (action == 0) + action
            agent.state["Current Open Close Stringency Level"] = stringency[now, idx]

            # Advance the cooldown deadline once it has just been passed: by a
            # single step after a NO-OP (the agent may act again right away),
            # or by the full cooldown period after a real action. While in
            # cooldown all actions are masked (see generate_masks()), so the
            # action is always a NO-OP there.
            if now == self.action_in_cooldown_until[idx] + 1:
                self.action_in_cooldown_until[idx] += (
                    1 if action == 0 else self.action_cooldown_period
                )

    def generate_observations(self):
        """
        Return the current stringency levels, divided by the number of levels,
        for the agents (key "a") and for the planner.
        """
        agent_policy_indicators = self.world.global_state["Stringency Level"][
            self.world.timestep
        ]
        return {
            "a": {
                "agent_policy_indicators": agent_policy_indicators
                / self.n_stringency_levels
            },
            self.world.planner.idx: {
                "agent_policy_indicators": agent_policy_indicators
                / self.n_stringency_levels
            },
        }
@component_registry.add
class FederalGovernmentSubsidy(BaseComponent):
    """
    Lets the planner pick a subsidy level that is paid out daily to the states.

    The planner's action is an integer in [0, num_subsidy_levels], where 0 is
    the no-subsidy case. The chosen level is turned into a per-state daily
    amount as a fraction of each state's maximum daily subsidy.

    Args:
        subsidy_interval (int): The number of days over which the total subsidy
            amount is evenly rolled out.
            Note: shortening the subsidy interval increases the total amount of
            money that the planner could possibly spend. For instance, if the
            subsidy interval is 30, the planner can create a subsidy every 30 days.
        num_subsidy_levels (int): The number of subsidy levels.
            Note: with max_annual_subsidy_per_person=10000, one round of subsidies
            at the maximum subsidy level equals an expenditure of roughly
            $3.3 trillion (given the US population of 330 million).
            If the planner chooses the maximum subsidy amount, the $3.3 trillion
            is rolled out gradually over the subsidy interval.
        max_annual_subsidy_per_person (float): The maximum annual subsidy that may
            be allocated per person.
    """

    name = "FederalGovernmentSubsidy"
    required_entities = []
    agent_subclasses = ["BasicPlanner"]

    def __init__(
        self,
        *base_component_args,
        subsidy_interval=90,
        num_subsidy_levels=20,
        max_annual_subsidy_per_person=20000,
        **base_component_kwargs,
    ):
        self.subsidy_interval = int(subsidy_interval)
        assert self.subsidy_interval >= 1

        self.num_subsidy_levels = int(num_subsidy_levels)
        assert self.num_subsidy_levels >= 1

        self.max_annual_subsidy_per_person = float(max_annual_subsidy_per_person)
        assert self.max_annual_subsidy_per_person >= 0

        self.np_int_dtype = np.int32

        # Lazily created in component_step() when real-world policies are
        # replayed; the level array is cleared again on every reset.
        self._subsidy_amount_per_level = None
        self._subsidy_level_array = None

        super().__init__(*base_component_args, **base_component_kwargs)

        self.default_planner_action_mask = [1] * self.num_subsidy_levels
        self.no_op_planner_action_mask = [0] * self.num_subsidy_levels

        # Placeholder with one entry per agent; overwritten during reset.
        # (np.zeros, not np.array: np.array(n) would build a 0-d array holding
        # the number n rather than an array of length n.)
        self.max_daily_subsidy_per_state = np.zeros(
            self.n_agents, dtype=self.np_int_dtype
        )

    def get_additional_state_fields(self, agent_cls_name):
        """Add the subsidy bookkeeping fields to the planner's state."""
        if agent_cls_name == "BasicPlanner":
            return {"Total Subsidy": 0, "Current Subsidy Level": 0}
        return {}

    def additional_reset_steps(self):
        """Recompute per-state subsidy caps and clear per-episode bookkeeping."""
        # Pre-compute maximum state-specific subsidy levels
        self.max_daily_subsidy_per_state = (
            self.world.us_state_population * self.max_annual_subsidy_per_person / 365
        )
        # The real-world subsidy schedule is accumulated in place (+=) during an
        # episode. Clear it so that a new episode does not start from the
        # totals left behind by the previous one.
        if self._subsidy_level_array is not None:
            self._subsidy_level_array[:] = 0

    def get_n_actions(self, agent_cls_name):
        """Return the number of subsidy levels for the planner, else None."""
        if agent_cls_name == "BasicPlanner":
            # Number of non-zero subsidy levels
            # (the action 0 pertains to the no-subsidy case)
            return self.num_subsidy_levels
        return None

    def generate_masks(self, completions=0):
        """
        Return the planner's action mask.

        All levels are allowed when real-world policies are replayed or when the
        current timestep is a multiple of subsidy_interval; otherwise every
        level is masked out.
        """
        masks = {}
        planner_may_act = (
            self.world.use_real_world_policies
            or self.world.timestep % self.subsidy_interval == 0
        )
        if planner_may_act:
            masks[self.world.planner.idx] = self.default_planner_action_mask
        else:
            masks[self.world.planner.idx] = self.no_op_planner_action_mask
        return masks

    def get_data_dictionary(self):
        """
        Create a dictionary of data to push to the device
        """
        data_dict = DataFeed()
        data_dict.add_data(
            name="subsidy_interval",
            data=self.subsidy_interval,
        )
        data_dict.add_data(
            name="num_subsidy_levels",
            data=self.num_subsidy_levels,
        )
        data_dict.add_data(
            name="max_daily_subsidy_per_state",
            data=self.max_daily_subsidy_per_state,
        )
        # Both masks are pushed with an extra leading 1 in front of the
        # per-level entries.
        data_dict.add_data(
            name="default_planner_action_mask",
            data=[1] + self.default_planner_action_mask,
        )
        data_dict.add_data(
            name="no_op_planner_action_mask",
            data=[1] + self.no_op_planner_action_mask,
        )
        return data_dict

    def get_tensor_dictionary(self):
        """
        Create a dictionary of (Pytorch-accessible) data to push to the device
        """
        tensor_dict = DataFeed()
        return tensor_dict

    def component_step(self):
        """
        Determine the current subsidy level and record the resulting subsidies.

        With CUDA enabled the registered device function is invoked. Otherwise
        the subsidy level is taken from the replayed real-world data or from the
        planner's action, and the per-state daily subsidy is written to the
        world's global state and added to the planner's running total.
        """
        if self.world.use_cuda:
            data_manager = self.world.cuda_data_manager
            self.world.cuda_component_step[self.name](
                data_manager.device_data("subsidy_level"),
                data_manager.device_data("subsidy"),
                data_manager.device_data("subsidy_interval"),
                data_manager.device_data("num_subsidy_levels"),
                data_manager.device_data("max_daily_subsidy_per_state"),
                data_manager.device_data("default_planner_action_mask"),
                data_manager.device_data("no_op_planner_action_mask"),
                data_manager.device_data(f"{_ACTIONS}_p"),
                data_manager.device_data(
                    f"{_OBSERVATIONS}_a_{self.name}-t_until_next_subsidy"
                ),
                data_manager.device_data(
                    f"{_OBSERVATIONS}_a_{self.name}-current_subsidy_level"
                ),
                data_manager.device_data(
                    f"{_OBSERVATIONS}_p_{self.name}-t_until_next_subsidy"
                ),
                data_manager.device_data(
                    f"{_OBSERVATIONS}_p_{self.name}-current_subsidy_level"
                ),
                data_manager.device_data(f"{_OBSERVATIONS}_p_action_mask"),
                data_manager.device_data("_timestep_"),
                data_manager.meta_info("n_agents"),
                data_manager.meta_info("episode_length"),
                block=self.world.cuda_function_manager.block,
                grid=self.world.cuda_function_manager.grid,
            )
        else:
            if self.world.use_real_world_policies:
                if self._subsidy_amount_per_level is None:
                    # Dollar value of one subsidy level over one interval, and
                    # the per-timestep schedule of accumulated levels.
                    self._subsidy_amount_per_level = (
                        self.world.us_population
                        * self.max_annual_subsidy_per_person
                        / self.num_subsidy_levels
                        * self.subsidy_interval
                        / 365
                    )
                    self._subsidy_level_array = np.zeros((self._episode_length + 1))
                # Use the action taken in the previous timestep
                current_subsidy_amount = self.world.real_world_subsidy[
                    self.world.timestep - 1
                ]
                if current_subsidy_amount > 0:
                    _subsidy_level = np.round(
                        (current_subsidy_amount / self._subsidy_amount_per_level)
                    )
                    # Spread the subsidy over the following subsidy_interval
                    # timesteps (clipped at the end of the schedule).
                    for t_idx in range(
                        self.world.timestep - 1,
                        min(
                            len(self._subsidy_level_array),
                            self.world.timestep - 1 + self.subsidy_interval,
                        ),
                    ):
                        self._subsidy_level_array[t_idx] += _subsidy_level
                subsidy_level = self._subsidy_level_array[self.world.timestep - 1]
            else:
                # Update the subsidy level only every self.subsidy_interval, since the
                # other actions are masked out.
                if (self.world.timestep - 1) % self.subsidy_interval == 0:
                    subsidy_level = self.world.planner.get_component_action(self.name)
                else:
                    subsidy_level = self.world.planner.state["Current Subsidy Level"]
            assert 0 <= subsidy_level <= self.num_subsidy_levels
            self.world.planner.state["Current Subsidy Level"] = np.array(
                subsidy_level
            ).astype(self.np_int_dtype)
            # Convert the level into per-state daily dollar amounts.
            subsidy_level_frac = subsidy_level / self.num_subsidy_levels
            daily_statewise_subsidy = (
                subsidy_level_frac * self.max_daily_subsidy_per_state
            )
            self.world.global_state["Subsidy"][
                self.world.timestep
            ] = daily_statewise_subsidy
            self.world.planner.state["Total Subsidy"] += np.sum(daily_statewise_subsidy)

    def generate_observations(self):
        """
        Return the normalized time until the next subsidy decision and the
        normalized current subsidy level, for the agents ("a") and the planner.
        """
        # Allow the agents/planner to know when the next subsidy might come.
        t_since_last_subsidy = self.world.timestep % self.subsidy_interval
        # After dividing by subsidy_interval below, this observation lies in
        # (0, 1]; it equals 1 at timesteps that are multiples of subsidy_interval.
        t_until_next_subsidy = self.subsidy_interval - t_since_last_subsidy
        t_vec = t_until_next_subsidy * np.ones(self.n_agents)
        current_subsidy_level = self.world.planner.state["Current Subsidy Level"]
        sl_vec = current_subsidy_level * np.ones(self.n_agents)
        # Normalized observations
        obs_dict = dict()
        obs_dict["a"] = {
            "t_until_next_subsidy": t_vec / self.subsidy_interval,
            "current_subsidy_level": sl_vec / self.num_subsidy_levels,
        }
        obs_dict[self.world.planner.idx] = {
            "t_until_next_subsidy": t_until_next_subsidy / self.subsidy_interval,
            "current_subsidy_level": current_subsidy_level / self.num_subsidy_levels,
        }
        return obs_dict
@component_registry.add
class VaccinationCampaign(BaseComponent):
    """
    Implements a (passive) component for delivering vaccines to agents once a certain
    amount of time has elapsed.

    Args:
        daily_vaccines_per_million_people (int): The number of vaccines available
            per million people everyday.
        delivery_interval (int): The number of days between vaccine deliveries.
        vaccine_delivery_start_date (string): The date (YYYY-MM-DD) when the
            vaccination begins.
        observe_rate (bool): If True, the observations additionally include the
            vaccination rate that applies at the next timestep.

    Raises:
        ValueError: if vaccine_delivery_start_date is not in YYYY-MM-DD format.
    """

    name = "VaccinationCampaign"
    required_entities = []
    agent_subclasses = ["BasicMobileAgent"]

    def __init__(
        self,
        *base_component_args,
        daily_vaccines_per_million_people=4500,
        delivery_interval=1,
        vaccine_delivery_start_date="2020-12-22",
        observe_rate=False,
        **base_component_kwargs,
    ):
        self.daily_vaccines_per_million_people = int(daily_vaccines_per_million_people)
        assert 0 <= self.daily_vaccines_per_million_people <= 1e6

        self.delivery_interval = int(delivery_interval)
        assert 1 <= self.delivery_interval <= 5000

        try:
            self.vaccine_delivery_start_date = datetime.strptime(
                vaccine_delivery_start_date, "%Y-%m-%d"
            )
        except ValueError as err:
            # Fail here instead of only printing: without a parsed start date
            # the attribute would be missing and later accesses would fail with
            # an unrelated AttributeError.
            raise ValueError("Incorrect data format, should be YYYY-MM-DD") from err

        # Computed lazily by the time_when_vaccine_delivery_begins property.
        self._time_when_vaccine_delivery_begins = None

        self.np_int_dtype = np.int32

        self.observe_rate = bool(observe_rate)

        super().__init__(*base_component_args, **base_component_kwargs)

        # Computed lazily by the num_vaccines_per_delivery property.
        self._num_vaccines_per_delivery = None
        # Convenience for obs (see generate_observations):
        self._t_first_delivery = None

    @property
    def num_vaccines_per_delivery(self):
        """
        Per-state number of vaccines in one delivery (computed once, then cached).

        This is floor(delivery_interval * state population in millions *
        daily_vaccines_per_million_people), stored with dtype np_int_dtype.
        """
        if self._num_vaccines_per_delivery is None:
            # Pre-compute dispersal numbers
            millions_of_residents = self.world.us_state_population / 1e6
            daily_vaccines = (
                millions_of_residents * self.daily_vaccines_per_million_people
            )
            num_vaccines_per_delivery = np.floor(
                self.delivery_interval * daily_vaccines
            )
            self._num_vaccines_per_delivery = np.array(
                num_vaccines_per_delivery, dtype=self.np_int_dtype
            )
        return self._num_vaccines_per_delivery

    @property
    def time_when_vaccine_delivery_begins(self):
        """
        Number of days between the world's start date and the vaccine delivery
        start date (computed once, then cached).
        """
        if self._time_when_vaccine_delivery_begins is None:
            self._time_when_vaccine_delivery_begins = (
                self.vaccine_delivery_start_date - self.world.start_date
            ).days
        return self._time_when_vaccine_delivery_begins

    def get_additional_state_fields(self, agent_cls_name):
        """Add the vaccination bookkeeping fields to each mobile agent's state."""
        if agent_cls_name == "BasicMobileAgent":
            return {"Total Vaccinated": 0, "Vaccines Available": 0}
        return {}

    def additional_reset_steps(self):
        """Nothing to reset for this component."""
        pass

    def get_n_actions(self, agent_cls_name):
        """Passive component: no agent has actions here."""
        return None

    def generate_masks(self, completions=0):
        """Passive component: there are no actions to mask."""
        return {}

    def get_data_dictionary(self):
        """
        Create a dictionary of data to push to the device
        """
        data_dict = DataFeed()
        data_dict.add_data(
            name="num_vaccines_per_delivery",
            data=self.num_vaccines_per_delivery,
        )
        data_dict.add_data(
            name="delivery_interval",
            data=self.delivery_interval,
        )
        data_dict.add_data(
            name="time_when_vaccine_delivery_begins",
            data=self.time_when_vaccine_delivery_begins,
        )
        data_dict.add_data(
            name="num_vaccines_available_t",
            data=np.zeros(self.n_agents),
            save_copy_and_apply_at_reset=True,
        )
        return data_dict

    def get_tensor_dictionary(self):
        """
        Create a dictionary of (Pytorch-accessible) data to push to the device
        """
        tensor_dict = DataFeed()
        return tensor_dict

    def component_step(self):
        """
        Deliver vaccines to every state when a delivery is due.

        With CUDA enabled the registered device function is invoked. Otherwise
        each agent's "Vaccines Available" is increased by its per-delivery
        amount, provided deliveries have begun and the current timestep is a
        multiple of delivery_interval.
        """
        if self.world.use_cuda:
            data_manager = self.world.cuda_data_manager
            self.world.cuda_component_step[self.name](
                data_manager.device_data("vaccinated"),
                data_manager.device_data("num_vaccines_per_delivery"),
                data_manager.device_data("num_vaccines_available_t"),
                data_manager.device_data("delivery_interval"),
                data_manager.device_data("time_when_vaccine_delivery_begins"),
                data_manager.device_data(
                    f"{_OBSERVATIONS}_a_{self.name}-t_until_next_vaccines"
                ),
                data_manager.device_data(
                    f"{_OBSERVATIONS}_p_{self.name}-t_until_next_vaccines"
                ),
                data_manager.device_data("_timestep_"),
                data_manager.meta_info("n_agents"),
                data_manager.meta_info("episode_length"),
                block=self.world.cuda_function_manager.block,
                grid=self.world.cuda_function_manager.grid,
            )
        else:
            # Do nothing if vaccines are not available yet
            if self.world.timestep < self.time_when_vaccine_delivery_begins:
                return

            # Do nothing if this is not the start of a delivery interval.
            # Vaccines are delivered at the start of each interval.
            if (self.world.timestep % self.delivery_interval) != 0:
                return

            # Deliver vaccines to each state
            for aidx, vaccines in enumerate(self.num_vaccines_per_delivery):
                self.world.agents[aidx].state["Vaccines Available"] += vaccines

    def generate_observations(self):
        """
        Return the normalized time until the next vaccine delivery (and, when
        observe_rate is set, the next vaccination rate) for the agents ("a") and
        the planner.
        """
        # Allow the agents/planner to know when the next vaccines might come.
        if self._t_first_delivery is None:
            # First timestep at or after the start of deliveries that is a
            # multiple of delivery_interval.
            self._t_first_delivery = int(self.time_when_vaccine_delivery_begins)
            while (self._t_first_delivery % self.delivery_interval) != 0:
                self._t_first_delivery += 1

        next_t = self.world.timestep + 1
        if next_t <= self._t_first_delivery:
            # NOTE(review): this value is already expressed in units of
            # delivery_interval (capped at 1) and is divided by
            # delivery_interval once more below. Left unchanged to keep the
            # observations as they are -- confirm whether this is intended.
            t_until_next_vac = np.minimum(
                1, (self._t_first_delivery - next_t) / self.delivery_interval
            )
            next_vax_rate = 0.0
        else:
            t_since_last_vac = next_t % self.delivery_interval
            # Lies in [1, delivery_interval], i.e. (0, 1] after normalization.
            t_until_next_vac = self.delivery_interval - t_since_last_vac
            next_vax_rate = self.daily_vaccines_per_million_people / 1e6
        t_vec = t_until_next_vac * np.ones(self.n_agents)
        r_vec = next_vax_rate * np.ones(self.n_agents)
        # Normalized observations
        planner_idx = self.world.planner.idx
        obs_dict = dict()
        obs_dict["a"] = {"t_until_next_vaccines": t_vec / self.delivery_interval}
        obs_dict[planner_idx] = {
            "t_until_next_vaccines": t_until_next_vac / self.delivery_interval
        }
        if self.observe_rate:
            obs_dict["a"]["next_vaccination_rate"] = r_vec
            # Use the same key the planner entry was created under, rather than
            # a hard-coded "p".
            obs_dict[planner_idx]["next_vaccination_rate"] = float(next_vax_rate)
        return obs_dict