adding ai_economist for modding

2023-01-12 16:41:38 +01:00
parent 0479a4f6a4
commit f177f8f0ba
85 changed files with 19373 additions and 2 deletions
--- a/ai_economist/foundation/components/covid19_components.py
+++ b/ai_economist/foundation/components/covid19_components.py
@@ -0,0 +1,663 @@
+# Copyright (c) 2021, salesforce.com, inc.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+# For full license text, see the LICENSE file in the repo root
+# or https://opensource.org/licenses/BSD-3-Clause
+
+from datetime import datetime
+
+import GPUtil
+import numpy as np
+
+from ai_economist.foundation.base.base_component import (
+    BaseComponent,
+    component_registry,
+)
+
+try:
+    num_gpus_available = len(GPUtil.getAvailable())
+    print(f"Inside covid19_components.py: {num_gpus_available} GPUs are available.")
+    if num_gpus_available == 0:
+        print("No GPUs found! Running the simulation on a CPU.")
+    else:
+        from warp_drive.utils.constants import Constants
+        from warp_drive.utils.data_feed import DataFeed
+
+        _OBSERVATIONS = Constants.OBSERVATIONS
+        _ACTIONS = Constants.ACTIONS
+except ModuleNotFoundError:
+    print(
+        "Warning: The 'WarpDrive' package is not found and cannot be used! "
+        "If you wish to use WarpDrive, please run "
+        "'pip install rl-warp-drive' first."
+    )
+except ValueError:
+    print("No GPUs found! Running the simulation on a CPU.")
+
+
+@component_registry.add
+class ControlUSStateOpenCloseStatus(BaseComponent):
+    """
+    Sets the open/close stringency levels for states.
+    Args:
+        n_stringency_levels (int): number of stringency levels the states can chose
+            from. (Must match the number in the model constants dictionary referenced by
+            the parent scenario.)
+        action_cooldown_period (int): action cooldown period in days.
+            Once a stringency level is set, the state(s) cannot switch to another level
+            for a certain number of days (referred to as the "action_cooldown_period")
+    """
+
+    name = "ControlUSStateOpenCloseStatus"
+    required_entities = []
+    agent_subclasses = ["BasicMobileAgent"]
+
+    def __init__(
+        self,
+        *base_component_args,
+        n_stringency_levels=10,
+        action_cooldown_period=28,
+        **base_component_kwargs,
+    ):
+
+        self.action_cooldown_period = action_cooldown_period
+        super().__init__(*base_component_args, **base_component_kwargs)
+        self.np_int_dtype = np.int32
+
+        self.n_stringency_levels = int(n_stringency_levels)
+        assert self.n_stringency_levels >= 2
+        self._checked_n_stringency_levels = False
+
+        self.masks = dict()
+        self.default_agent_action_mask = [1 for _ in range(self.n_stringency_levels)]
+        self.no_op_agent_action_mask = [0 for _ in range(self.n_stringency_levels)]
+        self.masks["a"] = np.repeat(
+            np.array(self.no_op_agent_action_mask)[:, np.newaxis],
+            self.n_agents,
+            axis=-1,
+        )
+
+        # (This will be overwritten during reset; see below)
+        self.action_in_cooldown_until = None
+
+    def get_additional_state_fields(self, agent_cls_name):
+        return {}
+
+    def additional_reset_steps(self):
+        # Store the times when the next set of actions can be taken.
+        self.action_in_cooldown_until = np.array(
+            [self.world.timestep for _ in range(self.n_agents)]
+        )
+
+    def get_n_actions(self, agent_cls_name):
+        if agent_cls_name == "BasicMobileAgent":
+            return self.n_stringency_levels
+        return None
+
+    def generate_masks(self, completions=0):
+        for agent in self.world.agents:
+            if self.world.use_real_world_policies:
+                self.masks["a"][:, agent.idx] = self.default_agent_action_mask
+            else:
+                if self.world.timestep < self.action_in_cooldown_until[agent.idx]:
+                    # Keep masking the actions
+                    self.masks["a"][:, agent.idx] = self.no_op_agent_action_mask
+                else:  # self.world.timestep == self.action_in_cooldown_until[agent.idx]
+                    # Cooldown period has ended; unmask the "subsequent" action
+                    self.masks["a"][:, agent.idx] = self.default_agent_action_mask
+        return self.masks
+
+    def get_data_dictionary(self):
+        """
+        Create a dictionary of data to push to the GPU (device).
+        """
+        data_dict = DataFeed()
+        data_dict.add_data(
+            name="action_cooldown_period",
+            data=self.action_cooldown_period,
+        )
+        data_dict.add_data(
+            name="action_in_cooldown_until",
+            data=self.action_in_cooldown_until,
+            save_copy_and_apply_at_reset=True,
+        )
+        data_dict.add_data(
+            name="num_stringency_levels",
+            data=self.n_stringency_levels,
+        )
+        data_dict.add_data(
+            name="default_agent_action_mask",
+            data=[1] + self.default_agent_action_mask,
+        )
+        data_dict.add_data(
+            name="no_op_agent_action_mask",
+            data=[1] + self.no_op_agent_action_mask,
+        )
+        return data_dict
+
+    def get_tensor_dictionary(self):
+        """
+        Create a dictionary of (Pytorch-accessible) data to push to the GPU (device).
+        """
+        tensor_dict = DataFeed()
+        return tensor_dict
+
+    def component_step(self):
+        if self.world.use_cuda:
+            self.world.cuda_component_step[self.name](
+                self.world.cuda_data_manager.device_data("stringency_level"),
+                self.world.cuda_data_manager.device_data("action_cooldown_period"),
+                self.world.cuda_data_manager.device_data("action_in_cooldown_until"),
+                self.world.cuda_data_manager.device_data("default_agent_action_mask"),
+                self.world.cuda_data_manager.device_data("no_op_agent_action_mask"),
+                self.world.cuda_data_manager.device_data("num_stringency_levels"),
+                self.world.cuda_data_manager.device_data(f"{_ACTIONS}_a"),
+                self.world.cuda_data_manager.device_data(
+                    f"{_OBSERVATIONS}_a_{self.name}-agent_policy_indicators"
+                ),
+                self.world.cuda_data_manager.device_data(
+                    f"{_OBSERVATIONS}_a_action_mask"
+                ),
+                self.world.cuda_data_manager.device_data(
+                    f"{_OBSERVATIONS}_p_{self.name}-agent_policy_indicators"
+                ),
+                self.world.cuda_data_manager.device_data("_timestep_"),
+                self.world.cuda_data_manager.meta_info("n_agents"),
+                self.world.cuda_data_manager.meta_info("episode_length"),
+                block=self.world.cuda_function_manager.block,
+                grid=self.world.cuda_function_manager.grid,
+            )
+        else:
+            if not self._checked_n_stringency_levels:
+                if self.n_stringency_levels != self.world.n_stringency_levels:
+                    raise ValueError(
+                        "The environment was not configured correctly. For the given "
+                        "model fit, you need to set the number of stringency levels to "
+                        "be {}".format(self.world.n_stringency_levels)
+                    )
+                self._checked_n_stringency_levels = True
+
+            for agent in self.world.agents:
+                if self.world.use_real_world_policies:
+                    # Use the action taken in the previous timestep
+                    action = self.world.real_world_stringency_policy[
+                        self.world.timestep - 1, agent.idx
+                    ]
+                else:
+                    action = agent.get_component_action(self.name)
+                assert 0 <= action <= self.n_stringency_levels
+
+                # We only update the stringency level if the action is not a NO-OP.
+                self.world.global_state["Stringency Level"][
+                    self.world.timestep, agent.idx
+                ] = (
+                    self.world.global_state["Stringency Level"][
+                        self.world.timestep - 1, agent.idx
+                    ]
+                    * (action == 0)
+                    + action
+                )
+
+                agent.state[
+                    "Current Open Close Stringency Level"
+                ] = self.world.global_state["Stringency Level"][
+                    self.world.timestep, agent.idx
+                ]
+
+                # Check if the action cooldown period has ended, and set the next
+                # time until action cooldown. If current action is a no-op
+                # (i.e., no new action was taken), the agent can take an action
+                # in the very next step, otherwise it needs to wait for
+                # self.action_cooldown_period steps. When in the action cooldown
+                # period, whatever actions the agents take are masked out,
+                # so it's always a NO-OP (see generate_masks() above)
+                # The logic below influences the action masks.
+                if self.world.timestep == self.action_in_cooldown_until[agent.idx] + 1:
+                    if action == 0:  # NO-OP
+                        self.action_in_cooldown_until[agent.idx] += 1
+                    else:
+                        self.action_in_cooldown_until[
+                            agent.idx
+                        ] += self.action_cooldown_period
+
+    def generate_observations(self):
+
+        # Normalized observations
+        obs_dict = dict()
+        agent_policy_indicators = self.world.global_state["Stringency Level"][
+            self.world.timestep
+        ]
+        obs_dict["a"] = {
+            "agent_policy_indicators": agent_policy_indicators
+            / self.n_stringency_levels
+        }
+        obs_dict[self.world.planner.idx] = {
+            "agent_policy_indicators": agent_policy_indicators
+            / self.n_stringency_levels
+        }
+
+        return obs_dict
+
+
+@component_registry.add
+class FederalGovernmentSubsidy(BaseComponent):
+    """
+    Args:
+        subsidy_interval (int): The number of days over which the total subsidy amount
+            is evenly rolled out.
+            Note: shortening the subsidy interval increases the total amount of money
+            that the planner could possibly spend. For instance, if the subsidy
+            interval is 30, the planner can create a subsidy every 30 days.
+        num_subsidy_levels (int): The number of subsidy levels.
+            Note: with max_annual_subsidy_per_person=10000, one round of subsidies at
+            the maximum subsidy level equals an expenditure of roughly $3.3 trillion
+            (given the US population of 330 million).
+            If the planner chooses the maximum subsidy amount, the $3.3 trillion
+            is rolled out gradually over the subsidy interval.
+        max_annual_subsidy_per_person (float): The maximum annual subsidy that may be
+            allocated per person.
+    """
+
+    name = "FederalGovernmentSubsidy"
+    required_entities = []
+    agent_subclasses = ["BasicPlanner"]
+
+    def __init__(
+        self,
+        *base_component_args,
+        subsidy_interval=90,
+        num_subsidy_levels=20,
+        max_annual_subsidy_per_person=20000,
+        **base_component_kwargs,
+    ):
+        self.subsidy_interval = int(subsidy_interval)
+        assert self.subsidy_interval >= 1
+
+        self.num_subsidy_levels = int(num_subsidy_levels)
+        assert self.num_subsidy_levels >= 1
+
+        self.max_annual_subsidy_per_person = float(max_annual_subsidy_per_person)
+        assert self.max_annual_subsidy_per_person >= 0
+
+        self.np_int_dtype = np.int32
+
+        # (This will be overwritten during component_step; see below)
+        self._subsidy_amount_per_level = None
+        self._subsidy_level_array = None
+
+        super().__init__(*base_component_args, **base_component_kwargs)
+
+        self.default_planner_action_mask = [1 for _ in range(self.num_subsidy_levels)]
+        self.no_op_planner_action_mask = [0 for _ in range(self.num_subsidy_levels)]
+
+        # (This will be overwritten during reset; see below)
+        self.max_daily_subsidy_per_state = np.array(
+            self.n_agents, dtype=self.np_int_dtype
+        )
+
+    def get_additional_state_fields(self, agent_cls_name):
+        if agent_cls_name == "BasicPlanner":
+            return {"Total Subsidy": 0, "Current Subsidy Level": 0}
+        return {}
+
+    def additional_reset_steps(self):
+        # Pre-compute maximum state-specific subsidy levels
+        self.max_daily_subsidy_per_state = (
+            self.world.us_state_population * self.max_annual_subsidy_per_person / 365
+        )
+
+    def get_n_actions(self, agent_cls_name):
+        if agent_cls_name == "BasicPlanner":
+            # Number of non-zero subsidy levels
+            # (the action 0 pertains to the no-subsidy case)
+            return self.num_subsidy_levels
+        return None
+
+    def generate_masks(self, completions=0):
+        masks = {}
+        if self.world.use_real_world_policies:
+            masks[self.world.planner.idx] = self.default_planner_action_mask
+        else:
+            if self.world.timestep % self.subsidy_interval == 0:
+                masks[self.world.planner.idx] = self.default_planner_action_mask
+            else:
+                masks[self.world.planner.idx] = self.no_op_planner_action_mask
+        return masks
+
+    def get_data_dictionary(self):
+        """
+        Create a dictionary of data to push to the device
+        """
+        data_dict = DataFeed()
+        data_dict.add_data(
+            name="subsidy_interval",
+            data=self.subsidy_interval,
+        )
+        data_dict.add_data(
+            name="num_subsidy_levels",
+            data=self.num_subsidy_levels,
+        )
+        data_dict.add_data(
+            name="max_daily_subsidy_per_state",
+            data=self.max_daily_subsidy_per_state,
+        )
+        data_dict.add_data(
+            name="default_planner_action_mask",
+            data=[1] + self.default_planner_action_mask,
+        )
+        data_dict.add_data(
+            name="no_op_planner_action_mask",
+            data=[1] + self.no_op_planner_action_mask,
+        )
+        return data_dict
+
+    def get_tensor_dictionary(self):
+        """
+        Create a dictionary of (Pytorch-accessible) data to push to the device
+        """
+        tensor_dict = DataFeed()
+        return tensor_dict
+
+    def component_step(self):
+        if self.world.use_cuda:
+            self.world.cuda_component_step[self.name](
+                self.world.cuda_data_manager.device_data("subsidy_level"),
+                self.world.cuda_data_manager.device_data("subsidy"),
+                self.world.cuda_data_manager.device_data("subsidy_interval"),
+                self.world.cuda_data_manager.device_data("num_subsidy_levels"),
+                self.world.cuda_data_manager.device_data("max_daily_subsidy_per_state"),
+                self.world.cuda_data_manager.device_data("default_planner_action_mask"),
+                self.world.cuda_data_manager.device_data("no_op_planner_action_mask"),
+                self.world.cuda_data_manager.device_data(f"{_ACTIONS}_p"),
+                self.world.cuda_data_manager.device_data(
+                    f"{_OBSERVATIONS}_a_{self.name}-t_until_next_subsidy"
+                ),
+                self.world.cuda_data_manager.device_data(
+                    f"{_OBSERVATIONS}_a_{self.name}-current_subsidy_level"
+                ),
+                self.world.cuda_data_manager.device_data(
+                    f"{_OBSERVATIONS}_p_{self.name}-t_until_next_subsidy"
+                ),
+                self.world.cuda_data_manager.device_data(
+                    f"{_OBSERVATIONS}_p_{self.name}-current_subsidy_level"
+                ),
+                self.world.cuda_data_manager.device_data(
+                    f"{_OBSERVATIONS}_p_action_mask"
+                ),
+                self.world.cuda_data_manager.device_data("_timestep_"),
+                self.world.cuda_data_manager.meta_info("n_agents"),
+                self.world.cuda_data_manager.meta_info("episode_length"),
+                block=self.world.cuda_function_manager.block,
+                grid=self.world.cuda_function_manager.grid,
+            )
+        else:
+            if self.world.use_real_world_policies:
+                if self._subsidy_amount_per_level is None:
+                    self._subsidy_amount_per_level = (
+                        self.world.us_population
+                        * self.max_annual_subsidy_per_person
+                        / self.num_subsidy_levels
+                        * self.subsidy_interval
+                        / 365
+                    )
+                    self._subsidy_level_array = np.zeros((self._episode_length + 1))
+                # Use the action taken in the previous timestep
+                current_subsidy_amount = self.world.real_world_subsidy[
+                    self.world.timestep - 1
+                ]
+                if current_subsidy_amount > 0:
+                    _subsidy_level = np.round(
+                        (current_subsidy_amount / self._subsidy_amount_per_level)
+                    )
+                    for t_idx in range(
+                        self.world.timestep - 1,
+                        min(
+                            len(self._subsidy_level_array),
+                            self.world.timestep - 1 + self.subsidy_interval,
+                        ),
+                    ):
+                        self._subsidy_level_array[t_idx] += _subsidy_level
+                subsidy_level = self._subsidy_level_array[self.world.timestep - 1]
+            else:
+                # Update the subsidy level only every self.subsidy_interval, since the
+                # other actions are masked out.
+                if (self.world.timestep - 1) % self.subsidy_interval == 0:
+                    subsidy_level = self.world.planner.get_component_action(self.name)
+                else:
+                    subsidy_level = self.world.planner.state["Current Subsidy Level"]
+
+            assert 0 <= subsidy_level <= self.num_subsidy_levels
+            self.world.planner.state["Current Subsidy Level"] = np.array(
+                subsidy_level
+            ).astype(self.np_int_dtype)
+
+            # Update subsidy level
+            subsidy_level_frac = subsidy_level / self.num_subsidy_levels
+            daily_statewise_subsidy = (
+                subsidy_level_frac * self.max_daily_subsidy_per_state
+            )
+
+            self.world.global_state["Subsidy"][
+                self.world.timestep
+            ] = daily_statewise_subsidy
+            self.world.planner.state["Total Subsidy"] += np.sum(daily_statewise_subsidy)
+
+    def generate_observations(self):
+        # Allow the agents/planner to know when the next subsidy might come.
+        # Obs should = 0 when the next timestep could include a subsidy
+        t_since_last_subsidy = self.world.timestep % self.subsidy_interval
+        # (this is normalized to 0<-->1)
+        t_until_next_subsidy = self.subsidy_interval - t_since_last_subsidy
+        t_vec = t_until_next_subsidy * np.ones(self.n_agents)
+
+        current_subsidy_level = self.world.planner.state["Current Subsidy Level"]
+        sl_vec = current_subsidy_level * np.ones(self.n_agents)
+
+        # Normalized observations
+        obs_dict = dict()
+        obs_dict["a"] = {
+            "t_until_next_subsidy": t_vec / self.subsidy_interval,
+            "current_subsidy_level": sl_vec / self.num_subsidy_levels,
+        }
+        obs_dict[self.world.planner.idx] = {
+            "t_until_next_subsidy": t_until_next_subsidy / self.subsidy_interval,
+            "current_subsidy_level": current_subsidy_level / self.num_subsidy_levels,
+        }
+
+        return obs_dict
+
+
+@component_registry.add
+class VaccinationCampaign(BaseComponent):
+    """
+    Implements a (passive) component for delivering vaccines to agents once a certain
+    amount of time has elapsed.
+
+    Args:
+        daily_vaccines_per_million_people (int): The number of vaccines available per
+            million people everyday.
+        delivery_interval (int): The number of days between vaccine deliveries.
+        vaccine_delivery_start_date (string): The date (YYYY-MM-DD) when the
+            vaccination begins.
+    """
+
+    name = "VaccinationCampaign"
+    required_entities = []
+    agent_subclasses = ["BasicMobileAgent"]
+
+    def __init__(
+        self,
+        *base_component_args,
+        daily_vaccines_per_million_people=4500,
+        delivery_interval=1,
+        vaccine_delivery_start_date="2020-12-22",
+        observe_rate=False,
+        **base_component_kwargs,
+    ):
+        self.daily_vaccines_per_million_people = int(daily_vaccines_per_million_people)
+        assert 0 <= self.daily_vaccines_per_million_people <= 1e6
+
+        self.delivery_interval = int(delivery_interval)
+        assert 1 <= self.delivery_interval <= 5000
+
+        try:
+            self.vaccine_delivery_start_date = datetime.strptime(
+                vaccine_delivery_start_date, "%Y-%m-%d"
+            )
+        except ValueError:
+            print("Incorrect data format, should be YYYY-MM-DD")
+
+        # (This will  be overwritten during component_step (see below))
+        self._time_when_vaccine_delivery_begins = None
+
+        self.np_int_dtype = np.int32
+
+        self.observe_rate = bool(observe_rate)
+
+        super().__init__(*base_component_args, **base_component_kwargs)
+
+        # (This will be overwritten during reset; see below)
+        self._num_vaccines_per_delivery = None
+        # Convenience for obs (see usage below):
+        self._t_first_delivery = None
+
+    @property
+    def num_vaccines_per_delivery(self):
+        if self._num_vaccines_per_delivery is None:
+            # Pre-compute dispersal numbers
+            millions_of_residents = self.world.us_state_population / 1e6
+            daily_vaccines = (
+                millions_of_residents * self.daily_vaccines_per_million_people
+            )
+            num_vaccines_per_delivery = np.floor(
+                self.delivery_interval * daily_vaccines
+            )
+            self._num_vaccines_per_delivery = np.array(
+                num_vaccines_per_delivery, dtype=self.np_int_dtype
+            )
+        return self._num_vaccines_per_delivery
+
+    @property
+    def time_when_vaccine_delivery_begins(self):
+        if self._time_when_vaccine_delivery_begins is None:
+            self._time_when_vaccine_delivery_begins = (
+                self.vaccine_delivery_start_date - self.world.start_date
+            ).days
+        return self._time_when_vaccine_delivery_begins
+
+    def get_additional_state_fields(self, agent_cls_name):
+        if agent_cls_name == "BasicMobileAgent":
+            return {"Total Vaccinated": 0, "Vaccines Available": 0}
+        return {}
+
+    def additional_reset_steps(self):
+        pass
+
+    def get_n_actions(self, agent_cls_name):
+        return  # Passive component
+
+    def generate_masks(self, completions=0):
+        return {}  # Passive component
+
+    def get_data_dictionary(self):
+        """
+        Create a dictionary of data to push to the device
+        """
+        data_dict = DataFeed()
+        data_dict.add_data(
+            name="num_vaccines_per_delivery",
+            data=self.num_vaccines_per_delivery,
+        )
+        data_dict.add_data(
+            name="delivery_interval",
+            data=self.delivery_interval,
+        )
+        data_dict.add_data(
+            name="time_when_vaccine_delivery_begins",
+            data=self.time_when_vaccine_delivery_begins,
+        )
+        data_dict.add_data(
+            name="num_vaccines_available_t",
+            data=np.zeros(self.n_agents),
+            save_copy_and_apply_at_reset=True,
+        )
+        return data_dict
+
+    def get_tensor_dictionary(self):
+        """
+        Create a dictionary of (Pytorch-accessible) data to push to the device
+        """
+        tensor_dict = DataFeed()
+        return tensor_dict
+
+    def component_step(self):
+        if self.world.use_cuda:
+            self.world.cuda_component_step[self.name](
+                self.world.cuda_data_manager.device_data("vaccinated"),
+                self.world.cuda_data_manager.device_data("num_vaccines_per_delivery"),
+                self.world.cuda_data_manager.device_data("num_vaccines_available_t"),
+                self.world.cuda_data_manager.device_data("delivery_interval"),
+                self.world.cuda_data_manager.device_data(
+                    "time_when_vaccine_delivery_begins"
+                ),
+                self.world.cuda_data_manager.device_data(
+                    f"{_OBSERVATIONS}_a_{self.name}-t_until_next_vaccines"
+                ),
+                self.world.cuda_data_manager.device_data(
+                    f"{_OBSERVATIONS}_p_{self.name}-t_until_next_vaccines"
+                ),
+                self.world.cuda_data_manager.device_data("_timestep_"),
+                self.world.cuda_data_manager.meta_info("n_agents"),
+                self.world.cuda_data_manager.meta_info("episode_length"),
+                block=self.world.cuda_function_manager.block,
+                grid=self.world.cuda_function_manager.grid,
+            )
+        else:
+            # Do nothing if vaccines are not available yet
+            if self.world.timestep < self.time_when_vaccine_delivery_begins:
+                return
+
+            # Do nothing if this is not the start of a delivery interval.
+            # Vaccines are delivered at the start of each interval.
+            if (self.world.timestep % self.delivery_interval) != 0:
+                return
+
+            # Deliver vaccines to each state
+            for aidx, vaccines in enumerate(self.num_vaccines_per_delivery):
+                self.world.agents[aidx].state["Vaccines Available"] += vaccines
+
+    def generate_observations(self):
+        # Allow the agents/planner to know when the next vaccines might come.
+        # Obs should = 0 when the next timestep will deliver vaccines
+        # (this is normalized to 0<-->1)
+
+        if self._t_first_delivery is None:
+            self._t_first_delivery = int(self.time_when_vaccine_delivery_begins)
+            while (self._t_first_delivery % self.delivery_interval) != 0:
+                self._t_first_delivery += 1
+
+        next_t = self.world.timestep + 1
+        if next_t <= self._t_first_delivery:
+            t_until_next_vac = np.minimum(
+                1, (self._t_first_delivery - next_t) / self.delivery_interval
+            )
+            next_vax_rate = 0.0
+        else:
+            t_since_last_vac = next_t % self.delivery_interval
+            t_until_next_vac = self.delivery_interval - t_since_last_vac
+            next_vax_rate = self.daily_vaccines_per_million_people / 1e6
+        t_vec = t_until_next_vac * np.ones(self.n_agents)
+        r_vec = next_vax_rate * np.ones(self.n_agents)
+
+        # Normalized observations
+        obs_dict = dict()
+        obs_dict["a"] = {"t_until_next_vaccines": t_vec / self.delivery_interval}
+        obs_dict[self.world.planner.idx] = {
+            "t_until_next_vaccines": t_until_next_vac / self.delivery_interval
+        }
+
+        if self.observe_rate:
+            obs_dict["a"]["next_vaccination_rate"] = r_vec
+            obs_dict["p"]["next_vaccination_rate"] = float(next_vax_rate)
+
+        return obs_dict