# Copyright (c) 2021, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause

from datetime import datetime

import GPUtil
import numpy as np

from ai_economist.foundation.base.base_component import (
    BaseComponent,
    component_registry,
)

# Probe for GPUs at import time. The WarpDrive helpers (DataFeed) and the
# _OBSERVATIONS / _ACTIONS name prefixes are only bound when at least one GPU is
# reported; on every other path below those names stay undefined in this module.
try:
    num_gpus_available = len(GPUtil.getAvailable())
    print(f"Inside covid19_components.py: {num_gpus_available} GPUs are available.")
    if num_gpus_available == 0:
        print("No GPUs found! Running the simulation on a CPU.")
    else:
        # Only reached when a GPU is reported, so WarpDrive is not imported on
        # CPU-only runs.
        from warp_drive.utils.constants import Constants
        from warp_drive.utils.data_feed import DataFeed

        # Prefixes used to build the names of the device-side action and
        # observation arrays (see the component_step() methods below).
        _OBSERVATIONS = Constants.OBSERVATIONS
        _ACTIONS = Constants.ACTIONS
except ModuleNotFoundError:
    # A module imported above could not be found; warn and continue without it.
    print(
        "Warning: The 'WarpDrive' package is not found and cannot be used! "
        "If you wish to use WarpDrive, please run "
        "'pip install rl-warp-drive' first."
    )
except ValueError:
    # NOTE(review): presumably raised by GPUtil when the GPUs cannot be queried
    # -- confirm against the GPUtil documentation.
    print("No GPUs found! Running the simulation on a CPU.")


@component_registry.add
class ControlUSStateOpenCloseStatus(BaseComponent):
    """
    Lets every state pick an open/close stringency level, subject to a cooldown.

    Args:
        n_stringency_levels (int): number of stringency levels the states can
            choose from. (Must match the number in the model constants dictionary
            referenced by the parent scenario.)
        action_cooldown_period (int): action cooldown period in days.
            Once a stringency level is set, the state(s) cannot switch to another
            level for a certain number of days (the "action_cooldown_period").
    """

    name = "ControlUSStateOpenCloseStatus"
    required_entities = []
    agent_subclasses = ["BasicMobileAgent"]

    def __init__(
        self,
        *base_component_args,
        n_stringency_levels=10,
        action_cooldown_period=28,
        **base_component_kwargs,
    ):
        self.action_cooldown_period = action_cooldown_period
        super().__init__(*base_component_args, **base_component_kwargs)
        self.np_int_dtype = np.int32

        self.n_stringency_levels = int(n_stringency_levels)
        assert self.n_stringency_levels >= 2
        # The match against the world's level count is verified lazily, on the
        # first CPU component_step().
        self._checked_n_stringency_levels = False

        level_count = self.n_stringency_levels
        self.default_agent_action_mask = [1] * level_count
        self.no_op_agent_action_mask = [0] * level_count

        # One mask column per agent, initially with every level masked out.
        no_op_column = np.array(self.no_op_agent_action_mask)[:, np.newaxis]
        self.masks = {"a": np.repeat(no_op_column, self.n_agents, axis=-1)}

        # Placeholder; additional_reset_steps() installs the real array.
        self.action_in_cooldown_until = None

    def get_additional_state_fields(self, agent_cls_name):
        """This component adds no state fields to any agent class."""
        return {}

    def additional_reset_steps(self):
        """Start every agent's cooldown marker at the current world timestep."""
        reset_time = self.world.timestep
        self.action_in_cooldown_until = np.array([reset_time] * self.n_agents)

    def get_n_actions(self, agent_cls_name):
        """Return the number of stringency levels for mobile agents, else None."""
        is_mobile_agent = agent_cls_name == "BasicMobileAgent"
        return self.n_stringency_levels if is_mobile_agent else None

    def generate_masks(self, completions=0):
        """
        Refresh and return the per-agent action masks.

        Actions stay masked out while an agent is inside its cooldown window;
        when real-world policies are replayed, all actions are always unmasked.
        """
        per_agent_masks = self.masks["a"]
        for agent in self.world.agents:
            cooling_down = (
                not self.world.use_real_world_policies
                and self.world.timestep < self.action_in_cooldown_until[agent.idx]
            )
            if cooling_down:
                per_agent_masks[:, agent.idx] = self.no_op_agent_action_mask
            else:
                per_agent_masks[:, agent.idx] = self.default_agent_action_mask
        return self.masks

    def get_data_dictionary(self):
        """
        Assemble the data feed that is pushed to the GPU (device).
        """
        feed = DataFeed()
        feed.add_data(
            name="action_cooldown_period", data=self.action_cooldown_period
        )
        feed.add_data(
            name="action_in_cooldown_until",
            data=self.action_in_cooldown_until,
            save_copy_and_apply_at_reset=True,
        )
        feed.add_data(name="num_stringency_levels", data=self.n_stringency_levels)
        # Both masks are pushed with an extra leading 1 entry.
        feed.add_data(
            name="default_agent_action_mask",
            data=[1] + self.default_agent_action_mask,
        )
        feed.add_data(
            name="no_op_agent_action_mask",
            data=[1] + self.no_op_agent_action_mask,
        )
        return feed

    def get_tensor_dictionary(self):
        """
        Assemble the (Pytorch-accessible) data feed for the GPU (device); empty.
        """
        return DataFeed()

    def component_step(self):
        """
        Apply the agents' stringency actions for the current timestep.

        With CUDA enabled the registered device step function is invoked;
        otherwise the update is carried out agent by agent on the host.

        Raises:
            ValueError: (CPU path) if n_stringency_levels differs from the
                world's n_stringency_levels.
        """
        if self.world.use_cuda:
            step_fn = self.world.cuda_component_step[self.name]
            manager = self.world.cuda_data_manager
            on_device = manager.device_data
            step_fn(
                on_device("stringency_level"),
                on_device("action_cooldown_period"),
                on_device("action_in_cooldown_until"),
                on_device("default_agent_action_mask"),
                on_device("no_op_agent_action_mask"),
                on_device("num_stringency_levels"),
                on_device(f"{_ACTIONS}_a"),
                on_device(f"{_OBSERVATIONS}_a_{self.name}-agent_policy_indicators"),
                on_device(f"{_OBSERVATIONS}_a_action_mask"),
                on_device(f"{_OBSERVATIONS}_p_{self.name}-agent_policy_indicators"),
                on_device("_timestep_"),
                manager.meta_info("n_agents"),
                manager.meta_info("episode_length"),
                block=self.world.cuda_function_manager.block,
                grid=self.world.cuda_function_manager.grid,
            )
            return

        # One-time consistency check against the world's configuration.
        if not self._checked_n_stringency_levels:
            expected_levels = self.world.n_stringency_levels
            if self.n_stringency_levels != expected_levels:
                raise ValueError(
                    "The environment was not configured correctly. For the given "
                    "model fit, you need to set the number of stringency levels to "
                    f"be {expected_levels}"
                )
            self._checked_n_stringency_levels = True

        now = self.world.timestep
        replay_real_world = self.world.use_real_world_policies
        for agent in self.world.agents:
            state_idx = agent.idx
            if replay_real_world:
                # Use the action taken in the previous timestep
                action = self.world.real_world_stringency_policy[now - 1, state_idx]
            else:
                action = agent.get_component_action(self.name)
            assert 0 <= action <= self.n_stringency_levels

            # Action 0 is the NO-OP: the previous level is carried forward.
            # Any other action value becomes the new level.
            is_no_op = action == 0
            stringency = self.world.global_state["Stringency Level"]
            stringency[now, state_idx] = (
                stringency[now - 1, state_idx] * is_no_op + action
            )
            agent.state["Current Open Close Stringency Level"] = stringency[
                now, state_idx
            ]

            # On the first step after the cooldown marker, move the marker:
            # by one day after a NO-OP (the agent may act again right away),
            # by the full cooldown period after a real action. While in
            # cooldown, generate_masks() masks the actions out, so they are
            # always NO-OPs.
            if now == self.action_in_cooldown_until[state_idx] + 1:
                if is_no_op:
                    self.action_in_cooldown_until[state_idx] += 1
                else:
                    self.action_in_cooldown_until[
                        state_idx
                    ] += self.action_cooldown_period

    def generate_observations(self):
        """
        Return the current stringency levels, divided by the number of levels,
        for the agents (key "a") and for the planner.
        """
        indicators = self.world.global_state["Stringency Level"][self.world.timestep]
        n_levels = self.n_stringency_levels
        return {
            "a": {"agent_policy_indicators": indicators / n_levels},
            self.world.planner.idx: {
                "agent_policy_indicators": indicators / n_levels
            },
        }


@component_registry.add
class FederalGovernmentSubsidy(BaseComponent):
    """
    Lets the planner set a federal subsidy level that is paid out to the states.

    Args:
        subsidy_interval (int): The number of days over which the total subsidy amount
            is evenly rolled out.
            Note: shortening the subsidy interval increases the total amount of money
            that the planner could possibly spend. For instance, if the subsidy
            interval is 30, the planner can create a subsidy every 30 days.
        num_subsidy_levels (int): The number of subsidy levels.
            Note: with max_annual_subsidy_per_person=10000, one round of subsidies at
            the maximum subsidy level equals an expenditure of roughly $3.3 trillion
            (given the US population of 330 million).
            If the planner chooses the maximum subsidy amount, the $3.3 trillion
            is rolled out gradually over the subsidy interval.
        max_annual_subsidy_per_person (float): The maximum annual subsidy that may be
            allocated per person. (The default used here is 20000, i.e. twice the
            value used in the example above.)
    """

    name = "FederalGovernmentSubsidy"
    required_entities = []
    agent_subclasses = ["BasicPlanner"]

    def __init__(
        self,
        *base_component_args,
        subsidy_interval=90,
        num_subsidy_levels=20,
        max_annual_subsidy_per_person=20000,
        **base_component_kwargs,
    ):
        self.subsidy_interval = int(subsidy_interval)
        assert self.subsidy_interval >= 1

        self.num_subsidy_levels = int(num_subsidy_levels)
        assert self.num_subsidy_levels >= 1

        self.max_annual_subsidy_per_person = float(max_annual_subsidy_per_person)
        assert self.max_annual_subsidy_per_person >= 0

        self.np_int_dtype = np.int32

        # (This will be overwritten during component_step; see below)
        self._subsidy_amount_per_level = None
        self._subsidy_level_array = None

        super().__init__(*base_component_args, **base_component_kwargs)

        self.default_planner_action_mask = [1 for _ in range(self.num_subsidy_levels)]
        self.no_op_planner_action_mask = [0 for _ in range(self.num_subsidy_levels)]

        # (This will be overwritten during reset; see below)
        # Placeholder with one entry per state. np.array(self.n_agents) would
        # instead build a 0-d array holding the agent count.
        self.max_daily_subsidy_per_state = np.zeros(
            self.n_agents, dtype=self.np_int_dtype
        )

    def get_additional_state_fields(self, agent_cls_name):
        """Return the planner's subsidy bookkeeping fields (empty for others)."""
        if agent_cls_name == "BasicPlanner":
            return {"Total Subsidy": 0, "Current Subsidy Level": 0}
        return {}

    def additional_reset_steps(self):
        """Recompute the maximum daily subsidy of each state from its population."""
        # Pre-compute maximum state-specific subsidy levels
        self.max_daily_subsidy_per_state = (
            self.world.us_state_population * self.max_annual_subsidy_per_person / 365
        )

    def get_n_actions(self, agent_cls_name):
        """Return the number of subsidy levels for the planner, else None."""
        if agent_cls_name == "BasicPlanner":
            # Number of non-zero subsidy levels
            # (the action 0 pertains to the no-subsidy case)
            return self.num_subsidy_levels
        return None

    def generate_masks(self, completions=0):
        """
        Return the planner's action mask.

        All levels are unmasked when real-world policies are replayed or when the
        current timestep is a multiple of subsidy_interval; otherwise every level
        is masked out.
        """
        masks = {}
        if self.world.use_real_world_policies:
            masks[self.world.planner.idx] = self.default_planner_action_mask
        else:
            if self.world.timestep % self.subsidy_interval == 0:
                masks[self.world.planner.idx] = self.default_planner_action_mask
            else:
                masks[self.world.planner.idx] = self.no_op_planner_action_mask
        return masks

    def get_data_dictionary(self):
        """
        Create a dictionary of data to push to the device
        """
        data_dict = DataFeed()
        data_dict.add_data(
            name="subsidy_interval",
            data=self.subsidy_interval,
        )
        data_dict.add_data(
            name="num_subsidy_levels",
            data=self.num_subsidy_levels,
        )
        data_dict.add_data(
            name="max_daily_subsidy_per_state",
            data=self.max_daily_subsidy_per_state,
        )
        # Both masks are pushed with an extra leading 1 entry.
        data_dict.add_data(
            name="default_planner_action_mask",
            data=[1] + self.default_planner_action_mask,
        )
        data_dict.add_data(
            name="no_op_planner_action_mask",
            data=[1] + self.no_op_planner_action_mask,
        )
        return data_dict

    def get_tensor_dictionary(self):
        """
        Create a dictionary of (Pytorch-accessible) data to push to the device
        """
        tensor_dict = DataFeed()
        return tensor_dict

    def component_step(self):
        """
        Determine the current subsidy level and record the resulting subsidies.

        With CUDA enabled the registered device step function is invoked.
        Otherwise the subsidy level is taken either from the real-world subsidy
        record or from the planner's action, and the per-state daily subsidy is
        written to the world's global state and added to the planner's total.
        """
        if self.world.use_cuda:
            self.world.cuda_component_step[self.name](
                self.world.cuda_data_manager.device_data("subsidy_level"),
                self.world.cuda_data_manager.device_data("subsidy"),
                self.world.cuda_data_manager.device_data("subsidy_interval"),
                self.world.cuda_data_manager.device_data("num_subsidy_levels"),
                self.world.cuda_data_manager.device_data("max_daily_subsidy_per_state"),
                self.world.cuda_data_manager.device_data("default_planner_action_mask"),
                self.world.cuda_data_manager.device_data("no_op_planner_action_mask"),
                self.world.cuda_data_manager.device_data(f"{_ACTIONS}_p"),
                self.world.cuda_data_manager.device_data(
                    f"{_OBSERVATIONS}_a_{self.name}-t_until_next_subsidy"
                ),
                self.world.cuda_data_manager.device_data(
                    f"{_OBSERVATIONS}_a_{self.name}-current_subsidy_level"
                ),
                self.world.cuda_data_manager.device_data(
                    f"{_OBSERVATIONS}_p_{self.name}-t_until_next_subsidy"
                ),
                self.world.cuda_data_manager.device_data(
                    f"{_OBSERVATIONS}_p_{self.name}-current_subsidy_level"
                ),
                self.world.cuda_data_manager.device_data(
                    f"{_OBSERVATIONS}_p_action_mask"
                ),
                self.world.cuda_data_manager.device_data("_timestep_"),
                self.world.cuda_data_manager.meta_info("n_agents"),
                self.world.cuda_data_manager.meta_info("episode_length"),
                block=self.world.cuda_function_manager.block,
                grid=self.world.cuda_function_manager.grid,
            )
        else:
            if self.world.use_real_world_policies:
                if self._subsidy_amount_per_level is None:
                    # Lazily computed on first use: the nationwide amount that
                    # one subsidy level corresponds to over a whole interval.
                    self._subsidy_amount_per_level = (
                        self.world.us_population
                        * self.max_annual_subsidy_per_person
                        / self.num_subsidy_levels
                        * self.subsidy_interval
                        / 365
                    )
                    self._subsidy_level_array = np.zeros((self._episode_length + 1))
                # Use the action taken in the previous timestep
                current_subsidy_amount = self.world.real_world_subsidy[
                    self.world.timestep - 1
                ]
                if current_subsidy_amount > 0:
                    # Convert the recorded amount into a (rounded) level and
                    # spread it over the following subsidy_interval days,
                    # clipped to the end of the level array.
                    _subsidy_level = np.round(
                        (current_subsidy_amount / self._subsidy_amount_per_level)
                    )
                    for t_idx in range(
                        self.world.timestep - 1,
                        min(
                            len(self._subsidy_level_array),
                            self.world.timestep - 1 + self.subsidy_interval,
                        ),
                    ):
                        self._subsidy_level_array[t_idx] += _subsidy_level
                subsidy_level = self._subsidy_level_array[self.world.timestep - 1]
            else:
                # Update the subsidy level only every self.subsidy_interval, since the
                # other actions are masked out.
                if (self.world.timestep - 1) % self.subsidy_interval == 0:
                    subsidy_level = self.world.planner.get_component_action(self.name)
                else:
                    subsidy_level = self.world.planner.state["Current Subsidy Level"]

            assert 0 <= subsidy_level <= self.num_subsidy_levels
            self.world.planner.state["Current Subsidy Level"] = np.array(
                subsidy_level
            ).astype(self.np_int_dtype)

            # Update subsidy level
            subsidy_level_frac = subsidy_level / self.num_subsidy_levels
            daily_statewise_subsidy = (
                subsidy_level_frac * self.max_daily_subsidy_per_state
            )

            self.world.global_state["Subsidy"][
                self.world.timestep
            ] = daily_statewise_subsidy
            self.world.planner.state["Total Subsidy"] += np.sum(daily_statewise_subsidy)

    def generate_observations(self):
        """
        Return the normalized time until the next subsidy decision and the
        normalized current subsidy level, for the agents (key "a") and the planner.
        """
        # Allow the agents/planner to know when the next subsidy might come.
        t_since_last_subsidy = self.world.timestep % self.subsidy_interval
        # This counts down from subsidy_interval to 1 (it is never 0), so the
        # normalized observation below lies in (0, 1]; it equals 1 when the
        # current timestep is a multiple of subsidy_interval.
        t_until_next_subsidy = self.subsidy_interval - t_since_last_subsidy
        t_vec = t_until_next_subsidy * np.ones(self.n_agents)

        current_subsidy_level = self.world.planner.state["Current Subsidy Level"]
        sl_vec = current_subsidy_level * np.ones(self.n_agents)

        # Normalized observations
        obs_dict = dict()
        obs_dict["a"] = {
            "t_until_next_subsidy": t_vec / self.subsidy_interval,
            "current_subsidy_level": sl_vec / self.num_subsidy_levels,
        }
        obs_dict[self.world.planner.idx] = {
            "t_until_next_subsidy": t_until_next_subsidy / self.subsidy_interval,
            "current_subsidy_level": current_subsidy_level / self.num_subsidy_levels,
        }

        return obs_dict


@component_registry.add
class VaccinationCampaign(BaseComponent):
    """
    Implements a (passive) component for delivering vaccines to agents once a certain
    amount of time has elapsed.

    Args:
        daily_vaccines_per_million_people (int): The number of vaccines available per
            million people everyday.
        delivery_interval (int): The number of days between vaccine deliveries.
        vaccine_delivery_start_date (string): The date (YYYY-MM-DD) when the
            vaccination begins.
        observe_rate (bool): If True, the observations additionally contain a
            "next_vaccination_rate" entry. Defaults to False.

    Raises:
        ValueError: if vaccine_delivery_start_date is not in YYYY-MM-DD format.
    """

    name = "VaccinationCampaign"
    required_entities = []
    agent_subclasses = ["BasicMobileAgent"]

    def __init__(
        self,
        *base_component_args,
        daily_vaccines_per_million_people=4500,
        delivery_interval=1,
        vaccine_delivery_start_date="2020-12-22",
        observe_rate=False,
        **base_component_kwargs,
    ):
        self.daily_vaccines_per_million_people = int(daily_vaccines_per_million_people)
        assert 0 <= self.daily_vaccines_per_million_people <= 1e6

        self.delivery_interval = int(delivery_interval)
        assert 1 <= self.delivery_interval <= 5000

        try:
            self.vaccine_delivery_start_date = datetime.strptime(
                vaccine_delivery_start_date, "%Y-%m-%d"
            )
        except ValueError as err:
            # Fail here rather than leaving the attribute unset, which would
            # only surface later as an AttributeError.
            raise ValueError(
                "Incorrect data format, should be YYYY-MM-DD "
                f"(got {vaccine_delivery_start_date!r})"
            ) from err

        # (This will  be overwritten during component_step (see below))
        self._time_when_vaccine_delivery_begins = None

        self.np_int_dtype = np.int32

        self.observe_rate = bool(observe_rate)

        super().__init__(*base_component_args, **base_component_kwargs)

        # (Computed lazily on first access; see the property below)
        self._num_vaccines_per_delivery = None
        # Convenience for obs (see usage below):
        self._t_first_delivery = None

    @property
    def num_vaccines_per_delivery(self):
        """Per-state number of vaccines in one delivery (computed once, cached)."""
        if self._num_vaccines_per_delivery is None:
            # Pre-compute dispersal numbers
            millions_of_residents = self.world.us_state_population / 1e6
            daily_vaccines = (
                millions_of_residents * self.daily_vaccines_per_million_people
            )
            num_vaccines_per_delivery = np.floor(
                self.delivery_interval * daily_vaccines
            )
            self._num_vaccines_per_delivery = np.array(
                num_vaccines_per_delivery, dtype=self.np_int_dtype
            )
        return self._num_vaccines_per_delivery

    @property
    def time_when_vaccine_delivery_begins(self):
        """Days between the world's start date and the delivery start date (cached)."""
        if self._time_when_vaccine_delivery_begins is None:
            self._time_when_vaccine_delivery_begins = (
                self.vaccine_delivery_start_date - self.world.start_date
            ).days
        return self._time_when_vaccine_delivery_begins

    def get_additional_state_fields(self, agent_cls_name):
        """Return the vaccination bookkeeping fields for mobile agents."""
        if agent_cls_name == "BasicMobileAgent":
            return {"Total Vaccinated": 0, "Vaccines Available": 0}
        return {}

    def additional_reset_steps(self):
        """Nothing to reset for this component."""
        pass

    def get_n_actions(self, agent_cls_name):
        """Return None: this component exposes no actions."""
        return  # Passive component

    def generate_masks(self, completions=0):
        """Return an empty mask dictionary: this component exposes no actions."""
        return {}  # Passive component

    def get_data_dictionary(self):
        """
        Create a dictionary of data to push to the device
        """
        data_dict = DataFeed()
        data_dict.add_data(
            name="num_vaccines_per_delivery",
            data=self.num_vaccines_per_delivery,
        )
        data_dict.add_data(
            name="delivery_interval",
            data=self.delivery_interval,
        )
        data_dict.add_data(
            name="time_when_vaccine_delivery_begins",
            data=self.time_when_vaccine_delivery_begins,
        )
        data_dict.add_data(
            name="num_vaccines_available_t",
            data=np.zeros(self.n_agents),
            save_copy_and_apply_at_reset=True,
        )
        return data_dict

    def get_tensor_dictionary(self):
        """
        Create a dictionary of (Pytorch-accessible) data to push to the device
        """
        tensor_dict = DataFeed()
        return tensor_dict

    def component_step(self):
        """
        Deliver vaccines to every state when a delivery is due.

        With CUDA enabled the registered device step function is invoked.
        Otherwise, once deliveries have begun and the timestep is a multiple of
        delivery_interval, each agent's "Vaccines Available" is increased by its
        per-delivery amount.
        """
        if self.world.use_cuda:
            self.world.cuda_component_step[self.name](
                self.world.cuda_data_manager.device_data("vaccinated"),
                self.world.cuda_data_manager.device_data("num_vaccines_per_delivery"),
                self.world.cuda_data_manager.device_data("num_vaccines_available_t"),
                self.world.cuda_data_manager.device_data("delivery_interval"),
                self.world.cuda_data_manager.device_data(
                    "time_when_vaccine_delivery_begins"
                ),
                self.world.cuda_data_manager.device_data(
                    f"{_OBSERVATIONS}_a_{self.name}-t_until_next_vaccines"
                ),
                self.world.cuda_data_manager.device_data(
                    f"{_OBSERVATIONS}_p_{self.name}-t_until_next_vaccines"
                ),
                self.world.cuda_data_manager.device_data("_timestep_"),
                self.world.cuda_data_manager.meta_info("n_agents"),
                self.world.cuda_data_manager.meta_info("episode_length"),
                block=self.world.cuda_function_manager.block,
                grid=self.world.cuda_function_manager.grid,
            )
        else:
            # Do nothing if vaccines are not available yet
            if self.world.timestep < self.time_when_vaccine_delivery_begins:
                return

            # Do nothing if this is not the start of a delivery interval.
            # Vaccines are delivered at the start of each interval.
            if (self.world.timestep % self.delivery_interval) != 0:
                return

            # Deliver vaccines to each state
            for aidx, vaccines in enumerate(self.num_vaccines_per_delivery):
                self.world.agents[aidx].state["Vaccines Available"] += vaccines

    def generate_observations(self):
        """
        Return the (normalized) time until the next vaccine delivery for the
        agents (key "a") and the planner, plus the next vaccination rate when
        observe_rate is set.
        """
        # Allow the agents/planner to know when the next vaccines might come.

        if self._t_first_delivery is None:
            # First timestep at or after the delivery start that is a multiple
            # of delivery_interval (deliveries only happen on such timesteps).
            self._t_first_delivery = int(self.time_when_vaccine_delivery_begins)
            while (self._t_first_delivery % self.delivery_interval) != 0:
                self._t_first_delivery += 1

        next_t = self.world.timestep + 1
        if next_t <= self._t_first_delivery:
            # NOTE(review): this value is already divided by delivery_interval
            # and is divided by it again when the observations are built below
            # -- confirm whether that double normalization is intended.
            t_until_next_vac = np.minimum(
                1, (self._t_first_delivery - next_t) / self.delivery_interval
            )
            next_vax_rate = 0.0
        else:
            t_since_last_vac = next_t % self.delivery_interval
            t_until_next_vac = self.delivery_interval - t_since_last_vac
            next_vax_rate = self.daily_vaccines_per_million_people / 1e6
        t_vec = t_until_next_vac * np.ones(self.n_agents)
        r_vec = next_vax_rate * np.ones(self.n_agents)

        # Normalized observations
        planner_idx = self.world.planner.idx
        obs_dict = dict()
        obs_dict["a"] = {"t_until_next_vaccines": t_vec / self.delivery_interval}
        obs_dict[planner_idx] = {
            "t_until_next_vaccines": t_until_next_vac / self.delivery_interval
        }

        if self.observe_rate:
            obs_dict["a"]["next_vaccination_rate"] = r_vec
            # Use the same planner key as above instead of a hard-coded "p".
            obs_dict[planner_idx]["next_vaccination_rate"] = float(next_vax_rate)

        return obs_dict