ai-econ/envs/econ_wrapper.py

from collections import OrderedDict
from copy import deepcopy
from typing import Any, Callable, List, Optional, Sequence, Type, Union
from ai_economist.foundation.base import base_env

import gym
import gym.spaces
import numpy as np

from stable_baselines3.common.vec_env.base_vec_env import VecEnv, VecEnvIndices, VecEnvObs, VecEnvStepReturn
from stable_baselines3.common.vec_env.util import copy_obs_dict, dict_to_obs, obs_space_info

from ai_economist import foundation

class EconVecEnv(VecEnv, gym.Env):
    """
    Expose one AI-Economist foundation environment through the Stable-Baselines3 ``VecEnv`` interface.

    A single foundation environment is built from ``env_config``; every agent in ``env.world``
    is presented as one "environment" slot of the vectorized env (``num_envs == env.world.n_agents``).
    Actions passed to :meth:`step_async` are indexed by agent and forwarded to the wrapped
    environment keyed by the agent index as a string (``"0"``, ``"1"``, ...). Entries stored under
    the ``"p"`` key of the wrapped environment's outputs are not exposed.

    :param env_config: keyword arguments forwarded to ``foundation.make_env_instance``
    """

    # Key of the wrapped environment's output dicts that has no agent slot in this wrapper.
    _PLANNER_KEY = "p"
    # Per-agent observation entries that are dropped before building the observation vector.
    _NON_FEATURE_KEYS = ("time", "action_mask")

    def __init__(self, env_config):
        """
        Build the wrapped environment and derive the gym spaces from a first reset.

        :param env_config: keyword arguments forwarded to ``foundation.make_env_instance``
        """
        self.config = env_config
        env = foundation.make_env_instance(**env_config)
        self.env = env

        # A first reset is required to discover the layout of a single agent's observation.
        obs = env.reset()
        # Seed the metrics baseline here so step_wait() also works when the caller
        # steps before calling reset() explicitly.
        self.metrics = env.scenario_metrics()

        # One Box dimension per remaining top-level observation entry of agent "0".
        # Counting (instead of deleting keys) leaves the dict returned by the env untouched.
        n_features = sum(1 for key in obs["0"] if key not in self._NON_FEATURE_KEYS)
        self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape=(n_features,), dtype=np.float32)
        self.action_space = gym.spaces.Discrete(env.world.agents[0].action_spaces)

        # Each agent occupies one slot of the vectorized environment.
        self.num_envs = env.world.n_agents

        VecEnv.__init__(self, self.num_envs, self.observation_space, action_space=self.action_space)
        self.keys, shapes, dtypes = obs_space_info(self.observation_space)

        self.buf_obs = OrderedDict(
            [(k, np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k])) for k in self.keys]
        )
        self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
        self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        self.actions = None

    def step_async(self, actions: np.ndarray) -> None:
        """
        Store the actions to apply on the next :meth:`step_wait`.

        :param actions: one action per agent slot, indexed by agent number
        """
        self.actions = actions

    def step_wait(self) -> VecEnvStepReturn:
        """
        Apply the stored actions to the wrapped environment and return per-agent results.

        Every agent's info dict receives ``"social/productivity"`` (current value from
        ``scenario_metrics()``) and ``"trend/productivity"`` (change since the previous
        metrics snapshot). When the wrapped environment reports ``done["__all__"]``, the
        last observation of each agent is stored under ``"terminal_observation"`` and the
        environment is reset, so the returned observations belong to the new episode.

        :return: tuple ``(observations, rewards, dones, infos)``
        """
        # The wrapped environment expects a dict keyed by the agent index as a string.
        econ_actions = {str(agent_idx): action for agent_idx, action in enumerate(self.actions)}

        obs, rew, done, info = self.env.step(econ_actions)
        obs_g = self._convert_econ_obs_to_gym(obs)
        rew_g = self._convert_econ_to_gym(rew)
        info_g = self._convert_econ_to_gym(info)

        # Collect metrics and the productivity change relative to the previous snapshot.
        prev_metrics = self.metrics
        self.metrics = self.env.scenario_metrics()
        curr_prod = self.metrics["social/productivity"]
        trend_prod = curr_prod - prev_metrics["social/productivity"]
        for agent_info in info_g:
            agent_info["social/productivity"] = curr_prod
            agent_info["trend/productivity"] = trend_prod

        episode_over = done["__all__"]
        done_g = [False] * self.num_envs
        if episode_over:
            for agent_idx in range(self.num_envs):
                done_g[agent_idx] = episode_over
                # Must be captured before reset() replaces the observations.
                info_g[agent_idx]["terminal_observation"] = obs_g[agent_idx]
            obs_g = self.reset()

        return (np.copy(obs_g), np.copy(rew_g), np.copy(done_g), deepcopy(info_g))

    # fix with malformed action tensor from sb3 predict method
    def step_predict(self, actions):
        """
        Step using only the first element of ``actions``.

        :param actions: container whose first element holds the per-agent actions
        :return: result of ``self.step`` on ``actions[0]``
        """
        return self.step(actions[0])

    def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
        """
        Seed the wrapped environment.

        There is only one underlying environment, so it is seeded exactly once.

        :param seed: seed to use; a random one is drawn when ``None``
        :return: the value returned by the wrapped environment's ``seed``, repeated once per agent slot
        """
        if seed is None:
            # Stay inside the positive int32 range: an upper bound of 2**32 - 1 overflows
            # on platforms where numpy's default integer type is 32-bit.
            # NOTE(review): the lower bound of 1 avoids a zero seed in case the wrapped
            # environment rejects it -- confirm against the environment's seed().
            seed = int(np.random.randint(1, np.iinfo(np.int32).max))
        result = self.env.seed(seed)
        return [result] * self.num_envs

    def reset(self) -> VecEnvObs:
        """
        Reset the wrapped environment and refresh the metrics snapshot.

        :return: stacked per-agent observations
        """
        obs = self.env.reset()
        self.metrics = self.env.scenario_metrics()
        return self._convert_econ_obs_to_gym(obs)

    def close(self) -> None:
        """Close the wrapped environment if it provides a ``close`` method."""
        close_fn = getattr(self.env, "close", None)
        if callable(close_fn):
            close_fn()

    def get_images(self) -> Sequence[np.ndarray]:
        """
        Return the frames used by ``VecEnv.render``.

        All agent slots share one environment, so a single frame is returned.
        NOTE(review): assumes the wrapped environment implements ``render(mode=...)`` -- confirm.
        """
        return [self.env.render(mode="rgb_array")]

    def render(self, mode: str = "human") -> Optional[np.ndarray]:
        """
        Gym environment rendering.

        With a single agent slot (``self.num_envs == 1``) the call is passed directly to the
        wrapped environment; otherwise it is delegated to the base class ``render``.

        :param mode: The rendering type.
        """
        if self.num_envs == 1:
            return self.env.render(mode=mode)
        return super().render(mode=mode)

    def _save_obs(self, env_idx: int, obs: VecEnvObs) -> None:
        """
        Write one slot's observation into the observation buffer.

        :param env_idx: index of the slot to write
        :param obs: observation for that slot (indexed by key unless the key is ``None``)
        """
        for key in self.keys:
            if key is None:
                self.buf_obs[key][env_idx] = obs
            else:
                self.buf_obs[key][env_idx] = obs[key]

    def _obs_from_buf(self) -> VecEnvObs:
        """Return a copy of the observation buffer converted for ``self.observation_space``."""
        return dict_to_obs(self.observation_space, copy_obs_dict(self.buf_obs))

    def get_attr(self, attr_name: str, indices: VecEnvIndices = None) -> List[Any]:
        """Return attribute from vectorized environment (see base class)."""
        return [getattr(env_i, attr_name) for env_i in self._get_target_envs(indices)]

    def set_attr(self, attr_name: str, value: Any, indices: VecEnvIndices = None) -> None:
        """Set attribute inside vectorized environments (see base class)."""
        for env_i in self._get_target_envs(indices):
            setattr(env_i, attr_name, value)

    def env_method(self, method_name: str, *method_args, indices: VecEnvIndices = None, **method_kwargs) -> List[Any]:
        """
        Call instance methods of vectorized environments.

        The method is invoked once per selected slot; all slots resolve to the same
        wrapped environment.
        """
        return [
            getattr(env_i, method_name)(*method_args, **method_kwargs)
            for env_i in self._get_target_envs(indices)
        ]

    def env_is_wrapped(self, wrapper_class: Type[gym.Wrapper], indices: VecEnvIndices = None) -> List[bool]:
        """Check if worker environments are wrapped with a given wrapper"""
        target_envs = self._get_target_envs(indices)
        # Import here to avoid a circular import
        from stable_baselines3.common import env_util

        return [env_util.is_wrapped(env_i, wrapper_class) for env_i in target_envs]

    def _get_target_envs(self, indices: VecEnvIndices) -> List[gym.Env]:
        """
        Resolve slot indices to environment objects.

        Every slot is backed by the single wrapped environment, so the result is that
        environment repeated once per selected index.
        """
        return [self.env for _ in self._get_indices(indices)]

    # Convert econ to gym
    def _convert_econ_to_gym(self, econ):
        """
        Turn a dict keyed by agent id into a list ordered by agent index.

        Entries are looked up by ``str(index)`` for every agent slot, so the result always
        lines up with the rows of the observation array; other keys (such as ``"p"``) are
        ignored and ``econ`` itself is not modified.

        :param econ: dict keyed by agent id string
        :return: list of length ``self.num_envs``
        """
        return [econ[str(agent_idx)] for agent_idx in range(self.num_envs)]

    def _convert_gym_to_acon(self, gy):
        """
        Build a dict from an iterable of ``(key, value)`` pairs.

        NOTE(review): despite the name, this does not invert ``_convert_econ_to_gym``
        (which returns plain values, not pairs) -- confirm the intended input with callers.

        :param gy: iterable of ``(key, value)`` pairs
        :return: dict mapping each key to its value
        """
        return {k: v for k, v in gy}

    def _convert_econ_obs_to_gym(self, econ):
        """
        Stack the per-agent observations into one array, one row per agent index.

        The ``"time"`` and ``"action_mask"`` entries of each agent observation are left out.
        ``econ`` and the nested observation dicts are not modified.

        :param econ: dict keyed by agent id string, each value being that agent's observation dict
        :return: array produced by ``np.stack`` over the per-agent arrays
        """
        rows = []
        for agent_idx in range(self.num_envs):
            agent_obs = econ[str(agent_idx)]
            features = {k: v for k, v in agent_obs.items() if k not in self._NON_FEATURE_KEYS}
            rows.append(np.array(self.extract_dict(features)))
        return np.stack(rows)

    def extract_dict(self, obj):
        """
        Collect the values of ``obj`` into a list, recursing into nested dicts.

        For a dict the values are taken in iteration order; for any other iterable the
        items themselves are taken. A nested dict becomes a nested list (the structure is
        not flattened).

        :param obj: dict or other iterable
        :return: list of values, with nested dicts converted to nested lists
        """
        values = obj.values() if isinstance(obj, dict) else obj
        return [self.extract_dict(v) if isinstance(v, dict) else v for v in values]