diff --git a/.gitignore b/.gitignore index 2a24376..ee66eb6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ logs/* -_pycache_ \ No newline at end of file +_pycache_ +*.pyc +*tfevents* \ No newline at end of file diff --git a/agents/consumer_agent.py b/agents/consumer_agent.py new file mode 100644 index 0000000..36c6999 --- /dev/null +++ b/agents/consumer_agent.py @@ -0,0 +1,11 @@ +from ai_economist.foundation.base.base_agent import BaseAgent, agent_registry + + +@agent_registry.add +class ConsumerAgent(BaseAgent): + """ + A basic mobile agent represents an individual actor in the economic simulation. + "Mobile" refers to agents of this type being able to move around in the 2D world. + """ + + name = "ConsumerAgent" \ No newline at end of file diff --git a/agents/trading_agent.py b/agents/trading_agent.py new file mode 100644 index 0000000..10a8245 --- /dev/null +++ b/agents/trading_agent.py @@ -0,0 +1,11 @@ +from ai_economist.foundation.base.base_agent import BaseAgent, agent_registry + + +@agent_registry.add +class TradingAgent(BaseAgent): + """ + A basic mobile agent represents an individual actor in the economic simulation. + "Mobile" refers to agents of this type being able to move around in the 2D world. + """ + + name = "TradingAgent" \ No newline at end of file diff --git a/ai_economist/LICENSE.txt b/ai_economist/LICENSE.txt new file mode 100644 index 0000000..53866a3 --- /dev/null +++ b/ai_economist/LICENSE.txt @@ -0,0 +1,12 @@ +Copyright (c) 2020, Salesforce.com, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +* Neither the name of Salesforce.com nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/ai_economist/__init__.py b/ai_economist/__init__.py new file mode 100644 index 0000000..5dd0f79 --- /dev/null +++ b/ai_economist/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +from ai_economist import foundation diff --git a/ai_economist/datasets/__init__.py b/ai_economist/datasets/__init__.py new file mode 100644 index 0000000..93bee4b --- /dev/null +++ b/ai_economist/datasets/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause diff --git a/ai_economist/datasets/covid19_datasets/README.md b/ai_economist/datasets/covid19_datasets/README.md new file mode 100644 index 0000000..a862ebf --- /dev/null +++ b/ai_economist/datasets/covid19_datasets/README.md @@ -0,0 +1,27 @@ +## List of COVID-19 datasources used + +1. **US state government policies** (Oxford Covid-19 Government Response Tracker (OxCGRT)) + + https://github.com/OxCGRT/USA-covid-policy + + +2. **US federal government direct payments** (Committee for a Responsible Federal Budget) + + https://www.covidmoneytracker.org/ + + https://docs.google.com/spreadsheets/d/1Nr_J5wLfUT4IzqSXkYbdOXrRgEkBxhX0/edit#gid=682404301 + + +3. **US deaths data** (COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University) + + https://github.com/CSSEGISandData/COVID-19 + + +4. **US unemployment** (Bureau of Labor and Statistics) + + https://www.bls.gov/lau/ + + +5. **US vaccinations** (Our World in Data) + + https://ourworldindata.org/covid-vaccinations \ No newline at end of file diff --git a/ai_economist/datasets/covid19_datasets/__init__.py b/ai_economist/datasets/covid19_datasets/__init__.py new file mode 100644 index 0000000..93bee4b --- /dev/null +++ b/ai_economist/datasets/covid19_datasets/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause diff --git a/ai_economist/datasets/covid19_datasets/data_and_fitted_params/fitted_params.json b/ai_economist/datasets/covid19_datasets/data_and_fitted_params/fitted_params.json new file mode 100644 index 0000000..0e43bbe --- /dev/null +++ b/ai_economist/datasets/covid19_datasets/data_and_fitted_params/fitted_params.json @@ -0,0 +1 @@ +{"settings": {"LAST_DATE_IN_TRAIN_SET": "2020-11-30", "LAST_DATE_IN_VAL_SET": "2020-12-31", "FILTER_SIZE_UNEMPLOYMENT": 600, "SIMILARITY_REGULARIZATION_UNEMPLOYMENT": 0.5, "SIMILARITY_REGULARIZATION_SIR": 1.0, "env": {"economic_reward_crra_eta": 2, "start_date": "2020-03-22", "infection_too_sick_to_work_rate": 0.1, "pop_between_age_18_65": 0.6, "risk_free_interest_rate": 0.03}}, "BETA_DELAY": 29, "BETA_SLOPES": [-0.005233703270905861, -0.0025096633588151916, -0.009978235905460415, -0.0030498590463604695, -0.009739434907947386, -0.016118405619767906, -0.02216394155353909, -0.01501269383871521, -0.01772321933003828, -0.0035091211514514035, -0.00842426615392502, -0.006421715854186978, -0.007605442605186503, -0.013145828895296467, -0.013021772081998916, -0.011242043826411524, -0.009583325687728843, -0.007332846816941151, -0.012677925062908617, -0.010726278623401973, -0.01320149654376923, -0.020049202898711607, -0.020079903692931216, -0.012961592439659947, -0.009660688484644651, -0.01197325796370973, -0.0031953183140946543, -0.011219460393603341, -0.01161877928285559, -0.010572606153577992, -0.021402323448902912, -0.011571328478690551, -0.015057575233879374, -0.011653349301328355, -0.01420212156352489, -0.015437622231824741, -0.01247166799443816, -0.009058745964208309, -0.016065672658303675, -0.015379928352481361, -0.004690098051335516, -0.0027855147080954887, -0.0017843204990313376, -0.006520659060682759, -0.008618617017313706, -0.011942638319771232, -0.009076932750326299, 
-0.005580847854404909, -0.01311747891796651, -0.008677318047675145, -0.009906449614086134], "BETA_INTERCEPTS": [0.10550336869678541, 0.09352399879913631, 0.13367068043188485, 0.10161651854073984, 0.1457722708748621, 0.16887787187410597, 0.21729992109816224, 0.17450455331397685, 0.18931639275182566, 0.09984961233900469, 0.12133822448461924, 0.12722467531413584, 0.12175264651385272, 0.1552042495807431, 0.145615506170138, 0.1258875328197989, 0.13389493026715132, 0.1254821302501782, 0.14642572151467384, 0.1563475618087581, 0.16032143059458762, 0.19910298878230218, 0.18446412122461148, 0.15385189814506917, 0.1302208274241928, 0.13736418751161014, 0.1033837852314781, 0.13692185665344278, 0.14309430680693005, 0.13749187639644195, 0.19973224088216823, 0.17203054106935328, 0.18082127751622276, 0.15117289048468205, 0.14160189224730083, 0.17488934332776923, 0.1338827960870001, 0.13066194254313895, 0.17067714865126077, 0.18532521741844085, 0.10703810005198379, 0.09841913100945021, 0.09289613699392442, 0.11940190498430303, 0.12161571930284992, 0.1285837104949546, 0.12668802839364063, 0.10747537477804284, 0.1625164048222745, 0.12008200309258711, 0.13872464377603697], "POLICY_START_DATE": "2020-01-01", "FILTER_LEN": 600, "CONV_LAMBDAS": [27.72427749633789, 62.91925048828125, 129.2318572998047, 264.43182373046875, 542.06005859375], "UNEMPLOYMENT_BIAS": [2.1136703491210938, 4.048089027404785, 4.277402877807617, 3.6710152626037598, 4.371125221252441, 3.5259063243865967, 3.423633575439453, 3.4609005451202393, 4.390212535858154, 3.9631943702697754, 2.5867717266082764, 3.5042591094970703, 2.3830432891845703, 4.411620140075684, 2.3750250339508057, 2.5980374813079834, 2.2534663677215576, 2.4830000400543213, 4.428332805633545, 2.4989376068115234, 3.071929931640625, 1.8633426427841187, 4.299410820007324, 3.195344924926758, 4.691559791564941, 2.994248628616333, 3.1809568405151367, 2.121537923812866, 5.0345377922058105, 2.9758565425872803, 4.2665019035339355, 4.322226047515869, 
3.248310089111328, 3.0009894371032715, 1.353360652923584, 3.418184280395508, 2.7211599349975586, 2.9320943355560303, 3.8583884239196777, 3.613877773284912, 1.6023359298706055, 2.3257055282592773, 4.022616863250732, 3.996405601501465, 1.5916776657104492, 2.3988192081451416, 3.0608890056610107, 4.039086818695068, 3.984750747680664, 2.440520763397217, 3.8146729469299316], "GROUPED_CONVOLUTIONAL_FILTER_WEIGHTS": [[[-0.8623685240745544]], [[-0.45006391406059265]], [[-0.00237645348533988]], [[0.8720516562461853]], [[1.3083926439285278]], [[-1.8319092988967896]], [[-0.0032714344561100006]], [[0.017020821571350098]], [[0.9376689195632935]], [[0.7846916913986206]], [[-0.634898841381073]], [[-0.39103013277053833]], [[0.765248715877533]], [[0.5142654180526733]], [[0.8652791380882263]], [[-0.9020224809646606]], [[0.2602136433124542]], [[0.2674458622932434]], [[0.6922929883003235]], [[0.28152236342430115]], [[-1.3684333562850952]], [[-1.136441707611084]], [[0.7635596990585327]], [[0.6395389437675476]], [[1.2692675590515137]], [[-1.3872078657150269]], [[-0.18349117040634155]], [[0.731469988822937]], [[0.4884212017059326]], [[0.7628554105758667]], [[-1.9923702478408813]], [[-0.7254942059516907]], [[0.6672083139419556]], [[0.7401175498962402]], [[0.8266059756278992]], [[-2.0412659645080566]], [[0.2596452832221985]], [[0.7648528814315796]], [[0.628707230091095]], [[0.564267098903656]], [[-0.12891456484794617]], [[-0.3544159233570099]], [[-0.3368719518184662]], [[0.37673136591911316]], [[0.9932129383087158]], [[-1.196645975112915]], [[0.019427649676799774]], [[-0.032719578593969345]], [[1.1447675228118896]], [[0.6738105416297913]], [[-1.0570842027664185]], [[0.19370649755001068]], [[0.6044667363166809]], [[0.5649805068969727]], [[0.7912497520446777]], [[-1.6130324602127075]], [[0.14945188164710999]], [[1.2974058389663696]], [[0.27230948209762573]], [[1.4573169946670532]], [[-1.1152888536453247]], [[-0.1312718242406845]], [[0.4778050482273102]], [[0.8300752639770508]], 
[[0.3758259117603302]], [[-1.8520358800888062]], [[-1.14521324634552]], [[1.0409947633743286]], [[1.6186996698379517]], [[0.4937899112701416]], [[-1.8565905094146729]], [[-0.5699316263198853]], [[1.2526781558990479]], [[1.0222679376602173]], [[0.8439081907272339]], [[-1.1728674173355103]], [[-0.4200057089328766]], [[0.24613676965236664]], [[0.9689220190048218]], [[0.8804899454116821]], [[-0.6283168792724609]], [[0.018899045884609222]], [[0.4649633467197418]], [[0.5606456995010376]], [[0.7042286992073059]], [[-0.969465970993042]], [[0.7879332304000854]], [[0.4766736328601837]], [[0.38358408212661743]], [[0.5555617809295654]], [[-1.3140840530395508]], [[-0.06866976618766785]], [[0.20318199694156647]], [[1.156029224395752]], [[0.4719393253326416]], [[-0.8908430337905884]], [[0.4366728663444519]], [[-0.03466540947556496]], [[0.5951891541481018]], [[0.26352477073669434]], [[-0.4593994617462158]], [[-0.3329457938671112]], [[-0.4925514757633209]], [[-0.0024775229394435883]], [[1.483317255973816]], [[-2.580217123031616]], [[-0.21124900877475739]], [[1.3222242593765259]], [[0.9738183617591858]], [[1.1125288009643555]], [[-1.5558487176895142]], [[-0.21986360847949982]], [[1.4449234008789062]], [[1.1203645467758179]], [[0.8254041075706482]], [[-1.3072439432144165]], [[-0.3959537446498871]], [[0.5149790048599243]], [[0.5998527407646179]], [[0.6167992949485779]], [[-0.03933652117848396]], [[-0.24180862307548523]], [[0.4485468864440918]], [[1.2529921531677246]], [[0.16725485026836395]], [[-1.1775641441345215]], [[0.210318922996521]], [[0.6509057879447937]], [[0.22740843892097473]], [[0.9219421744346619]], [[-1.1539403200149536]], [[0.07256040722131729]], [[0.6274937391281128]], [[0.7214186191558838]], [[0.2535996437072754]], [[-1.1221498250961304]], [[0.03368315473198891]], [[0.4749435782432556]], [[0.4006294906139374]], [[0.35465008020401]], [[-0.7560990452766418]], [[0.8384001851081848]], [[0.7217393517494202]], [[0.9829040765762329]], [[1.6163955926895142]], 
[[-1.3966710567474365]], [[-0.06901184469461441]], [[0.9089240431785583]], [[0.7274520397186279]], [[0.6534935235977173]], [[-2.2978711128234863]], [[-0.45931634306907654]], [[0.682558536529541]], [[1.9217747449874878]], [[0.46477916836738586]], [[-0.9368125796318054]], [[-0.7055768370628357]], [[0.45245739817619324]], [[1.149850845336914]], [[0.08268105238676071]], [[-1.498590111732483]], [[-1.053027629852295]], [[1.1536319255828857]], [[0.36754435300827026]], [[1.370290756225586]], [[-1.5895041227340698]], [[-0.022197451442480087]], [[1.3241043090820312]], [[0.33694934844970703]], [[0.6450698971748352]], [[-1.221463680267334]], [[-0.8520716428756714]], [[0.31779128313064575]], [[1.012588381767273]], [[1.3222732543945312]], [[-1.7537246942520142]], [[-0.3384951949119568]], [[1.3620738983154297]], [[0.1112571433186531]], [[1.0902765989303589]], [[-1.899932861328125]], [[0.19727519154548645]], [[0.09953983128070831]], [[0.922265887260437]], [[1.0663707256317139]], [[-1.2837663888931274]], [[-0.7540831565856934]], [[0.7365661859512329]], [[1.1298480033874512]], [[0.6919987797737122]], [[-1.5676841735839844]], [[-0.3267393708229065]], [[0.3632243871688843]], [[0.41000232100486755]], [[1.6217762231826782]], [[-1.136587142944336]], [[-0.27317526936531067]], [[0.514305830001831]], [[0.01356478687375784]], [[1.5590686798095703]], [[-1.5125669240951538]], [[-0.6680471897125244]], [[0.7636657953262329]], [[0.5478187799453735]], [[1.22959303855896]], [[-1.6629095077514648]], [[-0.5278839468955994]], [[0.15898258984088898]], [[1.000220775604248]], [[1.083420991897583]], [[-1.2720608711242676]], [[0.23574863374233246]], [[0.4396874010562897]], [[0.7104308605194092]], [[0.6883097887039185]], [[-1.5120340585708618]], [[-0.27725064754486084]], [[1.1652095317840576]], [[0.6678454279899597]], [[0.4044547379016876]], [[-1.588853120803833]], [[0.10982409119606018]], [[0.7705152034759521]], [[0.7239919900894165]], [[0.5325650572776794]], [[-2.0845890045166016]], 
[[0.9196359515190125]], [[0.8362395167350769]], [[0.6898489594459534]], [[0.07182575017213821]], [[-0.7716994285583496]], [[-0.5762143731117249]], [[0.46993783116340637]], [[0.42921707034111023]], [[0.8545767068862915]], [[-1.801488995552063]], [[0.18351492285728455]], [[1.318638801574707]], [[0.7943655848503113]], [[0.30118492245674133]], [[-0.8504781723022461]], [[-0.19791175425052643]], [[0.7364271879196167]], [[-0.31268006563186646]], [[1.453898549079895]], [[-1.6999027729034424]], [[-0.3689071536064148]], [[0.6285111308097839]], [[0.9383906722068787]], [[0.9711975455284119]], [[-1.2704155445098877]], [[-0.11265530437231064]], [[0.25702616572380066]], [[0.07824986428022385]], [[0.6725088953971863]]], "VALUE_OF_LIFE": 10000000, "INFERRED_WEIGHTAGE_ON_AGENT_HEALTH_INDEX": [0.685, 0.683, 0.718, 0.627, 0.809, 0.808, 0.646, 0.839, 0.765, 0.757, 0.734, 0.889, 0.8150000000000001, 0.769, 0.737, 0.752, 0.75, 0.849, 0.679, 0.875, 0.859, 0.672, 0.733, 0.862, 0.6950000000000001, 0.808, 0.662, 0.855, 0.762, 0.865, 0.687, 0.746, 0.641, 0.847, 0.747, 0.781, 0.795, 0.851, 0.756, 0.673, 0.711, 0.41200000000000003, 0.5690000000000001, 0.787, 0.866, 0.968, 0.872, 0.735, 0.878, 0.754, 0.9380000000000001], "INFERRED_WEIGHTAGE_ON_PLANNER_HEALTH_INDEX": 0.757, "MAX_MARGINAL_AGENT_ECONOMIC_INDEX": [0.0027285723481327295, 0.002734474139288068, 0.0027149636298418045, 0.002733101835474372, 0.002698722528293729, 0.002695730421692133, 0.0026784248184412718, 0.0026936873327940702, 0.0026890540029853582, 0.002731619868427515, 0.0027165759820491076, 0.0027218845207244158, 0.0027180504985153675, 0.0026966372970491648, 0.002704625716432929, 0.002718393225222826, 0.0027127708308398724, 0.002722038421779871, 0.0026984831783920527, 0.0027032513171434402, 0.002696973504498601, 0.0026874069590121508, 0.002697594463825226, 0.0027030734345316887, 0.0027131896931678057, 0.002711491659283638, 0.0027325651608407497, 0.0027140663005411625, 0.002711672568693757, 0.0027134160045534372, 
0.0026841098442673683, 0.002694197930395603, 0.0026839885395020247, 0.002705829916521907, 0.00271249539218843, 0.002696133917197585, 0.0027130150701850653, 0.0027131501119583845, 0.0026947411242872477, 0.002690640976652503, 0.002726985840126872, 0.002733224770054221, 0.002736929804086685, 0.0027190325781702995, 0.0027239227201789618, 0.0027165571227669716, 0.0027152288239449263, 0.0027226179372519255, 0.0027034329250454903, 0.0027189261745661497, 0.0027153033297508955], "MAX_MARGINAL_PLANNER_ECONOMIC_INDEX": 0.00270779593847692, "MAX_MARGINAL_AGENT_HEALTH_INDEX": [-3.0090674044913612e-05, -4.101892773178406e-05, -2.391263478784822e-05, -5.501513078343123e-05, -3.641096918727271e-05, -8.148177585098892e-05, -0.00033768793218769133, -8.136823453241959e-05, -0.00017953713540919125, -5.476094884215854e-05, -6.003917951602489e-05, -1.9584411347750574e-05, -2.692215821298305e-05, -8.299046021420509e-05, -7.80503178248182e-05, -1.1469884157122578e-05, -2.996458533743862e-05, -3.8060217775637284e-05, -0.00028306362219154835, -4.246145545039326e-05, -5.934169530519284e-05, -0.0002345900284126401, -0.00020272599067538977, -1.6491414498887025e-05, -4.7763867769390345e-05, -2.500748996681068e-05, -5.226726716500707e-05, -9.981938092096243e-06, -5.143717862665653e-05, -2.055184631899465e-05, -0.0005827346467413008, -7.382390322163701e-05, -0.0012094955891370773, -1.7349766494589858e-05, -8.910564247344155e-06, -4.672993964049965e-05, -2.3844344468670897e-05, -1.9828426957246847e-05, -6.0026035498594865e-05, -0.00021144225320313126, -3.511341492412612e-05, -2.745154961303342e-05, -0.00011554994125617668, -2.7730202418752015e-05, -6.093723641242832e-06, -4.6192471927497536e-05, -2.1770805687992834e-05, -0.0001274808746529743, -8.22813126433175e-06, -3.4170137951150537e-05, -1.28951739952754e-06], "MAX_MARGINAL_PLANNER_HEALTH_INDEX": -0.0001451095740776509, "MIN_MARGINAL_AGENT_ECONOMIC_INDEX": [0.0020356886088848114, 0.0022419975139200687, 0.002160344272851944, 
0.0024188596289604902, 0.0020349116530269384, 0.002246933989226818, 0.002235130639746785, 0.0022626728750765324, 0.002382050035521388, 0.0021890902426093817, 0.0021741720847785473, 0.0017624523025006056, 0.0023508218582719564, 0.001991693861782551, 0.0019486609380692244, 0.002157807582989335, 0.00225676316767931, 0.0023168111220002174, 0.002234051935374737, 0.002510908292606473, 0.0023535331711173058, 0.0017812215955927968, 0.0017329421825706959, 0.0023532791528850794, 0.002227779245004058, 0.0022383893374353647, 0.0023805166129022837, 0.0024882552679628134, 0.0010899916524067521, 0.0021604555658996105, 0.001897018519230187, 0.0024196451995521784, 0.001992126228287816, 0.0022069797851145267, 0.001977990148589015, 0.002120157005265355, 0.0020803255029022694, 0.002108931075781584, 0.0019574600737541914, 0.0020899344235658646, 0.002084167208522558, 0.002109887544065714, 0.0022026910446584225, 0.0022563086822628975, 0.002252459991723299, 0.0023787757381796837, 0.002311331918463111, 0.0021701548248529434, 0.002184213837608695, 0.0020549052860587835, 0.0025167958810925484], "MIN_MARGINAL_PLANNER_ECONOMIC_INDEX": 0.002124679973348975, "MIN_MARGINAL_AGENT_HEALTH_INDEX": [-0.003964281640946865, -0.0010713868541643023, -0.0076567428186535835, -0.002871995558962226, -0.008433903567492962, -0.00899407546967268, -0.009911752305924892, -0.009194649755954742, -0.009664129465818405, -0.002949925372377038, -0.0065671540796756744, -0.007176358252763748, -0.006536103319376707, -0.008727999404072762, -0.007968118414282799, -0.006505947560071945, -0.007439949084073305, -0.0069662826135754585, -0.008316239342093468, -0.008579589426517487, -0.00872408039867878, -0.009386059828102589, -0.00945289060473442, -0.00848479475826025, -0.00750832399353385, -0.00821556244045496, -0.0032150805927813053, -0.007467340212315321, -0.008297407068312168, -0.00770018994808197, -0.009744877927005291, -0.00937261339277029, -0.009218289516866207, -0.00839361734688282, -0.007491708733141422, 
-0.009187941439449787, -0.007287331856787205, -0.007290090434253216, -0.009112140163779259, -0.009490792639553547, -0.004297951702028513, -0.0013750526122748852, -0.0013287087203934789, -0.0064947898499667645, -0.005829110741615295, -0.006879822351038456, -0.006967827212065458, -0.00535054225474596, -0.008951004594564438, -0.006295738276094198, -0.007645895704627037], "MIN_MARGINAL_PLANNER_HEALTH_INDEX": -0.007381112780421972} \ No newline at end of file diff --git a/ai_economist/datasets/covid19_datasets/data_and_fitted_params/model_constants.json b/ai_economist/datasets/covid19_datasets/data_and_fitted_params/model_constants.json new file mode 100644 index 0000000..faff7ff --- /dev/null +++ b/ai_economist/datasets/covid19_datasets/data_and_fitted_params/model_constants.json @@ -0,0 +1 @@ +{"DATE_FORMAT": "%Y-%m-%d", "STRINGENCY_POLICY_KEY": "StringencyIndex", "NUM_STRINGENCY_LEVELS": 10, "SIR_SMOOTHING_STD": 10, "SIR_MORTALITY": 0.02, "SIR_GAMMA": 0.07142857142857142, "US_STATE_IDX_TO_STATE_NAME": {"0": "Alabama", "1": "Alaska", "2": "Arizona", "3": "Arkansas", "4": "California", "5": "Colorado", "6": "Connecticut", "7": "Delaware", "8": "District of Columbia", "9": "Florida", "10": "Georgia", "11": "Hawaii", "12": "Idaho", "13": "Illinois", "14": "Indiana", "15": "Iowa", "16": "Kansas", "17": "Kentucky", "18": "Louisiana", "19": "Maine", "20": "Maryland", "21": "Massachusetts", "22": "Michigan", "23": "Minnesota", "24": "Mississippi", "25": "Missouri", "26": "Montana", "27": "Nebraska", "28": "Nevada", "29": "New Hampshire", "30": "New Jersey", "31": "New Mexico", "32": "New York", "33": "North Carolina", "34": "North Dakota", "35": "Ohio", "36": "Oklahoma", "37": "Oregon", "38": "Pennsylvania", "39": "Rhode Island", "40": "South Carolina", "41": "South Dakota", "42": "Tennessee", "43": "Texas", "44": "Utah", "45": "Vermont", "46": "Virginia", "47": "Washington", "48": "West Virginia", "49": "Wisconsin", "50": "Wyoming"}, "US_STATE_POPULATION": [4903185, 740995, 
7278717, 3017804, 39512223, 5758736, 3565287, 973764, 705749, 21477737, 10617423, 1415872, 1787065, 12671821, 6732219, 3155070, 2913314, 4467673, 4648794, 1344212, 6045680, 6892503, 9986857, 5639632, 2976149, 6626371, 1068778, 1934408, 3080156, 1359711, 8882190, 2096829, 19453561, 10488084, 762062, 11689100, 3956971, 4217737, 12801989, 1059361, 5148714, 884659, 6829174, 28995881, 3205958, 623989, 8535519, 7614893, 1792147, 5822434, 578759], "US_POPULATION": 328737916, "GDP_PER_CAPITA": 65300} \ No newline at end of file diff --git a/ai_economist/datasets/covid19_datasets/data_and_fitted_params/real_world_data.npz b/ai_economist/datasets/covid19_datasets/data_and_fitted_params/real_world_data.npz new file mode 100644 index 0000000..93ef408 Binary files /dev/null and b/ai_economist/datasets/covid19_datasets/data_and_fitted_params/real_world_data.npz differ diff --git a/ai_economist/datasets/covid19_datasets/fit_model_parameters.ipynb b/ai_economist/datasets/covid19_datasets/fit_model_parameters.ipynb new file mode 100644 index 0000000..fcb5107 --- /dev/null +++ b/ai_economist/datasets/covid19_datasets/fit_model_parameters.ipynb @@ -0,0 +1,1450 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) 2021, salesforce.com, inc. \n", + "All rights reserved. 
\n", + "SPDX-License-Identifier: BSD-3-Clause \n", + "For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# This is a notebook that uses real-world data and fits models to calibrate the COVID-19 simulation environment\n", + "\n", + "### What does it mean to calibrate the simulation?\n", + "\n", + "Simply put, we want to set the parameters of the simulation so that its behavior is consistent with real-world data.\n", + "\n", + "To go into a bit more detail, the point of the simulation is to capture how public health policy decisions influence the spread of COVID-19 and influence the economy.\n", + "We model the spread of COVID-19 through a set of **SIR (+vaccination)** models, one for each US State.\n", + "We model the economy through a model of **unemployment** (again, one for each US State) and relief payments provided by the federal government.\n", + "\n", + "Both the SIR disease dynamics and unemployment respond to public health policy decisions.\n", + "This notebook quantifies those responses as measured in the real world data, and exports that quantification as parameters that the simulation can use.\n", + "\n", + "Another function of the simulator is to quantify **social welfare**, which is the metric that we want to maximize.\n", + "Our simulation defines social welfare as \n", + "$$W = \\alpha H + (1-\\alpha)E$$\n", + "where $H$ and $E$ are indices representing the health outcome (i.e. COVID deaths) and economic outcome (i.e. GDP). \n", + "$\\alpha$ is a weighting term (between 0 and 1).\n", + "\n", + "We don't know what the actual goals/priorities were of real-world policymakers, but we want to calibrate $\\alpha$ (separately for each US State) so that the resulting definition of social welfare is most consistent with public health policy decisions made in the real world." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime, timedelta\n", + "import json\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import os\n", + "import pickle\n", + "import scipy\n", + "from scipy.signal import convolve\n", + "from scipy.optimize import minimize\n", + "from tqdm import tqdm\n", + "\n", + "import ai_economist" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Install torch 1.8.0 or higher (required for the unemployment fits)\n", + "!pip install torch==1.8.0\n", + "\n", + "import torch\n", + "import torch.nn as nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before running this notebook, \n", + "1. Please use the **gather_real_world_data.ipynb** notebook to download and pre-processing the raw real-world data.\n", + "2. 
Please set the base directory path below; it is the folder into which the real_world data have been downloaded, and it corresponds to the BASE_DATA_DIR_PATH in gather_real_world_data.ipynb notebook (and defaults to \"/tmp/covid19_data\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "BASE_DATA_DIR_PATH = \"/tmp/covid19_data\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use the latest dated folder in BASE_DATA_DIR_PATH to fetch the real-world data\n", + "Note: if you do not wish to use the latest data, you will need to manually overwrite the data_dir variable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_dir = os.path.join(BASE_DATA_DIR_PATH, sorted(os.listdir(BASE_DATA_DIR_PATH))[-1])\n", + "print(\"In this notebook, we will use the real world data saved in '{}'\".format(data_dir))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read in the model constants" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(os.path.join(data_dir, \"model_constants.json\"), \"r\") as fp:\n", + " model_constants = json.load(fp)\n", + " \n", + "DATE_FORMAT = model_constants[\"DATE_FORMAT\"]\n", + "STRINGENCY_POLICY_KEY = model_constants[\"STRINGENCY_POLICY_KEY\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## This notebook uses the real-world data to calibrate the simulation.\n", + "\n", + "### What does it mean to calibrate the simulation?\n", + "\n", + "Simply put, we want to set the parameters of the simulation so that its behavior is consistent with real-world data.\n", + "\n", + "To go into a bit more detail, the point of the simulation is to capture how public health policy decisions influence the spread of COVID-19 and influence the economy.\n", + "We model the 
spread of COVID-19 through a set of **SIR (+vaccination)** models, one for each US State.\n", + "We model the economy through a model of **unemployment** (again, one for each US State) and relief payments provided by the federal government.\n", + "\n", + "Both the SIR disease dynamics and unemployment respond to public health policy decisions.\n", + "This notebook quantifies those responses as measured in the real world data, and exports that quantification as parameters that the simulation can use.\n", + "\n", + "Another function of the simulator is to quantify **social welfare**, which is the metric that we want to maximize.\n", + "Our simulation defines social welfare as \n", + "$$W = \\alpha H + (1-\\alpha)E$$\n", + "where $H$ and $E$ are indices representing the health outcome (i.e. COVID deaths) and economic outcome (i.e. GDP). \n", + "$\\alpha$ is a weighting term (between 0 and 1).\n", + "\n", + "We don't know what the actual goals/priorities were of real-world policymakers, but we want to calibrate $\\alpha$ (separately for each US State) so that the resulting definition of social welfare is most consistent with public health policy decisions made in the real world." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Set data / fit parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set up dictionary to write fitted parameters\n", + "fitted_params_dict = {'settings': {}}\n", + "fitted_params_filename = \"fitted_params.json\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, choose the dates we'll use to split the data.\n", + "\n", + "- Data until the last date for **training** can be used to directly fit parameters.\n", + "\n", + "- Date until the last date for **validation** can be used for measuring fit quality.\n", + "\n", + "Our default calibration settings use data in 2020 for fitting (training+validation) so that data in 2021 can be held out as a test set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Do fitting up until the last day in the train set:\n", + "fitted_params_dict['settings']['LAST_DATE_IN_TRAIN_SET'] = '2020-11-30'\n", + "# Cross validation should uses non-training data up until:\n", + "fitted_params_dict['settings']['LAST_DATE_IN_VAL_SET'] = '2020-12-31'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, set some parameters governing how the SIR and unemployment models use their data. The provided default settings yield good fits for the default date range (above)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Length of the convolutional filters to use in the unemployment fitting\n", + "fitted_params_dict['settings']['FILTER_SIZE_UNEMPLOYMENT'] = 600\n", + "# Weight of regularization term to enforce state-by-state similarity in unemployment fits\n", + "fitted_params_dict['settings']['SIMILARITY_REGULARIZATION_UNEMPLOYMENT'] = 0.5\n", + "\n", + "# Weight of regularization term to enforce state-by-state similarity in Beta fits (for SIR model)\n", + "fitted_params_dict['settings']['SIMILARITY_REGULARIZATION_SIR'] = 1.0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Lastly, set the environment configuration that we will use when we calibrate $\\alpha$ values (which determine how social welfare is computed).\n", + "\n", + "Note! $\\alpha$ calibration is sensitive to these choices, so we will want to take care to use these same settings between calibration and any downstream experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Env settings for calibrating alphas. Default settings reflect env defaults.\n", + "fitted_params_dict['settings']['env'] = {\n", + " 'economic_reward_crra_eta': 2,\n", + " \"start_date\": '2020-03-22',\n", + " \"infection_too_sick_to_work_rate\": 0.1,\n", + " \"pop_between_age_18_65\": 0.6,\n", + " \"risk_free_interest_rate\": 0.03,\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load real_world_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataframes = pickle.load( open(os.path.join(data_dir, \"dataframes.pkl\"), \"rb\" ) )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1. 
Beta Fits\n", + "\n", + "Here, we calibrate the **SIR** model.\n", + "\n", + "Within the SIR model, the **beta** parameter controls the rate that infections spread from infected people to susceptible people. It reflects the probability of disease transmission when a member from each group come in contact, and the rate at which those groups come into contact.\n", + "\n", + "As such, the public health policy can affect beta (intuitively, through its influence on how often people interact with one another).\n", + "\n", + "Our simulation models the relationship between public health policy (i.e. \"stringency\") and beta via a time-delayed linear model. \n", + "Specifically, we model beta of US State $i$ at time $t$ as\n", + "\n", + "$$\\beta_{i,t} = \\beta_{i}^{\\pi} \\cdot \\pi_{i,t-d} + \\beta_{i}^{0},$$\n", + "\n", + "where $\\pi_{i,t-d}$ is the stringency level $d$ days ago, $\\beta_{i}^{\\pi}$ is the slope, and $\\beta_{i}^{0}$ is the intercept.\n", + "\n", + " \n", + " \n", + "The next block of code finds the time delay $d$ and then fits the slope and intercept parameters for each US State. \n", + "Each State uses the same delay.\n", + "In addition, we regularize the State-to-State differences in slope/intercept parameters to help reduce overfitting to noise." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**NOTE!** Although we're directly fitting linear models of beta, our ground-truth data is COVID-19 deaths. The best test of this fit is how well our simulation reproduces the COVID-19 death data given the real-world policy choices." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This is the subset of the data we will use:\n", + "beta_df = dataframes[\"beta\"]\n", + "policy_df = dataframes[\"policy\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find the optimal delay in the response of beta (IT SHOULD BE >0)\n", + "\n", + "Step through many possible policy delays to find where the empirical correlation between delayed policy and beta is strongest. Since all US States will use the same delay, we aggregate the data across states.\n", + "\n", + "Intuitively, we expect higher stringency to lead to smaller beta, so we're actually looking for the delay where the correlation is smallest (i.e. most negative)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "delays = list(range(-90, 90))\n", + "\n", + "fits = []\n", + "\n", + "for delay in delays:\n", + " D = fitted_params_dict['settings']['LAST_DATE_IN_VAL_SET']\n", + " if delay < 0:\n", + " x = policy_df[:D].values[-delay:].flatten()\n", + " y = beta_df[:D].values[:delay].flatten()\n", + " elif delay == 0:\n", + " x = policy_df[:D].values.flatten()\n", + " y = beta_df[:D].values.flatten()\n", + " else:\n", + " x = policy_df[:D].values[:-delay].flatten()\n", + " y = beta_df[:D].values[delay:].flatten()\n", + " keep = np.logical_not(np.logical_or(np.isnan(y), np.isnan(x)))\n", + "\n", + " x = x[keep]\n", + " y = y[keep]\n", + " fit = scipy.stats.linregress(x, y)\n", + " \n", + " fits.append(fit)\n", + "\n", + "_, (ax0, ax1) = plt.subplots(1, 2, figsize=(16, 6));\n", + "ax0.plot(delays, [f.rvalue for f in fits]);\n", + "ax0.set_xlabel('Policy-vs-Beta Delay');\n", + "ax0.set_ylabel('Correlation r-value');\n", + "\n", + "ax1.plot(delays, [f.slope for f in fits]);\n", + "ax1.set_xlabel('Policy-vs-Beta Delay');\n", + "ax1.set_ylabel('Slope of Linear Fit');\n", + "\n", + "BETA_DELAY = 
delays[np.argmin(np.array([f.rvalue for f in fits]))]\n", + "ax0.set_title('Optimal delay = {}'.format(BETA_DELAY));" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### What does the aggregate policy vs. beta trend look like at this delay?\n", + "\n", + "This plot shows the raw policy & beta data (blue) after applying the delay. The black dashed line shows the linear fit. The red points show the beta mean at each of the stringency levels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert BETA_DELAY > 0\n", + "\n", + "x = policy_df[:fitted_params_dict['settings']['LAST_DATE_IN_VAL_SET']].values[:-BETA_DELAY].flatten()\n", + "y = beta_df[:fitted_params_dict['settings']['LAST_DATE_IN_VAL_SET']].values[BETA_DELAY:].flatten()\n", + "keep = np.logical_not(np.logical_or(np.isnan(y), np.isnan(x)))\n", + "\n", + "x = x[keep]\n", + "y = y[keep]\n", + "fit = scipy.stats.linregress(x, y)\n", + "\n", + "print('Fit Intercept: {:+f}'.format(fit.intercept))\n", + "print('Fit Slope: {:+f}'.format(fit.slope))\n", + "\n", + "_, ax = plt.subplots(1, 1, figsize=(8, 7))\n", + "\n", + "ax.plot(x, y, 'o', alpha=0.03);\n", + "xL = ax.get_xlim()\n", + "ax.plot(xL, [fit.intercept + fit.slope*x_ for x_ in xL], 'k--', linewidth=5);\n", + "ax.set_xlim(xL);\n", + "\n", + "xs = np.sort(np.unique(x))\n", + "ys = np.zeros_like(xs, dtype=np.float)\n", + "for i, x_ in enumerate(xs):\n", + " ys[i] = np.nanmean(y[x==x_])\n", + "\n", + "ax.plot(xs, ys, 'ro-', markersize=13);\n", + "\n", + "ax.set_ylim([0, 2*np.nanmax(ys)]);\n", + "ax.set_xlabel('Policy ({})'.format(STRINGENCY_POLICY_KEY), fontsize=16);\n", + "ax.set_ylabel('Estimated Beta ({} days later)'.format(BETA_DELAY), fontsize=16);\n", + "ax.set_title('Effect of Policy on Transmission Rate', fontsize=20);\n", + "ax.grid(b=True, axis='y');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This code sets up the regularized, 
State-by-State linear regression. It separates data for training/validation and defines the fitting procedure." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def shift_datestring(dstring, delta):\n", + " return datetime.strftime(\n", + " datetime.strptime(dstring, DATE_FORMAT) + timedelta(delta), DATE_FORMAT\n", + " )\n", + "\n", + "x_train_d0 = '2020-01-01'\n", + "x_train_dT = fitted_params_dict['settings']['LAST_DATE_IN_TRAIN_SET']\n", + "x_val_dT = fitted_params_dict['settings']['LAST_DATE_IN_VAL_SET']\n", + "y_train_d0 = shift_datestring(x_train_d0, BETA_DELAY)\n", + "y_train_dT = shift_datestring(x_train_dT, BETA_DELAY)\n", + "y_val_dT = shift_datestring(x_val_dT, BETA_DELAY)\n", + "\n", + "x_data = policy_df[x_train_d0:x_train_dT].values.T\n", + "y_data = beta_df[y_train_d0:y_train_dT].values.T\n", + "\n", + "x_data_val = policy_df[x_train_dT:x_val_dT].values.T\n", + "y_data_val = beta_df[y_train_dT:y_val_dT].values.T\n", + "\n", + "n_states = x_data.shape[0]\n", + "\n", + "def predict_fn(x, weights):\n", + " slopes = weights[:n_states, None]\n", + " intercepts = weights[n_states:, None]\n", + " return x*slopes + intercepts\n", + "\n", + "def loss_fn(weights, w_sse_lambda): \n", + " y_hat = predict_fn(x_data, weights)\n", + " y_sse = np.nansum((y_data - y_hat)**2)\n", + " \n", + " slopes = weights[:n_states]\n", + " intercepts = weights[n_states:]\n", + " s_sse = np.sum((slopes - np.mean(slopes))**2)\n", + " i_sse = np.sum((intercepts - np.mean(intercepts))**2)\n", + " \n", + " return y_sse + w_sse_lambda*(s_sse*np.nanmean(x_data) + i_sse)\n", + " \n", + "def do_fit(w_sse_lambda):\n", + " w_bounds = [(None, 0)] * n_states\n", + " i_bounds = [(0, None)] * n_states\n", + " res = minimize(\n", + " loss_fn,\n", + " np.zeros(n_states * 2),\n", + " args=(w_sse_lambda),\n", + " bounds=w_bounds + i_bounds,\n", + " )\n", + "\n", + " weights = res.x\n", + " slopes = weights[:n_states]\n", + " 
intercepts = weights[n_states:]\n", + " y_hat = predict_fn(x_data, weights)\n", + " \n", + " return weights, slopes, intercepts, y_hat" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This code gives us a look at the fit parameters/quality for different regularization levels (lambda).\n", + "\n", + "Notice how stronger regularization leads to more State-by-State similarity across the fit parameters. This also leads to slightly worse training $r^2$ and slightly better validation $r^2$, as one would typically expect.\n", + "\n", + "For the default data split, validation $r^2$ is poor, but, as described above, we are more interested in fitting the *SIR model* (not just fitting betas). So, calibration settings should focus on how well the simulation predicts real-world COVID-19 deaths given the real-world policy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_, ax = plt.subplots(1, 1, figsize=(16, 6))\n", + "_, axes = plt.subplots(2, 2, figsize=(16, 8))\n", + "ax0, ax1, ax2, ax3 = axes.flatten()\n", + "\n", + "ax.plot(y_data.flatten(), label='Real Data');\n", + "\n", + "# (regularization_amount, plot_color, plot_style)\n", + "plot_specs = [\n", + " ( 0.0, 'r', ':'),\n", + " ( 5.0, 'g', '--'),\n", + " (10.0, 'c', '-.'),\n", + "]\n", + "for LAMBDA, color, linestyle in plot_specs:\n", + " weights, slopes, intercepts, y_hat = do_fit(w_sse_lambda=LAMBDA)\n", + " y_hat_val = predict_fn(x_data_val, weights)\n", + " print('lambda = {:5.1f}, r^2 = {:5.3f}, r^2 (val) = {:5.3f}'.format(\n", + " LAMBDA,\n", + " 1 - np.nanvar((y_hat - y_data)) / np.nanvar(y_data),\n", + " 1 - np.nanvar((y_hat_val - y_data_val)) / np.nanvar(y_data_val)\n", + " ))\n", + " \n", + " ax.plot(y_hat.flatten(), color=color, linestyle=linestyle, label='Predicted (lambda={})'.format(LAMBDA));\n", + " ax0.plot(slopes, color=color, linestyle=linestyle, label='lambda={}'.format(LAMBDA));\n", + " ax1.plot(intercepts, 
color=color, linestyle=linestyle, label='lambda={}'.format(LAMBDA));\n", + " ax2.plot(intercepts+slopes, color=color, linestyle=linestyle, label='lambda={}'.format(LAMBDA));\n", + " ax3.plot(y_data.flatten(), y_hat.flatten(), 'o', color=color, alpha=0.1, label='lambda={}'.format(LAMBDA));\n", + "\n", + "LIM = [0, 0.3]\n", + "ax3.plot(LIM, LIM, 'k--');\n", + "ax3.set_xlim(LIM);\n", + "ax3.set_ylim(LIM);\n", + "\n", + "for ax_ in [ax, ax0, ax1, ax2, ax3]:\n", + " ax_.legend();\n", + " \n", + "ax.set_ylabel('Beta');\n", + "ax0.set_ylabel('Slope');\n", + "ax1.set_ylabel('Intercept');\n", + "ax2.set_ylabel('Beta @ stringency=1');\n", + "ax3.set_ylabel('Predicted Beta');\n", + "\n", + "ax0.set_xlabel('State Index');\n", + "ax1.set_xlabel('State Index');\n", + "ax2.set_xlabel('State Index');\n", + "ax3.set_xlabel('Actual Beta');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Perform the actual calibration fit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# (if you want to tweak the regularization setting, you can do so here)\n", + "# fitted_params_dict['settings']['SIMILARITY_REGULARIZATION_SIR'] = ...\n", + "\n", + "_, slopes, intercepts, y_hat = do_fit(\n", + " w_sse_lambda=fitted_params_dict['settings']['SIMILARITY_REGULARIZATION_SIR']\n", + ")\n", + "\n", + "# Update the calibration fit dictionary\n", + "fitted_params_dict.update(\n", + " {\n", + " \"BETA_DELAY\": BETA_DELAY,\n", + " \"BETA_SLOPES\": slopes.tolist(),\n", + " \"BETA_INTERCEPTS\": intercepts.tolist()\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2. 
Unemployment Fits\n", + "\n", + "Here, we calibrate the **unemployment** model.\n", + "\n", + "The simulation models unemployment using a time-series model that predicts a State’s unemployment rate based on its history of stringency level increases and decreases.\n", + "\n", + "Each change triggers a combination of exponentially-decaying unemployment responses. We learn a shared set of those responses by learning a set of 5 decay lambdas. Each State separately fits its own model that says how strongly each of those underlying responses is triggered by a change in stringency.\n", + "\n", + "In math, the unemployment model looks like this:\n", + "\n", + "$$\n", + "\\tilde{U}_{i,t}^k = \\sum_{t'=t-L}^{t'=t} e^{\\frac{t'-t}{\\lambda_k}} \\cdot \\Delta \\pi_{i, t'}, \\\\\n", + "U_{i,t} = \\texttt{softplus}\\left( \\sum_k w_{i, k} \\cdot \\tilde{U}_{i,t}^k \\right) + U^0_i.\n", + "$$\n", + "\n", + "$\\tilde{U}_{i,t}^k$ describes the unemployment driven by the $k$-th component of the response. $\\lambda_k$ is the decay constant for that component. $L$ is the total length of the response filters. And $\\Delta \\pi_{i,t}$ is how much the stringency changed in State $i$ at time $t$.\n", + "\n", + "$U_{i,t}$ is the actual unemployment according to the model. It adds baseline unemployment $U^0_i$ to excess unemployment coming from the policy choices. Excess unemployment is a State-specific weighted sum of each $\\tilde{U}_{i,t}^k$, with weights $w_{i,k}$, passed through a $\\texttt{softplus}$ function so that excess unemployment is $\\geq 0$.\n", + "\n", + "Similar to the way we fit beta (above), we regularize State-to-State variability in $w_{i,k}$." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This is the subset of the data we will use for calibrating the unemployment model\n", + "unemployment_df = dataframes[\"unemployment\"]\n", + "policy_df = dataframes[\"policy\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model set-up\n", + "\n", + "For a model like this, we take advantage of the tools available through PyTorch. In particular, we instantiate the unemployment model as a PyTorch module, and write a simple wrapper to handle the data feeding, parameter updates, etc." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class SharedConvUnemp(nn.Module):\n", + " \"\"\"\n", + " The unemployment model.\n", + " Given a history of stringency changes, predicts current unemployment.\n", + " Embeds the full set of shared and State-specific model weights.\n", + " \"\"\"\n", + " def __init__(\n", + " self, \n", + " x_dim,\n", + " n_filters,\n", + " filter_size,\n", + " n_states=51,\n", + " signal_bias=True,\n", + " initial_lambda_guess=None,\n", + " ):\n", + " super().__init__()\n", + " self.x_dim = int(x_dim)\n", + " self.n_filters = int(n_filters)\n", + " self.filter_size = int(filter_size)\n", + " self.n_states = int(n_states)\n", + " \n", + " # We can use grouped 1D convolution to do state-specific projection of x --> signal\n", + " # Expects input of size [1, x_dim*n_states, time+filter_size] (+filter_size implies pre-padding)\n", + " # Output has size [1, n_filters*n_states, time+filter_size]\n", + " self.signal = nn.Conv1d(\n", + " in_channels=self.n_states*self.x_dim,\n", + " out_channels=self.n_states*self.n_filters,\n", + " kernel_size=1,\n", + " groups=self.n_states,\n", + " bias=bool(signal_bias),\n", + " )\n", + " \n", + " # Convolve the learned \"signal\" with a shared bank of exponential filters w/ learnable lambdas\n", + " # Expects 
input of size [n_states, n_filters, time+filter_size] (+filter_size implies pre-padding)\n", + " # Output has size [n_states, 1, time+filter_size] (here, +filter_size comes from internal padding)\n", + " self.conv_lambdas = nn.Parameter(initial_lambda_guess,\n", + " requires_grad=True\n", + " )\n", + " self.f_ts = torch.tile(\n", + " torch.flip(torch.arange(self.filter_size), (0,))[None, None],\n", + " (1, self.n_filters, 1)\n", + " )\n", + " \n", + " # State-specific unemployment offsets\n", + " self.unemp_bias = nn.Parameter(\n", + " torch.tensor(np.ones(self.n_states, dtype=np.float32)*3.5),\n", + " requires_grad=True\n", + " )\n", + " \n", + " def get_similarity_regularization_loss(self):\n", + " w = self.signal.weight.flatten().view(self.n_states, -1)\n", + " dev = w - w.mean(0, keepdim=True)\n", + " mse = torch.pow(dev, 2).mean()\n", + " return mse\n", + " \n", + " def x2signal(self, x):\n", + " # Assume input is [n_states, x_dim, time+filter_size], reshape to size expected by self.signal\n", + " x = x.reshape(1, self.n_states*self.x_dim, -1)\n", + " signal = self.signal(x)\n", + " # Output should be returned to [n_states, n_filters, time+filter_size]\n", + " signal = signal.reshape(self.n_states, self.n_filters, -1)\n", + " return signal\n", + " \n", + " def signal2unemp(self, signal):\n", + " # Assume input is [n_states, n_filters, time+filter_size]\n", + " conv_filters = torch.exp(-self.f_ts / self.conv_lambdas[None, :, None])\n", + " unemp = nn.functional.conv1d(signal, conv_filters, padding=self.filter_size-1)\n", + " \n", + " # Output should be returned as [n_states, time] (i.e. 
we remove padded outputs)\n", + " unemp = unemp.reshape(self.n_states, -1)[:, self.filter_size:-(self.filter_size-1)]\n", + " \n", + " # Soft clipping + baseline unemployment\n", + " return nn.functional.softplus(unemp, beta=1) + self.unemp_bias[:, None]\n", + " \n", + " def forward(self, x):\n", + " signal = self.x2signal(x)\n", + " unemp = self.signal2unemp(signal)\n", + " return unemp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class SharedConvUnempFitter:\n", + " \"\"\"\n", + " Wrapper to handle the data feeding and training of the actual unemployment model.\n", + " \"\"\"\n", + " def __init__(self,\n", + " policy_df=None,\n", + " unemployment_df=None,\n", + " lr=0.01, similarity_regularization_coeff=0.0,\n", + " filter_size=600, lambdas=np.array([30, 60, 130, 260, 540])\n", + " ):\n", + " assert unemployment_df is not None\n", + " \n", + " self.n_filters = len(lambdas)\n", + " self.filter_size = int(filter_size)\n", + " self.similarity_regularization_coeff = float(similarity_regularization_coeff)\n", + " self.last_training_time_index = unemployment_df[\n", + " :fitted_params_dict['settings']['LAST_DATE_IN_TRAIN_SET']\n", + " ].shape[0]\n", + " \n", + " # Use this to crop out nans\n", + " keep = np.logical_not(np.isnan(unemployment_df[\n", + " :fitted_params_dict['settings']['LAST_DATE_IN_VAL_SET']\n", + " ].values.T[0]))\n", + " \n", + " # Set up the data\n", + " self.x_data, self.x_th = self.preprocess_policy(policy_df[\n", + " :fitted_params_dict['settings']['LAST_DATE_IN_VAL_SET']\n", + " ].values[keep].T)\n", + " \n", + " self.y_data = unemployment_df[:fitted_params_dict['settings']['LAST_DATE_IN_VAL_SET']].values[keep].T\n", + " self.y_th = torch.from_numpy(self.y_data.astype(np.float32))\n", + " \n", + " # Crop out any nan region\n", + " \n", + " # Create the model\n", + " self.x_dim = self.x_data.shape[1]\n", + " self.model = SharedConvUnemp(\n", + " self.x_dim, self.n_filters, 
self.filter_size, signal_bias=False,\n", + " initial_lambda_guess=torch.tensor(lambdas.astype(np.float32))\n", + " )\n", + " \n", + " # Loss\n", + " self.loss = nn.MSELoss()\n", + " self.train_loss_history = []\n", + " self.val_loss_history = []\n", + " \n", + " # Create the optimizer\n", + " self.optim = torch.optim.Adam(self.model.parameters(), lr=float(lr))\n", + " \n", + " def preprocess_policy(self, raw_policy_data):\n", + " # Expects policy data size is [n_states, t]\n", + " pad_pol = np.pad(raw_policy_data, [(0, 0), (self.filter_size, 0)], constant_values=1)\n", + " \n", + " dpad = np.zeros_like(pad_pol)\n", + " dpad[:, 1:] = pad_pol[:, 1:] - pad_pol[:, :-1]\n", + "\n", + " x_data = dpad[None]\n", + " \n", + " x_data = x_data.transpose(1, 0, 2)\n", + " x_th = torch.from_numpy(x_data.astype(np.float32))\n", + " \n", + " return x_data, x_th\n", + " \n", + " def predict(self, numpy=True):\n", + " y_hat = self.model(self.x_th.detach())\n", + " if numpy:\n", + " y_hat = y_hat.data.numpy()\n", + " return y_hat\n", + " \n", + " def get_train_loss(self):\n", + " y_hat = self.predict(numpy=False)\n", + " y_hat_train = y_hat[:, :self.last_training_time_index]\n", + " y_train = self.y_th[:, :self.last_training_time_index]\n", + " loss = self.loss(y_hat_train, y_train)\n", + " return loss\n", + " \n", + " def get_val_loss(self):\n", + " y_hat = self.predict(numpy=False)\n", + " y_hat_val = y_hat[:, self.last_training_time_index:]\n", + " y_val = self.y_th[:, self.last_training_time_index:]\n", + " loss = self.loss(y_hat_val, y_val)\n", + " return loss\n", + " \n", + " def get_losses(self):\n", + " y_hat = self.predict(numpy=False)\n", + " \n", + " y_hat_train = y_hat[:, :self.last_training_time_index]\n", + " y_train = self.y_th[:, :self.last_training_time_index]\n", + " train_loss = self.loss(y_hat_train, y_train)\n", + " \n", + " y_hat_val = y_hat[:, self.last_training_time_index:]\n", + " y_val = self.y_th[:, self.last_training_time_index:]\n", + " val_loss = 
self.loss(y_hat_val, y_val)\n", + " \n", + " return train_loss, val_loss\n", + " \n", + " def update(self):\n", + " self.optim.zero_grad()\n", + " \n", + " # Get the training and val losses\n", + " train_loss, val_loss = self.get_losses()\n", + " self.train_loss_history.append(float(train_loss))\n", + " self.val_loss_history.append(float(val_loss))\n", + " \n", + " # Add any similarity regularization loss\n", + " if self.similarity_regularization_coeff:\n", + " similarity_loss = self.model.get_similarity_regularization_loss()\n", + " train_loss = train_loss + self.similarity_regularization_coeff*similarity_loss\n", + " \n", + " # Update\n", + " train_loss.backward()\n", + " self.optim.step()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Perform the actual calibration fit\n", + "\n", + "Initialize and fit the unemployment model (this will likely take a few minutes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# (if you want to tweak the regularization setting, you can do so here)\n", + "# fitted_params_dict['settings']['FILTER_SIZE_UNEMPLOYMENT'] = ...\n", + "# fitted_params_dict['settings']['SIMILARITY_REGULARIZATION_UNEMPLOYMENT'] = ...\n", + "\n", + "unemployment_fitter = SharedConvUnempFitter(\n", + " policy_df=policy_df,\n", + " unemployment_df=unemployment_df,\n", + " filter_size=fitted_params_dict['settings']['FILTER_SIZE_UNEMPLOYMENT'],\n", + " similarity_regularization_coeff=fitted_params_dict['settings']['SIMILARITY_REGULARIZATION_UNEMPLOYMENT'],\n", + " lambdas=np.logspace(np.log10(30), np.log10(540), 5),\n", + " lr=0.01,\n", + ")\n", + "\n", + "# Recommend training for 350 steps with lr=0.01 and similarity regularization=0.5\n", + "for _ in tqdm(range(350)):\n", + " unemployment_fitter.update()\n", + "\n", + "_, ax = plt.subplots(1, 1, figsize=(12, 5))\n", + "ax.plot(unemployment_fitter.train_loss_history, label='Training');\n", + 
"ax.plot(unemployment_fitter.val_loss_history, label='Validation');\n", + "ax.set_ylabel('Loss (MSE)');\n", + "ax.set_xlabel('Training Steps');\n", + "ax.legend();\n", + "ax.set_ylim(bottom=0);\n", + "ax.grid(b=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### How well does the predicted US unemployment match the real data?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_, ax = plt.subplots(1, 1, figsize=(12, 5))\n", + "\n", + "y_hat = unemployment_fitter.predict().mean(0)\n", + "t = np.arange(len(y_hat))\n", + "\n", + "ax.plot(t, unemployment_fitter.y_data.mean(0), 'b', label='Real Data');\n", + "ax.plot(\n", + " t[:unemployment_fitter.last_training_time_index],\n", + " y_hat[:unemployment_fitter.last_training_time_index],\n", + " 'r-', label='Predicted (Train)'\n", + ");\n", + "ax.plot(\n", + " t[unemployment_fitter.last_training_time_index:],\n", + " y_hat[unemployment_fitter.last_training_time_index:],\n", + " 'g-', label='Predicted (Val)'\n", + ");\n", + "ax.grid(b=True);\n", + "\n", + "ax.set_xlabel('Time (days)', fontsize=16);\n", + "ax.set_ylabel('Avg. State Unemployment Rate (%)', fontsize=16);\n", + "ax.set_title('Real vs. 
Predicted Unemployment', fontsize=20);\n", + "ax.legend(fontsize=20);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### How well does the predicted unemployment match the real data -- *for each state*?\n", + "\n", + "**WARNING: Make sure the unemployment fits above look reasonable for all states before saving!** If not, re-run the fits above" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "# Note, since there are 51 \"states\", this won't plot Wyoming.\n", + "_, axes = plt.subplots(10, 5, figsize=(16, 40), sharey=True, sharex=True)\n", + "axes = axes.flatten()\n", + "\n", + "y_hat = unemployment_fitter.predict()\n", + "t = np.arange(y_hat.shape[1])\n", + "for IDX, ax in enumerate(axes):\n", + " ax.plot(t, unemployment_fitter.y_data[IDX], 'b-', label='Real Data');\n", + " ax.plot(\n", + " t[:unemployment_fitter.last_training_time_index],\n", + " y_hat[IDX, :unemployment_fitter.last_training_time_index], \n", + " 'r-', label='Predicted (Train)'\n", + " );\n", + " ax.plot(\n", + " t[unemployment_fitter.last_training_time_index:],\n", + " y_hat[IDX, unemployment_fitter.last_training_time_index:],\n", + " 'g-', label='Predicted (Val)'\n", + " );\n", + " ax.grid(b=True, axis='y');\n", + " ax.set_title(unemployment_df.columns[IDX]);\n", + " if ax.is_first_col():\n", + " ax.set_ylabel('Unemployment Rate (%)');\n", + " ax.legend();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## When satisfied with the fits, export them as part of the calibration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Update fitted_params_dict\n", + "# Note: we cast some arrays as np.float64 in order to be able to write out to a json file\n", + "fitted_params_dict.update(\n", + " {\n", + " \"POLICY_START_DATE\": datetime.strftime(policy_df.index[0], format=DATE_FORMAT),\n", + " 
\"FILTER_LEN\": unemployment_fitter.filter_size,\n", + " \"CONV_LAMBDAS\": [float(x) for x in unemployment_fitter.model.conv_lambdas.data.numpy()],\n", + " \"UNEMPLOYMENT_BIAS\": [float(x) for x in unemployment_fitter.model.unemp_bias.data.numpy()],\n", + " \"GROUPED_CONVOLUTIONAL_FILTER_WEIGHTS\": unemployment_fitter.model.signal.weight.data.numpy().tolist()\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 3. Social Welfare\n", + "\n", + "The simulator tracks **social welfare**, which is the metric that we want to maximize.\n", + "Our simulation defines social welfare as \n", + "$W = \\alpha H + (1-\\alpha)E$ \n", + "where $H$ and $E$ are indices representing the health outcome ($H$ decreases with COVID deaths) and economic outcome ($E$ decreases with lost GDP). \n", + "$\\alpha$ is a weighting term (between 0 and 1).\n", + "\n", + "We don't know what the actual goals/priorities were of real-world policymakers, but we want to calibrate $\\alpha$ (separately for each US State) so that the resulting definition of social welfare is most consistent with public health policy decisions made in the real world.\n", + "\n", + "*There is no definitive way to extract these priorities from the data. Our method is just one possible choice.*\n", + "\n", + "The goal of this fitting procedure is to estimate what the outcomes would have been if only interested in either health (minimizing deaths) or in the economy (minimizing GDP loss), and then to compare these against the real-world policy outcomes in order to estimate the underlying policy priorities.\n", + "\n", + "Another way to look at this is that we want to first measure the *shape* of the health/economy trade-off curve for each State. Then, we want to back out the health/economy prioritization (i.e. 
$\\alpha_i$) that would make the outcomes under the real-world policy preferable to all other outcomes along that trade-off curve.\n", + "\n", + "**We'll go into specifics as we proceed below...**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Note:** this procedure also gives us the numbers we need to *normalize* $H$ and $E$ so that the Health and Economic Indices have a meaningful scale." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The env requires the fitted params to run, and we require the env to calibrate the fitted params.\n", + "# Save some placeholders, which we'll update after calibration, then re-save.\n", + "fitted_params_dict.update(\n", + " {\n", + " \"VALUE_OF_LIFE\": 10000000,\n", + " \"INFERRED_WEIGHTAGE_ON_AGENT_HEALTH_INDEX\": [0.5]*51,\n", + " \"INFERRED_WEIGHTAGE_ON_PLANNER_HEALTH_INDEX\": 0.5,\n", + " \"MAX_MARGINAL_AGENT_ECONOMIC_INDEX\": [1]*51,\n", + " \"MAX_MARGINAL_PLANNER_ECONOMIC_INDEX\": 1,\n", + " \"MAX_MARGINAL_AGENT_HEALTH_INDEX\": [1]*51,\n", + " \"MAX_MARGINAL_PLANNER_HEALTH_INDEX\": 1,\n", + " \"MIN_MARGINAL_AGENT_ECONOMIC_INDEX\": [0]*51,\n", + " \"MIN_MARGINAL_PLANNER_ECONOMIC_INDEX\": 0,\n", + " \"MIN_MARGINAL_AGENT_HEALTH_INDEX\": [0]*51,\n", + " \"MIN_MARGINAL_PLANNER_HEALTH_INDEX\": 0,\n", + " }\n", + ")\n", + "with open(os.path.join(data_dir, fitted_params_filename), \"w\") as fp: \n", + " json.dump(fitted_params_dict, fp)\n", + "\n", + "# Define the configuration of the environment that will be built\n", + "N_ALPHA_CALIBRATION_DAYS = (\n", + " datetime.strptime(\n", + " fitted_params_dict['settings']['LAST_DATE_IN_VAL_SET'], DATE_FORMAT\n", + " ) - datetime.strptime(\n", + " fitted_params_dict['settings']['env']['start_date'], DATE_FORMAT\n", + " )\n", + ").days\n", + "\n", + "env_config = {\n", + " \"collate_agent_step_and_reset_data\": True,\n", + " \"scenario_name\": \"CovidAndEconomySimulation\",\n", + " 
\"path_to_data_and_fitted_params\": data_dir,\n", + " \n", + " \"components\": [\n", + " {\"ControlUSStateOpenCloseStatus\": {\n", + " \"action_cooldown_period\": 28\n", + " }},\n", + " {\"FederalGovernmentSubsidy\": {\n", + " \"num_subsidy_levels\": 20,\n", + " \"subsidy_interval\": 90,\n", + " \"max_annual_subsidy_per_person\": 20000,\n", + " }},\n", + " {\"VaccinationCampaign\": {\n", + " \"daily_vaccines_per_million_people\": 3000,\n", + " \"delivery_interval\": 1,\n", + " \"vaccine_delivery_start_date\": \"2021-01-12\",\n", + " }},\n", + " ],\n", + " \"flatten_masks\": False,\n", + " \"flatten_observations\": False,\n", + " \"health_priority_scaling_agents\": 1.0,\n", + " \"health_priority_scaling_planner\": 1.0,\n", + " \"multi_action_mode_agents\": False,\n", + " \"multi_action_mode_planner\": False,\n", + " \"world_size\": [1, 1],\n", + " \"n_agents\": 51,\n", + " \"episode_length\": N_ALPHA_CALIBRATION_DAYS,\n", + "}\n", + " \n", + "# NOTE!!!!\n", + "# The calibration will be specific to these choices!\n", + "# Downstream environments that use this calibration should also use these parameters!\n", + "env_config.update(fitted_params_dict['settings']['env'])\n", + "\n", + "# Build the environment from this partially-finished calibration.\n", + "uncalibrated_env = ai_economist.foundation.make_env_instance(**env_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To estimate $\\hat{\\alpha}_i$ (the hat symbol denotes that this is an estimate), we first collect simulated health/economic outcomes under 3 policies: the actual stringency choices, minimum stringency, and maximum stringency.\n", + "\n", + "\n", + "We use these outcomes to estimate the Pareto frontier in the $\\left( H_i, E_i \\right)$ coordinate space.\n", + "\n", + "\n", + "By definition, the $\\left( H_i, E_i \\right)$ coordinates for the minimum- and maximum-stringency policies define the endpoints of this frontier, at $\\left(0, 1\\right)$ and $\\left(1, 
0\\right)$, respectively. \n", + "(This definition comes from how we normalize the health/economic indices -- that is, they are scaled to reflect the range of outcomes spanning the minimum- and maximum-stringency policies.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Collect the outcomes under the actual policies and 2 extremes: fully-closed and fully-open\n", + "\n", + "index_results = {}\n", + "\n", + "for p in ['closed', 'open', 'actual']:\n", + "\n", + " uncalibrated_env.reset();\n", + " for _ in range(uncalibrated_env.episode_length):\n", + " if p == 'actual':\n", + " t_str = uncalibrated_env.current_date.strftime(DATE_FORMAT)\n", + " actions = {\n", + " str(idx): policy_df[state][t_str]\n", + " for idx, state in uncalibrated_env.us_state_idx_to_state_name.items()\n", + " }\n", + " \n", + " elif p == 'closed':\n", + " actions = {str(idx): 10 for idx in range(51)}\n", + " \n", + " elif p == 'open':\n", + " actions = {str(idx): 1 for idx in range(51)}\n", + " \n", + " else:\n", + " raise NotImplementedError\n", + "\n", + " uncalibrated_env.step(actions);\n", + "\n", + " health_and_economic_indices = {}\n", + " for agent in uncalibrated_env.all_agents:\n", + " health_and_economic_indices[agent.idx] = (\n", + " float(agent.state[\"Health Index\"] / uncalibrated_env.episode_length), \n", + " float(agent.state[\"Economic Index\"] / uncalibrated_env.episode_length),\n", + " )\n", + " \n", + " index_results[p] = health_and_economic_indices" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We assume the Pareto frontier for State $i$ has form $E_i = (1-H_i)^{x_i}$ and that the coordinates associated with the actual-stringency policy are found along this frontier.\n", + "\n", + "We set the shape parameter $x_i$ based on this latter assumption, and take $\\hat{\\alpha}_i$ as the value that maximizes social welfare along the estimated Pareto frontier:\n", + "$$\n", + 
"\\hat{\\alpha_i} = \\max_{\\alpha_i} \\alpha_i H_i^{\\pi} + \\left(1-\\alpha_i\\right) E_i^{\\pi} =\n", + "\\max_{\\alpha_i} \\alpha_i H_i^{\\pi} + \\left(1-\\alpha_i \\right)\\left(1- \\left(1-H_i^{\\pi}\\right)^{x_i} \\right),\n", + "$$\n", + "\n", + "where the Economic Index $E_i^{\\pi}$ and Health Index $H_i^{\\pi}$ values are obtained from running the policies $\\pi$ in the simulation.\n", + "\n", + "\n", + "In other words, given the estimate of the Pareto frontier, we find the $\\hat{\\alpha}_i$ that best rationalizes the outcomes obtained under the actual policy, i.e. the $\\hat{\\alpha}_i$ under which these outcomes ($E_i^{\\pi}$ and $H_i^{\\pi}$) are considered optimal." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This function implements the process described above and adds some plotting so we can visualize things\n", + "\n", + "def estimate_alpha_and_plot_rew_examples(state_idx, do_plot=True, ax=None):\n", + " \n", + " act_h, act_e = index_results[\"actual\"][state_idx] # actual health index, actual economic index\n", + " max_h, min_e = index_results[\"closed\"][state_idx] # max health index, min economic index\n", + " min_h, max_e = index_results[\"open\"][state_idx] # min health index, max economic index\n", + "\n", + " norm_idx_pairs = []\n", + " for index_dict in index_results.values():\n", + " h_index, e_index = index_dict[state_idx]\n", + " nh = (h_index-min_h)/(max_h - min_h)\n", + " ne = (e_index-min_e)/(max_e - min_e)\n", + " norm_idx_pairs.append([nh, ne])\n", + " \n", + " # Split out the normalized health / economic indices\n", + " norm_idx_pairs = np.array(norm_idx_pairs)\n", + " nhs = norm_idx_pairs[:, 0]\n", + " nes = norm_idx_pairs[:, 1]\n", + "\n", + " # We assume the coordinates along the pareto curve have this form:\n", + " # (h, e) = (h, (1-h)**pwr)\n", + " # Fit the power terms of the estimated pareto curve\n", + " def loss_fn(pwr):\n", + " nes_hat = 
(1-(nhs**pwr))**(1/pwr)\n", + " return np.sum((nes_hat - nes)**2)\n", + " res = minimize(\n", + " fun=loss_fn,\n", + " x0=2,\n", + " bounds=[(1.001, None)],\n", + " )\n", + " pwr = res.x\n", + " \n", + " # Given the supplied or fit powers, produce the estimated pareto curve (the set of hs/es coordinates)\n", + " policies = np.linspace(0, 1, 1001)\n", + " hs = policies**(1/pwr)\n", + " es = (1-policies)**(1/pwr)\n", + " \n", + " # Find the alpha such that the optimal nh/ne coordinate is closest to the ACTUAL policy nh/ne coordinate\n", + " nh = (act_h-min_h)/(max_h-min_h)\n", + " ne = (act_e-min_e)/(max_e-min_e)\n", + " alphas = np.linspace(0, 1, 1001)\n", + " d_opt2actual = []\n", + " # For each possible alpha ...\n", + " for alpha in alphas:\n", + " # ... find the optimal nh/ne coordinate for this alpha ...\n", + " opt_nh_ne_index = np.argmax(alpha*hs + (1-alpha)*es)\n", + " opt_nh = hs[opt_nh_ne_index]\n", + " opt_ne = es[opt_nh_ne_index]\n", + " # ... and store its distance to the ACTUAL h/e coordinate.\n", + " d = np.sqrt(((nh-opt_nh)**2) + (ne-opt_ne)**2)\n", + " d_opt2actual.append(d)\n", + " # The inferred alpha is that where the distance measured above is lowest\n", + " alpha = float(alphas[np.argmin(d_opt2actual)])\n", + " \n", + " if not do_plot:\n", + " return alpha\n", + " \n", + " if ax is None:\n", + " _, ax = plt.subplots(1, 1, figsize=(6, 6))\n", + " \n", + " # Plot a reward heatmap for the given alpha\n", + " h_full, e_full = np.meshgrid(np.linspace(0, 1, 101), np.linspace(0, 1, 101))\n", + " r_full = alpha*h_full + (1-alpha)*e_full\n", + " ax.imshow(r_full, aspect='auto', origin='lower')\n", + " if ax.is_last_row():\n", + " ax.set_xlabel('Normalized Health Index');\n", + " if ax.is_first_col():\n", + " ax.set_ylabel('Normalized Economic Index');\n", + " \n", + " # Add the observed h/e coordinates from actual policies\n", + " for nh, ne in norm_idx_pairs:\n", + " ax.plot(nh*100, ne*100, 'bo', markersize=12);\n", + " \n", + " # Add the estimated 
pareto boundary\n", + " ax.plot(hs*100, es*100, 'w');\n", + " \n", + " # Mark the optimal point along the boundary for the given alpha\n", + " rs = alpha*hs + (1-alpha)*es\n", + " ax.plot(hs[rs.argmax()]*100, es[rs.argmax()]*100, 'o', markerfacecolor=\"None\", \n", + " markersize=8, markeredgecolor='red', markeredgewidth=2);\n", + " \n", + " ax.set_title('{}; alpha={:4.2f}'.format(\n", + " 'USA' if state_idx=='p' else uncalibrated_env.us_state_idx_to_state_name[str(state_idx)],\n", + " alpha,\n", + " ))\n", + " \n", + " ax.set_xticklabels(\n", + " [\"{:3.2f}\".format(x/100).rstrip('0').rstrip('.') for x in ax.get_xticks()]\n", + " )\n", + " ax.set_yticklabels(\n", + " [\"{:3.2f}\".format(y/100).rstrip('0').rstrip('.') for y in ax.get_yticks()]\n", + " )\n", + " \n", + " return alpha" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Here are some visualizations to make things more intuitive...\n", + "\n", + "Blue dots are normalized outcomes from the actual policy, and the fully-closed and fully-open policies.\n", + "\n", + "The white line connecting these dots estimates the pareto frontier.\n", + "\n", + "The red circle is the best outcome on the pareto frontier for the estimated alpha.\n", + "\n", + "The estimated alpha is the one that places the red circle closest to the actual-policy blue dot.\n", + "\n", + "The heatmap background shows the reward at each coordinate (given $\\hat{\\alpha}_i$)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Single-State plot so things are easier to see.\n", + "alpha = estimate_alpha_and_plot_rew_examples(state_idx=50);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "# Now, for all rest of the States!\n", + "\n", + "_, axes = plt.subplots(10, 5, figsize=(16, 35), sharex=True, sharey=True)\n", + "\n", + "for i, ax in enumerate(axes.flatten()):\n", + " alpha = estimate_alpha_and_plot_rew_examples(state_idx=i, ax=ax)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Perform the actual calibration fit\n", + "\n", + "At the end of this step, we have finalized the fitted_params_dict and are ready to export the final calibration!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The fully-closed and fully-open policies give us coordinates for normalizing the indices\n", + "fitted_params_dict.update(\n", + " {\n", + " \"MAX_MARGINAL_AGENT_ECONOMIC_INDEX\": [index_results['open'][i][1] for i in range(51)],\n", + " \"MAX_MARGINAL_PLANNER_ECONOMIC_INDEX\": index_results['open']['p'][1],\n", + " \"MAX_MARGINAL_AGENT_HEALTH_INDEX\": [index_results['closed'][i][0] for i in range(51)],\n", + " \"MAX_MARGINAL_PLANNER_HEALTH_INDEX\": index_results['closed']['p'][0],\n", + " \"MIN_MARGINAL_AGENT_ECONOMIC_INDEX\": [index_results['closed'][i][1] for i in range(51)],\n", + " \"MIN_MARGINAL_PLANNER_ECONOMIC_INDEX\": index_results['closed']['p'][1],\n", + " \"MIN_MARGINAL_AGENT_HEALTH_INDEX\": [index_results['open'][i][0] for i in range(51)],\n", + " \"MIN_MARGINAL_PLANNER_HEALTH_INDEX\": index_results['open']['p'][0],\n", + " }\n", + ") \n", + "\n", + "# Apply the alpha-estimation procedure to fill in the rest of the fitted params dictionary\n", + "fitted_params_dict.update(\n", + " {\n", + 
" \"INFERRED_WEIGHTAGE_ON_AGENT_HEALTH_INDEX\": [\n", + " estimate_alpha_and_plot_rew_examples(state_idx=i, do_plot=False) for i in range(51)\n", + " ],\n", + " \"INFERRED_WEIGHTAGE_ON_PLANNER_HEALTH_INDEX\": estimate_alpha_and_plot_rew_examples(\n", + " state_idx='p', do_plot=False\n", + " ),\n", + " }\n", + ")\n", + "\n", + "# We have replaced the placeholders in the original fitted_params_file. All done -- time to re-save!\n", + "with open(os.path.join(data_dir, fitted_params_filename), \"w\") as fp: \n", + " json.dump(fitted_params_dict, fp)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# \n", + "# \n", + "# Inspection\n", + "\n", + "Use this section to inspect the behavior of the calibrated simulator. As a starting point, we provide some code:\n", + "- to re-build an environment instance (now that all the calibration fits have been exported),\n", + "- to run that environment using the real-world policies,\n", + "- and to compare real-world COVID-19 cases/deaths against what the calibrated simulation predicts given the real-world policies." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run the actual policy in the simulation to see what kind of simulated outcomes we get and how they compare.\n", + "\n", + "# (this will now use the fully-calibrated settings)\n", + "calibrated_env = ai_economist.foundation.make_env_instance(**env_config)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run sim, setting agent actions based on the real-world policy\n", + "calibrated_env.reset();\n", + "for _ in range(calibrated_env.episode_length):\n", + " t_str = calibrated_env.current_date.strftime(DATE_FORMAT)\n", + " actions = {\n", + " str(idx): policy_df[state][t_str]\n", + " for idx, state in calibrated_env.us_state_idx_to_state_name.items()\n", + " }\n", + " calibrated_env.step(actions);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "# Visualize active cases and deaths, for the real-world and for the simulation w/ real-world policies\n", + "_, (ax0, ax1) = plt.subplots(1, 2, figsize=(16, 5))\n", + "\n", + "\n", + "infected_df = dataframes[\"infected\"]\n", + "deaths_df = dataframes[\"smoothed_deaths\"]\n", + "\n", + "ax0.plot(infected_df[\n", + " fitted_params_dict['settings']['env']['start_date']:fitted_params_dict['settings']['LAST_DATE_IN_VAL_SET']\n", + "].sum(1).values, label='Actual Data');\n", + "ax0.plot(calibrated_env.world.global_state[\"Infected\"][:, :].sum(1), 'r', label='Simulated');\n", + "ax0.set_title('Active COVID-19 Cases');\n", + "ax0.set_xlabel('Days Since Start Date');\n", + "ax0.grid(b=True, axis='y');\n", + "ax0.legend();\n", + "\n", + "ax1.plot(deaths_df[\n", + " fitted_params_dict['settings']['env']['start_date']:fitted_params_dict['settings']['LAST_DATE_IN_VAL_SET']\n", + "].sum(1).values, label='Actual Data');\n", + 
"ax1.plot(calibrated_env.world.global_state['Deaths'][:, :].sum(1), 'r', label='Simulated');\n", + "ax1.set_title('Cumulative COVID-19 Deaths');\n", + "ax1.set_xlabel('Days Since Start Date');\n", + "ax1.grid(b=True, axis='y');\n", + "ax1.legend();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/ai_economist/datasets/covid19_datasets/gather_real_world_data.ipynb b/ai_economist/datasets/covid19_datasets/gather_real_world_data.ipynb new file mode 100644 index 0000000..d6d1990 --- /dev/null +++ b/ai_economist/datasets/covid19_datasets/gather_real_world_data.ipynb @@ -0,0 +1,846 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) 2021, salesforce.com, inc. \n", + "All rights reserved. \n", + "SPDX-License-Identifier: BSD-3-Clause \n", + "For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# This notebook will be used to gather real-world data and perform data processing in order to use it in the covid-19 simulation.\n", + "\n", + "### All the downloaded data will be formatted into pandas dataframes." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Below is the list of COVID-19 data sources used in this notebook\n", + "\n", + "1. 
**US state government policies** (Oxford Covid-19 Government Response Tracker (OxCGRT))\n", + "\n", + " https://github.com/OxCGRT/USA-covid-policy\n", + "\n", + "\n", + "2. **US federal government direct payments** (Committee for a Responsible Federal Budget)\n", + "\n", + " https://www.covidmoneytracker.org/\n", + " \n", + " https://docs.google.com/spreadsheets/d/1Nr_J5wLfUT4IzqSXkYbdOXrRgEkBxhX0/edit#gid=682404301\n", + " \n", + "\n", + "3. **US deaths data** (COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University)\n", + "\n", + " https://github.com/CSSEGISandData/COVID-19\n", + "\n", + "\n", + "4. **US vaccinations** (Our World in Data)\n", + " \n", + " https://ourworldindata.org/covid-vaccinations\n", + " \n", + " \n", + "5. **US unemployment** (Bureau of Labor Statistics)\n", + "\n", + " https://www.bls.gov/lau/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime, timedelta\n", + "import json\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import os\n", + "import pandas as pd\n", + "import pickle\n", + "import scipy\n", + "from scipy.signal import convolve" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Classes to fetch the real-world data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from ai_economist.datasets.covid19_datasets.us_policies import DatasetCovidPoliciesUS\n", + "from ai_economist.datasets.covid19_datasets.us_deaths import DatasetCovidDeathsUS\n", + "from ai_economist.datasets.covid19_datasets.us_vaccinations import DatasetCovidVaccinationsUS\n", + "from ai_economist.datasets.covid19_datasets.us_unemployment import DatasetCovidUnemploymentUS" + ] + }, + { + "cell_type": "markdown", 
+ "metadata": {}, + "source": [ + "### Set a base directory where you would like to download real world data. The latest data will be downloaded into a folder within the base directory, named using the current date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "BASE_DATA_DIR_PATH = \"/tmp/covid19_data\" # SPECIFY A BASE DIRECTORY TO STORE ALL THE DOWNLOADED DATA" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DOWNLOAD_LATEST_DATA = True # Download the latest data or use whatever is saved earlier \n", + "CURRENT_DATE = datetime.now()\n", + "DATE_FORMAT = \"%Y-%m-%d\"\n", + "date_string = CURRENT_DATE.strftime(DATE_FORMAT).replace('/','-')\n", + "data_dir = os.path.join(BASE_DATA_DIR_PATH, date_string)\n", + "\n", + "print(\"All the data will be downloaded to the directory: '{}'.\".format(data_dir))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set up dictionary to write model constants\n", + "model_constants = {}\n", + "model_constants_filename = \"model_constants.json\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Gather real-world data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1. 
COVID-19 US State Government Policies\n", + "### Source: Oxford Covid-19 Government Response Tracker (OxCGRT) \n", + "(https://github.com/OxCGRT/USA-covid-policy)\n", + "\n", + "**NOTE:** All data will use the same format as **policy_df** (below) and use the same date index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "covid_policies_us = DatasetCovidPoliciesUS(\n", + " data_dir=data_dir,\n", + " download_latest_data=DOWNLOAD_LATEST_DATA\n", + ")\n", + "\n", + "# Which of the policy indicators to treat as the open/close level\n", + "STRINGENCY_POLICY_KEY = 'StringencyIndex'\n", + "# Number of levels to discretize the stringency policy into. \n", + "# In the context of reinforcement learning, this also determines the action space of the agents.\n", + "NUM_STRINGENCY_LEVELS = 10\n", + "\n", + "policies_us_df = covid_policies_us.process_policy_data(\n", + " stringency_policy_key=STRINGENCY_POLICY_KEY,\n", + " num_stringency_levels=NUM_STRINGENCY_LEVELS\n", + ")\n", + "\n", + "print(\"Policy data are available between {} and {}\".format(policies_us_df[\"Date\"].min(), \n", + " policies_us_df[\"Date\"].max()))\n", + "\n", + "policy_df = policies_us_df.pivot(\n", + " index=\"Date\", columns=\"RegionName\", values=STRINGENCY_POLICY_KEY\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This is the common date index that all the dataframes will use\n", + "COMMON_DATE_INDEX = policy_df.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This is the list of states (in order) all the dataframes will use\n", + "US_STATE_ORDER = policy_df.columns.values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Visualize the stringency level for a specified US state\n", + "state = \"California\"\n", + 
"policy_df[state].plot(figsize=(15,5), x='Date', title=\"Stringency Level for {}\".format(state), grid=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2. COVID-19 Federal government subsidies (direct payments) to the states\n", + "### Source: Committee for a Responsible Federal Budget\n", + "https://www.covidmoneytracker.org/\n", + "\n", + "### Direct payments provided by the Federal Government so far are recorded in this google spreadsheet\n", + "https://docs.google.com/spreadsheets/d/1Nr_J5wLfUT4IzqSXkYbdOXrRgEkBxhX0/edit#gid=682404301\n", + "### Read as (date: direct payment amount)\n", + "2020-04-15: 274B\n", + "\n", + "2020-12-27: 142B\n", + "\n", + "2021-03-11: 386B" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "subsidy_df = pd.DataFrame(policy_df.index).set_index(\"Date\")\n", + "subsidy_df[\"USA\"] = 0.0\n", + "\n", + "subsidy_df.loc[\"2020-04-15\", \"USA\"] = 274e9\n", + "subsidy_df.loc[\"2020-12-27\", \"USA\"] = 142e9\n", + "subsidy_df.loc[\"2021-03-11\", \"USA\"] = 386e9" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 3. 
COVID-19 Deaths data\n", + "### Source: COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University \n", + "(https://github.com/CSSEGISandData/COVID-19)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "deaths_us_df = DatasetCovidDeathsUS(\n", + " data_dir=data_dir,\n", + " download_latest_data=DOWNLOAD_LATEST_DATA\n", + ").df\n", + "\n", + "print(\"COVID-19 death data for the US is available between {} and {}\".format(\n", + " deaths_us_df.columns[12], deaths_us_df.columns[-1]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Retain just the states in US_STATE_ORDER\n", + "deaths_us_df = deaths_us_df[deaths_us_df.Province_State.isin(US_STATE_ORDER)]\n", + "\n", + "# We will visualize this later in the notebook" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 4. COVID-19 Vaccination Data\n", + "### Source: Our World in Data\n", + "(https://ourworldindata.org/covid-vaccinations)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vaccinations_us_df = DatasetCovidVaccinationsUS(\n", + " data_dir=data_dir,\n", + " download_latest_data=DOWNLOAD_LATEST_DATA\n", + ").df\n", + "\n", + "vaccination_dates = sorted(vaccinations_us_df.date.unique())\n", + "print(\"Vaccination data is available between {} and {}\".format(min(vaccination_dates), max(vaccination_dates)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vaccinated_df = vaccinations_us_df.pivot(\n", + " index=\"date\", columns=\"location\", values=\"people_fully_vaccinated\"\n", + ")[US_STATE_ORDER]\n", + "\n", + "vaccinated_df.index = pd.to_datetime(vaccinated_df.index)\n", + "vaccinated_df = vaccinated_df.reindex(COMMON_DATE_INDEX).fillna(0)" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Visualize the vaccinations for a specified US state\n", + "# Warning: the last value may not be updated (may show it to be 0)\n", + "\n", + "state = \"California\"\n", + "vaccinated_df[state].plot(figsize=(15,5), x='Date', title=\"Vaccinations for {}\".format(state), grid=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using deaths and vaccinations to compute the susceptible-infected-recovered (SIR) numbers\n", + "\n", + "Our SIR data will only treat **deaths** as ground-truth.\n", + "\n", + "Given death data and some assumed constants about the _death rate_ and _recovery rate_ , we can apply some \"SIR algebra\" (i.e. solve for unknowns using the SIR equations) to _infer_ quantities like total \"recovered\", number of infected people, and ultimately **Beta**, which is the rate of transmission times the number of people an infected person comes into contact with." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# For data representation, we will want to build a dataframe for...\n", + "# ... deaths...\n", + "deaths_df = pd.DataFrame(COMMON_DATE_INDEX, columns=['Date']).set_index('Date')\n", + "smoothed_deaths_df = pd.DataFrame(COMMON_DATE_INDEX, columns=['Date']).set_index('Date')\n", + "# ... (inferred) SIR states...\n", + "susceptible_df = pd.DataFrame(COMMON_DATE_INDEX, columns=['Date']).set_index('Date')\n", + "infected_df = pd.DataFrame(COMMON_DATE_INDEX, columns=['Date']).set_index('Date')\n", + "recovered_df = pd.DataFrame(COMMON_DATE_INDEX, columns=['Date']).set_index('Date')\n", + "# ... 
and (inferred) Beta.\n", + "beta_df = pd.DataFrame(COMMON_DATE_INDEX, columns=['Date']).set_index('Date')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# STD of the Gaussian smoothing window applied to the death data.\n", + "SIR_SMOOTHING_STD = 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Fill the death dataframe from (smoothed) raw data\n", + "\n", + "def smooth(x, gauss_std=10):\n", + " \"\"\"\n", + " gauss_std: standard deviation of the Gaussian smoothing window applied to the death data.\n", + " \"\"\"\n", + " if gauss_std <= 0:\n", + " return x\n", + " # To invalidate the near-edge results, bookend the input x with nans\n", + " x = np.concatenate([[np.nan], np.array(x), [np.nan]])\n", + " \n", + " kernel = scipy.stats.norm.pdf(\n", + " np.linspace(-3*gauss_std, 3*gauss_std, 1+6*gauss_std),\n", + " scale=gauss_std\n", + " )\n", + " normer = np.ones_like(x)\n", + " smoothed_x = convolve(x, kernel, mode='same') / convolve(normer, kernel, mode='same')\n", + " \n", + " # Remove the indices added by the nan padding\n", + " return smoothed_x[1:-1]\n", + "\n", + "for us_state_name in US_STATE_ORDER:\n", + " state_deaths = deaths_us_df[deaths_us_df['Province_State']==us_state_name]\n", + " cumulative_state_deaths = []\n", + " for d in COMMON_DATE_INDEX:\n", + " date_string = '{d.month}/{d.day}/{y}'.format(d=d, y=d.year % 2000)\n", + " if date_string in state_deaths:\n", + " cumulative_state_deaths.append(\n", + " state_deaths[date_string].sum()\n", + " )\n", + " else:\n", + " cumulative_state_deaths.append(\n", + " np.nan\n", + " )\n", + " \n", + " # Store raw numbers (for direct comparison)\n", + " deaths_df[us_state_name] = cumulative_state_deaths\n", + " \n", + " # Store smoothed numbers (for beta analysis)\n", + " smoothed_cumulative_state_deaths = smooth(cumulative_state_deaths, gauss_std=SIR_SMOOTHING_STD)\n", + " 
smoothed_deaths_df[us_state_name] = smoothed_cumulative_state_deaths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "state_deaths = deaths_us_df[deaths_us_df['Province_State']==\"California\"]\n", + "cumulative_state_deaths = []\n", + "for d in COMMON_DATE_INDEX:\n", + " date_string = '{d.month}/{d.day}/{y}'.format(d=d, y=d.year % 2000)\n", + " if date_string in state_deaths:\n", + " cumulative_state_deaths.append(\n", + " state_deaths[date_string].sum()\n", + " )\n", + " else:\n", + " cumulative_state_deaths.append(\n", + " np.nan\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Visualize the deaths for a specified US state\n", + "state = \"California\"\n", + "\n", + "# Some values near the ends may be \"missing\" because of smoothing\n", + "deaths_df[state].plot(figsize=(15,5), x='Date', ylim=[0, 65000]);\n", + "smoothed_deaths_df[state].plot(figsize=(15,5), x='Date', title=\"COVID deaths in {}\".format(state), ylim=[0, 65000], grid=True);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Death rate: fraction of infected persons who die\n", + "SIR_MORTALITY = 0.02\n", + "\n", + "# Recovery rate: the inverse of expected time someone remains infected\n", + "SIR_GAMMA = 1 / 14" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This is the core \"SIR algebra\" used to infer S, I, R, and Beta at each date.\n", + "\n", + "def infer_sir_and_beta(us_state_name):\n", + " state_population = deaths_us_df[deaths_us_df['Province_State']==us_state_name]['Population'].sum()\n", + " \n", + " # Helpful to do this math in normalized numbers\n", + " dead = np.array(smoothed_deaths_df[us_state_name]) / state_population\n", + " vaccinated = np.array(vaccinated_df[us_state_name]) / state_population\n", + " 
\n", + " # Dead is the fraction of \"recovered\" that did not survive\n", + " # Also, the vaccinated lot is part of the recovered\n", + " recovered = dead / SIR_MORTALITY + vaccinated\n", + " \n", + " # The daily change in recovered (ignoring the vaccinated) is a fraction of the infected population on the previous day\n", + " infected = np.nan * np.zeros_like(dead)\n", + " infected[:-1] = (recovered[1:] - recovered[:-1] - (vaccinated[1:] - vaccinated[:-1])) / SIR_GAMMA\n", + " \n", + " # S+I+R must always = 1\n", + " susceptible = 1 - infected - recovered\n", + " \n", + " # Here's where things get interesting. The change in infected is due to...\n", + " change_in_i = infected[1:] - infected[:-1]\n", + " # ... infected people that transition to the recovered state (decreases I)...\n", + " expected_change_from_recovery = -infected[:-1] * SIR_GAMMA\n", + " # ... and susceptible people that transition to the infected state (increases I).\n", + " new_infections = change_in_i - expected_change_from_recovery\n", + " \n", + " # With these pieces, we can solve for Beta.\n", + " beta_ = new_infections / (infected[:-1] * susceptible[:-1] + 1e-6)\n", + " beta_ = np.clip(beta_, 0, 1)\n", + " # Apply a threshold in terms of normalized daily deaths (if too low, beta estimates are bad)\n", + " normalized_daily_deaths = dead[1:]-dead[:-1]\n", + " ndd_lookback = np.zeros_like(new_infections)\n", + " lookback_window = 3*SIR_SMOOTHING_STD\n", + " ndd_cutoff = 1e-8\n", + " ndd_lookback[lookback_window:] = normalized_daily_deaths[:-lookback_window]\n", + " beta_[np.logical_not(ndd_lookback > 1e-8)] = np.nan\n", + " \n", + " beta = np.nan * np.zeros_like(dead)\n", + " beta[:-1] = beta_\n", + " \n", + " # Undo normalization\n", + " susceptible *= state_population\n", + " infected *= state_population\n", + " recovered *= state_population\n", + " \n", + " return susceptible, infected, recovered, beta" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "# Fill the SIR and Beta dataframes with their inferred values\n", + "for st in US_STATE_ORDER:\n", + " susceptible_df[st], infected_df[st], recovered_df[st], beta_df[st] = infer_sir_and_beta(us_state_name=st)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Visualize the SIR and BETA for a specified US state\n", + "# Warning: some values near the ends may be \"missing\" because of smoothing\n", + "\n", + "state = \"California\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "susceptible_df[state].plot(figsize=(15,3), x='Date', title=\"(Inferred) Susceptible Population in {}\".format(state), grid=True);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "infected_df[state].plot(figsize=(15,3), x='Date', title=\"(Inferred) Infected Population in {}\".format(state), grid=True);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "recovered_df[state].plot(figsize=(15,3), x='Date', title=\"(Inferred) Recovered Population in {}\".format(state), grid=True);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "beta_df[state].plot(figsize=(15,3), x='Date', title=\"(Inferred) SIR Beta in {}\".format(state), grid=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 5. 
COVID-19 Unemployment data\n", + "### Source: Bureau of Labor and Statistics\n", + "\n", + "https://www.bls.gov/lau/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "monthly_unemployment_us = DatasetCovidUnemploymentUS(\n", + " data_dir=data_dir,\n", + " download_latest_data=DOWNLOAD_LATEST_DATA).data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sample_monthly_unemployment = monthly_unemployment_us['California']\n", + "unemp_year_keys = sorted(sample_monthly_unemployment.keys())\n", + "unemp_starting_month_key = sorted(sample_monthly_unemployment[unemp_year_keys[0]].keys())[0]\n", + "unemp_ending_month_key = sorted(sample_monthly_unemployment[unemp_year_keys[-1]].keys())[-1]\n", + "unemp_starting_date = datetime.strptime(\n", + " str(unemp_year_keys[0]) + '-' + str(unemp_ending_month_key+1) + '-1', DATE_FORMAT)\n", + "unemp_ending_date = datetime.strptime(\n", + " str(unemp_year_keys[-1]) + '-' + str(unemp_ending_month_key+1) + '-1', DATE_FORMAT) - timedelta(1)\n", + "\n", + "print(\"Unemployment data is available between {} and {}\".format(datetime.strftime(unemp_starting_date, DATE_FORMAT),\n", + " datetime.strftime(unemp_ending_date, DATE_FORMAT)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Convert this to a daily unemployment dataframe\n", + "\n", + "unemployment_df = pd.DataFrame(COMMON_DATE_INDEX, columns=['Date']).set_index('Date')\n", + "\n", + "for us_state_name in monthly_unemployment_us.keys():\n", + " unemployment_df[us_state_name] = [\n", + " monthly_unemployment_us[us_state_name][x.year].get(x.month, np.nan)\n", + " for x in unemployment_df.index\n", + " ]\n", + "unemployment_df = unemployment_df[US_STATE_ORDER]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Visualize the 
unemployment rate for a specified US state\n", + "# There is likely going to be some unemployment data missing at the tail end, \n", + "# as the unemployment data isn't updated as frequently as the other data.\n", + "\n", + "state = \"California\"\n", + "unemployment_df[state].plot(figsize=(15,5), x='Date', title=\"Unemployment for {} (%)\".format(state), grid=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Unemployment rate -> unemployed (the number of unemployed people)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "us_state_to_pop_dict = {}\n", + "for us_state in US_STATE_ORDER:\n", + " us_state_to_pop_dict[us_state] = deaths_us_df[deaths_us_df.Province_State==us_state].Population.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "unemployed_df = unemployment_df.multiply([us_state_to_pop_dict[col]/100.0 for col in unemployment_df.columns])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Saving" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save some of the data processing constants for use within the environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_constants_dict = {}\n", + "\n", + "model_constants_dict[\"DATE_FORMAT\"] = DATE_FORMAT\n", + "model_constants_dict[\"STRINGENCY_POLICY_KEY\"] = STRINGENCY_POLICY_KEY\n", + "model_constants_dict[\"NUM_STRINGENCY_LEVELS\"] = int(NUM_STRINGENCY_LEVELS)\n", + "model_constants_dict[\"SIR_SMOOTHING_STD\"] = SIR_SMOOTHING_STD\n", + "model_constants_dict[\"SIR_MORTALITY\"] = SIR_MORTALITY\n", + "model_constants_dict[\"SIR_GAMMA\"] = SIR_GAMMA\n", + "model_constants_dict[\"US_STATE_IDX_TO_STATE_NAME\"] = {\n", + " us_state_idx: us_state for us_state_idx, us_state in enumerate(US_STATE_ORDER)\n", + "}\n", 
+ "model_constants_dict[\"US_STATE_POPULATION\"] = [int(us_state_to_pop_dict[us_state]) for us_state in US_STATE_ORDER]\n", + "model_constants_dict[\"US_POPULATION\"] = int(sum([us_state_to_pop_dict[us_state] for us_state in US_STATE_ORDER]))\n", + "\n", + "# 2019: https://data.worldbank.org/indicator/NY.GDP.PCAP.CD?locations=US&view=chart\n", + "model_constants_dict[\"GDP_PER_CAPITA\"] = 65300 # TODO: Load this in from model_constants.json.\n", + "\n", + "model_constants_filename = \"model_constants.json\"\n", + "with open(os.path.join(data_dir, model_constants_filename), \"w\") as fp: \n", + " json.dump(model_constants_dict, fp)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save all the processed dataframes in order to use for model fitting notebook (fit_model_parameters.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataframes = {\n", + " \"policy\": policy_df,\n", + " \"subsidy\": subsidy_df,\n", + " \"deaths\": deaths_df,\n", + " \"vaccinated\": vaccinated_df,\n", + " \"smoothed_deaths\": smoothed_deaths_df,\n", + " \"susceptible\": susceptible_df,\n", + " \"infected\": infected_df,\n", + " \"recovered\": recovered_df,\n", + " \"beta\": beta_df,\n", + " \"unemployment\": unemployment_df,\n", + " \"unemployed\": unemployed_df,\n", + "}\n", + "\n", + "for k, df in dataframes.items():\n", + " if k == \"subsidy\": # This is at the USA level, not at the US states level\n", + " continue\n", + " assert (df.columns.to_list() == US_STATE_ORDER).all()\n", + "\n", + "with open(os.path.join(data_dir, 'dataframes.pkl'), 'wb') as F:\n", + " pickle.dump(dataframes, F)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Also save all the data as numpy arrays for use within the covid19 simulation environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "real_world_data = 
{}\n", + "for key in dataframes:\n", + " real_world_data[key] = dataframes[key].values\n", + " \n", + "# Save the real-world data as a .npz for use within the environment\n", + "np.savez(os.path.join(data_dir, \"real_world_data.npz\"), **real_world_data) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Finally, in order to use this gathered real-world data when you run the covid19 simulation, you will need to also" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Run the \"fit_model_parameters.ipynb\" notebook with the base data directory specified below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"BASE_DATA_DIR_PATH = '{}'\".format(BASE_DATA_DIR_PATH))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Set \"path_to_data_and_fitted_params\" in the env config also to the data directory below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"path_to_data_and_fitted_params = '{}'\".format(data_dir))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/ai_economist/datasets/covid19_datasets/us_deaths.py b/ai_economist/datasets/covid19_datasets/us_deaths.py new file mode 100644 index 0000000..7cb0a00 --- /dev/null +++ b/ai_economist/datasets/covid19_datasets/us_deaths.py @@ -0,0 +1,54 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import os +from io import BytesIO + +import pandas as pd +import requests + + +class DatasetCovidDeathsUS: + """ + Class to load COVID-19 deaths data for the US. + Source: https://github.com/CSSEGISandData/COVID-19 + Note: in this dataset, reporting deaths only started on the 22nd of January 2020, + + Attributes: + df: Timeseries dataframe of confirmed COVID deaths for all the US states + """ + + def __init__(self, data_dir="", download_latest_data=True): + if not os.path.exists(data_dir): + print( + "Creating a dynamic data directory to store " + "COVID-19 deaths data: {}".format(data_dir) + ) + os.makedirs(data_dir) + + filename = "daily_us_deaths.csv" + if download_latest_data or filename not in os.listdir(data_dir): + print( + "Fetching latest U.S. COVID-19 deaths data from John Hopkins, " + "and saving it in {}".format(data_dir) + ) + + req = requests.get( + "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/" + "csse_covid_19_data/csse_covid_19_time_series/" + "time_series_covid19_deaths_US.csv" + ) + self.df = pd.read_csv(BytesIO(req.content)) + self.df.to_csv( + os.path.join(data_dir, filename) + ) # Note: performs an overwrite + else: + print( + "Not fetching the latest U.S. COVID-19 deaths data from John Hopkins." + " Using whatever was saved earlier in {}!!".format(data_dir) + ) + assert filename in os.listdir(data_dir) + self.df = pd.read_csv(os.path.join(data_dir, filename), low_memory=False) diff --git a/ai_economist/datasets/covid19_datasets/us_policies.py b/ai_economist/datasets/covid19_datasets/us_policies.py new file mode 100644 index 0000000..486579c --- /dev/null +++ b/ai_economist/datasets/covid19_datasets/us_policies.py @@ -0,0 +1,122 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + + +import os +from datetime import datetime +from io import BytesIO + +import numpy as np +import pandas as pd +import requests + + +class DatasetCovidPoliciesUS: + """ + Class to load COVID-19 government policies for the US states. + Source: https://github.com/OxCGRT/USA-covid-policy + + Other references: + - Codebook: https://github.com/OxCGRT/covid-policy-tracker/blob/master/ + documentation/codebook.md + - Index computation methodology: https://github.com/OxCGRT/covid-policy-tracker/ + blob/master/documentation/index_methodology.md + + Attributes: + df: Timeseries dataframe of state-wide policies + """ + + def __init__(self, data_dir="", download_latest_data=True): + if not os.path.exists(data_dir): + print( + "Creating a dynamic data directory to store COVID-19 " + "policy tracking data: {}".format(data_dir) + ) + os.makedirs(data_dir) + + filename = "daily_us_policies.csv" + if download_latest_data or filename not in os.listdir(data_dir): + print( + "Fetching latest U.S. COVID-19 policies data from OxCGRT, " + "and saving it in {}".format(data_dir) + ) + req = requests.get( + "https://raw.githubusercontent.com/OxCGRT/USA-covid-policy/master/" + "data/OxCGRT_US_latest.csv" + ) + self.df = pd.read_csv(BytesIO(req.content), low_memory=False) + self.df["Date"] = self.df["Date"].apply( + lambda x: datetime.strptime(str(x), "%Y%m%d") + ) + + # Fetch only the state-wide policies + self.df = self.df.loc[self.df["Jurisdiction"] != "NAT_GOV"] + + self.df.to_csv( + os.path.join(data_dir, filename) + ) # Note: performs an overwrite + else: + print( + "Not fetching the latest U.S. COVID-19 policies data from OxCGRT. 
" + "Using whatever was saved earlier in {}!!".format(data_dir) + ) + assert filename in os.listdir(data_dir) + self.df = pd.read_csv(os.path.join(data_dir, filename), low_memory=False) + + def process_policy_data( + self, stringency_policy_key="StringencyIndex", num_stringency_levels=10 + ): + """ + Gather the relevant policy indicator frm the dataframe, + fill in the null values (if any), + and discretize/quantize the policy into num_stringency_levels. + Note: Possible values for stringency_policy_key are + ["StringencyIndex", "Government response index", + "Containment and health index", "Economic Support index".] + Reference: https://github.com/OxCGRT/covid-policy-tracker/blob/master/ + documentation/index_methodology.md + """ + + def discretize(policies, num_indicator_levels=10): + """ + Discretize the policies (a Pandas series) into num_indicator_levels + """ + # Indices are normalized to be in [0, 100] + bins = np.linspace(0, 100, num_indicator_levels) + # Find left and right values of bin and find the nearer edge + bin_index = np.digitize(policies, bins, right=True) + bin_left_edges = bins[bin_index - 1] + bin_right_edges = bins[bin_index] + discretized_policies = bin_index + np.argmin( + np.stack( + ( + np.abs(policies.values - bin_left_edges), + np.abs(policies.values - bin_right_edges), + ) + ), + axis=0, + ) + return discretized_policies + + # Gather just the relevant columns + policy_df = self.df[["RegionName", "Date", stringency_policy_key]].copy() + + # Fill in null values via a "forward fill" + policy_df[stringency_policy_key].fillna(method="ffill", inplace=True) + + # Discretize the stringency indices + discretized_stringency_policies = discretize( + policy_df[stringency_policy_key], num_indicator_levels=num_stringency_levels + ) + policy_df.loc[:, stringency_policy_key] = discretized_stringency_policies + + # Replace Washington DC by District of Columbia to keep consistent + # (with the other data sources) + policy_df = 
policy_df.replace("Washington DC", "District of Columbia") + + policy_df = policy_df.sort_values(by=["RegionName", "Date"]) + + return policy_df diff --git a/ai_economist/datasets/covid19_datasets/us_unemployment.py b/ai_economist/datasets/covid19_datasets/us_unemployment.py new file mode 100644 index 0000000..6552f61 --- /dev/null +++ b/ai_economist/datasets/covid19_datasets/us_unemployment.py @@ -0,0 +1,128 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import bz2 +import os +import pickle +import queue +import threading +import urllib.request as urllib2 + +import pandas as pd +from bs4 import BeautifulSoup + + +class DatasetCovidUnemploymentUS: + """ + Class to load COVID-19 unemployment data for the US states. + Source: https://www.bls.gov/lau/ + """ + + def __init__(self, data_dir="", download_latest_data=True): + if not os.path.exists(data_dir): + print( + "Creating a dynamic data directory to store COVID-19 " + "unemployment data: {}".format(data_dir) + ) + os.makedirs(data_dir) + + filename = "monthly_us_unemployment.bz2" + if download_latest_data or filename not in os.listdir(data_dir): + # Construct the U.S. state to FIPS code mapping + state_fips_df = pd.read_excel( + "https://www2.census.gov/programs-surveys/popest/geographies/2017/" + "state-geocodes-v2017.xlsx", + header=5, + ) + # remove all statistical areas and cities + state_fips_df = state_fips_df.loc[state_fips_df["State (FIPS)"] != 0] + self.us_state_to_fips_dict = pd.Series( + state_fips_df["State (FIPS)"].values, index=state_fips_df.Name + ).to_dict() + + print( + "Fetching the U.S. 
unemployment data from " + "Bureau of Labor and Statistics, and saving it in {}".format(data_dir) + ) + self.data = self.scrape_bls_data() + fp = bz2.BZ2File(os.path.join(data_dir, filename), "wb") + pickle.dump(self.data, fp) + fp.close() + + else: + print( + "Not fetching the U.S. unemployment data from Bureau of Labor and" + " Statistics. Using whatever was saved earlier in {}!!".format(data_dir) + ) + assert filename in os.listdir(data_dir) + with bz2.BZ2File(os.path.join(data_dir, filename), "rb") as fp: + self.data = pickle.load(fp) + fp.close() + + # Scrape monthly unemployment from the Bureau of Labor Statistics website + def get_monthly_bls_unemployment_rates(self, state_fips): + with urllib2.urlopen( + "https://data.bls.gov/timeseries/LASST{:02d}0000000000003".format( + state_fips + ) + ) as response: + html_doc = response.read() + + soup = BeautifulSoup(html_doc, "html.parser") + table = soup.find_all("table")[1] + table_rows = table.find_all("tr") + + unemployment_dict = {} + + mth2idx = { + "Jan": 1, + "Feb": 2, + "Mar": 3, + "Apr": 4, + "May": 5, + "Jun": 6, + "Jul": 7, + "Aug": 8, + "Sep": 9, + "Oct": 10, + "Nov": 11, + "Dec": 12, + } + + for tr in table_rows[1:-1]: + td = tr.find_all("td")[-1] + unemp = float("".join([c for c in td.text if c.isdigit() or c == "."])) + th = tr.find_all("th") + year = int(th[0].text) + month = mth2idx[th[1].text] + if year not in unemployment_dict: + unemployment_dict[year] = {} + unemployment_dict[year][month] = unemp + + return unemployment_dict + + def scrape_bls_data(self): + def do_scrape(us_state, fips, queue_obj): + out = self.get_monthly_bls_unemployment_rates(fips) + queue_obj.put([us_state, out]) + + print("Getting BLS Data. 
This might take a minute...") + result = queue.Queue() + threads = [ + threading.Thread(target=do_scrape, args=(us_state, fips, result)) + for us_state, fips in self.us_state_to_fips_dict.items() + ] + for t in threads: + t.start() + for t in threads: + t.join() + + monthly_unemployment = {} + while not result.empty(): + us_state, data = result.get() + monthly_unemployment[us_state] = data + + return monthly_unemployment diff --git a/ai_economist/datasets/covid19_datasets/us_vaccinations.py b/ai_economist/datasets/covid19_datasets/us_vaccinations.py new file mode 100644 index 0000000..9e21be2 --- /dev/null +++ b/ai_economist/datasets/covid19_datasets/us_vaccinations.py @@ -0,0 +1,61 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import os +from io import BytesIO + +import pandas as pd +import requests + + +class DatasetCovidVaccinationsUS: + """ + Class to load COVID-19 vaccination data for the US. + Source: https://ourworldindata.org/covid-vaccinations + + Attributes: + df: Timeseries dataframe of COVID vaccinations for all the US states + """ + + def __init__(self, data_dir="", download_latest_data=True): + if not os.path.exists(data_dir): + print( + "Creating a dynamic data directory to store COVID-19 " + "vaccination data: {}".format(data_dir) + ) + os.makedirs(data_dir) + + filename = "daily_us_vaccinations.csv" + if download_latest_data or filename not in os.listdir(data_dir): + print( + "Fetching latest U.S. 
COVID-19 vaccination data from " + "Our World in Data, and saving it in {}".format(data_dir) + ) + + req = requests.get( + "https://raw.githubusercontent.com/owid/covid-19-data/master/" + "public/data/vaccinations/us_state_vaccinations.csv" + ) + self.df = pd.read_csv(BytesIO(req.content)) + + # Rename New York State to New York for consistency with other datasets + self.df = self.df.replace("New York State", "New York") + + # Interpolate missing values + self.df = self.df.interpolate(method="linear") + + self.df.to_csv( + os.path.join(data_dir, filename) + ) # Note: performs an overwrite + else: + print( + "Not fetching the latest U.S. COVID-19 deaths data from " + "Our World in Data. Using whatever was saved earlier in {}!!".format( + data_dir + ) + ) + assert filename in os.listdir(data_dir) + self.df = pd.read_csv(os.path.join(data_dir, filename), low_memory=False) diff --git a/ai_economist/foundation/__init__.py b/ai_economist/foundation/__init__.py new file mode 100644 index 0000000..d92cf27 --- /dev/null +++ b/ai_economist/foundation/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +from ai_economist.foundation import utils +from ai_economist.foundation.agents import agent_registry as agents +from ai_economist.foundation.components import component_registry as components +from ai_economist.foundation.entities import endogenous_registry as endogenous +from ai_economist.foundation.entities import landmark_registry as landmarks +from ai_economist.foundation.entities import resource_registry as resources +from ai_economist.foundation.scenarios import scenario_registry as scenarios + + +def make_env_instance(scenario_name, **kwargs): + scenario_class = scenarios.get(scenario_name) + return scenario_class(**kwargs) diff --git a/ai_economist/foundation/agents/__init__.py b/ai_economist/foundation/agents/__init__.py new file mode 100644 index 0000000..67df723 --- /dev/null +++ b/ai_economist/foundation/agents/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +from ai_economist.foundation.base.base_agent import agent_registry + +from . import mobiles, planners + +# Import files that add Agent class(es) to agent_registry +# ------------------------------------------------------- diff --git a/ai_economist/foundation/agents/mobiles.py b/ai_economist/foundation/agents/mobiles.py new file mode 100644 index 0000000..b9f0b8c --- /dev/null +++ b/ai_economist/foundation/agents/mobiles.py @@ -0,0 +1,18 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +from ai_economist.foundation.base.base_agent import BaseAgent, agent_registry + + +@agent_registry.add +class BasicMobileAgent(BaseAgent): + """ + A basic mobile agent represents an individual actor in the economic simulation. + + "Mobile" refers to agents of this type being able to move around in the 2D world. + """ + + name = "BasicMobileAgent" diff --git a/ai_economist/foundation/agents/planners.py b/ai_economist/foundation/agents/planners.py new file mode 100644 index 0000000..7f555fb --- /dev/null +++ b/ai_economist/foundation/agents/planners.py @@ -0,0 +1,40 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +from ai_economist.foundation.base.base_agent import BaseAgent, agent_registry + + +@agent_registry.add +class BasicPlanner(BaseAgent): + """ + A basic planner agent represents a social planner that sets macroeconomic policy. + + Unlike the "mobile" agent, the planner does not represent an embodied agent in + the world environment. BasicPlanner modifies the BaseAgent class to remove + location as part of the agent state. + + Also unlike the "mobile" agent, the planner agent is expected to be unique -- + that is, there should only be 1 planner. For this reason, BasicPlanner ignores + the idx argument during construction and always sets its agent index as "p". 
+ """ + + name = "BasicPlanner" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + del self.state["loc"] + + # Overwrite any specified index so that this one is always indexed as 'p' + # (make a separate class of planner if you want there to be multiple planners + # in a game) + self._idx = "p" + + @property + def loc(self): + """ + Planner agents do not occupy any location. + """ + raise AttributeError("BasicPlanner agents do not occupy a location.") diff --git a/ai_economist/foundation/base/__init__.py b/ai_economist/foundation/base/__init__.py new file mode 100644 index 0000000..fc6cee4 --- /dev/null +++ b/ai_economist/foundation/base/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause diff --git a/ai_economist/foundation/base/base_agent.py b/ai_economist/foundation/base/base_agent.py new file mode 100644 index 0000000..b5a627c --- /dev/null +++ b/ai_economist/foundation/base/base_agent.py @@ -0,0 +1,490 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import random + +import numpy as np + +from ai_economist.foundation.base.registrar import Registry + + +class BaseAgent: + """Base class for Agent classes. + + Instances of Agent classes are created for each agent in the environment. Agent + instances are stateful, capturing location, inventory, endogenous variables, + and any additional state fields created by environment components during + construction (see BaseComponent.get_additional_state_fields in base_component.py). 
+ + They also provide a simple API for getting/setting actions for each of their + registered action subspaces (which depend on the components used to build + the environment). + + Args: + idx (int or str): Index that uniquely identifies the agent object amongst the + other agent objects registered in its environment. + multi_action_mode (bool): Whether to allow the agent to take one action for + each of its registered action subspaces each timestep (if True), + or to limit the agent to take only one action each timestep (if False). + """ + + name = "" + + def __init__(self, idx=None, multi_action_mode=None): + assert self.name + + if idx is None: + idx = 0 + + if multi_action_mode is None: + multi_action_mode = False + + if isinstance(idx, str): + self._idx = idx + else: + self._idx = int(idx) + + self.multi_action_mode = bool(multi_action_mode) + self.single_action_map = ( + {} + ) # Used to convert single-action-mode actions to the general format + + self.action = dict() + self.action_dim = dict() + self._action_names = [] + self._multi_action_dict = {} + self._unique_actions = 0 + self._total_actions = 0 + + self.state = dict(loc=[0, 0], inventory={}, escrow={}, endogenous={}) + + self._registered_inventory = False + self._registered_endogenous = False + self._registered_components = False + self._noop_action_dict = dict() + + # Special flag to allow logic for multi-action-mode agents + # that are not given any actions. + self._passive_multi_action_agent = False + + # If this gets set to true, we can make masks faster + self._one_component_single_action = False + self._premask = None + + @property + def idx(self): + """Index used to identify this agent. 
Must be unique within the environment.""" + return self._idx + + def register_inventory(self, resources): + """Used during environment construction to populate inventory/escrow fields.""" + assert not self._registered_inventory + for entity_name in resources: + self.inventory[entity_name] = 0 + self.escrow[entity_name] = 0 + self._registered_inventory = True + + def register_endogenous(self, endogenous): + """Used during environment construction to populate endogenous state fields.""" + assert not self._registered_endogenous + for entity_name in endogenous: + self.endogenous[entity_name] = 0 + self._registered_endogenous = True + + def _incorporate_component(self, action_name, n): + extra_n = ( + 1 if self.multi_action_mode else 0 + ) # Each sub-action has a NO-OP in multi action mode) + self.action[action_name] = 0 + self.action_dim[action_name] = n + extra_n + self._action_names.append(action_name) + self._multi_action_dict[action_name] = False + self._unique_actions += 1 + if self.multi_action_mode: + self._total_actions += n + extra_n + else: + for action_n in range(1, n + 1): + self._total_actions += 1 + self.single_action_map[int(self._total_actions)] = [ + action_name, + action_n, + ] + + def register_components(self, components): + """Used during environment construction to set up state/action spaces.""" + assert not self._registered_components + for component in components: + n = component.get_n_actions(self.name) + if n is None: + continue + + # Most components will have a single action-per-agent, so n is an int + if isinstance(n, int): + if n == 0: + continue + self._incorporate_component(component.name, n) + + # They can also internally handle multiple actions-per-agent, + # so n is an tuple or list + elif isinstance(n, (tuple, list)): + for action_sub_name, n_ in n: + if n_ == 0: + continue + if "." 
in action_sub_name:
                        raise NameError(
                            "Sub-action {} of component {} "
                            "is illegally named.".format(
                                action_sub_name, component.name
                            )
                        )
                    # Register the sub-action under a "Component.sub_action" key
                    # so it can be addressed independently of its siblings.
                    self._incorporate_component(
                        "{}.{}".format(component.name, action_sub_name), n_
                    )

            # If that's not what we got something is funky.
            else:
                raise TypeError(
                    "Received unexpected type ({}) from {}.get_n_actions('{}')".format(
                        type(n), component.name, self.name
                    )
                )

            # Merge any extra state fields this component manages for agents of
            # this type into the agent's state dictionary.
            for k, v in component.get_additional_state_fields(self.name).items():
                self.state[k] = v

        # Currently no actions are available to this agent. Give it a placeholder.
        if len(self.action) == 0 and self.multi_action_mode:
            self._incorporate_component("PassiveAgentPlaceholder", 0)
            self._passive_multi_action_agent = True

        elif len(self.action) == 1 and not self.multi_action_mode:
            # Exactly one component and single-action mode: pre-allocate the
            # mask buffer so flatten_masks() can reuse it on every call.
            self._one_component_single_action = True
            self._premask = np.ones(1 + self._total_actions, dtype=np.float32)

        self._registered_components = True

        # Multiplying by 0 preserves each entry's type (int or int array)
        # while resetting it to the NO-OP action index.
        self._noop_action_dict = {k: v * 0 for k, v in self.action.items()}

        verbose = False
        if verbose:
            print(self.name, self.idx, "constructed action map:")
            for k, v in self.single_action_map.items():
                print("single action map:", k, v)
            for k, v in self.action.items():
                print("action:", k, v)
            for k, v in self.action_dim.items():
                print("action_dim:", k, v)

    @property
    def action_spaces(self):
        """
        if self.multi_action_mode == True:
            Returns an integer array with length equal to the number of action
            subspaces that the agent registered. The i'th element of the array
            indicates the number of actions associated with the i'th action subspace.
            In multi_action_mode, each subspace includes a NO-OP.
            Note: self._action_names describes which action subspace each element of
            the array refers to.

            Example:
                >> self.multi_action_mode
                True
                >> self.action_spaces
                [2, 5]
                >> self._action_names
                ["Build", "Gather"]
                # [1 Build action + Build NO-OP, 4 Gather actions + Gather NO-OP]

        if self.multi_action_mode == False:
            Returns a single integer equal to the total number of actions that the
            agent can take.

            Example:
                >> self.multi_action_mode
                False
                >> self.action_spaces
                6
                >> self._action_names
                ["Build", "Gather"]
                # 1 NO-OP + 1 Build action + 4 Gather actions.
        """
        if self.multi_action_mode:
            # One entry per subspace; reshape(-1) flattens scalar or array dims.
            action_dims = []
            for m in self._action_names:
                action_dims.append(np.array(self.action_dim[m]).reshape(-1))
            return np.concatenate(action_dims).astype(np.int32)
        n_actions = 1  # (NO-OP)
        for m in self._action_names:
            n_actions += self.action_dim[m]
        return n_actions

    @property
    def loc(self):
        """2D list of [row, col] representing agent's location in the environment."""
        return self.state["loc"]

    @property
    def endogenous(self):
        """Dictionary representing endogenous quantities (i.e. "Labor").

        Example:
            >> self.endogenous
            {"Labor": 30.25}
        """
        return self.state["endogenous"]

    @property
    def inventory(self):
        """Dictionary representing quantities of resources in agent's inventory.

        Example:
            >> self.inventory
            {"Wood": 3, "Stone": 20, "Coin": 1002.83}
        """
        return self.state["inventory"]

    @property
    def escrow(self):
        """Dictionary representing quantities of resources in agent's escrow.

        https://en.wikipedia.org/wiki/Escrow
        Escrow is used to manage any portion of the agent's inventory that is
        reserved for a particular purpose. Typically, something enters escrow as part
        of a contractual arrangement to disburse that something when another
        condition is met. An example is found in the ContinuousDoubleAuction
        Component class (see ../components/continuous_double_auction.py). When an
        agent creates an order to sell a unit of Wood, for example, the component
        moves one unit of Wood from the agent's inventory to its escrow. If another
        agent buys the Wood, it is moved from escrow to the other agent's inventory. By
        placing the Wood in escrow, it prevents the first agent from using it for
        something else (i.e. building a house).

        Notes:
            The inventory and escrow share the same keys. An agent's endowment refers
            to the total quantity it has in its inventory and escrow.

            Escrow is provided to simplify inventory management but its intended
            semantics are not enforced directly. It is up to Component classes to
            enforce these semantics.

        Example:
            >> self.escrow
            {"Wood": 0, "Stone": 1, "Coin": 3}
        """
        return self.state["escrow"]

    def inventory_to_escrow(self, resource, amount):
        """Move some amount of a resource from agent inventory to agent escrow.

        Amount transferred is capped to the amount of resource in agent inventory.

        Args:
            resource (str): The name of the resource to move (i.e. "Wood", "Coin").
            amount (float): The amount to be moved from inventory to escrow. Must be
                positive.

        Returns:
            Amount of resource actually transferred. Will be less than amount argument
                if amount argument exceeded the amount of resource in the inventory.
                Calculated as:
                    transferred = np.minimum(self.state["inventory"][resource], amount)
        """
        assert amount >= 0
        # Cap the transfer at what the inventory actually holds.
        transferred = float(np.minimum(self.state["inventory"][resource], amount))
        self.state["inventory"][resource] -= transferred
        self.state["escrow"][resource] += transferred
        return float(transferred)

    def escrow_to_inventory(self, resource, amount):
        """Move some amount of a resource from agent escrow to agent inventory.

        Amount transferred is capped to the amount of resource in agent escrow.

        Args:
            resource (str): The name of the resource to move (i.e. "Wood", "Coin").
            amount (float): The amount to be moved from escrow to inventory. Must be
                positive.

        Returns:
            Amount of resource actually transferred. Will be less than amount argument
                if amount argument exceeded the amount of resource in escrow.
                Calculated as:
                    transferred = np.minimum(self.state["escrow"][resource], amount)
        """
        assert amount >= 0
        # Cap the transfer at what the escrow actually holds.
        transferred = float(np.minimum(self.state["escrow"][resource], amount))
        self.state["escrow"][resource] -= transferred
        self.state["inventory"][resource] += transferred
        return float(transferred)

    def total_endowment(self, resource):
        """Get the combined inventory+escrow endowment of resource.

        Args:
            resource (str): Name of the resource

        Returns:
            The amount of resource in the agents inventory and escrow.

        """
        return self.inventory[resource] + self.escrow[resource]

    def reset_actions(self, component=None):
        """Reset all actions to the NO-OP action (the 0'th action index).

        If component is specified, only reset action(s) for that component.
        component may name either a full "Component.sub_action" key or a bare
        component, in which case every sub-action of that component is reset.
        """
        if not component:
            self.action.update(self._noop_action_dict)
        else:
            for k, v in self.action.items():
                if "." in component:
                    # Component given as "Component.sub_action": match exactly.
                    if k.lower() == component.lower():
                        self.action[k] = v * 0
                else:
                    # Bare component name: match the prefix before any ".".
                    base_component = k.split(".")[0]
                    if base_component.lower() == component.lower():
                        self.action[k] = v * 0

    def has_component(self, component_name):
        """Returns True if the agent has component_name as a registered subaction."""
        return bool(component_name in self.action)

    def get_random_action(self):
        """
        Select a component at random and randomly choose one of its actions (other
        than NO-OP).
        """
        random_component = random.choice(self._action_names)
        # Index 0 is the NO-OP, so sample from [1, action_dim).
        component_action = random.choice(
            list(range(1, self.action_dim[random_component]))
        )
        return {random_component: component_action}

    def get_component_action(self, component_name, sub_action_name=None):
        """
        Return the action(s) taken for component_name component, or None if the
        agent does not use that component.
        """
        if sub_action_name is not None:
            return self.action.get(component_name + "." + sub_action_name, None)
        # No sub-action given: gather every registered key whose base component
        # matches component_name.
        matching_names = [
            m for m in self._action_names if m.split(".")[0] == component_name
        ]
        if len(matching_names) == 0:
            return None
        if len(matching_names) == 1:
            return self.action.get(matching_names[0], None)
        return [self.action.get(m, None) for m in matching_names]

    def set_component_action(self, component_name, action):
        """Set the action(s) taken for component_name component."""
        if component_name not in self.action:
            raise KeyError(
                "Agent {} of type {} does not have {} registered as a subaction".format(
                    self.idx, self.name, component_name
                )
            )
        # Multi-action subspaces store an int array; single ones store an int.
        if self._multi_action_dict[component_name]:
            self.action[component_name] = np.array(action, dtype=np.int32)
        else:
            self.action[component_name] = int(action)

    def populate_random_actions(self):
        """Fill the action buffer with random actions. This is for testing."""
        for component, d in self.action_dim.items():
            if isinstance(d, int):
                self.set_component_action(component, np.random.randint(0, d))
            else:
                # d is per-sub-action dims; draw one uniform index per entry.
                d_array = np.array(d)
                self.set_component_action(
                    component, np.floor(np.random.rand(*d_array.shape) * d_array)
                )

    def parse_actions(self, actions):
        """Parse the actions array to fill each component's action buffers."""
        if self.multi_action_mode:
            assert len(actions) == self._unique_actions
            if len(actions) == 1:
                self.set_component_action(self._action_names[0], actions[0])
            else:
                for action_name, action in zip(self._action_names, actions):
                    self.set_component_action(action_name, int(action))

        # Single action mode
        else:
            # Action was supplied as an index of a specific subaction.
            # No need to do any lookup.
            if isinstance(actions, dict):
                if len(actions) == 0:
                    return
                assert len(actions) == 1
                action_name = list(actions.keys())[0]
                action = list(actions.values())[0]
                if action == 0:
                    return
                self.set_component_action(action_name, action)

            # Action was supplied as an index into the full set of combined actions
            else:
                action = int(actions)
                # Universal NO-OP
                if action == 0:
                    return
                # Map the flat index back to (subspace name, local index).
                action_name, action = self.single_action_map.get(action)
                self.set_component_action(action_name, action)

    def flatten_masks(self, mask_dict):
        """Convert a dictionary of component action masks into a single mask vector."""
        if self._one_component_single_action:
            # Reuse the pre-allocated buffer; slot 0 (NO-OP) stays enabled.
            self._premask[1:] = mask_dict[self._action_names[0]]
            return self._premask

        no_op_mask = [1]

        if self._passive_multi_action_agent:
            # Placeholder agents only ever have the NO-OP available.
            return np.array(no_op_mask).astype(np.float32)

        list_of_masks = []
        if not self.multi_action_mode:
            # Single-action mode: one global NO-OP slot at the front.
            list_of_masks.append(no_op_mask)
        for m in self._action_names:
            if m not in mask_dict:
                raise KeyError("No mask provided for {} (agent {})".format(m, self.idx))
            if self.multi_action_mode:
                # Multi-action mode: one NO-OP slot per subspace.
                list_of_masks.append(no_op_mask)
            list_of_masks.append(mask_dict[m])
        return np.concatenate(list_of_masks).astype(np.float32)


agent_registry = Registry(BaseAgent)
"""The registry for Agent classes.

This creates a registry object for Agent classes. This registry requires that all
added classes are subclasses of BaseAgent. To make an Agent class available through
the registry, decorate the class definition with @agent_registry.add.

Example:
    from ai_economist.foundation.base.base_agent import BaseAgent, agent_registry

    @agent_registry.add
    class ExampleAgent(BaseAgent):
        name = "Example"
        pass

    assert agent_registry.has("Example")

    AgentClass = agent_registry.get("Example")
    agent = AgentClass(...)
    assert isinstance(agent, ExampleAgent)

Notes:
    The foundation package exposes the agent registry as: foundation.agents

    An Agent class that is defined and registered following the above example will
    only be visible in foundation.agents if defined/registered in a file that is
    imported in ../agents/__init__.py.
"""
diff --git a/ai_economist/foundation/base/base_component.py b/ai_economist/foundation/base/base_component.py
new file mode 100644
index 0000000..aea66b8
--- /dev/null
+++ b/ai_economist/foundation/base/base_component.py
@@ -0,0 +1,406 @@
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause

from abc import ABC, abstractmethod

import numpy as np

from ai_economist.foundation.agents import agent_registry
from ai_economist.foundation.base.registrar import Registry
from ai_economist.foundation.base.world import World


class BaseComponent(ABC):
    """
    Base Component class. Should be used as the parent class for Component classes.
    Component instances are used to add some particular dynamics to an environment.
    They also add action spaces through which agents can interact with the
    environment via the component instance.

    Environments expand the agents' state/action spaces by querying:
        get_n_actions
        get_additional_state_fields

    Environments expand their dynamics by querying:
        component_step
        generate_observations
        generate_masks

    Environments expand logging behavior by querying:
        get_metrics
        get_dense_log

    Because they are built as Python objects, component instances can also be
    stateful. Stateful attributes are reset via calls to:
        additional_reset_steps

    The semantics of each method, and how they can be used to construct an instance
    of the Component class, are detailed below.

    Refer to ../components/move.py for an example of a Component class that enables
    mobile agents to move and collect resources in the environment world.
    """

    # The name associated with this Component class (must be unique).
    # Note: This is what will identify the Component class in the component registry.
    name = ""

    # An optional shorthand description of what the component implements (i.e.
    # "Trading", "Building", etc.). See BaseEnvironment.get_component and
    # BaseEnvironment._finalize_logs to see where this may add convenience.
    # Does not need to be unique.
    component_type = None

    # The (sub)classes of agents that this component applies to
    agent_subclasses = None  # Replace with list or tuple (can be empty)

    # The (non-agent) game entities that are expected to be in play
    required_entities = None  # Replace with list or tuple (can be empty)

    def __init__(self, world, episode_length, inventory_scale=1):
        assert self.name

        assert isinstance(self.agent_subclasses, (tuple, list))
        assert len(self.agent_subclasses) > 0
        if len(self.agent_subclasses) > 1:
            # No declared agent subclass may be a subclass of another; otherwise
            # an agent could ambiguously match more than one declared entry.
            for i in range(len(self.agent_subclasses)):
                for j in range(len(self.agent_subclasses)):
                    if i == j:
                        continue
                    a_i = agent_registry.get(self.agent_subclasses[i])
                    a_j = agent_registry.get(self.agent_subclasses[j])
                    assert not issubclass(a_i, a_j)

        assert isinstance(self.required_entities, (tuple, list))

        self.check_world(world)
        self._world = world

        assert isinstance(episode_length, int) and episode_length > 0
        self._episode_length = episode_length

        # Convenience references into the shared world state.
        self.n_agents = world.n_agents
        self.resources = world.resources
        self.landmarks = world.landmarks

        self.timescale = 1
        assert self.timescale >= 1

        self._inventory_scale = float(inventory_scale)

    @property
    def world(self):
        """The world object of the environment this component instance is part of.

        The world object exposes the spatial/agent states through:
            world.maps    # Reference to maps object representing spatial state
            world.agents  # List of self.n_agents mobile agent objects
            world.planner # Reference to planner agent object

        See world.py and base_agent.py for additional API details.
        """
        return self._world

    @property
    def episode_length(self):
        """Episode length of the environment this component instance is a part of."""
        return int(self._episode_length)

    @property
    def inv_scale(self):
        """
        Value by which to scale quantities when generating observations.

        Note: This property is set by the environment during construction and
        allows each component instance within the environment to refer to the same
        scaling value. How the value is actually used depends on the implementation
        of generate_observations().
        """
        return self._inventory_scale

    @property
    def shorthand(self):
        """The shorthand name, or name if no component_type is defined."""
        return self.name if self.component_type is None else self.component_type

    @staticmethod
    def check_world(world):
        """Validate the world object."""
        assert isinstance(world, World)

    def reset(self):
        """Reset any portion of the state managed by this component."""
        world = self.world
        all_agents = world.agents + [world.planner]
        # Restore each agent's component-managed state fields to their reset
        # values, as declared by get_additional_state_fields().
        for agent in all_agents:
            agent.state.update(self.get_additional_state_fields(agent.name))

        # This method allows components to define additional reset steps
        self.additional_reset_steps()

    def obs(self):
        """
        Observation produced by this component, given current world/agents/component
        state.
        """
        # This is mostly just to ensure formatting.
        obs = self.generate_observations()
        assert isinstance(obs, dict)
        # Normalize agent indices to string keys.
        obs = {str(k): v for k, v in obs.items()}
        return obs

    # Required methods for implementing components
    # --------------------------------------------

    @abstractmethod
    def get_n_actions(self, agent_cls_name):
        """
        Return the number of actions (not including NO-OPs) for agents of type
        agent_cls_name.

        Args:
            agent_cls_name (str): name of the Agent class for which number of actions
                is being queried. For example, "BasicMobileAgent".

        Returns:
            action_space (None, int, or list): If the component does not add any
                actions for agents of type agent_cls_name, return None. If it adds a
                single action space, return an integer specifying the number of
                actions in the action space. If it adds multiple action spaces,
                return a list of tuples ("action_set_name", num_actions_in_set).
                See below for further detail.

        If agent_class_name type agents do not participate in the component, simply
        return None

        In the next simplest case, the component adds one set of n different actions
        for agents of type agent_cls_name. In this case, return n (as an int). For
        example, if Component implements moving up, down, left, or right for
        "BasicMobileAgent" agents, then Component.get_n_actions('Mobile') should
        return 4.

        If the component adds multiple sets of actions for a given agent type, this
        method should return a list of tuples:
            [("action_set_name_1", n_1), ..., ("action_set_name_M", n_M)],
        where M is the number of different sets of actions, and n_k is the number of
        actions in action set k.
        For example, if Component allows agent 'Planner' to set some tax for each of
        individual Mobile agents, and there are 3 such agents, then:
            Component.get_n_actions('Planner') should return, i.e.,
            [('Tax_0', 10), ('Tax_1', 10), ('Tax_2', 10)],
        where, in this example, the Planner agent can choose 10 different tax
        levels for each Mobile agent.
        """

    @abstractmethod
    def get_additional_state_fields(self, agent_cls_name):
        """
        Return a dictionary of {state_field: reset_val} managed by this Component
        class for agents of type agent_cls_name. This also partially controls reset
        behavior.

        Args:
            agent_cls_name (str): name of the Agent class for which additional states
                are being queried. For example, "BasicMobileAgent".

        Returns:
            extra_state_dict (dict): A dictionary of {"state_field": reset_val} for
                each extra state field that this component adds/manages to agents of
                type agent_cls_name. This extra_state_dict is incorporated into
                agent.state for each agent of this type. Note that the keyed fields
                will be reset to reset_val when the environment is reset.

        If the component has its own internal state, the protocol for resetting that
        should be written into the custom method 'additional_reset_steps()' [see below].

        States that are meant to be internal to the component do not need to be
        registered as agent state fields. Rather, adding to the agent state fields is
        most useful when two or more components refer to or affect the same state. In
        general, however, if the component expects a particular state field to exist,
        it should return that field (and its reset value) here.
        """

    @abstractmethod
    def component_step(self):
        """
        For all relevant agents, execute the actions specific to this Component class.
        This is essentially where the component logic is implemented and what allows
        components to create environment dynamics.

        If the component expects certain resources/landmarks/entities to be in play,
        it must declare them in 'required_entities' so that they can be registered as
        part of the world and, where appropriate, part of the agent inventory.

        If the component expects non-standard fields to exist in agent.state for one
        or more agent types, that must be reflected in get_additional_state_fields().
        """

    @abstractmethod
    def generate_observations(self):
        """
        Generate observations associated with this Component class.

        A component does not need to produce observations and can provide observations
        for only some agent types; however, for a given environment, the structure of
        the observations returned by this component should be identical between
        subsequent calls to generate_observations. That is, the agents that receive
        observations should remain consistent as should the structure of their
        individual observations.

        Returns:
            obs (dict): A dictionary of {agent.idx: agent_obs_dict}. In words,
                return a dictionary with an entry for each agent (which can include
                the planner) for which this component provides an observation. For each
                entry, the key specifies the index of the agent and the value contains
                its associated observation dictionary.
        """

    @abstractmethod
    def generate_masks(self, completions=0):
        """
        Create action masks to indicate which actions are and are not valid. Actions
        that are valid should be given a value of 1 and 0 otherwise. Do not generate
        a mask for the NO-OP action, which is always available.

        Args:
            completions (int): The number of completed episodes. This is intended to
                be used in the case that actions may be masked or unmasked as part of a
                learning curriculum.

        Returns:
            masks (dict): A dictionary of {agent.idx: mask} with an entry for each
                agent that can interact with this component. See below.


        The expected output parallels the action subspaces defined by get_n_actions():
        The output should be a dictionary of {agent.idx: mask} keyed for all agents
        that take actions via this component.

        For example, say the component defines a set of 4 actions for agents of type
        "BasicMobileAgent" (self.get_n_actions("BasicMobileAgent") --> 4). Because all
        action spaces include a NO-OP action, there are 5 available actions,
        interpreted in this example as: NO-OP (index=0), moving up (index=1),
        down (index=2), left (index=3), or right (index=4). Say also that agent-0 (the
        agent with agent.idx=0) is prevented from moving left but can otherwise move.
        In this case, generate_masks(world)['0'] should point to a length-4 binary
        array, specifically [1, 1, 0, 1]. Note that the mask is length 4 while
        technically 5 actions are available. This is because NO-OP should be ignored
        when constructing masks.

        In the more complex case where the component defines several action sets for
        an agent, say the planner agent (the agent with agent.idx='p'), then
        generate_masks(world)['p'] should point to a dictionary of
        {"action_set_name_m": mask_m} for each of the M action sets associated with
        agent p's type. Each such value, mask_m, should be a binary array whose
        length matches the number of actions in "action_set_name_m".

        The default behavior (below) keeps all actions available. The code gives an
        example of expected formatting.
        """
        world = self.world
        masks = {}
        # For all the agents in the environment
        for agent in world.agents + [world.planner]:
            # Get any action space(s) defined by this component for this agent
            n_actions = self.get_n_actions(agent.name)

            # If no action spaces are defined, just move on.
            if n_actions is None:
                continue

            # If a single action space is defined, n_actions corresponds to the
            # number of (non NO-OP) actions. Return an array of ones of that length,
            # enabling all actions.
            if isinstance(n_actions, (int, float)):
                masks[agent.idx] = np.ones(int(n_actions))

            # If multiple action spaces are defined, n_actions corresponds to the
            # tuple or list giving ("name", N) for each action space, where "name"
            # is the unique name and N is the number of (non NO-OP) actions
            # associated with that action space.
            # Return a dictionary of {"name": length-N ones array}, enabling all
            # actions in all the action spaces.
            elif isinstance(n_actions, (tuple, list)):
                masks[agent.idx] = {
                    sub_name: np.ones(int(sub_n)) for sub_name, sub_n in n_actions
                }

            else:
                raise TypeError

        return masks

    # For non-required customization
    # ------------------------------

    def additional_reset_steps(self):
        """
        Use this method to implement additional steps that the component should
        perform at reset. Useful for resetting internal trackers.

        This method should not return anything.
        """
        return

    def get_metrics(self):
        """
        Returns a dictionary of custom metrics describing the episode through the
        lens of the component.

        For example, if Build is a subclass of BaseComponent that implements building,
        Build.get_metrics() might return a dictionary with terms relating to the
        number of things each agent built.

        Returns:
            metrics (dict or None): A dictionary of {"metric_key": metric_value}
                entries describing the metrics that this component calculates. The
                environment combines scenario metrics with each of the metric
                dictionaries produced by its component instances. metric_value is
                expected to be a scalar.
                By returning None instead of a dictionary, the component is ignored
                by the environment when constructing the full metric report.
        """
        return None

    def get_dense_log(self):
        """
        Return the dense log, either a tuple, list, or dict, of the episode through the
        lens of this component.

        If this component does not yield a dense log, return None (default behavior).
        """
        return None


component_registry = Registry(BaseComponent)
"""The registry for Component classes.

This creates a registry object for Component classes. This registry requires that all
added classes are subclasses of BaseComponent. To make a Component class available
through the registry, decorate the class definition with @component_registry.add.
+ +Example: + from ai_economist.foundation.base.base_component + import BaseComponent, component_registry + + @component_registry.add + class ExampleComponent(BaseComponent): + name = "Example" + pass + + assert component_registry.has("Example") + + ComponentClass = component_registry.get("Example") + component = ComponentClass(...) + assert isinstance(component, ExampleComponent) + +Notes: + The foundation package exposes the component registry as: foundation.components + + A Component class that is defined and registered following the above example will + only be visible in foundation.components if defined/registered in a file that is + imported in ../components/__init__.py. +""" diff --git a/ai_economist/foundation/base/base_env.py b/ai_economist/foundation/base/base_env.py new file mode 100644 index 0000000..051e6fa --- /dev/null +++ b/ai_economist/foundation/base/base_env.py @@ -0,0 +1,1157 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import random +from abc import ABC, abstractmethod +from copy import deepcopy + +import numpy as np + +from ai_economist.foundation.agents import agent_registry +from ai_economist.foundation.base.registrar import Registry +from ai_economist.foundation.base.world import World +from ai_economist.foundation.components import component_registry +from ai_economist.foundation.entities import ( + endogenous_registry, + landmark_registry, + resource_registry, +) + + +class BaseEnvironment(ABC): + """ + Base Environment class. Should be used as the parent class for Scenario classes. + Instantiates world, agent, and component objects. 
+ + Provides Gym-style API for resetting and stepping: + obs <-- env.reset() + obs, rew, done, info <-- env.step(actions) + + Also provides Gym-style API for controlling random behavior: + env.seed(seed) # Sets numpy and built-in RNG seeds to seed + + Reference: OpenAI Gym [https://github.com/openai/gym] + + Environments in this framework are instances of Scenario classes (which are built + as extensions of BaseEnvironment). A Scenario must implement the following + abstract methods (method docstrings provide detailed explanations): + reset_starting_layout + reset_agent_states + scenario_step + generate_observations + compute_reward + + Scenario classes define their own passive dynamics--that is, dynamics that do not + depend on agent actions--and supply observations. It is up to the Scenario class + to handle reward. + + Interactions with the environment are handled through components, which define + actions that agents can perform. Components are defined through distinct + Component classes (which extend BaseComponent [see base_component.py]) and must + be included in the components_registry in order to be used (see below). + Components influence the environment dynamics through effects they have on + agent/world states. They also (optionally) supply observations. + + The actions available to the agents, observations they receive, the dynamics of + the environment, and the rewards depend of the choice of which Scenario class and + Component class(es) to use. + + In multi_action_mode, an agent may choose an action for each of the action + subspaces defined by the included Component classes. A Component can define 0, 1, + or several action subspaces for a given agent type. If not using + multi_action_mode, these action subspaces are combined into a single action space + and the agent may select one action within this aggregated space. + + For additional detail regarding actions and action subspaces, see the + BaseComponent class in base_component.py. 
+ + There are 2 types of agents: mobile agents and the planner agent. There can be + two or more mobile agents and a single planner agent. Conceptually, mobile agents + represent the individual actors in the economic simulation while the planner + agent represents a social planner that sets macroeconomic policy. + + This environment framework makes extensive use of Python classes. Scenarios, + Components, Agents, and environment entities such as Resources, Landmarks, + and Endogenous variables are all implemented as classes. These classes are + accessed via registries. See top example. + + Example: + from ai_economist import foundation + # foundation.scenarios <-- Scenario class registry + # foundation.components <-- Component class registry + # foundation.agents <-- Agent class registry + # foundation.resources <-- Resource class registry + # foundation.landmarks <-- Landmark class registry + # foundation.endogenous <-- Endogenous class registry + + # see ../scenarios/simple_wood_and_stone/dynamic_layout.py + UniScenarioClass = foundation.scenarios.get("uniform/simple_wood_and_stone") + + # see ../components/build.py and ../components/move.py + BuildComponentClass = foundation.components.get("Build") + GatherComponentClass = foundation.components.get("Gather") + + Example: + from ai_economist import foundation + from ai_economist.foundation.base.base_env import BaseEnvironment + + ScenarioClass = foundation.scenarios.get(...) + assert issubclass(ScenarioClass, BaseEnvironment) + + env = ScenarioClass( + components=[ + ("Build", {"payment": 20}), + ("Gather", {"move_labor": 1.0, "collect_labor": 2.0}), + ], + n_agents=20, + world_size=[25, 25], + ) + + obs = env.reset() + + actions = {agent.idx: ... 
for agent in env.all_agents} + obs, rew, done, info = env.step(actions) + + Args: + components (list): A list of tuples ("Component Name", {Component kwargs}) or + list of dicts {"Component Name": {Component kwargs}} specifying the + components that the instantiated environment will include. + "Component Name" must be a string matching the name of a registered + Component class. + {Component kwargs} must be a dictionary of kwargs that can be passed as + arguments to the Component class with name "Component Name". + Resetting, stepping, and observation generation will be carried out in + the order in which components are listed. This should be considered, + as re-ordering the components list may impact the dynamics of the + environment. + agent_composition (dict): Agent Class name in string paired with amount of agents of that class + n_agents (int): The number of mobile agents (does not include planner). + Number of agents must be > 1. + world_size (list): A length-2 list specifying the dimensions of the 2D world. + Interpreted as [height, width]. + episode_length (int): Number of timesteps in a single episode. + multi_action_mode_agents (bool): Whether mobile agents use multi_action_mode. + multi_action_mode_planner (bool): Whether the planner uses multi_action_mode. + flatten_observations (bool): Whether to preprocess observations by + concatenating all scalar/vector observation subfields into a single + "flat" observation field. If not, return observations as minimally + processed dictionaries. + flatten_masks (bool): Whether to flatten action masks into a single array or + to keep as a {"action_subspace_name": action_subspace_mask} dictionary. + For integration with deep RL, it is helpful to set this to True, for the + purpose of action masking: flattened masks have the same semantics as + policy logits. + allow_observation_scaling (bool): Whether to enable certain observation + fields to be scaled to a range better suited for deep RL. 
+ dense_log_frequency (int): [optional] How often (in completed episodes) to + create a dense log while playing an episode. By default, dense logging is + turned off (dense_log_frequency=None). If dense_log_frequency=20, + a dense log will be created when the total episode count is a multiple of + 20. + Dense logs provide a log of agent states, actions, and rewards at each + timestep of an episode. They also log world states at a coarser timescale + (see below). Component classes optionally contribute additional + information to the dense log. + Note: dense logging is time consuming (especially with many agents). + world_dense_log_frequency (int): When dense logging, how often (in timesteps) to + log a snapshot of the world state. If world_dense_log_frequency=50 + (the default), the world state will be included in the dense log for + timesteps where t is a multiple of 50. + Note: More frequent world snapshots increase the dense log memory footprint. + seed (int, optional): If provided, sets the numpy and built-in random number + generator seeds to seed. You can control the seed after env construction + using the 'seed' method. + """ + + # The name associated with this Scenario class (must be unique) + # Note: This is what will identify the Scenario class in the scenario registry. 
+ name = "" + + # The (sub)classes of agents that this scenario applies to + agent_subclasses = [] + + # The (non-agent) game entities that are expected to be in play + required_entities = None # Replace with list or tuple (can be empty) + + def __init__( + self, + components=None, + agent_composition=None, + world_size=None, + episode_length=1000, + multi_action_mode_agents=False, + multi_action_mode_planner=True, + flatten_observations=True, + flatten_masks=True, + allow_observation_scaling=True, + dense_log_frequency=None, + world_dense_log_frequency=50, + collate_agent_step_and_reset_data=False, + seed=None, + ): + + # Make sure a name was declared by child class + assert self.name + # Make sure the agent_subclasses was declared by child class + # and does not create potential conflicts + assert isinstance(self.agent_subclasses, (tuple, list)) + assert len(self.agent_subclasses) > 0 + if len(self.agent_subclasses) > 1: + for i in range(len(self.agent_subclasses)): + for j in range(len(self.agent_subclasses)): + if i == j: + continue + a_i = agent_registry.get(self.agent_subclasses[i]) + a_j = agent_registry.get(self.agent_subclasses[j]) + assert not issubclass(a_i, a_j) + + # Make sure the required_entities was declared by child class + # (will typecheck later) + assert isinstance(self.required_entities, (tuple, list)) + + # World size must be a tuple or list of length 2, + # specifying [Height, Width] of the game map + assert isinstance(world_size, (tuple, list)) + assert len(world_size) == 2 + self.world_size = world_size + + # Set n_agents + self.agent_composition=agent_composition + n_agents=0 + for k,v in agent_composition: + n_agents+=v + + # Number of agents must be an integer and there must be at least 2 agents + assert isinstance(n_agents, int) + assert n_agents >= 2 + self.n_agents = n_agents + + # Foundation assumes there's only a single planner + n_planners = 1 + self.num_agents = ( + n_agents + n_planners + ) # used in the warp_drive env wrapper (+ 
1 for the planner) + + # Components must be a tuple/list where each element is either a... + # tuple: ('Component Name', {Component kwargs}) + # dict : {'Component Name': {Component kwargs}} + assert isinstance(components, (tuple, list)) + + def spec_is_valid(spec): + """Return True if component specification is validly configured.""" + if isinstance(spec, (tuple, list)): + if len(spec) != 2: + return False + return isinstance(spec[0], str) and isinstance(spec[1], dict) + if isinstance(spec, dict): + if len(spec) != 1: + return False + key_is_str = isinstance(list(spec.keys())[0], str) + val_is_dict = isinstance(list(spec.values())[0], dict) + return key_is_str and val_is_dict + return False + + assert all(spec_is_valid(component) for component in components) + + self._episode_length = int(episode_length) + assert self._episode_length >= 1 + + # Can an agent/planner execute multiple actions (1 per action subspace) per + # timestep (=True) or just one action (=False) + self.multi_action_mode_agents = bool(multi_action_mode_agents) + self.multi_action_mode_planner = bool(multi_action_mode_planner) + + # Whether to allow the world to scale observations + self._allow_observation_scaling = bool(allow_observation_scaling) + + # Whether to flatten the observation dictionaries before returning them + # Note: flattened observations are still returned as dictionaries, but with + # all scalar/vector observation fields concatenated into a single "flat" field. 
+ self._flatten_observations = bool(flatten_observations) + + # Whether to flatten the mask dictionaries before putting them in the obs + self._flatten_masks = bool(flatten_masks) + + # How often (in episode completions) to create a dense log + self._dense_log_this_episode = False + if dense_log_frequency is None: # Only create a dense log + # if manually specified during reset + self._create_dense_log_every = None + else: # Create a dense log every dense_log_frequency episodes + self._create_dense_log_every = int(dense_log_frequency) + assert self._create_dense_log_every >= 1 + + # How often (in timesteps) to snapshot the world map when creating the denselog + self._world_dense_log_frequency = int(world_dense_log_frequency) + assert self._world_dense_log_frequency >= 1 + + # Seed control + if seed is not None: + self.seed(seed) + + # Initialize the set of entities used in the game that's being created. + # Coin and Labor are always included. + self._entities = { + "resources": ["Coin"], + "landmarks": [], + "endogenous": ["Labor"], + } + self._register_entities(self.required_entities) + + # Register all the components to get the entities they rely on. + self._components = [] + self._components_dict = {} + self._shorthand_lookup = {} + component_classes = [] + for component_spec in components: + if isinstance(component_spec, (tuple, list)): + component_name, component_config = component_spec + elif isinstance(component_spec, dict): + assert len(component_spec) == 1 + component_name = list(component_spec.keys())[0] + component_config = list(component_spec.values())[0] + else: + raise TypeError + component_cls = component_registry.get(component_name) + self._register_entities(component_cls.required_entities) + component_classes.append([component_cls, component_config]) + + # Initialize the world object (contains agents and world map), + # now that we know all the entities we'll use. 
+ self.world = World( + self.world_size, + self.n_agents, + self.resources, + self.landmarks, + self.multi_action_mode_agents, + self.multi_action_mode_planner, + ) + + # Initialize the component objects. + for component_cls, component_kwargs in component_classes: + component_object = component_cls( + self.world, + self._episode_length, + inventory_scale=self.inv_scale, + **component_kwargs + ) + self._components.append(component_object) + self._components_dict[component_object.name] = component_object + self._shorthand_lookup[component_object.shorthand] = component_object + + # Register the components with the agents + # to finish setting up their state/action spaces. + for agent in self.world.agents: + agent.register_inventory(self.resources) + agent.register_endogenous(self.endogenous) + agent.register_components(self._components) + self.world.planner.register_inventory(self.resources) + self.world.planner.register_components(self._components) + + self._agent_lookup = {str(agent.idx): agent for agent in self.all_agents} + + self._completions = 0 + + self._last_ep_metrics = None + + # For dense logging + self._dense_log = {"world": [], "states": [], "actions": [], "rewards": []} + self._last_ep_dense_log = self.dense_log.copy() + + # For episode replay + self._replay_log = {"reset": dict(seed_state=None), "step": []} + self._last_ep_replay_log = self.replay_log.copy() + + self._packagers = {} + + # To collate all the agents ('0', '1', ...) 
data during reset and step + # into a single agent with index 'a' + self.collate_agent_step_and_reset_data = collate_agent_step_and_reset_data + + def _register_entities(self, entities): + for entity in entities: + if resource_registry.has(entity): + if entity not in self._entities["resources"]: + self._entities["resources"].append(entity) + elif landmark_registry.has(entity): + if entity not in self._entities["landmarks"]: + self._entities["landmarks"].append(entity) + elif endogenous_registry.has(entity): + if entity not in self._entities["endogenous"]: + self._entities["endogenous"].append(entity) + else: + raise KeyError("Unknown entity: {}".format(entity)) + + # Properties + # ---------- + + @property + def episode_length(self): + """Length of an episode, in timesteps.""" + return int(self._episode_length) + + @property + def inv_scale(self): + """Scale value to be used for inventory scaling. 1 if no scaling enabled.""" + return 0.01 if self._allow_observation_scaling else 1 + + @property + def resources(self): + """List of resources managed by this environment instance.""" + return sorted(list(self._entities["resources"])) + + @property + def landmarks(self): + """List of landmarks managed by this environment instance.""" + return sorted(list(self._entities["landmarks"])) + + @property + def endogenous(self): + """List of endogenous quantities managed by this environment instance.""" + return sorted(list(self._entities["endogenous"])) + + @property + def all_agents(self): + """List of mobile agents and the planner agent.""" + return self.world.agents + [self.world.planner] + + @property + def previous_episode_metrics(self): + """Metrics from the end of the last completed episode.""" + return self._last_ep_metrics + + @property + def metrics(self): + """The combined metrics yielded by the scenario and the components.""" + metrics = self.scenario_metrics() or {} + + for component in self._components: + m_metrics = component.get_metrics() + if not m_metrics: + 
continue + for k, v in m_metrics.items(): + metrics["{}/{}".format(component.shorthand, k)] = v + + return metrics + + @property + def components(self): + """The list of components associated with this scenario.""" + return self._components + + @property + def dense_log(self): + """The contents of the current (potentially incomplete) dense log.""" + return self._dense_log + + @property + def replay_log(self): + """The contents of the current (potentially incomplete) replay log.""" + return self._replay_log + + @property + def previous_episode_dense_log(self): + """Dense log from the last completed episode that was being logged.""" + return self._last_ep_dense_log + + @property + def previous_episode_replay_log(self): + """ + Replay log from the last completed episode. Serves as a compact encoding of + an episode by allowing the episode to be perfectly reproduced. + + Examples: + # replay log of the episode to be reproduced + replay_log = env.previous_episode_replay_log + + # recover episode metrics and dense log via replay + _ = env.reset(force_dense_logging=True, **replay_log['reset']) + for replay_step in replay_log['step']: + _ = env.step(**replay_step) + dense_log = env.previous_episode_dense_log + metrics = env.previous_episode_metrics + """ + return self._last_ep_replay_log + + @property + def generate_rewards(self): + """Compute the rewards for each agent.""" + return self._generate_rewards + + # Seed control + # ----------------- + + @staticmethod + def seed(seed): + """Sets the numpy and built-in random number generator seed. + + Args: + seed (int, float): Seed value to use. Must be > 0. Converted to int + internally if provided value is a float. + """ + assert isinstance(seed, (int, float)) + seed = int(seed) + assert seed > 0 + + np.random.seed(seed) + random.seed(seed) + + # Getters & Setters + # ----------------- + + def get_component(self, component_name): + """ + Get the component object instance wrapped in the environment. 
+ + Args: + component_name (str): Name or shorthand name of the Component class to get. + Must correspond to a name or shorthand of one of the components that + is included in this environment instance. + + Returns: + component (BaseComponent object) + """ + if component_name not in self._components_dict: + if component_name not in self._shorthand_lookup: + raise KeyError( + "No component with name or shorthand name {} found; " + "registered components are:\n".format(component_name) + + "\n\t".join(list(self._components_dict.keys())) + ) + return self._shorthand_lookup[component_name] + return self._components_dict[component_name] + + def get_agent(self, agent_idx): + """ + Get the agent object instance with idx agent_idx. + + Args: + agent_idx (int or str): Identifier of the agent to return. Must match the + idx property of one of the agent objects in self.all_agents. + + Returns: + agent (BaseAgent object) + """ + agent = self._agent_lookup.get(str(agent_idx), None) + if agent is None: + raise ValueError("No agent with associated index {}".format(agent_idx)) + return agent + + def set_agent_component_action(self, agent_idx, component_name, action): + """ + Set agent with idx to take action for the action + subspace with name + + Args: + agent_idx (int or str): Identifier of the agent taking the action. Must + match the idx property of one of the agent objects in self.all_agents. + component_name (str): Name of the action subspace to set the action value + of. + action (int): Index of the chosen action. 
    def parse_actions(self, action_dictionary):
        """Put actions into the appropriate agent's action buffer"""
        for agent_idx, agent_actions in action_dictionary.items():
            agent = self.get_agent(agent_idx)
            agent.parse_actions(agent_actions)

    # Core control of environment execution
    # -------------------------------------

    @staticmethod
    def _build_packager(sub_obs, put_in_both=None):
        """
        Decides which keys-vals should be flattened or not.
        put_in_both: include in both (e.g., 'time')

        Returns (keep_as_is, flatten, wrap_as_list) for use with _package:
        keys kept verbatim, keys concatenated into the "flat" field (sorted for
        a deterministic layout), and a map of which values are scalars needing
        list-wrapping before concatenation.
        """
        if put_in_both is None:
            put_in_both = []
        keep_as_is = []
        flatten = []
        wrap_as_list = {}
        for k, v in sub_obs.items():
            if isinstance(v, np.ndarray):
                multi_d_array = len(v.shape) > 1
            else:
                multi_d_array = False

            # Action masks and multi-dimensional arrays cannot be concatenated
            # into a 1-D "flat" vector, so they are kept as separate entries.
            if k == "action_mask" or multi_d_array:
                keep_as_is.append(k)
            else:
                flatten.append(k)
                if k in put_in_both:
                    keep_as_is.append(k)

            wrap_as_list[k] = np.isscalar(v)

        flatten = sorted(flatten)

        return keep_as_is, flatten, wrap_as_list

    @staticmethod
    def _package(obs_dict, keep_as_is, flatten, wrap_as_list):
        """Apply a packager built by _build_packager: copy keep_as_is keys and
        concatenate the flatten keys into a single float32 "flat" vector."""
        new_obs = {k: obs_dict[k] for k in keep_as_is}
        if len(flatten) == 1:
            # Single key: avoid the concatenate call.
            k = flatten[0]
            o = obs_dict[k]
            if wrap_as_list[k]:
                o = [o]
            new_obs["flat"] = np.array(o, dtype=np.float32)
        else:
            to_flatten = [
                [obs_dict[k]] if wrap_as_list[k] else obs_dict[k] for k in flatten
            ]
            try:
                new_obs["flat"] = np.concatenate(to_flatten).astype(np.float32)
            except ValueError:
                # Shape mismatch: dump each field's shape to aid debugging,
                # then re-raise.
                for k, v in zip(flatten, to_flatten):
                    print(k, np.array(v).shape)
                    print(v)
                    print("")
                raise
        return new_obs

    def _generate_observations(self, flatten_observations=False, flatten_masks=False):
        """Collect observations from the scenario and every component, keyed by
        agent index, optionally flattening them and attaching action masks."""

        # NOTE(review): recursive_listify is defined but never called in this
        # method as visible here — possibly dead code; confirm before removing.
        def recursive_listify(d):
            assert isinstance(d, dict)
            for k, v in d.items():
                if isinstance(v, dict):
                    d[k] = recursive_listify(v)
                elif isinstance(v, (int, float)):
                    d[k] = v
                elif isinstance(v, (list, tuple, set)):
                    d[k] = list(v)
                elif isinstance(v, (np.ndarray, np.integer, np.floating)):
                    d[k] = v.tolist()
                else:
                    raise NotImplementedError(
                        "Not clear how to handle {} with type {}".format(k, type(v))
                    )
                if isinstance(d[k], list) and len(d[k]) == 1:
                    d[k] = d[k][0]
            return d

        # Initialize empty observations
        if self.collate_agent_step_and_reset_data:
            obs = {"a": {}, "p": {}}
        else:
            obs = {str(agent.idx): {} for agent in self.all_agents}
        # Per-agent observations addressed to the planner ("p0", "p1", ...);
        # merged into the planner's obs at the end.
        agent_wise_planner_obs = {
            "p" + str(agent.idx): {} for agent in self.world.agents
        }

        # Get/process observations generated by the scenario
        world_obs = {str(k): v for k, v in self.generate_observations().items()}
        time_scale = self.episode_length if self._allow_observation_scaling else 1.0
        for idx, o in world_obs.items():
            if idx in obs:
                # Scenario fields are namespaced with a "world-" prefix.
                obs[idx].update({"world-" + k: v for k, v in o.items()})
                if self.collate_agent_step_and_reset_data and idx == "a":
                    # Collated mode: one time entry per mobile agent.
                    obs[idx]["time"] = np.array(
                        [
                            self.world.timestep / time_scale
                            for _ in range(self.world.n_agents)
                        ]
                    )
                else:
                    obs[idx]["time"] = [self.world.timestep / time_scale]
            elif idx in agent_wise_planner_obs:
                agent_wise_planner_obs[idx].update(
                    {"world-" + k: v for k, v in o.items()}
                )
            else:
                raise KeyError

        # Get/process observations generated by the components
        for component in self._components:
            for idx, o in component.obs().items():
                if idx in obs:
                    # Component fields are namespaced with the component name.
                    obs[idx].update(
                        {component.name + "-" + k: v for k, v in o.items()}
                    )
                elif idx in agent_wise_planner_obs:
                    agent_wise_planner_obs[idx].update(
                        {component.name + "-" + k: v for k, v in o.items()}
                    )
                else:
                    raise KeyError

        # Process the observations
        if flatten_observations:
            for o_dict in [obs, agent_wise_planner_obs]:
                for aidx, aobs in o_dict.items():
                    if not aobs:
                        continue
                    # Packagers are built once per agent index and cached;
                    # assumes each agent's obs keys are stable across steps.
                    if aidx not in self._packagers:
                        self._packagers[aidx] = self._build_packager(
                            aobs, put_in_both=["time"]
                        )
                    try:
                        o_dict[aidx] = self._package(aobs, *self._packagers[aidx])
                    except ValueError:
                        print("Error when packaging obs.")
                        print("Agent index: {}\nRaw obs: {}\n".format(aidx, aobs))
                        raise

        # Fold the per-agent planner observations into the planner's obs dict.
        for k, v in agent_wise_planner_obs.items():
            if len(v) > 0:
                obs[self.world.planner.idx][k] = (
                    v["flat"] if flatten_observations else v
                )

        # Get each agent's action masks and incorporate them into the observations
        for aidx, amask in self._generate_masks(flatten_masks=flatten_masks).items():
            obs[aidx]["action_mask"] = amask

        return obs

    def _generate_masks(self, flatten_masks=True):
        """Gather per-component action masks for each agent, optionally
        flattening them into a single float32 vector per agent."""
        if self.collate_agent_step_and_reset_data:
            masks = {"a": {}, "p": {}}
        else:
            masks = {agent.idx: {} for agent in self.all_agents}
        for component in self._components:
            # Use the component's generate_masks method to get action masks
            component_masks = component.generate_masks(completions=self._completions)

            for idx, mask in component_masks.items():
                if isinstance(mask, dict):
                    # Multi-subspace component: one mask per sub-action,
                    # keyed "ComponentName.sub_action".
                    for sub_action, sub_mask in mask.items():
                        masks[idx][
                            "{}.{}".format(component.name, sub_action)
                        ] = sub_mask
                else:
                    masks[idx][component.name] = mask

        if flatten_masks:
            if self.collate_agent_step_and_reset_data:
                flattened_masks = {}
                for agent_id in masks.keys():
                    if agent_id == "a":
                        multi_action_mode = self.multi_action_mode_agents
                        no_op_mask = np.ones((1, self.n_agents))
                    elif agent_id == "p":
                        multi_action_mode = self.multi_action_mode_planner
                        no_op_mask = [1]
                    mask_dict = masks[agent_id]
                    list_of_masks = []
                    # NO-OP is always allowed: prepended once in single-action
                    # mode, or once per subspace in multi-action mode.
                    if not multi_action_mode:
                        list_of_masks.append(no_op_mask)
                    for m in mask_dict.keys():
                        if multi_action_mode:
                            list_of_masks.append(no_op_mask)
                        list_of_masks.append(mask_dict[m])
                    flattened_masks[agent_id] = np.concatenate(
                        list_of_masks, axis=0
                    ).astype(np.float32)
                return flattened_masks
            # Non-collated: each agent knows how to flatten its own masks.
            return {
                str(agent.idx): agent.flatten_masks(masks[agent.idx])
                for agent in self.all_agents
            }
        # Unflattened: cast every mask to a plain list of uint8 values.
        return {
            str(agent_idx): {
                k: np.array(v, dtype=np.uint8).tolist()
                for k, v in masks[agent_idx].items()
            }
            for agent_idx in list(masks.keys())
        }
    def _generate_rewards(self):
        """Call the scenario's reward function and normalize keys to strings."""
        rew = self.compute_reward()
        assert isinstance(rew, dict)
        return {str(k): v for k, v in rew.items()}

    def _finalize_logs(self):
        """Stash the replay log and metrics at episode end; if dense logging
        was active, append the final state and fold in component dense logs."""
        self._last_ep_replay_log = self._replay_log
        self._last_ep_metrics = self.metrics

        if not self._dense_log_this_episode:
            return

        def recursive_cast(d):
            # Convert numpy types to plain Python types so the dense log is
            # JSON-serializable.
            if isinstance(d, (list, tuple, set)):
                new_d = [recursive_cast(v_) for v_ in d]
                return new_d
            if isinstance(d, dict):
                for k, v in d.items():
                    if isinstance(v, (list, tuple, set, dict)):
                        d[k] = recursive_cast(v)
                    elif isinstance(v, (int, float, str)):
                        d[k] = v
                    elif isinstance(v, (np.ndarray, np.integer, np.floating)):
                        d[k] = v.tolist()
                    else:
                        raise NotImplementedError(
                            "Not clear how to handle {} with type {}".format(
                                k, type(v)
                            )
                        )
                return d
            if isinstance(d, (int, float, str)):
                return d
            if isinstance(d, (np.ndarray, np.integer, np.floating)):
                return d.tolist()
            raise NotImplementedError(
                "Not clear how to handle {} with type {}".format(d, type(d))
            )

        # Append the terminal world/agent state so the log covers the full
        # episode.
        self._dense_log["world"].append(deepcopy(self.world.maps.state_dict))
        self._dense_log["states"].append(
            {str(agent.idx): deepcopy(agent.state) for agent in self.all_agents}
        )

        # Back-fill the log with each component's dense log to complete the aggregate
        # dense log
        for component in self._components:
            component_log = component.get_dense_log()
            if component_log is None:
                continue
            if isinstance(component_log, dict):
                for k, v in component_log.items():
                    self._dense_log[component.shorthand + "-" + k] = v
            elif isinstance(component_log, (tuple, list)):
                self._dense_log[component.shorthand] = list(component_log)
            else:
                raise TypeError

        self._last_ep_dense_log = recursive_cast(self._dense_log)

    def collate_agent_obs(self, obs):
        # Collating observations from all agents: stack each obs field across
        # agents '0'..'n-1' (new trailing axis) under the single index 'a'.
        # Assumes all mobile agents share the same obs keys as agent '0'.
        if "a" in obs:  # already collated!
            return obs
        num_agents = len(obs.keys()) - 1  # excludes the planner key
        obs["a"] = {}
        for key in obs["0"].keys():
            obs["a"][key] = np.stack(
                [obs[str(agent_idx)][key] for agent_idx in range(num_agents)], axis=-1
            )
        for agent_idx in range(num_agents):
            del obs[str(agent_idx)]
        return obs

    def collate_agent_rew(self, rew):
        # Collating rewards from all agents into a single list under 'a'.
        if "a" in rew:  # already collated!
            return rew
        num_agents = len(rew.keys()) - 1  # excludes the planner key
        rew["a"] = []
        for agent_idx in range(num_agents):
            rew["a"] += [rew[str(agent_idx)]]
            del rew[str(agent_idx)]
        return rew

    def collate_agent_info(self, info):
        # Collating infos from all agents into a single dict under 'a'.
        if "a" in info:  # already collated!
            return info
        num_agents = len(info.keys()) - 1  # excludes the planner key
        info["a"] = {}
        for agent_idx in range(num_agents):
            info["a"][str(agent_idx)] = info[str(agent_idx)]
            del info[str(agent_idx)]
        return info

    def reset(self, seed_state=None, force_dense_logging=False):
        """
        Reset the state of the environment to initialize a new episode.

        Arguments:
            seed_state (tuple or list): Optional state that the numpy RNG should be set
                to prior to the reset cycle must be length 5, following the format
                expected by np.random.set_state()
            force_dense_logging (bool): Optional whether to force dense logging to take
                place this episode; default behavior is to do dense logging every
                create_dense_log_every episodes

        Returns:
            obs (dict): A dictionary of {"agent_idx": agent_obs} with an entry for
                each agent receiving observations. The "agent_idx" key identifies the
                agent receiving the observations in the associated agent_obs value,
                which itself is a dictionary. The "agent_idx" key matches the
                agent.idx property for the given agent.
        """
        if seed_state is not None:
            assert isinstance(seed_state, (tuple, list))
            assert len(seed_state) == 5
            # Coerce each element to the exact type np.random.set_state expects
            # (replay logs may have serialized them as plain lists/strings).
            seed_state = (
                str(seed_state[0]),
                np.array(seed_state[1], dtype=np.uint32),
                int(seed_state[2]),
                int(seed_state[3]),
                float(seed_state[4]),
            )
            np.random.set_state(seed_state)

        # Decide whether this episode is dense-logged.
        if force_dense_logging:
            self._dense_log_this_episode = True
        elif self._create_dense_log_every is None:
            self._dense_log_this_episode = False
        else:
            self._dense_log_this_episode = (
                self._completions % self._create_dense_log_every
            ) == 0

        # For dense logging
        self._dense_log = {"world": [], "states": [], "actions": [], "rewards": []}

        # For episode replay: record the RNG state so the episode can be
        # reproduced exactly.
        self._replay_log = {"reset": dict(seed_state=np.random.get_state()), "step": []}

        # Reset the timestep counter
        self.world.timestep = 0

        # Perform the scenario reset,
        # which includes resetting the world and agent states
        self.reset_starting_layout()
        self.reset_agent_states()

        # Perform the component resets for each registered component
        for component in self._components:
            component.reset()

        # Take any customized reset actions
        self.additional_reset_steps()

        # By default, agents take the NO-OP action for each action space.
        # Reset actions to that default.
        for agent in self.all_agents:
            agent.reset_actions()

        # Produce observations
        obs = self._generate_observations(
            flatten_observations=self._flatten_observations,
            flatten_masks=self._flatten_masks,
        )

        if self.collate_agent_step_and_reset_data:
            obs = self.collate_agent_obs(obs)

        return obs

    def step(self, actions=None, seed_state=None):
        """
        Execute the components, perform the scenario step, collect observations and
        return observations, rewards, dones, and infos.

        Arguments:
            actions (dict): dictionary of {agent_idx: action} with an entry for each
                agent (which may include the planner) that is supplying an action.
                The key identifies which agent the action is associated with. It
                should match that agent's agent.idx property.
                The value indicates which action the agent will take. The environment
                supports two formats for specifying an action, with slightly
                different expectations for multi_action_mode.
                If agent.multi_action_mode, action must be a list of integers
                specifying the chosen action for each action subspace.
                Otherwise, action must be a single integer specifying the chosen
                action (where the action space is the concatenation of the subspaces).
            seed_state (tuple or list): Optional state that the numpy RNG should be set
                to prior to the reset cycle must be length 5, following the format
                expected by np.random.set_state().

        Returns:
            obs (dict): A dictionary of {"agent_idx": agent_obs} with an entry for
                each agent receiving observations. The "agent_idx" key identifies the
                agent receiving the observations in the associated agent_obs value,
                which itself is a dictionary. The "agent_idx" key matches the
                agent.idx property for the given agent.
            rew (dict): A dictionary of {"agent_idx": reward} with an entry for each
                agent that also receives an observation. Each reward value is a scalar.
            done (dict): A dictionary with a single key "__all__". The associated
                value is False when self.world.timestep < self.episode_length and True
                otherwise.
            info (dict): Placeholder dictionary with structure {"agent_idx": {}},
                with the same keys as obs and rew.
        """
        if actions is not None:
            assert isinstance(actions, dict)
            self.parse_actions(actions)

        if seed_state is not None:
            assert isinstance(seed_state, (tuple, list))
            assert len(seed_state) == 5
            seed_state = (
                str(seed_state[0]),
                np.array(seed_state[1], dtype=np.uint32),
                int(seed_state[2]),
                int(seed_state[3]),
                float(seed_state[4]),
            )
            np.random.set_state(seed_state)

        # Record actions + RNG state BEFORE stepping, so replaying this entry
        # reproduces the step exactly.
        self._replay_log["step"].append(
            dict(actions=actions, seed_state=np.random.get_state())
        )

        if self._dense_log_this_episode:
            # World maps are snapshotted only every _world_dense_log_frequency
            # steps (empty dict otherwise) to bound the log size.
            self._dense_log["world"].append(
                deepcopy(self.world.maps.state_dict)
                if (self.world.timestep % self._world_dense_log_frequency) == 0
                else {}
            )
            self._dense_log["states"].append(
                {str(agent.idx): deepcopy(agent.state) for agent in self.all_agents}
            )
            # Only non-NO-OP actions (v > 0) are logged.
            self._dense_log["actions"].append(
                {
                    str(agent.idx): {k: v for k, v in agent.action.items() if v > 0}
                    for agent in self.all_agents
                }
            )

        self.world.timestep += 1

        # Components step first, then the scenario applies its own dynamics.
        for component in self._components:
            component.component_step()

        self.scenario_step()

        obs = self._generate_observations(
            flatten_observations=self._flatten_observations,
            flatten_masks=self._flatten_masks,
        )
        rew = self._generate_rewards()
        done = {"__all__": self.world.timestep >= self._episode_length}
        info = {k: {} for k in obs.keys()}

        if self._dense_log_this_episode:
            self._dense_log["rewards"].append(rew)

        # Reset all actions back to NO-OP for the next step.
        for agent in self.all_agents:
            agent.reset_actions()

        if done[
            "__all__"
        ]:  # Complete the dense log and stash it as well as the metrics
            self._finalize_logs()
            self._completions += 1

        if self.collate_agent_step_and_reset_data:
            obs = self.collate_agent_obs(obs)
            rew = self.collate_agent_rew(rew)
            info = self.collate_agent_info(info)

        return obs, rew, done, info

    # The following methods must be implemented for each scenario
    # -----------------------------------------------------------

    @abstractmethod
    def reset_starting_layout(self):
        """
        Part 1/2 of scenario reset. This method handles resetting the state of the
        environment managed by the scenario (i.e. resource & landmark layout).
        """
    @abstractmethod
    def reset_agent_states(self):
        """
        Part 2/2 of scenario reset. This method handles resetting the state of the
        agents themselves (i.e. inventory, locations, etc.).
        """

    @abstractmethod
    def scenario_step(self):
        """
        Update the state of the world according to whatever rules this scenario
        implements.

        This gets called in the 'step' method (of base_env) after going through each
        component step and before generating observations, rewards, etc.

        This is where things like resource regeneration, income redistribution, etc.,
        can be implemented.
        """

    @abstractmethod
    def generate_observations(self):
        """
        Generate observations associated with this scenario.

        A scenario does not need to produce observations and can provide observations
        for only some agent types; however, for a given agent type, it should either
        always or never yield an observation. If it does yield an observation,
        that observation should always have the same structure/sizes!

        Returns:
            obs (dict): A dictionary of {agent.idx: agent_obs_dict}. In words,
                return a dictionary with an entry for each agent (which can include
                the planner) for which this scenario provides an observation. For each
                entry, the key specifies the index of the agent and the value contains
                its associated observation dictionary.
        """

    @abstractmethod
    def compute_reward(self):
        """
        Apply the reward function(s) associated with this scenario to get the rewards
        from this step.

        Returns:
            rew (dict): A dictionary of {agent.idx: reward}. In words,
                return a dictionary with an entry for each agent in the environment
                (including the planner). For each entry, the key specifies the index of
                the agent and the value contains the scalar reward earned this timestep.
        """

    # Optional methods for customization
    # ----------------------------------

    def additional_reset_steps(self):
        """
        Extra scenario-specific steps that should be performed at the end of the reset
        cycle.

        For each reset cycle...
            First, reset_starting_layout() and reset_agent_states() will be called.

            Second, component.reset() will be called for each registered component.

            Lastly, this method will be called to allow for any final customization of
            the reset cycle.
        """

    def scenario_metrics(self):
        """
        Allows the scenario to generate metrics (collected along with component metrics
        in the 'metrics' property).

        To have the scenario add metrics, this function needs to return a dictionary of
        {metric_key: value} where 'value' is a scalar (no nesting or lists!)

        Returns None by default (no scenario metrics).
        """
        return


scenario_registry = Registry(BaseEnvironment)
"""The registry for Scenario classes.

This creates a registry object for Scenario classes. This registry requires that all
added classes are subclasses of BaseEnvironment. To make a Scenario class available
through the registry, decorate the class definition with @scenario_registry.add.

Example:
    from ai_economist.foundation.base.base_env
    import BaseEnvironment, scenario_registry

    @scenario_registry.add
    class ExampleScenario(BaseEnvironment):
        name = "Example"
        pass

    assert scenario_registry.has("Example")

    ScenarioClass = scenario_registry.get("Example")
    scenario = ScenarioClass(...)
    assert isinstance(scenario, ExampleScenario)

Notes:
    The foundation package exposes the scenario registry as: foundation.scenarios

    A Scenario class that is defined and registered following the above example will
    only be visible in foundation.scenarios if defined/registered in a file that is
    imported in ../scenarios/__init__.py.
"""
class Registry:
    """Name-indexed collection of related classes, with case-insensitive lookup.

    A Registry organizes "building block" classes (Components, Scenarios,
    Agents, ...) so environment builders can fetch them by name, and ensures
    every registered class inherits from a common base class.

    Args:
        base_class (class): If given, every registered class must be a
            subclass of it.

    Example:
        class BaseClass:
            pass

        registry = Registry(BaseClass)

        @registry.add
        class ExampleSubclassA(BaseClass):
            name = "ExampleA"
            pass

        @registry.add
        class ExampleSubclassB(BaseClass):
            name = "ExampleB"
            pass

        print(registry.entries)
        # ["ExampleA", "ExampleB"]

        assert registry.has("ExampleA")
        assert registry.get("ExampleB") is ExampleSubclassB
    """

    def __init__(self, base_class=None):
        self.base_class = base_class
        self._entries = []
        self._lookup = dict()

    def add(self, cls):
        """Register cls and return it, so it can be used as a @decorator.

        cls.name must not contain a '.' and, if a base_class was supplied,
        cls must subclass it.
        """
        assert "." not in cls.name
        if self.base_class:
            assert issubclass(cls, self.base_class)
        # Lookup is keyed on the lowercased name; entries keep original casing.
        self._lookup[cls.name.lower()] = cls
        if cls.name not in self._entries:
            self._entries.append(cls.name)
        return cls

    def get(self, cls_name):
        """Return the registered class whose name matches cls_name (case-insensitive).

        Raises:
            KeyError: If no class with that name was registered.
        """
        key = cls_name.lower()
        if key not in self._lookup:
            raise KeyError('"{}" is not a name of a registered class'.format(cls_name))
        return self._lookup[key]

    def has(self, cls_name):
        """Return True if a class named cls_name (case-insensitive) is registered."""
        return cls_name.lower() in self._lookup

    @property
    def entries(self):
        """Sorted list of the names of all registered classes."""
        return sorted(self._entries)
The maps object is accessible through the world object. The maps + object maintains a map state for each of the spatial entities that are involved + in the constructed environment (which are determined by the "required_entities" + attributes of the Scenario and Component classes used to build the environment). + + The Maps class also implements some of the basic spatial logic of the game, + such as which locations agents can occupy based on other agent locations and + locations of various landmarks. + + Args: + size (list): A length-2 list specifying the dimensions of the 2D world. + Interpreted as [height, width]. + n_agents (int): The number of mobile agents (does not include planner). + world_resources (list): The resources registered during environment + construction. + world_landmarks (list): The landmarks registered during environment + construction. + """ + + def __init__(self, size, n_agents, world_resources, world_landmarks): + self.size = size + self.sz_h, self.sz_w = size + + self.n_agents = n_agents + + self.resources = world_resources + self.landmarks = world_landmarks + self.entities = world_resources + world_landmarks + + self._maps = {} # All maps + self._blocked = [] # Solid objects that no agent can move through + self._private = [] # Solid objects that only permit movement for parent agents + self._public = [] # Non-solid objects that agents can move on top of + self._resources = [] # Non-solid objects that can be collected + + self._private_landmark_types = [] + self._resource_source_blocks = [] + + self._map_keys = [] + + self._accessibility_lookup = {} + + for resource in self.resources: + resource_cls = resource_registry.get(resource) + if resource_cls.collectible: + self._maps[resource] = np.zeros(shape=self.size) + self._resources.append(resource) + self._map_keys.append(resource) + + self.landmarks.append("{}SourceBlock".format(resource)) + + for landmark in self.landmarks: + dummy_landmark = landmark_registry.get(landmark)() + + if 
dummy_landmark.public: + self._maps[landmark] = np.zeros(shape=self.size) + self._public.append(landmark) + self._map_keys.append(landmark) + + elif dummy_landmark.blocking: + self._maps[landmark] = np.zeros(shape=self.size) + self._blocked.append(landmark) + self._map_keys.append(landmark) + self._accessibility_lookup[landmark] = len(self._accessibility_lookup) + + elif dummy_landmark.private: + self._private_landmark_types.append(landmark) + self._maps[landmark] = dict( + owner=-np.ones(shape=self.size, dtype=np.int16), + health=np.zeros(shape=self.size), + ) + self._private.append(landmark) + self._map_keys.append(landmark) + self._accessibility_lookup[landmark] = len(self._accessibility_lookup) + + else: + raise NotImplementedError + + self._idx_map = np.stack( + [i * np.ones(shape=self.size) for i in range(self.n_agents)] + ) + self._idx_array = np.arange(self.n_agents) + if self._accessibility_lookup: + self._accessibility = np.ones( + shape=[len(self._accessibility_lookup), self.n_agents] + self.size, + dtype=bool, + ) + self._net_accessibility = None + else: + self._accessibility = None + self._net_accessibility = np.ones( + shape=[self.n_agents] + self.size, dtype=bool + ) + + self._agent_locs = [None for _ in range(self.n_agents)] + self._unoccupied = np.ones(self.size, dtype=bool) + + def clear(self, entity_name=None): + """Clear resource and landmark maps.""" + if entity_name is not None: + assert entity_name in self._maps + if entity_name in self._private_landmark_types: + self._maps[entity_name] = dict( + owner=-np.ones(shape=self.size, dtype=np.int16), + health=np.zeros(shape=self.size), + ) + else: + self._maps[entity_name] *= 0 + + else: + for name in self.keys(): + self.clear(entity_name=name) + + if self._accessibility is not None: + self._accessibility = np.ones_like(self._accessibility) + self._net_accessibility = None + + def clear_agent_loc(self, agent=None): + """Remove agents or agent from the world map.""" + # Clear all agent locations + 
if agent is None: + self._agent_locs = [None for _ in range(self.n_agents)] + self._unoccupied[:, :] = 1 + + # Clear the location of the provided agent + else: + i = agent.idx + if self._agent_locs[i] is None: + return + r, c = self._agent_locs[i] + self._unoccupied[r, c] = 1 + self._agent_locs[i] = None + + def set_agent_loc(self, agent, r, c): + """Set the location of agent to [r, c]. + + Note: + Things might break if you set the agent's location to somewhere it + cannot access. Don't do that. + """ + assert (0 <= r < self.size[0]) and (0 <= c < self.size[1]) + i = agent.idx + # If the agent is currently on the board... + if self._agent_locs[i] is not None: + curr_r, curr_c = self._agent_locs[i] + # If the agent isn't actually moving, just return + if (curr_r, curr_c) == (r, c): + return + # Make the location the agent is currently at as unoccupied + # (since the agent is going to move) + self._unoccupied[curr_r, curr_c] = 1 + + # Set the agent location to the specified coordinates + # and update the occupation map + agent.state["loc"] = [r, c] + self._agent_locs[i] = [r, c] + self._unoccupied[r, c] = 0 + + def keys(self): + """Return an iterable over map keys.""" + return self._maps.keys() + + def values(self): + """Return an iterable over map values.""" + return self._maps.values() + + def items(self): + """Return an iterable over map (key, value) pairs.""" + return self._maps.items() + + def get(self, entity_name, owner=False): + """Return the map or ownership for entity_name.""" + assert entity_name in self._maps + if entity_name in self._private_landmark_types: + sub_key = "owner" if owner else "health" + return self._maps[entity_name][sub_key] + return self._maps[entity_name] + + def set(self, entity_name, map_state): + """Set the map for entity_name.""" + if entity_name in self._private_landmark_types: + assert "owner" in map_state + assert self.get(entity_name, owner=True).shape == map_state["owner"].shape + assert "health" in map_state + assert 
self.get(entity_name, owner=False).shape == map_state["health"].shape + + h = np.maximum(0.0, map_state["health"]) + o = map_state["owner"].astype(np.int16) + + o[h <= 0] = -1 + tmp = o[h > 0] + if len(tmp) > 0: + assert np.min(tmp) >= 0 + + self._maps[entity_name] = dict(owner=o, health=h) + + owned_by_agent = o[None] == self._idx_map + owned_by_none = o[None] == -1 + self._accessibility[ + self._accessibility_lookup[entity_name] + ] = np.logical_or(owned_by_agent, owned_by_none) + self._net_accessibility = None + + else: + assert self.get(entity_name).shape == map_state.shape + self._maps[entity_name] = np.maximum(0, map_state) + + if entity_name in self._blocked: + self._accessibility[ + self._accessibility_lookup[entity_name] + ] = np.repeat(map_state[None] == 0, self.n_agents, axis=0) + self._net_accessibility = None + + def set_add(self, entity_name, map_state): + """Add map_state to the existing map for entity_name.""" + assert entity_name not in self._private_landmark_types + self.set(entity_name, self.get(entity_name) + map_state) + + def get_point(self, entity_name, r, c, **kwargs): + """Return the entity state at the specified coordinates.""" + point_map = self.get(entity_name, **kwargs) + return point_map[r, c] + + def set_point(self, entity_name, r, c, val, owner=None): + """Set the entity state at the specified coordinates.""" + if entity_name in self._private_landmark_types: + assert owner is not None + h = self._maps[entity_name]["health"] + o = self._maps[entity_name]["owner"] + assert o[r, c] == -1 or o[r, c] == int(owner) + h[r, c] = np.maximum(0, val) + if h[r, c] == 0: + o[r, c] = -1 + else: + o[r, c] = int(owner) + + self._maps[entity_name]["owner"] = o + self._maps[entity_name]["health"] = h + + self._accessibility[ + self._accessibility_lookup[entity_name], :, r, c + ] = np.logical_or(o[r, c] == self._idx_array, o[r, c] == -1).astype(bool) + self._net_accessibility = None + + else: + self._maps[entity_name][r, c] = np.maximum(0, val) + + if 
entity_name in self._blocked: + self._accessibility[ + self._accessibility_lookup[entity_name] + ] = np.repeat(np.array([val]) == 0, self.n_agents, axis=0) + self._net_accessibility = None + + def set_point_add(self, entity_name, r, c, value, **kwargs): + """Add value to the existing entity state at the specified coordinates.""" + self.set_point( + entity_name, + r, + c, + value + self.get_point(entity_name, r, c, **kwargs), + **kwargs + ) + + def is_accessible(self, r, c, agent_id): + """Return True if agent with id agent_id can occupy the location [r, c].""" + return bool(self.accessibility[agent_id, r, c]) + + def location_resources(self, r, c): + """Return {resource: health} dictionary for any resources at location [r, c].""" + return { + k: self._maps[k][r, c] for k in self._resources if self._maps[k][r, c] > 0 + } + + def location_landmarks(self, r, c): + """Return {landmark: health} dictionary for any landmarks at location [r, c].""" + tmp = {k: self.get_point(k, r, c) for k in self.keys()} + return {k: v for k, v in tmp.items() if k not in self._resources and v > 0} + + @property + def unoccupied(self): + """Return a boolean map indicating which locations are unoccupied.""" + return self._unoccupied + + @property + def accessibility(self): + """Return a boolean map indicating which locations are accessible.""" + if self._net_accessibility is None: + self._net_accessibility = self._accessibility.prod(axis=0).astype(bool) + return self._net_accessibility + + @property + def empty(self): + """Return a boolean map indicating which locations are empty. 
+ + Empty locations have no landmarks or resources.""" + return self.state.sum(axis=0) == 0 + + @property + def state(self): + """Return the concatenated maps of landmark and resources.""" + return np.stack([self.get(k) for k in self.keys()]).astype(np.float32) + + @property + def owner_state(self): + """Return the concatenated ownership maps of private landmarks.""" + return np.stack( + [self.get(k, owner=True) for k in self._private_landmark_types] + ).astype(np.int16) + + @property + def state_dict(self): + """Return a dictionary of the map states.""" + return self._maps + + +class World: + """Manages the environment's spatial- and agent-states. + + The world object represents the state of the environment, minus whatever state + information is implicitly maintained by separate components. The world object + maintains the spatial state through an instance of the Maps class. Agent states + are maintained through instances of Agent classes (subclasses of BaseAgent), + with one such instance for each of the agents in the environment. + + The world object is built during the environment construction, after the + required entities have been registered. As part of the world object construction, + it instantiates a map object and the agent objects. + + The World class adds some functionality for interfacing with the spatial state + (the maps object) and setting/resetting agent locations. But its function is + mostly to wrap the stateful, non-component environment objects. + + Args: + world_size (list): A length-2 list specifying the dimensions of the 2D world. + Interpreted as [height, width]. + n_agents (int): The number of total agents (does not include planner). + agent_composition(dict): Dict of Agent Class names and amount + world_resources (list): The resources registered during environment + construction. + world_landmarks (list): The landmarks registered during environment + construction. 
+ multi_action_mode_agents (bool): Whether "mobile" agents use multi action mode + (see BaseEnvironment in base_env.py). + multi_action_mode_planner (bool): Whether the planner agent uses multi action + mode (see BaseEnvironment in base_env.py). + """ + + def __init__( + self, + world_size, + agent_composition, + world_resources, + world_landmarks, + multi_action_mode_agents, + multi_action_mode_planner, + ): + self.world_size = world_size + + self.resources = world_resources + self.landmarks = world_landmarks + self.multi_action_mode_agents = bool(multi_action_mode_agents) + self.multi_action_mode_planner = bool(multi_action_mode_planner) + self._agent_class_idx_map={} + # Create agents: agent_composition maps agent class name -> count + self.agent_composition=agent_composition + self.n_agents=0 + self._agents = [] + for k, v in agent_composition.items(): + self._agent_class_idx_map[k]=[] + for offset in range(v): + agent_class=agent_registry.get(k) + self._agents.append(agent_class(self.n_agents,multi_action_mode_agents=self.multi_action_mode_agents)) + self._agent_class_idx_map[k].append(self.n_agents) + self.n_agents+=1 + self.maps = Maps(world_size, self.n_agents, world_resources, world_landmarks) + + planner_class = agent_registry.get("BasicPlanner") + self._planner = planner_class(multi_action_mode=self.multi_action_mode_planner) + + self.timestep = 0 + + # CUDA-related attributes (for GPU simulations). + # These will be set via the env_wrapper, if required. + self.use_cuda = False + self.cuda_function_manager = None + self.cuda_data_manager = None + + @property + def agents(self): + """Return a list of the agent objects in the world (sorted by index).""" + return self._agents + + @property + def planner(self): + """Return the planner agent object.""" + return self._planner + + @property + def loc_map(self): + """Return a map indicating the agent index occupying each location. + + Locations with a value of -1 are not occupied by an agent.
+ """ + idx_map = -np.ones(shape=self.world_size, dtype=np.int16) + for agent in self.agents: + r, c = agent.loc + idx_map[r, c] = int(agent.idx) + return idx_map + + def get_random_order_agents(self): + """The agent list in a randomized order.""" + agent_order = np.random.permutation(self.n_agents) + agents = self.agents + return [agents[i] for i in agent_order] + + def get_agent_class(self,idx): + """Return class name of agent""" + return self.agents[idx].name + + def is_valid(self, r, c): + """Return True if the coordinates [r, c] are within the game boundaries.""" + return (0 <= r < self.world_size[0]) and (0 <= c < self.world_size[1]) + + def is_location_accessible(self, r, c, agent): + """Return True if location [r, c] is accessible to agent.""" + if not self.is_valid(r, c): + return False + return self.maps.is_accessible(r, c, agent.idx) + + def can_agent_occupy(self, r, c, agent): + """Return True if location [r, c] is accessible to agent and unoccupied.""" + if not self.is_location_accessible(r, c, agent): + return False + if self.maps.unoccupied[r, c]: + return True + return False + + def clear_agent_locs(self): + """Take all agents off the board. Useful for resetting.""" + for agent in self.agents: + agent.state["loc"] = [-1, -1] + self.maps.clear_agent_loc() + + def agent_locs_are_valid(self): + """Returns True if all agent locations comply with world semantics.""" + return all( + self.is_location_accessible(*agent.loc, agent) for agent in self.agents + ) + + def set_agent_loc(self, agent, r, c): + """Set the agent's location to coordinates [r, c] if possible. 
+ + If agent cannot occupy [r, c], do nothing.""" + if self.can_agent_occupy(r, c, agent): + self.maps.set_agent_loc(agent, r, c) + return [int(coord) for coord in agent.loc] + + def location_resources(self, r, c): + """Return {resource: health} dictionary for any resources at location [r, c].""" + if not self.is_valid(r, c): + return {} + return self.maps.location_resources(r, c) + + def location_landmarks(self, r, c): + """Return {landmark: health} dictionary for any landmarks at location [r, c].""" + if not self.is_valid(r, c): + return {} + return self.maps.location_landmarks(r, c) + + def create_landmark(self, landmark_name, r, c, agent_idx=None): + """Place a landmark on the world map. + + Place landmark of type landmark_name at the given coordinates, indicating + agent ownership if applicable.""" + self.maps.set_point(landmark_name, r, c, 1, owner=agent_idx) + + def consume_resource(self, resource_name, r, c): + """Consume a unit of resource_name from location [r, c].""" + self.maps.set_point_add(resource_name, r, c, -1) diff --git a/ai_economist/foundation/components/__init__.py b/ai_economist/foundation/components/__init__.py new file mode 100644 index 0000000..eb00ee6 --- /dev/null +++ b/ai_economist/foundation/components/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +from ai_economist.foundation.base.base_component import component_registry + +from . 
import ( + build, + continuous_double_auction, + covid19_components, + move, + redistribution, + simple_labor, +) + +# Import files that add Component class(es) to component_registry +# --------------------------------------------------------------- diff --git a/ai_economist/foundation/components/build.py b/ai_economist/foundation/components/build.py new file mode 100644 index 0000000..6bd2a1f --- /dev/null +++ b/ai_economist/foundation/components/build.py @@ -0,0 +1,266 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import numpy as np + +from ai_economist.foundation.base.base_component import ( + BaseComponent, + component_registry, +) + + +@component_registry.add +class Build(BaseComponent): + """ + Allows mobile agents to build house landmarks in the world using stone and wood, + earning income. + + Can be configured to include heterogeneous building skill where agents earn + different levels of income when building. + + Args: + payment (int): Default amount of coin agents earn from building. + Must be >= 0. Default is 10. + payment_max_skill_multiplier (int): Maximum skill multiplier that an agent + can sample. Must be >= 1. Default is 1. + skill_dist (str): Distribution type for sampling skills. Default ("none") + gives all agents identical skill equal to a multiplier of 1. "pareto" and + "lognormal" sample skills from the associated distributions. + build_labor (float): Labor cost associated with building a house. + Must be >= 0. Default is 10. 
+ """ + + name = "Build" + component_type = "Build" + required_entities = ["Wood", "Stone", "Coin", "House", "Labor"] + agent_subclasses = ["BasicMobileAgent"] + + def __init__( + self, + *base_component_args, + payment=10, + payment_max_skill_multiplier=1, + skill_dist="none", + build_labor=10.0, + **base_component_kwargs + ): + super().__init__(*base_component_args, **base_component_kwargs) + + self.payment = int(payment) + assert self.payment >= 0 + + self.payment_max_skill_multiplier = int(payment_max_skill_multiplier) + assert self.payment_max_skill_multiplier >= 1 + + self.resource_cost = {"Wood": 1, "Stone": 1} + + self.build_labor = float(build_labor) + assert self.build_labor >= 0 + + self.skill_dist = skill_dist.lower() + assert self.skill_dist in ["none", "pareto", "lognormal"] + + self.sampled_skills = {} + + self.builds = [] + + def agent_can_build(self, agent): + """Return True if agent can actually build in its current location.""" + # See if the agent has the resources necessary to complete the action + for resource, cost in self.resource_cost.items(): + if agent.state["inventory"][resource] < cost: + return False + + # Do nothing if this spot is already occupied by a landmark or resource + if self.world.location_resources(*agent.loc): + return False + if self.world.location_landmarks(*agent.loc): + return False + # If we made it here, the agent can build. + return True + + # Required methods for implementing components + # -------------------------------------------- + + def get_n_actions(self, agent_cls_name): + """ + See base_component.py for detailed description. + + Add a single action (build) for mobile agents. + """ + # This component adds 1 action that mobile agents can take: build a house + if agent_cls_name == "BasicMobileAgent": + return 1 + + return None + + def get_additional_state_fields(self, agent_cls_name): + """ + See base_component.py for detailed description. + + For mobile agents, add state fields for building skill. 
+ """ + if agent_cls_name not in self.agent_subclasses: + return {} + if agent_cls_name == "BasicMobileAgent": + return {"build_payment": float(self.payment), "build_skill": 1} + raise NotImplementedError + + def component_step(self): + """ + See base_component.py for detailed description. + + Convert stone+wood to house+coin for agents that choose to build and can. + """ + world = self.world + build = [] + # Apply any building actions taken by the mobile agents + for agent in world.get_random_order_agents(): + + action = agent.get_component_action(self.name) + + # This component doesn't apply to this agent! + if action is None: + continue + + # NO-OP! + if action == 0: + pass + + # Build! (If you can.) + elif action == 1: + if self.agent_can_build(agent): + # Remove the resources + for resource, cost in self.resource_cost.items(): + agent.state["inventory"][resource] -= cost + + # Place a house where the agent is standing + loc_r, loc_c = agent.loc + world.create_landmark("House", loc_r, loc_c, agent.idx) + + # Receive payment for the house + agent.state["inventory"]["Coin"] += agent.state["build_payment"] + + # Incur the labor cost for building + agent.state["endogenous"]["Labor"] += self.build_labor + + build.append( + { + "builder": agent.idx, + "loc": np.array(agent.loc), + "income": float(agent.state["build_payment"]), + } + ) + + else: + raise ValueError + + self.builds.append(build) + + def generate_observations(self): + """ + See base_component.py for detailed description. + + Here, agents observe their build skill. The planner does not observe anything + from this component. + """ + + obs_dict = dict() + for agent in self.world.agents: + obs_dict[agent.idx] = { + "build_payment": agent.state["build_payment"] / self.payment, + "build_skill": self.sampled_skills[agent.idx], + } + + return obs_dict + + def generate_masks(self, completions=0): + """ + See base_component.py for detailed description. 
+ + Prevent building only if a landmark already occupies the agent's location. + """ + + masks = {} + # Mobile agents' build action is masked if they cannot build with their + # current location and/or endowment + for agent in self.world.agents: + masks[agent.idx] = np.array([self.agent_can_build(agent)]) + + return masks + + # For non-required customization + # ------------------------------ + + def get_metrics(self): + """ + Metrics that capture what happened through this component. + + Returns: + metrics (dict): A dictionary of {"metric_name": metric_value}, + where metric_value is a scalar. + """ + world = self.world + + build_stats = {a.idx: {"n_builds": 0} for a in world.agents} + for builds in self.builds: + for build in builds: + idx = build["builder"] + build_stats[idx]["n_builds"] += 1 + + out_dict = {} + for a in world.agents: + for k, v in build_stats[a.idx].items(): + out_dict["{}/{}".format(a.idx, k)] = v + + num_houses = np.sum(world.maps.get("House") > 0) + out_dict["total_builds"] = num_houses + + return out_dict + + def additional_reset_steps(self): + """ + See base_component.py for detailed description. + + Re-sample agents' building skills. + """ + world = self.world + + self.sampled_skills = {agent.idx: 1 for agent in world.agents} + + PMSM = self.payment_max_skill_multiplier + + for agent in world.agents: + if self.skill_dist == "none": + sampled_skill = 1 + pay_rate = 1 + elif self.skill_dist == "pareto": + sampled_skill = np.random.pareto(4) + pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1) + elif self.skill_dist == "lognormal": + sampled_skill = np.random.lognormal(-1, 0.5) + pay_rate = np.minimum(PMSM, (PMSM - 1) * sampled_skill + 1) + else: + raise NotImplementedError + + agent.state["build_payment"] = float(pay_rate * self.payment) + agent.state["build_skill"] = float(sampled_skill) + + self.sampled_skills[agent.idx] = sampled_skill + + self.builds = [] + + def get_dense_log(self): + """ + Log builds. 
+ + Returns: + builds (list): A list of build events. Each entry corresponds to a single + timestep and contains a description of any builds that occurred on + that timestep. + + """ + return self.builds diff --git a/ai_economist/foundation/components/continuous_double_auction.py b/ai_economist/foundation/components/continuous_double_auction.py new file mode 100644 index 0000000..54bc3e2 --- /dev/null +++ b/ai_economist/foundation/components/continuous_double_auction.py @@ -0,0 +1,679 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import numpy as np + +from ai_economist.foundation.base.base_component import ( + BaseComponent, + component_registry, +) +from ai_economist.foundation.entities import resource_registry + + +@component_registry.add +class ContinuousDoubleAuction(BaseComponent): + """Allows mobile agents to buy/sell collectible resources with one another. + + Implements a commodity-exchange-style market where agents may sell a unit of + resource by submitting an ask (saying the minimum it will accept in payment) + or may buy a resource by submitting a bid (saying the maximum it will pay in + exchange for a unit of a given resource). + + Args: + max_bid_ask (int): Maximum amount of coin that an agent can bid or ask for. + Must be >= 1. Default is 10 coin. + order_labor (float): Amount of labor incurred when an agent creates an order. + Must be >= 0. Default is 0.25. + order_duration (int): Number of environment timesteps before an unfilled + bid/ask expires. Must be >= 1. Default is 50 timesteps. + max_num_orders (int, optional): Maximum number of bids + asks that an agent can + have open for a given resource. Must be >= 1. Default is no limit to + number of orders. 
+ """ + + name = "ContinuousDoubleAuction" + component_type = "Trade" + required_entities = ["Coin", "Labor"] + agent_subclasses = ["BasicMobileAgent"] + + def __init__( + self, + *args, + max_bid_ask=10, + order_labor=0.25, + order_duration=50, + max_num_orders=None, + **kwargs + ): + super().__init__(*args, **kwargs) + + # The max amount (in coin) that an agent can bid/ask for 1 unit of a commodity + self.max_bid_ask = int(max_bid_ask) + assert self.max_bid_ask >= 1 + self.price_floor = 0 + self.price_ceiling = int(max_bid_ask) + + # The amount of time (in timesteps) that an order stays in the books + # before it expires + self.order_duration = int(order_duration) + assert self.order_duration >= 1 + + # The maximum number of bid+ask orders an agent can have open + # for each type of commodity + self.max_num_orders = int(max_num_orders or self.order_duration) + assert self.max_num_orders >= 1 + + # The labor cost associated with creating a bid or ask order + + self.order_labor = float(order_labor) + self.order_labor = max(self.order_labor, 0.0) + + # Each collectible resource in the world can be traded via this component + self.commodities = [ + r for r in self.world.resources if resource_registry.get(r).collectible + ] + + # These get reset at the start of an episode: + self.asks = {c: [] for c in self.commodities} + self.bids = {c: [] for c in self.commodities} + self.n_orders = { + c: {i: 0 for i in range(self.n_agents)} for c in self.commodities + } + self.executed_trades = [] + self.price_history = { + c: {i: self._price_zeros() for i in range(self.n_agents)} + for c in self.commodities + } + self.bid_hists = { + c: {i: self._price_zeros() for i in range(self.n_agents)} + for c in self.commodities + } + self.ask_hists = { + c: {i: self._price_zeros() for i in range(self.n_agents)} + for c in self.commodities + } + + # Convenience methods + # ------------------- + + def _price_zeros(self): + if 1 + self.price_ceiling - self.price_floor <= 0: + print("ERROR!", 
self.price_ceiling, self.price_floor) + + return np.zeros(1 + self.price_ceiling - self.price_floor) + + def available_asks(self, resource, agent): + """ + Get a histogram of asks for resource to which agent could bid against. + + Args: + resource (str): Name of the resource + agent (BasicMobileAgent or None): Object of agent for which available + asks are being queried. If None, all asks are considered available. + + Returns: + ask_hist (ndarray): For each possible price level, the number of + available asks. + """ + if agent is None: + a_idx = -1 + else: + a_idx = agent.idx + ask_hist = self._price_zeros() + for i, h in self.ask_hists[resource].items(): + if a_idx != i: + ask_hist += h + return ask_hist + + def available_bids(self, resource, agent): + """ + Get a histogram of bids for resource to which agent could ask against. + + Args: + resource (str): Name of the resource + agent (BasicMobileAgent or None): Object of agent for which available + bids are being queried. If None, all bids are considered available. + + Returns: + bid_hist (ndarray): For each possible price level, the number of + available bids. + """ + if agent is None: + a_idx = -1 + else: + a_idx = agent.idx + bid_hist = self._price_zeros() + for i, h in self.bid_hists[resource].items(): + if a_idx != i: + bid_hist += h + return bid_hist + + def can_bid(self, resource, agent): + """If agent can submit a bid for resource.""" + return self.n_orders[resource][agent.idx] < self.max_num_orders + + def can_ask(self, resource, agent): + """If agent can submit an ask for resource.""" + return ( + self.n_orders[resource][agent.idx] < self.max_num_orders + and agent.state["inventory"][resource] > 0 + ) + + # Core components for this market + # ------------------------------- + + def create_bid(self, resource, agent, max_payment): + """Create a new bid for resource, with agent offering max_payment. + + On a successful trade, payment will be at most max_payment, possibly less. 
+ + The agent places the bid coin into escrow so that it may not be spent on + something else while the order exists. + """ + + # The agent is past the max number of orders + # or doesn't have enough money, do nothing + if (not self.can_bid(resource, agent)) or agent.state["inventory"][ + "Coin" + ] < max_payment: + return + + assert self.price_floor <= max_payment <= self.price_ceiling + + bid = {"buyer": agent.idx, "bid": int(max_payment), "bid_lifetime": 0} + + # Add this to the bid book + self.bids[resource].append(bid) + self.bid_hists[resource][bid["buyer"]][bid["bid"] - self.price_floor] += 1 + self.n_orders[resource][agent.idx] += 1 + + # Set aside whatever money the agent is willing to pay + # (will get excess back if price ends up being less) + _ = agent.inventory_to_escrow("Coin", int(max_payment)) + + # Incur the labor cost of creating an order + agent.state["endogenous"]["Labor"] += self.order_labor + + def create_ask(self, resource, agent, min_income): + """ + Create a new ask for resource, with agent asking for min_income. + + On a successful trade, income will be at least min_income, possibly more. + + The agent places one unit of resource into escrow so that it may not be used + for something else while the order exists. + """ + # The agent is past the max number of orders + # or doesn't have the resource it's trying to sell, do nothing + if not self.can_ask(resource, agent): + return + + # is there an upper limit?
+ assert self.price_floor <= min_income <= self.price_ceiling + + ask = {"seller": agent.idx, "ask": int(min_income), "ask_lifetime": 0} + + # Add this to the ask book + self.asks[resource].append(ask) + self.ask_hists[resource][ask["seller"]][ask["ask"] - self.price_floor] += 1 + self.n_orders[resource][agent.idx] += 1 + + # Set aside the resource the agent is willing to sell + amount = agent.inventory_to_escrow(resource, 1) + assert amount == 1 + + # Incur the labor cost of creating an order + agent.state["endogenous"]["Labor"] += self.order_labor + + def match_orders(self): + """ + This implements the continuous double auction by identifying valid bid/ask + pairs and executing trades accordingly. + + Higher (lower) bids (asks) are given priority over lower (higher) bids (asks). + Trades are executed using the price of whichever bid/ask order was placed + first: bid price if bid was placed first, ask price otherwise. + + Trading removes the payment and resource from bidder's and asker's escrow, + respectively, and puts them in the other's inventory. + """ + self.executed_trades.append([]) + + for resource in self.commodities: + possible_match = [True for _ in range(self.n_agents)] + keep_checking = True + + bids = sorted( + self.bids[resource], + key=lambda b: (b["bid"], b["bid_lifetime"]), + reverse=True, + ) + asks = sorted( + self.asks[resource], key=lambda a: (a["ask"], -a["ask_lifetime"]) + ) + + while any(possible_match) and keep_checking: + idx_bid, idx_ask = 0, 0 + while True: + # Out of bids to check. Exit both loops. + if idx_bid >= len(bids): + keep_checking = False + break + + # Already know this buyer is no good for this round. + # Skip to next bid. + if not possible_match[bids[idx_bid]["buyer"]]: + idx_bid += 1 + + # Out of asks to check. This buyer won't find a match on this round. + # (maybe) Restart inner loop. 
+ elif idx_ask >= len(asks): + possible_match[bids[idx_bid]["buyer"]] = False + break + + # Skip to next ask if this ask comes from the buyer + # of the current bid. + elif asks[idx_ask]["seller"] == bids[idx_bid]["buyer"]: + idx_ask += 1 + + # If this bid/ask pair can't be matched, this buyer + # can't be matched. (maybe) Restart inner loop. + elif bids[idx_bid]["bid"] < asks[idx_ask]["ask"]: + possible_match[bids[idx_bid]["buyer"]] = False + break + + # TRADE! (then restart inner loop) + else: + bid = bids.pop(idx_bid) + ask = asks.pop(idx_ask) + + trade = {"commodity": resource} + trade.update(bid) + trade.update(ask) + + if ( + bid["bid_lifetime"] <= ask["ask_lifetime"] + ): # Ask came earlier. (in other words, + # trade triggered by new bid) + trade["price"] = int(trade["ask"]) + else: # Bid came earlier. (in other words, + # trade triggered by new ask) + trade["price"] = int(trade["bid"]) + trade["cost"] = trade["price"] # What the buyer pays in total + trade["income"] = trade[ + "price" + ] # What the seller receives in total + + buyer = self.world.agents[trade["buyer"]] + seller = self.world.agents[trade["seller"]] + + # Bookkeeping + self.bid_hists[resource][bid["buyer"]][ + bid["bid"] - self.price_floor + ] -= 1 + self.ask_hists[resource][ask["seller"]][ + ask["ask"] - self.price_floor + ] -= 1 + self.n_orders[trade["commodity"]][seller.idx] -= 1 + self.n_orders[trade["commodity"]][buyer.idx] -= 1 + self.executed_trades[-1].append(trade) + self.price_history[resource][trade["seller"]][ + trade["price"] + ] += 1 + + # The resource goes from the seller's escrow + # to the buyer's inventory + seller.state["escrow"][resource] -= 1 + buyer.state["inventory"][resource] += 1 + + # Buyer's money (already set aside) leaves escrow + pre_payment = int(trade["bid"]) + buyer.state["escrow"]["Coin"] -= pre_payment + assert buyer.state["escrow"]["Coin"] >= 0 + + # Payment is removed from the pre_payment + # and given to the seller. Excess returned to buyer. 
+ payment_to_seller = int(trade["price"]) + excess_payment_from_buyer = pre_payment - payment_to_seller + assert excess_payment_from_buyer >= 0 + seller.state["inventory"]["Coin"] += payment_to_seller + buyer.state["inventory"]["Coin"] += excess_payment_from_buyer + + # Restart the inner loop + break + + # Keep the unfilled bids/asks + self.bids[resource] = bids + self.asks[resource] = asks + + def remove_expired_orders(self): + """ + Increment the time counter for any unfilled bids/asks and remove expired + orders from the market. + + When orders expire, the payment or resource is removed from escrow and + returned to the inventory and the associated order is removed from the order + books. + """ + world = self.world + + for resource in self.commodities: + + bids_ = [] + for bid in self.bids[resource]: + bid["bid_lifetime"] += 1 + # If the bid is not expired, keep it in the bids + if bid["bid_lifetime"] <= self.order_duration: + bids_.append(bid) + # Otherwise, remove it and do the associated bookkeeping + else: + # Return the set aside money to the buyer + amount = world.agents[bid["buyer"]].escrow_to_inventory( + "Coin", bid["bid"] + ) + assert amount == bid["bid"] + # Adjust the bid histogram to reflect the removal of the bid + self.bid_hists[resource][bid["buyer"]][ + bid["bid"] - self.price_floor + ] -= 1 + # Adjust the order counter + self.n_orders[resource][bid["buyer"]] -= 1 + + asks_ = [] + for ask in self.asks[resource]: + ask["ask_lifetime"] += 1 + # If the ask is not expired, keep it in the asks + if ask["ask_lifetime"] <= self.order_duration: + asks_.append(ask) + # Otherwise, remove it and do the associated bookkeeping + else: + # Return the set aside resource to the seller + resource_unit = world.agents[ask["seller"]].escrow_to_inventory( + resource, 1 + ) + assert resource_unit == 1 + # Adjust the ask histogram to reflect the removal of the ask + self.ask_hists[resource][ask["seller"]][ + ask["ask"] - self.price_floor + ] -= 1 + # Adjust the order 
counter + self.n_orders[resource][ask["seller"]] -= 1 + + self.bids[resource] = bids_ + self.asks[resource] = asks_ + + # Required methods for implementing components + # -------------------------------------------- + + def get_n_actions(self, agent_cls_name): + """ + See base_component.py for detailed description. + + Adds 2*C action spaces [ (bid+ask) * n_commodities ], each with 1 + max_bid_ask + actions corresponding to price levels 0 to max_bid_ask. + """ + # This component adds 2*(1+max_bid_ask)*n_resources possible actions: + # buy/sell x each-price x each-resource + if agent_cls_name == "BasicMobileAgent": + trades = [] + for c in self.commodities: + trades.append( + ("Buy_{}".format(c), 1 + self.max_bid_ask) + ) # How much willing to pay for c + trades.append( + ("Sell_{}".format(c), 1 + self.max_bid_ask) + ) # How much need to receive to sell c + return trades + + return None + + def get_additional_state_fields(self, agent_cls_name): + """ + See base_component.py for detailed description. + """ + # This component doesn't add any state fields + return {} + + def component_step(self): + """ + See base_component.py for detailed description. + + Create new bids and asks, match and execute valid order pairs, and manage + order expiration. 
+ """ + world = self.world + + for resource in self.commodities: + for agent in world.agents: + self.price_history[resource][agent.idx] *= 0.995 + + # Create bid action + # ----------------- + resource_action = agent.get_component_action( + self.name, "Buy_{}".format(resource) + ) + + # No-op + if resource_action == 0: + pass + + # Create a bid + elif resource_action <= self.max_bid_ask + 1: + self.create_bid(resource, agent, max_payment=resource_action - 1) + + else: + raise ValueError + + # Create ask action + # ----------------- + resource_action = agent.get_component_action( + self.name, "Sell_{}".format(resource) + ) + + # No-op + if resource_action == 0: + pass + + # Create an ask + elif resource_action <= self.max_bid_ask + 1: + self.create_ask(resource, agent, min_income=resource_action - 1) + + else: + raise ValueError + + # Here's where the magic happens: + self.match_orders() # Pair bids and asks + self.remove_expired_orders() # Get rid of orders that have expired + + def generate_observations(self): + """ + See base_component.py for detailed description. + + Here, agents and the planner both observe historical market behavior and + outstanding bids/asks for each tradable commodity. Agents only see the + outstanding bids/asks to which they could respond (that is, that they did not + submit). Agents also see their own outstanding bids/asks. 
+ """ + world = self.world + + obs = {a.idx: {} for a in world.agents + [world.planner]} + + prices = np.arange(self.price_floor, self.price_ceiling + 1) + for c in self.commodities: + net_price_history = np.sum( + np.stack([self.price_history[c][i] for i in range(self.n_agents)]), + axis=0, + ) + market_rate = prices.dot(net_price_history) / np.maximum( + 0.001, np.sum(net_price_history) + ) + scaled_price_history = net_price_history * self.inv_scale + + full_asks = self.available_asks(c, agent=None) + full_bids = self.available_bids(c, agent=None) + + obs[world.planner.idx].update( + { + "market_rate-{}".format(c): market_rate, + "price_history-{}".format(c): scaled_price_history, + "full_asks-{}".format(c): full_asks, + "full_bids-{}".format(c): full_bids, + } + ) + + for _, agent in enumerate(world.agents): + # Private to the agent + obs[agent.idx].update( + { + "market_rate-{}".format(c): market_rate, + "price_history-{}".format(c): scaled_price_history, + "available_asks-{}".format(c): full_asks + - self.ask_hists[c][agent.idx], + "available_bids-{}".format(c): full_bids + - self.bid_hists[c][agent.idx], + "my_asks-{}".format(c): self.ask_hists[c][agent.idx], + "my_bids-{}".format(c): self.bid_hists[c][agent.idx], + } + ) + + return obs + + def generate_masks(self, completions=0): + """ + See base_component.py for detailed description. + + Agents cannot submit bids/asks for resources where they are at the order + limit. In addition, they may only submit asks for resources they possess and + bids for which they can pay. 
+ """ + world = self.world + + masks = dict() + + for agent in world.agents: + masks[agent.idx] = {} + + can_pay = np.arange(self.max_bid_ask + 1) <= agent.inventory["Coin"] + + for resource in self.commodities: + if not self.can_ask(resource, agent): # asks_maxed: + masks[agent.idx]["Sell_{}".format(resource)] = np.zeros( + 1 + self.max_bid_ask + ) + else: + masks[agent.idx]["Sell_{}".format(resource)] = np.ones( + 1 + self.max_bid_ask + ) + + if not self.can_bid(resource, agent): + masks[agent.idx]["Buy_{}".format(resource)] = np.zeros( + 1 + self.max_bid_ask + ) + else: + masks[agent.idx]["Buy_{}".format(resource)] = can_pay.astype( + np.int32 + ) + + return masks + + # For non-required customization + # ------------------------------ + + def get_metrics(self): + """ + Metrics that capture what happened through this component. + + Returns: + metrics (dict): A dictionary of {"metric_name": metric_value}, + where metric_value is a scalar. + """ + world = self.world + + trade_keys = ["price", "cost", "income"] + + selling_stats = { + a.idx: { + c: {k: 0 for k in trade_keys + ["n_sales"]} for c in self.commodities + } + for a in world.agents + } + buying_stats = { + a.idx: { + c: {k: 0 for k in trade_keys + ["n_sales"]} for c in self.commodities + } + for a in world.agents + } + + n_trades = 0 + + for trades in self.executed_trades: + for trade in trades: + n_trades += 1 + i_s, i_b, c = trade["seller"], trade["buyer"], trade["commodity"] + selling_stats[i_s][c]["n_sales"] += 1 + buying_stats[i_b][c]["n_sales"] += 1 + for k in trade_keys: + selling_stats[i_s][c][k] += trade[k] + buying_stats[i_b][c][k] += trade[k] + + out_dict = {} + for a in world.agents: + for c in self.commodities: + for stats, prefix in zip( + [selling_stats, buying_stats], ["Sell", "Buy"] + ): + n = stats[a.idx][c]["n_sales"] + if n == 0: + for k in trade_keys: + stats[a.idx][c][k] = np.nan + else: + for k in trade_keys: + stats[a.idx][c][k] /= n + + for k, v in stats[a.idx][c].items(): + 
out_dict["{}/{}{}/{}".format(a.idx, prefix, c, k)] = v + + out_dict["n_trades"] = n_trades + + return out_dict + + def additional_reset_steps(self): + """ + See base_component.py for detailed description. + + Reset the order books. + """ + self.bids = {c: [] for c in self.commodities} + self.asks = {c: [] for c in self.commodities} + self.n_orders = { + c: {i: 0 for i in range(self.n_agents)} for c in self.commodities + } + + self.price_history = { + c: {i: self._price_zeros() for i in range(self.n_agents)} + for c in self.commodities + } + self.bid_hists = { + c: {i: self._price_zeros() for i in range(self.n_agents)} + for c in self.commodities + } + self.ask_hists = { + c: {i: self._price_zeros() for i in range(self.n_agents)} + for c in self.commodities + } + + self.executed_trades = [] + + def get_dense_log(self): + """ + Log executed trades. + + Returns: + trades (list): A list of trade events. Each entry corresponds to a single + timestep and contains a description of any trades that occurred on + that timestep. + """ + return self.executed_trades diff --git a/ai_economist/foundation/components/covid19_components.py b/ai_economist/foundation/components/covid19_components.py new file mode 100644 index 0000000..0a47c8c --- /dev/null +++ b/ai_economist/foundation/components/covid19_components.py @@ -0,0 +1,663 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +from datetime import datetime + +import GPUtil +import numpy as np + +from ai_economist.foundation.base.base_component import ( + BaseComponent, + component_registry, +) + +try: + num_gpus_available = len(GPUtil.getAvailable()) + print(f"Inside covid19_components.py: {num_gpus_available} GPUs are available.") + if num_gpus_available == 0: + print("No GPUs found! 
Running the simulation on a CPU.") + else: + from warp_drive.utils.constants import Constants + from warp_drive.utils.data_feed import DataFeed + + _OBSERVATIONS = Constants.OBSERVATIONS + _ACTIONS = Constants.ACTIONS +except ModuleNotFoundError: + print( + "Warning: The 'WarpDrive' package is not found and cannot be used! " + "If you wish to use WarpDrive, please run " + "'pip install rl-warp-drive' first." + ) +except ValueError: + print("No GPUs found! Running the simulation on a CPU.") + + +@component_registry.add +class ControlUSStateOpenCloseStatus(BaseComponent): + """ + Sets the open/close stringency levels for states. + Args: + n_stringency_levels (int): number of stringency levels the states can chose + from. (Must match the number in the model constants dictionary referenced by + the parent scenario.) + action_cooldown_period (int): action cooldown period in days. + Once a stringency level is set, the state(s) cannot switch to another level + for a certain number of days (referred to as the "action_cooldown_period") + """ + + name = "ControlUSStateOpenCloseStatus" + required_entities = [] + agent_subclasses = ["BasicMobileAgent"] + + def __init__( + self, + *base_component_args, + n_stringency_levels=10, + action_cooldown_period=28, + **base_component_kwargs, + ): + + self.action_cooldown_period = action_cooldown_period + super().__init__(*base_component_args, **base_component_kwargs) + self.np_int_dtype = np.int32 + + self.n_stringency_levels = int(n_stringency_levels) + assert self.n_stringency_levels >= 2 + self._checked_n_stringency_levels = False + + self.masks = dict() + self.default_agent_action_mask = [1 for _ in range(self.n_stringency_levels)] + self.no_op_agent_action_mask = [0 for _ in range(self.n_stringency_levels)] + self.masks["a"] = np.repeat( + np.array(self.no_op_agent_action_mask)[:, np.newaxis], + self.n_agents, + axis=-1, + ) + + # (This will be overwritten during reset; see below) + self.action_in_cooldown_until = None + + def 
get_additional_state_fields(self, agent_cls_name): + return {} + + def additional_reset_steps(self): + # Store the times when the next set of actions can be taken. + self.action_in_cooldown_until = np.array( + [self.world.timestep for _ in range(self.n_agents)] + ) + + def get_n_actions(self, agent_cls_name): + if agent_cls_name == "BasicMobileAgent": + return self.n_stringency_levels + return None + + def generate_masks(self, completions=0): + for agent in self.world.agents: + if self.world.use_real_world_policies: + self.masks["a"][:, agent.idx] = self.default_agent_action_mask + else: + if self.world.timestep < self.action_in_cooldown_until[agent.idx]: + # Keep masking the actions + self.masks["a"][:, agent.idx] = self.no_op_agent_action_mask + else: # self.world.timestep == self.action_in_cooldown_until[agent.idx] + # Cooldown period has ended; unmask the "subsequent" action + self.masks["a"][:, agent.idx] = self.default_agent_action_mask + return self.masks + + def get_data_dictionary(self): + """ + Create a dictionary of data to push to the GPU (device). + """ + data_dict = DataFeed() + data_dict.add_data( + name="action_cooldown_period", + data=self.action_cooldown_period, + ) + data_dict.add_data( + name="action_in_cooldown_until", + data=self.action_in_cooldown_until, + save_copy_and_apply_at_reset=True, + ) + data_dict.add_data( + name="num_stringency_levels", + data=self.n_stringency_levels, + ) + data_dict.add_data( + name="default_agent_action_mask", + data=[1] + self.default_agent_action_mask, + ) + data_dict.add_data( + name="no_op_agent_action_mask", + data=[1] + self.no_op_agent_action_mask, + ) + return data_dict + + def get_tensor_dictionary(self): + """ + Create a dictionary of (Pytorch-accessible) data to push to the GPU (device). 
+ """ + tensor_dict = DataFeed() + return tensor_dict + + def component_step(self): + if self.world.use_cuda: + self.world.cuda_component_step[self.name]( + self.world.cuda_data_manager.device_data("stringency_level"), + self.world.cuda_data_manager.device_data("action_cooldown_period"), + self.world.cuda_data_manager.device_data("action_in_cooldown_until"), + self.world.cuda_data_manager.device_data("default_agent_action_mask"), + self.world.cuda_data_manager.device_data("no_op_agent_action_mask"), + self.world.cuda_data_manager.device_data("num_stringency_levels"), + self.world.cuda_data_manager.device_data(f"{_ACTIONS}_a"), + self.world.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_a_{self.name}-agent_policy_indicators" + ), + self.world.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_a_action_mask" + ), + self.world.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_p_{self.name}-agent_policy_indicators" + ), + self.world.cuda_data_manager.device_data("_timestep_"), + self.world.cuda_data_manager.meta_info("n_agents"), + self.world.cuda_data_manager.meta_info("episode_length"), + block=self.world.cuda_function_manager.block, + grid=self.world.cuda_function_manager.grid, + ) + else: + if not self._checked_n_stringency_levels: + if self.n_stringency_levels != self.world.n_stringency_levels: + raise ValueError( + "The environment was not configured correctly. For the given " + "model fit, you need to set the number of stringency levels to " + "be {}".format(self.world.n_stringency_levels) + ) + self._checked_n_stringency_levels = True + + for agent in self.world.agents: + if self.world.use_real_world_policies: + # Use the action taken in the previous timestep + action = self.world.real_world_stringency_policy[ + self.world.timestep - 1, agent.idx + ] + else: + action = agent.get_component_action(self.name) + assert 0 <= action <= self.n_stringency_levels + + # We only update the stringency level if the action is not a NO-OP. 
+ self.world.global_state["Stringency Level"][ + self.world.timestep, agent.idx + ] = ( + self.world.global_state["Stringency Level"][ + self.world.timestep - 1, agent.idx + ] + * (action == 0) + + action + ) + + agent.state[ + "Current Open Close Stringency Level" + ] = self.world.global_state["Stringency Level"][ + self.world.timestep, agent.idx + ] + + # Check if the action cooldown period has ended, and set the next + # time until action cooldown. If current action is a no-op + # (i.e., no new action was taken), the agent can take an action + # in the very next step, otherwise it needs to wait for + # self.action_cooldown_period steps. When in the action cooldown + # period, whatever actions the agents take are masked out, + # so it's always a NO-OP (see generate_masks() above) + # The logic below influences the action masks. + if self.world.timestep == self.action_in_cooldown_until[agent.idx] + 1: + if action == 0: # NO-OP + self.action_in_cooldown_until[agent.idx] += 1 + else: + self.action_in_cooldown_until[ + agent.idx + ] += self.action_cooldown_period + + def generate_observations(self): + + # Normalized observations + obs_dict = dict() + agent_policy_indicators = self.world.global_state["Stringency Level"][ + self.world.timestep + ] + obs_dict["a"] = { + "agent_policy_indicators": agent_policy_indicators + / self.n_stringency_levels + } + obs_dict[self.world.planner.idx] = { + "agent_policy_indicators": agent_policy_indicators + / self.n_stringency_levels + } + + return obs_dict + + +@component_registry.add +class FederalGovernmentSubsidy(BaseComponent): + """ + Args: + subsidy_interval (int): The number of days over which the total subsidy amount + is evenly rolled out. + Note: shortening the subsidy interval increases the total amount of money + that the planner could possibly spend. For instance, if the subsidy + interval is 30, the planner can create a subsidy every 30 days. + num_subsidy_levels (int): The number of subsidy levels. 
+ Note: with max_annual_subsidy_per_person=10000, one round of subsidies at + the maximum subsidy level equals an expenditure of roughly $3.3 trillion + (given the US population of 330 million). + If the planner chooses the maximum subsidy amount, the $3.3 trillion + is rolled out gradually over the subsidy interval. + max_annual_subsidy_per_person (float): The maximum annual subsidy that may be + allocated per person. + """ + + name = "FederalGovernmentSubsidy" + required_entities = [] + agent_subclasses = ["BasicPlanner"] + + def __init__( + self, + *base_component_args, + subsidy_interval=90, + num_subsidy_levels=20, + max_annual_subsidy_per_person=20000, + **base_component_kwargs, + ): + self.subsidy_interval = int(subsidy_interval) + assert self.subsidy_interval >= 1 + + self.num_subsidy_levels = int(num_subsidy_levels) + assert self.num_subsidy_levels >= 1 + + self.max_annual_subsidy_per_person = float(max_annual_subsidy_per_person) + assert self.max_annual_subsidy_per_person >= 0 + + self.np_int_dtype = np.int32 + + # (This will be overwritten during component_step; see below) + self._subsidy_amount_per_level = None + self._subsidy_level_array = None + + super().__init__(*base_component_args, **base_component_kwargs) + + self.default_planner_action_mask = [1 for _ in range(self.num_subsidy_levels)] + self.no_op_planner_action_mask = [0 for _ in range(self.num_subsidy_levels)] + + # (This will be overwritten during reset; see below) + self.max_daily_subsidy_per_state = np.array( + self.n_agents, dtype=self.np_int_dtype + ) + + def get_additional_state_fields(self, agent_cls_name): + if agent_cls_name == "BasicPlanner": + return {"Total Subsidy": 0, "Current Subsidy Level": 0} + return {} + + def additional_reset_steps(self): + # Pre-compute maximum state-specific subsidy levels + self.max_daily_subsidy_per_state = ( + self.world.us_state_population * self.max_annual_subsidy_per_person / 365 + ) + + def get_n_actions(self, agent_cls_name): + if agent_cls_name 
== "BasicPlanner": + # Number of non-zero subsidy levels + # (the action 0 pertains to the no-subsidy case) + return self.num_subsidy_levels + return None + + def generate_masks(self, completions=0): + masks = {} + if self.world.use_real_world_policies: + masks[self.world.planner.idx] = self.default_planner_action_mask + else: + if self.world.timestep % self.subsidy_interval == 0: + masks[self.world.planner.idx] = self.default_planner_action_mask + else: + masks[self.world.planner.idx] = self.no_op_planner_action_mask + return masks + + def get_data_dictionary(self): + """ + Create a dictionary of data to push to the device + """ + data_dict = DataFeed() + data_dict.add_data( + name="subsidy_interval", + data=self.subsidy_interval, + ) + data_dict.add_data( + name="num_subsidy_levels", + data=self.num_subsidy_levels, + ) + data_dict.add_data( + name="max_daily_subsidy_per_state", + data=self.max_daily_subsidy_per_state, + ) + data_dict.add_data( + name="default_planner_action_mask", + data=[1] + self.default_planner_action_mask, + ) + data_dict.add_data( + name="no_op_planner_action_mask", + data=[1] + self.no_op_planner_action_mask, + ) + return data_dict + + def get_tensor_dictionary(self): + """ + Create a dictionary of (Pytorch-accessible) data to push to the device + """ + tensor_dict = DataFeed() + return tensor_dict + + def component_step(self): + if self.world.use_cuda: + self.world.cuda_component_step[self.name]( + self.world.cuda_data_manager.device_data("subsidy_level"), + self.world.cuda_data_manager.device_data("subsidy"), + self.world.cuda_data_manager.device_data("subsidy_interval"), + self.world.cuda_data_manager.device_data("num_subsidy_levels"), + self.world.cuda_data_manager.device_data("max_daily_subsidy_per_state"), + self.world.cuda_data_manager.device_data("default_planner_action_mask"), + self.world.cuda_data_manager.device_data("no_op_planner_action_mask"), + self.world.cuda_data_manager.device_data(f"{_ACTIONS}_p"), + 
self.world.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_a_{self.name}-t_until_next_subsidy" + ), + self.world.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_a_{self.name}-current_subsidy_level" + ), + self.world.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_p_{self.name}-t_until_next_subsidy" + ), + self.world.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_p_{self.name}-current_subsidy_level" + ), + self.world.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_p_action_mask" + ), + self.world.cuda_data_manager.device_data("_timestep_"), + self.world.cuda_data_manager.meta_info("n_agents"), + self.world.cuda_data_manager.meta_info("episode_length"), + block=self.world.cuda_function_manager.block, + grid=self.world.cuda_function_manager.grid, + ) + else: + if self.world.use_real_world_policies: + if self._subsidy_amount_per_level is None: + self._subsidy_amount_per_level = ( + self.world.us_population + * self.max_annual_subsidy_per_person + / self.num_subsidy_levels + * self.subsidy_interval + / 365 + ) + self._subsidy_level_array = np.zeros((self._episode_length + 1)) + # Use the action taken in the previous timestep + current_subsidy_amount = self.world.real_world_subsidy[ + self.world.timestep - 1 + ] + if current_subsidy_amount > 0: + _subsidy_level = np.round( + (current_subsidy_amount / self._subsidy_amount_per_level) + ) + for t_idx in range( + self.world.timestep - 1, + min( + len(self._subsidy_level_array), + self.world.timestep - 1 + self.subsidy_interval, + ), + ): + self._subsidy_level_array[t_idx] += _subsidy_level + subsidy_level = self._subsidy_level_array[self.world.timestep - 1] + else: + # Update the subsidy level only every self.subsidy_interval, since the + # other actions are masked out. 
+ if (self.world.timestep - 1) % self.subsidy_interval == 0: + subsidy_level = self.world.planner.get_component_action(self.name) + else: + subsidy_level = self.world.planner.state["Current Subsidy Level"] + + assert 0 <= subsidy_level <= self.num_subsidy_levels + self.world.planner.state["Current Subsidy Level"] = np.array( + subsidy_level + ).astype(self.np_int_dtype) + + # Update subsidy level + subsidy_level_frac = subsidy_level / self.num_subsidy_levels + daily_statewise_subsidy = ( + subsidy_level_frac * self.max_daily_subsidy_per_state + ) + + self.world.global_state["Subsidy"][ + self.world.timestep + ] = daily_statewise_subsidy + self.world.planner.state["Total Subsidy"] += np.sum(daily_statewise_subsidy) + + def generate_observations(self): + # Allow the agents/planner to know when the next subsidy might come. + # Obs should = 0 when the next timestep could include a subsidy + t_since_last_subsidy = self.world.timestep % self.subsidy_interval + # (this is normalized to 0<-->1) + t_until_next_subsidy = self.subsidy_interval - t_since_last_subsidy + t_vec = t_until_next_subsidy * np.ones(self.n_agents) + + current_subsidy_level = self.world.planner.state["Current Subsidy Level"] + sl_vec = current_subsidy_level * np.ones(self.n_agents) + + # Normalized observations + obs_dict = dict() + obs_dict["a"] = { + "t_until_next_subsidy": t_vec / self.subsidy_interval, + "current_subsidy_level": sl_vec / self.num_subsidy_levels, + } + obs_dict[self.world.planner.idx] = { + "t_until_next_subsidy": t_until_next_subsidy / self.subsidy_interval, + "current_subsidy_level": current_subsidy_level / self.num_subsidy_levels, + } + + return obs_dict + + +@component_registry.add +class VaccinationCampaign(BaseComponent): + """ + Implements a (passive) component for delivering vaccines to agents once a certain + amount of time has elapsed. + + Args: + daily_vaccines_per_million_people (int): The number of vaccines available per + million people everyday. 
+ delivery_interval (int): The number of days between vaccine deliveries. + vaccine_delivery_start_date (string): The date (YYYY-MM-DD) when the + vaccination begins. + """ + + name = "VaccinationCampaign" + required_entities = [] + agent_subclasses = ["BasicMobileAgent"] + + def __init__( + self, + *base_component_args, + daily_vaccines_per_million_people=4500, + delivery_interval=1, + vaccine_delivery_start_date="2020-12-22", + observe_rate=False, + **base_component_kwargs, + ): + self.daily_vaccines_per_million_people = int(daily_vaccines_per_million_people) + assert 0 <= self.daily_vaccines_per_million_people <= 1e6 + + self.delivery_interval = int(delivery_interval) + assert 1 <= self.delivery_interval <= 5000 + + try: + self.vaccine_delivery_start_date = datetime.strptime( + vaccine_delivery_start_date, "%Y-%m-%d" + ) + except ValueError: + print("Incorrect data format, should be YYYY-MM-DD") + + # (This will be overwritten during component_step (see below)) + self._time_when_vaccine_delivery_begins = None + + self.np_int_dtype = np.int32 + + self.observe_rate = bool(observe_rate) + + super().__init__(*base_component_args, **base_component_kwargs) + + # (This will be overwritten during reset; see below) + self._num_vaccines_per_delivery = None + # Convenience for obs (see usage below): + self._t_first_delivery = None + + @property + def num_vaccines_per_delivery(self): + if self._num_vaccines_per_delivery is None: + # Pre-compute dispersal numbers + millions_of_residents = self.world.us_state_population / 1e6 + daily_vaccines = ( + millions_of_residents * self.daily_vaccines_per_million_people + ) + num_vaccines_per_delivery = np.floor( + self.delivery_interval * daily_vaccines + ) + self._num_vaccines_per_delivery = np.array( + num_vaccines_per_delivery, dtype=self.np_int_dtype + ) + return self._num_vaccines_per_delivery + + @property + def time_when_vaccine_delivery_begins(self): + if self._time_when_vaccine_delivery_begins is None: + 
self._time_when_vaccine_delivery_begins = ( + self.vaccine_delivery_start_date - self.world.start_date + ).days + return self._time_when_vaccine_delivery_begins + + def get_additional_state_fields(self, agent_cls_name): + if agent_cls_name == "BasicMobileAgent": + return {"Total Vaccinated": 0, "Vaccines Available": 0} + return {} + + def additional_reset_steps(self): + pass + + def get_n_actions(self, agent_cls_name): + return # Passive component + + def generate_masks(self, completions=0): + return {} # Passive component + + def get_data_dictionary(self): + """ + Create a dictionary of data to push to the device + """ + data_dict = DataFeed() + data_dict.add_data( + name="num_vaccines_per_delivery", + data=self.num_vaccines_per_delivery, + ) + data_dict.add_data( + name="delivery_interval", + data=self.delivery_interval, + ) + data_dict.add_data( + name="time_when_vaccine_delivery_begins", + data=self.time_when_vaccine_delivery_begins, + ) + data_dict.add_data( + name="num_vaccines_available_t", + data=np.zeros(self.n_agents), + save_copy_and_apply_at_reset=True, + ) + return data_dict + + def get_tensor_dictionary(self): + """ + Create a dictionary of (Pytorch-accessible) data to push to the device + """ + tensor_dict = DataFeed() + return tensor_dict + + def component_step(self): + if self.world.use_cuda: + self.world.cuda_component_step[self.name]( + self.world.cuda_data_manager.device_data("vaccinated"), + self.world.cuda_data_manager.device_data("num_vaccines_per_delivery"), + self.world.cuda_data_manager.device_data("num_vaccines_available_t"), + self.world.cuda_data_manager.device_data("delivery_interval"), + self.world.cuda_data_manager.device_data( + "time_when_vaccine_delivery_begins" + ), + self.world.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_a_{self.name}-t_until_next_vaccines" + ), + self.world.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_p_{self.name}-t_until_next_vaccines" + ), + 
self.world.cuda_data_manager.device_data("_timestep_"), + self.world.cuda_data_manager.meta_info("n_agents"), + self.world.cuda_data_manager.meta_info("episode_length"), + block=self.world.cuda_function_manager.block, + grid=self.world.cuda_function_manager.grid, + ) + else: + # Do nothing if vaccines are not available yet + if self.world.timestep < self.time_when_vaccine_delivery_begins: + return + + # Do nothing if this is not the start of a delivery interval. + # Vaccines are delivered at the start of each interval. + if (self.world.timestep % self.delivery_interval) != 0: + return + + # Deliver vaccines to each state + for aidx, vaccines in enumerate(self.num_vaccines_per_delivery): + self.world.agents[aidx].state["Vaccines Available"] += vaccines + + def generate_observations(self): + # Allow the agents/planner to know when the next vaccines might come. + # Obs should = 0 when the next timestep will deliver vaccines + # (this is normalized to 0<-->1) + + if self._t_first_delivery is None: + self._t_first_delivery = int(self.time_when_vaccine_delivery_begins) + while (self._t_first_delivery % self.delivery_interval) != 0: + self._t_first_delivery += 1 + + next_t = self.world.timestep + 1 + if next_t <= self._t_first_delivery: + t_until_next_vac = np.minimum( + 1, (self._t_first_delivery - next_t) / self.delivery_interval + ) + next_vax_rate = 0.0 + else: + t_since_last_vac = next_t % self.delivery_interval + t_until_next_vac = self.delivery_interval - t_since_last_vac + next_vax_rate = self.daily_vaccines_per_million_people / 1e6 + t_vec = t_until_next_vac * np.ones(self.n_agents) + r_vec = next_vax_rate * np.ones(self.n_agents) + + # Normalized observations + obs_dict = dict() + obs_dict["a"] = {"t_until_next_vaccines": t_vec / self.delivery_interval} + obs_dict[self.world.planner.idx] = { + "t_until_next_vaccines": t_until_next_vac / self.delivery_interval + } + + if self.observe_rate: + obs_dict["a"]["next_vaccination_rate"] = r_vec + 
obs_dict["p"]["next_vaccination_rate"] = float(next_vax_rate) + + return obs_dict diff --git a/ai_economist/foundation/components/covid19_components_step.cu b/ai_economist/foundation/components/covid19_components_step.cu new file mode 100644 index 0000000..ba982ed --- /dev/null +++ b/ai_economist/foundation/components/covid19_components_step.cu @@ -0,0 +1,263 @@ +// Copyright (c) 2021, salesforce.com, inc. +// All rights reserved. +// SPDX-License-Identifier: BSD-3-Clause +// For full license text, see the LICENSE file in the repo root +// or https://opensource.org/licenses/BSD-3-Clause + +extern "C" { + // CUDA version of the components in + // "ai_economist.foundation.components.covid19_components.py" + __global__ void CudaControlUSStateOpenCloseStatusStep( + int * stringency_level, + const int kActionCooldownPeriod, + int * action_in_cooldown_until, + const int * kDefaultAgentActionMask, + const int * kNoOpAgentActionMask, + const int kNumStringencyLevels, + int * actions, + float * obs_a_stringency_policy_indicators, + float * obs_a_action_mask, + float * obs_p_stringency_policy_indicators, + int * env_timestep_arr, + const int kNumAgents, + const int kEpisodeLength + ) { + const int kEnvId = blockIdx.x; + const int kAgentId = threadIdx.x; + + // Increment time ONCE -- only 1 thread can do this. 
// Copyright (c) 2021, salesforce.com, inc.
// All rights reserved.
// SPDX-License-Identifier: BSD-3-Clause
// For full license text, see the LICENSE file in the repo root
// or https://opensource.org/licenses/BSD-3-Clause

extern "C" {
    // CUDA version of the components in
    // "ai_economist.foundation.components.covid19_components.py".
    //
    // Launch convention (one block per env, one thread per agent incl. the
    // planner): kEnvId = blockIdx.x, kAgentId = threadIdx.x.
    //
    // NOTE(review): the opening of CudaControlUSStateOpenCloseStatusStep is
    // re-emitted from the signature visible just above this chunk.
    __global__ void CudaControlUSStateOpenCloseStatusStep(
        int * stringency_level,
        const int kActionCooldownPeriod,
        int * action_in_cooldown_until,
        const int * kDefaultAgentActionMask,
        const int * kNoOpAgentActionMask,
        const int kNumStringencyLevels,
        int * actions,
        float * obs_a_stringency_policy_indicators,
        float * obs_a_action_mask,
        float * obs_p_stringency_policy_indicators,
        int * env_timestep_arr,
        const int kNumAgents,
        const int kEpisodeLength
    ) {
        const int kEnvId = blockIdx.x;
        const int kAgentId = threadIdx.x;

        // Increment time ONCE -- only 1 thread can do this.
        if (kAgentId == 0) {
            env_timestep_arr[kEnvId] += 1;
        }

        // Wait here until timestep has been updated
        __syncthreads();

        assert(env_timestep_arr[kEnvId] > 0 &&
            env_timestep_arr[kEnvId] <= kEpisodeLength);
        assert(kAgentId <= kNumAgents - 1);

        // Update the stringency levels for the US states
        if (kAgentId < (kNumAgents - 1)) {
            // Indices for time-dependent and time-independent arrays
            // Time dependent arrays have shapes
            // (num_envs, kEpisodeLength + 1, kNumAgents - 1)
            // Time independent arrays have shapes (num_envs, kNumAgents - 1)
            const int kArrayIdxOffset = kEnvId * (kEpisodeLength + 1) *
                (kNumAgents - 1);
            int time_dependent_array_index_curr_t = kArrayIdxOffset +
                env_timestep_arr[kEnvId] * (kNumAgents - 1) + kAgentId;
            int time_dependent_array_index_prev_t = kArrayIdxOffset +
                (env_timestep_arr[kEnvId] - 1) * (kNumAgents - 1) + kAgentId;
            const int time_independent_array_index = kEnvId * (kNumAgents - 1) +
                kAgentId;

            // action is not a NO-OP: adopt it; otherwise carry the level over.
            if (actions[time_independent_array_index] != 0) {
                stringency_level[time_dependent_array_index_curr_t] =
                    actions[time_independent_array_index];
            } else {
                stringency_level[time_dependent_array_index_curr_t] =
                    stringency_level[time_dependent_array_index_prev_t];
            }

            if (env_timestep_arr[kEnvId] == action_in_cooldown_until[
                time_independent_array_index] + 1) {
                if (actions[time_independent_array_index] != 0) {
                    // FIX: `0 <= x <= k` does not chain in C/C++ -- it parses
                    // as `(0 <= x) <= k`, which is (almost) always true.
                    // Written as an explicit conjunction instead.
                    assert(actions[time_independent_array_index] >= 0 &&
                        actions[time_independent_array_index] <=
                        kNumStringencyLevels);
                    action_in_cooldown_until[time_independent_array_index] +=
                        kActionCooldownPeriod;
                } else {
                    action_in_cooldown_until[time_independent_array_index] += 1;
                }
            }

            // FIX: restore the template argument (garbled to `static_cast(`
            // in the source) so this is a float division, not an integer
            // truncation / compile error.
            obs_a_stringency_policy_indicators[time_independent_array_index] =
                stringency_level[time_dependent_array_index_curr_t] /
                static_cast<float>(kNumStringencyLevels);

            // CUDA version of generate_masks()
            for (int action_id = 0; action_id < (kNumStringencyLevels + 1);
                action_id++) {
                int action_mask_array_index =
                    kEnvId * (kNumStringencyLevels + 1) * (kNumAgents - 1) +
                    action_id * (kNumAgents - 1) + kAgentId;
                if (env_timestep_arr[kEnvId] < action_in_cooldown_until[
                    time_independent_array_index]
                ) {
                    obs_a_action_mask[action_mask_array_index] =
                        kNoOpAgentActionMask[action_id];
                } else {
                    obs_a_action_mask[action_mask_array_index] =
                        kDefaultAgentActionMask[action_id];
                }
            }
        }

        // Update planner obs after all the agents' obs are updated
        // (barrier is at uniform control flow: all threads reach it).
        __syncthreads();

        if (kAgentId == kNumAgents - 1) {
            for (int ag_id = 0; ag_id < (kNumAgents - 1); ag_id++) {
                const int kIndex = kEnvId * (kNumAgents - 1) + ag_id;
                obs_p_stringency_policy_indicators[kIndex] =
                    obs_a_stringency_policy_indicators[kIndex];
            }
        }
    }

    __global__ void CudaFederalGovernmentSubsidyStep(
        int * subsidy_level,
        float * subsidy,
        const int kSubsidyInterval,
        const int kNumSubsidyLevels,
        const float * KMaxDailySubsidyPerState,
        const int * kDefaultPlannerActionMask,
        const int * kNoOpPlannerActionMask,
        int * actions,
        float * obs_a_time_until_next_subsidy,
        float * obs_a_current_subsidy_level,
        float * obs_p_time_until_next_subsidy,
        float * obs_p_current_subsidy_level,
        float * obs_p_action_mask,
        int * env_timestep_arr,
        const int kNumAgents,
        const int kEpisodeLength
    ) {
        const int kEnvId = blockIdx.x;
        const int kAgentId = threadIdx.x;

        // NOTE(review): this kernel assumes the timestep was already
        // incremented (by the open/close kernel above) -- confirm ordering.
        assert(env_timestep_arr[kEnvId] > 0 &&
            env_timestep_arr[kEnvId] <= kEpisodeLength);
        assert(kAgentId <= kNumAgents - 1);

        int t_since_last_subsidy = env_timestep_arr[kEnvId] %
            kSubsidyInterval;

        // Setting the (federal government) planner's subsidy level
        // to be the subsidy level for all the US states
        if (kAgentId < kNumAgents - 1) {
            // Indices for time-dependent and time-independent arrays
            // Time dependent arrays have shapes (num_envs,
            // kEpisodeLength + 1, kNumAgents - 1)
            // Time independent arrays have shapes (num_envs, kNumAgents - 1)
            const int kArrayIdxOffset = kEnvId * (kEpisodeLength + 1) *
                (kNumAgents - 1);
            int time_dependent_array_index_curr_t = kArrayIdxOffset +
                env_timestep_arr[kEnvId] * (kNumAgents - 1) + kAgentId;
            int time_dependent_array_index_prev_t = kArrayIdxOffset +
                (env_timestep_arr[kEnvId] - 1) * (kNumAgents - 1) + kAgentId;
            const int time_independent_array_index = kEnvId *
                (kNumAgents - 1) + kAgentId;

            if ((env_timestep_arr[kEnvId] - 1) % kSubsidyInterval == 0) {
                // FIX: chained comparison rewritten as a conjunction
                // (see note in the kernel above).
                assert(actions[kEnvId] >= 0 &&
                    actions[kEnvId] <= kNumSubsidyLevels);
                subsidy_level[time_dependent_array_index_curr_t] =
                    actions[kEnvId];
            } else {
                subsidy_level[time_dependent_array_index_curr_t] =
                    subsidy_level[time_dependent_array_index_prev_t];
            }
            // Setting the subsidies for the US states
            // based on the federal government's subsidy level
            subsidy[time_dependent_array_index_curr_t] =
                subsidy_level[time_dependent_array_index_curr_t] *
                KMaxDailySubsidyPerState[kAgentId] / kNumSubsidyLevels;

            // FIX: garbled `static_cast(` restored to `static_cast<float>(`.
            obs_a_time_until_next_subsidy[
                time_independent_array_index] =
                1.0f - (t_since_last_subsidy /
                static_cast<float>(kSubsidyInterval));
            obs_a_current_subsidy_level[
                time_independent_array_index] =
                subsidy_level[time_dependent_array_index_curr_t] /
                static_cast<float>(kNumSubsidyLevels);
        } else if (kAgentId == (kNumAgents - 1)) {
            for (int action_id = 0; action_id < kNumSubsidyLevels + 1;
                action_id++) {
                int action_mask_array_index = kEnvId *
                    (kNumSubsidyLevels + 1) + action_id;
                if (env_timestep_arr[kEnvId] % kSubsidyInterval == 0) {
                    obs_p_action_mask[action_mask_array_index] =
                        kDefaultPlannerActionMask[action_id];
                } else {
                    obs_p_action_mask[action_mask_array_index] =
                        kNoOpPlannerActionMask[action_id];
                }
            }
        }

        // FIX: __syncthreads() was previously invoked inside the
        // planner-only branch; a barrier in divergent control flow is
        // undefined behavior (deadlock-prone). Synchronize at uniform scope,
        // THEN let the planner thread read the agents' freshly written obs.
        __syncthreads();

        if (kAgentId == (kNumAgents - 1)) {
            // Just use the values for agent id 0
            obs_p_time_until_next_subsidy[kEnvId] =
                obs_a_time_until_next_subsidy[
                    kEnvId * (kNumAgents - 1)
                ];
            obs_p_current_subsidy_level[kEnvId] =
                obs_a_current_subsidy_level[
                    kEnvId * (kNumAgents - 1)
                ];
        }
    }

    __global__ void CudaVaccinationCampaignStep(
        int * vaccinated,
        const int * kNumVaccinesPerDelivery,
        int * num_vaccines_available_t,
        const int kDeliveryInterval,
        const int kTimeWhenVaccineDeliveryBegins,
        float * obs_a_vaccination_campaign_t_until_next_vaccines,
        float * obs_p_vaccination_campaign_t_until_next_vaccines,
        int * env_timestep_arr,
        int kNumAgents,
        int kEpisodeLength
    ) {
        const int kEnvId = blockIdx.x;
        const int kAgentId = threadIdx.x;

        assert(env_timestep_arr[kEnvId] > 0 && env_timestep_arr[kEnvId] <=
            kEpisodeLength);
        assert(kTimeWhenVaccineDeliveryBegins > 0);
        assert(kAgentId <= kNumAgents - 1);

        // CUDA version of generate observations()
        // FIX: the Python reference rounds the first delivery UP to the next
        // multiple of the delivery interval; the previous expression
        // (t + t % interval) does not (e.g. t=5, interval=3 gave 7, not 6).
        int t_first_delivery = kTimeWhenVaccineDeliveryBegins;
        if ((t_first_delivery % kDeliveryInterval) != 0) {
            t_first_delivery += kDeliveryInterval -
                (t_first_delivery % kDeliveryInterval);
        }
        int next_t = env_timestep_arr[kEnvId] + 1;
        float t_until_next_vac;
        if (next_t <= t_first_delivery) {
            // FIX: cast before dividing -- int/int truncated toward zero.
            // NOTE(review): the Python counterpart additionally divides this
            // branch's value by delivery_interval when building obs
            // (double normalization); confirm which side is the intended
            // semantics.
            t_until_next_vac = fminf(
                1.0f,
                (t_first_delivery - next_t) /
                static_cast<float>(kDeliveryInterval));
        } else {
            float t_since_last_vac = next_t % kDeliveryInterval;
            t_until_next_vac = 1.0f - (t_since_last_vac / kDeliveryInterval);
        }

        // Update the vaccinated numbers for just the US states
        if (kAgentId < (kNumAgents - 1)) {
            const int time_independent_array_index = kEnvId *
                (kNumAgents - 1) + kAgentId;
            if ((env_timestep_arr[kEnvId] >= kTimeWhenVaccineDeliveryBegins) &&
                (env_timestep_arr[kEnvId] % kDeliveryInterval == 0)) {
                num_vaccines_available_t[time_independent_array_index] =
                    kNumVaccinesPerDelivery[kAgentId];
            } else {
                num_vaccines_available_t[time_independent_array_index] = 0;
            }
            obs_a_vaccination_campaign_t_until_next_vaccines[
                time_independent_array_index] = t_until_next_vac;
        } else if (kAgentId == kNumAgents - 1) {
            obs_p_vaccination_campaign_t_until_next_vaccines[kEnvId] =
                t_until_next_vac;
        }
    }
}
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause

import numpy as np
from numpy.random import rand

from ai_economist.foundation.base.base_component import (
    BaseComponent,
    component_registry,
)


@component_registry.add
class Gather(BaseComponent):
    """
    Allows mobile agents to move around the world and collect resources and prevents
    agents from moving to invalid locations.

    Can be configured to include collection skill, where agents have heterogeneous
    probabilities of collecting bonus resources without additional labor cost.

    Args:
        move_labor (float): Labor cost associated with movement. Must be >= 0.
            Default is 1.0.
        collect_labor (float): Labor cost associated with collecting resources. This
            cost is added (in addition to any movement cost) when the agent lands on
            a tile that is populated with resources (triggering collection).
            Must be >= 0. Default is 1.0.
        skill_dist (str): Distribution type for sampling skills. Default ("none")
            gives all agents identical skill equal to a bonus prob of 0. "pareto" and
            "lognormal" sample skills from the associated distributions.
    """

    name = "Gather"
    required_entities = ["Coin", "House", "Labor"]
    agent_subclasses = ["BasicMobileAgent"]

    def __init__(
        self,
        *base_component_args,
        move_labor=1.0,
        collect_labor=1.0,
        skill_dist="none",
        **base_component_kwargs
    ):
        super().__init__(*base_component_args, **base_component_kwargs)

        # Labor cost per successful move; validated non-negative.
        self.move_labor = float(move_labor)
        assert self.move_labor >= 0

        # Labor cost per resource collection; validated non-negative.
        self.collect_labor = float(collect_labor)
        assert self.collect_labor >= 0

        self.skill_dist = skill_dist.lower()
        assert self.skill_dist in ["none", "pareto", "lognormal"]

        # Per-timestep log of gather events (see get_dense_log).
        self.gathers = []

        # Agent-index helper and row/col offsets for the 4 move actions,
        # ordered [Left, Right, Up, Down] -- used by generate_masks.
        self._aidx = np.arange(self.n_agents)[:, None].repeat(4, axis=1)
        self._roff = np.array([[0, 0, -1, 1]])
        self._coff = np.array([[-1, 1, 0, 0]])

    # Required methods for implementing components
    # --------------------------------------------

    def get_n_actions(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        Adds 4 actions (move up, down, left, or right) for mobile agents.
        """
        # This component adds 4 action that agents can take:
        # move up, down, left, or right
        if agent_cls_name == "BasicMobileAgent":
            return 4
        return None

    def get_additional_state_fields(self, agent_cls_name):
        """
        See base_component.py for detailed description.

        For mobile agents, add state field for collection skill.
        """
        if agent_cls_name not in self.agent_subclasses:
            return {}
        if agent_cls_name == "BasicMobileAgent":
            return {"bonus_gather_prob": 0.0}
        raise NotImplementedError

    def component_step(self):
        """
        See base_component.py for detailed description.

        Move to adjacent, unoccupied locations. Collect resources when moving to
        populated resource tiles, adding the resource to the agent's inventory and
        de-populating it from the tile.
        """
        world = self.world

        gathers = []
        for agent in world.get_random_order_agents():

            # NOTE(review): this `return` exits the whole loop (skipping any
            # remaining agents AND the trailing self.gathers.append) rather
            # than skipping just this agent; matches upstream, but confirm it
            # is not meant to be `continue`.
            if self.name not in agent.action:
                return
            action = agent.get_component_action(self.name)

            r, c = [int(x) for x in agent.loc]

            if action == 0:  # NO-OP!
                new_r, new_c = r, c

            elif action <= 4:
                if action == 1:  # Left
                    new_r, new_c = r, c - 1
                elif action == 2:  # Right
                    new_r, new_c = r, c + 1
                elif action == 3:  # Up
                    new_r, new_c = r - 1, c
                else:  # action == 4, # Down
                    new_r, new_c = r + 1, c

                # Attempt to move the agent (if the new coordinates aren't accessible,
                # nothing will happen)
                new_r, new_c = world.set_agent_loc(agent, new_r, new_c)

                # If the agent did move, incur the labor cost of moving
                if (new_r != r) or (new_c != c):
                    agent.state["endogenous"]["Labor"] += self.move_labor

            else:
                raise ValueError

            for resource, health in world.location_resources(new_r, new_c).items():
                if health >= 1:
                    # Skill grants a chance of gathering 1 bonus unit.
                    n_gathered = 1 + (rand() < agent.state["bonus_gather_prob"])
                    agent.state["inventory"][resource] += n_gathered
                    world.consume_resource(resource, new_r, new_c)
                    # Incur the labor cost of collecting a resource
                    agent.state["endogenous"]["Labor"] += self.collect_labor
                    # Log the gather
                    gathers.append(
                        dict(
                            agent=agent.idx,
                            resource=resource,
                            n=n_gathered,
                            loc=[new_r, new_c],
                        )
                    )

        self.gathers.append(gathers)

    def generate_observations(self):
        """
        See base_component.py for detailed description.

        Here, agents observe their collection skill. The planner does not observe
        anything from this component.
        """
        return {
            str(agent.idx): {"bonus_gather_prob": agent.state["bonus_gather_prob"]}
            for agent in self.world.agents
        }

    def generate_masks(self, completions=0):
        """
        See base_component.py for detailed description.

        Prevent moving to adjacent tiles that are already occupied (or outside the
        boundaries of the world)
        """
        world = self.world

        coords = np.array([agent.loc for agent in world.agents])[:, :, None]
        # +1 compensates for the padding added below.
        ris = coords[:, 0] + self._roff + 1
        cis = coords[:, 1] + self._coff + 1

        # Pad the maps so out-of-bounds neighbors read as masked (0).
        occ = np.pad(world.maps.unoccupied, ((1, 1), (1, 1)))
        acc = np.pad(world.maps.accessibility, ((0, 0), (1, 1), (1, 1)))
        mask_array = np.logical_and(occ[ris, cis], acc[self._aidx, ris, cis]).astype(
            np.float32
        )

        masks = {agent.idx: mask_array[i] for i, agent in enumerate(world.agents)}

        return masks

    # For non-required customization
    # ------------------------------

    def additional_reset_steps(self):
        """
        See base_component.py for detailed description.

        Re-sample agents' collection skills.
        """
        for agent in self.world.agents:
            if self.skill_dist == "none":
                bonus_rate = 0.0
            elif self.skill_dist == "pareto":
                # Clipped to [0, 2] then halved -> bonus prob in [0, 1].
                bonus_rate = np.minimum(2, np.random.pareto(3)) / 2
            elif self.skill_dist == "lognormal":
                bonus_rate = np.minimum(2, np.random.lognormal(-2.022, 0.938)) / 2
            else:
                raise NotImplementedError
            agent.state["bonus_gather_prob"] = float(bonus_rate)

        self.gathers = []

    def get_dense_log(self):
        """
        Log resource collections.

        Returns:
            gathers (list): A list of gather events. Each entry corresponds to a single
                timestep and contains a description of any resource gathers that
                occurred on that timestep.

        """
        return self.gathers
# Copyright (c) 2020, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause

from copy import deepcopy

import numpy as np

from ai_economist.foundation.base.base_component import (
    BaseComponent,
    component_registry,
)
from ai_economist.foundation.components.utils import (
    annealed_tax_limit,
    annealed_tax_mask,
)


@component_registry.add
class WealthRedistribution(BaseComponent):
    """Redistributes the total coin of the mobile agents as evenly as possible.

    Note:
        If this component is used, it should always be the last component in the order!
    """

    name = "WealthRedistribution"
    required_entities = ["Coin"]
    agent_subclasses = ["BasicMobileAgent"]

    """
    Required methods for implementing components
    --------------------------------------------
    """

    def get_n_actions(self, agent_cls_name):
        """This component is passive: it does not add any actions."""
        return None

    def get_additional_state_fields(self, agent_cls_name):
        """This component does not add any state fields."""
        return {}

    def component_step(self):
        """
        See base_component.py for detailed description.

        Redistributes inventory coins so that all agents have equal coin endowment.
        """
        world = self.world

        # Total endowment = inventory + escrow, summed over all agents.
        inv_coin = np.array(
            [a.state["inventory"]["Coin"] for a in world.agents]
        )
        esc_coin = np.array([a.state["escrow"]["Coin"] for a in world.agents])
        total_coin = np.sum(inv_coin + esc_coin)

        # Give every agent the same endowment share; escrowed coin stays put,
        # so each inventory is set to (share - escrow).
        per_agent_share = total_coin / self.n_agents
        for a in world.agents:
            a.state["inventory"]["Coin"] = float(
                per_agent_share - esc_coin[a.idx]
            )

        # Sanity check: redistribution must (approximately) conserve coin.
        inv_after = np.array(
            [a.state["inventory"]["Coin"] for a in world.agents]
        )
        esc_after = np.array([a.state["escrow"]["Coin"] for a in world.agents])
        assert np.abs(total_coin - np.sum(inv_after + esc_after)) < 1

    def generate_observations(self):
        """This component does not add any observations."""
        return {}

    def generate_masks(self, completions=0):
        """Passive component. Masks are empty."""
        return {}
+ rate_max (float): Maximum tax rate within a bracket. Must be <= 1 (default). + rate_disc (float): (Only applies for "model_wrapper") the interval separating + discrete tax rates that the planner can select. Default of 0.05 means, + for example, the planner can select among [0.0, 0.05, 0.10, ... 1.0]. + Must be > 0 and < 1. + n_brackets (int): How many tax brackets to use. Must be >=2. Default is 5. + top_bracket_cutoff (float): The income at the left end of the last tax + bracket. Must be >= 10. Default is 100 coin. + usd_scaling (float): Scale by which to divide the US Federal bracket cutoffs + when using bracket_spacing = "us-federal". Must be > 0. Default is 1000. + bracket_spacing (str): How bracket cutoffs should be spaced. + "us-federal" (default) uses scaled cutoffs from the 2018 US Federal + taxes, with scaling set by usd_scaling (ignores n_brackets and + top_bracket_cutoff); + "linear" linearly spaces the n_bracket cutoffs between 0 and + top_bracket_cutoff; + "log" is similar to "linear" but with logarithmic spacing. + fixed_bracket_rates (list): Required if tax_model=="fixed-bracket-rates". A + list of fixed marginal rates to use for each bracket. Length must be + equal to the number of brackets (7 for "us-federal" spacing, n_brackets + otherwise). + pareto_weight_type (str): Type of Pareto weights to use when computing tax + rates using the Saez formula. "inverse_income" (default) uses 1/z; + "uniform" uses 1. + saez_fixed_elas (float, optional): If supplied, this value will be used as + the elasticity estimate when computing tax rates using the Saez formula. + If not given (default), elasticity will be estimated empirically. + tax_annealing_schedule (list, optional): A length-2 list of + [tax_annealing_warmup, tax_annealing_slope] describing the tax annealing + schedule. See annealed_tax_mask function for details. Default behavior is + no tax annealing. 
+ """ + + name = "PeriodicBracketTax" + component_type = "PeriodicTax" + required_entities = ["Coin"] + agent_subclasses = ["BasicMobileAgent", "BasicPlanner"] + + def __init__( + self, + *base_component_args, + disable_taxes=False, + tax_model="model_wrapper", + period=100, + rate_min=0.0, + rate_max=1.0, + rate_disc=0.05, + n_brackets=5, + top_bracket_cutoff=100, + usd_scaling=1000.0, + bracket_spacing="us-federal", + fixed_bracket_rates=None, + pareto_weight_type="inverse_income", + saez_fixed_elas=None, + tax_annealing_schedule=None, + **base_component_kwargs + ): + super().__init__(*base_component_args, **base_component_kwargs) + + # Whether to turn off taxes. Disabling taxes will prevent any taxes from + # being collected but the observation space will be the same as if taxes were + # enabled, which can be useful for controlled tax/no-tax comparisons. + self.disable_taxes = bool(disable_taxes) + + # How to set taxes. + self.tax_model = tax_model + assert self.tax_model in [ + "model_wrapper", + "us-federal-single-filer-2018-scaled", + "saez", + "fixed-bracket-rates", + ] + + # How many timesteps a tax period lasts. + self.period = int(period) + assert self.period > 0 + + # Minimum marginal bracket rate + self.rate_min = 0.0 if self.disable_taxes else float(rate_min) + # Maximum marginal bracket rate + self.rate_max = 0.0 if self.disable_taxes else float(rate_max) + assert 0 <= self.rate_min <= self.rate_max <= 1.0 + + # Interval for discretizing tax rate options + # (only applies if tax_model == "model_wrapper"). 
+ self.rate_disc = float(rate_disc) + + self.use_discretized_rates = self.tax_model == "model_wrapper" + + if self.use_discretized_rates: + self.disc_rates = np.arange( + self.rate_min, self.rate_max + self.rate_disc, self.rate_disc + ) + self.disc_rates = self.disc_rates[self.disc_rates <= self.rate_max] + assert len(self.disc_rates) > 1 or self.disable_taxes + self.n_disc_rates = len(self.disc_rates) + else: + self.disc_rates = None + self.n_disc_rates = 0 + + # === income bracket definitions === + self.n_brackets = int(n_brackets) + assert self.n_brackets >= 2 + + self.top_bracket_cutoff = float(top_bracket_cutoff) + assert self.top_bracket_cutoff >= 10 + + self.usd_scale = float(usd_scaling) + assert self.usd_scale > 0 + + self.bracket_spacing = bracket_spacing.lower() + assert self.bracket_spacing in ["linear", "log", "us-federal"] + + if self.bracket_spacing == "linear": + self.bracket_cutoffs = np.linspace( + 0, self.top_bracket_cutoff, self.n_brackets + ) + + elif self.bracket_spacing == "log": + b0_max = self.top_bracket_cutoff / (2 ** (self.n_brackets - 2)) + self.bracket_cutoffs = np.concatenate( + [ + [0], + 2 + ** np.linspace( + np.log2(b0_max), + np.log2(self.top_bracket_cutoff), + n_brackets - 1, + ), + ] + ) + elif self.bracket_spacing == "us-federal": + self.bracket_cutoffs = ( + np.array([0, 9700, 39475, 84200, 160725, 204100, 510300]) + / self.usd_scale + ) + self.n_brackets = len(self.bracket_cutoffs) + self.top_bracket_cutoff = float(self.bracket_cutoffs[-1]) + else: + raise NotImplementedError + + self.bracket_edges = np.concatenate([self.bracket_cutoffs, [np.inf]]) + self.bracket_sizes = self.bracket_edges[1:] - self.bracket_edges[:-1] + + assert self.bracket_cutoffs[0] == 0 + + if self.tax_model == "us-federal-single-filer-2018-scaled": + assert self.bracket_spacing == "us-federal" + + if self.tax_model == "fixed-bracket-rates": + assert isinstance(fixed_bracket_rates, (tuple, list)) + assert np.min(fixed_bracket_rates) >= 0 + assert 
np.max(fixed_bracket_rates) <= 1 + assert len(fixed_bracket_rates) == self.n_brackets + self._fixed_bracket_rates = np.array(fixed_bracket_rates) + else: + self._fixed_bracket_rates = None + + # === bracket tax rates === + self.curr_bracket_tax_rates = np.zeros_like(self.bracket_cutoffs) + self.curr_rate_indices = [0 for _ in range(self.n_brackets)] + + # === Pareto weights, elasticity === + self.pareto_weight_type = pareto_weight_type + self.elas_tm1 = 0.5 + self.elas_t = 0.5 + self.log_z0_tm1 = 0 + self.log_z0_t = 0 + + self._saez_fixed_elas = saez_fixed_elas + if self._saez_fixed_elas is not None: + self._saez_fixed_elas = float(self._saez_fixed_elas) + assert self._saez_fixed_elas >= 0 + + # Size of the local buffer. In a distributed context, the global buffer size + # will be capped at n_replicas * _buffer_size. + # NOTE: Saez will use random taxes until it has self._buffer_size samples. + self._buffer_size = 500 + self._reached_min_samples = False + self._additions_this_episode = 0 + # Local buffer maintained by this replica. + self._local_saez_buffer = [] + # "Global" buffer obtained by combining local buffers of individual replicas. 
+ self._global_saez_buffer = [] + + self._saez_n_estimation_bins = 100 + self._saez_top_rate_cutoff = self.bracket_cutoffs[-1] + self._saez_income_bin_edges = np.linspace( + 0, self._saez_top_rate_cutoff, self._saez_n_estimation_bins + 1 + ) + self._saez_income_bin_sizes = np.concatenate( + [ + self._saez_income_bin_edges[1:] - self._saez_income_bin_edges[:-1], + [np.inf], + ] + ) + self.running_avg_tax_rates = np.zeros_like(self.curr_bracket_tax_rates) + + # === tax cycle definitions === + self.tax_cycle_pos = 1 + self.last_coin = [0 for _ in range(self.n_agents)] + self.last_income = [0 for _ in range(self.n_agents)] + self.last_marginal_rate = [0 for _ in range(self.n_agents)] + self.last_effective_tax_rate = [0 for _ in range(self.n_agents)] + + # === trackers === + self.total_collected_taxes = 0 + self.all_effective_tax_rates = [] + self._schedules = {"{:03d}".format(int(r)): [0] for r in self.bracket_cutoffs} + self._occupancy = {"{:03d}".format(int(r)): 0 for r in self.bracket_cutoffs} + self.taxes = [] + + # === tax annealing === + # for annealing of non-planner max taxes. + self._annealed_rate_max = float(self.rate_max) + self._last_completions = 0 + + # for annealing of planner actions. 
+ self.tax_annealing_schedule = tax_annealing_schedule + if tax_annealing_schedule is not None: + assert isinstance(self.tax_annealing_schedule, (tuple, list)) + self._annealing_warmup = self.tax_annealing_schedule[0] + self._annealing_slope = self.tax_annealing_schedule[1] + self._annealed_rate_max = annealed_tax_limit( + self._last_completions, + self._annealing_warmup, + self._annealing_slope, + self.rate_max, + ) + else: + self._annealing_warmup = None + self._annealing_slope = None + + if self.tax_model == "model_wrapper" and not self.disable_taxes: + planner_action_tuples = self.get_n_actions("BasicPlanner") + self._planner_tax_val_dict = { + k: self.disc_rates for k, v in planner_action_tuples + } + else: + self._planner_tax_val_dict = {} + self._planner_masks = None + + # === placeholders === + self._curr_rates_obs = np.array(self.curr_marginal_rates) + self._last_income_obs = np.array(self.last_income) / self.period + self._last_income_obs_sorted = self._last_income_obs[ + np.argsort(self._last_income_obs) + ] + + # Methods for getting/setting marginal tax rates + # ---------------------------------------------- + + # ------- US Federal taxes + @property + def us_federal_single_filer_2018_scaled(self): + """ + https://turbotax.intuit.com/tax-tips/irs-tax-return/current-federal-tax-rate-schedules/L7Bjs1EAD + If taxable income is over— + but not over— + the tax is: + $0 + $9,700 + 10% of the amount over $0 + $9,700 + $39,475 + $970 plus 12% of the amount over $9,700 + $39,475 + $84,200 + $4,543 plus 22% of the amount over $39,475 + $84,200 + $160,725 + $14,382 plus 24% of the amount over $84,200 + $160,725 + $204,100 + $32,748 plus 32% of the amount over $160,725 + $204,100 + $510,300 + $46,628 plus 35% of the amount over $204,100 + $510,300 + no limit + $153,798 plus 37% of the amount over $510,300 + """ + return [0.1, 0.12, 0.22, 0.24, 0.32, 0.35, 0.37] + + # ------- fixed-bracket-rates + @property + def fixed_bracket_rates(self): + """Return whatever 
fixed bracket rates were set during initialization.""" + return self._fixed_bracket_rates + + @property + def curr_rate_max(self): + """Maximum allowable tax rate, given current progress of any tax annealing.""" + if self.tax_annealing_schedule is None: + return self.rate_max + return self._annealed_rate_max + + @property + def curr_marginal_rates(self): + """The current set of marginal tax bracket rates.""" + if self.use_discretized_rates: + return self.disc_rates[self.curr_rate_indices] + + if self.tax_model == "us-federal-single-filer-2018-scaled": + marginal_tax_bracket_rates = np.minimum( + np.array(self.us_federal_single_filer_2018_scaled), self.curr_rate_max + ) + elif self.tax_model == "saez": + marginal_tax_bracket_rates = np.minimum( + self.curr_bracket_tax_rates, self.curr_rate_max + ) + elif self.tax_model == "fixed-bracket-rates": + marginal_tax_bracket_rates = np.minimum( + np.array(self.fixed_bracket_rates), self.curr_rate_max + ) + else: + raise NotImplementedError + + return marginal_tax_bracket_rates + + def set_new_period_rates_model(self): + """Update taxes using actions from the tax model.""" + if self.disable_taxes: + return + + # AI version + for i, bracket in enumerate(self.bracket_cutoffs): + planner_action = self.world.planner.get_component_action( + self.name, "TaxIndexBracket_{:03d}".format(int(bracket)) + ) + if planner_action == 0: + pass + elif planner_action <= self.n_disc_rates: + self.curr_rate_indices[i] = int(planner_action - 1) + else: + raise ValueError + + # ------- Saez formula + def compute_and_set_new_period_rates_from_saez_formula( + self, update_elas_tm1=True, update_log_z0_tm1=True + ): + """Estimates/sets optimal rates using adaptation of Saez formula + + See: https://www.nber.org/papers/w7628 + """ + # Until we reach the min sample number, keep checking if we have reached it. + if not self._reached_min_samples: + # Note: self.saez_buffer includes the global buffer (if applicable). 
+ if len(self.saez_buffer) >= self._buffer_size: + self._reached_min_samples = True + + # If no enough samples, use random taxes. + if not self._reached_min_samples: + self.curr_bracket_tax_rates = np.random.uniform( + low=self.rate_min, + high=self.curr_rate_max, + size=self.curr_bracket_tax_rates.shape, + ) + return + + incomes_and_marginal_rates = np.array(self.saez_buffer) + + # Elasticity assumed constant for all incomes. + # (Run this for the sake of tracking the estimate; will not actually use the + # estimate if using fixed elasticity). + if update_elas_tm1: + self.elas_tm1 = float(self.elas_t) + if update_log_z0_tm1: + self.log_z0_tm1 = float(self.log_z0_t) + + elas_t, log_z0_t = self.estimate_uniform_income_elasticity( + incomes_and_marginal_rates, + elas_df=0.98, + elas_tm1=self.elas_tm1, + log_z0_tm1=self.log_z0_tm1, + verbose=False, + ) + + if update_elas_tm1: + self.elas_t = float(elas_t) + if update_log_z0_tm1: + self.log_z0_t = float(log_z0_t) + + # If a fixed estimate has been specified, use it in the formulas below. + if self._saez_fixed_elas is not None: + elas_t = float(self._saez_fixed_elas) + + # Get Saez parameters at each income bin + # to compute a marginal tax rate schedule. + binned_gzs, binned_azs = self.get_binned_saez_welfare_weight_and_pareto_params( + population_incomes=incomes_and_marginal_rates[:, 0] + ) + + # Use the elasticity to compute this binned schedule using the Saez formula. + binned_marginal_tax_rates = self.get_saez_marginal_rates( + binned_gzs, binned_azs, elas_t + ) + + # Adapt the saez tax schedule to the tax brackets. 
+ self.curr_bracket_tax_rates = np.clip( + self.bracketize_schedule( + bin_marginal_rates=binned_marginal_tax_rates, + bin_edges=self._saez_income_bin_edges, + bin_sizes=self._saez_income_bin_sizes, + ), + self.rate_min, + self.curr_rate_max, + ) + + self.running_avg_tax_rates = (self.running_avg_tax_rates * 0.99) + ( + self.curr_bracket_tax_rates * 0.01 + ) + + # Implementation of the Saez formula in this periodic, bracketed setting + # ---------------------------------------------------------------------- + @property + def saez_buffer(self): + if not self._global_saez_buffer: + saez_buffer = self._local_saez_buffer + elif self._additions_this_episode == 0: + saez_buffer = self._global_saez_buffer + else: + saez_buffer = ( + self._global_saez_buffer + + self._local_saez_buffer[-self._additions_this_episode :] + ) + return saez_buffer + + def get_local_saez_buffer(self): + return self._local_saez_buffer + + def set_global_saez_buffer(self, global_saez_buffer): + assert isinstance(global_saez_buffer, list) + assert len(global_saez_buffer) >= len(self._local_saez_buffer) + self._global_saez_buffer = global_saez_buffer + + def _update_saez_buffer(self, tax_info_t): + # Update the buffer. + for a_idx in range(self.n_agents): + z_t = tax_info_t[str(a_idx)]["income"] + tau_t = tax_info_t[str(a_idx)]["marginal_rate"] + self._local_saez_buffer.append([z_t, tau_t]) + self._additions_this_episode += 1 + + while len(self._local_saez_buffer) > self._buffer_size: + _ = self._local_saez_buffer.pop(0) + + def reset_saez_buffers(self): + self._local_saez_buffer = [] + self._global_saez_buffer = [] + self._additions_this_episode = 0 + self._reached_min_samples = False + + def estimate_uniform_income_elasticity( + self, + observed_incomes_and_marginal_rates, + elas_df=0.98, + elas_tm1=0.5, + log_z0_tm1=0.5, + verbose=False, + ): + """Estimate elasticity using Ordinary Least Squares regression. 
+ OLS: https://en.wikipedia.org/wiki/Ordinary_least_squares + Estimating elasticity: https://www.nber.org/papers/w7512 + """ + zs = [] + taus = [] + + for z_t, tau_t in observed_incomes_and_marginal_rates: + # If z_t is <=0 or tau_t is >=1, the operations below will give us nans + if z_t > 0 and tau_t < 1: + zs.append(z_t) + taus.append(tau_t) + + if len(zs) < 10: + return float(elas_tm1), float(log_z0_tm1) + if np.std(taus) < 1e-6: + return float(elas_tm1), float(log_z0_tm1) + + # Regressing log income against log 1-marginal_rate. + x = np.log(np.maximum(1 - np.array(taus), 1e-9)) + # (bias term) + b = np.ones_like(x) + # Perform OLS. + X = np.stack([x, b]).T # Stack linear & bias terms + Y = np.log(np.maximum(np.array(zs), 1e-9)) # Regression targets + XXi = np.linalg.inv(X.T.dot(X)) + XY = X.T.dot(Y) + elas, log_z0 = XXi.T.dot(XY) + + warn_less_than_0 = elas < 0 + instant_elas_t = np.maximum(elas, 0.0) + + elas_t = ((1 - elas_df) * instant_elas_t) + (elas_df * elas_tm1) + + if verbose: + if warn_less_than_0: + print("\nWARNING: Recent elasticity estimate is < 0.") + print("Running elasticity estimate: {:.2f}\n".format(elas_t)) + else: + print("\nRunning elasticity estimate: {:.2f}\n".format(elas_t)) + + return elas_t, log_z0 + + def get_binned_saez_welfare_weight_and_pareto_params(self, population_incomes): + def clip(x, lo=None, hi=None): + if lo is not None: + x = max(lo, x) + if hi is not None: + x = min(x, hi) + return x + + def bin_z(left, right): + return 0.5 * (left + right) + + def get_cumul(counts, incomes_below, incomes_above): + n_below = len(incomes_below) + n_above = len(incomes_above) + n_total = np.sum(counts) + n_below + n_above + + def p(i, counts): + return counts[i] / n_total + + # Probability that an income is below the taxable threshold. + p_below = n_below / n_total + + # pz = p(z' = z): probability that [binned] income z' occurs in bin z. 
+            pz = [p(i, counts) for i in range(len(counts))] + [n_above / n_total]
+
+            # Pz = p(z' <= z): Probability z' is less-than or equal to z.
+            cum_pz = [pz[0] + p_below]
+            for p in pz[1:]:
+                cum_pz.append(clip(cum_pz[-1] + p, 0, 1.0))
+
+            return np.array(pz), np.array(cum_pz)
+
+        def compute_binned_g_distribution(counts, lefts, incomes):
+            def pareto(z):
+                if self.pareto_weight_type == "uniform":
+                    pareto_weights = np.ones_like(z)
+                elif self.pareto_weight_type == "inverse_income":
+                    pareto_weights = 1.0 / np.maximum(1, z)
+                else:
+                    raise NotImplementedError
+                return pareto_weights
+
+            incomes_below = incomes[incomes < lefts[0]]
+            incomes_above = incomes[incomes > lefts[-1]]
+
+            # The total (unnormalized) Pareto weight of untaxable incomes.
+            if len(incomes_below) > 0:
+                pareto_weight_below = np.sum(pareto(np.maximum(incomes_below, 0)))
+            else:
+                pareto_weight_below = 0
+
+            # The total (unnormalized) Pareto weight of incomes above the top cutoff.
+            if len(incomes_above) > 0:
+                pareto_weight_above = np.sum(pareto(incomes_above))
+            else:
+                pareto_weight_above = 0
+
+            # The total (unnormalized) Pareto weight within each bin.
+            pareto_weight_per_bin = counts * pareto(bin_z(lefts[:-1], lefts[1:]))
+
+            # The aggregate (unnormalized) Pareto weight of all incomes.
+            cumulative_pareto_weights = pareto_weight_per_bin.sum()
+            cumulative_pareto_weights += pareto_weight_below
+            cumulative_pareto_weights += pareto_weight_above
+
+            # Normalize so that the Pareto density sums to 1.
+            pareto_norm = cumulative_pareto_weights + 1e-9
+            unnormalized_pareto_density = np.concatenate(
+                [pareto_weight_per_bin, [pareto_weight_above]]
+            )
+            normalized_pareto_density = unnormalized_pareto_density / pareto_norm
+
+            # Aggregate Pareto weight of earners with income greater-than or equal to z.
+            cumulative_pareto_density_geq_z = np.cumsum(
+                normalized_pareto_density[::-1]
+            )[::-1]
+
+            # Probability that [binned] income z' is greater-than or equal to z.
+ pz, _ = get_cumul(counts, incomes_below, incomes_above) + cumulative_prob_geq_z = np.cumsum(pz[::-1])[::-1] + + # Average (normalized) Pareto weight of earners with income >= z. + geq_z_norm = cumulative_prob_geq_z + 1e-9 + avg_pareto_weight_geq_z = cumulative_pareto_density_geq_z / geq_z_norm + + def interpolate_gzs(gz): + # Assume incomes within a bin are evenly distributed within that bin + # and re-compute accordingly. + gz_at_left_edge = gz[:-1] + gz_at_right_edge = gz[1:] + + avg_bin_gz = 0.5 * (gz_at_left_edge + gz_at_right_edge) + # Re-attach the gz of the top tax rate (does not need to be + # interpolated). + gzs = np.concatenate([avg_bin_gz, [gz[-1]]]) + return gzs + + return interpolate_gzs(avg_pareto_weight_geq_z) + + def compute_binned_a_distribution(counts, lefts, incomes): + incomes_below = incomes[incomes < lefts[0]] + incomes_above = incomes[incomes > lefts[-1]] + + # z is defined as the MIDDLE point in a bin. + # So for a bin [left, right] -> z = (left + right) / 2. + Az = [] + + # cum_pz = p(z' <= z): Probability z' is less-than or equal to z + pz, cum_pz = get_cumul(counts, incomes_below, incomes_above) + + # Probability z' is greater-than or equal to z + # Note: The "0.5" coefficient gives results more consistent with theory; it + # accounts for the assumption that incomes within a particular bin are + # uniformly spread between the left & right edges of that bin. + p_geq_z = 1 - cum_pz + (0.5 * pz) + + T = len(lefts[:-1]) + + for i in range(T): + if pz[i] == 0: + Az.append(np.nan) + else: + z = bin_z(lefts[i], lefts[i + 1]) + # paz = z * pz[i] / (clip(1 - Pz[i], 0, 1) + 1e-9) + paz = z * pz[i] / (clip(p_geq_z[i], 0, 1) + 1e-9) # defn of A(z) + paz = paz / (lefts[i + 1] - lefts[i]) # norm by bin width + Az.append(paz) + + # Az for the incomes past the top cutoff, + # the bin is [left, infinity]: there is no "middle". + # Hence, use the mean value in the last bin. 
+ if len(incomes_above) > 0: + cutoff = lefts[-1] + avg_income_above_cutoff = np.mean(incomes_above) + # use a special formula to compute A(z) + Az_above = avg_income_above_cutoff / ( + avg_income_above_cutoff - cutoff + 1e-9 + ) + else: + Az_above = 0.0 + + return np.concatenate([Az, [Az_above]]) + + counts, lefts = np.histogram( + population_incomes, bins=self._saez_income_bin_edges + ) + population_gz = compute_binned_g_distribution(counts, lefts, population_incomes) + population_az = compute_binned_a_distribution(counts, lefts, population_incomes) + + # Return the binned stats used to create a schedule of marginal rates. + return population_gz, population_az + + @staticmethod + def get_saez_marginal_rates(binned_gz, binned_az, elas, interpolate=True): + # Marginal rates within each income bin (last tau is the top tax rate). + taus = (1.0 - binned_gz) / (1.0 - binned_gz + binned_az * elas + 1e-9) + + if interpolate: + # In bins where there were no incomes found, tau is nan. + # Interpolate to fill the gaps. + last_real_rate = 0.0 + last_real_tidx = -1 + for i, tau in enumerate(taus): + # The current tax rate is a real number. + if not np.isnan(tau): + # This is the end of a gap. Interpolate. + if (i - last_real_tidx) > 1: + assert ( + i != 0 + ) # This should never trigger for the first tax bin. + gap_indices = list(range(last_real_tidx + 1, i)) + intermediate_rates = np.linspace( + last_real_rate, tau, len(gap_indices) + 2 + )[1:-1] + assert len(gap_indices) == len(intermediate_rates) + for gap_index, intermediate_rate in zip( + gap_indices, intermediate_rates + ): + taus[gap_index] = intermediate_rate + # Update the tracker. + last_real_rate = float(tau) + last_real_tidx = int(i) + + # The current tax rate is a nan. Continue without updating + # the tracker (indicating the presence of a gap). 
+ else: + pass + + return taus + + def bracketize_schedule(self, bin_marginal_rates, bin_edges, bin_sizes): + # Compute the amount of tax each bracket would collect + # if income was >= the right edge. + # Divide by the bracket size to get + # the average marginal rate within that bracket. + last_bracket_total = 0 + bracket_avg_marginal_rates = [] + for b_idx, income in enumerate(self.bracket_cutoffs[1:]): + # How much income occurs within each bin + # (including the open-ended, top "bin"). + past_cutoff = np.maximum(0, income - bin_edges) + bin_income = np.minimum(bin_sizes, past_cutoff) + + # To get the total taxes due, + # multiply the income within each bin by that bin's marginal rate. + bin_taxes = bin_marginal_rates * bin_income + taxes_due = np.maximum(0, np.sum(bin_taxes)) + + bracket_tax_burden = taxes_due - last_bracket_total + bracket_size = self.bracket_sizes[b_idx] + + bracket_avg_marginal_rates.append(bracket_tax_burden / bracket_size) + last_bracket_total = taxes_due + + # The top bracket tax rate is computed directly already. 
+ bracket_avg_marginal_rates.append(bin_marginal_rates[-1]) + + bracket_rates = np.array(bracket_avg_marginal_rates) + assert len(bracket_rates) == self.n_brackets + + return bracket_rates + + # Methods for collecting and redistributing taxes + # ----------------------------------------------- + + def income_bin(self, income): + """Return index of tax bin in which income falls.""" + if income < 0: + return 0.0 + meets_min = income >= self.bracket_edges[:-1] + under_max = income < self.bracket_edges[1:] + bracket_bool = meets_min * under_max + return self.bracket_cutoffs[np.argmax(bracket_bool)] + + def marginal_rate(self, income): + """Return the marginal tax rate applied at this income level.""" + if income < 0: + return 0.0 + meets_min = income >= self.bracket_edges[:-1] + under_max = income < self.bracket_edges[1:] + bracket_bool = meets_min * under_max + return self.curr_marginal_rates[np.argmax(bracket_bool)] + + def taxes_due(self, income): + """Return the total amount of taxes due at this income level.""" + past_cutoff = np.maximum(0, income - self.bracket_cutoffs) + bin_income = np.minimum(self.bracket_sizes, past_cutoff) + bin_taxes = self.curr_marginal_rates * bin_income + return np.sum(bin_taxes) + + def enact_taxes(self): + """Calculate period income & tax burden. 
Collect taxes and redistribute.""" + net_tax_revenue = 0 + tax_dict = dict( + schedule=np.array(self.curr_marginal_rates), + cutoffs=np.array(self.bracket_cutoffs), + ) + + for curr_rate, bracket_cutoff in zip( + self.curr_marginal_rates, self.bracket_cutoffs + ): + self._schedules["{:03d}".format(int(bracket_cutoff))].append( + float(curr_rate) + ) + + self.last_income = [] + self.last_effective_tax_rate = [] + self.last_marginal_rate = [] + for agent, last_coin in zip(self.world.agents, self.last_coin): + income = agent.total_endowment("Coin") - last_coin + tax_due = self.taxes_due(income) + effective_taxes = np.minimum( + agent.state["inventory"]["Coin"], tax_due + ) # Don't take from escrow. + marginal_rate = self.marginal_rate(income) + effective_tax_rate = float(effective_taxes / np.maximum(0.000001, income)) + tax_dict[str(agent.idx)] = dict( + income=float(income), + tax_paid=float(effective_taxes), + marginal_rate=marginal_rate, + effective_rate=effective_tax_rate, + ) + + # Actually collect the taxes. + agent.state["inventory"]["Coin"] -= effective_taxes + net_tax_revenue += effective_taxes + + self.last_income.append(float(income)) + self.last_marginal_rate.append(float(marginal_rate)) + self.last_effective_tax_rate.append(effective_tax_rate) + + self.all_effective_tax_rates.append(effective_tax_rate) + self._occupancy["{:03d}".format(int(self.income_bin(income)))] += 1 + + self.total_collected_taxes += float(net_tax_revenue) + + lump_sum = net_tax_revenue / self.n_agents + for agent in self.world.agents: + agent.state["inventory"]["Coin"] += lump_sum + tax_dict[str(agent.idx)]["lump_sum"] = float(lump_sum) + self.last_coin[agent.idx] = float(agent.total_endowment("Coin")) + + self.taxes.append(tax_dict) + + # Pre-compute some things that will be useful for generating observations. 
+ self._last_income_obs = np.array(self.last_income) / self.period + self._last_income_obs_sorted = self._last_income_obs[ + np.argsort(self._last_income_obs) + ] + + # Fold this period's tax data into the saez buffer. + if self.tax_model == "saez": + self._update_saez_buffer(tax_dict) + + # Required methods for implementing components + # -------------------------------------------- + + def get_n_actions(self, agent_cls_name): + """ + See base_component.py for detailed description. + + If using the "model_wrapper" tax model and taxes are enabled, the planner's + action space includes an action subspace for each of the tax brackets. Each + such action space has as many actions as there are discretized tax rates. + """ + # Only the planner takes actions through this component. + if agent_cls_name == "BasicPlanner": + if self.tax_model == "model_wrapper" and not self.disable_taxes: + # For every bracket, the planner can select one of the discretized + # tax rates. + return [ + ("TaxIndexBracket_{:03d}".format(int(r)), self.n_disc_rates) + for r in self.bracket_cutoffs + ] + + # Return 0 (no added actions) if the other conditions aren't met. + return 0 + + def get_additional_state_fields(self, agent_cls_name): + """This component does not add any agent state fields.""" + return {} + + def component_step(self): + """ + See base_component.py for detailed description. + + On the first day of each tax period, update taxes. On the last day, enact them. + """ + + # 1. On the first day of a new tax period: Set up the taxes for this period. + if self.tax_cycle_pos == 1: + if self.tax_model == "model_wrapper": + self.set_new_period_rates_model() + + if self.tax_model == "saez": + self.compute_and_set_new_period_rates_from_saez_formula() + + # (cache this for faster obs generation) + self._curr_rates_obs = np.array(self.curr_marginal_rates) + + # 2. On the last day of the tax period: Get $-taxes AND update agent endowments. 
+ if self.tax_cycle_pos >= self.period: + self.enact_taxes() + self.tax_cycle_pos = 0 + + else: + self.taxes.append([]) + + # increment timestep. + self.tax_cycle_pos += 1 + + def generate_observations(self): + """ + See base_component.py for detailed description. + + Agents observe where in the tax period cycle they are, information about the + last period's incomes, and the current marginal tax rates, including the + marginal rate that will apply to their next unit of income. + + The planner observes the same type of information, but for all the agents. It + also sees, for each agent, their marginal tax rate and reported income from + the previous tax period. + """ + is_tax_day = float(self.tax_cycle_pos >= self.period) + is_first_day = float(self.tax_cycle_pos == 1) + tax_phase = self.tax_cycle_pos / self.period + + obs = dict() + + obs[self.world.planner.idx] = dict( + is_tax_day=is_tax_day, + is_first_day=is_first_day, + tax_phase=tax_phase, + last_incomes=self._last_income_obs_sorted, + curr_rates=self._curr_rates_obs, + ) + + for agent in self.world.agents: + i = agent.idx + k = str(i) + + curr_marginal_rate = self.marginal_rate( + agent.total_endowment("Coin") - self.last_coin[i] + ) + + obs[k] = dict( + is_tax_day=is_tax_day, + is_first_day=is_first_day, + tax_phase=tax_phase, + last_incomes=self._last_income_obs_sorted, + curr_rates=self._curr_rates_obs, + marginal_rate=curr_marginal_rate, + ) + + obs["p" + k] = dict( + last_income=self._last_income_obs[i], + last_marginal_rate=self.last_marginal_rate[i], + curr_marginal_rate=curr_marginal_rate, + ) + + return obs + + def generate_masks(self, completions=0): + """ + See base_component.py for detailed description. + + Masks only apply to the planner and if tax_model == "model_wrapper" and taxes + are enabled. + All tax actions are masked (so, only NO-OPs can be sampled) on all timesteps + except when self.tax_cycle_pos==1 (meaning a new tax period is starting). 
+ When self.tax_cycle_pos==1, tax actions are masked in order to enforce any + tax annealing. + """ + if ( + completions != self._last_completions + and self.tax_annealing_schedule is not None + ): + self._last_completions = int(completions) + self._annealed_rate_max = annealed_tax_limit( + completions, + self._annealing_warmup, + self._annealing_slope, + self.rate_max, + ) + + if self.disable_taxes: + return {} + + if self.tax_model == "model_wrapper": + # No annealing. Generate masks using default method. + if self.tax_annealing_schedule is None: + if self._planner_masks is None: + masks = super().generate_masks(completions=completions) + self._planner_masks = dict( + new_taxes=deepcopy(masks[self.world.planner.idx]), + zeros={ + k: np.zeros_like(v) + for k, v in masks[self.world.planner.idx].items() + }, + ) + + # No need to recompute. Use the cached masks. + masks = dict() + if self.tax_cycle_pos != 1 or self.disable_taxes: + # Apply zero masks for any timestep where taxes + # are not going to be updated. + masks[self.world.planner.idx] = self._planner_masks["zeros"] + else: + masks[self.world.planner.idx] = self._planner_masks["new_taxes"] + + # Doing annealing. + else: + # Figure out what the masks should be this episode. + if self._planner_masks is None: + planner_masks = { + k: annealed_tax_mask( + completions, + self._annealing_warmup, + self._annealing_slope, + tax_values, + ) + for k, tax_values in self._planner_tax_val_dict.items() + } + self._planner_masks = dict( + new_taxes=deepcopy(planner_masks), + zeros={k: np.zeros_like(v) for k, v in planner_masks.items()}, + ) + + # No need to recompute. Use the cached masks. + masks = dict() + if self.tax_cycle_pos != 1 or self.disable_taxes: + # Apply zero masks for any timestep where taxes + # are not going to be updated. + masks[self.world.planner.idx] = self._planner_masks["zeros"] + else: + masks[self.world.planner.idx] = self._planner_masks["new_taxes"] + + # We are not using a learned planner. 
Generate masks by the default method. + else: + masks = super().generate_masks(completions=completions) + + return masks + + # For non-required customization + # ------------------------------ + + def additional_reset_steps(self): + """ + See base_component.py for detailed description. + + Reset trackers. + """ + self.curr_rate_indices = [0 for _ in range(self.n_brackets)] + + self.tax_cycle_pos = 1 + self.last_coin = [ + float(agent.total_endowment("Coin")) for agent in self.world.agents + ] + self.last_income = [0 for _ in range(self.n_agents)] + self.last_marginal_rate = [0 for _ in range(self.n_agents)] + self.last_effective_tax_rate = [0 for _ in range(self.n_agents)] + + self._curr_rates_obs = np.array(self.curr_marginal_rates) + self._last_income_obs = np.array(self.last_income) / self.period + self._last_income_obs_sorted = self._last_income_obs[ + np.argsort(self._last_income_obs) + ] + + self.taxes = [] + self.total_collected_taxes = 0 + self.all_effective_tax_rates = [] + self._schedules = {"{:03d}".format(int(r)): [] for r in self.bracket_cutoffs} + self._occupancy = {"{:03d}".format(int(r)): 0 for r in self.bracket_cutoffs} + self._planner_masks = None + + if self.tax_model == "saez": + self.curr_bracket_tax_rates = np.array(self.running_avg_tax_rates) + + def get_metrics(self): + """ + See base_component.py for detailed description. + + Return metrics related to bracket rates, bracket occupancy, and tax collection. + """ + out = dict() + + n_observed_incomes = np.maximum(1, np.sum(list(self._occupancy.values()))) + for c in self.bracket_cutoffs: + k = "{:03d}".format(int(c)) + out["avg_bracket_rate/{}".format(k)] = np.mean(self._schedules[k]) + out["bracket_occupancy/{}".format(k)] = ( + self._occupancy[k] / n_observed_incomes + ) + + if not self.disable_taxes: + out["avg_effective_tax_rate"] = np.mean(self.all_effective_tax_rates) + out["total_collected_taxes"] = float(self.total_collected_taxes) + + # Indices of richest and poorest agents. 
+ agent_coin_endows = np.array( + [agent.total_endowment("Coin") for agent in self.world.agents] + ) + idx_poor = np.argmin(agent_coin_endows) + idx_rich = np.argmax(agent_coin_endows) + + tax_days = self.taxes[(self.period - 1) :: self.period] + for i, tag in zip([idx_poor, idx_rich], ["poorest", "richest"]): + total_income = np.maximum( + 0, [tax_day[str(i)]["income"] for tax_day in tax_days] + ).sum() + total_tax_paid = np.sum( + [tax_day[str(i)]["tax_paid"] for tax_day in tax_days] + ) + # Report the overall tax rate over the episode + # for the richest and poorest agents. + out["avg_tax_rate/{}".format(tag)] = total_tax_paid / np.maximum( + 0.001, total_income + ) + + if self.tax_model == "saez": + # Include the running estimate of elasticity. + out["saez/estimated_elasticity"] = self.elas_tm1 + + return out + + def get_dense_log(self): + """ + Log taxes. + + Returns: + taxes (list): A list of tax collections. Each entry corresponds to a single + timestep. Entries are empty except for timesteps where a tax period + ended and taxes were collected. For those timesteps, each entry + contains the tax schedule, each agent's reported income, tax paid, + and redistribution received. + Returns None if taxes are disabled. + """ + if self.disable_taxes: + return None + return self.taxes diff --git a/ai_economist/foundation/components/simple_labor.py b/ai_economist/foundation/components/simple_labor.py new file mode 100644 index 0000000..3b5adc1 --- /dev/null +++ b/ai_economist/foundation/components/simple_labor.py @@ -0,0 +1,134 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import numpy as np + +from ai_economist.foundation.base.base_component import ( + BaseComponent, + component_registry, +) + + +@component_registry.add +class SimpleLabor(BaseComponent): + """ + Allows Agents to select a level of labor, which earns income based on skill. + + Labor is "simple" because this simplifies labor to a choice along a 1D axis. More + concretely, this component adds 100 labor actions, each representing a choice of + how many hours to work, e.g. action 50 represents doing 50 hours of work; each + Agent earns income proportional to the product of its labor amount (representing + hours worked) and its skill (representing wage), with higher skill and higher labor + yielding higher income. + + This component is intended to be used with the 'PeriodicBracketTax' component and + the 'one-step-economy' scenario. + + Args: + mask_first_step (bool): Defaults to True. If True, masks all non-0 labor + actions on the first step of the environment. When combined with the + intended component/scenario, the first env step is used to set taxes + (via the 'redistribution' component) and the second step is used to + select labor (via this component). + payment_max_skill_multiplier (float): When determining the skill level of + each Agent, sampled skills are clipped to this maximum value. + """ + + name = "SimpleLabor" + required_entities = ["Coin"] + agent_subclasses = ["BasicMobileAgent"] + + def __init__( + self, + *base_component_args, + mask_first_step=True, + payment_max_skill_multiplier=3, + pareto_param=4.0, + **base_component_kwargs + ): + super().__init__(*base_component_args, **base_component_kwargs) + + # This defines the size of the action space (the max # hours an agent can work). 
+ self.num_labor_hours = 100 # max 100 hours + + assert isinstance(mask_first_step, bool) + self.mask_first_step = mask_first_step + + self.is_first_step = True + self.common_mask_on = { + agent.idx: np.ones((self.num_labor_hours,)) for agent in self.world.agents + } + self.common_mask_off = { + agent.idx: np.zeros((self.num_labor_hours,)) for agent in self.world.agents + } + + # Skill distribution + self.pareto_param = float(pareto_param) + assert self.pareto_param > 0 + self.payment_max_skill_multiplier = float(payment_max_skill_multiplier) + pmsm = self.payment_max_skill_multiplier + num_agents = len(self.world.agents) + # Generate a batch (1000) of num_agents (sorted/clipped) Pareto samples. + pareto_samples = np.random.pareto(4, size=(1000, num_agents)) + clipped_skills = np.minimum(pmsm, (pmsm - 1) * pareto_samples + 1) + sorted_clipped_skills = np.sort(clipped_skills, axis=1) + # The skill level of the i-th skill-ranked agent is the average of the + # i-th ranked samples throughout the batch. + self.skills = sorted_clipped_skills.mean(axis=0) + + def get_additional_state_fields(self, agent_cls_name): + if agent_cls_name == "BasicMobileAgent": + return {"skill": 0, "production": 0} + return {} + + def additional_reset_steps(self): + self.is_first_step = True + for agent in self.world.agents: + agent.state["skill"] = self.skills[agent.idx] + + def get_n_actions(self, agent_cls_name): + if agent_cls_name == "BasicMobileAgent": + return self.num_labor_hours + return None + + def generate_masks(self, completions=0): + if self.is_first_step: + self.is_first_step = False + if self.mask_first_step: + return self.common_mask_off + + return self.common_mask_on + + def component_step(self): + + for agent in self.world.get_random_order_agents(): + + action = agent.get_component_action(self.name) + + if action == 0: # NO-OP. + # Agent is not interacting with this component. 
+ continue + + if 1 <= action <= self.num_labor_hours: # set reopening phase + + hours_worked = action # NO-OP is 0 hours. + agent.state["endogenous"]["Labor"] = hours_worked + + payoff = hours_worked * agent.state["skill"] + agent.state["production"] += payoff + agent.inventory["Coin"] += payoff + + else: + # If action > num_labor_hours, this is an error. + raise ValueError + + def generate_observations(self): + obs_dict = dict() + for agent in self.world.agents: + obs_dict[str(agent.idx)] = { + "skill": agent.state["skill"] / self.payment_max_skill_multiplier + } + return obs_dict diff --git a/ai_economist/foundation/components/utils.py b/ai_economist/foundation/components/utils.py new file mode 100644 index 0000000..d9418fb --- /dev/null +++ b/ai_economist/foundation/components/utils.py @@ -0,0 +1,115 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import numpy as np + + +def annealed_tax_limit(completions, warmup_period, slope, final_max_tax_value=1.0): + """ + Compute the maximum tax rate available at this stage of tax annealing. + + This function uses the number of episode completions and the annealing schedule + (warmup_period, slope, & final_max_tax_value) to determine what the maximum tax + rate can be. + This type of annealing allows for a tax curriculum where earlier episodes are + restricted to lower tax rates. As more episodes are played, higher tax values are + allowed. + + Args: + completions (int): Number of times the environment has completed an episode. + Expected to be >= 0. + warmup_period (int): Until warmup_period completions, only allow 0 tax. Using + a negative value will enable non-0 taxes at 0 environment completions. + slope (float): After warmup_period completions, percentage of full tax value + unmasked with each new completion. 
+ final_max_tax_value (float): The maximum tax value at the end of annealing. + + Returns: + A scalar value indicating the maximum tax at this stage of annealing. + + Example: + >> WARMUP = 100 + >> SLOPE = 0.01 + >> annealed_tax_limit(0, WARMUP, SLOPE) + 0.0 + >> annealed_tax_limit(100, WARMUP, SLOPE) + 0.0 + >> annealed_tax_limit(150, WARMUP, SLOPE) + 0.5 + >> annealed_tax_limit(200, WARMUP, SLOPE) + 1.0 + >> annealed_tax_limit(1000, WARMUP, SLOPE) + 1.0 + """ + # What percentage of the full range is currently visible + # (between 0 [only 0 tax] and 1 [all taxes visible]) + percentage_visible = np.maximum( + 0.0, np.minimum(1.0, slope * (completions - warmup_period)) + ) + + # Determine the highest allowable tax, + # given the current position in the annealing schedule + current_max_tax = percentage_visible * final_max_tax_value + + return current_max_tax + + +def annealed_tax_mask(completions, warmup_period, slope, tax_values): + """ + Generate a mask applied to a set of tax values for the purpose of tax annealing. + + This function uses the number of episode completions and the annealing schedule + to determine which of the tax values are considered valid. The most extreme + tax/subsidy values are unmasked last. Zero tax is always unmasked (i.e. always + valid). + This type of annealing allows for a tax curriculum where earlier episodes are + restricted to lower tax rates. As more episodes are played, higher tax values are + allowed. + + Args: + completions (int): Number of times the environment has completed an episode. + Expected to be >= 0. + warmup_period (int): Until warmup_period completions, only allow 0 tax. Using + a negative value will enable non-0 taxes at 0 environment completions. + slope (float): After warmup_period completions, percentage of full tax value + unmasked with each new completion. + tax_values (list): The list of tax values associated with each action to + which this mask will apply. 
+ + Returns: + A binary mask with same shape as tax_values, indicating which tax values are + currently valid. + + Example: + >> WARMUP = 100 + >> SLOPE = 0.01 + >> TAX_VALUES = [0.0, 0.25, 0.50, 0.75, 1.0] + >> annealed_tax_limit(0, WARMUP, SLOPE, TAX_VALUES) + [0, 0, 0, 0, 0] + >> annealed_tax_limit(100, WARMUP, SLOPE, TAX_VALUES) + [0, 0, 0, 0, 0] + >> annealed_tax_limit(150, WARMUP, SLOPE, TAX_VALUES) + [1, 1, 1, 0, 0] + >> annealed_tax_limit(200, WARMUP, SLOPE, TAX_VALUES) + [1, 1, 1, 1, 1] + >> annealed_tax_limit(1000, WARMUP, SLOPE, TAX_VALUES) + [1, 1, 1, 1, 1] + """ + # Infer the most extreme tax level from the supplied tax values. + abs_tax = np.abs(tax_values) + full_tax_amount = np.max(abs_tax) + + # Determine the highest allowable tax, given the current position + # in the annealing schedule + max_absolute_visible_tax = annealed_tax_limit( + completions, warmup_period, slope, full_tax_amount + ) + + # Return a binary mask to allow for taxes + # at or below the highest absolute visible tax + return np.less_equal(np.abs(tax_values), max_absolute_visible_tax).astype( + np.float32 + ) diff --git a/ai_economist/foundation/entities/__init__.py b/ai_economist/foundation/entities/__init__.py new file mode 100644 index 0000000..58a385e --- /dev/null +++ b/ai_economist/foundation/entities/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +from .endogenous import endogenous_registry +from .landmarks import landmark_registry +from .resources import resource_registry diff --git a/ai_economist/foundation/entities/endogenous.py b/ai_economist/foundation/entities/endogenous.py new file mode 100644 index 0000000..b6eb42a --- /dev/null +++ b/ai_economist/foundation/entities/endogenous.py @@ -0,0 +1,36 @@ +# Copyright (c) 2020, salesforce.com, inc. 
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+# For full license text, see the LICENSE file in the repo root
+# or https://opensource.org/licenses/BSD-3-Clause
+
+from ai_economist.foundation.base.registrar import Registry
+
+
+class Endogenous:
+    """Base class for endogenous entity classes.
+
+    Endogenous entities are those that, conceptually, describe the internal state
+    of an agent. This provides a convenient way to separate physical entities (which
+    may exist in the world, be exchanged among agents, or are otherwise in principle
+    observable by others) from endogenous entities (such as the amount of labor
+    effort an agent has experienced).
+
+    Endogenous entities are registered in the "endogenous" portion of an agent's
+    state and should only be observable by the agent itself.
+    """
+
+    name = None
+
+    def __init__(self):
+        assert self.name is not None
+
+
+endogenous_registry = Registry(Endogenous)
+
+
+@endogenous_registry.add
+class Labor(Endogenous):
+    """Labor accumulated through working. Included in all environments by default."""
+
+    name = "Labor"
diff --git a/ai_economist/foundation/entities/landmarks.py b/ai_economist/foundation/entities/landmarks.py
new file mode 100644
index 0000000..7f01457
--- /dev/null
+++ b/ai_economist/foundation/entities/landmarks.py
@@ -0,0 +1,88 @@
+# Copyright (c) 2020, salesforce.com, inc.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+# For full license text, see the LICENSE file in the repo root
+# or https://opensource.org/licenses/BSD-3-Clause
+
+import numpy as np
+
+from ai_economist.foundation.base.registrar import Registry
+from ai_economist.foundation.entities.resources import resource_registry
+
+
+class Landmark:
+    """Base class for Landmark entity classes.
+
+    Landmark classes describe the entities that exist exclusively in the environment
+    world.
In other words, they represent entities that should not be included in an + agent's inventory and are only observable through observations from the + spatial world. + + Landmark classes describe the following properties: + ownable: If each instance of the landmark belongs to an agent. For example, a + "House" is ownable and belongs to the agent that constructs it whereas + "Water" is not ownable. + solid: If the landmark creates a physical barrier to movement (that is, + if agents are prevented from occupying cells with the landmark). + Importantly, if the landmark is ownable, the agent that owns a given + landmark can occupy its cell even if the landmark is solid. + """ + + name = None + color = None # array of RGB values [0 - 1] + ownable = None + solid = True # Solid = Cannot be passed through + # (unless it is owned by the agent trying to pass through) + + def __init__(self): + assert self.name is not None + assert self.color is not None + assert self.ownable is not None + + # No agent can pass through this landmark + self.blocking = self.solid and not self.ownable + + # Only the agent that owns this landmark can pass through it + self.private = self.solid and self.ownable + + # This landmark does not belong to any agent and it does not inhibit movement + self.public = not self.solid and not self.ownable + + +landmark_registry = Registry(Landmark) + +# Registering each collectible resource's source block +# allows treating source blocks in a specific way +for resource_name in resource_registry.entries: + resource = resource_registry.get(resource_name) + if not resource.collectible: + continue + + @landmark_registry.add + class SourceBlock(Landmark): + """Special Landmark for generating resources. Not ownable. Not solid.""" + + name = "{}SourceBlock".format(resource.name) + color = np.array(resource.color) + ownable = False + solid = False + + +@landmark_registry.add +class House(Landmark): + """House landmark. Ownable. 
+    Solid."""
+
+    name = "House"
+    color = np.array([220, 20, 220]) / 255.0
+    ownable = True
+    solid = True
+
+
+@landmark_registry.add
+class Water(Landmark):
+    """Water Landmark. Not ownable. Solid."""
+
+    name = "Water"
+    color = np.array([50, 50, 250]) / 255.0
+    ownable = False
+    solid = True
diff --git a/ai_economist/foundation/entities/resources.py b/ai_economist/foundation/entities/resources.py
new file mode 100644
index 0000000..fe6693c
--- /dev/null
+++ b/ai_economist/foundation/entities/resources.py
@@ -0,0 +1,84 @@
+# Copyright (c) 2020, salesforce.com, inc.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+# For full license text, see the LICENSE file in the repo root
+# or https://opensource.org/licenses/BSD-3-Clause
+
+import numpy as np
+
+from ai_economist.foundation.base.registrar import Registry
+
+
+class Resource:
+    """Base class for Resource entity classes.
+
+    Resource classes describe entities that can be a part of an agent's inventory.
+
+    Resources can also be a part of the world as collectible entities: for each
+    Resource class with Resource.collectible=True, a complementary
+    ResourceSourceBlock Landmark class will be created in landmarks.py. For each
+    collectible resource in the environment, the world map will include a resource
+    source block channel (representing landmarks where collectible resources are
+    generated) and a resource channel (representing locations where collectible
+    resources have generated).
+    """
+
+    name = None
+    color = None  # array of RGB values [0 - 1]
+    collectible = None  # Is this something that exists in the world?
+    # (versus something that can only be owned)
+    craft_recp = None  # dict of resource name and amount
+    craft_labour_base= 0.0
+
+    def __init__(self):
+        assert self.name is not None
+        assert self.color is not None
+        assert self.collectible is not None
+
+
+resource_registry = Registry(Resource)
+
+
+@resource_registry.add
+class Wood(Resource):
+    """Wood resource.
collectible."""
+
+    name = "Wood"
+    color = np.array([107, 143, 113]) / 255.0
+    collectible = True
+
+
+@resource_registry.add
+class Stone(Resource):
+    """Stone resource. collectible."""
+
+    name = "Stone"
+    color = np.array([241, 233, 219]) / 255.0
+    collectible = True
+
+
+@resource_registry.add
+class Coin(Resource):
+    """Coin resource. Included in all environments by default. Not collectible."""
+
+    name = "Coin"
+    color = np.array([229, 211, 82]) / 255.0
+    collectible = False
+
+@resource_registry.add
+class RawGem(Resource):
+    """Raw Gem that can be processed further"""
+
+    name = "Raw_Gem"
+    color = np.array([241, 233, 219]) / 255.0
+    collectible = True
+
+@resource_registry.add
+class Gem(Resource):
+    """Processed Gem. Craftable."""
+
+    name = "Gem"
+    color = np.array([241, 233, 219]) / 255.0
+    collectible = False
+    craft_recp= {"Raw_Gem": 1}
+    craft_labour_base= 1
\ No newline at end of file
diff --git a/ai_economist/foundation/env_wrapper.py b/ai_economist/foundation/env_wrapper.py
new file mode 100644
index 0000000..29db95e
--- /dev/null
+++ b/ai_economist/foundation/env_wrapper.py
@@ -0,0 +1,418 @@
+# Copyright (c) 2021, salesforce.com, inc.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+# For full license text, see the LICENSE file in the repo root
+# or https://opensource.org/licenses/BSD-3-Clause
+
+"""
+The env wrapper class
+"""
+
+import logging
+
+import GPUtil
+
+try:
+    num_gpus_available = len(GPUtil.getAvailable())
+    print(f"Inside env_wrapper.py: {num_gpus_available} GPUs are available.")
+    if num_gpus_available == 0:
+        print("No GPUs found! Running the simulation on a CPU.")
+    else:
+        from warp_drive.managers.data_manager import CUDADataManager
+        from warp_drive.managers.function_manager import (
+            CUDAEnvironmentReset,
+            CUDAFunctionManager,
+        )
+except ModuleNotFoundError:
+    print(
+        "Warning: The 'WarpDrive' package is not found and cannot be used! 
" + "If you wish to use WarpDrive, please run " + "'pip install rl-warp-drive' first." + ) +except ValueError: + print("No GPUs found! Running the simulation on a CPU.") + +import numpy as np +from gym.spaces import Box, Dict, Discrete, MultiDiscrete + +BIG_NUMBER = 1e20 + + +def recursive_obs_dict_to_spaces_dict(obs): + """Recursively return the observation space dictionary + for a dictionary of observations + + Args: + obs (dict): A dictionary of observations keyed by agent index + for a multi-agent environment + + Returns: + Dict: A dictionary (space.Dict) of observation spaces + """ + assert isinstance(obs, dict) + dict_of_spaces = {} + for k, v in obs.items(): + + # list of lists are listified np arrays + _v = v + if isinstance(v, list): + _v = np.array(v) + elif isinstance(v, (int, np.integer, float, np.floating)): + _v = np.array([v]) + + # assign Space + if isinstance(_v, np.ndarray): + x = float(BIG_NUMBER) + box = Box(low=-x, high=x, shape=_v.shape, dtype=_v.dtype) + low_high_valid = (box.low < 0).all() and (box.high > 0).all() + + # This loop avoids issues with overflow to make sure low/high are good. + while not low_high_valid: + x = x // 2 + box = Box(low=-x, high=x, shape=_v.shape, dtype=_v.dtype) + low_high_valid = (box.low < 0).all() and (box.high > 0).all() + + dict_of_spaces[k] = box + + elif isinstance(_v, dict): + dict_of_spaces[k] = recursive_obs_dict_to_spaces_dict(_v) + else: + raise TypeError + return Dict(dict_of_spaces) + + +class FoundationEnvWrapper: + """ + The environment wrapper class for Foundation. + This wrapper determines whether the environment reset and steps happen on the + CPU or the GPU, and proceeds accordingly. + If the environment runs on the CPU, the reset() and step() calls also occur on + the CPU. + If the environment runs on the GPU, only the first reset() happens on the CPU, + all the relevant data is copied over the GPU after, and the subsequent steps + all happen on the GPU. 
+ """ + + def __init__( + self, + env_obj=None, + env_name=None, + env_config=None, + num_envs=1, + use_cuda=False, + env_registrar=None, + event_messenger=None, + process_id=0, + ): + """ + 'env_obj': an environment object + 'env_name': an environment name that is registered on the + WarpDrive environment registrar + 'env_config': environment configuration to instantiate + an environment from the registrar + 'use_cuda': if True, step through the environment on the GPU, else on the CPU + 'num_envs': the number of parallel environments to instantiate. Note: this is + only relevant when use_cuda is True + 'env_registrar': EnvironmentRegistrar object + it provides the customized env info (like src path) for the build + 'event_messenger': multiprocessing Event to sync up the build + when using multiple processes + 'process_id': id of the process running WarpDrive + """ + # Need to pass in an environment instance + if env_obj is not None: + self.env = env_obj + else: + assert ( + env_name is not None + and env_config is not None + and env_registrar is not None + ) + self.env = env_registrar.get(env_name, use_cuda)(**env_config) + + self.n_agents = self.env.num_agents + self.episode_length = self.env.episode_length + + assert self.env.name + self.name = self.env.name + + # Add observation space to the env + # -------------------------------- + # Note: when the collated agent "a" is present, add obs keys + # for each individual agent to the env + # and remove the collated agent "a" from the observation + obs = self.obs_at_reset() + self.env.observation_space = recursive_obs_dict_to_spaces_dict(obs) + + # Add action space to the env + # --------------------------- + self.env.action_space = {} + for agent_id in range(len(self.env.world.agents)): + if self.env.world.agents[agent_id].multi_action_mode: + self.env.action_space[str([agent_id])] = MultiDiscrete( + self.env.get_agent(str(agent_id)).action_spaces + ) + else: + self.env.action_space[str(agent_id)] = Discrete( + 
self.env.get_agent(str(agent_id)).action_spaces + ) + self.env.action_space[str(agent_id)].dtype = np.int32 + + if self.env.world.planner.multi_action_mode: + self.env.action_space["p"] = MultiDiscrete( + self.env.get_agent("p").action_spaces + ) + else: + self.env.action_space["p"] = Discrete(self.env.get_agent("p").action_spaces) + self.env.action_space["p"].dtype = np.int32 + + # Ensure the observation and action spaces share the same keys + assert set(self.env.observation_space.keys()) == set( + self.env.action_space.keys() + ) + + # CUDA-specific initializations + # ----------------------------- + # Flag to determine whether to use CUDA or not + self.use_cuda = use_cuda + if self.use_cuda: + assert len(GPUtil.getAvailable()) > 0, ( + "The env wrapper needs a GPU to run" " when use_cuda is True!" + ) + assert hasattr(self.env, "use_cuda") + assert hasattr(self.env, "cuda_data_manager") + assert hasattr(self.env, "cuda_function_manager") + + assert hasattr(self.env.world, "use_cuda") + assert hasattr(self.env.world, "cuda_data_manager") + assert hasattr(self.env.world, "cuda_function_manager") + self.env.use_cuda = use_cuda + self.env.world.use_cuda = self.use_cuda + + # Flag to determine where the reset happens (host or device) + # First reset is always on the host (CPU), and subsequent resets are on + # the device (GPU) + self.reset_on_host = True + + # Steps specific to GPU runs + # -------------------------- + if self.use_cuda: + logging.info("USING CUDA...") + + # Number of environments to run in parallel + assert num_envs >= 1 + self.n_envs = num_envs + + logging.info("Initializing the CUDA data manager...") + self.cuda_data_manager = CUDADataManager( + num_agents=self.n_agents, + episode_length=self.episode_length, + num_envs=self.n_envs, + ) + + logging.info("Initializing the CUDA function manager...") + self.cuda_function_manager = CUDAFunctionManager( + num_agents=int(self.cuda_data_manager.meta_info("n_agents")), + 
num_envs=int(self.cuda_data_manager.meta_info("n_envs")), + process_id=process_id, + ) + self.cuda_function_manager.compile_and_load_cuda( + env_name=self.name, + template_header_file="template_env_config.h", + template_runner_file="template_env_runner.cu", + customized_env_registrar=env_registrar, + event_messenger=event_messenger, + ) + + # Register the CUDA step() function for the env + # Note: generate_observation() and compute_reward() + # should be part of the step function itself + step_function = f"Cuda{self.name}Step" + self.cuda_function_manager.initialize_functions([step_function]) + self.env.cuda_step = self.cuda_function_manager.get_function(step_function) + + # Register additional cuda functions (other than the scenario step) + # Component step + # Create a cuda_component_step dictionary + self.env.world.cuda_component_step = {} + for component in self.env.components: + self.cuda_function_manager.initialize_functions( + ["Cuda" + component.name + "Step"] + ) + self.env.world.cuda_component_step[ + component.name + ] = self.cuda_function_manager.get_function( + "Cuda" + component.name + "Step" + ) + + # Compute reward + self.cuda_function_manager.initialize_functions(["CudaComputeReward"]) + self.env.cuda_compute_reward = self.cuda_function_manager.get_function( + "CudaComputeReward" + ) + + # Add wrapper attributes for use within env + self.env.cuda_data_manager = self.cuda_data_manager + self.env.cuda_function_manager = self.cuda_function_manager + + # Register the env resetter + self.env_resetter = CUDAEnvironmentReset( + function_manager=self.cuda_function_manager + ) + + # Add to self.env.world for use in components + self.env.world.cuda_data_manager = self.cuda_data_manager + self.env.world.cuda_function_manager = self.cuda_function_manager + + def reset_all_envs(self): + """ + Reset the state of the environment to initialize a new episode. 
+ if self.reset_on_host is True: + calls the CPU env to prepare and return the initial state + if self.use_cuda is True: + if self.reset_on_host is True: + expands initial state to parallel example_envs and push to GPU once + sets self.reset_on_host = False + else: + calls device hard reset managed by the CUDAResetter + """ + self.env.world.timestep = 0 + + if self.reset_on_host: + # Produce observation + obs = self.obs_at_reset() + else: + assert self.use_cuda + + if self.use_cuda: # GPU version + if self.reset_on_host: + + # Helper function to repeat data across the env dimension + def repeat_across_env_dimension(array, num_envs): + return np.stack([array for _ in range(num_envs)], axis=0) + + # Copy host data and tensors to device + # Note: this happens only once after the first reset on the host + + scenario_and_components = [self.env] + self.env.components + + for item in scenario_and_components: + # Add env dimension to data + # if "save_copy_and_apply_at_reset" is True + data_dictionary = item.get_data_dictionary() + tensor_dictionary = item.get_tensor_dictionary() + for key in data_dictionary: + if data_dictionary[key]["attributes"][ + "save_copy_and_apply_at_reset" + ]: + data_dictionary[key]["data"] = repeat_across_env_dimension( + data_dictionary[key]["data"], self.n_envs + ) + + for key in tensor_dictionary: + if tensor_dictionary[key]["attributes"][ + "save_copy_and_apply_at_reset" + ]: + tensor_dictionary[key][ + "data" + ] = repeat_across_env_dimension( + tensor_dictionary[key]["data"], self.n_envs + ) + + self.cuda_data_manager.push_data_to_device(data_dictionary) + + self.cuda_data_manager.push_data_to_device( + tensor_dictionary, torch_accessible=True + ) + + # All subsequent resets happen on the GPU + self.reset_on_host = False + + # Return the obs + return obs + # Returns an empty dictionary for all subsequent resets on the GPU + # as arrays are modified in place + self.env_resetter.reset_when_done( + self.cuda_data_manager, mode="force_reset" + 
) + return {} + return obs # CPU version + + def reset_only_done_envs(self): + """ + This function only works for GPU example_envs. + It will check all the running example_envs, + and only resets those example_envs that are observing done flag is True + """ + assert self.use_cuda and not self.reset_on_host, ( + "reset_only_done_envs() only works " + "for self.use_cuda = True and self.reset_on_host = False" + ) + + self.env_resetter.reset_when_done(self.cuda_data_manager, mode="if_done") + return {} + + def step_all_envs(self, actions=None): + """ + Step through all the environments' components and scenario + """ + if self.use_cuda: + # Step through each component + for component in self.env.components: + component.component_step() + + # Scenario step + self.env.scenario_step() + + # Compute rewards + self.env.generate_rewards() + + result = None # Do not return anything + else: + assert actions is not None, "Please provide actions to step with." + obs, rew, done, info = self.env.step(actions) + obs = self._reformat_obs(obs) + rew = self._reformat_rew(rew) + result = obs, rew, done, info + return result + + def obs_at_reset(self): + """ + Calls the (Python) env to reset and return the initial state + """ + obs = self.env.reset() + obs = self._reformat_obs(obs) + return obs + + def _reformat_obs(self, obs): + if "a" in obs: + # This means the env uses collated obs. + # Set each individual agent as obs keys for processing with WarpDrive. + for agent_id in range(self.env.n_agents): + obs[str(agent_id)] = {} + for key in obs["a"].keys(): + obs[str(agent_id)][key] = obs["a"][key][..., agent_id] + del obs["a"] # remove the key "a" + return obs + + def _reformat_rew(self, rew): + if "a" in rew: + # This means the env uses collated rew. + # Set each individual agent as rew keys for processing with WarpDrive. 
+ assert isinstance(rew, dict) + for agent_id in range(self.env.n_agents): + rew[str(agent_id)] = rew["a"][agent_id] + del rew["a"] # remove the key "a" + return rew + + def reset(self): + """ + Alias for reset_all_envs() when CPU is used (conforms to gym-style) + """ + return self.reset_all_envs() + + def step(self, actions=None): + """ + Alias for step_all_envs() when CPU is used (conforms to gym-style) + """ + return self.step_all_envs(actions) diff --git a/ai_economist/foundation/scenarios/__init__.py b/ai_economist/foundation/scenarios/__init__.py new file mode 100644 index 0000000..f793d8d --- /dev/null +++ b/ai_economist/foundation/scenarios/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +from ai_economist.foundation.base.base_env import scenario_registry + +from .covid19 import covid19_env +from .one_step_economy import one_step_economy +from .simple_wood_and_stone import dynamic_layout, layout_from_file + +# Import files that add Scenario class(es) to scenario_registry +# ------------------------------------------------------------- diff --git a/ai_economist/foundation/scenarios/covid19/__init__.py b/ai_economist/foundation/scenarios/covid19/__init__.py new file mode 100644 index 0000000..fc6cee4 --- /dev/null +++ b/ai_economist/foundation/scenarios/covid19/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause diff --git a/ai_economist/foundation/scenarios/covid19/covid19_build.cu b/ai_economist/foundation/scenarios/covid19/covid19_build.cu new file mode 100644 index 0000000..e5d5d48 --- /dev/null +++ b/ai_economist/foundation/scenarios/covid19/covid19_build.cu @@ -0,0 +1,13 @@ +// Copyright (c) 2021, salesforce.com, inc. +// All rights reserved. +// SPDX-License-Identifier: BSD-3-Clause +// For full license text, see the LICENSE file in the repo root +// or https://opensource.org/licenses/BSD-3-Clause + +#ifndef CUDA_INCLUDES_COVID19_CONST_H_ +#define CUDA_INCLUDES_COVID19_CONST_H_ + +#include "../../components/covid19_components_step.cu" +#include "covid19_env_step.cu" + +#endif // CUDA_INCLUDES_COVID19_CONST_H_ diff --git a/ai_economist/foundation/scenarios/covid19/covid19_env.py b/ai_economist/foundation/scenarios/covid19/covid19_env.py new file mode 100644 index 0000000..c6c385a --- /dev/null +++ b/ai_economist/foundation/scenarios/covid19/covid19_env.py @@ -0,0 +1,1687 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import json +import os +from datetime import datetime, timedelta + +import GPUtil +import numpy as np + +from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry +from ai_economist.foundation.utils import verify_activation_code + +try: + num_gpus_available = len(GPUtil.getAvailable()) + print(f"Inside covid19_env.py: {num_gpus_available} GPUs are available.") + if num_gpus_available == 0: + print("No GPUs found! 
Running the simulation on a CPU.") + else: + from warp_drive.utils.constants import Constants + from warp_drive.utils.data_feed import DataFeed + + _OBSERVATIONS = Constants.OBSERVATIONS + _ACTIONS = Constants.ACTIONS + _REWARDS = Constants.REWARDS +except ModuleNotFoundError: + print( + "Warning: The 'WarpDrive' package is not found and cannot be used! " + "If you wish to use WarpDrive, please run " + "'pip install rl-warp-drive' first." + ) +except ValueError: + print("No GPUs found! Running the simulation on a CPU.") + + +@scenario_registry.add +class CovidAndEconomyEnvironment(BaseEnvironment): + """ + A simulation to model health and economy dynamics amidst the COVID-19 pandemic. + The environment comprising 51 agents (each agent corresponding to a US state and + Washington D.C.) and the Federal Government (planner). The state agents decide the + stringency level of the policy response to the pandemic, while the federal + government provides subsidies to eligible individuals. + + This simulation makes modeling assumptions. For details, see the technical paper: + https://arxiv.org/abs/2108.02904 + + Args: + use_real_world_data (bool): Replay what happened in the real world. + Real-world data comprises SIR (susceptible/infected/recovered), + unemployment, government policy, and vaccination numbers. + This setting also sets use_real_world_policies=True. + use_real_world_policies (bool): Run the environment with real-world policies + (stringency levels and subsidies). With this setting and + use_real_world_data=False, SIR and economy dynamics are still + driven by fitted models. + path_to_data_and_fitted_params (dirpath): Full path to the directory containing + the data, fitted parameters and model constants. This defaults to + "ai_economist/datasets/covid19_datasets/data_and_fitted_params". 
+ For details on obtaining these parameters, please see the notebook + "ai-economist-foundation/ai_economist/datasets/covid19_datasets/ + gather_real_world_data_and_fit_parameters.ipynb". + start_date (string): Date (YYYY-MM-DD) to start the simulation. + pop_between_age_18_65 (float): Fraction of the population between ages 18-65. + This is the subset of the population whose employment/unemployment affects + economic productivity. + Range: 0 <= pop_between_age_18_65 <= 1. + infection_too_sick_to_work_rate (float): Fraction of people infected with + COVID-19. Infected people don't work. + Range: 0 <= infection_too_sick_to_work_rate <= 1 + risk_free_interest_rate (float): Percentage of interest paid by the federal + government to borrow money from the federal reserve for COVID-19 relief + (direct payments). Higher interest rates mean that direct payments + have a larger cost on the federal government's economic index. + Range: 0 <= risk_free_interest_rate + economic_reward_crra_eta (float): CRRA eta parameter for modeling the economic + reward non-linearity. + A useful reference: https://en.wikipedia.org/wiki/Isoelastic_utility + Range: 0 <= economic_reward_crra_eta + health_priority_scaling_agents (float): A factor indicating how much more the + states prioritize health (roughly speaking, loss of lives due to + opening up more) over the economy (roughly speaking, a loss in GDP + due to shutting down resulting in more unemployment) compared to the + real-world. + For example, a value of 1 corresponds to the real-world, while + a value of 2 means that states cared twice as much about public health + (preventing deaths), while a value of 0.5 means that states cared twice + as much about the economy (preventing GDP drops). + Range: 0 <= health_priority_scaling_agents + health_priority_scaling_planner (float): same as above, + but for the federal government. 
+ Range: 0 <= health_priority_scaling_planner + """ + + def __init__( + self, + *base_env_args, + use_real_world_data=False, + use_real_world_policies=False, + path_to_data_and_fitted_params="", + start_date="2020-03-22", + pop_between_age_18_65=0.6, + infection_too_sick_to_work_rate=0.1, + risk_free_interest_rate=0.03, + economic_reward_crra_eta=2, + health_priority_scaling_agents=1, + health_priority_scaling_planner=1, + reward_normalization_factor=1, + **base_env_kwargs, + ): + verify_activation_code() + + # Used for datatype checks + self.np_float_dtype = np.float32 + self.np_int_dtype = np.int32 + + # Flag to use real-world data or the fitted models instead + self.use_real_world_data = use_real_world_data + # Flag to use real-world policies (actions) or the supplied actions instead + self.use_real_world_policies = use_real_world_policies + + # If we use real-world data, we also want to use the real-world policies + if self.use_real_world_data: + print( + "Using real-world data to initialize as well as to " + "step through the env." + ) + # Note: under this setting, the real_world policies are also used. + assert self.use_real_world_policies, ( + "Since the env. config. 'use_real_world_data' is True, please also " + "set 'use_real_world_policies' to True." + ) + else: + print( + "Using the real-world data to only initialize the env, " + "and using the fitted models to step through the env." 
+ ) + + # Load real-world date + if path_to_data_and_fitted_params == "": + current_dir = os.path.dirname(__file__) + self.path_to_data_and_fitted_params = os.path.join( + current_dir, "../../../datasets/covid19_datasets/data_and_fitted_params" + ) + else: + self.path_to_data_and_fitted_params = path_to_data_and_fitted_params + + print( + "Loading real-world data from {}".format( + self.path_to_data_and_fitted_params + ) + ) + real_world_data_npz = np.load( + os.path.join(self.path_to_data_and_fitted_params, "real_world_data.npz") + ) + self._real_world_data = {} + for key in list(real_world_data_npz): + self._real_world_data[key] = real_world_data_npz[key] + + # Load fitted parameters + print( + "Loading fit parameters from {}".format(self.path_to_data_and_fitted_params) + ) + self.load_model_constants(self.path_to_data_and_fitted_params) + self.load_fitted_params(self.path_to_data_and_fitted_params) + + try: + self.start_date = datetime.strptime(start_date, self.date_format) + except ValueError: + print(f"Incorrect data format, should be {self.date_format}") + + # Start date should be beyond the date for which data is available + assert self.start_date >= self.policy_start_date + + # Compute a start date index based on policy start date + self.start_date_index = (self.start_date - self.policy_start_date).days + assert 0 <= self.start_date_index < len(self._real_world_data["policy"]) + + # For date logging (This will be overwritten in additional_reset_steps; + # see below) + self.current_date = None + + # When using real-world policy, limit the episode length + # to the length of the available policy. 
+ if self.use_real_world_policies: + real_world_policy_length = ( + len(self._real_world_data["policy"]) - self.start_date_index + ) + print("Using real-world policies, ignoring external action inputs.") + assert base_env_kwargs["episode_length"] <= real_world_policy_length, ( + f"The real-world policies are only available for " + f"{real_world_policy_length} timesteps; so the 'episode_length' " + f"in the environment configuration can only be at most " + f"{real_world_policy_length}" + ) + else: + print("Using external action inputs.") + + # US states and populations + self.num_us_states = len(self.us_state_population) + + assert ( + base_env_kwargs["n_agents"] == self.num_us_states + ), "n_agents should be set to the number of US states, i.e., {}.".format( + self.num_us_states + ) + # Note: For a faster environment step time, we collate all the individual agents + # into a single agent index "a" and we flatten the component action masks too. + assert base_env_kwargs[ + "collate_agent_step_and_reset_data" + ], "The env. config 'collate_agent_step_and_reset_data' should be set to True." 
+ super().__init__(*base_env_args, **base_env_kwargs) + + # Add attributes to self.world for use in components + self.world.us_state_population = self.us_state_population + self.world.us_population = self.us_population + self.world.start_date = self.start_date + self.world.n_stringency_levels = self.num_stringency_levels + self.world.use_real_world_policies = self.use_real_world_policies + if self.use_real_world_policies: + # Agent open/close stringency levels + self.world.real_world_stringency_policy = self._real_world_data["policy"][ + self.start_date_index : + ] + # Planner subsidy levels + self.world.real_world_subsidy = self._real_world_data["subsidy"][ + self.start_date_index : + ] + + # Policy --> Unemployment + # For accurately modeling the state-wise unemployment, we convolve + # the current stringency policy with a family of exponential filters + # with separate means (lambdas). + # This code sets up things we will use in `unemployment_step()`, + # which includes a detailed breakdown of how the unemployment model is + # implemented. + self.stringency_level_history = None + # Each filter captures a temporally extended response to a stringency change. + self.num_filters = len(self.conv_lambdas) + self.f_ts = np.tile( + np.flip(np.arange(self.filter_len), (0,))[None, None], + (1, self.num_filters, 1), + ).astype(self.np_float_dtype) + self.unemp_conv_filters = np.exp(-self.f_ts / self.conv_lambdas[None, :, None]) + # Each state weights these filters differently. 
+ self.repeated_conv_weights = np.repeat( + self.grouped_convolutional_filter_weights.reshape( + self.num_us_states, self.num_filters + )[:, :, np.newaxis], + self.filter_len, + axis=-1, + ) + + # For manually modulating SIR/Unemployment parameters + self._beta_intercepts_modulation = 1 + self._beta_slopes_modulation = 1 + self._unemployment_modulation = 1 + + # Economy-related + # Interest rate for borrowing money from the federal reserve + self.risk_free_interest_rate = self.np_float_dtype(risk_free_interest_rate) + + # Compute each worker's daily productivity when at work (to match 2019 GDP) + # We assume the open/close stringency policy level was always at it's lowest + # value (i.e., 1) before the pandemic started. + num_unemployed_at_stringency_level_1 = self.unemployment_step( + np.ones(self.num_us_states) + ) + workforce = ( + self.us_population * pop_between_age_18_65 + - np.sum(num_unemployed_at_stringency_level_1) + ).astype(self.np_int_dtype) + workers_per_capita = (workforce / self.us_population).astype( + self.np_float_dtype + ) + gdp_per_worker = (self.gdp_per_capita / workers_per_capita).astype( + self.np_float_dtype + ) + self.num_days_in_an_year = 365 + self.daily_production_per_worker = ( + gdp_per_worker / self.num_days_in_an_year + ).astype(self.np_float_dtype) + + self.infection_too_sick_to_work_rate = self.np_float_dtype( + infection_too_sick_to_work_rate + ) + assert 0 <= self.infection_too_sick_to_work_rate <= 1 + + self.pop_between_age_18_65 = self.np_float_dtype(pop_between_age_18_65) + assert 0 <= self.pop_between_age_18_65 <= 1 + + # Compute max possible productivity values (used for agent reward normalization) + max_productivity_t = self.economy_step( + self.us_state_population, + np.zeros((self.num_us_states), dtype=self.np_int_dtype), + np.zeros((self.num_us_states), dtype=self.np_int_dtype), + num_unemployed_at_stringency_level_1, + infection_too_sick_to_work_rate=self.infection_too_sick_to_work_rate, + 
population_between_age_18_65=self.pop_between_age_18_65, + ) + self.maximum_productivity_t = max_productivity_t + + # Economic reward non-linearity + self.economic_reward_crra_eta = self.np_float_dtype(economic_reward_crra_eta) + assert 0.0 <= self.economic_reward_crra_eta < 20.0 + + # Health indices are normalized by maximum annual GDP + self.agents_health_norm = self.maximum_productivity_t * self.num_days_in_an_year + self.planner_health_norm = np.sum(self.agents_health_norm) + + # Economic indices are normalized by maximum annual GDP + self.agents_economic_norm = ( + self.maximum_productivity_t * self.num_days_in_an_year + ) + self.planner_economic_norm = np.sum(self.agents_economic_norm) + + def scale_health_over_economic_index(health_priority_scaling, alphas): + """ + Given starting alpha(s), compute new alphas so that the + resulting alpha:1-alpha ratio is scaled by health_weightage + """ + z = alphas / (1 - alphas) # alphas = z / (1 + z) + scaled_z = health_priority_scaling * z + new_alphas = scaled_z / (1 + scaled_z) + return new_alphas + + # Agents' health and economic index weightages + # fmt: off + self.weightage_on_marginal_agent_health_index = \ + scale_health_over_economic_index( + health_priority_scaling_agents, + self.inferred_weightage_on_agent_health_index, + ) + # fmt: on + assert ( + (self.weightage_on_marginal_agent_health_index >= 0) + & (self.weightage_on_marginal_agent_health_index <= 1) + ).all() + self.weightage_on_marginal_agent_economic_index = ( + 1 - self.weightage_on_marginal_agent_health_index + ) + + # Planner's health and economic index weightages + # fmt: off + self.weightage_on_marginal_planner_health_index = \ + scale_health_over_economic_index( + health_priority_scaling_planner, + self.inferred_weightage_on_planner_health_index, + ) + # fmt: on + assert 0 <= self.weightage_on_marginal_planner_health_index <= 1 + self.weightage_on_marginal_planner_economic_index = ( + 1 - self.weightage_on_marginal_planner_health_index + ) + + # 
Normalization factor for the reward (often useful for RL training) + self.reward_normalization_factor = reward_normalization_factor + + # CUDA-related attributes (for GPU simulations) + # Note: these will be set / overwritten via the env_wrapper + # use_cuda will be set to True (by the env_wrapper), if needed + # to be simulated on the GPU + self.use_cuda = False + self.cuda_data_manager = None + self.cuda_function_manager = None + self.cuda_step = lambda *args, **kwargs: None + self.cuda_compute_reward = lambda *args, **kwargs: None + + # Adding use_cuda to self.world for use in components + self.world.use_cuda = self.use_cuda + self.world.cuda_data_manager = self.cuda_data_manager + self.world.cuda_function_manager = self.cuda_function_manager + + name = "CovidAndEconomySimulation" + agent_subclasses = ["BasicMobileAgent", "BasicPlanner"] + + required_entities = [] + + def reset_starting_layout(self): + pass + + def reset_agent_states(self): + self.world.clear_agent_locs() + + def get_data_dictionary(self): + """ + Create a dictionary of data to push to the GPU (device). 
+ """ + data_dict = DataFeed() + # Global States + data_dict.add_data( + name="susceptible", + data=self.world.global_state["Susceptible"], + save_copy_and_apply_at_reset=True, + ) + data_dict.add_data( + name="infected", + data=self.world.global_state["Infected"], + save_copy_and_apply_at_reset=True, + ) + data_dict.add_data( + name="recovered", + data=self.world.global_state["Recovered"], + save_copy_and_apply_at_reset=True, + ) + data_dict.add_data( + name="deaths", + data=self.world.global_state["Deaths"], + save_copy_and_apply_at_reset=True, + ) + data_dict.add_data( + name="unemployed", + data=self.world.global_state["Unemployed"], + save_copy_and_apply_at_reset=True, + ) + data_dict.add_data( + name="vaccinated", + data=self.world.global_state["Vaccinated"], + save_copy_and_apply_at_reset=True, + ) + # Actions + data_dict.add_data( + name="stringency_level", + data=self.world.global_state["Stringency Level"].astype(self.np_int_dtype), + save_copy_and_apply_at_reset=True, + ) + data_dict.add_data( + name="subsidy_level", + data=self.world.global_state["Subsidy Level"].astype(self.np_int_dtype), + save_copy_and_apply_at_reset=True, + ) + # Economy-related + data_dict.add_data( + name="subsidy", + data=self.world.global_state["Subsidy"], + save_copy_and_apply_at_reset=True, + ) + data_dict.add_data( + name="postsubsidy_productivity", + data=self.world.global_state["Postsubsidy Productivity"], + save_copy_and_apply_at_reset=True, + ) + data_dict.add_data( + name="productivity", + data=np.zeros_like( + self.world.global_state["Susceptible"], dtype=self.np_float_dtype + ), + save_copy_and_apply_at_reset=True, + ) + data_dict.add_data( + name="incapacitated", + data=np.zeros((self.num_us_states), dtype=self.np_float_dtype), + save_copy_and_apply_at_reset=True, + ) + data_dict.add_data( + name="cant_work", + data=np.zeros((self.num_us_states), dtype=self.np_float_dtype), + save_copy_and_apply_at_reset=True, + ) + data_dict.add_data( + 
name="num_people_that_can_work", + data=np.zeros((self.num_us_states), dtype=self.np_float_dtype), + save_copy_and_apply_at_reset=True, + ) + data_dict.add_data( + name="us_state_population", + data=self.us_state_population, + ) + data_dict.add_data( + name="infection_too_sick_to_work_rate", + data=self.infection_too_sick_to_work_rate, + ) + data_dict.add_data( + name="population_between_age_18_65", + data=self.pop_between_age_18_65, + ) + data_dict.add_data( + name="daily_production_per_worker", + data=self.daily_production_per_worker, + ) + data_dict.add_data( + name="maximum_productivity", + data=self.maximum_productivity_t, + ) + # SIR-related + data_dict.add_data( + name="real_world_stringency_policy_history", + data=( + self._real_world_data["policy"][ + self.start_date_index - self.beta_delay + 1 : self.start_date_index, + :, + ] + ).astype(self.np_int_dtype), + ) + data_dict.add_data( + name="beta_delay", + data=self.beta_delay, + ) + data_dict.add_data( + name="beta_slopes", + data=self.beta_slopes, + ) + data_dict.add_data( + name="beta_intercepts", + data=self.beta_intercepts, + ) + data_dict.add_data( + name="beta", + data=np.zeros((self.num_us_states), dtype=self.np_float_dtype), + save_copy_and_apply_at_reset=True, + ) + data_dict.add_data( + name="gamma", + data=self.gamma, + ) + data_dict.add_data( + name="death_rate", + data=self.death_rate, + ) + # Unemployment fit parameters + data_dict.add_data( + name="filter_len", + data=self.filter_len, + ) + data_dict.add_data( + name="num_filters", + data=self.num_filters, + ) + data_dict.add_data( + name="delta_stringency_level", + data=( + self.stringency_level_history[1:] - self.stringency_level_history[:-1] + ).astype(self.np_int_dtype), + save_copy_and_apply_at_reset=True, + ) + data_dict.add_data( + name="grouped_convolutional_filter_weights", + data=self.grouped_convolutional_filter_weights, + ) + data_dict.add_data( + name="unemp_conv_filters", + data=self.unemp_conv_filters, + ) + 
data_dict.add_data( + name="unemployment_bias", + data=self.unemployment_bias, + ) + data_dict.add_data( + name="signal", + data=np.zeros( + (self.n_agents, self.num_filters, self.filter_len), + dtype=self.np_float_dtype, + ), + save_copy_and_apply_at_reset=True, + ) + # Reward-related + data_dict.add_data( + name="min_marginal_agent_health_index", + data=self.min_marginal_agent_health_index, + ) + data_dict.add_data( + name="max_marginal_agent_health_index", + data=self.max_marginal_agent_health_index, + ) + data_dict.add_data( + name="min_marginal_agent_economic_index", + data=self.min_marginal_agent_economic_index, + ) + data_dict.add_data( + name="max_marginal_agent_economic_index", + data=self.max_marginal_agent_economic_index, + ) + data_dict.add_data( + name="min_marginal_planner_health_index", + data=self.min_marginal_planner_health_index, + ) + data_dict.add_data( + name="max_marginal_planner_health_index", + data=self.max_marginal_planner_health_index, + ) + data_dict.add_data( + name="min_marginal_planner_economic_index", + data=self.min_marginal_planner_economic_index, + ) + data_dict.add_data( + name="max_marginal_planner_economic_index", + data=self.max_marginal_planner_economic_index, + ) + data_dict.add_data( + name="weightage_on_marginal_agent_health_index", + data=self.weightage_on_marginal_agent_health_index, + ) + data_dict.add_data( + name="weightage_on_marginal_agent_economic_index", + data=self.weightage_on_marginal_agent_economic_index, + ) + data_dict.add_data( + name="weightage_on_marginal_planner_health_index", + data=self.weightage_on_marginal_planner_health_index, + ) + data_dict.add_data( + name="weightage_on_marginal_planner_economic_index", + data=self.weightage_on_marginal_planner_economic_index, + ) + data_dict.add_data( + name="value_of_life", + data=self.value_of_life, + ) + data_dict.add_data( + name="economic_reward_crra_eta", + data=self.economic_reward_crra_eta, + ) + data_dict.add_data( + name="num_days_in_an_year", + 
data=self.num_days_in_an_year, + ) + data_dict.add_data( + name="risk_free_interest_rate", + data=self.risk_free_interest_rate, + ) + data_dict.add_data( + name="agents_health_norm", + data=self.agents_health_norm, + ) + data_dict.add_data( + name="agents_economic_norm", + data=self.agents_economic_norm, + ) + data_dict.add_data( + name="planner_health_norm", + data=self.planner_health_norm, + ) + data_dict.add_data( + name="planner_economic_norm", + data=self.planner_economic_norm, + ) + + return data_dict + + def get_tensor_dictionary(self): + """ + Create a dictionary of (Pytorch-accessible) data to push to the GPU (device). + """ + tensor_dict = DataFeed() + return tensor_dict + + def scenario_step(self): + """ + Update the state of the USA based on the Covid-19 and Economy dynamics. + This internally implements three steps + - sir_step() - updates the susceptible, infected, recovered, deaths + and vaccination numbers based on the SIR equations + - unemployment_step() - uses the unemployment model to updates the unemployment + based on the stringency levels + - economy_step - computes the current producitivity numbers for the agents + """ + if self.use_cuda: + self.cuda_step( + self.cuda_data_manager.device_data("susceptible"), + self.cuda_data_manager.device_data("infected"), + self.cuda_data_manager.device_data("recovered"), + self.cuda_data_manager.device_data("deaths"), + self.cuda_data_manager.device_data("vaccinated"), + self.cuda_data_manager.device_data("unemployed"), + self.cuda_data_manager.device_data("subsidy"), + self.cuda_data_manager.device_data("productivity"), + self.cuda_data_manager.device_data("stringency_level"), + self.cuda_data_manager.device_data("num_stringency_levels"), + self.cuda_data_manager.device_data("postsubsidy_productivity"), + self.cuda_data_manager.device_data("num_vaccines_available_t"), + self.cuda_data_manager.device_data( + "real_world_stringency_policy_history" + ), + self.cuda_data_manager.device_data("beta_delay"), + 
self.cuda_data_manager.device_data("beta_slopes"), + self.cuda_data_manager.device_data("beta_intercepts"), + self.cuda_data_manager.device_data("beta"), + self.cuda_data_manager.device_data("gamma"), + self.cuda_data_manager.device_data("death_rate"), + self.cuda_data_manager.device_data("incapacitated"), + self.cuda_data_manager.device_data("cant_work"), + self.cuda_data_manager.device_data("num_people_that_can_work"), + self.cuda_data_manager.device_data("us_state_population"), + self.cuda_data_manager.device_data("infection_too_sick_to_work_rate"), + self.cuda_data_manager.device_data("population_between_age_18_65"), + self.cuda_data_manager.device_data("filter_len"), + self.cuda_data_manager.device_data("num_filters"), + self.cuda_data_manager.device_data("delta_stringency_level"), + self.cuda_data_manager.device_data( + "grouped_convolutional_filter_weights" + ), + self.cuda_data_manager.device_data("unemp_conv_filters"), + self.cuda_data_manager.device_data("unemployment_bias"), + self.cuda_data_manager.device_data("signal"), + self.cuda_data_manager.device_data("daily_production_per_worker"), + self.cuda_data_manager.device_data("maximum_productivity"), + self.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_a_world-agent_state" + ), + self.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_a_world-agent_postsubsidy_productivity" + ), + self.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_a_world-lagged_stringency_level" + ), + self.cuda_data_manager.device_data(f"{_OBSERVATIONS}_a_time"), + self.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_p_world-agent_state" + ), + self.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_p_world-agent_postsubsidy_productivity" + ), + self.cuda_data_manager.device_data( + f"{_OBSERVATIONS}_p_world-lagged_stringency_level" + ), + self.cuda_data_manager.device_data(f"{_OBSERVATIONS}_p_time"), + self.cuda_data_manager.device_data("_timestep_"), + self.cuda_data_manager.meta_info("n_agents"), + 
self.cuda_data_manager.meta_info("episode_length"), + block=self.world.cuda_function_manager.block, + grid=self.world.cuda_function_manager.grid, + ) + else: + prev_t = self.world.timestep - 1 + curr_t = self.world.timestep + + self.current_date += timedelta(days=1) + + # SIR + # --- + if self.use_real_world_data: + _S_t = np.maximum( + self._real_world_data["susceptible"][ + curr_t + self.start_date_index + ], + 0, + ) + _I_t = np.maximum( + self._real_world_data["infected"][curr_t + self.start_date_index], + 0, + ) + _R_t = np.maximum( + self._real_world_data["recovered"][curr_t + self.start_date_index], + 0, + ) + _V_t = np.maximum( + self._real_world_data["vaccinated"][curr_t + self.start_date_index], + 0, + ) + _D_t = np.maximum( + self._real_world_data["deaths"][curr_t + self.start_date_index], + 0, + ) + + else: # Use simulation logic + if curr_t - self.beta_delay < 0: + if self.start_date_index + curr_t - self.beta_delay < 0: + stringency_level_tmk = np.ones(self.num_us_states) + else: + stringency_level_tmk = self._real_world_data["policy"][ + self.start_date_index + curr_t - self.beta_delay, : + ] + else: + stringency_level_tmk = self.world.global_state["Stringency Level"][ + curr_t - self.beta_delay + ] + stringency_level_tmk = stringency_level_tmk.astype(self.np_int_dtype) + + _S_tm1 = self.world.global_state["Susceptible"][prev_t] + _I_tm1 = self.world.global_state["Infected"][prev_t] + _R_tm1 = self.world.global_state["Recovered"][prev_t] + _V_tm1 = self.world.global_state["Vaccinated"][prev_t] + + # Vaccination + # ----------- + num_vaccines_available_t = np.zeros( + self.n_agents, dtype=self.np_int_dtype + ) + for aidx, agent in enumerate(self.world.agents): + # "Load" the vaccines in the inventory into this vector. 
+ num_vaccines_available_t[aidx] = agent.state["Vaccines Available"] + # Agents always use whatever vaccines they can, so this becomes 0: + agent.state["Total Vaccinated"] += agent.state["Vaccines Available"] + agent.state["Vaccines Available"] = 0 + + # SIR step + # -------- + _dS, _dI, _dR, _dV = self.sir_step( + _S_tm1, + _I_tm1, + stringency_level_tmk, + num_vaccines_available_t, + ) + _S_t = np.maximum(_S_tm1 + _dS, 0) + _I_t = np.maximum(_I_tm1 + _dI, 0) + _R_t = np.maximum(_R_tm1 + _dR, 0) + _V_t = np.maximum(_V_tm1 + _dV, 0) + + num_recovered_but_not_vaccinated_t = _R_t - _V_t + _D_t = self.death_rate * num_recovered_but_not_vaccinated_t + + # Update global state + # ------------------- + self.world.global_state["Susceptible"][curr_t] = _S_t + self.world.global_state["Infected"][curr_t] = _I_t + self.world.global_state["Recovered"][curr_t] = _R_t + self.world.global_state["Deaths"][curr_t] = _D_t + self.world.global_state["Vaccinated"][curr_t] = _V_t + + # Unemployment + # ------------ + if self.use_real_world_data: + num_unemployed_t = self._real_world_data["unemployed"][ + self.start_date_index + curr_t + ] + else: + num_unemployed_t = self.unemployment_step( + current_stringency_level=self.world.global_state[ + "Stringency Level" + ][curr_t] + ) + + self.world.global_state["Unemployed"][curr_t] = num_unemployed_t + + # Productivity + # ------------ + productivity_t = self.economy_step( + self.us_state_population, + infected=_I_t, + deaths=_D_t, + unemployed=num_unemployed_t, + infection_too_sick_to_work_rate=self.infection_too_sick_to_work_rate, + population_between_age_18_65=self.pop_between_age_18_65, + ) + + # Subsidies + # --------- + # Add federal government subsidy to productivity + daily_statewise_subsidy_t = self.world.global_state["Subsidy"][curr_t] + postsubsidy_productivity_t = productivity_t + daily_statewise_subsidy_t + self.world.global_state["Postsubsidy Productivity"][ + curr_t + ] = postsubsidy_productivity_t + + # Update agent state + # 
------------------ + current_date_string = datetime.strftime( + self.current_date, format=self.date_format + ) + for agent in self.world.agents: + agent.state["Total Susceptible"] = _S_t[agent.idx].astype( + self.np_int_dtype + ) + agent.state["New Infections"] = ( + _I_t[agent.idx] - agent.state["Total Infected"] + ).astype(self.np_int_dtype) + agent.state["Total Infected"] = _I_t[agent.idx].astype( + self.np_int_dtype + ) + agent.state["Total Recovered"] = _R_t[agent.idx].astype( + self.np_int_dtype + ) + agent.state["New Deaths"] = _D_t[agent.idx] - agent.state[ + "Total Deaths" + ].astype(self.np_int_dtype) + agent.state["Total Deaths"] = _D_t[agent.idx].astype(self.np_int_dtype) + agent.state["Total Vaccinated"] = _V_t[agent.idx].astype( + self.np_int_dtype + ) + + agent.state["Total Unemployed"] = num_unemployed_t[agent.idx].astype( + self.np_int_dtype + ) + agent.state["New Subsidy Received"] = daily_statewise_subsidy_t[ + agent.idx + ] + agent.state["Postsubsidy Productivity"] = postsubsidy_productivity_t[ + agent.idx + ] + agent.state["Date"] = current_date_string + + # Update planner state + # -------------------- + self.world.planner.state["Total Susceptible"] = np.sum(_S_t).astype( + self.np_int_dtype + ) + self.world.planner.state["New Infections"] = ( + np.sum(_I_t) - self.world.planner.state["Total Infected"] + ).astype(self.np_int_dtype) + self.world.planner.state["Total Infected"] = np.sum(_I_t).astype( + self.np_int_dtype + ) + self.world.planner.state["Total Recovered"] = np.sum(_R_t).astype( + self.np_int_dtype + ) + self.world.planner.state["New Deaths"] = ( + np.sum(_D_t) - self.world.planner.state["Total Deaths"] + ).astype(self.np_int_dtype) + self.world.planner.state["Total Deaths"] = np.sum(_D_t).astype( + self.np_int_dtype + ) + self.world.planner.state["Total Vaccinated"] = np.sum(_V_t).astype( + self.np_int_dtype + ) + self.world.planner.state["Total Unemployed"] = np.sum( + num_unemployed_t + ).astype(self.np_int_dtype) + 
self.world.planner.state["New Subsidy Provided"] = np.sum( + daily_statewise_subsidy_t + ) + self.world.planner.state["Postsubsidy Productivity"] = np.sum( + postsubsidy_productivity_t + ) + self.world.planner.state["Date"] = current_date_string + + def generate_observations(self): + """ + - Process agent-specific and planner-specific data into an observation. + - Observations contain only the relevant features for that actor. + :return: a dictionary of observations for each agent and planner + """ + redux_agent_global_state = None + for feature in [ + "Susceptible", + "Infected", + "Recovered", + "Deaths", + "Vaccinated", + "Unemployed", + ]: + if redux_agent_global_state is None: + redux_agent_global_state = self.world.global_state[feature][ + self.world.timestep + ] + else: + redux_agent_global_state = np.vstack( + ( + redux_agent_global_state, + self.world.global_state[feature][self.world.timestep], + ) + ) + normalized_redux_agent_state = ( + redux_agent_global_state / self.us_state_population[None] + ) + + # Productivity + postsubsidy_productivity_t = self.world.global_state[ + "Postsubsidy Productivity" + ][self.world.timestep] + normalized_postsubsidy_productivity_t = ( + postsubsidy_productivity_t / self.maximum_productivity_t + ) + + # Let agents know about the policy about to affect SIR infection-rate beta + t_beta = self.world.timestep - self.beta_delay + 1 + if t_beta < 0: + lagged_stringency_level = self._real_world_data["policy"][ + self.start_date_index + t_beta + ] + else: + lagged_stringency_level = self.world.global_state["Stringency Level"][ + t_beta + ] + + normalized_lagged_stringency_level = ( + lagged_stringency_level / self.num_stringency_levels + ) + + # To condition policy on agent id + agent_index = np.eye(self.n_agents, dtype=self.np_int_dtype) + + # Observation dict - Agents + # ------------------------- + obs_dict = dict() + obs_dict["a"] = { + "agent_index": agent_index, + "agent_state": normalized_redux_agent_state, + 
"agent_postsubsidy_productivity": normalized_postsubsidy_productivity_t, + "lagged_stringency_level": normalized_lagged_stringency_level, + } + + # Observation dict - Planner + # -------------------------- + obs_dict[self.world.planner.idx] = { + "agent_state": normalized_redux_agent_state, + "agent_postsubsidy_productivity": normalized_postsubsidy_productivity_t, + "lagged_stringency_level": normalized_lagged_stringency_level, + } + + return obs_dict + + def compute_reward(self): + """ + Compute the social welfare metrics for each agent and the planner. + :return: a dictionary of rewards for each agent in the simulation + """ + if self.use_cuda: + self.cuda_compute_reward( + self.cuda_data_manager.device_data(f"{_REWARDS}_a"), + self.cuda_data_manager.device_data(f"{_REWARDS}_p"), + self.cuda_data_manager.device_data("num_days_in_an_year"), + self.cuda_data_manager.device_data("value_of_life"), + self.cuda_data_manager.device_data("risk_free_interest_rate"), + self.cuda_data_manager.device_data("economic_reward_crra_eta"), + self.cuda_data_manager.device_data("min_marginal_agent_health_index"), + self.cuda_data_manager.device_data("max_marginal_agent_health_index"), + self.cuda_data_manager.device_data("min_marginal_agent_economic_index"), + self.cuda_data_manager.device_data("max_marginal_agent_economic_index"), + self.cuda_data_manager.device_data("min_marginal_planner_health_index"), + self.cuda_data_manager.device_data("max_marginal_planner_health_index"), + self.cuda_data_manager.device_data( + "min_marginal_planner_economic_index" + ), + self.cuda_data_manager.device_data( + "max_marginal_planner_economic_index" + ), + self.cuda_data_manager.device_data( + "weightage_on_marginal_agent_health_index" + ), + self.cuda_data_manager.device_data( + "weightage_on_marginal_agent_economic_index" + ), + self.cuda_data_manager.device_data( + "weightage_on_marginal_planner_health_index" + ), + self.cuda_data_manager.device_data( + 
"weightage_on_marginal_planner_economic_index" + ), + self.cuda_data_manager.device_data("agents_health_norm"), + self.cuda_data_manager.device_data("agents_economic_norm"), + self.cuda_data_manager.device_data("planner_health_norm"), + self.cuda_data_manager.device_data("planner_economic_norm"), + self.cuda_data_manager.device_data("deaths"), + self.cuda_data_manager.device_data("subsidy"), + self.cuda_data_manager.device_data("postsubsidy_productivity"), + self.cuda_data_manager.device_data("_done_"), + self.cuda_data_manager.device_data("_timestep_"), + self.cuda_data_manager.meta_info("n_agents"), + self.cuda_data_manager.meta_info("episode_length"), + block=self.world.cuda_function_manager.block, + grid=self.world.cuda_function_manager.grid, + ) + return {} # Return empty dict. Reward arrays are updated in-place + rew = {"a": 0, "p": 0} + + def crra_nonlinearity(x, eta): + # Reference: https://en.wikipedia.org/wiki/Isoelastic_utility + # To be applied to (marginal) economic indices + annual_x = self.num_days_in_an_year * x + annual_x_clipped = np.clip(annual_x, 0.1, 3) + annual_crra = 1 + (annual_x_clipped ** (1 - eta) - 1) / (1 - eta) + daily_crra = annual_crra / self.num_days_in_an_year + return daily_crra + + def min_max_normalization(x, min_x, max_x): + eps = 1e-10 + return (x - min_x) / (max_x - min_x + eps) + + def get_weighted_average( + health_index_weightage, + health_index, + economic_index_weightage, + economic_index, + ): + return ( + health_index_weightage * health_index + + economic_index_weightage * economic_index + ) / (health_index_weightage + economic_index_weightage) + + # Changes this last timestep: + marginal_deaths = ( + self.world.global_state["Deaths"][self.world.timestep] + - self.world.global_state["Deaths"][self.world.timestep - 1] + ) + + subsidy_t = self.world.global_state["Subsidy"][self.world.timestep] + postsubsidy_productivity_t = self.world.global_state[ + "Postsubsidy Productivity" + ][self.world.timestep] + + # Health index 
-- the cost equivalent (annual GDP) of covid deaths + # Note: casting deaths to float to prevent overflow issues + marginal_agent_health_index = ( + -marginal_deaths.astype(self.np_float_dtype) + * self.value_of_life + / self.agents_health_norm + ).astype(self.np_float_dtype) + + # Economic index -- fraction of annual GDP achieved + # Use a "crra" nonlinearity on the agent economic reward + marginal_agent_economic_index = crra_nonlinearity( + postsubsidy_productivity_t / self.agents_economic_norm, + self.economic_reward_crra_eta, + ).astype(self.np_float_dtype) + + # Min-max Normalization + marginal_agent_health_index = min_max_normalization( + marginal_agent_health_index, + self.min_marginal_agent_health_index, + self.max_marginal_agent_health_index, + ).astype(self.np_float_dtype) + marginal_agent_economic_index = min_max_normalization( + marginal_agent_economic_index, + self.min_marginal_agent_economic_index, + self.max_marginal_agent_economic_index, + ).astype(self.np_float_dtype) + + # Agent Rewards + # ------------- + agent_rewards = get_weighted_average( + self.weightage_on_marginal_agent_health_index, + marginal_agent_health_index, + self.weightage_on_marginal_agent_economic_index, + marginal_agent_economic_index, + ) + rew["a"] = agent_rewards / self.reward_normalization_factor + + # Update agent states + # ------------------- + for agent in self.world.agents: + agent.state["Health Index"] += marginal_agent_health_index[agent.idx] + agent.state["Economic Index"] += marginal_agent_economic_index[agent.idx] + + # National level + # -------------- + # Health index -- the cost equivalent (annual GDP) of covid deaths + # Note: casting deaths to float to prevent overflow issues + marginal_planner_health_index = ( + -np.sum(marginal_deaths).astype(self.np_float_dtype) + * self.value_of_life + / self.planner_health_norm + ) + + # Economic index -- fraction of annual GDP achieved (minus subsidy cost) + cost_of_subsidy_t = (1 + self.risk_free_interest_rate) * 
np.sum(subsidy_t) + # Use a "crra" nonlinearity on the planner economic reward + marginal_planner_economic_index = crra_nonlinearity( + (np.sum(postsubsidy_productivity_t) - cost_of_subsidy_t) + / self.planner_economic_norm, + self.economic_reward_crra_eta, + ) + + # Min-max Normalization + marginal_planner_health_index = min_max_normalization( + marginal_planner_health_index, + self.min_marginal_planner_health_index, + self.max_marginal_planner_health_index, + ) + marginal_planner_economic_index = min_max_normalization( + marginal_planner_economic_index, + self.min_marginal_planner_economic_index, + self.max_marginal_planner_economic_index, + ) + + # Update planner states + # ------------------- + self.world.planner.state["Health Index"] += marginal_planner_health_index + self.world.planner.state["Economic Index"] += marginal_planner_economic_index + + # Planner Reward + # -------------- + planner_rewards = get_weighted_average( + self.weightage_on_marginal_planner_health_index, + marginal_planner_health_index, + self.weightage_on_marginal_planner_economic_index, + marginal_planner_economic_index, + ) + rew[self.world.planner.idx] = planner_rewards / self.reward_normalization_factor + + return rew + + def additional_reset_steps(self): + assert self.world.timestep == 0 + + # Reset current date + self.current_date = self.start_date + + # SIR numbers at timestep 0 + susceptible_0 = self._real_world_data["susceptible"][self.start_date_index] + infected_0 = self._real_world_data["infected"][self.start_date_index] + newly_infected_0 = ( + infected_0 + - self._real_world_data["infected"][max(0, self.start_date_index - 1)] + ) + recovered_0 = self._real_world_data["recovered"][self.start_date_index] + deaths_0 = recovered_0 * self.death_rate + + # Unemployment and vaccinated numbers at timestep 0 + unemployed_0 = self._real_world_data["unemployed"][self.start_date_index] + vaccinated_0 = self._real_world_data["vaccinated"][self.start_date_index] + + # Create a global 
state dictionary to save episode data + self.world.global_state = {} + self.set_global_state("Susceptible", susceptible_0, t=self.world.timestep) + self.set_global_state("Infected", infected_0, t=self.world.timestep) + self.set_global_state("Recovered", recovered_0, t=self.world.timestep) + self.set_global_state("Deaths", deaths_0, t=self.world.timestep) + + self.set_global_state("Unemployed", unemployed_0, t=self.world.timestep) + self.set_global_state("Vaccinated", vaccinated_0, t=self.world.timestep) + + new_deaths_0 = ( + deaths_0 + - self._real_world_data["recovered"][max(0, self.start_date_index - 1)] + * self.death_rate + ) + + # Reset stringency level history. + # Pad with stringency levels of 1 corresponding to states being fully open + # (as was the case before the pandemic). + self.stringency_level_history = np.pad( + self._real_world_data["policy"][: self.start_date_index + 1], + [(self.filter_len, 0), (0, 0)], + constant_values=1, + )[-(self.filter_len + 1) :] + + # Set the stringency level based to the real-world policy + self.set_global_state( + "Stringency Level", + self._real_world_data["policy"][self.start_date_index], + t=self.world.timestep, + ) + + # All US states start with zero subsidy and zero Postsubsidy Productivity + self.set_global_state("Subsidy Level", dtype=self.np_float_dtype) + self.set_global_state("Subsidy", dtype=self.np_float_dtype) + self.set_global_state("Postsubsidy Productivity", dtype=self.np_float_dtype) + + # Set initial agent states + # ------------------------ + current_date_string = datetime.strftime( + self.current_date, format=self.date_format + ) + + for agent in self.world.agents: + agent.state["Total Susceptible"] = susceptible_0[agent.idx].astype( + self.np_int_dtype + ) + agent.state["New Infections"] = newly_infected_0[agent.idx].astype( + self.np_int_dtype + ) + agent.state["Total Infected"] = infected_0[agent.idx].astype( + self.np_int_dtype + ) + agent.state["Total Recovered"] = 
recovered_0[agent.idx].astype( + self.np_int_dtype + ) + agent.state["New Deaths"] = new_deaths_0[agent.idx].astype( + self.np_int_dtype + ) + agent.state["Total Deaths"] = deaths_0[agent.idx].astype(self.np_int_dtype) + agent.state["Health Index"] = np.array([0]).astype(self.np_float_dtype) + agent.state["Economic Index"] = np.array([0]).astype(self.np_float_dtype) + agent.state["Date"] = current_date_string + + # Planner state fields + self.world.planner.state["Total Susceptible"] = np.sum( + [agent.state["Total Susceptible"] for agent in self.world.agents] + ).astype(self.np_int_dtype) + self.world.planner.state["New Infections"] = np.sum( + [agent.state["New Infections"] for agent in self.world.agents] + ).astype(self.np_int_dtype) + self.world.planner.state["Total Infected"] = np.sum( + [agent.state["Total Infected"] for agent in self.world.agents] + ).astype(self.np_int_dtype) + self.world.planner.state["Total Recovered"] = np.sum( + [agent.state["Total Recovered"] for agent in self.world.agents] + ).astype(self.np_int_dtype) + self.world.planner.state["New Deaths"] = np.sum( + [agent.state["New Deaths"] for agent in self.world.agents] + ).astype(self.np_int_dtype) + self.world.planner.state["Total Deaths"] = np.sum( + [agent.state["Total Deaths"] for agent in self.world.agents] + ).astype(self.np_int_dtype) + self.world.planner.state["Total Vaccinated"] = np.sum(vaccinated_0).astype( + self.np_int_dtype + ) + self.world.planner.state["Health Index"] = np.array([0]).astype( + self.np_float_dtype + ) + self.world.planner.state["Economic Index"] = np.array([0]).astype( + self.np_float_dtype + ) + + self.world.planner.state["Date"] = current_date_string + + # Reset any manually set parameter modulations + self._beta_intercepts_modulation = 1 + self._beta_slopes_modulation = 1 + self._unemployment_modulation = 1 + + def set_global_state(self, key=None, value=None, t=None, dtype=None): + # Use floats by default for the SIR dynamics + if dtype is None: + dtype = 
self.np_float_dtype + assert key in [ + "Susceptible", + "Infected", + "Recovered", + "Deaths", + "Unemployed", + "Vaccinated", + "Stringency Level", + "Subsidy Level", + "Subsidy", + "Postsubsidy Productivity", + ] + # If no values are passed, set everything to zeros. + if key not in self.world.global_state: + self.world.global_state[key] = np.zeros( + (self.episode_length + 1, self.num_us_states), dtype=dtype + ) + + if t is not None and value is not None: + assert isinstance(value, np.ndarray) + assert value.shape[0] == self.world.global_state[key].shape[1] + + self.world.global_state[key][t] = value + else: + pass + + def set_parameter_modulations( + self, beta_intercept=None, beta_slope=None, unemployment=None + ): + """ + Apply parameter modulation, which will be in effect until the next env reset. + + Each modulation term scales the associated set of model parameters by the + input value. This method is useful for performing a sensitivity analysis. + + In effect, the transmission rate (beta) will be calculated as: + beta = (m_s * beta_slope)*lagged_stringency + (m_i * beta_intercept) + + The unemployment rate (u) will be calculated as: + u = SOFTPLUS( m_u * SUM(u_filter_weight * u_filter_response) ) + u_0 + + Args: + beta_intercept: (float, >= 0) Modulation applied to the intercept term + of the beta model, m_i in above equations + beta_slope: (float, >= 0) Modulation applied to the slope term of the + beta model, m_s in above equations + unemployment: (float, >= 0) Modulation applied to the weighted sum of + unemployment filter responses, m_u in above equations. 
+ + Example: + # Reset the environment + env.reset() + + # Increase the slope of the beta response by 15% + env.set_parameter_modulations(beta_slope=1.15) + + # Run the environment (this example skips over action selection for brevity) + for t in range(env.episode_length): + env.step(actions[t]) + """ + if beta_intercept is not None: + beta_intercept = float(beta_intercept) + assert beta_intercept >= 0 + self._beta_intercepts_modulation = beta_intercept + + if beta_slope is not None: + beta_slope = float(beta_slope) + assert beta_slope >= 0 + self._beta_slopes_modulation = beta_slope + + if unemployment is not None: + unemployment = float(unemployment) + assert unemployment >= 0 + self._unemployment_modulation = unemployment + + def unemployment_step(self, current_stringency_level): + """ + Computes unemployment given the current stringency level and past levels. + + Unemployment is computed as follows: + 1) For each of self.num_filters, an exponentially decaying filter is + convolved with the history of stringency changes. Responses move forward in + time, so a stringency change at time t-1 impacts the response at time t. + 2) The filter responses at time t (the current timestep) are summed together + using state-specific weights. + 3) The weighted sum is passed through a SOFTPLUS function to capture excess + unemployment due to stringency policy. + 4) The excess unemployment is added to a state-specific baseline unemployment + level to get the total unemployment. + + Note: Internally, unemployment is computed somewhat differently for speed. + In particular, no convolution is used. Instead the "filter response" at + time t is just a temporally discounted sum of past stringency changes, + with the discounting given by the filter decay rate. + """ + + def softplus(x, beta=1, threshold=20): + """ + Numpy implementation of softplus. 
For reference, see + https://pytorch.org/docs/stable/generated/torch.nn.Softplus.html + """ + return 1 / beta * np.log(1 + np.exp(beta * x)) * ( + beta * x <= threshold + ) + x * (beta * x > threshold) + + if ( + self.world.timestep == 0 + ): # computing unemployment at closure policy "all ones" + delta_stringency_level = np.zeros((self.filter_len, self.num_us_states)) + else: + self.stringency_level_history = np.concatenate( + ( + self.stringency_level_history[1:], + current_stringency_level.reshape(1, -1), + ) + ) + delta_stringency_level = ( + self.stringency_level_history[1:] - self.stringency_level_history[:-1] + ) + + # Rather than modulating the unemployment params, + # modulate the deltas (same effect) + delta_stringency_level = delta_stringency_level * self._unemployment_modulation + + # Expand the [time, state] delta history to have a dimension for filter channel + x_data = delta_stringency_level[None].transpose(2, 0, 1) + + # Apply the state-specific filter weights to each channel + weighted_x_data = x_data * self.repeated_conv_weights + + # Compute the discounted sum of the weighted deltas, with each channel using + # a discounting rate reflecting the time constant of the filter channel. Also + # sum over channels and use a softplus to get excess unemployment. + excess_unemployment = softplus( + np.sum(weighted_x_data * self.unemp_conv_filters, axis=(1, 2)), beta=1 + ) + + # Add excess unemployment to baseline unemployment + unemployment_rate = excess_unemployment + self.unemployment_bias + + # Convert the rate (which is a percent) to raw numbers for output + num_unemployed_t = unemployment_rate * self.us_state_population / 100 + return num_unemployed_t + + # --- Scenario-specific --- + def economy_step( + self, + population, + infected, + deaths, + unemployed, + infection_too_sick_to_work_rate=0.05, + population_between_age_18_65=0.67, + ): + """ + Computes how much production occurs. 
+ + Assumptions: + + - People that cannot work: "infected + aware" and "unemployed" and "deaths". + - No life/death cycles. + + See __init__() for pre-computation of each worker's daily productivity. + """ + + incapacitated = (infection_too_sick_to_work_rate * infected) + deaths + cant_work = (incapacitated * population_between_age_18_65) + unemployed + + num_workers = population * population_between_age_18_65 + + num_people_that_can_work = np.maximum(0, num_workers - cant_work) + + productivity = ( + num_people_that_can_work * self.daily_production_per_worker + ).astype(self.np_float_dtype) + + return productivity + + def sir_step(self, S_tm1, I_tm1, stringency_level_tmk, num_vaccines_available_t): + """ + Simulates SIR infection model in the US. + """ + intercepts = self.beta_intercepts * self._beta_intercepts_modulation + slopes = self.beta_slopes * self._beta_slopes_modulation + beta_i = (intercepts + slopes * stringency_level_tmk).astype( + self.np_float_dtype + ) + + small_number = 1e-10 # used to prevent indeterminate cases + susceptible_fraction_vaccinated = np.minimum( + np.ones((self.num_us_states), dtype=self.np_int_dtype), + num_vaccines_available_t / (S_tm1 + small_number), + ).astype(self.np_float_dtype) + vaccinated_t = np.minimum(num_vaccines_available_t, S_tm1) + + # Record R0 + R0 = beta_i / self.gamma + for agent in self.world.agents: + agent.state["R0"] = R0[agent.idx] + + # S -> I; dS + neighborhood_SI_over_N = (S_tm1 / self.us_state_population) * I_tm1 + dS_t = ( + -beta_i * neighborhood_SI_over_N * (1 - susceptible_fraction_vaccinated) + - vaccinated_t + ).astype(self.np_float_dtype) + + # I -> R; dR + dR_t = (self.gamma * I_tm1 + vaccinated_t).astype(self.np_float_dtype) + + # dI from d(S + I + R) = 0 + # ------------------------ + dI_t = -dS_t - dR_t + + dV_t = vaccinated_t.astype(self.np_float_dtype) + + return dS_t, dI_t, dR_t, dV_t + + def load_model_constants(self, path_to_model_constants): + filename = "model_constants.json" + assert 
filename in os.listdir(path_to_model_constants), ( + "Unable to locate '{}' in '{}'.\nPlease run the " + "'gather_real_world_data.ipynb' notebook first".format( + filename, path_to_model_constants + ) + ) + with open(os.path.join(path_to_model_constants, filename), "r") as fp: + model_constants_dict = json.load(fp) + fp.close() + + self.date_format = model_constants_dict["DATE_FORMAT"] + self.us_state_idx_to_state_name = model_constants_dict[ + "US_STATE_IDX_TO_STATE_NAME" + ] + self.us_state_population = self.np_int_dtype( + model_constants_dict["US_STATE_POPULATION"] + ) + self.us_population = self.np_int_dtype(model_constants_dict["US_POPULATION"]) + self.num_stringency_levels = model_constants_dict["NUM_STRINGENCY_LEVELS"] + self.death_rate = self.np_float_dtype(model_constants_dict["SIR_MORTALITY"]) + self.gamma = self.np_float_dtype(model_constants_dict["SIR_GAMMA"]) + self.gdp_per_capita = self.np_float_dtype( + model_constants_dict["GDP_PER_CAPITA"] + ) + + def load_fitted_params(self, path_to_fitted_params): + filename = "fitted_params.json" + assert filename in os.listdir(path_to_fitted_params), ( + "Unable to locate '{}' in '{}'.\nIf you ran the " + "'gather_real_world_data.ipynb' notebook to download the latest " + "real-world data, please also run the " + "'fit_parameters.ipynb' notebook.".format(filename, path_to_fitted_params) + ) + with open(os.path.join(path_to_fitted_params, filename), "r") as fp: + fitted_params_dict = json.load(fp) + fp.close() + self.policy_start_date = datetime.strptime( + fitted_params_dict["POLICY_START_DATE"], self.date_format + ) + self.value_of_life = self.np_int_dtype(fitted_params_dict["VALUE_OF_LIFE"]) + self.beta_delay = self.np_int_dtype(fitted_params_dict["BETA_DELAY"]) + self.beta_slopes = np.array( + fitted_params_dict["BETA_SLOPES"], dtype=self.np_float_dtype + ) + self.beta_intercepts = np.array( + fitted_params_dict["BETA_INTERCEPTS"], dtype=self.np_float_dtype + ) + self.min_marginal_agent_health_index = 
np.array( + fitted_params_dict["MIN_MARGINAL_AGENT_HEALTH_INDEX"], + dtype=self.np_float_dtype, + ) + self.max_marginal_agent_health_index = np.array( + fitted_params_dict["MAX_MARGINAL_AGENT_HEALTH_INDEX"], + dtype=self.np_float_dtype, + ) + self.min_marginal_agent_economic_index = np.array( + fitted_params_dict["MIN_MARGINAL_AGENT_ECONOMIC_INDEX"], + dtype=self.np_float_dtype, + ) + self.max_marginal_agent_economic_index = np.array( + fitted_params_dict["MAX_MARGINAL_AGENT_ECONOMIC_INDEX"], + dtype=self.np_float_dtype, + ) + self.min_marginal_planner_health_index = self.np_float_dtype( + fitted_params_dict["MIN_MARGINAL_PLANNER_HEALTH_INDEX"] + ) + self.max_marginal_planner_health_index = self.np_float_dtype( + fitted_params_dict["MAX_MARGINAL_PLANNER_HEALTH_INDEX"] + ) + self.min_marginal_planner_economic_index = self.np_float_dtype( + fitted_params_dict["MIN_MARGINAL_PLANNER_ECONOMIC_INDEX"] + ) + self.max_marginal_planner_economic_index = self.np_float_dtype( + fitted_params_dict["MAX_MARGINAL_PLANNER_ECONOMIC_INDEX"] + ) + self.inferred_weightage_on_agent_health_index = np.array( + fitted_params_dict["INFERRED_WEIGHTAGE_ON_AGENT_HEALTH_INDEX"], + dtype=self.np_float_dtype, + ) + self.inferred_weightage_on_planner_health_index = self.np_float_dtype( + fitted_params_dict["INFERRED_WEIGHTAGE_ON_PLANNER_HEALTH_INDEX"] + ) + self.filter_len = self.np_int_dtype(fitted_params_dict["FILTER_LEN"]) + self.conv_lambdas = np.array( + fitted_params_dict["CONV_LAMBDAS"], dtype=self.np_float_dtype + ) + self.unemployment_bias = np.array( + fitted_params_dict["UNEMPLOYMENT_BIAS"], dtype=self.np_float_dtype + ) + self.grouped_convolutional_filter_weights = np.array( + fitted_params_dict["GROUPED_CONVOLUTIONAL_FILTER_WEIGHTS"], + dtype=self.np_float_dtype, + ) + + def scenario_metrics(self): + # End of episode metrics + # ---------------------- + metrics_dict = {} + + # State-level metrics + for agent in self.world.agents: + state_name = 
self.us_state_idx_to_state_name[str(agent.idx)] + + for field in ["infected", "recovered", "deaths"]: + metric_key = "{}/{} (millions)".format(state_name, field) + metrics_dict[metric_key] = ( + agent.state["Total " + field.capitalize()] / 1e6 + ) + + metrics_dict["{}/mean_unemployment_rate (%)".format(state_name)] = ( + np.mean(self.world.global_state["Unemployed"][1:, agent.idx], axis=0) + / self.us_state_population[agent.idx] + * 100 + ) + + metrics_dict[ + "{}/mean_open_close_stringency_level".format(state_name) + ] = np.mean( + self.world.global_state["Stringency Level"][1:, agent.idx], axis=0 + ) + + metrics_dict["{}/total_productivity (billion $)".format(state_name)] = ( + np.sum( + self.world.global_state["Postsubsidy Productivity"][1:, agent.idx] + ) + / 1e9 + ) + + metrics_dict[ + "{}/health_index_at_end_of_episode".format(state_name) + ] = agent.state["Health Index"] + metrics_dict[ + "{}/economic_index_at_end_of_episode".format(state_name) + ] = agent.state["Economic Index"] + + # USA-level metrics + metrics_dict["usa/vaccinated (% of population)"] = ( + np.sum(self.world.global_state["Vaccinated"][self.world.timestep], axis=0) + / self.us_population + * 100 + ) + metrics_dict["usa/deaths (thousands)"] = ( + np.sum(self.world.global_state["Deaths"][self.world.timestep], axis=0) / 1e3 + ) + + metrics_dict["usa/mean_unemployment_rate (%)"] = ( + np.mean( + np.sum(self.world.global_state["Unemployed"][1:], axis=1) + / self.us_population, + axis=0, + ) + * 100 + ) + metrics_dict["usa/total_amount_subsidized (trillion $)"] = ( + np.sum(self.world.global_state["Subsidy"][1:], axis=(0, 1)) / 1e12 + ) + metrics_dict["usa/total_productivity (trillion $)"] = ( + np.sum(self.world.global_state["Postsubsidy Productivity"][1:], axis=(0, 1)) + / 1e12 + ) + + metrics_dict["usa/health_index_at_end_of_episode"] = self.world.planner.state[ + "Health Index" + ] + metrics_dict["usa/economic_index_at_end_of_episode"] = self.world.planner.state[ + "Economic Index" + ] + + 
return metrics_dict diff --git a/ai_economist/foundation/scenarios/covid19/covid19_env_step.cu b/ai_economist/foundation/scenarios/covid19/covid19_env_step.cu new file mode 100644 index 0000000..cee1260 --- /dev/null +++ b/ai_economist/foundation/scenarios/covid19/covid19_env_step.cu @@ -0,0 +1,620 @@ +// Copyright (c) 2021, salesforce.com, inc. +// All rights reserved. +// SPDX-License-Identifier: BSD-3-Clause +// For full license text, see the LICENSE file in the repo root +// or https://opensource.org/licenses/BSD-3-Clause + +__constant__ float kEpsilon = 1.0e-10; // used to prevent division by 0 + +extern "C" { +// CUDA version of the scenario_step() in +// "ai_economist.foundation.scenarios.covid19_env.py" + + // CUDA version of the sir_step() in + // "ai_economist.foundation.scenarios.covid19_env.py" + __device__ void cuda_sir_step( + float* susceptible, + float* infected, + float* recovered, + float* vaccinated, + float* deaths, + int* num_vaccines_available_t, + const int* kRealWorldStringencyPolicyHistory, + const float kStatePopulation, + const int kNumAgents, + const int kBetaDelay, + const float kBetaSlope, + const float kbetaIntercept, + int* stringency_level, + float* beta, + const float kGamma, + const float kDeathRate, + const int kEnvId, + const int kAgentId, + int timestep, + const int kEpisodeLength, + const int kArrayIdxCurrentTime, + const int kArrayIdxPrevTime, + const int kTimeIndependentArrayIdx + ) { + float susceptible_fraction_vaccinated = min( + 1.0, + num_vaccines_available_t[kTimeIndependentArrayIdx] / + (susceptible[kArrayIdxPrevTime] + kEpsilon)); + float vaccinated_t = min( + static_cast(num_vaccines_available_t[ + kTimeIndependentArrayIdx]), + susceptible[kArrayIdxPrevTime]); + + // (S/N) * I in place of (S*I) / N to prevent overflow + float neighborhood_SI_over_N = susceptible[kArrayIdxPrevTime] / + kStatePopulation * infected[kArrayIdxPrevTime]; + int stringency_level_tmk; + if (timestep < kBetaDelay) { + stringency_level_tmk = 
kRealWorldStringencyPolicyHistory[ + (timestep - 1) * (kNumAgents - 1) + kAgentId]; + } else { + stringency_level_tmk = stringency_level[kEnvId * ( + kEpisodeLength + 1) * (kNumAgents - 1) + + (timestep - kBetaDelay) * (kNumAgents - 1) + kAgentId]; + } + beta[kTimeIndependentArrayIdx] = stringency_level_tmk * + kBetaSlope + kbetaIntercept; + + float dS_t = -(neighborhood_SI_over_N * beta[ + kTimeIndependentArrayIdx] * + (1 - susceptible_fraction_vaccinated) + vaccinated_t); + float dR_t = kGamma * infected[kArrayIdxPrevTime] + vaccinated_t; + float dI_t = - dS_t - dR_t; + + susceptible[kArrayIdxCurrentTime] = max( + 0.0, + susceptible[kArrayIdxPrevTime] + dS_t); + infected[kArrayIdxCurrentTime] = max( + 0.0, + infected[kArrayIdxPrevTime] + dI_t); + recovered[kArrayIdxCurrentTime] = max( + 0.0, + recovered[kArrayIdxPrevTime] + dR_t); + + vaccinated[kArrayIdxCurrentTime] = vaccinated_t + + vaccinated[kArrayIdxPrevTime]; + float recovered_but_not_vaccinated = recovered[kArrayIdxCurrentTime] - + vaccinated[kArrayIdxCurrentTime]; + deaths[kArrayIdxCurrentTime] = recovered_but_not_vaccinated * + kDeathRate; + } + + // CUDA version of the softplus() in + // "ai_economist.foundation.scenarios.covid19_env.py" + __device__ float softplus(float x) { + const float kBeta = 1.0; + const float kThreshold = 20.0; + if (kBeta * x < kThreshold) { + return 1.0 / kBeta * log(1.0 + exp(kBeta * x)); + } else { + return x; + } + } + + __device__ float signal2unemployment( + const int kEnvId, + const int kAgentId, + float* signal, + const float* kUnemploymentConvolutionalFilters, + const float kUnemploymentBias, + const int kNumAgents, + const int kFilterLen, + const int kNumFilters + ) { + float unemployment = 0.0; + const int kArrayIndexOffset = kEnvId * (kNumAgents - 1) * kNumFilters * + kFilterLen + kAgentId * kNumFilters * kFilterLen; + for (int index = 0; index < (kFilterLen * kNumFilters); index ++) { + unemployment += signal[kArrayIndexOffset + index] * + 
kUnemploymentConvolutionalFilters[index]; + } + return softplus(unemployment) + kUnemploymentBias; + } + + // CUDA version of the unemployment_step() in + // "ai_economist.foundation.scenarios.covid19_env.py" + __device__ void cuda_unemployment_step( + float* unemployed, + int* stringency_level, + int* delta_stringency_level, + const float* kGroupedConvolutionalFilterWeights, + const float* kUnemploymentConvolutionalFilters, + const float* kUnemploymentBias, + float* convolved_signal, + const int kFilterLen, + const int kNumFilters, + const float kStatePopulation, + const int kNumAgents, + const int kEnvId, + const int kAgentId, + int timestep, + const int kArrayIdxCurrentTime, + const int kArrayIdxPrevTime + ) { + // Shift array by kNumAgents - 1 + for (int idx = 0; idx < kFilterLen - 1; idx ++) { + delta_stringency_level[ + kEnvId * kFilterLen * (kNumAgents - 1) + idx * + (kNumAgents - 1) + kAgentId + ] = + delta_stringency_level[ + kEnvId * kFilterLen * (kNumAgents - 1) + (idx + 1) * + (kNumAgents - 1) + kAgentId + ]; + } + + delta_stringency_level[ + kEnvId * kFilterLen * (kNumAgents - 1) + (kFilterLen - 1) * + (kNumAgents - 1) + kAgentId + ] = stringency_level[kArrayIdxCurrentTime] - + stringency_level[kArrayIdxPrevTime]; + + // convolved_signal refers to the convolution between the filter weights + // and the delta stringency levels + for (int filter_idx = 0; filter_idx < kNumFilters; filter_idx ++) { + for (int idx = 0; idx < kFilterLen; idx ++) { + convolved_signal[ + kEnvId * (kNumAgents - 1) * kNumFilters * kFilterLen + + kAgentId * kNumFilters * kFilterLen + + filter_idx * kFilterLen + + idx + ] = + delta_stringency_level[kEnvId * kFilterLen * (kNumAgents - 1) + + idx * (kNumAgents - 1) + kAgentId] * + kGroupedConvolutionalFilterWeights[kAgentId * kNumFilters + + filter_idx]; + } + } + + float unemployment_rate = signal2unemployment( + kEnvId, + kAgentId, + convolved_signal, + kUnemploymentConvolutionalFilters, + kUnemploymentBias[kAgentId], + 
kNumAgents, + kFilterLen, + kNumFilters); + + unemployed[kArrayIdxCurrentTime] = + unemployment_rate * kStatePopulation / 100.0; + } + + // CUDA version of the economy_step() in + // "ai_economist.foundation.scenarios.covid19_env.py" + __device__ void cuda_economy_step( + float* infected, + float* deaths, + float* unemployed, + float* incapacitated, + float* cant_work, + float* num_people_that_can_work, + const float kStatePopulation, + const float kInfectionTooSickToWorkRate, + const float kPopulationBetweenAge18And65, + const float kDailyProductionPerWorker, + float* productivity, + float* subsidy, + float* postsubsidy_productivity, + int timestep, + const int kArrayIdxCurrentTime, + int kTimeIndependentArrayIdx + ) { + incapacitated[kTimeIndependentArrayIdx] = + kInfectionTooSickToWorkRate * infected[kArrayIdxCurrentTime] + + deaths[kArrayIdxCurrentTime]; + cant_work[kTimeIndependentArrayIdx] = + incapacitated[kTimeIndependentArrayIdx] * + kPopulationBetweenAge18And65 + unemployed[kArrayIdxCurrentTime]; + int num_workers = static_cast(kStatePopulation) * kPopulationBetweenAge18And65; + num_people_that_can_work[kTimeIndependentArrayIdx] = max( + 0.0, + num_workers - cant_work[kTimeIndependentArrayIdx]); + productivity[kArrayIdxCurrentTime] = + num_people_that_can_work[kTimeIndependentArrayIdx] * + kDailyProductionPerWorker; + + postsubsidy_productivity[kArrayIdxCurrentTime] = + productivity[kArrayIdxCurrentTime] + + subsidy[kArrayIdxCurrentTime]; + } + + // CUDA version of crra_nonlinearity() in + // "ai_economist.foundation.scenarios.covid19_env.py" + __device__ float crra_nonlinearity( + float x, + const float kEta, + const int kNumDaysInAnYear + ) { + float annual_x = kNumDaysInAnYear * x; + float annual_x_clipped = annual_x; + if (annual_x < 0.1) { + annual_x_clipped = 0.1; + } else if (annual_x > 3.0) { + annual_x_clipped = 3.0; + } + float annual_crra = 1 + (pow(annual_x_clipped, (1 - kEta)) - 1) / + (1 - kEta); + float daily_crra = annual_crra / 
kNumDaysInAnYear; + return daily_crra; + } + + // CUDA version of min_max_normalization() in + // "ai_economist.foundation.scenarios.covid19_env.py" + __device__ float min_max_normalization( + float x, + const float kMinX, + const float kMaxX + ) { + return (x - kMinX) / (kMaxX - kMinX + kEpsilon); + } + + // CUDA version of get_rew() in + // "ai_economist.foundation.scenarios.covid19_env.py" + __device__ float get_rew( + const float kHealthIndexWeightage, + float health_index, + const float kEconomicIndexWeightage, + float economic_index + ) { + return ( + kHealthIndexWeightage * health_index + + kEconomicIndexWeightage * economic_index) / + (kHealthIndexWeightage + kEconomicIndexWeightage); + } + + // CUDA version of scenario_step() in + // "ai_economist.foundation.scenarios.covid19_env.py" + __global__ void CudaCovidAndEconomySimulationStep( + float* susceptible, + float* infected, + float* recovered, + float* deaths, + float* vaccinated, + float* unemployed, + float* subsidy, + float* productivity, + int* stringency_level, + const int kNumStringencyLevels, + float* postsubsidy_productivity, + int* num_vaccines_available_t, + const int* kRealWorldStringencyPolicyHistory, + const int kBetaDelay, + const float* kBetaSlopes, + const float* kbetaIntercepts, + float* beta, + const float kGamma, + const float kDeathRate, + float* incapacitated, + float* cant_work, + float* num_people_that_can_work, + const int* us_kStatePopulation, + const float kInfectionTooSickToWorkRate, + const float kPopulationBetweenAge18And65, + const int kFilterLen, + const int kNumFilters, + int* delta_stringency_level, + const float* kGroupedConvolutionalFilterWeights, + const float* kUnemploymentConvolutionalFilters, + const float* kUnemploymentBias, + float* signal, + const float kDailyProductionPerWorker, + const float* maximum_productivity, + float* obs_a_world_agent_state, + float* obs_a_world_agent_postsubsidy_productivity, + float* obs_a_world_lagged_stringency_level, + float* 
obs_a_time, + float* obs_p_world_agent_state, + float* obs_p_world_agent_postsubsidy_productivity, + float* obs_p_world_lagged_stringency_level, + float* obs_p_time, + int * env_timestep_arr, + const int kNumAgents, + const int kEpisodeLength + ) { + const int kEnvId = blockIdx.x; + const int kAgentId = threadIdx.x; + + assert(env_timestep_arr[kEnvId] > 0 && + env_timestep_arr[kEnvId] <= kEpisodeLength); + assert (kAgentId <= kNumAgents - 1); + const int kNumFeatures = 6; + + if (kAgentId < (kNumAgents - 1)) { + // Indices for time-dependent and time-independent arrays + // Time dependent arrays have shapes (num_envs, + // kEpisodeLength + 1, kNumAgents - 1) + // Time independent arrays have shapes (num_envs, kNumAgents - 1) + const int kArrayIndexOffset = kEnvId * (kEpisodeLength + 1) * + (kNumAgents - 1); + int kArrayIdxCurrentTime = kArrayIndexOffset + + env_timestep_arr[kEnvId] * (kNumAgents - 1) + kAgentId; + int kArrayIdxPrevTime = kArrayIndexOffset + + (env_timestep_arr[kEnvId] - 1) * (kNumAgents - 1) + kAgentId; + const int kTimeIndependentArrayIdx = kEnvId * + (kNumAgents - 1) + kAgentId; + + const float kStatePopulation = static_cast(us_kStatePopulation[kAgentId]); + + cuda_sir_step( + susceptible, + infected, + recovered, + vaccinated, + deaths, + num_vaccines_available_t, + kRealWorldStringencyPolicyHistory, + kStatePopulation, + kNumAgents, + kBetaDelay, + kBetaSlopes[kAgentId], + kbetaIntercepts[kAgentId], + stringency_level, + beta, + kGamma, + kDeathRate, + kEnvId, + kAgentId, + env_timestep_arr[kEnvId], + kEpisodeLength, + kArrayIdxCurrentTime, + kArrayIdxPrevTime, + kTimeIndependentArrayIdx); + + cuda_unemployment_step( + unemployed, + stringency_level, + delta_stringency_level, + kGroupedConvolutionalFilterWeights, + kUnemploymentConvolutionalFilters, + kUnemploymentBias, + signal, + kFilterLen, + kNumFilters, + kStatePopulation, + kNumAgents, + kEnvId, + kAgentId, + env_timestep_arr[kEnvId], + kArrayIdxCurrentTime, + kArrayIdxPrevTime); + + 
cuda_economy_step( + infected, + deaths, + unemployed, + incapacitated, + cant_work, + num_people_that_can_work, + kStatePopulation, + kInfectionTooSickToWorkRate, + kPopulationBetweenAge18And65, + kDailyProductionPerWorker, + productivity, + subsidy, + postsubsidy_productivity, + env_timestep_arr[kEnvId], + kArrayIdxCurrentTime, + kTimeIndependentArrayIdx); + + // CUDA version of generate observations + // Agents' observations + int kFeatureArrayIndexOffset = kEnvId * kNumFeatures * + (kNumAgents - 1) + kAgentId; + obs_a_world_agent_state[ + kFeatureArrayIndexOffset + 0 * (kNumAgents - 1) + ] = susceptible[kArrayIdxCurrentTime] / kStatePopulation; + obs_a_world_agent_state[ + kFeatureArrayIndexOffset + 1 * (kNumAgents - 1) + ] = infected[kArrayIdxCurrentTime] / kStatePopulation; + obs_a_world_agent_state[ + kFeatureArrayIndexOffset + 2 * (kNumAgents - 1) + ] = recovered[kArrayIdxCurrentTime] / kStatePopulation; + obs_a_world_agent_state[ + kFeatureArrayIndexOffset + 3 * (kNumAgents - 1) + ] = deaths[kArrayIdxCurrentTime] / kStatePopulation; + obs_a_world_agent_state[ + kFeatureArrayIndexOffset + 4 * (kNumAgents - 1) + ] = vaccinated[kArrayIdxCurrentTime] / kStatePopulation; + obs_a_world_agent_state[ + kFeatureArrayIndexOffset + 5 * (kNumAgents - 1) + ] = unemployed[kArrayIdxCurrentTime] / kStatePopulation; + + for (int feature_id = 0; feature_id < kNumFeatures; feature_id ++) { + const int kIndex = feature_id * (kNumAgents - 1); + obs_p_world_agent_state[kFeatureArrayIndexOffset + + kIndex + ] = obs_a_world_agent_state[kFeatureArrayIndexOffset + + kIndex]; + } + + obs_a_world_agent_postsubsidy_productivity[ + kTimeIndependentArrayIdx + ] = postsubsidy_productivity[kArrayIdxCurrentTime] / + maximum_productivity[kAgentId]; + obs_p_world_agent_postsubsidy_productivity[ + kTimeIndependentArrayIdx + ] = obs_a_world_agent_postsubsidy_productivity[ + kTimeIndependentArrayIdx + ]; + + int t_beta = env_timestep_arr[kEnvId] - kBetaDelay + 1; + if (t_beta < 0) { + 
obs_a_world_lagged_stringency_level[ + kTimeIndependentArrayIdx + ] = kRealWorldStringencyPolicyHistory[ + env_timestep_arr[kEnvId] * (kNumAgents - 1) + kAgentId + ] / static_cast(kNumStringencyLevels); + } else { + obs_a_world_lagged_stringency_level[ + kTimeIndependentArrayIdx + ] = stringency_level[ + kArrayIndexOffset + + t_beta * (kNumAgents - 1) + + kAgentId + ] / static_cast(kNumStringencyLevels); + } + obs_p_world_lagged_stringency_level[ + kTimeIndependentArrayIdx + ] = obs_a_world_lagged_stringency_level[ + kTimeIndependentArrayIdx]; + // Below, we assume observation scaling = True + // (otherwise, 'obs_a_time[kTimeIndependentArrayIdx] = + // static_cast(env_timestep_arr[kEnvId]) + obs_a_time[kTimeIndependentArrayIdx] = + env_timestep_arr[kEnvId] / static_cast(kEpisodeLength); + } else if (kAgentId == kNumAgents - 1) { + obs_p_time[kEnvId] = env_timestep_arr[kEnvId] / + static_cast(kEpisodeLength); + } + } + + // CUDA version of the compute_reward() in + // "ai_economist.foundation.scenarios.covid19_env.py" + __global__ void CudaComputeReward( + float* rewards_a, + float* rewards_p, + const int kNumDaysInAnYear, + const int kValueOfLife, + const float kRiskFreeInterestRate, + const float kEconomicRewardCrraEta, + const float* kMinMarginalAgentHealthIndex, + const float* kMaxMarginalAgentHealthIndex, + const float* kMinMarginalAgentEconomicIndex, + const float* kMaxMarginalAgentEconomicIndex, + const float kMinMarginalPlannerHealthIndex, + const float kMaxMarginalPlannerHealthIndex, + const float kMinMarginalPlannerEconomicIndex, + const float kMaxMarginalPlannerEconomicIndex, + const float* kWeightageOnMarginalAgentHealthIndex, + const float* kWeightageOnMarginalPlannerHealthIndex, + const float kWeightageOnMarginalAgentEconomicIndex, + const float kWeightageOnMarginalPlannerEconomicIndex, + const float* kAgentsHealthNorm, + const float* kAgentsEconomicNorm, + const float kPlannerHealthNorm, + const float kPlannerEconomicNorm, + float* deaths, + float* 
subsidy, + float* postsubsidy_productivity, + int* env_done_arr, + int* env_timestep_arr, + const int kNumAgents, + const int kEpisodeLength + ) { + const int kEnvId = blockIdx.x; + const int kAgentId = threadIdx.x; + + assert(env_timestep_arr[kEnvId] > 0 && + env_timestep_arr[kEnvId] <= kEpisodeLength); + assert (kAgentId <= kNumAgents - 1); + + const int kArrayIndexOffset = kEnvId * (kEpisodeLength + 1) * + (kNumAgents - 1); + if (kAgentId < (kNumAgents - 1)) { + // Agents' rewards + // Indices for time-dependent and time-independent arrays + // Time dependent arrays have shapes (num_envs, + // kEpisodeLength + 1, kNumAgents - 1) + // Time independent arrays have shapes (num_envs, kNumAgents - 1) + int kArrayIdxCurrentTime = kArrayIndexOffset + + env_timestep_arr[kEnvId] * (kNumAgents - 1) + kAgentId; + int kArrayIdxPrevTime = kArrayIndexOffset + + (env_timestep_arr[kEnvId] - 1) * (kNumAgents - 1) + kAgentId; + const int kTimeIndependentArrayIdx = kEnvId * + (kNumAgents - 1) + kAgentId; + + float marginal_deaths = deaths[kArrayIdxCurrentTime] - + deaths[kArrayIdxPrevTime]; + + // Note: changing the order of operations to prevent overflow + float marginal_agent_health_index = - marginal_deaths / + (kAgentsHealthNorm[kAgentId] / + static_cast(kValueOfLife)); + + float marginal_agent_economic_index = crra_nonlinearity( + postsubsidy_productivity[kArrayIdxCurrentTime] / + kAgentsEconomicNorm[kAgentId], + kEconomicRewardCrraEta, + kNumDaysInAnYear); + + marginal_agent_health_index = min_max_normalization( + marginal_agent_health_index, + kMinMarginalAgentHealthIndex[kAgentId], + kMaxMarginalAgentHealthIndex[kAgentId]); + marginal_agent_economic_index = min_max_normalization( + marginal_agent_economic_index, + kMinMarginalAgentEconomicIndex[kAgentId], + kMaxMarginalAgentEconomicIndex[kAgentId]); + + rewards_a[kTimeIndependentArrayIdx] = get_rew( + kWeightageOnMarginalAgentHealthIndex[kAgentId], + marginal_agent_health_index, + 
kWeightageOnMarginalPlannerHealthIndex[kAgentId], + marginal_agent_economic_index); + } else if (kAgentId == kNumAgents - 1) { + // Planner's rewards + float total_marginal_deaths = 0; + for (int ag_id = 0; ag_id < (kNumAgents - 1); ag_id ++) { + total_marginal_deaths += ( + deaths[kArrayIndexOffset + env_timestep_arr[kEnvId] * + (kNumAgents - 1) + ag_id] - + deaths[kArrayIndexOffset + (env_timestep_arr[kEnvId] - 1) * + (kNumAgents - 1) + ag_id]); + } + // Note: changing the order of operations to prevent overflow + float marginal_planner_health_index = -total_marginal_deaths / + (kPlannerHealthNorm / static_cast(kValueOfLife)); + + float total_subsidy = 0.0; + float total_postsubsidy_productivity = 0.0; + for (int ag_id = 0; ag_id < (kNumAgents - 1); ag_id ++) { + total_subsidy += subsidy[kArrayIndexOffset + + env_timestep_arr[kEnvId] * (kNumAgents - 1) + ag_id]; + total_postsubsidy_productivity += + postsubsidy_productivity[kArrayIndexOffset + + env_timestep_arr[kEnvId] * (kNumAgents - 1) + ag_id]; + } + + float cost_of_subsidy = (1 + kRiskFreeInterestRate) * + total_subsidy; + float marginal_planner_economic_index = crra_nonlinearity( + (total_postsubsidy_productivity - cost_of_subsidy) / + kPlannerEconomicNorm, + kEconomicRewardCrraEta, + kNumDaysInAnYear); + + marginal_planner_health_index = min_max_normalization( + marginal_planner_health_index, + kMinMarginalPlannerHealthIndex, + kMaxMarginalPlannerHealthIndex); + marginal_planner_economic_index = min_max_normalization( + marginal_planner_economic_index, + kMinMarginalPlannerEconomicIndex, + kMaxMarginalPlannerEconomicIndex); + + rewards_p[kEnvId] = get_rew( + kWeightageOnMarginalAgentEconomicIndex, + marginal_planner_health_index, + kWeightageOnMarginalPlannerEconomicIndex, + marginal_planner_economic_index); + } + + // Wait here for all agents to finish computing rewards + __syncthreads(); + + // Use only agent 0's thread to set done_arr + if (kAgentId == 0) { + if (env_timestep_arr[kEnvId] == 
kEpisodeLength) { + env_timestep_arr[kEnvId] = 0; + env_done_arr[kEnvId] = 1; + } + } + } +} diff --git a/ai_economist/foundation/scenarios/covid19/key_to_check_activation_code_against b/ai_economist/foundation/scenarios/covid19/key_to_check_activation_code_against new file mode 100644 index 0000000..2bb74b3 --- /dev/null +++ b/ai_economist/foundation/scenarios/covid19/key_to_check_activation_code_against @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEpgIBAAKCAQEAk1+Qz0/Qg4OOGrskBJnVI9KVGTEUvsldHUV4AzLeecYZSV5+ +FZUQpl8lq1mstUZZ0xMlGSHz2t+AAJxyEro8mAj9gAp1qeN58pAX2k29DOt4YRnp +sTF1UG+nrV2aW+jfH16aeVsjWY+Nq+GxGyE3Q5bsxOhnOg0TUaB6RY8SBE/scTHn +bfNsgTc5EuiAAGqYYYdu12n5zeyvfjGW7bBf4Q9t0F0bI+YdZQY9HD35KAoNcqFQ +dvd2vKbojejkn+WyO1amnZxgAhVjpT61FV4u18jPN0Qrt0LHuF5kUVzYal+73ySY +BbwEo4onEn9xvUlQGFJWmv4OPwbI3d4nLqP+mQIHK9xUXfK97QKCAQABeR2EO0uu +ERyRXa5Mh7xsOEq/OJ9sQq+si8B5gDyyM1SW61wQMKF4Wiqw68bMCVvGRwScZD+T +XwBEBJMm9lCVx/UfOWqYSNFCk/YBefv9AI0Kg5lfCMZQuTdjMcbJdjoR5xoiCbO1 +ya7oOU8mfWx/SV0o/698b/zMVBKBBQDNZaN9pmtTOgm3G1QnM9ZlmrdlKYpe9Ihs +3sG4437QaPhumdZi8IoLBGMyYL2O38pG34LJjIkP8Efj1QVTndIIZX8CKghir++j +nUAyofFt7/PBS2k7gQ/1gFISwHxKjmzl/Fc25o7ahlLbO+i2UnRiB9IXcmiGDXMv +tY09oXhxCtTZAoGBAMEkMTzoiqKjXLwKLyFIF5QzXqQKcGqfC8NhQMsm43K0TgHg +Sv1fLdnKw0FWSG30gppBorAY9p5FoI+AWwTSd+AJhz7T1y/shpJx1oBR8qKWO5kO +gMru9kRRb0zb5hydakie3mujz7GUPiXrntKZjC4QYLar0USPulJnU+UTF6QjAoGB +AMNWJqG1ybrk0sNkWJJDW+MnMT0T9o0E+CtbRHqMHh7K1LF9Sc/qh0gLfDo51+kr +pscLaaJiF1Q8phzDhW9QDeNv+4lknNqMFBCFtzns1wVDlXL4U87oqhuBSs6IZAuO +CGVefYKgefdwn64rcyRNala44BbiMJKwRoDvvgH1FvATAoGAV1YK9ZHB1RkXkZ5a +uBePXvkScaujH4DxadMGf2tBuI1wIpVwhxOQ56yDwYoAuexXPUa8BAx2V69/LFo7 +H/yDYqzndA8WwZLy8oy7Ug+fFLtCp7VhkEwMPciBq6KjzUyShIBlgZOx5m5kTbfu +Cs2JQU35YHeompcpLooRG1/cFZkCgYAyVlWABzmgSKJL9ohwlSBBZFCjQ1mjN6uc +uRJxncqfCe3XQ5erFjuWMfPayWONBsWexNucJFc7Iz2LzCOXkUsftldEEET9f/2w +PrbsEu8khNTLqUcow2Whz+A8C0dV6p2cqtTKR1XlSmNVqP30lmpHcmF+R3M/J1ON +K7S9zJJ+zwKBgHIuCATGCGCOzAsUo80OQL46j74SxRV3H1CJASLKzatiTo54dbO6 
+86w+N6BfYtYeRlnX1CTGl6bHqVUMBBlKws8Ig3gV3xFS8BiSav8zQ2m99JuhlVHF +Ocfowmuad3WXYvYXQ5IeP2JM/3q7BoPLg1DKP4GGZlNbatMRI+H0HimV +-----END RSA PRIVATE KEY----- \ No newline at end of file diff --git a/ai_economist/foundation/scenarios/one_step_economy/__init__.py b/ai_economist/foundation/scenarios/one_step_economy/__init__.py new file mode 100644 index 0000000..fc6cee4 --- /dev/null +++ b/ai_economist/foundation/scenarios/one_step_economy/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause diff --git a/ai_economist/foundation/scenarios/one_step_economy/one_step_economy.py b/ai_economist/foundation/scenarios/one_step_economy/one_step_economy.py new file mode 100644 index 0000000..78f1950 --- /dev/null +++ b/ai_economist/foundation/scenarios/one_step_economy/one_step_economy.py @@ -0,0 +1,336 @@ +# Copyright (c) 2021 salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import numpy as np + +from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry +from ai_economist.foundation.scenarios.utils import rewards, social_metrics + + +@scenario_registry.add +class OneStepEconomy(BaseEnvironment): + """ + A simple model featuring one "step" of setting taxes and earning income. + + As described in https://arxiv.org/abs/2108.02755: + A simplified version of simple_wood_and_stone scenario where both the planner + and the agents each make a single decision: the planner setting taxes and the + agents choosing labor. 
Each agent chooses an amount of labor that optimizes + its post-tax utility, and this optimal labor depends on its skill and the tax + rates, and it does not depend on the labor choices of other agents. Before + the agents act, the planner sets the marginal tax rates in order to optimize + social welfare. + + Note: + This scenario is intended to be used with the 'PeriodicBracketTax' and + 'SimpleLabor' components. + It should use an episode length of 2. In the first step, taxes are set by + the planner via 'PeriodicBracketTax'. In the second, agents select how much + to work/earn via 'SimpleLabor'. + + Args: + agent_reward_type (str): The type of utility function used to compute each + agent's reward. Defaults to "coin_minus_labor_cost". + isoelastic_eta (float): The shape parameter of the isoelastic function used + in the "isoelastic_coin_minus_labor" utility function. + labor_exponent (float): The labor exponent parameter used in the + "coin_minus_labor_cost" utility function. + labor_cost (float): The coefficient used to weight the cost of labor. + planner_reward_type (str): The type of social welfare function (SWF) used to + compute the planner's reward. Defaults to "inv_income_weighted_utility". + mixing_weight_gini_vs_coin (float): Must be between 0 and 1 (inclusive). + Controls the weighting of equality and productivity when using SWF + "coin_eq_times_productivity", where a value of 0 (default) yields equal + weighting, and 1 only considers productivity. 
+ """ + + name = "one-step-economy" + agent_subclasses = ["BasicMobileAgent", "BasicPlanner"] + required_entities = ["Coin"] + + def __init__( + self, + *base_env_args, + agent_reward_type="coin_minus_labor_cost", + isoelastic_eta=0.23, + labor_exponent=2.0, + labor_cost=1.0, + planner_reward_type="inv_income_weighted_utility", + mixing_weight_gini_vs_coin=0, + **base_env_kwargs + ): + super().__init__(*base_env_args, **base_env_kwargs) + + self.num_agents = len(self.world.agents) + + self.labor_cost = labor_cost + self.agent_reward_type = agent_reward_type + self.isoelastic_eta = isoelastic_eta + self.labor_exponent = labor_exponent + self.planner_reward_type = planner_reward_type + self.mixing_weight_gini_vs_coin = mixing_weight_gini_vs_coin + self.planner_starting_coin = 0 + + self.curr_optimization_metrics = {str(a.idx): 0 for a in self.all_agents} + + # The following methods must be implemented for each scenario + # ----------------------------------------------------------- + def reset_starting_layout(self): + """ + Part 1/2 of scenario reset. This method handles resetting the state of the + environment managed by the scenario (i.e. resource & landmark layout). + + Here, generate a resource source layout consistent with target parameters. + """ + + def reset_agent_states(self): + """ + Part 2/2 of scenario reset. This method handles resetting the state of the + agents themselves (i.e. inventory, locations, etc.). + + Here, empty inventories, give mobile agents any starting coin, and place them + in random accesible locations to start. 
+ """ + self.world.clear_agent_locs() + + for agent in self.world.agents: + # Clear everything to start with + agent.state["inventory"] = {k: 0 for k in agent.state["inventory"].keys()} + agent.state["escrow"] = {k: 0 for k in agent.state["escrow"].keys()} + agent.state["endogenous"] = {k: 0 for k in agent.state["endogenous"].keys()} + + self.world.planner.inventory["Coin"] = self.planner_starting_coin + + def scenario_step(self): + """ + Update the state of the world according to whatever rules this scenario + implements. + + This gets called in the 'step' method (of base_env) after going through each + component step and before generating observations, rewards, etc. + + NOTE: does not take agent actions into account. + """ + + def generate_observations(self): + """ + Generate observations associated with this scenario. + + A scenario does not need to produce observations and can provide observations + for only some agent types; however, for a given agent type, it should either + always or never yield an observation. If it does yield an observation, + that observation should always have the same structure/sizes! + + Returns: + obs (dict): A dictionary of {agent.idx: agent_obs_dict}. In words, + return a dictionary with an entry for each agent (which can including + the planner) for which this scenario provides an observation. For each + entry, the key specifies the index of the agent and the value contains + its associated observation dictionary. + + Here, non-planner agents receive spatial observations (depending on the env + config) as well as the contents of their inventory and endogenous quantities. + The planner also receives spatial observations (again, depending on the env + config) as well as the inventory of each of the mobile agents. 
+ """ + obs_dict = dict() + for agent in self.world.agents: + obs_dict[str(agent.idx)] = {} + + coin_endowments = np.array( + [agent.total_endowment("Coin") for agent in self.world.agents] + ) + equality = social_metrics.get_equality(coin_endowments) + productivity = social_metrics.get_productivity(coin_endowments) + normalized_per_capita_productivity = productivity / self.num_agents / 1000 + obs_dict[self.world.planner.idx] = { + "normalized_per_capita_productivity": normalized_per_capita_productivity, + "equality": equality, + } + + return obs_dict + + def compute_reward(self): + """ + Apply the reward function(s) associated with this scenario to get the rewards + from this step. + + Returns: + rew (dict): A dictionary of {agent.idx: agent_obs_dict}. In words, + return a dictionary with an entry for each agent in the environment + (including the planner). For each entry, the key specifies the index of + the agent and the value contains the scalar reward earned this timestep. + + Rewards are computed as the marginal utility (agents) or marginal social + welfare (planner) experienced on this timestep. Ignoring discounting, + this means that agents' (planner's) objective is to maximize the utility + (social welfare) associated with the terminal state of the episode. + """ + curr_optimization_metrics = self.get_current_optimization_metrics( + self.world.agents, + isoelastic_eta=float(self.isoelastic_eta), + labor_exponent=float(self.labor_exponent), + labor_coefficient=float(self.labor_cost), + ) + planner_agents_rew = { + k: v - self.curr_optimization_metrics[k] + for k, v in curr_optimization_metrics.items() + } + self.curr_optimization_metrics = curr_optimization_metrics + return planner_agents_rew + + # Optional methods for customization + # ---------------------------------- + def additional_reset_steps(self): + """ + Extra scenario-specific steps that should be performed at the end of the reset + cycle. + + For each reset cycle... 
+ First, reset_starting_layout() and reset_agent_states() will be called. + + Second, .reset() will be called for each registered component. + + Lastly, this method will be called to allow for any final customization of + the reset cycle. + """ + self.curr_optimization_metrics = self.get_current_optimization_metrics( + self.world.agents, + isoelastic_eta=float(self.isoelastic_eta), + labor_exponent=float(self.labor_exponent), + labor_coefficient=float(self.labor_cost), + ) + + def scenario_metrics(self): + """ + Allows the scenario to generate metrics (collected along with component metrics + in the 'metrics' property). + + To have the scenario add metrics, this function needs to return a dictionary of + {metric_key: value} where 'value' is a scalar (no nesting or lists!) + + Here, summarize social metrics, endowments, utilities, and labor cost annealing. + """ + metrics = dict() + + # Log social/economic indicators + coin_endowments = np.array( + [agent.total_endowment("Coin") for agent in self.world.agents] + ) + pretax_incomes = np.array( + [agent.state["production"] for agent in self.world.agents] + ) + metrics["social/productivity"] = social_metrics.get_productivity( + coin_endowments + ) + metrics["social/equality"] = social_metrics.get_equality(coin_endowments) + + utilities = np.array( + [self.curr_optimization_metrics[agent.idx] for agent in self.world.agents] + ) + metrics[ + "social_welfare/coin_eq_times_productivity" + ] = rewards.coin_eq_times_productivity( + coin_endowments=coin_endowments, equality_weight=1.0 + ) + metrics[ + "social_welfare/inv_income_weighted_utility" + ] = rewards.inv_income_weighted_utility( + coin_endowments=pretax_incomes, utilities=utilities # coin_endowments, + ) + + # Log average endowments, endogenous, and utility for agents + agent_endows = {} + agent_endogenous = {} + agent_utilities = [] + for agent in self.world.agents: + for resource in agent.inventory.keys(): + if resource not in agent_endows: + agent_endows[resource] 
= [] + agent_endows[resource].append( + agent.inventory[resource] + agent.escrow[resource] + ) + + for endogenous, quantity in agent.endogenous.items(): + if endogenous not in agent_endogenous: + agent_endogenous[endogenous] = [] + agent_endogenous[endogenous].append(quantity) + + agent_utilities.append(self.curr_optimization_metrics[agent.idx]) + + for resource, quantities in agent_endows.items(): + metrics["endow/avg_agent/{}".format(resource)] = np.mean(quantities) + + for endogenous, quantities in agent_endogenous.items(): + metrics["endogenous/avg_agent/{}".format(endogenous)] = np.mean(quantities) + + metrics["util/avg_agent"] = np.mean(agent_utilities) + + # Log endowments and utility for the planner + for resource, quantity in self.world.planner.inventory.items(): + metrics["endow/p/{}".format(resource)] = quantity + + metrics["util/p"] = self.curr_optimization_metrics[self.world.planner.idx] + + return metrics + + def get_current_optimization_metrics( + self, agents, isoelastic_eta=0.23, labor_exponent=2.0, labor_coefficient=0.1 + ): + """ + Compute optimization metrics based on the current state. Used to compute reward. + + Returns: + curr_optimization_metric (dict): A dictionary of {agent.idx: metric} + with an entry for each agent (including the planner) in the env. 
+ """ + curr_optimization_metric = {} + + coin_endowments = np.array([agent.total_endowment("Coin") for agent in agents]) + + pretax_incomes = np.array([agent.state["production"] for agent in agents]) + + # Optimization metric for agents: + for agent in agents: + if self.agent_reward_type == "isoelastic_coin_minus_labor": + assert 0.0 <= isoelastic_eta <= 1.0 + curr_optimization_metric[ + agent.idx + ] = rewards.isoelastic_coin_minus_labor( + coin_endowment=agent.total_endowment("Coin"), + total_labor=agent.state["endogenous"]["Labor"], + isoelastic_eta=isoelastic_eta, + labor_coefficient=labor_coefficient, + ) + elif self.agent_reward_type == "coin_minus_labor_cost": + assert labor_exponent > 1.0 + curr_optimization_metric[agent.idx] = rewards.coin_minus_labor_cost( + coin_endowment=agent.total_endowment("Coin"), + total_labor=agent.state["endogenous"]["Labor"], + labor_exponent=labor_exponent, + labor_coefficient=labor_coefficient, + ) + # Optimization metric for the planner: + if self.planner_reward_type == "coin_eq_times_productivity": + curr_optimization_metric[ + self.world.planner.idx + ] = rewards.coin_eq_times_productivity( + coin_endowments=coin_endowments, + equality_weight=1 - self.mixing_weight_gini_vs_coin, + ) + elif self.planner_reward_type == "inv_income_weighted_utility": + curr_optimization_metric[ + self.world.planner.idx + ] = rewards.inv_income_weighted_utility( + coin_endowments=pretax_incomes, # coin_endowments, + utilities=np.array( + [curr_optimization_metric[agent.idx] for agent in agents] + ), + ) + else: + print("No valid planner reward selected!") + raise NotImplementedError + return curr_optimization_metric diff --git a/ai_economist/foundation/scenarios/simple_wood_and_stone/__init__.py b/ai_economist/foundation/scenarios/simple_wood_and_stone/__init__.py new file mode 100644 index 0000000..fc6cee4 --- /dev/null +++ b/ai_economist/foundation/scenarios/simple_wood_and_stone/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) 2020, 
salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause diff --git a/ai_economist/foundation/scenarios/simple_wood_and_stone/dynamic_layout.py b/ai_economist/foundation/scenarios/simple_wood_and_stone/dynamic_layout.py new file mode 100644 index 0000000..e6b8edc --- /dev/null +++ b/ai_economist/foundation/scenarios/simple_wood_and_stone/dynamic_layout.py @@ -0,0 +1,1021 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +from copy import deepcopy + +import numpy as np +from scipy import signal + +from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry +from ai_economist.foundation.scenarios.utils import rewards, social_metrics + + +@scenario_registry.add +class Uniform(BaseEnvironment): + """ + World containing spatially-segregated stone and wood with stochastic regeneration. + + For controlling how resource regeneration behavior... + Coverage: if fraction, target fraction of total tiles; if integer, target number + of tiles + Regen Halfwidth: width of regen kernel = 1 + (2 * halfwidth); set >0 to create + a spatial social dilemma + Regen Weight: regen probability per tile counted by the regen kernel + Max Health: how many resource units can populate a source block + Clumpiness: degree to which resources are spatially clustered + Gradient Steepness: degree to which stone/wood are restricted to the top/bottom + of the map + + Args: + planner_gets_spatial_obs (bool): Whether the planner agent receives spatial + observations from the world. + full_observability (bool): Whether the mobile agents' spatial observation + includes the full world view or is instead an egocentric view. 
+ mobile_agent_observation_range (int): If not using full_observability, + the spatial range (on each side of the agent) that is visible in the + spatial observations. + starting_wood_coverage (int, float): Target coverage of wood at t=0. + wood_regen_halfwidth (int): Regen halfwidth for wood. + wood_regen_weight (float): Regen weight for wood. + wood_max_health (int): Max wood units per wood source tile. + wood_clumpiness (float): Degree of wood clumping. + starting_stone_coverage (int, float): Target coverage of stone at t=0. + stone_regen_halfwidth (int): Regen halfwidth for stone. + stone_regen_weight (float): Regen weight for stone. + stone_max_health (int): Max stone units per stone source tile. + stone_clumpiness (float): Degree of stone clumping. + gradient_steepness (int, float): How steeply source tile probability falls + off from the top/bottom of the map. + checker_source_blocks (bool): Whether to space source tiles in a "checker" + formation. + starting_agent_coin (int, float): Amount of coin agents have at t=0. Defaults + to zero coin. + isoelastic_eta (float): Parameter controlling the shape of agent utility + wrt coin endowment. + energy_cost (float): Coefficient for converting labor to negative utility. + energy_warmup_constant (float): Decay constant that controls the rate at which + the effective energy cost is annealed from 0 to energy_cost. Set to 0 + (default) to disable annealing, meaning that the effective energy cost is + always energy_cost. The units of the decay constant depend on the choice of + energy_warmup_method. + energy_warmup_method (str): How to schedule energy annealing (warmup). If + "decay" (default), use the number of completed episodes. If "auto", + use the number of timesteps where the average agent reward was positive. + planner_reward_type (str): The type of reward used for the planner. Options + are "coin_eq_times_productivity" (default), + "inv_income_weighted_coin_endowment", and "inv_income_weighted_utility". 
+ mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/ + "coin_eq_times_productivity". Default is 0, which weights equality and + productivity equally. If set to 1, only productivity is rewarded. + + """ + + name = "uniform/simple_wood_and_stone" + agent_subclasses = ["BasicMobileAgent", "BasicPlanner"] + required_entities = ["Wood", "Stone"] + + def __init__( + self, + *base_env_args, + planner_gets_spatial_info=True, + full_observability=False, + mobile_agent_observation_range=5, + starting_wood_coverage=0.025, + wood_regen_halfwidth=0, + wood_regen_weight=0.01, + wood_max_health=1, + starting_stone_coverage=0.025, + stone_regen_halfwidth=0, + stone_regen_weight=0.01, + stone_max_health=1, + wood_clumpiness=0.35, + stone_clumpiness=0.5, + gradient_steepness=8, + checker_source_blocks=False, + starting_agent_coin=0, + isoelastic_eta=0.23, + energy_cost=0.21, + energy_warmup_constant=0, + energy_warmup_method="decay", + planner_reward_type="coin_eq_times_productivity", + mixing_weight_gini_vs_coin=0.0, + **base_env_kwargs + ): + super().__init__(*base_env_args, **base_env_kwargs) + + # Whether agents receive spatial information in their observation tensor + self._planner_gets_spatial_info = bool(planner_gets_spatial_info) + + # Whether the (non-planner) agents can see the whole world map + self._full_observability = bool(full_observability) + + self._mobile_agent_observation_range = int(mobile_agent_observation_range) + + # For controlling how resource regeneration behavior + # - Coverage: if fraction, target fraction of total tiles; + # if integer, target number of tiles + # - Regen Halfwidth: width of regen kernel = 1 + (2 * halfwidth) + # - Regen Weight: regen probability per tile counted by the regen kernel + # - Max Health: how many resource units can populate a source block + # - Clumpiness: degree to which resources are spatially clustered + # - Gradient Steepness: degree to which stone/wood + # are restricted to top/bottom of map + 
self.layout_specs = dict(Wood={}, Stone={}) + # + if starting_wood_coverage >= 1: + starting_wood_coverage /= np.prod(self.world_size) + if starting_stone_coverage >= 1: + starting_stone_coverage /= np.prod(self.world_size) + assert (starting_stone_coverage + starting_wood_coverage) < 0.5 + # + self._checker_source_blocks = bool(checker_source_blocks) + c, r = np.meshgrid( + np.arange(self.world_size[1]) % 2, np.arange(self.world_size[0]) % 2 + ) + self._checker_mask = (r + c) == 1 + m = 2 if self._checker_source_blocks else 1 + # + self.layout_specs["Wood"]["starting_coverage"] = ( + float(starting_wood_coverage) * m + ) + self.layout_specs["Stone"]["starting_coverage"] = ( + float(starting_stone_coverage) * m + ) + assert 0 < self.layout_specs["Wood"]["starting_coverage"] < 1 + assert 0 < self.layout_specs["Stone"]["starting_coverage"] < 1 + # + self.layout_specs["Wood"]["regen_halfwidth"] = int(wood_regen_halfwidth) + self.layout_specs["Stone"]["regen_halfwidth"] = int(stone_regen_halfwidth) + assert 0 <= self.layout_specs["Wood"]["regen_halfwidth"] <= 3 + assert 0 <= self.layout_specs["Stone"]["regen_halfwidth"] <= 3 + # + self.layout_specs["Wood"]["regen_weight"] = float(wood_regen_weight) + self.layout_specs["Stone"]["regen_weight"] = float(stone_regen_weight) + assert 0 <= self.layout_specs["Wood"]["regen_weight"] <= 1 + assert 0 <= self.layout_specs["Stone"]["regen_weight"] <= 1 + # + self.layout_specs["Wood"]["max_health"] = int(wood_max_health) + self.layout_specs["Stone"]["max_health"] = int(stone_max_health) + assert self.layout_specs["Wood"]["max_health"] > 0 + assert self.layout_specs["Stone"]["max_health"] > 0 + # + self.clumpiness = { + "Wood": float(wood_clumpiness), + "Stone": float(stone_clumpiness), + } + assert all(0 <= v <= 1 for v in self.clumpiness.values()) + # + self.gradient_steepness = float(gradient_steepness) + assert self.gradient_steepness >= 1.0 + # + self.source_prob_maps = self.make_source_prob_maps() + self.source_maps = { + k: 
np.zeros_like(v) for k, v in self.source_prob_maps.items() + } + + # How much coin do agents begin with at upon reset + self.starting_agent_coin = float(starting_agent_coin) + assert self.starting_agent_coin >= 0.0 + + # Controls the diminishing marginal utility of coin. + # isoelastic_eta=0 means no diminishing utility. + self.isoelastic_eta = float(isoelastic_eta) + assert 0.0 <= self.isoelastic_eta <= 1.0 + + # The amount that labor is weighted in utility computation + # (once annealing is finished) + self.energy_cost = float(energy_cost) + assert self.energy_cost >= 0 + + # What value to use for calculating the progress of energy annealing + # If method = 'decay': #completed episodes + # If method = 'auto' : #timesteps where avg. agent reward > 0 + self.energy_warmup_method = energy_warmup_method.lower() + assert self.energy_warmup_method in ["decay", "auto"] + # Decay constant for annealing to full energy cost + # (if energy_warmup_constant == 0, there is no annealing) + self.energy_warmup_constant = float(energy_warmup_constant) + assert self.energy_warmup_constant >= 0 + self._auto_warmup_integrator = 0 + + # Which social welfare function to use + self.planner_reward_type = str(planner_reward_type).lower() + + # How much to weight equality if using SWF=eq*prod: + # 0 -> SWF=eq*prod + # 1 -> SWF=prod + self.mixing_weight_gini_vs_coin = float(mixing_weight_gini_vs_coin) + assert 0 <= self.mixing_weight_gini_vs_coin <= 1.0 + + # Use this to calculate marginal changes and deliver that as reward + self.init_optimization_metric = {agent.idx: 0 for agent in self.all_agents} + self.prev_optimization_metric = {agent.idx: 0 for agent in self.all_agents} + self.curr_optimization_metric = {agent.idx: 0 for agent in self.all_agents} + + @property + def energy_weight(self): + """ + Energy annealing progress. Multiply with self.energy_cost to get the + effective energy coefficient. 
+ """ + if self.energy_warmup_constant <= 0.0: + return 1.0 + + if self.energy_warmup_method == "decay": + return float(1.0 - np.exp(-self._completions / self.energy_warmup_constant)) + + if self.energy_warmup_method == "auto": + return float( + 1.0 + - np.exp(-self._auto_warmup_integrator / self.energy_warmup_constant) + ) + + raise NotImplementedError + + def get_current_optimization_metrics(self): + """ + Compute optimization metrics based on the current state. Used to compute reward. + + Returns: + curr_optimization_metric (dict): A dictionary of {agent.idx: metric} + with an entry for each agent (including the planner) in the env. + """ + curr_optimization_metric = {} + # (for agents) + for agent in self.world.agents: + curr_optimization_metric[agent.idx] = rewards.isoelastic_coin_minus_labor( + coin_endowment=agent.total_endowment("Coin"), + total_labor=agent.state["endogenous"]["Labor"], + isoelastic_eta=self.isoelastic_eta, + labor_coefficient=self.energy_weight * self.energy_cost, + ) + # (for the planner) + if self.planner_reward_type == "coin_eq_times_productivity": + curr_optimization_metric[ + self.world.planner.idx + ] = rewards.coin_eq_times_productivity( + coin_endowments=np.array( + [agent.total_endowment("Coin") for agent in self.world.agents] + ), + equality_weight=1 - self.mixing_weight_gini_vs_coin, + ) + elif self.planner_reward_type == "inv_income_weighted_coin_endowments": + curr_optimization_metric[ + self.world.planner.idx + ] = rewards.inv_income_weighted_coin_endowments( + coin_endowments=np.array( + [agent.total_endowment("Coin") for agent in self.world.agents] + ) + ) + elif self.planner_reward_type == "inv_income_weighted_utility": + curr_optimization_metric[ + self.world.planner.idx + ] = rewards.inv_income_weighted_utility( + coin_endowments=np.array( + [agent.total_endowment("Coin") for agent in self.world.agents] + ), + utilities=np.array( + [curr_optimization_metric[agent.idx] for agent in self.world.agents] + ), + ) + else: + 
print("No valid planner reward selected!") + raise NotImplementedError + return curr_optimization_metric + + def make_source_prob_maps(self): + """ + Make maps specifying how likely each location is to be assigned as a resource + source tile. + + Returns: + source_prob_maps (dict): Contains a source probability map for both + stone and wood + """ + prob_gradient = ( + np.arange(self.world_size[0])[:, None].repeat(self.world_size[1], axis=1) + ** self.gradient_steepness + ) + prob_gradient = prob_gradient / np.mean(prob_gradient) + + return { + "Wood": prob_gradient * self.layout_specs["Wood"]["starting_coverage"], + "Stone": prob_gradient[-1::-1] + * self.layout_specs["Wood"]["starting_coverage"], + } + + # The following methods must be implemented for each scenario + # ----------------------------------------------------------- + + def reset_starting_layout(self): + """ + Part 1/2 of scenario reset. This method handles resetting the state of the + environment managed by the scenario (i.e. resource & landmark layout). + + Here, generate a resource source layout consistent with target parameters. 
+ """ + happy_coverage = False + n_reset_tries = 0 + + # Attempt to do a reset until an attempt limit is reached or coverage is good + while n_reset_tries < 100 and not happy_coverage: + self.world.maps.clear() + + self.source_maps = { + k: np.zeros_like(v) for k, v in self.source_prob_maps.items() + } + + resources = ["Wood", "Stone"] + + for resource in resources: + clump = 1 - np.clip(self.clumpiness[resource], 0.0, 0.99) + + source_prob = self.source_prob_maps[resource] * 0.1 * clump + + empty = self.world.maps.empty + + tmp = np.random.rand(*source_prob.shape) + maybe_source_map = (tmp < source_prob) * empty + + n_tries = 0 + while np.mean(maybe_source_map) < ( + self.layout_specs[resource]["starting_coverage"] * clump + ): + tmp *= 0.9 + maybe_source_map = (tmp < source_prob) * empty + n_tries += 1 + if n_tries > 200: + break + + while ( + np.mean(maybe_source_map) + < self.layout_specs[resource]["starting_coverage"] + ): + kernel = np.random.randn(7, 7) > 0 + tmp = signal.convolve2d( + maybe_source_map + + (0.2 * np.random.randn(*maybe_source_map.shape)) + - 0.25, + kernel.astype(np.float32), + "same", + ) + maybe_source_map = np.maximum(tmp > 0, maybe_source_map) * empty + + self.source_maps[resource] = maybe_source_map + self.world.maps.set( + resource, maybe_source_map + ) # * self.layout_specs[resource]['max_health']) + self.world.maps.set(resource + "SourceBlock", maybe_source_map) + + # Restart if the resource distribution is too far off the target coverage + happy_coverage = True + for resource in resources: + coverage_quotient = ( + np.mean(self.source_maps[resource]) + / self.layout_specs[resource]["starting_coverage"] + ) + bound = 0.4 + if not (1 / (1 + bound)) <= coverage_quotient <= (1 + bound): + happy_coverage = False + + n_reset_tries += 1 + + # Apply checkering, if applicable + if self._checker_source_blocks: + for resource, source_map in self.source_maps.items(): + source_map = source_map * self._checker_mask + self.source_maps[resource] = 
source_map + self.world.maps.set(resource, source_map) + self.world.maps.set(resource + "SourceBlock", source_map) + + def reset_agent_states(self): + """ + Part 2/2 of scenario reset. This method handles resetting the state of the + agents themselves (i.e. inventory, locations, etc.). + + Here, empty inventories, give mobile agents any starting coin, and place them + in random accessible locations to start. + """ + self.world.clear_agent_locs() + + for agent in self.world.agents: + # Clear everything to start with + agent.state["inventory"] = {k: 0 for k in agent.inventory.keys()} + agent.state["escrow"] = {k: 0 for k in agent.inventory.keys()} + agent.state["endogenous"] = {k: 0 for k in agent.endogenous.keys()} + # Add starting coin + agent.state["inventory"]["Coin"] = float(self.starting_agent_coin) + + # Clear everything for the planner + self.world.planner.state["inventory"] = { + k: 0 for k in self.world.planner.inventory.keys() + } + self.world.planner.state["escrow"] = { + k: 0 for k in self.world.planner.escrow.keys() + } + + # Place the agents randomly in the world + for agent in self.world.get_random_order_agents(): + r = np.random.randint(0, self.world_size[0]) + c = np.random.randint(0, self.world_size[1]) + n_tries = 0 + while not self.world.can_agent_occupy(r, c, agent): + r = np.random.randint(0, self.world_size[0]) + c = np.random.randint(0, self.world_size[1]) + n_tries += 1 + if n_tries > 200: + raise TimeoutError + self.world.set_agent_loc(agent, r, c) + + def scenario_step(self): + """ + Update the state of the world according to whatever rules this scenario + implements. + + This gets called in the 'step' method (of base_env) after going through each + component step and before generating observations, rewards, etc. + + In this class of scenarios, the scenario step handles stochastic resource + regeneration. 
+ """ + + resources = ["Wood", "Stone"] + + for resource in resources: + d = 1 + (2 * self.layout_specs[resource]["regen_halfwidth"]) + kernel = ( + self.layout_specs[resource]["regen_weight"] * np.ones((d, d)) / (d ** 2) + ) + + resource_map = self.world.maps.get(resource) + resource_source_blocks = self.world.maps.get(resource + "SourceBlock") + spawnable = ( + self.world.maps.empty + resource_map + resource_source_blocks + ) > 0 + spawnable *= resource_source_blocks > 0 + + health = np.maximum(resource_map, resource_source_blocks) + respawn = np.random.rand(*health.shape) < signal.convolve2d( + health, kernel, "same" + ) + respawn *= spawnable + + self.world.maps.set( + resource, + np.minimum( + resource_map + respawn, self.layout_specs[resource]["max_health"] + ), + ) + + def generate_observations(self): + """ + Generate observations associated with this scenario. + + A scenario does not need to produce observations and can provide observations + for only some agent types; however, for a given agent type, it should either + always or never yield an observation. If it does yield an observation, + that observation should always have the same structure/sizes! + + Returns: + obs (dict): A dictionary of {agent.idx: agent_obs_dict}. In words, + return a dictionary with an entry for each agent (which can including + the planner) for which this scenario provides an observation. For each + entry, the key specifies the index of the agent and the value contains + its associated observation dictionary. + + Here, non-planner agents receive spatial observations (depending on the env + config) as well as the contents of their inventory and endogenous quantities. + The planner also receives spatial observations (again, depending on the env + config) as well as the inventory of each of the mobile agents. 
+ """ + obs = {} + curr_map = self.world.maps.state + + owner_map = self.world.maps.owner_state + loc_map = self.world.loc_map + agent_idx_maps = np.concatenate([owner_map, loc_map[None, :, :]], axis=0) + agent_idx_maps += 2 + agent_idx_maps[agent_idx_maps == 1] = 0 + + agent_locs = { + str(agent.idx): { + "loc-row": agent.loc[0] / self.world_size[0], + "loc-col": agent.loc[1] / self.world_size[1], + } + for agent in self.world.agents + } + agent_invs = { + str(agent.idx): { + "inventory-" + k: v * self.inv_scale for k, v in agent.inventory.items() + } + for agent in self.world.agents + } + + obs[self.world.planner.idx] = { + "inventory-" + k: v * self.inv_scale + for k, v in self.world.planner.inventory.items() + } + if self._planner_gets_spatial_info: + obs[self.world.planner.idx].update( + dict(map=curr_map, idx_map=agent_idx_maps) + ) + + # Mobile agents see the full map. Convey location info via one-hot map channels. + if self._full_observability: + for agent in self.world.agents: + my_map = np.array(agent_idx_maps) + my_map[my_map == int(agent.idx) + 2] = 1 + sidx = str(agent.idx) + obs[sidx] = {"map": curr_map, "idx_map": my_map} + obs[sidx].update(agent_invs[sidx]) + + # Mobile agents only see within a window around their position + else: + w = ( + self._mobile_agent_observation_range + ) # View halfwidth (only applicable without full observability) + + padded_map = np.pad( + curr_map, + [(0, 1), (w, w), (w, w)], + mode="constant", + constant_values=[(0, 1), (0, 0), (0, 0)], + ) + + padded_idx = np.pad( + agent_idx_maps, + [(0, 0), (w, w), (w, w)], + mode="constant", + constant_values=[(0, 0), (0, 0), (0, 0)], + ) + + for agent in self.world.agents: + r, c = [c + w for c in agent.loc] + visible_map = padded_map[ + :, (r - w) : (r + w + 1), (c - w) : (c + w + 1) + ] + visible_idx = np.array( + padded_idx[:, (r - w) : (r + w + 1), (c - w) : (c + w + 1)] + ) + + visible_idx[visible_idx == int(agent.idx) + 2] = 1 + + sidx = str(agent.idx) + + obs[sidx] = 
{"map": visible_map, "idx_map": visible_idx} + obs[sidx].update(agent_locs[sidx]) + obs[sidx].update(agent_invs[sidx]) + + # Agent-wise planner info (gets crunched into the planner obs in the + # base scenario code) + obs["p" + sidx] = agent_invs[sidx] + if self._planner_gets_spatial_info: + obs["p" + sidx].update(agent_locs[sidx]) + + return obs + + def compute_reward(self): + """ + Apply the reward function(s) associated with this scenario to get the rewards + from this step. + + Returns: + rew (dict): A dictionary of {agent.idx: agent_obs_dict}. In words, + return a dictionary with an entry for each agent in the environment + (including the planner). For each entry, the key specifies the index of + the agent and the value contains the scalar reward earned this timestep. + + Rewards are computed as the marginal utility (agents) or marginal social + welfare (planner) experienced on this timestep. Ignoring discounting, + this means that agents' (planner's) objective is to maximize the utility + (social welfare) associated with the terminal state of the episode. + """ + + # "curr_optimization_metric" hasn't been updated yet, so it gives us the + # utility from the last step. 
+ utility_at_end_of_last_time_step = deepcopy(self.curr_optimization_metric) + + # compute current objectives and store the values + self.curr_optimization_metric = self.get_current_optimization_metrics() + + # reward = curr - prev objectives + rew = { + k: float(v - utility_at_end_of_last_time_step[k]) + for k, v in self.curr_optimization_metric.items() + } + + # store the previous objective values + self.prev_optimization_metric.update(utility_at_end_of_last_time_step) + + # Automatic Energy Cost Annealing + # ------------------------------- + avg_agent_rew = np.mean([rew[a.idx] for a in self.world.agents]) + # Count the number of timesteps where the avg agent reward was > 0 + if avg_agent_rew > 0: + self._auto_warmup_integrator += 1 + + return rew + + # Optional methods for customization + # ---------------------------------- + + def additional_reset_steps(self): + """ + Extra scenario-specific steps that should be performed at the end of the reset + cycle. + + For each reset cycle... + First, reset_starting_layout() and reset_agent_states() will be called. + + Second, .reset() will be called for each registered component. + + Lastly, this method will be called to allow for any final customization of + the reset cycle. + + For this scenario, this method resets optimization metric trackers. + """ + # compute current objectives + curr_optimization_metric = self.get_current_optimization_metrics() + + self.curr_optimization_metric = deepcopy(curr_optimization_metric) + self.init_optimization_metric = deepcopy(curr_optimization_metric) + self.prev_optimization_metric = deepcopy(curr_optimization_metric) + + def scenario_metrics(self): + """ + Allows the scenario to generate metrics (collected along with component metrics + in the 'metrics' property). + + To have the scenario add metrics, this function needs to return a dictionary of + {metric_key: value} where 'value' is a scalar (no nesting or lists!) 
+
+        Here, summarize social metrics, endowments, utilities, and labor cost annealing
+        """
+        metrics = dict()
+
+        coin_endowments = np.array(
+            [agent.total_endowment("Coin") for agent in self.world.agents]
+        )
+        metrics["social/productivity"] = social_metrics.get_productivity(
+            coin_endowments
+        )
+        metrics["social/equality"] = social_metrics.get_equality(coin_endowments)
+
+        utilities = np.array(
+            [self.curr_optimization_metric[agent.idx] for agent in self.world.agents]
+        )
+        metrics[
+            "social_welfare/coin_eq_times_productivity"
+        ] = rewards.coin_eq_times_productivity(
+            coin_endowments=coin_endowments, equality_weight=1.0
+        )
+        metrics[
+            "social_welfare/inv_income_weighted_coin_endow"
+        ] = rewards.inv_income_weighted_coin_endowments(coin_endowments=coin_endowments)
+        metrics[
+            "social_welfare/inv_income_weighted_utility"
+        ] = rewards.inv_income_weighted_utility(
+            coin_endowments=coin_endowments, utilities=utilities
+        )
+
+        for agent in self.all_agents:
+            for resource in agent.inventory:  # inventory value unused; iterate keys
+                metrics[
+                    "endow/{}/{}".format(agent.idx, resource)
+                ] = agent.total_endowment(resource)
+
+            if agent.endogenous is not None:
+                for resource, quantity in agent.endogenous.items():
+                    metrics["endogenous/{}/{}".format(agent.idx, resource)] = quantity
+
+            metrics["util/{}".format(agent.idx)] = self.curr_optimization_metric[
+                agent.idx
+            ]
+
+        # Labor weight
+        metrics["labor/weighted_cost"] = self.energy_cost * self.energy_weight
+        metrics["labor/warmup_integrator"] = int(self._auto_warmup_integrator)
+
+        return metrics
+
+
+@scenario_registry.add
+class MultiZone(Uniform):
+    """
+    World containing stone and wood clustered in "zones" with stochastic regeneration.
+
+    For controlling how resource regeneration behavior...
+ Regen Halfwidth: width of regen kernel = 1 + (2 * halfwidth); set >0 to create + spatial social dilemma + Regen Weight: regen probability per tile counted by the regen kernel + Max Health: how many resource units can populate a source block + + Args: + num_partitions_row (int): Number of height-wise partitions (controls #zones). + num_partitions_col (int): Number of width-wise partitions (controls #zones). + num_wood_zones (int): Number of zones where wood will appear. + num_stone_zones (int): Number of zones where stone will appear. + num_wood_and_stone_zones (int): Number of zones where both wood and stone will + appear. + planner_gets_spatial_obs (bool): Whether the planner agent receives spatial + observations from the world. + full_observability (bool): Whether the mobile agents' spatial observation + includes the full world view or is instead an egocentric view. + mobile_agent_observation_range (int): If not using full_observability, + the spatial range (on each side of the agent) that is visible in the + spatial observations. + wood_regen_halfwidth (int): Regen halfwidth for wood. + wood_regen_weight (float): Regen weight for wood. + wood_max_health (int): Max wood units per wood source tile. + stone_regen_halfwidth (int): Regen halfwidth for stone. + stone_regen_weight (float): Regen weight for stone. + stone_max_health (int): Max stone units per stone source tile. + starting_agent_coin (int, float): Amount of coin agents have at t=0. + isoelastic_eta (float): Parameter controlling the shape of agent utility + wrt coin endowment. + energy_cost (float): Coefficient for converting labor to negative utility. + energy_warmup_constant (float): Decay constant that controls the rate at which + the effective energy cost is annealed from 0 to energy_cost. Set to 0 + (default) to disable annealing, meaning that the effective energy cost is + always energy_cost. The units of the decay constant depend on the choice of + energy_warmup_method. 
+ energy_warmup_method (str): How to schedule energy annealing (warmup). If + "decay" (default), use the number of completed episodes. If "auto", + use the number of timesteps where the average agent reward was positive. + planner_reward_type (str): The type of reward used for the planner. Options + are "coin_eq_times_productivity" (default), + "inv_income_weighted_coin_endowment", and "inv_income_weighted_utility". + mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/ + "coin_eq_times_productivity". Default is 0, which weights equality and + productivity equally. If set to 1, only productivity is rewarded. + """ + + name = "multi_zone/simple_wood_and_stone" + + def __init__( + self, + *args, + num_partitions_row=8, + num_partitions_col=8, + num_wood_zones=6, + num_stone_zones=6, + num_wood_and_stone_zones=4, + **kwargs + ): + self.num_partitions_row = num_partitions_row + self.num_partitions_col = num_partitions_col + self.zone_specs = { + "Wood": (0, num_wood_zones), + "Stone": (1, num_stone_zones), + "WoodStone": (2, num_wood_and_stone_zones), + } + + super().__init__(*args, **kwargs) + + def make_source_prob_maps(self): + """ + Make maps specifying how likely each location is to be assigned as a resource + source tile. + + Returns: + source_prob_maps (dict): Contains a source probability map for both + stone and wood. 
+ """ + # determines initial world probability masses + zone_names = list(self.zone_specs.keys()) + zone_indices = [v[0] for _, v in self.zone_specs.items()] + num_zones_per_type = [self.zone_specs[k][1] for k in zone_names] + num_zones = sum(num_zones_per_type) + + num_partitions_row = self.num_partitions_row + num_partitions_col = self.num_partitions_col + num_regions = num_partitions_row * num_partitions_col + + assert num_regions >= num_zones + + # define regions and starting resource-zone-type + # (wood, stone, mixed) for each region + + # note: row, col index over regions + partition_size_row = int(np.ceil(self.world_size[0] / num_partitions_row)) + partition_size_col = int(np.ceil(self.world_size[1] / num_partitions_col)) + + # serialize regions in the world and give them an index. + # -1 means no source blocks in that region + grid_zone_indices = np.concatenate( + [ + np.repeat(zone_indices, num_zones_per_type), + np.array([-1] * (num_regions - num_zones)), + ] + ) + np.random.shuffle(grid_zone_indices) + grid_zone_indices = grid_zone_indices.reshape( + (num_partitions_row, num_partitions_col) + ) + + wood_prob = np.where( + np.logical_or( + np.equal(grid_zone_indices, self.zone_specs["Wood"][0]), + np.equal(grid_zone_indices, self.zone_specs["WoodStone"][0]), + ), + np.ones_like(grid_zone_indices), + np.zeros_like(grid_zone_indices), + ) + wood_prob = np.kron( + wood_prob, np.ones((partition_size_row, partition_size_col)) + ) + wood_prob = wood_prob[: self.world_size[0], : self.world_size[1]] + wood_prob = wood_prob / np.mean(wood_prob) + + try: + assert wood_prob.shape[0] == self.world_size[0] + assert wood_prob.shape[1] == self.world_size[1] + except AssertionError: + print("World not correct size!") + raise + + stone_prob = np.where( + np.logical_or( + np.equal(grid_zone_indices, self.zone_specs["Stone"][0]), + np.equal(grid_zone_indices, self.zone_specs["WoodStone"][0]), + ), + np.ones_like(grid_zone_indices), + np.zeros_like(grid_zone_indices), + ) + 
stone_prob = np.kron(
+            stone_prob, np.ones((partition_size_row, partition_size_col))
+        )
+        stone_prob = stone_prob[: self.world_size[0], : self.world_size[1]]
+        stone_prob = stone_prob / np.mean(stone_prob)
+
+        try:
+            assert stone_prob.shape[0] == self.world_size[0]
+            assert stone_prob.shape[1] == self.world_size[1]
+        except AssertionError:
+            print("World not correct size!")
+            raise
+
+        return {
+            "Wood": wood_prob * self.layout_specs["Wood"]["starting_coverage"],
+            "Stone": stone_prob * self.layout_specs["Stone"]["starting_coverage"],  # was ["Wood"]: Stone must use its own coverage target
+        }
+
+    def reset_starting_layout(self):
+        """
+        Reset the starting layout of the world. Modifies parent scenario method such
+        that, before doing the reset, it first remakes the source prob maps.
+        """
+        self.source_prob_maps = self.make_source_prob_maps()
+        super().reset_starting_layout()
+
+
+@scenario_registry.add
+class Quadrant(Uniform):
+    """
+    World containing wood and stone, with water creating 4 nearly closed-off quadrants.
+    Wood is concentrated along the left of the map, stone along the top, creating
+    different resource concentrations within each of the quadrants.
+
+    For controlling how resource regeneration behavior...
+    Coverage: if fraction, target fraction of total tiles; if integer, target number
+        of tiles
+    Regen Halfwidth: width of regen kernel = 1 + (2 * halfwidth); set >0 to create
+        a spatial social dilemma
+    Regen Weight: regen probability per tile counted by the regen kernel
+    Max Health: how many resource units can populate a source block
+    Clumpiness: degree to which resources are spatially clustered
+    Gradient Steepness: degree to which stone/wood are restricted to the top/bottom
+        of the map
+
+    Args:
+        planner_gets_spatial_obs (bool): Whether the planner agent receives spatial
+            observations from the world.
+        full_observability (bool): Whether the mobile agents' spatial observation
+            includes the full world view or is instead an egocentric view.
+ mobile_agent_observation_range (int): If not using full_observability, + the spatial range (on each side of the agent) that is visible in the + spatial observations. + starting_wood_coverage (int, float): Target coverage of wood at t=0. + wood_regen_halfwidth (int): Regen halfwidth for wood. + wood_regen_weight (float): Regen weight for wood. + wood_max_health (int): Max wood units per wood source tile. + wood_clumpiness (float): Degree of wood clumping. + starting_stone_coverage (int, float): Target coverage of stone at t=0. + stone_regen_halfwidth (int): Regen halfwidth for stone. + stone_regen_weight (float): Regen weight for stone. + stone_max_health (int): Max stone units per stone source tile. + stone_clumpiness (float): Degree of stone clumping. + gradient_steepness (int, float): How steeply source tile probability falls + off from the top/bottom of the map. + checker_source_blocks (bool): Whether to space source tiles in a "checker" + formation. + starting_agent_coin (int, float): Amount of coin agents have at t=0. + isoelastic_eta (float): Parameter controlling the shape of agent utility + wrt coin endowment. + energy_cost (float): Coefficient for converting labor to negative utility. + energy_warmup_constant (float): Decay constant that controls the rate at which + the effective energy cost is annealed from 0 to energy_cost. Set to 0 + (default) to disable annealing, meaning that the effective energy cost is + always energy_cost. The units of the decay constant depend on the choice of + energy_warmup_method. + energy_warmup_method (str): How to schedule energy annealing (warmup). If + "decay" (default), use the number of completed episodes. If "auto", + use the number of timesteps where the average agent reward was positive. + planner_reward_type (str): The type of reward used for the planner. Options + are "coin_eq_times_productivity" (default), + "inv_income_weighted_coin_endowment", and "inv_income_weighted_utility". 
+        mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/
+            "coin_eq_times_productivity". Default is 0, which weights equality and
+            productivity equally. If set to 1, only productivity is rewarded.
+
+    """
+
+    name = "quadrant/simple_wood_and_stone"
+    required_entities = Uniform.required_entities + ["Water"]
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        width = self.world_size[1]
+        height = self.world_size[0]
+
+        o0 = 0.2
+        o1 = 0.35
+        rN = (0.5 + np.arange(height)) / height
+        cN = (0.5 + np.arange(width)) / width
+        rSeg = ((rN < o0) + (rN > o1)) * ((rN < 1 - o1) + (rN > 1 - o0))
+        cSeg = ((cN < o0) + (cN > o1)) * ((cN < 1 - o1) + (cN > 1 - o0))
+        water = np.zeros((height, width))
+        water[:, width // 2] = rSeg  # vertical channel at middle COLUMN (was height // 2; wrong for non-square worlds)
+        water[height // 2, :] = cSeg  # horizontal channel at middle ROW (was width // 2; wrong for non-square worlds)
+        self._water = water
+
+        for k, v in self.source_prob_maps.items():
+            v = v * (1 - self._water)
+            v = v / np.sum(v)
+            self.source_prob_maps[k] = v
+
+    def make_source_prob_maps(self):
+        """
+        Make maps specifying how likely each location is to be assigned as a resource
+        source tile.
+
+        Returns:
+            source_prob_maps (dict): Contains a source probability map for both
+                stone and wood
+        """
+        width = self.world_size[1]
+        height = self.world_size[0]
+
+        prob_gradient = np.arange(height)[:, None].repeat(width, axis=1) ** (
+            self.gradient_steepness / 2
+        )
+        w_prob_gradient = prob_gradient[::-1]
+
+        prob_gradient = np.arange(width)[None].repeat(height, axis=0) ** (
+            self.gradient_steepness / 2
+        )
+        s_prob_gradient = prob_gradient[:, ::-1]
+
+        prob_sum = s_prob_gradient + w_prob_gradient
+
+        s_prob_gradient = prob_sum * s_prob_gradient
+        w_prob_gradient = prob_sum * w_prob_gradient
+
+        s_prob_gradient = s_prob_gradient / np.sum(s_prob_gradient)
+        w_prob_gradient = w_prob_gradient / np.sum(w_prob_gradient)
+
+        return {"Stone": s_prob_gradient, "Wood": w_prob_gradient}
+
+    def reset_starting_layout(self):
+        """
+        Reset the starting layout of the world. Modifies parent scenario method to
+        add water and remove resources from water locations.
+        """
+
+        # Generate the starting layout like normal
+        super().reset_starting_layout()
+
+        width = self.world_size[1]
+        height = self.world_size[0]
+
+        # Remove anything at the water line
+        for entity, state in self.world.maps.items():
+            if isinstance(state, dict):
+                state["health"][:, width // 2] = 0
+                state["health"][height // 2, :] = 0
+                state["owner"][:, width // 2] = -1
+                state["owner"][height // 2, :] = -1
+            else:
+                state[:, width // 2] = 0
+                state[height // 2, :] = 0
+            self.world.maps.set(entity, state)
+        for entity, state in self.source_maps.items():
+            state[:, width // 2] = 0
+            state[height // 2, :] = 0
+            self.source_maps[entity] = state
+
+        # Place water
+        self.world.maps.set("Water", self._water)
diff --git a/ai_economist/foundation/scenarios/simple_wood_and_stone/layout_from_file.py b/ai_economist/foundation/scenarios/simple_wood_and_stone/layout_from_file.py
new file mode 100644
index 0000000..882f96f
--- /dev/null
+++ b/ai_economist/foundation/scenarios/simple_wood_and_stone/layout_from_file.py
@@ -0,0 +1,800 @@
+# Copyright (c) 2020, salesforce.com, inc.
+# All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+# For full license text, see the LICENSE file in the repo root
+# or https://opensource.org/licenses/BSD-3-Clause
+
+from copy import deepcopy
+from pathlib import Path
+
+import numpy as np
+from scipy import signal
+
+from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry
+from ai_economist.foundation.scenarios.utils import rewards, social_metrics
+
+
+@scenario_registry.add
+class LayoutFromFile(BaseEnvironment):
+    """
+    World containing stone and wood with stochastic regeneration. Refers to a fixed
+    layout file (see ./map_txt/ for examples) to determine the spatial arrangement of
+    stone, wood, and water tiles.
+ + Args: + planner_gets_spatial_obs (bool): Whether the planner agent receives spatial + observations from the world. + full_observability (bool): Whether the mobile agents' spatial observation + includes the full world view or is instead an egocentric view. + mobile_agent_observation_range (int): If not using full_observability, + the spatial range (on each side of the agent) that is visible in the + spatial observations. + env_layout_file (str): Name of the layout file in ./map_txt/ to use. + Note: The world dimensions of that layout must match the world dimensions + argument used to construct the environment. + resource_regen_prob (float): Probability that an empty source tile will + regenerate a new resource unit. + fixed_four_skill_and_loc (bool): Whether to use a fixed set of build skills and + starting locations, with agents grouped into starting locations based on + which skill quartile they are in. False, by default. + True, for experiments in https://arxiv.org/abs/2004.13332. + Note: Requires that the environment uses the "Build" component with + skill_dist="pareto". + starting_agent_coin (int, float): Amount of coin agents have at t=0. Defaults + to zero coin. + isoelastic_eta (float): Parameter controlling the shape of agent utility + wrt coin endowment. + energy_cost (float): Coefficient for converting labor to negative utility. + energy_warmup_constant (float): Decay constant that controls the rate at which + the effective energy cost is annealed from 0 to energy_cost. Set to 0 + (default) to disable annealing, meaning that the effective energy cost is + always energy_cost. The units of the decay constant depend on the choice of + energy_warmup_method. + energy_warmup_method (str): How to schedule energy annealing (warmup). If + "decay" (default), use the number of completed episodes. If "auto", + use the number of timesteps where the average agent reward was positive. + planner_reward_type (str): The type of reward used for the planner. 
Options + are "coin_eq_times_productivity" (default), + "inv_income_weighted_coin_endowment", and "inv_income_weighted_utility". + mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/ + "coin_eq_times_productivity". Default is 0, which weights equality and + productivity equally. If set to 1, only productivity is rewarded. + """ + + name = "layout_from_file/simple_wood_and_stone" + agent_subclasses = ["BasicMobileAgent", "BasicPlanner"] + required_entities = ["Wood", "Stone", "Water"] + + def __init__( + self, + *base_env_args, + planner_gets_spatial_info=True, + full_observability=False, + mobile_agent_observation_range=5, + env_layout_file="quadrant_25x25_20each_30clump.txt", + resource_regen_prob=0.01, + fixed_four_skill_and_loc=False, + starting_agent_coin=0, + isoelastic_eta=0.23, + energy_cost=0.21, + energy_warmup_constant=0, + energy_warmup_method="decay", + planner_reward_type="coin_eq_times_productivity", + mixing_weight_gini_vs_coin=0.0, + **base_env_kwargs, + ): + super().__init__(*base_env_args, **base_env_kwargs) + + # Whether agents receive spatial information in their observation tensor + self._planner_gets_spatial_info = bool(planner_gets_spatial_info) + + # Whether the (non-planner) agents can see the whole world map + self._full_observability = bool(full_observability) + + self._mobile_agent_observation_range = int(mobile_agent_observation_range) + + # Load in the layout + path_to_layout_file = Path(f"{Path(__file__).parent}/map_txt/{env_layout_file}") + + with open(path_to_layout_file, "r") as f: + self.env_layout_string = f.read() + self.env_layout = self.env_layout_string.split(";") + + # Convert the layout to landmark maps + landmark_lookup = {"W": "Wood", "S": "Stone", "@": "Water"} + self._source_maps = { + r: np.zeros(self.world_size) for r in landmark_lookup.values() + } + for r, symbol_row in enumerate(self.env_layout): + for c, symbol in enumerate(symbol_row): + landmark = landmark_lookup.get(symbol, None) + if 
landmark: + self._source_maps[landmark][r, c] = 1 + + # For controlling how resource regeneration behavior + self.layout_specs = dict( + Wood={ + "regen_weight": float(resource_regen_prob), + "regen_halfwidth": 0, + "max_health": 1, + }, + Stone={ + "regen_weight": float(resource_regen_prob), + "regen_halfwidth": 0, + "max_health": 1, + }, + ) + assert 0 <= self.layout_specs["Wood"]["regen_weight"] <= 1 + assert 0 <= self.layout_specs["Stone"]["regen_weight"] <= 1 + + # How much coin do agents begin with at upon reset + self.starting_agent_coin = float(starting_agent_coin) + assert self.starting_agent_coin >= 0.0 + + # Controls the diminishing marginal utility of coin. + # isoelastic_eta=0 means no diminishing utility. + self.isoelastic_eta = float(isoelastic_eta) + assert 0.0 <= self.isoelastic_eta <= 1.0 + + # The amount that labor is weighted in utility computation + # (once annealing is finished) + self.energy_cost = float(energy_cost) + assert self.energy_cost >= 0 + + # Which method to use for calculating the progress of energy annealing + # If method = 'decay': #completed episodes + # If method = 'auto' : #timesteps where avg. 
agent reward > 0 + self.energy_warmup_method = energy_warmup_method.lower() + assert self.energy_warmup_method in ["decay", "auto"] + # Decay constant for annealing to full energy cost + # (if energy_warmup_constant == 0, there is no annealing) + self.energy_warmup_constant = float(energy_warmup_constant) + assert self.energy_warmup_constant >= 0 + self._auto_warmup_integrator = 0 + + # Which social welfare function to use + self.planner_reward_type = str(planner_reward_type).lower() + + # How much to weight equality if using SWF=eq*prod: + # 0 -> SWF=eq * prod + # 1 -> SWF=prod + self.mixing_weight_gini_vs_coin = float(mixing_weight_gini_vs_coin) + assert 0 <= self.mixing_weight_gini_vs_coin <= 1.0 + + # Use this to calculate marginal changes and deliver that as reward + self.init_optimization_metric = {agent.idx: 0 for agent in self.all_agents} + self.prev_optimization_metric = {agent.idx: 0 for agent in self.all_agents} + self.curr_optimization_metric = {agent.idx: 0 for agent in self.all_agents} + + """ + Fixed Four Skill and Loc + ------------------------ + """ + self.agent_starting_pos = {agent.idx: [] for agent in self.world.agents} + + self.fixed_four_skill_and_loc = bool(fixed_four_skill_and_loc) + if self.fixed_four_skill_and_loc: + bm = self.get_component("Build") + assert bm.skill_dist == "pareto" + pmsm = bm.payment_max_skill_multiplier + + # Temporarily switch to a fixed seed for controlling randomness + seed_state = np.random.get_state() + np.random.seed(seed=1) + + # Generate a batch (100000) of num_agents (sorted/clipped) Pareto samples. + pareto_samples = np.random.pareto(4, size=(100000, self.n_agents)) + clipped_skills = np.minimum(pmsm, (pmsm - 1) * pareto_samples + 1) + sorted_clipped_skills = np.sort(clipped_skills, axis=1) + # The skill level of the i-th skill-ranked agent is the average of the + # i-th ranked samples throughout the batch. 
+            average_ranked_skills = sorted_clipped_skills.mean(axis=0)
+            self._avg_ranked_skill = average_ranked_skills * bm.payment
+
+            np.random.set_state(seed_state)
+
+            # Fill in the starting location associated with each skill rank
+            starting_ranked_locs = [
+                # Worst group of agents goes in top right
+                (0, self.world_size[1] - 1),
+                # Second-worst group of agents goes in bottom left
+                (self.world_size[0] - 1, 0),
+                # Second-best group of agents goes in top left
+                (0, 0),
+                # Best group of agents goes in bottom right
+                (self.world_size[0] - 1, self.world_size[1] - 1),  # row was world_size[1]-1; wrong for non-square worlds
+            ]
+            self._ranked_locs = []
+
+            # Based on skill, assign each agent to one of the location groups
+            skill_groups = np.floor(
+                np.arange(self.n_agents) * (4 / self.n_agents),
+            ).astype(int)  # np.int alias removed in NumPy 1.24
+            n_in_group = np.zeros(4, dtype=int)  # np.int alias removed in NumPy 1.24
+            for g in skill_groups:
+                # The position within the group is given by the number of agents
+                # counted in the group thus far.
+                g_pos = n_in_group[g]
+
+                # Top right
+                if g == 0:
+                    r = starting_ranked_locs[g][0] + (g_pos // 4)
+                    c = starting_ranked_locs[g][1] - (g_pos % 4)
+                    self._ranked_locs.append((r, c))
+
+                # Bottom left
+                elif g == 1:
+                    r = starting_ranked_locs[g][0] - (g_pos // 4)
+                    c = starting_ranked_locs[g][1] + (g_pos % 4)
+                    self._ranked_locs.append((r, c))
+
+                # Top left
+                elif g == 2:
+                    r = starting_ranked_locs[g][0] + (g_pos // 4)
+                    c = starting_ranked_locs[g][1] + (g_pos % 4)
+                    self._ranked_locs.append((r, c))
+
+                # Bottom right
+                elif g == 3:
+                    r = starting_ranked_locs[g][0] - (g_pos // 4)
+                    c = starting_ranked_locs[g][1] - (g_pos % 4)
+                    self._ranked_locs.append((r, c))
+
+                else:
+                    raise ValueError
+
+                # Count the agent we just placed.
+                n_in_group[g] = n_in_group[g] + 1
+
+    @property
+    def energy_weight(self):
+        """
+        Energy annealing progress. Multiply with self.energy_cost to get the
+        effective energy coefficient.
+ """ + if self.energy_warmup_constant <= 0.0: + return 1.0 + + if self.energy_warmup_method == "decay": + return float(1.0 - np.exp(-self._completions / self.energy_warmup_constant)) + + if self.energy_warmup_method == "auto": + return float( + 1.0 + - np.exp(-self._auto_warmup_integrator / self.energy_warmup_constant) + ) + + raise NotImplementedError + + def get_current_optimization_metrics(self): + """ + Compute optimization metrics based on the current state. Used to compute reward. + + Returns: + curr_optimization_metric (dict): A dictionary of {agent.idx: metric} + with an entry for each agent (including the planner) in the env. + """ + curr_optimization_metric = {} + # (for agents) + for agent in self.world.agents: + curr_optimization_metric[agent.idx] = rewards.isoelastic_coin_minus_labor( + coin_endowment=agent.total_endowment("Coin"), + total_labor=agent.state["endogenous"]["Labor"], + isoelastic_eta=self.isoelastic_eta, + labor_coefficient=self.energy_weight * self.energy_cost, + ) + # (for the planner) + if self.planner_reward_type == "coin_eq_times_productivity": + curr_optimization_metric[ + self.world.planner.idx + ] = rewards.coin_eq_times_productivity( + coin_endowments=np.array( + [agent.total_endowment("Coin") for agent in self.world.agents] + ), + equality_weight=1 - self.mixing_weight_gini_vs_coin, + ) + elif self.planner_reward_type == "inv_income_weighted_coin_endowments": + curr_optimization_metric[ + self.world.planner.idx + ] = rewards.inv_income_weighted_coin_endowments( + coin_endowments=np.array( + [agent.total_endowment("Coin") for agent in self.world.agents] + ) + ) + elif self.planner_reward_type == "inv_income_weighted_utility": + curr_optimization_metric[ + self.world.planner.idx + ] = rewards.inv_income_weighted_utility( + coin_endowments=np.array( + [agent.total_endowment("Coin") for agent in self.world.agents] + ), + utilities=np.array( + [curr_optimization_metric[agent.idx] for agent in self.world.agents] + ), + ) + else: + 
print("No valid planner reward selected!") + raise NotImplementedError + return curr_optimization_metric + + # The following methods must be implemented for each scenario + # ----------------------------------------------------------- + + def reset_starting_layout(self): + """ + Part 1/2 of scenario reset. This method handles resetting the state of the + environment managed by the scenario (i.e. resource & landmark layout). + + Here, reset to the layout in the fixed layout file + """ + self.world.maps.clear() + for landmark, landmark_map in self._source_maps.items(): + self.world.maps.set(landmark, landmark_map) + if landmark in ["Stone", "Wood"]: + self.world.maps.set(landmark + "SourceBlock", landmark_map) + + def reset_agent_states(self): + """ + Part 2/2 of scenario reset. This method handles resetting the state of the + agents themselves (i.e. inventory, locations, etc.). + + Here, empty inventories and place mobile agents in random, accessible + locations to start. Note: If using fixed_four_skill_and_loc, the starting + locations will be overridden in self.additional_reset_steps. 
+ """ + self.world.clear_agent_locs() + for agent in self.world.agents: + agent.state["inventory"] = {k: 0 for k in agent.inventory.keys()} + agent.state["escrow"] = {k: 0 for k in agent.inventory.keys()} + agent.state["endogenous"] = {k: 0 for k in agent.endogenous.keys()} + # Add starting coin + agent.state["inventory"]["Coin"] = float(self.starting_agent_coin) + + self.world.planner.state["inventory"] = { + k: 0 for k in self.world.planner.inventory.keys() + } + self.world.planner.state["escrow"] = { + k: 0 for k in self.world.planner.escrow.keys() + } + + for agent in self.world.agents: + r = np.random.randint(0, self.world_size[0]) + c = np.random.randint(0, self.world_size[1]) + n_tries = 0 + while not self.world.can_agent_occupy(r, c, agent): + r = np.random.randint(0, self.world_size[0]) + c = np.random.randint(0, self.world_size[1]) + n_tries += 1 + if n_tries > 200: + raise TimeoutError + r, c = self.world.set_agent_loc(agent, r, c) + + def scenario_step(self): + """ + Update the state of the world according to whatever rules this scenario + implements. + + This gets called in the 'step' method (of base_env) after going through each + component step and before generating observations, rewards, etc. + + In this class of scenarios, the scenario step handles stochastic resource + regeneration. 
+ """ + + resources = ["Wood", "Stone"] + + for resource in resources: + d = 1 + (2 * self.layout_specs[resource]["regen_halfwidth"]) + kernel = ( + self.layout_specs[resource]["regen_weight"] * np.ones((d, d)) / (d ** 2) + ) + + resource_map = self.world.maps.get(resource) + resource_source_blocks = self.world.maps.get(resource + "SourceBlock") + spawnable = ( + self.world.maps.empty + resource_map + resource_source_blocks + ) > 0 + spawnable *= resource_source_blocks > 0 + + health = np.maximum(resource_map, resource_source_blocks) + respawn = np.random.rand(*health.shape) < signal.convolve2d( + health, kernel, "same" + ) + respawn *= spawnable + + self.world.maps.set( + resource, + np.minimum( + resource_map + respawn, self.layout_specs[resource]["max_health"] + ), + ) + + def generate_observations(self): + """ + Generate observations associated with this scenario. + + A scenario does not need to produce observations and can provide observations + for only some agent types; however, for a given agent type, it should either + always or never yield an observation. If it does yield an observation, + that observation should always have the same structure/sizes! + + Returns: + obs (dict): A dictionary of {agent.idx: agent_obs_dict}. In words, + return a dictionary with an entry for each agent (which can including + the planner) for which this scenario provides an observation. For each + entry, the key specifies the index of the agent and the value contains + its associated observation dictionary. + + Here, non-planner agents receive spatial observations (depending on the env + config) as well as the contents of their inventory and endogenous quantities. + The planner also receives spatial observations (again, depending on the env + config) as well as the inventory of each of the mobile agents. 
+ """ + obs = {} + curr_map = self.world.maps.state + + owner_map = self.world.maps.owner_state + loc_map = self.world.loc_map + agent_idx_maps = np.concatenate([owner_map, loc_map[None, :, :]], axis=0) + agent_idx_maps += 2 + agent_idx_maps[agent_idx_maps == 1] = 0 + + agent_locs = { + str(agent.idx): { + "loc-row": agent.loc[0] / self.world_size[0], + "loc-col": agent.loc[1] / self.world_size[1], + } + for agent in self.world.agents + } + agent_invs = { + str(agent.idx): { + "inventory-" + k: v * self.inv_scale for k, v in agent.inventory.items() + } + for agent in self.world.agents + } + + obs[self.world.planner.idx] = { + "inventory-" + k: v * self.inv_scale + for k, v in self.world.planner.inventory.items() + } + if self._planner_gets_spatial_info: + obs[self.world.planner.idx].update( + dict(map=curr_map, idx_map=agent_idx_maps) + ) + + # Mobile agents see the full map. Convey location info via one-hot map channels. + if self._full_observability: + for agent in self.world.agents: + my_map = np.array(agent_idx_maps) + my_map[my_map == int(agent.idx) + 2] = 1 + sidx = str(agent.idx) + obs[sidx] = {"map": curr_map, "idx_map": my_map} + obs[sidx].update(agent_invs[sidx]) + + # Mobile agents only see within a window around their position + else: + w = ( + self._mobile_agent_observation_range + ) # View halfwidth (only applicable without full observability) + + padded_map = np.pad( + curr_map, + [(0, 1), (w, w), (w, w)], + mode="constant", + constant_values=[(0, 1), (0, 0), (0, 0)], + ) + + padded_idx = np.pad( + agent_idx_maps, + [(0, 0), (w, w), (w, w)], + mode="constant", + constant_values=[(0, 0), (0, 0), (0, 0)], + ) + + for agent in self.world.agents: + r, c = [c + w for c in agent.loc] + visible_map = padded_map[ + :, (r - w) : (r + w + 1), (c - w) : (c + w + 1) + ] + visible_idx = np.array( + padded_idx[:, (r - w) : (r + w + 1), (c - w) : (c + w + 1)] + ) + + visible_idx[visible_idx == int(agent.idx) + 2] = 1 + + sidx = str(agent.idx) + + obs[sidx] = 
{"map": visible_map, "idx_map": visible_idx} + obs[sidx].update(agent_locs[sidx]) + obs[sidx].update(agent_invs[sidx]) + + # Agent-wise planner info (gets crunched into the planner obs in the + # base scenario code) + obs["p" + sidx] = agent_invs[sidx] + if self._planner_gets_spatial_info: + obs["p" + sidx].update(agent_locs[sidx]) + + return obs + + def compute_reward(self): + """ + Apply the reward function(s) associated with this scenario to get the rewards + from this step. + + Returns: + rew (dict): A dictionary of {agent.idx: agent_obs_dict}. In words, + return a dictionary with an entry for each agent in the environment + (including the planner). For each entry, the key specifies the index of + the agent and the value contains the scalar reward earned this timestep. + + Rewards are computed as the marginal utility (agents) or marginal social + welfare (planner) experienced on this timestep. Ignoring discounting, + this means that agents' (planner's) objective is to maximize the utility + (social welfare) associated with the terminal state of the episode. + """ + + # "curr_optimization_metric" hasn't been updated yet, so it gives us the + # utility from the last step. 
+ utility_at_end_of_last_time_step = deepcopy(self.curr_optimization_metric) + + # compute current objectives and store the values + self.curr_optimization_metric = self.get_current_optimization_metrics() + + # reward = curr - prev objectives + rew = { + k: float(v - utility_at_end_of_last_time_step[k]) + for k, v in self.curr_optimization_metric.items() + } + + # store the previous objective values + self.prev_optimization_metric.update(utility_at_end_of_last_time_step) + + # Automatic Energy Cost Annealing + # ------------------------------- + avg_agent_rew = np.mean([rew[a.idx] for a in self.world.agents]) + # Count the number of timesteps where the avg agent reward was > 0 + if avg_agent_rew > 0: + self._auto_warmup_integrator += 1 + + return rew + + # Optional methods for customization + # ---------------------------------- + + def additional_reset_steps(self): + """ + Extra scenario-specific steps that should be performed at the end of the reset + cycle. + + For each reset cycle... + First, reset_starting_layout() and reset_agent_states() will be called. + + Second, .reset() will be called for each registered component. + + Lastly, this method will be called to allow for any final customization of + the reset cycle. + + For this scenario, this method resets optimization metric trackers. If using + fixed_four_skill_and_loc, this is where each agent gets assigned to one of + the four fixed skill/loc combinations. The agent-->skill/loc assignment is + permuted so that all four skill/loc combinations are used. 
+ """ + if self.fixed_four_skill_and_loc: + self.world.clear_agent_locs() + for i, agent in enumerate(self.world.get_random_order_agents()): + self.world.set_agent_loc(agent, *self._ranked_locs[i]) + agent.state["build_payment"] = self._avg_ranked_skill[i] + + # compute current objectives + curr_optimization_metric = self.get_current_optimization_metrics() + + self.curr_optimization_metric = deepcopy(curr_optimization_metric) + self.init_optimization_metric = deepcopy(curr_optimization_metric) + self.prev_optimization_metric = deepcopy(curr_optimization_metric) + + def scenario_metrics(self): + """ + Allows the scenario to generate metrics (collected along with component metrics + in the 'metrics' property). + + To have the scenario add metrics, this function needs to return a dictionary of + {metric_key: value} where 'value' is a scalar (no nesting or lists!) + + Here, summarize social metrics, endowments, utilities, and labor cost annealing. + """ + metrics = dict() + + coin_endowments = np.array( + [agent.total_endowment("Coin") for agent in self.world.agents] + ) + metrics["social/productivity"] = social_metrics.get_productivity( + coin_endowments + ) + metrics["social/equality"] = social_metrics.get_equality(coin_endowments) + + utilities = np.array( + [self.curr_optimization_metric[agent.idx] for agent in self.world.agents] + ) + metrics[ + "social_welfare/coin_eq_times_productivity" + ] = rewards.coin_eq_times_productivity( + coin_endowments=coin_endowments, equality_weight=1.0 + ) + metrics[ + "social_welfare/inv_income_weighted_coin_endow" + ] = rewards.inv_income_weighted_coin_endowments(coin_endowments=coin_endowments) + metrics[ + "social_welfare/inv_income_weighted_utility" + ] = rewards.inv_income_weighted_utility( + coin_endowments=coin_endowments, utilities=utilities + ) + + for agent in self.all_agents: + for resource, quantity in agent.inventory.items(): + metrics[ + "endow/{}/{}".format(agent.idx, resource) + ] = agent.total_endowment(resource) + 
+ if agent.endogenous is not None: + for resource, quantity in agent.endogenous.items(): + metrics["endogenous/{}/{}".format(agent.idx, resource)] = quantity + + metrics["util/{}".format(agent.idx)] = self.curr_optimization_metric[ + agent.idx + ] + + # Labor weight + metrics["labor/weighted_cost"] = self.energy_cost * self.energy_weight + metrics["labor/warmup_integrator"] = int(self._auto_warmup_integrator) + + return metrics + + +@scenario_registry.add +class SplitLayout(LayoutFromFile): + """ + Extends layout_from_file/simple_wood_and_stone to impose a row of water midway + through the map, uses a fixed set of pareto-distributed building skills (requires a + Build component), and places agents in the top/bottom depending on skill rank. + + Args: + water_row (int): Row of the map where the water barrier is placed. Defaults + to half the world height. + skill_rank_of_top_agents (int, float, tuple, list): Index/indices specifying + which agent(s) to place in the top of the map. Indices refer to the skill + ranking, with 0 referring to the highest-skilled agent. Defaults to only + the highest-skilled agent in the top. + planner_gets_spatial_obs (bool): Whether the planner agent receives spatial + observations from the world. + full_observability (bool): Whether the mobile agents' spatial observation + includes the full world view or is instead an egocentric view. + mobile_agent_observation_range (int): If not using full_observability, + the spatial range (on each side of the agent) that is visible in the + spatial observations. + env_layout_file (str): Name of the layout file in ./map_txt/ to use. + Note: The world dimensions of that layout must match the world dimensions + argument used to construct the environment. + resource_regen_prob (float): Probability that an empty source tile will + regenerate a new resource unit. + starting_agent_coin (int, float): Amount of coin agents have at t=0. Defaults + to zero coin. 
+ isoelastic_eta (float): Parameter controlling the shape of agent utility + wrt coin endowment. + energy_cost (float): Coefficient for converting labor to negative utility. + energy_warmup_constant (float): Decay constant that controls the rate at which + the effective energy cost is annealed from 0 to energy_cost. Set to 0 + (default) to disable annealing, meaning that the effective energy cost is + always energy_cost. The units of the decay constant depend on the choice of + energy_warmup_method. + energy_warmup_method (str): How to schedule energy annealing (warmup). If + "decay" (default), use the number of completed episodes. If "auto", + use the number of timesteps where the average agent reward was positive. + planner_reward_type (str): The type of reward used for the planner. Options + are "coin_eq_times_productivity" (default), + "inv_income_weighted_coin_endowment", and "inv_income_weighted_utility". + mixing_weight_gini_vs_coin (float): Degree to which equality is ignored w/ + "coin_eq_times_productivity". Default is 0, which weights equality and + productivity equally. If set to 1, only productivity is rewarded. + """ + + name = "split_layout/simple_wood_and_stone" + + def __init__( + self, + *args, + water_row=None, + skill_rank_of_top_agents=None, + **kwargs, + ): + super().__init__(*args, **kwargs) + + if self.fixed_four_skill_and_loc: + raise ValueError( + "The split layout scenario does not support " + "fixed_four_skill_and_loc. Set this to False." 
+ ) + + # Augment the fixed layout to include a row of water through the middle + if water_row is None: + self._water_line = self.world_size[0] // 2 + else: + self._water_line = int(water_row) + assert 0 < self._water_line < self.world_size[0] - 1 + for landmark, landmark_map in self._source_maps.items(): + landmark_map[self._water_line, :] = 1 if landmark == "Water" else 0 + self._source_maps[landmark] = landmark_map + + # Controls logic for which agents (by skill rank) get placed on the top + if skill_rank_of_top_agents is None: + skill_rank_of_top_agents = [0] + + if isinstance(skill_rank_of_top_agents, (int, float)): + self.skill_rank_of_top_agents = [int(skill_rank_of_top_agents)] + elif isinstance(skill_rank_of_top_agents, (tuple, list)): + self.skill_rank_of_top_agents = list(set(skill_rank_of_top_agents)) + else: + raise TypeError( + "skill_rank_of_top_agents must be a scalar " + "index, or a list of scalar indices." + ) + for rank in self.skill_rank_of_top_agents: + assert 0 <= rank < self.n_agents + assert 0 < len(self.skill_rank_of_top_agents) < self.n_agents + + # Set the skill associated with each skill rank + bm = self.get_component("Build") + assert bm.skill_dist == "pareto" + pmsm = bm.payment_max_skill_multiplier + # Generate a batch (100000) of num_agents (sorted/clipped) Pareto samples. + pareto_samples = np.random.pareto(4, size=(100000, self.n_agents)) + clipped_skills = np.minimum(pmsm, (pmsm - 1) * pareto_samples + 1) + sorted_clipped_skills = np.sort(clipped_skills, axis=1) + # The skill level of the i-th skill-ranked agent is the average of the + # i-th ranked samples throughout the batch. 
+ average_ranked_skills = sorted_clipped_skills.mean(axis=0) + self._avg_ranked_skill = average_ranked_skills * bm.payment + # Reverse the order so index 0 is the highest-skilled + self._avg_ranked_skill = self._avg_ranked_skill[::-1] + + def additional_reset_steps(self): + """ + Extra scenario-specific steps that should be performed at the end of the reset + cycle. + + For each reset cycle... + First, reset_starting_layout() and reset_agent_states() will be called. + + Second, .reset() will be called for each registered component. + + Lastly, this method will be called to allow for any final customization of + the reset cycle. + + For this scenario, this method resets optimization metric trackers. This is + where each agent gets assigned to one of the skills and the starting + locations are reset according to self.skill_rank_of_top_agents. + """ + self.world.clear_agent_locs() + for i, agent in enumerate(self.world.get_random_order_agents()): + agent.state["build_payment"] = self._avg_ranked_skill[i] + if i in self.skill_rank_of_top_agents: + r_min, r_max = 0, self._water_line + else: + r_min, r_max = self._water_line + 1, self.world_size[0] + + r = np.random.randint(r_min, r_max) + c = np.random.randint(0, self.world_size[1]) + n_tries = 0 + while not self.world.can_agent_occupy(r, c, agent): + r = np.random.randint(r_min, r_max) + c = np.random.randint(0, self.world_size[1]) + n_tries += 1 + if n_tries > 200: + raise TimeoutError + self.world.set_agent_loc(agent, r, c) + + # compute current objectives + curr_optimization_metric = self.get_current_optimization_metrics() + + self.curr_optimization_metric = deepcopy(curr_optimization_metric) + self.init_optimization_metric = deepcopy(curr_optimization_metric) + self.prev_optimization_metric = deepcopy(curr_optimization_metric) diff --git a/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/closed_quadrant_25x25_20each_30clump.txt 
b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/closed_quadrant_25x25_20each_30clump.txt new file mode 100644 index 0000000..cfb2ebd --- /dev/null +++ b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/closed_quadrant_25x25_20each_30clump.txt @@ -0,0 +1 @@ +WWWWW W @ W ;WW W @ W W WW; W @ W W;SW S S @ ; @ W W ; SS @ ; S @ ; @ ; @ ;S S @ ; @ ; @ ;@@@@@@@@@@@@@@@@@@@@@@@@@; S @ ;S S @ ; S @ ;SS @ ; SS @ ;SS @ ; S @ ; @ ; @ ; @ ; @ ;S @ ; \ No newline at end of file diff --git a/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/env-pure_and_mixed-15x15.txt b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/env-pure_and_mixed-15x15.txt new file mode 100644 index 0000000..b19ddf5 --- /dev/null +++ b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/env-pure_and_mixed-15x15.txt @@ -0,0 +1 @@ +WWW @ ;WSS @ ;WWW @ ;WWW @ ;WSS ;SWS ; @ ;@@@@ @@@@ @@@; @ ;WWW @ S ; WW SS ;WWW SS;W W @ ;W @ ; @ S ; \ No newline at end of file diff --git a/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/env-pure_and_mixed-25x25.txt b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/env-pure_and_mixed-25x25.txt new file mode 100644 index 0000000..42e835f --- /dev/null +++ b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/env-pure_and_mixed-25x25.txt @@ -0,0 +1 @@ + WW W @ ; SWWW @ ;SSWW @ ;WSSSW @ ;WSSWW ;WS WS ; WWS ;SWW S @ ; S W @ ; WS W @ ; @ ; @ ;@@@@ @@@@@@@@@@@@ @@@; @ ; @ ; W @ SSSSS; WW @ SSS ; @ S SS; WW W @ SSS S;W WW SSSS ;WWW S SS; WWWW S S ;WW W @ S ; W @ S ; W @ S SSS; \ No newline at end of file diff --git a/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/env-pure_and_mixed-40x40.txt b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/env-pure_and_mixed-40x40.txt new file mode 100644 index 0000000..fae4a20 --- /dev/null +++ b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/env-pure_and_mixed-40x40.txt @@ -0,0 +1 
@@ + W WW ; W ; W W W ; W ; W W ; W W W ; W W ; WW ; ; ; ; ; ; ; ; ; SS W ; WW W; ; S S S; W WW S ; S S W ; W WS S S ; ; ; ; ; ; ; ; ; S ;S S ; ; SS ; ; S SS S ; SS S S ;S SS S S ;SSSS S SS ; \ No newline at end of file diff --git a/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/quadrant_25x25_20each_30clump.txt b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/quadrant_25x25_20each_30clump.txt new file mode 100644 index 0000000..b178c73 --- /dev/null +++ b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/quadrant_25x25_20each_30clump.txt @@ -0,0 +1 @@ +WWWWW W @ W ;WW W @ W W WW; W @ W W;SW S S @ ; @ W W ; SS ; S ; ; ;S S @ ; @ ; @ ;@@@@@ @@@@@@@ @@@@@; S @ ;S S @ ; S @ ;SS ; SS ;SS ; S ; @ ; @ ; @ ; @ ;S @ ; \ No newline at end of file diff --git a/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/quadrant_25x25_20each_30clump_no_water.txt b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/quadrant_25x25_20each_30clump_no_water.txt new file mode 100644 index 0000000..6bed29e --- /dev/null +++ b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/quadrant_25x25_20each_30clump_no_water.txt @@ -0,0 +1 @@ +WWWWW W W ;WW W W W WW; W W W;SW S S ; W W ; SS ; S ; ; ;S S ; ; ; ; S ;S S ; S ;SS ; SS ;SS ; S ; ; ; ; ;S ; \ No newline at end of file diff --git a/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/quadrant_40x40_50each.txt b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/quadrant_40x40_50each.txt new file mode 100644 index 0000000..75277aa --- /dev/null +++ b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/quadrant_40x40_50each.txt @@ -0,0 +1 @@ +WWWWWWWW WW @@ WW ;WWWWWWWW WW @@ WW ;WWW W @@ WW W WWW; W @@ WW WW; W @@ WW WW;SSW S SS @@ ; @@ W W ; @@ W W ; SSS ; SSS ; S ; ; ; ;SS SS @@ ;SS SS @@ ; @@ ; @@ ; @@ ;@@@@@@@@ @@@@@@@@@@@@ @@@@@@@@;@@@@@@@@ @@@@@@@@@@@@ @@@@@@@@; S @@ ;SS SS @@ ;SS SS @@ ; S @@ ; S @@ ;SSS ; SSS ; SSS 
;SSS ; SS ; SS ; @@ ; @@ ; @@ ; @@ ; @@ ; @@ ;SS @@ ;SS @@ ; \ No newline at end of file diff --git a/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/quadrant_40x40_50each_no_water.txt b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/quadrant_40x40_50each_no_water.txt new file mode 100644 index 0000000..694cbc3 --- /dev/null +++ b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/quadrant_40x40_50each_no_water.txt @@ -0,0 +1 @@ +WWWWWWWW WW WW ;WWWWWWWW WW WW ;WWW W WW W WWW; W WW WW; W WW WW;SSW S SS ; W W ; W W ; SSS ; SSS ; S ; ; ; ;SS SS ;SS SS ; ; ; ; ; ; S ;SS SS ;SS SS ; S ; S ;SSS ; SSS ; SSS ;SSS ; SS ; SS ; ; ; ; ; ; ;SS ;SS ; \ No newline at end of file diff --git a/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/quadrant_8x8_4each_8clump.txt b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/quadrant_8x8_4each_8clump.txt new file mode 100644 index 0000000..c8f9283 --- /dev/null +++ b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/quadrant_8x8_4each_8clump.txt @@ -0,0 +1 @@ +WWWW@WW W; WW @ WWW;SW S@ S ;S @ ;@@ @@@ @@; S @ ;S S @ ;S @ ; S@ ; diff --git a/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/top_wood_bottom_stone_14x14.txt b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/top_wood_bottom_stone_14x14.txt new file mode 100644 index 0000000..eebe61d --- /dev/null +++ b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/top_wood_bottom_stone_14x14.txt @@ -0,0 +1 @@ + WW W ; A WW A ;WW WA W ; A A ; ; AA A ; A ; ; ; ; A ; A SS S;SA S AS;S SA SA \ No newline at end of file diff --git a/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/uniform_25x25_25each_65clump.txt b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/uniform_25x25_25each_65clump.txt new file mode 100644 index 0000000..e9c8276 --- /dev/null +++ 
b/ai_economist/foundation/scenarios/simple_wood_and_stone/map_txt/uniform_25x25_25each_65clump.txt @@ -0,0 +1 @@ + SSSSS; SS SSSS; SS SSSS; S SSSS; SS ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; W; W WW; WW ;W WW W W WWW;WWWWW WW W WWWWW; \ No newline at end of file diff --git a/ai_economist/foundation/scenarios/utils/__init__.py b/ai_economist/foundation/scenarios/utils/__init__.py new file mode 100644 index 0000000..fc6cee4 --- /dev/null +++ b/ai_economist/foundation/scenarios/utils/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause diff --git a/ai_economist/foundation/scenarios/utils/rewards.py b/ai_economist/foundation/scenarios/utils/rewards.py new file mode 100644 index 0000000..7b4db8e --- /dev/null +++ b/ai_economist/foundation/scenarios/utils/rewards.py @@ -0,0 +1,133 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import numpy as np + +from ai_economist.foundation.scenarios.utils import social_metrics + + +def isoelastic_coin_minus_labor( + coin_endowment, total_labor, isoelastic_eta, labor_coefficient +): + """Agent utility, concave increasing in coin and linearly decreasing in labor. + + Args: + coin_endowment (float, ndarray): The amount of coin owned by the agent(s). + total_labor (float, ndarray): The amount of labor performed by the agent(s). + isoelastic_eta (float): Constant describing the shape of the utility profile + with respect to coin endowment. Must be between 0 and 1. 0 yields utility + that increases linearly with coin. 1 yields utility that increases with + log(coin). 
Utility from coin uses: + https://en.wikipedia.org/wiki/Isoelastic_utility + labor_coefficient (float): Constant describing the disutility experienced per + unit of labor performed. Disutility from labor equals: + labor_coefficient * total_labor + + Returns: + Agent utility (float) or utilities (ndarray). + """ + # https://en.wikipedia.org/wiki/Isoelastic_utility + assert np.all(coin_endowment >= 0) + assert 0 <= isoelastic_eta <= 1.0 + + # Utility from coin endowment + if isoelastic_eta == 1.0: # dangerous + util_c = np.log(np.max(1, coin_endowment)) + else: # isoelastic_eta >= 0 + util_c = (coin_endowment ** (1 - isoelastic_eta) - 1) / (1 - isoelastic_eta) + + # disutility from labor + util_l = total_labor * labor_coefficient + + # Net utility + util = util_c - util_l + + return util + + +def coin_minus_labor_cost( + coin_endowment, total_labor, labor_exponent, labor_coefficient +): + """Agent utility, linearly increasing in coin and decreasing as a power of labor. + + Args: + coin_endowment (float, ndarray): The amount of coin owned by the agent(s). + total_labor (float, ndarray): The amount of labor performed by the agent(s). + labor_exponent (float): Constant describing the shape of the utility profile + with respect to total labor. Must be between >1. + labor_coefficient (float): Constant describing the disutility experienced per + unit of labor performed. Disutility from labor equals: + labor_coefficient * total_labor. + + Returns: + Agent utility (float) or utilities (ndarray). 
+ """ + # https://en.wikipedia.org/wiki/Isoelastic_utility + assert np.all(coin_endowment >= 0) + assert labor_exponent > 1 + + # Utility from coin endowment + util_c = coin_endowment + + # Disutility from labor + util_l = (total_labor ** labor_exponent) * labor_coefficient + + # Net utility + util = util_c - util_l + + return util + + +def coin_eq_times_productivity(coin_endowments, equality_weight): + """Social welfare, measured as productivity scaled by the degree of coin equality. + + Args: + coin_endowments (ndarray): The array of coin endowments for each of the + agents in the simulated economy. + equality_weight (float): Constant that determines how productivity is scaled + by coin equality. Must be between 0 (SW = prod) and 1 (SW = prod * eq). + + Returns: + Product of coin equality and productivity (float). + """ + n_agents = len(coin_endowments) + prod = social_metrics.get_productivity(coin_endowments) / n_agents + equality = equality_weight * social_metrics.get_equality(coin_endowments) + ( + 1 - equality_weight + ) + return equality * prod + + +def inv_income_weighted_coin_endowments(coin_endowments): + """Social welfare, as weighted average endowment (weighted by inverse endowment). + + Args: + coin_endowments (ndarray): The array of coin endowments for each of the + agents in the simulated economy. + + Returns: + Weighted average coin endowment (float). + """ + pareto_weights = 1 / np.maximum(coin_endowments, 1) + pareto_weights = pareto_weights / np.sum(pareto_weights) + return np.sum(coin_endowments * pareto_weights) + + +def inv_income_weighted_utility(coin_endowments, utilities): + """Social welfare, as weighted average utility (weighted by inverse endowment). + + Args: + coin_endowments (ndarray): The array of coin endowments for each of the + agents in the simulated economy. + utilities (ndarray): The array of utilities for each of the agents in the + simulated economy. + + Returns: + Weighted average utility (float). 
+ """ + pareto_weights = 1 / np.maximum(coin_endowments, 1) + pareto_weights = pareto_weights / np.sum(pareto_weights) + return np.sum(utilities * pareto_weights) diff --git a/ai_economist/foundation/scenarios/utils/social_metrics.py b/ai_economist/foundation/scenarios/utils/social_metrics.py new file mode 100644 index 0000000..3326e2e --- /dev/null +++ b/ai_economist/foundation/scenarios/utils/social_metrics.py @@ -0,0 +1,75 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import numpy as np + + +def get_gini(endowments): + """Returns the normalized Gini index describing the distribution of endowments. + + https://en.wikipedia.org/wiki/Gini_coefficient + + Args: + endowments (ndarray): The array of endowments for each of the agents in the + simulated economy. + + Returns: + Normalized Gini index for the distribution of endowments (float). A value of 1 + indicates everything belongs to 1 agent (perfect inequality), whereas a + value of 0 indicates all agents have equal endowments (perfect equality). + + Note: + Uses a slightly different method depending on the number of agents. For fewer + agents (<30), uses an exact but slow method. Switches to using a much faster + method for more agents, where both methods produce approximately equivalent + results. + """ + n_agents = len(endowments) + + if n_agents < 30: # Slower. Accurate for all n. + diff_ij = np.abs( + endowments.reshape((n_agents, 1)) - endowments.reshape((1, n_agents)) + ) + diff = np.sum(diff_ij) + norm = 2 * n_agents * endowments.sum(axis=0) + unscaled_gini = diff / (norm + 1e-10) + gini = unscaled_gini / ((n_agents - 1) / n_agents) + return gini + + # Much faster. Slightly overestimated for low n. 
+ s_endows = np.sort(endowments) + return 1 - (2 / (n_agents + 1)) * np.sum( + np.cumsum(s_endows) / (np.sum(s_endows) + 1e-10) + ) + + +def get_equality(endowments): + """Returns the complement of the normalized Gini index (equality = 1 - Gini). + + Args: + endowments (ndarray): The array of endowments for each of the agents in the + simulated economy. + + Returns: + Normalized equality index for the distribution of endowments (float). A value + of 0 indicates everything belongs to 1 agent (perfect inequality), + whereas a value of 1 indicates all agents have equal endowments (perfect + equality). + """ + return 1 - get_gini(endowments) + + +def get_productivity(coin_endowments): + """Returns the total coin inside the simulated economy. + + Args: + coin_endowments (ndarray): The array of coin endowments for each of the + agents in the simulated economy. + + Returns: + Total coin endowment (float). + """ + return np.sum(coin_endowments) diff --git a/ai_economist/foundation/utils.py b/ai_economist/foundation/utils.py new file mode 100644 index 0000000..85e12f1 --- /dev/null +++ b/ai_economist/foundation/utils.py @@ -0,0 +1,123 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import json +import os +import sys +from hashlib import sha512 + +import lz4.frame +from Crypto.PublicKey import RSA + +from ai_economist.foundation.base.base_env import BaseEnvironment + + +def save_episode_log(game_object, filepath, compression_level=16): + """Save a lz4 compressed version of the dense log stored + in the provided game object""" + assert isinstance(game_object, BaseEnvironment) + compression_level = int(compression_level) + if compression_level < 0: + compression_level = 0 + elif compression_level > 16: + compression_level = 16 + + with lz4.frame.open( + filepath, mode="wb", compression_level=compression_level + ) as log_file: + log_bytes = bytes( + json.dumps( + game_object.previous_episode_dense_log, ensure_ascii=False + ).encode("utf-8") + ) + log_file.write(log_bytes) + + +def load_episode_log(filepath): + """Load the dense log saved at provided filepath""" + with lz4.frame.open(filepath, mode="rb") as log_file: + log_bytes = log_file.read() + return json.loads(log_bytes) + + +def verify_activation_code(): + """ + Validate the user's activation code. + If the activation code is valid, also save it in a text file for future reference. 
+ If the activation code is invalid, simply exit the program + """ + path_to_activation_code_dir = os.path.dirname(os.path.abspath(__file__)) + + def validate_activation_code(code, msg=b"covid19 code activation"): + filepath = os.path.abspath( + os.path.join( + path_to_activation_code_dir, + "scenarios/covid19/key_to_check_activation_code_against", + ) + ) + with open(filepath, "r") as fp: + key_pair = RSA.import_key(fp.read()) + + hashed_msg = int.from_bytes(sha512(msg).digest(), byteorder="big") + signature = pow(hashed_msg, key_pair.d, key_pair.n) + try: + exp_from_code = int(code, 16) + hashed_msg_from_signature = pow(signature, exp_from_code, key_pair.n) + + return hashed_msg == hashed_msg_from_signature + except ValueError: + return False + + activation_code_filename = "activation_code.txt" + + filepath = os.path.join(path_to_activation_code_dir, activation_code_filename) + if activation_code_filename in os.listdir(path_to_activation_code_dir): + print("Using the activation code already present in '{}'".format(filepath)) + with open(filepath, "r") as fp: + activation_code = fp.read() + fp.close() + if validate_activation_code(activation_code): + return # already activated + print( + "The activation code saved in '{}' is incorrect! 
" + "Please correct the activation code and try again.".format(filepath) + ) + sys.exit(0) + else: + print( + "In order to run this simulation, you will need an activation code.\n" + "Please fill out the form at " + "https://forms.gle/dJ2gKDBqLDko1g7m7 and we will send you an " + "activation code to the provided email address.\n" + ) + num_attempts = 5 + attempt_num = 0 + while attempt_num < num_attempts: + activation_code = input( + f"Whenever you are ready, " + "please enter the activation code: " + f"(attempt {attempt_num + 1} / {num_attempts})" + ) + attempt_num += 1 + if validate_activation_code(activation_code): + print( + "Saving the activation code in '{}' for future " + "use.".format(filepath) + ) + with open( + os.path.join(path_to_activation_code_dir, activation_code_filename), + "w", + ) as fp: + fp.write(activation_code) + fp.close() + return + print("Incorrect activation code. Please try again.") + print( + "You have had {} attempts to provide the activate code. Unfortunately, " + "none of the activation code(s) you provided could be validated. " + "Exiting...".format(num_attempts) + ) + sys.exit(0) diff --git a/ai_economist/real_business_cycle/README.md b/ai_economist/real_business_cycle/README.md new file mode 100644 index 0000000..844b33b --- /dev/null +++ b/ai_economist/real_business_cycle/README.md @@ -0,0 +1,120 @@ +# Real Business Cycle (RBC) +This directory implements a **Real-Business-Cycle** (RBC) simulation with many heterogeneous, interacting strategic agents of various types, such as **consumers, firms, and the government**. For details, please refer to this paper "Finding General Equilibria in Many-Agent Economic Simulations using Deep Reinforcement Learning (ArXiv link forthcoming)". We also provide training code that uses deep multi-agent reinforcement learning to determine optimal economic policies and dynamics in these many agent environments. Below are instructions required to launch the training runs. 
+ +**Note: The experiments require a GPU to run!** + +## Dependencies + +- torch>=1.9.0 +- pycuda==2021.1 +- matplotlib==3.2.1 + +## Running Local Jobs +To run a hyperparameter sweep of jobs on a local machine, use (see file for command line arguments and hyperparameter sweep dictionaries) + +``` +python train_multi_exps.py +``` + +## Configuration Dictionaries + +Configuration dictionaries are currently specified in Python code, and then written as `hparams.yaml` in the job directory. For examples, see the file `constants.py`. The dictionaries contain "agents", "world", and "train" dictionaries which contain various hyperparameters. + +## Hyperparameter Sweeps + +The files `train_multi_exps.py` allow hyperparameter sweeps. These are specified in `*_param_sweeps` dictionaries in the file. For each hyperparameter, specify a list of one or more choices. The Cartesian product of all choices will be used. + +## Approximate Best Response Training + +To run a single approximate best-response (BR) training job on checkpoint policies, run `python train_bestresponse.py ROLLOUT_DIR NUM_EPISODES_TO_TRAIN --ep-strs ep1 ep2 --agent-type all`. The `--ep-strs` argument specifies which episodes to run on (for example, policies from episode 0, 10000, and 200000). These must be episodes for which policies were saved. It is possible to specify a single agent type. + + +## What Will Be Saved? + +A large amount of data will be saved -- one can set hyperparamter `train.save_dense_every` in the configuration dictionary (`hparams.yaml`/`constants.py`) to reduce this. + +At the top level, an experiment directory stores the results of many runs in a hyperparameter sweep. Example structure: + +``` +experiment/experimentname/ + rollout-999999-99999/ + brconsumer/ + ... + brfirm/ + episode_XXXX_consumer.npz + episode_XXXX_government.npz + episode_XXXX_firm.npz + saved_models/ + consumer_policy_XXX.pt + firm_policy_XXX.pt + government_policy_XXX.pt. + brgovernment/ + ... 
+ hparams.yaml + action_arrays.pickle + episode_XXXX_consumer.npz + episode_XXXX_government.npz + episode_XXXX_firm.npz + saved_models/ + consumer_policy_XXX.pt + firm_policy_XXX.pt + government_policy_XXX.pt. + + rollout-777777-77777/ + ... +``` + +Files: + +`rollout-XXXXXX-XXX`: subdirectory containing all output for a single run. + +`hparams.yaml`: configuration dictionary with hyperparameters + +`action_arrays.pickle`: contains saved action arrays (allowing mapping action indices to the actual action, e.g. index 1 is price 1000.0, etc.) + +`episode_XXXX_AGENTTYPE.npz`: Contains dense rollouts stored as the output of a numpy.savez call. When loaded, can be treated like a dictionary of numpy arrays. Has keys: `['states', 'actions', 'rewards', 'action_array', 'aux_array']` (view keys by using `.files`). `states`, `actions`, `rewards`, and `aux_array` all refer to saved copies of CUDA arrays (described below). `action_array` is a small array mapping action indices to the actual action. + +`saved_models/AGENTTYPE_policy_XXX.pt`: a saved PyTorch state dict of the policy network, after episode XXX. + +## Structure Of Arrays + +`states` for any given agent type is an array storing observed states. It has shape `batch_size, ep_length, num_agents, agent_total_state_dim`. + +`actions` is an array consisting of the action _indices_ (integers). For firms and government, it is of shape `batch_size, ep_length, num_agents`. For consumers, it is of shape `batch_size, ep_length, num_agents, num_action_heads`. + +`rewards` stores total rewards, and is of shape `batch_size, ep_length, num_agents`. + +The `aux_array` stores additional information and may differ per agent type. The consumer `aux_array` stores _actual_ consumption of each firm's good (as opposed to attempted consumption). The firm `aux_array` stores the amount bought by the export market. + +## State Array Layout: + +States observed by each agent consist of a global state, plus additional state dimensions per agent. 
+ +Global state: total dimension 4 * num_firms + 2 + 1 +- prices: 1 per firm +- wages: 1 per firm +- inventories: 1 per firm +- overdemanded flag: 1 per firm +- time + +Consumer additional state variables: total dimension global state + 2 +- budget +- theta + +Firm additional state variables: total dimension global state + 3 + num_firms +- budget +- capital +- production alpha +- one-hot representation identifying which firm + +## What Gets Loaded And Written By BR Code? + +The best response code loads in the `hparams.yaml` file, and the policies at a given time step (i.e. `saved_models/...policy_XXX.pt`). It then trains one of the policies while keeping the others fixed. Results are written to directories `brfirm`, `brconsumer`, `brgovernment` and contain dense rollouts and saved policy checkpoints, but from the best response training. + +## Which Hyperparameters Are Managed And Where? + +Initial values of state variables (budgets, initial wages, levels of capital, and so on) are set by the code in the method `__init_cuda_data_structs`. Some of these can be controlled from the hyperparameter dict; others are currently hardcoded. + +Other hyperparameters are specified in the configuration dictionary. + +Finally, the technology parameter (A) of the production function is currently hardcoded in the function call in `rbc/cuda/firm_rbc.cu`. diff --git a/ai_economist/real_business_cycle/experiment_utils.py b/ai_economist/real_business_cycle/experiment_utils.py new file mode 100644 index 0000000..6a7c948 --- /dev/null +++ b/ai_economist/real_business_cycle/experiment_utils.py @@ -0,0 +1,242 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import hashlib +import itertools +import json +import os +import pickle +import struct +import time +from copy import deepcopy +from pathlib import Path + +import numpy as np +import yaml + +# defaults +_NUM_FIRMS = 10 + + +def _bigint_from_bytes(num_bytes): + """ + See https://github.com/openai/gym/blob/master/gym/utils/seeding.py. + """ + sizeof_int = 4 + padding = sizeof_int - len(num_bytes) % sizeof_int + num_bytes += b"\0" * padding + int_count = int(len(num_bytes) / sizeof_int) + unpacked = struct.unpack("{}I".format(int_count), num_bytes) + accum = 0 + for i, val in enumerate(unpacked): + accum += 2 ** (sizeof_int * 8 * i) * val + return accum + + +def seed_from_base_seed(base_seed): + """ + Hash base seed to reduce correlation. + """ + max_bytes = 4 + hash_func = hashlib.sha512(str(base_seed).encode("utf8")).digest() + + return _bigint_from_bytes(hash_func[:max_bytes]) + + +def hash_from_dict(d): + d_copy = deepcopy(d) + del (d_copy["train"])["base_seed"] + d_string = json.dumps(d_copy, sort_keys=True) + return int(hashlib.sha256(d_string.encode("utf8")).hexdigest()[:8], 16) + + +def cfg_dict_from_yaml( + hparams_path, + consumption_choices, + work_choices, + price_and_wage, + tax_choices, + group_name=None, +): + with open(hparams_path) as f: + d = yaml.safe_load(f) + + if group_name is not None: + d["metadata"]["group_name"] = group_name + d["metadata"]["hparamhash"] = hash_from_dict(d) + d["agents"][ + "consumer_consumption_actions_array" + ] = consumption_choices # Note: hardcoded + d["agents"]["consumer_work_actions_array"] = work_choices # Note: hardcoded + d["agents"]["firm_actions_array"] = price_and_wage # Note: hardcoded + d["agents"]["government_actions_array"] = tax_choices + d["train"]["save_dir"] = str(hparams_path.absolute().parent) + d["train"]["seed"] = 
seed_from_base_seed(d["train"]["base_seed"]) + return d + + +def run_experiment_batch_parallel( + experiment_dir, + consumption_choices, + work_choices, + price_and_wage, + tax_choices, + group_name=None, + consumers_only=False, + no_firms=False, + default_firm_action=None, + default_government_action=None, +): + hparams_path = Path(experiment_dir) / Path("hparams.yaml") + hparams_dict = cfg_dict_from_yaml( + hparams_path, + consumption_choices, + work_choices, + price_and_wage, + tax_choices, + group_name=group_name, + ) + print(f"hparams_dict {hparams_dict}") + # import this here so rest of file still imports without cuda installed + from rbc.cuda_manager import ConsumerFirmRunManagerBatchParallel + + if consumers_only: + m = ConsumerFirmRunManagerBatchParallel( + hparams_dict, + freeze_firms=default_firm_action, + freeze_govt=default_government_action, + ) + elif no_firms: + m = ConsumerFirmRunManagerBatchParallel( + hparams_dict, + freeze_firms=default_firm_action, + ) + else: + m = ConsumerFirmRunManagerBatchParallel(hparams_dict) + m.train() + + +def compare_global_states_within_type(states, global_state_size): + # every agent within a batch should have the same global state + first_agent_global = states[:, :, :1, :global_state_size] + all_agents_global = states[:, :, :, :global_state_size] + return np.isclose(all_agents_global, first_agent_global).all() + + +def compare_global_states_across_types( + consumer_states, firm_states, government_states, global_state_size +): + first_agent_global = consumer_states[:, :, :1, :global_state_size] + return ( + np.isclose(firm_states[:, :, :, :global_state_size], first_agent_global).all(), + np.isclose( + government_states[:, :, :, :global_state_size], first_agent_global + ).all(), + np.isclose( + consumer_states[:, :, :, :global_state_size], first_agent_global + ).all(), + ) + + +def check_no_negative_stocks(state, stock_offset, stock_size): + stocks = state[:, :, :, stock_offset : (stock_offset + stock_size)] + return 
(stocks >= -1.0e-3).all() + + +train_param_sweeps = { + "lr": [0.005, 0.001], + "entropy": [0.01], + "base_seed": [2596], + "batch_size": [64], + "clip_grad_norm": [1.0, 2.0, 5.0], +} + +# Other param sweeps +agent_param_sweeps = { + # "consumer_noponzi_eta": [0.1,0.05] +} + +world_param_sweeps = { + # "interest_rate": [0.02, 0.0] +} + + +def add_all(d, keys_list, target_val): + for k in keys_list: + d[k] = target_val + + +def sweep_cfg_generator( + base_cfg, + tr_param_sweeps=None, + ag_param_sweeps=None, + wld_param_sweeps=None, + seed_from_timestamp=False, + group_name=None, +): + # train_param_sweeps + if tr_param_sweeps is None: + tr_param_sweeps = {} + # agent_param_sweeps + if ag_param_sweeps is None: + ag_param_sweeps = {} + # world_param_sweeps + if wld_param_sweeps is None: + wld_param_sweeps = {} + + assert isinstance(tr_param_sweeps, dict) + assert isinstance(ag_param_sweeps, dict) + assert isinstance(wld_param_sweeps, dict) + + key_dict = {} # tells which key goes to which dict, e.g. "lr" -> "train", etc. 
+ if len(tr_param_sweeps) > 0: + train_k, train_v = zip(*tr_param_sweeps.items()) + else: + train_k, train_v = (), () + add_all(key_dict, train_k, "train") + if len(ag_param_sweeps) > 0: + agent_k, agent_v = zip(*ag_param_sweeps.items()) + else: + agent_k, agent_v = (), () + add_all(key_dict, agent_k, "agents") + if len(wld_param_sweeps) > 0: + world_k, world_v = zip(*wld_param_sweeps.items()) + else: + world_k, world_v = (), () + add_all(key_dict, world_k, "world") + + k = train_k + agent_k + world_k + v = train_v + agent_v + world_v + + # have a "reverse lookup" dictionary for each key name + for combination in itertools.product(*v): + values_to_substitute = dict(zip(k, combination)) + out = deepcopy(base_cfg) + for key, value in values_to_substitute.items(): + out[key_dict[key]][key] = value + if seed_from_timestamp: + int_timestamp = int( + time.time() * 1000 + ) # time.time() returns float, multiply 1000 for higher resolution + out["train"]["base_seed"] += int_timestamp + if group_name is not None: + out["metadata"]["group"] = group_name + yield out + + +def create_job_dir(experiment_dir, job_name_base, cfg=None, action_arrays=None): + unique_id = time.time() + dirname = f"{job_name_base}-{unique_id}".replace(".", "-") + dir_path = Path(experiment_dir) / Path(dirname) + os.makedirs(str(dir_path), exist_ok=True) + cfg["metadata"]["dirname"] = dirname + cfg["metadata"]["group"] = str(Path(experiment_dir).name) + with open(dir_path / Path("hparams.yaml"), "w") as f: + f.write(yaml.dump(cfg)) + + if action_arrays is not None: + with open(dir_path / Path("action_arrays.pickle"), "wb") as f: + pickle.dump(action_arrays, f) diff --git a/ai_economist/real_business_cycle/rbc/__init__.py b/ai_economist/real_business_cycle/rbc/__init__.py new file mode 100644 index 0000000..fc6cee4 --- /dev/null +++ b/ai_economist/real_business_cycle/rbc/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) 2020, salesforce.com, inc. +# All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause diff --git a/ai_economist/real_business_cycle/rbc/constants.py b/ai_economist/real_business_cycle/rbc/constants.py new file mode 100644 index 0000000..9db6d76 --- /dev/null +++ b/ai_economist/real_business_cycle/rbc/constants.py @@ -0,0 +1,638 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import itertools + +import numpy as np +import torch + +_NP_DTYPE = np.float32 + + +def all_agents_export_experiment_template( + NUMFIRMS, NUMCONSUMERS, NUMGOVERNMENTS, episodes_const=30000 +): + consumption_choices = [ + np.array([0.0 + 1.0 * c for c in range(11)], dtype=_NP_DTYPE) + ] + work_choices = [ + np.array([0.0 + 20 * 13 * h for h in range(5)], dtype=_NP_DTYPE) + ] # specify dtype -- be consistent? 
+ + consumption_choices = np.array( + list(itertools.product(*consumption_choices)), dtype=_NP_DTYPE + ) + work_choices = np.array(list(itertools.product(*work_choices)), dtype=_NP_DTYPE) + + price_choices = np.array([0.0 + 500.0 * c for c in range(6)], dtype=_NP_DTYPE) + wage_choices = np.array([0.0, 11.0, 22.0, 33.0, 44.0], dtype=_NP_DTYPE) + capital_choices = np.array([0.1], dtype=_NP_DTYPE) + price_and_wage = np.array( + list(itertools.product(price_choices, wage_choices, capital_choices)), + dtype=_NP_DTYPE, + ) + + # government action discretization + income_taxation_choices = np.array( + [0.0 + 0.2 * c for c in range(6)], dtype=_NP_DTYPE + ) + corporate_taxation_choices = np.array( + [0.0 + 0.2 * c for c in range(6)], dtype=_NP_DTYPE + ) + tax_choices = np.array( + list(itertools.product(income_taxation_choices, corporate_taxation_choices)), + dtype=_NP_DTYPE, + ) + global_state_dim = ( + NUMFIRMS # prices + + NUMFIRMS # wages + + NUMFIRMS # stocks + + NUMFIRMS # was good overdemanded + + 2 * NUMGOVERNMENTS # tax rates + + 1 + ) # time + + global_state_digit_dims = list( + range(2 * NUMFIRMS, 3 * NUMFIRMS) + ) # stocks are the only global state var that can get huge + consumer_state_dim = ( + global_state_dim + 1 + 1 + ) # budget # theta, the disutility of work + + firm_state_dim = ( + global_state_dim + + 1 # budget + + 1 # capital + + 1 # production alpha + + NUMFIRMS # onehot specifying which firm + ) + + episodes_to_anneal_firm = 100000 + episodes_to_anneal_government = 100000 + government_phase1_start = 100000 + government_state_dim = global_state_dim + DEFAULT_CFG_DICT = { + # actions_array key will be added below + "agents": { + "num_consumers": NUMCONSUMERS, + "num_firms": NUMFIRMS, + "num_governments": NUMGOVERNMENTS, + "global_state_dim": global_state_dim, + "consumer_state_dim": consumer_state_dim, + # action vectors are how much consume from each firm, + # how much to work, and which firm to choose + "consumer_action_dim": NUMFIRMS + 1 + 1, + 
"consumer_num_consume_actions": consumption_choices.shape[0], + "consumer_num_work_actions": work_choices.shape[0], + "consumer_num_whichfirm_actions": NUMFIRMS, + "firm_state_dim": firm_state_dim, # what are observations? + # actions are price and wage for own firm, and capital choices + "firm_action_dim": 3, + "firm_num_actions": price_and_wage.shape[0], + "government_state_dim": government_state_dim, + "government_action_dim": 2, + "government_num_actions": tax_choices.shape[0], + "max_possible_consumption": float(consumption_choices.max()), + "max_possible_hours_worked": float(work_choices.max()), + "max_possible_wage": float(wage_choices.max()), + "max_possible_price": float(price_choices.max()), + # these are dims which, due to being on a large scale, + # have to be expanded to a digit representation + "consumer_digit_dims": global_state_digit_dims + + [global_state_dim], # global state + consumer budget + # global state + firm budget (do we need capital?) + "firm_digit_dims": global_state_digit_dims + [global_state_dim], + # govt only has global state + "government_digit_dims": global_state_digit_dims, + "firm_reward_scale": 10000, + "government_reward_scale": 100000, + "consumer_reward_scale": 50.0, + "firm_anneal_wages": { + "anneal_on": True, + "start": 22.0, + "increase_const": float(wage_choices.max() - 22.0) + / (episodes_to_anneal_firm), + "decrease_const": (22.0) / episodes_to_anneal_firm, + }, + "firm_anneal_prices": { + "anneal_on": True, + "start": 1000.0, + "increase_const": float(price_choices.max() - 1000.00) + / episodes_to_anneal_firm, + "decrease_const": (1000.0) / episodes_to_anneal_firm, + }, + "government_anneal_taxes": { + "anneal_on": True, + "start": 0.0, + "increase_const": 1.0 / episodes_to_anneal_government, + }, + "firm_begin_anneal_action": 0, + "government_begin_anneal_action": government_phase1_start, + "consumer_anneal_theta": { + "anneal_on": True, + "exp_decay_length_in_steps": episodes_const, + }, + 
"consumer_anneal_entropy": { + "anneal_on": True, + "exp_decay_length_in_steps": episodes_const, + "coef_floor": 0.1, + }, + "firm_anneal_entropy": { + "anneal_on": True, + "exp_decay_length_in_steps": episodes_const, + "coef_floor": 0.1, + }, + "govt_anneal_entropy": { + "anneal_on": True, + "exp_decay_length_in_steps": episodes_const, + "coef_floor": 0.1, + }, + "consumer_noponzi_eta": 0.0, + "consumer_penalty_scale": 1.0, + "firm_noponzi_eta": 0.0, + "firm_training_start": episodes_to_anneal_firm, + "government_training_start": government_phase1_start + + episodes_to_anneal_government, + "consumer_training_start": 0, + "government_counts_firm_reward": 0, + "should_boost_firm_reward": False, + "firm_reward_for_government_factor": 0.0025, + }, + "world": { + "maxtime": 10, + "initial_firm_endowment": 22.0 * 1000 * NUMCONSUMERS, + "initial_consumer_endowment": 2000, + "initial_stocks": 0.0, + "initial_prices": 1000.0, + "initial_wages": 22.0, + "interest_rate": 0.1, + "consumer_theta": 0.01, + "crra_param": 0.1, + "production_alpha": "fixed_array", # only works for exactly 10 firms, kluge + "initial_capital": "twolevel", + "paretoscaletheta": 4.0, + "importer_price": 500.0, + "importer_quantity": 100.0, + "use_importer": 1, + }, + "train": { + "batch_size": 8, + "base_seed": 1234, + "save_dense_every": 2000, + "save_model_every": 10000, + "num_episodes": 500000, + "infinite_episodes": False, + "lr": 0.01, + "gamma": 0.9999, + "entropy": 0.0, + "value_loss_weight": 1.0, + "digit_representation_size": 10, + "lagr_num_steps": 1, + "boost_firm_reward_factor": 1.0, + }, + } + return ( + DEFAULT_CFG_DICT, + consumption_choices, + work_choices, + price_and_wage, + tax_choices, + None, + None, + ) + + +def all_agents_short_export_experiment_template( + NUMFIRMS, NUMCONSUMERS, NUMGOVERNMENTS, episodes_const=10000 +): + consumption_choices = [ + np.array([0.0 + 1.0 * c for c in range(11)], dtype=_NP_DTYPE) + ] + work_choices = [ + np.array([0.0 + 20 * 13 * h for h in 
range(5)], dtype=_NP_DTYPE) + ] # specify dtype -- be consistent? + + consumption_choices = np.array( + list(itertools.product(*consumption_choices)), dtype=_NP_DTYPE + ) + work_choices = np.array(list(itertools.product(*work_choices)), dtype=_NP_DTYPE) + + price_choices = np.array([0.0 + 500.0 * c for c in range(6)], dtype=_NP_DTYPE) + wage_choices = np.array([0.0, 11.0, 22.0, 33.0, 44.0], dtype=_NP_DTYPE) + capital_choices = np.array([0.1], dtype=_NP_DTYPE) + price_and_wage = np.array( + list(itertools.product(price_choices, wage_choices, capital_choices)), + dtype=_NP_DTYPE, + ) + + # government action discretization + income_taxation_choices = np.array( + [0.0 + 0.2 * c for c in range(6)], dtype=_NP_DTYPE + ) + corporate_taxation_choices = np.array( + [0.0 + 0.2 * c for c in range(6)], dtype=_NP_DTYPE + ) + tax_choices = np.array( + list(itertools.product(income_taxation_choices, corporate_taxation_choices)), + dtype=_NP_DTYPE, + ) + global_state_dim = ( + NUMFIRMS # prices + + NUMFIRMS # wages + + NUMFIRMS # stocks + + NUMFIRMS # was good overdemanded + + 2 * NUMGOVERNMENTS # tax rates + + 1 + ) # time + + global_state_digit_dims = list( + range(2 * NUMFIRMS, 3 * NUMFIRMS) + ) # stocks are the only global state var that can get huge + consumer_state_dim = ( + global_state_dim + 1 + 1 + ) # budget # theta, the disutility of work + + firm_state_dim = ( + global_state_dim + + 1 # budget + + 1 # capital + + 1 # production alpha + + NUMFIRMS # onehot specifying which firm + ) + + episodes_to_anneal_firm = 30000 + episodes_to_anneal_government = 30000 + government_phase1_start = 30000 + government_state_dim = global_state_dim + DEFAULT_CFG_DICT = { + # actions_array key will be added below + "agents": { + "num_consumers": NUMCONSUMERS, + "num_firms": NUMFIRMS, + "num_governments": NUMGOVERNMENTS, + "global_state_dim": global_state_dim, + "consumer_state_dim": consumer_state_dim, + # action vectors are how much consume from each firm, + # how much to work, and which 
firm to choose + "consumer_action_dim": NUMFIRMS + 1 + 1, + "consumer_num_consume_actions": consumption_choices.shape[0], + "consumer_num_work_actions": work_choices.shape[0], + "consumer_num_whichfirm_actions": NUMFIRMS, + "firm_state_dim": firm_state_dim, # what are observations? + # actions are price and wage for own firm, and capital choices + "firm_action_dim": 3, + "firm_num_actions": price_and_wage.shape[0], + "government_state_dim": government_state_dim, + "government_action_dim": 2, + "government_num_actions": tax_choices.shape[0], + "max_possible_consumption": float(consumption_choices.max()), + "max_possible_hours_worked": float(work_choices.max()), + "max_possible_wage": float(wage_choices.max()), + "max_possible_price": float(price_choices.max()), + # these are dims which, due to being on a large scale, + # have to be expanded to a digit representation + "consumer_digit_dims": global_state_digit_dims + + [global_state_dim], # global state + consumer budget + "firm_digit_dims": global_state_digit_dims + + [global_state_dim], # global state + firm budget (do we need capital?) 
+ # govt only has global state + "government_digit_dims": global_state_digit_dims, + "firm_reward_scale": 10000, + "government_reward_scale": 100000, + "consumer_reward_scale": 50.0, + "firm_anneal_wages": { + "anneal_on": True, + "start": 22.0, + "increase_const": float(wage_choices.max() - 22.0) + / (episodes_to_anneal_firm), + "decrease_const": (22.0) / episodes_to_anneal_firm, + }, + "firm_anneal_prices": { + "anneal_on": True, + "start": 1000.0, + "increase_const": float(price_choices.max() - 1000.00) + / episodes_to_anneal_firm, + "decrease_const": (1000.0) / episodes_to_anneal_firm, + }, + "government_anneal_taxes": { + "anneal_on": True, + "start": 0.0, + "increase_const": 1.0 / episodes_to_anneal_government, + }, + "firm_begin_anneal_action": 0, + "government_begin_anneal_action": government_phase1_start, + "consumer_anneal_theta": { + "anneal_on": True, + "exp_decay_length_in_steps": episodes_const, + }, + "consumer_anneal_entropy": { + "anneal_on": True, + "exp_decay_length_in_steps": episodes_const, + "coef_floor": 0.1, + }, + "firm_anneal_entropy": { + "anneal_on": True, + "exp_decay_length_in_steps": episodes_const, + "coef_floor": 0.1, + }, + "govt_anneal_entropy": { + "anneal_on": True, + "exp_decay_length_in_steps": episodes_const, + "coef_floor": 0.1, + }, + "consumer_noponzi_eta": 0.0, + "consumer_penalty_scale": 1.0, + "firm_noponzi_eta": 0.0, + "firm_training_start": episodes_to_anneal_firm, + "government_training_start": government_phase1_start + + episodes_to_anneal_government, + "consumer_training_start": 0, + "government_counts_firm_reward": 0, + "should_boost_firm_reward": False, + "firm_reward_for_government_factor": 0.0025, + }, + "world": { + "maxtime": 10, + "initial_firm_endowment": 22.0 * 1000 * NUMCONSUMERS, + "initial_consumer_endowment": 2000, + "initial_stocks": 0.0, + "initial_prices": 1000.0, + "initial_wages": 22.0, + "interest_rate": 0.1, + "consumer_theta": 0.01, + "crra_param": 0.1, + "production_alpha": "fixed_array", # 
only works for exactly 10 firms, kluge + "initial_capital": "twolevel", + "paretoscaletheta": 4.0, + "importer_price": 500.0, + "importer_quantity": 100.0, + "use_importer": 1, + }, + "train": { + "batch_size": 8, + "base_seed": 1234, + "save_dense_every": 2000, + "save_model_every": 10000, + "num_episodes": 200000, + "infinite_episodes": False, + "lr": 0.01, + "gamma": 0.9999, + "entropy": 0.0, + "value_loss_weight": 1.0, + "digit_representation_size": 10, + "lagr_num_steps": 1, + "boost_firm_reward_factor": 1.0, + }, + } + return ( + DEFAULT_CFG_DICT, + consumption_choices, + work_choices, + price_and_wage, + tax_choices, + None, + None, + ) + + +def very_short_test_template( + NUMFIRMS, NUMCONSUMERS, NUMGOVERNMENTS, episodes_const=30000 +): + consumption_choices = [ + np.array([0.0 + 1.0 * c for c in range(11)], dtype=_NP_DTYPE) + ] + work_choices = [ + np.array([0.0 + 20 * 13 * h for h in range(5)], dtype=_NP_DTYPE) + ] # specify dtype -- be consistent? + + consumption_choices = np.array( + list(itertools.product(*consumption_choices)), dtype=_NP_DTYPE + ) + work_choices = np.array(list(itertools.product(*work_choices)), dtype=_NP_DTYPE) + + price_choices = np.array([0.0 + 500.0 * c for c in range(6)], dtype=_NP_DTYPE) + wage_choices = np.array([0.0, 11.0, 22.0, 33.0, 44.0], dtype=_NP_DTYPE) + capital_choices = np.array([0.1], dtype=_NP_DTYPE) + price_and_wage = np.array( + list(itertools.product(price_choices, wage_choices, capital_choices)), + dtype=_NP_DTYPE, + ) + + # government action discretization + income_taxation_choices = np.array( + [0.0 + 0.2 * c for c in range(6)], dtype=_NP_DTYPE + ) + corporate_taxation_choices = np.array( + [0.0 + 0.2 * c for c in range(6)], dtype=_NP_DTYPE + ) + tax_choices = np.array( + list(itertools.product(income_taxation_choices, corporate_taxation_choices)), + dtype=_NP_DTYPE, + ) + global_state_dim = ( + NUMFIRMS # prices + + NUMFIRMS # wages + + NUMFIRMS # stocks + + NUMFIRMS # was good overdemanded + + 2 * 
NUMGOVERNMENTS # tax rates + + 1 + ) # time + + global_state_digit_dims = list( + range(2 * NUMFIRMS, 3 * NUMFIRMS) + ) # stocks are the only global state var that can get huge + consumer_state_dim = ( + global_state_dim + 1 + 1 + ) # budget # theta, the disutility of work + + firm_state_dim = ( + global_state_dim + + 1 # budget + + 1 # capital + + 1 # production alpha + + NUMFIRMS # onehot specifying which firm + ) + + episodes_to_anneal_firm = 10 + episodes_to_anneal_government = 10 + government_phase1_start = 10 + government_state_dim = global_state_dim + DEFAULT_CFG_DICT = { + # actions_array key will be added below + "agents": { + "num_consumers": NUMCONSUMERS, + "num_firms": NUMFIRMS, + "num_governments": NUMGOVERNMENTS, + "global_state_dim": global_state_dim, + "consumer_state_dim": consumer_state_dim, + # action vectors are how much consume from each firm, + # how much to work, and which firm to choose + "consumer_action_dim": NUMFIRMS + 1 + 1, + "consumer_num_consume_actions": consumption_choices.shape[0], + "consumer_num_work_actions": work_choices.shape[0], + "consumer_num_whichfirm_actions": NUMFIRMS, + "firm_state_dim": firm_state_dim, # what are observations? 
+ # actions are price and wage for own firm, and capital choices + "firm_action_dim": 3, + "firm_num_actions": price_and_wage.shape[0], + "government_state_dim": government_state_dim, + "government_action_dim": 2, + "government_num_actions": tax_choices.shape[0], + "max_possible_consumption": float(consumption_choices.max()), + "max_possible_hours_worked": float(work_choices.max()), + "max_possible_wage": float(wage_choices.max()), + "max_possible_price": float(price_choices.max()), + # these are dims which, due to being on a large scale, + # have to be expanded to a digit representation + "consumer_digit_dims": global_state_digit_dims + + [global_state_dim], # global state + consumer budget + "firm_digit_dims": global_state_digit_dims + + [global_state_dim], # global state + firm budget (do we need capital?) + # govt only has global state + "government_digit_dims": global_state_digit_dims, + "firm_reward_scale": 10000, + "government_reward_scale": 100000, + "consumer_reward_scale": 50.0, + "firm_anneal_wages": { + "anneal_on": True, + "start": 22.0, + "increase_const": float(wage_choices.max() - 22.0) + / (episodes_to_anneal_firm), + "decrease_const": (22.0) / episodes_to_anneal_firm, + }, + "firm_anneal_prices": { + "anneal_on": True, + "start": 1000.0, + "increase_const": float(price_choices.max() - 1000.00) + / episodes_to_anneal_firm, + "decrease_const": (1000.0) / episodes_to_anneal_firm, + }, + "government_anneal_taxes": { + "anneal_on": True, + "start": 0.0, + "increase_const": 1.0 / episodes_to_anneal_government, + }, + "firm_begin_anneal_action": 0, + "government_begin_anneal_action": government_phase1_start, + "consumer_anneal_theta": { + "anneal_on": True, + "exp_decay_length_in_steps": episodes_const, + }, + "consumer_anneal_entropy": { + "anneal_on": True, + "exp_decay_length_in_steps": episodes_const, + "coef_floor": 0.1, + }, + "firm_anneal_entropy": { + "anneal_on": True, + "exp_decay_length_in_steps": episodes_const, + "coef_floor": 0.1, + }, + 
"govt_anneal_entropy": { + "anneal_on": True, + "exp_decay_length_in_steps": episodes_const, + "coef_floor": 0.1, + }, + "consumer_noponzi_eta": 0.0, + "consumer_penalty_scale": 1.0, + "firm_noponzi_eta": 0.0, + "firm_training_start": episodes_to_anneal_firm, + "government_training_start": government_phase1_start + + episodes_to_anneal_government, + "consumer_training_start": 0, + "government_counts_firm_reward": 0, + "should_boost_firm_reward": False, + "firm_reward_for_government_factor": 0.0025, + "train_firms_every": 2, + "train_consumers_every": 1, + "train_government_every": 5, + }, + "world": { + "maxtime": 10, + "initial_firm_endowment": 22.0 * 1000 * NUMCONSUMERS, + "initial_consumer_endowment": 2000, + "initial_stocks": 0.0, + "initial_prices": 1000.0, + "initial_wages": 22.0, + "interest_rate": 0.1, + "consumer_theta": 0.01, + "crra_param": 0.1, + "production_alpha": "fixed_array", # only works for exactly 10 firms, kluge + "initial_capital": "twolevel", + "paretoscaletheta": 4.0, + "importer_price": 500.0, + "importer_quantity": 100.0, + "use_importer": 1, + }, + "train": { + "batch_size": 8, + "base_seed": 1234, + "save_dense_every": 2000, + "save_model_every": 10000, + "num_episodes": 100, + "infinite_episodes": False, + "lr": 0.01, + "gamma": 0.9999, + "entropy": 0.0, + "value_loss_weight": 1.0, + "digit_representation_size": 10, + "lagr_num_steps": 1, + "boost_firm_reward_factor": 1.0, + }, + } + return ( + DEFAULT_CFG_DICT, + consumption_choices, + work_choices, + price_and_wage, + tax_choices, + None, + None, + ) + + +def global_state_scaling_factors(cfg_dict): + max_wage = cfg_dict["agents"]["max_possible_wage"] + max_price = cfg_dict["agents"]["max_possible_price"] + num_firms = cfg_dict["agents"]["num_firms"] + num_governments = cfg_dict["agents"]["num_governments"] + maxtime = cfg_dict["world"]["maxtime"] + + digit_size = cfg_dict["train"]["digit_representation_size"] + + return torch.tensor( + # prices, wages, stocks, overdemanded + 
([max_price] * num_firms) + + ([max_wage] * num_firms) + + ([1.0] * num_firms * digit_size) # stocks are expanded to digit form + + ([1.0] * num_firms) + + ([1.0] * (2 * num_governments)) + + [maxtime] + ) + + +def consumer_state_scaling_factors(cfg_dict): + global_state_scales = global_state_scaling_factors(cfg_dict) + digit_size = cfg_dict["train"]["digit_representation_size"] + consumer_scales = torch.tensor( + ([1.0] * digit_size) + [cfg_dict["world"]["consumer_theta"]] + ) + return torch.cat((global_state_scales, consumer_scales)).cuda() + + +def firm_state_scaling_factors(cfg_dict): + num_firms = cfg_dict["agents"]["num_firms"] + global_state_scales = global_state_scaling_factors(cfg_dict) + digit_size = cfg_dict["train"]["digit_representation_size"] + # budget, capital, alpha, one-hot + firm_scales = torch.tensor( + ([1.0] * digit_size) + [10000.0, 1.0] + ([1.0] * num_firms) + ) + return torch.cat((global_state_scales, firm_scales)).cuda() + + +def govt_state_scaling_factors(cfg_dict): + return global_state_scaling_factors(cfg_dict).cuda() diff --git a/ai_economist/real_business_cycle/rbc/cuda/firm_rbc.cu b/ai_economist/real_business_cycle/rbc/cuda/firm_rbc.cu new file mode 100644 index 0000000..84fce67 --- /dev/null +++ b/ai_economist/real_business_cycle/rbc/cuda/firm_rbc.cu @@ -0,0 +1,912 @@ +// Copyright (c) 2021, salesforce.com, inc. +// All rights reserved. 
+// SPDX-License-Identifier: BSD-3-Clause
+// For full license text, see the LICENSE file in the repo root
+// or https://opensource.org/licenses/BSD-3-Clause
+
+
+// Real Business Cycle implementation in CUDA C
+
+#include <curand.h>
+#include <curand_kernel.h>
+
+typedef enum {
+  kConsumerType,
+  kFirmType,
+  kGovernmentType,
+} AgentType;
+
+const size_t kBatchSize = M_BATCHSIZE;
+const size_t kNumConsumers = M_NUMCONSUMERS;
+const bool kCountFirmReward = M_COUNTFIRMREWARD;
+const size_t kNumFirms = M_NUMFIRMS;
+const size_t kNumGovts = M_NUMGOVERNMENTS;
+const float kMaxTime = M_MAXTIME;
+const float kCrraParam = M_CRRA_PARAM;
+const float kInterestRate = M_INTERESTRATE;
+const size_t kNumAgents = kNumConsumers + kNumFirms + kNumGovts;
+const bool kIncentivizeFirmActivity = M_SHOULDBOOSTFIRMREWARD;
+const float kFirmBoostRewardFactor = M_BOOSTFIRMREWARDFACTOR;
+const bool kUseImporter = M_USEIMPORTER;
+const float kImporterPrice = M_IMPORTERPRICE;
+const float kImporterQuantity = M_IMPORTERQUANTITY;
+const float kLaborFloor = M_LABORFLOOR;
+
+// Global state =
+const size_t kNumPrices = kNumFirms; // - prices,
+const size_t kNumWages = kNumFirms; // - wages,
+const size_t kNumInventories = kNumFirms; // - stocks,
+const size_t kNumOverdemandFlags = kNumFirms; // - good overdemanded flag,
+const size_t kNumCorporateTaxes = kNumGovts; // - corporate tax rate
+const size_t kNumIncomeTaxes = kNumGovts; // - income tax rate
+const size_t kNumTimeDimensions = 1; // - time step
+const size_t kGlobalStateSize = kNumPrices + kNumWages + kNumInventories + kNumOverdemandFlags + kNumCorporateTaxes + kNumIncomeTaxes + kNumTimeDimensions;
+
+const size_t kIdxPricesOffset = 0;
+const size_t kIdxWagesOffset = kNumPrices;
+const size_t kIdxStockOffset = kIdxWagesOffset + kNumInventories;
+const size_t kIdxOverdemandOffset = kIdxStockOffset + kNumOverdemandFlags;
+const size_t kIdxIncomeTaxOffset = kGlobalStateSize - 3;
+const size_t kIdxCorporateTaxOffset = kGlobalStateSize - 2;
+const size_t 
kIdxTimeOffset = kGlobalStateSize - 1; + +// Consumer actions: consume, work, choose which firm to work for +const size_t kActionSizeConsumer = kNumFirms + 1 + 1; + +// add budget and theta +const size_t kStateSizeConsumer = kGlobalStateSize + 1 + 1; +const size_t kIdxConsumerBudgetOffset = 0; + +// offset from agent-specific state part of array +const size_t kIdxConsumerThetaOffset = 1; + +// UNUSED for consumer. Actions are floats. +// __constant__ float cs_index_to_action[num_actions_consumer * +// kActionSizeConsumer]; const size_t kActionSizeConsumer = kNumFirms + +// kNumFirms; // consume + work +/*const size_t num_actions_consumer = + NUMACTIONSkConsumerType; // depends on discretization*/ + +// Firm actions: set wage, set price, invest in capital +const size_t kActionSizeFirm = 3; + +// Number of actions depends on discretization of continuous action space. +const size_t kNumActionsFirm = M_NUMACTIONSFIRM; + +// budget, capital, production alpha, and one-hot firm ID +const size_t kStateSizeFirm = kGlobalStateSize + 1 + 1 + 1 + kNumFirms; + +// offset from agent-specific state part of array +const size_t kIdxFirmBudgetOffset = 0; +const size_t kIdxFirmCapitalOffset = 1; +const size_t kIdxFirmAlphaOffset = 2; +const size_t kIdxFirmOnehotOffset = 3; + +// Constant memory available from ALL threads. +// See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#constant +__constant__ float kFirmIndexToAction[kNumActionsFirm * kActionSizeFirm]; + +// Corporate + income tax rates +const size_t kGovtActionSize = 2; +const size_t kNumActionsGovernment = M_NUMACTIONSGOVERNMENT; +const size_t kGovtStateSize = kGlobalStateSize; +__constant__ float + kGovernmentIndexToAction[kNumActionsGovernment * kGovtActionSize]; + +// One RNG state for each thread. Each thread is assigned to an agent in an env. 
+__device__ curandState_t
+    *rng_state_arr[kBatchSize * kNumAgents]; // not sure best way to do this
+
+// Offsets into action vectors
+const size_t kIdxConsumerDemandedOffset = 0;
+const size_t kIdxConsumerWorkedOffset = kNumFirms;
+const size_t kIdxConsumerWhichFirmOffset = kNumFirms + 1;
+
+// currently 1 govt
+const size_t kIdxThisThreadGovtId = 0;
+
+extern "C" {
+
+// ------------------
+// CUDA C Utilities
+// ------------------
+__device__ void CopyFloatArraySlice(float *start_point, int num_elems,
+                                    float *destination) {
+  for (int i = 0; i < num_elems; i++) {
+    destination[i] = start_point[i];
+  }
+}
+
+__device__ void CopyIntArraySlice(int *start_point, int num_elems,
+                                  float *destination) {
+  for (int i = 0; i < num_elems; i++) {
+    destination[i] = start_point[i];
+  }
+}
+
+// unfortunately, you can't do templates with extern "C" linkage required for
+// CUDA, so we have to define different functions for each case.
+__device__ int *GetPointerFromMultiIndexFor3DIntTensor(int *array,
+                                                       const dim3 &sizes,
+                                                       const dim3 &index) {
+  unsigned int flat_index =
+      index.z + index.y * (sizes.z) + index.x * (sizes.z * sizes.y);
+  return &(array[flat_index]);
+}
+
+__device__ float *GetPointerFromMultiIndexFor3DFloatTensor(float *array,
+                                                           const dim3 &sizes,
+                                                           const dim3 &index) {
+  unsigned int flat_index =
+      index.z + index.y * (sizes.z) + index.x * (sizes.z * sizes.y);
+  return &(array[flat_index]);
+}
+
+__device__ float *
+GetPointerFromMultiIndexFor4DTensor(float *array, const size_t *sizes,
+                                    const size_t *multi_index) {
+  // don't use this for arrays that aren't exactly size 4!!! 
+ unsigned int flat_index = multi_index[3] + multi_index[2] * sizes[3] + + multi_index[1] * sizes[3] * sizes[2] + + multi_index[0] * sizes[3] * sizes[2] * sizes[1]; + return &(array[flat_index]); +} + +__global__ void CudaInitKernel(int seed) { + // we want to reset random seeds for all firms and consumers + int tidx = threadIdx.x; + const int kThisThreadGlobalArrayIdx = blockIdx.x * kNumAgents + threadIdx.x; + + if (tidx < kNumAgents) { + curandState_t *s = new curandState_t; + if (s != 0) { + curand_init(seed, kThisThreadGlobalArrayIdx, 0, s); + } + rng_state_arr[kThisThreadGlobalArrayIdx] = s; + } +} + +__global__ void CudaFreeRand() { + int tidx = threadIdx.x; + const int kThisThreadGlobalArrayIdx = blockIdx.x * kNumAgents + threadIdx.x; + + if (tidx < kNumAgents) { + curandState_t *s = rng_state_arr[kThisThreadGlobalArrayIdx]; + delete s; + } +} + +__device__ int SearchIndex(float *distr, float p, int l, int r) { + int mid; + int left = l; + int right = r; + + while (left <= right) { + mid = left + (right - left) / 2; + if (distr[mid] == p) { + return mid; + } else if (distr[mid] < p) { + left = mid + 1; + } else { + right = mid - 1; + } + } + return left > r ? r : left; +} + +// -------------------- +// Simulation Utilities +// -------------------- +__device__ AgentType GetAgentType(const int agent_id) { + if (agent_id < kNumConsumers) { + return kConsumerType; + } else if (agent_id < (kNumConsumers + kNumFirms)) { + return kFirmType; + } else { + return kGovernmentType; + } +} + +__device__ float GetCRRAUtil(float consumption, float crra_param) { + return (powf(consumption + 1, 1.0 - crra_param) - 1.0) / (1.0 - crra_param); +} + +__global__ void CudaResetEnv(float *cs_state_arr, float *fm_state_arr, + float *govt_state_arr, float *cs_state_ckpt_arr, + float *fm_state_ckpt_arr, + float *govt_state_ckpt_arr, + float theta_anneal_factor) { + /* + Resets the environment by writing the initial state (checkpoint) into the + state array for this agent (thread). 
+ */ + + const int kBlockId = blockIdx.x; + const int kWithinBlockAgentId = threadIdx.x; + + if (kWithinBlockAgentId >= kNumAgents) { + return; + } + + AgentType ThisThreadAgentType = GetAgentType(kWithinBlockAgentId); + + float *state_arr; + float *ckpt_arr; + dim3 my_state_shape, my_state_idx; + size_t my_state_size; + + if (ThisThreadAgentType == kConsumerType) { + // This thread/agent is a consumer. + my_state_size = kStateSizeConsumer; + my_state_shape = {kBatchSize, kNumConsumers, kStateSizeConsumer}; + my_state_idx = {kBlockId, kWithinBlockAgentId, 0}; + state_arr = cs_state_arr; + ckpt_arr = cs_state_ckpt_arr; + } else if (ThisThreadAgentType == kFirmType) { + // This thread/agent is a firm. + my_state_size = kStateSizeFirm; + my_state_shape = {kBatchSize, kNumFirms, kStateSizeFirm}; + my_state_idx = {kBlockId, + (unsigned int)(kWithinBlockAgentId - kNumConsumers), 0}; + state_arr = fm_state_arr; + ckpt_arr = fm_state_ckpt_arr; + } else { + // This thread/agent is government. + my_state_size = kGovtStateSize; + my_state_shape = {kBatchSize, kNumGovts, kGovtStateSize}; + my_state_idx = { + kBlockId, + (unsigned int)(kWithinBlockAgentId - kNumConsumers - kNumFirms), 0}; + state_arr = govt_state_arr; + ckpt_arr = govt_state_ckpt_arr; + } + + float *my_state_arr = GetPointerFromMultiIndexFor3DFloatTensor( + state_arr, my_state_shape, my_state_idx); + + float *my_ckpt_ptr = GetPointerFromMultiIndexFor3DFloatTensor( + ckpt_arr, my_state_shape, my_state_idx); + + CopyFloatArraySlice(my_ckpt_ptr, my_state_size, my_state_arr); + + // anneal theta + if (ThisThreadAgentType == kConsumerType) { + my_state_arr[kGlobalStateSize + kIdxConsumerThetaOffset] *= + theta_anneal_factor; + } +} + +__device__ void GetAction(float *action_arr, float *index_to_action_arr, + int index, int agent_idx, int agent_action_size) { + + // it needs to be possible to call this for either agents or firms + // Note: each thread is an agent. 
+ for (int i = 0; i < agent_action_size; i++) { + action_arr[agent_idx * agent_action_size + i] = + index_to_action_arr[index * agent_action_size + i]; + } +} + +__global__ void CudaSampleFirmAndGovernmentActions( + float *fm_distr, int *fm_action_indices_arr, float *fm_actions_arr, + float *govt_distr, int *govt_action_indices_arr, float *govt_actions_arr) { + // Samples actions for firms and governments. Consumer actions are sampled in + // Pytorch... + const int kWithinBlockAgentId = threadIdx.x; + + // Unused threads should not do anything. + if (threadIdx.x >= kNumAgents) { + return; + } + + AgentType ThisThreadAgentType = GetAgentType(kWithinBlockAgentId); + + // Index into rand states array + int kThisThreadGlobalArrayIdx = blockIdx.x * kNumAgents + threadIdx.x; + curandState_t rng_state = *rng_state_arr[kThisThreadGlobalArrayIdx]; + *rng_state_arr[kThisThreadGlobalArrayIdx] = rng_state; + + // float cs_cum_dist[num_actions_consumer]; + float fm_cum_dist[kNumActionsFirm]; + float govt_cum_dist[kNumActionsGovernment]; + + float *my_cumul_dist; + float *my_dist; + int *my_indices; + float *my_actions; + float *index_to_action; + int this_thread_global_array_idx; + size_t my_num_actions; + int my_action_size; + + // Consumers have multiple action heads, hence sampling is more complicated. 
+ if (ThisThreadAgentType == kConsumerType) { + return; + } else if (ThisThreadAgentType == kFirmType) { + // on firm thread + my_cumul_dist = fm_cum_dist; + my_dist = fm_distr; + my_indices = fm_action_indices_arr; + my_actions = fm_actions_arr; + my_num_actions = kNumActionsFirm; + index_to_action = kFirmIndexToAction; + my_action_size = kActionSizeFirm; + this_thread_global_array_idx = + (blockIdx.x * kNumFirms) + (threadIdx.x - kNumConsumers); + } else { + my_cumul_dist = govt_cum_dist; + my_dist = govt_distr; + my_indices = govt_action_indices_arr; + my_actions = govt_actions_arr; + my_num_actions = kNumActionsGovernment; + index_to_action = kGovernmentIndexToAction; + my_action_size = kGovtActionSize; + this_thread_global_array_idx = + (blockIdx.x * kNumGovts) + (threadIdx.x - kNumConsumers - kNumFirms); + } + + // Compute CDF + my_cumul_dist[0] = my_dist[this_thread_global_array_idx * my_num_actions]; + for (int i = 1; i < my_num_actions; i++) { + my_cumul_dist[i] = + my_dist[this_thread_global_array_idx * my_num_actions + i] + + my_cumul_dist[i - 1]; + } + + // Given sampled action which is a float in [0, 1], find the corresponding + // discrete action. 
+ float sampled_float = curand_uniform(&rng_state); + const int index = + SearchIndex(my_cumul_dist, sampled_float, 0, (int)(my_num_actions - 1)); + my_indices[this_thread_global_array_idx] = index; + GetAction(my_actions, index_to_action, index, this_thread_global_array_idx, + my_action_size); +} + +__device__ float GetFirmProduction(float technology, float capital, float hours, + float alpha) { + if (hours < kLaborFloor) { + hours = 0.0; + } + return technology * powf(capital, 1.0 - alpha) * powf(hours, alpha); +} + +// -------------------- +// Simulation Logic +// -------------------- +__global__ void +CudaStep(float *cs_state_arr, float *cs_actions_arr, float *cs_rewards_arr, + float *cs_state_arr_batch, float *cs_rewards_arr_batch, + + float *fm_state_arr, int *fm_action_indices_arr, float *fm_actions_arr, + float *fm_rewards_arr, float *fm_state_arr_batch, + int *fm_actions_arr_batch, float *fm_rewards_arr_batch, + + float *govt_state_arr, int *govt_action_indices_arr, + float *govt_actions_arr, float *govt_rewards_arr, + float *govt_state_arr_batch, int *govt_actions_arr_batch, + float *govt_rewards_arr_batch, + float *consumer_aux_batch, + float *firm_aux_batch, + int iter) { + // This function should be called with 1 block per copy of the environment. + // Within a block, each agent should have a thread. + const int kWithinBlockAgentId = threadIdx.x; + + // return if we're on an extra thread not corresponding to an agent + if (kWithinBlockAgentId >= kNumAgents) { + return; + } + + // ------------------------------------- + // Start of variables and pointers defs. 
+ // ------------------------------------- + + // __shared__ variables are block-local: can be seen by each thread ** in the + // block ** + __shared__ float gross_demand_arr[kNumFirms]; + __shared__ int num_consumer_demand_arr[kNumFirms]; + __shared__ float hours_worked_arr[kNumFirms]; + __shared__ float total_actually_consumer_arr[kNumFirms]; + __shared__ float bought_by_importer_arr[kNumFirms]; + __shared__ float next_global_state_arr[kGlobalStateSize]; + __shared__ float tax_revenue_arr[kNumGovts]; + __shared__ float total_utility_arr[kNumGovts]; + __shared__ bool need_to_ration_this_good_arr[kNumFirms]; // whether or not to + // ration good i + + float net_demand_arr[kNumFirms]; // amount demanded after budget constraints + // by a consumer (ignore for non-consumers) + + int num_iter = (int)kMaxTime; + AgentType ThisThreadAgentType = GetAgentType(kWithinBlockAgentId); + float this_agent_reward = 0.0; + + // pointer to start of state vector + // state vector consists of global state, then + // agent-specific state global part is of same size for + // all agents, but needs to be sliced out of different + // arrays depending on agent type + float *my_global_state_ptr; + + float *my_action_arr; + + // pointer to start of state vector in batch history + float *batch_state_ptr; + + // sizes and indices for strided array access + dim3 my_state_shape, my_state_idx, action_shape; + + // shape for batched array of scalars (action ind and reward) + dim3 batch_single_shape, single_idx; + + // pointer to action index + int *batch_action_value_ptr; + + // pointer to batch reward + float *batch_reward_value_ptr; + + // pointers to action index for current arrays + int *my_action_value_ptr; + + // pointers to reward index for current arrays + float *my_reward_value_ptr; + + float *my_aux_batch_ptr; + + // ----------------------------------- + // End of variables and pointers defs. 
+ // ----------------------------------- + + if (ThisThreadAgentType == kConsumerType) { + // get current state + my_state_shape = {kBatchSize, kNumConsumers, kStateSizeConsumer}; + my_state_idx = {blockIdx.x, threadIdx.x, 0}; + my_global_state_ptr = GetPointerFromMultiIndexFor3DFloatTensor( + cs_state_arr, my_state_shape, my_state_idx); // index) + + // get current action + action_shape = {kBatchSize, kNumConsumers, kActionSizeConsumer}; + my_action_arr = GetPointerFromMultiIndexFor3DFloatTensor( + cs_actions_arr, action_shape, my_state_idx); + + // index into the episode history and save prev state into it + size_t my_batch_state_shape[] = {kBatchSize, num_iter, kNumConsumers, + kStateSizeConsumer}; + size_t my_batch_state_idx[] = {blockIdx.x, iter, threadIdx.x, 0}; + batch_state_ptr = GetPointerFromMultiIndexFor4DTensor( + cs_state_arr_batch, my_batch_state_shape, my_batch_state_idx); + CopyFloatArraySlice(my_global_state_ptr, kStateSizeConsumer, + batch_state_ptr); + + size_t my_aux_batch_shape[] = {kBatchSize, num_iter, kNumConsumers, kNumFirms}; + my_aux_batch_ptr = GetPointerFromMultiIndexFor4DTensor( + consumer_aux_batch, my_aux_batch_shape, my_batch_state_idx + ); + + + // Extract pointers to rewards, batch and current + batch_single_shape = {kBatchSize, (unsigned int)num_iter, kNumConsumers}; + single_idx = {blockIdx.x, (unsigned int)iter, threadIdx.x}; + + batch_reward_value_ptr = GetPointerFromMultiIndexFor3DFloatTensor( + cs_rewards_arr_batch, batch_single_shape, single_idx); + + my_reward_value_ptr = + &(cs_rewards_arr[blockIdx.x * kNumConsumers + threadIdx.x]); + } + + if (ThisThreadAgentType == kFirmType) { + // get current state + size_t this_thread_firm_id = (threadIdx.x - kNumConsumers); + my_state_shape = {kBatchSize, kNumFirms, kStateSizeFirm}; + my_state_idx = {blockIdx.x, (unsigned int)this_thread_firm_id, 0}; + my_global_state_ptr = GetPointerFromMultiIndexFor3DFloatTensor( + fm_state_arr, my_state_shape, my_state_idx); + + // get current 
action + action_shape = {kBatchSize, kNumFirms, kActionSizeFirm}; + my_action_arr = GetPointerFromMultiIndexFor3DFloatTensor( + fm_actions_arr, action_shape, my_state_idx); + + // index into the episode history and save prev state into it + size_t my_batch_state_shape[] = {kBatchSize, num_iter, kNumFirms, + kStateSizeFirm}; + size_t my_batch_state_idx[] = {blockIdx.x, iter, this_thread_firm_id, 0}; + batch_state_ptr = GetPointerFromMultiIndexFor4DTensor( + fm_state_arr_batch, my_batch_state_shape, my_batch_state_idx); + CopyFloatArraySlice(my_global_state_ptr, kStateSizeFirm, batch_state_ptr); + + dim3 my_aux_batch_shape = {kBatchSize, num_iter, kNumFirms}; + dim3 aux_batch_idx = {blockIdx.x, iter, this_thread_firm_id}; + my_aux_batch_ptr = GetPointerFromMultiIndexFor3DFloatTensor( + firm_aux_batch, my_aux_batch_shape, aux_batch_idx + ); + // extract pointers to action indices and rewards, batch and current + batch_single_shape = {kBatchSize, (unsigned int)num_iter, kNumFirms}; + single_idx = {blockIdx.x, (unsigned int)iter, + (unsigned int)this_thread_firm_id}; + batch_action_value_ptr = GetPointerFromMultiIndexFor3DIntTensor( + fm_actions_arr_batch, batch_single_shape, single_idx); + batch_reward_value_ptr = GetPointerFromMultiIndexFor3DFloatTensor( + fm_rewards_arr_batch, batch_single_shape, single_idx); + + const int kThisThreadFirmIdx = blockIdx.x * kNumFirms + this_thread_firm_id; + my_action_value_ptr = &(fm_action_indices_arr[kThisThreadFirmIdx]); + my_reward_value_ptr = &(fm_rewards_arr[kThisThreadFirmIdx]); + } + + if (ThisThreadAgentType == kGovernmentType) { + + int this_thread_govt_id = (threadIdx.x - kNumConsumers - kNumFirms); + + my_state_shape = {kBatchSize, kNumGovts, kGovtStateSize}; + my_state_idx = {blockIdx.x, (unsigned int)this_thread_govt_id, 0}; + my_global_state_ptr = GetPointerFromMultiIndexFor3DFloatTensor( + govt_state_arr, my_state_shape, my_state_idx); // index) + + // get current action + action_shape = {kBatchSize, kNumGovts, 
kGovtActionSize}; + my_action_arr = GetPointerFromMultiIndexFor3DFloatTensor( + govt_actions_arr, action_shape, my_state_idx); + + // index into the episode history and save prev state into it + size_t my_batch_state_shape[] = {kBatchSize, num_iter, kNumGovts, + kGovtStateSize}; + size_t my_batch_state_idx[] = {blockIdx.x, iter, this_thread_govt_id, 0}; + batch_state_ptr = GetPointerFromMultiIndexFor4DTensor( + govt_state_arr_batch, my_batch_state_shape, my_batch_state_idx); + CopyFloatArraySlice(my_global_state_ptr, kGovtStateSize, batch_state_ptr); + + // extract pointers to action indices and rewards, batch and current + batch_single_shape = {kBatchSize, (unsigned int)num_iter, kNumGovts}; + single_idx = {blockIdx.x, (unsigned int)iter, + (unsigned int)this_thread_govt_id}; + batch_action_value_ptr = GetPointerFromMultiIndexFor3DIntTensor( + govt_actions_arr_batch, batch_single_shape, single_idx); + batch_reward_value_ptr = GetPointerFromMultiIndexFor3DFloatTensor( + govt_rewards_arr_batch, batch_single_shape, single_idx); + + const int kThisThreadGovtIdx = blockIdx.x * kNumGovts + this_thread_govt_id; + my_action_value_ptr = &(govt_action_indices_arr[kThisThreadGovtIdx]); + my_reward_value_ptr = &(govt_rewards_arr[kThisThreadGovtIdx]); + } + + // ---------------------------------------------- + // State pointers and variables + // Create pointers to agent-specific state that will be updated by the + // simulation logic. 
+ // ---------------------------------------------- + float *my_state_arr = &(my_global_state_ptr[kGlobalStateSize]); + float *prices_arr = &(my_global_state_ptr[kIdxPricesOffset]); + float *wages_arr = &(my_global_state_ptr[kIdxWagesOffset]); + float *available_stock_arr = &(my_global_state_ptr[kIdxStockOffset]); + float time = my_global_state_ptr[kIdxTimeOffset]; + float income_tax_rate = my_global_state_ptr[kIdxIncomeTaxOffset]; + float corporate_tax_rate = my_global_state_ptr[kIdxCorporateTaxOffset]; + + // ------------------------------- + // Safely initialize shared memory + // ------------------------------- + if (ThisThreadAgentType == kFirmType) { + int this_thread_firm_id = threadIdx.x - kNumConsumers; + gross_demand_arr[this_thread_firm_id] = 0.0; + num_consumer_demand_arr[this_thread_firm_id] = 0; + hours_worked_arr[this_thread_firm_id] = 0.0; + total_actually_consumer_arr[this_thread_firm_id] = 0.0; + need_to_ration_this_good_arr[this_thread_firm_id] = false; + } + + if (ThisThreadAgentType == kGovernmentType) { + tax_revenue_arr[0] = 0.0; + } + + __syncthreads(); + // ------------------------------------- + // End - Safely initialize shared memory + // ------------------------------------- + + // ------------------------------------- + // Process actions + // ------------------------------------- + if (ThisThreadAgentType == kConsumerType) { + // amount demanded is just the first part of the action vector + float *this_agent_gross_demand_arr = + &(my_action_arr[kIdxConsumerDemandedOffset]); + const float this_agent_hours_worked = + my_action_arr[kIdxConsumerWorkedOffset]; + const int worked_for_this_firm_id = + (int)my_action_arr[kIdxConsumerWhichFirmOffset]; + + // here, need to scale demands to meet the budget. put them in a local array + // *budgetDemanded logic should be: compute total expenditure given prices. 
+ // if less than budget, copy existing demands + float __cost_of_demand = 0.0; + for (int i = 0; i < kNumFirms; i++) { + __cost_of_demand += this_agent_gross_demand_arr[i] * prices_arr[i]; + } + + // Scale demand to ensure that total demand at most equals total supply + // we want: my_state_arr being 0 always sends __scale_factor to 0 + float __scale_factor = 1.0; + + if ((__cost_of_demand > 0.0) && (__cost_of_demand > my_state_arr[0])) { + __scale_factor = my_state_arr[0] / __cost_of_demand; + } + + // otherwise scale all demands down to meet budget + // copy them into a demanded array + for (int i = 0; i < kNumFirms; i++) { + net_demand_arr[i] = __scale_factor * this_agent_gross_demand_arr[i]; + } + + // adding up demand across threads **in the block** + // somehow store amount demanded per firm in an array demanded (copy from + // action) also store amount worked per firm in array worked + + // Every thread executes atomicAdd_block in a memory-safe way. + for (int i = 0; i < kNumFirms; i++) { + // sum across threads in block + atomicAdd_block(&(gross_demand_arr[i]), net_demand_arr[i]); + + // increment count of consumers who want good i + if (net_demand_arr[i] > 0) { + atomicAdd_block(&(num_consumer_demand_arr[i]), 1); + } + } + + // increment total hours worked for firm i + atomicAdd_block(&(hours_worked_arr[worked_for_this_firm_id]), + this_agent_hours_worked); + } + + // wait for everyone to finish tallying up their adding + __syncthreads(); + + if (ThisThreadAgentType == kFirmType) { + // check each firm if rationing needed + int this_thread_firm_id = threadIdx.x - kNumConsumers; + need_to_ration_this_good_arr[this_thread_firm_id] = + ((gross_demand_arr[this_thread_firm_id] > 0.0) && (gross_demand_arr[this_thread_firm_id] > + available_stock_arr[this_thread_firm_id])); + } + + // wait for single thread to finish checking demands + __syncthreads(); + + // ---------------------------------------- + // Consumers: Rationing demand + Utility + // 
---------------------------------------- + // Logic: + // case 1: no overdemand + // case 2: overdemand, but some want less than 1/N -- fill everyone up to + // max(theirs, 1/N) case 3: overdemand, everyone wants more -- fill everyone + // up to max(theirs, 1/N) + float net_consumed_arr[kNumFirms]; // per consumer thread + // always add negligible positive money to avoid budgets becoming small + // negative numbers otherwise, when computing proportions, one may end up with + // negative stocks. + float cs_budget_delta = 0.01; + float fm_budget_delta = 0.01; + float capital_delta = 0.0; + + if (ThisThreadAgentType == kConsumerType) { + // find out how much consumed + for (int i = 0; i < kNumFirms; i++) { + float __ration_factor = 1.0; + + if (need_to_ration_this_good_arr[i]) { + // overdemanded + __ration_factor = available_stock_arr[i] / gross_demand_arr[i]; + } + + net_consumed_arr[i] = __ration_factor * net_demand_arr[i]; + + atomicAdd_block(&(total_actually_consumer_arr[i]), net_consumed_arr[i]); + } + + // store amount actually consumed for this consumer + CopyFloatArraySlice(net_consumed_arr, kNumFirms, my_aux_batch_ptr); + + // ---------------------------------------- + // Compute consumer utility + // ---------------------------------------- + float hours_worked = my_action_arr[kIdxConsumerWorkedOffset]; + int worked_for_this_firm_id = + (int)my_action_arr[kIdxConsumerWhichFirmOffset]; + + // budget is first elem of consumer state, theta second + float __theta = my_state_arr[1]; + + float __this_consumer_util = 0.0; + float __total_hours_worked = 0.0; + float __gross_income = 0.0; + + // Compute expenses + // Each consumer can consume from each firm, so loop over them. 
+ for (int i = 0; i < kNumFirms; i++) { + __this_consumer_util += GetCRRAUtil(net_consumed_arr[i], kCrraParam); + cs_budget_delta -= prices_arr[i] * net_consumed_arr[i]; + } + + // Compute income + __total_hours_worked += hours_worked; + __gross_income += wages_arr[worked_for_this_firm_id] * hours_worked; + float __income_tax_paid = income_tax_rate * __gross_income; + cs_budget_delta += (__gross_income - __income_tax_paid); + + // Update tax revenue (government) + atomicAdd_block(&(tax_revenue_arr[kIdxThisThreadGovtId]), + __income_tax_paid); + + // Compute reward + this_agent_reward += + __this_consumer_util - (__theta / 2.0) * (__total_hours_worked); + } + + __syncthreads(); + + + // ---------------------------------------- + // Firms Exports: Add external consumption. + // ---------------------------------------- + if (ThisThreadAgentType == kFirmType ) { + const int this_thread_firm_id = threadIdx.x - kNumConsumers; + if (kUseImporter) { + // sell remaining goods, if any, to importer, if price is high enough. 
+ float __this_firm_price = prices_arr[this_thread_firm_id]; + float __stock_after_consumers = available_stock_arr[this_thread_firm_id] - total_actually_consumer_arr[this_thread_firm_id]; + + if (__this_firm_price >= kImporterPrice) { + bought_by_importer_arr[this_thread_firm_id] = fmaxf(fminf(__stock_after_consumers, kImporterQuantity), 0.0); // floor to zero to avoid small negative floats + } + else { + bought_by_importer_arr[this_thread_firm_id] = 0.0; + } + } + else { + bought_by_importer_arr[this_thread_firm_id] = 0.0; + } + } + + // ---------------------------------------- + // Firms: Rationing demand + Utility + // ---------------------------------------- + if (ThisThreadAgentType == kFirmType) { + const int this_thread_firm_id = threadIdx.x - kNumConsumers; + + float __this_firm_revenue = + (total_actually_consumer_arr[this_thread_firm_id] + bought_by_importer_arr[this_thread_firm_id]) * + prices_arr[this_thread_firm_id]; + float __wages_paid = + hours_worked_arr[this_thread_firm_id] * wages_arr[this_thread_firm_id]; + + // Firms can invest in new capital. This increases their production factor + // (see GetFirmProduction). 
+ // here, after consumers consume, if price is >= than importer price, importer consumes up to their maximum of the goods, at the importer price + + float __gross_income = __this_firm_revenue - __wages_paid; + capital_delta = fmaxf(my_action_arr[2] * __gross_income, 0.0); + float __gross_profit = __gross_income - capital_delta; + float __corp_tax_paid = corporate_tax_rate * fmaxf(__gross_profit, 0.0); + fm_budget_delta = (__gross_profit - __corp_tax_paid); + if (kIncentivizeFirmActivity) { + if ((fm_budget_delta + my_state_arr[0]) > 0.0) { // if positive budget + this_agent_reward += (kFirmBoostRewardFactor * __this_firm_revenue); + } + } + this_agent_reward += (__gross_profit - __corp_tax_paid); + + atomicAdd_block(&(tax_revenue_arr[0]), __corp_tax_paid); + + float __production = GetFirmProduction(0.01, my_state_arr[kIdxFirmCapitalOffset], + hours_worked_arr[this_thread_firm_id], my_state_arr[kIdxFirmAlphaOffset]); + + // ------------------- + // Update global state + // ------------------- + // update prices in global state + next_global_state_arr[kIdxPricesOffset + this_thread_firm_id] = + my_action_arr[0]; + // update wages in global state + next_global_state_arr[kIdxWagesOffset + this_thread_firm_id] = + my_action_arr[1]; + // update stocks in global state + next_global_state_arr[kIdxStockOffset + this_thread_firm_id] = + available_stock_arr[this_thread_firm_id] - + total_actually_consumer_arr[this_thread_firm_id] - bought_by_importer_arr[this_thread_firm_id] + __production; + + *my_aux_batch_ptr = bought_by_importer_arr[this_thread_firm_id]; + + // update overdemanded in global state + next_global_state_arr[kIdxOverdemandOffset + this_thread_firm_id] = + need_to_ration_this_good_arr[this_thread_firm_id] ? 
1.0 : 0.0; + } + + // ----------------- + // Move time forward + // ----------------- + // Let first firm tick time + if (ThisThreadAgentType == kFirmType) { + const int this_thread_firm_id = threadIdx.x - kNumConsumers; + if (this_thread_firm_id == 0) { + next_global_state_arr[kIdxTimeOffset] = + my_global_state_ptr[kIdxTimeOffset] + 1.0; + } + } + + __syncthreads(); + + // ---------------------------------------- + // Subsidies + // ---------------------------------------- + // need to redistribute tax revenues + if (ThisThreadAgentType == kConsumerType) { + float __redistribution = tax_revenue_arr[0] / kNumConsumers; + cs_budget_delta += __redistribution; + } + + // ---------------------------------------- + // Social welfare + // ---------------------------------------- + // After this point consumers and firms know their final reward, so can inform + // the government thread via shared memory + + __syncthreads(); + + // ---------------------------------------- + // Government sets taxes for the next round + // ---------------------------------------- + if (ThisThreadAgentType == kGovernmentType) { + next_global_state_arr[kIdxIncomeTaxOffset] = my_action_arr[0]; + next_global_state_arr[kIdxCorporateTaxOffset] = my_action_arr[1]; + } + + __syncthreads(); + + // ----------------------------------------------- + // Copy next_global_state_arr into my global state + // ----------------------------------------------- + // All agents need to see the updated global state + CopyFloatArraySlice(next_global_state_arr, kGlobalStateSize, + my_global_state_ptr); + + // ----------------------------------------------- + // Update budgets + // ----------------------------------------------- + // Update budget (same for all agents) + if (ThisThreadAgentType == kConsumerType) { + my_state_arr[0] += cs_budget_delta; + } + if (ThisThreadAgentType == kFirmType) { + my_state_arr[0] += fm_budget_delta; + } + + // Add interest rate on savings + if ((ThisThreadAgentType == 
kConsumerType) || + (ThisThreadAgentType == kFirmType)) { + if (my_state_arr[0] > 0.0) { + my_state_arr[0] += my_state_arr[0] * kInterestRate; + } + } + + // Add new capital + if (ThisThreadAgentType == kFirmType) { + my_state_arr[kIdxFirmCapitalOffset] += capital_delta; + } + + // Add new capital + if ((ThisThreadAgentType == kFirmType) || + (ThisThreadAgentType == kGovernmentType)) { + *batch_action_value_ptr = *my_action_value_ptr; + } + + // Update rewards in global state + *my_reward_value_ptr = this_agent_reward; + *batch_reward_value_ptr = this_agent_reward; +} + +// ************************ +// End of extern "C" block. +// ************************ +} diff --git a/ai_economist/real_business_cycle/rbc/cuda_manager.py b/ai_economist/real_business_cycle/rbc/cuda_manager.py new file mode 100644 index 0000000..52b4503 --- /dev/null +++ b/ai_economist/real_business_cycle/rbc/cuda_manager.py @@ -0,0 +1,1930 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import itertools +import os +import random +from pathlib import Path + +import numpy as np +import pycuda +import pycuda.autoinit +import pycuda.driver as cuda_driver +import scipy +import scipy.stats +import torch +from pycuda.compiler import SourceModule +from torch.distributions import Categorical +from tqdm import tqdm + +from .constants import ( + consumer_state_scaling_factors, + firm_state_scaling_factors, + govt_state_scaling_factors, +) +from .networks import DeterministicPolicy, IndependentPolicyNet, PolicyNet +from .util import expand_to_digit_form, size_after_digit_expansion + +_NP_DTYPE = np.float32 + +# the below line is 'strangely' necessary to make PyTorch work with PyCUDA +pytorch_cuda_init_success = torch.cuda.FloatTensor(8) + + +# for opening source files within module +module_path = Path(__file__).parent + + +def 
def interval_list_contains(interval_list, step):
    """Return True if ``step`` lies in any half-open ``[lower, upper)`` interval."""
    return any(lower <= step < upper for (lower, upper) in interval_list)


class NoOpOptimizer:
    """
    Dummy Optimizer.

    Stands in for a real optimizer when a policy is frozen: ``step()``
    deliberately does nothing.
    """

    def __init__(self):
        pass

    def step(self):
        pass


def seed_everything(seed):
    """Seed torch, Python ``random``, and numpy for reproducibility."""
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)


def reverse_cumsum(x):
    """Suffix sums along the episode (``-2``) dim: out[t] = sum over s >= t of x[s].

    Identity used: x + total - prefix_cumsum(x) == suffix_cumsum(x),
    which avoids any tensor flipping.
    """
    return x + torch.sum(x, dim=-2, keepdims=True) - torch.cumsum(x, dim=-2)


def discounted_returns(rewards, gamma):
    """Discounted returns G[t] = r[t] + gamma * G[t+1] along the time (-2) dim.

    Args:
        rewards: tensor shaped (batch, T, num_agents).
        gamma: scalar discount factor.

    Returns:
        Tensor of the same shape holding the discounted return at each step.

    Bug fix: the accumulator was previously rolled forward without ``gamma``
    (``cum = r_t + cum``), yielding ``r_t + gamma * (undiscounted future sum)``
    rather than the true discounted return whenever gamma != 1. The recursion
    now carries the already-discounted return forward.
    """
    maxt = rewards.shape[-2]
    returns = torch.zeros_like(rewards)
    cumulative_return = 0
    for t in reversed(range(maxt)):
        returns[:, t, :] = rewards[:, t, :] + gamma * cumulative_return
        cumulative_return = returns[:, t, :]
    return returns


def compute_theta_coef(hparams_dict, episode):
    """Annealing coefficient for consumer labor-disutility theta.

    Returns 1.0 when annealing is disabled; otherwise ramps from 0 toward 1
    as ``1 - exp(-episode / decay_length)``.
    """
    anneal_dict = hparams_dict["agents"]["consumer_anneal_theta"]
    if not anneal_dict["anneal_on"]:
        return np.float32(1.0)
    exp_decay_length_in_steps = anneal_dict["exp_decay_length_in_steps"]
    return np.float32(1.0 - np.exp(-episode / exp_decay_length_in_steps))


def government_action_mask(hparams_dict, step):
    """Logit mask (or None) forbidding tax rates above the annealed ceiling.

    Improvement: returns None *before* allocating CUDA tensors when annealing
    is off (the original built three GPU tensors and then discarded them).
    """
    tax_annealing_params = hparams_dict["agents"]["government_anneal_taxes"]
    if not tax_annealing_params["anneal_on"]:
        return None

    government_actions_array = hparams_dict["agents"]["government_actions_array"]
    income_tax = torch.tensor(government_actions_array[:, 0]).cuda()
    corporate_tax = torch.tensor(government_actions_array[:, 1]).cuda()
    mask = torch.zeros(income_tax.shape[0]).cuda()

    a0 = tax_annealing_params["start"]
    max_tax = tax_annealing_params["increase_const"] * step + a0
    # -1000 on a logit effectively zeroes that action's probability.
    mask[(income_tax > max_tax) | (corporate_tax > max_tax)] -= 1000.0
    return mask


def firm_action_mask(hparams_dict, step):
    """Logit mask (or None) forbidding firm wage/price actions outside the
    annealed band.

    Improvement: returns None *before* allocating CUDA tensors when neither
    wage nor price annealing is on.
    """
    wage_annealing_params = hparams_dict["agents"]["firm_anneal_wages"]
    price_annealing_params = hparams_dict["agents"]["firm_anneal_prices"]
    if not (wage_annealing_params["anneal_on"] or price_annealing_params["anneal_on"]):
        return None

    firm_actions_array = hparams_dict["agents"]["firm_actions_array"]
    wages = torch.tensor(firm_actions_array[:, 1]).cuda()
    prices = torch.tensor(firm_actions_array[:, 0]).cuda()
    mask = torch.zeros(wages.shape[0]).cuda()

    if wage_annealing_params["anneal_on"]:
        a0 = wage_annealing_params["start"]
        max_wage = wage_annealing_params["increase_const"] * step + a0
        min_wage = -wage_annealing_params["decrease_const"] * step + a0
        mask[(wages < min_wage) | (wages > max_wage)] -= 1000.0
    if price_annealing_params["anneal_on"]:
        a0 = price_annealing_params["start"]
        max_price = price_annealing_params["increase_const"] * step + a0
        min_price = -price_annealing_params["decrease_const"] * step + a0
        mask[(prices < min_price) | (prices > max_price)] -= 1000.0

    return mask


def get_cuda_code(rel_path_to_cu_file, **preprocessor_vars_to_replace):
    """Read a .cu source file and build nvcc ``-D`` options for its macros.

    Each keyword argument ``foo=v`` becomes ``-D M_FOO=v``, matching the
    ``M_VARNAME`` preprocessor-macro convention used in firm_rbc.cu.

    Returns:
        (code_string, options_list) suitable for
        ``pycuda.compiler.SourceModule(code_string, options=options_list)``.
    """
    with open(module_path / rel_path_to_cu_file) as cudasource:
        code_string = cudasource.read()

    options_list = [
        f"-D M_{k.upper()}={v}" for k, v in preprocessor_vars_to_replace.items()
    ]
    return code_string, options_list
def add_penalty_for_no_ponzi(
    states, rewards, budget_offset, penalty_coef=20.0, penalty_scale=100.0
):
    """Subtract a terminal-step penalty proportional to each negative budget.

    Mutates ``rewards`` in place: only the last time step is penalized, by
    ``penalty_coef * max(0, -budget) / penalty_scale``.
    """
    # Negative budgets become positive violation magnitudes; others are 0.
    violations = torch.clamp_max(states[..., budget_offset], 0.0).neg()
    rewards[:, -1, :] = rewards[:, -1, :] - penalty_coef * violations / penalty_scale


def update_government_rewards(
    government_rewards, consumer_rewards, firm_rewards, cfg_dict
):
    """Overwrite the (single) government's reward with total consumer (+ firm) reward."""
    # The CUDA step function assigns the government exactly zero; verify that.
    assert (government_rewards == 0.0).all()

    totals = consumer_rewards.sum(dim=-1)
    agents_cfg = cfg_dict["agents"]
    if agents_cfg["government_counts_firm_reward"] == 1:
        firm_factor = agents_cfg.get("firm_reward_for_government_factor", 1.0)
        totals = totals + firm_factor * firm_rewards.sum(dim=-1)

    government_rewards[..., 0] = totals[:]  # one govt for now


def update_penalty_coef(
    states,
    budget_offset,
    prev_penalty_coef,
    penalty_step_size=0.01,
    penalty_scale=100.0,
):
    """Dual-ascent-style update: grow the penalty with the mean budget violation."""
    violations = torch.clamp_max(states[..., budget_offset], 0.0).neg()
    mean_scaled_violation = (violations / penalty_scale).mean().item()
    return prev_penalty_coef + penalty_step_size * mean_scaled_violation


def get_actions_from_inds(action_inds, agents_dict):
    """Translate sampled consumer action indices into concrete action values.

    The first ``num_firms`` index slots select per-firm consumption levels,
    the next slot selects hours worked, and the last slot (which firm the
    consumer works for) is passed through unchanged.
    """
    inds = action_inds.cpu().to(torch.long)

    consumption_lookup = torch.tensor(
        agents_dict["consumer_consumption_actions_array"]
    )
    work_lookup = torch.tensor(agents_dict["consumer_work_actions_array"])

    num_firms = agents_dict["num_firms"]
    idx_hours_worked = num_firms
    idx_which_firm = num_firms + 1

    out_shape = inds.shape[:-1] + (agents_dict["consumer_action_dim"],)
    actions_out = torch.zeros(out_shape)

    for firm in range(num_firms):
        actions_out[..., firm] = consumption_lookup[inds[..., firm], :].squeeze(
            dim=-1
        )

    actions_out[..., idx_hours_worked] = work_lookup[
        inds[..., idx_hours_worked], :
    ].squeeze(dim=-1)
    actions_out[..., idx_which_firm] = inds[..., idx_which_firm]

    return actions_out


def anneal_entropy_coef(entropy_dict, step):
    """Exponentially decayed entropy-bonus coefficient (floored), or 1.0 when off."""
    if entropy_dict is None or not entropy_dict["anneal_on"]:
        return 1.0
    decayed = np.exp(-step / entropy_dict["exp_decay_length_in_steps"])
    return max(decayed, entropy_dict.get("coef_floor", 0.0))


def get_grad_norm(policy):
    """Sum of squared L2 norms of all parameter gradients that exist."""
    total = 0.0
    for param in policy.parameters():
        if param.grad is not None:
            total += param.grad.data.norm(2).item() ** 2
    return total


def get_ev(adv, returns, cutoff=-1.0):
    """Explained variance 1 - Var[adv]/Var[returns], clipped from below at ``cutoff``."""
    ev = (1 - (adv.detach().var() / returns.detach().var())).item()
    return max(cutoff, ev)
def consumer_ppo_step(
    policy,
    states,
    actions,
    rewards,
    optimizer,
    gamma_const,
    entropy_val=0.0,
    value_loss_weight=1.0,
    ppo_num_updates=3,
    reward_scale=1.0,
    clip_grad_norm=None,
    clip_param=0.1,
):
    """One PPO update for the multi-head consumer policy.

    ``policy(states)`` must return (list of per-head categorical probability
    tensors, value predictions); ``actions[..., k]`` indexes head ``k``.
    Performs ``ppo_num_updates`` clipped-surrogate gradient steps against a
    frozen snapshot of the pre-update policy.
    """
    # Snapshot the pre-update policy: detached value predictions and the
    # (negated) summed log-probs of the taken actions act as PPO's "old" terms.
    multi_action_probs, old_value_preds = policy(states)
    old_value_preds = old_value_preds.detach()

    scaled_rewards = rewards / reward_scale
    G_discounted_returns = discounted_returns(scaled_rewards, gamma_const)

    sum_old_neg_log_probs = 0.0
    for head, head_probs in enumerate(multi_action_probs):
        dist = Categorical(head_probs)
        sum_old_neg_log_probs = sum_old_neg_log_probs - dist.log_prob(
            actions[..., head]
        )
    sum_old_neg_log_probs = sum_old_neg_log_probs.detach()

    assert not G_discounted_returns.requires_grad
    assert not sum_old_neg_log_probs.requires_grad
    assert not old_value_preds.requires_grad

    huber = torch.nn.SmoothL1Loss()
    for _ in range(ppo_num_updates):
        multi_action_probs, value_preds = policy(states)

        # Clipped value loss (PPO2-style): bound how far the value head moves
        # from its pre-update prediction, and take the pessimistic maximum.
        value_pred_clipped = old_value_preds + (value_preds - old_value_preds).clamp(
            -clip_param, clip_param
        )
        value_loss_new = huber(value_preds.squeeze(dim=-1), G_discounted_returns)
        value_loss_clipped = huber(
            value_pred_clipped.squeeze(dim=-1), G_discounted_returns
        )
        value_loss = torch.max(value_loss_new, value_loss_clipped).mean()

        # Advantages use a detached baseline so no gradient reaches the value
        # head through the policy term.
        advantages = G_discounted_returns - value_preds.detach().squeeze(dim=-1)
        assert not advantages.requires_grad
        standardized_advantages = (advantages - advantages.mean()) / (
            advantages.std() + 1e-6
        )

        sum_mean_entropy = 0.0
        sum_neg_log_probs = 0.0
        for head, head_probs in enumerate(multi_action_probs):
            dist = Categorical(head_probs)
            sum_neg_log_probs = sum_neg_log_probs - dist.log_prob(actions[..., head])
            sum_mean_entropy = sum_mean_entropy + dist.entropy().mean()

        assert sum_neg_log_probs.requires_grad
        # exp(new_logp - old_logp); both sums hold *negated* log-probs, hence
        # the sign arrangement below.
        ratio = torch.exp(-sum_neg_log_probs + sum_old_neg_log_probs)
        surr1 = ratio * standardized_advantages
        surr2 = (
            torch.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param)
            * standardized_advantages
        )
        ppo_loss = -torch.min(surr1, surr2).mean()

        loss = (
            ppo_loss - entropy_val * sum_mean_entropy + value_loss_weight * value_loss
        )

        optimizer.zero_grad()
        loss.backward()
        if clip_grad_norm is not None:
            torch.nn.utils.clip_grad_norm_(
                policy.parameters(), max_norm=clip_grad_norm
            )
        optimizer.step()


def ppo_step(
    policy,
    states,
    actions,
    rewards,
    optimizer,
    gamma_const,
    entropy_val=0.0,
    value_loss_weight=1.0,
    ppo_num_updates=3,
    actions_mask=None,
    reward_scale=1.0,
    clip_grad_norm=None,
    clip_param=0.1,
):
    """One PPO update for a single-head (firm/government) categorical policy.

    ``actions_mask`` is forwarded to the policy (used to forbid annealed-out
    actions); otherwise identical in structure to ``consumer_ppo_step``.
    """
    # Frozen "old" quantities from the pre-update policy.
    probs, old_value_preds = policy(states, actions_mask=actions_mask)
    old_value_preds = old_value_preds.detach()

    scaled_rewards = rewards / reward_scale
    G_discounted_returns = discounted_returns(scaled_rewards, gamma_const)

    old_neg_log_probs = (-1.0 * Categorical(probs).log_prob(actions)).detach()

    assert not G_discounted_returns.requires_grad
    assert not old_neg_log_probs.requires_grad
    assert not old_value_preds.requires_grad

    huber = torch.nn.SmoothL1Loss()
    for _ in range(ppo_num_updates):
        probs, value_preds = policy(states, actions_mask=actions_mask)

        # Clipped value loss, pessimistic max of clipped/unclipped variants.
        value_pred_clipped = old_value_preds + (value_preds - old_value_preds).clamp(
            -clip_param, clip_param
        )
        value_loss_new = huber(value_preds.squeeze(dim=-1), G_discounted_returns)
        value_loss_clipped = huber(
            value_pred_clipped.squeeze(dim=-1), G_discounted_returns
        )
        value_loss = torch.max(value_loss_new, value_loss_clipped).mean()

        advantages = G_discounted_returns - value_preds.detach().squeeze(dim=-1)
        assert not advantages.requires_grad
        standardized_advantages = (advantages - advantages.mean()) / (
            advantages.std() + 1e-6
        )

        dist = Categorical(probs)
        neg_log_probs = -1.0 * dist.log_prob(actions)
        mean_entropy = dist.entropy().mean()

        assert neg_log_probs.requires_grad
        # exp(new_logp - old_logp); both terms are negated log-probs.
        ratio = torch.exp(-neg_log_probs + old_neg_log_probs)
        surr1 = ratio * standardized_advantages
        surr2 = (
            torch.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param)
            * standardized_advantages
        )
        ppo_loss = -torch.min(surr1, surr2).mean()

        loss = ppo_loss - entropy_val * mean_entropy + value_loss_weight * value_loss

        optimizer.zero_grad()
        loss.backward()
        if clip_grad_norm is not None:
            torch.nn.utils.clip_grad_norm_(
                policy.parameters(), max_norm=clip_grad_norm
            )
        optimizer.step()
def consumer_policy_gradient_step(
    policy,
    states,
    actions,
    rewards,
    optimizer,
    gamma_const,
    entropy_val=0.0,
    value_loss_weight=1.0,
    reward_scale=1.0,
    clip_grad_norm=None,
):
    """One vanilla policy-gradient update for the multi-head consumer policy.

    ``policy(states)`` must return (list of per-head categorical probability
    tensors, value predictions); the value head provides an advantage baseline.
    """
    multi_action_probs, value_preds = policy(states)

    G_discounted_returns = discounted_returns(rewards / reward_scale, gamma_const)

    # Value head trained with Huber loss against the returns.
    huber = torch.nn.SmoothL1Loss()
    value_loss = huber(value_preds.squeeze(dim=-1), G_discounted_returns).mean()

    # Advantage baseline; detach so the policy term does not train the value head.
    advantages = G_discounted_returns - value_preds.detach().squeeze(dim=-1)
    assert not advantages.requires_grad
    standardized_advantages = (advantages - advantages.mean()) / (
        advantages.std() + 1e-6
    )

    sum_mean_entropy = 0.0
    sum_neg_log_probs = 0.0
    for head, head_probs in enumerate(multi_action_probs):
        dist = Categorical(head_probs)
        sum_neg_log_probs = sum_neg_log_probs - dist.log_prob(actions[..., head])
        sum_mean_entropy = sum_mean_entropy + dist.entropy().mean()

    pg_loss = (sum_neg_log_probs * standardized_advantages).mean()
    assert sum_neg_log_probs.requires_grad

    loss = pg_loss - entropy_val * sum_mean_entropy + value_loss_weight * value_loss

    optimizer.zero_grad()
    loss.backward()
    if clip_grad_norm is not None:
        torch.nn.utils.clip_grad_norm_(policy.parameters(), max_norm=clip_grad_norm)
    optimizer.step()


def policy_gradient_step(
    policy,
    states,
    actions,
    rewards,
    optimizer,
    gamma_const,
    entropy_val=0.0,
    value_loss_weight=1.0,
    actions_mask=None,
    reward_scale=1.0,
    clip_grad_norm=None,
):
    """One vanilla policy-gradient update for a single-head categorical policy.

    ``actions_mask`` is forwarded to the policy to suppress disallowed actions.
    """
    optimizer.zero_grad()
    probs, value_preds = policy(states, actions_mask=actions_mask)

    G_discounted_returns = discounted_returns(rewards / reward_scale, gamma_const)

    huber = torch.nn.SmoothL1Loss()
    value_loss = huber(value_preds.squeeze(dim=-1), G_discounted_returns).mean()

    # Advantage baseline (no gradient into the value head from the policy term).
    advantages = G_discounted_returns - value_preds.detach().squeeze(dim=-1)
    assert not advantages.requires_grad
    standardized_advantages = (advantages - advantages.mean()) / (
        advantages.std() + 1e-6
    )
    assert not standardized_advantages.requires_grad

    dist = Categorical(probs)
    pg_loss = (-dist.log_prob(actions) * standardized_advantages).mean()
    assert pg_loss.requires_grad

    loss = (
        pg_loss - entropy_val * dist.entropy().mean() + value_loss_weight * value_loss
    )
    loss.backward()

    if clip_grad_norm is not None:
        torch.nn.utils.clip_grad_norm_(policy.parameters(), max_norm=clip_grad_norm)
    optimizer.step()


def save_dense_log(
    save_dir,
    epi,
    agent_type_arrays,
    agent_action_arrays,
    agent_aux_arrays,
):
    """Dump per-agent-type state/action/reward rollouts for episode ``epi`` to .npz files."""
    print(f"Saving dense log at episode {epi}")
    for agent_type in ["consumer", "firm", "government"]:
        states_batch, actions_batch, rewards_batch = agent_type_arrays[agent_type]
        aux = agent_aux_arrays[agent_type]
        aux_np = aux.cpu().numpy() if aux is not None else None
        out_path = Path(save_dir) / Path(f"episode_{epi}_{agent_type}.npz")
        np.savez(
            str(out_path),
            states=states_batch.cpu().numpy(),
            actions=actions_batch.cpu().numpy(),
            rewards=rewards_batch.cpu().numpy(),
            action_array=agent_action_arrays[agent_type],
            aux_array=aux_np,
        )


def save_policy_parameters(
    save_dir,
    epi,
    consumer_policy,
    firm_policy,
    government_policy,
    freeze_firms,
    freeze_govt,
):
    """Checkpoint policy weights.

    Each saved policy gets an episode-stamped file plus a ``*_latest.pt`` file
    that is overwritten on every call. Frozen firm/government policies
    (non-None ``freeze_*``) are not re-saved.
    """
    print(f"saving model parameters at episode {epi}")

    def _checkpoint(policy, stem):
        # Episode-stamped file + "latest" file overwritten every call.
        models_dir = Path(save_dir) / Path("saved_models")
        stamped_path = models_dir / Path(f"{stem}_{epi}.pt")
        latest_path = models_dir / Path(f"{stem}_latest.pt")
        os.makedirs(stamped_path.parent, exist_ok=True)
        torch.save(policy.state_dict(), stamped_path)
        torch.save(policy.state_dict(), latest_path)

    _checkpoint(consumer_policy, "consumer_policy")
    if freeze_firms is None:
        _checkpoint(firm_policy, "firm_policy")
    if freeze_govt is None:
        _checkpoint(government_policy, "government_policy")
None: + government_path = ( + Path(save_dir) / Path("saved_models") / Path(f"government_policy_{epi}.pt") + ) + government_path_latest = ( + Path(save_dir) / Path("saved_models") / Path("government_policy_latest.pt") + ) + + os.makedirs(government_path.parent, exist_ok=True) + torch.save(government_policy.state_dict(), government_path) + torch.save(government_policy.state_dict(), government_path_latest) + + +class ConsumerFirmRunManagerBatchParallel: + """ + The Real Business Cycle Experiment Management Class. + """ + + def __init__(self, cfg_dict, freeze_firms=None, freeze_govt=None): + self.cfg_dict = cfg_dict + self.train_dict = cfg_dict["train"] + self.agents_dict = cfg_dict["agents"] + self.world_dict = cfg_dict["world"] + self.save_dense_every = self.train_dict["save_dense_every"] + self.save_dir = self.train_dict["save_dir"] + + self.freeze_firms = freeze_firms + self.freeze_govt = freeze_govt + + self.__init_cuda_functions() + self.__init_cuda_data_structs() + self.__init_torch_data() + + def __init_cuda_data_structs(self): + __td = self.train_dict + __ad = self.agents_dict + __wd = self.world_dict + batch_size = __td["batch_size"] + num_consumers = __ad["num_consumers"] + num_firms = __ad["num_firms"] + num_governments = __ad["num_governments"] + firm_action_dim = __ad["firm_action_dim"] + government_action_dim = __ad["government_action_dim"] + consumer_state_dim = __ad["consumer_state_dim"] + firm_state_dim = __ad["firm_state_dim"] + government_state_dim = __ad["government_state_dim"] + global_state_dim = __ad["global_state_dim"] + consumer_endowment = __wd["initial_consumer_endowment"] + firm_endowment = __wd["initial_firm_endowment"] + initial_stocks = __wd["initial_stocks"] + initial_wages = __wd["initial_wages"] + initial_prices = __wd["initial_prices"] + consumer_theta = __wd["consumer_theta"] + + consumer_rewards = np.zeros((batch_size, num_consumers), dtype=_NP_DTYPE) + consumer_states = np.zeros( + (batch_size, num_consumers, consumer_state_dim), 
dtype=_NP_DTYPE + ) + + firm_action_indices = np.zeros((batch_size, num_firms), dtype=np.int32) + firm_actions = np.zeros( + (batch_size, num_firms, firm_action_dim), dtype=_NP_DTYPE + ) + firm_rewards = np.zeros((batch_size, num_firms), dtype=_NP_DTYPE) + firm_states = np.zeros((batch_size, num_firms, firm_state_dim), dtype=_NP_DTYPE) + + government_action_indices = np.zeros( + (batch_size, num_governments), dtype=np.int32 + ) + government_actions = np.zeros( + (batch_size, num_governments, government_action_dim), dtype=_NP_DTYPE + ) + government_rewards = np.zeros((batch_size, num_governments), dtype=_NP_DTYPE) + government_states = np.zeros( + (batch_size, num_governments, government_state_dim), dtype=_NP_DTYPE + ) + + # initialize states to right values here + + # global state init + # for consumers, firms, and governments + for state_arr in [consumer_states, firm_states, government_states]: + # set prices to 1.0 + state_arr[:, :, 0:num_firms] = initial_prices + # set wages to 0.0 + state_arr[:, :, num_firms : (2 * num_firms)] = initial_wages + # set stocks to 0.0 + state_arr[:, :, (2 * num_firms) : (3 * num_firms)] = initial_stocks + # set goods overdemanded to 0.0 + state_arr[:, :, (3 * num_firms) : (4 * num_firms)] = 0.0 + # set taxes to 0.0 + state_arr[:, :, (4 * num_firms)] = 0.0 + state_arr[:, :, (4 * num_firms) + 1] = 0.0 + + # consumer states, set theta and initial budget + if "paretoscaletheta" in __wd: + pareto_vals = np.expand_dims( + scipy.stats.pareto.ppf( + (np.arange(num_consumers) / num_consumers), __wd["paretoscaletheta"] + ), + axis=0, + ) + consumer_states[:, :, consumer_state_dim - 1] = consumer_theta * ( + 1.0 / pareto_vals + ) + else: + consumer_states[:, :, consumer_state_dim - 1] = consumer_theta + consumer_states[:, :, global_state_dim] = consumer_endowment + + # firm states + # capital + if __wd.get("initial_capital", None) == "proportional": + for i in range(num_firms): + firm_states[:, i, global_state_dim + 1] = ((i + 1) / 10.0) * 
2.0 + elif __wd.get("initial_capital", None) == "twolevel": + for i in range(num_firms): + if i < (num_firms // 2): + firm_states[:, i, global_state_dim + 1] = 5000 + else: + firm_states[:, i, global_state_dim + 1] = 10000 + else: + firm_states[:, :, global_state_dim + 1] = 1.0 + + # production alpha + if __wd["production_alpha"] == "proportional": + half_firms = num_firms // 2 + for i in range(num_firms): + firm_states[:, i, global_state_dim + 2] = ((i % half_firms) + 1) * 0.2 + elif __wd["production_alpha"] == "fixed_array": + alpha_arr = [0.2, 0.3, 0.4, 0.6, 0.8, 0.2, 0.3, 0.4, 0.6, 0.8] + for i in range(num_firms): + firm_states[:, i, global_state_dim + 2] = alpha_arr[i] + else: + for i in range(num_firms): + firm_states[:, i, global_state_dim + 2] = __wd["production_alpha"] + + # set one-hot fields correctly by index for each firm + onehot_rows = np.eye(num_firms) + firm_states[:, :, (global_state_dim + 3) :] = onehot_rows + firm_states[:, :, global_state_dim] = firm_endowment + + # government states + # for now, nothing beyond global state + + self.consumer_states_gpu_tensor = torch.from_numpy(consumer_states).cuda() + # these are now tensors bc sampling for consumers via pytorch + self.consumer_rewards_gpu_pycuda = cuda_driver.mem_alloc( + consumer_rewards.nbytes + ) + self.consumer_states_checkpoint_gpu_pycuda = cuda_driver.mem_alloc( + consumer_states.nbytes + ) + cuda_driver.memcpy_htod(self.consumer_rewards_gpu_pycuda, consumer_rewards) + cuda_driver.memcpy_htod( + self.consumer_states_checkpoint_gpu_pycuda, consumer_states + ) + + self.firm_states_gpu_tensor = torch.from_numpy(firm_states).cuda() + self.firm_action_indices_gpu_pycuda = cuda_driver.mem_alloc( + firm_action_indices.nbytes + ) + self.firm_actions_gpu_pycuda = cuda_driver.mem_alloc(firm_actions.nbytes) + self.firm_rewards_gpu_pycuda = cuda_driver.mem_alloc(firm_rewards.nbytes) + self.firm_states_checkpoint_gpu_pycuda = cuda_driver.mem_alloc( + firm_states.nbytes + ) + 
cuda_driver.memcpy_htod( + self.firm_action_indices_gpu_pycuda, firm_action_indices + ) + cuda_driver.memcpy_htod(self.firm_actions_gpu_pycuda, firm_actions) + cuda_driver.memcpy_htod(self.firm_rewards_gpu_pycuda, firm_rewards) + cuda_driver.memcpy_htod(self.firm_states_checkpoint_gpu_pycuda, firm_states) + + self.government_states_gpu_tensor = torch.from_numpy(government_states).cuda() + self.government_action_indices_gpu_pycuda = cuda_driver.mem_alloc( + government_action_indices.nbytes + ) + self.government_actions_gpu_pycuda = cuda_driver.mem_alloc( + government_actions.nbytes + ) + self.government_rewards_gpu_pycuda = cuda_driver.mem_alloc( + government_rewards.nbytes + ) + self.government_states_checkpoint_gpu_pycuda = cuda_driver.mem_alloc( + government_states.nbytes + ) + cuda_driver.memcpy_htod( + self.government_action_indices_gpu_pycuda, government_action_indices + ) + cuda_driver.memcpy_htod(self.government_actions_gpu_pycuda, government_actions) + cuda_driver.memcpy_htod(self.government_rewards_gpu_pycuda, government_rewards) + cuda_driver.memcpy_htod( + self.government_states_checkpoint_gpu_pycuda, government_states + ) + + def __init_torch_data(self): + + __td = self.train_dict + __ad = self.agents_dict + + batch_size = __td["batch_size"] + num_consumers = __ad["num_consumers"] + num_firms = __ad["num_firms"] + num_governments = __ad["num_governments"] + consumer_action_dim = __ad["consumer_action_dim"] + consumer_state_dim = __ad["consumer_state_dim"] + firm_state_dim = __ad["firm_state_dim"] + government_state_dim = __ad["government_state_dim"] + num_iters = int(self.world_dict["maxtime"]) + + consumer_states_batch = torch.zeros( + batch_size, + num_iters, + num_consumers, + consumer_state_dim, + dtype=torch.float32, + device="cpu", + ) + consumer_actions_single = torch.zeros( + batch_size, + num_consumers, + num_firms + 1 + 1, + dtype=torch.int32, + device="cpu", + ) + consumer_actions_batch = torch.zeros( + batch_size, + num_iters, + 
num_consumers, + num_firms + 1 + 1, + dtype=torch.int32, + device="cpu", + ) + + # auxiliary state info that is not part of observables. + # currently just the realized consumption + consumer_aux_batch = torch.zeros( + batch_size, + num_iters, + num_consumers, + num_firms, + dtype=torch.float32, + device="cpu", + ) + + consumer_rewards_batch = torch.zeros( + batch_size, num_iters, num_consumers, dtype=torch.float32, device="cpu" + ) + self.consumer_states_batch_gpu_tensor = consumer_states_batch.cuda() + self.consumer_actions_batch_gpu_tensor = consumer_actions_batch.cuda() + self.consumer_actions_index_single_gpu_tensor = consumer_actions_single.cuda() + self.consumer_actions_single_gpu_tensor = torch.zeros( + batch_size, + num_consumers, + consumer_action_dim, + dtype=torch.float32, + device="cpu", + ).cuda() + self.consumer_rewards_batch_gpu_tensor = consumer_rewards_batch.cuda() + self.consumer_aux_batch_gpu_tensor = consumer_aux_batch.cuda() + + firm_states_batch = torch.zeros( + batch_size, + num_iters, + num_firms, + firm_state_dim, + dtype=torch.float32, + device="cpu", + ) + firm_actions_batch = torch.zeros( + batch_size, num_iters, num_firms, dtype=torch.int32, device="cpu" + ) + firm_rewards_batch = torch.zeros( + batch_size, num_iters, num_firms, dtype=torch.float32, device="cpu" + ) + firm_aux_batch = torch.zeros( + batch_size, num_iters, num_firms, dtype=torch.float32, device="cpu" + ) + self.firm_states_batch = firm_states_batch.cuda() + self.firm_actions_batch = firm_actions_batch.cuda() + self.firm_rewards_batch = firm_rewards_batch.cuda() + self.firm_aux_batch = firm_aux_batch.cuda() + + government_states_batch = torch.zeros( + batch_size, + num_iters, + num_governments, + government_state_dim, + dtype=torch.float32, + device="cpu", + ) + government_actions_batch = torch.zeros( + batch_size, num_iters, num_governments, dtype=torch.int32, device="cpu" + ) + government_rewards_batch = torch.zeros( + batch_size, num_iters, num_governments, 
dtype=torch.float32, device="cpu" + ) + self.government_states_batch = government_states_batch.cuda() + self.government_actions_batch = government_actions_batch.cuda() + self.government_rewards_batch = government_rewards_batch.cuda() + + def __init_cuda_functions(self): + + __td = self.train_dict + __ad = self.agents_dict + __wd = self.world_dict + + if self.freeze_firms is not None: + countfirmreward = 0 + else: + countfirmreward = self.agents_dict["government_counts_firm_reward"] + + code, compiler_options = get_cuda_code( + Path("cuda") / Path("firm_rbc.cu"), + batchsize=__td["batch_size"], + numconsumers=__ad["num_consumers"], + numfirms=__ad["num_firms"], + numgovernments=__ad["num_governments"], + maxtime=__wd["maxtime"], + # numactionsconsumer=__ad["consumer_num_actions"], + numactionsconsumer=__ad["consumer_num_work_actions"], + numactionsfirm=__ad["firm_num_actions"], + numactionsgovernment=__ad["government_num_actions"], + interestrate=__wd["interest_rate"], + crra_param=__wd["crra_param"], + shouldboostfirmreward=int(__td["should_boost_firm_reward"]), + boostfirmrewardfactor=__td["boost_firm_reward_factor"], + countfirmreward=countfirmreward, + importerprice=__wd["importer_price"], + importerquantity=__wd["importer_quantity"], + laborfloor=__wd.get("labor_floor", 0.0), + useimporter=__wd["use_importer"], + ) + + mod = SourceModule(code, options=compiler_options, no_extern_c=True) + self.mod = mod + + # -------------------------------------------------------------------- + # Define Consumer actions -- maanged in Pytorch + # -------------------------------------------------------------------- + self.consumption_action_tensor = torch.tensor( + __ad["consumer_consumption_actions_array"].astype(_NP_DTYPE) + ).cuda() + self.work_action_tensor = torch.tensor( + __ad["consumer_work_actions_array"].astype(_NP_DTYPE) + ).cuda() + + # -------------------------------------------------------------------- + # Define Firm actions -- maanged in CUDA + # 
-------------------------------------------------------------------- + firm_index_to_action_gpu, _ = mod.get_global("kFirmIndexToAction") + cuda_driver.memcpy_htod( + firm_index_to_action_gpu, + __ad["firm_actions_array"].astype(_NP_DTYPE), + ) + + # -------------------------------------------------------------------- + # Define Govt actions -- maanged in CUDA + # -------------------------------------------------------------------- + government_index_to_action_gpu, _ = mod.get_global("kGovernmentIndexToAction") + cuda_driver.memcpy_htod( + government_index_to_action_gpu, + __ad["government_actions_array"].astype(_NP_DTYPE), + ) + + # -------------------------------------------------------------------- + # Get handles to CUDA methods + # -------------------------------------------------------------------- + self.cuda_init_random = mod.get_function("CudaInitKernel") + self.cuda_reset_env = mod.get_function("CudaResetEnv") + self.cuda_sample_actions = mod.get_function( + "CudaSampleFirmAndGovernmentActions" + ) + self.cuda_step = mod.get_function("CudaStep") + self.cuda_free_mem = mod.get_function("CudaFreeRand") + + def _update_consumer_actions_inplace(self): + # call after consumer_actions_single is updated + __ad = self.agents_dict + + # Add asserts when ``loading'' arrays + # assert consumption_action_array.shape == (1, 1, 1) + # assert len(consumption_action_array.shape) == 3 + + num_firms = __ad["num_firms"] + idx_hours = num_firms + idx_which_firm = num_firms + 1 + for i in range(num_firms): + + consumption_actions_at_firm_i = ( + self.consumer_actions_index_single_gpu_tensor[..., i].to(torch.long) + ) + + self.consumer_actions_single_gpu_tensor[ + ..., i + ] = self.consumption_action_tensor[ + consumption_actions_at_firm_i, : + ].squeeze( + dim=-1 + ) + + consumer_hours_worked = self.consumer_actions_index_single_gpu_tensor[ + ..., idx_hours + ].to(torch.long) + + self.consumer_actions_single_gpu_tensor[ + ..., num_firms + ] = 
self.work_action_tensor[consumer_hours_worked, :].squeeze(dim=-1) + + self.consumer_actions_single_gpu_tensor[ + ..., num_firms + 1 + ] = self.consumer_actions_index_single_gpu_tensor[..., idx_which_firm] + + def sample_consumer_actions_and_store(self, consumer_probs_list): + # Every consumer has A action heads, output as a list of tensors. + # Sample from each of these lists and store the results. + + with torch.no_grad(): + for i, probs in enumerate(consumer_probs_list): + dist = Categorical(probs) + samples = dist.sample() + self.consumer_actions_index_single_gpu_tensor[..., i] = samples + + self._update_consumer_actions_inplace() + + def consumers_will_train_this_episode(self, epi): + __ad = self.agents_dict + if "training_schedule_mod" in self.agents_dict: + mod_val = epi % __ad["training_schedule_mod"] + return mod_val <= __ad["consumer_mod_threshold"] + if "consumer_training_list" in self.agents_dict: + return interval_list_contains(__ad["consumer_training_list"], epi) + if "train_consumers_every" in self.agents_dict: + mod_val = epi % __ad["train_consumers_every"] + else: + mod_val = 0 + return epi >= self.agents_dict.get("consumer_training_start", 0) and ( + mod_val == 0 + ) + + def firms_will_train_this_episode(self, epi): + __ad = self.agents_dict + if "training_schedule_mod" in self.agents_dict: + mod_val = epi % __ad["training_schedule_mod"] + return mod_val > __ad["consumer_mod_threshold"] + if "firm_training_list" in self.agents_dict: + return interval_list_contains(__ad["firm_training_list"], epi) and ( + self.freeze_firms is None + ) + if "train_firms_every" in self.agents_dict: + mod_val = epi % __ad["train_firms_every"] + else: + mod_val = 0 + return ( + (epi >= self.agents_dict.get("firm_training_start", 0)) + and (self.freeze_firms is None) + and (mod_val == 0) + ) + + def governments_will_train_this_episode(self, epi): + __ad = self.agents_dict + if "government_training_list" in self.agents_dict: + return 
interval_list_contains(__ad["government_training_list"], epi) and ( + self.freeze_govt is None + ) + if "train_government_every" in self.agents_dict: + mod_val = epi % self.agents_dict["train_government_every"] + else: + mod_val = 0 + return ( + (epi >= self.agents_dict.get("government_training_start", 0)) + and (self.freeze_govt is None) + and (mod_val == 0) + ) + + def bestresponse_train( + self, train_type, num_episodes, rollout_path, ep_str="latest", checkpoint=100 + ): + # train one single type only + # load all policies from state dict + # reset all the environment stuff + + __td = self.train_dict + __ad = self.agents_dict + num_iters = int(self.world_dict["maxtime"]) + num_consumers = __ad["num_consumers"] + num_firms = __ad["num_firms"] + num_governments = __ad["num_governments"] + num_agents = num_consumers + num_firms + num_governments + block = (num_agents, 1, 1) + grid = (__td["batch_size"], 1) + + seed_everything(__td["seed"]) + self.cuda_init_random(np.int32(__td["seed"]), block=block, grid=grid) + + # -------------------------------------------- + # Define Consumer policy + optimizers + # -------------------------------------------- + lr = __td["lr"] + + consumer_expanded_size = size_after_digit_expansion( + __ad["consumer_state_dim"], + __ad["consumer_digit_dims"], + __td["digit_representation_size"], + ) + + consumer_policy = IndependentPolicyNet( + consumer_expanded_size, + [__ad["consumer_num_consume_actions"]] * num_firms + + [ + __ad["consumer_num_work_actions"], + __ad["consumer_num_whichfirm_actions"], + ], + norm_consts=( + torch.zeros(consumer_expanded_size).cuda(), # don't center for now + consumer_state_scaling_factors(self.cfg_dict), + ), + ).to("cuda") + consumer_policy.load_state_dict( + torch.load( + rollout_path + / Path("saved_models") + / Path(f"consumer_policy_{ep_str}.pt") + ) + ) + + consumer_optim = torch.optim.Adam(consumer_policy.parameters(), lr=lr) + firm_expanded_size = size_after_digit_expansion( + __ad["firm_state_dim"], 
+ __ad["firm_digit_dims"], + __td["digit_representation_size"], + ) + firm_policy = PolicyNet( + firm_expanded_size, + __ad["firm_num_actions"], + norm_consts=( + torch.zeros(firm_expanded_size).cuda(), + firm_state_scaling_factors(self.cfg_dict), + ), + ).to("cuda") + + firm_policy.load_state_dict( + torch.load( + rollout_path / Path("saved_models") / Path(f"firm_policy_{ep_str}.pt") + ) + ) + + firm_optim = torch.optim.Adam(firm_policy.parameters(), lr=lr) + government_expanded_size = size_after_digit_expansion( + __ad["government_state_dim"], + __ad["government_digit_dims"], + __td["digit_representation_size"], + ) + + government_policy = PolicyNet( + government_expanded_size, + __ad["government_num_actions"], + norm_consts=( + torch.zeros(government_expanded_size).cuda(), + govt_state_scaling_factors(self.cfg_dict), + ), + ).to("cuda") + + government_policy.load_state_dict( + torch.load( + rollout_path + / Path("saved_models") + / Path(f"government_policy_{ep_str}.pt") + ) + ) + + government_optim = torch.optim.Adam(government_policy.parameters(), lr=lr) + rewards = [] + + agent_type_arrays = { + "consumer": ( + self.consumer_states_batch_gpu_tensor, + self.consumer_actions_batch_gpu_tensor, + self.consumer_rewards_batch_gpu_tensor, + ), + "firm": ( + self.firm_states_batch, + self.firm_actions_batch, + self.firm_rewards_batch, + ), + "government": ( + self.government_states_batch, + self.government_actions_batch, + self.government_rewards_batch, + ), + } + + agent_action_arrays = { + "consumer": __ad["consumer_work_actions_array"], + "firm": __ad["firm_actions_array"], + "government": __ad["government_actions_array"], + } + + agent_aux_arrays = { + "consumer": (self.consumer_aux_batch_gpu_tensor), + "firm": (self.firm_aux_batch), + "government": None, + } + + pbar = tqdm(range(num_episodes)) + for epi in pbar: + annealed_entropy_coef = 0.1 # later, do some computation to anneal this + self.cuda_reset_env( + CudaTensorHolder(self.consumer_states_gpu_tensor), + 
CudaTensorHolder(self.firm_states_gpu_tensor), + CudaTensorHolder(self.government_states_gpu_tensor), + self.consumer_states_checkpoint_gpu_pycuda, + self.firm_states_checkpoint_gpu_pycuda, + self.government_states_checkpoint_gpu_pycuda, + np.float32(1.0), + block=block, + grid=grid, + ) + + for _iter in range(num_iters): + + # ------------------------ + # Run policy and get probs + # ------------------------ + with torch.no_grad(): + # here, we must perform digit scaling + consumer_probs_list, _ = consumer_policy( + expand_to_digit_form( + self.consumer_states_gpu_tensor, + __ad["consumer_digit_dims"], + __td["digit_representation_size"], + ) + ) + firm_probs, _ = firm_policy( + expand_to_digit_form( + self.firm_states_gpu_tensor, + __ad["firm_digit_dims"], + __td["digit_representation_size"], + ), + actions_mask=None, + ) + government_probs, _ = government_policy( + expand_to_digit_form( + self.government_states_gpu_tensor, + __ad["government_digit_dims"], + __td["digit_representation_size"], + ), + actions_mask=None, + ) + + # ------------------------ + # Get action samples + # ------------------------ + # Sample consumer actions using PyTorch here on GPU! + self.sample_consumer_actions_and_store(consumer_probs_list) + + # Sample firms + govt actions using PyCUDA on GPU! 
+ self.cuda_sample_actions( + CudaTensorHolder(firm_probs), + self.firm_action_indices_gpu_pycuda, + self.firm_actions_gpu_pycuda, + CudaTensorHolder(government_probs), + self.government_action_indices_gpu_pycuda, + self.government_actions_gpu_pycuda, + block=block, + grid=grid, + ) + + # ------------------------ + # Step on GPU + # ------------------------ + self.cuda_step( + CudaTensorHolder( + # size: batches x n_consumers x consumer_state float + self.consumer_states_gpu_tensor + ), + CudaTensorHolder( + # size: batches x n_consumers x consumer_action_dim float + self.consumer_actions_single_gpu_tensor + ), + # size: batches x n_consumers x 1 float + self.consumer_rewards_gpu_pycuda, + CudaTensorHolder( + self.consumer_states_batch_gpu_tensor + ), # size: batches x episode x n_consumers x consumer_state float + CudaTensorHolder(self.consumer_rewards_batch_gpu_tensor), + CudaTensorHolder(self.firm_states_gpu_tensor), + self.firm_action_indices_gpu_pycuda, + self.firm_actions_gpu_pycuda, + self.firm_rewards_gpu_pycuda, + CudaTensorHolder(self.firm_states_batch), + CudaTensorHolder(self.firm_actions_batch), + CudaTensorHolder(self.firm_rewards_batch), + CudaTensorHolder(self.government_states_gpu_tensor), + self.government_action_indices_gpu_pycuda, + self.government_actions_gpu_pycuda, + self.government_rewards_gpu_pycuda, + CudaTensorHolder(self.government_states_batch), + CudaTensorHolder(self.government_actions_batch), + CudaTensorHolder(self.government_rewards_batch), + CudaTensorHolder(self.consumer_aux_batch_gpu_tensor), + CudaTensorHolder(self.firm_aux_batch), + np.int32(_iter), + block=block, + grid=grid, + ) + self.consumer_actions_batch_gpu_tensor[ + :, _iter, :, : + ] = self.consumer_actions_index_single_gpu_tensor + update_government_rewards( + self.government_rewards_batch, + self.consumer_rewards_batch_gpu_tensor, + self.firm_rewards_batch, + self.cfg_dict, + ) + if train_type == "consumer": + consumer_reward_scale = self.agents_dict.get( + 
"consumer_reward_scale", 1.0 + ) + consumer_policy_gradient_step( + consumer_policy, + expand_to_digit_form( + self.consumer_states_batch_gpu_tensor, + __ad["consumer_digit_dims"], + __td["digit_representation_size"], + ), + self.consumer_actions_batch_gpu_tensor, + self.consumer_rewards_batch_gpu_tensor, + consumer_optim, + __td["gamma"], + entropy_val=annealed_entropy_coef * __td["entropy"], + value_loss_weight=__td["value_loss_weight"], + reward_scale=consumer_reward_scale, + clip_grad_norm=self.train_dict.get("clip_grad_norm", None), + ) + rewards.append(self.consumer_rewards_batch_gpu_tensor.mean().item()) + elif train_type == "firm": + firm_reward_scale = self.agents_dict.get("firm_reward_scale", 1.0) + policy_gradient_step( + firm_policy, + expand_to_digit_form( + self.firm_states_batch, + __ad["firm_digit_dims"], + __td["digit_representation_size"], + ), + self.firm_actions_batch, + self.firm_rewards_batch, + firm_optim, + __td["gamma"], + entropy_val=annealed_entropy_coef * __td["entropy"], + value_loss_weight=__td["value_loss_weight"], + actions_mask=None, + reward_scale=firm_reward_scale, + clip_grad_norm=self.train_dict.get("clip_grad_norm", None), + ) + rewards.append(self.firm_rewards_batch.mean().item()) + elif train_type == "government": + government_reward_scale = self.agents_dict.get( + "government_reward_scale", 1.0 + ) + policy_gradient_step( + government_policy, + expand_to_digit_form( + self.government_states_batch, + __ad["government_digit_dims"], + __td["digit_representation_size"], + ), + self.government_actions_batch, + self.government_rewards_batch, + government_optim, + __td["gamma"], + entropy_val=annealed_entropy_coef * __td["entropy"], + value_loss_weight=__td["value_loss_weight"], + actions_mask=None, + reward_scale=government_reward_scale, + clip_grad_norm=self.train_dict.get("clip_grad_norm", None), + ) + rewards.append(self.government_rewards_batch.mean().item()) + pbar.set_postfix({"reward": rewards[-1]}) + if (epi % checkpoint) 
== 0: + # save policy every checkpoint steps + save_policy_parameters( + str(Path(self.save_dir) / f"br{train_type}"), + epi, + consumer_policy, + firm_policy, + government_policy, + self.freeze_firms, + self.freeze_govt, + ) + save_dense_log( + str(Path(self.save_dir) / f"br{train_type}"), + epi, + agent_type_arrays, + agent_action_arrays, + agent_aux_arrays, + ) + + print( + f"{train_type}: starting reward {rewards[0]}, " + f"ending reward {rewards[-1]}, " + f"improvement in reward after {num_episodes}: {rewards[-1] - rewards[0]}" + ) + + self.cuda_free_mem(block=block, grid=grid) + return rewards + + def train(self): + + __td = self.train_dict + __ad = self.agents_dict + + # Create logdir + os.makedirs(__td["save_dir"], exist_ok=True) + + # Constants + num_iters = int(self.world_dict["maxtime"]) + num_consumers = __ad["num_consumers"] + num_firms = __ad["num_firms"] + num_governments = __ad["num_governments"] + num_agents = num_consumers + num_firms + num_governments + + # CUDA params: defines data shape on the GPU + block = (num_agents, 1, 1) + grid = (__td["batch_size"], 1) + + # Set seeds + seed_everything(__td["seed"]) + self.cuda_init_random(np.int32(__td["seed"]), block=block, grid=grid) + + # -------------------------------------------- + # Define Consumer policy + optimizers + # -------------------------------------------- + lr = __td["lr"] + + consumer_expanded_size = size_after_digit_expansion( + __ad["consumer_state_dim"], + __ad["consumer_digit_dims"], + __td["digit_representation_size"], + ) + + consumer_policy = IndependentPolicyNet( + consumer_expanded_size, + [__ad["consumer_num_consume_actions"]] * num_firms + + [ + __ad["consumer_num_work_actions"], + __ad["consumer_num_whichfirm_actions"], + ], + norm_consts=( + torch.zeros(consumer_expanded_size).cuda(), # don't center for now + consumer_state_scaling_factors(self.cfg_dict), + ), + ).to("cuda") + + consumer_optim = torch.optim.Adam( + consumer_policy.parameters(), + lr=lr * 
self.agents_dict.get("consumer_lr_multiple", 1.0), + ) + + # -------------------------------------------- + # Define Firm policy + optimizers + # -------------------------------------------- + firm_expanded_size = size_after_digit_expansion( + __ad["firm_state_dim"], + __ad["firm_digit_dims"], + __td["digit_representation_size"], + ) + + if self.freeze_firms is not None: + firm_policy = DeterministicPolicy( + firm_expanded_size, + __ad["firm_num_actions"], + self.freeze_firms, + ) + firm_optim = NoOpOptimizer() + else: + firm_policy = PolicyNet( + firm_expanded_size, + __ad["firm_num_actions"], + norm_consts=( + torch.zeros(firm_expanded_size).cuda(), + firm_state_scaling_factors(self.cfg_dict), + ), + ).to("cuda") + + firm_optim = torch.optim.Adam( + firm_policy.parameters(), + lr=lr * self.agents_dict.get("firm_lr_multiple", 1.0), + ) + + # -------------------------------------------- + # Define Government policy + optimizers + # -------------------------------------------- + government_expanded_size = size_after_digit_expansion( + __ad["government_state_dim"], + __ad["government_digit_dims"], + __td["digit_representation_size"], + ) + + if self.freeze_govt is not None: + government_policy = DeterministicPolicy( + government_expanded_size, + __ad["government_num_actions"], + self.freeze_govt, + ) + government_optim = NoOpOptimizer() + else: + government_policy = PolicyNet( + government_expanded_size, + __ad["government_num_actions"], + norm_consts=( + torch.zeros(government_expanded_size).cuda(), + govt_state_scaling_factors(self.cfg_dict), + ), + ).to("cuda") + government_optim = torch.optim.Adam( + government_policy.parameters(), + lr=lr * self.agents_dict.get("government_lr_multiple", 1.0), + ) + + # -------------------------------------------- + # Logging + # -------------------------------------------- + # For looking up GPU tensors + # -------------------------------------------- + agent_type_arrays = { + "consumer": ( + 
self.consumer_states_batch_gpu_tensor, + self.consumer_actions_batch_gpu_tensor, + self.consumer_rewards_batch_gpu_tensor, + ), + "firm": ( + self.firm_states_batch, + self.firm_actions_batch, + self.firm_rewards_batch, + ), + "government": ( + self.government_states_batch, + self.government_actions_batch, + self.government_rewards_batch, + ), + } + + agent_action_arrays = { + "consumer": __ad["consumer_work_actions_array"], + "firm": __ad["firm_actions_array"], + "government": __ad["government_actions_array"], + } + + agent_aux_arrays = { + "consumer": (self.consumer_aux_batch_gpu_tensor), + "firm": (self.firm_aux_batch), + "government": None, + } + + # -------------------------------------------- + # Training policy XYZ starts at which step? + # -------------------------------------------- + firm_no_ponzi_coef = self.agents_dict.get("firm_noponzi_start", 0.0) + consumer_no_ponzi_coef = self.agents_dict.get("consumer_noponzi_start", 0.0) + lagr_num_steps = self.train_dict.get("lagr_num_steps", 1) + + firm_training_start = self.agents_dict.get("firm_training_start", 0) + consumer_training_start = self.agents_dict.get("consumer_training_start", 0) + government_training_start = self.agents_dict.get("government_training_start", 0) + + firm_action_start = self.agents_dict.get("firm_begin_anneal_action", 0) + government_action_start = self.agents_dict.get( + "government_begin_anneal_action", 0 + ) + + # -------------------------------------------- + # Training loop + # -------------------------------------------- + if self.train_dict.get("infinite_episodes", False): + epi_iterator = itertools.count(0, 1) + else: + epi_iterator = range(__td["num_episodes"]) + + final_epi = None + for epi in tqdm(epi_iterator): + + firm_actions_mask = firm_action_mask( + self.cfg_dict, + max(epi - firm_action_start, 0), + ) + government_actions_mask = government_action_mask( + self.cfg_dict, + max(epi - government_action_start, 0), + ) + theta_coef = compute_theta_coef(self.cfg_dict, epi) 
+ + # Reset environment for all agents + self.cuda_reset_env( + CudaTensorHolder(self.consumer_states_gpu_tensor), + CudaTensorHolder(self.firm_states_gpu_tensor), + CudaTensorHolder(self.government_states_gpu_tensor), + self.consumer_states_checkpoint_gpu_pycuda, + self.firm_states_checkpoint_gpu_pycuda, + self.government_states_checkpoint_gpu_pycuda, + theta_coef, + block=block, + grid=grid, + ) + + # Learning Loop + for _iter in range(num_iters): + + # ------------------------ + # Run policy and get probs + # ------------------------ + with torch.no_grad(): + # here, we must perform digit scaling + consumer_probs_list, _ = consumer_policy( + expand_to_digit_form( + self.consumer_states_gpu_tensor, + __ad["consumer_digit_dims"], + __td["digit_representation_size"], + ) + ) + firm_probs, _ = firm_policy( + expand_to_digit_form( + self.firm_states_gpu_tensor, + __ad["firm_digit_dims"], + __td["digit_representation_size"], + ), + actions_mask=firm_actions_mask, + ) + government_probs, _ = government_policy( + expand_to_digit_form( + self.government_states_gpu_tensor, + __ad["government_digit_dims"], + __td["digit_representation_size"], + ), + actions_mask=government_actions_mask, + ) + + # ------------------------ + # Get action samples + # ------------------------ + # Sample consumer actions using PyTorch here on GPU! + self.sample_consumer_actions_and_store(consumer_probs_list) + + # Sample firms + govt actions using PyCUDA on GPU! 
+ self.cuda_sample_actions( + CudaTensorHolder(firm_probs), + self.firm_action_indices_gpu_pycuda, + self.firm_actions_gpu_pycuda, + CudaTensorHolder(government_probs), + self.government_action_indices_gpu_pycuda, + self.government_actions_gpu_pycuda, + block=block, + grid=grid, + ) + + # ------------------------ + # Step on GPU + # ------------------------ + self.cuda_step( + CudaTensorHolder( + # size: batches x n_consumers x consumer_state float + self.consumer_states_gpu_tensor + ), + CudaTensorHolder( + # size: batches x n_consumers x consumer_action_dim float + self.consumer_actions_single_gpu_tensor + ), + # size: batches x n_consumers x 1 float + self.consumer_rewards_gpu_pycuda, + CudaTensorHolder( + # size: batches x episode x n_consumers x consumer_state float + self.consumer_states_batch_gpu_tensor + ), + CudaTensorHolder(self.consumer_rewards_batch_gpu_tensor), + CudaTensorHolder(self.firm_states_gpu_tensor), + self.firm_action_indices_gpu_pycuda, + self.firm_actions_gpu_pycuda, + self.firm_rewards_gpu_pycuda, + CudaTensorHolder(self.firm_states_batch), + CudaTensorHolder(self.firm_actions_batch), + CudaTensorHolder(self.firm_rewards_batch), + CudaTensorHolder(self.government_states_gpu_tensor), + self.government_action_indices_gpu_pycuda, + self.government_actions_gpu_pycuda, + self.government_rewards_gpu_pycuda, + CudaTensorHolder(self.government_states_batch), + CudaTensorHolder(self.government_actions_batch), + CudaTensorHolder(self.government_rewards_batch), + CudaTensorHolder(self.consumer_aux_batch_gpu_tensor), + CudaTensorHolder(self.firm_aux_batch), + np.int32(_iter), + block=block, + grid=grid, + ) + self.consumer_actions_batch_gpu_tensor[ + :, _iter, :, : + ] = self.consumer_actions_index_single_gpu_tensor + + # ------------------------ + # Add penalty for no-Ponzi + # ------------------------ + add_penalty_for_no_ponzi( + self.firm_states_gpu_tensor, + self.firm_rewards_batch, + __ad["global_state_dim"], + penalty_coef=firm_no_ponzi_coef, + 
) + add_penalty_for_no_ponzi( + self.consumer_states_gpu_tensor, + self.consumer_rewards_batch_gpu_tensor, + __ad["global_state_dim"], + penalty_coef=consumer_no_ponzi_coef, + penalty_scale=__ad["consumer_penalty_scale"], + ) + + # add government rewards -- sum of consumer rewards + update_government_rewards( + self.government_rewards_batch, + self.consumer_rewards_batch_gpu_tensor, + self.firm_rewards_batch, + self.cfg_dict, + ) + + # Save dense logs + # ------------------------ + if (epi % __td["save_model_every"]) == 0: + save_policy_parameters( + self.save_dir, + epi, + consumer_policy, + firm_policy, + government_policy, + self.freeze_firms, + self.freeze_govt, + ) + if (epi % self.save_dense_every) == 0: + save_dense_log( + self.save_dir, + epi, + agent_type_arrays, + agent_action_arrays, + agent_aux_arrays, + ) + + # -------------------------------- + # Curriculum: Train Consumers + # -------------------------------- + if self.consumers_will_train_this_episode(epi): + consumer_entropy_coef = anneal_entropy_coef( + self.agents_dict.get("consumer_anneal_entropy", None), + epi - consumer_training_start, + ) + consumer_reward_scale = self.agents_dict.get( + "consumer_reward_scale", 1.0 + ) + if __td["use_ppo"]: + consumer_ppo_step( + consumer_policy, + expand_to_digit_form( + self.consumer_states_batch_gpu_tensor, + __ad["consumer_digit_dims"], + __td["digit_representation_size"], + ), + self.consumer_actions_batch_gpu_tensor, + self.consumer_rewards_batch_gpu_tensor, + consumer_optim, + __td["gamma"], + entropy_val=consumer_entropy_coef * __td["entropy"], + value_loss_weight=__td["value_loss_weight"], + reward_scale=consumer_reward_scale, + ppo_num_updates=__td["ppo_num_updates"], + clip_param=__td["ppo_clip_param"], + clip_grad_norm=self.train_dict.get("clip_grad_norm", None), + ) + else: + consumer_policy_gradient_step( + consumer_policy, + expand_to_digit_form( + self.consumer_states_batch_gpu_tensor, + __ad["consumer_digit_dims"], + 
__td["digit_representation_size"], + ), + self.consumer_actions_batch_gpu_tensor, + self.consumer_rewards_batch_gpu_tensor, + consumer_optim, + __td["gamma"], + entropy_val=consumer_entropy_coef * __td["entropy"], + value_loss_weight=__td["value_loss_weight"], + reward_scale=consumer_reward_scale, + clip_grad_norm=self.train_dict.get("clip_grad_norm", None), + ) + if (epi % lagr_num_steps) == 0: + consumer_no_ponzi_coef = update_penalty_coef( + self.consumer_states_gpu_tensor, + __ad["global_state_dim"], + consumer_no_ponzi_coef, + penalty_step_size=__ad["consumer_noponzi_eta"], + penalty_scale=__ad["consumer_penalty_scale"], + ) + else: + pass + + # -------------------------------- + # Curriculum: Train Firms + # -------------------------------- + if self.firms_will_train_this_episode(epi): + firm_entropy_coef = anneal_entropy_coef( + self.agents_dict.get("firm_anneal_entropy", None), + epi - firm_training_start, + ) + firm_reward_scale = self.agents_dict.get("firm_reward_scale", 1.0) + if __td["use_ppo"]: + ppo_step( + firm_policy, + expand_to_digit_form( + self.firm_states_batch, + __ad["firm_digit_dims"], + __td["digit_representation_size"], + ), + self.firm_actions_batch, + self.firm_rewards_batch, + firm_optim, + __td["gamma"], + entropy_val=firm_entropy_coef * __td["entropy"], + value_loss_weight=__td["value_loss_weight"], + actions_mask=firm_actions_mask, + reward_scale=firm_reward_scale, + ppo_num_updates=__td["ppo_num_updates"], + clip_param=__td["ppo_clip_param"], + clip_grad_norm=self.train_dict.get("clip_grad_norm", None), + ) + else: + policy_gradient_step( + firm_policy, + expand_to_digit_form( + self.firm_states_batch, + __ad["firm_digit_dims"], + __td["digit_representation_size"], + ), + self.firm_actions_batch, + self.firm_rewards_batch, + firm_optim, + __td["gamma"], + entropy_val=firm_entropy_coef * __td["entropy"], + value_loss_weight=__td["value_loss_weight"], + actions_mask=firm_actions_mask, + reward_scale=firm_reward_scale, + 
clip_grad_norm=self.train_dict.get("clip_grad_norm", None), + ) + + if (epi % lagr_num_steps) == 0: + firm_no_ponzi_coef = update_penalty_coef( + self.firm_states_gpu_tensor, + __ad["global_state_dim"], + firm_no_ponzi_coef, + penalty_step_size=__ad["firm_noponzi_eta"], + ) + else: + pass + + # -------------------------------- + # Curriculum: Train Governments + # -------------------------------- + if self.governments_will_train_this_episode(epi): + government_entropy_coef = anneal_entropy_coef( + self.agents_dict.get("govt_anneal_entropy", None), + epi - government_training_start, + ) + government_reward_scale = self.agents_dict.get( + "government_reward_scale", 1.0 + ) + if __td["use_ppo"]: + ppo_step( + government_policy, + expand_to_digit_form( + self.government_states_batch, + __ad["government_digit_dims"], + __td["digit_representation_size"], + ), + self.government_actions_batch, + self.government_rewards_batch, + government_optim, + __td["gamma"], + entropy_val=government_entropy_coef * __td["entropy"], + value_loss_weight=__td["value_loss_weight"], + actions_mask=government_actions_mask, + reward_scale=government_reward_scale, + ppo_num_updates=__td["ppo_num_updates"], + clip_param=__td["ppo_clip_param"], + clip_grad_norm=self.train_dict.get("clip_grad_norm", None), + ) + else: + policy_gradient_step( + government_policy, + expand_to_digit_form( + self.government_states_batch, + __ad["government_digit_dims"], + __td["digit_representation_size"], + ), + self.government_actions_batch, + self.government_rewards_batch, + government_optim, + __td["gamma"], + entropy_val=government_entropy_coef * __td["entropy"], + value_loss_weight=__td["value_loss_weight"], + actions_mask=government_actions_mask, + reward_scale=government_reward_scale, + clip_grad_norm=self.train_dict.get("clip_grad_norm", None), + ) + else: + pass + + # Store the value of the final episode + final_epi = epi + + # ------------------------------------------------------------------ + # 
Post-Training (may not reach this with an infinite training loop!) + # Save FINAL dense log. + # ------------------------------------------------------------------ + save_dense_log( + self.save_dir, + "final", + agent_type_arrays, + agent_action_arrays, + agent_aux_arrays, + ) + save_policy_parameters( + self.save_dir, + final_epi, + consumer_policy, + firm_policy, + government_policy, + self.freeze_firms, + self.freeze_govt, + ) + + # ------------------------------------------------------------------ + # Clean up + # ------------------------------------------------------------------ + + self.cuda_free_mem(block=block, grid=grid) + + +class CudaTensorHolder(pycuda.driver.PointerHolderBase): + """ + A class that facilitates casting tensors to pointers. + """ + + def __init__(self, t): + super().__init__() + self.t = t + self.gpudata = t.data_ptr() + + def get_pointer(self): + return self.t.data_ptr() diff --git a/ai_economist/real_business_cycle/rbc/networks.py b/ai_economist/real_business_cycle/rbc/networks.py new file mode 100644 index 0000000..245b187 --- /dev/null +++ b/ai_economist/real_business_cycle/rbc/networks.py @@ -0,0 +1,114 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import torch +import torch.nn.functional as F +from torch import nn + + +class IndependentPolicyNet(nn.Module): + """ + Represents a policy network with separate heads for different types of actions. 
+ Thus, the resulting policy will take the form + $pi(a | s) = pi_1(a_1 | s) pi_2(a_2 | s)...$ + """ + + def __init__(self, state_size, action_size_list, norm_consts=None): + super().__init__() + + self.state_size = state_size + self.action_size_list = action_size_list + if norm_consts is not None: + self.norm_center, self.norm_scale = norm_consts + else: + self.norm_center = torch.zeros(self.state_size).cuda() + self.norm_scale = torch.ones(self.state_size).cuda() + self.fc1 = nn.Linear(state_size, 128) + self.fc2 = nn.Linear(128, 128) + # policy network head + self.action_heads = nn.ModuleList( + [nn.Linear(128, action_size) for action_size in action_size_list] + ) + # value network head + self.fc4 = nn.Linear(128, 1) + + def forward(self, x): + assert x.shape[-1] == self.state_size # Check if the last dimension matches + + # Normalize the model input + new_shape = tuple(1 for _ in x.shape[:-1]) + (x.shape[-1],) + view_center = self.norm_center.view(new_shape) + view_scale = self.norm_scale.view(new_shape) + x = (x - view_center) / view_scale + + # Feed forward + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + probs = [F.softmax(action_head(x), dim=-1) for action_head in self.action_heads] + vals = self.fc4(x) + return probs, vals + + +class PolicyNet(nn.Module): + """ + The policy network class to output acton probabilities and the value function. 
+ """ + + def __init__(self, state_size, action_size, norm_consts=None): + super().__init__() + + self.state_size = state_size + self.action_size = action_size + if norm_consts is not None: + self.norm_center, self.norm_scale = norm_consts + else: + self.norm_center = torch.zeros(self.state_size).cuda() + self.norm_scale = torch.ones(self.state_size).cuda() + self.fc1 = nn.Linear(state_size, 128) + self.fc2 = nn.Linear(128, 128) + # policy network head + self.fc3 = nn.Linear(128, action_size) + # value network head + self.fc4 = nn.Linear(128, 1) + + def forward(self, x, actions_mask=None): + # here, the action mask should be large negative constants for actions + # that shouldn't be allowed. + new_shape = tuple(1 for _ in x.shape[:-1]) + (x.shape[-1],) + view_center = self.norm_center.view(new_shape) + view_scale = self.norm_scale.view(new_shape) + x = (x - view_center) / view_scale + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + if actions_mask is not None: + probs = F.softmax(self.fc3(x) + actions_mask, dim=-1) + else: + probs = F.softmax(self.fc3(x), dim=-1) + vals = self.fc4(x) + return probs, vals + + +class DeterministicPolicy: + """ + A policy class that outputs deterministic actions. 
+ """ + + def __init__(self, state_size, action_size, action_choice): + self.state_size = state_size + self.action_size = action_size + self.action_choice = action_choice + self.actions_out = torch.zeros(action_size, device="cuda") + self.actions_out[self.action_choice] = 1.0 + + def __call__(self, x, actions_mask=None): + return self.forward(x) + + def forward(self, x): + # output enough copies of the delta function + # distribution of the right size given x + x_batch_shapes = x.shape[:-1] + repeat_vals = x_batch_shapes + (1,) + return self.actions_out.repeat(*repeat_vals), None diff --git a/ai_economist/real_business_cycle/rbc/util.py b/ai_economist/real_business_cycle/rbc/util.py new file mode 100644 index 0000000..6e09a15 --- /dev/null +++ b/ai_economist/real_business_cycle/rbc/util.py @@ -0,0 +1,110 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +import torch + + +def dict_merge(dct, merge_dct): + """Recursive dict merge. Inspired by :meth:``dict.update()``, instead of + updating only top-level keys, dict_merge recurses down into dicts nested + to an arbitrary depth, updating keys. The ``merge_dct`` is merged into + ``dct``. + :param dct: dict onto which the merge is executed + :param merge_dct: dct merged into dct + :return: None + """ + for k, v in merge_dct.items(): + # dct does not have k yet. Add it with value v. + if (k not in dct) and (not isinstance(dct, list)): + dct[k] = v + else: + # dct[k] and merge_dict[k] are both dictionaries. Recurse. + if isinstance(dct[k], (dict, list)) and isinstance(v, dict): + dict_merge(dct[k], merge_dct[k]) + else: + # dct[k] and merge_dict[k] are both tuples or lists. + if isinstance(dct[k], (tuple, list)) and isinstance(v, (tuple, list)): + # They don't match. Overwrite with v. 
+ if len(dct[k]) != len(v): + dct[k] = v + else: + for i, (d_val, v_val) in enumerate(zip(dct[k], v)): + if isinstance(d_val, dict) and isinstance(v_val, dict): + dict_merge(d_val, v_val) + else: + dct[k][i] = v_val + else: + dct[k] = v + + +def min_max_consumer_budget_delta(hparams_dict): + # largest single round changes + max_wage = hparams_dict["agents"]["max_possible_wage"] + max_hours = hparams_dict["agents"]["max_possible_hours_worked"] + max_price = hparams_dict["agents"]["max_possible_price"] + max_singlefirm_consumption = hparams_dict["agents"]["max_possible_consumption"] + num_firms = hparams_dict["agents"]["num_firms"] + + min_budget = ( + -max_singlefirm_consumption * max_price * num_firms + ) # negative budget from consuming only + max_budget = max_hours * max_wage * num_firms # positive budget from only working + return min_budget, max_budget + + +def min_max_stock_delta(hparams_dict): + # for now, assuming 1.0 capital + max_hours = hparams_dict["agents"]["max_possible_hours_worked"] + max_singlefirm_consumption = hparams_dict["agents"]["max_possible_consumption"] + alpha = hparams_dict["world"]["production_alpha"] + if isinstance(alpha, str): + alpha = 0.8 + num_consumers = hparams_dict["agents"]["num_consumers"] + max_delta = (max_hours * num_consumers) ** alpha + min_delta = -max_singlefirm_consumption * num_consumers + return min_delta, max_delta + + +def min_max_firm_budget(hparams_dict): + max_wage = hparams_dict["agents"]["max_possible_wage"] + max_hours = hparams_dict["agents"]["max_possible_hours_worked"] + max_singlefirm_consumption = hparams_dict["agents"]["max_possible_consumption"] + num_consumers = hparams_dict["agents"]["num_consumers"] + max_price = hparams_dict["agents"]["max_possible_price"] + max_delta = max_singlefirm_consumption * max_price * num_consumers + min_delta = -max_hours * max_wage * num_consumers + return min_delta, max_delta + + +def expand_to_digit_form(x, dims_to_expand, max_digits): + # first split x up + 
requires_grad = ( + x.requires_grad + ) # don't want to backprop through these ops, but do want + # gradients if x had them + with torch.no_grad(): + tensor_pieces = [] + expanded_digit_shape = x.shape[:-1] + (max_digits,) + for i in range(x.shape[-1]): + if i not in dims_to_expand: + tensor_pieces.append(x[..., i : i + 1]) + else: + digit_entries = torch.zeros(expanded_digit_shape, device=x.device) + for j in range(max_digits): + digit_entries[..., j] = (x[..., i] % (10 ** (j + 1))) / ( + 10 ** (j + 1) + ) + tensor_pieces.append(digit_entries) + + output = torch.cat(tensor_pieces, dim=-1) + output.requires_grad_(requires_grad) + return output + + +def size_after_digit_expansion(existing_size, dims_to_expand, max_digits): + num_expanded = len(dims_to_expand) + # num non expanded digits, + all the expanded ones + return (existing_size - num_expanded) + (max_digits * num_expanded) diff --git a/ai_economist/real_business_cycle/train_bestresponse.py b/ai_economist/real_business_cycle/train_bestresponse.py new file mode 100644 index 0000000..c1731d4 --- /dev/null +++ b/ai_economist/real_business_cycle/train_bestresponse.py @@ -0,0 +1,108 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. 
# Copyright (c) 2021, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause

import argparse
import pickle
from collections import defaultdict
from pathlib import Path

import numpy as np
from experiment_utils import cfg_dict_from_yaml
from rbc.cuda_manager import ConsumerFirmRunManagerBatchParallel


def check_if_ep_str_policy_exists(rollout_path, ep_str):
    """Return True when ``rollout_path`` holds a saved consumer policy for ``ep_str``."""
    return (
        rollout_path / Path("saved_models") / Path(f"consumer_policy_{ep_str}.pt")
    ).is_file()


def run_rollout(rollout_path, arguments):
    """Run best-response training against the policies saved in ``rollout_path``.

    Loads the rollout's action arrays and hyperparameters, then, for each
    requested agent type and episode checkpoint, trains a best response and
    writes the mean rewards before/after training to
    ``br_<agent_type>_output.txt`` inside the rollout directory.
    """
    with open(rollout_path / Path("action_arrays.pickle"), "rb") as f:
        action_arrays = pickle.load(f)

    consumption_choices, work_choices, price_and_wage, tax_choices = (
        action_arrays["consumption_choices"],
        action_arrays["work_choices"],
        action_arrays["price_and_wage"],
        action_arrays["tax_choices"],
    )

    cfg_dict = cfg_dict_from_yaml(
        rollout_path / Path("hparams.yaml"),
        consumption_choices,
        work_choices,
        price_and_wage,
        tax_choices,
    )

    print(cfg_dict)

    if arguments.agent_type == "all":
        agent_types = ["consumer", "firm", "government"]
    else:
        agent_types = [arguments.agent_type]

    for agent_type in agent_types:
        ep_rewards = defaultdict(list)
        for _ in range(arguments.repeat_runs):
            for ep_str in arguments.ep_strs:
                if not check_if_ep_str_policy_exists(rollout_path, ep_str):
                    print(f"warning: {rollout_path} {ep_str} policy not found")
                    # Placeholder so the stats arrays below keep their shape.
                    ep_rewards[ep_str].append([0.0])
                    continue
                m = ConsumerFirmRunManagerBatchParallel(cfg_dict)
                rewards_start = m.bestresponse_train(
                    agent_type,
                    arguments.num_episodes,
                    rollout_path,
                    ep_str=ep_str,
                    checkpoint=arguments.checkpoint_model,
                )
                ep_rewards[ep_str].append(rewards_start)

        with open(rollout_path / Path(f"br_{agent_type}_output.txt"), "w") as f:
            for ep_str in arguments.ep_strs:
                # Rows: repeat runs; column 0 = reward before BR training,
                # column -1 = reward after.
                reward_arr = np.array(ep_rewards[ep_str])
                print(
                    f"mean reward (std) on rollout {ep_str}: "
                    f"before BR training {reward_arr[:,0].mean()} "
                    f"({reward_arr[:,0].std()}), "
                    f"after BR training {reward_arr[:,-1].mean()} "
                    f"({reward_arr[:,-1].std()}), "
                    f"mean improvement {(reward_arr[:,-1]-reward_arr[:,0]).mean()} "
                    # BUG FIX: the closing parenthesis after the improvement
                    # std was missing from the message.
                    f"({(reward_arr[:,-1]-reward_arr[:,0]).std()})",
                    file=f,
                )


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("rolloutdir", type=str)
    parser.add_argument("num_episodes", type=int)
    parser.add_argument("--experiment-dir", action="store_true")
    parser.add_argument("--ep-strs", nargs="+", default=["0", "latest"])
    parser.add_argument("--agent-type", type=str, default="all")
    parser.add_argument("--repeat-runs", type=int, default=1)
    parser.add_argument("--checkpoint-model", type=int, default=100)

    args = parser.parse_args()

    if args.experiment_dir:
        # Treat rolloutdir as a directory of rollouts and process each one.
        exp_dir = Path(args.rolloutdir)
        for rolloutpath in exp_dir.iterdir():
            if rolloutpath.is_dir():
                run_rollout(rolloutpath, args)
    else:
        rolloutpath = Path(args.rolloutdir)
        run_rollout(rolloutpath, args)
# Copyright (c) 2021, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause

import argparse
import os

from experiment_utils import (
    create_job_dir,
    run_experiment_batch_parallel,
    sweep_cfg_generator,
)
from rbc.constants import all_agents_short_export_experiment_template

# Hyperparameter sweeps: every combination of the values below becomes one
# job directory under --experiment-dir.
train_param_sweeps = {
    "lr": [0.001],
    "entropy": [0.5],
    "batch_size": [128],
    "clip_grad_norm": [2.0],
    "base_seed": [2345],
    "should_boost_firm_reward": [False],
    "use_ppo": [True],
    "ppo_num_updates": [2, 4],
    "ppo_clip_param": [0.1],
}

agent_param_sweeps = {
    "consumer_lr_multiple": [1.0],
    "consumer_reward_scale": [5.0],
    "government_reward_scale": [5.0 * 100.0 * 2.0],
    "firm_reward_scale": [30000],
    "government_counts_firm_reward": [1],
    "government_lr_multiple": [0.05],
}

world_param_sweeps = {
    "initial_wages": [0.0],
    "interest_rate": [0.0],
    "importer_price": [500.0],
    "importer_quantity": [100.0],
    "use_importer": [1],
}


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--experiment-dir", type=str, default="experiment/experiment")
    parser.add_argument("--group-name", type=str, default="default_group")
    parser.add_argument("--job-name-base", type=str, default="rollout")
    parser.add_argument("--num-consumers", type=int, default=100)
    parser.add_argument("--num-firms", type=int, default=10)
    parser.add_argument("--num-governments", type=int, default=1)
    parser.add_argument("--run-only", action="store_true")
    parser.add_argument("--seed-from-timestamp", action="store_true")

    args = parser.parse_args()

    # Default config plus the discrete action grids for this population size.
    (
        default_cfg_dict,
        consumption_choices,
        work_choices,
        price_and_wage,
        tax_choices,
        default_firm_action,
        default_government_action,
    ) = all_agents_short_export_experiment_template(
        args.num_firms, args.num_consumers, args.num_governments
    )

    if args.run_only:
        print("Not sweeping over hyperparameter combos...")
    else:
        # Materialize one job directory per swept config.
        for swept_cfg in sweep_cfg_generator(
            default_cfg_dict,
            tr_param_sweeps=train_param_sweeps,
            ag_param_sweeps=agent_param_sweeps,
            wld_param_sweeps=world_param_sweeps,
            seed_from_timestamp=args.seed_from_timestamp,
            group_name=args.group_name,
        ):
            create_job_dir(
                args.experiment_dir,
                args.job_name_base,
                cfg=swept_cfg,
                action_arrays={
                    "consumption_choices": consumption_choices,
                    "work_choices": work_choices,
                    "price_and_wage": price_and_wage,
                    "tax_choices": tax_choices,
                },
            )

    if args.dry_run:
        print("Dry-run -> not actually training...")
    else:
        print("Training multiple experiments locally...")
        # Run every job directory found under the experiment dir.
        job_dirs = (f.path for f in os.scandir(args.experiment_dir) if f.is_dir())
        for job_dir in job_dirs:
            run_experiment_batch_parallel(
                job_dir,
                consumption_choices,
                work_choices,
                price_and_wage,
                tax_choices,
                group_name=args.group_name,
                consumers_only=False,
                no_firms=False,
                default_firm_action=default_firm_action,
                default_government_action=default_government_action,
            )
# Copyright (c) 2021, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause

import argparse

from experiment_utils import run_experiment_batch_parallel
from rbc.constants import all_agents_export_experiment_template

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--experiment-dir", type=str, default="experiment/experiment")
    parser.add_argument("--group-name", type=str, default="default_group")
    parser.add_argument("--job-name-base", type=str, default="rollout")
    parser.add_argument("--num-consumers", type=int, default=100)
    parser.add_argument("--num-firms", type=int, default=10)
    parser.add_argument("--num-governments", type=int, default=1)
    parser.add_argument("--run-only", action="store_true")
    parser.add_argument("--seed-from-timestamp", action="store_true")

    args = parser.parse_args()

    # Default config and discrete action grids for this population size.
    (
        default_cfg_dict,
        consumption_choices,
        work_choices,
        price_and_wage,
        tax_choices,
        default_firm_action,
        default_government_action,
    ) = all_agents_export_experiment_template(
        args.num_firms, args.num_consumers, args.num_governments
    )

    if not args.dry_run:
        # Run the single experiment directory given on the command line.
        run_experiment_batch_parallel(
            args.experiment_dir,
            consumption_choices,
            work_choices,
            price_and_wage,
            tax_choices,
            group_name=args.group_name,
            consumers_only=False,
            no_firms=False,
            default_firm_action=default_firm_action,
            default_government_action=default_government_action,
        )
+# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause diff --git a/ai_economist/training/run_configs/covid_and_economy_environment.yaml b/ai_economist/training/run_configs/covid_and_economy_environment.yaml new file mode 100644 index 0000000..4bc1d21 --- /dev/null +++ b/ai_economist/training/run_configs/covid_and_economy_environment.yaml @@ -0,0 +1,77 @@ +# Copyright (c) 2021, salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root +# or https://opensource.org/licenses/BSD-3-Clause + +# YAML configuration for the tag continuous environment +name: "covid_and_economy_environment" +# Environment settings +env: + collate_agent_step_and_reset_data: True + components: + - ControlUSStateOpenCloseStatus: + action_cooldown_period: 28 + - FederalGovernmentSubsidy: + num_subsidy_levels: 20 + subsidy_interval: 90 + max_annual_subsidy_per_person: 20000 + - VaccinationCampaign: + daily_vaccines_per_million_people: 3000 + delivery_interval: 1 + vaccine_delivery_start_date: "2021-01-12" + economic_reward_crra_eta: 2 + episode_length: 540 + flatten_masks: True + flatten_observations: False + health_priority_scaling_agents: 0.3 + health_priority_scaling_planner: 0.45 + infection_too_sick_to_work_rate: 0.1 + multi_action_mode_agents: False + multi_action_mode_planner: False + n_agents: 51 + path_to_data_and_fitted_params: "" + pop_between_age_18_65: 0.6 + risk_free_interest_rate: 0.03 + world_size: [1, 1] + start_date: "2020-03-22" + use_real_world_data: False + use_real_world_policies: False +# Trainer settings +trainer: + num_envs: 60 # number of environment replicas + num_episodes: 1000 # number of episodes to run the training for + train_batch_size: 5400 # total batch size used for training per iteration (across all the environments) +# Policy network settings +policy: # list all the 
# Copyright (c) 2021, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause

"""
Example training script for the grid world and continuous versions of Tag.
Note: This training script only runs on a GPU machine.
You will also need to install WarpDrive (https://github.com/salesforce/warp-drive)
using `pip install rl-warp-drive`, and Pytorch(https://pytorch.org/)
"""

import argparse
import logging
import os

import GPUtil

try:
    # WarpDrive and this script both require a GPU; bail out early if none.
    num_gpus_available = len(GPUtil.getAvailable())
    assert num_gpus_available > 0, "This training script needs a GPU to run!"
    print(f"Inside training_script.py: {num_gpus_available} GPUs are available.")
    import torch
    import yaml
    from warp_drive.training.trainer import Trainer
    from warp_drive.utils.env_registrar import EnvironmentRegistrar
except ModuleNotFoundError:
    raise ModuleNotFoundError(
        "This training script requires the 'WarpDrive' package, please run "
        "'pip install rl-warp-drive' first."
    ) from None
except ValueError:
    # NOTE(review): the GPU assert above raises AssertionError, which this
    # ValueError handler does not catch — confirm which callee is expected
    # to raise ValueError here.
    raise ValueError("This training script needs a GPU to run!") from None

from ai_economist.foundation.env_wrapper import FoundationEnvWrapper
from ai_economist.foundation.scenarios.covid19.covid19_env import (
    CovidAndEconomyEnvironment,
)

logging.getLogger().setLevel(logging.ERROR)

# Force CUDA context initialization up front so failures surface here.
pytorch_cuda_init_success = torch.cuda.FloatTensor(8)
_COVID_AND_ECONOMY_ENVIRONMENT = "covid_and_economy_environment"

# Usage:
# >> python ai_economist/training/example_training_script.py
# --env covid_and_economy_environment


if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("--env", "-e", type=str, help="Environment to train.")
    args = parser.parse_args()

    # Load the per-environment run configuration (YAML).
    assert args.env in [_COVID_AND_ECONOMY_ENVIRONMENT], (
        f"Currently, the only environment supported "
        f"is {_COVID_AND_ECONOMY_ENVIRONMENT}"
    )

    config_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "run_configs",
        f"{args.env}.yaml",
    )
    with open(config_path, "r", encoding="utf8") as f:
        run_config = yaml.safe_load(f)

    num_envs = run_config["trainer"]["num_envs"]

    # Wrap the environment for GPU execution (use_cuda=True), registering
    # the scenario's CUDA source so WarpDrive can build it.
    if run_config["name"] == _COVID_AND_ECONOMY_ENVIRONMENT:
        env_registrar = EnvironmentRegistrar()
        this_file_dir = os.path.dirname(os.path.abspath(__file__))
        env_registrar.add_cuda_env_src_path(
            CovidAndEconomyEnvironment.name,
            os.path.join(
                this_file_dir, "../foundation/scenarios/covid19/covid19_build.cu"
            ),
        )
        env_wrapper = FoundationEnvWrapper(
            CovidAndEconomyEnvironment(**run_config["env"]),
            num_envs=num_envs,
            use_cuda=True,
            env_registrar=env_registrar,
        )
    else:
        raise NotImplementedError

    # Map policy model names to agent ids: "a" covers the mobile agents,
    # "p" the planner.
    policy_tag_to_agent_id_map = {
        "a": [str(agent_id) for agent_id in range(env_wrapper.env.n_agents)],
        "p": ["p"],
    }

    # Agents and planner have different observation/action spaces, so each
    # policy needs its own obs/action/reward placeholders.
    separate_placeholder_per_policy = True
# --- envs/base_econ_wrapper.py ----------------------------------------------
from ai_economist.foundation.base import base_env
from threading import Event, Lock, Thread
from queue import Queue  # noqa: F401  (kept from the original import list)


class BaseEconVecEnv:
    """Thread-safe hub connecting receiver wrappers to one shared econ env.

    A background thread (started via :meth:`run`) owns the wrapped
    environment. Receiver wrappers submit actions and reset votes from their
    own threads, then block until the hub has stepped or reset the env.
    """

    def __init__(self, econ: base_env.BaseEnvironment):
        self.env = econ

        # Signalling: hub wake-up plus step/reset completion for receivers.
        # BUG FIX: these (and all state below) used to be *class* attributes,
        # so every instance shared the same locks, events, and action dict.
        self.base_notification = Event()
        self.reset_notification = Event()
        self.step_notification = Event()

        # Pending actions for the next step, keyed by agent idx.
        self.action_edit_lock = Lock()
        self.actor_actions = {}

        # Stop flag for the hub thread.
        self.stop_edit_lock = Lock()
        self.stop = False

        # Reset voting: the env resets once every registered receiver votes.
        self.vote_lock = Lock()
        self.n_voters = 0
        self.n_votes_reset = 0

        # Latest env outputs, published after each step/reset.
        self.env_data_lock = Lock()
        self.obs = None
        self.rew = None
        self.done = None
        self.info = None
        self.n_data_retrieved = 0

    def register_vote(self):
        """Register a receiver so the hub knows how many reset votes to wait for.

        BUG FIX: this was an empty stub, leaving ``n_voters`` at 0 so the
        reset condition was vacuously true.
        """
        with self.vote_lock:
            self.n_voters += 1

    def run(self):
        """Start the hub thread and return it."""
        thr = Thread(target=self._run, daemon=True)
        # BUG FIX: Thread.run() executed _run in the *calling* thread and
        # never returned; start() spawns the worker thread.
        thr.start()
        return thr

    def _run(self):
        # Fresh signalling and state for this run. BUG FIX: the original
        # released locks it had never acquired (a RuntimeError on Lock).
        self.base_notification.clear()
        self.reset_notification.clear()
        self.step_notification.clear()
        with self.stop_edit_lock:
            self.stop = False
        with self.action_edit_lock:
            self.actor_actions = {}
        with self.vote_lock:
            self.n_votes_reset = 0
        with self.env_data_lock:
            self.obs = None
            self.rew = None
            self.done = None
            self.info = None

        # Initial reset so receivers have observations to start from.
        self._reset()

        while True:
            # Sleep until a receiver (or stop_env) pokes us.
            self.base_notification.wait()
            self.base_notification.clear()

            # Stop signal? BUG FIX: release the lock before returning.
            with self.stop_edit_lock:
                if self.stop:
                    return

            # Reset once every registered receiver has voted.
            with self.vote_lock:
                should_reset = (
                    self.n_voters > 0 and self.n_votes_reset == self.n_voters
                )
                if should_reset:
                    self.n_votes_reset = 0
            if should_reset:
                self._reset()

            # Step once all agents' actions are in and the previous step's
            # results have been consumed. BUG FIXES: ``len(dict.keys)`` called
            # len on the bound method; ``== ... & ...`` had wrong precedence;
            # and _step re-acquired action_edit_lock while it was still held
            # here (self-deadlock on a non-reentrant Lock).
            with self.action_edit_lock:
                ready = (
                    len(self.actor_actions) == self.env.n_agents
                    and not self.step_notification.is_set()
                )
            if ready:
                self._step()

    def stop_env(self):
        """Ask the hub thread to exit."""
        with self.stop_edit_lock:
            self.stop = True
        self.base_notification.set()

    def _reset(self):
        """Reset the env and publish fresh observations to receivers."""
        with self.env_data_lock:
            self.obs = self.env.reset()
            self.rew = None
            self.done = None
            self.info = None
            self.n_data_retrieved = 0
        self.reset_notification.set()

    def _step(self):
        """Step the env with the collected actions and publish the results."""
        with self.env_data_lock:
            self.reset_notification.clear()  # reset obs are stale after a step
            self.obs, self.rew, self.done, self.info = self.env.step(
                self.actor_actions
            )
            self.n_data_retrieved = 0
        # Hold the action lock until every receiver has read the results
        # (released by the last reader in reciever_block_step), so new
        # actions cannot race in.
        self.action_edit_lock.acquire()
        self.step_notification.set()

    def _prepare_step(self):
        # Clear out the previous step's actions before accepting new ones.
        with self.action_edit_lock:
            if self.step_notification.is_set():
                self.step_notification.clear()
                self.actor_actions = {}

    def reciever_request_step(self, actions):
        """Submit actions as a dict of {agent idx: action id}.

        Raises if any idx already has a pending action.
        """
        self._prepare_step()
        with self.action_edit_lock:
            # BUG FIXES: iterate .items() (the original iterated keys and
            # unpacked them), and use membership instead of indexing (which
            # raised KeyError on every first submission).
            for idx, action in actions.items():
                if idx in self.actor_actions:
                    raise Exception(
                        "Actor action has already been submitted. {}".format(idx)
                    )
                self.actor_actions[idx] = action
            self.base_notification.set()  # wake the hub to check for a step

    def reciever_block_step(self):
        """Block until the next step completes; return (obs, rew, done, info)."""
        self.step_notification.wait()
        with self.env_data_lock:
            result = (self.obs, self.rew, self.done, self.info)
            self.n_data_retrieved += 1
            if self.n_data_retrieved >= self.n_voters:
                # Last reader: allow new actions to be submitted again.
                self.action_edit_lock.release()
        return result

    def reciever_request_reset(self):
        """Vote for a reset; the env resets once every receiver has voted."""
        with self.vote_lock:
            self.n_votes_reset += 1
        # BUG FIX: wake the hub so a unanimous vote actually triggers the
        # reset instead of waiting for an unrelated notification.
        self.base_notification.set()

    def reciever_block_reset(self):
        """Block until a reset occurs; return the fresh observations."""
        self.reset_notification.wait()
        with self.env_data_lock:
            return self.obs


# --- envs/reciever_econ_wrapper.py ------------------------------------------
from collections import OrderedDict  # noqa: F401
from copy import deepcopy  # noqa: F401
from typing import Any, Callable, List, Optional, Sequence, Type, Union  # noqa: F401

import gym
import gym.spaces
import numpy as np  # noqa: F401

from stable_baselines3.common.vec_env.base_vec_env import (  # noqa: F401
    VecEnv,
    VecEnvIndices,
    VecEnvObs,
    VecEnvStepReturn,
)
from stable_baselines3.common.vec_env.util import (  # noqa: F401
    copy_obs_dict,
    dict_to_obs,
    obs_space_info,
)

from ai_economist import foundation  # noqa: F401


class RecieverEconVecEnv(gym.Env):
    """Receiver side of BaseEconVecEnv.

    Filters the shared env by agent class and presents a gym-style API,
    enabling one training thread per agent type.
    """

    def __init__(self, base_econ: BaseEconVecEnv, agent_classname: str):
        self.base_econ = base_econ
        base_econ.register_vote()
        self.econ = base_econ.env
        self.agent_name = agent_classname
        # Env-level agent indices belonging to this agent class.
        # (Renamed from the original misspelled ``agnet_idx``.)
        self.agent_idx = list(self.econ.world._agent_class_idx_map[agent_classname])
        # Reverse map: env idx -> local position within this receiver.
        self.idx_to_index = {idx: pos for pos, idx in enumerate(self.agent_idx)}

    def step_async(self, actions: dict) -> None:
        """Submit actions, a dict of {local index: action}."""
        self.base_econ.reciever_request_step(self._dict_index_to_idx(actions))

    def _dict_idx_to_index(self, data):
        # Re-key from env idx to local index.
        return {self.idx_to_index[key]: value for key, value in data.items()}

    def _dict_index_to_idx(self, data):
        # Re-key from local index to env idx.
        return {self.agent_idx[key]: value for key, value in data.items()}

    def step_wait(self):
        """Block for the step results, re-keyed to local indices."""
        obs, rew, done, info = self.base_econ.reciever_block_step()
        return (
            self._dict_idx_to_index(obs),
            self._dict_idx_to_index(rew),
            self._dict_idx_to_index(done),
            self._dict_idx_to_index(info),
        )

    def reset(self):
        """Vote for a reset, block until it occurs, return local observations."""
        self.base_econ.reciever_request_reset()
        return self._dict_idx_to_index(self.base_econ.reciever_block_reset())