|
32 | 32 | "from IPython.display import display_html\n",
|
33 | 33 | "import itertools\n",
|
34 | 34 | "from scipy import stats\n",
|
| 35 | + "import numpy as np\n", |
35 | 36 | "import warnings\n",
|
36 | 37 | "import matplotlib.pyplot as plt\n",
|
37 | 38 | "warnings.filterwarnings(\"ignore\")\n",
|
| 39 | + "PVALUE_VAR = 0.05\n", |
38 | 40 | "%store -r PVALUE_VAR\n",
|
39 | 41 | "\n",
|
40 | 42 | "from Project.Utils.visualize import search, searchTimeSeries, normalize_by_country\n",
|
|
48 | 50 | "\n",
|
49 | 51 | "df_gold = pd.read_csv(output_path + 'GoldDataframe.csv')\n",
|
50 | 52 | "df_gold_index = df_gold.set_index(['Country', 'Year', 'Region'])\n",
|
51 |
| - "corr_df_spearman = pd.read_csv(output_path + 'Corr_DF_pearson.csv', index_col = col_country)\n", |
52 | 53 | "\n",
|
53 |
| - "country_list = sorted(set(df_gold['Country'].tolist()))\n", |
54 |
| - "region_list = sorted(set(df_gold['Region'].tolist()))" |
| 54 | + "country_list = list(np.sort(df_gold['Country'].unique()))\n", |
| 55 | + "region_list = list(np.sort(df_gold['Region'].unique()))" |
55 | 56 | ]
|
56 | 57 | },
|
57 | 58 | {
|
|
109 | 110 | "metadata": {},
|
110 | 111 | "outputs": [],
|
111 | 112 | "source": [
|
| 113 | + "import time\n", |
| 114 | + "\n", |
112 | 115 | "def load_by_region(region):\n",
|
| 116 | + " start = time.time()\n", |
113 | 117 | " df = df_gold_index.loc[df_gold_index.index.get_level_values('Region') == region]\n",
|
114 | 118 | " df = normalize_by_country(df)\n",
|
115 | 119 | " df.sort_index(level = ['Year', 'Country'], inplace=True)\n",
|
| 120 | + " end = time.time()\n", |
| 121 | + " print(\"[{:.2f} seconds] Normalized Region {}\".format(end - start, region))\n", |
116 | 122 | " return df"
|
117 | 123 | ]
|
118 | 124 | },
|
119 | 125 | {
|
120 | 126 | "cell_type": "code",
|
121 | 127 | "execution_count": 4,
|
122 | 128 | "metadata": {},
|
| 129 | + "outputs": [ |
| 130 | + { |
| 131 | + "name": "stdout", |
| 132 | + "output_type": "stream", |
| 133 | + "text": [ |
| 134 | + "[1.03 seconds] Normalized Region East Asia and Pacific\n", |
| 135 | + "[2.23 seconds] Normalized Region Europe and Central Asia\n", |
| 136 | + "[1.27 seconds] Normalized Region Latin America and Caribbean\n", |
| 137 | + "[0.82 seconds] Normalized Region Middle East and North Africa\n", |
| 138 | + "[0.11 seconds] Normalized Region North America\n", |
| 139 | + "[0.42 seconds] Normalized Region South Asia\n", |
| 140 | + "[2.20 seconds] Normalized Region Sub-Saharan Africa\n" |
| 141 | + ] |
| 142 | + } |
| 143 | + ], |
| 144 | + "source": [ |
| 145 | + "# Precompute countries by region normalized\n", |
| 146 | + "countries_by_region = {}\n", |
| 147 | + "for r in region_list:\n", |
| 148 | + " countries_by_region[r] = load_by_region(r)" |
| 149 | + ] |
| 150 | + }, |
| 151 | + { |
| 152 | + "cell_type": "code", |
| 153 | + "execution_count": 5, |
| 154 | + "metadata": {}, |
| 155 | + "outputs": [], |
| 156 | + "source": [ |
| 157 | + "class WidgetTimeWindowCountryStatus:\n", |
| 158 | + " def __init__(self):\n", |
| 159 | + " self.filter_by = \"Country\"\n", |
| 160 | + " self.zone = None\n", |
| 161 | + " self.data = None\n", |
| 162 | + "\n", |
| 163 | + "status = WidgetTimeWindowCountryStatus()" |
| 164 | + ] |
| 165 | + }, |
123 | 166 | "outputs": [
|
124 | 167 | {
|
125 | 168 | "data": {
|
|
148 | 191 | ],
|
149 | 192 | "source": [
|
150 | 193 | "def timeWindowCountry(By, Zone, Threshold , Years):\n",
|
| 194 | + " # Update Widget Status\n", |
151 | 195 | " if By == 'Country':\n",
|
152 |
| - " if len(zone_drop.options) == len(region_list): \n", |
| 196 | + " if status.filter_by != \"Country\": \n", |
| 197 | + " status.filter_by = \"Country\"\n", |
153 | 198 | " zone_drop.options = country_list\n",
|
| 199 | + " print(\"Changed to Country List\")\n", |
154 | 200 | " return\n",
|
155 |
| - " #Search for entries of the country.\n", |
156 |
| - " df_zone = df_gold_index.loc[df_gold_index.index.get_level_values('Country') == Zone]\n", |
| 201 | + " if status.zone != Zone: \n", |
| 202 | + " #Search for entries of the country.\n", |
| 203 | + " status.data = df_gold_index.loc[df_gold_index.index.get_level_values('Country') == Zone]\n", |
| 204 | + " status.zone = Zone\n", |
157 | 205 | "\n",
|
158 | 206 | " elif By == 'Region':\n",
|
159 |
| - " if len(zone_drop.options) == len(country_list): \n", |
| 207 | + " if status.filter_by != \"Region\": \n", |
| 208 | + " status.filter_by = \"Region\"\n", |
160 | 209 | " zone_drop.options = region_list\n",
|
| 210 | + " print(\"Changed to Region List\")\n", |
161 | 211 | " return\n",
|
162 |
| - " #Search for the entries of the region and normalize.\n", |
163 |
| - " df_zone = load_by_region(Zone)\n", |
| 212 | + " if status.zone != Zone: \n", |
| 213 | + " #Search for the entries of the region and normalize.\n", |
| 214 | + " #df_zone = load_by_region(Zone)\n", |
| 215 | + " status.data = countries_by_region[Zone]\n", |
| 216 | + " status.zone = Zone\n", |
164 | 217 | "\n",
|
| 218 | + " # Recalculate Results\n", |
| 219 | + " df_zone = status.data\n", |
165 | 220 | "\n",
|
166 |
| - " #Load the selected year range and the global range.\n", |
| 221 | + " start = time.time()\n", |
167 | 222 | " df_time = searchTimeSeries(Threshold, Years[0], Years[1], True, df_zone)\n",
|
168 | 223 | " df_global = searchTimeSeries(Threshold, Years[0], Years[1], False, df_zone)\n",
|
169 |
| - "\n", |
| 224 | + " end = time.time()\n", |
| 225 | + " print(end - start)\n", |
| 226 | + " \n", |
| 227 | + " # Visualize Results\n", |
| 228 | + " \n", |
170 | 229 | " if Years[0] > Years[1]: return print(\"Please, select a valid range of years.\")\n",
|
171 | 230 | " \n",
|
172 | 231 | " space = \"\\xa0\" * 10\n",
|
|
316 | 375 | " zone_drop.options = region_list\n",
|
317 | 376 | " return\n",
|
318 | 377 | " #Search for the entries of the region and normalize.\n",
|
319 |
| - " df_zone = load_by_region(Zone)\n", |
320 |
| - " \n", |
| 378 | + " df_zone = countries_by_region[Zone]\n", |
| 379 | + "\n", |
321 | 380 | " df_highest = generate_table()\n",
|
| 381 | + " \n", |
| 382 | + " i = 0\n", |
| 383 | + " computing_text = \"Loading \"\n", |
| 384 | + " print (computing_text, end=\"\\r\")\n", |
| 385 | + "\n", |
322 | 386 | "\n",
|
323 | 387 | " #For all the combination of years...\n",
|
324 | 388 | " for years in iterable:\n",
|
| 389 | + " i = (i + 1) % 50\n", |
| 390 | + " print (computing_text + \"\".join([\".\" for _ in range(i)]), end=\"\\r\")\n", |
325 | 391 | " df_aux = searchTimeSeries(0, years[0], years[1], True, df_zone)\n",
|
326 | 392 | " #Delete indicators which are not available that year\n",
|
327 | 393 | " indicators_inter = list(set(indicators) & set(list(df_aux.index)))\n",
|
|
345 | 411 | " df_highest.at[indicator, \"Highest negative Spearman corr\"] = indicator_corr_aux\n",
|
346 | 412 | "\n",
|
347 | 413 | " df_highest = df_highest.replace(0, nan).dropna(axis=0, how='all').fillna(\"-\")\n",
|
| 414 | + " \n", |
| 415 | + " print(\" \", end=\"\\r\")\n", |
| 416 | + "\n", |
348 | 417 | " display(df_highest)\n",
|
349 | 418 | "\n",
|
350 | 419 | "by_drop = widgets.Dropdown(\n",
|
|
441 | 510 | "\n",
|
442 | 511 | "\n",
|
443 | 512 | "\n",
|
| 513 | + "# TODO By Region: Say Y axis is Qualitative (Not real values but Normalized to observe evolution vs GDP - Tendency)\n", |
444 | 514 | "widgets.interact(plotYearRange, Zone = country_drop, Indicator = indicator_drop, Years = intslider)"
|
445 | 515 | ]
|
| 516 | + }, |
446 | 517 | }
|
447 | 518 | ],
|
448 | 519 | "metadata": {
|
|
0 commit comments