Skip to content

Commit 3d89601

Browse files
committed
Improvements
1 parent 7b04359 commit 3d89601

File tree

1 file changed

+84
-13
lines changed

1 file changed

+84
-13
lines changed

Notebook_Time_Series_Silviu.ipynb

Lines changed: 84 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,11 @@
3232
"from IPython.display import display_html\n",
3333
"import itertools\n",
3434
"from scipy import stats\n",
35+
"import numpy as np\n",
3536
"import warnings\n",
3637
"import matplotlib.pyplot as plt\n",
3738
"warnings.filterwarnings(\"ignore\")\n",
39+
"PVALUE_VAR = 0.05\n",
3840
"%store -r PVALUE_VAR\n",
3941
"\n",
4042
"from Project.Utils.visualize import search, searchTimeSeries, normalize_by_country\n",
@@ -48,10 +50,9 @@
4850
"\n",
4951
"df_gold = pd.read_csv(output_path + 'GoldDataframe.csv')\n",
5052
"df_gold_index = df_gold.set_index(['Country', 'Year', 'Region'])\n",
51-
"corr_df_spearman = pd.read_csv(output_path + 'Corr_DF_pearson.csv', index_col = col_country)\n",
5253
"\n",
53-
"country_list = sorted(set(df_gold['Country'].tolist()))\n",
54-
"region_list = sorted(set(df_gold['Region'].tolist()))"
54+
"country_list = list(np.sort(df_gold['Country'].unique()))\n",
55+
"region_list = list(np.sort(df_gold['Region'].unique()))"
5556
]
5657
},
5758
{
@@ -109,17 +110,59 @@
109110
"metadata": {},
110111
"outputs": [],
111112
"source": [
113+
"import time\n",
114+
"\n",
112115
"def load_by_region(region):\n",
116+
" start = time.time()\n",
113117
" df = df_gold_index.loc[df_gold_index.index.get_level_values('Region') == region]\n",
114118
" df = normalize_by_country(df)\n",
115119
" df.sort_index(level = ['Year', 'Country'], inplace=True)\n",
120+
" end = time.time()\n",
121+
" print(\"[{:.2f} seconds] Normalized Region {}\".format(end - start, region))\n",
116122
" return df"
117123
]
118124
},
119125
{
120126
"cell_type": "code",
121127
"execution_count": 4,
122128
"metadata": {},
129+
"outputs": [
130+
{
131+
"name": "stdout",
132+
"output_type": "stream",
133+
"text": [
134+
"[1.03 seconds] Normalized Region East Asia and Pacific\n",
135+
"[2.23 seconds] Normalized Region Europe and Central Asia\n",
136+
"[1.27 seconds] Normalized Region Latin America and Caribbean\n",
137+
"[0.82 seconds] Normalized Region Middle East and North Africa\n",
138+
"[0.11 seconds] Normalized Region North America\n",
139+
"[0.42 seconds] Normalized Region South Asia\n",
140+
"[2.20 seconds] Normalized Region Sub-Saharan Africa\n"
141+
]
142+
}
143+
],
144+
"source": [
145+
"# Precompute contries by region normalized\n",
146+
"countries_by_region = {}\n",
147+
"for r in region_list:\n",
148+
" countries_by_region[r] = load_by_region(r)"
149+
]
150+
},
151+
{
152+
"cell_type": "code",
153+
"execution_count": 5,
154+
"metadata": {},
155+
"outputs": [],
156+
"source": [
157+
"class WidgetTimeWindowCountryStatus:\n",
158+
" def __init__(self):\n",
159+
" self.filter_by = \"Country\"\n",
160+
" self.zone = None\n",
161+
" self.data = None\n",
162+
"\n",
163+
"status = WidgetTimeWindowCountryStatus()"
164+
]
165+
},
123166
"outputs": [
124167
{
125168
"data": {
@@ -148,25 +191,41 @@
148191
],
149192
"source": [
150193
"def timeWindowCountry(By, Zone, Threshold , Years):\n",
194+
" # Update Widget Status\n",
151195
" if By == 'Country':\n",
152-
" if len(zone_drop.options) == len(region_list): \n",
196+
" if status.filter_by != \"Country\": \n",
197+
" status.filter_by = \"Country\"\n",
153198
" zone_drop.options = country_list\n",
199+
" print(\"Changed to Country List\")\n",
154200
" return\n",
155-
" #Search for entries of the country.\n",
156-
" df_zone = df_gold_index.loc[df_gold_index.index.get_level_values('Country') == Zone]\n",
201+
" if status.zone != Zone: \n",
202+
" #Search for entries of the country.\n",
203+
" status.data = df_gold_index.loc[df_gold_index.index.get_level_values('Country') == Zone]\n",
204+
" status.zone = Zone\n",
157205
"\n",
158206
" elif By == 'Region':\n",
159-
" if len(zone_drop.options) == len(country_list): \n",
207+
" if status.filter_by != \"Region\": \n",
208+
" status.filter_by = \"Region\"\n",
160209
" zone_drop.options = region_list\n",
210+
" print(\"Changed to Region List\")\n",
161211
" return\n",
162-
" #Search for the entries of the region and normalize.\n",
163-
" df_zone = load_by_region(Zone)\n",
212+
" if status.zone != Zone: \n",
213+
" #Search for the entries of the region and normalize.\n",
214+
" #df_zone = load_by_region(Zone)\n",
215+
" status.data = countries_by_region[Zone]\n",
216+
" status.zone = Zone\n",
164217
"\n",
218+
" # Recalculate Results\n",
219+
" df_zone = status.data\n",
165220
"\n",
166-
" #Load the selected year range and the global range.\n",
221+
" start = time.time()\n",
167222
" df_time = searchTimeSeries(Threshold, Years[0], Years[1], True, df_zone)\n",
168223
" df_global = searchTimeSeries(Threshold, Years[0], Years[1], False, df_zone)\n",
169-
"\n",
224+
" end = time.time()\n",
225+
" print(end - start)\n",
226+
" \n",
227+
" # Visualize Results\n",
228+
" \n",
170229
" if Years[0] > Years[1]: return print(\"Please, select a valid range of years.\")\n",
171230
" \n",
172231
" space = \"\\xa0\" * 10\n",
@@ -316,12 +375,19 @@
316375
" zone_drop.options = region_list\n",
317376
" return\n",
318377
" #Search for the entries of the region and normalize.\n",
319-
" df_zone = load_by_region(Zone)\n",
320-
" \n",
378+
" df_zone = countries_by_region[Zone]\n",
379+
"\n",
321380
" df_highest = generate_table()\n",
381+
" \n",
382+
" i = 0\n",
383+
" computing_text = \"Loading \"\n",
384+
" print (computing_text, end=\"\\r\")\n",
385+
"\n",
322386
"\n",
323387
" #For all the combination of years...\n",
324388
" for years in iterable:\n",
389+
" i = (i + 1) % 50\n",
390+
" print (computing_text + \"\".join([\".\" for _ in range(i)]), end=\"\\r\")\n",
325391
" df_aux = searchTimeSeries(0, years[0], years[1], True, df_zone)\n",
326392
" #Delete indicators which are not available that year\n",
327393
" indicators_inter = list(set(indicators) & set(list(df_aux.index)))\n",
@@ -345,6 +411,9 @@
345411
" df_highest.at[indicator, \"Highest negative Spearman corr\"] = indicator_corr_aux\n",
346412
"\n",
347413
" df_highest = df_highest.replace(0, nan).dropna(axis=0, how='all').fillna(\"-\")\n",
414+
" \n",
415+
" print(\" \", end=\"\\r\")\n",
416+
"\n",
348417
" display(df_highest)\n",
349418
"\n",
350419
"by_drop = widgets.Dropdown(\n",
@@ -441,8 +510,10 @@
441510
"\n",
442511
"\n",
443512
"\n",
513+
"# TODO By Region: Say Y axis is Qualitative (Not real values but Normalized to observe evolution vs GDP - Tendendency)\n",
444514
"widgets.interact(plotYearRange, Zone = country_drop, Indicator = indicator_drop, Years = intslider)"
445515
]
516+
},
446517
}
447518
],
448519
"metadata": {

0 commit comments

Comments
 (0)