Improvements

lpinon · lpinon · commit 3d89601c703a · 2022-08-23T16:17:29.000+02:00
diff --git a/Notebook_Time_Series_Silviu.ipynb b/Notebook_Time_Series_Silviu.ipynb
@@ -32,9 +32,11 @@
     "from IPython.display import display_html\n",
     "import itertools\n",
     "from scipy import stats\n",
+    "import numpy as np\n",
     "import warnings\n",
     "import matplotlib.pyplot as plt\n",
     "warnings.filterwarnings(\"ignore\")\n",
+    "PVALUE_VAR = 0.05\n",
     "%store -r PVALUE_VAR\n",
     "\n",
     "from Project.Utils.visualize import  search, searchTimeSeries, normalize_by_country\n",
@@ -48,10 +50,9 @@
     "\n",
     "df_gold = pd.read_csv(output_path + 'GoldDataframe.csv')\n",
     "df_gold_index = df_gold.set_index(['Country', 'Year', 'Region'])\n",
-    "corr_df_spearman = pd.read_csv(output_path + 'Corr_DF_pearson.csv', index_col = col_country)\n",
     "\n",
-    "country_list = sorted(set(df_gold['Country'].tolist()))\n",
-    "region_list = sorted(set(df_gold['Region'].tolist()))"
+    "country_list = list(np.sort(df_gold['Country'].unique()))\n",
+    "region_list = list(np.sort(df_gold['Region'].unique()))"
    ]
   },
   {
@@ -109,17 +110,59 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import time\n",
+    "\n",
     "def load_by_region(region):\n",
+    "    start = time.time()\n",
     "    df = df_gold_index.loc[df_gold_index.index.get_level_values('Region') == region]\n",
     "    df = normalize_by_country(df)\n",
     "    df.sort_index(level = ['Year', 'Country'], inplace=True)\n",
+    "    end = time.time()\n",
+    "    print(\"[{:.2f} seconds] Normalized Region {}\".format(end - start, region))\n",
     "    return df"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 4,
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[1.03 seconds] Normalized Region East Asia and Pacific\n",
+      "[2.23 seconds] Normalized Region Europe and Central Asia\n",
+      "[1.27 seconds] Normalized Region Latin America and Caribbean\n",
+      "[0.82 seconds] Normalized Region Middle East and North Africa\n",
+      "[0.11 seconds] Normalized Region North America\n",
+      "[0.42 seconds] Normalized Region South Asia\n",
+      "[2.20 seconds] Normalized Region Sub-Saharan Africa\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Precompute the normalized per-country data for each region\n",
+    "countries_by_region = {}\n",
+    "for r in region_list:\n",
+    "    countries_by_region[r] = load_by_region(r)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class WidgetTimeWindowCountryStatus:\n",
+    "    def __init__(self):\n",
+    "        self.filter_by = \"Country\"\n",
+    "        self.zone = None\n",
+    "        self.data = None\n",
+    "\n",
+    "status = WidgetTimeWindowCountryStatus()"
+   ]
+  },
    "outputs": [
     {
      "data": {
@@ -148,25 +191,41 @@
    ],
    "source": [
     "def timeWindowCountry(By, Zone, Threshold , Years):\n",
+    "    # Update Widget Status\n",
     "    if By == 'Country':\n",
-    "        if len(zone_drop.options) == len(region_list): \n",
+    "        if status.filter_by != \"Country\": \n",
+    "            status.filter_by = \"Country\"\n",
     "            zone_drop.options = country_list\n",
+    "            print(\"Changed to Country List\")\n",
     "            return\n",
-    "        #Search for entries of the country.\n",
-    "        df_zone = df_gold_index.loc[df_gold_index.index.get_level_values('Country') == Zone]\n",
+    "        if status.zone != Zone: \n",
+    "            #Search for entries of the country.\n",
+    "            status.data = df_gold_index.loc[df_gold_index.index.get_level_values('Country') == Zone]\n",
+    "            status.zone = Zone\n",
     "\n",
     "    elif By == 'Region':\n",
-    "        if len(zone_drop.options) == len(country_list): \n",
+    "        if status.filter_by != \"Region\": \n",
+    "            status.filter_by = \"Region\"\n",
     "            zone_drop.options = region_list\n",
+    "            print(\"Changed to Region List\")\n",
     "            return\n",
-    "        #Search for the entries of the region and normalize.\n",
-    "        df_zone = load_by_region(Zone)\n",
+    "        if status.zone != Zone: \n",
+    "            #Search for the entries of the region and normalize.\n",
+    "            #df_zone = load_by_region(Zone)\n",
+    "            status.data = countries_by_region[Zone]\n",
+    "            status.zone = Zone\n",
     "\n",
+    "    # Recalculate Results\n",
+    "    df_zone = status.data\n",
     "\n",
-    "    #Load the selected year range and the global range.\n",
+    "    start = time.time()\n",
     "    df_time = searchTimeSeries(Threshold, Years[0], Years[1], True, df_zone)\n",
     "    df_global = searchTimeSeries(Threshold, Years[0], Years[1], False, df_zone)\n",
-    "\n",
+    "    end = time.time()\n",
+    "    print(end - start)\n",
+    "    \n",
+    "    # Visualize Results\n",
+    "    \n",
     "    if Years[0] > Years[1]: return print(\"Please, select a valid range of years.\")\n",
     " \n",
     "    space = \"\\xa0\" * 10\n",
@@ -316,12 +375,19 @@
     "            zone_drop.options = region_list\n",
     "            return\n",
     "        #Search for the entries of the region and normalize.\n",
-    "        df_zone = load_by_region(Zone)\n",
-    "    \n",
+    "        df_zone = countries_by_region[Zone]\n",
+    "\n",
     "    df_highest = generate_table()\n",
+    "    \n",
+    "    i = 0\n",
+    "    computing_text = \"Loading \"\n",
+    "    print (computing_text, end=\"\\r\")\n",
+    "\n",
     "\n",
     "    #For all the combination of years...\n",
     "    for years in iterable:\n",
+    "        i = (i + 1) % 50\n",
+    "        print (computing_text + \"\".join([\".\" for _ in range(i)]), end=\"\\r\")\n",
     "        df_aux = searchTimeSeries(0, years[0], years[1], True, df_zone)\n",
     "        #Delete indicators which are not available that year\n",
     "        indicators_inter = list(set(indicators) & set(list(df_aux.index)))\n",
@@ -345,6 +411,9 @@
     "                    df_highest.at[indicator, \"Highest negative Spearman corr\"] = indicator_corr_aux\n",
     "\n",
     "    df_highest = df_highest.replace(0, nan).dropna(axis=0, how='all').fillna(\"-\")\n",
+    "    \n",
+    "    print(\"                                                                                \", end=\"\\r\")\n",
+    "\n",
     "    display(df_highest)\n",
     "\n",
     "by_drop = widgets.Dropdown(\n",
@@ -441,8 +510,10 @@
     "\n",
     "\n",
     "\n",
+    "# TODO By Region: state that the Y axis is qualitative (not real values, but normalized to observe the evolution vs GDP - the tendency)\n",
     "widgets.interact(plotYearRange, Zone = country_drop, Indicator = indicator_drop, Years = intslider)"
    ]
+  },
   }
  ],
  "metadata": {