diff --git a/doc/table-proposal.ipynb b/doc/table-proposal.ipynb index ff6cbbb..f634ed7 100644 --- a/doc/table-proposal.ipynb +++ b/doc/table-proposal.ipynb @@ -284,7 +284,7 @@ "metadata": {}, "outputs": [], "source": [ - "final_columns = ['indicator','year','country','unit','value_name','jmp_category','commitment','value']" + "final_columns = ['indicator','year','country','unit','value_name','jmp_category','commitment','value','base_value','initial_value']" ] }, { @@ -322,7 +322,7 @@ " \"17. Water Services, Access, percent of population (2nd Dimension = Basic + Safely Managed).csv\"\n", " ]\n", " },\n", - " \"milestone_years\": [2030, 2050]\n", + " \"milestone_years\": [2019, 2030, 2050] # 2019 for initial only but we remove them after get it\n", "}" ] }, @@ -610,12 +610,11 @@ " df_split = pd.DataFrame(df['value_type'].tolist(), index=df.index)\n", " df_split.columns = ['value_name', 'jmp_category', 'commitment']\n", " df_final = pd.concat([df, df_split], axis=1)\n", + " df_final['remove'] = df_final.apply(remove_unmatches_jmp_category, axis=1) \n", + " df_final = df_final[df_final['remove'] == False].reset_index(drop=True)\n", " df_final['indicator'] = get_ifs_name(file)\n", " df_final['jmp_category'] = df_final.apply(base_jmp_category, axis=1)\n", " df_final['jmp_category'] = df_final['jmp_category'].replace({\"BS\": \"ALB\"})\n", - " df_final['remove'] = df_final.apply(remove_unmatches_jmp_category, axis=1) \n", - " df_final = df_final[df_final['remove'] == False].reset_index(drop=True)\n", - " df_final = df_final[final_columns]\n", " # Add initial value column\n", " df_final['initial_value'] = np.nan\n", " df_final['base_value'] = np.nan\n", @@ -623,10 +622,12 @@ " if \"Water Service\" in file or \"Sanitation Service\" in file: # Filter using the filename\n", " df_final['initial_value'] = df_final.apply(lambda x: add_initial_value_for_wash(x, df_final), axis=1)\n", " df_final['base_value'] = df_final.apply(lambda x: add_base_value(x, df_final), axis=1)\n", + " df_final = df_final[df_final['year'] != 2019].reset_index(drop=True) # remove after get initial value\n", " print(f\"[WASH] : {file}\")\n", " else:\n", " df_final['base_value'] = df_final.apply(lambda x: add_base_value(x, df_final, is_wash_data = False), axis=1)\n", " print(f\"[OTHER]: {file}\")\n", + " df_final = df_final[final_columns]\n", " combined_df = pd.concat([combined_df.dropna(axis=1, how='all'), df_final], ignore_index=True)" ] }, @@ -716,6 +717,19 @@ " \n", " 0\n", " Deaths by Category of Cause - Millions\n", + " 2019\n", + " All Countries\n", + " Mil People\n", + " Base\n", + " NaN\n", + " None\n", + " 1.33\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 1\n", + " Deaths by Category of Cause - Millions\n", " 2030\n", " All Countries\n", " Mil People\n", @@ -727,7 +741,7 @@ " NaN\n", " \n", " \n", - " 1\n", + " 2\n", " Deaths by Category of Cause - Millions\n", " 2050\n", " All Countries\n", @@ -740,7 +754,7 @@ " NaN\n", " \n", " \n", - " 2\n", + " 3\n", " Deaths by Category of Cause - Millions\n", " 2030\n", " All Countries\n", @@ -753,7 +767,7 @@ " NaN\n", " \n", " \n", - " 3\n", + " 4\n", " Deaths by Category of Cause - Millions\n", " 2050\n", " All Countries\n", @@ -765,37 +779,24 @@ " 1.143\n", " NaN\n", " \n", - " \n", - " 4\n", - " Deaths by Category of Cause - Millions\n", - " 2030\n", - " All Countries\n", - " Mil People\n", - " FS\n", - " SM\n", - " Full Sanitation Access in 2030\n", - " 0.955\n", - " 1.237\n", - " NaN\n", - " \n", " \n", "\n", "" ], "text/plain": [ " indicator year country unit \\\n", - "0 Deaths by Category of Cause - Millions 2030 All Countries Mil People \n", - "1 Deaths by Category of Cause - Millions 2050 All Countries Mil People \n", - "2 Deaths by Category of Cause - Millions 2030 All Countries Mil People \n", - "3 Deaths by Category of Cause - Millions 2050 All Countries Mil People \n", - "4 Deaths by Category of Cause - Millions 2030 All Countries Mil People \n", + "0 Deaths by Category of Cause - Millions 2019 All Countries Mil People \n", + "1 Deaths by Category of Cause - Millions 2030 All Countries Mil People \n", + "2 Deaths by Category of Cause - Millions 2050 All Countries Mil People \n", + "3 Deaths by Category of Cause - Millions 2030 All Countries Mil People \n", + "4 Deaths by Category of Cause - Millions 2050 All Countries Mil People \n", "\n", " value_name jmp_category commitment value base_value \\\n", - "0 Base NaN None 1.237 NaN \n", - "1 Base NaN None 1.143 NaN \n", - "2 FS ALB Full Sanitation Access in 2030 1.075 1.237 \n", - "3 FS ALB Full Sanitation Access in 2050 1.068 1.143 \n", - "4 FS SM Full Sanitation Access in 2030 0.955 1.237 \n", + "0 Base NaN None 1.33 NaN \n", + "1 Base NaN None 1.237 NaN \n", + "2 Base NaN None 1.143 NaN \n", + "3 FS ALB Full Sanitation Access in 2030 1.075 1.237 \n", + "4 FS ALB Full Sanitation Access in 2050 1.068 1.143 \n", "\n", " initial_value \n", "0 NaN \n", @@ -1683,7 +1684,7 @@ " \n", " \n", " \n", - " 28196\n", + " 31761\n", " Malnourished Children, Headcount - Millions\n", " 2030\n", " Zambia\n", @@ -1697,7 +1698,7 @@ " 3\n", " \n", " \n", - " 28197\n", + " 31762\n", " Malnourished Children, Headcount - Millions\n", " 2050\n", " Zambia\n", @@ -1716,16 +1717,16 @@ ], "text/plain": [ " indicator year country unit \\\n", - "28196 Malnourished Children, Headcount - Millions 2030 Zambia Mil People \n", - "28197 Malnourished Children, Headcount - Millions 2050 Zambia Mil People \n", + "31761 Malnourished Children, Headcount - Millions 2030 Zambia Mil People \n", + "31762 Malnourished Children, Headcount - Millions 2050 Zambia Mil People \n", "\n", " value_name jmp_category commitment value base_value initial_value \\\n", - "28196 WSI SM 6x 0.208 0.302 NaN \n", - "28197 WSI SM 6x 0.143 0.178 NaN \n", + "31761 WSI SM 6x 0.208 0.302 NaN \n", + "31762 WSI SM 6x 0.143 0.178 NaN \n", "\n", " jmp_name_id \n", - "28196 3 \n", - "28197 3 " + "31761 3 \n", + "31762 3 " ] }, "execution_count": 40, @@ -1811,63 +1812,63 @@ " \n", " \n", " \n", - " 28193\n", + " 31758\n", " 2050\n", - " 0.0003\n", - " 0.0004\n", - " NaN\n", - " 3\n", - " 10\n", - " 2\n", - " 4\n", + " 0.057\n", + " 0.035\n", + " 0.001\n", " 2\n", " 8\n", - " 3\n", - " \n", - " \n", - " 28194\n", - " 2050\n", - " 62.01\n", - " 51.48\n", - " 24.22\n", " 1\n", - " 13\n", - " 6\n", - " 6\n", - " 2\n", + " 5\n", + " 1\n", " 2\n", - " 23\n", + " 12\n", " \n", " \n", - " 28195\n", + " 31759\n", " 2050\n", - " 0.0004\n", - " 0.0004\n", - " NaN\n", - " 2\n", - " 10\n", + " 0.092\n", + " 0.035\n", + " 0.001\n", " 2\n", - " 5\n", + " 8\n", " 1\n", + " 5\n", " 1\n", " 3\n", + " 12\n", " \n", " \n", - " 28196\n", + " 31760\n", " 2050\n", - " 0.0008\n", - " 0.0008\n", - " NaN\n", - " 3\n", - " 10\n", + " 0.111\n", + " 0.035\n", + " 0.001\n", " 2\n", - " 4\n", - " 1\n", " 8\n", + " 1\n", + " 5\n", + " 1\n", " 4\n", + " 12\n", + " \n", + " \n", + " 31761\n", + " 2050\n", + " 0.427\n", + " 0.152\n", + " 0.008\n", + " 2\n", + " 8\n", + " 1\n", + " 2\n", + " 2\n", + " 6\n", + " 12\n", " \n", " \n", - " 28197\n", + " 31762\n", " 2050\n", " 0.143\n", " 0.178\n", @@ -1885,19 +1886,19 @@ "" ], "text/plain": [ - " year value base_value initial_value jmp_name_id indicator_id \\\n", - "28193 2050 0.0003 0.0004 NaN 3 10 \n", - "28194 2050 62.01 51.48 24.22 1 13 \n", - "28195 2050 0.0004 0.0004 NaN 2 10 \n", - "28196 2050 0.0008 0.0008 NaN 3 10 \n", - "28197 2050 0.143 0.178 NaN 3 4 \n", + " year value base_value initial_value jmp_name_id indicator_id \\\n", + "31758 2050 0.057 0.035 0.001 2 8 \n", + "31759 2050 0.092 0.035 0.001 2 8 \n", + "31760 2050 0.111 0.035 0.001 2 8 \n", + "31761 2050 0.427 0.152 0.008 2 8 \n", + "31762 2050 0.143 0.178 NaN 3 4 \n", "\n", " unit_id value_name_id jmp_category_id commitment_id country_id \n", - "28193 2 4 2 8 3 \n", - "28194 6 6 2 2 23 \n", - "28195 2 5 1 1 3 \n", - "28196 2 4 1 8 4 \n", - "28197 4 7 2 4 23 " + "31758 1 5 1 2 12 \n", + "31759 1 5 1 3 12 \n", + "31760 1 5 1 4 12 \n", + "31761 1 2 2 6 12 \n", + "31762 4 7 2 4 23 " ] }, "execution_count": 42, diff --git a/output_data/table_ifs.csv b/output_data/table_ifs.csv index 1cdcf4d..fd8b59e 100644 --- a/output_data/table_ifs.csv +++ b/output_data/table_ifs.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8bbaf2f8cc96c80f4498318721f581fd1a654b4a2ef880b3428077c843f8aa6b -size 944671 +oid sha256:d6306ed38c3ad657ce386288b6fd708203b8ca5ab6433e95e7d96b2c6da381f9 +size 1067034 diff --git a/tests/ifs-testing.csv b/tests/ifs-testing.csv index 4ec8ba1..ba16fed 100644 --- a/tests/ifs-testing.csv +++ b/tests/ifs-testing.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:64bb138706bad1122c248f53ee1f52888ee1812b0358a8037820a3ac74812f08 -size 2759343 +oid sha256:75cf8847faef721f01f7f2f00dbfa6a275eaf958c41559ea3de772803e1205a6 +size 3090012