diff --git a/doc/table-proposal.ipynb b/doc/table-proposal.ipynb
index ff6cbbb..f634ed7 100644
--- a/doc/table-proposal.ipynb
+++ b/doc/table-proposal.ipynb
@@ -284,7 +284,7 @@
"metadata": {},
"outputs": [],
"source": [
- "final_columns = ['indicator','year','country','unit','value_name','jmp_category','commitment','value']"
+ "final_columns = ['indicator','year','country','unit','value_name','jmp_category','commitment','value','base_value','initial_value']"
]
},
{
@@ -322,7 +322,7 @@
" \"17. Water Services, Access, percent of population (2nd Dimension = Basic + Safely Managed).csv\"\n",
" ]\n",
" },\n",
- " \"milestone_years\": [2030, 2050]\n",
+ " \"milestone_years\": [2019, 2030, 2050] # 2019 is included only to obtain the initial value; those rows are removed afterwards\n",
"}"
]
},
@@ -610,12 +610,11 @@
" df_split = pd.DataFrame(df['value_type'].tolist(), index=df.index)\n",
" df_split.columns = ['value_name', 'jmp_category', 'commitment']\n",
" df_final = pd.concat([df, df_split], axis=1)\n",
+ " df_final['remove'] = df_final.apply(remove_unmatches_jmp_category, axis=1) \n",
+ " df_final = df_final[df_final['remove'] == False].reset_index(drop=True)\n",
" df_final['indicator'] = get_ifs_name(file)\n",
" df_final['jmp_category'] = df_final.apply(base_jmp_category, axis=1)\n",
" df_final['jmp_category'] = df_final['jmp_category'].replace({\"BS\": \"ALB\"})\n",
- " df_final['remove'] = df_final.apply(remove_unmatches_jmp_category, axis=1) \n",
- " df_final = df_final[df_final['remove'] == False].reset_index(drop=True)\n",
- " df_final = df_final[final_columns]\n",
" # Add initial value column\n",
" df_final['initial_value'] = np.nan\n",
" df_final['base_value'] = np.nan\n",
@@ -623,10 +622,12 @@
" if \"Water Service\" in file or \"Sanitation Service\" in file: # Filter using the filename\n",
" df_final['initial_value'] = df_final.apply(lambda x: add_initial_value_for_wash(x, df_final), axis=1)\n",
" df_final['base_value'] = df_final.apply(lambda x: add_base_value(x, df_final), axis=1)\n",
+ " df_final = df_final[df_final['year'] != 2019].reset_index(drop=True) # drop the 2019 rows now that the initial value has been obtained\n",
" print(f\"[WASH] : {file}\")\n",
" else:\n",
" df_final['base_value'] = df_final.apply(lambda x: add_base_value(x, df_final, is_wash_data = False), axis=1)\n",
" print(f\"[OTHER]: {file}\")\n",
+ " df_final = df_final[final_columns]\n",
" combined_df = pd.concat([combined_df.dropna(axis=1, how='all'), df_final], ignore_index=True)"
]
},
@@ -716,6 +717,19 @@
"
\n",
" 0 | \n",
" Deaths by Category of Cause - Millions | \n",
+ " 2019 | \n",
+ " All Countries | \n",
+ " Mil People | \n",
+ " Base | \n",
+ " NaN | \n",
+ " None | \n",
+ " 1.33 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Deaths by Category of Cause - Millions | \n",
" 2030 | \n",
" All Countries | \n",
" Mil People | \n",
@@ -727,7 +741,7 @@
" NaN | \n",
"
\n",
" \n",
- " 1 | \n",
+ " 2 | \n",
" Deaths by Category of Cause - Millions | \n",
" 2050 | \n",
" All Countries | \n",
@@ -740,7 +754,7 @@
" NaN | \n",
"
\n",
" \n",
- " 2 | \n",
+ " 3 | \n",
" Deaths by Category of Cause - Millions | \n",
" 2030 | \n",
" All Countries | \n",
@@ -753,7 +767,7 @@
" NaN | \n",
"
\n",
" \n",
- " 3 | \n",
+ " 4 | \n",
" Deaths by Category of Cause - Millions | \n",
" 2050 | \n",
" All Countries | \n",
@@ -765,37 +779,24 @@
" 1.143 | \n",
" NaN | \n",
"
\n",
- " \n",
- " 4 | \n",
- " Deaths by Category of Cause - Millions | \n",
- " 2030 | \n",
- " All Countries | \n",
- " Mil People | \n",
- " FS | \n",
- " SM | \n",
- " Full Sanitation Access in 2030 | \n",
- " 0.955 | \n",
- " 1.237 | \n",
- " NaN | \n",
- "
\n",
" \n",
"\n",
""
],
"text/plain": [
" indicator year country unit \\\n",
- "0 Deaths by Category of Cause - Millions 2030 All Countries Mil People \n",
- "1 Deaths by Category of Cause - Millions 2050 All Countries Mil People \n",
- "2 Deaths by Category of Cause - Millions 2030 All Countries Mil People \n",
- "3 Deaths by Category of Cause - Millions 2050 All Countries Mil People \n",
- "4 Deaths by Category of Cause - Millions 2030 All Countries Mil People \n",
+ "0 Deaths by Category of Cause - Millions 2019 All Countries Mil People \n",
+ "1 Deaths by Category of Cause - Millions 2030 All Countries Mil People \n",
+ "2 Deaths by Category of Cause - Millions 2050 All Countries Mil People \n",
+ "3 Deaths by Category of Cause - Millions 2030 All Countries Mil People \n",
+ "4 Deaths by Category of Cause - Millions 2050 All Countries Mil People \n",
"\n",
" value_name jmp_category commitment value base_value \\\n",
- "0 Base NaN None 1.237 NaN \n",
- "1 Base NaN None 1.143 NaN \n",
- "2 FS ALB Full Sanitation Access in 2030 1.075 1.237 \n",
- "3 FS ALB Full Sanitation Access in 2050 1.068 1.143 \n",
- "4 FS SM Full Sanitation Access in 2030 0.955 1.237 \n",
+ "0 Base NaN None 1.33 NaN \n",
+ "1 Base NaN None 1.237 NaN \n",
+ "2 Base NaN None 1.143 NaN \n",
+ "3 FS ALB Full Sanitation Access in 2030 1.075 1.237 \n",
+ "4 FS ALB Full Sanitation Access in 2050 1.068 1.143 \n",
"\n",
" initial_value \n",
"0 NaN \n",
@@ -1683,7 +1684,7 @@
" \n",
" \n",
" \n",
- " 28196 | \n",
+ " 31761 | \n",
" Malnourished Children, Headcount - Millions | \n",
" 2030 | \n",
" Zambia | \n",
@@ -1697,7 +1698,7 @@
" 3 | \n",
"
\n",
" \n",
- " 28197 | \n",
+ " 31762 | \n",
" Malnourished Children, Headcount - Millions | \n",
" 2050 | \n",
" Zambia | \n",
@@ -1716,16 +1717,16 @@
],
"text/plain": [
" indicator year country unit \\\n",
- "28196 Malnourished Children, Headcount - Millions 2030 Zambia Mil People \n",
- "28197 Malnourished Children, Headcount - Millions 2050 Zambia Mil People \n",
+ "31761 Malnourished Children, Headcount - Millions 2030 Zambia Mil People \n",
+ "31762 Malnourished Children, Headcount - Millions 2050 Zambia Mil People \n",
"\n",
" value_name jmp_category commitment value base_value initial_value \\\n",
- "28196 WSI SM 6x 0.208 0.302 NaN \n",
- "28197 WSI SM 6x 0.143 0.178 NaN \n",
+ "31761 WSI SM 6x 0.208 0.302 NaN \n",
+ "31762 WSI SM 6x 0.143 0.178 NaN \n",
"\n",
" jmp_name_id \n",
- "28196 3 \n",
- "28197 3 "
+ "31761 3 \n",
+ "31762 3 "
]
},
"execution_count": 40,
@@ -1811,63 +1812,63 @@
" \n",
"
\n",
" \n",
- " 28193 | \n",
+ " 31758 | \n",
" 2050 | \n",
- " 0.0003 | \n",
- " 0.0004 | \n",
- " NaN | \n",
- " 3 | \n",
- " 10 | \n",
- " 2 | \n",
- " 4 | \n",
+ " 0.057 | \n",
+ " 0.035 | \n",
+ " 0.001 | \n",
" 2 | \n",
" 8 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " 28194 | \n",
- " 2050 | \n",
- " 62.01 | \n",
- " 51.48 | \n",
- " 24.22 | \n",
" 1 | \n",
- " 13 | \n",
- " 6 | \n",
- " 6 | \n",
- " 2 | \n",
+ " 5 | \n",
+ " 1 | \n",
" 2 | \n",
- " 23 | \n",
+ " 12 | \n",
"
\n",
" \n",
- " 28195 | \n",
+ " 31759 | \n",
" 2050 | \n",
- " 0.0004 | \n",
- " 0.0004 | \n",
- " NaN | \n",
- " 2 | \n",
- " 10 | \n",
+ " 0.092 | \n",
+ " 0.035 | \n",
+ " 0.001 | \n",
" 2 | \n",
- " 5 | \n",
+ " 8 | \n",
" 1 | \n",
+ " 5 | \n",
" 1 | \n",
" 3 | \n",
+ " 12 | \n",
"
\n",
" \n",
- " 28196 | \n",
+ " 31760 | \n",
" 2050 | \n",
- " 0.0008 | \n",
- " 0.0008 | \n",
- " NaN | \n",
- " 3 | \n",
- " 10 | \n",
+ " 0.111 | \n",
+ " 0.035 | \n",
+ " 0.001 | \n",
" 2 | \n",
- " 4 | \n",
- " 1 | \n",
" 8 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " 1 | \n",
" 4 | \n",
+ " 12 | \n",
+ "
\n",
+ " \n",
+ " 31761 | \n",
+ " 2050 | \n",
+ " 0.427 | \n",
+ " 0.152 | \n",
+ " 0.008 | \n",
+ " 2 | \n",
+ " 8 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 6 | \n",
+ " 12 | \n",
"
\n",
" \n",
- " 28197 | \n",
+ " 31762 | \n",
" 2050 | \n",
" 0.143 | \n",
" 0.178 | \n",
@@ -1885,19 +1886,19 @@
""
],
"text/plain": [
- " year value base_value initial_value jmp_name_id indicator_id \\\n",
- "28193 2050 0.0003 0.0004 NaN 3 10 \n",
- "28194 2050 62.01 51.48 24.22 1 13 \n",
- "28195 2050 0.0004 0.0004 NaN 2 10 \n",
- "28196 2050 0.0008 0.0008 NaN 3 10 \n",
- "28197 2050 0.143 0.178 NaN 3 4 \n",
+ " year value base_value initial_value jmp_name_id indicator_id \\\n",
+ "31758 2050 0.057 0.035 0.001 2 8 \n",
+ "31759 2050 0.092 0.035 0.001 2 8 \n",
+ "31760 2050 0.111 0.035 0.001 2 8 \n",
+ "31761 2050 0.427 0.152 0.008 2 8 \n",
+ "31762 2050 0.143 0.178 NaN 3 4 \n",
"\n",
" unit_id value_name_id jmp_category_id commitment_id country_id \n",
- "28193 2 4 2 8 3 \n",
- "28194 6 6 2 2 23 \n",
- "28195 2 5 1 1 3 \n",
- "28196 2 4 1 8 4 \n",
- "28197 4 7 2 4 23 "
+ "31758 1 5 1 2 12 \n",
+ "31759 1 5 1 3 12 \n",
+ "31760 1 5 1 4 12 \n",
+ "31761 1 2 2 6 12 \n",
+ "31762 4 7 2 4 23 "
]
},
"execution_count": 42,
diff --git a/output_data/table_ifs.csv b/output_data/table_ifs.csv
index 1cdcf4d..fd8b59e 100644
--- a/output_data/table_ifs.csv
+++ b/output_data/table_ifs.csv
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:8bbaf2f8cc96c80f4498318721f581fd1a654b4a2ef880b3428077c843f8aa6b
-size 944671
+oid sha256:d6306ed38c3ad657ce386288b6fd708203b8ca5ab6433e95e7d96b2c6da381f9
+size 1067034
diff --git a/tests/ifs-testing.csv b/tests/ifs-testing.csv
index 4ec8ba1..ba16fed 100644
--- a/tests/ifs-testing.csv
+++ b/tests/ifs-testing.csv
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:64bb138706bad1122c248f53ee1f52888ee1812b0358a8037820a3ac74812f08
-size 2759343
+oid sha256:75cf8847faef721f01f7f2f00dbfa6a275eaf958c41559ea3de772803e1205a6
+size 3090012