Skip to content

Commit

Permalink
[#4] Fix initial value
Browse files Browse the repository at this point in the history
  • Loading branch information
dedenbangkit committed Oct 4, 2024
1 parent 0d8d444 commit 339ca33
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 91 deletions.
175 changes: 88 additions & 87 deletions doc/table-proposal.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@
"metadata": {},
"outputs": [],
"source": [
"final_columns = ['indicator','year','country','unit','value_name','jmp_category','commitment','value']"
"final_columns = ['indicator','year','country','unit','value_name','jmp_category','commitment','value','base_value','initial_value']"
]
},
{
Expand Down Expand Up @@ -322,7 +322,7 @@
" \"17. Water Services, Access, percent of population (2nd Dimension = Basic + Safely Managed).csv\"\n",
" ]\n",
" },\n",
" \"milestone_years\": [2030, 2050]\n",
"    \"milestone_years\": [2019, 2030, 2050] # 2019 is included only to obtain the initial value; its rows are removed once that value has been captured\n",
"}"
]
},
Expand Down Expand Up @@ -610,23 +610,24 @@
" df_split = pd.DataFrame(df['value_type'].tolist(), index=df.index)\n",
" df_split.columns = ['value_name', 'jmp_category', 'commitment']\n",
" df_final = pd.concat([df, df_split], axis=1)\n",
" df_final['remove'] = df_final.apply(remove_unmatches_jmp_category, axis=1) \n",
" df_final = df_final[df_final['remove'] == False].reset_index(drop=True)\n",
" df_final['indicator'] = get_ifs_name(file)\n",
" df_final['jmp_category'] = df_final.apply(base_jmp_category, axis=1)\n",
" df_final['jmp_category'] = df_final['jmp_category'].replace({\"BS\": \"ALB\"})\n",
" df_final['remove'] = df_final.apply(remove_unmatches_jmp_category, axis=1) \n",
" df_final = df_final[df_final['remove'] == False].reset_index(drop=True)\n",
" df_final = df_final[final_columns]\n",
" # Add initial value column\n",
" df_final['initial_value'] = np.nan\n",
" df_final['base_value'] = np.nan\n",
" df_final['commitment'] = df_final.apply(modify_commitment_name, axis=1)\n",
" if \"Water Service\" in file or \"Sanitation Service\" in file: # Filter using the filename\n",
" df_final['initial_value'] = df_final.apply(lambda x: add_initial_value_for_wash(x, df_final), axis=1)\n",
" df_final['base_value'] = df_final.apply(lambda x: add_base_value(x, df_final), axis=1)\n",
"        df_final = df_final[df_final['year'] != 2019].reset_index(drop=True) # drop the 2019 rows now that the initial value has been captured\n",
" print(f\"[WASH] : {file}\")\n",
" else:\n",
" df_final['base_value'] = df_final.apply(lambda x: add_base_value(x, df_final, is_wash_data = False), axis=1)\n",
" print(f\"[OTHER]: {file}\")\n",
" df_final = df_final[final_columns]\n",
" combined_df = pd.concat([combined_df.dropna(axis=1, how='all'), df_final], ignore_index=True)"
]
},
Expand Down Expand Up @@ -716,6 +717,19 @@
" <tr>\n",
" <th>0</th>\n",
" <td>Deaths by Category of Cause - Millions</td>\n",
" <td>2019</td>\n",
" <td>All Countries</td>\n",
" <td>Mil People</td>\n",
" <td>Base</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>1.33</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Deaths by Category of Cause - Millions</td>\n",
" <td>2030</td>\n",
" <td>All Countries</td>\n",
" <td>Mil People</td>\n",
Expand All @@ -727,7 +741,7 @@
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <td>Deaths by Category of Cause - Millions</td>\n",
" <td>2050</td>\n",
" <td>All Countries</td>\n",
Expand All @@ -740,7 +754,7 @@
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <td>Deaths by Category of Cause - Millions</td>\n",
" <td>2030</td>\n",
" <td>All Countries</td>\n",
Expand All @@ -753,7 +767,7 @@
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <td>Deaths by Category of Cause - Millions</td>\n",
" <td>2050</td>\n",
" <td>All Countries</td>\n",
Expand All @@ -765,37 +779,24 @@
" <td>1.143</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Deaths by Category of Cause - Millions</td>\n",
" <td>2030</td>\n",
" <td>All Countries</td>\n",
" <td>Mil People</td>\n",
" <td>FS</td>\n",
" <td>SM</td>\n",
" <td>Full Sanitation Access in 2030</td>\n",
" <td>0.955</td>\n",
" <td>1.237</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" indicator year country unit \\\n",
"0 Deaths by Category of Cause - Millions 2030 All Countries Mil People \n",
"1 Deaths by Category of Cause - Millions 2050 All Countries Mil People \n",
"2 Deaths by Category of Cause - Millions 2030 All Countries Mil People \n",
"3 Deaths by Category of Cause - Millions 2050 All Countries Mil People \n",
"4 Deaths by Category of Cause - Millions 2030 All Countries Mil People \n",
"0 Deaths by Category of Cause - Millions 2019 All Countries Mil People \n",
"1 Deaths by Category of Cause - Millions 2030 All Countries Mil People \n",
"2 Deaths by Category of Cause - Millions 2050 All Countries Mil People \n",
"3 Deaths by Category of Cause - Millions 2030 All Countries Mil People \n",
"4 Deaths by Category of Cause - Millions 2050 All Countries Mil People \n",
"\n",
" value_name jmp_category commitment value base_value \\\n",
"0 Base NaN None 1.237 NaN \n",
"1 Base NaN None 1.143 NaN \n",
"2 FS ALB Full Sanitation Access in 2030 1.075 1.237 \n",
"3 FS ALB Full Sanitation Access in 2050 1.068 1.143 \n",
"4 FS SM Full Sanitation Access in 2030 0.955 1.237 \n",
"0 Base NaN None 1.33 NaN \n",
"1 Base NaN None 1.237 NaN \n",
"2 Base NaN None 1.143 NaN \n",
"3 FS ALB Full Sanitation Access in 2030 1.075 1.237 \n",
"4 FS ALB Full Sanitation Access in 2050 1.068 1.143 \n",
"\n",
" initial_value \n",
"0 NaN \n",
Expand Down Expand Up @@ -1683,7 +1684,7 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>28196</th>\n",
" <th>31761</th>\n",
" <td>Malnourished Children, Headcount - Millions</td>\n",
" <td>2030</td>\n",
" <td>Zambia</td>\n",
Expand All @@ -1697,7 +1698,7 @@
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28197</th>\n",
" <th>31762</th>\n",
" <td>Malnourished Children, Headcount - Millions</td>\n",
" <td>2050</td>\n",
" <td>Zambia</td>\n",
Expand All @@ -1716,16 +1717,16 @@
],
"text/plain": [
" indicator year country unit \\\n",
"28196 Malnourished Children, Headcount - Millions 2030 Zambia Mil People \n",
"28197 Malnourished Children, Headcount - Millions 2050 Zambia Mil People \n",
"31761 Malnourished Children, Headcount - Millions 2030 Zambia Mil People \n",
"31762 Malnourished Children, Headcount - Millions 2050 Zambia Mil People \n",
"\n",
" value_name jmp_category commitment value base_value initial_value \\\n",
"28196 WSI SM 6x 0.208 0.302 NaN \n",
"28197 WSI SM 6x 0.143 0.178 NaN \n",
"31761 WSI SM 6x 0.208 0.302 NaN \n",
"31762 WSI SM 6x 0.143 0.178 NaN \n",
"\n",
" jmp_name_id \n",
"28196 3 \n",
"28197 3 "
"31761 3 \n",
"31762 3 "
]
},
"execution_count": 40,
Expand Down Expand Up @@ -1811,63 +1812,63 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>28193</th>\n",
" <th>31758</th>\n",
" <td>2050</td>\n",
" <td>0.0003</td>\n",
" <td>0.0004</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>10</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>0.057</td>\n",
" <td>0.035</td>\n",
" <td>0.001</td>\n",
" <td>2</td>\n",
" <td>8</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28194</th>\n",
" <td>2050</td>\n",
" <td>62.01</td>\n",
" <td>51.48</td>\n",
" <td>24.22</td>\n",
" <td>1</td>\n",
" <td>13</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>23</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28195</th>\n",
" <th>31759</th>\n",
" <td>2050</td>\n",
" <td>0.0004</td>\n",
" <td>0.0004</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>10</td>\n",
" <td>0.092</td>\n",
" <td>0.035</td>\n",
" <td>0.001</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28196</th>\n",
" <th>31760</th>\n",
" <td>2050</td>\n",
" <td>0.0008</td>\n",
" <td>0.0008</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>10</td>\n",
" <td>0.111</td>\n",
" <td>0.035</td>\n",
" <td>0.001</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31761</th>\n",
" <td>2050</td>\n",
" <td>0.427</td>\n",
" <td>0.152</td>\n",
" <td>0.008</td>\n",
" <td>2</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28197</th>\n",
" <th>31762</th>\n",
" <td>2050</td>\n",
" <td>0.143</td>\n",
" <td>0.178</td>\n",
Expand All @@ -1885,19 +1886,19 @@
"</div>"
],
"text/plain": [
" year value base_value initial_value jmp_name_id indicator_id \\\n",
"28193 2050 0.0003 0.0004 NaN 3 10 \n",
"28194 2050 62.01 51.48 24.22 1 13 \n",
"28195 2050 0.0004 0.0004 NaN 2 10 \n",
"28196 2050 0.0008 0.0008 NaN 3 10 \n",
"28197 2050 0.143 0.178 NaN 3 4 \n",
" year value base_value initial_value jmp_name_id indicator_id \\\n",
"31758 2050 0.057 0.035 0.001 2 8 \n",
"31759 2050 0.092 0.035 0.001 2 8 \n",
"31760 2050 0.111 0.035 0.001 2 8 \n",
"31761 2050 0.427 0.152 0.008 2 8 \n",
"31762 2050 0.143 0.178 NaN 3 4 \n",
"\n",
" unit_id value_name_id jmp_category_id commitment_id country_id \n",
"28193 2 4 2 8 3 \n",
"28194 6 6 2 2 23 \n",
"28195 2 5 1 1 3 \n",
"28196 2 4 1 8 4 \n",
"28197 4 7 2 4 23 "
"31758 1 5 1 2 12 \n",
"31759 1 5 1 3 12 \n",
"31760 1 5 1 4 12 \n",
"31761 1 2 2 6 12 \n",
"31762 4 7 2 4 23 "
]
},
"execution_count": 42,
Expand Down
4 changes: 2 additions & 2 deletions output_data/table_ifs.csv
Git LFS file not shown
4 changes: 2 additions & 2 deletions tests/ifs-testing.csv
Git LFS file not shown

0 comments on commit 339ca33

Please sign in to comment.