Skip to content

Commit

Permalink
Update One-Hot-Encoding.ipynb
Browse files Browse the repository at this point in the history
  • Loading branch information
adiag321 committed Sep 25, 2024
1 parent b4d6779 commit 2bb2aca
Showing 1 changed file with 39 additions and 30 deletions.
69 changes: 39 additions & 30 deletions 2 - Data Preprocessing/11 - Feature Encoding/One-Hot-Encoding.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## One-Hot Encoding\n",
"\n",
"1. Using pandas (`get_dummies`)\n",
"2. Using scikit-learn (`from sklearn.preprocessing import OneHotEncoder`)"
]
},
{
"cell_type": "code",
"execution_count": 1,
Expand Down Expand Up @@ -253,7 +263,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 5,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -395,7 +405,7 @@
"[1022 rows x 5 columns]"
]
},
"execution_count": 27,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -410,12 +420,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# One- Hot Encoding Using Pandas"
"# 1. One-Hot Encoding Using Pandas"
]
},
{
"cell_type": "code",
"execution_count": 72,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -427,12 +437,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Before One hot Encoding"
"### A. Before One-Hot Encoding"
]
},
{
"cell_type": "code",
"execution_count": 73,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -517,7 +527,7 @@
"435 Missing IR2 TA 0.0 212000"
]
},
"execution_count": 73,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -529,7 +539,7 @@
},
{
"cell_type": "code",
"execution_count": 74,
"execution_count": 8,
"metadata": {},
"outputs": [
{
Expand All @@ -538,7 +548,7 @@
"Index(['Alley', 'LotShape', 'GarageCond', 'MasVnrArea', 'SalePrice'], dtype='object')"
]
},
"execution_count": 74,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -551,12 +561,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### After One-Hot encoding"
"### B. After One-Hot Encoding"
]
},
{
"cell_type": "code",
"execution_count": 75,
"execution_count": 9,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -697,7 +707,7 @@
"435 False False True "
]
},
"execution_count": 75,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -709,7 +719,7 @@
},
{
"cell_type": "code",
"execution_count": 80,
"execution_count": 10,
"metadata": {},
"outputs": [
{
Expand All @@ -734,7 +744,7 @@
" 10 GarageCond_Po 1022 non-null int32\n",
" 11 GarageCond_TA 1022 non-null int32\n",
"dtypes: int32(12)\n",
"memory usage: 88.2 KB\n",
"memory usage: 55.9 KB\n",
"None\n"
]
},
Expand Down Expand Up @@ -987,7 +997,7 @@
"[1022 rows x 12 columns]"
]
},
"execution_count": 80,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1007,12 +1017,12 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# One Hot Encoing Using Sci-kit Learn"
"# 2. One-Hot Encoding Using Scikit-Learn"
]
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -1021,7 +1031,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 12,
"metadata": {},
"outputs": [
{
Expand All @@ -1035,13 +1045,13 @@
{
"data": {
"text/html": [
"<style>#sk-container-id-2 {color: black;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>OneHotEncoder(drop=&#x27;first&#x27;, sparse=False, sparse_output=False)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(drop=&#x27;first&#x27;, sparse=False, sparse_output=False)</pre></div></div></div></div></div>"
"<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>OneHotEncoder(drop=&#x27;first&#x27;, sparse=False, sparse_output=False)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(drop=&#x27;first&#x27;, sparse=False, sparse_output=False)</pre></div></div></div></div></div>"
],
"text/plain": [
"OneHotEncoder(drop='first', sparse=False, sparse_output=False)"
]
},
"execution_count": 38,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1052,14 +1062,13 @@
" sparse=False, # this will return a numpy array; otherwise it will return a sparse matrix\n",
" handle_unknown=\"error\") #helps to deal with unknown values\n",
"\n",
"\n",
"# here we have to remove the continuous variables and then feed the rest to the encoder\n",
"encoder.fit(X_train.drop([\"MasVnrArea\",\"SalePrice\"],axis=1))"
]
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 13,
"metadata": {},
"outputs": [
{
Expand All @@ -1070,7 +1079,7 @@
" array(['Ex', 'Fa', 'Gd', 'Missing', 'Po', 'TA'], dtype=object)]"
]
},
"execution_count": 39,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1081,7 +1090,7 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 14,
"metadata": {},
"outputs": [
{
Expand All @@ -1096,7 +1105,7 @@
" [1., 0., 0., ..., 0., 0., 1.]])"
]
},
"execution_count": 48,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1109,7 +1118,7 @@
},
{
"cell_type": "code",
"execution_count": 84,
"execution_count": 15,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1337,7 +1346,7 @@
"[1022 rows x 10 columns]"
]
},
"execution_count": 84,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1351,7 +1360,7 @@
},
{
"cell_type": "code",
"execution_count": 85,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -1361,7 +1370,7 @@
},
{
"cell_type": "code",
"execution_count": 86,
"execution_count": 17,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1502,7 +1511,7 @@
"4 1.0 350.0 250000.0 "
]
},
"execution_count": 86,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
Expand Down

0 comments on commit 2bb2aca

Please sign in to comment.