From 60107c6da8a35f0b0150718cab95ead8269309b7 Mon Sep 17 00:00:00 2001 From: trevorcampbell Date: Mon, 13 Nov 2023 22:12:55 +0000 Subject: [PATCH] deploy: 4e0d6a26d807bf8da6f7b22413cfe809ea29f868 --- pull313/_sources/classification2.md | 26 +- pull313/_sources/clustering.md | 8 +- pull313/_sources/inference.md | 4 +- pull313/_sources/regression1.md | 8 +- pull313/_sources/regression2.md | 8 +- pull313/classification1.html | 204 +++++++-------- pull313/classification2.html | 176 ++++++------- pull313/clustering.html | 182 ++++++------- pull313/inference.html | 182 ++++++------- pull313/intro.html | 70 ++--- pull313/regression1.html | 138 +++++----- pull313/regression2.html | 166 ++++++------ pull313/searchindex.js | 2 +- pull313/viz.html | 392 ++++++++++++++-------------- pull313/wrangling.html | 2 +- 15 files changed, 784 insertions(+), 784 deletions(-) diff --git a/pull313/_sources/classification2.md b/pull313/_sources/classification2.md index bc250318..8b387a81 100644 --- a/pull313/_sources/classification2.md +++ b/pull313/_sources/classification2.md @@ -280,7 +280,7 @@ we call the `seed` function from the `numpy` package, and pass it any integer as Below we use the seed number `1`. At that point, Python will keep track of the randomness that occurs throughout the code. For example, we can call the `sample` method -on the series of numbers, passing the argument `n = 10` to indicate that we want 10 samples. +on the series of numbers, passing the argument `n=10` to indicate that we want 10 samples. ```{code-cell} ipython3 import numpy as np @@ -290,7 +290,7 @@ np.random.seed(1) nums_0_to_9 = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) -random_numbers1 = nums_0_to_9.sample(n = 10).to_numpy() +random_numbers1 = nums_0_to_9.sample(n=10).to_numpy() random_numbers1 ``` You can see that `random_numbers1` is a list of 10 numbers @@ -299,7 +299,7 @@ we run the `sample` method again, we will get a fresh batch of 10 numbers that also look random. ```{code-cell} ipython3 -random_numbers2 = nums_0_to_9.sample(n = 10).to_numpy() +random_numbers2 = nums_0_to_9.sample(n=10).to_numpy() random_numbers2 ``` @@ -309,12 +309,12 @@ as before---and then call the `sample` method again. ```{code-cell} ipython3 np.random.seed(1) -random_numbers1_again = nums_0_to_9.sample(n = 10).to_numpy() +random_numbers1_again = nums_0_to_9.sample(n=10).to_numpy() random_numbers1_again ``` ```{code-cell} ipython3 -random_numbers2_again = nums_0_to_9.sample(n = 10).to_numpy() +random_numbers2_again = nums_0_to_9.sample(n=10).to_numpy() random_numbers2_again ``` @@ -326,12 +326,12 @@ obtain a different sequence of random numbers. ```{code-cell} ipython3 np.random.seed(4235) -random_numbers = nums_0_to_9.sample(n = 10).to_numpy() +random_numbers = nums_0_to_9.sample(n=10).to_numpy() random_numbers ``` ```{code-cell} ipython3 -random_numbers = nums_0_to_9.sample(n = 10).to_numpy() +random_numbers = nums_0_to_9.sample(n=10).to_numpy() random_numbers ``` @@ -378,15 +378,15 @@ functions. Those functions will then use your `RandomState` to generate random n `numpy`'s default generator. For example, we can reproduce our earlier example by using a `RandomState` object with the `seed` value set to 1; we get the same lists of numbers once again. ```{code} -rnd = np.random.RandomState(seed = 1) -random_numbers1_third = nums_0_to_9.sample(n = 10, random_state = rnd).to_numpy() +rnd = np.random.RandomState(seed=1) +random_numbers1_third = nums_0_to_9.sample(n=10, random_state=rnd).to_numpy() random_numbers1_third ``` ```{code} array([2, 9, 6, 4, 0, 3, 1, 7, 8, 5]) ``` ```{code} -random_numbers2_third = nums_0_to_9.sample(n = 10, random_state = rnd).to_numpy() +random_numbers2_third = nums_0_to_9.sample(n=10, random_state=rnd).to_numpy() random_numbers2_third ``` ```{code} @@ -540,8 +540,8 @@ cancer_train["Class"].value_counts(normalize=True) ```{code-cell} ipython3 :tags: [remove-cell] -glue("cancer_train_b_prop", "{:0.0f}".format(cancer_train["Class"].value_counts(normalize = True)["Benign"]*100)) -glue("cancer_train_m_prop", "{:0.0f}".format(cancer_train["Class"].value_counts(normalize = True)["Malignant"]*100)) +glue("cancer_train_b_prop", "{:0.0f}".format(cancer_train["Class"].value_counts(normalize=True)["Benign"]*100)) +glue("cancer_train_m_prop", "{:0.0f}".format(cancer_train["Class"].value_counts(normalize=True)["Malignant"]*100)) ``` ### Preprocess the data @@ -1620,7 +1620,7 @@ for i in range(len(ks)): cancer_tune_pipe = make_pipeline(cancer_preprocessor, KNeighborsClassifier()) param_grid = { "kneighborsclassifier__n_neighbors": range(1, 21), - } ## double check: in R textbook, it is tune_grid(..., grid = 20), so I guess it matches RandomizedSearchCV + } ## double check: in R textbook, it is tune_grid(..., grid=20), so I guess it matches RandomizedSearchCV ## instead of GridSeachCV? # param_grid_rand = { # "kneighborsclassifier__n_neighbors": range(1, 100), diff --git a/pull313/_sources/clustering.md b/pull313/_sources/clustering.md index 25de3c0c..dc1c6759 100644 --- a/pull313/_sources/clustering.md +++ b/pull313/_sources/clustering.md @@ -182,10 +182,10 @@ in the clustering pipeline. ```{code-cell} ipython3 :tags: [remove-cell] penguins_standardized = penguins.assign( - bill_length_standardized = (penguins["bill_length_mm"] - penguins["bill_length_mm"].mean())/penguins["bill_length_mm"].std(), - flipper_length_standardized = (penguins["flipper_length_mm"] - penguins["flipper_length_mm"].mean())/penguins["flipper_length_mm"].std() + bill_length_standardized=(penguins["bill_length_mm"] - penguins["bill_length_mm"].mean())/penguins["bill_length_mm"].std(), + flipper_length_standardized=(penguins["flipper_length_mm"] - penguins["flipper_length_mm"].mean())/penguins["flipper_length_mm"].std() ).drop( - columns = ["bill_length_mm", "flipper_length_mm"] + columns=["bill_length_mm", "flipper_length_mm"] ) ``` @@ -261,7 +261,7 @@ kmeans = KMeans(n_clusters=3) penguin_clust = kmeans.fit(penguins_standardized) -penguins_clustered = penguins_standardized.assign(cluster = penguin_clust.labels_) +penguins_clustered = penguins_standardized.assign(cluster=penguin_clust.labels_) colored_scatter_plot = alt.Chart(penguins_clustered).mark_circle().encode( x=alt.X("flipper_length_standardized", title="Flipper Length (standardized)"), diff --git a/pull313/_sources/inference.md b/pull313/_sources/inference.md index bdb9d4b7..6e89cc1d 100644 --- a/pull313/_sources/inference.md +++ b/pull313/_sources/inference.md @@ -716,9 +716,9 @@ glue( x="mean(price)" ), base.mark_text(align="left", color="#f58518", size=12, fontWeight="bold", dx=10).transform_aggregate( - mean_price = "mean(price)", + mean_price="mean(price)", ).transform_calculate( - label = "'Mean = ' + round(datum.mean_price * 10) / 10" + label="'Mean = ' + round(datum.mean_price * 10) / 10" ).encode( x=alt.X("mean_price:Q", title="Sample mean price per night (dollars)"), y=alt.value(10), diff --git a/pull313/_sources/regression1.md b/pull313/_sources/regression1.md index e20fe67c..973d14bd 100644 --- a/pull313/_sources/regression1.md +++ b/pull313/_sources/regression1.md @@ -833,8 +833,8 @@ from sklearn.metrics import mean_squared_error sacramento_test["predicted"] = sacr_gridsearch.predict(sacramento_test) RMSPE = mean_squared_error( - y_true = sacramento_test["price"], - y_pred = sacramento_test["predicted"] + y_true=sacramento_test["price"], + y_pred=sacramento_test["predicted"] )**(1/2) RMSPE ``` @@ -1066,8 +1066,8 @@ to compute the RMSPE. ```{code-cell} ipython3 sacramento_test["predicted"] = sacr_gridsearch.predict(sacramento_test) RMSPE_mult = mean_squared_error( - y_true = sacramento_test["price"], - y_pred = sacramento_test["predicted"] + y_true=sacramento_test["price"], + y_pred=sacramento_test["predicted"] )**(1/2) RMSPE_mult diff --git a/pull313/_sources/regression2.md b/pull313/_sources/regression2.md index edca8052..60db7cd4 100644 --- a/pull313/_sources/regression2.md +++ b/pull313/_sources/regression2.md @@ -440,8 +440,8 @@ sacramento_test["predicted"] = lm.predict(sacramento_test[["sqft"]]) # calculate RMSPE RMSPE = mean_squared_error( - y_true = sacramento_test["price"], - y_pred = sacramento_test["predicted"] + y_true=sacramento_test["price"], + y_pred=sacramento_test["predicted"] )**(1/2) RMSPE @@ -734,8 +734,8 @@ Finally, we make predictions on the test data set to assess the quality of our m sacramento_test["predicted"] = mlm.predict(sacramento_test[["sqft","beds"]]) lm_mult_test_RMSPE = mean_squared_error( - y_true = sacramento_test["price"], - y_pred = sacramento_test["predicted"] + y_true=sacramento_test["price"], + y_pred=sacramento_test["predicted"] )**(1/2) lm_mult_test_RMSPE ``` diff --git a/pull313/classification1.html b/pull313/classification1.html index 857dc6fc..e21b08de 100644 --- a/pull313/classification1.html +++ b/pull313/classification1.html @@ -872,23 +872,23 @@

5.4.3. Exploring the cancer data
-
+
@@ -982,23 +982,23 @@

5.5. Classification with
-
+

Fig. 5.2 Scatter plot of concavity versus perimeter with new observation represented as a red diamond.#

@@ -1057,23 +1057,23 @@

5.5. Classification with
-
+

Fig. 5.3 Scatter plot of concavity versus perimeter. The new observation is represented as a red diamond with a line to the one nearest neighbor, which has a malignant @@ -1136,23 +1136,23 @@

5.5. Classification with
-
+

Fig. 5.4 Scatter plot of concavity versus perimeter. The new observation is represented as a red diamond with a line to the one nearest neighbor, which has a benign @@ -1215,23 +1215,23 @@

5.5. Classification with
-
+

Fig. 5.5 Scatter plot of concavity versus perimeter with three nearest neighbors.#

@@ -1311,23 +1311,23 @@

5.5.1. Distance between points
-
+

Fig. 5.6 Scatter plot of concavity versus perimeter with new observation represented as a red diamond.#

@@ -1507,23 +1507,23 @@

5.5.1. Distance between points
-
+

Fig. 5.7 Scatter plot of concavity versus perimeter with 5 nearest neighbors circled.#

@@ -1719,9 +1719,9 @@

5.5.2. More than two explanatory variabl }); } -

Fig. 5.9 Comparison of K = 3 nearest neighbors with standardized and unstandardized data.#

@@ -2512,23 +2512,23 @@

5.7.1. Centering and scaling
-
+

Fig. 5.10 Close-up of three nearest neighbors for unstandardized data.#

@@ -2625,23 +2625,23 @@

5.7.2. Balancing
-
+

@@ -2722,23 +2722,23 @@

5.7.2. Balancing
-
+

Fig. 5.12 Imbalanced data with 7 nearest neighbors to a new observation highlighted.#

@@ -2796,23 +2796,23 @@

5.7.2. Balancing
-
+

Fig. 5.13 Imbalanced data with background color indicating the decision of the classifier and the points represent the labeled data.#

@@ -2906,23 +2906,23 @@

5.7.2. Balancing
-
+

Fig. 5.14 Upsampled data with background color indicating the decision of the classifier.#

@@ -3445,23 +3445,23 @@

5.7.3. Missing data
-
+
diff --git a/pull313/classification2.html b/pull313/classification2.html index 31f0d309..34749c61 100644 --- a/pull313/classification2.html +++ b/pull313/classification2.html @@ -599,7 +599,7 @@

6.3. Evaluating performance1. At that point, Python will keep track of the randomness that occurs throughout the code. For example, we can call the sample method -on the series of numbers, passing the argument n = 10 to indicate that we want 10 samples.

+on the series of numbers, passing the argument n=10 to indicate that we want 10 samples.

@@ -626,7 +626,7 @@

6.3. Evaluating performance
-
random_numbers2 = nums_0_to_9.sample(n = 10).to_numpy()
+
random_numbers2 = nums_0_to_9.sample(n=10).to_numpy()
 random_numbers2
 
@@ -643,7 +643,7 @@

6.3. Evaluating performance
np.random.seed(1)
-random_numbers1_again = nums_0_to_9.sample(n = 10).to_numpy()
+random_numbers1_again = nums_0_to_9.sample(n=10).to_numpy()
 random_numbers1_again
 
@@ -656,7 +656,7 @@

6.3. Evaluating performance
-
random_numbers2_again = nums_0_to_9.sample(n = 10).to_numpy()
+
random_numbers2_again = nums_0_to_9.sample(n=10).to_numpy()
 random_numbers2_again
 
@@ -675,7 +675,7 @@

6.3. Evaluating performance
np.random.seed(4235)
-random_numbers = nums_0_to_9.sample(n = 10).to_numpy()
+random_numbers = nums_0_to_9.sample(n=10).to_numpy()
 random_numbers
 
@@ -688,7 +688,7 @@

6.3. Evaluating performance
-
random_numbers = nums_0_to_9.sample(n = 10).to_numpy()
+
random_numbers = nums_0_to_9.sample(n=10).to_numpy()
 random_numbers
 
@@ -738,15 +738,15 @@

6.3. Evaluating performanceRandomState to generate random numbers instead of numpy’s default generator. For example, we can reproduce our earlier example by using a RandomState object with the seed value set to 1; we get the same lists of numbers once again.

-
rnd = np.random.RandomState(seed = 1)
-random_numbers1_third = nums_0_to_9.sample(n = 10, random_state = rnd).to_numpy()
+
rnd = np.random.RandomState(seed=1)
+random_numbers1_third = nums_0_to_9.sample(n=10, random_state=rnd).to_numpy()
 random_numbers1_third
 
array([2, 9, 6, 4, 0, 3, 1, 7, 8, 5])
 
-
random_numbers2_third = nums_0_to_9.sample(n = 10, random_state = rnd).to_numpy()
+
random_numbers2_third = nums_0_to_9.sample(n=10, random_state=rnd).to_numpy()
 random_numbers2_third
 
@@ -804,23 +804,23 @@

6.5. Evaluating performance with
-
+

@@ -1523,32 +1523,32 @@

6.6.1. Cross-validation6.6.1. Cross-validation6.6.1. Cross-validation6.6.1. Cross-validation6.6.2. Parameter value selection
-
+

Fig. 6.5 Plot of estimated accuracy versus the number of neighbors.#

@@ -2256,23 +2256,23 @@

6.6.3. Under/Overfitting
-
+

Fig. 6.6 Plot of accuracy estimate versus number of neighbors for many K values.#

@@ -2347,23 +2347,23 @@

6.6.3. Under/Overfitting
-
+

Fig. 6.7 Effect of K in overfitting and underfitting.#

@@ -2648,23 +2648,23 @@

6.8.1. The effect of irrelevant predicto
-
+

Fig. 6.9 Effect of inclusion of irrelevant predictors.#

@@ -2727,23 +2727,23 @@

6.8.1. The effect of irrelevant predicto
-
+

Fig. 6.10 Tuned number of neighbors for varying number of irrelevant predictors.#

@@ -2797,23 +2797,23 @@

6.8.1. The effect of irrelevant predicto
-
+

Fig. 6.11 Accuracy versus number of irrelevant predictors for tuned and untuned number of neighbors.#

@@ -3276,23 +3276,23 @@

6.8.3. Forward selection in
-
+

Fig. 6.12 Estimated accuracy versus the number of predictors for the sequence of models built using forward selection.#

diff --git a/pull313/clustering.html b/pull313/clustering.html index 881bd9cc..1c84d9c1 100644 --- a/pull313/clustering.html +++ b/pull313/clustering.html @@ -756,23 +756,23 @@

9.4. An illustrative example
-
+

Fig. 9.2 Scatter plot of standardized bill length versus standardized flipper length.#

@@ -850,23 +850,23 @@

9.4. An illustrative example
-
+

Fig. 9.3 Scatter plot of standardized bill length versus standardized flipper length with colored groups.#

@@ -959,23 +959,23 @@

9.5.1. Measuring cluster quality
-
+

Fig. 9.4 Cluster 0 from the penguins_standardized data set example. Observations are in blue, with the cluster center highlighted in orange.#

@@ -1042,23 +1042,23 @@

9.5.1. Measuring cluster quality
-
+

Fig. 9.5 Cluster 0 from the penguins_standardized data set example. Observations are in blue, with the cluster center highlighted in orange. The distances from the observations to the cluster center are represented as black lines.#

@@ -1121,23 +1121,23 @@

9.5.1. Measuring cluster quality
-
+

Fig. 9.6 All clusters from the penguins_standardized data set example. Observations are in blue, orange, and red with the cluster center highlighted in orange. The distances from the observations to each of the respective cluster centers are represented as black lines.#

@@ -1205,23 +1205,23 @@

9.5.2. The clustering algorithm
-
+

Fig. 9.7 Random initialization of labels. Each cluster is depicted as a different color and shape.#

@@ -1287,23 +1287,23 @@

9.5.2. The clustering algorithm
-
+

Fig. 9.8 First three iterations of K-means clustering on the penguins_standardized example data set. Each pair of plots corresponds to an iteration. Within the pair, the first plot depicts the center update, and the second plot depicts the reassignment of data to clusters. Cluster centers are indicated by larger points that are outlined in black.#

@@ -1373,23 +1373,23 @@

9.5.3. Random restarts
-
+

Fig. 9.9 Random initialization of labels.#

@@ -1444,23 +1444,23 @@

9.5.3. Random restarts
-
+

Fig. 9.10 First four iterations of K-means clustering on the penguins_standardized example data set with a poor random initialization. Each pair of plots corresponds to an iteration. Within the pair, the first plot depicts the center update, and the second plot depicts the reassignment of data to clusters. Cluster centers are indicated by larger points that are outlined in black.#

@@ -1530,23 +1530,23 @@

9.5.4. Choosing K
-
+

Fig. 9.11 Clustering of the penguin data for K clusters ranging from 1 to 9. Cluster centers are indicated by larger points that are outlined in black.#

@@ -1606,23 +1606,23 @@

9.5.4. Choosing K
-
+

Fig. 9.12 Total WSSD for K clusters ranging from 1 to 9.#

@@ -1934,23 +1934,23 @@

9.6. K-means in Python
-
+

Fig. 9.13 The data colored by the cluster assignments returned by K-means.#

@@ -2179,23 +2179,23 @@

9.6. K-means in Python
-
+

Fig. 9.14 A plot showing the total WSSD versus the number of clusters.#

diff --git a/pull313/inference.html b/pull313/inference.html index 41c5be6f..5b7abb7d 100644 --- a/pull313/inference.html +++ b/pull313/inference.html @@ -1225,23 +1225,23 @@

10.4.1. Sampling distributions for propo
-
+

Fig. 10.2 Sampling distribution of the sample proportion for sample size 40.#

@@ -1349,23 +1349,23 @@

10.4.2. Sampling distributions for means
-
+

Fig. 10.3 Population distribution of price per night (dollars) for all Airbnb listings in Vancouver, Canada.#

@@ -1471,23 +1471,23 @@

10.4.2. Sampling distributions for means
-
+

Fig. 10.4 Distribution of price per night (dollars) for sample of 40 Airbnb listings.#

@@ -1686,23 +1686,23 @@

10.4.2. Sampling distributions for means
-
+

Fig. 10.5 Sampling distribution of the sample means for sample size of 40.#

@@ -1782,23 +1782,23 @@

10.4.2. Sampling distributions for means
-
+

@@ -1864,23 +1864,23 @@

10.4.2. Sampling distributions for means
-
+

@@ -2017,23 +2017,23 @@

10.5.1. Overview
-
+

Fig. 10.8 Comparison of samples of different sizes from the population.#

@@ -2622,23 +2622,23 @@

10.5.2. Bootstrapping in Python
-
+

@@ -2722,23 +2722,23 @@

10.5.2. Bootstrapping in Python
-
+

@@ -3018,23 +3018,23 @@

10.5.2. Bootstrapping in Python
-
+

@@ -3291,23 +3291,23 @@

10.5.2. Bootstrapping in Python
-
+

@@ -3368,23 +3368,23 @@

10.5.2. Bootstrapping in Python
-
+

@@ -3544,23 +3544,23 @@

10.5.3. Using the bootstrap to calculate
-
+

diff --git a/pull313/intro.html b/pull313/intro.html index f7441397..1fdad888 100644 --- a/pull313/intro.html +++ b/pull313/intro.html @@ -2047,23 +2047,23 @@

1.11.1. Using
-
+

Fig. 1.8 Bar plot of the ten Aboriginal languages most often reported by Canadian residents as their mother tongue#

@@ -2151,23 +2151,23 @@

1.11.1. Using
-
+

Fig. 1.9 Bar plot of the ten Aboriginal languages most often reported by Canadian residents as their mother tongue with x and y labels. Note that this visualization is not done yet; there are still improvements to be made.#

@@ -2237,23 +2237,23 @@

1.11.1. Using
-
+

Fig. 1.10 Horizontal bar plot of the ten Aboriginal languages most often reported by Canadian residents as their mother tongue. There are no more serious issues with this visualization, but it could be refined further.#

@@ -2325,23 +2325,23 @@

1.11.1. Using
-
+

Fig. 1.11 Bar plot of the ten Aboriginal languages most often reported by Canadian residents as their mother tongue with bars reordered.#

@@ -2451,23 +2451,23 @@

1.11.3. Putting it all together
-
+

Fig. 1.12 Bar plot of the ten Aboriginal languages most often reported by Canadian residents as their mother tongue#

diff --git a/pull313/regression1.html b/pull313/regression1.html index 6e210d22..08de1fda 100644 --- a/pull313/regression1.html +++ b/pull313/regression1.html @@ -678,23 +678,23 @@

7.4. Exploring a data set
-
+

Fig. 7.1 Scatter plot of price (USD) versus house size (square feet).#

@@ -807,23 +807,23 @@

7.5. K-nearest neighbors regression
-
+

Fig. 7.2 Scatter plot of price (USD) versus house size (square feet) with vertical line indicating 2,000 square feet on x-axis.#

@@ -992,23 +992,23 @@

7.5. K-nearest neighbors regression
-
+

Fig. 7.3 Scatter plot of price (USD) versus house size (square feet) with lines to 5 nearest neighbors.#

@@ -1082,23 +1082,23 @@

7.5. K-nearest neighbors regression
-
+

Fig. 7.4 Scatter plot of price (USD) versus house size (square feet) with predicted price for a 2,000 square-foot house based on 5 nearest neighbors represented as a red dot.#

@@ -1221,23 +1221,23 @@

7.6. Training, evaluating, and tuning th
-
+

Fig. 7.5 Scatter plot of price (USD) versus house size (square feet) with example predictions (blue line) and the error in those predictions compared with true response values for three selected observations (vertical red lines).#

@@ -1612,23 +1612,23 @@

7.6. Training, evaluating, and tuning th
-
+

Fig. 7.6 Effect of the number of neighbors on the RMSPE.#

@@ -1710,23 +1710,23 @@

7.7. Underfitting and overfitting
-
+

Fig. 7.7 Predicted values for house price (represented as a orange line) from KNN regression models for six different values for \(K\).#

@@ -1839,8 +1839,8 @@

7.8. Evaluating on the test setsacramento_test["predicted"] = sacr_gridsearch.predict(sacramento_test) RMSPE = mean_squared_error( - y_true = sacramento_test["price"], - y_pred = sacramento_test["predicted"] + y_true=sacramento_test["price"], + y_pred=sacramento_test["predicted"] )**(1/2) RMSPE

@@ -1920,23 +1920,23 @@

7.8. Evaluating on the test set
-
+

Fig. 7.8 Predicted values of house price (orange line) for the final KNN regression model.#

@@ -2027,23 +2027,23 @@

7.9. Multivariable KNN regression
-
+

Fig. 7.9 Scatter plot of the sale price of houses versus the number of bedrooms.#

@@ -2207,8 +2207,8 @@

7.9. Multivariable KNN regression

Fig. 8.1 Scatter plot of sale price versus size with line of best fit for subset of the Sacramento housing data.#

@@ -529,23 +529,23 @@

8.3. Simple linear regression
-
+

Fig. 8.2 Scatter plot of sale price versus size with line of best fit and a red dot at the predicted sale price for a 2,000 square-foot home.#

@@ -605,23 +605,23 @@

8.3. Simple linear regression
-
+

Fig. 8.3 Scatter plot of sale price versus size with many possible lines that could be drawn through the data points.#

@@ -681,23 +681,23 @@

8.3. Simple linear regression
-
+

Fig. 8.4 Scatter plot of sale price versus size with red lines denoting the vertical distances between the predicted values and the observed data points.#

@@ -868,8 +868,8 @@

8.4. Linear regression in Python# calculate RMSPE RMSPE = mean_squared_error( - y_true = sacramento_test["price"], - y_pred = sacramento_test["predicted"] + y_true=sacramento_test["price"], + y_pred=sacramento_test["predicted"] )**(1/2) RMSPE @@ -927,23 +927,23 @@

8.4. Linear regression in Python
-
+

Fig. 8.5 Scatter plot of sale price versus size with line of best fit for the full Sacramento housing data.#

@@ -1006,23 +1006,23 @@

8.5. Comparing simple linear and KNN reg
-
+

Fig. 8.6 Comparison of simple linear regression and KNN regression.#

@@ -1154,8 +1154,8 @@

8.6. Multivariable linear regression
sacramento_test["predicted"] = mlm.predict(sacramento_test[["sqft","beds"]])
 
 lm_mult_test_RMSPE = mean_squared_error(
-    y_true = sacramento_test["price"], 
-    y_pred = sacramento_test["predicted"]
+    y_true=sacramento_test["price"],
+    y_pred=sacramento_test["predicted"]
 )**(1/2)
 lm_mult_test_RMSPE
 
@@ -1193,9 +1193,9 @@

8.6. Multivariable linear regression -

Fig. 8.8 Scatter plot of a subset of the data, with outlier highlighted in red.#

@@ -1420,23 +1420,23 @@

8.7.1. Outliers
-
+

Fig. 8.9 Scatter plot of the full data, with outlier highlighted in red.#

@@ -1502,23 +1502,23 @@

8.7.2. Multicollinearity
-
+

Fig. 8.10 Scatter plot of house size (in square feet) measured by person 1 versus house size (in square feet) measured by person 2.#

@@ -1691,23 +1691,23 @@

8.8. Designing new predictors
-
+

Fig. 8.11 Example of a data set with a nonlinear relationship between the predictor and the response.#

@@ -1779,23 +1779,23 @@

8.8. Designing new predictors
-
+

Fig. 8.12 Relationship between the transformed predictor and the response.#

diff --git a/pull313/searchindex.js b/pull313/searchindex.js index 807c4818..9fc261fc 100644 --- a/pull313/searchindex.js +++ b/pull313/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["acknowledgements", "authors", "classification1", "classification2", "clustering", "index", "inference", "intro", "jupyter", "preface-text", "reading", "regression1", "regression2", "setup", "version-control", "viz", "wrangling"], "filenames": ["acknowledgements.md", "authors.md", "classification1.md", "classification2.md", "clustering.md", "index.md", "inference.md", "intro.md", "jupyter.md", "preface-text.md", "reading.md", "regression1.md", "regression2.md", "setup.md", "version-control.md", "viz.md", "wrangling.md"], "titles": ["Acknowledgments", "About the authors", "5. Classification I: training & predicting", "6. Classification II: evaluation & tuning", "9. Clustering", "Data Science", "10. Statistical inference", "1. Python and Pandas", "11. Combining code and text with Jupyter", "Preface", "2. Reading in data locally and from the web", "7. Regression I: K-nearest neighbors", "8. Regression II: linear regression", "13. Setting up your computer", "12. Collaboration with version control", "4. Effective data visualization", "3. Cleaning and wrangling data"], "terms": {"we": [0, 2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 16], "d": [0, 1, 6, 7, 10, 15], "like": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "thank": 0, "everyon": 0, "ha": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "contribut": [0, 1, 14], "develop": [0, 1, 3, 6, 7, 8, 9, 10, 14], "data": [0, 1, 4, 6, 9, 12, 13, 14], "scienc": [0, 1, 2, 3, 7, 8, 9, 13, 14, 16], "A": [0, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "first": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "introduct": [0, 3, 4, 6, 7, 9, 10, 12], "thi": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15], "an": [0, 1, 2, 6, 7, 8, 9, 11, 12, 13, 14, 16], "open": [0, 1, 5, 7, 8, 10, 13, 14, 15], "sourc": [0, 1, 10, 15], "textbook": [0, 1, 2, 3, 5, 9, 10, 12, 14, 16], "began": [0, 10], "collect": [0, 2, 3, 4, 6, 7, 10, 15, 16], "cours": [0, 1, 3, 4, 6, 7, 8, 9, 10, 12, 16], "read": [0, 2, 3, 5, 6, 7, 8, 9, 11, 12, 14, 15, 16], "dsci": [0, 10, 13], "100": [0, 2, 3, 6, 7, 10, 11, 12, 13, 15, 16], "new": [0, 3, 4, 6, 7, 10, 11, 13, 14, 15, 16], "introductori": [0, 3, 6], "univers": [0, 1, 6, 10], "british": [0, 1, 6, 7, 10], "columbia": [0, 1, 6, 10], "ubc": [0, 1, 10, 13], "sever": [0, 1, 2, 6, 10, 14, 15, 16], "faculti": 0, "member": [0, 2, 14], "depart": [0, 1], "statist": [0, 1, 2, 3, 4, 7, 10, 11, 12, 15], "were": [0, 2, 3, 6, 7, 8, 10, 12, 14, 15, 16], "pivot": 0, "shape": [0, 2, 3, 4, 6, 7, 10, 12, 15, 16], "direct": [0, 2, 10, 15], "greatli": [0, 16], "broad": [0, 15], "structur": [0, 3, 4, 7, 10, 15], "list": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15], "topic": [0, 3, 4, 8, 12, 14], "book": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "would": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "especi": [0, 2, 7, 10, 13, 14, 15], "mat\u00eda": 0, "salib\u00edan": 0, "barrera": 0, "hi": [0, 1], "mentorship": 0, "dure": [0, 1, 3, 7, 11, 14, 16], "initi": [0, 1, 2, 4, 7, 10, 11, 12, 14, 15], "roll": 0, "out": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "both": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "door": 0, "wa": [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "alwai": [0, 2, 3, 4, 8, 10, 11, 12, 13, 15, 16], "when": [0, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16], "need": [0, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "chat": 0, "about": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "how": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "best": [0, 3, 6, 7, 10, 11, 12, 14, 15], "introduc": [0, 6, 7, 12, 14, 15, 16], "teach": [0, 1, 2, 7], "our": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "year": [0, 2, 7, 10, 15, 16], "student": [0, 1, 6], "also": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "gabriela": 0, "cohen": 0, "freue": 0, "her": [0, 1], "561": 0, "regress": [0, 2, 3, 4, 7, 9], "i": [0, 3, 4, 6, 7, 8, 9, 10, 12, 13, 15, 16], "materi": [0, 2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "from": [0, 3, 4, 6, 7, 8, 9, 11, 12, 13, 15], "master": [0, 1], "program": [0, 1, 3, 7, 8, 9, 10, 13, 15], "some": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "linear": [0, 3, 8, 11, 15], "figur": [0, 2, 7, 16], "inspir": [0, 10], "all": [0, 2, 3, 4, 6, 8, 9, 11, 12, 13, 14, 15, 16], "those": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "who": [0, 2, 3, 6, 7, 8, 10, 14, 15, 16], "process": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "publish": [0, 10, 15], "In": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "particular": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "review": [0, 10, 14], "feedback": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "suggest": [0, 2, 3, 6, 7, 11, 12, 15, 16], "rohan": 0, "alexand": 0, "isabella": 0, "ghement": 0, "virgilio": 0, "g\u00f3mez": 0, "rubio": 0, "albert": [0, 15], "kim": 0, "adam": 0, "loi": 0, "maria": 0, "prokofieva": 0, "emili": 0, "rieder": 0, "greg": [0, 14], "wilson": [0, 7, 14], "The": [0, 1, 6, 7, 10, 13, 14, 15, 16], "improv": [0, 2, 3, 4, 6, 7, 11, 12, 14, 15], "substanti": [0, 3, 11], "insight": [0, 4, 9, 15], "give": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "special": [0, 2, 6, 7, 10, 14, 15, 16], "jim": 0, "zidek": 0, "support": [0, 2, 3, 7, 13, 15, 16], "encourag": [0, 16], "throughout": [0, 2, 3, 7, 9, 14, 16], "roger": [0, 7], "peng": [0, 7], "gracious": 0, "offer": [0, 3, 6, 10, 11, 12, 14], "write": [0, 2, 7, 8, 12, 14, 16], "foreword": 0, "final": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "ow": 0, "debt": 0, "gratitud": 0, "over": [0, 1, 2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15, 16], "past": [0, 2, 3, 4, 10, 11, 12, 13, 14, 15], "few": [0, 2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15, 16], "thei": [0, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "provid": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "invalu": 0, "worksheet": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "found": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "bug": [0, 14, 16], "us": [0, 1, 2, 3, 4, 8, 9, 11, 12, 13, 15], "stood": 0, "veri": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "patient": [0, 2, 3], "class": [0, 2, 3, 7, 10, 15, 16], "while": [0, 2, 3, 4, 7, 9, 10, 12, 15, 16], "frantic": 0, "fix": [0, 2, 3, 6, 8, 11, 14, 15, 16], "brought": 0, "level": [0, 3, 4, 6, 7, 9, 12, 15], "enthusiasm": 0, "sustain": 0, "hard": [0, 7, 10, 15, 16], "work": [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 15, 16], "creat": [0, 1, 2, 4, 6, 9, 10, 11, 12, 16], "interact": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "them": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "taught": [0, 2], "learn": [0, 1, 9], "reflect": [0, 1, 15], "content": [0, 1, 2, 5, 10, 14, 16], "translat": [0, 10], "origin": [0, 1, 2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "which": [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "focus": [0, 1, 2, 3, 11], "r": [0, 1, 3, 4, 5, 6, 7, 10, 15], "languag": [0, 1, 2, 3, 6, 8, 9, 10, 11, 13, 16], "ar": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "navya": 0, "dahiya": 0, "gloria": 0, "ye": [0, 2], "complet": [0, 1, 3, 6, 7, 8, 10, 11, 13, 14], "round": [0, 3, 6], "philip": 0, "austin": 0, "leadership": 0, "guidanc": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "gratefulli": 0, "educ": [0, 1, 2], "resourc": [0, 1, 2, 11], "fund": 0, "exercis": [0, 9, 13], "version": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16], "tiffani": [1, 5, 7, 15], "timber": [1, 5, 7, 15], "trevor": [1, 3, 4, 5, 12], "campbel": [1, 5], "melissa": [1, 5], "lee": [1, 5, 15], "adapt": [1, 15], "python": [1, 2, 3, 9, 11, 13, 14, 15], "joel": [1, 5], "ostblom": [1, 5], "lindsei": [1, 5], "heagi": [1, 5], "associ": [1, 6, 7, 9, 10, 14, 16], "professor": 1, "research": [1, 4, 15], "autom": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "scalabl": 1, "bayesian": 1, "infer": 1, "algorithm": [1, 3, 11, 12, 15], "nonparametr": [1, 2, 11], "stream": 1, "theori": [1, 2, 4, 6, 11], "he": 1, "previous": [1, 2, 6, 7, 10, 11, 15, 16], "postdoctor": 1, "advis": [1, 10, 11, 15], "tamara": 1, "broderick": 1, "comput": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "artifici": 1, "intellig": 1, "laboratori": [1, 4], "csail": 1, "institut": [1, 3, 15], "system": [1, 10, 13, 14], "societi": 1, "idss": 1, "mit": 1, "ph": 1, "candid": [1, 3, 12], "under": [1, 5, 8, 13, 14, 16], "jonathan": 1, "inform": [1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "decis": [1, 2, 3, 6, 14], "lid": 1, "befor": [1, 2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 16], "engin": [1, 10, 12, 13, 15], "toronto": [1, 10, 16], "co": [1, 15], "director": 1, "vancouv": [1, 6, 10, 15, 16], "option": [1, 2, 8, 10, 12, 13, 14, 15, 16], "role": [1, 7, 10, 15], "she": 1, "curriculum": 1, "around": [1, 3, 6, 7, 11, 12, 15, 16], "respons": [1, 2, 3, 4, 10, 11, 12, 14], "applic": [1, 3, 6, 7, 11, 12, 13, 16], "solv": [1, 2, 3, 4, 7, 9, 12, 14, 16], "real": [1, 2, 3, 6, 7, 10, 11, 12, 14, 16], "world": [1, 6, 7, 9, 14, 15, 16], "problem": [1, 3, 4, 6, 7, 8, 9, 12, 14, 15, 16], "One": [1, 2, 3, 6, 7, 8, 11, 12, 14, 15, 16], "favorit": [1, 12], "graduat": 1, "collabor": [1, 8, 9], "softwar": [1, 2, 9, 10, 13, 14, 15, 16], "packag": [1, 2, 3, 4, 7, 10, 11, 12, 13, 15, 16], "modern": [1, 2, 10, 15], "tool": [1, 2, 3, 4, 7, 8, 9, 10, 12, 15, 16], "workflow": [1, 2, 3, 4, 7, 8, 9, 11, 12], "assist": 1, "undergradu": [1, 6], "center": [1, 4, 6, 10, 12, 15, 16], "approach": [1, 2, 3, 4, 6, 7, 9, 11, 12, 14, 16], "assess": [1, 3, 4, 11, 12, 14, 15], "promot": 1, "equiti": 1, "divers": [1, 6], "inclus": [1, 3, 12, 14], "earth": [1, 15], "ocean": 1, "atmospher": [1, 15], "geophys": 1, "invers": 1, "facil": [1, 14], "combin": [1, 2, 3, 4, 9, 10, 12, 14, 15], "method": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "numer": [1, 2, 6, 10, 11, 12, 15, 16], "simul": [1, 2, 6, 15], "machin": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "answer": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 15, 16], "question": [1, 2, 3, 4, 6, 9, 11, 12, 15, 16], "subsurfac": 1, "primari": [1, 2, 7, 14, 15, 16], "includ": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "miner": 1, "explor": [1, 3, 4, 6, 10, 12, 14, 15, 16], "carbon": [1, 15], "sequestr": 1, "groundwat": 1, "environment": [1, 4], "studi": [1, 2, 3, 4, 6, 7, 11, 15, 16], "bsc": 1, "alberta": [1, 10, 16], "phd": 1, "held": 1, "posit": [1, 3, 4, 7, 11, 15], "california": [1, 11], "berkelei": 1, "prior": [1, 2, 10, 14], "start": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "current": [1, 2, 7, 8, 10, 12, 14, 15, 16], "passion": 1, "reproduc": [1, 3, 4, 6, 8, 9, 10, 14], "through": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "quantit": [1, 3, 4, 6, 7, 11, 15], "imag": [1, 2, 8, 9, 10, 13, 15], "analysi": [1, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16], "pipelin": [1, 3, 4, 11], "stem": [1, 11], "cell": [1, 2, 3, 4, 7, 10, 12, 16], "development": 1, "biologi": [1, 14], "sinc": [1, 2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "lead": [1, 2, 3, 7, 8, 14, 15], "workshop": [1, 2], "now": [1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "care": [1, 2, 3, 10, 11, 12, 15, 16], "deepli": [1, 16], "spread": [1, 2, 4, 6, 7, 15, 16], "literaci": 1, "excit": [1, 7], "programmat": [1, 3, 10], "project": [1, 2, 8, 10, 15], "previou": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "sole": [2, 8], "descript": [2, 6, 7, 8, 9, 10, 14, 15, 16], "exploratori": [2, 3, 4, 7, 9, 11, 15], "next": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "serv": [2, 3, 6, 8, 12, 14], "forai": [2, 11], "focu": [2, 3, 4, 6, 7, 8, 11, 12, 14, 15, 16], "e": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "one": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15], "valu": [2, 4, 6, 8, 11, 12, 15], "categor": [2, 3, 4, 6, 7, 11, 15, 16], "interest": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "cover": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "basic": [2, 3, 7, 10, 12, 14, 15], "make": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16], "suitabl": [2, 16], "classifi": 2, "accur": [2, 3, 6, 11, 12, 15], "well": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "where": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "possibl": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "maxim": [2, 3, 11], "accuraci": [2, 3, 6, 11, 12], "By": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "end": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "reader": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "abl": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "do": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15], "follow": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "recogn": [2, 8, 10, 11, 14, 16], "situat": [2, 3, 4, 7, 11, 14, 15, 16], "appropri": [2, 3, 4, 7, 10, 11, 13, 15, 16], "what": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15], "interpret": [2, 3, 4, 7, 8, 10, 11, 12, 14, 15, 16], "output": [2, 3, 4, 7, 8, 10, 11, 12, 15, 16], "hand": [2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15, 16], "straight": [2, 4, 11, 12, 15], "line": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 16], "euclidean": [2, 4], "graph": 2, "predictor": [2, 4, 11], "explain": [2, 4, 6, 7, 11, 12], "perform": [2, 4, 6, 7, 8, 9, 10, 11, 12, 15], "standardscal": [2, 3, 4, 11], "make_column_transform": [2, 3, 4, 11], "sampl": [2, 3, 11], "model": [2, 3, 4, 12, 14, 15], "make_pipelin": [2, 3, 4, 11], "mani": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "want": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "base": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15], "experi": [2, 15], "For": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "instanc": [2, 3, 6, 7, 10, 16], "doctor": [2, 3], "mai": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "diagnos": [2, 3], "either": [2, 3, 4, 7, 8, 9, 11, 12, 14, 16], "diseas": 2, "healthi": 2, "symptom": 2, "s": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "email": [2, 10, 14], "might": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "tag": [2, 10, 13], "given": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "spam": 2, "text": [2, 3, 6, 7, 9, 11, 12, 13, 14, 16], "credit": 2, "card": 2, "compani": 2, "whether": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15], "purchas": [2, 4, 11, 12], "fraudul": 2, "item": [2, 4, 7, 8, 10, 11, 14, 15, 16], "amount": [2, 3, 4, 8, 10, 11, 12, 15], "locat": [2, 10, 14, 15], "These": [2, 3, 4, 7, 8, 10, 12, 14, 15], "task": [2, 4, 6, 9, 11, 15, 16], "exampl": [2, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "sometim": [2, 3, 6, 7, 10, 11, 12, 13, 15, 16], "call": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "label": [2, 4, 7, 11, 15, 16], "other": [2, 3, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16], "featur": [2, 3, 8, 11, 12, 14, 15], "gener": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "assign": [2, 4, 6, 7, 10, 15, 16], "without": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "known": [2, 3, 7, 10, 12, 15], "g": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "basi": [2, 10, 15], "similar": [2, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "know": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "name": [2, 3, 4, 6, 8, 11, 12, 13, 14, 15], "come": [2, 4, 6, 7, 11, 12, 13, 15, 16], "fact": [2, 3, 6, 7, 8, 10, 12, 14], "onc": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "can": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "There": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "could": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "wide": [2, 3, 4, 10, 12, 14, 15], "hart": [2, 11], "1967": [2, 3, 11], "hodg": [2, 11], "1951": [2, 11], "your": [2, 3, 4, 6, 7, 9, 10, 11, 12, 15, 16], "futur": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15], "you": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "encount": [2, 4, 10, 11, 12, 13, 16], "tree": [2, 3, 11], "vector": [2, 3, 10, 15], "svm": 2, "logist": [2, 3, 12], "neural": 2, "network": [2, 10], "see": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "addit": [2, 7, 11], "section": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "begin": [2, 3, 4, 6, 7, 10, 11, 14, 15, 16], "It": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "worth": [2, 3, 15, 16], "mention": [2, 3, 4, 6, 8, 10, 12, 13, 14, 16], "variat": [2, 6, 11, 15], "binari": [2, 3], "onli": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "involv": [2, 3, 4, 8, 10, 12, 13, 14, 15, 16], "diagnosi": [2, 3], "run": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "multiclass": 2, "categori": [2, 3, 4, 6, 7, 10, 15, 16], "bronchiti": 2, "pneumonia": 2, "common": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16], "cold": 2, "digit": 2, "breast": [2, 3], "dr": [2, 4, 15], "william": [2, 3, 4], "h": [2, 10], "wolberg": [2, 3], "w": [2, 7, 10], "nick": [2, 3, 7], "street": [2, 3], "olvi": [2, 3], "l": [2, 10], "mangasarian": [2, 3], "et": [2, 3, 4, 6, 10, 12, 14, 15], "al": [2, 3, 4, 6, 10, 12, 14, 15], "1993": [2, 3], "row": [2, 3, 4, 6, 8, 11, 12, 14, 15], "repres": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "tumor": [2, 7], "benign": [2, 3, 7, 11], "malign": [2, 3, 7, 11], "measur": [2, 3, 6, 7, 11, 12, 15, 16], "nucleu": 2, "textur": [2, 3], "perimet": [2, 3, 7], "area": [2, 3, 7, 10, 11, 12, 14, 15, 16], "conduct": [2, 10], "physician": 2, "As": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "analys": [2, 3, 4, 7, 8, 9, 10, 14, 15, 16], "formul": [2, 6, 7, 11, 15], "precis": [2, 3, 6, 8, 11, 13, 14, 16], "here": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "avail": [2, 3, 7, 9, 10, 12, 13, 15], "unknown": [2, 6, 7], "show": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "import": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "becaus": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "tradit": 2, "non": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "driven": [2, 4], "quit": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "subject": [2, 8, 14, 15], "depend": [2, 3, 4, 6, 8, 10, 11, 12, 15, 16], "upon": [2, 3, 10], "skill": [2, 8, 10, 15], "experienc": 2, "furthermor": [2, 3, 15], "normal": [2, 3, 6, 14, 16], "danger": [2, 13], "stai": [2, 6, 10, 15], "same": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "place": [2, 3, 4, 6, 7, 8, 10, 11, 13, 14, 15, 16], "stop": [2, 3, 4, 8, 12, 13], "grow": [2, 3, 12], "get": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "larg": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15], "contrast": [2, 3, 4, 6, 7, 10, 11, 12, 14], "invad": 2, "surround": [2, 7, 10, 14, 15], "tissu": 2, "nearbi": [2, 3, 10], "organ": [2, 7, 8, 10, 14, 15, 16], "caus": [2, 3, 4, 7, 8, 11, 12, 15, 16], "seriou": [2, 7, 14], "damag": [2, 3], "stanford": 2, "health": 2, "2021": [2, 7, 10], "thu": [2, 3, 8, 10, 11, 12, 14, 16], "quickli": [2, 3, 7, 12, 15], "type": [2, 3, 4, 6, 8, 10, 11, 12, 13, 14, 15], "guid": [2, 7, 11, 14, 15], "treatment": [2, 3, 16], "step": [2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16], "wrangl": [2, 3, 7, 9, 10, 12, 15], "visual": [2, 3, 4, 6, 8, 9, 11, 12, 14, 16], "order": [2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16], "better": [2, 3, 4, 11, 12, 15], "understand": [2, 3, 4, 6, 7, 9, 10, 12, 14, 15, 16], "panda": [2, 3, 4, 6, 8, 10, 11, 12, 15, 16], "altair": [2, 3, 4, 8, 11, 12], "pd": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "alt": [2, 3, 4, 6, 7, 11, 12, 15], "case": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "file": [2, 7, 13, 16], "contain": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "csv": [2, 3, 4, 6, 7, 11, 12, 15, 16], "header": [2, 7, 8, 14, 16], "ll": [2, 3, 6, 7, 10, 11, 13, 14, 15, 16], "read_csv": [2, 3, 4, 6, 7, 8, 11, 12, 15, 16], "function": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "argument": [2, 3, 4, 6, 7, 8, 11, 15, 16], "inspect": [2, 7, 10, 15, 16], "wdbc": 2, "id": [2, 3, 6, 15], "radiu": [2, 3], "smooth": [2, 3, 11, 15], "compact": [2, 3], "concav": [2, 3], "concave_point": [2, 3], "symmetri": [2, 3], "fractal_dimens": [2, 3], "0": [2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16], "842302": 2, "m": [2, 3, 7, 10, 15], "1": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "096100": 2, "2": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 16], "071512": 2, "268817": 2, "983510": 2, "567087": 2, "3": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "280628": 2, "650542": 2, "530249": 2, "215566": 2, "253764": 2, "842517": 2, "828212": 2, "353322": 2, "684473": 2, "907030": 2, "826235": 2, "486643": 2, "023825": 2, "547662": 2, "001391": 2, "867889": 2, "84300903": 2, "578499": 2, "455786": 2, "565126": 2, "557513": 2, "941382": 2, "052000": 2, "362280": 2, "035440": 2, "938859": 2, "397658": 2, "84348301": 2, "768233": 2, "253509": 2, "592166": 2, "763792": 2, "280667": 2, "399917": 2, "914213": 2, "450431": 2, "864862": 2, "4": [2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "906602": 2, "84358402": 2, "748758": 2, "150804": 2, "775011": 2, "824624": 2, "280125": 2, "538866": 2, "369806": 2, "427237": 2, "009552": 2, "561956": 2, "564": [2, 3], "926424": 2, "109139": 2, "720838": 2, "058974": 2, "341795": 2, "040926": 2, "218868": 2, "945573": 2, "318924": 2, "312314": 2, "930209": 2, "565": [2, 3], "926682": 2, "703356": 2, "083301": 2, "614511": 2, "722326": 2, "102368": 2, "017817": 2, "692434": 2, "262558": 2, "217473": 2, "057681": 2, "566": [2, 3], "926954": 2, "701667": 2, "043775": 2, "672084": 2, "577445": 2, "839745": 2, "038646": 2, "046547": 2, "105684": 2, "808406": 2, "894800": 2, "567": [2, 3], "927241": 2, "836725": 2, "334403": 2, "980781": 2, "733693": 2, "524426": 2, "269267": 2, "294046": 2, "656528": 2, "135315": 2, "042778": 2, "568": [2, 3], "92751": 2, "b": [2, 3], "806811": 2, "220718": 2, "812793": 2, "346604": 2, "109349": 2, "149741": 2, "113893": 2, "260710": 2, "819349": 2, "560539": 2, "569": [2, 3], "12": [2, 3, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "column": [2, 3, 4, 6, 8, 11, 12, 13, 15], "biopsi": [2, 3], "remov": [2, 3, 7, 13, 14, 15], "bodi": [2, 14], "examin": [2, 3, 4, 10, 11], "presenc": [2, 3], "tradition": 2, "procedur": [2, 3, 4, 11], "invas": 2, "fine": [2, 3, 8, 14, 15, 16], "needl": 2, "aspir": 2, "present": [2, 3, 6, 7, 10, 14, 15, 16], "extract": [2, 3, 4, 7, 10, 11, 12], "small": [2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15], "less": [2, 3, 4, 6, 10, 11, 12, 14, 15, 16], "ten": [2, 6, 7, 15], "differ": [2, 3, 4, 6, 7, 11, 12, 13, 14, 16], "below": [2, 3, 6, 7, 10, 12, 14, 15], "mean": [2, 3, 7, 8, 10, 11, 12, 14, 15, 16], "across": [2, 3, 6, 7, 10, 11, 12, 14, 15], "nuclei": 2, "record": [2, 3, 6, 7, 10, 14, 15, 16], "part": [2, 3, 4, 7, 8, 9, 10, 11, 12, 14, 15, 16], "prepar": [2, 3, 15], "have": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "been": [2, 3, 8, 10, 11, 12, 14, 15, 16], "standard": [2, 3, 4, 6, 8, 11, 12, 14, 15, 16], "discuss": [2, 3, 6, 10, 11, 12, 13, 14, 15, 16], "why": [2, 3, 7, 11, 15, 16], "later": [2, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "addition": [2, 3, 4, 6, 8, 10, 12, 14, 16], "uniqu": [2, 3, 7, 14], "therefor": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "total": [2, 3, 4, 7, 10, 11, 15, 16], "per": [2, 6, 10, 14, 15, 16], "identif": 2, "number": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "deviat": [2, 3, 4, 6, 16], "grai": [2, 14, 16], "length": [2, 4, 6, 7, 15, 16], "contour": 2, "insid": [2, 3, 6, 7, 8, 10, 13, 14, 15, 16], "local": [2, 11, 13], "ratio": [2, 16], "squar": [2, 3, 4, 7, 8, 10, 11, 12, 15, 16], "portion": [2, 10], "mirror": 2, "fractal": 2, "dimens": 2, "rough": [2, 4, 15], "info": [2, 3, 7, 15, 16], "preview": [2, 3, 4, 6, 7, 8, 9, 11, 12, 14, 15, 16], "frame": [2, 3, 4, 6, 8, 10, 11, 12, 15], "easier": [2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 16], "lot": [2, 3, 4, 7, 10, 12, 15, 16], "print": [2, 3, 6, 7, 8, 10, 12, 13, 15, 16], "down": [2, 8, 10, 13, 14, 16], "page": [2, 3, 4, 5, 8, 10, 12, 13, 14], "instead": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "entri": [2, 3, 6, 7, 10, 15, 16], "core": [2, 3, 7, 15, 16], "datafram": [2, 3, 4, 6, 10, 11, 12, 15, 16], "rangeindex": [2, 3, 15, 16], "null": [2, 3, 15, 16], "count": [2, 3, 6, 7, 10, 15, 16], "dtype": [2, 3, 6, 15, 16], "int64": [2, 3, 10, 15, 16], "float64": [2, 3, 6, 10, 15, 16], "6": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "7": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "8": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "9": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "10": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "11": [2, 3, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "memori": [2, 3, 10, 15, 16], "usag": [2, 3, 7, 10, 12, 15, 16], "53": [2, 3, 6, 14], "kb": [2, 3, 15, 16], "abov": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15], "arrai": [2, 3, 4, 11, 12], "readabl": [2, 3, 7, 10, 11, 14, 15, 16], "renam": [2, 3, 6, 7, 8, 10, 11, 16], "replac": [2, 3, 6, 7, 10, 12, 13, 14, 15], "take": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "dictionari": [2, 3, 10, 16], "map": [2, 4, 7, 10, 11, 12, 15], "desir": [2, 3, 7, 10, 11, 14, 16], "verifi": [2, 6, 13], "result": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "ani": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "let": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "groupbi": [2, 6], "find": [2, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "percentag": [2, 3, 6, 7, 15], "pair": [2, 3, 4, 10, 16], "Then": [2, 3, 4, 6, 7, 8, 11, 12, 13, 14, 15, 16], "calcul": [2, 3, 4, 11, 12], "group": [2, 3, 4, 6, 7, 13, 15], "divid": [2, 3, 7, 10, 15, 16], "multipli": [2, 7, 15], "equal": [2, 3, 4, 6, 11, 12, 16], "access": [2, 3, 4, 6, 11, 13, 15, 16], "via": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15], "attribut": [2, 3, 4, 5, 7, 10, 11], "357": [2, 3], "63": [2, 3, 10, 11], "212": [2, 4, 7, 10, 15, 16], "37": [2, 3, 4, 14, 15], "size": [2, 3, 4, 6, 10, 11, 12, 16], "62": [2, 10, 11, 15], "741652": 2, "258348": 2, "conveni": [2, 3, 7, 10, 16], "value_count": [2, 3, 6, 16], "occurr": [2, 15], "If": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "pass": [2, 3, 7, 10, 11, 15, 16], "seri": [2, 3, 6, 11, 12], "occur": [2, 3, 4, 6, 8, 11, 12, 14, 15, 16], "true": [2, 3, 4, 6, 7, 8, 11, 15, 16], "fraction": [2, 3, 6, 11, 14, 15], "627417": 2, "372583": 2, "proport": [2, 3, 7, 15, 16], "draw": [2, 6, 7, 11, 12, 15], "color": [2, 3, 4, 6, 10, 11, 12, 16], "scatter": [2, 3, 4, 11, 12], "plot": [2, 3, 4, 6, 11, 12, 16], "relationship": [2, 3, 4, 6, 7, 11, 12, 15, 16], "recal": [2, 3, 4, 6, 7, 11, 12, 14, 15, 16], "default": [2, 3, 7, 8, 10, 11, 13, 14, 15, 16], "palett": 2, "colorblind": [2, 15], "friendli": [2, 15], "so": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "stick": [2, 3, 10, 14], "perim_concav": [2, 3], "chart": [2, 3, 4, 6, 11, 12], "mark_circl": [2, 3, 4, 11, 12, 15], "encod": [2, 3, 4, 6, 7, 10, 11, 12, 15], "x": [2, 3, 4, 6, 7, 10, 11, 12, 13, 15], "titl": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15], "y": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15], "versu": [2, 3, 4, 6, 7, 10, 11, 12, 16], "fig": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "typic": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "fall": [2, 3, 6, 11, 14, 15], "upper": [2, 6, 14, 15], "right": [2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16], "corner": [2, 3, 13, 14, 15], "lower": [2, 3, 6, 8, 12, 15], "left": [2, 3, 4, 5, 7, 8, 10, 12, 13, 14, 15, 16], "word": [2, 3, 6, 7, 8, 10, 11, 12, 14, 16], "tend": [2, 3, 11, 14, 15], "ones": [2, 15, 16], "larger": [2, 3, 4, 6, 10, 11, 12, 14, 15], "suppos": [2, 4, 6, 7, 8, 10, 11, 14, 16], "obtain": [2, 3, 4, 6, 7, 11, 12, 14, 15, 16], "except": [2, 10, 12, 14, 16], "sai": [2, 3, 6, 8, 10, 11, 12, 13, 15, 16], "respect": [2, 3, 4, 6, 10, 14, 15, 16], "lie": 2, "middl": [2, 6, 10], "orang": [2, 4, 6, 11, 12], "cloud": [2, 10, 14, 15], "probabl": [2, 3, 6, 10, 12], "seem": [2, 3, 6, 8, 10, 11, 12, 15, 16], "actual": [2, 3, 4, 6, 7, 10, 11, 12, 14, 16], "practic": [2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "To": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "most": [2, 3, 4, 6, 7, 8, 10, 14, 15, 16], "must": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "choos": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 16], "advanc": [2, 3, 4, 6, 8, 12, 13, 14, 15, 16], "assum": [2, 6, 8], "someon": [2, 3, 7, 8, 14], "chosen": [2, 3, 4, 12, 16], "ourselv": [2, 3, 11], "illustr": [2, 3, 6, 11, 12, 15, 16], "concept": [2, 6, 7, 9, 11, 12, 14, 15], "walk": [2, 7, 11, 14], "whose": [2, 8, 10, 14, 16], "depict": [2, 4], "red": [2, 4, 8, 10, 11, 12, 13, 14], "diamond": 2, "coordin": [2, 4, 7, 15], "idea": [2, 3, 6, 7, 8, 10, 12, 13, 14, 15, 16], "close": [2, 3, 4, 6, 7, 10, 11, 14, 15], "anoth": [2, 3, 6, 7, 8, 10, 11, 12, 14, 15, 16], "expect": [2, 3, 4, 6, 7, 8, 10, 11, 12, 16], "look": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "doe": [2, 3, 4, 6, 7, 10, 11, 12, 15], "consid": [2, 3, 4, 6, 7, 11, 12, 14, 15, 16], "closest": [2, 3, 10, 15], "among": [2, 10, 14, 16], "major": [2, 3, 4, 7, 11, 12, 15, 16], "shown": [2, 3, 4, 7, 8, 10, 11, 12, 14, 15, 16], "vote": [2, 3, 7], "three": [2, 3, 4, 6, 7, 8, 9, 10, 11, 14, 15, 16], "chose": [2, 3, 15], "noth": [2, 6, 7, 12], "though": [2, 3, 6, 7, 10, 11, 12, 14, 15, 16], "odd": [2, 10], "avoid": [2, 3, 12, 15], "ti": [2, 10], "decid": [2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "often": [2, 3, 4, 6, 7, 8, 10, 11, 14, 15, 16], "just": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "denot": [2, 4, 7, 10, 11, 12, 15, 16], "a_x": 2, "a_i": 2, "b_x": 2, "b_y": 2, "definit": [2, 10, 15, 16], "plane": [2, 12], "formula": [2, 3, 4, 11, 12, 15], "mathrm": [2, 3], "sqrt": [2, 3, 11, 14], "select": [2, 4, 6, 8, 10, 11, 12, 13, 14, 15], "correspond": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "smallest": [2, 7, 11, 15, 16], "code": [2, 3, 7, 9, 10, 11, 13, 14, 15, 16], "add": [2, 3, 4, 6, 7, 10, 11, 13, 15, 16], "root": [2, 3, 10, 11, 14], "nsmallest": [2, 11, 15], "new_obs_perimet": 2, "new_obs_concav": 2, "dist_from_new": 2, "112": 2, "241202": 2, "653051": 2, "880626": 2, "258": 2, "750277": 2, "870061": 2, "979663": 2, "351": 2, "622700": 2, "541410": 2, "143088": 2, "430": 2, "416930": 2, "314364": 2, "256806": 2, "152": 2, "160091": 2, "039155": 2, "279258": 2, "tabl": [2, 3, 5, 7, 8, 10, 13, 15, 16], "mathemat": [2, 3, 6, 11, 12, 15], "detail": [2, 3, 4, 7, 8, 10, 12, 13, 14, 15, 16], "24": [2, 3, 14, 15], "65": [2, 3, 6, 10, 11, 16], "88": [2, 3], "75": [2, 3, 6, 7, 10, 11, 15, 16], "87": [2, 3], "98": [2, 7, 12, 15], "54": [2, 3, 14, 15, 16], "14": [2, 3, 4, 6, 8, 10, 14, 15, 16], "42": [2, 6, 14, 15, 16], "31": [2, 3, 14, 15, 16], "26": [2, 3, 14, 15], "16": [2, 3, 4, 6, 10, 11, 12, 14, 15], "04": [2, 10, 13, 15, 16], "28": [2, 4, 14, 15], "circl": [2, 8, 14, 15], "although": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "toward": [2, 6, 7, 14], "exactli": [2, 3, 6, 7, 10, 11, 12, 13, 15], "appli": [2, 3, 7, 11, 12, 15], "higher": [2, 3, 6, 7, 11, 12, 15, 16], "help": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "a_": 2, "dot": [2, 7, 10, 11, 12, 15], "b_": 2, "becom": [2, 3, 4, 6, 7, 8, 11, 12, 14, 16], "still": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15], "space": [2, 8, 10, 11, 12, 13, 15], "417": [2, 15], "837": 2, "had": [2, 3, 6, 7, 10, 11, 15, 16], "ad": [2, 3, 4, 10, 11, 12, 14], "up": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15], "took": [2, 6, 7], "27": [2, 7, 11, 14, 15], "new_obs_symmetri": 2, "836722": 2, "267368": 2, "400": [2, 11, 16], "334664": 2, "886368": 2, "099359": 2, "472326": 2, "562": 2, "470430": 2, "084810": 2, "154075": 2, "499268": 2, "68": 2, "365450": 2, "812359": 2, "092064": 2, "531594": 2, "055065": 2, "555575": 2, "dimension": 2, "five": [2, 3, 13, 15, 16], "3d": [2, 11, 12], "note": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "recommend": [2, 7, 8, 9, 11, 12, 13, 14, 16], "against": [2, 8, 11, 12], "purpos": [2, 3, 4, 6, 10, 11, 12, 14, 15, 16], "complic": [2, 7, 10, 11, 15], "handl": [2, 3, 7, 15], "multipl": [2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15], "thankfulli": [2, 4], "implement": [2, 3, 12, 15], "buitinck": 2, "2013": [2, 3, 4, 12], "along": [2, 3, 6, 7, 10, 11, 13, 14, 15], "sklearn": [2, 3, 4, 11, 12], "keep": [2, 3, 6, 7, 10, 13, 14, 15, 16], "simpl": [2, 3, 4, 6, 10, 11, 13, 15, 16], "fewer": [2, 3], "mistak": [2, 3, 11, 15], "tell": [2, 3, 6, 7, 8, 10, 11, 12, 14, 15, 16], "prefer": [2, 4, 10, 12, 15, 16], "regular": [2, 10, 11, 14, 15, 16], "set_config": [2, 3, 4, 11, 12], "notic": [2, 3, 6, 7, 10, 12, 15, 16], "wai": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15, 16], "prefix": 2, "extens": [2, 8, 10, 12, 13, 14, 15], "subsequ": [2, 7, 15], "long": [2, 3, 4, 6, 7, 8, 10, 12, 14, 15], "clutter": [2, 15], "kneighborsclassifi": [2, 3], "38": [2, 4, 11, 14, 15], "charact": [2, 7, 8, 10, 14, 15, 16], "transform_output": [2, 3, 4, 11, 12], "modul": 2, "build": [2, 3, 11, 15], "pick": [2, 3, 4, 10, 12, 14, 15], "store": [2, 3, 4, 6, 7, 8, 10, 13, 14, 15, 16], "cancer_train": [2, 3], "specifi": [2, 3, 6, 7, 8, 10, 11, 12, 13, 15, 16], "weight": 2, "control": [2, 3, 8, 9, 10, 13], "uniform": [2, 3, 10], "choic": [2, 3, 4, 6, 11, 14, 15, 16], "weigh": [2, 7], "websit": [2, 3, 5, 10, 12, 14], "knn": [2, 3], "n_neighbor": [2, 3, 11], "jupyt": [2, 3, 4, 7, 9, 12, 13], "environ": [2, 3, 4, 7, 8, 12, 13, 14], "pleas": [2, 3, 4, 5, 7, 8, 12], "rerun": [2, 3, 4, 12], "html": [2, 3, 4, 12, 15, 16], "represent": [2, 3, 4, 10, 12], "trust": [2, 3, 4, 6, 12], "notebook": [2, 3, 4, 12, 13, 14], "On": [2, 3, 4, 7, 10, 11, 12, 14, 15, 16], "github": [2, 3, 4, 7, 10, 12, 15], "unabl": [2, 3, 4, 10, 12, 14], "render": [2, 3, 4, 8, 12, 14, 15], "try": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "nbviewer": [2, 3, 4, 12], "org": [2, 3, 4, 6, 7, 10, 12, 15, 16], "kneighborsclassifierkneighborsclassifi": [2, 3], "fit": [2, 3, 4, 11, 12, 15], "much": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "outsid": [2, 3, 6, 8, 11, 12, 14, 15], "heavi": 2, "lift": 2, "modifi": [2, 3, 14], "after": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "itself": [2, 3, 6, 10, 12, 15, 16], "ran": 2, "manual": [2, 3, 4, 6, 8, 10, 11, 13, 16], "time": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 16], "new_ob": 2, "Is": [2, 4, 7, 11, 15, 16], "don": [2, 3, 4, 6, 7, 8, 10, 11, 14, 15, 16], "t": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "necessarili": [2, 3, 7, 16], "correct": [2, 3, 7, 13, 14, 15, 16], "quantifi": [2, 3, 12], "think": [2, 3, 7, 8, 10, 12, 16], "rang": [2, 3, 4, 10, 11, 12, 15, 16], "matter": [2, 11, 15, 16], "identifi": [2, 3, 4, 7, 9, 10, 11, 14, 15], "effect": [2, 4, 6, 7, 11, 12, 13, 16], "But": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "doesn": [2, 3, 8, 10, 15, 16], "salari": 2, "dollar": [2, 6, 10, 11, 12], "job": [2, 10, 15], "1000": [2, 3, 6, 15], "huge": [2, 10], "compar": [2, 3, 6, 7, 10, 11, 14, 15, 16], "conceptu": [2, 14], "opposit": 2, "yearli": 2, "temperatur": 2, "degre": 2, "kelvin": 2, "celsiu": 2, "constant": [2, 12, 15], "shift": [2, 7, 8], "273": [2, 16], "even": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "likewis": [2, 16], "hypothet": 2, "thousand": [2, 3, 10, 15], "singl": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "affect": [2, 3, 7, 8, 11, 12, 15], "chang": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15, 16], "outcom": [2, 7], "averag": [2, 3, 6, 7, 10, 11, 12, 16], "central": 2, "subtract": [2, 3, 7], "said": [2, 3], "unstandard": [2, 4], "wisconsin": 2, "until": [2, 3, 4, 6, 7, 8, 10, 13, 14, 15, 16], "did": [2, 3, 6, 7, 9, 10, 11, 12, 14, 15, 16], "earlier": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "thing": [2, 3, 6, 8, 10, 13, 14, 15, 16], "unscaled_canc": 2, "wdbc_unscal": [2, 3], "1001": 2, "11840": [2, 3], "1326": 2, "08474": [2, 3], "1203": 2, "10960": [2, 3], "386": 2, "14250": [2, 3], "1297": 2, "10030": [2, 3], "1479": 2, "11100": [2, 3], "1261": 2, "09780": [2, 3], "858": 2, "08455": [2, 3], "1265": 2, "11780": [2, 3], "181": [2, 4], "05263": [2, 3], "unscal": 2, "uncent": 2, "Will": 2, "framework": [2, 12], "preprocessor": [2, 3, 4, 11], "manipul": [2, 10, 16], "transform": [2, 3, 4, 7, 11, 12, 16], "wrap": [2, 3, 4, 11], "columntransform": [2, 3, 4], "enabl": [2, 8, 10, 13, 14, 15, 16], "handi": [2, 7, 16], "sequenc": [2, 3, 7, 10, 13, 15], "compos": [2, 3, 4, 7, 11], "x27": [2, 3, 4], "columntransformercolumntransform": [2, 3, 4], "standardscalerstandardscal": [2, 3, 4], "individu": [2, 3, 6, 7, 12, 14, 15], "difficult": [2, 3, 4, 7, 8, 10, 12, 15, 16], "rather": [2, 3, 6, 7, 8, 10, 11, 14, 15, 16], "make_column_selector": [2, 3], "dtype_includ": [2, 3], "equival": [2, 7, 10, 12, 16], "lt": 2, "_column_transform": 2, "0x7fda5948bad0": 2, "gt": 2, "readi": [2, 3, 7, 8, 10, 13, 14], "happen": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "necessari": [2, 4, 11, 13, 15], "bit": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15, 16], "unnecessari": 2, "howev": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "quantiti": [2, 3, 6, 15, 16], "scaled_canc": 2, "standardscaler__area": 2, "standardscaler__smooth": 2, "984375": 2, "568466": 2, "908708": 2, "826962": 2, "558884": 2, "942210": 2, "764464": 2, "283553": 2, "826229": 2, "280372": 2, "343856": 2, "041842": 2, "723842": 2, "102458": 2, "577953": 2, "840484": 2, "735218": 2, "525767": 2, "347789": 2, "112085": 2, "woohoo": 2, "input": [2, 3, 4, 7, 10, 11, 14, 16], "behavior": [2, 4, 11, 15, 16], "drop": [2, 3, 8, 13, 14, 15, 16], "remain": [2, 3, 4, 7, 13], "rest": [2, 3, 7, 12, 16], "remaind": [2, 3, 7, 10, 11, 16], "passthrough": 2, "separ": [2, 3, 4, 7, 8, 14, 15, 16], "underscor": [2, 7, 8, 14, 16], "again": [2, 3, 6, 7, 8, 10, 11, 12, 13, 15, 16], "preserv": [2, 3], "verbose_feature_names_out": [2, 4], "fals": [2, 3, 4, 7, 10, 11, 12, 15, 16], "should": [2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 15, 16], "leav": [2, 4, 12], "preprocessor_keep_al": 2, "scaled_cancer_al": 2, "wonder": [2, 6, 10], "technic": [2, 3, 7, 8, 11, 13, 14, 15, 16], "error": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "prone": [2, 3, 10, 16], "accident": [2, 3, 8, 10, 14, 15, 16], "forget": [2, 4, 14], "proper": 2, "free": [2, 3, 12, 14], "requir": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "yourself": [2, 4, 7, 10, 12, 14], "further": [2, 3, 4, 6, 7, 8, 10, 12, 15, 16], "automat": [2, 3, 4, 10, 11, 14, 15], "streamlin": 2, "effort": [2, 8, 10, 14], "side": [2, 5, 6, 7, 13, 14, 15], "annot": [2, 4, 6, 15], "within": [2, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "nearli": [2, 4, 12, 16], "vertic": [2, 6, 7, 11, 12, 15, 16], "align": [2, 10, 15], "black": [2, 4, 10, 15], "region": [2, 3, 10, 11, 16], "domin": 2, "intuit": [2, 3, 11, 15, 16], "reason": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "carefulli": [2, 4, 7, 10, 16], "domain": [2, 3, 7, 10, 15], "comparison": [2, 6, 12, 15, 16], "potenti": [2, 3, 4, 11, 12, 16], "issu": [2, 7, 8, 10, 12, 13, 15, 16], "imbal": 2, "overal": [2, 3, 7, 11, 15], "pattern": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "otherwis": [2, 3, 4, 6, 7, 15], "rare": [2, 4, 15], "malici": 2, "detect": [2, 4], "rarer": 2, "unimport": 2, "revisit": [2, 3, 10, 12, 16], "head": [2, 8, 10, 13, 14, 15], "top": [2, 3, 5, 6, 7, 8, 10, 11, 12, 13, 15, 16], "n": [2, 3, 4, 6, 7, 10, 11, 15, 16], "concat": [2, 6], "glue": 2, "filter": [2, 3, 6, 10, 15], "back": [2, 3, 6, 8, 10, 11, 12, 13, 14, 15, 16], "concaten": [2, 6], "axi": [2, 7, 11, 12, 14, 16], "yield": [2, 3], "taller": 2, "horizont": [2, 7, 15], "produc": [2, 3, 7, 8, 12, 15, 16], "wider": [2, 6, 7, 16], "imbalanc": [2, 3], "rare_canc": 2, "rare_plot": 2, "With": [2, 4, 7, 10, 15, 16], "least": [2, 3, 4, 6, 8, 15], "win": 2, "highlight": [2, 4, 6, 8, 10, 12, 13, 14, 16], "13": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "background": [2, 3, 6, 10, 12, 15], "blue": [2, 4, 8, 11, 14, 16], "indic": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "despit": [2, 10, 15], "simplic": [2, 3, 14], "sound": [2, 3, 8], "manner": [2, 8, 12], "fairli": [2, 3, 6, 13, 15], "nuanc": 2, "suffic": [2, 6], "rebal": 2, "oversampl": 2, "replic": [2, 6], "power": [2, 3, 7, 10, 14, 15, 16], "own": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "increas": [2, 3, 4, 6, 11, 12, 15, 16], "randomli": [2, 3, 4, 6, 12], "properli": [2, 3, 15], "random": [2, 6, 11, 12], "malignant_canc": 2, "benign_canc": 2, "malignant_cancer_upsampl": 2, "upsampled_canc": 2, "vice": [2, 3], "versa": [2, 3], "closer": [2, 15], "upsampl": 2, "wild": [2, 7, 12], "unfortun": [2, 3, 4, 6, 8, 10, 12, 15], "challeng": [2, 14, 16], "reli": [2, 3, 8, 11, 12, 15], "expert": [2, 3, 7, 13], "knowledg": [2, 7, 12, 14, 16], "relat": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "survei": [2, 7, 16], "particip": [2, 3], "margin": [2, 8], "peopl": [2, 6, 7, 8, 11, 12, 14, 15, 16], "respond": [2, 10, 14], "certain": [2, 7, 10, 14, 15], "kind": [2, 3, 4, 6, 7, 10, 15], "fear": [2, 7], "honestli": 2, "neg": [2, 3, 8, 11, 12, 14, 15, 16], "consequ": [2, 3, 6, 8, 16], "simpli": [2, 3, 10, 15, 16], "throw": 2, "awai": [2, 3, 6, 10, 11, 12, 14, 16], "bia": [2, 12], "conclus": [2, 6, 7, 15], "inadvert": [2, 8], "ignor": [2, 3, 7, 11, 16], "easili": [2, 3, 4, 7, 8, 9, 10, 14, 15, 16], "mislead": 2, "detriment": 2, "impact": [2, 4, 6, 12, 16], "techniqu": [2, 3, 4, 6, 7, 10, 12, 15], "deal": [2, 8, 10], "isn": [2, 3, 7, 10, 11, 15], "anyth": [2, 3, 7, 12, 16], "els": [2, 7, 8, 10], "subset": [2, 6, 8, 10, 11, 12, 16], "missing_canc": 2, "wdbc_miss": 2, "nan": [2, 10, 16], "475956": 2, "834601": 2, "386808": 2, "169878": 2, "160508": 2, "137124": 2, "henc": [2, 3, 4, 8, 10, 11, 15], "too": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "accomplish": [2, 3, 6, 7, 8, 15, 16], "dropna": 2, "no_missing_canc": 2, "strategi": [2, 3, 15], "imput": 2, "fill": [2, 8, 10, 12, 15], "synthet": 2, "simpleimput": 2, "simpleimputersimpleimput": 2, "directli": [2, 3, 4, 6, 7, 8, 13, 14, 16], "imputed_canc": 2, "846860": 2, "384942": 2, "document": [2, 4, 8, 9, 10, 13, 14, 15, 16], "crucial": 2, "critic": [2, 6, 7, 8, 12, 15, 16], "chain": [2, 16], "intermedi": [2, 7], "whole": [2, 3, 4, 6, 10, 14, 16], "scratch": [2, 6, 14, 15], "knn_pipelin": [2, 3], "pipelinepipelin": [2, 3, 4], "500": [2, 6, 11, 12], "075": 2, "1500": 2, "new_observ": 2, "second": [2, 3, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "15": [2, 3, 4, 6, 7, 8, 10, 11, 14, 15, 16], "seen": [2, 3, 11, 12, 14, 15, 16], "littl": [2, 3, 10, 11, 12, 15, 16], "grid": [2, 3, 11, 15], "meshgrid": 2, "numpi": [2, 3, 4, 6, 10, 11, 12, 15, 16], "high": [2, 3, 6, 7, 8, 9, 12], "transpar": [2, 7], "low": [2, 3, 12], "opac": [2, 11, 12, 15], "np": [2, 3, 4, 6, 11, 12], "val": 2, "arrang": [2, 6, 7, 15], "are_grid": 2, "linspac": 2, "min": [2, 11, 12, 15, 16], "95": [2, 3, 6, 7, 10, 12, 15], "max": [2, 3, 11, 12, 15, 16], "05": [2, 7, 10, 15], "50": [2, 3, 6, 7, 10, 11, 12, 14, 16], "smo_grid": 2, "asgrid": 2, "reshap": [2, 16], "knnpredgrid": 2, "bind": 2, "prediction_t": 2, "copi": [2, 10, 14, 16], "unscaled_plot": 2, "mark_point": [2, 15], "40": [2, 3, 6, 7, 10, 14, 15, 16], "nice": [2, 3, 8, 10, 12, 15], "fade": 2, "prediction_plot": 2, "300": [2, 3, 6, 15, 16], "accompani": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "repositori": [2, 3, 4, 6, 7, 10, 11, 12, 13, 15, 16], "launch": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "browser": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "click": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "binder": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "button": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "view": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "download": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "sure": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "instruct": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "setup": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "ensur": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "intend": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "blb": 2, "lar": 2, "gill": 2, "loupp": 2, "mathieu": 2, "blondel": 2, "fabian": 2, "pedregosa": 2, "andrea": 2, "mueller": 2, "olivi": 2, "grisel": 2, "vlad": 2, "nicula": 2, "peter": [2, 11], "prettenhof": 2, "alexandr": 2, "gramfort": 2, "jaqu": 2, "grobler": 2, "robert": [2, 3, 4, 12], "layton": 2, "jake": 2, "vanderpla": [2, 15], "arnaud": 2, "joli": 2, "brian": [2, 15], "holt": 2, "ga": [2, 15], "\u00eb": 2, "varoquaux": 2, "api": 2, "design": [2, 3, 8, 10, 14, 15, 16], "ecml": 2, "pkdd": 2, "mine": [2, 6], "108": [2, 3], "122": [2, 3], "ch67": [2, 11], "thoma": [2, 11], "ieee": [2, 4, 11], "transact": [2, 4, 11], "21": [2, 3, 7, 10, 11, 12, 14, 15], "fh51": [2, 11], "evelyn": [2, 3, 11], "joseph": [2, 11], "discriminatori": [2, 11], "discrimin": [2, 3, 11], "consist": [2, 4, 6, 7, 10, 11, 13, 14, 15, 16], "properti": [2, 3, 6, 7, 10, 11, 12, 15, 16], "report": [2, 3, 6, 7, 8, 11, 15, 16], "usaf": [2, 11], "school": [2, 7, 11], "aviat": [2, 11], "medicin": [2, 11], "randolph": [2, 11], "field": [2, 10, 11, 15], "texa": [2, 11], "swm93": [2, 3], "nuclear": [2, 3], "intern": [2, 3, 5, 15], "symposium": [2, 3], "electron": [2, 3, 14], "technolog": [2, 3, 15], "stanfordhcare21": 2, "url": [2, 3, 4, 6, 7, 12, 13, 14, 15, 16], "http": [2, 3, 4, 5, 6, 7, 9, 10, 12, 13, 14, 15, 16], "stanfordhealthcar": 2, "medic": [2, 3], "condit": 2, "continu": [3, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "its": [3, 4, 7, 8, 10, 11, 12, 14, 15, 16], "describ": [3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "matric": 3, "execut": [3, 10, 11, 14], "neighbor": [3, 12], "k": [3, 7, 10, 12, 15], "nearest": [3, 4, 12], "advantag": [3, 4, 6, 10, 11, 12, 13, 14, 15, 16], "disadvantag": [3, 4, 11, 12, 15], "wrong": [3, 6, 7, 12, 15, 16], "cancer": 3, "ask": [3, 4, 6, 10, 11, 12, 14, 15, 16], "kei": [3, 6, 7, 10, 13, 14, 15, 16], "impli": [3, 6], "between": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "oppos": [3, 10, 11, 15, 16], "memor": 3, "visit": [3, 5, 6, 7, 10, 13, 14, 15], "hospit": 3, "more": [3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "trick": 3, "asid": [3, 7, 10, 12], "match": [3, 10, 11, 12, 14, 15, 16], "observ": [3, 4, 6, 7, 11, 12, 14, 15, 16], "confid": [3, 6, 11], "golden": 3, "rule": [3, 6, 7, 11, 15], "cannot": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "than": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "realli": [3, 6, 7, 10, 11, 15, 16], "imagin": [3, 6, 8, 10, 14, 15, 16], "bad": [3, 4, 10, 15], "overestim": [3, 6], "made": [3, 4, 7, 11, 12, 13, 14, 15, 16], "frac": [3, 4, 6, 11], "summar": [3, 6, 7, 9, 10, 15, 16], "stori": [3, 8, 11, 15], "alon": [3, 6, 14], "comprehens": [3, 4, 6], "each": [3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "correctli": [3, 7, 10, 13, 15, 16], "incorrectli": 3, "57": 3, "bottom": [3, 8, 13, 14], "roughli": [3, 4, 6, 11, 12, 15], "89": [3, 7, 15], "892": 3, "misclassifi": 3, "disastr": 3, "receiv": [3, 10, 14], "particularli": [3, 10, 12, 15], "unaccept": 3, "term": [3, 4, 6, 7, 10, 11, 15, 16], "talk": [3, 10, 15], "four": [3, 4, 7, 9, 15], "perfect": [3, 15], "zero": [3, 4, 11, 12, 15, 16], "almost": [3, 4, 7, 10, 11, 15], "two": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "commonli": [3, 6, 7, 8, 12, 14, 15, 16], "metric": [3, 4, 11, 12], "togeth": [3, 4, 6, 8, 10, 15, 16], "inde": [3, 4, 6, 7, 10, 12, 15, 16], "20": [3, 6, 7, 10, 14, 15, 16], "quad": [3, 4], "25": [3, 6, 7, 10, 11, 14, 15, 16], "rel": [3, 4, 7, 15], "context": [3, 10, 11, 12, 15, 16], "certainli": [3, 6], "achiev": [3, 7, 11, 15, 16], "guess": [3, 4, 6, 7], "everi": [3, 6, 7, 8, 10, 12, 14, 16], "similarli": [3, 7, 10, 15, 16], "never": [3, 7, 11, 14], "obsev": 3, "Of": [3, 6, 12, 16], "somewher": [3, 7, 10, 11, 15], "extrem": [3, 6, 11, 12], "trade": [3, 4], "off": [3, 4, 6, 12], "fair": [3, 10, 11], "unbias": 3, "influenc": [3, 4, 6, 11, 12, 15], "human": [3, 4, 6, 10, 14, 15, 16], "counter": 3, "main": [3, 7, 13, 16], "tenet": 3, "determin": [3, 4, 6, 11, 13, 14, 15, 16], "everyth": [3, 6, 7, 13, 16], "point": [3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "investig": [3, 6, 7, 10, 15], "integ": [3, 10, 15, 16], "At": [3, 7, 8, 9, 10, 12], "track": [3, 6, 7, 14, 16], "nums_0_to_9": 3, "5": [3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "random_numbers1": 3, "to_numpi": 3, "appear": [3, 6, 7, 8, 10, 11, 12, 13, 14, 15], "fresh": [3, 8], "batch": 3, "random_numbers2": 3, "forc": [3, 15], "random_numbers1_again": 3, "random_numbers2_again": 3, "And": [3, 6, 7, 10, 11, 12, 14, 15, 16], "4235": 3, "random_numb": 3, "beyond": [3, 4, 7, 10, 11, 12, 13, 14, 15, 16], "explicitli": [3, 10, 14, 15, 16], "insert": [3, 14, 16], "therebi": [3, 15], "global": [3, 15], "drawback": 3, "buri": 3, "undesir": 3, "entir": [3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "plai": [3, 7, 10, 13], "randomst": 3, "random_st": 3, "rnd": 3, "random_numbers1_third": 3, "random_numbers2_third": 3, "load": [3, 4, 8, 9, 10, 11, 12, 15, 16], "quick": [3, 7, 10], "re": [3, 4, 7, 8, 9, 10, 11, 14, 15, 16], "scale": [3, 4, 10, 11, 12, 14, 15], "done": [3, 7, 8, 10, 13, 14, 15, 16], "preliminari": 3, "train_test_split": [3, 11, 12], "estim": [3, 6, 7, 9, 11, 12], "shuffl": 3, "stratifi": [3, 11], "exist": [3, 7, 8, 10, 12, 13, 14, 15, 16], "train_siz": [3, 11, 12], "model_select": [3, 11, 12], "cancer_test": 3, "index": [3, 7, 10, 14, 16], "426": 3, "461": 3, "481": [3, 15], "43": [3, 4, 14, 15], "143": 3, "334": 3, "434": 3, "miss": [3, 4, 15, 16], "626761": 3, "373239": 3, "last": [3, 6, 7, 9, 10, 14, 15, 16], "sensit": [3, 7, 12], "consider": 3, "aspect": [3, 6, 12, 15], "fortun": [3, 6, 7, 10, 11, 12, 16], "construct": [3, 6, 7, 10, 15, 16], "cancer_preprocessor": 3, "augment": [3, 4], "897374": 3, "313": 3, "893988": 3, "221": 3, "8812818": 3, "272": [3, 15], "8910988": 3, "340": [3, 15], "89813": 3, "852552": 3, "34": [3, 4, 14, 15, 16], "854039": 3, "252": [3, 11], "885429": 3, "45": [3, 4, 7, 10, 14, 15, 16], "857010": 3, "908469": 3, "correct_pr": 3, "8811188811188811": 3, "scitkit": 3, "score": [3, 10, 11], "cancer_acc_1": 3, "crosstab": 3, "83": [3, 11, 12], "agre": [3, 10, 12], "displaystyl": 3, "86": [3, 11], "81": [3, 11, 15], "That": [3, 6, 7, 10, 11, 15, 16], "pretti": [3, 6, 10], "wait": [3, 7, 10, 11, 12, 15, 16], "Or": [3, 6, 12], "someth": [3, 4, 6, 7, 8, 10, 11, 14, 15, 16], "99": [3, 4, 6, 11, 12, 15], "terribl": 3, "impress": [3, 15], "attent": [3, 7, 11, 16], "sacrif": 3, "easi": [3, 4, 7, 8, 10, 12, 14, 15, 16], "baselin": [3, 15], "regardless": [3, 10, 11, 15], "sens": [3, 4, 6, 7, 11, 12, 15, 16], "90": [3, 6, 7, 15, 16], "hope": [3, 10, 12, 15], "signific": [3, 7], "Be": [3, 10, 11, 15], "enough": [3, 6, 7, 10, 11, 12, 14, 15, 16], "usual": [3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "suspect": [3, 4, 6], "built": [3, 7, 8, 13, 16], "perspect": [3, 4, 11, 16], "hoorai": 3, "cautiou": 3, "misdiagnos": 3, "19": [3, 6, 10, 14, 15], "vast": [3, 4, 15, 16], "behav": [3, 6, 12], "principl": [3, 15], "ideal": [3, 8, 11, 16], "somehow": [3, 6, 10], "hasn": 3, "yet": [3, 6, 7, 8, 10, 11, 14, 15, 16], "rememb": [3, 6, 7, 8, 10, 12, 15, 16], "touch": [3, 15], "dai": [3, 8, 10, 14, 15], "strongli": [3, 9, 12], "whatev": [3, 4, 7, 15], "lucki": [3, 6], "perhap": [3, 6, 7, 8, 10, 11, 12, 15], "sub": [3, 10], "cancer_subtrain": 3, "cancer_valid": 3, "test_siz": 3, "acc": 3, "9252336448598131": 3, "92": [3, 6], "repeat": [3, 4, 6, 14], "84": [3, 15], "82": [3, 15], "none": [3, 4, 10, 12, 14, 16], "underli": [3, 4, 7], "reduc": [3, 4, 10, 15], "un": [3, 4], "c": [3, 7, 10], "evenli": [3, 11], "chunk": [3, 12], "iter": [3, 4, 7, 14, 15, 16], "fold": [3, 11], "cross_valid": 3, "cv": [3, 11], "convert": [3, 6, 10, 11, 15, 16], "cancer_pip": 3, "cv_5_df": 3, "fit_tim": 3, "score_tim": 3, "test_scor": 3, "004218": 3, "005888": 3, "941860": 3, "003800": 3, "005596": 3, "894118": 3, "003676": 3, "005544": 3, "858824": 3, "003792": 3, "005571": 3, "870588": 3, "003583": 3, "005472": 3, "823529": 3, "aggreg": [3, 7], "sem": 3, "uncertain": [3, 6, 11], "scope": [3, 4, 7, 11, 12, 13, 14, 15], "02": [3, 10, 15], "cv_5_metric": 3, "agg": [3, 12, 16], "003814": 3, "005614": 3, "877784": 3, "000109": 3, "000071": 3, "019656": 3, "limit": [3, 4, 10, 12, 14, 15, 16], "speed": 3, "trial": [3, 15], "cv_10": 3, "cv_10_df": 3, "cv_10_metric": 3, "004286": 3, "865947": 3, "000034": 3, "000038": 3, "019190": 3, "slightli": [3, 6, 10, 11, 12, 15], "due": [3, 4, 6, 10, 16], "reduct": 3, "dramat": 3, "cv_50_df": 3, "cv_50_metric": 3, "003634": 3, "003165": 3, "874444": 3, "000019": 3, "000011": 3, "017040": 3, "downstream": 3, "expens": [3, 10], "chemo": 3, "radiat": 3, "therapi": 3, "death": 3, "mispredict": 3, "gridsearchcv": [3, 11], "unspecifi": 3, "cancer_tune_pip": 3, "tunabl": 3, "get_param": [3, 11], "verbos": 3, "columntransformer__n_job": 3, "columntransformer__remaind": 3, "columntransformer__sparse_threshold": 3, "columntransformer__transformer_weight": 3, "columntransformer__transform": 3, "columntransformer__verbos": 3, "columntransformer__verbose_feature_names_out": 3, "columntransformer__standardscal": 3, "columntransformer__standardscaler__copi": 3, "columntransformer__standardscaler__with_mean": 3, "columntransformer__standardscaler__with_std": 3, "kneighborsclassifier__algorithm": 3, "auto": [3, 14], "kneighborsclassifier__leaf_s": 3, "30": [3, 6, 7, 10, 11, 14, 15, 16], "kneighborsclassifier__metr": 3, "minkowski": 3, "kneighborsclassifier__metric_param": 3, "kneighborsclassifier__n_job": 3, "kneighborsclassifier__n_neighbor": 3, "kneighborsclassifier__p": 3, "kneighborsclassifier__weight": 3, "wow": [3, 6, 15], "stuff": 3, "sift": 3, "muck": [3, 10], "stand": [3, 10, 11, 15], "parameter_grid": 3, "allow": [3, 4, 6, 7, 8, 9, 10, 11, 14, 15, 16], "greater": [3, 4, 10, 16], "third": 3, "skip": [3, 8, 16], "96": [3, 12, 15], "emploi": [3, 10, 11], "okai": [3, 15, 16], "param_grid": [3, 11], "cancer_tune_grid": 3, "cv_results_": [3, 11], "format": [3, 9, 10, 11, 12, 16], "accuracies_grid": 3, "mean_fit_tim": 3, "std_fit_tim": 3, "mean_score_tim": 3, "std_score_tim": 3, "param_kneighborsclassifier__n_neighbor": 3, "param": 3, "split0_test_scor": 3, "split1_test_scor": 3, "split2_test_scor": 3, "split3_test_scor": 3, "split4_test_scor": 3, "split5_test_scor": 3, "split6_test_scor": 3, "split7_test_scor": 3, "split8_test_scor": 3, "split9_test_scor": 3, "mean_test_scor": [3, 11], "17": [3, 4, 10, 12, 14, 15], "std_test_scor": [3, 11], "18": [3, 4, 6, 7, 10, 14, 15], "rank_test_scor": 3, "int32": [3, 16], "param_kneighbors_classifier__n_neighbor": 3, "unus": 3, "sem_test_scor": [3, 11], "856589": 3, "013689": 3, "853876": 3, "021710": 3, "854097": 3, "018745": 3, "019869": 3, "868272": 3, "018769": 3, "875194": 3, "017909": 3, "875249": 3, "018477": 3, "36": [3, 4, 14, 15], "889369": 3, "014765": 3, "41": [3, 11, 14, 15, 16], "891694": 3, "015519": 3, "46": [3, 4, 14, 15], "884662": 3, "014538": 3, "51": [3, 6, 14, 15], "886988": 3, "014280": 3, "56": [3, 7], "884551": 3, "016056": 3, "61": [3, 6, 7, 10], "877519": 3, "015867": 3, "66": [3, 6, 10, 11], "882281": 3, "013591": 3, "71": [3, 10], "012634": 3, "76": 3, "879900": 3, "014982": 3, "877575": 3, "015085": 3, "91": [3, 6], "015440": 3, "shortcut": [3, 8, 15], "layer": [3, 6], "accuracy_vs_k": 3, "mark_lin": [3, 4, 11, 12, 15], "85": [3, 6, 7, 10, 11, 12, 15], "neighbour": [3, 11], "highest": [3, 16], "best_params_": [3, 11], "vari": [3, 6, 11, 12, 13, 15, 16], "exact": [3, 6, 12, 15], "80": [3, 16], "justifi": [3, 15], "optim": [3, 10, 11], "decreas": [3, 4, 6, 15, 16], "reliabl": [3, 6, 8, 15], "uncertainti": [3, 6], "cost": [3, 6, 11, 12], "prohibit": [3, 11], "large_param_grid": 3, "385": 3, "large_cancer_tune_grid": 3, "large_accuracies_grid": 3, "large_accuracy_vs_k": 3, "60": [3, 6, 7], "underfit": [3, 12], "farther": [3, 15], "sort": [3, 4, 7, 8, 10, 12, 15, 16], "boundari": [3, 12], "simpler": 3, "stronger": 3, "regard": [3, 6, 7, 8, 11, 12, 16], "themselv": [3, 10, 15], "noisi": [3, 11, 15], "jag": 3, "essenti": [3, 6, 7, 8, 10, 11, 16], "problemat": [3, 8, 10, 15], "unreli": [3, 6, 12], "strike": 3, "balanc": [3, 6], "return": [3, 4, 6, 7, 10, 12, 13, 16], "put": [3, 6, 10, 11, 12, 13, 14, 16], "defin": [3, 6, 7, 9, 10, 11, 12, 15, 16], "search": [3, 4, 10, 13, 14], "retrain": [3, 11], "strength": [3, 12, 15], "weak": [3, 11, 12, 15], "nn": 3, "assumpt": [3, 4, 11, 12], "multi": 3, "slow": [3, 8, 11, 12], "treat": [3, 4, 7, 14, 15, 16], "accept": [3, 10, 11, 13, 14], "wors": [3, 7, 16], "meaning": [3, 4, 7, 10, 12, 14], "cancer_irrelev": 3, "irrelevant1": 3, "irrelevant2": 3, "30010": 3, "08690": 3, "132": [3, 6], "19740": 3, "130": [3, 6, 16], "00": [3, 6, 16], "24140": 3, "77": [3, 6], "58": [3, 15], "19800": 3, "135": [3, 6, 15], "24390": 3, "142": 3, "14400": 3, "131": 3, "09251": 3, "35140": 3, "140": [3, 6], "00000": [3, 6], "47": [3, 4, 12, 14, 15], "increasingli": [3, 10], "distanc": [3, 4, 11, 12, 15], "corrupt": 3, "surpris": 3, "outperform": 3, "combat": 3, "extra": [3, 10, 12], "nois": [3, 15], "smoothli": 3, "trend": [3, 6, 7, 11, 12, 15], "corrobor": 3, "evid": 3, "untun": 3, "scientif": [3, 11, 12, 14], "clear": [3, 4, 6, 7, 12, 14, 15, 16], "cut": 3, "obviou": [3, 8, 12, 15], "relev": [3, 10, 11, 12], "consum": [3, 6, 16], "systemat": 3, "beal": 3, "hock": 3, "lesli": 3, "moder": 3, "ab": [3, 10, 11], "bc": [3, 6, 7], "ac": 3, "abc": 3, "million": [3, 12, 15], "computation": 3, "draper": 3, "smith": 3, "1966": 3, "eforymson": 3, "straightforward": [3, 10, 15], "form": [3, 4, 6, 7, 10, 11, 12, 15, 16], "updat": [3, 4, 13, 14, 15], "big": [3, 6, 7, 10, 14, 15], "55": [3, 6, 15, 16], "caution": [3, 8, 10], "move": [3, 7, 9, 11, 12, 14, 15], "likelihood": 3, "unlucki": [3, 4], "stumbl": 3, "risk": [3, 11], "suffer": 3, "turn": [3, 4, 7, 10, 11, 12, 16], "smaller": [3, 11, 12, 15], "irrelevant3": 3, "full": [3, 6, 7, 10, 12, 14, 15, 16], "cancer_subset": 3, "sequentialfeatureselector": 3, "tri": [3, 4, 11, 12, 15], "flexibl": [3, 8, 12, 16], "resort": 3, "loop": [3, 16], "flow": 3, "mckinnei": [3, 10, 15, 16], "2012": [3, 7, 10, 15, 16], "n_total": 3, "check": [3, 7, 9, 10, 14, 15, 16], "j": [3, 7, 10], "len": [3, 10], "accuracy_dict": 3, "selected_predictor": 3, "empti": [3, 8, 14], "n_job": 3, "best_set": 3, "argmax": 3, "append": [3, 10, 15, 16], "join": [3, 10, 14], "del": [3, 15], "891103": 3, "917450": 3, "931454": 3, "926253": 3, "906955": 3, "exhibit": [3, 8], "fluctuat": [3, 11], "attempt": [3, 4, 15], "account": [3, 13, 14], "chanc": [3, 6, 13], "elbow": [3, 4], "successfulli": [3, 8, 10, 14], "judgement": 3, "excel": [3, 7, 12, 14], "tutori": [3, 8, 10, 12], "go": [3, 6, 7, 9, 10, 12, 13, 15], "jame": [3, 4, 10, 12], "great": [3, 4, 6, 7, 8, 10, 12, 14, 15], "naiv": 3, "bay": 3, "goe": [3, 7, 8, 10, 12], "popular": [3, 4, 10, 12, 14], "bkm67": 3, "martin": 3, "lansdown": 3, "mauric": 3, "georg": 3, "kendal": 3, "david": [3, 6], "mann": 3, "discard": 3, "multivari": 3, "biometrika": 3, "366": 3, "ds66": 3, "norman": 3, "harri": 3, "wilei": [3, 15], "efo66": 3, "stepwis": 3, "backward": 3, "eastern": 3, "meet": 3, "hl67": 3, "ronald": 3, "technometr": 3, "531": 3, "540": 3, "jwht13": [3, 4, 12], "gareth": [3, 4, 12], "daniela": [3, 4, 12], "witten": [3, 4, 12], "hasti": [3, 4, 12], "tibshirani": [3, 4, 12], "springer": [3, 4, 12, 15], "1st": [3, 4, 12], "edit": [3, 4, 12, 13, 15], "www": [3, 4, 7, 10, 12], "statlearn": [3, 4, 12], "com": [3, 4, 6, 10, 12, 13, 14, 15], "mck12": [3, 10, 15, 16], "ipython": [3, 10, 13, 15, 16], "o": [3, 7, 10, 13, 15, 16], "reilli": [3, 10, 15, 16], "media": [3, 8, 10, 15, 16], "inc": [3, 6, 10, 15, 16], "subgroup": [4, 7, 15, 16], "predict": [4, 7, 9, 11, 12, 15], "differenti": 4, "classif": [4, 7, 9, 11, 12], "variabl": [4, 6, 7, 8, 10, 11, 12, 15, 16], "scikit": [4, 11, 12], "colour": 4, "kmean": 4, "set": [4, 6, 8, 9, 10, 12, 14, 16], "genet": [4, 15], "ancestr": 4, "subpopul": 4, "onlin": [4, 6, 10, 13, 14, 15], "custom": [4, 15], "uncov": [4, 8, 15], "fundament": [4, 6, 7, 15], "supervis": 4, "unsupervis": 4, "imposs": [4, 6], "articl": [4, 7], "wikipedia": [4, 10], "evalu": [4, 6, 7, 12, 15], "test": [4, 6, 12, 13], "good": [4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "ascertain": 4, "rigor": [4, 11], "lloyd": 4, "1982": 4, "hierarch": 4, "princip": 4, "compon": [4, 7], "multidimension": 4, "semisupervis": 4, "goal": [4, 7, 11, 15, 16], "benefici": [4, 10], "unlabel": [4, 7], "willing": [4, 6], "seed": [4, 6, 11, 12], "palmerpenguin": 4, "horst": 4, "2020": [4, 6, 7, 15], "kristen": 4, "gorman": 4, "palmer": 4, "station": [4, 15], "antarctica": [4, 15], "ecolog": 4, "site": [4, 5, 10], "adult": 4, "penguin": 4, "2014": [4, 16], "bill": 4, "flipper": 4, "millimet": 4, "distinct": [4, 8, 15], "speci": 4, "discoveri": [4, 12], "gentoo": 4, "bill_length_mm": 4, "flipper_length_mm": 4, "39": [4, 14, 15], "196": [4, 6, 7, 11], "182": 4, "187": [4, 6, 11], "190": [4, 11, 16], "195": [4, 7, 16], "193": [4, 11], "213": [4, 7, 10, 15, 16], "215": [4, 16], "220": [4, 16], "49": [4, 14, 15], "208": 4, "52": [4, 14], "197": 4, "189": [4, 6], "penguins_standard": 4, "bill_length_standard": 4, "flipper_length_standard": 4, "641361": 4, "189773": 4, "144917": 4, "328412": 4, "517922": 4, "921755": 4, "107617": 4, "846513": 4, "409743": 4, "677761": 4, "238168": 4, "271104": 4, "902464": 4, "433767": 4, "720106": 4, "192860": 4, "645505": 4, "355522": 4, "962559": 4, "440353": 4, "762179": 4, "205012": 4, "111528": 4, "123299": 4, "786203": 4, "626855": 4, "757407": 4, "783170": 4, "108442": 4, "776057": 4, "759092": 4, "subtyp": 4, "scatter_plot": 4, "meaningless": 4, "etc": [4, 6, 7, 10, 14, 15, 16], "adjust": [4, 15], "sum": [4, 11, 16], "wssd": 4, "intertia": 4, "mu_x": 4, "mu_i": 4, "x_1": 4, "x_2": 4, "x_3": 4, "x_4": 4, "y_1": 4, "y_2": 4, "y_3": 4, "y_4": 4, "35": [4, 7, 14, 15, 16], "far": [4, 12, 14, 15, 16], "variant": 4, "minim": [4, 11, 12, 15], "reassign": 4, "longer": [4, 7, 16], "outlin": [4, 7, 10, 15, 16], "termin": [4, 13], "fourth": 4, "onward": [4, 10, 13, 15], "guarante": [4, 13], "forev": 4, "logic": [4, 7, 10, 16], "finit": [4, 6, 15], "unlik": [4, 6, 10, 11, 15], "stuck": [4, 8, 16], "solut": [4, 6, 7], "poor": [4, 10], "lowest": [4, 10, 15], "cross": [4, 11, 12], "valid": [4, 9, 10, 11, 12], "subdivid": 4, "merg": [4, 10], "diminish": 4, "reach": [4, 10, 12, 14, 15], "being": [4, 6, 7, 8, 10, 11, 14, 15, 16], "address": [4, 7, 10, 11, 12, 14], "preprocess": [4, 11], "n_cluster": 4, "kmeanskmean": 4, "penguin_clust": 4, "labels_": 4, "altern": [4, 7, 12, 14, 15, 16], "suffix": [4, 15], "nomin": [4, 15], "discret": [4, 15], "cluster_plot": 4, "inertia_": 4, "inertia": 4, "730719092276117": 4, "varieti": [4, 10, 12, 14, 16], "ks": 4, "oper": [4, 6, 7, 10, 13, 14, 15], "safest": 4, "reus": 4, "penguin_clust_k": 4, "000000": 4, "576264": 4, "730719": 4, "343613": 4, "362131": 4, "678383": 4, "293320": 4, "975016": 4, "785232": 4, "elbow_plot": 4, "bump": [4, 15], "prevent": [4, 7, 8, 10, 15, 16], "n_init": 4, "paramet": [4, 6, 10, 11, 12, 15, 16], "realm": 4, "specif": [4, 10, 11, 13, 14, 15], "companion": [4, 10], "pca": 4, "gwf14": 4, "toni": 4, "fraser": 4, "sexual": 4, "dimorph": 4, "commun": [4, 7, 10, 15], "ntarctic": 4, "genu": 4, "emph": 4, "pygosc": 4, "plo": [4, 14], "ONE": 4, "hhg20": 4, "allison": 4, "alison": 4, "hill": [4, 10], "archipelago": 4, "allisonhorst": 4, "io": [4, 7, 10, 15], "llo82": 4, "stuart": 4, "quantiz": 4, "pcm": 4, "129": 4, "137": [4, 6, 7, 12], "releas": [4, 10], "bell": [4, 6], "telephon": 4, "paper": [4, 15], "1957": 4, "web": [5, 8, 14], "navig": [5, 7, 8, 10, 13, 14], "mobil": 5, "devic": [5, 10], "menu": [5, 7, 8, 13], "datasciencebook": [5, 9, 10, 13], "ca": [5, 7, 9, 10, 11, 12, 13], "licens": 5, "creativ": 5, "noncommerci": 5, "sharealik": 5, "popul": [6, 7, 10, 15, 16], "extend": [6, 12, 15], "inferenti": [6, 7, 9, 11, 15], "interv": 6, "approxim": 6, "broader": 6, "retail": 6, "sell": 6, "iphon": 6, "accessori": 6, "market": [6, 11, 12], "strateg": 6, "product": [6, 7, 14], "north": [6, 10, 15], "american": [6, 7, 10], "colleg": 6, "campus": 6, "america": [6, 15], "owner": [6, 10, 12], "characterist": [6, 7, 10, 15, 16], "costli": 6, "taken": [6, 7, 11, 14, 15], "canada": [6, 7, 10, 15, 16], "apart": [6, 10, 15], "rent": 6, "budget": [6, 11], "studio": 6, "rental": [6, 10], "price": [6, 10, 11, 12], "month": [6, 14, 15], "monthli": 6, "airbnb": 6, "cox": 6, "marketplac": 6, "vacat": 6, "septemb": [6, 15], "neighborhood": 6, "room": 6, "accommod": 6, "bathroom": 6, "bedroom": [6, 10, 11, 12], "bed": [6, 11, 12], "night": 6, "neighbourhood": 6, "room_typ": 6, "downtown": 6, "home": [6, 7, 10, 11, 12, 13, 15, 16], "apt": [6, 13], "bath": [6, 11], "150": [6, 12, 15], "eastsid": 6, "west": 6, "kensington": 6, "cedar": 6, "cottag": 6, "146": [6, 12], "110": 6, "4589": 6, "4590": 6, "4591": 6, "oakridg": 6, "privat": [6, 10, 14], "4592": 6, "dunbar": 6, "southland": 6, "share": [6, 8, 10, 14, 15, 16], "29": [6, 14, 15, 16], "4593": 6, "145": 6, "4594": 6, "shaughnessi": 6, "citi": [6, 7, 10, 11, 16], "plan": [6, 14], "bylaw": 6, "747497": 6, "246408": 6, "005224": 6, "hotel": 6, "000871": 6, "747": 6, "155": [6, 16], "725": 6, "250": [6, 11, 16], "025": 6, "625": 6, "350": [6, 11, 12, 16], "confirm": [6, 14, 15], "histogram": 6, "000": [6, 7, 10, 11, 12, 15], "20_000": 6, "605": 6, "606": 6, "marpol": 6, "4579": 6, "4580": 6, "160": [6, 11], "1739": 6, "1740": 6, "151": [6, 7, 15], "3904": 6, "3905": 6, "185": [6, 16], "1596": 6, "1597": 6, "kitsilano": 6, "3060": 6, "3061": 6, "hast": 6, "sunris": 6, "78": 6, "19999": 6, "527": 6, "528": 6, "1587": 6, "1588": 6, "169": 6, "3860": 6, "3861": 6, "2747": 6, "2748": 6, "285": 6, "800000": 6, "0000": 6, "999": 6, "queri": [6, 10], "qualifi": 6, "750": [6, 15], "775": 6, "225": [6, 10], "19998": 6, "700": [6, 16], "275": 6, "44552": 6, "reset_index": [6, 16], "caveat": [6, 15, 16], "twice": [6, 12], "sample_proport": 6, "44547": 6, "44548": 6, "44549": 6, "44550": 6, "44551": 6, "sample_estim": 6, "675": 6, "44541": 6, "19995": 6, "44543": 6, "19996": 6, "44545": 6, "19997": 6, "20000": 6, "mind": [6, 7, 10, 14], "sampling_distribut": 6, "mark_bar": [6, 7, 15], "bin": [6, 15], "maxbin": [6, 15], "symmetr": 6, "peak": [6, 15], "74848375": 6, "748": [6, 11], "neither": [6, 11, 15], "nor": [6, 8, 12], "underestim": 6, "tendenc": 6, "travel": 6, "wish": [6, 7, 14], "overpr": [6, 11], "population_distribut": 6, "skew": 6, "tail": [6, 10], "154": 6, "5109773617762": 6, "one_sampl": 6, "sample_distribut": 6, "153": 6, "48225": 6, "48": [6, 7, 14, 15], "wouldn": [6, 14], "alreadi": [6, 7, 8, 9, 10, 11, 12, 13, 15, 16], "mean_pric": 6, "148": 6, "56075": 6, "165": [6, 16], "50500": 6, "93925": 6, "139": 6, "14650": 6, "198": 6, "50000": 6, "192": 6, "66425": 6, "144": 6, "88600": 6, "08800": 6, "156": 6, "25000": 6, "170": 6, "mean_of_sample_mean": 6, "sample_mean": 6, "disappear": 6, "thumb": [6, 15], "emphasi": 6, "saw": [6, 10, 16], "significantli": [6, 7, 8, 12, 15, 16], "notion": [6, 11], "pretend": 6, "clever": 6, "drawn": [6, 12, 15], "median": [6, 15, 16], "slope": [6, 12], "displai": [6, 7, 8, 10, 12, 14, 15, 16], "4025": 6, "4026": 6, "renfrew": 6, "collingwood": 6, "1977": [6, 15], "1978": 6, "fairview": 6, "70": [6, 10, 15, 16], "4008": 6, "4009": 6, "269": [6, 15], "1543": 6, "1544": 6, "320": 6, "3350": 6, "3351": 6, "804": 6, "805": 6, "mount": 6, "pleasant": 6, "2286": 6, "2287": 6, "105": [6, 7, 10, 15, 16], "1010": 6, "1011": 6, "strathcona": 6, "120": [6, 7, 10, 16], "1878": 6, "1879": [6, 15], "175": 6, "1644": 6, "1645": 6, "2771": 6, "2772": 6, "4151": 6, "4152": 6, "289": 6, "4495": 6, "4496": 6, "rilei": 6, "park": [6, 15], "115": 6, "1308": 6, "1309": 6, "2246": 6, "2247": 6, "2335": 6, "2336": 6, "4059": 6, "4060": 6, "1280": 6, "1281": 6, "4324": 6, "4325": 6, "3403": 6, "3404": 6, "arbutu": 6, "ridg": 6, "664": 6, "1729": 6, "1730": 6, "93": [6, 15], "3722": 6, "3723": 6, "241": 6, "242": 6, "3955": 6, "3956": 6, "1042": 6, "1043": 6, "649": 6, "650": [6, 15], "sunset": 6, "1995": [6, 15], "1996": 6, "363": 6, "364": 6, "1783": 6, "1784": 6, "806": 6, "254": 6, "255": 6, "3365": 6, "3366": 6, "4562": 6, "4563": 6, "64": [6, 10, 11, 13], "2124": 6, "2125": 6, "200": [6, 7, 10, 11, 15], "1997": 6, "1998": 6, "257": 6, "4329": 6, "4330": [6, 16], "3408": 6, "3409": 6, "635": 6, "636": 6, "grandview": 6, "woodland": 6, "103": [6, 16], "one_sample_dist": 6, "boot1": 6, "boot1_dist": 6, "ident": [6, 7, 10], "mimic": 6, "break": [6, 10, 11, 12], "boot20000": 6, "six": [6, 7, 9, 11, 15, 16], "six_bootstrap_sampl": 6, "height": [6, 12, 15], "facet": [6, 15], "67175": 6, "42500": 6, "149": [6, 7, 12], "35000": 6, "13225": 6, "179": [6, 7], "79675": 6, "188": 6, "28225": 6, "boot20000_mean": 6, "159": 6, "29675": 6, "136": [6, 12], "55725": 6, "161": 6, "93950": 6, "22500": 6, "boot_est_dist": 6, "resampl": 6, "repeatedli": 6, "percentil": [6, 16], "captur": [6, 10, 12, 15], "narrow": [6, 10, 16], "implic": 6, "comfort": [6, 14], "strict": [6, 7], "unhelp": 6, "life": [6, 7], "deadli": 6, "ascend": [6, 7, 15], "bound": [6, 15], "97": [6, 12, 15], "quantil": 6, "express": [6, 15, 16], "5th": 6, "975": 6, "ci_bound": 6, "121": [6, 11], "607069": 6, "191": [6, 7], "525362": 6, "rule_025": 6, "mark_rul": [6, 11, 15], "f58518": 6, "strokedash": [6, 11, 15], "datum": [6, 15], "width": [6, 15], "text_025": 6, "mark_text": 6, "fontweight": 6, "bold": [6, 8], "dy": 6, "f": [6, 7, 10, 11, 13], "text_975": 6, "rule_975": 6, "finish": [6, 8, 9, 10, 13, 14, 15], "journei": 6, "surfac": [6, 11, 12, 15], "foundat": [6, 7, 10, 12], "openintro": 6, "diez": 6, "2019": [6, 15], "solid": [6, 15], "grasp": 6, "natur": [6, 14, 15, 16], "coxd": 6, "murrai": 6, "insideairbnb": 6, "09": [6, 10, 15], "01": [6, 10, 15, 16], "dccetinkayarb19": 6, "\u00e7": 6, "etinkaya": 6, "rundel": 6, "christoph": 6, "barr": 6, "os": [6, 8], "dirti": 7, "clean": [7, 9, 10], "dig": [7, 10, 16], "jump": [7, 9, 10, 15], "spoken": [7, 15, 16], "resid": [7, 15], "indigen": 7, "cultur": 7, "anywher": [7, 8], "2018": [7, 15], "sadli": 7, "colon": [7, 16], "led": [7, 15], "loss": 7, "children": 7, "speak": [7, 10, 15, 16], "mother": [7, 15, 16], "tongu": [7, 15, 16], "childhood": 7, "residenti": [7, 11], "discov": 7, "act": [7, 14, 15, 16], "harm": 7, "endang": 7, "geograph": 7, "walker": 7, "2017": [7, 14], "came": [7, 11, 15], "aborigin": [7, 10, 15, 16], "truth": 7, "reconcili": 7, "commiss": 7, "action": 7, "2015": 7, "canlang": [7, 10, 15], "2016": [7, 10, 15, 16], "censu": [7, 10, 15, 16], "214": [7, 10, 15, 16], "offici": [7, 10, 15, 16], "mother_tongu": [7, 10, 15, 16], "expos": 7, "birth": 7, "most_at_hom": [7, 10, 15, 16], "most_at_work": [7, 10, 15, 16], "lang_known": [7, 10, 15, 16], "accord": [7, 10, 15, 16], "deep": [7, 12], "simplifi": [7, 10, 16], "concentr": [7, 15], "expertis": 7, "bias": 7, "aim": [7, 9, 15], "causal": [7, 11, 15], "mechanist": [7, 15], "leek": 7, "matsui": 7, "earli": [7, 9], "live": [7, 10, 15], "provinc": [7, 10], "territori": 7, "propos": 7, "hypothes": [7, 15], "polit": 7, "parti": 7, "wealth": [7, 15], "elect": 7, "quantif": 7, "factor": [7, 15], "mechan": [7, 10, 11], "pertain": [7, 15, 16], "occasion": [7, 13, 16], "race": [7, 11, 12], "runner": 7, "regularli": [7, 8], "graphic": [7, 8, 10, 13, 14, 15], "ag": 7, "old": [7, 10, 14], "50kg": 7, "cluster": [7, 9, 15], "bought": 7, "amazon": 7, "cellphon": 7, "ownership": 7, "android": 7, "phone": 7, "essenc": 7, "spreadsheet": [7, 10], "microsoft": 7, "rectangular": 7, "primarili": [7, 11, 14, 15], "voter": 7, "affili": 7, "comma": [7, 8, 11, 16], "short": [7, 10, 15], "save": [7, 10, 13, 14], "googl": [7, 10], "sheet": [7, 10], "can_lang": [7, 10, 15, 16], "plain": [7, 8, 14], "editor": [7, 8, 10, 14], "notepad": 7, "590": [7, 10, 15], "235": [7, 10, 15, 16], "665": [7, 10, 15], "afrikaan": [7, 10, 15, 16], "10260": [7, 10, 15], "4785": [7, 10, 15], "23415": [7, 10, 15], "afro": [7, 10, 15, 16], "asiat": [7, 10, 15, 16], "1150": [7, 10, 15], "44": [7, 10, 14, 15], "akan": [7, 10, 15, 16], "twi": [7, 10, 15, 16], "13460": [7, 10, 15], "5985": [7, 10, 15], "22150": [7, 10, 15], "albanian": [7, 10, 15, 16], "26895": [7, 10, 15], "13135": [7, 10, 15], "345": [7, 10, 15], "31930": [7, 10, 15], "algonquian": [7, 10, 16], "algonquin": [7, 10, 16], "1260": [7, 10], "370": [7, 10, 16], "2480": [7, 10], "sign": [7, 10, 11, 14, 15], "2685": [7, 10], "3020": [7, 10], "1145": [7, 10], "amhar": [7, 10], "22465": [7, 10], "12785": [7, 10], "33670": [7, 10], "instal": [7, 8, 9, 10, 13], "team": [7, 14], "es": 7, "innei": 7, "2010": 7, "command": [7, 8, 10, 13], "shorter": [7, 8, 10, 14, 15], "alia": [7, 8], "gave": [7, 10], "harder": [7, 15, 16], "quot": [7, 10], "letter": [7, 13, 14], "distinguish": [7, 15], "satisfi": [7, 10], "syntax": [7, 10, 14, 16], "amp": [7, 10, 15, 16], "445": [7, 10, 15, 16], "2775": [7, 10, 15], "209": [7, 10, 15, 16], "wolof": [7, 10, 15, 16], "3990": [7, 10, 15], "1385": [7, 10, 15], "8240": [7, 10, 15], "210": [7, 10, 12, 15, 16], "wood": [7, 10, 15, 16], "cree": [7, 10, 15, 16], "1840": [7, 10, 15], "800": [7, 10, 15], "2665": [7, 10, 15], "211": [7, 10, 11, 15, 16], "wu": [7, 10, 15, 16], "shanghaines": [7, 10, 15, 16], "12915": [7, 10, 15], "7650": [7, 10, 15], "16530": [7, 10, 15], "yiddish": [7, 10, 15, 16], "13555": [7, 10, 15], "7085": [7, 10, 15], "895": [7, 10, 15], "20985": [7, 10, 15], "yoruba": [7, 10, 15, 16], "9080": [7, 10, 15], "2615": [7, 10, 15], "22415": [7, 10, 15], "screen": [7, 8, 10], "symbol": [7, 13, 15, 16], "string": [7, 10, 14, 15, 16], "my_numb": 7, "alic": 7, "formal": 7, "_": [7, 8, 15, 16], "won": [7, 10, 12, 14, 16], "complain": 7, "my": [7, 8], "syntaxerror": 7, "mayb": [7, 10], "meant": 7, "convent": [7, 8, 14], "lowercas": [7, 14], "language_data": 7, "pep": 7, "guido": 7, "van": 7, "rossum": 7, "2001": 7, "minut": [7, 8, 12, 15], "underneath": [7, 8], "ve": [7, 10, 14], "largest": [7, 10, 15, 16], "sophist": 7, "restrict": [7, 12, 16], "bracket": [7, 8, 11, 16], "statement": [7, 10, 16], "written": [7, 8, 10, 14], "doubl": [7, 8, 9, 13, 15, 16], "athabaskan": [7, 10, 16], "atikamekw": [7, 10, 16], "6150": [7, 10], "5465": 7, "1100": 7, "6645": 7, "thompson": [7, 10], "ntlakapamux": [7, 10], "335": [7, 10], "450": 7, "tlingit": [7, 10], "260": 7, "tsimshian": [7, 10], "410": 7, "206": 7, "wakashan": [7, 10], "67": [7, 10, 11, 15], "aboriginal_lang": 7, "alias": 7, "wrote": 7, "terminolog": 7, "obj": 7, "programm": 7, "confus": [7, 10, 16], "appar": 7, "rescu": 7, "selected_lang": 7, "descend": [7, 15], "decend": 7, "arranged_lang": 7, "64050": 7, "inuktitut": 7, "35210": 7, "138": 7, "ojibwai": 7, "17885": 7, "oji": 7, "12855": 7, "dene": 7, "10700": 7, "32": [7, 14, 15, 16], "cayuga": 7, "squamish": 7, "iroquoian": 7, "ten_lang": 7, "125": [7, 16], "montagnai": 7, "innu": 7, "10235": 7, "119": 7, "mi": [7, 15], "kmaq": 7, "6690": 7, "3065": 7, "180": 7, "stonei": 7, "3025": 7, "becam": 7, "curiou": 7, "728": [7, 15], "canadian_popul": [7, 15], "overwrit": 7, "opt": [7, 10, 11], "mother_tongue_perc": [7, 15], "35_151_728": [7, 15], "35151728": 7, "latter": [7, 11], "clearer": [7, 15], "182210": 7, "100166": 7, "050879": 7, "036570": 7, "030439": 7, "029117": 7, "019032": 7, "017496": 7, "008719": 7, "008606": 7, "ten_lang_perc": 7, "008": 7, "temporari": [7, 14, 16], "arranged_lang_sort": 7, "trace": [7, 8], "split": [7, 11, 12, 15], "rewrit": 7, "unwieldi": 7, "parenthesi": 7, "demonstr": [7, 10, 11, 12, 15, 16], "cleaner": 7, "messi": [7, 14, 16], "pars": [7, 10, 15], "block": [7, 10], "piec": 7, "period": [7, 8, 10, 15], "Not": [7, 16], "feed": 7, "redo": 7, "overwhelm": 7, "debug": 7, "midwai": 7, "audienc": [7, 8, 14, 15], "difficulti": 7, "scrutin": 7, "speaker": [7, 15, 16], "convei": [7, 15], "understood": 7, "tidi": 7, "shortli": 7, "ax": [7, 15], "mark": [7, 10, 14, 15], "channel": [7, 10, 11, 14, 15], "barplot_mother_tongu": 7, "refin": [7, 10], "quotat": [7, 10], "modif": [7, 16], "tackl": 7, "rotat": 7, "swap": [7, 15], "barplot_mother_tongue_axi": 7, "forward": [7, 10, 11], "suit": [7, 15, 16], "alphabet": [7, 15, 16], "reorder": 7, "ordered_barplot_mother_tongu": 7, "swampi": 7, "elsewher": [7, 10], "moos": 7, "northern": 7, "east": 7, "southern": 7, "comment": [7, 14], "hash": [7, 14], "importantli": 7, "self": [7, 10], "habit": [7, 11], "highli": [7, 14], "got": 7, "tast": 7, "ten_lang_plot": 7, "nobodi": 7, "pull": [7, 10, 13], "forgotten": [7, 14], "pop": [7, 8, 10], "slowli": 7, "adept": 7, "remind": [7, 16], "lab": [7, 13], "lookup": 7, "concis": 7, "press": [7, 8], "tab": [7, 8, 10, 13, 14], "bring": [7, 10], "typo": 7, "hold": [7, 10, 15, 16], "dialogu": 7, "dialog": [7, 14], "contextu": 7, "gvr01": 7, "coghlan": 7, "barri": [7, 16], "warsaw": 7, "style": [7, 10], "0008": 7, "lp15": 7, "jeffrei": [7, 15], "347": 7, "6228": 7, "1314": 7, "1315": 7, "pm15": 7, "elizabeth": 7, "art": [7, 15], "anyon": [7, 8, 10, 14], "skybrud": 7, "consult": [7, 10, 14], "llc": 7, "bookdown": 7, "rdpeng": 7, "artofdatasci": 7, "tim20": [7, 15], "ttimber": [7, 10, 15], "wal17": 7, "anada": 7, "canadiangeograph": 7, "wil18": 7, "kori": 7, "bccampu": 7, "opentextbc": 7, "indigenizationfound": 7, "statisticscanada16a": 7, "www12": 7, "statcan": 7, "gc": 7, "recens": 7, "dp": 7, "eng": 7, "cfm": 7, "statisticscanada16b": 7, "borigin": 7, "irst": 7, "ation": 7, "\u00e9ti": 7, "nuit": 7, "sa": 7, "2016022": 7, "x2016022": 7, "statisticscanada18": 7, "evolut": 7, "1901": 7, "www150": 7, "n1": 7, "pub": 7, "630": 7, "x2018001": 7, "htm": 7, "thepdteam20": 7, "dev": 7, "februari": 7, "doi": [7, 15], "5281": 7, "zenodo": 7, "3509134": 7, "trutharcocanada12": 7, "public": [7, 14], "govern": 7, "servic": [7, 10, 14], "trutharcocanada15": 7, "ction": 7, "www2": 7, "gov": [7, 10, 15], "asset": 7, "columbian": 7, "calls_to_action_english2": 7, "pdf": [7, 15], "wesmckinney10": 7, "ata": 7, "tructur": 7, "tatist": 7, "omput": 7, "p": [7, 10, 13], "ython": 7, "t\u00e9fan": 7, "der": 7, "arrod": 7, "illman": 7, "roceed": 7, "9th": 7, "cienc": 7, "onfer": 7, "25080": 7, "majora": 7, "92bf1922": 7, "00a": 7, "interleav": 8, "narrat": 8, "platform": [8, 14], "interfac": [8, 13, 14], "dress": 8, "morn": 8, "configur": [8, 9, 13, 14], "mix": [8, 16], "formatt": 8, "artifact": 8, "analyz": [8, 9, 10, 16], "realiti": [8, 12], "consciou": [8, 14], "screenshot": 8, "easiest": [8, 13], "jupyterhub": [8, 14], "provis": 8, "authent": [8, 14], "gain": [8, 10], "instructor": [8, 9], "refer": 8, "independ": [8, 9, 15], "entireti": 8, "activ": [8, 10], "cursor": 8, "rectangl": [8, 15], "toolbar": [8, 10], "keyboard": [8, 14], "enter": [8, 10, 13, 14, 15], "arrow": [8, 14], "restart": [8, 13], "bar": [8, 10, 12, 13], "slight": [8, 11], "session": [8, 13, 14], "delet": [8, 13, 14], "emul": 8, "window": [8, 10], "statu": 8, "idl": 8, "busi": 8, "excess": 8, "unrespons": 8, "lose": 8, "connect": [8, 10, 12, 13, 14, 15], "interrupt": 8, "paus": 8, "server": [8, 10, 14], "hub": 8, "panel": 8, "shut": [8, 13], "rich": [8, 14], "italic": 8, "bullet": [8, 10], "eventu": [8, 10, 15], "unformat": 8, "unrend": 8, "box": [8, 11, 12, 13, 14], "progress": [8, 13], "autosav": 8, "disk": [8, 10], "icon": [8, 10, 13, 14], "mac": 8, "arbitrari": [8, 15], "downsid": [8, 13], "nonlinear": [8, 12, 15], "deliber": [8, 14], "referenc": 8, "unconvent": 8, "fail": 8, "nonfunct": 8, "scenario": [8, 10], "event": [8, 14], "guard": 8, "awar": [8, 14], "sooner": 8, "linearli": [8, 12], "suffici": [8, 15], "extern": [8, 14], "heavili": 8, "loc": [8, 15], "qualiti": [8, 11, 12], "package_nam": 8, "pn": 8, "librari": [8, 10, 15], "hidden": [8, 10], "delimit": 8, "ipynb": [8, 10, 14], "shareabl": 8, "firefox": 8, "safari": 8, "chrome": 8, "edg": 8, "adob": 8, "acrobat": 8, "benefit": [8, 10, 14, 16], "standalon": 8, "font": [8, 10, 15], "launcher": 8, "visibl": [8, 14, 15], "untitl": 8, "white": 8, "troublesom": [8, 14], "repetit": 8, "dash": [8, 15], "jupyterlab": 8, "keen": 8, "commonmark": 8, "cheatsheet": 8, "audit": 9, "friend": 9, "colleagu": 9, "histori": [9, 14], "chapter": 9, "spend": [9, 10, 11, 16], "restructur": 9, "usabl": 9, "coher": 9, "variou": [10, 13, 16], "laptop": [10, 14], "gatewai": 10, "unless": [10, 13, 15], "upfront": [10, 16], "devot": 10, "shoelac": 10, "trip": 10, "u": [10, 13, 15], "niform": 10, "esourc": 10, "ocat": 10, "filepath_or_buff": 10, "skiprow": 10, "ibi": 10, "list_tabl": 10, "to_csv": 10, "pplicat": 10, "rogram": 10, "nterfac": 10, "internet": [10, 13], "astronomi": 10, "pictur": [10, 15], "request": [10, 16], "remot": 10, "directori": [10, 13, 14, 15], "filesystem": 10, "folder": [10, 13, 14], "worksheet_02": 10, "happiness_report": 10, "slash": [10, 16], "proce": [10, 13, 14, 16], "happy_data": 10, "bike_shar": 10, "tutorial_01": 10, "silli": [10, 12], "redund": [10, 15], "whew": 10, "bonu": 10, "fatima": 10, "jayden": 10, "usernam": [10, 14], "link": [10, 13, 14], "video": [10, 13], "omma": 10, "epar": 10, "v": [10, 13], "alu": 10, "aren": [10, 15, 16], "canadian": [10, 16], "canlang_data": 10, "oftentim": [10, 16], "sentenc": 10, "paragraph": [10, 15], "scientist": 10, "distribut": [10, 14, 15], "permiss": [10, 14], "21930": 10, "parsererror": 10, "messag": [10, 13, 14, 15, 16], "wasn": [10, 15], "can_lang_meta": 10, "token": 10, "didn": [10, 16], "tsv": 10, "escap": 10, "backslash": 10, "can_lang_no_nam": 10, "curli": [10, 16], "brace": 10, "col_map": 10, "canlang_data_renam": 10, "immedi": [10, 12], "raw": [10, 13, 15, 16], "githubusercont": [10, 13], "datasci": 10, "whichev": 10, "xlsx": 10, "snippet": [10, 14], "_rel": 10, "j1": 10, "w8": 10, "qrj": 10, "tf": 10, "wz": 10, "hlio": 10, "8f": 10, "3wn": 10, "ed2": 10, "gz": 10, "_r": 10, "yg": 10, "tuee": 10, "6q": 10, "rzy": 10, "l60": 10, "xtp": 10, "4vt": 10, "jq": 10, "sheet_nam": 10, "sad": 10, "usecol": 10, "beforehand": 10, "libr": 10, "offic": 10, "semicolon": 10, "decim": [10, 15, 16], "european": 10, "countri": 10, "storag": 10, "user": [10, 13, 14], "manag": [10, 13, 14], "mysql": 10, "oracl": 10, "sql": 10, "simplest": [10, 15], "db": 10, "backend": 10, "send": [10, 14], "sqlalchemi": 10, "matur": 10, "deeper": 10, "friendlier": 10, "conn": 10, "retriev": [10, 11, 14, 16], "secretli": 10, "behind": [10, 14, 15], "scene": [10, 14], "canlang_t": 10, "databaset": 10, "r0": 10, "countstar": 10, "haven": [10, 13], "sent": [10, 14], "effici": [10, 12, 14, 15], "lazi": 10, "compil": 10, "str": 10, "AS": 10, "nfrom": 10, "t0": 10, "arab": 10, "419890": 10, "223535": 10, "5585": 10, "629055": 10, "mostli": [10, 14, 15, 16], "canlang_table_filt": 10, "predic": 10, "canlang_table_select": 10, "r1": 10, "aboriginal_lang_data": 10, "attributeerror": 10, "traceback": 10, "recent": [10, 13, 14], "conda": [10, 13], "lib": 10, "python3": 10, "expr": 10, "py": [10, 13, 16], "645": 10, "__getattr__": 10, "641": 10, "hint": 10, "common_typo": 10, "642": 10, "rais": [10, 15], "643": 10, "__name__": 10, "644": 10, "tahltan": 10, "crash": 10, "postgr": 10, "client": [10, 11], "host": [10, 13, 14], "localhost": 10, "port": [10, 13], "endpoint": 10, "5432": 10, "password": [10, 14], "can_mov_db": 10, "movi": 10, "fakeserv": 10, "stat": 10, "user0001": 10, "abc123": 10, "theme": [10, 15], "medium": [10, 14], "title_alias": 10, "episod": 10, "names_occup": 10, "occup": 10, "rate": 10, "ratings_t": 10, "alchemyt": 10, "average_r": 10, "num_vot": 10, "avg_rat": 10, "order_bi": 10, "backup": 10, "integr": 10, "secur": [10, 14], "simultan": [10, 14, 16], "conflict": 10, "billion": 10, "daili": 10, "chao": 10, "ensu": 10, "no_official_lang_data": 10, "no_official_languag": 10, "magic": 10, "uncommon": 10, "secret": [10, 14], "somewhat": [10, 12], "thought": [10, 12, 16], "painstak": 10, "gather": [10, 15], "yper": 10, "ext": 10, "arkup": 10, "anguag": 10, "ascad": 10, "tyle": 10, "heet": 10, "webpag": [10, 14], "wherea": [10, 12, 16], "element": [10, 15, 16], "layout": [10, 15], "subsect": 10, "richardson": 10, "2007": 10, "reitz": 10, "2023": 10, "foot": [10, 11, 12], "craiglist": 10, "craigslist": 10, "advertis": [10, 11, 12], "span": 10, "meta": 10, "hous": [10, 11, 12], "1br": 10, "hood": 10, "13768": 10, "108th": 10, "avenu": 10, "maptag": 10, "pid": 10, "6786042973": 10, "banish": 10, "trash": [10, 13], "hide": [10, 15], "post": [10, 14], "unbanish": 10, "href": 10, "restor": 10, "2285": 10, "oof": 10, "date": [10, 14, 15], "keyword": [10, 16], "grab": 10, "complex": [10, 12, 14, 15], "selectorgadget": 10, "cc": 10, "deselect": 10, "pic": 10, "footag": 10, "gadget": 10, "robot": 10, "txt": [10, 14], "cl": 10, "spider": 10, "script": 10, "scraper": 10, "crawler": 10, "explicit": [10, 16], "realist": 10, "disallow": 10, "td": 10, "nth": 10, "child": [10, 12], "largestc": 10, "target": 10, "bs4": 10, "wiki": 10, "en": 10, "parser": 10, "population_nod": 10, "slice": [10, 15, 16], "clariti": [10, 15], "greater_toronto_area": 10, "202": 10, "london": [10, 16], "_ontario": 10, "ontario": 10, "543": 10, "551": 10, "greater_montr": 10, "montreal": [10, 16], "node": 10, "rid": 10, "get_text": 10, "fantast": 10, "albeit": 10, "canada_wiki_t": 10, "metropolitan": [10, 16], "droplevel": 10, "canada_wiki_df": 10, "rank": 10, "unnam": 10, "8_level_1": 10, "9_level_1": 10, "6202225": 10, "543551": 10, "quebec": 10, "4291732": 10, "halifax": [10, 16], "nova": 10, "scotia": 10, "465703": 10, "2642825": 10, "st": [10, 16], "catharin": [10, 16], "niagara": [10, 16], "433604": 10, "ottawa": [10, 16], "gatineau": [10, 16], "1488307": 10, "windsor": [10, 16], "422630": 10, "calgari": [10, 16], "1481806": 10, "oshawa": 10, "415311": 10, "edmonton": [10, 16], "1418118": 10, "victoria": [10, 15, 16], "397237": 10, "839311": 10, "saskatoon": 10, "saskatchewan": 10, "317480": 10, "winnipeg": [10, 16], "manitoba": 10, "834678": 10, "regina": [10, 16], "249217": 10, "hamilton": 10, "785184": 10, "sherbrook": 10, "227398": 10, "kitchen": [10, 16], "cambridg": [10, 16], "waterloo": [10, 16], "575847": 10, "kelowna": [10, 16], "222162": 10, "desktop": 10, "stun": 10, "rho": 10, "ophiuchi": 10, "juli": 10, "webb": 10, "telescop": 10, "nircam": 10, "molecular": [10, 15], "signup": 10, "safe": [10, 14], "transfer": [10, 11], "infinit": 10, "bandwidth": 10, "frequent": [10, 14], "success": [10, 14], "bog": 10, "revok": 10, "grant": 10, "quota": 10, "overrun": 10, "abid": 10, "hourli": 10, "hour": [10, 11], "planetari": 10, "apod": 10, "api_kei": 10, "your_api_kei": 10, "07": [10, 15], "explan": [10, 15], "mere": 10, "390": 10, "light": 10, "sun": [10, 15], "star": 10, "planet": 10, "peer": 10, "natal": 10, "infrar": 10, "spectacular": 10, "cosmic": 10, "snapshot": [10, 13, 14], "celebr": 10, "young": 10, "brighter": 10, "clearli": [10, 15], "sport": 10, "diffract": 10, "spike": 10, "jet": 10, "shock": 10, "hydrogen": 10, "blast": 10, "newborn": 10, "yellowish": 10, "dusti": 10, "caviti": 10, "carv": 10, "energet": 10, "Near": 10, "shadow": 10, "cast": 10, "protoplanetari": 10, "hdurl": 10, "2307": 10, "stsci": 10, "01_rhooph": 10, "png": [10, 15], "media_typ": 10, "service_vers": 10, "v1": 10, "01_rhooph1024": 10, "neat": 10, "json": 10, "javascript": 10, "notat": [10, 16], "nasa_data_singl": 10, "start_dat": 10, "end_dat": 10, "nasa_data": 10, "74": [10, 15], "copyright": 10, "data_dict": 10, "nasa_df": 10, "carina": 10, "nebula": 10, "ncarlo": 10, "taylor": 10, "2305": 10, "carnorth": 10, "flat": [10, 11, 12, 15], "rock": 10, "mar": 10, "nnasa": 10, "njpl": 10, "caltech": 10, "nmsss": 10, "nprocess": 10, "ne": 10, "flatmar": 10, "03": [10, 15, 16], "centauru": 10, "peculiar": 10, "island": 10, "nmarco": 10, "lorenzi": 10, "nangu": 10, "lau": 10, "tommi": 10, "tse": 10, "ntex": 10, "ngc5128_": 10, "galaxi": 10, "famou": 10, "hole": 10, "pia23122": 10, "shackleton": 10, "shadowcam": 10, "shacklet": 10, "69": 10, "doom": 10, "eta": 10, "nesa": 10, "nhubbl": 10, "nlice": 10, "etacarin": 10, "dust": 10, "ngc": 10, "6559": 10, "nadam": 10, "ntelescop": 10, "ngc6559_": 10, "sunspot": 10, "spot": 10, "72": 10, "ring": 10, "spiral": 10, "1398": 10, "ngc1398_": 10, "73": [10, 15], "readili": 10, "heart": 10, "awesom": 10, "udac": 10, "linux": [10, 13], "rthepsfoundation23": 10, "kenneth": 10, "readthedoc": 10, "latest": [10, 13, 14, 16], "ric07": 10, "leonard": 10, "beauti": 10, "soup": 10, "april": [10, 15], "nasaesacsa": 10, "23": [10, 12, 14, 15, 16], "esa": 10, "csa": 10, "pontoppidan": 10, "pagan": 10, "esawebb": 10, "weic2316a": 10, "realtsproject21": 10, "internetlivestat": 10, "faster": [11, 15], "rmspe": [11, 12], "rmse": [11, 12], "vs": [11, 15], "person": [11, 12, 15], "week": 11, "annual": 11, "boston": 11, "marathon": 11, "sale": [11, 12], "spline": 11, "heurist": 11, "932": 11, "estat": [11, 12], "sacramento": [11, 12], "bee": 11, "newspap": 11, "realtor": 11, "zip": [11, 13, 14], "sqft": [11, 12], "latitud": 11, "longitud": 11, "z95838": 11, "836": [11, 16], "59222": 11, "631913": 11, "434879": 11, "z95823": 11, "1167": 11, "68212": 11, "478902": 11, "431028": 11, "z95815": 11, "796": 11, "68880": 11, "618305": 11, "443839": 11, "852": 11, "69307": 11, "616835": 11, "439146": 11, "z95824": 11, "797": 11, "81900": 11, "519470": 11, "435768": 11, "927": 11, "z95829": 11, "2280": 11, "232425": 11, "457679": 11, "359620": 11, "928": [11, 16], "1477": 11, "234000": 11, "499893": 11, "458890": 11, "929": 11, "citrus_height": 11, "z95610": 11, "1216": 11, "235000": 11, "708824": 11, "256803": 11, "930": [11, 15], "elk_grov": 11, "z95758": 11, "1685": 11, "235301": 11, "417000": 11, "397424": 11, "931": 11, "el_dorado_hil": 11, "z95762": 11, "1362": 11, "235738": 11, "655245": 11, "075915": 11, "livabl": 11, "feet": [11, 12], "usd": [11, 12], "unit": [11, 12, 15, 16], "front": [11, 15], "0f": [11, 12], "sold": [11, 12], "former": 11, "dive": 11, "subsampl": 11, "small_sacramento": 11, "pai": 11, "absent": 11, "small_plot": 11, "overlai": 11, "line_df": 11, "2000": 11, "dist": 11, "nearest_neighbor": 11, "298": 11, "1900": 11, "361745": 11, "487409": 11, "461413": 11, "718": 11, "antelop": 11, "z95843": 11, "2160": 11, "290000": 11, "704554": 11, "354753": 11, "rosevil": 11, "z95678": 11, "1744": 11, "326951": 11, "771917": 11, "304439": 11, "256": 11, "z95835": 11, "1718": 11, "250000": 11, "676658": 11, "528128": 11, "282": 11, "rancho_cordova": 11, "z95670": 11, "1671": 11, "175000": 11, "591477": 11, "315340": 11, "329": 11, "280739": 11, "280": [11, 15, 16], "739": 11, "unansw": 11, "abil": [11, 14, 15, 16], "lock": [11, 12], "sacramento_train": [11, 12], "sacramento_test": [11, 12], "limits_": 11, "y_i": 11, "hat": 11, "_i": 11, "th": 11, "forecast": 11, "overshoot": 11, "undershoot": 11, "equat": [11, 12], "kneighborsregressor": [11, 12], "neg_root_mean_squared_error": 11, "kneighborsregressor__n_neighbor": 11, "sacr_pipelin": 11, "sacr_preprocessor": 11, "201": 11, "sacr_gridsearch": 11, "sacr_result": 11, "param_kneighborsregressor__n_neighbor": 11, "111694": 11, "373571": 11, "2670": 11, "504864": 11, "93921": 11, "308372": 11, "2535": 11, "377923": 11, "87509": 11, "079427": 11, "206833": 11, "87326": 11, "061020": 11, "3231": 11, "353419": 11, "87092": 11, "836379": 11, "3699": 11, "135233": 11, "94560": 11, "902424": 11, "4272": 11, "135780": 11, "94820": 11, "737639": 11, "4260": 11, "831380": 11, "95015": 11, "919184": 11, "4258": 11, "119157": 11, "95332": 11, "052943": 11, "4248": 11, "208563": 11, "199": 11, "95592": 11, "418971": 11, "4229": 11, "057508": 11, "moment": [11, 16], "nonneg": 11, "neg_": 11, "convolut": 11, "alright": [11, 15], "101": [11, 16], "minimum": [11, 12, 16], "699": 11, "perfectli": [11, 14, 15], "datapoint": 11, "inflex": 11, "idiosyncrat": 11, "unseen": [11, 12], "mean_squared_error": [11, 12], "y_true": [11, 12], "y_pred": [11, 12], "83825": 11, "17556316577": 11, "825": 11, "232": 11, "neglig": 11, "buyer": 11, "afford": 11, "maximum": [11, 12, 16], "5000": 11, "superimpos": [11, 12], "qualit": [11, 12], "opportun": 11, "sqft_prediction_grid": [11, 12], "arang": 11, "base_plot": 11, "sacr_preds_plot": [11, 12], "best_k_sacr": 11, "ff7f0e": [11, 12], "concern": [11, 12], "incorpor": [11, 16], "plot_b": 11, "moreov": 11, "85886": 11, "018186": 11, "4390": 11, "111887": 11, "886": 11, "rmspe_mult": 11, "81514": 11, "60777252799": 11, "515": 11, "overlaid": [11, 12], "2d": 11, "newli": [11, 14], "character": 12, "conclud": 12, "train": 12, "slower": 12, "confusingli": 12, "undervalu": 12, "beta_0": 12, "beta_1": 12, "cdot": 12, "intercept": [12, 15], "coeffici": 12, "parametr": 12, "push": 12, "happili": 12, "crazi": 12, "shouldn": 12, "600": [12, 15], "276": 12, "027": 12, "plausibl": 12, "linearregress": 12, "linear_model": 12, "coef_": 12, "intercept_": 12, "lm": 12, "786883": 12, "16937": 12, "621919": 12, "hurt": 12, "afterward": [12, 16], "16938": 12, "938": 12, "85898": 12, "4768972745": 12, "898": 12, "tricki": [12, 13], "all_point": 12, "wiggli": 12, "curv": [12, 15], "oscil": [12, 15], "Such": 12, "fare": 12, "extrapol": 12, "obvious": 12, "mlm": 12, "linearregressionlinearregress": 12, "lm_mult_test_rmsp": 12, "83509": 12, "92922019486": 12, "510": 12, "hallmark": 12, "93632036": 12, "16046": 12, "79938106": 12, "47210": 12, "42409095276": 12, "beta_2": 12, "hyperplan": 12, "047": 12, "tune": [12, 15], "collinear": 12, "judg": 12, "unbeknownst": 12, "analyst": 12, "parent": 12, "absurdli": 12, "nevertheless": [12, 15], "subtl": [12, 16], "inaccur": 12, "ever": [12, 14, 16], "384": 12, "ft": 12, "627": 12, "274": 12, "556": 12, "231": 12, "94": 12, "ic": 12, "cream": 12, "flavor": [12, 15], "remark": 12, "homeown": 12, "df": [12, 16], "fulli": [12, 15], "5994": 12, "288853": 12, "1688": 12, "092090": 12, "9859": 12, "021194": 12, "9160": 12, "812375": 12, "6400": 12, "212624": 12, "7341": 12, "333609": 12, "8434": 12, "656970": 12, "3329": 12, "106273": 12, "7170": 12, "311442": 12, "7895": 12, "567003": 12, "cubic": 12, "z": 12, "strong": [12, 15], "magnitud": [12, 15], "leap": 12, "stone": 12, "enjoi": 12, "ventura": 13, "22": [13, 14, 15], "cpu": 13, "english": [13, 15, 16], "virtual": 13, "git": [13, 14], "rightmost": 13, "compress": [13, 15], "unzip": 13, "autograd": 13, "pre": 13, "isol": 13, "interf": 13, "ex": 13, "wizard": 13, "wsl": 13, "hyper": 13, "prompt": [13, 14], "cmd": 13, "admin": 13, "administr": 13, "log": [13, 14, 15], "bio": 13, "hotkei": 13, "esc": 13, "reboot": 13, "familiar": 13, "ubcdsci": 13, "proceed": [13, 16], "dockerfil": 13, "besid": [13, 14], "expand": [13, 14, 16], "textbox": 13, "8888": 13, "volum": 13, "path": [13, 15, 16], "jovyan": 13, "scroll": [13, 14], "127": 13, "troubleshoot": 13, "tip": 13, "dmg": 13, "intel": 13, "processor": 13, "older": 13, "appl": 13, "newer": 13, "drag": [13, 14], "sudo": 13, "certif": 13, "curl": 13, "gnupg": 13, "fssl": 13, "sh": 13, "chmod": 13, "rm": 13, "pwd": 13, "homepag": 13, "bundl": 13, "kernel": 13, "pip": 13, "upgrad": 13, "env": 13, "intro": 13, "yml": 13, "compat": 13, "xcode": 13, "x64": 13, "arm64": 13, "debian": 13, "deb": 13, "dpkg": 13, "jlab": 13, "me": 14, "ago": 14, "holder": 14, "lifespan": 14, "resolv": 14, "revis": 14, "mess": [14, 15], "repercuss": 14, "boggl": 14, "unclear": 14, "document_final_draft_fin": 14, "to_hand_in_final_v2": 14, "polish": 14, "lack": 14, "springboard": 14, "fruit": 14, "revert": 14, "Being": 14, "facilit": 14, "todai": [14, 15], "safeti": 14, "workspac": 14, "schemat": 14, "maintain": 14, "told": 14, "metadata": 14, "brief": 14, "narr": 14, "readm": 14, "md": 14, "draft": 14, "shorten": 14, "daa29d6": 14, "884c7ce": 14, "prerequisit": 14, "stage": 14, "physic": [14, 15], "placehold": 14, "synchron": 14, "eas": 14, "templat": 14, "canadian_languag": 14, "hyphen": 14, "privaci": 14, "happi": 14, "green": [14, 16], "respositori": 14, "reserv": 14, "upload": [14, 15], "toggl": 14, "markdown": 14, "archiv": 14, "defeat": 14, "prove": 14, "beginn": 14, "grain": 14, "expiri": 14, "creation": 14, "absolut": [14, 15], "tick": [14, 15], "repo": 14, "fret": 14, "eda": 14, "flag": 14, "pane": 14, "plu": 14, "untrack": 14, "checkpoint": 14, "state": [14, 15], "datetim": [14, 15], "stamp": 14, "ok": 14, "credenti": 14, "author": 14, "33": [14, 15, 16], "dismiss": 14, "invit": 14, "collaborators_github_user_nam": 14, "refresh": 14, "blend": [14, 15], "offend": 14, "preced": 14, "histor": 14, "float": [14, 16], "app": 14, "convers": [14, 15, 16], "subtop": 14, "persist": 14, "thread": 14, "searchabl": 14, "notif": 14, "repli": 14, "submit": [14, 15], "submiss": 14, "youtub": 14, "advic": 14, "gitlab": 14, "bitbucket": 14, "wbc": 14, "jennif": 14, "bryan": 14, "karen": 14, "cranston": 14, "justin": 14, "kitz": 14, "lex": 14, "nederbragt": 14, "traci": 14, "teal": 14, "subplot": 15, "raster": 15, "svg": 15, "distract": 15, "poster": 15, "wilk": 15, "oft": 15, "pie": 15, "static": 15, "math": 15, "cognit": 15, "mental": 15, "plainli": 15, "legend": 15, "scheme": 15, "surprisingli": 15, "sex": 15, "ancestri": 15, "deeb": 15, "2005": 15, "blind": 15, "reinforc": 15, "sparingli": 15, "detract": 15, "wari": 15, "overplot": 15, "overlap": 15, "zoom": 15, "vegafus": 15, "data_transform": 15, "curat": 15, "pieter": 15, "tan": 15, "noaa": 15, "gml": 15, "ralph": 15, "keel": 15, "scripp": 15, "oceanographi": 15, "dioxid": 15, "hawaii": 15, "1959": 15, "1980": 15, "co2_df": 15, "mauna_loa_data": 15, "parse_d": 15, "date_measur": 15, "ppm": 15, "338": 15, "341": 15, "06": [15, 16], "479": 15, "414": 15, "480": 15, "416": 15, "482": [15, 16], "483": 15, "484": 15, "datetime64": 15, "ns": 15, "iso": 15, "8601": 15, "alphanumer": 15, "mark_": 15, "leverag": 15, "helper": 15, "co2_scatt": 15, "upward": 15, "affirm": 15, "predecessor": 15, "successor": 15, "alter": 15, "segment": 15, "emphas": 15, "co2_lin": 15, "aha": 15, "phenomenon": 15, "fast": 15, "muddl": 15, "settl": 15, "configure_axi": 15, "titlefonts": 15, "co2_line_label": 15, "co2": 15, "configure_": 15, "1990": 15, "clip": 15, "stack": [15, 16], "co2_line_scal": 15, "late": 15, "season": 15, "summer": 15, "octob": 15, "winter": 15, "novemb": 15, "analog": 15, "paint": 15, "blank": 15, "canva": 15, "primer": 15, "akin": 15, "sketch": 15, "durat": 15, "geyser": 15, "yellowston": 15, "nation": 15, "wyom": 15, "79": 15, "333": 15, "283": 15, "533": 15, "267": 15, "117": [15, 16], "268": [15, 16], "270": 15, "817": 15, "271": 15, "467": 15, "faithful_scatt": 15, "faithful_scatter_label": 15, "faithful_scatter_labels_black": 15, "whom": 15, "hollow": 15, "can_lang_plot": 15, "can_lang_plot_label": 15, "bunch": 15, "clump": 15, "french": [15, 16], "460": 15, "850": 15, "19460850": 15, "22162865": 15, "15265335": 15, "29748265": 15, "59": [15, 16], "7166700": 15, "6943800": 15, "3825215": 15, "10242945": 15, "logarithm": 15, "squish": 15, "log_": 15, "log10": 15, "inf": 15, "can_lang_plot_log": 15, "gridlin": 15, "seven": 15, "can_lang_plot_log_revis": 15, "tickcount": 15, "kilo": 15, "mutat": 15, "most_at_home_perc": 15, "001678": 15, "000669": 15, "029188": 15, "013612": 15, "003272": 15, "001266": 15, "038291": 15, "017026": 15, "076511": 15, "037367": 15, "011351": 15, "003940": 15, "005234": 15, "002276": 15, "036741": 15, "021763": 15, "038561": 15, "020155": 15, "025831": 15, "007439": 15, "can_lang_plot_perc": 15, "meaningfulli": 15, "onto": 15, "belong": [15, 16], "can_lang_plot_categori": 15, "laid": 15, "can_lang_plot_legend": 15, "orient": 15, "tableau10": 15, "vision": 15, "unsur": 15, "dark2": 15, "aesthet": 15, "switch": 15, "can_lang_plot_them": 15, "demand": 15, "tooltip": 15, "hover": 15, "mous": 15, "pointer": 15, "can_lang_plot_tooltip": 15, "mile": 15, "mcneil": 15, "contin": 15, "south": 15, "africa": 15, "europ": 15, "asia": 15, "australia": 15, "islands_df": 15, "landmass_typ": 15, "11506": 15, "5500": 15, "16988": 15, "2968": 15, "axel": 15, "heiberg": 15, "baffin": 15, "184": 15, "bank": 15, "borneo": 15, "britain": 15, "celeb": 15, "celon": 15, "cuba": 15, "devon": 15, "ellesmer": 15, "3745": 15, "greenland": 15, "840": 15, "hainan": 15, "hispaniola": 15, "hokkaido": 15, "honshu": 15, "iceland": 15, "ireland": 15, "java": 15, "kyushu": 15, "luzon": 15, "madagascar": 15, "227": 15, "melvil": 15, "mindanao": 15, "molucca": 15, "guinea": 15, "306": 15, "zealand": 15, "newfoundland": 15, "9390": 15, "novaya": 15, "zemlya": 15, "princ": 15, "wale": 15, "sakhalin": 15, "6795": 15, "southampton": 15, "spitsbergen": 15, "sumatra": 15, "183": 15, "taiwan": 15, "tasmania": 15, "tierra": 15, "fuego": 15, "timor": 15, "islands_bar": 15, "nlargest": 15, "tilt": 15, "sort_valu": 15, "islands_top12": 15, "islands_bar_top": 15, "appeal": 15, "minu": 15, "revers": 15, "caption": 15, "slide": 15, "summari": 15, "twelv": 15, "islands_plot_sort": 15, "morlei": 15, "1882": 15, "299": 15, "792": 15, "458": 15, "km": 15, "sec": 15, "kilometr": 15, "morley_df": 15, "expt": 15, "740": 15, "900": 15, "1070": [15, 16], "940": 15, "950": 15, "810": 15, "870": 15, "experiment": 15, "fell": 15, "morley_bar": 15, "thin": 15, "bucket": 15, "morley_hist": 15, "thick": 15, "v_line": 15, "morley_hist_lin": 15, "morley_hist_color": 15, "sit": 15, "transluc": 15, "morley_hist_categor": 15, "deriv": 15, "incorrect": 15, "clearest": 15, "morley_hist_facet": 15, "1050": 15, "foremost": 15, "subtli": 15, "speed_of_light": 15, "299792": 15, "relativeerror": 15, "299000": 15, "019194": 15, "017498": 15, "035872": 15, "092578": 15, "045879": 15, "049215": 15, "052550": 15, "002516": 15, "005851": 15, "025865": 15, "morley_hist_rel": 15, "recreat": 15, "admir": 15, "morley_hist_maxbin": 15, "motiv": 15, "establish": 15, "pose": 15, "wiggl": 15, "discern": 15, "parenthes": [15, 16], "energi": 15, "automot": 15, "plant": 15, "burn": [15, 16], "fossil": 15, "fuel": 15, "greenhous": 15, "gase": 15, "byproduct": 15, "trap": 15, "heat": 15, "warm": 15, "observatori": 15, "amplitud": 15, "growth": 15, "1800": 15, "kilomet": 15, "farthest": 15, "confer": 15, "shop": 15, "billboard": 15, "pixel": 15, "lossi": 15, "lossless": 15, "jpeg": 15, "jpg": 15, "photograph": 15, "bmp": 15, "tiff": 15, "tif": 15, "gimp": 15, "redraw": 15, "ep": 15, "inkscap": 15, "shrink": 15, "portabl": 15, "hardl": 15, "1991": 15, "filenam": 15, "img": 15, "viz": 15, "faithful_plot": 15, "mb": 15, "decent": 15, "bigger": 15, "dee05": 15, "sameer": 15, "clinic": 15, "369": 15, "377": 15, "har91": 15, "wolfgang": 15, "york": 15, "mcn77": 15, "donald": 15, "mic82": 15, "veloc": 15, "nite": 15, "tate": 15, "aval": 15, "cademi": 15, "nnapoli": 15, "astronom": 15, "tk20": 15, "ccgg": 15, "vgh": 15, "jacob": 15, "granger": 15, "heer": 15, "dominik": 15, "moritz": 15, "kanit": 15, "wongsuphasawat": 15, "arvind": 15, "satyanarayan": 15, "eitan": 15, "ilia": 15, "timofeev": 15, "ben": 15, "welsh": 15, "scott": 15, "sievert": 15, "journal": [15, 16], "1057": 15, "21105": 15, "joss": 15, "01057": 15, "wil19": 15, "clau": 15, "clauswilk": 15, "dataviz": 15, "util": 16, "entiti": 16, "tabular": 16, "2235145": 16, "yellow": 16, "abbrevi": 16, "int": 16, "14159": 16, "boolean": 16, "bool": 16, "hello": 16, "nonetyp": 16, "arithmet": 16, "dict": 16, "cities_seri": 16, "separt": 16, "population_in_2016": 16, "1027613": 16, "1823281": 16, "544870": 16, "571146": 16, "321484": 16, "upcom": 16, "population_in_2016_df": 16, "criteria": 16, "wickham": 16, "No": 16, "bespok": 16, "untidi": 16, "2006": 16, "2011": 16, "land": 16, "region_lang_top5_cities_wid": 16, "cite": 16, "montr\u00e9al": 16, "lang_wid": 16, "985": 16, "1435": 16, "960": 16, "575": 16, "360": 16, "240": 16, "8485": 16, "1015": 16, "705": 16, "885": 16, "13260": 16, "2450": 16, "1090": 16, "1365": 16, "770": 16, "2440": 16, "5290": 16, "1025": 16, "380": 16, "3355": 16, "8960": 16, "3380": 16, "1430": 16, "tough": 16, "lang_mother_tidi": 16, "id_var": 16, "var_nam": 16, "value_nam": 16, "1065": 16, "1066": 16, "1067": 16, "1068": 16, "1069": 16, "met": 16, "commut": 16, "widen": 16, "region_lang_top5_cities_long": 16, "lang_long": 16, "2135": 16, "2136": 16, "2137": 16, "2138": 16, "2139": 16, "2140": 16, "lang_home_tidi": 16, "2495": 16, "1622735": 16, "1330555": 16, "8630": 16, "3245": 16, "behaviour": 16, "colum": 16, "messier": 16, "dealt": 16, "lang_messi": 16, "region_lang_top5_cities_messi": 16, "265": 16, "520": 16, "505": 16, "4045": 16, "440": 16, "330": 16, "6380": 16, "1445": 16, "530": 16, "620": 16, "3130": 16, "760": 16, "6665": 16, "860": 16, "1080": 16, "lang_messy_long": 16, "tidy_lang": 16, "astyp": 16, "depth": 16, "occas": 16, "official_lang": 16, "3836770": 16, "3218725": 16, "29800": 16, "11940": 16, "620510": 16, "412120": 16, "2669195": 16, "1607550": 16, "487": 16, "696": 16, "1065070": 16, "844740": 16, "701": 16, "910": 16, "1050410": 16, "792700": 16, "915": 16, "10950": 16, "2520": 16, "1060": 16, "ampersand": 16, "pipe": 16, "region_data": 16, "household": 16, "dwell": 16, "bellevil": 16, "43002": 16, "1354": 16, "65121": 16, "103472": 16, "45050": 16, "lethbridg": 16, "45696": 16, "3046": 16, "69699": 16, "117394": 16, "48317": 16, "thunder": 16, "bai": 16, "52545": 16, "2618": 16, "26318": 16, "121621": 16, "57146": 16, "peterborough": 16, "50533": 16, "1636": 16, "98336": 16, "121721": 16, "55662": 16, "saint": 16, "john": 16, "52872": 16, "3793": 16, "42158": 16, "126202": 16, "58398": 16, "535499": 16, "7168": 16, "96442": 16, "1323783": 16, "519693": 16, "5241": 16, "70103": 16, "1392609": 16, "960894": 16, "3040": 16, "41532": 16, "2463431": 16, "1727310": 16, "4638": 16, "24059": 16, "4098927": 16, "2135909": 16, "6269": 16, "93132": 16, "5928040": 16, "interst": 16, "city_nam": 16, "five_c": 16, "502143": 16, "9857": 16, "77908": 16, "1321426": 16, "537634": 16, "seriesa": 16, "seriesb": 16, "669": 16, "capabl": 16, "omit": 16, "startswith": 16, "darker": 16, "region_lang": 16, "moncton": 16, "saguenai": 16, "7485": 16, "7486": 16, "7487": 16, "abbotsford": 16, "mission": 16, "7488": 16, "7489": 16, "7490": 16, "23171710": 16, "std": 16, "490000e": 16, "093686e": 16, "401258e": 16, "000000e": 16, "836770e": 16, "25th": 16, "50th": 16, "75th": 16, "skipna": 16, "3061820": 16, "5600480": 16, "numeric_onli": 16, "3200": 16, "341121": 16, "3093": 16, "686248": 16, "1853": 16, "757677": 16, "5127": 16, "499332": 16, "55231": 16, "640268": 16, "64012": 16, "578320": 16, "48574": 16, "532066": 16, "94001": 16, "162338": 16, "cartoon": 16, "dataframegroupbi": 16, "0x7f1025eb0e10": 16, "137445": 16, "182390": 16, "97840": 16, "brantford": 16, "124560": 16, "troi": 16, "rivi\u00e8r": 16, "149835": 16, "331375": 16, "270715": 16, "612595": 16, "23015": 16, "875": 16, "8235": 16, "2695": 16, "102": 16, "365": 16, "23565": 16, "104": 16, "11185": 16, "122100": 16, "93495": 16, "167835": 16, "168990": 16, "115125": 16, "193445": 16, "93655": 16, "54150": 16, "100855": 16, "116645": 16, "73910": 16, "130835": 16, "937055": 16, "1343335": 16, "147805": 16, "78610": 16, "149805": 16, "1316635": 16, "2289515": 16, "302690": 16, "211705": 16, "354470": 16, "235990": 16, "166220": 16, "318540": 16, "530570": 16, "437460": 16, "749285": 16, "keyerror": 16, "qu\u00e9bec": 16, "028571": 16, "region_lang_num": 16, "wise": 16, "040": 16, "aforement": 16, "english_lang": 16, "1898": 16, "444955": 16, "2500590": 16, "1903": 16, "1918": 16, "1919": 16, "930405": 16, "1275265": 16, "1923": 16, "city_pop": 16, "unchang": 16, "tmp": 16, "ipykernel_12": 16, "2654974267": 16, "settingwithcopywarn": 16, "row_index": 16, "col_index": 16, "pydata": 16, "doc": 16, "stabl": 16, "user_guid": 16, "warn": 16, "went": 16, "silenc": 16, "div": 16, "divis": 16, "108554": 16, "151384": 16, "100543": 16, "610060": 16, "516498": 16, "647224": 16, "542966": 16, "944744": 16, "672877": 16, "764802": 16, "606588": 16, "964617": 16, "704092": 16, "794906": 16, "599882": 16, "965067": 16, "534472": 16, "658730": 16, "540123": 16, "929401": 16, "city_popul": 16, "wic14": 16, "hadlei": 16}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"acknowledg": 0, "python": [0, 4, 5, 6, 7, 8, 10, 12, 16], "edit": [0, 5, 8, 14], "about": 1, "author": 1, "classif": [2, 3], "i": [2, 11, 14], "train": [2, 3, 11], "predict": [2, 3], "overview": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "chapter": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "learn": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "object": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "The": [2, 3, 4, 8, 11, 12], "problem": [2, 11], "explor": [2, 7, 8, 11], "data": [2, 3, 5, 7, 8, 10, 11, 15, 16], "set": [2, 3, 7, 11, 13, 15], "load": [2, 7], "cancer": 2, "describ": 2, "variabl": [2, 3], "k": [2, 4, 11], "nearest": [2, 11], "neighbor": [2, 11], "distanc": 2, "between": 2, "point": 2, "evalu": [2, 3, 11], "from": [2, 10, 14, 16], "new": [2, 8, 12], "observ": 2, "each": 2, "its": 2, "5": 2, "more": 2, "than": 2, "two": 2, "explanatori": 2, "summari": [2, 3, 6, 8, 10, 16], "algorithm": [2, 4], "scikit": [2, 3], "preprocess": [2, 3], "center": 2, "scale": 2, "balanc": 2, "miss": [2, 10], "put": [2, 7], "togeth": [2, 7], "pipelin": 2, "exercis": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "refer": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "ii": [3, 12], "tune": [3, 11], "perform": [3, 16], "an": [3, 4, 10, 15], "exampl": [3, 4], "confus": 3, "matrix": 3, "tumor": 3, "imag": 3, "random": [3, 4], "seed": 3, "creat": [3, 7, 8, 14, 15], "test": [3, 11], "split": [3, 16], "classifi": 3, "label": 3, "critic": 3, "analyz": 3, "cross": 3, "valid": 3, "paramet": 3, "valu": [3, 7, 10, 16], "select": [3, 7, 16], "under": 3, "overfit": [3, 11], "predictor": [3, 12], "effect": [3, 15], "irrelev": 3, "find": 3, "good": 3, "subset": [3, 7], "forward": 3, "addit": [3, 4, 6, 8, 10, 12, 14, 15, 16], "resourc": [3, 4, 6, 8, 10, 12, 14, 15, 16], "cluster": 4, "illustr": 4, "mean": [4, 6], "measur": 4, "qualiti": 4, "restart": 4, "choos": [4, 15], "scienc": 5, "A": 5, "first": 5, "introduct": 5, "welcom": 5, "statist": [6, 16], "infer": 6, "why": [6, 10, 14], "do": [6, 16], "we": [6, 10], "need": 6, "sampl": 6, "distribut": 6, "proport": 6, "bootstrap": 6, "us": [6, 7, 10, 14, 16], "calcul": [6, 16], "plausibl": 6, "rang": 6, "panda": 7, "canadian": [7, 15], "languag": [7, 15], "ask": 7, "question": 7, "type": [7, 16], "analysi": 7, "tabular": [7, 10], "name": [7, 10, 16], "thing": 7, "frame": [7, 16], "loc": [7, 16], "filter": [7, 16], "row": [7, 10, 16], "column": [7, 10, 16], "sort_valu": 7, "head": 7, "order": 7, "ad": [7, 15, 16], "modifi": [7, 16], "combin": [7, 8, 16], "step": 7, "chain": 7, "multilin": 7, "express": 7, "visual": [7, 15], "altair": [7, 15], "bar": [7, 15], "plot": [7, 15], "format": [7, 8, 15], "chart": [7, 15], "all": [7, 10], "access": [7, 8, 10, 14], "document": 7, "code": 8, "text": [8, 10, 15], "jupyt": [8, 14], "cell": 8, "execut": 8, "kernel": 8, "markdown": 8, "save": [8, 15], "your": [8, 13, 14], "work": [8, 13, 14], "best": 8, "practic": 8, "run": 8, "notebook": 8, "includ": 8, "packag": 8, "file": [8, 10, 14, 15], "export": 8, "differ": [8, 10, 15], "html": [8, 10], "pdf": 8, "prefac": 9, "read": 10, "local": [10, 14], "web": 10, "absolut": 10, "rel": 10, "path": 10, "plain": 10, "read_csv": 10, "comma": 10, "separ": 10, "skip": 10, "when": [10, 15], "sep": 10, "argument": 10, "header": 10, "handl": [10, 14], "directli": 10, "url": 10, "preview": 10, "befor": 10, "microsoft": 10, "excel": 10, "read_excel": 10, "databas": 10, "sqlite": 10, "postgresql": 10, "should": [10, 14], "bother": 10, "write": 10, "csv": 10, "obtain": [10, 13], "scrape": 10, "css": 10, "selector": 10, "beautifulsoup": 10, "read_html": 10, "api": 10, "nasa": 10, "regress": [11, 12], "model": 11, "underfit": 11, "multivari": [11, 12], "knn": [11, 12], "strength": 11, "limit": 11, "linear": 12, "simpl": 12, "compar": 12, "multicollinear": 12, "outlier": 12, "design": 12, "other": 12, "side": 12, "up": [13, 16], "comput": 13, "worksheet": 13, "thi": [13, 16], "book": 13, "docker": 13, "window": 13, "maco": 13, "ubuntu": 13, "jupyterlab": 13, "desktop": 13, "collabor": 14, "version": 14, "control": 14, "what": [14, 16], "repositori": 14, "workflow": 14, "commit": 14, "chang": 14, "push": 14, "remot": 14, "pull": 14, "github": 14, "pen": 14, "tool": 14, "add": 14, "menu": 14, "gener": 14, "person": 14, "token": 14, "clone": 14, "specifi": 14, "make": 14, "give": 14, "project": 14, "merg": [14, 16], "conflict": 14, "commun": 14, "issu": 14, "refin": 15, "scatter": 15, "line": 15, "mauna": 15, "loa": 15, "co_": 15, "2": 15, "old": 15, "faith": 15, "erupt": 15, "time": 15, "axi": 15, "transform": 15, "color": 15, "island": 15, "landmass": 15, "histogram": 15, "michelson": 15, "speed": 15, "light": 15, "layer": 15, "binwidth": 15, "explain": 15, "size": 15, "clean": 16, "wrangl": 16, "seri": 16, "basic": 16, "doe": 16, "have": 16, "structur": 16, "tidi": 16, "go": 16, "wide": 16, "long": 16, "melt": 16, "pivot": 16, "str": 16, "deal": 16, "multipl": 16, "delimit": 16, "extract": 16, "certain": 16, "satisfi": 16, "condit": 16, "least": 16, "one": 16, "list": 16, "isin": 16, "abov": 16, "below": 16, "threshold": 16, "queri": 16, "iloc": 16, "posit": 16, "aggreg": 16, "individu": 16, "oper": 16, "group": 16, "groupbi": 16, "appli": 16, "function": 16, "across": 16}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["acknowledgements", "authors", "classification1", "classification2", "clustering", "index", "inference", "intro", "jupyter", "preface-text", "reading", "regression1", "regression2", "setup", "version-control", "viz", "wrangling"], "filenames": ["acknowledgements.md", "authors.md", "classification1.md", "classification2.md", "clustering.md", "index.md", "inference.md", "intro.md", "jupyter.md", "preface-text.md", "reading.md", "regression1.md", "regression2.md", "setup.md", "version-control.md", "viz.md", "wrangling.md"], "titles": ["Acknowledgments", "About the authors", "5. Classification I: training & predicting", "6. Classification II: evaluation & tuning", "9. Clustering", "Data Science", "10. Statistical inference", "1. Python and Pandas", "11. Combining code and text with Jupyter", "Preface", "2. Reading in data locally and from the web", "7. Regression I: K-nearest neighbors", "8. Regression II: linear regression", "13. Setting up your computer", "12. Collaboration with version control", "4. Effective data visualization", "3. Cleaning and wrangling data"], "terms": {"we": [0, 2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 16], "d": [0, 1, 6, 7, 10, 15], "like": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "thank": 0, "everyon": 0, "ha": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "contribut": [0, 1, 14], "develop": [0, 1, 3, 6, 7, 8, 9, 10, 14], "data": [0, 1, 4, 6, 9, 12, 13, 14], "scienc": [0, 1, 2, 3, 7, 8, 9, 13, 14, 16], "A": [0, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "first": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "introduct": [0, 3, 4, 6, 7, 9, 10, 12], "thi": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15], "an": [0, 1, 2, 6, 7, 8, 9, 11, 12, 13, 14, 16], "open": [0, 1, 5, 7, 8, 10, 13, 14, 15], "sourc": [0, 1, 10, 15], "textbook": [0, 1, 2, 3, 5, 9, 10, 12, 14, 16], "began": [0, 10], "collect": [0, 2, 3, 4, 6, 7, 10, 15, 16], "cours": [0, 1, 3, 4, 6, 7, 8, 9, 10, 12, 16], "read": [0, 2, 3, 5, 6, 7, 8, 9, 11, 12, 14, 15, 16], "dsci": [0, 10, 13], "100": [0, 2, 3, 6, 7, 10, 11, 12, 13, 15, 16], "new": [0, 3, 4, 6, 7, 10, 11, 13, 14, 15, 16], "introductori": [0, 3, 6], "univers": [0, 1, 6, 10], "british": [0, 1, 6, 7, 10], "columbia": [0, 1, 6, 10], "ubc": [0, 1, 10, 13], "sever": [0, 1, 2, 6, 10, 14, 15, 16], "faculti": 0, "member": [0, 2, 14], "depart": [0, 1], "statist": [0, 1, 2, 3, 4, 7, 10, 11, 12, 15], "were": [0, 2, 3, 6, 7, 8, 10, 12, 14, 15, 16], "pivot": 0, "shape": [0, 2, 3, 4, 6, 7, 10, 12, 15, 16], "direct": [0, 2, 10, 15], "greatli": [0, 16], "broad": [0, 15], "structur": [0, 3, 4, 7, 10, 15], "list": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15], "topic": [0, 3, 4, 8, 12, 14], "book": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "would": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "especi": [0, 2, 7, 10, 13, 14, 15], "mat\u00eda": 0, "salib\u00edan": 0, "barrera": 0, "hi": [0, 1], "mentorship": 0, "dure": [0, 1, 3, 7, 11, 14, 16], "initi": [0, 1, 2, 4, 7, 10, 11, 12, 14, 15], "roll": 0, "out": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "both": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "door": 0, "wa": [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "alwai": [0, 2, 3, 4, 8, 10, 11, 12, 13, 15, 16], "when": [0, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16], "need": [0, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "chat": 0, "about": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "how": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "best": [0, 3, 6, 7, 10, 11, 12, 14, 15], "introduc": [0, 6, 7, 12, 14, 15, 16], "teach": [0, 1, 2, 7], "our": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "year": [0, 2, 7, 10, 15, 16], "student": [0, 1, 6], "also": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "gabriela": 0, "cohen": 0, "freue": 0, "her": [0, 1], "561": 0, "regress": [0, 2, 3, 4, 7, 9], "i": [0, 3, 4, 6, 7, 8, 9, 10, 12, 13, 15, 16], "materi": [0, 2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "from": [0, 3, 4, 6, 7, 8, 9, 11, 12, 13, 15], "master": [0, 1], "program": [0, 1, 3, 7, 8, 9, 10, 13, 15], "some": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "linear": [0, 3, 8, 11, 15], "figur": [0, 2, 7, 16], "inspir": [0, 10], "all": [0, 2, 3, 4, 6, 8, 9, 11, 12, 13, 14, 15, 16], "those": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "who": [0, 2, 3, 6, 7, 8, 10, 14, 15, 16], "process": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "publish": [0, 10, 15], "In": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "particular": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "review": [0, 10, 14], "feedback": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "suggest": [0, 2, 3, 6, 7, 11, 12, 15, 16], "rohan": 0, "alexand": 0, "isabella": 0, "ghement": 0, "virgilio": 0, "g\u00f3mez": 0, "rubio": 0, "albert": [0, 15], "kim": 0, "adam": 0, "loi": 0, "maria": 0, "prokofieva": 0, "emili": 0, "rieder": 0, "greg": [0, 14], "wilson": [0, 7, 14], "The": [0, 1, 6, 7, 10, 13, 14, 15, 16], "improv": [0, 2, 3, 4, 6, 7, 11, 12, 14, 15], "substanti": [0, 3, 11], "insight": [0, 4, 9, 15], "give": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "special": [0, 2, 6, 7, 10, 14, 15, 16], "jim": 0, "zidek": 0, "support": [0, 2, 3, 7, 13, 15, 16], "encourag": [0, 16], "throughout": [0, 2, 3, 7, 9, 14, 16], "roger": [0, 7], "peng": [0, 7], "gracious": 0, "offer": [0, 3, 6, 10, 11, 12, 14], "write": [0, 2, 7, 8, 12, 14, 16], "foreword": 0, "final": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "ow": 0, "debt": 0, "gratitud": 0, "over": [0, 1, 2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15, 16], "past": [0, 2, 3, 4, 10, 11, 12, 13, 14, 15], "few": [0, 2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15, 16], "thei": [0, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "provid": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "invalu": 0, "worksheet": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "found": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "bug": [0, 14, 16], "us": [0, 1, 2, 3, 4, 8, 9, 11, 12, 13, 15], "stood": 0, "veri": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "patient": [0, 2, 3], "class": [0, 2, 3, 7, 10, 15, 16], "while": [0, 2, 3, 4, 7, 9, 10, 12, 15, 16], "frantic": 0, "fix": [0, 2, 3, 6, 8, 11, 14, 15, 16], "brought": 0, "level": [0, 3, 4, 6, 7, 9, 12, 15], "enthusiasm": 0, "sustain": 0, "hard": [0, 7, 10, 15, 16], "work": [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 15, 16], "creat": [0, 1, 2, 4, 6, 9, 10, 11, 12, 16], "interact": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "them": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "taught": [0, 2], "learn": [0, 1, 9], "reflect": [0, 1, 15], "content": [0, 1, 2, 5, 10, 14, 16], "translat": [0, 10], "origin": [0, 1, 2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "which": [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "focus": [0, 1, 2, 3, 11], "r": [0, 1, 3, 4, 5, 6, 7, 10, 15], "languag": [0, 1, 2, 3, 6, 8, 9, 10, 11, 13, 16], "ar": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "navya": 0, "dahiya": 0, "gloria": 0, "ye": [0, 2], "complet": [0, 1, 3, 6, 7, 8, 10, 11, 13, 14], "round": [0, 3, 6], "philip": 0, "austin": 0, "leadership": 0, "guidanc": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "gratefulli": 0, "educ": [0, 1, 2], "resourc": [0, 1, 2, 11], "fund": 0, "exercis": [0, 9, 13], "version": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16], "tiffani": [1, 5, 7, 15], "timber": [1, 5, 7, 15], "trevor": [1, 3, 4, 5, 12], "campbel": [1, 5], "melissa": [1, 5], "lee": [1, 5, 15], "adapt": [1, 15], "python": [1, 2, 3, 9, 11, 13, 14, 15], "joel": [1, 5], "ostblom": [1, 5], "lindsei": [1, 5], "heagi": [1, 5], "associ": [1, 6, 7, 9, 10, 14, 16], "professor": 1, "research": [1, 4, 15], "autom": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "scalabl": 1, "bayesian": 1, "infer": 1, "algorithm": [1, 3, 11, 12, 15], "nonparametr": [1, 2, 11], "stream": 1, "theori": [1, 2, 4, 6, 11], "he": 1, "previous": [1, 2, 6, 7, 10, 11, 15, 16], "postdoctor": 1, "advis": [1, 10, 11, 15], "tamara": 1, "broderick": 1, "comput": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "artifici": 1, "intellig": 1, "laboratori": [1, 4], "csail": 1, "institut": [1, 3, 15], "system": [1, 10, 13, 14], "societi": 1, "idss": 1, "mit": 1, "ph": 1, "candid": [1, 3, 12], "under": [1, 5, 8, 13, 14, 16], "jonathan": 1, "inform": [1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "decis": [1, 2, 3, 6, 14], "lid": 1, "befor": [1, 2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 16], "engin": [1, 10, 12, 13, 15], "toronto": [1, 10, 16], "co": [1, 15], "director": 1, "vancouv": [1, 6, 10, 15, 16], "option": [1, 2, 8, 10, 12, 13, 14, 15, 16], "role": [1, 7, 10, 15], "she": 1, "curriculum": 1, "around": [1, 3, 6, 7, 11, 12, 15, 16], "respons": [1, 2, 3, 4, 10, 11, 12, 14], "applic": [1, 3, 6, 7, 11, 12, 13, 16], "solv": [1, 2, 3, 4, 7, 9, 12, 14, 16], "real": [1, 2, 3, 6, 7, 10, 11, 12, 14, 16], "world": [1, 6, 7, 9, 14, 15, 16], "problem": [1, 3, 4, 6, 7, 8, 9, 12, 14, 15, 16], "One": [1, 2, 3, 6, 7, 8, 11, 12, 14, 15, 16], "favorit": [1, 12], "graduat": 1, "collabor": [1, 8, 9], "softwar": [1, 2, 9, 10, 13, 14, 15, 16], "packag": [1, 2, 3, 4, 7, 10, 11, 12, 13, 15, 16], "modern": [1, 2, 10, 15], "tool": [1, 2, 3, 4, 7, 8, 9, 10, 12, 15, 16], "workflow": [1, 2, 3, 4, 7, 8, 9, 11, 12], "assist": 1, "undergradu": [1, 6], "center": [1, 4, 6, 10, 12, 15, 16], "approach": [1, 2, 3, 4, 6, 7, 9, 11, 12, 14, 16], "assess": [1, 3, 4, 11, 12, 14, 15], "promot": 1, "equiti": 1, "divers": [1, 6], "inclus": [1, 3, 12, 14], "earth": [1, 15], "ocean": 1, "atmospher": [1, 15], "geophys": 1, "invers": 1, "facil": [1, 14], "combin": [1, 2, 3, 4, 9, 10, 12, 14, 15], "method": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "numer": [1, 2, 6, 10, 11, 12, 15, 16], "simul": [1, 2, 6, 15], "machin": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "answer": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 15, 16], "question": [1, 2, 3, 4, 6, 9, 11, 12, 15, 16], "subsurfac": 1, "primari": [1, 2, 7, 14, 15, 16], "includ": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "miner": 1, "explor": [1, 3, 4, 6, 10, 12, 14, 15, 16], "carbon": [1, 15], "sequestr": 1, "groundwat": 1, "environment": [1, 4], "studi": [1, 2, 3, 4, 6, 7, 11, 15, 16], "bsc": 1, "alberta": [1, 10, 16], "phd": 1, "held": 1, "posit": [1, 3, 4, 7, 11, 15], "california": [1, 11], "berkelei": 1, "prior": [1, 2, 10, 14], "start": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "current": [1, 2, 7, 8, 10, 12, 14, 15, 16], "passion": 1, "reproduc": [1, 3, 4, 6, 8, 9, 10, 14], "through": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "quantit": [1, 3, 4, 6, 7, 11, 15], "imag": [1, 2, 8, 9, 10, 13, 15], "analysi": [1, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16], "pipelin": [1, 3, 4, 11], "stem": [1, 11], "cell": [1, 2, 3, 4, 7, 10, 12, 16], "development": 1, "biologi": [1, 14], "sinc": [1, 2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "lead": [1, 2, 3, 7, 8, 14, 15], "workshop": [1, 2], "now": [1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "care": [1, 2, 3, 10, 11, 12, 15, 16], "deepli": [1, 16], "spread": [1, 2, 4, 6, 7, 15, 16], "literaci": 1, "excit": [1, 7], "programmat": [1, 3, 10], "project": [1, 2, 8, 10, 15], "previou": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "sole": [2, 8], "descript": [2, 6, 7, 8, 9, 10, 14, 15, 16], "exploratori": [2, 3, 4, 7, 9, 11, 15], "next": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "serv": [2, 3, 6, 8, 12, 14], "forai": [2, 11], "focu": [2, 3, 4, 6, 7, 8, 11, 12, 14, 15, 16], "e": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "one": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15], "valu": [2, 4, 6, 8, 11, 12, 15], "categor": [2, 3, 4, 6, 7, 11, 15, 16], "interest": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "cover": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "basic": [2, 3, 7, 10, 12, 14, 15], "make": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16], "suitabl": [2, 16], "classifi": 2, "accur": [2, 3, 6, 11, 12, 15], "well": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "where": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "possibl": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "maxim": [2, 3, 11], "accuraci": [2, 3, 6, 11, 12], "By": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "end": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "reader": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "abl": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "do": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15], "follow": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "recogn": [2, 8, 10, 11, 14, 16], "situat": [2, 3, 4, 7, 11, 14, 15, 16], "appropri": [2, 3, 4, 7, 10, 11, 13, 15, 16], "what": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15], "interpret": [2, 3, 4, 7, 8, 10, 11, 12, 14, 15, 16], "output": [2, 3, 4, 7, 8, 10, 11, 12, 15, 16], "hand": [2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15, 16], "straight": [2, 4, 11, 12, 15], "line": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 16], "euclidean": [2, 4], "graph": 2, "predictor": [2, 4, 11], "explain": [2, 4, 6, 7, 11, 12], "perform": [2, 4, 6, 7, 8, 9, 10, 11, 12, 15], "standardscal": [2, 3, 4, 11], "make_column_transform": [2, 3, 4, 11], "sampl": [2, 3, 11], "model": [2, 3, 4, 12, 14, 15], "make_pipelin": [2, 3, 4, 11], "mani": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "want": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "base": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15], "experi": [2, 15], "For": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "instanc": [2, 3, 6, 7, 10, 16], "doctor": [2, 3], "mai": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "diagnos": [2, 3], "either": [2, 3, 4, 7, 8, 9, 11, 12, 14, 16], "diseas": 2, "healthi": 2, "symptom": 2, "s": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "email": [2, 10, 14], "might": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "tag": [2, 10, 13], "given": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "spam": 2, "text": [2, 3, 6, 7, 9, 11, 12, 13, 14, 16], "credit": 2, "card": 2, "compani": 2, "whether": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15], "purchas": [2, 4, 11, 12], "fraudul": 2, "item": [2, 4, 7, 8, 10, 11, 14, 15, 16], "amount": [2, 3, 4, 8, 10, 11, 12, 15], "locat": [2, 10, 14, 15], "These": [2, 3, 4, 7, 8, 10, 12, 14, 15], "task": [2, 4, 6, 9, 11, 15, 16], "exampl": [2, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "sometim": [2, 3, 6, 7, 10, 11, 12, 13, 15, 16], "call": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "label": [2, 4, 7, 11, 15, 16], "other": [2, 3, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16], "featur": [2, 3, 8, 11, 12, 14, 15], "gener": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "assign": [2, 4, 6, 7, 10, 15, 16], "without": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "known": [2, 3, 7, 10, 12, 15], "g": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "basi": [2, 10, 15], "similar": [2, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "know": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "name": [2, 3, 4, 6, 8, 11, 12, 13, 14, 15], "come": [2, 4, 6, 7, 11, 12, 13, 15, 16], "fact": [2, 3, 6, 7, 8, 10, 12, 14], "onc": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "can": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "There": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "could": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "wide": [2, 3, 4, 10, 12, 14, 15], "hart": [2, 11], "1967": [2, 3, 11], "hodg": [2, 11], "1951": [2, 11], "your": [2, 3, 4, 6, 7, 9, 10, 11, 12, 15, 16], "futur": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15], "you": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "encount": [2, 4, 10, 11, 12, 13, 16], "tree": [2, 3, 11], "vector": [2, 3, 10, 15], "svm": 2, "logist": [2, 3, 12], "neural": 2, "network": [2, 10], "see": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "addit": [2, 7, 11], "section": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "begin": [2, 3, 4, 6, 7, 10, 11, 14, 15, 16], "It": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "worth": [2, 3, 15, 16], "mention": [2, 3, 4, 6, 8, 10, 12, 13, 14, 16], "variat": [2, 6, 11, 15], "binari": [2, 3], "onli": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "involv": [2, 3, 4, 8, 10, 12, 13, 14, 15, 16], "diagnosi": [2, 3], "run": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "multiclass": 2, "categori": [2, 3, 4, 6, 7, 10, 15, 16], "bronchiti": 2, "pneumonia": 2, "common": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16], "cold": 2, "digit": 2, "breast": [2, 3], "dr": [2, 4, 15], "william": [2, 3, 4], "h": [2, 10], "wolberg": [2, 3], "w": [2, 7, 10], "nick": [2, 3, 7], "street": [2, 3], "olvi": [2, 3], "l": [2, 10], "mangasarian": [2, 3], "et": [2, 3, 4, 6, 10, 12, 14, 15], "al": [2, 3, 4, 6, 10, 12, 14, 15], "1993": [2, 3], "row": [2, 3, 4, 6, 8, 11, 12, 14, 15], "repres": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "tumor": [2, 7], "benign": [2, 3, 7, 11], "malign": [2, 3, 7, 11], "measur": [2, 3, 6, 7, 11, 12, 15, 16], "nucleu": 2, "textur": [2, 3], "perimet": [2, 3, 7], "area": [2, 3, 7, 10, 11, 12, 14, 15, 16], "conduct": [2, 10], "physician": 2, "As": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "analys": [2, 3, 4, 7, 8, 9, 10, 14, 15, 16], "formul": [2, 6, 7, 11, 15], "precis": [2, 3, 6, 8, 11, 13, 14, 16], "here": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "avail": [2, 3, 7, 9, 10, 12, 13, 15], "unknown": [2, 6, 7], "show": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "import": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "becaus": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "tradit": 2, "non": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "driven": [2, 4], "quit": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "subject": [2, 8, 14, 15], "depend": [2, 3, 4, 6, 8, 10, 11, 12, 15, 16], "upon": [2, 3, 10], "skill": [2, 8, 10, 15], "experienc": 2, "furthermor": [2, 3, 15], "normal": [2, 3, 6, 14, 16], "danger": [2, 13], "stai": [2, 6, 10, 15], "same": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "place": [2, 3, 4, 6, 7, 8, 10, 11, 13, 14, 15, 16], "stop": [2, 3, 4, 8, 12, 13], "grow": [2, 3, 12], "get": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "larg": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15], "contrast": [2, 3, 4, 6, 7, 10, 11, 12, 14], "invad": 2, "surround": [2, 7, 10, 14, 15], "tissu": 2, "nearbi": [2, 3, 10], "organ": [2, 7, 8, 10, 14, 15, 16], "caus": [2, 3, 4, 7, 8, 11, 12, 15, 16], "seriou": [2, 7, 14], "damag": [2, 3], "stanford": 2, "health": 2, "2021": [2, 7, 10], "thu": [2, 3, 8, 10, 11, 12, 14, 16], "quickli": [2, 3, 7, 12, 15], "type": [2, 3, 4, 6, 8, 10, 11, 12, 13, 14, 15], "guid": [2, 7, 11, 14, 15], "treatment": [2, 3, 16], "step": [2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16], "wrangl": [2, 3, 7, 9, 10, 12, 15], "visual": [2, 3, 4, 6, 8, 9, 11, 12, 14, 16], "order": [2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16], "better": [2, 3, 4, 11, 12, 15], "understand": [2, 3, 4, 6, 7, 9, 10, 12, 14, 15, 16], "panda": [2, 3, 4, 6, 8, 10, 11, 12, 15, 16], "altair": [2, 3, 4, 8, 11, 12], "pd": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "alt": [2, 3, 4, 6, 7, 11, 12, 15], "case": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "file": [2, 7, 13, 16], "contain": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "csv": [2, 3, 4, 6, 7, 11, 12, 15, 16], "header": [2, 7, 8, 14, 16], "ll": [2, 3, 6, 7, 10, 11, 13, 14, 15, 16], "read_csv": [2, 3, 4, 6, 7, 8, 11, 12, 15, 16], "function": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "argument": [2, 3, 4, 6, 7, 8, 11, 15, 16], "inspect": [2, 7, 10, 15, 16], "wdbc": 2, "id": [2, 3, 6, 15], "radiu": [2, 3], "smooth": [2, 3, 11, 15], "compact": [2, 3], "concav": [2, 3], "concave_point": [2, 3], "symmetri": [2, 3], "fractal_dimens": [2, 3], "0": [2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16], "842302": 2, "m": [2, 3, 7, 10, 15], "1": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "096100": 2, "2": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 16], "071512": 2, "268817": 2, "983510": 2, "567087": 2, "3": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "280628": 2, "650542": 2, "530249": 2, "215566": 2, "253764": 2, "842517": 2, "828212": 2, "353322": 2, "684473": 2, "907030": 2, "826235": 2, "486643": 2, "023825": 2, "547662": 2, "001391": 2, "867889": 2, "84300903": 2, "578499": 2, "455786": 2, "565126": 2, "557513": 2, "941382": 2, "052000": 2, "362280": 2, "035440": 2, "938859": 2, "397658": 2, "84348301": 2, "768233": 2, "253509": 2, "592166": 2, "763792": 2, "280667": 2, "399917": 2, "914213": 2, "450431": 2, "864862": 2, "4": [2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "906602": 2, "84358402": 2, "748758": 2, "150804": 2, "775011": 2, "824624": 2, "280125": 2, "538866": 2, "369806": 2, "427237": 2, "009552": 2, "561956": 2, "564": [2, 3], "926424": 2, "109139": 2, "720838": 2, "058974": 2, "341795": 2, "040926": 2, "218868": 2, "945573": 2, "318924": 2, "312314": 2, "930209": 2, "565": [2, 3], "926682": 2, "703356": 2, "083301": 2, "614511": 2, "722326": 2, "102368": 2, "017817": 2, "692434": 2, "262558": 2, "217473": 2, "057681": 2, "566": [2, 3], "926954": 2, "701667": 2, "043775": 2, "672084": 2, "577445": 2, "839745": 2, "038646": 2, "046547": 2, "105684": 2, "808406": 2, "894800": 2, "567": [2, 3], "927241": 2, "836725": 2, "334403": 2, "980781": 2, "733693": 2, "524426": 2, "269267": 2, "294046": 2, "656528": 2, "135315": 2, "042778": 2, "568": [2, 3], "92751": 2, "b": [2, 3], "806811": 2, "220718": 2, "812793": 2, "346604": 2, "109349": 2, "149741": 2, "113893": 2, "260710": 2, "819349": 2, "560539": 2, "569": [2, 3], "12": [2, 3, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "column": [2, 3, 4, 6, 8, 11, 12, 13, 15], "biopsi": [2, 3], "remov": [2, 3, 7, 13, 14, 15], "bodi": [2, 14], "examin": [2, 3, 4, 10, 11], "presenc": [2, 3], "tradition": 2, "procedur": [2, 3, 4, 11], "invas": 2, "fine": [2, 3, 8, 14, 15, 16], "needl": 2, "aspir": 2, "present": [2, 3, 6, 7, 10, 14, 15, 16], "extract": [2, 3, 4, 7, 10, 11, 12], "small": [2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15], "less": [2, 3, 4, 6, 10, 11, 12, 14, 15, 16], "ten": [2, 6, 7, 15], "differ": [2, 3, 4, 6, 7, 11, 12, 13, 14, 16], "below": [2, 3, 6, 7, 10, 12, 14, 15], "mean": [2, 3, 7, 8, 10, 11, 12, 14, 15, 16], "across": [2, 3, 6, 7, 10, 11, 12, 14, 15], "nuclei": 2, "record": [2, 3, 6, 7, 10, 14, 15, 16], "part": [2, 3, 4, 7, 8, 9, 10, 11, 12, 14, 15, 16], "prepar": [2, 3, 15], "have": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "been": [2, 3, 8, 10, 11, 12, 14, 15, 16], "standard": [2, 3, 4, 6, 8, 11, 12, 14, 15, 16], "discuss": [2, 3, 6, 10, 11, 12, 13, 14, 15, 16], "why": [2, 3, 7, 11, 15, 16], "later": [2, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "addition": [2, 3, 4, 6, 8, 10, 12, 14, 16], "uniqu": [2, 3, 7, 14], "therefor": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "total": [2, 3, 4, 7, 10, 11, 15, 16], "per": [2, 6, 10, 14, 15, 16], "identif": 2, "number": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "deviat": [2, 3, 4, 6, 16], "grai": [2, 14, 16], "length": [2, 4, 6, 7, 15, 16], "contour": 2, "insid": [2, 3, 6, 7, 8, 10, 13, 14, 15, 16], "local": [2, 11, 13], "ratio": [2, 16], "squar": [2, 3, 4, 7, 8, 10, 11, 12, 15, 16], "portion": [2, 10], "mirror": 2, "fractal": 2, "dimens": 2, "rough": [2, 4, 15], "info": [2, 3, 7, 15, 16], "preview": [2, 3, 4, 6, 7, 8, 9, 11, 12, 14, 15, 16], "frame": [2, 3, 4, 6, 8, 10, 11, 12, 15], "easier": [2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 16], "lot": [2, 3, 4, 7, 10, 12, 15, 16], "print": [2, 3, 6, 7, 8, 10, 12, 13, 15, 16], "down": [2, 8, 10, 13, 14, 16], "page": [2, 3, 4, 5, 8, 10, 12, 13, 14], "instead": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "entri": [2, 3, 6, 7, 10, 15, 16], "core": [2, 3, 7, 15, 16], "datafram": [2, 3, 4, 6, 10, 11, 12, 15, 16], "rangeindex": [2, 3, 15, 16], "null": [2, 3, 15, 16], "count": [2, 3, 6, 7, 10, 15, 16], "dtype": [2, 3, 6, 15, 16], "int64": [2, 3, 10, 15, 16], "float64": [2, 3, 6, 10, 15, 16], "6": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "7": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "8": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "9": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "10": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "11": [2, 3, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "memori": [2, 3, 10, 15, 16], "usag": [2, 3, 7, 10, 12, 15, 16], "53": [2, 3, 6, 14], "kb": [2, 3, 15, 16], "abov": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15], "arrai": [2, 3, 4, 11, 12], "readabl": [2, 3, 7, 10, 11, 14, 15, 16], "renam": [2, 3, 6, 7, 8, 10, 11, 16], "replac": [2, 3, 6, 7, 10, 12, 13, 14, 15], "take": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "dictionari": [2, 3, 10, 16], "map": [2, 4, 7, 10, 11, 12, 15], "desir": [2, 3, 7, 10, 11, 14, 16], "verifi": [2, 6, 13], "result": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "ani": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "let": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "groupbi": [2, 6], "find": [2, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "percentag": [2, 3, 6, 7, 15], "pair": [2, 3, 4, 10, 16], "Then": [2, 3, 4, 6, 7, 8, 11, 12, 13, 14, 15, 16], "calcul": [2, 3, 4, 11, 12], "group": [2, 3, 4, 6, 7, 13, 15], "divid": [2, 3, 7, 10, 15, 16], "multipli": [2, 7, 15], "equal": [2, 3, 4, 6, 11, 12, 16], "access": [2, 3, 4, 6, 11, 13, 15, 16], "via": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15], "attribut": [2, 3, 4, 5, 7, 10, 11], "357": [2, 3], "63": [2, 3, 10, 11], "212": [2, 4, 7, 10, 15, 16], "37": [2, 3, 4, 14, 15], "size": [2, 3, 4, 6, 10, 11, 12, 16], "62": [2, 10, 11, 15], "741652": 2, "258348": 2, "conveni": [2, 3, 7, 10, 16], "value_count": [2, 3, 6, 16], "occurr": [2, 15], "If": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "pass": [2, 3, 7, 10, 11, 15, 16], "seri": [2, 3, 6, 11, 12], "occur": [2, 3, 4, 6, 8, 11, 12, 14, 15, 16], "true": [2, 3, 4, 6, 7, 8, 11, 15, 16], "fraction": [2, 3, 6, 11, 14, 15], "627417": 2, "372583": 2, "proport": [2, 3, 7, 15, 16], "draw": [2, 6, 7, 11, 12, 15], "color": [2, 3, 4, 6, 10, 11, 12, 16], "scatter": [2, 3, 4, 11, 12], "plot": [2, 3, 4, 6, 11, 12, 16], "relationship": [2, 3, 4, 6, 7, 11, 12, 15, 16], "recal": [2, 3, 4, 6, 7, 11, 12, 14, 15, 16], "default": [2, 3, 7, 8, 10, 11, 13, 14, 15, 16], "palett": 2, "colorblind": [2, 15], "friendli": [2, 15], "so": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "stick": [2, 3, 10, 14], "perim_concav": [2, 3], "chart": [2, 3, 4, 6, 11, 12], "mark_circl": [2, 3, 4, 11, 12, 15], "encod": [2, 3, 4, 6, 7, 10, 11, 12, 15], "x": [2, 3, 4, 6, 7, 10, 11, 12, 13, 15], "titl": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15], "y": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15], "versu": [2, 3, 4, 6, 7, 10, 11, 12, 16], "fig": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "typic": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "fall": [2, 3, 6, 11, 14, 15], "upper": [2, 6, 14, 15], "right": [2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16], "corner": [2, 3, 13, 14, 15], "lower": [2, 3, 6, 8, 12, 15], "left": [2, 3, 4, 5, 7, 8, 10, 12, 13, 14, 15, 16], "word": [2, 3, 6, 7, 8, 10, 11, 12, 14, 16], "tend": [2, 3, 11, 14, 15], "ones": [2, 15, 16], "larger": [2, 3, 4, 6, 10, 11, 12, 14, 15], "suppos": [2, 4, 6, 7, 8, 10, 11, 14, 16], "obtain": [2, 3, 4, 6, 7, 11, 12, 14, 15, 16], "except": [2, 10, 12, 14, 16], "sai": [2, 3, 6, 8, 10, 11, 12, 13, 15, 16], "respect": [2, 3, 4, 6, 10, 14, 15, 16], "lie": 2, "middl": [2, 6, 10], "orang": [2, 4, 6, 11, 12], "cloud": [2, 10, 14, 15], "probabl": [2, 3, 6, 10, 12], "seem": [2, 3, 6, 8, 10, 11, 12, 15, 16], "actual": [2, 3, 4, 6, 7, 10, 11, 12, 14, 16], "practic": [2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "To": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "most": [2, 3, 4, 6, 7, 8, 10, 14, 15, 16], "must": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "choos": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 16], "advanc": [2, 3, 4, 6, 8, 12, 13, 14, 15, 16], "assum": [2, 6, 8], "someon": [2, 3, 7, 8, 14], "chosen": [2, 3, 4, 12, 16], "ourselv": [2, 3, 11], "illustr": [2, 3, 6, 11, 12, 15, 16], "concept": [2, 6, 7, 9, 11, 12, 14, 15], "walk": [2, 7, 11, 14], "whose": [2, 8, 10, 14, 16], "depict": [2, 4], "red": [2, 4, 8, 10, 11, 12, 13, 14], "diamond": 2, "coordin": [2, 4, 7, 15], "idea": [2, 3, 6, 7, 8, 10, 12, 13, 14, 15, 16], "close": [2, 3, 4, 6, 7, 10, 11, 14, 15], "anoth": [2, 3, 6, 7, 8, 10, 11, 12, 14, 15, 16], "expect": [2, 3, 4, 6, 7, 8, 10, 11, 12, 16], "look": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "doe": [2, 3, 4, 6, 7, 10, 11, 12, 15], "consid": [2, 3, 4, 6, 7, 11, 12, 14, 15, 16], "closest": [2, 3, 10, 15], "among": [2, 10, 14, 16], "major": [2, 3, 4, 7, 11, 12, 15, 16], "shown": [2, 3, 4, 7, 8, 10, 11, 12, 14, 15, 16], "vote": [2, 3, 7], "three": [2, 3, 4, 6, 7, 8, 9, 10, 11, 14, 15, 16], "chose": [2, 3, 15], "noth": [2, 6, 7, 12], "though": [2, 3, 6, 7, 10, 11, 12, 14, 15, 16], "odd": [2, 10], "avoid": [2, 3, 12, 15], "ti": [2, 10], "decid": [2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "often": [2, 3, 4, 6, 7, 8, 10, 11, 14, 15, 16], "just": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "denot": [2, 4, 7, 10, 11, 12, 15, 16], "a_x": 2, "a_i": 2, "b_x": 2, "b_y": 2, "definit": [2, 10, 15, 16], "plane": [2, 12], "formula": [2, 3, 4, 11, 12, 15], "mathrm": [2, 3], "sqrt": [2, 3, 11, 14], "select": [2, 4, 6, 8, 10, 11, 12, 13, 14, 15], "correspond": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "smallest": [2, 7, 11, 15, 16], "code": [2, 3, 7, 9, 10, 11, 13, 14, 15, 16], "add": [2, 3, 4, 6, 7, 10, 11, 13, 15, 16], "root": [2, 3, 10, 11, 14], "nsmallest": [2, 11, 15], "new_obs_perimet": 2, "new_obs_concav": 2, "dist_from_new": 2, "112": 2, "241202": 2, "653051": 2, "880626": 2, "258": 2, "750277": 2, "870061": 2, "979663": 2, "351": 2, "622700": 2, "541410": 2, "143088": 2, "430": 2, "416930": 2, "314364": 2, "256806": 2, "152": 2, "160091": 2, "039155": 2, "279258": 2, "tabl": [2, 3, 5, 7, 8, 10, 13, 15, 16], "mathemat": [2, 3, 6, 11, 12, 15], "detail": [2, 3, 4, 7, 8, 10, 12, 13, 14, 15, 16], "24": [2, 3, 14, 15], "65": [2, 3, 6, 10, 11, 16], "88": [2, 3], "75": [2, 3, 6, 7, 10, 11, 15, 16], "87": [2, 3], "98": [2, 7, 12, 15], "54": [2, 3, 14, 15, 16], "14": [2, 3, 4, 6, 8, 10, 14, 15, 16], "42": [2, 6, 14, 15, 16], "31": [2, 3, 14, 15, 16], "26": [2, 3, 14, 15], "16": [2, 3, 4, 6, 10, 11, 12, 14, 15], "04": [2, 10, 13, 15, 16], "28": [2, 4, 14, 15], "circl": [2, 8, 14, 15], "although": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "toward": [2, 6, 7, 14], "exactli": [2, 3, 6, 7, 10, 11, 12, 13, 15], "appli": [2, 3, 7, 11, 12, 15], "higher": [2, 3, 6, 7, 11, 12, 15, 16], "help": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "a_": 2, "dot": [2, 7, 10, 11, 12, 15], "b_": 2, "becom": [2, 3, 4, 6, 7, 8, 11, 12, 14, 16], "still": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15], "space": [2, 8, 10, 11, 12, 13, 15], "417": [2, 15], "837": 2, "had": [2, 3, 6, 7, 10, 11, 15, 16], "ad": [2, 3, 4, 10, 11, 12, 14], "up": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15], "took": [2, 6, 7], "27": [2, 7, 11, 14, 15], "new_obs_symmetri": 2, "836722": 2, "267368": 2, "400": [2, 11, 16], "334664": 2, "886368": 2, "099359": 2, "472326": 2, "562": 2, "470430": 2, "084810": 2, "154075": 2, "499268": 2, "68": 2, "365450": 2, "812359": 2, "092064": 2, "531594": 2, "055065": 2, "555575": 2, "dimension": 2, "five": [2, 3, 13, 15, 16], "3d": [2, 11, 12], "note": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "recommend": [2, 7, 8, 9, 11, 12, 13, 14, 16], "against": [2, 8, 11, 12], "purpos": [2, 3, 4, 6, 10, 11, 12, 14, 15, 16], "complic": [2, 7, 10, 11, 15], "handl": [2, 3, 7, 15], "multipl": [2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15], "thankfulli": [2, 4], "implement": [2, 3, 12, 15], "buitinck": 2, "2013": [2, 3, 4, 12], "along": [2, 3, 6, 7, 10, 11, 13, 14, 15], "sklearn": [2, 3, 4, 11, 12], "keep": [2, 3, 6, 7, 10, 13, 14, 15, 16], "simpl": [2, 3, 4, 6, 10, 11, 13, 15, 16], "fewer": [2, 3], "mistak": [2, 3, 11, 15], "tell": [2, 3, 6, 7, 8, 10, 11, 12, 14, 15, 16], "prefer": [2, 4, 10, 12, 15, 16], "regular": [2, 10, 11, 14, 15, 16], "set_config": [2, 3, 4, 11, 12], "notic": [2, 3, 6, 7, 10, 12, 15, 16], "wai": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15, 16], "prefix": 2, "extens": [2, 8, 10, 12, 13, 14, 15], "subsequ": [2, 7, 15], "long": [2, 3, 4, 6, 7, 8, 10, 12, 14, 15], "clutter": [2, 15], "kneighborsclassifi": [2, 3], "38": [2, 4, 11, 14, 15], "charact": [2, 7, 8, 10, 14, 15, 16], "transform_output": [2, 3, 4, 11, 12], "modul": 2, "build": [2, 3, 11, 15], "pick": [2, 3, 4, 10, 12, 14, 15], "store": [2, 3, 4, 6, 7, 8, 10, 13, 14, 15, 16], "cancer_train": [2, 3], "specifi": [2, 3, 6, 7, 8, 10, 11, 12, 13, 15, 16], "weight": 2, "control": [2, 3, 8, 9, 10, 13], "uniform": [2, 3, 10], "choic": [2, 3, 4, 6, 11, 14, 15, 16], "weigh": [2, 7], "websit": [2, 3, 5, 10, 12, 14], "knn": [2, 3], "n_neighbor": [2, 3, 11], "jupyt": [2, 3, 4, 7, 9, 12, 13], "environ": [2, 3, 4, 7, 8, 12, 13, 14], "pleas": [2, 3, 4, 5, 7, 8, 12], "rerun": [2, 3, 4, 12], "html": [2, 3, 4, 12, 15, 16], "represent": [2, 3, 4, 10, 12], "trust": [2, 3, 4, 6, 12], "notebook": [2, 3, 4, 12, 13, 14], "On": [2, 3, 4, 7, 10, 11, 12, 14, 15, 16], "github": [2, 3, 4, 7, 10, 12, 15], "unabl": [2, 3, 4, 10, 12, 14], "render": [2, 3, 4, 8, 12, 14, 15], "try": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "nbviewer": [2, 3, 4, 12], "org": [2, 3, 4, 6, 7, 10, 12, 15, 16], "kneighborsclassifierkneighborsclassifi": [2, 3], "fit": [2, 3, 4, 11, 12, 15], "much": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "outsid": [2, 3, 6, 8, 11, 12, 14, 15], "heavi": 2, "lift": 2, "modifi": [2, 3, 14], "after": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "itself": [2, 3, 6, 10, 12, 15, 16], "ran": 2, "manual": [2, 3, 4, 6, 8, 10, 11, 13, 16], "time": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 16], "new_ob": 2, "Is": [2, 4, 7, 11, 15, 16], "don": [2, 3, 4, 6, 7, 8, 10, 11, 14, 15, 16], "t": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "necessarili": [2, 3, 7, 16], "correct": [2, 3, 7, 13, 14, 15, 16], "quantifi": [2, 3, 12], "think": [2, 3, 7, 8, 10, 12, 16], "rang": [2, 3, 4, 10, 11, 12, 15, 16], "matter": [2, 11, 15, 16], "identifi": [2, 3, 4, 7, 9, 10, 11, 14, 15], "effect": [2, 4, 6, 7, 11, 12, 13, 16], "But": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "doesn": [2, 3, 8, 10, 15, 16], "salari": 2, "dollar": [2, 6, 10, 11, 12], "job": [2, 10, 15], "1000": [2, 3, 6, 15], "huge": [2, 10], "compar": [2, 3, 6, 7, 10, 11, 14, 15, 16], "conceptu": [2, 14], "opposit": 2, "yearli": 2, "temperatur": 2, "degre": 2, "kelvin": 2, "celsiu": 2, "constant": [2, 12, 15], "shift": [2, 7, 8], "273": [2, 16], "even": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "likewis": [2, 16], "hypothet": 2, "thousand": [2, 3, 10, 15], "singl": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "affect": [2, 3, 7, 8, 11, 12, 15], "chang": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15, 16], "outcom": [2, 7], "averag": [2, 3, 6, 7, 10, 11, 12, 16], "central": 2, "subtract": [2, 3, 7], "said": [2, 3], "unstandard": [2, 4], "wisconsin": 2, "until": [2, 3, 4, 6, 7, 8, 10, 13, 14, 15, 16], "did": [2, 3, 6, 7, 9, 10, 11, 12, 14, 15, 16], "earlier": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "thing": [2, 3, 6, 8, 10, 13, 14, 15, 16], "unscaled_canc": 2, "wdbc_unscal": [2, 3], "1001": 2, "11840": [2, 3], "1326": 2, "08474": [2, 3], "1203": 2, "10960": [2, 3], "386": 2, "14250": [2, 3], "1297": 2, "10030": [2, 3], "1479": 2, "11100": [2, 3], "1261": 2, "09780": [2, 3], "858": 2, "08455": [2, 3], "1265": 2, "11780": [2, 3], "181": [2, 4], "05263": [2, 3], "unscal": 2, "uncent": 2, "Will": 2, "framework": [2, 12], "preprocessor": [2, 3, 4, 11], "manipul": [2, 10, 16], "transform": [2, 3, 4, 7, 11, 12, 16], "wrap": [2, 3, 4, 11], "columntransform": [2, 3, 4], "enabl": [2, 8, 10, 13, 14, 15, 16], "handi": [2, 7, 16], "sequenc": [2, 3, 7, 10, 13, 15], "compos": [2, 3, 4, 7, 11], "x27": [2, 3, 4], "columntransformercolumntransform": [2, 3, 4], "standardscalerstandardscal": [2, 3, 4], "individu": [2, 3, 6, 7, 12, 14, 15], "difficult": [2, 3, 4, 7, 8, 10, 12, 15, 16], "rather": [2, 3, 6, 7, 8, 10, 11, 14, 15, 16], "make_column_selector": [2, 3], "dtype_includ": [2, 3], "equival": [2, 7, 10, 12, 16], "lt": 2, "_column_transform": 2, "0x7f9e6228fc90": 2, "gt": 2, "readi": [2, 3, 7, 8, 10, 13, 14], "happen": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "necessari": [2, 4, 11, 13, 15], "bit": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15, 16], "unnecessari": 2, "howev": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "quantiti": [2, 3, 6, 15, 16], "scaled_canc": 2, "standardscaler__area": 2, "standardscaler__smooth": 2, "984375": 2, "568466": 2, "908708": 2, "826962": 2, "558884": 2, "942210": 2, "764464": 2, "283553": 2, "826229": 2, "280372": 2, "343856": 2, "041842": 2, "723842": 2, "102458": 2, "577953": 2, "840484": 2, "735218": 2, "525767": 2, "347789": 2, "112085": 2, "woohoo": 2, "input": [2, 3, 4, 7, 10, 11, 14, 16], "behavior": [2, 4, 11, 15, 16], "drop": [2, 3, 8, 13, 14, 15, 16], "remain": [2, 3, 4, 7, 13], "rest": [2, 3, 7, 12, 16], "remaind": [2, 3, 7, 10, 11, 16], "passthrough": 2, "separ": [2, 3, 4, 7, 8, 14, 15, 16], "underscor": [2, 7, 8, 14, 16], "again": [2, 3, 6, 7, 8, 10, 11, 12, 13, 15, 16], "preserv": [2, 3], "verbose_feature_names_out": [2, 4], "fals": [2, 3, 4, 7, 10, 11, 12, 15, 16], "should": [2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 15, 16], "leav": [2, 4, 12], "preprocessor_keep_al": 2, "scaled_cancer_al": 2, "wonder": [2, 6, 10], "technic": [2, 3, 7, 8, 11, 13, 14, 15, 16], "error": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "prone": [2, 3, 10, 16], "accident": [2, 3, 8, 10, 14, 15, 16], "forget": [2, 4, 14], "proper": 2, "free": [2, 3, 12, 14], "requir": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "yourself": [2, 4, 7, 10, 12, 14], "further": [2, 3, 4, 6, 7, 8, 10, 12, 15, 16], "automat": [2, 3, 4, 10, 11, 14, 15], "streamlin": 2, "effort": [2, 8, 10, 14], "side": [2, 5, 6, 7, 13, 14, 15], "annot": [2, 4, 6, 15], "within": [2, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "nearli": [2, 4, 12, 16], "vertic": [2, 6, 7, 11, 12, 15, 16], "align": [2, 10, 15], "black": [2, 4, 10, 15], "region": [2, 3, 10, 11, 16], "domin": 2, "intuit": [2, 3, 11, 15, 16], "reason": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "carefulli": [2, 4, 7, 10, 16], "domain": [2, 3, 7, 10, 15], "comparison": [2, 6, 12, 15, 16], "potenti": [2, 3, 4, 11, 12, 16], "issu": [2, 7, 8, 10, 12, 13, 15, 16], "imbal": 2, "overal": [2, 3, 7, 11, 15], "pattern": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "otherwis": [2, 3, 4, 6, 7, 15], "rare": [2, 4, 15], "malici": 2, "detect": [2, 4], "rarer": 2, "unimport": 2, "revisit": [2, 3, 10, 12, 16], "head": [2, 8, 10, 13, 14, 15], "top": [2, 3, 5, 6, 7, 8, 10, 11, 12, 13, 15, 16], "n": [2, 3, 4, 6, 7, 10, 11, 15, 16], "concat": [2, 6], "glue": 2, "filter": [2, 3, 6, 10, 15], "back": [2, 3, 6, 8, 10, 11, 12, 13, 14, 15, 16], "concaten": [2, 6], "axi": [2, 7, 11, 12, 14, 16], "yield": [2, 3], "taller": 2, "horizont": [2, 7, 15], "produc": [2, 3, 7, 8, 12, 15, 16], "wider": [2, 6, 7, 16], "imbalanc": [2, 3], "rare_canc": 2, "rare_plot": 2, "With": [2, 4, 7, 10, 15, 16], "least": [2, 3, 4, 6, 8, 15], "win": 2, "highlight": [2, 4, 6, 8, 10, 12, 13, 14, 16], "13": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "background": [2, 3, 6, 10, 12, 15], "blue": [2, 4, 8, 11, 14, 16], "indic": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "despit": [2, 10, 15], "simplic": [2, 3, 14], "sound": [2, 3, 8], "manner": [2, 8, 12], "fairli": [2, 3, 6, 13, 15], "nuanc": 2, "suffic": [2, 6], "rebal": 2, "oversampl": 2, "replic": [2, 6], "power": [2, 3, 7, 10, 14, 15, 16], "own": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "increas": [2, 3, 4, 6, 11, 12, 15, 16], "randomli": [2, 3, 4, 6, 12], "properli": [2, 3, 15], "random": [2, 6, 11, 12], "malignant_canc": 2, "benign_canc": 2, "malignant_cancer_upsampl": 2, "upsampled_canc": 2, "vice": [2, 3], "versa": [2, 3], "closer": [2, 15], "upsampl": 2, "wild": [2, 7, 12], "unfortun": [2, 3, 4, 6, 8, 10, 12, 15], "challeng": [2, 14, 16], "reli": [2, 3, 8, 11, 12, 15], "expert": [2, 3, 7, 13], "knowledg": [2, 7, 12, 14, 16], "relat": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "survei": [2, 7, 16], "particip": [2, 3], "margin": [2, 8], "peopl": [2, 6, 7, 8, 11, 12, 14, 15, 16], "respond": [2, 10, 14], "certain": [2, 7, 10, 14, 15], "kind": [2, 3, 4, 6, 7, 10, 15], "fear": [2, 7], "honestli": 2, "neg": [2, 3, 8, 11, 12, 14, 15, 16], "consequ": [2, 3, 6, 8, 16], "simpli": [2, 3, 10, 15, 16], "throw": 2, "awai": [2, 3, 6, 10, 11, 12, 14, 16], "bia": [2, 12], "conclus": [2, 6, 7, 15], "inadvert": [2, 8], "ignor": [2, 3, 7, 11, 16], "easili": [2, 3, 4, 7, 8, 9, 10, 14, 15, 16], "mislead": 2, "detriment": 2, "impact": [2, 4, 6, 12, 16], "techniqu": [2, 3, 4, 6, 7, 10, 12, 15], "deal": [2, 8, 10], "isn": [2, 3, 7, 10, 11, 15], "anyth": [2, 3, 7, 12, 16], "els": [2, 7, 8, 10], "subset": [2, 6, 8, 10, 11, 12, 16], "missing_canc": 2, "wdbc_miss": 2, "nan": [2, 10, 16], "475956": 2, "834601": 2, "386808": 2, "169878": 2, "160508": 2, "137124": 2, "henc": [2, 3, 4, 8, 10, 11, 15], "too": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "accomplish": [2, 3, 6, 7, 8, 15, 16], "dropna": 2, "no_missing_canc": 2, "strategi": [2, 3, 15], "imput": 2, "fill": [2, 8, 10, 12, 15], "synthet": 2, "simpleimput": 2, "simpleimputersimpleimput": 2, "directli": [2, 3, 4, 6, 7, 8, 13, 14, 16], "imputed_canc": 2, "846860": 2, "384942": 2, "document": [2, 4, 8, 9, 10, 13, 14, 15, 16], "crucial": 2, "critic": [2, 6, 7, 8, 12, 15, 16], "chain": [2, 16], "intermedi": [2, 7], "whole": [2, 3, 4, 6, 10, 14, 16], "scratch": [2, 6, 14, 15], "knn_pipelin": [2, 3], "pipelinepipelin": [2, 3, 4], "500": [2, 6, 11, 12], "075": 2, "1500": 2, "new_observ": 2, "second": [2, 3, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "15": [2, 3, 4, 6, 7, 8, 10, 11, 14, 15, 16], "seen": [2, 3, 11, 12, 14, 15, 16], "littl": [2, 3, 10, 11, 12, 15, 16], "grid": [2, 3, 11, 15], "meshgrid": 2, "numpi": [2, 3, 4, 6, 10, 11, 12, 15, 16], "high": [2, 3, 6, 7, 8, 9, 12], "transpar": [2, 7], "low": [2, 3, 12], "opac": [2, 11, 12, 15], "np": [2, 3, 4, 6, 11, 12], "val": 2, "arrang": [2, 6, 7, 15], "are_grid": 2, "linspac": 2, "min": [2, 11, 12, 15, 16], "95": [2, 3, 6, 7, 10, 12, 15], "max": [2, 3, 11, 12, 15, 16], "05": [2, 7, 10, 15], "50": [2, 3, 6, 7, 10, 11, 12, 14, 16], "smo_grid": 2, "asgrid": 2, "reshap": [2, 16], "knnpredgrid": 2, "bind": 2, "prediction_t": 2, "copi": [2, 10, 14, 16], "unscaled_plot": 2, "mark_point": [2, 15], "40": [2, 3, 6, 7, 10, 14, 15, 16], "nice": [2, 3, 8, 10, 12, 15], "fade": 2, "prediction_plot": 2, "300": [2, 3, 6, 15, 16], "accompani": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "repositori": [2, 3, 4, 6, 7, 10, 11, 12, 13, 15, 16], "launch": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "browser": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "click": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "binder": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "button": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "view": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "download": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "sure": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "instruct": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "setup": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "ensur": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "intend": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "blb": 2, "lar": 2, "gill": 2, "loupp": 2, "mathieu": 2, "blondel": 2, "fabian": 2, "pedregosa": 2, "andrea": 2, "mueller": 2, "olivi": 2, "grisel": 2, "vlad": 2, "nicula": 2, "peter": [2, 11], "prettenhof": 2, "alexandr": 2, "gramfort": 2, "jaqu": 2, "grobler": 2, "robert": [2, 3, 4, 12], "layton": 2, "jake": 2, "vanderpla": [2, 15], "arnaud": 2, "joli": 2, "brian": [2, 15], "holt": 2, "ga": [2, 15], "\u00eb": 2, "varoquaux": 2, "api": 2, "design": [2, 3, 8, 10, 14, 15, 16], "ecml": 2, "pkdd": 2, "mine": [2, 6], "108": [2, 3], "122": [2, 3], "ch67": [2, 11], "thoma": [2, 11], "ieee": [2, 4, 11], "transact": [2, 4, 11], "21": [2, 3, 7, 10, 11, 12, 14, 15], "fh51": [2, 11], "evelyn": [2, 3, 11], "joseph": [2, 11], "discriminatori": [2, 11], "discrimin": [2, 3, 11], "consist": [2, 4, 6, 7, 10, 11, 13, 14, 15, 16], "properti": [2, 3, 6, 7, 10, 11, 12, 15, 16], "report": [2, 3, 6, 7, 8, 11, 15, 16], "usaf": [2, 11], "school": [2, 7, 11], "aviat": [2, 11], "medicin": [2, 11], "randolph": [2, 11], "field": [2, 10, 11, 15], "texa": [2, 11], "swm93": [2, 3], "nuclear": [2, 3], "intern": [2, 3, 5, 15], "symposium": [2, 3], "electron": [2, 3, 14], "technolog": [2, 3, 15], "stanfordhcare21": 2, "url": [2, 3, 4, 6, 7, 12, 13, 14, 15, 16], "http": [2, 3, 4, 5, 6, 7, 9, 10, 12, 13, 14, 15, 16], "stanfordhealthcar": 2, "medic": [2, 3], "condit": 2, "continu": [3, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "its": [3, 4, 7, 8, 10, 11, 12, 14, 15, 16], "describ": [3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "matric": 3, "execut": [3, 10, 11, 14], "neighbor": [3, 12], "k": [3, 7, 10, 12, 15], "nearest": [3, 4, 12], "advantag": [3, 4, 6, 10, 11, 12, 13, 14, 15, 16], "disadvantag": [3, 4, 11, 12, 15], "wrong": [3, 6, 7, 12, 15, 16], "cancer": 3, "ask": [3, 4, 6, 10, 11, 12, 14, 15, 16], "kei": [3, 6, 7, 10, 13, 14, 15, 16], "impli": [3, 6], "between": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "oppos": [3, 10, 11, 15, 16], "memor": 3, "visit": [3, 5, 6, 7, 10, 13, 14, 15], "hospit": 3, "more": [3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "trick": 3, "asid": [3, 7, 10, 12], "match": [3, 10, 11, 12, 14, 15, 16], "observ": [3, 4, 6, 7, 11, 12, 14, 15, 16], "confid": [3, 6, 11], "golden": 3, "rule": [3, 6, 7, 11, 15], "cannot": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "than": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "realli": [3, 6, 7, 10, 11, 15, 16], "imagin": [3, 6, 8, 10, 14, 15, 16], "bad": [3, 4, 10, 15], "overestim": [3, 6], "made": [3, 4, 7, 11, 12, 13, 14, 15, 16], "frac": [3, 4, 6, 11], "summar": [3, 6, 7, 9, 10, 15, 16], "stori": [3, 8, 11, 15], "alon": [3, 6, 14], "comprehens": [3, 4, 6], "each": [3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "correctli": [3, 7, 10, 13, 15, 16], "incorrectli": 3, "57": 3, "bottom": [3, 8, 13, 14], "roughli": [3, 4, 6, 11, 12, 15], "89": [3, 7, 15], "892": 3, "misclassifi": 3, "disastr": 3, "receiv": [3, 10, 14], "particularli": [3, 10, 12, 15], "unaccept": 3, "term": [3, 4, 6, 7, 10, 11, 15, 16], "talk": [3, 10, 15], "four": [3, 4, 7, 9, 15], "perfect": [3, 15], "zero": [3, 4, 11, 12, 15, 16], "almost": [3, 4, 7, 10, 11, 15], "two": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "commonli": [3, 6, 7, 8, 12, 14, 15, 16], "metric": [3, 4, 11, 12], "togeth": [3, 4, 6, 8, 10, 15, 16], "inde": [3, 4, 6, 7, 10, 12, 15, 16], "20": [3, 6, 7, 10, 14, 15, 16], "quad": [3, 4], "25": [3, 6, 7, 10, 11, 14, 15, 16], "rel": [3, 4, 7, 15], "context": [3, 10, 11, 12, 15, 16], "certainli": [3, 6], "achiev": [3, 7, 11, 15, 16], "guess": [3, 4, 6, 7], "everi": [3, 6, 7, 8, 10, 12, 14, 16], "similarli": [3, 7, 10, 15, 16], "never": [3, 7, 11, 14], "obsev": 3, "Of": [3, 6, 12, 16], "somewher": [3, 7, 10, 11, 15], "extrem": [3, 6, 11, 12], "trade": [3, 4], "off": [3, 4, 6, 12], "fair": [3, 10, 11], "unbias": 3, "influenc": [3, 4, 6, 11, 12, 15], "human": [3, 4, 6, 10, 14, 15, 16], "counter": 3, "main": [3, 7, 13, 16], "tenet": 3, "determin": [3, 4, 6, 11, 13, 14, 15, 16], "everyth": [3, 6, 7, 13, 16], "point": [3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "investig": [3, 6, 7, 10, 15], "integ": [3, 10, 15, 16], "At": [3, 7, 8, 9, 10, 12], "track": [3, 6, 7, 14, 16], "nums_0_to_9": 3, "5": [3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "random_numbers1": 3, "to_numpi": 3, "appear": [3, 6, 7, 8, 10, 11, 12, 13, 14, 15], "fresh": [3, 8], "batch": 3, "random_numbers2": 3, "forc": [3, 15], "random_numbers1_again": 3, "random_numbers2_again": 3, "And": [3, 6, 7, 10, 11, 12, 14, 15, 16], "4235": 3, "random_numb": 3, "beyond": [3, 4, 7, 10, 11, 12, 13, 14, 15, 16], "explicitli": [3, 10, 14, 15, 16], "insert": [3, 14, 16], "therebi": [3, 15], "global": [3, 15], "drawback": 3, "buri": 3, "undesir": 3, "entir": [3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "plai": [3, 7, 10, 13], "randomst": 3, "random_st": 3, "rnd": 3, "random_numbers1_third": 3, "random_numbers2_third": 3, "load": [3, 4, 8, 9, 10, 11, 12, 15, 16], "quick": [3, 7, 10], "re": [3, 4, 7, 8, 9, 10, 11, 14, 15, 16], "scale": [3, 4, 10, 11, 12, 14, 15], "done": [3, 7, 8, 10, 13, 14, 15, 16], "preliminari": 3, "train_test_split": [3, 11, 12], "estim": [3, 6, 7, 9, 11, 12], "shuffl": 3, "stratifi": [3, 11], "exist": [3, 7, 8, 10, 12, 13, 14, 15, 16], "train_siz": [3, 11, 12], "model_select": [3, 11, 12], "cancer_test": 3, "index": [3, 7, 10, 14, 16], "426": 3, "461": 3, "481": [3, 15], "43": [3, 4, 14, 15], "143": 3, "334": 3, "434": 3, "miss": [3, 4, 15, 16], "626761": 3, "373239": 3, "last": [3, 6, 7, 9, 10, 14, 15, 16], "sensit": [3, 7, 12], "consider": 3, "aspect": [3, 6, 12, 15], "fortun": [3, 6, 7, 10, 11, 12, 16], "construct": [3, 6, 7, 10, 15, 16], "cancer_preprocessor": 3, "augment": [3, 4], "897374": 3, "313": 3, "893988": 3, "221": 3, "8812818": 3, "272": [3, 15], "8910988": 3, "340": [3, 15], "89813": 3, "852552": 3, "34": [3, 4, 14, 15, 16], "854039": 3, "252": [3, 11], "885429": 3, "45": [3, 4, 7, 10, 14, 15, 16], "857010": 3, "908469": 3, "correct_pr": 3, "8811188811188811": 3, "scitkit": 3, "score": [3, 10, 11], "cancer_acc_1": 3, "crosstab": 3, "83": [3, 11, 12], "agre": [3, 10, 12], "displaystyl": 3, "86": [3, 11], "81": [3, 11, 15], "That": [3, 6, 7, 10, 11, 15, 16], "pretti": [3, 6, 10], "wait": [3, 7, 10, 11, 12, 15, 16], "Or": [3, 6, 12], "someth": [3, 4, 6, 7, 8, 10, 11, 14, 15, 16], "99": [3, 4, 6, 11, 12, 15], "terribl": 3, "impress": [3, 15], "attent": [3, 7, 11, 16], "sacrif": 3, "easi": [3, 4, 7, 8, 10, 12, 14, 15, 16], "baselin": [3, 15], "regardless": [3, 10, 11, 15], "sens": [3, 4, 6, 7, 11, 12, 15, 16], "90": [3, 6, 7, 15, 16], "hope": [3, 10, 12, 15], "signific": [3, 7], "Be": [3, 10, 11, 15], "enough": [3, 6, 7, 10, 11, 12, 14, 15, 16], "usual": [3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "suspect": [3, 4, 6], "built": [3, 7, 8, 13, 16], "perspect": [3, 4, 11, 16], "hoorai": 3, "cautiou": 3, "misdiagnos": 3, "19": [3, 6, 10, 14, 15], "vast": [3, 4, 15, 16], "behav": [3, 6, 12], "principl": [3, 15], "ideal": [3, 8, 11, 16], "somehow": [3, 6, 10], "hasn": 3, "yet": [3, 6, 7, 8, 10, 11, 14, 15, 16], "rememb": [3, 6, 7, 8, 10, 12, 15, 16], "touch": [3, 15], "dai": [3, 8, 10, 14, 15], "strongli": [3, 9, 12], "whatev": [3, 4, 7, 15], "lucki": [3, 6], "perhap": [3, 6, 7, 8, 10, 11, 12, 15], "sub": [3, 10], "cancer_subtrain": 3, "cancer_valid": 3, "test_siz": 3, "acc": 3, "9252336448598131": 3, "92": [3, 6], "repeat": [3, 4, 6, 14], "84": [3, 15], "82": [3, 15], "none": [3, 4, 10, 12, 14, 16], "underli": [3, 4, 7], "reduc": [3, 4, 10, 15], "un": [3, 4], "c": [3, 7, 10], "evenli": [3, 11], "chunk": [3, 12], "iter": [3, 4, 7, 14, 15, 16], "fold": [3, 11], "cross_valid": 3, "cv": [3, 11], "convert": [3, 6, 10, 11, 15, 16], "cancer_pip": 3, "cv_5_df": 3, "fit_tim": 3, "score_tim": 3, "test_scor": 3, "003906": 3, "005787": 3, "941860": 3, "003718": 3, "005511": 3, "894118": 3, "003568": 3, "005465": 3, "858824": 3, "003911": 3, "005487": 3, "870588": 3, "003506": 3, "005414": 3, "823529": 3, "aggreg": [3, 7], "sem": 3, "uncertain": [3, 6, 11], "scope": [3, 4, 7, 11, 12, 13, 14, 15], "02": [3, 10, 15], "cv_5_metric": 3, "agg": [3, 12, 16], "003722": 3, "005533": 3, "877784": 3, "000084": 3, "000066": 3, "019656": 3, "limit": [3, 4, 10, 12, 14, 15, 16], "speed": 3, "trial": [3, 15], "cv_10": 3, "cv_10_df": 3, "cv_10_metric": 3, "003581": 3, "004205": 3, "865947": 3, "000027": 3, "000033": 3, "019190": 3, "slightli": [3, 6, 10, 11, 12, 15], "due": [3, 4, 6, 10, 16], "reduct": 3, "dramat": 3, "cv_50_df": 3, "cv_50_metric": 3, "003563": 3, "003120": 3, "874444": 3, "000012": 3, "000007": 3, "017040": 3, "downstream": 3, "expens": [3, 10], "chemo": 3, "radiat": 3, "therapi": 3, "death": 3, "mispredict": 3, "gridsearchcv": [3, 11], "unspecifi": 3, "cancer_tune_pip": 3, "tunabl": 3, "get_param": [3, 11], "verbos": 3, "columntransformer__n_job": 3, "columntransformer__remaind": 3, "columntransformer__sparse_threshold": 3, "columntransformer__transformer_weight": 3, "columntransformer__transform": 3, "columntransformer__verbos": 3, "columntransformer__verbose_feature_names_out": 3, "columntransformer__standardscal": 3, "columntransformer__standardscaler__copi": 3, "columntransformer__standardscaler__with_mean": 3, "columntransformer__standardscaler__with_std": 3, "kneighborsclassifier__algorithm": 3, "auto": [3, 14], "kneighborsclassifier__leaf_s": 3, "30": [3, 6, 7, 10, 11, 14, 15, 16], "kneighborsclassifier__metr": 3, "minkowski": 3, "kneighborsclassifier__metric_param": 3, "kneighborsclassifier__n_job": 3, "kneighborsclassifier__n_neighbor": 3, "kneighborsclassifier__p": 3, "kneighborsclassifier__weight": 3, "wow": [3, 6, 15], "stuff": 3, "sift": 3, "muck": [3, 10], "stand": [3, 10, 11, 15], "parameter_grid": 3, "allow": [3, 4, 6, 7, 8, 9, 10, 11, 14, 15, 16], "greater": [3, 4, 10, 16], "third": 3, "skip": [3, 8, 16], "96": [3, 12, 15], "emploi": [3, 10, 11], "okai": [3, 15, 16], "param_grid": [3, 11], "cancer_tune_grid": 3, "cv_results_": [3, 11], "format": [3, 9, 10, 11, 12, 16], "accuracies_grid": 3, "mean_fit_tim": 3, "std_fit_tim": 3, "mean_score_tim": 3, "std_score_tim": 3, "param_kneighborsclassifier__n_neighbor": 3, "param": 3, "split0_test_scor": 3, "split1_test_scor": 3, "split2_test_scor": 3, "split3_test_scor": 3, "split4_test_scor": 3, "split5_test_scor": 3, "split6_test_scor": 3, "split7_test_scor": 3, "split8_test_scor": 3, "split9_test_scor": 3, "mean_test_scor": [3, 11], "17": [3, 4, 10, 12, 14, 15], "std_test_scor": [3, 11], "18": [3, 4, 6, 7, 10, 14, 15], "rank_test_scor": 3, "int32": [3, 16], "param_kneighbors_classifier__n_neighbor": 3, "unus": 3, "sem_test_scor": [3, 11], "856589": 3, "013689": 3, "853876": 3, "021710": 3, "854097": 3, "018745": 3, "019869": 3, "868272": 3, "018769": 3, "875194": 3, "017909": 3, "875249": 3, "018477": 3, "36": [3, 4, 14, 15], "889369": 3, "014765": 3, "41": [3, 11, 14, 15, 16], "891694": 3, "015519": 3, "46": [3, 4, 14, 15], "884662": 3, "014538": 3, "51": [3, 6, 14, 15], "886988": 3, "014280": 3, "56": [3, 7], "884551": 3, "016056": 3, "61": [3, 6, 7, 10], "877519": 3, "015867": 3, "66": [3, 6, 10, 11], "882281": 3, "013591": 3, "71": [3, 10], "012634": 3, "76": 3, "879900": 3, "014982": 3, "877575": 3, "015085": 3, "91": [3, 6], "015440": 3, "shortcut": [3, 8, 15], "layer": [3, 6], "accuracy_vs_k": 3, "mark_lin": [3, 4, 11, 12, 15], "85": [3, 6, 7, 10, 11, 12, 15], "neighbour": [3, 11], "highest": [3, 16], "best_params_": [3, 11], "vari": [3, 6, 11, 12, 13, 15, 16], "exact": [3, 6, 12, 15], "80": [3, 16], "justifi": [3, 15], "optim": [3, 10, 11], "decreas": [3, 4, 6, 15, 16], "reliabl": [3, 6, 8, 15], "uncertainti": [3, 6], "cost": [3, 6, 11, 12], "prohibit": [3, 11], "large_param_grid": 3, "385": 3, "large_cancer_tune_grid": 3, "large_accuracies_grid": 3, "large_accuracy_vs_k": 3, "60": [3, 6, 7], "underfit": [3, 12], "farther": [3, 15], "sort": [3, 4, 7, 8, 10, 12, 15, 16], "boundari": [3, 12], "simpler": 3, "stronger": 3, "regard": [3, 6, 7, 8, 11, 12, 16], "themselv": [3, 10, 15], "noisi": [3, 11, 15], "jag": 3, "essenti": [3, 6, 7, 8, 10, 11, 16], "problemat": [3, 8, 10, 15], "unreli": [3, 6, 12], "strike": 3, "balanc": [3, 6], "return": [3, 4, 6, 7, 10, 12, 13, 16], "put": [3, 6, 10, 11, 12, 13, 14, 16], "defin": [3, 6, 7, 9, 10, 11, 12, 15, 16], "search": [3, 4, 10, 13, 14], "retrain": [3, 11], "strength": [3, 12, 15], "weak": [3, 11, 12, 15], "nn": 3, "assumpt": [3, 4, 11, 12], "multi": 3, "slow": [3, 8, 11, 12], "treat": [3, 4, 7, 14, 15, 16], "accept": [3, 10, 11, 13, 14], "wors": [3, 7, 16], "meaning": [3, 4, 7, 10, 12, 14], "cancer_irrelev": 3, "irrelevant1": 3, "irrelevant2": 3, "30010": 3, "08690": 3, "132": [3, 6], "19740": 3, "130": [3, 6, 16], "00": [3, 6, 16], "24140": 3, "77": [3, 6], "58": [3, 15], "19800": 3, "135": [3, 6, 15], "24390": 3, "142": 3, "14400": 3, "131": 3, "09251": 3, "35140": 3, "140": [3, 6], "00000": [3, 6], "47": [3, 4, 12, 14, 15], "increasingli": [3, 10], "distanc": [3, 4, 11, 12, 15], "corrupt": 3, "surpris": 3, "outperform": 3, "combat": 3, "extra": [3, 10, 12], "nois": [3, 15], "smoothli": 3, "trend": [3, 6, 7, 11, 12, 15], "corrobor": 3, "evid": 3, "untun": 3, "scientif": [3, 11, 12, 14], "clear": [3, 4, 6, 7, 12, 14, 15, 16], "cut": 3, "obviou": [3, 8, 12, 15], "relev": [3, 10, 11, 12], "consum": [3, 6, 16], "systemat": 3, "beal": 3, "hock": 3, "lesli": 3, "moder": 3, "ab": [3, 10, 11], "bc": [3, 6, 7], "ac": 3, "abc": 3, "million": [3, 12, 15], "computation": 3, "draper": 3, "smith": 3, "1966": 3, "eforymson": 3, "straightforward": [3, 10, 15], "form": [3, 4, 6, 7, 10, 11, 12, 15, 16], "updat": [3, 4, 13, 14, 15], "big": [3, 6, 7, 10, 14, 15], "55": [3, 6, 15, 16], "caution": [3, 8, 10], "move": [3, 7, 9, 11, 12, 14, 15], "likelihood": 3, "unlucki": [3, 4], "stumbl": 3, "risk": [3, 11], "suffer": 3, "turn": [3, 4, 7, 10, 11, 12, 16], "smaller": [3, 11, 12, 15], "irrelevant3": 3, "full": [3, 6, 7, 10, 12, 14, 15, 16], "cancer_subset": 3, "sequentialfeatureselector": 3, "tri": [3, 4, 11, 12, 15], "flexibl": [3, 8, 12, 16], "resort": 3, "loop": [3, 16], "flow": 3, "mckinnei": [3, 10, 15, 16], "2012": [3, 7, 10, 15, 16], "n_total": 3, "check": [3, 7, 9, 10, 14, 15, 16], "j": [3, 7, 10], "len": [3, 10], "accuracy_dict": 3, "selected_predictor": 3, "empti": [3, 8, 14], "n_job": 3, "best_set": 3, "argmax": 3, "append": [3, 10, 15, 16], "join": [3, 10, 14], "del": [3, 15], "891103": 3, "917450": 3, "931454": 3, "926253": 3, "906955": 3, "exhibit": [3, 8], "fluctuat": [3, 11], "attempt": [3, 4, 15], "account": [3, 13, 14], "chanc": [3, 6, 13], "elbow": [3, 4], "successfulli": [3, 8, 10, 14], "judgement": 3, "excel": [3, 7, 12, 14], "tutori": [3, 8, 10, 12], "go": [3, 6, 7, 9, 10, 12, 13, 15], "jame": [3, 4, 10, 12], "great": [3, 4, 6, 7, 8, 10, 12, 14, 15], "naiv": 3, "bay": 3, "goe": [3, 7, 8, 10, 12], "popular": [3, 4, 10, 12, 14], "bkm67": 3, "martin": 3, "lansdown": 3, "mauric": 3, "georg": 3, "kendal": 3, "david": [3, 6], "mann": 3, "discard": 3, "multivari": 3, "biometrika": 3, "366": 3, "ds66": 3, "norman": 3, "harri": 3, "wilei": [3, 15], "efo66": 3, "stepwis": 3, "backward": 3, "eastern": 3, "meet": 3, "hl67": 3, "ronald": 3, "technometr": 3, "531": 3, "540": 3, "jwht13": [3, 4, 12], "gareth": [3, 4, 12], "daniela": [3, 4, 12], "witten": [3, 4, 12], "hasti": [3, 4, 12], "tibshirani": [3, 4, 12], "springer": [3, 4, 12, 15], "1st": [3, 4, 12], "edit": [3, 4, 12, 13, 15], "www": [3, 4, 7, 10, 12], "statlearn": [3, 4, 12], "com": [3, 4, 6, 10, 12, 13, 14, 15], "mck12": [3, 10, 15, 16], "ipython": [3, 10, 13, 15, 16], "o": [3, 7, 10, 13, 15, 16], "reilli": [3, 10, 15, 16], "media": [3, 8, 10, 15, 16], "inc": [3, 6, 10, 15, 16], "subgroup": [4, 7, 15, 16], "predict": [4, 7, 9, 11, 12, 15], "differenti": 4, "classif": [4, 7, 9, 11, 12], "variabl": [4, 6, 7, 8, 10, 11, 12, 15, 16], "scikit": [4, 11, 12], "colour": 4, "kmean": 4, "set": [4, 6, 8, 9, 10, 12, 14, 16], "genet": [4, 15], "ancestr": 4, "subpopul": 4, "onlin": [4, 6, 10, 13, 14, 15], "custom": [4, 15], "uncov": [4, 8, 15], "fundament": [4, 6, 7, 15], "supervis": 4, "unsupervis": 4, "imposs": [4, 6], "articl": [4, 7], "wikipedia": [4, 10], "evalu": [4, 6, 7, 12, 15], "test": [4, 6, 12, 13], "good": [4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "ascertain": 4, "rigor": [4, 11], "lloyd": 4, "1982": 4, "hierarch": 4, "princip": 4, "compon": [4, 7], "multidimension": 4, "semisupervis": 4, "goal": [4, 7, 11, 15, 16], "benefici": [4, 10], "unlabel": [4, 7], "willing": [4, 6], "seed": [4, 6, 11, 12], "palmerpenguin": 4, "horst": 4, "2020": [4, 6, 7, 15], "kristen": 4, "gorman": 4, "palmer": 4, "station": [4, 15], "antarctica": [4, 15], "ecolog": 4, "site": [4, 5, 10], "adult": 4, "penguin": 4, "2014": [4, 16], "bill": 4, "flipper": 4, "millimet": 4, "distinct": [4, 8, 15], "speci": 4, "discoveri": [4, 12], "gentoo": 4, "bill_length_mm": 4, "flipper_length_mm": 4, "39": [4, 14, 15], "196": [4, 6, 7, 11], "182": 4, "187": [4, 6, 11], "190": [4, 11, 16], "195": [4, 7, 16], "193": [4, 11], "213": [4, 7, 10, 15, 16], "215": [4, 16], "220": [4, 16], "49": [4, 14, 15], "208": 4, "52": [4, 14], "197": 4, "189": [4, 6], "penguins_standard": 4, "bill_length_standard": 4, "flipper_length_standard": 4, "641361": 4, "189773": 4, "144917": 4, "328412": 4, "517922": 4, "921755": 4, "107617": 4, "846513": 4, "409743": 4, "677761": 4, "238168": 4, "271104": 4, "902464": 4, "433767": 4, "720106": 4, "192860": 4, "645505": 4, "355522": 4, "962559": 4, "440353": 4, "762179": 4, "205012": 4, "111528": 4, "123299": 4, "786203": 4, "626855": 4, "757407": 4, "783170": 4, "108442": 4, "776057": 4, "759092": 4, "subtyp": 4, "scatter_plot": 4, "meaningless": 4, "etc": [4, 6, 7, 10, 14, 15, 16], "adjust": [4, 15], "sum": [4, 11, 16], "wssd": 4, "intertia": 4, "mu_x": 4, "mu_i": 4, "x_1": 4, "x_2": 4, "x_3": 4, "x_4": 4, "y_1": 4, "y_2": 4, "y_3": 4, "y_4": 4, "35": [4, 7, 14, 15, 16], "far": [4, 12, 14, 15, 16], "variant": 4, "minim": [4, 11, 12, 15], "reassign": 4, "longer": [4, 7, 16], "outlin": [4, 7, 10, 15, 16], "termin": [4, 13], "fourth": 4, "onward": [4, 10, 13, 15], "guarante": [4, 13], "forev": 4, "logic": [4, 7, 10, 16], "finit": [4, 6, 15], "unlik": [4, 6, 10, 11, 15], "stuck": [4, 8, 16], "solut": [4, 6, 7], "poor": [4, 10], "lowest": [4, 10, 15], "cross": [4, 11, 12], "valid": [4, 9, 10, 11, 12], "subdivid": 4, "merg": [4, 10], "diminish": 4, "reach": [4, 10, 12, 14, 15], "being": [4, 6, 7, 8, 10, 11, 14, 15, 16], "address": [4, 7, 10, 11, 12, 14], "preprocess": [4, 11], "n_cluster": 4, "kmeanskmean": 4, "penguin_clust": 4, "labels_": 4, "altern": [4, 7, 12, 14, 15, 16], "suffix": [4, 15], "nomin": [4, 15], "discret": [4, 15], "cluster_plot": 4, "inertia_": 4, "inertia": 4, "730719092276117": 4, "varieti": [4, 10, 12, 14, 16], "ks": 4, "oper": [4, 6, 7, 10, 13, 14, 15], "safest": 4, "reus": 4, "penguin_clust_k": 4, "000000": 4, "576264": 4, "730719": 4, "343613": 4, "362131": 4, "678383": 4, "293320": 4, "975016": 4, "785232": 4, "elbow_plot": 4, "bump": [4, 15], "prevent": [4, 7, 8, 10, 15, 16], "n_init": 4, "paramet": [4, 6, 10, 11, 12, 15, 16], "realm": 4, "specif": [4, 10, 11, 13, 14, 15], "companion": [4, 10], "pca": 4, "gwf14": 4, "toni": 4, "fraser": 4, "sexual": 4, "dimorph": 4, "commun": [4, 7, 10, 15], "ntarctic": 4, "genu": 4, "emph": 4, "pygosc": 4, "plo": [4, 14], "ONE": 4, "hhg20": 4, "allison": 4, "alison": 4, "hill": [4, 10], "archipelago": 4, "allisonhorst": 4, "io": [4, 7, 10, 15], "llo82": 4, "stuart": 4, "quantiz": 4, "pcm": 4, "129": 4, "137": [4, 6, 7, 12], "releas": [4, 10], "bell": [4, 6], "telephon": 4, "paper": [4, 15], "1957": 4, "web": [5, 8, 14], "navig": [5, 7, 8, 10, 13, 14], "mobil": 5, "devic": [5, 10], "menu": [5, 7, 8, 13], "datasciencebook": [5, 9, 10, 13], "ca": [5, 7, 9, 10, 11, 12, 13], "licens": 5, "creativ": 5, "noncommerci": 5, "sharealik": 5, "popul": [6, 7, 10, 15, 16], "extend": [6, 12, 15], "inferenti": [6, 7, 9, 11, 15], "interv": 6, "approxim": 6, "broader": 6, "retail": 6, "sell": 6, "iphon": 6, "accessori": 6, "market": [6, 11, 12], "strateg": 6, "product": [6, 7, 14], "north": [6, 10, 15], "american": [6, 7, 10], "colleg": 6, "campus": 6, "america": [6, 15], "owner": [6, 10, 12], "characterist": [6, 7, 10, 15, 16], "costli": 6, "taken": [6, 7, 11, 14, 15], "canada": [6, 7, 10, 15, 16], "apart": [6, 10, 15], "rent": 6, "budget": [6, 11], "studio": 6, "rental": [6, 10], "price": [6, 10, 11, 12], "month": [6, 14, 15], "monthli": 6, "airbnb": 6, "cox": 6, "marketplac": 6, "vacat": 6, "septemb": [6, 15], "neighborhood": 6, "room": 6, "accommod": 6, "bathroom": 6, "bedroom": [6, 10, 11, 12], "bed": [6, 11, 12], "night": 6, "neighbourhood": 6, "room_typ": 6, "downtown": 6, "home": [6, 7, 10, 11, 12, 13, 15, 16], "apt": [6, 13], "bath": [6, 11], "150": [6, 12, 15], "eastsid": 6, "west": 6, "kensington": 6, "cedar": 6, "cottag": 6, "146": [6, 12], "110": 6, "4589": 6, "4590": 6, "4591": 6, "oakridg": 6, "privat": [6, 10, 14], "4592": 6, "dunbar": 6, "southland": 6, "share": [6, 8, 10, 14, 15, 16], "29": [6, 14, 15, 16], "4593": 6, "145": 6, "4594": 6, "shaughnessi": 6, "citi": [6, 7, 10, 11, 16], "plan": [6, 14], "bylaw": 6, "747497": 6, "246408": 6, "005224": 6, "hotel": 6, "000871": 6, "747": 6, "155": [6, 16], "725": 6, "250": [6, 11, 16], "025": 6, "625": 6, "350": [6, 11, 12, 16], "confirm": [6, 14, 15], "histogram": 6, "000": [6, 7, 10, 11, 12, 15], "20_000": 6, "605": 6, "606": 6, "marpol": 6, "4579": 6, "4580": 6, "160": [6, 11], "1739": 6, "1740": 6, "151": [6, 7, 15], "3904": 6, "3905": 6, "185": [6, 16], "1596": 6, "1597": 6, "kitsilano": 6, "3060": 6, "3061": 6, "hast": 6, "sunris": 6, "78": 6, "19999": 6, "527": 6, "528": 6, "1587": 6, "1588": 6, "169": 6, "3860": 6, "3861": 6, "2747": 6, "2748": 6, "285": 6, "800000": 6, "0000": 6, "999": 6, "queri": [6, 10], "qualifi": 6, "750": [6, 15], "775": 6, "225": [6, 10], "19998": 6, "700": [6, 16], "275": 6, "44552": 6, "reset_index": [6, 16], "caveat": [6, 15, 16], "twice": [6, 12], "sample_proport": 6, "44547": 6, "44548": 6, "44549": 6, "44550": 6, "44551": 6, "sample_estim": 6, "675": 6, "44541": 6, "19995": 6, "44543": 6, "19996": 6, "44545": 6, "19997": 6, "20000": 6, "mind": [6, 7, 10, 14], "sampling_distribut": 6, "mark_bar": [6, 7, 15], "bin": [6, 15], "maxbin": [6, 15], "symmetr": 6, "peak": [6, 15], "74848375": 6, "748": [6, 11], "neither": [6, 11, 15], "nor": [6, 8, 12], "underestim": 6, "tendenc": 6, "travel": 6, "wish": [6, 7, 14], "overpr": [6, 11], "population_distribut": 6, "skew": 6, "tail": [6, 10], "154": 6, "5109773617762": 6, "one_sampl": 6, "sample_distribut": 6, "153": 6, "48225": 6, "48": [6, 7, 14, 15], "wouldn": [6, 14], "alreadi": [6, 7, 8, 9, 10, 11, 12, 13, 15, 16], "mean_pric": 6, "148": 6, "56075": 6, "165": [6, 16], "50500": 6, "93925": 6, "139": 6, "14650": 6, "198": 6, "50000": 6, "192": 6, "66425": 6, "144": 6, "88600": 6, "08800": 6, "156": 6, "25000": 6, "170": 6, "mean_of_sample_mean": 6, "sample_mean": 6, "disappear": 6, "thumb": [6, 15], "emphasi": 6, "saw": [6, 10, 16], "significantli": [6, 7, 8, 12, 15, 16], "notion": [6, 11], "pretend": 6, "clever": 6, "drawn": [6, 12, 15], "median": [6, 15, 16], "slope": [6, 12], "displai": [6, 7, 8, 10, 12, 14, 15, 16], "4025": 6, "4026": 6, "renfrew": 6, "collingwood": 6, "1977": [6, 15], "1978": 6, "fairview": 6, "70": [6, 10, 15, 16], "4008": 6, "4009": 6, "269": [6, 15], "1543": 6, "1544": 6, "320": 6, "3350": 6, "3351": 6, "804": 6, "805": 6, "mount": 6, "pleasant": 6, "2286": 6, "2287": 6, "105": [6, 7, 10, 15, 16], "1010": 6, "1011": 6, "strathcona": 6, "120": [6, 7, 10, 16], "1878": 6, "1879": [6, 15], "175": 6, "1644": 6, "1645": 6, "2771": 6, "2772": 6, "4151": 6, "4152": 6, "289": 6, "4495": 6, "4496": 6, "rilei": 6, "park": [6, 15], "115": 6, "1308": 6, "1309": 6, "2246": 6, "2247": 6, "2335": 6, "2336": 6, "4059": 6, "4060": 6, "1280": 6, "1281": 6, "4324": 6, "4325": 6, "3403": 6, "3404": 6, "arbutu": 6, "ridg": 6, "664": 6, "1729": 6, "1730": 6, "93": [6, 15], "3722": 6, "3723": 6, "241": 6, "242": 6, "3955": 6, "3956": 6, "1042": 6, "1043": 6, "649": 6, "650": [6, 15], "sunset": 6, "1995": [6, 15], "1996": 6, "363": 6, "364": 6, "1783": 6, "1784": 6, "806": 6, "254": 6, "255": 6, "3365": 6, "3366": 6, "4562": 6, "4563": 6, "64": [6, 10, 11, 13], "2124": 6, "2125": 6, "200": [6, 7, 10, 11, 15], "1997": 6, "1998": 6, "257": 6, "4329": 6, "4330": [6, 16], "3408": 6, "3409": 6, "635": 6, "636": 6, "grandview": 6, "woodland": 6, "103": [6, 16], "one_sample_dist": 6, "boot1": 6, "boot1_dist": 6, "ident": [6, 7, 10], "mimic": 6, "break": [6, 10, 11, 12], "boot20000": 6, "six": [6, 7, 9, 11, 15, 16], "six_bootstrap_sampl": 6, "height": [6, 12, 15], "facet": [6, 15], "67175": 6, "42500": 6, "149": [6, 7, 12], "35000": 6, "13225": 6, "179": [6, 7], "79675": 6, "188": 6, "28225": 6, "boot20000_mean": 6, "159": 6, "29675": 6, "136": [6, 12], "55725": 6, "161": 6, "93950": 6, "22500": 6, "boot_est_dist": 6, "resampl": 6, "repeatedli": 6, "percentil": [6, 16], "captur": [6, 10, 12, 15], "narrow": [6, 10, 16], "implic": 6, "comfort": [6, 14], "strict": [6, 7], "unhelp": 6, "life": [6, 7], "deadli": 6, "ascend": [6, 7, 15], "bound": [6, 15], "97": [6, 12, 15], "quantil": 6, "express": [6, 15, 16], "5th": 6, "975": 6, "ci_bound": 6, "121": [6, 11], "607069": 6, "191": [6, 7], "525362": 6, "rule_025": 6, "mark_rul": [6, 11, 15], "f58518": 6, "strokedash": [6, 11, 15], "datum": [6, 15], "width": [6, 15], "text_025": 6, "mark_text": 6, "fontweight": 6, "bold": [6, 8], "dy": 6, "f": [6, 7, 10, 11, 13], "text_975": 6, "rule_975": 6, "finish": [6, 8, 9, 10, 13, 14, 15], "journei": 6, "surfac": [6, 11, 12, 15], "foundat": [6, 7, 10, 12], "openintro": 6, "diez": 6, "2019": [6, 15], "solid": [6, 15], "grasp": 6, "natur": [6, 14, 15, 16], "coxd": 6, "murrai": 6, "insideairbnb": 6, "09": [6, 10, 15], "01": [6, 10, 15, 16], "dccetinkayarb19": 6, "\u00e7": 6, "etinkaya": 6, "rundel": 6, "christoph": 6, "barr": 6, "os": [6, 8], "dirti": 7, "clean": [7, 9, 10], "dig": [7, 10, 16], "jump": [7, 9, 10, 15], "spoken": [7, 15, 16], "resid": [7, 15], "indigen": 7, "cultur": 7, "anywher": [7, 8], "2018": [7, 15], "sadli": 7, "colon": [7, 16], "led": [7, 15], "loss": 7, "children": 7, "speak": [7, 10, 15, 16], "mother": [7, 15, 16], "tongu": [7, 15, 16], "childhood": 7, "residenti": [7, 11], "discov": 7, "act": [7, 14, 15, 16], "harm": 7, "endang": 7, "geograph": 7, "walker": 7, "2017": [7, 14], "came": [7, 11, 15], "aborigin": [7, 10, 15, 16], "truth": 7, "reconcili": 7, "commiss": 7, "action": 7, "2015": 7, "canlang": [7, 10, 15], "2016": [7, 10, 15, 16], "censu": [7, 10, 15, 16], "214": [7, 10, 15, 16], "offici": [7, 10, 15, 16], "mother_tongu": [7, 10, 15, 16], "expos": 7, "birth": 7, "most_at_hom": [7, 10, 15, 16], "most_at_work": [7, 10, 15, 16], "lang_known": [7, 10, 15, 16], "accord": [7, 10, 15, 16], "deep": [7, 12], "simplifi": [7, 10, 16], "concentr": [7, 15], "expertis": 7, "bias": 7, "aim": [7, 9, 15], "causal": [7, 11, 15], "mechanist": [7, 15], "leek": 7, "matsui": 7, "earli": [7, 9], "live": [7, 10, 15], "provinc": [7, 10], "territori": 7, "propos": 7, "hypothes": [7, 15], "polit": 7, "parti": 7, "wealth": [7, 15], "elect": 7, "quantif": 7, "factor": [7, 15], "mechan": [7, 10, 11], "pertain": [7, 15, 16], "occasion": [7, 13, 16], "race": [7, 11, 12], "runner": 7, "regularli": [7, 8], "graphic": [7, 8, 10, 13, 14, 15], "ag": 7, "old": [7, 10, 14], "50kg": 7, "cluster": [7, 9, 15], "bought": 7, "amazon": 7, "cellphon": 7, "ownership": 7, "android": 7, "phone": 7, "essenc": 7, "spreadsheet": [7, 10], "microsoft": 7, "rectangular": 7, "primarili": [7, 11, 14, 15], "voter": 7, "affili": 7, "comma": [7, 8, 11, 16], "short": [7, 10, 15], "save": [7, 10, 13, 14], "googl": [7, 10], "sheet": [7, 10], "can_lang": [7, 10, 15, 16], "plain": [7, 8, 14], "editor": [7, 8, 10, 14], "notepad": 7, "590": [7, 10, 15], "235": [7, 10, 15, 16], "665": [7, 10, 15], "afrikaan": [7, 10, 15, 16], "10260": [7, 10, 15], "4785": [7, 10, 15], "23415": [7, 10, 15], "afro": [7, 10, 15, 16], "asiat": [7, 10, 15, 16], "1150": [7, 10, 15], "44": [7, 10, 14, 15], "akan": [7, 10, 15, 16], "twi": [7, 10, 15, 16], "13460": [7, 10, 15], "5985": [7, 10, 15], "22150": [7, 10, 15], "albanian": [7, 10, 15, 16], "26895": [7, 10, 15], "13135": [7, 10, 15], "345": [7, 10, 15], "31930": [7, 10, 15], "algonquian": [7, 10, 16], "algonquin": [7, 10, 16], "1260": [7, 10], "370": [7, 10, 16], "2480": [7, 10], "sign": [7, 10, 11, 14, 15], "2685": [7, 10], "3020": [7, 10], "1145": [7, 10], "amhar": [7, 10], "22465": [7, 10], "12785": [7, 10], "33670": [7, 10], "instal": [7, 8, 9, 10, 13], "team": [7, 14], "es": 7, "innei": 7, "2010": 7, "command": [7, 8, 10, 13], "shorter": [7, 8, 10, 14, 15], "alia": [7, 8], "gave": [7, 10], "harder": [7, 15, 16], "quot": [7, 10], "letter": [7, 13, 14], "distinguish": [7, 15], "satisfi": [7, 10], "syntax": [7, 10, 14, 16], "amp": [7, 10, 15, 16], "445": [7, 10, 15, 16], "2775": [7, 10, 15], "209": [7, 10, 15, 16], "wolof": [7, 10, 15, 16], "3990": [7, 10, 15], "1385": [7, 10, 15], "8240": [7, 10, 15], "210": [7, 10, 12, 15, 16], "wood": [7, 10, 15, 16], "cree": [7, 10, 15, 16], "1840": [7, 10, 15], "800": [7, 10, 15], "2665": [7, 10, 15], "211": [7, 10, 11, 15, 16], "wu": [7, 10, 15, 16], "shanghaines": [7, 10, 15, 16], "12915": [7, 10, 15], "7650": [7, 10, 15], "16530": [7, 10, 15], "yiddish": [7, 10, 15, 16], "13555": [7, 10, 15], "7085": [7, 10, 15], "895": [7, 10, 15], "20985": [7, 10, 15], "yoruba": [7, 10, 15, 16], "9080": [7, 10, 15], "2615": [7, 10, 15], "22415": [7, 10, 15], "screen": [7, 8, 10], "symbol": [7, 13, 15, 16], "string": [7, 10, 14, 15, 16], "my_numb": 7, "alic": 7, "formal": 7, "_": [7, 8, 15, 16], "won": [7, 10, 12, 14, 16], "complain": 7, "my": [7, 8], "syntaxerror": 7, "mayb": [7, 10], "meant": 7, "convent": [7, 8, 14], "lowercas": [7, 14], "language_data": 7, "pep": 7, "guido": 7, "van": 7, "rossum": 7, "2001": 7, "minut": [7, 8, 12, 15], "underneath": [7, 8], "ve": [7, 10, 14], "largest": [7, 10, 15, 16], "sophist": 7, "restrict": [7, 12, 16], "bracket": [7, 8, 11, 16], "statement": [7, 10, 16], "written": [7, 8, 10, 14], "doubl": [7, 8, 9, 13, 15, 16], "athabaskan": [7, 10, 16], "atikamekw": [7, 10, 16], "6150": [7, 10], "5465": 7, "1100": 7, "6645": 7, "thompson": [7, 10], "ntlakapamux": [7, 10], "335": [7, 10], "450": 7, "tlingit": [7, 10], "260": 7, "tsimshian": [7, 10], "410": 7, "206": 7, "wakashan": [7, 10], "67": [7, 10, 11, 15], "aboriginal_lang": 7, "alias": 7, "wrote": 7, "terminolog": 7, "obj": 7, "programm": 7, "confus": [7, 10, 16], "appar": 7, "rescu": 7, "selected_lang": 7, "descend": [7, 15], "decend": 7, "arranged_lang": 7, "64050": 7, "inuktitut": 7, "35210": 7, "138": 7, "ojibwai": 7, "17885": 7, "oji": 7, "12855": 7, "dene": 7, "10700": 7, "32": [7, 14, 15, 16], "cayuga": 7, "squamish": 7, "iroquoian": 7, "ten_lang": 7, "125": [7, 16], "montagnai": 7, "innu": 7, "10235": 7, "119": 7, "mi": [7, 15], "kmaq": 7, "6690": 7, "3065": 7, "180": 7, "stonei": 7, "3025": 7, "becam": 7, "curiou": 7, "728": [7, 15], "canadian_popul": [7, 15], "overwrit": 7, "opt": [7, 10, 11], "mother_tongue_perc": [7, 15], "35_151_728": [7, 15], "35151728": 7, "latter": [7, 11], "clearer": [7, 15], "182210": 7, "100166": 7, "050879": 7, "036570": 7, "030439": 7, "029117": 7, "019032": 7, "017496": 7, "008719": 7, "008606": 7, "ten_lang_perc": 7, "008": 7, "temporari": [7, 14, 16], "arranged_lang_sort": 7, "trace": [7, 8], "split": [7, 11, 12, 15], "rewrit": 7, "unwieldi": 7, "parenthesi": 7, "demonstr": [7, 10, 11, 12, 15, 16], "cleaner": 7, "messi": [7, 14, 16], "pars": [7, 10, 15], "block": [7, 10], "piec": 7, "period": [7, 8, 10, 15], "Not": [7, 16], "feed": 7, "redo": 7, "overwhelm": 7, "debug": 7, "midwai": 7, "audienc": [7, 8, 14, 15], "difficulti": 7, "scrutin": 7, "speaker": [7, 15, 16], "convei": [7, 15], "understood": 7, "tidi": 7, "shortli": 7, "ax": [7, 15], "mark": [7, 10, 14, 15], "channel": [7, 10, 11, 14, 15], "barplot_mother_tongu": 7, "refin": [7, 10], "quotat": [7, 10], "modif": [7, 16], "tackl": 7, "rotat": 7, "swap": [7, 15], "barplot_mother_tongue_axi": 7, "forward": [7, 10, 11], "suit": [7, 15, 16], "alphabet": [7, 15, 16], "reorder": 7, "ordered_barplot_mother_tongu": 7, "swampi": 7, "elsewher": [7, 10], "moos": 7, "northern": 7, "east": 7, "southern": 7, "comment": [7, 14], "hash": [7, 14], "importantli": 7, "self": [7, 10], "habit": [7, 11], "highli": [7, 14], "got": 7, "tast": 7, "ten_lang_plot": 7, "nobodi": 7, "pull": [7, 10, 13], "forgotten": [7, 14], "pop": [7, 8, 10], "slowli": 7, "adept": 7, "remind": [7, 16], "lab": [7, 13], "lookup": 7, "concis": 7, "press": [7, 8], "tab": [7, 8, 10, 13, 14], "bring": [7, 10], "typo": 7, "hold": [7, 10, 15, 16], "dialogu": 7, "dialog": [7, 14], "contextu": 7, "gvr01": 7, "coghlan": 7, "barri": [7, 16], "warsaw": 7, "style": [7, 10], "0008": 7, "lp15": 7, "jeffrei": [7, 15], "347": 7, "6228": 7, "1314": 7, "1315": 7, "pm15": 7, "elizabeth": 7, "art": [7, 15], "anyon": [7, 8, 10, 14], "skybrud": 7, "consult": [7, 10, 14], "llc": 7, "bookdown": 7, "rdpeng": 7, "artofdatasci": 7, "tim20": [7, 15], "ttimber": [7, 10, 15], "wal17": 7, "anada": 7, "canadiangeograph": 7, "wil18": 7, "kori": 7, "bccampu": 7, "opentextbc": 7, "indigenizationfound": 7, "statisticscanada16a": 7, "www12": 7, "statcan": 7, "gc": 7, "recens": 7, "dp": 7, "eng": 7, "cfm": 7, "statisticscanada16b": 7, "borigin": 7, "irst": 7, "ation": 7, "\u00e9ti": 7, "nuit": 7, "sa": 7, "2016022": 7, "x2016022": 7, "statisticscanada18": 7, "evolut": 7, "1901": 7, "www150": 7, "n1": 7, "pub": 7, "630": 7, "x2018001": 7, "htm": 7, "thepdteam20": 7, "dev": 7, "februari": 7, "doi": [7, 15], "5281": 7, "zenodo": 7, "3509134": 7, "trutharcocanada12": 7, "public": [7, 14], "govern": 7, "servic": [7, 10, 14], "trutharcocanada15": 7, "ction": 7, "www2": 7, "gov": [7, 10, 15], "asset": 7, "columbian": 7, "calls_to_action_english2": 7, "pdf": [7, 15], "wesmckinney10": 7, "ata": 7, "tructur": 7, "tatist": 7, "omput": 7, "p": [7, 10, 13], "ython": 7, "t\u00e9fan": 7, "der": 7, "arrod": 7, "illman": 7, "roceed": 7, "9th": 7, "cienc": 7, "onfer": 7, "25080": 7, "majora": 7, "92bf1922": 7, "00a": 7, "interleav": 8, "narrat": 8, "platform": [8, 14], "interfac": [8, 13, 14], "dress": 8, "morn": 8, "configur": [8, 9, 13, 14], "mix": [8, 16], "formatt": 8, "artifact": 8, "analyz": [8, 9, 10, 16], "realiti": [8, 12], "consciou": [8, 14], "screenshot": 8, "easiest": [8, 13], "jupyterhub": [8, 14], "provis": 8, "authent": [8, 14], "gain": [8, 10], "instructor": [8, 9], "refer": 8, "independ": [8, 9, 15], "entireti": 8, "activ": [8, 10], "cursor": 8, "rectangl": [8, 15], "toolbar": [8, 10], "keyboard": [8, 14], "enter": [8, 10, 13, 14, 15], "arrow": [8, 14], "restart": [8, 13], "bar": [8, 10, 12, 13], "slight": [8, 11], "session": [8, 13, 14], "delet": [8, 13, 14], "emul": 8, "window": [8, 10], "statu": 8, "idl": 8, "busi": 8, "excess": 8, "unrespons": 8, "lose": 8, "connect": [8, 10, 12, 13, 14, 15], "interrupt": 8, "paus": 8, "server": [8, 10, 14], "hub": 8, "panel": 8, "shut": [8, 13], "rich": [8, 14], "italic": 8, "bullet": [8, 10], "eventu": [8, 10, 15], "unformat": 8, "unrend": 8, "box": [8, 11, 12, 13, 14], "progress": [8, 13], "autosav": 8, "disk": [8, 10], "icon": [8, 10, 13, 14], "mac": 8, "arbitrari": [8, 15], "downsid": [8, 13], "nonlinear": [8, 12, 15], "deliber": [8, 14], "referenc": 8, "unconvent": 8, "fail": 8, "nonfunct": 8, "scenario": [8, 10], "event": [8, 14], "guard": 8, "awar": [8, 14], "sooner": 8, "linearli": [8, 12], "suffici": [8, 15], "extern": [8, 14], "heavili": 8, "loc": [8, 15], "qualiti": [8, 11, 12], "package_nam": 8, "pn": 8, "librari": [8, 10, 15], "hidden": [8, 10], "delimit": 8, "ipynb": [8, 10, 14], "shareabl": 8, "firefox": 8, "safari": 8, "chrome": 8, "edg": 8, "adob": 8, "acrobat": 8, "benefit": [8, 10, 14, 16], "standalon": 8, "font": [8, 10, 15], "launcher": 8, "visibl": [8, 14, 15], "untitl": 8, "white": 8, "troublesom": [8, 14], "repetit": 8, "dash": [8, 15], "jupyterlab": 8, "keen": 8, "commonmark": 8, "cheatsheet": 8, "audit": 9, "friend": 9, "colleagu": 9, "histori": [9, 14], "chapter": 9, "spend": [9, 10, 11, 16], "restructur": 9, "usabl": 9, "coher": 9, "variou": [10, 13, 16], "laptop": [10, 14], "gatewai": 10, "unless": [10, 13, 15], "upfront": [10, 16], "devot": 10, "shoelac": 10, "trip": 10, "u": [10, 13, 15], "niform": 10, "esourc": 10, "ocat": 10, "filepath_or_buff": 10, "skiprow": 10, "ibi": 10, "list_tabl": 10, "to_csv": 10, "pplicat": 10, "rogram": 10, "nterfac": 10, "internet": [10, 13], "astronomi": 10, "pictur": [10, 15], "request": [10, 16], "remot": 10, "directori": [10, 13, 14, 15], "filesystem": 10, "folder": [10, 13, 14], "worksheet_02": 10, "happiness_report": 10, "slash": [10, 16], "proce": [10, 13, 14, 16], "happy_data": 10, "bike_shar": 10, "tutorial_01": 10, "silli": [10, 12], "redund": [10, 15], "whew": 10, "bonu": 10, "fatima": 10, "jayden": 10, "usernam": [10, 14], "link": [10, 13, 14], "video": [10, 13], "omma": 10, "epar": 10, "v": [10, 13], "alu": 10, "aren": [10, 15, 16], "canadian": [10, 16], "canlang_data": 10, "oftentim": [10, 16], "sentenc": 10, "paragraph": [10, 15], "scientist": 10, "distribut": [10, 14, 15], "permiss": [10, 14], "21930": 10, "parsererror": 10, "messag": [10, 13, 14, 15, 16], "wasn": [10, 15], "can_lang_meta": 10, "token": 10, "didn": [10, 16], "tsv": 10, "escap": 10, "backslash": 10, "can_lang_no_nam": 10, "curli": [10, 16], "brace": 10, "col_map": 10, "canlang_data_renam": 10, "immedi": [10, 12], "raw": [10, 13, 15, 16], "githubusercont": [10, 13], "datasci": 10, "whichev": 10, "xlsx": 10, "snippet": [10, 14], "_rel": 10, "j1": 10, "w8": 10, "qrj": 10, "tf": 10, "wz": 10, "hlio": 10, "8f": 10, "3wn": 10, "ed2": 10, "gz": 10, "_r": 10, "yg": 10, "tuee": 10, "6q": 10, "rzy": 10, "l60": 10, "xtp": 10, "4vt": 10, "jq": 10, "sheet_nam": 10, "sad": 10, "usecol": 10, "beforehand": 10, "libr": 10, "offic": 10, "semicolon": 10, "decim": [10, 15, 16], "european": 10, "countri": 10, "storag": 10, "user": [10, 13, 14], "manag": [10, 13, 14], "mysql": 10, "oracl": 10, "sql": 10, "simplest": [10, 15], "db": 10, "backend": 10, "send": [10, 14], "sqlalchemi": 10, "matur": 10, "deeper": 10, "friendlier": 10, "conn": 10, "retriev": [10, 11, 14, 16], "secretli": 10, "behind": [10, 14, 15], "scene": [10, 14], "canlang_t": 10, "databaset": 10, "r0": 10, "countstar": 10, "haven": [10, 13], "sent": [10, 14], "effici": [10, 12, 14, 15], "lazi": 10, "compil": 10, "str": 10, "AS": 10, "nfrom": 10, "t0": 10, "arab": 10, "419890": 10, "223535": 10, "5585": 10, "629055": 10, "mostli": [10, 14, 15, 16], "canlang_table_filt": 10, "predic": 10, "canlang_table_select": 10, "r1": 10, "aboriginal_lang_data": 10, "attributeerror": 10, "traceback": 10, "recent": [10, 13, 14], "conda": [10, 13], "lib": 10, "python3": 10, "expr": 10, "py": [10, 13, 16], "645": 10, "__getattr__": 10, "641": 10, "hint": 10, "common_typo": 10, "642": 10, "rais": [10, 15], "643": 10, "__name__": 10, "644": 10, "tahltan": 10, "crash": 10, "postgr": 10, "client": [10, 11], "host": [10, 13, 14], "localhost": 10, "port": [10, 13], "endpoint": 10, "5432": 10, "password": [10, 14], "can_mov_db": 10, "movi": 10, "fakeserv": 10, "stat": 10, "user0001": 10, "abc123": 10, "theme": [10, 15], "medium": [10, 14], "title_alias": 10, "episod": 10, "names_occup": 10, "occup": 10, "rate": 10, "ratings_t": 10, "alchemyt": 10, "average_r": 10, "num_vot": 10, "avg_rat": 10, "order_bi": 10, "backup": 10, "integr": 10, "secur": [10, 14], "simultan": [10, 14, 16], "conflict": 10, "billion": 10, "daili": 10, "chao": 10, "ensu": 10, "no_official_lang_data": 10, "no_official_languag": 10, "magic": 10, "uncommon": 10, "secret": [10, 14], "somewhat": [10, 12], "thought": [10, 12, 16], "painstak": 10, "gather": [10, 15], "yper": 10, "ext": 10, "arkup": 10, "anguag": 10, "ascad": 10, "tyle": 10, "heet": 10, "webpag": [10, 14], "wherea": [10, 12, 16], "element": [10, 15, 16], "layout": [10, 15], "subsect": 10, "richardson": 10, "2007": 10, "reitz": 10, "2023": 10, "foot": [10, 11, 12], "craiglist": 10, "craigslist": 10, "advertis": [10, 11, 12], "span": 10, "meta": 10, "hous": [10, 11, 12], "1br": 10, "hood": 10, "13768": 10, "108th": 10, "avenu": 10, "maptag": 10, "pid": 10, "6786042973": 10, "banish": 10, "trash": [10, 13], "hide": [10, 15], "post": [10, 14], "unbanish": 10, "href": 10, "restor": 10, "2285": 10, "oof": 10, "date": [10, 14, 15], "keyword": [10, 16], "grab": 10, "complex": [10, 12, 14, 15], "selectorgadget": 10, "cc": 10, "deselect": 10, "pic": 10, "footag": 10, "gadget": 10, "robot": 10, "txt": [10, 14], "cl": 10, "spider": 10, "script": 10, "scraper": 10, "crawler": 10, "explicit": [10, 16], "realist": 10, "disallow": 10, "td": 10, "nth": 10, "child": [10, 12], "largestc": 10, "target": 10, "bs4": 10, "wiki": 10, "en": 10, "parser": 10, "population_nod": 10, "slice": [10, 15, 16], "clariti": [10, 15], "greater_toronto_area": 10, "202": 10, "london": [10, 16], "_ontario": 10, "ontario": 10, "543": 10, "551": 10, "greater_montr": 10, "montreal": [10, 16], "node": 10, "rid": 10, "get_text": 10, "fantast": 10, "albeit": 10, "canada_wiki_t": 10, "metropolitan": [10, 16], "droplevel": 10, "canada_wiki_df": 10, "rank": 10, "unnam": 10, "8_level_1": 10, "9_level_1": 10, "6202225": 10, "543551": 10, "quebec": 10, "4291732": 10, "halifax": [10, 16], "nova": 10, "scotia": 10, "465703": 10, "2642825": 10, "st": [10, 16], "catharin": [10, 16], "niagara": [10, 16], "433604": 10, "ottawa": [10, 16], "gatineau": [10, 16], "1488307": 10, "windsor": [10, 16], "422630": 10, "calgari": [10, 16], "1481806": 10, "oshawa": 10, "415311": 10, "edmonton": [10, 16], "1418118": 10, "victoria": [10, 15, 16], "397237": 10, "839311": 10, "saskatoon": 10, "saskatchewan": 10, "317480": 10, "winnipeg": [10, 16], "manitoba": 10, "834678": 10, "regina": [10, 16], "249217": 10, "hamilton": 10, "785184": 10, "sherbrook": 10, "227398": 10, "kitchen": [10, 16], "cambridg": [10, 16], "waterloo": [10, 16], "575847": 10, "kelowna": [10, 16], "222162": 10, "desktop": 10, "stun": 10, "rho": 10, "ophiuchi": 10, "juli": 10, "webb": 10, "telescop": 10, "nircam": 10, "molecular": [10, 15], "signup": 10, "safe": [10, 14], "transfer": [10, 11], "infinit": 10, "bandwidth": 10, "frequent": [10, 14], "success": [10, 14], "bog": 10, "revok": 10, "grant": 10, "quota": 10, "overrun": 10, "abid": 10, "hourli": 10, "hour": [10, 11], "planetari": 10, "apod": 10, "api_kei": 10, "your_api_kei": 10, "07": [10, 15], "explan": [10, 15], "mere": 10, "390": 10, "light": 10, "sun": [10, 15], "star": 10, "planet": 10, "peer": 10, "natal": 10, "infrar": 10, "spectacular": 10, "cosmic": 10, "snapshot": [10, 13, 14], "celebr": 10, "young": 10, "brighter": 10, "clearli": [10, 15], "sport": 10, "diffract": 10, "spike": 10, "jet": 10, "shock": 10, "hydrogen": 10, "blast": 10, "newborn": 10, "yellowish": 10, "dusti": 10, "caviti": 10, "carv": 10, "energet": 10, "Near": 10, "shadow": 10, "cast": 10, "protoplanetari": 10, "hdurl": 10, "2307": 10, "stsci": 10, "01_rhooph": 10, "png": [10, 15], "media_typ": 10, "service_vers": 10, "v1": 10, "01_rhooph1024": 10, "neat": 10, "json": 10, "javascript": 10, "notat": [10, 16], "nasa_data_singl": 10, "start_dat": 10, "end_dat": 10, "nasa_data": 10, "74": [10, 15], "copyright": 10, "data_dict": 10, "nasa_df": 10, "carina": 10, "nebula": 10, "ncarlo": 10, "taylor": 10, "2305": 10, "carnorth": 10, "flat": [10, 11, 12, 15], "rock": 10, "mar": 10, "nnasa": 10, "njpl": 10, "caltech": 10, "nmsss": 10, "nprocess": 10, "ne": 10, "flatmar": 10, "03": [10, 15, 16], "centauru": 10, "peculiar": 10, "island": 10, "nmarco": 10, "lorenzi": 10, "nangu": 10, "lau": 10, "tommi": 10, "tse": 10, "ntex": 10, "ngc5128_": 10, "galaxi": 10, "famou": 10, "hole": 10, "pia23122": 10, "shackleton": 10, "shadowcam": 10, "shacklet": 10, "69": 10, "doom": 10, "eta": 10, "nesa": 10, "nhubbl": 10, "nlice": 10, "etacarin": 10, "dust": 10, "ngc": 10, "6559": 10, "nadam": 10, "ntelescop": 10, "ngc6559_": 10, "sunspot": 10, "spot": 10, "72": 10, "ring": 10, "spiral": 10, "1398": 10, "ngc1398_": 10, "73": [10, 15], "readili": 10, "heart": 10, "awesom": 10, "udac": 10, "linux": [10, 13], "rthepsfoundation23": 10, "kenneth": 10, "readthedoc": 10, "latest": [10, 13, 14, 16], "ric07": 10, "leonard": 10, "beauti": 10, "soup": 10, "april": [10, 15], "nasaesacsa": 10, "23": [10, 12, 14, 15, 16], "esa": 10, "csa": 10, "pontoppidan": 10, "pagan": 10, "esawebb": 10, "weic2316a": 10, "realtsproject21": 10, "internetlivestat": 10, "faster": [11, 15], "rmspe": [11, 12], "rmse": [11, 12], "vs": [11, 15], "person": [11, 12, 15], "week": 11, "annual": 11, "boston": 11, "marathon": 11, "sale": [11, 12], "spline": 11, "heurist": 11, "932": 11, "estat": [11, 12], "sacramento": [11, 12], "bee": 11, "newspap": 11, "realtor": 11, "zip": [11, 13, 14], "sqft": [11, 12], "latitud": 11, "longitud": 11, "z95838": 11, "836": [11, 16], "59222": 11, "631913": 11, "434879": 11, "z95823": 11, "1167": 11, "68212": 11, "478902": 11, "431028": 11, "z95815": 11, "796": 11, "68880": 11, "618305": 11, "443839": 11, "852": 11, "69307": 11, "616835": 11, "439146": 11, "z95824": 11, "797": 11, "81900": 11, "519470": 11, "435768": 11, "927": 11, "z95829": 11, "2280": 11, "232425": 11, "457679": 11, "359620": 11, "928": [11, 16], "1477": 11, "234000": 11, "499893": 11, "458890": 11, "929": 11, "citrus_height": 11, "z95610": 11, "1216": 11, "235000": 11, "708824": 11, "256803": 11, "930": [11, 15], "elk_grov": 11, "z95758": 11, "1685": 11, "235301": 11, "417000": 11, "397424": 11, "931": 11, "el_dorado_hil": 11, "z95762": 11, "1362": 11, "235738": 11, "655245": 11, "075915": 11, "livabl": 11, "feet": [11, 12], "usd": [11, 12], "unit": [11, 12, 15, 16], "front": [11, 15], "0f": [11, 12], "sold": [11, 12], "former": 11, "dive": 11, "subsampl": 11, "small_sacramento": 11, "pai": 11, "absent": 11, "small_plot": 11, "overlai": 11, "line_df": 11, "2000": 11, "dist": 11, "nearest_neighbor": 11, "298": 11, "1900": 11, "361745": 11, "487409": 11, "461413": 11, "718": 11, "antelop": 11, "z95843": 11, "2160": 11, "290000": 11, "704554": 11, "354753": 11, "rosevil": 11, "z95678": 11, "1744": 11, "326951": 11, "771917": 11, "304439": 11, "256": 11, "z95835": 11, "1718": 11, "250000": 11, "676658": 11, "528128": 11, "282": 11, "rancho_cordova": 11, "z95670": 11, "1671": 11, "175000": 11, "591477": 11, "315340": 11, "329": 11, "280739": 11, "280": [11, 15, 16], "739": 11, "unansw": 11, "abil": [11, 14, 15, 16], "lock": [11, 12], "sacramento_train": [11, 12], "sacramento_test": [11, 12], "limits_": 11, "y_i": 11, "hat": 11, "_i": 11, "th": 11, "forecast": 11, "overshoot": 11, "undershoot": 11, "equat": [11, 12], "kneighborsregressor": [11, 12], "neg_root_mean_squared_error": 11, "kneighborsregressor__n_neighbor": 11, "sacr_pipelin": 11, "sacr_preprocessor": 11, "201": 11, "sacr_gridsearch": 11, "sacr_result": 11, "param_kneighborsregressor__n_neighbor": 11, "111694": 11, "373571": 11, "2670": 11, "504864": 11, "93921": 11, "308372": 11, "2535": 11, "377923": 11, "87509": 11, "079427": 11, "206833": 11, "87326": 11, "061020": 11, "3231": 11, "353419": 11, "87092": 11, "836379": 11, "3699": 11, "135233": 11, "94560": 11, "902424": 11, "4272": 11, "135780": 11, "94820": 11, "737639": 11, "4260": 11, "831380": 11, "95015": 11, "919184": 11, "4258": 11, "119157": 11, "95332": 11, "052943": 11, "4248": 11, "208563": 11, "199": 11, "95592": 11, "418971": 11, "4229": 11, "057508": 11, "moment": [11, 16], "nonneg": 11, "neg_": 11, "convolut": 11, "alright": [11, 15], "101": [11, 16], "minimum": [11, 12, 16], "699": 11, "perfectli": [11, 14, 15], "datapoint": 11, "inflex": 11, "idiosyncrat": 11, "unseen": [11, 12], "mean_squared_error": [11, 12], "y_true": [11, 12], "y_pred": [11, 12], "83825": 11, "17556316577": 11, "825": 11, "232": 11, "neglig": 11, "buyer": 11, "afford": 11, "maximum": [11, 12, 16], "5000": 11, "superimpos": [11, 12], "qualit": [11, 12], "opportun": 11, "sqft_prediction_grid": [11, 12], "arang": 11, "base_plot": 11, "sacr_preds_plot": [11, 12], "best_k_sacr": 11, "ff7f0e": [11, 12], "concern": [11, 12], "incorpor": [11, 16], "plot_b": 11, "moreov": 11, "85886": 11, "018186": 11, "4390": 11, "111887": 11, "886": 11, "rmspe_mult": 11, "81514": 11, "60777252799": 11, "515": 11, "overlaid": [11, 12], "2d": 11, "newli": [11, 14], "character": 12, "conclud": 12, "train": 12, "slower": 12, "confusingli": 12, "undervalu": 12, "beta_0": 12, "beta_1": 12, "cdot": 12, "intercept": [12, 15], "coeffici": 12, "parametr": 12, "push": 12, "happili": 12, "crazi": 12, "shouldn": 12, "600": [12, 15], "276": 12, "027": 12, "plausibl": 12, "linearregress": 12, "linear_model": 12, "coef_": 12, "intercept_": 12, "lm": 12, "786883": 12, "16937": 12, "621919": 12, "hurt": 12, "afterward": [12, 16], "16938": 12, "938": 12, "85898": 12, "4768972745": 12, "898": 12, "tricki": [12, 13], "all_point": 12, "wiggli": 12, "curv": [12, 15], "oscil": [12, 15], "Such": 12, "fare": 12, "extrapol": 12, "obvious": 12, "mlm": 12, "linearregressionlinearregress": 12, "lm_mult_test_rmsp": 12, "83509": 12, "92922019486": 12, "510": 12, "hallmark": 12, "93632036": 12, "16046": 12, "79938106": 12, "47210": 12, "42409095276": 12, "beta_2": 12, "hyperplan": 12, "047": 12, "tune": [12, 15], "collinear": 12, "judg": 12, "unbeknownst": 12, "analyst": 12, "parent": 12, "absurdli": 12, "nevertheless": [12, 15], "subtl": [12, 16], "inaccur": 12, "ever": [12, 14, 16], "384": 12, "ft": 12, "627": 12, "274": 12, "556": 12, "231": 12, "94": 12, "ic": 12, "cream": 12, "flavor": [12, 15], "remark": 12, "homeown": 12, "df": [12, 16], "fulli": [12, 15], "5994": 12, "288853": 12, "1688": 12, "092090": 12, "9859": 12, "021194": 12, "9160": 12, "812375": 12, "6400": 12, "212624": 12, "7341": 12, "333609": 12, "8434": 12, "656970": 12, "3329": 12, "106273": 12, "7170": 12, "311442": 12, "7895": 12, "567003": 12, "cubic": 12, "z": 12, "strong": [12, 15], "magnitud": [12, 15], "leap": 12, "stone": 12, "enjoi": 12, "ventura": 13, "22": [13, 14, 15], "cpu": 13, "english": [13, 15, 16], "virtual": 13, "git": [13, 14], "rightmost": 13, "compress": [13, 15], "unzip": 13, "autograd": 13, "pre": 13, "isol": 13, "interf": 13, "ex": 13, "wizard": 13, "wsl": 13, "hyper": 13, "prompt": [13, 14], "cmd": 13, "admin": 13, "administr": 13, "log": [13, 14, 15], "bio": 13, "hotkei": 13, "esc": 13, "reboot": 13, "familiar": 13, "ubcdsci": 13, "proceed": [13, 16], "dockerfil": 13, "besid": [13, 14], "expand": [13, 14, 16], "textbox": 13, "8888": 13, "volum": 13, "path": [13, 15, 16], "jovyan": 13, "scroll": [13, 14], "127": 13, "troubleshoot": 13, "tip": 13, "dmg": 13, "intel": 13, "processor": 13, "older": 13, "appl": 13, "newer": 13, "drag": [13, 14], "sudo": 13, "certif": 13, "curl": 13, "gnupg": 13, "fssl": 13, "sh": 13, "chmod": 13, "rm": 13, "pwd": 13, "homepag": 13, "bundl": 13, "kernel": 13, "pip": 13, "upgrad": 13, "env": 13, "intro": 13, "yml": 13, "compat": 13, "xcode": 13, "x64": 13, "arm64": 13, "debian": 13, "deb": 13, "dpkg": 13, "jlab": 13, "me": 14, "ago": 14, "holder": 14, "lifespan": 14, "resolv": 14, "revis": 14, "mess": [14, 15], "repercuss": 14, "boggl": 14, "unclear": 14, "document_final_draft_fin": 14, "to_hand_in_final_v2": 14, "polish": 14, "lack": 14, "springboard": 14, "fruit": 14, "revert": 14, "Being": 14, "facilit": 14, "todai": [14, 15], "safeti": 14, "workspac": 14, "schemat": 14, "maintain": 14, "told": 14, "metadata": 14, "brief": 14, "narr": 14, "readm": 14, "md": 14, "draft": 14, "shorten": 14, "daa29d6": 14, "884c7ce": 14, "prerequisit": 14, "stage": 14, "physic": [14, 15], "placehold": 14, "synchron": 14, "eas": 14, "templat": 14, "canadian_languag": 14, "hyphen": 14, "privaci": 14, "happi": 14, "green": [14, 16], "respositori": 14, "reserv": 14, "upload": [14, 15], "toggl": 14, "markdown": 14, "archiv": 14, "defeat": 14, "prove": 14, "beginn": 14, "grain": 14, "expiri": 14, "creation": 14, "absolut": [14, 15], "tick": [14, 15], "repo": 14, "fret": 14, "eda": 14, "flag": 14, "pane": 14, "plu": 14, "untrack": 14, "checkpoint": 14, "state": [14, 15], "datetim": [14, 15], "stamp": 14, "ok": 14, "credenti": 14, "author": 14, "33": [14, 15, 16], "dismiss": 14, "invit": 14, "collaborators_github_user_nam": 14, "refresh": 14, "blend": [14, 15], "offend": 14, "preced": 14, "histor": 14, "float": [14, 16], "app": 14, "convers": [14, 15, 16], "subtop": 14, "persist": 14, "thread": 14, "searchabl": 14, "notif": 14, "repli": 14, "submit": [14, 15], "submiss": 14, "youtub": 14, "advic": 14, "gitlab": 14, "bitbucket": 14, "wbc": 14, "jennif": 14, "bryan": 14, "karen": 14, "cranston": 14, "justin": 14, "kitz": 14, "lex": 14, "nederbragt": 14, "traci": 14, "teal": 14, "subplot": 15, "raster": 15, "svg": 15, "distract": 15, "poster": 15, "wilk": 15, "oft": 15, "pie": 15, "static": 15, "math": 15, "cognit": 15, "mental": 15, "plainli": 15, "legend": 15, "scheme": 15, "surprisingli": 15, "sex": 15, "ancestri": 15, "deeb": 15, "2005": 15, "blind": 15, "reinforc": 15, "sparingli": 15, "detract": 15, "wari": 15, "overplot": 15, "overlap": 15, "zoom": 15, "vegafus": 15, "data_transform": 15, "curat": 15, "pieter": 15, "tan": 15, "noaa": 15, "gml": 15, "ralph": 15, "keel": 15, "scripp": 15, "oceanographi": 15, "dioxid": 15, "hawaii": 15, "1959": 15, "1980": 15, "co2_df": 15, "mauna_loa_data": 15, "parse_d": 15, "date_measur": 15, "ppm": 15, "338": 15, "341": 15, "06": [15, 16], "479": 15, "414": 15, "480": 15, "416": 15, "482": [15, 16], "483": 15, "484": 15, "datetime64": 15, "ns": 15, "iso": 15, "8601": 15, "alphanumer": 15, "mark_": 15, "leverag": 15, "helper": 15, "co2_scatt": 15, "upward": 15, "affirm": 15, "predecessor": 15, "successor": 15, "alter": 15, "segment": 15, "emphas": 15, "co2_lin": 15, "aha": 15, "phenomenon": 15, "fast": 15, "muddl": 15, "settl": 15, "configure_axi": 15, "titlefonts": 15, "co2_line_label": 15, "co2": 15, "configure_": 15, "1990": 15, "clip": 15, "stack": [15, 16], "co2_line_scal": 15, "late": 15, "season": 15, "summer": 15, "octob": 15, "winter": 15, "novemb": 15, "analog": 15, "paint": 15, "blank": 15, "canva": 15, "primer": 15, "akin": 15, "sketch": 15, "durat": 15, "geyser": 15, "yellowston": 15, "nation": 15, "wyom": 15, "79": 15, "333": 15, "283": 15, "533": 15, "267": 15, "117": [15, 16], "268": [15, 16], "270": 15, "817": 15, "271": 15, "467": 15, "faithful_scatt": 15, "faithful_scatter_label": 15, "faithful_scatter_labels_black": 15, "whom": 15, "hollow": 15, "can_lang_plot": 15, "can_lang_plot_label": 15, "bunch": 15, "clump": 15, "french": [15, 16], "460": 15, "850": 15, "19460850": 15, "22162865": 15, "15265335": 15, "29748265": 15, "59": [15, 16], "7166700": 15, "6943800": 15, "3825215": 15, "10242945": 15, "logarithm": 15, "squish": 15, "log_": 15, "log10": 15, "inf": 15, "can_lang_plot_log": 15, "gridlin": 15, "seven": 15, "can_lang_plot_log_revis": 15, "tickcount": 15, "kilo": 15, "mutat": 15, "most_at_home_perc": 15, "001678": 15, "000669": 15, "029188": 15, "013612": 15, "003272": 15, "001266": 15, "038291": 15, "017026": 15, "076511": 15, "037367": 15, "011351": 15, "003940": 15, "005234": 15, "002276": 15, "036741": 15, "021763": 15, "038561": 15, "020155": 15, "025831": 15, "007439": 15, "can_lang_plot_perc": 15, "meaningfulli": 15, "onto": 15, "belong": [15, 16], "can_lang_plot_categori": 15, "laid": 15, "can_lang_plot_legend": 15, "orient": 15, "tableau10": 15, "vision": 15, "unsur": 15, "dark2": 15, "aesthet": 15, "switch": 15, "can_lang_plot_them": 15, "demand": 15, "tooltip": 15, "hover": 15, "mous": 15, "pointer": 15, "can_lang_plot_tooltip": 15, "mile": 15, "mcneil": 15, "contin": 15, "south": 15, "africa": 15, "europ": 15, "asia": 15, "australia": 15, "islands_df": 15, "landmass_typ": 15, "11506": 15, "5500": 15, "16988": 15, "2968": 15, "axel": 15, "heiberg": 15, "baffin": 15, "184": 15, "bank": 15, "borneo": 15, "britain": 15, "celeb": 15, "celon": 15, "cuba": 15, "devon": 15, "ellesmer": 15, "3745": 15, "greenland": 15, "840": 15, "hainan": 15, "hispaniola": 15, "hokkaido": 15, "honshu": 15, "iceland": 15, "ireland": 15, "java": 15, "kyushu": 15, "luzon": 15, "madagascar": 15, "227": 15, "melvil": 15, "mindanao": 15, "molucca": 15, "guinea": 15, "306": 15, "zealand": 15, "newfoundland": 15, "9390": 15, "novaya": 15, "zemlya": 15, "princ": 15, "wale": 15, "sakhalin": 15, "6795": 15, "southampton": 15, "spitsbergen": 15, "sumatra": 15, "183": 15, "taiwan": 15, "tasmania": 15, "tierra": 15, "fuego": 15, "timor": 15, "islands_bar": 15, "nlargest": 15, "tilt": 15, "sort_valu": 15, "islands_top12": 15, "islands_bar_top": 15, "appeal": 15, "minu": 15, "revers": 15, "caption": 15, "slide": 15, "summari": 15, "twelv": 15, "islands_plot_sort": 15, "morlei": 15, "1882": 15, "299": 15, "792": 15, "458": 15, "km": 15, "sec": 15, "kilometr": 15, "morley_df": 15, "expt": 15, "740": 15, "900": 15, "1070": [15, 16], "940": 15, "950": 15, "810": 15, "870": 15, "experiment": 15, "fell": 15, "morley_bar": 15, "thin": 15, "bucket": 15, "morley_hist": 15, "thick": 15, "v_line": 15, "morley_hist_lin": 15, "morley_hist_color": 15, "sit": 15, "transluc": 15, "morley_hist_categor": 15, "deriv": 15, "incorrect": 15, "clearest": 15, "morley_hist_facet": 15, "1050": 15, "foremost": 15, "subtli": 15, "speed_of_light": 15, "299792": 15, "relativeerror": 15, "299000": 15, "019194": 15, "017498": 15, "035872": 15, "092578": 15, "045879": 15, "049215": 15, "052550": 15, "002516": 15, "005851": 15, "025865": 15, "morley_hist_rel": 15, "recreat": 15, "admir": 15, "morley_hist_maxbin": 15, "motiv": 15, "establish": 15, "pose": 15, "wiggl": 15, "discern": 15, "parenthes": [15, 16], "energi": 15, "automot": 15, "plant": 15, "burn": [15, 16], "fossil": 15, "fuel": 15, "greenhous": 15, "gase": 15, "byproduct": 15, "trap": 15, "heat": 15, "warm": 15, "observatori": 15, "amplitud": 15, "growth": 15, "1800": 15, "kilomet": 15, "farthest": 15, "confer": 15, "shop": 15, "billboard": 15, "pixel": 15, "lossi": 15, "lossless": 15, "jpeg": 15, "jpg": 15, "photograph": 15, "bmp": 15, "tiff": 15, "tif": 15, "gimp": 15, "redraw": 15, "ep": 15, "inkscap": 15, "shrink": 15, "portabl": 15, "hardl": 15, "1991": 15, "filenam": 15, "img": 15, "viz": 15, "faithful_plot": 15, "mb": 15, "decent": 15, "bigger": 15, "dee05": 15, "sameer": 15, "clinic": 15, "369": 15, "377": 15, "har91": 15, "wolfgang": 15, "york": 15, "mcn77": 15, "donald": 15, "mic82": 15, "veloc": 15, "nite": 15, "tate": 15, "aval": 15, "cademi": 15, "nnapoli": 15, "astronom": 15, "tk20": 15, "ccgg": 15, "vgh": 15, "jacob": 15, "granger": 15, "heer": 15, "dominik": 15, "moritz": 15, "kanit": 15, "wongsuphasawat": 15, "arvind": 15, "satyanarayan": 15, "eitan": 15, "ilia": 15, "timofeev": 15, "ben": 15, "welsh": 15, "scott": 15, "sievert": 15, "journal": [15, 16], "1057": 15, "21105": 15, "joss": 15, "01057": 15, "wil19": 15, "clau": 15, "clauswilk": 15, "dataviz": 15, "util": 16, "entiti": 16, "tabular": 16, "2235145": 16, "yellow": 16, "abbrevi": 16, "int": 16, "14159": 16, "boolean": 16, "bool": 16, "hello": 16, "nonetyp": 16, "arithmet": 16, "dict": 16, "cities_seri": 16, "separt": 16, "population_in_2016": 16, "1027613": 16, "1823281": 16, "544870": 16, "571146": 16, "321484": 16, "upcom": 16, "population_in_2016_df": 16, "criteria": 16, "wickham": 16, "No": 16, "bespok": 16, "untidi": 16, "2006": 16, "2011": 16, "land": 16, "region_lang_top5_cities_wid": 16, "cite": 16, "montr\u00e9al": 16, "lang_wid": 16, "985": 16, "1435": 16, "960": 16, "575": 16, "360": 16, "240": 16, "8485": 16, "1015": 16, "705": 16, "885": 16, "13260": 16, "2450": 16, "1090": 16, "1365": 16, "770": 16, "2440": 16, "5290": 16, "1025": 16, "380": 16, "3355": 16, "8960": 16, "3380": 16, "1430": 16, "tough": 16, "lang_mother_tidi": 16, "id_var": 16, "var_nam": 16, "value_nam": 16, "1065": 16, "1066": 16, "1067": 16, "1068": 16, "1069": 16, "met": 16, "commut": 16, "widen": 16, "region_lang_top5_cities_long": 16, "lang_long": 16, "2135": 16, "2136": 16, "2137": 16, "2138": 16, "2139": 16, "2140": 16, "lang_home_tidi": 16, "2495": 16, "1622735": 16, "1330555": 16, "8630": 16, "3245": 16, "behaviour": 16, "colum": 16, "messier": 16, "dealt": 16, "lang_messi": 16, "region_lang_top5_cities_messi": 16, "265": 16, "520": 16, "505": 16, "4045": 16, "440": 16, "330": 16, "6380": 16, "1445": 16, "530": 16, "620": 16, "3130": 16, "760": 16, "6665": 16, "860": 16, "1080": 16, "lang_messy_long": 16, "tidy_lang": 16, "astyp": 16, "depth": 16, "occas": 16, "official_lang": 16, "3836770": 16, "3218725": 16, "29800": 16, "11940": 16, "620510": 16, "412120": 16, "2669195": 16, "1607550": 16, "487": 16, "696": 16, "1065070": 16, "844740": 16, "701": 16, "910": 16, "1050410": 16, "792700": 16, "915": 16, "10950": 16, "2520": 16, "1060": 16, "ampersand": 16, "pipe": 16, "region_data": 16, "household": 16, "dwell": 16, "bellevil": 16, "43002": 16, "1354": 16, "65121": 16, "103472": 16, "45050": 16, "lethbridg": 16, "45696": 16, "3046": 16, "69699": 16, "117394": 16, "48317": 16, "thunder": 16, "bai": 16, "52545": 16, "2618": 16, "26318": 16, "121621": 16, "57146": 16, "peterborough": 16, "50533": 16, "1636": 16, "98336": 16, "121721": 16, "55662": 16, "saint": 16, "john": 16, "52872": 16, "3793": 16, "42158": 16, "126202": 16, "58398": 16, "535499": 16, "7168": 16, "96442": 16, "1323783": 16, "519693": 16, "5241": 16, "70103": 16, "1392609": 16, "960894": 16, "3040": 16, "41532": 16, "2463431": 16, "1727310": 16, "4638": 16, "24059": 16, "4098927": 16, "2135909": 16, "6269": 16, "93132": 16, "5928040": 16, "interst": 16, "city_nam": 16, "five_c": 16, "502143": 16, "9857": 16, "77908": 16, "1321426": 16, "537634": 16, "seriesa": 16, "seriesb": 16, "669": 16, "capabl": 16, "omit": 16, "startswith": 16, "darker": 16, "region_lang": 16, "moncton": 16, "saguenai": 16, "7485": 16, "7486": 16, "7487": 16, "abbotsford": 16, "mission": 16, "7488": 16, "7489": 16, "7490": 16, "23171710": 16, "std": 16, "490000e": 16, "093686e": 16, "401258e": 16, "000000e": 16, "836770e": 16, "25th": 16, "50th": 16, "75th": 16, "skipna": 16, "3061820": 16, "5600480": 16, "numeric_onli": 16, "3200": 16, "341121": 16, "3093": 16, "686248": 16, "1853": 16, "757677": 16, "5127": 16, "499332": 16, "55231": 16, "640268": 16, "64012": 16, "578320": 16, "48574": 16, "532066": 16, "94001": 16, "162338": 16, "cartoon": 16, "dataframegroupbi": 16, "0x7fd61bc32490": 16, "137445": 16, "182390": 16, "97840": 16, "brantford": 16, "124560": 16, "troi": 16, "rivi\u00e8r": 16, "149835": 16, "331375": 16, "270715": 16, "612595": 16, "23015": 16, "875": 16, "8235": 16, "2695": 16, "102": 16, "365": 16, "23565": 16, "104": 16, "11185": 16, "122100": 16, "93495": 16, "167835": 16, "168990": 16, "115125": 16, "193445": 16, "93655": 16, "54150": 16, "100855": 16, "116645": 16, "73910": 16, "130835": 16, "937055": 16, "1343335": 16, "147805": 16, "78610": 16, "149805": 16, "1316635": 16, "2289515": 16, "302690": 16, "211705": 16, "354470": 16, "235990": 16, "166220": 16, "318540": 16, "530570": 16, "437460": 16, "749285": 16, "keyerror": 16, "qu\u00e9bec": 16, "028571": 16, "region_lang_num": 16, "wise": 16, "040": 16, "aforement": 16, "english_lang": 16, "1898": 16, "444955": 16, "2500590": 16, "1903": 16, "1918": 16, "1919": 16, "930405": 16, "1275265": 16, "1923": 16, "city_pop": 16, "unchang": 16, "tmp": 16, "ipykernel_12": 16, "2654974267": 16, "settingwithcopywarn": 16, "row_index": 16, "col_index": 16, "pydata": 16, "doc": 16, "stabl": 16, "user_guid": 16, "warn": 16, "went": 16, "silenc": 16, "div": 16, "divis": 16, "108554": 16, "151384": 16, "100543": 16, "610060": 16, "516498": 16, "647224": 16, "542966": 16, "944744": 16, "672877": 16, "764802": 16, "606588": 16, "964617": 16, "704092": 16, "794906": 16, "599882": 16, "965067": 16, "534472": 16, "658730": 16, "540123": 16, "929401": 16, "city_popul": 16, "wic14": 16, "hadlei": 16}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"acknowledg": 0, "python": [0, 4, 5, 6, 7, 8, 10, 12, 16], "edit": [0, 5, 8, 14], "about": 1, "author": 1, "classif": [2, 3], "i": [2, 11, 14], "train": [2, 3, 11], "predict": [2, 3], "overview": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "chapter": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "learn": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "object": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "The": [2, 3, 4, 8, 11, 12], "problem": [2, 11], "explor": [2, 7, 8, 11], "data": [2, 3, 5, 7, 8, 10, 11, 15, 16], "set": [2, 3, 7, 11, 13, 15], "load": [2, 7], "cancer": 2, "describ": 2, "variabl": [2, 3], "k": [2, 4, 11], "nearest": [2, 11], "neighbor": [2, 11], "distanc": 2, "between": 2, "point": 2, "evalu": [2, 3, 11], "from": [2, 10, 14, 16], "new": [2, 8, 12], "observ": 2, "each": 2, "its": 2, "5": 2, "more": 2, "than": 2, "two": 2, "explanatori": 2, "summari": [2, 3, 6, 8, 10, 16], "algorithm": [2, 4], "scikit": [2, 3], "preprocess": [2, 3], "center": 2, "scale": 2, "balanc": 2, "miss": [2, 10], "put": [2, 7], "togeth": [2, 7], "pipelin": 2, "exercis": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "refer": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "ii": [3, 12], "tune": [3, 11], "perform": [3, 16], "an": [3, 4, 10, 15], "exampl": [3, 4], "confus": 3, "matrix": 3, "tumor": 3, "imag": 3, "random": [3, 4], "seed": 3, "creat": [3, 7, 8, 14, 15], "test": [3, 11], "split": [3, 16], "classifi": 3, "label": 3, "critic": 3, "analyz": 3, "cross": 3, "valid": 3, "paramet": 3, "valu": [3, 7, 10, 16], "select": [3, 7, 16], "under": 3, "overfit": [3, 11], "predictor": [3, 12], "effect": [3, 15], "irrelev": 3, "find": 3, "good": 3, "subset": [3, 7], "forward": 3, "addit": [3, 4, 6, 8, 10, 12, 14, 15, 16], "resourc": [3, 4, 6, 8, 10, 12, 14, 15, 16], "cluster": 4, "illustr": 4, "mean": [4, 6], "measur": 4, "qualiti": 4, "restart": 4, "choos": [4, 15], "scienc": 5, "A": 5, "first": 5, "introduct": 5, "welcom": 5, "statist": [6, 16], "infer": 6, "why": [6, 10, 14], "do": [6, 16], "we": [6, 10], "need": 6, "sampl": 6, "distribut": 6, "proport": 6, "bootstrap": 6, "us": [6, 7, 10, 14, 16], "calcul": [6, 16], "plausibl": 6, "rang": 6, "panda": 7, "canadian": [7, 15], "languag": [7, 15], "ask": 7, "question": 7, "type": [7, 16], "analysi": 7, "tabular": [7, 10], "name": [7, 10, 16], "thing": 7, "frame": [7, 16], "loc": [7, 16], "filter": [7, 16], "row": [7, 10, 16], "column": [7, 10, 16], "sort_valu": 7, "head": 7, "order": 7, "ad": [7, 15, 16], "modifi": [7, 16], "combin": [7, 8, 16], "step": 7, "chain": 7, "multilin": 7, "express": 7, "visual": [7, 15], "altair": [7, 15], "bar": [7, 15], "plot": [7, 15], "format": [7, 8, 15], "chart": [7, 15], "all": [7, 10], "access": [7, 8, 10, 14], "document": 7, "code": 8, "text": [8, 10, 15], "jupyt": [8, 14], "cell": 8, "execut": 8, "kernel": 8, "markdown": 8, "save": [8, 15], "your": [8, 13, 14], "work": [8, 13, 14], "best": 8, "practic": 8, "run": 8, "notebook": 8, "includ": 8, "packag": 8, "file": [8, 10, 14, 15], "export": 8, "differ": [8, 10, 15], "html": [8, 10], "pdf": 8, "prefac": 9, "read": 10, "local": [10, 14], "web": 10, "absolut": 10, "rel": 10, "path": 10, "plain": 10, "read_csv": 10, "comma": 10, "separ": 10, "skip": 10, "when": [10, 15], "sep": 10, "argument": 10, "header": 10, "handl": [10, 14], "directli": 10, "url": 10, "preview": 10, "befor": 10, "microsoft": 10, "excel": 10, "read_excel": 10, "databas": 10, "sqlite": 10, "postgresql": 10, "should": [10, 14], "bother": 10, "write": 10, "csv": 10, "obtain": [10, 13], "scrape": 10, "css": 10, "selector": 10, "beautifulsoup": 10, "read_html": 10, "api": 10, "nasa": 10, "regress": [11, 12], "model": 11, "underfit": 11, "multivari": [11, 12], "knn": [11, 12], "strength": 11, "limit": 11, "linear": 12, "simpl": 12, "compar": 12, "multicollinear": 12, "outlier": 12, "design": 12, "other": 12, "side": 12, "up": [13, 16], "comput": 13, "worksheet": 13, "thi": [13, 16], "book": 13, "docker": 13, "window": 13, "maco": 13, "ubuntu": 13, "jupyterlab": 13, "desktop": 13, "collabor": 14, "version": 14, "control": 14, "what": [14, 16], "repositori": 14, "workflow": 14, "commit": 14, "chang": 14, "push": 14, "remot": 14, "pull": 14, "github": 14, "pen": 14, "tool": 14, "add": 14, "menu": 14, "gener": 14, "person": 14, "token": 14, "clone": 14, "specifi": 14, "make": 14, "give": 14, "project": 14, "merg": [14, 16], "conflict": 14, "commun": 14, "issu": 14, "refin": 15, "scatter": 15, "line": 15, "mauna": 15, "loa": 15, "co_": 15, "2": 15, "old": 15, "faith": 15, "erupt": 15, "time": 15, "axi": 15, "transform": 15, "color": 15, "island": 15, "landmass": 15, "histogram": 15, "michelson": 15, "speed": 15, "light": 15, "layer": 15, "binwidth": 15, "explain": 15, "size": 15, "clean": 16, "wrangl": 16, "seri": 16, "basic": 16, "doe": 16, "have": 16, "structur": 16, "tidi": 16, "go": 16, "wide": 16, "long": 16, "melt": 16, "pivot": 16, "str": 16, "deal": 16, "multipl": 16, "delimit": 16, "extract": 16, "certain": 16, "satisfi": 16, "condit": 16, "least": 16, "one": 16, "list": 16, "isin": 16, "abov": 16, "below": 16, "threshold": 16, "queri": 16, "iloc": 16, "posit": 16, "aggreg": 16, "individu": 16, "oper": 16, "group": 16, "groupbi": 16, "appli": 16, "function": 16, "across": 16}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 56}}) \ No newline at end of file diff --git a/pull313/viz.html b/pull313/viz.html index 5ba94d67..a36a5f19 100644 --- a/pull313/viz.html +++ b/pull313/viz.html @@ -739,23 +739,23 @@

4.5.1. Scatter plots and line plots: the
-
+

Fig. 4.2 Scatter plot of atmospheric concentration of CO\(_{2}\) over time.#

@@ -839,23 +839,23 @@

4.5.1. Scatter plots and line plots: the
-
+

Fig. 4.3 Line plot of atmospheric concentration of CO\(_{2}\) over time.#

@@ -935,23 +935,23 @@

4.5.1. Scatter plots and line plots: the
-
+

Fig. 4.4 Line plot of atmospheric concentration of CO\(_{2}\) over time with clearer axes and labels.#

@@ -1041,23 +1041,23 @@

4.5.1. Scatter plots and line plots: the
-
+

Fig. 4.5 Line plot of atmospheric concentration of CO\(_{2}\) from 1990 to 1995.#

@@ -1253,23 +1253,23 @@

4.5.2. Scatter plots: the Old Faithful e
-
+

Fig. 4.6 Scatter plot of waiting time and eruption time.#

@@ -1340,23 +1340,23 @@

4.5.2. Scatter plots: the Old Faithful e
-
+

Fig. 4.7 Scatter plot of waiting time and eruption time with clearer axes and labels.#

@@ -1421,23 +1421,23 @@

4.5.2. Scatter plots: the Old Faithful e
-
+

Fig. 4.8 Scatter plot of waiting time and eruption time with black points.#

@@ -1663,23 +1663,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.9 Scatter plot of number of Canadians reporting a language as their mother tongue vs the primary language at home#

@@ -1757,23 +1757,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.10 Scatter plot of number of Canadians reporting a language as their mother tongue vs the primary language at home with x and y labels.#

@@ -1931,23 +1931,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.11 Scatter plot of number of Canadians reporting a language as their mother tongue vs the primary language at home with log-adjusted x and y axes.#

@@ -2027,23 +2027,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.12 Scatter plot of number of Canadians reporting a language as their mother tongue vs the primary language at home with log-adjusted x and y axes. Only the major gridlines are shown. The suffix “k” indicates 1,000 (“kilo”), while the suffix “M” indicates 1,000,000 (“million”).#

@@ -2234,23 +2234,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.13 Scatter plot of percentage of Canadians reporting a language as their mother tongue vs the primary language at home.#

@@ -2371,23 +2371,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.14 Scatter plot of percentage of Canadians reporting a language as their mother tongue vs the primary language at home colored by language category.#

@@ -2466,23 +2466,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.15 Scatter plot of percentage of Canadians reporting a language as their mother tongue vs the primary language at home colored by language category with the legend edited.#

@@ -2577,23 +2577,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.16 Scatter plot of percentage of Canadians reporting a language as their mother tongue vs the primary language at home colored by language category with custom colors and shapes.#

@@ -2679,23 +2679,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.17 Scatter plot of percentage of Canadians reporting a language as their mother tongue vs the primary language at home colored by language category with custom colors and mouse hover tooltip.#

@@ -3125,23 +3125,23 @@

4.5.4. Bar plots: the island landmass da
-
+

Fig. 4.18 Bar plot of Earth’s landmass sizes. The plot is too wide with the default settings.#

@@ -3226,23 +3226,23 @@

4.5.4. Bar plots: the island landmass da
-
+

Fig. 4.19 Bar plot of size for Earth’s largest 12 landmasses.#

@@ -3337,23 +3337,23 @@

4.5.4. Bar plots: the island landmass da
-
+

Fig. 4.20 Bar plot of size for Earth’s largest 12 landmasses, colored by landmass type, with clearer axes and labels.#

@@ -3565,23 +3565,23 @@

4.5.4. Bar plots: the island landmass da
-
+

Fig. 4.21 A bar chart of Michelson’s speed of light data.#

@@ -3656,23 +3656,23 @@

4.5.4. Bar plots: the island landmass da
-
+

Fig. 4.22 Histogram of Michelson’s speed of light data.#

@@ -3778,23 +3778,23 @@

Adding layers to an
-
+

Fig. 4.23 Histogram of Michelson’s speed of light data with vertical line indicating the true speed of light.#

@@ -3872,23 +3872,23 @@

Adding layers to an
-
+

Fig. 4.24 Histogram of Michelson’s speed of light data colored by experiment.#

@@ -3996,23 +3996,23 @@

Adding layers to an
-
+

Fig. 4.25 Histogram of Michelson’s speed of light data colored by experiment as a categorical variable.#

@@ -4099,23 +4099,23 @@

Adding layers to an
-
+

Fig. 4.26 Histogram of Michelson’s speed of light data split vertically by experiment.#

@@ -4331,23 +4331,23 @@

Adding layers to an
-
+

Fig. 4.27 Histogram of relative error split vertically by experiment with clearer axes and labels#

@@ -4421,23 +4421,23 @@

Choosing a binwidth for histograms
-
+

Fig. 4.28 Histogram of Michelson’s speed of light data.#

@@ -4509,23 +4509,23 @@

Choosing a binwidth for histograms
-
+

Fig. 4.29 Effect of varying number of max bins on histograms.#

diff --git a/pull313/wrangling.html b/pull313/wrangling.html index 798c5aa4..7125bb8a 100644 --- a/pull313/wrangling.html +++ b/pull313/wrangling.html @@ -4803,7 +4803,7 @@

3.9. Performing operations on groups of

-
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f1025eb0e10>
+
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fd61bc32490>