From 1e21eb3e501c98da1c90a94b0a808a65390f2137 Mon Sep 17 00:00:00 2001 From: trevorcampbell Date: Wed, 22 Nov 2023 00:39:13 +0000 Subject: [PATCH] deploy: 49001cfda5c3483ed2bdce5fa5dbd8d2111aa2c9 --- pull317/_sources/regression1.md | 16 +- pull317/_sources/regression2.md | 2 +- pull317/classification1.html | 204 ++++++++--------- pull317/classification2.html | 156 ++++++------- pull317/clustering.html | 182 +++++++-------- pull317/inference.html | 182 +++++++-------- pull317/intro.html | 70 +++--- pull317/objects.inv | Bin 9469 -> 9459 bytes pull317/regression1.html | 132 +++++------ pull317/regression2.html | 160 ++++++------- pull317/searchindex.js | 2 +- pull317/viz.html | 392 ++++++++++++++++---------------- pull317/wrangling.html | 2 +- 13 files changed, 750 insertions(+), 750 deletions(-) diff --git a/pull317/_sources/regression1.md b/pull317/_sources/regression1.md index f4c7c304..fb06de5a 100644 --- a/pull317/_sources/regression1.md +++ b/pull317/_sources/regression1.md @@ -476,8 +476,8 @@ us the smallest RMSPE. from sklearn.neighbors import KNeighborsRegressor # (synthetic) new prediction points -pts = pd.DataFrame({"sqft": [1250, 1850, 2250], "price": [250000, 200000, 500000]}) -finegrid = pd.DataFrame({"sqft": np.arange(900, 3901, 10)}) +pts = pd.DataFrame({"sqft": [1200, 1850, 2250], "price": [300000, 200000, 500000]}) +finegrid = pd.DataFrame({"sqft": np.arange(600, 3901, 10)}) # preprocess the data, make the pipeline sacr_preprocessor = make_column_transformer((StandardScaler(), ["sqft"])) @@ -495,12 +495,11 @@ sacr_full_preds_hid = pd.concat( ) sacr_new_preds_hid = pd.concat( - (pts, pd.DataFrame(sacr_pipeline.predict(pts), columns=["predicted"])), + (small_sacramento[["sqft", "price"]].reset_index(), pd.DataFrame(sacr_pipeline.predict(small_sacramento[["sqft", "price"]]), columns=["predicted"])), axis=1, -) +).drop(columns=["index"]) # to make altair mark_line works, need to create separate dataframes for each vertical error line -sacr_new_preds_melted_df = sacr_new_preds_hid.melt(id_vars=["sqft"]) errors_plot = ( small_plot + alt.Chart(sacr_full_preds_hid).mark_line(color="#ff7f0e").encode(x="sqft", y="predicted") @@ -508,9 +507,10 @@ errors_plot = ( .mark_circle(opacity=1) .encode(x="sqft", y="price") ) +sacr_new_preds_melted_df = sacr_new_preds_hid.melt(id_vars=["sqft"]) v_lines = [] -for i in pts["sqft"]: - line_df = sacr_new_preds_melted_df.query("sqft == @i") +for i in sacr_new_preds_hid["sqft"]: + line_df = sacr_new_preds_melted_df.query(f"sqft == {i}") v_lines.append(alt.Chart(line_df).mark_line(color="black").encode(x="sqft", y="value")) errors_plot = alt.layer(*v_lines, errors_plot) @@ -526,7 +526,7 @@ glue("fig:07-verticalerrors", errors_plot, display=False) :::{glue:figure} fig:07-verticalerrors :name: fig:07-verticalerrors -Scatter plot of price (USD) versus house size (square feet) with example predictions (orange line) and the error in those predictions compared with true response values for three selected observations (vertical lines). +Scatter plot of price (USD) versus house size (square feet) with example predictions (orange line) and the error in those predictions compared with true response values (vertical lines). ::: +++ diff --git a/pull317/_sources/regression2.md b/pull317/_sources/regression2.md index 2feadba1..5e71870a 100644 --- a/pull317/_sources/regression2.md +++ b/pull317/_sources/regression2.md @@ -313,7 +313,7 @@ Scatter plot of sale price versus size with many possible lines that could be dr Simple linear regression chooses the straight line of best fit by choosing the line that minimizes the **average squared vertical distance** between itself and -each of the observed data points in the training data. {numref}`fig:08-verticalDistToMin` illustrates +each of the observed data points in the training data (equivalent to minimizing the RMSE). {numref}`fig:08-verticalDistToMin` illustrates these vertical distances as lines. Finally, to assess the predictive accuracy of a simple linear regression model, we use RMSPE—the same measure of predictive performance we used with K-NN regression. diff --git a/pull317/classification1.html b/pull317/classification1.html index d60c5b20..431c59af 100644 --- a/pull317/classification1.html +++ b/pull317/classification1.html @@ -863,23 +863,23 @@

5.4.3. Exploring the cancer data
-
+
@@ -973,23 +973,23 @@

5.5. Classification with K-nearest neigh
-
+

Fig. 5.2 Scatter plot of concavity versus perimeter with new observation represented as a red diamond.#

@@ -1048,23 +1048,23 @@

5.5. Classification with K-nearest neigh
-
+

Fig. 5.3 Scatter plot of concavity versus perimeter. The new observation is represented as a red diamond with a line to the one nearest neighbor, which has a malignant @@ -1127,23 +1127,23 @@

5.5. Classification with K-nearest neigh
-
+

Fig. 5.4 Scatter plot of concavity versus perimeter. The new observation is represented as a red diamond with a line to the one nearest neighbor, which has a benign @@ -1206,23 +1206,23 @@

5.5. Classification with K-nearest neigh
-
+

Fig. 5.5 Scatter plot of concavity versus perimeter with three nearest neighbors.#

@@ -1302,23 +1302,23 @@

5.5.1. Distance between points
-
+

Fig. 5.6 Scatter plot of concavity versus perimeter with new observation represented as a red diamond.#

@@ -1498,23 +1498,23 @@

5.5.1. Distance between points
-
+

Fig. 5.7 Scatter plot of concavity versus perimeter with 5 nearest neighbors circled.#

@@ -1710,9 +1710,9 @@

5.5.2. More than two explanatory variabl }); } -

Fig. 5.9 Comparison of K = 3 nearest neighbors with standardized and unstandardized data.#

@@ -2503,23 +2503,23 @@

5.7.1. Centering and scaling
-
+

Fig. 5.10 Close-up of three nearest neighbors for unstandardized data.#

@@ -2616,23 +2616,23 @@

5.7.2. Balancing
-
+

@@ -2713,23 +2713,23 @@

5.7.2. Balancing
-
+

Fig. 5.12 Imbalanced data with 7 nearest neighbors to a new observation highlighted.#

@@ -2787,23 +2787,23 @@

5.7.2. Balancing
-
+

Fig. 5.13 Imbalanced data with background color indicating the decision of the classifier and the points represent the labeled data.#

@@ -2897,23 +2897,23 @@

5.7.2. Balancing
-
+

Fig. 5.14 Upsampled data with background color indicating the decision of the classifier.#

@@ -3436,23 +3436,23 @@

5.7.3. Missing data
-
+
diff --git a/pull317/classification2.html b/pull317/classification2.html index 26625fb2..fbeea28f 100644 --- a/pull317/classification2.html +++ b/pull317/classification2.html @@ -798,23 +798,23 @@

6.5. Evaluating performance with
-
+
@@ -1535,32 +1535,32 @@

6.6.1. Cross-validation6.6.1. Cross-validation6.6.1. Cross-validation6.6.1. Cross-validation6.6.2. Parameter value selection
-
+

Fig. 6.5 Plot of estimated accuracy versus the number of neighbors.#

@@ -2272,23 +2272,23 @@

6.6.3. Under/Overfitting
-
+

Fig. 6.6 Plot of accuracy estimate versus number of neighbors for many K values.#

@@ -2363,23 +2363,23 @@

6.6.3. Under/Overfitting
-
+

Fig. 6.7 Effect of K in overfitting and underfitting.#

@@ -2798,23 +2798,23 @@

6.8.1. The effect of irrelevant predicto
-
+

Fig. 6.9 Effect of inclusion of irrelevant predictors.#

@@ -2877,23 +2877,23 @@

6.8.1. The effect of irrelevant predicto
-
+

Fig. 6.10 Tuned number of neighbors for varying number of irrelevant predictors.#

@@ -2947,23 +2947,23 @@

6.8.1. The effect of irrelevant predicto
-
+

Fig. 6.11 Accuracy versus number of irrelevant predictors for tuned and untuned number of neighbors.#

@@ -3426,23 +3426,23 @@

6.8.3. Forward selection in Python
-
+

Fig. 6.12 Estimated accuracy versus the number of predictors for the sequence of models built using forward selection.#

diff --git a/pull317/clustering.html b/pull317/clustering.html index a7c81567..d93d8314 100644 --- a/pull317/clustering.html +++ b/pull317/clustering.html @@ -748,23 +748,23 @@

9.4. An illustrative example
-
+

Fig. 9.2 Scatter plot of standardized bill length versus standardized flipper length.#

@@ -842,23 +842,23 @@

9.4. An illustrative example
-
+

Fig. 9.3 Scatter plot of standardized bill length versus standardized flipper length with colored groups.#

@@ -951,23 +951,23 @@

9.5.1. Measuring cluster quality
-
+

Fig. 9.4 Cluster 0 from the penguins_standardized data set example. Observations are small blue points, with the cluster center highlighted as a large blue point with a black outline.#

@@ -1034,23 +1034,23 @@

9.5.1. Measuring cluster quality
-
+

Fig. 9.5 Cluster 0 from the penguins_standardized data set example. Observations are small blue points, with the cluster center highlighted as a large blue point with a black outline. The distances from the observations to the cluster center are represented as black lines.#

@@ -1113,23 +1113,23 @@

9.5.1. Measuring cluster quality
-
+

Fig. 9.6 All clusters from the penguins_standardized data set example. Observations are small orange, blue, and yellow points with cluster centers denoted by larger points with a black outline. The distances from the observations to each of the respective cluster centers are represented as black lines.#

@@ -1197,23 +1197,23 @@

9.5.2. The clustering algorithm
-
+

Fig. 9.7 Random initialization of labels. Each cluster is depicted as a different color and shape.#

@@ -1279,23 +1279,23 @@

9.5.2. The clustering algorithm
-
+

Fig. 9.8 First three iterations of K-means clustering on the penguins_standardized example data set. Each pair of plots corresponds to an iteration. Within the pair, the first plot depicts the center update, and the second plot depicts the reassignment of data to clusters. Cluster centers are indicated by larger points that are outlined in black.#

@@ -1365,23 +1365,23 @@

9.5.3. Random restarts
-
+

Fig. 9.9 Random initialization of labels.#

@@ -1436,23 +1436,23 @@

9.5.3. Random restarts
-
+

Fig. 9.10 First four iterations of K-means clustering on the penguins_standardized example data set with a poor random initialization. Each pair of plots corresponds to an iteration. Within the pair, the first plot depicts the center update, and the second plot depicts the reassignment of data to clusters. Cluster centers are indicated by larger points that are outlined in black.#

@@ -1522,23 +1522,23 @@

9.5.4. Choosing K
-
+

Fig. 9.11 Clustering of the penguin data for K clusters ranging from 1 to 9. Cluster centers are indicated by larger points that are outlined in black.#

@@ -1598,23 +1598,23 @@

9.5.4. Choosing K
-
+

Fig. 9.12 Total WSSD for K clusters ranging from 1 to 9.#

@@ -1926,23 +1926,23 @@

9.6. K-means in Python
-
+

Fig. 9.13 The data colored by the cluster assignments returned by K-means.#

@@ -2171,23 +2171,23 @@

9.6. K-means in Python
-
+

Fig. 9.14 A plot showing the total WSSD versus the number of clusters.#

diff --git a/pull317/inference.html b/pull317/inference.html index cc5323b8..7c445dec 100644 --- a/pull317/inference.html +++ b/pull317/inference.html @@ -1217,23 +1217,23 @@

10.4.1. Sampling distributions for propo
-
+

Fig. 10.2 Sampling distribution of the sample proportion for sample size 40.#

@@ -1341,23 +1341,23 @@

10.4.2. Sampling distributions for means
-
+

Fig. 10.3 Population distribution of price per night (dollars) for all Airbnb listings in Vancouver, Canada.#

@@ -1463,23 +1463,23 @@

10.4.2. Sampling distributions for means
-
+

Fig. 10.4 Distribution of price per night (dollars) for sample of 40 Airbnb listings.#

@@ -1678,23 +1678,23 @@

10.4.2. Sampling distributions for means
-
+

Fig. 10.5 Sampling distribution of the sample means for sample size of 40.#

@@ -1774,23 +1774,23 @@

10.4.2. Sampling distributions for means
-
+
@@ -1856,23 +1856,23 @@

10.4.2. Sampling distributions for means
-
+
@@ -2009,23 +2009,23 @@

10.5.1. Overview
-
+

Fig. 10.8 Comparison of samples of different sizes from the population.#

@@ -2614,23 +2614,23 @@

10.5.2. Bootstrapping in Python
-
+
@@ -2714,23 +2714,23 @@

10.5.2. Bootstrapping in Python
-
+
@@ -3010,23 +3010,23 @@

10.5.2. Bootstrapping in Python
-
+
@@ -3283,23 +3283,23 @@

10.5.2. Bootstrapping in Python
-
+
@@ -3360,23 +3360,23 @@

10.5.2. Bootstrapping in Python
-
+
@@ -3508,23 +3508,23 @@

10.5.3. Using the bootstrap to calculate
-
+
diff --git a/pull317/intro.html b/pull317/intro.html index 8717a55d..18f03c8a 100644 --- a/pull317/intro.html +++ b/pull317/intro.html @@ -2040,23 +2040,23 @@

1.11.1. Using
-
+

Fig. 1.8 Bar plot of the ten Aboriginal languages most often reported by Canadian residents as their mother tongue#

@@ -2144,23 +2144,23 @@

1.11.1. Using
-
+

Fig. 1.9 Bar plot of the ten Aboriginal languages most often reported by Canadian residents as their mother tongue with x and y labels. Note that this visualization is not done yet; there are still improvements to be made.#

@@ -2230,23 +2230,23 @@

1.11.1. Using
-
+

Fig. 1.10 Horizontal bar plot of the ten Aboriginal languages most often reported by Canadian residents as their mother tongue. There are no more serious issues with this visualization, but it could be refined further.#

@@ -2318,23 +2318,23 @@

1.11.1. Using
-
+

Fig. 1.11 Bar plot of the ten Aboriginal languages most often reported by Canadian residents as their mother tongue with bars reordered.#

@@ -2444,23 +2444,23 @@

1.11.3. Putting it all together
-
+

Fig. 1.12 Bar plot of the ten Aboriginal languages most often reported by Canadian residents as their mother tongue#

diff --git a/pull317/objects.inv b/pull317/objects.inv index 9e9c7f9fa6d19b1cbbe02d98232bbbdcdbdd9e5b..35ade77c4a0c0845169a8a6a6dd8b8a44ac0bb4b 100644 GIT binary patch delta 5526 zcmV;H6=~}IN%KjtkPm;B)1P)IW!#4PU3`T>_b}T-(+`!5pfyIPi7MO>WhM&Ag3@3( zMtxKiKV`|tl6TRUyhEvrEZB0SXA|JS0 zwmz1Zv#`8;9F`OzmA0}23tqBo@jSK9mI6%5|8wIe+Uhb;)Ovc0x3u*Q8IFi$4bl^>m+QtUFNJa zdAi`NFluQ9xp6TZHibE@#17|C>1MrYkJ3&^&F{R1k}7{w*E&}dmwec5E|Z!lm4)^` z2d{A8i0HI2V;s@y;%*iAX2o3@gp<3th?gK*@hmPemN*8(wEZRKtA;@iJDIJCY$rQ$ zm_Gs9dJUV4%!HMy&=~`kFzCgZb^YV6bp0aRZyeQ&*;rk)2(*eN28S0EllU~n(hi^r zkvPC00^@&oC@ss2=i``M&Bo*^`h%Y&u4Sc`P3z(7w|+T${Fl+|U{oiV`-F{bj>;I! z&t}8?tfxNFW}1uz2c6*$ByUgB=U0U5}w*gi(CA;0@ zjq3Yak0Unh+$ZDl9oz2K7tf<>RV|Ap2vKs!nsH1oXQOu+-L$cO=`8e6N$FvQYLJgV zJaeIrqo}@^jp_?fWJb1A>qHKiFd!dEI-=n?A}?nn^0LQc4sBvgOCj!U(rk`cwaZ|RK$$PPN(?u%Nq=~lVS zz*K39;2OE#B}OhC%kr84F6{f2rn_qt`JIqlUmWBpF4$$xbh&+Rw)E*Zh}jbjdj(UZ zslgO!s(y-x5HJ$2yX$FiedFHjV2hm7-FOcGqJk09gvu!ZoyLde)7q|iQ&YJ=ih6%J zIe54Ab!+JHt^@r6ta^->pzmaA*%}DvHf_4P$5Rd6lo*_kktrhCL`;3}3O8YZ{RVcX zbXUoRFd`PE0>dv?wK=he`>hd$Z?12zaRVDHV`oa4C<nif z-G`4WNRfDxaR_=)A}4!if?s{+5GH@~F=l&=iz*_(fa48nH-Vpj`r9?Ii!OA&k8Z%g z!<;KCZC{H$mL0ghd3&}9QrHYJ+nG;AG`2LM%~OQmO;V}Sogug`y*XDK9qV$Vnq;G< z&brfwiY~x(P4vB7E~8tzN&Fx|xlD8^=_2x*x9?wpDaPlBozVM@j=ptcv;y#zVFDlTlsUaIUAt?) zyZwHFPceP78%K09S>Vh}n}o}wU`!)vH?@>x6%ivqr9(j895xLNjycBrD7wxyzHtU49gDT&8Y_9O7L_ zU%-@V%bh@&EFp*Q)k%D>9z#qqiAZ*ftwmLi$G_lFcDk@j7y{BPvYCBU^4*;Y_^lVipusjHJE?m6gUG4x&?V= zHgVHUYyj=4HN|M#Fr>v&wuoVP_sqjfpE_%jm*7R72y1lCocq<|JK5D^iJEW*osWs?yT z*nV33N(s39aKP_6(9^!wTG)CukP@T9joaPVm zpvF{wW*}r}tMxEb^VOWl7UA)a%#S;2Md9i8WCMUVwsA*SpeoFUgL^Hx7SQ&!+yZN% zH^>Bqp-ITYfRrB&qX2+bJv;9aV_MaQ@9oh+`<~|K)rWEn-#(OSejmo9V>DFRuJEuC@!9p|Akka2Wz`EvnS??KI*<@M#{P|=_X$z_OQsli%8!n^1vUy8DNgJ&I z(=8jb@#uf~vy-dFwMCB3FfkoVqPAYZg9TK0j!mJ!&UK&Ye4x}GKEW|%ln!OF)&Z%b z9cI24q6DJUlo-|$?0f- z%hGVp`+%My0@FC^m4$~D$$g2sUZMJSUp^&48G3)Qp^LA;W1fZZr&a9xOIHMjbpi1r z2mg=U0U#Q0c8o5B18p{sUsE`PaiBmY7?-CEZQ4q|!9B!>Hv)s>mUvfWoX1FY`_bqj zX^NZY(4GlFU(IE*+QrNf2-+5&y~2TC%oP!x7YitN+YI-1e?z+{1FB16+mKcW5)Oz% z{BD0Xtt9XU0cyYJi;c|6J5Ut|6{Q2k6x+nM4Wf(xj_kjk{eHMG+FfF{4$GpgkaoLK zt2+8ES4tx+eB+~dV5=Bgk@=#N1`Y6TaS=a|H`yqE#!7qKd2xp7Vd#iEqbdf5W4fw! z!LS%nM6wxgh=;$f#K ziC|7AXcywq!MeYb85s1s)cT&s%5*XpKj?AHC-bpU}?PRxccx4&gI^TxK+E2Zn zhps!H9PACV?!t6ruyY_LyCG*;(0YG-y+(Q%u;x>9hr4xLgxD4_wg?>qZ(wV0_{App zo(p5FACeC%Y^9J1`ZsI+Yl-s^hdZh@cM6XfTZOpLH8*l$h*X93Goy=3kDm-+QLS^s z9*g7fsM5J@`7tohdZo&lR9Ls^>_nY0dXipD-Z9*On5Xdh9d55;p_2XKbu1=DlwAM zV+s7Y&dDVSPcysU!?SFfyCV~DL~KquPF$Tui;tBIeb_Vz#*TRuI`n) zE$tQU#d`%l${aa6rEDjbV(wI_Jzb6&L_NJmM00dcnc0_Dy5K(r=pjh?uu)6AH?Oct z7MFjeyMC@rkxLM3NW@i!0rwLH(i_UJa$XvnS=4#KuYG09naq|M2L^vrdu5(#A|2V* zv~<~ih~U-Z5ky$@xC&Z(p*3yo?4a;7x_k|)6)v;DMrBzKKP&MtINFS~e{cfmuv+r# zhhR8rjH{r@L^PKLnOY2DIK64I?N3;H%hnzh!);2jYrT5EWT$sZ_xC|hdrR*f_KIAu zz{NAnM`cUA4`!N}{Ud*(?A{8@yD$SrH#*zWPSWX!JG$J*?gPu%cRFpbgb_KW!P3g) z{*E3E8&?y!MNBQOTcZd0z?)j69E_M-=WhIAv}}KTYSD>cpEtUDH;mNFlaS)D^*pGd zwj&?7j#W`JyM?2=igP_IZRZIX+LoUJyDkoR6i--S&tZRGE@l+=`w>KrJn9hQ zLCt2OaXi_VQ+pcO>)ORG7Y}#K!6UY#7(&ySnzn@_hJ{t2QACZX0+@4J3}8+NNi83< zWjOn4J7o=g(+)iUE9V*?U;CrR0N}Ny;;DEZ#i$PCBH0Z1qf_+SK1mxjr8~v)vO$do zi>tS1>zs1*9W{T+mz+OVD4$)i*!R7Mh#Ij^DEO_dQQk*tnpx5g+ADm{X{2SPe?W$F zZo;xz0%Am0z8TJ_sGn6?-@mzj{qD_qj!Jb7M`JCmm$HxB)kNIPFM%8$@a+EB#i%su zs$cQ4rz3t_%Azg3OV#mR>3i-I5{4v@=71HmQ2uMR-qTMJ&7#wjn_Tuy!m6)BB_ zkwjou34g%Q7aC5r7mpa~LHlX3(pZz)0b!x(D^TN$*{y%xy*u%H>pJG)JGXud_nWPY z>mJd7`J#U^b%1VHpPp_M>DHkSQ4%94VE5p4ee^XMkks`((DoYi=U0g8&p-V)$`N|* z8+5!Tt6MH3Y0C!2q9v-+%q7Oh1drL!h64pysLbB;0vthgryMQq<@KWQ{$h?!i}?wt z6GrVj|4tkQ;POqmvte<2;ahZ5#cx0zC@Z4z|Zu0$2031 z=zto_#RveBp~i`XZWE4_4)``pqd!`Nf}D7~wxfq#aHefuBAqWSDwRZa+*i|!2d}8O z7RbZi5#${ZJITxWPm8c!=!YBh$LdG;%OZcUAWxW<)+g4nFe&K(WA(;p*m2%}z~>k! zIg<-1ifoU??%V+93t-k%=Z465ub~%pMFUH?z!r$yYBY(evvui;XU$}p$tqFM80HikY4={MVZq$i)liGKS{9{mX)RY_Y z95jaXNPmIR9l5V+cgDUR8`&=9S?8Uj5ANkA%}Gl({$;7Lum-@atC&se>1VgSt@e3% z__~zDf>)I1>jJ5*@WCwv!nsqhAC8^P$@lc^7KL{4cwhUnk}oUjD|>jc(yxDw9)zl+ z@-1E zy{30C@YZw&h@g1@fu1$$4eloFqAScqD2Mt5=c4cBLpOkCdhav6v49`SmKayxoO<^a zg_V2PE2zox{g=)_weo14N8iJcHl81Qo!PpsI0MpvQdz3R zrET$2G$kMu1dLiY%%Fcc=-&QWZEx(*Ba`+qL(jdTEO73v5pm=n&$awS#szUfXQk+= z#Z!y)p?z_Y>NU5b7$vB82V5Y+Uv!55M65*%<(OOOOkGQ0|iyb6izAILk!g`aW)%@UvDi0yydtNumRPy6sH!uyT@ z{XL&xZAyGfmL3^4bFRSn!UUs;i#@--=(2=pK=vTJrlWgbP0gD7s5|E{bbyZ}uld`T9&nfRa^@+c z3R^j4D}A_;SmMxMsn}x;?IY>%st-9ZDLv;eHoiVNgC$;}Cg5B_wE#(QBd# zH%ys{Lb4z=7_Lzt71d8!ailA~5F|v!^3g2L?!xA``?G$o$lY@0IaTY;`{zO87 z1H9jnIk^-V1w-}+*i*;e3PRbqUV{n75qgzk4JHQTNI(3Oh`n^Ad21xP3pp$_VsFNrexy558 zW{q|dHr+0B)|otEa8?+#w1V8Y7!RA`oK|9o^Qd&Q-n4&5X(#07cV2@@m8ol;tBFfK z>^7H4O_a()d!K|?IB-OC>XpoL@S=fCB_=ZV7Ruw#C#Po z$Z;pLHj(XQCl2!`KwGb2bCDUbQWZLDz)}XiIkQ&-xGP=1$o3mY^P+wK7E=-JbG?M{V1TlW<*zhReG zHyUn$6-~)*_jsfFzS!f44LkSAcznmUyYL*pu}4RdB{xsU5ir0DqzmOl>zUYf`b}BOw!>gSgVm&@ zq~LHKxndv3YrJE4r_~j6eW}{6d%Sh~dra1)1Tay}_1Fjrm9OJcZKPd8%2I6WY-rDIf@H*nKNB(-@3P7jvq4|Hb zwrk$hR1T1$Vona;ZGGJudc5mEe*mi<<0%T;Yo?BRZEMB$t3+iTpw2Fu);QYMOm-BE<=rpopa zwMERhE(&IkNmKXX;|fxw-eeqt9+ZFR$=;dZSD#sg$$pI49^cmTnq9NKh^lT}ryB{O0ZZS73_qIbwe&^nRnG zZ`~NV&SU~&zKW7eH~?~EhL zEnR!)>MN11`8G(5os5|Mwyb}JP5pfJ>ek+uU}UY4t0v=b(EQ_Lb|2eTI^F?(?Wv+vctDrKsKL(C3M z7Bni9r*=Tw@|DOEQq?eO##-Q}#>^wonJJ6}R{`VJC^?k)_9@s*Kmvc84P~#Qvb{|m zrZ@)9kb-VO9-2+;bQ2pudumNF+BOVnv6L-h82&x;@EYA`Zm^gt5G?o8spWfI53z4$ zNhy@Sx^x|Yg98>x0-_Ux`%xAL3GOLFL!L)(>ww-Ui~>rtW53|Eyl%$@9x?dIlE!}> zOz8Y1r5DGgnyt|z_Gf>TgU6>E*X(gYW0;}4XjA%J3K9Oygn>n|+7f|v6)Nf8mkUTl z#3GCEa$nhG#RRsW*1nPgE+wa^=6g2K@xAp8=%9U1^YiLMIfic^%Czze zuN9apV(%jYsQ`b28z?;jNfoJl=z?&+a&e@$_$Ra^8pI$SkcZe9wRJm=7V~(e^CioC z3Nqy&%?EpvW;61~NAJyaVqq9p9vWBM1=LVjfNi;0s~W4#P37gVav9*wsd2V?*`Rp2 zAA1^W$W)xGa$hWwnW3Nb0uTtXCO*N04Od#L%;*Mc4Um6-8v>igA+~H@3o{q@ARu5` zTg5kUb|Zzb8H%mYGm&D~y`mkyA`H)vgLZp0;abYtzP>hztxDm_*4xrQPKwxKEKZnU zC`{#Y#<5)&|A~a+!W1-=D>{z-&xR{w3r}F(^SrG046SUkEPnocvZSJOjb zm{LlIvRLbY)X@$z;|oy&QEEyYYl-o3g#rVnppt>S=Ha;0O9hr(e2Nj*Vq&I46EU&k%uW9QDe=!;0m;gk68HkbS!^pOT;qz1YyjSLiX%Lip1v z`u(LV0>iq2c#(tuNA3U+jW;_+7s7!yo5!yyoWVFypc0JBQ-(HerQhHl;=>z(!EsBx zD>BY=q`LlS^pG^g&2wnagrKkHGFk0n<_H9B3(sERz%S;Ch|Y@!l-q5Fd%M4(U6cXU zEwO)XNGk*h2gD(MH=9-xc!L17-}A*rX5}5Iii3*NfnthnV%rAM#eYZk-_CwNTsZA6 zFnb8a;G>Wh=(!dd*wV+U`Uf6u-qp4TusojpScXB6D6HU=#1YxTnKL3{`>+3o#g(kW z1!LP$P*U-*QIQIS`ZGkh3gkJ-%KeJq%d$sky`5Ixa$Nix^vk zj)6C@wKx2t6MWBwG1d>shZVL`$OQeHwf?old5FUu)uKCvM~tmPTvVRa4jDb!GSe&amY#W;^mKoe9e&mW zRait)c@R^cin36q`Wfg{2g}To!ZgcVao~OMUXyoppqs-PIE0ml_hKeI8W%$m_qH(h z_^_YNAr_S&N$D{KethTTl7y$3-S6R9HqG6U2{kTPc@N_Y-?J&Y(GTs>hTC7EP8wet-a8iwsv+<7#dx^2Gt6eSzx2GtcRbKco-aQ zM%q6(0d!a``E^7v95u#O&}1T-%YsZT1~Hu8G}-nitj%R>lZxRsrP#GzJz%oaJEi^m zpr^gH_YQkSu2g7pDaoBnu)KJ@z4_xP}sF~fuQC&s49+tNA1PpD<}ifx~) zjhfP(VtLu1MuUIF*W0soPPzJyn&eB;A1kEKu2}5*-a|x<*e4YH*48lZBQ?z|X$S2U zM&~rrveG{w!#Ou$*(?DuqATAFXH@jhs;uwdT)%$z<~&EGI)|gNmexzz$L(q&ZswOj z4i9*Cf9zsZ8gt>mOs9#>a1J zta(NLpc9DWJ4{S1^m^|qEbd=mhAHkf8i1cMb-H!?SV;%S<)eY%65&?|An>gPuTSHY z7i}&lzmtlT#=%G;u&abWVCV}Cr`n504E3P>v{-4ZS?z$Z(DW6m@kQ;{Kkwe9_`P)< z^YEQpzlDGM&DPCzk8HqvQJFeGx2sQ2H;Q!Y(1$3A5frd{@VY+wnhZ$l`W|R|4f^vd zMD^#N{u|{8JqHdtUX#@=myxt(17pz=)oJDu<70xyZfL`S0xVQ!?|A`^pt@9!miF>` zQFwnbN2kU7MAQkR_MLx6jskG`rrg=ExV`YLx~YF+xNo?ldXN1{(MKiA-)+-XOUi}j z`wxGd2XHSx_5p7k+`m{tR*}5`J%C~ebeagg72EI?o_3r0S=%>1jC<{a8(_f?+sQ^f zAAR;?YU08ZDxc10>X*^_K+L_Z#DQZj*v)t^YfwB6sV(eabZu;O7-iwvI$>w5py52+ zQ__DpE!?fhzNVutmBpGeWwz^sS3>{76>@{5m=BXOiSw%>sXkSbbzsX zV>IkI??2#k4V0Y8g%(A&$6|PHfb#_~YpQcYZ}m-i_v1aar!sr&T!W4ax@f{nrXw+o zbbUo_f~?xkWh?zdK7jC~0S8xIy^u)Xm!fP0@g{Ck;ZtbZT5Tx47Pf!5K;e*}d+~pF zymLZoSyvI4r*x|4PA)qG9l~zLV7>@a7qf0{BpA?_lX?4P^mRNaCN&u8aGj+mvqrld z$Ln&47YsEhelr;8a213nQwF0GK4up&x;BUPKYB`rRSNw3LA;-g1!GSl46MX_NeVj% zZIKBo#bWw%c7LyD?9LbC?>FIZ_MU%{zu&;wwIGWxT-4SCJZu0`oA?J9JYF~I#JfuE zyGH)8C^%}$4S5b4LwcmY!03+LSG7B1UyqG!m-4Lh4$=qrvXkbdB^&>;)L2*pVAfU4 zruFo*+um0DJUo1TN@Bq)%JX%B)K>W576ReiDd-Q!&gSHMdv=RLyLh~>eR+S$mmT$$ zJ-k@y*G>;Y)lvBxDMUx8E`?Ev%`Di*LB_=~5+R-kJN^MbjO1_-I24OO%qE$Tl!;LK z?RjuM^YLdcP}^S9I~aItIs-(|Jb*yY9`y!y6L!%RW+IeB{epAR_wu0|Kr_Afnci67 z4`oY?t8Y%d3yZ?az55kZWdMK4(Wt`Q4c;;!9m-`WMBj!b?C8NstS@MbMuvG5{6>;1 ze{Vfg{3>oj&;8lrH11ZLUi=GN3;TW*)qg6aO{^L6cc-XcFAoFLHZZb&D+j>&xxY@q zMTSR(bIv;i#=aLZ_ni3bZuI<<(U!uBnKEZ>5JY!A!=eI2(V*`*QAdBKIf;q>Z_RQl z>x@90!Y21zdSnSIExO%?&&l+u$7*xyqw_f>E^ebgYCZI94M+VW6+~L$IgK6cw5V1% zm+1)Jj&AMhku~eQp%zpXD>X(qjNyJBy}@0FiDi_jjP_;!B{Wd2JX+_`_b{Z5=f_@W zw(cv=fHa^~mMU>+TfBc1O$i7E0i)IpGiVOFw|{oq8$0v}rG4zsb8jdMoO^3T9Qns} zEkBWQL0r&TDSC?W)FORoUu2|u&aEg$3F_Se7l`l|o#8(bYtceECRfH%&h4kxI_5_= z$g10jQg%_9!H=8@_h}jO|Jkj;XGuV7xIeuNuL2^kLL&PI^iF?q;isHHv&5%5V*56$ ze{uEGKFo^nz9T??&nH@&5}%r-M~KawD=@w=!6@Ql&#yDOEFl_@J;?5<<}Q$Fj@DaS z5P|{0U_1!DfKZ9`+LA4AQ=QsaGOBe!?=5q(!?OkO$M8CiEB-T$*d2_n^zYmR+832Y zW_0ga<;6!ltGs_r;2^xjbh0_QWav9vDUZ)s?wWgXy7w@kRDQO}f;JG!-YjgBmz07sY0w*J=xBf5W||r4-e&EDS&amS1@EG7 zlh0yqFl$;aeahykEce}Tud~PnoQ^)n-i_$$L~`+aVOA84vFPWNW;KyaSr*=ecqQ0U z#3B<$%xNqHlWZzoV4(6LBu27ets`j+lPMXnE*ZrQW3K^h><8`vI=MQv!57CeSdMmV it?K}0*H9RT1CX<4`2Nv%z|ngB4?B;JjqyLs%C;x(2*DEo diff --git a/pull317/regression1.html b/pull317/regression1.html index 83bed651..0527bf46 100644 --- a/pull317/regression1.html +++ b/pull317/regression1.html @@ -671,23 +671,23 @@

7.4. Exploring a data set
-
+

Fig. 7.1 Scatter plot of price (USD) versus house size (square feet).#

@@ -800,23 +800,23 @@

7.5. K-nearest neighbors regression
-
+

Fig. 7.2 Scatter plot of price (USD) versus house size (square feet) with vertical line indicating 2,000 square feet on x-axis.#

@@ -985,23 +985,23 @@

7.5. K-nearest neighbors regression
-
+

Fig. 7.3 Scatter plot of price (USD) versus house size (square feet) with lines to 5 nearest neighbors (highlighted in orange).#

@@ -1075,23 +1075,23 @@

7.5. K-nearest neighbors regression
-
+

Fig. 7.4 Scatter plot of price (USD) versus house size (square feet) with predicted price for a 2,000 square-foot house based on 5 nearest neighbors represented as a red dot.#

@@ -1214,23 +1214,23 @@

7.6. Training, evaluating, and tuning th
-
+
-

Fig. 7.5 Scatter plot of price (USD) versus house size (square feet) with example predictions (orange line) and the error in those predictions compared with true response values for three selected observations (vertical lines).#

+

Fig. 7.5 Scatter plot of price (USD) versus house size (square feet) with example predictions (orange line) and the error in those predictions compared with true response values (vertical lines).#

@@ -1605,23 +1605,23 @@

7.6. Training, evaluating, and tuning th
-
+

Fig. 7.6 Effect of the number of neighbors on the RMSPE.#

@@ -1703,23 +1703,23 @@

7.7. Underfitting and overfitting
-
+

Fig. 7.7 Predicted values for house price (represented as a orange line) from K-NN regression models for six different values for \(K\).#

@@ -1912,23 +1912,23 @@

7.8. Evaluating on the test set
-
+

Fig. 7.8 Predicted values of house price (orange line) for the final K-NN regression model.#

@@ -2019,23 +2019,23 @@

7.9. Multivariable K-NN regression
-
+

Fig. 7.9 Scatter plot of the sale price of houses versus the number of bedrooms.#

@@ -2240,9 +2240,9 @@

7.9. Multivariable K-NN regression -

Fig. 8.1 Scatter plot of sale price versus size with line of best fit for subset of the Sacramento housing data.#

@@ -522,23 +522,23 @@

8.3. Simple linear regression
-
+

Fig. 8.2 Scatter plot of sale price versus size with line of best fit and a red dot at the predicted sale price for a 2,000 square-foot home.#

@@ -598,23 +598,23 @@

8.3. Simple linear regression
-
+

Fig. 8.3 Scatter plot of sale price versus size with many possible lines that could be drawn through the data points.#

Simple linear regression chooses the straight line of best fit by choosing the line that minimizes the average squared vertical distance between itself and -each of the observed data points in the training data. Fig. 8.4 illustrates +each of the observed data points in the training data (equivalent to minimizing the RMSE). Fig. 8.4 illustrates these vertical distances as lines. Finally, to assess the predictive accuracy of a simple linear regression model, we use RMSPE—the same measure of predictive performance we used with K-NN regression.

-
+

Fig. 8.4 Scatter plot of sale price versus size with lines denoting the vertical distances between the predicted values and the observed data points.#

@@ -920,23 +920,23 @@

8.4. Linear regression in Python
-
+

Fig. 8.5 Scatter plot of sale price versus size with line of best fit for the full Sacramento housing data.#

@@ -999,23 +999,23 @@

8.5. Comparing simple linear and K-NN re
-
+

Fig. 8.6 Comparison of simple linear regression and K-NN regression.#

@@ -1186,9 +1186,9 @@

8.6. Multivariable linear regression -

Fig. 8.8 Scatter plot of a subset of the data, with outlier highlighted in red.#

@@ -1413,23 +1413,23 @@

8.7.1. Outliers
-
+

Fig. 8.9 Scatter plot of the full data, with outlier highlighted in red.#

@@ -1495,23 +1495,23 @@

8.7.2. Multicollinearity
-
+

Fig. 8.10 Scatter plot of house size (in square feet) measured by person 1 versus house size (in square feet) measured by person 2.#

@@ -1684,23 +1684,23 @@

8.8. Designing new predictors
-
+

Fig. 8.11 Example of a data set with a nonlinear relationship between the predictor and the response.#

@@ -1772,23 +1772,23 @@

8.8. Designing new predictors
-
+

Fig. 8.12 Relationship between the transformed predictor and the response.#

diff --git a/pull317/searchindex.js b/pull317/searchindex.js index 84e9f183..68aaea1c 100644 --- a/pull317/searchindex.js +++ b/pull317/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["acknowledgements", "authors", "classification1", "classification2", "clustering", "index", "inference", "intro", "jupyter", "preface-text", "reading", "regression1", "regression2", "setup", "version-control", "viz", "wrangling"], "filenames": ["acknowledgements.md", "authors.md", "classification1.md", "classification2.md", "clustering.md", "index.md", "inference.md", "intro.md", "jupyter.md", "preface-text.md", "reading.md", "regression1.md", "regression2.md", "setup.md", "version-control.md", "viz.md", "wrangling.md"], "titles": ["Acknowledgments", "About the authors", "5. Classification I: training & predicting", "6. Classification II: evaluation & tuning", "9. Clustering", "Data Science", "10. Statistical inference", "1. Python and Pandas", "11. Combining code and text with Jupyter", "Preface", "2. Reading in data locally and from the web", "7. Regression I: K-nearest neighbors", "8. Regression II: linear regression", "13. Setting up your computer", "12. Collaboration with version control", "4. Effective data visualization", "3. Cleaning and wrangling data"], "terms": {"we": [0, 2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 16], "d": [0, 1, 6, 7, 10, 15], "like": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "thank": 0, "everyon": 0, "ha": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "contribut": [0, 1, 14], "develop": [0, 1, 3, 6, 7, 8, 9, 10, 14], "data": [0, 1, 4, 6, 9, 12, 13, 14], "scienc": [0, 1, 2, 3, 7, 8, 9, 13, 14, 16], "A": [0, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "first": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "introduct": [0, 3, 4, 6, 7, 9, 10, 12], "thi": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15], "an": [0, 1, 2, 6, 7, 8, 9, 11, 12, 13, 14, 16], "open": [0, 1, 5, 7, 8, 10, 13, 14, 15], "sourc": [0, 1, 10, 15], "textbook": [0, 1, 2, 3, 5, 9, 10, 12, 14, 16], "began": [0, 10], "collect": [0, 2, 3, 4, 6, 7, 10, 15, 16], "cours": [0, 1, 3, 4, 6, 7, 8, 9, 10, 12, 16], "read": [0, 2, 3, 5, 6, 7, 8, 9, 11, 12, 14, 15, 16], "dsci": [0, 10, 13], "100": [0, 2, 3, 6, 7, 10, 11, 12, 13, 15, 16], "new": [0, 3, 4, 6, 7, 10, 11, 13, 14, 15, 16], "introductori": [0, 3, 6], "univers": [0, 1, 6, 10], "british": [0, 1, 6, 7, 10], "columbia": [0, 1, 6, 10], "ubc": [0, 1, 10, 13], "sever": [0, 1, 2, 6, 10, 14, 15, 16], "faculti": 0, "member": [0, 2, 14], "depart": [0, 1], "statist": [0, 1, 2, 3, 4, 7, 10, 11, 12, 15], "were": [0, 2, 3, 6, 7, 8, 10, 12, 14, 15, 16], "pivot": 0, "shape": [0, 2, 4, 6, 7, 10, 12, 15, 16], "direct": [0, 2, 10, 15], "greatli": [0, 16], "broad": [0, 15], "structur": [0, 3, 4, 7, 10, 15], "list": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15], "topic": [0, 3, 4, 8, 12, 14], "book": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "would": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "especi": [0, 2, 7, 10, 13, 14, 15], "mat\u00eda": 0, "salib\u00edan": 0, "barrera": 0, "hi": [0, 1], "mentorship": 0, "dure": [0, 1, 3, 7, 11, 14, 16], "initi": [0, 1, 2, 4, 7, 10, 11, 12, 14, 15], "roll": 0, "out": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "both": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "door": 0, "wa": [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "alwai": [0, 2, 3, 4, 8, 10, 11, 12, 13, 15, 16], "when": [0, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16], "need": [0, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "chat": 0, "about": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "how": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "best": [0, 3, 6, 7, 10, 11, 12, 14, 15], "introduc": [0, 6, 7, 12, 14, 15, 16], "teach": [0, 1, 2, 7], "our": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "year": [0, 2, 7, 10, 15, 16], "student": [0, 1, 6], "also": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "gabriela": 0, "cohen": 0, "freue": 0, "her": [0, 1], "561": 0, "regress": [0, 2, 3, 4, 7, 9], "i": [0, 3, 4, 6, 7, 8, 9, 10, 12, 13, 15, 16], "materi": [0, 2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "from": [0, 3, 4, 6, 7, 8, 9, 11, 12, 13, 15], "master": [0, 1], "program": [0, 1, 3, 7, 8, 9, 10, 13, 15], "some": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "linear": [0, 3, 8, 11, 15], "figur": [0, 2, 7, 16], "inspir": [0, 10], "all": [0, 2, 3, 4, 6, 8, 9, 11, 12, 13, 14, 15, 16], "those": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "who": [0, 2, 3, 6, 7, 8, 10, 14, 15, 16], "process": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "publish": [0, 10, 15], "In": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "particular": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "review": [0, 10, 14], "feedback": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "suggest": [0, 2, 3, 6, 7, 11, 12, 15, 16], "rohan": 0, "alexand": 0, "isabella": 0, "ghement": 0, "virgilio": 0, "g\u00f3mez": 0, "rubio": 0, "albert": [0, 15], "kim": 0, "adam": 0, "loi": 0, "maria": 0, "prokofieva": 0, "emili": 0, "rieder": 0, "greg": [0, 14], "wilson": [0, 7, 14], "The": [0, 1, 6, 7, 10, 13, 14, 15, 16], "improv": [0, 2, 3, 4, 6, 7, 11, 12, 14, 15], "substanti": [0, 3, 11], "insight": [0, 4, 9, 15], "give": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "special": [0, 2, 6, 7, 10, 14, 15, 16], "jim": 0, "zidek": 0, "support": [0, 2, 3, 7, 13, 15, 16], "encourag": [0, 16], "throughout": [0, 2, 3, 7, 9, 14, 16], "roger": [0, 7], "peng": [0, 7], "gracious": 0, "offer": [0, 3, 6, 10, 11, 12, 14], "write": [0, 2, 7, 8, 12, 14, 16], "foreword": 0, "final": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "ow": 0, "debt": 0, "gratitud": 0, "over": [0, 1, 2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15, 16], "past": [0, 2, 3, 4, 10, 11, 12, 13, 14, 15], "few": [0, 2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15, 16], "thei": [0, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "provid": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "invalu": 0, "worksheet": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "found": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "bug": [0, 14, 16], "us": [0, 1, 2, 3, 4, 8, 9, 11, 12, 13, 15], "stood": 0, "veri": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "patient": [0, 2, 3], "class": [0, 2, 3, 7, 10, 15, 16], "while": [0, 2, 3, 4, 7, 9, 10, 12, 15, 16], "frantic": 0, "fix": [0, 2, 3, 6, 8, 11, 14, 15, 16], "brought": 0, "level": [0, 3, 4, 6, 7, 9, 12, 15], "enthusiasm": 0, "sustain": 0, "hard": [0, 7, 10, 15, 16], "work": [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 15, 16], "creat": [0, 1, 2, 4, 6, 9, 10, 11, 12, 16], "interact": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "them": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "taught": [0, 2], "learn": [0, 1, 9], "reflect": [0, 1, 15], "content": [0, 1, 2, 5, 10, 14, 16], "translat": [0, 10], "origin": [0, 1, 2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "which": [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "focus": [0, 1, 2, 3, 11], "r": [0, 1, 3, 4, 5, 6, 7, 10, 15], "languag": [0, 1, 2, 3, 6, 8, 9, 10, 11, 13, 16], "ar": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "navya": 0, "dahiya": 0, "gloria": 0, "ye": [0, 2], "complet": [0, 1, 3, 6, 7, 8, 10, 11, 13, 14], "round": [0, 3, 6], "philip": 0, "austin": 0, "leadership": 0, "guidanc": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "gratefulli": 0, "educ": [0, 1, 2], "resourc": [0, 1, 2, 11], "fund": 0, "exercis": [0, 9, 13], "version": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16], "tiffani": [1, 5, 7, 15], "timber": [1, 5, 7, 15], "trevor": [1, 3, 4, 5, 12], "campbel": [1, 5], "melissa": [1, 5], "lee": [1, 5, 15], "adapt": [1, 15], "python": [1, 2, 9, 11, 13, 14, 15], "joel": [1, 5], "ostblom": [1, 5], "lindsei": [1, 5], "heagi": [1, 5], "associ": [1, 6, 7, 9, 10, 14, 16], "professor": 1, "co": [1, 15], "director": 1, "vancouv": [1, 6, 10, 15, 16], "option": [1, 2, 8, 10, 12, 13, 14, 15, 16], "role": [1, 7, 10, 15], "she": 1, "curriculum": 1, "around": [1, 3, 6, 7, 11, 12, 15, 16], "respons": [1, 2, 3, 4, 10, 11, 12, 14], "applic": [1, 3, 6, 7, 10, 11, 12, 13, 16], "solv": [1, 2, 3, 4, 7, 9, 12, 14, 16], "real": [1, 2, 3, 6, 7, 10, 11, 12, 14, 16], "world": [1, 6, 7, 9, 14, 15, 16], "problem": [1, 3, 4, 6, 7, 8, 9, 12, 14, 15, 16], "One": [1, 2, 3, 6, 7, 8, 11, 12, 14, 15, 16], "favorit": [1, 12], "graduat": 1, "collabor": [1, 8, 9], "softwar": [1, 2, 9, 10, 13, 14, 15, 16], "packag": [1, 2, 3, 4, 7, 10, 11, 12, 13, 15, 16], "modern": [1, 2, 10, 15], "tool": [1, 2, 3, 4, 7, 8, 9, 10, 12, 15, 16], "workflow": [1, 2, 3, 4, 7, 8, 9, 11, 12], "research": [1, 4, 15], "autom": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "scalabl": 1, "bayesian": 1, "infer": 1, "algorithm": [1, 3, 11, 12, 15], "nonparametr": [1, 2, 11], "stream": 1, "theori": [1, 2, 4, 6, 11], "he": 1, "previous": [1, 2, 6, 7, 10, 11, 15, 16], "postdoctor": 1, "advis": [1, 10, 11, 15], "tamara": 1, "broderick": 1, "comput": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "artifici": 1, "intellig": 1, "laboratori": [1, 4], "csail": 1, "institut": [1, 3, 15], "system": [1, 10, 13, 14], "societi": 1, "idss": 1, "mit": 1, "ph": 1, "candid": [1, 3, 12], "under": [1, 5, 8, 13, 14, 16], "jonathan": 1, "inform": [1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "decis": [1, 2, 3, 6, 14], "lid": 1, "befor": [1, 2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 16], "engin": [1, 10, 12, 13, 15], "toronto": [1, 10, 16], "assist": 1, "undergradu": [1, 6], "center": [1, 4, 6, 10, 12, 15, 16], "approach": [1, 2, 3, 4, 6, 7, 9, 11, 12, 14, 16], "assess": [1, 3, 4, 11, 12, 14, 15], "promot": 1, "equiti": 1, "divers": [1, 6], "inclus": [1, 3, 12, 14], "phd": 1, "passion": 1, "reproduc": [1, 3, 4, 6, 8, 9, 10, 14], "through": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "quantit": [1, 3, 4, 6, 7, 11, 15], "imag": [1, 2, 8, 9, 10, 13, 15], "analysi": [1, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16], "pipelin": [1, 3, 4, 11], "studi": [1, 2, 3, 4, 6, 7, 11, 15, 16], "stem": [1, 11], "cell": [1, 2, 3, 4, 7, 10, 12, 16], "development": 1, "biologi": [1, 14], "sinc": [1, 2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "lead": [1, 2, 3, 7, 8, 14, 15], "workshop": [1, 2], "now": [1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "care": [1, 2, 3, 10, 11, 12, 15, 16], "deepli": [1, 16], "spread": [1, 2, 4, 6, 7, 15, 16], "literaci": 1, "excit": [1, 7], "programmat": [1, 3, 10], "project": [1, 2, 8, 10, 15], "earth": [1, 15], "ocean": 1, "atmospher": [1, 15], "geophys": 1, "invers": 1, "facil": [1, 14], "combin": [1, 2, 3, 4, 9, 10, 12, 14, 15], "method": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "numer": [1, 2, 6, 10, 11, 12, 15, 16], "simul": [1, 2, 6, 15], "machin": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "answer": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 15, 16], "question": [1, 2, 3, 4, 6, 9, 11, 12, 15, 16], "subsurfac": 1, "primari": [1, 2, 7, 14, 15, 16], "includ": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "miner": 1, "explor": [1, 3, 4, 6, 10, 12, 14, 15, 16], "carbon": [1, 15], "sequestr": 1, "groundwat": 1, "environment": [1, 4], "bsc": 1, "alberta": [1, 10, 16], "held": [1, 3], "posit": [1, 3, 4, 7, 11, 15], "california": [1, 11], "berkelei": 1, "prior": [1, 2, 10, 14], "start": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "current": [1, 2, 7, 8, 10, 12, 14, 15, 16], "previou": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "sole": [2, 8], "descript": [2, 6, 7, 8, 9, 10, 14, 15, 16], "exploratori": [2, 3, 4, 7, 9, 11, 15], "next": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "serv": [2, 3, 6, 8, 12, 14], "forai": [2, 11], "focu": [2, 3, 4, 6, 7, 8, 11, 12, 14, 15, 16], "e": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "one": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15], "valu": [2, 4, 6, 8, 11, 12, 15], "categor": [2, 3, 4, 6, 7, 11, 15, 16], "interest": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "cover": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "basic": [2, 3, 7, 10, 12, 14, 15], "make": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16], "suitabl": [2, 16], "classifi": 2, "accur": [2, 3, 6, 11, 12, 15], "well": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "where": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "possibl": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "maxim": [2, 3, 11], "accuraci": [2, 3, 6, 11, 12], "By": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "end": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "reader": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "abl": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "do": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15], "follow": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "recogn": [2, 8, 10, 11, 14, 16], "situat": [2, 3, 4, 7, 11, 14, 15, 16], "appropri": [2, 3, 4, 7, 10, 11, 13, 15, 16], "what": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15], "interpret": [2, 3, 4, 7, 8, 10, 11, 12, 14, 15, 16], "output": [2, 3, 4, 7, 8, 10, 11, 12, 15, 16], "hand": [2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15, 16], "straight": [2, 4, 11, 12, 15], "line": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 16], "euclidean": [2, 4], "graph": 2, "predictor": [2, 4, 11], "explain": [2, 4, 6, 7, 11, 12], "perform": [2, 4, 6, 7, 8, 9, 10, 11, 12, 15], "imput": 2, "step": [2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16], "model": [2, 3, 4, 12, 14, 15], "make_pipelin": [2, 3, 4, 11], "mani": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "want": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "base": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15], "experi": [2, 15], "For": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "instanc": [2, 3, 6, 7, 10, 16], "doctor": [2, 3], "mai": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "diagnos": [2, 3], "either": [2, 3, 4, 7, 8, 9, 11, 12, 14, 16], "diseas": 2, "healthi": 2, "symptom": 2, "s": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "email": [2, 10, 14], "might": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "tag": [2, 10, 13], "given": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "spam": 2, "text": [2, 3, 6, 7, 9, 11, 12, 13, 14, 16], "credit": 2, "card": 2, "compani": 2, "whether": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15], "purchas": [2, 4, 11, 12], "fraudul": 2, "item": [2, 4, 7, 8, 10, 11, 14, 15, 16], "amount": [2, 3, 4, 8, 10, 11, 12, 15], "locat": [2, 10, 14, 15], "These": [2, 3, 4, 7, 8, 10, 12, 14, 15], "task": [2, 4, 6, 9, 11, 15, 16], "exampl": [2, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "sometim": [2, 3, 6, 7, 10, 11, 12, 13, 15, 16], "call": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "label": [2, 4, 7, 11, 15, 16], "other": [2, 3, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16], "featur": [2, 3, 8, 11, 12, 14, 15], "gener": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "assign": [2, 4, 6, 7, 10, 15, 16], "without": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "known": [2, 3, 7, 10, 12, 15], "g": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "basi": [2, 10, 15], "similar": [2, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "know": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "name": [2, 3, 4, 6, 8, 11, 12, 13, 14, 15], "come": [2, 4, 6, 7, 11, 12, 13, 15, 16], "fact": [2, 3, 6, 7, 8, 10, 12, 14], "onc": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "can": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "There": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "could": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "wide": [2, 3, 4, 10, 12, 14, 15], "hart": [2, 11], "1967": [2, 3, 11], "hodg": [2, 11], "1951": [2, 11], "your": [2, 3, 4, 6, 7, 9, 10, 11, 12, 15, 16], "futur": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15], "you": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "encount": [2, 4, 10, 11, 12, 13, 16], "tree": [2, 3, 11], "vector": [2, 3, 10, 15], "svm": 2, "logist": [2, 3, 12], "neural": 2, "network": [2, 10], "see": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "addit": [2, 7, 11], "section": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "begin": [2, 3, 4, 6, 7, 10, 11, 14, 15, 16], "It": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "worth": [2, 3, 15, 16], "mention": [2, 3, 4, 6, 8, 10, 12, 13, 14, 16], "variat": [2, 6, 11, 15], "binari": [2, 3], "onli": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "involv": [2, 3, 4, 8, 10, 12, 13, 14, 15, 16], "diagnosi": [2, 3], "run": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "multiclass": 2, "categori": [2, 3, 4, 6, 7, 10, 15, 16], "bronchiti": 2, "pneumonia": 2, "common": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16], "cold": 2, "digit": 2, "breast": [2, 3], "dr": [2, 4, 15], "william": [2, 3, 4], "h": [2, 10], "wolberg": [2, 3], "w": [2, 7, 10], "nick": [2, 3, 7], "street": [2, 3], "olvi": [2, 3], "l": [2, 10], "mangasarian": [2, 3], "et": [2, 3, 4, 6, 10, 12, 14, 15], "al": [2, 3, 4, 6, 10, 12, 14, 15], "1993": [2, 3], "row": [2, 3, 4, 6, 8, 11, 12, 14, 15], "repres": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "tumor": [2, 7], "sampl": [2, 3, 11], "benign": [2, 3, 7, 11], "malign": [2, 3, 7, 11], "measur": [2, 3, 6, 7, 11, 12, 15, 16], "nucleu": 2, "textur": [2, 3], "perimet": [2, 3, 7], "area": [2, 3, 7, 10, 11, 12, 14, 15, 16], "conduct": [2, 10], "physician": 2, "As": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "analys": [2, 3, 4, 7, 8, 9, 10, 14, 15, 16], "formul": [2, 6, 7, 11, 15], "precis": [2, 3, 6, 8, 11, 13, 14, 16], "here": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "avail": [2, 3, 7, 9, 10, 12, 13, 15], "unknown": [2, 6, 7], "show": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "import": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "becaus": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "tradit": 2, "non": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "driven": [2, 4], "quit": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "subject": [2, 8, 14, 15], "depend": [2, 3, 4, 6, 8, 10, 11, 12, 15, 16], "upon": [2, 3, 10], "skill": [2, 8, 10, 15], "experienc": 2, "furthermor": [2, 3, 15], "normal": [2, 3, 6, 14, 16], "danger": [2, 13], "stai": [2, 6, 10, 15], "same": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "place": [2, 3, 4, 6, 7, 8, 10, 11, 13, 14, 15, 16], "stop": [2, 3, 4, 8, 12, 13], "grow": [2, 3, 12], "get": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "larg": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15], "contrast": [2, 3, 4, 6, 7, 10, 11, 12, 14], "invad": 2, "surround": [2, 7, 10, 14, 15], "tissu": 2, "nearbi": [2, 3, 10], "organ": [2, 7, 8, 10, 14, 15, 16], "caus": [2, 3, 4, 7, 8, 11, 12, 15, 16], "seriou": [2, 7, 14], "damag": [2, 3], "stanford": 2, "health": 2, "2021": [2, 7, 10], "thu": [2, 3, 8, 10, 11, 12, 14, 16], "quickli": [2, 3, 7, 12, 15], "type": [2, 3, 4, 6, 8, 10, 11, 12, 13, 14, 15], "guid": [2, 7, 11, 14, 15], "treatment": [2, 3, 16], "wrangl": [2, 3, 7, 9, 10, 12, 15], "visual": [2, 3, 4, 6, 8, 9, 11, 12, 14, 16], "order": [2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16], "better": [2, 3, 4, 11, 12, 15], "understand": [2, 3, 4, 6, 7, 9, 10, 12, 14, 15, 16], "panda": [2, 3, 4, 6, 8, 10, 11, 12, 15, 16], "altair": [2, 3, 4, 8, 11, 12], "pd": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "alt": [2, 3, 4, 6, 7, 11, 12, 15], "case": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "file": [2, 7, 13, 16], "contain": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "csv": [2, 3, 4, 6, 7, 11, 12, 15, 16], "header": [2, 7, 8, 14, 16], "ll": [2, 3, 6, 7, 10, 11, 13, 14, 15, 16], "read_csv": [2, 3, 4, 6, 7, 8, 11, 12, 15, 16], "function": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "argument": [2, 3, 4, 6, 7, 8, 11, 15, 16], "inspect": [2, 7, 10, 15, 16], "wdbc": 2, "id": [2, 3, 6, 15], "radiu": [2, 3], "smooth": [2, 3, 11, 15], "compact": [2, 3], "concav": [2, 3], "concave_point": [2, 3], "symmetri": [2, 3], "fractal_dimens": [2, 3], "0": [2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16], "842302": 2, "m": [2, 3, 7, 10, 15], "1": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "096100": 2, "2": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 16], "071512": 2, "268817": 2, "983510": 2, "567087": 2, "3": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "280628": 2, "650542": 2, "530249": 2, "215566": 2, "253764": 2, "842517": 2, "828212": 2, "353322": 2, "684473": 2, "907030": 2, "826235": 2, "486643": 2, "023825": 2, "547662": 2, "001391": 2, "867889": 2, "84300903": 2, "578499": 2, "455786": 2, "565126": 2, "557513": 2, "941382": 2, "052000": 2, "362280": 2, "035440": 2, "938859": 2, "397658": 2, "84348301": 2, "768233": 2, "253509": 2, "592166": 2, "763792": 2, "280667": 2, "399917": 2, "914213": 2, "450431": 2, "864862": 2, "4": [2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "906602": 2, "84358402": 2, "748758": 2, "150804": 2, "775011": 2, "824624": 2, "280125": 2, "538866": 2, "369806": 2, "427237": 2, "009552": 2, "561956": 2, "564": [2, 3], "926424": 2, "109139": 2, "720838": 2, "058974": 2, "341795": 2, "040926": 2, "218868": 2, "945573": 2, "318924": 2, "312314": 2, "930209": 2, "565": [2, 3], "926682": 2, "703356": 2, "083301": 2, "614511": 2, "722326": 2, "102368": 2, "017817": 2, "692434": 2, "262558": 2, "217473": 2, "057681": 2, "566": [2, 3], "926954": 2, "701667": 2, "043775": 2, "672084": 2, "577445": 2, "839745": 2, "038646": 2, "046547": 2, "105684": 2, "808406": 2, "894800": 2, "567": [2, 3], "927241": 2, "836725": 2, "334403": 2, "980781": 2, "733693": 2, "524426": 2, "269267": 2, "294046": 2, "656528": 2, "135315": 2, "042778": 2, "568": [2, 3], "92751": 2, "b": [2, 3], "806811": 2, "220718": 2, "812793": 2, "346604": 2, "109349": 2, "149741": 2, "113893": 2, "260710": 2, "819349": 2, "560539": 2, "569": [2, 3], "12": [2, 3, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "column": [2, 3, 4, 6, 8, 11, 12, 13, 15], "biopsi": [2, 3], "remov": [2, 3, 7, 13, 14, 15], "bodi": [2, 14], "examin": [2, 3, 4, 10, 11], "presenc": [2, 3], "tradition": 2, "procedur": [2, 3, 4, 11], "invas": 2, "fine": [2, 3, 8, 14, 15, 16], "needl": 2, "aspir": 2, "present": [2, 3, 6, 7, 10, 14, 15, 16], "extract": [2, 3, 4, 7, 10, 11, 12], "small": [2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15], "less": [2, 3, 4, 6, 10, 11, 12, 14, 15, 16], "ten": [2, 6, 7, 15], "differ": [2, 3, 4, 6, 7, 11, 12, 13, 14, 16], "below": [2, 3, 6, 7, 10, 12, 14, 15], "mean": [2, 3, 7, 8, 10, 11, 12, 14, 15, 16], "across": [2, 3, 6, 7, 10, 11, 12, 14, 15], "nuclei": 2, "record": [2, 3, 6, 7, 10, 14, 15, 16], "part": [2, 3, 4, 7, 8, 9, 10, 11, 12, 14, 15, 16], "prepar": [2, 3, 15], "have": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "been": [2, 3, 8, 10, 11, 12, 14, 15, 16], "standard": [2, 3, 4, 6, 8, 11, 12, 14, 15, 16], "discuss": [2, 3, 6, 10, 11, 12, 13, 14, 15, 16], "why": [2, 3, 7, 11, 15, 16], "later": [2, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "addition": [2, 3, 4, 6, 8, 10, 12, 14, 16], "uniqu": [2, 3, 7, 14], "therefor": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "total": [2, 3, 4, 7, 10, 11, 15, 16], "per": [2, 6, 10, 14, 15, 16], "identif": 2, "number": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "deviat": [2, 3, 4, 6, 16], "grai": [2, 14, 16], "length": [2, 4, 6, 7, 15, 16], "contour": 2, "insid": [2, 3, 6, 7, 8, 10, 13, 14, 15, 16], "local": [2, 11, 13], "ratio": [2, 16], "squar": [2, 3, 4, 7, 8, 10, 11, 12, 15, 16], "portion": [2, 10], "mirror": 2, "fractal": 2, "dimens": 2, "rough": [2, 4, 15], "info": [2, 3, 7, 15, 16], "preview": [2, 3, 4, 6, 7, 8, 9, 11, 12, 14, 15, 16], "frame": [2, 3, 4, 6, 8, 10, 11, 12, 15], "easier": [2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 16], "lot": [2, 3, 4, 7, 10, 12, 15, 16], "print": [2, 3, 6, 7, 8, 10, 12, 13, 15, 16], "down": [2, 8, 10, 13, 14, 16], "page": [2, 3, 4, 5, 8, 10, 12, 13, 14], "instead": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "entri": [2, 3, 6, 7, 10, 15, 16], "core": [2, 3, 7, 15, 16], "datafram": [2, 3, 4, 6, 10, 11, 12, 15, 16], "rangeindex": [2, 3, 15, 16], "null": [2, 3, 15, 16], "count": [2, 3, 6, 7, 10, 15, 16], "dtype": [2, 3, 6, 15, 16], "int64": [2, 3, 10, 15, 16], "float64": [2, 3, 6, 10, 15, 16], "6": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "7": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "8": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "9": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "10": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "11": [2, 3, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "memori": [2, 3, 10, 15, 16], "usag": [2, 3, 7, 10, 12, 15, 16], "53": [2, 3, 6, 12, 14], "kb": [2, 3, 15, 16], "abov": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15], "arrai": [2, 3, 4, 11, 12], "readabl": [2, 3, 7, 10, 11, 14, 15, 16], "renam": [2, 3, 6, 7, 8, 10, 11, 16], "replac": [2, 3, 6, 7, 10, 12, 13, 14, 15], "take": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "dictionari": [2, 3, 10, 16], "map": [2, 4, 7, 10, 11, 12, 15], "desir": [2, 3, 7, 10, 11, 14, 16], "verifi": [2, 6, 13], "result": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "ani": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "let": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "groupbi": [2, 6], "size": [2, 3, 4, 6, 10, 11, 12, 16], "find": [2, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "percentag": [2, 3, 6, 7, 15], "pair": [2, 3, 4, 10, 16], "Then": [2, 3, 4, 6, 7, 8, 11, 12, 13, 14, 15, 16], "calcul": [2, 3, 4, 11, 12], "group": [2, 3, 4, 6, 7, 13, 15], "divid": [2, 3, 7, 10, 15, 16], "multipli": [2, 7, 15], "equal": [2, 3, 4, 6, 11, 12, 16], "access": [2, 3, 4, 6, 11, 13, 15, 16], "via": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15], "attribut": [2, 3, 4, 5, 7, 10, 11], "357": [2, 3], "63": [2, 3, 10, 11], "212": [2, 4, 7, 10, 15, 16], "37": [2, 3, 4, 14, 15], "62": [2, 10, 11, 15], "741652": 2, "258348": 2, "conveni": [2, 3, 7, 10, 16], "value_count": [2, 3, 6, 16], "occurr": [2, 15], "If": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "pass": [2, 3, 7, 10, 11, 15, 16], "seri": [2, 3, 6, 11, 12], "occur": [2, 3, 4, 6, 8, 11, 12, 14, 15, 16], "true": [2, 3, 4, 6, 7, 8, 11, 15, 16], "fraction": [2, 3, 6, 11, 14, 15], "627417": 2, "372583": 2, "proport": [2, 3, 7, 15, 16], "draw": [2, 6, 7, 11, 12, 15], "color": [2, 3, 4, 10, 11, 12, 16], "scatter": [2, 3, 4, 11, 12], "plot": [2, 3, 4, 6, 11, 12, 16], "relationship": [2, 3, 4, 6, 7, 11, 12, 15, 16], "recal": [2, 3, 4, 6, 7, 11, 12, 14, 15, 16], "default": [2, 3, 7, 8, 10, 11, 13, 14, 15, 16], "palett": 2, "colorblind": [2, 15], "friendli": [2, 15], "so": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "stick": [2, 3, 10, 14], "perim_concav": [2, 3], "chart": [2, 3, 4, 6, 11, 12], "mark_circl": [2, 3, 4, 11, 12, 15], "encod": [2, 3, 4, 6, 7, 10, 11, 12, 15], "x": [2, 3, 4, 6, 7, 10, 11, 12, 13, 15], "titl": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15], "y": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15], "versu": [2, 3, 4, 6, 7, 10, 11, 12, 16], "fig": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "typic": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "fall": [2, 3, 6, 11, 14, 15], "upper": [2, 6, 14, 15], "right": [2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16], "corner": [2, 3, 13, 14, 15], "lower": [2, 3, 6, 8, 12, 15], "left": [2, 3, 4, 5, 7, 8, 10, 12, 13, 14, 15, 16], "word": [2, 3, 6, 7, 8, 10, 11, 12, 14, 16], "tend": [2, 3, 11, 14, 15], "ones": [2, 15, 16], "larger": [2, 3, 4, 6, 10, 11, 12, 14, 15], "suppos": [2, 4, 6, 7, 8, 10, 11, 14, 16], "obtain": [2, 3, 4, 6, 7, 11, 12, 14, 15, 16], "except": [2, 10, 12, 14, 16], "sai": [2, 3, 6, 8, 10, 11, 12, 13, 15, 16], "respect": [2, 3, 4, 6, 10, 14, 15, 16], "lie": 2, "middl": [2, 6, 10], "orang": [2, 4, 11, 12], "cloud": [2, 10, 14, 15], "probabl": [2, 3, 6, 10, 12], "seem": [2, 3, 6, 8, 10, 11, 12, 15, 16], "actual": [2, 3, 4, 6, 7, 10, 11, 12, 14, 16], "practic": [2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "To": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "most": [2, 3, 4, 6, 7, 8, 10, 14, 15, 16], "must": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "choos": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 16], "advanc": [2, 3, 4, 6, 8, 12, 13, 14, 15, 16], "assum": [2, 6, 8], "someon": [2, 3, 7, 8, 14], "chosen": [2, 3, 4, 12, 16], "ourselv": [2, 3, 11], "illustr": [2, 3, 6, 11, 12, 15, 16], "concept": [2, 6, 7, 9, 11, 12, 14, 15], "walk": [2, 7, 11, 14], "whose": [2, 8, 10, 14, 16], "depict": [2, 4], "red": [2, 4, 8, 10, 11, 12, 13, 14], "diamond": 2, "coordin": [2, 4, 7, 15], "idea": [2, 3, 6, 7, 8, 10, 12, 13, 14, 15, 16], "close": [2, 3, 4, 6, 7, 10, 11, 14, 15], "anoth": [2, 3, 6, 7, 8, 10, 11, 12, 14, 15, 16], "expect": [2, 3, 4, 6, 7, 8, 10, 11, 12, 16], "look": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "doe": [2, 3, 4, 6, 7, 10, 11, 12, 15], "consid": [2, 3, 4, 6, 7, 11, 12, 14, 15, 16], "closest": [2, 3, 10, 15], "among": [2, 10, 14, 16], "major": [2, 3, 4, 7, 11, 12, 15, 16], "shown": [2, 3, 4, 7, 8, 10, 11, 12, 14, 15, 16], "vote": [2, 3, 7], "three": [2, 3, 4, 6, 7, 8, 9, 10, 11, 14, 15, 16], "chose": [2, 3, 15], "noth": [2, 6, 7, 12], "though": [2, 3, 6, 7, 10, 11, 12, 14, 15, 16], "odd": [2, 10], "avoid": [2, 3, 12, 15], "ti": [2, 10], "decid": [2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "often": [2, 3, 4, 6, 7, 8, 10, 11, 14, 15, 16], "just": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "denot": [2, 4, 7, 10, 11, 12, 15, 16], "a_x": 2, "a_i": 2, "b_x": 2, "b_y": 2, "definit": [2, 10, 15, 16], "plane": [2, 12], "formula": [2, 3, 4, 11, 12, 15], "mathrm": [2, 3], "sqrt": [2, 3, 11, 14], "select": [2, 4, 6, 8, 10, 11, 12, 13, 14, 15], "correspond": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "smallest": [2, 7, 11, 15, 16], "code": [2, 3, 7, 9, 10, 11, 13, 14, 15, 16], "add": [2, 3, 4, 6, 7, 10, 11, 13, 15, 16], "root": [2, 3, 10, 11, 14], "nsmallest": [2, 11, 15], "new_obs_perimet": 2, "new_obs_concav": 2, "dist_from_new": 2, "112": 2, "241202": 2, "653051": 2, "880626": 2, "258": 2, "750277": 2, "870061": 2, "979663": 2, "351": 2, "622700": 2, "541410": 2, "143088": 2, "430": 2, "416930": 2, "314364": 2, "256806": 2, "152": 2, "160091": 2, "039155": 2, "279258": 2, "tabl": [2, 3, 5, 7, 8, 10, 13, 15, 16], "mathemat": [2, 3, 6, 11, 12, 15], "detail": [2, 3, 4, 7, 8, 10, 12, 13, 14, 15, 16], "24": [2, 14, 15], "65": [2, 3, 6, 10, 11, 16], "88": [2, 3], "75": [2, 3, 6, 7, 10, 11, 12, 15, 16], "87": [2, 3, 11], "98": [2, 7, 12, 15], "54": [2, 3, 14, 15, 16], "14": [2, 3, 4, 6, 8, 10, 14, 15, 16], "42": [2, 6, 14, 15, 16], "31": [2, 3, 14, 15, 16], "26": [2, 3, 14, 15], "16": [2, 3, 4, 6, 10, 14, 15], "04": [2, 10, 13, 15, 16], "28": [2, 4, 11, 12, 14, 15], "circl": [2, 8, 14, 15], "although": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "toward": [2, 6, 7, 14], "exactli": [2, 3, 6, 7, 10, 11, 12, 13, 15], "appli": [2, 3, 7, 11, 12, 15], "higher": [2, 3, 6, 7, 11, 12, 15, 16], "help": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "a_": 2, "dot": [2, 7, 10, 11, 12, 15], "b_": 2, "becom": [2, 3, 4, 6, 7, 8, 11, 12, 14, 16], "still": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15], "space": [2, 8, 10, 11, 12, 13, 15], "417": [2, 15], "837": 2, "had": [2, 3, 6, 7, 10, 11, 15, 16], "ad": [2, 3, 4, 10, 11, 12, 14], "up": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15], "took": [2, 6, 7], "27": [2, 7, 11, 14, 15], "new_obs_symmetri": 2, "836722": 2, "267368": 2, "400": [2, 11, 16], "334664": 2, "886368": 2, "099359": 2, "472326": 2, "562": 2, "470430": 2, "084810": 2, "154075": 2, "499268": 2, "68": 2, "365450": 2, "812359": 2, "092064": 2, "531594": 2, "055065": 2, "555575": 2, "dimension": 2, "five": [2, 3, 13, 15, 16], "3d": [2, 11, 12], "note": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "recommend": [2, 7, 8, 9, 11, 12, 13, 14, 16], "against": [2, 8, 11, 12], "purpos": [2, 3, 4, 6, 10, 11, 12, 14, 15, 16], "complic": [2, 7, 10, 11, 15], "handl": [2, 3, 7, 15], "multipl": [2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15], "thankfulli": [2, 4], "implement": [2, 3, 4, 12, 15], "buitinck": 2, "2013": [2, 3, 4, 12], "along": [2, 3, 6, 7, 10, 11, 13, 14, 15], "sklearn": [2, 3, 4, 11, 12], "keep": [2, 3, 6, 7, 10, 13, 14, 15, 16], "simpl": [2, 3, 4, 6, 10, 11, 13, 15, 16], "fewer": [2, 3], "mistak": [2, 3, 11, 15], "tell": [2, 3, 6, 7, 8, 10, 11, 12, 14, 15, 16], "prefer": [2, 3, 4, 10, 12, 15, 16], "regular": [2, 10, 11, 14, 15, 16], "set_config": [2, 3, 4, 11, 12], "notic": [2, 3, 6, 7, 10, 12, 15, 16], "wai": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15, 16], "prefix": 2, "extens": [2, 8, 10, 12, 13, 14, 15], "subsequ": [2, 7, 15], "long": [2, 3, 4, 6, 7, 8, 10, 12, 14, 15], "clutter": [2, 15], "kneighborsclassifi": [2, 3], "38": [2, 4, 11, 14, 15], "charact": [2, 7, 8, 10, 14, 15, 16], "transform_output": [2, 3, 4, 11, 12], "modul": 2, "build": [2, 3, 11, 15], "pick": [2, 3, 4, 10, 12, 14, 15], "store": [2, 3, 4, 6, 7, 8, 10, 13, 14, 15, 16], "cancer_train": [2, 3], "specifi": [2, 3, 6, 7, 8, 10, 11, 12, 13, 15, 16], "weight": 2, "control": [2, 3, 8, 9, 10, 13], "uniform": [2, 3, 10], "choic": [2, 3, 4, 6, 11, 14, 15, 16], "weigh": [2, 7], "websit": [2, 3, 5, 10, 12, 14], "knn": [2, 3], "n_neighbor": [2, 3, 11], "jupyt": [2, 3, 4, 7, 9, 12, 13], "environ": [2, 3, 4, 7, 8, 12, 13, 14], "pleas": [2, 3, 4, 5, 7, 8, 12], "rerun": [2, 3, 4, 12], "html": [2, 3, 4, 12, 15, 16], "represent": [2, 3, 4, 10, 12], "trust": [2, 3, 4, 6, 12], "notebook": [2, 3, 4, 12, 13, 14], "On": [2, 3, 4, 7, 10, 11, 12, 14, 15, 16], "github": [2, 3, 4, 7, 10, 12, 15], "unabl": [2, 3, 4, 10, 12, 14], "render": [2, 3, 4, 8, 12, 14, 15], "try": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "nbviewer": [2, 3, 4, 12], "org": [2, 3, 4, 6, 7, 10, 12, 15, 16], "kneighborsclassifierkneighborsclassifi": [2, 3], "fit": [2, 3, 4, 11, 12, 15], "much": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "outsid": [2, 3, 6, 8, 11, 12, 14, 15], "heavi": 2, "lift": 2, "modifi": [2, 3, 14], "after": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "itself": [2, 3, 6, 10, 12, 15, 16], "ran": 2, "manual": [2, 3, 4, 6, 8, 10, 11, 13, 16], "time": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 16], "new_ob": 2, "Is": [2, 4, 7, 11, 15, 16], "don": [2, 3, 4, 6, 7, 8, 10, 11, 14, 15, 16], "t": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "necessarili": [2, 3, 7, 16], "correct": [2, 3, 7, 13, 14, 15, 16], "quantifi": [2, 3, 12], "think": [2, 3, 7, 8, 10, 12, 16], "rang": [2, 3, 4, 10, 11, 12, 15, 16], "matter": [2, 11, 15, 16], "identifi": [2, 3, 4, 7, 9, 10, 11, 14, 15], "effect": [2, 4, 6, 7, 11, 12, 13, 16], "But": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "doesn": [2, 3, 8, 10, 15, 16], "salari": 2, "dollar": [2, 6, 10, 11, 12], "job": [2, 10, 15], "1000": [2, 3, 6, 15], "huge": [2, 10], "compar": [2, 3, 6, 7, 10, 11, 14, 15, 16], "conceptu": [2, 14], "opposit": 2, "yearli": 2, "temperatur": 2, "degre": 2, "kelvin": 2, "celsiu": 2, "constant": [2, 12, 15], "shift": [2, 7, 8], "273": [2, 16], "even": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "likewis": [2, 16], "hypothet": 2, "thousand": [2, 3, 10, 15], "singl": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "affect": [2, 3, 7, 8, 11, 12, 15], "chang": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15, 16], "outcom": [2, 7], "averag": [2, 3, 6, 7, 10, 11, 12, 16], "central": 2, "subtract": [2, 3, 7], "said": [2, 3], "unstandard": [2, 4], "wisconsin": 2, "until": [2, 3, 4, 6, 7, 8, 10, 13, 14, 15, 16], "did": [2, 3, 6, 7, 9, 10, 11, 12, 14, 15, 16], "earlier": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "thing": [2, 3, 6, 8, 10, 13, 14, 15, 16], "unscaled_canc": 2, "wdbc_unscal": [2, 3], "1001": 2, "11840": [2, 3], "1326": 2, "08474": [2, 3], "1203": 2, "10960": [2, 3], "386": 2, "14250": [2, 3], "1297": 2, "10030": [2, 3], "1479": 2, "11100": [2, 3], "1261": 2, "09780": [2, 3], "858": 2, "08455": [2, 3], "1265": 2, "11780": [2, 3], "181": [2, 4], "05263": [2, 3], "unscal": 2, "uncent": 2, "Will": 2, "framework": [2, 12], "preprocessor": [2, 3, 4, 11], "manipul": [2, 10, 16], "standardscal": [2, 3, 4, 11], "transform": [2, 3, 4, 7, 11, 12, 16], "wrap": [2, 3, 4, 11], "columntransform": [2, 3, 4], "make_column_transform": [2, 3, 4, 11], "enabl": [2, 8, 10, 13, 14, 15, 16], "handi": [2, 7, 16], "sequenc": [2, 3, 7, 10, 13, 15], "compos": [2, 3, 4, 7, 11], "x27": [2, 3, 4], "columntransformercolumntransform": [2, 3, 4], "standardscalerstandardscal": [2, 3, 4], "individu": [2, 3, 6, 7, 12, 14, 15], "difficult": [2, 3, 4, 7, 8, 10, 12, 15, 16], "rather": [2, 3, 6, 7, 8, 10, 11, 14, 15, 16], "make_column_selector": [2, 3], "dtype_includ": [2, 3], "equival": [2, 7, 10, 12, 16], "lt": 2, "_column_transform": 2, "0x7fbde79fae50": 2, "gt": 2, "readi": [2, 3, 7, 8, 10, 13, 14], "happen": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "necessari": [2, 4, 11, 13, 15], "bit": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15, 16], "unnecessari": 2, "howev": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "quantiti": [2, 3, 6, 15, 16], "scaled_canc": 2, "standardscaler__area": 2, "standardscaler__smooth": 2, "984375": 2, "568466": 2, "908708": 2, "826962": 2, "558884": 2, "942210": 2, "764464": 2, "283553": 2, "826229": 2, "280372": 2, "343856": 2, "041842": 2, "723842": 2, "102458": 2, "577953": 2, "840484": 2, "735218": 2, "525767": 2, "347789": 2, "112085": 2, "woohoo": 2, "input": [2, 3, 4, 7, 10, 11, 14, 16], "behavior": [2, 4, 11, 15, 16], "drop": [2, 3, 8, 13, 14, 15, 16], "remain": [2, 3, 4, 7, 13], "rest": [2, 3, 7, 12, 16], "remaind": [2, 3, 7, 10, 11, 16], "passthrough": 2, "separ": [2, 3, 4, 7, 8, 14, 15], "underscor": [2, 7, 8, 14, 16], "again": [2, 3, 6, 7, 8, 10, 11, 12, 13, 15, 16], "preserv": [2, 3], "verbose_feature_names_out": [2, 4], "fals": [2, 3, 4, 7, 10, 11, 12, 15, 16], "should": [2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 15, 16], "leav": [2, 4, 12], "preprocessor_keep_al": 2, "scaled_cancer_al": 2, "wonder": [2, 6, 10], "technic": [2, 3, 7, 8, 11, 13, 14, 15, 16], "error": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "prone": [2, 3, 10, 16], "accident": [2, 3, 8, 10, 14, 15, 16], "forget": [2, 4, 14], "proper": 2, "free": [2, 3, 12, 14], "requir": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "yourself": [2, 4, 7, 10, 12, 14], "further": [2, 3, 4, 6, 7, 8, 10, 12, 15, 16], "automat": [2, 3, 4, 10, 11, 14, 15], "streamlin": 2, "effort": [2, 8, 10, 14], "side": [2, 5, 6, 7, 13, 14, 15], "annot": [2, 4, 15], "within": [2, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "nearli": [2, 4, 12, 16], "vertic": [2, 6, 7, 11, 12, 15, 16], "align": [2, 10, 15], "black": [2, 4, 10, 11, 15], "region": [2, 3, 10, 11, 16], "domin": 2, "intuit": [2, 3, 11, 15, 16], "reason": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "carefulli": [2, 4, 7, 10, 16], "domain": [2, 7, 10, 15], "comparison": [2, 6, 12, 15, 16], "potenti": [2, 3, 4, 11, 12, 16], "issu": [2, 7, 8, 10, 12, 13, 15, 16], "imbal": 2, "overal": [2, 3, 7, 11, 15], "pattern": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "otherwis": [2, 3, 4, 6, 7, 15], "rare": [2, 4, 15], "malici": 2, "detect": [2, 4], "rarer": 2, "unimport": 2, "revisit": [2, 3, 10, 12, 16], "head": [2, 8, 10, 13, 14, 15], "top": [2, 3, 5, 7, 8, 10, 11, 12, 13, 15, 16], "n": [2, 3, 4, 6, 7, 10, 11, 15, 16], "concat": [2, 6], "glue": 2, "filter": [2, 6, 10, 15], "back": [2, 3, 6, 8, 10, 11, 12, 13, 14, 15, 16], "concaten": [2, 6], "axi": [2, 7, 11, 12, 14, 16], "yield": [2, 3], "taller": 2, "horizont": [2, 7, 15], "produc": [2, 3, 7, 8, 12, 15, 16], "wider": [2, 6, 7, 16], "imbalanc": [2, 3], "rare_canc": 2, "rare_plot": 2, "With": [2, 4, 7, 10, 15, 16], "least": [2, 3, 4, 6, 8, 15], "win": 2, "highlight": [2, 4, 6, 8, 10, 11, 12, 13, 14, 16], "13": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "background": [2, 3, 6, 10, 12, 15], "blue": [2, 4, 8, 11, 14, 16], "indic": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "despit": [2, 3, 10, 15], "simplic": [2, 3, 14], "sound": [2, 3, 8], "manner": [2, 8, 12], "fairli": [2, 3, 6, 13, 15], "nuanc": 2, "suffic": [2, 6], "rebal": 2, "oversampl": 2, "replic": [2, 6], "power": [2, 3, 7, 10, 14, 15, 16], "own": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "increas": [2, 3, 4, 6, 11, 12, 15, 16], "randomli": [2, 3, 4, 6, 12], "properli": [2, 3, 15], "random": [2, 6, 11, 12], "malignant_canc": 2, "benign_canc": 2, "malignant_cancer_upsampl": 2, "upsampled_canc": 2, "vice": [2, 3], "versa": [2, 3], "closer": [2, 15], "upsampl": 2, "wild": [2, 7, 12], "unfortun": [2, 3, 4, 6, 8, 10, 12, 15], "challeng": [2, 14, 16], "reli": [2, 3, 8, 11, 12, 15], "expert": [2, 3, 7, 13], "knowledg": [2, 7, 12, 14, 16], "relat": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "survei": [2, 7, 16], "particip": [2, 3], "margin": [2, 8], "peopl": [2, 6, 7, 8, 11, 12, 14, 15, 16], "respond": [2, 10, 14], "certain": [2, 7, 10, 14, 15], "kind": [2, 3, 4, 6, 7, 10, 15], "fear": [2, 7], "honestli": 2, "neg": [2, 3, 8, 11, 12, 14, 15, 16], "consequ": [2, 3, 6, 8, 16], "simpli": [2, 3, 10, 15, 16], "throw": 2, "awai": [2, 3, 6, 10, 11, 12, 14, 16], "bia": [2, 12], "conclus": [2, 6, 7, 15], "inadvert": [2, 8], "ignor": [2, 3, 7, 11, 16], "easili": [2, 3, 4, 7, 8, 9, 10, 14, 15, 16], "mislead": 2, "detriment": 2, "impact": [2, 4, 6, 12, 16], "techniqu": [2, 3, 4, 6, 7, 10, 12, 15], "deal": [2, 8, 10], "isn": [2, 3, 7, 10, 11, 15], "anyth": [2, 3, 7, 12, 16], "els": [2, 7, 8, 10], "subset": [2, 6, 8, 10, 11, 12, 16], "missing_canc": 2, "wdbc_miss": 2, "nan": [2, 10, 16], "475956": 2, "834601": 2, "386808": 2, "169878": 2, "160508": 2, "137124": 2, "henc": [2, 3, 4, 8, 10, 11, 15], "too": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "accomplish": [2, 3, 6, 7, 8, 15, 16], "dropna": 2, "no_missing_canc": 2, "strategi": [2, 3, 15], "fill": [2, 8, 10, 12, 15], "synthet": 2, "simpleimput": 2, "simpleimputersimpleimput": 2, "directli": [2, 3, 4, 6, 7, 8, 13, 14, 16], "imputed_canc": 2, "846860": 2, "384942": 2, "document": [2, 4, 8, 9, 10, 13, 14, 15, 16], "crucial": 2, "critic": [2, 6, 7, 8, 12, 15, 16], "chain": [2, 16], "intermedi": [2, 7], "whole": [2, 3, 4, 6, 10, 14, 16], "scratch": [2, 6, 14, 15], "nn": [2, 3], "knn_pipelin": [2, 3], "pipelinepipelin": [2, 3, 4], "500": [2, 6, 11, 12], "075": 2, "1500": 2, "new_observ": 2, "second": [2, 3, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "15": [2, 3, 4, 6, 7, 8, 10, 12, 14, 15, 16], "seen": [2, 3, 11, 12, 14, 15, 16], "littl": [2, 3, 10, 11, 12, 15, 16], "grid": [2, 3, 11, 15], "meshgrid": 2, "numpi": [2, 3, 4, 6, 10, 11, 12, 15, 16], "high": [2, 3, 6, 7, 8, 9, 12], "transpar": [2, 7], "low": [2, 3, 12], "opac": [2, 11, 15], "np": [2, 3, 4, 6, 11, 12], "val": 2, "arrang": [2, 6, 7, 15], "are_grid": 2, "linspac": 2, "min": [2, 11, 12, 15, 16], "95": [2, 3, 6, 7, 10, 12, 15], "max": [2, 3, 11, 12, 15, 16], "05": [2, 7, 10, 15], "50": [2, 3, 6, 7, 10, 11, 12, 14, 16], "smo_grid": 2, "asgrid": 2, "reshap": [2, 16], "knnpredgrid": 2, "bind": 2, "prediction_t": 2, "copi": [2, 10, 14, 16], "unscaled_plot": 2, "mark_point": [2, 15], "40": [2, 3, 6, 7, 10, 14, 15, 16], "nice": [2, 3, 8, 10, 12, 15], "fade": 2, "prediction_plot": 2, "300": [2, 3, 6, 15, 16], "accompani": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "repositori": [2, 3, 4, 6, 7, 10, 11, 12, 13, 15, 16], "launch": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "browser": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "click": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "binder": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "button": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "view": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "download": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "sure": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "instruct": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "setup": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "ensur": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "intend": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "blb": 2, "lar": 2, "gill": 2, "loupp": 2, "mathieu": 2, "blondel": 2, "fabian": 2, "pedregosa": 2, "andrea": 2, "mueller": 2, "olivi": 2, "grisel": 2, "vlad": 2, "nicula": 2, "peter": [2, 11], "prettenhof": 2, "alexandr": 2, "gramfort": 2, "jaqu": 2, "grobler": 2, "robert": [2, 3, 4, 12], "layton": 2, "jake": 2, "vanderpla": [2, 15], "arnaud": 2, "joli": 2, "brian": [2, 15], "holt": 2, "ga": [2, 15], "\u00eb": 2, "varoquaux": 2, "api": 2, "design": [2, 3, 8, 10, 14, 15, 16], "ecml": 2, "pkdd": 2, "mine": [2, 6], "108": [2, 3], "122": [2, 3], "ch67": [2, 11], "thoma": [2, 11], "ieee": [2, 4, 11], "transact": [2, 4, 11], "21": [2, 3, 7, 10, 11, 14, 15], "fh51": [2, 11], "evelyn": [2, 3, 11], "joseph": [2, 11], "discriminatori": [2, 11], "discrimin": [2, 3, 11], "consist": [2, 4, 6, 7, 10, 11, 13, 14, 15, 16], "properti": [2, 3, 7, 10, 11, 12, 15, 16], "report": [2, 3, 6, 7, 8, 11, 15, 16], "usaf": [2, 11], "school": [2, 7, 11], "aviat": [2, 11], "medicin": [2, 11], "randolph": [2, 11], "field": [2, 10, 11, 15], "texa": [2, 11], "swm93": [2, 3], "nuclear": [2, 3], "intern": [2, 3, 5, 15], "symposium": [2, 3], "electron": [2, 3, 14], "technolog": [2, 3, 15], "stanfordhcare21": 2, "url": [2, 3, 4, 6, 7, 12, 13, 14, 15, 16], "http": [2, 3, 4, 5, 6, 7, 9, 10, 12, 13, 14, 15, 16], "stanfordhealthcar": 2, "medic": [2, 3], "condit": 2, "continu": [3, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "its": [3, 4, 7, 8, 10, 11, 12, 14, 15, 16], "describ": [3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "matric": 3, "neighbor": [3, 12], "k": [3, 7, 10, 15], "nearest": [3, 4, 12], "estim": [3, 6, 7, 9, 11, 12], "underfit": [3, 12], "advantag": [3, 4, 6, 10, 11, 12, 13, 14, 15, 16], "disadvantag": [3, 4, 11, 12, 15], "wrong": [3, 6, 7, 12, 15, 16], "cancer": 3, "ask": [3, 4, 6, 10, 11, 12, 14, 15, 16], "kei": [3, 6, 7, 10, 13, 14, 15, 16], "impli": [3, 6], "between": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "oppos": [3, 10, 11, 15, 16], "memor": 3, "visit": [3, 5, 6, 7, 10, 13, 14, 15], "hospit": 3, "more": [3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "trick": 3, "asid": [3, 7, 10, 12], "match": [3, 10, 11, 12, 14, 15, 16], "observ": [3, 4, 6, 7, 11, 12, 14, 15, 16], "confid": [3, 6, 11], "golden": 3, "rule": [3, 6, 7, 11, 15], "cannot": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "than": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "realli": [3, 6, 7, 10, 11, 15, 16], "imagin": [3, 6, 8, 10, 14, 15, 16], "bad": [3, 4, 10, 15], "overestim": [3, 6], "made": [3, 4, 7, 11, 12, 13, 14, 15, 16], "frac": [3, 4, 6, 11], "summar": [3, 6, 7, 9, 10, 15, 16], "stori": [3, 8, 11, 15], "alon": [3, 6, 14], "comprehens": [3, 4, 6], "each": [3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "correctli": [3, 7, 10, 13, 15, 16], "incorrectli": 3, "57": 3, "bottom": [3, 8, 13, 14], "roughli": [3, 4, 6, 11, 12, 15], "89": [3, 7, 15], "892": 3, "misclassifi": 3, "disastr": 3, "receiv": [3, 10, 14], "particularli": [3, 10, 12, 15], "unaccept": 3, "term": [3, 4, 6, 7, 10, 11, 15, 16], "talk": [3, 10, 15], "four": [3, 4, 7, 9, 15], "perfect": [3, 15], "zero": [3, 4, 11, 12, 15, 16], "almost": [3, 4, 7, 10, 11, 15], "two": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "commonli": [3, 6, 7, 8, 12, 14, 15, 16], "metric": [3, 4, 11, 12], "togeth": [3, 4, 6, 8, 10, 15, 16], "inde": [3, 4, 6, 7, 10, 12, 15, 16], "20": [3, 6, 7, 10, 12, 14, 15, 16], "quad": [3, 4], "25": [3, 6, 7, 10, 11, 14, 15, 16], "rel": [3, 4, 7, 15], "context": [3, 10, 11, 12, 15, 16], "certainli": [3, 6], "achiev": [3, 7, 11, 15, 16], "guess": [3, 4, 6, 7], "everi": [3, 6, 7, 8, 10, 12, 14, 16], "similarli": [3, 7, 10, 15, 16], "never": [3, 7, 11, 14], "obsev": 3, "Of": [3, 6, 12, 16], "somewher": [3, 7, 10, 11, 15], "extrem": [3, 6, 11, 12], "trade": [3, 4], "off": [3, 4, 6, 12], "fair": [3, 10, 11], "unbias": 3, "influenc": [3, 4, 6, 11, 12, 15], "human": [3, 4, 6, 10, 14, 15, 16], "counter": 3, "main": [3, 7, 13, 16], "tenet": 3, "determin": [3, 4, 6, 11, 13, 14, 15, 16], "everyth": [3, 6, 7, 13, 16], "point": [3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "investig": [3, 6, 7, 10, 15], "integ": [3, 10, 15, 16], "At": [3, 7, 8, 9, 10, 12], "track": [3, 6, 7, 14, 16], "nums_0_to_9": 3, "5": [3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "random_numbers1": 3, "to_numpi": 3, "appear": [3, 6, 7, 8, 10, 11, 12, 13, 14, 15], "fresh": [3, 8], "batch": 3, "random_numbers2": 3, "forc": [3, 15], "random_numbers1_again": 3, "random_numbers2_again": 3, "And": [3, 6, 7, 10, 11, 12, 14, 15, 16], "4235": 3, "random_numb": 3, "beyond": [3, 4, 7, 10, 11, 12, 13, 14, 15, 16], "explicitli": [3, 10, 14, 15, 16], "insert": [3, 14, 16], "therebi": [3, 15], "global": [3, 15], "drawback": 3, "buri": 3, "undesir": 3, "entir": [3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "plai": [3, 7, 10, 13], "randomst": 3, "random_st": 3, "rnd": 3, "random_numbers1_third": 3, "random_numbers2_third": 3, "load": [3, 4, 8, 9, 10, 11, 12, 15, 16], "quick": [3, 7, 10], "re": [3, 4, 7, 8, 9, 10, 11, 14, 15, 16], "scale": [3, 4, 10, 11, 12, 14, 15], "done": [3, 7, 8, 10, 13, 14, 15, 16], "preliminari": 3, "train_test_split": [3, 11, 12], "shuffl": 3, "stratifi": [3, 11], "exist": [3, 7, 8, 10, 12, 13, 14, 15, 16], "train_siz": [3, 11, 12], "model_select": [3, 11, 12], "cancer_test": 3, "index": [3, 7, 10, 14, 16], "426": 3, "196": [3, 4, 6, 7, 11], "296": 3, "43": [3, 4, 14, 15], "143": 3, "116": 3, "miss": [3, 4, 15, 16], "626761": 3, "373239": 3, "last": [3, 6, 7, 9, 10, 14, 15, 16], "sensit": [3, 7, 12], "consider": 3, "aspect": [3, 6, 12, 15], "fortun": [3, 6, 7, 10, 11, 12, 16], "construct": [3, 6, 7, 10, 15, 16], "cancer_preprocessor": 3, "augment": [3, 4], "864726": 3, "146": [3, 6], "869691": 3, "86": 3, "86135501": 3, "846226": 3, "105": [3, 6, 7, 10, 15, 16], "863030": 3, "244": 3, "884180": 3, "23": [3, 10, 14, 15, 16], "851509": 3, "125": [3, 7, 16], "86561": 3, "281": 3, "8912055": 3, "84799002": 3, "score": [3, 10, 11], "8951048951048951": 3, "90": [3, 6, 7, 15, 16], "precision_scor": 3, "recall_scor": 3, "y_true": [3, 11, 12], "y_pred": [3, 11, 12], "pos_label": 3, "8275862068965517": 3, "9056603773584906": 3, "83": 3, "91": [3, 6], "crosstab": 3, "alphabet": [3, 7, 15, 16], "80": [3, 16], "48": [3, 6, 7, 14, 15], "agre": [3, 10, 12], "displaystyl": 3, "51": [3, 6, 14, 15], "82": [3, 12, 15], "76": 3, "That": [3, 6, 7, 10, 11, 15, 16], "pretti": [3, 6, 10], "wait": [3, 7, 10, 11, 12, 15, 16], "Or": [3, 6, 12], "someth": [3, 4, 6, 7, 8, 10, 11, 14, 15, 16], "99": [3, 4, 6, 11, 12, 15], "terribl": 3, "impress": [3, 15], "attent": [3, 7, 11, 16], "sacrif": 3, "easi": [3, 4, 7, 8, 10, 12, 14, 15, 16], "baselin": [3, 15], "regardless": [3, 10, 11, 15], "sens": [3, 4, 6, 7, 11, 12, 15, 16], "hope": [3, 10, 12, 15], "signific": [3, 7], "Be": [3, 10, 11, 15], "enough": [3, 6, 7, 10, 11, 12, 14, 15, 16], "usual": [3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "suspect": [3, 4, 6], "built": [3, 7, 8, 13, 16], "perspect": [3, 4, 11, 16], "hoorai": 3, "cautiou": 3, "misdiagnos": 3, "vast": [3, 4, 15, 16], "behav": [3, 6, 12], "principl": [3, 15], "ideal": [3, 8, 11, 16], "somehow": [3, 6, 10], "hasn": 3, "yet": [3, 6, 7, 8, 10, 11, 14, 15, 16], "rememb": [3, 6, 7, 8, 10, 12, 15, 16], "touch": [3, 15], "dai": [3, 8, 10, 14, 15], "strongli": [3, 9, 12], "whatev": [3, 4, 7, 15], "lucki": [3, 6], "perhap": [3, 6, 7, 8, 10, 11, 12, 15], "sub": [3, 10], "cancer_subtrain": 3, "cancer_valid": 3, "acc": 3, "897196261682243": 3, "repeat": [3, 4, 6, 14], "none": [3, 4, 10, 12, 14, 16], "underli": [3, 4, 7], "reduc": [3, 4, 10, 15], "un": [3, 4], "c": [3, 7, 10], "evenli": [3, 11], "chunk": [3, 12], "iter": [3, 4, 7, 14, 15, 16], "fold": [3, 11], "cross_valid": 3, "cv": [3, 11], "convert": [3, 6, 10, 11, 15, 16], "cancer_pip": 3, "cv_5_df": 3, "fit_tim": 3, "score_tim": 3, "test_scor": 3, "004229": 3, "005770": 3, "837209": 3, "003728": 3, "005538": 3, "870588": 3, "003707": 3, "005706": 3, "894118": 3, "003942": 3, "005564": 3, "003583": 3, "005488": 3, "882353": 3, "aggreg": [3, 7], "sem": 3, "uncertain": [3, 6, 11], "scope": [3, 4, 7, 11, 12, 13, 14, 15], "01": [3, 6, 10, 15, 16], "cv_5_metric": 3, "agg": [3, 12, 16], "003838": 3, "005613": 3, "870971": 3, "000113": 3, "000053": 3, "009501": 3, "limit": [3, 4, 10, 12, 14, 15, 16], "speed": 3, "trial": [3, 15], "cv_10": 3, "cv_10_df": 3, "cv_10_metric": 3, "003729": 3, "004313": 3, "884939": 3, "000069": 3, "000044": 3, "006718": 3, "slightli": [3, 6, 10, 11, 12, 15], "due": [3, 4, 6, 10, 16], "reduct": 3, "dramat": 3, "cv_50_df": 3, "cv_50_metric": 3, "003722": 3, "003265": 3, "888056": 3, "000035": 3, "000036": 3, "003005": 3, "downstream": 3, "expens": [3, 10], "chemo": 3, "radiat": 3, "therapi": 3, "death": 3, "mispredict": 3, "gridsearchcv": [3, 11], "unspecifi": 3, "cancer_tune_pip": 3, "tunabl": 3, "get_param": [3, 11], "verbos": 3, "columntransformer__n_job": 3, "columntransformer__remaind": 3, "columntransformer__sparse_threshold": 3, "columntransformer__transformer_weight": 3, "columntransformer__transform": 3, "columntransformer__verbos": 3, "columntransformer__verbose_feature_names_out": 3, "columntransformer__standardscal": 3, "columntransformer__standardscaler__copi": 3, "columntransformer__standardscaler__with_mean": 3, "columntransformer__standardscaler__with_std": 3, "kneighborsclassifier__algorithm": 3, "auto": [3, 14], "kneighborsclassifier__leaf_s": 3, "30": [3, 6, 7, 10, 11, 14, 15, 16], "kneighborsclassifier__metr": 3, "minkowski": 3, "kneighborsclassifier__metric_param": 3, "kneighborsclassifier__n_job": 3, "kneighborsclassifier__n_neighbor": 3, "kneighborsclassifier__p": 3, "kneighborsclassifier__weight": 3, "wow": [3, 6, 15], "stuff": 3, "sift": 3, "muck": [3, 10], "stand": [3, 10, 11, 15], "parameter_grid": 3, "allow": [3, 4, 6, 7, 8, 9, 10, 11, 14, 15, 16], "greater": [3, 4, 10, 16], "third": 3, "skip": [3, 8, 16], "96": [3, 12, 15], "emploi": [3, 10, 11], "okai": [3, 15, 16], "param_grid": [3, 11], "cancer_tune_grid": 3, "cv_results_": [3, 11], "format": [3, 9, 10, 11, 12, 16], "accuracies_grid": 3, "19": [3, 6, 10, 14, 15], "mean_fit_tim": 3, "std_fit_tim": 3, "mean_score_tim": 3, "std_score_tim": 3, "param_kneighborsclassifier__n_neighbor": 3, "param": 3, "split0_test_scor": 3, "split1_test_scor": 3, "split2_test_scor": 3, "split3_test_scor": 3, "split4_test_scor": 3, "split5_test_scor": 3, "split6_test_scor": 3, "split7_test_scor": 3, "split8_test_scor": 3, "split9_test_scor": 3, "mean_test_scor": [3, 11], "17": [3, 4, 10, 12, 14, 15], "std_test_scor": [3, 11], "18": [3, 4, 6, 7, 10, 14, 15], "rank_test_scor": 3, "int32": [3, 16], "param_kneighbors_classifier__n_neighbor": 3, "unus": 3, "sem_test_scor": [3, 11], "845127": 3, "019966": 3, "873200": 3, "015680": 3, "861517": 3, "019547": 3, "861573": 3, "017787": 3, "866279": 3, "017889": 3, "875637": 3, "016026": 3, "885050": 3, "015406": 3, "36": [3, 4, 14, 15], "887375": 3, "013694": 3, "41": [3, 14, 15, 16], "46": [3, 4, 14, 15], "887320": 3, "013314": 3, "882669": 3, "014523": 3, "56": [3, 7], "878018": 3, "014414": 3, "61": [3, 6, 7, 10], "880343": 3, "014299": 3, "66": [3, 6, 10, 11], "015416": 3, "71": [3, 10], "877962": 3, "013660": 3, "014698": 3, "81": [3, 15], "880288": 3, "011277": 3, "875581": 3, "012967": 3, "008193": 3, "shortcut": [3, 8, 15], "layer": 3, "accuracy_vs_k": 3, "mark_lin": [3, 4, 11, 12, 15], "neighbour": [3, 11], "highest": [3, 16], "best_params_": [3, 11], "vari": [3, 6, 11, 12, 13, 15, 16], "exact": [3, 6, 12, 15], "justifi": [3, 15], "optim": [3, 10, 11], "decreas": [3, 4, 6, 15, 16], "reliabl": [3, 6, 8, 15], "uncertainti": [3, 6], "cost": [3, 6, 11, 12], "prohibit": [3, 11], "large_param_grid": 3, "385": 3, "large_cancer_tune_grid": 3, "large_accuracies_grid": 3, "large_accuracy_vs_k": 3, "farther": [3, 15], "sort": [3, 4, 7, 8, 10, 12, 15, 16], "boundari": [3, 12], "simpler": 3, "stronger": 3, "regard": [3, 6, 7, 8, 11, 12, 16], "themselv": [3, 10, 15], "noisi": [3, 11, 15], "jag": 3, "essenti": [3, 6, 7, 8, 10, 11, 16], "problemat": [3, 8, 10, 15], "unreli": [3, 6, 12], "strike": 3, "balanc": [3, 6], "qualiti": [3, 8, 11, 12], "retrain": [3, 11], "9090909090909091": 3, "8846153846153846": 3, "8679245283018868": 3, "84": [3, 15], "glanc": 3, "surpris": 3, "knew": 3, "return": [3, 4, 6, 7, 10, 12, 13, 16], "put": [3, 6, 10, 11, 12, 13, 14, 16], "defin": [3, 6, 7, 9, 10, 11, 12, 15, 16], "execut": [3, 10, 14], "search": [3, 4, 10, 13, 14], "strength": [3, 12, 15], "weak": [3, 11, 12, 15], "assumpt": [3, 4, 11, 12], "multi": 3, "slow": [3, 8, 11, 12], "treat": [3, 4, 7, 14, 15, 16], "accept": [3, 10, 11, 13, 14], "wors": [3, 7, 16], "meaning": [3, 4, 7, 10, 12, 14], "cancer_irrelev": 3, "irrelevant1": 3, "irrelevant2": 3, "30010": 3, "08690": 3, "132": [3, 6], "19740": 3, "130": [3, 6, 16], "00": [3, 6, 16], "24140": 3, "77": [3, 6], "58": [3, 15], "19800": 3, "135": [3, 6, 12, 15], "24390": 3, "142": 3, "14400": 3, "131": 3, "09251": 3, "35140": 3, "140": [3, 6], "00000": [3, 6], "47": [3, 4, 14, 15], "92": [3, 6], "increasingli": [3, 10], "distanc": [3, 4, 11, 12, 15], "corrupt": 3, "outperform": 3, "combat": 3, "extra": [3, 10, 12], "nois": [3, 15], "smoothli": 3, "trend": [3, 6, 7, 11, 12, 15], "corrobor": 3, "evid": 3, "untun": 3, "scientif": [3, 11, 12, 14], "clear": [3, 4, 6, 7, 12, 14, 15, 16], "cut": 3, "obviou": [3, 8, 12, 15], "relev": [3, 10, 11, 12], "consum": [3, 6, 16], "systemat": 3, "beal": 3, "hock": 3, "lesli": 3, "moder": 3, "ab": [3, 10, 11], "bc": [3, 6, 7], "ac": 3, "abc": 3, "million": [3, 12, 15], "computation": 3, "draper": 3, "smith": 3, "1966": 3, "eforymson": 3, "straightforward": [3, 10, 15], "form": [3, 4, 6, 7, 10, 11, 12, 15, 16], "updat": [3, 4, 13, 14, 15], "big": [3, 6, 7, 10, 14, 15], "55": [3, 6, 11, 15, 16], "caution": [3, 8, 10], "move": [3, 7, 9, 11, 12, 14, 15], "likelihood": 3, "unlucki": [3, 4], "stumbl": 3, "risk": [3, 11], "suffer": 3, "turn": [3, 4, 7, 10, 11, 12, 16], "smaller": [3, 11, 12, 15], "irrelevant3": 3, "full": [3, 6, 7, 10, 12, 14, 15, 16], "cancer_subset": 3, "sequentialfeatureselector": 3, "tri": [3, 4, 11, 12, 15], "flexibl": [3, 8, 12, 16], "resort": 3, "loop": [3, 16], "flow": 3, "mckinnei": [3, 10, 15, 16], "2012": [3, 7, 10, 15, 16], "n_total": 3, "check": [3, 7, 9, 10, 14, 15, 16], "j": [3, 7, 10], "len": [3, 10], "accuracy_dict": 3, "selected_predictor": 3, "empti": [3, 8, 14], "n_job": 3, "best_set": 3, "argmax": 3, "append": [3, 10, 15, 16], "join": [3, 10, 14], "del": [3, 15], "891103": 3, "917450": 3, "931454": 3, "926253": 3, "906955": 3, "exhibit": [3, 8], "fluctuat": [3, 11], "attempt": [3, 4, 15], "account": [3, 13, 14], "chanc": [3, 6, 13], "elbow": [3, 4], "successfulli": [3, 8, 10, 14], "judgement": 3, "excel": [3, 7, 12, 14], "tutori": [3, 8, 10, 12], "go": [3, 6, 7, 9, 10, 12, 13, 15], "jame": [3, 4, 10, 12], "great": [3, 4, 6, 7, 8, 10, 12, 14, 15], "naiv": 3, "bay": 3, "goe": [3, 7, 8, 10, 12], "popular": [3, 4, 10, 12, 14], "bkm67": 3, "martin": 3, "lansdown": 3, "mauric": 3, "georg": 3, "kendal": 3, "david": [3, 6], "mann": 3, "discard": 3, "multivari": 3, "biometrika": 3, "366": 3, "ds66": 3, "norman": 3, "harri": 3, "wilei": [3, 15], "efo66": 3, "stepwis": 3, "backward": 3, "eastern": 3, "meet": 3, "hl67": 3, "ronald": 3, "technometr": 3, "531": 3, "540": 3, "jwht13": [3, 4, 12], "gareth": [3, 4, 12], "daniela": [3, 4, 12], "witten": [3, 4, 12], "hasti": [3, 4, 12], "tibshirani": [3, 4, 12], "springer": [3, 4, 12, 15], "1st": [3, 4, 12], "edit": [3, 4, 12, 13, 15], "www": [3, 4, 7, 10, 12], "statlearn": [3, 4, 12], "com": [3, 4, 6, 10, 12, 13, 14, 15], "mck12": [3, 10, 15, 16], "ipython": [3, 10, 13, 15, 16], "o": [3, 7, 10, 13, 15, 16], "reilli": [3, 10, 15, 16], "media": [3, 8, 10, 15, 16], "inc": [3, 6, 10, 15, 16], "subgroup": [4, 7, 15, 16], "predict": [4, 7, 9, 11, 12, 15], "differenti": 4, "classif": [4, 7, 9, 11, 12], "variabl": [4, 6, 7, 8, 10, 11, 12, 15, 16], "scikit": [4, 11, 12], "set": [4, 6, 8, 9, 10, 12, 14, 16], "genet": [4, 15], "ancestr": 4, "subpopul": 4, "onlin": [4, 6, 10, 13, 14, 15], "custom": [4, 15], "uncov": [4, 8, 15], "fundament": [4, 6, 7, 15], "supervis": 4, "unsupervis": 4, "imposs": [4, 6], "articl": [4, 7], "wikipedia": [4, 10], "evalu": [4, 6, 7, 12, 15], "test": [4, 6, 12, 13], "good": [4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "ascertain": 4, "rigor": [4, 11], "lloyd": 4, "1982": 4, "hierarch": 4, "princip": 4, "compon": [4, 7], "multidimension": 4, "semisupervis": 4, "goal": [4, 7, 11, 15, 16], "benefici": [4, 10], "unlabel": [4, 7], "willing": [4, 6], "seed": [4, 6, 11, 12], "palmerpenguin": 4, "horst": 4, "2020": [4, 6, 7, 15], "kristen": 4, "gorman": 4, "palmer": 4, "station": [4, 15], "antarctica": [4, 15], "ecolog": 4, "site": [4, 5, 10], "adult": 4, "penguin": 4, "2014": [4, 16], "bill": 4, "flipper": 4, "millimet": 4, "distinct": [4, 8, 15], "speci": 4, "discoveri": [4, 12], "gentoo": 4, "bill_length_mm": 4, "flipper_length_mm": 4, "39": [4, 14, 15], "182": 4, "34": [4, 14, 15, 16], "187": [4, 6, 11], "190": [4, 11, 16], "195": [4, 7, 16], "193": [4, 11], "213": [4, 7, 10, 15, 16], "215": [4, 16], "45": [4, 7, 10, 14, 15, 16], "220": [4, 16], "49": [4, 14, 15], "208": 4, "52": [4, 14], "197": 4, "189": [4, 6], "penguins_standard": 4, "bill_length_standard": 4, "flipper_length_standard": 4, "641361": 4, "189773": 4, "144917": 4, "328412": 4, "517922": 4, "921755": 4, "107617": 4, "846513": 4, "409743": 4, "677761": 4, "238168": 4, "271104": 4, "902464": 4, "433767": 4, "720106": 4, "192860": 4, "645505": 4, "355522": 4, "962559": 4, "440353": 4, "762179": 4, "205012": 4, "111528": 4, "123299": 4, "786203": 4, "626855": 4, "757407": 4, "783170": 4, "108442": 4, "776057": 4, "759092": 4, "subtyp": 4, "scatter_plot": 4, "meaningless": 4, "etc": [4, 6, 7, 10, 14, 15, 16], "adjust": [4, 15], "sum": [4, 11, 16], "wssd": 4, "intertia": 4, "mu_x": 4, "mu_i": 4, "x_1": 4, "x_2": 4, "x_3": 4, "x_4": 4, "y_1": 4, "y_2": 4, "y_3": 4, "y_4": 4, "35": [4, 7, 14, 15, 16], "outlin": [4, 7, 10, 15, 16], "far": [4, 12, 14, 15, 16], "yellow": [4, 16], "variant": 4, "minim": [4, 11, 12, 15], "reassign": 4, "longer": [4, 7, 16], "termin": [4, 13], "fourth": 4, "onward": [4, 10, 13, 15], "guarante": [4, 13], "forev": 4, "logic": [4, 7, 10, 16], "finit": [4, 6, 15], "unlik": [4, 6, 10, 11, 15], "stuck": [4, 8, 16], "solut": [4, 6, 7], "poor": [4, 10], "lowest": [4, 10, 15], "cross": [4, 11, 12], "valid": [4, 9, 10, 11, 12], "subdivid": 4, "merg": [4, 10], "diminish": 4, "reach": [4, 10, 12, 14, 15], "being": [4, 6, 7, 8, 10, 11, 14, 15, 16], "address": [4, 7, 10, 11, 12, 14], "preprocess": [4, 11], "kmean": 4, "n_cluster": 4, "kmeanskmean": 4, "penguin_clust": 4, "labels_": 4, "altern": [4, 7, 12, 14, 15, 16], "suffix": [4, 15], "nomin": [4, 15], "discret": [4, 15], "cluster_plot": 4, "inertia_": 4, "inertia": 4, "730719092276117": 4, "varieti": [4, 10, 12, 14, 16], "ks": 4, "oper": [4, 6, 7, 10, 13, 14, 15], "safest": 4, "reus": 4, "penguin_clust_k": 4, "000000": 4, "576264": 4, "730719": 4, "343613": 4, "362131": 4, "678383": 4, "293320": 4, "975016": 4, "785232": 4, "elbow_plot": 4, "bump": [4, 15], "prevent": [4, 7, 8, 10, 15, 16], "n_init": 4, "paramet": [4, 6, 10, 11, 12, 15, 16], "realm": 4, "specif": [4, 10, 11, 13, 14, 15], "companion": [4, 10], "pca": 4, "gwf14": 4, "toni": 4, "fraser": 4, "sexual": 4, "dimorph": 4, "commun": [4, 7, 10, 15], "ntarctic": 4, "genu": 4, "emph": 4, "pygosc": 4, "plo": [4, 14], "ONE": 4, "hhg20": 4, "allison": 4, "alison": 4, "hill": [4, 10], "archipelago": 4, "allisonhorst": 4, "io": [4, 7, 10, 15], "llo82": 4, "stuart": 4, "quantiz": 4, "pcm": 4, "129": 4, "137": [4, 6, 7, 12], "releas": [4, 10], "bell": [4, 6], "telephon": 4, "paper": [4, 15], "1957": 4, "web": [5, 8, 14], "navig": [5, 7, 8, 10, 13, 14], "mobil": 5, "devic": [5, 10], "menu": [5, 7, 8, 13], "datasciencebook": [5, 9, 10, 13], "ca": [5, 7, 9, 10, 11, 12, 13], "licens": 5, "creativ": 5, "noncommerci": 5, "sharealik": 5, "popul": [6, 7, 10, 15, 16], "extend": [6, 12, 15], "inferenti": [6, 7, 9, 11, 15], "interv": 6, "approxim": 6, "broader": 6, "retail": 6, "sell": 6, "iphon": 6, "accessori": 6, "market": [6, 11, 12], "strateg": 6, "product": [6, 7, 14], "north": [6, 10, 15], "american": [6, 7, 10], "colleg": 6, "campus": 6, "america": [6, 15], "owner": [6, 10, 12], "characterist": [6, 7, 10, 15, 16], "costli": 6, "taken": [6, 7, 11, 14, 15], "canada": [6, 7, 10, 15, 16], "apart": [6, 10, 15], "rent": 6, "budget": [6, 11], "studio": 6, "rental": [6, 10], "price": [6, 10, 11, 12], "month": [6, 14, 15], "monthli": 6, "airbnb": 6, "cox": 6, "marketplac": 6, "vacat": 6, "septemb": [6, 15], "neighborhood": 6, "room": 6, "accommod": 6, "bathroom": 6, "bedroom": [6, 10, 11, 12], "bed": [6, 11, 12], "night": 6, "neighbourhood": 6, "room_typ": 6, "downtown": 6, "home": [6, 7, 10, 11, 12, 13, 15, 16], "apt": [6, 13], "bath": [6, 11], "150": [6, 15], "eastsid": 6, "west": 6, "85": [6, 7, 10, 11, 12, 15], "kensington": 6, "cedar": 6, "cottag": 6, "110": 6, "4589": 6, "4590": 6, "4591": 6, "oakridg": 6, "privat": [6, 10, 14], "4592": 6, "dunbar": 6, "southland": 6, "share": [6, 8, 10, 14, 15, 16], "29": [6, 11, 14, 15, 16], "4593": 6, "145": 6, "4594": 6, "shaughnessi": 6, "citi": [6, 7, 10, 11, 16], "plan": [6, 14], "bylaw": 6, "747497": 6, "246408": 6, "005224": 6, "hotel": 6, "000871": 6, "747": 6, "155": [6, 12, 16], "725": 6, "250": [6, 11, 16], "025": 6, "625": 6, "350": [6, 11, 12, 16], "confirm": [6, 14, 15], "histogram": 6, "000": [6, 7, 10, 11, 12, 15], "20_000": 6, "605": 6, "606": 6, "marpol": 6, "4579": 6, "4580": 6, "160": [6, 11], "1739": 6, "1740": 6, "151": [6, 7, 15], "3904": 6, "3905": 6, "185": [6, 16], "1596": 6, "1597": 6, "kitsilano": 6, "3060": 6, "3061": 6, "hast": 6, "sunris": 6, "78": 6, "19999": 6, "527": 6, "528": 6, "1587": 6, "1588": 6, "169": [6, 12], "3860": 6, "3861": 6, "2747": 6, "2748": 6, "285": 6, "800000": 6, "0000": 6, "999": 6, "queri": [6, 10], "qualifi": 6, "750": [6, 15], "775": 6, "225": [6, 10], "19998": 6, "700": [6, 16], "275": 6, "44552": 6, "reset_index": [6, 16], "caveat": [6, 15, 16], "twice": [6, 12], "sample_proport": 6, "44547": 6, "44548": 6, "44549": 6, "44550": 6, "44551": 6, "sample_estim": 6, "675": 6, "44541": 6, "19995": 6, "44543": 6, "19996": 6, "44545": 6, "19997": 6, "20000": 6, "mind": [6, 7, 10, 14], "sampling_distribut": 6, "mark_bar": [6, 7, 15], "bin": [6, 15], "maxbin": [6, 15], "symmetr": 6, "peak": [6, 15], "74848375": 6, "748": [6, 11], "neither": [6, 11, 15], "nor": [6, 8, 12], "underestim": 6, "tendenc": 6, "travel": 6, "wish": [6, 7, 14], "overpr": [6, 11], "population_distribut": 6, "skew": 6, "tail": [6, 10], "154": [6, 12], "5109773617762": 6, "one_sampl": 6, "sample_distribut": 6, "153": 6, "48225": 6, "wouldn": [6, 14], "alreadi": [6, 7, 8, 9, 10, 11, 12, 13, 15, 16], "mean_pric": 6, "148": 6, "56075": 6, "165": [6, 16], "50500": 6, "93925": 6, "139": 6, "14650": 6, "198": 6, "50000": 6, "192": 6, "66425": 6, "144": 6, "88600": 6, "08800": 6, "156": [6, 11], "25000": 6, "170": 6, "mean_of_sample_mean": 6, "sample_mean": 6, "disappear": 6, "thumb": [6, 15], "emphasi": 6, "saw": [6, 10, 11, 16], "significantli": [6, 7, 8, 12, 15, 16], "notion": [6, 11], "pretend": 6, "clever": 6, "drawn": [6, 12, 15], "median": [6, 15, 16], "slope": [6, 12], "displai": [6, 7, 8, 10, 12, 14, 15, 16], "4025": 6, "4026": 6, "renfrew": 6, "collingwood": 6, "1977": [6, 15], "1978": 6, "fairview": 6, "70": [6, 10, 15, 16], "4008": 6, "4009": 6, "269": [6, 15], "1543": 6, "1544": 6, "320": 6, "3350": 6, "3351": 6, "804": 6, "805": 6, "mount": 6, "pleasant": 6, "2286": 6, "2287": 6, "1010": 6, "1011": 6, "strathcona": 6, "120": [6, 7, 10, 16], "1878": 6, "1879": [6, 15], "175": 6, "1644": 6, "1645": 6, "2771": 6, "2772": 6, "4151": 6, "4152": 6, "289": 6, "4495": 6, "4496": 6, "rilei": 6, "park": [6, 15], "115": 6, "1308": 6, "1309": 6, "2246": 6, "2247": 6, "2335": 6, "2336": 6, "4059": 6, "4060": 6, "1280": 6, "1281": 6, "4324": 6, "4325": 6, "3403": 6, "3404": 6, "arbutu": 6, "ridg": 6, "664": 6, "1729": 6, "1730": 6, "93": [6, 15], "3722": 6, "3723": 6, "241": 6, "242": 6, "3955": 6, "3956": 6, "60": [6, 7], "1042": 6, "1043": 6, "649": 6, "650": [6, 15], "sunset": 6, "1995": [6, 15], "1996": 6, "363": 6, "364": 6, "1783": 6, "1784": 6, "806": 6, "254": 6, "255": 6, "3365": 6, "3366": 6, "4562": 6, "4563": 6, "64": [6, 10, 11, 13], "2124": 6, "2125": 6, "200": [6, 7, 10, 11, 15], "1997": 6, "1998": 6, "257": 6, "4329": 6, "4330": [6, 16], "3408": 6, "3409": 6, "635": 6, "636": 6, "grandview": 6, "woodland": 6, "103": [6, 16], "one_sample_dist": 6, "boot1": 6, "boot1_dist": 6, "ident": [6, 7, 10], "mimic": 6, "break": [6, 10, 11, 12], "boot20000": 6, "six": [6, 7, 9, 11, 15, 16], "six_bootstrap_sampl": 6, "height": [6, 12, 15], "facet": [6, 15], "67175": 6, "42500": 6, "149": [6, 7], "35000": 6, "13225": 6, "179": [6, 7], "79675": 6, "188": 6, "28225": 6, "boot20000_mean": 6, "159": 6, "29675": 6, "136": 6, "55725": 6, "161": 6, "93950": 6, "22500": 6, "boot_est_dist": 6, "resampl": 6, "repeatedli": 6, "percentil": [6, 16], "captur": [6, 10, 12, 15], "narrow": [6, 10, 16], "implic": 6, "comfort": [6, 14], "strict": [6, 7], "unhelp": 6, "life": [6, 7], "deadli": 6, "ascend": [6, 7, 15], "bound": [6, 15], "97": [6, 12, 15], "quantil": 6, "express": [6, 15, 16], "5th": 6, "975": 6, "ci_bound": 6, "121": [6, 11], "607069": 6, "191": [6, 7], "525362": 6, "finish": [6, 8, 9, 10, 13, 14, 15], "journei": 6, "surfac": [6, 11, 12, 15], "foundat": [6, 7, 10, 12], "openintro": 6, "diez": 6, "2019": [6, 15], "solid": [6, 15], "grasp": 6, "natur": [6, 14, 15, 16], "coxd": 6, "murrai": 6, "insideairbnb": 6, "09": [6, 10, 15], "dccetinkayarb19": 6, "\u00e7": 6, "etinkaya": 6, "rundel": 6, "christoph": 6, "barr": 6, "os": [6, 8], "dirti": 7, "clean": [7, 9, 10], "dig": [7, 10, 16], "jump": [7, 9, 10, 15], "symbol": [7, 13, 15, 16], "spoken": [7, 15, 16], "resid": [7, 15], "indigen": 7, "cultur": 7, "anywher": [7, 8], "2018": [7, 15], "sadli": 7, "colon": [7, 16], "led": [7, 15], "loss": 7, "children": 7, "speak": [7, 10, 15, 16], "mother": [7, 15, 16], "tongu": [7, 15, 16], "childhood": 7, "residenti": [7, 11], "discov": 7, "act": [7, 14, 15, 16], "harm": 7, "endang": 7, "geograph": 7, "walker": 7, "2017": [7, 14], "came": [7, 11, 15], "aborigin": [7, 10, 15, 16], "truth": 7, "reconcili": 7, "commiss": 7, "action": 7, "2015": 7, "canlang": [7, 10, 15], "2016": [7, 10, 15, 16], "censu": [7, 10, 15, 16], "214": [7, 10, 15, 16], "offici": [7, 10, 15, 16], "mother_tongu": [7, 10, 15, 16], "expos": 7, "birth": 7, "most_at_hom": [7, 10, 15, 16], "most_at_work": [7, 10, 15, 16], "lang_known": [7, 10, 15, 16], "accord": [7, 10, 15, 16], "deep": [7, 12], "simplifi": [7, 10, 16], "concentr": [7, 15], "expertis": 7, "bias": 7, "aim": [7, 9, 15], "causal": [7, 11, 15], "mechanist": [7, 15], "leek": 7, "matsui": 7, "earli": [7, 9], "live": [7, 10, 15], "provinc": [7, 10], "territori": 7, "propos": 7, "hypothes": [7, 15], "polit": 7, "parti": 7, "wealth": [7, 15], "elect": 7, "quantif": 7, "factor": [7, 15], "mechan": [7, 10, 11], "pertain": [7, 15, 16], "occasion": [7, 13, 16], "race": [7, 11, 12], "runner": 7, "regularli": [7, 8], "graphic": [7, 8, 10, 13, 14, 15], "ag": 7, "old": [7, 10, 14], "50kg": 7, "cluster": [7, 9, 15], "bought": 7, "amazon": 7, "cellphon": 7, "ownership": 7, "android": 7, "phone": 7, "essenc": 7, "spreadsheet": [7, 10], "microsoft": 7, "rectangular": 7, "primarili": [7, 11, 14, 15], "voter": 7, "affili": 7, "comma": [7, 8, 11, 16], "short": [7, 10, 15], "save": [7, 10, 13, 14], "googl": [7, 10], "sheet": [7, 10], "can_lang": [7, 10, 15, 16], "plain": [7, 8, 14], "editor": [7, 8, 10, 14], "notepad": 7, "590": [7, 10, 15], "235": [7, 10, 15, 16], "665": [7, 10, 15], "afrikaan": [7, 10, 15, 16], "10260": [7, 10, 15], "4785": [7, 10, 15], "23415": [7, 10, 15], "afro": [7, 10, 15, 16], "asiat": [7, 10, 15, 16], "1150": [7, 10, 15], "44": [7, 10, 14, 15], "akan": [7, 10, 15, 16], "twi": [7, 10, 15, 16], "13460": [7, 10, 15], "5985": [7, 10, 15], "22150": [7, 10, 15], "albanian": [7, 10, 15, 16], "26895": [7, 10, 15], "13135": [7, 10, 15], "345": [7, 10, 15], "31930": [7, 10, 15], "algonquian": [7, 10, 16], "algonquin": [7, 10, 16], "1260": [7, 10], "370": [7, 10, 16], "2480": [7, 10], "sign": [7, 10, 11, 14, 15], "2685": [7, 10], "3020": [7, 10], "1145": [7, 10], "amhar": [7, 10], "22465": [7, 10], "12785": [7, 10], "33670": [7, 10], "instal": [7, 8, 9, 10, 13], "team": [7, 14], "es": 7, "innei": 7, "2010": 7, "command": [7, 8, 10, 13], "shorter": [7, 8, 10, 14, 15], "alia": [7, 8], "gave": [7, 10], "harder": [7, 15, 16], "quot": [7, 10], "letter": [7, 13, 14], "distinguish": [7, 15], "satisfi": [7, 10], "syntax": [7, 10, 14, 16], "amp": [7, 10, 15, 16], "445": [7, 10, 15, 16], "2775": [7, 10, 15], "209": [7, 10, 15, 16], "wolof": [7, 10, 15, 16], "3990": [7, 10, 15], "1385": [7, 10, 15], "8240": [7, 10, 15], "210": [7, 10, 15, 16], "wood": [7, 10, 15, 16], "cree": [7, 10, 15, 16], "1840": [7, 10, 15], "800": [7, 10, 15], "2665": [7, 10, 15], "211": [7, 10, 11, 15, 16], "wu": [7, 10, 15, 16], "shanghaines": [7, 10, 15, 16], "12915": [7, 10, 15], "7650": [7, 10, 15], "16530": [7, 10, 15], "yiddish": [7, 10, 15, 16], "13555": [7, 10, 15], "7085": [7, 10, 15], "895": [7, 10, 15], "20985": [7, 10, 15], "yoruba": [7, 10, 15, 16], "9080": [7, 10, 15], "2615": [7, 10, 15], "22415": [7, 10, 15], "screen": [7, 8, 10], "string": [7, 10, 14, 15, 16], "my_numb": 7, "alic": 7, "formal": 7, "_": [7, 8, 15, 16], "won": [7, 10, 12, 14, 16], "complain": 7, "my": [7, 8], "syntaxerror": 7, "mayb": [7, 10], "meant": 7, "convent": [7, 8, 14], "lowercas": [7, 14], "language_data": 7, "pep": 7, "guido": 7, "van": 7, "rossum": 7, "2001": 7, "minut": [7, 8, 12, 15], "underneath": [7, 8], "ve": [7, 10, 14], "largest": [7, 10, 15, 16], "sophist": 7, "restrict": [7, 12, 16], "bracket": [7, 8, 11, 16], "statement": [7, 10, 16], "written": [7, 8, 10, 14], "doubl": [7, 8, 9, 13, 15, 16], "athabaskan": [7, 10, 16], "atikamekw": [7, 10, 16], "6150": [7, 10], "5465": 7, "1100": 7, "6645": 7, "thompson": [7, 10], "ntlakapamux": [7, 10], "335": [7, 10], "450": 7, "tlingit": [7, 10], "260": 7, "tsimshian": [7, 10], "410": 7, "206": 7, "wakashan": [7, 10], "67": [7, 10, 11, 15], "aboriginal_lang": 7, "alias": 7, "wrote": 7, "terminolog": 7, "obj": 7, "f": [7, 10, 11, 13], "programm": 7, "confus": [7, 10, 16], "appar": 7, "rescu": 7, "selected_lang": 7, "descend": [7, 15], "decend": 7, "arranged_lang": 7, "64050": 7, "inuktitut": 7, "35210": 7, "138": 7, "ojibwai": 7, "17885": 7, "oji": 7, "12855": 7, "dene": 7, "10700": 7, "32": [7, 12, 14, 15, 16], "cayuga": 7, "squamish": 7, "iroquoian": 7, "ten_lang": 7, "montagnai": 7, "innu": 7, "10235": 7, "119": 7, "mi": [7, 15], "kmaq": 7, "6690": 7, "3065": 7, "180": [7, 12], "stonei": 7, "3025": 7, "becam": 7, "curiou": 7, "728": [7, 15], "canadian_popul": [7, 15], "overwrit": 7, "opt": [7, 10, 11], "mother_tongue_perc": [7, 15], "35_151_728": [7, 15], "35151728": 7, "latter": [7, 11], "clearer": [7, 15], "182210": 7, "100166": 7, "050879": 7, "036570": 7, "030439": 7, "029117": 7, "019032": 7, "017496": 7, "008719": 7, "008606": 7, "ten_lang_perc": 7, "008": 7, "temporari": [7, 14, 16], "arranged_lang_sort": 7, "trace": [7, 8], "split": [7, 11, 12, 15], "rewrit": 7, "unwieldi": 7, "parenthesi": 7, "demonstr": [7, 10, 11, 12, 15, 16], "cleaner": 7, "messi": [7, 14, 16], "pars": [7, 10, 15], "block": [7, 10], "piec": 7, "period": [7, 8, 10, 15], "Not": [7, 16], "feed": 7, "redo": 7, "overwhelm": 7, "debug": 7, "midwai": 7, "audienc": [7, 8, 14, 15], "difficulti": 7, "scrutin": 7, "speaker": [7, 15, 16], "convei": [7, 15], "understood": 7, "tidi": 7, "shortli": 7, "ax": [7, 15], "mark": [7, 10, 14, 15], "channel": [7, 10, 11, 14, 15], "barplot_mother_tongu": 7, "refin": [7, 10], "quotat": [7, 10], "modif": [7, 16], "tackl": 7, "rotat": 7, "swap": [7, 15], "barplot_mother_tongue_axi": 7, "forward": [7, 10, 11], "suit": [7, 15, 16], "reorder": 7, "ordered_barplot_mother_tongu": 7, "swampi": 7, "elsewher": [7, 10], "moos": 7, "northern": 7, "east": 7, "southern": 7, "comment": [7, 14], "hash": [7, 14], "importantli": 7, "self": [7, 10], "habit": [7, 11], "highli": [7, 14], "got": 7, "tast": 7, "ten_lang_plot": 7, "nobodi": 7, "pull": [7, 10, 13], "forgotten": [7, 14], "pop": [7, 8, 10], "slowli": 7, "adept": 7, "remind": [7, 16], "lab": [7, 13], "lookup": 7, "concis": 7, "press": [7, 8], "tab": [7, 8, 10, 13, 14], "bring": [7, 10], "typo": 7, "hold": [7, 10, 15, 16], "dialogu": 7, "dialog": [7, 14], "contextu": 7, "gvr01": 7, "coghlan": 7, "barri": [7, 16], "warsaw": 7, "style": [7, 10], "0008": 7, "lp15": 7, "jeffrei": [7, 15], "347": 7, "6228": 7, "1314": 7, "1315": 7, "pm15": 7, "elizabeth": 7, "art": [7, 15], "anyon": [7, 8, 10, 14], "skybrud": 7, "consult": [7, 10, 14], "llc": 7, "bookdown": 7, "rdpeng": 7, "artofdatasci": 7, "tim20": [7, 15], "ttimber": [7, 10, 15], "wal17": 7, "anada": 7, "canadiangeograph": 7, "wil18": 7, "kori": 7, "bccampu": 7, "opentextbc": 7, "indigenizationfound": 7, "statisticscanada16a": 7, "www12": 7, "statcan": 7, "gc": 7, "recens": 7, "dp": 7, "eng": 7, "cfm": 7, "statisticscanada16b": 7, "borigin": 7, "irst": 7, "ation": 7, "\u00e9ti": 7, "nuit": 7, "sa": 7, "2016022": 7, "x2016022": 7, "statisticscanada18": 7, "evolut": 7, "1901": 7, "www150": 7, "n1": 7, "pub": 7, "630": 7, "x2018001": 7, "htm": 7, "thepdteam20": 7, "dev": 7, "februari": 7, "doi": [7, 15], "5281": 7, "zenodo": 7, "3509134": 7, "trutharcocanada12": 7, "public": [7, 14], "govern": 7, "servic": [7, 10, 14], "trutharcocanada15": 7, "ction": 7, "www2": 7, "gov": [7, 10, 15], "asset": 7, "columbian": 7, "calls_to_action_english2": 7, "pdf": [7, 15], "wesmckinney10": 7, "ata": 7, "tructur": 7, "tatist": 7, "omput": 7, "p": [7, 10, 13], "ython": 7, "t\u00e9fan": 7, "der": 7, "arrod": 7, "illman": 7, "roceed": 7, "9th": 7, "cienc": 7, "onfer": 7, "25080": 7, "majora": 7, "92bf1922": 7, "00a": 7, "interleav": 8, "narrat": 8, "platform": [8, 14], "interfac": [8, 10, 13, 14], "dress": 8, "morn": 8, "configur": [8, 9, 13, 14], "mix": [8, 16], "formatt": 8, "artifact": 8, "analyz": [8, 9, 10, 16], "realiti": [8, 12], "consciou": [8, 14], "screenshot": 8, "easiest": [8, 13], "jupyterhub": [8, 14], "provis": 8, "authent": [8, 14], "gain": [8, 10], "instructor": [8, 9], "refer": 8, "independ": [8, 9, 15], "entireti": 8, "activ": [8, 10], "cursor": 8, "rectangl": [8, 15], "toolbar": [8, 10], "keyboard": [8, 14], "enter": [8, 10, 13, 14, 15], "arrow": [8, 14], "restart": [8, 13], "bar": [8, 10, 12, 13], "slight": [8, 11], "session": [8, 13, 14], "delet": [8, 13, 14], "emul": 8, "window": [8, 10], "statu": 8, "idl": 8, "busi": 8, "excess": 8, "unrespons": 8, "lose": 8, "connect": [8, 10, 12, 13, 14, 15], "interrupt": 8, "paus": 8, "server": [8, 10, 14], "hub": 8, "panel": 8, "shut": [8, 13], "rich": [8, 14], "bold": 8, "italic": 8, "bullet": [8, 10], "eventu": [8, 10, 15], "unformat": 8, "unrend": 8, "box": [8, 11, 12, 13, 14], "progress": [8, 13], "autosav": 8, "disk": [8, 10], "icon": [8, 10, 13, 14], "mac": 8, "arbitrari": [8, 15], "downsid": [8, 13], "nonlinear": [8, 12, 15], "deliber": [8, 14], "referenc": 8, "unconvent": 8, "fail": 8, "nonfunct": 8, "scenario": [8, 10], "event": [8, 14], "guard": 8, "awar": [8, 14], "sooner": 8, "linearli": [8, 12], "suffici": [8, 15], "extern": [8, 14], "heavili": 8, "loc": [8, 15], "package_nam": 8, "pn": 8, "librari": [8, 15], "hidden": [8, 10], "ipynb": [8, 10, 14], "shareabl": 8, "firefox": 8, "safari": 8, "chrome": 8, "edg": 8, "adob": 8, "acrobat": 8, "benefit": [8, 10, 14, 16], "standalon": 8, "font": [8, 10, 15], "launcher": 8, "visibl": [8, 14, 15], "untitl": 8, "white": 8, "troublesom": [8, 14], "repetit": 8, "dash": [8, 15], "jupyterlab": 8, "keen": 8, "commonmark": 8, "cheatsheet": 8, "audit": 9, "friend": 9, "colleagu": 9, "histori": [9, 14], "chapter": 9, "spend": [9, 10, 11, 16], "restructur": 9, "usabl": 9, "coher": 9, "variou": [10, 13, 16], "laptop": [10, 14], "gatewai": 10, "unless": [10, 13, 15], "upfront": [10, 16], "devot": 10, "shoelac": 10, "trip": 10, "skiprow": 10, "ibi": 10, "list_tabl": 10, "to_csv": 10, "astronomi": 10, "pictur": [10, 15], "request": [10, 16], "internet": [10, 13], "remot": 10, "directori": [10, 13, 14, 15], "filesystem": 10, "folder": [10, 13, 14], "worksheet_02": 10, "happiness_report": 10, "slash": [10, 16], "proce": [10, 13, 14, 16], "happy_data": 10, "bike_shar": 10, "tutorial_01": 10, "silli": [10, 12], "redund": [10, 15], "whew": 10, "bonu": 10, "fatima": 10, "jayden": 10, "usernam": [10, 14], "link": [10, 13, 14], "video": [10, 13], "omma": 10, "epar": 10, "v": [10, 13], "alu": 10, "aren": [10, 15, 16], "canadian": [10, 16], "canlang_data": 10, "oftentim": [10, 16], "sentenc": 10, "paragraph": [10, 15], "scientist": 10, "distribut": [10, 14, 15], "permiss": [10, 14], "21930": 10, "parsererror": 10, "messag": [10, 13, 14, 15, 16], "wasn": [10, 15], "can_lang_meta": 10, "token": 10, "didn": [10, 16], "tsv": 10, "escap": 10, "backslash": 10, "can_lang_no_nam": 10, "curli": [10, 16], "brace": 10, "col_map": 10, "canlang_data_renam": 10, "immedi": [10, 12], "u": [10, 13, 15], "niform": 10, "esourc": 10, "ocat": 10, "raw": [10, 13, 15, 16], "githubusercont": [10, 13], "datasci": 10, "whichev": 10, "xlsx": 10, "snippet": [10, 14], "_rel": 10, "j1": 10, "w8": 10, "qrj": 10, "tf": 10, "wz": 10, "hlio": 10, "8f": 10, "3wn": 10, "ed2": 10, "gz": 10, "_r": 10, "yg": 10, "tuee": 10, "6q": 10, "rzy": 10, "l60": 10, "xtp": 10, "4vt": 10, "jq": 10, "sheet_nam": 10, "sad": 10, "usecol": 10, "beforehand": 10, "libr": 10, "offic": 10, "semicolon": 10, "decim": [10, 15, 16], "european": 10, "countri": 10, "storag": 10, "user": [10, 13, 14], "manag": [10, 13, 14], "mysql": 10, "oracl": 10, "sql": 10, "simplest": [10, 15], "db": 10, "backend": 10, "send": [10, 14], "sqlalchemi": 10, "matur": 10, "deeper": 10, "friendlier": 10, "conn": 10, "retriev": [10, 11, 14, 16], "secretli": 10, "behind": [10, 14, 15], "scene": [10, 14], "canlang_t": 10, "databaset": 10, "r0": 10, "countstar": 10, "haven": [10, 13], "sent": [10, 14], "effici": [10, 12, 14, 15], "lazi": 10, "compil": 10, "str": 10, "AS": 10, "nfrom": 10, "t0": 10, "arab": 10, "419890": 10, "223535": 10, "5585": 10, "629055": 10, "mostli": [10, 14, 15, 16], "canlang_table_filt": 10, "predic": 10, "canlang_table_select": 10, "r1": 10, "aboriginal_lang_data": 10, "attributeerror": 10, "traceback": 10, "recent": [10, 13, 14], "conda": [10, 13], "lib": 10, "python3": 10, "expr": 10, "py": [10, 13, 16], "645": 10, "__getattr__": 10, "641": 10, "hint": 10, "common_typo": 10, "642": [10, 12], "rais": [10, 15], "643": 10, "__name__": 10, "644": 10, "tahltan": 10, "crash": 10, "postgr": 10, "client": [10, 11], "host": [10, 13, 14], "localhost": 10, "port": [10, 13], "endpoint": 10, "5432": 10, "password": [10, 14], "can_mov_db": 10, "movi": 10, "fakeserv": 10, "stat": 10, "user0001": 10, "abc123": 10, "theme": [10, 15], "medium": [10, 14], "title_alias": 10, "episod": 10, "names_occup": 10, "occup": 10, "rate": 10, "ratings_t": 10, "alchemyt": 10, "average_r": 10, "num_vot": 10, "avg_rat": 10, "order_bi": 10, "backup": 10, "integr": 10, "secur": [10, 14], "simultan": [10, 14, 16], "conflict": 10, "billion": 10, "daili": 10, "chao": 10, "ensu": 10, "no_official_lang_data": 10, "no_official_languag": 10, "magic": 10, "uncommon": 10, "pplicat": 10, "rogram": 10, "nterfac": 10, "secret": [10, 14], "somewhat": [10, 12], "thought": [10, 12, 16], "painstak": 10, "gather": [10, 15], "yper": 10, "ext": 10, "arkup": 10, "anguag": 10, "ascad": 10, "tyle": 10, "heet": 10, "webpag": [10, 14], "wherea": [10, 12, 16], "element": [10, 15, 16], "layout": [10, 15], "subsect": 10, "richardson": 10, "2007": 10, "reitz": 10, "2023": 10, "foot": [10, 11, 12], "craiglist": 10, "craigslist": 10, "advertis": [10, 11, 12], "span": 10, "meta": 10, "hous": [10, 11, 12], "1br": 10, "hood": 10, "13768": 10, "108th": 10, "avenu": 10, "maptag": 10, "pid": 10, "6786042973": 10, "banish": 10, "trash": [10, 13], "hide": [10, 15], "post": [10, 14], "unbanish": 10, "href": 10, "restor": 10, "2285": 10, "oof": 10, "date": [10, 14, 15], "keyword": [10, 16], "grab": 10, "complex": [10, 12, 14, 15], "selectorgadget": 10, "cc": 10, "deselect": 10, "pic": 10, "footag": 10, "gadget": 10, "robot": 10, "txt": [10, 14], "cl": 10, "spider": 10, "script": 10, "scraper": 10, "crawler": 10, "explicit": [10, 16], "realist": 10, "disallow": 10, "td": 10, "nth": 10, "child": [10, 12], "largestc": 10, "target": 10, "bs4": 10, "wiki": 10, "en": 10, "parser": 10, "population_nod": 10, "slice": [10, 15, 16], "clariti": [10, 15], "greater_toronto_area": 10, "202": 10, "london": [10, 16], "_ontario": 10, "ontario": 10, "543": 10, "551": 10, "greater_montr": 10, "montreal": [10, 16], "node": 10, "rid": 10, "get_text": 10, "fantast": 10, "albeit": 10, "canada_wiki_t": 10, "metropolitan": [10, 16], "droplevel": 10, "canada_wiki_df": 10, "rank": 10, "unnam": 10, "8_level_1": 10, "9_level_1": 10, "6202225": 10, "543551": 10, "quebec": 10, "4291732": 10, "halifax": [10, 16], "nova": 10, "scotia": 10, "465703": 10, "2642825": 10, "st": [10, 16], "catharin": [10, 16], "niagara": [10, 16], "433604": 10, "ottawa": [10, 16], "gatineau": [10, 16], "1488307": 10, "windsor": [10, 16], "422630": 10, "calgari": [10, 16], "1481806": 10, "oshawa": 10, "415311": 10, "edmonton": [10, 16], "1418118": 10, "victoria": [10, 15, 16], "397237": 10, "839311": 10, "saskatoon": 10, "saskatchewan": 10, "317480": 10, "winnipeg": [10, 16], "manitoba": 10, "834678": 10, "regina": [10, 16], "249217": 10, "hamilton": 10, "785184": 10, "sherbrook": 10, "227398": 10, "kitchen": [10, 16], "cambridg": [10, 16], "waterloo": [10, 16], "575847": 10, "kelowna": [10, 16], "222162": 10, "desktop": 10, "stun": 10, "rho": 10, "ophiuchi": 10, "juli": 10, "webb": 10, "telescop": 10, "nircam": 10, "molecular": [10, 15], "signup": 10, "safe": [10, 14], "transfer": [10, 11], "infinit": 10, "bandwidth": 10, "frequent": [10, 14], "success": [10, 14], "bog": 10, "revok": 10, "grant": 10, "quota": 10, "overrun": 10, "abid": 10, "hourli": 10, "hour": [10, 11], "planetari": 10, "apod": 10, "api_kei": 10, "your_api_kei": 10, "07": [10, 15], "explan": [10, 15], "mere": 10, "390": 10, "light": 10, "sun": [10, 15], "star": 10, "planet": 10, "peer": 10, "natal": 10, "infrar": 10, "spectacular": 10, "cosmic": 10, "snapshot": [10, 13, 14], "celebr": 10, "young": 10, "brighter": 10, "clearli": [10, 15], "sport": 10, "diffract": 10, "spike": 10, "jet": 10, "shock": 10, "hydrogen": 10, "blast": 10, "newborn": 10, "yellowish": 10, "dusti": 10, "caviti": 10, "carv": 10, "energet": 10, "Near": 10, "shadow": 10, "cast": 10, "protoplanetari": 10, "hdurl": 10, "2307": 10, "stsci": 10, "01_rhooph": 10, "png": [10, 15], "media_typ": 10, "service_vers": 10, "v1": 10, "01_rhooph1024": 10, "neat": 10, "json": 10, "javascript": 10, "notat": [10, 16], "nasa_data_singl": 10, "start_dat": 10, "end_dat": 10, "nasa_data": 10, "74": [10, 15], "copyright": 10, "data_dict": 10, "nasa_df": 10, "carina": 10, "nebula": 10, "ncarlo": 10, "taylor": 10, "2305": 10, "carnorth": 10, "02": [10, 15], "flat": [10, 11, 12, 15], "rock": 10, "mar": 10, "nnasa": 10, "njpl": 10, "caltech": 10, "nmsss": 10, "nprocess": 10, "ne": 10, "flatmar": 10, "03": [10, 15, 16], "centauru": 10, "peculiar": 10, "island": 10, "nmarco": 10, "lorenzi": 10, "nangu": 10, "lau": 10, "tommi": 10, "tse": 10, "ntex": 10, "ngc5128_": 10, "galaxi": 10, "famou": 10, "hole": 10, "pia23122": 10, "shackleton": 10, "shadowcam": 10, "shacklet": 10, "69": 10, "doom": 10, "eta": 10, "nesa": 10, "nhubbl": 10, "nlice": 10, "etacarin": 10, "dust": 10, "ngc": 10, "6559": 10, "nadam": 10, "ntelescop": 10, "ngc6559_": 10, "sunspot": 10, "spot": 10, "72": 10, "ring": 10, "spiral": 10, "1398": 10, "ngc1398_": 10, "73": [10, 15], "readili": 10, "heart": 10, "awesom": 10, "udac": 10, "linux": [10, 13], "rthepsfoundation23": 10, "kenneth": 10, "readthedoc": 10, "latest": [10, 13, 14, 16], "ric07": 10, "leonard": 10, "beauti": 10, "soup": 10, "april": [10, 15], "nasaesacsa": 10, "esa": 10, "csa": 10, "pontoppidan": 10, "pagan": 10, "esawebb": 10, "weic2316a": 10, "realtsproject21": 10, "internetlivestat": 10, "faster": [11, 15], "rmspe": [11, 12], "person": [11, 12, 15], "week": 11, "annual": 11, "boston": 11, "marathon": 11, "sale": [11, 12], "spline": 11, "heurist": 11, "932": 11, "estat": [11, 12], "sacramento": [11, 12], "bee": 11, "newspap": 11, "realtor": 11, "zip": [11, 13, 14], "sqft": [11, 12], "latitud": 11, "longitud": 11, "z95838": 11, "836": [11, 16], "59222": 11, "631913": 11, "434879": 11, "z95823": 11, "1167": 11, "68212": 11, "478902": 11, "431028": 11, "z95815": 11, "796": 11, "68880": 11, "618305": 11, "443839": 11, "852": 11, "69307": 11, "616835": 11, "439146": 11, "z95824": 11, "797": 11, "81900": 11, "519470": 11, "435768": 11, "927": 11, "z95829": 11, "2280": 11, "232425": 11, "457679": 11, "359620": 11, "928": [11, 16], "1477": 11, "234000": 11, "499893": 11, "458890": 11, "929": 11, "citrus_height": 11, "z95610": 11, "1216": 11, "235000": 11, "708824": 11, "256803": 11, "930": [11, 15], "elk_grov": 11, "z95758": 11, "1685": 11, "235301": 11, "417000": 11, "397424": 11, "931": 11, "el_dorado_hil": 11, "z95762": 11, "1362": 11, "235738": 11, "655245": 11, "075915": 11, "livabl": 11, "feet": [11, 12], "usd": [11, 12], "unit": [11, 12, 15, 16], "front": [11, 15], "0f": [11, 12], "sold": [11, 12], "former": 11, "dive": 11, "subsampl": 11, "small_sacramento": 11, "pai": 11, "absent": 11, "small_plot": 11, "overlai": 11, "line_df": 11, "2000": 11, "mark_rul": [11, 15], "strokedash": [11, 15], "dist": 11, "nearest_neighbor": 11, "298": 11, "1900": 11, "361745": 11, "487409": 11, "461413": 11, "718": 11, "antelop": 11, "z95843": 11, "2160": 11, "290000": 11, "704554": 11, "354753": 11, "rosevil": 11, "z95678": 11, "1744": 11, "326951": 11, "771917": 11, "304439": 11, "256": 11, "252": 11, "z95835": 11, "1718": 11, "250000": 11, "676658": 11, "528128": 11, "282": 11, "rancho_cordova": 11, "z95670": 11, "1671": 11, "175000": 11, "591477": 11, "315340": 11, "329": 11, "280739": 11, "280": [11, 15, 16], "739": 11, "unansw": 11, "abil": [11, 14, 15, 16], "lock": [11, 12], "sacramento_train": [11, 12], "sacramento_test": [11, 12], "limits_": 11, "y_i": 11, "hat": 11, "_i": 11, "th": 11, "forecast": 11, "overshoot": 11, "undershoot": 11, "rmse": [11, 12], "equat": [11, 12], "kneighborsregressor": [11, 12], "neg_root_mean_squared_error": 11, "kneighborsregressor__n_neighbor": 11, "sacr_pipelin": 11, "sacr_preprocessor": 11, "201": 11, "sacr_gridsearch": 11, "sacr_result": 11, "param_kneighborsregressor__n_neighbor": 11, "117365": 11, "988307": 11, "2715": 11, "383001": 11, "93956": 11, "523683": 11, "2466": 11, "200227": 11, "89859": 11, "401722": 11, "2739": 11, "713448": 11, "87893": 11, "534919": 11, "2958": 11, "587153": 11, "86444": 11, "413831": 11, "3383": 11, "712997": 11, "92909": 11, "550051": 11, "2562": 11, "784826": 11, "93137": 11, "289780": 11, "2511": 11, "564001": 11, "93395": 11, "588763": 11, "2492": 11, "272799": 11, "93671": 11, "588088": 11, "2473": 11, "312705": 11, "199": 11, "93986": 11, "752272": 11, "048651": 11, "moment": [11, 16], "nonneg": 11, "neg_": 11, "convolut": 11, "alright": [11, 15], "101": [11, 16], "minimum": [11, 12, 16], "699": 11, "perfectli": [11, 14, 15], "datapoint": 11, "inflex": 11, "idiosyncrat": 11, "unseen": [11, 12], "mean_squared_error": [11, 12], "87498": 11, "86808211416": 11, "499": 11, "578": 11, "neglig": 11, "buyer": 11, "afford": 11, "maximum": [11, 12, 16], "5000": 11, "superimpos": [11, 12], "qualit": [11, 12], "opportun": 11, "sqft_prediction_grid": [11, 12], "arang": 11, "base_plot": 11, "sacr_preds_plot": [11, 12], "best_k_sacr": 11, "ff7f0e": [11, 12], "concern": [11, 12], "incorpor": [11, 16], "plot_b": 11, "moreov": 11, "85156": 11, "027067": 11, "3376": 11, "143313": 11, "rmspe_mult": 11, "85083": 11, "2902421959": 11, "083": 11, "overlaid": [11, 12], "2d": 11, "newli": [11, 14], "character": 12, "conclud": 12, "train": 12, "slower": 12, "confusingli": 12, "undervalu": 12, "beta_0": 12, "beta_1": 12, "cdot": 12, "intercept": [12, 15], "coeffici": 12, "parametr": 12, "push": 12, "happili": 12, "crazi": 12, "shouldn": 12, "600": [12, 15], "276": 12, "027": 12, "plausibl": 12, "linearregress": 12, "linear_model": 12, "coef_": 12, "intercept_": 12, "lm": 12, "285652": 12, "15642": 12, "309105": 12, "hurt": 12, "afterward": [12, 16], "85376": 12, "59691629931": 12, "377": [12, 15], "tricki": [12, 13], "all_point": 12, "wiggli": 12, "curv": [12, 15], "oscil": [12, 15], "Such": 12, "fare": 12, "extrapol": 12, "obvious": 12, "mlm": 12, "linearregressionlinearregress": 12, "lm_mult_test_rmsp": 12, "82331": 12, "04630202598": 12, "331": 12, "hallmark": 12, "59235377": 12, "20333": 12, "43213798": 12, "53180": 12, "26906624224": 12, "beta_2": 12, "hyperplan": 12, "333": [12, 15], "tune": [12, 15], "collinear": 12, "judg": 12, "unbeknownst": 12, "analyst": 12, "parent": 12, "absurdli": 12, "nevertheless": [12, 15], "subtl": [12, 16], "inaccur": 12, "ever": [12, 14, 16], "238": 12, "ft": 12, "041": 12, "166": 12, "539": 12, "ic": 12, "cream": 12, "flavor": [12, 15], "remark": 12, "homeown": 12, "df": [12, 16], "fulli": [12, 15], "5994": 12, "288853": 12, "1688": 12, "092090": 12, "9859": 12, "021194": 12, "9160": 12, "812375": 12, "6400": 12, "212624": 12, "7341": 12, "333609": 12, "8434": 12, "656970": 12, "3329": 12, "106273": 12, "7170": 12, "311442": 12, "7895": 12, "567003": 12, "cubic": 12, "z": 12, "strong": [12, 15], "magnitud": [12, 15], "leap": 12, "stone": 12, "enjoi": 12, "ventura": 13, "22": [13, 14, 15], "cpu": 13, "english": [13, 15, 16], "virtual": 13, "git": [13, 14], "rightmost": 13, "compress": [13, 15], "unzip": 13, "autograd": 13, "pre": 13, "isol": 13, "interf": 13, "ex": 13, "wizard": 13, "wsl": 13, "hyper": 13, "prompt": [13, 14], "cmd": 13, "admin": 13, "administr": 13, "log": [13, 14, 15], "bio": 13, "hotkei": 13, "esc": 13, "reboot": 13, "familiar": 13, "ubcdsci": 13, "proceed": [13, 16], "dockerfil": 13, "besid": [13, 14], "expand": [13, 14, 16], "textbox": 13, "8888": 13, "volum": 13, "path": [13, 15, 16], "jovyan": 13, "scroll": [13, 14], "127": 13, "troubleshoot": 13, "tip": 13, "dmg": 13, "intel": 13, "processor": 13, "older": 13, "appl": 13, "newer": 13, "drag": [13, 14], "sudo": 13, "certif": 13, "curl": 13, "gnupg": 13, "fssl": 13, "sh": 13, "chmod": 13, "rm": 13, "pwd": 13, "homepag": 13, "bundl": 13, "kernel": 13, "pip": 13, "upgrad": 13, "env": 13, "intro": 13, "yml": 13, "compat": 13, "xcode": 13, "x64": 13, "arm64": 13, "debian": 13, "deb": 13, "dpkg": 13, "jlab": 13, "me": 14, "ago": 14, "holder": 14, "lifespan": 14, "resolv": 14, "revis": 14, "mess": [14, 15], "repercuss": 14, "boggl": 14, "unclear": 14, "document_final_draft_fin": 14, "to_hand_in_final_v2": 14, "polish": 14, "lack": 14, "springboard": 14, "fruit": 14, "revert": 14, "Being": 14, "facilit": 14, "todai": [14, 15], "safeti": 14, "workspac": 14, "schemat": 14, "maintain": 14, "told": 14, "metadata": 14, "brief": 14, "narr": 14, "readm": 14, "md": 14, "draft": 14, "shorten": 14, "daa29d6": 14, "884c7ce": 14, "prerequisit": 14, "stage": 14, "physic": [14, 15], "placehold": 14, "synchron": 14, "eas": 14, "templat": 14, "canadian_languag": 14, "hyphen": 14, "privaci": 14, "happi": 14, "green": [14, 16], "respositori": 14, "reserv": 14, "upload": [14, 15], "toggl": 14, "markdown": 14, "archiv": 14, "defeat": 14, "prove": 14, "beginn": 14, "grain": 14, "expiri": 14, "creation": 14, "absolut": [14, 15], "tick": [14, 15], "repo": 14, "fret": 14, "eda": 14, "flag": 14, "pane": 14, "plu": 14, "untrack": 14, "checkpoint": 14, "state": [14, 15], "datetim": [14, 15], "stamp": 14, "ok": 14, "credenti": 14, "author": 14, "33": [14, 15, 16], "dismiss": 14, "invit": 14, "collaborators_github_user_nam": 14, "refresh": 14, "blend": [14, 15], "offend": 14, "preced": 14, "histor": 14, "float": [14, 16], "app": 14, "convers": [14, 15, 16], "subtop": 14, "persist": 14, "thread": 14, "searchabl": 14, "notif": 14, "repli": 14, "submit": [14, 15], "submiss": 14, "youtub": 14, "advic": 14, "gitlab": 14, "bitbucket": 14, "wbc": 14, "jennif": 14, "bryan": 14, "karen": 14, "cranston": 14, "justin": 14, "kitz": 14, "lex": 14, "nederbragt": 14, "traci": 14, "teal": 14, "subplot": 15, "raster": 15, "svg": 15, "distract": 15, "poster": 15, "wilk": 15, "oft": 15, "pie": 15, "static": 15, "math": 15, "cognit": 15, "mental": 15, "plainli": 15, "legend": 15, "scheme": 15, "surprisingli": 15, "sex": 15, "ancestri": 15, "deeb": 15, "2005": 15, "blind": 15, "reinforc": 15, "sparingli": 15, "detract": 15, "wari": 15, "overplot": 15, "overlap": 15, "zoom": 15, "vegafus": 15, "data_transform": 15, "curat": 15, "pieter": 15, "tan": 15, "noaa": 15, "gml": 15, "ralph": 15, "keel": 15, "scripp": 15, "oceanographi": 15, "dioxid": 15, "hawaii": 15, "1959": 15, "1980": 15, "co2_df": 15, "mauna_loa_data": 15, "parse_d": 15, "date_measur": 15, "ppm": 15, "338": 15, "340": 15, "341": 15, "06": [15, 16], "479": 15, "414": 15, "480": 15, "481": 15, "416": 15, "482": [15, 16], "483": 15, "484": 15, "datetime64": 15, "ns": 15, "iso": 15, "8601": 15, "alphanumer": 15, "mark_": 15, "leverag": 15, "helper": 15, "co2_scatt": 15, "upward": 15, "affirm": 15, "predecessor": 15, "successor": 15, "alter": 15, "segment": 15, "emphas": 15, "co2_lin": 15, "aha": 15, "phenomenon": 15, "fast": 15, "muddl": 15, "settl": 15, "configure_axi": 15, "titlefonts": 15, "co2_line_label": 15, "co2": 15, "configure_": 15, "1990": 15, "clip": 15, "stack": [15, 16], "co2_line_scal": 15, "late": 15, "season": 15, "summer": 15, "octob": 15, "winter": 15, "novemb": 15, "analog": 15, "paint": 15, "blank": 15, "canva": 15, "primer": 15, "akin": 15, "sketch": 15, "durat": 15, "geyser": 15, "yellowston": 15, "nation": 15, "wyom": 15, "79": 15, "283": 15, "533": 15, "267": 15, "117": [15, 16], "268": [15, 16], "270": 15, "817": 15, "271": 15, "467": 15, "272": 15, "faithful_scatt": 15, "faithful_scatter_label": 15, "faithful_scatter_labels_black": 15, "whom": 15, "hollow": 15, "can_lang_plot": 15, "vs": 15, "can_lang_plot_label": 15, "bunch": 15, "clump": 15, "french": [15, 16], "460": 15, "850": 15, "19460850": 15, "22162865": 15, "15265335": 15, "29748265": 15, "59": [15, 16], "7166700": 15, "6943800": 15, "3825215": 15, "10242945": 15, "logarithm": 15, "squish": 15, "log_": 15, "log10": 15, "inf": 15, "can_lang_plot_log": 15, "gridlin": 15, "seven": 15, "can_lang_plot_log_revis": 15, "tickcount": 15, "kilo": 15, "mutat": 15, "most_at_home_perc": 15, "001678": 15, "000669": 15, "029188": 15, "013612": 15, "003272": 15, "001266": 15, "038291": 15, "017026": 15, "076511": 15, "037367": 15, "011351": 15, "003940": 15, "005234": 15, "002276": 15, "036741": 15, "021763": 15, "038561": 15, "020155": 15, "025831": 15, "007439": 15, "can_lang_plot_perc": 15, "meaningfulli": 15, "onto": 15, "belong": [15, 16], "can_lang_plot_categori": 15, "laid": 15, "can_lang_plot_legend": 15, "orient": 15, "tableau10": 15, "vision": 15, "unsur": 15, "dark2": 15, "aesthet": 15, "switch": 15, "can_lang_plot_them": 15, "demand": 15, "tooltip": 15, "hover": 15, "mous": 15, "pointer": 15, "can_lang_plot_tooltip": 15, "mile": 15, "mcneil": 15, "contin": 15, "south": 15, "africa": 15, "europ": 15, "asia": 15, "australia": 15, "islands_df": 15, "landmass_typ": 15, "11506": 15, "5500": 15, "16988": 15, "2968": 15, "axel": 15, "heiberg": 15, "baffin": 15, "184": 15, "bank": 15, "borneo": 15, "britain": 15, "celeb": 15, "celon": 15, "cuba": 15, "devon": 15, "ellesmer": 15, "3745": 15, "greenland": 15, "840": 15, "hainan": 15, "hispaniola": 15, "hokkaido": 15, "honshu": 15, "iceland": 15, "ireland": 15, "java": 15, "kyushu": 15, "luzon": 15, "madagascar": 15, "227": 15, "melvil": 15, "mindanao": 15, "molucca": 15, "guinea": 15, "306": 15, "zealand": 15, "newfoundland": 15, "9390": 15, "novaya": 15, "zemlya": 15, "princ": 15, "wale": 15, "sakhalin": 15, "6795": 15, "southampton": 15, "spitsbergen": 15, "sumatra": 15, "183": 15, "taiwan": 15, "tasmania": 15, "tierra": 15, "fuego": 15, "timor": 15, "islands_bar": 15, "nlargest": 15, "tilt": 15, "sort_valu": 15, "islands_top12": 15, "islands_bar_top": 15, "appeal": 15, "minu": 15, "revers": 15, "caption": 15, "slide": 15, "summari": 15, "twelv": 15, "islands_plot_sort": 15, "morlei": 15, "1882": 15, "299": 15, "792": 15, "458": 15, "km": 15, "sec": 15, "kilometr": 15, "morley_df": 15, "expt": 15, "740": 15, "900": 15, "1070": [15, 16], "940": 15, "950": 15, "810": 15, "870": 15, "experiment": 15, "fell": 15, "morley_bar": 15, "thin": 15, "bucket": 15, "morley_hist": 15, "datum": 15, "thick": 15, "v_line": 15, "morley_hist_lin": 15, "morley_hist_color": 15, "sit": 15, "transluc": 15, "morley_hist_categor": 15, "deriv": 15, "incorrect": 15, "clearest": 15, "morley_hist_facet": 15, "1050": 15, "foremost": 15, "subtli": 15, "speed_of_light": 15, "299792": 15, "relativeerror": 15, "299000": 15, "019194": 15, "017498": 15, "035872": 15, "092578": 15, "045879": 15, "049215": 15, "052550": 15, "002516": 15, "005851": 15, "025865": 15, "morley_hist_rel": 15, "recreat": 15, "admir": 15, "morley_hist_maxbin": 15, "width": 15, "motiv": 15, "establish": 15, "pose": 15, "wiggl": 15, "discern": 15, "parenthes": [15, 16], "energi": 15, "automot": 15, "plant": 15, "burn": [15, 16], "fossil": 15, "fuel": 15, "greenhous": 15, "gase": 15, "byproduct": 15, "trap": 15, "heat": 15, "warm": 15, "observatori": 15, "amplitud": 15, "growth": 15, "1800": 15, "kilomet": 15, "farthest": 15, "confer": 15, "shop": 15, "billboard": 15, "pixel": 15, "lossi": 15, "lossless": 15, "jpeg": 15, "jpg": 15, "photograph": 15, "bmp": 15, "tiff": 15, "tif": 15, "gimp": 15, "redraw": 15, "ep": 15, "inkscap": 15, "shrink": 15, "portabl": 15, "hardl": 15, "1991": 15, "filenam": 15, "img": 15, "viz": 15, "faithful_plot": 15, "mb": 15, "decent": 15, "bigger": 15, "dee05": 15, "sameer": 15, "clinic": 15, "369": 15, "har91": 15, "wolfgang": 15, "york": 15, "mcn77": 15, "donald": 15, "mic82": 15, "veloc": 15, "nite": 15, "tate": 15, "aval": 15, "cademi": 15, "nnapoli": 15, "astronom": 15, "tk20": 15, "ccgg": 15, "vgh": 15, "jacob": 15, "granger": 15, "heer": 15, "dominik": 15, "moritz": 15, "kanit": 15, "wongsuphasawat": 15, "arvind": 15, "satyanarayan": 15, "eitan": 15, "ilia": 15, "timofeev": 15, "ben": 15, "welsh": 15, "scott": 15, "sievert": 15, "journal": [15, 16], "1057": 15, "21105": 15, "joss": 15, "01057": 15, "wil19": 15, "clau": 15, "clauswilk": 15, "dataviz": 15, "util": 16, "entiti": 16, "tabular": 16, "2235145": 16, "abbrevi": 16, "int": 16, "14159": 16, "boolean": 16, "bool": 16, "hello": 16, "nonetyp": 16, "arithmet": 16, "dict": 16, "cities_seri": 16, "separt": 16, "population_in_2016": 16, "1027613": 16, "1823281": 16, "544870": 16, "571146": 16, "321484": 16, "upcom": 16, "population_in_2016_df": 16, "criteria": 16, "wickham": 16, "No": 16, "bespok": 16, "untidi": 16, "2006": 16, "2011": 16, "land": 16, "region_lang_top5_cities_wid": 16, "cite": 16, "montr\u00e9al": 16, "lang_wid": 16, "985": 16, "1435": 16, "960": 16, "575": 16, "360": 16, "240": 16, "8485": 16, "1015": 16, "705": 16, "885": 16, "13260": 16, "2450": 16, "1090": 16, "1365": 16, "770": 16, "2440": 16, "5290": 16, "1025": 16, "380": 16, "3355": 16, "8960": 16, "3380": 16, "1430": 16, "tough": 16, "lang_mother_tidi": 16, "id_var": 16, "var_nam": 16, "value_nam": 16, "1065": 16, "1066": 16, "1067": 16, "1068": 16, "1069": 16, "met": 16, "commut": 16, "widen": 16, "region_lang_top5_cities_long": 16, "lang_long": 16, "2135": 16, "2136": 16, "2137": 16, "2138": 16, "2139": 16, "2140": 16, "lang_home_tidi": 16, "2495": 16, "1622735": 16, "1330555": 16, "8630": 16, "3245": 16, "behaviour": 16, "colum": 16, "messier": 16, "dealt": 16, "lang_messi": 16, "region_lang_top5_cities_messi": 16, "265": 16, "520": 16, "505": 16, "4045": 16, "440": 16, "330": 16, "6380": 16, "1445": 16, "530": 16, "620": 16, "3130": 16, "760": 16, "6665": 16, "860": 16, "1080": 16, "lang_messy_long": 16, "tidy_lang": 16, "astyp": 16, "depth": 16, "occas": 16, "official_lang": 16, "3836770": 16, "3218725": 16, "29800": 16, "11940": 16, "620510": 16, "412120": 16, "2669195": 16, "1607550": 16, "487": 16, "696": 16, "1065070": 16, "844740": 16, "701": 16, "910": 16, "1050410": 16, "792700": 16, "915": 16, "10950": 16, "2520": 16, "1060": 16, "ampersand": 16, "pipe": 16, "region_data": 16, "household": 16, "dwell": 16, "bellevil": 16, "43002": 16, "1354": 16, "65121": 16, "103472": 16, "45050": 16, "lethbridg": 16, "45696": 16, "3046": 16, "69699": 16, "117394": 16, "48317": 16, "thunder": 16, "bai": 16, "52545": 16, "2618": 16, "26318": 16, "121621": 16, "57146": 16, "peterborough": 16, "50533": 16, "1636": 16, "98336": 16, "121721": 16, "55662": 16, "saint": 16, "john": 16, "52872": 16, "3793": 16, "42158": 16, "126202": 16, "58398": 16, "535499": 16, "7168": 16, "96442": 16, "1323783": 16, "519693": 16, "5241": 16, "70103": 16, "1392609": 16, "960894": 16, "3040": 16, "41532": 16, "2463431": 16, "1727310": 16, "4638": 16, "24059": 16, "4098927": 16, "2135909": 16, "6269": 16, "93132": 16, "5928040": 16, "interst": 16, "city_nam": 16, "five_c": 16, "502143": 16, "9857": 16, "77908": 16, "1321426": 16, "537634": 16, "seriesa": 16, "seriesb": 16, "669": 16, "capabl": 16, "omit": 16, "startswith": 16, "darker": 16, "region_lang": 16, "moncton": 16, "saguenai": 16, "7485": 16, "7486": 16, "7487": 16, "abbotsford": 16, "mission": 16, "7488": 16, "7489": 16, "7490": 16, "23171710": 16, "std": 16, "490000e": 16, "093686e": 16, "401258e": 16, "000000e": 16, "836770e": 16, "25th": 16, "50th": 16, "75th": 16, "skipna": 16, "3061820": 16, "5600480": 16, "numeric_onli": 16, "3200": 16, "341121": 16, "3093": 16, "686248": 16, "1853": 16, "757677": 16, "5127": 16, "499332": 16, "55231": 16, "640268": 16, "64012": 16, "578320": 16, "48574": 16, "532066": 16, "94001": 16, "162338": 16, "cartoon": 16, "dataframegroupbi": 16, "0x7fbc92338f90": 16, "137445": 16, "182390": 16, "97840": 16, "brantford": 16, "124560": 16, "troi": 16, "rivi\u00e8r": 16, "149835": 16, "331375": 16, "270715": 16, "612595": 16, "23015": 16, "875": 16, "8235": 16, "2695": 16, "102": 16, "365": 16, "23565": 16, "104": 16, "11185": 16, "122100": 16, "93495": 16, "167835": 16, "168990": 16, "115125": 16, "193445": 16, "93655": 16, "54150": 16, "100855": 16, "116645": 16, "73910": 16, "130835": 16, "937055": 16, "1343335": 16, "147805": 16, "78610": 16, "149805": 16, "1316635": 16, "2289515": 16, "302690": 16, "211705": 16, "354470": 16, "235990": 16, "166220": 16, "318540": 16, "530570": 16, "437460": 16, "749285": 16, "keyerror": 16, "qu\u00e9bec": 16, "028571": 16, "region_lang_num": 16, "wise": 16, "040": 16, "aforement": 16, "english_lang": 16, "1898": 16, "444955": 16, "2500590": 16, "1903": 16, "1918": 16, "1919": 16, "930405": 16, "1275265": 16, "1923": 16, "city_pop": 16, "unchang": 16, "tmp": 16, "ipykernel_12": 16, "2654974267": 16, "settingwithcopywarn": 16, "row_index": 16, "col_index": 16, "pydata": 16, "doc": 16, "stabl": 16, "user_guid": 16, "warn": 16, "went": 16, "silenc": 16, "div": 16, "divis": 16, "108554": 16, "151384": 16, "100543": 16, "610060": 16, "516498": 16, "647224": 16, "542966": 16, "944744": 16, "672877": 16, "764802": 16, "606588": 16, "964617": 16, "704092": 16, "794906": 16, "599882": 16, "965067": 16, "534472": 16, "658730": 16, "540123": 16, "929401": 16, "city_popul": 16, "wic14": 16, "hadlei": 16}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"acknowledg": 0, "python": [0, 3, 4, 5, 6, 7, 8, 10, 12, 16], "edit": [0, 5, 8, 14], "about": 1, "author": 1, "classif": [2, 3], "i": [2, 11, 14], "train": [2, 3, 11], "predict": [2, 3], "overview": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "chapter": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "learn": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "object": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "The": [2, 3, 4, 8, 11, 12], "problem": [2, 11], "explor": [2, 7, 8, 11], "data": [2, 3, 5, 7, 8, 10, 11, 15, 16], "set": [2, 3, 7, 11, 13, 15], "load": [2, 7], "cancer": 2, "describ": 2, "variabl": [2, 3], "k": [2, 4, 11, 12], "nearest": [2, 11], "neighbor": [2, 11], "distanc": 2, "between": 2, "point": 2, "evalu": [2, 3, 11], "from": [2, 10, 14, 16], "new": [2, 8, 12], "observ": 2, "each": 2, "its": 2, "5": 2, "more": 2, "than": 2, "two": 2, "explanatori": 2, "summari": [2, 3, 6, 8, 10, 16], "algorithm": [2, 4], "scikit": [2, 3], "preprocess": [2, 3], "center": 2, "scale": 2, "balanc": 2, "miss": [2, 10], "put": [2, 7], "togeth": [2, 7], "pipelin": 2, "exercis": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "refer": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "ii": [3, 12], "tune": [3, 11], "perform": [3, 16], "an": [3, 4, 10, 15], "exampl": [3, 4], "confus": 3, "matrix": 3, "tumor": 3, "imag": 3, "random": [3, 4], "seed": 3, "creat": [3, 7, 8, 14, 15], "test": [3, 11], "split": [3, 16], "classifi": 3, "label": 3, "critic": 3, "analyz": 3, "cross": 3, "valid": 3, "paramet": 3, "valu": [3, 7, 10, 16], "select": [3, 7, 16], "under": 3, "overfit": [3, 11], "predictor": [3, 12], "effect": [3, 15], "irrelev": 3, "find": 3, "good": 3, "subset": [3, 7], "forward": 3, "addit": [3, 4, 6, 8, 10, 12, 14, 15, 16], "resourc": [3, 4, 6, 8, 10, 12, 14, 15, 16], "cluster": 4, "illustr": 4, "mean": [4, 6], "measur": 4, "qualiti": 4, "restart": 4, "choos": [4, 15], "scienc": 5, "A": 5, "first": 5, "introduct": 5, "welcom": 5, "statist": [6, 16], "infer": 6, "why": [6, 10, 14], "do": [6, 16], "we": [6, 10], "need": 6, "sampl": 6, "distribut": 6, "proport": 6, "bootstrap": 6, "us": [6, 7, 10, 14, 16], "calcul": [6, 16], "plausibl": 6, "rang": 6, "panda": 7, "canadian": [7, 15], "languag": [7, 15], "ask": 7, "question": 7, "type": [7, 16], "analysi": 7, "tabular": [7, 10], "name": [7, 10, 16], "thing": 7, "frame": [7, 16], "loc": [7, 16], "filter": [7, 16], "row": [7, 10, 16], "column": [7, 10, 16], "sort_valu": 7, "head": 7, "order": 7, "ad": [7, 15, 16], "modifi": [7, 16], "combin": [7, 8, 16], "step": 7, "chain": 7, "multilin": 7, "express": 7, "visual": [7, 15], "altair": [7, 15], "bar": [7, 15], "plot": [7, 15], "format": [7, 8, 15], "chart": [7, 15], "all": [7, 10], "access": [7, 8, 10, 14], "document": 7, "code": 8, "text": [8, 10, 15], "jupyt": [8, 14], "cell": 8, "execut": 8, "kernel": 8, "markdown": 8, "save": [8, 15], "your": [8, 13, 14], "work": [8, 13, 14], "best": 8, "practic": 8, "run": 8, "notebook": 8, "includ": 8, "packag": 8, "file": [8, 10, 14, 15], "export": 8, "differ": [8, 10, 15], "html": [8, 10], "pdf": 8, "prefac": 9, "read": 10, "local": [10, 14], "web": 10, "absolut": 10, "rel": 10, "path": 10, "plain": 10, "read_csv": 10, "comma": 10, "separ": [10, 16], "skip": 10, "when": [10, 15], "sep": 10, "argument": 10, "header": 10, "handl": [10, 14], "directli": 10, "url": 10, "preview": 10, "befor": 10, "microsoft": 10, "excel": 10, "read_excel": 10, "databas": 10, "sqlite": 10, "postgresql": 10, "should": [10, 14], "bother": 10, "write": 10, "csv": 10, "obtain": [10, 13], "scrape": 10, "css": 10, "selector": 10, "beautifulsoup": 10, "read_html": 10, "api": 10, "nasa": 10, "regress": [11, 12], "model": 11, "underfit": 11, "multivari": [11, 12], "nn": [11, 12], "strength": 11, "limit": 11, "linear": 12, "simpl": 12, "compar": 12, "multicollinear": 12, "outlier": 12, "design": 12, "other": 12, "side": 12, "up": [13, 16], "comput": 13, "worksheet": 13, "thi": [13, 16], "book": 13, "docker": 13, "window": 13, "maco": 13, "ubuntu": 13, "jupyterlab": 13, "desktop": 13, "collabor": 14, "version": 14, "control": 14, "what": [14, 16], "repositori": 14, "workflow": 14, "commit": 14, "chang": 14, "push": 14, "remot": 14, "pull": 14, "github": 14, "pen": 14, "tool": 14, "add": 14, "menu": 14, "gener": 14, "person": 14, "token": 14, "clone": 14, "specifi": 14, "make": 14, "give": 14, "project": 14, "merg": [14, 16], "conflict": 14, "commun": 14, "issu": 14, "refin": 15, "scatter": 15, "line": 15, "mauna": 15, "loa": 15, "co_": 15, "2": 15, "old": 15, "faith": 15, "erupt": 15, "time": 15, "axi": 15, "transform": 15, "color": 15, "island": 15, "landmass": 15, "histogram": 15, "michelson": 15, "speed": 15, "light": 15, "layer": 15, "binwidth": 15, "explain": 15, "size": 15, "clean": 16, "wrangl": 16, "seri": 16, "basic": 16, "doe": 16, "have": 16, "structur": 16, "tidi": 16, "go": 16, "wide": 16, "long": 16, "melt": 16, "pivot": 16, "str": 16, "deal": 16, "multipl": 16, "extract": 16, "certain": 16, "satisfi": 16, "condit": 16, "least": 16, "one": 16, "list": 16, "isin": 16, "abov": 16, "below": 16, "threshold": 16, "queri": 16, "iloc": 16, "posit": 16, "aggreg": 16, "individu": 16, "oper": 16, "group": 16, "groupbi": 16, "appli": 16, "function": 16, "across": 16}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["acknowledgements", "authors", "classification1", "classification2", "clustering", "index", "inference", "intro", "jupyter", "preface-text", "reading", "regression1", "regression2", "setup", "version-control", "viz", "wrangling"], "filenames": ["acknowledgements.md", "authors.md", "classification1.md", "classification2.md", "clustering.md", "index.md", "inference.md", "intro.md", "jupyter.md", "preface-text.md", "reading.md", "regression1.md", "regression2.md", "setup.md", "version-control.md", "viz.md", "wrangling.md"], "titles": ["Acknowledgments", "About the authors", "5. Classification I: training & predicting", "6. Classification II: evaluation & tuning", "9. Clustering", "Data Science", "10. Statistical inference", "1. Python and Pandas", "11. Combining code and text with Jupyter", "Preface", "2. Reading in data locally and from the web", "7. Regression I: K-nearest neighbors", "8. Regression II: linear regression", "13. Setting up your computer", "12. Collaboration with version control", "4. Effective data visualization", "3. Cleaning and wrangling data"], "terms": {"we": [0, 2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 16], "d": [0, 1, 6, 7, 10, 15], "like": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "thank": 0, "everyon": 0, "ha": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "contribut": [0, 1, 14], "develop": [0, 1, 3, 6, 7, 8, 9, 10, 14], "data": [0, 1, 4, 6, 9, 12, 13, 14], "scienc": [0, 1, 2, 3, 7, 8, 9, 13, 14, 16], "A": [0, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "first": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "introduct": [0, 3, 4, 6, 7, 9, 10, 12], "thi": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15], "an": [0, 1, 2, 6, 7, 8, 9, 11, 12, 13, 14, 16], "open": [0, 1, 5, 7, 8, 10, 13, 14, 15], "sourc": [0, 1, 10, 15], "textbook": [0, 1, 2, 3, 5, 9, 10, 12, 14, 16], "began": [0, 10], "collect": [0, 2, 3, 4, 6, 7, 10, 15, 16], "cours": [0, 1, 3, 4, 6, 7, 8, 9, 10, 12, 16], "read": [0, 2, 3, 5, 6, 7, 8, 9, 11, 12, 14, 15, 16], "dsci": [0, 10, 13], "100": [0, 2, 3, 6, 7, 10, 11, 12, 13, 15, 16], "new": [0, 3, 4, 6, 7, 10, 11, 13, 14, 15, 16], "introductori": [0, 3, 6], "univers": [0, 1, 6, 10], "british": [0, 1, 6, 7, 10], "columbia": [0, 1, 6, 10], "ubc": [0, 1, 10, 13], "sever": [0, 1, 2, 6, 10, 14, 15, 16], "faculti": 0, "member": [0, 2, 14], "depart": [0, 1], "statist": [0, 1, 2, 3, 4, 7, 10, 11, 12, 15], "were": [0, 2, 3, 6, 7, 8, 10, 12, 14, 15, 16], "pivot": 0, "shape": [0, 2, 4, 6, 7, 10, 12, 15, 16], "direct": [0, 2, 10, 15], "greatli": [0, 16], "broad": [0, 15], "structur": [0, 3, 4, 7, 10, 15], "list": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15], "topic": [0, 3, 4, 8, 12, 14], "book": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "would": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "especi": [0, 2, 7, 10, 13, 14, 15], "mat\u00eda": 0, "salib\u00edan": 0, "barrera": 0, "hi": [0, 1], "mentorship": 0, "dure": [0, 1, 3, 7, 11, 14, 16], "initi": [0, 1, 2, 4, 7, 10, 11, 12, 14, 15], "roll": 0, "out": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "both": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "door": 0, "wa": [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "alwai": [0, 2, 3, 4, 8, 10, 11, 12, 13, 15, 16], "when": [0, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 16], "need": [0, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "chat": 0, "about": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "how": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "best": [0, 3, 6, 7, 10, 11, 12, 14, 15], "introduc": [0, 6, 7, 12, 14, 15, 16], "teach": [0, 1, 2, 7], "our": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "year": [0, 2, 7, 10, 15, 16], "student": [0, 1, 6], "also": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "gabriela": 0, "cohen": 0, "freue": 0, "her": [0, 1], "561": 0, "regress": [0, 2, 3, 4, 7, 9], "i": [0, 3, 4, 6, 7, 8, 9, 10, 12, 13, 15, 16], "materi": [0, 2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "from": [0, 3, 4, 6, 7, 8, 9, 11, 12, 13, 15], "master": [0, 1], "program": [0, 1, 3, 7, 8, 9, 10, 13, 15], "some": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "linear": [0, 3, 8, 11, 15], "figur": [0, 2, 7, 16], "inspir": [0, 10], "all": [0, 2, 3, 4, 6, 8, 9, 11, 12, 13, 14, 15, 16], "those": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "who": [0, 2, 3, 6, 7, 8, 10, 14, 15, 16], "process": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "publish": [0, 10, 15], "In": [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "particular": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "review": [0, 10, 14], "feedback": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "suggest": [0, 2, 3, 6, 7, 11, 12, 15, 16], "rohan": 0, "alexand": 0, "isabella": 0, "ghement": 0, "virgilio": 0, "g\u00f3mez": 0, "rubio": 0, "albert": [0, 15], "kim": 0, "adam": 0, "loi": 0, "maria": 0, "prokofieva": 0, "emili": 0, "rieder": 0, "greg": [0, 14], "wilson": [0, 7, 14], "The": [0, 1, 6, 7, 10, 13, 14, 15, 16], "improv": [0, 2, 3, 4, 6, 7, 11, 12, 14, 15], "substanti": [0, 3, 11], "insight": [0, 4, 9, 15], "give": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "special": [0, 2, 6, 7, 10, 14, 15, 16], "jim": 0, "zidek": 0, "support": [0, 2, 3, 7, 13, 15, 16], "encourag": [0, 16], "throughout": [0, 2, 3, 7, 9, 14, 16], "roger": [0, 7], "peng": [0, 7], "gracious": 0, "offer": [0, 3, 6, 10, 11, 12, 14], "write": [0, 2, 7, 8, 12, 14, 16], "foreword": 0, "final": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "ow": 0, "debt": 0, "gratitud": 0, "over": [0, 1, 2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15, 16], "past": [0, 2, 3, 4, 10, 11, 12, 13, 14, 15], "few": [0, 2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15, 16], "thei": [0, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "provid": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "invalu": 0, "worksheet": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "found": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "bug": [0, 14, 16], "us": [0, 1, 2, 3, 4, 8, 9, 11, 12, 13, 15], "stood": 0, "veri": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "patient": [0, 2, 3], "class": [0, 2, 3, 7, 10, 15, 16], "while": [0, 2, 3, 4, 7, 9, 10, 12, 15, 16], "frantic": 0, "fix": [0, 2, 3, 6, 8, 11, 14, 15, 16], "brought": 0, "level": [0, 3, 4, 6, 7, 9, 12, 15], "enthusiasm": 0, "sustain": 0, "hard": [0, 7, 10, 15, 16], "work": [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 15, 16], "creat": [0, 1, 2, 4, 6, 9, 10, 11, 12, 16], "interact": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "them": [0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "taught": [0, 2], "learn": [0, 1, 9], "reflect": [0, 1, 15], "content": [0, 1, 2, 5, 10, 14, 16], "translat": [0, 10], "origin": [0, 1, 2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "which": [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "focus": [0, 1, 2, 3, 11], "r": [0, 1, 3, 4, 5, 6, 7, 10, 15], "languag": [0, 1, 2, 3, 6, 8, 9, 10, 11, 13, 16], "ar": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "navya": 0, "dahiya": 0, "gloria": 0, "ye": [0, 2], "complet": [0, 1, 3, 6, 7, 8, 10, 11, 13, 14], "round": [0, 3, 6], "philip": 0, "austin": 0, "leadership": 0, "guidanc": [0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "gratefulli": 0, "educ": [0, 1, 2], "resourc": [0, 1, 2, 11], "fund": 0, "exercis": [0, 9, 13], "version": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16], "tiffani": [1, 5, 7, 15], "timber": [1, 5, 7, 15], "trevor": [1, 3, 4, 5, 12], "campbel": [1, 5], "melissa": [1, 5], "lee": [1, 5, 15], "adapt": [1, 15], "python": [1, 2, 9, 11, 13, 14, 15], "joel": [1, 5], "ostblom": [1, 5], "lindsei": [1, 5], "heagi": [1, 5], "associ": [1, 6, 7, 9, 10, 14, 16], "professor": 1, "co": [1, 15], "director": 1, "vancouv": [1, 6, 10, 15, 16], "option": [1, 2, 8, 10, 12, 13, 14, 15, 16], "role": [1, 7, 10, 15], "she": 1, "curriculum": 1, "around": [1, 3, 6, 7, 11, 12, 15, 16], "respons": [1, 2, 3, 4, 10, 11, 12, 14], "applic": [1, 3, 6, 7, 10, 11, 12, 13, 16], "solv": [1, 2, 3, 4, 7, 9, 12, 14, 16], "real": [1, 2, 3, 6, 7, 10, 11, 12, 14, 16], "world": [1, 6, 7, 9, 14, 15, 16], "problem": [1, 3, 4, 6, 7, 8, 9, 12, 14, 15, 16], "One": [1, 2, 3, 6, 7, 8, 11, 12, 14, 15, 16], "favorit": [1, 12], "graduat": 1, "collabor": [1, 8, 9], "softwar": [1, 2, 9, 10, 13, 14, 15, 16], "packag": [1, 2, 3, 4, 7, 10, 11, 12, 13, 15, 16], "modern": [1, 2, 10, 15], "tool": [1, 2, 3, 4, 7, 8, 9, 10, 12, 15, 16], "workflow": [1, 2, 3, 4, 7, 8, 9, 11, 12], "research": [1, 4, 15], "autom": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "scalabl": 1, "bayesian": 1, "infer": 1, "algorithm": [1, 3, 11, 12, 15], "nonparametr": [1, 2, 11], "stream": 1, "theori": [1, 2, 4, 6, 11], "he": 1, "previous": [1, 2, 6, 7, 10, 11, 15, 16], "postdoctor": 1, "advis": [1, 10, 11, 15], "tamara": 1, "broderick": 1, "comput": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "artifici": 1, "intellig": 1, "laboratori": [1, 4], "csail": 1, "institut": [1, 3, 15], "system": [1, 10, 13, 14], "societi": 1, "idss": 1, "mit": 1, "ph": 1, "candid": [1, 3, 12], "under": [1, 5, 8, 13, 14, 16], "jonathan": 1, "inform": [1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "decis": [1, 2, 3, 6, 14], "lid": 1, "befor": [1, 2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 16], "engin": [1, 10, 12, 13, 15], "toronto": [1, 10, 16], "assist": 1, "undergradu": [1, 6], "center": [1, 4, 6, 10, 12, 15, 16], "approach": [1, 2, 3, 4, 6, 7, 9, 11, 12, 14, 16], "assess": [1, 3, 4, 11, 12, 14, 15], "promot": 1, "equiti": 1, "divers": [1, 6], "inclus": [1, 3, 12, 14], "phd": 1, "passion": 1, "reproduc": [1, 3, 4, 6, 8, 9, 10, 14], "through": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "quantit": [1, 3, 4, 6, 7, 11, 15], "imag": [1, 2, 8, 9, 10, 13, 15], "analysi": [1, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16], "pipelin": [1, 3, 4, 11], "studi": [1, 2, 3, 4, 6, 7, 11, 15, 16], "stem": [1, 11], "cell": [1, 2, 3, 4, 7, 10, 12, 16], "development": 1, "biologi": [1, 14], "sinc": [1, 2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "lead": [1, 2, 3, 7, 8, 14, 15], "workshop": [1, 2], "now": [1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "care": [1, 2, 3, 10, 11, 12, 15, 16], "deepli": [1, 16], "spread": [1, 2, 4, 6, 7, 15, 16], "literaci": 1, "excit": [1, 7], "programmat": [1, 3, 10], "project": [1, 2, 8, 10, 15], "earth": [1, 15], "ocean": 1, "atmospher": [1, 15], "geophys": 1, "invers": 1, "facil": [1, 14], "combin": [1, 2, 3, 4, 9, 10, 12, 14, 15], "method": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "numer": [1, 2, 6, 10, 11, 12, 15, 16], "simul": [1, 2, 6, 15], "machin": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "answer": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 15, 16], "question": [1, 2, 3, 4, 6, 9, 11, 12, 15, 16], "subsurfac": 1, "primari": [1, 2, 7, 14, 15, 16], "includ": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "miner": 1, "explor": [1, 3, 4, 6, 10, 12, 14, 15, 16], "carbon": [1, 15], "sequestr": 1, "groundwat": 1, "environment": [1, 4], "bsc": 1, "alberta": [1, 10, 16], "held": [1, 3], "posit": [1, 3, 4, 7, 11, 15], "california": [1, 11], "berkelei": 1, "prior": [1, 2, 10, 14], "start": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "current": [1, 2, 7, 8, 10, 12, 14, 15, 16], "previou": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "sole": [2, 8], "descript": [2, 6, 7, 8, 9, 10, 14, 15, 16], "exploratori": [2, 3, 4, 7, 9, 11, 15], "next": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "serv": [2, 3, 6, 8, 12, 14], "forai": [2, 11], "focu": [2, 3, 4, 6, 7, 8, 11, 12, 14, 15, 16], "e": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "one": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15], "valu": [2, 4, 6, 8, 11, 12, 15], "categor": [2, 3, 4, 6, 7, 11, 15, 16], "interest": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "cover": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "basic": [2, 3, 7, 10, 12, 14, 15], "make": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16], "suitabl": [2, 16], "classifi": 2, "accur": [2, 3, 6, 11, 12, 15], "well": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "where": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "possibl": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "maxim": [2, 3, 11], "accuraci": [2, 3, 6, 11, 12], "By": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "end": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "reader": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "abl": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "do": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15], "follow": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "recogn": [2, 8, 10, 11, 14, 16], "situat": [2, 3, 4, 7, 11, 14, 15, 16], "appropri": [2, 3, 4, 7, 10, 11, 13, 15, 16], "what": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15], "interpret": [2, 3, 4, 7, 8, 10, 11, 12, 14, 15, 16], "output": [2, 3, 4, 7, 8, 10, 11, 12, 15, 16], "hand": [2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15, 16], "straight": [2, 4, 11, 12, 15], "line": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 16], "euclidean": [2, 4], "graph": 2, "predictor": [2, 4, 11], "explain": [2, 4, 6, 7, 11, 12], "perform": [2, 4, 6, 7, 8, 9, 10, 11, 12, 15], "imput": 2, "step": [2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16], "model": [2, 3, 4, 12, 14, 15], "make_pipelin": [2, 3, 4, 11], "mani": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "want": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "base": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15], "experi": [2, 15], "For": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "instanc": [2, 3, 6, 7, 10, 16], "doctor": [2, 3], "mai": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "diagnos": [2, 3], "either": [2, 3, 4, 7, 8, 9, 11, 12, 14, 16], "diseas": 2, "healthi": 2, "symptom": 2, "s": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "email": [2, 10, 14], "might": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "tag": [2, 10, 13], "given": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "spam": 2, "text": [2, 3, 6, 7, 9, 11, 12, 13, 14, 16], "credit": 2, "card": 2, "compani": 2, "whether": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15], "purchas": [2, 4, 11, 12], "fraudul": 2, "item": [2, 4, 7, 8, 10, 11, 14, 15, 16], "amount": [2, 3, 4, 8, 10, 11, 12, 15], "locat": [2, 10, 14, 15], "These": [2, 3, 4, 7, 8, 10, 12, 14, 15], "task": [2, 4, 6, 9, 11, 15, 16], "exampl": [2, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "sometim": [2, 3, 6, 7, 10, 11, 12, 13, 15, 16], "call": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "label": [2, 4, 7, 11, 15, 16], "other": [2, 3, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16], "featur": [2, 3, 8, 11, 12, 14, 15], "gener": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "assign": [2, 4, 6, 7, 10, 15, 16], "without": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "known": [2, 3, 7, 10, 12, 15], "g": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "basi": [2, 10, 15], "similar": [2, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "know": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "name": [2, 3, 4, 6, 8, 11, 12, 13, 14, 15], "come": [2, 4, 6, 7, 11, 12, 13, 15, 16], "fact": [2, 3, 6, 7, 8, 10, 12, 14], "onc": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "can": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "There": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "could": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "wide": [2, 3, 4, 10, 12, 14, 15], "hart": [2, 11], "1967": [2, 3, 11], "hodg": [2, 11], "1951": [2, 11], "your": [2, 3, 4, 6, 7, 9, 10, 11, 12, 15, 16], "futur": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15], "you": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "encount": [2, 4, 10, 11, 12, 13, 16], "tree": [2, 3, 11], "vector": [2, 3, 10, 15], "svm": 2, "logist": [2, 3, 12], "neural": 2, "network": [2, 10], "see": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "addit": [2, 7, 11], "section": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "begin": [2, 3, 4, 6, 7, 10, 11, 14, 15, 16], "It": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "worth": [2, 3, 15, 16], "mention": [2, 3, 4, 6, 8, 10, 12, 13, 14, 16], "variat": [2, 6, 11, 15], "binari": [2, 3], "onli": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "involv": [2, 3, 4, 8, 10, 12, 13, 14, 15, 16], "diagnosi": [2, 3], "run": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "multiclass": 2, "categori": [2, 3, 4, 6, 7, 10, 15, 16], "bronchiti": 2, "pneumonia": 2, "common": [2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16], "cold": 2, "digit": 2, "breast": [2, 3], "dr": [2, 4, 15], "william": [2, 3, 4], "h": [2, 10], "wolberg": [2, 3], "w": [2, 7, 10], "nick": [2, 3, 7], "street": [2, 3], "olvi": [2, 3], "l": [2, 10], "mangasarian": [2, 3], "et": [2, 3, 4, 6, 10, 12, 14, 15], "al": [2, 3, 4, 6, 10, 12, 14, 15], "1993": [2, 3], "row": [2, 3, 4, 6, 8, 11, 12, 14, 15], "repres": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "tumor": [2, 7], "sampl": [2, 3, 11], "benign": [2, 3, 7, 11], "malign": [2, 3, 7, 11], "measur": [2, 3, 6, 7, 11, 12, 15, 16], "nucleu": 2, "textur": [2, 3], "perimet": [2, 3, 7], "area": [2, 3, 7, 10, 11, 12, 14, 15, 16], "conduct": [2, 10], "physician": 2, "As": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "analys": [2, 3, 4, 7, 8, 9, 10, 14, 15, 16], "formul": [2, 6, 7, 11, 15], "precis": [2, 3, 6, 8, 11, 13, 14, 16], "here": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "avail": [2, 3, 7, 9, 10, 12, 13, 15], "unknown": [2, 6, 7], "show": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "import": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "becaus": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "tradit": 2, "non": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "driven": [2, 4], "quit": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "subject": [2, 8, 14, 15], "depend": [2, 3, 4, 6, 8, 10, 11, 12, 15, 16], "upon": [2, 3, 10], "skill": [2, 8, 10, 15], "experienc": 2, "furthermor": [2, 3, 15], "normal": [2, 3, 6, 14, 16], "danger": [2, 13], "stai": [2, 6, 10, 15], "same": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "place": [2, 3, 4, 6, 7, 8, 10, 11, 13, 14, 15, 16], "stop": [2, 3, 4, 8, 12, 13], "grow": [2, 3, 12], "get": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "larg": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15], "contrast": [2, 3, 4, 6, 7, 10, 11, 12, 14], "invad": 2, "surround": [2, 7, 10, 14, 15], "tissu": 2, "nearbi": [2, 3, 10], "organ": [2, 7, 8, 10, 14, 15, 16], "caus": [2, 3, 4, 7, 8, 11, 12, 15, 16], "seriou": [2, 7, 14], "damag": [2, 3], "stanford": 2, "health": 2, "2021": [2, 7, 10], "thu": [2, 3, 8, 10, 11, 12, 14, 16], "quickli": [2, 3, 7, 12, 15], "type": [2, 3, 4, 6, 8, 10, 11, 12, 13, 14, 15], "guid": [2, 7, 11, 14, 15], "treatment": [2, 3, 16], "wrangl": [2, 3, 7, 9, 10, 12, 15], "visual": [2, 3, 4, 6, 8, 9, 11, 12, 14, 16], "order": [2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16], "better": [2, 3, 4, 11, 12, 15], "understand": [2, 3, 4, 6, 7, 9, 10, 12, 14, 15, 16], "panda": [2, 3, 4, 6, 8, 10, 11, 12, 15, 16], "altair": [2, 3, 4, 8, 11, 12], "pd": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "alt": [2, 3, 4, 6, 7, 11, 12, 15], "case": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "file": [2, 7, 13, 16], "contain": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "csv": [2, 3, 4, 6, 7, 11, 12, 15, 16], "header": [2, 7, 8, 14, 16], "ll": [2, 3, 6, 7, 10, 11, 13, 14, 15, 16], "read_csv": [2, 3, 4, 6, 7, 8, 11, 12, 15, 16], "function": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "argument": [2, 3, 4, 6, 7, 8, 11, 15, 16], "inspect": [2, 7, 10, 15, 16], "wdbc": 2, "id": [2, 3, 6, 15], "radiu": [2, 3], "smooth": [2, 3, 11, 15], "compact": [2, 3], "concav": [2, 3], "concave_point": [2, 3], "symmetri": [2, 3], "fractal_dimens": [2, 3], "0": [2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16], "842302": 2, "m": [2, 3, 7, 10, 15], "1": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "096100": 2, "2": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 16], "071512": 2, "268817": 2, "983510": 2, "567087": 2, "3": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "280628": 2, "650542": 2, "530249": 2, "215566": 2, "253764": 2, "842517": 2, "828212": 2, "353322": 2, "684473": 2, "907030": 2, "826235": 2, "486643": 2, "023825": 2, "547662": 2, "001391": 2, "867889": 2, "84300903": 2, "578499": 2, "455786": 2, "565126": 2, "557513": 2, "941382": 2, "052000": 2, "362280": 2, "035440": 2, "938859": 2, "397658": 2, "84348301": 2, "768233": 2, "253509": 2, "592166": 2, "763792": 2, "280667": 2, "399917": 2, "914213": 2, "450431": 2, "864862": 2, "4": [2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "906602": 2, "84358402": 2, "748758": 2, "150804": 2, "775011": 2, "824624": 2, "280125": 2, "538866": 2, "369806": 2, "427237": 2, "009552": 2, "561956": 2, "564": [2, 3], "926424": 2, "109139": 2, "720838": 2, "058974": 2, "341795": 2, "040926": 2, "218868": 2, "945573": 2, "318924": 2, "312314": 2, "930209": 2, "565": [2, 3], "926682": 2, "703356": 2, "083301": 2, "614511": 2, "722326": 2, "102368": 2, "017817": 2, "692434": 2, "262558": 2, "217473": 2, "057681": 2, "566": [2, 3], "926954": 2, "701667": 2, "043775": 2, "672084": 2, "577445": 2, "839745": 2, "038646": 2, "046547": 2, "105684": 2, "808406": 2, "894800": 2, "567": [2, 3], "927241": 2, "836725": 2, "334403": 2, "980781": 2, "733693": 2, "524426": 2, "269267": 2, "294046": 2, "656528": 2, "135315": 2, "042778": 2, "568": [2, 3], "92751": 2, "b": [2, 3], "806811": 2, "220718": 2, "812793": 2, "346604": 2, "109349": 2, "149741": 2, "113893": 2, "260710": 2, "819349": 2, "560539": 2, "569": [2, 3], "12": [2, 3, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "column": [2, 3, 4, 6, 8, 11, 12, 13, 15], "biopsi": [2, 3], "remov": [2, 3, 7, 13, 14, 15], "bodi": [2, 14], "examin": [2, 3, 4, 10, 11], "presenc": [2, 3], "tradition": 2, "procedur": [2, 3, 4, 11], "invas": 2, "fine": [2, 3, 8, 14, 15, 16], "needl": 2, "aspir": 2, "present": [2, 3, 6, 7, 10, 14, 15, 16], "extract": [2, 3, 4, 7, 10, 11, 12], "small": [2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15], "less": [2, 3, 4, 6, 10, 11, 12, 14, 15, 16], "ten": [2, 6, 7, 15], "differ": [2, 3, 4, 6, 7, 11, 12, 13, 14, 16], "below": [2, 3, 6, 7, 10, 12, 14, 15], "mean": [2, 3, 7, 8, 10, 11, 12, 14, 15, 16], "across": [2, 3, 6, 7, 10, 11, 12, 14, 15], "nuclei": 2, "record": [2, 3, 6, 7, 10, 14, 15, 16], "part": [2, 3, 4, 7, 8, 9, 10, 11, 12, 14, 15, 16], "prepar": [2, 3, 15], "have": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "been": [2, 3, 8, 10, 11, 12, 14, 15, 16], "standard": [2, 3, 4, 6, 8, 11, 12, 14, 15, 16], "discuss": [2, 3, 6, 10, 11, 12, 13, 14, 15, 16], "why": [2, 3, 7, 11, 15, 16], "later": [2, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "addition": [2, 3, 4, 6, 8, 10, 12, 14, 16], "uniqu": [2, 3, 7, 14], "therefor": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15, 16], "total": [2, 3, 4, 7, 10, 11, 15, 16], "per": [2, 6, 10, 14, 15, 16], "identif": 2, "number": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "deviat": [2, 3, 4, 6, 16], "grai": [2, 14, 16], "length": [2, 4, 6, 7, 15, 16], "contour": 2, "insid": [2, 3, 6, 7, 8, 10, 13, 14, 15, 16], "local": [2, 11, 13], "ratio": [2, 16], "squar": [2, 3, 4, 7, 8, 10, 11, 12, 15, 16], "portion": [2, 10], "mirror": 2, "fractal": 2, "dimens": 2, "rough": [2, 4, 15], "info": [2, 3, 7, 15, 16], "preview": [2, 3, 4, 6, 7, 8, 9, 11, 12, 14, 15, 16], "frame": [2, 3, 4, 6, 8, 10, 11, 12, 15], "easier": [2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 16], "lot": [2, 3, 4, 7, 10, 12, 15, 16], "print": [2, 3, 6, 7, 8, 10, 12, 13, 15, 16], "down": [2, 8, 10, 13, 14, 16], "page": [2, 3, 4, 5, 8, 10, 12, 13, 14], "instead": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "entri": [2, 3, 6, 7, 10, 15, 16], "core": [2, 3, 7, 15, 16], "datafram": [2, 3, 4, 6, 10, 11, 12, 15, 16], "rangeindex": [2, 3, 15, 16], "null": [2, 3, 15, 16], "count": [2, 3, 6, 7, 10, 15, 16], "dtype": [2, 3, 6, 15, 16], "int64": [2, 3, 10, 15, 16], "float64": [2, 3, 6, 10, 15, 16], "6": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "7": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "8": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "9": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "10": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "11": [2, 3, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "memori": [2, 3, 10, 15, 16], "usag": [2, 3, 7, 10, 12, 15, 16], "53": [2, 3, 6, 12, 14], "kb": [2, 3, 15, 16], "abov": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15], "arrai": [2, 3, 4, 11, 12], "readabl": [2, 3, 7, 10, 11, 14, 15, 16], "renam": [2, 3, 6, 7, 8, 10, 11, 16], "replac": [2, 3, 6, 7, 10, 12, 13, 14, 15], "take": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "dictionari": [2, 3, 10, 16], "map": [2, 4, 7, 10, 11, 12, 15], "desir": [2, 3, 7, 10, 11, 14, 16], "verifi": [2, 6, 13], "result": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "ani": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "let": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "groupbi": [2, 6], "size": [2, 3, 4, 6, 10, 11, 12, 16], "find": [2, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "percentag": [2, 3, 6, 7, 15], "pair": [2, 3, 4, 10, 16], "Then": [2, 3, 4, 6, 7, 8, 11, 12, 13, 14, 15, 16], "calcul": [2, 3, 4, 11, 12], "group": [2, 3, 4, 6, 7, 13, 15], "divid": [2, 3, 7, 10, 15, 16], "multipli": [2, 7, 15], "equal": [2, 3, 4, 6, 11, 12, 16], "access": [2, 3, 4, 6, 11, 13, 15, 16], "via": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15], "attribut": [2, 3, 4, 5, 7, 10, 11], "357": [2, 3], "63": [2, 3, 10, 11], "212": [2, 4, 7, 10, 15, 16], "37": [2, 3, 4, 14, 15], "62": [2, 10, 11, 15], "741652": 2, "258348": 2, "conveni": [2, 3, 7, 10, 16], "value_count": [2, 3, 6, 16], "occurr": [2, 15], "If": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "pass": [2, 3, 7, 10, 11, 15, 16], "seri": [2, 3, 6, 11, 12], "occur": [2, 3, 4, 6, 8, 11, 12, 14, 15, 16], "true": [2, 3, 4, 6, 7, 8, 11, 15, 16], "fraction": [2, 3, 6, 11, 14, 15], "627417": 2, "372583": 2, "proport": [2, 3, 7, 15, 16], "draw": [2, 6, 7, 11, 12, 15], "color": [2, 3, 4, 10, 11, 12, 16], "scatter": [2, 3, 4, 11, 12], "plot": [2, 3, 4, 6, 11, 12, 16], "relationship": [2, 3, 4, 6, 7, 11, 12, 15, 16], "recal": [2, 3, 4, 6, 7, 11, 12, 14, 15, 16], "default": [2, 3, 7, 8, 10, 11, 13, 14, 15, 16], "palett": 2, "colorblind": [2, 15], "friendli": [2, 15], "so": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "stick": [2, 3, 10, 14], "perim_concav": [2, 3], "chart": [2, 3, 4, 6, 11, 12], "mark_circl": [2, 3, 4, 11, 12, 15], "encod": [2, 3, 4, 6, 7, 10, 11, 12, 15], "x": [2, 3, 4, 6, 7, 10, 11, 12, 13, 15], "titl": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15], "y": [2, 3, 4, 6, 7, 8, 10, 11, 12, 15], "versu": [2, 3, 4, 6, 7, 10, 11, 12, 16], "fig": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "typic": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "fall": [2, 3, 6, 11, 14, 15], "upper": [2, 6, 14, 15], "right": [2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16], "corner": [2, 3, 13, 14, 15], "lower": [2, 3, 6, 8, 12, 15], "left": [2, 3, 4, 5, 7, 8, 10, 12, 13, 14, 15, 16], "word": [2, 3, 6, 7, 8, 10, 11, 12, 14, 16], "tend": [2, 3, 11, 14, 15], "ones": [2, 15, 16], "larger": [2, 3, 4, 6, 10, 11, 12, 14, 15], "suppos": [2, 4, 6, 7, 8, 10, 11, 14, 16], "obtain": [2, 3, 4, 6, 7, 11, 12, 14, 15, 16], "except": [2, 10, 12, 14, 16], "sai": [2, 3, 6, 8, 10, 11, 12, 13, 15, 16], "respect": [2, 3, 4, 6, 10, 14, 15, 16], "lie": 2, "middl": [2, 6, 10], "orang": [2, 4, 11, 12], "cloud": [2, 10, 14, 15], "probabl": [2, 3, 6, 10, 12], "seem": [2, 3, 6, 8, 10, 11, 12, 15, 16], "actual": [2, 3, 4, 6, 7, 10, 11, 12, 14, 16], "practic": [2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "To": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "most": [2, 3, 4, 6, 7, 8, 10, 14, 15, 16], "must": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "choos": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 16], "advanc": [2, 3, 4, 6, 8, 12, 13, 14, 15, 16], "assum": [2, 6, 8], "someon": [2, 3, 7, 8, 14], "chosen": [2, 3, 4, 12, 16], "ourselv": [2, 3, 11], "illustr": [2, 3, 6, 11, 12, 15, 16], "concept": [2, 6, 7, 9, 11, 12, 14, 15], "walk": [2, 7, 11, 14], "whose": [2, 8, 10, 14, 16], "depict": [2, 4], "red": [2, 4, 8, 10, 11, 12, 13, 14], "diamond": 2, "coordin": [2, 4, 7, 15], "idea": [2, 3, 6, 7, 8, 10, 12, 13, 14, 15, 16], "close": [2, 3, 4, 6, 7, 10, 11, 14, 15], "anoth": [2, 3, 6, 7, 8, 10, 11, 12, 14, 15, 16], "expect": [2, 3, 4, 6, 7, 8, 10, 11, 12, 16], "look": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "doe": [2, 3, 4, 6, 7, 10, 11, 12, 15], "consid": [2, 3, 4, 6, 7, 11, 12, 14, 15, 16], "closest": [2, 3, 10, 15], "among": [2, 10, 14, 16], "major": [2, 3, 4, 7, 11, 12, 15, 16], "shown": [2, 3, 4, 7, 8, 10, 11, 12, 14, 15, 16], "vote": [2, 3, 7], "three": [2, 3, 4, 6, 7, 8, 9, 10, 11, 14, 15, 16], "chose": [2, 3, 15], "noth": [2, 6, 7, 12], "though": [2, 3, 6, 7, 10, 11, 12, 14, 15, 16], "odd": [2, 10], "avoid": [2, 3, 12, 15], "ti": [2, 10], "decid": [2, 3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "often": [2, 3, 4, 6, 7, 8, 10, 11, 14, 15, 16], "just": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "denot": [2, 4, 7, 10, 11, 12, 15, 16], "a_x": 2, "a_i": 2, "b_x": 2, "b_y": 2, "definit": [2, 10, 15, 16], "plane": [2, 12], "formula": [2, 3, 4, 11, 12, 15], "mathrm": [2, 3], "sqrt": [2, 3, 11, 14], "select": [2, 4, 6, 8, 10, 11, 12, 13, 14, 15], "correspond": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "smallest": [2, 7, 11, 15, 16], "code": [2, 3, 7, 9, 10, 11, 13, 14, 15, 16], "add": [2, 3, 4, 6, 7, 10, 11, 13, 15, 16], "root": [2, 3, 10, 11, 14], "nsmallest": [2, 11, 15], "new_obs_perimet": 2, "new_obs_concav": 2, "dist_from_new": 2, "112": 2, "241202": 2, "653051": 2, "880626": 2, "258": 2, "750277": 2, "870061": 2, "979663": 2, "351": 2, "622700": 2, "541410": 2, "143088": 2, "430": 2, "416930": 2, "314364": 2, "256806": 2, "152": 2, "160091": 2, "039155": 2, "279258": 2, "tabl": [2, 3, 5, 7, 8, 10, 13, 15, 16], "mathemat": [2, 3, 6, 11, 12, 15], "detail": [2, 3, 4, 7, 8, 10, 12, 13, 14, 15, 16], "24": [2, 14, 15], "65": [2, 3, 6, 10, 11, 16], "88": [2, 3], "75": [2, 3, 6, 7, 10, 11, 12, 15, 16], "87": [2, 3, 11], "98": [2, 7, 12, 15], "54": [2, 3, 14, 15, 16], "14": [2, 3, 4, 6, 8, 10, 14, 15, 16], "42": [2, 6, 14, 15, 16], "31": [2, 3, 14, 15, 16], "26": [2, 3, 14, 15], "16": [2, 3, 4, 6, 10, 14, 15], "04": [2, 10, 13, 15, 16], "28": [2, 4, 11, 12, 14, 15], "circl": [2, 8, 14, 15], "although": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "toward": [2, 6, 7, 14], "exactli": [2, 3, 6, 7, 10, 11, 12, 13, 15], "appli": [2, 3, 7, 11, 12, 15], "higher": [2, 3, 6, 7, 11, 12, 15, 16], "help": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "a_": 2, "dot": [2, 7, 10, 11, 12, 15], "b_": 2, "becom": [2, 3, 4, 6, 7, 8, 11, 12, 14, 16], "still": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15], "space": [2, 8, 10, 11, 12, 13, 15], "417": [2, 15], "837": 2, "had": [2, 3, 6, 7, 10, 11, 15, 16], "ad": [2, 3, 4, 10, 11, 12, 14], "up": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15], "took": [2, 6, 7], "27": [2, 7, 11, 14, 15], "new_obs_symmetri": 2, "836722": 2, "267368": 2, "400": [2, 11, 16], "334664": 2, "886368": 2, "099359": 2, "472326": 2, "562": 2, "470430": 2, "084810": 2, "154075": 2, "499268": 2, "68": 2, "365450": 2, "812359": 2, "092064": 2, "531594": 2, "055065": 2, "555575": 2, "dimension": 2, "five": [2, 3, 13, 15, 16], "3d": [2, 11, 12], "note": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "recommend": [2, 7, 8, 9, 11, 12, 13, 14, 16], "against": [2, 8, 11, 12], "purpos": [2, 3, 4, 6, 10, 11, 12, 14, 15, 16], "complic": [2, 7, 10, 11, 15], "handl": [2, 3, 7, 15], "multipl": [2, 3, 4, 6, 7, 10, 11, 12, 13, 14, 15], "thankfulli": [2, 4], "implement": [2, 3, 4, 12, 15], "buitinck": 2, "2013": [2, 3, 4, 12], "along": [2, 3, 6, 7, 10, 11, 13, 14, 15], "sklearn": [2, 3, 4, 11, 12], "keep": [2, 3, 6, 7, 10, 13, 14, 15, 16], "simpl": [2, 3, 4, 6, 10, 11, 13, 15, 16], "fewer": [2, 3], "mistak": [2, 3, 11, 15], "tell": [2, 3, 6, 7, 8, 10, 11, 12, 14, 15, 16], "prefer": [2, 3, 4, 10, 12, 15, 16], "regular": [2, 10, 11, 14, 15, 16], "set_config": [2, 3, 4, 11, 12], "notic": [2, 3, 6, 7, 10, 12, 15, 16], "wai": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15, 16], "prefix": 2, "extens": [2, 8, 10, 12, 13, 14, 15], "subsequ": [2, 7, 15], "long": [2, 3, 4, 6, 7, 8, 10, 12, 14, 15], "clutter": [2, 15], "kneighborsclassifi": [2, 3], "38": [2, 4, 11, 14, 15], "charact": [2, 7, 8, 10, 14, 15, 16], "transform_output": [2, 3, 4, 11, 12], "modul": 2, "build": [2, 3, 11, 15], "pick": [2, 3, 4, 10, 12, 14, 15], "store": [2, 3, 4, 6, 7, 8, 10, 13, 14, 15, 16], "cancer_train": [2, 3], "specifi": [2, 3, 6, 7, 8, 10, 11, 12, 13, 15, 16], "weight": 2, "control": [2, 3, 8, 9, 10, 13], "uniform": [2, 3, 10], "choic": [2, 3, 4, 6, 11, 14, 15, 16], "weigh": [2, 7], "websit": [2, 3, 5, 10, 12, 14], "knn": [2, 3], "n_neighbor": [2, 3, 11], "jupyt": [2, 3, 4, 7, 9, 12, 13], "environ": [2, 3, 4, 7, 8, 12, 13, 14], "pleas": [2, 3, 4, 5, 7, 8, 12], "rerun": [2, 3, 4, 12], "html": [2, 3, 4, 12, 15, 16], "represent": [2, 3, 4, 10, 12], "trust": [2, 3, 4, 6, 12], "notebook": [2, 3, 4, 12, 13, 14], "On": [2, 3, 4, 7, 10, 11, 12, 14, 15, 16], "github": [2, 3, 4, 7, 10, 12, 15], "unabl": [2, 3, 4, 10, 12, 14], "render": [2, 3, 4, 8, 12, 14, 15], "try": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16], "nbviewer": [2, 3, 4, 12], "org": [2, 3, 4, 6, 7, 10, 12, 15, 16], "kneighborsclassifierkneighborsclassifi": [2, 3], "fit": [2, 3, 4, 11, 12, 15], "much": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "outsid": [2, 3, 6, 8, 11, 12, 14, 15], "heavi": 2, "lift": 2, "modifi": [2, 3, 14], "after": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "itself": [2, 3, 6, 10, 12, 15, 16], "ran": 2, "manual": [2, 3, 4, 6, 8, 10, 11, 13, 16], "time": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 16], "new_ob": 2, "Is": [2, 4, 7, 11, 15, 16], "don": [2, 3, 4, 6, 7, 8, 10, 11, 14, 15, 16], "t": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "necessarili": [2, 3, 7, 16], "correct": [2, 3, 7, 13, 14, 15, 16], "quantifi": [2, 3, 12], "think": [2, 3, 7, 8, 10, 12, 16], "rang": [2, 3, 4, 10, 11, 12, 15, 16], "matter": [2, 11, 15, 16], "identifi": [2, 3, 4, 7, 9, 10, 11, 14, 15], "effect": [2, 4, 6, 7, 11, 12, 13, 16], "But": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "doesn": [2, 3, 8, 10, 15, 16], "salari": 2, "dollar": [2, 6, 10, 11, 12], "job": [2, 10, 15], "1000": [2, 3, 6, 15], "huge": [2, 10], "compar": [2, 3, 6, 7, 10, 11, 14, 15, 16], "conceptu": [2, 14], "opposit": 2, "yearli": 2, "temperatur": 2, "degre": 2, "kelvin": 2, "celsiu": 2, "constant": [2, 12, 15], "shift": [2, 7, 8], "273": [2, 16], "even": [2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "likewis": [2, 16], "hypothet": 2, "thousand": [2, 3, 10, 15], "singl": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "affect": [2, 3, 7, 8, 11, 12, 15], "chang": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15, 16], "outcom": [2, 7], "averag": [2, 3, 6, 7, 10, 11, 12, 16], "central": 2, "subtract": [2, 3, 7], "said": [2, 3], "unstandard": [2, 4], "wisconsin": 2, "until": [2, 3, 4, 6, 7, 8, 10, 13, 14, 15, 16], "did": [2, 3, 6, 7, 9, 10, 11, 12, 14, 15, 16], "earlier": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "thing": [2, 3, 6, 8, 10, 13, 14, 15, 16], "unscaled_canc": 2, "wdbc_unscal": [2, 3], "1001": 2, "11840": [2, 3], "1326": 2, "08474": [2, 3], "1203": 2, "10960": [2, 3], "386": 2, "14250": [2, 3], "1297": 2, "10030": [2, 3], "1479": 2, "11100": [2, 3], "1261": 2, "09780": [2, 3], "858": 2, "08455": [2, 3], "1265": 2, "11780": [2, 3], "181": [2, 4], "05263": [2, 3], "unscal": 2, "uncent": 2, "Will": 2, "framework": [2, 12], "preprocessor": [2, 3, 4, 11], "manipul": [2, 10, 16], "standardscal": [2, 3, 4, 11], "transform": [2, 3, 4, 7, 11, 12, 16], "wrap": [2, 3, 4, 11], "columntransform": [2, 3, 4], "make_column_transform": [2, 3, 4, 11], "enabl": [2, 8, 10, 13, 14, 15, 16], "handi": [2, 7, 16], "sequenc": [2, 3, 7, 10, 13, 15], "compos": [2, 3, 4, 7, 11], "x27": [2, 3, 4], "columntransformercolumntransform": [2, 3, 4], "standardscalerstandardscal": [2, 3, 4], "individu": [2, 3, 6, 7, 12, 14, 15], "difficult": [2, 3, 4, 7, 8, 10, 12, 15, 16], "rather": [2, 3, 6, 7, 8, 10, 11, 14, 15, 16], "make_column_selector": [2, 3], "dtype_includ": [2, 3], "equival": [2, 7, 10, 12, 16], "lt": 2, "_column_transform": 2, "0x7fe3e09b15d0": 2, "gt": 2, "readi": [2, 3, 7, 8, 10, 13, 14], "happen": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "necessari": [2, 4, 11, 13, 15], "bit": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 15, 16], "unnecessari": 2, "howev": [2, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "quantiti": [2, 3, 6, 15, 16], "scaled_canc": 2, "standardscaler__area": 2, "standardscaler__smooth": 2, "984375": 2, "568466": 2, "908708": 2, "826962": 2, "558884": 2, "942210": 2, "764464": 2, "283553": 2, "826229": 2, "280372": 2, "343856": 2, "041842": 2, "723842": 2, "102458": 2, "577953": 2, "840484": 2, "735218": 2, "525767": 2, "347789": 2, "112085": 2, "woohoo": 2, "input": [2, 3, 4, 7, 10, 11, 14, 16], "behavior": [2, 4, 11, 15, 16], "drop": [2, 3, 8, 13, 14, 15, 16], "remain": [2, 3, 4, 7, 13], "rest": [2, 3, 7, 12, 16], "remaind": [2, 3, 7, 10, 11, 16], "passthrough": 2, "separ": [2, 3, 4, 7, 8, 14, 15], "underscor": [2, 7, 8, 14, 16], "again": [2, 3, 6, 7, 8, 10, 11, 12, 13, 15, 16], "preserv": [2, 3], "verbose_feature_names_out": [2, 4], "fals": [2, 3, 4, 7, 10, 11, 12, 15, 16], "should": [2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 15, 16], "leav": [2, 4, 12], "preprocessor_keep_al": 2, "scaled_cancer_al": 2, "wonder": [2, 6, 10], "technic": [2, 3, 7, 8, 11, 13, 14, 15, 16], "error": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "prone": [2, 3, 10, 16], "accident": [2, 3, 8, 10, 14, 15, 16], "forget": [2, 4, 14], "proper": 2, "free": [2, 3, 12, 14], "requir": [2, 3, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "yourself": [2, 4, 7, 10, 12, 14], "further": [2, 3, 4, 6, 7, 8, 10, 12, 15, 16], "automat": [2, 3, 4, 10, 11, 14, 15], "streamlin": 2, "effort": [2, 8, 10, 14], "side": [2, 5, 6, 7, 13, 14, 15], "annot": [2, 4, 15], "within": [2, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "nearli": [2, 4, 12, 16], "vertic": [2, 6, 7, 11, 12, 15, 16], "align": [2, 10, 15], "black": [2, 4, 10, 11, 15], "region": [2, 3, 10, 11, 16], "domin": 2, "intuit": [2, 3, 11, 15, 16], "reason": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "carefulli": [2, 4, 7, 10, 16], "domain": [2, 7, 10, 15], "comparison": [2, 6, 12, 15, 16], "potenti": [2, 3, 4, 11, 12, 16], "issu": [2, 7, 8, 10, 12, 13, 15, 16], "imbal": 2, "overal": [2, 3, 7, 11, 15], "pattern": [2, 3, 4, 6, 7, 10, 11, 12, 15, 16], "otherwis": [2, 3, 4, 6, 7, 15], "rare": [2, 4, 15], "malici": 2, "detect": [2, 4], "rarer": 2, "unimport": 2, "revisit": [2, 3, 10, 12, 16], "head": [2, 8, 10, 13, 14, 15], "top": [2, 3, 5, 7, 8, 10, 11, 12, 13, 15, 16], "n": [2, 3, 4, 6, 7, 10, 11, 15, 16], "concat": [2, 6], "glue": 2, "filter": [2, 6, 10, 15], "back": [2, 3, 6, 8, 10, 11, 12, 13, 14, 15, 16], "concaten": [2, 6], "axi": [2, 7, 11, 12, 14, 16], "yield": [2, 3], "taller": 2, "horizont": [2, 7, 15], "produc": [2, 3, 7, 8, 12, 15, 16], "wider": [2, 6, 7, 16], "imbalanc": [2, 3], "rare_canc": 2, "rare_plot": 2, "With": [2, 4, 7, 10, 15, 16], "least": [2, 3, 4, 6, 8, 15], "win": 2, "highlight": [2, 4, 6, 8, 10, 11, 12, 13, 14, 16], "13": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "background": [2, 3, 6, 10, 12, 15], "blue": [2, 4, 8, 11, 14, 16], "indic": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "despit": [2, 3, 10, 15], "simplic": [2, 3, 14], "sound": [2, 3, 8], "manner": [2, 8, 12], "fairli": [2, 3, 6, 13, 15], "nuanc": 2, "suffic": [2, 6], "rebal": 2, "oversampl": 2, "replic": [2, 6], "power": [2, 3, 7, 10, 14, 15, 16], "own": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "increas": [2, 3, 4, 6, 11, 12, 15, 16], "randomli": [2, 3, 4, 6, 12], "properli": [2, 3, 15], "random": [2, 6, 11, 12], "malignant_canc": 2, "benign_canc": 2, "malignant_cancer_upsampl": 2, "upsampled_canc": 2, "vice": [2, 3], "versa": [2, 3], "closer": [2, 15], "upsampl": 2, "wild": [2, 7, 12], "unfortun": [2, 3, 4, 6, 8, 10, 12, 15], "challeng": [2, 14, 16], "reli": [2, 3, 8, 11, 12, 15], "expert": [2, 3, 7, 13], "knowledg": [2, 7, 12, 14, 16], "relat": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "survei": [2, 7, 16], "particip": [2, 3], "margin": [2, 8], "peopl": [2, 6, 7, 8, 11, 12, 14, 15, 16], "respond": [2, 10, 14], "certain": [2, 7, 10, 14, 15], "kind": [2, 3, 4, 6, 7, 10, 15], "fear": [2, 7], "honestli": 2, "neg": [2, 3, 8, 11, 12, 14, 15, 16], "consequ": [2, 3, 6, 8, 16], "simpli": [2, 3, 10, 15, 16], "throw": 2, "awai": [2, 3, 6, 10, 11, 12, 14, 16], "bia": [2, 12], "conclus": [2, 6, 7, 15], "inadvert": [2, 8], "ignor": [2, 3, 7, 11, 16], "easili": [2, 3, 4, 7, 8, 9, 10, 14, 15, 16], "mislead": 2, "detriment": 2, "impact": [2, 4, 6, 12, 16], "techniqu": [2, 3, 4, 6, 7, 10, 12, 15], "deal": [2, 8, 10], "isn": [2, 3, 7, 10, 11, 15], "anyth": [2, 3, 7, 12, 16], "els": [2, 7, 8, 10], "subset": [2, 6, 8, 10, 11, 12, 16], "missing_canc": 2, "wdbc_miss": 2, "nan": [2, 10, 16], "475956": 2, "834601": 2, "386808": 2, "169878": 2, "160508": 2, "137124": 2, "henc": [2, 3, 4, 8, 10, 11, 15], "too": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "accomplish": [2, 3, 6, 7, 8, 15, 16], "dropna": 2, "no_missing_canc": 2, "strategi": [2, 3, 15], "fill": [2, 8, 10, 12, 15], "synthet": 2, "simpleimput": 2, "simpleimputersimpleimput": 2, "directli": [2, 3, 4, 6, 7, 8, 13, 14, 16], "imputed_canc": 2, "846860": 2, "384942": 2, "document": [2, 4, 8, 9, 10, 13, 14, 15, 16], "crucial": 2, "critic": [2, 6, 7, 8, 12, 15, 16], "chain": [2, 16], "intermedi": [2, 7], "whole": [2, 3, 4, 6, 10, 14, 16], "scratch": [2, 6, 14, 15], "nn": [2, 3], "knn_pipelin": [2, 3], "pipelinepipelin": [2, 3, 4], "500": [2, 6, 11, 12], "075": 2, "1500": 2, "new_observ": 2, "second": [2, 3, 4, 6, 7, 8, 10, 12, 13, 14, 15, 16], "15": [2, 3, 4, 6, 7, 8, 10, 12, 14, 15, 16], "seen": [2, 3, 11, 12, 14, 15, 16], "littl": [2, 3, 10, 11, 12, 15, 16], "grid": [2, 3, 11, 15], "meshgrid": 2, "numpi": [2, 3, 4, 6, 10, 11, 12, 15, 16], "high": [2, 3, 6, 7, 8, 9, 12], "transpar": [2, 7], "low": [2, 3, 12], "opac": [2, 11, 15], "np": [2, 3, 4, 6, 11, 12], "val": 2, "arrang": [2, 6, 7, 15], "are_grid": 2, "linspac": 2, "min": [2, 11, 12, 15, 16], "95": [2, 3, 6, 7, 10, 12, 15], "max": [2, 3, 11, 12, 15, 16], "05": [2, 7, 10, 15], "50": [2, 3, 6, 7, 10, 11, 12, 14, 16], "smo_grid": 2, "asgrid": 2, "reshap": [2, 16], "knnpredgrid": 2, "bind": 2, "prediction_t": 2, "copi": [2, 10, 14, 16], "unscaled_plot": 2, "mark_point": [2, 15], "40": [2, 3, 6, 7, 10, 14, 15, 16], "nice": [2, 3, 8, 10, 12, 15], "fade": 2, "prediction_plot": 2, "300": [2, 3, 6, 15, 16], "accompani": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "repositori": [2, 3, 4, 6, 7, 10, 11, 12, 13, 15, 16], "launch": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "browser": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "click": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "binder": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "button": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "view": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "download": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "sure": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "instruct": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "setup": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "ensur": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "intend": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16], "blb": 2, "lar": 2, "gill": 2, "loupp": 2, "mathieu": 2, "blondel": 2, "fabian": 2, "pedregosa": 2, "andrea": 2, "mueller": 2, "olivi": 2, "grisel": 2, "vlad": 2, "nicula": 2, "peter": [2, 11], "prettenhof": 2, "alexandr": 2, "gramfort": 2, "jaqu": 2, "grobler": 2, "robert": [2, 3, 4, 12], "layton": 2, "jake": 2, "vanderpla": [2, 15], "arnaud": 2, "joli": 2, "brian": [2, 15], "holt": 2, "ga": [2, 15], "\u00eb": 2, "varoquaux": 2, "api": 2, "design": [2, 3, 8, 10, 14, 15, 16], "ecml": 2, "pkdd": 2, "mine": [2, 6], "108": [2, 3], "122": [2, 3], "ch67": [2, 11], "thoma": [2, 11], "ieee": [2, 4, 11], "transact": [2, 4, 11], "21": [2, 3, 7, 10, 11, 14, 15], "fh51": [2, 11], "evelyn": [2, 3, 11], "joseph": [2, 11], "discriminatori": [2, 11], "discrimin": [2, 3, 11], "consist": [2, 4, 6, 7, 10, 11, 13, 14, 15, 16], "properti": [2, 3, 7, 10, 11, 12, 15, 16], "report": [2, 3, 6, 7, 8, 11, 15, 16], "usaf": [2, 11], "school": [2, 7, 11], "aviat": [2, 11], "medicin": [2, 11], "randolph": [2, 11], "field": [2, 10, 11, 15], "texa": [2, 11], "swm93": [2, 3], "nuclear": [2, 3], "intern": [2, 3, 5, 15], "symposium": [2, 3], "electron": [2, 3, 14], "technolog": [2, 3, 15], "stanfordhcare21": 2, "url": [2, 3, 4, 6, 7, 12, 13, 14, 15, 16], "http": [2, 3, 4, 5, 6, 7, 9, 10, 12, 13, 14, 15, 16], "stanfordhealthcar": 2, "medic": [2, 3], "condit": 2, "continu": [3, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "its": [3, 4, 7, 8, 10, 11, 12, 14, 15, 16], "describ": [3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "matric": 3, "neighbor": [3, 12], "k": [3, 7, 10, 15], "nearest": [3, 4, 12], "estim": [3, 6, 7, 9, 11, 12], "underfit": [3, 12], "advantag": [3, 4, 6, 10, 11, 12, 13, 14, 15, 16], "disadvantag": [3, 4, 11, 12, 15], "wrong": [3, 6, 7, 12, 15, 16], "cancer": 3, "ask": [3, 4, 6, 10, 11, 12, 14, 15, 16], "kei": [3, 6, 7, 10, 13, 14, 15, 16], "impli": [3, 6], "between": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "oppos": [3, 10, 11, 15, 16], "memor": 3, "visit": [3, 5, 6, 7, 10, 13, 14, 15], "hospit": 3, "more": [3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "trick": 3, "asid": [3, 7, 10, 12], "match": [3, 10, 11, 12, 14, 15, 16], "observ": [3, 4, 6, 7, 11, 12, 14, 15, 16], "confid": [3, 6, 11], "golden": 3, "rule": [3, 6, 7, 11, 15], "cannot": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "than": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "realli": [3, 6, 7, 10, 11, 15, 16], "imagin": [3, 6, 8, 10, 14, 15, 16], "bad": [3, 4, 10, 15], "overestim": [3, 6], "made": [3, 4, 7, 11, 12, 13, 14, 15, 16], "frac": [3, 4, 6, 11], "summar": [3, 6, 7, 9, 10, 15, 16], "stori": [3, 8, 11, 15], "alon": [3, 6, 14], "comprehens": [3, 4, 6], "each": [3, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16], "correctli": [3, 7, 10, 13, 15, 16], "incorrectli": 3, "57": 3, "bottom": [3, 8, 13, 14], "roughli": [3, 4, 6, 11, 12, 15], "89": [3, 7, 15], "892": 3, "misclassifi": 3, "disastr": 3, "receiv": [3, 10, 14], "particularli": [3, 10, 12, 15], "unaccept": 3, "term": [3, 4, 6, 7, 10, 11, 15, 16], "talk": [3, 10, 15], "four": [3, 4, 7, 9, 15], "perfect": [3, 15], "zero": [3, 4, 11, 12, 15, 16], "almost": [3, 4, 7, 10, 11, 15], "two": [3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16], "commonli": [3, 6, 7, 8, 12, 14, 15, 16], "metric": [3, 4, 11, 12], "togeth": [3, 4, 6, 8, 10, 15, 16], "inde": [3, 4, 6, 7, 10, 12, 15, 16], "20": [3, 6, 7, 10, 12, 14, 15, 16], "quad": [3, 4], "25": [3, 6, 7, 10, 11, 14, 15, 16], "rel": [3, 4, 7, 15], "context": [3, 10, 11, 12, 15, 16], "certainli": [3, 6], "achiev": [3, 7, 11, 15, 16], "guess": [3, 4, 6, 7], "everi": [3, 6, 7, 8, 10, 12, 14, 16], "similarli": [3, 7, 10, 15, 16], "never": [3, 7, 11, 14], "obsev": 3, "Of": [3, 6, 12, 16], "somewher": [3, 7, 10, 11, 15], "extrem": [3, 6, 11, 12], "trade": [3, 4], "off": [3, 4, 6, 12], "fair": [3, 10, 11], "unbias": 3, "influenc": [3, 4, 6, 11, 12, 15], "human": [3, 4, 6, 10, 14, 15, 16], "counter": 3, "main": [3, 7, 13, 16], "tenet": 3, "determin": [3, 4, 6, 11, 13, 14, 15, 16], "everyth": [3, 6, 7, 13, 16], "point": [3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "investig": [3, 6, 7, 10, 15], "integ": [3, 10, 15, 16], "At": [3, 7, 8, 9, 10, 12], "track": [3, 6, 7, 14, 16], "nums_0_to_9": 3, "5": [3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "random_numbers1": 3, "to_numpi": 3, "appear": [3, 6, 7, 8, 10, 11, 12, 13, 14, 15], "fresh": [3, 8], "batch": 3, "random_numbers2": 3, "forc": [3, 15], "random_numbers1_again": 3, "random_numbers2_again": 3, "And": [3, 6, 7, 10, 11, 12, 14, 15, 16], "4235": 3, "random_numb": 3, "beyond": [3, 4, 7, 10, 11, 12, 13, 14, 15, 16], "explicitli": [3, 10, 14, 15, 16], "insert": [3, 14, 16], "therebi": [3, 15], "global": [3, 15], "drawback": 3, "buri": 3, "undesir": 3, "entir": [3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "plai": [3, 7, 10, 13], "randomst": 3, "random_st": 3, "rnd": 3, "random_numbers1_third": 3, "random_numbers2_third": 3, "load": [3, 4, 8, 9, 10, 11, 12, 15, 16], "quick": [3, 7, 10], "re": [3, 4, 7, 8, 9, 10, 11, 14, 15, 16], "scale": [3, 4, 10, 11, 12, 14, 15], "done": [3, 7, 8, 10, 13, 14, 15, 16], "preliminari": 3, "train_test_split": [3, 11, 12], "shuffl": 3, "stratifi": [3, 11], "exist": [3, 7, 8, 10, 12, 13, 14, 15, 16], "train_siz": [3, 11, 12], "model_select": [3, 11, 12], "cancer_test": 3, "index": [3, 7, 10, 14, 16], "426": 3, "196": [3, 4, 6, 7, 11], "296": 3, "43": [3, 4, 14, 15], "143": 3, "116": 3, "miss": [3, 4, 15, 16], "626761": 3, "373239": 3, "last": [3, 6, 7, 9, 10, 14, 15, 16], "sensit": [3, 7, 12], "consider": 3, "aspect": [3, 6, 12, 15], "fortun": [3, 6, 7, 10, 11, 12, 16], "construct": [3, 6, 7, 10, 15, 16], "cancer_preprocessor": 3, "augment": [3, 4], "864726": 3, "146": [3, 6], "869691": 3, "86": 3, "86135501": 3, "846226": 3, "105": [3, 6, 7, 10, 15, 16], "863030": 3, "244": 3, "884180": 3, "23": [3, 10, 14, 15, 16], "851509": 3, "125": [3, 7, 16], "86561": 3, "281": 3, "8912055": 3, "84799002": 3, "score": [3, 10, 11], "8951048951048951": 3, "90": [3, 6, 7, 15, 16], "precision_scor": 3, "recall_scor": 3, "y_true": [3, 11, 12], "y_pred": [3, 11, 12], "pos_label": 3, "8275862068965517": 3, "9056603773584906": 3, "83": 3, "91": [3, 6], "crosstab": 3, "alphabet": [3, 7, 15, 16], "80": [3, 16], "48": [3, 6, 7, 14, 15], "agre": [3, 10, 12], "displaystyl": 3, "51": [3, 6, 14, 15], "82": [3, 12, 15], "76": 3, "That": [3, 6, 7, 10, 11, 15, 16], "pretti": [3, 6, 10], "wait": [3, 7, 10, 11, 12, 15, 16], "Or": [3, 6, 12], "someth": [3, 4, 6, 7, 8, 10, 11, 14, 15, 16], "99": [3, 4, 6, 11, 12, 15], "terribl": 3, "impress": [3, 15], "attent": [3, 7, 11, 16], "sacrif": 3, "easi": [3, 4, 7, 8, 10, 12, 14, 15, 16], "baselin": [3, 15], "regardless": [3, 10, 11, 15], "sens": [3, 4, 6, 7, 11, 12, 15, 16], "hope": [3, 10, 12, 15], "signific": [3, 7], "Be": [3, 10, 11, 15], "enough": [3, 6, 7, 10, 11, 12, 14, 15, 16], "usual": [3, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "suspect": [3, 4, 6], "built": [3, 7, 8, 13, 16], "perspect": [3, 4, 11, 16], "hoorai": 3, "cautiou": 3, "misdiagnos": 3, "vast": [3, 4, 15, 16], "behav": [3, 6, 12], "principl": [3, 15], "ideal": [3, 8, 11, 16], "somehow": [3, 6, 10], "hasn": 3, "yet": [3, 6, 7, 8, 10, 11, 14, 15, 16], "rememb": [3, 6, 7, 8, 10, 12, 15, 16], "touch": [3, 15], "dai": [3, 8, 10, 14, 15], "strongli": [3, 9, 12], "whatev": [3, 4, 7, 15], "lucki": [3, 6], "perhap": [3, 6, 7, 8, 10, 11, 12, 15], "sub": [3, 10], "cancer_subtrain": 3, "cancer_valid": 3, "acc": 3, "897196261682243": 3, "repeat": [3, 4, 6, 14], "none": [3, 4, 10, 12, 14, 16], "underli": [3, 4, 7], "reduc": [3, 4, 10, 15], "un": [3, 4], "c": [3, 7, 10], "evenli": [3, 11], "chunk": [3, 12], "iter": [3, 4, 7, 14, 15, 16], "fold": [3, 11], "cross_valid": 3, "cv": [3, 11], "convert": [3, 6, 10, 11, 15, 16], "cancer_pip": 3, "cv_5_df": 3, "fit_tim": 3, "score_tim": 3, "test_scor": 3, "004699": 3, "005864": 3, "837209": 3, "003812": 3, "005553": 3, "870588": 3, "003668": 3, "005478": 3, "894118": 3, "003818": 3, "005679": 3, "003741": 3, "882353": 3, "aggreg": [3, 7], "sem": 3, "uncertain": [3, 6, 11], "scope": [3, 4, 7, 11, 12, 13, 14, 15], "01": [3, 6, 10, 15, 16], "cv_5_metric": 3, "agg": [3, 12, 16], "003948": 3, "005625": 3, "870971": 3, "000190": 3, "000068": 3, "009501": 3, "limit": [3, 4, 10, 12, 14, 15, 16], "speed": 3, "trial": [3, 15], "cv_10": 3, "cv_10_df": 3, "cv_10_metric": 3, "003661": 3, "004261": 3, "884939": 3, "000031": 3, "000027": 3, "006718": 3, "slightli": [3, 6, 10, 11, 12, 15], "due": [3, 4, 6, 10, 16], "reduct": 3, "dramat": 3, "cv_50_df": 3, "cv_50_metric": 3, "003631": 3, "003185": 3, "888056": 3, "000012": 3, "000013": 3, "003005": 3, "downstream": 3, "expens": [3, 10], "chemo": 3, "radiat": 3, "therapi": 3, "death": 3, "mispredict": 3, "gridsearchcv": [3, 11], "unspecifi": 3, "cancer_tune_pip": 3, "tunabl": 3, "get_param": [3, 11], "verbos": 3, "columntransformer__n_job": 3, "columntransformer__remaind": 3, "columntransformer__sparse_threshold": 3, "columntransformer__transformer_weight": 3, "columntransformer__transform": 3, "columntransformer__verbos": 3, "columntransformer__verbose_feature_names_out": 3, "columntransformer__standardscal": 3, "columntransformer__standardscaler__copi": 3, "columntransformer__standardscaler__with_mean": 3, "columntransformer__standardscaler__with_std": 3, "kneighborsclassifier__algorithm": 3, "auto": [3, 14], "kneighborsclassifier__leaf_s": 3, "30": [3, 6, 7, 10, 11, 14, 15, 16], "kneighborsclassifier__metr": 3, "minkowski": 3, "kneighborsclassifier__metric_param": 3, "kneighborsclassifier__n_job": 3, "kneighborsclassifier__n_neighbor": 3, "kneighborsclassifier__p": 3, "kneighborsclassifier__weight": 3, "wow": [3, 6, 15], "stuff": 3, "sift": 3, "muck": [3, 10], "stand": [3, 10, 11, 15], "parameter_grid": 3, "allow": [3, 4, 6, 7, 8, 9, 10, 11, 14, 15, 16], "greater": [3, 4, 10, 16], "third": 3, "skip": [3, 8, 16], "96": [3, 12, 15], "emploi": [3, 10, 11], "okai": [3, 15, 16], "param_grid": [3, 11], "cancer_tune_grid": 3, "cv_results_": [3, 11], "format": [3, 9, 10, 11, 12, 16], "accuracies_grid": 3, "19": [3, 6, 10, 14, 15], "mean_fit_tim": 3, "std_fit_tim": 3, "mean_score_tim": 3, "std_score_tim": 3, "param_kneighborsclassifier__n_neighbor": 3, "param": 3, "split0_test_scor": 3, "split1_test_scor": 3, "split2_test_scor": 3, "split3_test_scor": 3, "split4_test_scor": 3, "split5_test_scor": 3, "split6_test_scor": 3, "split7_test_scor": 3, "split8_test_scor": 3, "split9_test_scor": 3, "mean_test_scor": [3, 11], "17": [3, 4, 10, 12, 14, 15], "std_test_scor": [3, 11], "18": [3, 4, 6, 7, 10, 14, 15], "rank_test_scor": 3, "int32": [3, 16], "param_kneighbors_classifier__n_neighbor": 3, "unus": 3, "sem_test_scor": [3, 11], "845127": 3, "019966": 3, "873200": 3, "015680": 3, "861517": 3, "019547": 3, "861573": 3, "017787": 3, "866279": 3, "017889": 3, "875637": 3, "016026": 3, "885050": 3, "015406": 3, "36": [3, 4, 14, 15], "887375": 3, "013694": 3, "41": [3, 14, 15, 16], "46": [3, 4, 14, 15], "887320": 3, "013314": 3, "882669": 3, "014523": 3, "56": [3, 7], "878018": 3, "014414": 3, "61": [3, 6, 7, 10], "880343": 3, "014299": 3, "66": [3, 6, 10, 11], "015416": 3, "71": [3, 10], "877962": 3, "013660": 3, "014698": 3, "81": [3, 15], "880288": 3, "011277": 3, "875581": 3, "012967": 3, "008193": 3, "shortcut": [3, 8, 15], "layer": 3, "accuracy_vs_k": 3, "mark_lin": [3, 4, 11, 12, 15], "neighbour": [3, 11], "highest": [3, 16], "best_params_": [3, 11], "vari": [3, 6, 11, 12, 13, 15, 16], "exact": [3, 6, 12, 15], "justifi": [3, 15], "optim": [3, 10, 11], "decreas": [3, 4, 6, 15, 16], "reliabl": [3, 6, 8, 15], "uncertainti": [3, 6], "cost": [3, 6, 11, 12], "prohibit": [3, 11], "large_param_grid": 3, "385": 3, "large_cancer_tune_grid": 3, "large_accuracies_grid": 3, "large_accuracy_vs_k": 3, "farther": [3, 15], "sort": [3, 4, 7, 8, 10, 12, 15, 16], "boundari": [3, 12], "simpler": 3, "stronger": 3, "regard": [3, 6, 7, 8, 11, 12, 16], "themselv": [3, 10, 15], "noisi": [3, 11, 15], "jag": 3, "essenti": [3, 6, 7, 8, 10, 11, 16], "problemat": [3, 8, 10, 15], "unreli": [3, 6, 12], "strike": 3, "balanc": [3, 6], "qualiti": [3, 8, 11, 12], "retrain": [3, 11], "9090909090909091": 3, "8846153846153846": 3, "8679245283018868": 3, "84": [3, 15], "glanc": 3, "surpris": 3, "knew": 3, "return": [3, 4, 6, 7, 10, 12, 13, 16], "put": [3, 6, 10, 11, 12, 13, 14, 16], "defin": [3, 6, 7, 9, 10, 11, 12, 15, 16], "execut": [3, 10, 14], "search": [3, 4, 10, 13, 14], "strength": [3, 12, 15], "weak": [3, 11, 12, 15], "assumpt": [3, 4, 11, 12], "multi": 3, "slow": [3, 8, 11, 12], "treat": [3, 4, 7, 14, 15, 16], "accept": [3, 10, 11, 13, 14], "wors": [3, 7, 16], "meaning": [3, 4, 7, 10, 12, 14], "cancer_irrelev": 3, "irrelevant1": 3, "irrelevant2": 3, "30010": 3, "08690": 3, "132": [3, 6], "19740": 3, "130": [3, 6, 16], "00": [3, 6, 16], "24140": 3, "77": [3, 6], "58": [3, 15], "19800": 3, "135": [3, 6, 12, 15], "24390": 3, "142": 3, "14400": 3, "131": 3, "09251": 3, "35140": 3, "140": [3, 6], "00000": [3, 6], "47": [3, 4, 14, 15], "92": [3, 6], "increasingli": [3, 10], "distanc": [3, 4, 11, 12, 15], "corrupt": 3, "outperform": 3, "combat": 3, "extra": [3, 10, 12], "nois": [3, 15], "smoothli": 3, "trend": [3, 6, 7, 11, 12, 15], "corrobor": 3, "evid": 3, "untun": 3, "scientif": [3, 11, 12, 14], "clear": [3, 4, 6, 7, 12, 14, 15, 16], "cut": 3, "obviou": [3, 8, 12, 15], "relev": [3, 10, 11, 12], "consum": [3, 6, 16], "systemat": 3, "beal": 3, "hock": 3, "lesli": 3, "moder": 3, "ab": [3, 10, 11], "bc": [3, 6, 7], "ac": 3, "abc": 3, "million": [3, 12, 15], "computation": 3, "draper": 3, "smith": 3, "1966": 3, "eforymson": 3, "straightforward": [3, 10, 15], "form": [3, 4, 6, 7, 10, 11, 12, 15, 16], "updat": [3, 4, 13, 14, 15], "big": [3, 6, 7, 10, 14, 15], "55": [3, 6, 11, 15, 16], "caution": [3, 8, 10], "move": [3, 7, 9, 11, 12, 14, 15], "likelihood": 3, "unlucki": [3, 4], "stumbl": 3, "risk": [3, 11], "suffer": 3, "turn": [3, 4, 7, 10, 11, 12, 16], "smaller": [3, 11, 12, 15], "irrelevant3": 3, "full": [3, 6, 7, 10, 12, 14, 15, 16], "cancer_subset": 3, "sequentialfeatureselector": 3, "tri": [3, 4, 11, 12, 15], "flexibl": [3, 8, 12, 16], "resort": 3, "loop": [3, 16], "flow": 3, "mckinnei": [3, 10, 15, 16], "2012": [3, 7, 10, 15, 16], "n_total": 3, "check": [3, 7, 9, 10, 14, 15, 16], "j": [3, 7, 10], "len": [3, 10], "accuracy_dict": 3, "selected_predictor": 3, "empti": [3, 8, 14], "n_job": 3, "best_set": 3, "argmax": 3, "append": [3, 10, 15, 16], "join": [3, 10, 14], "del": [3, 15], "891103": 3, "917450": 3, "931454": 3, "926253": 3, "906955": 3, "exhibit": [3, 8], "fluctuat": [3, 11], "attempt": [3, 4, 15], "account": [3, 13, 14], "chanc": [3, 6, 13], "elbow": [3, 4], "successfulli": [3, 8, 10, 14], "judgement": 3, "excel": [3, 7, 12, 14], "tutori": [3, 8, 10, 12], "go": [3, 6, 7, 9, 10, 12, 13, 15], "jame": [3, 4, 10, 12], "great": [3, 4, 6, 7, 8, 10, 12, 14, 15], "naiv": 3, "bay": 3, "goe": [3, 7, 8, 10, 12], "popular": [3, 4, 10, 12, 14], "bkm67": 3, "martin": 3, "lansdown": 3, "mauric": 3, "georg": 3, "kendal": 3, "david": [3, 6], "mann": 3, "discard": 3, "multivari": 3, "biometrika": 3, "366": 3, "ds66": 3, "norman": 3, "harri": 3, "wilei": [3, 15], "efo66": 3, "stepwis": 3, "backward": 3, "eastern": 3, "meet": 3, "hl67": 3, "ronald": 3, "technometr": 3, "531": 3, "540": 3, "jwht13": [3, 4, 12], "gareth": [3, 4, 12], "daniela": [3, 4, 12], "witten": [3, 4, 12], "hasti": [3, 4, 12], "tibshirani": [3, 4, 12], "springer": [3, 4, 12, 15], "1st": [3, 4, 12], "edit": [3, 4, 12, 13, 15], "www": [3, 4, 7, 10, 12], "statlearn": [3, 4, 12], "com": [3, 4, 6, 10, 12, 13, 14, 15], "mck12": [3, 10, 15, 16], "ipython": [3, 10, 13, 15, 16], "o": [3, 7, 10, 13, 15, 16], "reilli": [3, 10, 15, 16], "media": [3, 8, 10, 15, 16], "inc": [3, 6, 10, 15, 16], "subgroup": [4, 7, 15, 16], "predict": [4, 7, 9, 11, 12, 15], "differenti": 4, "classif": [4, 7, 9, 11, 12], "variabl": [4, 6, 7, 8, 10, 11, 12, 15, 16], "scikit": [4, 11, 12], "set": [4, 6, 8, 9, 10, 12, 14, 16], "genet": [4, 15], "ancestr": 4, "subpopul": 4, "onlin": [4, 6, 10, 13, 14, 15], "custom": [4, 15], "uncov": [4, 8, 15], "fundament": [4, 6, 7, 15], "supervis": 4, "unsupervis": 4, "imposs": [4, 6], "articl": [4, 7], "wikipedia": [4, 10], "evalu": [4, 6, 7, 12, 15], "test": [4, 6, 12, 13], "good": [4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "ascertain": 4, "rigor": [4, 11], "lloyd": 4, "1982": 4, "hierarch": 4, "princip": 4, "compon": [4, 7], "multidimension": 4, "semisupervis": 4, "goal": [4, 7, 11, 15, 16], "benefici": [4, 10], "unlabel": [4, 7], "willing": [4, 6], "seed": [4, 6, 11, 12], "palmerpenguin": 4, "horst": 4, "2020": [4, 6, 7, 15], "kristen": 4, "gorman": 4, "palmer": 4, "station": [4, 15], "antarctica": [4, 15], "ecolog": 4, "site": [4, 5, 10], "adult": 4, "penguin": 4, "2014": [4, 16], "bill": 4, "flipper": 4, "millimet": 4, "distinct": [4, 8, 15], "speci": 4, "discoveri": [4, 12], "gentoo": 4, "bill_length_mm": 4, "flipper_length_mm": 4, "39": [4, 14, 15], "182": 4, "34": [4, 14, 15, 16], "187": [4, 6, 11], "190": [4, 11, 16], "195": [4, 7, 16], "193": [4, 11], "213": [4, 7, 10, 15, 16], "215": [4, 16], "45": [4, 7, 10, 14, 15, 16], "220": [4, 16], "49": [4, 14, 15], "208": 4, "52": [4, 14], "197": 4, "189": [4, 6], "penguins_standard": 4, "bill_length_standard": 4, "flipper_length_standard": 4, "641361": 4, "189773": 4, "144917": 4, "328412": 4, "517922": 4, "921755": 4, "107617": 4, "846513": 4, "409743": 4, "677761": 4, "238168": 4, "271104": 4, "902464": 4, "433767": 4, "720106": 4, "192860": 4, "645505": 4, "355522": 4, "962559": 4, "440353": 4, "762179": 4, "205012": 4, "111528": 4, "123299": 4, "786203": 4, "626855": 4, "757407": 4, "783170": 4, "108442": 4, "776057": 4, "759092": 4, "subtyp": 4, "scatter_plot": 4, "meaningless": 4, "etc": [4, 6, 7, 10, 14, 15, 16], "adjust": [4, 15], "sum": [4, 11, 16], "wssd": 4, "intertia": 4, "mu_x": 4, "mu_i": 4, "x_1": 4, "x_2": 4, "x_3": 4, "x_4": 4, "y_1": 4, "y_2": 4, "y_3": 4, "y_4": 4, "35": [4, 7, 14, 15, 16], "outlin": [4, 7, 10, 15, 16], "far": [4, 12, 14, 15, 16], "yellow": [4, 16], "variant": 4, "minim": [4, 11, 12, 15], "reassign": 4, "longer": [4, 7, 16], "termin": [4, 13], "fourth": 4, "onward": [4, 10, 13, 15], "guarante": [4, 13], "forev": 4, "logic": [4, 7, 10, 16], "finit": [4, 6, 15], "unlik": [4, 6, 10, 11, 15], "stuck": [4, 8, 16], "solut": [4, 6, 7], "poor": [4, 10], "lowest": [4, 10, 15], "cross": [4, 11, 12], "valid": [4, 9, 10, 11, 12], "subdivid": 4, "merg": [4, 10], "diminish": 4, "reach": [4, 10, 12, 14, 15], "being": [4, 6, 7, 8, 10, 11, 14, 15, 16], "address": [4, 7, 10, 11, 12, 14], "preprocess": [4, 11], "kmean": 4, "n_cluster": 4, "kmeanskmean": 4, "penguin_clust": 4, "labels_": 4, "altern": [4, 7, 12, 14, 15, 16], "suffix": [4, 15], "nomin": [4, 15], "discret": [4, 15], "cluster_plot": 4, "inertia_": 4, "inertia": 4, "730719092276117": 4, "varieti": [4, 10, 12, 14, 16], "ks": 4, "oper": [4, 6, 7, 10, 13, 14, 15], "safest": 4, "reus": 4, "penguin_clust_k": 4, "000000": 4, "576264": 4, "730719": 4, "343613": 4, "362131": 4, "678383": 4, "293320": 4, "975016": 4, "785232": 4, "elbow_plot": 4, "bump": [4, 15], "prevent": [4, 7, 8, 10, 15, 16], "n_init": 4, "paramet": [4, 6, 10, 11, 12, 15, 16], "realm": 4, "specif": [4, 10, 11, 13, 14, 15], "companion": [4, 10], "pca": 4, "gwf14": 4, "toni": 4, "fraser": 4, "sexual": 4, "dimorph": 4, "commun": [4, 7, 10, 15], "ntarctic": 4, "genu": 4, "emph": 4, "pygosc": 4, "plo": [4, 14], "ONE": 4, "hhg20": 4, "allison": 4, "alison": 4, "hill": [4, 10], "archipelago": 4, "allisonhorst": 4, "io": [4, 7, 10, 15], "llo82": 4, "stuart": 4, "quantiz": 4, "pcm": 4, "129": 4, "137": [4, 6, 7, 12], "releas": [4, 10], "bell": [4, 6], "telephon": 4, "paper": [4, 15], "1957": 4, "web": [5, 8, 14], "navig": [5, 7, 8, 10, 13, 14], "mobil": 5, "devic": [5, 10], "menu": [5, 7, 8, 13], "datasciencebook": [5, 9, 10, 13], "ca": [5, 7, 9, 10, 11, 12, 13], "licens": 5, "creativ": 5, "noncommerci": 5, "sharealik": 5, "popul": [6, 7, 10, 15, 16], "extend": [6, 12, 15], "inferenti": [6, 7, 9, 11, 15], "interv": 6, "approxim": 6, "broader": 6, "retail": 6, "sell": 6, "iphon": 6, "accessori": 6, "market": [6, 11, 12], "strateg": 6, "product": [6, 7, 14], "north": [6, 10, 15], "american": [6, 7, 10], "colleg": 6, "campus": 6, "america": [6, 15], "owner": [6, 10, 12], "characterist": [6, 7, 10, 15, 16], "costli": 6, "taken": [6, 7, 11, 14, 15], "canada": [6, 7, 10, 15, 16], "apart": [6, 10, 15], "rent": 6, "budget": [6, 11], "studio": 6, "rental": [6, 10], "price": [6, 10, 11, 12], "month": [6, 14, 15], "monthli": 6, "airbnb": 6, "cox": 6, "marketplac": 6, "vacat": 6, "septemb": [6, 15], "neighborhood": 6, "room": 6, "accommod": 6, "bathroom": 6, "bedroom": [6, 10, 11, 12], "bed": [6, 11, 12], "night": 6, "neighbourhood": 6, "room_typ": 6, "downtown": 6, "home": [6, 7, 10, 11, 12, 13, 15, 16], "apt": [6, 13], "bath": [6, 11], "150": [6, 15], "eastsid": 6, "west": 6, "85": [6, 7, 10, 11, 12, 15], "kensington": 6, "cedar": 6, "cottag": 6, "110": 6, "4589": 6, "4590": 6, "4591": 6, "oakridg": 6, "privat": [6, 10, 14], "4592": 6, "dunbar": 6, "southland": 6, "share": [6, 8, 10, 14, 15, 16], "29": [6, 11, 14, 15, 16], "4593": 6, "145": 6, "4594": 6, "shaughnessi": 6, "citi": [6, 7, 10, 11, 16], "plan": [6, 14], "bylaw": 6, "747497": 6, "246408": 6, "005224": 6, "hotel": 6, "000871": 6, "747": 6, "155": [6, 12, 16], "725": 6, "250": [6, 11, 16], "025": 6, "625": 6, "350": [6, 11, 12, 16], "confirm": [6, 14, 15], "histogram": 6, "000": [6, 7, 10, 11, 12, 15], "20_000": 6, "605": 6, "606": 6, "marpol": 6, "4579": 6, "4580": 6, "160": [6, 11], "1739": 6, "1740": 6, "151": [6, 7, 15], "3904": 6, "3905": 6, "185": [6, 16], "1596": 6, "1597": 6, "kitsilano": 6, "3060": 6, "3061": 6, "hast": 6, "sunris": 6, "78": 6, "19999": 6, "527": 6, "528": 6, "1587": 6, "1588": 6, "169": [6, 12], "3860": 6, "3861": 6, "2747": 6, "2748": 6, "285": 6, "800000": 6, "0000": 6, "999": 6, "queri": [6, 10], "qualifi": 6, "750": [6, 15], "775": 6, "225": [6, 10], "19998": 6, "700": [6, 16], "275": 6, "44552": 6, "reset_index": [6, 16], "caveat": [6, 15, 16], "twice": [6, 12], "sample_proport": 6, "44547": 6, "44548": 6, "44549": 6, "44550": 6, "44551": 6, "sample_estim": 6, "675": 6, "44541": 6, "19995": 6, "44543": 6, "19996": 6, "44545": 6, "19997": 6, "20000": 6, "mind": [6, 7, 10, 14], "sampling_distribut": 6, "mark_bar": [6, 7, 15], "bin": [6, 15], "maxbin": [6, 15], "symmetr": 6, "peak": [6, 15], "74848375": 6, "748": [6, 11], "neither": [6, 11, 15], "nor": [6, 8, 12], "underestim": 6, "tendenc": 6, "travel": 6, "wish": [6, 7, 14], "overpr": [6, 11], "population_distribut": 6, "skew": 6, "tail": [6, 10], "154": [6, 12], "5109773617762": 6, "one_sampl": 6, "sample_distribut": 6, "153": 6, "48225": 6, "wouldn": [6, 14], "alreadi": [6, 7, 8, 9, 10, 11, 12, 13, 15, 16], "mean_pric": 6, "148": 6, "56075": 6, "165": [6, 16], "50500": 6, "93925": 6, "139": 6, "14650": 6, "198": 6, "50000": 6, "192": 6, "66425": 6, "144": 6, "88600": 6, "08800": 6, "156": [6, 11], "25000": 6, "170": 6, "mean_of_sample_mean": 6, "sample_mean": 6, "disappear": 6, "thumb": [6, 15], "emphasi": 6, "saw": [6, 10, 11, 16], "significantli": [6, 7, 8, 12, 15, 16], "notion": [6, 11], "pretend": 6, "clever": 6, "drawn": [6, 12, 15], "median": [6, 15, 16], "slope": [6, 12], "displai": [6, 7, 8, 10, 12, 14, 15, 16], "4025": 6, "4026": 6, "renfrew": 6, "collingwood": 6, "1977": [6, 15], "1978": 6, "fairview": 6, "70": [6, 10, 15, 16], "4008": 6, "4009": 6, "269": [6, 15], "1543": 6, "1544": 6, "320": 6, "3350": 6, "3351": 6, "804": 6, "805": 6, "mount": 6, "pleasant": 6, "2286": 6, "2287": 6, "1010": 6, "1011": 6, "strathcona": 6, "120": [6, 7, 10, 16], "1878": 6, "1879": [6, 15], "175": 6, "1644": 6, "1645": 6, "2771": 6, "2772": 6, "4151": 6, "4152": 6, "289": 6, "4495": 6, "4496": 6, "rilei": 6, "park": [6, 15], "115": 6, "1308": 6, "1309": 6, "2246": 6, "2247": 6, "2335": 6, "2336": 6, "4059": 6, "4060": 6, "1280": 6, "1281": 6, "4324": 6, "4325": 6, "3403": 6, "3404": 6, "arbutu": 6, "ridg": 6, "664": 6, "1729": 6, "1730": 6, "93": [6, 15], "3722": 6, "3723": 6, "241": 6, "242": 6, "3955": 6, "3956": 6, "60": [6, 7], "1042": 6, "1043": 6, "649": 6, "650": [6, 15], "sunset": 6, "1995": [6, 15], "1996": 6, "363": 6, "364": 6, "1783": 6, "1784": 6, "806": 6, "254": 6, "255": 6, "3365": 6, "3366": 6, "4562": 6, "4563": 6, "64": [6, 10, 11, 13], "2124": 6, "2125": 6, "200": [6, 7, 10, 11, 15], "1997": 6, "1998": 6, "257": 6, "4329": 6, "4330": [6, 16], "3408": 6, "3409": 6, "635": 6, "636": 6, "grandview": 6, "woodland": 6, "103": [6, 16], "one_sample_dist": 6, "boot1": 6, "boot1_dist": 6, "ident": [6, 7, 10], "mimic": 6, "break": [6, 10, 11, 12], "boot20000": 6, "six": [6, 7, 9, 11, 15, 16], "six_bootstrap_sampl": 6, "height": [6, 12, 15], "facet": [6, 15], "67175": 6, "42500": 6, "149": [6, 7], "35000": 6, "13225": 6, "179": [6, 7], "79675": 6, "188": 6, "28225": 6, "boot20000_mean": 6, "159": 6, "29675": 6, "136": 6, "55725": 6, "161": 6, "93950": 6, "22500": 6, "boot_est_dist": 6, "resampl": 6, "repeatedli": 6, "percentil": [6, 16], "captur": [6, 10, 12, 15], "narrow": [6, 10, 16], "implic": 6, "comfort": [6, 14], "strict": [6, 7], "unhelp": 6, "life": [6, 7], "deadli": 6, "ascend": [6, 7, 15], "bound": [6, 15], "97": [6, 12, 15], "quantil": 6, "express": [6, 15, 16], "5th": 6, "975": 6, "ci_bound": 6, "121": [6, 11], "607069": 6, "191": [6, 7], "525362": 6, "finish": [6, 8, 9, 10, 13, 14, 15], "journei": 6, "surfac": [6, 11, 12, 15], "foundat": [6, 7, 10, 12], "openintro": 6, "diez": 6, "2019": [6, 15], "solid": [6, 15], "grasp": 6, "natur": [6, 14, 15, 16], "coxd": 6, "murrai": 6, "insideairbnb": 6, "09": [6, 10, 15], "dccetinkayarb19": 6, "\u00e7": 6, "etinkaya": 6, "rundel": 6, "christoph": 6, "barr": 6, "os": [6, 8], "dirti": 7, "clean": [7, 9, 10], "dig": [7, 10, 16], "jump": [7, 9, 10, 15], "symbol": [7, 13, 15, 16], "spoken": [7, 15, 16], "resid": [7, 15], "indigen": 7, "cultur": 7, "anywher": [7, 8], "2018": [7, 15], "sadli": 7, "colon": [7, 16], "led": [7, 15], "loss": 7, "children": 7, "speak": [7, 10, 15, 16], "mother": [7, 15, 16], "tongu": [7, 15, 16], "childhood": 7, "residenti": [7, 11], "discov": 7, "act": [7, 14, 15, 16], "harm": 7, "endang": 7, "geograph": 7, "walker": 7, "2017": [7, 14], "came": [7, 11, 15], "aborigin": [7, 10, 15, 16], "truth": 7, "reconcili": 7, "commiss": 7, "action": 7, "2015": 7, "canlang": [7, 10, 15], "2016": [7, 10, 15, 16], "censu": [7, 10, 15, 16], "214": [7, 10, 15, 16], "offici": [7, 10, 15, 16], "mother_tongu": [7, 10, 15, 16], "expos": 7, "birth": 7, "most_at_hom": [7, 10, 15, 16], "most_at_work": [7, 10, 15, 16], "lang_known": [7, 10, 15, 16], "accord": [7, 10, 15, 16], "deep": [7, 12], "simplifi": [7, 10, 16], "concentr": [7, 15], "expertis": 7, "bias": 7, "aim": [7, 9, 15], "causal": [7, 11, 15], "mechanist": [7, 15], "leek": 7, "matsui": 7, "earli": [7, 9], "live": [7, 10, 15], "provinc": [7, 10], "territori": 7, "propos": 7, "hypothes": [7, 15], "polit": 7, "parti": 7, "wealth": [7, 15], "elect": 7, "quantif": 7, "factor": [7, 15], "mechan": [7, 10, 11], "pertain": [7, 15, 16], "occasion": [7, 13, 16], "race": [7, 11, 12], "runner": 7, "regularli": [7, 8], "graphic": [7, 8, 10, 13, 14, 15], "ag": 7, "old": [7, 10, 14], "50kg": 7, "cluster": [7, 9, 15], "bought": 7, "amazon": 7, "cellphon": 7, "ownership": 7, "android": 7, "phone": 7, "essenc": 7, "spreadsheet": [7, 10], "microsoft": 7, "rectangular": 7, "primarili": [7, 11, 14, 15], "voter": 7, "affili": 7, "comma": [7, 8, 11, 16], "short": [7, 10, 15], "save": [7, 10, 13, 14], "googl": [7, 10], "sheet": [7, 10], "can_lang": [7, 10, 15, 16], "plain": [7, 8, 14], "editor": [7, 8, 10, 14], "notepad": 7, "590": [7, 10, 15], "235": [7, 10, 15, 16], "665": [7, 10, 15], "afrikaan": [7, 10, 15, 16], "10260": [7, 10, 15], "4785": [7, 10, 15], "23415": [7, 10, 15], "afro": [7, 10, 15, 16], "asiat": [7, 10, 15, 16], "1150": [7, 10, 15], "44": [7, 10, 14, 15], "akan": [7, 10, 15, 16], "twi": [7, 10, 15, 16], "13460": [7, 10, 15], "5985": [7, 10, 15], "22150": [7, 10, 15], "albanian": [7, 10, 15, 16], "26895": [7, 10, 15], "13135": [7, 10, 15], "345": [7, 10, 15], "31930": [7, 10, 15], "algonquian": [7, 10, 16], "algonquin": [7, 10, 16], "1260": [7, 10], "370": [7, 10, 16], "2480": [7, 10], "sign": [7, 10, 11, 14, 15], "2685": [7, 10], "3020": [7, 10], "1145": [7, 10], "amhar": [7, 10], "22465": [7, 10], "12785": [7, 10], "33670": [7, 10], "instal": [7, 8, 9, 10, 13], "team": [7, 14], "es": 7, "innei": 7, "2010": 7, "command": [7, 8, 10, 13], "shorter": [7, 8, 10, 14, 15], "alia": [7, 8], "gave": [7, 10], "harder": [7, 15, 16], "quot": [7, 10], "letter": [7, 13, 14], "distinguish": [7, 15], "satisfi": [7, 10], "syntax": [7, 10, 14, 16], "amp": [7, 10, 15, 16], "445": [7, 10, 15, 16], "2775": [7, 10, 15], "209": [7, 10, 15, 16], "wolof": [7, 10, 15, 16], "3990": [7, 10, 15], "1385": [7, 10, 15], "8240": [7, 10, 15], "210": [7, 10, 15, 16], "wood": [7, 10, 15, 16], "cree": [7, 10, 15, 16], "1840": [7, 10, 15], "800": [7, 10, 15], "2665": [7, 10, 15], "211": [7, 10, 11, 15, 16], "wu": [7, 10, 15, 16], "shanghaines": [7, 10, 15, 16], "12915": [7, 10, 15], "7650": [7, 10, 15], "16530": [7, 10, 15], "yiddish": [7, 10, 15, 16], "13555": [7, 10, 15], "7085": [7, 10, 15], "895": [7, 10, 15], "20985": [7, 10, 15], "yoruba": [7, 10, 15, 16], "9080": [7, 10, 15], "2615": [7, 10, 15], "22415": [7, 10, 15], "screen": [7, 8, 10], "string": [7, 10, 14, 15, 16], "my_numb": 7, "alic": 7, "formal": 7, "_": [7, 8, 15, 16], "won": [7, 10, 12, 14, 16], "complain": 7, "my": [7, 8], "syntaxerror": 7, "mayb": [7, 10], "meant": 7, "convent": [7, 8, 14], "lowercas": [7, 14], "language_data": 7, "pep": 7, "guido": 7, "van": 7, "rossum": 7, "2001": 7, "minut": [7, 8, 12, 15], "underneath": [7, 8], "ve": [7, 10, 14], "largest": [7, 10, 15, 16], "sophist": 7, "restrict": [7, 12, 16], "bracket": [7, 8, 11, 16], "statement": [7, 10, 16], "written": [7, 8, 10, 14], "doubl": [7, 8, 9, 13, 15, 16], "athabaskan": [7, 10, 16], "atikamekw": [7, 10, 16], "6150": [7, 10], "5465": 7, "1100": 7, "6645": 7, "thompson": [7, 10], "ntlakapamux": [7, 10], "335": [7, 10], "450": 7, "tlingit": [7, 10], "260": 7, "tsimshian": [7, 10], "410": 7, "206": 7, "wakashan": [7, 10], "67": [7, 10, 11, 15], "aboriginal_lang": 7, "alias": 7, "wrote": 7, "terminolog": 7, "obj": 7, "f": [7, 10, 11, 13], "programm": 7, "confus": [7, 10, 16], "appar": 7, "rescu": 7, "selected_lang": 7, "descend": [7, 15], "decend": 7, "arranged_lang": 7, "64050": 7, "inuktitut": 7, "35210": 7, "138": 7, "ojibwai": 7, "17885": 7, "oji": 7, "12855": 7, "dene": 7, "10700": 7, "32": [7, 12, 14, 15, 16], "cayuga": 7, "squamish": 7, "iroquoian": 7, "ten_lang": 7, "montagnai": 7, "innu": 7, "10235": 7, "119": 7, "mi": [7, 15], "kmaq": 7, "6690": 7, "3065": 7, "180": [7, 12], "stonei": 7, "3025": 7, "becam": 7, "curiou": 7, "728": [7, 15], "canadian_popul": [7, 15], "overwrit": 7, "opt": [7, 10, 11], "mother_tongue_perc": [7, 15], "35_151_728": [7, 15], "35151728": 7, "latter": [7, 11], "clearer": [7, 15], "182210": 7, "100166": 7, "050879": 7, "036570": 7, "030439": 7, "029117": 7, "019032": 7, "017496": 7, "008719": 7, "008606": 7, "ten_lang_perc": 7, "008": 7, "temporari": [7, 14, 16], "arranged_lang_sort": 7, "trace": [7, 8], "split": [7, 11, 12, 15], "rewrit": 7, "unwieldi": 7, "parenthesi": 7, "demonstr": [7, 10, 11, 12, 15, 16], "cleaner": 7, "messi": [7, 14, 16], "pars": [7, 10, 15], "block": [7, 10], "piec": 7, "period": [7, 8, 10, 15], "Not": [7, 16], "feed": 7, "redo": 7, "overwhelm": 7, "debug": 7, "midwai": 7, "audienc": [7, 8, 14, 15], "difficulti": 7, "scrutin": 7, "speaker": [7, 15, 16], "convei": [7, 15], "understood": 7, "tidi": 7, "shortli": 7, "ax": [7, 15], "mark": [7, 10, 14, 15], "channel": [7, 10, 11, 14, 15], "barplot_mother_tongu": 7, "refin": [7, 10], "quotat": [7, 10], "modif": [7, 16], "tackl": 7, "rotat": 7, "swap": [7, 15], "barplot_mother_tongue_axi": 7, "forward": [7, 10, 11], "suit": [7, 15, 16], "reorder": 7, "ordered_barplot_mother_tongu": 7, "swampi": 7, "elsewher": [7, 10], "moos": 7, "northern": 7, "east": 7, "southern": 7, "comment": [7, 14], "hash": [7, 14], "importantli": 7, "self": [7, 10], "habit": [7, 11], "highli": [7, 14], "got": 7, "tast": 7, "ten_lang_plot": 7, "nobodi": 7, "pull": [7, 10, 13], "forgotten": [7, 14], "pop": [7, 8, 10], "slowli": 7, "adept": 7, "remind": [7, 16], "lab": [7, 13], "lookup": 7, "concis": 7, "press": [7, 8], "tab": [7, 8, 10, 13, 14], "bring": [7, 10], "typo": 7, "hold": [7, 10, 15, 16], "dialogu": 7, "dialog": [7, 14], "contextu": 7, "gvr01": 7, "coghlan": 7, "barri": [7, 16], "warsaw": 7, "style": [7, 10], "0008": 7, "lp15": 7, "jeffrei": [7, 15], "347": 7, "6228": 7, "1314": 7, "1315": 7, "pm15": 7, "elizabeth": 7, "art": [7, 15], "anyon": [7, 8, 10, 14], "skybrud": 7, "consult": [7, 10, 14], "llc": 7, "bookdown": 7, "rdpeng": 7, "artofdatasci": 7, "tim20": [7, 15], "ttimber": [7, 10, 15], "wal17": 7, "anada": 7, "canadiangeograph": 7, "wil18": 7, "kori": 7, "bccampu": 7, "opentextbc": 7, "indigenizationfound": 7, "statisticscanada16a": 7, "www12": 7, "statcan": 7, "gc": 7, "recens": 7, "dp": 7, "eng": 7, "cfm": 7, "statisticscanada16b": 7, "borigin": 7, "irst": 7, "ation": 7, "\u00e9ti": 7, "nuit": 7, "sa": 7, "2016022": 7, "x2016022": 7, "statisticscanada18": 7, "evolut": 7, "1901": 7, "www150": 7, "n1": 7, "pub": 7, "630": 7, "x2018001": 7, "htm": 7, "thepdteam20": 7, "dev": 7, "februari": 7, "doi": [7, 15], "5281": 7, "zenodo": 7, "3509134": 7, "trutharcocanada12": 7, "public": [7, 14], "govern": 7, "servic": [7, 10, 14], "trutharcocanada15": 7, "ction": 7, "www2": 7, "gov": [7, 10, 15], "asset": 7, "columbian": 7, "calls_to_action_english2": 7, "pdf": [7, 15], "wesmckinney10": 7, "ata": 7, "tructur": 7, "tatist": 7, "omput": 7, "p": [7, 10, 13], "ython": 7, "t\u00e9fan": 7, "der": 7, "arrod": 7, "illman": 7, "roceed": 7, "9th": 7, "cienc": 7, "onfer": 7, "25080": 7, "majora": 7, "92bf1922": 7, "00a": 7, "interleav": 8, "narrat": 8, "platform": [8, 14], "interfac": [8, 10, 13, 14], "dress": 8, "morn": 8, "configur": [8, 9, 13, 14], "mix": [8, 16], "formatt": 8, "artifact": 8, "analyz": [8, 9, 10, 16], "realiti": [8, 12], "consciou": [8, 14], "screenshot": 8, "easiest": [8, 13], "jupyterhub": [8, 14], "provis": 8, "authent": [8, 14], "gain": [8, 10], "instructor": [8, 9], "refer": 8, "independ": [8, 9, 15], "entireti": 8, "activ": [8, 10], "cursor": 8, "rectangl": [8, 15], "toolbar": [8, 10], "keyboard": [8, 14], "enter": [8, 10, 13, 14, 15], "arrow": [8, 14], "restart": [8, 13], "bar": [8, 10, 12, 13], "slight": [8, 11], "session": [8, 13, 14], "delet": [8, 13, 14], "emul": 8, "window": [8, 10], "statu": 8, "idl": 8, "busi": 8, "excess": 8, "unrespons": 8, "lose": 8, "connect": [8, 10, 12, 13, 14, 15], "interrupt": 8, "paus": 8, "server": [8, 10, 14], "hub": 8, "panel": 8, "shut": [8, 13], "rich": [8, 14], "bold": 8, "italic": 8, "bullet": [8, 10], "eventu": [8, 10, 15], "unformat": 8, "unrend": 8, "box": [8, 11, 12, 13, 14], "progress": [8, 13], "autosav": 8, "disk": [8, 10], "icon": [8, 10, 13, 14], "mac": 8, "arbitrari": [8, 15], "downsid": [8, 13], "nonlinear": [8, 12, 15], "deliber": [8, 14], "referenc": 8, "unconvent": 8, "fail": 8, "nonfunct": 8, "scenario": [8, 10], "event": [8, 14], "guard": 8, "awar": [8, 14], "sooner": 8, "linearli": [8, 12], "suffici": [8, 15], "extern": [8, 14], "heavili": 8, "loc": [8, 15], "package_nam": 8, "pn": 8, "librari": [8, 15], "hidden": [8, 10], "ipynb": [8, 10, 14], "shareabl": 8, "firefox": 8, "safari": 8, "chrome": 8, "edg": 8, "adob": 8, "acrobat": 8, "benefit": [8, 10, 14, 16], "standalon": 8, "font": [8, 10, 15], "launcher": 8, "visibl": [8, 14, 15], "untitl": 8, "white": 8, "troublesom": [8, 14], "repetit": 8, "dash": [8, 15], "jupyterlab": 8, "keen": 8, "commonmark": 8, "cheatsheet": 8, "audit": 9, "friend": 9, "colleagu": 9, "histori": [9, 14], "chapter": 9, "spend": [9, 10, 11, 16], "restructur": 9, "usabl": 9, "coher": 9, "variou": [10, 13, 16], "laptop": [10, 14], "gatewai": 10, "unless": [10, 13, 15], "upfront": [10, 16], "devot": 10, "shoelac": 10, "trip": 10, "skiprow": 10, "ibi": 10, "list_tabl": 10, "to_csv": 10, "astronomi": 10, "pictur": [10, 15], "request": [10, 16], "internet": [10, 13], "remot": 10, "directori": [10, 13, 14, 15], "filesystem": 10, "folder": [10, 13, 14], "worksheet_02": 10, "happiness_report": 10, "slash": [10, 16], "proce": [10, 13, 14, 16], "happy_data": 10, "bike_shar": 10, "tutorial_01": 10, "silli": [10, 12], "redund": [10, 15], "whew": 10, "bonu": 10, "fatima": 10, "jayden": 10, "usernam": [10, 14], "link": [10, 13, 14], "video": [10, 13], "omma": 10, "epar": 10, "v": [10, 13], "alu": 10, "aren": [10, 15, 16], "canadian": [10, 16], "canlang_data": 10, "oftentim": [10, 16], "sentenc": 10, "paragraph": [10, 15], "scientist": 10, "distribut": [10, 14, 15], "permiss": [10, 14], "21930": 10, "parsererror": 10, "messag": [10, 13, 14, 15, 16], "wasn": [10, 15], "can_lang_meta": 10, "token": 10, "didn": [10, 16], "tsv": 10, "escap": 10, "backslash": 10, "can_lang_no_nam": 10, "curli": [10, 16], "brace": 10, "col_map": 10, "canlang_data_renam": 10, "immedi": [10, 12], "u": [10, 13, 15], "niform": 10, "esourc": 10, "ocat": 10, "raw": [10, 13, 15, 16], "githubusercont": [10, 13], "datasci": 10, "whichev": 10, "xlsx": 10, "snippet": [10, 14], "_rel": 10, "j1": 10, "w8": 10, "qrj": 10, "tf": 10, "wz": 10, "hlio": 10, "8f": 10, "3wn": 10, "ed2": 10, "gz": 10, "_r": 10, "yg": 10, "tuee": 10, "6q": 10, "rzy": 10, "l60": 10, "xtp": 10, "4vt": 10, "jq": 10, "sheet_nam": 10, "sad": 10, "usecol": 10, "beforehand": 10, "libr": 10, "offic": 10, "semicolon": 10, "decim": [10, 15, 16], "european": 10, "countri": 10, "storag": 10, "user": [10, 13, 14], "manag": [10, 13, 14], "mysql": 10, "oracl": 10, "sql": 10, "simplest": [10, 15], "db": 10, "backend": 10, "send": [10, 14], "sqlalchemi": 10, "matur": 10, "deeper": 10, "friendlier": 10, "conn": 10, "retriev": [10, 11, 14, 16], "secretli": 10, "behind": [10, 14, 15], "scene": [10, 14], "canlang_t": 10, "databaset": 10, "r0": 10, "countstar": 10, "haven": [10, 13], "sent": [10, 14], "effici": [10, 12, 14, 15], "lazi": 10, "compil": 10, "str": 10, "AS": 10, "nfrom": 10, "t0": 10, "arab": 10, "419890": 10, "223535": 10, "5585": 10, "629055": 10, "mostli": [10, 14, 15, 16], "canlang_table_filt": 10, "predic": 10, "canlang_table_select": 10, "r1": 10, "aboriginal_lang_data": 10, "attributeerror": 10, "traceback": 10, "recent": [10, 13, 14], "conda": [10, 13], "lib": 10, "python3": 10, "expr": 10, "py": [10, 13, 16], "645": 10, "__getattr__": 10, "641": 10, "hint": 10, "common_typo": 10, "642": [10, 12], "rais": [10, 15], "643": 10, "__name__": 10, "644": 10, "tahltan": 10, "crash": 10, "postgr": 10, "client": [10, 11], "host": [10, 13, 14], "localhost": 10, "port": [10, 13], "endpoint": 10, "5432": 10, "password": [10, 14], "can_mov_db": 10, "movi": 10, "fakeserv": 10, "stat": 10, "user0001": 10, "abc123": 10, "theme": [10, 15], "medium": [10, 14], "title_alias": 10, "episod": 10, "names_occup": 10, "occup": 10, "rate": 10, "ratings_t": 10, "alchemyt": 10, "average_r": 10, "num_vot": 10, "avg_rat": 10, "order_bi": 10, "backup": 10, "integr": 10, "secur": [10, 14], "simultan": [10, 14, 16], "conflict": 10, "billion": 10, "daili": 10, "chao": 10, "ensu": 10, "no_official_lang_data": 10, "no_official_languag": 10, "magic": 10, "uncommon": 10, "pplicat": 10, "rogram": 10, "nterfac": 10, "secret": [10, 14], "somewhat": [10, 12], "thought": [10, 12, 16], "painstak": 10, "gather": [10, 15], "yper": 10, "ext": 10, "arkup": 10, "anguag": 10, "ascad": 10, "tyle": 10, "heet": 10, "webpag": [10, 14], "wherea": [10, 12, 16], "element": [10, 15, 16], "layout": [10, 15], "subsect": 10, "richardson": 10, "2007": 10, "reitz": 10, "2023": 10, "foot": [10, 11, 12], "craiglist": 10, "craigslist": 10, "advertis": [10, 11, 12], "span": 10, "meta": 10, "hous": [10, 11, 12], "1br": 10, "hood": 10, "13768": 10, "108th": 10, "avenu": 10, "maptag": 10, "pid": 10, "6786042973": 10, "banish": 10, "trash": [10, 13], "hide": [10, 15], "post": [10, 14], "unbanish": 10, "href": 10, "restor": 10, "2285": 10, "oof": 10, "date": [10, 14, 15], "keyword": [10, 16], "grab": 10, "complex": [10, 12, 14, 15], "selectorgadget": 10, "cc": 10, "deselect": 10, "pic": 10, "footag": 10, "gadget": 10, "robot": 10, "txt": [10, 14], "cl": 10, "spider": 10, "script": 10, "scraper": 10, "crawler": 10, "explicit": [10, 16], "realist": 10, "disallow": 10, "td": 10, "nth": 10, "child": [10, 12], "largestc": 10, "target": 10, "bs4": 10, "wiki": 10, "en": 10, "parser": 10, "population_nod": 10, "slice": [10, 15, 16], "clariti": [10, 15], "greater_toronto_area": 10, "202": 10, "london": [10, 16], "_ontario": 10, "ontario": 10, "543": 10, "551": 10, "greater_montr": 10, "montreal": [10, 16], "node": 10, "rid": 10, "get_text": 10, "fantast": 10, "albeit": 10, "canada_wiki_t": 10, "metropolitan": [10, 16], "droplevel": 10, "canada_wiki_df": 10, "rank": 10, "unnam": 10, "8_level_1": 10, "9_level_1": 10, "6202225": 10, "543551": 10, "quebec": 10, "4291732": 10, "halifax": [10, 16], "nova": 10, "scotia": 10, "465703": 10, "2642825": 10, "st": [10, 16], "catharin": [10, 16], "niagara": [10, 16], "433604": 10, "ottawa": [10, 16], "gatineau": [10, 16], "1488307": 10, "windsor": [10, 16], "422630": 10, "calgari": [10, 16], "1481806": 10, "oshawa": 10, "415311": 10, "edmonton": [10, 16], "1418118": 10, "victoria": [10, 15, 16], "397237": 10, "839311": 10, "saskatoon": 10, "saskatchewan": 10, "317480": 10, "winnipeg": [10, 16], "manitoba": 10, "834678": 10, "regina": [10, 16], "249217": 10, "hamilton": 10, "785184": 10, "sherbrook": 10, "227398": 10, "kitchen": [10, 16], "cambridg": [10, 16], "waterloo": [10, 16], "575847": 10, "kelowna": [10, 16], "222162": 10, "desktop": 10, "stun": 10, "rho": 10, "ophiuchi": 10, "juli": 10, "webb": 10, "telescop": 10, "nircam": 10, "molecular": [10, 15], "signup": 10, "safe": [10, 14], "transfer": [10, 11], "infinit": 10, "bandwidth": 10, "frequent": [10, 14], "success": [10, 14], "bog": 10, "revok": 10, "grant": 10, "quota": 10, "overrun": 10, "abid": 10, "hourli": 10, "hour": [10, 11], "planetari": 10, "apod": 10, "api_kei": 10, "your_api_kei": 10, "07": [10, 15], "explan": [10, 15], "mere": 10, "390": 10, "light": 10, "sun": [10, 15], "star": 10, "planet": 10, "peer": 10, "natal": 10, "infrar": 10, "spectacular": 10, "cosmic": 10, "snapshot": [10, 13, 14], "celebr": 10, "young": 10, "brighter": 10, "clearli": [10, 15], "sport": 10, "diffract": 10, "spike": 10, "jet": 10, "shock": 10, "hydrogen": 10, "blast": 10, "newborn": 10, "yellowish": 10, "dusti": 10, "caviti": 10, "carv": 10, "energet": 10, "Near": 10, "shadow": 10, "cast": 10, "protoplanetari": 10, "hdurl": 10, "2307": 10, "stsci": 10, "01_rhooph": 10, "png": [10, 15], "media_typ": 10, "service_vers": 10, "v1": 10, "01_rhooph1024": 10, "neat": 10, "json": 10, "javascript": 10, "notat": [10, 16], "nasa_data_singl": 10, "start_dat": 10, "end_dat": 10, "nasa_data": 10, "74": [10, 15], "copyright": 10, "data_dict": 10, "nasa_df": 10, "carina": 10, "nebula": 10, "ncarlo": 10, "taylor": 10, "2305": 10, "carnorth": 10, "02": [10, 15], "flat": [10, 11, 12, 15], "rock": 10, "mar": 10, "nnasa": 10, "njpl": 10, "caltech": 10, "nmsss": 10, "nprocess": 10, "ne": 10, "flatmar": 10, "03": [10, 15, 16], "centauru": 10, "peculiar": 10, "island": 10, "nmarco": 10, "lorenzi": 10, "nangu": 10, "lau": 10, "tommi": 10, "tse": 10, "ntex": 10, "ngc5128_": 10, "galaxi": 10, "famou": 10, "hole": 10, "pia23122": 10, "shackleton": 10, "shadowcam": 10, "shacklet": 10, "69": 10, "doom": 10, "eta": 10, "nesa": 10, "nhubbl": 10, "nlice": 10, "etacarin": 10, "dust": 10, "ngc": 10, "6559": 10, "nadam": 10, "ntelescop": 10, "ngc6559_": 10, "sunspot": 10, "spot": 10, "72": 10, "ring": 10, "spiral": 10, "1398": 10, "ngc1398_": 10, "73": [10, 15], "readili": 10, "heart": 10, "awesom": 10, "udac": 10, "linux": [10, 13], "rthepsfoundation23": 10, "kenneth": 10, "readthedoc": 10, "latest": [10, 13, 14, 16], "ric07": 10, "leonard": 10, "beauti": 10, "soup": 10, "april": [10, 15], "nasaesacsa": 10, "esa": 10, "csa": 10, "pontoppidan": 10, "pagan": 10, "esawebb": 10, "weic2316a": 10, "realtsproject21": 10, "internetlivestat": 10, "faster": [11, 15], "rmspe": [11, 12], "person": [11, 12, 15], "week": 11, "annual": 11, "boston": 11, "marathon": 11, "sale": [11, 12], "spline": 11, "heurist": 11, "932": 11, "estat": [11, 12], "sacramento": [11, 12], "bee": 11, "newspap": 11, "realtor": 11, "zip": [11, 13, 14], "sqft": [11, 12], "latitud": 11, "longitud": 11, "z95838": 11, "836": [11, 16], "59222": 11, "631913": 11, "434879": 11, "z95823": 11, "1167": 11, "68212": 11, "478902": 11, "431028": 11, "z95815": 11, "796": 11, "68880": 11, "618305": 11, "443839": 11, "852": 11, "69307": 11, "616835": 11, "439146": 11, "z95824": 11, "797": 11, "81900": 11, "519470": 11, "435768": 11, "927": 11, "z95829": 11, "2280": 11, "232425": 11, "457679": 11, "359620": 11, "928": [11, 16], "1477": 11, "234000": 11, "499893": 11, "458890": 11, "929": 11, "citrus_height": 11, "z95610": 11, "1216": 11, "235000": 11, "708824": 11, "256803": 11, "930": [11, 15], "elk_grov": 11, "z95758": 11, "1685": 11, "235301": 11, "417000": 11, "397424": 11, "931": 11, "el_dorado_hil": 11, "z95762": 11, "1362": 11, "235738": 11, "655245": 11, "075915": 11, "livabl": 11, "feet": [11, 12], "usd": [11, 12], "unit": [11, 12, 15, 16], "front": [11, 15], "0f": [11, 12], "sold": [11, 12], "former": 11, "dive": 11, "subsampl": 11, "small_sacramento": 11, "pai": 11, "absent": 11, "small_plot": 11, "overlai": 11, "line_df": 11, "2000": 11, "mark_rul": [11, 15], "strokedash": [11, 15], "dist": 11, "nearest_neighbor": 11, "298": 11, "1900": 11, "361745": 11, "487409": 11, "461413": 11, "718": 11, "antelop": 11, "z95843": 11, "2160": 11, "290000": 11, "704554": 11, "354753": 11, "rosevil": 11, "z95678": 11, "1744": 11, "326951": 11, "771917": 11, "304439": 11, "256": 11, "252": 11, "z95835": 11, "1718": 11, "250000": 11, "676658": 11, "528128": 11, "282": 11, "rancho_cordova": 11, "z95670": 11, "1671": 11, "175000": 11, "591477": 11, "315340": 11, "329": 11, "280739": 11, "280": [11, 15, 16], "739": 11, "unansw": 11, "abil": [11, 14, 15, 16], "lock": [11, 12], "sacramento_train": [11, 12], "sacramento_test": [11, 12], "limits_": 11, "y_i": 11, "hat": 11, "_i": 11, "th": 11, "forecast": 11, "overshoot": 11, "undershoot": 11, "rmse": [11, 12], "equat": [11, 12], "kneighborsregressor": [11, 12], "neg_root_mean_squared_error": 11, "kneighborsregressor__n_neighbor": 11, "sacr_pipelin": 11, "sacr_preprocessor": 11, "201": 11, "sacr_gridsearch": 11, "sacr_result": 11, "param_kneighborsregressor__n_neighbor": 11, "117365": 11, "988307": 11, "2715": 11, "383001": 11, "93956": 11, "523683": 11, "2466": 11, "200227": 11, "89859": 11, "401722": 11, "2739": 11, "713448": 11, "87893": 11, "534919": 11, "2958": 11, "587153": 11, "86444": 11, "413831": 11, "3383": 11, "712997": 11, "92909": 11, "550051": 11, "2562": 11, "784826": 11, "93137": 11, "289780": 11, "2511": 11, "564001": 11, "93395": 11, "588763": 11, "2492": 11, "272799": 11, "93671": 11, "588088": 11, "2473": 11, "312705": 11, "199": 11, "93986": 11, "752272": 11, "048651": 11, "moment": [11, 16], "nonneg": 11, "neg_": 11, "convolut": 11, "alright": [11, 15], "101": [11, 16], "minimum": [11, 12, 16], "699": 11, "perfectli": [11, 14, 15], "datapoint": 11, "inflex": 11, "idiosyncrat": 11, "unseen": [11, 12], "mean_squared_error": [11, 12], "87498": 11, "86808211416": 11, "499": 11, "578": 11, "neglig": 11, "buyer": 11, "afford": 11, "maximum": [11, 12, 16], "5000": 11, "superimpos": [11, 12], "qualit": [11, 12], "opportun": 11, "sqft_prediction_grid": [11, 12], "arang": 11, "base_plot": 11, "sacr_preds_plot": [11, 12], "best_k_sacr": 11, "ff7f0e": [11, 12], "concern": [11, 12], "incorpor": [11, 16], "plot_b": 11, "moreov": 11, "85156": 11, "027067": 11, "3376": 11, "143313": 11, "rmspe_mult": 11, "85083": 11, "2902421959": 11, "083": 11, "overlaid": [11, 12], "2d": 11, "newli": [11, 14], "character": 12, "conclud": 12, "train": 12, "slower": 12, "confusingli": 12, "undervalu": 12, "beta_0": 12, "beta_1": 12, "cdot": 12, "intercept": [12, 15], "coeffici": 12, "parametr": 12, "push": 12, "happili": 12, "crazi": 12, "shouldn": 12, "600": [12, 15], "276": 12, "027": 12, "plausibl": 12, "linearregress": 12, "linear_model": 12, "coef_": 12, "intercept_": 12, "lm": 12, "285652": 12, "15642": 12, "309105": 12, "hurt": 12, "afterward": [12, 16], "85376": 12, "59691629931": 12, "377": [12, 15], "tricki": [12, 13], "all_point": 12, "wiggli": 12, "curv": [12, 15], "oscil": [12, 15], "Such": 12, "fare": 12, "extrapol": 12, "obvious": 12, "mlm": 12, "linearregressionlinearregress": 12, "lm_mult_test_rmsp": 12, "82331": 12, "04630202598": 12, "331": 12, "hallmark": 12, "59235377": 12, "20333": 12, "43213798": 12, "53180": 12, "26906624224": 12, "beta_2": 12, "hyperplan": 12, "333": [12, 15], "tune": [12, 15], "collinear": 12, "judg": 12, "unbeknownst": 12, "analyst": 12, "parent": 12, "absurdli": 12, "nevertheless": [12, 15], "subtl": [12, 16], "inaccur": 12, "ever": [12, 14, 16], "238": 12, "ft": 12, "041": 12, "166": 12, "539": 12, "ic": 12, "cream": 12, "flavor": [12, 15], "remark": 12, "homeown": 12, "df": [12, 16], "fulli": [12, 15], "5994": 12, "288853": 12, "1688": 12, "092090": 12, "9859": 12, "021194": 12, "9160": 12, "812375": 12, "6400": 12, "212624": 12, "7341": 12, "333609": 12, "8434": 12, "656970": 12, "3329": 12, "106273": 12, "7170": 12, "311442": 12, "7895": 12, "567003": 12, "cubic": 12, "z": 12, "strong": [12, 15], "magnitud": [12, 15], "leap": 12, "stone": 12, "enjoi": 12, "ventura": 13, "22": [13, 14, 15], "cpu": 13, "english": [13, 15, 16], "virtual": 13, "git": [13, 14], "rightmost": 13, "compress": [13, 15], "unzip": 13, "autograd": 13, "pre": 13, "isol": 13, "interf": 13, "ex": 13, "wizard": 13, "wsl": 13, "hyper": 13, "prompt": [13, 14], "cmd": 13, "admin": 13, "administr": 13, "log": [13, 14, 15], "bio": 13, "hotkei": 13, "esc": 13, "reboot": 13, "familiar": 13, "ubcdsci": 13, "proceed": [13, 16], "dockerfil": 13, "besid": [13, 14], "expand": [13, 14, 16], "textbox": 13, "8888": 13, "volum": 13, "path": [13, 15, 16], "jovyan": 13, "scroll": [13, 14], "127": 13, "troubleshoot": 13, "tip": 13, "dmg": 13, "intel": 13, "processor": 13, "older": 13, "appl": 13, "newer": 13, "drag": [13, 14], "sudo": 13, "certif": 13, "curl": 13, "gnupg": 13, "fssl": 13, "sh": 13, "chmod": 13, "rm": 13, "pwd": 13, "homepag": 13, "bundl": 13, "kernel": 13, "pip": 13, "upgrad": 13, "env": 13, "intro": 13, "yml": 13, "compat": 13, "xcode": 13, "x64": 13, "arm64": 13, "debian": 13, "deb": 13, "dpkg": 13, "jlab": 13, "me": 14, "ago": 14, "holder": 14, "lifespan": 14, "resolv": 14, "revis": 14, "mess": [14, 15], "repercuss": 14, "boggl": 14, "unclear": 14, "document_final_draft_fin": 14, "to_hand_in_final_v2": 14, "polish": 14, "lack": 14, "springboard": 14, "fruit": 14, "revert": 14, "Being": 14, "facilit": 14, "todai": [14, 15], "safeti": 14, "workspac": 14, "schemat": 14, "maintain": 14, "told": 14, "metadata": 14, "brief": 14, "narr": 14, "readm": 14, "md": 14, "draft": 14, "shorten": 14, "daa29d6": 14, "884c7ce": 14, "prerequisit": 14, "stage": 14, "physic": [14, 15], "placehold": 14, "synchron": 14, "eas": 14, "templat": 14, "canadian_languag": 14, "hyphen": 14, "privaci": 14, "happi": 14, "green": [14, 16], "respositori": 14, "reserv": 14, "upload": [14, 15], "toggl": 14, "markdown": 14, "archiv": 14, "defeat": 14, "prove": 14, "beginn": 14, "grain": 14, "expiri": 14, "creation": 14, "absolut": [14, 15], "tick": [14, 15], "repo": 14, "fret": 14, "eda": 14, "flag": 14, "pane": 14, "plu": 14, "untrack": 14, "checkpoint": 14, "state": [14, 15], "datetim": [14, 15], "stamp": 14, "ok": 14, "credenti": 14, "author": 14, "33": [14, 15, 16], "dismiss": 14, "invit": 14, "collaborators_github_user_nam": 14, "refresh": 14, "blend": [14, 15], "offend": 14, "preced": 14, "histor": 14, "float": [14, 16], "app": 14, "convers": [14, 15, 16], "subtop": 14, "persist": 14, "thread": 14, "searchabl": 14, "notif": 14, "repli": 14, "submit": [14, 15], "submiss": 14, "youtub": 14, "advic": 14, "gitlab": 14, "bitbucket": 14, "wbc": 14, "jennif": 14, "bryan": 14, "karen": 14, "cranston": 14, "justin": 14, "kitz": 14, "lex": 14, "nederbragt": 14, "traci": 14, "teal": 14, "subplot": 15, "raster": 15, "svg": 15, "distract": 15, "poster": 15, "wilk": 15, "oft": 15, "pie": 15, "static": 15, "math": 15, "cognit": 15, "mental": 15, "plainli": 15, "legend": 15, "scheme": 15, "surprisingli": 15, "sex": 15, "ancestri": 15, "deeb": 15, "2005": 15, "blind": 15, "reinforc": 15, "sparingli": 15, "detract": 15, "wari": 15, "overplot": 15, "overlap": 15, "zoom": 15, "vegafus": 15, "data_transform": 15, "curat": 15, "pieter": 15, "tan": 15, "noaa": 15, "gml": 15, "ralph": 15, "keel": 15, "scripp": 15, "oceanographi": 15, "dioxid": 15, "hawaii": 15, "1959": 15, "1980": 15, "co2_df": 15, "mauna_loa_data": 15, "parse_d": 15, "date_measur": 15, "ppm": 15, "338": 15, "340": 15, "341": 15, "06": [15, 16], "479": 15, "414": 15, "480": 15, "481": 15, "416": 15, "482": [15, 16], "483": 15, "484": 15, "datetime64": 15, "ns": 15, "iso": 15, "8601": 15, "alphanumer": 15, "mark_": 15, "leverag": 15, "helper": 15, "co2_scatt": 15, "upward": 15, "affirm": 15, "predecessor": 15, "successor": 15, "alter": 15, "segment": 15, "emphas": 15, "co2_lin": 15, "aha": 15, "phenomenon": 15, "fast": 15, "muddl": 15, "settl": 15, "configure_axi": 15, "titlefonts": 15, "co2_line_label": 15, "co2": 15, "configure_": 15, "1990": 15, "clip": 15, "stack": [15, 16], "co2_line_scal": 15, "late": 15, "season": 15, "summer": 15, "octob": 15, "winter": 15, "novemb": 15, "analog": 15, "paint": 15, "blank": 15, "canva": 15, "primer": 15, "akin": 15, "sketch": 15, "durat": 15, "geyser": 15, "yellowston": 15, "nation": 15, "wyom": 15, "79": 15, "283": 15, "533": 15, "267": 15, "117": [15, 16], "268": [15, 16], "270": 15, "817": 15, "271": 15, "467": 15, "272": 15, "faithful_scatt": 15, "faithful_scatter_label": 15, "faithful_scatter_labels_black": 15, "whom": 15, "hollow": 15, "can_lang_plot": 15, "vs": 15, "can_lang_plot_label": 15, "bunch": 15, "clump": 15, "french": [15, 16], "460": 15, "850": 15, "19460850": 15, "22162865": 15, "15265335": 15, "29748265": 15, "59": [15, 16], "7166700": 15, "6943800": 15, "3825215": 15, "10242945": 15, "logarithm": 15, "squish": 15, "log_": 15, "log10": 15, "inf": 15, "can_lang_plot_log": 15, "gridlin": 15, "seven": 15, "can_lang_plot_log_revis": 15, "tickcount": 15, "kilo": 15, "mutat": 15, "most_at_home_perc": 15, "001678": 15, "000669": 15, "029188": 15, "013612": 15, "003272": 15, "001266": 15, "038291": 15, "017026": 15, "076511": 15, "037367": 15, "011351": 15, "003940": 15, "005234": 15, "002276": 15, "036741": 15, "021763": 15, "038561": 15, "020155": 15, "025831": 15, "007439": 15, "can_lang_plot_perc": 15, "meaningfulli": 15, "onto": 15, "belong": [15, 16], "can_lang_plot_categori": 15, "laid": 15, "can_lang_plot_legend": 15, "orient": 15, "tableau10": 15, "vision": 15, "unsur": 15, "dark2": 15, "aesthet": 15, "switch": 15, "can_lang_plot_them": 15, "demand": 15, "tooltip": 15, "hover": 15, "mous": 15, "pointer": 15, "can_lang_plot_tooltip": 15, "mile": 15, "mcneil": 15, "contin": 15, "south": 15, "africa": 15, "europ": 15, "asia": 15, "australia": 15, "islands_df": 15, "landmass_typ": 15, "11506": 15, "5500": 15, "16988": 15, "2968": 15, "axel": 15, "heiberg": 15, "baffin": 15, "184": 15, "bank": 15, "borneo": 15, "britain": 15, "celeb": 15, "celon": 15, "cuba": 15, "devon": 15, "ellesmer": 15, "3745": 15, "greenland": 15, "840": 15, "hainan": 15, "hispaniola": 15, "hokkaido": 15, "honshu": 15, "iceland": 15, "ireland": 15, "java": 15, "kyushu": 15, "luzon": 15, "madagascar": 15, "227": 15, "melvil": 15, "mindanao": 15, "molucca": 15, "guinea": 15, "306": 15, "zealand": 15, "newfoundland": 15, "9390": 15, "novaya": 15, "zemlya": 15, "princ": 15, "wale": 15, "sakhalin": 15, "6795": 15, "southampton": 15, "spitsbergen": 15, "sumatra": 15, "183": 15, "taiwan": 15, "tasmania": 15, "tierra": 15, "fuego": 15, "timor": 15, "islands_bar": 15, "nlargest": 15, "tilt": 15, "sort_valu": 15, "islands_top12": 15, "islands_bar_top": 15, "appeal": 15, "minu": 15, "revers": 15, "caption": 15, "slide": 15, "summari": 15, "twelv": 15, "islands_plot_sort": 15, "morlei": 15, "1882": 15, "299": 15, "792": 15, "458": 15, "km": 15, "sec": 15, "kilometr": 15, "morley_df": 15, "expt": 15, "740": 15, "900": 15, "1070": [15, 16], "940": 15, "950": 15, "810": 15, "870": 15, "experiment": 15, "fell": 15, "morley_bar": 15, "thin": 15, "bucket": 15, "morley_hist": 15, "datum": 15, "thick": 15, "v_line": 15, "morley_hist_lin": 15, "morley_hist_color": 15, "sit": 15, "transluc": 15, "morley_hist_categor": 15, "deriv": 15, "incorrect": 15, "clearest": 15, "morley_hist_facet": 15, "1050": 15, "foremost": 15, "subtli": 15, "speed_of_light": 15, "299792": 15, "relativeerror": 15, "299000": 15, "019194": 15, "017498": 15, "035872": 15, "092578": 15, "045879": 15, "049215": 15, "052550": 15, "002516": 15, "005851": 15, "025865": 15, "morley_hist_rel": 15, "recreat": 15, "admir": 15, "morley_hist_maxbin": 15, "width": 15, "motiv": 15, "establish": 15, "pose": 15, "wiggl": 15, "discern": 15, "parenthes": [15, 16], "energi": 15, "automot": 15, "plant": 15, "burn": [15, 16], "fossil": 15, "fuel": 15, "greenhous": 15, "gase": 15, "byproduct": 15, "trap": 15, "heat": 15, "warm": 15, "observatori": 15, "amplitud": 15, "growth": 15, "1800": 15, "kilomet": 15, "farthest": 15, "confer": 15, "shop": 15, "billboard": 15, "pixel": 15, "lossi": 15, "lossless": 15, "jpeg": 15, "jpg": 15, "photograph": 15, "bmp": 15, "tiff": 15, "tif": 15, "gimp": 15, "redraw": 15, "ep": 15, "inkscap": 15, "shrink": 15, "portabl": 15, "hardl": 15, "1991": 15, "filenam": 15, "img": 15, "viz": 15, "faithful_plot": 15, "mb": 15, "decent": 15, "bigger": 15, "dee05": 15, "sameer": 15, "clinic": 15, "369": 15, "har91": 15, "wolfgang": 15, "york": 15, "mcn77": 15, "donald": 15, "mic82": 15, "veloc": 15, "nite": 15, "tate": 15, "aval": 15, "cademi": 15, "nnapoli": 15, "astronom": 15, "tk20": 15, "ccgg": 15, "vgh": 15, "jacob": 15, "granger": 15, "heer": 15, "dominik": 15, "moritz": 15, "kanit": 15, "wongsuphasawat": 15, "arvind": 15, "satyanarayan": 15, "eitan": 15, "ilia": 15, "timofeev": 15, "ben": 15, "welsh": 15, "scott": 15, "sievert": 15, "journal": [15, 16], "1057": 15, "21105": 15, "joss": 15, "01057": 15, "wil19": 15, "clau": 15, "clauswilk": 15, "dataviz": 15, "util": 16, "entiti": 16, "tabular": 16, "2235145": 16, "abbrevi": 16, "int": 16, "14159": 16, "boolean": 16, "bool": 16, "hello": 16, "nonetyp": 16, "arithmet": 16, "dict": 16, "cities_seri": 16, "separt": 16, "population_in_2016": 16, "1027613": 16, "1823281": 16, "544870": 16, "571146": 16, "321484": 16, "upcom": 16, "population_in_2016_df": 16, "criteria": 16, "wickham": 16, "No": 16, "bespok": 16, "untidi": 16, "2006": 16, "2011": 16, "land": 16, "region_lang_top5_cities_wid": 16, "cite": 16, "montr\u00e9al": 16, "lang_wid": 16, "985": 16, "1435": 16, "960": 16, "575": 16, "360": 16, "240": 16, "8485": 16, "1015": 16, "705": 16, "885": 16, "13260": 16, "2450": 16, "1090": 16, "1365": 16, "770": 16, "2440": 16, "5290": 16, "1025": 16, "380": 16, "3355": 16, "8960": 16, "3380": 16, "1430": 16, "tough": 16, "lang_mother_tidi": 16, "id_var": 16, "var_nam": 16, "value_nam": 16, "1065": 16, "1066": 16, "1067": 16, "1068": 16, "1069": 16, "met": 16, "commut": 16, "widen": 16, "region_lang_top5_cities_long": 16, "lang_long": 16, "2135": 16, "2136": 16, "2137": 16, "2138": 16, "2139": 16, "2140": 16, "lang_home_tidi": 16, "2495": 16, "1622735": 16, "1330555": 16, "8630": 16, "3245": 16, "behaviour": 16, "colum": 16, "messier": 16, "dealt": 16, "lang_messi": 16, "region_lang_top5_cities_messi": 16, "265": 16, "520": 16, "505": 16, "4045": 16, "440": 16, "330": 16, "6380": 16, "1445": 16, "530": 16, "620": 16, "3130": 16, "760": 16, "6665": 16, "860": 16, "1080": 16, "lang_messy_long": 16, "tidy_lang": 16, "astyp": 16, "depth": 16, "occas": 16, "official_lang": 16, "3836770": 16, "3218725": 16, "29800": 16, "11940": 16, "620510": 16, "412120": 16, "2669195": 16, "1607550": 16, "487": 16, "696": 16, "1065070": 16, "844740": 16, "701": 16, "910": 16, "1050410": 16, "792700": 16, "915": 16, "10950": 16, "2520": 16, "1060": 16, "ampersand": 16, "pipe": 16, "region_data": 16, "household": 16, "dwell": 16, "bellevil": 16, "43002": 16, "1354": 16, "65121": 16, "103472": 16, "45050": 16, "lethbridg": 16, "45696": 16, "3046": 16, "69699": 16, "117394": 16, "48317": 16, "thunder": 16, "bai": 16, "52545": 16, "2618": 16, "26318": 16, "121621": 16, "57146": 16, "peterborough": 16, "50533": 16, "1636": 16, "98336": 16, "121721": 16, "55662": 16, "saint": 16, "john": 16, "52872": 16, "3793": 16, "42158": 16, "126202": 16, "58398": 16, "535499": 16, "7168": 16, "96442": 16, "1323783": 16, "519693": 16, "5241": 16, "70103": 16, "1392609": 16, "960894": 16, "3040": 16, "41532": 16, "2463431": 16, "1727310": 16, "4638": 16, "24059": 16, "4098927": 16, "2135909": 16, "6269": 16, "93132": 16, "5928040": 16, "interst": 16, "city_nam": 16, "five_c": 16, "502143": 16, "9857": 16, "77908": 16, "1321426": 16, "537634": 16, "seriesa": 16, "seriesb": 16, "669": 16, "capabl": 16, "omit": 16, "startswith": 16, "darker": 16, "region_lang": 16, "moncton": 16, "saguenai": 16, "7485": 16, "7486": 16, "7487": 16, "abbotsford": 16, "mission": 16, "7488": 16, "7489": 16, "7490": 16, "23171710": 16, "std": 16, "490000e": 16, "093686e": 16, "401258e": 16, "000000e": 16, "836770e": 16, "25th": 16, "50th": 16, "75th": 16, "skipna": 16, "3061820": 16, "5600480": 16, "numeric_onli": 16, "3200": 16, "341121": 16, "3093": 16, "686248": 16, "1853": 16, "757677": 16, "5127": 16, "499332": 16, "55231": 16, "640268": 16, "64012": 16, "578320": 16, "48574": 16, "532066": 16, "94001": 16, "162338": 16, "cartoon": 16, "dataframegroupbi": 16, "0x7ff9d6ae8950": 16, "137445": 16, "182390": 16, "97840": 16, "brantford": 16, "124560": 16, "troi": 16, "rivi\u00e8r": 16, "149835": 16, "331375": 16, "270715": 16, "612595": 16, "23015": 16, "875": 16, "8235": 16, "2695": 16, "102": 16, "365": 16, "23565": 16, "104": 16, "11185": 16, "122100": 16, "93495": 16, "167835": 16, "168990": 16, "115125": 16, "193445": 16, "93655": 16, "54150": 16, "100855": 16, "116645": 16, "73910": 16, "130835": 16, "937055": 16, "1343335": 16, "147805": 16, "78610": 16, "149805": 16, "1316635": 16, "2289515": 16, "302690": 16, "211705": 16, "354470": 16, "235990": 16, "166220": 16, "318540": 16, "530570": 16, "437460": 16, "749285": 16, "keyerror": 16, "qu\u00e9bec": 16, "028571": 16, "region_lang_num": 16, "wise": 16, "040": 16, "aforement": 16, "english_lang": 16, "1898": 16, "444955": 16, "2500590": 16, "1903": 16, "1918": 16, "1919": 16, "930405": 16, "1275265": 16, "1923": 16, "city_pop": 16, "unchang": 16, "tmp": 16, "ipykernel_12": 16, "2654974267": 16, "settingwithcopywarn": 16, "row_index": 16, "col_index": 16, "pydata": 16, "doc": 16, "stabl": 16, "user_guid": 16, "warn": 16, "went": 16, "silenc": 16, "div": 16, "divis": 16, "108554": 16, "151384": 16, "100543": 16, "610060": 16, "516498": 16, "647224": 16, "542966": 16, "944744": 16, "672877": 16, "764802": 16, "606588": 16, "964617": 16, "704092": 16, "794906": 16, "599882": 16, "965067": 16, "534472": 16, "658730": 16, "540123": 16, "929401": 16, "city_popul": 16, "wic14": 16, "hadlei": 16}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"acknowledg": 0, "python": [0, 3, 4, 5, 6, 7, 8, 10, 12, 16], "edit": [0, 5, 8, 14], "about": 1, "author": 1, "classif": [2, 3], "i": [2, 11, 14], "train": [2, 3, 11], "predict": [2, 3], "overview": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "chapter": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "learn": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "object": [2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], "The": [2, 3, 4, 8, 11, 12], "problem": [2, 11], "explor": [2, 7, 8, 11], "data": [2, 3, 5, 7, 8, 10, 11, 15, 16], "set": [2, 3, 7, 11, 13, 15], "load": [2, 7], "cancer": 2, "describ": 2, "variabl": [2, 3], "k": [2, 4, 11, 12], "nearest": [2, 11], "neighbor": [2, 11], "distanc": 2, "between": 2, "point": 2, "evalu": [2, 3, 11], "from": [2, 10, 14, 16], "new": [2, 8, 12], "observ": 2, "each": 2, "its": 2, "5": 2, "more": 2, "than": 2, "two": 2, "explanatori": 2, "summari": [2, 3, 6, 8, 10, 16], "algorithm": [2, 4], "scikit": [2, 3], "preprocess": [2, 3], "center": 2, "scale": 2, "balanc": 2, "miss": [2, 10], "put": [2, 7], "togeth": [2, 7], "pipelin": 2, "exercis": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "refer": [2, 3, 4, 6, 7, 10, 11, 12, 14, 15, 16], "ii": [3, 12], "tune": [3, 11], "perform": [3, 16], "an": [3, 4, 10, 15], "exampl": [3, 4], "confus": 3, "matrix": 3, "tumor": 3, "imag": 3, "random": [3, 4], "seed": 3, "creat": [3, 7, 8, 14, 15], "test": [3, 11], "split": [3, 16], "classifi": 3, "label": 3, "critic": 3, "analyz": 3, "cross": 3, "valid": 3, "paramet": 3, "valu": [3, 7, 10, 16], "select": [3, 7, 16], "under": 3, "overfit": [3, 11], "predictor": [3, 12], "effect": [3, 15], "irrelev": 3, "find": 3, "good": 3, "subset": [3, 7], "forward": 3, "addit": [3, 4, 6, 8, 10, 12, 14, 15, 16], "resourc": [3, 4, 6, 8, 10, 12, 14, 15, 16], "cluster": 4, "illustr": 4, "mean": [4, 6], "measur": 4, "qualiti": 4, "restart": 4, "choos": [4, 15], "scienc": 5, "A": 5, "first": 5, "introduct": 5, "welcom": 5, "statist": [6, 16], "infer": 6, "why": [6, 10, 14], "do": [6, 16], "we": [6, 10], "need": 6, "sampl": 6, "distribut": 6, "proport": 6, "bootstrap": 6, "us": [6, 7, 10, 14, 16], "calcul": [6, 16], "plausibl": 6, "rang": 6, "panda": 7, "canadian": [7, 15], "languag": [7, 15], "ask": 7, "question": 7, "type": [7, 16], "analysi": 7, "tabular": [7, 10], "name": [7, 10, 16], "thing": 7, "frame": [7, 16], "loc": [7, 16], "filter": [7, 16], "row": [7, 10, 16], "column": [7, 10, 16], "sort_valu": 7, "head": 7, "order": 7, "ad": [7, 15, 16], "modifi": [7, 16], "combin": [7, 8, 16], "step": 7, "chain": 7, "multilin": 7, "express": 7, "visual": [7, 15], "altair": [7, 15], "bar": [7, 15], "plot": [7, 15], "format": [7, 8, 15], "chart": [7, 15], "all": [7, 10], "access": [7, 8, 10, 14], "document": 7, "code": 8, "text": [8, 10, 15], "jupyt": [8, 14], "cell": 8, "execut": 8, "kernel": 8, "markdown": 8, "save": [8, 15], "your": [8, 13, 14], "work": [8, 13, 14], "best": 8, "practic": 8, "run": 8, "notebook": 8, "includ": 8, "packag": 8, "file": [8, 10, 14, 15], "export": 8, "differ": [8, 10, 15], "html": [8, 10], "pdf": 8, "prefac": 9, "read": 10, "local": [10, 14], "web": 10, "absolut": 10, "rel": 10, "path": 10, "plain": 10, "read_csv": 10, "comma": 10, "separ": [10, 16], "skip": 10, "when": [10, 15], "sep": 10, "argument": 10, "header": 10, "handl": [10, 14], "directli": 10, "url": 10, "preview": 10, "befor": 10, "microsoft": 10, "excel": 10, "read_excel": 10, "databas": 10, "sqlite": 10, "postgresql": 10, "should": [10, 14], "bother": 10, "write": 10, "csv": 10, "obtain": [10, 13], "scrape": 10, "css": 10, "selector": 10, "beautifulsoup": 10, "read_html": 10, "api": 10, "nasa": 10, "regress": [11, 12], "model": 11, "underfit": 11, "multivari": [11, 12], "nn": [11, 12], "strength": 11, "limit": 11, "linear": 12, "simpl": 12, "compar": 12, "multicollinear": 12, "outlier": 12, "design": 12, "other": 12, "side": 12, "up": [13, 16], "comput": 13, "worksheet": 13, "thi": [13, 16], "book": 13, "docker": 13, "window": 13, "maco": 13, "ubuntu": 13, "jupyterlab": 13, "desktop": 13, "collabor": 14, "version": 14, "control": 14, "what": [14, 16], "repositori": 14, "workflow": 14, "commit": 14, "chang": 14, "push": 14, "remot": 14, "pull": 14, "github": 14, "pen": 14, "tool": 14, "add": 14, "menu": 14, "gener": 14, "person": 14, "token": 14, "clone": 14, "specifi": 14, "make": 14, "give": 14, "project": 14, "merg": [14, 16], "conflict": 14, "commun": 14, "issu": 14, "refin": 15, "scatter": 15, "line": 15, "mauna": 15, "loa": 15, "co_": 15, "2": 15, "old": 15, "faith": 15, "erupt": 15, "time": 15, "axi": 15, "transform": 15, "color": 15, "island": 15, "landmass": 15, "histogram": 15, "michelson": 15, "speed": 15, "light": 15, "layer": 15, "binwidth": 15, "explain": 15, "size": 15, "clean": 16, "wrangl": 16, "seri": 16, "basic": 16, "doe": 16, "have": 16, "structur": 16, "tidi": 16, "go": 16, "wide": 16, "long": 16, "melt": 16, "pivot": 16, "str": 16, "deal": 16, "multipl": 16, "extract": 16, "certain": 16, "satisfi": 16, "condit": 16, "least": 16, "one": 16, "list": 16, "isin": 16, "abov": 16, "below": 16, "threshold": 16, "queri": 16, "iloc": 16, "posit": 16, "aggreg": 16, "individu": 16, "oper": 16, "group": 16, "groupbi": 16, "appli": 16, "function": 16, "across": 16}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 56}}) \ No newline at end of file diff --git a/pull317/viz.html b/pull317/viz.html index e155a42c..fa5c28aa 100644 --- a/pull317/viz.html +++ b/pull317/viz.html @@ -733,23 +733,23 @@

4.5.1. Scatter plots and line plots: the
-
+

Fig. 4.2 Scatter plot of atmospheric concentration of CO\(_{2}\) over time.#

@@ -833,23 +833,23 @@

4.5.1. Scatter plots and line plots: the
-
+

Fig. 4.3 Line plot of atmospheric concentration of CO\(_{2}\) over time.#

@@ -929,23 +929,23 @@

4.5.1. Scatter plots and line plots: the
-
+

Fig. 4.4 Line plot of atmospheric concentration of CO\(_{2}\) over time with clearer axes and labels.#

@@ -1035,23 +1035,23 @@

4.5.1. Scatter plots and line plots: the
-
+

Fig. 4.5 Line plot of atmospheric concentration of CO\(_{2}\) from 1990 to 1995.#

@@ -1247,23 +1247,23 @@

4.5.2. Scatter plots: the Old Faithful e
-
+

Fig. 4.6 Scatter plot of waiting time and eruption time.#

@@ -1334,23 +1334,23 @@

4.5.2. Scatter plots: the Old Faithful e
-
+

Fig. 4.7 Scatter plot of waiting time and eruption time with clearer axes and labels.#

@@ -1415,23 +1415,23 @@

4.5.2. Scatter plots: the Old Faithful e
-
+

Fig. 4.8 Scatter plot of waiting time and eruption time with black points.#

@@ -1657,23 +1657,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.9 Scatter plot of number of Canadians reporting a language as their mother tongue vs the primary language at home#

@@ -1751,23 +1751,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.10 Scatter plot of number of Canadians reporting a language as their mother tongue vs the primary language at home with x and y labels.#

@@ -1925,23 +1925,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.11 Scatter plot of number of Canadians reporting a language as their mother tongue vs the primary language at home with log-adjusted x and y axes.#

@@ -2021,23 +2021,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.12 Scatter plot of number of Canadians reporting a language as their mother tongue vs the primary language at home with log-adjusted x and y axes. Only the major gridlines are shown. The suffix “k” indicates 1,000 (“kilo”), while the suffix “M” indicates 1,000,000 (“million”).#

@@ -2228,23 +2228,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.13 Scatter plot of percentage of Canadians reporting a language as their mother tongue vs the primary language at home.#

@@ -2365,23 +2365,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.14 Scatter plot of percentage of Canadians reporting a language as their mother tongue vs the primary language at home colored by language category.#

@@ -2460,23 +2460,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.15 Scatter plot of percentage of Canadians reporting a language as their mother tongue vs the primary language at home colored by language category with the legend edited.#

@@ -2571,23 +2571,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.16 Scatter plot of percentage of Canadians reporting a language as their mother tongue vs the primary language at home colored by language category with custom colors and shapes.#

@@ -2673,23 +2673,23 @@

4.5.3. Axis transformation and colored s
-
+

Fig. 4.17 Scatter plot of percentage of Canadians reporting a language as their mother tongue vs the primary language at home colored by language category with custom colors and mouse hover tooltip.#

@@ -3119,23 +3119,23 @@

4.5.4. Bar plots: the island landmass da
-
+

Fig. 4.18 Bar plot of Earth’s landmass sizes. The plot is too wide with the default settings.#

@@ -3220,23 +3220,23 @@

4.5.4. Bar plots: the island landmass da
-
+

Fig. 4.19 Bar plot of size for Earth’s largest 12 landmasses.#

@@ -3331,23 +3331,23 @@

4.5.4. Bar plots: the island landmass da
-
+

Fig. 4.20 Bar plot of size for Earth’s largest 12 landmasses, colored by landmass type, with clearer axes and labels.#

@@ -3559,23 +3559,23 @@

4.5.4. Bar plots: the island landmass da
-
+

Fig. 4.21 A bar chart of Michelson’s speed of light data.#

@@ -3650,23 +3650,23 @@

4.5.4. Bar plots: the island landmass da
-
+

Fig. 4.22 Histogram of Michelson’s speed of light data.#

@@ -3772,23 +3772,23 @@

Adding layers to an
-
+

Fig. 4.23 Histogram of Michelson’s speed of light data with vertical line indicating the true speed of light.#

@@ -3866,23 +3866,23 @@

Adding layers to an
-
+

Fig. 4.24 Histogram of Michelson’s speed of light data colored by experiment.#

@@ -3990,23 +3990,23 @@

Adding layers to an
-
+

Fig. 4.25 Histogram of Michelson’s speed of light data colored by experiment as a categorical variable.#

@@ -4093,23 +4093,23 @@

Adding layers to an
-
+

Fig. 4.26 Histogram of Michelson’s speed of light data split vertically by experiment.#

@@ -4325,23 +4325,23 @@

Adding layers to an
-
+

Fig. 4.27 Histogram of relative error split vertically by experiment with clearer axes and labels#

@@ -4415,23 +4415,23 @@

Choosing a binwidth for histograms
-
+

Fig. 4.28 Histogram of Michelson’s speed of light data.#

@@ -4503,23 +4503,23 @@

Choosing a binwidth for histograms
-
+

Fig. 4.29 Effect of varying number of max bins on histograms.#

diff --git a/pull317/wrangling.html b/pull317/wrangling.html index 0f644fd6..e7e5f3a0 100644 --- a/pull317/wrangling.html +++ b/pull317/wrangling.html @@ -4790,7 +4790,7 @@

3.9. Performing operations on groups of

-
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fbc92338f90>
+
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7ff9d6ae8950>