diff --git a/pull313/_sources/classification2.md b/pull313/_sources/classification2.md index bc250318..8b387a81 100644 --- a/pull313/_sources/classification2.md +++ b/pull313/_sources/classification2.md @@ -280,7 +280,7 @@ we call the `seed` function from the `numpy` package, and pass it any integer as Below we use the seed number `1`. At that point, Python will keep track of the randomness that occurs throughout the code. For example, we can call the `sample` method -on the series of numbers, passing the argument `n = 10` to indicate that we want 10 samples. +on the series of numbers, passing the argument `n=10` to indicate that we want 10 samples. ```{code-cell} ipython3 import numpy as np @@ -290,7 +290,7 @@ np.random.seed(1) nums_0_to_9 = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) -random_numbers1 = nums_0_to_9.sample(n = 10).to_numpy() +random_numbers1 = nums_0_to_9.sample(n=10).to_numpy() random_numbers1 ``` You can see that `random_numbers1` is a list of 10 numbers @@ -299,7 +299,7 @@ we run the `sample` method again, we will get a fresh batch of 10 numbers that also look random. ```{code-cell} ipython3 -random_numbers2 = nums_0_to_9.sample(n = 10).to_numpy() +random_numbers2 = nums_0_to_9.sample(n=10).to_numpy() random_numbers2 ``` @@ -309,12 +309,12 @@ as before---and then call the `sample` method again. ```{code-cell} ipython3 np.random.seed(1) -random_numbers1_again = nums_0_to_9.sample(n = 10).to_numpy() +random_numbers1_again = nums_0_to_9.sample(n=10).to_numpy() random_numbers1_again ``` ```{code-cell} ipython3 -random_numbers2_again = nums_0_to_9.sample(n = 10).to_numpy() +random_numbers2_again = nums_0_to_9.sample(n=10).to_numpy() random_numbers2_again ``` @@ -326,12 +326,12 @@ obtain a different sequence of random numbers. ```{code-cell} ipython3 np.random.seed(4235) -random_numbers = nums_0_to_9.sample(n = 10).to_numpy() +random_numbers = nums_0_to_9.sample(n=10).to_numpy() random_numbers ``` ```{code-cell} ipython3 -random_numbers = nums_0_to_9.sample(n = 10).to_numpy() +random_numbers = nums_0_to_9.sample(n=10).to_numpy() random_numbers ``` @@ -378,15 +378,15 @@ functions. Those functions will then use your `RandomState` to generate random n `numpy`'s default generator. For example, we can reproduce our earlier example by using a `RandomState` object with the `seed` value set to 1; we get the same lists of numbers once again. ```{code} -rnd = np.random.RandomState(seed = 1) -random_numbers1_third = nums_0_to_9.sample(n = 10, random_state = rnd).to_numpy() +rnd = np.random.RandomState(seed=1) +random_numbers1_third = nums_0_to_9.sample(n=10, random_state=rnd).to_numpy() random_numbers1_third ``` ```{code} array([2, 9, 6, 4, 0, 3, 1, 7, 8, 5]) ``` ```{code} -random_numbers2_third = nums_0_to_9.sample(n = 10, random_state = rnd).to_numpy() +random_numbers2_third = nums_0_to_9.sample(n=10, random_state=rnd).to_numpy() random_numbers2_third ``` ```{code} @@ -540,8 +540,8 @@ cancer_train["Class"].value_counts(normalize=True) ```{code-cell} ipython3 :tags: [remove-cell] -glue("cancer_train_b_prop", "{:0.0f}".format(cancer_train["Class"].value_counts(normalize = True)["Benign"]*100)) -glue("cancer_train_m_prop", "{:0.0f}".format(cancer_train["Class"].value_counts(normalize = True)["Malignant"]*100)) +glue("cancer_train_b_prop", "{:0.0f}".format(cancer_train["Class"].value_counts(normalize=True)["Benign"]*100)) +glue("cancer_train_m_prop", "{:0.0f}".format(cancer_train["Class"].value_counts(normalize=True)["Malignant"]*100)) ``` ### Preprocess the data @@ -1620,7 +1620,7 @@ for i in range(len(ks)): cancer_tune_pipe = make_pipeline(cancer_preprocessor, KNeighborsClassifier()) param_grid = { "kneighborsclassifier__n_neighbors": range(1, 21), - } ## double check: in R textbook, it is tune_grid(..., grid = 20), so I guess it matches RandomizedSearchCV + } ## double check: in R textbook, it is tune_grid(..., grid=20), so I guess it matches RandomizedSearchCV ## instead of GridSeachCV? # param_grid_rand = { # "kneighborsclassifier__n_neighbors": range(1, 100), diff --git a/pull313/_sources/clustering.md b/pull313/_sources/clustering.md index 25de3c0c..dc1c6759 100644 --- a/pull313/_sources/clustering.md +++ b/pull313/_sources/clustering.md @@ -182,10 +182,10 @@ in the clustering pipeline. ```{code-cell} ipython3 :tags: [remove-cell] penguins_standardized = penguins.assign( - bill_length_standardized = (penguins["bill_length_mm"] - penguins["bill_length_mm"].mean())/penguins["bill_length_mm"].std(), - flipper_length_standardized = (penguins["flipper_length_mm"] - penguins["flipper_length_mm"].mean())/penguins["flipper_length_mm"].std() + bill_length_standardized=(penguins["bill_length_mm"] - penguins["bill_length_mm"].mean())/penguins["bill_length_mm"].std(), + flipper_length_standardized=(penguins["flipper_length_mm"] - penguins["flipper_length_mm"].mean())/penguins["flipper_length_mm"].std() ).drop( - columns = ["bill_length_mm", "flipper_length_mm"] + columns=["bill_length_mm", "flipper_length_mm"] ) ``` @@ -261,7 +261,7 @@ kmeans = KMeans(n_clusters=3) penguin_clust = kmeans.fit(penguins_standardized) -penguins_clustered = penguins_standardized.assign(cluster = penguin_clust.labels_) +penguins_clustered = penguins_standardized.assign(cluster=penguin_clust.labels_) colored_scatter_plot = alt.Chart(penguins_clustered).mark_circle().encode( x=alt.X("flipper_length_standardized", title="Flipper Length (standardized)"), diff --git a/pull313/_sources/inference.md b/pull313/_sources/inference.md index bdb9d4b7..6e89cc1d 100644 --- a/pull313/_sources/inference.md +++ b/pull313/_sources/inference.md @@ -716,9 +716,9 @@ glue( x="mean(price)" ), base.mark_text(align="left", color="#f58518", size=12, fontWeight="bold", dx=10).transform_aggregate( - mean_price = "mean(price)", + mean_price="mean(price)", ).transform_calculate( - label = "'Mean = ' + round(datum.mean_price * 10) / 10" + label="'Mean = ' + round(datum.mean_price * 10) / 10" ).encode( x=alt.X("mean_price:Q", title="Sample mean price per night (dollars)"), y=alt.value(10), diff --git a/pull313/_sources/regression1.md b/pull313/_sources/regression1.md index e20fe67c..973d14bd 100644 --- a/pull313/_sources/regression1.md +++ b/pull313/_sources/regression1.md @@ -833,8 +833,8 @@ from sklearn.metrics import mean_squared_error sacramento_test["predicted"] = sacr_gridsearch.predict(sacramento_test) RMSPE = mean_squared_error( - y_true = sacramento_test["price"], - y_pred = sacramento_test["predicted"] + y_true=sacramento_test["price"], + y_pred=sacramento_test["predicted"] )**(1/2) RMSPE ``` @@ -1066,8 +1066,8 @@ to compute the RMSPE. ```{code-cell} ipython3 sacramento_test["predicted"] = sacr_gridsearch.predict(sacramento_test) RMSPE_mult = mean_squared_error( - y_true = sacramento_test["price"], - y_pred = sacramento_test["predicted"] + y_true=sacramento_test["price"], + y_pred=sacramento_test["predicted"] )**(1/2) RMSPE_mult diff --git a/pull313/_sources/regression2.md b/pull313/_sources/regression2.md index edca8052..60db7cd4 100644 --- a/pull313/_sources/regression2.md +++ b/pull313/_sources/regression2.md @@ -440,8 +440,8 @@ sacramento_test["predicted"] = lm.predict(sacramento_test[["sqft"]]) # calculate RMSPE RMSPE = mean_squared_error( - y_true = sacramento_test["price"], - y_pred = sacramento_test["predicted"] + y_true=sacramento_test["price"], + y_pred=sacramento_test["predicted"] )**(1/2) RMSPE @@ -734,8 +734,8 @@ Finally, we make predictions on the test data set to assess the quality of our m sacramento_test["predicted"] = mlm.predict(sacramento_test[["sqft","beds"]]) lm_mult_test_RMSPE = mean_squared_error( - y_true = sacramento_test["price"], - y_pred = sacramento_test["predicted"] + y_true=sacramento_test["price"], + y_pred=sacramento_test["predicted"] )**(1/2) lm_mult_test_RMSPE ``` diff --git a/pull313/classification1.html b/pull313/classification1.html index 857dc6fc..e21b08de 100644 --- a/pull313/classification1.html +++ b/pull313/classification1.html @@ -872,23 +872,23 @@