UBC-DSCI · trevorcampbell · Nov 13, 2023 · Nov 13, 2023 · Nov 13, 2023 · Nov 13, 2023
diff --git a/source/classification2.md b/source/classification2.md
@@ -280,7 +280,7 @@ we call the `seed` function from the `numpy` package, and pass it any integer as
 Below we use the seed number `1`. At 
 that point, Python will keep track of the randomness that occurs throughout the code.
 For example, we can call the `sample` method
-on the series of numbers, passing the argument `n = 10` to indicate that we want 10 samples.
+on the series of numbers, passing the argument `n=10` to indicate that we want 10 samples.
 
 ```{code-cell} ipython3
 import numpy as np
@@ -290,7 +290,7 @@ np.random.seed(1)
 
 nums_0_to_9 = pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
 
-random_numbers1 = nums_0_to_9.sample(n = 10).to_numpy()
+random_numbers1 = nums_0_to_9.sample(n=10).to_numpy()
 random_numbers1
 ```
 You can see that `random_numbers1` is a list of 10 numbers
@@ -299,7 +299,7 @@ we run the `sample` method again,
 we will get a fresh batch of 10 numbers that also look random.
 
 ```{code-cell} ipython3
-random_numbers2 = nums_0_to_9.sample(n = 10).to_numpy()
+random_numbers2 = nums_0_to_9.sample(n=10).to_numpy()
 random_numbers2
 ```
 
@@ -309,12 +309,12 @@ as before---and then call the `sample` method again.
 
 ```{code-cell} ipython3
 np.random.seed(1)
-random_numbers1_again = nums_0_to_9.sample(n = 10).to_numpy()
+random_numbers1_again = nums_0_to_9.sample(n=10).to_numpy()
 random_numbers1_again
 ```
 
 ```{code-cell} ipython3
-random_numbers2_again = nums_0_to_9.sample(n = 10).to_numpy()
+random_numbers2_again = nums_0_to_9.sample(n=10).to_numpy()
 random_numbers2_again
 ```
 
@@ -326,12 +326,12 @@ obtain a different sequence of random numbers.
 
 ```{code-cell} ipython3
 np.random.seed(4235)
-random_numbers = nums_0_to_9.sample(n = 10).to_numpy()
+random_numbers = nums_0_to_9.sample(n=10).to_numpy()
 random_numbers
 ```
 
 ```{code-cell} ipython3
-random_numbers = nums_0_to_9.sample(n = 10).to_numpy()
+random_numbers = nums_0_to_9.sample(n=10).to_numpy()
 random_numbers
 ```
 
@@ -378,15 +378,15 @@ functions. Those functions will then use your `RandomState` to generate random n
 `numpy`'s default generator. For example, we can reproduce our earlier example by using a `RandomState`
 object with the `seed` value set to 1; we get the same lists of numbers once again.
 ```{code}
-rnd = np.random.RandomState(seed = 1)
-random_numbers1_third = nums_0_to_9.sample(n = 10, random_state = rnd).to_numpy()
+rnd = np.random.RandomState(seed=1)
+random_numbers1_third = nums_0_to_9.sample(n=10, random_state=rnd).to_numpy()
 random_numbers1_third
 ``` 
 ```{code}
 array([2, 9, 6, 4, 0, 3, 1, 7, 8, 5])
 ```
 ```{code}
-random_numbers2_third = nums_0_to_9.sample(n = 10, random_state = rnd).to_numpy()
+random_numbers2_third = nums_0_to_9.sample(n=10, random_state=rnd).to_numpy()
 random_numbers2_third
 ``` 
 ```{code}
@@ -540,8 +540,8 @@ cancer_train["Class"].value_counts(normalize=True)
 ```{code-cell} ipython3
 :tags: [remove-cell]
 
-glue("cancer_train_b_prop", "{:0.0f}".format(cancer_train["Class"].value_counts(normalize = True)["Benign"]*100))
-glue("cancer_train_m_prop", "{:0.0f}".format(cancer_train["Class"].value_counts(normalize = True)["Malignant"]*100))
+glue("cancer_train_b_prop", "{:0.0f}".format(cancer_train["Class"].value_counts(normalize=True)["Benign"]*100))
+glue("cancer_train_m_prop", "{:0.0f}".format(cancer_train["Class"].value_counts(normalize=True)["Malignant"]*100))
 ```
 
 ### Preprocess the data
@@ -1620,7 +1620,7 @@ for i in range(len(ks)):
     cancer_tune_pipe = make_pipeline(cancer_preprocessor, KNeighborsClassifier())
     param_grid = {
         "kneighborsclassifier__n_neighbors": range(1, 21),
-    }  ## double check: in R textbook, it is tune_grid(..., grid = 20), so I guess it matches RandomizedSearchCV
+    }  ## double check: in R textbook, it is tune_grid(..., grid=20), so I guess it matches RandomizedSearchCV
-       ## instead of GridSeachCV?
+       ## instead of GridSearchCV?
     # param_grid_rand = {
     #     "kneighborsclassifier__n_neighbors": range(1, 100),

diff --git a/source/clustering.md b/source/clustering.md
@@ -182,10 +182,10 @@ in the clustering pipeline.
 ```{code-cell} ipython3
 :tags: [remove-cell]
 penguins_standardized = penguins.assign(
-	bill_length_standardized = (penguins["bill_length_mm"] - penguins["bill_length_mm"].mean())/penguins["bill_length_mm"].std(),
-    flipper_length_standardized = (penguins["flipper_length_mm"] - penguins["flipper_length_mm"].mean())/penguins["flipper_length_mm"].std()
+    bill_length_standardized=(penguins["bill_length_mm"] - penguins["bill_length_mm"].mean())/penguins["bill_length_mm"].std(),
+    flipper_length_standardized=(penguins["flipper_length_mm"] - penguins["flipper_length_mm"].mean())/penguins["flipper_length_mm"].std()
 ).drop(
-    columns = ["bill_length_mm", "flipper_length_mm"]
+    columns=["bill_length_mm", "flipper_length_mm"]
 )
 ```
 
@@ -261,7 +261,7 @@ kmeans = KMeans(n_clusters=3)
 
 penguin_clust = kmeans.fit(penguins_standardized)
 
-penguins_clustered = penguins_standardized.assign(cluster = penguin_clust.labels_)
+penguins_clustered = penguins_standardized.assign(cluster=penguin_clust.labels_)
 
 colored_scatter_plot = alt.Chart(penguins_clustered).mark_circle().encode(
     x=alt.X("flipper_length_standardized", title="Flipper Length (standardized)"),

diff --git a/source/inference.md b/source/inference.md
@@ -716,9 +716,9 @@ glue(
             x="mean(price)"
         ),
         base.mark_text(align="left", color="#f58518", size=12, fontWeight="bold", dx=10).transform_aggregate(
-            mean_price = "mean(price)",
+            mean_price="mean(price)",
         ).transform_calculate(
-            label = "'Mean = ' + round(datum.mean_price * 10) / 10"
+            label="'Mean = ' + round(datum.mean_price * 10) / 10"
         ).encode(
             x=alt.X("mean_price:Q", title="Sample mean price per night (dollars)"),
             y=alt.value(10),

diff --git a/source/reading.md b/source/reading.md
@@ -392,10 +392,10 @@ contain its own column names.
 
 ```{code-cell} ipython3
 :tags: ["output_scroll"]
-canlang_data =  pd.read_csv(
+canlang_data = pd.read_csv(
     "data/can_lang_no_names.tsv",
-    sep = "\t",
-    header = None
+    sep="\t",
+    header=None
 )
 canlang_data
 ```

diff --git a/source/regression1.md b/source/regression1.md
@@ -833,8 +833,8 @@ from sklearn.metrics import mean_squared_error
 
 sacramento_test["predicted"] = sacr_gridsearch.predict(sacramento_test)
 RMSPE = mean_squared_error(
-    y_true = sacramento_test["price"],
-    y_pred = sacramento_test["predicted"]
+    y_true=sacramento_test["price"],
+    y_pred=sacramento_test["predicted"]
 )**(1/2)
 RMSPE
 ```
@@ -1066,8 +1066,8 @@ to compute the RMSPE.
 ```{code-cell} ipython3
 sacramento_test["predicted"] = sacr_gridsearch.predict(sacramento_test)
 RMSPE_mult = mean_squared_error(
-    y_true = sacramento_test["price"], 
-    y_pred = sacramento_test["predicted"]
+    y_true=sacramento_test["price"],
+    y_pred=sacramento_test["predicted"]
 )**(1/2)
 RMSPE_mult
 

diff --git a/source/regression2.md b/source/regression2.md
@@ -440,8 +440,8 @@ sacramento_test["predicted"] = lm.predict(sacramento_test[["sqft"]])
 
 # calculate RMSPE
 RMSPE = mean_squared_error(
-    y_true = sacramento_test["price"],
-    y_pred = sacramento_test["predicted"]
+    y_true=sacramento_test["price"],
+    y_pred=sacramento_test["predicted"]
 )**(1/2)
 
 RMSPE
@@ -734,8 +734,8 @@ Finally, we make predictions on the test data set to assess the quality of our m
 sacramento_test["predicted"] = mlm.predict(sacramento_test[["sqft","beds"]])
 
 lm_mult_test_RMSPE = mean_squared_error(
-    y_true = sacramento_test["price"], 
-    y_pred = sacramento_test["predicted"]
+    y_true=sacramento_test["price"],
+    y_pred=sacramento_test["predicted"]
 )**(1/2)
 lm_mult_test_RMSPE
 ```