From bc5d6755257b8d7c17a9f2d2271fc9f6072f788d Mon Sep 17 00:00:00 2001
From: James J Balamuta <james.balamuta@gmail.com>
Date: Fri, 9 Aug 2024 18:37:48 -0700
Subject: [PATCH] Add content demo

---
 .github/workflows/publish-website.yml |  67 +++++
 _quarto.yml                           |  21 ++
 index.qmd                             |  67 +++++
 slides/lecture-01.qmd                 | 360 ++++++++++++++++++++++++++
 4 files changed, 515 insertions(+)
 create mode 100644 .github/workflows/publish-website.yml
 create mode 100644 _quarto.yml
 create mode 100644 index.qmd
 create mode 100644 slides/lecture-01.qmd

diff --git a/.github/workflows/publish-website.yml b/.github/workflows/publish-website.yml
new file mode 100644
index 0000000..5a65ebe
--- /dev/null
+++ b/.github/workflows/publish-website.yml
@@ -0,0 +1,67 @@
+on:
+  push:
+    branches: [main, master]
+  release:
+    types: [published]
+  workflow_dispatch: {}
+
+name: generate-website
+
+jobs:
+  demo-website:
+    runs-on: ubuntu-latest
+    # Non-PR runs share one group (the expression yields `true`); PR runs would each get their own
+    concurrency:
+      group: quarto-publish-${{ github.event_name != 'pull_request' || github.run_id }}
+    permissions:
+      contents: read
+      pages: write
+      id-token: write
+    steps:
+      - name: "Check out repository"
+        uses: actions/checkout@v4
+
+      # To render using knitr, we need a few more setup steps...
+      # If we didn't want the examples to use `engine: knitr`, we could
+      # skip a few of the setup steps.
+      - name: "Setup pandoc"
+        uses: r-lib/actions/setup-pandoc@v2
+
+      - name: "Setup R"
+        uses: r-lib/actions/setup-r@v2
+
+      - name: "Setup R dependencies for Quarto's knitr engine"
+        uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          packages: |
+            any::knitr
+            any::rmarkdown
+            any::downlit
+            any::xml2
+
+      # Back to our regularly scheduled Quarto output
+      - name: "Set up Quarto"
+        uses: quarto-dev/quarto-actions/setup@v2
+        with:
+          version: "pre-release"
+
+      # Generate the documentation website
+      - name: Render Documentation website
+        uses: quarto-dev/quarto-actions/render@v2
+
+      # Publish the rendered site to GitHub Pages (upload an artifact, then deploy it)
+
+      # Upload a tar file that will work with GitHub Pages
+      # Make sure to set a retention day to avoid running into a cap
+      # The artifact is not needed again once the deployment has succeeded.
+      - name: Upload Pages artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          retention-days: 1
+
+      # Deploy the uploaded artifact onto GitHub Pages.
+      # This requires the repository's Pages settings to use GitHub Actions as the
+      # source instead of the `docs/` folder or the `gh-pages` branch.
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
\ No newline at end of file
diff --git a/_quarto.yml b/_quarto.yml
new file mode 100644
index 0000000..03632a2
--- /dev/null
+++ b/_quarto.yml
@@ -0,0 +1,21 @@
+project:
+  type: website
+  output-dir: _site  # directory the rendered site is written to
+
+website:
+  title: "Next Generation of Data Science Education"
+  reader-mode: true
+  repo-url: https://github.com/coatless-tutorials/next-gen-data-science-education
+  repo-actions: [edit, issue]  # presumably page links that target repo-url; confirm in the Quarto docs
+  navbar:
+    background: light
+    foreground: dark
+    align: right
+    right:  # navbar entries: home page, quarto-live docs, and this project's GitHub repository
+      - href: index.qmd
+        text: Home
+      - href: https://r-wasm.github.io/quarto-live/
+        text: Quarto Live
+      - icon: github
+        href: https://github.com/coatless-tutorials/next-gen-data-science-education
+        aria-label: GitHub
diff --git a/index.qmd b/index.qmd
new file mode 100644
index 0000000..18e7dfb
--- /dev/null
+++ b/index.qmd
@@ -0,0 +1,67 @@
+---
+title: "Next Generation of Data Science Education"
+subtitle: "Interactive Coding in Web Browsers: A WebAssembly Demonstration"
+format:
+  html:
+    toc: true
+    toc-depth: 2
+---
+
+## Welcome to the Future of Interactive Presentations
+
+In this demonstration, we showcase cutting-edge technology that brings interactive coding directly into slide decks using WebAssembly (WASM) through the new official Quarto WebAssembly backend: [`quarto-live`](https://r-wasm.github.io/quarto-live/) by George Stagg. This innovative approach revolutionizes how we present and teach programming concepts by allowing for real-time code execution, visualization, and exercises within the presentation itself.
+
+You can view the demonstration here: [Linear Regression Overview slides](slides/lecture-01.qmd).
+
+
+For more on the Quarto WebAssembly backend, see the [official documentation](https://r-wasm.github.io/quarto-live/).
+
+## What's Inside
+
+This demonstration includes a Linear Regression overview that uses both R and Python code snippets to illustrate the concepts. You can interact with the code blocks, modify them, and see the results instantly. We've also included a built-in countdown timer on the exercise
+slide so that the exercise has a clear time limit.
+
+## The Power of WebAssembly in Presentations
+
+WebAssembly is a binary instruction format for a stack-based virtual machine, designed as a portable target for high-level languages like C, C++, and Rust. By leveraging WebAssembly:
+
+- We can run R and Python code directly in the browser.
+- Presentations become interactive, allowing audience members to modify and run code in real-time.
+- Complex computations and visualizations can be performed client-side, reducing server load and improving responsiveness.
+
+## How It Works
+
+1. **R Integration**: We use [webR](https://docs.r-wasm.org/webr/latest/), an R distribution compiled to WebAssembly, to run R code in the browser.
+2. **Python Integration**: [Pyodide](https://pyodide.org/en/stable/), a Python distribution for the browser, is used to execute Python code.
+3. **Quarto + RevealJS**: The presentation is built using [Quarto](https://quarto.org/) and [RevealJS](https://revealjs.com/), providing a smooth, web-based slide experience.
+
+## Benefits of This Approach
+
+- **Engagement**: Audience members can experiment with code in real-time, fostering active learning.
+- **Flexibility**: Presenters can easily modify examples on the fly to answer questions or explore different scenarios.
+- **Accessibility**: No need for local installations; everything runs in the browser.
+- **Reproducibility**: Ensures everyone sees the same results, regardless of their local setup.
+
+## Getting Started
+
+To explore this demo:
+
+1. Navigate through the links above to view each component.
+2. In the slide decks and tutorials, look for interactive code blocks where you can modify and run code.
+3. Experiment with different inputs and see how the outputs change in real-time.
+
+## Technical Requirements
+
+### For Viewers and Presenters
+
+- A modern web browser with WebAssembly support (most up-to-date browsers support this).
+- For the best experience, use a desktop or laptop computer rather than a mobile device.
+
+### Authoring
+
+To create interactive presentations like this, you'll need to use Quarto with the `quarto-live` backend. Please make sure you have Quarto v1.4.0 or later installed. You may need to install additional packages for R and Python support.
+
+
+## Feedback and Questions
+
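+Have feedback or a question? Please open an issue on the [GitHub repository](https://github.com/coatless-tutorials/next-gen-data-science-education/issues). Enjoy exploring the future of interactive data science presentations!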
\ No newline at end of file
diff --git a/slides/lecture-01.qmd b/slides/lecture-01.qmd
new file mode 100644
index 0000000..c8a0f4a
--- /dev/null
+++ b/slides/lecture-01.qmd
@@ -0,0 +1,360 @@
+---
+title: "Linear Regression Overview"
+subtitle: "A Comparison in R and Python"
+format: 
+    live-revealjs: 
+        scrollable: true
+webr:
+    packages: 
+      - ggplot2
+pyodide: 
+    packages: 
+      - scikit-learn
+      - pandas
+      - matplotlib
+engine: knitr
+---
+
+{{< include ../_extensions/r-wasm/live/_knitr.qmd >}}
+
+## Introduction
+
+Linear regression is a fundamental statistical technique used to model the relationship between a dependent variable and one or more independent variables.
+
+This presentation will cover:
+
+1. Basic Concepts
+2. Implementation in R and Python
+3. Model Evaluation
+4. Assumptions and Diagnostics
+
+---
+
+## Basic Concepts
+
+Linear regression aims to find the best-fitting straight line through the data points.
+
+The general form of a simple linear regression model is:
+
+$$Y = \beta_0 + \beta_1X + \epsilon$$
+
+Where:
+
+- $Y$ is the dependent variable
+- $X$ is the independent variable
+- $\beta_0$ is the y-intercept
+- $\beta_1$ is the slope
+- $\epsilon$ is the error term
+
+---
+
+## Implementation
+
+Let's look at how to implement linear regression in R and Python by first simulating some data:
+
+::: {.panel-tabset group="language"}
+
+## R
+
+```{webr}
+# Simulate 100 points around the line y = 2x with Gaussian noise (sd = 20)
+set.seed(123)
+x <- seq_len(100)
+y <- 2 * x + rnorm(length(x), mean = 0, sd = 20)
+data <- data.frame(x, y)
+
+head(data)
+```
+
+## Python
+
+```{pyodide}
+import numpy as np
+import pandas as pd
+
+# Create sample data
+np.random.seed(123)
+x = np.arange(1, 101)
+y = 2 * x + np.random.normal(0, 20, 100)
+data = pd.DataFrame({'x': x, 'y': y})
+
+data.head()
+```
+
+:::
+
+---
+
+## Fit Linear Regression Model
+
+Now that we have our data, let's fit a linear regression model to it:
+
+::: {.panel-tabset group="language"}
+
+## R
+
+```{webr}
+# Regress y on x using the simulated data frame
+model <- lm(formula = y ~ x, data = data)
+
+# Summarize the fitted model
+summary(model)
+```
+
+## Python
+
+```{pyodide}
+import matplotlib.pyplot as plt
+from sklearn.linear_model import LinearRegression
+
+# Fit linear regression model
+model = LinearRegression()
+model.fit(data[['x']], data['y'])
+
+# Print model coefficients
+print(f"Intercept: {model.intercept_:.2f}")
+print(f"Slope: {model.coef_[0]:.2f}")
+```
+
+:::
+
+## Visualize the Results
+
+We can visualize the data and the regression line to see how well the model fits the data using ggplot2 in R and Matplotlib in Python.
+
+::: {.panel-tabset group="language"}
+
+## R
+
+```{webr}
+library(ggplot2)
+
+# Scatter plot of the data with the fitted regression line overlaid in red
+ggplot(data, aes(x = x, y = y)) +
+  geom_point() +
+  geom_smooth(method = "lm", se = FALSE, color = "red") +
+  labs(x = "X", y = "Y") +
+  ggtitle("Linear Regression in R") +
+  theme_minimal()
+```
+
+## Python
+
+```{pyodide}
+# Plot the data and regression line
+plt.figure(figsize=(10, 6))
+plt.scatter(data['x'], data['y'])
+plt.plot(data['x'], model.predict(data[['x']]), color='red')
+plt.title("Linear Regression in Python")
+plt.xlabel("X")
+plt.ylabel("Y")
+plt.show()
+```
+
+:::
+
+---
+
+## Predicting New Values
+
+We can use our linear regression model to make predictions on new data:
+
+::: {.panel-tabset group="language"}
+
+## R
+
+```{webr}
+# Score three x values that lie just past the simulated range of 1 to 100
+new_data <- data.frame(x = c(101, 102, 103))
+predictions <- predict(object = model, newdata = new_data)
+
+print(predictions)
+```
+
+## Python
+
+```{pyodide}
+# Predict new values
+new_data = pd.DataFrame({'x': [101, 102, 103]})
+predictions = model.predict(new_data)
+```
+
+:::
+
+---
+
+## Your Turn: Predict New Values!
+
+{{< countdown "01:30" top="10px" right="5px">}}
+
+Create a new data frame with `x` values 10, 30, and 60, then use the model to predict the corresponding y values.
+
+::: {.panel-tabset group="language"}
+
+## R 
+
+```{webr}
+#| exercise: ex_1_r
+# Create your new data frame here
+_______
+
+# Make predictions here
+_______
+
+# Print the predictions
+_______
+```
+
+```{webr}
+#| exercise: ex_1_r
+#| check: true
+
+# Reference solution: predictions for x = 10, 30, and 60
+expected <- predict(model, newdata = data.frame(x = c(10, 30, 60)))
+
+# Compare the submitted `.result` with the reference using all.equal(),
+# i.e. within numeric tolerance rather than bit-for-bit.
+is_correct <- isTRUE(all.equal(.result, expected))
+
+list(
+  correct = is_correct,
+  message = if (is_correct) "Nice work!" else "That's incorrect, sorry."
+)
+```
+
+## Python 
+
+```{pyodide}
+#| exercise: ex_1_py
+# Create your new Pandas data frame here
+_______
+
+# Make predictions using the model
+_______
+
+# Print the predictions
+_______
+```
+
+```{pyodide}
+#| exercise: ex_1_py
+#| check: true
+
+# Create a new DataFrame with x values 10, 30, and 60
+new_data_solution = pd.DataFrame({'x': [10, 30, 60]})
+
+# Make predictions using the model
+predictions_solution = model.predict(new_data_solution)
+
+feedback = None
+if (result == predictions_solution):
+  feedback = { "correct": True, "message": "Nice work!" }
+else:
+  feedback = { "correct": False, "message": "That's incorrect, sorry." }
+
+feedback
+```
+
+:::
+
+
+---
+
+## Model Evaluation
+
+We can evaluate the performance of our linear regression model using various metrics:
+
+::: {.panel-tabset group="language"}
+
+## R
+
+```{webr}
+# R-squared
+summary(model)[["r.squared"]]
+
+# Root Mean Squared Error (RMSE)
+sqrt(mean(resid(model)^2))
+
+# Mean Absolute Error (MAE)
+mean(abs(resid(model)))
+```
+
+## Python
+
+```{pyodide}
+from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
+
+# R-squared
+r2 = r2_score(data['y'], model.predict(data[['x']]))
+
+# Root Mean Squared Error (RMSE)
+rmse = np.sqrt(mean_squared_error(data['y'], model.predict(data[['x']])))
+
+# Mean Absolute Error (MAE)
+mae = mean_absolute_error(data['y'], model.predict(data[['x']]))
+
+print(f"R-squared: {r2:.4f}")
+print(f"RMSE: {rmse:.4f}")
+print(f"MAE: {mae:.4f}")
+```
+
+:::
+
+---
+
+## Assumptions 
+
+Linear regression relies on several assumptions:
+
+1. Linearity
+2. Independence
+3. Homoscedasticity
+4. Normality of residuals
+
+---
+
+## Checking Assumptions with Diagnostic Plots
+
+Let's look at some diagnostic plots:
+
+::: {.panel-tabset group="language"}
+
+## R
+
+```{webr}
+par(mfrow = c(2, 2))  # lay out the following plots in a 2 x 2 grid
+plot(model)  # draw the diagnostic plots for the fitted model
+```
+
+## Python
+
+```{pyodide}
+import seaborn as sns
+
+# Residual plot
+plt.figure(figsize=(10, 6))
+sns.residplot(x=model.predict(data[['x']]), y=data['y'], lowess=True)
+plt.title("Residual Plot")
+plt.xlabel("Predicted Values")
+plt.ylabel("Residuals")
+plt.show()
+
+# Q-Q plot
+from scipy import stats
+
+fig, ax = plt.subplots(figsize=(10, 6))
+_, (__, ___, r) = stats.probplot(model.resid, plot=ax, fit=True)
+ax.set_title("Q-Q Plot")
+plt.show()
+```
+
+:::
+
+---
+
+## Conclusion
+
+- Linear regression is a powerful tool for modeling relationships between variables.
+- Both R and Python offer robust implementations and diagnostic tools.
+- Always check assumptions and perform diagnostics to ensure the validity of your model.
+- Consider more advanced techniques (e.g., multiple regression, polynomial regression) for complex relationships.
\ No newline at end of file