From 62238f9f04d6921ecb8c38a85e1aa3b6f315f4f3 Mon Sep 17 00:00:00 2001
From: Tiankai Ma <tiankaima@163.com>
Date: Tue, 11 Jun 2024 15:26:36 +0800
Subject: [PATCH] algo: add rev1_notes

---
 7e1810-algo_hw/hw1.typ   | 97 ++++++++++++++++++++++++++++++++++++----
 7e1810-algo_hw/hw2.typ   | 68 +++++++++++++++++++++++++---
 7e1810-algo_hw/hw3.typ   | 81 ++++++++++++++++++++++++++++++---
 7e1810-algo_hw/hw4.typ   | 52 +++++++++++++++++----
 7e1810-algo_hw/hw5.typ   | 35 +++++++++++----
 7e1810-algo_hw/hw6.typ   | 90 ++++++++++++++++++-------------------
 7e1810-algo_hw/hw7.typ   | 61 +++++++++++++++++++++----
 7e1810-algo_hw/hw8.typ   | 10 +----
 7e1810-algo_hw/hw9.typ   | 10 +----
 7e1810-algo_hw/main.typ  | 28 +++++++++++-
 7e1810-algo_hw/utils.typ | 25 +++++++++++
 11 files changed, 447 insertions(+), 110 deletions(-)
 create mode 100644 7e1810-algo_hw/utils.typ

diff --git a/7e1810-algo_hw/hw1.typ b/7e1810-algo_hw/hw1.typ
index f2d3325..58db0b2 100644
--- a/7e1810-algo_hw/hw1.typ
+++ b/7e1810-algo_hw/hw1.typ
@@ -1,12 +1,41 @@
+#import "utils.typ": *
+
 == HW 1 (Week 2)
 Due: 2024.03.17
 
-#let ans(it) = [
-  #pad(1em)[
-    #text(fill: blue)[
-      #it
-    ]
-  ]
+#rev1_note[
+  + Review: 渐进符号
+
+    $o, O, Theta, omega, Omega$ 的定义如下:
+
+    $
+      O(g(n)) &= {f(n) mid(|) exists c > 0, n_0 > 0, forall n >= n_0 quad& 0 <= f(n) <= c dot g(n)}\
+      o(g(n)) &= {f(n) mid(|) forall c > 0, exists n_0 > 0, forall n >= n_0 quad& 0 <= f(n) < c dot g(n)}\
+      Theta(g(n)) &= {
+        f(n) mid(|) exists c_1, c_2 > 0, n_0 > 0, forall n >= n_0 quad& 0 <= c_1 dot g(n) <= f(n) <= c_2 dot g(n)
+      }\
+      Omega(g(n)) &= {f(n) mid(|) exists c > 0, n_0 > 0, forall n >= n_0 quad& 0 <= c dot g(n) <= f(n)}\
+      omega(g(n)) &= {f(n) mid(|) forall c > 0, exists n_0 > 0, forall n >= n_0 quad& 0 <= c dot g(n) < f(n)}
+    $
+
+  + Review: 主定理
+
+    对分治算法的递归式
+
+    $T(n) = a T(n / b) + f(n)$
+
+    主定理给出了一个快速求解递归算法复杂度的方法, 记 $c = log_b a, f(n) = O(n^d)$
+
+    + 如果 $c > d$, 则 $T(n) = Theta(n^c)$
+
+      这里需要注意, 比较的时候要是多项式意义上的小于, 即:
+      $exists epsilon > 0, f(n) = O(n^(c - epsilon))$
+
+    + 如果 $c = d$, 则 $T(n) = Theta(n^c log n)$
+
+    + 如果 $c < d$, 则 $T(n) = Theta(f(n))$
+
+      同时需要满足正则化条件: 存在常数 $c' < 1$ (注意与上文的 $c = log_b a$ 区分), 使得对充分大的 $n$ 有 $a f(n\/b) <= c' f(n) < f(n)$
 ]
 
 === Question 2.3-5
@@ -28,6 +57,12 @@ You can also think of insertion sort as a recursive algorithm. In order to sort
           A[i + 1] = key
   ```
 
+  #rev1_note[
+    最坏情况是:
+
+    在排序 $[1,k]$ 时, 需要将 $A[k]$ 与 $A[1:k-1]$ 中的所有元素比较一次, 以确定插入位置. 这样, 在排序 $[1,n]$ 时, 需要比较的次数为 $1 + 2 + dots.c + (n-1) = Theta(n^2)$.
+  ]
+
   The recurrence for its worst-case running time is
 
   $
@@ -35,7 +70,6 @@ You can also think of insertion sort as a recursive algorithm. In order to sort
   $
 
   The solution to the recurrence is $Theta(n^2)$ worst-case time.
-
 ]
 
 === Question 2-1
@@ -45,12 +79,16 @@ You can also think of insertion sort as a recursive algorithm. In order to sort
 Although merge sort runs in $Theta(n lg n)$ worst-case time and insertion sort runs in $Theta(n^2)$ worst-case time, the constant factors in insertion sort can make it faster in practice for small problem sizes on many machines. Thus it makes sense to coarsen the leaves of the recursion by using insertion sort within merge sort when subproblems become suffificiently small. Consider a modifification to merge sort in which $n\/k$ sublists of length $k$ are sorted using insertion sort and then merged using the standard merging mechanism, where $k$ is a value to be determined.
 
 + Show that insertion sort can sort the $n\/k$ sublists, each of length $k$, in $Theta(n k)$ worst-case time.
+
 + Show how to merge the sublists in $Theta(n lg(n\/k))$ worst-case time.
+
 + Given that the modifified algorithm runs in $Theta(n k + n lg(n\/k))$ worst-case time, what is the largest value of $k$ as a function of $n$ for which the modifified algorithm has the same running time as standard merge sort, in terms of $Theta$-notation?
+
 + How should you choose $k$ in practice?
 
 #ans[
   + For each sublist, the insertion sort can sort the $k$ elements in $Theta(k^2)$ worst-case time. Thus, the insertion sort can sort the $n\/k$ sublists, each of length $k$, in $Theta(n k)$ worst-case time.
+
   + Given $n\/k$ sorted sublists, each of length $k$, the recurrence for merging the sublists is
     $
       T(n) = cases(2 dot.c T(n\/2) + Theta(n) space.quad & n>k, 0 & n=k)
@@ -59,6 +97,16 @@ Although merge sort runs in $Theta(n lg n)$ worst-case time and insertion sort r
 
     *This could also be viewed as a tree with $lg(n\/k)$ levels with $n$ element in each level. Worst case would be $Theta(n lg (n\/k))$*
 
+    #rev1_note[
+      将 $n\/k$ 个数组看成 $n\/k$ 个元素, 作为 merge sort 的叶节点. 这样一棵树有 $n\/k$ 个叶节点, 也就有 $log(n\/k)$ 层. 每层实际上合并 $n$ 个元素, 总时间复杂度为 $Theta(n lg(n\/k))$.
+
+      直接进行 $n\/k-1$ 次合并是不可行的, 这样的速度在 $Theta(n^2\/k)$, 不符合要求.
+
+      另一种可行的思路: 考虑直接合并 $n\/k$ 个有序数组, 我们比较这 $n\/k$ 个数组中, 尚未取出的最小元素, 并从中选取最小元素.
+
+      具体来说, 维护一个 $n\/k$ 大小的 heap 和一个 $n\/k$ 大小的数组, 用于存储每个数组中的当前元素. 每次取出堆顶元素, 并将对应数组的下一个元素插入堆中. 这样, 每次取出最小元素 (并调整最小堆) 的时间复杂度为 $O(lg(n\/k))$, 总时间复杂度为 $O(n lg(n\/k))$.
+    ]
+
   + Take $Theta(n k + n lg(n \/ k)) = Theta(n lg n)$, consider $k = Theta(lg n)$:
     $
       Theta(n k + n lg(n \/ k))
@@ -66,16 +114,47 @@ Although merge sort runs in $Theta(n lg n)$ worst-case time and insertion sort r
       &= Theta (n lg n + n lg n - n lg (lg n)) \
       &= Theta (n lg n)
     $
+
+    #rev1_note[
+      思路:
+      $
+        Theta(n k +n log(n\/k))=O(n log n)
+      $
+      只需 $k = O(log n), log(n\/k) = O(log n)$. 这样我们得到 $k=omega(1)=O(log n)$, 选取最大边界 $k=Theta(log n)$, 通过上述验证可以发现严格记号成立, 那么最大的 $k$ 值为 $Theta(log n)$. (渐进意义上的.)
+
+      容易发现当 $k=omega(log n)$ 时, $Theta(n k + n log(n\/k))=omega(n log n)$, 这样的 $k$ 值不满足题目要求.
+    ]
+
   + Choose $k$ to be the largest length of sublist for which insertion sort is faster than merge sort. Use a small constant such as $5$ or $10$.
 
+    #rev1_note[
+      这里的主要问题是, 比较两个 $Theta$ 意义下相等的算法用时必须考虑常数, 实践中可以通过记录算法实际运算次数得到.
+    ]
 ]
 
 === Question 4.2-3
-What is the largest $k$ such that if you can multiply $3 times 3$ matrices using $k$ multiplications (not assuming commutativity of multiplication), then you can multiply $n times n$ matrices in $o(n lg 7)$ time? What is the running time of this algorithm?
+What is the largest $k$ such that if you can multiply $3 times 3$ matrices using $k$ multiplications (not assuming commutativity of multiplication), then you can multiply $n times n$ matrices in $o(n^(lg 7))$ time? What is the running time of this algorithm?
 
 #ans[
+  #rev1_note[
+    稍微翻译一下题目:
+
+    如果你有一个只用 $k$ 次乘法的 $3 times 3$ 矩阵乘法算法, 那么能否用它构造一个在 $o(n^(log_2 7))$ 时间内完成 $n times n$ 矩阵乘法的算法? 问满足条件的最大的 $k$ 是多少.
+
+    递归式是 $T(n) = k T(n\/3) + O(n^2)$, 我们分类讨论来使用主定理:
+
+    // - $k=27$ 时这就是最基本的矩阵分块算法. 我们不妨假设 $k<27$.
+
+    - $log_3 k < 2$, 正则化条件: $k dot (n\/3)^2  < n^2$ 即 $k < 9$, 算法规模在 $T(n)=O(n^2)subset O(n^(log_2 7))$.
+
+    - $log_3 k = 2$, 此时 $T(n)=O(n^2 lg n) subset O(n^(log_2 7))$.
+
+    - $log_3 k > 2$, 为使 $T(n)=O(n^(log_3 k)) subset O(n^(log_2 7))$, 需要 $log_3 k < log_2 7$, 最大的 $k=21$.
+
+    下面这个答案中递归式是错误的, 应该改正.
+  ]
+
   Assuming $n = 3^m$. Use block matrix multiplication, the recursive running time is $T(n) = k T(n\/3) + O(1)$.
 
   When $log_3 k > 2 $, using master theorem, the largest $k$ to satisfy $log_3 k < lg 7$ is $k=21$.
-
 ]
\ No newline at end of file
diff --git a/7e1810-algo_hw/hw2.typ b/7e1810-algo_hw/hw2.typ
index 89c8e5a..9b0a576 100644
--- a/7e1810-algo_hw/hw2.typ
+++ b/7e1810-algo_hw/hw2.typ
@@ -1,16 +1,45 @@
+#import "utils.typ": *
+
 == HW 2 (Week 3)
 Due: 2024.03.24
 
-#let ans(it) = [
-  #pad(1em)[
-    #text(fill: blue)[
-      #it
-    ]
-  ]
+#rev1_note[
+  + Review: 最大堆/最小堆
+
+    最大堆是一种满足性质 $A["PARENT"(i)] >= A[i]$ 的二叉树, 其中 $"PARENT"(i) = floor(i\/2)$, 两个子节点分别是 $2i, 2i+1$. 最小堆则满足性质 $A["PARENT"(i)] <= A[i]$.
+
+    插入过程: 向最下一层、最右侧节点插入新叶节点 (实际上就是在数组结尾添加), 添加之后向上调整, 使其重新满足最大(小)堆的性质. 调整时间复杂度 $O(log n)$
+
+    删除过程: 向下调整, 在子节点中寻找最大(小)值, 与当前节点交换, 递归调整. 调整时间复杂度 $O(log n)$
+
+    建堆过程: 从最后一个非叶节点开始, 依次向前调整, 使其满足最大(小)堆的性质. 时间复杂度 $O(n)$, 主要考虑从叶到根开始「合并」已经建好的堆, 每次都是向下调整, 时间复杂度 $O(log n - k)$, 总时间复杂度 $O(n log n) - sum log i=O(n)$
+
+  + Review: 计数排序
+
+    一个保证稳定性的思路如下, 考虑数据长度 $n$, 范围 $[0,k]$.
+    - 开辟一个 $[0,k]$ 的数组 $C$. 清零
+    - 对于原数据 $n$, 遍历一遍并加入到这个计数的数组中
+    - 计算前缀和: $C[i] = C[i] + C[i-1]$
+    - 从原数据的尾部开始, 将每个数据放入输出数组中由 $C$ 给出的位置, 并将 $C$ 中对应的值减一, 这样每次从 $C$ 中取得的位置总是不同的.
+
+    总时间复杂度: $O(n+k)$.
+
+  + Review: 基数排序
+
+    假设每个数据有 $k$ 个关键字, 每个关键字有自己的排序方式, 以第一个关键字开始, 从小到大排序, 第一个关键字相同的情况下比较第二个关键字. 以此类推, 直到最后一个关键字.
+
+    在下面这个问题中, 内层排序方式使用计数排序, 相当于每层排序 $l$ 组共计 $n$ 个元素的关键字, 按照计数排序, 每层的复杂度在 $O(n)$. 三层也还是 $O(n)$.
+
+  + Review: 比较排序方法复杂度下限
+
+    基于比较的排序方法的下界是 $Omega(n log n)$, 证明方法是通过决策树模型: 对于 $n$ 个不同元素, 有 $n!$ 种不同的排列方式, 因此决策树至少有 $n!$ 个叶节点来表示排序结果.
+
+    因此可以得到层高 $h$ 满足: $h>=log_2(n!)=Theta(n log n)$, 即最坏情况下至少需要比较 $h$ 次, 所以比较排序的下界是 $Omega(n log n)$.
 ]
 
 === Question 6.2-6
 The code for MAX-HEAPIFY is quite efficient in terms of constant factors, except possibly for the recursive call in line 10, for which some compilers might produce inefficient code. Write an efficient MAX-HEAPIFY that uses an iterative control construct (a loop) instead of recursion.
+
 #ans[
   Consider the following pseudocode code:
   ```txt
@@ -38,6 +67,7 @@ Show how to implement a first-in, first-out queue with a priority queue. Show ho
 
 #ans[
   - For stack, add element with increasing priority, and pop the element with the highest priority, pseudocode:
+
     ```txt
     PUSH(S, x)
         S.top = S.top + 1
@@ -49,7 +79,9 @@ Show how to implement a first-in, first-out queue with a priority queue. Show ho
             S.top = S.top - 1
             return S[S.top + 1]
     ```
+
   - For queue, add element with decreasing priority, and pop the element with the highest priority, pseudocode:
+
     ```txt
     ENQUEUE(Q, x)
         Q.tail = Q.tail + 1
@@ -65,6 +97,19 @@ Show how to implement a first-in, first-out queue with a priority queue. Show ho
 
 === Question 7.4-6
 Consider modifying the PARTITION procedure by randomly picking three elements from subarray $A[p : r]$ and partitioning about their median (the middle value of the three elements). Approximate the probability of getting worse than an $alpha$-to-$(1 - alpha)$ split, as a function of $alpha$ in the range $0 < alpha < 1/2$.
+
+#rev1_note[
+  认为元素可以重复取得, 或者认为 $r-p$ 足够大, 这样可以保证三次选取独立.
+
+  考虑三个变量中位数的分布, 只要它落在 $[0,alpha]union[1-alpha,1]$ 之间, 这样的 $q$ 的选取就不如 $alpha : (1-alpha)$ 的划分, 由于对称性, 我们只需要计算左边的部分, 共有两种情况能使得中位数落在 $[0,alpha]$:
+
+  - 恰有两个数落在 $[0,alpha]$, 另一个落在 $[alpha,1]$ 中: $binom(3,2) times alpha^2(1-alpha)$
+
+  - 三个数均落在 $[0,alpha]$ 中: $alpha^3$
+
+  容易证明这些情况是无交的, 并且列举了所有可能的「中位数落在...」的情况, 乘 $2$ 即可:
+]
+
 #ans[
   *Assuming the same element could be picked more than once*(which should be the case in real world).
 
@@ -78,6 +123,7 @@ Consider modifying the PARTITION procedure by randomly picking three elements fr
 
 === Question 8.2-7
 Counting sort can also work efficiently if the input values have fractional parts, but the number of digits in the fractional part is small. Suppose that you are given n numbers in the range $0$ to $k$, each with at most $d$ decimal (base $10$) digits to the right of the decimal point. Modify counting sort to run in $Theta(n + 10^d k)$ time.
+
 #ans[
   To achieve $Theta(n + 10^d k)$ time, we first use $Theta(n)$ time to multiply each number by $10^d$, then change the $C[0, k]$ to $C[0, 10^d k]$, and finally use $Theta(10^d k)$ time to sort the numbers.
 
@@ -97,11 +143,11 @@ Counting sort can also work efficiently if the input values have fractional part
   ```
 
   This is the required $Theta(n + 10^d k)$ time algorithm.
-
 ]
 
 === Question 8.3-5
 Show how to sort $n$ integers in the range $0$ to $n^3 - 1$ in $O(n)$ time.
+
 #ans[
   First convert each number to base $n$, then use counting sort to sort the numbers.
 
@@ -111,10 +157,18 @@ Show how to sort $n$ integers in the range $0$ to $n^3 - 1$ in $O(n)$ time.
 
 === Question 9.3.9
 Describe an $O(n)$-time algorithm that, given a set $S$ of $n$ distinct numbers and a positive integer $k <= n$, determines the $k$ numbers in $S$ that are closest to the median of $S$.
+
+#rev1_note[
+  下面的回答中需要更正: Step 2 中, 对于每个元素 $y$ 计算的是 $abs(y-x)$. 然后记录这个作为 key, 原来的值作为 value, 进行计数排序, 最后取前 $k$ 个元素即可.
+]
+
 #ans[
   + $O(n)$: Using SELECT, we can find $x$ to be the median of $S$.
+
   + $O(n)$: Subtract $x$ from each element in $S$.
+
   + $O(n)$: Use COUNTING-SORT to sort the absolute values of the differences.
+
   + $O(k)$: Return the first $k$ elements in the sorted array.
 
   This is the required $O(n)$-time algorithm.
diff --git a/7e1810-algo_hw/hw3.typ b/7e1810-algo_hw/hw3.typ
index 6a20ce8..46d1cbd 100644
--- a/7e1810-algo_hw/hw3.typ
+++ b/7e1810-algo_hw/hw3.typ
@@ -1,12 +1,56 @@
+#import "utils.typ": *
+
 == HW 3 (Week 4)
 Due: 2024.03.31
 
-#let ans(it) = [
-  #pad(1em)[
-    #text(fill: blue)[
-      #it
-    ]
-  ]
+#rev1_note[
+  + Review: 二叉树
+
+    遍历方式: 先序遍历, 中序遍历, 后序遍历.
+
+  + Review: 二叉搜索树
+
+    - 二叉搜索树是一种二叉树, 其中每个节点 $x$ 都有一个关键字 $"key"[x]$ 以及一个指向 $x$ 的父节点的指针 $p[x]$, 以及指向左右孩子的指针 $"left"[x]$ 和 $"right"[x]$. 二叉搜索树性质:
+
+      + 对于任意节点 $x$, 其左子树中的*所有*关键字的值都小于 $"key"[x]$.
+
+      + 对于任意节点 $x$, 其右子树中的*所有*关键字的值都大于 $"key"[x]$.
+
+    - 二叉搜索树的中序遍历是一个有序序列. 此外, 通过一棵二叉搜索树的先序/后序遍历结果, 可以反推出这棵树的结构. 但是通过中序遍历结果无法唯一确定一棵二叉树.
+
+    - 前驱的搜索逻辑:
+
+      - 如果左节点不为空, 那么只需要搜索左节点的最大值(尽可能的向右、向下遍历)
+
+      - 如果左节点为空, 向上找到第一个向左的 parent , 也就是说对这个 parent 来说, 当前节点是右孩子. 如果是左孩子的话那就持续向上遍历.
+
+      - 返回最后一个父节点. 如果到根部依旧不存在向左的 parent, 那么只能说明最开始的节点已经处在整棵树的左下角, 它没有前驱, 返回空.
+
+  + Review: 红黑树
+
+    在 BST 的基础上增加 color 字段, 取值为红或黑. 红黑树的性质:
+
+    - 每个节点或者是红色, 或者是黑色.
+
+    - 根节点是黑色.
+
+    - 每个叶子节点是黑色.(空节点)
+
+    - 如果一个节点是红色, 则它的两个子节点都是黑色.
+
+    - 对于每个节点, 从该节点到其所有后代叶子节点的简单路径上, 黑色节点的数目相同. (黑高相同)
+
+  + Review: 逆序对
+
+    $
+      \#{(i,j) | i < j quad and quad A[i] > A[j]}
+    $
+
+  + Review: 区间树
+
+    我们对红黑树的结构进行扩张来存储一组区间, $A^((i))=[t^((i))_1, t^((i))_2]$. 与实数一样, 区间有着三分律, 即对于两个区间 $A, B$ 来说, 要么 $A sect B != emptyset$, 要么 $A$ 在 $B$ 的左侧/右侧, 这三种情况互斥.
+
+    区间树使用左端点 (低端点) 作为排序的 key (关键字), 并且额外维护一个 $x.max$, 代表当前节点对应的子树中, 所有区间的右端点 (高端点) 的最大值, 构建方式类似转移方程, 维护方式只需要向上更新, 都不超过一般红黑树的复杂度.
 ]
 
 === Question 12.2-3
@@ -28,6 +72,12 @@ Write the `TREE-PREDECESSOR` procedure(which is symmetric to `TREE-SUCCESSOR`).
 === Question 13.1-5
 Show that the longest simple path from a node $x$ in red-black tree to a descendant leaf at most twice that of the shortest simple path from node $x$ to a descendant leaf.
 
+#rev1_note[
+  证明从红黑树节点 $x$ 到叶子节点的最长简单路径长度至多是最短简单路径长度的两倍.
+
+  下面这个答案实际上说明: 任意两条路径的黑高相同, 红色节点由于一定存在黑色的子节点, 那么红色节点的数量也不大于黑高. 最短路径不小于黑高, 最长路径不大于二倍黑高, 得证.
+]
+
 #ans[
   Consider the longest simple path $(a_1, ... ,a_s)$ & the shortest simple path $(b_1, ... ,b_t)$, they have equal number of black nodes (Property 5).
   Neither of the paths can have repeated red node (Property 4).
@@ -37,6 +87,25 @@ Show that the longest simple path from a node $x$ in red-black tree to a descend
 === Question 17.1-7
 Show how to use an order-statistic tree to count the number of inversions in an array of $n$ distinct elements in $O(n lg n)$ time.
 
+#rev1_note[
+  考虑按照如下方式去重计算:
+
+  $
+    "Inv"(j)=\#{(i,j) | i < j quad and quad A[i] > A[j]}\
+    "TotalInv" = sum_(j=1)^(n) "Inv"(j)
+  $
+
+  按照这样的思路, $"Inv"(j)$只依赖前 $A[1:j]$ 序列中元素, 具体的说, $"Inv"(j)$ 只跟 $A[j]$ 在 $A[1:j]$ 的排名相关, 记作 $r(j)$. 那么我们有:
+
+  $
+    "Inv"(j) = j - r(j) >=0
+  $
+
+  这样的思路与插入排序的思路是一致的, 当 $A[1:j-1]$ 已经是有序数组时, $A[j]$ 新的插入位置 ($r(j)$) 意味着与 $j - r(j)$ 个元素交换了位置, 即为向前的逆序数.
+
+  每次插入和查询排名所用时间都是 $O(log k)$ ($k$ 为当前树中的元素个数). 总用时 $O(log n!)=O(n log n)$
+]
+
 #ans[
   $O(n lg(n))$ time is required to build a red-black treem so everytime we insert a node, we can calculate the number of inversion using $"OS-RANK"$ (which is the rank of the node, thus calculating inversions).
 ]
diff --git a/7e1810-algo_hw/hw4.typ b/7e1810-algo_hw/hw4.typ
index 389c58b..62bb1c0 100644
--- a/7e1810-algo_hw/hw4.typ
+++ b/7e1810-algo_hw/hw4.typ
@@ -1,17 +1,37 @@
+#import "utils.typ": *
 == HW4 (Week 5)
 Due: 2024.04.07
 
-#let ans(it) = [
-  #pad(1em)[
-    #text(fill: blue)[
-      #it
-    ]
-  ]
-]
-
 === Question 14.4-2
 Give pseudocode to reconstruct an LCS from te completed c table (See Theorem 14.1 Optimal substructure of an LCS) and the original sequences $X = angle.l x_1, x_2, dots.c, x_m angle.r$ and $Y = angle.l y_1, y_2, dots.c y_n angle.r$ in $O(m+n)$ time, without using the b table.
 
+#rev1_note[
+  Review: 最长公共子序列
+
+  考虑状态转移方程:
+
+  $
+    c[i,j] = cases(
+        c[i-1, j-1] + 1\, & "if" x_i = y_j,
+        max(c[i-1, j], c[i, j-1])\, & "otherwise"
+    )
+  $
+
+  通过这个方程, 可以得到最长公共子序列的长度, 但是无法直接得到最长公共子序列的内容. 通过构造一个二维数组 $b$, 可以在 $O(m+n)$ 的时间内得到最长公共子序列的内容:
+
+  - 如果 $x_i = y_j$, 那么 $b[i,j] = "TOP_LEFT"$
+
+  - 如果 $c[i-1, j] >= c[i, j-1]$, 那么 $b[i,j] = "TOP"$
+
+  - 否则, $b[i,j] = "LEFT"$
+
+  回溯时, 只需要根据 $b$ 数组的内容, 从 $c[m,n]$ 开始, 依次回溯到 $c[0,0]$, 当 $b$ 为 "TOP_LEFT" 时, 输出 $x_i$, 否则, 根据 $b$ 的内容选择回溯方向.
+
+  接下来我们考虑如何不使用 $b$ 数组, 直接输出最长公共子序列的内容.
+
+  通过观察, 我们可以发现, 当 $x_i = y_j$ 时, $c[i,j] = c[i-1, j-1] + 1$, 否则, $c[i,j] = max(c[i-1, j], c[i, j-1])$. 这意味着回溯到 $(i,j)$ 时, 若 $x_i = y_j$, 可以输出 $x_i$ 并移动到 $(i-1,j-1)$; 否则比较 $c[i-1,j]$ 与 $c[i,j-1]$, 向较大的一侧移动. 也就是说 $b[i,j]$ 可以由 $c$ 表和原序列在 $O(1)$ 时间内重新算出, 无需存储.
+]
+
 #ans[
   Consider the following pseudocode:
 
@@ -32,9 +52,23 @@ Give pseudocode to reconstruct an LCS from te completed c table (See Theorem 14.
 === Question 14.4-5
 Give an $O(n^2)$-time algorithm to find the longest monotonically increasing subsequence of a sequence of $n$ numbers.
 
+#rev1_note[
+  思路: 给定一个序列 $L$, 复制一份并排序, 记为 $L^'$, $O(n lg n)$ 时间排序 $L$
+
+  计算 $L$ 和 $L^'$ 的最长公共子序列, 由于最长公共子序列一定是单调递增的(是 $L^'$ 的子序列), 且最长, 所以最长公共子序列即为 $L$ 的最长单调递增子序列.
+
+  特别的, 考虑此问题的一个变种, 如果要求严格递增子序列, 可以对排序结果去重, 然后再计算最长公共子序列.
+
+  另一种思路也是 DP, 构建这样一组 $c[n]$, 其中 $c[i]$ 是 $A[1:i]$ 中以 $A[i]$ 结尾的最长上升子列的长度 (不存在满足条件的 $j$ 时 $c[i]=1$, 最终答案为 $max_i c[i]$), 转移方程:
+
+  $
+    c[i] = max_(j<i) {c[j] + 1 | A[j] < A[i]}
+  $
+]
+
 #ans[
   Given a sequence of numbers $L$, make a copy and sort it, let the $L^'$ be the sorted array:
-  $o(n^2)$ time to sort $L$
+  $O(n lg n)$ time to sort $L$
 
   Then consider the following pseudocode:
 
diff --git a/7e1810-algo_hw/hw5.typ b/7e1810-algo_hw/hw5.typ
index 97639fd..b638b87 100644
--- a/7e1810-algo_hw/hw5.typ
+++ b/7e1810-algo_hw/hw5.typ
@@ -1,16 +1,9 @@
 #import "@preview/cetz:0.2.2": *
+#import "utils.typ": *
 
 == HW5 (Week 6)
 Due: 2024.04.14
 
-#let ans(it) = [
-  #pad(1em)[
-    #text(fill: blue)[
-      #it
-    ]
-  ]
-]
-
 === Question 14.5-2
 Determine the cost and structure of an optimal binary serach tree for a set of $n=7$ keys with the following probabilities:
 
@@ -52,6 +45,20 @@ Determine the cost and structure of an optimal binary serach tree for a set of $
   )
 ]
 
+#rev1_note[
+  Review: 最优二叉搜索树
+
+  考虑一组已经排序的关键字 $K={k_1, k_2, ..., k_n}$, 和对应的访问频率 $P={p_1, p_2, ..., p_n}$, 另外哨兵节点的频率(假想的关键字, 位于真正关键字的「中间」) $Q={q_0, q_1, ..., q_n}$. 计算思路是: 定义 $e[i][j]$ 为包含关键字 $k_i, ..., k_j$ 的子树的最小期望查找代价, 目标即寻找 $e[1][n]$, 这就转变为动态规划问题.
+
+  初始条件: $e[i][i-1]=q_(i-1)$. 另外考虑这样一个问题, 将$k_i, ..., k_j$ 成为一个节点的子树之后, 搜索的期望就增加了 $p_i + p_(i+1) + ... + p_j + q_(i-1)+ ... + q_j$. 记这个参数为 $w(i,j):=sum_(l=i)^j p_l + sum_(l=i-1)^j q_l$.
+
+  得到转移方程:
+
+  $
+    e[i][j] = min_(i <= r <= j)(e[i][r-1] + e[r+1][j] + w(i,j))
+  $
+]
+
 #ans[
   Running the code provided in appendix, we get the following result (cost, preorder traversal of the optimal BST):
   ```text
@@ -133,6 +140,18 @@ What is an optimal Huffman code for the following set of frequencies, based on t
 
 Can you generalize your answer to find the optimal code when the frequencies are the first $n$ Fibonacci numbers?
 
+#rev1_note[
+  Review: Huffman 编码
+
+  典型的贪心算法, 思路如下:
+
+  - 取使用频率最低的两个节点, 合并为一个新节点, 并记录新节点的频率为两者之和.
+
+  - 重复上述过程, 直到所有节点合并为一个节点.
+
+  细节上需要维护一个最小堆, 初始化时用时 $O(n)$, 每次弹出、加入堆用时 $O(log n)$, 总时间复杂度 $O(n log n)$.
+]
+
 #ans[
   #align(center)[
     #table(
diff --git a/7e1810-algo_hw/hw6.typ b/7e1810-algo_hw/hw6.typ
index 292fdb6..5a5c6d6 100644
--- a/7e1810-algo_hw/hw6.typ
+++ b/7e1810-algo_hw/hw6.typ
@@ -1,16 +1,9 @@
 #import "@preview/cetz:0.2.2": *
+#import "utils.typ": *
 
 == HW6 (Week 7)
 Due: 2024.04.21
 
-#let ans(it) = [
-  #pad(1em)[
-    #text(fill: blue)[
-      #it
-    ]
-  ]
-]
-
 === Question 20.1-3
 
 The transpose of a directed graph $G=(V,E)$ is the graph $G^T = (V, E^T)$ where $ E^T = {(v,u) in V times V:(u,v) in E} $ That is, $G^T$ is $G$ with all its edges reversed. *Describe efficient algorithms for computing $G^T$ from $G$, for both the adjacency-list and adjacency-matrix representation of $G$, Analyze the running times of your algorithms.*
@@ -18,24 +11,24 @@ The transpose of a directed graph $G=(V,E)$ is the graph $G^T = (V, E^T)$ where
 #ans[
   - adjacency-list:
 
-  ```txt
-  def MAKE_ADJACENCY_LIST_TRANSPOSE(G):
-    GT = EMPTY_GRAPH()
-    GT.V = G.V
-    for uT in GT.V
-      uT.adj = []
-    for u in G.V
-      for v in u.adj
-        GT.v.adj.append(u)
-  ```
+    ```txt
+    def MAKE_ADJACENCY_LIST_TRANSPOSE(G):
+      GT = EMPTY_GRAPH()
+      GT.V = G.V
+      for uT in GT.V
+        uT.adj = []
+      for u in G.V
+        for v in u.adj
+          GT.v.adj.append(u)
+    ```
 
-  Time complexity: $O(abs(V)+abs(E))$
+    Time complexity: $O(abs(V)+abs(E))$
 
   - adjacency-matrix:
 
-  Same as matrix transpose(flipping the matrix along the diagonal).
+    Same as matrix transpose(flipping the matrix along the diagonal).
 
-  Time complexity: $O(V^2)$. (with special design of matrix representation, sparse matrix: $O(abs(V)+abs(E))$, dense matrix: $O(V^2)$, lazy transpose: $O(1)$)
+    Time complexity: $O(V^2)$. (with special design of matrix representation, sparse matrix: $O(abs(V)+abs(E))$, dense matrix: $O(V^2)$, lazy transpose: $O(1)$)
 ]
 
 === Question 20.1-8
@@ -46,21 +39,22 @@ Under the assumption of uniform independent hashing, if all edge lookups are equ
 
 - what is the expected time to determine whether an edge is in the graph?
 
-  #text(fill: blue)[
+  #ans[
     $O(1)$
   ]
 
 - What disadvantages does this scheme have compared to the linked-list representation?
 
-  #text(fill: blue)[
+  #ans[
     Worst case time complexity is $O(V)$, while linked-list representation is $O(abs(u."adj"))$.
   ]
 
 - Suggest an alternate data structure for each edge list that solves these problems. Does your alternative have disadvantages compared with the hash table?
 
-  #text(fill: blue)[
+  #ans[
     Use a balanced binary search tree to store the edge list.
     - Time complexity: $O(log(abs(u."adj")))$. (worst case time complexity)
+
     - Disadvantages: $O(log(abs(u."adj"))) > O(1)$. (worse average time complexity)
   ]
 
@@ -249,31 +243,35 @@ Another way to topologically sort a directed acyclic graph $G=(V,E)$ is to repea
 #ans[
   - Implementation:
 
-  ```txt
-  def TOPOLOGICAL_SORT(G):
-    for u in G.V
-      u.indegree = 0
-    for u in G.V
-      for v in u.adj
-        v.indegree += 1
-    Q = []
-    for u in G.V
-      if u.indegree == 0
-        Q.append(u)
-    while Q
-      u = Q.pop()
-      print(u)
-      for v in u.adj
-        v.indegree -= 1
-        if v.indegree == 0
-          Q.append(v)
-  ```
-
-  Time complexity: $O(V+E)$
+    ```txt
+    def TOPOLOGICAL_SORT(G):
+      for u in G.V
+        u.indegree = 0
+      for u in G.V
+        for v in u.adj
+          v.indegree += 1
+      Q = []
+      for u in G.V
+        if u.indegree == 0
+          Q.append(u)
+      while Q
+        u = Q.pop()
+        print(u)
+        for v in u.adj
+          v.indegree -= 1
+          if v.indegree == 0
+            Q.append(v)
+    ```
+
+    Time complexity: $O(V+E)$
 
   - If $G$ has cycles:
 
-  The algorithm will not terminate, since there is no vertex of in-degree $0$.
+    #rev1_note[
+      当 $G$ 中有环时, 所有的环和环的后继都不会入队, 不会出现在拓扑序中.
+    ]
+
+    The algorithm will not terminate, since there is no vertex of in-degree $0$.
 ]
 
 === Question 20.5-4
diff --git a/7e1810-algo_hw/hw7.typ b/7e1810-algo_hw/hw7.typ
index 4955ab5..c10655b 100644
--- a/7e1810-algo_hw/hw7.typ
+++ b/7e1810-algo_hw/hw7.typ
@@ -1,18 +1,59 @@
+#import "@preview/diagraph:0.2.1": *
+#import "utils.typ": *
+
 == HW7 (Week 8)
 Due: 2024.04.28
 
-#let ans(it) = [
-  #pad(1em)[
-    #text(fill: blue)[
-      #it
-    ]
-  ]
-]
-
 === Question 21.1-1
 
 Let $(u,v)$ be a minimum-weight edge in a connected graph $G$. Show that $(u,v)$ belongs to some minimum spanning tree of $G$.
 
+#[
+  #set text(fill: black)
+  #align(center)[
+    #scale(55%, reflow: true)[
+      #raw-render(```dot
+        digraph MSTModification {
+          layout=neato
+          subgraph cluster_0 {
+              node [shape=circle, style=filled, color=lightgrey];
+              style = invis;
+
+              A1 -> B1 [label="2", dir="none"];
+              B1 -> C1 [label="3", dir="none"];
+              C1 -> D1 [label="6", dir="none"];
+              D1 -> E1 [label="4", dir="none"];
+              E1 -> F1 [label="5", dir="none"];
+              F1 -> A1 [label="1", dir="none"]; // Minimum weight edge
+          }
+          subgraph cluster_1 {
+              style = invis;
+              node [shape=circle, style=filled, color=lightgrey];
+
+              A2 -> B2 [label="2", dir="none"];
+              B2 -> C2 [label="3", dir="none"];
+              C2 -> D2 [label="6", dir="none"];
+              D2 -> E2 [label="4", dir="none"];
+              E2 -> F2 [label="5", dir="none"];
+              F2 -> A2 [style=dotted, color=red, label="Removed", dir="none"];
+          }
+          subgraph cluster_2 {
+              style = invis;
+              node [shape=circle, style=filled, color=lightgrey];
+
+              A3 -> B3 [label="2", dir="none"];
+              B3 -> C3 [label="3", dir="none"];
+              D3 -> E3 [label="4", dir="none"];
+              E3 -> F3 [label="5", dir="none"];
+              F3 -> A3 [label="1", dir="none"];
+              C3 -> D3 [style=dotted, color=red, label="Removed", dir="none"];
+          }
+      }
+      ```)
+    ]
+  ]
+]
+
 #ans[
   Let $T$ be a minimum spanning tree of $G$. If $(u,v)$ is not in $T$, then $T union \{(u,v)\}$ contains a cycle $C$. Since $(u,v)$ is the minimum-weight edge in $G$ that crosses the cut $(V(T), V - V(T))$, we can replace an edge in $C$ with $(u,v)$ to get a spanning tree $T'$ with $w(T') < w(T)$, which contradicts the assumption that $T$ is a minimum spanning tree.
 ]
@@ -21,6 +62,10 @@ Let $(u,v)$ be a minimum-weight edge in a connected graph $G$. Show that $(u,v)$
 
 Kruskal's algorithm can return different spanning trees for the same input graph $G$, depending on how it breaks ties when the edges are sorted into order. *Show that for each minimum spanning tree $T$ of $G$, there is a way to sort the edges of $G$ in Kruskal's algorithm so that the algorithm returns $T$.*
 
+#rev1_note[
+  排序时引入第二关键字, 标记是否为想要的边. 令树边优先于非树边即可.
+]
+
 #ans[
   Let $T$ be a minimum spanning tree of $G$. We sort the edges of $G$ in nondecreasing order of their weights. If there are ties, we break them arbitrarily. Since $T$ is a minimum spanning tree, the edges of $T$ are sorted before the edges not in $T$. Therefore, Kruskal's algorithm will add the edges of $T$ to the tree before adding any other edges, and the result will be $T$.
 ]
diff --git a/7e1810-algo_hw/hw8.typ b/7e1810-algo_hw/hw8.typ
index 701dfce..8c9448e 100644
--- a/7e1810-algo_hw/hw8.typ
+++ b/7e1810-algo_hw/hw8.typ
@@ -1,14 +1,8 @@
+#import "utils.typ": *
+
 == HW8 (Week 9)
 Due: 2024.05.05
 
-#let ans(it) = [
-  #pad(1em)[
-    #text(fill: blue)[
-      #it
-    ]
-  ]
-]
-
 === Exerciese 1
 Proof that Bellman-Ford maximizes $x_1+x_2+dots.c+x_n$ subject to the constraints $x_j - x_i <= w_(i j)$ for all edges $(i,j)$ and $x <= 0$, and also minmizes $max_i {x_i}-min_i {x_i}$.
 
diff --git a/7e1810-algo_hw/hw9.typ b/7e1810-algo_hw/hw9.typ
index b34a6e3..9fff18d 100644
--- a/7e1810-algo_hw/hw9.typ
+++ b/7e1810-algo_hw/hw9.typ
@@ -1,14 +1,8 @@
+#import "utils.typ": *
+
 == HW9 (Week 11)
 Due: 2024.05.19
 
-#let ans(it) = [
-  #pad(1em)[
-    #text(fill: blue)[
-      #it
-    ]
-  ]
-]
-
 === Question 32.4-1
 Compute the prefix function $pi$ for the pattern `ababbabbabbababbabb`.
 
diff --git a/7e1810-algo_hw/main.typ b/7e1810-algo_hw/main.typ
index 07ce9b0..e2e7eb2 100644
--- a/7e1810-algo_hw/main.typ
+++ b/7e1810-algo_hw/main.typ
@@ -1,6 +1,8 @@
 #import "@preview/cetz:0.2.2": *
 #import "@preview/diagraph:0.2.1": *
 
+#import "utils.typ": *
+
 #set text(
   font: ("linux libertine", "Source Han Serif SC", "Source Han Serif"),
   size: 10pt,
@@ -22,14 +24,38 @@
 
   本文档以 CC BY-NC-SA 4.0 协议发布. 请遵守学术诚信, 不得用于商业用途.
 
+  #rev1_note[
+    \* Revision 2024/06/11: 随期末复习增加了一些注释性内容, 以红色标注.
+
+    参考了助教答案中的部分内容, 在此表示感谢.
+  ]
+
   #image("imgs/sticker_1.jpg", width: 30%)
 ]
 
 #pagebreak(weak: true)
 
+#set page( // page set rule: running header and footer
+  header: [
+    #h(1fr)
+    *USTC/ALGO24 算法基础 书面作业*
+  ],
+  footer: context [ // contextual footer: looks up level-2 headings located before the current position
+    #let headings = query(selector(heading.where(level: 2)).before(here()))
+    #if headings.len() > 0 {
+      text(size: 8pt, headings.last().body) // body of the most recent level-2 heading, if any
+    }
+    #h(1fr)
+    #counter(page).display(
+      "1/1",
+      both: true, // show the current and the final page number
+    )
+  ],
+)
+
 #show math.equation: it => math.display(it)
 #show image: it => align(center, it)
-#show raw.where(block: true): it => rect(stroke: 0.02em, width: 100%, inset: 1em, it)
+#show raw.where(block: true): it => text(fill: blue.lighten(40%), it)
 
 #include "hw1.typ"
 #pagebreak(weak: true)
diff --git a/7e1810-algo_hw/utils.typ b/7e1810-algo_hw/utils.typ
new file mode 100644
index 0000000..eac521d
--- /dev/null
+++ b/7e1810-algo_hw/utils.typ
@@ -0,0 +1,25 @@
+#let ans(it) = [ // Answer block: shows `it` in blue text with 1em of padding.
+  #pad(1em)[
+    #text(fill: blue)[
+      #it
+    ]
+  ]
+]
+
+#let rev1_note(it) = [ // Revision-1 review note: shows `it` in red text inside a box with a 1em inset.
+  #box(inset: 1em)[ // NOTE(review): `box` is an inline-level container; confirm long notes paginate as intended (a `block` may be preferable).
+    #text(fill: red)[
+      #it
+    ]
+  ]
+]
+
+#let notice(it) = [ // TODO banner: full-width rectangle with a thin green stroke around centered green text.
+  #rect(inset: 1em, stroke: 0.02em + green, width: 100%)[
+    #set align(center)
+    #set text(fill: green)
+    *TODO:*
+
+    #it
+  ]
+]
\ No newline at end of file