From 0c697ae1b04de768830bf25a287e1288074c33a6 Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Fri, 23 Feb 2024 17:53:57 -0500 Subject: [PATCH 01/16] Added historical probability field and calculation for PCx --- .../management/commands/recompute_topics.py | 23 ++++++++ backend/courses/models.py | 30 ++++++++++ backend/courses/util.py | 59 +++++++++++++++++++ 3 files changed, 112 insertions(+) diff --git a/backend/courses/management/commands/recompute_topics.py b/backend/courses/management/commands/recompute_topics.py index 6cb508267..42f7b49e3 100644 --- a/backend/courses/management/commands/recompute_topics.py +++ b/backend/courses/management/commands/recompute_topics.py @@ -153,3 +153,26 @@ def handle(self, *args, **kwargs): ), f"--min-semester={min_semester} is not a valid semester." recompute_topics(min_semester, verbose=True, allow_null_parent_topic=bool(min_semester)) + + +def recompute_historical_probabilities(current_semester, verbose=False): + """ + Recomputes the historical probabilities for all topics. + """ + if verbose: + print("Recomputing historical probabilities for all topics...") + topics = Topic.objects.all() + # Iterate over each Topic + for topic in topics: + # Calculate historical_year_probability for the current topic + test = topic.courses.order_by("semester").all() + print(test) + historical_prob = historical_year_probability( + current_semester, + test + ) + # Update the historical_probabilities field for the current topic + topic.historical_probabilities_spring = historical_prob[0] + topic.historical_probabilities_summer = historical_prob[1] + topic.historical_probabilities_fall = historical_prob[0] + topic.save() \ No newline at end of file diff --git a/backend/courses/models.py b/backend/courses/models.py index fb88e94a7..6a2f9c21a 100644 --- a/backend/courses/models.py +++ b/backend/courses/models.py @@ -366,6 +366,36 @@ class Topic(models.Model): ), ) + historical_probabilities_spring = models.FloatField( + default=0, + help_text=dedent( + """ + The historical probability of a student taking a course in this topic in the spring + semester, based on historical data. This field is recomputed nightly from the + `parent_course` graph (in the recompute_soft_state cron job). + """ + ), + ) + historical_probabilities_summer = models.FloatField( + default=0, + help_text=dedent( + """ + The historical probability of a student taking a course in this topic in the summer + semester, based on historical data. This field is recomputed nightly from the + `parent_course` graph (in the recompute_soft_state cron job). + """ + ), + ) + historical_probabilities_fall = models.FloatField( + default=0, + help_text=dedent( + """ + The historical probability of a student taking a course in this topic in the fall + semester, based on historical data. This field is recomputed nightly from the + `parent_course` graph (in the recompute_soft_state cron job). + """ + ), + ) branched_from = models.ForeignKey( "Topic", related_name="branched_to", diff --git a/backend/courses/util.py b/backend/courses/util.py index 8868f7efd..d779bc68b 100644 --- a/backend/courses/util.py +++ b/backend/courses/util.py @@ -712,3 +712,62 @@ def get_semesters(semesters: str = None) -> list[str]: if s not in possible_semesters: raise ValueError(f"Provided semester {s} was not found in the db.") return sorted(semesters) + +def historical_year_probability(current, courses): + prob_distribution = [0.4, 0.3, 0.15, 0.1, 0.05] + + def normalize_and_round(prob, i): + truncate = prob_distribution[:i] + total = sum(truncate) + return list(map(lambda x: round(x / total,3), truncate)) + + def get_semester_and_course_index(semester): + semester_letter = semester[-1] + semester_number = 0 + if semester_letter == "A": + semester_number = 1 + elif semester_letter == "B": + semester_number = 2 + elif semester_letter == "C": + semester_number = 3 + semester_year = int(semester[:-1]) + return (10 * semester_year + semester_number) + + current_index = get_semester_and_course_index(current) + min_index = current_index - 50 + max_index = current_index - 10 + if courses == []: + return [0, 0, 0] + else: + last_index = get_semester_and_course_index(courses[0].semester) + print(last_index) + if last_index > min_index: + prob_distribution = normalize_and_round( + prob_distribution, + ((current_index - last_index) + 9) // 10 + ) + print(max_index) + print(prob_distribution) + p_A = 0 + p_B = 0 + p_C = 0 + for c in courses: + print(c) + index = get_semester_and_course_index(c.semester) + if index < min_index or index > max_index: + continue + diff = (current_index - index) // 10 - 1 + print(c, diff) + if diff >= len(prob_distribution): + diff = len(prob_distribution) - 1 + if index % 10 == 1: + p_A += prob_distribution[diff] + elif index % 10 == 2: + p_B += prob_distribution[diff] + elif index % 10 == 3: + p_C += prob_distribution[diff] + return [ + min(round(p_A,2),1.00), + min(round(p_B,2),1.00), + min(round(p_C,2),1.00) + ] \ No newline at end of file From 26d1e6cf04d1fae6e332a86ac150d8611eac97ea Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Sun, 25 Feb 2024 13:19:56 -0500 Subject: [PATCH 02/16] fixed linting --- .../management/commands/recompute_topics.py | 9 +++------ backend/courses/util.py | 14 +++++--------- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/backend/courses/management/commands/recompute_topics.py b/backend/courses/management/commands/recompute_topics.py index 42f7b49e3..a9d2a7e85 100644 --- a/backend/courses/management/commands/recompute_topics.py +++ b/backend/courses/management/commands/recompute_topics.py @@ -3,7 +3,7 @@ from django.db.models import Count, OuterRef, Subquery from courses.models import Course, Topic -from courses.util import all_semesters +from courses.util import all_semesters, historical_year_probability def garbage_collect_topics(): @@ -167,12 +167,9 @@ def recompute_historical_probabilities(current_semester, verbose=False): # Calculate historical_year_probability for the current topic test = topic.courses.order_by("semester").all() print(test) - historical_prob = historical_year_probability( - current_semester, - test - ) + historical_prob = historical_year_probability(current_semester, test) # Update the historical_probabilities field for the current topic topic.historical_probabilities_spring = historical_prob[0] topic.historical_probabilities_summer = historical_prob[1] topic.historical_probabilities_fall = historical_prob[0] - topic.save() \ No newline at end of file + topic.save() diff --git a/backend/courses/util.py b/backend/courses/util.py index d779bc68b..65133fa2a 100644 --- a/backend/courses/util.py +++ b/backend/courses/util.py @@ -713,13 +713,14 @@ def get_semesters(semesters: str = None) -> list[str]: raise ValueError(f"Provided semester {s} was not found in the db.") return sorted(semesters) + def historical_year_probability(current, courses): prob_distribution = [0.4, 0.3, 0.15, 0.1, 0.05] def normalize_and_round(prob, i): truncate = prob_distribution[:i] total = sum(truncate) - return list(map(lambda x: round(x / total,3), truncate)) + return list(map(lambda x: round(x / total, 3), truncate)) def get_semester_and_course_index(semester): semester_letter = semester[-1] @@ -731,7 +732,7 @@ def get_semester_and_course_index(semester): elif semester_letter == "C": semester_number = 3 semester_year = int(semester[:-1]) - return (10 * semester_year + semester_number) + return 10 * semester_year + semester_number current_index = get_semester_and_course_index(current) min_index = current_index - 50 @@ -743,8 +744,7 @@ def get_semester_and_course_index(semester): print(last_index) if last_index > min_index: prob_distribution = normalize_and_round( - prob_distribution, - ((current_index - last_index) + 9) // 10 + prob_distribution, ((current_index - last_index) + 9) // 10 ) print(max_index) print(prob_distribution) @@ -766,8 +766,4 @@ def get_semester_and_course_index(semester): p_B += prob_distribution[diff] elif index % 10 == 3: p_C += prob_distribution[diff] - return [ - min(round(p_A,2),1.00), - min(round(p_B,2),1.00), - min(round(p_C,2),1.00) - ] \ No newline at end of file + return [min(round(p_A, 2), 1.00), min(round(p_B, 2), 1.00), min(round(p_C, 2), 1.00)] From 1c5d44cb813a6e9ea609c94073f3127ef06c41ad Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Sun, 25 Feb 2024 13:32:38 -0500 Subject: [PATCH 03/16] fixed migrations --- .../migrations/0064_auto_20240225_1331.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 backend/courses/migrations/0064_auto_20240225_1331.py diff --git a/backend/courses/migrations/0064_auto_20240225_1331.py b/backend/courses/migrations/0064_auto_20240225_1331.py new file mode 100644 index 000000000..4815eea8b --- /dev/null +++ b/backend/courses/migrations/0064_auto_20240225_1331.py @@ -0,0 +1,33 @@ +# Generated by Django 3.2.23 on 2024-02-25 18:31 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('courses', '0063_auto_20231212_1750'), + ] + + operations = [ + migrations.AddField( + model_name='topic', + name='historical_probabilities_fall', + field=models.FloatField(default=0, help_text='\nThe historical probability of a student taking a course in this topic in the fall\nsemester, based on historical data. This field is recomputed nightly from the\n`parent_course` graph (in the recompute_soft_state cron job).\n'), + ), + migrations.AddField( + model_name='topic', + name='historical_probabilities_spring', + field=models.FloatField(default=0, help_text='\nThe historical probability of a student taking a course in this topic in the spring\nsemester, based on historical data. This field is recomputed nightly from the\n`parent_course` graph (in the recompute_soft_state cron job).\n'), + ), + migrations.AddField( + model_name='topic', + name='historical_probabilities_summer', + field=models.FloatField(default=0, help_text='\nThe historical probability of a student taking a course in this topic in the summer\nsemester, based on historical data. This field is recomputed nightly from the\n`parent_course` graph (in the recompute_soft_state cron job).\n'), + ), + migrations.AlterField( + model_name='section', + name='activity', + field=models.CharField(choices=[('', 'Undefined'), ('CLN', 'Clinic'), ('CRT', 'Clinical Rotation'), ('DAB', 'Dissertation Abroad'), ('DIS', 'Dissertation'), ('DPC', 'Doctoral Program Exchange'), ('FLD', 'Field Work'), ('HYB', 'Hybrid'), ('IND', 'Independent Study'), ('LAB', 'Lab'), ('LEC', 'Lecture'), ('MST', 'Masters Thesis'), ('ONL', 'Online'), ('PRC', 'Practicum'), ('REC', 'Recitation'), ('SEM', 'Seminar'), ('SRT', 'Senior Thesis'), ('STU', 'Studio')], db_index=True, help_text='The section activity, e.g. `LEC` for CIS-120-001 (2020A). Options and meanings:
"""Undefined"
"CLN""Clinic"
"CRT""Clinical Rotation"
"DAB""Dissertation Abroad"
"DIS""Dissertation"
"DPC""Doctoral Program Exchange"
"FLD""Field Work"
"HYB""Hybrid"
"IND""Independent Study"
"LAB""Lab"
"LEC""Lecture"
"MST""Masters Thesis"
"ONL""Online"
"PRC""Practicum"
"REC""Recitation"
"SEM""Seminar"
"SRT""Senior Thesis"
"STU""Studio"
', max_length=50), + ), + ] From 6d46f54869496fea77ec52637e9b0f1238632592 Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Sun, 25 Feb 2024 13:37:33 -0500 Subject: [PATCH 04/16] fixed linting for migration --- .../migrations/0064_auto_20240225_1331.py | 59 +++++++++++++++---- 1 file changed, 46 insertions(+), 13 deletions(-) diff --git a/backend/courses/migrations/0064_auto_20240225_1331.py b/backend/courses/migrations/0064_auto_20240225_1331.py index 4815eea8b..ec711555b 100644 --- a/backend/courses/migrations/0064_auto_20240225_1331.py +++ b/backend/courses/migrations/0064_auto_20240225_1331.py @@ -6,28 +6,61 @@ class Migration(migrations.Migration): dependencies = [ - ('courses', '0063_auto_20231212_1750'), + ("courses", "0063_auto_20231212_1750"), ] operations = [ migrations.AddField( - model_name='topic', - name='historical_probabilities_fall', - field=models.FloatField(default=0, help_text='\nThe historical probability of a student taking a course in this topic in the fall\nsemester, based on historical data. This field is recomputed nightly from the\n`parent_course` graph (in the recompute_soft_state cron job).\n'), + model_name="topic", + name="historical_probabilities_fall", + field=models.FloatField( + default=0, + help_text="\nThe historical probability of a student taking a course in this topic in the fall\nsemester, based on historical data. This field is recomputed nightly from the\n`parent_course` graph (in the recompute_soft_state cron job).\n", + ), ), migrations.AddField( - model_name='topic', - name='historical_probabilities_spring', - field=models.FloatField(default=0, help_text='\nThe historical probability of a student taking a course in this topic in the spring\nsemester, based on historical data. This field is recomputed nightly from the\n`parent_course` graph (in the recompute_soft_state cron job).\n'), + model_name="topic", + name="historical_probabilities_spring", + field=models.FloatField( + default=0, + help_text="\nThe historical probability of a student taking a course in this topic in the spring\nsemester, based on historical data. This field is recomputed nightly from the\n`parent_course` graph (in the recompute_soft_state cron job).\n", + ), ), migrations.AddField( - model_name='topic', - name='historical_probabilities_summer', - field=models.FloatField(default=0, help_text='\nThe historical probability of a student taking a course in this topic in the summer\nsemester, based on historical data. This field is recomputed nightly from the\n`parent_course` graph (in the recompute_soft_state cron job).\n'), + model_name="topic", + name="historical_probabilities_summer", + field=models.FloatField( + default=0, + help_text="\nThe historical probability of a student taking a course in this topic in the summer\nsemester, based on historical data. This field is recomputed nightly from the\n`parent_course` graph (in the recompute_soft_state cron job).\n", + ), ), migrations.AlterField( - model_name='section', - name='activity', - field=models.CharField(choices=[('', 'Undefined'), ('CLN', 'Clinic'), ('CRT', 'Clinical Rotation'), ('DAB', 'Dissertation Abroad'), ('DIS', 'Dissertation'), ('DPC', 'Doctoral Program Exchange'), ('FLD', 'Field Work'), ('HYB', 'Hybrid'), ('IND', 'Independent Study'), ('LAB', 'Lab'), ('LEC', 'Lecture'), ('MST', 'Masters Thesis'), ('ONL', 'Online'), ('PRC', 'Practicum'), ('REC', 'Recitation'), ('SEM', 'Seminar'), ('SRT', 'Senior Thesis'), ('STU', 'Studio')], db_index=True, help_text='The section activity, e.g. `LEC` for CIS-120-001 (2020A). Options and meanings:
"""Undefined"
"CLN""Clinic"
"CRT""Clinical Rotation"
"DAB""Dissertation Abroad"
"DIS""Dissertation"
"DPC""Doctoral Program Exchange"
"FLD""Field Work"
"HYB""Hybrid"
"IND""Independent Study"
"LAB""Lab"
"LEC""Lecture"
"MST""Masters Thesis"
"ONL""Online"
"PRC""Practicum"
"REC""Recitation"
"SEM""Seminar"
"SRT""Senior Thesis"
"STU""Studio"
', max_length=50), + model_name="section", + name="activity", + field=models.CharField( + choices=[ + ("", "Undefined"), + ("CLN", "Clinic"), + ("CRT", "Clinical Rotation"), + ("DAB", "Dissertation Abroad"), + ("DIS", "Dissertation"), + ("DPC", "Doctoral Program Exchange"), + ("FLD", "Field Work"), + ("HYB", "Hybrid"), + ("IND", "Independent Study"), + ("LAB", "Lab"), + ("LEC", "Lecture"), + ("MST", "Masters Thesis"), + ("ONL", "Online"), + ("PRC", "Practicum"), + ("REC", "Recitation"), + ("SEM", "Seminar"), + ("SRT", "Senior Thesis"), + ("STU", "Studio"), + ], + db_index=True, + help_text='The section activity, e.g. `LEC` for CIS-120-001 (2020A). Options and meanings:
"""Undefined"
"CLN""Clinic"
"CRT""Clinical Rotation"
"DAB""Dissertation Abroad"
"DIS""Dissertation"
"DPC""Doctoral Program Exchange"
"FLD""Field Work"
"HYB""Hybrid"
"IND""Independent Study"
"LAB""Lab"
"LEC""Lecture"
"MST""Masters Thesis"
"ONL""Online"
"PRC""Practicum"
"REC""Recitation"
"SEM""Seminar"
"SRT""Senior Thesis"
"STU""Studio"
', + max_length=50, + ), ), ] From 4f349d297906904dc76872b03e81105ebeb8f114 Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Sun, 25 Feb 2024 13:46:19 -0500 Subject: [PATCH 05/16] Added descriptors, and improved some semantics --- backend/courses/management/commands/recompute_topics.py | 1 - backend/courses/util.py | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/backend/courses/management/commands/recompute_topics.py b/backend/courses/management/commands/recompute_topics.py index a9d2a7e85..a6a35eb45 100644 --- a/backend/courses/management/commands/recompute_topics.py +++ b/backend/courses/management/commands/recompute_topics.py @@ -166,7 +166,6 @@ def recompute_historical_probabilities(current_semester, verbose=False): for topic in topics: # Calculate historical_year_probability for the current topic test = topic.courses.order_by("semester").all() - print(test) historical_prob = historical_year_probability(current_semester, test) # Update the historical_probabilities field for the current topic topic.historical_probabilities_spring = historical_prob[0] diff --git a/backend/courses/util.py b/backend/courses/util.py index 65133fa2a..4ddc10e5b 100644 --- a/backend/courses/util.py +++ b/backend/courses/util.py @@ -718,11 +718,14 @@ def historical_year_probability(current, courses): prob_distribution = [0.4, 0.3, 0.15, 0.1, 0.05] def normalize_and_round(prob, i): + ''' Modifies the probability distribution to account for the + fact that the last course was taken i semesters ago. ''' truncate = prob_distribution[:i] total = sum(truncate) return list(map(lambda x: round(x / total, 3), truncate)) def get_semester_and_course_index(semester): + ''' Returns an integer representing the semester. ''' semester_letter = semester[-1] semester_number = 0 if semester_letter == "A": @@ -741,23 +744,19 @@ def get_semester_and_course_index(semester): return [0, 0, 0] else: last_index = get_semester_and_course_index(courses[0].semester) - print(last_index) if last_index > min_index: prob_distribution = normalize_and_round( prob_distribution, ((current_index - last_index) + 9) // 10 ) - print(max_index) - print(prob_distribution) p_A = 0 p_B = 0 p_C = 0 for c in courses: - print(c) + '''Provides calculation''' index = get_semester_and_course_index(c.semester) if index < min_index or index > max_index: continue diff = (current_index - index) // 10 - 1 - print(c, diff) if diff >= len(prob_distribution): diff = len(prob_distribution) - 1 if index % 10 == 1: From cc3d385a44c1775d0b2bf6f32b9c01f3aeec134b Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Sun, 25 Feb 2024 13:51:35 -0500 Subject: [PATCH 06/16] Fixed style --- backend/courses/util.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/courses/util.py b/backend/courses/util.py index 4ddc10e5b..5c4b668d0 100644 --- a/backend/courses/util.py +++ b/backend/courses/util.py @@ -718,14 +718,14 @@ def historical_year_probability(current, courses): prob_distribution = [0.4, 0.3, 0.15, 0.1, 0.05] def normalize_and_round(prob, i): - ''' Modifies the probability distribution to account for the - fact that the last course was taken i semesters ago. ''' + """Modifies the probability distribution to account for the + fact that the last course was taken i semesters ago.""" truncate = prob_distribution[:i] total = sum(truncate) return list(map(lambda x: round(x / total, 3), truncate)) def get_semester_and_course_index(semester): - ''' Returns an integer representing the semester. ''' + """Returns an integer representing the semester.""" semester_letter = semester[-1] semester_number = 0 if semester_letter == "A": @@ -752,7 +752,7 @@ def get_semester_and_course_index(semester): p_B = 0 p_C = 0 for c in courses: - '''Provides calculation''' + """Provides calculation""" index = get_semester_and_course_index(c.semester) if index < min_index or index > max_index: continue From 7d210f190db8fa1098068facab6fb12eacb272c7 Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Tue, 12 Mar 2024 22:40:49 -0400 Subject: [PATCH 07/16] updated and fixes --- .../commands/recompute_probabilities.py | 17 +++++++++++ .../management/commands/recompute_topics.py | 14 +++++---- backend/courses/util.py | 30 +++++++++---------- 3 files changed, 40 insertions(+), 21 deletions(-) create mode 100644 backend/courses/management/commands/recompute_probabilities.py diff --git a/backend/courses/management/commands/recompute_probabilities.py b/backend/courses/management/commands/recompute_probabilities.py new file mode 100644 index 000000000..228eedd98 --- /dev/null +++ b/backend/courses/management/commands/recompute_probabilities.py @@ -0,0 +1,17 @@ +def recompute_historical_probabilities(current_semester, verbose=False): + """ + Recomputes the historical probabilities for all topics. + """ + if verbose: + print("Recomputing historical probabilities for all topics...") + topics = Topic.objects.all() + # Iterate over each Topic + for topic in topics: + # Calculate historical_year_probability for the current topic + test = topic.courses.order_by("semester").all() + historical_prob = historical_year_probability(current_semester, test) + # Update the historical_probabilities field for the current topic + topic.historical_probabilities_spring = historical_prob[0] + topic.historical_probabilities_summer = historical_prob[1] + topic.historical_probabilities_fall = historical_prob[0] + topic.save() \ No newline at end of file diff --git a/backend/courses/management/commands/recompute_topics.py b/backend/courses/management/commands/recompute_topics.py index a6a35eb45..b3caf715e 100644 --- a/backend/courses/management/commands/recompute_topics.py +++ b/backend/courses/management/commands/recompute_topics.py @@ -145,15 +145,16 @@ def handle(self, *args, **kwargs): "If an error is encountered, all changes for that semester will be rolled back. " "Any changes made to previous semesters will persist." ) - + min_semester = kwargs["min_semester"] if min_semester: assert ( min_semester in all_semesters() ), f"--min-semester={min_semester} is not a valid semester." - + semesters = sorted([sem for sem in all_semesters() if not min_semester or sem >= min_semester]) recompute_topics(min_semester, verbose=True, allow_null_parent_topic=bool(min_semester)) - + if semesters: + recompute_historical_probabilities(current_semester=semesters[-1], verbose=True) def recompute_historical_probabilities(current_semester, verbose=False): """ @@ -162,13 +163,16 @@ def recompute_historical_probabilities(current_semester, verbose=False): if verbose: print("Recomputing historical probabilities for all topics...") topics = Topic.objects.all() + length = len(topics) # Iterate over each Topic - for topic in topics: + for i, topic in enumerate(topics): + if i % 1000 == 0: + print(f"Recomputing topics for semesters >={i}/{length}") # Calculate historical_year_probability for the current topic test = topic.courses.order_by("semester").all() historical_prob = historical_year_probability(current_semester, test) # Update the historical_probabilities field for the current topic topic.historical_probabilities_spring = historical_prob[0] topic.historical_probabilities_summer = historical_prob[1] - topic.historical_probabilities_fall = historical_prob[0] + topic.historical_probabilities_fall = historical_prob[2] topic.save() diff --git a/backend/courses/util.py b/backend/courses/util.py index 5c4b668d0..b8fed3f88 100644 --- a/backend/courses/util.py +++ b/backend/courses/util.py @@ -715,6 +715,11 @@ def get_semesters(semesters: str = None) -> list[str]: def historical_year_probability(current, courses): + """ + current: the current semester represented in the 20XX(A|B|C) format + courses: a list of Course objects sorted by date in ascending order + Returns a list of 3 probabilities representing the likelihood of taking a course in each semester + """ prob_distribution = [0.4, 0.3, 0.15, 0.1, 0.05] def normalize_and_round(prob, i): @@ -736,33 +741,26 @@ def get_semester_and_course_index(semester): semester_number = 3 semester_year = int(semester[:-1]) return 10 * semester_year + semester_number - - current_index = get_semester_and_course_index(current) - min_index = current_index - 50 + + current_index = int(translate_semester(current)) // 10 + min_index = current_index - 60 max_index = current_index - 10 + p = [0, 0, 0] if courses == []: - return [0, 0, 0] + return p else: - last_index = get_semester_and_course_index(courses[0].semester) + last_index = int(translate_semester(courses[0].semester)) // 10 if last_index > min_index: prob_distribution = normalize_and_round( prob_distribution, ((current_index - last_index) + 9) // 10 ) - p_A = 0 - p_B = 0 - p_C = 0 for c in courses: """Provides calculation""" - index = get_semester_and_course_index(c.semester) + index = int(translate_semester(c.semester)) // 10 if index < min_index or index > max_index: continue diff = (current_index - index) // 10 - 1 if diff >= len(prob_distribution): diff = len(prob_distribution) - 1 - if index % 10 == 1: - p_A += prob_distribution[diff] - elif index % 10 == 2: - p_B += prob_distribution[diff] - elif index % 10 == 3: - p_C += prob_distribution[diff] - return [min(round(p_A, 2), 1.00), min(round(p_B, 2), 1.00), min(round(p_C, 2), 1.00)] + p[index % 10 - 1] += prob_distribution[diff] + return list(map(lambda x: min(round(x,2), 1.00) , p)) From 7a156cfbaaf13a029dd09cb4d526b5df7e56a875 Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Tue, 12 Mar 2024 23:09:36 -0400 Subject: [PATCH 08/16] cleanup and deleted a file --- .../commands/recompute_probabilities.py | 17 ----------------- .../management/commands/recompute_topics.py | 7 +++++-- backend/courses/util.py | 4 ++-- 3 files changed, 7 insertions(+), 21 deletions(-) delete mode 100644 backend/courses/management/commands/recompute_probabilities.py diff --git a/backend/courses/management/commands/recompute_probabilities.py b/backend/courses/management/commands/recompute_probabilities.py deleted file mode 100644 index 228eedd98..000000000 --- a/backend/courses/management/commands/recompute_probabilities.py +++ /dev/null @@ -1,17 +0,0 @@ -def recompute_historical_probabilities(current_semester, verbose=False): - """ - Recomputes the historical probabilities for all topics. - """ - if verbose: - print("Recomputing historical probabilities for all topics...") - topics = Topic.objects.all() - # Iterate over each Topic - for topic in topics: - # Calculate historical_year_probability for the current topic - test = topic.courses.order_by("semester").all() - historical_prob = historical_year_probability(current_semester, test) - # Update the historical_probabilities field for the current topic - topic.historical_probabilities_spring = historical_prob[0] - topic.historical_probabilities_summer = historical_prob[1] - topic.historical_probabilities_fall = historical_prob[0] - topic.save() \ No newline at end of file diff --git a/backend/courses/management/commands/recompute_topics.py b/backend/courses/management/commands/recompute_topics.py index b3caf715e..43c8c5eb7 100644 --- a/backend/courses/management/commands/recompute_topics.py +++ b/backend/courses/management/commands/recompute_topics.py @@ -145,17 +145,20 @@ def handle(self, *args, **kwargs): "If an error is encountered, all changes for that semester will be rolled back. " "Any changes made to previous semesters will persist." ) - + min_semester = kwargs["min_semester"] if min_semester: assert ( min_semester in all_semesters() ), f"--min-semester={min_semester} is not a valid semester." - semesters = sorted([sem for sem in all_semesters() if not min_semester or sem >= min_semester]) + semesters = sorted( + [sem for sem in all_semesters() if not min_semester or sem >= min_semester] + ) recompute_topics(min_semester, verbose=True, allow_null_parent_topic=bool(min_semester)) if semesters: recompute_historical_probabilities(current_semester=semesters[-1], verbose=True) + def recompute_historical_probabilities(current_semester, verbose=False): """ Recomputes the historical probabilities for all topics. diff --git a/backend/courses/util.py b/backend/courses/util.py index b8fed3f88..180731c25 100644 --- a/backend/courses/util.py +++ b/backend/courses/util.py @@ -741,7 +741,7 @@ def get_semester_and_course_index(semester): semester_number = 3 semester_year = int(semester[:-1]) return 10 * semester_year + semester_number - + current_index = int(translate_semester(current)) // 10 min_index = current_index - 60 max_index = current_index - 10 @@ -763,4 +763,4 @@ def get_semester_and_course_index(semester): if diff >= len(prob_distribution): diff = len(prob_distribution) - 1 p[index % 10 - 1] += prob_distribution[diff] - return list(map(lambda x: min(round(x,2), 1.00) , p)) + return list(map(lambda x: min(round(x, 2), 1.00), p)) From 3006fdf405436f36b51556e6f6ae1f83e8c4bdcd Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Tue, 12 Mar 2024 23:18:49 -0400 Subject: [PATCH 09/16] style and linting --- backend/courses/util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/courses/util.py b/backend/courses/util.py index 180731c25..5c8dcd5d0 100644 --- a/backend/courses/util.py +++ b/backend/courses/util.py @@ -718,7 +718,8 @@ def historical_year_probability(current, courses): """ current: the current semester represented in the 20XX(A|B|C) format courses: a list of Course objects sorted by date in ascending order - Returns a list of 3 probabilities representing the likelihood of taking a course in each semester + Returns a list of 3 probabilities representing the likelihood of taking a course in each + semester """ prob_distribution = [0.4, 0.3, 0.15, 0.1, 0.05] From 0b26cd5bfd459d9beffa2b405dfe0c49200ddfc1 Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Sun, 24 Mar 2024 12:47:37 -0400 Subject: [PATCH 10/16] Deleted unused functions --- .../management/commands/recompute_topics.py | 10 +++---- backend/courses/util.py | 27 ++++++------------- 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/backend/courses/management/commands/recompute_topics.py b/backend/courses/management/commands/recompute_topics.py index 43c8c5eb7..44f16c84f 100644 --- a/backend/courses/management/commands/recompute_topics.py +++ b/backend/courses/management/commands/recompute_topics.py @@ -3,7 +3,7 @@ from django.db.models import Count, OuterRef, Subquery from courses.models import Course, Topic -from courses.util import all_semesters, historical_year_probability +from courses.util import all_semesters, historical_semester_probability def garbage_collect_topics(): @@ -156,10 +156,10 @@ def handle(self, *args, **kwargs): ) recompute_topics(min_semester, verbose=True, allow_null_parent_topic=bool(min_semester)) if semesters: - recompute_historical_probabilities(current_semester=semesters[-1], verbose=True) + recompute_historical_semester_probabilities(current_semester=semesters[-1], verbose=True) -def recompute_historical_probabilities(current_semester, verbose=False): +def recompute_historical_semester_probabilities(current_semester, verbose=False): """ Recomputes the historical probabilities for all topics. """ @@ -172,8 +172,8 @@ def recompute_historical_probabilities(current_semester, verbose=False): if i % 1000 == 0: print(f"Recomputing topics for semesters >={i}/{length}") # Calculate historical_year_probability for the current topic - test = topic.courses.order_by("semester").all() - historical_prob = historical_year_probability(current_semester, test) + ordered_courses = topic.courses.order_by("semester").all() + historical_prob = historical_semester_probability(current_semester, ordered_courses) # Update the historical_probabilities field for the current topic topic.historical_probabilities_spring = historical_prob[0] topic.historical_probabilities_summer = historical_prob[1] diff --git a/backend/courses/util.py b/backend/courses/util.py index 5c8dcd5d0..2fcc95ade 100644 --- a/backend/courses/util.py +++ b/backend/courses/util.py @@ -714,12 +714,14 @@ def get_semesters(semesters: str = None) -> list[str]: return sorted(semesters) -def historical_year_probability(current, courses): +def historical_semester_probability(current, courses): """ - current: the current semester represented in the 20XX(A|B|C) format - courses: a list of Course objects sorted by date in ascending order - Returns a list of 3 probabilities representing the likelihood of taking a course in each - semester + :param current: The current semester represented in the 20XX(A|B|C) format. + :type current: str + :param courses: A list of Course objects sorted by date in ascending order. + :type courses: list + :returns: A list of 3 probabilities representing the likelihood of taking a course in each semester. + :rtype: list """ prob_distribution = [0.4, 0.3, 0.15, 0.1, 0.05] @@ -730,19 +732,6 @@ def normalize_and_round(prob, i): total = sum(truncate) return list(map(lambda x: round(x / total, 3), truncate)) - def get_semester_and_course_index(semester): - """Returns an integer representing the semester.""" - semester_letter = semester[-1] - semester_number = 0 - if semester_letter == "A": - semester_number = 1 - elif semester_letter == "B": - semester_number = 2 - elif semester_letter == "C": - semester_number = 3 - semester_year = int(semester[:-1]) - return 10 * semester_year + semester_number - current_index = int(translate_semester(current)) // 10 min_index = current_index - 60 max_index = current_index - 10 @@ -756,10 +745,10 @@ def get_semester_and_course_index(semester): prob_distribution, ((current_index - last_index) + 9) // 10 ) for c in courses: - """Provides calculation""" index = int(translate_semester(c.semester)) // 10 if index < min_index or index > max_index: continue + # Diff is the number of years ago the course was taken diff = (current_index - index) // 10 - 1 if diff >= len(prob_distribution): diff = len(prob_distribution) - 1 From 8eaf0cdeddfd9e9b235db149964b5be8f680c2cc Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Sun, 24 Mar 2024 12:57:04 -0400 Subject: [PATCH 11/16] Add order_by to the end --- backend/courses/management/commands/recompute_topics.py | 7 +++++-- backend/courses/util.py | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/backend/courses/management/commands/recompute_topics.py b/backend/courses/management/commands/recompute_topics.py index 44f16c84f..5bfa9b645 100644 --- a/backend/courses/management/commands/recompute_topics.py +++ b/backend/courses/management/commands/recompute_topics.py @@ -156,7 +156,10 @@ def handle(self, *args, **kwargs): ) recompute_topics(min_semester, verbose=True, allow_null_parent_topic=bool(min_semester)) if semesters: - recompute_historical_semester_probabilities(current_semester=semesters[-1], verbose=True) + recompute_historical_semester_probabilities( + current_semester=semesters[-1], + verbose=True + ) def recompute_historical_semester_probabilities(current_semester, verbose=False): @@ -172,7 +175,7 @@ def recompute_historical_semester_probabilities(current_semester, verbose=False) if i % 1000 == 0: print(f"Recomputing topics for semesters >={i}/{length}") # Calculate historical_year_probability for the current topic - ordered_courses = topic.courses.order_by("semester").all() + ordered_courses = topic.courses.all().order_by("semester") historical_prob = historical_semester_probability(current_semester, ordered_courses) # Update the historical_probabilities field for the current topic topic.historical_probabilities_spring = historical_prob[0] diff --git a/backend/courses/util.py b/backend/courses/util.py index 2fcc95ade..66b994029 100644 --- a/backend/courses/util.py +++ b/backend/courses/util.py @@ -720,7 +720,8 @@ def historical_semester_probability(current, courses): :type current: str :param courses: A list of Course objects sorted by date in ascending order. :type courses: list - :returns: A list of 3 probabilities representing the likelihood of taking a course in each semester. + :returns: A list of 3 probabilities representing the likelihood of + taking a course in each semester. :rtype: list """ prob_distribution = [0.4, 0.3, 0.15, 0.1, 0.05] From 7dd798178a96f62b2f8931f2ffcf3c946548b239 Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Sun, 24 Mar 2024 13:03:03 -0400 Subject: [PATCH 12/16] Fixed styling and linting --- backend/courses/management/commands/recompute_topics.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/courses/management/commands/recompute_topics.py b/backend/courses/management/commands/recompute_topics.py index 5bfa9b645..9eb036805 100644 --- a/backend/courses/management/commands/recompute_topics.py +++ b/backend/courses/management/commands/recompute_topics.py @@ -157,8 +157,7 @@ def handle(self, *args, **kwargs): recompute_topics(min_semester, verbose=True, allow_null_parent_topic=bool(min_semester)) if semesters: recompute_historical_semester_probabilities( - current_semester=semesters[-1], - verbose=True + current_semester=semesters[-1], verbose=True ) From 6ef7189a06a27ab95a5ba883d9128676d3f35add Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Sun, 24 Mar 2024 13:41:41 -0400 Subject: [PATCH 13/16] Added tqdm --- backend/courses/management/commands/recompute_topics.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/backend/courses/management/commands/recompute_topics.py b/backend/courses/management/commands/recompute_topics.py index 9eb036805..ae1bcf82d 100644 --- a/backend/courses/management/commands/recompute_topics.py +++ b/backend/courses/management/commands/recompute_topics.py @@ -1,6 +1,7 @@ from django.core.management.base import BaseCommand from django.db import transaction from django.db.models import Count, OuterRef, Subquery +from tqdm import tqdm from courses.models import Course, Topic from courses.util import all_semesters, historical_semester_probability @@ -168,11 +169,8 @@ def recompute_historical_semester_probabilities(current_semester, verbose=False) if verbose: print("Recomputing historical probabilities for all topics...") topics = Topic.objects.all() - length = len(topics) # Iterate over each Topic - for i, topic in enumerate(topics): - if i % 1000 == 0: - print(f"Recomputing topics for semesters >={i}/{length}") + for i, topic in tqdm(enumerate(topics)): # Calculate historical_year_probability for the current topic ordered_courses = topic.courses.all().order_by("semester") historical_prob = historical_semester_probability(current_semester, ordered_courses) From 7620241583d5a594a610d94a7fe012a21e2b7a2a Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Mon, 8 Apr 2024 03:16:02 -0400 Subject: [PATCH 14/16] reformat historical probabilities util fn --- .../management/commands/recompute_topics.py | 5 +- backend/courses/util.py | 57 +++++++++++-------- 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/backend/courses/management/commands/recompute_topics.py b/backend/courses/management/commands/recompute_topics.py index ae1bcf82d..53e8bb166 100644 --- a/backend/courses/management/commands/recompute_topics.py +++ b/backend/courses/management/commands/recompute_topics.py @@ -170,10 +170,11 @@ def recompute_historical_semester_probabilities(current_semester, verbose=False) print("Recomputing historical probabilities for all topics...") topics = Topic.objects.all() # Iterate over each Topic - for i, topic in tqdm(enumerate(topics)): + for i, topic in tqdm(enumerate(topics), disable=not verbose, total=topics.count()): # Calculate historical_year_probability for the current topic ordered_courses = topic.courses.all().order_by("semester") - historical_prob = historical_semester_probability(current_semester, ordered_courses) + ordered_semester = [course.semester for course in ordered_courses] + historical_prob = historical_semester_probability(current_semester, ordered_semester) # Update the historical_probabilities field for the current topic topic.historical_probabilities_spring = historical_prob[0] topic.historical_probabilities_summer = historical_prob[1] diff --git a/backend/courses/util.py b/backend/courses/util.py index 66b994029..586cac58f 100644 --- a/backend/courses/util.py +++ b/backend/courses/util.py @@ -714,7 +714,7 @@ def get_semesters(semesters: str = None) -> list[str]: return sorted(semesters) -def historical_semester_probability(current, courses): +def historical_semester_probability(current_semester: str, semesters: list[str]): """ :param current: The current semester represented in the 20XX(A|B|C) format. :type current: str @@ -724,34 +724,41 @@ def historical_semester_probability(current, courses): taking a course in each semester. :rtype: list """ - prob_distribution = [0.4, 0.3, 0.15, 0.1, 0.05] + PROB_DISTRIBUTION = [0.4, 0.3, 0.15, 0.1, 0.05] def normalize_and_round(prob, i): """Modifies the probability distribution to account for the - fact that the last course was taken i semesters ago.""" - truncate = prob_distribution[:i] + fact that the last course was taken i years ago.""" + truncate = PROB_DISTRIBUTION[:i] total = sum(truncate) return list(map(lambda x: round(x / total, 3), truncate)) - current_index = int(translate_semester(current)) // 10 - min_index = current_index - 60 - max_index = current_index - 10 - p = [0, 0, 0] - if courses == []: - return p + semester_probabilities = {"A": 0.0, "B": 0.0, "C": 0.0} + current_year = int(current_semester[:-1]) + semesters = [ + semester + for semester in semesters + if semester < str(current_year) and semester > str(current_year - 5) + ] + if not semesters: + return [0, 0, 0] + if current_year - int(semesters[0][:-1]) < 5: + # If the class hasn't been offered in the last 5 years, + # we make sure the resulting probabilities sum to 1 + modified_prob_distribution = normalize_and_round( + PROB_DISTRIBUTION, current_year - int(semesters[0][:-1]) + ) else: - last_index = int(translate_semester(courses[0].semester)) // 10 - if last_index > min_index: - prob_distribution = normalize_and_round( - prob_distribution, ((current_index - last_index) + 9) // 10 - ) - for c in courses: - index = int(translate_semester(c.semester)) // 10 - if index < min_index or index > max_index: - continue - # Diff is the number of years ago the course was taken - diff = (current_index - index) // 10 - 1 - if diff >= len(prob_distribution): - diff = len(prob_distribution) - 1 - p[index % 10 - 1] += prob_distribution[diff] - return list(map(lambda x: min(round(x, 2), 1.00), p)) + modified_prob_distribution = PROB_DISTRIBUTION + for historical_semester in semesters: + historical_year = int(historical_semester[:-1]) + sem_char = historical_semester[-1].upper() # A, B, C + semester_probabilities[sem_char] += modified_prob_distribution[ + current_year - historical_year - 1 + ] + return list( + map( + lambda x: min(round(x, 2), 1.00), + [semester_probabilities["A"], semester_probabilities["B"], semester_probabilities["C"]], + ) + ) From e3e8ad8601b73c614ead49852708875bcbee6b33 Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Mon, 8 Apr 2024 03:24:25 -0400 Subject: [PATCH 15/16] deleted conditional in recompute_topics --- backend/courses/management/commands/recompute_topics.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/backend/courses/management/commands/recompute_topics.py b/backend/courses/management/commands/recompute_topics.py index 53e8bb166..3d6c8b3a9 100644 --- a/backend/courses/management/commands/recompute_topics.py +++ b/backend/courses/management/commands/recompute_topics.py @@ -156,10 +156,8 @@ def handle(self, *args, **kwargs): [sem for sem in all_semesters() if not min_semester or sem >= min_semester] ) recompute_topics(min_semester, verbose=True, allow_null_parent_topic=bool(min_semester)) - if semesters: - recompute_historical_semester_probabilities( - current_semester=semesters[-1], verbose=True - ) + recompute_historical_semester_probabilities(current_semester=semesters[-1], verbose=True) + def recompute_historical_semester_probabilities(current_semester, verbose=False): From 2ba26d292592f0c7cc838215bd8f9fe72ba4fd8f Mon Sep 17 00:00:00 2001 From: Daniel Zhao Date: Mon, 8 Apr 2024 03:29:31 -0400 Subject: [PATCH 16/16] lint --- backend/courses/management/commands/recompute_topics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/courses/management/commands/recompute_topics.py b/backend/courses/management/commands/recompute_topics.py index 3d6c8b3a9..c56b3094c 100644 --- a/backend/courses/management/commands/recompute_topics.py +++ b/backend/courses/management/commands/recompute_topics.py @@ -157,7 +157,6 @@ def handle(self, *args, **kwargs): ) recompute_topics(min_semester, verbose=True, allow_null_parent_topic=bool(min_semester)) recompute_historical_semester_probabilities(current_semester=semesters[-1], verbose=True) - def recompute_historical_semester_probabilities(current_semester, verbose=False):