Skip to content

Commit

Permalink
Merge pull request pennlabs#597 from pennlabs/pcx-history-probabilities
Browse files Browse the repository at this point in the history
PCX history probabilities for PDP usage
  • Loading branch information
el-agua authored Apr 10, 2024
2 parents 0d78349 + 2ba26d2 commit 98f39b8
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 2 deletions.
28 changes: 26 additions & 2 deletions backend/courses/management/commands/recompute_topics.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from django.core.management.base import BaseCommand
from django.db import transaction
from django.db.models import Count, OuterRef, Subquery
from tqdm import tqdm

from courses.models import Course, Topic
from courses.util import all_semesters
from courses.util import all_semesters, historical_semester_probability


def garbage_collect_topics():
Expand Down Expand Up @@ -151,5 +152,28 @@ def handle(self, *args, **kwargs):
assert (
min_semester in all_semesters()
), f"--min-semester={min_semester} is not a valid semester."

semesters = sorted(
[sem for sem in all_semesters() if not min_semester or sem >= min_semester]
)
recompute_topics(min_semester, verbose=True, allow_null_parent_topic=bool(min_semester))
recompute_historical_semester_probabilities(current_semester=semesters[-1], verbose=True)


def recompute_historical_semester_probabilities(current_semester, verbose=False):
    """
    Recompute and persist the per-term historical probabilities of every Topic.

    For each topic, the distinct semesters of its courses (ascending) are passed to
    `historical_semester_probability`; the three returned values are stored, in order,
    in `historical_probabilities_spring`, `historical_probabilities_summer` and
    `historical_probabilities_fall`, and the topic is saved.

    :param current_semester: The reference semester handed to
        `historical_semester_probability` (the caller in this file passes the latest
        semester it is processing).
    :param verbose: If True, print a header line and show a tqdm progress bar.
    """
    if verbose:
        print("Recomputing historical probabilities for all topics...")
    topics = Topic.objects.all()
    for topic in tqdm(topics, disable=not verbose, total=topics.count()):
        # Only the semester column is needed, so avoid materializing full Course rows.
        # `distinct()` keeps a topic with several courses in one semester from
        # contributing that semester more than once.
        offered_semesters = list(
            topic.courses.order_by("semester").values_list("semester", flat=True).distinct()
        )
        spring, summer, fall = historical_semester_probability(
            current_semester, offered_semesters
        )
        topic.historical_probabilities_spring = spring
        topic.historical_probabilities_summer = summer
        topic.historical_probabilities_fall = fall
        topic.save()
66 changes: 66 additions & 0 deletions backend/courses/migrations/0064_auto_20240225_1331.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Generated by Django 3.2.23 on 2024-02-25 18:31

from django.db import migrations, models


# Schema migration for the `courses` app: adds the three per-term
# `historical_probabilities_*` float columns to `topic` and re-states the
# `activity` field definition on `section`.
class Migration(migrations.Migration):

# Must be applied after migration 0063 of the same app.
dependencies = [
("courses", "0063_auto_20231212_1750"),
]

operations = [
# New FloatField on Topic; existing rows are backfilled with the default of 0.
migrations.AddField(
model_name="topic",
name="historical_probabilities_fall",
field=models.FloatField(
default=0,
help_text="\nThe historical probability of a student taking a course in this topic in the fall\nsemester, based on historical data. This field is recomputed nightly from the\n`parent_course` graph (in the recompute_soft_state cron job).\n",
),
),
# Same as above, for the spring term.
migrations.AddField(
model_name="topic",
name="historical_probabilities_spring",
field=models.FloatField(
default=0,
help_text="\nThe historical probability of a student taking a course in this topic in the spring\nsemester, based on historical data. This field is recomputed nightly from the\n`parent_course` graph (in the recompute_soft_state cron job).\n",
),
),
# Same as above, for the summer term.
migrations.AddField(
model_name="topic",
name="historical_probabilities_summer",
field=models.FloatField(
default=0,
help_text="\nThe historical probability of a student taking a course in this topic in the summer\nsemester, based on historical data. This field is recomputed nightly from the\n`parent_course` graph (in the recompute_soft_state cron job).\n",
),
),
# Re-declares Section.activity with its full choices list and generated help_text.
# NOTE(review): what differs from the previous state is not visible in this file;
# presumably makemigrations picked up a change to the choices — confirm.
migrations.AlterField(
model_name="section",
name="activity",
field=models.CharField(
choices=[
("", "Undefined"),
("CLN", "Clinic"),
("CRT", "Clinical Rotation"),
("DAB", "Dissertation Abroad"),
("DIS", "Dissertation"),
("DPC", "Doctoral Program Exchange"),
("FLD", "Field Work"),
("HYB", "Hybrid"),
("IND", "Independent Study"),
("LAB", "Lab"),
("LEC", "Lecture"),
("MST", "Masters Thesis"),
("ONL", "Online"),
("PRC", "Practicum"),
("REC", "Recitation"),
("SEM", "Seminar"),
("SRT", "Senior Thesis"),
("STU", "Studio"),
],
db_index=True,
help_text='The section activity, e.g. `LEC` for CIS-120-001 (2020A). Options and meanings: <table width=100%><tr><td>""</td><td>"Undefined"</td></tr><tr><td>"CLN"</td><td>"Clinic"</td></tr><tr><td>"CRT"</td><td>"Clinical Rotation"</td></tr><tr><td>"DAB"</td><td>"Dissertation Abroad"</td></tr><tr><td>"DIS"</td><td>"Dissertation"</td></tr><tr><td>"DPC"</td><td>"Doctoral Program Exchange"</td></tr><tr><td>"FLD"</td><td>"Field Work"</td></tr><tr><td>"HYB"</td><td>"Hybrid"</td></tr><tr><td>"IND"</td><td>"Independent Study"</td></tr><tr><td>"LAB"</td><td>"Lab"</td></tr><tr><td>"LEC"</td><td>"Lecture"</td></tr><tr><td>"MST"</td><td>"Masters Thesis"</td></tr><tr><td>"ONL"</td><td>"Online"</td></tr><tr><td>"PRC"</td><td>"Practicum"</td></tr><tr><td>"REC"</td><td>"Recitation"</td></tr><tr><td>"SEM"</td><td>"Seminar"</td></tr><tr><td>"SRT"</td><td>"Senior Thesis"</td></tr><tr><td>"STU"</td><td>"Studio"</td></tr></table>',
max_length=50,
),
),
]
30 changes: 30 additions & 0 deletions backend/courses/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,36 @@ class Topic(models.Model):
),
)

# Per-term historical probabilities for this topic. They are written by
# `recompute_historical_semester_probabilities` (recompute_topics management command),
# which stores the three-element result of `courses.util.historical_semester_probability`
# as [0] -> spring, [1] -> summer, [2] -> fall. Each value stays at the default of 0
# until that recomputation has run.
# NOTE(review): the help_text below speaks of a student "taking" a course and of the
# `parent_course` graph, while the visible writer only looks at the semesters of the
# topic's courses — confirm the wording (help_text is recorded in the migration, so
# changing it needs a new migration).
historical_probabilities_spring = models.FloatField(
default=0,
help_text=dedent(
"""
The historical probability of a student taking a course in this topic in the spring
semester, based on historical data. This field is recomputed nightly from the
`parent_course` graph (in the recompute_soft_state cron job).
"""
),
)
# Summer-term counterpart of the field above (element [1] of the computed result).
historical_probabilities_summer = models.FloatField(
default=0,
help_text=dedent(
"""
The historical probability of a student taking a course in this topic in the summer
semester, based on historical data. This field is recomputed nightly from the
`parent_course` graph (in the recompute_soft_state cron job).
"""
),
)
# Fall-term counterpart of the field above (element [2] of the computed result).
historical_probabilities_fall = models.FloatField(
default=0,
help_text=dedent(
"""
The historical probability of a student taking a course in this topic in the fall
semester, based on historical data. This field is recomputed nightly from the
`parent_course` graph (in the recompute_soft_state cron job).
"""
),
)
branched_from = models.ForeignKey(
"Topic",
related_name="branched_to",
Expand Down
50 changes: 50 additions & 0 deletions backend/courses/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,3 +721,53 @@ def get_semesters(semesters: str = None) -> list[str]:
if s not in possible_semesters:
raise ValueError(f"Provided semester {s} was not found in the db.")
return sorted(semesters)


def historical_semester_probability(current_semester: str, semesters: list[str]) -> list[float]:
    """
    Compute a per-term (A, B, C) historical probability from the semesters in which a
    course appeared during the five calendar years before `current_semester`.

    Each past year carries a weight, most recent first: 0.4, 0.3, 0.15, 0.1, 0.05. If the
    earliest appearance inside the window is fewer than five years back, only that many
    leading weights are used and they are rescaled (and rounded to 3 decimals) so they
    sum to 1. A term's value is the sum of the weights of the years in which that term
    appears, rounded to 2 decimals and capped at 1.

    :param current_semester: The reference semester in the 20XX(A|B|C) format. Only its
        year is used; semesters from that year onward are ignored.
    :type current_semester: str
    :param semesters: Semester strings in the 20XX(A|B|C) format. Order does not matter
        and a semester that appears more than once is counted once.
    :type semesters: list[str]
    :returns: A list of 3 probabilities `[A, B, C]`; all zeros if no semester falls
        inside the five-year window.
    :rtype: list[float]
    :raises KeyError: If a semester inside the window does not end in A, B or C.
    """
    PROB_DISTRIBUTION = [0.4, 0.3, 0.15, 0.1, 0.05]
    window = len(PROB_DISTRIBUTION)

    current_year = int(current_semester[:-1])
    # Semester strings compare lexicographically against bare year strings: a bare year
    # sorts before every term of that year, so these strict bounds keep exactly the
    # years current_year - window .. current_year - 1.
    oldest_excluded = str(current_year - window)
    newest_excluded = str(current_year)
    # A set drops repeated (year, term) pairs so the same semester is never added twice;
    # sorting puts the earliest year first and keeps the summation order deterministic.
    offerings = sorted(
        {
            (int(semester[:-1]), semester[-1].upper())
            for semester in semesters
            if oldest_excluded < semester < newest_excluded
        }
    )
    if not offerings:
        return [0.0, 0.0, 0.0]

    years_back = current_year - offerings[0][0]
    if years_back < window:
        # The earliest appearance is more recent than the full window, so only the
        # first `years_back` weights can ever be used: rescale them to sum to 1.
        usable = PROB_DISTRIBUTION[:years_back]
        total = sum(usable)
        weights = [round(weight / total, 3) for weight in usable]
    else:
        weights = PROB_DISTRIBUTION

    semester_probabilities = {"A": 0.0, "B": 0.0, "C": 0.0}
    for year, term in offerings:
        # Index 0 is last year, index 1 the year before, and so on.
        semester_probabilities[term] += weights[current_year - year - 1]
    return [min(round(semester_probabilities[term], 2), 1.0) for term in ("A", "B", "C")]

0 comments on commit 98f39b8

Please sign in to comment.