Skip to content

Commit

Permalink
Completed Newbie Project
Browse files Browse the repository at this point in the history
Implemented document file upload, parsing, text extraction, and vector upload to Pinecone with metadata. Also implemented vector retrieval from Pinecone.
  • Loading branch information
ethanwang-04 committed Oct 27, 2023
1 parent eb01c8e commit 32c3566
Show file tree
Hide file tree
Showing 13 changed files with 1,816 additions and 954 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ frontend/yarn-error.log
__pycache__/
*.pyc

.env

# Distribution
build/
dist/
Expand Down
11 changes: 11 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]

[dev-packages]

[requires]
python_version = "3.11"
20 changes: 20 additions & 0 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion backend/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ dj-database-url = "*"
djangorestframework = "*"
psycopg2 = "*"
sentry-sdk = "*"
django = "==3.1.7"
django = "==3.2.4"
django-cors-headers = "*"
pyyaml = "*"
uritemplate = "*"
Expand All @@ -43,6 +43,7 @@ gunicorn = "*"
django-scheduler = "*"
typing-extensions = "*"
drf-excel = "*"
langchain = "*"

[requires]
python_version = "3"
2,266 changes: 1,314 additions & 952 deletions backend/Pipfile.lock

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions backend/ohq/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
Announcement,
Course,
CourseStatistic,
Document,
Membership,
MembershipInvite,
Profile,
Expand All @@ -12,11 +13,13 @@
QueueStatistic,
Semester,
Tag,
VectorDB,
)


admin.site.register(Course)
admin.site.register(CourseStatistic)
admin.site.register(Document)
admin.site.register(Membership)
admin.site.register(MembershipInvite)
admin.site.register(Profile)
Expand All @@ -26,3 +29,4 @@
admin.site.register(QueueStatistic)
admin.site.register(Announcement)
admin.site.register(Tag)
admin.site.register(VectorDB)
40 changes: 40 additions & 0 deletions backend/ohq/migrations/0020_auto_20231008_1655.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Generated by Django 3.2.4 on 2023-10-08 16:55

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
# Schema migration that creates the VectorDB and Document tables and then
# attaches one uniqueness constraint to each of them.

# Must be applied after the previous migration of the ohq app.
dependencies = [
('ohq', '0019_auto_20211114_1800'),
]

operations = [
# VectorDB table: rows reference a course and are deleted together with it (CASCADE).
migrations.CreateModel(
name='VectorDB',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=255)),
# auto_now=True: the timestamp is refreshed every time the row is saved.
('time_updated', models.DateTimeField(auto_now=True)),
# Optional integer; may be left empty in forms and stored as NULL.
('top_k', models.IntegerField(blank=True, null=True)),
('course', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='ohq.course')),
],
),
# Document table: rows reference a VectorDB and are deleted together with it (CASCADE).
migrations.CreateModel(
name='Document',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=255)),
('vector_db', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='ohq.vectordb')),
],
),
# A VectorDB name may be reused across courses, but not within the same course.
migrations.AddConstraint(
model_name='vectordb',
constraint=models.UniqueConstraint(fields=('name', 'course'), name='unique_VectorDB'),
),
# NOTE(review): uniqueness is on name alone, so two different vector DBs cannot
# hold documents with the same name - confirm this is intended.
migrations.AddConstraint(
model_name='document',
constraint=models.UniqueConstraint(fields=('name',), name='unique_document'),
),
]
21 changes: 21 additions & 0 deletions backend/ohq/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,3 +411,24 @@ class Announcement(models.Model):
author = models.ForeignKey(User, related_name="announcements", on_delete=models.CASCADE)
time_updated = models.DateTimeField(auto_now=True)
course = models.ForeignKey(Course, related_name="announcements", on_delete=models.CASCADE)


class VectorDB(models.Model):
    """
    A named vector database record owned by a course.

    The record is removed when its course is deleted, and the pair
    (name, course) must be unique.
    """

    name = models.CharField(max_length=255)
    course = models.ForeignKey(Course, on_delete=models.CASCADE)
    # Refreshed automatically on every save.
    time_updated = models.DateTimeField(auto_now=True)
    # Optional; may be left blank in forms and stored as NULL.
    top_k = models.IntegerField(null=True, blank=True)

    class Meta:
        constraints = [
            models.UniqueConstraint(
                fields=["name", "course"],
                name="unique_VectorDB",
            ),
        ]

class Document(models.Model):
    """
    A named document stored in a single VectorDB.

    The document is removed when its VectorDB is deleted.
    """

    name = models.CharField(max_length=255)
    vector_db = models.ForeignKey(VectorDB, on_delete=models.CASCADE)

    class Meta:
        # NOTE(review): uniqueness is on name alone, so two different vector DBs
        # cannot hold documents with the same name - confirm this is intended.
        constraints = [models.UniqueConstraint(fields=["name"], name="unique_document")]

    def __str__(self):
        # Document has no ``course`` field of its own; the course is reached
        # through the owning vector DB.
        return f"{self.vector_db.course}: {self.name}"
104 changes: 104 additions & 0 deletions backend/ohq/permissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,3 +503,107 @@ def has_permission(self, request, view):
return True

return True

class DocumentCreatePermission(permissions.BasePermission):
    """
    Course-scoped, view-level permission.

    Unauthenticated users and users without a membership in the course named by
    the ``course_pk`` URL kwarg are denied. Otherwise:
    - "retrieve" requires ``membership.is_ta``
    - "create", "destroy", "update" and "partial_update" require
      ``membership.is_leadership``
    - any other action is denied
    """

    def has_permission(self, request, view):
        if not request.user.is_authenticated:
            return False

        membership = Membership.objects.filter(
            course=view.kwargs["course_pk"], user=request.user
        ).first()

        # Users with no membership in this course can't do anything
        if membership is None:
            return False

        # NOTE(review): ``view.action`` is provided by DRF ViewSets; if this
        # permission is attached to a plain APIView the attribute will be
        # missing - confirm against the view that uses it.
        if view.action == "retrieve":
            return membership.is_ta

        # "retrieve" is handled above, so only the mutating actions are listed here
        if view.action in ("create", "destroy", "update", "partial_update"):
            return membership.is_leadership

        # Deny everything else explicitly rather than falling through with None
        return False

class DocumentPermission(permissions.BasePermission):
    """
    Course-scoped permission for document endpoints.

    Both checks look up the requesting user's membership in the course named by
    the ``course_pk`` URL kwarg. "retrieve" requires ``membership.is_ta``;
    mutating actions require ``membership.is_leadership``; any other action is
    denied.
    """

    def has_object_permission(self, request, view, obj):
        # DRF evaluates has_permission before any object-level check, so a
        # membership is expected to exist by the time this runs.
        membership = Membership.objects.get(course=view.kwargs["course_pk"], user=request.user)

        if view.action == "retrieve":
            return membership.is_ta

        # "retrieve" is handled above, so only the mutating actions are listed here
        if view.action in ("destroy", "partial_update", "update"):
            return membership.is_leadership

        return False

    def has_permission(self, request, view):
        if not request.user.is_authenticated:
            return False

        membership = Membership.objects.filter(
            course=view.kwargs["course_pk"], user=request.user
        ).first()

        # Users with no membership in this course can't do anything
        if membership is None:
            return False

        if view.action == "retrieve":
            return membership.is_ta

        # "retrieve" is handled above, so only the mutating actions are listed here
        if view.action in ("create", "destroy", "update", "partial_update"):
            return membership.is_leadership

        # Deny everything else explicitly rather than falling through with None
        return False

class VectorSearchPermission(permissions.BasePermission):
    """
    Course-scoped, view-level permission.

    Unauthenticated users and users without a membership in the course named by
    the ``course_pk`` URL kwarg are denied. Otherwise:
    - "retrieve" requires ``membership.is_ta``
    - "create", "destroy", "update" and "partial_update" require
      ``membership.is_leadership``
    - any other action is denied
    """

    def has_permission(self, request, view):
        if not request.user.is_authenticated:
            return False

        membership = Membership.objects.filter(
            course=view.kwargs["course_pk"], user=request.user
        ).first()

        # Users with no membership in this course can't do anything
        if membership is None:
            return False

        # NOTE(review): ``view.action`` is provided by DRF ViewSets; if this
        # permission is attached to a plain APIView the attribute will be
        # missing - confirm against the view that uses it.
        if view.action == "retrieve":
            return membership.is_ta

        # "retrieve" is handled above, so only the mutating actions are listed here
        if view.action in ("create", "destroy", "update", "partial_update"):
            return membership.is_leadership

        # Deny everything else explicitly rather than falling through with None
        return False

class VectorDBPermission(permissions.BasePermission):
    """
    Course-scoped permission for vector DB endpoints.

    Both checks look up the requesting user's membership in the course named by
    the ``course_pk`` URL kwarg. "retrieve" requires ``membership.is_ta``;
    mutating actions require ``membership.is_leadership``; any other action is
    denied.
    """

    def has_object_permission(self, request, view, obj):
        # DRF evaluates has_permission before any object-level check, so a
        # membership is expected to exist by the time this runs.
        membership = Membership.objects.get(course=view.kwargs["course_pk"], user=request.user)

        if view.action == "retrieve":
            return membership.is_ta

        # "retrieve" is handled above, so only the mutating actions are listed here
        if view.action in ("destroy", "partial_update", "update"):
            return membership.is_leadership

        return False

    def has_permission(self, request, view):
        if not request.user.is_authenticated:
            return False

        membership = Membership.objects.filter(
            course=view.kwargs["course_pk"], user=request.user
        ).first()

        # Users with no membership in this course can't do anything
        if membership is None:
            return False

        if view.action == "retrieve":
            return membership.is_ta

        # "retrieve" is handled above, so only the mutating actions are listed here
        if view.action in ("create", "destroy", "update", "partial_update"):
            return membership.is_leadership

        # Deny everything else explicitly rather than falling through with None
        return False
12 changes: 12 additions & 0 deletions backend/ohq/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
Announcement,
Course,
CourseStatistic,
Document,
Membership,
MembershipInvite,
Profile,
Expand All @@ -22,6 +23,7 @@
QueueStatistic,
Semester,
Tag,
VectorDB,
)
from ohq.sms import sendSMSVerification
from ohq.tasks import sendUpNextNotificationTask
Expand Down Expand Up @@ -574,3 +576,13 @@ class OccurrenceSerializer(serializers.ModelSerializer):
class Meta:
model = Occurrence
fields = ("id", "title", "description", "start", "end", "cancelled", "event")

class DocumentSerializer(serializers.ModelSerializer):
    """
    Serializer for Document exposing its id, name and owning vector DB.
    """

    class Meta:
        model = Document
        fields = (
            "id",
            "name",
            "vector_db",
        )

class VectorDBSerializer(serializers.ModelSerializer):
    """
    Serializer for VectorDB exposing its id, name, course, update time and top_k.
    """

    class Meta:
        model = VectorDB
        fields = (
            "id",
            "name",
            "course",
            "time_updated",
            "top_k",
        )
20 changes: 19 additions & 1 deletion backend/ohq/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
AnnouncementViewSet,
CourseStatisticView,
CourseViewSet,
DocumentViewSet,
DocumentCreateView,
EventViewSet,
MassInviteView,
MembershipInviteViewSet,
Expand All @@ -19,6 +21,8 @@
SemesterViewSet,
TagViewSet,
UserView,
VectorDBViewSet,
VectorSearchView,
)


Expand All @@ -36,10 +40,14 @@
course_router.register("invites", MembershipInviteViewSet, basename="invite")
course_router.register("announcements", AnnouncementViewSet, basename="announcement")
course_router.register("tags", TagViewSet, basename="tag")
course_router.register("vector_dbs", VectorDBViewSet, basename="vector_dbs")

queue_router = routers.NestedSimpleRouter(course_router, "queues", lookup="queue")
queue_router.register("questions", QuestionViewSet, basename="question")

vector_db_router = routers.NestedSimpleRouter(course_router, "vector_dbs", lookup="vector_dbs")
vector_db_router.register("documents", DocumentViewSet, basename="documents")

realtime_router = RealtimeRouter()
realtime_router.register(QuestionViewSet)
realtime_router.register(AnnouncementViewSet)
Expand All @@ -61,6 +69,16 @@
CourseStatisticView.as_view(),
name="course-statistic",
),
path(
"courses/<slug:course_pk>/vector_dbs/create",
DocumentCreateView.as_view(),
name="document-create",
),
path(
"courses/<slug:course_pk>/vector_dbs/search",
VectorSearchView.as_view(),
name="document-search",
)
]

urlpatterns = router.urls + course_router.urls + queue_router.urls + additional_urls
urlpatterns = router.urls + course_router.urls + queue_router.urls + vector_db_router.urls + additional_urls
Loading

0 comments on commit 32c3566

Please sign in to comment.