From cf565b331279104fb1018a84020ac4cf97c5d1b6 Mon Sep 17 00:00:00 2001 From: Christopher Kwong <77176674+cokwong@users.noreply.github.com> Date: Mon, 20 May 2024 11:44:48 -0700 Subject: [PATCH] =?UTF-8?q?feat:=20=E2=9C=A8=20add=20study=20room=20scrape?= =?UTF-8?q?r=20and=20availability=20endpoint=20(#143)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sanskar Mishra --- apps/api/bronya.config.ts | 13 +- apps/api/src/global.d.ts | 5 + apps/api/src/lib/utils.ts | 15 +- apps/api/src/routes/v1/graphql/resolvers.ts | 2 + .../v1/graphql/schema/studyRooms.graphql | 46 +++++ .../routes/v1/rest/studyRooms/+endpoint.ts | 30 ++++ apps/api/src/routes/v1/rest/studyRooms/lib.ts | 57 ++++++ .../src/routes/v1/rest/studyRooms/schema.ts | 13 ++ .../routes/v1/rest/studyRooms/{id}/+config.ts | 11 ++ .../v1/rest/studyRooms/{id}/+endpoint.ts | 47 +++++ .../routes/v1/rest/studyRooms/{id}/schema.ts | 12 ++ .../rest-api/reference/study-rooms.md | 157 ++++++++++++++++ apps/docs/sidebars.js | 1 + libs/db/prisma/schema.prisma | 19 ++ libs/uc-irvine-lib/package.json | 6 + libs/uc-irvine-lib/src/spaces/index.ts | 35 ++++ packages/types/index.ts | 1 + packages/types/types/studyRoom.ts | 87 +++++++++ pnpm-lock.yaml | 40 +++++ tools/study-room-scraper/.gitignore | 0 tools/study-room-scraper/package.json | 19 ++ tools/study-room-scraper/src/index.ts | 31 ++++ .../src/study-room-scraper.ts | 170 ++++++++++++++++++ 23 files changed, 815 insertions(+), 2 deletions(-) create mode 100644 apps/api/src/routes/v1/graphql/schema/studyRooms.graphql create mode 100644 apps/api/src/routes/v1/rest/studyRooms/+endpoint.ts create mode 100644 apps/api/src/routes/v1/rest/studyRooms/lib.ts create mode 100644 apps/api/src/routes/v1/rest/studyRooms/schema.ts create mode 100644 apps/api/src/routes/v1/rest/studyRooms/{id}/+config.ts create mode 100644 apps/api/src/routes/v1/rest/studyRooms/{id}/+endpoint.ts create mode 100644 
apps/api/src/routes/v1/rest/studyRooms/{id}/schema.ts create mode 100644 apps/docs/docs/developers-guide/rest-api/reference/study-rooms.md create mode 100644 libs/uc-irvine-lib/src/spaces/index.ts create mode 100644 packages/types/types/studyRoom.ts create mode 100644 tools/study-room-scraper/.gitignore create mode 100644 tools/study-room-scraper/package.json create mode 100644 tools/study-room-scraper/src/index.ts create mode 100644 tools/study-room-scraper/src/study-room-scraper.ts diff --git a/apps/api/bronya.config.ts b/apps/api/bronya.config.ts index 6059ef5c..860b780f 100644 --- a/apps/api/bronya.config.ts +++ b/apps/api/bronya.config.ts @@ -18,7 +18,7 @@ import { App, Stack, Duration } from "aws-cdk-lib/core"; import { config } from "dotenv"; import type { BuildOptions } from "esbuild"; -import { normalizeCourse } from "./src/lib/utils"; +import { normalizeCourse, normalizeStudyRoom } from "./src/lib/utils"; const prisma = new PrismaClient(); @@ -121,6 +121,10 @@ export const esbuildOptions: BuildOptions = { path: args.path, namespace, })); + build.onResolve({ filter: /virtual:studyRooms/ }, (args) => ({ + path: args.path, + namespace, + })); build.onLoad({ filter: /virtual:courses/, namespace }, async () => ({ contents: `export const courses = ${JSON.stringify( Object.fromEntries( @@ -133,6 +137,13 @@ export const esbuildOptions: BuildOptions = { Object.fromEntries((await prisma.instructor.findMany()).map((x) => [x.ucinetid, x])), )}`, })); + build.onLoad({ filter: /virtual:studyRooms/, namespace }, async () => ({ + contents: `export const studyRooms = ${JSON.stringify( + Object.fromEntries( + (await prisma.studyRoom.findMany()).map(normalizeStudyRoom).map((x) => [x.id, x]), + ), + )}`, + })); }, }, ], diff --git a/apps/api/src/global.d.ts b/apps/api/src/global.d.ts index ff85b418..462254b5 100644 --- a/apps/api/src/global.d.ts +++ b/apps/api/src/global.d.ts @@ -19,3 +19,8 @@ declare module "virtual:instructors" { // eslint-disable-next-line 
@typescript-eslint/consistent-type-imports
   declare const instructors: Record;
 }
+
+declare module "virtual:studyRooms" {
+  // eslint-disable-next-line @typescript-eslint/consistent-type-imports
+  declare const studyRooms: Record<string, import("@peterportal-api/types").StudyRoom>;
+}
diff --git a/apps/api/src/lib/utils.ts b/apps/api/src/lib/utils.ts
index 4315f83e..ad99526c 100644
--- a/apps/api/src/lib/utils.ts
+++ b/apps/api/src/lib/utils.ts
@@ -1,4 +1,4 @@
-import type { Course as PrismaCourse } from "@libs/db";
+import type { Course as PrismaCourse, StudyRoom as PrismaStudyRoom } from "@libs/db";
 import type {
   Course,
   CourseLevel,
@@ -6,6 +6,7 @@ import type {
   GECategory,
   InstructorPreview,
   PrerequisiteTree,
+  StudyRoom,
 } from "@peterportal-api/types";
 
 const days = ["Su", "M", "Tu", "W", "Th", "F", "Sa"];
@@ -82,3 +83,18 @@ export function normalizeCourse(course: PrismaCourse): Course {
     terms: course.terms,
   };
 }
+
+/**
+ * Convert a Prisma StudyRoom row into the public StudyRoom shape; studyLocationId is not copied.
+ */
+export function normalizeStudyRoom(room: PrismaStudyRoom): StudyRoom {
+  return {
+    id: room.id,
+    name: room.name,
+    capacity: room.capacity,
+    location: room.location,
+    description: room.description,
+    directions: room.directions,
+    techEnhanced: room.techEnhanced,
+  };
+}
diff --git a/apps/api/src/routes/v1/graphql/resolvers.ts b/apps/api/src/routes/v1/graphql/resolvers.ts
index e67dcf8e..95744d33 100644
--- a/apps/api/src/routes/v1/graphql/resolvers.ts
+++ b/apps/api/src/routes/v1/graphql/resolvers.ts
@@ -31,6 +31,9 @@ export const resolvers: ApolloServerOptions["resolvers"] = {
     instructors: proxyRestApi("/v1/rest/instructors"),
     allInstructors: proxyRestApi("/v1/rest/instructors/all"),
     larc: proxyRestApi("/v1/rest/larc"),
+    // Path casing must match the "studyRooms" route directory; the REST reference uses it too.
+    studyRooms: proxyRestApi("/v1/rest/studyRooms"),
+    allStudyRooms: proxyRestApi("/v1/rest/studyRooms/all"),
     websoc: proxyRestApi("/v1/rest/websoc", { argsTransform: geTransform }),
     depts: proxyRestApi("/v1/rest/websoc/depts"),
     terms: proxyRestApi("/v1/rest/websoc/terms"),
diff --git a/apps/api/src/routes/v1/graphql/schema/studyRooms.graphql
b/apps/api/src/routes/v1/graphql/schema/studyRooms.graphql new file mode 100644 index 00000000..260a0a77 --- /dev/null +++ b/apps/api/src/routes/v1/graphql/schema/studyRooms.graphql @@ -0,0 +1,46 @@ +type TimeSlot { + "Date of the time slot (YYYY-MM-DD)." + date: String! + "Start time of the time slot (HH:MM)." + start: String! + "End time of the time slot (HH:MM)." + end: String! + "If the time slot is booked." + booked: Boolean! +} + +type StudyRoom { + "ID of study room used by spaces.lib." + id: ID! + "Name of the study room and its room number." + name: String! + "Number of chairs in the study room." + capacity: Int! + "Name of study location." + location: String! + "Description of the study room." + description: String + "Directions to the study room." + directions: String + "Time slots for the study room." + timeSlots: [TimeSlot]! + "If the study room has TV or other tech enhancements." + techEnhanced: Boolean +} + +type StudyLocation { + "ID of the study location using shortened name of the location." + id: ID! + "Location ID of the study location used by space.lib." + lid: String! + "Name of the study location." + name: String! + "Rooms in the study location." + rooms: [StudyRoom!]! +} + +extend type Query { + "Fetch all study rooms." + allStudyRooms(start: String!, end: String!): [StudyLocation!]! + studyRooms(location: String!, start: String!, end: String!): StudyLocation! 
+}
diff --git a/apps/api/src/routes/v1/rest/studyRooms/+endpoint.ts b/apps/api/src/routes/v1/rest/studyRooms/+endpoint.ts
new file mode 100644
index 00000000..745c7b90
--- /dev/null
+++ b/apps/api/src/routes/v1/rest/studyRooms/+endpoint.ts
@@ -0,0 +1,35 @@
+import { createHandler } from "@libs/lambda";
+import { studyLocations } from "libs/uc-irvine-lib/src/spaces";
+import { ZodError } from "zod";
+
+import { aggregateStudyRooms } from "./lib";
+import { QuerySchema } from "./schema";
+
+/**
+ * GET /v1/rest/studyRooms: availability for the study location named by the "location" query
+ * parameter, between the "start" and "end" dates (YYYY-MM-DD).
+ */
+export const GET = createHandler(async (event, context, res) => {
+  const headers = event.headers;
+  const query = event.queryStringParameters;
+  const requestId = context.awsRequestId;
+  try {
+    const parsedQuery = QuerySchema.parse(query);
+    // Own-key check: inherited names such as "constructor" must not pass as locations.
+    if (!Object.prototype.hasOwnProperty.call(studyLocations, parsedQuery.location)) {
+      return res.createErrorResult(404, `Location ${parsedQuery.location} not found`, requestId);
+    }
+    const studyRooms = await aggregateStudyRooms(
+      parsedQuery.location,
+      parsedQuery.start,
+      parsedQuery.end,
+    );
+    return res.createOKResult(studyRooms, headers, requestId);
+  } catch (e) {
+    if (e instanceof ZodError) {
+      const messages = e.issues.map((issue) => issue.message);
+      return res.createErrorResult(400, messages.join("; "), requestId);
+    }
+    return res.createErrorResult(400, e, requestId);
+  }
+});
diff --git a/apps/api/src/routes/v1/rest/studyRooms/lib.ts b/apps/api/src/routes/v1/rest/studyRooms/lib.ts
new file mode 100644
index 00000000..83c4807e
--- /dev/null
+++ b/apps/api/src/routes/v1/rest/studyRooms/lib.ts
@@ -0,0 +1,57 @@
+import type { TimeSlot, StudyLocation } from "@peterportal-api/types";
+import { studyLocations } from "libs/uc-irvine-lib/src/spaces";
+import { getStudySpaces } from "libs/uc-irvine-lib/src/spaces";
+import { studyRooms } from "virtual:studyRooms";
+
+/**
+ * Data structure of time slots returned by libs.spaces.
+ */
+type Slot = {
+  start: string;
+  end: string;
+  itemId: number;
+  checkSum: string;
+  className: string;
+};
+
+/**
+ * Group raw slots by room ID, splitting each "date time" stamp into its date and time parts.
+ */
+export function parseTimeSlots(slots: Slot[]): { [id: string]: TimeSlot[] } {
+  const timeSlots: { [id: string]: TimeSlot[] } = {};
+  slots.forEach((slot) => {
+    const roomId = slot.itemId.toString();
+    const [date, start] = slot.start.split(" ");
+    const [, end] = slot.end.split(" ");
+    const timeSlot: TimeSlot = {
+      date,
+      start,
+      end,
+      booked: slot.className === "s-lc-eq-checkout",
+    };
+    timeSlots[roomId] ??= [];
+    timeSlots[roomId].push(timeSlot);
+  });
+  return timeSlots;
+}
+
+/**
+ * Aggregate study rooms and their time slots into a StudyLocation object.
+ */
+export async function aggregateStudyRooms(
+  locationId: string,
+  start: string,
+  end: string,
+): Promise<StudyLocation> {
+  const spaces = await getStudySpaces(studyLocations[locationId].lid, start, end);
+  const timeSlotsMap = parseTimeSlots(spaces.slots);
+  return {
+    id: locationId,
+    ...studyLocations[locationId],
+    rooms: Object.entries(timeSlotsMap)
+      .filter(([id]) => studyRooms[id] != null)
+      .map(([id, timeSlots]) => {
+        return { ...studyRooms[id], timeSlots };
+      }),
+  };
+}
diff --git a/apps/api/src/routes/v1/rest/studyRooms/schema.ts b/apps/api/src/routes/v1/rest/studyRooms/schema.ts
new file mode 100644
index 00000000..a17a58a8
--- /dev/null
+++ b/apps/api/src/routes/v1/rest/studyRooms/schema.ts
@@ -0,0 +1,13 @@
+import { z } from "zod";
+
+export const QuerySchema = z.object({
+  location: z.string({ required_error: 'Parameter "location" not provided' }),
+  start: z
+    .string({ required_error: 'Parameter "start" not provided' })
+    .regex(/^\d{4}-\d{2}-\d{2}$/, { message: "Start date must be in YYYY-MM-DD format" }),
+  end: z
+    .string({ required_error: 'Parameter "end" not provided' })
+    .regex(/^\d{4}-\d{2}-\d{2}$/, { message: "End date must be in YYYY-MM-DD format" }),
+});
+
+export type Query =
z.infer<typeof QuerySchema>;
diff --git a/apps/api/src/routes/v1/rest/studyRooms/{id}/+config.ts b/apps/api/src/routes/v1/rest/studyRooms/{id}/+config.ts
new file mode 100644
index 00000000..68864478
--- /dev/null
+++ b/apps/api/src/routes/v1/rest/studyRooms/{id}/+config.ts
@@ -0,0 +1,11 @@
+import type { ApiPropsOverride } from "@bronya.js/api-construct";
+
+import { esbuildOptions, constructs } from "../../../../../../bronya.config";
+
+export const overrides: ApiPropsOverride = {
+  esbuild: esbuildOptions,
+  constructs: {
+    functionPlugin: constructs.functionPlugin,
+    restApiProps: constructs.restApiProps,
+  },
+};
diff --git a/apps/api/src/routes/v1/rest/studyRooms/{id}/+endpoint.ts b/apps/api/src/routes/v1/rest/studyRooms/{id}/+endpoint.ts
new file mode 100644
index 00000000..07a476e2
--- /dev/null
+++ b/apps/api/src/routes/v1/rest/studyRooms/{id}/+endpoint.ts
@@ -0,0 +1,53 @@
+import { createHandler } from "@libs/lambda";
+import { studyLocations } from "libs/uc-irvine-lib/src/spaces";
+import { ZodError } from "zod";
+
+import { aggregateStudyRooms } from "../lib";
+
+import { QuerySchema } from "./schema";
+
+/**
+ * GET /v1/rest/studyRooms/{id}: availability for the study location {id}, or for every known
+ * location when {id} is "all". Requires "start" and "end" query parameters (YYYY-MM-DD).
+ * Responds 404 for an unknown location, matching the query-parameter endpoint.
+ */
+export const GET = createHandler(async (event, context, res) => {
+  const headers = event.headers;
+  const query = event.queryStringParameters;
+  const requestId = context.awsRequestId;
+  const { id } = event.pathParameters ?? {};
+  try {
+    switch (id) {
+      case null:
+      case undefined:
+        return res.createErrorResult(400, "Location not provided", requestId);
+      case "all": {
+        const parsedQuery = QuerySchema.parse(query);
+        return res.createOKResult(
+          await Promise.all(
+            Object.keys(studyLocations).map(async (locationId) => {
+              return aggregateStudyRooms(locationId, parsedQuery.start, parsedQuery.end);
+            }),
+          ),
+          headers,
+          requestId,
+        );
+      }
+      default: {
+        // Own-key check: inherited names such as "constructor" are not locations.
+        if (Object.prototype.hasOwnProperty.call(studyLocations, id)) {
+          const parsedQuery = QuerySchema.parse(query);
+          const studyRooms = await aggregateStudyRooms(id, parsedQuery.start, parsedQuery.end);
+          return res.createOKResult(studyRooms, headers, requestId);
+        }
+        return res.createErrorResult(404, `Location ${id} not found`, requestId);
+      }
+    }
+  } catch (e) {
+    if (e instanceof ZodError) {
+      const messages = e.issues.map((issue) => issue.message);
+      return res.createErrorResult(400, messages.join("; "), requestId);
+    }
+    return res.createErrorResult(400, e, requestId);
+  }
+});
diff --git a/apps/api/src/routes/v1/rest/studyRooms/{id}/schema.ts b/apps/api/src/routes/v1/rest/studyRooms/{id}/schema.ts
new file mode 100644
index 00000000..edc6a372
--- /dev/null
+++ b/apps/api/src/routes/v1/rest/studyRooms/{id}/schema.ts
@@ -0,0 +1,12 @@
+import { z } from "zod";
+
+export const QuerySchema = z.object({
+  start: z
+    .string({ required_error: 'Parameter "start" not provided' })
+    .regex(/^\d{4}-\d{2}-\d{2}$/, { message: "Start date must be in YYYY-MM-DD format" }),
+  end: z
+    .string({ required_error: 'Parameter "end" not provided' })
+    .regex(/^\d{4}-\d{2}-\d{2}$/, { message: "End date must be in YYYY-MM-DD format" }),
+});
+
+export type Query = z.infer<typeof QuerySchema>;
diff --git a/apps/docs/docs/developers-guide/rest-api/reference/study-rooms.md b/apps/docs/docs/developers-guide/rest-api/reference/study-rooms.md
new file mode 100644
index 00000000..317952d1
--- /dev/null
+++ b/apps/docs/docs/developers-guide/rest-api/reference/study-rooms.md
@@ -0,0 +1,157 @@
+---
+pagination_prev:
null +pagination_next: null +--- + +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; + +# Study Rooms + +The study rooms endpoint allows users to get information and availability of study rooms that can be reserved at UCI libraries. + +## Query parameters + +#### `location` string + +The location of the study rooms to query. Five locations are available to query: + +| location | name | +| -------- | --------------------------- | +| Langson | Langson Library | +| Gateway | Gateway Study Center | +| Science | Science Library | +| MRC | Multimedia Resources Center | +| GML | Grunigen Medical Library | + +#### `start` string Required + +The start date of time slots to query. YYYY-MM-DD format. + +#### `end` string Required + +The end date of time slots to query. YYYY-MM-DD format. + +### Code sample + + + + +```bash +curl "https://api-next.peterportal.org/v1/rest/studyRooms/Science?start=2024-04-26&end=2024-04-30" +``` + +```bash +curl "https://api-next.peterportal.org/v1/rest/studyRooms?location=Science&start=2024-04-26&end=2024-04-30" +``` + + + + +### Response + + + + +```json +{ + "id": "Science", + "name": "Science Library", + "lid": "6580", + "rooms": [ + { + "id": "44667", + "name": "Science 371", + "capacity": 8, + "location": "Science Library", + "description": "This Collaborative Technology Work Space is located on the upper level of the 2nd Floor Grand Reading Room. Access via the stairway halfway through the Grand Reading Room. Digital display available. Bring your own laptop.", + "directions": "Access via the elevators or stairway, on the upper level of the Grand Reading Room.", + "techEnhanced": true, + "timeSlots": [ + { + "date": "2024-04-27", + "start": "13:00:00", + "end": "13:30:00", + "booked": false + } + "..." + ] + } + "..."
+ ] +} +``` + + + + +```typescript +// https://github.com/icssc/peterportal-api-next/blob/main/packages/types/types/studyRoom +type StudyLocation = { + id: string; + lid: string; + name: string; + rooms: { + id: string; + name: string; + capacity: number; + location: string; + description?: string; + directions?: string; + timeSlots?: { + date: string; + start: string; + end: string; + booked: boolean; + }[]; + techEnhanced?: boolean; + }[]; +}; +``` + + + + +## Get all study rooms + +### Code sample + + + + +```bash +curl "https://api-next.peterportal.org/v1/rest/studyRooms/all?start=2024-04-26&end=2024-04-30" +``` + + + + +### Response + + + + +```json +[ + { + "id": "Langson", + "...": "..." + }, + { + "id": "Gateway", + "...": "..." + }, + "..." +] +``` + + + + +```typescript +// https://github.com/icssc/peterportal-api-next/blob/main/packages/types/types/studyRoom +type StudyLocations = StudyLocation[]; +``` + + + diff --git a/apps/docs/sidebars.js b/apps/docs/sidebars.js index c8f126a6..56f6b20d 100644 --- a/apps/docs/sidebars.js +++ b/apps/docs/sidebars.js @@ -26,6 +26,7 @@ const sidebars = { "developers-guide/rest-api/reference/grades", "developers-guide/rest-api/reference/instructors", "developers-guide/rest-api/reference/larc", + "developers-guide/rest-api/reference/study-rooms", "developers-guide/rest-api/reference/websoc", "developers-guide/rest-api/reference/week", ], diff --git a/libs/db/prisma/schema.prisma b/libs/db/prisma/schema.prisma index 5b7de42b..a7ee3295 100644 --- a/libs/db/prisma/schema.prisma +++ b/libs/db/prisma/schema.prisma @@ -305,3 +305,22 @@ model WebsocSection { @@id([year, quarter, sectionCode, timestamp]) @@unique([year, quarter, sectionCode, timestamp], name: "idx") } + +model StudyRoom { + id String @id + name String + capacity Int + location String @default("") + description String @default("") + directions String @default("") + techEnhanced Boolean @default(false) + studyLocationId String + studyLocation StudyLocation 
@relation(fields: [studyLocationId], references: [id]) +} + +model StudyLocation { + id String @id + lid String + name String + rooms StudyRoom[] +} diff --git a/libs/uc-irvine-lib/package.json b/libs/uc-irvine-lib/package.json index e527a074..613f1e7b 100644 --- a/libs/uc-irvine-lib/package.json +++ b/libs/uc-irvine-lib/package.json @@ -22,6 +22,12 @@ "require": "./src/registrar/index.ts", "default": "./src/registrar/index.ts" }, + "./spaces": { + "types": "./src/spaces/index.ts", + "import": "./src/spaces/index.ts", + "require": "./src/spaces/index.ts", + "default": "./src/spaces/index.ts" + }, "./websoc": { "types": "./src/websoc/index.ts", "import": "./src/websoc/index.ts", diff --git a/libs/uc-irvine-lib/src/spaces/index.ts b/libs/uc-irvine-lib/src/spaces/index.ts new file mode 100644 index 00000000..dc104057 --- /dev/null +++ b/libs/uc-irvine-lib/src/spaces/index.ts @@ -0,0 +1,35 @@ +import fetch from "cross-fetch"; + +const LIB_SPACE_URL = "https://spaces.lib.uci.edu/spaces"; +const LIB_SPACE_AVAILABILITY_URL = "https://spaces.lib.uci.edu/spaces/availability/grid"; + +/** + * Shortened libary names mapped to their IDs used by spaces.lib.uci.edu + * See https://www.lib.uci.edu/ for shortened names + **/ +export const studyLocations: { [id: string]: { name: string; lid: string } } = { + Langson: { name: "Langson Library", lid: "6539" }, + Gateway: { name: "Gateway Study Center", lid: "6579" }, + Science: { name: "Science Library", lid: "6580" }, + MRC: { name: "Multimedia Resources Center", lid: "6581" }, + GML: { name: "Grunigen Medical Library", lid: "12189" }, +}; + +/** + * Make post request used by "https://spaces.lib.uci.edu/spaces" to retrieve room availability. 
+ * + * @param lid - Library ID + * @param start - Date format YYYY-MM-DD + * @param end - Date format YYYY-MM-DD + * @returns {object} JSON response returned by request + */ +export async function getStudySpaces(lid: string, start: string, end: string) { + const headers = { + Referer: `${LIB_SPACE_URL}?lid=${lid}`, + }; + return await fetch(LIB_SPACE_AVAILABILITY_URL, { + method: "POST", + headers: headers, + body: new URLSearchParams({ lid, gid: "0", start, end, pageSize: "18" }), + }).then((res) => res.json()); +} diff --git a/packages/types/index.ts b/packages/types/index.ts index deeba13c..3381aa88 100644 --- a/packages/types/index.ts +++ b/packages/types/index.ts @@ -6,5 +6,6 @@ export * from "./types/grades"; export * from "./types/instructor"; export * from "./types/larc"; export * from "./types/response"; +export * from "./types/studyRoom"; export * from "./types/websoc"; export * from "./types/week"; diff --git a/packages/types/types/studyRoom.ts b/packages/types/types/studyRoom.ts new file mode 100644 index 00000000..6ca2181e --- /dev/null +++ b/packages/types/types/studyRoom.ts @@ -0,0 +1,87 @@ +/** + * An object representing a Study Room. + */ +export type StudyRoom = { + /** + * ID of study room used by spaces.lib. + */ + id: string; + /** + * Name of the study room and its room number. + */ + name: string; + /** + * Number of chairs in the study room. + */ + capacity: number; + /** + * Name of study location. + */ + location: string; + /** + * Description of the study room. + */ + description?: string; + /** + * Directions to the study room. + */ + directions?: string; + /** + * Time slots for the study room. + */ + timeSlots?: TimeSlot[]; + /** + * If the study room has TV or other tech enhancements. + */ + techEnhanced?: boolean; +}; + +/** + * An object representing a time slot and avaliability for a study room. + */ +export type TimeSlot = { + /** + * Date of the time slot (YYYY-MM-DD). + */ + date: string; + /** + * Start time of the time slot. 
+ */ + start: string; + /** + * End time of the time slot. + */ + end: string; + /** + * If the time slot is booked. + */ + booked: boolean; +}; + +/** + * An object representing a study location. + */ +export type StudyLocation = { + /** + * ID of the study location using shortened name of the location. + */ + id: string; + /** + * Location ID of the study location used by space.lib. + */ + lid: string; + /** + * Name of the study location. + */ + name: string; + /** + * Rooms in the study location. + */ + rooms: StudyRoom[]; +}; + +/** + * The type of the payload returned on a successful response from querying + * ``/v1/rest/studyRooms/all``. + */ +export type StudyLocations = StudyLocation[]; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a4363a11..b4ddf087 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -496,6 +496,28 @@ importers: specifier: 0.20.1 version: 0.20.1 + tools/study-room-scraper: + dependencies: + '@libs/db': + specifier: workspace:^ + version: link:../../libs/db + cheerio: + specifier: ^1.0.0-rc.12 + version: 1.0.0-rc.12 + cross-fetch: + specifier: 4.0.0 + version: 4.0.0 + winston: + specifier: ^3.11.0 + version: 3.11.0 + devDependencies: + '@peterportal-api/types': + specifier: workspace:^ + version: link:../../packages/types + esbuild: + specifier: 0.20.1 + version: 0.20.1 + packages: /@aashutoshrathi/word-wrap@1.2.6: @@ -2952,6 +2974,7 @@ packages: /@commitlint/load@19.0.3(@types/node@20.11.24)(typescript@5.3.3): resolution: {integrity: sha512-18Tk/ZcDFRKIoKfEcl7kC+bYkEQ055iyKmGsYDoYWpKf6FUvBrP9bIWapuy/MB+kYiltmP9ITiUx6UXtqC9IRw==} engines: {node: '>=v18'} + requiresBuild: true dependencies: '@commitlint/config-validator': 19.0.3 '@commitlint/execute-rule': 19.0.0 @@ -13688,6 +13711,23 @@ packages: triple-beam: 1.3.0 dev: false + /winston@3.11.0: + resolution: {integrity: sha512-L3yR6/MzZAOl0DsysUXHVjOwv8mKZ71TrA/41EIduGpOOV5LQVodqN+QdQ6BS6PJ/RdIshZhq84P/fStEZkk7g==} + engines: {node: '>= 12.0.0'} + dependencies: + '@colors/colors': 
1.6.0 + '@dabh/diagnostics': 2.0.3 + async: 3.2.4 + is-stream: 2.0.1 + logform: 2.5.1 + one-time: 1.0.0 + readable-stream: 3.6.2 + safe-stable-stringify: 2.4.3 + stack-trace: 0.0.10 + triple-beam: 1.3.0 + winston-transport: 4.7.0 + dev: false + /winston@3.12.0: resolution: {integrity: sha512-OwbxKaOlESDi01mC9rkM0dQqQt2I8DAUMRLZ/HpbwvDXm85IryEHgoogy5fziQy38PntgZsLlhAYHz//UPHZ5w==} engines: {node: '>= 12.0.0'} diff --git a/tools/study-room-scraper/.gitignore b/tools/study-room-scraper/.gitignore new file mode 100644 index 00000000..e69de29b diff --git a/tools/study-room-scraper/package.json b/tools/study-room-scraper/package.json new file mode 100644 index 00000000..200111b9 --- /dev/null +++ b/tools/study-room-scraper/package.json @@ -0,0 +1,19 @@ +{ + "name": "@tools/study-room-scraper", + "version": "0.0.0", + "private": true, + "description": "", + "license": "MIT", + "type": "module", + "main": "index.ts", + "dependencies": { + "@libs/db": "workspace:^", + "cheerio": "^1.0.0-rc.12", + "cross-fetch": "4.0.0", + "winston": "^3.11.0" + }, + "devDependencies": { + "@peterportal-api/types": "workspace:^", + "esbuild": "0.20.1" + } +} diff --git a/tools/study-room-scraper/src/index.ts b/tools/study-room-scraper/src/index.ts new file mode 100644 index 00000000..9c01f1eb --- /dev/null +++ b/tools/study-room-scraper/src/index.ts @@ -0,0 +1,31 @@ +import { PrismaClient } from "@libs/db"; +import type { StudyRoom } from "@peterportal-api/types"; + +import { scrapeStudyLocations } from "./study-room-scraper"; + +const prisma = new PrismaClient(); + +async function main() { + const studyLocations = await scrapeStudyLocations(); + const studyLocationInfo = Object.values(studyLocations).map((location) => { + return prisma.studyLocation.create({ + data: { + id: location.id, + lid: location.lid, + name: location.name, + rooms: { + create: location.rooms.map((room: StudyRoom) => ({ + ...room, + })), + }, + }, + }); + }); + await prisma.$transaction([ + 
prisma.studyRoom.deleteMany({}),
+    prisma.studyLocation.deleteMany({}),
+    ...studyLocationInfo,
+  ]);
+}
+
+main().then();
diff --git a/tools/study-room-scraper/src/study-room-scraper.ts b/tools/study-room-scraper/src/study-room-scraper.ts
new file mode 100644
index 00000000..3794d243
--- /dev/null
+++ b/tools/study-room-scraper/src/study-room-scraper.ts
@@ -0,0 +1,192 @@
+import type { StudyRoom, StudyLocation } from "@peterportal-api/types";
+import { load, Cheerio, Element, CheerioAPI } from "cheerio";
+import fetch from "cross-fetch";
+import { studyLocations, getStudySpaces } from "libs/uc-irvine-lib/src/spaces";
+import * as winston from "winston";
+
+const ROOM_SPACE_URL = "https://spaces.lib.uci.edu/space";
+
+type StudyLocations = Record<string, StudyLocation>;
+
+const logger = winston.createLogger({
+  level: "info",
+  format: winston.format.combine(
+    winston.format.timestamp(),
+    winston.format.json(),
+    winston.format.prettyPrint(),
+  ),
+  transports: [new winston.transports.Console()],
+});
+
+/**
+ * Build a Grunigen Medical Library description from the "p" elements of the description
+ * section. Newlines inside a paragraph become ", " and leftover whitespace or doubled
+ * punctuation is collapsed.
+ */
+function processGML(descriptionHeader: Cheerio<Element>, $: CheerioAPI): string {
+  let descriptionText = "";
+  descriptionHeader.find("p").each(function () {
+    let paraText = $(this).text().trim();
+    if (paraText.includes("\n")) {
+      paraText = paraText.replaceAll("\n", ", ");
+      if (!paraText.endsWith(":")) {
+        paraText += ". ";
+      }
+    }
+    descriptionText += paraText + " ";
+  });
+  descriptionText = descriptionText.replace(/\s{2,}/g, " ").trim();
+  descriptionText = descriptionText.replace(/\s+,/g, ",");
+  descriptionText = descriptionText.replace(/\.\s*\./g, ".");
+  descriptionText = descriptionText.replace(/\.,/g, ".");
+  return descriptionText;
+}
+
+/**
+ * Extract a room's description text. Grunigen Medical Library pages go through processGML;
+ * for other locations the section's text nodes and inline elements are joined, with list items
+ * prefixed by "- ". A non-empty result always ends with a period.
+ */
+function processDescription(
+  descriptionHeader: Cheerio<Element>,
+  location: string,
+  $: CheerioAPI,
+): string {
+  let descriptionText = "";
+  if (location === "Grunigen Medical Library") {
+    descriptionText = processGML(descriptionHeader, $);
+  } else {
+    const descriptionParts: string[] = [];
+    descriptionHeader.contents().each((_, content) => {
+      if (content.nodeType === 3) {
+        const textContent = $(content).text().trim();
+        if (textContent) {
+          descriptionParts.push(textContent);
+        }
+      } else if (content.nodeType === 1) {
+        const child = $(content);
+        if (child.is("p, ul, li, strong, em, span, br")) {
+          if (child.is("ul")) {
+            child.find("li").each((_, li) => {
+              descriptionParts.push("- " + $(li).text().trim());
+            });
+          } else if (child.is("br")) {
+            descriptionParts.push("\n");
+          } else {
+            descriptionParts.push(child.text().trim());
+          }
+        }
+      }
+    });
+
+    let combinedDescription = descriptionParts.join(" ").replace(/\n+/g, ", ");
+    combinedDescription = combinedDescription
+      .replace(/\s*,\s*/g, ", ")
+      .replace(/\s*\.\s*/g, ". ")
+      .replace(/\s{2,}/g, " ")
+      .replace(/\.,/g, ".")
+      .replace(/\.\s*\./g, ".");
+
+    combinedDescription = combinedDescription.replace(/\.\s*$/, ".");
+    descriptionText = combinedDescription.trim();
+  }
+
+  if (descriptionText && !descriptionText.endsWith(".")) {
+    descriptionText += ".";
+  }
+
+  return descriptionText;
+}
+
+/**
+ * Scrape one room's detail page for its name, location, capacity, directions and description.
+ * Errors are logged rather than thrown; the room is then returned with whatever fields were
+ * filled in before the failure (name "" and capacity 0 if none were).
+ */
+async function getRoomInfo(roomId: string): Promise<StudyRoom> {
+  const url = `${ROOM_SPACE_URL}/${roomId}`;
+  const room: StudyRoom = {
+    id: `${roomId}`,
+    name: "",
+    capacity: 0,
+    location: "",
+  };
+  try {
+    const res = await fetch(url);
+    const text = await res.text();
+    const $ = load(text);
+
+    const roomHeader = $("#s-lc-public-header-title");
+    const roomHeaderText = roomHeader.text().trim();
+    const headerMatch = roomHeaderText.match(
+      /^(.*?)\s*(\(Tech Enhanced\))?\s*\n*\s*\((.*?)\)\s*\n*\s*Capacity:\s(\d+)/,
+    );
+    if (headerMatch) {
+      room.name = headerMatch[1].trim();
+      if (headerMatch[2]) {
+        room.techEnhanced = true;
+      }
+      room.location = headerMatch[3].trim();
+      room.capacity = parseInt(headerMatch[4], 10);
+    }
+
+    const directionsHeader = $(".s-lc-section-directions");
+    const directionsText = directionsHeader.find("p").text().trim();
+    if (directionsText) {
+      room.directions = directionsText.trim();
+      if (!room.directions.endsWith(".")) {
+        room.directions += ".";
+      }
+    }
+
+    const descriptionHeader = $(".s-lc-section-description");
+    room.description = processDescription(descriptionHeader, room.location, $);
+
+    logger.info(`Scraped Room ${roomId}`, { room });
+    return room;
+  } catch (error) {
+    logger.error(`Error fetching room information for room ${roomId}`, { error });
+    return room;
+  }
+}
+
+/**
+ * Scrape every location in studyLocations: fetch its availability grid from today through
+ * three days ahead, then scrape each room ID seen in the slots (each ID only once overall).
+ *
+ * @returns The scraped locations keyed by their short ID (e.g. "Science").
+ */
+export async function scrapeStudyLocations(): Promise<StudyLocations> {
+  const date = new Date();
+  const start = date.toLocaleDateString("en-CA", {
+    year: "numeric",
+    month: "2-digit",
+    day: "2-digit",
+  });
+  date.setDate(date.getDate() + 3);
+  const end = date.toLocaleDateString("en-CA", {
+    year: "numeric",
+    month: "2-digit",
+    day: "2-digit",
+  });
+  const studyLocationsMap: StudyLocations = {};
+  const rids = new Set();
+  for (const lib in studyLocations) {
+    const studyLocation: StudyLocation = {
+      id: lib,
+      lid: studyLocations[lib].lid,
+      // Store the display name (e.g. "Science Library"); the short key is already kept in id.
+      name: studyLocations[lib].name,
+      rooms: [],
+    };
+    const spaces = await getStudySpaces(studyLocation.lid, start, end);
+    for (const room of spaces.slots) {
+      if (!rids.has(room.itemId)) {
+        studyLocation.rooms.push(await getRoomInfo(room.itemId));
+        rids.add(room.itemId);
+      }
+    }
+    studyLocationsMap[studyLocation.id] = studyLocation;
+  }
+  return studyLocationsMap;
+}