Skip to content

Commit

Permalink
feat: implemented git suggestions for study room scraper issue
Browse files Browse the repository at this point in the history
  • Loading branch information
Sanskar Mishra committed May 20, 2024
1 parent c05c145 commit d721107
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 100 deletions.
14 changes: 5 additions & 9 deletions apps/api/src/routes/v1/rest/studyRooms/+endpoint.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,20 @@ import { createHandler } from "@libs/lambda";
import { studyLocations } from "libs/uc-irvine-lib/src/spaces";
import { ZodError } from "zod";

import { aggreagteStudyRooms } from "./lib";
import { aggregateStudyRooms } from "./lib";
import { Query, QuerySchema } from "./schema";

export const GET = createHandler(async (event, context, res) => {
const headers = event.headers;
const query = event.queryStringParameters;
const requestId = context.awsRequestId;
let parsedQuery: Query;
try {
parsedQuery = QuerySchema.parse(query);
const parsedQuery = QuerySchema.parse(query);
if (!studyLocations[parsedQuery.location]) {
return res.createErrorResult(404, `Location ${parsedQuery.location} not found`, requestId);
}
return res.createOKResult(
await aggreagteStudyRooms(parsedQuery.location, parsedQuery.start, parsedQuery.end),
headers,
requestId,
);
}
const studyRooms = await aggregateStudyRooms(parsedQuery.location, parsedQuery.start, parsedQuery.end)
return res.createOKResult(studyRooms, headers, requestId);
} catch (e) {
if (e instanceof ZodError) {
const messages = e.issues.map((issue) => issue.message);
Expand Down
11 changes: 4 additions & 7 deletions apps/api/src/routes/v1/rest/studyRooms/lib.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,16 @@ export function parseTimeSlots(slots: Slot[]): { [id: string]: TimeSlot[] } {
end,
booked: !!slot.className && slot.className === "s-lc-eq-checkout",
};
if (!timeSlots[roomId]) {
timeSlots[roomId] = [timeSlot];
} else {
timeSlots[roomId].push(timeSlot);
}
timeSlots[roomId] ??= []
timeSlots[roomId].push(timeSlot)
});
return timeSlots;
}

/**
* Aggregate study rooms and their time slots into a StudyLocation object.
*/
export async function aggreagteStudyRooms(
export async function aggregateStudyRooms(
locationId: string,
start: string,
end: string,
Expand All @@ -52,7 +49,7 @@ export async function aggreagteStudyRooms(
id: locationId,
...studyLocations[locationId],
rooms: Object.entries(timeSlotsMap)
.filter(([id, _]) => studyRooms[id])
.filter(([id, _]) => studyRooms[id] != null)
.map(([id, timeSlots]) => {
return { ...studyRooms[id], timeSlots };
}),
Expand Down
11 changes: 4 additions & 7 deletions apps/api/src/routes/v1/rest/studyRooms/{id}/+endpoint.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { createHandler } from "@libs/lambda";
import { studyLocations } from "libs/uc-irvine-lib/src/spaces";
import { ZodError } from "zod";

import { aggreagteStudyRooms } from "../lib";
import { aggregateStudyRooms } from "../lib";

import { Query, QuerySchema } from "./schema";

Expand All @@ -11,18 +11,15 @@ export const GET = createHandler(async (event, context, res) => {
const query = event.queryStringParameters;
const requestId = context.awsRequestId;
const { id } = event.pathParameters ?? {};
let parsedQuery: Query;
if (id == null) return res.createErrorResult(400, "Location not provided", requestId);
try {
switch (id) {
case null:
case undefined:
return res.createErrorResult(400, "Location not provided", requestId);
case "all":
parsedQuery = QuerySchema.parse(query);
const parsedQuery = QuerySchema.parse(query);
return res.createOKResult(
await Promise.all(
Object.keys(studyLocations).map(async (locationId) => {
return aggreagteStudyRooms(locationId, parsedQuery.start, parsedQuery.end);
return aggregateStudyRooms(locationId, parsedQuery.start, parsedQuery.end);
}),
),
headers,
Expand Down
155 changes: 78 additions & 77 deletions tools/study-room-scraper/src/study-room-scraper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,70 @@ const logger = winston.createLogger({
transports: [new winston.transports.Console()],
});

/**
 * Build a single-line description string from a room page's description section.
 *
 * Two page layouts are handled:
 *  - When `location` is exactly "Grunigen Medical Library", only the `<p>`
 *    elements under `descriptionHeader` are read; newlines inside a paragraph
 *    become ", " separators.
 *  - Otherwise the direct child nodes of `descriptionHeader` are walked: text
 *    nodes are kept, `<ul>` items become "- item" entries, `<br>` becomes a
 *    ", " separator, and p/li/strong/em/span contribute their trimmed text.
 *    Any other element is ignored.
 *
 * In both cases whitespace and punctuation are normalized afterwards.
 *
 * NOTE(review): CI reports "Cannot find namespace 'cheerio'" for the two
 * parameter types below. They presumably need to be replaced by the types
 * exported by the installed cheerio package — confirm against the file's
 * imports and the cheerio version in use.
 *
 * @param descriptionHeader - Selection holding the description section.
 * @param location - Room location name; selects which layout is parsed.
 * @param $ - Cheerio root used to wrap raw nodes into selections.
 * @returns The cleaned description ending in ".", or "" when no text was found.
 */
function processDescription(
  descriptionHeader: cheerio.Cheerio,
  location: string,
  $: cheerio.Root,
): string {
  let descriptionText = "";

  if (location === "Grunigen Medical Library") {
    // Use the element argument rather than `this`, so the callback needs no
    // implicit `this` typing.
    descriptionHeader.find("p").each((_, paragraph) => {
      let paraText = $(paragraph).text().trim();
      if (paraText.includes("\n")) {
        paraText = paraText.replace(/\n/g, ", ");
        // A paragraph ending in ":" introduces the next one, so it is not
        // closed with a period.
        if (!paraText.endsWith(":")) {
          paraText += ". ";
        }
      }
      descriptionText += paraText + " ";
    });

    descriptionText = descriptionText
      .replace(/\s{2,}/g, " ") // collapse runs of whitespace
      .trim()
      .replace(/\s+,/g, ",") // no whitespace before a comma
      .replace(/\.\s*\./g, ".") // collapse doubled periods
      // Global on purpose: every line that already ended in "." yields a ".,"
      // once newlines become ", ", not just the first one.
      .replace(/\.,/g, ".");
  } else {
    const descriptionParts: string[] = [];

    descriptionHeader.contents().each((_, content) => {
      if (content.nodeType === 3) {
        // Text node: keep it only when it is not pure whitespace.
        const textContent = $(content).text().trim();
        if (textContent) {
          descriptionParts.push(textContent);
        }
        return;
      }
      if (content.nodeType !== 1) {
        return;
      }

      // Element node: only a fixed set of tags contributes text.
      const child = $(content);
      if (!child.is("p, ul, li, strong, em, span, br")) {
        return;
      }
      if (child.is("ul")) {
        child.find("li").each((_, li) => {
          descriptionParts.push("- " + $(li).text().trim());
        });
      } else if (child.is("br")) {
        // Newline placeholder; turned into ", " after joining.
        descriptionParts.push("\n");
      } else {
        descriptionParts.push(child.text().trim());
      }
    });

    descriptionText = descriptionParts
      .join(" ")
      .replace(/\n+/g, ", ")
      .replace(/\s*,\s*/g, ", ")
      .replace(/\s*\.\s*/g, ". ")
      .replace(/\s{2,}/g, " ")
      .replace(/\.,/g, ".")
      .replace(/\.\s*\./g, ".")
      .replace(/\.\s*$/, ".") // no trailing whitespace after the final period
      .trim();
  }

  // Non-empty descriptions always end with a period.
  if (descriptionText && !descriptionText.endsWith(".")) {
    descriptionText += ".";
  }

  return descriptionText;
}

async function getRoomInfo(RoomId: string): Promise<StudyRoom> {
const url = `${ROOM_SPACE_URL}/${RoomId}`;
const room: StudyRoom = {
Expand All @@ -33,7 +97,6 @@ async function getRoomInfo(RoomId: string): Promise<StudyRoom> {
const text = await res.text();
const $ = load(text);

//Room Header
const roomHeader = $("#s-lc-public-header-title");
const roomHeaderText = roomHeader.text().trim();
const headerMatch = roomHeaderText.match(
Expand All @@ -47,7 +110,7 @@ async function getRoomInfo(RoomId: string): Promise<StudyRoom> {
room.location = headerMatch[3].trim();
room.capacity = parseInt(headerMatch[4], 10);
}
//Room Directions

const directionsHeader = $(".s-lc-section-directions");
const directionsText = directionsHeader.find("p").text().trim();
if (directionsText) {
Expand All @@ -58,73 +121,7 @@ async function getRoomInfo(RoomId: string): Promise<StudyRoom> {
}

const descriptionHeader = $(".s-lc-section-description");
let descriptionText = "";
if (room.location === "Grunigen Medical Library") {
// Specific processing for the Grunigen Library case
descriptionHeader.find("p").each(function () {
let paraText = $(this).text().trim();
if (paraText.includes("\n")) {
paraText = paraText.replaceAll("\n", ", ");
if (!paraText.endsWith(":")) {
paraText += ". ";
}
}
descriptionText += paraText + " ";
});
descriptionText = descriptionText.replace(/\s{2,}/g, " ").trim(); // Remove extra spaces
descriptionText = descriptionText.replace(/\s+,/g, ","); // Remove spaces before commas
descriptionText = descriptionText.replace(/\.\s*\./g, "."); // Remove extra periods
descriptionText = descriptionText.replace(".,", "."); // Remove commas after periods
} else {
// General processing for other rooms
const descriptionParts: string[] = [];
let combinedDescription = "";

descriptionHeader.contents().each((_, content) => {
if (content.nodeType === 3) {
const textContent = $(content).text().trim();
if (textContent) {
descriptionParts.push(textContent);
}
} else if (content.nodeType === 1) {
const child = $(content);
if (child.is("p, ul, li, strong, em, span, br")) {
if (child.is("ul")) {
child.find("li").each((_, li) => {
descriptionParts.push("- " + $(li).text().trim());
});
} else if (child.is("br")) {
descriptionParts.push("\n");
} else {
descriptionParts.push(child.text().trim());
}
}
}
});

// join parts and replace newline placeholders with commas
combinedDescription = descriptionParts.join(" ").replace(/\n+/g, ", ");

// clean up
combinedDescription = combinedDescription
.replace(/\s*,\s*/g, ", ")
.replace(/\s*\.\s*/g, ". ")
.replace(/\s{2,}/g, " ")
.replace(/\.,/g, ".")
.replace(/\.\s*\./g, ".");

// description ends with a single period
combinedDescription = combinedDescription.replace(/\.\s*$/, ".");

descriptionText = combinedDescription.trim();
}

if (descriptionText) {
room.description = descriptionText;
if (!room.description.endsWith(".")) {
room.description += ".";
}
}
room.description = processDescription(descriptionHeader, room.location, $);

logger.info(`Scraped Room ${RoomId}`, { room });
return room;
Expand All @@ -148,23 +145,27 @@ export async function scrapeStudyLocations(): Promise<StudyLocations> {
day: "2-digit",
});
const studyLocationsMap: StudyLocations = {};
const rids: Set<string> = new Set();
const rids = new Set<string>();
for (const lib in studyLocations) {
const studyLocation: StudyLocation = {
id: lib,
lid: studyLocations[lib].lid,
name: lib,
rooms: [],
};
const res = await getStudySpaces(studyLocation.lid, start, end);
for (const room of res.slots) {
if (rids.has(room.itemId)) {
continue;
const spaces = await getStudySpaces(studyLocation.lid, start, end);
for (const room of spaces.slots) {
if (!rids.has(room.itemId)) {
studyLocation.rooms.push(await getRoomInfo(room.itemId));
rids.add(room.itemId);
}
studyLocation.rooms.push(await getRoomInfo(room.itemId));
rids.add(room.itemId);
}
studyLocationsMap[`${studyLocation.id}`] = studyLocation;
studyLocationsMap[studyLocation.id] = studyLocation;
}
return studyLocationsMap;
}
// NOTE(review): these module-level calls scrape five hard-coded rooms every
// time this file is loaded and discard the results; they look like leftover
// debugging calls — confirm whether they should be removed.
// Until then, attach a rejection handler so a failed request is logged
// instead of surfacing as an unhandled promise rejection.
for (const roomId of ["44696", "116383", "117634", "120645", "51792"]) {
  getRoomInfo(roomId).catch((error: unknown) => {
    logger.error(`Failed to scrape Room ${roomId}`, { error });
  });
}

0 comments on commit d721107

Please sign in to comment.