From 28fd0a7fb5f7a836e5028771f11b8e9d60ed2291 Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Wed, 15 May 2024 18:17:10 -0700 Subject: [PATCH] =?UTF-8?q?fix:=20=F0=9F=90=9B=20return=20parsed=20departm?= =?UTF-8?q?ents=20&=20terms?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../departments-terms-scraper/src/index.ts | 41 +++++++++++++++---- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/services/departments-terms-scraper/src/index.ts b/services/departments-terms-scraper/src/index.ts index 35bff93d..54a0a898 100644 --- a/services/departments-terms-scraper/src/index.ts +++ b/services/departments-terms-scraper/src/index.ts @@ -1,13 +1,19 @@ import { CheerioAPI, load } from "cheerio"; import fetch from "cross-fetch"; -async function fetchWebSoc() { - const response = await fetch("https://www.reg.uci.edu/perl/WebSoc"); - const body = await response.text(); - return load(body); +function formatDepartment(department: string) { + const match = department.match(/^(?<code>.+?) 
\.(?:\.|\s)+(?<name>.+)$/); + if (match?.groups) { + return { + code: match.groups.code, + name: match.groups.name, + }; + } + + throw new Error(`Failed to parse department: ${department}`); } -async function getDepartments(webSocContent: CheerioAPI): Promise<string[]> { +async function getDepartments(webSocContent: CheerioAPI) { const $ = webSocContent; const departments: string[] = []; @@ -19,10 +25,23 @@ async function getDepartments(webSocContent: CheerioAPI): Promise<string[]> { } }); - return departments; + return departments.map(formatDepartment); } -async function getTerms(webSocContent: CheerioAPI): Promise<string[]> { +function formatTerm(term: string) { + const match = term.match(/^(?<year>\d+) {2}(?<term>.+)$/); + + if (match?.groups) { + return { + year: match.groups.year, + term: match.groups.term, + }; + } + + throw new Error(`Failed to parse term: ${term}`); +} + +async function getTerms(webSocContent: CheerioAPI) { const $ = webSocContent; const terms: string[] = []; @@ -31,7 +50,13 @@ async function getTerms(webSocContent: CheerioAPI): Promise<string[]> { terms.push(termText); }); - return terms; + return terms.map(formatTerm); +} + +async function fetchWebSoc() { + const response = await fetch("https://www.reg.uci.edu/perl/WebSoc"); + const body = await response.text(); + return load(body); } async function getDepartmentsTerms() {