diff --git a/services/departments-terms-scraper/src/index.ts b/services/departments-terms-scraper/src/index.ts index 35bff93d..54a0a898 100644 --- a/services/departments-terms-scraper/src/index.ts +++ b/services/departments-terms-scraper/src/index.ts @@ -1,13 +1,19 @@ import { CheerioAPI, load } from "cheerio"; import fetch from "cross-fetch"; -async function fetchWebSoc() { - const response = await fetch("https://www.reg.uci.edu/perl/WebSoc"); - const body = await response.text(); - return load(body); +function formatDepartment(department: string) { + const match = department.match(/^(?<code>.+?) \.(?:\.|\s)+(?<name>.+)$/); + if (match?.groups) { + return { + code: match.groups.code, + name: match.groups.name, + }; + } + + throw new Error(`Failed to parse department: ${department}`); } -async function getDepartments(webSocContent: CheerioAPI): Promise<string[]> { +async function getDepartments(webSocContent: CheerioAPI) { const $ = webSocContent; const departments: string[] = []; @@ -19,10 +25,23 @@ async function getDepartments(webSocContent: CheerioAPI): Promise<string[]> { } }); - return departments; + return departments.map(formatDepartment); } -async function getTerms(webSocContent: CheerioAPI): Promise<string[]> { +function formatTerm(term: string) { + const match = term.match(/^(?<year>\d+) {2}(?<term>.+)$/); + + if (match?.groups) { + return { + year: match.groups.year, + term: match.groups.term, + }; + } + + throw new Error(`Failed to parse term: ${term}`); +} + +async function getTerms(webSocContent: CheerioAPI) { const $ = webSocContent; const terms: string[] = []; @@ -31,7 +50,13 @@ async function getTerms(webSocContent: CheerioAPI): Promise<string[]> { terms.push(termText); }); - return terms; + return terms.map(formatTerm); +} + +async function fetchWebSoc() { + const response = await fetch("https://www.reg.uci.edu/perl/WebSoc"); + const body = await response.text(); + return load(body); } async function getDepartmentsTerms() {