-
Notifications
You must be signed in to change notification settings - Fork 1
/
dupe_comments.ts
53 lines (47 loc) · 1.77 KB
/
dupe_comments.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
/* eslint-disable no-console */
import {Amud, ApiComment} from "./apiTypes";
import {books} from "./books";
import {cachedOutputFilePath} from "./cached_outputs";
import {ListMultimap} from "./multimap";
import {readUtf8} from "./files";
import {splitOnBookName} from "./refs";
import {flatten} from "./sefariaTextType";
/**
 * Decides whether a comment ref should be left out of the duplicate scan.
 *
 * A ref is skipped when it starts with "Footnote" or "Otzar Laazei Rashi", or when the
 * first element returned by `splitOnBookName(ref)` is a key in `books.byCanonicalName`
 * whose book reports `isBibleBook()`.
 *
 * @param ref - The ref string of a comment.
 * @returns `true` when the ref should be skipped; `false` otherwise, including when no
 *   book is registered under the extracted name.
 */
function skipRef(ref: string): boolean {
  if (ref.startsWith("Footnote")
    || ref.startsWith("Otzar Laazei Rashi")) {
    return true;
  }
  const book = books.byCanonicalName[splitOnBookName(ref)[0]];
  // An unknown book name yields `undefined` from the lookup; return a real boolean
  // rather than leaking `undefined` out of a function declared to return boolean.
  return book ? book.isBibleBook() : false;
}
// Duplicate-comment report. For each section of every Talmud or Bible book (Shekalim
// excluded), load the cached output file and log every flattened `he` text that is
// shared by comments carrying more than one distinct ref.
// The Set removes repeated book objects from the values of `books.byCanonicalName`.
for (const book of new Set(Object.values(books.byCanonicalName))) {
  const inScope = book.isTalmud() || book.isBibleBook();
  if (!inScope) continue;
  if (book.canonicalName === "Shekalim") continue;

  for (const section of book.sections) {
    const cachedPath = cachedOutputFilePath(book, section);
    const amud: Amud = JSON.parse(readUtf8(cachedPath));
    // Flattened comment text -> every comment in this section that has that text.
    const commentsByText = new ListMultimap<string, ApiComment>();

    // Records each non-skipped comment under `node.commentary`, then descends into
    // the comment itself, since a comment may carry its own `commentary`.
    const collect = (node: any): void => {
      const {commentary} = node;
      if (!commentary) return;

      const commentaries = Object.values(commentary);
      const found: ApiComment[] = commentaries.flatMap((entry: any) => entry.comments);
      for (const comment of found) {
        // Skipped refs are neither recorded nor descended into.
        if (skipRef(comment.ref)) continue;

        const flattened = flatten(comment.he);
        // TODO: check for dupes by removing all punctuation also
        if (flattened) {
          commentsByText.put(flattened, comment);
        }
        collect(comment);
      }
    };

    for (const amudSection of amud.sections) {
      collect(amudSection);
    }

    commentsByText.asMap().forEach((sameText: ApiComment[], dupeText: string) => {
      const distinctRefs = new Set(sameText.map(comment => comment.ref));
      // The same text repeated under a single ref is not reported.
      if (distinctRefs.size <= 1) return;
      console.log("MATCH", distinctRefs, dupeText);
    });
  }
}