forked from kaieberl/paper2speech
-
Notifications
You must be signed in to change notification settings - Fork 1
/
replacements.py
124 lines (123 loc) · 4.17 KB
/
replacements.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# Ordered (pattern, replacement) pairs that rewrite prose so it reads well
# when spoken.  Each pattern is a regular expression and each replacement
# uses ``re.sub`` syntax (``\1`` back-references).
# NOTE(review): the consumer is not in this file; the rules are presumably
# applied one after another with ``re.sub`` -- order matters (for example
# ``w.r.t.`` must be tried before ``w.r.t``).
text_rules = [
    # Abbreviations.  The leading ``\b`` keeps a rule from firing in the
    # middle of a longer word ("freq." must not become "frequation").
    (r"\bi\.e\.", "that is"),
    (r"\be\.g\.", "for example"),
    (r"\bi\.i\.d\.", "i i d"),
    (r"\bEq\.", "Equation"),
    (r"\beq\.", "equation"),
    (r"\bFig\.", "Figure"),
    (r"\bfig\.", "figure"),
    (r"\bSec\.", "Section"),
    (r"\bsec\.", "section"),
    (r"\bTab\.", "Table"),
    (r"\btab\.", "table"),
    (r"\bvs\.", "versus"),
    (r"\bw\.r\.t\.", "with respect to"),
    (r"\bw\.r\.t", "with respect to"),
    (r"\bw\.l\.o\.g\.", "without loss of generality"),
    # Spell out single letters.  The SSML element is ``say-as``; a tag
    # written as ``<say as ...>`` is not well-formed XML.
    ("tanh", 'tan <say-as interpret-as="characters">h</say-as>'),
    ("NVIDIA", '<say-as interpret-as="characters">N</say-as>vidia'),
    # "(k)-th" -> "k-th"
    (r"\((.*?)\)-th", r"\1-th"),
    # Remove footnote numbers glued to the end of a sentence, e.g.
    # "... of training.4" -> "... of training.".  A letter is required in
    # front of the dot so a trailing decimal such as "3.5" is left alone,
    # and the sentence-ending period itself is kept.
    (r"([^\W\d_])\.\d+$", r"\1."),
    # Wrap the section number of a markdown heading in its own sentence
    # tag so the synthesiser pauses after it.
    (r"#+\s+(\d+(\.\d+)*)\s+", r"<s>\1</s>"),
    # Drop the period of "et al." (escaped: a bare "." would also swallow
    # a comma or any other character following "et al").
    (r" et al\.", " et al"),
]
# Ordered (pattern, replacement) pairs that turn LaTeX math into words.
# Each pattern is a regular expression and each replacement uses ``re.sub``
# syntax.
# NOTE(review): the consumer is not in this file; the rules are presumably
# applied one after another with ``re.sub`` to each math snippet.
#
# Ordering contract: a rule that needs to see raw LaTeX must come BEFORE any
# generic rule that would consume part of its input (``\infty`` before
# ``\in``, ``\sum_{..}^{..}`` before the generic ``^{..}`` / ``_{..}`` rules,
# ``\mathcal{N}(..)`` before the generic parenthesis rule, ...).
# Literal braces and parentheses are always escaped: an unescaped ``{2}`` is
# a repetition count and an unescaped ``(`` opens a group.
#
# Deliberately not listed because earlier generic rules already produce the
# same words: "-\infty" and "^{-x}" (minus rule + generic rules),
# "\mathrm{mod}" (generic \mathrm rule), "\left( .. \right)" (converted to
# plain parentheses below).
math_rules = [
    # arg min / arg max contain "\," and a subscript, so they must run
    # before the thin-space and subscript rules.
    (r"\\operatorname\*\{arg\\,min\}_\{(.*?)\}", r" arg min of \1 "),
    (r"\\operatorname\*\{arg\\,max\}_\{(.*?)\}", r" arg max of \1 "),
    # Thin space, signs, sized parentheses.
    (r"\\,", ""),
    (r"-", " minus "),
    (r"\+", " plus "),
    (r"\\left\(", "("),
    (r"\\right\)", ")"),
    (r"\\%", "%"),
    (r"\\simeq", " is approximately "),
    # TODO: expectation
    # Named objects taking parenthesised arguments; must precede the
    # generic "( .. )" -> "of .." rule.
    (r"\\mathcal\{N\}\((.*?),(.*?)\)", r" normal distribution with mean \1 and variance \2 "),
    (r"\\mathcal\{O\}\((.*?)\)", r" order \1 "),
    # p(x|y) -> "of x given y"; any other non-leading "( .. )" -> "of ..".
    (r"\((\w+)\|([\w,]+)\)", r" of \1 given \2 "),
    (r"(?<!^)\(([^)]+)\)", r" of \1 "),
    # Number sets with a dimension.
    (r"\\mathbb\{N\}\^\{(.*?)\}", r" N \1 "),
    (r"\\mathbb\{R\}\^\{(.*?)\}", r" R \1 "),
    (r"\\mathbb\{C\}\^\{(.*?)\}", r" C \1 "),
    (r"\\mathbb\{Z\}\^\{(.*?)\}", r" Z \1 "),
    (r"N\^\{(.*?)\}", r" N \1 "),
    (r"R\^\{(.*?)\}", r" R \1 "),
    (r"C\^\{(.*?)\}", r" C \1 "),
    # Integrals, sums and products with limits; must precede the generic
    # superscript / subscript rules.
    (r"\\int_\{(.*?)\}\^\{(.*?)\}", r" integral from \1 to \2 of "),
    (r"\\int_(.*?)\^(.*?) ", r" integral from \1 to \2 of "),
    (r"\\sum_\{([^{}]*)\}\^\{([^{}]*)\}", r" sum from \1 to \2 of "),
    (r"\\sum_([^\s{}^]+)\^", r" sum from \1 to "),
    (r"\\sum_\{([^{}]*)\}", r" sum over \1 of "),
    (r"\\prod_\{([^{}]*)\}\^\{([^{}]*)\}", r" product from \1 to \2 of "),
    (r"\\prod_([^\s{}^]+)\^", r" product from \1 to "),
    (r"\\prod_\{([^{}]*)\}", r" product over \1 of "),
    # Derivatives and fractions (the d/d form before the generic one).
    (r"\\frac\{d(.*?)\}\{d(.*?)\}", r" d \1 over d \2 of "),
    (r"\\dot\{(.*?)\}", r" \1 dot "),
    (r"\\ddot\{(.*?)\}", r" \1 double dot "),
    (r"\\partial_\{(.*?)\}", r" partial \1 "),
    (r"\\frac\{(.*?)\}\{(.*?)\}", r" \1 over \2 "),
    # Font commands: keep only the argument.
    (r"\\mathbf\{(.*?)\}", r" \1 "),
    (r"\\mathbb\{(.*?)\}", r" \1 "),
    (r"\\mathcal\{(.*?)\}", r" \1 "),
    (r"\\mathrm\{(.*?)\}", r" \1 "),
    (r"\\bm\{(.*?)\}", r" \1 "),
    (r"\\times", r" times "),
    # Superscripts and subscripts, most specific first.
    (r"\^\{\*\}", r" star "),
    (r"_\{\*\}", r" star "),
    (r"\^\{2\}", r" squared "),
    (r"\^\{(.*?)\}", r" to the \1 "),
    (r"\^", r" to the "),
    (r"_\{(.*?)\}", r" sub \1 "),
    (r"\\cdot", r" dot "),
    # "\infty" and bare "\int" must be handled before "\in", which is a
    # prefix of both.
    (r"\\infty", r" infinity "),
    (r"\\int", r" integral "),
    (r"\\sum", r" sum "),
    (r"\\prod", r" product "),
    (r"\\in", r" in "),
    (r"\\hat\{(.*?)\}", r" \1 hat "),
    # Remaining sized delimiters: keep only the content.  The optional
    # backslash covers "\left\{ .. \right\}" and "\left\| .. \right\|".
    (r"\\left\[(.*?)\\right\]", r" \1 "),
    (r"\\left\\?\{(.*?)\\right\\?\}", r" \1 "),
    (r"\\left\\?\|(.*?)\\right\\?\|", r" \1 "),
    (r"\\sqrt\{(.*?)\}", r" square root of \1 "),
    # Constants and relations.
    (r"\\pi", r" pi "),
    (r"\\equiv", r" equivalent to "),
    (r"\\approx", r" approximately "),
    (r"\\neq", r" inequal "),
    (r"\\leq", r" less or equal "),
    (r"\\geq", r" greater equal "),
    (r"\\ll", r" much less than "),
    (r"\\gg", r" much greater than "),
    (r"\\to", r" to "),
    (r"\\sim", r" according to "),
    (r"\\rightarrow", r" going to "),
    (r"\\leftarrow", r" is set to "),
    (r"\\mapsto", r" to "),
    # Greek letters.
    (r"\\alpha", r" alpha "),
    (r"\\beta", r" beta "),
    (r"\\gamma", r" gamma "),
    (r"\\delta", r" delta "),
    (r"\\epsilon", r" epsilon "),
    (r"\\varepsilon", r" epsilon "),
    (r"\\eta", r" eta "),
    (r"\\theta", r" theta "),
    (r"\\kappa", r" kappa "),
    (r"\\lambda", r" lambda "),
    (r"\\mu", r" mu "),
    (r"\\nu", r" nu "),
    (r"\\xi", r" xi "),
    (r"\\rho", r" rho "),
    (r"\\sigma", r" sigma "),
    (r"\\tau", r" tau "),
    (r"\\phi", r" phi "),
    (r"\\varphi", r" phi "),
    (r"\\chi", r" chi "),
    (r"\\psi", r" psi "),
    (r"\\omega", r" omega "),
    (r"\\Gamma", r" Gamma "),
    # Final clean-up: commas, leftover grouping braces, and runs of
    # (non-breaking) spaces left behind by the padded replacements above.
    # NOTE(review): the last rule used to replace " " with " " (a no-op);
    # collapsing whitespace is the presumed intent -- confirm.
    (r",", r" "),
    (r"\{(.*?)\}", r"\1"),
    (r"[ \u00a0]+", r" "),
    # ... Add more as required
]