-
Notifications
You must be signed in to change notification settings - Fork 79
/
test.py
149 lines (124 loc) · 5.49 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import unittest
import numpy
import scipy.spatial.distance
import libwmdrelax
import wmd
from numbers import Number
class Base(unittest.TestCase):
    """Common fixture base: seeds numpy's RNG and builds sample EMD inputs."""

    def setUp(self):
        # A fixed seed keeps the random points, and therefore the distance
        # matrices handed to the tests, identical on every run.
        numpy.random.seed(777)

    def _random_distance_matrix(self):
        """Return a 4x4 float32 matrix of pairwise distances of random points.

        Draws a single (4, 4) sample from numpy's global RNG, so the result
        depends on the seed installed by ``setUp``.
        """
        points = numpy.random.rand(4, 4)
        condensed = scipy.spatial.distance.pdist(points)
        square = scipy.spatial.distance.squareform(condensed)
        return square.astype(numpy.float32)

    def _get_w1_w2_dist_0(self):
        """Return ``(w1, w2, dist)`` where both weight vectors are uniform.

        Each weight vector is four float32 entries of 0.25.
        """
        dist = self._random_distance_matrix()
        w1 = numpy.full(4, 0.25, dtype=numpy.float32)
        w2 = numpy.full(4, 0.25, dtype=numpy.float32)
        return w1, w2, dist

    def _get_w1_w2_dist(self):
        """Return ``(w1, w2, dist)`` where the weight vectors do not overlap.

        ``w1`` puts 0.5 on indices 2 and 3, ``w2`` puts 0.5 on indices 0 and 1.
        """
        dist = self._random_distance_matrix()
        w1 = numpy.array([0, 0, 0.5, 0.5], dtype=numpy.float32)
        w2 = numpy.array([0.5, 0.5, 0, 0], dtype=numpy.float32)
        return w1, w2, dist
class RelaxedTests(Base):
    """Tests for ``libwmdrelax.emd_relaxed`` with and without a cache."""

    def test_no_cache_0(self):
        """Uniform weights on both sides are expected to give a result of 0."""
        w1, w2, dist = self._get_w1_w2_dist_0()
        r = libwmdrelax.emd_relaxed(w1, w2, dist)
        self.assertAlmostEqual(r, 0)

    def test_no_cache(self):
        """Non-overlapping weights, no cache argument supplied."""
        w1, w2, dist = self._get_w1_w2_dist()
        r = libwmdrelax.emd_relaxed(w1, w2, dist)
        self.assertAlmostEqual(r, 0.6125112)

    def test_with_cache(self):
        """The cache may be passed positionally or by keyword and be reused."""
        cache = libwmdrelax.emd_relaxed_cache_init(4)
        # Register the release immediately: if an assertion below fails, the
        # cache is still handed back to emd_relaxed_cache_fini.
        self.addCleanup(libwmdrelax.emd_relaxed_cache_fini, cache)
        w1, w2, dist = self._get_w1_w2_dist()
        r = libwmdrelax.emd_relaxed(w1, w2, dist, cache)
        self.assertAlmostEqual(r, 0.6125112)
        # Second call reuses the same cache, this time as a keyword argument.
        r = libwmdrelax.emd_relaxed(w1, w2, dist, cache=cache)
        self.assertAlmostEqual(r, 0.6125112)
class ExactTests(Base):
    """Tests for ``libwmdrelax.emd`` with and without a cache."""

    def test_no_cache_0(self):
        """Uniform weights on both sides are expected to give a result of 0."""
        w1, w2, dist = self._get_w1_w2_dist_0()
        r = libwmdrelax.emd(w1, w2, dist)
        self.assertAlmostEqual(r, 0)

    def test_no_cache(self):
        """Non-overlapping weights, no cache argument supplied."""
        w1, w2, dist = self._get_w1_w2_dist()
        r = libwmdrelax.emd(w1, w2, dist)
        self.assertAlmostEqual(r, 0.6125115)

    def test_with_cache(self):
        """The cache may be passed positionally or by keyword and be reused."""
        cache = libwmdrelax.emd_cache_init(4)
        # Register the release immediately: if an assertion below fails, the
        # cache is still handed back to emd_cache_fini.
        self.addCleanup(libwmdrelax.emd_cache_fini, cache)
        w1, w2, dist = self._get_w1_w2_dist()
        r = libwmdrelax.emd(w1, w2, dist, cache)
        self.assertAlmostEqual(r, 0.6125115)
        # Second call reuses the same cache, this time as a keyword argument.
        r = libwmdrelax.emd(w1, w2, dist, cache=cache)
        self.assertAlmostEqual(r, 0.6125115)
class TailVocabularyOptimizerTests(Base):
    """Tests for ``wmd.TailVocabularyOptimizer``."""

    def ndarray_almost_equals(self, a, b, msg=None):
        """Compares two 1D numpy arrays approximately.

        Registered in ``setUp`` as the ``assertEqual`` handler for
        ``numpy.ndarray``. Raises ``self.failureException`` when the lengths
        differ or when any pair of elements fails ``assertAlmostEqual``.
        A caller-supplied ``msg`` replaces the generated message.
        """
        if len(a) != len(b):
            if msg is None:
                msg = ("Length of arrays are not equal: {} and {}"
                       .format(len(a), len(b)))
            raise self.failureException(msg)
        for i, (x, y) in enumerate(zip(a, b)):
            try:
                self.assertAlmostEqual(x, y)
            except AssertionError as err:
                if msg is None:
                    msg = ("Arrays differ at index {}: {}" .format(i, err))
                # Chain explicitly so the element-level failure stays visible.
                raise self.failureException(msg) from err

    def setUp(self):
        # Keep the base fixture (RNG seeding) instead of silently dropping it.
        super().setUp()
        self.tvo = wmd.TailVocabularyOptimizer()
        # Lets assertEqual compare two ndarrays element-wise and approximately.
        self.addTypeEqualityFunc(numpy.ndarray, self.ndarray_almost_equals)

    def test_trigger_ratio_getter_type(self):
        trigger_ratio = self.tvo.trigger_ratio
        self.assertIsInstance(trigger_ratio, Number)

    def test_trigger_ratio_constructor(self):
        tvo = wmd.TailVocabularyOptimizer(0.123)
        self.assertAlmostEqual(tvo.trigger_ratio, 0.123)

    def test_trigger_ratio_setter(self):
        self.tvo.trigger_ratio = 0.456
        self.assertAlmostEqual(self.tvo.trigger_ratio, 0.456)

    def test_trigger_ratio_too_low(self):
        with self.assertRaises(Exception):
            self.tvo.trigger_ratio = -0.5

    def test_trigger_ratio_too_high(self):
        with self.assertRaises(Exception):
            self.tvo.trigger_ratio = 1.5

    def test_call_below_trigger(self):
        """With 3 words and vocabulary_max=10 the input must come back as is."""
        tvo = wmd.TailVocabularyOptimizer(0.5)
        words = numpy.array([1, 2, 3], dtype=int)
        weights = numpy.array([0.5, 0.2, 0.3], dtype=numpy.float32)
        vocabulary_max = 10
        ret_words, ret_weights = tvo(words, weights, vocabulary_max)
        self.assertEqual(words, ret_words)
        self.assertEqual(weights, ret_weights)

    def test_call_too_many_words(self):
        """With vocabulary_max=2 only the two heaviest words must remain."""
        tvo = wmd.TailVocabularyOptimizer(0.5)
        words = numpy.array([11, 22, 33, 44, 55, 66, 77], dtype=int)
        weights = numpy.array(
            [0.5, 0.1, 0.4, 0.8, 0.6, 0.2, 0.7], dtype=numpy.float32)
        vocabulary_max = 2
        ret_words, ret_weights = tvo(words, weights, vocabulary_max)
        self.assertEqual(len(ret_words), vocabulary_max)
        self.assertEqual(len(ret_weights), vocabulary_max)
        # Output order is not asserted; sort by word id before comparing.
        sorter = numpy.argsort(ret_words)
        self.assertEqual(ret_words[sorter], numpy.array([44, 77]))
        self.assertEqual(ret_weights[sorter], numpy.array([0.8, 0.7]))

    def test_call(self):
        """With vocabulary_max=6 the two lightest words must be dropped."""
        tvo = wmd.TailVocabularyOptimizer(0.5)
        words = numpy.array([11, 22, 33, 44, 55, 66, 77], dtype=int)
        weights = numpy.array(
            [0.5, 0.1, 0.4, 0.8, 0.6, 0.2, 0.7], dtype=numpy.float32)
        vocabulary_max = 6
        ret_words, ret_weights = tvo(words, weights, vocabulary_max)
        self.assertEqual(len(ret_words), len(ret_weights))
        self.assertLessEqual(len(ret_words), vocabulary_max)
        # Output order is not asserted; sort by word id before comparing.
        sorter = numpy.argsort(ret_words)
        self.assertEqual(ret_words[sorter], numpy.array([11, 33, 44, 55, 77]))
        self.assertEqual(
            ret_weights[sorter], numpy.array([0.5, 0.4, 0.8, 0.6, 0.7]))
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()