Fix integration tests as described in #1629 (#1638)

castorini · Sep 20, 2023 · d4a829d · d4a829d
1 parent db1347c
commit d4a829d
Show file tree

Hide file tree

Showing 18 changed files with 258 additions and 199 deletions.
diff --git a/integrations/dense/test_ance.py b/integrations/dense/test_ance.py
@@ -16,8 +16,8 @@
 
 """Integration tests for ANCE and ANCE PRF using on-the-fly query encoding."""
 
+import multiprocessing
 import os
-import socket
 import unittest
 
 from integrations.utils import clean_files, run_command, parse_score, parse_score_qa, parse_score_msmarco
@@ -29,30 +29,16 @@ class TestAnce(unittest.TestCase):
     def setUp(self):
         self.temp_files = []
         self.threads = 16
-        self.batch_size = 256
+        self.batch_size = self.threads * 32
         self.rocchio_alpha = 0.4
         self.rocchio_beta = 0.6
 
-        # Hard-code larger values for internal servers
-        if socket.gethostname().startswith('damiano') or socket.gethostname().startswith('orca'):
-            self.threads = 36
-            self.batch_size = 144
-
-    def test_ance_encoded_queries(self):
-        encoded = QueryEncoder.load_encoded_queries('ance-msmarco-passage-dev-subset')
-        topics = get_topics('msmarco-passage-dev-subset')
-        for t in topics:
-            self.assertTrue(topics[t]['title'] in encoded.embedding)
-
-        encoded = QueryEncoder.load_encoded_queries('ance-dl19-passage')
-        topics = get_topics('dl19-passage')
-        for t in topics:
-            self.assertTrue(topics[t]['title'] in encoded.embedding)
-
-        encoded = QueryEncoder.load_encoded_queries('ance-dl20')
-        topics = get_topics('dl20')
-        for t in topics:
-            self.assertTrue(topics[t]['title'] in encoded.embedding)
+        half_cores = int(multiprocessing.cpu_count() / 2)
+        # If server supports more threads, then use more threads.
+        # As a heuristic, use half of the available CPU cores.
+        if half_cores > self.threads:
+            self.threads = half_cores
+            self.batch_size = half_cores * 32
 
     def test_msmarco_passage_ance_avg_prf_otf(self):
         output_file = 'test_run.dl2019.ance.avg-prf.otf.trec'

diff --git a/integrations/dense/test_distilbert_kd.py b/integrations/dense/test_distilbert_kd.py
diff --git a/integrations/dense/test_distilbert_tasb.py b/integrations/dense/test_distilbert_tasb.py
diff --git a/integrations/dense/test_dpr.py b/integrations/dense/test_dpr.py
@@ -17,8 +17,8 @@
 """Integration tests for DPR model using pre-encoded queries."""
 
 import json
+import multiprocessing
 import os
-import socket
 import unittest
 
 from integrations.utils import clean_files, run_command, parse_score_qa
@@ -30,12 +30,14 @@ class TestDpr(unittest.TestCase):
     def setUp(self):
         self.temp_files = []
         self.threads = 16
-        self.batch_size = 256
+        self.batch_size = self.threads * 32
 
-        # Hard-code larger values for internal servers
-        if socket.gethostname().startswith('damiano') or socket.gethostname().startswith('orca'):
-            self.threads = 36
-            self.batch_size = 144
+        half_cores = int(multiprocessing.cpu_count() / 2)
+        # If server supports more threads, then use more threads.
+        # As a heuristic, use half of the available CPU cores.
+        if half_cores > self.threads:
+            self.threads = half_cores
+            self.batch_size = half_cores * 32
 
     def test_dpr_nq_test_bf_otf(self):
         output_file = 'test_run.dpr.nq-test.multi.bf.otf.trec'

diff --git a/integrations/dense/test_sbert.py b/integrations/dense/test_sbert.py
@@ -16,25 +16,25 @@
 
 """Integration tests for SBERT model using on-the-fly query encoding."""
 
+import multiprocessing
 import os
-import socket
 import unittest
 
 from integrations.utils import clean_files, run_command, parse_score
-from pyserini.search import QueryEncoder
-from pyserini.search import get_topics
 
 
 class TestSBert(unittest.TestCase):
     def setUp(self):
         self.temp_files = []
         self.threads = 16
-        self.batch_size = 256
+        self.batch_size = self.threads * 32
 
-        # Hard-code larger values for internal servers
-        if socket.gethostname().startswith('damiano') or socket.gethostname().startswith('orca'):
-            self.threads = 36
-            self.batch_size = 144
+        half_cores = int(multiprocessing.cpu_count() / 2)
+        # If server supports more threads, then use more threads.
+        # As a heuristic, use half of the available CPU cores.
+        if half_cores > self.threads:
+            self.threads = half_cores
+            self.batch_size = half_cores * 32
 
     def test_msmarco_passage_sbert_bf_otf(self):
         output_file = 'test_run.msmarco-passage.sbert.bf.otf.tsv'
@@ -53,12 +53,6 @@ def test_msmarco_passage_sbert_bf_otf(self):
         self.assertEqual(status, 0)
         self.assertAlmostEqual(score, 0.3314, delta=0.0001)
 
-    def test_msmarco_passage_sbert_encoded_queries(self):
-        encoded = QueryEncoder.load_encoded_queries('sbert-msmarco-passage-dev-subset')
-        topics = get_topics('msmarco-passage-dev-subset')
-        for t in topics:
-            self.assertTrue(topics[t]['title'] in encoded.embedding)
-
     def tearDown(self):
         clean_files(self.temp_files)
 

diff --git a/integrations/dense/test_tct_colbert-v2.py b/integrations/dense/test_tct_colbert-v2.py
@@ -16,25 +16,25 @@
 
 """Integration tests for TCT-ColBERTv2 models using on-the-fly query encoding."""
 
+import multiprocessing
 import os
-import socket
 import unittest
 
 from integrations.utils import clean_files, run_command, parse_score
-from pyserini.search import QueryEncoder
-from pyserini.search import get_topics
 
 
 class TestTctColBertV2(unittest.TestCase):
     def setUp(self):
         self.temp_files = []
         self.threads = 16
-        self.batch_size = 256
+        self.batch_size = self.threads * 32
 
-        # Hard-code larger values for internal servers
-        if socket.gethostname().startswith('damiano') or socket.gethostname().startswith('orca'):
-            self.threads = 36
-            self.batch_size = 144
+        half_cores = int(multiprocessing.cpu_count() / 2)
+        # If server supports more threads, then use more threads.
+        # As a heuristic, use half of the available CPU cores.
+        if half_cores > self.threads:
+            self.threads = half_cores
+            self.batch_size = half_cores * 32
 
     def test_msmarco_passage_tct_colbert_v2_bf_otf(self):
         output_file = 'test_run.msmarco-passage.tct_colbert-v2.bf-otf.tsv'
@@ -106,24 +106,6 @@ def test_msmarco_passage_tct_colbert_v2_hnp_bf_d2q_hybrid_otf(self):
         self.assertEqual(status, 0)
         self.assertAlmostEqual(score, 0.3731, delta=0.0001)
 
-    def test_msmarco_passage_tct_colbert_v2_encoded_queries(self):
-        encoded = QueryEncoder.load_encoded_queries('tct_colbert-v2-msmarco-passage-dev-subset')
-        topics = get_topics('msmarco-passage-dev-subset')
-        for t in topics:
-            self.assertTrue(topics[t]['title'] in encoded.embedding)
-
-    def test_msmarco_passage_tct_colbert_v2_hn_encoded_queries(self):
-        encoded = QueryEncoder.load_encoded_queries('tct_colbert-v2-hn-msmarco-passage-dev-subset')
-        topics = get_topics('msmarco-passage-dev-subset')
-        for t in topics:
-            self.assertTrue(topics[t]['title'] in encoded.embedding)
-
-    def test_msmarco_passage_tct_colbert_v2_hnp_encoded_queries(self):
-        encoded = QueryEncoder.load_encoded_queries('tct_colbert-v2-hnp-msmarco-passage-dev-subset')
-        topics = get_topics('msmarco-passage-dev-subset')
-        for t in topics:
-            self.assertTrue(topics[t]['title'] in encoded.embedding)
-
     def tearDown(self):
         clean_files(self.temp_files)
 

diff --git a/integrations/dense/test_tct_colbert.py b/integrations/dense/test_tct_colbert.py
@@ -16,8 +16,8 @@
 
 """Integration tests for TCT-ColBERTv1 models using on-the-fly query encoding."""
 
+import multiprocessing
 import os
-import socket
 import unittest
 
 from integrations.utils import clean_files, run_command, parse_score
@@ -29,12 +29,14 @@ class TestTctColBert(unittest.TestCase):
     def setUp(self):
         self.temp_files = []
         self.threads = 16
-        self.batch_size = 256
+        self.batch_size = self.threads * 32
 
-        # Hard-code larger values for internal servers
-        if socket.gethostname().startswith('damiano') or socket.gethostname().startswith('orca'):
-            self.threads = 36
-            self.batch_size = 144
+        half_cores = int(multiprocessing.cpu_count() / 2)
+        # If server supports more threads, then use more threads.
+        # As a heuristic, use half of the available CPU cores.
+        if half_cores > self.threads:
+            self.threads = half_cores
+            self.batch_size = half_cores * 32
 
     def test_msmarco_passage_tct_colbert_bf_otf(self):
         output_file = 'test_run.msmarco-passage.tct_colbert.bf-otf.tsv'
@@ -169,12 +171,6 @@ def test_msmarco_doc_tct_colbert_bf_d2q_hybrid_otf(self):
         self.assertEqual(status, 0)
         self.assertAlmostEqual(score, 0.3784, places=4)
 
-    def test_msmarco_doc_tct_colbert_encoded_queries(self):
-        encoded = QueryEncoder.load_encoded_queries('tct_colbert-msmarco-doc-dev')
-        topics = get_topics('msmarco-doc-dev')
-        for t in topics:
-            self.assertTrue(topics[t]['title'] in encoded.embedding)
-
     def tearDown(self):
         clean_files(self.temp_files)
 

diff --git a/integrations/papers/test_ecir2023.py b/integrations/papers/test_ecir2023.py
@@ -16,6 +16,7 @@
 
 """Integration tests for commands in Pradeep et al. resource paper at ECIR 2023."""
 
+import multiprocessing
 import os
 import unittest
 
@@ -25,6 +26,15 @@
 class TestECIR2023(unittest.TestCase):
     def setUp(self):
         self.temp_files = []
+        self.threads = 16
+        self.batch_size = self.threads * 32
+
+        half_cores = int(multiprocessing.cpu_count() / 2)
+        # If server supports more threads, then use more threads.
+        # As a heuristic, use half of the available CPU cores.
+        if half_cores > self.threads:
+            self.threads = half_cores
+            self.batch_size = half_cores * 32
 
     def test_section5_sub2_first(self):
         """Sample code of the first command in Section 5.2."""
@@ -42,7 +52,7 @@ def test_section5_sub2_first(self):
                       --topics nq-test \
                       --encoder castorini/dkrr-dpr-nq-retriever \
                       --output {output_file} --query-prefix question: \
-                      --threads 72 --batch-size 72 \
+                      --threads {self.threads} --batch-size {self.batch_size} \
                       --hits 100'
         status = os.system(run_cmd)
         self.assertEqual(status, 0)

diff --git a/integrations/papers/test_sigir2022.py b/integrations/papers/test_sigir2022.py
@@ -16,6 +16,7 @@
 
 """Integration tests for commands in Ma et al. resource paper and Trotman et al. demo paper at SIGIR 2022."""
 
+import multiprocessing
 import os
 import unittest
 
@@ -25,6 +26,15 @@
 class TestSIGIR2022(unittest.TestCase):
     def setUp(self):
         self.temp_files = []
+        self.threads = 16
+        self.batch_size = self.threads * 8
+
+        half_cores = int(multiprocessing.cpu_count() / 2)
+        # If server supports more threads, then use more threads.
+        # As a heuristic, use half of the available CPU cores.
+        if half_cores > self.threads:
+            self.threads = half_cores
+            self.batch_size = half_cores * 8
 
     def test_Ma_etal_section4_1a(self):
         """Sample code in Section 4.1. in Ma et al. resource paper."""
@@ -57,7 +67,7 @@ def test_Ma_etal_section4_1b(self):
                       --topics msmarco-v2-passage-dev \
                       --encoder castorini/unicoil-msmarco-passage \
                       --output {output_file} \
-                      --batch 144 --threads 36 \
+                      --batch {self.batch_size} --threads {self.threads} \
                       --hits 1000 \
                       --impact'
         status = os.system(run_cmd)
@@ -78,7 +88,7 @@ def test_Trotman_etal(self):
                       --topics msmarco-passage-dev-subset-unicoil \
                       --output {output_file} \
                       --output-format msmarco \
-                      --batch 36 --threads 12 \
+                      --batch {self.batch_size} --threads {self.threads} \
                       --hits 1000 \
                       --impact'
         status = os.system(run_cmd)