From 2bb342acc124c69ec4fe13ebc3be0bd5a5bf497c Mon Sep 17 00:00:00 2001 From: Mofe Adeyemi Date: Fri, 8 Mar 2024 17:20:05 -0500 Subject: [PATCH] Modify ciral's 2cr --- docs/2cr/ciral.html | 54 ++++++++++++++++++++--------------------- pyserini/2cr/ciral.py | 8 +++--- pyserini/2cr/ciral.yaml | 8 +++--- 3 files changed, 34 insertions(+), 36 deletions(-) diff --git a/docs/2cr/ciral.html b/docs/2cr/ciral.html index b1b709521..4a0c55806 100644 --- a/docs/2cr/ciral.html +++ b/docs/2cr/ciral.html @@ -200,8 +200,7 @@

CIRAL

--language ha \ --topics ciral-v1.0-ha-test-b-native \ --index ciral-v1.0-ha \ - --output run.ciral.bm25-qt.ha.test-b.txt \ - --batch 128 --threads 16 --bm25 --hits 1000 + --output run.ciral.bm25-qt.ha.test-b.txt --threads 16 --bm25 --hits 1000 Evaluation commands: @@ -522,10 +521,10 @@

CIRAL

- + -Afriberta, pre-FT w/ MS MARCO FT w/ latin Mr. TyDi +AfriBERTa, pre-FT w/ MS MARCO FT w/ latin Mr. TyDi 0.2028 0.1682 0.2166 @@ -564,9 +563,9 @@

CIRAL

python -m pyserini.search.faiss \
   --encoder-class auto \
-  --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
+  --encoder castorini/afriberta-dpr-pft-msmarco-ft-latin-mrtydi \
   --topics ciral-v1.0-ha-test-b \
-  --index ciral-v1.0-ha-afriberta-dpr-ptf-msmarco-ft-mrtydi \
+  --index ciral-v1.0-ha-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
   --output run.ciral.afriberta-pft-msmarco-ft-mrtydi.ha.test-b.txt \
   --batch 128 --threads 16 --hits 1000
 
@@ -585,9 +584,9 @@

CIRAL

python -m pyserini.search.faiss \
   --encoder-class auto \
-  --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
+  --encoder castorini/afriberta-dpr-pft-msmarco-ft-latin-mrtydi \
   --topics ciral-v1.0-so-test-b \
-  --index ciral-v1.0-so-afriberta-dpr-ptf-msmarco-ft-mrtydi \
+  --index ciral-v1.0-so-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
   --output run.ciral.afriberta-pft-msmarco-ft-mrtydi.so.test-b.txt \
   --batch 128 --threads 16 --hits 1000
 
@@ -606,9 +605,9 @@

CIRAL

python -m pyserini.search.faiss \
   --encoder-class auto \
-  --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
+  --encoder castorini/afriberta-dpr-pft-msmarco-ft-latin-mrtydi \
   --topics ciral-v1.0-sw-test-b \
-  --index ciral-v1.0-sw-afriberta-dpr-ptf-msmarco-ft-mrtydi \
+  --index ciral-v1.0-sw-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
   --output run.ciral.afriberta-pft-msmarco-ft-mrtydi.sw.test-b.txt \
   --batch 128 --threads 16 --hits 1000
 
@@ -627,9 +626,9 @@

CIRAL

python -m pyserini.search.faiss \
   --encoder-class auto \
-  --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
+  --encoder castorini/afriberta-dpr-pft-msmarco-ft-latin-mrtydi \
   --topics ciral-v1.0-yo-test-b \
-  --index ciral-v1.0-yo-afriberta-dpr-ptf-msmarco-ft-mrtydi \
+  --index ciral-v1.0-yo-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
   --output run.ciral.afriberta-pft-msmarco-ft-mrtydi.yo.test-b.txt \
   --batch 128 --threads 16 --hits 1000
 
@@ -648,10 +647,10 @@

CIRAL

- + -RRF Fusion of BM25 Machine DT and Afriberta-DPR +RRF Fusion of BM25 Machine DT and AfriBERTa-DPR 0.2935 0.2878 0.3187 @@ -826,8 +825,7 @@

CIRAL

--language ha \ --topics ciral-v1.0-ha-test-b-native \ --index ciral-v1.0-ha \ - --output run.ciral.bm25-qt.ha.test-b.txt \ - --batch 128 --threads 16 --bm25 --hits 1000 + --output run.ciral.bm25-qt.ha.test-b.txt --threads 16 --bm25 --hits 1000 Evaluation commands: @@ -1148,10 +1146,10 @@

CIRAL

- + -Afriberta, pre-FT w/ MS MARCO FT w/ latin Mr. TyDi +AfriBERTa, pre-FT w/ MS MARCO FT w/ latin Mr. TyDi 0.3900 0.3558 0.4608 @@ -1190,9 +1188,9 @@

CIRAL

python -m pyserini.search.faiss \
   --encoder-class auto \
-  --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
+  --encoder castorini/afriberta-dpr-pft-msmarco-ft-latin-mrtydi \
   --topics ciral-v1.0-ha-test-b \
-  --index ciral-v1.0-ha-afriberta-dpr-ptf-msmarco-ft-mrtydi \
+  --index ciral-v1.0-ha-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
   --output run.ciral.afriberta-pft-msmarco-ft-mrtydi.ha.test-b.txt \
   --batch 128 --threads 16 --hits 1000
 
@@ -1211,9 +1209,9 @@

CIRAL

python -m pyserini.search.faiss \
   --encoder-class auto \
-  --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
+  --encoder castorini/afriberta-dpr-pft-msmarco-ft-latin-mrtydi \
   --topics ciral-v1.0-so-test-b \
-  --index ciral-v1.0-so-afriberta-dpr-ptf-msmarco-ft-mrtydi \
+  --index ciral-v1.0-so-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
   --output run.ciral.afriberta-pft-msmarco-ft-mrtydi.so.test-b.txt \
   --batch 128 --threads 16 --hits 1000
 
@@ -1232,9 +1230,9 @@

CIRAL

python -m pyserini.search.faiss \
   --encoder-class auto \
-  --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
+  --encoder castorini/afriberta-dpr-pft-msmarco-ft-latin-mrtydi \
   --topics ciral-v1.0-sw-test-b \
-  --index ciral-v1.0-sw-afriberta-dpr-ptf-msmarco-ft-mrtydi \
+  --index ciral-v1.0-sw-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
   --output run.ciral.afriberta-pft-msmarco-ft-mrtydi.sw.test-b.txt \
   --batch 128 --threads 16 --hits 1000
 
@@ -1253,9 +1251,9 @@

CIRAL

python -m pyserini.search.faiss \
   --encoder-class auto \
-  --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
+  --encoder castorini/afriberta-dpr-pft-msmarco-ft-latin-mrtydi \
   --topics ciral-v1.0-yo-test-b \
-  --index ciral-v1.0-yo-afriberta-dpr-ptf-msmarco-ft-mrtydi \
+  --index ciral-v1.0-yo-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
   --output run.ciral.afriberta-pft-msmarco-ft-mrtydi.yo.test-b.txt \
   --batch 128 --threads 16 --hits 1000
 
@@ -1274,10 +1272,10 @@

CIRAL

- + -RRF Fusion of BM25 Machine DT and Afriberta-DPR +RRF Fusion of BM25 Machine DT and AfriBERTa-DPR 0.6007 0.5618 0.7007 diff --git a/pyserini/2cr/ciral.py b/pyserini/2cr/ciral.py index bc07a421d..1e760d58b 100644 --- a/pyserini/2cr/ciral.py +++ b/pyserini/2cr/ciral.py @@ -52,8 +52,8 @@ html_display['bm25-qt'] = 'BM25 Human QT' html_display['bm25-dt'] = 'BM25 Machine DT' html_display['mdpr-tied-pft-msmarco'] = 'mDPR (tied encoders), pre-FT w/ MS MARCO' -html_display['afriberta-pft-msmarco-ft-mrtydi'] = 'Afriberta, pre-FT w/ MS MARCO FT w/ latin Mr. TyDi' -html_display['bm25-dt-afriberta-dpr-fusion'] = 'RRF Fusion of BM25 Machine DT and Afriberta-DPR' +html_display['afriberta-pft-msmarco-ft-mrtydi'] = 'AfriBERTa, pre-FT w/ MS MARCO FT w/ latin Mr. TyDi' +html_display['bm25-dt-afriberta-dpr-fusion'] = 'RRF Fusion of BM25 Machine DT and AfriBERTa-DPR' models = list(html_display) @@ -100,10 +100,10 @@ def print_results(table, metric, split): print(f'Metric = {metric}, Split = {split}') print(' ' * 32, end='') for lang in languages: - print(f'{lang[1]:3} ', end='') + print(f' {lang[1]:4} ', end='') print('') for model in models: - print(f'{model:30}', end='') + print(f'{model:32}', end='') for lang in languages: key = f'{model}.{lang[0]}' print(f'{table[key][split][metric]:7.4f}', end=' ') diff --git a/pyserini/2cr/ciral.yaml b/pyserini/2cr/ciral.yaml index 1cdaa72a6..037dd7e96 100644 --- a/pyserini/2cr/ciral.yaml +++ b/pyserini/2cr/ciral.yaml @@ -201,7 +201,7 @@ conditions: # afriberta-pft-msmarco-ft-mrtydi-latin - name: afriberta-pft-msmarco-ft-mrtydi.ha eval_key: ciral-v1.0-ha - command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi --topics ciral-v1.0-ha-${split} --index ciral-v1.0-ha-afriberta-dpr-ptf-msmarco-ft-mrtydi --output $output --batch 128 --threads 16 --hits 1000 + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/afriberta-dpr-pft-msmarco-ft-latin-mrtydi --topics ciral-v1.0-ha-${split} --index ciral-v1.0-ha-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi --output $output --batch 128 --threads 16 --hits 1000 splits: - split: test-a scores: @@ -217,7 +217,7 @@ conditions: R@100: 0.3900 - name: afriberta-pft-msmarco-ft-mrtydi.so eval_key: ciral-v1.0-so - command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi --topics ciral-v1.0-so-${split} --index ciral-v1.0-so-afriberta-dpr-ptf-msmarco-ft-mrtydi --output $output --batch 128 --threads 16 --hits 1000 + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/afriberta-dpr-pft-msmarco-ft-latin-mrtydi --topics ciral-v1.0-so-${split} --index ciral-v1.0-so-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi --output $output --batch 128 --threads 16 --hits 1000 splits: - split: test-a scores: @@ -233,7 +233,7 @@ conditions: R@100: 0.3558 - name: afriberta-pft-msmarco-ft-mrtydi.sw eval_key: ciral-v1.0-sw - command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi --topics ciral-v1.0-sw-${split} --index ciral-v1.0-sw-afriberta-dpr-ptf-msmarco-ft-mrtydi --output $output --batch 128 --threads 16 --hits 1000 + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/afriberta-dpr-pft-msmarco-ft-latin-mrtydi --topics ciral-v1.0-sw-${split} --index ciral-v1.0-sw-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi --output $output --batch 128 --threads 16 --hits 1000 splits: - split: test-a scores: @@ -249,7 +249,7 @@ conditions: R@100: 0.4608 - name: afriberta-pft-msmarco-ft-mrtydi.yo eval_key: ciral-v1.0-yo - command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi --topics ciral-v1.0-yo-${split} --index ciral-v1.0-yo-afriberta-dpr-ptf-msmarco-ft-mrtydi --output $output --batch 128 --threads 16 --hits 1000 + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/afriberta-dpr-pft-msmarco-ft-latin-mrtydi --topics ciral-v1.0-yo-${split} --index ciral-v1.0-yo-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi --output $output --batch 128 --threads 16 --hits 1000 splits: - split: test-a scores: