From 8102ef18f34e5e6bcec767483786e2c0a95e0892 Mon Sep 17 00:00:00 2001
From: Dani El-Ayyass <dayyass@yandex.ru>
Date: Mon, 18 Jul 2022 12:25:42 +0300
Subject: [PATCH 01/12] add github ci

---
 .github/workflows/codecov.yml | 37 +++++++++++++++++++++++++++++++++++
 .github/workflows/linter.yml  | 37 +++++++++++++++++++++++++++++++++++
 .github/workflows/tests.yml   | 30 ++++++++++++++++++++++++++++
 .pre-commit-config.yaml       | 16 +++++++--------
 4 files changed, 112 insertions(+), 8 deletions(-)
 create mode 100644 .github/workflows/codecov.yml
 create mode 100644 .github/workflows/linter.yml
 create mode 100644 .github/workflows/tests.yml

diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml
new file mode 100644
index 0000000..6a2b7a6
--- /dev/null
+++ b/.github/workflows/codecov.yml
@@ -0,0 +1,37 @@
+# This workflow will install Python dependencies and run codecov
+# https://github.com/codecov/codecov-action#example-workflowyml-with-codecov-action
+
+name: codecov
+
+on:
+  push:
+    branches: [main, develop]
+  pull_request:
+    branches: [main, develop]
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+    steps:
+    - uses: actions/checkout@v2  # pin a release tag (as linter.yml/tests.yml do), not the moving master branch
+    - name: Set up Python
+      uses: actions/setup-python@v2  # pinned release tag for reproducible CI
+      with:
+        python-version: 3.7
+    - name: Install dependencies
+      run: |
+        pip install --upgrade pip
+        pip install -r requirements.txt
+        pip install pytest pytest-cov
+    - name: Generate coverage report
+      run: pytest --cov=./ --cov-report=xml
+    - name: Upload coverage to Codecov
+      uses: codecov/codecov-action@v3  # v1 (bash uploader) was sunset on 2022-02-01 and no longer uploads
+      with:
+        flags: unittests
+        env_vars: OS,PYTHON
+        fail_ci_if_error: true
+        verbose: true
diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml
new file mode 100644
index 0000000..3895dc9
--- /dev/null
+++ b/.github/workflows/linter.yml
@@ -0,0 +1,37 @@
+# This workflow will install Python dependencies and run linter
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+# TODO: update linters
+
+name: linter
+
+on:
+  push:
+    branches: [main, develop]
+  pull_request:
+    branches: [main, develop]
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.7
+    - name: Install dependencies
+      run: |
+        pip install --upgrade pip
+        pip install isort black flake8 mypy
+    - name: Code format check with isort
+      run: isort --check-only --profile black .
+    - name: Code format check with black
+      run: black --check .
+    - name: Code format check with flake8
+      run: flake8 --ignore E501,E203,W503 .
+    - name: Type check with mypy
+      run: mypy --ignore-missing-imports .
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 0000000..5b3f266
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,30 @@
+# This workflow will install Python dependencies and run tests with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: tests
+
+on:
+  push:
+    branches: [main, develop]
+  pull_request:
+    branches: [main, develop]
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        python-version: ['3.7', '3.8', '3.9', '3.10']
+        os: [ubuntu-latest, macOS-latest, add windows-latest]
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        pip install --upgrade pip
+        pip install -r requirements.txt
+    - name: Unittests
+      run: python -m unittest discover
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2685945..f409b34 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -29,11 +29,11 @@ repos:
   rev: v0.961
   hooks:
     - id: mypy
-- repo: local
-  hooks:
-    - id: unittest
-      name: unittest
-      entry: venv/bin/python -m unittest discover
-      language: python
-      always_run: true
-      pass_filenames: false
+# - repo: local
+#   hooks:
+#     - id: unittest
+#       name: unittest
+#       entry: venv/bin/python -m unittest discover
+#       language: python
+#       always_run: true
+#       pass_filenames: false

From 4f5f4eb658d252b37c18e2b03987c329175bc4bc Mon Sep 17 00:00:00 2001
From: Dani El-Ayyass <dayyass@yandex.ru>
Date: Mon, 18 Jul 2022 12:26:25 +0300
Subject: [PATCH 02/12] fix imports

---
 qaner/__init__.py        |  2 +-
 qaner/dataset.py         |  3 ++-
 qaner/inference.py       |  9 +++++----
 qaner/inference_utils.py |  3 ++-
 qaner/metrics.py         |  5 +++--
 qaner/train.py           | 11 ++++++-----
 qaner/train_utils.py     |  7 ++++---
 7 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/qaner/__init__.py b/qaner/__init__.py
index 4640904..3dc1f76 100644
--- a/qaner/__init__.py
+++ b/qaner/__init__.py
@@ -1 +1 @@
-# TODO
+__version__ = "0.1.0"
diff --git a/qaner/dataset.py b/qaner/dataset.py
index 6e31194..9910dc0 100644
--- a/qaner/dataset.py
+++ b/qaner/dataset.py
@@ -2,9 +2,10 @@
 
 import torch
 import transformers
-from data_utils import Instance, Span
 from tqdm import tqdm
 
+from qaner.data_utils import Instance, Span
+
 
 # TODO: add documentation
 class Dataset(torch.utils.data.Dataset):
diff --git a/qaner/inference.py b/qaner/inference.py
index 4067ba2..e72bb98 100644
--- a/qaner/inference.py
+++ b/qaner/inference.py
@@ -2,11 +2,12 @@
 from typing import Any, Dict
 
 import torch
-from arg_parse import get_inference_args
-from data_utils import Instance
-from inference_utils import get_top_valid_spans
 from transformers import AutoModelForQuestionAnswering, AutoTokenizer
-from utils import set_global_seed
+
+from qaner.arg_parse import get_inference_args
+from qaner.data_utils import Instance
+from qaner.inference_utils import get_top_valid_spans
+from qaner.utils import set_global_seed
 
 
 # TODO: add batch inference
diff --git a/qaner/inference_utils.py b/qaner/inference_utils.py
index c19b2e3..a5ca560 100644
--- a/qaner/inference_utils.py
+++ b/qaner/inference_utils.py
@@ -3,7 +3,8 @@
 import numpy as np
 import torch
 import transformers
-from data_utils import Span
+
+from qaner.data_utils import Span
 
 
 def get_top_valid_spans(
diff --git a/qaner/metrics.py b/qaner/metrics.py
index 1b60a67..de63963 100644
--- a/qaner/metrics.py
+++ b/qaner/metrics.py
@@ -1,7 +1,8 @@
 from typing import Dict, List
 
 import numpy as np
-from data_utils import Span
+
+from qaner.data_utils import Span
 
 
 # TODO: add metrics over label types
@@ -33,7 +34,7 @@ def compute_metrics(
     confusion_matrix_pred_denominator = np.zeros(len(entity_mapper))
 
     for span_true, span_pred in zip(spans_true_batch, spans_pred_batch_top_1):
-        span_pred = span_pred[0]
+        span_pred = span_pred[0]  # type: ignore
 
         i = entity_mapper[span_true.label]
         j = entity_mapper[span_pred.label]  # type: ignore
diff --git a/qaner/train.py b/qaner/train.py
index 39b35c9..f0747bb 100644
--- a/qaner/train.py
+++ b/qaner/train.py
@@ -1,13 +1,14 @@
 import json
 
 import torch
-from arg_parse import get_train_args
-from data_utils import prepare_sentences_and_spans, read_bio_markup
-from dataset import Collator, Dataset
 from torch.utils.tensorboard import SummaryWriter
-from train_utils import train
 from transformers import AutoModelForQuestionAnswering, AutoTokenizer
-from utils import set_global_seed
+
+from qaner.arg_parse import get_train_args
+from qaner.data_utils import prepare_sentences_and_spans, read_bio_markup
+from qaner.dataset import Collator, Dataset
+from qaner.train_utils import train
+from qaner.utils import set_global_seed
 
 if __name__ == "__main__":
 
diff --git a/qaner/train_utils.py b/qaner/train_utils.py
index c1b1fe6..b20d202 100644
--- a/qaner/train_utils.py
+++ b/qaner/train_utils.py
@@ -2,13 +2,14 @@
 
 import numpy as np
 import torch
-from data_utils import Span
-from inference_utils import get_top_valid_spans
-from metrics import compute_metrics
 from torch.utils.tensorboard import SummaryWriter
 from tqdm import tqdm
 from transformers import AutoModelForQuestionAnswering
 
+from qaner.data_utils import Span
+from qaner.inference_utils import get_top_valid_spans
+from qaner.metrics import compute_metrics
+
 
 # TODO: add metrics calculation
 def train(

From 728414ecee8c83d3de6cb310cd73b03efe0a05aa Mon Sep 17 00:00:00 2001
From: Dani El-Ayyass <dayyass@yandex.ru>
Date: Mon, 18 Jul 2022 12:29:03 +0300
Subject: [PATCH 03/12] add badges

---
 README.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/README.md b/README.md
index 1c5dc13..861bcce 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,17 @@
+[![tests](https://github.com/dayyass/qaner/actions/workflows/tests.yml/badge.svg)](https://github.com/dayyass/qaner/actions/workflows/tests.yml)
+[![linter](https://github.com/dayyass/qaner/actions/workflows/linter.yml/badge.svg)](https://github.com/dayyass/qaner/actions/workflows/linter.yml)
+[![codecov](https://codecov.io/gh/dayyass/qaner/branch/main/graph/badge.svg?token=S3UKX8BFP3)](https://codecov.io/gh/dayyass/qaner)
+
+[![python 3.7](https://img.shields.io/badge/python-3.7-blue.svg)](https://github.com/dayyass/qaner#requirements)
+[![release (latest by date)](https://img.shields.io/github/v/release/dayyass/qaner)](https://github.com/dayyass/qaner/releases/latest)
+[![license](https://img.shields.io/github/license/dayyass/qaner?color=blue)](https://github.com/dayyass/qaner/blob/main/LICENSE)
+
+[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-black)](https://github.com/dayyass/qaner/blob/main/.pre-commit-config.yaml)
+[![code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+[![pypi version](https://img.shields.io/pypi/v/qaner)](https://pypi.org/project/qaner)
+[![pypi downloads](https://img.shields.io/pypi/dm/qaner)](https://pypi.org/project/qaner)
+
 # QaNER
 Unofficial implementation of [*QaNER: Prompting Question Answering Models for Few-shot Named Entity Recognition*](https://arxiv.org/abs/2203.01543).
 

From 1d4d8e97aa00cad9824f18f1486b6b18e676a0ef Mon Sep 17 00:00:00 2001
From: Dani El-Ayyass <dayyass@yandex.ru>
Date: Mon, 18 Jul 2022 12:29:34 +0300
Subject: [PATCH 04/12] add setup.py

---
 setup.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 setup.py

diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..7a885d6
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,27 @@
+from setuptools import setup
+
+from qaner import __version__
+
+with open("README.md", mode="r", encoding="utf-8") as fp:
+    long_description = fp.read()
+
+
+setup(
+    name="qaner",
+    version=__version__,
+    description="Unofficial implementation of QaNER: Prompting Question Answering Models for Few-shot Named Entity Recognition.",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    author="Dani El-Ayyass",
+    author_email="dayyass@yandex.ru",
+    license_files=["LICENSE"],
+    url="https://github.com/dayyass/QaNER",
+    packages=["qaner"],
+    install_requires=[
+        "numpy==1.21.6",
+        "tensorboard==2.9.0",
+        "torch==1.8.1",
+        "tqdm==4.64.0",
+        "transformers==4.19.2",
+    ],
+)

From eb59fca71f49f9069e6a5320ce35d976618b8ee0 Mon Sep 17 00:00:00 2001
From: Dani El-Ayyass <dayyass@yandex.ru>
Date: Mon, 18 Jul 2022 12:29:54 +0300
Subject: [PATCH 05/12] add setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7a885d6..5606fb1 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
     author="Dani El-Ayyass",
     author_email="dayyass@yandex.ru",
     license_files=["LICENSE"],
-    url="https://github.com/dayyass/QaNER",
+    url="https://github.com/dayyass/qaner",
     packages=["qaner"],
     install_requires=[
         "numpy==1.21.6",

From 70e81efc5230f56da5c00e77f8a49340638465bb Mon Sep 17 00:00:00 2001
From: Dani El-Ayyass <dayyass@yandex.ru>
Date: Mon, 18 Jul 2022 12:36:21 +0300
Subject: [PATCH 06/12] fix tests

---
 tests/test_dataset.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 5327aee..03bd9ac 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -1,5 +1,4 @@
 import json
-import sys
 import unittest
 from typing import List
 
@@ -7,11 +6,9 @@
 from tqdm import tqdm
 from transformers import AutoTokenizer, BatchEncoding
 
-sys.path.append("qaner")  # TODO: fix it
-
-from data_utils import prepare_sentences_and_spans, read_bio_markup  # noqa: E402
-from dataset import Collator, Dataset, Instance, Span  # noqa: E402
-from utils import set_global_seed  # noqa: E402
+from qaner.data_utils import prepare_sentences_and_spans, read_bio_markup
+from qaner.dataset import Collator, Dataset, Instance, Span
+from qaner.utils import set_global_seed
 
 
 def validate_spans(

From b4c2f893871426a6167452b57522281ff7c47be1 Mon Sep 17 00:00:00 2001
From: Dani El-Ayyass <dayyass@yandex.ru>
Date: Mon, 18 Jul 2022 12:47:25 +0300
Subject: [PATCH 07/12] add console train and inference

---
 qaner/inference.py | 14 +++++++++++++-
 qaner/train.py     | 15 ++++++++++++++-
 setup.py           |  6 ++++++
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/qaner/inference.py b/qaner/inference.py
index e72bb98..8287d9f 100644
--- a/qaner/inference.py
+++ b/qaner/inference.py
@@ -72,7 +72,13 @@ def predict(
     return prediction
 
 
-if __name__ == "__main__":
+def main() -> int:
+    """
+    Main inference function.
+
+    Returns:
+        int: exit code.
+    """
 
     # argparse
     args = get_inference_args()
@@ -114,3 +120,9 @@ def predict(
     print(f"\nquestion: {prediction.question}\n")
     print(f"context: {prediction.context}")
     print(f"\nanswer: {prediction.answer}\n")
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # propagate main()'s exit code, like the console-script wrapper
diff --git a/qaner/train.py b/qaner/train.py
index f0747bb..166d61b 100644
--- a/qaner/train.py
+++ b/qaner/train.py
@@ -10,7 +10,14 @@
 from qaner.train_utils import train
 from qaner.utils import set_global_seed
 
-if __name__ == "__main__":
+
+def main() -> int:
+    """
+    Main train function.
+
+    Returns:
+        int: exit code.
+    """
 
     # argparse
     args = get_train_args()
@@ -106,3 +113,9 @@
 
     model.save_pretrained(args.path_to_save_model)
     tokenizer.save_pretrained(args.path_to_save_model)
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # propagate main()'s exit code, like the console-script wrapper
diff --git a/setup.py b/setup.py
index 5606fb1..9385cd8 100644
--- a/setup.py
+++ b/setup.py
@@ -17,6 +17,12 @@
     license_files=["LICENSE"],
     url="https://github.com/dayyass/qaner",
     packages=["qaner"],
+    entry_points={
+        "console_scripts": [
+            "qaner-train = qaner.train:main",
+            "qaner-inference = qaner.inference:main",
+        ],
+    },
     install_requires=[
         "numpy==1.21.6",
         "tensorboard==2.9.0",

From 224ad0a71064fc47182befd194990eca29d1d861 Mon Sep 17 00:00:00 2001
From: Dani El-Ayyass <dayyass@yandex.ru>
Date: Mon, 18 Jul 2022 12:53:41 +0300
Subject: [PATCH 08/12] minor changes

---
 .gitignore |  2 ++
 README.md  | 15 ++++++++++-----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 88abad1..d1c4a7e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,5 @@ venv
 
 runs
 dayyass
+
+qaner.egg-info
diff --git a/README.md b/README.md
index 861bcce..e51ee78 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,12 @@ Unofficial implementation of [*QaNER: Prompting Question Answering Models for Fe
 
 You can adopt this pipeline for arbitrary [BIO-markup](https://github.com/dayyass/QaNER/tree/main/data/conll2003) data.
 
-### CoNLL-2003
+## Installation
+```
+pip install qaner
+```
+
+## CoNLL-2003
 Pipeline results on CoNLL-2003 dataset:
 - [Metrics](https://tensorboard.dev/experiment/FEsbNJdmSd2LGVhga8Ku0Q/)
 - [Trained Hugging Face model](https://huggingface.co/dayyass/qaner-conll-bert-base-uncased)
@@ -26,7 +31,7 @@ Pipeline results on CoNLL-2003 dataset:
 ### Training
 Script for training QaNER model:
 ```
-python qaner/train.py \
+qaner-train \
 --bert_model_name 'bert-base-uncased' \
 --path_to_prompt_mapper 'data/conll2003/prompt_mapper.json' \
 --path_to_train_data 'data/conll2003/train.bio' \
@@ -56,7 +61,7 @@ Optional arguments:
-### Infrerence
+### Inference
 Script for inference trained QaNER model:
 ```
-python qaner/inference.py \
+qaner-inference \
 --context 'EU rejects German call to boycott British lamb .' \
 --question 'What is the organization?' \
 --path_to_prompt_mapper 'data/conll2003/prompt_mapper.json' \
@@ -92,10 +97,10 @@ Possible inference questions for CoNLL-2003:
 - What is the organization? (ORG)
 - What is the miscellaneous entity? (MISC)
 
-### Requirements
+## Requirements
 Python >= 3.7
 
-### Citation
+## Citation
 ```bibtex
 @misc{liu2022qaner,
     title         = {QaNER: Prompting Question Answering Models for Few-shot Named Entity Recognition},

From 49b2aa6ead528277f22b63ce2b235515184a9027 Mon Sep 17 00:00:00 2001
From: Dani El-Ayyass <dayyass@yandex.ru>
Date: Mon, 18 Jul 2022 12:56:16 +0300
Subject: [PATCH 09/12] remove python 3.10

---
 .github/workflows/tests.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 5b3f266..1acf6be 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ['3.7', '3.8', '3.9', '3.10']
-        os: [ubuntu-latest, macOS-latest, add windows-latest]
+        python-version: ['3.7', '3.8', '3.9']
+        os: [ubuntu-latest, macOS-latest, windows-latest]
     steps:
     - uses: actions/checkout@v2

From 84ece0e9160d792f452b2c31fdfa6f0e9ef52f9e Mon Sep 17 00:00:00 2001
From: Dani El-Ayyass <dayyass@yandex.ru>
Date: Mon, 18 Jul 2022 12:57:07 +0300
Subject: [PATCH 10/12] update release version

---
 qaner/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qaner/__init__.py b/qaner/__init__.py
index 3dc1f76..485f44a 100644
--- a/qaner/__init__.py
+++ b/qaner/__init__.py
@@ -1 +1 @@
-__version__ = "0.1.0"
+__version__ = "0.1.1"

From 94e38608d9eebcaa26fc689f75c7ec654dcb5cb4 Mon Sep 17 00:00:00 2001
From: Dani El-Ayyass <dayyass@yandex.ru>
Date: Mon, 18 Jul 2022 12:58:47 +0300
Subject: [PATCH 11/12] remove codecov badge

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index e51ee78..3132e1a 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 [![tests](https://github.com/dayyass/qaner/actions/workflows/tests.yml/badge.svg)](https://github.com/dayyass/qaner/actions/workflows/tests.yml)
 [![linter](https://github.com/dayyass/qaner/actions/workflows/linter.yml/badge.svg)](https://github.com/dayyass/qaner/actions/workflows/linter.yml)
-[![codecov](https://codecov.io/gh/dayyass/qaner/branch/main/graph/badge.svg?token=S3UKX8BFP3)](https://codecov.io/gh/dayyass/qaner)
+<!-- [![codecov](https://codecov.io/gh/dayyass/qaner/branch/main/graph/badge.svg?token=S3UKX8BFP3)](https://codecov.io/gh/dayyass/qaner) -->
 
 [![python 3.7](https://img.shields.io/badge/python-3.7-blue.svg)](https://github.com/dayyass/qaner#requirements)
 [![release (latest by date)](https://img.shields.io/github/v/release/dayyass/qaner)](https://github.com/dayyass/qaner/releases/latest)

From 6ffa055b6ecb4be7a04a8e10269699c56d71870f Mon Sep 17 00:00:00 2001
From: Dani El-Ayyass <dayyass@yandex.ru>
Date: Mon, 18 Jul 2022 13:12:33 +0300
Subject: [PATCH 12/12] hotfix

---
 qaner/inference_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qaner/inference_utils.py b/qaner/inference_utils.py
index a5ca560..c6ff958 100644
--- a/qaner/inference_utils.py
+++ b/qaner/inference_utils.py
@@ -86,7 +86,7 @@ def get_top_valid_spans(
                 span = Span(
                     token=context[start_context_char_char:end_context_char_char],
                     label=inv_prompt_mapper[  # TODO: add inference exception
-                        question_list[i].lstrip("What is the ").rstrip("?")
+                        question_list[i].split(r"What is the ")[-1].rstrip(r"?")
                     ],
                     start_context_char_pos=start_context_char_char,
                     end_context_char_pos=end_context_char_char,