Merge pull request #128 from breezedeus/dev

Fix: some formats of models require fixed-size input images
breezedeus · Jun 24, 2024 · c4271c7 · c4271c7
2 parents f23532b + 94d7c52
commit c4271c7
Show file tree

Hide file tree

Showing 6 changed files with 47 additions and 15 deletions.
diff --git a/docs/RELEASE.md b/docs/RELEASE.md
@@ -1,6 +1,17 @@
 # Release Notes
 
-## Update 2024.06.18：**V1.1.1** Released
+## Update 2024.06.24: **V1.1.1.1** Released
+
+Major Changes:
+
+* Added a new parameter `static_resized_shape` to the `MathFormulaDetector` initializer; it resizes the input image to a fixed size. Some model formats, such as `CoreML`, require fixed-size input images during inference.
+
+主要变更：
+
+* `MathFormulaDetector` 初始化时加入了参数 `static_resized_shape`, 用于把输入图片 resize 为固定大小。某些格式的模型在推理时需要固定大小的输入图片，如 `CoreML`。
+
+
+## Update 2024.06.18: **V1.1.1** Released
 
 Major changes:
 

diff --git a/pix2text/__version__.py b/pix2text/__version__.py
@@ -2,4 +2,4 @@
 # [Pix2Text](https://github.com/breezedeus/pix2text): an Open-Source Alternative to Mathpix.
 # Copyright (C) 2022-2024, [Breezedeus](https://www.breezedeus.com).
 
-__version__ = '1.1.1'
+__version__ = '1.1.1.1'
diff --git a/pix2text/formula_detector.py b/pix2text/formula_detector.py
@@ -1,7 +1,7 @@
 # coding: utf-8
 # [Pix2Text](https://github.com/breezedeus/pix2text): an Open-Source Alternative to Mathpix.
 # Copyright (C) 2022-2024, [Breezedeus](https://www.breezedeus.com).
-from typing import Optional, Union
+from typing import Optional, Union, Tuple
 from pathlib import Path
 import logging
 
@@ -23,15 +23,31 @@
 
 class MathFormulaDetector(YoloDetector):
     def __init__(
-            self,
-            *,
-            model_name: str = 'mfd',
-            model_backend: str = 'onnx',
-            device: str = None,
-            model_path: Optional[Union[str, Path]] = None,
-            root: Union[str, Path] = data_dir(),
-            **kwargs,
+        self,
+        *,
+        model_name: str = 'mfd',
+        model_backend: str = 'onnx',
+        device: Optional[str] = None,
+        model_path: Optional[Union[str, Path]] = None,
+        root: Union[str, Path] = data_dir(),
+        static_resized_shape: Optional[Union[int, Tuple[int, int]]] = None,
+        **kwargs,
     ):
+        """
+        Math Formula Detector based on YOLO.
+
+        Args:
+            model_name (str): model name, default is 'mfd'.
+            model_backend (str): model backend, default is 'onnx'.
+            device (optional str): device to use, default is None.
+            model_path (optional str or Path): model path, default is None.
+            root (str or Path): root directory to save model files, default is data_dir().
+            static_resized_shape (optional int or tuple): static resized shape, default is None.
+                When it is not None, the input image will be resized to this shape before detection,
+                ignoring the `resized_shape` argument passed to `.detect()`.
+                Some model formats, such as CoreML, may require a fixed input size.
+            **kwargs (): other parameters.
+        """
         if model_path is None:
             model_info = AVAILABLE_MODELS.get_info(model_name, model_backend)
             model_path = prepare_model_files(root, model_info)
@@ -42,7 +58,12 @@ def __init__(
             model_path = cand_paths[0]
         logger.info(f'Use model path for MFD: {model_path}')
 
-        super().__init__(model_path=model_path, device=device)
+        super().__init__(
+            model_path=model_path,
+            device=device,
+            static_resized_shape=static_resized_shape,
+            **kwargs,
+        )
 
 
 def find_files(directory, extension):

diff --git a/requirements.in b/requirements.in
@@ -9,7 +9,7 @@ torch
 torchvision
 pillow>=5.3.0
 opencv-python
-cnstd>=1.2.4.1
+cnstd>=1.2.4.2
 cnocr[ort-cpu]>=2.3.0.2
 transformers>=4.37.0
 optimum[onnxruntime]

diff --git a/requirements.txt b/requirements.txt
@@ -36,7 +36,7 @@ cnocr[ort-cpu]==2.3.0.2
     # via
     #   -r requirements.in
     #   cnocr
-cnstd==1.2.4.1
+cnstd==1.2.4.2
     # via
     #   -r requirements.in
     #   cnocr

diff --git a/setup.py b/setup.py
@@ -27,7 +27,7 @@
     "numpy",
     "opencv-python",
     "cnocr[ort-cpu]>=2.3.0.2",
-    "cnstd>=1.2.4.1",
+    "cnstd>=1.2.4.2",
     "pillow",
     "torch",
     "torchvision",