diff --git a/docs/RELEASE.md b/docs/RELEASE.md index 2e2d7b2..d3a8953 100644 --- a/docs/RELEASE.md +++ b/docs/RELEASE.md @@ -1,6 +1,17 @@ # Release Notes -## Update 2024.06.18:**V1.1.1** Released +## Update 2024.06.24: **V1.1.1.1** Released + +Major changes: + +* Added a new parameter `static_resized_shape` when initializing `MathFormulaDetector`, which is used to resize the input image to a fixed size. Some model formats, such as `CoreML`, require fixed-size input images during inference. + +主要变更: + +* `MathFormulaDetector` 初始化时加入了参数 `static_resized_shape`, 用于把输入图片 resize 为固定大小。某些格式的模型在推理时需要固定大小的输入图片,如 `CoreML`。 + + +## Update 2024.06.18: **V1.1.1** Released Major changes: diff --git a/pix2text/__version__.py b/pix2text/__version__.py index a8810cb..97f1c6d 100644 --- a/pix2text/__version__.py +++ b/pix2text/__version__.py @@ -2,4 +2,4 @@ # [Pix2Text](https://github.com/breezedeus/pix2text): an Open-Source Alternative to Mathpix. # Copyright (C) 2022-2024, [Breezedeus](https://www.breezedeus.com). -__version__ = '1.1.1' +__version__ = '1.1.1.1' diff --git a/pix2text/formula_detector.py b/pix2text/formula_detector.py index 2da7c9e..96366a1 100644 --- a/pix2text/formula_detector.py +++ b/pix2text/formula_detector.py @@ -1,7 +1,7 @@ # coding: utf-8 # [Pix2Text](https://github.com/breezedeus/pix2text): an Open-Source Alternative to Mathpix. # Copyright (C) 2022-2024, [Breezedeus](https://www.breezedeus.com). 
-from typing import Optional, Union +from typing import Optional, Union, Tuple from pathlib import Path import logging @@ -23,15 +23,31 @@ class MathFormulaDetector(YoloDetector): def __init__( - self, - *, - model_name: str = 'mfd', - model_backend: str = 'onnx', - device: str = None, - model_path: Optional[Union[str, Path]] = None, - root: Union[str, Path] = data_dir(), - **kwargs, + self, + *, + model_name: str = 'mfd', + model_backend: str = 'onnx', + device: Optional[str] = None, + model_path: Optional[Union[str, Path]] = None, + root: Union[str, Path] = data_dir(), + static_resized_shape: Optional[Union[int, Tuple[int, int]]] = None, + **kwargs, ): + """ + Math Formula Detector based on YOLO. + + Args: + model_name (str): model name, default is 'mfd'. + model_backend (str): model backend, default is 'onnx'. + device (optional str): device to use, default is None. + model_path (optional str or Path): model path, default is None. + root (str or Path): root directory to save model files, default is data_dir(). + static_resized_shape (optional int or tuple): static resized shape, default is None. + When it is not None, the input image will be resized to this shape before detection, + and the `resized_shape` argument of `.detect()` is ignored. + Some model formats, such as CoreML, may require a fixed input size. + **kwargs: other parameters, passed on to the parent class initializer. 
+ """ if model_path is None: model_info = AVAILABLE_MODELS.get_info(model_name, model_backend) model_path = prepare_model_files(root, model_info) @@ -42,7 +58,12 @@ def __init__( model_path = cand_paths[0] logger.info(f'Use model path for MFD: {model_path}') - super().__init__(model_path=model_path, device=device) + super().__init__( + model_path=model_path, + device=device, + static_resized_shape=static_resized_shape, + **kwargs, + ) def find_files(directory, extension): diff --git a/requirements.in b/requirements.in index 11966c5..2d9c80f 100644 --- a/requirements.in +++ b/requirements.in @@ -9,7 +9,7 @@ torch torchvision pillow>=5.3.0 opencv-python -cnstd>=1.2.4.1 +cnstd>=1.2.4.2 cnocr[ort-cpu]>=2.3.0.2 transformers>=4.37.0 optimum[onnxruntime] diff --git a/requirements.txt b/requirements.txt index 1869784..922bb52 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,7 +36,7 @@ cnocr[ort-cpu]==2.3.0.2 # via # -r requirements.in # cnocr -cnstd==1.2.4.1 +cnstd==1.2.4.2 # via # -r requirements.in # cnocr diff --git a/setup.py b/setup.py index b0b469d..88f0805 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ "numpy", "opencv-python", "cnocr[ort-cpu]>=2.3.0.2", - "cnstd>=1.2.4.1", + "cnstd>=1.2.4.2", "pillow", "torch", "torchvision",