Skip to content

Commit

Permalink
Merge pull request #128 from breezedeus/dev
Browse files Browse the repository at this point in the history
Fix: some formats of models require fixed-size input images
  • Loading branch information
breezedeus authored Jun 24, 2024
2 parents f23532b + 94d7c52 commit c4271c7
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 15 deletions.
13 changes: 12 additions & 1 deletion docs/RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
# Release Notes

## Update 2024.06.18:**V1.1.1** Released
## Update 2024.06.24: **V1.1.1.1** Released

Major Changes:

* Added a new parameter `static_resized_shape` when initializing `MathFormulaDetector`, which is used to resize the input image to a fixed size. Some model formats, such as `CoreML`, require fixed-size input images during inference.

主要变更:

* `MathFormulaDetector` 初始化时加入了参数 `static_resized_shape`, 用于把输入图片 resize 为固定大小。某些格式的模型在推理时需要固定大小的输入图片,如 `CoreML`


## Update 2024.06.18: **V1.1.1** Released

Major changes:

Expand Down
2 changes: 1 addition & 1 deletion pix2text/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
# [Pix2Text](https://github.com/breezedeus/pix2text): an Open-Source Alternative to Mathpix.
# Copyright (C) 2022-2024, [Breezedeus](https://www.breezedeus.com).

__version__ = '1.1.1'
__version__ = '1.1.1.1'
41 changes: 31 additions & 10 deletions pix2text/formula_detector.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# coding: utf-8
# [Pix2Text](https://github.com/breezedeus/pix2text): an Open-Source Alternative to Mathpix.
# Copyright (C) 2022-2024, [Breezedeus](https://www.breezedeus.com).
from typing import Optional, Union
from typing import Optional, Union, Tuple
from pathlib import Path
import logging

Expand All @@ -23,15 +23,31 @@

class MathFormulaDetector(YoloDetector):
def __init__(
self,
*,
model_name: str = 'mfd',
model_backend: str = 'onnx',
device: str = None,
model_path: Optional[Union[str, Path]] = None,
root: Union[str, Path] = data_dir(),
**kwargs,
self,
*,
model_name: str = 'mfd',
model_backend: str = 'onnx',
device: Optional[str] = None,
model_path: Optional[Union[str, Path]] = None,
root: Union[str, Path] = data_dir(),
static_resized_shape: Optional[Union[int, Tuple[int, int]]] = None,
**kwargs,
):
"""
Math Formula Detector based on YOLO.
Args:
model_name (str): model name, default is 'mfd'.
model_backend (str): model backend, default is 'onnx'.
device (optional str): device to use, default is None.
model_path (optional str): model path, default is None.
root (optional str): root directory to save model files, default is data_dir().
static_resized_shape (optional int or tuple): static resized shape, default is None.
When it is not None, the input image will be resized to this shape before detection,
and the `resized_shape` argument passed to `.detect()` will be ignored.
Some model formats, such as CoreML, may require a fixed input size.
**kwargs (): other parameters.
"""
if model_path is None:
model_info = AVAILABLE_MODELS.get_info(model_name, model_backend)
model_path = prepare_model_files(root, model_info)
Expand All @@ -42,7 +58,12 @@ def __init__(
model_path = cand_paths[0]
logger.info(f'Use model path for MFD: {model_path}')

super().__init__(model_path=model_path, device=device)
super().__init__(
model_path=model_path,
device=device,
static_resized_shape=static_resized_shape,
**kwargs,
)


def find_files(directory, extension):
Expand Down
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ torch
torchvision
pillow>=5.3.0
opencv-python
cnstd>=1.2.4.1
cnstd>=1.2.4.2
cnocr[ort-cpu]>=2.3.0.2
transformers>=4.37.0
optimum[onnxruntime]
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ cnocr[ort-cpu]==2.3.0.2
# via
# -r requirements.in
# cnocr
cnstd==1.2.4.1
cnstd==1.2.4.2
# via
# -r requirements.in
# cnocr
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"numpy",
"opencv-python",
"cnocr[ort-cpu]>=2.3.0.2",
"cnstd>=1.2.4.1",
"cnstd>=1.2.4.2",
"pillow",
"torch",
"torchvision",
Expand Down

0 comments on commit c4271c7

Please sign in to comment.