From 8de4f4409d5c8ef837b22bc560e9d3b622ee2202 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Sat, 23 Sep 2023 17:27:41 +0800 Subject: [PATCH 1/7] automatically use available providers --- cnstd/ppocr/utility.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cnstd/ppocr/utility.py b/cnstd/ppocr/utility.py index f7e0ded..35bc212 100644 --- a/cnstd/ppocr/utility.py +++ b/cnstd/ppocr/utility.py @@ -154,7 +154,9 @@ def create_predictor(model_fp, mode, logger): model_file_path = model_fp if not os.path.exists(model_file_path): raise ValueError("not find model file path {}".format(model_file_path)) - sess = ort.InferenceSession(model_file_path, providers=['AzureExecutionProvider', 'CPUExecutionProvider']) + sess = ort.InferenceSession( + model_file_path, providers=ort.get_available_providers() + ) return sess, sess.get_inputs()[0], None, None From 98fab5d821ac562fa5e9249aa0232bdee41193c2 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Mon, 9 Oct 2023 14:59:41 +0800 Subject: [PATCH 2/7] download models from different oss addresses, which is based on env vars --- cnstd/consts.py | 7 ++++ cnstd/detector.py | 4 +-- cnstd/ppocr/angle_classifier.py | 4 +-- cnstd/ppocr/pp_detector.py | 4 +-- cnstd/utils/utils.py | 62 +++++++++++++++++++++++++-------- 5 files changed, 60 insertions(+), 21 deletions(-) diff --git a/cnstd/consts.py b/cnstd/consts.py index 32f35bf..ac9ed12 100644 --- a/cnstd/consts.py +++ b/cnstd/consts.py @@ -17,6 +17,7 @@ # specific language governing permissions and limitations # under the License. +import os import logging from pathlib import Path from typing import Tuple, Set, Dict, Any, Optional, Union @@ -43,6 +44,8 @@ # 如: __version__ = '1.0.*',对应的 MODEL_VERSION 都是 '1.0' MODEL_VERSION = '.'.join(__version__.split('.', maxsplit=2)[:2]) VOCAB_FP = Path(__file__).parent.parent / 'label_cn.txt' +# Which OSS source will be used for downloading model files, 'CN' or 'HF' +DOWNLOAD_SOURCE = os.environ.get('CNSTD_DOWNLOAD_SOURCE', 'CN') MODEL_CONFIGS: Dict[str, Dict[str, Any]] = { 'db_resnet50': { @@ -113,6 +116,9 @@ HF_HUB_REPO_ID = "breezedeus/cnstd-cnocr-models" HF_HUB_SUBFOLDER = "models/cnstd/%s" % MODEL_VERSION +CN_OSS_ENDPOINT = ( + "https://sg-models.oss-cn-beijing.aliyuncs.com/cnstd/%s/" % MODEL_VERSION +) def format_hf_hub_url(url: str) -> dict: @@ -120,6 +126,7 @@ def format_hf_hub_url(url: str) -> dict: 'repo_id': HF_HUB_REPO_ID, 'subfolder': HF_HUB_SUBFOLDER, 'filename': url, + 'cn_oss': CN_OSS_ENDPOINT, } diff --git a/cnstd/detector.py b/cnstd/detector.py index 9a4ca43..35ac8fd 100644 --- a/cnstd/detector.py +++ b/cnstd/detector.py @@ -28,7 +28,7 @@ from PIL import Image import numpy as np -from .consts import MODEL_VERSION, AVAILABLE_MODELS +from .consts import MODEL_VERSION, AVAILABLE_MODELS, DOWNLOAD_SOURCE from .model import gen_model from .model.core import DetectionPredictor from .utils import ( @@ -144,7 +144,7 @@ def _assert_and_prepare_model_files(self, model_fp, root): % ((self._model_name, self._model_backend),) ) url = AVAILABLE_MODELS.get_url(self._model_name, self._model_backend) - get_model_file(url, self._model_dir) # download the .zip file and unzip + get_model_file(url, self._model_dir, download_source=DOWNLOAD_SOURCE) # download the .zip file and unzip fps = glob('%s/%s*.ckpt' % (self._model_dir, self._model_file_prefix)) self._model_fp = fps[0] diff --git a/cnstd/ppocr/angle_classifier.py b/cnstd/ppocr/angle_classifier.py index 62fe32e..f24627d 100755 --- a/cnstd/ppocr/angle_classifier.py +++ b/cnstd/ppocr/angle_classifier.py @@ -28,7 +28,7 @@ import cv2 import numpy as np -from ..consts import MODEL_VERSION, ANGLE_CLF_MODELS, ANGLE_CLF_SPACE +from ..consts import MODEL_VERSION, ANGLE_CLF_MODELS, ANGLE_CLF_SPACE, DOWNLOAD_SOURCE from ..utils import data_dir, get_model_file from .postprocess import build_post_process from .utility import ( @@ -89,7 +89,7 @@ def _assert_and_prepare_model_files(self, model_fp, root): ) url = ANGLE_CLF_MODELS[(self._model_name, self._model_backend)]['url'] - get_model_file(url, self._model_dir) # download the .zip file and unzip + get_model_file(url, self._model_dir, download_source=DOWNLOAD_SOURCE) # download the .zip file and unzip self._model_fp = model_fp logger.info('use model: %s' % self._model_fp) diff --git a/cnstd/ppocr/pp_detector.py b/cnstd/ppocr/pp_detector.py index b608d72..c7efc34 100755 --- a/cnstd/ppocr/pp_detector.py +++ b/cnstd/ppocr/pp_detector.py @@ -31,7 +31,7 @@ import numpy as np from .consts import PP_SPACE -from ..consts import MODEL_VERSION, AVAILABLE_MODELS +from ..consts import MODEL_VERSION, AVAILABLE_MODELS, DOWNLOAD_SOURCE from ..utils import data_dir, get_model_file, sort_boxes, get_resized_shape from .utility import ( get_image_file_list, @@ -129,7 +129,7 @@ def _assert_and_prepare_model_files(self, model_fp, root): ) url = AVAILABLE_MODELS.get_url(self._model_name, self._model_backend) - get_model_file(url, self._model_dir) # download the .zip file and unzip + get_model_file(url, self._model_dir, download_source=DOWNLOAD_SOURCE) # download the .zip file and unzip self._model_fp = model_fp logger.info('use model: %s' % self._model_fp) diff --git a/cnstd/utils/utils.py b/cnstd/utils/utils.py index 05fd9e5..78c20e1 100644 --- a/cnstd/utils/utils.py +++ b/cnstd/utils/utils.py @@ -132,7 +132,7 @@ def check_sha1(filename, sha1_hash): return sha1.hexdigest()[0:l] == sha1_hash[0:l] -def download(url, path=None, overwrite=False, sha1_hash=None): +def download(url, path=None, download_source='CN', overwrite=False, sha1_hash=None): """Download a given URL Parameters ---------- @@ -141,6 +141,7 @@ def download(url, path=None, overwrite=False, sha1_hash=None): path : str, optional Destination path to store downloaded file. By default, stores to the current directory with same name as in url. + download_source: which OSS source will be used, 'CN' or 'HF' overwrite : bool, optional Whether to overwrite destination file if already exists. sha1_hash : str, optional @@ -169,19 +170,49 @@ def download(url, path=None, overwrite=False, sha1_hash=None): if not os.path.exists(dirname): os.makedirs(dirname) - logger.info('Downloading %s from %s...' % (fname, url)) - HF_TOKEN = os.environ.get('HF_TOKEN') - with tempfile.TemporaryDirectory() as tmp_dir: - local_path = hf_hub_download( - repo_id=url["repo_id"], - subfolder=url["subfolder"], - filename=url["filename"], - repo_type="model", - cache_dir=tmp_dir, - token=HF_TOKEN, + if download_source == 'CN' and 'cn_oss' in url: + oss_url = url['cn_oss'] + url['filename'] + logger.info('Downloading %s from %s...' % (fname, oss_url)) + r = requests.get(oss_url, stream=True) + if r.status_code != 200: + raise RuntimeError("Failed downloading url %s" % oss_url) + total_length = r.headers.get('content-length') + with open(fname, 'wb') as f: + if total_length is None: # no content length header + for chunk in r.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + f.write(chunk) + else: + total_length = int(total_length) + for chunk in tqdm( + r.iter_content(chunk_size=1024), + total=int(total_length / 1024.0 + 0.5), + unit='KB', + unit_scale=False, + dynamic_ncols=True, + ): + f.write(chunk) + else: + HF_TOKEN = os.environ.get('HF_TOKEN') + logger.info('Downloading %s from HF Repo %s...' % (fname, url["repo_id"])) + with tempfile.TemporaryDirectory() as tmp_dir: + local_path = hf_hub_download( + repo_id=url["repo_id"], + subfolder=url["subfolder"], + filename=url["filename"], + repo_type="model", + cache_dir=tmp_dir, + token=HF_TOKEN, + ) + shutil.copy2(local_path, fname) + + if sha1_hash and not check_sha1(fname, sha1_hash): + raise UserWarning( + 'File {} is downloaded but the content hash does not match. ' + 'The repo may be outdated or download may be incomplete. ' + 'If the "repo_url" is overridden, consider switching to ' + 'the default repo.'.format(fname) ) - shutil.copy2(local_path, fname) - return fname @@ -189,7 +220,7 @@ class ModelDownloadingError(Exception): pass -def get_model_file(url, model_dir): +def get_model_file(url, model_dir, download_source='CN'): r"""Return location for the downloaded models on local file system. This function will download from online model zoo when model cannot be found or has mismatch. @@ -201,6 +232,7 @@ def get_model_file(url, model_dir): repo_id, subfolder, filename model_dir : str, default $CNSTD_HOME Location for keeping the model parameters. + download_source : which OSS source will be used, 'CN' or 'HF' Returns ------- @@ -214,7 +246,7 @@ def get_model_file(url, model_dir): zip_file_path = os.path.join(par_dir, url['filename']) if not os.path.exists(zip_file_path): try: - download(url, path=zip_file_path, overwrite=True) + download(url, path=zip_file_path, download_source=download_source, overwrite=True) except Exception as e: logger.error(e) message = f'Failed to download model: {url["filename"]}.' From 807c5892753b0d4385136b8e01647c1f891fd955 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Mon, 9 Oct 2023 15:00:59 +0800 Subject: [PATCH 3/7] add params `model_categories` and `model_arch_yaml` --- cnstd/cli.py | 37 ++++++++++++++++++++--------- cnstd/yolov7/layout_analyzer.py | 41 ++++++++++++++++++++++----------- 2 files changed, 53 insertions(+), 25 deletions(-) diff --git a/cnstd/cli.py b/cnstd/cli.py index 88a71da..23707cd 100644 --- a/cnstd/cli.py +++ b/cnstd/cli.py @@ -197,9 +197,7 @@ def _vis_bool(img, fp): default=None, help='使用训练好的模型。默认为 `None`,表示使用系统自带的预训练模型', ) -@click.option( - "-r", "--rotated-bbox", is_flag=True, help="是否检测带角度(非水平和垂直)的文本框" -) +@click.option("-r", "--rotated-bbox", is_flag=True, help="是否检测带角度(非水平和垂直)的文本框") @click.option( "--resized-shape", type=str, @@ -323,7 +321,7 @@ def resave_model_file( @click.option( '-m', '--model-name', - type=click.Choice(['mfd', 'layout']), + type=str, default='mfd', help='模型类型。`mfd` 表示数学公式检测,`layout` 表示版面分析;默认为:`mfd`', ) @@ -341,6 +339,13 @@ def resave_model_file( default='pytorch', help='模型后端架构。当前仅支持 `pytorch`', ) +@click.option( + '-c', + '--model-categories', + type=str, + default=None, + help='模型的检测类别名称(","分割)。默认值:None,表示基于 `model_name` 自动决定', +) @click.option( '-p', '--model-fp', @@ -348,6 +353,7 @@ def resave_model_file( default=None, help='使用训练好的模型。默认为 `None`,表示使用系统自带的预训练模型', ) +@click.option('-y', '--model-arch-yaml', type=str, default=None, help='模型的配置文件路径') @click.option('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') @click.option( '-i', '--img-fp', type=str, default='./examples/mfd/zh.jpg', help='待分析的图片路径或图片目录' @@ -358,11 +364,11 @@ def resave_model_file( type=str, default=None, help='分析结果输出的图片路径。默认为 `None`,会存储在当前文件夹,文件名称为输入文件名称前面增加`out-`;' - '如输入文件名为 `img.jpg`, 输出文件名即为 `out-img.jpg`;' - '如果输入为目录,则此路径也应该是一个目录,会将输出文件存储在此目录下', + '如输入文件名为 `img.jpg`, 输出文件名即为 `out-img.jpg`;' + '如果输入为目录,则此路径也应该是一个目录,会将输出文件存储在此目录下', ) @click.option( - "--resized-shape", type=int, default=700, help='分析时把图片resize到此大小再进行。默认为 `700`', + "--resized-shape", type=int, default=608, help='分析时把图片resize到此大小再进行。默认为 `608`', ) @click.option( '--conf-thresh', type=float, default=0.25, help='Confidence Threshold。默认值为 `0.25`' @@ -374,7 +380,9 @@ def layout_analyze( model_name, model_type, model_backend, + model_categories, model_fp, + model_arch_yaml, device, img_fp, output_fp, @@ -383,11 +391,18 @@ def layout_analyze( iou_thresh, ): """对给定图片进行 MFD 或者 版面分析。""" + if not os.path.exists(img_fp): + raise FileNotFoundError(img_fp) + + if model_categories is not None: + model_categories = model_categories.split(',') analyzer = LayoutAnalyzer( model_name=model_name, model_type=model_type, model_backend=model_backend, + model_categories=model_categories, model_fp=model_fp, + model_arch_yaml=model_arch_yaml, device=device, ) @@ -400,11 +415,11 @@ def layout_analyze( elif os.path.isdir(img_fp): fn_list = glob.glob1(img_fp, '*g') input_fp_list = [os.path.join(img_fp, fn) for fn in fn_list] - assert output_fp is not None, 'output_fp should NOT be None when img_fp is a directory' + assert ( + output_fp is not None + ), 'output_fp should NOT be None when img_fp is a directory' os.makedirs(output_fp, exist_ok=True) - output_fp_list = [ - os.path.join(output_fp, 'analysis-' + fn) for fn in fn_list - ] + output_fp_list = [os.path.join(output_fp, 'analysis-' + fn) for fn in fn_list] for input_fp, output_fp in zip(input_fp_list, output_fp_list): out = analyzer.analyze( diff --git a/cnstd/yolov7/layout_analyzer.py b/cnstd/yolov7/layout_analyzer.py index f687252..5412533 100644 --- a/cnstd/yolov7/layout_analyzer.py +++ b/cnstd/yolov7/layout_analyzer.py @@ -30,7 +30,7 @@ from torch import nn from numpy import random -from ..consts import MODEL_VERSION, ANALYSIS_SPACE, ANALYSIS_MODELS +from ..consts import MODEL_VERSION, ANALYSIS_SPACE, ANALYSIS_MODELS, DOWNLOAD_SOURCE from ..utils import data_dir, get_model_file, sort_boxes from .yolo import Model from .consts import CATEGORY_DICT @@ -130,7 +130,9 @@ def __init__( *, model_type: str = 'yolov7_tiny', # 当前支持 [`yolov7_tiny`, `yolov7`]' model_backend: str = 'pytorch', + model_categories: Optional[List[str]] = None, model_fp: Optional[str] = None, + model_arch_yaml: Optional[str] = None, root: Union[str, Path] = data_dir(), device: str = 'cpu', **kwargs, @@ -141,14 +143,17 @@ def __init__( model_name (str): 模型类型。可选值:'mfd' 表示数学公式检测;'layout' 表示版面分析。默认值:'mfd' model_type (str): 模型类型。当前支持 'yolov7_tiny' 和 'yolov7'; 默认值: 'yolov7_tiny' model_backend (str): backend; 当前仅支持: 'pytorch'; 默认值: 'pytorch' - model_fp (str): model file path; default: `None`, means that the default file path will be used + model_categories (List[str]): 模型的检测类别名称。默认值:None,表示基于 `model_name` 自动决定 + model_fp (str): 模型文件路径;默认值为 None,表示使用默认文件路径。 + model_arch_yaml (str): 架构文件路径,例如 'yolov7-mfd.yaml';默认值为 None,表示将自动选择。 root (str or Path): 模型文件所在的根目录。 Linux/Mac下默认值为 `~/.cnstd`,表示模型文件所处文件夹类似 `~/.cnstd/1.2/analysis` Windows下默认值为 `C:/Users//AppData/Roaming/cnstd`。 device (str): 'cpu', or 'gpu'; default: 'cpu' **kwargs (): """ - assert model_name in CATEGORY_DICT.keys() + if model_name: + assert model_name in CATEGORY_DICT.keys() model_backend = model_backend.lower() assert model_backend in ('pytorch', 'onnx') self._model_name = model_name @@ -162,7 +167,19 @@ def __init__( self._assert_and_prepare_model_files(model_fp, root) logger.info('Use model: %s' % self._model_fp) - self.categories = CATEGORY_DICT[self._model_name] + if model_categories is not None: + self.categories = model_categories + else: + self.categories = CATEGORY_DICT[self._model_name] + + if model_arch_yaml is not None: + self._arch_yaml = model_arch_yaml + else: + VALID_MODELS = ANALYSIS_MODELS[self._model_name] + self._arch_yaml = VALID_MODELS[(self._model_type, self._model_backend)][ + 'arch_yaml' + ] + self.model = attempt_load( self.categories, self._model_fp, @@ -175,8 +192,11 @@ def __init__( # self.img_size = check_img_size(image_size, s=self.stride) # check img_size def _assert_and_prepare_model_files(self, model_fp, root): - if model_fp is not None and not os.path.isfile(model_fp): - raise FileNotFoundError('can not find model file %s' % model_fp) + if model_fp is not None: + if not os.path.isfile(model_fp): + raise FileNotFoundError('can not find model file %s' % model_fp) + self._model_fp = model_fp + return VALID_MODELS = ANALYSIS_MODELS[self._model_name] if (self._model_type, self._model_backend) not in VALID_MODELS: @@ -185,13 +205,6 @@ def _assert_and_prepare_model_files(self, model_fp, root): % ((self._model_type, self._model_backend),) ) - self._arch_yaml = VALID_MODELS[(self._model_type, self._model_backend)][ - 'arch_yaml' - ] - if model_fp is not None: - self._model_fp = model_fp - return - self._model_dir = os.path.join(root, MODEL_VERSION, ANALYSIS_SPACE) suffix = 'pt' if self._model_backend == 'pytorch' else 'onnx' model_fp = os.path.join( @@ -201,7 +214,7 @@ def _assert_and_prepare_model_files(self, model_fp, root): logger.warning('Can NOT find model file %s' % model_fp) url = VALID_MODELS[(self._model_type, self._model_backend)]['url'] - get_model_file(url, self._model_dir) # download the .zip file and unzip + get_model_file(url, self._model_dir, download_source=DOWNLOAD_SOURCE) # download the .zip file and unzip self._model_fp = model_fp From eb17c8980d59b68d6672e35633440ddb0bd0c1cb Mon Sep 17 00:00:00 2001 From: breezedeus Date: Mon, 9 Oct 2023 15:13:51 +0800 Subject: [PATCH 4/7] update docs --- README.md | 17 +++++++++++++---- RELEASE.md | 7 +++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 3e75a3c..2e5e663 100644 --- a/README.md +++ b/README.md @@ -358,13 +358,15 @@ class LayoutAnalyzer(object): self, model_name: str = 'mfd', # 'layout' or 'mfd' *, - model_type: str = 'yolov7_tiny', + model_type: str = 'yolov7_tiny', # 当前支持 [`yolov7_tiny`, `yolov7`]' model_backend: str = 'pytorch', + model_categories: Optional[List[str]] = None, model_fp: Optional[str] = None, + model_arch_yaml: Optional[str] = None, root: Union[str, Path] = data_dir(), device: str = 'cpu', **kwargs, - ): + ) ``` 其中的参数含义如下: @@ -375,8 +377,12 @@ class LayoutAnalyzer(object): - `model_backend`: 字符串类型,表示backend。当前仅支持: 'pytorch';默认值:'pytorch' +- `model_categories`: 模型的检测类别名称。默认值:None,表示基于 `model_name` 自动决定 + - `model_fp`: 字符串类型,表示模型文件的路径。默认值:`None`,表示使用默认的文件路径 +- `model_arch_yaml`: 架构文件路径,例如 'yolov7-mfd.yaml';默认值为 None,表示将自动选择。 + - `root`: 字符串或`Path`类型,表示模型文件所在的根目录。 - Linux/Mac下默认值为 `~/.cnstd`,表示模型文件所处文件夹类似 `~/.cnstd/1.2/analysis` - Windows下默认值为 `C:/Users//AppData/Roaming/cnstd`。 @@ -502,18 +508,21 @@ Usage: cnstd analyze [OPTIONS] 对给定图片进行 MFD 或者 版面分析。 Options: - -m, --model-name [mfd|layout] 模型类型。`mfd` 表示数学公式检测,`layout` + -m, --model-name TEXT 模型类型。`mfd` 表示数学公式检测,`layout` 表示版面分析;默认为:`mfd` -t, --model-type TEXT 模型类型。当前支持 [`yolov7_tiny`, `yolov7`] -b, --model-backend [pytorch|onnx] 模型后端架构。当前仅支持 `pytorch` + -c, --model-categories TEXT 模型的检测类别名称(","分割)。默认值:None,表示基于 `model_name` + 自动决定 -p, --model-fp TEXT 使用训练好的模型。默认为 `None`,表示使用系统自带的预训练模型 + -y, --model-arch-yaml TEXT 模型的配置文件路径 --device TEXT cuda device, i.e. 0 or 0,1,2,3 or cpu -i, --img-fp TEXT 待分析的图片路径或图片目录 -o, --output-fp TEXT 分析结果输出的图片路径。默认为 `None`,会存储在当前文件夹,文件名称为输入文件名称 前面增加`out-`;如输入文件名为 `img.jpg`, 输出文件名即为 `out- img.jpg`;如果输入为目录,则此路径也应该是一个目录,会将输出文件存储在此目录下 - --resized-shape INTEGER 分析时把图片resize到此大小再进行。默认为 `700` + --resized-shape INTEGER 分析时把图片resize到此大小再进行。默认为 `608` --conf-thresh FLOAT Confidence Threshold。默认值为 `0.25` --iou-thresh FLOAT IOU threshold for NMS。默认值为 `0.45` -h, --help Show this message and exit. diff --git a/RELEASE.md b/RELEASE.md index 8f88ff9..471cbc0 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,5 +1,12 @@ # Release Notes +# Update 2023.10.09:发布 V1.2.3.5 + +主要变更: + +* 支持基于环境变量 `CNSTD_DOWNLOAD_SOURCE` 的取值,来决定不同的模型下载路径。 +* `LayoutAnalyzer` 中增加了参数 `model_categories` and `model_arch_yaml`,用于指定模型的类别名称列表和模型架构。 + # Update 2023.09.23:发布 V1.2.3.4 主要变更: From bfb5b2717b55e708907cb011c7842dc023a45220 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Mon, 9 Oct 2023 15:14:01 +0800 Subject: [PATCH 5/7] bump version --- Makefile | 2 +- cnstd/__version__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 0e15451..3d0d0e9 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ package: rm -rf build python setup.py sdist bdist_wheel -VERSION = 1.2.3.4 +VERSION = 1.2.3.5 upload: python -m twine upload dist/cnstd-$(VERSION)* --verbose diff --git a/cnstd/__version__.py b/cnstd/__version__.py index 1f4a245..e8075dc 100644 --- a/cnstd/__version__.py +++ b/cnstd/__version__.py @@ -17,4 +17,4 @@ # specific language governing permissions and limitations # under the License. -__version__ = '1.2.3.4' +__version__ = '1.2.3.5' From 7d3350b568fd0d38858d3195cf7df58199bca286 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Mon, 9 Oct 2023 15:24:58 +0800 Subject: [PATCH 6/7] update docs --- README.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2e5e663..bca56e0 100644 --- a/README.md +++ b/README.md @@ -14,15 +14,19 @@ # CnSTD -# Update 2023.06.30:发布 V1.2.3 +# Update 2023.10.09:发布 V1.2.3.5 主要变更: -* 修复了模型文件自动下载的功能。HuggingFace似乎对下载文件的逻辑做了调整,导致之前版本的自动下载失败,当前版本已修复。但由于HuggingFace国内被墙,国内下载仍需 **梯子(VPN)**。 -* 更新了各个依赖包的版本号。 -# Update 2023.06.20: +* 支持基于环境变量 `CNSTD_DOWNLOAD_SOURCE` 的取值,来决定不同的模型下载路径,默认使用国内OSS地址。 +* `LayoutAnalyzer` 中增加了参数 `model_categories` and `model_arch_yaml`,用于指定模型的类别名称列表和模型架构。 + +... + +# Update 2023.06.30:发布 V1.2.3 主要变更: + * 基于新标注的数据,重新训练了 **MFD YoloV7** 模型,目前新模型已部署到 [P2T网页版](https://p2t.behye.com) 。具体说明见:[Pix2Text (P2T) 新版公式检测模型 | Breezedeus.com](https://www.breezedeus.com/article/p2t-mfd-20230613) 。 * 之前的 MFD YoloV7 模型已开放给星球会员下载,具体说明见:[P2T YoloV7 数学公式检测模型开放给星球会员下载 | Breezedeus.com](https://www.breezedeus.com/article/p2t-yolov7-for-zsxq-20230619) 。 * 增加了一些Label Studio相关的脚本,见 [scripts](scripts) 。如:利用 CnSTD 自带的 MFD 模型对目录中的图片进行公式检测后生成可导入到Label Studio中的JSON文件;以及,Label Studio标注后把导出的JSON文件转换成训练 MFD 模型所需的数据格式。注意,MFD 模型的训练代码在 [yolov7](https://github.com/breezedeus/yolov7) (`dev` branch)中。 From 31d4cc076c29bb78c3befe9689d498f71252b755 Mon Sep 17 00:00:00 2001 From: breezedeus Date: Mon, 9 Oct 2023 15:26:34 +0800 Subject: [PATCH 7/7] update docs --- README.md | 2 +- RELEASE.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index bca56e0..57fc4ae 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ 主要变更: * 支持基于环境变量 `CNSTD_DOWNLOAD_SOURCE` 的取值,来决定不同的模型下载路径,默认使用国内OSS地址。 -* `LayoutAnalyzer` 中增加了参数 `model_categories` and `model_arch_yaml`,用于指定模型的类别名称列表和模型架构。 +* `LayoutAnalyzer` 中增加了参数 `model_categories` 和 `model_arch_yaml`,用于指定模型的类别名称列表和模型架构。 ... diff --git a/RELEASE.md b/RELEASE.md index 471cbc0..a776415 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -5,7 +5,7 @@ 主要变更: * 支持基于环境变量 `CNSTD_DOWNLOAD_SOURCE` 的取值,来决定不同的模型下载路径。 -* `LayoutAnalyzer` 中增加了参数 `model_categories` and `model_arch_yaml`,用于指定模型的类别名称列表和模型架构。 +* `LayoutAnalyzer` 中增加了参数 `model_categories` 和 `model_arch_yaml`,用于指定模型的类别名称列表和模型架构。 # Update 2023.09.23:发布 V1.2.3.4