diff --git a/ppdet/engine/callbacks.py b/ppdet/engine/callbacks.py index 55455fd208..fa3aba7511 100644 --- a/ppdet/engine/callbacks.py +++ b/ppdet/engine/callbacks.py @@ -26,7 +26,7 @@ import paddle import paddle.distributed as dist -from ppdet.utils.checkpoint import save_model, save_semi_model +from ppdet.utils.checkpoint import save_model, save_semi_model, save_model_info, update_train_results from ppdet.metrics import get_infer_results from ppdet.utils.logger import setup_logger @@ -178,11 +178,12 @@ def __init__(self, model): super(Checkpointer, self).__init__(model) self.best_ap = -1000. self.save_dir = self.model.cfg.save_dir + self.uniform_output_enabled = self.model.cfg.get("uniform_output_enabled", False) if hasattr(self.model.model, 'student_model'): self.weight = self.model.model.student_model else: self.weight = self.model.model - + def on_epoch_end(self, status): # Checkpointer only performed during training mode = status['mode'] @@ -226,8 +227,15 @@ def on_epoch_end(self, status): 'metric': abs(epoch_ap), 'epoch': epoch_id + 1 } - save_path = os.path.join(self.save_dir, f"{save_name}.pdstates") + save_path = os.path.join(os.path.join(self.save_dir, save_name) if self.uniform_output_enabled else self.save_dir, f"{save_name}.pdstates") paddle.save(epoch_metric, save_path) + if self.uniform_output_enabled: + save_model_info(epoch_metric, self.save_dir, save_name) + if epoch_id + 1 == self.model.cfg.epoch: + done_flag = True + else: + done_flag = False + update_train_results(self.model.cfg, save_name, epoch_metric, done_flag=done_flag, ema=self.model.use_ema) if 'save_best_model' in status and status['save_best_model']: if epoch_ap >= self.best_ap: self.best_ap = epoch_ap @@ -237,8 +245,11 @@ def on_epoch_end(self, status): 'metric': abs(self.best_ap), 'epoch': epoch_id + 1 } - save_path = os.path.join(self.save_dir, "best_model.pdstates") + save_path = os.path.join(os.path.join(self.save_dir, save_name) if self.uniform_output_enabled else self.save_dir, 
"best_model.pdstates") paddle.save(best_metric, save_path) + if self.uniform_output_enabled: + save_model_info(best_metric, self.save_dir, save_name) + update_train_results(self.model.cfg, save_name, best_metric, done_flag=done_flag, ema=self.model.use_ema) logger.info("Best test {} {} is {:0.3f}.".format( key, eval_func, abs(self.best_ap))) if weight: @@ -250,10 +261,12 @@ def on_epoch_end(self, status): save_model( status['weight'], self.model.optimizer, - self.save_dir, + os.path.join(self.save_dir, save_name) if self.uniform_output_enabled else self.save_dir, save_name, epoch_id + 1, ema_model=weight) + if self.uniform_output_enabled: + self.model.export(output_dir=os.path.join(self.save_dir, save_name, "inference"), for_fd=True) else: # save model(student model) and ema_model(teacher model) # in DenseTeacher SSOD, the teacher model will be higher, @@ -270,8 +283,10 @@ def on_epoch_end(self, status): del teacher_model del student_model else: - save_model(weight, self.model.optimizer, self.save_dir, + save_model(weight, self.model.optimizer, os.path.join(self.save_dir, save_name) if self.uniform_output_enabled else self.save_dir, save_name, epoch_id + 1) + if self.uniform_output_enabled: + self.model.export(output_dir=os.path.join(self.save_dir, save_name, "inference"), for_fd=True) class WiferFaceEval(Callback): diff --git a/ppdet/engine/export_utils.py b/ppdet/engine/export_utils.py index e98e492263..c2e5fbcc7a 100644 --- a/ppdet/engine/export_utils.py +++ b/ppdet/engine/export_utils.py @@ -22,6 +22,7 @@ import paddle from ppdet.data.source.category import get_categories +from ppdet.core.workspace import load_config from ppdet.utils.logger import setup_logger logger = setup_logger('ppdet.engine') @@ -286,6 +287,19 @@ def _dump_infer_config(config, path, image_shape, model): 'metric': config['metric'], 'use_dynamic_shape': use_dynamic_shape }) + hpi_config_path = config.get("hpi_config_path", None) + if hpi_config_path: + hpi_config = 
load_config(hpi_config_path) + dynamic_shapes = image_shape[-1] + if hpi_config["Hpi"]["backend_config"].get("paddle_tensorrt", None): + hpi_config["Hpi"]["backend_config"]["paddle_tensorrt"][ + "dynamic_shapes"]["image"] = [[1, 3, dynamic_shapes, dynamic_shapes] for i in range(3)] + if hpi_config["Hpi"]["backend_config"].get("tensorrt", None): + hpi_config["Hpi"]["backend_config"]["tensorrt"]["dynamic_shapes"][ + "image"] = [[1, 3, dynamic_shapes, dynamic_shapes] for i in range(3)] + infer_cfg["Hpi"] = hpi_config["Hpi"] + infer_cfg["Global"] = {} + infer_cfg["Global"]["model_name"] = config["pdx_model_name"] export_onnx = config.get('export_onnx', False) export_eb = config.get('export_eb', False) diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py index 6e0bec0b6d..98f2707179 100644 --- a/ppdet/engine/trainer.py +++ b/ppdet/engine/trainer.py @@ -20,6 +20,7 @@ import sys import copy import time +import yaml from tqdm import tqdm import numpy as np @@ -36,7 +37,7 @@ from ppdet.optimizer import ModelEMA from ppdet.core.workspace import create -from ppdet.utils.checkpoint import load_weight, load_pretrain_weight +from ppdet.utils.checkpoint import load_weight, load_pretrain_weight, convert_to_dict from ppdet.utils.visualizer import visualize_results, save_result from ppdet.metrics import get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownCOCOWholeBadyHandEval, KeyPointTopDownMPIIEval, Pose3DEval from ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, RBoxMetric, JDEDetMetric, SNIPERCOCOMetric, CULaneMetric @@ -75,13 +76,23 @@ def __init__(self, cfg, mode='train'): self.custom_white_list = self.cfg.get('custom_white_list', None) self.custom_black_list = self.cfg.get('custom_black_list', None) self.use_master_grad = self.cfg.get('master_grad', False) - if 'slim' in cfg and cfg['slim_type'] == 'PTQ': + self.uniform_output_enabled = self.cfg.get('uniform_output_enabled', False) + if ('slim' in cfg and cfg['slim_type'] == 'PTQ') or 
self.uniform_output_enabled: self.cfg['TestDataset'] = create('TestDataset')() log_ranks = cfg.get('log_ranks', '0') if isinstance(log_ranks, str): self.log_ranks = [int(i) for i in log_ranks.split(',')] elif isinstance(log_ranks, int): self.log_ranks = [log_ranks] + train_results_path = os.path.abspath(os.path.join(self.cfg.save_dir, "train_results.json")) + if self.uniform_output_enabled: + if os.path.exists(train_results_path): + os.remove(train_results_path) + # if not os.path.exists(self.cfg.save_dir): + # os.mkdir(self.cfg.save_dir) + # with open(os.path.join(self.cfg.save_dir, "config.yaml"), "w") as f: + # config_dict = convert_to_dict(self.cfg) + # yaml.dump(config_dict, f) # build data loader capital_mode = self.mode.capitalize() @@ -1212,8 +1223,10 @@ def _get_infer_cfg_and_input_spec(self, "img_name": str, }) if prune_input: + model = ExportModel(self.model) + model.eval() static_model = paddle.jit.to_static( - self.model, input_spec=input_spec, full_graph=True) + model, input_spec=input_spec, full_graph=True) # NOTE: dy2st do not pruned program, but jit.save will prune program # input spec, prune input spec here and save with pruned input spec pruned_input_spec = _prune_input_spec( @@ -1490,3 +1503,12 @@ def reset_norm_param_attr(self, layer, **kwargs): setattr(layer, name, new_sublayer) return layer + +class ExportModel(nn.Layer): + def __init__(self, model): + super().__init__() + self.base_model = model + + def forward(self, x): + x = self.base_model(x) + return x \ No newline at end of file diff --git a/ppdet/utils/checkpoint.py b/ppdet/utils/checkpoint.py index 8672c988d6..04d41f1cb0 100644 --- a/ppdet/utils/checkpoint.py +++ b/ppdet/utils/checkpoint.py @@ -18,6 +18,7 @@ from __future__ import unicode_literals import os +import json import numpy as np import paddle import paddle.nn as nn @@ -26,6 +27,13 @@ from .logger import setup_logger logger = setup_logger(__name__) +def convert_to_dict(obj): + if isinstance(obj, dict): + return {k: 
def convert_to_dict(obj):
    """
    Recursively rebuild `obj` out of plain `dict` and `list` containers.

    Any `dict` instance (including subclasses) becomes a plain `dict`, any
    `list` instance becomes a plain `list`, and every other value is returned
    unchanged. Tuples and other containers are not descended into.

    Args:
        obj: arbitrary (possibly nested) object.

    Returns:
        The converted structure.
    """
    if isinstance(obj, dict):
        return {k: convert_to_dict(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [convert_to_dict(i) for i in obj]
    return obj


def save_model_info(model_info, save_path, prefix):
    """
    Save model info as JSON to `<save_path>/<prefix>/<prefix>.info.json`.

    Args:
        model_info: JSON-serializable object to dump.
        save_path (str): parent directory; the `prefix` sub-directory is
            created inside it when missing.
        prefix (str): name of both the sub-directory and the file stem.
    """
    save_path = os.path.join(save_path, prefix)
    # exist_ok avoids the check-then-create race when several processes
    # reach this point at the same time.
    os.makedirs(save_path, exist_ok=True)
    with open(os.path.join(save_path, f'{prefix}.info.json'), 'w') as f:
        json.dump(model_info, f)
    logger.info("Already save model info in {}".format(save_path))


def _train_results_entry(prefix, metric_info, model_tags, inference_tags):
    """
    Build one `models` record: the score plus relative artifact paths.
    """
    entry = {"score": metric_info["metric"]}
    for tag in model_tags:
        entry[tag] = os.path.join(prefix, f"{prefix}.{tag}")
    for tag in inference_tags:
        # The inference config is the only artifact not named after its tag.
        file_name = ("inference.yml"
                     if tag == "inference_config" else f"inference.{tag}")
        entry[tag] = os.path.join(prefix, "inference", file_name)
    return entry


def update_train_results(config,
                         prefix,
                         metric_info,
                         done_flag=False,
                         k=5,
                         ema=False):
    """
    Create or update `<save_dir>/train_results.json`.

    The file keeps a `best` record and `last_1` .. `last_k` records, where
    `last_1` is the most recent one. A call with `prefix == "best_model"`
    replaces `best`; any other prefix shifts `last_i` to `last_{i+1}`
    (dropping the oldest) and stores the new record in `last_1`.

    Args:
        config: mapping providing `save_dir`; `pdx_model_name` is read as
            well when the results file does not exist yet.
        prefix (str): checkpoint name, used as directory and file stem of
            the recorded paths.
        metric_info (dict): must provide `metric`, stored as `score`.
        done_flag (bool): stored as `done_flag` on every call.
        k (int): number of `last_*` slots to maintain, at least 1.
        ema (bool): when True a `pdema` path is recorded as well.

    Raises:
        ValueError: if `k` is smaller than 1.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if k < 1:
        raise ValueError("k must be >= 1, but got {}".format(k))

    train_results_path = os.path.join(config["save_dir"],
                                      "train_results.json")
    save_model_tag = ["pdparams", "pdopt", "pdstates"]
    save_inference_tag = [
        "inference_config", "pdmodel", "pdiparams", "pdiparams.info"
    ]
    if ema:
        save_model_tag.append("pdema")

    if os.path.exists(train_results_path):
        with open(train_results_path, "r", encoding="utf-8") as fp:
            train_results = json.load(fp)
    else:
        train_results = {
            "model_name": config["pdx_model_name"],
            "label_dict": "",
            "train_log": "train.log",
            "config": "config.yaml",
            "models": {},
        }
        for i in range(1, k + 1):
            train_results["models"][f"last_{i}"] = {}
        train_results["models"]["best"] = {}
    train_results["done_flag"] = done_flag

    models = train_results.setdefault("models", {})
    # Build the record from scratch so that keys written by an earlier call
    # (e.g. a `pdema` path) never leak into the entry of a newer checkpoint.
    entry = _train_results_entry(prefix, metric_info, save_model_tag,
                                 save_inference_tag)
    if prefix == "best_model":
        models["best"] = entry
    else:
        # Shift from the oldest slot down; slots missing from an existing
        # file (e.g. one written with a smaller k) count as empty.
        for i in range(k - 1, 0, -1):
            models[f"last_{i + 1}"] = dict(models.get(f"last_{i}", {}))
        models["last_1"] = entry

    with open(train_results_path, "w", encoding="utf-8") as fp:
        json.dump(train_results, fp)