support export after save model
zhangyubo0722 committed Sep 20, 2024
1 parent 1e20933 commit 38fe21b
Showing 4 changed files with 131 additions and 9 deletions.
27 changes: 21 additions & 6 deletions ppdet/engine/callbacks.py
@@ -26,7 +26,7 @@
import paddle
import paddle.distributed as dist

from ppdet.utils.checkpoint import save_model, save_semi_model
from ppdet.utils.checkpoint import save_model, save_semi_model, save_model_info, update_train_results
from ppdet.metrics import get_infer_results

from ppdet.utils.logger import setup_logger
@@ -178,11 +178,12 @@ def __init__(self, model):
super(Checkpointer, self).__init__(model)
self.best_ap = -1000.
self.save_dir = self.model.cfg.save_dir
self.uniform_output_enabled = self.model.cfg.get("uniform_output_enabled", False)
if hasattr(self.model.model, 'student_model'):
self.weight = self.model.model.student_model
else:
self.weight = self.model.model

def on_epoch_end(self, status):
# Checkpointer only performed during training
mode = status['mode']
@@ -226,8 +227,15 @@ def on_epoch_end(self, status):
'metric': abs(epoch_ap),
'epoch': epoch_id + 1
}
save_path = os.path.join(self.save_dir, f"{save_name}.pdstates")
save_path = os.path.join(os.path.join(self.save_dir, save_name) if self.uniform_output_enabled else self.save_dir, f"{save_name}.pdstates")
paddle.save(epoch_metric, save_path)
if self.uniform_output_enabled:
save_model_info(epoch_metric, self.save_dir, save_name)
if epoch_id + 1 == self.model.cfg.epoch:
done_flag = True
else:
done_flag = False
update_train_results(self.model.cfg, save_name, epoch_metric, done_flag=done_flag, ema=self.model.use_ema)
if 'save_best_model' in status and status['save_best_model']:
if epoch_ap >= self.best_ap:
self.best_ap = epoch_ap
@@ -237,8 +245,11 @@
'metric': abs(self.best_ap),
'epoch': epoch_id + 1
}
save_path = os.path.join(self.save_dir, "best_model.pdstates")
save_path = os.path.join(os.path.join(self.save_dir, save_name) if self.uniform_output_enabled else self.save_dir, "best_model.pdstates")
paddle.save(best_metric, save_path)
if self.uniform_output_enabled:
save_model_info(best_metric, self.save_dir, save_name)
update_train_results(self.model.cfg, save_name, best_metric, done_flag=done_flag, ema=self.model.use_ema)
logger.info("Best test {} {} is {:0.3f}.".format(
key, eval_func, abs(self.best_ap)))
if weight:
@@ -250,10 +261,12 @@
save_model(
status['weight'],
self.model.optimizer,
self.save_dir,
os.path.join(self.save_dir, save_name) if self.uniform_output_enabled else self.save_dir,
save_name,
epoch_id + 1,
ema_model=weight)
if self.uniform_output_enabled:
self.model.export(output_dir=os.path.join(self.save_dir, save_name, "inference"), for_fd=True)
else:
# save model(student model) and ema_model(teacher model)
# in DenseTeacher SSOD, the teacher model will be higher,
@@ -270,8 +283,10 @@
del teacher_model
del student_model
else:
save_model(weight, self.model.optimizer, self.save_dir,
save_model(weight, self.model.optimizer, os.path.join(self.save_dir, save_name) if self.uniform_output_enabled else self.save_dir,
save_name, epoch_id + 1)
if self.uniform_output_enabled:
self.model.export(output_dir=os.path.join(self.save_dir, save_name, "inference"), for_fd=True)


class WiferFaceEval(Callback):
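For orientation, here is a rough sketch of the on-disk layout the Checkpointer produces when `uniform_output_enabled` is set, assuming the save_dir is `output` and the checkpoint being written is `best_model` (both names are illustrative, not fixed by this commit):

# Illustrative layout with uniform_output_enabled enabled:
# output/                           <- cfg.save_dir (assumed default)
#   train_results.json              <- maintained by update_train_results
#   best_model/
#     best_model.pdparams           <- weights written by save_model
#     best_model.pdopt              <- optimizer state written by save_model
#     best_model.pdstates           <- metric/epoch dict saved via paddle.save
#     best_model.pdema              <- only when EMA is enabled
#     best_model.info.json          <- written by save_model_info
#     inference/                    <- self.model.export(..., for_fd=True)
#       inference.yml
#       inference.pdmodel
#       inference.pdiparams
#       inference.pdiparams.info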
14 changes: 14 additions & 0 deletions ppdet/engine/export_utils.py
@@ -22,6 +22,7 @@

import paddle
from ppdet.data.source.category import get_categories
from ppdet.core.workspace import load_config

from ppdet.utils.logger import setup_logger
logger = setup_logger('ppdet.engine')
@@ -286,6 +287,19 @@ def _dump_infer_config(config, path, image_shape, model):
'metric': config['metric'],
'use_dynamic_shape': use_dynamic_shape
})
hpi_config_path = config.get("hpi_config_path", None)
if hpi_config_path:
hpi_config = load_config(hpi_config_path)
dynamic_shapes = image_shape[-1]
if hpi_config["Hpi"]["backend_config"].get("paddle_tensorrt", None):
hpi_config["Hpi"]["backend_config"]["paddle_tensorrt"][
"dynamic_shapes"]["image"] = [[1, 3, dynamic_shapes, dynamic_shapes] for i in range(3)]
if hpi_config["Hpi"]["backend_config"].get("tensorrt", None):
hpi_config["Hpi"]["backend_config"]["tensorrt"]["dynamic_shapes"][
"image"] = [[1, 3, dynamic_shapes, dynamic_shapes] for i in range(3)]
infer_cfg["Hpi"] = hpi_config["Hpi"]
infer_cfg["Global"] = {}
infer_cfg["Global"]["model_name"] = config["pdx_model_name"]
export_onnx = config.get('export_onnx', False)
export_eb = config.get('export_eb', False)

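When `hpi_config_path` is provided, the block above merges a high-performance-inference (Hpi) section into the exported inference config, overriding the TensorRT dynamic shapes of the `image` input with three identical entries built from the export image shape, and records the model name under a new `Global` section. A sketch of the resulting entries, assuming `image_shape[-1]` is 640 and a hypothetical `pdx_model_name` (neither value is taken from this commit):

# Assumed: image_shape[-1] == 640, config["pdx_model_name"] == "PP-YOLOE_plus-S"
# infer_cfg["Hpi"]["backend_config"]["tensorrt"]["dynamic_shapes"]["image"]
#     == [[1, 3, 640, 640], [1, 3, 640, 640], [1, 3, 640, 640]]
# The same list is written for the "paddle_tensorrt" backend when present;
# the three entries presumably follow the min/opt/max shape convention.
# infer_cfg["Global"] == {"model_name": "PP-YOLOE_plus-S"}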
28 changes: 25 additions & 3 deletions ppdet/engine/trainer.py
@@ -20,6 +20,7 @@
import sys
import copy
import time
import yaml
from tqdm import tqdm

import numpy as np
@@ -36,7 +37,7 @@
from ppdet.optimizer import ModelEMA

from ppdet.core.workspace import create
from ppdet.utils.checkpoint import load_weight, load_pretrain_weight
from ppdet.utils.checkpoint import load_weight, load_pretrain_weight, convert_to_dict
from ppdet.utils.visualizer import visualize_results, save_result
from ppdet.metrics import get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownCOCOWholeBadyHandEval, KeyPointTopDownMPIIEval, Pose3DEval
from ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, RBoxMetric, JDEDetMetric, SNIPERCOCOMetric, CULaneMetric
@@ -75,13 +76,23 @@ def __init__(self, cfg, mode='train'):
self.custom_white_list = self.cfg.get('custom_white_list', None)
self.custom_black_list = self.cfg.get('custom_black_list', None)
self.use_master_grad = self.cfg.get('master_grad', False)
if 'slim' in cfg and cfg['slim_type'] == 'PTQ':
self.uniform_output_enabled = self.cfg.get('uniform_output_enabled', False)
if ('slim' in cfg and cfg['slim_type'] == 'PTQ') or self.uniform_output_enabled:
self.cfg['TestDataset'] = create('TestDataset')()
log_ranks = cfg.get('log_ranks', '0')
if isinstance(log_ranks, str):
self.log_ranks = [int(i) for i in log_ranks.split(',')]
elif isinstance(log_ranks, int):
self.log_ranks = [log_ranks]
train_results_path = os.path.abspath(os.path.join(self.cfg.save_dir, "train_results.json"))
if self.uniform_output_enabled:
if os.path.exists(train_results_path):
os.remove(train_results_path)
# if not os.path.exists(self.cfg.save_dir):
# os.mkdir(self.cfg.save_dir)
# with open(os.path.join(self.cfg.save_dir, "config.yaml"), "w") as f:
# config_dict = convert_to_dict(self.cfg)
# yaml.dump(config_dict, f)

# build data loader
capital_mode = self.mode.capitalize()
@@ -1212,8 +1223,10 @@ def _get_infer_cfg_and_input_spec(self,
"img_name": str,
})
if prune_input:
model = ExportModel(self.model)
model.eval()
static_model = paddle.jit.to_static(
self.model, input_spec=input_spec, full_graph=True)
model, input_spec=input_spec, full_graph=True)
# NOTE: dy2st do not pruned program, but jit.save will prune program
# input spec, prune input spec here and save with pruned input spec
pruned_input_spec = _prune_input_spec(
@@ -1490,3 +1503,12 @@ def reset_norm_param_attr(self, layer, **kwargs):
setattr(layer, name, new_sublayer)

return layer

class ExportModel(nn.Layer):
def __init__(self, model):
super().__init__()
self.base_model = model

def forward(self, x):
x = self.base_model(x)
return x
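In `_get_infer_cfg_and_input_spec`, the dy2st conversion is now applied to an `ExportModel` wrapper rather than to `self.model` directly; the wrapper only forwards its input to the wrapped model. Presumably this keeps the live training model unmodified when the Checkpointer triggers an export mid-training. A minimal sketch of the new call pattern, with a hypothetical input spec (inside the trainer, where `self.model` is the detector and the real input_spec is assembled earlier in the method):

import paddle
from paddle.static import InputSpec

# Hypothetical spec for illustration; the trainer builds the actual input_spec
# from the reader config and export image shape.
input_spec = [{"image": InputSpec(shape=[None, 3, 640, 640], name="image")}]
model = ExportModel(self.model)  # wrap the detector instead of converting it in place
model.eval()
static_model = paddle.jit.to_static(model, input_spec=input_spec, full_graph=True)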
71 changes: 71 additions & 0 deletions ppdet/utils/checkpoint.py
@@ -18,6 +18,7 @@
from __future__ import unicode_literals

import os
import json
import numpy as np
import paddle
import paddle.nn as nn
@@ -26,6 +27,13 @@
from .logger import setup_logger
logger = setup_logger(__name__)

def convert_to_dict(obj):
if isinstance(obj, dict):
return {k: convert_to_dict(v) for k, v in obj.items()}
elif isinstance(obj, list):
return [convert_to_dict(i) for i in obj]
else:
return obj

def is_url(path):
"""
@@ -375,3 +383,66 @@ def save_semi_model(teacher_model, student_model, optimizer, save_dir,
state_dict['last_iter'] = last_iter
paddle.save(state_dict, save_path + str(last_epoch) + "epoch.pdopt")
logger.info("Save checkpoint: {}".format(save_dir))

def save_model_info(model_info, save_path, prefix):
"""
save model info to the target path
"""
save_path = os.path.join(save_path, prefix)
if not os.path.exists(save_path):
os.makedirs(save_path)
with open(os.path.join(save_path, f'{prefix}.info.json'), 'w') as f:
json.dump(model_info, f)
logger.info("Already save model info in {}".format(save_path))

def update_train_results(config,
prefix,
metric_info,
done_flag=False,
k=5,
ema=False):
assert k >= 1
train_results_path = os.path.join(config["save_dir"],
"train_results.json")
save_model_tag = ["pdparams", "pdopt", "pdstates"]
save_inference_tag = [
"inference_config", "pdmodel", "pdiparams", "pdiparams.info"
]
if ema:
save_model_tag.append("pdema")
if os.path.exists(train_results_path):
with open(train_results_path, "r") as fp:
train_results = json.load(fp)
else:
train_results = {}
train_results["model_name"] = config["pdx_model_name"]
train_results["label_dict"] = ""
train_results["train_log"] = "train.log"
train_results["config"] = "config.yaml"
train_results["models"] = {}
for i in range(1, k + 1):
train_results["models"][f"last_{i}"] = {}
train_results["models"]["best"] = {}
train_results["done_flag"] = done_flag
if prefix == "best_model":
train_results["models"]["best"]["score"] = metric_info["metric"]
for tag in save_model_tag:
train_results["models"]["best"][tag] = os.path.join(
prefix, f"{prefix}.{tag}")
for tag in save_inference_tag:
train_results["models"]["best"][tag] = os.path.join(
prefix, "inference", f"inference.{tag}" if tag != "inference_config" else "inference.yml")
else:
for i in range(k - 1, 0, -1):
train_results["models"][f"last_{i + 1}"] = train_results["models"][
f"last_{i}"].copy()
train_results["models"][f"last_{1}"]["score"] = metric_info["metric"]
for tag in save_model_tag:
train_results["models"][f"last_{1}"][tag] = os.path.join(
prefix, f"{prefix}.{tag}")
for tag in save_inference_tag:
train_results["models"][f"last_{1}"][tag] = os.path.join(
prefix, "inference", f"inference.{tag}" if tag != "inference_config" else "inference.yml")

with open(train_results_path, "w") as fp:
json.dump(train_results, fp)

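`update_train_results` keeps a rolling record of the last k checkpoints plus the best one in `train_results.json`. As a quick illustration of how a downstream tool could consume that file, a minimal sketch (the `output` directory stands in for the configured save_dir; adjust to the training config):

import json
import os

save_dir = "output"  # assumed save_dir; use the value from the training config
with open(os.path.join(save_dir, "train_results.json")) as fp:
    train_results = json.load(fp)

# Keys written by update_train_results above: "model_name", "done_flag",
# and "models" with "best" plus "last_1" ... "last_5" entries (k defaults to 5).
best = train_results["models"]["best"]
print(train_results["model_name"], train_results["done_flag"])
print(best.get("score"), best.get("pdparams"), best.get("inference_config"))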