Skip to content

Commit

Permalink
Support exporting the inference model after saving a checkpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangyubo0722 committed Sep 19, 2024
1 parent 1e20933 commit f0a3ab9
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 8 deletions.
24 changes: 18 additions & 6 deletions ppdet/engine/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import paddle
import paddle.distributed as dist

from ppdet.utils.checkpoint import save_model, save_semi_model
from ppdet.utils.checkpoint import save_model, save_semi_model, save_model_info, update_train_results
from ppdet.metrics import get_infer_results

from ppdet.utils.logger import setup_logger
Expand Down Expand Up @@ -178,11 +178,12 @@ def __init__(self, model):
super(Checkpointer, self).__init__(model)
self.best_ap = -1000.
self.save_dir = self.model.cfg.save_dir
self.uniform_output_enabled = self.model.cfg.get("uniform_output_enabled", False)
if hasattr(self.model.model, 'student_model'):
self.weight = self.model.model.student_model
else:
self.weight = self.model.model

def on_epoch_end(self, status):
# Checkpointer only performed during training
mode = status['mode']
Expand Down Expand Up @@ -226,8 +227,11 @@ def on_epoch_end(self, status):
'metric': abs(epoch_ap),
'epoch': epoch_id + 1
}
save_path = os.path.join(self.save_dir, f"{save_name}.pdstates")
save_path = os.path.join(os.path.join(self.save_dir, save_name) if self.uniform_output_enabled else self.save_dir, f"{save_name}.pdstates")
paddle.save(epoch_metric, save_path)
if self.uniform_output_enabled:
save_model_info(epoch_metric, self.save_dir, save_name)
update_train_results(self.model.cfg, save_name, epoch_metric, ema=self.model.use_ema)
if 'save_best_model' in status and status['save_best_model']:
if epoch_ap >= self.best_ap:
self.best_ap = epoch_ap
Expand All @@ -237,8 +241,11 @@ def on_epoch_end(self, status):
'metric': abs(self.best_ap),
'epoch': epoch_id + 1
}
save_path = os.path.join(self.save_dir, "best_model.pdstates")
save_path = os.path.join(os.path.join(self.save_dir, save_name) if self.uniform_output_enabled else self.save_dir, "best_model.pdstates")
paddle.save(best_metric, save_path)
if self.uniform_output_enabled:
save_model_info(best_metric, self.save_dir, save_name)
update_train_results(self.model.cfg, save_name, best_metric, ema=self.model.use_ema)
logger.info("Best test {} {} is {:0.3f}.".format(
key, eval_func, abs(self.best_ap)))
if weight:
Expand All @@ -250,10 +257,13 @@ def on_epoch_end(self, status):
save_model(
status['weight'],
self.model.optimizer,
self.save_dir,
os.path.join(self.save_dir, save_name) if self.uniform_output_enabled else self.save_dir,
save_name,
epoch_id + 1,
ema_model=weight)
if self.uniform_output_enabled:
self.model.export(output_dir=os.path.join(self.save_dir, save_name, "inference"), for_fd=True)
# self.model.export(output_dir=os.path.join(self.save_dir, save_name, "inference_ema"), for_fd=True)
else:
# save model(student model) and ema_model(teacher model)
# in DenseTeacher SSOD, the teacher model will be higher,
Expand All @@ -270,8 +280,10 @@ def on_epoch_end(self, status):
del teacher_model
del student_model
else:
save_model(weight, self.model.optimizer, self.save_dir,
save_model(weight, self.model.optimizer, os.path.join(self.save_dir, save_name) if self.uniform_output_enabled else self.save_dir,
save_name, epoch_id + 1)
if self.uniform_output_enabled:
self.model.export(output_dir=os.path.join(self.save_dir, save_name, "inference"), for_fd=True)


class WiferFaceEval(Callback):
Expand Down
15 changes: 13 additions & 2 deletions ppdet/engine/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def __init__(self, cfg, mode='train'):
self.custom_white_list = self.cfg.get('custom_white_list', None)
self.custom_black_list = self.cfg.get('custom_black_list', None)
self.use_master_grad = self.cfg.get('master_grad', False)
if 'slim' in cfg and cfg['slim_type'] == 'PTQ':
if ('slim' in cfg and cfg['slim_type'] == 'PTQ') or 'uniform_output_enabled' in cfg:
self.cfg['TestDataset'] = create('TestDataset')()
log_ranks = cfg.get('log_ranks', '0')
if isinstance(log_ranks, str):
Expand Down Expand Up @@ -1212,8 +1212,10 @@ def _get_infer_cfg_and_input_spec(self,
"img_name": str,
})
if prune_input:
model = ExportModel(self.model)
model.eval()
static_model = paddle.jit.to_static(
self.model, input_spec=input_spec, full_graph=True)
model, input_spec=input_spec, full_graph=True)
# NOTE: dy2st do not pruned program, but jit.save will prune program
# input spec, prune input spec here and save with pruned input spec
pruned_input_spec = _prune_input_spec(
Expand Down Expand Up @@ -1490,3 +1492,12 @@ def reset_norm_param_attr(self, layer, **kwargs):
setattr(layer, name, new_sublayer)

return layer

class ExportModel(nn.Layer):
    """Thin wrapper that forwards a single input through the wrapped model.

    Gives ``paddle.jit.to_static`` a clean single-input entry point around
    the detector when exporting for inference (see the export path in
    ``_get_infer_cfg_and_input_spec``).
    """

    def __init__(self, model):
        super(ExportModel, self).__init__()
        # The wrapped detector; kept under ``base_model`` so export code can
        # reach the underlying network.
        self.base_model = model

    def forward(self, x):
        return self.base_model(x)
67 changes: 67 additions & 0 deletions ppdet/utils/checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from __future__ import unicode_literals

import os
import json
import numpy as np
import paddle
import paddle.nn as nn
Expand Down Expand Up @@ -375,3 +376,69 @@ def save_semi_model(teacher_model, student_model, optimizer, save_dir,
state_dict['last_iter'] = last_iter
paddle.save(state_dict, save_path + str(last_epoch) + "epoch.pdopt")
logger.info("Save checkpoint: {}".format(save_dir))

def save_model_info(model_info, save_path, prefix):
    """Save a small JSON metadata file describing a checkpoint.

    The file is written to ``<save_path>/<prefix>/<prefix>.info.json``.

    Args:
        model_info (dict): JSON-serializable checkpoint metadata
            (e.g. ``{'metric': ..., 'epoch': ...}``).
        save_path (str): base output directory.
        prefix (str): checkpoint name; used both as the subdirectory and
            as the JSON file name stem.
    """
    save_path = os.path.join(save_path, prefix)
    # exist_ok avoids a crash/race when the checkpoint dir was already
    # created (save_model writes into the same directory in this flow).
    os.makedirs(save_path, exist_ok=True)
    with open(os.path.join(save_path, f'{prefix}.info.json'), 'w') as f:
        json.dump(model_info, f)
    logger.info("Already save model info in {}".format(save_path))

def update_train_results(config,
                         prefix,
                         metric_info,
                         done_flag=False,
                         k=5,
                         ema=False):
    """Update ``train_results.json`` in the save dir with a new checkpoint.

    Maintains a rolling window of the last ``k`` checkpoints plus a "best"
    entry, recording the relative paths of each checkpoint's weight files
    and exported inference files.

    Args:
        config (dict): training config; ``config["save_dir"]`` is where
            ``train_results.json`` lives.
        prefix (str): checkpoint name; ``"best_model"`` updates the "best"
            entry, anything else shifts the ``last_i`` window.
        metric_info (dict): must contain a ``"metric"`` entry used as score.
        done_flag (bool): whether training has finished.
        k (int): size of the ``last_i`` history window; must be >= 1.
        ema (bool): whether EMA weights/exports are saved alongside
            (adds the ``pdema`` tag and points at ``inference_ema``).
    """
    assert k >= 1
    train_results_path = os.path.join(config["save_dir"],
                                      "train_results.json")
    save_model_tag = ["pdparams", "pdopt", "pdstates"]
    save_inference_tag = [
        "inference_config", "pdmodel", "pdiparams", "pdiparams.info"
    ]
    if ema:
        save_model_tag.append("pdema")
    if os.path.exists(train_results_path):
        with open(train_results_path, "r") as fp:
            train_results = json.load(fp)
    else:
        # First call: build the skeleton document.
        train_results = {
            "model_name": "",
            "label_dict": "",
            "train_log": "train.log",
            "config": "config.yaml",
            "models": {f"last_{i}": {} for i in range(1, k + 1)},
        }
        train_results["models"]["best"] = {}
    train_results["done_flag"] = done_flag
    if prefix == "best_model":
        entry = train_results["models"]["best"]
        entry["score"] = metric_info["metric"]
        for tag in save_model_tag:
            entry[tag] = os.path.join(prefix, f"{prefix}.{tag}")
        for tag in save_inference_tag:
            entry[tag] = os.path.join(prefix, "inference", f"inference.{tag}")
    else:
        # Shift the history window: last_i -> last_{i+1}.
        for i in range(k - 1, 0, -1):
            train_results["models"][f"last_{i + 1}"] = train_results[
                "models"][f"last_{i}"].copy()
        entry = train_results["models"]["last_1"]
        entry["score"] = metric_info["metric"]
        for tag in save_model_tag:
            entry[tag] = os.path.join(prefix, f"{prefix}.{tag}")
        # BUG FIX: the original `if ema:` reused the leaked loop variable
        # `tag`, so only the last inference tag was redirected to the EMA
        # export dir; apply the EMA directory to every inference file.
        inference_dir = "inference_ema" if ema else "inference"
        for tag in save_inference_tag:
            entry[tag] = os.path.join(prefix, inference_dir,
                                      f"inference.{tag}")

    with open(train_results_path, "w") as fp:
        json.dump(train_results, fp)

0 comments on commit f0a3ab9

Please sign in to comment.