diff --git a/paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc b/paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc index 3d8d353cbf530..9fe50deaf2d72 100644 --- a/paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc +++ b/paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc @@ -56,7 +56,7 @@ const bool is_regularization_op(const std::string& op_namescope) { } void IpuOptimizerExtractPass::ApplyImpl(ir::Graph* graph) const { - // 这里构建的 op 符合 popart 的定义, 涉及到的一些值需要在 LowerOptimier 时获得 + // optimizer values will be extracted when lowering optimizer in ipu_backend OpDesc new_op("popart_optimizer", {}, {}, {}); new_op.SetAttr("op_role", 0); new_op.SetAttr("with_lr_sched", false); @@ -86,7 +86,7 @@ void IpuOptimizerExtractPass::ApplyImpl(ir::Graph* graph) const { bool is_regularization = is_regularization_op(op_namescope); VLOG(10) << "found optimizer releated op: " << op_type; - // initial larning_rate will be set in LowerOptimier + // initial learning_rate will be set in ipu_backend set_ops.insert(op_type); if (op_type == "sgd") { auto type = std::string{"sgd"}; diff --git a/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc b/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc index 975a4b62cc708..6806e44f09505 100644 --- a/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc +++ b/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.h" +#include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h" @@ -28,11 +29,8 @@ void PopartCanonicalizationPass::ApplyImpl(ir::Graph* graph) const { auto custom_ops = Get>("custom_ops"); std::vector missing_ops; - auto nodes = graph->Nodes(); - for (auto* node : nodes) { - if (!node->IsOp()) { - continue; - } + auto sorted_ops = 
TopologySortOperations(*graph); + for (auto* node : sorted_ops) { auto* op = node->Op(); auto op_type = op->Type(); diff --git a/paddle/fluid/platform/device/ipu/ipu_backend.cc b/paddle/fluid/platform/device/ipu/ipu_backend.cc index febd5de01c058..e0b3b08a2313d 100644 --- a/paddle/fluid/platform/device/ipu/ipu_backend.cc +++ b/paddle/fluid/platform/device/ipu/ipu_backend.cc @@ -43,17 +43,17 @@ void IpuBackend::Compile(Graph* graph, const std::vector& feed_list, const std::vector& fetch_list) { VLOG(10) << "enter IpuBackend::Compile"; - compiler_->Prepare(); - executor_->SetCompilerResources(compiler_->GetResources()); - - compiler_->InitInputs(graph, feed_list); - compiler_->LowerConstants(graph, scope_); - compiler_->LowerWeights(graph, scope_); - compiler_->LowerBody(graph); + compiler_->Prepare(graph); + compiler_->InitInputs(feed_list); + compiler_->LowerConstants(scope_); + compiler_->LowerWeights(scope_); + compiler_->LowerBody(); compiler_->InitOutputs(fetch_list); if (ipu_strategy_->is_training) { - compiler_->LowerOptimier(graph, scope_); + compiler_->LowerOptimizer(scope_); } + executor_->SetCompilerResources(compiler_->GetResources()); + is_compiled_ = true; // when call compile, means a new graph is_prepared_ = false; diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.cc b/paddle/fluid/platform/device/ipu/ipu_compiler.cc index 15cba89e9e0be..52144c227f373 100644 --- a/paddle/fluid/platform/device/ipu/ipu_compiler.cc +++ b/paddle/fluid/platform/device/ipu/ipu_compiler.cc @@ -98,6 +98,19 @@ TO GetCastSigAttrAllowNull(std::string attr, OpDesc* op_desc) { } } +GraphHelper::GraphHelper(const Graph* g) { + graph = g; + sorted_ops = framework::ir::TopologySortOperations(*g); + for (auto* node : g->Nodes()) { + nodes_id_map[node->id()] = node; + if (node->IsVar()) { + vars_name_map[node->Name()] = node; + sorted_vars_id.push_back(node->id()); + } + } + std::sort(sorted_vars_id.begin(), sorted_vars_id.end()); +} + Compiler::Compiler() { RegisterOpFunc(); 
} Compiler::~Compiler() { @@ -105,9 +118,10 @@ Compiler::~Compiler() { resources_.reset(); } -void Compiler::Prepare() { +void Compiler::Prepare(const Graph* graph) { builder_ = popart::Builder::create(); resources_ = std::make_unique(); + graph_helper_ = std::make_unique(graph); } void Compiler::RegisterOpFunc() { @@ -171,93 +185,24 @@ void Compiler::RegisterOpFunc() { #undef INT_VEC } -void Compiler::LowerBody(const Graph* graph) { - VLOG(10) << "enter Compiler::LowerBody"; - auto nodes = framework::ir::TopologySortOperations(*graph); - for (auto* node : nodes) { - auto* op_desc = node->Op(); - auto op_type = op_desc->Type(); - VLOG(10) << "lowering op: " << op_type; - - if (op_type == "popart_constant") { - // pass - } else if (op_type == "popart_optimizer") { - // pass - } else if (op_type == "popart_checkpointoutput") { - auto inputs = GetOpInputs(op_desc); - auto outputs = GetOpOutputs(op_desc); - auto output_ids = builder_->checkpointOutput(inputs); - InsertTensors(outputs, output_ids); - } else if (op_type == "popart_custom_op") { - auto inputs = GetOpInputs(op_desc); - auto outputs = GetOpOutputs(op_desc); - auto debug_context = BuildDebugContext(op_desc); - auto attributes = std::map{}; - for (auto& attr : op_desc->GetAttrMap()) { - CustomOpAttrVisitor visitor(&attributes, attr.first); - boost::apply_visitor(visitor, attr.second); - } - auto __op_type = - BOOST_GET_CONST(std::string, op_desc->GetAttr("__op_type")); - VLOG(10) << "Build graph from custom op: " << __op_type; - auto it = custom_ops_.find(__op_type); - auto output_ids = - builder_->customOp(it->second.popart_op, it->second.popart_op.version, - inputs, outputs.size(), attributes, debug_context); - SetIpuIndexStage(output_ids, op_desc); - InsertTensors(outputs, output_ids); - } else if (op_type == "popart_printtensor") { - auto inputs = GetOpInputs(op_desc); - auto outputs = GetOpOutputs(op_desc); - auto debug_context = BuildDebugContext(op_desc); - auto print_gradient = - 
BOOST_GET_CONST(int64_t, op_desc->GetAttr("print_gradient")); - auto title = BOOST_GET_CONST(std::string, op_desc->GetAttr("title")); - auto output_ids = builder_->aiGraphcoreOpset1().printtensor( - inputs, print_gradient, debug_context, title); - SetIpuIndexStage(output_ids, op_desc); - InsertTensors(outputs, output_ids); - } else { - auto itr = name_function_.find(op_type); - if (itr != name_function_.end()) { - itr->second(node->Op()); - } else { - PADDLE_THROW(platform::errors::NotFound( - "%s is not registered, please check for unsupported operators for " - "running on IPU", - op_type)); - } - } - } - VLOG(10) << "leave Compiler::LowerBody"; -} - -void Compiler::InitInputs(Graph* graph, - const std::vector& feed_list) { +void Compiler::InitInputs(const std::vector& feed_list) { for (const auto& feed_name : feed_list) { - feed_list_.push_back(feed_name); - for (const Node* n : graph->Nodes()) { - if (n->IsVar()) { - auto* var_desc = n->Var(); - if (feed_name == var_desc->Name()) { - VLOG(10) << "feed_name= " << var_desc->Name(); - auto data_type = VarType2PopartType(var_desc->GetDataType()); - popart::TensorInfo input_info{data_type, var_desc->GetShape()}; - VLOG(10) << "popart input_info = " << input_info; - popart::TensorId tensor_id = - builder_->addInputTensor(input_info, feed_name); - VLOG(10) << "popart input tensor id = " << tensor_id; - resources_->inputs.push_back(tensor_id); - resources_->tensors.emplace(var_desc->Name(), tensor_id); - } - } - } + auto* node = graph_helper_->vars_name_map[feed_name]; + auto* var_desc = node->Var(); + VLOG(10) << "feed_name= " << var_desc->Name(); + auto data_type = VarType2PopartType(var_desc->GetDataType()); + popart::TensorInfo input_info{data_type, var_desc->GetShape()}; + VLOG(10) << "popart input_info = " << input_info; + popart::TensorId tensor_id = + builder_->addInputTensor(input_info, feed_name); + VLOG(10) << "popart input tensor id = " << tensor_id; + resources_->inputs.push_back(tensor_id); + 
resources_->tensors.emplace(var_desc->Name(), tensor_id); } } void Compiler::InitOutputs(const std::vector& fetch_list) { for (const auto& fetch_name : fetch_list) { - fetch_list_.push_back(fetch_name); auto tensor = resources_->tensors.find(fetch_name); PADDLE_ENFORCE_NE( tensor, resources_->tensors.end(), @@ -271,14 +216,10 @@ void Compiler::InitOutputs(const std::vector& fetch_list) { } } -void Compiler::LowerConstants(const Graph* graph, const Scope* scope) { +void Compiler::LowerConstants(const Scope* scope) { auto& kid_scope = scope->NewScope(); VLOG(10) << "enter Compiler::LowerConstants"; - for (auto* node : graph->Nodes()) { - if (!node->IsOp()) { - continue; - } - + for (auto* node : graph_helper_->sorted_ops) { auto* op_desc = node->Op(); auto op_type = op_desc->Type(); if (op_type == "popart_constant") { @@ -308,17 +249,16 @@ void Compiler::LowerConstants(const Graph* graph, const Scope* scope) { VLOG(10) << "leave Compiler::LowerConstants"; } -void Compiler::LowerWeights(const Graph* graph, const Scope* scope) { +void Compiler::LowerWeights(const Scope* scope) { VLOG(10) << "enter Compiler::LowerWeights"; - PADDLE_ENFORCE_NOT_NULL(scope, - platform::errors::PreconditionNotMet( - "You should call set_scope before LowerWeights")); // at this step, the graph doesn't contains optimizer related states - for (const auto* node : graph->Nodes()) { + for (auto id : graph_helper_->sorted_vars_id) { + auto* node = graph_helper_->nodes_id_map[id]; if (node->IsVar() && !node->IsCtrlVar() && node->Var()) { if (node->Var()->Persistable() && node->inputs.empty()) { auto var_name = node->Var()->Name(); if (resources_->tensors.count(var_name) != 0) { + VLOG(10) << "found existed one, skip lowering Weight: " << var_name; continue; } VLOG(10) << "lowering weight: " << var_name; @@ -344,12 +284,68 @@ void Compiler::LowerWeights(const Graph* graph, const Scope* scope) { VLOG(10) << "leave Compiler::LowerWeights"; } -void Compiler::LowerOptimier(const Graph* graph, const 
Scope* scope) { - for (auto* node : graph->Nodes()) { - if (!node->IsOp()) { - continue; +void Compiler::LowerBody() { + VLOG(10) << "enter Compiler::LowerBody"; + for (auto* node : graph_helper_->sorted_ops) { + auto* op_desc = node->Op(); + auto op_type = op_desc->Type(); + VLOG(10) << "lowering op: " << op_type; + + if (op_type == "popart_constant") { + // pass + } else if (op_type == "popart_optimizer") { + // pass + } else if (op_type == "popart_checkpointoutput") { + auto inputs = GetOpInputs(op_desc); + auto outputs = GetOpOutputs(op_desc); + auto output_ids = builder_->checkpointOutput(inputs); + InsertTensors(outputs, output_ids); + } else if (op_type == "popart_custom_op") { + auto inputs = GetOpInputs(op_desc); + auto outputs = GetOpOutputs(op_desc); + auto debug_context = BuildDebugContext(op_desc); + auto attributes = std::map{}; + for (auto& attr : op_desc->GetAttrMap()) { + CustomOpAttrVisitor visitor(&attributes, attr.first); + boost::apply_visitor(visitor, attr.second); + } + auto __op_type = + BOOST_GET_CONST(std::string, op_desc->GetAttr("__op_type")); + VLOG(10) << "Build graph from custom op: " << __op_type; + auto it = custom_ops_.find(__op_type); + auto output_ids = + builder_->customOp(it->second.popart_op, it->second.popart_op.version, + inputs, outputs.size(), attributes, debug_context); + SetIpuIndexStage(output_ids, op_desc); + InsertTensors(outputs, output_ids); + } else if (op_type == "popart_printtensor") { + auto inputs = GetOpInputs(op_desc); + auto outputs = GetOpOutputs(op_desc); + auto debug_context = BuildDebugContext(op_desc); + auto print_gradient = + BOOST_GET_CONST(int64_t, op_desc->GetAttr("print_gradient")); + auto title = BOOST_GET_CONST(std::string, op_desc->GetAttr("title")); + auto output_ids = builder_->aiGraphcoreOpset1().printtensor( + inputs, print_gradient, debug_context, title); + SetIpuIndexStage(output_ids, op_desc); + InsertTensors(outputs, output_ids); + } else { + auto itr = name_function_.find(op_type); + 
if (itr != name_function_.end()) { + itr->second(node->Op()); + } else { + PADDLE_THROW(platform::errors::NotFound( + "%s is not registered, please check for unsupported operators for " + "running on IPU", + op_type)); + } } + } + VLOG(10) << "leave Compiler::LowerBody"; +} +void Compiler::LowerOptimizer(const Scope* scope) { + for (auto* node : graph_helper_->sorted_ops) { auto* op_desc = node->Op(); auto op_type = op_desc->Type(); if (op_type == "popart_optimizer") { diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.h b/paddle/fluid/platform/device/ipu/ipu_compiler.h index 5576266b1a771..5d1e8c2727d8f 100644 --- a/paddle/fluid/platform/device/ipu/ipu_compiler.h +++ b/paddle/fluid/platform/device/ipu/ipu_compiler.h @@ -68,34 +68,29 @@ struct CompilerResources { std::unique_ptr optimizer; }; +// helper for lowering graph +struct GraphHelper { + explicit GraphHelper(const Graph *); + + const Graph *graph; + std::map vars_name_map; + std::map nodes_id_map; + std::vector sorted_ops; + std::vector sorted_vars_id; +}; + class Compiler { public: Compiler(); ~Compiler(); - void RegisterOpFunc(); - void Prepare(); - void LowerBody(const Graph *graph); - void InitInputs(Graph *graph, const std::vector &feed_list); + void Prepare(const Graph *graph); + void InitInputs(const std::vector &feed_list); void InitOutputs(const std::vector &fetch_list); - void LowerConstants(const Graph *graph, const Scope *scope); - void LowerWeights(const Graph *graph, const Scope *scope); - void LowerOptimier(const Graph *graph, const Scope *scope); - - void InsertTensors(const std::vector &output_names, - const std::vector &tensor_ids); - void InsertTensors(const std::vector &output_names, - const std::string &tensor_id); - void SetIpuIndexStage(const std::vector &tensor_ids, - const OpDesc *op_desc); - void SetIpuIndexStage(const std::string &tensor_id, const OpDesc *op_desc); - void SetAMPAttributes(const std::vector &tensor_ids, - const OpDesc *op_desc); - void 
SetAMPAttributes(const std::string &tensor_id, const OpDesc *op_desc); - void SetSerializeAttributes(const std::vector &tensor_ids, - const OpDesc *op_desc); - void SetSerializeAttributes(const std::string &tensor_id, - const OpDesc *op_desc); + void LowerConstants(const Scope *scope); + void LowerWeights(const Scope *scope); + void LowerBody(); + void LowerOptimizer(const Scope *scope); void SetIpuStrategy(const IpuStrategy &strategy) { ipu_strategy_ = &strategy; @@ -112,21 +107,34 @@ class Compiler { void SaveModelProtoNoCheck(const std::string &path); private: + void RegisterOpFunc(); std::vector GetOpInputs(const OpDesc *op); const std::vector &GetOpOutputs(const OpDesc *op); popart::DebugContext BuildDebugContext(const OpDesc *op); + void InsertTensors(const std::vector &output_names, + const std::vector &tensor_ids); + void InsertTensors(const std::vector &output_names, + const std::string &tensor_id); + void SetIpuIndexStage(const std::vector &tensor_ids, + const OpDesc *op_desc); + void SetIpuIndexStage(const std::string &tensor_id, const OpDesc *op_desc); + void SetAMPAttributes(const std::vector &tensor_ids, + const OpDesc *op_desc); + void SetAMPAttributes(const std::string &tensor_id, const OpDesc *op_desc); + void SetSerializeAttributes(const std::vector &tensor_ids, + const OpDesc *op_desc); + void SetSerializeAttributes(const std::string &tensor_id, + const OpDesc *op_desc); + private: std::unique_ptr builder_; std::unique_ptr resources_; + std::unique_ptr graph_helper_; using OpFunc = std::function; std::unordered_map name_function_; - // feed_list_ & fetch_list save paddle tensor id - std::vector feed_list_; - std::vector fetch_list_; - const IpuStrategy *ipu_strategy_ = nullptr; std::map custom_ops_; };