diff --git a/paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc b/paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc
index 3d8d353cbf530..9fe50deaf2d72 100644
--- a/paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc
+++ b/paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc
@@ -56,7 +56,7 @@ const bool is_regularization_op(const std::string& op_namescope) {
 }
 
 void IpuOptimizerExtractPass::ApplyImpl(ir::Graph* graph) const {
-  // 这里构建的 op 符合 popart 的定义, 涉及到的一些值需要在 LowerOptimier 时获得
+  // optimizer values will be extracted when lowering optimizer in ipu_backend
   OpDesc new_op("popart_optimizer", {}, {}, {});
   new_op.SetAttr("op_role", 0);
   new_op.SetAttr("with_lr_sched", false);
@@ -86,7 +86,7 @@ void IpuOptimizerExtractPass::ApplyImpl(ir::Graph* graph) const {
       bool is_regularization = is_regularization_op(op_namescope);
 
       VLOG(10) << "found optimizer releated op: " << op_type;
-      // initial larning_rate will be set in LowerOptimier
+      // initial learning_rate will be set in ipu_backend
       set_ops.insert(op_type);
       if (op_type == "sgd") {
         auto type = std::string{"sgd"};
diff --git a/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc b/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc
index 975a4b62cc708..6806e44f09505 100644
--- a/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc
+++ b/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc
@@ -14,6 +14,7 @@
 
 #include "paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.h"
 
+#include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/ir/pass_tester_helper.h"
 #include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
 
@@ -28,11 +29,8 @@ void PopartCanonicalizationPass::ApplyImpl(ir::Graph* graph) const {
 
   auto custom_ops = Get<std::unordered_set<std::string>>("custom_ops");
   std::vector<std::string> missing_ops;
-  auto nodes = graph->Nodes();
-  for (auto* node : nodes) {
-    if (!node->IsOp()) {
-      continue;
-    }
+  auto sorted_ops = TopologySortOperations(*graph);
+  for (auto* node : sorted_ops) {
     auto* op = node->Op();
     auto op_type = op->Type();
 
diff --git a/paddle/fluid/platform/device/ipu/ipu_backend.cc b/paddle/fluid/platform/device/ipu/ipu_backend.cc
index febd5de01c058..e0b3b08a2313d 100644
--- a/paddle/fluid/platform/device/ipu/ipu_backend.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_backend.cc
@@ -43,17 +43,17 @@ void IpuBackend::Compile(Graph* graph,
                          const std::vector<std::string>& feed_list,
                          const std::vector<std::string>& fetch_list) {
   VLOG(10) << "enter IpuBackend::Compile";
-  compiler_->Prepare();
-  executor_->SetCompilerResources(compiler_->GetResources());
-
-  compiler_->InitInputs(graph, feed_list);
-  compiler_->LowerConstants(graph, scope_);
-  compiler_->LowerWeights(graph, scope_);
-  compiler_->LowerBody(graph);
+  compiler_->Prepare(graph);
+  compiler_->InitInputs(feed_list);
+  compiler_->LowerConstants(scope_);
+  compiler_->LowerWeights(scope_);
+  compiler_->LowerBody();
   compiler_->InitOutputs(fetch_list);
   if (ipu_strategy_->is_training) {
-    compiler_->LowerOptimier(graph, scope_);
+    compiler_->LowerOptimizer(scope_);
   }
+  executor_->SetCompilerResources(compiler_->GetResources());
+
   is_compiled_ = true;
   // when call compile, means a new graph
   is_prepared_ = false;
diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.cc b/paddle/fluid/platform/device/ipu/ipu_compiler.cc
index 15cba89e9e0be..52144c227f373 100644
--- a/paddle/fluid/platform/device/ipu/ipu_compiler.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_compiler.cc
@@ -98,6 +98,19 @@ TO GetCastSigAttrAllowNull(std::string attr, OpDesc* op_desc) {
   }
 }
 
+// Caches topologically sorted ops plus id/name lookups for the nodes of g.
+GraphHelper::GraphHelper(const Graph* g)
+    : graph(g), sorted_ops(framework::ir::TopologySortOperations(*g)) {
+  for (auto* n : g->Nodes()) {
+    nodes_id_map[n->id()] = n;
+    if (!n->IsVar()) continue;
+    vars_name_map[n->Name()] = n;
+    sorted_vars_id.push_back(n->id());
+  }
+  // ascending var ids; LowerWeights walks the vars in this order
+  std::sort(sorted_vars_id.begin(), sorted_vars_id.end());
+}
+
 Compiler::Compiler() { RegisterOpFunc(); }
 
 Compiler::~Compiler() {
@@ -105,9 +118,10 @@ Compiler::~Compiler() {
   resources_.reset();
 }
 
-void Compiler::Prepare() {
+void Compiler::Prepare(const Graph* graph) {
   builder_ = popart::Builder::create();
   resources_ = std::make_unique<CompilerResources>();
+  graph_helper_ = std::make_unique<GraphHelper>(graph);  // cache graph lookups
 }
 
 void Compiler::RegisterOpFunc() {
@@ -171,93 +185,24 @@ void Compiler::RegisterOpFunc() {
 #undef INT_VEC
 }
 
-void Compiler::LowerBody(const Graph* graph) {
-  VLOG(10) << "enter Compiler::LowerBody";
-  auto nodes = framework::ir::TopologySortOperations(*graph);
-  for (auto* node : nodes) {
-    auto* op_desc = node->Op();
-    auto op_type = op_desc->Type();
-    VLOG(10) << "lowering op: " << op_type;
-
-    if (op_type == "popart_constant") {
-      // pass
-    } else if (op_type == "popart_optimizer") {
-      // pass
-    } else if (op_type == "popart_checkpointoutput") {
-      auto inputs = GetOpInputs(op_desc);
-      auto outputs = GetOpOutputs(op_desc);
-      auto output_ids = builder_->checkpointOutput(inputs);
-      InsertTensors(outputs, output_ids);
-    } else if (op_type == "popart_custom_op") {
-      auto inputs = GetOpInputs(op_desc);
-      auto outputs = GetOpOutputs(op_desc);
-      auto debug_context = BuildDebugContext(op_desc);
-      auto attributes = std::map<std::string, popart::any>{};
-      for (auto& attr : op_desc->GetAttrMap()) {
-        CustomOpAttrVisitor visitor(&attributes, attr.first);
-        boost::apply_visitor(visitor, attr.second);
-      }
-      auto __op_type =
-          BOOST_GET_CONST(std::string, op_desc->GetAttr("__op_type"));
-      VLOG(10) << "Build graph from custom op: " << __op_type;
-      auto it = custom_ops_.find(__op_type);
-      auto output_ids =
-          builder_->customOp(it->second.popart_op, it->second.popart_op.version,
-                             inputs, outputs.size(), attributes, debug_context);
-      SetIpuIndexStage(output_ids, op_desc);
-      InsertTensors(outputs, output_ids);
-    } else if (op_type == "popart_printtensor") {
-      auto inputs = GetOpInputs(op_desc);
-      auto outputs = GetOpOutputs(op_desc);
-      auto debug_context = BuildDebugContext(op_desc);
-      auto print_gradient =
-          BOOST_GET_CONST(int64_t, op_desc->GetAttr("print_gradient"));
-      auto title = BOOST_GET_CONST(std::string, op_desc->GetAttr("title"));
-      auto output_ids = builder_->aiGraphcoreOpset1().printtensor(
-          inputs, print_gradient, debug_context, title);
-      SetIpuIndexStage(output_ids, op_desc);
-      InsertTensors(outputs, output_ids);
-    } else {
-      auto itr = name_function_.find(op_type);
-      if (itr != name_function_.end()) {
-        itr->second(node->Op());
-      } else {
-        PADDLE_THROW(platform::errors::NotFound(
-            "%s is not registered, please check for unsupported operators for "
-            "running on IPU",
-            op_type));
-      }
-    }
-  }
-  VLOG(10) << "leave Compiler::LowerBody";
-}
-
-void Compiler::InitInputs(Graph* graph,
-                          const std::vector<std::string>& feed_list) {
+// Registers each feed var as a popart input tensor (type/shape from VarDesc).
+void Compiler::InitInputs(const std::vector<std::string>& feed_list) {
   for (const auto& feed_name : feed_list) {
-    feed_list_.push_back(feed_name);
-    for (const Node* n : graph->Nodes()) {
-      if (n->IsVar()) {
-        auto* var_desc = n->Var();
-        if (feed_name == var_desc->Name()) {
-          VLOG(10) << "feed_name= " << var_desc->Name();
-          auto data_type = VarType2PopartType(var_desc->GetDataType());
-          popart::TensorInfo input_info{data_type, var_desc->GetShape()};
-          VLOG(10) << "popart input_info = " << input_info;
-          popart::TensorId tensor_id =
-              builder_->addInputTensor(input_info, feed_name);
-          VLOG(10) << "popart input tensor id = " << tensor_id;
-          resources_->inputs.push_back(tensor_id);
-          resources_->tensors.emplace(var_desc->Name(), tensor_id);
-        }
-      }
-    }
+    auto* node = graph_helper_->vars_name_map.at(feed_name);  // throws on miss
+    auto* var_desc = node->Var();
+    VLOG(10) << "feed_name= " << var_desc->Name();
+    auto data_type = VarType2PopartType(var_desc->GetDataType());
+    popart::TensorInfo input_info{data_type, var_desc->GetShape()};
+    VLOG(10) << "popart input_info = " << input_info;
+    auto tensor_id = builder_->addInputTensor(input_info, feed_name);
+    VLOG(10) << "popart input tensor id = " << tensor_id;
+    resources_->inputs.push_back(tensor_id);
+    resources_->tensors.emplace(var_desc->Name(), tensor_id);
   }
 }
 
 void Compiler::InitOutputs(const std::vector<std::string>& fetch_list) {
   for (const auto& fetch_name : fetch_list) {
-    fetch_list_.push_back(fetch_name);
     auto tensor = resources_->tensors.find(fetch_name);
     PADDLE_ENFORCE_NE(
         tensor, resources_->tensors.end(),
@@ -271,14 +216,10 @@ void Compiler::InitOutputs(const std::vector<std::string>& fetch_list) {
   }
 }
 
-void Compiler::LowerConstants(const Graph* graph, const Scope* scope) {
+void Compiler::LowerConstants(const Scope* scope) {
   auto& kid_scope = scope->NewScope();
   VLOG(10) << "enter Compiler::LowerConstants";
-  for (auto* node : graph->Nodes()) {
-    if (!node->IsOp()) {
-      continue;
-    }
-
+  for (auto* node : graph_helper_->sorted_ops) {
     auto* op_desc = node->Op();
     auto op_type = op_desc->Type();
     if (op_type == "popart_constant") {
@@ -308,17 +249,16 @@ void Compiler::LowerConstants(const Graph* graph, const Scope* scope) {
   VLOG(10) << "leave Compiler::LowerConstants";
 }
 
-void Compiler::LowerWeights(const Graph* graph, const Scope* scope) {
+void Compiler::LowerWeights(const Scope* scope) {
   VLOG(10) << "enter Compiler::LowerWeights";
-  PADDLE_ENFORCE_NOT_NULL(scope,
-                          platform::errors::PreconditionNotMet(
-                              "You should call set_scope before LowerWeights"));
   // at this step, the graph doesn't contains optimizer related states
-  for (const auto* node : graph->Nodes()) {
+  for (auto id : graph_helper_->sorted_vars_id) {
+    auto* node = graph_helper_->nodes_id_map[id];
     if (node->IsVar() && !node->IsCtrlVar() && node->Var()) {
       if (node->Var()->Persistable() && node->inputs.empty()) {
         auto var_name = node->Var()->Name();
         if (resources_->tensors.count(var_name) != 0) {
+          VLOG(10) << "found existed one, skip lowering Weight: " << var_name;
           continue;
         }
         VLOG(10) << "lowering weight: " << var_name;
@@ -344,12 +284,68 @@ void Compiler::LowerWeights(const Graph* graph, const Scope* scope) {
   VLOG(10) << "leave Compiler::LowerWeights";
 }
 
-void Compiler::LowerOptimier(const Graph* graph, const Scope* scope) {
-  for (auto* node : graph->Nodes()) {
-    if (!node->IsOp()) {
-      continue;
+// Visits ops in topological order: special-cases a few popart ops inline and
+// dispatches the rest to name_function_; constant/optimizer ops are skipped.
+void Compiler::LowerBody() {
+  VLOG(10) << "enter Compiler::LowerBody";
+  for (auto* node : graph_helper_->sorted_ops) {
+    auto* op_desc = node->Op();
+    auto op_type = op_desc->Type();
+    VLOG(10) << "lowering op: " << op_type;
+
+    if (op_type == "popart_constant" || op_type == "popart_optimizer") {
+      // pass
+    } else if (op_type == "popart_checkpointoutput") {
+      auto inputs = GetOpInputs(op_desc);
+      auto outputs = GetOpOutputs(op_desc);
+      auto output_ids = builder_->checkpointOutput(inputs);
+      InsertTensors(outputs, output_ids);
+    } else if (op_type == "popart_custom_op") {
+      auto inputs = GetOpInputs(op_desc);
+      auto outputs = GetOpOutputs(op_desc);
+      auto debug_context = BuildDebugContext(op_desc);
+      auto attributes = std::map<std::string, popart::any>{};
+      for (auto& attr : op_desc->GetAttrMap()) {
+        CustomOpAttrVisitor visitor(&attributes, attr.first);
+        boost::apply_visitor(visitor, attr.second);
+      }
+      auto custom_type =
+          BOOST_GET_CONST(std::string, op_desc->GetAttr("__op_type"));
+      VLOG(10) << "Build graph from custom op: " << custom_type;
+      const auto& custom_op = custom_ops_.at(custom_type);  // throws on miss
+      auto output_ids =
+          builder_->customOp(custom_op.popart_op, custom_op.popart_op.version,
+                             inputs, outputs.size(), attributes, debug_context);
+      SetIpuIndexStage(output_ids, op_desc);
+      InsertTensors(outputs, output_ids);
+    } else if (op_type == "popart_printtensor") {
+      auto inputs = GetOpInputs(op_desc);
+      auto outputs = GetOpOutputs(op_desc);
+      auto debug_context = BuildDebugContext(op_desc);
+      auto print_gradient =
+          BOOST_GET_CONST(int64_t, op_desc->GetAttr("print_gradient"));
+      auto title = BOOST_GET_CONST(std::string, op_desc->GetAttr("title"));
+      auto output_ids = builder_->aiGraphcoreOpset1().printtensor(
+          inputs, print_gradient, debug_context, title);
+      SetIpuIndexStage(output_ids, op_desc);
+      InsertTensors(outputs, output_ids);
+    } else {
+      auto itr = name_function_.find(op_type);
+      if (itr != name_function_.end()) {
+        itr->second(op_desc);
+      } else {
+        PADDLE_THROW(platform::errors::NotFound(
+            "%s is not registered, please check for unsupported operators for "
+            "running on IPU",
+            op_type));
+      }
     }
+  }
+  VLOG(10) << "leave Compiler::LowerBody";
+}
 
+void Compiler::LowerOptimizer(const Scope* scope) {
+  for (auto* node : graph_helper_->sorted_ops) {
     auto* op_desc = node->Op();
     auto op_type = op_desc->Type();
     if (op_type == "popart_optimizer") {
diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.h b/paddle/fluid/platform/device/ipu/ipu_compiler.h
index 5576266b1a771..5d1e8c2727d8f 100644
--- a/paddle/fluid/platform/device/ipu/ipu_compiler.h
+++ b/paddle/fluid/platform/device/ipu/ipu_compiler.h
@@ -68,34 +68,29 @@ struct CompilerResources {
   std::unique_ptr<popart::Optimizer> optimizer;
 };
 
+// Lookup tables over one Graph, built once in the ctor and reused by Compiler
+struct GraphHelper {
+  explicit GraphHelper(const Graph *);
+
+  const Graph *graph;                           // the graph given to the ctor
+  std::map<std::string, Node *> vars_name_map;  // var node Name() -> node
+  std::map<int, Node *> nodes_id_map;           // node id() -> node, all nodes
+  std::vector<Node *> sorted_ops;               // ops in topological order
+  std::vector<int> sorted_vars_id;              // var node ids, ascending
+};
+
 class Compiler {
  public:
   Compiler();
   ~Compiler();
 
-  void RegisterOpFunc();
-  void Prepare();
-  void LowerBody(const Graph *graph);
-  void InitInputs(Graph *graph, const std::vector<std::string> &feed_list);
+  void Prepare(const Graph *graph);
+  void InitInputs(const std::vector<std::string> &feed_list);
   void InitOutputs(const std::vector<std::string> &fetch_list);
-  void LowerConstants(const Graph *graph, const Scope *scope);
-  void LowerWeights(const Graph *graph, const Scope *scope);
-  void LowerOptimier(const Graph *graph, const Scope *scope);
-
-  void InsertTensors(const std::vector<std::string> &output_names,
-                     const std::vector<std::string> &tensor_ids);
-  void InsertTensors(const std::vector<std::string> &output_names,
-                     const std::string &tensor_id);
-  void SetIpuIndexStage(const std::vector<std::string> &tensor_ids,
-                        const OpDesc *op_desc);
-  void SetIpuIndexStage(const std::string &tensor_id, const OpDesc *op_desc);
-  void SetAMPAttributes(const std::vector<std::string> &tensor_ids,
-                        const OpDesc *op_desc);
-  void SetAMPAttributes(const std::string &tensor_id, const OpDesc *op_desc);
-  void SetSerializeAttributes(const std::vector<std::string> &tensor_ids,
-                              const OpDesc *op_desc);
-  void SetSerializeAttributes(const std::string &tensor_id,
-                              const OpDesc *op_desc);
+  void LowerConstants(const Scope *scope);
+  void LowerWeights(const Scope *scope);
+  void LowerBody();
+  void LowerOptimizer(const Scope *scope);
 
   void SetIpuStrategy(const IpuStrategy &strategy) {
     ipu_strategy_ = &strategy;
@@ -112,21 +107,34 @@ class Compiler {
   void SaveModelProtoNoCheck(const std::string &path);
 
  private:
+  void RegisterOpFunc();
   std::vector<std::string> GetOpInputs(const OpDesc *op);
   const std::vector<std::string> &GetOpOutputs(const OpDesc *op);
   popart::DebugContext BuildDebugContext(const OpDesc *op);
 
+  void InsertTensors(const std::vector<std::string> &output_names,
+                     const std::vector<std::string> &tensor_ids);
+  void InsertTensors(const std::vector<std::string> &output_names,
+                     const std::string &tensor_id);
+  void SetIpuIndexStage(const std::vector<std::string> &tensor_ids,
+                        const OpDesc *op_desc);
+  void SetIpuIndexStage(const std::string &tensor_id, const OpDesc *op_desc);
+  void SetAMPAttributes(const std::vector<std::string> &tensor_ids,
+                        const OpDesc *op_desc);
+  void SetAMPAttributes(const std::string &tensor_id, const OpDesc *op_desc);
+  void SetSerializeAttributes(const std::vector<std::string> &tensor_ids,
+                              const OpDesc *op_desc);
+  void SetSerializeAttributes(const std::string &tensor_id,
+                              const OpDesc *op_desc);
+
  private:
   std::unique_ptr<popart::Builder> builder_;
   std::unique_ptr<CompilerResources> resources_;
+  std::unique_ptr<GraphHelper> graph_helper_;
 
   using OpFunc = std::function<void(OpDesc *op_desc)>;
   std::unordered_map<std::string, OpFunc> name_function_;
 
-  // feed_list_ & fetch_list save paddle tensor id
-  std::vector<std::string> feed_list_;
-  std::vector<std::string> fetch_list_;
-
   const IpuStrategy *ipu_strategy_ = nullptr;
   std::map<std::string, IpuCustomOpIdentifier> custom_ops_;
 };