@@ -24,10 +24,13 @@ struct PeepholeOptimizeImpl {
   PeepholeOptimizeImpl(
       const std::shared_ptr<Graph>& graph,
       bool disable_shape_peepholes)
-      : graph_(graph), shape_peepholes_(!disable_shape_peepholes) {
-    run(graph->block());
-    PeepholeOptimizeListIdioms(graph);
-    PeepholeOptimizeAliasSensitive(graph);
+      : graph_(graph), shape_peepholes_(!disable_shape_peepholes) {}
+
+  bool run() {
+    bool changed = optimizeBlock(graph_->block());
+    changed |= PeepholeOptimizeListIdioms(graph_);
+    changed |= PeepholeOptimizeAliasSensitive(graph_);
+    return changed;
   }
 
   // The intent for this optimization pass is to catch all of the small, easy to
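
Note: the rewritten `run()` folds each sub-pass's result into `changed` with `|=` rather than `||`. A minimal standalone sketch of why that matters (hypothetical names, not part of this patch): logical `||` short-circuits, so once one sub-pass reported a change, the remaining sub-passes would never execute.

    #include <cstdio>

    // Stand-in for a sub-pass such as PeepholeOptimizeListIdioms: it must
    // still run on every sweep even if an earlier pass already made changes.
    bool subPass() {
      std::puts("sub-pass ran");
      return false;
    }

    int main() {
      bool changed = true;
      // changed = changed || subPass();  // short-circuits: subPass() skipped
      changed |= subPass();  // evaluates subPass() unconditionally, then ORs
      return changed ? 0 : 1;
    }
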
@@ -39,12 +42,13 @@ struct PeepholeOptimizeImpl {
   //
   // TODO: Decide what kind of fixed point strategy we will have
   //
-  void run(Block* block) {
+  bool optimizeBlock(Block* block) {
+    bool changed = false;
     for (auto it = block->nodes().begin(); it != block->nodes().end(); ++it) {
       auto* node = *it;
 
       for (Block* sub_block : node->blocks()) {
-        run(sub_block);
+        changed |= optimizeBlock(sub_block);
       }
 
       if (node->kind() != prim::Constant) {
@@ -55,6 +59,7 @@ struct PeepholeOptimizeImpl {
         for (Value* output : node->outputs()) {
           if (output->type()->cast<NoneType>()) {
             output->replaceAllUsesWith(graph_->insertConstant(IValue()));
+            changed = true;
           }
         }
       }
@@ -71,6 +76,7 @@ struct PeepholeOptimizeImpl {
             " (x._grad_sum_to_size(x, None) == x) is replaced with ",
             node->input(0)->debugName());
         node->output()->replaceAllUsesWith(node->input(0));
+        changed = true;
       } else {
         auto uses = node->output()->uses();
         for (Use u : uses) {
@@ -82,6 +88,7 @@ struct PeepholeOptimizeImpl {
                 " (x._grad_sum_to_size(y)._grad_sum_to_size(z) == x._grad_sum_to_size(z)) is replaced with ",
                 node->inputs().at(0)->debugName());
             u.user->replaceInput(0, node->inputs().at(0));
+            changed = true;
           }
         }
       }
@@ -102,6 +109,7 @@ struct PeepholeOptimizeImpl {
               " (x.expand(x.size()) == x) is replaced with ",
               node->namedInput(attr::self)->debugName());
           node->output()->replaceAllUsesWith(node->namedInput(attr::self));
+          changed = true;
         }
       }
     } else if (node->matches("aten::t(Tensor self) -> Tensor")) {
@@ -113,6 +121,7 @@ struct PeepholeOptimizeImpl {
             " (x.t().t() == x) is replaced with ",
             input_node->input()->debugName());
         node->output()->replaceAllUsesWith(input_node->input());
+        changed = true;
       }
     } else if (
         node->matches("aten::type_as(Tensor self, Tensor other) -> Tensor") &&
@@ -127,6 +136,7 @@ struct PeepholeOptimizeImpl {
             " (x.type_as(y) == x) is replaced with ",
             node->input(0)->debugName());
         node->output()->replaceAllUsesWith(node->input(0));
+        changed = true;
       }
     } else if (
         node->kind() == aten::Float || node->kind() == aten::Int ||
@@ -140,6 +150,7 @@ struct PeepholeOptimizeImpl {
             " (x.NumToTensor().TensorToNum() == x.NumToTensor()) is replaced with ",
             node->input()->debugName());
         node->output()->replaceAllUsesWith(input_node->input());
+        changed = true;
       }
     } else if (
         node->matches("aten::size(Tensor self) -> int[]") &&
@@ -154,6 +165,7 @@ struct PeepholeOptimizeImpl {
           IValue ival(sizes);
           auto const_sizes_val = node->owningGraph()->insertConstant(ival);
           node->output()->replaceAllUsesWith(const_sizes_val);
+          changed = true;
         }
       }
     } else if (
@@ -174,6 +186,7 @@ struct PeepholeOptimizeImpl {
             IValue ival(*ptt->sizes()[norm_index]);
             auto const_sizes_val = node->owningGraph()->insertConstant(ival);
             node->output()->replaceAllUsesWith(const_sizes_val);
+            changed = true;
           }
         }
       }
@@ -187,6 +200,7 @@ struct PeepholeOptimizeImpl {
         IValue ival(at::isFloatingType(dtype));
         auto new_constant = node->owningGraph()->insertConstant(ival);
         node->output()->replaceAllUsesWith(new_constant);
+        changed = true;
       }
     } else if (
         node->matches("aten::is_complex(Tensor self) -> bool") &&
@@ -220,6 +234,7 @@ struct PeepholeOptimizeImpl {
               " (True or False) with ",
               n.cond()->debugName());
           n.outputs().at(i)->replaceAllUsesWith(n.cond());
+          changed = true;
         }
       }
     } else if (
@@ -240,6 +255,7 @@ struct PeepholeOptimizeImpl {
           GRAPH_UPDATE(
               "Folding ", getHeader(node), " to ", output->debugName());
           node->output()->replaceAllUsesWith(output);
+          changed = true;
         }
       }
     } else if (
@@ -255,6 +271,7 @@ struct PeepholeOptimizeImpl {
             node->input(),
             " can't be optional");
         node->output()->replaceAllUsesWith(node->input());
+        changed = true;
       }
     } else if (node->kind() == prim::unchecked_cast) {
       // unchecked_cast is not generated for tensor properties, so we are not
@@ -267,6 +284,7 @@ struct PeepholeOptimizeImpl {
             getHeader(node),
             " as input type subtypes output type");
         node->output()->replaceAllUsesWith(node->input());
+        changed = true;
       }
     } else if (
         node->matches("prim::dtype(Tensor a) -> int") && shape_peepholes_) {
@@ -281,6 +299,7 @@ struct PeepholeOptimizeImpl {
             " with a type constant ",
             output->debugName());
         node->output()->replaceAllUsesWith(output);
+        changed = true;
       }
     } else if (
         node->matches("prim::device(Tensor a) -> Device") &&
@@ -295,6 +314,7 @@ struct PeepholeOptimizeImpl {
             " with a device constant ",
             output->debugName());
         node->output()->replaceAllUsesWith(output);
+        changed = true;
       }
     } else if (
         node->matches("aten::dim(Tensor self) -> int") && shape_peepholes_) {
@@ -309,6 +329,7 @@ struct PeepholeOptimizeImpl {
             " with a \"dim\" constant ",
             output->debugName());
         node->output()->replaceAllUsesWith(output);
+        changed = true;
       }
     } else if (
         node->matches("prim::is_cuda(Tensor a) -> bool") &&
@@ -324,6 +345,7 @@ struct PeepholeOptimizeImpl {
             " with a is_cuda constant ",
             output->debugName());
         node->output()->replaceAllUsesWith(output);
+        changed = true;
       }
     }
 
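
Note: the `prim::dtype`, `prim::device`, `aten::dim`, and `prim::is_cuda` branches above all fold a tensor property into a constant whenever the input's `TensorType` is complete enough. A hedged sketch of exercising one of them through the public entry point — the IR string and the exact folding behavior are illustrative assumptions, not asserted by this patch:

    #include <torch/csrc/jit/ir/irparser.h>
    #include <torch/csrc/jit/passes/peephole.h>

    void dtypePeepholeDemo() {
      auto graph = std::make_shared<torch::jit::Graph>();
      // %x carries a complete TensorType, so prim::dtype(%x) can be resolved
      // statically (assuming the parser preserves the type annotation).
      torch::jit::parseIR(R"IR(
        graph(%x : Float(2, 3)):
          %d : int = prim::dtype(%x)
          return (%d)
      )IR", graph.get());
      // Expected to return true: the dtype call folds to an int constant.
      bool changed = torch::jit::PeepholeOptimize(graph);
      (void)changed;
    }
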
@@ -333,6 +355,7 @@ struct PeepholeOptimizeImpl {
       // the limited speedup of these optimizations
       // runAliasingSensitivePeepholeTransformations(node);
     }
+    return changed;
   }
 
   // if either the inputs or outputs of an op alias graph's inputs or
@@ -345,10 +368,11 @@ struct PeepholeOptimizeImpl {
   //     s += x
   //     return s
   //
-  void runAliasingSensitivePeepholeTransformations(Node* node) {
+  bool runAliasingSensitivePeepholeTransformations(Node* node) {
     // this code is not currently enabled, see [aliasing sensitive
     // optimizations]
     TORCH_INTERNAL_ASSERT(false);
+    bool changed = false;
     if (node->matches(
             "aten::add(Tensor self, Scalar other, Scalar alpha) -> Tensor",
             /*const_inputs=*/{attr::alpha, attr::other}) ||
@@ -363,6 +387,7 @@ struct PeepholeOptimizeImpl {
             " (x + 0 == x - 0 == x) is replaced with ",
             node->input(0)->debugName());
         node->output()->replaceAllUsesWith(node->input(0));
+        changed = true;
       }
     } else if (
         node->matches(
@@ -378,16 +403,19 @@ struct PeepholeOptimizeImpl {
             " (x * 1 == x / 1 == x) is replaced with ",
             node->input(0)->debugName());
         node->output()->replaceAllUsesWith(node->input(0));
+        changed = true;
       }
     }
+    return changed;
   }
 
  private:
   std::shared_ptr<Graph> graph_;
   bool shape_peepholes_;
 };
 
-void FuseAddMM(Block* block) {
+bool FuseAddMM(Block* block) {
+  bool changed = false;
   for (Node* node : block->nodes()) {
     // XXX: remember that if you want to simplify an expression by combining
     // multiple nodes into a different one, then you need to check that they
@@ -488,15 +516,17 @@ void FuseAddMM(Block* block) {
                 " into ",
                 addmm_value->debugName());
             node->output()->replaceAllUsesWith(addmm_value);
+            changed = true;
             continue;
           }
         }
       }
     }
     for (Block* b : node->blocks()) {
-      FuseAddMM(b);
+      changed |= FuseAddMM(b);
     }
   }
+  return changed;
 }
 
 // FuseAddMM is a separate pass from peephole optimize because it is currently
@@ -506,17 +536,21 @@ void FuseAddMM(Block* block) {
 // since after ONNX translation we would see redundant Gemm ops with sub-optimal
 // inputs. This flag is exposed so that ONNX export can pass `true` to get the
 // fused behavior, but normal JIT peephole optimization is left alone.
-void FuseAddMM(const std::shared_ptr<Graph>& graph) {
-  FuseAddMM(graph->block());
+bool FuseAddMM(const std::shared_ptr<Graph>& graph) {
+  return FuseAddMM(graph->block());
 }
 
-void PeepholeOptimize(
+bool PeepholeOptimize(
     const std::shared_ptr<Graph>& graph,
     bool addmm_fusion_enabled) {
   PeepholeOptimizeImpl peephole(graph, addmm_fusion_enabled);
+  bool changed = peephole.run();
   GRAPH_DUMP("After PeepholeOptimize: ", graph);
   // Eliminate dead code created by any peephole passes we've just done
-  EliminateDeadCode(graph->block());
+  if (changed) {
+    EliminateDeadCode(graph->block());
+  }
+  return changed;
 }
 
 } // namespace jit
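
Note: with the passes now reporting whether they mutated the graph, the "fixed point strategy" TODO above has a straightforward caller-side answer. A minimal sketch under the post-patch signature — `peepholeToFixedPoint` is a hypothetical helper, and termination additionally assumes no pair of rewrites keeps undoing each other:

    #include <memory>
    #include <torch/csrc/jit/ir/ir.h>
    #include <torch/csrc/jit/passes/peephole.h>

    // Re-run the pass until a full sweep reports no change.
    void peepholeToFixedPoint(const std::shared_ptr<torch::jit::Graph>& graph) {
      while (torch::jit::PeepholeOptimize(graph, /*addmm_fusion_enabled=*/false)) {
      }
    }
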