From 1b0e3d3b8e27a39d3e8585ec6d344dcf79315b43 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Mon, 15 Jul 2024 20:23:24 +0900
Subject: [PATCH 01/75] initial prototype

---
 src/coreclr/jit/objectalloc.cpp | 153 +++++++++++++++++++++++++++++++-
 src/coreclr/jit/objectalloc.h   |  34 ++++++-
 2 files changed, 183 insertions(+), 4 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 89e28c5978c6d4..1ef558791f6e6c 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -30,9 +30,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 //
 PhaseStatus ObjectAllocator::DoPhase()
 {
-    if ((comp->optMethodFlags & OMF_HAS_NEWOBJ) == 0)
+    if ((comp->optMethodFlags & OMF_HAS_NEWOBJ) == 0 && (comp->optMethodFlags & OMF_HAS_NEWARRAY) == 0)
     {
-        JITDUMP("no newobjs in this method; punting\n");
+        JITDUMP("no newobjs or newarr in this method; punting\n");
         return PhaseStatus::MODIFIED_NOTHING;
     }
 
@@ -391,6 +391,7 @@ bool ObjectAllocator::MorphAllocObjNodes()
             GenTree* data     = nullptr;
 
             bool canonicalAllocObjFound = false;
+            bool canonicalAllocArrFound = false;
 
             if (stmtExpr->OperIs(GT_STORE_LCL_VAR) && stmtExpr->TypeIs(TYP_REF))
             {
@@ -400,6 +401,75 @@ bool ObjectAllocator::MorphAllocObjNodes()
                 {
                     canonicalAllocObjFound = true;
                 }
+                else if (data->IsHelperCall())
+                {
+                    GenTreeCall* call = data->AsCall();
+                    if (call->GetHelperNum() == CORINFO_HELP_NEWARR_1_VC)
+                    {
+                        canonicalAllocArrFound = true;
+                    }
+                }
+            }
+
+            if (canonicalAllocArrFound)
+            {
+                GenTreeCall* asCall = data->AsCall();
+                assert(asCall->GetHelperNum() == CORINFO_HELP_NEWARR_1_VC);
+                //------------------------------------------------------------------------
+                // We expect the following expression tree at this point
+                //  STMTx (IL 0x... ???)
+                //    * STORE_LCL_VAR   ref
+                //    \--*  CALL help ref    CORINFO_HELP_NEWARR_1_VC
+                //       +--*  CNS_INT(h) long
+                //       \--*  *
+                //------------------------------------------------------------------------
+
+                unsigned int         lclNum       = stmtExpr->AsLclVar()->GetLclNum();
+                CallArg*             arg          = asCall->gtArgs.GetArgByIndex(0);
+                GenTree*             node         = arg->GetNode();
+                CORINFO_CLASS_HANDLE clsHnd       = (CORINFO_CLASS_HANDLE)node->AsIntConCommon()->IntegralValue();
+                GenTree*             len          = arg->GetNext()->GetNode();
+                const char*          onHeapReason = nullptr;
+                unsigned int         structSize   = 0;
+                bool                 canStack     = false;
+
+                // Don't attempt to do stack allocations inside basic blocks that may be in a loop.
+                //
+                if (!IsObjectStackAllocationEnabled())
+                {
+                    onHeapReason = "[object stack allocation disabled]";
+                    canStack     = false;
+                }
+                else if (basicBlockHasBackwardJump)
+                {
+                    onHeapReason = "[alloc in loop]";
+                    canStack     = false;
+                }
+                else if (!len->IsCnsIntOrI())
+                {
+                    onHeapReason = "[non-constant size]";
+                    canStack     = false;
+                }
+                else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, (unsigned int)len->AsIntCon()->gtIconVal,
+                                                   &structSize, &onHeapReason))
+                {
+                    // reason set by the call
+                    canStack = false;
+                }
+                else
+                {
+                    JITDUMP("Allocating V%02u on the stack\n", lclNum);
+                    canStack                       = true;
+                    const unsigned int stackLclNum = MorphNewArrNodeIntoStackAlloc(asCall, clsHnd, structSize, block, stmt);
+                    m_HeapLocalToStackLocalMap.AddOrUpdate(lclNum, stackLclNum);
+                    // We keep the set of possibly-stack-pointing pointers as a superset of the set of
+                    // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both sets.
+                    MarkLclVarAsDefinitelyStackPointing(lclNum);
+                    MarkLclVarAsPossiblyStackPointing(lclNum);
+                    stmt->GetRootNode()->gtBashToNOP();
+                    comp->optMethodFlags |= OMF_HAS_OBJSTACKALLOC;
+                    didStackAllocate = true;
+                }
             }
 
             if (canonicalAllocObjFound)
@@ -443,7 +513,7 @@ bool ObjectAllocator::MorphAllocObjNodes()
                     onHeapReason = "[alloc in loop]";
                     canStack     = false;
                 }
-                else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, &onHeapReason))
+                else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, 0, NULL, &onHeapReason))
                 {
                     // reason set by the call
                     canStack = false;
@@ -555,6 +625,77 @@ GenTree* ObjectAllocator::MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* alloc
     return helperCall;
 }
 
+unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*         newArr,
+                                                            CORINFO_CLASS_HANDLE clsHnd,
+                                                            unsigned int         structSize,
+                                                            BasicBlock*          block,
+                                                            Statement*           stmt)
+{
+    assert(newArr != nullptr);
+    assert(m_AnalysisDone);
+    assert(clsHnd != NO_CLASS_HANDLE);
+
+    const bool         shortLifetime = false;
+    const unsigned int lclNum        = comp->lvaGrabTemp(shortLifetime DEBUGARG("stack allocated array temp"));
+
+    comp->lvaSetStruct(lclNum, comp->typGetBlkLayout(structSize), /* unsafeValueClsCheck */ false);
+
+    // Initialize the object memory if necessary.
+    bool             bbInALoop  = block->HasFlag(BBF_BACKWARD_JUMP);
+    bool             bbIsReturn = block->KindIs(BBJ_RETURN);
+    LclVarDsc* const lclDsc     = comp->lvaGetDesc(lclNum);
+    lclDsc->lvStackAllocatedBox = false;
+    if (comp->fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn))
+    {
+        //------------------------------------------------------------------------
+        // STMTx (IL 0x... ???)
+        //   *  STORE_LCL_VAR   struct
+        //   \--*  CNS_INT   int    0
+        //------------------------------------------------------------------------
+
+        GenTree*   init     = comp->gtNewStoreLclVarNode(lclNum, comp->gtNewIconNode(0));
+        Statement* initStmt = comp->gtNewStmt(init);
+
+        comp->fgInsertStmtBefore(block, stmt, initStmt);
+    }
+    else
+    {
+        JITDUMP("\nSuppressing zero-init for V%02u -- expect to zero in prolog\n", lclNum);
+        lclDsc->lvSuppressedZeroInit = 1;
+        comp->compSuppressedZeroInit = true;
+    }
+
+    // Initialize the vtable slot.
+    //
+    //------------------------------------------------------------------------
+    // STMTx (IL 0x... ???)
+    //   * STORE_LCL_FLD    long
+    //   \--*  CNS_INT(h) long
+    //------------------------------------------------------------------------
+
+    // Initialize the method table pointer.
+    GenTree*   init     = comp->gtNewStoreLclFldNode(lclNum, TYP_I_IMPL, 0, newArr->gtArgs.GetArgByIndex(0)->GetNode());
+    Statement* initStmt = comp->gtNewStmt(init);
+
+    comp->fgInsertStmtBefore(block, stmt, initStmt);
+
+    // Initialize the length of array.
+    //
+    //------------------------------------------------------------------------
+    // STMTx (IL 0x... ???)
+    //   * STORE_LCL_FLD    long
+    //   \--*  CNS_INT long
+    //------------------------------------------------------------------------
+
+    // Pass the length of the array.
+    GenTree*   len     = comp->gtNewStoreLclFldNode(lclNum, TYP_I_IMPL, 8, newArr->gtArgs.GetArgByIndex(1)->GetNode());
+    Statement* lenStmt = comp->gtNewStmt(len);
+
+    comp->fgInsertStmtBefore(block, stmt, lenStmt);
+
+    return lclNum;
+}
+
 //------------------------------------------------------------------------
 // MorphAllocObjNodeIntoStackAlloc: Morph a GT_ALLOCOBJ node into stack
 //                                  allocation.
@@ -682,6 +823,9 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack<GenTree*>* parent
             case GT_EQ:
             case GT_NE:
             case GT_NULLCHECK:
+            case GT_ARR_LENGTH:
+            case GT_ARR_ELEM:
+            case GT_INDEX_ADDR:
                 canLclVarEscapeViaParentStack = false;
                 break;
 
@@ -778,6 +922,9 @@ void ObjectAllocator::UpdateAncestorTypes(GenTree* tree, ArrayStack<GenTree*>* p
             case GT_EQ:
             case GT_NE:
             case GT_NULLCHECK:
+            case GT_ARR_LENGTH:
+            case GT_ARR_ELEM:
+            case GT_INDEX_ADDR:
                 break;
 
             case GT_COMMA:
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index 8a873be8b34ad7..03f01e6c7d28d3 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -47,7 +47,11 @@ class ObjectAllocator final : public Phase
     virtual PhaseStatus DoPhase() override;
 
 private:
-    bool         CanAllocateLclVarOnStack(unsigned int lclNum, CORINFO_CLASS_HANDLE clsHnd, const char** reason);
+    bool         CanAllocateLclVarOnStack(unsigned int         lclNum,
+                                          CORINFO_CLASS_HANDLE clsHnd,
+                                          unsigned int         length,
+                                          unsigned int*        structSize,
+                                          const char**         reason);
     bool         CanLclVarEscape(unsigned int lclNum);
     void         MarkLclVarAsPossiblyStackPointing(unsigned int lclNum);
     void         MarkLclVarAsDefinitelyStackPointing(unsigned int lclNum);
@@ -64,6 +68,8 @@ class ObjectAllocator final : public Phase
     GenTree*     MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* allocObj);
     unsigned int MorphAllocObjNodeIntoStackAlloc(
         GenTreeAllocObj* allocObj, CORINFO_CLASS_HANDLE clsHnd, bool isValueClass, BasicBlock* block, Statement* stmt);
+    unsigned int MorphNewArrNodeIntoStackAlloc(
+        GenTreeCall* newArr, CORINFO_CLASS_HANDLE clsHnd, unsigned int structSize, BasicBlock* block, Statement* stmt);
     struct BuildConnGraphVisitorCallbackData;
     bool CanLclVarEscapeViaParentStack(ArrayStack<GenTree*>* parentStack, unsigned int lclNum);
     void UpdateAncestorTypes(GenTree* tree, ArrayStack<GenTree*>* parentStack, var_types newType);
@@ -119,6 +125,8 @@ inline void ObjectAllocator::EnableObjectStackAllocation()
 //
 inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNum,
                                                       CORINFO_CLASS_HANDLE clsHnd,
+                                                      unsigned int         length,
+                                                      unsigned int*        structSize,
                                                       const char**         reason)
 {
     assert(m_AnalysisDone);
@@ -150,6 +158,25 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNu
 
         classSize = comp->info.compCompHnd->getClassSize(clsHnd);
     }
+    else if (comp->info.compCompHnd->isSDArray(clsHnd))
+    {
+        CorInfoType type = comp->info.compCompHnd->getChildType(clsHnd, &clsHnd);
+        if (type != CORINFO_TYPE_UNDEF && clsHnd == NULL)
+        {
+            // This is an array of primitive types
+            classSize = (unsigned int)OFFSETOF__CORINFO_Array__data + genTypeSize(JITtype2varType(type)) * length;
+        }
+        else if (comp->info.compCompHnd->isValueClass(clsHnd))
+        {
+            classSize =
+                (unsigned int)OFFSETOF__CORINFO_Array__data + comp->info.compCompHnd->getClassSize(clsHnd) * length;
+        }
+        else
+        {
+            *reason = "[array contains gc refs]";
+            return false;
+        }
+    }
     else
     {
         if (!enableRefClasses)
@@ -181,6 +208,11 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNu
         return false;
     }
 
+    if (structSize != NULL)
+    {
+        *structSize = classSize;
+    }
+
     return true;
 }
 

From 57b7e42fbe27f51a3a3321f9d2e511a069d4c3da Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Mon, 15 Jul 2024 22:50:42 +0900
Subject: [PATCH 02/75] Morph ARR_LENGTH and INDEX_ADDR

---
 src/coreclr/jit/objectalloc.cpp | 65 ++++++++++++++++++++++-----------
 src/coreclr/jit/objectalloc.h   | 21 ++++++-----
 2 files changed, 54 insertions(+), 32 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 1ef558791f6e6c..f652ea1e5338ca 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -415,14 +415,6 @@ bool ObjectAllocator::MorphAllocObjNodes()
             {
                 GenTreeCall* asCall = data->AsCall();
                 assert(asCall->GetHelperNum() == CORINFO_HELP_NEWARR_1_VC);
-                //------------------------------------------------------------------------
-                // We expect the following expression tree at this point
-                //  STMTx (IL 0x... ???)
-                //    * STORE_LCL_VAR   ref
-                //    \--*  CALL help ref    CORINFO_HELP_NEWARR_1_VC
-                //       +--*  CNS_INT(h) long
-                //       \--*  *
-                //------------------------------------------------------------------------
 
                 unsigned int         lclNum       = stmtExpr->AsLclVar()->GetLclNum();
                 CallArg*             arg          = asCall->gtArgs.GetArgByIndex(0);
@@ -430,7 +422,7 @@ bool ObjectAllocator::MorphAllocObjNodes()
                 CORINFO_CLASS_HANDLE clsHnd       = (CORINFO_CLASS_HANDLE)node->AsIntConCommon()->IntegralValue();
                 GenTree*             len          = arg->GetNext()->GetNode();
                 const char*          onHeapReason = nullptr;
-                unsigned int         structSize   = 0;
+                unsigned int         blockSize    = 0;
                 bool                 canStack     = false;
 
                 // Don't attempt to do stack allocations inside basic blocks that may be in a loop.
@@ -450,8 +442,8 @@ bool ObjectAllocator::MorphAllocObjNodes()
                     onHeapReason = "[non-constant size]";
                     canStack     = false;
                 }
-                else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, (unsigned int)len->AsIntCon()->gtIconVal,
-                                                   &structSize, &onHeapReason))
+                else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, (unsigned int)len->AsIntCon()->gtIconVal, &blockSize,
+                                                   &onHeapReason))
                 {
                     // reason set by the call
                     canStack = false;
@@ -459,8 +451,10 @@ bool ObjectAllocator::MorphAllocObjNodes()
                 else
                 {
                     JITDUMP("Allocating V%02u on the stack\n", lclNum);
-                    canStack                       = true;
-                    const unsigned int stackLclNum = MorphNewArrNodeIntoStackAlloc(asCall, clsHnd, structSize, block, stmt);
+                    canStack = true;
+                    const unsigned int stackLclNum =
+                        MorphNewArrNodeIntoStackAlloc(asCall, clsHnd, (unsigned int)len->AsIntCon()->gtIconVal,
+                                                      blockSize, block, stmt);
                     m_HeapLocalToStackLocalMap.AddOrUpdate(lclNum, stackLclNum);
                     // We keep the set of possibly-stack-pointing pointers as a superset of the set of
                     // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both sets.
@@ -627,7 +621,8 @@ GenTree* ObjectAllocator::MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* alloc
 
 unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*         newArr,
                                                             CORINFO_CLASS_HANDLE clsHnd,
-                                                            unsigned int         structSize,
+                                                            unsigned int         length,
+                                                            unsigned int         blockSize,
                                                             BasicBlock*          block,
                                                             Statement*           stmt)
 {
@@ -638,7 +633,7 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*
     const bool         shortLifetime = false;
     const unsigned int lclNum        = comp->lvaGrabTemp(shortLifetime DEBUGARG("stack allocated array temp"));
 
-    comp->lvaSetStruct(lclNum, comp->typGetBlkLayout(structSize), /* unsafeValueClsCheck */ false);
+    comp->lvaSetStruct(lclNum, comp->typGetBlkLayout(blockSize), /* unsafeValueClsCheck */ false);
 
     // Initialize the object memory if necessary.
     bool             bbInALoop  = block->HasFlag(BBF_BACKWARD_JUMP);
@@ -679,18 +674,18 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*
 
     comp->fgInsertStmtBefore(block, stmt, initStmt);
 
-    // Initialize the length of array.
+    // Initialize the length slot.
     //
     //------------------------------------------------------------------------
     // STMTx (IL 0x... ???)
     //   * STORE_LCL_FLD    long
-    //   \--*  CNS_INT long
+    //   \--*  CNS_INT    long
     //------------------------------------------------------------------------
 
-    // Pass the length of the array.
-    GenTree*   len     = comp->gtNewStoreLclFldNode(lclNum, TYP_I_IMPL, 8, newArr->gtArgs.GetArgByIndex(1)->GetNode());
+    // Pass the total length of the array.
+    GenTree*   len     = comp->gtNewStoreLclFldNode(lclNum, TYP_I_IMPL, genTypeSize(TYP_I_IMPL),
+                                                    comp->gtNewIconNode(length, TYP_I_IMPL));
     Statement* lenStmt = comp->gtNewStmt(len);
-
     comp->fgInsertStmtBefore(block, stmt, lenStmt);
 
     return lclNum;
@@ -824,7 +819,6 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack<GenTree*>* parent
             case GT_NE:
             case GT_NULLCHECK:
             case GT_ARR_LENGTH:
-            case GT_ARR_ELEM:
             case GT_INDEX_ADDR:
                 canLclVarEscapeViaParentStack = false;
                 break;
@@ -923,7 +917,6 @@ void ObjectAllocator::UpdateAncestorTypes(GenTree* tree, ArrayStack<GenTree*>* p
             case GT_NE:
             case GT_NULLCHECK:
             case GT_ARR_LENGTH:
-            case GT_ARR_ELEM:
             case GT_INDEX_ADDR:
                 break;
 
@@ -1172,6 +1165,34 @@ void ObjectAllocator::RewriteUses()
                     }
                 }
             }
+            // Rewrite INDEX_ADDR to ADD for stack-allocated arrays.
+            else if (tree->OperIs(GT_INDEX_ADDR))
+            {
+                GenTreeIndexAddr* const indexAddr = tree->AsIndexAddr();
+                GenTree* const          arr       = indexAddr->Arr();
+                GenTree* const          ind       = indexAddr->Index();
+
+                if (arr->OperIs(GT_LCL_ADDR) && ind->IsCnsIntOrI())
+                {
+                    JITDUMP("Rewriting INDEX_ADDR to ADD [%06u]\n", m_compiler->dspTreeID(tree));
+                    GenTree* const add = m_compiler->gtNewOperNode(GT_ADD, TYP_BYREF, arr, ind);
+                    *use               = add;
+                }
+            }
+            // Rewrite ARR_LENGTH to LCL_FLD for stack-allocated arrays.
+            else if (tree->OperIsArrLength())
+            {
+                GenTreeArrLen* const arrLen = tree->AsArrLen();
+                GenTree* const       arr    = arrLen->ArrRef();
+
+                if (arr->OperIs(GT_LCL_ADDR))
+                {
+                    JITDUMP("Rewriting ARR_LENGTH to LCL_FLD [%06u]\n", m_compiler->dspTreeID(tree));
+                    GenTree* const lclFld = m_compiler->gtNewLclFldNode(arr->AsLclVarCommon()->GetLclNum(), TYP_I_IMPL,
+                                                                        OFFSETOF__CORINFO_Array__length);
+                    *use = lclFld;
+                }
+            }
 
             return Compiler::fgWalkResult::WALK_CONTINUE;
         }
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index 03f01e6c7d28d3..e39f9d666b6b2d 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -50,7 +50,7 @@ class ObjectAllocator final : public Phase
     bool         CanAllocateLclVarOnStack(unsigned int         lclNum,
                                           CORINFO_CLASS_HANDLE clsHnd,
                                           unsigned int         length,
-                                          unsigned int*        structSize,
+                                          unsigned int*        blockSize,
                                           const char**         reason);
     bool         CanLclVarEscape(unsigned int lclNum);
     void         MarkLclVarAsPossiblyStackPointing(unsigned int lclNum);
@@ -68,8 +68,12 @@ class ObjectAllocator final : public Phase
     GenTree*     MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* allocObj);
     unsigned int MorphAllocObjNodeIntoStackAlloc(
         GenTreeAllocObj* allocObj, CORINFO_CLASS_HANDLE clsHnd, bool isValueClass, BasicBlock* block, Statement* stmt);
-    unsigned int MorphNewArrNodeIntoStackAlloc(
-        GenTreeCall* newArr, CORINFO_CLASS_HANDLE clsHnd, unsigned int structSize, BasicBlock* block, Statement* stmt);
+    unsigned int MorphNewArrNodeIntoStackAlloc(GenTreeCall*         newArr,
+                                               CORINFO_CLASS_HANDLE clsHnd,
+                                               unsigned int         length,
+                                               unsigned int         blockSize,
+                                               BasicBlock*          block,
+                                               Statement*           stmt);
     struct BuildConnGraphVisitorCallbackData;
     bool CanLclVarEscapeViaParentStack(ArrayStack<GenTree*>* parentStack, unsigned int lclNum);
     void UpdateAncestorTypes(GenTree* tree, ArrayStack<GenTree*>* parentStack, var_types newType);
@@ -123,11 +127,8 @@ inline void ObjectAllocator::EnableObjectStackAllocation()
 // Return Value:
 //    Returns true iff local variable can be allocated on the stack.
 //
-inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNum,
-                                                      CORINFO_CLASS_HANDLE clsHnd,
-                                                      unsigned int         length,
-                                                      unsigned int*        structSize,
-                                                      const char**         reason)
+inline bool ObjectAllocator::CanAllocateLclVarOnStack(
+    unsigned int lclNum, CORINFO_CLASS_HANDLE clsHnd, unsigned int length, unsigned int* blockSize, const char** reason)
 {
     assert(m_AnalysisDone);
 
@@ -208,9 +209,9 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNu
         return false;
     }
 
-    if (structSize != NULL)
+    if (blockSize != NULL)
     {
-        *structSize = classSize;
+        *blockSize = (unsigned int)classSize;
     }
 
     return true;

From 1b5b25e579940e12a52a857cdfeb2875ef2b2fcb Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Mon, 15 Jul 2024 23:01:16 +0900
Subject: [PATCH 03/75] Fix incorrect array length storage

---
 src/coreclr/jit/objectalloc.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index f652ea1e5338ca..ab48b96fcef86b 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -683,8 +683,8 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*
     //------------------------------------------------------------------------
 
     // Pass the total length of the array.
-    GenTree*   len     = comp->gtNewStoreLclFldNode(lclNum, TYP_I_IMPL, genTypeSize(TYP_I_IMPL),
-                                                    comp->gtNewIconNode(length, TYP_I_IMPL));
+    GenTree* len =
+        comp->gtNewStoreLclFldNode(lclNum, TYP_INT, genTypeSize(TYP_I_IMPL), comp->gtNewIconNode(length, TYP_INT));
     Statement* lenStmt = comp->gtNewStmt(len);
     comp->fgInsertStmtBefore(block, stmt, lenStmt);
 
@@ -1175,8 +1175,10 @@ void ObjectAllocator::RewriteUses()
                 if (arr->OperIs(GT_LCL_ADDR) && ind->IsCnsIntOrI())
                 {
                     JITDUMP("Rewriting INDEX_ADDR to ADD [%06u]\n", m_compiler->dspTreeID(tree));
-                    GenTree* const add = m_compiler->gtNewOperNode(GT_ADD, TYP_BYREF, arr, ind);
-                    *use               = add;
+                    GenTree* const offset =
+                        m_compiler->gtNewIconNode(OFFSETOF__CORINFO_Array__data + ind->AsIntCon()->gtIconVal, TYP_INT);
+                    GenTree* const add = m_compiler->gtNewOperNode(GT_ADD, TYP_BYREF, arr, offset);
+                    *use = add;
                 }
             }
             // Rewrite ARR_LENGTH to LCL_FLD for stack-allocated arrays.
@@ -1188,9 +1190,9 @@ void ObjectAllocator::RewriteUses()
                 if (arr->OperIs(GT_LCL_ADDR))
                 {
                     JITDUMP("Rewriting ARR_LENGTH to LCL_FLD [%06u]\n", m_compiler->dspTreeID(tree));
-                    GenTree* const lclFld = m_compiler->gtNewLclFldNode(arr->AsLclVarCommon()->GetLclNum(), TYP_I_IMPL,
+                    GenTree* const lclFld = m_compiler->gtNewLclFldNode(arr->AsLclVarCommon()->GetLclNum(), TYP_INT,
                                                                         OFFSETOF__CORINFO_Array__length);
-                    *use = lclFld;
+                    *use                  = lclFld;
                 }
             }
 

From 395b7352b6ae95261dd0dabcc82c99324fa7aef0 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Mon, 15 Jul 2024 23:33:46 +0900
Subject: [PATCH 04/75] Use offset and correct type

---
 src/coreclr/jit/objectalloc.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index ab48b96fcef86b..bb440c1490ce88 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -683,8 +683,8 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*
     //------------------------------------------------------------------------
 
     // Pass the total length of the array.
-    GenTree* len =
-        comp->gtNewStoreLclFldNode(lclNum, TYP_INT, genTypeSize(TYP_I_IMPL), comp->gtNewIconNode(length, TYP_INT));
+    GenTree*   len     = comp->gtNewStoreLclFldNode(lclNum, TYP_INT, OFFSETOF__CORINFO_Array__length,
+                                                    comp->gtNewIconNode(length, TYP_INT));
     Statement* lenStmt = comp->gtNewStmt(len);
     comp->fgInsertStmtBefore(block, stmt, lenStmt);
 

From 17de70bfae31adaa22649e0ff2d4b5cb3ff3a7cf Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Tue, 16 Jul 2024 01:17:17 +0900
Subject: [PATCH 05/75] handle reassignment

---
 src/coreclr/jit/objectalloc.cpp | 58 ++++++++++++++++++++++++---------
 src/coreclr/jit/objectalloc.h   |  2 ++
 2 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index bb440c1490ce88..a9ab5f691752e1 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -89,6 +89,27 @@ void ObjectAllocator::MarkLclVarAsEscaping(unsigned int lclNum)
     BitVecOps::AddElemD(&m_bitVecTraits, m_EscapingPointers, lclNum);
 }
 
+//------------------------------------------------------------------------------
+// MarkLclVarHasLocalStore : Mark local variable having local store.
+//
+//
+// Arguments:
+//    lclNum  - Pointing local variable number
+// Returns:
+//    true if the local variable is first marked as having local store
+//    false if the local variable is already marked as having local store
+
+bool ObjectAllocator::MarkLclVarHasLocalStore(unsigned int lclNum)
+{
+    if (BitVecOps::IsMember(&m_bitVecTraits, m_PointersHasLocalStore, lclNum))
+    {
+        return false;
+    }
+
+    BitVecOps::AddElemD(&m_bitVecTraits, m_PointersHasLocalStore, lclNum);
+    return true;
+}
+
 //------------------------------------------------------------------------------
 // MarkLclVarAsPossiblyStackPointing : Mark local variable as possibly pointing
 //                                     to a stack-allocated object.
@@ -140,6 +161,7 @@ void ObjectAllocator::DoAnalysis()
     if (comp->lvaCount > 0)
     {
         m_EscapingPointers         = BitVecOps::MakeEmpty(&m_bitVecTraits);
+        m_PointersHasLocalStore    = BitVecOps::MakeEmpty(&m_bitVecTraits);
         m_ConnGraphAdjacencyMatrix = new (comp->getAllocator(CMK_ObjectAllocator)) BitSetShortLongRep[comp->lvaCount];
 
         MarkEscapingVarsAndBuildConnGraph();
@@ -201,7 +223,9 @@ void ObjectAllocator::MarkEscapingVarsAndBuildConnGraph()
 
             if (tree->OperIsLocalStore())
             {
-                lclEscapes = false;
+                // conservatively marking locals being reassigned as escaping
+                // this can happen on arrays
+                lclEscapes = !m_allocator->MarkLclVarHasLocalStore(lclNum);
             }
             else if (tree->OperIs(GT_LCL_VAR) && tree->TypeIs(TYP_REF, TYP_BYREF, TYP_I_IMPL))
             {
@@ -404,7 +428,8 @@ bool ObjectAllocator::MorphAllocObjNodes()
                 else if (data->IsHelperCall())
                 {
                     GenTreeCall* call = data->AsCall();
-                    if (call->GetHelperNum() == CORINFO_HELP_NEWARR_1_VC)
+                    if (call->GetHelperNum() == CORINFO_HELP_NEWARR_1_VC &&
+                        call->gtArgs.GetArgByIndex(0)->GetNode()->IsCnsIntOrI())
                     {
                         canonicalAllocArrFound = true;
                     }
@@ -1168,31 +1193,32 @@ void ObjectAllocator::RewriteUses()
             // Rewrite INDEX_ADDR to ADD for stack-allocated arrays.
             else if (tree->OperIs(GT_INDEX_ADDR))
             {
-                GenTreeIndexAddr* const indexAddr = tree->AsIndexAddr();
-                GenTree* const          arr       = indexAddr->Arr();
-                GenTree* const          ind       = indexAddr->Index();
+                GenTreeIndexAddr* const gtIndexAddr = tree->AsIndexAddr();
+                GenTree* const          gtArr       = gtIndexAddr->Arr();
+                GenTree* const          gtInd       = gtIndexAddr->Index();
 
-                if (arr->OperIs(GT_LCL_ADDR) && ind->IsCnsIntOrI())
+                if (gtArr->OperIs(GT_LCL_ADDR) && gtInd->IsCnsIntOrI())
                 {
                     JITDUMP("Rewriting INDEX_ADDR to ADD [%06u]\n", m_compiler->dspTreeID(tree));
-                    GenTree* const offset =
-                        m_compiler->gtNewIconNode(OFFSETOF__CORINFO_Array__data + ind->AsIntCon()->gtIconVal, TYP_INT);
-                    GenTree* const add = m_compiler->gtNewOperNode(GT_ADD, TYP_BYREF, arr, offset);
-                    *use = add;
+                    ssize_t offset =
+                        OFFSETOF__CORINFO_Array__data + gtInd->AsIntCon()->gtIconVal * gtIndexAddr->gtElemSize;
+                    GenTree* const gtOffset = m_compiler->gtNewIconNode(offset, TYP_I_IMPL);
+                    GenTree* const gtAdd    = m_compiler->gtNewOperNode(GT_ADD, TYP_BYREF, gtArr, gtOffset);
+                    *use                    = gtAdd;
                 }
             }
             // Rewrite ARR_LENGTH to LCL_FLD for stack-allocated arrays.
             else if (tree->OperIsArrLength())
             {
-                GenTreeArrLen* const arrLen = tree->AsArrLen();
-                GenTree* const       arr    = arrLen->ArrRef();
+                GenTreeArrLen* const gtArrLen = tree->AsArrLen();
+                GenTree* const       gtArr    = gtArrLen->ArrRef();
 
-                if (arr->OperIs(GT_LCL_ADDR))
+                if (gtArr->OperIs(GT_LCL_ADDR))
                 {
                     JITDUMP("Rewriting ARR_LENGTH to LCL_FLD [%06u]\n", m_compiler->dspTreeID(tree));
-                    GenTree* const lclFld = m_compiler->gtNewLclFldNode(arr->AsLclVarCommon()->GetLclNum(), TYP_INT,
-                                                                        OFFSETOF__CORINFO_Array__length);
-                    *use                  = lclFld;
+                    GenTree* const gtLclFld = m_compiler->gtNewLclFldNode(gtArr->AsLclVarCommon()->GetLclNum(), TYP_INT,
+                                                                          OFFSETOF__CORINFO_Array__length);
+                    *use                    = gtLclFld;
                 }
             }
 
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index e39f9d666b6b2d..f7ff0ffcb8bc1d 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -33,6 +33,7 @@ class ObjectAllocator final : public Phase
     // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both sets.
     BitVec              m_PossiblyStackPointingPointers;
     BitVec              m_DefinitelyStackPointingPointers;
+    BitVec              m_PointersHasLocalStore;
     LocalToLocalMap     m_HeapLocalToStackLocalMap;
     BitSetShortLongRep* m_ConnGraphAdjacencyMatrix;
 
@@ -59,6 +60,7 @@ class ObjectAllocator final : public Phase
     bool         DoesLclVarPointToStack(unsigned int lclNum);
     void         DoAnalysis();
     void         MarkLclVarAsEscaping(unsigned int lclNum);
+    bool         MarkLclVarHasLocalStore(unsigned int lclNum);
     void         MarkEscapingVarsAndBuildConnGraph();
     void         AddConnGraphEdge(unsigned int sourceLclNum, unsigned int targetLclNum);
     void         ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& escapingNodes);

From 5443c422dc23e67dfe0a7f2c0d14ac798ab73963 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Tue, 16 Jul 2024 01:55:51 +0900
Subject: [PATCH 06/75] range check

---
 src/coreclr/jit/objectalloc.cpp | 8 ++++++--
 src/coreclr/jit/objectalloc.h   | 3 +++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index a9ab5f691752e1..ca75406fa5111b 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -481,6 +481,7 @@ bool ObjectAllocator::MorphAllocObjNodes()
                         MorphNewArrNodeIntoStackAlloc(asCall, clsHnd, (unsigned int)len->AsIntCon()->gtIconVal,
                                                       blockSize, block, stmt);
                     m_HeapLocalToStackLocalMap.AddOrUpdate(lclNum, stackLclNum);
+                    m_LocalArrToLenMap.AddOrUpdate(stackLclNum, (unsigned int)len->AsIntCon()->gtIconVal);
                     // We keep the set of possibly-stack-pointing pointers as a superset of the set of
                     // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both sets.
                     MarkLclVarAsDefinitelyStackPointing(lclNum);
@@ -1196,11 +1197,14 @@ void ObjectAllocator::RewriteUses()
                 GenTreeIndexAddr* const gtIndexAddr = tree->AsIndexAddr();
                 GenTree* const          gtArr       = gtIndexAddr->Arr();
                 GenTree* const          gtInd       = gtIndexAddr->Index();
+                unsigned int            arrLen      = 0;
 
-                if (gtArr->OperIs(GT_LCL_ADDR) && gtInd->IsCnsIntOrI())
+                if (gtArr->OperIs(GT_LCL_ADDR) && gtInd->IsCnsIntOrI() &&
+                    m_allocator->m_LocalArrToLenMap.TryGetValue(gtArr->AsLclVarCommon()->GetLclNum(), &arrLen) &&
+                    gtInd->AsIntCon()->gtIconVal < arrLen)
                 {
                     JITDUMP("Rewriting INDEX_ADDR to ADD [%06u]\n", m_compiler->dspTreeID(tree));
-                    ssize_t offset =
+                    const ssize_t offset =
                         OFFSETOF__CORINFO_Array__data + gtInd->AsIntCon()->gtIconVal * gtIndexAddr->gtElemSize;
                     GenTree* const gtOffset = m_compiler->gtNewIconNode(offset, TYP_I_IMPL);
                     GenTree* const gtAdd    = m_compiler->gtNewOperNode(GT_ADD, TYP_BYREF, gtArr, gtOffset);
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index f7ff0ffcb8bc1d..65fe381f24230a 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -22,6 +22,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 class ObjectAllocator final : public Phase
 {
     typedef SmallHashTable<unsigned int, unsigned int, 8U> LocalToLocalMap;
+    typedef SmallHashTable<unsigned int, unsigned int, 8U> LocalArrToLenMap;
 
     //===============================================================================
     // Data members
@@ -35,6 +36,7 @@ class ObjectAllocator final : public Phase
     BitVec              m_DefinitelyStackPointingPointers;
     BitVec              m_PointersHasLocalStore;
     LocalToLocalMap     m_HeapLocalToStackLocalMap;
+    LocalArrToLenMap    m_LocalArrToLenMap;
     BitSetShortLongRep* m_ConnGraphAdjacencyMatrix;
 
     //===============================================================================
@@ -91,6 +93,7 @@ inline ObjectAllocator::ObjectAllocator(Compiler* comp)
     , m_AnalysisDone(false)
     , m_bitVecTraits(comp->lvaCount, comp)
     , m_HeapLocalToStackLocalMap(comp->getAllocator())
+    , m_LocalArrToLenMap(comp->getAllocator())
 {
     m_EscapingPointers                = BitVecOps::UninitVal();
     m_PossiblyStackPointingPointers   = BitVecOps::UninitVal();

From b2d07da2a86bdbda8a5b913b9de7d148434b65d5 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Tue, 16 Jul 2024 02:06:42 +0900
Subject: [PATCH 07/75] throw range check failure

---
 src/coreclr/jit/objectalloc.cpp | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index ca75406fa5111b..028ce76295f5e0 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -1200,15 +1200,25 @@ void ObjectAllocator::RewriteUses()
                 unsigned int            arrLen      = 0;
 
                 if (gtArr->OperIs(GT_LCL_ADDR) && gtInd->IsCnsIntOrI() &&
-                    m_allocator->m_LocalArrToLenMap.TryGetValue(gtArr->AsLclVarCommon()->GetLclNum(), &arrLen) &&
-                    gtInd->AsIntCon()->gtIconVal < arrLen)
+                    m_allocator->m_LocalArrToLenMap.TryGetValue(gtArr->AsLclVarCommon()->GetLclNum(), &arrLen))
                 {
-                    JITDUMP("Rewriting INDEX_ADDR to ADD [%06u]\n", m_compiler->dspTreeID(tree));
-                    const ssize_t offset =
-                        OFFSETOF__CORINFO_Array__data + gtInd->AsIntCon()->gtIconVal * gtIndexAddr->gtElemSize;
-                    GenTree* const gtOffset = m_compiler->gtNewIconNode(offset, TYP_I_IMPL);
-                    GenTree* const gtAdd    = m_compiler->gtNewOperNode(GT_ADD, TYP_BYREF, gtArr, gtOffset);
-                    *use                    = gtAdd;
+                    if (gtInd->AsIntCon()->gtIconVal < arrLen)
+                    {
+                        JITDUMP("Rewriting INDEX_ADDR to ADD [%06u]\n", m_compiler->dspTreeID(tree));
+                        const ssize_t offset =
+                            OFFSETOF__CORINFO_Array__data + gtInd->AsIntCon()->gtIconVal * gtIndexAddr->gtElemSize;
+                        GenTree* const gtOffset = m_compiler->gtNewIconNode(offset, TYP_I_IMPL);
+                        GenTree* const gtAdd    = m_compiler->gtNewOperNode(GT_ADD, TYP_BYREF, gtArr, gtOffset);
+                        *use                    = gtAdd;
+                    }
+                    else
+                    {
+                        JITDUMP("Rewriting INDEX_ADDR to RNGCHKFAIL helper call [%06u]\n", m_compiler->dspTreeID(tree));
+                        GenTree* const gtRngChkFail =
+                            m_compiler->gtNewMustThrowException(CORINFO_HELP_RNGCHKFAIL, gtIndexAddr->gtType,
+                                                                gtIndexAddr->gtStructElemClass);
+                        *use = gtRngChkFail;
+                    }
                 }
             }
             // Rewrite ARR_LENGTH to LCL_FLD for stack-allocated arrays.

From b5ae9e72fcd498618a04d7b6c0915944c4ae8b53 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Tue, 16 Jul 2024 02:13:12 +0900
Subject: [PATCH 08/75] update comments

---
 src/coreclr/jit/objectalloc.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 028ce76295f5e0..2e170f36ca213b 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -700,12 +700,12 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*
 
     comp->fgInsertStmtBefore(block, stmt, initStmt);
 
-    // Initialize the length slot.
+    // Initialize the array length.
     //
     //------------------------------------------------------------------------
     // STMTx (IL 0x... ???)
-    //   * STORE_LCL_FLD    long
-    //   \--*  CNS_INT    long
+    //   * STORE_LCL_FLD    int
+    //   \--*  CNS_INT    int
     //------------------------------------------------------------------------
 
     // Pass the total length of the array.

From 87b29de78f98c0609a43a731fff874b696355c7a Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Tue, 16 Jul 2024 02:15:36 +0900
Subject: [PATCH 09/75] add metrics

---
 src/coreclr/jit/jitmetadatalist.h |  1 +
 src/coreclr/jit/objectalloc.cpp   | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/src/coreclr/jit/jitmetadatalist.h b/src/coreclr/jit/jitmetadatalist.h
index 4aad9abd1b3ccb..225892a290d7bc 100644
--- a/src/coreclr/jit/jitmetadatalist.h
+++ b/src/coreclr/jit/jitmetadatalist.h
@@ -76,6 +76,7 @@ JITMETADATAMETRIC(NewRefClassHelperCalls,                int,              0)
 JITMETADATAMETRIC(StackAllocatedRefClasses,              int,              0)
 JITMETADATAMETRIC(NewBoxedValueClassHelperCalls,         int,              0)
 JITMETADATAMETRIC(StackAllocatedBoxedValueClasses,       int,              0)
+JITMETADATAMETRIC(StackAllocatedArrays,                  int,              0)
 
 #undef JITMETADATA
 #undef JITMETADATAINFO
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 2e170f36ca213b..4241296819515f 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -490,6 +490,16 @@ bool ObjectAllocator::MorphAllocObjNodes()
                     comp->optMethodFlags |= OMF_HAS_OBJSTACKALLOC;
                     didStackAllocate = true;
                 }
+
+                if (canStack)
+                {
+                    comp->Metrics.StackAllocatedArrays++;
+                }
+                else
+                {
+                    assert(onHeapReason != nullptr);
+                    JITDUMP("Allocating V%02u on the heap: %s\n", lclNum, onHeapReason);
+                }
             }
 
             if (canonicalAllocObjFound)

From eeb681da150dfac5033fbe62bbdc750b64c19ff6 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Tue, 16 Jul 2024 03:10:35 +0900
Subject: [PATCH 10/75] minor cleanup

---
 src/coreclr/jit/jitmetadatalist.h | 1 +
 src/coreclr/jit/objectalloc.cpp   | 4 ++--
 src/coreclr/jit/objectalloc.h     | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/coreclr/jit/jitmetadatalist.h b/src/coreclr/jit/jitmetadatalist.h
index 225892a290d7bc..30733553b85640 100644
--- a/src/coreclr/jit/jitmetadatalist.h
+++ b/src/coreclr/jit/jitmetadatalist.h
@@ -76,6 +76,7 @@ JITMETADATAMETRIC(NewRefClassHelperCalls,                int,              0)
 JITMETADATAMETRIC(StackAllocatedRefClasses,              int,              0)
 JITMETADATAMETRIC(NewBoxedValueClassHelperCalls,         int,              0)
 JITMETADATAMETRIC(StackAllocatedBoxedValueClasses,       int,              0)
+JITMETADATAMETRIC(NewArrayHelperCalls,                   int,              0)
 JITMETADATAMETRIC(StackAllocatedArrays,                  int,              0)
 
 #undef JITMETADATA
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 4241296819515f..6483c3b177c7a5 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -543,7 +543,7 @@ bool ObjectAllocator::MorphAllocObjNodes()
                     onHeapReason = "[alloc in loop]";
                     canStack     = false;
                 }
-                else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, 0, NULL, &onHeapReason))
+                else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, 0, nullptr, &onHeapReason))
                 {
                     // reason set by the call
                     canStack = false;
@@ -1212,7 +1212,7 @@ void ObjectAllocator::RewriteUses()
                 if (gtArr->OperIs(GT_LCL_ADDR) && gtInd->IsCnsIntOrI() &&
                     m_allocator->m_LocalArrToLenMap.TryGetValue(gtArr->AsLclVarCommon()->GetLclNum(), &arrLen))
                 {
-                    if (gtInd->AsIntCon()->gtIconVal < arrLen)
+                    if ((unsigned int)gtInd->AsIntCon()->gtIconVal < arrLen)
                     {
                         JITDUMP("Rewriting INDEX_ADDR to ADD [%06u]\n", m_compiler->dspTreeID(tree));
                         const ssize_t offset =
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index 65fe381f24230a..099db4b3016ac8 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -167,7 +167,7 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(
     else if (comp->info.compCompHnd->isSDArray(clsHnd))
     {
         CorInfoType type = comp->info.compCompHnd->getChildType(clsHnd, &clsHnd);
-        if (type != CORINFO_TYPE_UNDEF && clsHnd == NULL)
+        if (type != CORINFO_TYPE_UNDEF && clsHnd == nullptr)
         {
             // This is an array of primitive types
             classSize = (unsigned int)OFFSETOF__CORINFO_Array__data + genTypeSize(JITtype2varType(type)) * length;
@@ -214,7 +214,7 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(
         return false;
     }
 
-    if (blockSize != NULL)
+    if (blockSize != nullptr)
     {
         *blockSize = (unsigned int)classSize;
     }

From dee9f389b7d16e30c4cd04f53763723dd89e13d0 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Tue, 16 Jul 2024 14:13:23 +0900
Subject: [PATCH 11/75] Introduce new temp and implement local address morphing

---
 src/coreclr/jit/importer.cpp    | 15 ++++++-
 src/coreclr/jit/lclmorph.cpp    | 42 ++++++++++++++++++
 src/coreclr/jit/objectalloc.cpp | 79 ++++-----------------------------
 src/coreclr/jit/objectalloc.h   |  5 ---
 4 files changed, 64 insertions(+), 77 deletions(-)

diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index a84f3db3ff473d..95030237765244 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -9533,9 +9533,22 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                 // Remember that this function contains 'new' of an SD array.
                 optMethodFlags |= OMF_HAS_NEWARRAY;
 
+                // We assign the newly allocated object (by a GT_CALL to newarr node)
+                // to a temp. Note that the pattern "temp = allocArr" is required
+                // by ObjectAllocator phase to be able to determine newarr nodes
+                // without exhaustive walk over all expressions.
+                lclNum = lvaGrabTemp(true DEBUGARG("NewArr temp"));
+
+                impStoreToTemp(lclNum, op1, CHECK_SPILL_NONE);
+
+                assert(lvaTable[lclNum].lvSingleDef == 0);
+                lvaTable[lclNum].lvSingleDef = 1;
+                JITDUMP("Marked V%02u as a single def local\n", lclNum);
+                lvaSetClass(lclNum, resolvedToken.hClass, true /* is Exact */);
+
                 /* Push the result of the call on the stack */
 
-                impPushOnStack(op1, tiRetVal);
+                impPushOnStack(gtNewLclvNode(lclNum, TYP_REF), tiRetVal);
 
                 callTyp = TYP_REF;
             }
diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp
index 2a9d0f9a36914a..2a03eacefac2fc 100644
--- a/src/coreclr/jit/lclmorph.cpp
+++ b/src/coreclr/jit/lclmorph.cpp
@@ -826,6 +826,48 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
 
         switch (node->OperGet())
         {
+            case GT_INDEX_ADDR:
+            {
+                assert(TopValue(2).Node() == node);
+                assert(TopValue(1).Node() == node->gtGetOp1());
+                assert(TopValue(0).Node() == node->gtGetOp2());
+
+                if (node->gtGetOp2()->IsCnsIntOrI())
+                {
+                    ssize_t offset = OFFSETOF__CORINFO_Array__data +
+                                     node->gtGetOp2()->AsIntCon()->IconValue() * node->AsIndexAddr()->gtElemSize;
+                    if (FitsIn<unsigned>(offset) && TopValue(2).AddOffset(TopValue(1), static_cast<unsigned>(offset)))
+                    {
+                        INDEBUG(TopValue(0).Consume());
+                        PopValue();
+                        PopValue();
+                        break;
+                    }
+                }
+
+                EscapeValue(TopValue(0), node);
+                PopValue();
+                EscapeValue(TopValue(0), node);
+                PopValue();
+                break;
+            }
+            case GT_ARR_LENGTH:
+            {
+                Value& arr = TopValue(0);
+                EscapeValue(arr, node);
+                PopValue();
+
+                GenTree* gtArr = arr.Node();
+
+                if (gtArr->OperIs(GT_LCL_ADDR))
+                {
+                    unsigned int lclNum = gtArr->AsLclVarCommon()->GetLclNum();
+                    GenTree* gtLclFld   = m_compiler->gtNewLclFldNode(lclNum, TYP_INT, OFFSETOF__CORINFO_Array__length);
+                    SequenceLocal(gtLclFld->AsLclVarCommon());
+                    *use = node = gtLclFld;
+                }
+                break;
+            }
             case GT_STORE_LCL_FLD:
                 if (node->IsPartialLclFld(m_compiler))
                 {
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 6483c3b177c7a5..52565b60599f54 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -89,27 +89,6 @@ void ObjectAllocator::MarkLclVarAsEscaping(unsigned int lclNum)
     BitVecOps::AddElemD(&m_bitVecTraits, m_EscapingPointers, lclNum);
 }
 
-//------------------------------------------------------------------------------
-// MarkLclVarHasLocalStore : Mark local variable having local store.
-//
-//
-// Arguments:
-//    lclNum  - Pointing local variable number
-// Returns:
-//    true if the local variable is first marked as having local store
-//    false if the local variable is already marked as having local store
-
-bool ObjectAllocator::MarkLclVarHasLocalStore(unsigned int lclNum)
-{
-    if (BitVecOps::IsMember(&m_bitVecTraits, m_PointersHasLocalStore, lclNum))
-    {
-        return false;
-    }
-
-    BitVecOps::AddElemD(&m_bitVecTraits, m_PointersHasLocalStore, lclNum);
-    return true;
-}
-
 //------------------------------------------------------------------------------
 // MarkLclVarAsPossiblyStackPointing : Mark local variable as possibly pointing
 //                                     to a stack-allocated object.
@@ -161,7 +140,6 @@ void ObjectAllocator::DoAnalysis()
     if (comp->lvaCount > 0)
     {
         m_EscapingPointers         = BitVecOps::MakeEmpty(&m_bitVecTraits);
-        m_PointersHasLocalStore    = BitVecOps::MakeEmpty(&m_bitVecTraits);
         m_ConnGraphAdjacencyMatrix = new (comp->getAllocator(CMK_ObjectAllocator)) BitSetShortLongRep[comp->lvaCount];
 
         MarkEscapingVarsAndBuildConnGraph();
@@ -223,9 +201,7 @@ void ObjectAllocator::MarkEscapingVarsAndBuildConnGraph()
 
             if (tree->OperIsLocalStore())
             {
-                // conservatively marking locals being reassigned as escaping
-                // this can happen on arrays
-                lclEscapes = !m_allocator->MarkLclVarHasLocalStore(lclNum);
+                lclEscapes = false;
             }
             else if (tree->OperIs(GT_LCL_VAR) && tree->TypeIs(TYP_REF, TYP_BYREF, TYP_I_IMPL))
             {
@@ -481,7 +457,6 @@ bool ObjectAllocator::MorphAllocObjNodes()
                         MorphNewArrNodeIntoStackAlloc(asCall, clsHnd, (unsigned int)len->AsIntCon()->gtIconVal,
                                                       blockSize, block, stmt);
                     m_HeapLocalToStackLocalMap.AddOrUpdate(lclNum, stackLclNum);
-                    m_LocalArrToLenMap.AddOrUpdate(stackLclNum, (unsigned int)len->AsIntCon()->gtIconVal);
                     // We keep the set of possibly-stack-pointing pointers as a superset of the set of
                     // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both sets.
                     MarkLclVarAsDefinitelyStackPointing(lclNum);
@@ -1096,11 +1071,17 @@ void ObjectAllocator::RewriteUses()
 
                 if (lclVarDsc->lvType != newType)
                 {
-                    JITDUMP("changing the type of V%02u from %s to %s\n", lclNum, varTypeName(lclVarDsc->lvType),
+                    JITDUMP("Changing the type of V%02u from %s to %s\n", lclNum, varTypeName(lclVarDsc->lvType),
                             varTypeName(newType));
                     lclVarDsc->lvType = newType;
                 }
                 m_allocator->UpdateAncestorTypes(tree, &m_ancestors, newType);
+
+                if (newLclNum != BAD_VAR_NUM)
+                {
+                    JITDUMP("Update V%02u to V%02u from use [%06u]\n", lclNum, newLclNum, m_compiler->dspTreeID(tree));
+                    DISPTREE(tree);
+                }
             }
 
             return Compiler::fgWalkResult::WALK_CONTINUE;
@@ -1201,50 +1182,6 @@ void ObjectAllocator::RewriteUses()
                     }
                 }
             }
-            // Rewrite INDEX_ADDR to ADD for stack-allocated arrays.
-            else if (tree->OperIs(GT_INDEX_ADDR))
-            {
-                GenTreeIndexAddr* const gtIndexAddr = tree->AsIndexAddr();
-                GenTree* const          gtArr       = gtIndexAddr->Arr();
-                GenTree* const          gtInd       = gtIndexAddr->Index();
-                unsigned int            arrLen      = 0;
-
-                if (gtArr->OperIs(GT_LCL_ADDR) && gtInd->IsCnsIntOrI() &&
-                    m_allocator->m_LocalArrToLenMap.TryGetValue(gtArr->AsLclVarCommon()->GetLclNum(), &arrLen))
-                {
-                    if ((unsigned int)gtInd->AsIntCon()->gtIconVal < arrLen)
-                    {
-                        JITDUMP("Rewriting INDEX_ADDR to ADD [%06u]\n", m_compiler->dspTreeID(tree));
-                        const ssize_t offset =
-                            OFFSETOF__CORINFO_Array__data + gtInd->AsIntCon()->gtIconVal * gtIndexAddr->gtElemSize;
-                        GenTree* const gtOffset = m_compiler->gtNewIconNode(offset, TYP_I_IMPL);
-                        GenTree* const gtAdd    = m_compiler->gtNewOperNode(GT_ADD, TYP_BYREF, gtArr, gtOffset);
-                        *use                    = gtAdd;
-                    }
-                    else
-                    {
-                        JITDUMP("Rewriting INDEX_ADDR to RNGCHKFAIL helper call [%06u]\n", m_compiler->dspTreeID(tree));
-                        GenTree* const gtRngChkFail =
-                            m_compiler->gtNewMustThrowException(CORINFO_HELP_RNGCHKFAIL, gtIndexAddr->gtType,
-                                                                gtIndexAddr->gtStructElemClass);
-                        *use = gtRngChkFail;
-                    }
-                }
-            }
-            // Rewrite ARR_LENGTH to LCL_FLD for stack-allocated arrays.
-            else if (tree->OperIsArrLength())
-            {
-                GenTreeArrLen* const gtArrLen = tree->AsArrLen();
-                GenTree* const       gtArr    = gtArrLen->ArrRef();
-
-                if (gtArr->OperIs(GT_LCL_ADDR))
-                {
-                    JITDUMP("Rewriting ARR_LENGTH to LCL_FLD [%06u]\n", m_compiler->dspTreeID(tree));
-                    GenTree* const gtLclFld = m_compiler->gtNewLclFldNode(gtArr->AsLclVarCommon()->GetLclNum(), TYP_INT,
-                                                                          OFFSETOF__CORINFO_Array__length);
-                    *use                    = gtLclFld;
-                }
-            }
 
             return Compiler::fgWalkResult::WALK_CONTINUE;
         }
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index 099db4b3016ac8..790c53d8cab669 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -22,7 +22,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 class ObjectAllocator final : public Phase
 {
     typedef SmallHashTable<unsigned int, unsigned int, 8U> LocalToLocalMap;
-    typedef SmallHashTable<unsigned int, unsigned int, 8U> LocalArrToLenMap;
 
     //===============================================================================
     // Data members
@@ -34,9 +33,7 @@ class ObjectAllocator final : public Phase
     // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both sets.
     BitVec              m_PossiblyStackPointingPointers;
     BitVec              m_DefinitelyStackPointingPointers;
-    BitVec              m_PointersHasLocalStore;
     LocalToLocalMap     m_HeapLocalToStackLocalMap;
-    LocalArrToLenMap    m_LocalArrToLenMap;
     BitSetShortLongRep* m_ConnGraphAdjacencyMatrix;
 
     //===============================================================================
@@ -62,7 +59,6 @@ class ObjectAllocator final : public Phase
     bool         DoesLclVarPointToStack(unsigned int lclNum);
     void         DoAnalysis();
     void         MarkLclVarAsEscaping(unsigned int lclNum);
-    bool         MarkLclVarHasLocalStore(unsigned int lclNum);
     void         MarkEscapingVarsAndBuildConnGraph();
     void         AddConnGraphEdge(unsigned int sourceLclNum, unsigned int targetLclNum);
     void         ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& escapingNodes);
@@ -93,7 +89,6 @@ inline ObjectAllocator::ObjectAllocator(Compiler* comp)
     , m_AnalysisDone(false)
     , m_bitVecTraits(comp->lvaCount, comp)
     , m_HeapLocalToStackLocalMap(comp->getAllocator())
-    , m_LocalArrToLenMap(comp->getAllocator())
 {
     m_EscapingPointers                = BitVecOps::UninitVal();
     m_PossiblyStackPointingPointers   = BitVecOps::UninitVal();

From 94c103bec16ad52d39e9fc3718ce25bb0e782e11 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Tue, 16 Jul 2024 16:32:50 +0900
Subject: [PATCH 12/75] handle index out-of-range

---
 src/coreclr/jit/lclmorph.cpp    | 62 +++++++++++++++++++++++----------
 src/coreclr/jit/objectalloc.cpp |  6 ++--
 2 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp
index 2a03eacefac2fc..de4c2b2112e6f6 100644
--- a/src/coreclr/jit/lclmorph.cpp
+++ b/src/coreclr/jit/lclmorph.cpp
@@ -832,16 +832,34 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
                 assert(TopValue(1).Node() == node->gtGetOp1());
                 assert(TopValue(0).Node() == node->gtGetOp2());
 
-                if (node->gtGetOp2()->IsCnsIntOrI())
+                if (node->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_ADDR) && node->gtGetOp2()->IsCnsIntOrI())
                 {
-                    ssize_t offset = OFFSETOF__CORINFO_Array__data +
-                                     node->gtGetOp2()->AsIntCon()->IconValue() * node->AsIndexAddr()->gtElemSize;
-                    if (FitsIn<unsigned>(offset) && TopValue(2).AddOffset(TopValue(1), static_cast<unsigned>(offset)))
+                    if (TopValue(1).IsAddress() &&
+                        m_compiler->lvaGetDesc(TopValue(1).LclNum())->GetLayout()->IsBlockLayout())
                     {
-                        INDEBUG(TopValue(0).Consume());
-                        PopValue();
-                        PopValue();
-                        break;
+                        ssize_t offset = node->AsIndexAddr()->gtElemOffset +
+                                         node->gtGetOp2()->AsIntCon()->IconValue() * node->AsIndexAddr()->gtElemSize;
+
+                        if (offset < m_compiler->lvaLclSize(TopValue(1).LclNum()))
+                        {
+                            if (FitsIn<unsigned>(offset) &&
+                                TopValue(2).AddOffset(TopValue(1), static_cast<unsigned>(offset)))
+                            {
+                                INDEBUG(TopValue(0).Consume());
+                                PopValue();
+                                PopValue();
+                                break;
+                            }
+                        }
+                        else
+                        {
+                            GenTree* gtThrow =
+                                m_compiler->gtNewMustThrowException(CORINFO_HELP_RNGCHKFAIL, node->TypeGet(),
+                                                                    node->AsIndexAddr()->gtStructElemClass);
+                            m_compiler->lvaGetDesc(gtThrow->AsOp()->gtOp2->AsLclVarCommon())->incLvRefCnt(1, RCS_EARLY);
+                            *use           = gtThrow;
+                            m_stmtModified = true;
+                        }
                     }
                 }
 
@@ -853,19 +871,27 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
             }
             case GT_ARR_LENGTH:
             {
-                Value& arr = TopValue(0);
-                EscapeValue(arr, node);
-                PopValue();
-
-                GenTree* gtArr = arr.Node();
+                assert(TopValue(1).Node() == node);
+                assert(TopValue(0).Node() == node->gtGetOp1());
 
-                if (gtArr->OperIs(GT_LCL_ADDR))
+                if (node->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_ADDR))
                 {
-                    unsigned int lclNum = gtArr->AsLclVarCommon()->GetLclNum();
-                    GenTree* gtLclFld   = m_compiler->gtNewLclFldNode(lclNum, TYP_INT, OFFSETOF__CORINFO_Array__length);
-                    SequenceLocal(gtLclFld->AsLclVarCommon());
-                    *use = node = gtLclFld;
+                    if (TopValue(0).IsAddress() &&
+                        m_compiler->lvaGetDesc(TopValue(0).LclNum())->GetLayout()->IsBlockLayout())
+                    {
+                        GenTree* gtLclFld =
+                            m_compiler->gtNewLclFldNode(TopValue(0).LclNum(), TYP_INT, OFFSETOF__CORINFO_Array__length);
+                        SequenceLocal(gtLclFld->AsLclVarCommon());
+                        *use           = gtLclFld;
+                        m_stmtModified = true;
+                        INDEBUG(TopValue(0).Consume());
+                        PopValue();
+                        break;
+                    }
                 }
+
+                EscapeValue(TopValue(0), node);
+                PopValue();
                 break;
             }
             case GT_STORE_LCL_FLD:
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 52565b60599f54..cf2767325f72ed 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -443,8 +443,8 @@ bool ObjectAllocator::MorphAllocObjNodes()
                     onHeapReason = "[non-constant size]";
                     canStack     = false;
                 }
-                else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, (unsigned int)len->AsIntCon()->gtIconVal, &blockSize,
-                                                   &onHeapReason))
+                else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, (unsigned int)len->AsIntCon()->IconValue(),
+                                                   &blockSize, &onHeapReason))
                 {
                     // reason set by the call
                     canStack = false;
@@ -454,7 +454,7 @@ bool ObjectAllocator::MorphAllocObjNodes()
                     JITDUMP("Allocating V%02u on the stack\n", lclNum);
                     canStack = true;
                     const unsigned int stackLclNum =
-                        MorphNewArrNodeIntoStackAlloc(asCall, clsHnd, (unsigned int)len->AsIntCon()->gtIconVal,
+                        MorphNewArrNodeIntoStackAlloc(asCall, clsHnd, (unsigned int)len->AsIntCon()->IconValue(),
                                                       blockSize, block, stmt);
                     m_HeapLocalToStackLocalMap.AddOrUpdate(lclNum, stackLclNum);
                     // We keep the set of possibly-stack-pointing pointers as a superset of the set of

From 12b297ba427591b68d264b6d131d94445cf65864 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Tue, 16 Jul 2024 16:47:24 +0900
Subject: [PATCH 13/75] Refactor to remove duplicates

---
 src/coreclr/jit/objectalloc.cpp | 237 +++++++++++++++-----------------
 src/coreclr/jit/objectalloc.h   |   6 +
 2 files changed, 119 insertions(+), 124 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index cf2767325f72ed..8b747df0e0bc5c 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -390,8 +390,7 @@ bool ObjectAllocator::MorphAllocObjNodes()
             GenTree* stmtExpr = stmt->GetRootNode();
             GenTree* data     = nullptr;
 
-            bool canonicalAllocObjFound = false;
-            bool canonicalAllocArrFound = false;
+            ObjectAllocationType allocType = OAT_NONE;
 
             if (stmtExpr->OperIs(GT_STORE_LCL_VAR) && stmtExpr->TypeIs(TYP_REF))
             {
@@ -399,7 +398,7 @@ bool ObjectAllocator::MorphAllocObjNodes()
 
                 if (data->OperGet() == GT_ALLOCOBJ)
                 {
-                    canonicalAllocObjFound = true;
+                    allocType = OAT_NEWOBJ;
                 }
                 else if (data->IsHelperCall())
                 {
@@ -407,24 +406,16 @@ bool ObjectAllocator::MorphAllocObjNodes()
                     if (call->GetHelperNum() == CORINFO_HELP_NEWARR_1_VC &&
                         call->gtArgs.GetArgByIndex(0)->GetNode()->IsCnsIntOrI())
                     {
-                        canonicalAllocArrFound = true;
+                        allocType = OAT_NEWARR;
                     }
                 }
             }
 
-            if (canonicalAllocArrFound)
+            if (allocType != OAT_NONE)
             {
-                GenTreeCall* asCall = data->AsCall();
-                assert(asCall->GetHelperNum() == CORINFO_HELP_NEWARR_1_VC);
-
-                unsigned int         lclNum       = stmtExpr->AsLclVar()->GetLclNum();
-                CallArg*             arg          = asCall->gtArgs.GetArgByIndex(0);
-                GenTree*             node         = arg->GetNode();
-                CORINFO_CLASS_HANDLE clsHnd       = (CORINFO_CLASS_HANDLE)node->AsIntConCommon()->IntegralValue();
-                GenTree*             len          = arg->GetNext()->GetNode();
-                const char*          onHeapReason = nullptr;
-                unsigned int         blockSize    = 0;
-                bool                 canStack     = false;
+                bool         canStack     = false;
+                const char*  onHeapReason = nullptr;
+                unsigned int lclNum       = stmtExpr->AsLclVar()->GetLclNum();
 
                 // Don't attempt to do stack allocations inside basic blocks that may be in a loop.
                 //
@@ -438,131 +429,129 @@ bool ObjectAllocator::MorphAllocObjNodes()
                     onHeapReason = "[alloc in loop]";
                     canStack     = false;
                 }
-                else if (!len->IsCnsIntOrI())
-                {
-                    onHeapReason = "[non-constant size]";
-                    canStack     = false;
-                }
-                else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, (unsigned int)len->AsIntCon()->IconValue(),
-                                                   &blockSize, &onHeapReason))
-                {
-                    // reason set by the call
-                    canStack = false;
-                }
                 else
                 {
-                    JITDUMP("Allocating V%02u on the stack\n", lclNum);
-                    canStack = true;
-                    const unsigned int stackLclNum =
-                        MorphNewArrNodeIntoStackAlloc(asCall, clsHnd, (unsigned int)len->AsIntCon()->IconValue(),
-                                                      blockSize, block, stmt);
-                    m_HeapLocalToStackLocalMap.AddOrUpdate(lclNum, stackLclNum);
-                    // We keep the set of possibly-stack-pointing pointers as a superset of the set of
-                    // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both sets.
-                    MarkLclVarAsDefinitelyStackPointing(lclNum);
-                    MarkLclVarAsPossiblyStackPointing(lclNum);
-                    stmt->GetRootNode()->gtBashToNOP();
-                    comp->optMethodFlags |= OMF_HAS_OBJSTACKALLOC;
-                    didStackAllocate = true;
-                }
-
-                if (canStack)
-                {
-                    comp->Metrics.StackAllocatedArrays++;
-                }
-                else
-                {
-                    assert(onHeapReason != nullptr);
-                    JITDUMP("Allocating V%02u on the heap: %s\n", lclNum, onHeapReason);
-                }
-            }
+                    if (allocType == OAT_NEWARR)
+                    {
+                        assert(data->AsCall()->GetHelperNum() == CORINFO_HELP_NEWARR_1_VC);
+                        //------------------------------------------------------------------------
+                        // We expect the following expression tree at this point
+                        //  STMTx (IL 0x... ???)
+                        //    * STORE_LCL_VAR   ref
+                        //    \--*  CALL helper  ref
+                        //       +--*  CNS_INT(h) long
+                        //       \--*  CNS_INT long
+                        //------------------------------------------------------------------------
+
+                        CallArg*             arg       = data->AsCall()->gtArgs.GetArgByIndex(0);
+                        GenTree*             node      = arg->GetNode();
+                        CORINFO_CLASS_HANDLE clsHnd    = (CORINFO_CLASS_HANDLE)node->AsIntConCommon()->IntegralValue();
+                        GenTree*             len       = arg->GetNext()->GetNode();
+                        unsigned int         blockSize = 0;
+
+                        // Don't attempt to do stack allocations inside basic blocks that may be in a loop.
+                        //
+                        if (!len->IsCnsIntOrI())
+                        {
+                            onHeapReason = "[non-constant size]";
+                            canStack     = false;
+                        }
+                        else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, (unsigned int)len->AsIntCon()->IconValue(),
+                                                           &blockSize, &onHeapReason))
+                        {
+                            // reason set by the call
+                            canStack = false;
+                        }
+                        else
+                        {
+                            JITDUMP("Allocating V%02u on the stack\n", lclNum);
+                            canStack = true;
+                            const unsigned int stackLclNum =
+                                MorphNewArrNodeIntoStackAlloc(data->AsCall(), clsHnd,
+                                                              (unsigned int)len->AsIntCon()->IconValue(), blockSize,
+                                                              block, stmt);
+                            m_HeapLocalToStackLocalMap.AddOrUpdate(lclNum, stackLclNum);
+                            comp->Metrics.StackAllocatedArrays++;
+                        }
+                    }
+                    else if (allocType == OAT_NEWOBJ)
+                    {
+                        assert(basicBlockHasNewObj);
+                        //------------------------------------------------------------------------
+                        // We expect the following expression tree at this point
+                        //  STMTx (IL 0x... ???)
+                        //    * STORE_LCL_VAR   ref
+                        //    \--*  ALLOCOBJ  ref
+                        //       \--*  CNS_INT(h) long
+                        //------------------------------------------------------------------------
+
+                        CORINFO_CLASS_HANDLE clsHnd       = data->AsAllocObj()->gtAllocObjClsHnd;
+                        CORINFO_CLASS_HANDLE stackClsHnd  = clsHnd;
+                        const bool           isValueClass = comp->info.compCompHnd->isValueClass(clsHnd);
+
+                        if (isValueClass)
+                        {
+                            comp->Metrics.NewBoxedValueClassHelperCalls++;
+                            stackClsHnd = comp->info.compCompHnd->getTypeForBoxOnStack(clsHnd);
+                        }
+                        else
+                        {
+                            comp->Metrics.NewRefClassHelperCalls++;
+                        }
 
-            if (canonicalAllocObjFound)
-            {
-                assert(basicBlockHasNewObj);
-                //------------------------------------------------------------------------
-                // We expect the following expression tree at this point
-                //  STMTx (IL 0x... ???)
-                //    * STORE_LCL_VAR   ref
-                //    \--*  ALLOCOBJ  ref
-                //       \--*  CNS_INT(h) long
-                //------------------------------------------------------------------------
-
-                GenTreeAllocObj*     asAllocObj   = data->AsAllocObj();
-                unsigned int         lclNum       = stmtExpr->AsLclVar()->GetLclNum();
-                CORINFO_CLASS_HANDLE clsHnd       = data->AsAllocObj()->gtAllocObjClsHnd;
-                CORINFO_CLASS_HANDLE stackClsHnd  = clsHnd;
-                const bool           isValueClass = comp->info.compCompHnd->isValueClass(clsHnd);
-                const char*          onHeapReason = nullptr;
-                bool                 canStack     = false;
-
-                if (isValueClass)
-                {
-                    comp->Metrics.NewBoxedValueClassHelperCalls++;
-                    stackClsHnd = comp->info.compCompHnd->getTypeForBoxOnStack(clsHnd);
-                }
-                else
-                {
-                    comp->Metrics.NewRefClassHelperCalls++;
+                        if (!CanAllocateLclVarOnStack(lclNum, clsHnd, 0, nullptr, &onHeapReason))
+                        {
+                            // reason set by the call
+                            canStack = false;
+                        }
+                        else if (stackClsHnd == NO_CLASS_HANDLE)
+                        {
+                            assert(isValueClass);
+                            onHeapReason = "[no class handle for this boxed value class]";
+                            canStack     = false;
+                        }
+                        else
+                        {
+                            JITDUMP("Allocating V%02u on the stack\n", lclNum);
+                            canStack = true;
+                            const unsigned int stackLclNum =
+                                MorphAllocObjNodeIntoStackAlloc(data->AsAllocObj(), stackClsHnd, isValueClass, block,
+                                                                stmt);
+                            m_HeapLocalToStackLocalMap.AddOrUpdate(lclNum, stackLclNum);
+
+                            if (isValueClass)
+                            {
+                                comp->Metrics.StackAllocatedBoxedValueClasses++;
+                            }
+                            else
+                            {
+                                comp->Metrics.StackAllocatedRefClasses++;
+                            }
+                        }
+                    }
                 }
 
-                // Don't attempt to do stack allocations inside basic blocks that may be in a loop.
-                //
-                if (!IsObjectStackAllocationEnabled())
-                {
-                    onHeapReason = "[object stack allocation disabled]";
-                    canStack     = false;
-                }
-                else if (basicBlockHasBackwardJump)
-                {
-                    onHeapReason = "[alloc in loop]";
-                    canStack     = false;
-                }
-                else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, 0, nullptr, &onHeapReason))
-                {
-                    // reason set by the call
-                    canStack = false;
-                }
-                else if (stackClsHnd == NO_CLASS_HANDLE)
-                {
-                    assert(isValueClass);
-                    onHeapReason = "[no class handle for this boxed value class]";
-                    canStack     = false;
-                }
-                else
+                if (canStack)
                 {
-                    JITDUMP("Allocating V%02u on the stack\n", lclNum);
-                    canStack = true;
-                    const unsigned int stackLclNum =
-                        MorphAllocObjNodeIntoStackAlloc(asAllocObj, stackClsHnd, isValueClass, block, stmt);
-                    m_HeapLocalToStackLocalMap.AddOrUpdate(lclNum, stackLclNum);
                     // We keep the set of possibly-stack-pointing pointers as a superset of the set of
-                    // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both sets.
+                    // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both
+                    // sets.
                     MarkLclVarAsDefinitelyStackPointing(lclNum);
                     MarkLclVarAsPossiblyStackPointing(lclNum);
                     stmt->GetRootNode()->gtBashToNOP();
                     comp->optMethodFlags |= OMF_HAS_OBJSTACKALLOC;
                     didStackAllocate = true;
                 }
-
-                if (canStack)
-                {
-                    if (isValueClass)
-                    {
-                        comp->Metrics.StackAllocatedBoxedValueClasses++;
-                    }
-                    else
-                    {
-                        comp->Metrics.StackAllocatedRefClasses++;
-                    }
-                }
                 else
                 {
                     assert(onHeapReason != nullptr);
                     JITDUMP("Allocating V%02u on the heap: %s\n", lclNum, onHeapReason);
-                    data                         = MorphAllocObjNodeIntoHelperCall(asAllocObj);
-                    stmtExpr->AsLclVar()->Data() = data;
-                    stmtExpr->AddAllEffectsFlags(data);
+                    if (allocType == OAT_NEWOBJ)
+                    {
+                        data                         = MorphAllocObjNodeIntoHelperCall(data->AsAllocObj());
+                        stmtExpr->AsLclVar()->Data() = data;
+                        stmtExpr->AddAllEffectsFlags(data);
+                    }
                 }
             }
 #ifdef DEBUG
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index 790c53d8cab669..e068f1a381540f 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -22,6 +22,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 class ObjectAllocator final : public Phase
 {
     typedef SmallHashTable<unsigned int, unsigned int, 8U> LocalToLocalMap;
+    enum ObjectAllocationType
+    {
+        OAT_NONE,
+        OAT_NEWOBJ,
+        OAT_NEWARR
+    };
 
     //===============================================================================
     // Data members

From e0fa91e9e8254f0273c39e0d9014123af01e1cfc Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Tue, 16 Jul 2024 17:11:26 +0900
Subject: [PATCH 14/75] Remove invalid asserts

---
 src/coreclr/jit/gentree.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index 5b7bc8278cab7e..e2c2fbb7992558 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -7458,12 +7458,11 @@ struct GenTreeArrAddr : GenTreeUnOp
 
 public:
     GenTreeArrAddr(GenTree* addr, var_types elemType, CORINFO_CLASS_HANDLE elemClassHandle, uint8_t firstElemOffset)
-        : GenTreeUnOp(GT_ARR_ADDR, TYP_BYREF, addr DEBUGARG(/* largeNode */ false))
+        : GenTreeUnOp(GT_ARR_ADDR, addr->TypeGet(), addr DEBUGARG(/* largeNode */ false))
         , m_elemClassHandle(elemClassHandle)
         , m_elemType(elemType)
         , m_firstElemOffset(firstElemOffset)
     {
-        assert(addr->TypeIs(TYP_BYREF));
         assert(((elemType == TYP_STRUCT) && (elemClassHandle != NO_CLASS_HANDLE)) ||
                (elemClassHandle == NO_CLASS_HANDLE));
     }

From 9e0a04fc31b22d95900e7239165fd11736b7d609 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Tue, 16 Jul 2024 17:12:44 +0900
Subject: [PATCH 15/75] make compiler happy

---
 src/coreclr/jit/lclmorph.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp
index de4c2b2112e6f6..3a783951f8eb08 100644
--- a/src/coreclr/jit/lclmorph.cpp
+++ b/src/coreclr/jit/lclmorph.cpp
@@ -840,7 +840,7 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
                         ssize_t offset = node->AsIndexAddr()->gtElemOffset +
                                          node->gtGetOp2()->AsIntCon()->IconValue() * node->AsIndexAddr()->gtElemSize;
 
-                        if (offset < m_compiler->lvaLclSize(TopValue(1).LclNum()))
+                        if (offset < static_cast<ssize_t>(m_compiler->lvaLclSize(TopValue(1).LclNum())))
                         {
                             if (FitsIn<unsigned>(offset) &&
                                 TopValue(2).AddOffset(TopValue(1), static_cast<unsigned>(offset)))

From ae822f81d93bda42daa1034bdd833908d05ebab6 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Tue, 16 Jul 2024 19:03:51 +0900
Subject: [PATCH 16/75] Address review feedbacks

---
 src/coreclr/jit/lclmorph.cpp    | 135 ++++++++++++++++----------------
 src/coreclr/jit/objectalloc.cpp |  50 ++++++++----
 src/coreclr/jit/objectalloc.h   |  20 ++---
 3 files changed, 111 insertions(+), 94 deletions(-)

diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp
index 3a783951f8eb08..406e49e5009753 100644
--- a/src/coreclr/jit/lclmorph.cpp
+++ b/src/coreclr/jit/lclmorph.cpp
@@ -826,74 +826,6 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
 
         switch (node->OperGet())
         {
-            case GT_INDEX_ADDR:
-            {
-                assert(TopValue(2).Node() == node);
-                assert(TopValue(1).Node() == node->gtGetOp1());
-                assert(TopValue(0).Node() == node->gtGetOp2());
-
-                if (node->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_ADDR) && node->gtGetOp2()->IsCnsIntOrI())
-                {
-                    if (TopValue(1).IsAddress() &&
-                        m_compiler->lvaGetDesc(TopValue(1).LclNum())->GetLayout()->IsBlockLayout())
-                    {
-                        ssize_t offset = node->AsIndexAddr()->gtElemOffset +
-                                         node->gtGetOp2()->AsIntCon()->IconValue() * node->AsIndexAddr()->gtElemSize;
-
-                        if (offset < static_cast<ssize_t>(m_compiler->lvaLclSize(TopValue(1).LclNum())))
-                        {
-                            if (FitsIn<unsigned>(offset) &&
-                                TopValue(2).AddOffset(TopValue(1), static_cast<unsigned>(offset)))
-                            {
-                                INDEBUG(TopValue(0).Consume());
-                                PopValue();
-                                PopValue();
-                                break;
-                            }
-                        }
-                        else
-                        {
-                            GenTree* gtThrow =
-                                m_compiler->gtNewMustThrowException(CORINFO_HELP_RNGCHKFAIL, node->TypeGet(),
-                                                                    node->AsIndexAddr()->gtStructElemClass);
-                            m_compiler->lvaGetDesc(gtThrow->AsOp()->gtOp2->AsLclVarCommon())->incLvRefCnt(1, RCS_EARLY);
-                            *use           = gtThrow;
-                            m_stmtModified = true;
-                        }
-                    }
-                }
-
-                EscapeValue(TopValue(0), node);
-                PopValue();
-                EscapeValue(TopValue(0), node);
-                PopValue();
-                break;
-            }
-            case GT_ARR_LENGTH:
-            {
-                assert(TopValue(1).Node() == node);
-                assert(TopValue(0).Node() == node->gtGetOp1());
-
-                if (node->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_ADDR))
-                {
-                    if (TopValue(0).IsAddress() &&
-                        m_compiler->lvaGetDesc(TopValue(0).LclNum())->GetLayout()->IsBlockLayout())
-                    {
-                        GenTree* gtLclFld =
-                            m_compiler->gtNewLclFldNode(TopValue(0).LclNum(), TYP_INT, OFFSETOF__CORINFO_Array__length);
-                        SequenceLocal(gtLclFld->AsLclVarCommon());
-                        *use           = gtLclFld;
-                        m_stmtModified = true;
-                        INDEBUG(TopValue(0).Consume());
-                        PopValue();
-                        break;
-                    }
-                }
-
-                EscapeValue(TopValue(0), node);
-                PopValue();
-                break;
-            }
             case GT_STORE_LCL_FLD:
                 if (node->IsPartialLclFld(m_compiler))
                 {
@@ -1174,6 +1106,73 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
                 break;
             }
 
+            case GT_INDEX_ADDR:
+            {
+                assert(TopValue(2).Node() == node);
+                assert(TopValue(1).Node() == node->gtGetOp1());
+                assert(TopValue(0).Node() == node->gtGetOp2());
+
+                if (node->gtGetOp2()->IsCnsIntOrI() && TopValue(1).IsAddress())
+                {
+                    ssize_t offset = node->AsIndexAddr()->gtElemOffset +
+                                     node->gtGetOp2()->AsIntCon()->IconValue() * node->AsIndexAddr()->gtElemSize;
+
+                    if (offset < static_cast<ssize_t>(m_compiler->lvaLclExactSize(TopValue(1).LclNum())))
+                    {
+                        if (FitsIn<unsigned>(offset) &&
+                            TopValue(2).AddOffset(TopValue(1), static_cast<unsigned>(offset)))
+                        {
+                            INDEBUG(TopValue(0).Consume());
+                            PopValue();
+                            PopValue();
+                            break;
+                        }
+                    }
+                    else
+                    {
+                        *use           = m_compiler->gtNewOperNode(GT_COMMA, node->TypeGet(),
+                                                                   m_compiler->gtNewHelperCallNode(CORINFO_HELP_RNGCHKFAIL,
+                                                                                                   TYP_VOID),
+                                                                   m_compiler->gtNewIconNode(0, TYP_BYREF));
+                        m_stmtModified = true;
+                        INDEBUG(TopValue(0).Consume());
+                        PopValue();
+                        INDEBUG(TopValue(0).Consume());
+                        PopValue();
+                        break;
+                    }
+                }
+
+                EscapeValue(TopValue(0), node);
+                PopValue();
+                EscapeValue(TopValue(0), node);
+                PopValue();
+                break;
+            }
+
+            case GT_ARR_LENGTH:
+            {
+                assert(TopValue(1).Node() == node);
+                assert(TopValue(0).Node() == node->gtGetOp1());
+
+                if (TopValue(0).IsAddress())
+                {
+                    GenTree* gtLclFld =
+                        m_compiler->gtNewLclFldNode(TopValue(0).LclNum(), TYP_INT,
+                                                    TopValue(0).Offset() + OFFSETOF__CORINFO_Array__length);
+                    SequenceLocal(gtLclFld->AsLclVarCommon());
+                    *use           = gtLclFld;
+                    m_stmtModified = true;
+                    INDEBUG(TopValue(0).Consume());
+                    PopValue();
+                    break;
+                }
+
+                EscapeValue(TopValue(0), node);
+                PopValue();
+                break;
+            }
+
             default:
                 while (TopValue(0).Node() != node)
                 {
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 8b747df0e0bc5c..560cedbf4a6fe3 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -400,14 +400,10 @@ bool ObjectAllocator::MorphAllocObjNodes()
                 {
                     allocType = OAT_NEWOBJ;
                 }
-                else if (data->IsHelperCall())
+                else if (data->IsCall() && data->AsCall()->IsHelperCall(comp, CORINFO_HELP_NEWARR_1_VC) &&
+                         data->AsCall()->gtArgs.GetUserArgByIndex(0)->GetNode()->IsCnsIntOrI())
                 {
-                    GenTreeCall* call = data->AsCall();
-                    if (call->GetHelperNum() == CORINFO_HELP_NEWARR_1_VC &&
-                        call->gtArgs.GetArgByIndex(0)->GetNode()->IsCnsIntOrI())
-                    {
-                        allocType = OAT_NEWARR;
-                    }
+                    allocType = OAT_NEWARR;
                 }
             }
 
@@ -445,13 +441,21 @@ bool ObjectAllocator::MorphAllocObjNodes()
 
                         CallArg*             arg       = data->AsCall()->gtArgs.GetArgByIndex(0);
                         GenTree*             node      = arg->GetNode();
-                        CORINFO_CLASS_HANDLE clsHnd    = (CORINFO_CLASS_HANDLE)node->AsIntConCommon()->IntegralValue();
-                        GenTree*             len       = arg->GetNext()->GetNode();
-                        unsigned int         blockSize = 0;
+                        bool                 isExact   = false;
+                        bool                 isNonNull = false;
+                        CORINFO_CLASS_HANDLE clsHnd =
+                            comp->gtGetHelperCallClassHandle(data->AsCall(), &isExact, &isNonNull);
+                        GenTree*     len       = arg->GetNext()->GetNode();
+                        unsigned int blockSize = 0;
 
-                        // Don't attempt to do stack allocations inside basic blocks that may be in a loop.
-                        //
-                        if (!len->IsCnsIntOrI())
+                        comp->Metrics.NewArrayHelperCalls++;
+
+                        if (!isExact || !isNonNull)
+                        {
+                            onHeapReason = "[array type is either non-exact or null]";
+                            canStack     = false;
+                        }
+                        else if (!len->IsCnsIntOrI())
                         {
                             onHeapReason = "[non-constant size]";
                             canStack     = false;
@@ -619,6 +623,24 @@ GenTree* ObjectAllocator::MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* alloc
     return helperCall;
 }
 
+//------------------------------------------------------------------------
+// MorphNewArrNodeIntoStackAlloc: Morph a GT_CALL CORINFO_HELP_NEWARR_1_VC
+//                                node into stack allocation.
+//
+// Arguments:
+//    newArr       - GT_CALL that will be replaced by helper call.
+//    clsHndclsHnd - class representing the type of the array
+//    length       - length of the array
+//    blockSize    - size of the layout
+//    block        - a basic block where newArr is
+//    stmt         - a statement where newArr is
+//
+// Return Value:
+//    local num for the new stack allocated local
+//
+// Notes:
+//    This function can insert additional statements before stmt.
+//
 unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*         newArr,
                                                             CORINFO_CLASS_HANDLE clsHnd,
                                                             unsigned int         length,
@@ -819,7 +841,6 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack<GenTree*>* parent
             case GT_NE:
             case GT_NULLCHECK:
             case GT_ARR_LENGTH:
-            case GT_INDEX_ADDR:
                 canLclVarEscapeViaParentStack = false;
                 break;
 
@@ -836,6 +857,7 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack<GenTree*>* parent
             case GT_ADD:
             case GT_BOX:
             case GT_FIELD_ADDR:
+            case GT_INDEX_ADDR:
                 // Check whether the local escapes via its grandparent.
                 ++parentIndex;
                 keepChecking = true;
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index e068f1a381540f..3975995e5cf046 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -167,22 +167,18 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(
     }
     else if (comp->info.compCompHnd->isSDArray(clsHnd))
     {
-        CorInfoType type = comp->info.compCompHnd->getChildType(clsHnd, &clsHnd);
-        if (type != CORINFO_TYPE_UNDEF && clsHnd == nullptr)
-        {
-            // This is an array of primitive types
-            classSize = (unsigned int)OFFSETOF__CORINFO_Array__data + genTypeSize(JITtype2varType(type)) * length;
-        }
-        else if (comp->info.compCompHnd->isValueClass(clsHnd))
-        {
-            classSize =
-                (unsigned int)OFFSETOF__CORINFO_Array__data + comp->info.compCompHnd->getClassSize(clsHnd) * length;
-        }
-        else
+        CORINFO_CLASS_HANDLE elemClsHnd = NO_CLASS_HANDLE;
+        CorInfoType          corType    = comp->info.compCompHnd->getChildType(clsHnd, &elemClsHnd);
+        var_types            type       = JITtype2varType(corType);
+        ClassLayout*         elemLayout = type == TYP_STRUCT ? comp->typGetObjLayout(elemClsHnd) : nullptr;
+        if (varTypeIsGC(type) || ((elemLayout != nullptr) && elemLayout->HasGCPtr()))
         {
             *reason = "[array contains gc refs]";
             return false;
         }
+
+        unsigned elemSize = elemLayout != nullptr ? elemLayout->GetSize() : genTypeSize(type);
+        classSize         = (unsigned int)OFFSETOF__CORINFO_Array__data + elemSize * length;
     }
     else
     {

From a4588bb8ac81e0287636a668c7c99ecd5ccc7250 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Tue, 16 Jul 2024 20:20:40 +0900
Subject: [PATCH 17/75] Fix INDEX_ADDR and add Sub

---
 src/coreclr/jit/objectalloc.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 560cedbf4a6fe3..d795fdb1ae6ba5 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -855,6 +855,7 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack<GenTree*>* parent
             case GT_COLON:
             case GT_QMARK:
             case GT_ADD:
+            case GT_SUB:
             case GT_BOX:
             case GT_FIELD_ADDR:
             case GT_INDEX_ADDR:
@@ -939,7 +940,6 @@ void ObjectAllocator::UpdateAncestorTypes(GenTree* tree, ArrayStack<GenTree*>* p
             case GT_NE:
             case GT_NULLCHECK:
             case GT_ARR_LENGTH:
-            case GT_INDEX_ADDR:
                 break;
 
             case GT_COMMA:
@@ -951,7 +951,9 @@ void ObjectAllocator::UpdateAncestorTypes(GenTree* tree, ArrayStack<GenTree*>* p
                 FALLTHROUGH;
             case GT_QMARK:
             case GT_ADD:
+            case GT_SUB:
             case GT_FIELD_ADDR:
+            case GT_INDEX_ADDR:
                 if (parent->TypeGet() == TYP_REF)
                 {
                     parent->ChangeType(newType);

From 32b9e2633c76681fee7ac89544d76cbf54b84b17 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Tue, 16 Jul 2024 20:29:06 +0900
Subject: [PATCH 18/75] Support IsAddressLessThan and its friends

---
 src/coreclr/jit/objectalloc.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index d795fdb1ae6ba5..d398d390dd8c86 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -839,6 +839,10 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack<GenTree*>* parent
 
             case GT_EQ:
             case GT_NE:
+            case GT_LT:
+            case GT_GT:
+            case GT_LE:
+            case GT_GE:
             case GT_NULLCHECK:
             case GT_ARR_LENGTH:
                 canLclVarEscapeViaParentStack = false;
@@ -938,6 +942,10 @@ void ObjectAllocator::UpdateAncestorTypes(GenTree* tree, ArrayStack<GenTree*>* p
 
             case GT_EQ:
             case GT_NE:
+            case GT_LT:
+            case GT_GT:
+            case GT_LE:
+            case GT_GE:
             case GT_NULLCHECK:
             case GT_ARR_LENGTH:
                 break;

From 39d1ad9a2ba71031550f92259c1626e0ec497404 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Tue, 16 Jul 2024 23:24:00 +0900
Subject: [PATCH 19/75] Fix assertions

---
 src/coreclr/jit/objectalloc.cpp            | 22 +++++++++++++++-------
 src/coreclr/jit/objectalloc.h              |  7 +++++++
 src/coreclr/jit/promotiondecomposition.cpp |  5 +++++
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index d398d390dd8c86..c6fe84fd48abdf 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -336,17 +336,25 @@ void ObjectAllocator::ComputeStackObjectPointers(BitVecTraits* bitVecTraits)
                     {
                         // Check if we know what is assigned to this pointer.
                         unsigned bitCount = BitVecOps::Count(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclNum]);
-                        assert(bitCount <= 1);
-                        if (bitCount == 1)
+                        if (bitCount > 0)
                         {
                             BitVecOps::Iter iter(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclNum]);
-                            unsigned        rhsLclNum = 0;
-                            iter.NextElem(&rhsLclNum);
+                            unsigned        rhsLclNum               = 0;
+                            bool            definitelyStackPointing = true;
 
-                            if (DoesLclVarPointToStack(rhsLclNum))
+                            while (iter.NextElem(&rhsLclNum))
                             {
-                                // The only store to lclNum local is the definitely-stack-pointing
-                                // rhsLclNum local so lclNum local is also definitely-stack-pointing.
+                                if (!DoesLclVarPointToStack(rhsLclNum))
+                                {
+                                    definitelyStackPointing = false;
+                                    break;
+                                }
+                            }
+
+                            if (definitelyStackPointing)
+                            {
+                                // All stores to lclNum local are the definitely-stack-pointing
+                                // rhsLclNum locals so lclNum local is also definitely-stack-pointing.
                                 MarkLclVarAsDefinitelyStackPointing(lclNum);
                             }
                         }
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index 3975995e5cf046..abd4e8e4067d8a 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -140,6 +140,7 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(
 
     bool enableBoxedValueClasses = true;
     bool enableRefClasses        = true;
+    bool enableArrays            = true;
     *reason                      = "[ok]";
 
 #ifdef DEBUG
@@ -167,6 +168,12 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(
     }
     else if (comp->info.compCompHnd->isSDArray(clsHnd))
     {
+        if (!enableArrays)
+        {
+            *reason = "[disabled by config]";
+            return false;
+        }
+
         CORINFO_CLASS_HANDLE elemClsHnd = NO_CLASS_HANDLE;
         CorInfoType          corType    = comp->info.compCompHnd->getChildType(clsHnd, &elemClsHnd);
         var_types            type       = JITtype2varType(corType);
diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp
index bf89852547fd7b..8be998f74c16d6 100644
--- a/src/coreclr/jit/promotiondecomposition.cpp
+++ b/src/coreclr/jit/promotiondecomposition.cpp
@@ -541,6 +541,11 @@ class DecompositionPlan
 
         if (addr != nullptr)
         {
+            if (addr->IsInvariant())
+            {
+                indirFlags |= GTF_IND_INVARIANT;
+            }
+
             for (int i = 0; i < m_entries.Height(); i++)
             {
                 if (!CanSkipEntry(m_entries.BottomRef(i), dstDeaths, remainderStrategy))

From 9f408b2562f782de46c9d4d0b9be87f9747dfa89 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Wed, 17 Jul 2024 00:29:13 +0900
Subject: [PATCH 20/75] Use new overload

---
 src/coreclr/jit/objectalloc.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index c6fe84fd48abdf..8754d90932e08e 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -408,7 +408,7 @@ bool ObjectAllocator::MorphAllocObjNodes()
                 {
                     allocType = OAT_NEWOBJ;
                 }
-                else if (data->IsCall() && data->AsCall()->IsHelperCall(comp, CORINFO_HELP_NEWARR_1_VC) &&
+                else if (data->IsHelperCall(comp, CORINFO_HELP_NEWARR_1_VC) &&
                          data->AsCall()->gtArgs.GetUserArgByIndex(0)->GetNode()->IsCnsIntOrI())
                 {
                     allocType = OAT_NEWARR;

From 418a62bb6ac72313cbe87b9519d4f5b87ffe6544 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen <jakob.botsch.nielsen@gmail.com>
Date: Tue, 16 Jul 2024 18:11:54 +0200
Subject: [PATCH 21/75] JIT: Remove GTF_IND_INVARIANT and GTF_IND_NONFAULTING
 flags checking

These flags are strictly optimizations. Having them required to be set
for certain indirs based on context of the operand introduces IR
invariants that are annoying to work with since it suddenly becomes
necessary for all transformations to consider "did we just introduce
this IR shape?". Instead, handle these patterns during morph as the
optimization it is and remove the strict flags checking from
`fgDebugCheckFlags`.
---
 src/coreclr/jit/fgdiagnostic.cpp | 30 ----------------------------
 src/coreclr/jit/gentree.cpp      | 34 +++++++++++++++++++++++++-------
 src/coreclr/jit/morph.cpp        | 12 +++++++++--
 3 files changed, 37 insertions(+), 39 deletions(-)

diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp
index 45cdcfea0f46b6..4818d85f8381eb 100644
--- a/src/coreclr/jit/fgdiagnostic.cpp
+++ b/src/coreclr/jit/fgdiagnostic.cpp
@@ -3361,36 +3361,6 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block)
 
         case GT_IND:
             // Do we have a constant integer address as op1 that is also a handle?
-            if (op1->IsIconHandle())
-            {
-                if ((tree->gtFlags & GTF_IND_INVARIANT) != 0)
-                {
-                    actualFlags |= GTF_IND_INVARIANT;
-                }
-                if ((tree->gtFlags & GTF_IND_NONFAULTING) != 0)
-                {
-                    actualFlags |= GTF_IND_NONFAULTING;
-                }
-
-                GenTreeFlags handleKind = op1->GetIconHandleFlag();
-
-                // Some of these aren't handles to invariant data...
-                if (GenTree::HandleKindDataIsInvariant(handleKind) && (handleKind != GTF_ICON_FTN_ADDR))
-                {
-                    expectedFlags |= GTF_IND_INVARIANT;
-                }
-                else
-                {
-                    // For statics, we expect the GTF_GLOB_REF to be set. However, we currently
-                    // fail to set it in a number of situations, and so this check is disabled.
-                    // TODO: enable checking of GTF_GLOB_REF.
-                    // expectedFlags |= GTF_GLOB_REF;
-                }
-
-                // Currently we expect all indirections with constant addresses to be nonfaulting.
-                expectedFlags |= GTF_IND_NONFAULTING;
-            }
-
             assert(((tree->gtFlags & GTF_IND_TGT_NOT_HEAP) == 0) || ((tree->gtFlags & GTF_IND_TGT_HEAP) == 0));
             break;
 
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index f7da7729d23752..0b39412bc5334d 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -10631,8 +10631,7 @@ void GenTree::SetIndirExceptionFlags(Compiler* comp)
 
 //------------------------------------------------------------------------------
 // HandleKindDataIsInvariant: Returns true if the data referred to by a handle
-// address is guaranteed to be invariant. Note that GTF_ICON_FTN_ADDR handles may
-// or may not point to invariant data.
+// address is guaranteed to be invariant.
 //
 // Arguments:
 //    flags - GenTree flags for handle.
@@ -10643,11 +10642,32 @@ bool GenTree::HandleKindDataIsInvariant(GenTreeFlags flags)
     GenTreeFlags handleKind = flags & GTF_ICON_HDL_MASK;
     assert(handleKind != GTF_EMPTY);
 
-    // All handle types are assumed invariant except those specifically listed here.
-
-    return (handleKind != GTF_ICON_STATIC_HDL) && // Pointer to a mutable class Static variable
-           (handleKind != GTF_ICON_BBC_PTR) &&    // Pointer to a mutable basic block count value
-           (handleKind != GTF_ICON_GLOBAL_PTR);   // Pointer to mutable data from the VM state
+    switch (handleKind)
+    {
+        case GTF_ICON_SCOPE_HDL:
+        case GTF_ICON_CLASS_HDL:
+        case GTF_ICON_METHOD_HDL:
+        case GTF_ICON_FIELD_HDL:
+        case GTF_ICON_STR_HDL:
+        case GTF_ICON_CONST_PTR:
+        case GTF_ICON_VARG_HDL:
+        case GTF_ICON_PINVKI_HDL:
+        case GTF_ICON_TOKEN_HDL:
+        case GTF_ICON_TLS_HDL:
+        case GTF_ICON_CIDMID_HDL:
+        case GTF_ICON_FIELD_SEQ:
+        case GTF_ICON_STATIC_ADDR_PTR:
+        case GTF_ICON_SECREL_OFFSET:
+        case GTF_ICON_TLSGD_OFFSET:
+            return true;
+        case GTF_ICON_FTN_ADDR:
+        case GTF_ICON_GLOBAL_PTR:
+        case GTF_ICON_STATIC_HDL:
+        case GTF_ICON_BBC_PTR:
+        case GTF_ICON_STATIC_BOX_PTR:
+        default:
+            return false;
+    }
 }
 
 #ifdef DEBUG
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 8e510cdb99fc11..1045d96258bd61 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -8679,9 +8679,17 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
 
         case GT_IND:
         {
-            if (op1->IsIconHandle(GTF_ICON_OBJ_HDL))
+            if (op1->IsIconHandle())
             {
-                tree->gtFlags |= (GTF_IND_INVARIANT | GTF_IND_NONFAULTING | GTF_IND_NONNULL);
+                // All indirections with (handle) constant addresses are
+                // nonfaulting.
+                tree->gtFlags |= GTF_IND_NONFAULTING;
+
+                // We know some handle types always point to invariant data.
+                if (GenTree::HandleKindDataIsInvariant(op1->GetIconHandleFlag()))
+                {
+                    tree->gtFlags |= GTF_IND_INVARIANT;
+                }
             }
 
             GenTree* optimizedTree = fgMorphFinalizeIndir(tree->AsIndir());

From 4572408c20a95e905b1e3a1b8623d5127f6db57f Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen <jakob.botsch.nielsen@gmail.com>
Date: Tue, 16 Jul 2024 18:16:50 +0200
Subject: [PATCH 22/75] Remove old comment

---
 src/coreclr/jit/fgdiagnostic.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp
index 4818d85f8381eb..9f1a72afce63e4 100644
--- a/src/coreclr/jit/fgdiagnostic.cpp
+++ b/src/coreclr/jit/fgdiagnostic.cpp
@@ -3360,7 +3360,6 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block)
             break;
 
         case GT_IND:
-            // Do we have a constant integer address as op1 that is also a handle?
             assert(((tree->gtFlags & GTF_IND_TGT_NOT_HEAP) == 0) || ((tree->gtFlags & GTF_IND_TGT_HEAP) == 0));
             break;
 

From 925576242fecf5b8d579d864394fe71fea5608f9 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Wed, 17 Jul 2024 00:57:38 +0900
Subject: [PATCH 23/75] Expose jitconfig

---
 src/coreclr/jit/jitconfigvalues.h | 1 +
 src/coreclr/jit/objectalloc.cpp   | 2 +-
 src/coreclr/jit/objectalloc.h     | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 50824303d2f783..d26b3ca87cd9a2 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -650,6 +650,7 @@ CONFIG_STRING(JitObjectStackAllocationRange, W("JitObjectStackAllocationRange"))
 RELEASE_CONFIG_INTEGER(JitObjectStackAllocation, W("JitObjectStackAllocation"), 1)
 RELEASE_CONFIG_INTEGER(JitObjectStackAllocationRefClass, W("JitObjectStackAllocationRefClass"), 1)
 RELEASE_CONFIG_INTEGER(JitObjectStackAllocationBoxedValueClass, W("JitObjectStackAllocationBoxedValueClass"), 1)
+RELEASE_CONFIG_INTEGER(JitObjectStackAllocationArray, W("JitObjectStackAllocationArray"), 1)
 
 RELEASE_CONFIG_INTEGER(JitEECallTimingInfo, W("JitEECallTimingInfo"), 0)
 
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 8754d90932e08e..1d653119784ea3 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -442,7 +442,7 @@ bool ObjectAllocator::MorphAllocObjNodes()
                         // We expect the following expression tree at this point
                         //  STMTx (IL 0x... ???)
                         //    * STORE_LCL_VAR   ref
-                        //    \--*  CALL helper  ref
+                        //    \--*  CALL help  ref    CORINFO_HELP_NEWARR_1_VC
                         //       +--*  CNS_INT(h) long
                         //       \--*  CNS_INT long
                         //------------------------------------------------------------------------
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index abd4e8e4067d8a..4b1f77895d630d 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -146,6 +146,7 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(
 #ifdef DEBUG
     enableBoxedValueClasses = (JitConfig.JitObjectStackAllocationBoxedValueClass() != 0);
     enableRefClasses        = (JitConfig.JitObjectStackAllocationRefClass() != 0);
+    enableArrays            = (JitConfig.JitObjectStackAllocationArray() != 0);
 #endif
 
     unsigned int classSize = 0;

From 1af84b9e6196837563f03c4abbaa20dfa1f77ce0 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen <jakob.botsch.nielsen@gmail.com>
Date: Tue, 16 Jul 2024 18:53:19 +0200
Subject: [PATCH 24/75] Remove another assert

---
 src/coreclr/jit/gentree.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 0b39412bc5334d..60ff8d61b65cd3 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -7571,8 +7571,6 @@ GenTree* Compiler::gtNewIndOfIconHandleNode(var_types indType, size_t addr, GenT
 
     if (isInvariant)
     {
-        assert(GenTree::HandleKindDataIsInvariant(iconFlags));
-
         // This indirection also is invariant.
         indirFlags |= GTF_IND_INVARIANT;
 

From 629c793fb9620803ba49acca0a242cf1a39fa3a7 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen <jakob.botsch.nielsen@gmail.com>
Date: Tue, 16 Jul 2024 19:34:55 +0200
Subject: [PATCH 25/75] Count

---
 src/coreclr/jit/gentree.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index 757ce23912d732..c3eb11fbffa68e 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -513,18 +513,18 @@ enum GenTreeFlags : unsigned int
     GTF_ICON_FIELD_HDL          = 0x04000000, // GT_CNS_INT -- constant is a field handle
     GTF_ICON_STATIC_HDL         = 0x05000000, // GT_CNS_INT -- constant is a handle to static data
     GTF_ICON_STR_HDL            = 0x06000000, // GT_CNS_INT -- constant is a pinned handle pointing to a string object
-    GTF_ICON_OBJ_HDL            = 0x12000000, // GT_CNS_INT -- constant is an object handle (e.g. frozen string or Type object)
-    GTF_ICON_CONST_PTR          = 0x07000000, // GT_CNS_INT -- constant is a pointer to immutable data, (e.g. IAT_PPVALUE)
-    GTF_ICON_GLOBAL_PTR         = 0x08000000, // GT_CNS_INT -- constant is a pointer to mutable data (e.g. from the VM state)
-    GTF_ICON_VARG_HDL           = 0x09000000, // GT_CNS_INT -- constant is a var arg cookie handle
-    GTF_ICON_PINVKI_HDL         = 0x0A000000, // GT_CNS_INT -- constant is a pinvoke calli handle
-    GTF_ICON_TOKEN_HDL          = 0x0B000000, // GT_CNS_INT -- constant is a token handle (other than class, method or field)
-    GTF_ICON_TLS_HDL            = 0x0C000000, // GT_CNS_INT -- constant is a TLS ref with offset
-    GTF_ICON_FTN_ADDR           = 0x0D000000, // GT_CNS_INT -- constant is a function address
-    GTF_ICON_CIDMID_HDL         = 0x0E000000, // GT_CNS_INT -- constant is a class ID or a module ID
-    GTF_ICON_BBC_PTR            = 0x0F000000, // GT_CNS_INT -- constant is a basic block count pointer
-    GTF_ICON_STATIC_BOX_PTR     = 0x10000000, // GT_CNS_INT -- constant is an address of the box for a STATIC_IN_HEAP field
-    GTF_ICON_FIELD_SEQ          = 0x11000000, // <--------> -- constant is a FieldSeq* (used only as VNHandle)
+    GTF_ICON_OBJ_HDL            = 0x08000000, // GT_CNS_INT -- constant is an object handle (e.g. frozen string or Type object)
+    GTF_ICON_CONST_PTR          = 0x08000000, // GT_CNS_INT -- constant is a pointer to immutable data, (e.g. IAT_PPVALUE)
+    GTF_ICON_GLOBAL_PTR         = 0x09000000, // GT_CNS_INT -- constant is a pointer to mutable data (e.g. from the VM state)
+    GTF_ICON_VARG_HDL           = 0x0A000000, // GT_CNS_INT -- constant is a var arg cookie handle
+    GTF_ICON_PINVKI_HDL         = 0x0B000000, // GT_CNS_INT -- constant is a pinvoke calli handle
+    GTF_ICON_TOKEN_HDL          = 0x0C000000, // GT_CNS_INT -- constant is a token handle (other than class, method or field)
+    GTF_ICON_TLS_HDL            = 0x0D000000, // GT_CNS_INT -- constant is a TLS ref with offset
+    GTF_ICON_FTN_ADDR           = 0x0E000000, // GT_CNS_INT -- constant is a function address
+    GTF_ICON_CIDMID_HDL         = 0x0F000000, // GT_CNS_INT -- constant is a class ID or a module ID
+    GTF_ICON_BBC_PTR            = 0x10000000, // GT_CNS_INT -- constant is a basic block count pointer
+    GTF_ICON_STATIC_BOX_PTR     = 0x11000000, // GT_CNS_INT -- constant is an address of the box for a STATIC_IN_HEAP field
+    GTF_ICON_FIELD_SEQ          = 0x12000000, // <--------> -- constant is a FieldSeq* (used only as VNHandle)
     GTF_ICON_STATIC_ADDR_PTR    = 0x13000000, // GT_CNS_INT -- constant is a pointer to a static base address
     GTF_ICON_SECREL_OFFSET      = 0x14000000, // GT_CNS_INT -- constant is an offset in a certain section.
     GTF_ICON_TLSGD_OFFSET       = 0x15000000, // GT_CNS_INT -- constant is an argument to tls_get_addr.

From b578203f3ab0e1dee81c8235c03612f3b2f9cac6 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen <jakob.botsch.nielsen@gmail.com>
Date: Tue, 16 Jul 2024 19:35:26 +0200
Subject: [PATCH 26/75] Try 2 at counting

---
 src/coreclr/jit/gentree.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index c3eb11fbffa68e..94e88caedc7c7e 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -513,7 +513,7 @@ enum GenTreeFlags : unsigned int
     GTF_ICON_FIELD_HDL          = 0x04000000, // GT_CNS_INT -- constant is a field handle
     GTF_ICON_STATIC_HDL         = 0x05000000, // GT_CNS_INT -- constant is a handle to static data
     GTF_ICON_STR_HDL            = 0x06000000, // GT_CNS_INT -- constant is a pinned handle pointing to a string object
-    GTF_ICON_OBJ_HDL            = 0x08000000, // GT_CNS_INT -- constant is an object handle (e.g. frozen string or Type object)
+    GTF_ICON_OBJ_HDL            = 0x07000000, // GT_CNS_INT -- constant is an object handle (e.g. frozen string or Type object)
     GTF_ICON_CONST_PTR          = 0x08000000, // GT_CNS_INT -- constant is a pointer to immutable data, (e.g. IAT_PPVALUE)
     GTF_ICON_GLOBAL_PTR         = 0x09000000, // GT_CNS_INT -- constant is a pointer to mutable data (e.g. from the VM state)
     GTF_ICON_VARG_HDL           = 0x0A000000, // GT_CNS_INT -- constant is a var arg cookie handle

From b4445f6e450f30f1f5557f7ceaf196dc365cd003 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Wed, 17 Jul 2024 02:56:07 +0900
Subject: [PATCH 27/75] Introduce BBF_HAS_NEWARR

---
 src/coreclr/jit/block.cpp        | 1 +
 src/coreclr/jit/block.h          | 3 ++-
 src/coreclr/jit/fgdiagnostic.cpp | 4 ++++
 src/coreclr/jit/importer.cpp     | 1 +
 src/coreclr/jit/objectalloc.cpp  | 3 ++-
 5 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp
index be2ba15e254690..e9ad2d072ec554 100644
--- a/src/coreclr/jit/block.cpp
+++ b/src/coreclr/jit/block.cpp
@@ -574,6 +574,7 @@ void BasicBlock::dspFlags() const
         {BBF_HAS_IDX_LEN, "idxlen"},
         {BBF_HAS_MD_IDX_LEN, "mdidxlen"},
         {BBF_HAS_NEWOBJ, "newobj"},
+        {BBF_HAS_NEWARR, "newarr"},
         {BBF_HAS_NULLCHECK, "nullcheck"},
         {BBF_BACKWARD_JUMP, "bwd"},
         {BBF_BACKWARD_JUMP_TARGET, "bwd-target"},
diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h
index e0989df456900f..ea95994d580d05 100644
--- a/src/coreclr/jit/block.h
+++ b/src/coreclr/jit/block.h
@@ -462,7 +462,8 @@ enum BasicBlockFlags : uint64_t
     BBF_NO_CSE_IN                      = MAKE_BBFLAG(38), // Block should kill off any incoming CSE
     BBF_CAN_ADD_PRED                   = MAKE_BBFLAG(39), // Ok to add pred edge to this block, even when "safe" edge creation disabled
     BBF_HAS_VALUE_PROFILE              = MAKE_BBFLAG(40), // Block has a node that needs a value probing
-
+    
+    BBF_HAS_NEWARR                     = MAKE_BBFLAG(41), // BB contains 'new' of an array type.
     // The following are sets of flags.
 
     // Flags to update when two blocks are compacted
diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp
index 45cdcfea0f46b6..951503ffef45e8 100644
--- a/src/coreclr/jit/fgdiagnostic.cpp
+++ b/src/coreclr/jit/fgdiagnostic.cpp
@@ -973,6 +973,10 @@ bool Compiler::fgDumpFlowGraph(Phases phase, PhasePosition pos)
             {
                 fprintf(fgxFile, "\n            hot=\"true\"");
             }
+            if (block->HasFlag(BBF_HAS_NEWARR))
+            {
+                fprintf(fgxFile, "\n            callsNewArr=\"true\"");
+            }
             if (block->HasFlag(BBF_HAS_NEWOBJ))
             {
                 fprintf(fgxFile, "\n            callsNew=\"true\"");
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index e2a9ec8d48384d..87f2210bd40ef5 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -9532,6 +9532,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                 op1->AsCall()->compileTimeHelperArgumentHandle = (CORINFO_GENERIC_HANDLE)resolvedToken.hClass;
 
                 // Remember that this function contains 'new' of an SD array.
+                block->SetFlags(BBF_HAS_NEWARR);
                 optMethodFlags |= OMF_HAS_NEWARRAY;
 
                 // We assign the newly allocated object (by a GT_CALL to newarr node)
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 1d653119784ea3..4b2a72405ebdc6 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -385,9 +385,10 @@ bool ObjectAllocator::MorphAllocObjNodes()
     for (BasicBlock* const block : comp->Blocks())
     {
         const bool basicBlockHasNewObj       = block->HasFlag(BBF_HAS_NEWOBJ);
+        const bool basicBlockHasNewArr       = block->HasFlag(BBF_HAS_NEWARR);
         const bool basicBlockHasBackwardJump = block->HasFlag(BBF_BACKWARD_JUMP);
 #ifndef DEBUG
-        if (!basicBlockHasNewObj)
+        if (!basicBlockHasNewObj && !basicBlockHasNewArr)
         {
             continue;
         }

From af9c40e109fbe9404d31ac6a138984e7f8f61a88 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Wed, 17 Jul 2024 03:04:10 +0900
Subject: [PATCH 28/75] Early exit on debug as well

---
 src/coreclr/jit/objectalloc.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 4b2a72405ebdc6..b84a9b7b809a5c 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -387,12 +387,11 @@ bool ObjectAllocator::MorphAllocObjNodes()
         const bool basicBlockHasNewObj       = block->HasFlag(BBF_HAS_NEWOBJ);
         const bool basicBlockHasNewArr       = block->HasFlag(BBF_HAS_NEWARR);
         const bool basicBlockHasBackwardJump = block->HasFlag(BBF_BACKWARD_JUMP);
-#ifndef DEBUG
+
         if (!basicBlockHasNewObj && !basicBlockHasNewArr)
         {
             continue;
         }
-#endif // DEBUG
 
         for (Statement* const stmt : block->Statements())
         {

From 8b54f5ad449af86cacdbd2acb0e2c8272df283fb Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Wed, 17 Jul 2024 03:11:23 +0900
Subject: [PATCH 29/75] Update computed flags

---
 src/coreclr/jit/block.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h
index ea95994d580d05..c70c4abdfe289c 100644
--- a/src/coreclr/jit/block.h
+++ b/src/coreclr/jit/block.h
@@ -469,7 +469,7 @@ enum BasicBlockFlags : uint64_t
     // Flags to update when two blocks are compacted
 
     BBF_COMPACT_UPD = BBF_GC_SAFE_POINT | BBF_NEEDS_GCPOLL | BBF_HAS_JMP | BBF_HAS_IDX_LEN | BBF_HAS_MD_IDX_LEN | BBF_BACKWARD_JUMP | \
-                      BBF_HAS_NEWOBJ | BBF_HAS_NULLCHECK | BBF_HAS_MDARRAYREF | BBF_LOOP_HEAD,
+                      BBF_HAS_NEWOBJ | BBF_HAS_NEWARR | BBF_HAS_NULLCHECK | BBF_HAS_MDARRAYREF | BBF_LOOP_HEAD,
 
     // Flags a block should not have had before it is split.
 
@@ -487,7 +487,7 @@ enum BasicBlockFlags : uint64_t
     // For example, the bottom block might or might not have BBF_HAS_NULLCHECK, but we assume it has BBF_HAS_NULLCHECK.
     // TODO: Should BBF_RUN_RARELY be added to BBF_SPLIT_GAINED ?
 
-    BBF_SPLIT_GAINED = BBF_DONT_REMOVE | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_MD_IDX_LEN | BBF_PROF_WEIGHT | \
+    BBF_SPLIT_GAINED = BBF_DONT_REMOVE | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_MD_IDX_LEN | BBF_PROF_WEIGHT | BBF_HAS_NEWARR | \
                        BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | BBF_HAS_HISTOGRAM_PROFILE | BBF_HAS_VALUE_PROFILE | BBF_HAS_MDARRAYREF | BBF_NEEDS_GCPOLL,
 
     // Flags that must be propagated to a new block if code is copied from a block to a new block. These are flags that
@@ -495,7 +495,7 @@ enum BasicBlockFlags : uint64_t
     // have actually copied one of these type of tree nodes, but if we only copy a portion of the block's statements,
     // we don't know (unless we actually pay close attention during the copy).
 
-    BBF_COPY_PROPAGATE = BBF_HAS_NEWOBJ | BBF_HAS_NULLCHECK | BBF_HAS_IDX_LEN | BBF_HAS_MD_IDX_LEN | BBF_HAS_MDARRAYREF,
+    BBF_COPY_PROPAGATE = BBF_HAS_NEWOBJ | BBF_HAS_NEWARR | BBF_HAS_NULLCHECK | BBF_HAS_IDX_LEN | BBF_HAS_MD_IDX_LEN | BBF_HAS_MDARRAYREF,
 };
 
 FORCEINLINE

From 6eca58d0c8017e841eade106ad2cd63fe8e0c23c Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Wed, 17 Jul 2024 20:30:23 +0900
Subject: [PATCH 30/75] Partially revert 39d1ad9

---
 src/coreclr/jit/promotiondecomposition.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp
index 8be998f74c16d6..bf89852547fd7b 100644
--- a/src/coreclr/jit/promotiondecomposition.cpp
+++ b/src/coreclr/jit/promotiondecomposition.cpp
@@ -541,11 +541,6 @@ class DecompositionPlan
 
         if (addr != nullptr)
         {
-            if (addr->IsInvariant())
-            {
-                indirFlags |= GTF_IND_INVARIANT;
-            }
-
             for (int i = 0; i < m_entries.Height(); i++)
             {
                 if (!CanSkipEntry(m_entries.BottomRef(i), dstDeaths, remainderStrategy))

From 49d85091f20ea7d90dfde8e7227b1281038a20f6 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Thu, 18 Jul 2024 00:45:42 +0900
Subject: [PATCH 31/75] Reuse existing comma node

---
 src/coreclr/jit/gentree.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index e036d9cc1b58fa..5f799e7d5010f4 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -17139,7 +17139,15 @@ GenTree* Compiler::gtWrapWithSideEffects(GenTree*     tree,
         // It should be possible to be smarter here and allow such cases by extracting the side effects
         // properly for this particular case. For now, caller is responsible for avoiding such cases.
 
-        tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), sideEffects, tree);
+        if (sideEffectsSource->OperIs(GT_COMMA) && sideEffectsSource->AsOp()->gtOp1 == sideEffects)
+        {
+            sideEffectsSource->AsOp()->gtOp2 = tree;
+            return sideEffectsSource;
+        }
+        else
+        {
+            tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), sideEffects, tree);
+        }
     }
     return tree;
 }

From 4c6e35918661bda0e852db93bf88ea9f6f56281d Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Thu, 18 Jul 2024 02:00:54 +0900
Subject: [PATCH 32/75] Respect IsBoundsChecked

---
 src/coreclr/jit/lclmorph.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp
index 406e49e5009753..fe8e5427a77035 100644
--- a/src/coreclr/jit/lclmorph.cpp
+++ b/src/coreclr/jit/lclmorph.cpp
@@ -1117,7 +1117,8 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
                     ssize_t offset = node->AsIndexAddr()->gtElemOffset +
                                      node->gtGetOp2()->AsIntCon()->IconValue() * node->AsIndexAddr()->gtElemSize;
 
-                    if (offset < static_cast<ssize_t>(m_compiler->lvaLclExactSize(TopValue(1).LclNum())))
+                    if (!node->AsIndexAddr()->IsBoundsChecked() ||
+                        offset < static_cast<ssize_t>(m_compiler->lvaLclExactSize(TopValue(1).LclNum())))
                     {
                         if (FitsIn<unsigned>(offset) &&
                             TopValue(2).AddOffset(TopValue(1), static_cast<unsigned>(offset)))

From 4d8437914ccbfe468029bc4c7d437e948b061807 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Thu, 18 Jul 2024 02:43:33 +0900
Subject: [PATCH 33/75] Check lowerbound too

---
 src/coreclr/jit/lclmorph.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp
index fe8e5427a77035..47e8b894573c5a 100644
--- a/src/coreclr/jit/lclmorph.cpp
+++ b/src/coreclr/jit/lclmorph.cpp
@@ -1118,7 +1118,8 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
                                      node->gtGetOp2()->AsIntCon()->IconValue() * node->AsIndexAddr()->gtElemSize;
 
                     if (!node->AsIndexAddr()->IsBoundsChecked() ||
-                        offset < static_cast<ssize_t>(m_compiler->lvaLclExactSize(TopValue(1).LclNum())))
+                        (static_cast<ssize_t>(node->AsIndexAddr()->gtElemOffset) <= offset &&
+                         offset < static_cast<ssize_t>(m_compiler->lvaLclExactSize(TopValue(1).LclNum()))))
                     {
                         if (FitsIn<unsigned>(offset) &&
                             TopValue(2).AddOffset(TopValue(1), static_cast<unsigned>(offset)))

From c0cad851f532f309218e166936b4162faed4d96c Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Thu, 18 Jul 2024 16:39:55 +0900
Subject: [PATCH 34/75] Fix assertion take 2

---
 src/coreclr/jit/gentree.cpp | 16 +++++++++-------
 src/coreclr/jit/morph.cpp   |  1 +
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 5f799e7d5010f4..9c8f8ca5bc81e4 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -17139,15 +17139,17 @@ GenTree* Compiler::gtWrapWithSideEffects(GenTree*     tree,
         // It should be possible to be smarter here and allow such cases by extracting the side effects
         // properly for this particular case. For now, caller is responsible for avoiding such cases.
 
-        if (sideEffectsSource->OperIs(GT_COMMA) && sideEffectsSource->AsOp()->gtOp1 == sideEffects)
-        {
-            sideEffectsSource->AsOp()->gtOp2 = tree;
-            return sideEffectsSource;
-        }
-        else
+#ifdef DEBUG
+        // The side effects may be a throw created by local morph,
+        // just remove the morphed flag to avoid unnecessary assertions.
+        if (fgIsThrow(sideEffects) && sideEffects->AsCall()->gtArgs.GetUserArgByIndex(0)->GetLateNode()->IsCnsIntOrI())
         {
-            tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), sideEffects, tree);
+            sideEffects->AsCall()->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
+            sideEffects->AsCall()->gtArgs.GetUserArgByIndex(0)->GetLateNode()->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
         }
+#endif
+
+        tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), sideEffects, tree);
     }
     return tree;
 }
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 1045d96258bd61..c47e42e84f6268 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -8272,6 +8272,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
         if (fgIsCommaThrow(tree))
         {
             tree->AsOp()->gtOp1 = fgMorphTree(tree->AsOp()->gtOp1);
+            INDEBUG(tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED);
             fgMorphTreeDone(tree);
             return tree;
         }

From d28553af24a5a510a04bf2bdee9633df047b5ab4 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Thu, 18 Jul 2024 18:17:23 +0900
Subject: [PATCH 35/75] Remove redundant jit-ee calls

---
 src/coreclr/jit/objectalloc.cpp |  7 +--
 src/coreclr/jit/objectalloc.h   | 80 ++++++++++++++++++++-------------
 2 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index b84a9b7b809a5c..31e8fa24574c49 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -468,8 +468,9 @@ bool ObjectAllocator::MorphAllocObjNodes()
                             onHeapReason = "[non-constant size]";
                             canStack     = false;
                         }
-                        else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, (unsigned int)len->AsIntCon()->IconValue(),
-                                                           &blockSize, &onHeapReason))
+                        else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, allocType,
+                                                           (unsigned int)len->AsIntCon()->IconValue(), &blockSize,
+                                                           &onHeapReason))
                         {
                             // reason set by the call
                             canStack = false;
@@ -511,7 +512,7 @@ bool ObjectAllocator::MorphAllocObjNodes()
                             comp->Metrics.NewRefClassHelperCalls++;
                         }
 
-                        if (!CanAllocateLclVarOnStack(lclNum, clsHnd, 0, nullptr, &onHeapReason))
+                        if (!CanAllocateLclVarOnStack(lclNum, clsHnd, allocType, 0, nullptr, &onHeapReason))
                         {
                             // reason set by the call
                             canStack = false;
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index 4b1f77895d630d..9781205ae7583b 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -55,6 +55,7 @@ class ObjectAllocator final : public Phase
 private:
     bool         CanAllocateLclVarOnStack(unsigned int         lclNum,
                                           CORINFO_CLASS_HANDLE clsHnd,
+                                          ObjectAllocationType allocType,
                                           unsigned int         length,
                                           unsigned int*        blockSize,
                                           const char**         reason);
@@ -126,15 +127,22 @@ inline void ObjectAllocator::EnableObjectStackAllocation()
 //                           allocated on the stack.
 //
 // Arguments:
-//    lclNum   - Local variable number
-//    clsHnd   - Class/struct handle of the variable class
-//    reason  - [out, required] if result is false, reason why
+//    lclNum    - Local variable number
+//    clsHnd    - Class/struct handle of the variable class
+//    allocType - Type of allocation (newobj or newarr)
+//    length    - Length of the array (for newarr)
+//    blockSize - [out, optional] exact size of the object
+//    reason    - [out, required] if result is false, reason why
 //
 // Return Value:
 //    Returns true iff local variable can be allocated on the stack.
 //
-inline bool ObjectAllocator::CanAllocateLclVarOnStack(
-    unsigned int lclNum, CORINFO_CLASS_HANDLE clsHnd, unsigned int length, unsigned int* blockSize, const char** reason)
+inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNum,
+                                                      CORINFO_CLASS_HANDLE clsHnd,
+                                                      ObjectAllocationType allocType,
+                                                      unsigned int         length,
+                                                      unsigned int*        blockSize,
+                                                      const char**         reason)
 {
     assert(m_AnalysisDone);
 
@@ -151,23 +159,7 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(
 
     unsigned int classSize = 0;
 
-    if (comp->info.compCompHnd->isValueClass(clsHnd))
-    {
-        if (!enableBoxedValueClasses)
-        {
-            *reason = "[disabled by config]";
-            return false;
-        }
-
-        if (comp->info.compCompHnd->getTypeForBoxOnStack(clsHnd) == NO_CLASS_HANDLE)
-        {
-            *reason = "[no boxed type available]";
-            return false;
-        }
-
-        classSize = comp->info.compCompHnd->getClassSize(clsHnd);
-    }
-    else if (comp->info.compCompHnd->isSDArray(clsHnd))
+    if (allocType == OAT_NEWARR)
     {
         if (!enableArrays)
         {
@@ -188,21 +180,45 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(
         unsigned elemSize = elemLayout != nullptr ? elemLayout->GetSize() : genTypeSize(type);
         classSize         = (unsigned int)OFFSETOF__CORINFO_Array__data + elemSize * length;
     }
-    else
+    else if (allocType == OAT_NEWOBJ)
     {
-        if (!enableRefClasses)
+        if (comp->info.compCompHnd->isValueClass(clsHnd))
         {
-            *reason = "[disabled by config]";
-            return false;
+            if (!enableBoxedValueClasses)
+            {
+                *reason = "[disabled by config]";
+                return false;
+            }
+
+            if (comp->info.compCompHnd->getTypeForBoxOnStack(clsHnd) == NO_CLASS_HANDLE)
+            {
+                *reason = "[no boxed type available]";
+                return false;
+            }
+
+            classSize = comp->info.compCompHnd->getClassSize(clsHnd);
         }
-
-        if (!comp->info.compCompHnd->canAllocateOnStack(clsHnd))
+        else
         {
-            *reason = "[runtime disallows]";
-            return false;
+            if (!enableRefClasses)
+            {
+                *reason = "[disabled by config]";
+                return false;
+            }
+
+            if (!comp->info.compCompHnd->canAllocateOnStack(clsHnd))
+            {
+                *reason = "[runtime disallows]";
+                return false;
+            }
+
+            classSize = comp->info.compCompHnd->getHeapClassSize(clsHnd);
         }
-
-        classSize = comp->info.compCompHnd->getHeapClassSize(clsHnd);
+    }
+    else
+    {
+        assert(!"Unexpected allocation type");
+        return false;
     }
 
     if (classSize > s_StackAllocMaxSize)

From c21c4f7b93fb54a6f21d48ef9a7ed12136e6bc79 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Thu, 18 Jul 2024 20:52:36 +0900
Subject: [PATCH 36/75] Fix assertion again

---
 src/coreclr/jit/gentree.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 47d1cc5014aa36..d89b83519519e2 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -17052,9 +17052,11 @@ GenTree* Compiler::gtWrapWithSideEffects(GenTree*     tree,
         // properly for this particular case. For now, caller is responsible for avoiding such cases.
 
 #ifdef DEBUG
-        // The side effects may be a throw created by local morph,
+        // The side effects may be range check fail created by local morph,
         // just remove the morphed flag to avoid unnecessary assertions.
-        if (fgIsThrow(sideEffects) && sideEffects->AsCall()->gtArgs.GetUserArgByIndex(0)->GetLateNode()->IsCnsIntOrI())
+        if (sideEffects->IsHelperCall(this, CORINFO_HELP_RNGCHKFAIL) &&
+            sideEffects->AsCall()->gtArgs.CountUserArgs() == 1 &&
+            sideEffects->AsCall()->gtArgs.GetUserArgByIndex(0)->GetLateNode()->IsCnsIntOrI())
         {
             sideEffects->AsCall()->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
             sideEffects->AsCall()->gtArgs.GetUserArgByIndex(0)->GetLateNode()->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;

From 18ec558512303495b26cef10961b1ec7ed5d826e Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Thu, 18 Jul 2024 21:21:16 +0900
Subject: [PATCH 37/75] Check array length

---
 src/coreclr/jit/objectalloc.cpp |  5 ++---
 src/coreclr/jit/objectalloc.h   | 12 +++++++++---
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 31e8fa24574c49..2b1a604b7d8eb6 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -468,9 +468,8 @@ bool ObjectAllocator::MorphAllocObjNodes()
                             onHeapReason = "[non-constant size]";
                             canStack     = false;
                         }
-                        else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, allocType,
-                                                           (unsigned int)len->AsIntCon()->IconValue(), &blockSize,
-                                                           &onHeapReason))
+                        else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, allocType, len->AsIntCon()->IconValue(),
+                                                           &blockSize, &onHeapReason))
                         {
                             // reason set by the call
                             canStack = false;
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index 9781205ae7583b..86e1b697be4628 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -56,7 +56,7 @@ class ObjectAllocator final : public Phase
     bool         CanAllocateLclVarOnStack(unsigned int         lclNum,
                                           CORINFO_CLASS_HANDLE clsHnd,
                                           ObjectAllocationType allocType,
-                                          unsigned int         length,
+                                          ssize_t              length,
                                           unsigned int*        blockSize,
                                           const char**         reason);
     bool         CanLclVarEscape(unsigned int lclNum);
@@ -140,7 +140,7 @@ inline void ObjectAllocator::EnableObjectStackAllocation()
 inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNum,
                                                       CORINFO_CLASS_HANDLE clsHnd,
                                                       ObjectAllocationType allocType,
-                                                      unsigned int         length,
+                                                      ssize_t              length,
                                                       unsigned int*        blockSize,
                                                       const char**         reason)
 {
@@ -167,6 +167,12 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNu
             return false;
         }
 
+        if (length < 0 || !FitsIn<int>(length))
+        {
+            *reason = "[invalid array length]";
+            return false;
+        }
+
         CORINFO_CLASS_HANDLE elemClsHnd = NO_CLASS_HANDLE;
         CorInfoType          corType    = comp->info.compCompHnd->getChildType(clsHnd, &elemClsHnd);
         var_types            type       = JITtype2varType(corType);
@@ -178,7 +184,7 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNu
         }
 
         unsigned elemSize = elemLayout != nullptr ? elemLayout->GetSize() : genTypeSize(type);
-        classSize         = (unsigned int)OFFSETOF__CORINFO_Array__data + elemSize * length;
+        classSize         = (unsigned int)OFFSETOF__CORINFO_Array__data + elemSize * static_cast<unsigned int>(length);
     }
     else if (allocType == OAT_NEWOBJ)
     {

From eadb4ad5c7271bd6eb4f247fa4bb83da828efc27 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Thu, 18 Jul 2024 23:35:38 +0900
Subject: [PATCH 38/75] Fix assertion in another way

---
 src/coreclr/jit/gentree.cpp | 28 +++++++++++++---------------
 src/coreclr/jit/morph.cpp   |  1 -
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index d89b83519519e2..5d9655af408c25 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -14385,10 +14385,15 @@ GenTree* Compiler::gtFoldExprSpecial(GenTree* tree)
 
     if (fgGlobalMorph)
     {
+#ifdef DEBUG
         // We can sometimes produce a comma over the constant if the original op
         // had a side effect, so just ensure we set the flag (which will be already
         // set for the operands otherwise).
-        INDEBUG(op->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
+        if (op->OperIs(GT_COMMA) && (op->AsOp()->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
+        {
+            op->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+        }
+#endif
     }
     return op;
 }
@@ -14612,10 +14617,15 @@ GenTree* Compiler::gtFoldExprSpecialFloating(GenTree* tree)
 
     if (fgGlobalMorph)
     {
+#ifdef DEBUG
         // We can sometimes produce a comma over the constant if the original op
         // had a side effect, so just ensure we set the flag (which will be already
         // set for the operands otherwise).
-        INDEBUG(op->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
+        if (op->OperIs(GT_COMMA) && (op->AsOp()->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
+        {
+            op->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+        }
+#endif
     }
     return op;
 }
@@ -17051,19 +17061,7 @@ GenTree* Compiler::gtWrapWithSideEffects(GenTree*     tree,
         // It should be possible to be smarter here and allow such cases by extracting the side effects
         // properly for this particular case. For now, caller is responsible for avoiding such cases.
 
-#ifdef DEBUG
-        // The side effects may be range check fail created by local morph,
-        // just remove the morphed flag to avoid unnecessary assertions.
-        if (sideEffects->IsHelperCall(this, CORINFO_HELP_RNGCHKFAIL) &&
-            sideEffects->AsCall()->gtArgs.CountUserArgs() == 1 &&
-            sideEffects->AsCall()->gtArgs.GetUserArgByIndex(0)->GetLateNode()->IsCnsIntOrI())
-        {
-            sideEffects->AsCall()->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
-            sideEffects->AsCall()->gtArgs.GetUserArgByIndex(0)->GetLateNode()->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
-        }
-#endif
-
-        tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), sideEffects, tree);
+        tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), gtCloneExpr(sideEffects), tree);
     }
     return tree;
 }
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index c47e42e84f6268..1045d96258bd61 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -8272,7 +8272,6 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
         if (fgIsCommaThrow(tree))
         {
             tree->AsOp()->gtOp1 = fgMorphTree(tree->AsOp()->gtOp1);
-            INDEBUG(tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED);
             fgMorphTreeDone(tree);
             return tree;
         }

From 9d4021cdc0661960ad42be40bdea563e4c3117d3 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Fri, 19 Jul 2024 00:29:07 +0900
Subject: [PATCH 39/75] Unset the flag to avoid unnecessary assert

---
 src/coreclr/jit/gentree.cpp | 16 +++-------------
 src/coreclr/jit/morph.cpp   |  9 +++++++++
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 5d9655af408c25..759dde9b584fb9 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -14385,15 +14385,10 @@ GenTree* Compiler::gtFoldExprSpecial(GenTree* tree)
 
     if (fgGlobalMorph)
     {
-#ifdef DEBUG
         // We can sometimes produce a comma over the constant if the original op
         // had a side effect, so just ensure we set the flag (which will be already
         // set for the operands otherwise).
-        if (op->OperIs(GT_COMMA) && (op->AsOp()->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
-        {
-            op->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
-        }
-#endif
+        INDEBUG(op->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
     }
     return op;
 }
@@ -14617,15 +14612,10 @@ GenTree* Compiler::gtFoldExprSpecialFloating(GenTree* tree)
 
     if (fgGlobalMorph)
     {
-#ifdef DEBUG
         // We can sometimes produce a comma over the constant if the original op
         // had a side effect, so just ensure we set the flag (which will be already
         // set for the operands otherwise).
-        if (op->OperIs(GT_COMMA) && (op->AsOp()->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
-        {
-            op->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
-        }
-#endif
+        INDEBUG(op->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
     }
     return op;
 }
@@ -17061,7 +17051,7 @@ GenTree* Compiler::gtWrapWithSideEffects(GenTree*     tree,
         // It should be possible to be smarter here and allow such cases by extracting the side effects
         // properly for this particular case. For now, caller is responsible for avoiding such cases.
 
-        tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), gtCloneExpr(sideEffects), tree);
+        tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), sideEffects, tree);
     }
     return tree;
 }
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 1045d96258bd61..8baaa3e7614325 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -2377,6 +2377,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
     {
         for (CallArg& arg : call->gtArgs.LateArgs())
         {
+            INDEBUG(arg.GetLateNode()->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED);
             arg.SetLateNode(fgMorphTree(arg.GetLateNode()));
             flagsSummary |= arg.GetLateNode()->gtFlags;
         }
@@ -8272,6 +8273,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
         if (fgIsCommaThrow(tree))
         {
             tree->AsOp()->gtOp1 = fgMorphTree(tree->AsOp()->gtOp1);
+            INDEBUG(tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED);
             fgMorphTreeDone(tree);
             return tree;
         }
@@ -12147,8 +12149,15 @@ GenTree* Compiler::fgMorphTree(GenTree* tree, MorphAddrContext* mac)
 
     if (fgGlobalMorph)
     {
+#ifdef DEBUG
+        /* A RNGCHKFAIL may created by local morph for stack allocated arrays and causes remorph */
+        if (tree->IsHelperCall(this, CORINFO_HELP_OVERFLOW))
+        {
+            tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
+        }
         /* Ensure that we haven't morphed this node already */
         assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
+#endif
 
         /* Before morphing the tree, we try to propagate any active assertions */
         if (optLocalAssertionProp)

From 1fff53e3d270c5920b88032492621a350971418b Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Fri, 19 Jul 2024 17:03:41 +0900
Subject: [PATCH 40/75] Add tests

---
 .../ObjectStackAllocationTests.cs             | 121 +++++++++++++++---
 1 file changed, 103 insertions(+), 18 deletions(-)

diff --git a/src/tests/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.cs b/src/tests/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.cs
index 33303daf7a07b2..07114d7ebd9b7c 100644
--- a/src/tests/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.cs
+++ b/src/tests/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.cs
@@ -156,6 +156,8 @@ public static int TestEntryPoint()
             // Stack allocation of boxed structs is now enabled
             CallTestAndVerifyAllocation(BoxSimpleStructAndAddFields, 12, expectedAllocationKind);
 
+            CallTestAndVerifyAllocation(AllocateArrayWithNonGCElements, 84, expectedAllocationKind);
+
             // The remaining tests currently never allocate on the stack
             if (expectedAllocationKind == AllocationKind.Stack) {
                 expectedAllocationKind = AllocationKind.Heap;
@@ -167,6 +169,20 @@ public static int TestEntryPoint()
             // This test calls CORINFO_HELP_CHKCASTCLASS_SPECIAL
             CallTestAndVerifyAllocation(AllocateSimpleClassAndCast, 7, expectedAllocationKind);
 
+            CallTestAndVerifyAllocation(AllocateArrayWithNonGCElementsEscape, 42, expectedAllocationKind);
+
+            // This test calls CORINFO_HELP_OVERFLOW
+            CallTestAndVerifyAllocation(AllocateArrayWithNonGCElementsOutOfRangeLeft, 0, expectedAllocationKind, true);
+
+            // This test calls CORINFO_HELP_OVERFLOW
+            CallTestAndVerifyAllocation(AllocateArrayWithNonGCElementsOutOfRangeRight, 0, expectedAllocationKind, true);
+
+            // This test calls CORINFO_HELP_ARTHEMIC_OVERFLOW
+            CallTestAndVerifyAllocation(AllocateNegativeLengthArrayWithNonGCElements, 0, expectedAllocationKind, true);
+
+            // This test calls CORINFO_HELP_ARTHEMIC_OVERFLOW
+            CallTestAndVerifyAllocation(AllocateLongLengthArrayWithNonGCElements, 0, expectedAllocationKind, true);
+
             return methodResult;
         }
 
@@ -175,27 +191,38 @@ static bool GCStressEnabled()
             return Environment.GetEnvironmentVariable("DOTNET_GCStress") != null;
         }
 
-        static void CallTestAndVerifyAllocation(Test test, int expectedResult, AllocationKind expectedAllocationsKind)
+        static void CallTestAndVerifyAllocation(Test test, int expectedResult, AllocationKind expectedAllocationsKind, bool throws = false)
         {
-            long allocatedBytesBefore = GC.GetAllocatedBytesForCurrentThread();
-            int testResult = test();
-            long allocatedBytesAfter = GC.GetAllocatedBytesForCurrentThread();
             string methodName = test.Method.Name;
-
-            if (testResult != expectedResult) {
-                Console.WriteLine($"FAILURE ({methodName}): expected {expectedResult}, got {testResult}");
-                methodResult = -1;
-            }
-            else if ((expectedAllocationsKind == AllocationKind.Stack) && (allocatedBytesBefore != allocatedBytesAfter)) {
-                Console.WriteLine($"FAILURE ({methodName}): unexpected allocation of {allocatedBytesAfter - allocatedBytesBefore} bytes");
-                methodResult = -1;
-            }
-            else if ((expectedAllocationsKind == AllocationKind.Heap) && (allocatedBytesBefore == allocatedBytesAfter)) {
-                Console.WriteLine($"FAILURE ({methodName}): unexpected stack allocation");
-                methodResult = -1;
+            try
+            {
+                long allocatedBytesBefore = GC.GetAllocatedBytesForCurrentThread();
+                int testResult = test();
+                long allocatedBytesAfter = GC.GetAllocatedBytesForCurrentThread();
+
+                if (testResult != expectedResult) {
+                    Console.WriteLine($"FAILURE ({methodName}): expected {expectedResult}, got {testResult}");
+                    methodResult = -1;
+                }
+                else if ((expectedAllocationsKind == AllocationKind.Stack) && (allocatedBytesBefore != allocatedBytesAfter)) {
+                    Console.WriteLine($"FAILURE ({methodName}): unexpected allocation of {allocatedBytesAfter - allocatedBytesBefore} bytes");
+                    methodResult = -1;
+                }
+                else if ((expectedAllocationsKind == AllocationKind.Heap) && (allocatedBytesBefore == allocatedBytesAfter)) {
+                    Console.WriteLine($"FAILURE ({methodName}): unexpected stack allocation");
+                    methodResult = -1;
+                }
+                else {
+                    Console.WriteLine($"SUCCESS ({methodName})");
+                }
             }
-            else {
-                Console.WriteLine($"SUCCESS ({methodName})");
+            catch {
+                if (throws) {
+                    Console.WriteLine($"SUCCESS ({methodName})");
+                }
+                else {
+                    throw;
+                }
             }
         }
 
@@ -339,6 +366,64 @@ static int AllocateClassWithGcFieldAndInt()
             return c.i;
         }
 
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static int AllocateArrayWithNonGCElements()
+        {
+            int[] array = new int[42];
+            array[24] = 42;
+            GC.Collect();
+            return array[24] + array.Length;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static int AllocateArrayWithNonGCElementsEscape()
+        {
+            int[] array = new int[42];
+            Use(ref array[24]);
+            GC.Collect();
+            return array[24];
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static int AllocateArrayWithNonGCElementsOutOfRangeRight()
+        {
+            int[] array = new int[42];
+            array[43] = 42;
+            GC.Collect();
+            return 1;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static int AllocateArrayWithNonGCElementsOutOfRangeLeft()
+        {
+            int[] array = new int[42];
+            array[-1] = 42;
+            GC.Collect();
+            return 1;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static int AllocateNegativeLengthArrayWithNonGCElements()
+        {
+            int[] array = new int["".Length - 2];
+            GC.Collect();
+            return 1;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static int AllocateLongLengthArrayWithNonGCElements()
+        {
+            int[] array = new int[long.MaxValue];
+            GC.Collect();
+            return 1;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static void Use(ref int v)
+        {
+            v = 42;
+        }
+
         [MethodImpl(MethodImplOptions.NoInlining)]
         private static void ZeroAllocTest()
         {

From d521a944015c707e18a9e2fbd56678e0b2689287 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Fri, 19 Jul 2024 19:08:09 +0900
Subject: [PATCH 41/75] sigh

---
 src/coreclr/jit/morph.cpp | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 8baaa3e7614325..b8983eb3a8713b 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -2377,7 +2377,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
     {
         for (CallArg& arg : call->gtArgs.LateArgs())
         {
-            INDEBUG(arg.GetLateNode()->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED);
             arg.SetLateNode(fgMorphTree(arg.GetLateNode()));
             flagsSummary |= arg.GetLateNode()->gtFlags;
         }
@@ -8269,11 +8268,20 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
             return tree;
         }
 
+#ifdef DEBUG
+        if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
+        {
+            return tree;
+        }
+#endif
+
         /* If we created a comma-throw tree then we need to morph op1 */
         if (fgIsCommaThrow(tree))
         {
-            tree->AsOp()->gtOp1 = fgMorphTree(tree->AsOp()->gtOp1);
-            INDEBUG(tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED);
+            INDEBUG(if ((tree->AsOp()->gtOp1->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0))
+            {
+                tree->AsOp()->gtOp1 = fgMorphTree(tree->AsOp()->gtOp1);
+            }
             fgMorphTreeDone(tree);
             return tree;
         }
@@ -12149,15 +12157,8 @@ GenTree* Compiler::fgMorphTree(GenTree* tree, MorphAddrContext* mac)
 
     if (fgGlobalMorph)
     {
-#ifdef DEBUG
-        /* A RNGCHKFAIL may created by local morph for stack allocated arrays and causes remorph */
-        if (tree->IsHelperCall(this, CORINFO_HELP_OVERFLOW))
-        {
-            tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
-        }
         /* Ensure that we haven't morphed this node already */
         assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
-#endif
 
         /* Before morphing the tree, we try to propagate any active assertions */
         if (optLocalAssertionProp)

From 97ee2bec3d262a8618c92cdfbfb69ef750da2625 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 20 Jul 2024 01:38:53 +0900
Subject: [PATCH 42/75] Support R2R/NativeAOT

---
 src/coreclr/jit/objectalloc.cpp | 59 ++++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 9 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 2b1a604b7d8eb6..12ce40685bd76a 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -408,10 +408,24 @@ bool ObjectAllocator::MorphAllocObjNodes()
                 {
                     allocType = OAT_NEWOBJ;
                 }
-                else if (data->IsHelperCall(comp, CORINFO_HELP_NEWARR_1_VC) &&
-                         data->AsCall()->gtArgs.GetUserArgByIndex(0)->GetNode()->IsCnsIntOrI())
+                else if (data->IsHelperCall())
                 {
-                    allocType = OAT_NEWARR;
+                    switch (data->AsCall()->GetHelperNum())
+                    {
+                        case CORINFO_HELP_NEWARR_1_VC:
+                        case CORINFO_HELP_NEWARR_1_OBJ:
+                        case CORINFO_HELP_NEWARR_1_DIRECT:
+                        case CORINFO_HELP_NEWARR_1_ALIGN8:
+#ifdef FEATURE_READYTORUN
+                        case CORINFO_HELP_READYTORUN_NEWARR_1:
+#endif
+                        {
+                            if (data->AsCall()->gtArgs.GetArgByIndex(0)->GetNode()->IsCnsIntOrI())
+                            {
+                                allocType = OAT_NEWARR;
+                            }
+                        }
+                    }
                 }
             }
 
@@ -437,25 +451,42 @@ bool ObjectAllocator::MorphAllocObjNodes()
                 {
                     if (allocType == OAT_NEWARR)
                     {
-                        assert(data->AsCall()->GetHelperNum() == CORINFO_HELP_NEWARR_1_VC);
+                        assert(basicBlockHasNewArr);
                         //------------------------------------------------------------------------
                         // We expect the following expression tree at this point
+                        // For non-ReadyToRun:
                         //  STMTx (IL 0x... ???)
                         //    * STORE_LCL_VAR   ref
-                        //    \--*  CALL help  ref    CORINFO_HELP_NEWARR_1_VC
+                        //    \--*  CALL help  ref
                         //       +--*  CNS_INT(h) long
                         //       \--*  CNS_INT long
+                        // For ReadyToRun:
+                        //  STMTx (IL 0x... ???)
+                        //    * STORE_LCL_VAR   ref
+                        //    \--*  CALL help  ref
+                        //       \--*  CNS_INT long
                         //------------------------------------------------------------------------
 
                         CallArg*             arg       = data->AsCall()->gtArgs.GetArgByIndex(0);
-                        GenTree*             node      = arg->GetNode();
                         bool                 isExact   = false;
                         bool                 isNonNull = false;
                         CORINFO_CLASS_HANDLE clsHnd =
                             comp->gtGetHelperCallClassHandle(data->AsCall(), &isExact, &isNonNull);
-                        GenTree*     len       = arg->GetNext()->GetNode();
-                        unsigned int blockSize = 0;
+                        GenTree* len = nullptr;
+#ifdef FEATURE_READYTORUN
+                        if (comp->opts.IsReadyToRun() && data->IsHelperCall(comp, CORINFO_HELP_READYTORUN_NEWARR_1))
+                        {
+                            len = arg->GetNode();
+                        }
+                        else
+#endif
+                        {
+                            len = arg->GetNext()->GetNode();
+                        }
+
+                        assert(len != nullptr);
 
+                        unsigned int blockSize = 0;
                         comp->Metrics.NewArrayHelperCalls++;
 
                         if (!isExact || !isNonNull)
@@ -659,17 +690,27 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*
     assert(newArr != nullptr);
     assert(m_AnalysisDone);
     assert(clsHnd != NO_CLASS_HANDLE);
+    assert(newArr->IsHelperCall());
+    assert(newArr->GetHelperNum() != CORINFO_HELP_NEWARR_1_MAYBEFROZEN);
+#ifdef FEATURE_READYTORUN
+    assert(newArr->GetHelperNum() == CORINFO_HELP_READYTORUN_NEWARR_1 || !comp->opts.IsReadyToRun());
+#endif
 
     const bool         shortLifetime = false;
+    const bool         alignTo8      = newArr->GetHelperNum() == CORINFO_HELP_NEWARR_1_ALIGN8;
     const unsigned int lclNum        = comp->lvaGrabTemp(shortLifetime DEBUGARG("stack allocated array temp"));
 
+    if (alignTo8)
+    {
+        blockSize = AlignUp(blockSize, TARGET_POINTER_SIZE);
+    }
+
     comp->lvaSetStruct(lclNum, comp->typGetBlkLayout(blockSize), /* unsafeValueClsCheck */ false);
 
     // Initialize the object memory if necessary.
     bool             bbInALoop  = block->HasFlag(BBF_BACKWARD_JUMP);
     bool             bbIsReturn = block->KindIs(BBJ_RETURN);
     LclVarDsc* const lclDsc     = comp->lvaGetDesc(lclNum);
-    lclDsc->lvStackAllocatedBox = false;
     if (comp->fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn))
     {
         //------------------------------------------------------------------------

From 5bcb78606802d1714755715ffb392e1c52c54d56 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 20 Jul 2024 02:21:31 +0900
Subject: [PATCH 43/75] Fix building

---
 src/coreclr/jit/objectalloc.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 12ce40685bd76a..90b19ed07e19d6 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -424,6 +424,11 @@ bool ObjectAllocator::MorphAllocObjNodes()
                             {
                                 allocType = OAT_NEWARR;
                             }
+                            break;
+                        }
+                        default:
+                        {
+                            break;
                         }
                     }
                 }

From a01562e0737a0d4cdd6ccbd882806ca3c76cb4fb Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 20 Jul 2024 02:24:27 +0900
Subject: [PATCH 44/75] cleanup

---
 src/coreclr/jit/objectalloc.cpp | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 90b19ed07e19d6..5518455741d2f8 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -416,16 +416,25 @@ bool ObjectAllocator::MorphAllocObjNodes()
                         case CORINFO_HELP_NEWARR_1_OBJ:
                         case CORINFO_HELP_NEWARR_1_DIRECT:
                         case CORINFO_HELP_NEWARR_1_ALIGN8:
+                        {
+                            if (data->AsCall()->gtArgs.CountArgs() == 2 &&
+                                data->AsCall()->gtArgs.GetArgByIndex(1)->GetNode()->IsCnsIntOrI())
+                            {
+                                allocType = OAT_NEWARR;
+                            }
+                            break;
+                        }
 #ifdef FEATURE_READYTORUN
                         case CORINFO_HELP_READYTORUN_NEWARR_1:
-#endif
                         {
-                            if (data->AsCall()->gtArgs.GetArgByIndex(0)->GetNode()->IsCnsIntOrI())
+                            if (data->AsCall()->gtArgs.CountArgs() == 1 &&
+                                data->AsCall()->gtArgs.GetArgByIndex(0)->GetNode()->IsCnsIntOrI())
                             {
                                 allocType = OAT_NEWARR;
                             }
                             break;
                         }
+#endif
                         default:
                         {
                             break;
@@ -472,7 +481,6 @@ bool ObjectAllocator::MorphAllocObjNodes()
                         //       \--*  CNS_INT long
                         //------------------------------------------------------------------------
 
-                        CallArg*             arg       = data->AsCall()->gtArgs.GetArgByIndex(0);
                         bool                 isExact   = false;
                         bool                 isNonNull = false;
                         CORINFO_CLASS_HANDLE clsHnd =
@@ -481,12 +489,12 @@ bool ObjectAllocator::MorphAllocObjNodes()
 #ifdef FEATURE_READYTORUN
                         if (comp->opts.IsReadyToRun() && data->IsHelperCall(comp, CORINFO_HELP_READYTORUN_NEWARR_1))
                         {
-                            len = arg->GetNode();
+                            len = data->AsCall()->gtArgs.GetArgByIndex(0)->GetNode();
                         }
                         else
 #endif
                         {
-                            len = arg->GetNext()->GetNode();
+                            len = data->AsCall()->gtArgs.GetArgByIndex(1)->GetNode();
                         }
 
                         assert(len != nullptr);

From e728d4ff945fad1f9bd16becd9d41b02bd3de4f5 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 20 Jul 2024 03:29:41 +0900
Subject: [PATCH 45/75] remove invalid assert

---
 src/coreclr/jit/objectalloc.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 5518455741d2f8..0636f912f41977 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -705,9 +705,6 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*
     assert(clsHnd != NO_CLASS_HANDLE);
     assert(newArr->IsHelperCall());
     assert(newArr->GetHelperNum() != CORINFO_HELP_NEWARR_1_MAYBEFROZEN);
-#ifdef FEATURE_READYTORUN
-    assert(newArr->GetHelperNum() == CORINFO_HELP_READYTORUN_NEWARR_1 || !comp->opts.IsReadyToRun());
-#endif
 
     const bool         shortLifetime = false;
     const bool         alignTo8      = newArr->GetHelperNum() == CORINFO_HELP_NEWARR_1_ALIGN8;

From d73c5c5723a5ff2b2415c8472576b50f62b4497d Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 20 Jul 2024 03:37:41 +0900
Subject: [PATCH 46/75] double align on 32bit platform

---
 src/coreclr/jit/objectalloc.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 0636f912f41977..7c49d00ce3cae9 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -741,6 +741,10 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*
         comp->compSuppressedZeroInit = true;
     }
 
+#ifndef TARGET_64BIT
+    lclDsc->lvStructDoubleAlign = alignTo8;
+#endif
+
     // Initialize the vtable slot.
     //
     //------------------------------------------------------------------------

From c9fea239be739d24a247020e609fcd97cb49c84f Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 20 Jul 2024 03:39:40 +0900
Subject: [PATCH 47/75] Use correct alignment for align8

---
 src/coreclr/jit/objectalloc.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 7c49d00ce3cae9..421ce964bbd81c 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -712,7 +712,7 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*
 
     if (alignTo8)
     {
-        blockSize = AlignUp(blockSize, TARGET_POINTER_SIZE);
+        blockSize = AlignUp(blockSize, 8);
     }
 
     comp->lvaSetStruct(lclNum, comp->typGetBlkLayout(blockSize), /* unsafeValueClsCheck */ false);

From 772bee6104d56407356ddc81fe7860283c1ae3fa Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 20 Jul 2024 19:25:59 +0900
Subject: [PATCH 48/75] Fix intrinsic expansion

---
 src/coreclr/jit/importercalls.cpp | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp
index 0d2045d553c826..0576c25fa5eeff 100644
--- a/src/coreclr/jit/importercalls.cpp
+++ b/src/coreclr/jit/importercalls.cpp
@@ -2579,12 +2579,28 @@ GenTree* Compiler::impInitializeArrayIntrinsic(CORINFO_SIG_INFO* sig)
     }
 
     //
-    // We start by looking at the last statement, making sure it's a store, and
-    // that the target of the store is the array passed to InitializeArray.
+    // We start by looking at the last statement, making sure it's a store.
     //
     GenTree* arrayLocalStore = impLastStmt->GetRootNode();
-    if (!arrayLocalStore->OperIs(GT_STORE_LCL_VAR) || !arrayLocalNode->OperIs(GT_LCL_VAR) ||
-        (arrayLocalStore->AsLclVar()->GetLclNum() != arrayLocalNode->AsLclVar()->GetLclNum()))
+    if (arrayLocalStore->OperIs(GT_STORE_LCL_VAR) && arrayLocalNode->OperIs(GT_LCL_VAR))
+    {
+        // Make sure the target of the store is the array passed to InitializeArray.
+        if (arrayLocalStore->AsLclVar()->GetLclNum() != arrayLocalNode->AsLclVar()->GetLclNum())
+        {
+            // The array can be spilled to a temp for stack allocation.
+            // Try getting the actual store node from the previous statement.
+            if (arrayLocalStore->AsLclVar()->Data()->OperIs(GT_LCL_VAR) && impLastStmt->GetPrevStmt() != nullptr)
+            {
+                arrayLocalStore = impLastStmt->GetPrevStmt()->GetRootNode();
+                if (!arrayLocalStore->OperIs(GT_STORE_LCL_VAR) ||
+                    arrayLocalStore->AsLclVar()->GetLclNum() != arrayLocalNode->AsLclVar()->GetLclNum())
+                {
+                    return nullptr;
+                }
+            }
+        }
+    }
+    else
     {
         return nullptr;
     }

From 9c81c04698a51493a1fd5592729ef3c6852ecf89 Mon Sep 17 00:00:00 2001
From: Steve <hez2010@outlook.com>
Date: Sat, 21 Sep 2024 14:42:16 +0900
Subject: [PATCH 49/75] Address some review feedback

---
 src/coreclr/jit/gentree.h       | 1 +
 src/coreclr/jit/lclmorph.cpp    | 2 ++
 src/coreclr/jit/objectalloc.cpp | 2 +-
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index c2310f126f0334..43f20028c37934 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -7539,6 +7539,7 @@ struct GenTreeArrAddr : GenTreeUnOp
         , m_elemType(elemType)
         , m_firstElemOffset(firstElemOffset)
     {
+        assert(addr->TypeIs(TYP_BYREF, TYP_I_IMPL));
         assert(((elemType == TYP_STRUCT) && (elemClassHandle != NO_CLASS_HANDLE)) ||
                (elemClassHandle == NO_CLASS_HANDLE));
     }
diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp
index a7ef38d65bb686..a4d0fc483e3017 100644
--- a/src/coreclr/jit/lclmorph.cpp
+++ b/src/coreclr/jit/lclmorph.cpp
@@ -1109,11 +1109,13 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
                 assert(TopValue(1).Node() == node->gtGetOp1());
                 assert(TopValue(0).Node() == node->gtGetOp2());
 
+                // We only expect TopValue(1).IsAddress() to be true for stack allocated arrays
                 if (node->gtGetOp2()->IsCnsIntOrI() && TopValue(1).IsAddress())
                 {
                     ssize_t offset = node->AsIndexAddr()->gtElemOffset +
                                      node->gtGetOp2()->AsIntCon()->IconValue() * node->AsIndexAddr()->gtElemSize;
 
+                    // For stack allocated arrays the local size is the size of the entire array
                     if (!node->AsIndexAddr()->IsBoundsChecked() ||
                         (static_cast<ssize_t>(node->AsIndexAddr()->gtElemOffset) <= offset &&
                          offset < static_cast<ssize_t>(m_compiler->lvaLclExactSize(TopValue(1).LclNum()))))
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 421ce964bbd81c..41ffe9291c1226 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -681,7 +681,7 @@ GenTree* ObjectAllocator::MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* alloc
 //
 // Arguments:
 //    newArr       - GT_CALL that will be replaced by helper call.
-//    clsHndclsHnd - class representing the type of the array
+//    clsHnd       - class representing the type of the array
 //    length       - length of the array
 //    blockSize    - size of the layout
 //    block        - a basic block where newArr is

From 6a7e6cbc5d82d86aab12027093552f00d3329a6f Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Fri, 4 Oct 2024 10:43:25 +0900
Subject: [PATCH 50/75] Revert workaround to fgMorphSmpOp

---
 src/coreclr/jit/morph.cpp | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 9ff541d1a7e68d..28acb28732edee 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -8355,20 +8355,10 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
             return tree;
         }
 
-#ifdef DEBUG
-        if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
-        {
-            return tree;
-        }
-#endif
-
         /* If we created a comma-throw tree then we need to morph op1 */
         if (fgIsCommaThrow(tree))
         {
-            INDEBUG(if ((tree->AsOp()->gtOp1->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0))
-            {
-                tree->AsOp()->gtOp1 = fgMorphTree(tree->AsOp()->gtOp1);
-            }
+            tree->AsOp()->gtOp1 = fgMorphTree(tree->AsOp()->gtOp1);
             fgMorphTreeDone(tree);
             return tree;
         }

From aeeb9e354a895c868f403aa26fd6c44168ce1c03 Mon Sep 17 00:00:00 2001
From: Steve <hez2010@outlook.com>
Date: Tue, 26 Nov 2024 17:50:51 +0900
Subject: [PATCH 51/75] Fix build failure

---
 src/coreclr/jit/jitconfigvalues.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 189632b4f9d7c5..d3c7634f26b238 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -660,7 +660,7 @@ CONFIG_STRING(JitObjectStackAllocationRange, "JitObjectStackAllocationRange")
 RELEASE_CONFIG_INTEGER(JitObjectStackAllocation, "JitObjectStackAllocation", 1)
 RELEASE_CONFIG_INTEGER(JitObjectStackAllocationRefClass, "JitObjectStackAllocationRefClass", 1)
 RELEASE_CONFIG_INTEGER(JitObjectStackAllocationBoxedValueClass, "JitObjectStackAllocationBoxedValueClass", 1)
-RELEASE_CONFIG_INTEGER(JitObjectStackAllocationArray, W("JitObjectStackAllocationArray"), 1)
+RELEASE_CONFIG_INTEGER(JitObjectStackAllocationArray, "JitObjectStackAllocationArray", 1)
 
 RELEASE_CONFIG_INTEGER(JitEECallTimingInfo, "JitEECallTimingInfo", 0)
 

From 1914e806485b8b90ad031ca09607874e09e096e1 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Fri, 6 Dec 2024 23:35:37 +0900
Subject: [PATCH 52/75] Try fixing remorph issue

---
 src/coreclr/jit/compiler.h  |  2 +-
 src/coreclr/jit/gentree.cpp | 29 +++++++++++++++++++++++------
 src/coreclr/jit/morph.cpp   |  9 ++++++++-
 3 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 9090f19a761d31..16fcd7a02a6875 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -3747,7 +3747,7 @@ class Compiler
 
     //-------------------------------------------------------------------------
 
-    GenTree* gtFoldExpr(GenTree* tree);
+    GenTree* gtFoldExpr(GenTree* tree, bool* folded = nullptr);
     GenTree* gtFoldExprConst(GenTree* tree);
     GenTree* gtFoldIndirConst(GenTreeIndir* indir);
     GenTree* gtFoldExprSpecial(GenTree* tree);
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index ae55c80b13cf7c..80ce88eb7c494f 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -13572,10 +13572,13 @@ void Compiler::gtDispLIRNode(GenTree* node, const char* prefixMsg /* = nullptr *
  *  and call the methods to perform the folding
  */
 
-GenTree* Compiler::gtFoldExpr(GenTree* tree)
+GenTree* Compiler::gtFoldExpr(GenTree* tree, bool* folded)
 {
     unsigned kind = tree->OperKind();
 
+    if (folded != nullptr)
+        *folded = false;
+
     /* We must have a simple operation to fold */
 
     // If we're in CSE, it's not safe to perform tree
@@ -13595,13 +13598,19 @@ GenTree* Compiler::gtFoldExpr(GenTree* tree)
     {
         if (tree->OperIsConditional())
         {
-            return gtFoldExprConditional(tree);
+            GenTree* newTree = gtFoldExprConditional(tree);
+            if (folded != nullptr)
+                *folded = newTree != tree;
+            return newTree;
         }
 
 #if defined(FEATURE_HW_INTRINSICS)
         if (tree->OperIsHWIntrinsic())
         {
-            return gtFoldExprHWIntrinsic(tree->AsHWIntrinsic());
+            GenTree* newTree = gtFoldExprHWIntrinsic(tree->AsHWIntrinsic());
+            if (folded != nullptr)
+                *folded = newTree != tree;
+            return newTree;
         }
 #endif // FEATURE_HW_INTRINSICS
 
@@ -13629,6 +13638,7 @@ GenTree* Compiler::gtFoldExpr(GenTree* tree)
     {
         if (op1->OperIsConst())
         {
+            // constants folding results in a new tree that may be folded again, don't mark it as folded
             return gtFoldExprConst(tree);
         }
     }
@@ -13640,7 +13650,8 @@ GenTree* Compiler::gtFoldExpr(GenTree* tree)
         // one of their arguments is an address.
         if (op1->OperIsConst() && op2->OperIsConst() && !tree->OperIsAtomicOp())
         {
-            /* both nodes are constants - fold the expression */
+            // both nodes are constants - fold the expression
+            // constants folding results in a new tree that may be folded again, don't mark it as folded
             return gtFoldExprConst(tree);
         }
         else if (op1->OperIsConst() || op2->OperIsConst())
@@ -13655,13 +13666,19 @@ GenTree* Compiler::gtFoldExpr(GenTree* tree)
                 return tree;
             }
 
-            return gtFoldExprSpecial(tree);
+            GenTree* newTree = gtFoldExprSpecial(tree);
+            if (folded != nullptr)
+                *folded = newTree != tree;
+            return newTree;
         }
         else if (tree->OperIsCompare())
         {
             /* comparisons of two local variables can sometimes be folded */
 
-            return gtFoldExprCompare(tree);
+            GenTree* newTree = gtFoldExprCompare(tree);
+            if (folded != nullptr)
+                *folded = newTree != tree;
+            return newTree;
         }
     }
 
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index c0b7a6417b2980..b0ac4a65d81367 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -8305,10 +8305,17 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
     }
 
     // Try to fold it, maybe we get lucky,
-    tree = gtFoldExpr(tree);
+    bool folded;
+    tree = gtFoldExpr(tree, &folded);
 
     if (oldTree != tree)
     {
+        if (folded)
+        {
+            INDEBUG(tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
+            return tree;
+        }
+
         /* if gtFoldExpr returned op1 or op2 then we are done */
         if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
         {

From f671085c251aa4363a99d807845484027385aa7a Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 7 Dec 2024 02:49:30 +0900
Subject: [PATCH 53/75] Minimize asmdiff for tier-0

---
 src/coreclr/jit/importer.cpp      | 34 +++++++++++++++++++------------
 src/coreclr/jit/importercalls.cpp |  5 +++++
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 26f461aa3274a8..d5d1d2f0acf101 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -9734,25 +9734,33 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                 op1->AsCall()->compileTimeHelperArgumentHandle = (CORINFO_GENERIC_HANDLE)resolvedToken.hClass;
 
                 // Remember that this function contains 'new' of an SD array.
-                block->SetFlags(BBF_HAS_NEWARR);
                 optMethodFlags |= OMF_HAS_NEWARRAY;
+                block->SetFlags(BBF_HAS_NEWARR);
 
-                // We assign the newly allocated object (by a GT_CALL to newarr node)
-                // to a temp. Note that the pattern "temp = allocArr" is required
-                // by ObjectAllocator phase to be able to determine newarr nodes
-                // without exhaustive walk over all expressions.
-                lclNum = lvaGrabTemp(true DEBUGARG("NewArr temp"));
+                if (opts.OptimizationEnabled())
+                {
+                    // We assign the newly allocated object (by a GT_CALL to newarr node)
+                    // to a temp. Note that the pattern "temp = allocArr" is required
+                    // by ObjectAllocator phase to be able to determine newarr nodes
+                    // without exhaustive walk over all expressions.
+                    lclNum = lvaGrabTemp(true DEBUGARG("NewArr temp"));
 
-                impStoreToTemp(lclNum, op1, CHECK_SPILL_NONE);
+                    impStoreToTemp(lclNum, op1, CHECK_SPILL_NONE);
 
-                assert(lvaTable[lclNum].lvSingleDef == 0);
-                lvaTable[lclNum].lvSingleDef = 1;
-                JITDUMP("Marked V%02u as a single def local\n", lclNum);
-                lvaSetClass(lclNum, resolvedToken.hClass, true /* is Exact */);
+                    assert(lvaTable[lclNum].lvSingleDef == 0);
+                    lvaTable[lclNum].lvSingleDef = 1;
+                    JITDUMP("Marked V%02u as a single def local\n", lclNum);
+                    lvaSetClass(lclNum, resolvedToken.hClass, true /* is Exact */);
 
-                /* Push the result of the call on the stack */
+                    /* Push the result of the call on the stack */
 
-                impPushOnStack(gtNewLclvNode(lclNum, TYP_REF), tiRetVal);
+                    impPushOnStack(gtNewLclvNode(lclNum, TYP_REF), tiRetVal);
+                }
+                else
+                {
+                    /* Push the result of the call on the stack */
+                    impPushOnStack(op1, tiRetVal);
+                }
 
                 callTyp = TYP_REF;
             }
diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp
index 095d256cdd57e5..9f3b5642b4b537 100644
--- a/src/coreclr/jit/importercalls.cpp
+++ b/src/coreclr/jit/importercalls.cpp
@@ -2614,6 +2614,11 @@ GenTree* Compiler::impInitializeArrayIntrinsic(CORINFO_SIG_INFO* sig)
         // Make sure the target of the store is the array passed to InitializeArray.
         if (arrayLocalStore->AsLclVar()->GetLclNum() != arrayLocalNode->AsLclVar()->GetLclNum())
         {
+            if (opts.OptimizationDisabled())
+            {
+                return nullptr;
+            }
+
             // The array can be spilled to a temp for stack allocation.
             // Try getting the actual store node from the previous statement.
             if (arrayLocalStore->AsLclVar()->Data()->OperIs(GT_LCL_VAR) && impLastStmt->GetPrevStmt() != nullptr)

From d4fd6aec0f54b40ee2e02e3e56a4d526542f25b9 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 7 Dec 2024 02:50:50 +0900
Subject: [PATCH 54/75] Check CI

---
 src/coreclr/jit/objectalloc.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 8e152a2bfcbbff..f32a3bf8b55aaa 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -347,6 +347,7 @@ void ObjectAllocator::ComputeStackObjectPointers(BitVecTraits* bitVecTraits)
                     {
                         // Check if we know what is assigned to this pointer.
                         unsigned bitCount = BitVecOps::Count(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclNum]);
+                        assert(bitCount <= 1);
                         if (bitCount > 0)
                         {
                             BitVecOps::Iter iter(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclNum]);

From b3ff72d3f38fd34a3709f17805957c2625a32981 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 7 Dec 2024 16:55:04 +0900
Subject: [PATCH 55/75] Take INDEX as non-escape

---
 src/coreclr/jit/objectalloc.cpp | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index f32a3bf8b55aaa..35d5f8955c5730 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -348,25 +348,16 @@ void ObjectAllocator::ComputeStackObjectPointers(BitVecTraits* bitVecTraits)
                         // Check if we know what is assigned to this pointer.
                         unsigned bitCount = BitVecOps::Count(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclNum]);
                         assert(bitCount <= 1);
-                        if (bitCount > 0)
+                        if (bitCount == 1)
                         {
                             BitVecOps::Iter iter(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclNum]);
-                            unsigned        rhsLclNum               = 0;
-                            bool            definitelyStackPointing = true;
+                            unsigned        rhsLclNum = 0;
+                            iter.NextElem(&rhsLclNum);
 
-                            while (iter.NextElem(&rhsLclNum))
+                            if (DoesLclVarPointToStack(rhsLclNum))
                             {
-                                if (!DoesLclVarPointToStack(rhsLclNum))
-                                {
-                                    definitelyStackPointing = false;
-                                    break;
-                                }
-                            }
-
-                            if (definitelyStackPointing)
-                            {
-                                // All stores to lclNum local are the definitely-stack-pointing
-                                // rhsLclNum locals so lclNum local is also definitely-stack-pointing.
+                                // The only store to lclNum local is the definitely-stack-pointing
+                                // rhsLclNum local so lclNum local is also definitely-stack-pointing.
                                 MarkLclVarAsDefinitelyStackPointing(lclNum);
                             }
                         }
@@ -992,7 +983,18 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack<GenTree*>* parent
             case GT_SUB:
             case GT_BOX:
             case GT_FIELD_ADDR:
+                // Check whether the local escapes via its grandparent.
+                ++parentIndex;
+                keepChecking = true;
+                break;
+
             case GT_INDEX_ADDR:
+                if (tree == parent->AsIndexAddr()->Index())
+                {
+                    // The index is not taken so the local doesn't escape.
+                    canLclVarEscapeViaParentStack = false;
+                    break;
+                }
                 // Check whether the local escapes via its grandparent.
                 ++parentIndex;
                 keepChecking = true;

From 49c39b9a69c800adf1c2479a7aaf68423091ad7b Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 7 Dec 2024 18:18:33 +0900
Subject: [PATCH 56/75] Early exit when optimization is disabled

---
 src/coreclr/jit/objectalloc.cpp | 8 ++++++++
 src/coreclr/jit/objectalloc.h   | 6 +++---
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 35d5f8955c5730..a076d78518a20e 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -37,6 +37,14 @@ PhaseStatus ObjectAllocator::DoPhase()
         return PhaseStatus::MODIFIED_NOTHING;
     }
 
+    // If optimizations are disabled and there are no newobjs, we don't need to morph anything.
+    if (comp->opts.OptimizationDisabled() && (comp->optMethodFlags & OMF_HAS_NEWOBJ) == 0)
+    {
+        JITDUMP("optimizations are disabled and there are no newobjs; punting\n");
+        comp->fgInvalidateDfsTree();
+        return PhaseStatus::MODIFIED_NOTHING;
+    }
+
     bool        enabled       = IsObjectStackAllocationEnabled();
     const char* disableReason = ": global config";
 
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index 86e1b697be4628..2567f756878c62 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -157,7 +157,7 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNu
     enableArrays            = (JitConfig.JitObjectStackAllocationArray() != 0);
 #endif
 
-    unsigned int classSize = 0;
+    unsigned classSize = 0;
 
     if (allocType == OAT_NEWARR)
     {
@@ -184,7 +184,7 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNu
         }
 
         unsigned elemSize = elemLayout != nullptr ? elemLayout->GetSize() : genTypeSize(type);
-        classSize         = (unsigned int)OFFSETOF__CORINFO_Array__data + elemSize * static_cast<unsigned int>(length);
+        classSize         = static_cast<unsigned>(OFFSETOF__CORINFO_Array__data) + elemSize * static_cast<unsigned>(length);
     }
     else if (allocType == OAT_NEWOBJ)
     {
@@ -243,7 +243,7 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNu
 
     if (blockSize != nullptr)
     {
-        *blockSize = (unsigned int)classSize;
+        *blockSize = classSize;
     }
 
     return true;

From a696f8751a115edcf042817862eb159bac70b251 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 7 Dec 2024 20:05:59 +0900
Subject: [PATCH 57/75] Format

---
 src/coreclr/jit/objectalloc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index 2567f756878c62..cbab18c4b4b9f6 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -184,7 +184,7 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNu
         }
 
         unsigned elemSize = elemLayout != nullptr ? elemLayout->GetSize() : genTypeSize(type);
-        classSize         = static_cast<unsigned>(OFFSETOF__CORINFO_Array__data) + elemSize * static_cast<unsigned>(length);
+        classSize = static_cast<unsigned>(OFFSETOF__CORINFO_Array__data) + elemSize * static_cast<unsigned>(length);
     }
     else if (allocType == OAT_NEWOBJ)
     {

From cc88979ba272979e7c9eb9dea195830793ea63cf Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 14 Dec 2024 17:46:53 +0900
Subject: [PATCH 58/75] Revert "Try fixing remorph issue"

This reverts commit 1914e806485b8b90ad031ca09607874e09e096e1.
---
 src/coreclr/jit/compiler.h  |  2 +-
 src/coreclr/jit/gentree.cpp | 29 ++++++-----------------------
 src/coreclr/jit/morph.cpp   |  9 +--------
 3 files changed, 8 insertions(+), 32 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index f18684f5296308..5e05ff24cf1b28 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -3745,7 +3745,7 @@ class Compiler
 
     //-------------------------------------------------------------------------
 
-    GenTree* gtFoldExpr(GenTree* tree, bool* folded = nullptr);
+    GenTree* gtFoldExpr(GenTree* tree);
     GenTree* gtFoldExprConst(GenTree* tree);
     GenTree* gtFoldIndirConst(GenTreeIndir* indir);
     GenTree* gtFoldExprSpecial(GenTree* tree);
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index c31d6ac1b2c552..d8d5752a5f6441 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -13572,13 +13572,10 @@ void Compiler::gtDispLIRNode(GenTree* node, const char* prefixMsg /* = nullptr *
  *  and call the methods to perform the folding
  */
 
-GenTree* Compiler::gtFoldExpr(GenTree* tree, bool* folded)
+GenTree* Compiler::gtFoldExpr(GenTree* tree)
 {
     unsigned kind = tree->OperKind();
 
-    if (folded != nullptr)
-        *folded = false;
-
     /* We must have a simple operation to fold */
 
     // If we're in CSE, it's not safe to perform tree
@@ -13598,19 +13595,13 @@ GenTree* Compiler::gtFoldExpr(GenTree* tree, bool* folded)
     {
         if (tree->OperIsConditional())
         {
-            GenTree* newTree = gtFoldExprConditional(tree);
-            if (folded != nullptr)
-                *folded = newTree != tree;
-            return newTree;
+            return gtFoldExprConditional(tree);
         }
 
 #if defined(FEATURE_HW_INTRINSICS)
         if (tree->OperIsHWIntrinsic())
         {
-            GenTree* newTree = gtFoldExprHWIntrinsic(tree->AsHWIntrinsic());
-            if (folded != nullptr)
-                *folded = newTree != tree;
-            return newTree;
+            return gtFoldExprHWIntrinsic(tree->AsHWIntrinsic());
         }
 #endif // FEATURE_HW_INTRINSICS
 
@@ -13638,7 +13629,6 @@ GenTree* Compiler::gtFoldExpr(GenTree* tree, bool* folded)
     {
         if (op1->OperIsConst())
         {
-            // constants folding results in a new tree that may be folded again, don't mark it as folded
             return gtFoldExprConst(tree);
         }
     }
@@ -13650,8 +13640,7 @@ GenTree* Compiler::gtFoldExpr(GenTree* tree, bool* folded)
         // one of their arguments is an address.
         if (op1->OperIsConst() && op2->OperIsConst() && !tree->OperIsAtomicOp())
         {
-            // both nodes are constants - fold the expression
-            // constants folding results in a new tree that may be folded again, don't mark it as folded
+            /* both nodes are constants - fold the expression */
             return gtFoldExprConst(tree);
         }
         else if (op1->OperIsConst() || op2->OperIsConst())
@@ -13666,19 +13655,13 @@ GenTree* Compiler::gtFoldExpr(GenTree* tree, bool* folded)
                 return tree;
             }
 
-            GenTree* newTree = gtFoldExprSpecial(tree);
-            if (folded != nullptr)
-                *folded = newTree != tree;
-            return newTree;
+            return gtFoldExprSpecial(tree);
         }
         else if (tree->OperIsCompare())
         {
             /* comparisons of two local variables can sometimes be folded */
 
-            GenTree* newTree = gtFoldExprCompare(tree);
-            if (folded != nullptr)
-                *folded = newTree != tree;
-            return newTree;
+            return gtFoldExprCompare(tree);
         }
     }
 
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index eceb31f590a52c..de3deec965c5c3 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -8298,17 +8298,10 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
     }
 
     // Try to fold it, maybe we get lucky,
-    bool folded;
-    tree = gtFoldExpr(tree, &folded);
+    tree = gtFoldExpr(tree);
 
     if (oldTree != tree)
     {
-        if (folded)
-        {
-            INDEBUG(tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
-            return tree;
-        }
-
         /* if gtFoldExpr returned op1 or op2 then we are done */
         if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
         {

From 52b32f27d4d675969c6b32c2cabe105b12be7eca Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Fri, 20 Dec 2024 18:34:35 +0900
Subject: [PATCH 59/75] Introduce a flag for stack allocated arrays

---
 src/coreclr/jit/compiler.h      | 5 +++--
 src/coreclr/jit/lclmorph.cpp    | 6 +++---
 src/coreclr/jit/objectalloc.cpp | 2 ++
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index d99813fe5290fa..33a477c00a9f79 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -690,8 +690,9 @@ class LclVarDsc
     unsigned char lvSingleDefDisqualifyReason = 'H';
 #endif
 
-    unsigned char lvAllDefsAreNoGc    : 1; // For pinned locals: true if all defs of this local are no-gc
-    unsigned char lvStackAllocatedBox : 1; // Local is a stack allocated box
+    unsigned char lvAllDefsAreNoGc      : 1; // For pinned locals: true if all defs of this local are no-gc
+    unsigned char lvStackAllocatedBox   : 1; // Local is a stack allocated box
+    unsigned char lvStackAllocatedArray : 1; // Local is a stack allocated array
 
 #if FEATURE_MULTIREG_ARGS
     regNumber lvRegNumForSlot(unsigned slotNum)
diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp
index 0440fa119858bb..379ee02bf13845 100644
--- a/src/coreclr/jit/lclmorph.cpp
+++ b/src/coreclr/jit/lclmorph.cpp
@@ -1359,8 +1359,8 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
                 assert(TopValue(1).Node() == node->gtGetOp1());
                 assert(TopValue(0).Node() == node->gtGetOp2());
 
-                // We only expect TopValue(1).IsAddress() to be true for stack allocated arrays
-                if (node->gtGetOp2()->IsCnsIntOrI() && TopValue(1).IsAddress())
+                if (node->gtGetOp2()->IsCnsIntOrI() && TopValue(1).IsAddress() &&
+                    m_compiler->lvaGetDesc(TopValue(1).LclNum())->lvStackAllocatedArray == 1)
                 {
                     ssize_t offset = node->AsIndexAddr()->gtElemOffset +
                                      node->gtGetOp2()->AsIntCon()->IconValue() * node->AsIndexAddr()->gtElemSize;
@@ -1406,7 +1406,7 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
                 assert(TopValue(1).Node() == node);
                 assert(TopValue(0).Node() == node->gtGetOp1());
 
-                if (TopValue(0).IsAddress())
+                if (TopValue(0).IsAddress() && m_compiler->lvaGetDesc(TopValue(0).LclNum())->lvStackAllocatedArray == 1)
                 {
                     GenTree* gtLclFld =
                         m_compiler->gtNewLclFldNode(TopValue(0).LclNum(), TYP_INT,
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 93bed8916cdabe..672e044a1c6d25 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -784,6 +784,8 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*
     Statement* lenStmt = comp->gtNewStmt(len);
     comp->fgInsertStmtBefore(block, stmt, lenStmt);
 
+    comp->lvaGetDesc(lclNum)->lvStackAllocatedArray = 1;
+
     return lclNum;
 }
 

From 1f342bdc328f5aca9ef0f84f8af627df6532a4bc Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 21 Dec 2024 20:11:32 +0900
Subject: [PATCH 60/75] Handle in global morph

---
 src/coreclr/jit/morph.cpp | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 7f6cd2cb3af97e..75ba724c1cf5ad 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -3548,6 +3548,9 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
 
     noway_assert(!varTypeIsStruct(elemTyp) || (elemStructType != NO_CLASS_HANDLE));
 
+    indexAddr->Arr()   = fgMorphTree(indexAddr->Arr());
+    indexAddr->Index() = fgMorphTree(indexAddr->Index());
+
     // In minopts, we will not be expanding GT_INDEX_ADDR in order to minimize the size of the IR. As minopts
     // compilation time is roughly proportional to the size of the IR, this helps keep compilation times down.
     // Furthermore, this representation typically saves on code size in minopts w.r.t. the complete expansion
@@ -3569,8 +3572,6 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
     // for more straightforward bounds-check removal, CSE, etc.
     if (opts.MinOpts())
     {
-        indexAddr->Arr()   = fgMorphTree(indexAddr->Arr());
-        indexAddr->Index() = fgMorphTree(indexAddr->Index());
         indexAddr->AddAllEffectsFlags(indexAddr->Arr(), indexAddr->Index());
 
         // Mark the indirection node as needing a range check if necessary.
@@ -3602,6 +3603,29 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
     GenTree*          indexDefn   = nullptr; // non-NULL if we need to allocate a temp for the index expression
     GenTreeBoundsChk* boundsCheck = nullptr;
 
+    if (index->IsCnsIntOrI() && arrRef->IsAnyLocal() &&
+        lvaGetDesc(arrRef->AsLclVarCommon()->GetLclNum())->lvStackAllocatedArray == 1)
+    {
+        ssize_t offset = static_cast<ssize_t>(elemOffs) + index->AsIntCon()->IconValue() * elemSize;
+
+        // For stack allocated arrays the local size is the size of the entire array
+        if (!indexAddr->IsBoundsChecked() ||
+            (static_cast<ssize_t>(elemOffs) <= offset &&
+             offset < static_cast<ssize_t>(lvaLclExactSize(arrRef->AsLclVarCommon()->GetLclNum()))))
+        {
+            if (FitsIn<unsigned>(offset))
+            {
+                return fgMorphTree(gtNewOperNode(GT_ADD, indexAddr->TypeGet(), arrRef, gtNewIconNode(offset)));
+            }
+        }
+        else
+        {
+            return fgMorphTree(gtNewOperNode(GT_COMMA, indexAddr->TypeGet(),
+                                             gtNewHelperCallNode(CORINFO_HELP_RNGCHKFAIL, TYP_VOID),
+                                             gtNewIconNode(0, TYP_BYREF)));
+        }
+    }
+
     // If we're doing range checking, introduce a GT_BOUNDS_CHECK node for the address.
     if (indexAddr->IsBoundsChecked())
     {
@@ -3633,7 +3657,7 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
         }
 
         if (((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) != 0) ||
-            gtComplexityExceeds(index, MAX_ARR_COMPLEXITY) || index->OperIs(GT_LCL_FLD) ||
+            gtComplexityExceeds(index, MAX_INDEX_COMPLEXITY) || index->OperIs(GT_LCL_FLD) ||
             (index->OperIs(GT_LCL_VAR) && lvaIsLocalImplicitlyAccessedByRef(index->AsLclVar()->GetLclNum())))
         {
             unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("index expr"));
@@ -7755,6 +7779,12 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
                     return iconNode;
                 }
             }
+            if (op1->IsAnyLocal() && lvaGetDesc(op1->AsLclVarCommon())->lvStackAllocatedArray == 1)
+            {
+                return fgMorphTree(
+                    gtNewLclFldNode(op1->AsLclVarCommon()->GetLclNum(), tree->TypeGet(),
+                                    op1->AsLclVarCommon()->GetLclOffs() + OFFSETOF__CORINFO_Array__length));
+            }
             break;
 
         case GT_IND:

From 5b51919c2057fddf8543a5dce81505e693f0c64a Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 21 Dec 2024 20:11:53 +0900
Subject: [PATCH 61/75] Some special cases

---
 src/coreclr/jit/assertionprop.cpp | 24 +++++++++++++++++++-----
 src/coreclr/jit/lclvars.cpp       |  5 +++++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp
index bad18a06c30584..4900ed872cbc99 100644
--- a/src/coreclr/jit/assertionprop.cpp
+++ b/src/coreclr/jit/assertionprop.cpp
@@ -170,16 +170,22 @@ bool IntegralRange::Contains(int64_t value) const
         {
             GenTree* const addr = node->AsIndir()->Addr();
 
-            if (node->TypeIs(TYP_INT) && addr->OperIs(GT_ADD) && addr->gtGetOp1()->OperIs(GT_LCL_VAR) &&
-                addr->gtGetOp2()->IsIntegralConst(OFFSETOF__CORINFO_Span__length))
+            if (node->TypeIs(TYP_INT) && addr->OperIs(GT_ADD) && addr->gtGetOp1()->OperIs(GT_LCL_VAR))
             {
                 GenTreeLclVar* const lclVar = addr->gtGetOp1()->AsLclVar();
 
-                if (compiler->lvaGetDesc(lclVar->GetLclNum())->IsSpan())
+                if (addr->gtGetOp2()->IsIntegralConst(OFFSETOF__CORINFO_Span__length) &&
+                    compiler->lvaGetDesc(lclVar->GetLclNum())->IsSpan())
                 {
                     assert(compiler->lvaIsImplicitByRefLocal(lclVar->GetLclNum()));
                     return {SymbolicIntegerValue::Zero, UpperBoundForType(rangeType)};
                 }
+
+                if (addr->gtGetOp2()->IsIntegralConst(OFFSETOF__CORINFO_Array__length) &&
+                    compiler->lvaGetDesc(lclVar->GetLclNum())->lvStackAllocatedArray == 1)
+                {
+                    return {SymbolicIntegerValue::Zero, SymbolicIntegerValue::ArrayLenMax};
+                }
             }
             break;
         }
@@ -189,9 +195,17 @@ bool IntegralRange::Contains(int64_t value) const
             GenTreeLclFld* const lclFld = node->AsLclFld();
             LclVarDsc* const     varDsc = compiler->lvaGetDesc(lclFld);
 
-            if (node->TypeIs(TYP_INT) && varDsc->IsSpan() && lclFld->GetLclOffs() == OFFSETOF__CORINFO_Span__length)
+            if (node->TypeIs(TYP_INT))
             {
-                return {SymbolicIntegerValue::Zero, UpperBoundForType(rangeType)};
+                if (varDsc->IsSpan() && lclFld->GetLclOffs() == OFFSETOF__CORINFO_Span__length)
+                {
+                    return {SymbolicIntegerValue::Zero, UpperBoundForType(rangeType)};
+                }
+
+                if (varDsc->lvStackAllocatedArray == 1 && lclFld->GetLclOffs() == OFFSETOF__CORINFO_Array__length)
+                {
+                    return {SymbolicIntegerValue::Zero, SymbolicIntegerValue::ArrayLenMax};
+                }
             }
 
             break;
diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp
index 2e212085da8a92..9dc91d0865e7ca 100644
--- a/src/coreclr/jit/lclvars.cpp
+++ b/src/coreclr/jit/lclvars.cpp
@@ -2820,6 +2820,11 @@ void Compiler::StructPromotionHelper::PromoteStructVar(unsigned lclNum)
             fieldVarDsc->SetIsNeverNegative(true);
         }
 
+        if (varDsc->lvStackAllocatedArray == 1 && fieldVarDsc->lvFldOffset == OFFSETOF__CORINFO_Array__length)
+        {
+            fieldVarDsc->SetIsNeverNegative(true);
+        }
+
         // This new local may be the first time we've seen a long typed local.
         if (fieldVarDsc->lvType == TYP_LONG)
         {

From ce41a8de06860a7a06635a7098c3133fa6f0c0bc Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 21 Dec 2024 22:45:00 +0900
Subject: [PATCH 62/75] Avoid extensive remorph

---
 src/coreclr/jit/morph.cpp | 41 +++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 75ba724c1cf5ad..e7e76fabc42c8e 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -3548,9 +3548,6 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
 
     noway_assert(!varTypeIsStruct(elemTyp) || (elemStructType != NO_CLASS_HANDLE));
 
-    indexAddr->Arr()   = fgMorphTree(indexAddr->Arr());
-    indexAddr->Index() = fgMorphTree(indexAddr->Index());
-
     // In minopts, we will not be expanding GT_INDEX_ADDR in order to minimize the size of the IR. As minopts
     // compilation time is roughly proportional to the size of the IR, this helps keep compilation times down.
     // Furthermore, this representation typically saves on code size in minopts w.r.t. the complete expansion
@@ -3572,6 +3569,9 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
     // for more straightforward bounds-check removal, CSE, etc.
     if (opts.MinOpts())
     {
+        indexAddr->Arr()   = fgMorphTree(indexAddr->Arr());
+        indexAddr->Index() = fgMorphTree(indexAddr->Index());
+
         indexAddr->AddAllEffectsFlags(indexAddr->Arr(), indexAddr->Index());
 
         // Mark the indirection node as needing a range check if necessary.
@@ -3603,27 +3603,30 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
     GenTree*          indexDefn   = nullptr; // non-NULL if we need to allocate a temp for the index expression
     GenTreeBoundsChk* boundsCheck = nullptr;
 
-    if (index->IsCnsIntOrI() && arrRef->IsAnyLocal() &&
-        lvaGetDesc(arrRef->AsLclVarCommon()->GetLclNum())->lvStackAllocatedArray == 1)
+    if (arrRef->IsAnyLocal() && lvaGetDesc(arrRef->AsLclVarCommon()->GetLclNum())->lvStackAllocatedArray == 1)
     {
-        ssize_t offset = static_cast<ssize_t>(elemOffs) + index->AsIntCon()->IconValue() * elemSize;
-
-        // For stack allocated arrays the local size is the size of the entire array
-        if (!indexAddr->IsBoundsChecked() ||
-            (static_cast<ssize_t>(elemOffs) <= offset &&
-             offset < static_cast<ssize_t>(lvaLclExactSize(arrRef->AsLclVarCommon()->GetLclNum()))))
+        indexAddr->Index() = fgMorphTree(indexAddr->Index());
+        if (index->IsCnsIntOrI())
         {
-            if (FitsIn<unsigned>(offset))
+            ssize_t offset = static_cast<ssize_t>(elemOffs) + index->AsIntCon()->IconValue() * elemSize;
+
+            // For stack allocated arrays the local size is the size of the entire array
+            if (!indexAddr->IsBoundsChecked() ||
+                (static_cast<ssize_t>(elemOffs) <= offset &&
+                 offset < static_cast<ssize_t>(lvaLclExactSize(arrRef->AsLclVarCommon()->GetLclNum()))))
+            {
+                if (FitsIn<unsigned>(offset))
+                {
+                    return fgMorphTree(gtNewOperNode(GT_ADD, indexAddr->TypeGet(), arrRef, gtNewIconNode(offset)));
+                }
+            }
+            else
             {
-                return fgMorphTree(gtNewOperNode(GT_ADD, indexAddr->TypeGet(), arrRef, gtNewIconNode(offset)));
+                return fgMorphTree(gtNewOperNode(GT_COMMA, indexAddr->TypeGet(),
+                                                 gtNewHelperCallNode(CORINFO_HELP_RNGCHKFAIL, TYP_VOID),
+                                                 gtNewIconNode(0, TYP_BYREF)));
             }
         }
-        else
-        {
-            return fgMorphTree(gtNewOperNode(GT_COMMA, indexAddr->TypeGet(),
-                                             gtNewHelperCallNode(CORINFO_HELP_RNGCHKFAIL, TYP_VOID),
-                                             gtNewIconNode(0, TYP_BYREF)));
-        }
     }
 
     // If we're doing range checking, introduce a GT_BOUNDS_CHECK node for the address.

From 47114c676912186e6aaef82183c92775e0149928 Mon Sep 17 00:00:00 2001
From: Steven He <hez2010@outlook.com>
Date: Sat, 21 Dec 2024 22:47:16 +0900
Subject: [PATCH 63/75] Oops

---
 src/coreclr/jit/morph.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index e7e76fabc42c8e..8a1c27f8e34e07 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -3605,7 +3605,7 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
 
     if (arrRef->IsAnyLocal() && lvaGetDesc(arrRef->AsLclVarCommon()->GetLclNum())->lvStackAllocatedArray == 1)
     {
-        indexAddr->Index() = fgMorphTree(indexAddr->Index());
+        index = fgMorphTree(index);
         if (index->IsCnsIntOrI())
         {
             ssize_t offset = static_cast<ssize_t>(elemOffs) + index->AsIntCon()->IconValue() * elemSize;

From de30bdc5d3a7042ef8c310cb80ed6e9c04100c81 Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Thu, 9 Jan 2025 15:50:17 -0800
Subject: [PATCH 64/75] Alternative take on array stack allocation

Leave the newarr helper call in place, and don't rewrite the uses to be
uses of the local. Remove special handling in local morph and morph.

Lower the helper to the proper stores later on.

Update a few utilities to understand array base addresses may not be TYP_REF.
---
 src/coreclr/jit/assertionprop.cpp   |  24 +----
 src/coreclr/jit/compiler.cpp        |   3 +
 src/coreclr/jit/compiler.h          |  19 +++-
 src/coreclr/jit/compphases.h        |   1 +
 src/coreclr/jit/gentree.cpp         |   7 +-
 src/coreclr/jit/gentree.h           |   4 +-
 src/coreclr/jit/helperexpansion.cpp | 150 ++++++++++++++++++++++++++++
 src/coreclr/jit/lclmorph.cpp        |  71 -------------
 src/coreclr/jit/lclvars.cpp         |   5 -
 src/coreclr/jit/morph.cpp           |  35 +------
 src/coreclr/jit/objectalloc.cpp     |  52 +++++-----
 11 files changed, 207 insertions(+), 164 deletions(-)

diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp
index 4900ed872cbc99..bad18a06c30584 100644
--- a/src/coreclr/jit/assertionprop.cpp
+++ b/src/coreclr/jit/assertionprop.cpp
@@ -170,22 +170,16 @@ bool IntegralRange::Contains(int64_t value) const
         {
             GenTree* const addr = node->AsIndir()->Addr();
 
-            if (node->TypeIs(TYP_INT) && addr->OperIs(GT_ADD) && addr->gtGetOp1()->OperIs(GT_LCL_VAR))
+            if (node->TypeIs(TYP_INT) && addr->OperIs(GT_ADD) && addr->gtGetOp1()->OperIs(GT_LCL_VAR) &&
+                addr->gtGetOp2()->IsIntegralConst(OFFSETOF__CORINFO_Span__length))
             {
                 GenTreeLclVar* const lclVar = addr->gtGetOp1()->AsLclVar();
 
-                if (addr->gtGetOp2()->IsIntegralConst(OFFSETOF__CORINFO_Span__length) &&
-                    compiler->lvaGetDesc(lclVar->GetLclNum())->IsSpan())
+                if (compiler->lvaGetDesc(lclVar->GetLclNum())->IsSpan())
                 {
                     assert(compiler->lvaIsImplicitByRefLocal(lclVar->GetLclNum()));
                     return {SymbolicIntegerValue::Zero, UpperBoundForType(rangeType)};
                 }
-
-                if (addr->gtGetOp2()->IsIntegralConst(OFFSETOF__CORINFO_Array__length) &&
-                    compiler->lvaGetDesc(lclVar->GetLclNum())->lvStackAllocatedArray == 1)
-                {
-                    return {SymbolicIntegerValue::Zero, SymbolicIntegerValue::ArrayLenMax};
-                }
             }
             break;
         }
@@ -195,17 +189,9 @@ bool IntegralRange::Contains(int64_t value) const
             GenTreeLclFld* const lclFld = node->AsLclFld();
             LclVarDsc* const     varDsc = compiler->lvaGetDesc(lclFld);
 
-            if (node->TypeIs(TYP_INT))
+            if (node->TypeIs(TYP_INT) && varDsc->IsSpan() && lclFld->GetLclOffs() == OFFSETOF__CORINFO_Span__length)
             {
-                if (varDsc->IsSpan() && lclFld->GetLclOffs() == OFFSETOF__CORINFO_Span__length)
-                {
-                    return {SymbolicIntegerValue::Zero, UpperBoundForType(rangeType)};
-                }
-
-                if (varDsc->lvStackAllocatedArray == 1 && lclFld->GetLclOffs() == OFFSETOF__CORINFO_Array__length)
-                {
-                    return {SymbolicIntegerValue::Zero, SymbolicIntegerValue::ArrayLenMax};
-                }
+                return {SymbolicIntegerValue::Zero, UpperBoundForType(rangeType)};
             }
 
             break;
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 52975e90bf8854..8e1f5a825387b2 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -5128,6 +5128,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     // Expand thread local access
     DoPhase(this, PHASE_EXPAND_TLS, &Compiler::fgExpandThreadLocalAccess);
 
+    // Expand stack allocated arrays
+    DoPhase(this, PHASE_EXPAND_STACK_ARR, &Compiler::fgExpandStackArrayAllocations);
+
     // Insert GC Polls
     DoPhase(this, PHASE_INSERT_GC_POLLS, &Compiler::fgInsertGCPolls);
 
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 01f5b38fd1bc14..93800ba08fc356 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -691,9 +691,8 @@ class LclVarDsc
     unsigned char lvSingleDefDisqualifyReason = 'H';
 #endif
 
-    unsigned char lvAllDefsAreNoGc      : 1; // For pinned locals: true if all defs of this local are no-gc
-    unsigned char lvStackAllocatedBox   : 1; // Local is a stack allocated box
-    unsigned char lvStackAllocatedArray : 1; // Local is a stack allocated array
+    unsigned char lvAllDefsAreNoGc    : 1; // For pinned locals: true if all defs of this local are no-gc
+    unsigned char lvStackAllocatedBox : 1; // Local is a stack allocated box
 
 #if FEATURE_MULTIREG_ARGS
     regNumber lvRegNumForSlot(unsigned slotNum)
@@ -6067,6 +6066,9 @@ class Compiler
     PhaseStatus fgExpandStaticInit();
     bool fgExpandStaticInitForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call);
 
+    PhaseStatus fgExpandStackArrayAllocations();
+    bool fgExpandStackArrayAllocation(BasicBlock* pBlock, Statement* stmt, GenTreeCall* call);
+
     PhaseStatus fgVNBasedIntrinsicExpansion();
     bool fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call);
     bool fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call);
@@ -7499,6 +7501,7 @@ class Compiler
 #define OMF_HAS_SPECIAL_INTRINSICS             0x00020000 // Method contains special intrinsics expanded in late phases
 #define OMF_HAS_RECURSIVE_TAILCALL             0x00040000 // Method contains recursive tail call
 #define OMF_HAS_EXPANDABLE_CAST                0x00080000 // Method contains casts eligible for late expansion
+#define OMF_HAS_STACK_ARRAY                    0x00100000 // Method contains stack allocated arrays
 
     // clang-format on
 
@@ -7589,6 +7592,16 @@ class Compiler
         optMethodFlags |= OMF_HAS_RECURSIVE_TAILCALL;
     }
 
+    bool doesMethodHaveStackAllocatedArray()
+    {
+        return (optMethodFlags & OMF_HAS_STACK_ARRAY) != 0;
+    }
+
+    void setMethodHasStackAllocatedArray()
+    {
+        optMethodFlags |= OMF_HAS_STACK_ARRAY;
+    }
+
     void pickGDV(GenTreeCall*           call,
                  IL_OFFSET              ilOffset,
                  bool                   isInterface,
diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h
index 25811944345743..94eee3ba9bf5cd 100644
--- a/src/coreclr/jit/compphases.h
+++ b/src/coreclr/jit/compphases.h
@@ -109,6 +109,7 @@ CompPhaseNameMacro(PHASE_EXPAND_RTLOOKUPS,           "Expand runtime lookups",
 CompPhaseNameMacro(PHASE_EXPAND_STATIC_INIT,         "Expand static init",             false, -1, true)
 CompPhaseNameMacro(PHASE_EXPAND_CASTS,               "Expand casts",                   false, -1, true)
 CompPhaseNameMacro(PHASE_EXPAND_TLS,                 "Expand TLS access",              false, -1, true)
+CompPhaseNameMacro(PHASE_EXPAND_STACK_ARR,           "Expand stack array allocation",  false, -1, true)
 CompPhaseNameMacro(PHASE_INSERT_GC_POLLS,            "Insert GC Polls",                false, -1, true)
 CompPhaseNameMacro(PHASE_CREATE_THROW_HELPERS,       "Create throw helper blocks",     false, -1, true)
 CompPhaseNameMacro(PHASE_DETERMINE_FIRST_COLD_BLOCK, "Determine first cold block",     false, -1, true)
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 166a5ff7bc585d..817fe5968dfab7 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -19800,7 +19800,10 @@ void GenTreeArrAddr::ParseArrayAddress(Compiler* comp, GenTree** pArr, ValueNum*
 /* static */ void GenTreeArrAddr::ParseArrayAddressWork(
     GenTree* tree, Compiler* comp, target_ssize_t inputMul, GenTree** pArr, ValueNum* pInxVN, target_ssize_t* pOffset)
 {
-    if (tree->TypeIs(TYP_REF))
+    ValueNum  vn = comp->GetValueNumStore()->VNLiberalNormalValue(tree->gtVNPair);
+    VNFuncApp vnf;
+
+    if (tree->TypeIs(TYP_REF) || comp->GetValueNumStore()->IsVNNewArr(vn, &vnf))
     {
         // This must be the array pointer.
         assert(*pArr == nullptr);
@@ -19903,7 +19906,7 @@ void GenTreeArrAddr::ParseArrayAddress(Compiler* comp, GenTree** pArr, ValueNum*
         // If we didn't return above, must be a contribution to the non-constant part of the index VN.
         // We don't get here for GT_CNS_INT, GT_ADD, or GT_SUB, or for GT_MUL by constant, or GT_LSH of
         // constant shift. Thus, the generated index VN does not include the parsed constant offset.
-        ValueNum vn = comp->GetValueNumStore()->VNLiberalNormalValue(tree->gtVNPair);
+        //
         if (inputMul != 1)
         {
             ValueNum mulVN = comp->GetValueNumStore()->VNForLongCon(inputMul);
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index 6f44ac57595751..17844a5d683819 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -3586,7 +3586,7 @@ class SsaNumInfo final
     }
 };
 
-// Common supertype of [STORE_]LCL_VAR, [STORE_]LCL_FLD, PHI_ARG, LCL_VAR_ADDR, LCL_FLD_ADDR.
+// Common supertype of [STORE_]LCL_VAR, [STORE_]LCL_FLD, PHI_ARG, LCL_ADDR, FIELD_ADDR.
 // This inherits from UnOp because lclvar stores are unary.
 //
 struct GenTreeLclVarCommon : public GenTreeUnOp
@@ -4227,6 +4227,7 @@ enum GenTreeCallFlags : unsigned int
     GTF_CALL_M_CAST_CAN_BE_EXPANDED    = 0x04000000, // this cast (helper call) can be expanded if it's profitable. To be removed.
     GTF_CALL_M_CAST_OBJ_NONNULL        = 0x08000000, // if we expand this specific cast we don't need to check the input object for null
                                                      // NOTE: if needed, this flag can be removed, and we can introduce new _NONNUL cast helpers
+    GTF_CALL_M_STACK_ARRAY             = 0x10000000, // this call is a new array helper for a stack allocated array.
 };
 
 inline constexpr GenTreeCallFlags operator ~(GenTreeCallFlags a)
@@ -5742,6 +5743,7 @@ struct GenTreeCall final : public GenTree
         void*                gtStubCallStubAddr;   // GTF_CALL_VIRT_STUB - these are never inlined
         CORINFO_CLASS_HANDLE gtInitClsHnd;         // Used by static init helpers, represents a class they init
         IL_OFFSET            gtCastHelperILOffset; // Used by cast helpers to save corresponding IL offset
+        unsigned             gtNewArrStackLcl; // GTF_CALL_M_STACK_ARRAY - local used for stack allocated array storage
     };
 
     union
diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp
index 56fd853acb0d60..b7ceb82315b756 100644
--- a/src/coreclr/jit/helperexpansion.cpp
+++ b/src/coreclr/jit/helperexpansion.cpp
@@ -2712,3 +2712,153 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt,
 
     return true;
 }
+
+//------------------------------------------------------------------------------
+// fgExpandStackArrayAllocations : expand "new helpers" for stack arrays
+//
+// Returns:
+//    PhaseStatus indicating what, if anything, was changed.
+//
+PhaseStatus Compiler::fgExpandStackArrayAllocations()
+{
+    PhaseStatus result = PhaseStatus::MODIFIED_NOTHING;
+
+    if (!doesMethodHaveStackAllocatedArray())
+    {
+        // The method being compiled doesn't have any stack allocated arrays.
+        return result;
+    }
+
+    // Find allocation sites, and transform them into initializations of the
+    // array method table and length, and replace the allocation call with
+    // the address of the local array.
+    //
+    bool modified = false;
+
+    for (BasicBlock* const block : Blocks())
+    {
+        for (Statement* const stmt : block->Statements())
+        {
+            if ((stmt->GetRootNode()->gtFlags & GTF_CALL) == 0)
+            {
+                continue;
+            }
+
+            for (GenTree* const tree : stmt->TreeList())
+            {
+                if (!tree->IsCall())
+                {
+                    continue;
+                }
+
+                if (fgExpandStackArrayAllocation(block, stmt, tree->AsCall()))
+                {
+                    // If we expand, we split the statement's tree
+                    // so will be done with this statment.
+                    //
+                    modified = true;
+                    break;
+                }
+            }
+        }
+    }
+
+    // we cant assert(modified) here as array allocation sites may
+    // have been unreachable or dead-coded.
+    //
+    return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
+}
+
+//------------------------------------------------------------------------------
+// fgExpandStackArrayAllocation: expand new array helpers for stack allocated arrays
+//
+// Arguments:
+//    block -  block containing the helper call to expand
+//    stmt   - Statement containing the helper call
+//    call   - The helper call
+//
+// Returns:
+//    true if a runtime lookup was found and expanded.
+//
+bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, GenTreeCall* call)
+{
+    if (!call->IsHelperCall())
+    {
+        return false;
+    }
+
+    const CorInfoHelpFunc helper         = eeGetHelperNum(call->gtCallMethHnd);
+    int                   lengthArgIndex = -1;
+
+    switch (helper)
+    {
+        case CORINFO_HELP_NEWARR_1_DIRECT:
+        case CORINFO_HELP_NEWARR_1_VC:
+        case CORINFO_HELP_NEWARR_1_OBJ:
+        case CORINFO_HELP_NEWARR_1_ALIGN8:
+            lengthArgIndex = 1;
+            break;
+
+        case CORINFO_HELP_READYTORUN_NEWARR_1:
+            lengthArgIndex = 0;
+            break;
+
+        default:
+            return false;
+    }
+
+    if ((call->gtCallMoreFlags & GTF_CALL_M_STACK_ARRAY) == 0)
+    {
+        return false;
+    }
+
+    const unsigned lclNum = call->gtNewArrStackLcl;
+
+    JITDUMP("Expanding new array helper for stack allocated array [%06d] in " FMT_BB ":\n", dspTreeID(call),
+            block->bbNum);
+    DISPTREE(call);
+    JITDUMP("\n");
+
+    Statement* newStmt = nullptr;
+    GenTree**  callUse = nullptr;
+    bool       split   = gtSplitTree(block, stmt, call, &newStmt, &callUse);
+
+    if (!split)
+    {
+        newStmt = stmt;
+    }
+
+    // Initialize the array method table pointer.
+    //
+    CORINFO_CLASS_HANDLE arrayHnd = (CORINFO_CLASS_HANDLE)call->compileTimeHelperArgumentHandle;
+    GenTree* const       mt       = gtNewIconEmbClsHndNode(arrayHnd);
+    GenTree* const       mtStore  = gtNewStoreLclFldNode(lclNum, TYP_I_IMPL, 0, mt);
+    Statement* const     mtStmt   = gtNewStmt(mtStore);
+
+    fgInsertStmtBefore(block, newStmt, mtStmt);
+    gtSetStmtInfo(mtStmt);
+    fgSetStmtSeq(mtStmt);
+
+    // Initialize the array length.
+    //
+    GenTree* const   lengthArg    = call->gtArgs.GetArgByIndex(lengthArgIndex)->GetNode();
+    GenTree* const   lengthArgInt = fgOptimizeCast(gtNewCastNode(TYP_INT, lengthArg, false, TYP_INT));
+    GenTree* const   lengthStore = gtNewStoreLclFldNode(lclNum, TYP_INT, OFFSETOF__CORINFO_Array__length, lengthArgInt);
+    Statement* const lenStmt     = gtNewStmt(lengthStore);
+
+    fgInsertStmtBefore(block, newStmt, lenStmt);
+    gtSetStmtInfo(lenStmt);
+    fgSetStmtSeq(lenStmt);
+
+    // Replace call with &lclNum.
+    //
+    GenTreeLclVarCommon* const lclNode = gtNewLclAddrNode(lclNum, 0);
+    *callUse                           = lclNode;
+    DEBUG_DESTROY_NODE(call);
+
+    gtUpdateStmtSideEffects(stmt);
+    gtSetStmtInfo(stmt);
+    fgSetStmtSeq(stmt);
+
+    return true;
+}
diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp
index 379ee02bf13845..4d1b2697215324 100644
--- a/src/coreclr/jit/lclmorph.cpp
+++ b/src/coreclr/jit/lclmorph.cpp
@@ -1353,77 +1353,6 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
                 break;
             }
 
-            case GT_INDEX_ADDR:
-            {
-                assert(TopValue(2).Node() == node);
-                assert(TopValue(1).Node() == node->gtGetOp1());
-                assert(TopValue(0).Node() == node->gtGetOp2());
-
-                if (node->gtGetOp2()->IsCnsIntOrI() && TopValue(1).IsAddress() &&
-                    m_compiler->lvaGetDesc(TopValue(1).LclNum())->lvStackAllocatedArray == 1)
-                {
-                    ssize_t offset = node->AsIndexAddr()->gtElemOffset +
-                                     node->gtGetOp2()->AsIntCon()->IconValue() * node->AsIndexAddr()->gtElemSize;
-
-                    // For stack allocated arrays the local size is the size of the entire array
-                    if (!node->AsIndexAddr()->IsBoundsChecked() ||
-                        (static_cast<ssize_t>(node->AsIndexAddr()->gtElemOffset) <= offset &&
-                         offset < static_cast<ssize_t>(m_compiler->lvaLclExactSize(TopValue(1).LclNum()))))
-                    {
-                        if (FitsIn<unsigned>(offset) &&
-                            TopValue(2).AddOffset(TopValue(1), static_cast<unsigned>(offset)))
-                        {
-                            INDEBUG(TopValue(0).Consume());
-                            PopValue();
-                            PopValue();
-                            break;
-                        }
-                    }
-                    else
-                    {
-                        *use           = m_compiler->gtNewOperNode(GT_COMMA, node->TypeGet(),
-                                                                   m_compiler->gtNewHelperCallNode(CORINFO_HELP_RNGCHKFAIL,
-                                                                                                   TYP_VOID),
-                                                                   m_compiler->gtNewIconNode(0, TYP_BYREF));
-                        m_stmtModified = true;
-                        INDEBUG(TopValue(0).Consume());
-                        PopValue();
-                        INDEBUG(TopValue(0).Consume());
-                        PopValue();
-                        break;
-                    }
-                }
-
-                EscapeValue(TopValue(0), node);
-                PopValue();
-                EscapeValue(TopValue(0), node);
-                PopValue();
-                break;
-            }
-
-            case GT_ARR_LENGTH:
-            {
-                assert(TopValue(1).Node() == node);
-                assert(TopValue(0).Node() == node->gtGetOp1());
-
-                if (TopValue(0).IsAddress() && m_compiler->lvaGetDesc(TopValue(0).LclNum())->lvStackAllocatedArray == 1)
-                {
-                    GenTree* gtLclFld =
-                        m_compiler->gtNewLclFldNode(TopValue(0).LclNum(), TYP_INT,
-                                                    TopValue(0).Offset() + OFFSETOF__CORINFO_Array__length);
-                    SequenceLocal(gtLclFld->AsLclVarCommon());
-                    *use           = gtLclFld;
-                    m_stmtModified = true;
-                    INDEBUG(TopValue(0).Consume());
-                    PopValue();
-                    break;
-                }
-
-                EscapeValue(TopValue(0), node);
-                PopValue();
-                break;
-            }
-
             default:
                 while (TopValue(0).Node() != node)
                 {
diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp
index 9dc91d0865e7ca..2e212085da8a92 100644
--- a/src/coreclr/jit/lclvars.cpp
+++ b/src/coreclr/jit/lclvars.cpp
@@ -2820,11 +2820,6 @@ void Compiler::StructPromotionHelper::PromoteStructVar(unsigned lclNum)
             fieldVarDsc->SetIsNeverNegative(true);
         }
 
-        if (varDsc->lvStackAllocatedArray == 1 && fieldVarDsc->lvFldOffset == OFFSETOF__CORINFO_Array__length)
-        {
-            fieldVarDsc->SetIsNeverNegative(true);
-        }
-
         // This new local may be the first time we've seen a long typed local.
         if (fieldVarDsc->lvType == TYP_LONG)
         {
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 02e6dd2dab423f..8f42dd8743ce4e 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -3571,7 +3571,6 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
     {
         indexAddr->Arr()   = fgMorphTree(indexAddr->Arr());
         indexAddr->Index() = fgMorphTree(indexAddr->Index());
-
         indexAddr->AddAllEffectsFlags(indexAddr->Arr(), indexAddr->Index());
 
         // Mark the indirection node as needing a range check if necessary.
@@ -3603,32 +3602,6 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
     GenTree*          indexDefn   = nullptr; // non-NULL if we need to allocate a temp for the index expression
     GenTreeBoundsChk* boundsCheck = nullptr;
 
-    if (arrRef->IsAnyLocal() && lvaGetDesc(arrRef->AsLclVarCommon()->GetLclNum())->lvStackAllocatedArray == 1)
-    {
-        index = fgMorphTree(index);
-        if (index->IsCnsIntOrI())
-        {
-            ssize_t offset = static_cast<ssize_t>(elemOffs) + index->AsIntCon()->IconValue() * elemSize;
-
-            // For stack allocated arrays the local size is the size of the entire array
-            if (!indexAddr->IsBoundsChecked() ||
-                (static_cast<ssize_t>(elemOffs) <= offset &&
-                 offset < static_cast<ssize_t>(lvaLclExactSize(arrRef->AsLclVarCommon()->GetLclNum()))))
-            {
-                if (FitsIn<unsigned>(offset))
-                {
-                    return fgMorphTree(gtNewOperNode(GT_ADD, indexAddr->TypeGet(), arrRef, gtNewIconNode(offset)));
-                }
-            }
-            else
-            {
-                return fgMorphTree(gtNewOperNode(GT_COMMA, indexAddr->TypeGet(),
-                                                 gtNewHelperCallNode(CORINFO_HELP_RNGCHKFAIL, TYP_VOID),
-                                                 gtNewIconNode(0, TYP_BYREF)));
-            }
-        }
-    }
-
     // If we're doing range checking, introduce a GT_BOUNDS_CHECK node for the address.
     if (indexAddr->IsBoundsChecked())
     {
@@ -3660,7 +3633,7 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
         }
 
         if (((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) != 0) ||
-            gtComplexityExceeds(index, MAX_INDEX_COMPLEXITY) || index->OperIs(GT_LCL_FLD) ||
+            gtComplexityExceeds(index, MAX_ARR_COMPLEXITY) || index->OperIs(GT_LCL_FLD) ||
             (index->OperIs(GT_LCL_VAR) && lvaIsLocalImplicitlyAccessedByRef(index->AsLclVar()->GetLclNum())))
         {
             unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("index expr"));
@@ -7782,12 +7755,6 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
                     return iconNode;
                 }
             }
-            if (op1->IsAnyLocal() && lvaGetDesc(op1->AsLclVarCommon())->lvStackAllocatedArray == 1)
-            {
-                return fgMorphTree(
-                    gtNewLclFldNode(op1->AsLclVarCommon()->GetLclNum(), tree->TypeGet(),
-                                    op1->AsLclVarCommon()->GetLclOffs() + OFFSETOF__CORINFO_Array__length));
-            }
             break;
 
         case GT_IND:
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 672e044a1c6d25..ef398f51dea1f7 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -457,6 +457,7 @@ bool ObjectAllocator::MorphAllocObjNodes()
             if (allocType != OAT_NONE)
             {
                 bool         canStack     = false;
+                bool         bashCall     = false;
                 const char*  onHeapReason = nullptr;
                 unsigned int lclNum       = stmtExpr->AsLclVar()->GetLclNum();
 
@@ -537,7 +538,10 @@ bool ObjectAllocator::MorphAllocObjNodes()
                                 MorphNewArrNodeIntoStackAlloc(data->AsCall(), clsHnd,
                                                               (unsigned int)len->AsIntCon()->IconValue(), blockSize,
                                                               block, stmt);
-                            m_HeapLocalToStackLocalMap.AddOrUpdate(lclNum, stackLclNum);
+
+                            // Note we do not want to rewrite uses of the array temp, so we
+                            // do not update m_HeapLocalToStackLocalMap.
+                            //
                             comp->Metrics.StackAllocatedArrays++;
                         }
                     }
@@ -594,6 +598,8 @@ bool ObjectAllocator::MorphAllocObjNodes()
                             {
                                 comp->Metrics.StackAllocatedRefClasses++;
                             }
+
+                            bashCall = true;
                         }
                     }
                 }
@@ -605,7 +611,12 @@ bool ObjectAllocator::MorphAllocObjNodes()
                     // sets.
                     MarkLclVarAsDefinitelyStackPointing(lclNum);
                     MarkLclVarAsPossiblyStackPointing(lclNum);
-                    stmt->GetRootNode()->gtBashToNOP();
+
+                    if (bashCall)
+                    {
+                        stmt->GetRootNode()->gtBashToNOP();
+                    }
+
                     comp->optMethodFlags |= OMF_HAS_OBJSTACKALLOC;
                     didStackAllocate = true;
                 }
@@ -687,8 +698,7 @@ GenTree* ObjectAllocator::MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* alloc
 }
 
 //------------------------------------------------------------------------
-// MorphNewArrNodeIntoStackAlloc: Morph a GT_CALL CORINFO_HELP_NEWARR_1_VC
-//                                node into stack allocation.
+// MorphNewArrNodeIntoStackAlloc: Morph a newarray helper call node into stack allocation.
 //
 // Arguments:
 //    newArr       - GT_CALL that will be replaced by helper call.
@@ -756,35 +766,19 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*
     lclDsc->lvStructDoubleAlign = alignTo8;
 #endif
 
-    // Initialize the vtable slot.
+    // Mark the newarr call as being "on stack", and associate the local
     //
-    //------------------------------------------------------------------------
-    // STMTx (IL 0x... ???)
-    //   * STORE_LCL_FLD    long
-    //   \--*  CNS_INT(h) long
-    //------------------------------------------------------------------------
-
-    // Initialize the method table pointer.
-    GenTree*   init     = comp->gtNewStoreLclFldNode(lclNum, TYP_I_IMPL, 0, newArr->gtArgs.GetArgByIndex(0)->GetNode());
-    Statement* initStmt = comp->gtNewStmt(init);
-
-    comp->fgInsertStmtBefore(block, stmt, initStmt);
+    newArr->gtCallMoreFlags |= GTF_CALL_M_STACK_ARRAY;
+    newArr->gtNewArrStackLcl = lclNum;
 
-    // Initialize the array length.
+    // Retype the call result as an unmanaged pointer
     //
-    //------------------------------------------------------------------------
-    // STMTx (IL 0x... ???)
-    //   * STORE_LCL_FLD    int
-    //   \--*  CNS_INT    int
-    //------------------------------------------------------------------------
+    newArr->ChangeType(TYP_I_IMPL);
+    newArr->gtReturnType = TYP_I_IMPL;
 
-    // Pass the total length of the array.
-    GenTree*   len     = comp->gtNewStoreLclFldNode(lclNum, TYP_INT, OFFSETOF__CORINFO_Array__length,
-                                                    comp->gtNewIconNode(length, TYP_INT));
-    Statement* lenStmt = comp->gtNewStmt(len);
-    comp->fgInsertStmtBefore(block, stmt, lenStmt);
-
-    comp->lvaGetDesc(lclNum)->lvStackAllocatedArray = 1;
+    // Note that we have stack allocated arrays in this method
+    //
+    comp->setMethodHasStackAllocatedArray();
 
     return lclNum;
 }

From 5137a5cf9d8e184b28af7548c7dec8a6c336be03 Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Fri, 10 Jan 2025 11:45:34 -0800
Subject: [PATCH 65/75] basic VN support

---
 src/coreclr/jit/valuenum.cpp    | 48 +++++++++++++++++++++++++++++++--
 src/coreclr/jit/valuenum.h      |  3 +++
 src/coreclr/jit/valuenumfuncs.h |  2 ++
 3 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp
index 60ac86cf8e54e6..fd13668c405e8d 100644
--- a/src/coreclr/jit/valuenum.cpp
+++ b/src/coreclr/jit/valuenum.cpp
@@ -6245,7 +6245,19 @@ void Compiler::fgValueNumberArrayElemLoad(GenTree* loadTree, VNFuncApp* addrFunc
     ValueNum  loadValueVN = vnStore->VNForLoad(VNK_Liberal, wholeElem, elemSize, loadType, offset, loadSize);
 
     loadTree->gtVNPair.SetLiberal(loadValueVN);
-    loadTree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, loadType));
+
+    // If this is a local array, there are no asyncronous modifications, so we can set the
+    // conservative VN to the liberal VN.
+    //
+    VNFuncApp arrFn;
+    if (vnStore->IsVNNewLocalArr(arrVN, &arrFn))
+    {
+        loadTree->gtVNPair.SetConservative(loadValueVN);
+    }
+    else
+    {
+        loadTree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, loadType));
+    }
 }
 
 //------------------------------------------------------------------------
@@ -7359,7 +7371,22 @@ bool ValueNumStore::IsVNNewArr(ValueNum vn, VNFuncApp* funcApp)
     bool result = false;
     if (GetVNFunc(vn, funcApp))
     {
-        result = (funcApp->m_func == VNF_JitNewArr) || (funcApp->m_func == VNF_JitReadyToRunNewArr);
+        result = (funcApp->m_func == VNF_JitNewArr) || (funcApp->m_func == VNF_JitNewLclArr) ||
+                 (funcApp->m_func == VNF_JitReadyToRunNewArr) || (funcApp->m_func == VNF_JitReadyToRunNewLclArr);
+    }
+    return result;
+}
+
+bool ValueNumStore::IsVNNewLocalArr(ValueNum vn, VNFuncApp* funcApp)
+{
+    if (vn == NoVN)
+    {
+        return false;
+    }
+    bool result = false;
+    if (GetVNFunc(vn, funcApp))
+    {
+        result = (funcApp->m_func == VNF_JitNewLclArr) || (funcApp->m_func == VNF_JitReadyToRunNewLclArr);
     }
     return result;
 }
@@ -13489,6 +13516,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN
         break;
 
         case VNF_JitNewArr:
+        case VNF_JitNewLclArr:
         {
             generateUniqueVN  = true;
             ValueNumPair vnp1 = vnStore->VNPNormalPair(args->GetArgByIndex(1)->GetNode()->gtVNPair);
@@ -13527,6 +13555,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN
         break;
 
         case VNF_JitReadyToRunNewArr:
+        case VNF_JitReadyToRunNewLclArr:
         {
             generateUniqueVN  = true;
             ValueNumPair vnp1 = vnStore->VNPNormalPair(args->GetArgByIndex(0)->GetNode()->gtVNPair);
@@ -14338,7 +14367,22 @@ bool Compiler::fgValueNumberHelperCall(GenTreeCall* call)
                 }
             }
 
+            if (isAlloc && ((call->gtCallMoreFlags & GTF_CALL_M_STACK_ARRAY) != 0))
+            {
+                if (vnf == VNF_JitNewArr)
+                {
+                    vnf = VNF_JitNewLclArr;
+                    // modHeap = false;
+                }
+                else if (vnf == VNF_JitReadyToRunNewArr)
+                {
+                    vnf = VNF_JitReadyToRunNewLclArr;
+                    // modHeap = false;
+                }
+            }
+
             fgValueNumberHelperCallFunc(call, vnf, vnpExc);
+
             return modHeap;
         }
         else
diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h
index ec7b6fec04a5bf..f2ae7be7a4cabe 100644
--- a/src/coreclr/jit/valuenum.h
+++ b/src/coreclr/jit/valuenum.h
@@ -1150,6 +1150,9 @@ class ValueNumStore
     // Check if "vn" is "new [] (type handle, size)"
     bool IsVNNewArr(ValueNum vn, VNFuncApp* funcApp);
 
+    // Check if "vn" is "new [] (type handle, size) [stack allocated]"
+    bool IsVNNewLocalArr(ValueNum vn, VNFuncApp* funcApp);
+
     // Check if "vn" IsVNNewArr and return false if arr size cannot be determined.
     bool TryGetNewArrSize(ValueNum vn, int* size);
 
diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h
index 2db7c4ffa6c9b0..227f4f26b11e9b 100644
--- a/src/coreclr/jit/valuenumfuncs.h
+++ b/src/coreclr/jit/valuenumfuncs.h
@@ -149,9 +149,11 @@ ValueNumFuncDef(GetStaticAddrTLS, 1, false, true, false)
 
 ValueNumFuncDef(JitNew, 2, false, true, false)
 ValueNumFuncDef(JitNewArr, 3, false, true, false)
+ValueNumFuncDef(JitNewLclArr, 3, false, true, false)
 ValueNumFuncDef(JitNewMdArr, 4, false, true, false)
 ValueNumFuncDef(JitReadyToRunNew, 2, false, true, false)
 ValueNumFuncDef(JitReadyToRunNewArr, 3, false, true, false)
+ValueNumFuncDef(JitReadyToRunNewLclArr, 3, false, true, false)
 ValueNumFuncDef(Box, 3, false, true, false)
 ValueNumFuncDef(BoxNullable, 3, false, false, false)
 

From aa091873316d5b286d7dcc061ef706919364b62e Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Sat, 11 Jan 2025 10:37:11 -0800
Subject: [PATCH 66/75] restore complexity change

---
 src/coreclr/jit/morph.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 8f42dd8743ce4e..6c814fd59b4b1f 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -3633,7 +3633,7 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
         }
 
         if (((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) != 0) ||
-            gtComplexityExceeds(index, MAX_ARR_COMPLEXITY) || index->OperIs(GT_LCL_FLD) ||
+            gtComplexityExceeds(index, MAX_INDEX_COMPLEXITY) || index->OperIs(GT_LCL_FLD) ||
             (index->OperIs(GT_LCL_VAR) && lvaIsLocalImplicitlyAccessedByRef(index->AsLclVar()->GetLclNum())))
         {
             unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("index expr"));

From c451435446fa68f16ea9d49a8b74f18f122a5758 Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Tue, 14 Jan 2025 11:25:41 -0800
Subject: [PATCH 67/75] pass address of stack local to new helper

---
 src/coreclr/jit/gentree.cpp         |  2 ++
 src/coreclr/jit/gentree.h           |  2 +-
 src/coreclr/jit/helperexpansion.cpp | 11 ++++++-----
 src/coreclr/jit/objectalloc.cpp     |  6 ++++--
 4 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 817fe5968dfab7..9da9c2f0d3cc29 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -13178,6 +13178,8 @@ const char* Compiler::gtGetWellKnownArgNameForArgMsg(WellKnownArg arg)
             return "swift self";
         case WellKnownArg::X86TailCallSpecialArg:
             return "tail call";
+        case WellKnownArg::StackArrayLocal:
+            return "stack array local";
         default:
             return nullptr;
     }
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index 17844a5d683819..c84d0ee0332828 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -4567,6 +4567,7 @@ enum class WellKnownArg : unsigned
     SwiftError,
     SwiftSelf,
     X86TailCallSpecialArg,
+    StackArrayLocal,
 };
 
 #ifdef DEBUG
@@ -5743,7 +5744,6 @@ struct GenTreeCall final : public GenTree
         void*                gtStubCallStubAddr;   // GTF_CALL_VIRT_STUB - these are never inlined
         CORINFO_CLASS_HANDLE gtInitClsHnd;         // Used by static init helpers, represents a class they init
         IL_OFFSET            gtCastHelperILOffset; // Used by cast helpers to save corresponding IL offset
-        unsigned             gtNewArrStackLcl; // GTF_CALL_M_STACK_ARRAY - local used for stack allocated array storage
     };
 
     union
diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp
index b7ceb82315b756..16be7a9cb9bc6b 100644
--- a/src/coreclr/jit/helperexpansion.cpp
+++ b/src/coreclr/jit/helperexpansion.cpp
@@ -2812,10 +2812,12 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt,
         return false;
     }
 
-    const unsigned lclNum = call->gtNewArrStackLcl;
+    CallArg* const             stackLocalAddressArg  = call->gtArgs.FindWellKnownArg(WellKnownArg::StackArrayLocal);
+    GenTreeLclVarCommon* const stackLocalAddressNode = stackLocalAddressArg->GetNode()->AsLclVarCommon();
+    const unsigned             lclNum                = stackLocalAddressNode->GetLclNum();
 
-    JITDUMP("Expanding new array helper for stack allocated array [%06d] in " FMT_BB ":\n", dspTreeID(call),
-            block->bbNum);
+    JITDUMP("Expanding new array helper for stack allocated array V%20u [%06d] in " FMT_BB ":\n", dspTreeID(call),
+            lclNum, block->bbNum);
     DISPTREE(call);
     JITDUMP("\n");
 
@@ -2852,8 +2854,7 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt,
 
     // Replace call with &lclNum.
     //
-    GenTreeLclVarCommon* const lclNode = gtNewLclAddrNode(lclNum, 0);
-    *callUse                           = lclNode;
+    *callUse = stackLocalAddressNode;
     DEBUG_DESTROY_NODE(call);
 
     gtUpdateStmtSideEffects(stmt);
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index ef398f51dea1f7..37f4e2ced52ed9 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -766,10 +766,12 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*
     lclDsc->lvStructDoubleAlign = alignTo8;
 #endif
 
-    // Mark the newarr call as being "on stack", and associate the local
+    // Mark the newarr call as being "on stack", and add the address
+    // of the stack local as an argument
     //
+    GenTree* const stackLocalAddr = comp->gtNewLclAddrNode(lclNum, 0);
+    newArr->gtArgs.PushBack(comp, NewCallArg::Primitive(stackLocalAddr).WellKnown(WellKnownArg::StackArrayLocal));
     newArr->gtCallMoreFlags |= GTF_CALL_M_STACK_ARRAY;
-    newArr->gtNewArrStackLcl = lclNum;
 
     // Retype the call result as an unmanaged pointer
     //

From f6b012a5954954dd1346f0d0b4dcdd3242fe9110 Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Wed, 15 Jan 2025 12:59:45 -0800
Subject: [PATCH 68/75] temp hack to boost SPMI coverage

---
 src/coreclr/jit/helperexpansion.cpp | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp
index 16be7a9cb9bc6b..b2e7e0d23c63f2 100644
--- a/src/coreclr/jit/helperexpansion.cpp
+++ b/src/coreclr/jit/helperexpansion.cpp
@@ -2833,7 +2833,19 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt,
     // Initialize the array method table pointer.
     //
     CORINFO_CLASS_HANDLE arrayHnd = (CORINFO_CLASS_HANDLE)call->compileTimeHelperArgumentHandle;
-    GenTree* const       mt       = gtNewIconEmbClsHndNode(arrayHnd);
+
+    // Hack to reduce SPMI failures
+    GenTree* mt = nullptr;
+
+    if (opts.IsReadyToRun())
+    {
+        mt = gtNewIconEmbClsHndNode(arrayHnd);
+    }
+    else
+    {
+        mt = gtNewIconHandleNode((size_t)arrayHnd, GTF_ICON_CLASS_HDL);
+    }
+
     GenTree* const       mtStore  = gtNewStoreLclFldNode(lclNum, TYP_I_IMPL, 0, mt);
     Statement* const     mtStmt   = gtNewStmt(mtStore);
 

From a6e7bd5e4436c9482c6c1b2e5fea0fd994ccb277 Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Wed, 15 Jan 2025 13:55:58 -0800
Subject: [PATCH 69/75] avoid pessimizing tail calls. implement configurable
 size limit

---
 src/coreclr/jit/compiler.h          |  8 ++++----
 src/coreclr/jit/helperexpansion.cpp |  4 ++--
 src/coreclr/jit/jitconfigvalues.h   |  1 +
 src/coreclr/jit/lclvars.cpp         |  4 ++--
 src/coreclr/jit/morph.cpp           |  5 +++++
 src/coreclr/jit/objectalloc.cpp     | 15 ++++++++-------
 src/coreclr/jit/objectalloc.h       |  7 ++++---
 7 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 93800ba08fc356..ccd98db38aa543 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -691,8 +691,8 @@ class LclVarDsc
     unsigned char lvSingleDefDisqualifyReason = 'H';
 #endif
 
-    unsigned char lvAllDefsAreNoGc    : 1; // For pinned locals: true if all defs of this local are no-gc
-    unsigned char lvStackAllocatedBox : 1; // Local is a stack allocated box
+    unsigned char lvAllDefsAreNoGc       : 1; // For pinned locals: true if all defs of this local are no-gc
+    unsigned char lvStackAllocatedObject : 1; // Local is a stack allocated object (class, box, array, ...)
 
 #if FEATURE_MULTIREG_ARGS
     regNumber lvRegNumForSlot(unsigned slotNum)
@@ -807,9 +807,9 @@ class LclVarDsc
         return lvIsMultiRegArg || lvIsMultiRegRet;
     }
 
-    bool IsStackAllocatedBox() const
+    bool IsStackAllocatedObject() const
     {
-        return lvStackAllocatedBox;
+        return lvStackAllocatedObject;
     }
 
 #if defined(DEBUG)
diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp
index b2e7e0d23c63f2..d2d933678b7152 100644
--- a/src/coreclr/jit/helperexpansion.cpp
+++ b/src/coreclr/jit/helperexpansion.cpp
@@ -2846,8 +2846,8 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt,
         mt = gtNewIconHandleNode((size_t)arrayHnd, GTF_ICON_CLASS_HDL);
     }
 
-    GenTree* const       mtStore  = gtNewStoreLclFldNode(lclNum, TYP_I_IMPL, 0, mt);
-    Statement* const     mtStmt   = gtNewStmt(mtStore);
+    GenTree* const   mtStore = gtNewStoreLclFldNode(lclNum, TYP_I_IMPL, 0, mt);
+    Statement* const mtStmt  = gtNewStmt(mtStore);
 
     fgInsertStmtBefore(block, newStmt, mtStmt);
     gtSetStmtInfo(mtStmt);
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index dd116bcc43b950..134d5ffc99ca00 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -669,6 +669,7 @@ RELEASE_CONFIG_INTEGER(JitObjectStackAllocation, "JitObjectStackAllocation", 1)
 RELEASE_CONFIG_INTEGER(JitObjectStackAllocationRefClass, "JitObjectStackAllocationRefClass", 1)
 RELEASE_CONFIG_INTEGER(JitObjectStackAllocationBoxedValueClass, "JitObjectStackAllocationBoxedValueClass", 1)
 RELEASE_CONFIG_INTEGER(JitObjectStackAllocationArray, "JitObjectStackAllocationArray", 1)
+RELEASE_CONFIG_INTEGER(JitObjectStackAllocationSize, "JitObjectStackAllocationSize", 528)
 
 RELEASE_CONFIG_INTEGER(JitEECallTimingInfo, "JitEECallTimingInfo", 0)
 
diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp
index 5880c3520cd8e1..8d4c6751bde9e0 100644
--- a/src/coreclr/jit/lclvars.cpp
+++ b/src/coreclr/jit/lclvars.cpp
@@ -2501,9 +2501,9 @@ bool Compiler::StructPromotionHelper::CanPromoteStructVar(unsigned lclNum)
         return false;
     }
 
-    if (varDsc->lvStackAllocatedBox)
+    if (varDsc->lvStackAllocatedObject)
     {
-        JITDUMP("  struct promotion of V%02u is disabled because it is a stack allocated box\n", lclNum);
+        JITDUMP("  struct promotion of V%02u is disabled because it is a stack allocated object\n", lclNum);
         return false;
     }
 
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 019df0407dea21..7653cfba280fa2 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -5142,6 +5142,11 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
                     assert(lvaIsImplicitByRefLocal(lvaTable[varDsc->lvFieldLclStart].lvParentLcl));
                     assert(fgGlobalMorph);
                 }
+                else if (varDsc->IsStackAllocatedObject())
+                {
+                    // Stack allocated objects currently cannot be passed to callees
+                    // so won't be live at tail call sites.
+                }
 #if FEATURE_FIXED_OUT_ARGS
                 else if (varNum == lvaOutgoingArgSpaceVar)
                 {
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 37f4e2ced52ed9..97ea8777c65f8b 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -730,6 +730,7 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*
     const bool         shortLifetime = false;
     const bool         alignTo8      = newArr->GetHelperNum() == CORINFO_HELP_NEWARR_1_ALIGN8;
     const unsigned int lclNum        = comp->lvaGrabTemp(shortLifetime DEBUGARG("stack allocated array temp"));
+    LclVarDsc* const   lclDsc        = comp->lvaGetDesc(lclNum);
 
     if (alignTo8)
     {
@@ -737,11 +738,11 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall*
     }
 
     comp->lvaSetStruct(lclNum, comp->typGetBlkLayout(blockSize), /* unsafeValueClsCheck */ false);
+    lclDsc->lvStackAllocatedObject = true;
 
     // Initialize the object memory if necessary.
-    bool             bbInALoop  = block->HasFlag(BBF_BACKWARD_JUMP);
-    bool             bbIsReturn = block->KindIs(BBJ_RETURN);
-    LclVarDsc* const lclDsc     = comp->lvaGetDesc(lclNum);
+    bool bbInALoop  = block->HasFlag(BBF_BACKWARD_JUMP);
+    bool bbIsReturn = block->KindIs(BBJ_RETURN);
     if (comp->fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn))
     {
         //------------------------------------------------------------------------
@@ -815,10 +816,10 @@ unsigned int ObjectAllocator::MorphAllocObjNodeIntoStackAlloc(
     comp->lvaSetStruct(lclNum, clsHnd, /* unsafeValueClsCheck */ false);
 
     // Initialize the object memory if necessary.
-    bool             bbInALoop  = block->HasFlag(BBF_BACKWARD_JUMP);
-    bool             bbIsReturn = block->KindIs(BBJ_RETURN);
-    LclVarDsc* const lclDsc     = comp->lvaGetDesc(lclNum);
-    lclDsc->lvStackAllocatedBox = isValueClass;
+    bool             bbInALoop     = block->HasFlag(BBF_BACKWARD_JUMP);
+    bool             bbIsReturn    = block->KindIs(BBJ_RETURN);
+    LclVarDsc* const lclDsc        = comp->lvaGetDesc(lclNum);
+    lclDsc->lvStackAllocatedObject = true;
     if (comp->fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn))
     {
         //------------------------------------------------------------------------
diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index cbab18c4b4b9f6..6ba4e3c966c230 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -41,6 +41,7 @@ class ObjectAllocator final : public Phase
     BitVec              m_DefinitelyStackPointingPointers;
     LocalToLocalMap     m_HeapLocalToStackLocalMap;
     BitSetShortLongRep* m_ConnGraphAdjacencyMatrix;
+    unsigned int        m_StackAllocMaxSize;
 
     //===============================================================================
     // Methods
@@ -84,8 +85,6 @@ class ObjectAllocator final : public Phase
     struct BuildConnGraphVisitorCallbackData;
     bool CanLclVarEscapeViaParentStack(ArrayStack<GenTree*>* parentStack, unsigned int lclNum);
     void UpdateAncestorTypes(GenTree* tree, ArrayStack<GenTree*>* parentStack, var_types newType);
-
-    static const unsigned int s_StackAllocMaxSize = 0x2000U;
 };
 
 //===============================================================================
@@ -101,6 +100,8 @@ inline ObjectAllocator::ObjectAllocator(Compiler* comp)
     m_PossiblyStackPointingPointers   = BitVecOps::UninitVal();
     m_DefinitelyStackPointingPointers = BitVecOps::UninitVal();
     m_ConnGraphAdjacencyMatrix        = nullptr;
+
+    m_StackAllocMaxSize = (unsigned)JitConfig.JitObjectStackAllocationSize();
 }
 
 //------------------------------------------------------------------------
@@ -227,7 +228,7 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNu
         return false;
     }
 
-    if (classSize > s_StackAllocMaxSize)
+    if (classSize > m_StackAllocMaxSize)
     {
         *reason = "[too large]";
         return false;

From b3edc0707aa4a5e86e8a4949d8a0c040a50cd666 Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Wed, 15 Jan 2025 14:48:04 -0800
Subject: [PATCH 70/75] add missing well known arg string

---
 src/coreclr/jit/morph.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 7653cfba280fa2..b7bfc5bab5e410 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -780,6 +780,8 @@ const char* getWellKnownArgName(WellKnownArg arg)
             return "SwiftSelf";
         case WellKnownArg::X86TailCallSpecialArg:
             return "X86TailCallSpecialArg";
+        case WellKnownArg::StackArrayLocal:
+            return "StackArrayLocal";
     }
 
     return "N/A";

From 9b63e2d029fd1e85d8edc4e7f8a090f09594eba6 Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Thu, 16 Jan 2025 13:38:33 -0800
Subject: [PATCH 71/75] fix array length check

---
 src/coreclr/jit/objectalloc.h | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index 6ba4e3c966c230..7703a7745d160d 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -168,7 +168,7 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNu
             return false;
         }
 
-        if (length < 0 || !FitsIn<int>(length))
+        if ((length < 0) || (length > CORINFO_Array_MaxLength))
         {
             *reason = "[invalid array length]";
             return false;
@@ -178,14 +178,31 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNu
         CorInfoType          corType    = comp->info.compCompHnd->getChildType(clsHnd, &elemClsHnd);
         var_types            type       = JITtype2varType(corType);
         ClassLayout*         elemLayout = type == TYP_STRUCT ? comp->typGetObjLayout(elemClsHnd) : nullptr;
+
         if (varTypeIsGC(type) || ((elemLayout != nullptr) && elemLayout->HasGCPtr()))
         {
             *reason = "[array contains gc refs]";
             return false;
         }
 
-        unsigned elemSize = elemLayout != nullptr ? elemLayout->GetSize() : genTypeSize(type);
-        classSize = static_cast<unsigned>(OFFSETOF__CORINFO_Array__data) + elemSize * static_cast<unsigned>(length);
+        const unsigned elemSize = elemLayout != nullptr ? elemLayout->GetSize() : genTypeSize(type);
+
+        if (CheckedOps::MulOverflows(elemSize, static_cast<unsigned>(length), CheckedOps::Unsigned))
+        {
+            *reason = "[overflow array length]";
+            return false;
+        }
+
+        const unsigned payloadSize = elemSize * static_cast<unsigned>(length);
+        const unsigned headerSize  = static_cast<unsigned>(OFFSETOF__CORINFO_Array__data);
+
+        if (CheckedOps::AddOverflows(headerSize, payloadSize, CheckedOps::Unsigned))
+        {
+            *reason = "[overflow array length]";
+            return false;
+        }
+
+        classSize = headerSize + payloadSize;
     }
     else if (allocType == OAT_NEWOBJ)
     {

From e5356577f9eb6a94e5b86f1b36e4235fe92f04db Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Thu, 16 Jan 2025 17:10:05 -0800
Subject: [PATCH 72/75] use ClrSafeInt directly

---
 src/coreclr/jit/objectalloc.h | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h
index 7703a7745d160d..35e704974969ac 100644
--- a/src/coreclr/jit/objectalloc.h
+++ b/src/coreclr/jit/objectalloc.h
@@ -187,22 +187,17 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int         lclNu
 
         const unsigned elemSize = elemLayout != nullptr ? elemLayout->GetSize() : genTypeSize(type);
 
-        if (CheckedOps::MulOverflows(elemSize, static_cast<unsigned>(length), CheckedOps::Unsigned))
-        {
-            *reason = "[overflow array length]";
-            return false;
-        }
-
-        const unsigned payloadSize = elemSize * static_cast<unsigned>(length);
-        const unsigned headerSize  = static_cast<unsigned>(OFFSETOF__CORINFO_Array__data);
+        ClrSafeInt<unsigned> totalSize(elemSize);
+        totalSize *= static_cast<unsigned>(length);
+        totalSize += static_cast<unsigned>(OFFSETOF__CORINFO_Array__data);
 
-        if (CheckedOps::AddOverflows(headerSize, payloadSize, CheckedOps::Unsigned))
+        if (totalSize.IsOverflow())
         {
             *reason = "[overflow array length]";
             return false;
         }
 
-        classSize = headerSize + payloadSize;
+        classSize = totalSize.Value();
     }
     else if (allocType == OAT_NEWOBJ)
     {

From 07cd310bcebe4c914c08a1846e66afb9c89b07da Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Thu, 16 Jan 2025 18:22:25 -0800
Subject: [PATCH 73/75] bypass for R2R for now, since it may inhibit prejitting

---
 src/coreclr/jit/objectalloc.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index 97ea8777c65f8b..f5333274c6c104 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -428,7 +428,7 @@ bool ObjectAllocator::MorphAllocObjNodes()
                         case CORINFO_HELP_NEWARR_1_DIRECT:
                         case CORINFO_HELP_NEWARR_1_ALIGN8:
                         {
-                            if (data->AsCall()->gtArgs.CountArgs() == 2 &&
+                            if ((data->AsCall()->gtArgs.CountArgs() == 2) &&
                                 data->AsCall()->gtArgs.GetArgByIndex(1)->GetNode()->IsCnsIntOrI())
                             {
                                 allocType = OAT_NEWARR;
@@ -438,7 +438,9 @@ bool ObjectAllocator::MorphAllocObjNodes()
 #ifdef FEATURE_READYTORUN
                         case CORINFO_HELP_READYTORUN_NEWARR_1:
                         {
-                            if (data->AsCall()->gtArgs.CountArgs() == 1 &&
+                            // Disable for R2R for now, since embedding the array type can block prejitting
+                            //
+                            if (!comp->opts.IsReadyToRun() && (data->AsCall()->gtArgs.CountArgs() == 1) &&
                                 data->AsCall()->gtArgs.GetArgByIndex(0)->GetNode()->IsCnsIntOrI())
                             {
                                 allocType = OAT_NEWARR;

From 71ebf24593ea6b3a3070306302adefb23205e98d Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Mon, 20 Jan 2025 17:30:56 -0800
Subject: [PATCH 74/75] revise helper expansion

---
 src/coreclr/jit/helperexpansion.cpp | 66 +++++++++++++----------------
 1 file changed, 29 insertions(+), 37 deletions(-)

diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp
index d2d933678b7152..95da55c7b9576f 100644
--- a/src/coreclr/jit/helperexpansion.cpp
+++ b/src/coreclr/jit/helperexpansion.cpp
@@ -2807,17 +2807,17 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt,
             return false;
     }
 
-    if ((call->gtCallMoreFlags & GTF_CALL_M_STACK_ARRAY) == 0)
+    // If this is a local array, the new helper will have an arg for the array's address
+    //
+    CallArg* const stackLocalAddressArg = call->gtArgs.FindWellKnownArg(WellKnownArg::StackArrayLocal);
+
+    if (stackLocalAddressArg == nullptr)
     {
         return false;
     }
 
-    CallArg* const             stackLocalAddressArg  = call->gtArgs.FindWellKnownArg(WellKnownArg::StackArrayLocal);
-    GenTreeLclVarCommon* const stackLocalAddressNode = stackLocalAddressArg->GetNode()->AsLclVarCommon();
-    const unsigned             lclNum                = stackLocalAddressNode->GetLclNum();
-
-    JITDUMP("Expanding new array helper for stack allocated array V%20u [%06d] in " FMT_BB ":\n", dspTreeID(call),
-            lclNum, block->bbNum);
+    JITDUMP("Expanding new array helper for stack allocated array at [%06d] in " FMT_BB ":\n", dspTreeID(call),
+            block->bbNum);
     DISPTREE(call);
     JITDUMP("\n");
 
@@ -2825,53 +2825,45 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt,
     GenTree**  callUse = nullptr;
     bool       split   = gtSplitTree(block, stmt, call, &newStmt, &callUse);
 
-    if (!split)
+    if (split)
     {
-        newStmt = stmt;
+        while ((newStmt != nullptr) && (newStmt != stmt))
+        {
+            fgMorphStmtBlockOps(block, newStmt);
+            newStmt = newStmt->GetNextStmt();
+        }
     }
 
+    GenTree* const stackLocalAddress = stackLocalAddressArg->GetNode();
+
     // Initialize the array method table pointer.
     //
     CORINFO_CLASS_HANDLE arrayHnd = (CORINFO_CLASS_HANDLE)call->compileTimeHelperArgumentHandle;
 
-    // Hack to reduce SPMI failures
-    GenTree* mt = nullptr;
-
-    if (opts.IsReadyToRun())
-    {
-        mt = gtNewIconEmbClsHndNode(arrayHnd);
-    }
-    else
-    {
-        mt = gtNewIconHandleNode((size_t)arrayHnd, GTF_ICON_CLASS_HDL);
-    }
-
-    GenTree* const   mtStore = gtNewStoreLclFldNode(lclNum, TYP_I_IMPL, 0, mt);
-    Statement* const mtStmt  = gtNewStmt(mtStore);
+    GenTree* const   mt      = gtNewIconEmbClsHndNode(arrayHnd);
+    GenTree* const   mtStore = gtNewStoreValueNode(TYP_I_IMPL, stackLocalAddress, mt);
+    Statement* const mtStmt  = fgNewStmtFromTree(mtStore);
 
-    fgInsertStmtBefore(block, newStmt, mtStmt);
-    gtSetStmtInfo(mtStmt);
-    fgSetStmtSeq(mtStmt);
+    fgInsertStmtBefore(block, stmt, mtStmt);
 
     // Initialize the array length.
     //
-    GenTree* const   lengthArg    = call->gtArgs.GetArgByIndex(lengthArgIndex)->GetNode();
-    GenTree* const   lengthArgInt = fgOptimizeCast(gtNewCastNode(TYP_INT, lengthArg, false, TYP_INT));
-    GenTree* const   lengthStore = gtNewStoreLclFldNode(lclNum, TYP_INT, OFFSETOF__CORINFO_Array__length, lengthArgInt);
-    Statement* const lenStmt     = gtNewStmt(lengthStore);
+    GenTree* const   lengthArg     = call->gtArgs.GetArgByIndex(lengthArgIndex)->GetNode();
+    GenTree* const   lengthArgInt  = fgOptimizeCast(gtNewCastNode(TYP_INT, lengthArg, false, TYP_INT));
+    GenTree* const   lengthAddress = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(stackLocalAddress),
+                                                   gtNewIconNode(OFFSETOF__CORINFO_Array__length, TYP_I_IMPL));
+    GenTree* const   lengthStore   = gtNewStoreValueNode(TYP_INT, lengthAddress, lengthArgInt);
+    Statement* const lenStmt       = fgNewStmtFromTree(lengthStore);
 
-    fgInsertStmtBefore(block, newStmt, lenStmt);
-    gtSetStmtInfo(lenStmt);
-    fgSetStmtSeq(lenStmt);
+    fgInsertStmtBefore(block, stmt, lenStmt);
 
-    // Replace call with &lclNum.
+    // Replace call with local address
     //
-    *callUse = stackLocalAddressNode;
+    *callUse = gtCloneExpr(stackLocalAddress);
     DEBUG_DESTROY_NODE(call);
 
+    fgMorphStmtBlockOps(block, stmt);
     gtUpdateStmtSideEffects(stmt);
-    gtSetStmtInfo(stmt);
-    fgSetStmtSeq(stmt);
 
     return true;
 }

From 97eba58c9d2f902b7b498fbf1e174a43afc024aa Mon Sep 17 00:00:00 2001
From: Andy Ayers <andya@microsoft.com>
Date: Mon, 20 Jan 2025 18:40:37 -0800
Subject: [PATCH 75/75] revise remainder

---
 src/coreclr/jit/gentree.cpp     |  2 +-
 src/coreclr/jit/gentree.h       |  2 +-
 src/coreclr/jit/objectalloc.cpp | 37 +++++++++------------------------
 3 files changed, 12 insertions(+), 29 deletions(-)

diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index e25ec7774d3862..344fb8d2c7749b 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -13193,7 +13193,7 @@ const char* Compiler::gtGetWellKnownArgNameForArgMsg(WellKnownArg arg)
         case WellKnownArg::X86TailCallSpecialArg:
             return "tail call";
         case WellKnownArg::StackArrayLocal:
-            return "stack array local";
+            return "&lcl arr";
         default:
             return nullptr;
     }
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index c84d0ee0332828..6947bf173adcb4 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -3586,7 +3586,7 @@ class SsaNumInfo final
     }
 };
 
-// Common supertype of [STORE_]LCL_VAR, [STORE_]LCL_FLD, PHI_ARG, LCL_ADDR, FIELD_ADDR.
+// Common supertype of [STORE_]LCL_VAR, [STORE_]LCL_FLD, PHI_ARG, LCL_ADDR.
 // This inherits from UnOp because lclvar stores are unary.
 //
 struct GenTreeLclVarCommon : public GenTreeUnOp
diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp
index f5333274c6c104..8a18a39015c74e 100644
--- a/src/coreclr/jit/objectalloc.cpp
+++ b/src/coreclr/jit/objectalloc.cpp
@@ -419,7 +419,7 @@ bool ObjectAllocator::MorphAllocObjNodes()
                 {
                     allocType = OAT_NEWOBJ;
                 }
-                else if (data->IsHelperCall())
+                else if (!comp->opts.IsReadyToRun() && data->IsHelperCall())
                 {
                     switch (data->AsCall()->GetHelperNum())
                     {
@@ -428,26 +428,14 @@ bool ObjectAllocator::MorphAllocObjNodes()
                         case CORINFO_HELP_NEWARR_1_DIRECT:
                         case CORINFO_HELP_NEWARR_1_ALIGN8:
                         {
-                            if ((data->AsCall()->gtArgs.CountArgs() == 2) &&
-                                data->AsCall()->gtArgs.GetArgByIndex(1)->GetNode()->IsCnsIntOrI())
+                            if ((data->AsCall()->gtArgs.CountUserArgs() == 2) &&
+                                data->AsCall()->gtArgs.GetUserArgByIndex(1)->GetNode()->IsCnsIntOrI())
                             {
                                 allocType = OAT_NEWARR;
                             }
                             break;
                         }
-#ifdef FEATURE_READYTORUN
-                        case CORINFO_HELP_READYTORUN_NEWARR_1:
-                        {
-                            // Disable for R2R for now, since embedding the array type can block prejitting
-                            //
-                            if (!comp->opts.IsReadyToRun() && (data->AsCall()->gtArgs.CountArgs() == 1) &&
-                                data->AsCall()->gtArgs.GetArgByIndex(0)->GetNode()->IsCnsIntOrI())
-                            {
-                                allocType = OAT_NEWARR;
-                            }
-                            break;
-                        }
-#endif
+
                         default:
                         {
                             break;
@@ -480,6 +468,11 @@ bool ObjectAllocator::MorphAllocObjNodes()
                     if (allocType == OAT_NEWARR)
                     {
                         assert(basicBlockHasNewArr);
+
+                        // R2R not yet supported
+                        //
+                        assert(!comp->opts.IsReadyToRun());
+
                         //------------------------------------------------------------------------
                         // We expect the following expression tree at this point
                         // For non-ReadyToRun:
@@ -499,17 +492,7 @@ bool ObjectAllocator::MorphAllocObjNodes()
                         bool                 isNonNull = false;
                         CORINFO_CLASS_HANDLE clsHnd =
                             comp->gtGetHelperCallClassHandle(data->AsCall(), &isExact, &isNonNull);
-                        GenTree* len = nullptr;
-#ifdef FEATURE_READYTORUN
-                        if (comp->opts.IsReadyToRun() && data->IsHelperCall(comp, CORINFO_HELP_READYTORUN_NEWARR_1))
-                        {
-                            len = data->AsCall()->gtArgs.GetArgByIndex(0)->GetNode();
-                        }
-                        else
-#endif
-                        {
-                            len = data->AsCall()->gtArgs.GetArgByIndex(1)->GetNode();
-                        }
+                        GenTree* const len = data->AsCall()->gtArgs.GetUserArgByIndex(1)->GetNode();
 
                         assert(len != nullptr);