[Backport] 8316126: [Lilliput/JDK21] Cherry-pick: 8305895: Implementation: JEP 450: Compact Object Headers

Summary: Backport
8316126: [Lilliput/JDK21] Cherry-pick: 8305895: Implementation: JEP 450: Compact Object Headers (Experimental)
8317954: [Lilliput/JDK21] Make C2 LoadNKlassCompactHeader more robust
8316687: [Lilliput/JDK21] Various cleanups
8319724: [Lilliput] ParallelGC: Forwarded objects found during heap inspection
8316424: [Lilliput/JDK21] ZGC/CDS-related test fixes
And make UseCompactObjectHeaders a product flag

Testing: CICD (all jtreg)

Reviewers: mmyxym, lingjun-cg, kuaiwei

Issue: #75
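
For readers new to compact object headers: the change folds the compressed class pointer into the 64-bit mark word, eliminating the separate klass field. A minimal C++ sketch of the decode, assuming the Lilliput/JDK21 layout; the shift and tag values here are illustrative stand-ins, not JDK source:

#include <cstdint>

// Assumed layout: [ narrow Klass* | hash/age bits | 2 lock bits ]
constexpr int      kKlassShift   = 32;   // stand-in for markWord::klass_shift
constexpr uint64_t kMonitorValue = 0b10; // stand-in for markWord::monitor_value

inline bool has_monitor(uint64_t mark) {
  // Slow path: the real header was displaced into the ObjectMonitor.
  return (mark & kMonitorValue) != 0;
}

inline uint32_t narrow_klass(uint64_t mark) {
  // Fast path: object unlocked, narrow Klass* is just the upper bits.
  return static_cast<uint32_t>(mark >> kKlassShift);
}
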
linade authored Jul 18, 2024
1 parent 059b5e1 commit 64dff31
Showing 107 changed files with 1,447 additions and 364 deletions.
16 changes: 15 additions & 1 deletion src/hotspot/cpu/aarch64/aarch64.ad
@@ -7124,7 +7124,7 @@ instruct loadKlass(iRegPNoSp dst, memory8 mem)
instruct loadNKlass(iRegNNoSp dst, memory4 mem)
%{
match(Set dst (LoadNKlass mem));
-  predicate(!needs_acquiring_load(n));
+  predicate(!needs_acquiring_load(n) && !UseCompactObjectHeaders);

ins_cost(4 * INSN_COST);
format %{ "ldrw $dst, $mem\t# compressed class ptr" %}
@@ -7134,6 +7134,20 @@ instruct loadNKlass(iRegNNoSp dst, memory4 mem)
ins_pipe(iload_reg_mem);
%}

instruct loadNKlassCompactHeaders(iRegNNoSp dst, memory4 mem, rFlagsReg cr)
%{
match(Set dst (LoadNKlass mem));
effect(KILL cr);
predicate(!needs_acquiring_load(n) && UseCompactObjectHeaders);

ins_cost(4 * INSN_COST);
format %{ "ldrw $dst, $mem\t# compressed class ptr" %}
ins_encode %{
__ load_nklass_compact($dst$$Register, $mem$$base$$Register, $mem$$index$$Register, $mem$$scale, $mem$$disp);
%}
ins_pipe(pipe_slow);
%}

// Load Float
instruct loadF(vRegF dst, memory4 mem)
%{
8 changes: 8 additions & 0 deletions src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp
@@ -32,6 +32,7 @@
#include "c1/c1_Runtime1.hpp"
#include "classfile/javaClasses.hpp"
#include "nativeInst_aarch64.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/sharedRuntime.hpp"
#include "vmreg_aarch64.inline.hpp"

@@ -233,6 +234,13 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) {
__ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id)));
}

void LoadKlassStub::emit_code(LIR_Assembler* ce) {
assert(UseCompactObjectHeaders, "Only use with compact object headers");
__ bind(_entry);
Register d = _result->as_register();
__ ldr(d, Address(d, OM_OFFSET_NO_MONITOR_VALUE_TAG(header)));
__ b(_continuation);
}
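
This stub is the out-of-line slow path for the klass load: when the monitor bits are set, the object's header has been displaced into its ObjectMonitor, and OM_OFFSET_NO_MONITOR_VALUE_TAG(header) addresses that field while cancelling the pointer tag. A hedged sketch of the semantics, with the struct reduced to the one assumed field:

#include <cstdint>

struct ObjectMonitor { uint64_t header; /* displaced mark word; other fields omitted */ };
constexpr uint64_t kMonitorValue = 0b10;  // assumed tag in the low mark bits

inline uint64_t displaced_header(uint64_t mark) {
  // The locked mark word is a tagged ObjectMonitor*; strip the tag, read the header.
  auto* mon = reinterpret_cast<ObjectMonitor*>(mark - kMonitorValue);
  return mon->header;  // still carries the narrow Klass* in its upper bits
}
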

// Implementation of patching:
// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes)
31 changes: 25 additions & 6 deletions src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
@@ -2352,7 +2352,9 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
// We don't know the array types are compatible
if (basic_type != T_OBJECT) {
// Simple test for basic type arrays
-      if (UseCompressedClassPointers) {
+      if (UseCompactObjectHeaders) {
+        __ cmp_klass(src, dst, tmp, rscratch1);
+      } else if (UseCompressedClassPointers) {
__ ldrw(tmp, src_klass_addr);
__ ldrw(rscratch1, dst_klass_addr);
__ cmpw(tmp, rscratch1);
@@ -2482,21 +2484,25 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
// but not necessarily exactly of type default_type.
Label known_ok, halt;
__ mov_metadata(tmp, default_type->constant_encoding());
-    if (UseCompressedClassPointers) {
+    if (!UseCompactObjectHeaders && UseCompressedClassPointers) {
__ encode_klass_not_null(tmp);
}

if (basic_type != T_OBJECT) {

-      if (UseCompressedClassPointers) {
+      if (UseCompactObjectHeaders) {
+        __ cmp_klass(dst, tmp, rscratch1);
+      } else if (UseCompressedClassPointers) {
__ ldrw(rscratch1, dst_klass_addr);
__ cmpw(tmp, rscratch1);
} else {
__ ldr(rscratch1, dst_klass_addr);
__ cmp(tmp, rscratch1);
}
__ br(Assembler::NE, halt);
-      if (UseCompressedClassPointers) {
+      if (UseCompactObjectHeaders) {
+        __ cmp_klass(src, tmp, rscratch1);
+      } else if (UseCompressedClassPointers) {
__ ldrw(rscratch1, src_klass_addr);
__ cmpw(tmp, rscratch1);
} else {
@@ -2505,7 +2511,9 @@
}
__ br(Assembler::EQ, known_ok);
} else {
-      if (UseCompressedClassPointers) {
+      if (UseCompactObjectHeaders) {
+        __ cmp_klass(dst, tmp, rscratch1);
+      } else if (UseCompressedClassPointers) {
__ ldrw(rscratch1, dst_klass_addr);
__ cmpw(tmp, rscratch1);
} else {
@@ -2593,7 +2601,18 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) {
}

if (UseCompressedClassPointers) {
-    __ ldrw(result, Address (obj, oopDesc::klass_offset_in_bytes()));
+    if (UseCompactObjectHeaders) {
+      // Check if we can take the (common) fast path, if obj is unlocked.
+      __ ldr(result, Address(obj, oopDesc::mark_offset_in_bytes()));
+      __ tst(result, markWord::monitor_value);
+      __ br(Assembler::NE, *op->stub()->entry());
+      __ bind(*op->stub()->continuation());
+
+      // Shift to get proper narrow Klass*.
+      __ lsr(result, result, markWord::klass_shift);
+    } else {
+      __ ldrw(result, Address (obj, oopDesc::klass_offset_in_bytes()));
+    }
__ decode_klass_not_null(result);
} else {
__ ldr(result, Address (obj, oopDesc::klass_offset_in_bytes()));
26 changes: 16 additions & 10 deletions src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
@@ -180,26 +180,32 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i

void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) {
assert_different_registers(obj, klass, len);
-  // This assumes that all prototype bits fit in an int32_t
-  mov(t1, (int32_t)(intptr_t)markWord::prototype().value());
-  str(t1, Address(obj, oopDesc::mark_offset_in_bytes()));
-
-  if (UseCompressedClassPointers) { // Take care not to kill klass
-    encode_klass_not_null(t1, klass);
-    strw(t1, Address(obj, oopDesc::klass_offset_in_bytes()));
+  if (UseCompactObjectHeaders) {
+    ldr(t1, Address(klass, Klass::prototype_header_offset()));
+    str(t1, Address(obj, oopDesc::mark_offset_in_bytes()));
  } else {
-    str(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
+    // This assumes that all prototype bits fit in an int32_t
+    mov(t1, (int32_t)(intptr_t)markWord::prototype().value());
+    str(t1, Address(obj, oopDesc::mark_offset_in_bytes()));
+
+    if (UseCompressedClassPointers) { // Take care not to kill klass
+      encode_klass_not_null(t1, klass);
+      strw(t1, Address(obj, oopDesc::klass_offset_in_bytes()));
+    } else {
+      str(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
+    }
  }

if (len->is_valid()) {
strw(len, Address(obj, arrayOopDesc::length_offset_in_bytes()));
+    int base_offset = arrayOopDesc::length_offset_in_bytes() + BytesPerInt;
+    assert(!UseCompactObjectHeaders || arrayOopDesc::length_offset_in_bytes() == 8, "check length offset");
+    if (!is_aligned(base_offset, BytesPerWord)) {
+      assert(is_aligned(base_offset, BytesPerInt), "must be 4-byte aligned");
+      // Clear gap/first 4 bytes following the length field.
+      strw(zr, Address(obj, base_offset));
+    }
-  } else if (UseCompressedClassPointers) {
+  } else if (UseCompressedClassPointers && !UseCompactObjectHeaders) {
store_klass_gap(obj, zr);
}
}
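
With compact headers, the whole header initialization above collapses to a single 64-bit store, because each Klass caches a prototype mark word that already encodes its own narrow class pointer. Roughly, under that assumption (the field name is invented for the sketch, modeled on Klass::prototype_header_offset()):

#include <cstdint>

struct Klass { uint64_t prototype_header; };  // assumed field behind prototype_header_offset()

inline void initialize_header_compact(uint64_t* obj, const Klass* k) {
  obj[0] = k->prototype_header;  // mark bits + narrow Klass* in one store;
                                 // no separate klass field or klass gap to fill
}
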
@@ -322,7 +328,7 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) {
verify_oop(receiver);
// explicit null check not needed since load from [klass_offset] causes a trap
// check against inline cache
-  assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check");
+  assert(UseCompactObjectHeaders || !MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check");

cmp_klass(receiver, iCache, rscratch1);
}
11 changes: 11 additions & 0 deletions src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp
@@ -91,4 +91,15 @@ void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) {
__ b(continuation());
}

int C2LoadNKlassStub::max_size() const {
return 8;
}

void C2LoadNKlassStub::emit(C2_MacroAssembler& masm) {
__ bind(entry());
Register d = dst();
__ ldr(d, Address(d, OM_OFFSET_NO_MONITOR_VALUE_TAG(header)));
__ b(continuation());
}
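
The 8-byte bound follows from fixed-width AArch64 encodings: the stub body is exactly one ldr plus one b, 4 bytes each (a sanity note stated as an assumption, not JDK source):

static_assert(2 * 4 == 8, "C2LoadNKlassStub: ldr + b, two 4-byte instructions");
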

#undef __
27 changes: 27 additions & 0 deletions src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -2254,3 +2254,30 @@ bool C2_MacroAssembler::in_scratch_emit_size() {
}
return MacroAssembler::in_scratch_emit_size();
}

void C2_MacroAssembler::load_nklass_compact(Register dst, Register obj, Register index, int scale, int disp) {
C2LoadNKlassStub* stub = new (Compile::current()->comp_arena()) C2LoadNKlassStub(dst);
Compile::current()->output()->add_stub(stub);

// Note: Don't clobber obj anywhere in that method!

// The incoming address is pointing into obj-start + klass_offset_in_bytes. We need to extract
// obj-start, so that we can load from the object's mark-word instead. Usually the address
// comes as obj-start in obj and klass_offset_in_bytes in disp. However, sometimes C2
// emits code that pre-computes obj-start + klass_offset_in_bytes into a register, and
// then passes that register as obj and 0 in disp. The following code extracts the base
// and offset to load the mark-word.
int offset = oopDesc::mark_offset_in_bytes() + disp - oopDesc::klass_offset_in_bytes();
if (index == noreg) {
ldr(dst, Address(obj, offset));
} else {
lea(dst, Address(obj, index, Address::lsl(scale)));
ldr(dst, Address(dst, offset));
}
// NOTE: We can't use tbnz here, because the target is sometimes too far away
// and cannot be encoded.
tst(dst, markWord::monitor_value);
br(Assembler::NE, stub->entry());
bind(stub->continuation());
lsr(dst, dst, markWord::klass_shift);
}
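
A worked example of the offset arithmetic in the comment above, with header offsets assumed for illustration (mark word at offset 0; klass_offset_in_bytes() == 4 under compact headers):

inline int mark_load_offset(int disp) {
  const int mark_offset  = 0;  // assumed oopDesc::mark_offset_in_bytes()
  const int klass_offset = 4;  // assumed oopDesc::klass_offset_in_bytes()
  return mark_offset + disp - klass_offset;
}
// mark_load_offset(4) == 0  -> obj held obj-start, disp held klass_offset
// mark_load_offset(0) == -4 -> obj already held obj-start + klass_offset

The tbnz remark reflects AArch64 branch ranges: tbz/tbnz encode a 14-bit offset (about +/-32 KiB), while the tst + b.cond pair reaches about +/-1 MiB, enough to reach out-of-line stubs.
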
2 changes: 2 additions & 0 deletions src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
@@ -177,4 +177,6 @@
void vector_signum_sve(FloatRegister dst, FloatRegister src, FloatRegister zero,
FloatRegister one, FloatRegister vtmp, PRegister pgtmp, SIMD_RegVariant T);

void load_nklass_compact(Register dst, Register obj, Register index, int scale, int disp);

#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
49 changes: 47 additions & 2 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -4430,8 +4430,30 @@ void MacroAssembler::load_method_holder(Register holder, Register method) {
ldr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass*
}

// Loads the obj's Klass* into dst.
// Preserves all registers (incl src, rscratch1 and rscratch2).
void MacroAssembler::load_nklass_compact(Register dst, Register src) {
assert(UseCompactObjectHeaders, "expects UseCompactObjectHeaders");

Label fast;

// Check if we can take the (common) fast path, if obj is unlocked.
ldr(dst, Address(src, oopDesc::mark_offset_in_bytes()));
tbz(dst, exact_log2(markWord::monitor_value), fast);

// Fetch displaced header
ldr(dst, Address(dst, OM_OFFSET_NO_MONITOR_VALUE_TAG(header)));

// Fast-path: shift to get narrowKlass.
bind(fast);
lsr(dst, dst, markWord::klass_shift);
}

void MacroAssembler::load_klass(Register dst, Register src) {
-  if (UseCompressedClassPointers) {
+  if (UseCompactObjectHeaders) {
+    load_nklass_compact(dst, src);
+    decode_klass_not_null(dst);
+  } else if (UseCompressedClassPointers) {
ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
decode_klass_not_null(dst);
} else {
@@ -4470,8 +4492,13 @@ void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, R
}

void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
+  assert_different_registers(oop, trial_klass, tmp);
  if (UseCompressedClassPointers) {
-    ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
+    if (UseCompactObjectHeaders) {
+      load_nklass_compact(tmp, oop);
+    } else {
+      ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
+    }
if (CompressedKlassPointers::base() == nullptr) {
cmp(trial_klass, tmp, LSL, CompressedKlassPointers::shift());
return;
@@ -4488,9 +4515,26 @@ void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp)
cmp(trial_klass, tmp);
}

void MacroAssembler::cmp_klass(Register src, Register dst, Register tmp1, Register tmp2) {
if (UseCompactObjectHeaders) {
load_nklass_compact(tmp1, src);
load_nklass_compact(tmp2, dst);
cmpw(tmp1, tmp2);
} else if (UseCompressedClassPointers) {
ldrw(tmp1, Address(src, oopDesc::klass_offset_in_bytes()));
ldrw(tmp2, Address(dst, oopDesc::klass_offset_in_bytes()));
cmpw(tmp1, tmp2);
} else {
ldr(tmp1, Address(src, oopDesc::klass_offset_in_bytes()));
ldr(tmp2, Address(dst, oopDesc::klass_offset_in_bytes()));
cmp(tmp1, tmp2);
}
}

void MacroAssembler::store_klass(Register dst, Register src) {
// FIXME: Should this be a store release? concurrent gcs assumes
// klass length is valid if klass field is not null.
assert(!UseCompactObjectHeaders, "not with compact headers");
if (UseCompressedClassPointers) {
encode_klass_not_null(src);
strw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
@@ -4500,6 +4544,7 @@ void MacroAssembler::store_klass(Register dst, Register src) {
}

void MacroAssembler::store_klass_gap(Register dst, Register src) {
assert(!UseCompactObjectHeaders, "not with compact headers");
if (UseCompressedClassPointers) {
// Store to klass gap in destination
strw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
2 changes: 2 additions & 0 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -849,9 +849,11 @@ class MacroAssembler: public Assembler {
void load_method_holder(Register holder, Register method);

// oop manipulations
void load_nklass_compact(Register dst, Register src);
void load_klass(Register dst, Register src);
void store_klass(Register dst, Register src);
void cmp_klass(Register oop, Register trial_klass, Register tmp);
void cmp_klass(Register src, Register dst, Register tmp1, Register tmp2);

void resolve_weak_handle(Register result, Register tmp1, Register tmp2);
void resolve_oop_handle(Register result, Register tmp1, Register tmp2);
28 changes: 21 additions & 7 deletions src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
@@ -3568,12 +3568,22 @@ void TemplateTable::_new() {

// The object is initialized before the header. If the object size is
// zero, go directly to the header initialization.
-  __ sub(r3, r3, sizeof(oopDesc));
+  if (UseCompactObjectHeaders) {
+    assert(is_aligned(oopDesc::base_offset_in_bytes(), BytesPerLong), "oop base offset must be 8-byte-aligned");
+    __ sub(r3, r3, oopDesc::base_offset_in_bytes());
+  } else {
+    __ sub(r3, r3, sizeof(oopDesc));
+  }
__ cbz(r3, initialize_header);

// Initialize object fields
{
-    __ add(r2, r0, sizeof(oopDesc));
+    if (UseCompactObjectHeaders) {
+      assert(is_aligned(oopDesc::base_offset_in_bytes(), BytesPerLong), "oop base offset must be 8-byte-aligned");
+      __ add(r2, r0, oopDesc::base_offset_in_bytes());
+    } else {
+      __ add(r2, r0, sizeof(oopDesc));
+    }
Label loop;
__ bind(loop);
__ str(zr, Address(__ post(r2, BytesPerLong)));
@@ -3583,11 +3593,15 @@ void TemplateTable::_new() {

// initialize object header only.
__ bind(initialize_header);
-  __ mov(rscratch1, (intptr_t)markWord::prototype().value());
-  __ str(rscratch1, Address(r0, oopDesc::mark_offset_in_bytes()));
-  __ store_klass_gap(r0, zr);  // zero klass gap for compressed oops
-  __ store_klass(r0, r4);      // store klass last
+  if (UseCompactObjectHeaders) {
+    __ ldr(rscratch1, Address(r4, Klass::prototype_header_offset()));
+    __ str(rscratch1, Address(r0, oopDesc::mark_offset_in_bytes()));
+  } else {
+    __ mov(rscratch1, (intptr_t)markWord::prototype().value());
+    __ str(rscratch1, Address(r0, oopDesc::mark_offset_in_bytes()));
+    __ store_klass_gap(r0, zr); // zero klass gap for compressed oops
+    __ store_klass(r0, r4);     // store klass last
+  }
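
The two branches of _new differ only in how much header precedes the field area that gets zeroed; the sizes below are assumptions for illustration, not measured values:

const int header_legacy  = 16; // assumed sizeof(oopDesc): 8-byte mark word + klass word
const int header_compact = 8;  // assumed oopDesc::base_offset_in_bytes(): mark word only
// The interpreter zeroes (instance_size - header) bytes starting at the field
// area, so compact headers let fields begin 8 bytes earlier in the object.
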
{
SkipIfEqual skip(_masm, &DTraceAllocProbes, false);
// Trigger dtrace event for fastpath
2 changes: 1 addition & 1 deletion src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp
@@ -197,7 +197,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
temp_reg, temp_reg2, itable_index, L_no_such_interface);

// Reduce "estimate" such that "padding" does not drop below 8.
-    const ptrdiff_t estimate = 124;
+    const ptrdiff_t estimate = UseCompactObjectHeaders ? 132 : 124;
const ptrdiff_t codesize = __ pc() - start_pc;
slop_delta = (int)(estimate - codesize);
slop_bytes += slop_delta;
4 changes: 4 additions & 0 deletions src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
@@ -217,6 +217,10 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) {
__ b(_continuation);
}

void LoadKlassStub::emit_code(LIR_Assembler* ce) {
// Currently not needed.
Unimplemented();
}

// Call return is directly after patch word
int PatchingStub::_patch_info_offset = 0;
4 changes: 4 additions & 0 deletions src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp
@@ -298,6 +298,10 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) {
__ b(_continuation);
}

void LoadKlassStub::emit_code(LIR_Assembler* ce) {
// Currently not needed.
Unimplemented();
}

// Implementation of patching:
// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes).
