Skip to content

Commit

Permalink
Merge branch 'upstream-master'
Browse files Browse the repository at this point in the history
  • Loading branch information
Datadog Syncup Service committed Oct 12, 2023
2 parents e4b2941 + c1f698d commit 19b8f24
Show file tree
Hide file tree
Showing 55 changed files with 692 additions and 252 deletions.
2 changes: 1 addition & 1 deletion make/modules/java.base/Lib.gmk
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ ifeq ($(ENABLE_FALLBACK_LINKER), true)
NAME := fallbackLinker, \
CFLAGS := $(CFLAGS_JDKLIB) $(LIBFFI_CFLAGS), \
LDFLAGS := $(LDFLAGS_JDKLIB) \
$(call SET_SHARED_LIBRARY_ORIGIN), \
$(call SET_SHARED_LIBRARY_ORIGIN), \
LIBS := $(LIBFFI_LIBS), \
LIBS_windows := $(LIBFFI_LIBS) ws2_32.lib, \
))
Expand Down
5 changes: 5 additions & 0 deletions src/hotspot/cpu/riscv/assembler_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1790,6 +1790,11 @@ enum Nf {
INSN(vlse32_v, 0b0000111, 0b110, 0b10, 0b0);
INSN(vlse64_v, 0b0000111, 0b111, 0b10, 0b0);

INSN(vsse8_v, 0b0100111, 0b000, 0b10, 0b0);
INSN(vsse16_v, 0b0100111, 0b101, 0b10, 0b0);
INSN(vsse32_v, 0b0100111, 0b110, 0b10, 0b0);
INSN(vsse64_v, 0b0100111, 0b111, 0b10, 0b0);

#undef INSN
#undef patch_VLdSt

Expand Down
7 changes: 7 additions & 0 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1289,6 +1289,13 @@ class MacroAssembler: public Assembler {
}

// vector pseudo instructions
// Pseudo instruction: rotate vd left by `shift` bits (32-bit version).
// The bits that wrap around are produced by a logical right shift of
// (32 - shift) into tmp_vr; vd is then shifted left in place and the two
// parts are OR-ed back together into vd. tmp_vr is overwritten.
inline void vrole32_vi(VectorRegister vd, uint32_t shift, VectorRegister tmp_vr) {
  const uint32_t wrap_shift = 32 - shift;
  vsrl_vi(tmp_vr, vd, wrap_shift);
  vsll_vi(vd, vd, shift);
  vor_vv(vd, vd, tmp_vr);
}

// Pseudo instruction: forwards to vl1re8_v with the same operands (vd, rs).
inline void vl1r_v(VectorRegister vd, Register rs) {
vl1re8_v(vd, rs);
}
Expand Down
141 changes: 141 additions & 0 deletions src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4277,6 +4277,142 @@ class StubGenerator: public StubCodeGenerator {
return (address) start;
}

/**
 * Emit the ChaCha20 quarter round over values held in four vector registers.
 *
 * The generated sequence is, in order:
 *   a += b, d ^= a, d <<<= 16
 *   c += d, b ^= c, b <<<= 12
 *   a += b, d ^= a, d <<<= 8
 *   c += d, b ^= c, b <<<= 7
 *
 * @param aVec   the vector register containing only the "a" values
 * @param bVec   the vector register containing only the "b" values
 * @param cVec   the vector register containing only the "c" values
 * @param dVec   the vector register containing only the "d" values
 * @param tmp_vr temporary vector register that holds intermediate values
 *               of the rotates; its previous contents are overwritten
 */
void chacha20_quarter_round(VectorRegister aVec, VectorRegister bVec,
                            VectorRegister cVec, VectorRegister dVec, VectorRegister tmp_vr) {
  // Both halves of the quarter round have the same shape and differ only in
  // the rotate amounts: { rotate applied to d, rotate applied to b }.
  const uint32_t rotations[2][2] = { {16, 12}, {8, 7} };

  for (int half = 0; half < 2; half++) {
    const uint32_t d_rot = rotations[half][0];
    const uint32_t b_rot = rotations[half][1];

    // a += b, d ^= a, d <<<= d_rot
    __ vadd_vv(aVec, aVec, bVec);
    __ vxor_vv(dVec, dVec, aVec);
    __ vrole32_vi(dVec, d_rot, tmp_vr);

    // c += d, b ^= c, b <<<= b_rot
    __ vadd_vv(cVec, cVec, dVec);
    __ vxor_vv(bVec, bVec, cVec);
    __ vrole32_vi(bVec, b_rot, tmp_vr);
  }
}

/**
 * int com.sun.crypto.provider.ChaCha20Cipher.implChaCha20Block(int[] initState, byte[] result)
 *
 * Input arguments:
 *   c_rarg0 - state, the starting state
 *   c_rarg1 - key_stream, the array that will hold the result of the ChaCha20 block function
 *
 * Return value:
 *   c_rarg0 - length of the produced key stream (vector length shifted left by 6,
 *             i.e. 64 bytes per block)
 *
 * Implementation Note:
 *   Parallelization is achieved by loading individual state elements into vectors for N blocks.
 *   N depends on single vector register length.
 */
address generate_chacha20Block() {
  Label L_Rounds;

  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "chacha20Block");
  address start = __ pc();
  __ enter();

  // Incoming arguments and scratch registers.
  const Register state      = c_rarg0;
  const Register key_stream = c_rarg1;
  const Register cursor     = t0;   // walks over the source state
  const Register vl         = t1;   // vector length granted by vsetivli
  const Register shared_t2  = t2;   // loop counter first, store stride later

  const int state_words = 16;       // number of 32-bit words in the state
  const int word_bytes  = 4;        // distance between consecutive state words

  // One working vector register per state word, kept in an array so the
  // repetitive opcodes below can be emitted from loops.
  const VectorRegister work_vrs[16] = {
    v0, v1, v2,  v3,  v4,  v5,  v6,  v7,
    v8, v9, v10, v11, v12, v13, v14, v15
  };
  const VectorRegister tmp_vr     = v16;
  const VectorRegister counter_vr = v17;

  // Request 16 elements, as com.sun.crypto.provider.ChaCha20Cipher.KS_MAX_LEN
  // is 1024 at the Java level.
  __ vsetivli(vl, 16, Assembler::e32, Assembler::m1);

  // Load from source state: the zero stride duplicates every source word
  // to all elements of the corresponding vector.
  __ mv(cursor, state);
  for (int word = 0; word < state_words; word++) {
    __ vlse32_v(work_vrs[word], cursor, zr);
    __ addi(cursor, cursor, word_bytes);
  }

  // Adjust the counter (state word 12) for every individual block.
  __ vid_v(counter_vr);
  __ vadd_vv(work_vrs[12], work_vrs[12], counter_vr);

  // Perform 10 iterations of the 8 quarter round set.
  // Each row names the work_vrs indices used as (a, b, c, d).
  const int quarter_rounds[8][4] = {
    {0, 4,  8, 12}, {1, 5,  9, 13}, {2, 6, 10, 14}, {3, 7, 11, 15},
    {0, 5, 10, 15}, {1, 6, 11, 12}, {2, 7,  8, 13}, {3, 4,  9, 14}
  };

  __ mv(shared_t2, 10);
  __ BIND(L_Rounds);
  for (int q = 0; q < 8; q++) {
    const int* idx = quarter_rounds[q];
    chacha20_quarter_round(work_vrs[idx[0]], work_vrs[idx[1]],
                           work_vrs[idx[2]], work_vrs[idx[3]], tmp_vr);
  }
  __ sub(shared_t2, shared_t2, 1);
  __ bnez(shared_t2, L_Rounds);

  // Add the original state into the end working state: duplicate every
  // source word into tmp_vr again, then add it to the post-loop vector.
  __ mv(cursor, state);
  for (int word = 0; word < state_words; word++) {
    __ vlse32_v(tmp_vr, cursor, zr);
    __ addi(cursor, cursor, word_bytes);
    __ vadd_vv(work_vrs[word], work_vrs[word], tmp_vr);
  }
  // Add the counter overlay onto work_vrs[12] at the end.
  __ vadd_vv(work_vrs[12], work_vrs[12], counter_vr);

  // Store result to key stream. Every block occupies 64 bytes, so 64 is the
  // stride of each vector store; key_stream advances one word per vector.
  __ mv(shared_t2, 64);
  for (int word = 0; word < state_words; word++) {
    __ vsse32_v(work_vrs[word], key_stream, shared_t2);
    __ addi(key_stream, key_stream, word_bytes);
  }

  // Return length of output key_stream.
  __ slli(c_rarg0, vl, 6);

  __ leave();
  __ ret();

  return (address) start;
}

#if INCLUDE_JFR

static void jfr_prologue(address the_pc, MacroAssembler* _masm, Register thread) {
Expand Down Expand Up @@ -4496,6 +4632,11 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress");
StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB");
}

if (UseChaCha20Intrinsics) {
StubRoutines::_chacha20Block = generate_chacha20Block();
}

#endif // COMPILER2_OR_JVMCI
}

Expand Down
10 changes: 10 additions & 0 deletions src/hotspot/cpu/riscv/vm_version_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,16 @@ void VM_Version::initialize() {
warning("Block zeroing is not available");
FLAG_SET_DEFAULT(UseBlockZeroing, false);
}
if (UseRVV) {
if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
FLAG_SET_DEFAULT(UseChaCha20Intrinsics, true);
}
} else if (UseChaCha20Intrinsics) {
if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
warning("Chacha20 intrinsic requires RVV instructions (not available on this CPU)");
}
FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
}

#ifdef COMPILER2
c2_initialize();
Expand Down
3 changes: 2 additions & 1 deletion src/hotspot/cpu/s390/assembler_s390.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,8 @@ class RelAddr {
if ((target == nullptr) || (target == pc)) {
return 0; // Yet unknown branch destination.
} else {
guarantee(is_in_range_of_RelAddr(target, pc, shortForm), "target not within reach");
guarantee(is_in_range_of_RelAddr(target, pc, shortForm),
"target not within reach at " INTPTR_FORMAT ", distance = " INTX_FORMAT, p2i(pc), (target - pc) );
return (int)((target - pc)>>1);
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox
if (DiagnoseSyncOnValueBasedClasses != 0) {
load_klass(tmp, Roop);
testbit(Address(tmp, Klass::access_flags_offset()), exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS));
z_btrue(slow_case);
branch_optimized(Assembler::bcondAllOne, slow_case);
}

assert(LockingMode != LM_MONITOR, "LM_MONITOR is already handled, by emit_lock()");
Expand Down Expand Up @@ -170,7 +170,7 @@ void C1_MacroAssembler::unlock_object(Register Rmark, Register Roop, Register Rb
z_lg(Rmark, Address(Roop, hdr_offset));
z_lgr(tmp, Rmark);
z_nill(tmp, markWord::monitor_value);
z_brnz(slow_case);
branch_optimized(Assembler::bcondNotZero, slow_case);
lightweight_unlock(Roop, Rmark, tmp, slow_case);
} else if (LockingMode == LM_LEGACY) {
// Test if object header is pointing to the displaced header, and if so, restore
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4173,7 +4173,7 @@ void StubGenerator::generate_compiler_stubs() {
}

// Load x86_64_sort library on supported hardware to enable avx512 sort and partition intrinsics
if (UseAVX > 2 && VM_Version::supports_avx512dq()) {
if (VM_Version::is_intel() && VM_Version::supports_avx512dq()) {
void *libsimdsort = nullptr;
char ebuf_[1024];
char dll_name_simd_sort[JVM_MAXPATHLEN];
Expand Down
9 changes: 6 additions & 3 deletions src/hotspot/share/gc/g1/g1Allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,11 +191,14 @@ size_t G1Allocator::unsafe_max_tlab_alloc() {
uint node_index = current_node_index();
HeapRegion* hr = mutator_alloc_region(node_index)->get();
size_t max_tlab = _g1h->max_tlab_size() * wordSize;
if (hr == nullptr) {

if (hr == nullptr || hr->free() < MinTLABSize) {
// The next TLAB allocation will most probably happen in a new region,
// therefore we can attempt to allocate the maximum allowed TLAB size.
return max_tlab;
} else {
return clamp(hr->free(), MinTLABSize, max_tlab);
}

return MIN2(hr->free(), max_tlab);
}

size_t G1Allocator::used_in_alloc_regions() {
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/serial/cardTableRS.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
#include "precompiled.hpp"
#include "classfile/classLoaderDataGraph.hpp"
#include "gc/serial/cardTableRS.hpp"
#include "gc/serial/generation.hpp"
#include "gc/serial/serialHeap.hpp"
#include "gc/shared/generation.hpp"
#include "gc/shared/space.inline.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/iterator.inline.hpp"
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/serial/defNewGeneration.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
#define SHARE_GC_SERIAL_DEFNEWGENERATION_HPP

#include "gc/serial/cSpaceCounters.hpp"
#include "gc/serial/generation.hpp"
#include "gc/shared/ageTable.hpp"
#include "gc/shared/copyFailedInfo.hpp"
#include "gc/shared/gc_globals.hpp"
#include "gc/shared/generation.hpp"
#include "gc/shared/generationCounters.hpp"
#include "gc/shared/preservedMarks.hpp"
#include "gc/shared/stringdedup/stringDedup.hpp"
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/serial/genMarkSweep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "compiler/oopMap.hpp"
#include "gc/serial/cardTableRS.hpp"
#include "gc/serial/defNewGeneration.hpp"
#include "gc/serial/generation.hpp"
#include "gc/serial/genMarkSweep.hpp"
#include "gc/serial/serialGcRefProcProxyTask.hpp"
#include "gc/serial/serialHeap.hpp"
Expand All @@ -42,7 +43,6 @@
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/generation.hpp"
#include "gc/shared/modRefBarrierSet.hpp"
#include "gc/shared/preservedMarks.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@

#include "precompiled.hpp"
#include "gc/serial/cardTableRS.hpp"
#include "gc/serial/generation.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "gc/shared/continuationGCSupport.inline.hpp"
#include "gc/shared/gcLocker.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/genCollectedHeap.hpp"
#include "gc/shared/generation.hpp"
#include "gc/shared/generationSpec.hpp"
#include "gc/shared/space.inline.hpp"
#include "gc/shared/spaceDecorator.inline.hpp"
Expand Down Expand Up @@ -94,18 +94,6 @@ void Generation::print_summary_info_on(outputStream* st) {

// Utility iterator classes

// Space closure that remembers the first visited space for which
// is_in_reserved(_p) holds. After iteration, sp is that space, or nullptr
// if no visited space matched.
class GenerationIsInReservedClosure : public SpaceClosure {
 public:
  const void* _p;   // address being looked up
  Space* sp;        // first matching space found so far, or nullptr

  GenerationIsInReservedClosure(const void* p) : _p(p), sp(nullptr) {}

  virtual void do_space(Space* s) {
    // A match has already been recorded; later spaces are ignored.
    if (sp != nullptr) {
      return;
    }
    if (s->is_in_reserved(_p)) {
      sp = s;
    }
  }
};

class GenerationIsInClosure : public SpaceClosure {
public:
const void* _p;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -377,4 +377,4 @@ class Generation: public CHeapObj<mtGC> {

};

#endif // SHARE_GC_SHARED_GENERATION_HPP
#endif // SHARE_GC_SERIAL_GENERATION_HPP
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/serial/tenuredGeneration.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#define SHARE_GC_SERIAL_TENUREDGENERATION_HPP

#include "gc/serial/cSpaceCounters.hpp"
#include "gc/shared/generation.hpp"
#include "gc/serial/generation.hpp"
#include "gc/shared/gcStats.hpp"
#include "gc/shared/generationCounters.hpp"
#include "utilities/macros.hpp"
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/shared/genArguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
*/

#include "precompiled.hpp"
#include "gc/serial/generation.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/genArguments.hpp"
#include "gc/shared/generation.hpp"
#include "logging/log.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/shared/genCollectedHeap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
#ifndef SHARE_GC_SHARED_GENCOLLECTEDHEAP_HPP
#define SHARE_GC_SHARED_GENCOLLECTEDHEAP_HPP

#include "gc/serial/generation.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/generation.hpp"
#include "gc/shared/oopStorageParState.hpp"
#include "gc/shared/preGCValues.hpp"
#include "gc/shared/softRefPolicy.hpp"
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/shared/genMemoryPools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
*/

#include "precompiled.hpp"
#include "gc/shared/generation.hpp"
#include "gc/serial/generation.hpp"
#include "gc/shared/genMemoryPools.hpp"
#include "gc/shared/space.hpp"
#if INCLUDE_SERIALGC
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/shared/generationSpec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#ifndef SHARE_GC_SHARED_GENERATIONSPEC_HPP
#define SHARE_GC_SHARED_GENERATIONSPEC_HPP

#include "gc/shared/generation.hpp"
#include "gc/serial/generation.hpp"
#include "utilities/align.hpp"

// The specification of a generation. This class also encapsulates
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/shared/space.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@

#include "gc/shared/space.hpp"

#include "gc/serial/generation.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/generation.hpp"
#include "gc/shared/spaceDecorator.hpp"
#include "oops/oop.inline.hpp"
#include "oops/oopsHierarchy.hpp"
Expand Down
Loading

0 comments on commit 19b8f24

Please sign in to comment.