/* * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
// Declaration and definition of StubGenerator (no .hpp file). // For a more detailed description of the stub routine structure // see the comment in stubRoutines.hpp
// Call stubs are used to call Java from C // // Arguments: // c_rarg0: call wrapper address address // c_rarg1: result address // c_rarg2: result type BasicType // c_rarg3: method Method* // c_rarg4: (interpreter) entry point address // c_rarg5: parameters intptr_t* // c_rarg6: parameter size (in words) int // c_rarg7: thread Thread* // // There is no return from the stub itself as any Java result // is written to result // // we save x1 (ra) as the return PC at the base of the frame and // link x8 (fp) below it as the frame pointer installing sp (x2) // into fp. // // we save x10-x17, which accounts for all the c arguments. // // TODO: strictly do we need to save them all? they are treated as // volatile by C so could we omit saving the ones we are going to // place in global registers (thread? method?) or those we only use // during setup of the Java call? // // we don't need to save x5 which C uses as an indirect result location // return register. // // we don't need to save x6-x7 and x28-x31 which both C and Java treat as // volatile // // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary // registers and C expects to be callee-save // // so the stub frame looks like this when we enter Java code // // [ return_from_Java ] <--- sp // [ argument word n ] // ... 
// -34 [ argument word 1 ] // -33 [ saved f27 ] <--- sp_after_call // -32 [ saved f26 ] // -31 [ saved f25 ] // -30 [ saved f24 ] // -29 [ saved f23 ] // -28 [ saved f22 ] // -27 [ saved f21 ] // -26 [ saved f20 ] // -25 [ saved f19 ] // -24 [ saved f18 ] // -23 [ saved f9 ] // -22 [ saved f8 ] // -21 [ saved x27 ] // -20 [ saved x26 ] // -19 [ saved x25 ] // -18 [ saved x24 ] // -17 [ saved x23 ] // -16 [ saved x22 ] // -15 [ saved x21 ] // -14 [ saved x20 ] // -13 [ saved x19 ] // -12 [ saved x18 ] // -11 [ saved x9 ] // -10 [ call wrapper (x10) ] // -9 [ result (x11) ] // -8 [ result type (x12) ] // -7 [ method (x13) ] // -6 [ entry point (x14) ] // -5 [ parameters (x15) ] // -4 [ parameter size (x16) ] // -3 [ thread (x17) ] // -2 [ saved fp (x8) ] // -1 [ saved ra (x1) ] // 0 [ ] <--- fp == saved sp (x2)
// Call stub stack layout word offsets from fp enum call_stub_layout {
sp_after_call_off = -33,
// set up frame and move sp to end of save area
__ enter();
__ addi(sp, fp, sp_after_call_off * wordSize);
// save register parameters and Java temporary/global registers // n.b. we save thread even though it gets installed in // xthread because we want to sanity check tp later
__ sd(c_rarg7, thread);
__ sw(c_rarg6, parameter_size);
__ sd(c_rarg5, parameters);
__ sd(c_rarg4, entry_point);
__ sd(c_rarg3, method);
__ sd(c_rarg2, result_type);
__ sd(c_rarg1, result);
__ sd(c_rarg0, call_wrapper);
// install Java thread in global register now we have saved // whatever value it held
__ mv(xthread, c_rarg7);
// And method
__ mv(xmethod, c_rarg3);
// set up the heapbase register
__ reinit_heapbase();
#ifdef ASSERT // make sure we have no pending exceptions
{
Label L;
__ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
__ beqz(t0, L);
__ stop("StubRoutines::call_stub: entered with pending exception");
__ BIND(L);
} #endif // pass parameters if any
__ mv(esp, sp);
__ slli(t0, c_rarg6, LogBytesPerWord);
__ sub(t0, sp, t0); // Move SP out of the way
__ andi(sp, t0, -2 * wordSize);
BLOCK_COMMENT("pass parameters if any");
Label parameters_done; // parameter count is still in c_rarg6 // and parameter pointer identifying param 1 is in c_rarg5
__ beqz(c_rarg6, parameters_done);
// save current address for use by exception handling code
return_address = __ pc();
// store result depending on type (everything that is not // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) // n.b. this assumes Java returns an integral result in x10 // and a floating result in j_farg0
__ ld(j_rarg2, result);
Label is_long, is_float, is_double, exit;
__ ld(j_rarg1, result_type);
__ mv(t0, (u1)T_OBJECT);
__ beq(j_rarg1, t0, is_long);
__ mv(t0, (u1)T_LONG);
__ beq(j_rarg1, t0, is_long);
__ mv(t0, (u1)T_FLOAT);
__ beq(j_rarg1, t0, is_float);
__ mv(t0, (u1)T_DOUBLE);
__ beq(j_rarg1, t0, is_double);
// handle T_INT case
__ sw(x10, Address(j_rarg2));
__ BIND(exit);
// pop parameters
__ addi(esp, fp, sp_after_call_off * wordSize);
// Return point for a Java call if there's an exception thrown in // Java code. The exception is caught and transformed into a // pending exception stored in JavaThread that can be tested from // within the VM. // // Note: Usually the parameters are removed by the callee. In case // of an exception crossing an activation frame boundary, that is // not the case if the callee is compiled code => need to setup the // sp. // // x10: exception oop
// complete return to VM
assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
__ j(StubRoutines::_call_stub_return_address);
return start;
}
// Continuation point for runtime calls returning with a pending // exception. The pending exception check happened in the runtime // or native call stub. The pending exception in Thread is // converted into a Java-level exception. // // Contract with Java-level exception handlers: // x10: exception // x13: throwing pc // // NOTE: At entry of this stub, exception-pc must be in RA !!
// NOTE: this is always used as a jump target within generated code // so it just needs to be generated code with no x86 prolog
// Upon entry, RA points to the return address returning into // Java (interpreted or compiled) code; i.e., the return address // becomes the throwing pc. // // Arguments pushed before the runtime call are still on the stack // but the exception handler will reset the stack pointer -> // ignore them. A potential result in registers can be ignored as // well.
#ifdef ASSERT // make sure this code is only executed if there is a pending exception
{
Label L;
__ ld(t0, Address(xthread, Thread::pending_exception_offset()));
__ bnez(t0, L);
__ stop("StubRoutines::forward exception: no pending exception (1)");
__ bind(L);
} #endif
// compute exception handler into x9
// call the VM to find the handler address associated with the // caller address. pass thread in x10 and caller pc (ret address) // in x11. n.b. the caller pc is in ra, unlike x86 where it is on // the stack.
__ mv(c_rarg1, ra); // ra will be trashed by the VM call so we move it to x9 // (callee-saved) because we also need to pass it to the handler // returned by this call.
__ mv(x9, ra);
BLOCK_COMMENT("call exception_handler_for_return_address");
__ call_VM_leaf(CAST_FROM_FN_PTR(address,
SharedRuntime::exception_handler_for_return_address),
xthread, c_rarg1); // we should not really care that ra is no longer the callee // address. we saved the value the handler needs in x9 so we can // just copy it to x13. however, the C2 handler will push its own // frame and then calls into the VM and the VM code asserts that // the PC for the frame above the handler belongs to a compiled // Java method. So, we restore ra here to satisfy that assert.
__ mv(ra, x9); // setup x10 & x13 & clear pending exception
__ mv(x13, x9);
__ mv(x9, x10);
__ ld(x10, Address(xthread, Thread::pending_exception_offset()));
__ sd(zr, Address(xthread, Thread::pending_exception_offset()));
#ifdef ASSERT // make sure exception is set
{
Label L;
__ bnez(x10, L);
__ stop("StubRoutines::forward exception: no pending exception (2)");
__ bind(L);
} #endif
// continue at exception handler // x10: exception // x13: throwing pc // x9: exception handler
__ verify_oop(x10);
__ jr(x9);
// object is in x10 // make sure object is 'reasonable'
__ beqz(x10, exit); // if obj is NULL it is OK
#if INCLUDE_ZGC if (UseZGC) { // Check if mask is good. // verifies that ZAddressBadMask & x10 == 0
__ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset()));
__ andr(c_rarg2, x10, c_rarg3);
__ bnez(c_rarg2, error);
} #endif
// Check if the oop is in the right area of memory
__ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask());
__ andr(c_rarg2, x10, c_rarg3);
__ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits());
// Compare c_rarg2 and c_rarg3.
__ bne(c_rarg2, c_rarg3, error);
// make sure klass is 'reasonable', which is not zero.
__ load_klass(x10, x10); // get klass
__ beqz(x10, error); // if klass is NULL it is broken
// return if everything seems ok
__ bind(exit);
__ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3
__ ret();
// handle errors
__ bind(error);
__ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3
// The inner part of zero_words(). // // Inputs: // x28: the HeapWord-aligned base address of an array to zero. // x29: the count in HeapWords, x29 > 0. // // Returns x28 and x29, adjusted for the caller to clear. // x28: the base address of the tail of words left to clear. // x29: the number of words in the tail. // x29 < MacroAssembler::zero_words_block_size.
// Bulk copy of blocks of 8 words. // // count is a count of words. // // Precondition: count >= 8 // // Postconditions: // // The least significant bit of count contains the remaining count // of words to copy. The rest of count is trash. // // s and d are adjusted to point to the remaining words to copy // void generate_copy_longs(Label &start, Register s, Register d, Register count,
copy_direction direction) { int unit = wordSize * direction; int bias = wordSize;
// All-singing all-dancing memory copy. // // Copy count units of memory from s to d. The size of a unit is // step, which can be positive or negative depending on the direction // of copy. If is_aligned is false, we align the source address. // /* * if (is_aligned) { * if (count >= 32) * goto copy32_loop; * if (count >= 8) * goto copy8_loop; * goto copy_small; * } * bool is_backwards = step < 0; * int granularity = uabs(step); * count = count * granularity; * count bytes * * if (is_backwards) { * s += count; * d += count; * } * * count limit maybe greater than 16, for better performance * if (count < 16) { * goto copy_small; * } * * if ((dst % 8) == (src % 8)) { * aligned; * goto copy_big; * } * * copy_big: * if the amount to copy is more than (or equal to) 32 bytes goto copy32_loop * else goto copy8_loop * copy_small: * load element one by one; * done;
*/
// Arguments:
//   size    - element size in bytes
//   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
//             ignored
//   is_oop  - true => oop array, so generate store check code
//   entry   - if non-NULL, receives the no-overlap entry point (see Side Effects)
//   name    - stub name string
//
// Inputs:
//   c_rarg0   - source array address
//   c_rarg1   - destination array address
//   c_rarg2   - element count, treated as ssize_t, can be zero
//
// If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
// the hardware handle it.  The two dwords within qwords that span
// cache line boundaries will still be loaded and stored atomically.
//
// Side Effects:
//   If 'entry' is non-NULL, *entry is set to the no-overlap entry point
//   (the pc right after the frame has been entered), which the matching
//   conjoint copy generator takes as its nooverlap_target.
//
address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry, constchar* name, bool dest_uninitialized = false) { constRegister s = c_rarg0, d = c_rarg1, count = c_rarg2;
RegSet saved_reg = RegSet::of(s, d, count);
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
__ enter();
if (entry != NULL) {
*entry = __ pc(); // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
// Arguments: // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary // ignored // is_oop - true => oop array, so generate store check code // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as ssize_t, can be zero // // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let // the hardware handle it. The two dwords within qwords that span // cache line boundaries will still be loaded and stored atomically. //
address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target,
address* entry, constchar* name, bool dest_uninitialized = false) { constRegister s = c_rarg0, d = c_rarg1, count = c_rarg2;
RegSet saved_regs = RegSet::of(s, d, count);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
__ enter();
if (entry != NULL) {
*entry = __ pc(); // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
// use fwd copy when (d-s) above_equal (count*size)
__ sub(t0, d, s);
__ slli(t1, count, exact_log2(size));
__ bgeu(t0, t1, nooverlap_target);
DecoratorSet decorators = IN_HEAP | IS_ARRAY; if (dest_uninitialized) {
decorators |= IS_DEST_UNINITIALIZED;
} if (aligned) {
decorators |= ARRAYCOPY_ALIGNED;
}
// Arguments:
//   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
//             ignored
//   name    - stub name string
//
// Inputs:
//   c_rarg0   - source array address
//   c_rarg1   - destination array address
//   c_rarg2   - element count, treated as ssize_t, can be zero
//
// If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
// we let the hardware handle it.  The one to eight bytes within words,
// dwords or qwords that span cache line boundaries will still be loaded
// and stored atomically.
//
// Side Effects:
//   disjoint_byte_copy_entry is set to the no-overlap entry point
//   used by generate_conjoint_byte_copy().
//
// Generates the disjoint arraycopy stub for jbyte elements by delegating to
// generate_disjoint_copy() with an element size of sizeof(jbyte) and
// is_oop == false. 'aligned', 'entry' and 'name' are forwarded unchanged.
address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) {
  return generate_disjoint_copy(sizeof (jbyte), aligned, /* is_oop */ false, entry, name);
}
// Arguments: // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary // ignored // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as ssize_t, can be zero // // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, // we let the hardware handle it. The one to eight bytes within words, // dwords or qwords that span cache line boundaries will still be loaded // and stored atomically. //
// Generates the conjoint arraycopy stub for jbyte elements by delegating to
// generate_conjoint_copy() with an element size of sizeof(jbyte) and
// is_oop == false. All other arguments are forwarded unchanged.
address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
                                    address* entry, const char* name) {
  return generate_conjoint_copy(sizeof (jbyte), aligned, /* is_oop */ false,
                                nooverlap_target, entry, name);
}
// Arguments: // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary // ignored // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as ssize_t, can be zero // // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we // let the hardware handle it. The two or four words within dwords // or qwords that span cache line boundaries will still be loaded // and stored atomically. // // Side Effects: // disjoint_short_copy_entry is set to the no-overlap entry point // used by generate_conjoint_short_copy(). //
// Generates the disjoint arraycopy stub for jshort elements by delegating to
// generate_disjoint_copy() with an element size of sizeof(jshort) and
// is_oop == false. 'aligned', 'entry' and 'name' are forwarded unchanged.
address generate_disjoint_short_copy(bool aligned,
                                     address* entry, const char* name) {
  return generate_disjoint_copy(sizeof (jshort), aligned, /* is_oop */ false, entry, name);
}
// Arguments: // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary // ignored // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as ssize_t, can be zero // // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we // let the hardware handle it. The two or four words within dwords // or qwords that span cache line boundaries will still be loaded // and stored atomically. //
// Generates the conjoint arraycopy stub for jshort elements by delegating to
// generate_conjoint_copy() with an element size of sizeof(jshort) and
// is_oop == false. All other arguments are forwarded unchanged.
address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
                                     address* entry, const char* name) {
  return generate_conjoint_copy(sizeof (jshort), aligned, /* is_oop */ false,
                                nooverlap_target, entry, name);
}
// Arguments: // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary // ignored // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as ssize_t, can be zero // // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let // the hardware handle it. The two dwords within qwords that span // cache line boundaries will still be loaded and stored atomically. // // Side Effects: // disjoint_int_copy_entry is set to the no-overlap entry point // used by generate_conjoint_int_oop_copy(). //
// Generates the disjoint arraycopy stub for jint elements by delegating to
// generate_disjoint_copy() with an element size of sizeof(jint) and
// is_oop == false.
//
// n.b. 'dest_uninitialized' is accepted but not forwarded, so the callee
// runs with its own default for that argument.
address generate_disjoint_int_copy(bool aligned, address* entry,
                                   const char* name, bool dest_uninitialized = false) {
  return generate_disjoint_copy(sizeof (jint), aligned, /* is_oop */ false, entry, name);
}
// Arguments: // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary // ignored // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as ssize_t, can be zero // // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let // the hardware handle it. The two dwords within qwords that span // cache line boundaries will still be loaded and stored atomically. //
// Generates the conjoint arraycopy stub for jint elements by delegating to
// generate_conjoint_copy() with an element size of sizeof(jint) and
// is_oop == false.
//
// n.b. 'dest_uninitialized' is accepted but not forwarded, so the callee
// runs with its own default for that argument.
address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
                                   address* entry, const char* name,
                                   bool dest_uninitialized = false) {
  return generate_conjoint_copy(sizeof (jint), aligned, /* is_oop */ false,
                                nooverlap_target, entry, name);
}
// Arguments: // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes // ignored // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as size_t, can be zero // // Side Effects: // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the // no-overlap entry point used by generate_conjoint_long_oop_copy(). //
// Generates the disjoint arraycopy stub for jlong elements by delegating to
// generate_disjoint_copy() with an element size of sizeof(jlong) and
// is_oop == false.
//
// n.b. 'dest_uninitialized' is accepted but not forwarded, so the callee
// runs with its own default for that argument.
address generate_disjoint_long_copy(bool aligned, address* entry,
                                    const char* name, bool dest_uninitialized = false) {
  return generate_disjoint_copy(sizeof (jlong), aligned, /* is_oop */ false, entry, name);
}
// Arguments: // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes // ignored // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as size_t, can be zero //
// Generates the conjoint arraycopy stub for jlong elements by delegating to
// generate_conjoint_copy() with an element size of sizeof(jlong) and
// is_oop == false.
//
// n.b. 'dest_uninitialized' is accepted but not forwarded, so the callee
// runs with its own default for that argument.
address generate_conjoint_long_copy(bool aligned,
                                    address nooverlap_target, address* entry,
                                    const char* name, bool dest_uninitialized = false) {
  return generate_conjoint_copy(sizeof (jlong), aligned, /* is_oop */ false,
                                nooverlap_target, entry, name);
}
// Arguments: // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes // ignored // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as size_t, can be zero // // Side Effects: // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the // no-overlap entry point used by generate_conjoint_long_oop_copy(). //
// Generates the disjoint arraycopy stub for oop elements by delegating to
// generate_disjoint_copy() with is_oop == true. The element size is
// sizeof(jint) when UseCompressedOops is set and sizeof(jlong) otherwise;
// 'dest_uninitialized' is forwarded to the callee.
address generate_disjoint_oop_copy(bool aligned, address* entry,
                                   const char* name, bool dest_uninitialized) {
  const size_t oop_size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
  return generate_disjoint_copy(oop_size, aligned, /* is_oop */ true, entry, name,
                                dest_uninitialized);
}
// Arguments: // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes // ignored // name - stub name string // // Inputs: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - element count, treated as size_t, can be zero //
// Generates the conjoint arraycopy stub for oop elements by delegating to
// generate_conjoint_copy() with is_oop == true. The element size is
// sizeof(jint) when UseCompressedOops is set and sizeof(jlong) otherwise;
// 'dest_uninitialized' is forwarded to the callee.
address generate_conjoint_oop_copy(bool aligned,
                                   address nooverlap_target, address* entry,
                                   const char* name, bool dest_uninitialized) {
  const size_t oop_size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
  return generate_conjoint_copy(oop_size, aligned, /* is_oop */ true,
                                nooverlap_target, entry, name, dest_uninitialized);
}
// Helper for generating a dynamic type check. // Smashes t0, t1. void generate_type_check(Register sub_klass, Register super_check_offset, Register super_klass,
Label& L_success) {
assert_different_registers(sub_klass, super_check_offset, super_klass);
// Registers used as temps (x7, x9, x18 are save-on-entry) constRegister count_save = x19; // orig elementscount constRegister start_to = x18; // destination array start address constRegister copied_oop = x7; // actual oop copied constRegister r9_klass = x9; // oop._klass
//--------------------------------------------------------------- // Assembler stub will be used for this call to arraycopy // if the two arrays are subtypes of Object[] but the // destination array type is not equal to or a supertype // of the source type. Each element must be separately // checked.
assert_different_registers(from, to, count, ckoff, ckval, start_to,
copied_oop, r9_klass, count_save);
// save the original count
__ mv(count_save, count);
// Copy from low to high addresses
__ mv(start_to, to); // Save destination array start address
__ j(L_load_element);
// ======== begin loop ======== // (Loop is rotated; its entry is L_load_element.) // Loop control: // for count to 0 do // copied_oop = load_heap_oop(from++) // ... generate_type_check ... // store_heap_oop(to++, copied_oop) // end
__ align(OptoLoopAlignment);
__ BIND(L_store_element);
__ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, noreg, AS_RAW); // store the oop
__ add(to, to, UseCompressedOops ? 4 : 8);
__ sub(count, count, 1);
__ beqz(count, L_do_card_marks);
__ load_klass(r9_klass, copied_oop);// query the object klass
generate_type_check(r9_klass, ckoff, ckval, L_store_element); // ======== end loop ========
// It was a real error; we must depend on the caller to finish the job.
// Register count = remaining oops, count_save = total oops.
// Emit GC store barriers for the oops we have copied and report
// their number to the caller.
// Have to clean up high 32 bits of 'src_pos' and 'dst_pos'.
__ zero_extend(src_pos, src_pos, 32);
__ zero_extend(dst_pos, dst_pos, 32);
BLOCK_COMMENT("arraycopy_range_checks done");
}
// // Generate 'unsafe' array copy stub // Though just as safe as the other stubs, it takes an unscaled // size_t argument instead of an element count. // // Input: // c_rarg0 - source array address // c_rarg1 - destination array address // c_rarg2 - byte count, treated as ssize_t, can be zero // // Examines the alignment of the operands and dispatches // to a long, int, short, or byte copy loop. //
address generate_unsafe_copy(constchar* name,
address byte_copy_entry,
address short_copy_entry,
address int_copy_entry,
address long_copy_entry) {
assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL &&
int_copy_entry != NULL && long_copy_entry != NULL);
Label L_long_aligned, L_int_aligned, L_short_aligned; constRegister s = c_rarg0, d = c_rarg1, count = c_rarg2;
// Registers used as temps constRegister dst_klass = c_rarg5;
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
__ enter(); // required for proper stackwalking of RuntimeStub frame
// bump this on entry, not on exit:
inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
//----------------------------------------------------------------------- // Assembler stub will be used for this call to arraycopy // if the following conditions are met: // // (1) src and dst must not be null. // (2) src_pos must not be negative. // (3) dst_pos must not be negative. // (4) length must not be negative. // (5) src klass and dst klass should be the same and not NULL. // (6) src and dst should be arrays. // (7) src_pos + length must not exceed length of src. // (8) dst_pos + length must not exceed length of dst. //
// if [src == NULL] then return -1
__ beqz(src, L_failed);
// if [src_pos < 0] then return -1 // i.e. sign bit set
__ andi(t0, src_pos, 1UL << 31);
__ bnez(t0, L_failed);
// if [dst == NULL] then return -1
__ beqz(dst, L_failed);
// if [dst_pos < 0] then return -1 // i.e. sign bit set
__ andi(t0, dst_pos, 1UL << 31);
__ bnez(t0, L_failed);
// registers used as temp constRegister scratch_length = x28; // elements count to copy constRegister scratch_src_klass = x29; // array klass constRegister lh = x30; // layout helper
// if [length < 0] then return -1
__ addw(scratch_length, length, zr); // length (elements count, 32-bits value) // i.e. sign bit set
__ andi(t0, scratch_length, 1UL << 31);
__ bnez(t0, L_failed);
__ load_klass(scratch_src_klass, src); #ifdef ASSERT
{
BLOCK_COMMENT("assert klasses not null {");
Label L1, L2;
__ bnez(scratch_src_klass, L2); // it is broken if klass is NULL
__ bind(L1);
__ stop("broken null klass");
__ bind(L2);
__ load_klass(t0, dst, t1);
__ beqz(t0, L1); // this would be broken also
BLOCK_COMMENT("} assert klasses not null done");
} #endif
// if [src->klass() != dst->klass()] then return -1
__ load_klass(t1, dst);
__ bne(t1, scratch_src_klass, L_failed);
// if [!src->is_Array()] then return -1
// i.e. (lh >= 0)
__ andi(t0, lh, 1UL << 31);
__ beqz(t0, L_failed);
// At this point, it is known to be a typeArray (array_tag 0x3). #ifdef ASSERT
{
BLOCK_COMMENT("assert primitive array {");
Label L;
__ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
__ bge(lh, t1, L);
__ stop("must be a primitive array");
__ bind(L);
BLOCK_COMMENT("} assert primitive array done");
} #endif
// Get array_header_in_bytes() int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width;
__ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32;
__ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset
__ add(src, src, t0_offset); // src array offset
__ add(dst, dst, t0_offset); // dst array offset
BLOCK_COMMENT("choose copy loop based on element size");
// next registers should be set before the jump to corresponding stub constRegister from = c_rarg0; // source array address constRegister to = c_rarg1; // destination array address constRegister count = c_rarg2; // elements count
// 'from', 'to', 'count' registers should be set in such order // since they are the same as 'src', 'src_pos', 'dst'.
assert(Klass::_lh_log2_element_size_shift == 0, "fix this code");
// The possible values of elsize are 0-3, i.e. exact_log2(element // size in bytes). We do a simple bitwise binary search.
__ BIND(L_copy_bytes);
__ andi(t0, x22_elsize, 2);
__ bnez(t0, L_copy_ints);
__ andi(t0, x22_elsize, 1);
__ bnez(t0, L_copy_shorts);
__ add(from, src, src_pos); // src_addr
__ add(to, dst, dst_pos); // dst_addr
__ addw(count, scratch_length, zr); // length
__ j(RuntimeAddress(byte_copy_entry));
__ BIND(L_checkcast_copy); // live at this point: scratch_src_klass, scratch_length, t2 (dst_klass)
{ // Before looking at dst.length, make sure dst is also an objArray.
__ lwu(t0, Address(t2, lh_offset));
__ mvw(t1, objArray_lh);
__ bne(t0, t1, L_failed);
// It is safe to examine both src.length and dst.length.
arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
t2, L_failed);
__ load_klass(dst_klass, dst); // reload
// Marshal the base address arguments now, freeing registers.
__ shadd(from, src_pos, src, t0, LogBytesPerHeapOop);
__ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
__ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop);
__ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
__ addw(count, length, zr); // length (reloaded) constRegister sco_temp = c_rarg3; // this register is free now
assert_different_registers(from, to, count, sco_temp,
dst_klass, scratch_src_klass);
// Generate the type check. constint sco_offset = in_bytes(Klass::super_check_offset_offset());
__ lwu(sco_temp, Address(dst_klass, sco_offset));
// Fetch destination element klass from the ObjArrayKlass header. int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
__ ld(dst_klass, Address(dst_klass, ek_offset));
__ lwu(sco_temp, Address(dst_klass, sco_offset));
// the checkcast_copy loop needs two extra arguments:
assert(c_rarg3 == sco_temp, "#3 already in place"); // Set up arguments for checkcast_copy_entry.
__ mv(c_rarg4, dst_klass); // dst.klass.element_klass
__ j(RuntimeAddress(checkcast_copy_entry));
}
__ BIND(L_failed);
__ mv(x10, -1);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret();
return start;
}
// // Generate stub for array fill. If "aligned" is true, the // "to" address is assumed to be heapword aligned. // // Arguments for generated stub: // to: c_rarg0 // value: c_rarg1 // count: c_rarg2 treated as signed //
address generate_fill(BasicType t, bool aligned, constchar* name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
BLOCK_COMMENT("Entry:");
constRegister to = c_rarg0; // source array address constRegister value = c_rarg1; // value constRegister count = c_rarg2; // elements count
constRegister bz_base = x28; // base for block_zero routine constRegister cnt_words = x29; // temp register constRegister tmp_reg = t1;
__ enter();
Label L_fill_elements, L_exit1;
int shift = -1; switch (t) { case T_BYTE:
shift = 0;
// Zero extend value // 8 bit -> 16 bit
__ andi(value, value, 0xff);
__ mv(tmp_reg, value);
__ slli(tmp_reg, tmp_reg, 8);
__ orr(value, value, tmp_reg);
// 16 bit -> 32 bit
__ mv(tmp_reg, value);
__ slli(tmp_reg, tmp_reg, 16);
__ orr(value, value, tmp_reg);
__ mv(tmp_reg, 8 >> shift); // Short arrays (< 8 bytes) fill by element
__ bltu(count, tmp_reg, L_fill_elements); break; case T_SHORT:
shift = 1; // Zero extend value // 16 bit -> 32 bit
__ andi(value, value, 0xffff);
__ mv(tmp_reg, value);
__ slli(tmp_reg, tmp_reg, 16);
__ orr(value, value, tmp_reg);
// Short arrays (< 8 bytes) fill by element
__ mv(tmp_reg, 8 >> shift);
__ bltu(count, tmp_reg, L_fill_elements); break; case T_INT:
shift = 2;
// Short arrays (< 8 bytes) fill by element
__ mv(tmp_reg, 8 >> shift);
__ bltu(count, tmp_reg, L_fill_elements); break; default: ShouldNotReachHere();
}
// Align source address at 8 bytes address boundary.
Label L_skip_align1, L_skip_align2, L_skip_align4; if (!aligned) { switch (t) { case T_BYTE: // One byte misalignment happens only for byte arrays.
__ andi(t0, to, 1);
__ beqz(t0, L_skip_align1);
__ sb(value, Address(to, 0));
__ addi(to, to, 1);
__ addiw(count, count, -1);
__ bind(L_skip_align1); // Fallthrough case T_SHORT: // Two bytes misalignment happens only for byte and short (char) arrays.
__ andi(t0, to, 2);
__ beqz(t0, L_skip_align2);
__ sh(value, Address(to, 0));
__ addi(to, to, 2);
__ addiw(count, count, -(2 >> shift));
__ bind(L_skip_align2); // Fallthrough case T_INT: // Align to 8 bytes, we know we are 4 byte aligned to start.
__ andi(t0, to, 4);
__ beqz(t0, L_skip_align4);
__ sw(value, Address(to, 0));
__ addi(to, to, 4);
__ addiw(count, count, -(4 >> shift));
__ bind(L_skip_align4); break; default: ShouldNotReachHere();
}
}
// // Fill large chunks //
__ srliw(cnt_words, count, 3 - shift); // number of words
// Remaining count is less than 8 bytes. Fill it by a single store. // Note that the total length is no less than 8 bytes. if (t == T_BYTE || t == T_SHORT) {
__ beqz(count, L_exit1);
__ shadd(to, count, to, tmp_reg, shift); // points to the end
__ sd(value, Address(to, -8)); // overwrite some elements
__ bind(L_exit1);
__ leave();
__ ret();
}
//*** jint // Aligned versions
StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry, "arrayof_jint_disjoint_arraycopy");
StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy, "arrayof_jint_arraycopy"); // In 64 bit we need both aligned and unaligned versions of jint arraycopy. // entry_jint_arraycopy always points to the unaligned version
StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry, "jint_disjoint_arraycopy");
StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry,
&entry_jint_arraycopy, "jint_arraycopy");
if (needle_isL != haystack_isL) {
__ inflate_lo32(ch1, tmp, match_mask, trailing_zeros);
} // xorr, sub, orr, notr, andr // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] // eg: // first: aa aa aa aa aa aa aa aa // ch2: aa aa li nx jd ka aa aa // match_mask: 80 80 00 00 00 00 80 80
__ compute_match_mask(ch2, first, match_mask, mask1, mask2);
__ align(OptoLoopAlignment);
__ bind(L_HAS_ZERO_LOOP_NOMATCH); // 1) Restore "result" index. Index was wordSize/str2_chr_size * N until // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP, // so, result was increased at max by wordSize/str2_chr_size - 1, so, // respective high bit wasn't changed. L_LOOP_PROCEED will increase // result by analyzed characters value, so, we can just reset lower bits // in result here. Clear 2 lower bits for UU/UL and 3 bits for LL // 2) restore needle_len and haystack_len values from "compressed" haystack_len // 3) advance haystack value to represent next haystack octet. result & 7/3 is // index of last analyzed substring inside current octet. So, haystack in at // respective start address. We need to advance it to next octet
__ andi(match_mask, result, wordSize / haystack_chr_size - 1);
__ srli(needle_len, haystack_len, BitsPerByte * wordSize / 2);
__ andi(result, result, haystack_isL ? -8 : -4);
__ slli(tmp, match_mask, haystack_chr_shift);
__ sub(haystack, haystack, tmp);
__ addw(haystack_len, haystack_len, zr);
__ j(L_LOOP_PROCEED);
// Zero the m*n result.
mv(Rhi_mn, zr);
mv(Rlo_mn, zr);
}
// The core multiply-accumulate step of a Montgomery // multiplication. The idea is to schedule operations as a // pipeline so that instructions with long latencies (loads and // multiplies) have time to complete before their results are // used. This most benefits in-order implementations of the // architecture but out-of-order ones also benefit. void step() {
block_comment("step"); // MACC(Ra, Rb, tmp0, tmp1, tmp2); // Ra = *++Pa; // Rb = *--Pb;
mulhu(Rhi_ab, Ra, Rb);
mul(Rlo_ab, Ra, Rb);
addi(Pa, Pa, wordSize);
ld(Ra, Address(Pa));
addi(Pb, Pb, -wordSize);
ld(Rb, Address(Pb));
acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the // previous iteration. // MACC(Rm, Rn, tmp0, tmp1, tmp2); // Rm = *++Pm; // Rn = *--Pn;
mulhu(Rhi_mn, Rm, Rn);
mul(Rlo_mn, Rm, Rn);
addi(Pm, Pm, wordSize);
ld(Rm, Address(Pm));
addi(Pn, Pn, -wordSize);
ld(Rn, Address(Pn));
acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
}
#ifndef PRODUCT // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply");
{
mul(Rlo_mn, Rm, Rn);
add(Rlo_mn, tmp0, Rlo_mn);
Label ok;
beqz(Rlo_mn, ok);
stop("broken Montgomery multiply");
bind(ok);
} #endif // We have very carefully set things up so that // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate // the lower half of Rm * Rn because we know the result already: // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff // tmp0 != 0. So, rather than do a mul and an cad we just set // the carry flag iff tmp0 is nonzero. // // mul(Rlo_mn, Rm, Rn); // cad(zr, tmp0, Rlo_mn);
addi(t0, tmp0, -1);
sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero
cadc(tmp0, tmp1, Rhi_mn, t0);
adc(tmp1, tmp2, zr, t0);
mv(tmp2, zr);
}
// Emits the "post2" sequence: folds the low half of the pending m*n
// product into tmp0, stores tmp0 to Pm_base[i - len], then folds in the
// high half while shifting the accumulator down one word.
//
// Parameters:
//   i, len - registers; only their difference i - len is used, as the
//            word index into Pm_base at which tmp0 is stored.
//
// Register effects visible here: Rj is overwritten (first with the index,
// then with the store address), t0 is passed to cad/cadc/adc, and
// tmp0/tmp1/tmp2 are updated (tmp2 ends up zero).
// NOTE(review): cad/cadc/adc are presumably add-with-carry helpers that
// keep the carry in t0 -- confirm against their definitions.
void post2(Register i, Register len) {
  block_comment("post2");
  sub(Rj, i, len);
  cad(tmp0, tmp0, Rlo_mn, t0); // The pending m*n, low part
  // As soon as we know the least significant digit of our result,
  // store it.
  // Pm_base[i-len] = tmp0;
  // Rj as temp register
  slli(Rj, Rj, LogBytesPerWord);
  add(Rj, Pm_base, Rj);
  sd(tmp0, Address(Rj));
  // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0;
  cadc(tmp0, tmp1, Rhi_mn, t0); // The pending m*n, high part
  adc(tmp1, tmp2, zr, t0);
  mv(tmp2, zr);
}
// A carry in tmp0 after Montgomery multiplication means that we // should subtract multiples of n from our result in m. We'll // keep doing that until there is no carry. void normalize(Register len) {
block_comment("normalize"); // while (tmp0) // tmp0 = sub(Pm_base, Pn_base, tmp0, len);
Label loop, post, again; Register cnt = tmp1, i = tmp2; // Re-use registers; we're done with them now
beqz(tmp0, post); {
bind(again); {
mv(i, zr);
mv(cnt, len);
slli(Rn, i, LogBytesPerWord);
add(Rm, Pm_base, Rn);
ld(Rm, Address(Rm));
add(Rn, Pn_base, Rn);
ld(Rn, Address(Rn));
mv(t0, 1); // set carry flag, i.e. no borrow
align(16);
bind(loop); {
notr(Rn, Rn);
add(Rm, Rm, t0);
add(Rm, Rm, Rn);
sltu(t0, Rm, Rn);
slli(Rn, i, LogBytesPerWord); // Rn as temp register
add(Rn, Pm_base, Rn);
sd(Rm, Address(Rn));
add(i, i, 1);
slli(Rn, i, LogBytesPerWord);
add(Rm, Pm_base, Rn);
ld(Rm, Address(Rm));
add(Rn, Pn_base, Rn);
ld(Rn, Address(Rn));
sub(cnt, cnt, 1);
} bnez(cnt, loop);
addi(tmp0, tmp0, -1);
add(tmp0, tmp0, t0);
} bnez(tmp0, again);
} bind(post);
}
// Move memory at s to d, reversing words. // Increments d to end of copied memory // Destroys tmp1, tmp2 // Preserves len // Leaves s pointing to the address which was in d at start void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) {
assert(tmp1->encoding() < x28->encoding(), "register corruption");
assert(tmp2->encoding() < x28->encoding(), "register corruption");
#ifndef PRODUCT // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply");
{
mul(Rlo_mn, Rm, Rn);
add(Rlo_mn, tmp0, Rlo_mn);
Label ok;
beqz(Rlo_mn, ok); {
stop("broken Montgomery multiply");
} bind(ok);
} #endif // We have very carefully set things up so that // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate // the lower half of Rm * Rn because we know the result already: // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff // tmp0 != 0. So, rather than do a mul and a cad we just set // the carry flag iff tmp0 is nonzero. // // mul(Rlo_mn, Rm, Rn); // cad(zr, tmp, Rlo_mn);
addi(t0, tmp0, -1);
sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero
cadc(tmp0, tmp1, Rhi_mn, t0);
adc(tmp1, tmp2, zr, t0);
mv(tmp2, zr);
}
public: /** * Fast Montgomery multiplication. The derivation of the * algorithm is in A Cryptographic Library for the Motorola * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. * * Arguments: * * Inputs for multiplication: * c_rarg0 - int array elements a * c_rarg1 - int array elements b * c_rarg2 - int array elements n (the modulus) * c_rarg3 - int length * c_rarg4 - int inv * c_rarg5 - int array elements m (the result) * * Inputs for squaring: * c_rarg0 - int array elements a * c_rarg1 - int array elements n (the modulus) * c_rarg2 - int length * c_rarg3 - int inv * c_rarg4 - int array elements m (the result) *
*/
address generate_multiply() {
Label argh, nothing;
bind(argh);
stop("MontgomeryMultiply total_allocation must be <= 8192");
srliw(Rlen, Rlen, 1); // length in longwords = len/2
{ // Copy input args, reversing as we go. We use Ra as a // temporary variable.
reverse(Ra, Pa_base, Rlen, Ri, Rj); if (!_squaring)
reverse(Ra, Pb_base, Rlen, Ri, Rj);
reverse(Ra, Pn_base, Rlen, Ri, Rj);
}
// Push all call-saved registers and also Pm_base which we'll need // at the end.
save_regs();
mv(Ra, Pm_base); // Save Pm_base in Ra
restore_regs(); // Restore caller's Pm_base
// Copy our result into caller's Pm_base
reverse(Pm_base, Ra, Rlen, Ri, Rj);
leave();
bind(nothing);
ret();
return entry;
}
/** * * Arguments: * * Inputs: * c_rarg0 - int array elements a * c_rarg1 - int array elements n (the modulus) * c_rarg2 - int length * c_rarg3 - int inv * c_rarg4 - int array elements m (the result) *
*/
address generate_square() {
Label argh;
bind(argh);
stop("MontgomeryMultiply total_allocation must be <= 8192");
srliw(Rlen, Rlen, 1); // length in longwords = len/2
{ // Copy input args, reversing as we go. We use Ra as a // temporary variable.
reverse(Ra, Pa_base, Rlen, Ri, Rj);
reverse(Ra, Pn_base, Rlen, Ri, Rj);
}
// Push all call-saved registers and also Pm_base which we'll need // at the end.
save_regs();
mv(Pm_base, Ra);
mv(tmp0, zr);
mv(tmp1, zr);
mv(tmp2, zr);
block_comment("for (int i = 0; i < len; i++) {");
mv(Ri, zr); {
Label loop, end;
bind(loop);
bge(Ri, Rlen, end);
mv(Ra, Pm_base); // Save Pm_base in Ra
restore_regs(); // Restore caller's Pm_base
// Copy our result into caller's Pm_base
reverse(Pm_base, Ra, Rlen, Ri, Rj);
leave();
ret();
return entry;
}
}; #endif// COMPILER2
// Continuation point for throwing of implicit exceptions that are // not handled in the current activation. Fabricates an exception // oop and initiates normal exception dispatching in this // frame. Since we need to preserve callee-saved values (currently // only for C2, but done for C1 as well) we need a callee-saved oop // map and therefore have to make these stubs into RuntimeStubs // rather than BufferBlobs. If the compiler needs all registers to // be preserved between the fault point and the exception handler // then it must assume responsibility for that in // AbstractCompiler::continuation_for_implicit_null_exception or // continuation_for_implicit_division_by_zero_exception. All other // implicit exceptions (e.g., NullPointerException or // AbstractMethodError on entry) are either at call sites or // otherwise assume that stack unwinding will be initiated, so // caller saved registers were assumed volatile in the compiler.
#undef __ #define __ masm->
address generate_throw_exception(constchar* name,
address runtime_entry, Register arg1 = noreg, Register arg2 = noreg) { // Information about frame layout at time of blocking runtime call. // Note that we only have to preserve callee-saved registers since // the compilers are responsible for supplying a continuation point // if they expect all registers to be preserved. // n.b. riscv asserts that frame::arg_reg_save_area_bytes == 0
assert_cond(runtime_entry != NULL); enum layout {
fp_off = 0,
fp_off2,
return_off,
return_off2,
framesize // inclusive of return address
};
// This is an inlined and slightly modified version of call_VM // which has the ability to fetch the return PC out of // thread-local storage and also sets up last_Java_sp slightly // differently than the real call_VM
__ enter(); // Save FP and RA before call
assert(is_even(framesize / 2), "sp not 16-byte aligned");
// ra and fp are already in place
__ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog
int frame_complete = __ pc() - start;
// Set up last_Java_sp and last_Java_fp
address the_pc = __ pc();
__ set_last_Java_frame(sp, fp, the_pc, t0);
if (return_barrier) { // preserve possible return value from a method returning to the return barrier
__ sub(sp, sp, 2 * wordSize);
__ fsd(f10, Address(sp, 0 * wordSize));
__ sd(x10, Address(sp, 1 * wordSize));
}
__ mvw(c_rarg1, (return_barrier ? 1 : 0));
__ call_VM_leaf(CAST_FROM_FN_PTR(address, Continuation::prepare_thaw), xthread, c_rarg1);
__ mv(t1, x10); // x10 contains the size of the frames to thaw, 0 if overflow or no more frames
if (return_barrier) { // restore return value (no safepoint in the call to thaw, so even an oop return value should be OK)
__ ld(x10, Address(sp, 1 * wordSize));
__ fld(f10, Address(sp, 0 * wordSize));
__ add(sp, sp, 2 * wordSize);
}
Label thaw_success; // t1 contains the size of the frames to thaw, 0 if overflow or no more frames
__ bnez(t1, thaw_success);
__ la(t0, ExternalAddress(StubRoutines::throw_StackOverflowError_entry()));
__ jr(t0);
__ bind(thaw_success);
// make room for the thawed frames
__ sub(t0, sp, t1);
__ andi(sp, t0, -16); // align
if (return_barrier) { // save original return value -- again
__ sub(sp, sp, 2 * wordSize);
__ fsd(f10, Address(sp, 0 * wordSize));
__ sd(x10, Address(sp, 1 * wordSize));
}
// If we want, we can templatize thaw by kind, and have three different entries
__ mvw(c_rarg1, (uint32_t)kind);
__ call_VM_leaf(Continuation::thaw_entry(), xthread, c_rarg1);
__ mv(t1, x10); // x10 is the sp of the yielding frame
if (return_barrier) { // restore return value (no safepoint in the call to thaw, so even an oop return value should be OK)
__ ld(x10, Address(sp, 1 * wordSize));
__ fld(f10, Address(sp, 0 * wordSize));
__ add(sp, sp, 2 * wordSize);
} else {
__ mv(x10, zr); // return 0 (success) from doYield
}
// we're now on the yield frame (which is in an address above us b/c sp has been pushed down)
__ mv(fp, t1);
__ sub(sp, t1, 2 * wordSize); // now pointing to fp spill
if (return_barrier_exception) {
__ ld(c_rarg1, Address(fp, -1 * wordSize)); // return address
__ verify_oop(x10);
__ mv(x9, x10); // save return value contaning the exception oop in callee-saved x9
// For c2: c_rarg0 is junk, call to runtime to write a checkpoint. // It returns a jobject handle to the event writer. // The handle is dereferenced and the return value is the event writer oop. static RuntimeStub* generate_jfr_write_checkpoint() { enum layout {
fp_off,
fp_off2,
return_off,
return_off2,
framesize // inclusive of return address
};
int insts_size = 512; int locs_size = 64;
CodeBuffer code("jfr_write_checkpoint", insts_size, locs_size);
OopMapSet* oop_maps = new OopMapSet();
MacroAssembler* masm = new MacroAssembler(&code);
MacroAssembler* _masm = masm;
OopMap* map = new OopMap(framesize, 1);
oop_maps->add_gc_map(the_pc - start, map);
RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
RuntimeStub::new_runtime_stub("jfr_write_checkpoint", &code, frame_complete,
(framesize >> (LogBytesPerWord - LogBytesPerInt)),
oop_maps, false); return stub;
}
#endif// INCLUDE_JFR
#undef __
// Initialization void generate_initial() { // Generate initial stubs and initializes the entry points
// entry points that exist in all platforms Note: This is code // that could be shared among different platforms - however the // benefit seems to be smaller than the disadvantage of having a // much more complicated generator structure. See also comment in // stubRoutines.hpp.
// is referenced by megamorphic call
StubRoutines::_catch_exception_entry = generate_catch_exception();
// Build this early so it's available for the interpreter.
StubRoutines::_throw_StackOverflowError_entry =
generate_throw_exception("StackOverflowError throw_exception",
CAST_FROM_FN_PTR(address,
SharedRuntime::throw_StackOverflowError));
StubRoutines::_throw_delayed_StackOverflowError_entry =
generate_throw_exception("delayed StackOverflowError throw_exception",
CAST_FROM_FN_PTR(address,
SharedRuntime::throw_delayed_StackOverflowError));
}
void generate_all() { // support for verify_oop (must happen after universe_init) if (VerifyOops) {
StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
}
public:
// Constructs the generator on top of StubCodeGenerator(code) and runs the
// generation step selected by `phase`:
//   0          -> generate_initial()
//   1          -> generate_phase1()
//   any other  -> generate_all()
StubGenerator(CodeBuffer* code, int phase) : StubCodeGenerator(code) {
  if (phase == 0) {
    generate_initial();
  } else if (phase == 1) {
    generate_phase1(); // stubs that must be available for the interpreter
  } else {
    generate_all();
  }
}
}; // end class declaration
#define UCM_TABLE_MAX_ENTRIES 8 void StubGenerator_generate(CodeBuffer* code, int phase) { if (UnsafeCopyMemory::_table == NULL) {
UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
}
StubGenerator g(code, phase);
}
Messung V0.5 in Prozent
¤ Dauer der Verarbeitung: 0.70 Sekunden
(vorverarbeitet am 2026-04-26)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.